ckanapi-harvesters 0.0.0__py3-none-any.whl → 0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. ckanapi_harvesters/__init__.py +32 -10
  2. ckanapi_harvesters/auxiliary/__init__.py +26 -0
  3. ckanapi_harvesters/auxiliary/ckan_action.py +93 -0
  4. ckanapi_harvesters/auxiliary/ckan_api_key.py +213 -0
  5. ckanapi_harvesters/auxiliary/ckan_auxiliary.py +293 -0
  6. ckanapi_harvesters/auxiliary/ckan_configuration.py +50 -0
  7. ckanapi_harvesters/auxiliary/ckan_defs.py +10 -0
  8. ckanapi_harvesters/auxiliary/ckan_errors.py +129 -0
  9. ckanapi_harvesters/auxiliary/ckan_map.py +509 -0
  10. ckanapi_harvesters/auxiliary/ckan_model.py +992 -0
  11. ckanapi_harvesters/auxiliary/ckan_vocabulary_deprecated.py +104 -0
  12. ckanapi_harvesters/auxiliary/deprecated.py +82 -0
  13. ckanapi_harvesters/auxiliary/error_level_message.py +51 -0
  14. ckanapi_harvesters/auxiliary/external_code_import.py +98 -0
  15. ckanapi_harvesters/auxiliary/list_records.py +60 -0
  16. ckanapi_harvesters/auxiliary/login.py +163 -0
  17. ckanapi_harvesters/auxiliary/path.py +208 -0
  18. ckanapi_harvesters/auxiliary/proxy_config.py +298 -0
  19. ckanapi_harvesters/auxiliary/urls.py +40 -0
  20. ckanapi_harvesters/builder/__init__.py +40 -0
  21. ckanapi_harvesters/builder/builder_aux.py +20 -0
  22. ckanapi_harvesters/builder/builder_ckan.py +238 -0
  23. ckanapi_harvesters/builder/builder_errors.py +36 -0
  24. ckanapi_harvesters/builder/builder_field.py +122 -0
  25. ckanapi_harvesters/builder/builder_package.py +9 -0
  26. ckanapi_harvesters/builder/builder_package_1_basic.py +1291 -0
  27. ckanapi_harvesters/builder/builder_package_2_harvesters.py +40 -0
  28. ckanapi_harvesters/builder/builder_package_3_multi_threaded.py +45 -0
  29. ckanapi_harvesters/builder/builder_package_example.xlsx +0 -0
  30. ckanapi_harvesters/builder/builder_resource.py +589 -0
  31. ckanapi_harvesters/builder/builder_resource_datastore.py +561 -0
  32. ckanapi_harvesters/builder/builder_resource_datastore_multi_abc.py +367 -0
  33. ckanapi_harvesters/builder/builder_resource_datastore_multi_folder.py +273 -0
  34. ckanapi_harvesters/builder/builder_resource_datastore_multi_harvester.py +278 -0
  35. ckanapi_harvesters/builder/builder_resource_datastore_unmanaged.py +145 -0
  36. ckanapi_harvesters/builder/builder_resource_datastore_url.py +150 -0
  37. ckanapi_harvesters/builder/builder_resource_init.py +126 -0
  38. ckanapi_harvesters/builder/builder_resource_multi_abc.py +361 -0
  39. ckanapi_harvesters/builder/builder_resource_multi_datastore.py +146 -0
  40. ckanapi_harvesters/builder/builder_resource_multi_file.py +505 -0
  41. ckanapi_harvesters/builder/example/__init__.py +21 -0
  42. ckanapi_harvesters/builder/example/builder_example.py +21 -0
  43. ckanapi_harvesters/builder/example/builder_example_aux_fun.py +24 -0
  44. ckanapi_harvesters/builder/example/builder_example_download.py +44 -0
  45. ckanapi_harvesters/builder/example/builder_example_generate_data.py +73 -0
  46. ckanapi_harvesters/builder/example/builder_example_patch_upload.py +51 -0
  47. ckanapi_harvesters/builder/example/builder_example_policy.py +114 -0
  48. ckanapi_harvesters/builder/example/builder_example_test_sql.py +53 -0
  49. ckanapi_harvesters/builder/example/builder_example_tests.py +87 -0
  50. ckanapi_harvesters/builder/example/builder_example_tests_offline.py +57 -0
  51. ckanapi_harvesters/builder/example/package/ckan-dpg.svg +74 -0
  52. ckanapi_harvesters/builder/example/package/users_local.csv +3 -0
  53. ckanapi_harvesters/builder/mapper_datastore.py +93 -0
  54. ckanapi_harvesters/builder/mapper_datastore_multi.py +262 -0
  55. ckanapi_harvesters/builder/specific/__init__.py +11 -0
  56. ckanapi_harvesters/builder/specific/configuration_builder.py +66 -0
  57. ckanapi_harvesters/builder/specific_builder_abc.py +23 -0
  58. ckanapi_harvesters/ckan_api/__init__.py +20 -0
  59. ckanapi_harvesters/ckan_api/ckan_api.py +11 -0
  60. ckanapi_harvesters/ckan_api/ckan_api_0_base.py +896 -0
  61. ckanapi_harvesters/ckan_api/ckan_api_1_map.py +1028 -0
  62. ckanapi_harvesters/ckan_api/ckan_api_2_readonly.py +934 -0
  63. ckanapi_harvesters/ckan_api/ckan_api_3_policy.py +229 -0
  64. ckanapi_harvesters/ckan_api/ckan_api_4_readwrite.py +579 -0
  65. ckanapi_harvesters/ckan_api/ckan_api_5_manage.py +1225 -0
  66. ckanapi_harvesters/ckan_api/ckan_api_params.py +192 -0
  67. ckanapi_harvesters/ckan_api/deprecated/__init__.py +9 -0
  68. ckanapi_harvesters/ckan_api/deprecated/ckan_api_deprecated.py +267 -0
  69. ckanapi_harvesters/ckan_api/deprecated/ckan_api_deprecated_vocabularies.py +189 -0
  70. ckanapi_harvesters/harvesters/__init__.py +23 -0
  71. ckanapi_harvesters/harvesters/data_cleaner/__init__.py +17 -0
  72. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_abc.py +240 -0
  73. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_errors.py +23 -0
  74. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload.py +9 -0
  75. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload_1_basic.py +430 -0
  76. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload_2_geom.py +98 -0
  77. ckanapi_harvesters/harvesters/file_formats/__init__.py +10 -0
  78. ckanapi_harvesters/harvesters/file_formats/csv_format.py +43 -0
  79. ckanapi_harvesters/harvesters/file_formats/file_format_abc.py +39 -0
  80. ckanapi_harvesters/harvesters/file_formats/file_format_init.py +25 -0
  81. ckanapi_harvesters/harvesters/file_formats/shp_format.py +129 -0
  82. ckanapi_harvesters/harvesters/harvester_abc.py +190 -0
  83. ckanapi_harvesters/harvesters/harvester_errors.py +31 -0
  84. ckanapi_harvesters/harvesters/harvester_init.py +30 -0
  85. ckanapi_harvesters/harvesters/harvester_model.py +49 -0
  86. ckanapi_harvesters/harvesters/harvester_params.py +323 -0
  87. ckanapi_harvesters/harvesters/postgre_harvester.py +495 -0
  88. ckanapi_harvesters/harvesters/postgre_params.py +86 -0
  89. ckanapi_harvesters/harvesters/pymongo_data_cleaner.py +173 -0
  90. ckanapi_harvesters/harvesters/pymongo_harvester.py +355 -0
  91. ckanapi_harvesters/harvesters/pymongo_params.py +54 -0
  92. ckanapi_harvesters/policies/__init__.py +20 -0
  93. ckanapi_harvesters/policies/data_format_policy.py +269 -0
  94. ckanapi_harvesters/policies/data_format_policy_abc.py +97 -0
  95. ckanapi_harvesters/policies/data_format_policy_custom_fields.py +156 -0
  96. ckanapi_harvesters/policies/data_format_policy_defs.py +135 -0
  97. ckanapi_harvesters/policies/data_format_policy_errors.py +79 -0
  98. ckanapi_harvesters/policies/data_format_policy_lists.py +234 -0
  99. ckanapi_harvesters/policies/data_format_policy_tag_groups.py +35 -0
  100. ckanapi_harvesters/reports/__init__.py +11 -0
  101. ckanapi_harvesters/reports/admin_report.py +292 -0
  102. {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.3.dist-info}/METADATA +84 -38
  103. ckanapi_harvesters-0.0.3.dist-info/RECORD +105 -0
  104. ckanapi_harvesters/divider/__init__.py +0 -27
  105. ckanapi_harvesters/divider/divider.py +0 -53
  106. ckanapi_harvesters/divider/divider_error.py +0 -59
  107. ckanapi_harvesters/main.py +0 -30
  108. ckanapi_harvesters-0.0.0.dist-info/RECORD +0 -9
  109. {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.3.dist-info}/WHEEL +0 -0
  110. {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,262 @@
1
+ #!python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Code to define the link between a file and a database query
5
+ in the context of a large DataStore defined by the concatenation of multiple files.
6
+ """
7
+ from warnings import warn
8
+ from abc import ABC, abstractmethod
9
+ from typing import Dict, List, Iterable, Callable, Any, Tuple, Generator, Set, Union
10
+
11
+ import numpy as np
12
+ import pandas as pd
13
+
14
+ from ckanapi_harvesters.builder.builder_resource_datastore import DataSchemeConversion
15
+ from ckanapi_harvesters.auxiliary.ckan_model import UpsertChoice
16
+ from ckanapi_harvesters.auxiliary.ckan_defs import ckan_tags_sep
17
+ from ckanapi_harvesters.ckan_api import CkanApi
18
+
19
+
20
class RequestMapperABC(DataSchemeConversion, ABC):
    """
    Describes how a single file is rebuilt out of a DataStore made of several files.

    The class extends the data scheme conversion class and can be derived
    to specify custom data treatments. Apart from download_file_query_list,
    which is abstract, every method here is a neutral default.
    """
    def __init__(self,
                 *, df_upload_fun:Callable[[pd.DataFrame], Any] = None,
                 df_download_fun:Callable[[pd.DataFrame], Any] = None):
        """
        :param df_upload_fun: function forwarded to the DataSchemeConversion constructor
        :param df_download_fun: function forwarded to the DataSchemeConversion constructor
        """
        super().__init__(df_upload_fun=df_upload_fun, df_download_fun=df_download_fun)
        # disabled by default
        self.upsert_only_missing_rows:bool = False

    ## upsert request preparation ----------------
    def get_file_query_of_df(self, df_upload:pd.DataFrame) -> Union[dict,None]:
        """
        Give the {field: value} query arguments which reconstruct the file held in df_upload.

        :param df_upload: the DataFrame representing the file
        :return: always None in this base implementation
        """
        return None

    def last_inserted_row_request(self, ckan:CkanApi, resource_id:str, file_query:dict) -> Union[pd.DataFrame,None]:
        """
        Ask CKAN for the last inserted row(s) matching a given file_query.

        :param ckan:
        :param resource_id:
        :param file_query: a dict of {field: value} combinations representing the arguments of the query to reconstruct a file
        :return: the last row(s) in the database, or None when no specific method is defined (as is the case here)
        """
        return None

    def last_inserted_index_request(self, ckan:CkanApi, resource_id:str, file_query:dict, df_upload:pd.DataFrame) -> Tuple[int, bool, int, Union[pd.DataFrame,None]]:
        """
        Compare the last known row(s) with the data to upload and tell where the upload should restart.

        :param ckan:
        :param resource_id:
        :param file_query: a dict of {field: value} combinations representing the arguments of the query to reconstruct a file
        :param df_upload: the known data corresponding to the file_query to be sent
        :return: a tuple (i_restart, upload_needed, row_count, df_last_row):
            - i_restart: the last known index in the dataframe
            - upload_needed: a boolean indicating if an update is necessary
            - row_count: the number of rows corresponding to the file_query
            - df_last_row: the last found row in the dataframe
            This base implementation always answers (0, True, -1, None).
        """
        i_restart, upload_needed, row_count, df_last_row = 0, True, -1, None
        return i_restart, upload_needed, row_count, df_last_row

    ## download preparation ----------------
    @abstractmethod
    def download_file_query_list(self, ckan: CkanApi, resource_id: str) -> List[dict]:
        """
        List the {key: value} combinations present in the CKAN datastore, one per file to reconstruct.

        :param ckan:
        :param resource_id:
        :return: a list of query arguments defining each file
        """
        raise NotImplementedError()

    def download_file_query_generator(self, ckan: CkanApi, resource_id: str) -> Generator[dict, Any, None]:
        """
        Iterate over the result of download_file_query_list; can be customized in derived classes.

        :param ckan:
        :param resource_id:
        :return: a generator yielding one file query at a time
        """
        file_queries = self.download_file_query_list(ckan=ckan, resource_id=resource_id)
        for single_query in file_queries:
            yield single_query

    def download_file_query(self, ckan: CkanApi, resource_id: str, file_query:dict) -> pd.DataFrame:
        """
        Download the rows of one file by expanding file_query as keyword arguments of ckan.datastore_search.

        :param ckan:
        :param resource_id:
        :param file_query: keyword arguments selecting the file
        :return: the value returned by ckan.datastore_search
        """
        df_file = ckan.datastore_search(resource_id=resource_id, **file_query, search_all=True)
        return df_file
95
+
96
+
97
class RequestFileMapperABC(RequestMapperABC, ABC):
    """
    Request mapper oriented towards files of the file system.

    Adds the naming of the file associated with a file query:
    prefix + query description + suffix.
    """
    def __init__(self,
                 *, df_upload_fun:Callable[[pd.DataFrame], Any] = None,
                 df_download_fun:Callable[[pd.DataFrame], Any] = None):
        """
        :param df_upload_fun: forwarded to the parent constructor
        :param df_download_fun: forwarded to the parent constructor
        """
        super().__init__(df_upload_fun=df_upload_fun, df_download_fun=df_download_fun)
        self.file_name_prefix:str = "table_"
        self.file_name_suffix:str = ".csv"
        # optional user callback turning a file query into the middle part of the file name
        self.file_name_function:Union[Callable[[dict], str], None] = None

    def get_file_name_of_query(self, file_query:dict) -> str:
        """
        Build the file name associated with a file query.

        :param file_query: dict of query arguments identifying the file
        :return: file_name_prefix, then either the result of file_name_function
            or the "key_value" pairs joined by "_", then file_name_suffix
        """
        custom_namer = self.file_name_function
        if custom_namer is not None:
            file_filters_str = custom_namer(file_query)
        else:
            pairs = (f"{key!s}_{value!s}" for key, value in file_query.items())
            file_filters_str = '_'.join(pairs)
        return f"{self.file_name_prefix}{file_filters_str}{self.file_name_suffix}"
116
+
117
+
118
class RequestFileMapperUser(RequestFileMapperABC):
    """
    Basic implementation for when the file query list is provided by the user,
    or when the builder is only used to upload files.
    """
    def __init__(self, file_query_list: Iterable[Tuple[str, dict]],
                 *, df_upload_fun:Callable[[pd.DataFrame], Any] = None,
                 df_download_fun:Callable[[pd.DataFrame], Any] = None):
        """
        :param file_query_list: accepted but not kept by this object
            (it must be stored in the BuilderDataStoreMultiAbc instance)
        :param df_upload_fun: forwarded to the parent constructor
        :param df_download_fun: forwarded to the parent constructor
        """
        super().__init__(df_upload_fun=df_upload_fun, df_download_fun=df_download_fun)
        # file_query_list must be stored in the BuilderDataStoreMultiAbc instance

    def download_file_query_list(self, ckan: CkanApi, resource_id: str) -> List[dict]:
        """
        Not available for this mapper: the list is supplied by the user.

        :raises RuntimeError: always
        """
        message = "File query list is provided by user"
        raise RuntimeError(message)
130
+
131
+
132
class RequestFileMapperLimit(RequestFileMapperABC):
    """
    In this implementation, a file is defined by a certain amount of rows:
    each file query is an {"offset": ..., "limit": ...} pair.
    """
    # number of rows per file when no limit is given to the constructor
    default_limit = 10000

    def __init__(self, limit:int=None,
                 *, df_upload_fun:Callable[[pd.DataFrame], Any] = None,
                 df_download_fun:Callable[[pd.DataFrame], Any] = None):
        """
        :param limit: number of rows per file; default_limit is used when None
        :param df_upload_fun: forwarded to the parent constructor
        :param df_download_fun: forwarded to the parent constructor
        """
        super().__init__(df_upload_fun=df_upload_fun, df_download_fun=df_download_fun)
        if limit is None:
            limit = RequestFileMapperLimit.default_limit
        self.limit:int = limit

    ## download preparation ----------------
    def get_file_name_of_query(self, file_query:dict) -> str:
        """
        Build the file name of a chunk.

        :param file_query: dict holding at least the "offset" key
        :return: prefix + "<first row>_<last row>" (or the result of file_name_function) + suffix,
            where the last row is computed as offset + self.limit - 1
        """
        if self.file_name_function is None:
            first_row = file_query["offset"]
            file_filters_str = f'{first_row}_{first_row+self.limit-1}'
        else:
            file_filters_str = self.file_name_function(file_query)
        return f"{self.file_name_prefix}{file_filters_str}{self.file_name_suffix}"

    def download_file_query_list(self, ckan: CkanApi, resource_id: str) -> List[dict]:
        """
        Split the DataStore into consecutive chunks of self.limit rows.

        :param ckan:
        :param resource_id:
        :return: one {"offset", "limit"} dict per chunk
        """
        row_count = ckan.datastore_search_row_count(resource_id)
        # Ceiling division: a row count which is an exact multiple of the limit
        # must not produce an extra, empty, trailing chunk.
        # At least one chunk is kept so that an empty table still yields one query, as before.
        chunk_count = max(1, -(-row_count // self.limit))
        return [{"offset": self.limit*counter, "limit": self.limit} for counter in range(chunk_count)]

    def download_file_query(self, ckan: CkanApi, resource_id: str, file_query:dict) -> pd.DataFrame:
        """
        Download one chunk, passing the offset and limit of file_query to ckan.datastore_search.

        :param ckan:
        :param resource_id:
        :param file_query: dict holding the "offset" and "limit" keys
        :return: the value returned by ckan.datastore_search
        """
        return ckan.datastore_search(resource_id=resource_id, offset=file_query["offset"], limit=file_query["limit"], search_all=True)
162
+
163
+
164
class RequestFileMapperIndexKeys(RequestFileMapperABC):
    """
    In this implementation, a file is defined by a combination of group_by_keys values.
    It is optionally ordered by sort_by_keys, which enables to restart a transfer when interrupted.
    """
    # number of rows requested when looking for the last inserted row
    last_rows_limit = 1

    def __init__(self, group_by_keys:List[str], sort_by_keys:List[str] = None,
                 *, df_upload_fun:Callable[[pd.DataFrame], Any] = None,
                 df_download_fun:Callable[[pd.DataFrame], Any] = None):
        """
        :param group_by_keys: fields to filter to obtain one file
        :param sort_by_keys: fields used to order the rows of a file (optional)
        :param df_upload_fun: forwarded to the parent constructor
        :param df_download_fun: forwarded to the parent constructor
        """
        super().__init__(df_upload_fun=df_upload_fun, df_download_fun=df_download_fun)
        self.group_by_keys: List[str] = group_by_keys  # fields to filter to obtain one file
        self.sort_by_keys: Union[List[str],None] = sort_by_keys  # fields to order the document

    def get_necessary_fields(self) -> Set[str]:
        """
        :return: the set of field names used by this mapper (group_by_keys and, if defined, sort_by_keys)
        """
        fields = set(self.group_by_keys)
        if self.sort_by_keys is not None:
            fields = fields.union(set(self.sort_by_keys))
        return fields

    def df_upload_alter(self, df_local: pd.DataFrame, file_name:str=None, mapper_kwargs:dict=None, **kwargs) -> pd.DataFrame:
        """
        Overload of df_upload_alter (which calls self.df_upload_fun).
        Rows are ordered by sort_by_keys before being sent to the database,
        in order to be able to restart a transfer from the last transmitted index.

        :param df_local: the local data
        :param file_name: forwarded to the parent implementation
        :param mapper_kwargs: forwarded to the parent implementation
        :return: the data to upload, sorted by sort_by_keys when they are defined
        """
        df_database = super().df_upload_alter(df_local, file_name=file_name, mapper_kwargs=mapper_kwargs, **kwargs)
        if self.sort_by_keys is not None:
            if self.df_upload_fun is None:
                # copy before the in-place sort when no upload function is defined
                df_database = df_database.copy()
            df_database.sort_values(self.sort_by_keys, inplace=True)
        return df_database

    ## upsert request preparation ----------------
    def get_file_query_of_df(self, df_upload:pd.DataFrame) -> Union[dict,None]:
        """
        Deduce the file query from the data to upload.

        :param df_upload: the DataFrame representing the file
        :return: {"filters": {field: value}} when df_upload holds exactly one
            combination of group_by_keys values, None otherwise
        """
        df_file_query = df_upload[self.group_by_keys].drop_duplicates(subset=self.group_by_keys)
        if len(df_file_query) == 1:
            return {"filters": df_file_query.to_dict(orient="records")[0]}
        else:
            return None

    def last_inserted_row_request(self, ckan:CkanApi, resource_id:str, file_query:dict) -> Union[pd.DataFrame,None]:
        """
        Request the last row(s) of a file, according to the sort_by_keys order.

        :param ckan:
        :param resource_id:
        :param file_query: dict holding the "filters" key
        :return: None if sort_by_keys is not defined or upsert_only_missing_rows is False,
            otherwise the result of the datastore search limited to last_rows_limit rows
        """
        if self.sort_by_keys is None or not self.upsert_only_missing_rows:
            return None
        # Every key needs its own "desc": a direction suffix only applies to the field it follows,
        # so "a,b desc" would sort a ascending and not return the last row of the ascending order.
        sort_desc = ckan_tags_sep.join(f"{key} desc" for key in self.sort_by_keys)
        df = ckan.datastore_search(resource_id, filters=file_query["filters"], sort=sort_desc,
                                   limit=self.last_rows_limit, search_all=False)
        return df

    def last_inserted_index_request(self, ckan:CkanApi, resource_id:str, file_query:dict, df_upload:pd.DataFrame) -> Tuple[int, bool, int, pd.DataFrame]:
        """
        Locate the last inserted row in df_upload and return the position to restart the upload from.

        :param ckan:
        :param resource_id:
        :param file_query: dict holding the "filters" key
        :param df_upload: data in the database format (df_upload_fun has been applied)
        :return: a tuple (i_restart, upload_needed, row_count, df_last_row);
            (0, True, ...) is returned when the restart position cannot be determined
        """
        # df_last_row has just been downloaded but no field typing has been applied
        df_last_row = self.last_inserted_row_request(ckan=ckan, resource_id=resource_id, file_query=file_query)
        if df_last_row is None:
            return 0, True, 0, None
        row_count = df_last_row.attrs["total"]
        if df_last_row.empty:
            return 0, True, row_count, df_last_row
        for key in self.sort_by_keys:
            if key in df_upload.columns:
                # apply field typing from df_upload in order to perform line-by-line comparison
                df_last_row[key] = df_last_row[key].astype(df_upload[key].dtype)
        # one boolean column per sort key; a row matches when all its keys equal the last inserted row
        match_table = np.column_stack([df_upload[key] == df_last_row[key].iloc[0] for key in self.sort_by_keys])
        match_array = np.logical_and.reduce(match_table, 1)
        i_restart = np.argwhere(match_array) + 1
        if len(i_restart) == 1 and len(i_restart[0]) == 1:
            i_restart_py = int(i_restart[0][0])
            return i_restart_py, i_restart_py < len(df_upload), row_count, df_last_row
        if len(i_restart) == 0:
            # distinct message: the original text wrongly reported multiple results in this case
            msg = "No result obtained when querying the last inserted index"
        else:
            msg = "Multiple results obtained when querying the last inserted index"
        warn(msg)
        return 0, True, row_count, df_last_row

    ## download preparation ----------------
    def get_file_name_of_query(self, file_query:dict) -> str:
        """
        Build the file name from the filters of a file query.

        :param file_query: dict holding the "filters" key
        :return: prefix + "key_value" pairs of the filters joined by "_"
            (or the result of file_name_function applied to the filters) + suffix
        """
        if self.file_name_function is None:
            file_filters_str = '_'.join([str(key)+'_'+str(value) for key,value in file_query['filters'].items()])
        else:
            file_filters_str = self.file_name_function(file_query['filters'])
        return f"{self.file_name_prefix}{file_filters_str}{self.file_name_suffix}"

    def download_file_query_list(self, ckan: CkanApi, resource_id: str) -> List[dict]:
        """
        List the files, which are defined by the distinct group_by_keys combinations in the database.

        :param ckan:
        :param resource_id:
        :return: one {"filters": {field: value}} dict per combination
        """
        df_list = ckan.datastore_search(resource_id, fields=self.group_by_keys, search_all=True, distinct=True)
        filters_list = df_list.to_dict(orient="records")
        return [{"filters": file_filter} for file_filter in filters_list]
249
+
250
+ # def download_file_query(self, ckan: CkanApi, resource_id: str, file_query:dict) -> pd.DataFrame:
251
+ # return ckan.datastore_search(resource_id=resource_id, filters=file_query["filters"], search_all=True)
252
+
253
+
254
def default_file_mapper_from_primary_key(primary_key:List[str]=None, file_query_list: Iterable[Tuple[str,dict]]=None) -> RequestFileMapperABC:
    """
    Choose a default file mapper from the primary key of a DataStore.

    :param primary_key: list of the primary key fields (optional)
    :param file_query_list: user-provided file query list (optional)
    :return: with a primary key of two fields or more, a RequestFileMapperIndexKeys
        grouping by all fields but the last and sorting by the last one;
        otherwise a RequestFileMapperUser if file_query_list is given,
        else a RequestFileMapperLimit
    """
    has_composite_key = primary_key is not None and len(primary_key) > 1
    if has_composite_key:
        return RequestFileMapperIndexKeys(group_by_keys=primary_key[:-1], sort_by_keys=[primary_key[-1]])
    if file_query_list is None:
        return RequestFileMapperLimit()
    return RequestFileMapperUser(file_query_list)
262
+
@@ -0,0 +1,11 @@
1
+ #!python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Default package builders
5
+ """
6
+
7
+ from . import configuration_builder
8
+
9
+ # alias
10
+ from .configuration_builder import ConfigurationBuilder
11
+
@@ -0,0 +1,66 @@
1
+ #!python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ CKAN configuration builder
5
+ """
6
+ from typing import Union, List, Dict
7
+
8
+ from ckanapi_harvesters.auxiliary import ckan_configuration
9
+ from ckanapi_harvesters.ckan_api import CkanApi
10
+ from ckanapi_harvesters.policies.data_format_policy import CkanPackageDataFormatPolicy
11
+ from ckanapi_harvesters.policies.data_format_policy_errors import DataPolicyError
12
+ from ckanapi_harvesters.builder.builder_resource import BuilderResourceUnmanaged
13
+ from ckanapi_harvesters.builder.specific_builder_abc import SpecificBuilderABC
14
+
15
+
16
class ConfigurationBuilder(SpecificBuilderABC):
    """
    Builder of the private configuration package, holding the data format policy as a JSON resource.
    """
    def __init__(self, ckan:CkanApi, organization_name:str):
        """
        :param ckan: forwarded to the parent constructor
        :param organization_name: organization of the configuration package
        """
        super().__init__(
            ckan,
            package_name=ckan_configuration.configuration_package_name,
            organization_name=organization_name,
            title="Configuration for scripts",
            description="Configuration for use with Python scripts",
            private=True,
        )
        policy_builder = BuilderResourceUnmanaged(
            name=ckan_configuration.policy_resource,
            format="JSON",
            description="CKAN Data format policy for use with Python scripts",
        )
        self.resource_builders[ckan_configuration.policy_resource] = policy_builder

    def patch_policy(self, ckan:CkanApi, policy: CkanPackageDataFormatPolicy,
                     *, reduced_size:bool=None, update_ckan:bool=True):
        """
        Upload the data format policy to the configuration package, or delete it.

        :param ckan:
        :param policy: policy to upload; None requests the deletion of the policy resource
        :param reduced_size: forwarded to policy.to_jsons
        :param update_ckan: when True, policy is also assigned to ckan.policy
        """
        package_id = self.patch_request_package(ckan).id
        if policy is None:
            # delete data format policy
            self.resource_builders[ckan_configuration.policy_resource].delete_request(ckan, package_id)
        else:
            payload = policy.to_jsons(reduced_size=reduced_size).encode()
            policy_builder: BuilderResourceUnmanaged = self.resource_builders[ckan_configuration.policy_resource]
            policy_builder.patch_request(ckan, package_id, payload=payload, reupload=True)
        if update_ckan:
            ckan.policy = policy

    def load_default_policy(self, ckan:CkanApi) -> CkanPackageDataFormatPolicy:
        """
        :param ckan:
        :return: the result of ckan.load_default_policy called with force=True
        """
        default_policy = ckan.load_default_policy(force=True)
        return default_policy

    def policy_check(self, ckan: CkanApi,
                     package_list: Union[str, List[str]] = None, *, owner_org:str=None,
                     policy:CkanPackageDataFormatPolicy=None, buffer:Dict[str, List[DataPolicyError]]=None,
                     raise_error:bool=False, verbose:bool=None) -> bool:
        """
        Check a package list against the data format policy currently loaded in CKAN (or the one given as argument).
        When not provided, the package list is the full list of packages, restrained to an organization (requires an API request).

        :param ckan:
        :param package_list:
        :param owner_org:
        :param policy:
        :param buffer:
        :param raise_error:
        :param verbose:
        :return: the value returned by ckan.policy_check
        """
        # recommended to run load_default_policy before
        completed_list = ckan.complete_package_list(package_list, owner_org=owner_org)
        ckan.map_resources(completed_list, owner_org=owner_org)
        check_result = ckan.policy_check(completed_list, policy=policy, buffer=buffer, verbose=verbose, raise_error=raise_error)
        return check_result
64
+
65
+
66
+
@@ -0,0 +1,23 @@
1
+ #!python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Abstract class to implement specific builders from code
5
+ """
6
+ from abc import ABC
7
+ from typing import List
8
+
9
+ from ckanapi_harvesters.auxiliary.ckan_model import CkanState
10
+ from ckanapi_harvesters.ckan_api import CkanApi
11
+ from ckanapi_harvesters.builder.builder_package import BuilderPackage
12
+
13
class SpecificBuilderABC(BuilderPackage, ABC):
    """
    Base class for package builders defined from code.
    """
    def __init__(self, ckan:CkanApi, package_name:str, organization_name:str, *,
                 title: str = None, description: str = None, private: bool = None, state: CkanState = None,
                 version: str = None,
                 url: str = None, tags: List[str] = None,
                 license_name:str=None):
        """
        Forward the package attributes to BuilderPackage, then call self.ckan_builder.from_ckan(ckan).

        :param ckan: CKAN API instance handed to ckan_builder.from_ckan
        :param package_name:
        :param organization_name:
        :param title:
        :param description:
        :param private:
        :param state:
        :param version:
        :param url:
        :param tags:
        :param license_name:
        """
        package_attributes = dict(
            package_name=package_name,
            title=title,
            description=description,
            private=private,
            state=state,
            version=version,
            url=url,
            tags=tags,
            organization_name=organization_name,
            license_name=license_name,
        )
        super().__init__(**package_attributes)
        self.ckan_builder.from_ckan(ckan)
23
+
@@ -0,0 +1,20 @@
1
+ #!python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Package with helper functions for CKAN requests using pandas DataFrames.
5
+ """
6
+
7
+ from . import ckan_api_params
8
+ from . import ckan_api_0_base
9
+ from . import ckan_api_1_map
10
+ from . import ckan_api_2_readonly
11
+ from . import ckan_api_3_policy
12
+ from . import ckan_api_4_readwrite
13
+ from . import ckan_api_5_manage
14
+ from . import ckan_api
15
+ # from . import deprecated
16
+
17
+ # usage shortcuts
18
+ from ckanapi_harvesters.ckan_api.ckan_api import CkanApi, CkanApiParams, CkanApiABC, CKAN_API_VERSION, CkanApiMap
19
+
20
+
@@ -0,0 +1,11 @@
1
+ #!python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Alias to most complete CkanApi implementation
5
+ """
6
+
7
+ from ckanapi_harvesters.ckan_api.ckan_api_0_base import CkanApiABC, CKAN_API_VERSION
8
+ from ckanapi_harvesters.ckan_api.ckan_api_1_map import CkanApiMap
9
+ from ckanapi_harvesters.ckan_api.ckan_api_5_manage import CkanApiManage as CkanApi # alias
10
+ from ckanapi_harvesters.ckan_api.ckan_api_5_manage import CkanApiExtendedParams as CkanApiParams # alias
11
+