ckanapi-harvesters 0.0.0__py3-none-any.whl → 0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ckanapi_harvesters/__init__.py +32 -10
- ckanapi_harvesters/auxiliary/__init__.py +26 -0
- ckanapi_harvesters/auxiliary/ckan_action.py +93 -0
- ckanapi_harvesters/auxiliary/ckan_api_key.py +213 -0
- ckanapi_harvesters/auxiliary/ckan_auxiliary.py +293 -0
- ckanapi_harvesters/auxiliary/ckan_configuration.py +50 -0
- ckanapi_harvesters/auxiliary/ckan_defs.py +10 -0
- ckanapi_harvesters/auxiliary/ckan_errors.py +129 -0
- ckanapi_harvesters/auxiliary/ckan_map.py +509 -0
- ckanapi_harvesters/auxiliary/ckan_model.py +992 -0
- ckanapi_harvesters/auxiliary/ckan_vocabulary_deprecated.py +104 -0
- ckanapi_harvesters/auxiliary/deprecated.py +82 -0
- ckanapi_harvesters/auxiliary/error_level_message.py +51 -0
- ckanapi_harvesters/auxiliary/external_code_import.py +98 -0
- ckanapi_harvesters/auxiliary/list_records.py +60 -0
- ckanapi_harvesters/auxiliary/login.py +163 -0
- ckanapi_harvesters/auxiliary/path.py +208 -0
- ckanapi_harvesters/auxiliary/proxy_config.py +298 -0
- ckanapi_harvesters/auxiliary/urls.py +40 -0
- ckanapi_harvesters/builder/__init__.py +40 -0
- ckanapi_harvesters/builder/builder_aux.py +20 -0
- ckanapi_harvesters/builder/builder_ckan.py +238 -0
- ckanapi_harvesters/builder/builder_errors.py +36 -0
- ckanapi_harvesters/builder/builder_field.py +122 -0
- ckanapi_harvesters/builder/builder_package.py +9 -0
- ckanapi_harvesters/builder/builder_package_1_basic.py +1291 -0
- ckanapi_harvesters/builder/builder_package_2_harvesters.py +40 -0
- ckanapi_harvesters/builder/builder_package_3_multi_threaded.py +45 -0
- ckanapi_harvesters/builder/builder_package_example.xlsx +0 -0
- ckanapi_harvesters/builder/builder_resource.py +589 -0
- ckanapi_harvesters/builder/builder_resource_datastore.py +561 -0
- ckanapi_harvesters/builder/builder_resource_datastore_multi_abc.py +367 -0
- ckanapi_harvesters/builder/builder_resource_datastore_multi_folder.py +273 -0
- ckanapi_harvesters/builder/builder_resource_datastore_multi_harvester.py +278 -0
- ckanapi_harvesters/builder/builder_resource_datastore_unmanaged.py +145 -0
- ckanapi_harvesters/builder/builder_resource_datastore_url.py +150 -0
- ckanapi_harvesters/builder/builder_resource_init.py +126 -0
- ckanapi_harvesters/builder/builder_resource_multi_abc.py +361 -0
- ckanapi_harvesters/builder/builder_resource_multi_datastore.py +146 -0
- ckanapi_harvesters/builder/builder_resource_multi_file.py +505 -0
- ckanapi_harvesters/builder/example/__init__.py +21 -0
- ckanapi_harvesters/builder/example/builder_example.py +21 -0
- ckanapi_harvesters/builder/example/builder_example_aux_fun.py +24 -0
- ckanapi_harvesters/builder/example/builder_example_download.py +44 -0
- ckanapi_harvesters/builder/example/builder_example_generate_data.py +73 -0
- ckanapi_harvesters/builder/example/builder_example_patch_upload.py +51 -0
- ckanapi_harvesters/builder/example/builder_example_policy.py +114 -0
- ckanapi_harvesters/builder/example/builder_example_test_sql.py +53 -0
- ckanapi_harvesters/builder/example/builder_example_tests.py +87 -0
- ckanapi_harvesters/builder/example/builder_example_tests_offline.py +57 -0
- ckanapi_harvesters/builder/example/package/ckan-dpg.svg +74 -0
- ckanapi_harvesters/builder/example/package/users_local.csv +3 -0
- ckanapi_harvesters/builder/mapper_datastore.py +93 -0
- ckanapi_harvesters/builder/mapper_datastore_multi.py +262 -0
- ckanapi_harvesters/builder/specific/__init__.py +11 -0
- ckanapi_harvesters/builder/specific/configuration_builder.py +66 -0
- ckanapi_harvesters/builder/specific_builder_abc.py +23 -0
- ckanapi_harvesters/ckan_api/__init__.py +20 -0
- ckanapi_harvesters/ckan_api/ckan_api.py +11 -0
- ckanapi_harvesters/ckan_api/ckan_api_0_base.py +896 -0
- ckanapi_harvesters/ckan_api/ckan_api_1_map.py +1028 -0
- ckanapi_harvesters/ckan_api/ckan_api_2_readonly.py +934 -0
- ckanapi_harvesters/ckan_api/ckan_api_3_policy.py +229 -0
- ckanapi_harvesters/ckan_api/ckan_api_4_readwrite.py +579 -0
- ckanapi_harvesters/ckan_api/ckan_api_5_manage.py +1225 -0
- ckanapi_harvesters/ckan_api/ckan_api_params.py +192 -0
- ckanapi_harvesters/ckan_api/deprecated/__init__.py +9 -0
- ckanapi_harvesters/ckan_api/deprecated/ckan_api_deprecated.py +267 -0
- ckanapi_harvesters/ckan_api/deprecated/ckan_api_deprecated_vocabularies.py +189 -0
- ckanapi_harvesters/harvesters/__init__.py +23 -0
- ckanapi_harvesters/harvesters/data_cleaner/__init__.py +17 -0
- ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_abc.py +240 -0
- ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_errors.py +23 -0
- ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload.py +9 -0
- ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload_1_basic.py +430 -0
- ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload_2_geom.py +98 -0
- ckanapi_harvesters/harvesters/file_formats/__init__.py +10 -0
- ckanapi_harvesters/harvesters/file_formats/csv_format.py +43 -0
- ckanapi_harvesters/harvesters/file_formats/file_format_abc.py +39 -0
- ckanapi_harvesters/harvesters/file_formats/file_format_init.py +25 -0
- ckanapi_harvesters/harvesters/file_formats/shp_format.py +129 -0
- ckanapi_harvesters/harvesters/harvester_abc.py +190 -0
- ckanapi_harvesters/harvesters/harvester_errors.py +31 -0
- ckanapi_harvesters/harvesters/harvester_init.py +30 -0
- ckanapi_harvesters/harvesters/harvester_model.py +49 -0
- ckanapi_harvesters/harvesters/harvester_params.py +323 -0
- ckanapi_harvesters/harvesters/postgre_harvester.py +495 -0
- ckanapi_harvesters/harvesters/postgre_params.py +86 -0
- ckanapi_harvesters/harvesters/pymongo_data_cleaner.py +173 -0
- ckanapi_harvesters/harvesters/pymongo_harvester.py +355 -0
- ckanapi_harvesters/harvesters/pymongo_params.py +54 -0
- ckanapi_harvesters/policies/__init__.py +20 -0
- ckanapi_harvesters/policies/data_format_policy.py +269 -0
- ckanapi_harvesters/policies/data_format_policy_abc.py +97 -0
- ckanapi_harvesters/policies/data_format_policy_custom_fields.py +156 -0
- ckanapi_harvesters/policies/data_format_policy_defs.py +135 -0
- ckanapi_harvesters/policies/data_format_policy_errors.py +79 -0
- ckanapi_harvesters/policies/data_format_policy_lists.py +234 -0
- ckanapi_harvesters/policies/data_format_policy_tag_groups.py +35 -0
- ckanapi_harvesters/reports/__init__.py +11 -0
- ckanapi_harvesters/reports/admin_report.py +292 -0
- {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.3.dist-info}/METADATA +84 -38
- ckanapi_harvesters-0.0.3.dist-info/RECORD +105 -0
- ckanapi_harvesters/divider/__init__.py +0 -27
- ckanapi_harvesters/divider/divider.py +0 -53
- ckanapi_harvesters/divider/divider_error.py +0 -59
- ckanapi_harvesters/main.py +0 -30
- ckanapi_harvesters-0.0.0.dist-info/RECORD +0 -9
- {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.3.dist-info}/WHEEL +0 -0
- {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.3.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
#!python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Section of the package dedicated to the harvesting of data using APIs, or databases
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from . import file_formats
|
|
8
|
+
from . import data_cleaner
|
|
9
|
+
|
|
10
|
+
from . import harvester_errors
|
|
11
|
+
from . import harvester_model
|
|
12
|
+
from . import harvester_params
|
|
13
|
+
from . import harvester_abc
|
|
14
|
+
from . import pymongo_data_cleaner
|
|
15
|
+
from . import pymongo_params
|
|
16
|
+
from . import pymongo_harvester
|
|
17
|
+
from . import postgre_params
|
|
18
|
+
from . import postgre_harvester
|
|
19
|
+
from . import harvester_init
|
|
20
|
+
|
|
21
|
+
# usage shortcuts
|
|
22
|
+
|
|
23
|
+
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
#!python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Section of the package dedicated to the conversion of records to a CKAN-compatible format.
|
|
5
|
+
This is linked to the data harvesters.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from . import data_cleaner_errors
|
|
9
|
+
from . import data_cleaner_abc
|
|
10
|
+
from . import data_cleaner_upload_1_basic
|
|
11
|
+
from . import data_cleaner_upload_2_geom
|
|
12
|
+
from . import data_cleaner_upload
|
|
13
|
+
|
|
14
|
+
# usage shortcuts
|
|
15
|
+
from ckanapi_harvesters.harvesters.data_cleaner.data_cleaner_upload import CkanDataCleanerUpload
|
|
16
|
+
|
|
17
|
+
|
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
#!python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Functions to clean data before upload.
|
|
5
|
+
"""
|
|
6
|
+
from typing import Union, List, Any, Dict, Set, Type, Tuple
|
|
7
|
+
from abc import ABC, abstractmethod
|
|
8
|
+
from collections import OrderedDict
|
|
9
|
+
|
|
10
|
+
import pandas as pd
|
|
11
|
+
|
|
12
|
+
from ckanapi_harvesters.auxiliary.ckan_model import CkanField
|
|
13
|
+
from ckanapi_harvesters.auxiliary.ckan_auxiliary import assert_or_raise
|
|
14
|
+
from ckanapi_harvesters.auxiliary.ckan_auxiliary import dict_recursive_update
|
|
15
|
+
|
|
16
|
+
# Field data type names for which non-finite values are authorized (per the variable name).
non_finite_authorized_types = set(("numeric", "float4", "float8", "float2"))
# Alias of the set above: both names refer to the same set object (not a copy).
real_number_types = non_finite_authorized_types
# Mapping from a dtype name (keys look like pandas/numpy dtype names) to a field data type name.
dtype_mapper = dict.fromkeys(("float64", "int64"), "numeric")
dtype_mapper["datetime64[ns]"] = "timestamp"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class CkanDataCleanerABC(ABC):
    """
    Data cleaner abstract base class.

    A table is defined by a list of fields with a data type.
    Each row can specify the value of all/some fields.
    When a value is nested (dictionary or list), the functions iterate over the values of these elements with a recursive implementation.
    These elements are called sub-values.
    """
    def __init__(self):
        # options
        self.param_enable:bool = True  # global activation flag
        self.param_replace_forbidden:bool = False  # option to replace all other forbidden values (Infs) by None
        self.param_cast_types:bool = True  # option to cast to string the values of fields which have a text data type
        self.param_apply_field_subs:bool = True  # option to apply suggested field renamings (True by default because these are suggested only when necessary)
        self.param_apply_field_changes:bool = False  # option to apply suggested field type changes
        # option to raise an error; the author recommends leaving it False
        # NOTE(review): the original comment was truncated after "the CKAN server will" - complete the rationale
        self.param_raise_error:bool = False
        self.param_create_new_fields:bool = True  # option to enable the requests to create missing fields in the CKAN DataStore (this requires the specific function to be called)
        self.param_verbose:bool = True
        self.param_field_subs:Dict[str,str] = {}  # user-imposed field name substitutions
        self.param_field_primary_key:Union[List[str],None] = None
        # outputs
        self.fields_encountered:OrderedDict[str,None] = OrderedDict()
        self.warnings:Dict[str,Set[str]] = {}
        self.fields_new:OrderedDict[str,CkanField] = OrderedDict()
        self.field_changes:Dict[str,CkanField] = {}
        self.field_subs:Dict[str, str] = {}
        self.field_subs_path:Dict[str, str] = {}
        self.field_suggested_primary_key:Union[List[str],None] = None
        self.field_suggested_index:Set[str] = set()
        self._new_columns_in_row:Union[Dict[str,Any],None] = None  # is initialized at each row

    def clear_outputs_new_dataframe(self):
        """
        Reset the output attributes. The options (param_*) are left untouched.
        The suggested primary key is reset to the user-imposed primary key.
        """
        self.fields_encountered = OrderedDict()
        self.warnings = {}
        self.fields_new = OrderedDict()
        self.field_changes = {}
        self.field_subs = {}
        self.field_subs_path = {}
        self.field_suggested_primary_key = self.param_field_primary_key
        self.field_suggested_index = set()
        self._new_columns_in_row = None

    def clear_all_outputs(self):
        """
        Some values must not be cleared for each DataFrame upload.
        The cleaner is stateful for certain values cleared only here.
        """
        self.clear_outputs_new_dataframe()

    @abstractmethod
    def copy(self, dest=None):
        """
        Copy the options of this cleaner to dest and reset the outputs of dest.

        This method is abstract: subclasses must override it and can call it through super().

        :param dest: cleaner receiving the options. If None, a new instance of type(self) is created without arguments.
        :return: dest
        """
        if dest is None:
            # copy() / __copy__() called without destination: previously failed on None with an AttributeError
            dest = type(self)()
        dest.param_enable = self.param_enable
        dest.param_replace_forbidden = self.param_replace_forbidden
        dest.param_cast_types = self.param_cast_types
        dest.param_apply_field_subs = self.param_apply_field_subs
        dest.param_apply_field_changes = self.param_apply_field_changes
        dest.param_raise_error = self.param_raise_error
        dest.param_create_new_fields = self.param_create_new_fields
        dest.param_verbose = self.param_verbose
        # NOTE(review): param_field_subs and param_field_primary_key are not copied here (as in the original) - confirm this is intended
        dest.clear_outputs_new_dataframe()
        return dest

    def __copy__(self):
        return self.copy()

    ## Field type detection ------------------
    def _detect_standard_field_bypass(self, field_name: str, values: Union[Any, pd.Series]) -> Union[CkanField,None]:
        """
        Auxiliary function of create_new_field to detect field type used to bypass the default criteria.
        The base implementation returns None.
        """
        return None

    def _detect_non_standard_field(self, field_name: str, values: Union[Any, pd.Series]) -> CkanField:
        """
        Auxiliary function of create_new_field to detect field type used if the default criteria did not match any specific case.
        The base implementation returns a field with the "text" data type.
        """
        return CkanField(field_name, "text")

    @abstractmethod
    def create_new_field(self, field_name:str, values: Union[Any, pd.Series]) -> CkanField:
        """
        This method adds a new field definition
        """
        raise NotImplementedError()

    @abstractmethod
    def detect_field_types_and_subs(self, records: Union[List[dict], pd.DataFrame]) -> OrderedDict[str, str]:
        """
        This function detects the initial fields and necessary field renamings
        """
        raise NotImplementedError()

    ## Records cleansing -------------
    @abstractmethod
    def clean_value_field(self, value: Any, field: CkanField) -> Any:
        """
        Cleaning of a value. A value is directly the value of a cell.
        """
        raise NotImplementedError()

    def _replace_standard_value_bypass(self, value: Any, field: CkanField, *, field_data_type: str) -> Tuple[Any, bool]:
        """
        Auxiliary function of clean_value_field to perform type castings/checks used to bypass the default criteria.
        The base implementation returns (None, False).
        """
        return None, False

    def _replace_non_standard_value(self, value: Any, field: CkanField, *, field_data_type: str) -> Any:
        """
        Auxiliary function of clean_value_field to perform type castings/checks used if none of the default criteria were met.
        The base implementation returns the value unchanged.
        """
        return value

    @abstractmethod
    def _clean_subvalue(self, subvalue: Any, field: CkanField, path: str, level: int,
                        *, field_data_type: str) -> Any:
        """
        Cleaning of a subvalue. A subvalue is a value within a nested cell.
        """
        raise NotImplementedError()

    def _replace_standard_subvalue_bypass(self, subvalue:Any, field:CkanField, path:str, level:int,
                                          *, field_data_type:str) -> Tuple[Any,bool]:
        """
        Auxiliary function of _clean_subvalue to perform type castings/checks used to bypass the default criteria.
        The base implementation returns (None, False).
        """
        return None, False

    def _replace_non_standard_subvalue(self, subvalue:Any, field:CkanField, path:str, level:int,
                                       *, field_data_type:str) -> Any:
        """
        Auxiliary function of _clean_subvalue to perform type castings/checks used if none of the default criteria were met.
        The base implementation returns the subvalue unchanged.
        """
        return subvalue

    def _add_field_from_path(self, path:str, data_type:str, new_field_name:str=None,
                             suggest_index:bool=True, notes:str=None) -> None:
        """
        Auxiliary method to define a new column from a nested object.

        :param path: path of the nested value
        :param data_type: data type of the new field
        :param new_field_name: name of the new field. By default, the path with dots replaced by underscores.
        :param suggest_index: option to add the new field to the suggested indexes
        :param notes: notes passed to the new field definition
        :raises KeyError: presumably raised through assert_or_raise if the field name was already encountered
        """
        if new_field_name is None:
            new_field_name = path.replace(".", "_")
        assert_or_raise(new_field_name not in self.fields_encountered, KeyError(f"{new_field_name} already exists and cannot be replaced"))
        self.fields_new[new_field_name] = CkanField(new_field_name, data_type, notes=notes)
        self.field_subs_path[path] = new_field_name
        if suggest_index:
            self.field_suggested_index.add(new_field_name)
        self.fields_encountered[new_field_name] = None

    @abstractmethod
    def clean_records(self, records: Union[List[dict], pd.DataFrame], known_fields:Union[OrderedDict[str, CkanField], None],
                      *, inplace:bool=False) -> Union[List[dict], pd.DataFrame]:
        """
        Main function to clean a list of records.

        :param records:
        :param known_fields:
        :param inplace:
        :return:
        """
        raise NotImplementedError()

    @abstractmethod
    def _clean_final_steps(self, records: Union[List[dict], pd.DataFrame], fields:Union[OrderedDict[str, CkanField], None],
                           known_fields:Union[OrderedDict[str, CkanField], None]) -> Union[List[dict], pd.DataFrame]:
        """
        Method called at the end of clean_records
        """
        raise NotImplementedError()

    def _extra_checks(self, records: Union[List[dict], pd.DataFrame], fields:Union[OrderedDict[str, CkanField], None]) -> None:
        """
        Method called at the end of _clean_final_steps
        The base implementation does nothing.
        """
        pass

    ### post-treatments -------------
    def apply_new_fields_request(self, ckan, resource_id:str):
        """
        This method performs the field patch if a new field was detected.
        Call before upsert.

        :param ckan: object providing the datastore_field_patch method
        :param resource_id: resource id passed to datastore_field_patch
        """
        if self.param_create_new_fields and len(self.fields_new) > 0:
            ckan.datastore_field_patch(resource_id, fields_update=self.fields_new)

    def merge_field_changes(self, fields:List[dict]=None) -> List[dict]:
        """
        This method merges the fields argument of a datastore_create with the fields detected by the data cleaner.
        New fields are appended. Field type changes are merged only if param_apply_field_changes is set.

        :param fields: list of field dictionaries (each with an "id" key), or None
        :return: the merged list of field dictionaries, or the fields argument unchanged if there was nothing to merge
        :raises RuntimeError: if a new field is already defined in the fields argument,
            or if a field change targets a field which is not defined
        """
        if fields is not None:
            fields_dict = OrderedDict((field_dict["id"], CkanField.from_ckan_dict(field_dict)) for field_dict in fields)
        else:
            fields_dict = OrderedDict()
        merged = False
        for field_name, field_info in self.fields_new.items():
            if field_name in fields_dict:
                # a field reported as new must not be already defined by the caller
                raise RuntimeError(f"New field {field_name} is already defined in the fields argument")
            fields_dict[field_name] = field_info
            merged = True
        if self.param_apply_field_changes:
            for field_name, field_info in self.field_changes.items():
                if field_name not in fields_dict:
                    # a change can only apply to a field which is defined
                    raise RuntimeError(f"Field change requested for undefined field {field_name}")
                fields_dict[field_name] = fields_dict[field_name].merge(field_info)
                merged = True
        if not merged:
            # nothing detected by the cleaner: hand back the caller's argument untouched (can be None)
            return fields
        return [field_info.to_ckan_dict() for field_info in fields_dict.values()]
|
|
240
|
+
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
#!python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Error codes for data cleaner
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from ckanapi_harvesters.auxiliary.ckan_errors import RequirementError
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class CleanError(Exception):
    """
    Generic data cleaner error. Adds no behavior over Exception.
    """
|
|
12
|
+
|
|
13
|
+
class CleanerRequirementError(RequirementError):
    """
    Error signaling that a package is required to clean values of a given data type.
    """
    def __init__(self, requirement:str, data_type:str):
        # requirement: name of the required package; data_type: data type being cleaned
        message = f"The package {requirement} is required to clean using this data type ({data_type})."
        super().__init__(message)
|
|
16
|
+
|
|
17
|
+
class UnexpectedGeometryError(Exception):
    """
    Error signaling that a GeoJSON type differs from the expected one.
    """
    def __init__(self, found_type:str, expected_type:str):
        # found_type: GeoJSON type encountered; expected_type: GeoJSON type which was expected
        message = f"Unexpected GeoJSON type: {found_type}. Expected {expected_type}."
        super().__init__(message)
|
|
20
|
+
|
|
21
|
+
class FormatError(Exception):
    """
    Error signaling that the format of a piece of data was not recognized for a data type.
    """
    def __init__(self, data:str, data_type:str):
        # data: the value which could not be interpreted; data_type: the data type it was read as
        message = f"Format not recognized for type {data_type}: {data}."
        super().__init__(message)
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
#!python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Alias
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from ckanapi_harvesters.harvesters.data_cleaner.data_cleaner_upload_1_basic import _pd_series_type_detect
|
|
8
|
+
from ckanapi_harvesters.harvesters.data_cleaner.data_cleaner_upload_2_geom import CkanDataCleanerUploadGeom as CkanDataCleanerUpload # alias
|
|
9
|
+
|