ckanapi-harvesters 0.0.0__py3-none-any.whl → 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. ckanapi_harvesters/__init__.py +32 -10
  2. ckanapi_harvesters/auxiliary/__init__.py +26 -0
  3. ckanapi_harvesters/auxiliary/ckan_action.py +93 -0
  4. ckanapi_harvesters/auxiliary/ckan_api_key.py +213 -0
  5. ckanapi_harvesters/auxiliary/ckan_auxiliary.py +293 -0
  6. ckanapi_harvesters/auxiliary/ckan_configuration.py +50 -0
  7. ckanapi_harvesters/auxiliary/ckan_defs.py +10 -0
  8. ckanapi_harvesters/auxiliary/ckan_errors.py +129 -0
  9. ckanapi_harvesters/auxiliary/ckan_map.py +509 -0
  10. ckanapi_harvesters/auxiliary/ckan_model.py +992 -0
  11. ckanapi_harvesters/auxiliary/ckan_vocabulary_deprecated.py +104 -0
  12. ckanapi_harvesters/auxiliary/deprecated.py +82 -0
  13. ckanapi_harvesters/auxiliary/error_level_message.py +51 -0
  14. ckanapi_harvesters/auxiliary/external_code_import.py +98 -0
  15. ckanapi_harvesters/auxiliary/list_records.py +60 -0
  16. ckanapi_harvesters/auxiliary/login.py +163 -0
  17. ckanapi_harvesters/auxiliary/path.py +208 -0
  18. ckanapi_harvesters/auxiliary/proxy_config.py +298 -0
  19. ckanapi_harvesters/auxiliary/urls.py +40 -0
  20. ckanapi_harvesters/builder/__init__.py +40 -0
  21. ckanapi_harvesters/builder/builder_aux.py +20 -0
  22. ckanapi_harvesters/builder/builder_ckan.py +238 -0
  23. ckanapi_harvesters/builder/builder_errors.py +36 -0
  24. ckanapi_harvesters/builder/builder_field.py +122 -0
  25. ckanapi_harvesters/builder/builder_package.py +9 -0
  26. ckanapi_harvesters/builder/builder_package_1_basic.py +1291 -0
  27. ckanapi_harvesters/builder/builder_package_2_harvesters.py +40 -0
  28. ckanapi_harvesters/builder/builder_package_3_multi_threaded.py +45 -0
  29. ckanapi_harvesters/builder/builder_package_example.xlsx +0 -0
  30. ckanapi_harvesters/builder/builder_resource.py +589 -0
  31. ckanapi_harvesters/builder/builder_resource_datastore.py +561 -0
  32. ckanapi_harvesters/builder/builder_resource_datastore_multi_abc.py +367 -0
  33. ckanapi_harvesters/builder/builder_resource_datastore_multi_folder.py +273 -0
  34. ckanapi_harvesters/builder/builder_resource_datastore_multi_harvester.py +278 -0
  35. ckanapi_harvesters/builder/builder_resource_datastore_unmanaged.py +145 -0
  36. ckanapi_harvesters/builder/builder_resource_datastore_url.py +150 -0
  37. ckanapi_harvesters/builder/builder_resource_init.py +126 -0
  38. ckanapi_harvesters/builder/builder_resource_multi_abc.py +361 -0
  39. ckanapi_harvesters/builder/builder_resource_multi_datastore.py +146 -0
  40. ckanapi_harvesters/builder/builder_resource_multi_file.py +505 -0
  41. ckanapi_harvesters/builder/example/__init__.py +21 -0
  42. ckanapi_harvesters/builder/example/builder_example.py +21 -0
  43. ckanapi_harvesters/builder/example/builder_example_aux_fun.py +24 -0
  44. ckanapi_harvesters/builder/example/builder_example_download.py +44 -0
  45. ckanapi_harvesters/builder/example/builder_example_generate_data.py +73 -0
  46. ckanapi_harvesters/builder/example/builder_example_patch_upload.py +51 -0
  47. ckanapi_harvesters/builder/example/builder_example_policy.py +114 -0
  48. ckanapi_harvesters/builder/example/builder_example_test_sql.py +53 -0
  49. ckanapi_harvesters/builder/example/builder_example_tests.py +87 -0
  50. ckanapi_harvesters/builder/example/builder_example_tests_offline.py +57 -0
  51. ckanapi_harvesters/builder/example/package/ckan-dpg.svg +74 -0
  52. ckanapi_harvesters/builder/example/package/users_local.csv +3 -0
  53. ckanapi_harvesters/builder/mapper_datastore.py +93 -0
  54. ckanapi_harvesters/builder/mapper_datastore_multi.py +262 -0
  55. ckanapi_harvesters/builder/specific/__init__.py +11 -0
  56. ckanapi_harvesters/builder/specific/configuration_builder.py +66 -0
  57. ckanapi_harvesters/builder/specific_builder_abc.py +23 -0
  58. ckanapi_harvesters/ckan_api/__init__.py +20 -0
  59. ckanapi_harvesters/ckan_api/ckan_api.py +11 -0
  60. ckanapi_harvesters/ckan_api/ckan_api_0_base.py +896 -0
  61. ckanapi_harvesters/ckan_api/ckan_api_1_map.py +1028 -0
  62. ckanapi_harvesters/ckan_api/ckan_api_2_readonly.py +934 -0
  63. ckanapi_harvesters/ckan_api/ckan_api_3_policy.py +229 -0
  64. ckanapi_harvesters/ckan_api/ckan_api_4_readwrite.py +579 -0
  65. ckanapi_harvesters/ckan_api/ckan_api_5_manage.py +1225 -0
  66. ckanapi_harvesters/ckan_api/ckan_api_params.py +192 -0
  67. ckanapi_harvesters/ckan_api/deprecated/__init__.py +9 -0
  68. ckanapi_harvesters/ckan_api/deprecated/ckan_api_deprecated.py +267 -0
  69. ckanapi_harvesters/ckan_api/deprecated/ckan_api_deprecated_vocabularies.py +189 -0
  70. ckanapi_harvesters/harvesters/__init__.py +23 -0
  71. ckanapi_harvesters/harvesters/data_cleaner/__init__.py +17 -0
  72. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_abc.py +240 -0
  73. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_errors.py +23 -0
  74. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload.py +9 -0
  75. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload_1_basic.py +430 -0
  76. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload_2_geom.py +98 -0
  77. ckanapi_harvesters/harvesters/file_formats/__init__.py +10 -0
  78. ckanapi_harvesters/harvesters/file_formats/csv_format.py +43 -0
  79. ckanapi_harvesters/harvesters/file_formats/file_format_abc.py +39 -0
  80. ckanapi_harvesters/harvesters/file_formats/file_format_init.py +25 -0
  81. ckanapi_harvesters/harvesters/file_formats/shp_format.py +129 -0
  82. ckanapi_harvesters/harvesters/harvester_abc.py +190 -0
  83. ckanapi_harvesters/harvesters/harvester_errors.py +31 -0
  84. ckanapi_harvesters/harvesters/harvester_init.py +30 -0
  85. ckanapi_harvesters/harvesters/harvester_model.py +49 -0
  86. ckanapi_harvesters/harvesters/harvester_params.py +323 -0
  87. ckanapi_harvesters/harvesters/postgre_harvester.py +495 -0
  88. ckanapi_harvesters/harvesters/postgre_params.py +86 -0
  89. ckanapi_harvesters/harvesters/pymongo_data_cleaner.py +173 -0
  90. ckanapi_harvesters/harvesters/pymongo_harvester.py +355 -0
  91. ckanapi_harvesters/harvesters/pymongo_params.py +54 -0
  92. ckanapi_harvesters/policies/__init__.py +20 -0
  93. ckanapi_harvesters/policies/data_format_policy.py +269 -0
  94. ckanapi_harvesters/policies/data_format_policy_abc.py +97 -0
  95. ckanapi_harvesters/policies/data_format_policy_custom_fields.py +156 -0
  96. ckanapi_harvesters/policies/data_format_policy_defs.py +135 -0
  97. ckanapi_harvesters/policies/data_format_policy_errors.py +79 -0
  98. ckanapi_harvesters/policies/data_format_policy_lists.py +234 -0
  99. ckanapi_harvesters/policies/data_format_policy_tag_groups.py +35 -0
  100. ckanapi_harvesters/reports/__init__.py +11 -0
  101. ckanapi_harvesters/reports/admin_report.py +292 -0
  102. {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.2.dist-info}/METADATA +74 -38
  103. ckanapi_harvesters-0.0.2.dist-info/RECORD +105 -0
  104. ckanapi_harvesters/divider/__init__.py +0 -27
  105. ckanapi_harvesters/divider/divider.py +0 -53
  106. ckanapi_harvesters/divider/divider_error.py +0 -59
  107. ckanapi_harvesters/main.py +0 -30
  108. ckanapi_harvesters-0.0.0.dist-info/RECORD +0 -9
  109. {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.2.dist-info}/WHEEL +0 -0
  110. {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,129 @@
1
+ #!python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Shapefile format support
5
+ """
6
+ from typing import Union, Dict
7
+ from types import SimpleNamespace
8
+ import io
9
+ from warnings import warn
10
+ from enum import IntEnum
11
+
12
+ import pandas as pd
13
+ try:
14
+ import geopandas as gpd
15
+ except ImportError:
16
+ gpd = SimpleNamespace(GeoDataFrame=None)
17
+ try:
18
+ import pyproj
19
+ except ImportError:
20
+ pyproj = None
21
+
22
+ from ckanapi_harvesters.auxiliary.list_records import ListRecords
23
+ from ckanapi_harvesters.auxiliary.ckan_model import CkanField
24
+ from ckanapi_harvesters.auxiliary.ckan_errors import FileFormatRequirementError, UnknownTargetCRSError
25
+ from ckanapi_harvesters.auxiliary.ckan_configuration import default_ckan_target_epsg
26
+ from ckanapi_harvesters.harvesters.file_formats.file_format_abc import FileFormatABC
27
+
28
+
29
# Default keyword arguments forwarded to geopandas.read_file when a ShapeFileFormat
# is created without explicit read_file_kwargs.
shp_upload_read_file_kwargs = {"encoding": "utf-8"}
30
+
31
class DownloadedShapeFileConversion(IntEnum):
    """
    Conversion applied to geometric data when a downloaded table is turned back into a GeoDataFrame.
    """
    # leave the geometric columns in WKB format, CRS information is not looked at
    CsvWkb = 0
    # decode the geometries and re-project them to the source EPSG of the field, when it is known
    ShapefileProjection = 2
    # decode the geometries without re-projecting them to the source EPSG
    ShapefileAsIs = 3
35
+
36
class ShapeFileFormat(FileFormatABC):
    """
    Shapefile (SHP) format support, based on geopandas and pyproj.

    Before upload, a shapefile is read and its geometric columns are converted to hexadecimal WKB.
    After download, WKB columns declared as geometries are decoded into a GeoDataFrame.

    Attributes:
        read_file_kwargs: keyword arguments forwarded to ``geopandas.read_file``.
        require_field_crs: if True, a geometric field without a target EPSG raises ``UnknownTargetCRSError``.
        download_conversion: ``DownloadedShapeFileConversion`` applied to downloaded data.
    """
    def __init__(self, read_file_kwargs=None) -> None:
        """
        :param read_file_kwargs: keyword arguments for ``geopandas.read_file``.
            Defaults to a copy of ``shp_upload_read_file_kwargs``.
        :raises FileFormatRequirementError: if geopandas or pyproj could not be imported.
        """
        if gpd.GeoDataFrame is None:
            raise FileFormatRequirementError("geopandas", "SHP")
        if pyproj is None:
            raise FileFormatRequirementError("pyproj", "SHP")
        if read_file_kwargs is None:
            # copy the defaults so that changes made on one instance do not leak into the module-level dict
            read_file_kwargs = dict(shp_upload_read_file_kwargs)
        self.read_file_kwargs:dict = read_file_kwargs
        self.require_field_crs:bool = True
        self.download_conversion = DownloadedShapeFileConversion.ShapefileProjection

    @staticmethod
    def _is_geometry_data_type(data_type) -> bool:
        # True for "geometry" and "geometry(...)" data types; False for any other value, including None
        return isinstance(data_type, str) and (data_type == "geometry" or data_type.startswith("geometry("))

    # loading a file before upload ----------------
    def read_file(self, file_path: Union[str,io.StringIO], fields: Union[Dict[str, CkanField],None]) -> Union[pd.DataFrame, ListRecords]:
        """
        Read a shapefile and return a standard DataFrame where the geometric columns are WKB-encoded (hex).

        Each geometric column is re-projected to the target EPSG of its field (EPSG used in CKAN).
        The source CRS is the one read from the SHP file.

        :param file_path: path (or buffer) passed to ``geopandas.read_file``.
        :param fields: field definitions by column name. A geometric column without a definition
            receives a default ``CkanField`` which is added to this dict, and a warning is emitted.
            None is treated as an empty dict (the defaults are then not visible to the caller).
        :raises UnknownTargetCRSError: if a field has no target EPSG and ``require_field_crs`` is True.
        :raises NotImplementedError: if the data type of a geometric column is not a geometry type.
        """
        if fields is None:
            # the signature allows None: fall back to default definitions for all geometric columns
            fields = {}
        # target EPSG = EPSG used in CKAN, source EPSG read from SHP file
        gdf = gpd.read_file(file_path, **self.read_file_kwargs)
        geo_columns = list(gdf.select_dtypes('geometry'))
        for field_name in geo_columns:
            gdf.set_geometry(field_name, inplace=True)  # select the current column for geometry computations
            crs_source = gdf.crs
            if field_name in fields:
                field = fields[field_name]
                field_data_type = field.data_type
                epsg_target = field.internal_attrs.epsg_target
                epsg_source_from_params = field.internal_attrs.epsg_source
            else:
                # default field data type, with a generic geometry type and the default EPSG
                epsg_target = default_ckan_target_epsg
                field_data_type = f"geometry(geometry,{epsg_target})"
                fields[field_name] = CkanField(field_name, field_data_type)  # TODO: update field data type in caller? user can change data type afterwards?
                epsg_source_from_params = None
                msg = f"PostGIS geometric destination type was not specified and will not be transmitted to CKAN. Assuming default {field_data_type}."
                warn(msg)
            if not self._is_geometry_data_type(field_data_type):  # and field.internal_attrs.geometry_as_source:
                raise NotImplementedError(f"Field {field_data_type} is not implemented or not compatible with geometric representations.")
            if epsg_target is not None:
                crs_target = pyproj.CRS.from_epsg(epsg_target)
                if not crs_source == crs_target:
                    gdf.to_crs(crs_target, inplace=True)
            elif self.require_field_crs:
                raise UnknownTargetCRSError(crs_source, file_path)
            if epsg_source_from_params is not None:
                crs_source_from_params = pyproj.CRS.from_epsg(epsg_source_from_params)
                if not crs_source_from_params == crs_source:
                    msg = f"EPSG in SHP file ({crs_source}) does not match given source EPSG ({crs_source_from_params}). The downloaded result will differ from original format."
                    warn(msg)
        df = gdf.to_wkb(hex=True)  # converts all geometric fields to WKB and returns a standard DataFrame object
        return df

    def read_buffer(self, buffer: io.StringIO, fields: Union[Dict[str, CkanField],None]) -> Union[pd.DataFrame, ListRecords]:
        """
        Same as :meth:`read_file`, the buffer being passed in place of the file path.
        """
        return self.read_file(buffer, fields=fields)

    # saving a file after download -------------
    def downloaded_df_to_gdf(self, df: pd.DataFrame, *, fields: Union[Dict[str, CkanField],None], context:str=None) -> gpd.GeoDataFrame:
        """
        Convert a downloaded DataFrame to a GeoDataFrame, decoding the WKB columns declared as geometries.

        :param df: downloaded data.
        :param fields: field definitions by column name. Columns without a definition, or with a
            non-geometric data type, are left untouched. None is treated as an empty dict.
        :param context: passed to ``UnknownTargetCRSError`` (``write_file`` passes the destination path).
        :raises UnknownTargetCRSError: if a geometric field has no target EPSG and ``require_field_crs`` is True.
        """
        # NB: target EPSG = CRS in database (required), source = option to recover original CRS
        gdf = gpd.GeoDataFrame(df)
        if self.download_conversion == DownloadedShapeFileConversion.CsvWkb:
            # do not look at CRS information and leave in WKB format
            return gdf
        if fields is None:
            fields = {}
        for field_name in df.columns:
            if field_name not in fields:
                continue
            field = fields[field_name]
            if not self._is_geometry_data_type(field.data_type):
                continue
            epsg_target = field.internal_attrs.epsg_target
            epsg_source = field.internal_attrs.epsg_source
            if epsg_target is None:
                # test the EPSG code before building the CRS: CRS.from_epsg does not accept None
                if self.require_field_crs:
                    raise UnknownTargetCRSError(epsg_source, context)
                crs_target = None
            else:
                crs_target = pyproj.CRS.from_epsg(epsg_target)
            gdf[field_name] = gpd.geoseries.from_wkb(df[field_name], crs=crs_target)
            if (self.download_conversion == DownloadedShapeFileConversion.ShapefileProjection
                    and epsg_source is not None and crs_target is not None
                    and not epsg_target == epsg_source):
                crs_source = pyproj.CRS.from_epsg(epsg_source)
                if not crs_source == crs_target:
                    # select the current column for geometry computations, as done in read_file
                    gdf.set_geometry(field_name, inplace=True)
                    gdf.to_crs(crs_source, inplace=True)
        return gdf

    def write_file(self, df: pd.DataFrame, file_path: str, fields: Union[Dict[str, CkanField],None]) -> None:
        """
        Write the downloaded data as a shapefile at ``file_path``.
        """
        # this writes the shp file and auxiliary shx, dbf, cpg, prj files
        gdf = self.downloaded_df_to_gdf(df, fields=fields, context=file_path)
        gdf.to_file(file_path, driver="ESRI Shapefile")

    def write_in_memory(self, df: pd.DataFrame, fields: Union[Dict[str, CkanField],None]) -> bytes:
        """
        Write the downloaded data as a shapefile in a memory buffer and return its content as bytes.
        """
        # how could this work because there are multiple files?
        # NOTE(review): a shapefile is a set of binary files whereas the buffer is a text buffer;
        # this method presumably cannot work as is - confirm and consider returning an archive.
        gdf = self.downloaded_df_to_gdf(df, fields=fields)
        buffer = io.StringIO()
        gdf.to_file(buffer, driver="ESRI Shapefile")
        return buffer.getvalue().encode("utf8")

    def copy(self):
        """
        Return a new ShapeFileFormat with the same settings.
        """
        # give the copy its own kwargs dict so that both objects can be modified independently
        dest = ShapeFileFormat(dict(self.read_file_kwargs))
        dest.download_conversion = self.download_conversion
        dest.require_field_crs = self.require_field_crs
        return dest
129
+
@@ -0,0 +1,190 @@
1
+ #!python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Harvester base class
5
+ """
6
+ from typing import Union, List, Any, Callable
7
+ from collections import OrderedDict
8
+ from abc import ABC, abstractmethod
9
+
10
+ import pandas as pd
11
+
12
+ from ckanapi_harvesters.auxiliary.error_level_message import ContextErrorLevelMessage
13
+ from ckanapi_harvesters.harvesters.harvester_model import DatasetMetadata, TableMetadata
14
+ from ckanapi_harvesters.harvesters.harvester_params import DatabaseParams, DatasetParams, TableParams
15
+ from ckanapi_harvesters.harvesters.data_cleaner.data_cleaner_abc import CkanDataCleanerABC
16
+
17
+
18
class HarvesterConnectABC(ABC):
    """
    Abstract interface for objects holding a connection to a data source.

    All methods except the destructor are abstract and must be implemented by subclasses.
    """
    def __del__(self):
        # the connection is closed when the object is destroyed
        self.disconnect()

    @abstractmethod
    def connect(self, *, cancel_if_connected:bool=True) -> Any:
        """
        Abstract: open the connection.

        :param cancel_if_connected: presumably skips the operation if a connection is already open (to confirm in implementations).
        """
        raise NotImplementedError()

    @abstractmethod
    def _finalize_connection(self):
        """
        Abstract: hook left to the implementations (no behavior is defined here).
        """
        raise NotImplementedError()

    @abstractmethod
    def is_connected(self) -> bool:
        """
        Abstract: return the connection state as a boolean.
        """
        raise NotImplementedError()

    @abstractmethod
    def check_connection(self, *, new_connection:bool=False, raise_error:bool=False) -> Union[None, ContextErrorLevelMessage]:
        """
        Abstract: test the connection and return None or a ContextErrorLevelMessage.
        """
        raise NotImplementedError()

    @abstractmethod
    def disconnect(self) -> None:
        """
        Abstract: close the connection. This method is called by the destructor.
        """
        raise NotImplementedError()

    @abstractmethod
    def update_from_ckan(self, ckan):
        """
        Abstract: update the object from a CKAN object.
        """
        raise NotImplementedError()
45
+
46
+
47
class DatabaseHarvesterABC(HarvesterConnectABC, ABC):
    """
    Abstract harvester at the database level: holds the parameters and lists datasets.
    """
    def __init__(self, params:DatabaseParams=None):
        """
        :param params: harvester parameters. A default DatabaseParams is created if None.
        """
        self.params: DatabaseParams = DatabaseParams() if params is None else params

    @abstractmethod
    def copy(self, *, dest=None):
        """
        Assign a copy of the parameters to dest and return dest.

        :param dest: destination object, to be created by the subclass (None is not handled here).
        """
        dest.params = self.params.copy()
        return dest

    @staticmethod
    @abstractmethod
    def init_from_options_string(options_string:str, *, base_dir:str=None) -> "DatabaseHarvesterABC":
        """
        Abstract: create a harvester from an options string.
        """
        raise NotImplementedError()

    def _finalize_connection(self):
        # nothing to do at this level
        return None

    ## query methods interface ---------------
    @abstractmethod
    def get_dataset_harvester(self, dataset_name:str) -> "DatasetHarvesterABC":
        """
        Abstract: return the dataset harvester for the given dataset name.
        """
        raise NotImplementedError()

    @abstractmethod
    def list_datasets(self, return_metadata:bool=True) -> Union[List[str], OrderedDict[str, DatasetMetadata]]:
        """
        Abstract: list the datasets, as a list of names or as a dict of DatasetMetadata.
        """
        raise NotImplementedError()

    def update_from_ckan(self, ckan):
        """
        Forward the CKAN object to the parameters.
        """
        self.params._update_from_ckan(ckan)
77
+
78
+
79
class DatasetHarvesterABC(DatabaseHarvesterABC, ABC):
    """
    Abstract harvester at the dataset level: holds the dataset metadata and lists tables.
    """
    def __init__(self, params:DatasetParams=None):
        """
        :param params: harvester parameters. A default DatasetParams is created if None.
        """
        if params is None:
            params = DatasetParams()
        super().__init__(params)
        self.params: DatasetParams = params
        self.dataset_metadata: Union[DatasetMetadata, None] = None

    def __del__(self):
        # the connection is closed when the object is destroyed
        self.disconnect()

    @abstractmethod
    def _finalize_connection(self):
        """
        Abstract again at this level: implementations must define it.
        """
        raise NotImplementedError()

    @abstractmethod
    def copy(self, *, dest=None):
        """
        Copy the parameters and the dataset metadata (if present) to dest and return dest.
        """
        super().copy(dest=dest)
        if self.dataset_metadata is not None:
            dest.dataset_metadata = self.dataset_metadata.copy()
        else:
            dest.dataset_metadata = None
        return dest

    @staticmethod
    @abstractmethod
    def init_from_options_string(options_string:str, *, base_dir:str=None) -> "DatasetHarvesterABC":
        """
        Abstract: create a harvester from an options string.
        """
        raise NotImplementedError()

    ## metadata interface ---------------
    @abstractmethod
    def query_dataset_metadata(self, cancel_if_present:bool=True) -> DatasetMetadata:
        """
        Connect and return the dataset metadata.

        :param cancel_if_present: if True and metadata is already stored, it is returned as is.
            Otherwise a new DatasetMetadata is created and its tables are obtained from list_tables.
        """
        self.connect()
        if cancel_if_present and self.dataset_metadata is not None:
            # metadata already available
            return self.dataset_metadata
        self.dataset_metadata = DatasetMetadata()
        # user needs to complete here
        self.dataset_metadata.tables = self.list_tables(return_metadata=True)
        return self.dataset_metadata

    def clean_dataset_metadata(self) -> DatasetMetadata:
        """
        Return a copy of the result of query_dataset_metadata.
        """
        metadata = self.query_dataset_metadata()
        return metadata.copy()

    ## query methods interface ---------------
    @abstractmethod
    def get_table_harvester(self, table_name:str) -> "TableHarvesterABC":
        """
        Abstract: return the table harvester for the given table name.
        """
        raise NotImplementedError()

    @abstractmethod
    def list_tables(self, return_metadata:bool=True) -> Union[List[str], OrderedDict[str, TableMetadata]]:
        """
        Abstract: list the tables, as a list of names or as a dict of TableMetadata.
        """
        raise NotImplementedError()
128
+
129
+
130
class TableHarvesterABC(DatasetHarvesterABC, ABC):
    """
    Abstract harvester at the table level: holds the table metadata and queries data.
    """
    # class-level defaults, returned by get_default_df_upload_fun for the first one
    _default_upload_fun: Union[Callable[[Any], pd.DataFrame], None] = None
    _default_primary_key: Union[List[str], None] = None

    def __init__(self, params:TableParams=None):
        """
        :param params: harvester parameters. A default TableParams is created if None.
        """
        if params is None:
            params = TableParams()
        super().__init__(params)
        self.params: TableParams = params
        self.table_metadata: Union[TableMetadata, None] = None

    def __del__(self):
        # the connection is closed when the object is destroyed
        self.disconnect()

    @abstractmethod
    def copy(self, *, dest=None):
        """
        Copy the parameters, the dataset metadata and the table metadata (if present) to dest and return dest.
        """
        super().copy(dest=dest)
        if self.table_metadata is not None:
            dest.table_metadata = self.table_metadata.copy()
        else:
            dest.table_metadata = None
        return dest

    @staticmethod
    @abstractmethod
    def init_from_options_string(options_string:str, *, base_dir:str=None, file_url_attr:str=None) -> "TableHarvesterABC":
        """
        Abstract: create a harvester from an options string.
        """
        raise NotImplementedError()

    ## metadata interface ---------------
    @abstractmethod
    def query_table_metadata(self, cancel_if_present:bool=True) -> TableMetadata:
        """
        Connect and return the table metadata.

        :param cancel_if_present: if True and metadata is already stored, it is returned as is.
            Otherwise a new, empty TableMetadata is created, stored and returned.
        """
        self.connect()
        if cancel_if_present and self.table_metadata is not None:
            # metadata already available
            return self.table_metadata
        self.table_metadata = TableMetadata()
        # user needs to complete here
        return self.table_metadata

    def clean_table_metadata(self) -> TableMetadata:
        """
        Return a copy of the result of query_table_metadata.
        """
        metadata = self.query_table_metadata()
        return metadata.copy()

    @classmethod
    def get_default_df_upload_fun(cls) -> Union[Callable[[Any], pd.DataFrame], None]:
        """
        Return the class attribute _default_upload_fun.
        """
        return cls._default_upload_fun

    def get_default_data_cleaner(self) -> Union[CkanDataCleanerABC, None]:
        """
        Return the default data cleaner: None at this level.
        """
        return None

    @abstractmethod
    def get_default_primary_key(self) -> List[str]:
        """
        Return the default primary key: an empty list at this level.
        """
        return []

    ## query methods interface ---------------
    @abstractmethod
    def list_queries(self, *, new_connection:bool=False) -> List[Any]:
        """
        Connect (forcing the connect call to proceed if new_connection is True), then raise: to be completed by implementations.
        """
        self.connect(cancel_if_connected=not new_connection)
        raise NotImplementedError()

    @abstractmethod
    def query_data(self, query:Any) -> Union[List[dict], pd.DataFrame]:
        """
        Abstract: execute a query and return the data as a list of dicts or a DataFrame.
        """
        raise NotImplementedError()
189
+
190
+
@@ -0,0 +1,31 @@
1
+ #!python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Errors specific to harvesting data
5
+ """
6
+
7
+ from ckanapi_harvesters.auxiliary.ckan_errors import RequirementError
8
+ from ckanapi_harvesters.harvesters.data_cleaner.data_cleaner_errors import CleanError, CleanerRequirementError # alias
9
+
10
+
11
class HarvestMethodRequiredError(Exception):
    """
    Error carrying a fixed message stating that the --harvester argument is required.
    """
    def __init__(self):
        message = "The harvesting method argument --harvester is required."
        super().__init__(message)
14
+
15
class HarvesterArgumentError(Exception):
    """
    Base class for errors on harvester arguments.
    """
17
+
18
class HarvesterArgumentRequiredError(HarvesterArgumentError):
    """
    Error stating that an argument is required for a given harvest method.
    """
    def __init__(self, argument:str, harvest_method:str, help:str=None):
        """
        :param argument: name of the missing argument
        :param harvest_method: harvest method requiring the argument
        :param help: optional text appended to the message (nothing is appended if None)
        """
        help_text = "" if help is None else help
        super().__init__(f"The argument {argument} is required for harvest method {harvest_method}. " + help_text)
22
+
23
class HarvesterRequirementError(RequirementError):
    """
    Error stating that a package is required for a harvester.
    """
    def __init__(self, requirement:str, harvest_method:str):
        """
        :param requirement: name of the required package
        :param harvest_method: harvest method needing the package
        """
        message = f"The package {requirement} is required for this harvester ({harvest_method})."
        super().__init__(message)
26
+
27
class ResourceNotFoundError(Exception):
    """
    Error stating that a resource was not found on a host.
    """
    def __init__(self, resource_type:str, table_name:str, host:str):
        """
        :param resource_type: type of resource, used as the first word of the message
        :param table_name: name of the resource
        :param host: host where the resource was looked for
        """
        message = f"{resource_type} {table_name} was not found on host ({host})."
        super().__init__(message)
30
+
31
+
@@ -0,0 +1,30 @@
1
+ #!python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Harvester initialization from the options_string arguments
5
+ """
6
+ from ckanapi_harvesters.harvesters.harvester_abc import TableHarvesterABC, DatasetHarvesterABC
7
+ from ckanapi_harvesters.harvesters.harvester_params import TableParams
8
+ from ckanapi_harvesters.harvesters.harvester_params import DatasetParams
9
+ from ckanapi_harvesters.harvesters.postgre_harvester import TableHarvesterPostgre, DatasetHarvesterPostgre
10
+ from ckanapi_harvesters.harvesters.pymongo_harvester import TableHarvesterMongoCollection, DatasetHarvesterMongoDatabase
11
+
12
+
13
def init_table_harvester_from_options_string(options_string:str, *, file_url_attr:str, base_dir:str=None) -> TableHarvesterABC:
    """
    Create the table harvester matching the harvest method parsed from the options string.

    :param options_string: options, parsed by TableParams.parse_harvest_method and by the selected harvester class
    :param file_url_attr: forwarded to the harvester class
    :param base_dir: forwarded to the harvester class
    :raises NotImplementedError: if the harvest method is neither "pymongo" nor "postgre"
    """
    method_name = TableParams.parse_harvest_method(options_string)
    if method_name == "pymongo":
        harvester_class = TableHarvesterMongoCollection
    elif method_name == "postgre":
        harvester_class = TableHarvesterPostgre
    else:
        raise NotImplementedError(f"harvester method {method_name} not implemented")
    return harvester_class.init_from_options_string(options_string, file_url_attr=file_url_attr, base_dir=base_dir)
21
+
22
+
23
def init_dataset_harvester_from_options_string(options_string:str, *, base_dir:str=None) -> DatasetHarvesterABC:
    """
    Create the dataset harvester matching the harvest method parsed from the options string.

    :param options_string: options, parsed by DatasetParams.parse_harvest_method and by the selected harvester class
    :param base_dir: forwarded to the harvester class
    :raises NotImplementedError: if the harvest method is neither "pymongo" nor "postgre"
    """
    method_name = DatasetParams.parse_harvest_method(options_string)
    if method_name == "pymongo":
        harvester_class = DatasetHarvesterMongoDatabase
    elif method_name == "postgre":
        harvester_class = DatasetHarvesterPostgre
    else:
        raise NotImplementedError(f"harvester method {method_name} not implemented")
    return harvester_class.init_from_options_string(options_string, base_dir=base_dir)
@@ -0,0 +1,49 @@
1
+ #!python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Harvester metadata model classes (field, table and dataset metadata)
5
+ """
6
+ from typing import Union, List
7
+ from collections import OrderedDict
8
+ import copy
9
+
10
+ from ckanapi_harvesters.auxiliary.ckan_auxiliary import CkanFieldInternalAttrs
11
+
12
+
13
class FieldMetadata:
    """
    Metadata of a field. All attributes except the name, the internal attributes
    and the harvester attributes are initialized to None.
    """
    def __init__(self):
        self.name:str = ""
        # descriptive attributes
        self.description:Union[str,None] = None
        self.label:Union[str,None] = None
        self.data_type:Union[str,None] = None
        # flags
        self.is_index:Union[bool,None] = None
        self.uniquekey:Union[bool,None] = None
        self.notnull:Union[bool,None] = None
        # additional attributes
        self.internal_attrs: CkanFieldInternalAttrs = CkanFieldInternalAttrs()
        self.harvester_attrs: dict = {}

    def copy(self):
        """
        Return a deep copy of the object.
        """
        duplicate = copy.deepcopy(self)
        return duplicate
27
+
28
+
29
class TableMetadata:
    """
    Metadata of a table. All attributes except the name are initialized to None.
    """
    def __init__(self):
        self.name: str = ""
        self.description: Union[str,None] = None
        # keys and indexes, as lists of strings
        self.primary_key: Union[List[str],None] = None
        self.indexes: Union[List[str],None] = None
        self.unique_keys: Union[List[str],None] = None
        # field metadata, by key
        self.fields: Union[OrderedDict[str,FieldMetadata],None] = None

    def copy(self):
        """
        Return a deep copy of the object.
        """
        duplicate = copy.deepcopy(self)
        return duplicate
40
+
41
+
42
class DatasetMetadata:
    """
    Metadata of a dataset. All attributes except the name are initialized to None.
    """
    def __init__(self):
        self.name:str = ""
        self.description:Union[str,None] = None
        # table metadata, by key
        self.tables:Union[OrderedDict[str,TableMetadata],None] = None

    def copy(self):
        """
        Return a deep copy of the object.
        """
        duplicate = copy.deepcopy(self)
        return duplicate