ckanapi-harvesters 0.0.0__tar.gz → 0.0.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ckanapi_harvesters-0.0.0 → ckanapi_harvesters-0.0.3}/LICENSE +1 -1
- {ckanapi_harvesters-0.0.0/src/ckanapi_harvesters.egg-info → ckanapi_harvesters-0.0.3}/PKG-INFO +84 -38
- {ckanapi_harvesters-0.0.0 → ckanapi_harvesters-0.0.3}/README.md +53 -33
- ckanapi_harvesters-0.0.3/pyproject.toml +109 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/__init__.py +37 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/auxiliary/__init__.py +26 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/auxiliary/ckan_action.py +93 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/auxiliary/ckan_api_key.py +213 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/auxiliary/ckan_auxiliary.py +293 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/auxiliary/ckan_configuration.py +50 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/auxiliary/ckan_defs.py +10 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/auxiliary/ckan_errors.py +129 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/auxiliary/ckan_map.py +509 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/auxiliary/ckan_model.py +992 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/auxiliary/ckan_vocabulary_deprecated.py +104 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/auxiliary/deprecated.py +82 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/auxiliary/error_level_message.py +51 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/auxiliary/external_code_import.py +98 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/auxiliary/list_records.py +60 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/auxiliary/login.py +163 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/auxiliary/path.py +208 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/auxiliary/proxy_config.py +298 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/auxiliary/urls.py +40 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/builder/__init__.py +40 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/builder/builder_aux.py +20 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/builder/builder_ckan.py +238 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/builder/builder_errors.py +36 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/builder/builder_field.py +122 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/builder/builder_package.py +9 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/builder/builder_package_1_basic.py +1291 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/builder/builder_package_2_harvesters.py +40 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/builder/builder_package_3_multi_threaded.py +45 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/builder/builder_package_example.xlsx +0 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/builder/builder_resource.py +589 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/builder/builder_resource_datastore.py +561 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/builder/builder_resource_datastore_multi_abc.py +367 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/builder/builder_resource_datastore_multi_folder.py +273 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/builder/builder_resource_datastore_multi_harvester.py +278 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/builder/builder_resource_datastore_unmanaged.py +145 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/builder/builder_resource_datastore_url.py +150 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/builder/builder_resource_init.py +126 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/builder/builder_resource_multi_abc.py +361 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/builder/builder_resource_multi_datastore.py +146 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/builder/builder_resource_multi_file.py +505 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/builder/example/__init__.py +21 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/builder/example/builder_example.py +21 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/builder/example/builder_example_aux_fun.py +24 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/builder/example/builder_example_download.py +44 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/builder/example/builder_example_generate_data.py +73 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/builder/example/builder_example_patch_upload.py +51 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/builder/example/builder_example_policy.py +114 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/builder/example/builder_example_test_sql.py +53 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/builder/example/builder_example_tests.py +87 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/builder/example/builder_example_tests_offline.py +57 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/builder/example/package/ckan-dpg.svg +74 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/builder/example/package/users_local.csv +3 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/builder/mapper_datastore.py +93 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/builder/mapper_datastore_multi.py +262 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/builder/specific/__init__.py +11 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/builder/specific/configuration_builder.py +66 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/builder/specific_builder_abc.py +23 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/ckan_api/__init__.py +20 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/ckan_api/ckan_api.py +11 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/ckan_api/ckan_api_0_base.py +896 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/ckan_api/ckan_api_1_map.py +1028 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/ckan_api/ckan_api_2_readonly.py +934 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/ckan_api/ckan_api_3_policy.py +229 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/ckan_api/ckan_api_4_readwrite.py +579 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/ckan_api/ckan_api_5_manage.py +1225 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/ckan_api/ckan_api_params.py +192 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/ckan_api/deprecated/__init__.py +9 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/ckan_api/deprecated/ckan_api_deprecated.py +267 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/ckan_api/deprecated/ckan_api_deprecated_vocabularies.py +189 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/harvesters/__init__.py +23 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/harvesters/data_cleaner/__init__.py +17 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_abc.py +240 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_errors.py +23 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload.py +9 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload_1_basic.py +430 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload_2_geom.py +98 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/harvesters/file_formats/__init__.py +10 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/harvesters/file_formats/csv_format.py +43 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/harvesters/file_formats/file_format_abc.py +39 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/harvesters/file_formats/file_format_init.py +25 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/harvesters/file_formats/shp_format.py +129 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/harvesters/harvester_abc.py +190 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/harvesters/harvester_errors.py +31 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/harvesters/harvester_init.py +30 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/harvesters/harvester_model.py +49 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/harvesters/harvester_params.py +323 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/harvesters/postgre_harvester.py +495 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/harvesters/postgre_params.py +86 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/harvesters/pymongo_data_cleaner.py +173 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/harvesters/pymongo_harvester.py +355 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/harvesters/pymongo_params.py +54 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/policies/__init__.py +20 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/policies/data_format_policy.py +269 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/policies/data_format_policy_abc.py +97 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/policies/data_format_policy_custom_fields.py +156 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/policies/data_format_policy_defs.py +135 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/policies/data_format_policy_errors.py +79 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/policies/data_format_policy_lists.py +234 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/policies/data_format_policy_tag_groups.py +35 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/reports/__init__.py +11 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters/reports/admin_report.py +292 -0
- {ckanapi_harvesters-0.0.0 → ckanapi_harvesters-0.0.3/src/ckanapi_harvesters.egg-info}/PKG-INFO +84 -38
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters.egg-info/SOURCES.txt +110 -0
- ckanapi_harvesters-0.0.3/src/ckanapi_harvesters.egg-info/requires.txt +25 -0
- ckanapi_harvesters-0.0.3/tests/test_builder_example_offline.py +41 -0
- ckanapi_harvesters-0.0.0/pyproject.toml +0 -66
- ckanapi_harvesters-0.0.0/src/ckanapi_harvesters/__init__.py +0 -15
- ckanapi_harvesters-0.0.0/src/ckanapi_harvesters/divider/__init__.py +0 -27
- ckanapi_harvesters-0.0.0/src/ckanapi_harvesters/divider/divider.py +0 -53
- ckanapi_harvesters-0.0.0/src/ckanapi_harvesters/divider/divider_error.py +0 -59
- ckanapi_harvesters-0.0.0/src/ckanapi_harvesters/main.py +0 -30
- ckanapi_harvesters-0.0.0/src/ckanapi_harvesters.egg-info/SOURCES.txt +0 -15
- ckanapi_harvesters-0.0.0/src/ckanapi_harvesters.egg-info/requires.txt +0 -1
- ckanapi_harvesters-0.0.0/tests/test_divider.py +0 -11
- ckanapi_harvesters-0.0.0/tests/test_helloworld.py +0 -5
- {ckanapi_harvesters-0.0.0 → ckanapi_harvesters-0.0.3}/setup.cfg +0 -0
- {ckanapi_harvesters-0.0.0 → ckanapi_harvesters-0.0.3}/src/ckanapi_harvesters.egg-info/dependency_links.txt +0 -0
- {ckanapi_harvesters-0.0.0 → ckanapi_harvesters-0.0.3}/src/ckanapi_harvesters.egg-info/top_level.txt +0 -0
{ckanapi_harvesters-0.0.0/src/ckanapi_harvesters.egg-info → ckanapi_harvesters-0.0.3}/PKG-INFO
RENAMED
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ckanapi_harvesters
|
|
3
|
-
Version: 0.0.
|
|
4
|
-
Summary:
|
|
3
|
+
Version: 0.0.3
|
|
4
|
+
Summary: Package helping to upload local resources to a CKAN server using the CKAN API. Metadata and the list of resources can be defined in an Excel spreadsheet. The package includes requests to download resources and checks metadata against formatting rules.
|
|
5
|
+
Author-email: ifpen-gp <63413841+ifpen-gp@users.noreply.github.com>
|
|
6
|
+
Maintainer-email: ifpen-gp <63413841+ifpen-gp@users.noreply.github.com>
|
|
5
7
|
License: MIT License
|
|
6
8
|
|
|
7
|
-
Copyright (c)
|
|
9
|
+
Copyright (c) 2026 IFPEN
|
|
8
10
|
|
|
9
11
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
10
12
|
of this software and associated documentation files (the "Software"), to deal
|
|
@@ -24,25 +26,91 @@ License: MIT License
|
|
|
24
26
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
25
27
|
SOFTWARE.
|
|
26
28
|
|
|
27
|
-
|
|
29
|
+
Project-URL: Documentation, https://mobidec.github.io/ckanapi_harvesters/index.html
|
|
30
|
+
Project-URL: Repository, https://github.com/Mobidec/ckanapi_harvesters.git
|
|
31
|
+
Project-URL: Issues, https://github.com/Mobidec/ckanapi_harvesters/issues
|
|
32
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
28
33
|
Classifier: License :: OSI Approved :: MIT License
|
|
29
34
|
Classifier: Operating System :: OS Independent
|
|
30
|
-
Requires-Python: >=3.
|
|
35
|
+
Requires-Python: >=3.10
|
|
31
36
|
Description-Content-Type: text/markdown
|
|
32
37
|
Requires-Dist: pytest==8.0.1
|
|
38
|
+
Requires-Dist: myst-nb>=1.3.0
|
|
39
|
+
Requires-Dist: requests>=2.32.5
|
|
40
|
+
Requires-Dist: pandas>=2.3.3
|
|
41
|
+
Requires-Dist: numpy>=2.2.6
|
|
42
|
+
Requires-Dist: openpyxl>=3.1.5
|
|
43
|
+
Provides-Extra: extras
|
|
44
|
+
Requires-Dist: pymongo>=4.8.0; extra == "extras"
|
|
45
|
+
Requires-Dist: psycopg2>=2.9.11; extra == "extras"
|
|
46
|
+
Requires-Dist: shapely>=2.1.2; extra == "extras"
|
|
47
|
+
Requires-Dist: pyproj==3.6.1; extra == "extras"
|
|
48
|
+
Requires-Dist: geopandas>=1.1.2; extra == "extras"
|
|
49
|
+
Provides-Extra: harvesters
|
|
50
|
+
Requires-Dist: pymongo>=4.8.0; extra == "harvesters"
|
|
51
|
+
Requires-Dist: psycopg2>=2.9.11; extra == "harvesters"
|
|
52
|
+
Requires-Dist: shapely>=2.1.2; extra == "harvesters"
|
|
53
|
+
Requires-Dist: pyproj==3.6.1; extra == "harvesters"
|
|
54
|
+
Provides-Extra: alt
|
|
55
|
+
Requires-Dist: bson; extra == "alt"
|
|
56
|
+
Requires-Dist: shapely>=2.1.2; extra == "alt"
|
|
57
|
+
Requires-Dist: pyproj==3.6.1; extra == "alt"
|
|
58
|
+
Requires-Dist: geopandas>=1.1.2; extra == "alt"
|
|
33
59
|
|
|
34
60
|
# ckanapi_harvesters
|
|
35
61
|
|
|
62
|
+
<img src="https://raw.githubusercontent.com/Mobidec/ckanapi_harvesters/refs/heads/main/doc/assets/France2030-Logo-1024x576.png" alt="logo">
|
|
63
|
+
|
|
36
64
|
---
|
|
37
65
|
|
|
38
66
|
## Description
|
|
39
67
|
|
|
40
|
-
This
|
|
68
|
+
This package enables users to benefit from the CKAN API and provides functions which
|
|
69
|
+
realize complex API calls to achieve specific operations.
|
|
70
|
+
In this package, DataStores are returned/inputted as pandas DataFrames.
|
|
71
|
+
The underlying request mechanism uses the requests Session object, which improves performance with multiple requests.
|
|
72
|
+
This package is oriented towards the management of CKAN datasets and resources.
|
|
73
|
+
Only a selection of API calls has been implemented in this objective.
|
|
74
|
+
To perform custom API calls, the function `api_action_call` is provided to the end user.
|
|
75
|
+
This package was initially designed to harvest large DataStores from your local file system.
|
|
76
|
+
It also implements particular requests which can define a large DataStore.
|
|
77
|
+
Large datasets composed of multiple files can be uploaded/downloaded
|
|
78
|
+
through scripts into a single resource or multiple resources.
|
|
79
|
+
For a DataStore, large files are uploaded with a limited number of rows per request.
|
|
80
|
+
|
|
81
|
+
The package is divided into the following sections:
|
|
82
|
+
- `ckan_api`: functions interacting with the CKAN API.
|
|
83
|
+
In addition to the base class which manages basic parameters and requests, API functions are divided as follows:
|
|
84
|
+
1) functions to map the CKAN packages and resources. The remote data structures are mapped in a mirrored data structure.
|
|
85
|
+
CKAN DataStore information, organizations, licenses and resource views are optionally tracked.
|
|
86
|
+
2) functions to query a DataStore or to download file resources.
|
|
87
|
+
3) functions to apply or test a data format policy on a given package.
|
|
88
|
+
4) functions to upsert data to a DataStore or to upload files to a resource.
|
|
89
|
+
5) functions to manage CKAN objects
|
|
90
|
+
(creating, patching, or removing packages, resources, and DataStores).
|
|
91
|
+
These functions enable the user to change the metadata for these objects.
|
|
92
|
+
The other objects are meant to be managed through the API.
|
|
93
|
+
- `policies`: functions to check data format policies. A data format policy defines which attributes
|
|
94
|
+
are mandatory for a package or resource.
|
|
95
|
+
Specific rules can be implemented to restrict package tags to certain lists,
|
|
96
|
+
grouped by [vocabulary](https://docs.ckan.org/en/2.9/maintaining/tag-vocabularies.html).
|
|
97
|
+
Extra key-value pairs of packages can be enforced. Resource formats can be restricted to a certain list.
|
|
98
|
+
- `reports`: functions to extract a report on the CKAN database in order to monitor
|
|
99
|
+
package user access rights, resource memory occupation, modification dates and data format policy messages.
|
|
100
|
+
- `harvesters`: this module implements ways to load data from your local machine.
|
|
101
|
+
- `file_formats`: The primary approach is to use files on your local file system. The CSV and SHP (shape file) formats are currently supported.
|
|
102
|
+
- In addition to the file formats, harvesters have been implemented to transfer data from a database.
|
|
103
|
+
This is particularly useful if the database cannot be accessed by CKAN harvester extensions
|
|
104
|
+
because it would only be available locally. MongoDB and PostgreSQL databases are currently supported.
|
|
105
|
+
- `builder`: functions to automate package and resource metadata patching and data uploads or downloads.
|
|
106
|
+
These parameters can be defined in an Excel workbook and files from the local file system can be referenced as inputs for the data upload.
|
|
107
|
+
The parameters can also be deduced from an online CKAN package through the API.
|
|
108
|
+
- Example scripts are given in this module, referring to an example Excel workbook.
|
|
109
|
+
The Excel workbook is available in the package and at this link:
|
|
110
|
+
[builder_package_example.xlsx](src/ckanapi_harvesters/builder/builder_package_example.xlsx)
|
|
111
|
+
See also the notebook example in the current documentation here:
|
|
112
|
+
[builder_example_notebook.ipynb](sphinx/notebooks/builder_example_notebook.ipynb).
|
|
41
113
|
|
|
42
|
-
This package includes:
|
|
43
|
-
- A unit test structure based on the [Pytest](https://docs.pytest.org/en/stable/) library
|
|
44
|
-
- Automatic documentation generation based on the [Sphinx](https://www.sphinx-doc.org/en/master/) library
|
|
45
|
-
- A CI/CD pipeline for deploying the Python package to a Python server
|
|
46
114
|
|
|
47
115
|
## Github Pages
|
|
48
116
|
|
|
@@ -130,51 +198,29 @@ For more details on contributing and best practices, please refer to the `CONTRI
|
|
|
130
198
|
|
|
131
199
|
### Installation
|
|
132
200
|
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
```bash
|
|
136
|
-
pip install ckanapi_harvesters
|
|
137
|
-
```
|
|
138
|
-
|
|
139
|
-
Otherwise, you can specify the package index depending on whether you are in an internal (on-premise) or external (cloud) environment.
|
|
201
|
+
The package and its optional dependencies can be installed with the following command:
|
|
140
202
|
|
|
141
203
|
```bash
|
|
142
|
-
|
|
143
|
-
pip install ckanapi_harvesters --extra-index-url https://nexus.ifpen.fr/repository/fast-it/simple
|
|
144
|
-
|
|
145
|
-
# On Cloud
|
|
146
|
-
pip install ckanapi_harvesters --extra-index-url https://nexus.fastit.dev/repository/fast-it/simple
|
|
204
|
+
pip install ckanapi_harvesters[extras]
|
|
147
205
|
```
|
|
148
206
|
|
|
149
|
-
Alternatively, you can set the package index URL as an environment variable:
|
|
150
|
-
|
|
151
|
-
```bash
|
|
152
|
-
# On-premise
|
|
153
|
-
export PIP_EXTRA_INDEX_URL=https://nexus.fastit.dev/repository/fast-it/simple
|
|
154
|
-
|
|
155
|
-
# On Cloud
|
|
156
|
-
export PIP_EXTRA_INDEX_URL=https://nexus.ifpen.fr/repository/fast-it/simple
|
|
157
|
-
```
|
|
158
207
|
|
|
159
208
|
### Example Usage of the Python Package in Your Code
|
|
160
209
|
|
|
161
210
|
After installation, you can import and use your package and its functions in your Python code:
|
|
162
211
|
|
|
163
212
|
```python
|
|
164
|
-
from
|
|
213
|
+
from ckanapi_harvesters import CkanApi
|
|
165
214
|
|
|
166
|
-
|
|
215
|
+
ckan = CkanApi()
|
|
167
216
|
```
|
|
168
217
|
|
|
169
218
|
To use sub-modules defined in the package:
|
|
170
219
|
|
|
171
220
|
```python
|
|
172
|
-
from
|
|
173
|
-
|
|
174
|
-
a = 4.0
|
|
175
|
-
b = 2.0
|
|
221
|
+
from ckanapi_harvesters.ckan_api import CkanApi
|
|
176
222
|
|
|
177
|
-
|
|
223
|
+
ckan = CkanApi()
|
|
178
224
|
```
|
|
179
225
|
|
|
180
226
|
These instructions will allow you to access the package and utilize its features effectively and in line with your development configuration.
|
|
@@ -1,15 +1,57 @@
|
|
|
1
1
|
# ckanapi_harvesters
|
|
2
2
|
|
|
3
|
+
<img src="https://raw.githubusercontent.com/Mobidec/ckanapi_harvesters/refs/heads/main/doc/assets/France2030-Logo-1024x576.png" alt="logo">
|
|
4
|
+
|
|
3
5
|
---
|
|
4
6
|
|
|
5
7
|
## Description
|
|
6
8
|
|
|
7
|
-
This
|
|
9
|
+
This package enables users to benefit from the CKAN API and provides functions which
|
|
10
|
+
realize complex API calls to achieve specific operations.
|
|
11
|
+
In this package, DataStores are returned/inputted as pandas DataFrames.
|
|
12
|
+
The underlying request mechanism uses the requests Session object, which improves performance with multiple requests.
|
|
13
|
+
This package is oriented towards the management of CKAN datasets and resources.
|
|
14
|
+
Only a selection of API calls has been implemented in this objective.
|
|
15
|
+
To perform custom API calls, the function `api_action_call` is provided to the end user.
|
|
16
|
+
This package was initially designed to harvest large DataStores from your local file system.
|
|
17
|
+
It also implements particular requests which can define a large DataStore.
|
|
18
|
+
Large datasets composed of multiple files can be uploaded/downloaded
|
|
19
|
+
through scripts into a single resource or multiple resources.
|
|
20
|
+
For a DataStore, large files are uploaded with a limited number of rows per request.
|
|
21
|
+
|
|
22
|
+
The package is divided into the following sections:
|
|
23
|
+
- `ckan_api`: functions interacting with the CKAN API.
|
|
24
|
+
In addition to the base class which manages basic parameters and requests, API functions are divided as follows:
|
|
25
|
+
1) functions to map the CKAN packages and resources. The remote data structures are mapped in a mirrored data structure.
|
|
26
|
+
CKAN DataStore information, organizations, licenses and resource views are optionally tracked.
|
|
27
|
+
2) functions to query a DataStore or to download file resources.
|
|
28
|
+
3) functions to apply or test a data format policy on a given package.
|
|
29
|
+
4) functions to upsert data to a DataStore or to upload files to a resource.
|
|
30
|
+
5) functions to manage CKAN objects
|
|
31
|
+
(creating, patching, or removing packages, resources, and DataStores).
|
|
32
|
+
These functions enable the user to change the metadata for these objects.
|
|
33
|
+
The other objects are meant to be managed through the API.
|
|
34
|
+
- `policies`: functions to check data format policies. A data format policy defines which attributes
|
|
35
|
+
are mandatory for a package or resource.
|
|
36
|
+
Specific rules can be implemented to restrict package tags to certain lists,
|
|
37
|
+
grouped by [vocabulary](https://docs.ckan.org/en/2.9/maintaining/tag-vocabularies.html).
|
|
38
|
+
Extra key-value pairs of packages can be enforced. Resource formats can be restricted to a certain list.
|
|
39
|
+
- `reports`: functions to extract a report on the CKAN database in order to monitor
|
|
40
|
+
package user access rights, resource memory occupation, modification dates and data format policy messages.
|
|
41
|
+
- `harvesters`: this module implements ways to load data from your local machine.
|
|
42
|
+
- `file_formats`: The primary approach is to use files on your local file system. The CSV and SHP (shape file) formats are currently supported.
|
|
43
|
+
- In addition to the file formats, harvesters have been implemented to transfer data from a database.
|
|
44
|
+
This is particularly useful if the database cannot be accessed by CKAN harvester extensions
|
|
45
|
+
because it would only be available locally. MongoDB and PostgreSQL databases are currently supported.
|
|
46
|
+
- `builder`: functions to automate package and resource metadata patching and data uploads or downloads.
|
|
47
|
+
These parameters can be defined in an Excel workbook and files from the local file system can be referenced as inputs for the data upload.
|
|
48
|
+
The parameters can also be deduced from an online CKAN package through the API.
|
|
49
|
+
- Example scripts are given in this module, referring to an example Excel workbook.
|
|
50
|
+
The Excel workbook is available in the package and at this link:
|
|
51
|
+
[builder_package_example.xlsx](src/ckanapi_harvesters/builder/builder_package_example.xlsx)
|
|
52
|
+
See also the notebook example in the current documentation here:
|
|
53
|
+
[builder_example_notebook.ipynb](sphinx/notebooks/builder_example_notebook.ipynb).
|
|
8
54
|
|
|
9
|
-
This package includes:
|
|
10
|
-
- A unit test structure based on the [Pytest](https://docs.pytest.org/en/stable/) library
|
|
11
|
-
- Automatic documentation generation based on the [Sphinx](https://www.sphinx-doc.org/en/master/) library
|
|
12
|
-
- A CI/CD pipeline for deploying the Python package to a Python server
|
|
13
55
|
|
|
14
56
|
## Github Pages
|
|
15
57
|
|
|
@@ -97,51 +139,29 @@ For more details on contributing and best practices, please refer to the `CONTRI
|
|
|
97
139
|
|
|
98
140
|
### Installation
|
|
99
141
|
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
```bash
|
|
103
|
-
pip install ckanapi_harvesters
|
|
104
|
-
```
|
|
105
|
-
|
|
106
|
-
Otherwise, you can specify the package index depending on whether you are in an internal (on-premise) or external (cloud) environment.
|
|
142
|
+
The package and its optional dependencies can be installed with the following command:
|
|
107
143
|
|
|
108
144
|
```bash
|
|
109
|
-
|
|
110
|
-
pip install ckanapi_harvesters --extra-index-url https://nexus.ifpen.fr/repository/fast-it/simple
|
|
111
|
-
|
|
112
|
-
# On Cloud
|
|
113
|
-
pip install ckanapi_harvesters --extra-index-url https://nexus.fastit.dev/repository/fast-it/simple
|
|
145
|
+
pip install ckanapi_harvesters[extras]
|
|
114
146
|
```
|
|
115
147
|
|
|
116
|
-
Alternatively, you can set the package index URL as an environment variable:
|
|
117
|
-
|
|
118
|
-
```bash
|
|
119
|
-
# On-premise
|
|
120
|
-
export PIP_EXTRA_INDEX_URL=https://nexus.fastit.dev/repository/fast-it/simple
|
|
121
|
-
|
|
122
|
-
# On Cloud
|
|
123
|
-
export PIP_EXTRA_INDEX_URL=https://nexus.ifpen.fr/repository/fast-it/simple
|
|
124
|
-
```
|
|
125
148
|
|
|
126
149
|
### Example Usage of the Python Package in Your Code
|
|
127
150
|
|
|
128
151
|
After installation, you can import and use your package and its functions in your Python code:
|
|
129
152
|
|
|
130
153
|
```python
|
|
131
|
-
from
|
|
154
|
+
from ckanapi_harvesters import CkanApi
|
|
132
155
|
|
|
133
|
-
|
|
156
|
+
ckan = CkanApi()
|
|
134
157
|
```
|
|
135
158
|
|
|
136
159
|
To use sub-modules defined in the package:
|
|
137
160
|
|
|
138
161
|
```python
|
|
139
|
-
from
|
|
140
|
-
|
|
141
|
-
a = 4.0
|
|
142
|
-
b = 2.0
|
|
162
|
+
from ckanapi_harvesters.ckan_api import CkanApi
|
|
143
163
|
|
|
144
|
-
|
|
164
|
+
ckan = CkanApi()
|
|
145
165
|
```
|
|
146
166
|
|
|
147
167
|
These instructions will allow you to access the package and utilize its features effectively and in line with your development configuration.
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "ckanapi_harvesters"
|
|
3
|
+
version = '0.0.3'
|
|
4
|
+
authors = [
|
|
5
|
+
{ name = "ifpen-gp", email = "63413841+ifpen-gp@users.noreply.github.com" },
|
|
6
|
+
]
|
|
7
|
+
maintainers = [
|
|
8
|
+
{ name = "ifpen-gp", email = "63413841+ifpen-gp@users.noreply.github.com" },
|
|
9
|
+
]
|
|
10
|
+
description = "Package helping to upload local resources to a CKAN server using the CKAN API. Metadata and the list of resources can be defined in an Excel spreadsheet. The package includes requests to download resources and checks metadata against formatting rules."
|
|
11
|
+
readme = "README.md"
|
|
12
|
+
requires-python = ">=3.10"
|
|
13
|
+
classifiers = [
|
|
14
|
+
"Programming Language :: Python :: 3.10",
|
|
15
|
+
"License :: OSI Approved :: MIT License",
|
|
16
|
+
"Operating System :: OS Independent",
|
|
17
|
+
]
|
|
18
|
+
dependencies = [
|
|
19
|
+
"pytest == 8.0.1",
|
|
20
|
+
"myst-nb >= 1.3.0",
|
|
21
|
+
# add the necessary dependencies
|
|
22
|
+
"requests >= 2.32.5",
|
|
23
|
+
"pandas >= 2.3.3",
|
|
24
|
+
"numpy >= 2.2.6",
|
|
25
|
+
"openpyxl >= 3.1.5", # could be an optional extra
|
|
26
|
+
]
|
|
27
|
+
|
|
28
|
+
license = { file = "LICENSE" }
|
|
29
|
+
|
|
30
|
+
[project.urls]
|
|
31
|
+
Documentation = "https://mobidec.github.io/ckanapi_harvesters/index.html"
|
|
32
|
+
Repository = "https://github.com/Mobidec/ckanapi_harvesters.git"
|
|
33
|
+
Issues = "https://github.com/Mobidec/ckanapi_harvesters/issues"
|
|
34
|
+
|
|
35
|
+
[project.optional-dependencies]
|
|
36
|
+
extras = [ # default extras
|
|
37
|
+
"pymongo >= 4.8.0", # MongoDB harvester
|
|
38
|
+
"psycopg2 >= 2.9.11", # Postgre harvester
|
|
39
|
+
"shapely >= 2.1.2", # GeoJSON support
|
|
40
|
+
"pyproj == 3.6.1", # GeoJSON transformations (v3.6.1 is compatible with Python 3.10)
|
|
41
|
+
"geopandas >= 1.1.2", # Shapefile reading functions
|
|
42
|
+
]
|
|
43
|
+
harvesters = [ # dependencies for database harvesters
|
|
44
|
+
"pymongo >= 4.8.0", # MongoDB harvester
|
|
45
|
+
"psycopg2 >= 2.9.11", # Postgre harvester
|
|
46
|
+
"shapely >= 2.1.2", # GeoJSON support
|
|
47
|
+
"pyproj == 3.6.1", # GeoJSON transformations (v3.6.1 is compatible with Python 3.10)
|
|
48
|
+
]
|
|
49
|
+
alt = [ # optional packages alternative
|
|
50
|
+
"bson", # bson implementation, independent of pymongo
|
|
51
|
+
"shapely >= 2.1.2", # GeoJSON support
|
|
52
|
+
"pyproj == 3.6.1", # GeoJSON transformations (v3.6.1 is compatible with Python 3.10)
|
|
53
|
+
"geopandas >= 1.1.2", # Shapefile reading functions
|
|
54
|
+
]
|
|
55
|
+
|
|
56
|
+
[tool.setuptools.package-data]
|
|
57
|
+
# non-source files to include in the package
|
|
58
|
+
"ckanapi_harvesters" = [
|
|
59
|
+
"builder/builder_package_example.xlsx",
|
|
60
|
+
"builder/example/package/*.*",
|
|
61
|
+
"builder/example/*.ipynb",
|
|
62
|
+
]
|
|
63
|
+
|
|
64
|
+
[tool.ckanapi_harvesters]
|
|
65
|
+
version = "1.0.2"
|
|
66
|
+
|
|
67
|
+
[tool.setuptools.packages.find]
|
|
68
|
+
where = ["src"]
|
|
69
|
+
|
|
70
|
+
[tool.setuptools_scm]
|
|
71
|
+
version_scheme = "guess-next-dev"
|
|
72
|
+
local_scheme = "dirty-tag"
|
|
73
|
+
|
|
74
|
+
[tool.setuptools]
|
|
75
|
+
py-modules = []
|
|
76
|
+
license-files = []
|
|
77
|
+
|
|
78
|
+
[tool.ruff]
|
|
79
|
+
# Set the maximum line length to 140.
|
|
80
|
+
line-length = 140
|
|
81
|
+
exclude = [
|
|
82
|
+
"tests",
|
|
83
|
+
"sphinx",
|
|
84
|
+
"src",
|
|
85
|
+
]
|
|
86
|
+
|
|
87
|
+
[tool.ruff.lint]
|
|
88
|
+
# See documentation: https://docs.astral.sh/ruff/rules/
|
|
89
|
+
extend-select = [
|
|
90
|
+
"UP",
|
|
91
|
+
"E501",
|
|
92
|
+
"I",
|
|
93
|
+
"B",
|
|
94
|
+
"F",
|
|
95
|
+
"E",
|
|
96
|
+
"N",
|
|
97
|
+
"A",
|
|
98
|
+
"PL",
|
|
99
|
+
"D"
|
|
100
|
+
]
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
[tool.ruff.lint.pydocstyle]
|
|
104
|
+
convention = "numpy" # needs to be changed (rst docstrings)
|
|
105
|
+
|
|
106
|
+
[tool.ruff.format]
|
|
107
|
+
quote-style = "single"
|
|
108
|
+
indent-style = "space"
|
|
109
|
+
docstring-code-format = true
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
#!python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Package with helper functions for CKAN requests using pandas DataFrames.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
# builder_file_format_version = "0.0.1"
|
|
8
|
+
# Resolve the version of the installed distribution at import time.
try:
    from importlib.metadata import version, PackageNotFoundError
except ImportError:  # Python <3.8: importlib.metadata is not in the standard library, use the backport
    from importlib_metadata import version, PackageNotFoundError

try:
    __version__ = version("ckanapi_harvesters")
except PackageNotFoundError:
    # no installed distribution metadata was found under this name
    __version__ = None
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
import os
|
|
20
|
+
self_dir = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
from . import auxiliary
|
|
24
|
+
from . import policies
|
|
25
|
+
from . import harvesters
|
|
26
|
+
from . import ckan_api
|
|
27
|
+
from . import builder
|
|
28
|
+
from . import reports
|
|
29
|
+
|
|
30
|
+
# usage shortcuts
|
|
31
|
+
from .auxiliary import CkanMap
|
|
32
|
+
from .policies import CkanPackageDataFormatPolicy
|
|
33
|
+
from .ckan_api import CkanApi, CKAN_API_VERSION
|
|
34
|
+
from .builder import BUILDER_FILE_FORMAT_VERSION
|
|
35
|
+
from .builder import BuilderPackage, BuilderDataStoreMultiABC, BuilderDataStoreFolder, RequestFileMapperIndexKeys
|
|
36
|
+
|
|
37
|
+
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
#!python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Package with helper functions for CKAN requests using pandas DataFrames.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from . import ckan_defs
|
|
8
|
+
from . import path
|
|
9
|
+
from . import login
|
|
10
|
+
from . import urls
|
|
11
|
+
from . import proxy_config
|
|
12
|
+
from . import external_code_import
|
|
13
|
+
from . import list_records
|
|
14
|
+
from . import ckan_action
|
|
15
|
+
from . import ckan_errors
|
|
16
|
+
from . import ckan_configuration
|
|
17
|
+
from . import ckan_api_key
|
|
18
|
+
from . import ckan_model
|
|
19
|
+
from . import ckan_map
|
|
20
|
+
from . import ckan_vocabulary_deprecated
|
|
21
|
+
from . import ckan_auxiliary
|
|
22
|
+
from . import deprecated
|
|
23
|
+
|
|
24
|
+
from .ckan_map import CkanMap
|
|
25
|
+
from .external_code_import import unlock_external_code_execution
|
|
26
|
+
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
#!python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Action response common treatments
|
|
5
|
+
"""
|
|
6
|
+
from typing import Union
|
|
7
|
+
import json
|
|
8
|
+
|
|
9
|
+
import requests
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class CkanActionResponse:
    """
    Class which decodes and checks the response of a CKAN request.

    The constructor parses the body of the response as JSON and fills in the following attributes:

    - ``response``: the response object given to the constructor (kept for debug purposes)
    - ``response_dict``: the decoded JSON body, or None if it could not be decoded
    - ``status_code``: HTTP status code of the response (forced to 1 in dry run mode)
    - ``success``: True if the status code is 200 and the body has a truthy ``"success"`` field and a ``"result"`` field
      (always True in dry run mode)
    - ``success_json_loads``: True if the body could be decoded as JSON
    - ``result``: value of the ``"result"`` field on success, None otherwise
    - ``error_message``: value of the ``"error"`` field of the body if present, else the raw body text,
      or a description of the decoding failure
    - ``len``: only set (to 0) in dry run mode
    - ``dry_run``: the flag given to the constructor

    :param response: response of the request. A response with neither content nor request is treated as a dry run.
    :param dry_run: True if the request was not sent to the server
    """
    def __init__(self, response: requests.Response, dry_run: bool=False):
        self.response: requests.Response = response  # for debug purposes
        self.response_dict: Union[dict,None] = None
        self.status_code: int = response.status_code
        self.success: bool = False
        self.success_json_loads: bool = False
        self.result: Union[dict,None] = None
        self.error_message: Union[None,str,dict] = None
        self.len: Union[int,None] = None
        self.dry_run: bool = dry_run

        if response.content is None and response.request is None:
            # dry run: nothing was sent, so there is nothing to decode
            assert(dry_run)
            self.success = True
            self.success_json_loads = False
            self.status_code = 1
            self.error_message = "Request not sent: dry run mode"
            self.len = 0
        else:
            try:
                response_dict = json.loads(response.content.decode())
                self.response_dict = response_dict
                self.success_json_loads = True
                if (response.status_code == 200 and "success" in response_dict.keys() and "result" in response_dict.keys()
                        and response_dict["success"]):
                    self.success = True
                    self.result = response_dict["result"]
                else:
                    if "error" in response_dict.keys():
                        self.error_message = response_dict["error"]
                    else:
                        self.error_message = response.content.decode()
            except Exception as json_error:
                # The body must not be decoded strictly again here: if the strict decoding is what failed,
                # repeating it would raise from within this handler and the object could not be built.
                self.error_message = f"JSON decode error {json_error} & CKAN error {self._raw_text(response)}"

    @staticmethod
    def _raw_text(response) -> str:
        """
        Return the body of the response as text without ever raising.

        Undecodable bytes are replaced and a missing body gives an empty string.
        """
        content = response.content
        if content is None:
            return ""
        return content.decode(errors="replace")

    def _error_type(self) -> Union[str,None]:
        """
        Return the ``"__type"`` field of the error message, or None if the error message is not a dict
        or does not have this field.
        """
        if isinstance(self.error_message, dict):
            return self.error_message.get("__type")
        return None

    def __len__(self):
        # NOTE: this class does not define __bool__, so the truth value of an instance also goes through this method.
        if self.len is None:
            raise RuntimeError("queried len but does not have len")
        return self.len

    def default_error(self, ckan) -> "CkanActionError":
        """
        Return the specific error object corresponding to the response. The error is returned, not raised.

        :param ckan: object passed on to the constructor of the error
        :return: a CkanNotFoundError for a 404 "Not Found Error", a CkanAuthorizationError for a 403 "Authorization Error",
            a CkanActionError in any other case
        """
        # error_message can be a plain string or None: the error type is then unknown and the generic error is returned
        error_type = self._error_type()
        if self.status_code == 404 and self.success_json_loads and error_type == "Not Found Error":
            return CkanNotFoundError(ckan, "(Generic)", self)
        elif self.status_code == 403 and self.success_json_loads and error_type == "Authorization Error":
            return CkanAuthorizationError(ckan, self)
        else:
            return CkanActionError(ckan, self)
|
|
67
|
+
|
|
68
|
+
## action error codes
|
|
69
|
+
class CkanActionError(Exception):
    """
    Base error for a CKAN action which did not succeed.

    The exception message is the error message of the response. The response and its status code are kept as attributes.

    :param ckan: object used to print the debug information of the request (only used if display_request is True)
    :param response: decoded response of the request
    :param display_request: if True, ``ckan._error_print_debug_response`` is called with the underlying response
    """
    def __init__(self, ckan, response: CkanActionResponse, display_request:bool=True):
        message = response.error_message
        super().__init__(message)
        self.response = response
        self.status_code = response.status_code
        if not display_request:
            return
        ckan._error_print_debug_response(response.response)

    def __str__(self):
        # prefix the message of the exception with the status code
        base_message = super().__str__()
        return f"Server code [{self.status_code}]: {base_message}"
|
|
79
|
+
|
|
80
|
+
class CkanNotFoundError(CkanActionError):
    """
    Error for an object which was not found.

    The error message of the response is overwritten in place: the previous message is prefixed with the object type.

    :param ckan: passed on to CkanActionError
    :param object_type: name of the type of object which was not found, also kept in the ``object_type`` attribute
    :param response: decoded response of the request (its ``error_message`` attribute is modified)
    :param display_request: passed on to CkanActionError
    """
    def __init__(self, ckan, object_type:str, response: CkanActionResponse, display_request:bool=True):
        previous_message = response.error_message
        response.error_message = f"{object_type} not found: {previous_message}"
        super().__init__(ckan, response, display_request=display_request)
        self.object_type = object_type
|
|
85
|
+
|
|
86
|
+
class CkanAuthorizationError(CkanActionError):
    """
    Error for an action refused for authorization reasons. It adds nothing to CkanActionError besides its type.
    """
|
|
88
|
+
|
|
89
|
+
class CkanSqlCapabilityError(CkanActionError):
    """
    Error for a request needing the sql capabilities of the CKAN server.

    The error message of the response is overwritten in place with a fixed message.

    :param ckan: passed on to CkanActionError
    :param response: decoded response of the request (its ``error_message`` attribute is modified)
    :param display_request: passed on to CkanActionError
    """
    def __init__(self, ckan, response: CkanActionResponse, display_request:bool=True):
        message = ("sql capabilities are not activated on CKAN server. "
                   "See documentation for option ckan.datastore.sqlsearch.enabled")
        response.error_message = message
        super().__init__(ckan, response, display_request=display_request)
|
|
93
|
+
|