ckanapi-harvesters 0.0.0__tar.gz → 0.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. {ckanapi_harvesters-0.0.0 → ckanapi_harvesters-0.0.2}/LICENSE +1 -1
  2. {ckanapi_harvesters-0.0.0/src/ckanapi_harvesters.egg-info → ckanapi_harvesters-0.0.2}/PKG-INFO +74 -38
  3. {ckanapi_harvesters-0.0.0 → ckanapi_harvesters-0.0.2}/README.md +53 -33
  4. ckanapi_harvesters-0.0.2/pyproject.toml +96 -0
  5. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/__init__.py +37 -0
  6. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/auxiliary/__init__.py +26 -0
  7. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/auxiliary/ckan_action.py +93 -0
  8. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/auxiliary/ckan_api_key.py +213 -0
  9. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/auxiliary/ckan_auxiliary.py +293 -0
  10. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/auxiliary/ckan_configuration.py +50 -0
  11. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/auxiliary/ckan_defs.py +10 -0
  12. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/auxiliary/ckan_errors.py +129 -0
  13. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/auxiliary/ckan_map.py +509 -0
  14. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/auxiliary/ckan_model.py +992 -0
  15. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/auxiliary/ckan_vocabulary_deprecated.py +104 -0
  16. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/auxiliary/deprecated.py +82 -0
  17. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/auxiliary/error_level_message.py +51 -0
  18. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/auxiliary/external_code_import.py +98 -0
  19. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/auxiliary/list_records.py +60 -0
  20. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/auxiliary/login.py +163 -0
  21. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/auxiliary/path.py +208 -0
  22. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/auxiliary/proxy_config.py +298 -0
  23. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/auxiliary/urls.py +40 -0
  24. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/builder/__init__.py +40 -0
  25. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/builder/builder_aux.py +20 -0
  26. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/builder/builder_ckan.py +238 -0
  27. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/builder/builder_errors.py +36 -0
  28. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/builder/builder_field.py +122 -0
  29. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/builder/builder_package.py +9 -0
  30. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/builder/builder_package_1_basic.py +1291 -0
  31. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/builder/builder_package_2_harvesters.py +40 -0
  32. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/builder/builder_package_3_multi_threaded.py +45 -0
  33. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/builder/builder_package_example.xlsx +0 -0
  34. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/builder/builder_resource.py +589 -0
  35. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/builder/builder_resource_datastore.py +561 -0
  36. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/builder/builder_resource_datastore_multi_abc.py +367 -0
  37. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/builder/builder_resource_datastore_multi_folder.py +273 -0
  38. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/builder/builder_resource_datastore_multi_harvester.py +278 -0
  39. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/builder/builder_resource_datastore_unmanaged.py +145 -0
  40. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/builder/builder_resource_datastore_url.py +150 -0
  41. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/builder/builder_resource_init.py +126 -0
  42. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/builder/builder_resource_multi_abc.py +361 -0
  43. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/builder/builder_resource_multi_datastore.py +146 -0
  44. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/builder/builder_resource_multi_file.py +505 -0
  45. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/builder/example/__init__.py +21 -0
  46. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/builder/example/builder_example.py +21 -0
  47. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/builder/example/builder_example_aux_fun.py +24 -0
  48. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/builder/example/builder_example_download.py +44 -0
  49. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/builder/example/builder_example_generate_data.py +73 -0
  50. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/builder/example/builder_example_patch_upload.py +51 -0
  51. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/builder/example/builder_example_policy.py +114 -0
  52. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/builder/example/builder_example_test_sql.py +53 -0
  53. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/builder/example/builder_example_tests.py +87 -0
  54. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/builder/example/builder_example_tests_offline.py +57 -0
  55. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/builder/example/package/ckan-dpg.svg +74 -0
  56. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/builder/example/package/users_local.csv +3 -0
  57. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/builder/mapper_datastore.py +93 -0
  58. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/builder/mapper_datastore_multi.py +262 -0
  59. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/builder/specific/__init__.py +11 -0
  60. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/builder/specific/configuration_builder.py +66 -0
  61. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/builder/specific_builder_abc.py +23 -0
  62. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/ckan_api/__init__.py +20 -0
  63. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/ckan_api/ckan_api.py +11 -0
  64. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/ckan_api/ckan_api_0_base.py +896 -0
  65. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/ckan_api/ckan_api_1_map.py +1028 -0
  66. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/ckan_api/ckan_api_2_readonly.py +934 -0
  67. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/ckan_api/ckan_api_3_policy.py +229 -0
  68. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/ckan_api/ckan_api_4_readwrite.py +579 -0
  69. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/ckan_api/ckan_api_5_manage.py +1225 -0
  70. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/ckan_api/ckan_api_params.py +192 -0
  71. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/ckan_api/deprecated/__init__.py +9 -0
  72. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/ckan_api/deprecated/ckan_api_deprecated.py +267 -0
  73. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/ckan_api/deprecated/ckan_api_deprecated_vocabularies.py +189 -0
  74. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/harvesters/__init__.py +23 -0
  75. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/harvesters/data_cleaner/__init__.py +17 -0
  76. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_abc.py +240 -0
  77. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_errors.py +23 -0
  78. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload.py +9 -0
  79. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload_1_basic.py +430 -0
  80. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload_2_geom.py +98 -0
  81. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/harvesters/file_formats/__init__.py +10 -0
  82. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/harvesters/file_formats/csv_format.py +43 -0
  83. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/harvesters/file_formats/file_format_abc.py +39 -0
  84. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/harvesters/file_formats/file_format_init.py +25 -0
  85. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/harvesters/file_formats/shp_format.py +129 -0
  86. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/harvesters/harvester_abc.py +190 -0
  87. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/harvesters/harvester_errors.py +31 -0
  88. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/harvesters/harvester_init.py +30 -0
  89. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/harvesters/harvester_model.py +49 -0
  90. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/harvesters/harvester_params.py +323 -0
  91. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/harvesters/postgre_harvester.py +495 -0
  92. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/harvesters/postgre_params.py +86 -0
  93. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/harvesters/pymongo_data_cleaner.py +173 -0
  94. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/harvesters/pymongo_harvester.py +355 -0
  95. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/harvesters/pymongo_params.py +54 -0
  96. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/policies/__init__.py +20 -0
  97. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/policies/data_format_policy.py +269 -0
  98. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/policies/data_format_policy_abc.py +97 -0
  99. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/policies/data_format_policy_custom_fields.py +156 -0
  100. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/policies/data_format_policy_defs.py +135 -0
  101. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/policies/data_format_policy_errors.py +79 -0
  102. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/policies/data_format_policy_lists.py +234 -0
  103. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/policies/data_format_policy_tag_groups.py +35 -0
  104. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/reports/__init__.py +11 -0
  105. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters/reports/admin_report.py +292 -0
  106. {ckanapi_harvesters-0.0.0 → ckanapi_harvesters-0.0.2/src/ckanapi_harvesters.egg-info}/PKG-INFO +74 -38
  107. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters.egg-info/SOURCES.txt +110 -0
  108. ckanapi_harvesters-0.0.2/src/ckanapi_harvesters.egg-info/requires.txt +18 -0
  109. ckanapi_harvesters-0.0.2/tests/test_builder_example_offline.py +41 -0
  110. ckanapi_harvesters-0.0.0/pyproject.toml +0 -66
  111. ckanapi_harvesters-0.0.0/src/ckanapi_harvesters/__init__.py +0 -15
  112. ckanapi_harvesters-0.0.0/src/ckanapi_harvesters/divider/__init__.py +0 -27
  113. ckanapi_harvesters-0.0.0/src/ckanapi_harvesters/divider/divider.py +0 -53
  114. ckanapi_harvesters-0.0.0/src/ckanapi_harvesters/divider/divider_error.py +0 -59
  115. ckanapi_harvesters-0.0.0/src/ckanapi_harvesters/main.py +0 -30
  116. ckanapi_harvesters-0.0.0/src/ckanapi_harvesters.egg-info/SOURCES.txt +0 -15
  117. ckanapi_harvesters-0.0.0/src/ckanapi_harvesters.egg-info/requires.txt +0 -1
  118. ckanapi_harvesters-0.0.0/tests/test_divider.py +0 -11
  119. ckanapi_harvesters-0.0.0/tests/test_helloworld.py +0 -5
  120. {ckanapi_harvesters-0.0.0 → ckanapi_harvesters-0.0.2}/setup.cfg +0 -0
  121. {ckanapi_harvesters-0.0.0 → ckanapi_harvesters-0.0.2}/src/ckanapi_harvesters.egg-info/dependency_links.txt +0 -0
  122. {ckanapi_harvesters-0.0.0 → ckanapi_harvesters-0.0.2}/src/ckanapi_harvesters.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  MIT License
2
2
 
3
- Copyright (c) 2024 ifpen
3
+ Copyright (c) 2026 IFPEN
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
@@ -1,10 +1,11 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ckanapi_harvesters
3
- Version: 0.0.0
4
- Summary: Adaptation of the template for github.
3
+ Version: 0.0.2
4
+ Summary: Package helping to upload local resources to a CKAN server using the CKAN API. Metadata and the list of resources can be defined in an Excel spreadsheet. The package includes requests to download resources and checks metadata against formatting rules.
5
+ Maintainer-email: ifpen-gp <63413841+ifpen-gp@users.noreply.github.com>
5
6
  License: MIT License
6
7
 
7
- Copyright (c) 2024 ifpen
8
+ Copyright (c) 2026 IFPEN
8
9
 
9
10
  Permission is hereby granted, free of charge, to any person obtaining a copy
10
11
  of this software and associated documentation files (the "Software"), to deal
@@ -24,25 +25,82 @@ License: MIT License
24
25
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25
26
  SOFTWARE.
26
27
 
27
- Classifier: Programming Language :: Python :: 3.12
28
+ Classifier: Programming Language :: Python :: 3.10
28
29
  Classifier: License :: OSI Approved :: MIT License
29
30
  Classifier: Operating System :: OS Independent
30
- Requires-Python: >=3.12
31
+ Requires-Python: >=3.10
31
32
  Description-Content-Type: text/markdown
32
33
  Requires-Dist: pytest==8.0.1
34
+ Requires-Dist: myst-nb>=1.3.0
35
+ Requires-Dist: requests>=2.32.5
36
+ Requires-Dist: pandas>=2.3.3
37
+ Requires-Dist: numpy>=2.2.6
38
+ Requires-Dist: openpyxl>=3.1.5
39
+ Provides-Extra: harvesters
40
+ Requires-Dist: pymongo>=4.8.0; extra == "harvesters"
41
+ Requires-Dist: psycopg2>=2.9.11; extra == "harvesters"
42
+ Requires-Dist: shapely>=2.1.2; extra == "harvesters"
43
+ Requires-Dist: pyproj==3.6.1; extra == "harvesters"
44
+ Provides-Extra: alt
45
+ Requires-Dist: bson; extra == "alt"
46
+ Requires-Dist: shapely>=2.1.2; extra == "alt"
47
+ Requires-Dist: pyproj==3.6.1; extra == "alt"
48
+ Requires-Dist: geopandas>=1.1.2; extra == "alt"
33
49
 
34
50
  # ckanapi_harvesters
35
51
 
52
+ <img src="doc/assets/France2030-Logo-1024x576.png" alt="logo">
53
+
36
54
  ---
37
55
 
38
56
  ## Description
39
57
 
40
- This "Python Package" template is a complete template designed to create a Python package according to IFPEN's development standards, deployable internally within IFPEN or on the Cloud. This template provides developers with a Python project architecture in which they can contribute, document, and make it available to all IFPEN developers.
58
+ This package enables users to benefit from the CKAN API and provides functions which
59
+ realize complex API calls to achieve specific operations.
60
+ In this package, DataStores are returned/inputted as pandas DataFrames.
61
+ The underlying request mechanism uses the requests Session object, which improves performance with multiple requests.
62
+ This package is oriented towards the management of CKAN datasets and resources.
63
+ Only a selection of API calls has been implemented with this objective in mind.
64
+ To perform custom API calls, the function `api_action_call` is provided to the end user.
65
+ This package was initially designed to harvest large DataStores from your local file system.
66
+ It also implements particular requests which can define a large DataStore.
67
+ Large datasets composed of multiple files can be uploaded/downloaded
68
+ through scripts into a single resource or multiple resources.
69
+ For a DataStore, large files are uploaded with a limited number of rows per request.
70
+
71
+ The package is divided in the following sections:
72
+ - `ckan_api`: functions interacting with the CKAN API.
73
+ In addition to the base class which manages basic parameters and requests, API functions are divided as follows:
74
+ 1) functions to map the CKAN packages and resources. The remote data structures are mapped in a mirrored data structure.
75
+ CKAN DataStore information, organizations, licenses and resource views are optionally tracked.
76
+ 2) functions to query a DataStore or to download file resources.
77
+ 3) functions to apply and test a data format policy on a given package.
78
+ 4) functions to upsert data to a DataStore or to upload files to a resource.
79
+ 5) functions to manage CKAN objects
80
+ (creating, patching, or removing packages, resources, and DataStores).
81
+ These functions enable the user to change the metadata for these objects.
82
+ The other objects are meant to be managed through the API.
83
+ - `policies`: functions to check data format policies. A data format policy defines which attributes
84
+ are mandatory for a package or resource.
85
+ Specific rules can be implemented to restrict package tags to certain lists,
86
+ grouped by [vocabulary](https://docs.ckan.org/en/2.9/maintaining/tag-vocabularies.html).
87
+ Extra key-value pairs of packages can be enforced. Resource formats can be restricted to a certain list.
88
+ - `reports`: functions to extract a report on the CKAN database in order to monitor
89
+ package user access rights, resource memory occupation, modification dates and data format policy messages.
90
+ - `harvesters`: this module implements ways to load data from your local machine.
91
+ - `file_formats`: The primary approach is to use files on your local file system. The CSV and SHP (shape file) formats are currently supported.
92
+ - In addition to the file formats, harvesters have been implemented to transfer data from a database.
93
+ This is particularly useful if the database cannot be accessed by CKAN harvester extensions
94
+ because it would only be available locally. MongoDB and PostgreSQL databases are currently supported.
95
+ - `builder`: functions to automate package and resource metadata patching and data uploads or downloads.
96
+ These parameters can be defined in an Excel workbook and files from the local file system can be referred to as inputs for the data upload.
97
+ The parameters can also be deduced from an online CKAN package through the API.
98
+ - Example scripts are given in this module, referring to an example Excel workbook.
99
+ The Excel workbook is available in the package and at this link:
100
+ [builder_package_example.xlsx](src/ckanapi_harvesters/builder/builder_package_example.xlsx)
101
+ See also the notebook example in the current documentation here:
102
+ [builder_example_notebook.ipynb](sphinx/notebooks/builder_example_notebook.ipynb).
41
103
 
42
- This package includes:
43
- - A unit test structure based on the [Pytest](https://docs.pytest.org/en/stable/) library
44
- - Automatic documentation generation based on the [Sphinx](https://www.sphinx-doc.org/en/master/) library
45
- - A CI/CD pipeline for deploying the Python package to a Python server
46
104
 
47
105
  ## Github Pages
48
106
 
@@ -130,51 +188,29 @@ For more details on contributing and best practices, please refer to the `CONTRI
130
188
 
131
189
  ### Installation
132
190
 
133
- If you are using the provided `pip.conf`, you can simply run:
134
-
135
- ```bash
136
- pip install ckanapi_harvesters
137
- ```
138
-
139
- Otherwise, you can specify the package index depending on whether you are in an internal (on-premise) or external (cloud) environment.
191
+ The package and its optional dependencies can be installed with the following command:
140
192
 
141
193
  ```bash
142
- # On-premise
143
- pip install ckanapi_harvesters --extra-index-url https://nexus.ifpen.fr/repository/fast-it/simple
144
-
145
- # On Cloud
146
- pip install ckanapi_harvesters --extra-index-url https://nexus.fastit.dev/repository/fast-it/simple
194
+ pip install "ckanapi_harvesters[harvesters]"
147
195
  ```
148
196
 
149
- Alternatively, you can set the package index URL as an environment variable:
150
-
151
- ```bash
152
- # On-premise
153
- export PIP_EXTRA_INDEX_URL=https://nexus.fastit.dev/repository/fast-it/simple
154
-
155
- # On Cloud
156
- export PIP_EXTRA_INDEX_URL=https://nexus.ifpen.fr/repository/fast-it/simple
157
- ```
158
197
 
159
198
  ### Example Usage of the Python Package in Your Code
160
199
 
161
200
  After installation, you can import and use your package and its functions in your Python code:
162
201
 
163
202
  ```python
164
- from package import hello_world
203
+ from ckanapi_harvesters import CkanApi
165
204
 
166
- hello_world()
205
+ ckan = CkanApi()
167
206
  ```
168
207
 
169
208
  To use sub-modules defined in the package:
170
209
 
171
210
  ```python
172
- from package.divider import divide
173
-
174
- a = 4.0
175
- b = 2.0
211
+ from ckanapi_harvesters.ckan_api import CkanApi
176
212
 
177
- c = divide(4., 2.)
213
+ ckan = CkanApi()
178
214
  ```
179
215
 
180
216
  These instructions will allow you to access the package and utilize its features effectively and in line with your development configuration.
@@ -1,15 +1,57 @@
1
1
  # ckanapi_harvesters
2
2
 
3
+ <img src="doc/assets/France2030-Logo-1024x576.png" alt="logo">
4
+
3
5
  ---
4
6
 
5
7
  ## Description
6
8
 
7
- This "Python Package" template is a complete template designed to create a Python package according to IFPEN's development standards, deployable internally within IFPEN or on the Cloud. This template provides developers with a Python project architecture in which they can contribute, document, and make it available to all IFPEN developers.
9
+ This package enables users to benefit from the CKAN API and provides functions which
10
+ realize complex API calls to achieve specific operations.
11
+ In this package, DataStores are returned/inputted as pandas DataFrames.
12
+ The underlying request mechanism uses the requests Session object, which improves performance with multiple requests.
13
+ This package is oriented towards the management of CKAN datasets and resources.
14
+ Only a selection of API calls has been implemented with this objective in mind.
15
+ To perform custom API calls, the function `api_action_call` is provided to the end user.
16
+ This package was initially designed to harvest large DataStores from your local file system.
17
+ It also implements particular requests which can define a large DataStore.
18
+ Large datasets composed of multiple files can be uploaded/downloaded
19
+ through scripts into a single resource or multiple resources.
20
+ For a DataStore, large files are uploaded with a limited number of rows per request.
21
+
22
+ The package is divided in the following sections:
23
+ - `ckan_api`: functions interacting with the CKAN API.
24
+ In addition to the base class which manages basic parameters and requests, API functions are divided as follows:
25
+ 1) functions to map the CKAN packages and resources. The remote data structures are mapped in a mirrored data structure.
26
+ CKAN DataStore information, organizations, licenses and resource views are optionally tracked.
27
+ 2) functions to query a DataStore or to download file resources.
28
+ 3) functions to apply and test a data format policy on a given package.
29
+ 4) functions to upsert data to a DataStore or to upload files to a resource.
30
+ 5) functions to manage CKAN objects
31
+ (creating, patching, or removing packages, resources, and DataStores).
32
+ These functions enable the user to change the metadata for these objects.
33
+ The other objects are meant to be managed through the API.
34
+ - `policies`: functions to check data format policies. A data format policy defines which attributes
35
+ are mandatory for a package or resource.
36
+ Specific rules can be implemented to restrict package tags to certain lists,
37
+ grouped by [vocabulary](https://docs.ckan.org/en/2.9/maintaining/tag-vocabularies.html).
38
+ Extra key-value pairs of packages can be enforced. Resource formats can be restricted to a certain list.
39
+ - `reports`: functions to extract a report on the CKAN database in order to monitor
40
+ package user access rights, resource memory occupation, modification dates and data format policy messages.
41
+ - `harvesters`: this module implements ways to load data from your local machine.
42
+ - `file_formats`: The primary approach is to use files on your local file system. The CSV and SHP (shape file) formats are currently supported.
43
+ - In addition to the file formats, harvesters have been implemented to transfer data from a database.
44
+ This is particularly useful if the database cannot be accessed by CKAN harvester extensions
45
+ because it would only be available locally. MongoDB and PostgreSQL databases are currently supported.
46
+ - `builder`: functions to automate package and resource metadata patching and data uploads or downloads.
47
+ These parameters can be defined in an Excel workbook and files from the local file system can be referred to as inputs for the data upload.
48
+ The parameters can also be deduced from an online CKAN package through the API.
49
+ - Example scripts are given in this module, referring to an example Excel workbook.
50
+ The Excel workbook is available in the package and at this link:
51
+ [builder_package_example.xlsx](src/ckanapi_harvesters/builder/builder_package_example.xlsx)
52
+ See also the notebook example in the current documentation here:
53
+ [builder_example_notebook.ipynb](sphinx/notebooks/builder_example_notebook.ipynb).
8
54
 
9
- This package includes:
10
- - A unit test structure based on the [Pytest](https://docs.pytest.org/en/stable/) library
11
- - Automatic documentation generation based on the [Sphinx](https://www.sphinx-doc.org/en/master/) library
12
- - A CI/CD pipeline for deploying the Python package to a Python server
13
55
 
14
56
  ## Github Pages
15
57
 
@@ -97,51 +139,29 @@ For more details on contributing and best practices, please refer to the `CONTRI
97
139
 
98
140
  ### Installation
99
141
 
100
- If you are using the provided `pip.conf`, you can simply run:
101
-
102
- ```bash
103
- pip install ckanapi_harvesters
104
- ```
105
-
106
- Otherwise, you can specify the package index depending on whether you are in an internal (on-premise) or external (cloud) environment.
142
+ The package and its optional dependencies can be installed with the following command:
107
143
 
108
144
  ```bash
109
- # On-premise
110
- pip install ckanapi_harvesters --extra-index-url https://nexus.ifpen.fr/repository/fast-it/simple
111
-
112
- # On Cloud
113
- pip install ckanapi_harvesters --extra-index-url https://nexus.fastit.dev/repository/fast-it/simple
145
+ pip install "ckanapi_harvesters[harvesters]"
114
146
  ```
115
147
 
116
- Alternatively, you can set the package index URL as an environment variable:
117
-
118
- ```bash
119
- # On-premise
120
- export PIP_EXTRA_INDEX_URL=https://nexus.fastit.dev/repository/fast-it/simple
121
-
122
- # On Cloud
123
- export PIP_EXTRA_INDEX_URL=https://nexus.ifpen.fr/repository/fast-it/simple
124
- ```
125
148
 
126
149
  ### Example Usage of the Python Package in Your Code
127
150
 
128
151
  After installation, you can import and use your package and its functions in your Python code:
129
152
 
130
153
  ```python
131
- from package import hello_world
154
+ from ckanapi_harvesters import CkanApi
132
155
 
133
- hello_world()
156
+ ckan = CkanApi()
134
157
  ```
135
158
 
136
159
  To use sub-modules defined in the package:
137
160
 
138
161
  ```python
139
- from package.divider import divide
140
-
141
- a = 4.0
142
- b = 2.0
162
+ from ckanapi_harvesters.ckan_api import CkanApi
143
163
 
144
- c = divide(4., 2.)
164
+ ckan = CkanApi()
145
165
  ```
146
166
 
147
167
  These instructions will allow you to access the package and utilize its features effectively and in line with your development configuration.
@@ -0,0 +1,96 @@
1
+ [project]
2
+ name = "ckanapi_harvesters"
3
+ version = '0.0.2'
4
+ authors = [
5
+ ]
6
+ maintainers = [
7
+ { name = "ifpen-gp", email = "63413841+ifpen-gp@users.noreply.github.com" },
8
+ ]
9
+ description = "Package helping to upload local resources to a CKAN server using the CKAN API. Metadata and the list of resources can be defined in an Excel spreadsheet. The package includes requests to download resources and checks metadata against formatting rules."
10
+ readme = "README.md"
11
+ requires-python = ">=3.10"
12
+ classifiers = [
13
+ "Programming Language :: Python :: 3.10",
14
+ "License :: OSI Approved :: MIT License",
15
+ "Operating System :: OS Independent",
16
+ ]
17
+ dependencies = [
18
+ "pytest == 8.0.1",
19
+ "myst-nb >= 1.3.0",
20
+ # add the required dependencies
21
+ "requests >= 2.32.5",
22
+ "pandas >= 2.3.3",
23
+ "numpy >= 2.2.6",
24
+ "openpyxl >= 3.1.5", # could be an optional extra
25
+ ]
26
+
27
+ license = { file = "LICENSE" }
28
+
29
+ [project.optional-dependencies]
30
+ harvesters = [
31
+ "pymongo >= 4.8.0", # MongoDB harvester
32
+ "psycopg2 >= 2.9.11", # Postgre harvester
33
+ "shapely >= 2.1.2", # GeoJSON support
34
+ "pyproj == 3.6.1", # GeoJSON transformations (v3.6.1 is compatible with Python 3.10)
35
+ ]
36
+ alt = [
37
+ "bson", # bson implementation, independent of pymongo
38
+ "shapely >= 2.1.2", # GeoJSON support
39
+ "pyproj == 3.6.1", # GeoJSON transformations (v3.6.1 is compatible with Python 3.10)
40
+ "geopandas >= 1.1.2", # Shapefile reading functions
41
+ ]
42
+
43
+ [tool.setuptools.package-data]
44
+ # non-source files to include in the package
45
+ "ckanapi_harvesters" = [
46
+ "builder/builder_package_example.xlsx",
47
+ "builder/example/package/*.*",
48
+ "builder/example/*.ipynb",
49
+ ]
50
+
51
+ [tool.ckanapi_harvesters]
52
+ version = "1.0.2"
53
+
54
+ [tool.setuptools.packages.find]
55
+ where = ["src"]
56
+
57
+ [tool.setuptools_scm]
58
+ version_scheme = "guess-next-dev"
59
+ local_scheme = "dirty-tag"
60
+
61
+ [tool.setuptools]
62
+ py-modules = []
63
+ license-files = []
64
+
65
+ [tool.ruff]
66
+ # Set the maximum line length to 140.
67
+ line-length = 140
68
+ exclude = [
69
+ "tests",
70
+ "sphinx",
71
+ "src",
72
+ ]
73
+
74
+ [tool.ruff.lint]
75
+ # See documentation: https://docs.astral.sh/ruff/rules/
76
+ extend-select = [
77
+ "UP",
78
+ "E501",
79
+ "I",
80
+ "B",
81
+ "F",
82
+ "E",
83
+ "N",
84
+ "A",
85
+ "PL",
86
+ "D"
87
+ ]
88
+
89
+
90
+ [tool.ruff.lint.pydocstyle]
91
+ convention = "numpy" # needs to be changed (rst docstrings)
92
+
93
+ [tool.ruff.format]
94
+ quote-style = "single"
95
+ indent-style = "space"
96
+ docstring-code-format = true
@@ -0,0 +1,37 @@
1
+ #!python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Package with helper function for CKAN requests using pandas DataFrames.
5
+ """
6
+
7
+ # builder_file_format_version = "0.0.1"
8
import os

try:
    from importlib.metadata import PackageNotFoundError, version
except ImportError:  # Python <3.8
    from importlib_metadata import PackageNotFoundError, version

# Version of the installed distribution; None when the package metadata cannot be found.
try:
    __version__ = version("ckanapi_harvesters")
except PackageNotFoundError:
    __version__ = None

# Absolute path of the directory containing this file, with symbolic links resolved.
self_dir = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))
21
+
22
+
23
+ from . import auxiliary
24
+ from . import policies
25
+ from . import harvesters
26
+ from . import ckan_api
27
+ from . import builder
28
+ from . import reports
29
+
30
+ # usage shortcuts
31
+ from .auxiliary import CkanMap
32
+ from .policies import CkanPackageDataFormatPolicy
33
+ from .ckan_api import CkanApi, CKAN_API_VERSION
34
+ from .builder import BUILDER_FILE_FORMAT_VERSION
35
+ from .builder import BuilderPackage, BuilderDataStoreMultiABC, BuilderDataStoreFolder, RequestFileMapperIndexKeys
36
+
37
+
@@ -0,0 +1,26 @@
1
+ #!python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Package with helper function for CKAN requests using pandas DataFrames.
5
+ """
6
+
7
+ from . import ckan_defs
8
+ from . import path
9
+ from . import login
10
+ from . import urls
11
+ from . import proxy_config
12
+ from . import external_code_import
13
+ from . import list_records
14
+ from . import ckan_action
15
+ from . import ckan_errors
16
+ from . import ckan_configuration
17
+ from . import ckan_api_key
18
+ from . import ckan_model
19
+ from . import ckan_map
20
+ from . import ckan_vocabulary_deprecated
21
+ from . import ckan_auxiliary
22
+ from . import deprecated
23
+
24
+ from .ckan_map import CkanMap
25
+ from .external_code_import import unlock_external_code_execution
26
+
@@ -0,0 +1,93 @@
1
+ #!python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Action response common treatments
5
+ """
6
+ from typing import Union
7
+ import json
8
+
9
+ import requests
10
+
11
+
12
class CkanActionResponse:
    """
    Class which decodes and checks the response of a CKAN request.

    A malformed reply body (not UTF-8, not JSON, or JSON which is not an object) does not
    raise: it is reported through ``success``, ``success_json_loads`` and ``error_message``.
    """
    def __init__(self, response: "requests.Response", dry_run: bool = False):
        """
        Decode the server reply and fill in the status attributes.

        :param response: reply of the server. A blank response (``content`` and ``request``
            both None) stands for a request which was not sent (dry run mode).
        :param dry_run: True if the request was deliberately not sent.
        :raises AssertionError: if the response is blank although ``dry_run`` is False.
        """
        self.response: requests.Response = response  # for debug purposes
        self.response_dict: Union[dict, None] = None  # decoded JSON body
        self.status_code: int = response.status_code
        self.success: bool = False
        self.success_json_loads: bool = False  # True once the body was parsed as JSON
        self.result: Union[dict, None] = None  # value of the "result" key on success
        self.error_message: Union[None, str, dict] = None
        self.len: Union[int, None] = None  # value returned by len(), when defined
        self.dry_run: bool = dry_run

        if response.content is None and response.request is None:
            # dry run: nothing was sent to the server
            if not dry_run:
                # explicit raise instead of an assert statement, which python -O would strip
                raise AssertionError("Blank response received outside of dry run mode")
            self.success = True
            self.success_json_loads = False
            self.status_code = 1
            self.error_message = "Request not sent: dry run mode"
            self.len = 0
            return

        try:
            body_text = response.content.decode()
            response_dict = json.loads(body_text)
            self.response_dict = response_dict
            self.success_json_loads = True
            # .keys() also rejects JSON payloads which are not objects (handled below)
            if (response.status_code == 200 and "success" in response_dict.keys() and "result" in response_dict.keys()
                    and response_dict["success"]):
                self.success = True
                self.result = response_dict["result"]
            elif "error" in response_dict.keys():
                self.error_message = response_dict["error"]
            else:
                self.error_message = body_text
        except Exception as json_error:
            # Deliberately broad: any failure to interpret the body is reported, not raised.
            # The body is decoded leniently here so that the handler itself cannot fail
            # on the very bytes which caused the error.
            self.error_message = f"JSON decode error {json_error} & CKAN error {self._body_as_text(response)}"

    @staticmethod
    def _body_as_text(response) -> str:
        """
        Return the response body as text for error messages, without raising on a missing
        body or on bytes which are not valid UTF-8.
        """
        content = response.content
        if content is None:
            return ""
        return content.decode(errors="replace")

    def __len__(self):
        """
        :return: the ``len`` attribute
        :raises RuntimeError: if no length was set for this response
        """
        if self.len is None:
            raise RuntimeError("queried len but does not have len")
        return self.len

    def default_error(self, ckan) -> "CkanActionError":
        """
        Build (without raising) the exception matching the response: a specific error for
        404 "Not Found Error" and 403 "Authorization Error" replies, a generic one otherwise.

        :param ckan: object forwarded to the exception constructors
        :return: the exception instance, to be raised by the caller
        """
        # error_message is only a dict when the server sent a structured "error" entry;
        # a plain-text message or a dict without "__type" falls back to the generic error.
        error_type = self.error_message.get("__type") if isinstance(self.error_message, dict) else None
        if self.status_code == 404 and self.success_json_loads and error_type == "Not Found Error":
            return CkanNotFoundError(ckan, "(Generic)", self)
        if self.status_code == 403 and self.success_json_loads and error_type == "Authorization Error":
            return CkanAuthorizationError(ckan, self)
        return CkanActionError(ckan, self)
67
+
68
+ ## action error codes
69
class CkanActionError(Exception):
    """
    Error built from a failed CKAN action response.

    The exception message is the ``error_message`` of the response; the response and its
    status code are kept as attributes.
    """
    def __init__(self, ckan, response: CkanActionResponse, display_request:bool=True):
        """
        :param ckan: object whose ``_error_print_debug_response`` method is called with the
            underlying response when ``display_request`` is True
        :param response: decoded response of the failed action
        :param display_request: True to call the debug print of ``ckan``
        """
        super().__init__(response.error_message)
        self.status_code = response.status_code
        self.response = response
        if display_request:
            ckan._error_print_debug_response(response.response)

    def __str__(self):
        """Prefix the message with the status code of the response."""
        prefix = f"Server code [{self.status_code}]: "
        return prefix + super().__str__()


class CkanNotFoundError(CkanActionError):
    """
    Error for an object which was not found. The ``error_message`` of the response is
    rewritten in place to mention the object type.
    """
    def __init__(self, ckan, object_type:str, response: CkanActionResponse, display_request:bool=True):
        """
        :param ckan: forwarded to CkanActionError
        :param object_type: label of the missing object, inserted in the message
        :param response: decoded response of the failed action (its error_message is modified)
        :param display_request: forwarded to CkanActionError
        """
        response.error_message = f"{object_type} not found: {response.error_message}"
        super().__init__(ckan, response, display_request=display_request)
        self.object_type = object_type


class CkanAuthorizationError(CkanActionError):
    """Error for an action refused by the server for authorization reasons."""


class CkanSqlCapabilityError(CkanActionError):
    """
    Error for a server without SQL capabilities. The ``error_message`` of the response is
    replaced in place by a fixed explanation.
    """
    def __init__(self, ckan, response: CkanActionResponse, display_request:bool=True):
        """
        :param ckan: forwarded to CkanActionError
        :param response: decoded response of the failed action (its error_message is replaced)
        :param display_request: forwarded to CkanActionError
        """
        response.error_message = "sql capabilities are not activated on CKAN server. See documentation for option ckan.datastore.sqlsearch.enabled"
        super().__init__(ckan, response, display_request=display_request)
93
+