ckanapi-harvesters 0.0.0__py3-none-any.whl → 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110)
  1. ckanapi_harvesters/__init__.py +32 -10
  2. ckanapi_harvesters/auxiliary/__init__.py +26 -0
  3. ckanapi_harvesters/auxiliary/ckan_action.py +93 -0
  4. ckanapi_harvesters/auxiliary/ckan_api_key.py +213 -0
  5. ckanapi_harvesters/auxiliary/ckan_auxiliary.py +293 -0
  6. ckanapi_harvesters/auxiliary/ckan_configuration.py +50 -0
  7. ckanapi_harvesters/auxiliary/ckan_defs.py +10 -0
  8. ckanapi_harvesters/auxiliary/ckan_errors.py +129 -0
  9. ckanapi_harvesters/auxiliary/ckan_map.py +509 -0
  10. ckanapi_harvesters/auxiliary/ckan_model.py +992 -0
  11. ckanapi_harvesters/auxiliary/ckan_vocabulary_deprecated.py +104 -0
  12. ckanapi_harvesters/auxiliary/deprecated.py +82 -0
  13. ckanapi_harvesters/auxiliary/error_level_message.py +51 -0
  14. ckanapi_harvesters/auxiliary/external_code_import.py +98 -0
  15. ckanapi_harvesters/auxiliary/list_records.py +60 -0
  16. ckanapi_harvesters/auxiliary/login.py +163 -0
  17. ckanapi_harvesters/auxiliary/path.py +208 -0
  18. ckanapi_harvesters/auxiliary/proxy_config.py +298 -0
  19. ckanapi_harvesters/auxiliary/urls.py +40 -0
  20. ckanapi_harvesters/builder/__init__.py +40 -0
  21. ckanapi_harvesters/builder/builder_aux.py +20 -0
  22. ckanapi_harvesters/builder/builder_ckan.py +238 -0
  23. ckanapi_harvesters/builder/builder_errors.py +36 -0
  24. ckanapi_harvesters/builder/builder_field.py +122 -0
  25. ckanapi_harvesters/builder/builder_package.py +9 -0
  26. ckanapi_harvesters/builder/builder_package_1_basic.py +1291 -0
  27. ckanapi_harvesters/builder/builder_package_2_harvesters.py +40 -0
  28. ckanapi_harvesters/builder/builder_package_3_multi_threaded.py +45 -0
  29. ckanapi_harvesters/builder/builder_package_example.xlsx +0 -0
  30. ckanapi_harvesters/builder/builder_resource.py +589 -0
  31. ckanapi_harvesters/builder/builder_resource_datastore.py +561 -0
  32. ckanapi_harvesters/builder/builder_resource_datastore_multi_abc.py +367 -0
  33. ckanapi_harvesters/builder/builder_resource_datastore_multi_folder.py +273 -0
  34. ckanapi_harvesters/builder/builder_resource_datastore_multi_harvester.py +278 -0
  35. ckanapi_harvesters/builder/builder_resource_datastore_unmanaged.py +145 -0
  36. ckanapi_harvesters/builder/builder_resource_datastore_url.py +150 -0
  37. ckanapi_harvesters/builder/builder_resource_init.py +126 -0
  38. ckanapi_harvesters/builder/builder_resource_multi_abc.py +361 -0
  39. ckanapi_harvesters/builder/builder_resource_multi_datastore.py +146 -0
  40. ckanapi_harvesters/builder/builder_resource_multi_file.py +505 -0
  41. ckanapi_harvesters/builder/example/__init__.py +21 -0
  42. ckanapi_harvesters/builder/example/builder_example.py +21 -0
  43. ckanapi_harvesters/builder/example/builder_example_aux_fun.py +24 -0
  44. ckanapi_harvesters/builder/example/builder_example_download.py +44 -0
  45. ckanapi_harvesters/builder/example/builder_example_generate_data.py +73 -0
  46. ckanapi_harvesters/builder/example/builder_example_patch_upload.py +51 -0
  47. ckanapi_harvesters/builder/example/builder_example_policy.py +114 -0
  48. ckanapi_harvesters/builder/example/builder_example_test_sql.py +53 -0
  49. ckanapi_harvesters/builder/example/builder_example_tests.py +87 -0
  50. ckanapi_harvesters/builder/example/builder_example_tests_offline.py +57 -0
  51. ckanapi_harvesters/builder/example/package/ckan-dpg.svg +74 -0
  52. ckanapi_harvesters/builder/example/package/users_local.csv +3 -0
  53. ckanapi_harvesters/builder/mapper_datastore.py +93 -0
  54. ckanapi_harvesters/builder/mapper_datastore_multi.py +262 -0
  55. ckanapi_harvesters/builder/specific/__init__.py +11 -0
  56. ckanapi_harvesters/builder/specific/configuration_builder.py +66 -0
  57. ckanapi_harvesters/builder/specific_builder_abc.py +23 -0
  58. ckanapi_harvesters/ckan_api/__init__.py +20 -0
  59. ckanapi_harvesters/ckan_api/ckan_api.py +11 -0
  60. ckanapi_harvesters/ckan_api/ckan_api_0_base.py +896 -0
  61. ckanapi_harvesters/ckan_api/ckan_api_1_map.py +1028 -0
  62. ckanapi_harvesters/ckan_api/ckan_api_2_readonly.py +934 -0
  63. ckanapi_harvesters/ckan_api/ckan_api_3_policy.py +229 -0
  64. ckanapi_harvesters/ckan_api/ckan_api_4_readwrite.py +579 -0
  65. ckanapi_harvesters/ckan_api/ckan_api_5_manage.py +1225 -0
  66. ckanapi_harvesters/ckan_api/ckan_api_params.py +192 -0
  67. ckanapi_harvesters/ckan_api/deprecated/__init__.py +9 -0
  68. ckanapi_harvesters/ckan_api/deprecated/ckan_api_deprecated.py +267 -0
  69. ckanapi_harvesters/ckan_api/deprecated/ckan_api_deprecated_vocabularies.py +189 -0
  70. ckanapi_harvesters/harvesters/__init__.py +23 -0
  71. ckanapi_harvesters/harvesters/data_cleaner/__init__.py +17 -0
  72. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_abc.py +240 -0
  73. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_errors.py +23 -0
  74. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload.py +9 -0
  75. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload_1_basic.py +430 -0
  76. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload_2_geom.py +98 -0
  77. ckanapi_harvesters/harvesters/file_formats/__init__.py +10 -0
  78. ckanapi_harvesters/harvesters/file_formats/csv_format.py +43 -0
  79. ckanapi_harvesters/harvesters/file_formats/file_format_abc.py +39 -0
  80. ckanapi_harvesters/harvesters/file_formats/file_format_init.py +25 -0
  81. ckanapi_harvesters/harvesters/file_formats/shp_format.py +129 -0
  82. ckanapi_harvesters/harvesters/harvester_abc.py +190 -0
  83. ckanapi_harvesters/harvesters/harvester_errors.py +31 -0
  84. ckanapi_harvesters/harvesters/harvester_init.py +30 -0
  85. ckanapi_harvesters/harvesters/harvester_model.py +49 -0
  86. ckanapi_harvesters/harvesters/harvester_params.py +323 -0
  87. ckanapi_harvesters/harvesters/postgre_harvester.py +495 -0
  88. ckanapi_harvesters/harvesters/postgre_params.py +86 -0
  89. ckanapi_harvesters/harvesters/pymongo_data_cleaner.py +173 -0
  90. ckanapi_harvesters/harvesters/pymongo_harvester.py +355 -0
  91. ckanapi_harvesters/harvesters/pymongo_params.py +54 -0
  92. ckanapi_harvesters/policies/__init__.py +20 -0
  93. ckanapi_harvesters/policies/data_format_policy.py +269 -0
  94. ckanapi_harvesters/policies/data_format_policy_abc.py +97 -0
  95. ckanapi_harvesters/policies/data_format_policy_custom_fields.py +156 -0
  96. ckanapi_harvesters/policies/data_format_policy_defs.py +135 -0
  97. ckanapi_harvesters/policies/data_format_policy_errors.py +79 -0
  98. ckanapi_harvesters/policies/data_format_policy_lists.py +234 -0
  99. ckanapi_harvesters/policies/data_format_policy_tag_groups.py +35 -0
  100. ckanapi_harvesters/reports/__init__.py +11 -0
  101. ckanapi_harvesters/reports/admin_report.py +292 -0
  102. {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.2.dist-info}/METADATA +74 -38
  103. ckanapi_harvesters-0.0.2.dist-info/RECORD +105 -0
  104. ckanapi_harvesters/divider/__init__.py +0 -27
  105. ckanapi_harvesters/divider/divider.py +0 -53
  106. ckanapi_harvesters/divider/divider_error.py +0 -59
  107. ckanapi_harvesters/main.py +0 -30
  108. ckanapi_harvesters-0.0.0.dist-info/RECORD +0 -9
  109. {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.2.dist-info}/WHEEL +0 -0
  110. {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,73 @@
1
+ #!python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Code to generate sample data for the dataset example
5
+ """
6
+ from typing import Tuple
7
+ import os
8
+ import re
9
+
10
+ import pandas as pd
11
+ import numpy as np
12
+
13
+ from ckanapi_harvesters.builder.example import example_package_dir
14
+ self_dir = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))
15
+
16
+
17
+ GPS_DIGITS = 6
18
def degrees_DMS(lat:str, lon:str) -> Tuple[float, float]:
    """
    Convert GPS coordinates written as degrees / minutes / seconds into signed decimal degrees.

    :param lat: latitude, example: 48° 51' 12.24845" N
    :param lon: longitude, example: 2° 20' 55.62563" E
    :return: tuple (latitude, longitude) in decimal degrees; the South and West hemispheres are negative
    :raises ValueError: if a coordinate does not contain exactly one degree, minute and second mark
    """
    def _to_decimal(dms: str) -> float:
        # Splitting on the degree, minute and second marks leaves the hemisphere letter last.
        deg, minutes, seconds, direction = re.split('[°\'"]', dms)
        # The hemisphere letter may be separated from the seconds mark by spaces (e.g. '" S'):
        # it must be stripped, otherwise South and West are never recognized.
        sign = -1 if direction.strip().upper() in ('W', 'S') else 1
        return (float(deg) + float(minutes) / 60 + float(seconds) / (60 * 60)) * sign

    return (_to_decimal(lat), _to_decimal(lon))
30
+
31
+
32
def run():
    """
    Regenerate the sample data of the example package.

    Writes the users table (users_local.csv) and one CSV file per trace, in both the
    "traces" and "traces_multi" sub-directories of example_package_dir.
    Each trace holds N points, one per second, linearly interpolated between its
    origin and its destination.
    """
    N = 100  # number of points per trace
    df_users = pd.DataFrame([{"user_id": 1, "age": 30},
                             {"user_id": 2, "age": 80},
                             ])
    point_0 = degrees_DMS('48° 51\' 12.24845" N', '2° 20\' 55.62563" E')
    traces = [{"user_id": 1, "origin": point_0, "destination": degrees_DMS('48°53\'37.6"N', '2°23\'24.9"E')},
              {"user_id": 1, "origin": point_0, "destination": degrees_DMS('48°50\'01.7"N', '2°19\'57.1"E')},
              {"user_id": 2, "origin": point_0, "destination": degrees_DMS('48°52\'45.2"N', '2°18\'32.8"E')},
              ]
    users_file = os.path.join(example_package_dir, "users_local.csv")
    df_users.to_csv(users_file, index=False)
    traces_dir = os.path.join(example_package_dir, "traces")
    traces_dir_multi = os.path.join(example_package_dir, "traces_multi")
    # to_csv does not create missing directories
    os.makedirs(traces_dir, exist_ok=True)
    os.makedirs(traces_dir_multi, exist_ok=True)
    for trace_id, trace in enumerate(traces):
        # Interpolation table: the origin is the first sample, the destination the last one
        table_index = np.array([0, N-1])
        table_lat = np.array([trace["origin"][0], trace["destination"][0]])
        table_lon = np.array([trace["origin"][1], trace["destination"][1]])
        df_trace = pd.DataFrame()
        index = np.arange(N)
        df_trace["index_in_trace"] = index
        df_trace.insert(loc=0, column="trace_id", value=trace_id)
        df_trace["user_id"] = trace["user_id"]
        df_trace["timestamp"] = pd.Timestamp(year=2025, month=1, day=1, hour=12, minute=0, second=0, microsecond=0) + index.astype('timedelta64[s]')
        df_trace["timestamp_local"] = df_trace["timestamp"] + pd.Timedelta(hours=1)  # local = UTC+1: winter (standard) time in Paris
        df_trace["timestamp"] = df_trace["timestamp"].apply(pd.Timestamp.isoformat)  # ISO-8601 format
        df_trace["timestamp_local"] = df_trace["timestamp_local"].apply(pd.Timestamp.isoformat)  # ISO-8601 format
        # Both coordinates are interpolated along the sample index (abscissa = table_index)
        df_trace["latitude"] = np.interp(index, xp=table_index, fp=table_lat)
        df_trace["longitude"] = np.interp(index, xp=table_index, fp=table_lon)
        df_trace["latitude"] = df_trace["latitude"].values.round(GPS_DIGITS)
        df_trace["longitude"] = df_trace["longitude"].values.round(GPS_DIGITS)
        # verify_integrity raises if a (trace_id, index_in_trace) pair is duplicated
        df_trace.set_index(keys=["trace_id", "index_in_trace"], drop=False, inplace=True, verify_integrity=True)
        trace_file = os.path.join(traces_dir, f"trace_{trace_id:03d}.csv")
        df_trace.to_csv(trace_file, index=False)
        trace_file_multi = os.path.join(traces_dir_multi, f"trace_{trace_id:03d}.csv")
        df_trace.to_csv(trace_file_multi, index=False)
    print("Example data regenerated")


if __name__ == '__main__':
    run()
73
+
@@ -0,0 +1,51 @@
1
+ #!python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Example code to upload the builder example to a CKAN server
5
+ """
6
+ from typing import Tuple
7
+ import os
8
+ import re
9
+ import getpass
10
+
11
+ import pandas as pd
12
+ import numpy as np
13
+
14
+ from ckanapi_harvesters.builder.builder_package import BuilderPackage
15
+ from ckanapi_harvesters.ckan_api import CkanApi
16
+
17
+ from ckanapi_harvesters.builder.example import example_package_xls
18
+ self_dir = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))
19
+
20
+
21
def run(ckan:CkanApi = None):
    """
    Upload the builder example package to a CKAN server.

    Loads the package definition from the example Excel workbook, configures the
    CKAN connection, re-encodes the workbook in the current working directory,
    then patches the package and uploads the large datasets.

    :param ckan: CKAN API object passed to the builder's init_ckan (None by default)
    """
    BuilderPackage.unlock_external_code_execution()

    package_builder = BuilderPackage.from_excel(example_package_xls)
    ckan = package_builder.init_ckan(ckan)
    ckan.input_missing_info(input_args_if_necessary=True, input_owner_org=True)
    ckan.set_limits(10000)  # reduce if server hangs up
    ckan.set_submit_timeout(5)
    ckan.set_verbosity(True)
    # ckan.set_default_map_mode(datastore_info=True)  # uncomment to query DataStore information

    # Test re-encoding the Excel file from the loaded model
    package_builder.to_excel(os.path.abspath("builder_package_example-reencoded.xlsx"))

    # Patch package: apply metadata and upload small resources
    # reupload=True: reuploads all documents and resets large datasets to the first document
    package_builder.patch_request_full(ckan, reupload=True)

    # Upload large datasets
    # threads > 1: multi-threading mode - reduce if HTTP 502 errors
    package_builder.upload_large_datasets(ckan, threads=3)

    print("Update done.")


if __name__ == '__main__':
    ckan = CkanApi(None)
    ckan.initialize_from_cli_args()
    run(ckan)
51
+
@@ -0,0 +1,114 @@
1
+ #!python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Tests to perform after the example package was uploaded
5
+ """
6
+ from typing import Tuple
7
+ import os
8
+ import re
9
+ import json
10
+ import io
11
+
12
+ import pandas as pd
13
+ import numpy as np
14
+
15
+ from ckanapi_harvesters.auxiliary import CkanMap
16
+ from ckanapi_harvesters.builder.builder_package import BuilderPackage
17
+ from ckanapi_harvesters.ckan_api import CkanApi
18
+ from ckanapi_harvesters.policies.data_format_policy import CkanPackageDataFormatPolicy
19
+ from ckanapi_harvesters.policies.data_format_policy import (SingleValueListPolicy, ValueListPolicy, StringValueSpecification,
20
+ ListChoiceMode, CustomFieldsPolicy, CustomFieldSpecification,
21
+ GroupedValueListPolicy, ErrorLevel, DataPolicyError,
22
+ StringMatchMode, TagListPolicy, TagGroupsListPolicy)
23
+
24
+ from ckanapi_harvesters.builder.specific.configuration_builder import ConfigurationBuilder
25
+ from ckanapi_harvesters.builder.example import example_package_xls
26
+ self_dir = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))
27
+
28
+
29
+ enable_write = False # be careful, setting this to True could erase your CKAN data format policy !!!
30
+
31
+
32
def run(ckan:CkanApi = None):
    """
    Exercise the data format policy features on the example package.

    Builds a policy in code, checks that it survives a dict and a JSON round trip,
    checks the local package definition and the remote package against it, then
    loads the default policy and checks it through the configuration builder.
    The policy is only written to the server when enable_write is True.

    :param ckan: CKAN API object passed to the builder's init_ckan (None by default)
    """
    BuilderPackage.unlock_external_code_execution()  # comment to test if the safety feature is enabled

    builder = BuilderPackage.from_excel(example_package_xls)
    ckan = builder.init_ckan(ckan)
    ckan.initialize_from_cli_args()
    ckan.input_missing_info(input_args_if_necessary=True, input_owner_org=True)
    ckan.set_verbosity(False)
    ckan.verbose_policy = True

    # policy definition
    policy = CkanPackageDataFormatPolicy()
    csv_only = ValueListPolicy([StringValueSpecification("CSV")])
    policy.resource_format = SingleValueListPolicy(csv_only, extra_values=ListChoiceMode.NoExtra)
    new_field_spec = CustomFieldSpecification(key="New field", mandatory=True)
    algorithm_spec = CustomFieldSpecification(key="Algorithm", values=["Random"],
                                              match_mode=StringMatchMode.Match, mandatory=True)
    policy.package_custom_fields = CustomFieldsPolicy([new_field_spec, algorithm_spec],
                                                      error_level=ErrorLevel.Error)
    test_tags = TagListPolicy([StringValueSpecification("Test")], group_name="Vocabulary_Test")
    policy.package_tags = TagGroupsListPolicy([test_tags])
    policy.package_mandatory_attributes = {"description", "author"}
    ckan.policy = policy

    # round trip through a dictionary
    policy_dict = policy.to_dict(sets_as_lists=False)
    policy_rebuilt = CkanPackageDataFormatPolicy.from_dict(policy_dict)
    assert policy_dict == policy_rebuilt.to_dict(sets_as_lists=False)

    # serialisation
    policy_dict = policy.to_dict(sets_as_lists=True)
    policy_json_file = os.path.abspath("policy_py.json")
    with open(policy_json_file, "w") as out_file:
        json.dump(policy_dict, out_file, indent=4)
    with open(policy_json_file, "r") as in_file:
        policy_json_dict = json.load(in_file)
    # policy_json = CkanPackageDataFormatPolicy.from_dict(policy_json_dict)
    policy_json = CkanPackageDataFormatPolicy.from_json(policy_json_file)
    assert policy_dict == policy_json_dict

    # test on local definition (offline)
    buffer = {}
    success = builder.local_policy_check(policy, buffer=buffer)
    # test if an error is raised (this mode does not display all messages)
    try:
        success = builder.local_policy_check(policy)
    except DataPolicyError as error:
        print(f"Exception: {error!s}")
        # success still holds the result of the buffered check above
        assert not success
    else:
        print(f"No exception / success={success}")

    # test on remote definition (CKAN API)
    print("Test on remote")
    buffer = {}
    ckan.map_resources(builder.package_name)
    success = ckan.policy_check(buffer=buffer)

    ckan.set_verbosity(True)

    # update vocabularies: deprecated
    # if enable_write:
    #     ckan.vocabularies_clear()
    #     ckan.initiate_vocabularies_from_policy(policy, remove_others=True)

    # upload default policy
    config_ckan = ConfigurationBuilder(ckan, ckan.owner_org)
    if enable_write:
        config_ckan.patch_policy(ckan, policy, reduced_size=False)

    # download default policy
    # default_policy = ckan.load_default_policy(force=True)
    config_ckan.load_default_policy(ckan)

    # check all packages against policy
    ckan.owner_org = None
    print(" ")
    buffer = {}
    config_ckan.policy_check(ckan, policy=policy, buffer=buffer, verbose=True)

    print("Tests done.")


if __name__ == '__main__':
    ckan = CkanApi(None)
    ckan.initialize_from_cli_args()
    run(ckan)
114
+
@@ -0,0 +1,53 @@
1
+ #!python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Tests to perform after the example package was uploaded
5
+ """
6
+ from typing import Tuple
7
+ import os
8
+ import re
9
+ import getpass
10
+ import json
11
+
12
+ import pandas as pd
13
+ import numpy as np
14
+
15
+ from ckanapi_harvesters.auxiliary import CkanMap
16
+ from ckanapi_harvesters.builder.builder_package import BuilderPackage
17
+ from ckanapi_harvesters.ckan_api import CkanApi
18
+ from ckanapi_harvesters.auxiliary.ckan_action import CkanNotFoundError, CkanSqlCapabilityError
19
+
20
+ from ckanapi_harvesters.builder.example import example_package_xls
21
+ self_dir = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))
22
+
23
+
24
def run(ckan:CkanApi = None):
    """
    Probe the SQL features of the CKAN DataStore using the example package.

    Queries the SQL capabilities, requests the API help of datastore_search_sql,
    then runs a SELECT on the "users.csv" resource. A missing help page or an
    inaccessible SQL endpoint is reported with a message instead of an error.

    :param ckan: CKAN API object passed to the builder's init_ckan (None by default)
    """
    BuilderPackage.unlock_external_code_execution()  # comment to test if the safety feature is enabled

    builder = BuilderPackage.from_excel(example_package_xls)
    ckan = builder.init_ckan(ckan)
    ckan.initialize_from_cli_args()
    ckan.input_missing_info(input_args_if_necessary=True, input_owner_org=True)
    ckan.set_verbosity(True)

    # ckan.apikey.clear()  # uncomment for specific test

    sql_capability = ckan.test_sql_capabilities()

    try:
        ckan.api_help_show("datastore_search_sql")
    except CkanNotFoundError:
        print("No datastore_search_sql help")

    users_table_id = builder.get_or_query_resource_id(ckan, "users.csv")
    sql_query = f'SELECT * FROM "{users_table_id}"'
    try:
        ckan.datastore_search_sql(sql_query)
    except CkanSqlCapabilityError:
        print("datastore_search_sql is not accessible")

    print("Tests done.")


if __name__ == '__main__':
    ckan = CkanApi(None)
    ckan.initialize_from_cli_args()
    run(ckan)
53
+
@@ -0,0 +1,87 @@
1
+ #!python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Tests to perform after the example package was uploaded
5
+ """
6
+ from typing import Tuple
7
+ import os
8
+ import re
9
+ import getpass
10
+ import json
11
+
12
+ import pandas as pd
13
+ import numpy as np
14
+
15
+ from ckanapi_harvesters.auxiliary import CkanMap
16
+ from ckanapi_harvesters.builder.builder_package import BuilderPackage
17
+ from ckanapi_harvesters.ckan_api import CkanApi
18
+
19
+ from ckanapi_harvesters.builder.example import example_package_xls
20
+ self_dir = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))
21
+
22
+
23
def run(ckan:CkanApi = None):
    """
    Run the round-trip tests on the example package once it has been uploaded.

    Re-encodes the Excel workbook, maps the package on the CKAN server, rebuilds the
    map from the model, converts the maps and the model to dictionaries and back,
    rebuilds a workbook from the online information, writes the model to JSON and
    tests the copy constructors. Output files are written in the current working directory.

    :param ckan: CKAN API object passed to the builder's init_ckan (None by default)
    :raises AssertionError: if the model differs after a dictionary round trip or a copy
    """
    BuilderPackage.unlock_external_code_execution()  # comment to test if the safety feature is enabled

    mdl = BuilderPackage.from_excel(example_package_xls)
    ckan = mdl.init_ckan(ckan)
    ckan.initialize_from_cli_args()
    ckan.input_missing_info(input_args_if_necessary=True, input_owner_org=True)
    ckan.set_verbosity(True)

    # Test re-encoding the Excel file from the loaded model
    example_package_xls_reencoded = os.path.abspath("builder_package_example-reencoded.xlsx")
    mdl.to_excel(example_package_xls_reencoded)

    # map package
    ckan.map_resources(mdl.package_name, datastore_info=True, license_list=True)
    map_init = ckan.map.copy()
    ckan.purge(purge_map=True)

    # Test using the model to update CKAN map (update_ckan_map)
    mdl.info_request_full(ckan)
    map_queried = ckan.map.copy()
    ckan.purge(purge_map=True)
    mdl.update_ckan_map(ckan)
    map_from_mdl = ckan.map.copy()

    # Test saving the map to a dictionary
    dict_map_queried = map_queried.to_dict()
    map_queried_from_dict = CkanMap.from_dict(dict_map_queried)

    dict_map_from_mdl = map_from_mdl.to_dict()
    map_from_mdl_from_dict = CkanMap.from_dict(dict_map_from_mdl)

    # test the function that recreates the Excel file from the online information
    ckan.purge(purge_map=True)
    mdl_api = BuilderPackage.from_ckan(ckan, mdl.package_name)
    example_package_xls_from_api = os.path.abspath("builder_package_example-from-api.xlsx")
    mdl_api.to_excel(example_package_xls_from_api)

    # export mdl to dict and re-import
    base_dir = os.path.abspath(".")
    mdl_dict = mdl.to_dict(base_dir=base_dir)
    mdl_from_dict = BuilderPackage.from_dict(mdl_dict, base_dir=base_dir)
    mdl_dict_bis = mdl_from_dict.to_dict(base_dir=base_dir)
    assert(mdl_dict == mdl_dict_bis)

    # test json serialization
    example_package_json_reencoded = os.path.abspath("builder_package_example-reencoded.json")
    # the with statement closes the file: no explicit close() is needed
    with open(example_package_json_reencoded, "w") as json_file:
        json.dump(mdl_dict, json_file, indent=4)

    # test copy constructors
    ckan_copy = ckan.copy()
    mdl_copy = mdl.copy()
    mdl_copy_dict = mdl_copy.to_dict(base_dir=base_dir)
    assert(mdl_dict == mdl_copy_dict)

    print("Tests done.")


if __name__ == '__main__':
    ckan = CkanApi(None)
    ckan.initialize_from_cli_args()
    run(ckan)
87
+
@@ -0,0 +1,57 @@
1
+ #!python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Tests to perform after the example package was uploaded
5
+ """
6
+ from typing import Tuple
7
+ import os
8
+ import re
9
+ import getpass
10
+ import json
11
+
12
+ import pandas as pd
13
+ import numpy as np
14
+
15
+ from ckanapi_harvesters.auxiliary import CkanMap
16
+ from ckanapi_harvesters.builder.builder_package import BuilderPackage
17
+ from ckanapi_harvesters.ckan_api import CkanApi
18
+
19
+ from ckanapi_harvesters.builder.example import example_package_xls
20
+ self_dir = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))
21
+
22
+
23
if __name__ == '__main__':
    # Offline variant of the example tests: none of the calls below issues a request explicitly.
    BuilderPackage.unlock_external_code_execution()  # comment to test if the safety feature is enabled

    builder = BuilderPackage.from_excel(example_package_xls)

    ckan = builder.init_ckan(CkanApi())
    # builder.update_ckan_map(ckan)
    # map_from_mdl = ckan.map.copy()

    # Test re-encoding the Excel file from the loaded model
    builder.to_excel(os.path.abspath("builder_package_example-reencoded.xlsx"))

    # export the model to a dict and re-import it
    working_dir = os.path.abspath(".")
    builder_dict = builder.to_dict(base_dir=working_dir)
    builder_rebuilt = BuilderPackage.from_dict(builder_dict, base_dir=working_dir)
    assert builder_dict == builder_rebuilt.to_dict(base_dir=working_dir)

    # test json serialization
    json_path = os.path.abspath("builder_package_example-reencoded.json")
    with open(json_path, "w") as out_file:
        json.dump(builder_dict, out_file, indent=4)

    # test copy constructors
    ckan_copy = ckan.copy()
    builder_copy = builder.copy()
    assert builder_dict == builder_copy.to_dict(base_dir=working_dir)

    print("Tests done.")
56
+
57
+
@@ -0,0 +1,74 @@
1
+ <svg width="236" height="50" viewBox="0 0 236 50" fill="none" xmlns="http://www.w3.org/2000/svg">
2
+ <path d="M228.17 41H199.236L196 34.7557L196.071 34.6044L201.995 22H230.902L234.129 28.2621L228.17 41ZM199.641 40.3324H227.742L233.386 28.2799L230.497 22.6721H202.422L196.743 34.7423L199.641 40.3324Z" fill="#2C269A"/>
3
+ <path d="M228.17 41H199.236L196 34.7557L196.071 34.6044L201.995 22H230.902L234.129 28.2621L228.17 41ZM199.641 40.3324H227.742L233.386 28.2799L230.497 22.6721H202.422L196.743 34.7423L199.641 40.3324Z" fill="#82DBE1"/>
4
+ <path d="M201.996 22H230.498L233.387 28.2755L227.743 40.3324H199.642L196.744 34.7423L201.996 22Z" fill="#82DBE1"/>
5
+ <path d="M203.482 28.0137H205.312C207.154 28.0137 208.659 29.5091 208.659 31.3606C208.659 33.2121 207.154 34.7075 205.312 34.7075H203.482V28.0137ZM205.303 33.9954C206.789 33.9954 207.946 32.8694 207.946 31.3606C207.946 29.8518 206.789 28.7258 205.303 28.7258H204.186V33.9909H205.303V33.9954Z" fill="#2C269A"/>
6
+ <path d="M214.289 29.8741C214.289 30.9111 213.461 31.7255 212.424 31.7255H210.738V34.7075H210.025V28.0137H212.424C213.461 28.0137 214.289 28.8504 214.289 29.8741ZM213.577 29.8874C213.577 29.1753 213.03 28.7035 212.362 28.7035H210.742V31.049H212.362C213.03 31.049 213.577 30.6218 213.577 29.8874Z" fill="#2C269A"/>
7
+ <path d="M221.934 31.1595V31.3598C221.934 33.2558 220.394 34.7957 218.498 34.7957C216.589 34.7957 215.062 33.2558 215.062 31.3598C215.062 29.4638 216.594 27.9238 218.498 27.9238C219.78 27.9238 220.897 28.627 221.476 29.664L220.862 30.0112C220.403 29.1834 219.513 28.6404 218.498 28.6404C216.99 28.6404 215.775 29.8465 215.775 31.3642C215.775 32.8819 216.99 34.088 218.498 34.088C219.847 34.088 220.964 33.1267 221.178 31.8315H218.49V31.1639H221.934V31.1595Z" fill="#2C269A"/>
8
+ <path d="M234.319 26.0703H225.453V34.9361H234.319V26.0703Z" fill="#2C269A"/>
9
+ <path d="M235.922 28.847L228.244 24.4141L223.811 32.0918L231.489 36.5247L235.922 28.847Z" fill="#2C269A"/>
10
+ <path d="M231.49 24.4128L223.812 28.8457L228.245 36.5235L235.923 32.0906L231.49 24.4128Z" fill="#2C269A"/>
11
+ <path d="M227.643 30.5719L229.325 32.2542L232.601 28.9785" stroke="white" stroke-miterlimit="10"/>
12
+ <path d="M87.9128 10.8496L65.283 45.6255L27.2637 34.0335L87.9128 10.8496Z" fill="#D4E5F1"/>
13
+ <path d="M34.4015 49.5221L27.2637 34.2207L65.378 45.5987L34.4015 49.5221Z" fill="#CADBE9"/>
14
+ <path d="M0 8.3457L27.2643 33.9402L19.8699 38.9485L0 8.3457Z" fill="#ABC1D7"/>
15
+ <path d="M34.2187 49.5221L27.0459 34.2207L19.752 39.1676L34.2187 49.5221Z" fill="#95B0C4"/>
16
+ <path d="M22.8125 30.1048L38.9485 0L36.4491 30.6257L27.0973 34.2195L22.8125 30.1048Z" fill="#EAE735"/>
17
+ <path d="M55.9198 23.1603L38.978 0L36.4453 30.6028L55.9198 23.1603Z" fill="#ED5248"/>
18
+ <path d="M87.9137 10.8496L53.416 19.5607L56.0418 23.0907L87.9137 10.8496Z" fill="#DEEAF5"/>
19
+ <path d="M30.3246 15.9509L0 8.3457L22.6739 29.7676L30.3246 15.9509Z" fill="#E0E9F5"/>
20
+ <path opacity="0.1" d="M26.4659 15.0215L22.8125 29.7665L30.3241 15.9988L26.4659 15.0215Z" fill="black"/>
21
+ <path opacity="0.1" d="M54.8327 19.1953L55.9199 23.0902L53.416 19.5638L54.8327 19.1953Z" fill="black"/>
22
+ <path opacity="0.1" d="M38.9408 0L40.3402 29.0935L36.4453 30.6028L38.9408 0Z" fill="black"/>
23
+ <path opacity="0.1" d="M34.498 31.1592L38.9494 0L36.4445 30.4177L34.498 31.1592Z" fill="black"/>
24
+ <path d="M26.8979 34.219L0 8.58703L0.224005 8.3457L26.9668 33.8225L87.7928 10.8624L87.9134 11.1726L26.8979 34.219Z" fill="white"/>
25
+ <path d="M98.2051 46.461C99.5465 46.2935 100.718 45.9583 101.686 45.4219C103.095 44.6678 104.335 43.6622 105.438 42.4555L102.416 39.5059L98.2051 46.461Z" fill="url(#paint0_linear_1251_237)"/>
26
+ <path d="M102.098 30.6036L105.442 27.0822C104.345 25.8909 103.051 24.9449 101.595 24.209C101.145 23.9813 100.66 23.8061 100.156 23.6484L101.9 30.3759C101.954 30.4635 102.026 30.5335 102.098 30.6036Z" fill="url(#paint1_linear_1251_237)"/>
27
+ <path d="M122.382 32.8567L131.314 23.6484H125.061L115.734 33.6687V39.7844L118.878 36.5019L122.382 32.8567Z" fill="url(#paint2_linear_1251_237)"/>
28
+ <path d="M115.733 31.605V15.0215H110.447V45.9025H115.733V42.9678V39.5044V33.4647V31.605Z" fill="url(#paint3_linear_1251_237)"/>
29
+ <path d="M118.793 36.3835L125.45 45.9039H131.312L122.24 32.8281L118.793 36.3835Z" fill="url(#paint4_linear_1251_237)"/>
30
+ <path d="M143.858 23.0918C141.999 23.0918 140.401 23.2635 139.08 23.6068C137.761 23.9501 136.474 24.3964 135.207 24.9628L136.631 29.0826C137.657 28.6534 138.681 28.3101 139.689 28.0699C140.697 27.8124 141.843 27.6922 143.128 27.6922C144.935 27.6922 146.325 28.1041 147.28 28.9282C148.236 29.7521 148.722 30.9365 148.722 32.4985V33.0478V36.3436C148.757 36.3608 148.774 36.3608 148.809 36.378V37.7855C148.809 38.0945 148.774 38.3863 148.722 38.661V45.9048H153.847V32.5843C153.847 29.5804 153.031 27.2458 151.397 25.5809C149.765 23.9158 147.245 23.0918 143.858 23.0918Z" fill="#414242"/>
31
+ <path d="M148.203 39.8615C147.912 40.4427 147.484 40.9385 146.952 41.3487C146.421 41.759 145.787 42.0838 145.068 42.306C144.348 42.5283 143.56 42.648 142.704 42.648C141.487 42.648 140.458 42.3573 139.619 41.7932C138.779 41.229 138.368 40.4085 138.368 39.3316V39.246C138.368 38.0836 138.814 37.1948 139.722 36.5793C140.63 35.9639 141.881 35.6391 143.508 35.6391C144.502 35.6391 145.445 35.7246 146.336 35.8955C147.192 36.0493 147.929 36.2544 148.563 36.4938V33.2117C147.689 32.9382 146.764 32.6988 145.822 32.5278C144.879 32.3569 143.714 32.2715 142.36 32.2715C141.058 32.2715 139.841 32.4253 138.728 32.7159C137.614 33.0066 136.655 33.4509 135.832 34.0493C135.027 34.6476 134.393 35.3998 133.947 36.2887C133.502 37.1948 133.262 38.2546 133.262 39.4683V39.5538C133.262 40.682 133.484 41.6735 133.914 42.5453C134.359 43.4001 134.924 44.1352 135.661 44.6992C136.38 45.2805 137.22 45.7078 138.18 46.0155C139.139 46.3061 140.167 46.46 141.23 46.46C142.926 46.46 144.365 46.1523 145.565 45.5198C146.747 44.9044 147.723 44.1352 148.495 43.2291V45.9814H148.528V38.7674C148.495 39.1776 148.374 39.5367 148.203 39.8615Z" fill="url(#paint5_linear_1251_237)"/>
32
+ <path d="M177.918 25.4141C176.521 23.8602 174.557 23.0918 171.991 23.0918C171.112 23.0918 170.319 23.1943 169.63 23.4162C168.941 23.6383 168.304 23.9285 167.718 24.2871C167.132 24.6457 166.632 25.0726 166.185 25.5507C165.736 26.0289 165.34 26.524 164.979 27.0704V33.3201C164.979 31.5784 165.444 30.2123 166.375 29.2391C167.304 28.2657 168.511 27.7876 170.009 27.7876C171.526 27.7876 172.714 28.2657 173.541 29.2049C174.369 30.144 174.799 31.4931 174.799 33.2347V45.9048H180.002V31.6809C180.002 29.0512 179.313 26.968 177.918 25.4141Z" fill="url(#paint6_linear_1251_237)"/>
33
+ <path d="M164.976 23.6484H159.969V45.905H164.976V35.7374V33.374V27.134V23.6484Z" fill="url(#paint7_linear_1251_237)"/>
34
+ <path d="M91.6826 29.7369C92.2576 29.081 92.9545 28.5632 93.7559 28.1834C94.5573 27.8038 95.4459 27.6139 96.3866 27.6139C97.7108 27.6139 98.8258 27.8728 99.7318 28.4079C100.568 28.8911 101.352 29.4952 102.101 30.2374L100.411 23.6095C99.2614 23.2643 97.9372 23.0918 96.4389 23.0918C94.7315 23.0918 93.1635 23.4024 91.7 24.0411C90.2366 24.6624 88.9821 25.5254 87.9368 26.6128C86.874 27.7001 86.0551 28.9602 85.4629 30.3755C84.8705 31.808 84.5742 33.3269 84.5742 34.9494C84.5742 36.5718 84.8705 38.0907 85.4629 39.506L90.4107 31.877C90.7244 31.0486 91.1425 30.3582 91.6826 29.7369Z" fill="#414242"/>
35
+ <path d="M102.346 39.4096C101.519 40.2355 100.641 40.9065 99.7276 41.4399C98.7972 41.9732 97.729 42.2312 96.5057 42.2312C95.5064 42.2312 94.5933 42.0421 93.7663 41.6635C92.9392 41.285 92.2329 40.7689 91.6299 40.115C91.0269 39.4612 90.5617 38.687 90.2343 37.8095C89.9069 36.932 89.7347 35.9685 89.7347 34.9534C89.7347 33.9555 89.8897 33.0264 90.2343 32.1661C90.2515 32.0973 90.286 32.0457 90.3032 31.9941L85.4102 39.5988C85.996 41.0097 86.8229 42.2485 87.8567 43.3153C88.9076 44.3819 90.1482 45.225 91.561 45.8272C92.9909 46.4466 94.5415 46.7391 96.2128 46.7391C96.8847 46.7391 97.5049 46.7046 98.1079 46.6187L102.381 39.4784L102.346 39.4096Z" fill="url(#paint8_linear_1251_237)"/>
36
+ <defs>
37
+ <linearGradient id="paint0_linear_1251_237" x1="98.2042" y1="42.9851" x2="105.433" y2="42.9851" gradientUnits="userSpaceOnUse">
38
+ <stop stop-color="#999999"/>
39
+ <stop offset="1"/>
40
+ </linearGradient>
41
+ <linearGradient id="paint1_linear_1251_237" x1="100.138" y1="27.1344" x2="105.438" y2="27.1344" gradientUnits="userSpaceOnUse">
42
+ <stop stop-color="#999999"/>
43
+ <stop offset="1"/>
44
+ </linearGradient>
45
+ <linearGradient id="paint2_linear_1251_237" x1="112.012" y1="40.1492" x2="126.924" y2="25.3243" gradientUnits="userSpaceOnUse">
46
+ <stop stop-color="#999999"/>
47
+ <stop offset="1"/>
48
+ </linearGradient>
49
+ <linearGradient id="paint3_linear_1251_237" x1="113.092" y1="5.02253" x2="113.092" y2="39.0676" gradientUnits="userSpaceOnUse">
50
+ <stop stop-color="#999999"/>
51
+ <stop offset="1"/>
52
+ </linearGradient>
53
+ <linearGradient id="paint4_linear_1251_237" x1="116.49" y1="27.8655" x2="128.674" y2="46.8393" gradientUnits="userSpaceOnUse">
54
+ <stop stop-color="#999999"/>
55
+ <stop offset="1"/>
56
+ </linearGradient>
57
+ <linearGradient id="paint5_linear_1251_237" x1="154.218" y1="39.3965" x2="139.738" y2="39.3965" gradientUnits="userSpaceOnUse">
58
+ <stop stop-color="#999999"/>
59
+ <stop offset="1"/>
60
+ </linearGradient>
61
+ <linearGradient id="paint6_linear_1251_237" x1="159.59" y1="34.4929" x2="177.454" y2="34.4929" gradientUnits="userSpaceOnUse">
62
+ <stop stop-color="#999999"/>
63
+ <stop offset="1"/>
64
+ </linearGradient>
65
+ <linearGradient id="paint7_linear_1251_237" x1="162.475" y1="16.2046" x2="162.475" y2="47.9144" gradientUnits="userSpaceOnUse">
66
+ <stop stop-color="#999999"/>
67
+ <stop offset="1"/>
68
+ </linearGradient>
69
+ <linearGradient id="paint8_linear_1251_237" x1="93.2522" y1="30.0485" x2="94.5713" y2="49.2974" gradientUnits="userSpaceOnUse">
70
+ <stop stop-color="#999999"/>
71
+ <stop offset="1"/>
72
+ </linearGradient>
73
+ </defs>
74
+ </svg>
@@ -0,0 +1,3 @@
1
+ user_id,age
2
+ 1,30
3
+ 2,80
@@ -0,0 +1,93 @@
1
+ #!python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Code to upload metadata to the CKAN server to create/update an existing package
5
+ The metadata is defined by the user in an Excel worksheet
6
+ This file implements functions to convert formats between database and local files.
7
+ """
8
+ from typing import Dict, List, Callable, Any, Tuple, Union, Set
9
+ import copy
10
+
11
+ import pandas as pd
12
+
13
+ from ckanapi_harvesters.builder.builder_errors import MissingCodeFileError
14
+ from ckanapi_harvesters.auxiliary.external_code_import import PythonUserCode
15
+ from ckanapi_harvesters.auxiliary.list_records import ListRecords
16
+
17
+
18
def simple_upload_fun(df_local: pd.DataFrame) -> None:
    """
    Convert every datetime column of a DataFrame to ISO-8601 strings, in place.

    Columns that do not hold datetimes are left untouched.
    Missing timestamps (NaT) are kept as null values rather than being formatted.

    :param df_local: the DataFrame to prepare for upload (modified in place)
    :return: None
    """
    for field in df_local.columns:
        column = df_local[field]
        # The dtype of a datetime column is datetime64 (optionally timezone-aware),
        # never the pd.Timestamp scalar class, so use the pandas dtype helper.
        if pd.api.types.is_datetime64_any_dtype(column.dtype):
            # Cast to object first so each element is a pd.Timestamp (or NaT)
            df_local[field] = column.astype(object).apply(
                lambda value: None if pd.isna(value) else value.isoformat())  # ISO-8601 format
22
+
23
+
24
class DataSchemeConversion:
    """
    Convert data between the local (file) format and the database format.

    Both directions are delegated to optional user-supplied callables.
    When a callable is not set, the data is passed through unchanged.
    """

    def __init__(self, *, df_upload_fun:Callable[[pd.DataFrame], Any] = None,
                 df_download_fun:Callable[[pd.DataFrame], Any] = None):
        """
        Class to convert between local data formats and database formats

        :param df_upload_fun: optional callable applied to the local data before upload
        :param df_download_fun: optional callable applied to the downloaded data before saving
        """
        self.df_upload_fun:Union[Callable[[Any, Any], Union[ListRecords, pd.DataFrame]], None] = df_upload_fun
        self.df_download_fun:Union[Callable[[pd.DataFrame, Any], pd.DataFrame], None] = df_download_fun

    def copy(self):
        """
        :return: a deep copy of this object
        """
        return copy.deepcopy(self)

    def df_upload_alter(self, df_local: Union[pd.DataFrame, Any], file_name:str=None, mapper_kwargs:dict=None, **kwargs) -> pd.DataFrame:
        """
        Apply user-defined df_upload_fun if present

        :param df_local: the dataframe to upload
        :param file_name: name of the source file; passed to df_upload_fun as the ``file_name``
            keyword and stored in ``df_local.attrs["source"]`` when df_local is a DataFrame
        :param mapper_kwargs: extra keyword arguments for df_upload_fun (not modified by this call)
        :param kwargs: additional keyword arguments forwarded to df_upload_fun
        :return: the dataframe ready for upload, converted in the format of the database
            (a ListRecords object is also accepted and returned as is)
        :raises TypeError: if the result is neither a DataFrame nor a ListRecords
        """
        # Work on a private copy so that the caller's dictionary is never altered
        mapper_kwargs = {} if mapper_kwargs is None else dict(mapper_kwargs)
        mapper_kwargs["file_name"] = file_name
        if file_name is not None and isinstance(df_local, pd.DataFrame):
            df_local.attrs["source"] = file_name
        df_database = df_local
        if self.df_upload_fun is not None:
            # the user function receives the original object (no defensive copy)
            df_database = self.df_upload_fun(df_database, **mapper_kwargs, **kwargs)
        # also accept ListRecords (List[dict])
        if isinstance(df_database, pd.DataFrame) or isinstance(df_database, ListRecords):
            return df_database
        if self.df_upload_fun is None:
            raise TypeError("No upload function was defined to convert the data format to a DataFrame")
        raise TypeError("df_upload_fun must return a DataFrame")

    def df_download_alter(self, df_database:pd.DataFrame, file_query:dict=None, mapper_kwargs:dict=None, **kwargs) -> pd.DataFrame:
        """
        Apply user-defined df_download_fun if present.
        df_download_fun should be the reverse function of df_upload_fun

        :param df_database: the downloaded dataframe from the database
        :param file_query: query associated with the data; passed to df_download_fun as the
            ``file_query`` keyword and stored in ``df_database.attrs["query"]`` when not None
        :param mapper_kwargs: extra keyword arguments for df_download_fun (not modified by this call)
        :param kwargs: additional keyword arguments forwarded to df_download_fun
        :return: the dataframe ready to save, converted in the local format
        """
        # Work on a private copy so that the caller's dictionary is never altered
        mapper_kwargs = {} if mapper_kwargs is None else dict(mapper_kwargs)
        mapper_kwargs["file_query"] = file_query
        if file_query is not None:
            df_database.attrs["query"] = file_query
        if self.df_download_fun is None:
            return df_database
        # the user function receives the original object (no defensive copy)
        return self.df_download_fun(df_database, **mapper_kwargs, **kwargs)

    def _connect_aux_functions(self, module: PythonUserCode, aux_upload_fun_name:str, aux_download_fun_name:str) -> None:
        """
        Bind the upload/download functions from their names in a user code module.

        :param module: the user code module providing the functions (may be None if no name is given)
        :param aux_upload_fun_name: name of the upload function, or an empty value to leave it unchanged
        :param aux_download_fun_name: name of the download function, or an empty value to leave it unchanged
        :raises MissingCodeFileError: if a function name is given but module is None
        """
        if (aux_upload_fun_name or aux_download_fun_name) and module is None:
            raise MissingCodeFileError()
        if aux_upload_fun_name:
            self.df_upload_fun = module.function_pointer(aux_upload_fun_name)
        if aux_download_fun_name:
            self.df_download_fun = module.function_pointer(aux_download_fun_name)

    def get_necessary_fields(self) -> Set[str]:
        """
        :return: the set of field names required by the conversion (empty in this base implementation)
        """
        return set()
93
+