ckanapi-harvesters 0.0.0__py3-none-any.whl → 0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ckanapi_harvesters/__init__.py +32 -10
- ckanapi_harvesters/auxiliary/__init__.py +26 -0
- ckanapi_harvesters/auxiliary/ckan_action.py +93 -0
- ckanapi_harvesters/auxiliary/ckan_api_key.py +213 -0
- ckanapi_harvesters/auxiliary/ckan_auxiliary.py +293 -0
- ckanapi_harvesters/auxiliary/ckan_configuration.py +50 -0
- ckanapi_harvesters/auxiliary/ckan_defs.py +10 -0
- ckanapi_harvesters/auxiliary/ckan_errors.py +129 -0
- ckanapi_harvesters/auxiliary/ckan_map.py +509 -0
- ckanapi_harvesters/auxiliary/ckan_model.py +992 -0
- ckanapi_harvesters/auxiliary/ckan_vocabulary_deprecated.py +104 -0
- ckanapi_harvesters/auxiliary/deprecated.py +82 -0
- ckanapi_harvesters/auxiliary/error_level_message.py +51 -0
- ckanapi_harvesters/auxiliary/external_code_import.py +98 -0
- ckanapi_harvesters/auxiliary/list_records.py +60 -0
- ckanapi_harvesters/auxiliary/login.py +163 -0
- ckanapi_harvesters/auxiliary/path.py +208 -0
- ckanapi_harvesters/auxiliary/proxy_config.py +298 -0
- ckanapi_harvesters/auxiliary/urls.py +40 -0
- ckanapi_harvesters/builder/__init__.py +40 -0
- ckanapi_harvesters/builder/builder_aux.py +20 -0
- ckanapi_harvesters/builder/builder_ckan.py +238 -0
- ckanapi_harvesters/builder/builder_errors.py +36 -0
- ckanapi_harvesters/builder/builder_field.py +122 -0
- ckanapi_harvesters/builder/builder_package.py +9 -0
- ckanapi_harvesters/builder/builder_package_1_basic.py +1291 -0
- ckanapi_harvesters/builder/builder_package_2_harvesters.py +40 -0
- ckanapi_harvesters/builder/builder_package_3_multi_threaded.py +45 -0
- ckanapi_harvesters/builder/builder_package_example.xlsx +0 -0
- ckanapi_harvesters/builder/builder_resource.py +589 -0
- ckanapi_harvesters/builder/builder_resource_datastore.py +561 -0
- ckanapi_harvesters/builder/builder_resource_datastore_multi_abc.py +367 -0
- ckanapi_harvesters/builder/builder_resource_datastore_multi_folder.py +273 -0
- ckanapi_harvesters/builder/builder_resource_datastore_multi_harvester.py +278 -0
- ckanapi_harvesters/builder/builder_resource_datastore_unmanaged.py +145 -0
- ckanapi_harvesters/builder/builder_resource_datastore_url.py +150 -0
- ckanapi_harvesters/builder/builder_resource_init.py +126 -0
- ckanapi_harvesters/builder/builder_resource_multi_abc.py +361 -0
- ckanapi_harvesters/builder/builder_resource_multi_datastore.py +146 -0
- ckanapi_harvesters/builder/builder_resource_multi_file.py +505 -0
- ckanapi_harvesters/builder/example/__init__.py +21 -0
- ckanapi_harvesters/builder/example/builder_example.py +21 -0
- ckanapi_harvesters/builder/example/builder_example_aux_fun.py +24 -0
- ckanapi_harvesters/builder/example/builder_example_download.py +44 -0
- ckanapi_harvesters/builder/example/builder_example_generate_data.py +73 -0
- ckanapi_harvesters/builder/example/builder_example_patch_upload.py +51 -0
- ckanapi_harvesters/builder/example/builder_example_policy.py +114 -0
- ckanapi_harvesters/builder/example/builder_example_test_sql.py +53 -0
- ckanapi_harvesters/builder/example/builder_example_tests.py +87 -0
- ckanapi_harvesters/builder/example/builder_example_tests_offline.py +57 -0
- ckanapi_harvesters/builder/example/package/ckan-dpg.svg +74 -0
- ckanapi_harvesters/builder/example/package/users_local.csv +3 -0
- ckanapi_harvesters/builder/mapper_datastore.py +93 -0
- ckanapi_harvesters/builder/mapper_datastore_multi.py +262 -0
- ckanapi_harvesters/builder/specific/__init__.py +11 -0
- ckanapi_harvesters/builder/specific/configuration_builder.py +66 -0
- ckanapi_harvesters/builder/specific_builder_abc.py +23 -0
- ckanapi_harvesters/ckan_api/__init__.py +20 -0
- ckanapi_harvesters/ckan_api/ckan_api.py +11 -0
- ckanapi_harvesters/ckan_api/ckan_api_0_base.py +896 -0
- ckanapi_harvesters/ckan_api/ckan_api_1_map.py +1028 -0
- ckanapi_harvesters/ckan_api/ckan_api_2_readonly.py +934 -0
- ckanapi_harvesters/ckan_api/ckan_api_3_policy.py +229 -0
- ckanapi_harvesters/ckan_api/ckan_api_4_readwrite.py +579 -0
- ckanapi_harvesters/ckan_api/ckan_api_5_manage.py +1225 -0
- ckanapi_harvesters/ckan_api/ckan_api_params.py +192 -0
- ckanapi_harvesters/ckan_api/deprecated/__init__.py +9 -0
- ckanapi_harvesters/ckan_api/deprecated/ckan_api_deprecated.py +267 -0
- ckanapi_harvesters/ckan_api/deprecated/ckan_api_deprecated_vocabularies.py +189 -0
- ckanapi_harvesters/harvesters/__init__.py +23 -0
- ckanapi_harvesters/harvesters/data_cleaner/__init__.py +17 -0
- ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_abc.py +240 -0
- ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_errors.py +23 -0
- ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload.py +9 -0
- ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload_1_basic.py +430 -0
- ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload_2_geom.py +98 -0
- ckanapi_harvesters/harvesters/file_formats/__init__.py +10 -0
- ckanapi_harvesters/harvesters/file_formats/csv_format.py +43 -0
- ckanapi_harvesters/harvesters/file_formats/file_format_abc.py +39 -0
- ckanapi_harvesters/harvesters/file_formats/file_format_init.py +25 -0
- ckanapi_harvesters/harvesters/file_formats/shp_format.py +129 -0
- ckanapi_harvesters/harvesters/harvester_abc.py +190 -0
- ckanapi_harvesters/harvesters/harvester_errors.py +31 -0
- ckanapi_harvesters/harvesters/harvester_init.py +30 -0
- ckanapi_harvesters/harvesters/harvester_model.py +49 -0
- ckanapi_harvesters/harvesters/harvester_params.py +323 -0
- ckanapi_harvesters/harvesters/postgre_harvester.py +495 -0
- ckanapi_harvesters/harvesters/postgre_params.py +86 -0
- ckanapi_harvesters/harvesters/pymongo_data_cleaner.py +173 -0
- ckanapi_harvesters/harvesters/pymongo_harvester.py +355 -0
- ckanapi_harvesters/harvesters/pymongo_params.py +54 -0
- ckanapi_harvesters/policies/__init__.py +20 -0
- ckanapi_harvesters/policies/data_format_policy.py +269 -0
- ckanapi_harvesters/policies/data_format_policy_abc.py +97 -0
- ckanapi_harvesters/policies/data_format_policy_custom_fields.py +156 -0
- ckanapi_harvesters/policies/data_format_policy_defs.py +135 -0
- ckanapi_harvesters/policies/data_format_policy_errors.py +79 -0
- ckanapi_harvesters/policies/data_format_policy_lists.py +234 -0
- ckanapi_harvesters/policies/data_format_policy_tag_groups.py +35 -0
- ckanapi_harvesters/reports/__init__.py +11 -0
- ckanapi_harvesters/reports/admin_report.py +292 -0
- {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.3.dist-info}/METADATA +84 -38
- ckanapi_harvesters-0.0.3.dist-info/RECORD +105 -0
- ckanapi_harvesters/divider/__init__.py +0 -27
- ckanapi_harvesters/divider/divider.py +0 -53
- ckanapi_harvesters/divider/divider_error.py +0 -59
- ckanapi_harvesters/main.py +0 -30
- ckanapi_harvesters-0.0.0.dist-info/RECORD +0 -9
- {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.3.dist-info}/WHEEL +0 -0
- {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.3.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
#!python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
CKAN tag vocabulary information
|
|
5
|
+
"""
|
|
6
|
+
from typing import List, Dict, Union
|
|
7
|
+
import copy
|
|
8
|
+
|
|
9
|
+
from ckanapi_harvesters.auxiliary.ckan_model import CkanTagInfo
|
|
10
|
+
from ckanapi_harvesters.auxiliary.ckan_map import CkanMapABC
|
|
11
|
+
from ckanapi_harvesters.auxiliary.ckan_errors import NotMappedObjectNameError
|
|
12
|
+
|
|
13
|
+
class CkanTagVocabularyInfo:
    """
    Information on one CKAN tag vocabulary, built from its dictionary form.
    """

    def __init__(self, d: dict):
        """
        :param d: dictionary with the keys "name", "tags" (an iterable of tag dictionaries, each having a "name" key)
            and "id". The dictionary itself is kept, not copied, in the details attribute.
        """
        self.vocabulary_name: str = d["name"]
        tags_by_name = {}
        for tag_dict in d["tags"]:
            tag_name = tag_dict["name"]
            tags_by_name[tag_name] = CkanTagInfo.from_dict(tag_dict)
        self.tags: Dict[str, CkanTagInfo] = tags_by_name  # tag name -> tag info
        self.id: str = d["id"]
        self.details: dict = d

    def __str__(self):
        return f"Vocabulary '{self.vocabulary_name}' ({self.id})"

    def to_dict(self, include_details: bool = True) -> dict:
        """
        Export the vocabulary as a dictionary.

        :param include_details: if True, start from the entries of the details dictionary;
            the "id", "name" and "tags" entries are always (re)written from the attributes.
        :return: a new dictionary
        """
        exported = {}
        if include_details and self.details is not None:
            exported.update(self.details)
        tag_dicts = [tag_info.to_dict() for tag_info in self.tags.values()]
        exported["id"] = self.id
        exported["name"] = self.vocabulary_name
        exported["tags"] = tag_dicts
        return exported

    @staticmethod
    def from_dict(d: dict) -> "CkanTagVocabularyInfo":
        """
        Alternate constructor, equivalent to calling the class with d.
        """
        return CkanTagVocabularyInfo(d)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class CkanVocabularyMap(CkanMapABC):
    """
    In-memory map of CKAN tag vocabularies, indexed by vocabulary id and by vocabulary name.
    """

    def __init__(self):
        self.vocabularies: Dict[str, CkanTagVocabularyInfo] = {}  # id -> info
        self.vocabulary_id_index: Dict[str, str] = {}  # name -> id
        self.vocabularies_listed: bool = False
        self._mapping_query_vocabulary_list: bool = True

    def purge(self):
        """
        Forget every mapped vocabulary.

        The containers are reset to empty dictionaries rather than None, so that
        to_dict, get_vocabulary_id and _update_vocabulary_info keep working on a purged map.
        """
        self.vocabularies = {}
        self.vocabulary_id_index = {}
        self.vocabularies_listed = False

    def copy(self) -> "CkanVocabularyMap":
        """
        :return: a deep copy of the map
        """
        return copy.deepcopy(self)

    def to_dict(self) -> dict:
        """
        :return: a dictionary with a single "vocabularies" key listing the dictionary form of each mapped vocabulary
        """
        return {"vocabularies": [vocabulary_info.to_dict() for vocabulary_info in self.vocabularies.values()],
                }

    def update_from_dict(self, data: dict) -> None:
        """
        Add the vocabularies described in a dictionary to the map.

        :param data: dictionary in the format produced by to_dict ("vocabularies" key).
            The "packages" key, which this method read previously, is still accepted when "vocabularies" is absent.
        """
        # to_dict writes "vocabularies": read the same key so that to_dict/from_dict round-trips
        if "vocabularies" in data:
            vocabulary_dicts = data["vocabularies"]
        else:
            vocabulary_dicts = data["packages"]
        for vocabulary_dict in vocabulary_dicts:
            self._update_vocabulary_info(CkanTagVocabularyInfo.from_dict(vocabulary_dict))

    @staticmethod
    def from_dict(d: dict) -> "CkanVocabularyMap":
        """
        Build a new map from a dictionary (see update_from_dict).
        """
        vocabulary_map = CkanVocabularyMap()
        vocabulary_map.update_from_dict(d)
        return vocabulary_map

    ## Vocabulary functions
    def get_vocabulary_id(self, vocabulary_name:str, *, error_not_mapped:bool=True, search_title:bool=True) -> Union[str,None]:
        """
        Retrieve the vocabulary id for a given vocabulary name based on the vocabulary map.

        :param vocabulary_name: vocabulary name or id.
        :param error_not_mapped: raise NotMappedObjectNameError if the name is neither a mapped id nor a mapped name.
        :param search_title: accepted but not used by this method.
        :return: the vocabulary id, or None if it is not mapped and error_not_mapped is False
        :raises ValueError: if vocabulary_name is None
        """
        if vocabulary_name is None:
            raise ValueError("vocabulary_name cannot be None")
        if vocabulary_name in self.vocabularies:
            # recognized vocabulary_id
            return vocabulary_name
        if vocabulary_name in self.vocabulary_id_index:
            return self.vocabulary_id_index[vocabulary_name]
        if error_not_mapped:
            raise NotMappedObjectNameError(f"Vocabulary {vocabulary_name} is not mapped or does not exist.")
        return None

    def _update_vocabulary_info(self, vocabulary_info:Union[CkanTagVocabularyInfo, List[CkanTagVocabularyInfo]],
                                vocabularies_listed:bool=False) -> None:
        """
        Internal function to update the information of a vocabulary.

        :param vocabulary_info: one vocabulary or a list of vocabularies to add or overwrite in the map
        :param vocabularies_listed: if True, flag the map as listed; the flag is never reset to False here
        """
        if not isinstance(vocabulary_info, list):
            vocabulary_info = [vocabulary_info]
        for vocab_info in vocabulary_info:
            self.vocabularies[vocab_info.id] = vocab_info
        for vocab_info in vocabulary_info:
            self.vocabulary_id_index[vocab_info.vocabulary_name] = vocab_info.id
        if vocabularies_listed:
            self.vocabularies_listed = True

    def _record_vocabulary_delete(self, vocabulary_id: str) -> None:
        """
        Remove a vocabulary from both indexes.

        :param vocabulary_id: id of a mapped vocabulary (KeyError if it is not mapped)
        """
        # only pass in delete state
        vocabulary_info = self.vocabularies[vocabulary_id]
        self.vocabulary_id_index.pop(vocabulary_info.vocabulary_name)
        self.vocabularies.pop(vocabulary_id)
|
|
103
|
+
|
|
104
|
+
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
#!python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Dead code from auxiliary functions
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from enum import IntEnum
|
|
8
|
+
|
|
9
|
+
import numpy as np
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class CkanBasicDataFieldType(IntEnum):
    """
    Basic data field types, with a textual form in lower case (empty string for Default).
    """
    Default = 0  # no override
    Text = 1
    Numeric = 2
    TimeStamp = 3

    def __str__(self):
        if self == CkanBasicDataFieldType.Default:
            return ""
        else:
            return self.name.lower()

    @staticmethod
    def from_str(s):
        """
        Parse the textual form of a field type (case and surrounding whitespace are ignored).

        :param s: "text", "numeric", "timestamp" or an empty value.
            An empty string, None or a float NaN all map to Default.
        :return: the matching CkanBasicDataFieldType
        :raises ValueError: if the text is not recognized
        """
        # Missing values must be tested before any str method is called:
        # a NaN has no lower(), and np.isnan does not accept a str.
        if s is None or (isinstance(s, (float, np.floating)) and np.isnan(s)):
            return CkanBasicDataFieldType.Default
        s = s.lower().strip()
        if s == "text":
            return CkanBasicDataFieldType.Text
        elif s == "numeric":
            return CkanBasicDataFieldType.Numeric
        elif s == "timestamp":
            return CkanBasicDataFieldType.TimeStamp
        elif s == "":
            return CkanBasicDataFieldType.Default
        else:
            raise ValueError(s)
|
|
37
|
+
|
|
38
|
+
class CkanCollaboratorCapacity(IntEnum):
    """
    Collaboration capacities of users associated to a package/dataset
    """
    Excluded = 0
    Member = 1
    Editor = 2

    def __str__(self):
        # textual form: member name in lower case
        label = self.name
        return label.lower()

    @staticmethod
    def from_str(s):
        """
        Parse a capacity from its textual form, ignoring case and surrounding whitespace.

        :raises ValueError: if the text matches no capacity (the normalized text is the exception argument)
        """
        wanted = s.lower().strip()
        for capacity in CkanCollaboratorCapacity:
            if capacity.name.lower() == wanted:
                return capacity
        raise ValueError(wanted)
|
|
60
|
+
|
|
61
|
+
class CkanGroupCapacity(IntEnum):
    """
    Capacities of users in a group
    """
    Excluded = 0
    Member = 1
    Admin = 3

    def __str__(self):
        # textual form: member name in lower case
        label = self.name
        return label.lower()

    @staticmethod
    def from_str(s):
        """
        Parse a capacity from its textual form, ignoring case and surrounding whitespace.

        :raises ValueError: if the text matches no capacity (the normalized text is the exception argument)
        """
        wanted = s.lower().strip()
        by_label = {
            "excluded": CkanGroupCapacity.Excluded,
            "member": CkanGroupCapacity.Member,
            "admin": CkanGroupCapacity.Admin,
        }
        if wanted not in by_label:
            raise ValueError(wanted)
        return by_label[wanted]
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
#!python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Functions to define messages with an error level
|
|
5
|
+
"""
|
|
6
|
+
from enum import IntEnum
|
|
7
|
+
from collections import OrderedDict
|
|
8
|
+
from warnings import warn
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class ErrorLevel(IntEnum):
    """
    Severity of a message, ordered from Information (lowest) to Error (highest).
    """
    Information = 0
    Warning = 1
    Error = 2

    def __str__(self):
        # textual form: member name in lower case
        label = self.name
        return label.lower()

    @staticmethod
    def from_str(s):
        """
        Parse a level from its textual form, ignoring case and surrounding whitespace.

        :raises ValueError: if the text matches no level (the normalized text is the exception argument)
        """
        wanted = s.lower().strip()
        for level in ErrorLevel:
            if level.name.lower() == wanted:
                return level
        raise ValueError(wanted)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class ErrorLevelMessage(Exception):
    """
    Exception carrying a message together with its error level.
    """

    def __init__(self, error_level:ErrorLevel, message: str):
        """
        :param error_level: level of the message
        :param message: message text, also passed to the Exception constructor
        """
        super().__init__(message)
        self.message: str = message
        self.error_level: ErrorLevel = error_level

    def to_dict(self) -> dict:
        """
        :return: ordered dictionary with the keys "level" (textual form of the error level) and "message"
        """
        exported = OrderedDict()
        exported["level"] = str(self.error_level)
        exported["message"] = self.message
        return exported
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class ContextErrorLevelMessage(ErrorLevelMessage):
    """
    Error level message prefixed with the context in which it was raised.
    """

    def __init__(self, context:str, error_level:ErrorLevel, specific_message: str):
        """
        :param context: description of where the message originates, placed at the start of the full message
        :param error_level: level of the message
        :param specific_message: message text without the context prefix
        """
        super().__init__(error_level, f"In {context} / {error_level.name}: {specific_message}")
        self.specific_message: str = specific_message
        self.context: str = context
|
|
51
|
+
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
#!python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
This implements functionality to dynamically call functions specified by the user.
|
|
5
|
+
This functionality is disabled by default. You must call unlock_external_code_execution to enable external code execution.
|
|
6
|
+
__Warning__:
|
|
7
|
+
only run code if you trust the source!
|
|
8
|
+
"""
|
|
9
|
+
from typing import Callable
|
|
10
|
+
import os
|
|
11
|
+
import re
|
|
12
|
+
from warnings import warn
|
|
13
|
+
import importlib
|
|
14
|
+
import importlib.util
|
|
15
|
+
|
|
16
|
+
from ckanapi_harvesters.auxiliary.path import path_rel_to_dir
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def unlock_external_code_execution(value:bool=True) -> None:
    """
    Set the class-wide switch which allows PythonUserCode to execute external code.
    A warning is emitted each time the switch is turned on.

    __Warning__:
    only run code if you trust the source!

    :param value: True to enable external code execution, False to lock it again
    :return:
    """
    PythonUserCode.enable_external_code = value
    if not value:
        return
    warn("External code is enabled. Only run code if you trust the source!")
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
# Matches every non-word character, and the empty position in front of a leading digit.
# Raw string: "\W" and "\d" are not valid escape sequences in a normal string literal.
var_name_subs_re = r'\W|^(?=\d)'

def clean_var_name(variable_name: str) -> str:
    """
    Turn an arbitrary string into a valid identifier-like name.

    Each non-word character is replaced by an underscore, and an underscore is
    inserted in front of the name if it starts with a digit.

    :param variable_name: text to convert
    :return: the converted name
    """
    return re.sub(var_name_subs_re, '_', variable_name)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class ExternalUserCodeDisabledException(Exception):
    """
    Raised when external code is requested while external code execution is locked.
    """

    def __init__(self, function_name:str, source_file:str) -> None:
        """
        :param function_name: name of the function (or designation of the code) which was requested
        :param source_file: name of the file containing the external code
        """
        message = (
            f"{function_name} in {source_file} cannot be executed because the external code execution is locked. "
            "Use unlock_external_code_execution to unlock. "
            "Warning: Only run code if you trust the source!"
        )
        super().__init__(message)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class PythonUserCode:
    """
    This class imports an arbitrary Python file as a module and makes it available to the rest of the code.
    This functionality is disabled by default. You must call unlock_external_code_execution to enable external code execution.

    __Warning__:
    only run code if you trust the source!
    """
    enable_external_code = False  # remain False to ensure no custom code is executed from the builder specification

    def __init__(self, python_file:str, base_dir:str=None):
        """
        Load and execute a Python file as a module.

        :param python_file: path to the Python file. None creates an empty object without loading anything
            (only use None to initialize a copy).
        :param base_dir: base directory passed to path_rel_to_dir to resolve python_file
        :raises ExternalUserCodeDisabledException: if a file is given while external code execution is locked
        :raises ImportError: if no import specification can be built for the file
        """
        self.module = None
        self.imported_module_name: str = ""
        if python_file is None:
            # only use None argument to initialize copy
            self.python_file: str = ""
            return
        self.python_file = path_rel_to_dir(python_file, base_dir=base_dir)
        file_name = os.path.split(self.python_file)[1]
        if not PythonUserCode.enable_external_code:
            # External python script execution is locked
            raise ExternalUserCodeDisabledException("code", file_name)
        module_base = clean_var_name(os.path.splitext(file_name)[0])
        self.imported_module_name = "ckan_builder_aux_funcs__" + module_base
        spec = importlib.util.spec_from_file_location(self.imported_module_name, self.python_file)
        if spec is None or spec.loader is None:
            # spec_from_file_location returns None when it cannot find a loader for the file
            raise ImportError(f"Cannot load {self.python_file} as a Python module",
                              name=self.imported_module_name, path=self.python_file)
        self.module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(self.module)

    def __copy__(self):
        return self.copy()

    def copy(self) -> "PythonUserCode":
        """
        :return: a new object sharing the already loaded module (the file is not executed again)
        """
        # do not execute module twice and do not make copies either
        dest = PythonUserCode(python_file=None, base_dir=None)
        dest.module = self.module
        dest.imported_module_name = self.imported_module_name
        dest.python_file = self.python_file
        return dest

    def function_pointer(self, function_name:str) -> Callable:
        """
        Obtain function pointer for a given name in the loaded Python module.

        :param function_name: name of an attribute of the loaded module
        :return: the callable found under this name
        :raises ExternalUserCodeDisabledException: if external code execution is locked
        :raises AttributeError: if the module has no attribute with this name
        :raises TypeError: if the attribute is not callable
        """
        if not PythonUserCode.enable_external_code:
            raise ExternalUserCodeDisabledException(function_name, os.path.split(self.python_file)[1])
        fun = getattr(self.module, function_name)
        # explicit check instead of an assert, which is removed when Python runs with -O
        if not callable(fun):
            raise TypeError(f"{function_name} in {os.path.split(self.python_file)[1]} is not callable")
        return fun
|
|
98
|
+
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
#!python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Give partial DataFrame behavior to a list of dictionaries
|
|
5
|
+
"""
|
|
6
|
+
import pandas as pd
|
|
7
|
+
import numpy as np
|
|
8
|
+
from typing import Union, List
|
|
9
|
+
|
|
10
|
+
class _ListRecords_index:
|
|
11
|
+
def __init__(self, parent) -> None:
|
|
12
|
+
self.parent = parent
|
|
13
|
+
|
|
14
|
+
def __getitem__(self, slice):
|
|
15
|
+
return self.parent[slice]
|
|
16
|
+
|
|
17
|
+
# def __setitem__(self, slice, value):
|
|
18
|
+
# self.parent[slice] = value
|
|
19
|
+
|
|
20
|
+
class ListRecords(list):  # List[dict]
    """
    Give partial DataFrame behavior to a list of dictionaries:
    a columns attribute and an iloc indexer.
    """

    def __init__(self, *args, **kwargs):
        """
        Same arguments as list. The columns attribute is set once, at construction,
        from the keys of the first record (empty list if there is no record).
        """
        super().__init__(*args, **kwargs)
        first_keys = self[0].keys() if len(self) > 0 else ()
        self.columns: Union[List[str], None] = list(first_keys)

    @property
    def iloc(self):
        # positional indexer, forwarding to the indexing of the list itself
        return _ListRecords_index(self)
|
|
35
|
+
|
|
36
|
+
def records_to_df(records: Union[List[dict], "ListRecords"], df_args:dict=None, *,
                  missing_value="", none_value="None") -> pd.DataFrame:
    """
    Keep source values (lesser type inference) and replace cells with missing keys with a fixed value.
    None values are also preserved using the none_value.

    The DataFrame is built with dtype=object. Cells are then rewritten by position, so the
    result does not depend on the index labels (a custom index passed in df_args may be non-unique).

    :param records: input data
    :param df_args: arguments to pass to DataFrame constructor
    :param missing_value: value to set if a column is not specified on a row.
    :param none_value: value to set if a value is None in the input data.
    :return: the DataFrame, one row per record
    """
    if df_args is None:
        df_args = {}
    df = pd.DataFrame(records, dtype=object, **df_args)
    fieldnames = list(df.columns)
    # pair each record with its row position; stops at the shorter of both, like the rows themselves
    for row_pos, record in zip(range(len(df)), records):
        for col_pos, field in enumerate(fieldnames):
            if field not in record:
                df.iat[row_pos, col_pos] = missing_value
            elif record[field] is None:
                df.iat[row_pos, col_pos] = none_value
    return df
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
#!python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Methods to load authentication credentials (user, password)
|
|
5
|
+
"""
|
|
6
|
+
from typing import Union, Tuple, Dict
|
|
7
|
+
import getpass
|
|
8
|
+
from warnings import warn
|
|
9
|
+
import os
|
|
10
|
+
import argparse
|
|
11
|
+
import shlex
|
|
12
|
+
|
|
13
|
+
from ckanapi_harvesters.auxiliary.path import path_rel_to_dir
|
|
14
|
+
from ckanapi_harvesters.auxiliary.ckan_defs import environ_keyword
|
|
15
|
+
from ckanapi_harvesters.auxiliary.ckan_errors import ApiKeyFileError
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class Login:
    """
    Holder for login credentials (user name, password), which can be loaded
    from a text file, typed in the console or given through command line arguments.
    """

    def __init__(self, username:str=None, password:str=None, login_file:str=None):
        """
        :param username: user name
        :param password: password
        :param login_file: path to a text file read by load_from_file (optional)
        """
        self.login_file: str = login_file  # path to a text file with the user name on the first line and the password on the second line (optional)
        self._username: Union[str,None] = username
        self._password: Union[str,None] = password

    def __del__(self):
        # drop the references to the credentials when the object is finalized
        self.clear()

    def __copy__(self):
        return self.copy()

    def copy(self, *, dest=None):
        """
        Copy the credentials and the login file path to another object.

        :param dest: object to fill; a new Login is created if None
        :return: the filled object
        """
        target = Login() if dest is None else dest
        target.login_file = self.login_file
        target._username = self._username
        target._password = self._password
        return target

    def clear(self) -> None:
        """
        Reset the user name and the password to None (the login file path is kept).
        """
        self._username = None
        self._password = None

    def is_empty(self):
        """
        :return: True if the user name or the password is None
        """
        return any(value is None for value in (self._username, self._password))

    def __str__(self):
        # never reveal the credentials: only tell whether they are set
        if self.is_empty():
            return "None"
        no_user = self._username == ""
        no_password = self._password == ""
        if no_user and no_password:
            return "<empty string>"
        if no_user:
            return "<empty user>"
        if no_password:
            return "<empty password>"
        return "*****"

    @property
    def username(self) -> Union[str,None]:
        return self._username

    @username.setter
    def username(self, value:Union[str,None]):
        self._username = value

    @property
    def password(self) -> Union[str,None]:
        return self._password

    @password.setter
    def password(self, value:Union[str,None]):
        self._password = value

    def load_from_file(self, login_file:str=None, *, base_dir:str=None, error_not_found:bool=True) -> bool:
        """
        Load the credentials from file.
        The file should contain username in first line and password in second line.
        The user name is stripped of surrounding whitespace; only the line ending is removed from the password.

        :param login_file: path to the login file (defaults to the login_file attribute). The following keywords are accepted:
            - "environ": nothing is loaded and False is returned
        :param base_dir: base directory to find the login file, if a relative path is provided
        :param error_not_found: if False, a missing file only produces a warning and a False return value
        :return: True if the credentials were read from the file
        """
        if login_file is None:
            login_file = self.login_file
        login_file = path_rel_to_dir(login_file, base_dir=base_dir, keyword_exceptions={environ_keyword})
        if login_file is None:
            raise ApiKeyFileError('login_file is required')
        if login_file.strip().lower() == environ_keyword:
            return False  # self.load_from_environ(error_not_found=error_not_found)
        if not error_not_found and not os.path.isfile(login_file):
            warn(f"Login file does not exist: {login_file}")
            return False
        with open(login_file, 'r') as f:
            first_line = f.readline()
            self.username = first_line.strip()
            second_line = f.readline()
            self.password = second_line.rstrip('\n')
        self.login_file = login_file
        return True

    def input(self):
        """
        Prompt the user to input the login credentials in the console window.
        The password is read with getpass.

        :return:
        """
        self.username = input("Please enter user name: ")
        self.password = getpass.getpass("Please enter password: ")

    @staticmethod
    def _setup_cli_parser(parser:argparse.ArgumentParser=None) -> argparse.ArgumentParser:
        """
        Add the --login-file argument to a parser.

        :param parser: parser to complete; a new one is created if None
        :return: the parser
        """
        cli_parser = parser
        if cli_parser is None:
            cli_parser = argparse.ArgumentParser(description="Login credentials initialization")
        cli_parser.add_argument("--login-file", type=str,
                                help="Path to a text file containing login credentials for authentification (user, password)")
        return cli_parser

    def _cli_args_apply(self, args: argparse.Namespace, *, base_dir: str = None, error_not_found: bool = True) -> None:
        """
        Load the credentials from the file given by the --login-file argument, if any.
        """
        login_file = args.login_file
        if login_file is None:
            return
        self.load_from_file(login_file, base_dir=base_dir, error_not_found=error_not_found)

    def to_tuple(self) -> Tuple[str,str]:
        """
        :return: (username, password)
        """
        return (self.username, self.password)

    @staticmethod
    def from_tuple(values: Tuple[str,str]) -> "Login":
        """
        Build a Login from positional constructor arguments, normally (username, password).
        """
        return Login(*values)

    def to_dict(self) -> Dict[str,str]:
        """
        :return: dictionary with the keys "username" and "password"
        """
        exported = {}
        exported["username"] = self.username
        exported["password"] = self.password
        return exported

    @staticmethod
    def from_dict(values: Dict[str,str]) -> "Login":
        """
        Build a Login from keyword constructor arguments, such as the output of to_dict.
        """
        return Login(**values)
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
class SSHLogin(Login):
    """
    Login credentials for SSH, using a dedicated command line argument and dedicated console prompts.
    """

    @staticmethod
    def _setup_cli_parser(parser:argparse.ArgumentParser=None) -> argparse.ArgumentParser:
        """
        Add the --ssh-login-file argument to a parser.

        :param parser: parser to complete; a new one is created if None
        :return: the parser
        """
        if parser is None:
            parser = argparse.ArgumentParser(description="SSH login credentials initialization")
        parser.add_argument("--ssh-login-file", type=str,
                            help="Path to a text file containing SSH login credentials for authentification (user, password)")
        return parser

    def _cli_args_apply(self, args: argparse.Namespace, *, base_dir: str = None, error_not_found: bool = True) -> None:
        """
        Load the credentials from the file given by the --ssh-login-file argument, if any.
        """
        # test the argument which is actually loaded: --ssh-login-file is stored by argparse as ssh_login_file
        if args.ssh_login_file is not None:
            self.load_from_file(args.ssh_login_file, base_dir=base_dir, error_not_found=error_not_found)

    def input(self):
        """
        Prompt the user to input the SSH login credentials in the console window.
        The password is read with getpass.

        :return:
        """
        value = input("Please enter SSH user name: ")
        self.username = value
        value = getpass.getpass("Please enter SSH password: ")
        self.password = value
|
|
163
|
+
|