ckanapi-harvesters 0.0.0__py3-none-any.whl → 0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. ckanapi_harvesters/__init__.py +32 -10
  2. ckanapi_harvesters/auxiliary/__init__.py +26 -0
  3. ckanapi_harvesters/auxiliary/ckan_action.py +93 -0
  4. ckanapi_harvesters/auxiliary/ckan_api_key.py +213 -0
  5. ckanapi_harvesters/auxiliary/ckan_auxiliary.py +293 -0
  6. ckanapi_harvesters/auxiliary/ckan_configuration.py +50 -0
  7. ckanapi_harvesters/auxiliary/ckan_defs.py +10 -0
  8. ckanapi_harvesters/auxiliary/ckan_errors.py +129 -0
  9. ckanapi_harvesters/auxiliary/ckan_map.py +509 -0
  10. ckanapi_harvesters/auxiliary/ckan_model.py +992 -0
  11. ckanapi_harvesters/auxiliary/ckan_vocabulary_deprecated.py +104 -0
  12. ckanapi_harvesters/auxiliary/deprecated.py +82 -0
  13. ckanapi_harvesters/auxiliary/error_level_message.py +51 -0
  14. ckanapi_harvesters/auxiliary/external_code_import.py +98 -0
  15. ckanapi_harvesters/auxiliary/list_records.py +60 -0
  16. ckanapi_harvesters/auxiliary/login.py +163 -0
  17. ckanapi_harvesters/auxiliary/path.py +208 -0
  18. ckanapi_harvesters/auxiliary/proxy_config.py +298 -0
  19. ckanapi_harvesters/auxiliary/urls.py +40 -0
  20. ckanapi_harvesters/builder/__init__.py +40 -0
  21. ckanapi_harvesters/builder/builder_aux.py +20 -0
  22. ckanapi_harvesters/builder/builder_ckan.py +238 -0
  23. ckanapi_harvesters/builder/builder_errors.py +36 -0
  24. ckanapi_harvesters/builder/builder_field.py +122 -0
  25. ckanapi_harvesters/builder/builder_package.py +9 -0
  26. ckanapi_harvesters/builder/builder_package_1_basic.py +1291 -0
  27. ckanapi_harvesters/builder/builder_package_2_harvesters.py +40 -0
  28. ckanapi_harvesters/builder/builder_package_3_multi_threaded.py +45 -0
  29. ckanapi_harvesters/builder/builder_package_example.xlsx +0 -0
  30. ckanapi_harvesters/builder/builder_resource.py +589 -0
  31. ckanapi_harvesters/builder/builder_resource_datastore.py +561 -0
  32. ckanapi_harvesters/builder/builder_resource_datastore_multi_abc.py +367 -0
  33. ckanapi_harvesters/builder/builder_resource_datastore_multi_folder.py +273 -0
  34. ckanapi_harvesters/builder/builder_resource_datastore_multi_harvester.py +278 -0
  35. ckanapi_harvesters/builder/builder_resource_datastore_unmanaged.py +145 -0
  36. ckanapi_harvesters/builder/builder_resource_datastore_url.py +150 -0
  37. ckanapi_harvesters/builder/builder_resource_init.py +126 -0
  38. ckanapi_harvesters/builder/builder_resource_multi_abc.py +361 -0
  39. ckanapi_harvesters/builder/builder_resource_multi_datastore.py +146 -0
  40. ckanapi_harvesters/builder/builder_resource_multi_file.py +505 -0
  41. ckanapi_harvesters/builder/example/__init__.py +21 -0
  42. ckanapi_harvesters/builder/example/builder_example.py +21 -0
  43. ckanapi_harvesters/builder/example/builder_example_aux_fun.py +24 -0
  44. ckanapi_harvesters/builder/example/builder_example_download.py +44 -0
  45. ckanapi_harvesters/builder/example/builder_example_generate_data.py +73 -0
  46. ckanapi_harvesters/builder/example/builder_example_patch_upload.py +51 -0
  47. ckanapi_harvesters/builder/example/builder_example_policy.py +114 -0
  48. ckanapi_harvesters/builder/example/builder_example_test_sql.py +53 -0
  49. ckanapi_harvesters/builder/example/builder_example_tests.py +87 -0
  50. ckanapi_harvesters/builder/example/builder_example_tests_offline.py +57 -0
  51. ckanapi_harvesters/builder/example/package/ckan-dpg.svg +74 -0
  52. ckanapi_harvesters/builder/example/package/users_local.csv +3 -0
  53. ckanapi_harvesters/builder/mapper_datastore.py +93 -0
  54. ckanapi_harvesters/builder/mapper_datastore_multi.py +262 -0
  55. ckanapi_harvesters/builder/specific/__init__.py +11 -0
  56. ckanapi_harvesters/builder/specific/configuration_builder.py +66 -0
  57. ckanapi_harvesters/builder/specific_builder_abc.py +23 -0
  58. ckanapi_harvesters/ckan_api/__init__.py +20 -0
  59. ckanapi_harvesters/ckan_api/ckan_api.py +11 -0
  60. ckanapi_harvesters/ckan_api/ckan_api_0_base.py +896 -0
  61. ckanapi_harvesters/ckan_api/ckan_api_1_map.py +1028 -0
  62. ckanapi_harvesters/ckan_api/ckan_api_2_readonly.py +934 -0
  63. ckanapi_harvesters/ckan_api/ckan_api_3_policy.py +229 -0
  64. ckanapi_harvesters/ckan_api/ckan_api_4_readwrite.py +579 -0
  65. ckanapi_harvesters/ckan_api/ckan_api_5_manage.py +1225 -0
  66. ckanapi_harvesters/ckan_api/ckan_api_params.py +192 -0
  67. ckanapi_harvesters/ckan_api/deprecated/__init__.py +9 -0
  68. ckanapi_harvesters/ckan_api/deprecated/ckan_api_deprecated.py +267 -0
  69. ckanapi_harvesters/ckan_api/deprecated/ckan_api_deprecated_vocabularies.py +189 -0
  70. ckanapi_harvesters/harvesters/__init__.py +23 -0
  71. ckanapi_harvesters/harvesters/data_cleaner/__init__.py +17 -0
  72. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_abc.py +240 -0
  73. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_errors.py +23 -0
  74. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload.py +9 -0
  75. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload_1_basic.py +430 -0
  76. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload_2_geom.py +98 -0
  77. ckanapi_harvesters/harvesters/file_formats/__init__.py +10 -0
  78. ckanapi_harvesters/harvesters/file_formats/csv_format.py +43 -0
  79. ckanapi_harvesters/harvesters/file_formats/file_format_abc.py +39 -0
  80. ckanapi_harvesters/harvesters/file_formats/file_format_init.py +25 -0
  81. ckanapi_harvesters/harvesters/file_formats/shp_format.py +129 -0
  82. ckanapi_harvesters/harvesters/harvester_abc.py +190 -0
  83. ckanapi_harvesters/harvesters/harvester_errors.py +31 -0
  84. ckanapi_harvesters/harvesters/harvester_init.py +30 -0
  85. ckanapi_harvesters/harvesters/harvester_model.py +49 -0
  86. ckanapi_harvesters/harvesters/harvester_params.py +323 -0
  87. ckanapi_harvesters/harvesters/postgre_harvester.py +495 -0
  88. ckanapi_harvesters/harvesters/postgre_params.py +86 -0
  89. ckanapi_harvesters/harvesters/pymongo_data_cleaner.py +173 -0
  90. ckanapi_harvesters/harvesters/pymongo_harvester.py +355 -0
  91. ckanapi_harvesters/harvesters/pymongo_params.py +54 -0
  92. ckanapi_harvesters/policies/__init__.py +20 -0
  93. ckanapi_harvesters/policies/data_format_policy.py +269 -0
  94. ckanapi_harvesters/policies/data_format_policy_abc.py +97 -0
  95. ckanapi_harvesters/policies/data_format_policy_custom_fields.py +156 -0
  96. ckanapi_harvesters/policies/data_format_policy_defs.py +135 -0
  97. ckanapi_harvesters/policies/data_format_policy_errors.py +79 -0
  98. ckanapi_harvesters/policies/data_format_policy_lists.py +234 -0
  99. ckanapi_harvesters/policies/data_format_policy_tag_groups.py +35 -0
  100. ckanapi_harvesters/reports/__init__.py +11 -0
  101. ckanapi_harvesters/reports/admin_report.py +292 -0
  102. {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.3.dist-info}/METADATA +84 -38
  103. ckanapi_harvesters-0.0.3.dist-info/RECORD +105 -0
  104. ckanapi_harvesters/divider/__init__.py +0 -27
  105. ckanapi_harvesters/divider/divider.py +0 -53
  106. ckanapi_harvesters/divider/divider_error.py +0 -59
  107. ckanapi_harvesters/main.py +0 -30
  108. ckanapi_harvesters-0.0.0.dist-info/RECORD +0 -9
  109. {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.3.dist-info}/WHEEL +0 -0
  110. {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,104 @@
1
+ #!python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ CKAN tag vocabulary information
5
+ """
6
+ from typing import List, Dict, Union
7
+ import copy
8
+
9
+ from ckanapi_harvesters.auxiliary.ckan_model import CkanTagInfo
10
+ from ckanapi_harvesters.auxiliary.ckan_map import CkanMapABC
11
+ from ckanapi_harvesters.auxiliary.ckan_errors import NotMappedObjectNameError
12
+
13
class CkanTagVocabularyInfo:
    """
    Description of a CKAN tag vocabulary, built from its dictionary representation.

    The source dictionary must provide the keys "name", "tags" (a list of tag dictionaries) and "id".
    """
    def __init__(self, d: dict):
        self.vocabulary_name: str = d["name"]
        # tags are indexed by their name
        tags_by_name: Dict[str, CkanTagInfo] = {}
        for tag_dict in d["tags"]:
            tags_by_name[tag_dict["name"]] = CkanTagInfo.from_dict(tag_dict)
        self.tags: Dict[str, CkanTagInfo] = tags_by_name
        self.id: str = d["id"]
        # the source dictionary is kept as is (no copy)
        self.details: dict = d

    def __str__(self):
        return f"Vocabulary '{self.vocabulary_name}' ({self.id})"

    def to_dict(self, include_details: bool = True) -> dict:
        """
        Export the vocabulary to a dictionary.

        :param include_details: if True, start from the source dictionary and override the keys "id", "name" and "tags"
        :return: a new dictionary
        """
        if include_details and self.details is not None:
            result = dict(self.details)
        else:
            result = dict()
        result["id"] = self.id
        result["name"] = self.vocabulary_name
        result["tags"] = [tag_info.to_dict() for tag_info in self.tags.values()]
        return result

    @staticmethod
    def from_dict(d: dict) -> "CkanTagVocabularyInfo":
        """
        Alternate constructor, equivalent to CkanTagVocabularyInfo(d).
        """
        return CkanTagVocabularyInfo(d)
34
+
35
+
36
class CkanVocabularyMap(CkanMapABC):
    """
    In-memory map of CKAN tag vocabularies, indexed by id and by name.
    """
    def __init__(self):
        self.vocabularies: Dict[str, CkanTagVocabularyInfo] = {}  # id -> info
        self.vocabulary_id_index: Dict[str, str] = {}  # name -> id
        self.vocabularies_listed: bool = False
        self._mapping_query_vocabulary_list: bool = True

    def purge(self):
        """
        Discard the mapped vocabularies and reset the "listed" flag.
        """
        # NOTE(review): the containers are set to None (not emptied), so lookups
        # and to_dict fail until the dictionaries are re-created - confirm this is intended.
        self.vocabularies = None
        self.vocabulary_id_index = None
        self.vocabularies_listed = False

    def copy(self) -> "CkanVocabularyMap":
        """
        Return a deep copy of the map.
        """
        return copy.deepcopy(self)

    def to_dict(self) -> dict:
        """
        Export the map to a dictionary with a single key "vocabularies" (list of vocabulary dictionaries).
        """
        return {"vocabularies": [vocabulary_info.to_dict() for vocabulary_info in self.vocabularies.values()],
                }

    def update_from_dict(self, data: dict) -> None:
        """
        Add or replace the vocabularies described in a dictionary produced by to_dict.

        :param data: dictionary with a "vocabularies" key holding a list of vocabulary dictionaries
        """
        # Read the same key to_dict writes ("vocabularies"), so that the
        # to_dict / from_dict round trip works.
        for vocabulary_dict in data["vocabularies"]:
            self._update_vocabulary_info(CkanTagVocabularyInfo.from_dict(vocabulary_dict))

    @staticmethod
    def from_dict(d: dict) -> "CkanVocabularyMap":
        """
        Build a new map from a dictionary produced by to_dict.
        """
        vocabulary_map = CkanVocabularyMap()
        vocabulary_map.update_from_dict(d)
        return vocabulary_map

    ## Vocabulary functions
    def get_vocabulary_id(self, vocabulary_name: str, *, error_not_mapped: bool = True, search_title: bool = True) -> Union[str, None]:
        """
        Retrieve the vocabulary id for a given vocabulary name based on the vocabulary map.

        :param vocabulary_name: vocabulary name or id.
        :param error_not_mapped: if True, raise NotMappedObjectNameError when the vocabulary is unknown; otherwise return None
        :param search_title: not used by this implementation
        :return: the vocabulary id, or None if it is not mapped and error_not_mapped is False
        :raises ValueError: if vocabulary_name is None
        """
        if vocabulary_name is None:
            raise ValueError("vocabulary_name cannot be None")
        if vocabulary_name in self.vocabularies:
            # recognized vocabulary_id
            return vocabulary_name
        if vocabulary_name in self.vocabulary_id_index:
            return self.vocabulary_id_index[vocabulary_name]
        if error_not_mapped:
            raise NotMappedObjectNameError(f"Vocabulary {vocabulary_name} is not mapped or does not exist.")
        return None

    def _update_vocabulary_info(self, vocabulary_info: Union[CkanTagVocabularyInfo, List[CkanTagVocabularyInfo]],
                                vocabularies_listed: bool = False) -> None:
        """
        Internal function to update the information of a vocabulary.

        :param vocabulary_info: one vocabulary or a list of vocabularies to add or replace
        :param vocabularies_listed: if True, flag the map as holding the vocabulary list (the flag is never reset here)
        """
        if not isinstance(vocabulary_info, list):
            vocabulary_info = [vocabulary_info]
        for vocab_info in vocabulary_info:
            self.vocabularies[vocab_info.id] = vocab_info
            self.vocabulary_id_index[vocab_info.vocabulary_name] = vocab_info.id
        if vocabularies_listed:
            self.vocabularies_listed = True

    def _record_vocabulary_delete(self, vocabulary_id: str) -> None:
        """
        Remove a vocabulary from the map and from the name index.

        :param vocabulary_id: id of a mapped vocabulary (KeyError if it is not mapped)
        """
        # only pass in delete state
        vocabulary_info = self.vocabularies[vocabulary_id]
        self.vocabulary_id_index.pop(vocabulary_info.vocabulary_name)
        self.vocabularies.pop(vocabulary_id)
103
+
104
+
@@ -0,0 +1,82 @@
1
+ #!python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Dead code from auxiliary functions
5
+ """
6
+
7
+ from enum import IntEnum
8
+
9
+ import numpy as np
10
+
11
+
12
class CkanBasicDataFieldType(IntEnum):
    """
    Basic data field types, with a Default member meaning "no override".
    """
    Default = 0  # no override
    Text = 1
    Numeric = 2
    TimeStamp = 3

    def __str__(self):
        """
        Lower-case member name, or an empty string for Default.
        """
        if self == CkanBasicDataFieldType.Default:
            return ""
        else:
            return self.name.lower()

    @staticmethod
    def from_str(s):
        """
        Parse a field type from its string representation (case and surrounding spaces are ignored).

        :param s: "text", "numeric", "timestamp", an empty string, or NaN (missing cell)
        :return: the matching member; Default for an empty string or NaN
        :raises ValueError: if the value is not recognized
        """
        if not isinstance(s, str):
            # Non-string input: only NaN is accepted (as Default).
            # np.isnan raises TypeError for unsupported types such as None.
            try:
                is_nan = bool(np.isnan(s))
            except TypeError:
                is_nan = False
            if is_nan:
                return CkanBasicDataFieldType.Default
            raise ValueError(s)
        s = s.lower().strip()
        if s == "text":
            return CkanBasicDataFieldType.Text
        elif s == "numeric":
            return CkanBasicDataFieldType.Numeric
        elif s == "timestamp":
            return CkanBasicDataFieldType.TimeStamp
        elif s == "":
            return CkanBasicDataFieldType.Default
        else:
            raise ValueError(s)
37
+
38
class CkanCollaboratorCapacity(IntEnum):
    """
    Collaboration capacities of users associated to a package/dataset
    """
    Excluded = 0
    Member = 1
    Editor = 2

    def __str__(self):
        """
        Lower-case member name.
        """
        return self.name.lower()

    @staticmethod
    def from_str(s):
        """
        Parse a capacity from its name (case and surrounding spaces are ignored).

        :raises ValueError: if the name is not recognized
        """
        key = s.lower().strip()
        capacities = {
            "excluded": CkanCollaboratorCapacity.Excluded,
            "member": CkanCollaboratorCapacity.Member,
            "editor": CkanCollaboratorCapacity.Editor,
        }
        if key not in capacities:
            raise ValueError(key)
        return capacities[key]
60
+
61
class CkanGroupCapacity(IntEnum):
    """
    Capacities of users in a group
    """
    Excluded = 0
    Member = 1
    Admin = 3

    def __str__(self):
        """
        Lower-case member name.
        """
        return self.name.lower()

    @staticmethod
    def from_str(s):
        """
        Parse a capacity from its name (case and surrounding spaces are ignored).

        :raises ValueError: if the name is not recognized
        """
        key = s.lower().strip()
        capacities = {
            "excluded": CkanGroupCapacity.Excluded,
            "member": CkanGroupCapacity.Member,
            "admin": CkanGroupCapacity.Admin,
        }
        if key not in capacities:
            raise ValueError(key)
        return capacities[key]
@@ -0,0 +1,51 @@
1
+ #!python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Functions to define messages with an error level
5
+ """
6
+ from enum import IntEnum
7
+ from collections import OrderedDict
8
+ from warnings import warn
9
+
10
+
11
class ErrorLevel(IntEnum):
    """
    Severity attached to a message, ordered from Information to Error.
    """
    Information = 0
    Warning = 1
    Error = 2

    def __str__(self):
        """
        Lower-case member name.
        """
        return self.name.lower()

    @staticmethod
    def from_str(s):
        """
        Parse a level from its name (case and surrounding spaces are ignored).

        :raises ValueError: if the name is not recognized
        """
        key = s.lower().strip()
        levels = {
            "information": ErrorLevel.Information,
            "warning": ErrorLevel.Warning,
            "error": ErrorLevel.Error,
        }
        if key not in levels:
            raise ValueError(key)
        return levels[key]
30
+
31
+
32
class ErrorLevelMessage(Exception):
    """
    Exception carrying a message together with its error level.
    """
    def __init__(self, error_level: ErrorLevel, message: str):
        self.error_level: ErrorLevel = error_level
        self.message: str = message
        super().__init__(message)

    def to_dict(self) -> dict:
        """
        Export the message as an ordered dictionary with the keys "level" and "message".
        """
        result = OrderedDict()
        result["level"] = str(self.error_level)
        result["message"] = self.message
        return result
43
+
44
+
45
class ContextErrorLevelMessage(ErrorLevelMessage):
    """
    Error level message prefixed with the context in which it was raised.
    """
    def __init__(self, context: str, error_level: ErrorLevel, specific_message: str):
        # the full message combines the context, the level name and the specific message
        super().__init__(error_level, f"In {context} / {error_level.name}: {specific_message}")
        self.specific_message: str = specific_message
        self.context: str = context
51
+
@@ -0,0 +1,98 @@
1
+ #!python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ This implements functionality to dynamically call functions specified by the user.
5
+ This functionality is disabled by default. You must call unlock_external_code_execution to enable external code execution.
6
+ __Warning__:
7
+ only run code if you trust the source!
8
+ """
9
+ from typing import Callable
10
+ import os
11
+ import re
12
+ from warnings import warn
13
+ import importlib
14
+ import importlib.util
15
+
16
+ from ckanapi_harvesters.auxiliary.path import path_rel_to_dir
17
+
18
+
19
+
20
def unlock_external_code_execution(value: bool = True) -> None:
    """
    Enable (or disable) external code execution for the PythonUserCode class.

    __Warning__:
    only run code if you trust the source!

    :param value: True to allow external code execution (a warning is emitted), False to lock it again
    :return:
    """
    PythonUserCode.enable_external_code = value
    if not value:
        return
    warn("External code is enabled. Only run code if you trust the source!")
33
+
34
+
35
# Matches any non-word character, or the empty position before a leading digit.
# Raw string: "\W" and "\d" are regex escapes, not string escapes.
var_name_subs_re = r'\W|^(?=\d)'

def clean_var_name(variable_name: str) -> str:
    """
    Turn an arbitrary string into a valid identifier-like name.

    Every non-word character is replaced by "_" and a "_" is inserted before a leading digit.

    :param variable_name: input string
    :return: the cleaned name
    """
    return re.sub(var_name_subs_re, '_', variable_name)
39
+
40
+
41
class ExternalUserCodeDisabledException(Exception):
    """
    Raised when external code is requested while external code execution is locked.
    """
    def __init__(self, function_name: str, source_file: str) -> None:
        message = f"{function_name} in {source_file} cannot be executed because the external code execution is locked. Use unlock_external_code_execution to unlock. Warning: Only run code if you trust the source!"
        super().__init__(message)
44
+
45
+
46
class PythonUserCode:
    """
    This class imports an arbitrary Python file as a module and makes it available to the rest of the code.
    This functionality is disabled by default. You must call unlock_external_code_execution to enable external code execution.

    __Warning__:
    only run code if you trust the source!
    """
    enable_external_code = False  # remain False to ensure no custom code is executed from the builder specification

    def __init__(self, python_file: str, base_dir: str = None):
        """
        Import the given Python file as a module, if external code execution is enabled.

        :param python_file: path to the Python file; None creates an empty object (used by copy)
        :param base_dir: base directory passed to path_rel_to_dir to resolve python_file
        :raises ExternalUserCodeDisabledException: if a file is given while external code execution is locked
        :raises ImportError: if no import spec/loader can be created for the file
        """
        self.python_file: str = ""
        self.module = None
        self.imported_module_name: str = ""
        if python_file is None:
            # only use None argument to initialize copy
            return
        self.python_file = path_rel_to_dir(python_file, base_dir=base_dir)
        if not PythonUserCode.enable_external_code:
            # External python script execution is locked
            raise ExternalUserCodeDisabledException("code", os.path.split(self.python_file)[1])
        module_base = clean_var_name(os.path.splitext(os.path.split(self.python_file)[1])[0])
        self.imported_module_name = "ckan_builder_aux_funcs__" + module_base
        spec = importlib.util.spec_from_file_location(self.imported_module_name, self.python_file)
        if spec is None or spec.loader is None:
            # spec_from_file_location may return None (or a spec without loader)
            # for a file it cannot import; fail with an explicit error.
            raise ImportError(f"Cannot import Python user code from file: {self.python_file}")
        self.module = importlib.util.module_from_spec(spec)
        # This runs the code of the user file.
        spec.loader.exec_module(self.module)

    def __copy__(self):
        return self.copy()

    def copy(self) -> "PythonUserCode":
        """
        Return a new object sharing the already imported module.
        """
        # do not execute module twice and do not make copies either
        dest = PythonUserCode(python_file=None, base_dir=None)
        dest.module = self.module
        dest.imported_module_name = self.imported_module_name
        dest.python_file = self.python_file
        return dest

    def function_pointer(self, function_name: str) -> Callable:
        """
        Obtain function pointer for a given name in the loaded Python module.

        :param function_name: name of an attribute of the loaded module
        :return: the callable found under that name
        :raises ExternalUserCodeDisabledException: if external code execution is locked
        :raises AttributeError: if the module has no attribute with that name
        :raises TypeError: if the attribute is not callable
        """
        if not PythonUserCode.enable_external_code:
            raise ExternalUserCodeDisabledException(function_name, os.path.split(self.python_file)[1])
        fun = getattr(self.module, function_name)
        # Explicit check instead of assert: asserts are removed when Python runs with -O.
        if not callable(fun):
            raise TypeError(f"{function_name} in {os.path.split(self.python_file)[1]} is not callable")
        return fun
98
+
@@ -0,0 +1,60 @@
1
+ #!python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Give partial DataFrame behavior to a list of dictionaries
5
+ """
6
+ import pandas as pd
7
+ import numpy as np
8
+ from typing import Union, List
9
+
10
class _ListRecords_index:
    """
    Read-only indexer forwarding item access to a parent object (returned by ListRecords.iloc).
    """
    def __init__(self, parent) -> None:
        self.parent = parent

    def __getitem__(self, slice):
        # delegate indexing / slicing to the parent
        target = self.parent
        return target[slice]

    # item assignment is not supported:
    # def __setitem__(self, slice, value):
    #     self.parent[slice] = value
19
+
20
class ListRecords(list):  # List[dict]
    """
    Give partial DataFrame behavior to a list of dictionaries
    """
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # column names are taken from the keys of the first record only
        has_records = len(self) > 0
        self.columns: Union[List[str], None] = list(self[0].keys()) if has_records else []

    @property
    def iloc(self):
        """
        Indexer giving positional access to the records.
        """
        return _ListRecords_index(self)
35
+
36
def records_to_df(records: Union[List[dict], "ListRecords"], df_args:dict=None, *,
                  missing_value="", none_value="None") -> pd.DataFrame:
    """
    Keep source values (lesser type inference) and replace cells with missing keys with a fixed value.
    None values are also preserved using the none_value.

    :param records: input data
    :param df_args: arguments to pass to DataFrame constructor
    :param missing_value: value to set if a column is not specified on a row.
    :param none_value: value to set if a value is None in the input data.
    :return: a DataFrame built with dtype=object
    """
    if df_args is None:
        df_args = {}
    df = pd.DataFrame(records, dtype=object, **df_args)
    fieldnames = list(df.columns)
    # Write cells by position rather than by label: rows follow the order of
    # the records, and an index supplied through df_args may hold duplicate labels.
    for row_pos, record in zip(range(len(df)), records):
        for col_pos, field in enumerate(fieldnames):
            if field not in record:
                df.iat[row_pos, col_pos] = missing_value
            elif record[field] is None:
                df.iat[row_pos, col_pos] = none_value
    return df
@@ -0,0 +1,163 @@
1
+ #!python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Methods to load authentication credentials (user, password)
5
+ """
6
+ from typing import Union, Tuple, Dict
7
+ import getpass
8
+ from warnings import warn
9
+ import os
10
+ import argparse
11
+ import shlex
12
+
13
+ from ckanapi_harvesters.auxiliary.path import path_rel_to_dir
14
+ from ckanapi_harvesters.auxiliary.ckan_defs import environ_keyword
15
+ from ckanapi_harvesters.auxiliary.ckan_errors import ApiKeyFileError
16
+
17
+
18
class Login:
    """
    Holder for login credentials (user name, password), optionally loaded from a text file.
    """
    def __init__(self, username:str=None, password:str=None, login_file:str=None):
        self._username: Union[str,None] = username
        self._password: Union[str,None] = password
        # path to a text file with the user name in the first line and the password in the second line (optional)
        self.login_file: Union[str,None] = login_file

    def __del__(self):
        # drop the references to the credentials when the object is destroyed
        self.clear()

    def __copy__(self):
        return self.copy()

    def copy(self, *, dest=None):
        """
        Copy the credentials and the login file path to another object.

        :param dest: object to fill; a new Login is created if None
        :return: the destination object
        """
        if dest is None:
            dest = Login()
        dest.login_file = self.login_file
        dest._username = self._username
        dest._password = self._password
        return dest

    def clear(self) -> None:
        """
        Reset the user name and password to None (login_file is kept).
        """
        self._username = None
        self._password = None

    def is_empty(self):
        """
        True if the user name or the password is None.
        """
        return self._username is None or self._password is None

    def __str__(self):
        # never reveal the credentials: only report which parts are empty
        if self.is_empty():
            return "None"
        elif self._username == "" and self._password == "":
            return "<empty string>"
        elif self._username == "":
            return "<empty user>"
        elif self._password == "":
            return "<empty password>"
        else:
            return "*****"

    @property
    def username(self) -> Union[str,None]:
        return self._username
    @username.setter
    def username(self, value:Union[str,None]):
        self._username = value
    @property
    def password(self) -> Union[str,None]:
        return self._password
    @password.setter
    def password(self, value:Union[str,None]):
        self._password = value

    def load_from_file(self, login_file:str=None, *, base_dir:str=None, error_not_found:bool=True) -> bool:
        """
        Load the credentials from file.
        The file should contain username in first line and password in second line.

        :param login_file: path to the login file (defaults to self.login_file).
            The keyword "environ" is recognized but not implemented here: nothing is loaded and False is returned.
        :param base_dir: base directory to find the login file, if a relative path is provided
        :param error_not_found: if False, a missing file only emits a warning and False is returned;
            if True, opening a missing file raises the usual exception
        :return: True if the credentials were loaded
        :raises ApiKeyFileError: if no login file is specified
        """
        if login_file is None:
            login_file = self.login_file
        login_file = path_rel_to_dir(login_file, base_dir=base_dir, keyword_exceptions={environ_keyword})
        if login_file is None:
            raise ApiKeyFileError('login_file is required')
        keyword = login_file.strip().lower()
        if keyword == environ_keyword:
            return False  # self.load_from_environ(error_not_found=error_not_found)
        if not(os.path.isfile(login_file)) and not error_not_found:
            msg = f"Login file does not exist: {login_file}"
            warn(msg)
            return False
        # the with statement closes the file
        with open(login_file, 'r') as f:
            self.username = f.readline().strip()
            # only the line break is removed: spaces are kept in the password
            self.password = f.readline().rstrip('\n')
        self.login_file = login_file
        return True

    def input(self):
        """
        Prompt the user to input the login credentials in the console window.

        :return:
        """
        value = input("Please enter user name: ")
        self.username = value
        value = getpass.getpass("Please enter password: ")
        self.password = value

    @staticmethod
    def _setup_cli_parser(parser:argparse.ArgumentParser=None) -> argparse.ArgumentParser:
        """
        Add the --login-file argument to a parser (a new parser is created if None).
        """
        if parser is None:
            parser = argparse.ArgumentParser(description="Login credentials initialization")
        parser.add_argument("--login-file", type=str,
                            help="Path to a text file containing login credentials for authentification (user, password)")
        return parser

    def _cli_args_apply(self, args: argparse.Namespace, *, base_dir: str = None, error_not_found: bool = True) -> None:
        """
        Load the credentials from the file given by the --login-file argument, if any.
        """
        if args.login_file is not None:
            self.load_from_file(args.login_file, base_dir=base_dir, error_not_found=error_not_found)

    def to_tuple(self) -> Tuple[str,str]:
        return self.username, self.password

    @staticmethod
    def from_tuple(values: Tuple[str,str]) -> "Login":
        """
        Build a Login from positional constructor arguments (username, password).
        """
        login = Login(*values)
        return login

    def to_dict(self) -> Dict[str,str]:
        return {"username": self.username, "password": self.password}

    @staticmethod
    def from_dict(values: Dict[str,str]) -> "Login":
        """
        Build a Login from keyword constructor arguments (e.g. the output of to_dict).
        """
        login = Login(**values)
        return login
138
+
139
+
140
class SSHLogin(Login):
    """
    Login credentials for an SSH connection, with their own CLI argument and prompts.
    """
    @staticmethod
    def _setup_cli_parser(parser:argparse.ArgumentParser=None) -> argparse.ArgumentParser:
        """
        Add the --ssh-login-file argument to a parser (a new parser is created if None).
        """
        if parser is None:
            parser = argparse.ArgumentParser(description="SSH login credentials initialization")
        parser.add_argument("--ssh-login-file", type=str,
                            help="Path to a text file containing SSH login credentials for authentification (user, password)")
        return parser

    def _cli_args_apply(self, args: argparse.Namespace, *, base_dir: str = None, error_not_found: bool = True) -> None:
        """
        Load the credentials from the file given by the --ssh-login-file argument, if any.
        """
        # Test the same argument that is loaded (ssh_login_file, not login_file).
        if args.ssh_login_file is not None:
            self.load_from_file(args.ssh_login_file, base_dir=base_dir, error_not_found=error_not_found)

    def input(self):
        """
        Prompt the user to input the login credentials in the console window.

        :return:
        """
        value = input("Please enter SSH user name: ")
        self.username = value
        value = getpass.getpass("Please enter SSH password: ")
        self.password = value
163
+