ckanapi-harvesters 0.0.0__py3-none-any.whl → 0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. ckanapi_harvesters/__init__.py +32 -10
  2. ckanapi_harvesters/auxiliary/__init__.py +26 -0
  3. ckanapi_harvesters/auxiliary/ckan_action.py +93 -0
  4. ckanapi_harvesters/auxiliary/ckan_api_key.py +213 -0
  5. ckanapi_harvesters/auxiliary/ckan_auxiliary.py +293 -0
  6. ckanapi_harvesters/auxiliary/ckan_configuration.py +50 -0
  7. ckanapi_harvesters/auxiliary/ckan_defs.py +10 -0
  8. ckanapi_harvesters/auxiliary/ckan_errors.py +129 -0
  9. ckanapi_harvesters/auxiliary/ckan_map.py +509 -0
  10. ckanapi_harvesters/auxiliary/ckan_model.py +992 -0
  11. ckanapi_harvesters/auxiliary/ckan_vocabulary_deprecated.py +104 -0
  12. ckanapi_harvesters/auxiliary/deprecated.py +82 -0
  13. ckanapi_harvesters/auxiliary/error_level_message.py +51 -0
  14. ckanapi_harvesters/auxiliary/external_code_import.py +98 -0
  15. ckanapi_harvesters/auxiliary/list_records.py +60 -0
  16. ckanapi_harvesters/auxiliary/login.py +163 -0
  17. ckanapi_harvesters/auxiliary/path.py +208 -0
  18. ckanapi_harvesters/auxiliary/proxy_config.py +298 -0
  19. ckanapi_harvesters/auxiliary/urls.py +40 -0
  20. ckanapi_harvesters/builder/__init__.py +40 -0
  21. ckanapi_harvesters/builder/builder_aux.py +20 -0
  22. ckanapi_harvesters/builder/builder_ckan.py +238 -0
  23. ckanapi_harvesters/builder/builder_errors.py +36 -0
  24. ckanapi_harvesters/builder/builder_field.py +122 -0
  25. ckanapi_harvesters/builder/builder_package.py +9 -0
  26. ckanapi_harvesters/builder/builder_package_1_basic.py +1291 -0
  27. ckanapi_harvesters/builder/builder_package_2_harvesters.py +40 -0
  28. ckanapi_harvesters/builder/builder_package_3_multi_threaded.py +45 -0
  29. ckanapi_harvesters/builder/builder_package_example.xlsx +0 -0
  30. ckanapi_harvesters/builder/builder_resource.py +589 -0
  31. ckanapi_harvesters/builder/builder_resource_datastore.py +561 -0
  32. ckanapi_harvesters/builder/builder_resource_datastore_multi_abc.py +367 -0
  33. ckanapi_harvesters/builder/builder_resource_datastore_multi_folder.py +273 -0
  34. ckanapi_harvesters/builder/builder_resource_datastore_multi_harvester.py +278 -0
  35. ckanapi_harvesters/builder/builder_resource_datastore_unmanaged.py +145 -0
  36. ckanapi_harvesters/builder/builder_resource_datastore_url.py +150 -0
  37. ckanapi_harvesters/builder/builder_resource_init.py +126 -0
  38. ckanapi_harvesters/builder/builder_resource_multi_abc.py +361 -0
  39. ckanapi_harvesters/builder/builder_resource_multi_datastore.py +146 -0
  40. ckanapi_harvesters/builder/builder_resource_multi_file.py +505 -0
  41. ckanapi_harvesters/builder/example/__init__.py +21 -0
  42. ckanapi_harvesters/builder/example/builder_example.py +21 -0
  43. ckanapi_harvesters/builder/example/builder_example_aux_fun.py +24 -0
  44. ckanapi_harvesters/builder/example/builder_example_download.py +44 -0
  45. ckanapi_harvesters/builder/example/builder_example_generate_data.py +73 -0
  46. ckanapi_harvesters/builder/example/builder_example_patch_upload.py +51 -0
  47. ckanapi_harvesters/builder/example/builder_example_policy.py +114 -0
  48. ckanapi_harvesters/builder/example/builder_example_test_sql.py +53 -0
  49. ckanapi_harvesters/builder/example/builder_example_tests.py +87 -0
  50. ckanapi_harvesters/builder/example/builder_example_tests_offline.py +57 -0
  51. ckanapi_harvesters/builder/example/package/ckan-dpg.svg +74 -0
  52. ckanapi_harvesters/builder/example/package/users_local.csv +3 -0
  53. ckanapi_harvesters/builder/mapper_datastore.py +93 -0
  54. ckanapi_harvesters/builder/mapper_datastore_multi.py +262 -0
  55. ckanapi_harvesters/builder/specific/__init__.py +11 -0
  56. ckanapi_harvesters/builder/specific/configuration_builder.py +66 -0
  57. ckanapi_harvesters/builder/specific_builder_abc.py +23 -0
  58. ckanapi_harvesters/ckan_api/__init__.py +20 -0
  59. ckanapi_harvesters/ckan_api/ckan_api.py +11 -0
  60. ckanapi_harvesters/ckan_api/ckan_api_0_base.py +896 -0
  61. ckanapi_harvesters/ckan_api/ckan_api_1_map.py +1028 -0
  62. ckanapi_harvesters/ckan_api/ckan_api_2_readonly.py +934 -0
  63. ckanapi_harvesters/ckan_api/ckan_api_3_policy.py +229 -0
  64. ckanapi_harvesters/ckan_api/ckan_api_4_readwrite.py +579 -0
  65. ckanapi_harvesters/ckan_api/ckan_api_5_manage.py +1225 -0
  66. ckanapi_harvesters/ckan_api/ckan_api_params.py +192 -0
  67. ckanapi_harvesters/ckan_api/deprecated/__init__.py +9 -0
  68. ckanapi_harvesters/ckan_api/deprecated/ckan_api_deprecated.py +267 -0
  69. ckanapi_harvesters/ckan_api/deprecated/ckan_api_deprecated_vocabularies.py +189 -0
  70. ckanapi_harvesters/harvesters/__init__.py +23 -0
  71. ckanapi_harvesters/harvesters/data_cleaner/__init__.py +17 -0
  72. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_abc.py +240 -0
  73. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_errors.py +23 -0
  74. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload.py +9 -0
  75. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload_1_basic.py +430 -0
  76. ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload_2_geom.py +98 -0
  77. ckanapi_harvesters/harvesters/file_formats/__init__.py +10 -0
  78. ckanapi_harvesters/harvesters/file_formats/csv_format.py +43 -0
  79. ckanapi_harvesters/harvesters/file_formats/file_format_abc.py +39 -0
  80. ckanapi_harvesters/harvesters/file_formats/file_format_init.py +25 -0
  81. ckanapi_harvesters/harvesters/file_formats/shp_format.py +129 -0
  82. ckanapi_harvesters/harvesters/harvester_abc.py +190 -0
  83. ckanapi_harvesters/harvesters/harvester_errors.py +31 -0
  84. ckanapi_harvesters/harvesters/harvester_init.py +30 -0
  85. ckanapi_harvesters/harvesters/harvester_model.py +49 -0
  86. ckanapi_harvesters/harvesters/harvester_params.py +323 -0
  87. ckanapi_harvesters/harvesters/postgre_harvester.py +495 -0
  88. ckanapi_harvesters/harvesters/postgre_params.py +86 -0
  89. ckanapi_harvesters/harvesters/pymongo_data_cleaner.py +173 -0
  90. ckanapi_harvesters/harvesters/pymongo_harvester.py +355 -0
  91. ckanapi_harvesters/harvesters/pymongo_params.py +54 -0
  92. ckanapi_harvesters/policies/__init__.py +20 -0
  93. ckanapi_harvesters/policies/data_format_policy.py +269 -0
  94. ckanapi_harvesters/policies/data_format_policy_abc.py +97 -0
  95. ckanapi_harvesters/policies/data_format_policy_custom_fields.py +156 -0
  96. ckanapi_harvesters/policies/data_format_policy_defs.py +135 -0
  97. ckanapi_harvesters/policies/data_format_policy_errors.py +79 -0
  98. ckanapi_harvesters/policies/data_format_policy_lists.py +234 -0
  99. ckanapi_harvesters/policies/data_format_policy_tag_groups.py +35 -0
  100. ckanapi_harvesters/reports/__init__.py +11 -0
  101. ckanapi_harvesters/reports/admin_report.py +292 -0
  102. {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.3.dist-info}/METADATA +84 -38
  103. ckanapi_harvesters-0.0.3.dist-info/RECORD +105 -0
  104. ckanapi_harvesters/divider/__init__.py +0 -27
  105. ckanapi_harvesters/divider/divider.py +0 -53
  106. ckanapi_harvesters/divider/divider_error.py +0 -59
  107. ckanapi_harvesters/main.py +0 -30
  108. ckanapi_harvesters-0.0.0.dist-info/RECORD +0 -9
  109. {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.3.dist-info}/WHEEL +0 -0
  110. {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,208 @@
1
+ #!python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Extensions of os.path and operations on urls
5
+ """
6
+ from typing import Union, Set, List
7
+ from warnings import warn
8
+ import os
9
+ import re
10
+
11
+
12
disable_relative_path_constraint = False


def unlock_relative_path_constraint(value:bool=True) -> None:
    """
    Set the module-level flag disable_relative_path_constraint.

    resolve_rel_path reads this flag to decide whether an absolute path raises an error
    (flag False) or only emits a warning (flag True).

    :param value: new value of the flag; True (default) disables the constraint and emits a warning.
    :return: None
    """
    global disable_relative_path_constraint
    disable_relative_path_constraint = value
    if not value:
        # constraint (re-)enabled: nothing to report
        return
    warn("Relative path constraint is disabled to your own risk.")
25
+
26
+
27
class BaseDirUndefError(Exception):
    """Error raised when a path cannot be resolved because no base directory was provided."""

    def __init__(self, path: str):
        # path: the path that could not be resolved, reported in the message
        message = f"Could not determine the file path because no base_dir was provided: {path}"
        super().__init__(message)
30
+
31
class AbsolutePathError(Exception):
    """Error describing an absolute path given for a field where a relative path is expected."""

    def __init__(self, field:str, path: str):
        # field: name of the field holding the path; path: the offending path as provided
        description = f"A relative path is highly suggested for this field ({field}): {path}."
        hint = " To disable this error message, run unlock_relative_path_constraint()."
        super().__init__(description + hint)
34
+
35
+
36
def sanitize_path(path:Union[str,None],
                  *, expand_path:bool=False, keyword_exceptions:Set[str]=None) -> Union[str,None]:
    """
    Sanitize paths from user inputs.

    Both "/" and "\\" are replaced by the platform path separator. Optionally, environment
    variables and a leading "~" are expanded.

    :param path: the path to sanitize; None is returned unchanged.
    :param expand_path: if True, expand environment variables and the home directory symbol (~).
    :param keyword_exceptions: only used with expand_path. If the lower-cased and stripped path
        is one of these keywords, the keyword is returned instead of a path. Defaults to no keyword.
    :return: the sanitized path, a keyword, or None
    """
    if path is None:
        return None
    # a plain string replacement avoids any regular expression escaping issue with "\"
    path = path.replace("\\", os.path.sep).replace("/", os.path.sep)
    if not expand_path:
        return path
    if keyword_exceptions is None:
        # the default value must behave as an empty set (testing "in None" raises a TypeError)
        keyword_exceptions = set()
    path = os.path.expandvars(path)
    path_keyword = path.lower().strip()
    if path_keyword in keyword_exceptions:
        return path_keyword
    return os.path.expanduser(path)  # cover the case where the path starts with '~'
56
+
57
+
58
def path_rel_to_dir(path:Union[str,None], base_dir:str=None, *, keyword_exceptions:Set[str]=None,
                    error_base_dir_undef:bool=False, default_value:str=None,
                    only_relative:bool=False, abs_error:bool=False, field:str=None) -> Union[str,None]:
    """
    Resolve a path given by the user. A relative path is joined to base_dir if provided.
    If base_dir is not specified, the cwd is used (or an error is raised, see error_base_dir_undef).

    Surrounding whitespace is removed before the path is analyzed, so that the absolute path
    verification cannot be bypassed by a leading space.

    :param path: original path string
    :param base_dir: the base directory, for relative paths if provided (default = cwd)
    :param keyword_exceptions: some values are not replaced and must be treated after this function call.
    :param error_base_dir_undef: Option to raise an error if no base_dir was provided (cwd is used by default).
    :param default_value: the value to return if path is None.
    :param only_relative: If set to True, a warning or error message is raised if an absolute path is provided.
    :param abs_error: Condition to choose between a warning or an error message.
    :param field: name of the field for the error message.
    :return: resolved path, keyword (lower case, stripped) or default_value
    :raises AbsolutePathError: absolute path provided with only_relative and abs_error
    :raises BaseDirUndefError: relative path without base_dir and error_base_dir_undef
    """
    if keyword_exceptions is None:
        keyword_exceptions = set()
    path_src = path  # kept untouched for the error message
    path = sanitize_path(path)
    if path is None:
        return default_value
    path = os.path.expandvars(path)  # replace environment variables mentioned in the path, if any
    path_keyword = path.lower().strip()
    if path_keyword in keyword_exceptions:
        return path_keyword
    # strip before any test: every branch below must see the same string
    path = os.path.expanduser(path.strip())  # cover the case where the path starts with '~'
    if os.path.isabs(path):
        if only_relative:
            error = AbsolutePathError(field, path_src)
            if abs_error:
                raise error
            warn(str(error))
        return path
    if base_dir is not None:
        return os.path.join(base_dir, path)
    if error_base_dir_undef:
        raise BaseDirUndefError(path)
    return os.path.abspath(path)
101
+
102
+
103
def resolve_rel_path(base_dir:str, rel_path:str, *args: str, field:str, only_relative:bool=True) -> str:
    """
    Wrapper of path_rel_to_dir taking its arguments in the order of os.path.join.

    The path elements are joined, then resolved against base_dir. An absolute path raises an
    error, or only a warning once unlock_relative_path_constraint has been called.
    A missing base_dir is always an error.

    :param base_dir: base directory for the relative path
    :param rel_path: first element of the relative path
    :param args: additional path elements, appended with os.path.join
    :param field: name of the field for the error message.
    :param only_relative: set to False to accept absolute paths silently
    :return: the resolved path
    """
    joined_path = os.path.join(rel_path, *args) if len(args) > 0 else rel_path
    # the module flag turns the error into a warning
    raise_on_absolute = not disable_relative_path_constraint
    return path_rel_to_dir(joined_path, base_dir=base_dir, field=field,
                           only_relative=only_relative, abs_error=raise_on_absolute,
                           error_base_dir_undef=True)
116
+
117
+
118
+ # reverse function:
119
def make_path_relative(path:str, to_base_dir:str = None, *, default_value:str=None,
                       source_string:str=None, keyword_exceptions:Set[str]=None, same_destination:bool=True) -> str:
    """
    Reverse operation of path_rel_to_dir: express a path relative to a new base directory.

    This is used when a document is saved to a new location: relative paths are rewritten to point
    to the same destination. If same_destination is False and the path written in the original
    document (source_string) is relative, that original string is returned as-is.

    :param path: full file path (absolute, ideally output from path_rel_to_dir)
    :param to_base_dir: the new base directory, to derive the relative paths from
    :param default_value: the value to return if the path is None
    :param source_string: string representing the path in the original document, without any treatments
    :param keyword_exceptions: keywords to return as-is
    :param same_destination: see above
    :return: path relative to to_base_dir or keyword/path relative to environment variable/home directory symbol (~)
    """
    symbolic_prefixes = ('~', '$')  # home directory symbol / environment variable
    if path is None:
        return default_value
    if source_string is not None:
        keep_source = (not same_destination) and (not os.path.isabs(source_string))
        if keep_source:
            return source_string
        known_keywords = set() if keyword_exceptions is None else keyword_exceptions
        source_string = sanitize_path(source_string)
        source_keyword = source_string.lower().strip()
        if source_keyword in known_keywords:
            return source_keyword
        if source_string.startswith(symbolic_prefixes):
            return source_string
    if path.startswith(symbolic_prefixes):
        return path
    if to_base_dir is None:
        return path
    return os.path.relpath(path, to_base_dir)
153
+
154
+ # File search
155
def list_files_scandir(path:str) -> List[str]:
    """
    List the files of a directory and of all its sub-directories.

    No filter is applied (see glob.glob for filtered searches).

    :param path: directory to explore
    :return: paths of the files found, as reported by os.scandir
    """
    found_files = []
    # the context manager closes the iterator on exit
    with os.scandir(path) as scan_iterator:
        for item in scan_iterator:
            if item.is_file():
                found_files.append(item.path)
            elif item.is_dir():
                found_files.extend(list_files_scandir(item.path))
    return found_files
166
+
167
glob_chars = r"*?![]"
glob_re = "[\\*\\?\\!\\[\\]]+"


def glob_rm_glob(glob_str:str, *, default_rec_dir:str=None) -> str:
    r"""
    Extract directory name from a glob string (first elements of path without glob characters).

    The last path element is removed with os.path.split until the remaining string does not
    contain any of the characters matched by glob_re.

    :param glob_str: the glob string
    :param default_rec_dir: if the last removed element is "**" (directory recursion), the name of the directory to use instead
    :return: a path without glob characters

    Examples (path separators are platform-dependent, these ones hold on Windows):
    >>> glob_rm_glob(r"test\*.csv")
    'test'

    >>> glob_rm_glob(r"**\*.csv", default_rec_dir="hello")
    'hello'
    """
    # the docstring is a raw string because it contains backslashes
    glob_free = glob_str
    while re.search(glob_re, glob_free):
        glob_free, glob_sub = os.path.split(glob_free)
        if glob_sub == "**" and re.search(glob_re, glob_free) is None and default_rec_dir is not None:
            # the recursion marker was the first glob element: substitute it with the given name
            return os.path.join(glob_free, default_rec_dir)
    return glob_free
191
+
192
def glob_name(glob_str:str) -> str:
    r"""
    Extract file name glob from a glob string (last element of path, except if it is "**")

    :param glob_str: the glob string
    :return: the last path element, or an empty string if this element is "**"

    Example (path separators are platform-dependent, this one holds on Windows):
    >>> glob_name(r"**\*.csv")
    '*.csv'
    """
    # the docstring is a raw string because it contains backslashes
    _, glob_file = os.path.split(glob_str)
    return "" if glob_file == "**" else glob_file
208
+
@@ -0,0 +1,298 @@
1
+ #!python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Setting the proxy from simple command line arguments
5
+ """
6
+ import urllib.request
7
+ from typing import Union, Sequence, Tuple
8
+ import os
9
+ import argparse
10
+ from warnings import warn
11
+ import copy
12
+ import json
13
+
14
+ from ckanapi_harvesters.auxiliary.ckan_defs import environ_keyword
15
+ from ckanapi_harvesters.auxiliary.path import sanitize_path, path_rel_to_dir
16
+ from ckanapi_harvesters.auxiliary.login import Login
17
+
18
+ import requests
19
+ from requests.auth import AuthBase, HTTPProxyAuth, HTTPBasicAuth
20
+
21
+ PROXY_AUTH_ENVIRON = "PROXY_AUTH_FILE"
22
+
23
class HttpsProxyDefError(Exception):
    """Error raised when the proxy definition for http and https is incomplete."""

    def __init__(self):
        message = "Only one of http_proxy or https_proxy is set"
        super().__init__(message)
26
+
27
def get_proxies_from_environ() -> dict:
    """
    Return the proxies reported by urllib.request.getproxies.

    An earlier implementation, reading the environment variables http_proxy, https_proxy
    and no_proxy by hand, was replaced by this standard library call.

    :return: dictionary of proxies
    """
    return urllib.request.getproxies()
44
+
45
def host_port_sep(url:Union[str,None], *, default_port:int=None) -> Tuple[Union[str,None],Union[int,None]]:
    """
    Separate the port number from the rest of a proxy url.

    The port is the numeric element following the last ':' of the host part. Everything else
    (scheme, user information before '@', path or query after the host) is kept in the returned
    host string, with its separators.

    :param url: url or host[:port] string; None returns (None, None)
    :param default_port: port returned if the url does not specify a port
    :return: tuple (url without port, port number)

    Examples:
    >>> host_port_sep("http://proxy:8080")
    ('http://proxy', 8080)
    >>> host_port_sep("proxy", default_port=3128)
    ('proxy', 3128)
    """
    if url is None:
        return None, None
    # user information may contain ':' and must not be searched for the port
    user_info, at_sep, remainder = url.rpartition('@')
    # the ':' of the scheme ("http://") is not a port separator either: skip it when looking for the path/query
    scheme_pos = remainder.find("://")
    search_from = scheme_pos + 3 if scheme_pos >= 0 else 0
    cut = len(remainder)
    for marker in "/?":
        marker_pos = remainder.find(marker, search_from)
        if marker_pos >= 0:
            cut = min(cut, marker_pos)
    authority, suffix = remainder[:cut], remainder[cut:]
    head, colon, port_str = authority.rpartition(':')
    if colon and port_str.isascii() and port_str.isdigit():
        host, port = head, int(port_str)
    else:
        # no port in the url (the last ':' belongs to the scheme or to an IPv6 address)
        host, port = authority, default_port
    return user_info + at_sep + host + suffix, port
60
+
61
def _define_proxies(proxy_string:Union[str, dict, None], default_proxies:dict=None) -> Union[dict, None]:
    """
    Translate a proxy specification into a proxies dictionary.

    :param proxy_string: None, a proxies dictionary (returned as-is) or a string. A string is either
        an url to a proxy server, the JSON representation of a proxies dictionary (starting with '{')
        or one of the keywords (case-insensitive):
         - environ_keyword: proxies reported by get_proxies_from_environ, default_proxies if there are none
         - "unspecified": no proxies are specified (None)
         - "noproxy": empty proxies for http and https
         - "default": default_proxies
    :param default_proxies: proxies used for the "default" keyword and as a fallback for the environment
    :return: proxies dictionary or None
    :raises TypeError: if proxy_string is not None, a dict or a str
    """
    if proxy_string is None:
        return None
    if isinstance(proxy_string, dict):
        return proxy_string
    if not isinstance(proxy_string, str):
        raise TypeError("proxy must be str or dict")
    proxy_string = proxy_string.strip()
    proxy_mode = proxy_string.lower()
    if proxy_mode == environ_keyword:
        proxies = get_proxies_from_environ()
        # urllib.request.getproxies returns an empty dict (never None) if no proxy is configured
        if not proxies:
            proxies = default_proxies
    elif proxy_mode == "unspecified":
        proxies = None  # do not specify the proxies - is equivalent to "environ"
    elif proxy_mode == "noproxy":
        proxies = {"http": "", "https": ""}  # do not use any proxy
    elif proxy_mode == "default":
        proxies = default_proxies  # default proxies, provided in argument
    elif proxy_string.startswith('{'):
        # proxy string is a string representation of proxy dictionary
        proxies = json.loads(proxy_string)
    else:
        # suppose string contains an url to a proxy server
        proxies = {"http": proxy_string, "https": proxy_string}
    return proxies
93
+
94
+
95
class ProxyConfig:
    """
    Proxy settings: proxies dictionary, headers and authentication used to access the proxies.

    The proxies can be defined from a string (url or keyword), a dictionary or command line arguments.
    """
    def __init__(self, proxy_string:Union[str,dict]=None, default_proxies:dict=None,
                 proxy_headers:dict = None, proxy_auth:Union[AuthBase, Tuple[str,str]]=None) -> None:
        """
        :param proxy_string: string or proxies dict (to accept a ProxyConfig object, use from_str_or_config).
            If a string is provided, it must be an url to a proxy or one of the following values:
             - "environ": use the proxies specified in the environment variables "http_proxy" and "https_proxy"
             - "noproxy": do not use any proxies
             - "unspecified": do not specify the proxies
             - "default": use value provided by default_proxies
        :param default_proxies: proxies used if proxies="default"
        :param proxy_headers: headers used to access the proxies, generally for authentication
        :param proxy_auth: authentication object or (username, password) tuple for the proxies
        """
        if proxy_headers is None:
            proxy_headers = {}
        self._proxy_string:Union[str, dict, None] = None
        self._proxies:Union[dict,None] = None
        self._is_defined:bool = False
        self._default_proxies:Union[dict,None] = default_proxies
        self.proxy_headers: dict = proxy_headers
        self._proxy_auth: Union[AuthBase, Tuple[str,str], None] = proxy_auth
        self.proxy_auth_file: Union[str,None] = None
        self.proxy_auth_from_env: bool = False
        self.proxy_string = proxy_string  # property: also computes the proxies dictionary
        # self.load_proxy_auth_environ(error_not_found=False)  # recommended to base these parameters on user demand (confirm if there is a risk of leakage)

    def __str__(self):
        return str(self._proxies)

    def __copy__(self):
        return self.copy()

    def copy(self) -> "ProxyConfig":
        """
        :return: a new ProxyConfig object, with deep copies of the proxies, headers and authentication
        """
        dest = ProxyConfig()
        dest._proxies = copy.deepcopy(self._proxies)
        dest._proxy_string = copy.deepcopy(self._proxy_string)
        dest._default_proxies = copy.deepcopy(self._default_proxies)
        dest.proxy_headers = copy.deepcopy(self.proxy_headers)
        dest._proxy_auth = copy.deepcopy(self._proxy_auth)
        dest._is_defined = self._is_defined
        dest.proxy_auth_file = self.proxy_auth_file
        dest.proxy_auth_from_env = self.proxy_auth_from_env
        return dest

    @property
    def proxy_string(self) -> Union[str, dict, None]:
        return self._proxy_string

    @proxy_string.setter
    def proxy_string(self, proxy_string:Union[str, dict, None]):
        # setting the specification also recomputes the proxies dictionary
        self._proxy_string = proxy_string
        self._is_defined = proxy_string is not None
        self._proxies = _define_proxies(proxy_string, default_proxies=self._default_proxies)

    @property
    def proxies(self) -> dict:
        return self._proxies

    @proxies.setter
    def proxies(self, proxies:dict):
        self._proxy_string = proxies
        self._proxies = proxies
        self._is_defined = True

    @property
    def proxy_auth(self) -> Union[AuthBase, Tuple[str,str]]:
        return self._proxy_auth

    @proxy_auth.setter
    def proxy_auth(self, proxy_auth:Union[AuthBase, Tuple[str,str]]):
        # an authentication set by hand does not originate from a file nor from the environment
        self._proxy_auth = proxy_auth
        self.proxy_auth_file = None
        self.proxy_auth_from_env = False

    def get_host_port(self) -> Tuple[Union[str,None],Union[int,None]]:
        """
        Host and port of the first proxy found (legacy key "http_proxy", then "http", then "https").

        :return: tuple (host, port) or (None, None) if no proxy url is available or it cannot be analyzed
        """
        if not self._proxies:
            return None, None
        # the proxies dictionaries built in this module use the keys "http" and "https";
        # "http_proxy" is the only key that was looked up previously and keeps the priority
        for key in ("http_proxy", "http", "https"):
            proxy_url = self._proxies.get(key)
            if proxy_url:
                try:
                    return host_port_sep(proxy_url)
                except ValueError:
                    # url that could not be split into host and port
                    return None, None
        return None, None

    def get_proxy_login(self) -> Login:
        """
        :return: Login object built from the proxy authentication (empty Login if there is none)
        :raises TypeError: if the authentication object does not expose a username and a password
        """
        auth = self._proxy_auth
        if auth is None:
            return Login()
        if isinstance(auth, tuple):
            # (username, password) tuple, as accepted by the constructor
            username, password = auth
            return Login(username, password)
        # HTTPBasicAuth, its subclass HTTPProxyAuth and HTTPDigestAuth expose these attributes
        if not (hasattr(auth, "username") and hasattr(auth, "password")):
            raise TypeError(f"Cannot extract a login from proxy authentication of type {type(auth).__name__}")
        return Login(auth.username, auth.password)

    @staticmethod
    def from_str_or_config(proxies:Union[str,dict, "ProxyConfig"],
                           *, default_proxies:dict=None, proxy_headers:dict=None) -> "ProxyConfig":
        """
        Return a ProxyConfig from a proxy specification or from an existing ProxyConfig object.

        :param proxies: string, proxies dict, None or ProxyConfig. A ProxyConfig is returned as-is
            (same object), after replacing its headers if proxy_headers is provided.
        :param default_proxies: proxies used if proxies="default"
        :param proxy_headers: headers used to access the proxies
        """
        if isinstance(proxies, ProxyConfig):
            if proxy_headers is not None:
                proxies.proxy_headers = proxy_headers
            return proxies
        return ProxyConfig(proxies, default_proxies=default_proxies, proxy_headers=proxy_headers)

    def replace_default_proxy(self, default_proxies:dict) -> None:
        """
        Replace the proxies by default_proxies, only if the proxy specification is the keyword "default".
        """
        # the specification can also be a dictionary, which has no keyword
        if isinstance(self._proxy_string, str) and self._proxy_string.strip().lower() == "default":
            self._proxies = default_proxies

    def reset(self) -> None:
        self._proxy_string = None
        self._proxies = None
        self._is_defined = False

    def is_defined(self) -> bool:
        return self._is_defined

    def load_proxy_auth_environ(self, *, error_not_found:bool=False) -> bool:
        """
        Load the proxy authentication from the file designated by the environment variable PROXY_AUTH_FILE.

        :param error_not_found: passed to load_proxy_auth_from_file
        :return: True if the authentication was loaded
        :raises ValueError: if the environment variable contains the keyword environ_keyword
        """
        proxy_auth_file = sanitize_path(os.environ.get(PROXY_AUTH_ENVIRON))  # "PROXY_AUTH_FILE"
        if proxy_auth_file is None:
            return False
        proxy_keyword = proxy_auth_file.strip().lower()
        if proxy_keyword == environ_keyword:
            # this value would create an infinite loop with load_proxy_auth_from_file
            raise ValueError(f"{PROXY_AUTH_ENVIRON} must contain a file path, not the keyword '{environ_keyword}'")
        if self.load_proxy_auth_from_file(proxy_auth_file, error_not_found=error_not_found):
            self.proxy_auth_from_env = True
            return True
        return False

    def load_proxy_auth_from_file(self, file_path:str, *, base_dir:str=None, error_not_found:bool=True) -> bool:
        """
        Load the proxy authentication from a text file with 3 lines: authentication method, username, password.

        Accepted methods (case-insensitive): basic/httpbasicauth, proxy/httpproxyauth, digest/httpdigestauth, none.

        :param file_path: path to the file, or environ_keyword to use the environment variable PROXY_AUTH_FILE
        :param base_dir: base directory, if file_path is relative
        :param error_not_found: if False, a missing file only emits a warning and False is returned
        :return: True if the authentication was loaded
        :raises KeyError: unknown authentication method
        """
        file_path = path_rel_to_dir(file_path, base_dir=base_dir, keyword_exceptions={environ_keyword})
        proxy_keyword = file_path.strip().lower()
        if proxy_keyword == environ_keyword:
            # this keyword is not very useful if proxy authentication file is loaded from environment anyway
            return self.load_proxy_auth_environ(error_not_found=error_not_found)
        if (not error_not_found) and (not os.path.isfile(file_path)):
            msg = f"Proxy authentication file does not exist: {file_path}"
            warn(msg)
            return False
        self.proxy_auth_file = file_path
        self.proxy_auth_from_env = False
        with open(file_path, "r") as f:
            auth_type = f.readline().strip().lower()
            username = f.readline().strip()
            password = f.readline().strip()
        if auth_type in ("basic", "httpbasicauth"):
            self._proxy_auth = requests.auth.HTTPBasicAuth(username, password)
        elif auth_type in ("proxy", "httpproxyauth"):
            self._proxy_auth = requests.auth.HTTPProxyAuth(username, password)
        elif auth_type in ("digest", "httpdigestauth"):
            self._proxy_auth = requests.auth.HTTPDigestAuth(username, password)
        elif auth_type == "none":
            self._proxy_auth = None
        else:
            raise KeyError(f"Unknown auth type {auth_type}")
        return True

    @staticmethod
    def _setup_cli_proxy_parser(parser: argparse.ArgumentParser = None) -> argparse.ArgumentParser:
        """
        Define or add CLI arguments to initialize the proxy:
        --proxy, --http-proxy, --https-proxy, --no-proxy and --proxy-auth-file.

        :param parser: option to provide an existing parser to add the specific fields needed to initialize the proxy
        :return: the parser
        """
        if parser is None:
            parser = argparse.ArgumentParser(description="Proxy parameters initialization")
        parser.add_argument("--proxy", type=str,
                            help="Proxy for HTTP and HTTPS")
        parser.add_argument("--http-proxy", type=str,
                            help="HTTP proxy")
        parser.add_argument("--https-proxy", type=str,
                            help="HTTPS proxy")
        parser.add_argument("--no-proxy", type=str,
                            help="Proxy exceptions")
        parser.add_argument("--proxy-auth-file", type=str,
                            help="Path to a proxy authentication file with 3 lines (authentication method, username, password)")
        return parser

    @staticmethod
    def from_cli_args(args: argparse.Namespace, *, base_dir:str=None, error_not_found:bool=True,
                      default_proxies:dict=None, proxy_headers:dict=None) -> "ProxyConfig":
        """
        Build a ProxyConfig from the arguments defined by _setup_cli_proxy_parser.

        --proxy has the priority over --http-proxy/--https-proxy. If only --http-proxy is given,
        it is also used for https. --no-proxy is taken into account with --http-proxy.

        :param args: parsed command line arguments
        :param base_dir: base directory for the proxy authentication file, if relative
        :param error_not_found: passed to load_proxy_auth_from_file
        :param default_proxies: proxies used if the proxy argument is "default"
        :param proxy_headers: headers used to access the proxies
        :return: ProxyConfig object, or None if no proxy argument was given
        :raises HttpsProxyDefError: --https-proxy given without --http-proxy (nor --proxy)
        :raises Exception: authentication file given without any proxy specification
        """
        proxy_spec = None  # proxy string or proxies dictionary
        if args.proxy is not None:
            proxy_spec = args.proxy
        elif args.http_proxy is not None:
            https_proxy = args.https_proxy if args.https_proxy is not None else args.http_proxy
            proxy_spec = {"http": args.http_proxy, "https": https_proxy}
            if args.no_proxy is not None:
                # same key as the one used by urllib.request.getproxies for the no_proxy variable
                proxy_spec["no"] = args.no_proxy
        elif args.https_proxy is not None:
            raise HttpsProxyDefError()
        if proxy_spec is not None:
            proxy_config = ProxyConfig(proxy_spec, default_proxies=default_proxies, proxy_headers=proxy_headers)
        else:
            proxy_config = None
        if args.proxy_auth_file is not None:
            if proxy_config is None:
                raise Exception(f"Proxy authentication file specified without proxy specification: {args.proxy_auth_file}")
            proxy_config.load_proxy_auth_from_file(args.proxy_auth_file, base_dir=base_dir, error_not_found=error_not_found)
        return proxy_config
298
+
@@ -0,0 +1,40 @@
1
+ #!python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Operations on urls
5
+ """
6
+ import urllib.parse
7
+
8
+ from ckanapi_harvesters.auxiliary.login import Login
9
+
10
+
11
+ urlsep = '/'
12
+
13
+
14
def is_valid_url(url:str) -> bool:
    """
    Test if a string is a complete url: scheme, network location and path must all be present.

    NOTE(review): an url without path (e.g. "https://example.org") is reported as invalid - confirm this is intended.

    :param url: the string to test
    :return: True if the three components are non-empty, False otherwise or if the url cannot be parsed
    """
    try:
        components = urllib.parse.urlparse(url)
    except ValueError:
        return False
    return bool(components.scheme and components.netloc and components.path)
20
+
21
def url_join(base:str, *args:str) -> str:
    """
    Append elements to an url, inserting the url separator where it is missing.

    Empty elements are ignored. The elements themselves are appended without modification.

    :param base: beginning of the url
    :param args: elements to append
    :return: the joined url
    """
    joined = base
    for element in args:
        if len(element) == 0:
            continue
        # only add a separator if the url does not already end with one
        joined = joined + element if joined.endswith(urlsep) else joined + urlsep + element
    return joined
29
+
30
def url_insert_login(url:str, login:"Login"):
    """
    Insert user authentication parameters in a url

    The username and password are percent-encoded, so that special characters such as
    '@', ':' or '/' do not alter the structure of the url. User information already present
    in the url is replaced.

    :param url: the url to complete
    :param login: object with the attributes username and password; if None, the url is returned as-is
    :return: url in the form scheme://username:password@host...
    """
    if login is None:
        return url
    parsed_url = urllib.parse.urlparse(url)
    # safe="" also encodes '/', which would otherwise end the network location
    username = urllib.parse.quote(str(login.username), safe="")
    password = urllib.parse.quote(str(login.password), safe="")
    # drop any user information already present before the host
    host = parsed_url.netloc.rpartition('@')[2]
    netloc_with_auth = f"{username}:{password}@{host}"
    updated_url = parsed_url._replace(netloc=netloc_with_auth)
    return urllib.parse.urlunparse(updated_url)
@@ -0,0 +1,40 @@
1
+ #!python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Section of the package dedicated to the initialization of a CKAN package
5
+ """
6
+
7
+ BUILDER_FILE_FORMAT_VERSION = "0.0.0" # version of the Excel & JSON file format
8
+
9
+ from . import builder_aux
10
+ from . import builder_errors
11
+ from . import builder_field
12
+ from . import builder_resource
13
+ from . import builder_resource_multi_abc
14
+ from . import builder_resource_multi_file
15
+ from . import mapper_datastore
16
+ from . import builder_resource_datastore
17
+ from . import builder_resource_multi_datastore
18
+ from . import builder_resource_datastore_url
19
+ from . import builder_resource_datastore_unmanaged
20
+ from . import mapper_datastore_multi
21
+ from . import builder_resource_datastore_multi_abc
22
+ from . import builder_resource_datastore_multi_folder
23
+ from . import builder_resource_datastore_multi_harvester
24
+ from . import builder_resource_init
25
+ from . import builder_ckan
26
+ from . import builder_package_1_basic
27
+ from . import builder_package_2_harvesters
28
+ from . import builder_package_3_multi_threaded
29
+ from . import builder_package
30
+
31
+ from . import specific
32
+ from . import example
33
+
34
+ # usage shortcuts
35
+ from .builder_package import BuilderPackage
36
+ from .mapper_datastore_multi import RequestFileMapperIndexKeys
37
+ from .builder_resource_datastore_multi_abc import BuilderDataStoreMultiABC
38
+ from .builder_resource_datastore_multi_folder import BuilderDataStoreFolder
39
+
40
+
@@ -0,0 +1,20 @@
1
+ #!python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Auxiliary functions
5
+ """
6
+ from typing import List, Union
7
+ import os
8
+
9
def positive_end_index(end_index:Union[int,None], total:int) -> int:
    """
    Return stop index for a loop, following pythonic definition for slices (last index treated = end_index-1).
    If end_index is negative, the index is taken from the end of the slice. end_index = -1 means end just before the last element.
    As for slices, the result is limited to the range [0, total].

    :param end_index: requested stop index; None means up to the end
    :param total: number of elements
    :return: stop index between 0 and total

    Examples:
    >>> positive_end_index(-1, 5)
    4
    >>> positive_end_index(10, 5)
    5
    """
    if end_index is None:
        return total
    if end_index < 0:
        return max(0, total + end_index)
    # a stop index beyond the end is equivalent to the end, as in range(total)[:end_index]
    return min(end_index, total)
20
+