ckanapi-harvesters 0.0.0__py3-none-any.whl → 0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ckanapi_harvesters/__init__.py +32 -10
- ckanapi_harvesters/auxiliary/__init__.py +26 -0
- ckanapi_harvesters/auxiliary/ckan_action.py +93 -0
- ckanapi_harvesters/auxiliary/ckan_api_key.py +213 -0
- ckanapi_harvesters/auxiliary/ckan_auxiliary.py +293 -0
- ckanapi_harvesters/auxiliary/ckan_configuration.py +50 -0
- ckanapi_harvesters/auxiliary/ckan_defs.py +10 -0
- ckanapi_harvesters/auxiliary/ckan_errors.py +129 -0
- ckanapi_harvesters/auxiliary/ckan_map.py +509 -0
- ckanapi_harvesters/auxiliary/ckan_model.py +992 -0
- ckanapi_harvesters/auxiliary/ckan_vocabulary_deprecated.py +104 -0
- ckanapi_harvesters/auxiliary/deprecated.py +82 -0
- ckanapi_harvesters/auxiliary/error_level_message.py +51 -0
- ckanapi_harvesters/auxiliary/external_code_import.py +98 -0
- ckanapi_harvesters/auxiliary/list_records.py +60 -0
- ckanapi_harvesters/auxiliary/login.py +163 -0
- ckanapi_harvesters/auxiliary/path.py +208 -0
- ckanapi_harvesters/auxiliary/proxy_config.py +298 -0
- ckanapi_harvesters/auxiliary/urls.py +40 -0
- ckanapi_harvesters/builder/__init__.py +40 -0
- ckanapi_harvesters/builder/builder_aux.py +20 -0
- ckanapi_harvesters/builder/builder_ckan.py +238 -0
- ckanapi_harvesters/builder/builder_errors.py +36 -0
- ckanapi_harvesters/builder/builder_field.py +122 -0
- ckanapi_harvesters/builder/builder_package.py +9 -0
- ckanapi_harvesters/builder/builder_package_1_basic.py +1291 -0
- ckanapi_harvesters/builder/builder_package_2_harvesters.py +40 -0
- ckanapi_harvesters/builder/builder_package_3_multi_threaded.py +45 -0
- ckanapi_harvesters/builder/builder_package_example.xlsx +0 -0
- ckanapi_harvesters/builder/builder_resource.py +589 -0
- ckanapi_harvesters/builder/builder_resource_datastore.py +561 -0
- ckanapi_harvesters/builder/builder_resource_datastore_multi_abc.py +367 -0
- ckanapi_harvesters/builder/builder_resource_datastore_multi_folder.py +273 -0
- ckanapi_harvesters/builder/builder_resource_datastore_multi_harvester.py +278 -0
- ckanapi_harvesters/builder/builder_resource_datastore_unmanaged.py +145 -0
- ckanapi_harvesters/builder/builder_resource_datastore_url.py +150 -0
- ckanapi_harvesters/builder/builder_resource_init.py +126 -0
- ckanapi_harvesters/builder/builder_resource_multi_abc.py +361 -0
- ckanapi_harvesters/builder/builder_resource_multi_datastore.py +146 -0
- ckanapi_harvesters/builder/builder_resource_multi_file.py +505 -0
- ckanapi_harvesters/builder/example/__init__.py +21 -0
- ckanapi_harvesters/builder/example/builder_example.py +21 -0
- ckanapi_harvesters/builder/example/builder_example_aux_fun.py +24 -0
- ckanapi_harvesters/builder/example/builder_example_download.py +44 -0
- ckanapi_harvesters/builder/example/builder_example_generate_data.py +73 -0
- ckanapi_harvesters/builder/example/builder_example_patch_upload.py +51 -0
- ckanapi_harvesters/builder/example/builder_example_policy.py +114 -0
- ckanapi_harvesters/builder/example/builder_example_test_sql.py +53 -0
- ckanapi_harvesters/builder/example/builder_example_tests.py +87 -0
- ckanapi_harvesters/builder/example/builder_example_tests_offline.py +57 -0
- ckanapi_harvesters/builder/example/package/ckan-dpg.svg +74 -0
- ckanapi_harvesters/builder/example/package/users_local.csv +3 -0
- ckanapi_harvesters/builder/mapper_datastore.py +93 -0
- ckanapi_harvesters/builder/mapper_datastore_multi.py +262 -0
- ckanapi_harvesters/builder/specific/__init__.py +11 -0
- ckanapi_harvesters/builder/specific/configuration_builder.py +66 -0
- ckanapi_harvesters/builder/specific_builder_abc.py +23 -0
- ckanapi_harvesters/ckan_api/__init__.py +20 -0
- ckanapi_harvesters/ckan_api/ckan_api.py +11 -0
- ckanapi_harvesters/ckan_api/ckan_api_0_base.py +896 -0
- ckanapi_harvesters/ckan_api/ckan_api_1_map.py +1028 -0
- ckanapi_harvesters/ckan_api/ckan_api_2_readonly.py +934 -0
- ckanapi_harvesters/ckan_api/ckan_api_3_policy.py +229 -0
- ckanapi_harvesters/ckan_api/ckan_api_4_readwrite.py +579 -0
- ckanapi_harvesters/ckan_api/ckan_api_5_manage.py +1225 -0
- ckanapi_harvesters/ckan_api/ckan_api_params.py +192 -0
- ckanapi_harvesters/ckan_api/deprecated/__init__.py +9 -0
- ckanapi_harvesters/ckan_api/deprecated/ckan_api_deprecated.py +267 -0
- ckanapi_harvesters/ckan_api/deprecated/ckan_api_deprecated_vocabularies.py +189 -0
- ckanapi_harvesters/harvesters/__init__.py +23 -0
- ckanapi_harvesters/harvesters/data_cleaner/__init__.py +17 -0
- ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_abc.py +240 -0
- ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_errors.py +23 -0
- ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload.py +9 -0
- ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload_1_basic.py +430 -0
- ckanapi_harvesters/harvesters/data_cleaner/data_cleaner_upload_2_geom.py +98 -0
- ckanapi_harvesters/harvesters/file_formats/__init__.py +10 -0
- ckanapi_harvesters/harvesters/file_formats/csv_format.py +43 -0
- ckanapi_harvesters/harvesters/file_formats/file_format_abc.py +39 -0
- ckanapi_harvesters/harvesters/file_formats/file_format_init.py +25 -0
- ckanapi_harvesters/harvesters/file_formats/shp_format.py +129 -0
- ckanapi_harvesters/harvesters/harvester_abc.py +190 -0
- ckanapi_harvesters/harvesters/harvester_errors.py +31 -0
- ckanapi_harvesters/harvesters/harvester_init.py +30 -0
- ckanapi_harvesters/harvesters/harvester_model.py +49 -0
- ckanapi_harvesters/harvesters/harvester_params.py +323 -0
- ckanapi_harvesters/harvesters/postgre_harvester.py +495 -0
- ckanapi_harvesters/harvesters/postgre_params.py +86 -0
- ckanapi_harvesters/harvesters/pymongo_data_cleaner.py +173 -0
- ckanapi_harvesters/harvesters/pymongo_harvester.py +355 -0
- ckanapi_harvesters/harvesters/pymongo_params.py +54 -0
- ckanapi_harvesters/policies/__init__.py +20 -0
- ckanapi_harvesters/policies/data_format_policy.py +269 -0
- ckanapi_harvesters/policies/data_format_policy_abc.py +97 -0
- ckanapi_harvesters/policies/data_format_policy_custom_fields.py +156 -0
- ckanapi_harvesters/policies/data_format_policy_defs.py +135 -0
- ckanapi_harvesters/policies/data_format_policy_errors.py +79 -0
- ckanapi_harvesters/policies/data_format_policy_lists.py +234 -0
- ckanapi_harvesters/policies/data_format_policy_tag_groups.py +35 -0
- ckanapi_harvesters/reports/__init__.py +11 -0
- ckanapi_harvesters/reports/admin_report.py +292 -0
- {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.3.dist-info}/METADATA +84 -38
- ckanapi_harvesters-0.0.3.dist-info/RECORD +105 -0
- ckanapi_harvesters/divider/__init__.py +0 -27
- ckanapi_harvesters/divider/divider.py +0 -53
- ckanapi_harvesters/divider/divider_error.py +0 -59
- ckanapi_harvesters/main.py +0 -30
- ckanapi_harvesters-0.0.0.dist-info/RECORD +0 -9
- {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.3.dist-info}/WHEEL +0 -0
- {ckanapi_harvesters-0.0.0.dist-info → ckanapi_harvesters-0.0.3.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
#!python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Code to initialize a resource builder from a row
|
|
5
|
+
"""
|
|
6
|
+
from typing import Union
|
|
7
|
+
|
|
8
|
+
import pandas as pd
|
|
9
|
+
|
|
10
|
+
from ckanapi_harvesters.ckan_api import CkanApiMap
|
|
11
|
+
from ckanapi_harvesters.auxiliary.ckan_model import CkanResourceInfo, CkanDataStoreInfo
|
|
12
|
+
from ckanapi_harvesters.auxiliary.ckan_auxiliary import assert_or_raise
|
|
13
|
+
from ckanapi_harvesters.auxiliary.ckan_defs import ckan_tags_sep
|
|
14
|
+
from ckanapi_harvesters.auxiliary.ckan_errors import (UnexpectedError)
|
|
15
|
+
from ckanapi_harvesters.builder.builder_errors import MissingDataStoreInfoError
|
|
16
|
+
from ckanapi_harvesters.builder.builder_resource import (BuilderResourceABC, BuilderFileBinary, BuilderUrl,
|
|
17
|
+
BuilderResourceUnmanaged)
|
|
18
|
+
from ckanapi_harvesters.builder.builder_resource_multi_file import BuilderMultiFile
|
|
19
|
+
from ckanapi_harvesters.builder.builder_resource_datastore import (BuilderDataStoreABC, BuilderDataStoreFile,
|
|
20
|
+
BuilderResourceIgnored)
|
|
21
|
+
from ckanapi_harvesters.builder.builder_resource_multi_datastore import BuilderMultiDataStore
|
|
22
|
+
from ckanapi_harvesters.builder.builder_resource_datastore_url import BuilderDataStoreUrl
|
|
23
|
+
from ckanapi_harvesters.builder.builder_resource_datastore_multi_harvester import BuilderDataStoreHarvester
|
|
24
|
+
from ckanapi_harvesters.builder.builder_resource_datastore_unmanaged import BuilderDataStoreUnmanaged
|
|
25
|
+
from ckanapi_harvesters.builder.builder_resource_datastore_multi_abc import BuilderDataStoreMultiABC
|
|
26
|
+
from ckanapi_harvesters.builder.builder_resource_datastore_multi_folder import BuilderDataStoreFolder
|
|
27
|
+
from ckanapi_harvesters.builder.builder_field import BuilderField
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# Row count above which a CSV DataStore discovered on CKAN is initialised as a
# BuilderDataStoreFolder by init_resource_from_ckan. None disables this conversion.
import_as_folder_row_count_threshold: Union[int,None] = None
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def init_resource_from_df(row: pd.Series, base_dir:str=None) -> BuilderResourceABC:
    """
    Instantiate the resource builder designated by the "mode" cell of a row, then load the row into it.

    The mode keyword is compared after lower-casing and stripping surrounding whitespace.

    :param row: row describing the resource; it must hold a "mode" entry
    :param base_dir: forwarded as-is to the builder's _load_from_df_row
    :return: the resource builder, loaded from the row
    :raises ValueError: if the mode keyword does not match any known builder
    """
    # keyword -> builder class
    builder_classes = {
        "file": BuilderFileBinary,
        "url": BuilderUrl,
        "datastore from file": BuilderDataStoreFile,
        "datastore from folder": BuilderDataStoreFolder,
        "datastore from url": BuilderDataStoreUrl,
        "datastore from harvester": BuilderDataStoreHarvester,
        "unmanaged": BuilderResourceUnmanaged,
        "unmanaged datastore": BuilderDataStoreUnmanaged,
        "multifile": BuilderMultiFile,
        "multidatastore": BuilderMultiDataStore,
        "ignored": BuilderResourceIgnored,
    }
    mode = row["mode"].lower().strip()
    builder_class = builder_classes.get(mode)
    if builder_class is None:
        raise ValueError(f"{mode} is not a valid mode")
    resource_builder = builder_class()
    resource_builder._load_from_df_row(row=row, base_dir=base_dir)
    return resource_builder
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def init_resource_from_ckan(ckan: CkanApiMap, resource_info: CkanResourceInfo) -> BuilderResourceABC:
    """
    Function initiating a resource builder based on information provided by the CKAN API.

    The builder type is selected as follows:

    - resource with a populated DataStore (row count known and at least one field):
      BuilderDataStoreUrl if the download url is not internal to CKAN,
      otherwise BuilderDataStoreUnmanaged for the csv format (converted to a BuilderDataStoreFolder
      when import_as_folder_row_count_threshold is set and exceeded).
      The field builders are then loaded from the DataStore fields.
    - resource without DataStore and with a download url which is not internal to CKAN: BuilderUrl
    - any other resource: BuilderResourceUnmanaged

    :param ckan: CKAN API object, used to test whether the download url is internal
    :param resource_info: resource metadata, checked with datastore_queried() before anything else
    :return: the resource builder, with package_name set to resource_info.package_id
    :raises MissingDataStoreInfoError: passed to assert_or_raise for the datastore_queried() check
    :raises UnexpectedError: if a DataStore is hosted by CKAN in a format other than csv,
        or passed to assert_or_raise for the consistency check of an external resource
    """
    # assert_or_raise(ckan.map._mapping_query_datastore_info, MissingDataStoreInfoError())
    assert_or_raise(resource_info.datastore_queried(), MissingDataStoreInfoError())
    download_url = resource_info.download_url
    # an absent url (None) is treated like an empty one instead of failing on len(None)
    has_external_url = bool(download_url) and not ckan.is_url_internal(download_url)
    d = {
        "name": resource_info.name,
        "format": resource_info.format,
        "description": resource_info.description,
        "state": resource_info.state.name if resource_info.state is not None else "",
        "file/url": resource_info.name,
        "primary key": "",
        "indexes": "",
        "known id": resource_info.id,
        "known url": resource_info.download_url,
    }
    datastore_info = resource_info.datastore_info
    if (isinstance(datastore_info, CkanDataStoreInfo)
            and datastore_info.row_count is not None
            and len(datastore_info.fields_id_list) > 0):
        # DataStore
        d["indexes"] = ckan_tags_sep.join(datastore_info.index_fields)
        d["aliases"] = ckan_tags_sep.join(datastore_info.aliases)
        if has_external_url:
            d["file/url"] = download_url
            resource = BuilderDataStoreUrl()
            resource._load_from_df_row(row=pd.Series(d))
        elif (resource_info.format or "").lower() == "csv":
            # a missing format (None) ends in the UnexpectedError below rather than an AttributeError
            resource = BuilderDataStoreUnmanaged()
            resource._load_from_df_row(row=pd.Series(d))
            if (import_as_folder_row_count_threshold is not None
                    and datastore_info.row_count > import_as_folder_row_count_threshold):
                resource = BuilderDataStoreFolder.from_file_datastore(resource)
        else:
            raise UnexpectedError(f"Format of data store {resource_info.name} ({resource_info.format}) is not recognized")
        # load fields information
        resource.field_builders = {
            field_id: BuilderField._from_ckan_field(datastore_info.fields_dict[field_id])
            for field_id in datastore_info.fields_id_list
        }
    elif has_external_url:
        # external resource
        d["file/url"] = download_url
        resource = BuilderUrl()
        resource._load_from_df_row(row=pd.Series(d))
        # NOTE(review): this isinstance test targets CkanResourceInfo whereas the DataStore branch above
        # tests CkanDataStoreInfo - confirm which type was intended before tightening this check.
        assert_or_raise(not resource_info.datastore_active and not isinstance(resource_info.datastore_info, CkanResourceInfo), UnexpectedError())
    else:
        # file
        resource = BuilderResourceUnmanaged()
        resource._load_from_df_row(row=pd.Series(d))
    resource.package_name = resource_info.package_id
    return resource
|
|
126
|
+
|
|
@@ -0,0 +1,361 @@
|
|
|
1
|
+
#!python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Code to upload metadata to the CKAN server to create/update an existing package
|
|
5
|
+
The metadata is defined by the user in an Excel worksheet
|
|
6
|
+
This file implements the basic resources. See builder_datastore for specific functions to initiate datastores.
|
|
7
|
+
"""
|
|
8
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
9
|
+
import threading
|
|
10
|
+
from threading import current_thread
|
|
11
|
+
from typing import Any, Generator, Union, Callable, Set, List, Dict, Tuple
|
|
12
|
+
from abc import ABC, abstractmethod
|
|
13
|
+
import io
|
|
14
|
+
import os
|
|
15
|
+
import glob
|
|
16
|
+
import fnmatch
|
|
17
|
+
from warnings import warn
|
|
18
|
+
import copy
|
|
19
|
+
|
|
20
|
+
import pandas as pd
|
|
21
|
+
import requests
|
|
22
|
+
|
|
23
|
+
from ckanapi_harvesters.auxiliary.error_level_message import ContextErrorLevelMessage, ErrorLevel
|
|
24
|
+
from ckanapi_harvesters.auxiliary.ckan_auxiliary import _string_from_element
|
|
25
|
+
from ckanapi_harvesters.auxiliary.ckan_defs import ckan_tags_sep
|
|
26
|
+
from ckanapi_harvesters.ckan_api import CkanApi
|
|
27
|
+
from ckanapi_harvesters.auxiliary.ckan_model import UpsertChoice, CkanResourceInfo
|
|
28
|
+
from ckanapi_harvesters.builder.builder_aux import positive_end_index
|
|
29
|
+
from ckanapi_harvesters.builder.builder_errors import ResourceFileNotExistMessage
|
|
30
|
+
from ckanapi_harvesters.builder.builder_field import BuilderField
|
|
31
|
+
from ckanapi_harvesters.builder.builder_resource import BuilderResourceABC
|
|
32
|
+
|
|
33
|
+
# Module-level flag, enabled by default.
# NOTE(review): not read anywhere in this module; presumably consumed by the multi-file builders - confirm.
multi_file_exclude_other_files:bool = True
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def default_progress_callback(index:int, total:int, info:Any=None, *, context:str=None, **kwargs) -> None:
    """
    Default progress report: prints one line per call on the standard output.

    :param index: index of the current request; a value equal to total reports the end of the operation
    :param total: total number of requests
    :param info: optional description of the current request: a string is printed as-is,
        a DataFrame is represented by its attrs["source"] if present, a list by "<records>",
        any other object by str(info)
    :param context: prefix of the printed line (None is replaced by an empty string)
    :param kwargs: accepted for compatibility with custom callback arguments, ignored here
    :return: None
    """
    if context is None:
        context = ""
    if index == total:
        # info is None
        print(f"{context} Finished {index}/{total} (100%)")
        return
    # with a total of 0, the percentage is undefined: report 0% instead of raising ZeroDivisionError
    percent = index / total * 100.0 if total else 0.0
    message = f"{context} Request {index}/{total} ({percent:.2f}%)"
    if info is None:
        print(message)
        return
    if isinstance(info, str):
        info_str = info
    elif isinstance(info, pd.DataFrame):
        info_str = str(info.attrs["source"]) if "source" in info.attrs else "<DataFrame>"
    elif isinstance(info, list):
        info_str = "<records>"
    else:
        info_str = str(info)
    print(message + ": " + info_str)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class BuilderMultiABC(ABC):
    """
    Abstract base implementing the upload and download loops of a resource made of several parts.

    The loops are provided in a single-threaded and in a multi-threaded (ThreadPoolExecutor) flavor.
    Subclasses implement the listing of the parts and the unitary upload/download of one part.
    """

    def __init__(self):
        # function called to report progress (None disables reporting) and its extra keyword arguments
        self.progress_callback: Union[Callable[[int, int, Any], None], None] = default_progress_callback
        self.progress_callback_kwargs: dict = {}
        # set when a worker thread fails, so the remaining tasks return without doing anything
        self.stop_event = threading.Event()
        # CKAN object used by each worker thread, indexed by thread name
        self.thread_ckan: Dict[str, CkanApi] = {}
        self.enable_multi_threaded_upload:bool = True
        self.enable_multi_threaded_download:bool = True
        # from Resource (for code validation)
        self.name:str = ""
        self.enable_download:bool = True

    def copy(self, *, dest=None):
        """
        Copy the progress callback and multi-threading settings of this object to dest.

        :param dest: object receiving the settings; it must be provided because attributes are assigned on it
        :return: dest
        """
        dest.progress_callback = self.progress_callback
        dest.progress_callback_kwargs = copy.deepcopy(self.progress_callback_kwargs)
        dest.enable_multi_threaded_upload = self.enable_multi_threaded_upload
        dest.enable_multi_threaded_download = self.enable_multi_threaded_download
        # do not copy stop_event
        return dest

    def _call_progress_callback(self, index:int, total:int, *, info:Any=None, context:str=None) -> None:
        """
        Call the progress callback, if any, with the registered extra keyword arguments.
        """
        if self.progress_callback is not None:
            self.progress_callback(index, total, info=info, context=context, **self.progress_callback_kwargs)

    def _prepare_for_multithreading(self, ckan: CkanApi):
        """
        Reset the stop event and the per-thread CKAN objects before launching worker threads.
        """
        self.stop_event.clear()
        self.thread_ckan.clear()

    def _init_thread(self, ckan: CkanApi):
        """
        Thread initializer: register a copy of the CKAN object under the name of the current thread.
        """
        thread_name = current_thread().name
        ckan_thread = ckan.copy(new_identifier=thread_name)
        ckan_thread.prepare_for_multithreading(True)  # prepare CKAN object for multi-threading
        self.thread_ckan[thread_name] = ckan_thread

    def _terminate_thread(self):
        """
        Disconnect and forget the CKAN objects created for the worker threads.
        """
        for ckan in self.thread_ckan.values():
            ckan.disconnect()
        self.thread_ckan.clear()


    ## upload -----------------------------------------------------------------
    @abstractmethod
    def init_local_files_list(self, resources_base_dir:str, cancel_if_present:bool=True, **kwargs) -> List[str]:
        """
        Behavior to list parts of an upload.
        """
        raise NotImplementedError()

    @abstractmethod
    def get_local_file_len(self) -> int:
        """
        Get the number of parts of the upload.
        """
        raise NotImplementedError()

    @abstractmethod
    def get_local_file_generator(self, resources_base_dir:str, **kwargs) -> Generator[Any, None, None]:
        """
        Returns an iterator over the parts of the upload.
        """
        raise NotImplementedError()

    @abstractmethod
    def upload_request_final(self, ckan:CkanApi, *, force:bool=False) -> None:
        """
        Final actions, called once all the parts were processed by an upload loop.
        """
        raise NotImplementedError()

    @abstractmethod
    def _unit_upload_apply(self, *, ckan:CkanApi, file:Any,
                           index:int, start_index:int, end_index:int, total:int, **kwargs) -> Any:
        """
        Unitary function deciding whether to perform upload and making the steps for the upload.
        """
        raise NotImplementedError()

    def upload_request_full(self, ckan:CkanApi, resources_base_dir:str, *,
                            threads:Union[int,None]=1, external_stop_event=None,
                            start_index:int=0, end_index:int=None, **kwargs) -> None:
        """
        Perform all the upload requests.

        :param ckan:
        :param resources_base_dir:
        :param threads: a negative value cancels the upload; None or a value above 1 selects the
            multi-threaded implementation if enable_multi_threaded_upload is set
            (None being forwarded as max_workers of the ThreadPoolExecutor);
            other values select the single-threaded loop
        :param external_stop_event: optional event, checked before each part, interrupting the upload when set
        :param start_index: forwarded to _unit_upload_apply
        :param end_index: converted with positive_end_index then forwarded to _unit_upload_apply
        :param kwargs: forwarded to init_local_files_list, get_local_file_generator and _unit_upload_apply
        :return:
        """
        # None must be tested first: comparing None to an integer raises a TypeError
        if threads is not None and threads < 0:
            # cancel large uploads in this case
            return None
        elif (threads is None or threads > 1) and self.enable_multi_threaded_upload:
            return self.upload_request_full_multi_threaded(ckan=ckan, resources_base_dir=resources_base_dir,
                                                           threads=threads, external_stop_event=external_stop_event,
                                                           start_index=start_index, end_index=end_index, **kwargs)
        else:
            self.init_local_files_list(resources_base_dir=resources_base_dir, cancel_if_present=True, **kwargs)
            if ckan.params.verbose_extra:
                print(f"Launching single-threaded upload of multi-file resource {self.name}")
            total = self.get_local_file_len()
            end_index = positive_end_index(end_index, total)
            for index, file_path in enumerate(self.get_local_file_generator(resources_base_dir=resources_base_dir, **kwargs)):
                if external_stop_event is not None and external_stop_event.is_set():
                    print(f"{ckan.identifier} Interrupted")
                    return
                self._unit_upload_apply(ckan=ckan, file=file_path, index=index,
                                        start_index=start_index, end_index=end_index, total=total, **kwargs)
            self._call_progress_callback(total, total, context=f"{ckan.identifier} single-thread upload")
            # at last, apply final actions:
            self.upload_request_final(ckan)

    def upload_request_graceful(self, ckan:CkanApi, file_path: str, *, index:int,
                                external_stop_event=None,
                                start_index:int=0, end_index:int=None, **kwargs) -> None:
        """
        Calls upload_file with checks specific to multi-threading.

        The CKAN object registered for the current thread by _init_thread is used instead of the ckan argument.
        Nothing is done if the stop event or the external stop event is set.
        If the unitary upload fails, the stop event is set and the exception is re-raised.

        :return:
        """
        # ckan.session_reset()
        # ckan.identifier = current_thread().name
        ckan = self.thread_ckan[current_thread().name]
        total = self.get_local_file_len()
        end_index = positive_end_index(end_index, total)
        if self.stop_event.is_set():
            return
        if external_stop_event is not None and external_stop_event.is_set():
            print(f"{ckan.identifier} Interrupted")
            return
        try:
            self._unit_upload_apply(ckan=ckan, file=file_path, index=index,
                                    start_index=start_index, end_index=end_index, total=total, **kwargs)
        except Exception as e:
            self.stop_event.set()  # Ensure all threads stop
            if ckan.params.verbose_extra:
                print(f"Stopping all threads because an exception occurred in thread: {e}")
            # re-raise as-is: the exception must not be declared as its own cause
            raise

    def upload_request_full_multi_threaded(self, ckan:CkanApi, resources_base_dir:str,
                                           threads:int=1, external_stop_event=None,
                                           start_index:int=0, end_index:int=None, **kwargs):
        """
        Multi-threaded implementation of upload_request_full, using ThreadPoolExecutor.
        """
        self.init_local_files_list(resources_base_dir=resources_base_dir, cancel_if_present=True, **kwargs)
        self._prepare_for_multithreading(ckan)
        try:
            with ThreadPoolExecutor(max_workers=threads, initializer=self._init_thread, initargs=(ckan,)) as executor:
                if ckan.params.verbose_extra:
                    print(f"Launching multi-threaded upload of multi-file resource {self.name}")
                futures = [executor.submit(self.upload_request_graceful, ckan=ckan, file_path=file_path, index=index,
                                           start_index=start_index, end_index=end_index, external_stop_event=external_stop_event,
                                           **kwargs)
                           for index, file_path in enumerate(self.get_local_file_generator(resources_base_dir=resources_base_dir, **kwargs))]
                for future in futures:
                    future.result()  # This will propagate the exception
            total = self.get_local_file_len()
            self._call_progress_callback(total, total, context=f"{ckan.identifier} multi-thread upload")
        except Exception as e:
            self.stop_event.set()  # Ensure all threads stop
            if ckan.params.verbose_extra:
                print(f"Stopping all threads because an exception occurred: {e}")
            raise
        finally:
            # self.stop_event.set()  # Ensure all threads stop
            if ckan.params.verbose_extra:
                print("End of multi-threaded upload...")
            # the executor is shut down at this point: release the per-thread CKAN objects, also on failure
            self._terminate_thread()
        # at last, apply final actions:
        self.upload_request_final(ckan)

    ## download -------------------------------------------------------------
    @abstractmethod
    def init_download_file_query_list(self, ckan: CkanApi, out_dir: str, cancel_if_present: bool = True, **kwargs) -> List[Any]:
        """
        Determine the list of queries to download to reconstruct the uploaded parts.
        By default, the unique combinations of the first columns of the primary key are used.
        """
        raise NotImplementedError()

    @abstractmethod
    def get_file_query_generator(self) -> Generator[Any, Any, None]:
        """
        Returns an iterator on all the file_queries.
        """
        raise NotImplementedError()

    @abstractmethod
    def get_file_query_len(self) -> int:
        """
        Returns the total number of file_queries.
        """
        raise NotImplementedError()

    @abstractmethod
    def download_file_query_item(self, ckan: CkanApi, out_dir: str, file_query_item: Any) -> Any:
        """
        Download the file_query item with its arguments
        """
        raise NotImplementedError()

    @abstractmethod
    def download_request_generator(self, ckan: CkanApi, out_dir: str) -> Generator[Any, Any, None]:
        """
        Generator to apply treatments after each request (single-threaded).

        :param ckan:
        :param out_dir:
        :return:
        """
        raise NotImplementedError()

    @abstractmethod
    def _unit_download_apply(self, ckan:CkanApi, file_query_item:Any, out_dir:str,
                             index:int, start_index:int, end_index:int, total:int, **kwargs) -> Any:
        """
        Unitary function deciding whether to perform download and making the steps for the request.
        """
        raise NotImplementedError()

    def download_request_full(self, ckan: CkanApi, out_dir: str, threads:Union[int,None]=1, external_stop_event=None,
                              start_index:int=0, end_index:int=None, force:bool=False, **kwargs) -> None:
        """
        Perform all the download requests.

        :param ckan:
        :param out_dir:
        :param threads: a negative value cancels the download; None or a value above 1 selects the
            multi-threaded implementation if enable_multi_threaded_download is set
            (None being forwarded as max_workers of the ThreadPoolExecutor);
            other values select the single-threaded loop
        :param external_stop_event: optional event, checked before each request, interrupting the download when set
        :param start_index: forwarded to _unit_download_apply
        :param end_index: converted with positive_end_index then forwarded to _unit_download_apply
        :param force: download even if enable_download is False
        :param kwargs: forwarded to init_download_file_query_list and _unit_download_apply
        :return:
        """
        if (not self.enable_download) and (not force):
            msg = f"Did not download resource {self.name} because download was disabled."
            warn(msg)
            return None
        # None must be tested first: comparing None to an integer raises a TypeError
        if threads is not None and threads < 0:
            # do not download large datasets in this case
            return None
        elif (threads is None or threads > 1) and self.enable_multi_threaded_download:
            return self.download_request_full_multi_threaded(ckan=ckan, out_dir=out_dir,
                                                             threads=threads, external_stop_event=external_stop_event,
                                                             start_index=start_index, end_index=end_index, **kwargs)
        else:
            self.init_download_file_query_list(ckan=ckan, out_dir=out_dir, cancel_if_present=True, **kwargs)
            if ckan.params.verbose_extra:
                print(f"Launching single-threaded download of multi-file resource {self.name}")
            total = self.get_file_query_len()
            end_index = positive_end_index(end_index, total)
            for index, file_query_item in enumerate(self.get_file_query_generator()):
                if external_stop_event is not None and external_stop_event.is_set():
                    print(f"{ckan.identifier} Interrupted")
                    return
                self._unit_download_apply(ckan=ckan, file_query_item=file_query_item, out_dir=out_dir,
                                          index=index, start_index=start_index, end_index=end_index, total=total, **kwargs)
            self._call_progress_callback(total, total, context=f"{ckan.identifier} single-thread download")

    def download_file_query_item_graceful(self, ckan: CkanApi, out_dir: str, file_query_item: Any, index:int,
                                          external_stop_event=None, start_index:int=0, end_index:int=None, **kwargs) -> None:
        """
        Implementation of download_file_query_item with checks for a multi-threaded download.

        The CKAN object registered for the current thread by _init_thread is used instead of the ckan argument.
        Nothing is done if the stop event or the external stop event is set.
        If the unitary download fails, the stop event is set and the exception is re-raised.
        """
        # ckan.session_reset()
        # ckan.identifier = current_thread().name
        ckan = self.thread_ckan[current_thread().name]
        total = self.get_file_query_len()
        end_index = positive_end_index(end_index, total)
        if self.stop_event.is_set():
            return
        if external_stop_event is not None and external_stop_event.is_set():
            print(f"{ckan.identifier} Interrupted")
            return
        try:
            self._unit_download_apply(ckan=ckan, file_query_item=file_query_item, out_dir=out_dir,
                                      index=index, start_index=start_index, end_index=end_index, total=total, **kwargs)
        except Exception as e:
            self.stop_event.set()  # Ensure all threads stop
            if ckan.params.verbose_extra:
                print(f"Stopping all threads because an exception occurred in thread: {e}")
            # re-raise as-is: the exception must not be declared as its own cause
            raise

    def download_request_full_multi_threaded(self, ckan: CkanApi, out_dir: str,
                                             threads: int = None, external_stop_event=None,
                                             start_index:int=0, end_index:int=-1, **kwargs) -> None:
        """
        Multi-threaded implementation of download_request_full using ThreadPoolExecutor.
        """
        self.init_download_file_query_list(ckan=ckan, out_dir=out_dir, cancel_if_present=True, **kwargs)
        self._prepare_for_multithreading(ckan)
        try:
            with ThreadPoolExecutor(max_workers=threads, initializer=self._init_thread, initargs=(ckan,)) as executor:
                if ckan.params.verbose_extra:
                    print(f"Launching multi-threaded download of multi-file resource {self.name}")
                futures = [executor.submit(self.download_file_query_item_graceful, ckan=ckan, out_dir=out_dir, file_query_item=file_query_item,
                                           index=index, external_stop_event=external_stop_event, start_index=start_index, end_index=end_index, **kwargs)
                           for index, file_query_item in enumerate(self.get_file_query_generator())]
                for future in futures:
                    future.result()  # This will propagate the exception
            total = self.get_file_query_len()
            self._call_progress_callback(total, total, context="multi-thread download")
        except Exception as e:
            self.stop_event.set()  # Ensure all threads stop
            if ckan.params.verbose_extra:
                print(f"Stopping all threads because an exception occurred: {e}")
            raise
        finally:
            # self.stop_event.set()  # Ensure all threads stop
            if ckan.params.verbose_extra:
                print("End of multi-threaded download...")
            # at last, apply final actions (also on failure, so the per-thread CKAN objects are released):
            self._terminate_thread()
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
#!python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Code to upload metadata to the CKAN server to create/update an existing package
|
|
5
|
+
The metadata is defined by the user in an Excel worksheet
|
|
6
|
+
This file implements the multi-file DataStore resource builder (BuilderMultiDataStore). See builder_datastore for specific functions to initiate datastores.
|
|
7
|
+
"""
|
|
8
|
+
from typing import Any, Generator, Union, Set, List, Dict, Tuple
|
|
9
|
+
import os
|
|
10
|
+
import requests
|
|
11
|
+
import copy
|
|
12
|
+
|
|
13
|
+
import pandas as pd
|
|
14
|
+
|
|
15
|
+
from ckanapi_harvesters.auxiliary.ckan_auxiliary import _string_from_element, find_duplicates
|
|
16
|
+
from ckanapi_harvesters.auxiliary.ckan_defs import ckan_tags_sep
|
|
17
|
+
from ckanapi_harvesters.auxiliary.ckan_errors import DuplicateNameError
|
|
18
|
+
from ckanapi_harvesters.auxiliary.path import resolve_rel_path, glob_rm_glob
|
|
19
|
+
from ckanapi_harvesters.ckan_api import CkanApi
|
|
20
|
+
from ckanapi_harvesters.harvesters.data_cleaner.data_cleaner_abc import CkanDataCleanerABC
|
|
21
|
+
from ckanapi_harvesters.auxiliary.ckan_model import CkanResourceInfo
|
|
22
|
+
from ckanapi_harvesters.builder.builder_field import BuilderField
|
|
23
|
+
from ckanapi_harvesters.builder.builder_resource_datastore import BuilderDataStoreFile
|
|
24
|
+
from ckanapi_harvesters.builder.builder_resource_multi_file import BuilderMultiFile
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class BuilderMultiDataStore(BuilderMultiFile):
    """
    Builder for a multi-file resource where each file is handled through its own
    BuilderDataStoreFile. All files share the field builders, primary key, indexes,
    auxiliary function names and upload data cleaner defined on this object.
    """

    def __init__(self, *, name:str=None, format:str=None, description:str=None,
                 resource_id:str=None, download_url:str=None):
        """
        :param name: forwarded to the parent builder
        :param format: forwarded to the parent builder
        :param description: forwarded to the parent builder
        :param resource_id: forwarded to the parent builder
        :param download_url: forwarded to the parent builder
        """
        super().__init__(name=name, format=format, description=description, resource_id=resource_id, download_url=download_url)
        # field definitions indexed by field name (None until loaded)
        self.field_builders: Union[Dict[str, BuilderField],None] = None
        # None means "not specified"; an empty list means explicitly "none"
        self.primary_key: Union[List[str],None] = None
        self.indexes: Union[List[str],None] = None
        self.aux_upload_fun_name:str = ""
        self.aux_download_fun_name:str = ""
        self.data_cleaner_upload:Union[CkanDataCleanerABC,None] = None

    def copy(self, *, dest=None):
        """
        Copy this builder into dest (a new BuilderMultiDataStore if dest is None).

        :param dest: object receiving the attributes, created if None
        :return: dest
        """
        if dest is None:
            dest = BuilderMultiDataStore()
        super().copy(dest=dest)
        dest.field_builders = copy.deepcopy(self.field_builders)
        dest.primary_key = copy.deepcopy(self.primary_key)
        dest.indexes = copy.deepcopy(self.indexes)
        dest.aux_upload_fun_name = self.aux_upload_fun_name
        dest.aux_download_fun_name = self.aux_download_fun_name
        # the cleaner was previously dropped by copy(); it is shared by reference,
        # as in _data_store_builder_of_file
        dest.data_cleaner_upload = self.data_cleaner_upload
        return dest

    def _load_from_df_row(self, row: pd.Series, base_dir:str=None):
        """
        Load the attributes of this builder from one row of the resources table.

        The "primary key" and "indexes" cells are split on ckan_tags_sep; the value
        "none" (case-insensitive) gives an empty list. When _string_from_element
        returns None for a cell, the corresponding attribute is left unchanged.
        The "upload function" and "download function" columns are optional.

        :param row: row with the columns "primary key" and "indexes"
        :param base_dir: not used by this method
        """
        # NOTE(review): base_dir is not forwarded to the parent implementation - confirm this is intended
        super()._load_from_df_row(row=row)
        primary_keys_string: str = _string_from_element(row["primary key"])
        indexes_string: str = _string_from_element(row["indexes"])
        if primary_keys_string is not None:
            if primary_keys_string.lower() == "none":
                self.primary_key = []
            else:
                self.primary_key = [field.strip() for field in primary_keys_string.split(ckan_tags_sep)]
        if indexes_string is not None:
            if indexes_string.lower() == "none":
                self.indexes = []
            else:
                self.indexes = [field.strip() for field in indexes_string.split(ckan_tags_sep)]
        if "upload function" in row.keys():
            self.aux_upload_fun_name: str = _string_from_element(row["upload function"], empty_value="")
        if "download function" in row.keys():
            self.aux_download_fun_name: str = _string_from_element(row["download function"], empty_value="")

    def _load_fields_df(self, fields_df: pd.DataFrame):
        """
        Rebuild self.field_builders from a table with one row per field.

        The column names of fields_df are lower-cased and stripped in place.

        :param fields_df: table of field definitions
        """
        fields_df.columns = fields_df.columns.map(str.lower)
        fields_df.columns = fields_df.columns.map(str.strip)
        self.field_builders = {}
        # NOTE(review): rows sharing a field name overwrite each other here, so
        # _check_field_duplicates cannot detect them afterwards - confirm intent
        for index, row in fields_df.iterrows():
            field_builder = BuilderField()
            field_builder._load_from_df_row(row=row)
            self.field_builders[field_builder.name] = field_builder

    def _check_field_duplicates(self):
        """
        Raise if two field builders carry the same name. Does nothing when no
        field builders are defined.

        :raises DuplicateNameError: if at least one field name is duplicated
        """
        if self.field_builders is None:
            return
        duplicates = find_duplicates([field_builder.name for field_builder in self.field_builders.values()])
        if len(duplicates) > 0:
            raise DuplicateNameError("Field", duplicates)

    def _get_fields_dict(self) -> Dict[str, dict]:
        """
        :return: dictionary field name -> field dictionary, or None when no
            field builders are defined
        :raises DuplicateNameError: if at least one field name is duplicated
        """
        # test for None first: the duplicate check needs the field builders
        if self.field_builders is None:
            return None
        self._check_field_duplicates()
        return {field_builder.name: field_builder._to_dict() for field_builder in self.field_builders.values()}

    def _get_fields_df(self) -> pd.DataFrame:
        """
        :return: table with one row per field (empty when no field builders are defined)
        :raises DuplicateNameError: if at least one field name is duplicated
        """
        fields_dict = self._get_fields_dict()
        fields_dict_list = [] if fields_dict is None else list(fields_dict.values())
        fields_df = pd.DataFrame.from_records(fields_dict_list)
        return fields_df

    @staticmethod
    def resource_mode_str() -> str:
        """
        :return: the mode string identifying this builder type
        """
        return "MultiDataStore"

    def _to_dict(self, include_id:bool=True) -> dict:
        """
        Extend the parent dictionary with the "Primary key" and "Indexes" entries,
        each joined with ckan_tags_sep (empty string when undefined or empty).

        :param include_id: forwarded to the parent implementation
        :return: dictionary describing this resource builder
        """
        d = super()._to_dict(include_id=include_id)
        d["Primary key"] = ckan_tags_sep.join(self.primary_key) if self.primary_key else ""
        d["Indexes"] = ckan_tags_sep.join(self.indexes) if self.indexes is not None else ""
        return d

    def _data_store_builder_of_file(self, file_path:str) -> Tuple[BuilderDataStoreFile, str]:
        """
        Create the DataStore builder of one file of this resource.

        The builder is named after the file and shares (by reference) the field
        builders, primary key, indexes and upload data cleaner of this object.

        :param file_path: path of the file; only its last component is used as name
        :return: the builder and the directory part of file_path
        """
        file_dir, file_name = os.path.split(file_path)
        ds_builder = BuilderDataStoreFile(name=file_name, description=self.description, download_url=self.download_url,
                                          format=self.format, file_name=file_name)
        ds_builder.field_builders = self.field_builders
        ds_builder.primary_key = self.primary_key
        ds_builder.indexes = self.indexes
        ds_builder.package_name = self.package_name
        ds_builder.aux_upload_fun_name = self.aux_upload_fun_name
        ds_builder.aux_download_fun_name = self.aux_download_fun_name
        ds_builder.aliases = None
        ds_builder.data_cleaner_upload = self.data_cleaner_upload
        return ds_builder, file_dir


    ## Upload ----------------
    def upload_file(self, ckan:CkanApi, package_id:str, file_path:str, *,
                    reupload:bool=False, cancel_if_present:bool=True) -> CkanResourceInfo:
        """
        Upload one file through the patch_request of its own DataStore builder.

        :param ckan: CKAN API object
        :param package_id: package receiving the resource
        :param file_path: path of the file; its directory is used as resources_base_dir
        :param reupload: forwarded to patch_request
        :param cancel_if_present: not used by this method
        :return: the value returned by patch_request
        """
        # NOTE(review): cancel_if_present is accepted but never forwarded - confirm this is intended
        ds_builder, file_dir = self._data_store_builder_of_file(file_path=file_path)
        return ds_builder.patch_request(ckan=ckan, package_id=package_id, reupload=reupload,
                                        resources_base_dir=file_dir)


    ## Download --------------
    def download_file_query_item_df(self, ckan: CkanApi, out_dir: str, file_query_item: str, full_download:bool=True) -> Tuple[str, pd.DataFrame]:
        """
        Download one item of the file query through its own DataStore builder.

        :param ckan: CKAN API object
        :param out_dir: base output directory; self.dir_name (passed through glob_rm_glob) is resolved against it
        :param file_query_item: used as the resource name
        :param full_download: forwarded to download_request
        :return: the download destination reported by the builder and the value returned by download_request
        """
        resource_name = file_query_item
        ds_builder, _ = self._data_store_builder_of_file(file_path=resource_name)
        file_dir = resolve_rel_path(out_dir, glob_rm_glob(self.dir_name), field=f"File/URL of resource {self.name}")
        df = ds_builder.download_request(ckan, out_dir=file_dir, full_download=full_download)
        return ds_builder.downloaded_destination, df

    def download_file_query_item(self, ckan: CkanApi, out_dir: str, file_query_item: str, full_download:bool=True) -> Tuple[Union[str,None], Union[requests.Response,None]]:
        """
        Same as download_file_query_item_df, but the downloaded data is discarded.

        :return: the download destination and None (there is no response object)
        """
        downloaded_destination, df = self.download_file_query_item_df(ckan=ckan, out_dir=out_dir, file_query_item=file_query_item,full_download=full_download)
        return downloaded_destination, None

    def download_request_generator_df(self, ckan: CkanApi, out_dir: str,
                                      excluded_resource_names:Set[str]=None) -> Generator[Tuple[Union[str,None], Union[pd.DataFrame,None]], Any, None]:
        """
        Initialize the file query list, then download its items one by one.

        :param ckan: CKAN API object
        :param out_dir: base output directory
        :param excluded_resource_names: forwarded to init_download_file_query_list
        :return: generator yielding the result of download_file_query_item_df for each item
        """
        self.init_download_file_query_list(ckan=ckan, out_dir=out_dir, cancel_if_present=True,
                                           excluded_resource_names=excluded_resource_names)
        for file_query_item in self.get_file_query_generator():
            yield self.download_file_query_item_df(ckan=ckan, out_dir=out_dir, file_query_item=file_query_item)
|
|
146
|
+
|