datamarket 0.6.0__py3-none-any.whl → 0.10.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of datamarket might be problematic.
- datamarket/__init__.py +0 -1
- datamarket/exceptions/__init__.py +1 -0
- datamarket/exceptions/main.py +118 -0
- datamarket/interfaces/alchemy.py +1934 -25
- datamarket/interfaces/aws.py +81 -14
- datamarket/interfaces/azure.py +127 -0
- datamarket/interfaces/drive.py +60 -10
- datamarket/interfaces/ftp.py +37 -14
- datamarket/interfaces/llm.py +1220 -0
- datamarket/interfaces/nominatim.py +314 -42
- datamarket/interfaces/peerdb.py +272 -104
- datamarket/interfaces/proxy.py +354 -50
- datamarket/interfaces/tinybird.py +7 -15
- datamarket/params/nominatim.py +439 -0
- datamarket/utils/__init__.py +1 -1
- datamarket/utils/airflow.py +10 -7
- datamarket/utils/alchemy.py +2 -1
- datamarket/utils/logs.py +88 -0
- datamarket/utils/main.py +138 -10
- datamarket/utils/nominatim.py +201 -0
- datamarket/utils/playwright/__init__.py +0 -0
- datamarket/utils/playwright/async_api.py +274 -0
- datamarket/utils/playwright/sync_api.py +281 -0
- datamarket/utils/requests.py +655 -0
- datamarket/utils/selenium.py +6 -12
- datamarket/utils/strings/__init__.py +1 -0
- datamarket/utils/strings/normalization.py +217 -0
- datamarket/utils/strings/obfuscation.py +153 -0
- datamarket/utils/strings/standardization.py +40 -0
- datamarket/utils/typer.py +2 -1
- datamarket/utils/types.py +1 -0
- datamarket-0.10.3.dist-info/METADATA +172 -0
- datamarket-0.10.3.dist-info/RECORD +38 -0
- {datamarket-0.6.0.dist-info → datamarket-0.10.3.dist-info}/WHEEL +1 -2
- datamarket-0.6.0.dist-info/METADATA +0 -49
- datamarket-0.6.0.dist-info/RECORD +0 -24
- datamarket-0.6.0.dist-info/top_level.txt +0 -1
- {datamarket-0.6.0.dist-info → datamarket-0.10.3.dist-info/licenses}/LICENSE +0 -0
datamarket/interfaces/aws.py
CHANGED
@@ -3,6 +3,8 @@
 
 import io
 import logging
+from typing import Any, Dict, List, Optional
+
 import boto3
 
 ########################################################################################################################
@@ -12,34 +14,44 @@ logger = logging.getLogger(__name__)
 
 
 class AWSInterface:
-    def __init__(self, config):
-        self.profiles = []
+    def __init__(self, config) -> None:
+        self.profiles: List[Dict[str, Any]] = []
         self.config = config
 
-        for section in self.config
+        for section in getattr(self.config, "sections", lambda: [])():
             if section.startswith("aws:"):
                 profile_name = section.split(":", 1)[1]
+                bucket_value = self.config[section].get("buckets", "")
+                buckets = [b.strip() for b in bucket_value.split(",") if b.strip()]
+                session = boto3.Session(profile_name=profile_name)
+
                 self.profiles.append(
                     {
                         "profile": profile_name,
-                        "
-                        "session":
+                        "buckets": buckets,
+                        "session": session,
                     }
                 )
 
         if not self.profiles:
             logger.warning("No AWS profiles found in config file")
 
-        self.current_profile = self.profiles[0] if self.profiles else None
+        self.current_profile: Optional[Dict[str, Any]] = self.profiles[0] if self.profiles else None
         self._update_resources()
 
-    def _update_resources(self):
+    def _update_resources(self) -> None:
+        """Refresh S3 resources for the current profile and set default bucket (first in list)"""
         if self.current_profile:
             self.s3 = self.current_profile["session"].resource("s3")
             self.s3_client = self.s3.meta.client
-
+            buckets = self.current_profile.get("buckets", [])
+            self.bucket = buckets[0] if buckets else None
+        else:
+            self.s3 = None
+            self.s3_client = None
+            self.bucket = None
 
-    def switch_profile(self, profile_name):
+    def switch_profile(self, profile_name: str) -> None:
         for profile in self.profiles:
             if profile["profile"] == profile_name:
                 self.current_profile = profile
@@ -47,14 +59,69 @@ class AWSInterface:
                 return
         logger.warning(f"Profile {profile_name} not found")
 
-    def
+    def switch_bucket(self, bucket: str) -> None:
+        if not self.current_profile:
+            logger.warning("No current AWS profile to switch bucket on")
+            return
+
+        buckets = self.current_profile.get("buckets") or []
+        if bucket not in buckets:
+            logger.warning(f"Bucket {bucket} not found in profile {self.current_profile.get('profile')}")
+            return
+
+        self.bucket = bucket
+
+    def switch_bucket_for_profile(self, profile_name: str, bucket: str) -> None:
+        """
+        Select a profile and then switch its active bucket.
+        """
+        for profile in self.profiles:
+            if profile["profile"] == profile_name:
+                self.current_profile = profile
+                self._update_resources()  # sets default bucket & s3 clients
+                self.switch_bucket(bucket)  # only sets self.bucket if valid
+                return
+        logger.warning(f"Profile {profile_name} not found")
+
+    def get_bucket_url(self) -> Optional[str]:
+        """Return active bucket URL."""
+        if not self.bucket:
+            logger.warning("No active bucket selected")
+            return None
+        region = self.s3_client.meta.region_name
+        return f"https://{self.bucket}.s3.{region}.amazonaws.com"
+
+    def get_file(self, s3_path: str):
+        if not self.bucket:
+            logger.warning("No active bucket selected")
+            return None
         try:
             return self.s3.Object(self.bucket, s3_path).get()
         except self.s3_client.exceptions.NoSuchKey:
             logger.info(f"{s3_path} does not exist")
+            return None
+
+    def file_exists(self, s3_path: str) -> bool:
+        if not self.bucket:
+            logger.warning("No active bucket selected")
+            return False
+        try:
+            self.s3_client.head_object(Bucket=self.bucket, Key=s3_path)
+            return True
+        except self.s3_client.exceptions.NoSuchKey:
+            return False
+        except Exception as e:
+            logger.error(f"Error checking existence of {s3_path}: {e}")
+            raise
 
-    def read_file_as_bytes(self, s3_path):
-
+    def read_file_as_bytes(self, s3_path: str) -> Optional[io.BytesIO]:
+        obj = self.get_file(s3_path)
+        if not obj:
+            return None
+        return io.BytesIO(obj["Body"].read())
 
-    def upload_file(self, local_path, s3_path):
-        self.
+    def upload_file(self, local_path: str, s3_path: str, **kwargs) -> None:
+        if not self.bucket:
+            logger.warning("No active bucket selected")
+            return
+        self.s3.Bucket(self.bucket).upload_file(local_path, s3_path, **kwargs)
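Taken together, the new methods give AWSInterface a small multi-profile, multi-bucket API: profiles come from aws:<name> config sections, each with a comma-separated buckets key, and the first bucket of the first profile is active by default. A minimal usage sketch follows, assuming an INI-style config read with configparser (which matches the sections()/get() calls above); the profile, bucket, and key names are hypothetical:

from configparser import ConfigParser

from datamarket.interfaces.aws import AWSInterface

# Hypothetical config.ini:
# [aws:analytics]
# buckets = raw-data, curated-data
config = ConfigParser()
config.read("config.ini")

aws = AWSInterface(config)
aws.switch_bucket_for_profile("analytics", "curated-data")  # hypothetical names
print(aws.get_bucket_url())  # e.g. https://curated-data.s3.<region>.amazonaws.com
if not aws.file_exists("reports/latest.csv"):  # hypothetical key
    aws.upload_file("latest.csv", "reports/latest.csv")

One caveat worth knowing: in practice head_object tends to surface a missing key as a generic botocore ClientError (404) rather than NoSuchKey, so the broad except branch in file_exists may be the one that fires.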
datamarket/interfaces/azure.py
ADDED
@@ -0,0 +1,127 @@
+########################################################################################################################
+# IMPORTS
+
+import logging
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+from azure.storage.blob import BlobServiceClient, ContainerClient
+from pendulum import now
+
+########################################################################################################################
+# CLASSES
+
+logger = logging.getLogger(__name__)
+
+
+class AzureBlobInterface:
+    def __init__(self, config):
+        self.profiles: List[Dict[str, Any]] = []
+        self.config = config
+
+        for section in getattr(self.config, "sections", lambda: [])():
+            if section.startswith("azure:"):
+                profile_name = section.split(":", 1)[1]
+                connection_string = self.config[section].get("connection_string")
+                container_name = self.config[section].get("container_name")
+                sas_container_url = self.config[section].get("sas_container_url")
+
+                if sas_container_url:
+                    session = ContainerClient.from_container_url(sas_container_url)
+                elif connection_string and container_name:
+                    session = BlobServiceClient.from_connection_string(connection_string).get_container_client(
+                        container_name
+                    )
+
+                self.profiles.append(
+                    {
+                        "profile": profile_name,
+                        "container_name": container_name,
+                        "session": session,
+                    }
+                )
+
+        if not self.profiles:
+            logger.warning("No Azure profiles found in config file")
+        self.current_profile: Optional[Dict[str, Any]] = self.profiles[0] if self.profiles else None
+
+    def switch_profile(self, profile_name: str) -> None:
+        for profile in self.profiles:
+            if profile["profile"] == profile_name:
+                self.current_profile = profile
+                return
+        logger.warning(f"Profile {profile_name} not found")
+
+    def upload_file(
+        self,
+        local_file,
+        remote_folder,
+        remote_file=None,
+        upload_file_info=False,
+        **file_info_data,
+    ):
+        if not remote_file:
+            remote_file = Path(local_file).name
+
+        remote_path = f"{remote_folder}/{remote_file}" if remote_folder else remote_file
+
+        blob_client = self.current_profile["session"].get_blob_client(remote_path)
+        with open(local_file, "rb") as data:
+            blob_client.upload_blob(data, overwrite=True)
+
+        if upload_file_info:
+            self.upload_file_info(remote_path, **file_info_data)
+
+    def upload_file_info(self, remote_path, **file_info_data):
+        summary_file = remote_path.split(".")[0] + "_resumen.csv"
+        blob_client = self.current_profile["session"].get_blob_client(summary_file)
+
+        new_record = {
+            "file": remote_path,
+            "num_rows": file_info_data.get("num_rows"),
+            "schema_version": file_info_data.get("schema_version"),
+            "upload_date": now(tz="Europe/Madrid").to_datetime_string(),
+        }
+
+        new_record_str = "file,num_rows,schema_version,upload_date\n"
+        new_record_str += ",".join([str(v) for v in new_record.values()]) + "\n"
+
+        blob_client.upload_blob(new_record_str, overwrite=True)
+
+    def download_file(self, local_file, remote_path):
+        blob_client = self.current_profile["session"].get_blob_client(remote_path)
+        blob_data = blob_client.download_blob()
+        with open(local_file, "wb") as f:
+            blob_data.readinto(f)
+
+    def check_file_exists_and_not_empty(self, remote_file, remote_folder):
+        """
+        Checks if a blob exists in the specified folder and has a size greater than 100 bytes.
+
+        Args:
+            remote_file (str): The name of the file (blob) to check.
+            remote_folder (str): The folder (prefix) where the file is located.
+
+        Returns:
+            bool: True if the blob exists and has a size greater than 100, False otherwise.
+        """
+
+        remote_path = f"{remote_folder}/{remote_file}" if remote_folder else remote_file
+
+        try:
+            blob_client = self.current_profile["session"].get_blob_client(remote_path)
+            if blob_client.exists():
+                properties = blob_client.get_blob_properties()
+                if properties.size > 100:  # Check if size is greater than 100 bytes
+                    logger.debug(f"Blob '{remote_path}' exists and is not empty (size: {properties.size}).")
+                    return True
+                else:
+                    logger.debug(f"Blob '{remote_path}' exists but size ({properties.size}) is not > 100 bytes.")
+                    return False
+            else:
+                logger.debug(f"Blob '{remote_path}' does not exist.")
+                return False
+        except Exception as e:
+            logger.error(f"Error checking blob '{remote_path}': {e}")
+            # In case of error, assume it doesn't exist or is empty to allow upload attempt
+            return False
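The new AzureBlobInterface mirrors the AWS profile pattern: azure:<name> sections that provide either sas_container_url or the connection_string/container_name pair the constructor reads. A minimal sketch of driving it, with hypothetical profile, container, and file names:

from configparser import ConfigParser

from datamarket.interfaces.azure import AzureBlobInterface

# Hypothetical config.ini:
# [azure:backups]
# connection_string = <Azure storage connection string>
# container_name = exports
config = ConfigParser()
config.read("config.ini")

azure = AzureBlobInterface(config)
azure.switch_profile("backups")  # hypothetical profile name
if not azure.check_file_exists_and_not_empty("data.csv", "2024/06"):
    # upload_file_info=True also writes a data_resumen.csv summary blob alongside
    azure.upload_file("data.csv", "2024/06", upload_file_info=True, num_rows=1000, schema_version="v2")

Note that upload_file_info derives the summary name by splitting the remote path on its first dot, so paths containing a dot before the extension would truncate oddly.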
datamarket/interfaces/drive.py
CHANGED
@@ -18,13 +18,9 @@ class DriveInterface:
         if "drive" in config:
             self.config = config["drive"]
 
-            GoogleAuth.DEFAULT_SETTINGS[
-                "client_config_file"
-            ] = f'{self.config["config_path"]}/credentials.json'
+            GoogleAuth.DEFAULT_SETTINGS["client_config_file"] = f"{self.config['config_path']}/credentials.json"
 
-            self.gauth = GoogleAuth(
-                settings_file=f'{self.config["config_path"]}/settings.yaml'
-            )
+            self.gauth = GoogleAuth(settings_file=f"{self.config['config_path']}/settings.yaml")
             self.gauth.LocalWebserverAuth()
 
             self.drive = GoogleDrive(self.gauth)
@@ -48,24 +44,78 @@ class DriveInterface:
             logger.info(f"deleting old {filename}...")
             drive_file.Delete(param={"supportsTeamDrives": True})
 
+    def _create_remote_dir_tree(self, base_folder_id, path_parts):
+        """
+        Ensure the nested folders described by path_parts exist under base_folder_id.
+        Returns the folder_id of the deepest folder (or base_folder_id if path_parts is empty).
+        """
+        parent_id = base_folder_id
+        for part in path_parts:
+            part = part.strip()
+            if not part:
+                continue
+
+            query = (
+                f"'{parent_id}' in parents and title = '{part}'"
+                " and mimeType = 'application/vnd.google-apps.folder' and trashed=false"
+            )
+            results = self.drive.ListFile(
+                {
+                    "q": query,
+                    "corpora": "teamDrive",
+                    "teamDriveId": self.team_id,
+                    "includeTeamDriveItems": True,
+                    "supportsTeamDrives": True,
+                }
+            ).GetList()
+
+            if results:
+                parent_id = results[0]["id"]
+            else:
+                folder_metadata = {
+                    "title": part,
+                    "mimeType": "application/vnd.google-apps.folder",
+                    "parents": [
+                        {
+                            "kind": "drive#fileLink",
+                            "teamDriveId": self.team_id,
+                            "id": parent_id,
+                        }
+                    ],
+                }
+                folder = self.drive.CreateFile(folder_metadata)
+                folder.Upload(param={"supportsTeamDrives": True})
+                parent_id = folder["id"]
+
+        return parent_id
+
     def upload_file(self, local_filename, drive_filename, folder_id):
-
+        drive_filename = drive_filename.strip("/")
+        parts = drive_filename.split("/")
+        if len(parts) > 1:
+            *folders, filename = parts
+            target_folder_id = self._create_remote_dir_tree(folder_id, folders)
+        else:
+            filename = parts[0]
+            target_folder_id = folder_id
+
+        self.delete_old_files(filename, target_folder_id)
 
         f = self.drive.CreateFile(
             {
-                "title":
+                "title": filename,
                 "parents": [
                     {
                         "kind": "drive#fileLink",
                         "teamDriveId": self.team_id,
-                        "id":
+                        "id": target_folder_id,
                     }
                 ],
             }
         )
         f.SetContentFile(local_filename)
 
-        logger.info(f"uploading {drive_filename} to folder: {
+        logger.info(f"uploading {drive_filename} to folder: {target_folder_id}...")
         f.Upload(param={"supportsTeamDrives": True})
 
     def validate_file(self, filename, folder_id):
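upload_file now treats drive_filename as a slash-separated path: intermediate Team Drive folders are looked up or created by _create_remote_dir_tree, and any stale copy of the file is deleted from the target folder before upload. A minimal sketch, assuming the config layout the constructor reads above; the folder id and paths are hypothetical:

from configparser import ConfigParser

from datamarket.interfaces.drive import DriveInterface

config = ConfigParser()
config.read("config.ini")  # hypothetical [drive] section with config_path pointing at credentials.json/settings.yaml

drive = DriveInterface(config)
# Ensures reports/2024/ exists under the base folder, deletes any old summary.csv
# there, then uploads the new one.
drive.upload_file("summary.csv", "reports/2024/summary.csv", "0ABcDeFgHiJkL")  # hypothetical folder id

Since folders are matched by title, duplicate folder names resolve to the first match the Drive API returns.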
datamarket/interfaces/ftp.py
CHANGED
@@ -4,6 +4,7 @@
 import logging
 from ftplib import FTP, FTP_TLS
 from pathlib import Path
+from typing import Any, Dict, List, Optional
 
 ########################################################################################################################
 # CLASSES
@@ -13,29 +14,37 @@ logger = logging.getLogger(__name__)
 
 class FTPInterface:
     def __init__(self, config):
-
-
+        self.profiles: List[Dict[str, Any]] = []
+        self.config = config
+        for section in getattr(self.config, "sections", lambda: [])():
+            if section.startswith("ftp:"):
+                profile_name = section.split(":", 1)[1]
+                ftps = self.config[section]["ftps"].lower() == "true"
+                ftp_conn = FTP_TLS(self.config[section]["server"]) if ftps else FTP(self.config[section]["server"])  # noqa: S321
+                ftp_conn.login(self.config[section]["username"], self.config[section]["password"])
+                self.profiles.append({"profile": profile_name, "session": ftp_conn})
 
-
-        else:
+        if not self.profiles:
             logger.warning("no ftp section in config")
 
-
-
-        ftp_conn = FTP_TLS(self.config["server"])
+        self.current_profile: Optional[Dict[str, Any]] = self.profiles[0] if self.profiles else None
+        self.ftp = self.current_profile["session"] if self.current_profile else None
 
-
-
-
-
-
-
+    def switch_profile(self, profile_name: str) -> None:
+        for profile in self.profiles:
+            if profile["profile"] == profile_name:
+                self.current_profile = profile
+                self.ftp = profile["session"]
+                return
+        logger.warning(f"Profile {profile_name} not found")
 
     def upload_file(self, local_file, remote_folder, remote_file=None):
         if not remote_file:
             remote_file = Path(local_file).name
 
-        self.
+        self._create_remote_dir_tree(full_path=f"/{remote_folder}{remote_file}")
+
+        self.ftp.cwd(f"/{remote_folder}")
 
         with open(local_file, "rb") as f:
             self.ftp.storbinary(f"STOR {remote_file}", f)
@@ -43,3 +52,17 @@ class FTPInterface:
     def download_file(self, local_file, remote_file):
         with open(local_file, "wb") as f:
             self.ftp.retrbinary(f"RETR {remote_file}", f.write)
+
+    def _create_remote_dir_tree(self, full_path):
+        dir_tree = full_path.split("/")[0:-1]  # Exclude filename
+
+        for part in dir_tree:
+            if not part:
+                continue
+
+            try:
+                self.ftp.cwd(part)
+            except Exception as e:
+                logger.warning(f"Error while creating remote directory: {e}")
+                self.ftp.mkd(part)
+                self.ftp.cwd(part)
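FTPInterface now follows the same profile scheme as the AWS and Azure interfaces: one ftp:<name> section per server with server, username, password, and an ftps flag, each connection logged in eagerly at construction time. A minimal sketch with hypothetical profile and paths:

from configparser import ConfigParser

from datamarket.interfaces.ftp import FTPInterface

# Hypothetical config.ini:
# [ftp:mirror]
# server = ftp.example.com
# username = user
# password = secret
# ftps = true
config = ConfigParser()
config.read("config.ini")

ftp = FTPInterface(config)
ftp.switch_profile("mirror")  # hypothetical profile name
# Note the trailing slash: upload_file builds f"/{remote_folder}{remote_file}",
# so the folder must end in "/" for the directory tree to split correctly.
ftp.upload_file("data.csv", "exports/2024/")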