datamarket 0.7.41__py3-none-any.whl → 0.7.125__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,127 @@
1
+ ########################################################################################################################
2
+ # IMPORTS
3
+
4
+ import logging
5
+ from pathlib import Path
6
+ from typing import Any, Dict, List, Optional
7
+
8
+ from azure.storage.blob import BlobServiceClient, ContainerClient
9
+ from pendulum import now
10
+
11
+ ########################################################################################################################
12
+ # CLASSES
13
+
14
logger = logging.getLogger(__name__)


class AzureBlobInterface:
    """Access Azure Blob Storage containers through named profiles.

    Every config section called ``azure:<profile>`` becomes one profile holding a container
    client. A section must define either ``sas_container_url`` or both ``connection_string``
    and ``container_name``; a section defining neither is skipped with a warning. The first
    profile found is the active one until ``switch_profile`` selects another.
    """

    def __init__(self, config):
        """Build one container client per ``azure:<profile>`` section of ``config``.

        Args:
            config: Mapping of section name to settings. When it exposes ``sections()``
                (as ``configparser.ConfigParser`` does) every section is inspected;
                otherwise no profile is created.
        """
        self.profiles: List[Dict[str, Any]] = []
        self.config = config

        for section in getattr(self.config, "sections", lambda: [])():
            if not section.startswith("azure:"):
                continue

            profile_name = section.split(":", 1)[1]
            settings = self.config[section]
            connection_string = settings.get("connection_string")
            container_name = settings.get("container_name")
            sas_container_url = settings.get("sas_container_url")

            if sas_container_url:
                session = ContainerClient.from_container_url(sas_container_url)
            elif connection_string and container_name:
                session = BlobServiceClient.from_connection_string(connection_string).get_container_client(
                    container_name
                )
            else:
                # Without this branch ``session`` would be unbound (first section) or would
                # silently reuse the client created for the previous section.
                logger.warning(f"Azure profile {profile_name} has no usable credentials, skipping")
                continue

            self.profiles.append(
                {
                    "profile": profile_name,
                    "container_name": container_name,
                    "session": session,
                }
            )

        if not self.profiles:
            logger.warning("No Azure profiles found in config file")
        self.current_profile: Optional[Dict[str, Any]] = self.profiles[0] if self.profiles else None

    def switch_profile(self, profile_name: str) -> None:
        """Make ``profile_name`` the active profile; log a warning and keep the current one if unknown."""
        for profile in self.profiles:
            if profile["profile"] == profile_name:
                self.current_profile = profile
                return
        logger.warning(f"Profile {profile_name} not found")

    def _active_session(self):
        """Return the container client of the active profile.

        Raises:
            RuntimeError: If no profile is active (none was configured).
        """
        if self.current_profile is None:
            raise RuntimeError("No active Azure profile: check the 'azure:<profile>' sections of the config")
        return self.current_profile["session"]

    @staticmethod
    def _build_remote_path(remote_folder, remote_file):
        """Join folder and file name with ``/``; an empty folder yields the bare file name."""
        return f"{remote_folder}/{remote_file}" if remote_folder else remote_file

    @staticmethod
    def _summary_path(remote_path):
        """Return the ``<name>_resumen.csv`` path stored next to ``remote_path``.

        Only the file name is cut at its first dot, so dots in folder names are preserved.
        """
        folder, separator, file_name = remote_path.rpartition("/")
        return f"{folder}{separator}{file_name.split('.')[0]}_resumen.csv"

    @staticmethod
    def _render_summary_csv(record):
        """Serialize ``record`` as a two-line CSV (header row plus one data row)."""
        import csv
        import io

        buffer = io.StringIO()
        writer = csv.writer(buffer, lineterminator="\n")
        writer.writerow(record.keys())
        # str() keeps missing values rendered as "None"; csv.writer alone would emit an empty field.
        writer.writerow([str(value) for value in record.values()])
        return buffer.getvalue()

    def upload_file(
        self,
        local_file,
        remote_folder,
        remote_file=None,
        upload_file_info=False,
        **file_info_data,
    ):
        """Upload ``local_file`` to the active container, overwriting any existing blob.

        Args:
            local_file: Path of the file to upload.
            remote_folder: Blob name prefix; falsy to store the blob at the container root.
            remote_file: Blob file name; defaults to the name of ``local_file``.
            upload_file_info: When true, also upload the summary CSV (see ``upload_file_info``).
            **file_info_data: Values forwarded to ``upload_file_info``.
        """
        if not remote_file:
            remote_file = Path(local_file).name

        remote_path = self._build_remote_path(remote_folder, remote_file)

        blob_client = self._active_session().get_blob_client(remote_path)
        with open(local_file, "rb") as data:
            blob_client.upload_blob(data, overwrite=True)

        if upload_file_info:
            self.upload_file_info(remote_path, **file_info_data)

    def upload_file_info(self, remote_path, **file_info_data):
        """Upload a one-row summary CSV describing the blob at ``remote_path``.

        The row holds the blob path, the optional ``num_rows`` and ``schema_version`` keyword
        values, and the current Europe/Madrid timestamp.
        """
        blob_client = self._active_session().get_blob_client(self._summary_path(remote_path))

        new_record = {
            "file": remote_path,
            "num_rows": file_info_data.get("num_rows"),
            "schema_version": file_info_data.get("schema_version"),
            "upload_date": now(tz="Europe/Madrid").to_datetime_string(),
        }

        blob_client.upload_blob(self._render_summary_csv(new_record), overwrite=True)

    def download_file(self, local_file, remote_path):
        """Download the blob at ``remote_path`` into ``local_file``."""
        blob_client = self._active_session().get_blob_client(remote_path)
        blob_data = blob_client.download_blob()
        with open(local_file, "wb") as f:
            blob_data.readinto(f)

    def check_file_exists_and_not_empty(self, remote_file, remote_folder, min_size=100):
        """
        Checks if a blob exists in the specified folder and has a size greater than min_size bytes.

        Args:
            remote_file (str): The name of the file (blob) to check.
            remote_folder (str): The folder (prefix) where the file is located.
            min_size (int): Size in bytes the blob must exceed. Defaults to 100.

        Returns:
            bool: True if the blob exists and is larger than min_size, False otherwise
            (including when the check itself fails).
        """

        remote_path = self._build_remote_path(remote_folder, remote_file)

        try:
            blob_client = self._active_session().get_blob_client(remote_path)
            if not blob_client.exists():
                logger.debug(f"Blob '{remote_path}' does not exist.")
                return False

            properties = blob_client.get_blob_properties()
            if properties.size > min_size:
                logger.debug(f"Blob '{remote_path}' exists and is not empty (size: {properties.size}).")
                return True

            logger.debug(f"Blob '{remote_path}' exists but size ({properties.size}) is not > {min_size} bytes.")
            return False
        except Exception as e:
            logger.error(f"Error checking blob '{remote_path}': {e}")
            # In case of error, assume it doesn't exist or is empty to allow upload attempt
            return False
@@ -18,13 +18,9 @@ class DriveInterface:
18
18
  if "drive" in config:
19
19
  self.config = config["drive"]
20
20
 
21
- GoogleAuth.DEFAULT_SETTINGS[
22
- "client_config_file"
23
- ] = f'{self.config["config_path"]}/credentials.json'
21
+ GoogleAuth.DEFAULT_SETTINGS["client_config_file"] = f"{self.config['config_path']}/credentials.json"
24
22
 
25
- self.gauth = GoogleAuth(
26
- settings_file=f'{self.config["config_path"]}/settings.yaml'
27
- )
23
+ self.gauth = GoogleAuth(settings_file=f"{self.config['config_path']}/settings.yaml")
28
24
  self.gauth.LocalWebserverAuth()
29
25
 
30
26
  self.drive = GoogleDrive(self.gauth)
@@ -48,24 +44,78 @@ class DriveInterface:
48
44
  logger.info(f"deleting old {filename}...")
49
45
  drive_file.Delete(param={"supportsTeamDrives": True})
50
46
 
47
def _create_remote_dir_tree(self, base_folder_id, path_parts):
    """
    Ensure the nested folders described by path_parts exist under base_folder_id.

    Each part is stripped of surrounding whitespace and empty parts are ignored. An existing
    folder with the same title is reused (the first match wins); otherwise the folder is created.

    Args:
        base_folder_id: Id of the Drive folder the tree hangs from.
        path_parts: Folder names ordered from the outermost to the innermost.

    Returns:
        The folder_id of the deepest folder (or base_folder_id if path_parts is empty).
    """
    parent_id = base_folder_id
    for raw_part in path_parts:
        part = raw_part.strip()
        if not part:
            continue

        # Backslashes and single quotes must be escaped inside a quoted Drive query value,
        # otherwise a folder called e.g. "it's" produces a malformed query.
        escaped_title = part.replace("\\", "\\\\").replace("'", "\\'")
        query = (
            f"'{parent_id}' in parents and title = '{escaped_title}'"
            " and mimeType = 'application/vnd.google-apps.folder' and trashed=false"
        )
        results = self.drive.ListFile(
            {
                "q": query,
                "corpora": "teamDrive",
                "teamDriveId": self.team_id,
                "includeTeamDriveItems": True,
                "supportsTeamDrives": True,
            }
        ).GetList()

        if results:
            parent_id = results[0]["id"]
            continue

        folder_metadata = {
            "title": part,
            "mimeType": "application/vnd.google-apps.folder",
            "parents": [
                {
                    "kind": "drive#fileLink",
                    "teamDriveId": self.team_id,
                    "id": parent_id,
                }
            ],
        }
        folder = self.drive.CreateFile(folder_metadata)
        folder.Upload(param={"supportsTeamDrives": True})
        # The id is read after Upload(); presumably that is when Drive assigns it.
        parent_id = folder["id"]

    return parent_id
91
+
51
92
def upload_file(self, local_filename, drive_filename, folder_id):
    """Upload local_filename to the team drive, replacing older files with the same title.

    Args:
        local_filename: Path of the local file whose content is uploaded.
        drive_filename: Target name. It may contain ``/``-separated folders, which are
            resolved (and created when missing) below folder_id.
        folder_id: Id of the Drive folder the target path is relative to.

    Raises:
        ValueError: If drive_filename is empty or made only of slashes, since the upload
            would otherwise create a file with an empty title.
    """
    drive_filename = drive_filename.strip("/")
    *folders, filename = drive_filename.split("/")
    if not filename:
        raise ValueError("drive_filename must contain a file name")

    # Only walk (and possibly create) the folder tree when the name actually has folders.
    target_folder_id = self._create_remote_dir_tree(folder_id, folders) if folders else folder_id

    self.delete_old_files(filename, target_folder_id)

    f = self.drive.CreateFile(
        {
            "title": filename,
            "parents": [
                {
                    "kind": "drive#fileLink",
                    "teamDriveId": self.team_id,
                    "id": target_folder_id,
                }
            ],
        }
    )
    f.SetContentFile(local_filename)

    logger.info(f"uploading {drive_filename} to folder: {target_folder_id}...")
    f.Upload(param={"supportsTeamDrives": True})
70
120
 
71
121
  def validate_file(self, filename, folder_id):
@@ -4,6 +4,7 @@
4
4
  import logging
5
5
  from ftplib import FTP, FTP_TLS
6
6
  from pathlib import Path
7
+ from typing import Any, Dict, List, Optional
7
8
 
8
9
  ########################################################################################################################
9
10
  # CLASSES
@@ -13,31 +14,37 @@ logger = logging.getLogger(__name__)
13
14
 
14
15
  class FTPInterface:
15
16
def __init__(self, config):
    """Open one logged-in FTP session per ``ftp:<profile>`` section of ``config``.

    Each section must provide ``server``, ``username``, ``password`` and ``ftps``
    (``"true"`` selects FTP over TLS). The first profile found becomes the active one and
    its session is exposed as ``self.ftp``; with no profile both are ``None``.

    Args:
        config: Mapping of section name to settings. When it exposes ``sections()``
            every section is inspected; otherwise no profile is created.

    Raises:
        Exception: Whatever connecting or logging in raises; sessions opened so far are
            closed before the error propagates.
    """
    self.profiles: List[Dict[str, Any]] = []
    self.config = config

    opened_sessions = []
    try:
        for section in getattr(self.config, "sections", lambda: [])():
            if not section.startswith("ftp:"):
                continue

            settings = self.config[section]
            profile_name = section.split(":", 1)[1]
            connection_cls = FTP_TLS if settings["ftps"].lower() == "true" else FTP
            ftp_conn = connection_cls(settings["server"])  # noqa: S321
            # Track the session before login so a rejected login is cleaned up as well.
            opened_sessions.append(ftp_conn)
            ftp_conn.login(settings["username"], settings["password"])
            self.profiles.append({"profile": profile_name, "session": ftp_conn})
    except Exception:
        # The constructor is about to fail, so nobody could ever close these sessions.
        for session in opened_sessions:
            session.close()
        raise

    if not self.profiles:
        logger.warning("no ftp section in config")

    self.current_profile: Optional[Dict[str, Any]] = self.profiles[0] if self.profiles else None
    self.ftp = self.current_profile["session"] if self.current_profile else None

33
def switch_profile(self, profile_name: str) -> None:
    """Activate the profile called profile_name and expose its session as ``self.ftp``.

    The first profile with a matching name wins. When none matches, a warning is logged
    and the active profile is left untouched.
    """
    selected = next(
        (candidate for candidate in self.profiles if candidate["profile"] == profile_name),
        None,
    )
    if selected is None:
        logger.warning(f"Profile {profile_name} not found")
        return

    self.current_profile = selected
    self.ftp = selected["session"]
33
40
 
34
41
  def upload_file(self, local_file, remote_folder, remote_file=None):
35
42
  if not remote_file:
36
43
  remote_file = Path(local_file).name
37
44
 
38
- self._create_remote_dir_tree(full_path=f"{remote_folder}{remote_file}")
45
+ self._create_remote_dir_tree(full_path=f"/{remote_folder}{remote_file}")
39
46
 
40
- self.ftp.cwd(remote_folder)
47
+ self.ftp.cwd(f"/{remote_folder}")
41
48
 
42
49
  with open(local_file, "rb") as f:
43
50
  self.ftp.storbinary(f"STOR {remote_file}", f)