PyS3Uploader 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of PyS3Uploader might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: PyS3Uploader
3
- Version: 0.1.0
3
+ Version: 0.1.1
4
4
  Summary: Python module to upload objects to an S3 bucket.
5
5
  Author-email: Vignesh Rao <svignesh1793@gmail.com>
6
6
  License: MIT License
@@ -90,7 +90,7 @@ if __name__ == '__main__':
90
90
  wrapper = s3.Uploader(
91
91
  bucket_name="BUCKET_NAME",
92
92
  upload_dir="FULL_PATH_TO_UPLOAD",
93
- prefix_dir="START_DIRECTORY_IN_S3"
93
+ exclude_path="PART_OF_UPLOAD_DIR_TO_EXCLUDE"
94
94
  )
95
95
  wrapper.run_in_parallel()
96
96
  ```
@@ -103,7 +103,7 @@ if __name__ == '__main__':
103
103
  wrapper = s3.Uploader(
104
104
  bucket_name="BUCKET_NAME",
105
105
  upload_dir="FULL_PATH_TO_UPLOAD",
106
- prefix_dir="START_DIRECTORY_IN_S3"
106
+ exclude_path="PART_OF_UPLOAD_DIR_TO_EXCLUDE"
107
107
  )
108
108
  wrapper.run()
109
109
  ```
@@ -113,7 +113,8 @@ if __name__ == '__main__':
113
113
  - **upload_dir** - Directory to upload.
114
114
 
115
115
  #### Optional kwargs
116
- - **prefix_dir** - Start directory from ``upload_dir`` to use as root in S3. Defaults to `None`
116
+ - **s3_prefix** - S3 object prefix for each file. Defaults to ``None``
117
+ - **exclude_path** - Part of ``upload_dir`` that has to be excluded from the object keys. Defaults to ``None``
117
118
  - **logger** - Bring your own custom pre-configured logger. Defaults to on-screen logging.
118
119
  <br><br>
119
120
  - **region_name** - AWS region name. Defaults to the env var `AWS_DEFAULT_REGION`
@@ -0,0 +1,11 @@
1
+ s3/__init__.py,sha256=XgYHKbn7gc5_nzydIKmKVjigeMtOBLqRHKHb8GJi5M4,66
2
+ s3/exceptions.py,sha256=hH3jlMOe8yjBatQK9EdndWZz4QESU74KSY_iDhQ37SY,2585
3
+ s3/logger.py,sha256=oH540oq8jY723jA4lDWlgfFPLbNgGXTkDwFpB7TLO_o,1196
4
+ s3/tree.py,sha256=DiQ2ekMMaj2m_P3-iKkEqSuJCJZ_UZxcAwHtAoPVa5c,1824
5
+ s3/uploader.py,sha256=tQaelL7grZSWFydZOekQgVz4Fipm0PHzbt2J17ddYHs,8563
6
+ s3/utils.py,sha256=pKVT2GbDGQKpFaHOmVrCfiQhvgr1vuSsITt_0gHguAA,687
7
+ pys3uploader-0.1.1.dist-info/LICENSE,sha256=8k-hEraOzyum0GvmmK65YxNRTFXK7eIFHJ0OshJXeTk,1068
8
+ pys3uploader-0.1.1.dist-info/METADATA,sha256=sW_fsQxpoZ8f8ivI0Vb4oUXt1RSlFuHJDmpP9h_CXVU,7286
9
+ pys3uploader-0.1.1.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
10
+ pys3uploader-0.1.1.dist-info/top_level.txt,sha256=iQp4y1P58Q633gj8M08kHE4mqqT0hixuDWcniDk_RJ4,3
11
+ pys3uploader-0.1.1.dist-info/RECORD,,
s3/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  from s3.uploader import Uploader # noqa: F401
2
2
 
3
- version = "0.1.0"
3
+ version = "0.1.1"
s3/uploader.py CHANGED
@@ -11,7 +11,7 @@ from tqdm import tqdm
11
11
 
12
12
  from s3.exceptions import BucketNotFound
13
13
  from s3.logger import default_logger
14
- from s3.utils import UploadResults, get_object_path, getenv
14
+ from s3.utils import UploadResults, getenv, urljoin
15
15
 
16
16
 
17
17
  class Uploader:
@@ -27,7 +27,8 @@ class Uploader:
27
27
  self,
28
28
  bucket_name: str,
29
29
  upload_dir: str,
30
- prefix_dir: str = None,
30
+ s3_prefix: str = None,
31
+ exclude_path: str = None,
31
32
  region_name: str = None,
32
33
  profile_name: str = None,
33
34
  aws_access_key_id: str = None,
@@ -38,13 +39,28 @@ class Uploader:
38
39
 
39
40
  Args:
40
41
  bucket_name: Name of the bucket.
41
- upload_dir: Name of the directory to be uploaded.
42
- prefix_dir: Start folder name from upload_dir.
42
+ upload_dir: Full path of the directory to be uploaded.
43
+ s3_prefix: Particular bucket prefix within which the upload should happen.
44
+ exclude_path: Full directory path to exclude from S3 object prefix.
43
45
  region_name: Name of the AWS region.
44
46
  profile_name: AWS profile name.
45
47
  aws_access_key_id: AWS access key ID.
46
48
  aws_secret_access_key: AWS secret access key.
47
49
  logger: Bring your own logger.
50
+
51
+ See Also:
52
+ exclude_path:
53
+ When upload directory is "/home/ubuntu/Desktop/S3Upload", each file will naturally have the full prefix.
54
+ However, this behavior can be avoided by specifying the ``exclude_path`` parameter.
55
+
56
+ If exclude_path is set to: ``/home/ubuntu/Desktop``, then the file path
57
+ ``/home/ubuntu/Desktop/S3Upload/sub-dir/photo.jpg`` will be uploaded as ``S3Upload/sub-dir/photo.jpg``
58
+
59
+ s3_prefix:
60
+ If provided, ``s3_prefix`` will always be attached to each object.
61
+
62
+ If ``s3_prefix`` is set to: ``2025``, then the file path
63
+ ``/home/ubuntu/Desktop/S3Upload/sub/photo.jpg`` will be uploaded as ``2025/S3Upload/sub/photo.jpg``
48
64
  """
49
65
  self.session = boto3.Session(
50
66
  profile_name=profile_name or getenv("PROFILE_NAME"),
@@ -54,8 +70,9 @@ class Uploader:
54
70
  )
55
71
  self.s3 = self.session.resource(service_name="s3", config=self.RETRY_CONFIG)
56
72
  self.logger = logger or default_logger()
57
- self.upload_dir = upload_dir or getenv("UPLOAD_DIR", "SOURCE")
58
- self.prefix_dir = prefix_dir
73
+ self.upload_dir = upload_dir or getenv("UPLOAD_DIR", "UPLOAD_SOURCE")
74
+ self.s3_prefix = s3_prefix
75
+ self.exclude_path = exclude_path
59
76
  self.bucket_name = bucket_name
60
77
  # noinspection PyUnresolvedReferences
61
78
  self.bucket: boto3.resources.factory.s3.Bucket = None
@@ -70,9 +87,9 @@ class Uploader:
70
87
  BucketNotFound: If bucket name was not found.
71
88
  """
72
89
  self.start = time.time()
73
- if self.prefix_dir and self.prefix_dir not in self.upload_dir.split(os.sep):
90
+ if self.exclude_path and self.exclude_path not in self.upload_dir:
74
91
  raise ValueError(
75
- f"\n\n\tPrefix folder name {self.prefix_dir!r} is not a part of upload directory {self.upload_dir!r}"
92
+ f"\n\n\tStart folder {self.exclude_path!r} is not a part of upload directory {self.upload_dir!r}"
76
93
  )
77
94
  if not self.upload_dir:
78
95
  raise ValueError("\n\n\tCannot proceed without an upload directory.")
@@ -87,7 +104,6 @@ class Uploader:
87
104
  if self.bucket_name not in buckets:
88
105
  raise BucketNotFound(f"\n\n\t{self.bucket_name} was not found in {_alias} account.\n\tAvailable: {buckets}")
89
106
  self.upload_dir = os.path.abspath(self.upload_dir)
90
- self.logger.info("Bucket objects from '%s' will be uploaded to '%s'", self.upload_dir, self.bucket_name)
91
107
  # noinspection PyUnresolvedReferences
92
108
  self.bucket: boto3.resources.factory.s3.Bucket = self.s3.Bucket(self.bucket_name)
93
109
 
@@ -119,14 +135,18 @@ class Uploader:
119
135
  for __path, __directory, __files in os.walk(self.upload_dir):
120
136
  for file_ in __files:
121
137
  file_path = os.path.join(__path, file_)
122
- if self.prefix_dir:
123
- try:
124
- object_path = get_object_path(file_path, self.prefix_dir)
125
- except ValueError as error:
126
- self.logger.error(error)
127
- continue
128
- else:
129
- object_path = self.prefix_dir
138
+ if self.exclude_path:
139
+ file_path = file_path.replace(self.exclude_path, "")
140
+ # Lists in python are ordered, so s3 prefix will get loaded first when provided
141
+ url_parts = []
142
+ if self.s3_prefix:
143
+ url_parts.extend(
144
+ self.s3_prefix.split(os.sep) if os.sep in self.s3_prefix else self.s3_prefix.split("/")
145
+ )
146
+ # Add rest of the file path to parts before normalizing as an S3 object URL
147
+ url_parts.extend(file_path.split(os.sep))
148
+ # Remove falsy values (empty strings from consecutive separators) via filter(None, ...)
149
+ object_path = urljoin(*filter(None, url_parts))
130
150
  files_to_upload[object_path] = file_path
131
151
  return files_to_upload
132
152
 
@@ -135,6 +155,7 @@ class Uploader:
135
155
  self.init()
136
156
  keys = self._get_files()
137
157
  self.logger.debug(keys)
158
+ self.logger.info("%d files from '%s' will be uploaded to '%s'", len(keys), self.upload_dir, self.bucket_name)
138
159
  self.logger.info("Initiating upload process.")
139
160
  for objectpath, filepath in tqdm(
140
161
  keys.items(), total=len(keys), unit="file", leave=True, desc=f"Uploading files from {self.upload_dir}"
@@ -154,9 +175,15 @@ class Uploader:
154
175
  max_workers: Number of maximum threads to use.
155
176
  """
156
177
  self.init()
157
- self.logger.info(f"Number of threads: {max_workers}")
158
178
  keys = self._get_files()
159
- self.logger.info("Initiating upload process.")
179
+ self.logger.debug(keys)
180
+ self.logger.info(
181
+ "%d files from '%s' will be uploaded to '%s' with maximum concurrency of: %d",
182
+ len(keys),
183
+ self.upload_dir,
184
+ self.bucket_name,
185
+ max_workers,
186
+ )
160
187
  with ThreadPoolExecutor(max_workers=max_workers) as executor:
161
188
  futures = [executor.submit(self._uploader, *kv) for kv in keys.items()]
162
189
  for future in tqdm(
s3/utils.py CHANGED
@@ -20,26 +20,11 @@ def getenv(*args, default: str = None) -> str:
20
20
  return default
21
21
 
22
22
 
23
- def get_object_path(filepath: str, start_folder_name: str):
24
- """Construct object path without absolute path's pretext.
25
-
26
- Args:
27
- filepath: Absolute file path to upload.
28
- start_folder_name: Folder name to begin object path.
23
+ def urljoin(*args) -> str:
24
+ """Joins given arguments into a URL. Leading and trailing slashes are stripped from each argument.
29
25
 
30
26
  Returns:
31
27
  str:
32
- Returns the object name.
28
+ Joined url.
33
29
  """
34
- # Split file_path into parts
35
- parts = filepath.split(os.sep)
36
- try:
37
- # Find index of the folder to start from
38
- start_index = parts.index(start_folder_name)
39
- except ValueError:
40
- # Folder not found in path, fallback to full path or raise error
41
- raise ValueError(f"Folder '{start_folder_name}' not found in path '{filepath}'")
42
- # Reconstruct path from start_folder_name onwards
43
- relative_parts = parts[start_index:]
44
- # Join with os.sep for system-appropriate separators
45
- return os.sep.join(relative_parts)
30
+ return "/".join(map(lambda x: str(x).rstrip("/").lstrip("/"), args))
@@ -1,11 +0,0 @@
1
- s3/__init__.py,sha256=zSLbLrsnVD-tRtiiTcT3JDWzmpnSC9mP6uHMXt2cyEc,66
2
- s3/exceptions.py,sha256=hH3jlMOe8yjBatQK9EdndWZz4QESU74KSY_iDhQ37SY,2585
3
- s3/logger.py,sha256=oH540oq8jY723jA4lDWlgfFPLbNgGXTkDwFpB7TLO_o,1196
4
- s3/tree.py,sha256=DiQ2ekMMaj2m_P3-iKkEqSuJCJZ_UZxcAwHtAoPVa5c,1824
5
- s3/uploader.py,sha256=Z2EvtUlR5jlL1xbeQWj4XLBfhTn4yWPm9E8WhPcz6Qk,7056
6
- s3/utils.py,sha256=swkdwkfn43e8I3dGL9HAGZ-dba3fIeorihVAjTE07wc,1291
7
- pys3uploader-0.1.0.dist-info/LICENSE,sha256=8k-hEraOzyum0GvmmK65YxNRTFXK7eIFHJ0OshJXeTk,1068
8
- pys3uploader-0.1.0.dist-info/METADATA,sha256=gkAfOF-hEXYfW9p0PZiJhcrpLjhNorK7LFZNtU_ybrE,7188
9
- pys3uploader-0.1.0.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
10
- pys3uploader-0.1.0.dist-info/top_level.txt,sha256=iQp4y1P58Q633gj8M08kHE4mqqT0hixuDWcniDk_RJ4,3
11
- pys3uploader-0.1.0.dist-info/RECORD,,