persidict 0.34.1__tar.gz → 0.34.2__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of persidict might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: persidict
3
- Version: 0.34.1
3
+ Version: 0.34.2
4
4
  Summary: Simple persistent key-value store for Python. Values are stored as files on a disk or as S3 objects on AWS cloud.
5
5
  Keywords: persistence,dicts,distributed,parallel
6
6
  Author: Vlad (Volodymyr) Pavlov
@@ -4,7 +4,7 @@ build-backend = "uv_build"
4
4
 
5
5
  [project]
6
6
  name = "persidict"
7
- version = "0.34.1"
7
+ version = "0.34.2"
8
8
  description = "Simple persistent key-value store for Python. Values are stored as files on a disk or as S3 objects on AWS cloud."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -197,48 +197,61 @@ class S3Dict(PersiDict):
197
197
 
198
198
  obj_name = self._build_full_objectname(key)
199
199
 
200
-
201
- try:
202
- head = self.s3_client.head_object(
203
- Bucket=self.bucket_name, Key=obj_name)
204
- s3_etag = head.get("ETag")
205
- except ClientError as e:
206
- if e.response['Error']['Code'] == '404':
207
- raise KeyError(f"Key {key} not found in S3 bucket {self.bucket_name}")
208
- else:
209
- # Re-raise other client errors (e.g., permissions, throttling)
210
- raise
211
-
200
+ cached_etag = None
212
201
  etag_file_name = file_name + ".__etag__"
213
- if not self.immutable_items and os.path.exists(file_name) and os.path.exists(etag_file_name):
202
+ if not self.immutable_items and os.path.exists(file_name) and os.path.exists(
203
+ etag_file_name):
214
204
  with open(etag_file_name, "r") as f:
215
205
  cached_etag = f.read()
216
- if cached_etag == s3_etag:
217
- return self.local_cache._read_from_file(file_name)
218
-
219
- dir_name = os.path.dirname(file_name)
220
- fd, temp_path = tempfile.mkstemp(dir=dir_name, prefix=".__tmp__")
221
206
 
222
207
  try:
223
- with os.fdopen(fd, 'wb') as f:
224
- self.s3_client.download_fileobj(self.bucket_name, obj_name, f)
225
- f.flush()
226
- os.fsync(f.fileno())
227
- os.replace(temp_path, file_name)
208
+ get_kwargs = {'Bucket': self.bucket_name, 'Key': obj_name}
209
+ if cached_etag:
210
+ get_kwargs['IfNoneMatch'] = cached_etag
211
+
212
+ response = self.s3_client.get_object(**get_kwargs)
213
+
214
+ # 200 OK: object was downloaded, either because it's new or changed.
215
+ s3_etag = response.get("ETag")
216
+ body = response['Body']
217
+
218
+ dir_name = os.path.dirname(file_name)
219
+ fd, temp_path = tempfile.mkstemp(dir=dir_name, prefix=".__tmp__")
220
+
228
221
  try:
229
- if os.name == 'posix':
230
- dir_fd = os.open(dir_name, os.O_RDONLY)
231
- try:
232
- os.fsync(dir_fd)
233
- finally:
234
- os.close(dir_fd)
235
- except OSError:
236
- pass
237
- except:
238
- os.remove(temp_path) # Clean up temp file on failure
239
- raise
222
+ with os.fdopen(fd, 'wb') as f:
223
+ # Stream body to file to avoid loading all in memory
224
+ for chunk in body.iter_chunks():
225
+ f.write(chunk)
226
+ f.flush()
227
+ os.fsync(f.fileno())
228
+ os.replace(temp_path, file_name)
229
+ try:
230
+ if os.name == 'posix':
231
+ dir_fd = os.open(dir_name, os.O_RDONLY)
232
+ try:
233
+ os.fsync(dir_fd)
234
+ finally:
235
+ os.close(dir_fd)
236
+ except OSError:
237
+ pass
238
+ except:
239
+ os.remove(temp_path) # Clean up temp file on failure
240
+ raise
241
+
242
+ self._write_etag_file(file_name, s3_etag)
240
243
 
241
- self._write_etag_file(file_name, s3_etag)
244
+ except ClientError as e:
245
+ error_code = e.response.get("Error", {}).get("Code")
246
+ if e.response['ResponseMetadata']['HTTPStatusCode'] == 304:
247
+ # 304 Not Modified: our cached version is up-to-date.
248
+ # The file will be read from cache at the end of the function.
249
+ pass
250
+ elif e.response.get("Error", {}).get("Code") == 'NoSuchKey':
251
+ raise KeyError(f"Key {key} not found in S3 bucket {self.bucket_name}")
252
+ else:
253
+ # Re-raise other client errors (e.g., permissions, throttling)
254
+ raise
242
255
 
243
256
  return self.local_cache._read_from_file(file_name)
244
257
 
File without changes