persidict-0.37.1-py3-none-any.whl → persidict-0.37.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of persidict might be problematic. Click here for more details.

persidict/file_dir_dict.py CHANGED
@@ -23,12 +23,70 @@ import jsonpickle.ext.pandas as jsonpickle_pandas
23
23
  import parameterizable
24
24
  from parameterizable import sort_dict_by_keys
25
25
 
26
- from .jokers import KEEP_CURRENT, DELETE_CURRENT, Joker
26
+ from .jokers import Joker
27
27
  from .safe_chars import replace_unsafe_chars
28
28
  from .safe_str_tuple import SafeStrTuple
29
29
  from .safe_str_tuple_signing import sign_safe_str_tuple, unsign_safe_str_tuple
30
30
  from .persi_dict import PersiDict, PersiDictKey, non_empty_persidict_key
31
31
 
32
+
33
+ if os.name == 'nt':
34
+ import msvcrt
35
+ import ctypes
36
+ from ctypes import wintypes
37
+
38
+ GENERIC_READ = 0x80000000
39
+ FILE_SHARE_READ = 0x00000001
40
+ FILE_SHARE_WRITE = 0x00000002
41
+ FILE_SHARE_DELETE = 0x00000004
42
+ OPEN_EXISTING = 3
43
+ INVALID_HANDLE_VALUE = -1
44
+
45
+ CreateFileW = ctypes.windll.kernel32.CreateFileW
46
+ CreateFileW.argtypes = [wintypes.LPWSTR, wintypes.DWORD, wintypes.DWORD, wintypes.LPVOID, wintypes.DWORD, wintypes.DWORD, wintypes.HANDLE]
47
+ CreateFileW.restype = wintypes.HANDLE
48
+
49
+ CloseHandle = ctypes.windll.kernel32.CloseHandle
50
+ CloseHandle.argtypes = [wintypes.HANDLE]
51
+ CloseHandle.restype = wintypes.BOOL
52
+
53
+ def add_long_path_prefix(path: str) -> str:
54
+ """Add the '\\\\?\\' prefix to a path on Windows to support long paths.
55
+
56
+ Args:
57
+ path (str): The original file or directory path.
58
+
59
+ Returns:
60
+ str: The modified path with the '\\\\?\\' prefix if on Windows
61
+ and not already present; otherwise, the original path.
62
+ """
63
+ if not path.startswith('\\\\?\\'):
64
+ return f'\\\\?\\{path}'
65
+ else:
66
+ return path
67
+
68
+ def drop_long_path_prefix(path: str) -> str:
69
+ """Remove the '\\\\?\\' prefix from a path on Windows if present.
70
+
71
+ Args:
72
+ path (str): The file or directory path, possibly with the '\\\\?\\' prefix.
73
+
74
+ Returns:
75
+ str: The path without the '\\\\?\\' prefix if it was present; otherwise,
76
+ the original path.
77
+ """
78
+ if path.startswith('\\\\?\\'):
79
+ return path[4:]
80
+ else:
81
+ return path
82
+
83
+ else:
84
+ def add_long_path_prefix(path: str) -> str:
85
+ return path
86
+
87
+ def drop_long_path_prefix(path: str) -> str:
88
+ return path
89
+
32
90
  jsonpickle_numpy.register_handlers()
33
91
  jsonpickle_pandas.register_handlers()
34
92
 
@@ -94,17 +152,16 @@ class FileDirDict(PersiDict):
94
152
  raise ValueError("For non-string values file_type must be either 'pkl' or 'json'.")
95
153
 
96
154
  base_dir = str(base_dir)
155
+ self._base_dir = os.path.abspath(base_dir)
156
+ self._base_dir = add_long_path_prefix(self._base_dir)
97
157
 
98
- if os.path.isfile(base_dir):
158
+ if os.path.isfile(self._base_dir):
99
159
  raise ValueError(f"{base_dir} is a file, not a directory.")
100
160
 
101
- os.makedirs(base_dir, exist_ok=True)
102
- if not os.path.isdir(base_dir):
161
+ os.makedirs(self._base_dir, exist_ok=True)
162
+ if not os.path.isdir(self._base_dir):
103
163
  raise RuntimeError(f"Failed to create or access directory: {base_dir}")
104
164
 
105
- # self.base_dir_param = _base_dir
106
- self._base_dir = os.path.abspath(base_dir)
107
-
108
165
 
109
166
  def get_params(self):
110
167
  """Return configuration parameters of the dictionary.
@@ -134,8 +191,7 @@ class FileDirDict(PersiDict):
134
191
  Returns:
135
192
  str: URL of the underlying storage in the form "file://<abs_path>".
136
193
  """
137
- return pathlib.Path(self._base_dir).as_uri()
138
-
194
+ return pathlib.Path(self._base_dir).resolve().as_uri()
139
195
 
140
196
 
141
197
  @property
@@ -147,7 +203,7 @@ class FileDirDict(PersiDict):
147
203
  Returns:
148
204
  str: Absolute path to the base directory used by this dictionary.
149
205
  """
150
- return self._base_dir
206
+ return drop_long_path_prefix(self._base_dir)
151
207
 
152
208
 
153
209
  def __len__(self) -> int:
@@ -220,69 +276,77 @@ class FileDirDict(PersiDict):
220
276
  the key prefix.
221
277
 
222
278
  Returns:
223
- str: An absolute path within base_dir corresponding to the key.
279
+ str: An absolute path within base_dir corresponding to the key. On
280
+ Windows, this path is prefixed with '\\\\?\\' to support paths
281
+ longer than 260 characters.
224
282
  """
225
283
 
226
284
  key = sign_safe_str_tuple(key, self.digest_len)
227
- key = [self._base_dir] + list(key.strings)
228
- dir_names = key[:-1] if is_file_path else key
285
+ key_components = [self._base_dir] + list(key.strings)
286
+ dir_names = key_components[:-1] if is_file_path else key_components
287
+
288
+ dir_path = str(os.path.join(*dir_names))
229
289
 
230
290
  if create_subdirs:
231
- dir_path = os.path.join(*dir_names)
232
- os.makedirs(dir_path, exist_ok=True)
291
+ path_for_makedirs = dir_path
292
+ path_for_makedirs = add_long_path_prefix(path_for_makedirs)
293
+ os.makedirs(path_for_makedirs, exist_ok=True)
233
294
 
234
295
  if is_file_path:
235
- file_name = key[-1] + "." + self.file_type
236
- return os.path.join(*dir_names, file_name)
296
+ file_name = key_components[-1] + "." + self.file_type
297
+ final_path = os.path.join(dir_path, file_name)
237
298
  else:
238
- return str(os.path.join(*dir_names))
299
+ final_path = dir_path
239
300
 
301
+ return add_long_path_prefix(final_path)
240
302
 
241
- def _build_key_from_full_path(self, full_path:str)->SafeStrTuple:
242
- """Convert an absolute filesystem path back into a SafeStrTuple key.
243
303
 
244
- This function reverses _build_full_path, stripping base_dir, removing the
245
- file_type extension if the path points to a file, and unsigning the key
246
- components according to digest_len.
304
+ def _build_key_from_full_path(self, full_path:str)->SafeStrTuple:
305
+ """Convert an absolute filesystem path back into a SafeStrTuple key.
247
306
 
248
- Args:
249
- full_path (str): Absolute path within the dictionary's base
250
- directory.
307
+ This function reverses _build_full_path, stripping base_dir, removing the
308
+ file_type extension if the path points to a file, and unsigning the key
309
+ components according to digest_len.
251
310
 
252
- Returns:
253
- SafeStrTuple: The reconstructed (unsigned) key.
311
+ Args:
312
+ full_path (str): Absolute path within the dictionary's base
313
+ directory.
254
314
 
255
- Raises:
256
- ValueError: If full_path is not located under base_dir.
257
- """
315
+ Returns:
316
+ SafeStrTuple: The reconstructed (unsigned) key.
258
317
 
259
- # Ensure we're working with absolute paths
260
- full_path = os.path.abspath(full_path)
318
+ Raises:
319
+ ValueError: If full_path is not located under base_dir.
320
+ """
261
321
 
262
- # Remove the base directory from the path
263
- if not full_path.startswith(self._base_dir):
264
- raise ValueError(f"Path {full_path} is not within base directory {self._base_dir}")
322
+ # Remove the base directory from the path
323
+ if not full_path.startswith(self._base_dir):
324
+ raise ValueError(f"Path {full_path} is not within base directory {self._base_dir}")
265
325
 
266
- # Get the relative path
267
- rel_path = full_path[len(self._base_dir):].lstrip(os.sep)
326
+ # Get the relative path
327
+ rel_path = os.path.relpath(
328
+ drop_long_path_prefix(full_path),
329
+ drop_long_path_prefix(self._base_dir))
330
+ rel_path = os.path.normpath(rel_path)
268
331
 
269
- if not rel_path:
270
- return SafeStrTuple()
332
+ if not rel_path or rel_path == ".":
333
+ return SafeStrTuple()
271
334
 
272
- # Split the path into components
273
- path_components = rel_path.split(os.sep)
335
+ # Split the path into components
336
+ path_components = rel_path.split(os.sep)
274
337
 
275
- # If it's a file path, remove the file extension from the last component
276
- if os.path.isfile(full_path) and path_components[-1].endswith("." + self.file_type):
277
- path_components[-1] = path_components[-1][:-len("." + self.file_type)]
338
+ # If it's a file path, remove the file extension from the last component
339
+ suffix = "." + self.file_type
340
+ if path_components[-1].endswith(suffix):
341
+ path_components[-1] = path_components[-1][:-len(suffix)]
278
342
 
279
- # Create a SafeStrTuple from the path components
280
- key = SafeStrTuple(*path_components)
343
+ # Create a SafeStrTuple from the path components
344
+ key = SafeStrTuple(*path_components)
281
345
 
282
- # Unsign the key
283
- key = unsign_safe_str_tuple(key, self.digest_len)
346
+ # Unsign the key
347
+ key = unsign_safe_str_tuple(key, self.digest_len)
284
348
 
285
- return key
349
+ return key
286
350
 
287
351
 
288
352
  def get_subdict(self, key:PersiDictKey) -> FileDirDict:
@@ -301,7 +365,9 @@ class FileDirDict(PersiDict):
301
365
  """
302
366
  key = SafeStrTuple(key)
303
367
  full_dir_path = self._build_full_path(
304
- key, create_subdirs = True, is_file_path = False)
368
+ key,
369
+ create_subdirs = True,
370
+ is_file_path = False)
305
371
  return FileDirDict(
306
372
  base_dir= full_dir_path
307
373
  , file_type=self.file_type
@@ -319,17 +385,50 @@ class FileDirDict(PersiDict):
319
385
  Returns:
320
386
  Any: The deserialized value according to file_type.
321
387
  """
388
+ file_open_mode = 'rb' if self.file_type == "pkl" else 'r'
389
+ if os.name == 'nt':
390
+ handle = CreateFileW(file_name, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_DELETE | FILE_SHARE_WRITE, None, OPEN_EXISTING, 0, None)
391
+ if int(handle) == INVALID_HANDLE_VALUE:
392
+ error_code = ctypes.GetLastError()
393
+ raise ctypes.WinError(error_code)
394
+
395
+ fd = None
396
+ try:
397
+ if self.file_type == "pkl":
398
+ fd_open_mode = os.O_RDONLY | os.O_BINARY
399
+ else:
400
+ fd_open_mode = os.O_RDONLY
401
+ fd = msvcrt.open_osfhandle(int(handle),fd_open_mode)
402
+ except Exception:
403
+ CloseHandle(handle)
404
+ raise
405
+
406
+ try:
407
+ f = os.fdopen(fd, file_open_mode)
408
+ fd = None
409
+ except Exception:
410
+ if fd is not None:
411
+ os.close(fd)
412
+ raise
413
+
414
+ with f:
415
+ if self.file_type == "pkl":
416
+ result = joblib.load(f)
417
+ elif self.file_type == "json":
418
+ result = jsonpickle.loads(f.read())
419
+ else:
420
+ result = f.read()
322
421
 
323
- if self.file_type == "pkl":
324
- with open(file_name, 'rb') as f:
325
- result = joblib.load(f)
326
- elif self.file_type == "json":
327
- with open(file_name, 'r') as f:
328
- result = jsonpickle.loads(f.read())
422
+ return result
329
423
  else:
330
- with open(file_name, 'r') as f:
331
- result = f.read()
332
- return result
424
+ with open(file_name, file_open_mode) as f:
425
+ if self.file_type == "pkl":
426
+ result = joblib.load(f)
427
+ elif self.file_type == "json":
428
+ result = jsonpickle.loads(f.read())
429
+ else:
430
+ result = f.read()
431
+ return result
333
432
 
334
433
 
335
434
  def _read_from_file(self,file_name:str) -> Any:
@@ -355,14 +454,14 @@ class FileDirDict(PersiDict):
355
454
  raise ValueError("When base_class_for_values is not str,"
356
455
  + " file_type must be pkl or json.")
357
456
 
358
- n_retries = 8
457
+ n_retries = 12
359
458
  # extra protections to better handle concurrent writes
360
459
  for i in range(n_retries):
361
460
  try:
362
461
  return self._read_from_file_impl(file_name)
363
462
  except Exception as e:
364
463
  if i < n_retries - 1:
365
- time.sleep(random.uniform(0.01, 0.1) * (2 ** i))
464
+ time.sleep(random.uniform(0.01, 0.2) * (1.5 ** i))
366
465
  else:
367
466
  raise e
368
467
 
@@ -406,12 +505,36 @@ class FileDirDict(PersiDict):
406
505
  os.fsync(dir_fd)
407
506
  finally:
408
507
  os.close(dir_fd)
508
+ elif os.name == 'nt':
509
+ # On Windows, try to flush directory metadata
510
+ # This is less reliable than on POSIX systems
511
+ try:
512
+ handle = CreateFileW(
513
+ dir_name,
514
+ GENERIC_READ,
515
+ FILE_SHARE_READ | FILE_SHARE_WRITE,
516
+ None,
517
+ OPEN_EXISTING,
518
+ 0x02000000, # FILE_FLAG_BACKUP_SEMANTICS (needed for directories)
519
+ None
520
+ )
521
+ if int(handle) != INVALID_HANDLE_VALUE:
522
+ try:
523
+ kernel32 = ctypes.windll.kernel32
524
+ kernel32.FlushFileBuffers(handle)
525
+ finally:
526
+ CloseHandle(handle)
527
+ except:
528
+ pass
529
+
409
530
  except OSError:
410
531
  pass
411
532
 
412
533
  except:
413
- os.remove(temp_path)
414
- raise
534
+ try:
535
+ os.remove(temp_path)
536
+ finally:
537
+ raise
415
538
 
416
539
  def _save_to_file(self, file_name:str, value:Any) -> None:
417
540
  """Save a value to a file with retry/backoff.
@@ -434,7 +557,7 @@ class FileDirDict(PersiDict):
434
557
  raise ValueError("When base_class_for_values is not str,"
435
558
  + " file_type must be pkl or json.")
436
559
 
437
- n_retries = 8
560
+ n_retries = 12
438
561
  # extra protections to better handle concurrent writes
439
562
  for i in range(n_retries):
440
563
  try:
@@ -442,7 +565,7 @@ class FileDirDict(PersiDict):
442
565
  return
443
566
  except Exception as e:
444
567
  if i < n_retries - 1:
445
- time.sleep(random.uniform(0.01, 0.1) * (2 ** i))
568
+ time.sleep(random.uniform(0.01, 0.2) * (1.5 ** i))
446
569
  else:
447
570
  raise e
448
571
 
@@ -582,7 +705,8 @@ class FileDirDict(PersiDict):
582
705
  for f in files:
583
706
  if f.endswith(suffix):
584
707
  prefix_key = os.path.relpath(
585
- dir_name, start=self._base_dir)
708
+ drop_long_path_prefix(dir_name),
709
+ start=drop_long_path_prefix(self._base_dir))
586
710
 
587
711
  result_key = (*splitter(prefix_key), f[:-ext_len])
588
712
  result_key = SafeStrTuple(result_key)
@@ -643,7 +767,7 @@ class FileDirDict(PersiDict):
643
767
  # canonicalise extension once
644
768
  ext = None
645
769
  if self.file_type:
646
- ext = self.file_type.lower()
770
+ ext = self.file_type
647
771
  if not ext.startswith("."):
648
772
  ext = "." + ext
649
773
 
@@ -661,7 +785,7 @@ class FileDirDict(PersiDict):
661
785
  continue
662
786
 
663
787
  # cheap name test before stat()
664
- if ext and not ent.name.lower().endswith(ext):
788
+ if ext and not ent.name.endswith(ext):
665
789
  continue
666
790
 
667
791
  if ent.is_file(follow_symlinks=False):
@@ -674,7 +798,8 @@ class FileDirDict(PersiDict):
674
798
  if winner is None:
675
799
  return None
676
800
  else:
677
- return self._build_key_from_full_path(os.path.abspath(winner))
678
-
801
+ winner = os.path.abspath(winner)
802
+ winner = add_long_path_prefix(winner)
803
+ return self._build_key_from_full_path(winner)
679
804
 
680
805
  parameterizable.register_parameterizable_class(FileDirDict)
persidict/s3_dict.py CHANGED
@@ -1,7 +1,5 @@
1
1
  from __future__ import annotations
2
2
 
3
- import os
4
- import tempfile
5
3
  from typing import Any, Optional
6
4
 
7
5
  import boto3
persidict/write_once_dict.py CHANGED
@@ -1,7 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import time
4
- from functools import cache
5
4
 
6
5
  from deepdiff import DeepDiff
7
6
  from parameterizable import register_parameterizable_class, sort_dict_by_keys
persidict-0.37.1.dist-info/METADATA → persidict-0.37.2.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: persidict
3
- Version: 0.37.1
3
+ Version: 0.37.2
4
4
  Summary: Simple persistent key-value store for Python. Values are stored as files on a disk or as S3 objects on AWS cloud.
5
5
  Keywords: persistence,dicts,distributed,parallel
6
6
  Author: Vlad (Volodymyr) Pavlov
persidict-0.37.1.dist-info/RECORD → persidict-0.37.2.dist-info/RECORD CHANGED
@@ -1,14 +1,14 @@
1
1
  persidict/.DS_Store,sha256=1lFlJ5EFymdzGAUAaI30vcaaLHt3F1LwpG7xILf9jsM,6148
2
2
  persidict/__init__.py,sha256=CDOSJGgCnyRTkGUTzaeg3Cqsxwx0-0EFieOtldXwAls,1380
3
- persidict/file_dir_dict.py,sha256=2McFaDdr03g-PXlCIiG3fPb7h59LXW_3hDo0xLA17DE,24804
3
+ persidict/file_dir_dict.py,sha256=Wll343YPkYGyg3mLaOLwBakYLBPNieqKdWpxORIIwwA,29548
4
4
  persidict/jokers.py,sha256=gTu7g2l2MIgBc3-hjvUrcwcgWs6tcbLyxB0u57M3bfU,3012
5
5
  persidict/overlapping_multi_dict.py,sha256=UFyPEG2GbMmMHY48UmcaLHpsaxMqRH3bc_UA8S90yJo,5947
6
6
  persidict/persi_dict.py,sha256=CKVHy8YELLRVgLWgo0Akbd8RznCVxqt8JHszIjqA_sI,23176
7
- persidict/s3_dict.py,sha256=F5N7DlpZBkEEUsDk7OQXiqACY-mJ2SOMNrh3AlHC9qo,21454
7
+ persidict/s3_dict.py,sha256=44jJPinE0bNHiCw2apFRzNZ_4IxIWOirIsLJqrObnuI,21428
8
8
  persidict/safe_chars.py,sha256=H-cL9waCmDtwaRR5Y4b4oTzcBx09nc8wn8u61SVZDY0,1728
9
9
  persidict/safe_str_tuple.py,sha256=YBTcYjUKIffznOawXb9xKjz4HaKdklrgyVtegJFmr5w,7202
10
10
  persidict/safe_str_tuple_signing.py,sha256=mpOfx_xyprc0_c60XPB_EihI3vR1gOn6T03iCx1HwwQ,7494
11
- persidict/write_once_dict.py,sha256=nv5vx9uh6VZ5Qh3HJcBgUHLnDX9KY843FbHndcy-63E,11677
12
- persidict-0.37.1.dist-info/WHEEL,sha256=Pi5uDq5Fdo_Rr-HD5h9BiPn9Et29Y9Sh8NhcJNnFU1c,79
13
- persidict-0.37.1.dist-info/METADATA,sha256=M2EVewTSqdjfqdMi-7VHkSUKHqlL-ohlXYrcVRj7ViA,12387
14
- persidict-0.37.1.dist-info/RECORD,,
11
+ persidict/write_once_dict.py,sha256=Fthmpltm2yK3FmpbNGV7KQVLhsroQenxwavG8Z95Jts,11649
12
+ persidict-0.37.2.dist-info/WHEEL,sha256=Pi5uDq5Fdo_Rr-HD5h9BiPn9Et29Y9Sh8NhcJNnFU1c,79
13
+ persidict-0.37.2.dist-info/METADATA,sha256=nJavGC2ccuT1wyD4egOVyAfYb2K6zbrAwvCJKfq33Cw,12387
14
+ persidict-0.37.2.dist-info/RECORD,,