persidict 0.0.7__py3-none-any.whl → 0.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of persidict might be problematic. Click here for more details.
- persidict/__init__.py +2 -1
- persidict/file_dir_dict.py +4 -2
- persidict/s3_dict.py +260 -0
- {persidict-0.0.7.dist-info → persidict-0.0.8.dist-info}/METADATA +1 -1
- persidict-0.0.8.dist-info/RECORD +11 -0
- persidict-0.0.7.dist-info/RECORD +0 -10
- {persidict-0.0.7.dist-info → persidict-0.0.8.dist-info}/LICENSE +0 -0
- {persidict-0.0.7.dist-info → persidict-0.0.8.dist-info}/WHEEL +0 -0
- {persidict-0.0.7.dist-info → persidict-0.0.8.dist-info}/top_level.txt +0 -0
persidict/__init__.py
CHANGED
persidict/file_dir_dict.py
CHANGED
|
@@ -40,7 +40,8 @@ class FileDirDict(PersiDict):
|
|
|
40
40
|
def __init__(self
|
|
41
41
|
, dir_name: str = "FileDirDict"
|
|
42
42
|
, file_type: str = "pkl"
|
|
43
|
-
, immutable_items:bool = False
|
|
43
|
+
, immutable_items:bool = False
|
|
44
|
+
, digest_len:int = 8):
|
|
44
45
|
"""A constructor defines location of the store and file format to use.
|
|
45
46
|
|
|
46
47
|
dir_name is a directory that will contain all the files in
|
|
@@ -51,7 +52,8 @@ class FileDirDict(PersiDict):
|
|
|
51
52
|
to store values.
|
|
52
53
|
"""
|
|
53
54
|
|
|
54
|
-
super().__init__(immutable_items = immutable_items
|
|
55
|
+
super().__init__(immutable_items = immutable_items
|
|
56
|
+
,digetst_len = digest_len)
|
|
55
57
|
|
|
56
58
|
self.file_type = file_type
|
|
57
59
|
|
persidict/s3_dict.py
ADDED
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
import boto3
|
|
7
|
+
|
|
8
|
+
from .safe_str_tuple import SafeStrTuple
|
|
9
|
+
from .safe_str_tuple_signing import sign_safe_str_tuple, unsign_safe_str_tuple
|
|
10
|
+
from .persi_dict import PersiDict
|
|
11
|
+
from .file_dir_dict import FileDirDict
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class S3Dict(PersiDict):
    """A persistent dictionary that stores key-value pairs as S3 objects.

    A new object is created for each key-value pair.

    A key is either an objectname (a 'filename' without an extension),
    or a sequence of folder names (object name prefixes) that ends
    with an objectname. A value can be an instance of any Python type,
    and will be stored as an S3 object.

    S3Dict can store values as binary objects (pickles)
    or as human-readable text objects (jsonpickles).

    Unlike in native Python dictionaries, insertion order is not preserved.
    """

    def __init__(self, bucket_name: str,
                 region: str | None = None,
                 root_prefix: str = "",
                 dir_name: str = "S3_Dict",
                 file_type: str = "pkl",
                 immutable_items: bool = False,
                 digest_len: int = 8,
                 *args, **kwargs):
        """Define the location of the store and the object format to use.

        bucket_name and region define the S3 location of the storage
        that will contain all the objects in the S3Dict.
        If the bucket does not exist, it will be created.

        root_prefix is a common S3 prefix for all objectnames in a dictionary;
        a trailing "/" is appended when a non-empty prefix lacks one.

        dir_name is a local directory that will be used to store tmp files.

        file_type can take one of two values: "pkl" or "json".
        It defines which object format will be used by S3Dict
        to store values.

        immutable_items forbids overwriting or deleting existing items.

        digest_len is passed to the local FileDirDict only; the S3 side
        is always initialised with digest_len = 0.

        Extra positional/keyword arguments are accepted and ignored.
        """
        super().__init__(immutable_items=immutable_items, digest_len=0)
        self.file_type = file_type

        # Local directory used for temporary files (and, when items are
        # immutable, as a persistent read cache).
        self.local_cache = FileDirDict(
            dir_name=dir_name,
            file_type=file_type,
            immutable_items=immutable_items,
            digest_len=digest_len)

        self.region = region
        if region is None:
            self.s3_client = boto3.client('s3')
        else:
            self.s3_client = boto3.client('s3', region_name=region)

        self.bucket_name = bucket_name
        self.bucket = self._ensure_bucket(bucket_name)

        self.root_prefix = root_prefix
        if self.root_prefix and not self.root_prefix.endswith("/"):
            self.root_prefix += "/"

    def _ensure_bucket(self, bucket_name: str):
        """Create the bucket, tolerating the case where we already own it.

        Returns the create_bucket response, or None when the bucket
        already existed and is owned by the caller.
        """
        create_args = {"Bucket": bucket_name}
        # Outside us-east-1 S3 requires an explicit LocationConstraint
        # that matches the region the client talks to.
        client_region = self.s3_client.meta.region_name
        if client_region not in (None, "us-east-1"):
            create_args["CreateBucketConfiguration"] = {
                "LocationConstraint": client_region}
        try:
            return self.s3_client.create_bucket(**create_args)
        except self.s3_client.exceptions.ClientError as err:
            error_code = err.response.get("Error", {}).get("Code")
            if error_code != "BucketAlreadyOwnedByYou":
                raise
            return None

    def _object_exists(self, obj_name: str) -> bool:
        """Return True if a HEAD request for obj_name succeeds.

        Any error response from the service (missing key, access denied,
        ...) is reported as False; other exceptions propagate.
        """
        try:
            self.s3_client.head_object(Bucket=self.bucket_name, Key=obj_name)
        except self.s3_client.exceptions.ClientError:
            return False
        return True

    def _iter_objectnames(self):
        """Yield the names of all objects under root_prefix that end with
        the '.<file_type>' suffix."""
        suffix = "." + self.file_type
        paginator = self.s3_client.get_paginator("list_objects")
        pages = paginator.paginate(
            Bucket=self.bucket_name, Prefix=self.root_prefix)
        for page in pages:
            # A page without matching objects has no "Contents" entry.
            for entry in page.get("Contents", ()):
                obj_name = entry["Key"]
                if obj_name.endswith(suffix):
                    yield obj_name

    def __repr__(self):
        """Return repr(self)."""
        # Drop the last character of the parent's repr and append
        # the S3-specific settings before closing it again.
        repr_str = super().__repr__()
        repr_str = repr_str[:-1] + f", dir_name={self.local_cache.base_dir}"
        repr_str += f", file_type={self.file_type}"
        repr_str += f", region={self.region}"
        repr_str += f", bucket_name={self.bucket_name}"
        repr_str += f", root_prefix={self.root_prefix}"
        repr_str += " )"

        return repr_str

    def _build_full_objectname(self, key: SafeStrTuple) -> str:
        """Convert SafeStrTuple into an S3 objectname.

        The result is root_prefix + the signed key components joined
        with "/" + "." + file_type.
        """
        key = sign_safe_str_tuple(key, self.digest_len)
        objectname = self.root_prefix + "/".join(key) + "." + self.file_type
        return objectname

    def __contains__(self, key: SafeStrTuple) -> bool:
        """True if the dictionary has the specified key, else False."""
        if self.immutable_items:
            # Immutable items never change, so a local copy is proof enough.
            file_name = self.local_cache._build_full_path(
                key, create_subdirs=True)
            if os.path.exists(file_name):
                return True
        return self._object_exists(self._build_full_objectname(key))

    def __getitem__(self, key: SafeStrTuple) -> Any:
        """X.__getitem__(y) is an equivalent to X[y].

        The object is downloaded into the local directory and read from
        there. With immutable items the local file is kept and reused on
        later reads; otherwise it is removed after reading.
        """
        file_name = self.local_cache._build_full_path(key, create_subdirs=True)

        if self.immutable_items:
            try:
                return self.local_cache._read_from_file(file_name)
            except Exception:
                # No usable local copy: fall through and fetch from S3.
                pass

        obj_name = self._build_full_objectname(key)
        self.s3_client.download_file(self.bucket_name, obj_name, file_name)
        try:
            return self.local_cache._read_from_file(file_name)
        finally:
            # Remove the temporary file even when reading it failed.
            if not self.immutable_items:
                os.remove(file_name)

    def __setitem__(self, key: SafeStrTuple, value: Any):
        """Set self[key] to value.

        Raises AssertionError if items are immutable and the key
        is already present (locally or in S3).
        """
        file_name = self.local_cache._build_full_path(key, create_subdirs=True)
        obj_name = self._build_full_objectname(key)

        if self.immutable_items:
            key_is_present = (os.path.exists(file_name)
                              or self._object_exists(obj_name))
            # Raised explicitly so the guard survives `python -O`.
            if key_is_present:
                raise AssertionError("Can't modify an immutable item")

        self.local_cache._save_to_file(file_name, value)
        uploaded = False
        try:
            self.s3_client.upload_file(file_name, self.bucket_name, obj_name)
            uploaded = True
        finally:
            # Keep the local file only as a cache entry for an immutable
            # item that actually reached S3; a failed upload must not
            # leave a file that makes the key look present.
            if not (uploaded and self.immutable_items):
                os.remove(file_name)

    def __delitem__(self, key: SafeStrTuple):
        """Delete self[key].

        Raises AssertionError if items are immutable.
        """
        # Raised explicitly so the guard survives `python -O`.
        if self.immutable_items:
            raise AssertionError("Can't delete an immutable item")
        obj_name = self._build_full_objectname(key)
        self.s3_client.delete_object(Bucket=self.bucket_name, Key=obj_name)
        file_name = self.local_cache._build_full_path(key)
        if os.path.isfile(file_name):
            os.remove(file_name)

    def __len__(self) -> int:
        """Return len(self).

        Counts the objects under root_prefix whose names end
        with '.<file_type>'; this lists the whole prefix.
        """
        return sum(1 for _ in self._iter_objectnames())

    def _generic_iter(self, iter_type: str):
        """Underlying implementation for .items()/.keys()/.values() iterators.

        iter_type must be "keys", "values" or "items"; it is validated
        immediately, while the listing itself happens lazily.
        """
        assert iter_type in {"keys", "values", "items"}
        ext_len = len(self.file_type) + 1
        prefix_len = len(self.root_prefix)

        def splitter(full_name: str) -> SafeStrTuple:
            # Strip root_prefix and the ".<file_type>" extension,
            # then split the remainder into key components.
            assert full_name.startswith(self.root_prefix)
            result = full_name[prefix_len:-ext_len].split(sep="/")
            return SafeStrTuple(result)

        def step():
            for obj_name in self._iter_objectnames():
                # Use the unsigned key both for the yielded key and for
                # the lookup: __getitem__ signs the key it receives.
                key = unsign_safe_str_tuple(
                    splitter(obj_name), self.digest_len)
                if iter_type == "keys":
                    yield key
                elif iter_type == "values":
                    yield self[key]
                else:
                    yield (key, self[key])

        return step()

    def get_subdict(self, key: SafeStrTuple) -> S3Dict:
        """Get a subdictionary containing items with the same prefix_key.

        The new S3Dict shares the bucket, region, file type and
        immutability setting, and uses root_prefix extended by the key.

        This method is absent in the original dict API.
        """
        if len(key):
            key = SafeStrTuple(key)
            key = sign_safe_str_tuple(key, self.digest_len)
            full_root_prefix = self.root_prefix + "/".join(key)
        else:
            full_root_prefix = self.root_prefix

        new_dir_path = self.local_cache._build_full_path(
            key, create_subdirs=True, is_file_path=False)

        return S3Dict(
            bucket_name=self.bucket_name,
            region=self.region,
            root_prefix=full_root_prefix,
            dir_name=new_dir_path,
            file_type=self.file_type,
            immutable_items=self.immutable_items)

    def mtimestamp(self, key: SafeStrTuple) -> float:
        """Get last modification time (in seconds, Unix epoch time).

        This method is absent in the original dict API.
        """
        # TODO: check work with timezones (presumably LastModified is a
        # timezone-aware datetime, in which case timestamp() is correct).
        obj_name = self._build_full_objectname(key)
        response = self.s3_client.head_object(
            Bucket=self.bucket_name, Key=obj_name)
        return response["LastModified"].timestamp()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: persidict
|
|
3
|
-
Version: 0.0.7
|
|
3
|
+
Version: 0.0.8
|
|
4
4
|
Summary: Simple persistent key-value store for Python. Values are stored as files on a disk or as S3 objects on AWS cloud.
|
|
5
5
|
Home-page: https://github.com/vladlpavlov/persidict
|
|
6
6
|
Author: Vlad (Volodymyr) Pavlov
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
persidict/__init__.py,sha256=v28AnmkmoLSqZmKAoSsN3a3B86N1ohUcDih5te8vCiA,882
|
|
2
|
+
persidict/file_dir_dict.py,sha256=8tS5nKUHgFITcxW5NwAnZdtINV87ap4PqXN19_uRP_M,9002
|
|
3
|
+
persidict/persi_dict.py,sha256=g9BiEjnfoExXMxJHm3kONt_8iPkhCYqy_imUKDtzimg,7254
|
|
4
|
+
persidict/s3_dict.py,sha256=AXYY3gHeKRxoO4B51Evd1GV1I_t9HmsPp9q1QLJLwoo,9151
|
|
5
|
+
persidict/safe_str_tuple.py,sha256=jnXq8Har5nJyKtu0Qy2tOiRZ9DuIrY1YNpkwMHc3f1I,3416
|
|
6
|
+
persidict/safe_str_tuple_signing.py,sha256=OvGyaNOhx_I518nTifz3sRbej-vxtuRWqd4w1aEp8FY,3743
|
|
7
|
+
persidict-0.0.8.dist-info/LICENSE,sha256=oWpRWnm32aM5jVoboapPqgHi-drh1OPeZrrafyE1zTs,1113
|
|
8
|
+
persidict-0.0.8.dist-info/METADATA,sha256=ipg31oYZJI_cTvUo9taVRyk-az_fzH1dFJJX-JKBqvo,2114
|
|
9
|
+
persidict-0.0.8.dist-info/WHEEL,sha256=5sUXSg9e4bi7lTLOHcm6QEYwO5TIF1TNbTSVFVjcJcc,92
|
|
10
|
+
persidict-0.0.8.dist-info/top_level.txt,sha256=7Kr8wnF-PGd513PbAORWUpp7Bi09VIZVTQS7ZN-dJXc,10
|
|
11
|
+
persidict-0.0.8.dist-info/RECORD,,
|
persidict-0.0.7.dist-info/RECORD
DELETED
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
persidict/__init__.py,sha256=MSoe2iiRMVJJ_OETL0eC812ZXu9cY4djLm3f5JVmiV0,854
|
|
2
|
-
persidict/file_dir_dict.py,sha256=4FF1lFx1QLnDPy_nz-6rQyf4SSR2ErUILXGelvEHlGY,8922
|
|
3
|
-
persidict/persi_dict.py,sha256=g9BiEjnfoExXMxJHm3kONt_8iPkhCYqy_imUKDtzimg,7254
|
|
4
|
-
persidict/safe_str_tuple.py,sha256=jnXq8Har5nJyKtu0Qy2tOiRZ9DuIrY1YNpkwMHc3f1I,3416
|
|
5
|
-
persidict/safe_str_tuple_signing.py,sha256=OvGyaNOhx_I518nTifz3sRbej-vxtuRWqd4w1aEp8FY,3743
|
|
6
|
-
persidict-0.0.7.dist-info/LICENSE,sha256=oWpRWnm32aM5jVoboapPqgHi-drh1OPeZrrafyE1zTs,1113
|
|
7
|
-
persidict-0.0.7.dist-info/METADATA,sha256=YKuhGwoKzQnkxBnp3f8depNfh8Qa95a-toblFbdrVXs,2114
|
|
8
|
-
persidict-0.0.7.dist-info/WHEEL,sha256=5sUXSg9e4bi7lTLOHcm6QEYwO5TIF1TNbTSVFVjcJcc,92
|
|
9
|
-
persidict-0.0.7.dist-info/top_level.txt,sha256=7Kr8wnF-PGd513PbAORWUpp7Bi09VIZVTQS7ZN-dJXc,10
|
|
10
|
-
persidict-0.0.7.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|