persidict 0.0.7__py3-none-any.whl → 0.0.8__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of persidict might be problematic. Click here for more details.

persidict/__init__.py CHANGED
@@ -21,4 +21,5 @@ as a pickle or a json S3 object.
21
21
 
22
22
  from .safe_str_tuple import SafeStrTuple
23
23
  from .persi_dict import PersiDict
24
- from .file_dir_dict import FileDirDict
24
+ from .file_dir_dict import FileDirDict
25
+ from .s3_dict import S3Dict
@@ -40,7 +40,8 @@ class FileDirDict(PersiDict):
40
40
  def __init__(self
41
41
  , dir_name: str = "FileDirDict"
42
42
  , file_type: str = "pkl"
43
- , immutable_items:bool = False):
43
+ , immutable_items:bool = False
44
+ , digest_len:int = 8):
44
45
  """A constructor defines location of the store and file format to use.
45
46
 
46
47
  dir_name is a directory that will contain all the files in
@@ -51,7 +52,8 @@ class FileDirDict(PersiDict):
51
52
  to store values.
52
53
  """
53
54
 
54
- super().__init__(immutable_items = immutable_items)
55
+ super().__init__(immutable_items = immutable_items
56
+ ,digetst_len = digest_len)
55
57
 
56
58
  self.file_type = file_type
57
59
 
persidict/s3_dict.py ADDED
@@ -0,0 +1,260 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from typing import Any
5
+
6
+ import boto3
7
+
8
+ from .safe_str_tuple import SafeStrTuple
9
+ from .safe_str_tuple_signing import sign_safe_str_tuple, unsign_safe_str_tuple
10
+ from .persi_dict import PersiDict
11
+ from .file_dir_dict import FileDirDict
12
+
13
+
14
+ class S3Dict(PersiDict):
15
+ """ A persistent dictionary that stores key-value pairs as S3 objects.
16
+
17
+ A new object is created for each key-value pair.
18
+
19
+ A key is either an objectname (a 'filename' without an extension),
20
+ or a sequence of folder names (object name prefixes) that ends
21
+ with an objectname. A value can be an instance of any Python type,
22
+ and will be stored as an S3-object.
23
+
24
+ S3Dict can store objects in binary objects (as pickles)
25
+ or in human-readable texts objects (using jsonpickles).
26
+
27
+ Unlike in native Python dictionaries, insertion order is not preserved.
28
+ """
29
+
30
+
31
+ def __init__(self, bucket_name:str
32
+ , region:str = None
33
+ , root_prefix:str = ""
34
+ , dir_name:str = "S3_Dict"
35
+ , file_type:str = "pkl"
36
+ , immutable_items:bool = False
37
+ , digest_len:int = 8
38
+ ,*args ,**kwargs):
39
+ """A constructor defines location of the store and object format to use.
40
+
41
+ bucket_name and region define an S3 location of the storage
42
+ that will contain all the objects in the S3_Dict.
43
+ If the bucket does not exist, it will be created.
44
+
45
+ root_prefix is a common S3 prefix for all objectnames in a dictionary.
46
+
47
+ dir_name is a local directory that will be used to store tmp files.
48
+
49
+ file_type can take one of two values: "pkl" or "json".
50
+ It defines which object format will be used by S3_Dict
51
+ to store values.
52
+ """
53
+
54
+ super().__init__(immutable_items = immutable_items, digest_len = 0)
55
+ self.file_type = file_type
56
+
57
+ self.local_cache = FileDirDict(
58
+ dir_name = dir_name
59
+ , file_type = file_type
60
+ , immutable_items = immutable_items
61
+ , digest_len = digest_len)
62
+
63
+ self.region = region
64
+ if region is None:
65
+ self.s3_client = boto3.client('s3')
66
+ else:
67
+ self.s3_client = boto3.client('s3', region_name=region)
68
+
69
+ self.bucket = self.s3_client.create_bucket(Bucket=bucket_name)
70
+ self.bucket_name = bucket_name
71
+
72
+ self.root_prefix=root_prefix
73
+ if len(self.root_prefix) and self.root_prefix[-1] != "/":
74
+ self.root_prefix += "/"
75
+
76
+
77
+ def __repr__(self):
78
+ """Return repr(self)."""
79
+
80
+ repr_str = super().__repr__()
81
+ repr_str = repr_str[:-1] + f", dir_name={self.local_cache.base_dir}"
82
+ repr_str += f", file_type={self.file_type}"
83
+ repr_str += f", region={self.region}"
84
+ repr_str += f", bucket_name={self.bucket_name}"
85
+ repr_str += f", root_prefix={self.root_prefix}"
86
+ repr_str += " )"
87
+
88
+ return repr_str
89
+
90
+
91
+ def _build_full_objectname(self, key:SafeStrTuple) -> str:
92
+ """ Convert SafeStrTuple into an S3 objectname. """
93
+
94
+ key = sign_safe_str_tuple(key, self.digest_len)
95
+ objectname = self.root_prefix + "/".join(key)+ "." + self.file_type
96
+ return objectname
97
+
98
+
99
+ def __contains__(self, key:SafeStrTuple) -> bool:
100
+ """True if the dictionary has the specified key, else False. """
101
+ if self.immutable_items:
102
+ file_name = self.local_cache._build_full_path(
103
+ key, create_subdirs=True)
104
+ if os.path.exists(file_name):
105
+ return True
106
+ try:
107
+ obj_name = self._build_full_objectname(key)
108
+ self.s3_client.head_object(Bucket=self.bucket_name, Key=obj_name)
109
+ return True
110
+ except:
111
+ return False
112
+
113
+
114
+ def __getitem__(self, key:SafeStrTuple) -> Any:
115
+ """X.__getitem__(y) is an equivalent to X[y]. """
116
+
117
+ file_name = self.local_cache._build_full_path(key, create_subdirs=True)
118
+
119
+ if self.immutable_items:
120
+ try:
121
+ result = self.local_cache._read_from_file(file_name)
122
+ return result
123
+ except:
124
+ pass
125
+
126
+ obj_name = self._build_full_objectname(key)
127
+ self.s3_client.download_file(self.bucket_name, obj_name, file_name)
128
+ result = self.local_cache._read_from_file(file_name)
129
+ if not self.immutable_items:
130
+ os.remove(file_name)
131
+
132
+ return result
133
+
134
+
135
+ def __setitem__(self, key:SafeStrTuple, value:Any):
136
+ """Set self[key] to value. """
137
+
138
+ file_name = self.local_cache._build_full_path(key, create_subdirs=True)
139
+ obj_name = self._build_full_objectname(key)
140
+
141
+ if self.immutable_items:
142
+ key_is_present = False
143
+ if os.path.exists(file_name):
144
+ key_is_present = True
145
+ else:
146
+ try:
147
+ self.s3_client.head_object(
148
+ Bucket=self.bucket_name, Key=obj_name)
149
+ key_is_present = True
150
+ except:
151
+ key_is_present = False
152
+
153
+ assert not key_is_present, "Can't modify an immutable item"
154
+
155
+ self.local_cache._save_to_file(file_name, value)
156
+ self.s3_client.upload_file(file_name, self.bucket_name, obj_name)
157
+ if not self.immutable_items:
158
+ os.remove(file_name)
159
+
160
+
161
+ def __delitem__(self, key:SafeStrTuple):
162
+ """Delete self[key]. """
163
+
164
+ assert not self.immutable_items, "Can't delete an immutable item"
165
+ obj_name = self._build_full_objectname(key)
166
+ self.s3_client.delete_object(Bucket = self.bucket_name, Key = obj_name)
167
+ file_name = self.local_cache._build_full_path(key)
168
+ if os.path.isfile(file_name):
169
+ os.remove(file_name)
170
+
171
+
172
+ def __len__(self) -> int:
173
+ """Return len(self). """
174
+
175
+ num_files = 0
176
+ suffix = "." + self.file_type
177
+
178
+ paginator = self.s3_client.get_paginator("list_objects")
179
+ page_iterator = paginator.paginate(
180
+ Bucket=self.bucket_name, Prefix = self.root_prefix)
181
+
182
+ for page in page_iterator:
183
+ if "Contents" in page:
184
+ for key in page["Contents"]:
185
+ obj_name = key["Key"]
186
+ if obj_name.endswith(suffix):
187
+ num_files += 1
188
+
189
+ return num_files
190
+
191
+
192
+ def _generic_iter(self, iter_type: str):
193
+ """Underlying implementation for .items()/.keys()/.values() iterators"""
194
+ assert iter_type in {"keys", "values", "items"}
195
+ suffix = "." + self.file_type
196
+ ext_len = len(self.file_type) + 1
197
+ prefix_len = len(self.root_prefix)
198
+
199
+ def splitter(full_name: str) -> SafeStrTuple:
200
+ assert full_name.startswith(self.root_prefix)
201
+ result = full_name[prefix_len:-ext_len].split(sep="/")
202
+ return SafeStrTuple(result)
203
+
204
+ def step():
205
+ paginator = self.s3_client.get_paginator("list_objects")
206
+ page_iterator = paginator.paginate(
207
+ Bucket=self.bucket_name, Prefix = self.root_prefix)
208
+
209
+ for page in page_iterator:
210
+ if "Contents" in page:
211
+ for key in page["Contents"]:
212
+ obj_name = key["Key"]
213
+ if not obj_name.endswith(suffix):
214
+ continue
215
+ obj_key = splitter(obj_name)
216
+ if iter_type == "keys":
217
+ yield unsign_safe_str_tuple(
218
+ obj_key, self.digest_len)
219
+ elif iter_type == "values":
220
+ yield self[obj_key]
221
+ else:
222
+ yield (unsign_safe_str_tuple(
223
+ obj_key, self.digest_len), self[obj_key])
224
+
225
+ return step()
226
+
227
+
228
+ def get_subdict(self, key:SafeStrTuple) -> S3Dict:
229
+ """Get a subdictionary containing items with the same prefix_key.
230
+
231
+ This method is absent in the original dict API.
232
+ """
233
+ if len(key):
234
+ key = SafeStrTuple(key)
235
+ key = sign_safe_str_tuple(key, self.digest_len)
236
+ full_root_prefix = self.root_prefix + "/".join(key)
237
+ else:
238
+ full_root_prefix = self.root_prefix
239
+
240
+ new_dir_path = self.local_cache._build_full_path(
241
+ key, create_subdirs = True, is_file_path = False)
242
+
243
+ return S3Dict(
244
+ bucket_name = self.bucket_name
245
+ , region = self.region
246
+ , root_prefix = full_root_prefix
247
+ , dir_name = new_dir_path
248
+ , file_type = self.file_type
249
+ , immutable_items = self.immutable_items)
250
+
251
+
252
+ def mtimestamp(self,key:SafeStrTuple) -> float:
253
+ """Get last modification time (in seconds, Unix epoch time).
254
+
255
+ This method is absent in the original dict API.
256
+ """
257
+ #TODO: check work with timezones
258
+ obj_name = self._build_full_objectname(key)
259
+ response = self.s3_client.head_object(Bucket=self.bucket_name, Key=obj_name)
260
+ return response["LastModified"].timestamp()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: persidict
3
- Version: 0.0.7
3
+ Version: 0.0.8
4
4
  Summary: Simple persistent key-value store for Python. Values are stored as files on a disk or as S3 objects on AWS cloud.
5
5
  Home-page: https://github.com/vladlpavlov/persidict
6
6
  Author: Vlad (Volodymyr) Pavlov
@@ -0,0 +1,11 @@
1
+ persidict/__init__.py,sha256=v28AnmkmoLSqZmKAoSsN3a3B86N1ohUcDih5te8vCiA,882
2
+ persidict/file_dir_dict.py,sha256=8tS5nKUHgFITcxW5NwAnZdtINV87ap4PqXN19_uRP_M,9002
3
+ persidict/persi_dict.py,sha256=g9BiEjnfoExXMxJHm3kONt_8iPkhCYqy_imUKDtzimg,7254
4
+ persidict/s3_dict.py,sha256=AXYY3gHeKRxoO4B51Evd1GV1I_t9HmsPp9q1QLJLwoo,9151
5
+ persidict/safe_str_tuple.py,sha256=jnXq8Har5nJyKtu0Qy2tOiRZ9DuIrY1YNpkwMHc3f1I,3416
6
+ persidict/safe_str_tuple_signing.py,sha256=OvGyaNOhx_I518nTifz3sRbej-vxtuRWqd4w1aEp8FY,3743
7
+ persidict-0.0.8.dist-info/LICENSE,sha256=oWpRWnm32aM5jVoboapPqgHi-drh1OPeZrrafyE1zTs,1113
8
+ persidict-0.0.8.dist-info/METADATA,sha256=ipg31oYZJI_cTvUo9taVRyk-az_fzH1dFJJX-JKBqvo,2114
9
+ persidict-0.0.8.dist-info/WHEEL,sha256=5sUXSg9e4bi7lTLOHcm6QEYwO5TIF1TNbTSVFVjcJcc,92
10
+ persidict-0.0.8.dist-info/top_level.txt,sha256=7Kr8wnF-PGd513PbAORWUpp7Bi09VIZVTQS7ZN-dJXc,10
11
+ persidict-0.0.8.dist-info/RECORD,,
@@ -1,10 +0,0 @@
1
- persidict/__init__.py,sha256=MSoe2iiRMVJJ_OETL0eC812ZXu9cY4djLm3f5JVmiV0,854
2
- persidict/file_dir_dict.py,sha256=4FF1lFx1QLnDPy_nz-6rQyf4SSR2ErUILXGelvEHlGY,8922
3
- persidict/persi_dict.py,sha256=g9BiEjnfoExXMxJHm3kONt_8iPkhCYqy_imUKDtzimg,7254
4
- persidict/safe_str_tuple.py,sha256=jnXq8Har5nJyKtu0Qy2tOiRZ9DuIrY1YNpkwMHc3f1I,3416
5
- persidict/safe_str_tuple_signing.py,sha256=OvGyaNOhx_I518nTifz3sRbej-vxtuRWqd4w1aEp8FY,3743
6
- persidict-0.0.7.dist-info/LICENSE,sha256=oWpRWnm32aM5jVoboapPqgHi-drh1OPeZrrafyE1zTs,1113
7
- persidict-0.0.7.dist-info/METADATA,sha256=YKuhGwoKzQnkxBnp3f8depNfh8Qa95a-toblFbdrVXs,2114
8
- persidict-0.0.7.dist-info/WHEEL,sha256=5sUXSg9e4bi7lTLOHcm6QEYwO5TIF1TNbTSVFVjcJcc,92
9
- persidict-0.0.7.dist-info/top_level.txt,sha256=7Kr8wnF-PGd513PbAORWUpp7Bi09VIZVTQS7ZN-dJXc,10
10
- persidict-0.0.7.dist-info/RECORD,,