persidict 0.36.0__py3-none-any.whl → 0.36.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of persidict might be problematic. Click here for more details.
persidict/file_dir_dict.py
CHANGED
|
@@ -10,6 +10,7 @@ serialized depending on ``file_type``.
|
|
|
10
10
|
from __future__ import annotations
|
|
11
11
|
|
|
12
12
|
import os
|
|
13
|
+
import pathlib
|
|
13
14
|
import random
|
|
14
15
|
import tempfile
|
|
15
16
|
import time
|
|
@@ -137,7 +138,8 @@ class FileDirDict(PersiDict):
|
|
|
137
138
|
Returns:
|
|
138
139
|
str: URL of the underlying storage in the form "file://<abs_path>".
|
|
139
140
|
"""
|
|
140
|
-
return
|
|
141
|
+
return pathlib.Path(self._base_dir).as_uri()
|
|
142
|
+
|
|
141
143
|
|
|
142
144
|
|
|
143
145
|
@property
|
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: persidict
|
|
3
|
+
Version: 0.36.2
|
|
4
|
+
Summary: Simple persistent key-value store for Python. Values are stored as files on a disk or as S3 objects on AWS cloud.
|
|
5
|
+
Keywords: persistence,dicts,distributed,parallel
|
|
6
|
+
Author: Vlad (Volodymyr) Pavlov
|
|
7
|
+
Author-email: Vlad (Volodymyr) Pavlov <vlpavlov@ieee.org>
|
|
8
|
+
License: MIT
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: Intended Audience :: Science/Research
|
|
12
|
+
Classifier: Programming Language :: Python
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
17
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
18
|
+
Requires-Dist: parameterizable
|
|
19
|
+
Requires-Dist: lz4
|
|
20
|
+
Requires-Dist: joblib
|
|
21
|
+
Requires-Dist: numpy
|
|
22
|
+
Requires-Dist: pandas
|
|
23
|
+
Requires-Dist: jsonpickle
|
|
24
|
+
Requires-Dist: deepdiff
|
|
25
|
+
Requires-Dist: boto3 ; extra == 'aws'
|
|
26
|
+
Requires-Dist: boto3 ; extra == 'dev'
|
|
27
|
+
Requires-Dist: moto ; extra == 'dev'
|
|
28
|
+
Requires-Dist: pytest ; extra == 'dev'
|
|
29
|
+
Requires-Python: >=3.10
|
|
30
|
+
Project-URL: Homepage, https://github.com/pythagoras-dev/persidict
|
|
31
|
+
Provides-Extra: aws
|
|
32
|
+
Provides-Extra: dev
|
|
33
|
+
Description-Content-Type: text/markdown
|
|
34
|
+
|
|
35
|
+
# persidict
|
|
36
|
+
|
|
37
|
+
Simple persistent dictionaries for distributed applications in Python.
|
|
38
|
+
|
|
39
|
+
## 1. What Is It?
|
|
40
|
+
|
|
41
|
+
`persidict` offers a simple persistent key-value store for Python.
|
|
42
|
+
It saves the content of the dictionary in a folder on a disk
|
|
43
|
+
or in an S3 bucket on AWS. Each value is stored as a separate file / S3 object.
|
|
44
|
+
Only text strings or sequences of strings are allowed as keys.
|
|
45
|
+
|
|
46
|
+
Unlike other persistent dictionaries (e.g. Python's native `shelve`),
|
|
47
|
+
`persidict` is designed for use in highly **distributed environments**,
|
|
48
|
+
where multiple instances of a program run concurrently across many machines,
|
|
49
|
+
accessing the same dictionary via a shared storage.
|
|
50
|
+
|
|
51
|
+
## 2. Features
|
|
52
|
+
* **Persistent Storage**: Save dictionaries to the local filesystem
|
|
53
|
+
(`FileDirDict`) or AWS S3 (`S3Dict`).
|
|
54
|
+
* **Standard Dictionary API**: Use persidict objects like standard
|
|
55
|
+
Python dictionaries with methods like `__getitem__`, `__setitem__`,
|
|
56
|
+
`__delitem__`, `keys`, `values`, `items`, etc.
|
|
57
|
+
* **Distributed Computing Ready**: Designed for concurrent access
|
|
58
|
+
in distributed environments.
|
|
59
|
+
* **Flexible Serialization**: Store values as pickles (`pkl`),
|
|
60
|
+
JSON (`json`), or plain text.
|
|
61
|
+
* **Type Safety**: Optionally enforce that all values in a dictionary are
|
|
62
|
+
instances of a specific class.
|
|
63
|
+
* **Advanced Functionality**: Includes features like write-once dictionaries,
|
|
64
|
+
timestamping of entries, and tools for handling file-system-safe keys.
|
|
65
|
+
|
|
66
|
+
## 3. Usage
|
|
67
|
+
|
|
68
|
+
### 3.1 Storing Data on a Local Disk
|
|
69
|
+
|
|
70
|
+
The `FileDirDict` class saves your dictionary to a local folder.
|
|
71
|
+
Each key-value pair is stored as a separate file.
|
|
72
|
+
|
|
73
|
+
```python
|
|
74
|
+
from persidict import FileDirDict
|
|
75
|
+
|
|
76
|
+
# Create a dictionary that will be stored in the "my_app_data" folder.
|
|
77
|
+
# The folder will be created automatically if it doesn't exist.
|
|
78
|
+
app_settings = FileDirDict(base_dir="my_app_data")
|
|
79
|
+
|
|
80
|
+
# Add and update items just like a regular dictionary.
|
|
81
|
+
app_settings["username"] = "alex"
|
|
82
|
+
app_settings["theme"] = "dark"
|
|
83
|
+
app_settings["notifications_enabled"] = True
|
|
84
|
+
|
|
85
|
+
# Values can be any pickleable Python object.
|
|
86
|
+
app_settings["recent_projects"] = ["project_a", "project_b"]
|
|
87
|
+
|
|
88
|
+
print(f"Current theme is: {app_settings['theme']}")
|
|
89
|
+
# >>> Current theme is: dark
|
|
90
|
+
|
|
91
|
+
# The data persists!
|
|
92
|
+
# If you run the script again or create a new dictionary object
|
|
93
|
+
# pointing to the same folder, the data will be there.
|
|
94
|
+
reloaded_settings = FileDirDict(base_dir="my_app_data")
|
|
95
|
+
|
|
96
|
+
print(f"Number of settings: {len(reloaded_settings)}")
|
|
97
|
+
# >>> Number of settings: 4
|
|
98
|
+
|
|
99
|
+
print("username" in reloaded_settings)
|
|
100
|
+
# >>> True
|
|
101
|
+
```
|
|
102
|
+
### 3.2 Storing Data in the Cloud (AWS S3)
|
|
103
|
+
|
|
104
|
+
For distributed applications, you can use **`S3Dict`** to store data in
|
|
105
|
+
an AWS S3 bucket. The usage is identical, allowing you to switch
|
|
106
|
+
between local and cloud storage with minimal code changes.
|
|
107
|
+
|
|
108
|
+
```python
|
|
109
|
+
from persidict import S3Dict
|
|
110
|
+
|
|
111
|
+
# Create a dictionary that will be stored in an S3 bucket.
|
|
112
|
+
# The bucket will be created if it doesn't exist.
|
|
113
|
+
cloud_config = S3Dict(bucket_name="my-app-config-bucket")
|
|
114
|
+
|
|
115
|
+
# Use it just like a FileDirDict.
|
|
116
|
+
cloud_config["api_key"] = "ABC-123-XYZ"
|
|
117
|
+
cloud_config["timeout_seconds"] = 30
|
|
118
|
+
|
|
119
|
+
print(f"API Key: {cloud_config['api_key']}")
|
|
120
|
+
# >>> API Key: ABC-123-XYZ
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
## 4. Glossary
|
|
124
|
+
|
|
125
|
+
### 4.1 Core Concepts
|
|
126
|
+
|
|
127
|
+
* **`PersiDict`**: The abstract base class that defines the common interface
|
|
128
|
+
for all persistent dictionaries in the package. It's the foundation
|
|
129
|
+
upon which everything else is built.
|
|
130
|
+
* **`PersiDictKey`**: A type hint that specifies what can be used
|
|
131
|
+
as a key in any `PersiDict`. It can be a `SafeStrTuple`,
|
|
132
|
+
a single string, or a sequence of strings.
|
|
133
|
+
* **`SafeStrTuple`**: The core data structure for keys. It's an immutable,
|
|
134
|
+
flat tuple of non-empty, URL/filename-safe strings, ensuring that
|
|
135
|
+
keys are consistent and safe for various storage backends.
|
|
136
|
+
|
|
137
|
+
### 4.2 Main Implementations
|
|
138
|
+
|
|
139
|
+
* **`FileDirDict`**: A primary, concrete implementation of `PersiDict`
|
|
140
|
+
that stores each key-value pair as a separate file in a local directory.
|
|
141
|
+
* **`S3Dict`**: The other primary implementation of `PersiDict`,
|
|
142
|
+
which stores each key-value pair as an object in an AWS S3 bucket,
|
|
143
|
+
suitable for distributed environments.
|
|
144
|
+
|
|
145
|
+
### 4.3 Key Parameters
|
|
146
|
+
|
|
147
|
+
* **`file_type`**: A key parameter for `FileDirDict` and `S3Dict` that
|
|
148
|
+
determines the serialization format for values.
|
|
149
|
+
Common options are `"pkl"` (pickle) and `"json"`.
|
|
150
|
+
Any other value is treated as plain text for string storage.
|
|
151
|
+
* **`base_class_for_values`**: An optional parameter for any `PersiDict`
|
|
152
|
+
that enforces type checking on all stored values, ensuring they are
|
|
153
|
+
instances of a specific class.
|
|
154
|
+
* **`immutable_items`**: A boolean parameter that can make a `PersiDict`
|
|
155
|
+
"write-once," preventing any modification or deletion of existing items.
|
|
156
|
+
* **`digest_len`**: An integer that specifies the length of a hash suffix
|
|
157
|
+
added to key components to prevent collisions on case-insensitive file systems.
|
|
158
|
+
* **`base_dir`**: A string specifying the directory path where a `FileDirDict`
|
|
159
|
+
stores its files. For `S3Dict`, this directory is used to cache files locally.
|
|
160
|
+
* **`bucket_name`**: A string specifying the name of the S3 bucket where
|
|
161
|
+
an `S3Dict` stores its objects.
|
|
162
|
+
* **`region`**: An optional string specifying the AWS region for the S3 bucket.
|
|
163
|
+
|
|
164
|
+
### 4.4 Advanced Classes
|
|
165
|
+
|
|
166
|
+
* **`WriteOnceDict`**: A wrapper that enforces write-once behavior
|
|
167
|
+
on any `PersiDict`, ignoring subsequent writes to the same key.
|
|
168
|
+
It also allows for random consistency checks to ensure subsequent
|
|
169
|
+
writes to the same key always match the original value.
|
|
170
|
+
* **`OverlappingMultiDict`**: An advanced container that holds
|
|
171
|
+
multiple `PersiDict` instances sharing the same storage
|
|
172
|
+
but with different `file_type`s.
|
|
173
|
+
|
|
174
|
+
### 4.5 Special "Joker" Values
|
|
175
|
+
|
|
176
|
+
* **`Joker`**: The base class for special command-like values that
|
|
177
|
+
can be assigned to a key to trigger an action instead of storing a value.
|
|
178
|
+
* **`KEEP_CURRENT`**: A "joker" value that, when assigned to a key,
|
|
179
|
+
ensures the existing value is not changed.
|
|
180
|
+
* **`DELETE_CURRENT`**: A "joker" value that deletes the key-value pair
|
|
181
|
+
from the dictionary when assigned to a key.
|
|
182
|
+
|
|
183
|
+
## 5. Comparison With Python Built-in Dictionaries
|
|
184
|
+
|
|
185
|
+
### 5.1 Similarities
|
|
186
|
+
|
|
187
|
+
`PersiDict` subclasses can be used like regular Python dictionaries, supporting:
|
|
188
|
+
|
|
189
|
+
* Get, set, and delete operations with square brackets (`[]`).
|
|
190
|
+
* Iteration over keys, values, and items.
|
|
191
|
+
* Membership testing with `in`.
|
|
192
|
+
* Length checking with `len()`.
|
|
193
|
+
* Standard methods like `keys()`, `values()`, `items()`, `get()`, `clear()`,
|
|
194
|
+
`setdefault()`, and `update()`.
|
|
195
|
+
|
|
196
|
+
### 5.2 Differences
|
|
197
|
+
|
|
198
|
+
* **Persistence**: Data is saved between program executions.
|
|
199
|
+
* **Keys**: Keys must be strings or sequences of URL/filename-safe strings.
|
|
200
|
+
* **Values**: Values must be pickleable.
|
|
201
|
+
You can also constrain values to a specific class.
|
|
202
|
+
* **Order**: Insertion order is not preserved.
|
|
203
|
+
* **Additional Methods**: `PersiDict` provides extra methods not in the standard
|
|
204
|
+
dict API, such as `timestamp()`, `random_key()`, `newest_keys()`, `subdicts()`,
|
|
205
|
+
`delete_if_exists()`, `get_params()`, and more.
|
|
206
|
+
* **Special Values**: Use `KEEP_CURRENT` to avoid updating a value
|
|
207
|
+
and `DELETE_CURRENT` to delete a value during an assignment.
|
|
208
|
+
|
|
209
|
+
## 6. Installation
|
|
210
|
+
|
|
211
|
+
The source code is hosted on GitHub at:
|
|
212
|
+
[https://github.com/pythagoras-dev/persidict](https://github.com/pythagoras-dev/persidict)
|
|
213
|
+
|
|
214
|
+
Binary installers for the latest released version are available on the Python Package Index (PyPI) at:
|
|
215
|
+
[https://pypi.org/project/persidict](https://pypi.org/project/persidict)
|
|
216
|
+
|
|
217
|
+
You can install `persidict` using `pip` or your favorite package manager:
|
|
218
|
+
|
|
219
|
+
```bash
|
|
220
|
+
pip install persidict
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
To include the AWS S3 extra dependencies:
|
|
224
|
+
|
|
225
|
+
```bash
|
|
226
|
+
pip install persidict[aws]
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
For development, including test dependencies:
|
|
230
|
+
|
|
231
|
+
```bash
|
|
232
|
+
pip install persidict[dev]
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
## 7. Dependencies
|
|
236
|
+
|
|
237
|
+
`persidict` has the following core dependencies:
|
|
238
|
+
|
|
239
|
+
* [parameterizable](https://pypi.org/project/parameterizable/)
|
|
240
|
+
* [jsonpickle](https://jsonpickle.github.io)
|
|
241
|
+
* [joblib](https://joblib.readthedocs.io)
|
|
242
|
+
* [lz4](https://python-lz4.readthedocs.io)
|
|
243
|
+
* [pandas](https://pandas.pydata.org)
|
|
244
|
+
* [numpy](https://numpy.org)
|
|
245
|
+
* [deepdiff](https://zepworks.com/deepdiff)
|
|
246
|
+
|
|
247
|
+
For AWS S3 support (S3Dict), you will also need:
|
|
248
|
+
* [boto3](https://boto3.readthedocs.io)
|
|
249
|
+
|
|
250
|
+
For development and testing, the following packages are used:
|
|
251
|
+
* [pytest](https://pytest.org)
|
|
252
|
+
* [moto](http://getmoto.org)
|
|
253
|
+
|
|
254
|
+
## 8. Contributing
|
|
255
|
+
Contributions are welcome! Please see the contributing [guide](https://github.com/pythagoras-dev/persidict?tab=contributing-ov-file) for more details
|
|
256
|
+
on how to get started, run tests, and submit pull requests.
|
|
257
|
+
|
|
258
|
+
## 9. License
|
|
259
|
+
`persidict` is licensed under the MIT License. See the [LICENSE](https://github.com/pythagoras-dev/persidict?tab=MIT-1-ov-file) file for more details.
|
|
260
|
+
|
|
261
|
+
## 10. Key Contacts
|
|
262
|
+
|
|
263
|
+
* [Vlad (Volodymyr) Pavlov](https://www.linkedin.com/in/vlpavlov/)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
persidict/.DS_Store,sha256=1lFlJ5EFymdzGAUAaI30vcaaLHt3F1LwpG7xILf9jsM,6148
|
|
2
2
|
persidict/__init__.py,sha256=CDOSJGgCnyRTkGUTzaeg3Cqsxwx0-0EFieOtldXwAls,1380
|
|
3
|
-
persidict/file_dir_dict.py,sha256=
|
|
3
|
+
persidict/file_dir_dict.py,sha256=IDRb6a3YQvM7Gf0jbqKkTi4VuSPecTw6Ca6HZ947Qj8,25784
|
|
4
4
|
persidict/jokers.py,sha256=Ow4tWOTTMGKvolJyVuEF-oEgE_u3vDZtA9UFwTdhNV4,2731
|
|
5
5
|
persidict/overlapping_multi_dict.py,sha256=gBiHaCb5pTGNW3ZrakgaiGDid6oCfoP7Vq1rxXGnFWg,5476
|
|
6
6
|
persidict/persi_dict.py,sha256=DIMQaY4gE8NSYTlHlk9rfOJJEYUuLV8kmQ-gc474py4,20052
|
|
@@ -9,6 +9,6 @@ persidict/safe_chars.py,sha256=9Qy24fu2dmiJOdmCF8mKZULfQaRp7H4oxfgDXeLgogI,1160
|
|
|
9
9
|
persidict/safe_str_tuple.py,sha256=YBTcYjUKIffznOawXb9xKjz4HaKdklrgyVtegJFmr5w,7202
|
|
10
10
|
persidict/safe_str_tuple_signing.py,sha256=RQAj4fnpRVaOe0KpwLler1UTaeNOgXCQpU3t80ixtxg,7493
|
|
11
11
|
persidict/write_once_dict.py,sha256=-lPQ_yuU62pczHT0BYO6SFbiZBKFq8Tj9ln3jCzNDzA,11443
|
|
12
|
-
persidict-0.36.
|
|
13
|
-
persidict-0.36.
|
|
14
|
-
persidict-0.36.
|
|
12
|
+
persidict-0.36.2.dist-info/WHEEL,sha256=Pi5uDq5Fdo_Rr-HD5h9BiPn9Et29Y9Sh8NhcJNnFU1c,79
|
|
13
|
+
persidict-0.36.2.dist-info/METADATA,sha256=KS86C2ZjXL6VsvpBkz6ah1xPl6XimlmmqywDdbnnfhs,10021
|
|
14
|
+
persidict-0.36.2.dist-info/RECORD,,
|
|
@@ -1,228 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.3
|
|
2
|
-
Name: persidict
|
|
3
|
-
Version: 0.36.0
|
|
4
|
-
Summary: Simple persistent key-value store for Python. Values are stored as files on a disk or as S3 objects on AWS cloud.
|
|
5
|
-
Keywords: persistence,dicts,distributed,parallel
|
|
6
|
-
Author: Vlad (Volodymyr) Pavlov
|
|
7
|
-
Author-email: Vlad (Volodymyr) Pavlov <vlpavlov@ieee.org>
|
|
8
|
-
License: MIT
|
|
9
|
-
Classifier: Development Status :: 3 - Alpha
|
|
10
|
-
Classifier: Intended Audience :: Developers
|
|
11
|
-
Classifier: Intended Audience :: Science/Research
|
|
12
|
-
Classifier: Programming Language :: Python
|
|
13
|
-
Classifier: Programming Language :: Python :: 3
|
|
14
|
-
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
-
Classifier: Operating System :: OS Independent
|
|
16
|
-
Classifier: Topic :: Software Development :: Libraries
|
|
17
|
-
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
18
|
-
Requires-Dist: parameterizable
|
|
19
|
-
Requires-Dist: lz4
|
|
20
|
-
Requires-Dist: joblib
|
|
21
|
-
Requires-Dist: numpy
|
|
22
|
-
Requires-Dist: pandas
|
|
23
|
-
Requires-Dist: jsonpickle
|
|
24
|
-
Requires-Dist: deepdiff
|
|
25
|
-
Requires-Dist: boto3 ; extra == 'aws'
|
|
26
|
-
Requires-Dist: boto3 ; extra == 'dev'
|
|
27
|
-
Requires-Dist: moto ; extra == 'dev'
|
|
28
|
-
Requires-Dist: pytest ; extra == 'dev'
|
|
29
|
-
Requires-Python: >=3.10
|
|
30
|
-
Project-URL: Homepage, https://github.com/pythagoras-dev/persidict
|
|
31
|
-
Provides-Extra: aws
|
|
32
|
-
Provides-Extra: dev
|
|
33
|
-
Description-Content-Type: text/markdown
|
|
34
|
-
|
|
35
|
-
# persidict
|
|
36
|
-
|
|
37
|
-
Simple persistent dictionaries for Python.
|
|
38
|
-
|
|
39
|
-
## What Is It?
|
|
40
|
-
|
|
41
|
-
`persidict` offers a very simple persistent key-value store for Python.
|
|
42
|
-
It saves the content of the dictionary in a folder on a disk
|
|
43
|
-
or in an S3 bucket on AWS. Each value is stored as a separate file / S3 object.
|
|
44
|
-
Only text strings or sequences of strings are allowed as keys.
|
|
45
|
-
|
|
46
|
-
Unlike other persistent dictionaries (e.g. Python's native `shelve`),
|
|
47
|
-
`persidict` is designed for use in highly **distributed environments**,
|
|
48
|
-
where multiple instances of a program run concurrently across many machines.
|
|
49
|
-
|
|
50
|
-
## Usage
|
|
51
|
-
Class `FileDirDict` is a persistent dictionary that stores its content
|
|
52
|
-
in a folder on a disk.
|
|
53
|
-
|
|
54
|
-
from persidict import FileDirDict
|
|
55
|
-
my_dictionary = FileDirDict(base_dir="my_folder")
|
|
56
|
-
|
|
57
|
-
Once created, it can be used as a regular Python dictionary
|
|
58
|
-
that stores key-value pairs. A key must be a sequence of strings,
|
|
59
|
-
a value can be any (pickleable) Python object:
|
|
60
|
-
|
|
61
|
-
my_dictionary["Eliza"] = "MIT Eliza was a mock psychotherapist."
|
|
62
|
-
my_dictionary["Eliza","year"] = 1965
|
|
63
|
-
my_dictionary["Eliza","authors"] = ["Joseph Weizenbaum"]
|
|
64
|
-
|
|
65
|
-
my_dictionary["Shoebox"] = "IBM Shoebox performed arithmetic operations"
|
|
66
|
-
my_dictionary["Shoebox"] += " on voice commands."
|
|
67
|
-
my_dictionary["Shoebox", "year"] = 1961
|
|
68
|
-
my_dictionary["Shoebox", "authors"] = ["W.C. Dersch", "E.A. Quade"]
|
|
69
|
-
|
|
70
|
-
for k in my_dictionary:
|
|
71
|
-
print(list(k), "==>", my_dictionary[k])
|
|
72
|
-
|
|
73
|
-
if not "Eliza" in my_dictionary:
|
|
74
|
-
print("Something is wrong")
|
|
75
|
-
|
|
76
|
-
If you run the code above, it will produce the following output:
|
|
77
|
-
|
|
78
|
-
>>> ['Eliza'] ==> MIT Eliza was a mock psychotherapist.
|
|
79
|
-
>>> ['Shoebox'] ==> IBM Shoebox performed arithmetic operations on voice commands.
|
|
80
|
-
>>> ['Shoebox', 'authors'] ==> ['W.C. Dersch', 'E.A. Quade']
|
|
81
|
-
>>> ['Shoebox', 'year'] ==> 1961
|
|
82
|
-
>>> ['Eliza', 'authors'] ==> ['Joseph Weizenbaum']
|
|
83
|
-
>>> ['Eliza', 'year'] ==> 1965
|
|
84
|
-
|
|
85
|
-
The dictionary automatically creates a folder named "my_folder"
|
|
86
|
-
on the local disk. Each key-value pair is stored as
|
|
87
|
-
a separate file within this folder.
|
|
88
|
-
|
|
89
|
-
If the key is a string, it becomes the filename for the object.
|
|
90
|
-
If the key is a sequence of strings, all strings except the last
|
|
91
|
-
are used to create nested subfolders within the main folder.
|
|
92
|
-
The final string in the sequence serves as the filename for the object,
|
|
93
|
-
which is stored in the deepest subfolder.
|
|
94
|
-
|
|
95
|
-
Persistent dictionaries only accept sequences of strings as keys.
|
|
96
|
-
Any pickleable Python object can be used as a value.
|
|
97
|
-
Unlike regular Python dictionaries, insertion order is not preserved.
|
|
98
|
-
|
|
99
|
-
del my_dictionary
|
|
100
|
-
new_dict = FileDirDict(base_dir="my_folder")
|
|
101
|
-
print("len(new_dict) == ",len(new_dict))
|
|
102
|
-
|
|
103
|
-
The code above will create a new object named new_dict and then will
|
|
104
|
-
print its length:
|
|
105
|
-
|
|
106
|
-
>>> len(new_dict) == 6
|
|
107
|
-
|
|
108
|
-
The length is 6, because the dictionary was already stored on a disk
|
|
109
|
-
in the "my_folder" directory, which contained 6 pickle files.
|
|
110
|
-
|
|
111
|
-
Technically, `FileDirDict` saves its content in a folder on a local disk.
|
|
112
|
-
But you can share this folder with other machines
|
|
113
|
-
(for example, using Dropbox or NFS), and work with the same dictionary
|
|
114
|
-
simultaneously from multiple computers (from multiple instances of your program).
|
|
115
|
-
This approach would allow you to use a persistent dictionary in
|
|
116
|
-
a system that is distributed over dozens of computers.
|
|
117
|
-
|
|
118
|
-
If you need to run your program on hundreds (or more) computers,
|
|
119
|
-
class `S3Dict` is a better choice: it's a persistent dictionary that
|
|
120
|
-
stores its content in an AWS S3 bucket.
|
|
121
|
-
|
|
122
|
-
from persidict import S3Dict
|
|
123
|
-
my_cloud_dictionary = S3Dict(bucket_name="my_bucket")
|
|
124
|
-
|
|
125
|
-
Once created, it can be used as a regular Python dictionary.
|
|
126
|
-
|
|
127
|
-
## Key Classes
|
|
128
|
-
|
|
129
|
-
* `SafeStrTuple` - an immutable sequence of URL/filename-safe non-empty strings.
|
|
130
|
-
* `PersiDict` - an abstract base class for persistent dictionaries.
|
|
131
|
-
* `FileDirDict` - a persistent dictionary that stores its content
|
|
132
|
-
in a folder on a disk.
|
|
133
|
-
* `S3Dict` - a persistent dictionary that stores its content
|
|
134
|
-
in an AWS S3 bucket.
|
|
135
|
-
|
|
136
|
-
## Key Similarities With Python Built-in Dictionaries
|
|
137
|
-
|
|
138
|
-
`PersiDict` and its subclasses can be used as regular Python dictionaries.
|
|
139
|
-
|
|
140
|
-
* You can use square brackets to get, set, or delete values.
|
|
141
|
-
* You can iterate over keys, values, or items.
|
|
142
|
-
* You can check if a key is in the dictionary.
|
|
143
|
-
* You can check whether two dicts are equal
|
|
144
|
-
(meaning they contain the same key-value pairs).
|
|
145
|
-
* You can get the length of the dictionary.
|
|
146
|
-
* Methods `keys()`, `values()`, `items()`, `get()`, `clear()`
|
|
147
|
-
, `setdefault()`, `update()` etc. work as expected.
|
|
148
|
-
|
|
149
|
-
## Key Differences From Python Built-in Dictionaries
|
|
150
|
-
|
|
151
|
-
`PersiDict` and its subclasses persist values between program executions,
|
|
152
|
-
as well as make it possible to concurrently run programs
|
|
153
|
-
that simultaneously work with the same instance of a dictionary.
|
|
154
|
-
|
|
155
|
-
* Keys must be sequences of URL/filename-safe non-empty strings.
|
|
156
|
-
* Values must be pickleable Python objects.
|
|
157
|
-
* You can constrain values to be an instance of a specific class.
|
|
158
|
-
* Insertion order is not preserved.
|
|
159
|
-
* You cannot assign initial key-value pairs to a dictionary in its constructor.
|
|
160
|
-
* `PersiDict` API has additional methods `delete_if_exists()`, `timestamp()`,
|
|
161
|
-
`get_subdict()`, `subdicts()`, `random_key()`, `newest_keys()`,
|
|
162
|
-
`oldest_keys()`, `newest_values()`, `oldest_values()`, and
|
|
163
|
-
`get_params()`, which are not available in native Python dicts.
|
|
164
|
-
* You can use KEEP_CURRENT constant as a fake new value
|
|
165
|
-
to avoid actually setting/updating a value. Or DELETE_CURRENT as
|
|
166
|
-
a fake new value to delete the previous value from a dictionary.
|
|
167
|
-
|
|
168
|
-
## Fine Tuning
|
|
169
|
-
|
|
170
|
-
`PersiDict` subclasses have a number of parameters that can be used
|
|
171
|
-
to impact behaviour of a dictionary.
|
|
172
|
-
|
|
173
|
-
* `base_class_for_values` - A base class for values stored in a dictionary.
|
|
174
|
-
If specified, it will be used to check types of values in the dictionary.
|
|
175
|
-
If not specified (if set to `None`), no type checking will be performed
|
|
176
|
-
and all types will be allowed.
|
|
177
|
-
* `file_type` - a string that specifies the type of files used to store objects.
|
|
178
|
-
If `file_type` has one of two values: "pkl" or "json", it defines
|
|
179
|
-
which file format will be used by the dictionary to store values.
|
|
180
|
-
For all other values of `file_type`, the file format will always be plain
|
|
181
|
-
text. "pkl" or "json" allow to store arbitrary Python objects,
|
|
182
|
-
while all other file_type-s only work with str objects;
|
|
183
|
-
it means `base_class_for_values` must be explicitly set to `str`
|
|
184
|
-
if `file_type` is not set to "pkl" or "json".
|
|
185
|
-
* `immutable_items` - a boolean that specifies whether items in a dictionary
|
|
186
|
-
can be modified/deleted. It enables various distributed cache optimizations
|
|
187
|
-
for remote storage. True means an append-only dictionary.
|
|
188
|
-
False means normal dict-like behaviour. The default value is False.
|
|
189
|
-
* `digest_len` - a length of a hash signature suffix which `PersiDict`
|
|
190
|
-
automatically adds to each string in a key while mapping the key to
|
|
191
|
-
the address of a value in a persistent storage backend
|
|
192
|
-
(e.g. a filename or an S3 objectname). It is needed to ensure correct work
|
|
193
|
-
of persistent dictionaries with case-insensitive (even if case-preserving)
|
|
194
|
-
filesystems, such as MacOS HFS. The default value is 8.
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
## How To Get It?
|
|
198
|
-
|
|
199
|
-
The source code is hosted on GitHub at:
|
|
200
|
-
[https://github.com/pythagoras-dev/persidict](https://github.com/pythagoras-dev/persidict)
|
|
201
|
-
|
|
202
|
-
Binary installers for the latest released version are available at the Python package index at:
|
|
203
|
-
[https://pypi.org/project/persidict](https://pypi.org/project/persidict)
|
|
204
|
-
|
|
205
|
-
Using uv :
|
|
206
|
-
```
|
|
207
|
-
uv add persidict
|
|
208
|
-
```
|
|
209
|
-
|
|
210
|
-
Using pip (legacy alternative to uv):
|
|
211
|
-
```
|
|
212
|
-
pip install persidict
|
|
213
|
-
```
|
|
214
|
-
|
|
215
|
-
## Dependencies
|
|
216
|
-
|
|
217
|
-
* [jsonpickle](https://jsonpickle.github.io)
|
|
218
|
-
* [joblib](https://joblib.readthedocs.io)
|
|
219
|
-
* [lz4](https://python-lz4.readthedocs.io)
|
|
220
|
-
* [pandas](https://pandas.pydata.org)
|
|
221
|
-
* [numpy](https://numpy.org)
|
|
222
|
-
* [boto3](https://boto3.readthedocs.io)
|
|
223
|
-
* [pytest](https://pytest.org)
|
|
224
|
-
* [moto](http://getmoto.org)
|
|
225
|
-
|
|
226
|
-
## Key Contacts
|
|
227
|
-
|
|
228
|
-
* [Vlad (Volodymyr) Pavlov](https://www.linkedin.com/in/vlpavlov/)
|
|
File without changes
|