dtlpy 1.115.44__py3-none-any.whl → 1.117.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dtlpy/__init__.py +491 -491
- dtlpy/__version__.py +1 -1
- dtlpy/assets/__init__.py +26 -26
- dtlpy/assets/code_server/config.yaml +2 -2
- dtlpy/assets/code_server/installation.sh +24 -24
- dtlpy/assets/code_server/launch.json +13 -13
- dtlpy/assets/code_server/settings.json +2 -2
- dtlpy/assets/main.py +53 -53
- dtlpy/assets/main_partial.py +18 -18
- dtlpy/assets/mock.json +11 -11
- dtlpy/assets/model_adapter.py +83 -83
- dtlpy/assets/package.json +61 -61
- dtlpy/assets/package_catalog.json +29 -29
- dtlpy/assets/package_gitignore +307 -307
- dtlpy/assets/service_runners/__init__.py +33 -33
- dtlpy/assets/service_runners/converter.py +96 -96
- dtlpy/assets/service_runners/multi_method.py +49 -49
- dtlpy/assets/service_runners/multi_method_annotation.py +54 -54
- dtlpy/assets/service_runners/multi_method_dataset.py +55 -55
- dtlpy/assets/service_runners/multi_method_item.py +52 -52
- dtlpy/assets/service_runners/multi_method_json.py +52 -52
- dtlpy/assets/service_runners/single_method.py +37 -37
- dtlpy/assets/service_runners/single_method_annotation.py +43 -43
- dtlpy/assets/service_runners/single_method_dataset.py +43 -43
- dtlpy/assets/service_runners/single_method_item.py +41 -41
- dtlpy/assets/service_runners/single_method_json.py +42 -42
- dtlpy/assets/service_runners/single_method_multi_input.py +45 -45
- dtlpy/assets/voc_annotation_template.xml +23 -23
- dtlpy/caches/base_cache.py +32 -32
- dtlpy/caches/cache.py +473 -473
- dtlpy/caches/dl_cache.py +201 -201
- dtlpy/caches/filesystem_cache.py +89 -89
- dtlpy/caches/redis_cache.py +84 -84
- dtlpy/dlp/__init__.py +20 -20
- dtlpy/dlp/cli_utilities.py +367 -367
- dtlpy/dlp/command_executor.py +764 -764
- dtlpy/dlp/dlp +1 -1
- dtlpy/dlp/dlp.bat +1 -1
- dtlpy/dlp/dlp.py +128 -128
- dtlpy/dlp/parser.py +651 -651
- dtlpy/entities/__init__.py +83 -83
- dtlpy/entities/analytic.py +347 -347
- dtlpy/entities/annotation.py +1879 -1879
- dtlpy/entities/annotation_collection.py +699 -699
- dtlpy/entities/annotation_definitions/__init__.py +20 -20
- dtlpy/entities/annotation_definitions/base_annotation_definition.py +100 -100
- dtlpy/entities/annotation_definitions/box.py +195 -195
- dtlpy/entities/annotation_definitions/classification.py +67 -67
- dtlpy/entities/annotation_definitions/comparison.py +72 -72
- dtlpy/entities/annotation_definitions/cube.py +204 -204
- dtlpy/entities/annotation_definitions/cube_3d.py +149 -149
- dtlpy/entities/annotation_definitions/description.py +32 -32
- dtlpy/entities/annotation_definitions/ellipse.py +124 -124
- dtlpy/entities/annotation_definitions/free_text.py +62 -62
- dtlpy/entities/annotation_definitions/gis.py +69 -69
- dtlpy/entities/annotation_definitions/note.py +139 -139
- dtlpy/entities/annotation_definitions/point.py +117 -117
- dtlpy/entities/annotation_definitions/polygon.py +182 -182
- dtlpy/entities/annotation_definitions/polyline.py +111 -111
- dtlpy/entities/annotation_definitions/pose.py +92 -92
- dtlpy/entities/annotation_definitions/ref_image.py +86 -86
- dtlpy/entities/annotation_definitions/segmentation.py +240 -240
- dtlpy/entities/annotation_definitions/subtitle.py +34 -34
- dtlpy/entities/annotation_definitions/text.py +85 -85
- dtlpy/entities/annotation_definitions/undefined_annotation.py +74 -74
- dtlpy/entities/app.py +220 -220
- dtlpy/entities/app_module.py +107 -107
- dtlpy/entities/artifact.py +174 -174
- dtlpy/entities/assignment.py +399 -399
- dtlpy/entities/base_entity.py +214 -214
- dtlpy/entities/bot.py +113 -113
- dtlpy/entities/codebase.py +292 -292
- dtlpy/entities/collection.py +38 -38
- dtlpy/entities/command.py +169 -169
- dtlpy/entities/compute.py +449 -449
- dtlpy/entities/dataset.py +1299 -1299
- dtlpy/entities/directory_tree.py +44 -44
- dtlpy/entities/dpk.py +470 -470
- dtlpy/entities/driver.py +235 -235
- dtlpy/entities/execution.py +397 -397
- dtlpy/entities/feature.py +124 -124
- dtlpy/entities/feature_set.py +152 -145
- dtlpy/entities/filters.py +798 -798
- dtlpy/entities/gis_item.py +107 -107
- dtlpy/entities/integration.py +184 -184
- dtlpy/entities/item.py +975 -959
- dtlpy/entities/label.py +123 -123
- dtlpy/entities/links.py +85 -85
- dtlpy/entities/message.py +175 -175
- dtlpy/entities/model.py +684 -684
- dtlpy/entities/node.py +1005 -1005
- dtlpy/entities/ontology.py +810 -803
- dtlpy/entities/organization.py +287 -287
- dtlpy/entities/package.py +657 -657
- dtlpy/entities/package_defaults.py +5 -5
- dtlpy/entities/package_function.py +185 -185
- dtlpy/entities/package_module.py +113 -113
- dtlpy/entities/package_slot.py +118 -118
- dtlpy/entities/paged_entities.py +299 -299
- dtlpy/entities/pipeline.py +624 -624
- dtlpy/entities/pipeline_execution.py +279 -279
- dtlpy/entities/project.py +394 -394
- dtlpy/entities/prompt_item.py +505 -505
- dtlpy/entities/recipe.py +301 -301
- dtlpy/entities/reflect_dict.py +102 -102
- dtlpy/entities/resource_execution.py +138 -138
- dtlpy/entities/service.py +974 -963
- dtlpy/entities/service_driver.py +117 -117
- dtlpy/entities/setting.py +294 -294
- dtlpy/entities/task.py +495 -495
- dtlpy/entities/time_series.py +143 -143
- dtlpy/entities/trigger.py +426 -426
- dtlpy/entities/user.py +118 -118
- dtlpy/entities/webhook.py +124 -124
- dtlpy/examples/__init__.py +19 -19
- dtlpy/examples/add_labels.py +135 -135
- dtlpy/examples/add_metadata_to_item.py +21 -21
- dtlpy/examples/annotate_items_using_model.py +65 -65
- dtlpy/examples/annotate_video_using_model_and_tracker.py +75 -75
- dtlpy/examples/annotations_convert_to_voc.py +9 -9
- dtlpy/examples/annotations_convert_to_yolo.py +9 -9
- dtlpy/examples/convert_annotation_types.py +51 -51
- dtlpy/examples/converter.py +143 -143
- dtlpy/examples/copy_annotations.py +22 -22
- dtlpy/examples/copy_folder.py +31 -31
- dtlpy/examples/create_annotations.py +51 -51
- dtlpy/examples/create_video_annotations.py +83 -83
- dtlpy/examples/delete_annotations.py +26 -26
- dtlpy/examples/filters.py +113 -113
- dtlpy/examples/move_item.py +23 -23
- dtlpy/examples/play_video_annotation.py +13 -13
- dtlpy/examples/show_item_and_mask.py +53 -53
- dtlpy/examples/triggers.py +49 -49
- dtlpy/examples/upload_batch_of_items.py +20 -20
- dtlpy/examples/upload_items_and_custom_format_annotations.py +55 -55
- dtlpy/examples/upload_items_with_modalities.py +43 -43
- dtlpy/examples/upload_segmentation_annotations_from_mask_image.py +44 -44
- dtlpy/examples/upload_yolo_format_annotations.py +70 -70
- dtlpy/exceptions.py +125 -125
- dtlpy/miscellaneous/__init__.py +20 -20
- dtlpy/miscellaneous/dict_differ.py +95 -95
- dtlpy/miscellaneous/git_utils.py +217 -217
- dtlpy/miscellaneous/json_utils.py +14 -14
- dtlpy/miscellaneous/list_print.py +105 -105
- dtlpy/miscellaneous/zipping.py +130 -130
- dtlpy/ml/__init__.py +20 -20
- dtlpy/ml/base_feature_extractor_adapter.py +27 -27
- dtlpy/ml/base_model_adapter.py +1287 -1230
- dtlpy/ml/metrics.py +461 -461
- dtlpy/ml/predictions_utils.py +274 -274
- dtlpy/ml/summary_writer.py +57 -57
- dtlpy/ml/train_utils.py +60 -60
- dtlpy/new_instance.py +252 -252
- dtlpy/repositories/__init__.py +56 -56
- dtlpy/repositories/analytics.py +85 -85
- dtlpy/repositories/annotations.py +916 -916
- dtlpy/repositories/apps.py +383 -383
- dtlpy/repositories/artifacts.py +452 -452
- dtlpy/repositories/assignments.py +599 -599
- dtlpy/repositories/bots.py +213 -213
- dtlpy/repositories/codebases.py +559 -559
- dtlpy/repositories/collections.py +332 -332
- dtlpy/repositories/commands.py +152 -152
- dtlpy/repositories/compositions.py +61 -61
- dtlpy/repositories/computes.py +439 -439
- dtlpy/repositories/datasets.py +1585 -1504
- dtlpy/repositories/downloader.py +1157 -923
- dtlpy/repositories/dpks.py +433 -433
- dtlpy/repositories/drivers.py +482 -482
- dtlpy/repositories/executions.py +815 -815
- dtlpy/repositories/feature_sets.py +256 -226
- dtlpy/repositories/features.py +255 -255
- dtlpy/repositories/integrations.py +484 -484
- dtlpy/repositories/items.py +912 -912
- dtlpy/repositories/messages.py +94 -94
- dtlpy/repositories/models.py +1000 -1000
- dtlpy/repositories/nodes.py +80 -80
- dtlpy/repositories/ontologies.py +511 -511
- dtlpy/repositories/organizations.py +525 -525
- dtlpy/repositories/packages.py +1941 -1941
- dtlpy/repositories/pipeline_executions.py +451 -451
- dtlpy/repositories/pipelines.py +640 -640
- dtlpy/repositories/projects.py +539 -539
- dtlpy/repositories/recipes.py +429 -399
- dtlpy/repositories/resource_executions.py +137 -137
- dtlpy/repositories/schema.py +120 -120
- dtlpy/repositories/service_drivers.py +213 -213
- dtlpy/repositories/services.py +1704 -1704
- dtlpy/repositories/settings.py +339 -339
- dtlpy/repositories/tasks.py +1477 -1477
- dtlpy/repositories/times_series.py +278 -278
- dtlpy/repositories/triggers.py +536 -536
- dtlpy/repositories/upload_element.py +257 -257
- dtlpy/repositories/uploader.py +661 -661
- dtlpy/repositories/webhooks.py +249 -249
- dtlpy/services/__init__.py +22 -22
- dtlpy/services/aihttp_retry.py +131 -131
- dtlpy/services/api_client.py +1786 -1785
- dtlpy/services/api_reference.py +40 -40
- dtlpy/services/async_utils.py +133 -133
- dtlpy/services/calls_counter.py +44 -44
- dtlpy/services/check_sdk.py +68 -68
- dtlpy/services/cookie.py +115 -115
- dtlpy/services/create_logger.py +156 -156
- dtlpy/services/events.py +84 -84
- dtlpy/services/logins.py +235 -235
- dtlpy/services/reporter.py +256 -256
- dtlpy/services/service_defaults.py +91 -91
- dtlpy/utilities/__init__.py +20 -20
- dtlpy/utilities/annotations/__init__.py +16 -16
- dtlpy/utilities/annotations/annotation_converters.py +269 -269
- dtlpy/utilities/base_package_runner.py +285 -264
- dtlpy/utilities/converter.py +1650 -1650
- dtlpy/utilities/dataset_generators/__init__.py +1 -1
- dtlpy/utilities/dataset_generators/dataset_generator.py +670 -670
- dtlpy/utilities/dataset_generators/dataset_generator_tensorflow.py +23 -23
- dtlpy/utilities/dataset_generators/dataset_generator_torch.py +21 -21
- dtlpy/utilities/local_development/__init__.py +1 -1
- dtlpy/utilities/local_development/local_session.py +179 -179
- dtlpy/utilities/reports/__init__.py +2 -2
- dtlpy/utilities/reports/figures.py +343 -343
- dtlpy/utilities/reports/report.py +71 -71
- dtlpy/utilities/videos/__init__.py +17 -17
- dtlpy/utilities/videos/video_player.py +598 -598
- dtlpy/utilities/videos/videos.py +470 -470
- {dtlpy-1.115.44.data → dtlpy-1.117.6.data}/scripts/dlp +1 -1
- dtlpy-1.117.6.data/scripts/dlp.bat +2 -0
- {dtlpy-1.115.44.data → dtlpy-1.117.6.data}/scripts/dlp.py +128 -128
- {dtlpy-1.115.44.dist-info → dtlpy-1.117.6.dist-info}/METADATA +186 -186
- dtlpy-1.117.6.dist-info/RECORD +239 -0
- {dtlpy-1.115.44.dist-info → dtlpy-1.117.6.dist-info}/WHEEL +1 -1
- {dtlpy-1.115.44.dist-info → dtlpy-1.117.6.dist-info}/licenses/LICENSE +200 -200
- tests/features/environment.py +551 -551
- dtlpy/assets/__pycache__/__init__.cpython-310.pyc +0 -0
- dtlpy-1.115.44.data/scripts/dlp.bat +0 -2
- dtlpy-1.115.44.dist-info/RECORD +0 -240
- {dtlpy-1.115.44.dist-info → dtlpy-1.117.6.dist-info}/entry_points.txt +0 -0
- {dtlpy-1.115.44.dist-info → dtlpy-1.117.6.dist-info}/top_level.txt +0 -0
dtlpy/caches/cache.py
CHANGED
@@ -1,473 +1,473 @@

The hunk removes and re-adds all 473 lines of the file; the removed (`-`) and re-added (`+`) lines are identical, so the file contents are shown once:

```python
import json
import os
import shutil
import time
from enum import Enum
from pathlib import Path
import mmap
from filelock import FileLock
import logging
import base64

from .dl_cache import DiskCache
from .redis_cache import RedisCache
from .filesystem_cache import FileSystemCache

logger = logging.getLogger(name='dtlpy')


class ObjectType(str, Enum):
    BINARY = "binary"
    OBJECT = "object"


class CacheType(Enum):
    DISKCACHE = 'diskcache'
    REDIS = 'redis'
    FILESYSTEM = 'filesystem'


class CacheConfig:
    def __init__(self, cache_type=CacheType.DISKCACHE, ttl=1000, level=1, options=None):
        """
        Cache config settings

        :param CacheType cache_type: CacheType diskcache, filesystem, redis
        :param int ttl: time to hold the item in the cache in seconds (SEC)
        :param int level: cache level
        :param dict options: the configs for the caches types
        """
        if isinstance(cache_type, CacheType):
            cache_type = cache_type.value
        if isinstance(cache_type, str) and cache_type not in CacheType._value2member_map_:
            raise ValueError('cache type must be redis or diskcache')

        self.type = cache_type
        self.ttl = ttl
        self.level = level
        self.options = options

    def to_string(self):
        """
        convert object to base 64 string
        """
        base64_bytes = base64.b64encode(json.dumps(self.to_json()).encode("ascii"))
        base64_string = base64_bytes.decode("ascii")
        return base64_string

    @staticmethod
    def from_string(cls, base64_string):
        """
        convert from base 64 string to the class object

        :param str base64_string: string in base64 the have a json configs
        """
        base64_bytes = base64_string.encode("ascii")
        sample_string_bytes = base64.b64decode(base64_bytes)
        _json = json.loads(sample_string_bytes.decode("ascii"))
        return cls(cache_type=_json.get('type', CacheType.DISKCACHE),
                   ttl=_json.get('ttl', 1000),
                   level=_json.get('level', 1),
                   options=_json.get('options', None))

    def to_json(self):
        """
        convert the class to json
        """
        return {
            'type': self.type,
            'ttl': self.ttl,
            'level': self.level,
            'options': self.options,
        }

    @staticmethod
    def from_json(cls, _json):
        """
        make a class attribute from json

        :param _json: _json have the class attributes
        """
        if isinstance(_json, str):
            _json = json.loads(_json)
        return cls(cache_type=_json.get('type', CacheType.DISKCACHE),
                   ttl=_json.get('ttl', 1000),
                   level=_json.get('level', 1),
                   options=_json.get('options', None))


class CacheKey:
    def __init__(self,
                 master_type='**',
                 master_id='**',
                 entity_type='**',
                 entity_id='*',
                 object_type=ObjectType.OBJECT):
        """
        :param str master_type: master type
        :param str master_id: master id
        :param str entity_type: entity type
        :param str entity_id: entity id
        :param str object_type: object type object/binary
        """
        self.master_type = master_type
        self.master_id = master_id
        self.entity_type = entity_type
        self.entity_id = entity_id
        self.object_type = object_type

    def get(self):
        """
        return the build key
        """
        return os.path.join(self.master_type, self.master_id, self.entity_type, self.entity_id, self.object_type)

    def get_key(self):
        """
        return the build key
        """
        return os.path.join(self.entity_type, self.entity_id, self.object_type)


class CacheManger:
    def __init__(self, cache_configs: list, bin_cache_size=1000):
        """
        Cache manger for config and mange the cache

        :param cache_configs: CacheConfig object
        :param bin_cache_size: size on MB for binary cache
        """
        self.cache_levels = dict()
        self._max_level = 1
        self.bin_cache_size = bin_cache_size
        self.bin_cache_path = os.environ['DEFAULT_CACHE_PATH']
        self._current_bin_cache_size = 0
        for config in cache_configs:
            try:
                self.cache_levels[config.level] = self._load_cache_handler(config)
                if config.level < self._max_level:
                    self._max_level = config.level
            except:
                raise "Failed to build Cache"

        self.parent_dict = {
            "annotations": 'items',
            "items": 'datasets',
            "datasets": 'projects',
            "projects": 'org',
            "org": '',
            "annotationtasks": 'datasets',
            "assignments": 'annotationtasks',
            "models": 'packages',
            "packages": 'projects',
            "services": 'packages',
        }

    def _load_cache_handler(self, config: CacheConfig):
        """
        the function the build the cache form the configs that get
        """
        from ..services import DataloopLogger
        cache = None
        if config.type == CacheType.REDIS.value:
            try:
                cache = RedisCache(options=config.options, ttl=config.ttl)
            except:
                logger.warning("Failed to build Redis")
                raise Exception("Failed to build Redis")

        elif config.type == CacheType.DISKCACHE.value:
            cache = DiskCache(name='object_cache', options=config.options, ttl=config.ttl)
        elif config.type == CacheType.FILESYSTEM.value:
            cache = FileSystemCache(options=config.options, ttl=config.ttl)
            DataloopLogger.clean_dataloop_cache(cache_path=cache.root_dir,
                                                max_param={'max_time': cache.ttl})
        DataloopLogger.clean_dataloop_cache(cache_path=self.bin_cache_path,
                                            max_param={'max_time': config.ttl})
        return cache

    def get(self, key: CacheKey):
        """
        Cache get

        :param CacheKey key: CacheKey object
        :return: success, list of the get result
        """
        res = []
        success = False
        for i in range(1, self._max_level + 1):
            res = self.cache_levels[i].get(key=key.get_key())
            if res:
                success = True
                break
        return success, res

    def ping(self):
        """
        Cache ping check if connection is working
        """
        try:
            for i in range(1, self._max_level + 1):
                self.cache_levels[i].ping()
        except Exception as e:
            raise Exception('cache connection failed ')

    def set(self, key: str, value):
        """
        Cache set, add or update the key value

        :param CacheKey key: CacheKey object
        :param value: value to set
        """
        if isinstance(value, dict):
            value = json.dumps(value)
        self.cache_levels[1].set(key, value)

    def _delete_parent(self, key: CacheKey, level):
        parent_key = CacheKey(master_type=self.parent_dict[key.entity_type],
                              entity_type=key.entity_type,
                              entity_id=key.entity_id,
                              object_type='*')
        list_keys = self.cache_levels[level].list(pattern=parent_key.get())
        for k in list_keys:
            if 'binary' in k:
                val = self.cache_levels[level].get(key=k)
                if os.path.isfile(val):
                    os.remove(val)
            self.cache_levels[level].delete(k)

    def delete(self, key: CacheKey):
        """
        Cache delete

        :param CacheKey key: CacheKey object
        """
        for i in range(1, self._max_level + 1):
            self.cache_levels[i].delete(key.get_key())
            self._delete_parent(key=key, level=i)
            key.object_type = '*'
            list_keys = self.cache_levels[i].list(pattern=key.get_key())
            for k in list_keys:
                val = self.cache_levels[i].get(key=k)
                self.cache_levels[i].delete(k)
                if 'binary' in k:
                    if os.path.isfile(val):
                        os.remove(val)
                    continue
                e_type, e_id, e_obj = val.split('\\')
                self.delete(key=CacheKey(entity_type=e_type, entity_id=e_id, object_type=e_obj))

    def build_cache_key(self, entity_json: dict):
        """
        Build a format of the cache key from the entity json we get

        :param dict entity_json: json of an entity
        :return: CacheKey object
        """
        child_entity = False
        if 'url' in entity_json:
            split_url = entity_json['url'].split('/')
            entity_type = split_url[-2]
            child_entity = True
        elif 'org' in entity_json:
            entity_type = 'projects'
        else:
            entity_type = 'org'
        entity_id = entity_json['id']
        master_type = self.parent_dict[entity_type]
        master_id = '**'
        if child_entity:
            master_id_key = master_type[:-1] + 'Id'
            if master_id_key in entity_json:
                master_id = entity_json[master_id_key]
            elif master_type in entity_json:
                master_id = entity_json[master_type][0]
            elif entity_type == 'projects':
                master_id = entity_json[master_type]['id']

        return CacheKey(master_type=master_type, master_id=master_id, entity_type=entity_type, entity_id=entity_id)

    def _update_config_file(self, filepath: str, update: bool, size: float = 0):
        """
        Update the config file the have all the details about binary cache

        :param str filepath: path of the file the work on
        :param bool update: if True update the use of the file
        :param int size: file size
        """
        config_file_path = os.path.join(self.bin_cache_path, 'cacheConfig.json')
        if os.path.isfile(config_file_path):
            with FileLock(config_file_path + ".lock"):
                with open(config_file_path, mode="r", encoding="utf-8") as con:
                    with mmap.mmap(con.fileno(), length=0, access=mmap.ACCESS_READ) as mmap_obj:
                        text = mmap_obj.read().decode('utf8').replace("'", '"')
                        config_file = json.loads(text)
        else:
            config_file = {'size': 0, 'keys': []}

        if update and filepath in config_file['keys']:
            config_file['keys'].remove(filepath)

        if filepath not in config_file['keys']:
            config_file['keys'].append(filepath)
        config_file['size'] += size
        self._current_bin_cache_size = config_file['size']
        json_object = json.dumps(config_file, indent=4)
        with FileLock(config_file_path + ".lock"):
            with open(config_file_path, mode="w", encoding="utf-8") as outfile:
                outfile.write(json_object)

    def _lru_cache(self):
        """
        Make lru on the binary cache remove 30% of the files
        """
        config_file_path = os.path.join(self.bin_cache_path, 'cacheConfig.json')
        with FileLock(config_file_path + ".lock"):
            with open(config_file_path, mode="r", encoding="utf-8") as con:
                with mmap.mmap(con.fileno(), length=0, access=mmap.ACCESS_READ) as mmap_obj:
                    text = mmap_obj.read().decode('utf8').replace("'", '"')
                    config_file = json.loads(text)

        size = config_file['size']
        end = 70 / 100 * self.bin_cache_size

        while size > end and len(config_file['keys']) > 1:
            to_delete = config_file['keys'][0]

            size -= (Path(to_delete).stat().st_size / 1000000)
            os.remove(to_delete)
            config_file['keys'].remove(to_delete)

        config_file['size'] = size
        json_object = json.dumps(config_file, indent=4)

        with FileLock(config_file_path + ".lock"):
            with open(config_file_path, "w") as outfile:
                outfile.write(json_object)

    def read_stream(self, request_path, dataset_id=None):
        """
        Cache binary get

        :param str request_path: the request
        :param str dataset_id: dataset id of the binary object
        :return: success, list of the get result
        """
        entity_id = request_path.split('/')[-2]
        key = CacheKey(master_type='datasets',
                       master_id=dataset_id,
                       entity_id=entity_id,
                       entity_type='items',
                       object_type=ObjectType.BINARY.value)
        hit, response = self.get(key=key)
        if hit:
            source_path = os.path.normpath(response[0])
            self._update_config_file(filepath=source_path, update=True)
            return hit, [source_path]
        else:
            return False, None

    def write_stream(self,
                     request_path,
                     response=None,
                     buffer=None,
                     file_name=None,
                     entity_id=None,
                     dataset_id=None
                     ):
        """
        Cache binary set

        :param request_path: the request
        :param response: the response of stream
        :param buffer: the steam buffer
        :param file_name: the file name
        :param entity_id: entity id
        :param dataset_id: dataset id of the binary object
        :return: the file path of the binary
        """
        if entity_id is None:
            entity_id = request_path.split('/')[-2]
        key = CacheKey(master_type='datasets',
                       master_id=dataset_id,
                       entity_id=entity_id,
                       entity_type='items',
                       object_type=ObjectType.BINARY)
        filepath = self.bin_cache_path
        if file_name is None:
            file_name = (dict(response.headers)['Content-Disposition'].split('=')[1][2:-1])
        filepath = os.path.join(
            filepath,
            'items',
            file_name
        )
        self.set(key=key.get(), value=filepath)
        if not os.path.isfile(filepath):
            os.makedirs(os.path.dirname(filepath), exist_ok=True)
            if buffer is None:
                try:
                    temp_file_path = filepath + '.download'
                    with open(temp_file_path, "wb") as f:
                        for chunk in response.iter_content(chunk_size=8192):
                            if chunk:  # filter out keep-alive new chunks
                                f.write(chunk)
                    shutil.move(temp_file_path, filepath)
                except:
                    if os.path.isfile(temp_file_path):
                        os.remove(temp_file_path)
                    return ''
            else:
                if os.path.isfile(buffer.name):
                    shutil.copyfile(buffer.name, filepath)
                else:
                    with open(filepath, "wb") as f:
                        f.write(buffer.getbuffer())
            self._update_config_file(filepath=filepath, update=False, size=(Path(filepath).stat().st_size / 1000000))
            if (Path(filepath).stat().st_size / 1000000) + self._current_bin_cache_size > self.bin_cache_size:
                self._lru_cache()
        return filepath

    def read(self, request_path: str):
        """
        Cache entity get

        :param str request_path: the request
        :return: success, list of the get result
        """
        entity_id = request_path.split('/')[-1]
        entity_type = request_path.split('/')[-2]
        key = CacheKey(entity_id=entity_id, entity_type=entity_type)
        hit, response = self.get(key=key)
        if hit:
            return hit, response
        return False, None

    def write(self, list_entities_json):
        """
        Add or update the entity cache

        :param list list_entities_json: list of jsons of entities to set
        """
        for entity_json in list_entities_json:
            key = self.build_cache_key(entity_json)
            redis_key = key.get_key()
            self.set(key=redis_key, value=entity_json)
            self.set(key=key.get(), value=redis_key)

    def invalidate(self, path):
        """
        Delete from the caches

        :param str path: the request path
        """
        entity_id = path.split('/')[-1]
        entity_type = path.split('/')[-2]
        key = CacheKey(entity_id=entity_id, entity_type=entity_type)
        self.delete(key)

    def clear(self):
        self.cache_levels[1].clear()

    def keys(self):
        return [k for k in self.cache_levels[1].keys()]
```