dtlpy 1.115.44__py3-none-any.whl → 1.116.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (238)
  1. dtlpy/__init__.py +491 -491
  2. dtlpy/__version__.py +1 -1
  3. dtlpy/assets/__init__.py +26 -26
  4. dtlpy/assets/code_server/config.yaml +2 -2
  5. dtlpy/assets/code_server/installation.sh +24 -24
  6. dtlpy/assets/code_server/launch.json +13 -13
  7. dtlpy/assets/code_server/settings.json +2 -2
  8. dtlpy/assets/main.py +53 -53
  9. dtlpy/assets/main_partial.py +18 -18
  10. dtlpy/assets/mock.json +11 -11
  11. dtlpy/assets/model_adapter.py +83 -83
  12. dtlpy/assets/package.json +61 -61
  13. dtlpy/assets/package_catalog.json +29 -29
  14. dtlpy/assets/package_gitignore +307 -307
  15. dtlpy/assets/service_runners/__init__.py +33 -33
  16. dtlpy/assets/service_runners/converter.py +96 -96
  17. dtlpy/assets/service_runners/multi_method.py +49 -49
  18. dtlpy/assets/service_runners/multi_method_annotation.py +54 -54
  19. dtlpy/assets/service_runners/multi_method_dataset.py +55 -55
  20. dtlpy/assets/service_runners/multi_method_item.py +52 -52
  21. dtlpy/assets/service_runners/multi_method_json.py +52 -52
  22. dtlpy/assets/service_runners/single_method.py +37 -37
  23. dtlpy/assets/service_runners/single_method_annotation.py +43 -43
  24. dtlpy/assets/service_runners/single_method_dataset.py +43 -43
  25. dtlpy/assets/service_runners/single_method_item.py +41 -41
  26. dtlpy/assets/service_runners/single_method_json.py +42 -42
  27. dtlpy/assets/service_runners/single_method_multi_input.py +45 -45
  28. dtlpy/assets/voc_annotation_template.xml +23 -23
  29. dtlpy/caches/base_cache.py +32 -32
  30. dtlpy/caches/cache.py +473 -473
  31. dtlpy/caches/dl_cache.py +201 -201
  32. dtlpy/caches/filesystem_cache.py +89 -89
  33. dtlpy/caches/redis_cache.py +84 -84
  34. dtlpy/dlp/__init__.py +20 -20
  35. dtlpy/dlp/cli_utilities.py +367 -367
  36. dtlpy/dlp/command_executor.py +764 -764
  37. dtlpy/dlp/dlp +1 -1
  38. dtlpy/dlp/dlp.bat +1 -1
  39. dtlpy/dlp/dlp.py +128 -128
  40. dtlpy/dlp/parser.py +651 -651
  41. dtlpy/entities/__init__.py +83 -83
  42. dtlpy/entities/analytic.py +347 -347
  43. dtlpy/entities/annotation.py +1879 -1879
  44. dtlpy/entities/annotation_collection.py +699 -699
  45. dtlpy/entities/annotation_definitions/__init__.py +20 -20
  46. dtlpy/entities/annotation_definitions/base_annotation_definition.py +100 -100
  47. dtlpy/entities/annotation_definitions/box.py +195 -195
  48. dtlpy/entities/annotation_definitions/classification.py +67 -67
  49. dtlpy/entities/annotation_definitions/comparison.py +72 -72
  50. dtlpy/entities/annotation_definitions/cube.py +204 -204
  51. dtlpy/entities/annotation_definitions/cube_3d.py +149 -149
  52. dtlpy/entities/annotation_definitions/description.py +32 -32
  53. dtlpy/entities/annotation_definitions/ellipse.py +124 -124
  54. dtlpy/entities/annotation_definitions/free_text.py +62 -62
  55. dtlpy/entities/annotation_definitions/gis.py +69 -69
  56. dtlpy/entities/annotation_definitions/note.py +139 -139
  57. dtlpy/entities/annotation_definitions/point.py +117 -117
  58. dtlpy/entities/annotation_definitions/polygon.py +182 -182
  59. dtlpy/entities/annotation_definitions/polyline.py +111 -111
  60. dtlpy/entities/annotation_definitions/pose.py +92 -92
  61. dtlpy/entities/annotation_definitions/ref_image.py +86 -86
  62. dtlpy/entities/annotation_definitions/segmentation.py +240 -240
  63. dtlpy/entities/annotation_definitions/subtitle.py +34 -34
  64. dtlpy/entities/annotation_definitions/text.py +85 -85
  65. dtlpy/entities/annotation_definitions/undefined_annotation.py +74 -74
  66. dtlpy/entities/app.py +220 -220
  67. dtlpy/entities/app_module.py +107 -107
  68. dtlpy/entities/artifact.py +174 -174
  69. dtlpy/entities/assignment.py +399 -399
  70. dtlpy/entities/base_entity.py +214 -214
  71. dtlpy/entities/bot.py +113 -113
  72. dtlpy/entities/codebase.py +292 -292
  73. dtlpy/entities/collection.py +38 -38
  74. dtlpy/entities/command.py +169 -169
  75. dtlpy/entities/compute.py +449 -449
  76. dtlpy/entities/dataset.py +1299 -1299
  77. dtlpy/entities/directory_tree.py +44 -44
  78. dtlpy/entities/dpk.py +470 -470
  79. dtlpy/entities/driver.py +235 -235
  80. dtlpy/entities/execution.py +397 -397
  81. dtlpy/entities/feature.py +124 -124
  82. dtlpy/entities/feature_set.py +145 -145
  83. dtlpy/entities/filters.py +798 -798
  84. dtlpy/entities/gis_item.py +107 -107
  85. dtlpy/entities/integration.py +184 -184
  86. dtlpy/entities/item.py +959 -959
  87. dtlpy/entities/label.py +123 -123
  88. dtlpy/entities/links.py +85 -85
  89. dtlpy/entities/message.py +175 -175
  90. dtlpy/entities/model.py +684 -684
  91. dtlpy/entities/node.py +1005 -1005
  92. dtlpy/entities/ontology.py +810 -803
  93. dtlpy/entities/organization.py +287 -287
  94. dtlpy/entities/package.py +657 -657
  95. dtlpy/entities/package_defaults.py +5 -5
  96. dtlpy/entities/package_function.py +185 -185
  97. dtlpy/entities/package_module.py +113 -113
  98. dtlpy/entities/package_slot.py +118 -118
  99. dtlpy/entities/paged_entities.py +299 -299
  100. dtlpy/entities/pipeline.py +624 -624
  101. dtlpy/entities/pipeline_execution.py +279 -279
  102. dtlpy/entities/project.py +394 -394
  103. dtlpy/entities/prompt_item.py +505 -505
  104. dtlpy/entities/recipe.py +301 -301
  105. dtlpy/entities/reflect_dict.py +102 -102
  106. dtlpy/entities/resource_execution.py +138 -138
  107. dtlpy/entities/service.py +963 -963
  108. dtlpy/entities/service_driver.py +117 -117
  109. dtlpy/entities/setting.py +294 -294
  110. dtlpy/entities/task.py +495 -495
  111. dtlpy/entities/time_series.py +143 -143
  112. dtlpy/entities/trigger.py +426 -426
  113. dtlpy/entities/user.py +118 -118
  114. dtlpy/entities/webhook.py +124 -124
  115. dtlpy/examples/__init__.py +19 -19
  116. dtlpy/examples/add_labels.py +135 -135
  117. dtlpy/examples/add_metadata_to_item.py +21 -21
  118. dtlpy/examples/annotate_items_using_model.py +65 -65
  119. dtlpy/examples/annotate_video_using_model_and_tracker.py +75 -75
  120. dtlpy/examples/annotations_convert_to_voc.py +9 -9
  121. dtlpy/examples/annotations_convert_to_yolo.py +9 -9
  122. dtlpy/examples/convert_annotation_types.py +51 -51
  123. dtlpy/examples/converter.py +143 -143
  124. dtlpy/examples/copy_annotations.py +22 -22
  125. dtlpy/examples/copy_folder.py +31 -31
  126. dtlpy/examples/create_annotations.py +51 -51
  127. dtlpy/examples/create_video_annotations.py +83 -83
  128. dtlpy/examples/delete_annotations.py +26 -26
  129. dtlpy/examples/filters.py +113 -113
  130. dtlpy/examples/move_item.py +23 -23
  131. dtlpy/examples/play_video_annotation.py +13 -13
  132. dtlpy/examples/show_item_and_mask.py +53 -53
  133. dtlpy/examples/triggers.py +49 -49
  134. dtlpy/examples/upload_batch_of_items.py +20 -20
  135. dtlpy/examples/upload_items_and_custom_format_annotations.py +55 -55
  136. dtlpy/examples/upload_items_with_modalities.py +43 -43
  137. dtlpy/examples/upload_segmentation_annotations_from_mask_image.py +44 -44
  138. dtlpy/examples/upload_yolo_format_annotations.py +70 -70
  139. dtlpy/exceptions.py +125 -125
  140. dtlpy/miscellaneous/__init__.py +20 -20
  141. dtlpy/miscellaneous/dict_differ.py +95 -95
  142. dtlpy/miscellaneous/git_utils.py +217 -217
  143. dtlpy/miscellaneous/json_utils.py +14 -14
  144. dtlpy/miscellaneous/list_print.py +105 -105
  145. dtlpy/miscellaneous/zipping.py +130 -130
  146. dtlpy/ml/__init__.py +20 -20
  147. dtlpy/ml/base_feature_extractor_adapter.py +27 -27
  148. dtlpy/ml/base_model_adapter.py +1257 -1230
  149. dtlpy/ml/metrics.py +461 -461
  150. dtlpy/ml/predictions_utils.py +274 -274
  151. dtlpy/ml/summary_writer.py +57 -57
  152. dtlpy/ml/train_utils.py +60 -60
  153. dtlpy/new_instance.py +252 -252
  154. dtlpy/repositories/__init__.py +56 -56
  155. dtlpy/repositories/analytics.py +85 -85
  156. dtlpy/repositories/annotations.py +916 -916
  157. dtlpy/repositories/apps.py +383 -383
  158. dtlpy/repositories/artifacts.py +452 -452
  159. dtlpy/repositories/assignments.py +599 -599
  160. dtlpy/repositories/bots.py +213 -213
  161. dtlpy/repositories/codebases.py +559 -559
  162. dtlpy/repositories/collections.py +332 -332
  163. dtlpy/repositories/commands.py +152 -152
  164. dtlpy/repositories/compositions.py +61 -61
  165. dtlpy/repositories/computes.py +439 -439
  166. dtlpy/repositories/datasets.py +1504 -1504
  167. dtlpy/repositories/downloader.py +976 -923
  168. dtlpy/repositories/dpks.py +433 -433
  169. dtlpy/repositories/drivers.py +482 -482
  170. dtlpy/repositories/executions.py +815 -815
  171. dtlpy/repositories/feature_sets.py +226 -226
  172. dtlpy/repositories/features.py +255 -255
  173. dtlpy/repositories/integrations.py +484 -484
  174. dtlpy/repositories/items.py +912 -912
  175. dtlpy/repositories/messages.py +94 -94
  176. dtlpy/repositories/models.py +1000 -1000
  177. dtlpy/repositories/nodes.py +80 -80
  178. dtlpy/repositories/ontologies.py +511 -511
  179. dtlpy/repositories/organizations.py +525 -525
  180. dtlpy/repositories/packages.py +1941 -1941
  181. dtlpy/repositories/pipeline_executions.py +451 -451
  182. dtlpy/repositories/pipelines.py +640 -640
  183. dtlpy/repositories/projects.py +539 -539
  184. dtlpy/repositories/recipes.py +419 -399
  185. dtlpy/repositories/resource_executions.py +137 -137
  186. dtlpy/repositories/schema.py +120 -120
  187. dtlpy/repositories/service_drivers.py +213 -213
  188. dtlpy/repositories/services.py +1704 -1704
  189. dtlpy/repositories/settings.py +339 -339
  190. dtlpy/repositories/tasks.py +1477 -1477
  191. dtlpy/repositories/times_series.py +278 -278
  192. dtlpy/repositories/triggers.py +536 -536
  193. dtlpy/repositories/upload_element.py +257 -257
  194. dtlpy/repositories/uploader.py +661 -661
  195. dtlpy/repositories/webhooks.py +249 -249
  196. dtlpy/services/__init__.py +22 -22
  197. dtlpy/services/aihttp_retry.py +131 -131
  198. dtlpy/services/api_client.py +1785 -1785
  199. dtlpy/services/api_reference.py +40 -40
  200. dtlpy/services/async_utils.py +133 -133
  201. dtlpy/services/calls_counter.py +44 -44
  202. dtlpy/services/check_sdk.py +68 -68
  203. dtlpy/services/cookie.py +115 -115
  204. dtlpy/services/create_logger.py +156 -156
  205. dtlpy/services/events.py +84 -84
  206. dtlpy/services/logins.py +235 -235
  207. dtlpy/services/reporter.py +256 -256
  208. dtlpy/services/service_defaults.py +91 -91
  209. dtlpy/utilities/__init__.py +20 -20
  210. dtlpy/utilities/annotations/__init__.py +16 -16
  211. dtlpy/utilities/annotations/annotation_converters.py +269 -269
  212. dtlpy/utilities/base_package_runner.py +285 -264
  213. dtlpy/utilities/converter.py +1650 -1650
  214. dtlpy/utilities/dataset_generators/__init__.py +1 -1
  215. dtlpy/utilities/dataset_generators/dataset_generator.py +670 -670
  216. dtlpy/utilities/dataset_generators/dataset_generator_tensorflow.py +23 -23
  217. dtlpy/utilities/dataset_generators/dataset_generator_torch.py +21 -21
  218. dtlpy/utilities/local_development/__init__.py +1 -1
  219. dtlpy/utilities/local_development/local_session.py +179 -179
  220. dtlpy/utilities/reports/__init__.py +2 -2
  221. dtlpy/utilities/reports/figures.py +343 -343
  222. dtlpy/utilities/reports/report.py +71 -71
  223. dtlpy/utilities/videos/__init__.py +17 -17
  224. dtlpy/utilities/videos/video_player.py +598 -598
  225. dtlpy/utilities/videos/videos.py +470 -470
  226. {dtlpy-1.115.44.data → dtlpy-1.116.6.data}/scripts/dlp +1 -1
  227. dtlpy-1.116.6.data/scripts/dlp.bat +2 -0
  228. {dtlpy-1.115.44.data → dtlpy-1.116.6.data}/scripts/dlp.py +128 -128
  229. {dtlpy-1.115.44.dist-info → dtlpy-1.116.6.dist-info}/METADATA +186 -186
  230. dtlpy-1.116.6.dist-info/RECORD +239 -0
  231. {dtlpy-1.115.44.dist-info → dtlpy-1.116.6.dist-info}/WHEEL +1 -1
  232. {dtlpy-1.115.44.dist-info → dtlpy-1.116.6.dist-info}/licenses/LICENSE +200 -200
  233. tests/features/environment.py +551 -551
  234. dtlpy/assets/__pycache__/__init__.cpython-310.pyc +0 -0
  235. dtlpy-1.115.44.data/scripts/dlp.bat +0 -2
  236. dtlpy-1.115.44.dist-info/RECORD +0 -240
  237. {dtlpy-1.115.44.dist-info → dtlpy-1.116.6.dist-info}/entry_points.txt +0 -0
  238. {dtlpy-1.115.44.dist-info → dtlpy-1.116.6.dist-info}/top_level.txt +0 -0
dtlpy/caches/cache.py CHANGED
@@ -1,473 +1,473 @@
The file is rewritten in full; the removed and added contents are line-for-line identical in this view, so the file appears once below.

```python
import json
import os
import shutil
import time
from enum import Enum
from pathlib import Path
import mmap
from filelock import FileLock
import logging
import base64

from .dl_cache import DiskCache
from .redis_cache import RedisCache
from .filesystem_cache import FileSystemCache

logger = logging.getLogger(name='dtlpy')


class ObjectType(str, Enum):
    BINARY = "binary"
    OBJECT = "object"


class CacheType(Enum):
    DISKCACHE = 'diskcache'
    REDIS = 'redis'
    FILESYSTEM = 'filesystem'


class CacheConfig:
    def __init__(self, cache_type=CacheType.DISKCACHE, ttl=1000, level=1, options=None):
        """
        Cache config settings

        :param CacheType cache_type: cache backend: diskcache, filesystem or redis
        :param int ttl: time to hold an item in the cache, in seconds
        :param int level: cache level
        :param dict options: backend-specific configuration options
        """
        if isinstance(cache_type, CacheType):
            cache_type = cache_type.value
        if isinstance(cache_type, str) and cache_type not in CacheType._value2member_map_:
            raise ValueError('cache type must be one of: redis, diskcache, filesystem')

        self.type = cache_type
        self.ttl = ttl
        self.level = level
        self.options = options

    def to_string(self):
        """
        Convert the object to a base64 string
        """
        base64_bytes = base64.b64encode(json.dumps(self.to_json()).encode("ascii"))
        base64_string = base64_bytes.decode("ascii")
        return base64_string

    @classmethod
    def from_string(cls, base64_string):
        """
        Build a CacheConfig from a base64 string

        :param str base64_string: base64 string holding a JSON config
        """
        base64_bytes = base64_string.encode("ascii")
        sample_string_bytes = base64.b64decode(base64_bytes)
        _json = json.loads(sample_string_bytes.decode("ascii"))
        return cls(cache_type=_json.get('type', CacheType.DISKCACHE),
                   ttl=_json.get('ttl', 1000),
                   level=_json.get('level', 1),
                   options=_json.get('options', None))

    def to_json(self):
        """
        Convert the class to json
        """
        return {
            'type': self.type,
            'ttl': self.ttl,
            'level': self.level,
            'options': self.options,
        }

    @classmethod
    def from_json(cls, _json):
        """
        Build a CacheConfig from json

        :param _json: json holding the class attributes
        """
        if isinstance(_json, str):
            _json = json.loads(_json)
        return cls(cache_type=_json.get('type', CacheType.DISKCACHE),
                   ttl=_json.get('ttl', 1000),
                   level=_json.get('level', 1),
                   options=_json.get('options', None))


class CacheKey:
    def __init__(self,
                 master_type='**',
                 master_id='**',
                 entity_type='**',
                 entity_id='*',
                 object_type=ObjectType.OBJECT):
        """
        :param str master_type: parent ("master") entity type
        :param str master_id: parent ("master") entity id
        :param str entity_type: entity type
        :param str entity_id: entity id
        :param str object_type: object type: object/binary
        """
        self.master_type = master_type
        self.master_id = master_id
        self.entity_type = entity_type
        self.entity_id = entity_id
        self.object_type = object_type

    def get(self):
        """
        Return the full key, including the parent entity
        """
        return os.path.join(self.master_type, self.master_id, self.entity_type, self.entity_id, self.object_type)

    def get_key(self):
        """
        Return the short key, without the parent entity
        """
        return os.path.join(self.entity_type, self.entity_id, self.object_type)


class CacheManger:
    def __init__(self, cache_configs: list, bin_cache_size=1000):
        """
        Cache manager: builds the configured cache levels and manages them

        :param cache_configs: list of CacheConfig objects
        :param bin_cache_size: size in MB of the binary cache
        """
        self.cache_levels = dict()
        self._max_level = 1
        self.bin_cache_size = bin_cache_size
        self.bin_cache_path = os.environ['DEFAULT_CACHE_PATH']
        self._current_bin_cache_size = 0
        for config in cache_configs:
            try:
                self.cache_levels[config.level] = self._load_cache_handler(config)
                if config.level > self._max_level:
                    self._max_level = config.level
            except Exception as e:
                raise Exception("Failed to build Cache") from e

        # maps each entity type to its parent ("master") entity type
        self.parent_dict = {
            "annotations": 'items',
            "items": 'datasets',
            "datasets": 'projects',
            "projects": 'org',
            "org": '',
            "annotationtasks": 'datasets',
            "assignments": 'annotationtasks',
            "models": 'packages',
            "packages": 'projects',
            "services": 'packages',
        }

    def _load_cache_handler(self, config: CacheConfig):
        """
        Build a cache handler from the given config
        """
        from ..services import DataloopLogger
        cache = None
        if config.type == CacheType.REDIS.value:
            try:
                cache = RedisCache(options=config.options, ttl=config.ttl)
            except Exception:
                logger.warning("Failed to build Redis")
                raise Exception("Failed to build Redis")
        elif config.type == CacheType.DISKCACHE.value:
            cache = DiskCache(name='object_cache', options=config.options, ttl=config.ttl)
        elif config.type == CacheType.FILESYSTEM.value:
            cache = FileSystemCache(options=config.options, ttl=config.ttl)
            DataloopLogger.clean_dataloop_cache(cache_path=cache.root_dir,
                                                max_param={'max_time': cache.ttl})
        DataloopLogger.clean_dataloop_cache(cache_path=self.bin_cache_path,
                                            max_param={'max_time': config.ttl})
        return cache

    def get(self, key: CacheKey):
        """
        Cache get: look the key up level by level and stop at the first hit

        :param CacheKey key: CacheKey object
        :return: success flag and the result list
        """
        res = []
        success = False
        for i in range(1, self._max_level + 1):
            res = self.cache_levels[i].get(key=key.get_key())
            if res:
                success = True
                break
        return success, res

    def ping(self):
        """
        Ping all cache levels to check that the connections are working
        """
        try:
            for i in range(1, self._max_level + 1):
                self.cache_levels[i].ping()
        except Exception as e:
            raise Exception('cache connection failed') from e

    def set(self, key: str, value):
        """
        Cache set: add or update the key's value

        :param str key: cache key string
        :param value: value to set
        """
        if isinstance(value, dict):
            value = json.dumps(value)
        self.cache_levels[1].set(key, value)

    def _delete_parent(self, key: CacheKey, level):
        # delete every key stored under the entity's parent; cached binary
        # files are also removed from disk
        parent_key = CacheKey(master_type=self.parent_dict[key.entity_type],
                              entity_type=key.entity_type,
                              entity_id=key.entity_id,
                              object_type='*')
        list_keys = self.cache_levels[level].list(pattern=parent_key.get())
        for k in list_keys:
            if 'binary' in k:
                val = self.cache_levels[level].get(key=k)
                if os.path.isfile(val):
                    os.remove(val)
            self.cache_levels[level].delete(k)

    def delete(self, key: CacheKey):
        """
        Cache delete: remove the entity and, recursively, its child entries

        :param CacheKey key: CacheKey object
        """
        for i in range(1, self._max_level + 1):
            self.cache_levels[i].delete(key.get_key())
            self._delete_parent(key=key, level=i)
            key.object_type = '*'  # widen the key to match object and binary entries alike
            list_keys = self.cache_levels[i].list(pattern=key.get_key())
            for k in list_keys:
                val = self.cache_levels[i].get(key=k)
                self.cache_levels[i].delete(k)
                if 'binary' in k:
                    if os.path.isfile(val):
                        os.remove(val)
                    continue
                # non-binary entries hold a child reference key "<type>/<id>/<object>";
                # keys are built with os.path.join, so split on the platform separator
                e_type, e_id, e_obj = val.split(os.sep)
                self.delete(key=CacheKey(entity_type=e_type, entity_id=e_id, object_type=e_obj))

    def build_cache_key(self, entity_json: dict):
        """
        Build a cache key from the given entity json

        :param dict entity_json: json of an entity
        :return: CacheKey object
        """
        child_entity = False
        if 'url' in entity_json:
            # the entity type is the one-before-last segment of the entity's url
            split_url = entity_json['url'].split('/')
            entity_type = split_url[-2]
            child_entity = True
        elif 'org' in entity_json:
            entity_type = 'projects'
        else:
            entity_type = 'org'
        entity_id = entity_json['id']
        master_type = self.parent_dict[entity_type]
        master_id = '**'
        if child_entity:
            # e.g. for items the parent id field is 'datasetId'
            master_id_key = master_type[:-1] + 'Id'
            if master_id_key in entity_json:
                master_id = entity_json[master_id_key]
            elif master_type in entity_json:
                master_id = entity_json[master_type][0]
        elif entity_type == 'projects':
            master_id = entity_json[master_type]['id']

        return CacheKey(master_type=master_type, master_id=master_id, entity_type=entity_type, entity_id=entity_id)

    def _update_config_file(self, filepath: str, update: bool, size: float = 0):
        """
        Update the config file that tracks the binary cache contents

        :param str filepath: path of the file to record
        :param bool update: if True, refresh the file's position in the LRU order
        :param float size: file size in MB
        """
        config_file_path = os.path.join(self.bin_cache_path, 'cacheConfig.json')
        if os.path.isfile(config_file_path):
            with FileLock(config_file_path + ".lock"):
                with open(config_file_path, mode="r", encoding="utf-8") as con:
                    with mmap.mmap(con.fileno(), length=0, access=mmap.ACCESS_READ) as mmap_obj:
                        text = mmap_obj.read().decode('utf8').replace("'", '"')
                        config_file = json.loads(text)
        else:
            config_file = {'size': 0, 'keys': []}

        # 'keys' is kept ordered from least to most recently used
        if update and filepath in config_file['keys']:
            config_file['keys'].remove(filepath)

        if filepath not in config_file['keys']:
            config_file['keys'].append(filepath)
            config_file['size'] += size
        self._current_bin_cache_size = config_file['size']
        json_object = json.dumps(config_file, indent=4)
        with FileLock(config_file_path + ".lock"):
            with open(config_file_path, mode="w", encoding="utf-8") as outfile:
                outfile.write(json_object)

    def _lru_cache(self):
        """
        Evict least-recently-used files from the binary cache until it is
        back down to 70% of its maximum size
        """
        config_file_path = os.path.join(self.bin_cache_path, 'cacheConfig.json')
        with FileLock(config_file_path + ".lock"):
            with open(config_file_path, mode="r", encoding="utf-8") as con:
                with mmap.mmap(con.fileno(), length=0, access=mmap.ACCESS_READ) as mmap_obj:
                    text = mmap_obj.read().decode('utf8').replace("'", '"')
                    config_file = json.loads(text)

        size = config_file['size']
        end = 70 / 100 * self.bin_cache_size

        while size > end and len(config_file['keys']) > 1:
            to_delete = config_file['keys'][0]
            size -= (Path(to_delete).stat().st_size / 1000000)
            os.remove(to_delete)
            config_file['keys'].remove(to_delete)

        config_file['size'] = size
        json_object = json.dumps(config_file, indent=4)

        with FileLock(config_file_path + ".lock"):
            with open(config_file_path, "w") as outfile:
                outfile.write(json_object)

    def read_stream(self, request_path, dataset_id=None):
        """
        Cache binary get

        :param str request_path: the request path
        :param str dataset_id: dataset id of the binary object
        :return: success flag and the result list
        """
        entity_id = request_path.split('/')[-2]
        key = CacheKey(master_type='datasets',
                       master_id=dataset_id,
                       entity_id=entity_id,
                       entity_type='items',
                       object_type=ObjectType.BINARY.value)
        hit, response = self.get(key=key)
        if hit:
            source_path = os.path.normpath(response[0])
            self._update_config_file(filepath=source_path, update=True)
            return hit, [source_path]
        else:
            return False, None

    def write_stream(self,
                     request_path,
                     response=None,
                     buffer=None,
                     file_name=None,
                     entity_id=None,
                     dataset_id=None):
        """
        Cache binary set

        :param request_path: the request path
        :param response: the stream response
        :param buffer: the stream buffer
        :param file_name: the file name
        :param entity_id: entity id
        :param dataset_id: dataset id of the binary object
        :return: the file path of the binary
        """
        if entity_id is None:
            entity_id = request_path.split('/')[-2]
        key = CacheKey(master_type='datasets',
                       master_id=dataset_id,
                       entity_id=entity_id,
                       entity_type='items',
                       object_type=ObjectType.BINARY)
        filepath = self.bin_cache_path
        if file_name is None:
            # take the file name from the Content-Disposition header
            file_name = dict(response.headers)['Content-Disposition'].split('=')[1][2:-1]
        filepath = os.path.join(filepath, 'items', file_name)
        self.set(key=key.get(), value=filepath)
        if not os.path.isfile(filepath):
            os.makedirs(os.path.dirname(filepath), exist_ok=True)
            if buffer is None:
                temp_file_path = filepath + '.download'
                try:
                    # download to a temp file first, then move it into place
                    with open(temp_file_path, "wb") as f:
                        for chunk in response.iter_content(chunk_size=8192):
                            if chunk:  # filter out keep-alive chunks
                                f.write(chunk)
                    shutil.move(temp_file_path, filepath)
                except Exception:
                    if os.path.isfile(temp_file_path):
                        os.remove(temp_file_path)
                    return ''
            else:
                if os.path.isfile(buffer.name):
                    shutil.copyfile(buffer.name, filepath)
                else:
                    with open(filepath, "wb") as f:
                        f.write(buffer.getbuffer())
        self._update_config_file(filepath=filepath, update=False, size=(Path(filepath).stat().st_size / 1000000))
        if (Path(filepath).stat().st_size / 1000000) + self._current_bin_cache_size > self.bin_cache_size:
            self._lru_cache()
        return filepath

    def read(self, request_path: str):
        """
        Cache entity get

        :param str request_path: the request path
        :return: success flag and the result list
        """
        entity_id = request_path.split('/')[-1]
        entity_type = request_path.split('/')[-2]
        key = CacheKey(entity_id=entity_id, entity_type=entity_type)
        hit, response = self.get(key=key)
        if hit:
            return hit, response
        return False, None

    def write(self, list_entities_json):
        """
        Add or update the entity cache

        :param list list_entities_json: list of entity jsons to set
        """
        for entity_json in list_entities_json:
            key = self.build_cache_key(entity_json)
            redis_key = key.get_key()
            # store the entity under its short key, and the short key under the full key
            self.set(key=redis_key, value=entity_json)
            self.set(key=key.get(), value=redis_key)

    def invalidate(self, path):
        """
        Delete an entity from all the caches

        :param str path: the request path
        """
        entity_id = path.split('/')[-1]
        entity_type = path.split('/')[-2]
        key = CacheKey(entity_id=entity_id, entity_type=entity_type)
        self.delete(key)

    def clear(self):
        self.cache_levels[1].clear()

    def keys(self):
        return [k for k in self.cache_levels[1].keys()]
```
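The cache configuration travels as a base64-encoded JSON string (to_string/from_string above). A minimal round-trip sketch, assuming dtlpy is installed and the module is importable as dtlpy.caches.cache; the ttl, level, and options values are illustrative, and the options key is an assumption rather than a documented backend parameter:

```python
# Minimal sketch: round-trip a CacheConfig through its base64 string form.
# The ttl/level/options values are illustrative; the options dict is passed
# through to the backend as-is, so the key used here is hypothetical.
from dtlpy.caches.cache import CacheConfig, CacheType

config = CacheConfig(cache_type=CacheType.FILESYSTEM,
                     ttl=3600,
                     level=1,
                     options={'root_dir': '/tmp/dl_cache'})  # hypothetical option key

encoded = config.to_string()              # base64 of the JSON from to_json()
restored = CacheConfig.from_string(encoded)

assert restored.type == 'filesystem'      # cache_type is normalized to its string value
assert restored.ttl == 3600 and restored.level == 1
assert restored.options == {'root_dir': '/tmp/dl_cache'}
```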
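The key scheme is hierarchical: the full key embeds the parent ("master") entity so a parent deletion can sweep its children, while the short key identifies the entity alone, and build_cache_key derives both from an entity json. A sketch with illustrative ids and URL (the env var is read by CacheManger at construction time; the URL shape and ids are assumptions):

```python
# Minimal sketch of the key layout, with illustrative ids and URL.
# Keys are built with os.path.join, so the expected strings in the
# comments assume a POSIX separator.
import os
import tempfile

# CacheManger reads the binary-cache root from this variable at construction time
os.environ['DEFAULT_CACHE_PATH'] = tempfile.mkdtemp()

from dtlpy.caches.cache import CacheKey, CacheManger, ObjectType

key = CacheKey(master_type='datasets', master_id='ds-1',
               entity_type='items', entity_id='item-1',
               object_type=ObjectType.OBJECT)
print(key.get())      # datasets/ds-1/items/item-1/object (full key, parent included)
print(key.get_key())  # items/item-1/object (short key, entity only)

# build_cache_key derives the same structure from an entity json: the entity
# type comes from the url's one-before-last segment, the parent id from the
# '<parent>Id' field ('datasetId' for items). The url below is illustrative.
manager = CacheManger(cache_configs=[])
item_json = {'url': 'https://gate.dataloop.ai/api/v1/items/item-1',
             'id': 'item-1',
             'datasetId': 'ds-1'}
built = manager.build_cache_key(item_json)
print(built.get())    # datasets/ds-1/items/item-1/object
```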