dtlpy 1.113.10-py3-none-any.whl → 1.114.13-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (243)
  1. dtlpy/__init__.py +488 -488
  2. dtlpy/__version__.py +1 -1
  3. dtlpy/assets/__init__.py +26 -26
  4. dtlpy/assets/__pycache__/__init__.cpython-38.pyc +0 -0
  5. dtlpy/assets/code_server/config.yaml +2 -2
  6. dtlpy/assets/code_server/installation.sh +24 -24
  7. dtlpy/assets/code_server/launch.json +13 -13
  8. dtlpy/assets/code_server/settings.json +2 -2
  9. dtlpy/assets/main.py +53 -53
  10. dtlpy/assets/main_partial.py +18 -18
  11. dtlpy/assets/mock.json +11 -11
  12. dtlpy/assets/model_adapter.py +83 -83
  13. dtlpy/assets/package.json +61 -61
  14. dtlpy/assets/package_catalog.json +29 -29
  15. dtlpy/assets/package_gitignore +307 -307
  16. dtlpy/assets/service_runners/__init__.py +33 -33
  17. dtlpy/assets/service_runners/converter.py +96 -96
  18. dtlpy/assets/service_runners/multi_method.py +49 -49
  19. dtlpy/assets/service_runners/multi_method_annotation.py +54 -54
  20. dtlpy/assets/service_runners/multi_method_dataset.py +55 -55
  21. dtlpy/assets/service_runners/multi_method_item.py +52 -52
  22. dtlpy/assets/service_runners/multi_method_json.py +52 -52
  23. dtlpy/assets/service_runners/single_method.py +37 -37
  24. dtlpy/assets/service_runners/single_method_annotation.py +43 -43
  25. dtlpy/assets/service_runners/single_method_dataset.py +43 -43
  26. dtlpy/assets/service_runners/single_method_item.py +41 -41
  27. dtlpy/assets/service_runners/single_method_json.py +42 -42
  28. dtlpy/assets/service_runners/single_method_multi_input.py +45 -45
  29. dtlpy/assets/voc_annotation_template.xml +23 -23
  30. dtlpy/caches/base_cache.py +32 -32
  31. dtlpy/caches/cache.py +473 -473
  32. dtlpy/caches/dl_cache.py +201 -201
  33. dtlpy/caches/filesystem_cache.py +89 -89
  34. dtlpy/caches/redis_cache.py +84 -84
  35. dtlpy/dlp/__init__.py +20 -20
  36. dtlpy/dlp/cli_utilities.py +367 -367
  37. dtlpy/dlp/command_executor.py +764 -764
  38. dtlpy/dlp/dlp +1 -1
  39. dtlpy/dlp/dlp.bat +1 -1
  40. dtlpy/dlp/dlp.py +128 -128
  41. dtlpy/dlp/parser.py +651 -651
  42. dtlpy/entities/__init__.py +83 -83
  43. dtlpy/entities/analytic.py +311 -311
  44. dtlpy/entities/annotation.py +1879 -1879
  45. dtlpy/entities/annotation_collection.py +699 -699
  46. dtlpy/entities/annotation_definitions/__init__.py +20 -20
  47. dtlpy/entities/annotation_definitions/base_annotation_definition.py +100 -100
  48. dtlpy/entities/annotation_definitions/box.py +195 -195
  49. dtlpy/entities/annotation_definitions/classification.py +67 -67
  50. dtlpy/entities/annotation_definitions/comparison.py +72 -72
  51. dtlpy/entities/annotation_definitions/cube.py +204 -204
  52. dtlpy/entities/annotation_definitions/cube_3d.py +149 -149
  53. dtlpy/entities/annotation_definitions/description.py +32 -32
  54. dtlpy/entities/annotation_definitions/ellipse.py +124 -124
  55. dtlpy/entities/annotation_definitions/free_text.py +62 -62
  56. dtlpy/entities/annotation_definitions/gis.py +69 -69
  57. dtlpy/entities/annotation_definitions/note.py +139 -139
  58. dtlpy/entities/annotation_definitions/point.py +117 -117
  59. dtlpy/entities/annotation_definitions/polygon.py +182 -182
  60. dtlpy/entities/annotation_definitions/polyline.py +111 -111
  61. dtlpy/entities/annotation_definitions/pose.py +92 -92
  62. dtlpy/entities/annotation_definitions/ref_image.py +86 -86
  63. dtlpy/entities/annotation_definitions/segmentation.py +240 -240
  64. dtlpy/entities/annotation_definitions/subtitle.py +34 -34
  65. dtlpy/entities/annotation_definitions/text.py +85 -85
  66. dtlpy/entities/annotation_definitions/undefined_annotation.py +74 -74
  67. dtlpy/entities/app.py +220 -220
  68. dtlpy/entities/app_module.py +107 -107
  69. dtlpy/entities/artifact.py +174 -174
  70. dtlpy/entities/assignment.py +399 -399
  71. dtlpy/entities/base_entity.py +214 -214
  72. dtlpy/entities/bot.py +113 -113
  73. dtlpy/entities/codebase.py +296 -296
  74. dtlpy/entities/collection.py +38 -38
  75. dtlpy/entities/command.py +169 -169
  76. dtlpy/entities/compute.py +442 -442
  77. dtlpy/entities/dataset.py +1285 -1285
  78. dtlpy/entities/directory_tree.py +44 -44
  79. dtlpy/entities/dpk.py +470 -470
  80. dtlpy/entities/driver.py +222 -222
  81. dtlpy/entities/execution.py +397 -397
  82. dtlpy/entities/feature.py +124 -124
  83. dtlpy/entities/feature_set.py +145 -145
  84. dtlpy/entities/filters.py +641 -641
  85. dtlpy/entities/gis_item.py +107 -107
  86. dtlpy/entities/integration.py +184 -184
  87. dtlpy/entities/item.py +953 -953
  88. dtlpy/entities/label.py +123 -123
  89. dtlpy/entities/links.py +85 -85
  90. dtlpy/entities/message.py +175 -175
  91. dtlpy/entities/model.py +694 -691
  92. dtlpy/entities/node.py +1005 -1005
  93. dtlpy/entities/ontology.py +803 -803
  94. dtlpy/entities/organization.py +287 -287
  95. dtlpy/entities/package.py +657 -657
  96. dtlpy/entities/package_defaults.py +5 -5
  97. dtlpy/entities/package_function.py +185 -185
  98. dtlpy/entities/package_module.py +113 -113
  99. dtlpy/entities/package_slot.py +118 -118
  100. dtlpy/entities/paged_entities.py +290 -267
  101. dtlpy/entities/pipeline.py +593 -593
  102. dtlpy/entities/pipeline_execution.py +279 -279
  103. dtlpy/entities/project.py +394 -394
  104. dtlpy/entities/prompt_item.py +499 -499
  105. dtlpy/entities/recipe.py +301 -301
  106. dtlpy/entities/reflect_dict.py +102 -102
  107. dtlpy/entities/resource_execution.py +138 -138
  108. dtlpy/entities/service.py +958 -958
  109. dtlpy/entities/service_driver.py +117 -117
  110. dtlpy/entities/setting.py +294 -294
  111. dtlpy/entities/task.py +491 -491
  112. dtlpy/entities/time_series.py +143 -143
  113. dtlpy/entities/trigger.py +426 -426
  114. dtlpy/entities/user.py +118 -118
  115. dtlpy/entities/webhook.py +124 -124
  116. dtlpy/examples/__init__.py +19 -19
  117. dtlpy/examples/add_labels.py +135 -135
  118. dtlpy/examples/add_metadata_to_item.py +21 -21
  119. dtlpy/examples/annotate_items_using_model.py +65 -65
  120. dtlpy/examples/annotate_video_using_model_and_tracker.py +75 -75
  121. dtlpy/examples/annotations_convert_to_voc.py +9 -9
  122. dtlpy/examples/annotations_convert_to_yolo.py +9 -9
  123. dtlpy/examples/convert_annotation_types.py +51 -51
  124. dtlpy/examples/converter.py +143 -143
  125. dtlpy/examples/copy_annotations.py +22 -22
  126. dtlpy/examples/copy_folder.py +31 -31
  127. dtlpy/examples/create_annotations.py +51 -51
  128. dtlpy/examples/create_video_annotations.py +83 -83
  129. dtlpy/examples/delete_annotations.py +26 -26
  130. dtlpy/examples/filters.py +113 -113
  131. dtlpy/examples/move_item.py +23 -23
  132. dtlpy/examples/play_video_annotation.py +13 -13
  133. dtlpy/examples/show_item_and_mask.py +53 -53
  134. dtlpy/examples/triggers.py +49 -49
  135. dtlpy/examples/upload_batch_of_items.py +20 -20
  136. dtlpy/examples/upload_items_and_custom_format_annotations.py +55 -55
  137. dtlpy/examples/upload_items_with_modalities.py +43 -43
  138. dtlpy/examples/upload_segmentation_annotations_from_mask_image.py +44 -44
  139. dtlpy/examples/upload_yolo_format_annotations.py +70 -70
  140. dtlpy/exceptions.py +125 -125
  141. dtlpy/miscellaneous/__init__.py +20 -20
  142. dtlpy/miscellaneous/dict_differ.py +95 -95
  143. dtlpy/miscellaneous/git_utils.py +217 -217
  144. dtlpy/miscellaneous/json_utils.py +14 -14
  145. dtlpy/miscellaneous/list_print.py +105 -105
  146. dtlpy/miscellaneous/zipping.py +130 -130
  147. dtlpy/ml/__init__.py +20 -20
  148. dtlpy/ml/base_feature_extractor_adapter.py +27 -27
  149. dtlpy/ml/base_model_adapter.py +945 -940
  150. dtlpy/ml/metrics.py +461 -461
  151. dtlpy/ml/predictions_utils.py +274 -274
  152. dtlpy/ml/summary_writer.py +57 -57
  153. dtlpy/ml/train_utils.py +60 -60
  154. dtlpy/new_instance.py +252 -252
  155. dtlpy/repositories/__init__.py +56 -56
  156. dtlpy/repositories/analytics.py +85 -85
  157. dtlpy/repositories/annotations.py +916 -916
  158. dtlpy/repositories/apps.py +383 -383
  159. dtlpy/repositories/artifacts.py +452 -452
  160. dtlpy/repositories/assignments.py +599 -599
  161. dtlpy/repositories/bots.py +213 -213
  162. dtlpy/repositories/codebases.py +559 -559
  163. dtlpy/repositories/collections.py +332 -348
  164. dtlpy/repositories/commands.py +158 -158
  165. dtlpy/repositories/compositions.py +61 -61
  166. dtlpy/repositories/computes.py +434 -406
  167. dtlpy/repositories/datasets.py +1291 -1291
  168. dtlpy/repositories/downloader.py +895 -895
  169. dtlpy/repositories/dpks.py +433 -433
  170. dtlpy/repositories/drivers.py +266 -266
  171. dtlpy/repositories/executions.py +817 -817
  172. dtlpy/repositories/feature_sets.py +226 -226
  173. dtlpy/repositories/features.py +238 -238
  174. dtlpy/repositories/integrations.py +484 -484
  175. dtlpy/repositories/items.py +909 -915
  176. dtlpy/repositories/messages.py +94 -94
  177. dtlpy/repositories/models.py +877 -867
  178. dtlpy/repositories/nodes.py +80 -80
  179. dtlpy/repositories/ontologies.py +511 -511
  180. dtlpy/repositories/organizations.py +525 -525
  181. dtlpy/repositories/packages.py +1941 -1941
  182. dtlpy/repositories/pipeline_executions.py +448 -448
  183. dtlpy/repositories/pipelines.py +642 -642
  184. dtlpy/repositories/projects.py +539 -539
  185. dtlpy/repositories/recipes.py +399 -399
  186. dtlpy/repositories/resource_executions.py +137 -137
  187. dtlpy/repositories/schema.py +120 -120
  188. dtlpy/repositories/service_drivers.py +213 -213
  189. dtlpy/repositories/services.py +1704 -1704
  190. dtlpy/repositories/settings.py +339 -339
  191. dtlpy/repositories/tasks.py +1124 -1124
  192. dtlpy/repositories/times_series.py +278 -278
  193. dtlpy/repositories/triggers.py +536 -536
  194. dtlpy/repositories/upload_element.py +257 -257
  195. dtlpy/repositories/uploader.py +651 -651
  196. dtlpy/repositories/webhooks.py +249 -249
  197. dtlpy/services/__init__.py +22 -22
  198. dtlpy/services/aihttp_retry.py +131 -131
  199. dtlpy/services/api_client.py +1782 -1782
  200. dtlpy/services/api_reference.py +40 -40
  201. dtlpy/services/async_utils.py +133 -133
  202. dtlpy/services/calls_counter.py +44 -44
  203. dtlpy/services/check_sdk.py +68 -68
  204. dtlpy/services/cookie.py +115 -115
  205. dtlpy/services/create_logger.py +156 -156
  206. dtlpy/services/events.py +84 -84
  207. dtlpy/services/logins.py +235 -235
  208. dtlpy/services/reporter.py +256 -256
  209. dtlpy/services/service_defaults.py +91 -91
  210. dtlpy/utilities/__init__.py +20 -20
  211. dtlpy/utilities/annotations/__init__.py +16 -16
  212. dtlpy/utilities/annotations/annotation_converters.py +269 -269
  213. dtlpy/utilities/base_package_runner.py +264 -264
  214. dtlpy/utilities/converter.py +1650 -1650
  215. dtlpy/utilities/dataset_generators/__init__.py +1 -1
  216. dtlpy/utilities/dataset_generators/dataset_generator.py +670 -670
  217. dtlpy/utilities/dataset_generators/dataset_generator_tensorflow.py +23 -23
  218. dtlpy/utilities/dataset_generators/dataset_generator_torch.py +21 -21
  219. dtlpy/utilities/local_development/__init__.py +1 -1
  220. dtlpy/utilities/local_development/local_session.py +179 -179
  221. dtlpy/utilities/reports/__init__.py +2 -2
  222. dtlpy/utilities/reports/figures.py +343 -343
  223. dtlpy/utilities/reports/report.py +71 -71
  224. dtlpy/utilities/videos/__init__.py +17 -17
  225. dtlpy/utilities/videos/video_player.py +598 -598
  226. dtlpy/utilities/videos/videos.py +470 -470
  227. {dtlpy-1.113.10.data → dtlpy-1.114.13.data}/scripts/dlp +1 -1
  228. dtlpy-1.114.13.data/scripts/dlp.bat +2 -0
  229. {dtlpy-1.113.10.data → dtlpy-1.114.13.data}/scripts/dlp.py +128 -128
  230. {dtlpy-1.113.10.dist-info → dtlpy-1.114.13.dist-info}/LICENSE +200 -200
  231. {dtlpy-1.113.10.dist-info → dtlpy-1.114.13.dist-info}/METADATA +172 -172
  232. dtlpy-1.114.13.dist-info/RECORD +240 -0
  233. {dtlpy-1.113.10.dist-info → dtlpy-1.114.13.dist-info}/WHEEL +1 -1
  234. tests/features/environment.py +551 -550
  235. dtlpy-1.113.10.data/scripts/dlp.bat +0 -2
  236. dtlpy-1.113.10.dist-info/RECORD +0 -244
  237. tests/assets/__init__.py +0 -0
  238. tests/assets/models_flow/__init__.py +0 -0
  239. tests/assets/models_flow/failedmain.py +0 -52
  240. tests/assets/models_flow/main.py +0 -62
  241. tests/assets/models_flow/main_model.py +0 -54
  242. {dtlpy-1.113.10.dist-info → dtlpy-1.114.13.dist-info}/entry_points.txt +0 -0
  243. {dtlpy-1.113.10.dist-info → dtlpy-1.114.13.dist-info}/top_level.txt +0 -0
dtlpy/caches/cache.py CHANGED
@@ -1,473 +1,473 @@
All 473 lines are removed and re-added with identical text; the file content is shown once below.

import json
import os
import shutil
import time
from enum import Enum
from pathlib import Path
import mmap
from filelock import FileLock
import logging
import base64

from .dl_cache import DiskCache
from .redis_cache import RedisCache
from .filesystem_cache import FileSystemCache

logger = logging.getLogger(name='dtlpy')


class ObjectType(str, Enum):
    BINARY = "binary"
    OBJECT = "object"


class CacheType(Enum):
    DISKCACHE = 'diskcache'
    REDIS = 'redis'
    FILESYSTEM = 'filesystem'


class CacheConfig:
    def __init__(self, cache_type=CacheType.DISKCACHE, ttl=1000, level=1, options=None):
        """
        Cache config settings

        :param CacheType cache_type: CacheType diskcache, filesystem, redis
        :param int ttl: time to hold the item in the cache in seconds (SEC)
        :param int level: cache level
        :param dict options: the configs for the caches types
        """
        if isinstance(cache_type, CacheType):
            cache_type = cache_type.value
        if isinstance(cache_type, str) and cache_type not in CacheType._value2member_map_:
            raise ValueError('cache type must be redis or diskcache')

        self.type = cache_type
        self.ttl = ttl
        self.level = level
        self.options = options

    def to_string(self):
        """
        convert object to base 64 string
        """
        base64_bytes = base64.b64encode(json.dumps(self.to_json()).encode("ascii"))
        base64_string = base64_bytes.decode("ascii")
        return base64_string

    @staticmethod
    def from_string(cls, base64_string):
        """
        convert from base 64 string to the class object

        :param str base64_string: string in base64 the have a json configs
        """
        base64_bytes = base64_string.encode("ascii")
        sample_string_bytes = base64.b64decode(base64_bytes)
        _json = json.loads(sample_string_bytes.decode("ascii"))
        return cls(cache_type=_json.get('type', CacheType.DISKCACHE),
                   ttl=_json.get('ttl', 1000),
                   level=_json.get('level', 1),
                   options=_json.get('options', None))

    def to_json(self):
        """
        convert the class to json
        """
        return {
            'type': self.type,
            'ttl': self.ttl,
            'level': self.level,
            'options': self.options,
        }

    @staticmethod
    def from_json(cls, _json):
        """
        make a class attribute from json

        :param _json: _json have the class attributes
        """
        if isinstance(_json, str):
            _json = json.loads(_json)
        return cls(cache_type=_json.get('type', CacheType.DISKCACHE),
                   ttl=_json.get('ttl', 1000),
                   level=_json.get('level', 1),
                   options=_json.get('options', None))


class CacheKey:
    def __init__(self,
                 master_type='**',
                 master_id='**',
                 entity_type='**',
                 entity_id='*',
                 object_type=ObjectType.OBJECT):
        """
        :param str master_type: master type
        :param str master_id: master id
        :param str entity_type: entity type
        :param str entity_id: entity id
        :param str object_type: object type object/binary
        """
        self.master_type = master_type
        self.master_id = master_id
        self.entity_type = entity_type
        self.entity_id = entity_id
        self.object_type = object_type

    def get(self):
        """
        return the build key
        """
        return os.path.join(self.master_type, self.master_id, self.entity_type, self.entity_id, self.object_type)

    def get_key(self):
        """
        return the build key
        """
        return os.path.join(self.entity_type, self.entity_id, self.object_type)


class CacheManger:
    def __init__(self, cache_configs: list, bin_cache_size=1000):
        """
        Cache manger for config and mange the cache

        :param cache_configs: CacheConfig object
        :param bin_cache_size: size on MB for binary cache
        """
        self.cache_levels = dict()
        self._max_level = 1
        self.bin_cache_size = bin_cache_size
        self.bin_cache_path = os.environ['DEFAULT_CACHE_PATH']
        self._current_bin_cache_size = 0
        for config in cache_configs:
            try:
                self.cache_levels[config.level] = self._load_cache_handler(config)
                if config.level < self._max_level:
                    self._max_level = config.level
            except:
                raise "Failed to build Cache"

        self.parent_dict = {
            "annotations": 'items',
            "items": 'datasets',
            "datasets": 'projects',
            "projects": 'org',
            "org": '',
            "annotationtasks": 'datasets',
            "assignments": 'annotationtasks',
            "models": 'packages',
            "packages": 'projects',
            "services": 'packages',
        }

    def _load_cache_handler(self, config: CacheConfig):
        """
        the function the build the cache form the configs that get
        """
        from ..services import DataloopLogger
        cache = None
        if config.type == CacheType.REDIS.value:
            try:
                cache = RedisCache(options=config.options, ttl=config.ttl)
            except:
                logger.warning("Failed to build Redis")
                raise Exception("Failed to build Redis")

        elif config.type == CacheType.DISKCACHE.value:
            cache = DiskCache(name='object_cache', options=config.options, ttl=config.ttl)
        elif config.type == CacheType.FILESYSTEM.value:
            cache = FileSystemCache(options=config.options, ttl=config.ttl)
            DataloopLogger.clean_dataloop_cache(cache_path=cache.root_dir,
                                                max_param={'max_time': cache.ttl})
            DataloopLogger.clean_dataloop_cache(cache_path=self.bin_cache_path,
                                                max_param={'max_time': config.ttl})
        return cache

    def get(self, key: CacheKey):
        """
        Cache get

        :param CacheKey key: CacheKey object
        :return: success, list of the get result
        """
        res = []
        success = False
        for i in range(1, self._max_level + 1):
            res = self.cache_levels[i].get(key=key.get_key())
            if res:
                success = True
                break
        return success, res

    def ping(self):
        """
        Cache ping check if connection is working
        """
        try:
            for i in range(1, self._max_level + 1):
                self.cache_levels[i].ping()
        except Exception as e:
            raise Exception('cache connection failed ')

    def set(self, key: str, value):
        """
        Cache set, add or update the key value

        :param CacheKey key: CacheKey object
        :param value: value to set
        """
        if isinstance(value, dict):
            value = json.dumps(value)
        self.cache_levels[1].set(key, value)

    def _delete_parent(self, key: CacheKey, level):
        parent_key = CacheKey(master_type=self.parent_dict[key.entity_type],
                              entity_type=key.entity_type,
                              entity_id=key.entity_id,
                              object_type='*')
        list_keys = self.cache_levels[level].list(pattern=parent_key.get())
        for k in list_keys:
            if 'binary' in k:
                val = self.cache_levels[level].get(key=k)
                if os.path.isfile(val):
                    os.remove(val)
            self.cache_levels[level].delete(k)

    def delete(self, key: CacheKey):
        """
        Cache delete

        :param CacheKey key: CacheKey object
        """
        for i in range(1, self._max_level + 1):
            self.cache_levels[i].delete(key.get_key())
            self._delete_parent(key=key, level=i)
            key.object_type = '*'
            list_keys = self.cache_levels[i].list(pattern=key.get_key())
            for k in list_keys:
                val = self.cache_levels[i].get(key=k)
                self.cache_levels[i].delete(k)
                if 'binary' in k:
                    if os.path.isfile(val):
                        os.remove(val)
                    continue
                e_type, e_id, e_obj = val.split('\\')
                self.delete(key=CacheKey(entity_type=e_type, entity_id=e_id, object_type=e_obj))

    def build_cache_key(self, entity_json: dict):
        """
        Build a format of the cache key from the entity json we get

        :param dict entity_json: json of an entity
        :return: CacheKey object
        """
        child_entity = False
        if 'url' in entity_json:
            split_url = entity_json['url'].split('/')
            entity_type = split_url[-2]
            child_entity = True
        elif 'org' in entity_json:
            entity_type = 'projects'
        else:
            entity_type = 'org'
        entity_id = entity_json['id']
        master_type = self.parent_dict[entity_type]
        master_id = '**'
        if child_entity:
            master_id_key = master_type[:-1] + 'Id'
            if master_id_key in entity_json:
                master_id = entity_json[master_id_key]
            elif master_type in entity_json:
                master_id = entity_json[master_type][0]
        elif entity_type == 'projects':
            master_id = entity_json[master_type]['id']

        return CacheKey(master_type=master_type, master_id=master_id, entity_type=entity_type, entity_id=entity_id)

    def _update_config_file(self, filepath: str, update: bool, size: float = 0):
        """
        Update the config file the have all the details about binary cache

        :param str filepath: path of the file the work on
        :param bool update: if True update the use of the file
        :param int size: file size
        """
        config_file_path = os.path.join(self.bin_cache_path, 'cacheConfig.json')
        if os.path.isfile(config_file_path):
            with FileLock(config_file_path + ".lock"):
                with open(config_file_path, mode="r", encoding="utf-8") as con:
                    with mmap.mmap(con.fileno(), length=0, access=mmap.ACCESS_READ) as mmap_obj:
                        text = mmap_obj.read().decode('utf8').replace("'", '"')
                        config_file = json.loads(text)
        else:
            config_file = {'size': 0, 'keys': []}

        if update and filepath in config_file['keys']:
            config_file['keys'].remove(filepath)

        if filepath not in config_file['keys']:
            config_file['keys'].append(filepath)
            config_file['size'] += size
        self._current_bin_cache_size = config_file['size']
        json_object = json.dumps(config_file, indent=4)
        with FileLock(config_file_path + ".lock"):
            with open(config_file_path, mode="w", encoding="utf-8") as outfile:
                outfile.write(json_object)

    def _lru_cache(self):
        """
        Make lru on the binary cache remove 30% of the files
        """
        config_file_path = os.path.join(self.bin_cache_path, 'cacheConfig.json')
        with FileLock(config_file_path + ".lock"):
            with open(config_file_path, mode="r", encoding="utf-8") as con:
                with mmap.mmap(con.fileno(), length=0, access=mmap.ACCESS_READ) as mmap_obj:
                    text = mmap_obj.read().decode('utf8').replace("'", '"')
                    config_file = json.loads(text)

        size = config_file['size']
        end = 70 / 100 * self.bin_cache_size

        while size > end and len(config_file['keys']) > 1:
            to_delete = config_file['keys'][0]

            size -= (Path(to_delete).stat().st_size / 1000000)
            os.remove(to_delete)
            config_file['keys'].remove(to_delete)

        config_file['size'] = size
        json_object = json.dumps(config_file, indent=4)

        with FileLock(config_file_path + ".lock"):
            with open(config_file_path, "w") as outfile:
                outfile.write(json_object)

    def read_stream(self, request_path, dataset_id=None):
        """
        Cache binary get

        :param str request_path: the request
        :param str dataset_id: dataset id of the binary object
        :return: success, list of the get result
        """
        entity_id = request_path.split('/')[-2]
        key = CacheKey(master_type='datasets',
                       master_id=dataset_id,
                       entity_id=entity_id,
                       entity_type='items',
                       object_type=ObjectType.BINARY.value)
        hit, response = self.get(key=key)
        if hit:
            source_path = os.path.normpath(response[0])
            self._update_config_file(filepath=source_path, update=True)
            return hit, [source_path]
        else:
            return False, None

    def write_stream(self,
                     request_path,
                     response=None,
                     buffer=None,
                     file_name=None,
                     entity_id=None,
                     dataset_id=None
                     ):
        """
        Cache binary set

        :param request_path: the request
        :param response: the response of stream
        :param buffer: the steam buffer
        :param file_name: the file name
        :param entity_id: entity id
        :param dataset_id: dataset id of the binary object
        :return: the file path of the binary
        """
        if entity_id is None:
            entity_id = request_path.split('/')[-2]
        key = CacheKey(master_type='datasets',
                       master_id=dataset_id,
                       entity_id=entity_id,
                       entity_type='items',
                       object_type=ObjectType.BINARY)
        filepath = self.bin_cache_path
        if file_name is None:
            file_name = (dict(response.headers)['Content-Disposition'].split('=')[1][2:-1])
        filepath = os.path.join(
            filepath,
            'items',
            file_name
        )
        self.set(key=key.get(), value=filepath)
        if not os.path.isfile(filepath):
            os.makedirs(os.path.dirname(filepath), exist_ok=True)
            if buffer is None:
                try:
                    temp_file_path = filepath + '.download'
                    with open(temp_file_path, "wb") as f:
                        for chunk in response.iter_content(chunk_size=8192):
                            if chunk:  # filter out keep-alive new chunks
                                f.write(chunk)
                    shutil.move(temp_file_path, filepath)
                except:
                    if os.path.isfile(temp_file_path):
                        os.remove(temp_file_path)
                    return ''
            else:
                if os.path.isfile(buffer.name):
                    shutil.copyfile(buffer.name, filepath)
                else:
                    with open(filepath, "wb") as f:
                        f.write(buffer.getbuffer())
            self._update_config_file(filepath=filepath, update=False, size=(Path(filepath).stat().st_size / 1000000))
            if (Path(filepath).stat().st_size / 1000000) + self._current_bin_cache_size > self.bin_cache_size:
                self._lru_cache()
        return filepath

    def read(self, request_path: str):
        """
        Cache entity get

        :param str request_path: the request
        :return: success, list of the get result
        """
        entity_id = request_path.split('/')[-1]
        entity_type = request_path.split('/')[-2]
        key = CacheKey(entity_id=entity_id, entity_type=entity_type)
        hit, response = self.get(key=key)
        if hit:
            return hit, response
        return False, None

    def write(self, list_entities_json):
        """
        Add or update the entity cache

        :param list list_entities_json: list of jsons of entities to set
        """
        for entity_json in list_entities_json:
            key = self.build_cache_key(entity_json)
            redis_key = key.get_key()
            self.set(key=redis_key, value=entity_json)
            self.set(key=key.get(), value=redis_key)

    def invalidate(self, path):
        """
        Delete from the caches

        :param str path: the request path
        """
        entity_id = path.split('/')[-1]
        entity_type = path.split('/')[-2]
        key = CacheKey(entity_id=entity_id, entity_type=entity_type)
        self.delete(key)

    def clear(self):
        self.cache_levels[1].clear()

    def keys(self):
        return [k for k in self.cache_levels[1].keys()]
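
For orientation, a minimal round-trip sketch of the CacheConfig serialization helpers above (the values are placeholders; note that from_string and from_json are declared as @staticmethod with an explicit cls parameter, so as written the class itself must be passed as the first argument):

from dtlpy.caches.cache import CacheConfig, CacheType

config = CacheConfig(cache_type=CacheType.DISKCACHE, ttl=3600, level=1)
encoded = config.to_string()  # base64-encoded JSON of to_json()

# As the signatures stand, the class is passed explicitly:
restored = CacheConfig.from_string(CacheConfig, encoded)
assert restored.ttl == 3600 and restored.type == 'diskcache'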
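
And a sketch of how the manager might be wired end to end, assuming the dtlpy package is installed, its DiskCache backend accepts the defaults, and DEFAULT_CACHE_PATH points at a writable directory; the ids and URL are hypothetical placeholders shaped like the Dataloop API responses that build_cache_key() expects:

import os
from dtlpy.caches.cache import CacheConfig, CacheType, CacheManger

os.environ['DEFAULT_CACHE_PATH'] = '/tmp/dtlpy-cache'  # binary-cache root read by __init__

manager = CacheManger(cache_configs=[CacheConfig(cache_type=CacheType.DISKCACHE)],
                      bin_cache_size=1000)  # size in MB

# build_cache_key() takes 'items' from the URL and 'datasetId' as the parent id,
# yielding keys like items/<id>/object and datasets/<dataset-id>/items/<id>/object
# (built with os.path.join, so the separator is platform dependent).
item_json = {'id': 'item-id', 'datasetId': 'dataset-id',
             'url': 'https://gate.dataloop.ai/api/v1/items/item-id'}
manager.write([item_json])

hit, cached = manager.read('/items/item-id')  # hit is True when the level-1 cache has the key
manager.invalidate('/items/item-id')          # deletes the entry and any cached children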