dtlpy 1.115.44__py3-none-any.whl → 1.116.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dtlpy/__init__.py +491 -491
- dtlpy/__version__.py +1 -1
- dtlpy/assets/__init__.py +26 -26
- dtlpy/assets/code_server/config.yaml +2 -2
- dtlpy/assets/code_server/installation.sh +24 -24
- dtlpy/assets/code_server/launch.json +13 -13
- dtlpy/assets/code_server/settings.json +2 -2
- dtlpy/assets/main.py +53 -53
- dtlpy/assets/main_partial.py +18 -18
- dtlpy/assets/mock.json +11 -11
- dtlpy/assets/model_adapter.py +83 -83
- dtlpy/assets/package.json +61 -61
- dtlpy/assets/package_catalog.json +29 -29
- dtlpy/assets/package_gitignore +307 -307
- dtlpy/assets/service_runners/__init__.py +33 -33
- dtlpy/assets/service_runners/converter.py +96 -96
- dtlpy/assets/service_runners/multi_method.py +49 -49
- dtlpy/assets/service_runners/multi_method_annotation.py +54 -54
- dtlpy/assets/service_runners/multi_method_dataset.py +55 -55
- dtlpy/assets/service_runners/multi_method_item.py +52 -52
- dtlpy/assets/service_runners/multi_method_json.py +52 -52
- dtlpy/assets/service_runners/single_method.py +37 -37
- dtlpy/assets/service_runners/single_method_annotation.py +43 -43
- dtlpy/assets/service_runners/single_method_dataset.py +43 -43
- dtlpy/assets/service_runners/single_method_item.py +41 -41
- dtlpy/assets/service_runners/single_method_json.py +42 -42
- dtlpy/assets/service_runners/single_method_multi_input.py +45 -45
- dtlpy/assets/voc_annotation_template.xml +23 -23
- dtlpy/caches/base_cache.py +32 -32
- dtlpy/caches/cache.py +473 -473
- dtlpy/caches/dl_cache.py +201 -201
- dtlpy/caches/filesystem_cache.py +89 -89
- dtlpy/caches/redis_cache.py +84 -84
- dtlpy/dlp/__init__.py +20 -20
- dtlpy/dlp/cli_utilities.py +367 -367
- dtlpy/dlp/command_executor.py +764 -764
- dtlpy/dlp/dlp +1 -1
- dtlpy/dlp/dlp.bat +1 -1
- dtlpy/dlp/dlp.py +128 -128
- dtlpy/dlp/parser.py +651 -651
- dtlpy/entities/__init__.py +83 -83
- dtlpy/entities/analytic.py +347 -347
- dtlpy/entities/annotation.py +1879 -1879
- dtlpy/entities/annotation_collection.py +699 -699
- dtlpy/entities/annotation_definitions/__init__.py +20 -20
- dtlpy/entities/annotation_definitions/base_annotation_definition.py +100 -100
- dtlpy/entities/annotation_definitions/box.py +195 -195
- dtlpy/entities/annotation_definitions/classification.py +67 -67
- dtlpy/entities/annotation_definitions/comparison.py +72 -72
- dtlpy/entities/annotation_definitions/cube.py +204 -204
- dtlpy/entities/annotation_definitions/cube_3d.py +149 -149
- dtlpy/entities/annotation_definitions/description.py +32 -32
- dtlpy/entities/annotation_definitions/ellipse.py +124 -124
- dtlpy/entities/annotation_definitions/free_text.py +62 -62
- dtlpy/entities/annotation_definitions/gis.py +69 -69
- dtlpy/entities/annotation_definitions/note.py +139 -139
- dtlpy/entities/annotation_definitions/point.py +117 -117
- dtlpy/entities/annotation_definitions/polygon.py +182 -182
- dtlpy/entities/annotation_definitions/polyline.py +111 -111
- dtlpy/entities/annotation_definitions/pose.py +92 -92
- dtlpy/entities/annotation_definitions/ref_image.py +86 -86
- dtlpy/entities/annotation_definitions/segmentation.py +240 -240
- dtlpy/entities/annotation_definitions/subtitle.py +34 -34
- dtlpy/entities/annotation_definitions/text.py +85 -85
- dtlpy/entities/annotation_definitions/undefined_annotation.py +74 -74
- dtlpy/entities/app.py +220 -220
- dtlpy/entities/app_module.py +107 -107
- dtlpy/entities/artifact.py +174 -174
- dtlpy/entities/assignment.py +399 -399
- dtlpy/entities/base_entity.py +214 -214
- dtlpy/entities/bot.py +113 -113
- dtlpy/entities/codebase.py +292 -292
- dtlpy/entities/collection.py +38 -38
- dtlpy/entities/command.py +169 -169
- dtlpy/entities/compute.py +449 -449
- dtlpy/entities/dataset.py +1299 -1299
- dtlpy/entities/directory_tree.py +44 -44
- dtlpy/entities/dpk.py +470 -470
- dtlpy/entities/driver.py +235 -235
- dtlpy/entities/execution.py +397 -397
- dtlpy/entities/feature.py +124 -124
- dtlpy/entities/feature_set.py +145 -145
- dtlpy/entities/filters.py +798 -798
- dtlpy/entities/gis_item.py +107 -107
- dtlpy/entities/integration.py +184 -184
- dtlpy/entities/item.py +959 -959
- dtlpy/entities/label.py +123 -123
- dtlpy/entities/links.py +85 -85
- dtlpy/entities/message.py +175 -175
- dtlpy/entities/model.py +684 -684
- dtlpy/entities/node.py +1005 -1005
- dtlpy/entities/ontology.py +810 -803
- dtlpy/entities/organization.py +287 -287
- dtlpy/entities/package.py +657 -657
- dtlpy/entities/package_defaults.py +5 -5
- dtlpy/entities/package_function.py +185 -185
- dtlpy/entities/package_module.py +113 -113
- dtlpy/entities/package_slot.py +118 -118
- dtlpy/entities/paged_entities.py +299 -299
- dtlpy/entities/pipeline.py +624 -624
- dtlpy/entities/pipeline_execution.py +279 -279
- dtlpy/entities/project.py +394 -394
- dtlpy/entities/prompt_item.py +505 -505
- dtlpy/entities/recipe.py +301 -301
- dtlpy/entities/reflect_dict.py +102 -102
- dtlpy/entities/resource_execution.py +138 -138
- dtlpy/entities/service.py +963 -963
- dtlpy/entities/service_driver.py +117 -117
- dtlpy/entities/setting.py +294 -294
- dtlpy/entities/task.py +495 -495
- dtlpy/entities/time_series.py +143 -143
- dtlpy/entities/trigger.py +426 -426
- dtlpy/entities/user.py +118 -118
- dtlpy/entities/webhook.py +124 -124
- dtlpy/examples/__init__.py +19 -19
- dtlpy/examples/add_labels.py +135 -135
- dtlpy/examples/add_metadata_to_item.py +21 -21
- dtlpy/examples/annotate_items_using_model.py +65 -65
- dtlpy/examples/annotate_video_using_model_and_tracker.py +75 -75
- dtlpy/examples/annotations_convert_to_voc.py +9 -9
- dtlpy/examples/annotations_convert_to_yolo.py +9 -9
- dtlpy/examples/convert_annotation_types.py +51 -51
- dtlpy/examples/converter.py +143 -143
- dtlpy/examples/copy_annotations.py +22 -22
- dtlpy/examples/copy_folder.py +31 -31
- dtlpy/examples/create_annotations.py +51 -51
- dtlpy/examples/create_video_annotations.py +83 -83
- dtlpy/examples/delete_annotations.py +26 -26
- dtlpy/examples/filters.py +113 -113
- dtlpy/examples/move_item.py +23 -23
- dtlpy/examples/play_video_annotation.py +13 -13
- dtlpy/examples/show_item_and_mask.py +53 -53
- dtlpy/examples/triggers.py +49 -49
- dtlpy/examples/upload_batch_of_items.py +20 -20
- dtlpy/examples/upload_items_and_custom_format_annotations.py +55 -55
- dtlpy/examples/upload_items_with_modalities.py +43 -43
- dtlpy/examples/upload_segmentation_annotations_from_mask_image.py +44 -44
- dtlpy/examples/upload_yolo_format_annotations.py +70 -70
- dtlpy/exceptions.py +125 -125
- dtlpy/miscellaneous/__init__.py +20 -20
- dtlpy/miscellaneous/dict_differ.py +95 -95
- dtlpy/miscellaneous/git_utils.py +217 -217
- dtlpy/miscellaneous/json_utils.py +14 -14
- dtlpy/miscellaneous/list_print.py +105 -105
- dtlpy/miscellaneous/zipping.py +130 -130
- dtlpy/ml/__init__.py +20 -20
- dtlpy/ml/base_feature_extractor_adapter.py +27 -27
- dtlpy/ml/base_model_adapter.py +1257 -1230
- dtlpy/ml/metrics.py +461 -461
- dtlpy/ml/predictions_utils.py +274 -274
- dtlpy/ml/summary_writer.py +57 -57
- dtlpy/ml/train_utils.py +60 -60
- dtlpy/new_instance.py +252 -252
- dtlpy/repositories/__init__.py +56 -56
- dtlpy/repositories/analytics.py +85 -85
- dtlpy/repositories/annotations.py +916 -916
- dtlpy/repositories/apps.py +383 -383
- dtlpy/repositories/artifacts.py +452 -452
- dtlpy/repositories/assignments.py +599 -599
- dtlpy/repositories/bots.py +213 -213
- dtlpy/repositories/codebases.py +559 -559
- dtlpy/repositories/collections.py +332 -332
- dtlpy/repositories/commands.py +152 -152
- dtlpy/repositories/compositions.py +61 -61
- dtlpy/repositories/computes.py +439 -439
- dtlpy/repositories/datasets.py +1504 -1504
- dtlpy/repositories/downloader.py +976 -923
- dtlpy/repositories/dpks.py +433 -433
- dtlpy/repositories/drivers.py +482 -482
- dtlpy/repositories/executions.py +815 -815
- dtlpy/repositories/feature_sets.py +226 -226
- dtlpy/repositories/features.py +255 -255
- dtlpy/repositories/integrations.py +484 -484
- dtlpy/repositories/items.py +912 -912
- dtlpy/repositories/messages.py +94 -94
- dtlpy/repositories/models.py +1000 -1000
- dtlpy/repositories/nodes.py +80 -80
- dtlpy/repositories/ontologies.py +511 -511
- dtlpy/repositories/organizations.py +525 -525
- dtlpy/repositories/packages.py +1941 -1941
- dtlpy/repositories/pipeline_executions.py +451 -451
- dtlpy/repositories/pipelines.py +640 -640
- dtlpy/repositories/projects.py +539 -539
- dtlpy/repositories/recipes.py +419 -399
- dtlpy/repositories/resource_executions.py +137 -137
- dtlpy/repositories/schema.py +120 -120
- dtlpy/repositories/service_drivers.py +213 -213
- dtlpy/repositories/services.py +1704 -1704
- dtlpy/repositories/settings.py +339 -339
- dtlpy/repositories/tasks.py +1477 -1477
- dtlpy/repositories/times_series.py +278 -278
- dtlpy/repositories/triggers.py +536 -536
- dtlpy/repositories/upload_element.py +257 -257
- dtlpy/repositories/uploader.py +661 -661
- dtlpy/repositories/webhooks.py +249 -249
- dtlpy/services/__init__.py +22 -22
- dtlpy/services/aihttp_retry.py +131 -131
- dtlpy/services/api_client.py +1785 -1785
- dtlpy/services/api_reference.py +40 -40
- dtlpy/services/async_utils.py +133 -133
- dtlpy/services/calls_counter.py +44 -44
- dtlpy/services/check_sdk.py +68 -68
- dtlpy/services/cookie.py +115 -115
- dtlpy/services/create_logger.py +156 -156
- dtlpy/services/events.py +84 -84
- dtlpy/services/logins.py +235 -235
- dtlpy/services/reporter.py +256 -256
- dtlpy/services/service_defaults.py +91 -91
- dtlpy/utilities/__init__.py +20 -20
- dtlpy/utilities/annotations/__init__.py +16 -16
- dtlpy/utilities/annotations/annotation_converters.py +269 -269
- dtlpy/utilities/base_package_runner.py +285 -264
- dtlpy/utilities/converter.py +1650 -1650
- dtlpy/utilities/dataset_generators/__init__.py +1 -1
- dtlpy/utilities/dataset_generators/dataset_generator.py +670 -670
- dtlpy/utilities/dataset_generators/dataset_generator_tensorflow.py +23 -23
- dtlpy/utilities/dataset_generators/dataset_generator_torch.py +21 -21
- dtlpy/utilities/local_development/__init__.py +1 -1
- dtlpy/utilities/local_development/local_session.py +179 -179
- dtlpy/utilities/reports/__init__.py +2 -2
- dtlpy/utilities/reports/figures.py +343 -343
- dtlpy/utilities/reports/report.py +71 -71
- dtlpy/utilities/videos/__init__.py +17 -17
- dtlpy/utilities/videos/video_player.py +598 -598
- dtlpy/utilities/videos/videos.py +470 -470
- {dtlpy-1.115.44.data → dtlpy-1.116.6.data}/scripts/dlp +1 -1
- dtlpy-1.116.6.data/scripts/dlp.bat +2 -0
- {dtlpy-1.115.44.data → dtlpy-1.116.6.data}/scripts/dlp.py +128 -128
- {dtlpy-1.115.44.dist-info → dtlpy-1.116.6.dist-info}/METADATA +186 -186
- dtlpy-1.116.6.dist-info/RECORD +239 -0
- {dtlpy-1.115.44.dist-info → dtlpy-1.116.6.dist-info}/WHEEL +1 -1
- {dtlpy-1.115.44.dist-info → dtlpy-1.116.6.dist-info}/licenses/LICENSE +200 -200
- tests/features/environment.py +551 -551
- dtlpy/assets/__pycache__/__init__.cpython-310.pyc +0 -0
- dtlpy-1.115.44.data/scripts/dlp.bat +0 -2
- dtlpy-1.115.44.dist-info/RECORD +0 -240
- {dtlpy-1.115.44.dist-info → dtlpy-1.116.6.dist-info}/entry_points.txt +0 -0
- {dtlpy-1.115.44.dist-info → dtlpy-1.116.6.dist-info}/top_level.txt +0 -0
dtlpy/entities/prompt_item.py
CHANGED
|
@@ -1,506 +1,506 @@
|
|
|
1
|
-
import requests
|
|
2
|
-
import logging
|
|
3
|
-
import base64
|
|
4
|
-
import enum
|
|
5
|
-
import json
|
|
6
|
-
import io
|
|
7
|
-
import os
|
|
8
|
-
from typing import List, Optional
|
|
9
|
-
|
|
10
|
-
from concurrent.futures import ThreadPoolExecutor
|
|
11
|
-
from .. import entities, repositories
|
|
12
|
-
from dtlpy.services.api_client import client as client_api
|
|
13
|
-
|
|
14
|
-
logger = logging.getLogger(name='dtlpy')
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
class PromptType(str, enum.Enum):
|
|
18
|
-
TEXT = 'application/text'
|
|
19
|
-
IMAGE = 'image/*'
|
|
20
|
-
AUDIO = 'audio/*'
|
|
21
|
-
VIDEO = 'video/*'
|
|
22
|
-
METADATA = 'metadata'
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
class Prompt:
|
|
26
|
-
def __init__(self, key, role='user'):
|
|
27
|
-
"""
|
|
28
|
-
Create a single Prompt. Prompt can contain multiple mimetype elements, e.g. text sentence and an image.
|
|
29
|
-
:param key: unique identifier of the prompt in the item
|
|
30
|
-
"""
|
|
31
|
-
self.key = key
|
|
32
|
-
self.elements = list()
|
|
33
|
-
# to avoid broken stream of json files - DAT-75653
|
|
34
|
-
client_api.default_headers['x-dl-sanitize'] = '0'
|
|
35
|
-
self._items = repositories.Items(client_api=client_api)
|
|
36
|
-
self.metadata = {'role': role}
|
|
37
|
-
|
|
38
|
-
def add_element(self, value, mimetype='application/text'):
|
|
39
|
-
"""
|
|
40
|
-
|
|
41
|
-
:param value: url or string of the input
|
|
42
|
-
:param mimetype: mimetype of the input. options: `text`, `image/*`, `video/*`, `audio/*`
|
|
43
|
-
:return:
|
|
44
|
-
"""
|
|
45
|
-
allowed_prompt_types = [prompt_type for prompt_type in PromptType]
|
|
46
|
-
if mimetype not in allowed_prompt_types:
|
|
47
|
-
raise ValueError(f'Invalid mimetype: {mimetype}. Allowed values: {allowed_prompt_types}')
|
|
48
|
-
if mimetype == PromptType.METADATA and isinstance(value, dict):
|
|
49
|
-
self.metadata.update(value)
|
|
50
|
-
else:
|
|
51
|
-
self.elements.append({'mimetype': mimetype,
|
|
52
|
-
'value': value})
|
|
53
|
-
|
|
54
|
-
def to_json(self):
|
|
55
|
-
"""
|
|
56
|
-
Convert Prompt entity to the item json
|
|
57
|
-
|
|
58
|
-
:return:
|
|
59
|
-
"""
|
|
60
|
-
elements_json = [
|
|
61
|
-
{
|
|
62
|
-
"mimetype": e['mimetype'],
|
|
63
|
-
"value": e['value'],
|
|
64
|
-
} for e in self.elements if not e['mimetype'] == PromptType.METADATA
|
|
65
|
-
]
|
|
66
|
-
elements_json.append({
|
|
67
|
-
"mimetype": PromptType.METADATA,
|
|
68
|
-
"value": self.metadata
|
|
69
|
-
})
|
|
70
|
-
return {
|
|
71
|
-
self.key: elements_json
|
|
72
|
-
}
|
|
73
|
-
|
|
74
|
-
def _convert_stream_to_binary(self, image_url: str):
|
|
75
|
-
"""
|
|
76
|
-
Convert a stream to binary
|
|
77
|
-
:param image_url: dataloop image stream url
|
|
78
|
-
:return: binary object
|
|
79
|
-
"""
|
|
80
|
-
image_buffer = None
|
|
81
|
-
if '.' in image_url and 'dataloop.ai' not in image_url:
|
|
82
|
-
# URL and not DL item stream
|
|
83
|
-
try:
|
|
84
|
-
response = requests.get(image_url, stream=True)
|
|
85
|
-
response.raise_for_status() # Raise an exception for bad status codes
|
|
86
|
-
|
|
87
|
-
# Check for valid image content type
|
|
88
|
-
if response.headers["Content-Type"].startswith("image/"):
|
|
89
|
-
# Read the image data in chunks to avoid loading large images in memory
|
|
90
|
-
image_buffer = b"".join(chunk for chunk in response.iter_content(1024))
|
|
91
|
-
except requests.exceptions.RequestException as e:
|
|
92
|
-
logger.error(f"Failed to download image from URL: {image_url}, error: {e}")
|
|
93
|
-
|
|
94
|
-
elif '.' in image_url and 'stream' in image_url:
|
|
95
|
-
# DL Stream URL
|
|
96
|
-
item_id = image_url.split("/stream")[0].split("/items/")[-1]
|
|
97
|
-
image_buffer = self._items.get(item_id=item_id).download(save_locally=False).getvalue()
|
|
98
|
-
else:
|
|
99
|
-
# DL item ID
|
|
100
|
-
image_buffer = self._items.get(item_id=image_url).download(save_locally=False).getvalue()
|
|
101
|
-
|
|
102
|
-
if image_buffer is not None:
|
|
103
|
-
encoded_image = base64.b64encode(image_buffer).decode()
|
|
104
|
-
else:
|
|
105
|
-
logger.error(f'Invalid image url: {image_url}')
|
|
106
|
-
return None
|
|
107
|
-
|
|
108
|
-
return f'data:image/jpeg;base64,{encoded_image}'
|
|
109
|
-
|
|
110
|
-
def messages(self):
|
|
111
|
-
"""
|
|
112
|
-
return a list of messages in the prompt item,
|
|
113
|
-
messages are returned following the openai SDK format https://platform.openai.com/docs/guides/vision
|
|
114
|
-
"""
|
|
115
|
-
messages = []
|
|
116
|
-
for element in self.elements:
|
|
117
|
-
if element['mimetype'] == PromptType.TEXT:
|
|
118
|
-
data = {
|
|
119
|
-
"type": "text",
|
|
120
|
-
"text": element['value']
|
|
121
|
-
}
|
|
122
|
-
messages.append(data)
|
|
123
|
-
elif element['mimetype'] == PromptType.IMAGE:
|
|
124
|
-
image_url = self._convert_stream_to_binary(element['value'])
|
|
125
|
-
data = {
|
|
126
|
-
"type": "image_url",
|
|
127
|
-
"image_url": {
|
|
128
|
-
"url": image_url
|
|
129
|
-
}
|
|
130
|
-
}
|
|
131
|
-
messages.append(data)
|
|
132
|
-
elif element['mimetype'] == PromptType.AUDIO:
|
|
133
|
-
raise NotImplementedError('Audio prompt is not supported yet')
|
|
134
|
-
elif element['mimetype'] == PromptType.VIDEO:
|
|
135
|
-
data = {
|
|
136
|
-
"type": "video_url",
|
|
137
|
-
"video_url": {
|
|
138
|
-
"url": element['value']
|
|
139
|
-
}
|
|
140
|
-
}
|
|
141
|
-
messages.append(data)
|
|
142
|
-
else:
|
|
143
|
-
raise ValueError(f'Invalid mimetype: {element["mimetype"]}')
|
|
144
|
-
return messages, self.key
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
class PromptItem:
|
|
148
|
-
def __init__(self, name, item: entities.Item = None, role_mapping=None):
|
|
149
|
-
if role_mapping is None:
|
|
150
|
-
role_mapping = {'user': 'item',
|
|
151
|
-
'assistant': 'annotation'}
|
|
152
|
-
if not isinstance(role_mapping, dict):
|
|
153
|
-
raise ValueError(f'input role_mapping must be dict. type: {type(role_mapping)}')
|
|
154
|
-
self.role_mapping = role_mapping
|
|
155
|
-
# prompt item name
|
|
156
|
-
self.name = name
|
|
157
|
-
# list of user prompts in the prompt item
|
|
158
|
-
self.prompts = list()
|
|
159
|
-
self.assistant_prompts = list()
|
|
160
|
-
# list of assistant (annotations) prompts in the prompt item
|
|
161
|
-
# Dataloop Item
|
|
162
|
-
self._item: entities.Item = item
|
|
163
|
-
self._messages = []
|
|
164
|
-
self._annotations: entities.AnnotationCollection = None
|
|
165
|
-
if item is not None:
|
|
166
|
-
if 'json' not in item.mimetype or item.system.get('shebang', dict()).get('dltype') != 'prompt':
|
|
167
|
-
raise ValueError('Expecting a json item with system.shebang.dltype = prompt')
|
|
168
|
-
self._items = item.items
|
|
169
|
-
self.fetch()
|
|
170
|
-
else:
|
|
171
|
-
self._items = repositories.Items(client_api=client_api)
|
|
172
|
-
|
|
173
|
-
# to avoid broken stream of json files - DAT-75653
|
|
174
|
-
self._items._client_api.default_headers['x-dl-sanitize'] = '0'
|
|
175
|
-
|
|
176
|
-
@classmethod
|
|
177
|
-
def from_messages(cls, messages: list):
|
|
178
|
-
...
|
|
179
|
-
|
|
180
|
-
@classmethod
|
|
181
|
-
def from_item(cls, item: entities.Item):
|
|
182
|
-
"""
|
|
183
|
-
Load a prompt item from the platform
|
|
184
|
-
:param item : Item object
|
|
185
|
-
:return: PromptItem object
|
|
186
|
-
"""
|
|
187
|
-
if 'json' not in item.mimetype or item.system.get('shebang', dict()).get('dltype') != 'prompt':
|
|
188
|
-
raise ValueError('Expecting a json item with system.shebang.dltype = prompt')
|
|
189
|
-
return cls(name=item.name, item=item)
|
|
190
|
-
|
|
191
|
-
@classmethod
|
|
192
|
-
def from_local_file(cls, filepath):
|
|
193
|
-
"""
|
|
194
|
-
Create a new prompt item from a file
|
|
195
|
-
:param filepath: path to the file
|
|
196
|
-
:return: PromptItem object
|
|
197
|
-
"""
|
|
198
|
-
if os.path.exists(filepath) is False:
|
|
199
|
-
raise FileNotFoundError(f'File does not exists: {filepath}')
|
|
200
|
-
if 'json' not in os.path.splitext(filepath)[-1]:
|
|
201
|
-
raise ValueError(f'Expected path to json item, got {os.path.splitext(filepath)[-1]}')
|
|
202
|
-
prompt_item = cls(name=filepath)
|
|
203
|
-
with open(filepath, 'r', encoding='utf-8') as f:
|
|
204
|
-
data = json.load(f)
|
|
205
|
-
prompt_item.prompts = prompt_item._load_item_prompts(data=data)
|
|
206
|
-
return prompt_item
|
|
207
|
-
|
|
208
|
-
@staticmethod
|
|
209
|
-
def _load_item_prompts(data):
|
|
210
|
-
prompts = list()
|
|
211
|
-
for prompt_key, prompt_elements in data.get('prompts', dict()).items():
|
|
212
|
-
content = list()
|
|
213
|
-
for element in prompt_elements:
|
|
214
|
-
content.append({'value': element.get('value', dict()),
|
|
215
|
-
'mimetype': element['mimetype']})
|
|
216
|
-
prompt = Prompt(key=prompt_key, role="user")
|
|
217
|
-
for element in content:
|
|
218
|
-
prompt.add_element(value=element.get('value', ''),
|
|
219
|
-
mimetype=element.get('mimetype', PromptType.TEXT))
|
|
220
|
-
prompts.append(prompt)
|
|
221
|
-
return prompts
|
|
222
|
-
|
|
223
|
-
@staticmethod
|
|
224
|
-
def _load_annotations_prompts(annotations: entities.AnnotationCollection):
|
|
225
|
-
"""
|
|
226
|
-
Get all the annotations in the item for the assistant messages
|
|
227
|
-
"""
|
|
228
|
-
# clearing the assistant prompts from previous annotations that might not belong
|
|
229
|
-
assistant_prompts = list()
|
|
230
|
-
for annotation in annotations:
|
|
231
|
-
prompt_id = annotation.metadata.get('system', dict()).get('promptId', None)
|
|
232
|
-
model_info = annotation.metadata.get('user', dict()).get('model', dict())
|
|
233
|
-
annotation_id = annotation.id
|
|
234
|
-
if annotation.type == 'ref_image':
|
|
235
|
-
prompt = Prompt(key=prompt_id, role='assistant')
|
|
236
|
-
prompt.add_element(value=annotation.annotation_definition.coordinates.get('ref'),
|
|
237
|
-
mimetype=PromptType.IMAGE)
|
|
238
|
-
elif annotation.type == 'text':
|
|
239
|
-
prompt = Prompt(key=prompt_id, role='assistant')
|
|
240
|
-
prompt.add_element(value=annotation.annotation_definition.coordinates,
|
|
241
|
-
mimetype=PromptType.TEXT)
|
|
242
|
-
else:
|
|
243
|
-
raise ValueError(f"Unsupported annotation type: {annotation.type}")
|
|
244
|
-
|
|
245
|
-
prompt.add_element(value={'id': annotation_id,
|
|
246
|
-
'model_info': model_info},
|
|
247
|
-
mimetype=PromptType.METADATA)
|
|
248
|
-
assistant_prompts.append(prompt)
|
|
249
|
-
return assistant_prompts
|
|
250
|
-
|
|
251
|
-
def to_json(self):
|
|
252
|
-
"""
|
|
253
|
-
Convert the entity to a platform item.
|
|
254
|
-
|
|
255
|
-
:return:
|
|
256
|
-
"""
|
|
257
|
-
prompts_json = {
|
|
258
|
-
"shebang": "dataloop",
|
|
259
|
-
"metadata": {
|
|
260
|
-
"dltype": 'prompt'
|
|
261
|
-
},
|
|
262
|
-
"prompts": {}
|
|
263
|
-
}
|
|
264
|
-
for prompt in self.prompts:
|
|
265
|
-
for prompt_key, prompt_values in prompt.to_json().items():
|
|
266
|
-
prompts_json["prompts"][prompt_key] = prompt_values
|
|
267
|
-
return prompts_json
|
|
268
|
-
|
|
269
|
-
def to_messages(self, model_name=None, include_assistant=True):
|
|
270
|
-
all_prompts_messages = dict()
|
|
271
|
-
for prompt in self.prompts:
|
|
272
|
-
if prompt.key not in all_prompts_messages:
|
|
273
|
-
all_prompts_messages[prompt.key] = list()
|
|
274
|
-
prompt_messages, prompt_key = prompt.messages()
|
|
275
|
-
messages = {
|
|
276
|
-
'role': prompt.metadata.get('role', 'user'),
|
|
277
|
-
'content': prompt_messages
|
|
278
|
-
}
|
|
279
|
-
all_prompts_messages[prompt.key].append(messages)
|
|
280
|
-
if include_assistant is True:
|
|
281
|
-
# reload to filer model annotations
|
|
282
|
-
for prompt in self.assistant_prompts:
|
|
283
|
-
prompt_model_name = prompt.metadata.get('model_info', dict()).get('name')
|
|
284
|
-
if model_name is not None and prompt_model_name != model_name:
|
|
285
|
-
continue
|
|
286
|
-
if prompt.key not in all_prompts_messages:
|
|
287
|
-
logger.warning(
|
|
288
|
-
f'Prompt key {prompt.key} is not found in the user prompts, skipping Assistant prompt')
|
|
289
|
-
continue
|
|
290
|
-
prompt_messages, prompt_key = prompt.messages()
|
|
291
|
-
assistant_messages = {
|
|
292
|
-
'role': 'assistant',
|
|
293
|
-
'content': prompt_messages
|
|
294
|
-
}
|
|
295
|
-
all_prompts_messages[prompt.key].append(assistant_messages)
|
|
296
|
-
res = list()
|
|
297
|
-
for prompts in all_prompts_messages.values():
|
|
298
|
-
for prompt in prompts:
|
|
299
|
-
res.append(prompt)
|
|
300
|
-
self._messages = res
|
|
301
|
-
return self._messages
|
|
302
|
-
|
|
303
|
-
def to_bytes_io(self):
|
|
304
|
-
# Used for item upload, do not delete
|
|
305
|
-
byte_io = io.BytesIO()
|
|
306
|
-
byte_io.name = self.name
|
|
307
|
-
byte_io.write(json.dumps(self.to_json()).encode())
|
|
308
|
-
byte_io.seek(0)
|
|
309
|
-
return byte_io
|
|
310
|
-
|
|
311
|
-
def fetch(self):
|
|
312
|
-
if self._item is None:
|
|
313
|
-
raise ValueError('Missing item, nothing to fetch..')
|
|
314
|
-
self._item = self._items.get(item_id=self._item.id)
|
|
315
|
-
self._annotations = self._item.annotations.list()
|
|
316
|
-
self.prompts = self._load_item_prompts(data=json.load(self._item.download(save_locally=False)))
|
|
317
|
-
self.assistant_prompts = self._load_annotations_prompts(self._annotations)
|
|
318
|
-
|
|
319
|
-
def build_context(self, nearest_items, add_metadata=None) -> str:
|
|
320
|
-
"""
|
|
321
|
-
Create a context stream from nearest items list.
|
|
322
|
-
add_metadata is a list of location in the item.metadata to add to the context, for instance ['system.document.source']
|
|
323
|
-
:param nearest_items: list of item ids
|
|
324
|
-
:param add_metadata: list of metadata location to add metadata to context
|
|
325
|
-
:return:
|
|
326
|
-
"""
|
|
327
|
-
if add_metadata is None:
|
|
328
|
-
add_metadata = list()
|
|
329
|
-
|
|
330
|
-
def stream_single(w_id):
|
|
331
|
-
context_item = self._items.get(item_id=w_id)
|
|
332
|
-
buf = context_item.download(save_locally=False)
|
|
333
|
-
text = buf.read().decode(encoding='utf-8')
|
|
334
|
-
m = ""
|
|
335
|
-
for path in add_metadata:
|
|
336
|
-
parts = path.split('.')
|
|
337
|
-
value = context_item.metadata
|
|
338
|
-
part = ""
|
|
339
|
-
for part in parts:
|
|
340
|
-
if isinstance(value, dict):
|
|
341
|
-
value = value.get(part)
|
|
342
|
-
else:
|
|
343
|
-
value = ""
|
|
344
|
-
|
|
345
|
-
m += f"{part}:{value}\n"
|
|
346
|
-
return text, m
|
|
347
|
-
|
|
348
|
-
pool = ThreadPoolExecutor(max_workers=32)
|
|
349
|
-
context = ""
|
|
350
|
-
if len(nearest_items) > 0:
|
|
351
|
-
# build context
|
|
352
|
-
results = pool.map(stream_single, nearest_items)
|
|
353
|
-
for res in results:
|
|
354
|
-
context += f"\n<source>\n{res[1]}\n</source>\n<text>\n{res[0]}\n</text>"
|
|
355
|
-
return context
|
|
356
|
-
|
|
357
|
-
def add(self,
|
|
358
|
-
message: dict,
|
|
359
|
-
prompt_key: str = None,
|
|
360
|
-
model_info: dict = None):
|
|
361
|
-
"""
|
|
362
|
-
add a prompt to the prompt item
|
|
363
|
-
prompt: a dictionary. keys are prompt message id, values are prompt messages
|
|
364
|
-
responses: a list of annotations representing responses to the prompt
|
|
365
|
-
|
|
366
|
-
:param message:
|
|
367
|
-
:param prompt_key:
|
|
368
|
-
:param model_info:
|
|
369
|
-
:return:
|
|
370
|
-
"""
|
|
371
|
-
role = message.get('role', 'user')
|
|
372
|
-
content = message.get('content', list())
|
|
373
|
-
|
|
374
|
-
if self.role_mapping.get(role, 'item') == 'item':
|
|
375
|
-
if prompt_key is None:
|
|
376
|
-
prompt_key = str(len(self.prompts) + 1)
|
|
377
|
-
# for new prompt we need a new key
|
|
378
|
-
prompt = Prompt(key=prompt_key, role=role)
|
|
379
|
-
for element in content:
|
|
380
|
-
prompt.add_element(value=element.get('value', ''),
|
|
381
|
-
mimetype=element.get('mimetype', PromptType.TEXT))
|
|
382
|
-
|
|
383
|
-
# create new prompt and add to prompts
|
|
384
|
-
self.prompts.append(prompt)
|
|
385
|
-
if self._item is not None:
|
|
386
|
-
self._item._Item__update_item_binary(_json=self.to_json())
|
|
387
|
-
else:
|
|
388
|
-
if prompt_key is None:
|
|
389
|
-
prompt_key = str(len(self.prompts))
|
|
390
|
-
assistant_message = content[0]
|
|
391
|
-
assistant_mimetype = assistant_message.get('mimetype', PromptType.TEXT)
|
|
392
|
-
uploaded_annotation = None
|
|
393
|
-
|
|
394
|
-
# find if prompt
|
|
395
|
-
if model_info is None:
|
|
396
|
-
# dont search for existing if there's no model information
|
|
397
|
-
existing_prompt = None
|
|
398
|
-
else:
|
|
399
|
-
existing_prompts = list()
|
|
400
|
-
for prompt in self.assistant_prompts:
|
|
401
|
-
prompt_id = prompt.key
|
|
402
|
-
model_name = prompt.metadata.get('model_info', dict()).get('name')
|
|
403
|
-
if prompt_id == prompt_key and model_name == model_info.get('name'):
|
|
404
|
-
# TODO how to handle multiple annotations
|
|
405
|
-
existing_prompts.append(prompt)
|
|
406
|
-
if len(existing_prompts) > 1:
|
|
407
|
-
assert False, "shouldn't be here! more than 1 annotation for a single model"
|
|
408
|
-
elif len(existing_prompts) == 1:
|
|
409
|
-
# found model annotation to upload
|
|
410
|
-
existing_prompt = existing_prompts[0]
|
|
411
|
-
else:
|
|
412
|
-
# no annotation found
|
|
413
|
-
existing_prompt = None
|
|
414
|
-
|
|
415
|
-
if existing_prompt is None:
|
|
416
|
-
prompt = Prompt(key=prompt_key)
|
|
417
|
-
if assistant_mimetype == PromptType.TEXT:
|
|
418
|
-
annotation_definition = entities.FreeText(text=assistant_message.get('value'))
|
|
419
|
-
prompt.add_element(value=annotation_definition.to_coordinates(None),
|
|
420
|
-
mimetype=PromptType.TEXT)
|
|
421
|
-
elif assistant_mimetype == PromptType.IMAGE:
|
|
422
|
-
annotation_definition = entities.RefImage(ref=assistant_message.get('value'))
|
|
423
|
-
prompt.add_element(value=annotation_definition.to_coordinates(None).get('ref'),
|
|
424
|
-
mimetype=PromptType.IMAGE)
|
|
425
|
-
else:
|
|
426
|
-
raise NotImplementedError('Only images of mimetype image and text are supported')
|
|
427
|
-
metadata = {'system': {'promptId': prompt_key},
|
|
428
|
-
'user': {'model': model_info}}
|
|
429
|
-
prompt.add_element(mimetype=PromptType.METADATA,
|
|
430
|
-
value={"model_info": model_info})
|
|
431
|
-
|
|
432
|
-
existing_annotation = entities.Annotation.new(item=self._item,
|
|
433
|
-
metadata=metadata,
|
|
434
|
-
annotation_definition=annotation_definition)
|
|
435
|
-
uploaded_annotation = existing_annotation.upload()
|
|
436
|
-
prompt.add_element(mimetype=PromptType.METADATA,
|
|
437
|
-
value={"id": uploaded_annotation.id})
|
|
438
|
-
existing_prompt = prompt
|
|
439
|
-
self.assistant_prompts.append(prompt)
|
|
440
|
-
|
|
441
|
-
existing_prompt_element = [element for element in existing_prompt.elements if
|
|
442
|
-
element['mimetype'] != PromptType.METADATA][-1]
|
|
443
|
-
existing_prompt_element['value'] = assistant_message.get('value')
|
|
444
|
-
if uploaded_annotation is None:
|
|
445
|
-
# Creating annotation with old dict to match platform dict
|
|
446
|
-
annotation_definition = entities.FreeText(text='')
|
|
447
|
-
metadata = {'system': {'promptId': prompt_key},
|
|
448
|
-
'user': {'model': existing_prompt.metadata.get('model_info')}}
|
|
449
|
-
annotation = entities.Annotation.new(item=self._item,
|
|
450
|
-
metadata=metadata,
|
|
451
|
-
annotation_definition=annotation_definition
|
|
452
|
-
)
|
|
453
|
-
annotation.id = existing_prompt.metadata['id']
|
|
454
|
-
# set the platform dict to match the old annotation for the dict difference check, otherwise it won't
|
|
455
|
-
# update
|
|
456
|
-
annotation._platform_dict = annotation.to_json()
|
|
457
|
-
# update the annotation with the new text
|
|
458
|
-
annotation.annotation_definition.text = existing_prompt_element['value']
|
|
459
|
-
self._item.annotations.update(annotation)
|
|
460
|
-
|
|
461
|
-
def update(self):
|
|
462
|
-
"""
|
|
463
|
-
Update the prompt item in the platform.
|
|
464
|
-
"""
|
|
465
|
-
if self._item is not None:
|
|
466
|
-
self._item._Item__update_item_binary(_json=self.to_json())
|
|
467
|
-
self._item = self._item.update()
|
|
468
|
-
else:
|
|
469
|
-
raise ValueError('Cannot update PromptItem without an item.')
|
|
470
|
-
|
|
471
|
-
# Properties
|
|
472
|
-
@property
def item(self) -> Optional['entities.Item']:
    """
    The platform Item backing this PromptItem.

    :return: the attached Item, or None when no item is attached
    :rtype: Optional[dtlpy.entities.Item]
    """
    return self._item


@item.setter
def item(self, item: Optional['entities.Item']):
    """
    Attach (or detach, with None) the platform Item backing this PromptItem.

    :param item: Item to attach, or None
    :type item: Optional[dtlpy.entities.Item]
    :raises ValueError: if item is neither None nor a dtlpy.entities.Item
    """
    # None is always accepted; anything else must be a real Item entity
    is_acceptable = item is None or isinstance(item, entities.Item)
    if not is_acceptable:
        raise ValueError(f"Expected dtlpy.entities.Item or None, got {type(item)}")
    self._item = item
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
@property
|
|
496
|
-
def metadata(self) -> dict:
|
|
497
|
-
"""
|
|
498
|
-
Get the metadata from the underlying Item object.
|
|
499
|
-
|
|
500
|
-
:return: Metadata dictionary from the item, or empty dict if no item exists.
|
|
501
|
-
:rtype: dict
|
|
502
|
-
"""
|
|
503
|
-
if self._item is not None:
|
|
504
|
-
return self._item.metadata
|
|
505
|
-
else:
|
|
1
|
+
import requests
|
|
2
|
+
import logging
|
|
3
|
+
import base64
|
|
4
|
+
import enum
|
|
5
|
+
import json
|
|
6
|
+
import io
|
|
7
|
+
import os
|
|
8
|
+
from typing import List, Optional
|
|
9
|
+
|
|
10
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
11
|
+
from .. import entities, repositories
|
|
12
|
+
from dtlpy.services.api_client import client as client_api
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(name='dtlpy')
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class PromptType(str, enum.Enum):
    """
    Mimetypes accepted for a single prompt element.

    Members also subclass ``str``, so each one compares equal to its raw string value.
    """
    # textual content
    TEXT = 'application/text'
    # media content, expressed as wildcard mimetypes
    IMAGE = 'image/*'
    AUDIO = 'audio/*'
    VIDEO = 'video/*'
    # prompt-level metadata rather than content
    METADATA = 'metadata'
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class Prompt:
    """
    A single prompt: an ordered list of elements (text, image, ...) plus a metadata dict.
    """

    def __init__(self, key, role='user'):
        """
        Create a single Prompt. Prompt can contain multiple mimetype elements, e.g. text sentence and an image.

        :param key: unique identifier of the prompt in the item
        :param role: role stored under ``metadata['role']``, defaults to ``'user'``
        """
        self.key = key
        self.elements = list()
        # to avoid broken stream of json files - DAT-75653
        # NOTE: this mutates the headers of the shared module-level client
        client_api.default_headers['x-dl-sanitize'] = '0'
        self._items = repositories.Items(client_api=client_api)
        self.metadata = {'role': role}

    def add_element(self, value, mimetype='application/text'):
        """
        Add one element to the prompt.

        A ``metadata`` element whose value is a dict is merged into ``self.metadata``;
        any other element is appended to ``self.elements``.

        :param value: url or string of the input (a dict for ``metadata``)
        :param mimetype: mimetype of the input, one of the PromptType values:
            `application/text`, `image/*`, `audio/*`, `video/*`, `metadata`
        :raises ValueError: if mimetype is not one of the PromptType values
        :return:
        """
        allowed_prompt_types = list(PromptType)
        if mimetype not in allowed_prompt_types:
            raise ValueError(f'Invalid mimetype: {mimetype}. Allowed values: {allowed_prompt_types}')
        if mimetype == PromptType.METADATA and isinstance(value, dict):
            self.metadata.update(value)
        else:
            self.elements.append({'mimetype': mimetype,
                                  'value': value})

    def to_json(self):
        """
        Convert Prompt entity to the item json

        :return: ``{key: [element, ..., metadata element]}``; the metadata element is always last
        """
        elements_json = [
            {
                "mimetype": e['mimetype'],
                "value": e['value'],
            } for e in self.elements if e['mimetype'] != PromptType.METADATA
        ]
        elements_json.append({
            "mimetype": PromptType.METADATA,
            "value": self.metadata
        })
        return {
            self.key: elements_json
        }

    def _convert_stream_to_binary(self, image_url: str):
        """
        Download an image and return it as a base64 data URI.

        :param image_url: external image url, dataloop item stream url, or dataloop item id
        :return: ``data:image/jpeg;base64,...`` string, or None when no image could be read
        """
        image_buffer = None
        if '.' in image_url and 'dataloop.ai' not in image_url:
            # URL and not DL item stream
            try:
                # the context manager closes the streamed response even when its body is never read
                with requests.get(image_url, stream=True) as response:
                    response.raise_for_status()  # Raise an exception for bad status codes

                    # a response without a Content-Type header is treated as "not an image"
                    content_type = response.headers.get("Content-Type", "")
                    if content_type.startswith("image/"):
                        # the body is fetched in 1 KiB chunks and assembled into one bytes object
                        image_buffer = b"".join(response.iter_content(1024))
            except requests.exceptions.RequestException as e:
                logger.error(f"Failed to download image from URL: {image_url}, error: {e}")

        elif '.' in image_url and 'stream' in image_url:
            # DL Stream URL
            item_id = image_url.split("/stream")[0].split("/items/")[-1]
            image_buffer = self._items.get(item_id=item_id).download(save_locally=False).getvalue()
        else:
            # DL item ID
            image_buffer = self._items.get(item_id=image_url).download(save_locally=False).getvalue()

        if image_buffer is None:
            logger.error(f'Invalid image url: {image_url}')
            return None

        encoded_image = base64.b64encode(image_buffer).decode()
        # NOTE: the data URI is always labelled image/jpeg, whatever the downloaded format is
        return f'data:image/jpeg;base64,{encoded_image}'

    def messages(self):
        """
        Return the prompt elements as a list of messages together with the prompt key.
        Messages follow the openai SDK format https://platform.openai.com/docs/guides/vision

        :return: tuple of (list of message dicts, prompt key)
        :raises NotImplementedError: for audio elements
        :raises ValueError: for any other element mimetype that is not text, image or video
        """
        messages = []
        for element in self.elements:
            mimetype = element['mimetype']
            if mimetype == PromptType.TEXT:
                data = {
                    "type": "text",
                    "text": element['value']
                }
            elif mimetype == PromptType.IMAGE:
                # images are inlined as base64 data URIs
                data = {
                    "type": "image_url",
                    "image_url": {
                        "url": self._convert_stream_to_binary(element['value'])
                    }
                }
            elif mimetype == PromptType.AUDIO:
                raise NotImplementedError('Audio prompt is not supported yet')
            elif mimetype == PromptType.VIDEO:
                # videos are passed by reference, the value is used as-is
                data = {
                    "type": "video_url",
                    "video_url": {
                        "url": element['value']
                    }
                }
            else:
                raise ValueError(f'Invalid mimetype: {element["mimetype"]}')
            messages.append(data)
        return messages, self.key
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
class PromptItem:
|
|
148
|
+
def __init__(self, name, item: entities.Item = None, role_mapping=None):
    """
    Create a prompt item, optionally bound to an existing platform item.

    :param name: prompt item name
    :param item: platform Item to load from; must be a json item with system.shebang.dltype = prompt
    :param role_mapping: dict mapping a message role to 'item' or 'annotation';
        defaults to {'user': 'item', 'assistant': 'annotation'}
    :raises ValueError: if role_mapping is not a dict, or item is not a prompt json item
    """
    mapping = {'user': 'item', 'assistant': 'annotation'} if role_mapping is None else role_mapping
    if not isinstance(mapping, dict):
        raise ValueError(f'input role_mapping must be dict. type: {type(mapping)}')
    self.role_mapping = mapping

    self.name = name
    # user prompts, stored in the item json
    self.prompts = list()
    # assistant prompts, backed by annotations
    self.assistant_prompts = list()
    # Dataloop Item
    self._item: entities.Item = item
    self._messages = []
    self._annotations: entities.AnnotationCollection = None

    if item is None:
        self._items = repositories.Items(client_api=client_api)
    else:
        is_prompt_item = 'json' in item.mimetype and \
                         item.system.get('shebang', dict()).get('dltype') == 'prompt'
        if not is_prompt_item:
            raise ValueError('Expecting a json item with system.shebang.dltype = prompt')
        self._items = item.items
        self.fetch()

    # to avoid broken stream of json files - DAT-75653
    # NOTE(review): with an item, fetch() has already downloaded the json before this header is set - confirm
    self._items._client_api.default_headers['x-dl-sanitize'] = '0'
|
|
175
|
+
|
|
176
|
+
@classmethod
def from_messages(cls, messages: list):
    """
    Placeholder alternate constructor: it has no implementation and returns None.

    :param messages: list of messages (currently unused)
    :return: None
    """
    return None
|
|
179
|
+
|
|
180
|
+
@classmethod
def from_item(cls, item: entities.Item):
    """
    Build a PromptItem from an existing platform item.

    :param item: Item object; must be a json item with system.shebang.dltype = prompt
    :return: PromptItem object named after the item
    :raises ValueError: if the item is not a prompt json item
    """
    is_prompt_item = 'json' in item.mimetype and \
                     item.system.get('shebang', dict()).get('dltype') == 'prompt'
    if not is_prompt_item:
        raise ValueError('Expecting a json item with system.shebang.dltype = prompt')
    return cls(name=item.name, item=item)
|
|
190
|
+
|
|
191
|
+
@classmethod
def from_local_file(cls, filepath):
    """
    Build a new prompt item from a local json file.

    The new prompt item is named with the given filepath and its prompts are
    loaded from the parsed file content.

    :param filepath: path to the file
    :return: PromptItem object
    :raises FileNotFoundError: if filepath does not exist
    :raises ValueError: if the file extension does not contain 'json'
    """
    if not os.path.exists(filepath):
        raise FileNotFoundError(f'File does not exists: {filepath}')
    extension = os.path.splitext(filepath)[-1]
    if 'json' not in extension:
        raise ValueError(f'Expected path to json item, got {extension}')

    prompt_item = cls(name=filepath)
    with open(filepath, 'r', encoding='utf-8') as f:
        data = json.load(f)
    prompt_item.prompts = prompt_item._load_item_prompts(data=data)
    return prompt_item
|
|
207
|
+
|
|
208
|
+
@staticmethod
def _load_item_prompts(data):
    """
    Build the user prompts from the parsed item json.

    :param data: parsed item json; prompts are read from its 'prompts' dict (key -> list of elements)
    :return: list of Prompt objects with role "user", in the order of the 'prompts' dict
    """
    prompts = list()
    for prompt_key, prompt_elements in data.get('prompts', dict()).items():
        # normalise every element first: 'mimetype' is mandatory, 'value' defaults to an empty dict
        content = [{'value': element.get('value', dict()),
                    'mimetype': element['mimetype']}
                   for element in prompt_elements]
        prompt = Prompt(key=prompt_key, role="user")
        for element in content:
            prompt.add_element(value=element['value'],
                               mimetype=element['mimetype'])
        prompts.append(prompt)
    return prompts
|
|
222
|
+
|
|
223
|
+
@staticmethod
def _load_annotations_prompts(annotations: entities.AnnotationCollection):
    """
    Build the assistant prompts from the annotations of the item.

    Each annotation becomes one Prompt with role 'assistant', keyed by the annotation's
    metadata.system.promptId, holding one content element plus the annotation id and
    metadata.user.model as prompt metadata.

    :param annotations: annotations of the item
    :return: list of assistant Prompt objects, one per annotation
    :raises ValueError: for annotation types other than 'ref_image' and 'text'
    """
    assistant_prompts = list()
    for annotation in annotations:
        prompt_id = annotation.metadata.get('system', dict()).get('promptId', None)
        model_info = annotation.metadata.get('user', dict()).get('model', dict())
        annotation_id = annotation.id

        # pick the element mimetype from the annotation type before building anything
        if annotation.type == 'ref_image':
            mimetype = PromptType.IMAGE
        elif annotation.type == 'text':
            mimetype = PromptType.TEXT
        else:
            raise ValueError(f"Unsupported annotation type: {annotation.type}")

        prompt = Prompt(key=prompt_id, role='assistant')
        coordinates = annotation.annotation_definition.coordinates
        # image annotations keep their reference under 'ref'; text annotations use the coordinates as-is
        value = coordinates.get('ref') if mimetype == PromptType.IMAGE else coordinates
        prompt.add_element(value=value, mimetype=mimetype)

        prompt.add_element(value={'id': annotation_id,
                                  'model_info': model_info},
                           mimetype=PromptType.METADATA)
        assistant_prompts.append(prompt)
    return assistant_prompts
|
|
250
|
+
|
|
251
|
+
def to_json(self):
    """
    Serialise the user prompts into the json stored in the platform item.

    :return: dict with "shebang", "metadata" (dltype 'prompt') and "prompts",
        where "prompts" merges the json of every prompt in ``self.prompts``
    """
    merged_prompts = {}
    for prompt in self.prompts:
        # a later prompt with the same key overrides an earlier one
        merged_prompts.update(prompt.to_json())
    return {
        "shebang": "dataloop",
        "metadata": {
            "dltype": 'prompt'
        },
        "prompts": merged_prompts
    }
|
|
268
|
+
|
|
269
|
+
def to_messages(self, model_name=None, include_assistant=True):
    """
    Flatten the prompts into a list of chat messages, grouped by prompt key.

    For every prompt key the user message comes first, followed by the matching
    assistant messages. The result is also stored in ``self._messages``.

    :param model_name: when given, only assistant prompts whose metadata model_info name
        equals it are included
    :param include_assistant: assistant prompts are added only when this is exactly True
    :return: list of ``{'role': ..., 'content': ...}`` dicts
    """
    grouped_messages = dict()
    for prompt in self.prompts:
        bucket = grouped_messages.setdefault(prompt.key, list())
        prompt_messages, _ = prompt.messages()
        bucket.append({
            'role': prompt.metadata.get('role', 'user'),
            'content': prompt_messages
        })

    if include_assistant is True:
        # keep only the assistant prompts of the requested model
        for prompt in self.assistant_prompts:
            # model_info may be stored as None (add() without model_info), so fall back to an empty dict
            prompt_model_name = (prompt.metadata.get('model_info') or dict()).get('name')
            if model_name is not None and prompt_model_name != model_name:
                continue
            if prompt.key not in grouped_messages:
                logger.warning(
                    f'Prompt key {prompt.key} is not found in the user prompts, skipping Assistant prompt')
                continue
            prompt_messages, _ = prompt.messages()
            grouped_messages[prompt.key].append({
                'role': 'assistant',
                'content': prompt_messages
            })

    self._messages = [message for bucket in grouped_messages.values() for message in bucket]
    return self._messages
|
|
302
|
+
|
|
303
|
+
def to_bytes_io(self):
    """
    Serialise the prompt item json into an in-memory binary buffer.

    :return: io.BytesIO holding the encoded json, positioned at the start, with ``name`` set to the prompt item name
    """
    # Used for item upload, do not delete
    payload = json.dumps(self.to_json()).encode()
    byte_io = io.BytesIO(payload)
    byte_io.name = self.name
    return byte_io
|
|
310
|
+
|
|
311
|
+
def fetch(self):
    """
    Reload the item, its annotations and both prompt lists from the platform.

    :raises ValueError: if no item is attached
    """
    if self._item is None:
        raise ValueError('Missing item, nothing to fetch..')

    refreshed_item = self._items.get(item_id=self._item.id)
    self._item = refreshed_item
    self._annotations = refreshed_item.annotations.list()

    # user prompts come from the item json, assistant prompts from its annotations
    item_stream = refreshed_item.download(save_locally=False)
    self.prompts = self._load_item_prompts(data=json.load(item_stream))
    self.assistant_prompts = self._load_annotations_prompts(self._annotations)
|
|
318
|
+
|
|
319
|
+
def build_context(self, nearest_items, add_metadata=None) -> str:
    """
    Create a context string from a list of nearest items.

    Every item is downloaded and decoded as utf-8 text, then appended to the context as
    ``<source>`` (the requested metadata, one ``name:value`` line per path) followed by
    ``<text>`` (the item content). Items are downloaded concurrently and kept in input order.

    add_metadata is a list of locations in the item.metadata to add to the context,
    for instance ['system.document.source']. Each line is labelled with the last part of its path.

    :param nearest_items: list of item ids
    :param add_metadata: list of dotted metadata paths to add to the context
    :return: the context string, empty when nearest_items is empty
    """
    if add_metadata is None:
        add_metadata = list()

    def stream_single(w_id):
        # download one item and collect its text and the requested metadata lines
        context_item = self._items.get(item_id=w_id)
        buf = context_item.download(save_locally=False)
        text = buf.read().decode(encoding='utf-8')
        metadata_lines = list()
        for path in add_metadata:
            value = context_item.metadata
            part = ""
            for part in path.split('.'):
                # once the walk leaves the nested dicts, the value collapses to an empty string
                value = value.get(part) if isinstance(value, dict) else ""
            metadata_lines.append(f"{part}:{value}\n")
        return text, "".join(metadata_lines)

    # nothing to download: skip creating the worker pool altogether
    if len(nearest_items) == 0:
        return ""

    # the context manager shuts the pool down, so no worker threads are left behind
    with ThreadPoolExecutor(max_workers=32) as pool:
        results = list(pool.map(stream_single, nearest_items))

    return "".join(f"\n<source>\n{source}\n</source>\n<text>\n{text}\n</text>" for text, source in results)
|
|
356
|
+
|
|
357
|
+
def add(self,
        message: dict,
        prompt_key: str = None,
        model_info: dict = None):
    """
    Add a message to the prompt item.

    The message role is looked up in ``self.role_mapping`` (unknown roles map to 'item'):

    * 'item' - the content becomes a new user Prompt; when an item is attached, the item
      binary is rewritten with the updated json.
    * anything else - the first content element is stored as an annotation. If an assistant
      prompt with the same prompt key and model name already exists, its annotation is
      updated; otherwise a new annotation is uploaded.

    :param message: dict with 'role' (default 'user') and 'content', a list of
        ``{'mimetype': ..., 'value': ...}`` elements
    :param prompt_key: key of the prompt. Defaults to ``len(self.prompts) + 1`` for item
        messages and to ``len(self.prompts)`` for annotation messages
    :param model_info: dict describing the model that produced the message; its 'name' is used
        to look for an existing annotation. When None, a new annotation is always created
    :return:
    :raises AssertionError: if more than one existing annotation matches the key and model name
    :raises NotImplementedError: if a new annotation message is neither text nor image
    """
    role = message.get('role', 'user')
    content = message.get('content', list())

    if self.role_mapping.get(role, 'item') == 'item':
        if prompt_key is None:
            prompt_key = str(len(self.prompts) + 1)
        # for new prompt we need a new key
        prompt = Prompt(key=prompt_key, role=role)
        for element in content:
            prompt.add_element(value=element.get('value', ''),
                               mimetype=element.get('mimetype', PromptType.TEXT))

        # create new prompt and add to prompts
        self.prompts.append(prompt)
        if self._item is not None:
            self._item._Item__update_item_binary(_json=self.to_json())
        return

    if prompt_key is None:
        prompt_key = str(len(self.prompts))
    # only the first content element of an annotation message is used
    assistant_message = content[0]
    assistant_mimetype = assistant_message.get('mimetype', PromptType.TEXT)
    uploaded_annotation = None

    # look for an existing assistant prompt of the same key and model
    existing_prompt = None
    if model_info is not None:
        # dont search for existing if there's no model information
        existing_prompts = [
            prompt for prompt in self.assistant_prompts
            # model_info may be stored as None, so fall back to an empty dict before reading the name
            if prompt.key == prompt_key
            and (prompt.metadata.get('model_info') or dict()).get('name') == model_info.get('name')
        ]
        if len(existing_prompts) > 1:
            # TODO how to handle multiple annotations
            # raised explicitly so the check is not stripped when running with -O
            raise AssertionError("shouldn't be here! more than 1 annotation for a single model")
        if len(existing_prompts) == 1:
            # found model annotation to upload
            existing_prompt = existing_prompts[0]

    if existing_prompt is None:
        # no annotation found: create the prompt with the same role the annotation loader uses
        prompt = Prompt(key=prompt_key, role='assistant')
        if assistant_mimetype == PromptType.TEXT:
            annotation_definition = entities.FreeText(text=assistant_message.get('value'))
            prompt.add_element(value=annotation_definition.to_coordinates(None),
                               mimetype=PromptType.TEXT)
        elif assistant_mimetype == PromptType.IMAGE:
            annotation_definition = entities.RefImage(ref=assistant_message.get('value'))
            prompt.add_element(value=annotation_definition.to_coordinates(None).get('ref'),
                               mimetype=PromptType.IMAGE)
        else:
            raise NotImplementedError('Only images of mimetype image and text are supported')
        metadata = {'system': {'promptId': prompt_key},
                    'user': {'model': model_info}}
        prompt.add_element(mimetype=PromptType.METADATA,
                           value={"model_info": model_info})

        existing_annotation = entities.Annotation.new(item=self._item,
                                                      metadata=metadata,
                                                      annotation_definition=annotation_definition)
        uploaded_annotation = existing_annotation.upload()
        prompt.add_element(mimetype=PromptType.METADATA,
                           value={"id": uploaded_annotation.id})
        existing_prompt = prompt
        self.assistant_prompts.append(prompt)

    # the last non-metadata element of the prompt carries the assistant value
    existing_prompt_element = [element for element in existing_prompt.elements if
                               element['mimetype'] != PromptType.METADATA][-1]
    existing_prompt_element['value'] = assistant_message.get('value')
    if uploaded_annotation is None:
        # Creating annotation with old dict to match platform dict
        annotation_definition = entities.FreeText(text='')
        metadata = {'system': {'promptId': prompt_key},
                    'user': {'model': existing_prompt.metadata.get('model_info')}}
        annotation = entities.Annotation.new(item=self._item,
                                             metadata=metadata,
                                             annotation_definition=annotation_definition
                                             )
        annotation.id = existing_prompt.metadata['id']
        # set the platform dict to match the old annotation for the dict difference check, otherwise it won't
        # update
        annotation._platform_dict = annotation.to_json()
        # update the annotation with the new text
        annotation.annotation_definition.text = existing_prompt_element['value']
        self._item.annotations.update(annotation)
|
|
460
|
+
|
|
461
|
+
def update(self):
    """
    Push the current prompts to the platform item.

    Rewrites the item binary with ``self.to_json()`` and then replaces ``self._item``
    with the object returned by ``item.update()``.

    :raises ValueError: if no item is attached to this PromptItem
    """
    # guard first: nothing can be pushed without an attached item
    if self._item is None:
        raise ValueError('Cannot update PromptItem without an item.')
    self._item._Item__update_item_binary(_json=self.to_json())
    self._item = self._item.update()
|
|
470
|
+
|
|
471
|
+
# Properties
|
|
472
|
+
@property
def item(self) -> Optional['entities.Item']:
    """
    The platform Item backing this PromptItem.

    :return: the attached Item, or None when no item is attached
    :rtype: Optional[dtlpy.entities.Item]
    """
    return self._item


@item.setter
def item(self, item: Optional['entities.Item']):
    """
    Attach (or detach, with None) the platform Item backing this PromptItem.

    :param item: Item to attach, or None
    :type item: Optional[dtlpy.entities.Item]
    :raises ValueError: if item is neither None nor a dtlpy.entities.Item
    """
    # None is always accepted; anything else must be a real Item entity
    is_acceptable = item is None or isinstance(item, entities.Item)
    if not is_acceptable:
        raise ValueError(f"Expected dtlpy.entities.Item or None, got {type(item)}")
    self._item = item
|
|
493
|
+
|
|
494
|
+
|
|
495
|
+
@property
def metadata(self) -> dict:
    """
    Metadata of the underlying Item object.

    :return: the metadata dictionary of the attached item
    :rtype: dict
    :raises ValueError: if no item is attached
    """
    # without an item there is no metadata to expose
    if self._item is None:
        raise ValueError('No item found, cannot get metadata, to set item use prompt_item.item = item')
    return self._item.metadata
|