dataverse-sdk 2.2.2__tar.gz → 2.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/PKG-INFO +6 -4
- {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/README.md +5 -3
- {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk/apis/backend.py +13 -0
- {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk/client.py +37 -0
- {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk/export/base.py +1 -0
- {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk/export/coco.py +4 -1
- {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk/export/exporter.py +6 -4
- dataverse_sdk-2.3.1/dataverse_sdk/export/utils.py +53 -0
- {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk/export/visionai.py +130 -43
- {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk/export/yolo.py +4 -1
- {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk/schemas/api.py +11 -1
- {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk/schemas/client.py +26 -1
- {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk.egg-info/PKG-INFO +6 -4
- {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/setup.py +1 -1
- dataverse_sdk-2.2.2/dataverse_sdk/export/utils.py +0 -26
- {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk/__init__.py +0 -0
- {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk/apis/__init__.py +0 -0
- {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk/apis/third_party.py +0 -0
- {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk/connections.py +0 -0
- {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk/constants.py +0 -0
- {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk/exceptions/__init__.py +0 -0
- {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk/exceptions/client.py +0 -0
- {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk/export/__init__.py +0 -0
- {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk/export/constant.py +0 -0
- {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk/export/vqa.py +0 -0
- {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk/schemas/__init__.py +0 -0
- {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk/schemas/common.py +0 -0
- {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk/schemas/format.py +0 -0
- {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk/utils/__init__.py +0 -0
- {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk/utils/utils.py +0 -0
- {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk.egg-info/SOURCES.txt +0 -0
- {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk.egg-info/dependency_links.txt +0 -0
- {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk.egg-info/requires.txt +0 -0
- {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk.egg-info/top_level.txt +0 -0
- {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dataverse-sdk
|
|
3
|
-
Version: 2.2.2
|
|
3
|
+
Version: 2.3.1
|
|
4
4
|
Summary: Dataverse SDK For Python
|
|
5
5
|
Home-page:
|
|
6
6
|
Author: LinkerVision
|
|
@@ -55,19 +55,21 @@ Interaction with the Dataverse site starts with an instance of the `DataverseCli
|
|
|
55
55
|
```Python
|
|
56
56
|
from dataverse_sdk import *
|
|
57
57
|
from dataverse_sdk.connections import get_connection
|
|
58
|
+
from dataverse_sdk.constants import DataverseHost
|
|
59
|
+
|
|
58
60
|
client = DataverseClient(
|
|
59
|
-
host=DataverseHost.PRODUCTION, email="XXX", password="***", service_id="xxxx-xxxx-xx-xxx", alias="default", force = False
|
|
61
|
+
host=DataverseHost.PRODUCTION.value, email="XXX", password="***", service_id="xxxx-xxxx-xx-xxx", alias="default", force = False
|
|
60
62
|
)
|
|
61
63
|
assert client is get_connection("default")
|
|
62
64
|
|
|
63
65
|
# Should provide different alias if you are trying to connect to different workspaces
|
|
64
66
|
client2 = DataverseClient(
|
|
65
|
-
host=DataverseHost.PRODUCTION, email="account-2", password="***", service_id="xxxx-xxxx-xx-xxx", alias="client2", force = False
|
|
67
|
+
host=DataverseHost.PRODUCTION.value, email="account-2", password="***", service_id="xxxx-xxxx-xx-xxx", alias="client2", force = False
|
|
66
68
|
)
|
|
67
69
|
assert client2 is get_connection(client2.alias)
|
|
68
70
|
|
|
69
71
|
client3 = DataverseClient(
|
|
70
|
-
host=DataverseHost.PRODUCTION, email="XXX", password="", service_id="xxxx-xxxx-xx-xxx", access_token="xxx"
|
|
72
|
+
host=DataverseHost.PRODUCTION.value, email="XXX", password="", service_id="xxxx-xxxx-xx-xxx", access_token="xxx"
|
|
71
73
|
)
|
|
72
74
|
assert client3 is get_connection(client3.alias)
|
|
73
75
|
```
|
|
@@ -28,19 +28,21 @@ Interaction with the Dataverse site starts with an instance of the `DataverseCli
|
|
|
28
28
|
```Python
|
|
29
29
|
from dataverse_sdk import *
|
|
30
30
|
from dataverse_sdk.connections import get_connection
|
|
31
|
+
from dataverse_sdk.constants import DataverseHost
|
|
32
|
+
|
|
31
33
|
client = DataverseClient(
|
|
32
|
-
host=DataverseHost.PRODUCTION, email="XXX", password="***", service_id="xxxx-xxxx-xx-xxx", alias="default", force = False
|
|
34
|
+
host=DataverseHost.PRODUCTION.value, email="XXX", password="***", service_id="xxxx-xxxx-xx-xxx", alias="default", force = False
|
|
33
35
|
)
|
|
34
36
|
assert client is get_connection("default")
|
|
35
37
|
|
|
36
38
|
# Should provide different alias if you are trying to connect to different workspaces
|
|
37
39
|
client2 = DataverseClient(
|
|
38
|
-
host=DataverseHost.PRODUCTION, email="account-2", password="***", service_id="xxxx-xxxx-xx-xxx", alias="client2", force = False
|
|
40
|
+
host=DataverseHost.PRODUCTION.value, email="account-2", password="***", service_id="xxxx-xxxx-xx-xxx", alias="client2", force = False
|
|
39
41
|
)
|
|
40
42
|
assert client2 is get_connection(client2.alias)
|
|
41
43
|
|
|
42
44
|
client3 = DataverseClient(
|
|
43
|
-
host=DataverseHost.PRODUCTION, email="XXX", password="", service_id="xxxx-xxxx-xx-xxx", access_token="xxx"
|
|
45
|
+
host=DataverseHost.PRODUCTION.value, email="XXX", password="", service_id="xxxx-xxxx-xx-xxx", access_token="xxx"
|
|
44
46
|
)
|
|
45
47
|
assert client3 is get_connection(client3.alias)
|
|
46
48
|
```
|
|
@@ -460,6 +460,19 @@ class BackendAPI:
|
|
|
460
460
|
)
|
|
461
461
|
return resp.json()
|
|
462
462
|
|
|
463
|
+
def create_custom_model(self, **kwargs):
|
|
464
|
+
permission = kwargs.pop("permission", "")
|
|
465
|
+
if permission:
|
|
466
|
+
self.headers["X-Request-Source"] = permission
|
|
467
|
+
|
|
468
|
+
resp = self.send_request(
|
|
469
|
+
url=f"{self.host}/api/ml_models/custom-model/",
|
|
470
|
+
method="post",
|
|
471
|
+
headers=self.headers,
|
|
472
|
+
data=kwargs,
|
|
473
|
+
)
|
|
474
|
+
return resp.json()
|
|
475
|
+
|
|
463
476
|
|
|
464
477
|
class AsyncBackendAPI:
|
|
465
478
|
def __init__(
|
|
@@ -26,6 +26,7 @@ from .exceptions.client import (
|
|
|
26
26
|
)
|
|
27
27
|
from .schemas.api import (
|
|
28
28
|
AttributeAPISchema,
|
|
29
|
+
CreateCustomModelAPISchema,
|
|
29
30
|
DatasetAPISchema,
|
|
30
31
|
OntologyAPISchema,
|
|
31
32
|
ProjectAPISchema,
|
|
@@ -690,10 +691,14 @@ class DataverseClient:
|
|
|
690
691
|
raise InvalidProcessError("The project type is not VQA!")
|
|
691
692
|
output_list = []
|
|
692
693
|
for question in project.ontology.classes:
|
|
694
|
+
answer = question.attributes[0]
|
|
695
|
+
option_list = [opt.value for opt in answer.options]
|
|
693
696
|
output_list.append(
|
|
694
697
|
{
|
|
695
698
|
"question_id": question.rank,
|
|
696
699
|
"question": question.extended_class["question"],
|
|
700
|
+
"type": answer.type,
|
|
701
|
+
"options": option_list,
|
|
697
702
|
}
|
|
698
703
|
)
|
|
699
704
|
import json
|
|
@@ -1747,6 +1752,38 @@ of this project OR has been added before"
|
|
|
1747
1752
|
)
|
|
1748
1753
|
return create_dataset_uuid
|
|
1749
1754
|
|
|
1755
|
+
@staticmethod
|
|
1756
|
+
def create_custom_model(
|
|
1757
|
+
project: Project,
|
|
1758
|
+
name: str,
|
|
1759
|
+
input_classes: list[str],
|
|
1760
|
+
resolution_width: int,
|
|
1761
|
+
resolution_height: int,
|
|
1762
|
+
model_structure: str,
|
|
1763
|
+
weight_url: str,
|
|
1764
|
+
client: Optional["DataverseClient"] = None,
|
|
1765
|
+
client_alias: Optional[str] = None,
|
|
1766
|
+
permission: str = "",
|
|
1767
|
+
):
|
|
1768
|
+
try:
|
|
1769
|
+
payload = CreateCustomModelAPISchema(
|
|
1770
|
+
project_id=project.id,
|
|
1771
|
+
name=name,
|
|
1772
|
+
input_classes=input_classes,
|
|
1773
|
+
resolution_width=resolution_width,
|
|
1774
|
+
resolution_height=resolution_height,
|
|
1775
|
+
model_structure=model_structure,
|
|
1776
|
+
weight_url=weight_url,
|
|
1777
|
+
).model_dump()
|
|
1778
|
+
except ValidationError as e:
|
|
1779
|
+
raise APIValidationError(f"Something wrong when creating custom model: {e}")
|
|
1780
|
+
|
|
1781
|
+
api, _ = DataverseClient._get_api_client(
|
|
1782
|
+
client=client, client_alias=client_alias, is_async=False
|
|
1783
|
+
)
|
|
1784
|
+
|
|
1785
|
+
api.create_custom_model(**payload, permission=permission)
|
|
1786
|
+
|
|
1750
1787
|
@staticmethod
|
|
1751
1788
|
async def run_generate_presigned_urls(
|
|
1752
1789
|
file_paths: list, api: AsyncBackendAPI, data_folder: str
|
|
@@ -22,7 +22,7 @@ from .constant import (
|
|
|
22
22
|
ExportFormat,
|
|
23
23
|
)
|
|
24
24
|
from .exporter import Exporter
|
|
25
|
-
from .utils import convert_to_bytes
|
|
25
|
+
from .utils import convert_to_bytes, gen_empty_vai
|
|
26
26
|
|
|
27
27
|
|
|
28
28
|
@Exporter.register(format=ExportFormat.COCO)
|
|
@@ -156,6 +156,9 @@ def convert_annotation(
|
|
|
156
156
|
datarow["items"].get("predictions", {}).get(annotation_name, {})
|
|
157
157
|
)
|
|
158
158
|
|
|
159
|
+
if not target_visionai:
|
|
160
|
+
target_visionai = gen_empty_vai(datarow=datarow, sequence_folder_url="")
|
|
161
|
+
|
|
159
162
|
(
|
|
160
163
|
category_idx_map,
|
|
161
164
|
image_update,
|
|
@@ -77,6 +77,7 @@ class Exporter:
|
|
|
77
77
|
sequence_frame_map: dict,
|
|
78
78
|
question_id_map: dict,
|
|
79
79
|
annotation_name: str,
|
|
80
|
+
is_sequential: bool,
|
|
80
81
|
) -> AsyncGenerator[tuple[bytes, str]]:
|
|
81
82
|
async for data, path in self.export_annot.producer(
|
|
82
83
|
class_names=class_names,
|
|
@@ -85,6 +86,7 @@ class Exporter:
|
|
|
85
86
|
target_folder=self.target_folder,
|
|
86
87
|
datarow_generator_func=await self._gen(self.curation_api),
|
|
87
88
|
annotation_name=annotation_name,
|
|
89
|
+
is_sequential=is_sequential,
|
|
88
90
|
):
|
|
89
91
|
if not path:
|
|
90
92
|
continue
|
|
@@ -141,7 +143,7 @@ class Exporter:
|
|
|
141
143
|
gen: AsyncGenerator = curation_api.get_datarows(
|
|
142
144
|
id_set_list=id_chunks,
|
|
143
145
|
batch_size=BATCH_SIZE,
|
|
144
|
-
fields="id,items,vlm_items,url,frame_id,image_width,image_height,sensor_name,original_url",
|
|
146
|
+
fields="id,items,vlm_items,url,frame_id,image_width,image_height,sensor_name,original_url,type",
|
|
145
147
|
)
|
|
146
148
|
async for batched_datarow in gen:
|
|
147
149
|
for datarow in batched_datarow:
|
|
@@ -238,9 +240,9 @@ def get_datarow_sequences(
|
|
|
238
240
|
for frame_datarow_id, datarow_id_list in sequence_frame_map[
|
|
239
241
|
sequence_datarow_id
|
|
240
242
|
].items():
|
|
241
|
-
new_datarows_sequence_map[sequence_order][
|
|
242
|
-
|
|
243
|
-
|
|
243
|
+
new_datarows_sequence_map[sequence_order][frame_datarow_id] = (
|
|
244
|
+
datarow_id_list
|
|
245
|
+
)
|
|
244
246
|
if not is_sequential or sequence_datarow_id == NONE_SEQUENCE_DATAROW_ID:
|
|
245
247
|
sequence_order += 1
|
|
246
248
|
if is_sequential:
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from typing import Union
|
|
3
|
+
|
|
4
|
+
from visionai_data_format.schemas.visionai_schema import (
|
|
5
|
+
Frame,
|
|
6
|
+
FrameProperties,
|
|
7
|
+
FramePropertyStream,
|
|
8
|
+
)
|
|
9
|
+
|
|
10
|
+
from ..apis.third_party import ThirdPartyAPI
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def convert_to_bytes(obj: Union[dict, list, str]) -> bytes:
|
|
14
|
+
if isinstance(obj, (dict, list)):
|
|
15
|
+
jstr = json.dumps(obj)
|
|
16
|
+
elif isinstance(obj, str):
|
|
17
|
+
jstr = obj
|
|
18
|
+
else:
|
|
19
|
+
raise TypeError("un-support type")
|
|
20
|
+
return bytes(jstr, encoding="utf8")
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
async def download_url_file_async(data_url: str) -> bytes | None:
|
|
24
|
+
# get data from url link
|
|
25
|
+
try:
|
|
26
|
+
data: bytes = await ThirdPartyAPI.async_download_file(
|
|
27
|
+
url=data_url, method="GET"
|
|
28
|
+
)
|
|
29
|
+
except Exception:
|
|
30
|
+
print(f"Retrieving data from url {data_url} error")
|
|
31
|
+
return None
|
|
32
|
+
return data
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def gen_empty_vai(datarow: dict, sequence_folder_url: str) -> dict:
|
|
36
|
+
new_sensor_data_folder = f"{sequence_folder_url}/data/{datarow['sensor_name']}/"
|
|
37
|
+
dest_url = f"{new_sensor_data_folder}{datarow['url'].split('/')[-1]}"
|
|
38
|
+
|
|
39
|
+
# generate visionai empty frame
|
|
40
|
+
frames = {}
|
|
41
|
+
frame_num = datarow["frame_id"]
|
|
42
|
+
frames[frame_num] = Frame(
|
|
43
|
+
frame_properties=FrameProperties(
|
|
44
|
+
streams={datarow["sensor_name"]: FramePropertyStream(uri=dest_url)}
|
|
45
|
+
),
|
|
46
|
+
objects={},
|
|
47
|
+
).model_dump(exclude_none=True)
|
|
48
|
+
if datarow["type"] == "image":
|
|
49
|
+
stream = {datarow["sensor_name"]: {"type": "camera", "uri": dest_url}}
|
|
50
|
+
else:
|
|
51
|
+
stream = {datarow["sensor_name"]: {"type": "lidar", "uri": dest_url}}
|
|
52
|
+
|
|
53
|
+
return {"frames": frames, "streams": stream}
|
|
@@ -19,7 +19,7 @@ from .constant import (
|
|
|
19
19
|
ExportFormat,
|
|
20
20
|
)
|
|
21
21
|
from .exporter import Exporter
|
|
22
|
-
from .utils import convert_to_bytes
|
|
22
|
+
from .utils import convert_to_bytes, gen_empty_vai
|
|
23
23
|
|
|
24
24
|
|
|
25
25
|
def merge_intervals(intervals: list[tuple[int, int]]):
|
|
@@ -227,7 +227,9 @@ def aggregate_static_annotations(
|
|
|
227
227
|
return large_data
|
|
228
228
|
|
|
229
229
|
|
|
230
|
-
def update_streams_uri(streams: dict, sequence_folder_url: str) -> dict:
|
|
230
|
+
def update_streams_uri(
|
|
231
|
+
streams: dict, sequence_folder_url: str, original_file_name: Optional[str] = None
|
|
232
|
+
) -> dict:
|
|
231
233
|
"""Update streams under frames uri
|
|
232
234
|
|
|
233
235
|
Example:
|
|
@@ -246,6 +248,8 @@ def update_streams_uri(streams: dict, sequence_folder_url: str) -> dict:
|
|
|
246
248
|
streams data contains multiple sensors and its uri
|
|
247
249
|
sequence_folder_url : str
|
|
248
250
|
sequence folder url destination
|
|
251
|
+
original_file_name: Optional[str]
|
|
252
|
+
original file name for the given image/pcd
|
|
249
253
|
|
|
250
254
|
|
|
251
255
|
Returns
|
|
@@ -259,6 +263,8 @@ def update_streams_uri(streams: dict, sequence_folder_url: str) -> dict:
|
|
|
259
263
|
old_uri_path_list = stream_data["uri"].split("/")
|
|
260
264
|
file_path = "/".join(old_uri_path_list[-3:])
|
|
261
265
|
stream_data["uri"] = sequence_folder_url + file_path
|
|
266
|
+
if original_file_name is not None:
|
|
267
|
+
stream_data["original_file_name"] = original_file_name
|
|
262
268
|
return current_streams
|
|
263
269
|
|
|
264
270
|
|
|
@@ -419,6 +425,7 @@ def aggregate_datarows_annotations(
|
|
|
419
425
|
datarow_id: int = datarow["id"]
|
|
420
426
|
datarow_items: dict = datarow["items"]
|
|
421
427
|
frame_num = int(datarow["frame_id"])
|
|
428
|
+
original_file_name = os.path.basename(datarow["original_url"])
|
|
422
429
|
|
|
423
430
|
if annotation_name == GROUNDTRUTH:
|
|
424
431
|
vai = copy.deepcopy(datarow_items.get(GROUND_TRUTH_ANNOTATION_NAME, {}))
|
|
@@ -427,6 +434,11 @@ def aggregate_datarows_annotations(
|
|
|
427
434
|
datarow_items.get("predictions", {}).get(annotation_name, {})
|
|
428
435
|
)
|
|
429
436
|
|
|
437
|
+
if not vai:
|
|
438
|
+
vai = gen_empty_vai(
|
|
439
|
+
datarow=datarow, sequence_folder_url=sequence_folder_url
|
|
440
|
+
)
|
|
441
|
+
|
|
430
442
|
# we could retrieve the first data of frames under items
|
|
431
443
|
# since each items inside datarow contains only one frames
|
|
432
444
|
try:
|
|
@@ -485,6 +497,7 @@ def aggregate_datarows_annotations(
|
|
|
485
497
|
update_streams_uri(
|
|
486
498
|
streams=frame["frame_properties"]["streams"],
|
|
487
499
|
sequence_folder_url=sequence_folder_url,
|
|
500
|
+
original_file_name=original_file_name,
|
|
488
501
|
)
|
|
489
502
|
)
|
|
490
503
|
# coordinate system can be optional in visionai
|
|
@@ -494,7 +507,9 @@ def aggregate_datarows_annotations(
|
|
|
494
507
|
if current_vai_sensor not in streams:
|
|
495
508
|
streams.update(
|
|
496
509
|
update_streams_uri(
|
|
497
|
-
streams=vai["streams"],
|
|
510
|
+
streams=vai["streams"],
|
|
511
|
+
sequence_folder_url=sequence_folder_url,
|
|
512
|
+
original_file_name=None,
|
|
498
513
|
)
|
|
499
514
|
)
|
|
500
515
|
if not current_frame.get("objects"):
|
|
@@ -506,17 +521,17 @@ def aggregate_datarows_annotations(
|
|
|
506
521
|
# list out all frames number of combined frames
|
|
507
522
|
frame_num_set = {int(num) for num in combined_frames.keys()}
|
|
508
523
|
# combine all objects under visionai
|
|
509
|
-
static_objects_map: dict[
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
524
|
+
static_objects_map: dict[str, list[tuple[int, dict]]] = (
|
|
525
|
+
aggregate_static_annotations(
|
|
526
|
+
datarows=all_datarows, root_key="objects", annotation_name=annotation_name
|
|
527
|
+
)
|
|
513
528
|
)
|
|
514
529
|
|
|
515
530
|
# combine all contexts under visionai
|
|
516
|
-
static_contexts_map: dict[
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
531
|
+
static_contexts_map: dict[str, list[tuple[int, dict]]] = (
|
|
532
|
+
aggregate_static_annotations(
|
|
533
|
+
datarows=all_datarows, root_key="contexts", annotation_name=annotation_name
|
|
534
|
+
)
|
|
520
535
|
)
|
|
521
536
|
|
|
522
537
|
# retrieve tags under visionai
|
|
@@ -562,7 +577,7 @@ def aggregate_datarows_annotations(
|
|
|
562
577
|
visionai["contexts"] = combined_contexts_map
|
|
563
578
|
if tags_under_visionai:
|
|
564
579
|
visionai["tags"] = tags_under_visionai
|
|
565
|
-
return VisionAIModel(**{"visionai": visionai}).
|
|
580
|
+
return VisionAIModel(**{"visionai": visionai}).model_dump(exclude_none=True)
|
|
566
581
|
|
|
567
582
|
|
|
568
583
|
@Exporter.register(format=ExportFormat.VISIONAI)
|
|
@@ -626,48 +641,79 @@ class ExportVisionAI(ExportAnnotationBase):
|
|
|
626
641
|
annotation_name: str,
|
|
627
642
|
datarow_id_to_frame_datarow_id: dict[int, int],
|
|
628
643
|
current_batch: list[dict],
|
|
629
|
-
pre_frame_datarow_id: int,
|
|
644
|
+
pre_frame_datarow_id: int | None,
|
|
630
645
|
last_batch: bool,
|
|
631
|
-
|
|
646
|
+
is_sequential: bool,
|
|
647
|
+
) -> tuple[
|
|
648
|
+
list[tuple[bytes, str]],
|
|
649
|
+
defaultdict[int, list[dict]],
|
|
650
|
+
list[int],
|
|
651
|
+
int | None,
|
|
652
|
+
list[dict],
|
|
653
|
+
]:
|
|
632
654
|
annotation_results = []
|
|
633
655
|
|
|
656
|
+
def create_aggregated_annotation(
|
|
657
|
+
frame_datarows: dict, seq_id: int
|
|
658
|
+
) -> tuple[bytes, str]:
|
|
659
|
+
"""Helper to create aggregated annotation bytes and path."""
|
|
660
|
+
annot_bytes = convert_to_bytes(
|
|
661
|
+
aggregate_datarows_annotations(
|
|
662
|
+
frame_datarows=frame_datarows,
|
|
663
|
+
sequence_folder_url=os.path.join(
|
|
664
|
+
target_folder, f"{seq_id:012d}", ""
|
|
665
|
+
),
|
|
666
|
+
annotation_name=annotation_name,
|
|
667
|
+
)
|
|
668
|
+
)
|
|
669
|
+
anno_path = os.path.join(
|
|
670
|
+
f"{seq_id:012d}", "annotations", annotation_name, "visionai.json"
|
|
671
|
+
)
|
|
672
|
+
return (annot_bytes, anno_path)
|
|
673
|
+
|
|
674
|
+
if is_sequential:
|
|
675
|
+
async for datarow in datarow_generator_func(datarow_id_list):
|
|
676
|
+
frame_datarow_id = datarow_id_to_frame_datarow_id[datarow["id"]]
|
|
677
|
+
sequence_frame_datarows[frame_datarow_id].append(datarow)
|
|
678
|
+
current_batch.append(datarow)
|
|
679
|
+
|
|
680
|
+
sequence_id = frame_datarow_id_to_sequence_id[frame_datarow_id]
|
|
681
|
+
annotation_results.append(
|
|
682
|
+
create_aggregated_annotation(sequence_frame_datarows, sequence_id)
|
|
683
|
+
)
|
|
684
|
+
sequence_frame_datarows = defaultdict(list)
|
|
685
|
+
|
|
686
|
+
return (
|
|
687
|
+
annotation_results,
|
|
688
|
+
sequence_frame_datarows,
|
|
689
|
+
datarow_id_list,
|
|
690
|
+
pre_frame_datarow_id,
|
|
691
|
+
current_batch,
|
|
692
|
+
)
|
|
693
|
+
|
|
634
694
|
async for datarow in datarow_generator_func(datarow_id_list):
|
|
635
695
|
frame_datarow_id = datarow_id_to_frame_datarow_id[datarow["id"]]
|
|
636
696
|
current_batch.append(datarow)
|
|
637
|
-
sequence_frame_datarows[frame_datarow_id].append(datarow)
|
|
638
697
|
if pre_frame_datarow_id is None:
|
|
639
698
|
pre_frame_datarow_id = frame_datarow_id
|
|
699
|
+
sequence_frame_datarows[frame_datarow_id].append(datarow)
|
|
640
700
|
elif pre_frame_datarow_id != frame_datarow_id:
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
+ f"{sequence_id:012d}/",
|
|
647
|
-
annotation_name=annotation_name,
|
|
701
|
+
# export previous frame when frame_datarow_id changes
|
|
702
|
+
pre_sequence_id = frame_datarow_id_to_sequence_id[pre_frame_datarow_id]
|
|
703
|
+
annotation_results.append(
|
|
704
|
+
create_aggregated_annotation(
|
|
705
|
+
sequence_frame_datarows, pre_sequence_id
|
|
648
706
|
)
|
|
649
707
|
)
|
|
650
|
-
anno_path = os.path.join(
|
|
651
|
-
f"{sequence_id:012d}", "annotations", "groundtruth", "visionai.json"
|
|
652
|
-
)
|
|
653
|
-
annotation_results.append((annot_bytes, anno_path))
|
|
654
708
|
sequence_frame_datarows.pop(pre_frame_datarow_id)
|
|
709
|
+
sequence_frame_datarows[frame_datarow_id].append(datarow)
|
|
655
710
|
pre_frame_datarow_id = frame_datarow_id
|
|
656
711
|
|
|
657
712
|
if last_batch:
|
|
658
713
|
sequence_id = frame_datarow_id_to_sequence_id[frame_datarow_id]
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
frame_datarows=sequence_frame_datarows,
|
|
662
|
-
sequence_folder_url=f"{target_folder.rstrip('/')}/"
|
|
663
|
-
+ f"{sequence_id:012d}/",
|
|
664
|
-
annotation_name=annotation_name,
|
|
665
|
-
)
|
|
714
|
+
annotation_results.append(
|
|
715
|
+
create_aggregated_annotation(sequence_frame_datarows, sequence_id)
|
|
666
716
|
)
|
|
667
|
-
anno_path = os.path.join(
|
|
668
|
-
f"{sequence_id:012d}", "annotations", "groundtruth", "visionai.json"
|
|
669
|
-
)
|
|
670
|
-
annotation_results.append((annot_bytes, anno_path))
|
|
671
717
|
sequence_frame_datarows = defaultdict(list)
|
|
672
718
|
|
|
673
719
|
return (
|
|
@@ -684,6 +730,7 @@ class ExportVisionAI(ExportAnnotationBase):
|
|
|
684
730
|
sequence_frame_map: dict[int, dict[int, list[int]]],
|
|
685
731
|
datarow_generator_func: Callable[[list], AsyncGenerator[dict]],
|
|
686
732
|
annotation_name: str,
|
|
733
|
+
is_sequential: bool,
|
|
687
734
|
*_,
|
|
688
735
|
**kwargs,
|
|
689
736
|
) -> AsyncGenerator[bytes, str]:
|
|
@@ -705,10 +752,10 @@ class ExportVisionAI(ExportAnnotationBase):
|
|
|
705
752
|
datarow_id_list.extend(datarow_ids)
|
|
706
753
|
frame_datarow_id_to_sequence_id[frame_datarow_id] = sequence_id
|
|
707
754
|
for datarow_id in datarow_ids:
|
|
708
|
-
datarow_id_to_frame_datarow_id[
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
if len(datarow_id_list) >= BATCH_SIZE:
|
|
755
|
+
datarow_id_to_frame_datarow_id[datarow_id] = (
|
|
756
|
+
frame_datarow_id
|
|
757
|
+
)
|
|
758
|
+
if not is_sequential and len(datarow_id_list) >= BATCH_SIZE:
|
|
712
759
|
(
|
|
713
760
|
annotation_results,
|
|
714
761
|
sequence_frame_datarows,
|
|
@@ -726,6 +773,7 @@ class ExportVisionAI(ExportAnnotationBase):
|
|
|
726
773
|
current_batch,
|
|
727
774
|
pre_frame_datarow_id,
|
|
728
775
|
last_batch=False,
|
|
776
|
+
is_sequential=is_sequential,
|
|
729
777
|
)
|
|
730
778
|
results = await self.download_batch(
|
|
731
779
|
session,
|
|
@@ -741,11 +789,49 @@ class ExportVisionAI(ExportAnnotationBase):
|
|
|
741
789
|
current_batch = []
|
|
742
790
|
datarow_id_list = []
|
|
743
791
|
|
|
744
|
-
if len(annotation_results) >= BATCH_SIZE:
|
|
745
792
|
for annotation_result in annotation_results:
|
|
746
793
|
yield annotation_result
|
|
747
794
|
annotation_results = []
|
|
748
|
-
|
|
795
|
+
if is_sequential:
|
|
796
|
+
# process sequence
|
|
797
|
+
(
|
|
798
|
+
annotation_results,
|
|
799
|
+
sequence_frame_datarows,
|
|
800
|
+
datarow_id_list,
|
|
801
|
+
pre_frame_datarow_id,
|
|
802
|
+
current_batch,
|
|
803
|
+
) = await self.process_datarows(
|
|
804
|
+
datarow_generator_func,
|
|
805
|
+
datarow_id_list,
|
|
806
|
+
frame_datarow_id_to_sequence_id,
|
|
807
|
+
sequence_frame_datarows,
|
|
808
|
+
target_folder,
|
|
809
|
+
annotation_name,
|
|
810
|
+
datarow_id_to_frame_datarow_id,
|
|
811
|
+
current_batch,
|
|
812
|
+
pre_frame_datarow_id,
|
|
813
|
+
last_batch=False,
|
|
814
|
+
is_sequential=is_sequential,
|
|
815
|
+
)
|
|
816
|
+
# download sequence
|
|
817
|
+
results = await self.download_batch(
|
|
818
|
+
session,
|
|
819
|
+
semaphore,
|
|
820
|
+
current_batch,
|
|
821
|
+
datarow_id_to_frame_datarow_id,
|
|
822
|
+
frame_datarow_id_to_sequence_id,
|
|
823
|
+
)
|
|
824
|
+
for result in results:
|
|
825
|
+
if result:
|
|
826
|
+
yield result
|
|
827
|
+
progress_bar.update(1)
|
|
828
|
+
current_batch = []
|
|
829
|
+
datarow_id_list = []
|
|
830
|
+
for annotation_result in annotation_results:
|
|
831
|
+
yield annotation_result
|
|
832
|
+
annotation_results = []
|
|
833
|
+
|
|
834
|
+
# update for non-sequential last batch
|
|
749
835
|
if datarow_id_list:
|
|
750
836
|
(
|
|
751
837
|
annotation_results,
|
|
@@ -764,6 +850,7 @@ class ExportVisionAI(ExportAnnotationBase):
|
|
|
764
850
|
current_batch,
|
|
765
851
|
pre_frame_datarow_id,
|
|
766
852
|
last_batch=True,
|
|
853
|
+
is_sequential=is_sequential,
|
|
767
854
|
)
|
|
768
855
|
results = await self.download_batch(
|
|
769
856
|
session,
|
|
@@ -16,7 +16,7 @@ from .constant import (
|
|
|
16
16
|
ExportFormat,
|
|
17
17
|
)
|
|
18
18
|
from .exporter import Exporter
|
|
19
|
-
from .utils import convert_to_bytes
|
|
19
|
+
from .utils import convert_to_bytes, gen_empty_vai
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
@Exporter.register(format=ExportFormat.YOLO)
|
|
@@ -152,6 +152,9 @@ def convert_annotation(
|
|
|
152
152
|
datarow["items"].get("predictions", {}).get(annotation_name, {})
|
|
153
153
|
)
|
|
154
154
|
|
|
155
|
+
if not visionai_dict:
|
|
156
|
+
visionai_dict = gen_empty_vai(datarow=datarow, sequence_folder_url="")
|
|
157
|
+
|
|
155
158
|
(category_map, image_labels_map, _, _) = VAItoYOLO.convert_single_visionai_to_yolo(
|
|
156
159
|
dest_img_folder="",
|
|
157
160
|
visionai_dict={"visionai": visionai_dict},
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import re
|
|
2
|
-
from typing import Optional, Union
|
|
2
|
+
from typing import Literal, Optional, Union
|
|
3
3
|
|
|
4
4
|
from pydantic import BaseModel, ConfigDict, field_validator
|
|
5
5
|
|
|
@@ -133,3 +133,13 @@ class DatasetAPISchema(BaseModel):
|
|
|
133
133
|
annotations: Optional[list[str]] = []
|
|
134
134
|
access_key_id: Optional[str] = None
|
|
135
135
|
secret_access_key: Optional[str] = None
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
class CreateCustomModelAPISchema(BaseModel):
|
|
139
|
+
project_id: int
|
|
140
|
+
name: str
|
|
141
|
+
input_classes: list[str]
|
|
142
|
+
resolution_width: int
|
|
143
|
+
resolution_height: int
|
|
144
|
+
model_structure: Literal["yolov9-c", "yolov9-e", "yolov9-s"]
|
|
145
|
+
weight_url: str
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import re
|
|
2
|
-
from typing import Optional, Union
|
|
2
|
+
from typing import Literal, Optional, Union
|
|
3
3
|
|
|
4
4
|
from pydantic import BaseModel, ConfigDict, field_validator
|
|
5
5
|
from pydantic_core.core_schema import ValidationInfo
|
|
@@ -543,3 +543,28 @@ class MLModel(BaseModel):
|
|
|
543
543
|
return DataverseClient.get_convert_record(
|
|
544
544
|
convert_record_id=convert_record_id, client_alias=self.client_alias
|
|
545
545
|
)
|
|
546
|
+
|
|
547
|
+
def create_custom_model(
|
|
548
|
+
self,
|
|
549
|
+
project: Project,
|
|
550
|
+
name: str,
|
|
551
|
+
input_classes: list[str],
|
|
552
|
+
resolution_width: int,
|
|
553
|
+
resolution_height: int,
|
|
554
|
+
model_structure: Literal["yolov9-c", "yolov9-e", "yolov9-s"],
|
|
555
|
+
weight_url: str,
|
|
556
|
+
permission: str = "",
|
|
557
|
+
):
|
|
558
|
+
from ..client import DataverseClient
|
|
559
|
+
|
|
560
|
+
return DataverseClient.create_custom_model(
|
|
561
|
+
project=project,
|
|
562
|
+
name=name,
|
|
563
|
+
input_classes=input_classes,
|
|
564
|
+
resolution_width=resolution_width,
|
|
565
|
+
resolution_height=resolution_height,
|
|
566
|
+
model_structure=model_structure,
|
|
567
|
+
weight_url=weight_url,
|
|
568
|
+
client_alias=self.client_alias,
|
|
569
|
+
permission=permission,
|
|
570
|
+
)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dataverse-sdk
|
|
3
|
-
Version: 2.2.2
|
|
3
|
+
Version: 2.3.1
|
|
4
4
|
Summary: Dataverse SDK For Python
|
|
5
5
|
Home-page:
|
|
6
6
|
Author: LinkerVision
|
|
@@ -55,19 +55,21 @@ Interaction with the Dataverse site starts with an instance of the `DataverseCli
|
|
|
55
55
|
```Python
|
|
56
56
|
from dataverse_sdk import *
|
|
57
57
|
from dataverse_sdk.connections import get_connection
|
|
58
|
+
from dataverse_sdk.constants import DataverseHost
|
|
59
|
+
|
|
58
60
|
client = DataverseClient(
|
|
59
|
-
host=DataverseHost.PRODUCTION, email="XXX", password="***", service_id="xxxx-xxxx-xx-xxx", alias="default", force = False
|
|
61
|
+
host=DataverseHost.PRODUCTION.value, email="XXX", password="***", service_id="xxxx-xxxx-xx-xxx", alias="default", force = False
|
|
60
62
|
)
|
|
61
63
|
assert client is get_connection("default")
|
|
62
64
|
|
|
63
65
|
# Should provide different alias if you are trying to connect to different workspaces
|
|
64
66
|
client2 = DataverseClient(
|
|
65
|
-
host=DataverseHost.PRODUCTION, email="account-2", password="***", service_id="xxxx-xxxx-xx-xxx", alias="client2", force = False
|
|
67
|
+
host=DataverseHost.PRODUCTION.value, email="account-2", password="***", service_id="xxxx-xxxx-xx-xxx", alias="client2", force = False
|
|
66
68
|
)
|
|
67
69
|
assert client2 is get_connection(client2.alias)
|
|
68
70
|
|
|
69
71
|
client3 = DataverseClient(
|
|
70
|
-
host=DataverseHost.PRODUCTION, email="XXX", password="", service_id="xxxx-xxxx-xx-xxx", access_token="xxx"
|
|
72
|
+
host=DataverseHost.PRODUCTION.value, email="XXX", password="", service_id="xxxx-xxxx-xx-xxx", access_token="xxx"
|
|
71
73
|
)
|
|
72
74
|
assert client3 is get_connection(client3.alias)
|
|
73
75
|
```
|
|
@@ -2,7 +2,7 @@ from setuptools import find_packages, setup
|
|
|
2
2
|
|
|
3
3
|
AUTHOR = "LinkerVision"
|
|
4
4
|
PACKAGE_NAME = "dataverse-sdk"
|
|
5
|
-
PACKAGE_VERSION = "2.2.2"
|
|
5
|
+
PACKAGE_VERSION = "2.3.1"
|
|
6
6
|
DESC = "Dataverse SDK For Python"
|
|
7
7
|
with open("README.md", encoding="utf-8") as fh:
|
|
8
8
|
long_description = fh.read()
|
|
@@ -1,26 +0,0 @@
|
|
|
1
|
-
import json
|
|
2
|
-
from typing import Union
|
|
3
|
-
|
|
4
|
-
from ..apis.third_party import ThirdPartyAPI
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
def convert_to_bytes(obj: Union[dict, list, str]) -> bytes:
|
|
8
|
-
if isinstance(obj, (dict, list)):
|
|
9
|
-
jstr = json.dumps(obj)
|
|
10
|
-
elif isinstance(obj, str):
|
|
11
|
-
jstr = obj
|
|
12
|
-
else:
|
|
13
|
-
raise TypeError("un-support type")
|
|
14
|
-
return bytes(jstr, encoding="utf8")
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
async def download_url_file_async(data_url: str) -> bytes | None:
|
|
18
|
-
# get data from url link
|
|
19
|
-
try:
|
|
20
|
-
data: bytes = await ThirdPartyAPI.async_download_file(
|
|
21
|
-
url=data_url, method="GET"
|
|
22
|
-
)
|
|
23
|
-
except Exception:
|
|
24
|
-
print(f"Retrieving data from url {data_url} error")
|
|
25
|
-
return None
|
|
26
|
-
return data
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|