dataverse-sdk 2.2.2__tar.gz → 2.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/PKG-INFO +6 -4
  2. {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/README.md +5 -3
  3. {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk/apis/backend.py +13 -0
  4. {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk/client.py +37 -0
  5. {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk/export/base.py +1 -0
  6. {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk/export/coco.py +4 -1
  7. {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk/export/exporter.py +6 -4
  8. dataverse_sdk-2.3.1/dataverse_sdk/export/utils.py +53 -0
  9. {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk/export/visionai.py +130 -43
  10. {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk/export/yolo.py +4 -1
  11. {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk/schemas/api.py +11 -1
  12. {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk/schemas/client.py +26 -1
  13. {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk.egg-info/PKG-INFO +6 -4
  14. {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/setup.py +1 -1
  15. dataverse_sdk-2.2.2/dataverse_sdk/export/utils.py +0 -26
  16. {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk/__init__.py +0 -0
  17. {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk/apis/__init__.py +0 -0
  18. {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk/apis/third_party.py +0 -0
  19. {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk/connections.py +0 -0
  20. {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk/constants.py +0 -0
  21. {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk/exceptions/__init__.py +0 -0
  22. {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk/exceptions/client.py +0 -0
  23. {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk/export/__init__.py +0 -0
  24. {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk/export/constant.py +0 -0
  25. {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk/export/vqa.py +0 -0
  26. {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk/schemas/__init__.py +0 -0
  27. {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk/schemas/common.py +0 -0
  28. {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk/schemas/format.py +0 -0
  29. {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk/utils/__init__.py +0 -0
  30. {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk/utils/utils.py +0 -0
  31. {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk.egg-info/SOURCES.txt +0 -0
  32. {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk.egg-info/dependency_links.txt +0 -0
  33. {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk.egg-info/requires.txt +0 -0
  34. {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/dataverse_sdk.egg-info/top_level.txt +0 -0
  35. {dataverse_sdk-2.2.2 → dataverse_sdk-2.3.1}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dataverse-sdk
3
- Version: 2.2.2
3
+ Version: 2.3.1
4
4
  Summary: Dataverse SDK For Python
5
5
  Home-page:
6
6
  Author: LinkerVision
@@ -55,19 +55,21 @@ Interaction with the Dataverse site starts with an instance of the `DataverseCli
55
55
  ```Python
56
56
  from dataverse_sdk import *
57
57
  from dataverse_sdk.connections import get_connection
58
+ from dataverse_sdk.constants import DataverseHost
59
+
58
60
  client = DataverseClient(
59
- host=DataverseHost.PRODUCTION, email="XXX", password="***", service_id="xxxx-xxxx-xx-xxx", alias="default", force = False
61
+ host=DataverseHost.PRODUCTION.value, email="XXX", password="***", service_id="xxxx-xxxx-xx-xxx", alias="default", force = False
60
62
  )
61
63
  assert client is get_connection("default")
62
64
 
63
65
  # Should provide different alias if you are trying to connect to different workspaces
64
66
  client2 = DataverseClient(
65
- host=DataverseHost.PRODUCTION, email="account-2", password="***", service_id="xxxx-xxxx-xx-xxx", alias="client2", force = False
67
+ host=DataverseHost.PRODUCTION.value, email="account-2", password="***", service_id="xxxx-xxxx-xx-xxx", alias="client2", force = False
66
68
  )
67
69
  assert client2 is get_connection(client2.alias)
68
70
 
69
71
  client3 = DataverseClient(
70
- host=DataverseHost.PRODUCTION, email="XXX", password="", service_id="xxxx-xxxx-xx-xxx", access_token="xxx"
72
+ host=DataverseHost.PRODUCTION.value, email="XXX", password="", service_id="xxxx-xxxx-xx-xxx", access_token="xxx"
71
73
  )
72
74
  assert client3 is get_connection(client3.alias)
73
75
  ```
@@ -28,19 +28,21 @@ Interaction with the Dataverse site starts with an instance of the `DataverseCli
28
28
  ```Python
29
29
  from dataverse_sdk import *
30
30
  from dataverse_sdk.connections import get_connection
31
+ from dataverse_sdk.constants import DataverseHost
32
+
31
33
  client = DataverseClient(
32
- host=DataverseHost.PRODUCTION, email="XXX", password="***", service_id="xxxx-xxxx-xx-xxx", alias="default", force = False
34
+ host=DataverseHost.PRODUCTION.value, email="XXX", password="***", service_id="xxxx-xxxx-xx-xxx", alias="default", force = False
33
35
  )
34
36
  assert client is get_connection("default")
35
37
 
36
38
  # Should provide different alias if you are trying to connect to different workspaces
37
39
  client2 = DataverseClient(
38
- host=DataverseHost.PRODUCTION, email="account-2", password="***", service_id="xxxx-xxxx-xx-xxx", alias="client2", force = False
40
+ host=DataverseHost.PRODUCTION.value, email="account-2", password="***", service_id="xxxx-xxxx-xx-xxx", alias="client2", force = False
39
41
  )
40
42
  assert client2 is get_connection(client2.alias)
41
43
 
42
44
  client3 = DataverseClient(
43
- host=DataverseHost.PRODUCTION, email="XXX", password="", service_id="xxxx-xxxx-xx-xxx", access_token="xxx"
45
+ host=DataverseHost.PRODUCTION.value, email="XXX", password="", service_id="xxxx-xxxx-xx-xxx", access_token="xxx"
44
46
  )
45
47
  assert client3 is get_connection(client3.alias)
46
48
  ```
@@ -460,6 +460,19 @@ class BackendAPI:
460
460
  )
461
461
  return resp.json()
462
462
 
463
def create_custom_model(self, **kwargs):
    """Create a custom ML model through the backend API.

    POSTs the keyword arguments as the request payload to
    ``{host}/api/ml_models/custom-model/``.

    Parameters
    ----------
    **kwargs
        Payload fields for the request. The optional ``permission`` key is
        removed from the payload and, when non-empty, sent as the
        ``X-Request-Source`` header instead.

    Returns
    -------
    The decoded JSON body of the response (``resp.json()``).
    """
    permission = kwargs.pop("permission", "")

    # Use per-request headers instead of mutating ``self.headers``; otherwise
    # the ``X-Request-Source`` value would stay attached to every later
    # request sent through this API object.
    headers = dict(self.headers)
    if permission:
        headers["X-Request-Source"] = permission

    resp = self.send_request(
        url=f"{self.host}/api/ml_models/custom-model/",
        method="post",
        headers=headers,
        data=kwargs,
    )
    return resp.json()
475
+
463
476
 
464
477
  class AsyncBackendAPI:
465
478
  def __init__(
@@ -26,6 +26,7 @@ from .exceptions.client import (
26
26
  )
27
27
  from .schemas.api import (
28
28
  AttributeAPISchema,
29
+ CreateCustomModelAPISchema,
29
30
  DatasetAPISchema,
30
31
  OntologyAPISchema,
31
32
  ProjectAPISchema,
@@ -690,10 +691,14 @@ class DataverseClient:
690
691
  raise InvalidProcessError("The project type is not VQA!")
691
692
  output_list = []
692
693
  for question in project.ontology.classes:
694
+ answer = question.attributes[0]
695
+ option_list = [opt.value for opt in answer.options]
693
696
  output_list.append(
694
697
  {
695
698
  "question_id": question.rank,
696
699
  "question": question.extended_class["question"],
700
+ "type": answer.type,
701
+ "options": option_list,
697
702
  }
698
703
  )
699
704
  import json
@@ -1747,6 +1752,38 @@ of this project OR has been added before"
1747
1752
  )
1748
1753
  return create_dataset_uuid
1749
1754
 
1755
@staticmethod
def create_custom_model(
    project: Project,
    name: str,
    input_classes: list[str],
    resolution_width: int,
    resolution_height: int,
    model_structure: str,
    weight_url: str,
    client: Optional["DataverseClient"] = None,
    client_alias: Optional[str] = None,
    permission: str = "",
):
    """Validate the arguments and create a custom model via the backend API.

    Parameters
    ----------
    project : Project
        Project the model belongs to; only ``project.id`` is sent.
    name : str
        Name of the custom model.
    input_classes : list[str]
        Class names passed through as ``input_classes``.
    resolution_width : int
        Passed through as ``resolution_width``.
    resolution_height : int
        Passed through as ``resolution_height``.
    model_structure : str
        Model structure identifier, validated by ``CreateCustomModelAPISchema``.
    weight_url : str
        Passed through as ``weight_url``.
    client : Optional[DataverseClient]
        Client forwarded to ``_get_api_client``.
    client_alias : Optional[str]
        Client alias forwarded to ``_get_api_client``.
    permission : str
        Forwarded to the API call as ``permission``.

    Returns
    -------
    The value returned by the backend API's ``create_custom_model`` call.

    Raises
    ------
    APIValidationError
        If the arguments fail ``CreateCustomModelAPISchema`` validation.
    """
    try:
        payload = CreateCustomModelAPISchema(
            project_id=project.id,
            name=name,
            input_classes=input_classes,
            resolution_width=resolution_width,
            resolution_height=resolution_height,
            model_structure=model_structure,
            weight_url=weight_url,
        ).model_dump()
    except ValidationError as e:
        # Chain the original error so the validation details stay in the traceback.
        raise APIValidationError(
            f"Something wrong when creating custom model: {e}"
        ) from e

    api, _ = DataverseClient._get_api_client(
        client=client, client_alias=client_alias, is_async=False
    )

    # Hand the API response back to the caller instead of discarding it.
    return api.create_custom_model(**payload, permission=permission)
1786
+
1750
1787
  @staticmethod
1751
1788
  async def run_generate_presigned_urls(
1752
1789
  file_paths: list, api: AsyncBackendAPI, data_folder: str
@@ -12,6 +12,7 @@ class ExportAnnotationBase(abc.ABC):
12
12
  sequence_frame_map: dict[int, dict[int, list[int]]],
13
13
  datarow_generator_func: Callable[[list], Generator[dict]],
14
14
  annotation_name: str,
15
+ is_sequential: bool,
15
16
  *args,
16
17
  **kwargs,
17
18
  ) -> Generator[bytes, str]:
@@ -22,7 +22,7 @@ from .constant import (
22
22
  ExportFormat,
23
23
  )
24
24
  from .exporter import Exporter
25
- from .utils import convert_to_bytes
25
+ from .utils import convert_to_bytes, gen_empty_vai
26
26
 
27
27
 
28
28
  @Exporter.register(format=ExportFormat.COCO)
@@ -156,6 +156,9 @@ def convert_annotation(
156
156
  datarow["items"].get("predictions", {}).get(annotation_name, {})
157
157
  )
158
158
 
159
+ if not target_visionai:
160
+ target_visionai = gen_empty_vai(datarow=datarow, sequence_folder_url="")
161
+
159
162
  (
160
163
  category_idx_map,
161
164
  image_update,
@@ -77,6 +77,7 @@ class Exporter:
77
77
  sequence_frame_map: dict,
78
78
  question_id_map: dict,
79
79
  annotation_name: str,
80
+ is_sequential: bool,
80
81
  ) -> AsyncGenerator[tuple[bytes, str]]:
81
82
  async for data, path in self.export_annot.producer(
82
83
  class_names=class_names,
@@ -85,6 +86,7 @@ class Exporter:
85
86
  target_folder=self.target_folder,
86
87
  datarow_generator_func=await self._gen(self.curation_api),
87
88
  annotation_name=annotation_name,
89
+ is_sequential=is_sequential,
88
90
  ):
89
91
  if not path:
90
92
  continue
@@ -141,7 +143,7 @@ class Exporter:
141
143
  gen: AsyncGenerator = curation_api.get_datarows(
142
144
  id_set_list=id_chunks,
143
145
  batch_size=BATCH_SIZE,
144
- fields="id,items,vlm_items,url,frame_id,image_width,image_height,sensor_name,original_url",
146
+ fields="id,items,vlm_items,url,frame_id,image_width,image_height,sensor_name,original_url,type",
145
147
  )
146
148
  async for batched_datarow in gen:
147
149
  for datarow in batched_datarow:
@@ -238,9 +240,9 @@ def get_datarow_sequences(
238
240
  for frame_datarow_id, datarow_id_list in sequence_frame_map[
239
241
  sequence_datarow_id
240
242
  ].items():
241
- new_datarows_sequence_map[sequence_order][
242
- frame_datarow_id
243
- ] = datarow_id_list
243
+ new_datarows_sequence_map[sequence_order][frame_datarow_id] = (
244
+ datarow_id_list
245
+ )
244
246
  if not is_sequential or sequence_datarow_id == NONE_SEQUENCE_DATAROW_ID:
245
247
  sequence_order += 1
246
248
  if is_sequential:
@@ -0,0 +1,53 @@
1
+ import json
2
+ from typing import Union
3
+
4
+ from visionai_data_format.schemas.visionai_schema import (
5
+ Frame,
6
+ FrameProperties,
7
+ FramePropertyStream,
8
+ )
9
+
10
+ from ..apis.third_party import ThirdPartyAPI
11
+
12
+
13
def convert_to_bytes(obj: Union[dict, list, str]) -> bytes:
    """Encode *obj* as UTF-8 bytes.

    Parameters
    ----------
    obj : Union[dict, list, str]
        A dict or list is serialized with ``json.dumps`` first; a str is
        encoded as-is.

    Returns
    -------
    bytes
        The UTF-8 encoding of the resulting text.

    Raises
    ------
    TypeError
        If *obj* is not a dict, list or str.
    """
    if isinstance(obj, str):
        text = obj
    elif isinstance(obj, (dict, list)):
        text = json.dumps(obj)
    else:
        # Name the offending type so the caller can see what was passed in.
        raise TypeError(
            f"unsupported type {type(obj).__name__!r}; expected dict, list or str"
        )
    return text.encode("utf-8")
21
+
22
+
23
async def download_url_file_async(data_url: str) -> bytes | None:
    """Download the content behind *data_url*.

    Parameters
    ----------
    data_url : str
        URL passed to ``ThirdPartyAPI.async_download_file`` with method GET.

    Returns
    -------
    bytes | None
        The downloaded data, or ``None`` when the download raised.
    """
    try:
        data: bytes = await ThirdPartyAPI.async_download_file(
            url=data_url, method="GET"
        )
    except Exception as exc:
        # Best-effort download: report the failure, including its cause, and
        # signal it to the caller with ``None`` rather than raising.
        print(f"Retrieving data from url {data_url} error: {exc!r}")
        return None
    return data
33
+
34
+
35
def gen_empty_vai(datarow: dict, sequence_folder_url: str) -> dict:
    """Build a frames/streams dict with one object-less frame for *datarow*.

    Parameters
    ----------
    datarow : dict
        Must provide the ``sensor_name``, ``url``, ``frame_id`` and ``type`` keys.
    sequence_folder_url : str
        Prefix placed in front of ``/data/<sensor_name>/<file name>`` to form
        the stream uri.

    Returns
    -------
    dict
        ``{"frames": {frame_id: <dumped Frame>}, "streams": {sensor_name: {...}}}``
    """
    sensor = datarow["sensor_name"]
    # keep only the last path segment of the datarow url as the file name
    file_name = datarow["url"].split("/")[-1]
    dest_url = f"{sequence_folder_url}/data/{sensor}/{file_name}"

    frame_num = datarow["frame_id"]
    # a frame that carries the stream uri but no objects
    empty_frame = Frame(
        frame_properties=FrameProperties(
            streams={sensor: FramePropertyStream(uri=dest_url)}
        ),
        objects={},
    ).model_dump(exclude_none=True)

    # every datarow type other than "image" is reported as a lidar stream
    sensor_type = "camera" if datarow["type"] == "image" else "lidar"

    return {
        "frames": {frame_num: empty_frame},
        "streams": {sensor: {"type": sensor_type, "uri": dest_url}},
    }
@@ -19,7 +19,7 @@ from .constant import (
19
19
  ExportFormat,
20
20
  )
21
21
  from .exporter import Exporter
22
- from .utils import convert_to_bytes
22
+ from .utils import convert_to_bytes, gen_empty_vai
23
23
 
24
24
 
25
25
  def merge_intervals(intervals: list[tuple[int, int]]):
@@ -227,7 +227,9 @@ def aggregate_static_annotations(
227
227
  return large_data
228
228
 
229
229
 
230
- def update_streams_uri(streams: dict, sequence_folder_url: str) -> dict:
230
+ def update_streams_uri(
231
+ streams: dict, sequence_folder_url: str, original_file_name: Optional[str] = None
232
+ ) -> dict:
231
233
  """Update streams under frames uri
232
234
 
233
235
  Example:
@@ -246,6 +248,8 @@ def update_streams_uri(streams: dict, sequence_folder_url: str) -> dict:
246
248
  streams data contains multiple sensors and its uri
247
249
  sequence_folder_url : str
248
250
  sequence folder url destination
251
+ original_file_name: Optional[str]
252
+ original file name for the given image/pcd
249
253
 
250
254
 
251
255
  Returns
@@ -259,6 +263,8 @@ def update_streams_uri(streams: dict, sequence_folder_url: str) -> dict:
259
263
  old_uri_path_list = stream_data["uri"].split("/")
260
264
  file_path = "/".join(old_uri_path_list[-3:])
261
265
  stream_data["uri"] = sequence_folder_url + file_path
266
+ if original_file_name is not None:
267
+ stream_data["original_file_name"] = original_file_name
262
268
  return current_streams
263
269
 
264
270
 
@@ -419,6 +425,7 @@ def aggregate_datarows_annotations(
419
425
  datarow_id: int = datarow["id"]
420
426
  datarow_items: dict = datarow["items"]
421
427
  frame_num = int(datarow["frame_id"])
428
+ original_file_name = os.path.basename(datarow["original_url"])
422
429
 
423
430
  if annotation_name == GROUNDTRUTH:
424
431
  vai = copy.deepcopy(datarow_items.get(GROUND_TRUTH_ANNOTATION_NAME, {}))
@@ -427,6 +434,11 @@ def aggregate_datarows_annotations(
427
434
  datarow_items.get("predictions", {}).get(annotation_name, {})
428
435
  )
429
436
 
437
+ if not vai:
438
+ vai = gen_empty_vai(
439
+ datarow=datarow, sequence_folder_url=sequence_folder_url
440
+ )
441
+
430
442
  # we could retrieve the first data of frames under items
431
443
  # since each items inside datarow contains only one frames
432
444
  try:
@@ -485,6 +497,7 @@ def aggregate_datarows_annotations(
485
497
  update_streams_uri(
486
498
  streams=frame["frame_properties"]["streams"],
487
499
  sequence_folder_url=sequence_folder_url,
500
+ original_file_name=original_file_name,
488
501
  )
489
502
  )
490
503
  # coordinate system can be optional in visionai
@@ -494,7 +507,9 @@ def aggregate_datarows_annotations(
494
507
  if current_vai_sensor not in streams:
495
508
  streams.update(
496
509
  update_streams_uri(
497
- streams=vai["streams"], sequence_folder_url=sequence_folder_url
510
+ streams=vai["streams"],
511
+ sequence_folder_url=sequence_folder_url,
512
+ original_file_name=None,
498
513
  )
499
514
  )
500
515
  if not current_frame.get("objects"):
@@ -506,17 +521,17 @@ def aggregate_datarows_annotations(
506
521
  # list out all frames number of combined frames
507
522
  frame_num_set = {int(num) for num in combined_frames.keys()}
508
523
  # combine all objects under visionai
509
- static_objects_map: dict[
510
- str, list[tuple[int, dict]]
511
- ] = aggregate_static_annotations(
512
- datarows=all_datarows, root_key="objects", annotation_name=annotation_name
524
+ static_objects_map: dict[str, list[tuple[int, dict]]] = (
525
+ aggregate_static_annotations(
526
+ datarows=all_datarows, root_key="objects", annotation_name=annotation_name
527
+ )
513
528
  )
514
529
 
515
530
  # combine all contexts under visionai
516
- static_contexts_map: dict[
517
- str, list[tuple[int, dict]]
518
- ] = aggregate_static_annotations(
519
- datarows=all_datarows, root_key="contexts", annotation_name=annotation_name
531
+ static_contexts_map: dict[str, list[tuple[int, dict]]] = (
532
+ aggregate_static_annotations(
533
+ datarows=all_datarows, root_key="contexts", annotation_name=annotation_name
534
+ )
520
535
  )
521
536
 
522
537
  # retrieve tags under visionai
@@ -562,7 +577,7 @@ def aggregate_datarows_annotations(
562
577
  visionai["contexts"] = combined_contexts_map
563
578
  if tags_under_visionai:
564
579
  visionai["tags"] = tags_under_visionai
565
- return VisionAIModel(**{"visionai": visionai}).dict()
580
+ return VisionAIModel(**{"visionai": visionai}).model_dump(exclude_none=True)
566
581
 
567
582
 
568
583
  @Exporter.register(format=ExportFormat.VISIONAI)
@@ -626,48 +641,79 @@ class ExportVisionAI(ExportAnnotationBase):
626
641
  annotation_name: str,
627
642
  datarow_id_to_frame_datarow_id: dict[int, int],
628
643
  current_batch: list[dict],
629
- pre_frame_datarow_id: int,
644
+ pre_frame_datarow_id: int | None,
630
645
  last_batch: bool,
631
- ) -> list[tuple[bytes, str]]:
646
+ is_sequential: bool,
647
+ ) -> tuple[
648
+ list[tuple[bytes, str]],
649
+ defaultdict[int, list[dict]],
650
+ list[int],
651
+ int | None,
652
+ list[dict],
653
+ ]:
632
654
  annotation_results = []
633
655
 
656
+ def create_aggregated_annotation(
657
+ frame_datarows: dict, seq_id: int
658
+ ) -> tuple[bytes, str]:
659
+ """Helper to create aggregated annotation bytes and path."""
660
+ annot_bytes = convert_to_bytes(
661
+ aggregate_datarows_annotations(
662
+ frame_datarows=frame_datarows,
663
+ sequence_folder_url=os.path.join(
664
+ target_folder, f"{seq_id:012d}", ""
665
+ ),
666
+ annotation_name=annotation_name,
667
+ )
668
+ )
669
+ anno_path = os.path.join(
670
+ f"{seq_id:012d}", "annotations", annotation_name, "visionai.json"
671
+ )
672
+ return (annot_bytes, anno_path)
673
+
674
+ if is_sequential:
675
+ async for datarow in datarow_generator_func(datarow_id_list):
676
+ frame_datarow_id = datarow_id_to_frame_datarow_id[datarow["id"]]
677
+ sequence_frame_datarows[frame_datarow_id].append(datarow)
678
+ current_batch.append(datarow)
679
+
680
+ sequence_id = frame_datarow_id_to_sequence_id[frame_datarow_id]
681
+ annotation_results.append(
682
+ create_aggregated_annotation(sequence_frame_datarows, sequence_id)
683
+ )
684
+ sequence_frame_datarows = defaultdict(list)
685
+
686
+ return (
687
+ annotation_results,
688
+ sequence_frame_datarows,
689
+ datarow_id_list,
690
+ pre_frame_datarow_id,
691
+ current_batch,
692
+ )
693
+
634
694
  async for datarow in datarow_generator_func(datarow_id_list):
635
695
  frame_datarow_id = datarow_id_to_frame_datarow_id[datarow["id"]]
636
696
  current_batch.append(datarow)
637
- sequence_frame_datarows[frame_datarow_id].append(datarow)
638
697
  if pre_frame_datarow_id is None:
639
698
  pre_frame_datarow_id = frame_datarow_id
699
+ sequence_frame_datarows[frame_datarow_id].append(datarow)
640
700
  elif pre_frame_datarow_id != frame_datarow_id:
641
- sequence_id = frame_datarow_id_to_sequence_id[pre_frame_datarow_id]
642
- annot_bytes: bytes = convert_to_bytes(
643
- aggregate_datarows_annotations(
644
- frame_datarows=sequence_frame_datarows,
645
- sequence_folder_url=f"{target_folder.rstrip('/')}/"
646
- + f"{sequence_id:012d}/",
647
- annotation_name=annotation_name,
701
+ # export previous frame when frame_datarow_id changes
702
+ pre_sequence_id = frame_datarow_id_to_sequence_id[pre_frame_datarow_id]
703
+ annotation_results.append(
704
+ create_aggregated_annotation(
705
+ sequence_frame_datarows, pre_sequence_id
648
706
  )
649
707
  )
650
- anno_path = os.path.join(
651
- f"{sequence_id:012d}", "annotations", "groundtruth", "visionai.json"
652
- )
653
- annotation_results.append((annot_bytes, anno_path))
654
708
  sequence_frame_datarows.pop(pre_frame_datarow_id)
709
+ sequence_frame_datarows[frame_datarow_id].append(datarow)
655
710
  pre_frame_datarow_id = frame_datarow_id
656
711
 
657
712
  if last_batch:
658
713
  sequence_id = frame_datarow_id_to_sequence_id[frame_datarow_id]
659
- annot_bytes: bytes = convert_to_bytes(
660
- aggregate_datarows_annotations(
661
- frame_datarows=sequence_frame_datarows,
662
- sequence_folder_url=f"{target_folder.rstrip('/')}/"
663
- + f"{sequence_id:012d}/",
664
- annotation_name=annotation_name,
665
- )
714
+ annotation_results.append(
715
+ create_aggregated_annotation(sequence_frame_datarows, sequence_id)
666
716
  )
667
- anno_path = os.path.join(
668
- f"{sequence_id:012d}", "annotations", "groundtruth", "visionai.json"
669
- )
670
- annotation_results.append((annot_bytes, anno_path))
671
717
  sequence_frame_datarows = defaultdict(list)
672
718
 
673
719
  return (
@@ -684,6 +730,7 @@ class ExportVisionAI(ExportAnnotationBase):
684
730
  sequence_frame_map: dict[int, dict[int, list[int]]],
685
731
  datarow_generator_func: Callable[[list], AsyncGenerator[dict]],
686
732
  annotation_name: str,
733
+ is_sequential: bool,
687
734
  *_,
688
735
  **kwargs,
689
736
  ) -> AsyncGenerator[bytes, str]:
@@ -705,10 +752,10 @@ class ExportVisionAI(ExportAnnotationBase):
705
752
  datarow_id_list.extend(datarow_ids)
706
753
  frame_datarow_id_to_sequence_id[frame_datarow_id] = sequence_id
707
754
  for datarow_id in datarow_ids:
708
- datarow_id_to_frame_datarow_id[
709
- datarow_id
710
- ] = frame_datarow_id
711
- if len(datarow_id_list) >= BATCH_SIZE:
755
+ datarow_id_to_frame_datarow_id[datarow_id] = (
756
+ frame_datarow_id
757
+ )
758
+ if not is_sequential and len(datarow_id_list) >= BATCH_SIZE:
712
759
  (
713
760
  annotation_results,
714
761
  sequence_frame_datarows,
@@ -726,6 +773,7 @@ class ExportVisionAI(ExportAnnotationBase):
726
773
  current_batch,
727
774
  pre_frame_datarow_id,
728
775
  last_batch=False,
776
+ is_sequential=is_sequential,
729
777
  )
730
778
  results = await self.download_batch(
731
779
  session,
@@ -741,11 +789,49 @@ class ExportVisionAI(ExportAnnotationBase):
741
789
  current_batch = []
742
790
  datarow_id_list = []
743
791
 
744
- if len(annotation_results) >= BATCH_SIZE:
745
792
  for annotation_result in annotation_results:
746
793
  yield annotation_result
747
794
  annotation_results = []
748
-
795
+ if is_sequential:
796
+ # process sequence
797
+ (
798
+ annotation_results,
799
+ sequence_frame_datarows,
800
+ datarow_id_list,
801
+ pre_frame_datarow_id,
802
+ current_batch,
803
+ ) = await self.process_datarows(
804
+ datarow_generator_func,
805
+ datarow_id_list,
806
+ frame_datarow_id_to_sequence_id,
807
+ sequence_frame_datarows,
808
+ target_folder,
809
+ annotation_name,
810
+ datarow_id_to_frame_datarow_id,
811
+ current_batch,
812
+ pre_frame_datarow_id,
813
+ last_batch=False,
814
+ is_sequential=is_sequential,
815
+ )
816
+ # download sequence
817
+ results = await self.download_batch(
818
+ session,
819
+ semaphore,
820
+ current_batch,
821
+ datarow_id_to_frame_datarow_id,
822
+ frame_datarow_id_to_sequence_id,
823
+ )
824
+ for result in results:
825
+ if result:
826
+ yield result
827
+ progress_bar.update(1)
828
+ current_batch = []
829
+ datarow_id_list = []
830
+ for annotation_result in annotation_results:
831
+ yield annotation_result
832
+ annotation_results = []
833
+
834
+ # update for non-sequential last batch
749
835
  if datarow_id_list:
750
836
  (
751
837
  annotation_results,
@@ -764,6 +850,7 @@ class ExportVisionAI(ExportAnnotationBase):
764
850
  current_batch,
765
851
  pre_frame_datarow_id,
766
852
  last_batch=True,
853
+ is_sequential=is_sequential,
767
854
  )
768
855
  results = await self.download_batch(
769
856
  session,
@@ -16,7 +16,7 @@ from .constant import (
16
16
  ExportFormat,
17
17
  )
18
18
  from .exporter import Exporter
19
- from .utils import convert_to_bytes
19
+ from .utils import convert_to_bytes, gen_empty_vai
20
20
 
21
21
 
22
22
  @Exporter.register(format=ExportFormat.YOLO)
@@ -152,6 +152,9 @@ def convert_annotation(
152
152
  datarow["items"].get("predictions", {}).get(annotation_name, {})
153
153
  )
154
154
 
155
+ if not visionai_dict:
156
+ visionai_dict = gen_empty_vai(datarow=datarow, sequence_folder_url="")
157
+
155
158
  (category_map, image_labels_map, _, _) = VAItoYOLO.convert_single_visionai_to_yolo(
156
159
  dest_img_folder="",
157
160
  visionai_dict={"visionai": visionai_dict},
@@ -1,5 +1,5 @@
1
1
  import re
2
- from typing import Optional, Union
2
+ from typing import Literal, Optional, Union
3
3
 
4
4
  from pydantic import BaseModel, ConfigDict, field_validator
5
5
 
@@ -133,3 +133,13 @@ class DatasetAPISchema(BaseModel):
133
133
  annotations: Optional[list[str]] = []
134
134
  access_key_id: Optional[str] = None
135
135
  secret_access_key: Optional[str] = None
136
+
137
+
138
class CreateCustomModelAPISchema(BaseModel):
    """Payload schema validated before a create-custom-model request is sent."""

    # identifier of the project the model is created for
    project_id: int
    # name of the custom model
    name: str
    input_classes: list[str]
    # resolution, given as separate width and height values
    resolution_width: int
    resolution_height: int
    # only these three structure identifiers pass validation
    model_structure: Literal["yolov9-c", "yolov9-e", "yolov9-s"]
    weight_url: str
@@ -1,5 +1,5 @@
1
1
  import re
2
- from typing import Optional, Union
2
+ from typing import Literal, Optional, Union
3
3
 
4
4
  from pydantic import BaseModel, ConfigDict, field_validator
5
5
  from pydantic_core.core_schema import ValidationInfo
@@ -543,3 +543,28 @@ class MLModel(BaseModel):
543
543
  return DataverseClient.get_convert_record(
544
544
  convert_record_id=convert_record_id, client_alias=self.client_alias
545
545
  )
546
+
547
def create_custom_model(
    self,
    project: Project,
    name: str,
    input_classes: list[str],
    resolution_width: int,
    resolution_height: int,
    model_structure: Literal["yolov9-c", "yolov9-e", "yolov9-s"],
    weight_url: str,
    permission: str = "",
):
    """Delegate to ``DataverseClient.create_custom_model`` using this object's client alias.

    All arguments are forwarded unchanged; ``self.client_alias`` is passed as
    ``client_alias``.

    Returns
    -------
    Whatever ``DataverseClient.create_custom_model`` returns.
    """
    # function-scope import (presumably to avoid a circular import with the
    # client module -- confirm)
    from ..client import DataverseClient

    forwarded = {
        "project": project,
        "name": name,
        "input_classes": input_classes,
        "resolution_width": resolution_width,
        "resolution_height": resolution_height,
        "model_structure": model_structure,
        "weight_url": weight_url,
        "client_alias": self.client_alias,
        "permission": permission,
    }
    return DataverseClient.create_custom_model(**forwarded)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dataverse-sdk
3
- Version: 2.2.2
3
+ Version: 2.3.1
4
4
  Summary: Dataverse SDK For Python
5
5
  Home-page:
6
6
  Author: LinkerVision
@@ -55,19 +55,21 @@ Interaction with the Dataverse site starts with an instance of the `DataverseCli
55
55
  ```Python
56
56
  from dataverse_sdk import *
57
57
  from dataverse_sdk.connections import get_connection
58
+ from dataverse_sdk.constants import DataverseHost
59
+
58
60
  client = DataverseClient(
59
- host=DataverseHost.PRODUCTION, email="XXX", password="***", service_id="xxxx-xxxx-xx-xxx", alias="default", force = False
61
+ host=DataverseHost.PRODUCTION.value, email="XXX", password="***", service_id="xxxx-xxxx-xx-xxx", alias="default", force = False
60
62
  )
61
63
  assert client is get_connection("default")
62
64
 
63
65
  # Should provide different alias if you are trying to connect to different workspaces
64
66
  client2 = DataverseClient(
65
- host=DataverseHost.PRODUCTION, email="account-2", password="***", service_id="xxxx-xxxx-xx-xxx", alias="client2", force = False
67
+ host=DataverseHost.PRODUCTION.value, email="account-2", password="***", service_id="xxxx-xxxx-xx-xxx", alias="client2", force = False
66
68
  )
67
69
  assert client2 is get_connection(client2.alias)
68
70
 
69
71
  client3 = DataverseClient(
70
- host=DataverseHost.PRODUCTION, email="XXX", password="", service_id="xxxx-xxxx-xx-xxx", access_token="xxx"
72
+ host=DataverseHost.PRODUCTION.value, email="XXX", password="", service_id="xxxx-xxxx-xx-xxx", access_token="xxx"
71
73
  )
72
74
  assert client3 is get_connection(client3.alias)
73
75
  ```
@@ -2,7 +2,7 @@ from setuptools import find_packages, setup
2
2
 
3
3
  AUTHOR = "LinkerVision"
4
4
  PACKAGE_NAME = "dataverse-sdk"
5
- PACKAGE_VERSION = "2.2.2"
5
+ PACKAGE_VERSION = "2.3.1"
6
6
  DESC = "Dataverse SDK For Python"
7
7
  with open("README.md", encoding="utf-8") as fh:
8
8
  long_description = fh.read()
@@ -1,26 +0,0 @@
1
- import json
2
- from typing import Union
3
-
4
- from ..apis.third_party import ThirdPartyAPI
5
-
6
-
7
- def convert_to_bytes(obj: Union[dict, list, str]) -> bytes:
8
- if isinstance(obj, (dict, list)):
9
- jstr = json.dumps(obj)
10
- elif isinstance(obj, str):
11
- jstr = obj
12
- else:
13
- raise TypeError("un-support type")
14
- return bytes(jstr, encoding="utf8")
15
-
16
-
17
- async def download_url_file_async(data_url: str) -> bytes | None:
18
- # get data from url link
19
- try:
20
- data: bytes = await ThirdPartyAPI.async_download_file(
21
- url=data_url, method="GET"
22
- )
23
- except Exception:
24
- print(f"Retrieving data from url {data_url} error")
25
- return None
26
- return data
File without changes