dataverse-sdk 2.3.0__tar.gz → 2.4.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/PKG-INFO +67 -12
  2. {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/README.md +66 -11
  3. {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk/apis/backend.py +39 -6
  4. {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk/client.py +90 -15
  5. {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk/export/base.py +1 -0
  6. {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk/export/coco.py +4 -1
  7. {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk/export/exporter.py +6 -4
  8. dataverse_sdk-2.4.2/dataverse_sdk/export/utils.py +53 -0
  9. {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk/export/visionai.py +132 -45
  10. {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk/export/yolo.py +4 -1
  11. {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk/schemas/api.py +0 -1
  12. {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk/schemas/client.py +12 -10
  13. {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk/utils/utils.py +23 -3
  14. {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk.egg-info/PKG-INFO +67 -12
  15. {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/setup.py +1 -1
  16. dataverse_sdk-2.3.0/dataverse_sdk/export/utils.py +0 -26
  17. {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk/__init__.py +0 -0
  18. {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk/apis/__init__.py +0 -0
  19. {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk/apis/third_party.py +0 -0
  20. {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk/connections.py +0 -0
  21. {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk/constants.py +0 -0
  22. {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk/exceptions/__init__.py +0 -0
  23. {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk/exceptions/client.py +0 -0
  24. {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk/export/__init__.py +0 -0
  25. {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk/export/constant.py +0 -0
  26. {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk/export/vqa.py +0 -0
  27. {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk/schemas/__init__.py +0 -0
  28. {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk/schemas/common.py +0 -0
  29. {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk/schemas/format.py +0 -0
  30. {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk/utils/__init__.py +0 -0
  31. {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk.egg-info/SOURCES.txt +0 -0
  32. {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk.egg-info/dependency_links.txt +0 -0
  33. {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk.egg-info/requires.txt +0 -0
  34. {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk.egg-info/top_level.txt +0 -0
  35. {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dataverse-sdk
3
- Version: 2.3.0
3
+ Version: 2.4.2
4
4
  Summary: Dataverse SDK For Python
5
5
  Home-page:
6
6
  Author: LinkerVision
@@ -55,19 +55,21 @@ Interaction with the Dataverse site starts with an instance of the `DataverseCli
55
55
  ```Python
56
56
  from dataverse_sdk import *
57
57
  from dataverse_sdk.connections import get_connection
58
+ from dataverse_sdk.constants import DataverseHost
59
+
58
60
  client = DataverseClient(
59
- host=DataverseHost.PRODUCTION, email="XXX", password="***", service_id="xxxx-xxxx-xx-xxx", alias="default", force = False
61
+ host=DataverseHost.PRODUCTION.value, email="XXX", password="***", service_id="xxxx-xxxx-xx-xxx", alias="default", force = False
60
62
  )
61
63
  assert client is get_connection("default")
62
64
 
63
65
  # Should provide different alias if you are trying to connect to different workspaces
64
66
  client2 = DataverseClient(
65
- host=DataverseHost.PRODUCTION, email="account-2", password="***", service_id="xxxx-xxxx-xx-xxx", alias="client2", force = False
67
+ host=DataverseHost.PRODUCTION.value, email="account-2", password="***", service_id="xxxx-xxxx-xx-xxx", alias="client2", force = False
66
68
  )
67
69
  assert client2 is get_connection(client2.alias)
68
70
 
69
71
  client3 = DataverseClient(
70
- host=DataverseHost.PRODUCTION, email="XXX", password="", service_id="xxxx-xxxx-xx-xxx", access_token="xxx"
72
+ host=DataverseHost.PRODUCTION.value, email="XXX", password="", service_id="xxxx-xxxx-xx-xxx", access_token="xxx"
71
73
  )
72
74
  assert client3 is get_connection(client3.alias)
73
75
  ```
@@ -345,7 +347,6 @@ dataset_data = {
345
347
  "sequential": False,
346
348
  "render_pcd": False,
347
349
  "generate_metadata": False,
348
- "auto_tagging": ["timeofday"],
349
350
  "sas_token": "azure sas token", # only for azure storage
350
351
  "access_key_id" : "aws s3 access key id",# only for private s3 bucket, don't need to assign it in case of public s3 bucket or azure data source
351
352
  "secret_access_key": "aws s3 secret access key"# only for private s3 bucket, don't need to assign it in case of public s3 bucket or azure data source
@@ -370,7 +371,6 @@ dataset = project.create_dataset(**dataset_data)
370
371
  | sequential | bool | False | data is sequential or not |
371
372
  | render_pcd | bool | False | render pcd preview image or not |
372
373
  | generate_metadata | bool | False | generate image meta data or not |
373
- | auto_tagging | list | None | generate auto_tagging with target models `["weather", "scene", "timeofday"]` |
374
374
  | description | str | None | your dataset description |
375
375
  | sas_token | str | None | SAS token for Azure container |
376
376
  | access_key_id | str | None | access key id for AWS private s3 bucket |
@@ -397,7 +397,6 @@ dataset_data2 = {
397
397
  "annotations": ["groundtruth"], # remove it when type is DatasetType.RAW_DATA
398
398
  "sequential": False,
399
399
  "generate_metadata": False,
400
- "auto_tagging": []
401
400
  "sas_token": ""
402
401
  }
403
402
  dataset2 = project.create_dataset(**dataset_data2)
@@ -452,15 +451,57 @@ client.download_export_dataslice_data(dataslice_id=504, export_record_id=export_
452
451
 
453
452
 
454
453
  ### List Models
455
- The `list_models` method will list all the models in the given project
454
+
455
+ The `list_models` method will list all the models in the given project. You can filter models by type using the `type` parameter.
456
+
457
+ #### Basic Usage
456
458
 
457
459
  ```Python
458
- #1
459
- models = client.list_models(project_id = 1, client_alias=client.alias)
460
- #2
460
+ # Method 1: Using client
461
+ models = client.list_models(project_id=1, client_alias=client.alias)
462
+
463
+ # Method 2: Using project object
461
464
  project = client.get_project(project_id=1)
462
465
  models = project.list_models()
463
466
  ```
467
+
468
+ #### Filtering by Model Type
469
+
470
+ You can filter models by type using strings or lists of strings. The SDK supports multiple model types:
471
+
472
+ ```Python
473
+ # Filter by single type using string
474
+ models = client.list_models(project_id=1, type="trained", client_alias=client.alias)
475
+
476
+ # Filter by single type using list
477
+ models = client.list_models(project_id=1, type=["trained"], client_alias=client.alias)
478
+
479
+ # Filter by multiple types using list
480
+ models = client.list_models(
481
+ project_id=1,
482
+ type=["trained", "byom", "uploaded"],
483
+ client_alias=client.alias
484
+ )
485
+ ```
486
+
487
+ #### Available Model Types
488
+
489
+ | String Value | Description |
490
+ | ------------ | -------------------- |
491
+ | `"trained"` | Trained models |
492
+ | `"byom"` | Bring Your Own Model |
493
+ | `"uploaded"` | Uploaded models |
494
+
495
+ #### Input Arguments
496
+
497
+ | Argument name | Type/Options | Default | Description |
498
+ | ------------- | ----------------------------------------------------------------- | ------------------- | ------------------------ |
499
+ | project_id | int | \*-- | The project ID |
500
+ | client_alias | str | None | The client alias |
501
+ | type | "trained", "byom", "uploaded", list["trained", "byom", "uploaded"] | ["trained", "byom"] | Model types to filter by |
502
+
503
+ `*--`: required argument without default
504
+
464
505
  <br>
465
506
 
466
507
  ### Get Model
@@ -574,7 +615,21 @@ python tools/export_dataslice.py -host https://staging.visionai.linkervision.ai/
574
615
  ### Export Large Dataslice and download files
575
616
  ```
576
617
  python tools/export_dataslice_large.py -host https://visionai.linkervision.ai/dataverse/curation -e {your-account-email} -p {PASSWORD} -s {service-id} -dataslice {dataslice_id} --anno {export-model-name / groundtruth} --target_folder {folder path} --export-format {coco, visionai, yolo, vlm ...etc}
577
- ``````
618
+ ```
619
+
620
+ ### Upload videos to create session tasks
621
+ ```
622
+ python tools/upload_videos_create_session.py -host https://visionai.linkervision.ai/dataverse/curation -e {your-account-email} -p {PASSWORD} -s {service-id} -f {/YOUR/VIDEOS/LOCAL/FOLDER} -n {session-name}
623
+ ```
624
+
625
+ - Advanced arguments for video curation (sequential data):
626
+
627
+ | Argument name | Type/Options | Default | Description |
628
+ |----------------------------|----------------|-----------|-----------------------------------------------------------------------------|
629
+ | --video-curation | bool | False | enable video curation (sequential data) |
630
+ | --global-mean-threshold | float | 0.001 | Threshold for the video's global average motion magnitude (0.000001 ~ 0.01). Higher values are stricter (flag more clips as low-motion); lower values are looser (flag fewer clips). |
631
+ | --per-patch-256-min-threshold | float | 0.000001 | Minimum average motion magnitude allowed in any 256x256 pixel patch (0.000001 ~ 0.0001). Higher values are stricter per-patch (flag more clips when any 256x256 patch is too still); lower values are looser (flag fewer clips). |
632
+ | --split-duration | int | 5 | Set the length of each split clip in seconds (2 ~ 30s). |
578
633
 
579
634
  ## Links to language repos
580
635
 
@@ -28,19 +28,21 @@ Interaction with the Dataverse site starts with an instance of the `DataverseCli
28
28
  ```Python
29
29
  from dataverse_sdk import *
30
30
  from dataverse_sdk.connections import get_connection
31
+ from dataverse_sdk.constants import DataverseHost
32
+
31
33
  client = DataverseClient(
32
- host=DataverseHost.PRODUCTION, email="XXX", password="***", service_id="xxxx-xxxx-xx-xxx", alias="default", force = False
34
+ host=DataverseHost.PRODUCTION.value, email="XXX", password="***", service_id="xxxx-xxxx-xx-xxx", alias="default", force = False
33
35
  )
34
36
  assert client is get_connection("default")
35
37
 
36
38
  # Should provide different alias if you are trying to connect to different workspaces
37
39
  client2 = DataverseClient(
38
- host=DataverseHost.PRODUCTION, email="account-2", password="***", service_id="xxxx-xxxx-xx-xxx", alias="client2", force = False
40
+ host=DataverseHost.PRODUCTION.value, email="account-2", password="***", service_id="xxxx-xxxx-xx-xxx", alias="client2", force = False
39
41
  )
40
42
  assert client2 is get_connection(client2.alias)
41
43
 
42
44
  client3 = DataverseClient(
43
- host=DataverseHost.PRODUCTION, email="XXX", password="", service_id="xxxx-xxxx-xx-xxx", access_token="xxx"
45
+ host=DataverseHost.PRODUCTION.value, email="XXX", password="", service_id="xxxx-xxxx-xx-xxx", access_token="xxx"
44
46
  )
45
47
  assert client3 is get_connection(client3.alias)
46
48
  ```
@@ -318,7 +320,6 @@ dataset_data = {
318
320
  "sequential": False,
319
321
  "render_pcd": False,
320
322
  "generate_metadata": False,
321
- "auto_tagging": ["timeofday"],
322
323
  "sas_token": "azure sas token", # only for azure storage
323
324
  "access_key_id" : "aws s3 access key id",# only for private s3 bucket, don't need to assign it in case of public s3 bucket or azure data source
324
325
  "secret_access_key": "aws s3 secret access key"# only for private s3 bucket, don't need to assign it in case of public s3 bucket or azure data source
@@ -343,7 +344,6 @@ dataset = project.create_dataset(**dataset_data)
343
344
  | sequential | bool | False | data is sequential or not |
344
345
  | render_pcd | bool | False | render pcd preview image or not |
345
346
  | generate_metadata | bool | False | generate image meta data or not |
346
- | auto_tagging | list | None | generate auto_tagging with target models `["weather", "scene", "timeofday"]` |
347
347
  | description | str | None | your dataset description |
348
348
  | sas_token | str | None | SAS token for Azure container |
349
349
  | access_key_id | str | None | access key id for AWS private s3 bucket |
@@ -370,7 +370,6 @@ dataset_data2 = {
370
370
  "annotations": ["groundtruth"], # remove it when type is DatasetType.RAW_DATA
371
371
  "sequential": False,
372
372
  "generate_metadata": False,
373
- "auto_tagging": []
374
373
  "sas_token": ""
375
374
  }
376
375
  dataset2 = project.create_dataset(**dataset_data2)
@@ -425,15 +424,57 @@ client.download_export_dataslice_data(dataslice_id=504, export_record_id=export_
425
424
 
426
425
 
427
426
  ### List Models
428
- The `list_models` method will list all the models in the given project
427
+
428
+ The `list_models` method will list all the models in the given project. You can filter models by type using the `type` parameter.
429
+
430
+ #### Basic Usage
429
431
 
430
432
  ```Python
431
- #1
432
- models = client.list_models(project_id = 1, client_alias=client.alias)
433
- #2
433
+ # Method 1: Using client
434
+ models = client.list_models(project_id=1, client_alias=client.alias)
435
+
436
+ # Method 2: Using project object
434
437
  project = client.get_project(project_id=1)
435
438
  models = project.list_models()
436
439
  ```
440
+
441
+ #### Filtering by Model Type
442
+
443
+ You can filter models by type using strings or lists of strings. The SDK supports multiple model types:
444
+
445
+ ```Python
446
+ # Filter by single type using string
447
+ models = client.list_models(project_id=1, type="trained", client_alias=client.alias)
448
+
449
+ # Filter by single type using list
450
+ models = client.list_models(project_id=1, type=["trained"], client_alias=client.alias)
451
+
452
+ # Filter by multiple types using list
453
+ models = client.list_models(
454
+ project_id=1,
455
+ type=["trained", "byom", "uploaded"],
456
+ client_alias=client.alias
457
+ )
458
+ ```
459
+
460
+ #### Available Model Types
461
+
462
+ | String Value | Description |
463
+ | ------------ | -------------------- |
464
+ | `"trained"` | Trained models |
465
+ | `"byom"` | Bring Your Own Model |
466
+ | `"uploaded"` | Uploaded models |
467
+
468
+ #### Input Arguments
469
+
470
+ | Argument name | Type/Options | Default | Description |
471
+ | ------------- | ----------------------------------------------------------------- | ------------------- | ------------------------ |
472
+ | project_id | int | \*-- | The project ID |
473
+ | client_alias | str | None | The client alias |
474
+ | type | "trained", "byom", "uploaded", list["trained", "byom", "uploaded"] | ["trained", "byom"] | Model types to filter by |
475
+
476
+ `*--`: required argument without default
477
+
437
478
  <br>
438
479
 
439
480
  ### Get Model
@@ -547,7 +588,21 @@ python tools/export_dataslice.py -host https://staging.visionai.linkervision.ai/
547
588
  ### Export Large Dataslice and download files
548
589
  ```
549
590
  python tools/export_dataslice_large.py -host https://visionai.linkervision.ai/dataverse/curation -e {your-account-email} -p {PASSWORD} -s {service-id} -dataslice {dataslice_id} --anno {export-model-name / groundtruth} --target_folder {folder path} --export-format {coco, visionai, yolo, vlm ...etc}
550
- ``````
591
+ ```
592
+
593
+ ### Upload videos to create session tasks
594
+ ```
595
+ python tools/upload_videos_create_session.py -host https://visionai.linkervision.ai/dataverse/curation -e {your-account-email} -p {PASSWORD} -s {service-id} -f {/YOUR/VIDEOS/LOCAL/FOLDER} -n {session-name}
596
+ ```
597
+
598
+ - Advanced arguments for video curation (sequential data):
599
+
600
+ | Argument name | Type/Options | Default | Description |
601
+ |----------------------------|----------------|-----------|-----------------------------------------------------------------------------|
602
+ | --video-curation | bool | False | enable video curation (sequential data) |
603
+ | --global-mean-threshold | float | 0.001 | Threshold for the video's global average motion magnitude (0.000001 ~ 0.01). Higher values are stricter (flag more clips as low-motion); lower values are looser (flag fewer clips). |
604
+ | --per-patch-256-min-threshold | float | 0.000001 | Minimum average motion magnitude allowed in any 256x256 pixel patch (0.000001 ~ 0.0001). Higher values are stricter per-patch (flag more clips when any 256x256 patch is too still); lower values are looser (flag fewer clips). |
605
+ | --split-duration | int | 5 | Set the length of each split clip in seconds (2 ~ 30s). |
551
606
 
552
607
  ## Links to language repos
553
608
 
@@ -292,9 +292,14 @@ class BackendAPI:
292
292
  )
293
293
  return resp.json()
294
294
 
295
- def list_ml_models(self, project_id: int, type: str = "trained", **kwargs) -> list:
295
+ def list_ml_models(
296
+ self,
297
+ project_id: int,
298
+ type: str = "trained,byom",
299
+ **kwargs,
300
+ ) -> list:
296
301
  kwargs["project"] = project_id
297
- kwargs["type"] = type
302
+ kwargs["type__in"] = type
298
303
  resp = self.send_request(
299
304
  url=f"{self.host}/api/ml_models/?{urlencode(kwargs)}",
300
305
  method="get",
@@ -374,7 +379,6 @@ class BackendAPI:
374
379
  data_folder: str,
375
380
  sequential: bool = False,
376
381
  generate_metadata: bool = False,
377
- auto_tagging: Optional[list] = None,
378
382
  render_pcd: bool = False,
379
383
  container_name: Optional[str] = None,
380
384
  sas_token: Optional[str] = None,
@@ -384,8 +388,6 @@ class BackendAPI:
384
388
  secret_access_key: Optional[str] = None,
385
389
  create_dataset_uuid: Optional[str] = None,
386
390
  ) -> dict:
387
- if auto_tagging is None:
388
- auto_tagging = []
389
391
  if annotations is None:
390
392
  annotations = []
391
393
  payload_data = {
@@ -400,10 +402,10 @@ class BackendAPI:
400
402
  "sequential": sequential,
401
403
  "annotation_format": annotation_format,
402
404
  "generate_metadata": generate_metadata,
403
- "auto_tagging": auto_tagging,
404
405
  "render_pcd": render_pcd,
405
406
  "description": description if description else "",
406
407
  "annotations": annotations if annotations else [],
408
+ "auto_tagging": [], # FIXME: auto_tagging field is still required by production API.
407
409
  }
408
410
 
409
411
  aws_access_key = {secret_access_key, access_key_id}
@@ -619,6 +621,37 @@ class AsyncBackendAPI:
619
621
  json=payload,
620
622
  )
621
623
 
624
+ async def generate_session_task_presigned_urls(self, filenames: list[str]) -> dict:
625
+ return await self.async_send_request(
626
+ url=f"{self.host}/api/session_tasks/presigned-urls/",
627
+ method="post",
628
+ headers=self.headers,
629
+ data={"filenames": filenames},
630
+ )
631
+
632
+ async def create_session_task(
633
+ self,
634
+ name: str,
635
+ data_folder: str,
636
+ video_curation: bool = False,
637
+ curation_config: Optional[dict] = None,
638
+ ) -> dict:
639
+ payload_data = {
640
+ "name": name,
641
+ "data_folder": data_folder,
642
+ "video_curation": video_curation,
643
+ }
644
+
645
+ if video_curation and curation_config:
646
+ payload_data["curation_config"] = curation_config
647
+
648
+ return await self.async_send_request(
649
+ url=f"{self.host}/api/session_tasks/",
650
+ method="post",
651
+ headers=self.headers,
652
+ data=payload_data,
653
+ )
654
+
622
655
  async def get_project(self, project_id: str) -> dict:
623
656
  try:
624
657
  resp = await self.client.get(
@@ -6,7 +6,7 @@ import platform
6
6
  from asyncio import AbstractEventLoop, Semaphore
7
7
  from collections import deque
8
8
  from pathlib import Path
9
- from typing import Optional, Union
9
+ from typing import Literal, Optional, Union
10
10
  from uuid import uuid4
11
11
 
12
12
  from aiofiles import open as aio_open
@@ -820,9 +820,9 @@ class DataverseClient:
820
820
  "option": {},
821
821
  }
822
822
  for ontology_class in project.ontology.classes:
823
- project_ontology_ids["ontology_class"][
824
- ontology_class.id
825
- ] = ontology_class.aliases
823
+ project_ontology_ids["ontology_class"][ontology_class.id] = (
824
+ ontology_class.aliases
825
+ )
826
826
  for attr in ontology_class.attributes:
827
827
  project_ontology_ids["attribute"][attr.id] = attr.aliases
828
828
  for option in attr.options:
@@ -1178,6 +1178,12 @@ of this project OR has been added before"
1178
1178
  client: Optional["DataverseClient"] = None,
1179
1179
  client_alias: Optional[str] = None,
1180
1180
  project: Optional["Project"] = None,
1181
+ type: Optional[
1182
+ Union[
1183
+ Literal["trained", "byom", "uploaded"],
1184
+ list[Literal["trained", "byom", "uploaded"]],
1185
+ ]
1186
+ ] = ["trained", "byom"],
1181
1187
  ) -> list[MLModel]:
1182
1188
  """Get the model list by project id
1183
1189
 
@@ -1189,10 +1195,11 @@ of this project OR has been added before"
1189
1195
  client_alias: Optional[str], by default None (should be provided if client is None)
1190
1196
  project: Optional["Project"]
1191
1197
  project basemodel, by default None
1198
+ type : Optional[Union[Literal["trained", "byom", "uploaded"], list[Literal["trained", "byom", "uploaded"]]]], by default ["trained", "byom"]
1192
1199
 
1193
1200
  Returns
1194
1201
  -------
1195
- list
1202
+ list[MLModel]
1196
1203
  list of model items
1197
1204
 
1198
1205
  Raises
@@ -1204,7 +1211,9 @@ of this project OR has been added before"
1204
1211
  client=client, client_alias=client_alias
1205
1212
  )
1206
1213
  try:
1207
- model_list: list = api.list_ml_models(project_id=project_id)
1214
+ if isinstance(type, list):
1215
+ type = ",".join(type)
1216
+ model_list: list = api.list_ml_models(project_id=project_id, type=type)
1208
1217
  except DataverseExceptionBase:
1209
1218
  logging.exception("Got api error from Dataverse")
1210
1219
  raise
@@ -1484,7 +1493,6 @@ of this project OR has been added before"
1484
1493
  annotations: Optional[list] = None,
1485
1494
  sequential: bool = False,
1486
1495
  generate_metadata: bool = False,
1487
- auto_tagging: Optional[list] = None,
1488
1496
  render_pcd: bool = False,
1489
1497
  description: Optional[str] = None,
1490
1498
  client: Optional["DataverseClient"] = None,
@@ -1522,8 +1530,6 @@ of this project OR has been added before"
1522
1530
  sequential or not., by default False
1523
1531
  generate_metadata : bool, optional
1524
1532
  generate meta data or not, by default False
1525
- auto_tagging: list
1526
- generate auto_tagging with target models (weather/scene/timeofday)
1527
1533
  description : Optional[str], optional
1528
1534
  description of the dataset, by default None
1529
1535
  render_pcd : bool, optional
@@ -1550,17 +1556,15 @@ of this project OR has been added before"
1550
1556
  """
1551
1557
  if annotations is None:
1552
1558
  annotations = []
1553
- if auto_tagging is None:
1554
- auto_tagging = []
1555
1559
 
1556
1560
  if type == DatasetType.ANNOTATED_DATA and len(annotations) == 0:
1557
1561
  raise ValueError(
1558
1562
  "Annotated data should provide at least one annotation folder name (groundtruth or model_name)"
1559
1563
  )
1560
- api, client_alia = DataverseClient._get_api_client(
1564
+ api, client_alias = DataverseClient._get_api_client(
1561
1565
  client=client, client_alias=client_alias, is_async=False
1562
1566
  )
1563
- async_api, client_alia = DataverseClient._get_api_client(
1567
+ async_api, client_alias = DataverseClient._get_api_client(
1564
1568
  client=client, client_alias=client_alias, is_async=True
1565
1569
  )
1566
1570
 
@@ -1586,7 +1590,6 @@ of this project OR has been added before"
1586
1590
  sas_token=sas_token,
1587
1591
  sequential=sequential,
1588
1592
  generate_metadata=generate_metadata,
1589
- auto_tagging=auto_tagging,
1590
1593
  render_pcd=render_pcd,
1591
1594
  description=description,
1592
1595
  access_key_id=access_key_id,
@@ -1610,7 +1613,6 @@ of this project OR has been added before"
1610
1613
  "project": project,
1611
1614
  "sequential": sequential,
1612
1615
  "generate_metadata": generate_metadata,
1613
- "auto_tagging": auto_tagging,
1614
1616
  "annotations": annotations,
1615
1617
  }
1616
1618
  )
@@ -1975,6 +1977,79 @@ of this project OR has been added before"
1975
1977
  detail=f"the format {annotation_format} is not supported for local upload"
1976
1978
  )
1977
1979
 
1980
+ async def upload_videos_create_session(
1981
+ self,
1982
+ name: str,
1983
+ video_folder: str,
1984
+ video_curation: bool = False,
1985
+ curation_config: Optional[dict] = None,
1986
+ ) -> dict:
1987
+ video_path = Path(video_folder)
1988
+ if not video_path.exists() or not video_path.is_dir():
1989
+ raise ValueError(f"Video folder does not exist: {video_folder}")
1990
+
1991
+ video_extensions = {".mp4", ".avi", ".mov", ".mpeg", ".flv"}
1992
+ video_paths = [
1993
+ path
1994
+ for path in video_path.iterdir()
1995
+ if path.is_file() and path.suffix.lower() in video_extensions
1996
+ ]
1997
+ if not video_paths:
1998
+ raise ValueError(f"No video files found in {video_folder}")
1999
+
2000
+ filenames = [video.name for video in video_paths]
2001
+ logging.info(f"Found {len(filenames)} videos to upload")
2002
+
2003
+ try:
2004
+ # Step 1: Get presigned URLs
2005
+ logging.info("Getting presigned URLs...")
2006
+ presigned_data = (
2007
+ await self._async_api_client.generate_session_task_presigned_urls(
2008
+ filenames=filenames
2009
+ )
2010
+ )
2011
+ data_folder = presigned_data["data_folder"]
2012
+ url_info = presigned_data["url_info"]
2013
+
2014
+ # Step 2: Upload videos concurrently with progress bar
2015
+ logging.info("Uploading videos...")
2016
+ upload_task_queue = deque([(video_paths, url_info)])
2017
+ failed_file_info_batches = await DataverseClient.run_upload_tasks(
2018
+ upload_task_queue
2019
+ )
2020
+ if failed_file_info_batches:
2021
+ raise ClientConnectionError(
2022
+ f"Failed uploads: {failed_file_info_batches}"
2023
+ )
2024
+
2025
+ # Step 3: Create session task
2026
+ logging.info("Creating session task...")
2027
+ session_task_data = await self._async_api_client.create_session_task(
2028
+ name=name,
2029
+ data_folder=data_folder,
2030
+ video_curation=video_curation,
2031
+ curation_config=curation_config,
2032
+ )
2033
+ logging.info(f"✅ Session task '{name}' created successfully!")
2034
+
2035
+ return session_task_data
2036
+
2037
+ except DataverseExceptionBase:
2038
+ logging.exception("Got api error from Dataverse")
2039
+ raise
2040
+ except Exception as e:
2041
+ try:
2042
+ error_data = json.loads(
2043
+ getattr(getattr(e, "response", None), "text", str(e))
2044
+ )
2045
+ error_message = next(iter(error_data.get("error", {}).values()))[0]
2046
+ except Exception:
2047
+ error_message = str(e)
2048
+
2049
+ raise ClientConnectionError(
2050
+ f"Failed to create session task: {error_message}"
2051
+ )
2052
+
1978
2053
 
1979
2054
  class AsyncThirdPartyAPI:
1980
2055
  transport = AsyncHTTPTransport(
@@ -12,6 +12,7 @@ class ExportAnnotationBase(abc.ABC):
12
12
  sequence_frame_map: dict[int, dict[int, list[int]]],
13
13
  datarow_generator_func: Callable[[list], Generator[dict]],
14
14
  annotation_name: str,
15
+ is_sequential: bool,
15
16
  *args,
16
17
  **kwargs,
17
18
  ) -> Generator[bytes, str]:
@@ -22,7 +22,7 @@ from .constant import (
22
22
  ExportFormat,
23
23
  )
24
24
  from .exporter import Exporter
25
- from .utils import convert_to_bytes
25
+ from .utils import convert_to_bytes, gen_empty_vai
26
26
 
27
27
 
28
28
  @Exporter.register(format=ExportFormat.COCO)
@@ -156,6 +156,9 @@ def convert_annotation(
156
156
  datarow["items"].get("predictions", {}).get(annotation_name, {})
157
157
  )
158
158
 
159
+ if not target_visionai:
160
+ target_visionai = gen_empty_vai(datarow=datarow, sequence_folder_url="")
161
+
159
162
  (
160
163
  category_idx_map,
161
164
  image_update,
@@ -77,6 +77,7 @@ class Exporter:
77
77
  sequence_frame_map: dict,
78
78
  question_id_map: dict,
79
79
  annotation_name: str,
80
+ is_sequential: bool,
80
81
  ) -> AsyncGenerator[tuple[bytes, str]]:
81
82
  async for data, path in self.export_annot.producer(
82
83
  class_names=class_names,
@@ -85,6 +86,7 @@ class Exporter:
85
86
  target_folder=self.target_folder,
86
87
  datarow_generator_func=await self._gen(self.curation_api),
87
88
  annotation_name=annotation_name,
89
+ is_sequential=is_sequential,
88
90
  ):
89
91
  if not path:
90
92
  continue
@@ -141,7 +143,7 @@ class Exporter:
141
143
  gen: AsyncGenerator = curation_api.get_datarows(
142
144
  id_set_list=id_chunks,
143
145
  batch_size=BATCH_SIZE,
144
- fields="id,items,vlm_items,url,frame_id,image_width,image_height,sensor_name,original_url",
146
+ fields="id,items,vlm_items,url,frame_id,image_width,image_height,sensor_name,original_url,type",
145
147
  )
146
148
  async for batched_datarow in gen:
147
149
  for datarow in batched_datarow:
@@ -238,9 +240,9 @@ def get_datarow_sequences(
238
240
  for frame_datarow_id, datarow_id_list in sequence_frame_map[
239
241
  sequence_datarow_id
240
242
  ].items():
241
- new_datarows_sequence_map[sequence_order][
242
- frame_datarow_id
243
- ] = datarow_id_list
243
+ new_datarows_sequence_map[sequence_order][frame_datarow_id] = (
244
+ datarow_id_list
245
+ )
244
246
  if not is_sequential or sequence_datarow_id == NONE_SEQUENCE_DATAROW_ID:
245
247
  sequence_order += 1
246
248
  if is_sequential:
@@ -0,0 +1,53 @@
1
+ import json
2
+ from typing import Union
3
+
4
+ from visionai_data_format.schemas.visionai_schema import (
5
+ Frame,
6
+ FrameProperties,
7
+ FramePropertyStream,
8
+ )
9
+
10
+ from ..apis.third_party import ThirdPartyAPI
11
+
12
+
13
+ def convert_to_bytes(obj: Union[dict, list, str]) -> bytes:
14
+ if isinstance(obj, (dict, list)):
15
+ jstr = json.dumps(obj)
16
+ elif isinstance(obj, str):
17
+ jstr = obj
18
+ else:
19
+ raise TypeError("un-support type")
20
+ return bytes(jstr, encoding="utf8")
21
+
22
+
23
+ async def download_url_file_async(data_url: str) -> bytes | None:
24
+ # get data from url link
25
+ try:
26
+ data: bytes = await ThirdPartyAPI.async_download_file(
27
+ url=data_url, method="GET"
28
+ )
29
+ except Exception:
30
+ print(f"Retrieving data from url {data_url} error")
31
+ return None
32
+ return data
33
+
34
+
35
+ def gen_empty_vai(datarow: dict, sequence_folder_url: str) -> dict:
36
+ new_sensor_data_folder = f"{sequence_folder_url}/data/{datarow['sensor_name']}/"
37
+ dest_url = f"{new_sensor_data_folder}{datarow['url'].split('/')[-1]}"
38
+
39
+ # generate visionai empty frame
40
+ frames = {}
41
+ frame_num = datarow["frame_id"]
42
+ frames[frame_num] = Frame(
43
+ frame_properties=FrameProperties(
44
+ streams={datarow["sensor_name"]: FramePropertyStream(uri=dest_url)}
45
+ ),
46
+ objects={},
47
+ ).model_dump(exclude_none=True)
48
+ if datarow["type"] == "image":
49
+ stream = {datarow["sensor_name"]: {"type": "camera", "uri": dest_url}}
50
+ else:
51
+ stream = {datarow["sensor_name"]: {"type": "lidar", "uri": dest_url}}
52
+
53
+ return {"frames": frames, "streams": stream}