PyPI - dataverse-sdk - Versions diffs - 2.3.0__tar.gz → 2.4.2__tar.gz - Mend

dataverse-sdk 2.3.0__tar.gz → 2.4.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

{dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dataverse-sdk
-Version: 2.3.0
+Version: 2.4.2
 Summary: Dataverse SDK For Python
 Home-page:
 Author: LinkerVision
@@ -55,19 +55,21 @@ Interaction with the Dataverse site starts with an instance of the `DataverseCli
 ```Python
 from dataverse_sdk import *
 from dataverse_sdk.connections import get_connection
+from dataverse_sdk.constants import DataverseHost
 client = DataverseClient(
-    host=DataverseHost.PRODUCTION, email="XXX", password="***", service_id="xxxx-xxxx-xx-xxx", alias="default", force = False
+    host=DataverseHost.PRODUCTION.value, email="XXX", password="***", service_id="xxxx-xxxx-xx-xxx", alias="default", force = False
 )
 assert client is get_connection("default")
 # Should provide different alias if you are trying to connect to different workspaces
 client2 = DataverseClient(
-    host=DataverseHost.PRODUCTION, email="account-2", password="***", service_id="xxxx-xxxx-xx-xxx", alias="client2", force = False
+    host=DataverseHost.PRODUCTION.value, email="account-2", password="***", service_id="xxxx-xxxx-xx-xxx", alias="client2", force = False
 )
 assert client2 is get_connection(client2.alias)
 client3 = DataverseClient(
-    host=DataverseHost.PRODUCTION, email="XXX", password="", service_id="xxxx-xxxx-xx-xxx", access_token="xxx"
+    host=DataverseHost.PRODUCTION.value, email="XXX", password="", service_id="xxxx-xxxx-xx-xxx", access_token="xxx"
 )
 assert client3 is get_connection(client3.alias)
 ```
@@ -345,7 +347,6 @@ dataset_data = {
     "sequential": False,
     "render_pcd": False,
     "generate_metadata": False,
-    "auto_tagging": ["timeofday"],
     "sas_token": "azure sas token",  # only for azure storage
     "access_key_id" : "aws s3 access key id",# only for private s3 bucket, don't need to assign it in case of public s3 bucket or azure data source
     "secret_access_key": "aws s3 secret access key"# only for private s3 bucket, don't need to assign it in case of public s3 bucket or azure data source
@@ -370,7 +371,6 @@ dataset = project.create_dataset(**dataset_data)
 | sequential | bool | False | data is sequential or not   |
 | render_pcd | bool | False | render pcd preview image or not |
 | generate_metadata | bool | False | generate image meta data or not   |
-| auto_tagging | list | None | generate auto_tagging with target models `["weather", "scene", "timeofday"]`   |
 | description  | str | None | your dataset description  |
 | sas_token | str | None | SAStoken for azure container  |
 | access_key_id | str | None |  access key id for AWS private s3 bucket  |
@@ -397,7 +397,6 @@ dataset_data2 = {
     "annotations": ["groundtruth"],  # remove it when type is DatasetType.RAW_DATA
     "sequential": False,
     "generate_metadata": False,
-    "auto_tagging": []
     "sas_token": ""
 }
 dataset2 = project.create_dataset(**dataset_data2)
@@ -452,15 +451,57 @@ client.download_export_dataslice_data(dataslice_id=504, export_record_id=export_
 ### List Models
-The `list_models` method will list all the models in the given project
+The `list_models` method will list all the models in the given project. You can filter models by type using the `type` parameter.
+#### Basic Usage
 ```Python
-#1
-models = client.list_models(project_id = 1, client_alias=client.alias)
-#2
+# Method 1: Using client
+models = client.list_models(project_id=1, client_alias=client.alias)
+# Method 2: Using project object
 project = client.get_project(project_id=1)
 models = project.list_models()
 ```
+#### Filtering by Model Type
+You can filter models by type using strings or lists of strings. The SDK supports multiple model types:
+```Python
+# Filter by single type using string
+models = client.list_models(project_id=1, type="trained", client_alias=client.alias)
+# Filter by single type using list
+models = client.list_models(project_id=1, type=["trained"], client_alias=client.alias)
+# Filter by multiple types using list
+models = client.list_models(
+    project_id=1,
+    type=["trained", "byom", "uploaded"],
+    client_alias=client.alias
+)
+```
+#### Available Model Types
+| String Value | Description          |
+| ------------ | -------------------- |
+| `"trained"`  | Trained models       |
+| `"byom"`     | Bring Your Own Model |
+| `"uploaded"` | Uploaded models      |
+#### Input Arguments
+| Argument name | Type/Options                                                      | Default             | Description              |
+| ------------- | ----------------------------------------------------------------- | ------------------- | ------------------------ |
+| project_id    | int                                                               | \*--                | The project ID           |
+| client_alias  | str                                                               | None                | The client alias         |
+| type          | "trained", "byom", "uploaded", list["trained", "byom", "uploaded"] | ["trained", "byom"] | Model types to filter by |
+`*--`: required argument without default
 <br>
 ### Get Model
@@ -574,7 +615,21 @@ python tools/export_dataslice.py -host https://staging.visionai.linkervision.ai/
 ### Export Large Dataslice and download files
 ```
 python tools/export_dataslice_large.py -host https://visionai.linkervision.ai/dataverse/curation -e {your-account-email} -p {PASSWORD} -s {service-id} -dataslice {dataslice_id} --anno {export-model-name / groundtruth} --target_folder {folder path} --export-format {coco, visionai, yolo, vlm ...etc}
-``````
+```
+### Upload videos to create session tasks
+```
+python tools/upload_videos_create_session.py -host https://visionai.linkervision.ai/dataverse/curation -e {your-account-email} -p {PASSWORD} -s {service-id} -f {/YOUR/VIDEOS/LOCAL/FOLDER} -n {session-name}
+```
+- Advanced arguments for video curation (sequential data):
+| Argument name              | Type/Options   | Default   | Description                                                                 |
+|----------------------------|----------------|-----------|-----------------------------------------------------------------------------|
+| --video-curation            | bool | False     | enable video curation (sequential data)                                                     |
+| --global-mean-threshold     | float          | 0.001     | Threshold for the video's global average motion magnitude (0.000001 ~ 0.01). Higher values are stricter (flag more clips as low-motion); lower values are looser (flag fewer clips). |
+| --per-patch-256-min-threshold | float        | 0.000001  | Minimum average motion magnitude allowed in any 256x256 pixel patch (0.000001 ~ 0.0001). Higher values are stricter per-patch (flag more clips when any 256x256 patch is too still); lower values are looser (flag fewer clips). |
+| --split-duration            | int            | 5         | Set the length of each split clip in seconds (2 ~ 30s).                     |
 ## Links to language repos

{dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/README.md RENAMED Viewed

@@ -28,19 +28,21 @@ Interaction with the Dataverse site starts with an instance of the `DataverseCli
 ```Python
 from dataverse_sdk import *
 from dataverse_sdk.connections import get_connection
+from dataverse_sdk.constants import DataverseHost
 client = DataverseClient(
-    host=DataverseHost.PRODUCTION, email="XXX", password="***", service_id="xxxx-xxxx-xx-xxx", alias="default", force = False
+    host=DataverseHost.PRODUCTION.value, email="XXX", password="***", service_id="xxxx-xxxx-xx-xxx", alias="default", force = False
 )
 assert client is get_connection("default")
 # Should provide different alias if you are trying to connect to different workspaces
 client2 = DataverseClient(
-    host=DataverseHost.PRODUCTION, email="account-2", password="***", service_id="xxxx-xxxx-xx-xxx", alias="client2", force = False
+    host=DataverseHost.PRODUCTION.value, email="account-2", password="***", service_id="xxxx-xxxx-xx-xxx", alias="client2", force = False
 )
 assert client2 is get_connection(client2.alias)
 client3 = DataverseClient(
-    host=DataverseHost.PRODUCTION, email="XXX", password="", service_id="xxxx-xxxx-xx-xxx", access_token="xxx"
+    host=DataverseHost.PRODUCTION.value, email="XXX", password="", service_id="xxxx-xxxx-xx-xxx", access_token="xxx"
 )
 assert client3 is get_connection(client3.alias)
 ```
@@ -318,7 +320,6 @@ dataset_data = {
     "sequential": False,
     "render_pcd": False,
     "generate_metadata": False,
-    "auto_tagging": ["timeofday"],
     "sas_token": "azure sas token",  # only for azure storage
     "access_key_id" : "aws s3 access key id",# only for private s3 bucket, don't need to assign it in case of public s3 bucket or azure data source
     "secret_access_key": "aws s3 secret access key"# only for private s3 bucket, don't need to assign it in case of public s3 bucket or azure data source
@@ -343,7 +344,6 @@ dataset = project.create_dataset(**dataset_data)
 | sequential | bool | False | data is sequential or not   |
 | render_pcd | bool | False | render pcd preview image or not |
 | generate_metadata | bool | False | generate image meta data or not   |
-| auto_tagging | list | None | generate auto_tagging with target models `["weather", "scene", "timeofday"]`   |
 | description  | str | None | your dataset description  |
 | sas_token | str | None | SAStoken for azure container  |
 | access_key_id | str | None |  access key id for AWS private s3 bucket  |
@@ -370,7 +370,6 @@ dataset_data2 = {
     "annotations": ["groundtruth"],  # remove it when type is DatasetType.RAW_DATA
     "sequential": False,
     "generate_metadata": False,
-    "auto_tagging": []
     "sas_token": ""
 }
 dataset2 = project.create_dataset(**dataset_data2)
@@ -425,15 +424,57 @@ client.download_export_dataslice_data(dataslice_id=504, export_record_id=export_
 ### List Models
-The `list_models` method will list all the models in the given project
+The `list_models` method will list all the models in the given project. You can filter models by type using the `type` parameter.
+#### Basic Usage
 ```Python
-#1
-models = client.list_models(project_id = 1, client_alias=client.alias)
-#2
+# Method 1: Using client
+models = client.list_models(project_id=1, client_alias=client.alias)
+# Method 2: Using project object
 project = client.get_project(project_id=1)
 models = project.list_models()
 ```
+#### Filtering by Model Type
+You can filter models by type using strings or lists of strings. The SDK supports multiple model types:
+```Python
+# Filter by single type using string
+models = client.list_models(project_id=1, type="trained", client_alias=client.alias)
+# Filter by single type using list
+models = client.list_models(project_id=1, type=["trained"], client_alias=client.alias)
+# Filter by multiple types using list
+models = client.list_models(
+    project_id=1,
+    type=["trained", "byom", "uploaded"],
+    client_alias=client.alias
+)
+```
+#### Available Model Types
+| String Value | Description          |
+| ------------ | -------------------- |
+| `"trained"`  | Trained models       |
+| `"byom"`     | Bring Your Own Model |
+| `"uploaded"` | Uploaded models      |
+#### Input Arguments
+| Argument name | Type/Options                                                      | Default             | Description              |
+| ------------- | ----------------------------------------------------------------- | ------------------- | ------------------------ |
+| project_id    | int                                                               | \*--                | The project ID           |
+| client_alias  | str                                                               | None                | The client alias         |
+| type          | "trained", "byom", "uploaded", list["trained", "byom", "uploaded"] | ["trained", "byom"] | Model types to filter by |
+`*--`: required argument without default
 <br>
 ### Get Model
@@ -547,7 +588,21 @@ python tools/export_dataslice.py -host https://staging.visionai.linkervision.ai/
 ### Export Large Dataslice and download files
 ```
 python tools/export_dataslice_large.py -host https://visionai.linkervision.ai/dataverse/curation -e {your-account-email} -p {PASSWORD} -s {service-id} -dataslice {dataslice_id} --anno {export-model-name / groundtruth} --target_folder {folder path} --export-format {coco, visionai, yolo, vlm ...etc}
-``````
+```
+### Upload videos to create session tasks
+```
+python tools/upload_videos_create_session.py -host https://visionai.linkervision.ai/dataverse/curation -e {your-account-email} -p {PASSWORD} -s {service-id} -f {/YOUR/VIDEOS/LOCAL/FOLDER} -n {session-name}
+```
+- Advanced arguments for video curation (sequential data):
+| Argument name              | Type/Options   | Default   | Description                                                                 |
+|----------------------------|----------------|-----------|-----------------------------------------------------------------------------|
+| --video-curation            | bool | False     | enable video curation (sequential data)                                                     |
+| --global-mean-threshold     | float          | 0.001     | Threshold for the video's global average motion magnitude (0.000001 ~ 0.01). Higher values are stricter (flag more clips as low-motion); lower values are looser (flag fewer clips). |
+| --per-patch-256-min-threshold | float        | 0.000001  | Minimum average motion magnitude allowed in any 256x256 pixel patch (0.000001 ~ 0.0001). Higher values are stricter per-patch (flag more clips when any 256x256 patch is too still); lower values are looser (flag fewer clips). |
+| --split-duration            | int            | 5         | Set the length of each split clip in seconds (2 ~ 30s).                     |
 ## Links to language repos

{dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk/apis/backend.py RENAMED Viewed

@@ -292,9 +292,14 @@ class BackendAPI:
         )
         return resp.json()
-    def list_ml_models(self, project_id: int, type: str = "trained", **kwargs) -> list:
+    def list_ml_models(
+        self,
+        project_id: int,
+        type: str = "trained,byom",
+        **kwargs,
+    ) -> list:
         kwargs["project"] = project_id
-        kwargs["type"] = type
+        kwargs["type__in"] = type
         resp = self.send_request(
             url=f"{self.host}/api/ml_models/?{urlencode(kwargs)}",
             method="get",
@@ -374,7 +379,6 @@ class BackendAPI:
         data_folder: str,
         sequential: bool = False,
         generate_metadata: bool = False,
-        auto_tagging: Optional[list] = None,
         render_pcd: bool = False,
         container_name: Optional[str] = None,
         sas_token: Optional[str] = None,
@@ -384,8 +388,6 @@ class BackendAPI:
         secret_access_key: Optional[str] = None,
         create_dataset_uuid: Optional[str] = None,
     ) -> dict:
-        if auto_tagging is None:
-            auto_tagging = []
         if annotations is None:
             annotations = []
         payload_data = {
@@ -400,10 +402,10 @@ class BackendAPI:
             "sequential": sequential,
             "annotation_format": annotation_format,
             "generate_metadata": generate_metadata,
-            "auto_tagging": auto_tagging,
             "render_pcd": render_pcd,
             "description": description if description else "",
             "annotations": annotations if annotations else [],
+            "auto_tagging": [],  # FIXME: auto_tagging field is still required by production API.
         }
         aws_access_key = {secret_access_key, access_key_id}
@@ -619,6 +621,37 @@ class AsyncBackendAPI:
             json=payload,
         )
+    async def generate_session_task_presigned_urls(self, filenames: list[str]) -> dict:
+        return await self.async_send_request(
+            url=f"{self.host}/api/session_tasks/presigned-urls/",
+            method="post",
+            headers=self.headers,
+            data={"filenames": filenames},
+        )
+    async def create_session_task(
+        self,
+        name: str,
+        data_folder: str,
+        video_curation: bool = False,
+        curation_config: Optional[dict] = None,
+    ) -> dict:
+        payload_data = {
+            "name": name,
+            "data_folder": data_folder,
+            "video_curation": video_curation,
+        }
+        if video_curation and curation_config:
+            payload_data["curation_config"] = curation_config
+        return await self.async_send_request(
+            url=f"{self.host}/api/session_tasks/",
+            method="post",
+            headers=self.headers,
+            data=payload_data,
+        )
     async def get_project(self, project_id: str) -> dict:
         try:
             resp = await self.client.get(

{dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk/client.py RENAMED Viewed

@@ -6,7 +6,7 @@ import platform
 from asyncio import AbstractEventLoop, Semaphore
 from collections import deque
 from pathlib import Path
-from typing import Optional, Union
+from typing import Literal, Optional, Union
 from uuid import uuid4
 from aiofiles import open as aio_open
@@ -820,9 +820,9 @@ class DataverseClient:
             "option": {},
         }
         for ontology_class in project.ontology.classes:
-            project_ontology_ids["ontology_class"][
-                ontology_class.id
-            ] = ontology_class.aliases
+            project_ontology_ids["ontology_class"][ontology_class.id] = (
+                ontology_class.aliases
+            )
             for attr in ontology_class.attributes:
                 project_ontology_ids["attribute"][attr.id] = attr.aliases
                 for option in attr.options:
@@ -1178,6 +1178,12 @@ of this project OR has been added before"
         client: Optional["DataverseClient"] = None,
         client_alias: Optional[str] = None,
         project: Optional["Project"] = None,
+        type: Optional[
+            Union[
+                Literal["trained", "byom", "uploaded"],
+                list[Literal["trained", "byom", "uploaded"]],
+            ]
+        ] = ["trained", "byom"],
     ) -> list[MLModel]:
         """Get the model list by project id
@@ -1189,10 +1195,11 @@ of this project OR has been added before"
         client_alias: Optional[str], by default None (should be provided if client is None)
         project: Optional["Project"]
             project basemodel, by default None
+        type : Optional[Union[Literal["trained", "byom", "uploaded"], list[Literal["trained", "byom", "uploaded"]]]], by default ["trained", "byom"]
         Returns
         -------
-        list
+        list[MLModel]
             list of model items
         Raises
@@ -1204,7 +1211,9 @@ of this project OR has been added before"
             client=client, client_alias=client_alias
         )
         try:
-            model_list: list = api.list_ml_models(project_id=project_id)
+            if isinstance(type, list):
+                type = ",".join(type)
+            model_list: list = api.list_ml_models(project_id=project_id, type=type)
         except DataverseExceptionBase:
             logging.exception("Got api error from Dataverse")
             raise
@@ -1484,7 +1493,6 @@ of this project OR has been added before"
         annotations: Optional[list] = None,
         sequential: bool = False,
         generate_metadata: bool = False,
-        auto_tagging: Optional[list] = None,
         render_pcd: bool = False,
         description: Optional[str] = None,
         client: Optional["DataverseClient"] = None,
@@ -1522,8 +1530,6 @@ of this project OR has been added before"
             sequential or not., by default False
         generate_metadata : bool, optional
             generate meta data or not, by default False
-        auto_tagging: list
-            generate auto_tagging with target models (weather/scene/timeofday)
         description : Optional[str], optional
             description of the dataset, by default None
         render_pcd : bool, optional
@@ -1550,17 +1556,15 @@ of this project OR has been added before"
         """
         if annotations is None:
             annotations = []
-        if auto_tagging is None:
-            auto_tagging = []
         if type == DatasetType.ANNOTATED_DATA and len(annotations) == 0:
             raise ValueError(
                 "Annotated data should provide at least one annotation folder name (groundtruth or model_name)"
             )
-        api, client_alia = DataverseClient._get_api_client(
+        api, client_alias = DataverseClient._get_api_client(
             client=client, client_alias=client_alias, is_async=False
         )
-        async_api, client_alia = DataverseClient._get_api_client(
+        async_api, client_alias = DataverseClient._get_api_client(
             client=client, client_alias=client_alias, is_async=True
         )
@@ -1586,7 +1590,6 @@ of this project OR has been added before"
                 sas_token=sas_token,
                 sequential=sequential,
                 generate_metadata=generate_metadata,
-                auto_tagging=auto_tagging,
                 render_pcd=render_pcd,
                 description=description,
                 access_key_id=access_key_id,
@@ -1610,7 +1613,6 @@ of this project OR has been added before"
                 "project": project,
                 "sequential": sequential,
                 "generate_metadata": generate_metadata,
-                "auto_tagging": auto_tagging,
                 "annotations": annotations,
             }
         )
@@ -1975,6 +1977,79 @@ of this project OR has been added before"
                 detail=f"the format {annotation_format} is not supported for local upload"
             )
+    async def upload_videos_create_session(
+        self,
+        name: str,
+        video_folder: str,
+        video_curation: bool = False,
+        curation_config: Optional[dict] = None,
+    ) -> dict:
+        video_path = Path(video_folder)
+        if not video_path.exists() or not video_path.is_dir():
+            raise ValueError(f"Video folder does not exist: {video_folder}")
+        video_extensions = {".mp4", ".avi", ".mov", ".mpeg", ".flv"}
+        video_paths = [
+            path
+            for path in video_path.iterdir()
+            if path.is_file() and path.suffix.lower() in video_extensions
+        ]
+        if not video_paths:
+            raise ValueError(f"No video files found in {video_folder}")
+        filenames = [video.name for video in video_paths]
+        logging.info(f"Found {len(filenames)} videos to upload")
+        try:
+            # Step 1: Get presigned URLs
+            logging.info("Getting presigned URLs...")
+            presigned_data = (
+                await self._async_api_client.generate_session_task_presigned_urls(
+                    filenames=filenames
+                )
+            )
+            data_folder = presigned_data["data_folder"]
+            url_info = presigned_data["url_info"]
+            # Step 2: Upload videos concurrently with progress bar
+            logging.info("Uploading videos...")
+            upload_task_queue = deque([(video_paths, url_info)])
+            failed_file_info_batches = await DataverseClient.run_upload_tasks(
+                upload_task_queue
+            )
+            if failed_file_info_batches:
+                raise ClientConnectionError(
+                    f"Failed uploads: {failed_file_info_batches}"
+                )
+            # Step 3: Create session task
+            logging.info("Creating session task...")
+            session_task_data = await self._async_api_client.create_session_task(
+                name=name,
+                data_folder=data_folder,
+                video_curation=video_curation,
+                curation_config=curation_config,
+            )
+            logging.info(f"✅ Session task '{name}' created successfully!")
+            return session_task_data
+        except DataverseExceptionBase:
+            logging.exception("Got api error from Dataverse")
+            raise
+        except Exception as e:
+            try:
+                error_data = json.loads(
+                    getattr(getattr(e, "response", None), "text", str(e))
+                )
+                error_message = next(iter(error_data.get("error", {}).values()))[0]
+            except Exception:
+                error_message = str(e)
+            raise ClientConnectionError(
+                f"Failed to create session task: {error_message}"
+            )
 class AsyncThirdPartyAPI:
     transport = AsyncHTTPTransport(

{dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk/export/base.py RENAMED Viewed

@@ -12,6 +12,7 @@ class ExportAnnotationBase(abc.ABC):
         sequence_frame_map: dict[int, dict[int, list[int]]],
         datarow_generator_func: Callable[[list], Generator[dict]],
         annotation_name: str,
+        is_sequential: bool,
         *args,
         **kwargs,
     ) -> Generator[bytes, str]:

{dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk/export/coco.py RENAMED Viewed

@@ -22,7 +22,7 @@ from .constant import (
     ExportFormat,
 )
 from .exporter import Exporter
-from .utils import convert_to_bytes
+from .utils import convert_to_bytes, gen_empty_vai
 @Exporter.register(format=ExportFormat.COCO)
@@ -156,6 +156,9 @@ def convert_annotation(
                 datarow["items"].get("predictions", {}).get(annotation_name, {})
             )
+        if not target_visionai:
+            target_visionai = gen_empty_vai(datarow=datarow, sequence_folder_url="")
         (
             category_idx_map,
             image_update,

{dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk/export/exporter.py RENAMED Viewed

@@ -77,6 +77,7 @@ class Exporter:
         sequence_frame_map: dict,
         question_id_map: dict,
         annotation_name: str,
+        is_sequential: bool,
     ) -> AsyncGenerator[tuple[bytes, str]]:
         async for data, path in self.export_annot.producer(
             class_names=class_names,
@@ -85,6 +86,7 @@ class Exporter:
             target_folder=self.target_folder,
             datarow_generator_func=await self._gen(self.curation_api),
             annotation_name=annotation_name,
+            is_sequential=is_sequential,
         ):
             if not path:
                 continue
@@ -141,7 +143,7 @@ class Exporter:
                 gen: AsyncGenerator = curation_api.get_datarows(
                     id_set_list=id_chunks,
                     batch_size=BATCH_SIZE,
-                    fields="id,items,vlm_items,url,frame_id,image_width,image_height,sensor_name,original_url",
+                    fields="id,items,vlm_items,url,frame_id,image_width,image_height,sensor_name,original_url,type",
                 )
                 async for batched_datarow in gen:
                     for datarow in batched_datarow:
@@ -238,9 +240,9 @@ def get_datarow_sequences(
         for frame_datarow_id, datarow_id_list in sequence_frame_map[
             sequence_datarow_id
         ].items():
-            new_datarows_sequence_map[sequence_order][
-                frame_datarow_id
-            ] = datarow_id_list
+            new_datarows_sequence_map[sequence_order][frame_datarow_id] = (
+                datarow_id_list
+            )
             if not is_sequential or sequence_datarow_id == NONE_SEQUENCE_DATAROW_ID:
                 sequence_order += 1
         if is_sequential:

dataverse_sdk-2.4.2/dataverse_sdk/export/utils.py ADDED Viewed

@@ -0,0 +1,53 @@
+import json
+from typing import Union
+from visionai_data_format.schemas.visionai_schema import (
+    Frame,
+    FrameProperties,
+    FramePropertyStream,
+)
+from ..apis.third_party import ThirdPartyAPI
+def convert_to_bytes(obj: Union[dict, list, str]) -> bytes:
+    if isinstance(obj, (dict, list)):
+        jstr = json.dumps(obj)
+    elif isinstance(obj, str):
+        jstr = obj
+    else:
+        raise TypeError("un-support type")
+    return bytes(jstr, encoding="utf8")
+async def download_url_file_async(data_url: str) -> bytes | None:
+    # get data from url link
+    try:
+        data: bytes = await ThirdPartyAPI.async_download_file(
+            url=data_url, method="GET"
+        )
+    except Exception:
+        print(f"Retrieving data from url {data_url} error")
+        return None
+    return data
+def gen_empty_vai(datarow: dict, sequence_folder_url: str) -> dict:
+    new_sensor_data_folder = f"{sequence_folder_url}/data/{datarow['sensor_name']}/"
+    dest_url = f"{new_sensor_data_folder}{datarow['url'].split('/')[-1]}"
+    # generate visionai empty frame
+    frames = {}
+    frame_num = datarow["frame_id"]
+    frames[frame_num] = Frame(
+        frame_properties=FrameProperties(
+            streams={datarow["sensor_name"]: FramePropertyStream(uri=dest_url)}
+        ),
+        objects={},
+    ).model_dump(exclude_none=True)
+    if datarow["type"] == "image":
+        stream = {datarow["sensor_name"]: {"type": "camera", "uri": dest_url}}
+    else:
+        stream = {datarow["sensor_name"]: {"type": "lidar", "uri": dest_url}}
+    return {"frames": frames, "streams": stream}

dataverse-sdk 2.3.0__tar.gz → 2.4.2__tar.gz

dataverse-sdk 2.3.0__tar.gz → 2.4.2__tar.gz