dataverse-sdk 2.3.0__tar.gz → 2.4.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/PKG-INFO +67 -12
- {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/README.md +66 -11
- {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk/apis/backend.py +39 -6
- {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk/client.py +90 -15
- {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk/export/base.py +1 -0
- {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk/export/coco.py +4 -1
- {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk/export/exporter.py +6 -4
- dataverse_sdk-2.4.2/dataverse_sdk/export/utils.py +53 -0
- {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk/export/visionai.py +132 -45
- {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk/export/yolo.py +4 -1
- {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk/schemas/api.py +0 -1
- {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk/schemas/client.py +12 -10
- {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk/utils/utils.py +23 -3
- {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk.egg-info/PKG-INFO +67 -12
- {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/setup.py +1 -1
- dataverse_sdk-2.3.0/dataverse_sdk/export/utils.py +0 -26
- {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk/__init__.py +0 -0
- {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk/apis/__init__.py +0 -0
- {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk/apis/third_party.py +0 -0
- {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk/connections.py +0 -0
- {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk/constants.py +0 -0
- {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk/exceptions/__init__.py +0 -0
- {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk/exceptions/client.py +0 -0
- {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk/export/__init__.py +0 -0
- {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk/export/constant.py +0 -0
- {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk/export/vqa.py +0 -0
- {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk/schemas/__init__.py +0 -0
- {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk/schemas/common.py +0 -0
- {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk/schemas/format.py +0 -0
- {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk/utils/__init__.py +0 -0
- {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk.egg-info/SOURCES.txt +0 -0
- {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk.egg-info/dependency_links.txt +0 -0
- {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk.egg-info/requires.txt +0 -0
- {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/dataverse_sdk.egg-info/top_level.txt +0 -0
- {dataverse_sdk-2.3.0 → dataverse_sdk-2.4.2}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dataverse-sdk
|
|
3
|
-
Version: 2.3.0
|
|
3
|
+
Version: 2.4.2
|
|
4
4
|
Summary: Dataverse SDK For Python
|
|
5
5
|
Home-page:
|
|
6
6
|
Author: LinkerVision
|
|
@@ -55,19 +55,21 @@ Interaction with the Dataverse site starts with an instance of the `DataverseCli
|
|
|
55
55
|
```Python
|
|
56
56
|
from dataverse_sdk import *
|
|
57
57
|
from dataverse_sdk.connections import get_connection
|
|
58
|
+
from dataverse_sdk.constants import DataverseHost
|
|
59
|
+
|
|
58
60
|
client = DataverseClient(
|
|
59
|
-
host=DataverseHost.PRODUCTION, email="XXX", password="***", service_id="xxxx-xxxx-xx-xxx", alias="default", force = False
|
|
61
|
+
host=DataverseHost.PRODUCTION.value, email="XXX", password="***", service_id="xxxx-xxxx-xx-xxx", alias="default", force = False
|
|
60
62
|
)
|
|
61
63
|
assert client is get_connection("default")
|
|
62
64
|
|
|
63
65
|
# Should provide different alias if you are trying to connect to different workspaces
|
|
64
66
|
client2 = DataverseClient(
|
|
65
|
-
host=DataverseHost.PRODUCTION, email="account-2", password="***", service_id="xxxx-xxxx-xx-xxx", alias="client2", force = False
|
|
67
|
+
host=DataverseHost.PRODUCTION.value, email="account-2", password="***", service_id="xxxx-xxxx-xx-xxx", alias="client2", force = False
|
|
66
68
|
)
|
|
67
69
|
assert client2 is get_connection(client2.alias)
|
|
68
70
|
|
|
69
71
|
client3 = DataverseClient(
|
|
70
|
-
host=DataverseHost.PRODUCTION, email="XXX", password="", service_id="xxxx-xxxx-xx-xxx", access_token="xxx"
|
|
72
|
+
host=DataverseHost.PRODUCTION.value, email="XXX", password="", service_id="xxxx-xxxx-xx-xxx", access_token="xxx"
|
|
71
73
|
)
|
|
72
74
|
assert client3 is get_connection(client3.alias)
|
|
73
75
|
```
|
|
@@ -345,7 +347,6 @@ dataset_data = {
|
|
|
345
347
|
"sequential": False,
|
|
346
348
|
"render_pcd": False,
|
|
347
349
|
"generate_metadata": False,
|
|
348
|
-
"auto_tagging": ["timeofday"],
|
|
349
350
|
"sas_token": "azure sas token", # only for azure storage
|
|
350
351
|
"access_key_id" : "aws s3 access key id",# only for private s3 bucket, don't need to assign it in case of public s3 bucket or azure data source
|
|
351
352
|
"secret_access_key": "aws s3 secret access key"# only for private s3 bucket, don't need to assign it in case of public s3 bucket or azure data source
|
|
@@ -370,7 +371,6 @@ dataset = project.create_dataset(**dataset_data)
|
|
|
370
371
|
| sequential | bool | False | data is sequential or not |
|
|
371
372
|
| render_pcd | bool | False | render pcd preview image or not |
|
|
372
373
|
| generate_metadata | bool | False | generate image meta data or not |
|
|
373
|
-
| auto_tagging | list | None | generate auto_tagging with target models `["weather", "scene", "timeofday"]` |
|
|
374
374
|
| description | str | None | your dataset description |
|
|
375
375
|
| sas_token | str | None | SAS token for Azure container |
|
|
376
376
|
| access_key_id | str | None | access key id for AWS private s3 bucket |
|
|
@@ -397,7 +397,6 @@ dataset_data2 = {
|
|
|
397
397
|
"annotations": ["groundtruth"], # remove it when type is DatasetType.RAW_DATA
|
|
398
398
|
"sequential": False,
|
|
399
399
|
"generate_metadata": False,
|
|
400
|
-
"auto_tagging": []
|
|
401
400
|
"sas_token": ""
|
|
402
401
|
}
|
|
403
402
|
dataset2 = project.create_dataset(**dataset_data2)
|
|
@@ -452,15 +451,57 @@ client.download_export_dataslice_data(dataslice_id=504, export_record_id=export_
|
|
|
452
451
|
|
|
453
452
|
|
|
454
453
|
### List Models
|
|
455
|
-
|
|
454
|
+
|
|
455
|
+
The `list_models` method will list all the models in the given project. You can filter models by type using the `type` parameter.
|
|
456
|
+
|
|
457
|
+
#### Basic Usage
|
|
456
458
|
|
|
457
459
|
```Python
|
|
458
|
-
#1
|
|
459
|
-
models = client.list_models(project_id
|
|
460
|
-
|
|
460
|
+
# Method 1: Using client
|
|
461
|
+
models = client.list_models(project_id=1, client_alias=client.alias)
|
|
462
|
+
|
|
463
|
+
# Method 2: Using project object
|
|
461
464
|
project = client.get_project(project_id=1)
|
|
462
465
|
models = project.list_models()
|
|
463
466
|
```
|
|
467
|
+
|
|
468
|
+
#### Filtering by Model Type
|
|
469
|
+
|
|
470
|
+
You can filter models by type using strings or lists of strings. The SDK supports multiple model types:
|
|
471
|
+
|
|
472
|
+
```Python
|
|
473
|
+
# Filter by single type using string
|
|
474
|
+
models = client.list_models(project_id=1, type="trained", client_alias=client.alias)
|
|
475
|
+
|
|
476
|
+
# Filter by single type using list
|
|
477
|
+
models = client.list_models(project_id=1, type=["trained"], client_alias=client.alias)
|
|
478
|
+
|
|
479
|
+
# Filter by multiple types using list
|
|
480
|
+
models = client.list_models(
|
|
481
|
+
project_id=1,
|
|
482
|
+
type=["trained", "byom", "uploaded"],
|
|
483
|
+
client_alias=client.alias
|
|
484
|
+
)
|
|
485
|
+
```
|
|
486
|
+
|
|
487
|
+
#### Available Model Types
|
|
488
|
+
|
|
489
|
+
| String Value | Description |
|
|
490
|
+
| ------------ | -------------------- |
|
|
491
|
+
| `"trained"` | Trained models |
|
|
492
|
+
| `"byom"` | Bring Your Own Model |
|
|
493
|
+
| `"uploaded"` | Uploaded models |
|
|
494
|
+
|
|
495
|
+
#### Input Arguments
|
|
496
|
+
|
|
497
|
+
| Argument name | Type/Options | Default | Description |
|
|
498
|
+
| ------------- | ----------------------------------------------------------------- | ------------------- | ------------------------ |
|
|
499
|
+
| project_id | int | \*-- | The project ID |
|
|
500
|
+
| client_alias | str | None | The client alias |
|
|
501
|
+
| type | "trained", "byom", "uploaded", list["trained", "byom", "uploaded"] | ["trained", "byom"] | Model types to filter by |
|
|
502
|
+
|
|
503
|
+
`*--`: required argument without default
|
|
504
|
+
|
|
464
505
|
<br>
|
|
465
506
|
|
|
466
507
|
### Get Model
|
|
@@ -574,7 +615,21 @@ python tools/export_dataslice.py -host https://staging.visionai.linkervision.ai/
|
|
|
574
615
|
### Export Large Dataslice and download files
|
|
575
616
|
```
|
|
576
617
|
python tools/export_dataslice_large.py -host https://visionai.linkervision.ai/dataverse/curation -e {your-account-email} -p {PASSWORD} -s {service-id} -dataslice {dataslice_id} --anno {export-model-name / groundtruth} --target_folder {folder path} --export-format {coco, visionai, yolo, vlm ...etc}
|
|
577
|
-
|
|
618
|
+
```
|
|
619
|
+
|
|
620
|
+
### Upload videos to create session tasks
|
|
621
|
+
```
|
|
622
|
+
python tools/upload_videos_create_session.py -host https://visionai.linkervision.ai/dataverse/curation -e {your-account-email} -p {PASSWORD} -s {service-id} -f {/YOUR/VIDEOS/LOCAL/FOLDER} -n {session-name}
|
|
623
|
+
```
|
|
624
|
+
|
|
625
|
+
- Advanced arguments for video curation (sequential data):
|
|
626
|
+
|
|
627
|
+
| Argument name | Type/Options | Default | Description |
|
|
628
|
+
|----------------------------|----------------|-----------|-----------------------------------------------------------------------------|
|
|
629
|
+
| --video-curation | bool | False | enable video curation (sequential data) |
|
|
630
|
+
| --global-mean-threshold | float | 0.001 | Threshold for the video's global average motion magnitude (0.000001 ~ 0.01). Higher values are stricter (flag more clips as low-motion); lower values are looser (flag fewer clips). |
|
|
631
|
+
| --per-patch-256-min-threshold | float | 0.000001 | Minimum average motion magnitude allowed in any 256x256 pixel patch (0.000001 ~ 0.0001). Higher values are stricter per-patch (flag more clips when any 256x256 patch is too still); lower values are looser (flag fewer clips). |
|
|
632
|
+
| --split-duration | int | 5 | Set the length of each split clip in seconds (2 ~ 30s). |
|
|
578
633
|
|
|
579
634
|
## Links to language repos
|
|
580
635
|
|
|
@@ -28,19 +28,21 @@ Interaction with the Dataverse site starts with an instance of the `DataverseCli
|
|
|
28
28
|
```Python
|
|
29
29
|
from dataverse_sdk import *
|
|
30
30
|
from dataverse_sdk.connections import get_connection
|
|
31
|
+
from dataverse_sdk.constants import DataverseHost
|
|
32
|
+
|
|
31
33
|
client = DataverseClient(
|
|
32
|
-
host=DataverseHost.PRODUCTION, email="XXX", password="***", service_id="xxxx-xxxx-xx-xxx", alias="default", force = False
|
|
34
|
+
host=DataverseHost.PRODUCTION.value, email="XXX", password="***", service_id="xxxx-xxxx-xx-xxx", alias="default", force = False
|
|
33
35
|
)
|
|
34
36
|
assert client is get_connection("default")
|
|
35
37
|
|
|
36
38
|
# Should provide different alias if you are trying to connect to different workspaces
|
|
37
39
|
client2 = DataverseClient(
|
|
38
|
-
host=DataverseHost.PRODUCTION, email="account-2", password="***", service_id="xxxx-xxxx-xx-xxx", alias="client2", force = False
|
|
40
|
+
host=DataverseHost.PRODUCTION.value, email="account-2", password="***", service_id="xxxx-xxxx-xx-xxx", alias="client2", force = False
|
|
39
41
|
)
|
|
40
42
|
assert client2 is get_connection(client2.alias)
|
|
41
43
|
|
|
42
44
|
client3 = DataverseClient(
|
|
43
|
-
host=DataverseHost.PRODUCTION, email="XXX", password="", service_id="xxxx-xxxx-xx-xxx", access_token="xxx"
|
|
45
|
+
host=DataverseHost.PRODUCTION.value, email="XXX", password="", service_id="xxxx-xxxx-xx-xxx", access_token="xxx"
|
|
44
46
|
)
|
|
45
47
|
assert client3 is get_connection(client3.alias)
|
|
46
48
|
```
|
|
@@ -318,7 +320,6 @@ dataset_data = {
|
|
|
318
320
|
"sequential": False,
|
|
319
321
|
"render_pcd": False,
|
|
320
322
|
"generate_metadata": False,
|
|
321
|
-
"auto_tagging": ["timeofday"],
|
|
322
323
|
"sas_token": "azure sas token", # only for azure storage
|
|
323
324
|
"access_key_id" : "aws s3 access key id",# only for private s3 bucket, don't need to assign it in case of public s3 bucket or azure data source
|
|
324
325
|
"secret_access_key": "aws s3 secret access key"# only for private s3 bucket, don't need to assign it in case of public s3 bucket or azure data source
|
|
@@ -343,7 +344,6 @@ dataset = project.create_dataset(**dataset_data)
|
|
|
343
344
|
| sequential | bool | False | data is sequential or not |
|
|
344
345
|
| render_pcd | bool | False | render pcd preview image or not |
|
|
345
346
|
| generate_metadata | bool | False | generate image meta data or not |
|
|
346
|
-
| auto_tagging | list | None | generate auto_tagging with target models `["weather", "scene", "timeofday"]` |
|
|
347
347
|
| description | str | None | your dataset description |
|
|
348
348
|
| sas_token | str | None | SAS token for Azure container |
|
|
349
349
|
| access_key_id | str | None | access key id for AWS private s3 bucket |
|
|
@@ -370,7 +370,6 @@ dataset_data2 = {
|
|
|
370
370
|
"annotations": ["groundtruth"], # remove it when type is DatasetType.RAW_DATA
|
|
371
371
|
"sequential": False,
|
|
372
372
|
"generate_metadata": False,
|
|
373
|
-
"auto_tagging": []
|
|
374
373
|
"sas_token": ""
|
|
375
374
|
}
|
|
376
375
|
dataset2 = project.create_dataset(**dataset_data2)
|
|
@@ -425,15 +424,57 @@ client.download_export_dataslice_data(dataslice_id=504, export_record_id=export_
|
|
|
425
424
|
|
|
426
425
|
|
|
427
426
|
### List Models
|
|
428
|
-
|
|
427
|
+
|
|
428
|
+
The `list_models` method will list all the models in the given project. You can filter models by type using the `type` parameter.
|
|
429
|
+
|
|
430
|
+
#### Basic Usage
|
|
429
431
|
|
|
430
432
|
```Python
|
|
431
|
-
#1
|
|
432
|
-
models = client.list_models(project_id
|
|
433
|
-
|
|
433
|
+
# Method 1: Using client
|
|
434
|
+
models = client.list_models(project_id=1, client_alias=client.alias)
|
|
435
|
+
|
|
436
|
+
# Method 2: Using project object
|
|
434
437
|
project = client.get_project(project_id=1)
|
|
435
438
|
models = project.list_models()
|
|
436
439
|
```
|
|
440
|
+
|
|
441
|
+
#### Filtering by Model Type
|
|
442
|
+
|
|
443
|
+
You can filter models by type using strings or lists of strings. The SDK supports multiple model types:
|
|
444
|
+
|
|
445
|
+
```Python
|
|
446
|
+
# Filter by single type using string
|
|
447
|
+
models = client.list_models(project_id=1, type="trained", client_alias=client.alias)
|
|
448
|
+
|
|
449
|
+
# Filter by single type using list
|
|
450
|
+
models = client.list_models(project_id=1, type=["trained"], client_alias=client.alias)
|
|
451
|
+
|
|
452
|
+
# Filter by multiple types using list
|
|
453
|
+
models = client.list_models(
|
|
454
|
+
project_id=1,
|
|
455
|
+
type=["trained", "byom", "uploaded"],
|
|
456
|
+
client_alias=client.alias
|
|
457
|
+
)
|
|
458
|
+
```
|
|
459
|
+
|
|
460
|
+
#### Available Model Types
|
|
461
|
+
|
|
462
|
+
| String Value | Description |
|
|
463
|
+
| ------------ | -------------------- |
|
|
464
|
+
| `"trained"` | Trained models |
|
|
465
|
+
| `"byom"` | Bring Your Own Model |
|
|
466
|
+
| `"uploaded"` | Uploaded models |
|
|
467
|
+
|
|
468
|
+
#### Input Arguments
|
|
469
|
+
|
|
470
|
+
| Argument name | Type/Options | Default | Description |
|
|
471
|
+
| ------------- | ----------------------------------------------------------------- | ------------------- | ------------------------ |
|
|
472
|
+
| project_id | int | \*-- | The project ID |
|
|
473
|
+
| client_alias | str | None | The client alias |
|
|
474
|
+
| type | "trained", "byom", "uploaded", list["trained", "byom", "uploaded"] | ["trained", "byom"] | Model types to filter by |
|
|
475
|
+
|
|
476
|
+
`*--`: required argument without default
|
|
477
|
+
|
|
437
478
|
<br>
|
|
438
479
|
|
|
439
480
|
### Get Model
|
|
@@ -547,7 +588,21 @@ python tools/export_dataslice.py -host https://staging.visionai.linkervision.ai/
|
|
|
547
588
|
### Export Large Dataslice and download files
|
|
548
589
|
```
|
|
549
590
|
python tools/export_dataslice_large.py -host https://visionai.linkervision.ai/dataverse/curation -e {your-account-email} -p {PASSWORD} -s {service-id} -dataslice {dataslice_id} --anno {export-model-name / groundtruth} --target_folder {folder path} --export-format {coco, visionai, yolo, vlm ...etc}
|
|
550
|
-
|
|
591
|
+
```
|
|
592
|
+
|
|
593
|
+
### Upload videos to create session tasks
|
|
594
|
+
```
|
|
595
|
+
python tools/upload_videos_create_session.py -host https://visionai.linkervision.ai/dataverse/curation -e {your-account-email} -p {PASSWORD} -s {service-id} -f {/YOUR/VIDEOS/LOCAL/FOLDER} -n {session-name}
|
|
596
|
+
```
|
|
597
|
+
|
|
598
|
+
- Advanced arguments for video curation (sequential data):
|
|
599
|
+
|
|
600
|
+
| Argument name | Type/Options | Default | Description |
|
|
601
|
+
|----------------------------|----------------|-----------|-----------------------------------------------------------------------------|
|
|
602
|
+
| --video-curation | bool | False | enable video curation (sequential data) |
|
|
603
|
+
| --global-mean-threshold | float | 0.001 | Threshold for the video's global average motion magnitude (0.000001 ~ 0.01). Higher values are stricter (flag more clips as low-motion); lower values are looser (flag fewer clips). |
|
|
604
|
+
| --per-patch-256-min-threshold | float | 0.000001 | Minimum average motion magnitude allowed in any 256x256 pixel patch (0.000001 ~ 0.0001). Higher values are stricter per-patch (flag more clips when any 256x256 patch is too still); lower values are looser (flag fewer clips). |
|
|
605
|
+
| --split-duration | int | 5 | Set the length of each split clip in seconds (2 ~ 30s). |
|
|
551
606
|
|
|
552
607
|
## Links to language repos
|
|
553
608
|
|
|
@@ -292,9 +292,14 @@ class BackendAPI:
|
|
|
292
292
|
)
|
|
293
293
|
return resp.json()
|
|
294
294
|
|
|
295
|
-
def list_ml_models(
|
|
295
|
+
def list_ml_models(
|
|
296
|
+
self,
|
|
297
|
+
project_id: int,
|
|
298
|
+
type: str = "trained,byom",
|
|
299
|
+
**kwargs,
|
|
300
|
+
) -> list:
|
|
296
301
|
kwargs["project"] = project_id
|
|
297
|
-
kwargs["
|
|
302
|
+
kwargs["type__in"] = type
|
|
298
303
|
resp = self.send_request(
|
|
299
304
|
url=f"{self.host}/api/ml_models/?{urlencode(kwargs)}",
|
|
300
305
|
method="get",
|
|
@@ -374,7 +379,6 @@ class BackendAPI:
|
|
|
374
379
|
data_folder: str,
|
|
375
380
|
sequential: bool = False,
|
|
376
381
|
generate_metadata: bool = False,
|
|
377
|
-
auto_tagging: Optional[list] = None,
|
|
378
382
|
render_pcd: bool = False,
|
|
379
383
|
container_name: Optional[str] = None,
|
|
380
384
|
sas_token: Optional[str] = None,
|
|
@@ -384,8 +388,6 @@ class BackendAPI:
|
|
|
384
388
|
secret_access_key: Optional[str] = None,
|
|
385
389
|
create_dataset_uuid: Optional[str] = None,
|
|
386
390
|
) -> dict:
|
|
387
|
-
if auto_tagging is None:
|
|
388
|
-
auto_tagging = []
|
|
389
391
|
if annotations is None:
|
|
390
392
|
annotations = []
|
|
391
393
|
payload_data = {
|
|
@@ -400,10 +402,10 @@ class BackendAPI:
|
|
|
400
402
|
"sequential": sequential,
|
|
401
403
|
"annotation_format": annotation_format,
|
|
402
404
|
"generate_metadata": generate_metadata,
|
|
403
|
-
"auto_tagging": auto_tagging,
|
|
404
405
|
"render_pcd": render_pcd,
|
|
405
406
|
"description": description if description else "",
|
|
406
407
|
"annotations": annotations if annotations else [],
|
|
408
|
+
"auto_tagging": [], # FIXME: auto_tagging field is still required by production API.
|
|
407
409
|
}
|
|
408
410
|
|
|
409
411
|
aws_access_key = {secret_access_key, access_key_id}
|
|
@@ -619,6 +621,37 @@ class AsyncBackendAPI:
|
|
|
619
621
|
json=payload,
|
|
620
622
|
)
|
|
621
623
|
|
|
624
|
+
async def generate_session_task_presigned_urls(self, filenames: list[str]) -> dict:
|
|
625
|
+
return await self.async_send_request(
|
|
626
|
+
url=f"{self.host}/api/session_tasks/presigned-urls/",
|
|
627
|
+
method="post",
|
|
628
|
+
headers=self.headers,
|
|
629
|
+
data={"filenames": filenames},
|
|
630
|
+
)
|
|
631
|
+
|
|
632
|
+
async def create_session_task(
|
|
633
|
+
self,
|
|
634
|
+
name: str,
|
|
635
|
+
data_folder: str,
|
|
636
|
+
video_curation: bool = False,
|
|
637
|
+
curation_config: Optional[dict] = None,
|
|
638
|
+
) -> dict:
|
|
639
|
+
payload_data = {
|
|
640
|
+
"name": name,
|
|
641
|
+
"data_folder": data_folder,
|
|
642
|
+
"video_curation": video_curation,
|
|
643
|
+
}
|
|
644
|
+
|
|
645
|
+
if video_curation and curation_config:
|
|
646
|
+
payload_data["curation_config"] = curation_config
|
|
647
|
+
|
|
648
|
+
return await self.async_send_request(
|
|
649
|
+
url=f"{self.host}/api/session_tasks/",
|
|
650
|
+
method="post",
|
|
651
|
+
headers=self.headers,
|
|
652
|
+
data=payload_data,
|
|
653
|
+
)
|
|
654
|
+
|
|
622
655
|
async def get_project(self, project_id: str) -> dict:
|
|
623
656
|
try:
|
|
624
657
|
resp = await self.client.get(
|
|
@@ -6,7 +6,7 @@ import platform
|
|
|
6
6
|
from asyncio import AbstractEventLoop, Semaphore
|
|
7
7
|
from collections import deque
|
|
8
8
|
from pathlib import Path
|
|
9
|
-
from typing import Optional, Union
|
|
9
|
+
from typing import Literal, Optional, Union
|
|
10
10
|
from uuid import uuid4
|
|
11
11
|
|
|
12
12
|
from aiofiles import open as aio_open
|
|
@@ -820,9 +820,9 @@ class DataverseClient:
|
|
|
820
820
|
"option": {},
|
|
821
821
|
}
|
|
822
822
|
for ontology_class in project.ontology.classes:
|
|
823
|
-
project_ontology_ids["ontology_class"][
|
|
824
|
-
ontology_class.
|
|
825
|
-
|
|
823
|
+
project_ontology_ids["ontology_class"][ontology_class.id] = (
|
|
824
|
+
ontology_class.aliases
|
|
825
|
+
)
|
|
826
826
|
for attr in ontology_class.attributes:
|
|
827
827
|
project_ontology_ids["attribute"][attr.id] = attr.aliases
|
|
828
828
|
for option in attr.options:
|
|
@@ -1178,6 +1178,12 @@ of this project OR has been added before"
|
|
|
1178
1178
|
client: Optional["DataverseClient"] = None,
|
|
1179
1179
|
client_alias: Optional[str] = None,
|
|
1180
1180
|
project: Optional["Project"] = None,
|
|
1181
|
+
type: Optional[
|
|
1182
|
+
Union[
|
|
1183
|
+
Literal["trained", "byom", "uploaded"],
|
|
1184
|
+
list[Literal["trained", "byom", "uploaded"]],
|
|
1185
|
+
]
|
|
1186
|
+
] = ["trained", "byom"],
|
|
1181
1187
|
) -> list[MLModel]:
|
|
1182
1188
|
"""Get the model list by project id
|
|
1183
1189
|
|
|
@@ -1189,10 +1195,11 @@ of this project OR has been added before"
|
|
|
1189
1195
|
client_alias: Optional[str], by default None (should be provided if client is None)
|
|
1190
1196
|
project: Optional["Project"]
|
|
1191
1197
|
project basemodel, by default None
|
|
1198
|
+
type : Optional[Union[Literal["trained", "byom", "uploaded"], list[Literal["trained", "byom", "uploaded"]]]], by default ["trained", "byom"]
|
|
1192
1199
|
|
|
1193
1200
|
Returns
|
|
1194
1201
|
-------
|
|
1195
|
-
list
|
|
1202
|
+
list[MLModel]
|
|
1196
1203
|
list of model items
|
|
1197
1204
|
|
|
1198
1205
|
Raises
|
|
@@ -1204,7 +1211,9 @@ of this project OR has been added before"
|
|
|
1204
1211
|
client=client, client_alias=client_alias
|
|
1205
1212
|
)
|
|
1206
1213
|
try:
|
|
1207
|
-
|
|
1214
|
+
if isinstance(type, list):
|
|
1215
|
+
type = ",".join(type)
|
|
1216
|
+
model_list: list = api.list_ml_models(project_id=project_id, type=type)
|
|
1208
1217
|
except DataverseExceptionBase:
|
|
1209
1218
|
logging.exception("Got api error from Dataverse")
|
|
1210
1219
|
raise
|
|
@@ -1484,7 +1493,6 @@ of this project OR has been added before"
|
|
|
1484
1493
|
annotations: Optional[list] = None,
|
|
1485
1494
|
sequential: bool = False,
|
|
1486
1495
|
generate_metadata: bool = False,
|
|
1487
|
-
auto_tagging: Optional[list] = None,
|
|
1488
1496
|
render_pcd: bool = False,
|
|
1489
1497
|
description: Optional[str] = None,
|
|
1490
1498
|
client: Optional["DataverseClient"] = None,
|
|
@@ -1522,8 +1530,6 @@ of this project OR has been added before"
|
|
|
1522
1530
|
sequential or not., by default False
|
|
1523
1531
|
generate_metadata : bool, optional
|
|
1524
1532
|
generate meta data or not, by default False
|
|
1525
|
-
auto_tagging: list
|
|
1526
|
-
generate auto_tagging with target models (weather/scene/timeofday)
|
|
1527
1533
|
description : Optional[str], optional
|
|
1528
1534
|
description of the dataset, by default None
|
|
1529
1535
|
render_pcd : bool, optional
|
|
@@ -1550,17 +1556,15 @@ of this project OR has been added before"
|
|
|
1550
1556
|
"""
|
|
1551
1557
|
if annotations is None:
|
|
1552
1558
|
annotations = []
|
|
1553
|
-
if auto_tagging is None:
|
|
1554
|
-
auto_tagging = []
|
|
1555
1559
|
|
|
1556
1560
|
if type == DatasetType.ANNOTATED_DATA and len(annotations) == 0:
|
|
1557
1561
|
raise ValueError(
|
|
1558
1562
|
"Annotated data should provide at least one annotation folder name (groundtruth or model_name)"
|
|
1559
1563
|
)
|
|
1560
|
-
api,
|
|
1564
|
+
api, client_alias = DataverseClient._get_api_client(
|
|
1561
1565
|
client=client, client_alias=client_alias, is_async=False
|
|
1562
1566
|
)
|
|
1563
|
-
async_api,
|
|
1567
|
+
async_api, client_alias = DataverseClient._get_api_client(
|
|
1564
1568
|
client=client, client_alias=client_alias, is_async=True
|
|
1565
1569
|
)
|
|
1566
1570
|
|
|
@@ -1586,7 +1590,6 @@ of this project OR has been added before"
|
|
|
1586
1590
|
sas_token=sas_token,
|
|
1587
1591
|
sequential=sequential,
|
|
1588
1592
|
generate_metadata=generate_metadata,
|
|
1589
|
-
auto_tagging=auto_tagging,
|
|
1590
1593
|
render_pcd=render_pcd,
|
|
1591
1594
|
description=description,
|
|
1592
1595
|
access_key_id=access_key_id,
|
|
@@ -1610,7 +1613,6 @@ of this project OR has been added before"
|
|
|
1610
1613
|
"project": project,
|
|
1611
1614
|
"sequential": sequential,
|
|
1612
1615
|
"generate_metadata": generate_metadata,
|
|
1613
|
-
"auto_tagging": auto_tagging,
|
|
1614
1616
|
"annotations": annotations,
|
|
1615
1617
|
}
|
|
1616
1618
|
)
|
|
@@ -1975,6 +1977,79 @@ of this project OR has been added before"
|
|
|
1975
1977
|
detail=f"the format {annotation_format} is not supported for local upload"
|
|
1976
1978
|
)
|
|
1977
1979
|
|
|
1980
|
+
async def upload_videos_create_session(
|
|
1981
|
+
self,
|
|
1982
|
+
name: str,
|
|
1983
|
+
video_folder: str,
|
|
1984
|
+
video_curation: bool = False,
|
|
1985
|
+
curation_config: Optional[dict] = None,
|
|
1986
|
+
) -> dict:
|
|
1987
|
+
video_path = Path(video_folder)
|
|
1988
|
+
if not video_path.exists() or not video_path.is_dir():
|
|
1989
|
+
raise ValueError(f"Video folder does not exist: {video_folder}")
|
|
1990
|
+
|
|
1991
|
+
video_extensions = {".mp4", ".avi", ".mov", ".mpeg", ".flv"}
|
|
1992
|
+
video_paths = [
|
|
1993
|
+
path
|
|
1994
|
+
for path in video_path.iterdir()
|
|
1995
|
+
if path.is_file() and path.suffix.lower() in video_extensions
|
|
1996
|
+
]
|
|
1997
|
+
if not video_paths:
|
|
1998
|
+
raise ValueError(f"No video files found in {video_folder}")
|
|
1999
|
+
|
|
2000
|
+
filenames = [video.name for video in video_paths]
|
|
2001
|
+
logging.info(f"Found {len(filenames)} videos to upload")
|
|
2002
|
+
|
|
2003
|
+
try:
|
|
2004
|
+
# Step 1: Get presigned URLs
|
|
2005
|
+
logging.info("Getting presigned URLs...")
|
|
2006
|
+
presigned_data = (
|
|
2007
|
+
await self._async_api_client.generate_session_task_presigned_urls(
|
|
2008
|
+
filenames=filenames
|
|
2009
|
+
)
|
|
2010
|
+
)
|
|
2011
|
+
data_folder = presigned_data["data_folder"]
|
|
2012
|
+
url_info = presigned_data["url_info"]
|
|
2013
|
+
|
|
2014
|
+
# Step 2: Upload videos concurrently with progress bar
|
|
2015
|
+
logging.info("Uploading videos...")
|
|
2016
|
+
upload_task_queue = deque([(video_paths, url_info)])
|
|
2017
|
+
failed_file_info_batches = await DataverseClient.run_upload_tasks(
|
|
2018
|
+
upload_task_queue
|
|
2019
|
+
)
|
|
2020
|
+
if failed_file_info_batches:
|
|
2021
|
+
raise ClientConnectionError(
|
|
2022
|
+
f"Failed uploads: {failed_file_info_batches}"
|
|
2023
|
+
)
|
|
2024
|
+
|
|
2025
|
+
# Step 3: Create session task
|
|
2026
|
+
logging.info("Creating session task...")
|
|
2027
|
+
session_task_data = await self._async_api_client.create_session_task(
|
|
2028
|
+
name=name,
|
|
2029
|
+
data_folder=data_folder,
|
|
2030
|
+
video_curation=video_curation,
|
|
2031
|
+
curation_config=curation_config,
|
|
2032
|
+
)
|
|
2033
|
+
logging.info(f"✅ Session task '{name}' created successfully!")
|
|
2034
|
+
|
|
2035
|
+
return session_task_data
|
|
2036
|
+
|
|
2037
|
+
except DataverseExceptionBase:
|
|
2038
|
+
logging.exception("Got api error from Dataverse")
|
|
2039
|
+
raise
|
|
2040
|
+
except Exception as e:
|
|
2041
|
+
try:
|
|
2042
|
+
error_data = json.loads(
|
|
2043
|
+
getattr(getattr(e, "response", None), "text", str(e))
|
|
2044
|
+
)
|
|
2045
|
+
error_message = next(iter(error_data.get("error", {}).values()))[0]
|
|
2046
|
+
except Exception:
|
|
2047
|
+
error_message = str(e)
|
|
2048
|
+
|
|
2049
|
+
raise ClientConnectionError(
|
|
2050
|
+
f"Failed to create session task: {error_message}"
|
|
2051
|
+
)
|
|
2052
|
+
|
|
1978
2053
|
|
|
1979
2054
|
class AsyncThirdPartyAPI:
|
|
1980
2055
|
transport = AsyncHTTPTransport(
|
|
@@ -22,7 +22,7 @@ from .constant import (
|
|
|
22
22
|
ExportFormat,
|
|
23
23
|
)
|
|
24
24
|
from .exporter import Exporter
|
|
25
|
-
from .utils import convert_to_bytes
|
|
25
|
+
from .utils import convert_to_bytes, gen_empty_vai
|
|
26
26
|
|
|
27
27
|
|
|
28
28
|
@Exporter.register(format=ExportFormat.COCO)
|
|
@@ -156,6 +156,9 @@ def convert_annotation(
|
|
|
156
156
|
datarow["items"].get("predictions", {}).get(annotation_name, {})
|
|
157
157
|
)
|
|
158
158
|
|
|
159
|
+
if not target_visionai:
|
|
160
|
+
target_visionai = gen_empty_vai(datarow=datarow, sequence_folder_url="")
|
|
161
|
+
|
|
159
162
|
(
|
|
160
163
|
category_idx_map,
|
|
161
164
|
image_update,
|
|
@@ -77,6 +77,7 @@ class Exporter:
|
|
|
77
77
|
sequence_frame_map: dict,
|
|
78
78
|
question_id_map: dict,
|
|
79
79
|
annotation_name: str,
|
|
80
|
+
is_sequential: bool,
|
|
80
81
|
) -> AsyncGenerator[tuple[bytes, str]]:
|
|
81
82
|
async for data, path in self.export_annot.producer(
|
|
82
83
|
class_names=class_names,
|
|
@@ -85,6 +86,7 @@ class Exporter:
|
|
|
85
86
|
target_folder=self.target_folder,
|
|
86
87
|
datarow_generator_func=await self._gen(self.curation_api),
|
|
87
88
|
annotation_name=annotation_name,
|
|
89
|
+
is_sequential=is_sequential,
|
|
88
90
|
):
|
|
89
91
|
if not path:
|
|
90
92
|
continue
|
|
@@ -141,7 +143,7 @@ class Exporter:
|
|
|
141
143
|
gen: AsyncGenerator = curation_api.get_datarows(
|
|
142
144
|
id_set_list=id_chunks,
|
|
143
145
|
batch_size=BATCH_SIZE,
|
|
144
|
-
fields="id,items,vlm_items,url,frame_id,image_width,image_height,sensor_name,original_url",
|
|
146
|
+
fields="id,items,vlm_items,url,frame_id,image_width,image_height,sensor_name,original_url,type",
|
|
145
147
|
)
|
|
146
148
|
async for batched_datarow in gen:
|
|
147
149
|
for datarow in batched_datarow:
|
|
@@ -238,9 +240,9 @@ def get_datarow_sequences(
|
|
|
238
240
|
for frame_datarow_id, datarow_id_list in sequence_frame_map[
|
|
239
241
|
sequence_datarow_id
|
|
240
242
|
].items():
|
|
241
|
-
new_datarows_sequence_map[sequence_order][
|
|
242
|
-
|
|
243
|
-
|
|
243
|
+
new_datarows_sequence_map[sequence_order][frame_datarow_id] = (
|
|
244
|
+
datarow_id_list
|
|
245
|
+
)
|
|
244
246
|
if not is_sequential or sequence_datarow_id == NONE_SEQUENCE_DATAROW_ID:
|
|
245
247
|
sequence_order += 1
|
|
246
248
|
if is_sequential:
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from typing import Union
|
|
3
|
+
|
|
4
|
+
from visionai_data_format.schemas.visionai_schema import (
|
|
5
|
+
Frame,
|
|
6
|
+
FrameProperties,
|
|
7
|
+
FramePropertyStream,
|
|
8
|
+
)
|
|
9
|
+
|
|
10
|
+
from ..apis.third_party import ThirdPartyAPI
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def convert_to_bytes(obj: Union[dict, list, str]) -> bytes:
|
|
14
|
+
if isinstance(obj, (dict, list)):
|
|
15
|
+
jstr = json.dumps(obj)
|
|
16
|
+
elif isinstance(obj, str):
|
|
17
|
+
jstr = obj
|
|
18
|
+
else:
|
|
19
|
+
raise TypeError("un-support type")
|
|
20
|
+
return bytes(jstr, encoding="utf8")
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
async def download_url_file_async(data_url: str) -> bytes | None:
|
|
24
|
+
# get data from url link
|
|
25
|
+
try:
|
|
26
|
+
data: bytes = await ThirdPartyAPI.async_download_file(
|
|
27
|
+
url=data_url, method="GET"
|
|
28
|
+
)
|
|
29
|
+
except Exception:
|
|
30
|
+
print(f"Retrieving data from url {data_url} error")
|
|
31
|
+
return None
|
|
32
|
+
return data
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def gen_empty_vai(datarow: dict, sequence_folder_url: str) -> dict:
|
|
36
|
+
new_sensor_data_folder = f"{sequence_folder_url}/data/{datarow['sensor_name']}/"
|
|
37
|
+
dest_url = f"{new_sensor_data_folder}{datarow['url'].split('/')[-1]}"
|
|
38
|
+
|
|
39
|
+
# generate visionai empty frame
|
|
40
|
+
frames = {}
|
|
41
|
+
frame_num = datarow["frame_id"]
|
|
42
|
+
frames[frame_num] = Frame(
|
|
43
|
+
frame_properties=FrameProperties(
|
|
44
|
+
streams={datarow["sensor_name"]: FramePropertyStream(uri=dest_url)}
|
|
45
|
+
),
|
|
46
|
+
objects={},
|
|
47
|
+
).model_dump(exclude_none=True)
|
|
48
|
+
if datarow["type"] == "image":
|
|
49
|
+
stream = {datarow["sensor_name"]: {"type": "camera", "uri": dest_url}}
|
|
50
|
+
else:
|
|
51
|
+
stream = {datarow["sensor_name"]: {"type": "lidar", "uri": dest_url}}
|
|
52
|
+
|
|
53
|
+
return {"frames": frames, "streams": stream}
|