dataverse-sdk 2.2.1__tar.gz → 2.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/PKG-INFO +17 -5
- {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/README.md +16 -4
- {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk/apis/backend.py +63 -29
- {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk/client.py +18 -0
- {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk/export/exporter.py +1 -3
- {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk/schemas/client.py +12 -3
- {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk/utils/utils.py +6 -0
- {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk.egg-info/PKG-INFO +17 -5
- {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/setup.py +1 -1
- {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk/__init__.py +0 -0
- {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk/apis/__init__.py +0 -0
- {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk/apis/third_party.py +0 -0
- {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk/connections.py +0 -0
- {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk/constants.py +0 -0
- {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk/exceptions/__init__.py +0 -0
- {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk/exceptions/client.py +0 -0
- {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk/export/__init__.py +0 -0
- {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk/export/base.py +0 -0
- {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk/export/coco.py +0 -0
- {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk/export/constant.py +0 -0
- {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk/export/utils.py +0 -0
- {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk/export/visionai.py +0 -0
- {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk/export/vqa.py +0 -0
- {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk/export/yolo.py +0 -0
- {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk/schemas/__init__.py +0 -0
- {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk/schemas/api.py +0 -0
- {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk/schemas/common.py +0 -0
- {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk/schemas/format.py +0 -0
- {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk/utils/__init__.py +0 -0
- {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk.egg-info/SOURCES.txt +0 -0
- {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk.egg-info/dependency_links.txt +0 -0
- {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk.egg-info/requires.txt +0 -0
- {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk.egg-info/top_level.txt +0 -0
- {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dataverse-sdk
|
|
3
|
-
Version: 2.2.1
|
|
3
|
+
Version: 2.2.2
|
|
4
4
|
Summary: Dataverse SDK For Python
|
|
5
5
|
Home-page:
|
|
6
6
|
Author: LinkerVision
|
|
@@ -100,7 +100,7 @@ The following sections provide examples for the most common DataVerse tasks incl
|
|
|
100
100
|
* [Edit Project](#edit-project)
|
|
101
101
|
* [Update Alias](#update-ontology-alias)
|
|
102
102
|
* [Create Dataset](#create-dataset)
|
|
103
|
-
* [
|
|
103
|
+
* [List Dataset](#list-and-get-dataset)
|
|
104
104
|
* [List Dataslices](#list-and-get-dataslices)
|
|
105
105
|
* [Export Dataslice](#export-dataslice-and-download)
|
|
106
106
|
* [List Models](#list-models)
|
|
@@ -410,7 +410,18 @@ python tools/import_dataset_from_local.py -host https://staging.visionai.linkerv
|
|
|
410
410
|
```
|
|
411
411
|
<br>
|
|
412
412
|
|
|
413
|
-
### Get Dataset
|
|
413
|
+
### List and Get Dataset
|
|
414
|
+
|
|
415
|
+
The `list_datasets` method returns the list of datasets under the given project.
|
|
416
|
+
```Python
|
|
417
|
+
project = client.get_project(project_id=1)
|
|
418
|
+
datasets:list = project.list_datasets()
|
|
419
|
+
```
|
|
420
|
+
OR
|
|
421
|
+
```Python
|
|
422
|
+
datasets:list = client.list_datasets(project_id=1, client_alias=client.alias )
|
|
423
|
+
```
|
|
424
|
+
|
|
414
425
|
|
|
415
426
|
The `get_dataset` method retrieves the dataset info from the connected site. The `dataset_id` parameter is the unique integer ID of the dataset, not its "name" property.
|
|
416
427
|
|
|
@@ -561,8 +572,9 @@ python tools/export_dataslice.py -host https://staging.visionai.linkervision.ai/
|
|
|
561
572
|
```
|
|
562
573
|
|
|
563
574
|
### Export Large Dataslice and download files
|
|
564
|
-
|
|
565
|
-
python tools/export_dataslice_large.py -host https://visionai.linkervision.ai/dataverse/curation -e {your-account-email} -p {PASSWORD} -s {service-id} -dataslice {dataslice_id} --anno {export
|
|
575
|
+
```
|
|
576
|
+
python tools/export_dataslice_large.py -host https://visionai.linkervision.ai/dataverse/curation -e {your-account-email} -p {PASSWORD} -s {service-id} -dataslice {dataslice_id} --anno {export-model-name / groundtruth} --target_folder {folder path} --export-format {coco, visionai, yolo, vlm ...etc}
|
|
577
|
+
```
|
|
566
578
|
|
|
567
579
|
## Links to language repos
|
|
568
580
|
|
|
@@ -73,7 +73,7 @@ The following sections provide examples for the most common DataVerse tasks incl
|
|
|
73
73
|
* [Edit Project](#edit-project)
|
|
74
74
|
* [Update Alias](#update-ontology-alias)
|
|
75
75
|
* [Create Dataset](#create-dataset)
|
|
76
|
-
* [
|
|
76
|
+
* [List Dataset](#list-and-get-dataset)
|
|
77
77
|
* [List Dataslices](#list-and-get-dataslices)
|
|
78
78
|
* [Export Dataslice](#export-dataslice-and-download)
|
|
79
79
|
* [List Models](#list-models)
|
|
@@ -383,7 +383,18 @@ python tools/import_dataset_from_local.py -host https://staging.visionai.linkerv
|
|
|
383
383
|
```
|
|
384
384
|
<br>
|
|
385
385
|
|
|
386
|
-
### Get Dataset
|
|
386
|
+
### List and Get Dataset
|
|
387
|
+
|
|
388
|
+
The `list_datasets` method returns the list of datasets under the given project.
|
|
389
|
+
```Python
|
|
390
|
+
project = client.get_project(project_id=1)
|
|
391
|
+
datasets:list = project.list_datasets()
|
|
392
|
+
```
|
|
393
|
+
OR
|
|
394
|
+
```Python
|
|
395
|
+
datasets:list = client.list_datasets(project_id=1, client_alias=client.alias )
|
|
396
|
+
```
|
|
397
|
+
|
|
387
398
|
|
|
388
399
|
The `get_dataset` method retrieves the dataset info from the connected site. The `dataset_id` parameter is the unique integer ID of the dataset, not its "name" property.
|
|
389
400
|
|
|
@@ -534,8 +545,9 @@ python tools/export_dataslice.py -host https://staging.visionai.linkervision.ai/
|
|
|
534
545
|
```
|
|
535
546
|
|
|
536
547
|
### Export Large Dataslice and download files
|
|
537
|
-
|
|
538
|
-
python tools/export_dataslice_large.py -host https://visionai.linkervision.ai/dataverse/curation -e {your-account-email} -p {PASSWORD} -s {service-id} -dataslice {dataslice_id} --anno {export
|
|
548
|
+
```
|
|
549
|
+
python tools/export_dataslice_large.py -host https://visionai.linkervision.ai/dataverse/curation -e {your-account-email} -p {PASSWORD} -s {service-id} -dataslice {dataslice_id} --anno {export-model-name / groundtruth} --target_folder {folder path} --export-format {coco, visionai, yolo, vlm ...etc}
|
|
550
|
+
```
|
|
539
551
|
|
|
540
552
|
## Links to language repos
|
|
541
553
|
|
|
@@ -12,6 +12,7 @@ from requests import sessions
|
|
|
12
12
|
from requests.adapters import HTTPAdapter, Retry
|
|
13
13
|
|
|
14
14
|
from ..exceptions.client import DataverseExceptionBase
|
|
15
|
+
from ..utils.utils import chunks
|
|
15
16
|
|
|
16
17
|
logger = logging.getLogger(__name__)
|
|
17
18
|
|
|
@@ -232,6 +233,15 @@ class BackendAPI:
|
|
|
232
233
|
)
|
|
233
234
|
return resp.json()["results"]
|
|
234
235
|
|
|
236
|
+
def list_datasets(self, project_id: int, **kwargs) -> list:
|
|
237
|
+
kwargs["project"] = project_id
|
|
238
|
+
resp = self.send_request(
|
|
239
|
+
url=f"{self.host}/api/datasets/?{urlencode(kwargs)}",
|
|
240
|
+
method="get",
|
|
241
|
+
headers=self.headers,
|
|
242
|
+
)
|
|
243
|
+
return resp.json()["results"]
|
|
244
|
+
|
|
235
245
|
def list_dataslices(self, project_id: int, **kwargs) -> list:
|
|
236
246
|
kwargs["project"] = project_id
|
|
237
247
|
resp = self.send_request(
|
|
@@ -613,39 +623,63 @@ class AsyncBackendAPI:
|
|
|
613
623
|
return None
|
|
614
624
|
|
|
615
625
|
async def get_datarows(
|
|
616
|
-
self,
|
|
617
|
-
|
|
626
|
+
self,
|
|
627
|
+
batch_size: int = 20,
|
|
628
|
+
order_by: str = "id",
|
|
629
|
+
id_set_list: Optional[list] = None,
|
|
630
|
+
**kwargs,
|
|
631
|
+
) -> AsyncGenerator[list[dict]]:
|
|
618
632
|
if "offset" in kwargs or "limit" in kwargs:
|
|
619
633
|
raise ValueError("Specifying offset or limit directly is not allowed.")
|
|
620
|
-
|
|
621
634
|
kwargs["order_by"] = order_by
|
|
622
|
-
dataslice_set = kwargs.pop("dataslice_set", [])
|
|
623
|
-
query_params = {
|
|
624
|
-
**kwargs,
|
|
625
|
-
"order_by": order_by,
|
|
626
|
-
"dataslice_set": dataslice_set,
|
|
627
|
-
"limit": batch_size,
|
|
628
|
-
}
|
|
629
|
-
query_string = urlencode(query_params, doseq=True)
|
|
630
|
-
|
|
631
635
|
id_gt = 0
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
636
|
+
if id_set_list:
|
|
637
|
+
for id_chunks in chunks(id_set_list, batch_size):
|
|
638
|
+
while True:
|
|
639
|
+
kwargs.update(
|
|
640
|
+
{
|
|
641
|
+
"id_set": ",".join([str(id_) for id_ in id_chunks]),
|
|
642
|
+
"limit": batch_size,
|
|
643
|
+
"id__gt": id_gt,
|
|
644
|
+
}
|
|
645
|
+
)
|
|
646
|
+
url = f"{self.host}/api/datarows/?{urlencode(kwargs)}"
|
|
647
|
+
resp: dict = await self.async_send_request(
|
|
648
|
+
url=url,
|
|
649
|
+
method="get",
|
|
650
|
+
headers=self.headers,
|
|
651
|
+
)
|
|
652
|
+
json_data = resp
|
|
653
|
+
datarows = json_data["results"]
|
|
654
|
+
if not datarows:
|
|
655
|
+
break
|
|
656
|
+
# Get last datarow id
|
|
657
|
+
id_gt = datarows[-1]["id"]
|
|
658
|
+
yield datarows
|
|
659
|
+
else:
|
|
660
|
+
dataslice_set = kwargs.pop("dataslice_set", [])
|
|
661
|
+
query_params = {
|
|
662
|
+
**kwargs,
|
|
663
|
+
"dataslice_set": dataslice_set,
|
|
664
|
+
"limit": batch_size,
|
|
665
|
+
}
|
|
666
|
+
query_string = urlencode(query_params, doseq=True)
|
|
667
|
+
|
|
668
|
+
id_gt = 0
|
|
669
|
+
while True:
|
|
670
|
+
url = f"{self.host}/api/datarows/?{query_string}&id__gt={id_gt}"
|
|
671
|
+
resp: dict = await self.async_send_request(
|
|
672
|
+
url=url,
|
|
673
|
+
method="get",
|
|
674
|
+
headers=self.headers,
|
|
675
|
+
)
|
|
676
|
+
json_data = resp
|
|
677
|
+
if not json_data["results"]:
|
|
678
|
+
break
|
|
679
|
+
# Get last datarow id
|
|
680
|
+
datarows = json_data["results"]
|
|
681
|
+
id_gt = datarows[-1]["id"]
|
|
682
|
+
yield datarows
|
|
649
683
|
|
|
650
684
|
async def get_datarows_flat_parent(
|
|
651
685
|
self, batch_size: int = 20, order_by: str = "id", **kwargs
|
|
@@ -1149,6 +1149,24 @@ of this project OR has been added before"
|
|
|
1149
1149
|
raise ClientConnectionError(f"Failed to get the models: {e}")
|
|
1150
1150
|
return dataslice_list
|
|
1151
1151
|
|
|
1152
|
+
@staticmethod
|
|
1153
|
+
def list_datasets(
|
|
1154
|
+
project_id: int,
|
|
1155
|
+
client: Optional["DataverseClient"] = None,
|
|
1156
|
+
client_alias: Optional[str] = None,
|
|
1157
|
+
) -> list:
|
|
1158
|
+
api, client_alias = DataverseClient._get_api_client(
|
|
1159
|
+
client=client, client_alias=client_alias
|
|
1160
|
+
)
|
|
1161
|
+
try:
|
|
1162
|
+
dataset_list: list = api.list_datasets(project_id=project_id)
|
|
1163
|
+
except DataverseExceptionBase:
|
|
1164
|
+
logging.exception("Got api error from Dataverse")
|
|
1165
|
+
raise
|
|
1166
|
+
except Exception as e:
|
|
1167
|
+
raise ClientConnectionError(f"Failed to get the models: {e}")
|
|
1168
|
+
return dataset_list
|
|
1169
|
+
|
|
1152
1170
|
@staticmethod
|
|
1153
1171
|
def list_models(
|
|
1154
1172
|
project_id: int,
|
|
@@ -139,9 +139,7 @@ class Exporter:
|
|
|
139
139
|
datarow_id_set: set[int] = set()
|
|
140
140
|
for id_chunks in chunks(datarow_id_list, 1000):
|
|
141
141
|
gen: AsyncGenerator = curation_api.get_datarows(
|
|
142
|
-
|
|
143
|
-
str(_id) for _id in id_chunks
|
|
144
|
-
), # id_set="1,2,3,4,5"
|
|
142
|
+
id_set_list=id_chunks,
|
|
145
143
|
batch_size=BATCH_SIZE,
|
|
146
144
|
fields="id,items,vlm_items,url,frame_id,image_width,image_height,sensor_name,original_url",
|
|
147
145
|
)
|
|
@@ -2,6 +2,7 @@ import re
|
|
|
2
2
|
from typing import Optional, Union
|
|
3
3
|
|
|
4
4
|
from pydantic import BaseModel, ConfigDict, field_validator
|
|
5
|
+
from pydantic_core.core_schema import ValidationInfo
|
|
5
6
|
|
|
6
7
|
from .common import (
|
|
7
8
|
AnnotationFormat,
|
|
@@ -111,10 +112,10 @@ class QuestionClass(BaseModel):
|
|
|
111
112
|
return value
|
|
112
113
|
|
|
113
114
|
@field_validator("answer_type")
|
|
114
|
-
def answer_type_validator(cls, value, values, **kwargs):
|
|
115
|
-
if value == AttributeType.OPTION and not values.get("answer_options"):
|
|
115
|
+
def answer_type_validator(cls, value, values: ValidationInfo, **kwargs):
|
|
116
|
+
if value == AttributeType.OPTION and not values.data.get("answer_options"):
|
|
116
117
|
raise ValueError(
|
|
117
|
-
f"* {values} Need to assign value for `answer_options` "
|
|
118
|
+
f"* {values.data} Need to assign value for `answer_options` "
|
|
118
119
|
+ "if the Answer type is option"
|
|
119
120
|
)
|
|
120
121
|
return value
|
|
@@ -255,6 +256,14 @@ class Project(BaseModel):
|
|
|
255
256
|
)
|
|
256
257
|
return project
|
|
257
258
|
|
|
259
|
+
def list_datasets(self) -> list:
|
|
260
|
+
from ..client import DataverseClient
|
|
261
|
+
|
|
262
|
+
dataset_list: list = DataverseClient.list_datasets(
|
|
263
|
+
project_id=self.id, client_alias=self.client_alias
|
|
264
|
+
)
|
|
265
|
+
return dataset_list
|
|
266
|
+
|
|
258
267
|
def list_dataslices(self) -> list:
|
|
259
268
|
from ..client import DataverseClient
|
|
260
269
|
|
|
@@ -72,3 +72,9 @@ def download_file_from_url(url: str, save_path: str):
|
|
|
72
72
|
|
|
73
73
|
except requests.exceptions.RequestException as e:
|
|
74
74
|
print(f"An error occurred while downloading the file: {e}")
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def chunks(lst: list, n: int):
|
|
78
|
+
"""Yield successive n-sized chunks from lst."""
|
|
79
|
+
for i in range(0, len(lst), n):
|
|
80
|
+
yield lst[i : i + n]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dataverse-sdk
|
|
3
|
-
Version: 2.2.1
|
|
3
|
+
Version: 2.2.2
|
|
4
4
|
Summary: Dataverse SDK For Python
|
|
5
5
|
Home-page:
|
|
6
6
|
Author: LinkerVision
|
|
@@ -100,7 +100,7 @@ The following sections provide examples for the most common DataVerse tasks incl
|
|
|
100
100
|
* [Edit Project](#edit-project)
|
|
101
101
|
* [Update Alias](#update-ontology-alias)
|
|
102
102
|
* [Create Dataset](#create-dataset)
|
|
103
|
-
* [
|
|
103
|
+
* [List Dataset](#list-and-get-dataset)
|
|
104
104
|
* [List Dataslices](#list-and-get-dataslices)
|
|
105
105
|
* [Export Dataslice](#export-dataslice-and-download)
|
|
106
106
|
* [List Models](#list-models)
|
|
@@ -410,7 +410,18 @@ python tools/import_dataset_from_local.py -host https://staging.visionai.linkerv
|
|
|
410
410
|
```
|
|
411
411
|
<br>
|
|
412
412
|
|
|
413
|
-
### Get Dataset
|
|
413
|
+
### List and Get Dataset
|
|
414
|
+
|
|
415
|
+
The `list_datasets` method returns the list of datasets under the given project.
|
|
416
|
+
```Python
|
|
417
|
+
project = client.get_project(project_id=1)
|
|
418
|
+
datasets:list = project.list_datasets()
|
|
419
|
+
```
|
|
420
|
+
OR
|
|
421
|
+
```Python
|
|
422
|
+
datasets:list = client.list_datasets(project_id=1, client_alias=client.alias )
|
|
423
|
+
```
|
|
424
|
+
|
|
414
425
|
|
|
415
426
|
The `get_dataset` method retrieves the dataset info from the connected site. The `dataset_id` parameter is the unique integer ID of the dataset, not its "name" property.
|
|
416
427
|
|
|
@@ -561,8 +572,9 @@ python tools/export_dataslice.py -host https://staging.visionai.linkervision.ai/
|
|
|
561
572
|
```
|
|
562
573
|
|
|
563
574
|
### Export Large Dataslice and download files
|
|
564
|
-
|
|
565
|
-
python tools/export_dataslice_large.py -host https://visionai.linkervision.ai/dataverse/curation -e {your-account-email} -p {PASSWORD} -s {service-id} -dataslice {dataslice_id} --anno {export
|
|
575
|
+
```
|
|
576
|
+
python tools/export_dataslice_large.py -host https://visionai.linkervision.ai/dataverse/curation -e {your-account-email} -p {PASSWORD} -s {service-id} -dataslice {dataslice_id} --anno {export-model-name / groundtruth} --target_folder {folder path} --export-format {coco, visionai, yolo, vlm ...etc}
|
|
577
|
+
```
|
|
566
578
|
|
|
567
579
|
## Links to language repos
|
|
568
580
|
|
|
@@ -2,7 +2,7 @@ from setuptools import find_packages, setup
|
|
|
2
2
|
|
|
3
3
|
AUTHOR = "LinkerVision"
|
|
4
4
|
PACKAGE_NAME = "dataverse-sdk"
|
|
5
|
-
PACKAGE_VERSION = "2.2.
|
|
5
|
+
PACKAGE_VERSION = "2.2.2"
|
|
6
6
|
DESC = "Dataverse SDK For Python"
|
|
7
7
|
with open("README.md", encoding="utf-8") as fh:
|
|
8
8
|
long_description = fh.read()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|