dataverse-sdk 2.2.1__tar.gz → 2.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/PKG-INFO +17 -5
  2. {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/README.md +16 -4
  3. {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk/apis/backend.py +63 -29
  4. {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk/client.py +18 -0
  5. {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk/export/exporter.py +1 -3
  6. {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk/schemas/client.py +12 -3
  7. {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk/utils/utils.py +6 -0
  8. {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk.egg-info/PKG-INFO +17 -5
  9. {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/setup.py +1 -1
  10. {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk/__init__.py +0 -0
  11. {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk/apis/__init__.py +0 -0
  12. {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk/apis/third_party.py +0 -0
  13. {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk/connections.py +0 -0
  14. {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk/constants.py +0 -0
  15. {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk/exceptions/__init__.py +0 -0
  16. {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk/exceptions/client.py +0 -0
  17. {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk/export/__init__.py +0 -0
  18. {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk/export/base.py +0 -0
  19. {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk/export/coco.py +0 -0
  20. {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk/export/constant.py +0 -0
  21. {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk/export/utils.py +0 -0
  22. {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk/export/visionai.py +0 -0
  23. {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk/export/vqa.py +0 -0
  24. {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk/export/yolo.py +0 -0
  25. {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk/schemas/__init__.py +0 -0
  26. {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk/schemas/api.py +0 -0
  27. {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk/schemas/common.py +0 -0
  28. {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk/schemas/format.py +0 -0
  29. {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk/utils/__init__.py +0 -0
  30. {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk.egg-info/SOURCES.txt +0 -0
  31. {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk.egg-info/dependency_links.txt +0 -0
  32. {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk.egg-info/requires.txt +0 -0
  33. {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/dataverse_sdk.egg-info/top_level.txt +0 -0
  34. {dataverse_sdk-2.2.1 → dataverse_sdk-2.2.2}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dataverse-sdk
3
- Version: 2.2.1
3
+ Version: 2.2.2
4
4
  Summary: Dataverse SDK For Python
5
5
  Home-page:
6
6
  Author: LinkerVision
@@ -100,7 +100,7 @@ The following sections provide examples for the most common DataVerse tasks incl
100
100
  * [Edit Project](#edit-project)
101
101
  * [Update Alias](#update-ontology-alias)
102
102
  * [Create Dataset](#create-dataset)
103
- * [Get Dataset](#get-dataset)
103
+ * [List Dataset](#list-and-get-dataset)
104
104
  * [List Dataslices](#list-and-get-dataslices)
105
105
  * [Export Dataslice](#export-dataslice-and-download)
106
106
  * [List Models](#list-models)
@@ -410,7 +410,18 @@ python tools/import_dataset_from_local.py -host https://staging.visionai.linkerv
410
410
  ```
411
411
  <br>
412
412
 
413
- ### Get Dataset
413
+ ### List and Get Dataset
414
+
415
+ The `list_datasets` method returns the list of datasets under the given project
416
+ ```Python
417
+ project = client.get_project(project_id=1)
418
+ datasets:list = project.list_datasets()
419
+ ```
420
+ OR
421
+ ```Python
422
+ datasets:list = client.list_datasets(project_id=1, client_alias=client.alias)
423
+ ```
424
+
414
425
 
415
426
  The `get_dataset` method retrieves the dataset info from the connected site. The `dataset_id` parameter is the unique integer ID of the dataset, not its "name" property.
416
427
 
@@ -561,8 +572,9 @@ python tools/export_dataslice.py -host https://staging.visionai.linkervision.ai/
561
572
  ```
562
573
 
563
574
  ### Export Large Dataslice and download files
564
-
565
- python tools/export_dataslice_large.py -host https://visionai.linkervision.ai/dataverse/curation -e {your-account-email} -p {PASSWORD} -s {service-id} -dataslice {dataslice_id} --anno {export model name / groundtruth} --target_folder {folder path} --export-format {coco, visionai ...etc}
575
+ ```
576
+ python tools/export_dataslice_large.py -host https://visionai.linkervision.ai/dataverse/curation -e {your-account-email} -p {PASSWORD} -s {service-id} -dataslice {dataslice_id} --anno {export-model-name / groundtruth} --target_folder {folder path} --export-format {coco, visionai, yolo, vlm ...etc}
577
+ ```
566
578
 
567
579
  ## Links to language repos
568
580
 
@@ -73,7 +73,7 @@ The following sections provide examples for the most common DataVerse tasks incl
73
73
  * [Edit Project](#edit-project)
74
74
  * [Update Alias](#update-ontology-alias)
75
75
  * [Create Dataset](#create-dataset)
76
- * [Get Dataset](#get-dataset)
76
+ * [List Dataset](#list-and-get-dataset)
77
77
  * [List Dataslices](#list-and-get-dataslices)
78
78
  * [Export Dataslice](#export-dataslice-and-download)
79
79
  * [List Models](#list-models)
@@ -383,7 +383,18 @@ python tools/import_dataset_from_local.py -host https://staging.visionai.linkerv
383
383
  ```
384
384
  <br>
385
385
 
386
- ### Get Dataset
386
+ ### List and Get Dataset
387
+
388
+ The `list_datasets` method returns the list of datasets under the given project
389
+ ```Python
390
+ project = client.get_project(project_id=1)
391
+ datasets:list = project.list_datasets()
392
+ ```
393
+ OR
394
+ ```Python
395
+ datasets:list = client.list_datasets(project_id=1, client_alias=client.alias)
396
+ ```
397
+
387
398
 
388
399
  The `get_dataset` method retrieves the dataset info from the connected site. The `dataset_id` parameter is the unique integer ID of the dataset, not its "name" property.
389
400
 
@@ -534,8 +545,9 @@ python tools/export_dataslice.py -host https://staging.visionai.linkervision.ai/
534
545
  ```
535
546
 
536
547
  ### Export Large Dataslice and download files
537
-
538
- python tools/export_dataslice_large.py -host https://visionai.linkervision.ai/dataverse/curation -e {your-account-email} -p {PASSWORD} -s {service-id} -dataslice {dataslice_id} --anno {export model name / groundtruth} --target_folder {folder path} --export-format {coco, visionai ...etc}
548
+ ```
549
+ python tools/export_dataslice_large.py -host https://visionai.linkervision.ai/dataverse/curation -e {your-account-email} -p {PASSWORD} -s {service-id} -dataslice {dataslice_id} --anno {export-model-name / groundtruth} --target_folder {folder path} --export-format {coco, visionai, yolo, vlm ...etc}
550
+ ```
539
551
 
540
552
  ## Links to language repos
541
553
 
@@ -12,6 +12,7 @@ from requests import sessions
12
12
  from requests.adapters import HTTPAdapter, Retry
13
13
 
14
14
  from ..exceptions.client import DataverseExceptionBase
15
+ from ..utils.utils import chunks
15
16
 
16
17
  logger = logging.getLogger(__name__)
17
18
 
@@ -232,6 +233,15 @@ class BackendAPI:
232
233
  )
233
234
  return resp.json()["results"]
234
235
 
236
+ def list_datasets(self, project_id: int, **kwargs) -> list:
237
+ kwargs["project"] = project_id
238
+ resp = self.send_request(
239
+ url=f"{self.host}/api/datasets/?{urlencode(kwargs)}",
240
+ method="get",
241
+ headers=self.headers,
242
+ )
243
+ return resp.json()["results"]
244
+
235
245
  def list_dataslices(self, project_id: int, **kwargs) -> list:
236
246
  kwargs["project"] = project_id
237
247
  resp = self.send_request(
@@ -613,39 +623,63 @@ class AsyncBackendAPI:
613
623
  return None
614
624
 
615
625
  async def get_datarows(
616
- self, batch_size: int = 20, order_by: str = "id", **kwargs
617
- ) -> AsyncGenerator[list[dict], None, None]:
626
+ self,
627
+ batch_size: int = 20,
628
+ order_by: str = "id",
629
+ id_set_list: Optional[list] = None,
630
+ **kwargs,
631
+ ) -> AsyncGenerator[list[dict], None]:
618
632
  if "offset" in kwargs or "limit" in kwargs:
619
633
  raise ValueError("Specifying offset or limit directly is not allowed.")
620
-
621
634
  kwargs["order_by"] = order_by
622
- dataslice_set = kwargs.pop("dataslice_set", [])
623
- query_params = {
624
- **kwargs,
625
- "order_by": order_by,
626
- "dataslice_set": dataslice_set,
627
- "limit": batch_size,
628
- }
629
- query_string = urlencode(query_params, doseq=True)
630
-
631
635
  id_gt = 0
632
- while True:
633
- url = f"{self.host}/api/datarows/?{query_string}&id__gt={id_gt}"
634
- resp: dict = await self.async_send_request(
635
- url=url,
636
- method="get",
637
- headers=self.headers,
638
- )
639
- json_data = resp
640
- if json_data["count"] == 0:
641
- break
642
- if not json_data["results"]:
643
- break
644
-
645
- # Get last datarow id
646
- datarows = json_data["results"]
647
- id_gt = datarows[-1]["id"]
648
- yield datarows
636
+ if id_set_list:
637
+ for id_chunks in chunks(id_set_list, batch_size):
638
+ while True:
639
+ kwargs.update(
640
+ {
641
+ "id_set": ",".join([str(id_) for id_ in id_chunks]),
642
+ "limit": batch_size,
643
+ "id__gt": id_gt,
644
+ }
645
+ )
646
+ url = f"{self.host}/api/datarows/?{urlencode(kwargs)}"
647
+ resp: dict = await self.async_send_request(
648
+ url=url,
649
+ method="get",
650
+ headers=self.headers,
651
+ )
652
+ json_data = resp
653
+ datarows = json_data["results"]
654
+ if not datarows:
655
+ break
656
+ # Get last datarow id
657
+ id_gt = datarows[-1]["id"]
658
+ yield datarows
659
+ else:
660
+ dataslice_set = kwargs.pop("dataslice_set", [])
661
+ query_params = {
662
+ **kwargs,
663
+ "dataslice_set": dataslice_set,
664
+ "limit": batch_size,
665
+ }
666
+ query_string = urlencode(query_params, doseq=True)
667
+
668
+ id_gt = 0
669
+ while True:
670
+ url = f"{self.host}/api/datarows/?{query_string}&id__gt={id_gt}"
671
+ resp: dict = await self.async_send_request(
672
+ url=url,
673
+ method="get",
674
+ headers=self.headers,
675
+ )
676
+ json_data = resp
677
+ if not json_data["results"]:
678
+ break
679
+ # Get last datarow id
680
+ datarows = json_data["results"]
681
+ id_gt = datarows[-1]["id"]
682
+ yield datarows
649
683
 
650
684
  async def get_datarows_flat_parent(
651
685
  self, batch_size: int = 20, order_by: str = "id", **kwargs
@@ -1149,6 +1149,24 @@ of this project OR has been added before"
1149
1149
  raise ClientConnectionError(f"Failed to get the models: {e}")
1150
1150
  return dataslice_list
1151
1151
 
1152
+ @staticmethod
1153
+ def list_datasets(
1154
+ project_id: int,
1155
+ client: Optional["DataverseClient"] = None,
1156
+ client_alias: Optional[str] = None,
1157
+ ) -> list:
1158
+ api, client_alias = DataverseClient._get_api_client(
1159
+ client=client, client_alias=client_alias
1160
+ )
1161
+ try:
1162
+ dataset_list: list = api.list_datasets(project_id=project_id)
1163
+ except DataverseExceptionBase:
1164
+ logging.exception("Got api error from Dataverse")
1165
+ raise
1166
+ except Exception as e:
1167
+ raise ClientConnectionError(f"Failed to get the datasets: {e}")
1168
+ return dataset_list
1169
+
1152
1170
  @staticmethod
1153
1171
  def list_models(
1154
1172
  project_id: int,
@@ -139,9 +139,7 @@ class Exporter:
139
139
  datarow_id_set: set[int] = set()
140
140
  for id_chunks in chunks(datarow_id_list, 1000):
141
141
  gen: AsyncGenerator = curation_api.get_datarows(
142
- id_set=",".join(
143
- str(_id) for _id in id_chunks
144
- ), # id_set="1,2,3,4,5"
142
+ id_set_list=id_chunks,
145
143
  batch_size=BATCH_SIZE,
146
144
  fields="id,items,vlm_items,url,frame_id,image_width,image_height,sensor_name,original_url",
147
145
  )
@@ -2,6 +2,7 @@ import re
2
2
  from typing import Optional, Union
3
3
 
4
4
  from pydantic import BaseModel, ConfigDict, field_validator
5
+ from pydantic_core.core_schema import ValidationInfo
5
6
 
6
7
  from .common import (
7
8
  AnnotationFormat,
@@ -111,10 +112,10 @@ class QuestionClass(BaseModel):
111
112
  return value
112
113
 
113
114
  @field_validator("answer_type")
114
- def answer_type_validator(cls, value, values, **kwargs):
115
- if value == AttributeType.OPTION and not values.get("answer_options"):
115
+ def answer_type_validator(cls, value, values: ValidationInfo, **kwargs):
116
+ if value == AttributeType.OPTION and not values.data.get("answer_options"):
116
117
  raise ValueError(
117
- f"* {values} Need to assign value for `answer_options` "
118
+ f"* {values.data} Need to assign value for `answer_options` "
118
119
  + "if the Answer type is option"
119
120
  )
120
121
  return value
@@ -255,6 +256,14 @@ class Project(BaseModel):
255
256
  )
256
257
  return project
257
258
 
259
+ def list_datasets(self) -> list:
260
+ from ..client import DataverseClient
261
+
262
+ dataset_list: list = DataverseClient.list_datasets(
263
+ project_id=self.id, client_alias=self.client_alias
264
+ )
265
+ return dataset_list
266
+
258
267
  def list_dataslices(self) -> list:
259
268
  from ..client import DataverseClient
260
269
 
@@ -72,3 +72,9 @@ def download_file_from_url(url: str, save_path: str):
72
72
 
73
73
  except requests.exceptions.RequestException as e:
74
74
  print(f"An error occurred while downloading the file: {e}")
75
+
76
+
77
+ def chunks(lst: list, n: int):
78
+ """Yield successive n-sized chunks from lst."""
79
+ for i in range(0, len(lst), n):
80
+ yield lst[i : i + n]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dataverse-sdk
3
- Version: 2.2.1
3
+ Version: 2.2.2
4
4
  Summary: Dataverse SDK For Python
5
5
  Home-page:
6
6
  Author: LinkerVision
@@ -100,7 +100,7 @@ The following sections provide examples for the most common DataVerse tasks incl
100
100
  * [Edit Project](#edit-project)
101
101
  * [Update Alias](#update-ontology-alias)
102
102
  * [Create Dataset](#create-dataset)
103
- * [Get Dataset](#get-dataset)
103
+ * [List Dataset](#list-and-get-dataset)
104
104
  * [List Dataslices](#list-and-get-dataslices)
105
105
  * [Export Dataslice](#export-dataslice-and-download)
106
106
  * [List Models](#list-models)
@@ -410,7 +410,18 @@ python tools/import_dataset_from_local.py -host https://staging.visionai.linkerv
410
410
  ```
411
411
  <br>
412
412
 
413
- ### Get Dataset
413
+ ### List and Get Dataset
414
+
415
+ The `list_datasets` method returns the list of datasets under the given project
416
+ ```Python
417
+ project = client.get_project(project_id=1)
418
+ datasets:list = project.list_datasets()
419
+ ```
420
+ OR
421
+ ```Python
422
+ datasets:list = client.list_datasets(project_id=1, client_alias=client.alias)
423
+ ```
424
+
414
425
 
415
426
  The `get_dataset` method retrieves the dataset info from the connected site. The `dataset_id` parameter is the unique integer ID of the dataset, not its "name" property.
416
427
 
@@ -561,8 +572,9 @@ python tools/export_dataslice.py -host https://staging.visionai.linkervision.ai/
561
572
  ```
562
573
 
563
574
  ### Export Large Dataslice and download files
564
-
565
- python tools/export_dataslice_large.py -host https://visionai.linkervision.ai/dataverse/curation -e {your-account-email} -p {PASSWORD} -s {service-id} -dataslice {dataslice_id} --anno {export model name / groundtruth} --target_folder {folder path} --export-format {coco, visionai ...etc}
575
+ ```
576
+ python tools/export_dataslice_large.py -host https://visionai.linkervision.ai/dataverse/curation -e {your-account-email} -p {PASSWORD} -s {service-id} -dataslice {dataslice_id} --anno {export-model-name / groundtruth} --target_folder {folder path} --export-format {coco, visionai, yolo, vlm ...etc}
577
+ ```
566
578
 
567
579
  ## Links to language repos
568
580
 
@@ -2,7 +2,7 @@ from setuptools import find_packages, setup
2
2
 
3
3
  AUTHOR = "LinkerVision"
4
4
  PACKAGE_NAME = "dataverse-sdk"
5
- PACKAGE_VERSION = "2.2.1"
5
+ PACKAGE_VERSION = "2.2.2"
6
6
  DESC = "Dataverse SDK For Python"
7
7
  with open("README.md", encoding="utf-8") as fh:
8
8
  long_description = fh.read()
File without changes