mbari-aidata 1.41.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. mbari_aidata-1.41.9/PKG-INFO +151 -0
  2. mbari_aidata-1.41.9/README.md +121 -0
  3. mbari_aidata-1.41.9/mbari_aidata/.gitignore +45 -0
  4. mbari_aidata-1.41.9/mbari_aidata/__init__.py +2 -0
  5. mbari_aidata-1.41.9/mbari_aidata/__main__.py +100 -0
  6. mbari_aidata-1.41.9/mbari_aidata/commands/db_utils.py +35 -0
  7. mbari_aidata-1.41.9/mbari_aidata/commands/download.py +194 -0
  8. mbari_aidata-1.41.9/mbari_aidata/commands/load_boxes.py +143 -0
  9. mbari_aidata-1.41.9/mbari_aidata/commands/load_common.py +66 -0
  10. mbari_aidata-1.41.9/mbari_aidata/commands/load_exemplars.py +135 -0
  11. mbari_aidata-1.41.9/mbari_aidata/commands/load_images.py +108 -0
  12. mbari_aidata-1.41.9/mbari_aidata/commands/load_queue.py +92 -0
  13. mbari_aidata-1.41.9/mbari_aidata/commands/load_video.py +117 -0
  14. mbari_aidata-1.41.9/mbari_aidata/commands/transform.py +367 -0
  15. mbari_aidata-1.41.9/mbari_aidata/common_args.py +27 -0
  16. mbari_aidata-1.41.9/mbari_aidata/generators/cifar.py +131 -0
  17. mbari_aidata-1.41.9/mbari_aidata/generators/coco_voc.py +654 -0
  18. mbari_aidata-1.41.9/mbari_aidata/generators/utils.py +120 -0
  19. mbari_aidata-1.41.9/mbari_aidata/logger/__init__.py +121 -0
  20. mbari_aidata-1.41.9/mbari_aidata/plugins/extractors/media_types.py +9 -0
  21. mbari_aidata-1.41.9/mbari_aidata/plugins/extractors/tap_cfe_media.py +142 -0
  22. mbari_aidata-1.41.9/mbari_aidata/plugins/extractors/tap_i2map_media.py +125 -0
  23. mbari_aidata-1.41.9/mbari_aidata/plugins/extractors/tap_planktivore_media.py +45 -0
  24. mbari_aidata-1.41.9/mbari_aidata/plugins/extractors/tap_sdcat_csv.py +46 -0
  25. mbari_aidata-1.41.9/mbari_aidata/plugins/extractors/tap_sony_media.py +87 -0
  26. mbari_aidata-1.41.9/mbari_aidata/plugins/extractors/tap_voc.py +90 -0
  27. mbari_aidata-1.41.9/mbari_aidata/plugins/loaders/tator/attribute_utils.py +65 -0
  28. mbari_aidata-1.41.9/mbari_aidata/plugins/loaders/tator/common.py +123 -0
  29. mbari_aidata-1.41.9/mbari_aidata/plugins/loaders/tator/localization.py +105 -0
  30. mbari_aidata-1.41.9/mbari_aidata/plugins/loaders/tator/media.py +375 -0
  31. mbari_aidata-1.41.9/mbari_aidata/plugins/loaders/tator_redis/consume_localization.py +96 -0
  32. mbari_aidata-1.41.9/mbari_aidata/plugins/loaders/tator_redis/consume_media.py +146 -0
  33. mbari_aidata-1.41.9/mbari_aidata/plugins/module_utils.py +20 -0
  34. mbari_aidata-1.41.9/mbari_aidata/predictors/process_vits.py +117 -0
  35. mbari_aidata-1.41.9/mbari_aidata/predictors/vector_similarity.py +68 -0
  36. mbari_aidata-1.41.9/pyproject.toml +88 -0
@@ -0,0 +1,151 @@
1
+ Metadata-Version: 2.3
2
+ Name: mbari-aidata
3
+ Version: 1.41.9
4
+ Summary: Command line tool to do extract, transform, load and download operations on AI data for a number of projects at MBARI that require detection, clustering or classification workflows.
5
+ License: Apache
6
+ Author: Danelle Cline
7
+ Author-email: dcline@mbari.org
8
+ Requires-Python: >=3.10,<3.12
9
+ Classifier: License :: Other/Proprietary License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.10
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Requires-Dist: albucore (==0.0.16)
14
+ Requires-Dist: albumentations (==1.4.15)
15
+ Requires-Dist: click (==8.1.7)
16
+ Requires-Dist: ephem (==4.1.6)
17
+ Requires-Dist: moviepy (==2.1.1)
18
+ Requires-Dist: opencv-contrib-python (==4.11.0.86)
19
+ Requires-Dist: pandas (==2.2.2)
20
+ Requires-Dist: pascal-voc-writer (>=0.1.4)
21
+ Requires-Dist: piexif (==1.1.3)
22
+ Requires-Dist: pytz (==2024.2)
23
+ Requires-Dist: redis (==5.0.7)
24
+ Requires-Dist: requests (==2.32.3)
25
+ Requires-Dist: tator (==1.2.3)
26
+ Requires-Dist: torchvision (==0.18.1)
27
+ Requires-Dist: tqdm (==4.67.1)
28
+ Requires-Dist: transformers (==4.48.0)
29
+ Description-Content-Type: text/markdown
30
+
31
+ [![MBARI](https://www.mbari.org/wp-content/uploads/2014/11/logo-mbari-3b.png)](http://www.mbari.org)
32
+ [![semantic-release](https://img.shields.io/badge/%20%20%F0%9F%93%A6%F0%9F%9A%80-semantic--release-e10079.svg)](https://github.com/semantic-release/semantic-release)
33
+ [![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
34
+ [![Python](https://img.shields.io/badge/language-Python-blue.svg)](https://www.python.org/downloads/)
35
+
36
+ *mbari-aidata* is a command line tool to do extract, transform, load and download operations
37
+ on AI data for a number of projects at MBARI that require detection, clustering or classification
38
+ workflows.
39
+
40
+ Full documentation is available on commands at [https://docs.mbari.org/internal/ai/data](https://docs.mbari.org/internal/ai/data/).
41
+
42
+ This supports loading [sdcat](https://github.com/mbari-org/sdcat) formatted output and downloads from [Tator](https://www.tatorapp.com/) and
43
+ [Redis](https://redis.io) databases, although support for other data sources is also possible, e.g. [FathomNet](https://fathomnet.org/),
44
+ so we decided to keep the name generic.
45
+
46
+ This also supports loading media from a directory or URL, and transforming data into various
47
+ formats for machine learning, e.g. COCO, CIFAR, or PASCAL VOC format.
48
+
49
+ ## Requirements
50
+ - Python 3.10 or 3.11 (3.12 and later are not supported by this release)
51
+ - A Tator API token and Redis password for the .env file. Contact the MBARI AI team for access.
52
+ - Docker for development and testing only
53
+
54
+ ## Installation
55
+ Install from PyPi
56
+
57
+ ```shell
58
+ pip install mbari-aidata
59
+ ```
60
+
61
+ Create the .env file with the following contents in the root directory of the project:
62
+ ```shell
63
+ TATOR_TOKEN=your_api_token
64
+ REDIS_PASSWORD=your_redis_password
65
+ ENVIRONMENT=testing or production
66
+ ```
67
+
68
+ Create a configuration file in the root directory of the project:
69
+ ```shell
70
+ touch config_cfe.yml
71
+ ```
72
+
73
+ This file will be used to configure the project data, such as mounts, plugins, and database connections.
74
+ ```shell
75
+ aidata download dataset --version Baseline --labels "Diatoms, Copepods" --config config_cfe.yml
76
+ ```
77
+
78
+ Example configuration file:
79
+ ```yaml
80
+ # config_cfe.yml
81
+ # Config file for CFE project production
82
+ mounts:
83
+ - name: "image"
84
+ path: "/mnt/CFElab"
85
+ host: "mantis.shore.mbari.org"
86
+ nginx_root: "/CFElab"
87
+
88
+ - name: "video"
89
+ path: "/mnt/CFElab"
90
+ host: "mantis.shore.mbari.org"
91
+ nginx_root: "/CFElab"
92
+
93
+
94
+ plugins:
95
+ - name: "extractor"
96
+ module: "mbari_aidata.plugins.extractors.tap_cfe_media"
97
+ function: "extract_media"
98
+
99
+ redis:
100
+ host: "doris.shore.mbari.org"
101
+ port: 6382
102
+
103
+ vss:
104
+ project: "902111-CFE"
105
+ model: "google/vit-base-patch16-224"
106
+
107
+ tator:
108
+ project: "902111-CFE"
109
+ host: "mantis.shore.mbari.org"
110
+ image:
111
+ attributes:
112
+ iso_datetime:
113
+ type: datetime
114
+ depth:
115
+ type: float
116
+ video:
117
+ attributes:
118
+ iso_start_datetime:
119
+ type: datetime
120
+ box:
121
+ attributes:
122
+ Label:
123
+ type: string
124
+ score:
125
+ type: float
126
+ cluster:
127
+ type: string
128
+ saliency:
129
+ type: float
130
+ area:
131
+ type: int
132
+ exemplar:
133
+ type: bool
134
+ ```
135
+
136
+ A docker version is also available at `mbari/aidata:latest` or `mbari/aidata:latest:cuda-124`.
137
+
138
+ ## Commands
139
+
140
+ * `aidata download --help` - Download data, such as images and boxes, into various formats for machine learning, e.g. COCO, CIFAR, or PASCAL VOC format
141
+ * `aidata load --help` - Load data, such as images, and boxes into either a Postgres or REDIS database
142
+ * `aidata db --help` - Commands related to database management
143
+ * `aidata transform --help` - Commands related to transforming downloaded data
144
+ * `aidata -h` - Print help message and exit.
145
+
146
+ Source code is available at [github.com/mbari-org/aidata](https://github.com/mbari-org/aidata/).
147
+
148
+ ## Development
149
+ See the [Development Guide](DEVELOPMENT.md) for more information on how to set up the development environment.
150
+
151
+ **updated: 2025-01-28**
@@ -0,0 +1,121 @@
1
+ [![MBARI](https://www.mbari.org/wp-content/uploads/2014/11/logo-mbari-3b.png)](http://www.mbari.org)
2
+ [![semantic-release](https://img.shields.io/badge/%20%20%F0%9F%93%A6%F0%9F%9A%80-semantic--release-e10079.svg)](https://github.com/semantic-release/semantic-release)
3
+ [![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
4
+ [![Python](https://img.shields.io/badge/language-Python-blue.svg)](https://www.python.org/downloads/)
5
+
6
+ *mbari-aidata* is a command line tool to do extract, transform, load and download operations
7
+ on AI data for a number of projects at MBARI that require detection, clustering or classification
8
+ workflows.
9
+
10
+ Full documentation is available on commands at [https://docs.mbari.org/internal/ai/data](https://docs.mbari.org/internal/ai/data/).
11
+
12
+ This supports loading [sdcat](https://github.com/mbari-org/sdcat) formatted output and downloads from [Tator](https://www.tatorapp.com/) and
13
+ [Redis](https://redis.io) databases, although support for other data sources is also possible, e.g. [FathomNet](https://fathomnet.org/),
14
+ so we decided to keep the name generic.
15
+
16
+ This also supports loading media from a directory or URL, and transforming data into various
17
+ formats for machine learning, e.g. COCO, CIFAR, or PASCAL VOC format.
18
+
19
+ ## Requirements
20
+ - Python 3.10 or 3.11 (3.12 and later are not supported by this release)
21
+ - A Tator API token and Redis password for the .env file. Contact the MBARI AI team for access.
22
+ - Docker for development and testing only
23
+
24
+ ## Installation
25
+ Install from PyPi
26
+
27
+ ```shell
28
+ pip install mbari-aidata
29
+ ```
30
+
31
+ Create the .env file with the following contents in the root directory of the project:
32
+ ```shell
33
+ TATOR_TOKEN=your_api_token
34
+ REDIS_PASSWORD=your_redis_password
35
+ ENVIRONMENT=testing or production
36
+ ```
37
+
38
+ Create a configuration file in the root directory of the project:
39
+ ```shell
40
+ touch config_cfe.yml
41
+ ```
42
+
43
+ This file will be used to configure the project data, such as mounts, plugins, and database connections.
44
+ ```shell
45
+ aidata download dataset --version Baseline --labels "Diatoms, Copepods" --config config_cfe.yml
46
+ ```
47
+
48
+ Example configuration file:
49
+ ```yaml
50
+ # config_cfe.yml
51
+ # Config file for CFE project production
52
+ mounts:
53
+ - name: "image"
54
+ path: "/mnt/CFElab"
55
+ host: "mantis.shore.mbari.org"
56
+ nginx_root: "/CFElab"
57
+
58
+ - name: "video"
59
+ path: "/mnt/CFElab"
60
+ host: "mantis.shore.mbari.org"
61
+ nginx_root: "/CFElab"
62
+
63
+
64
+ plugins:
65
+ - name: "extractor"
66
+ module: "mbari_aidata.plugins.extractors.tap_cfe_media"
67
+ function: "extract_media"
68
+
69
+ redis:
70
+ host: "doris.shore.mbari.org"
71
+ port: 6382
72
+
73
+ vss:
74
+ project: "902111-CFE"
75
+ model: "google/vit-base-patch16-224"
76
+
77
+ tator:
78
+ project: "902111-CFE"
79
+ host: "mantis.shore.mbari.org"
80
+ image:
81
+ attributes:
82
+ iso_datetime:
83
+ type: datetime
84
+ depth:
85
+ type: float
86
+ video:
87
+ attributes:
88
+ iso_start_datetime:
89
+ type: datetime
90
+ box:
91
+ attributes:
92
+ Label:
93
+ type: string
94
+ score:
95
+ type: float
96
+ cluster:
97
+ type: string
98
+ saliency:
99
+ type: float
100
+ area:
101
+ type: int
102
+ exemplar:
103
+ type: bool
104
+ ```
105
+
106
+ A docker version is also available at `mbari/aidata:latest` or `mbari/aidata:latest:cuda-124`.
107
+
108
+ ## Commands
109
+
110
+ * `aidata download --help` - Download data, such as images and boxes, into various formats for machine learning, e.g. COCO, CIFAR, or PASCAL VOC format
111
+ * `aidata load --help` - Load data, such as images, and boxes into either a Postgres or REDIS database
112
+ * `aidata db --help` - Commands related to database management
113
+ * `aidata transform --help` - Commands related to transforming downloaded data
114
+ * `aidata -h` - Print help message and exit.
115
+
116
+ Source code is available at [github.com/mbari-org/aidata](https://github.com/mbari-org/aidata/).
117
+
118
+ ## Development
119
+ See the [Development Guide](DEVELOPMENT.md) for more information on how to set up the development environment.
120
+
121
+ **updated: 2025-01-28**
@@ -0,0 +1,45 @@
1
+ # database files
2
+ *.db
3
+
4
+ # test data
5
+ data/
6
+ *.png
7
+ *.tar.gz
8
+
9
+ # Pycharm
10
+ .idea/
11
+
12
+ # Node artifact files
13
+ node_modules/
14
+ dist/
15
+
16
+ # Compiled Python bytecode
17
+ *.py[cod]
18
+
19
+ # JetBrains IDE
20
+ .idea/
21
+
22
+ # Unit test reports
23
+ TEST*.xml
24
+
25
+ # Generated by MacOS
26
+ .DS_Store
27
+
28
+ # Generated by Windows
29
+ Thumbs.db
30
+
31
+ # Applications
32
+ *.app
33
+ *.exe
34
+ *.war
35
+
36
+ # Large media files
37
+ *.mp4
38
+ *.tiff
39
+ *.avi
40
+ *.flv
41
+ *.mov
42
+ *.wmv
43
+
44
+ #Mkdocs
45
+ docs/site
@@ -0,0 +1,2 @@
1
# mbari_aidata/__init__.py
# Package version string; the command line entry point passes it to click's
# --version option. The assignment is kept as one plain literal on its own line.
__version__ = "1.41.9"
@@ -0,0 +1,100 @@
1
+ # mbari_aidata, Apache-2.0 license
2
+ # Filename: __main__.py
3
+ # Description: Main entry point for the mbari_aidata command line interface
4
+ from datetime import datetime
5
+ from pathlib import Path
6
+
7
+ import pytz
8
+ import click
9
+ import sys
10
+
11
+ sys.path.insert(0, str(Path(__file__).parent.parent))
12
+
13
+ from pathlib import Path
14
+
15
+ from mbari_aidata.commands.download import download
16
+ from mbari_aidata.commands.load_images import load_images
17
+ from mbari_aidata.commands.load_video import load_video
18
+ from mbari_aidata.commands.load_exemplars import load_exemplars
19
+ from mbari_aidata.commands.db_utils import reset_redis
20
+ from mbari_aidata.commands.transform import transform, voc_to_yolo
21
+ from mbari_aidata.logger import err, info
22
+
23
+ from mbari_aidata import __version__
24
+ from mbari_aidata.commands.load_queue import load_queue
25
+ from mbari_aidata.commands.load_boxes import load_boxes
26
+
27
# Default the log location to the user's home directory (POSIX-style string)
# unless a LOG_PATH name is already bound at this point in the module.
try:
    LOG_PATH
except NameError:
    LOG_PATH = Path.home().as_posix()
29
+
30
+
31
# Root command group. Both -h and --help show usage; -V/--version prints the
# package version. The docstring below is what click displays as help text.
@click.group(
    context_settings={"help_option_names": ["-h", "--help"]},
)
@click.version_option(
    __version__,
    "-V",
    "--version",
    message="%(prog)s, version %(version)s",
)
def cli():
    """
    Load data to tator database from a command line.
    """
38
+
39
+
40
# "load" sub-group. The docstring is the help text click shows for it.
@click.group(name="load")
def cli_load():
    """
    Load data, such as images, boxes, and exemplars into either a Postgres or REDIS database
    """


# Attach the group to the root CLI first, then register its sub-commands in
# the same order as before.
cli.add_command(cli_load)
for _load_command in (load_images, load_video, load_boxes, load_queue, load_exemplars):
    cli_load.add_command(_load_command)
del _load_command  # do not leave the loop variable behind as a module attribute
54
+
55
+
56
# "download" sub-group. The docstring is the help text click shows for it, so
# the garbled "e,g," abbreviation is corrected here because users see it.
@click.group(name="download")
def cli_download():
    """
    Download data, such as images and boxes, into various formats for machine learning, e.g. COCO, CIFAR, or PASCAL VOC format
    """


# Attach the group to the root CLI and register its single sub-command.
cli.add_command(cli_download)
cli_download.add_command(download)
66
+
67
+
68
# "db" sub-group. The docstring is the help text click shows for it.
@click.group(name="db")
def cli_db():
    """
    Commands related to database management
    """


# The two registrations are independent of each other: the reset command goes
# into the group, and the group goes onto the root CLI.
cli_db.add_command(reset_redis)
cli.add_command(cli_db)
78
+
79
+
80
# "transform" sub-group. The docstring is the help text click shows for it.
@click.group(name="transform")
def cli_transform():
    """
    Commands related to transforming downloaded data
    """


# Attach the group to the root CLI, then register its sub-commands in the
# same order as before.
cli.add_command(cli_transform)
for _transform_command in (transform, voc_to_yolo):
    cli_transform.add_command(_transform_command)
del _transform_command  # do not leave the loop variable behind as a module attribute
91
+
92
if __name__ == "__main__":
    # Script entry point: run the CLI and report how long it took.
    start = datetime.now(pytz.utc)

    def _report_elapsed() -> None:
        """Log the wall-clock duration of the run in seconds."""
        elapsed = datetime.now(pytz.utc) - start
        info(f"Done. Elapsed time: {elapsed.total_seconds():.2f} seconds")

    try:
        cli()
    except SystemExit as finished:
        # In its default standalone mode click ends every invocation with
        # sys.exit(), so code placed after cli() is never reached. Report the
        # elapsed time here for successful runs and let the exit propagate so
        # the process exit code is unchanged.
        if finished.code in (None, 0):
            _report_elapsed()
        raise
    except Exception as e:
        err(f"Exiting. Error: {e}")
        # sys.exit rather than the site-provided exit(), which is not
        # guaranteed to exist (e.g. under `python -S`).
        sys.exit(-1)
    else:
        # Only reached if cli() ever returns normally (non-standalone mode).
        _report_elapsed()
@@ -0,0 +1,35 @@
1
+ # mbari_aidata, Apache-2.0 license
2
+ # Filename: commands/db_utils.py
3
+ # Description: Miscellaneous functions for working with the database
4
+
5
+
6
+ import click
7
+ import redis
8
+
9
+ from mbari_aidata import common_args
10
+ from mbari_aidata.logger import create_logger_file, info
11
+ from mbari_aidata.plugins.loaders.tator.common import init_yaml_config
12
+ from mbari_aidata.predictors.process_vits import ViTWrapper
13
+
14
+
15
@click.command("reset", help="Reset the REDIS server")
@common_args.yaml_config
@click.option(
    "--redis-password",
    type=str,
    required=True,
    help="Password for the REDIS server",
)
def reset_redis(redis_password: str, config: str) -> bool:
    """Reset the REDIS database.

    Reads the REDIS host/port and the VSS model name from the YAML
    configuration, connects to the server, and constructs a ViTWrapper with
    ``reset=True``.

    :param redis_password: Password for the REDIS server
    :param config: YAML configuration value handed to init_yaml_config
    :return: True when no exception was raised, False otherwise
    """
    try:
        # Each project needs a separate redis server for exemplar embeddings;
        # this is done through separate ports in the project configuration.
        settings = init_yaml_config(config)
        redis_settings = settings["redis"]
        host, port = redis_settings["host"], redis_settings["port"]
        model_name = settings["vss"]["model"]

        info(f"Connecting to REDIS server at {host}:{port}")
        connection = redis.Redis(host=host, port=port, password=redis_password)
        ViTWrapper(connection, model_name=model_name, reset=True)
        info("Redis server reset")
    except Exception as e:
        info(f"Error resetting REDIS server: {e}")
        return False
    return True
@@ -0,0 +1,194 @@
1
+ # mbari_aidata, Apache-2.0 license
2
+ # Filename: commands/download.py
3
+ # Description: Download a dataset for training detection or classification models
4
+
5
+ from pathlib import Path
6
+
7
+ import click
8
+
9
+ from mbari_aidata import common_args
10
+ from mbari_aidata.logger import create_logger_file, info, exception
11
+ from mbari_aidata.generators.coco_voc import download as download_full
12
+
13
+ from mbari_aidata.plugins.loaders.tator.common import init_yaml_config, init_api_project, find_project
14
+
15
# Default values
# Datasets are written beneath the user's home directory unless --base-path says otherwise
DEFAULT_BASE_DIR = Path.home().joinpath("mbari_aidata", "datasets")
18
+
19
+
20
def _split_names(csv_names: str) -> list:
    """Split a comma separated option value into stripped, non-empty names.

    The value "all" (or an empty/missing value) means "no filter" and yields
    an empty list.
    """
    if not csv_names or csv_names == "all":
        return []
    stripped = (name.strip() for name in csv_names.split(","))
    return [name for name in stripped if name]


@click.command(name="dataset", help="Download a dataset for training detection or classification models")
@common_args.token
@common_args.yaml_config
@common_args.version
@click.option(
    "--base-path",
    default=DEFAULT_BASE_DIR,
    type=Path,
    help=f"Path to the base directory to save all data to. Defaults to {DEFAULT_BASE_DIR}",
)
@click.option("--group", help="Group name, e.g. VB250")
@click.option("--depth", type=int, help="Depth, e.g. 200")
@click.option("--section", help="Media section name, e.g. 25000_depth_v1")
@click.option("--min-saliency", type=int, help="Minimum saliency score")
@click.option("--max-saliency", type=int, help="Maximum saliency score")
@click.option("--min-score", type=float, help="Minimum model score")
@click.option("--generator", help="Generator name, e.g. vars-labelbot or vars-annotation")
@click.option("--labels", default="all", help='Comma separated list of labels to download, or "all" for all labels.')
@click.option(
    "--concepts",
    default="all",
    help='Comma separated list of concepts to download, or "all" for all concepts. For legacy projects only',
)
@click.option("--crop-roi", is_flag=True, help="True to download the rois cropped from the original images/video.")
@click.option("--resize", type=int, help="Resize images to this size after cropping them.")
@click.option("--voc", is_flag=True, help="True if export as VOC dataset, False if not.")
@click.option("--coco", is_flag=True, help="True if export as COCO dataset, False if not.")
@click.option("--cifar", is_flag=True, help="True if export as CIFAR dataset, False if not.")
@click.option("--cifar-size", default=32, help="Size of CIFAR images.")
@click.option("--save-score", is_flag=True, help="True to save score in YOLO output, False if not.")
@click.option("--verified", is_flag=True, help="True if only download verified annotations.")
@click.option("--unverified", is_flag=True, help="True if only download not verified annotations.")
@click.option("--single-class", type=str, help="Set to collapse all classes into a single class, e.g. 'marine organism'")
@click.option(
    "--skip-image-download", is_flag=True, help="Skip image download, only download annotations. CIFAR requires images."
)
def download(
    token: str,
    config: str,
    base_path: Path,
    group: str,
    depth: int,
    section: str,
    min_saliency: int,
    max_saliency: int,
    min_score: float,
    version: str,
    generator: str,
    labels: str,
    concepts: str,
    crop_roi: bool,
    resize: int,
    voc: bool,
    cifar: bool,
    coco: bool,
    cifar_size: int,
    save_score: bool,
    single_class: str,
    skip_image_download: bool,
    verified: bool,
    unverified: bool,
) -> bool:
    """Download a dataset for training detection or classification models.

    Reads the Tator project and host from the YAML configuration, resolves
    the project through the API, turns the comma separated ``labels``,
    ``concepts`` and ``version`` options into lists, and delegates the actual
    download to ``download_full``. All remaining options are forwarded to it
    unchanged.

    :param token: Tator API token
    :param config: YAML configuration value handed to init_yaml_config
    :param base_path: Base directory to save data to; created if missing
    :param version: Comma separated version names; when empty, every version
        returned by the API for the project is used
    :param labels: Comma separated labels, or "all" for no label filter
    :param concepts: Comma separated concepts, or "all" for no concept filter
    :return: The result of ``download_full``, or False if an exception was raised
    """
    create_logger_file("download")
    try:
        base_path.mkdir(exist_ok=True, parents=True)

        # Load the configuration file
        config_dict = init_yaml_config(config)
        project_name = config_dict["tator"]["project"]
        host = config_dict["tator"]["host"]

        # Initialize the Tator API; only the API handle is needed here
        api, _ = init_api_project(host, token, project_name)

        # Find the project
        project = find_project(api, project_name)
        info(f"Found project id: {project.id} for project {project.name}")

        # Data for an explicit version goes into a sub-directory named after
        # it. parents=True keeps this working if the name contains a path
        # separator.
        data_path = base_path / version if version else base_path
        data_path.mkdir(exist_ok=True, parents=True)
        info(f"Downloading data to {data_path}")

        # Convert the comma separated label and concept options to lists
        labels_list = _split_names(labels)
        concepts_list = _split_names(concepts)

        # Convert comma separated list of versions to a list
        if version:
            version_list = [name.strip() for name in version.split(",")]
        else:
            # If no version is specified, download all versions
            version_list = [v.name for v in api.get_version_list(project.id)]

        return download_full(
            api,
            project_id=project.id,
            group=group,
            depth=depth,
            section=section,
            min_saliency=min_saliency,
            max_saliency=max_saliency,
            min_score=min_score,
            version_list=version_list,
            verified=verified,
            unverified=unverified,
            generator=generator,
            output_path=data_path,
            labels_list=labels_list,
            concepts_list=concepts_list,
            single_class=single_class,
            skip_image_download=skip_image_download,
            save_score=save_score,
            cifar_size=cifar_size,
            voc=voc,
            coco=coco,
            cifar=cifar,
            crop_roi=crop_roi,
            resize=resize,
        )
    except Exception as e:
        # Top-level command boundary: log with traceback and report failure
        exception(f"Error: {e}")
        return False
166
+
167
+
168
if __name__ == "__main__":
    import os

    # Developer smoke test; requires the TATOR_TOKEN environment variable.
    # `download` is a click Command, so the wrapped function is reached
    # through `.callback`. That avoids commenting out the @click decorators,
    # and every parameter is passed explicitly because the function declares
    # no defaults of its own.
    # TODO: move this to pytest
    os.environ["ENVIRONMENT"] = "TESTING"
    tests_dir = Path(__file__).parent.parent.parent / "tests"
    yaml_path = tests_dir / "config" / "config_i2map.yml"
    base_path = tests_dir / "data" / "download"
    tator_token = os.getenv("TATOR_TOKEN")
    download.callback(
        token=tator_token,
        config=yaml_path.as_posix(),
        version="dino_vits8_20240205_225539,dino_vits8_20240207_022529,dinov2_vits14_hdbscan_",
        base_path=base_path,
        # None mirrors what click passes when an option is omitted
        group=None,
        depth=None,
        section=None,
        min_saliency=None,
        max_saliency=None,
        min_score=None,
        generator=None,
        labels="Acanthamunnopsis milleri,Euphausiacea1,Pyrosoma1,Pyrosoma2",
        concepts="",
        crop_roi=False,
        resize=None,
        voc=True,
        cifar=True,
        coco=True,
        cifar_size=32,
        save_score=False,
        single_class=None,
        skip_image_download=False,
        verified=False,
        unverified=False,
    )