mbari-aidata 1.41.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mbari_aidata-1.41.9/PKG-INFO +151 -0
- mbari_aidata-1.41.9/README.md +121 -0
- mbari_aidata-1.41.9/mbari_aidata/.gitignore +45 -0
- mbari_aidata-1.41.9/mbari_aidata/__init__.py +2 -0
- mbari_aidata-1.41.9/mbari_aidata/__main__.py +100 -0
- mbari_aidata-1.41.9/mbari_aidata/commands/db_utils.py +35 -0
- mbari_aidata-1.41.9/mbari_aidata/commands/download.py +194 -0
- mbari_aidata-1.41.9/mbari_aidata/commands/load_boxes.py +143 -0
- mbari_aidata-1.41.9/mbari_aidata/commands/load_common.py +66 -0
- mbari_aidata-1.41.9/mbari_aidata/commands/load_exemplars.py +135 -0
- mbari_aidata-1.41.9/mbari_aidata/commands/load_images.py +108 -0
- mbari_aidata-1.41.9/mbari_aidata/commands/load_queue.py +92 -0
- mbari_aidata-1.41.9/mbari_aidata/commands/load_video.py +117 -0
- mbari_aidata-1.41.9/mbari_aidata/commands/transform.py +367 -0
- mbari_aidata-1.41.9/mbari_aidata/common_args.py +27 -0
- mbari_aidata-1.41.9/mbari_aidata/generators/cifar.py +131 -0
- mbari_aidata-1.41.9/mbari_aidata/generators/coco_voc.py +654 -0
- mbari_aidata-1.41.9/mbari_aidata/generators/utils.py +120 -0
- mbari_aidata-1.41.9/mbari_aidata/logger/__init__.py +121 -0
- mbari_aidata-1.41.9/mbari_aidata/plugins/extractors/media_types.py +9 -0
- mbari_aidata-1.41.9/mbari_aidata/plugins/extractors/tap_cfe_media.py +142 -0
- mbari_aidata-1.41.9/mbari_aidata/plugins/extractors/tap_i2map_media.py +125 -0
- mbari_aidata-1.41.9/mbari_aidata/plugins/extractors/tap_planktivore_media.py +45 -0
- mbari_aidata-1.41.9/mbari_aidata/plugins/extractors/tap_sdcat_csv.py +46 -0
- mbari_aidata-1.41.9/mbari_aidata/plugins/extractors/tap_sony_media.py +87 -0
- mbari_aidata-1.41.9/mbari_aidata/plugins/extractors/tap_voc.py +90 -0
- mbari_aidata-1.41.9/mbari_aidata/plugins/loaders/tator/attribute_utils.py +65 -0
- mbari_aidata-1.41.9/mbari_aidata/plugins/loaders/tator/common.py +123 -0
- mbari_aidata-1.41.9/mbari_aidata/plugins/loaders/tator/localization.py +105 -0
- mbari_aidata-1.41.9/mbari_aidata/plugins/loaders/tator/media.py +375 -0
- mbari_aidata-1.41.9/mbari_aidata/plugins/loaders/tator_redis/consume_localization.py +96 -0
- mbari_aidata-1.41.9/mbari_aidata/plugins/loaders/tator_redis/consume_media.py +146 -0
- mbari_aidata-1.41.9/mbari_aidata/plugins/module_utils.py +20 -0
- mbari_aidata-1.41.9/mbari_aidata/predictors/process_vits.py +117 -0
- mbari_aidata-1.41.9/mbari_aidata/predictors/vector_similarity.py +68 -0
- mbari_aidata-1.41.9/pyproject.toml +88 -0
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: mbari-aidata
|
|
3
|
+
Version: 1.41.9
|
|
4
|
+
Summary: Command line tool to do extract, transform, load and download operations on AI data for a number of projects at MBARI that require detection, clustering or classification workflows.
|
|
5
|
+
License: Apache
|
|
6
|
+
Author: Danelle Cline
|
|
7
|
+
Author-email: dcline@mbari.org
|
|
8
|
+
Requires-Python: >=3.10,<3.12
|
|
9
|
+
Classifier: License :: Other/Proprietary License
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Requires-Dist: albucore (==0.0.16)
|
|
14
|
+
Requires-Dist: albumentations (==1.4.15)
|
|
15
|
+
Requires-Dist: click (==8.1.7)
|
|
16
|
+
Requires-Dist: ephem (==4.1.6)
|
|
17
|
+
Requires-Dist: moviepy (==2.1.1)
|
|
18
|
+
Requires-Dist: opencv-contrib-python (==4.11.0.86)
|
|
19
|
+
Requires-Dist: pandas (==2.2.2)
|
|
20
|
+
Requires-Dist: pascal-voc-writer (>=0.1.4)
|
|
21
|
+
Requires-Dist: piexif (==1.1.3)
|
|
22
|
+
Requires-Dist: pytz (==2024.2)
|
|
23
|
+
Requires-Dist: redis (==5.0.7)
|
|
24
|
+
Requires-Dist: requests (==2.32.3)
|
|
25
|
+
Requires-Dist: tator (==1.2.3)
|
|
26
|
+
Requires-Dist: torchvision (==0.18.1)
|
|
27
|
+
Requires-Dist: tqdm (==4.67.1)
|
|
28
|
+
Requires-Dist: transformers (==4.48.0)
|
|
29
|
+
Description-Content-Type: text/markdown
|
|
30
|
+
|
|
31
|
+
[](http://www.mbari.org)
|
|
32
|
+
[](https://github.com/semantic-release/semantic-release)
|
|
33
|
+
[](https://opensource.org/licenses/Apache-2.0)
|
|
34
|
+
[](https://www.python.org/downloads/)
|
|
35
|
+
|
|
36
|
+
*mbari-aidata* is a command line tool to do extract, transform, load and download operations
|
|
37
|
+
on AI data for a number of projects at MBARI that require detection, clustering or classification
|
|
38
|
+
workflows.
|
|
39
|
+
|
|
40
|
+
Full documentation is available on commands at [https://docs.mbari.org/internal/ai/data](https://docs.mbari.org/internal/ai/data/).
|
|
41
|
+
|
|
42
|
+
This supports loading [sdcat](https://github.com/mbari-org/sdcat) formatted output and downloads from [Tator](https://www.tatorapp.com/) and
|
|
43
|
+
[Redis](https://redis.io) databases, although support for other data sources is also possible, e.g. [FathomNet](https://fathomnet.org/),
|
|
44
|
+
so we decided to keep the name generic.
|
|
45
|
+
|
|
46
|
+
This also supports loading media from a directory or URL, and transforming data into various
|
|
47
|
+
formats for machine learning, e.g. COCO, CIFAR, or PASCAL VOC format.
|
|
48
|
+
|
|
49
|
+
## Requirements
|
|
50
|
+
- Python 3.10 or 3.11 (the package declares `Requires-Python: >=3.10,<3.12`)
|
|
51
|
+
- A Tator API token and Redis password for the .env file. Contact the MBARI AI team for access.
|
|
52
|
+
- Docker for development and testing only
|
|
53
|
+
|
|
54
|
+
## Installation
|
|
55
|
+
Install from PyPI:
|
|
56
|
+
|
|
57
|
+
```shell
|
|
58
|
+
pip install mbari-aidata
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
Create the .env file with the following contents in the root directory of the project:
|
|
62
|
+
```shell
|
|
63
|
+
TATOR_TOKEN=your_api_token
|
|
64
|
+
REDIS_PASSWORD=your_redis_password
|
|
65
|
+
ENVIRONMENT=testing or production
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
Create a configuration file in the root directory of the project:
|
|
69
|
+
```shell
|
|
70
|
+
touch config_cfe.yml
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
This file will be used to configure the project data, such as mounts, plugins, and database connections.
|
|
74
|
+
```shell
|
|
75
|
+
aidata download dataset --version Baseline --labels "Diatoms, Copepods" --config config_cfe.yml
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
Example configuration file:
|
|
79
|
+
```yaml
|
|
80
|
+
# config_cfe.yml
|
|
81
|
+
# Config file for CFE project production
|
|
82
|
+
mounts:
|
|
83
|
+
- name: "image"
|
|
84
|
+
path: "/mnt/CFElab"
|
|
85
|
+
host: "mantis.shore.mbari.org"
|
|
86
|
+
nginx_root: "/CFElab"
|
|
87
|
+
|
|
88
|
+
- name: "video"
|
|
89
|
+
path: "/mnt/CFElab"
|
|
90
|
+
host: "mantis.shore.mbari.org"
|
|
91
|
+
nginx_root: "/CFElab"
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
plugins:
|
|
95
|
+
- name: "extractor"
|
|
96
|
+
module: "mbari_aidata.plugins.extractors.tap_cfe_media"
|
|
97
|
+
function: "extract_media"
|
|
98
|
+
|
|
99
|
+
redis:
|
|
100
|
+
host: "doris.shore.mbari.org"
|
|
101
|
+
port: 6382
|
|
102
|
+
|
|
103
|
+
vss:
|
|
104
|
+
project: "902111-CFE"
|
|
105
|
+
model: "google/vit-base-patch16-224"
|
|
106
|
+
|
|
107
|
+
tator:
|
|
108
|
+
project: "902111-CFE"
|
|
109
|
+
host: "mantis.shore.mbari.org"
|
|
110
|
+
image:
|
|
111
|
+
attributes:
|
|
112
|
+
iso_datetime:
|
|
113
|
+
type: datetime
|
|
114
|
+
depth:
|
|
115
|
+
type: float
|
|
116
|
+
video:
|
|
117
|
+
attributes:
|
|
118
|
+
iso_start_datetime:
|
|
119
|
+
type: datetime
|
|
120
|
+
box:
|
|
121
|
+
attributes:
|
|
122
|
+
Label:
|
|
123
|
+
type: string
|
|
124
|
+
score:
|
|
125
|
+
type: float
|
|
126
|
+
cluster:
|
|
127
|
+
type: string
|
|
128
|
+
saliency:
|
|
129
|
+
type: float
|
|
130
|
+
area:
|
|
131
|
+
type: int
|
|
132
|
+
exemplar:
|
|
133
|
+
type: bool
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
A docker version is also available at `mbari/aidata:latest` or `mbari/aidata:latest:cuda-124`.
|
|
137
|
+
|
|
138
|
+
## Commands
|
|
139
|
+
|
|
140
|
+
* `aidata download --help` - Download data, such as images and boxes, into various formats for machine learning, e.g. COCO, CIFAR, or PASCAL VOC format
|
|
141
|
+
* `aidata load --help` - Load data, such as images, and boxes into either a Postgres or REDIS database
|
|
142
|
+
* `aidata db --help` - Commands related to database management
|
|
143
|
+
* `aidata transform --help` - Commands related to transforming downloaded data
|
|
144
|
+
* `aidata -h` - Print help message and exit.
|
|
145
|
+
|
|
146
|
+
Source code is available at [github.com/mbari-org/aidata](https://github.com/mbari-org/aidata/).
|
|
147
|
+
|
|
148
|
+
## Development
|
|
149
|
+
See the [Development Guide](DEVELOPMENT.md) for more information on how to set up the development environment.
|
|
150
|
+
|
|
151
|
+
**updated: 2025-01-28**
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
[](http://www.mbari.org)
|
|
2
|
+
[](https://github.com/semantic-release/semantic-release)
|
|
3
|
+
[](https://opensource.org/licenses/Apache-2.0)
|
|
4
|
+
[](https://www.python.org/downloads/)
|
|
5
|
+
|
|
6
|
+
*mbari-aidata* is a command line tool to do extract, transform, load and download operations
|
|
7
|
+
on AI data for a number of projects at MBARI that require detection, clustering or classification
|
|
8
|
+
workflows.
|
|
9
|
+
|
|
10
|
+
Full documentation is available on commands at [https://docs.mbari.org/internal/ai/data](https://docs.mbari.org/internal/ai/data/).
|
|
11
|
+
|
|
12
|
+
This supports loading [sdcat](https://github.com/mbari-org/sdcat) formatted output and downloads from [Tator](https://www.tatorapp.com/) and
|
|
13
|
+
[Redis](https://redis.io) databases, although support for other data sources is also possible, e.g. [FathomNet](https://fathomnet.org/),
|
|
14
|
+
so we decided to keep the name generic.
|
|
15
|
+
|
|
16
|
+
This also supports loading media from a directory or URL, and transforming data into various
|
|
17
|
+
formats for machine learning, e.g. COCO, CIFAR, or PASCAL VOC format.
|
|
18
|
+
|
|
19
|
+
## Requirements
|
|
20
|
+
- Python 3.10 or 3.11 (the package declares `Requires-Python: >=3.10,<3.12`)
|
|
21
|
+
- A Tator API token and Redis password for the .env file. Contact the MBARI AI team for access.
|
|
22
|
+
- Docker for development and testing only
|
|
23
|
+
|
|
24
|
+
## Installation
|
|
25
|
+
Install from PyPI:
|
|
26
|
+
|
|
27
|
+
```shell
|
|
28
|
+
pip install mbari-aidata
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
Create the .env file with the following contents in the root directory of the project:
|
|
32
|
+
```shell
|
|
33
|
+
TATOR_TOKEN=your_api_token
|
|
34
|
+
REDIS_PASSWORD=your_redis_password
|
|
35
|
+
ENVIRONMENT=testing or production
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
Create a configuration file in the root directory of the project:
|
|
39
|
+
```shell
|
|
40
|
+
touch config_cfe.yml
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
This file will be used to configure the project data, such as mounts, plugins, and database connections.
|
|
44
|
+
```shell
|
|
45
|
+
aidata download dataset --version Baseline --labels "Diatoms, Copepods" --config config_cfe.yml
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
Example configuration file:
|
|
49
|
+
```yaml
|
|
50
|
+
# config_cfe.yml
|
|
51
|
+
# Config file for CFE project production
|
|
52
|
+
mounts:
|
|
53
|
+
- name: "image"
|
|
54
|
+
path: "/mnt/CFElab"
|
|
55
|
+
host: "mantis.shore.mbari.org"
|
|
56
|
+
nginx_root: "/CFElab"
|
|
57
|
+
|
|
58
|
+
- name: "video"
|
|
59
|
+
path: "/mnt/CFElab"
|
|
60
|
+
host: "mantis.shore.mbari.org"
|
|
61
|
+
nginx_root: "/CFElab"
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
plugins:
|
|
65
|
+
- name: "extractor"
|
|
66
|
+
module: "mbari_aidata.plugins.extractors.tap_cfe_media"
|
|
67
|
+
function: "extract_media"
|
|
68
|
+
|
|
69
|
+
redis:
|
|
70
|
+
host: "doris.shore.mbari.org"
|
|
71
|
+
port: 6382
|
|
72
|
+
|
|
73
|
+
vss:
|
|
74
|
+
project: "902111-CFE"
|
|
75
|
+
model: "google/vit-base-patch16-224"
|
|
76
|
+
|
|
77
|
+
tator:
|
|
78
|
+
project: "902111-CFE"
|
|
79
|
+
host: "mantis.shore.mbari.org"
|
|
80
|
+
image:
|
|
81
|
+
attributes:
|
|
82
|
+
iso_datetime:
|
|
83
|
+
type: datetime
|
|
84
|
+
depth:
|
|
85
|
+
type: float
|
|
86
|
+
video:
|
|
87
|
+
attributes:
|
|
88
|
+
iso_start_datetime:
|
|
89
|
+
type: datetime
|
|
90
|
+
box:
|
|
91
|
+
attributes:
|
|
92
|
+
Label:
|
|
93
|
+
type: string
|
|
94
|
+
score:
|
|
95
|
+
type: float
|
|
96
|
+
cluster:
|
|
97
|
+
type: string
|
|
98
|
+
saliency:
|
|
99
|
+
type: float
|
|
100
|
+
area:
|
|
101
|
+
type: int
|
|
102
|
+
exemplar:
|
|
103
|
+
type: bool
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
A docker version is also available at `mbari/aidata:latest` or `mbari/aidata:latest:cuda-124`.
|
|
107
|
+
|
|
108
|
+
## Commands
|
|
109
|
+
|
|
110
|
+
* `aidata download --help` - Download data, such as images and boxes, into various formats for machine learning, e.g. COCO, CIFAR, or PASCAL VOC format
|
|
111
|
+
* `aidata load --help` - Load data, such as images, and boxes into either a Postgres or REDIS database
|
|
112
|
+
* `aidata db --help` - Commands related to database management
|
|
113
|
+
* `aidata transform --help` - Commands related to transforming downloaded data
|
|
114
|
+
* `aidata -h` - Print help message and exit.
|
|
115
|
+
|
|
116
|
+
Source code is available at [github.com/mbari-org/aidata](https://github.com/mbari-org/aidata/).
|
|
117
|
+
|
|
118
|
+
## Development
|
|
119
|
+
See the [Development Guide](DEVELOPMENT.md) for more information on how to set up the development environment.
|
|
120
|
+
|
|
121
|
+
**updated: 2025-01-28**
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# database files
|
|
2
|
+
*.db
|
|
3
|
+
|
|
4
|
+
# test data
|
|
5
|
+
data/
|
|
6
|
+
*.png
|
|
7
|
+
*.tar.gz
|
|
8
|
+
|
|
9
|
+
# Pycharm
|
|
10
|
+
.idea/
|
|
11
|
+
|
|
12
|
+
# Node artifact files
|
|
13
|
+
node_modules/
|
|
14
|
+
dist/
|
|
15
|
+
|
|
16
|
+
# Compiled Python bytecode
|
|
17
|
+
*.py[cod]
|
|
18
|
+
|
|
19
|
+
# JetBrains IDE
|
|
20
|
+
.idea/
|
|
21
|
+
|
|
22
|
+
# Unit test reports
|
|
23
|
+
TEST*.xml
|
|
24
|
+
|
|
25
|
+
# Generated by MacOS
|
|
26
|
+
.DS_Store
|
|
27
|
+
|
|
28
|
+
# Generated by Windows
|
|
29
|
+
Thumbs.db
|
|
30
|
+
|
|
31
|
+
# Applications
|
|
32
|
+
*.app
|
|
33
|
+
*.exe
|
|
34
|
+
*.war
|
|
35
|
+
|
|
36
|
+
# Large media files
|
|
37
|
+
*.mp4
|
|
38
|
+
*.tiff
|
|
39
|
+
*.avi
|
|
40
|
+
*.flv
|
|
41
|
+
*.mov
|
|
42
|
+
*.wmv
|
|
43
|
+
|
|
44
|
+
#Mkdocs
|
|
45
|
+
docs/site
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
# mbari_aidata, Apache-2.0 license
|
|
2
|
+
# Filename: __main__.py
|
|
3
|
+
# Description: Main entry point for the mbari_aidata command line interface
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
import pytz
|
|
8
|
+
import click
|
|
9
|
+
import sys
|
|
10
|
+
|
|
11
|
+
sys.path.insert(0, str(Path(__file__).parent.parent))
|
|
12
|
+
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
|
|
15
|
+
from mbari_aidata.commands.download import download
|
|
16
|
+
from mbari_aidata.commands.load_images import load_images
|
|
17
|
+
from mbari_aidata.commands.load_video import load_video
|
|
18
|
+
from mbari_aidata.commands.load_exemplars import load_exemplars
|
|
19
|
+
from mbari_aidata.commands.db_utils import reset_redis
|
|
20
|
+
from mbari_aidata.commands.transform import transform, voc_to_yolo
|
|
21
|
+
from mbari_aidata.logger import err, info
|
|
22
|
+
|
|
23
|
+
from mbari_aidata import __version__
|
|
24
|
+
from mbari_aidata.commands.load_queue import load_queue
|
|
25
|
+
from mbari_aidata.commands.load_boxes import load_boxes
|
|
26
|
+
|
|
27
|
+
# Default the log location to the user's home directory unless the module
# namespace already carries a LOG_PATH (at module level locals() is globals()).
if "LOG_PATH" not in globals():
    LOG_PATH = Path.home().as_posix()
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
# Settings for the root command group, named so the decorators stay short.
_CLI_CONTEXT_SETTINGS = {"help_option_names": ["-h", "--help"]}
_CLI_VERSION_MESSAGE = "%(prog)s, version %(version)s"


@click.group(context_settings=_CLI_CONTEXT_SETTINGS)
@click.version_option(__version__, "-V", "--version", message=_CLI_VERSION_MESSAGE)
def cli():
    """
    Load data to tator database from a command line.
    """
    # Root group: it only hosts the sub-groups registered below.
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# ``cli.group`` creates the group and registers it on the root ``cli`` group
# in one step.
@cli.group(name="load")
def cli_load():
    """
    Load data, such as images, boxes, and exemplars into either a Postgres or REDIS database
    """


# Attach every loader sub-command to the ``load`` group.
for _load_command in (load_images, load_video, load_boxes, load_queue, load_exemplars):
    cli_load.add_command(_load_command)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
@click.group(name="download")
def cli_download():
    """
    Download data, such as images and boxes, into various formats for machine learning, e.g. COCO, CIFAR, or PASCAL VOC format
    """
    # The docstring above is what click prints as the help text for
    # ``aidata download --help``.


# Expose the group as ``aidata download`` and attach its sub-command.
cli.add_command(cli_download)
cli_download.add_command(download)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
# ``cli.group`` creates the group and registers it on the root ``cli`` group
# in one step.
@cli.group(name="db")
def cli_db():
    """
    Commands related to database management
    """


# Attach the database maintenance sub-command.
cli_db.add_command(reset_redis)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
# ``cli.group`` creates the group and registers it on the root ``cli`` group
# in one step.
@cli.group(name="transform")
def cli_transform():
    """
    Commands related to transforming downloaded data
    """


# Attach the transform sub-commands.
for _transform_command in (transform, voc_to_yolo):
    cli_transform.add_command(_transform_command)
|
|
91
|
+
|
|
92
|
+
if __name__ == "__main__":
    start = datetime.now(pytz.utc)
    try:
        # click runs in standalone mode and finishes by raising SystemExit,
        # so code placed after this call would never run. The elapsed time
        # is therefore reported from the ``finally`` clause.
        cli()
    except Exception as e:
        err(f"Exiting. Error: {e}")
        # ``sys.exit`` is always available; the bare ``exit`` builtin is only
        # injected by the ``site`` module.
        sys.exit(-1)
    finally:
        elapsed = datetime.now(pytz.utc) - start
        # Report real seconds rather than a timedelta followed by "seconds".
        info(f"Elapsed time: {elapsed.total_seconds():.2f} seconds")
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# mbari_aidata, Apache-2.0 license
|
|
2
|
+
# Filename: commands/db_utils.py
|
|
3
|
+
# Description: Miscellaneous functions for working with the database
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
import click
|
|
7
|
+
import redis
|
|
8
|
+
|
|
9
|
+
from mbari_aidata import common_args
|
|
10
|
+
from mbari_aidata.logger import create_logger_file, info
|
|
11
|
+
from mbari_aidata.plugins.loaders.tator.common import init_yaml_config
|
|
12
|
+
from mbari_aidata.predictors.process_vits import ViTWrapper
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@click.command("reset", help="Reset the REDIS server")
@common_args.yaml_config
@click.option("--redis-password", type=str, required=True, help="Password for the REDIS server")
def reset_redis(redis_password: str, config: str) -> bool:
    """Reset the REDIS database.

    Reads the ``redis`` host/port and the ``vss`` model name from the YAML
    configuration, connects to that server and constructs a ``ViTWrapper``
    with ``reset=True``.

    Args:
        redis_password: Password for the REDIS server.
        config: Value of the YAML configuration option, passed to
            ``init_yaml_config``.

    Returns:
        True when every step completed, False when any step raised.
    """
    # Imported locally so this block does not depend on the module-level
    # import list; failures are reported at error level instead of info.
    from mbari_aidata.logger import err

    try:
        # Load the configuration file
        # Each project needs a separate redis server for exemplar embeddings - this
        # is done through separate ports
        config_dict = init_yaml_config(config)
        redis_host = config_dict["redis"]["host"]
        redis_port = config_dict["redis"]["port"]
        vss_model = config_dict["vss"]["model"]
        info(f"Connecting to REDIS server at {redis_host}:{redis_port}")
        r = redis.Redis(host=redis_host, port=redis_port, password=redis_password)
        # NOTE(review): the reset itself happens inside ViTWrapper when
        # reset=True - confirm the details in predictors/process_vits.py.
        ViTWrapper(r, model_name=vss_model, reset=True)
        info("Redis server reset")
        return True
    except Exception as e:
        err(f"Error resetting REDIS server: {e}")
        return False
|
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
# mbari_aidata, Apache-2.0 license
|
|
2
|
+
# Filename: commands/download.py
|
|
3
|
+
# Description: Download a dataset for training detection or classification models
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
import click
|
|
8
|
+
|
|
9
|
+
from mbari_aidata import common_args
|
|
10
|
+
from mbari_aidata.logger import create_logger_file, info, exception
|
|
11
|
+
from mbari_aidata.generators.coco_voc import download as download_full
|
|
12
|
+
|
|
13
|
+
from mbari_aidata.plugins.loaders.tator.common import init_yaml_config, init_api_project, find_project
|
|
14
|
+
|
|
15
|
+
# Default values
|
|
16
|
+
# The default base directory is ~/mbari_aidata/datasets in the user's home directory
|
|
17
|
+
DEFAULT_BASE_DIR = Path.home() / "mbari_aidata" / "datasets"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _split_csv_option(value: str) -> list[str]:
    """Split a comma separated option into stripped, non-empty items; "all" means no filter ([])."""
    if value == "all":
        return []
    return [item.strip() for item in value.split(",") if item.strip()]


@click.command(name="dataset", help="Download a dataset for training detection or classification models")
@common_args.token
@common_args.yaml_config
@common_args.version
@click.option(
    "--base-path",
    default=DEFAULT_BASE_DIR,
    type=Path,
    help=f"Path to the base directory to save all data to. Defaults to {DEFAULT_BASE_DIR}",
)
@click.option("--group", help="Group name, e.g. VB250")
@click.option("--depth", type=int, help="Depth, e.g. 200")
@click.option("--section", help="Media section name, e.g. 25000_depth_v1")
@click.option("--min-saliency", type=int, help="Minimum saliency score")
@click.option("--max-saliency", type=int, help="Maximum saliency score")
@click.option("--min-score", type=float, help="Minimum model score")
@click.option("--generator", help="Generator name, e.g. vars-labelbot or vars-annotation")
@click.option("--labels", default="all", help='Comma separated list of labels to download, or "all" for all labels.')
@click.option(
    "--concepts",
    default="all",
    help='Comma separated list of concepts to download, or "all" for all concepts. For legacy projects only',
)
@click.option("--crop-roi", is_flag=True, help="True to download the rois cropped from the original images/video.")
@click.option("--resize", type=int, help="Resize images to this size after cropping them.")
@click.option("--voc", is_flag=True, help="True if export as VOC dataset, False if not.")
@click.option("--coco", is_flag=True, help="True if export as COCO dataset, False if not.")
@click.option("--cifar", is_flag=True, help="True if export as CIFAR dataset, False if not.")
@click.option("--cifar-size", default=32, help="Size of CIFAR images.")
@click.option("--save-score", is_flag=True, help="True to save score in YOLO output, False if not.")
@click.option("--verified", is_flag=True, help="True if only download verified annotations.")
@click.option("--unverified", is_flag=True, help="True if only download not verified annotations.")
@click.option("--single-class", type=str, help="Set to collapse all classes into a single class, e.g. 'marine organism'")
@click.option(
    "--skip-image-download", is_flag=True, help="Skip image download, only download annotations. CIFAR requires images."
)
def download(
    token: str,
    config: str,
    base_path: Path,
    group: str,
    depth: int,
    section: str,
    min_saliency: int,
    max_saliency: int,
    min_score: float,
    version: str,
    generator: str,
    labels: str,
    concepts: str,
    crop_roi: bool,
    resize: int,
    voc: bool,
    cifar: bool,
    coco: bool,
    cifar_size: int,
    save_score: bool,
    single_class: str,
    skip_image_download: bool,
    verified: bool,
    unverified: bool,
) -> bool:
    """Download a dataset for training detection or classification models.

    Reads the Tator project and host from the YAML configuration, resolves the
    project through the Tator API, turns the comma separated ``labels``,
    ``concepts`` and ``version`` options into lists, and hands everything to
    the generator in ``mbari_aidata.generators.coco_voc``.

    Args:
        token: Tator API token.
        config: Value of the YAML configuration option, passed to
            ``init_yaml_config``.
        base_path: Base directory for the output. Data goes to
            ``base_path / version`` when a version is given, otherwise to
            ``base_path`` itself.
        version: Comma separated version names. When empty, every version of
            the project is used.
        labels: Comma separated labels, or "all" for no label filter.
        concepts: Comma separated concepts, or "all" for no concept filter.

    All remaining parameters are forwarded unchanged to the generator; their
    meaning is given in the option help texts above.

    Returns:
        The generator's result, or False when any step raised.
    """
    create_logger_file("download")
    try:
        base_path.mkdir(exist_ok=True, parents=True)

        # Load the configuration file
        config_dict = init_yaml_config(config)
        project_name = config_dict["tator"]["project"]
        host = config_dict["tator"]["host"]

        # Initialize the Tator API; the second return value is not needed
        # because the project is looked up explicitly below.
        api, _ = init_api_project(host, token, project_name)

        # Find the project
        project = find_project(api, project_name)
        info(f"Found project id: {project.id} for project {project.name}")

        # Download a dataset by its version if it exists
        data_path = base_path / version if version else base_path
        data_path.mkdir(exist_ok=True)
        info(f"Downloading data to {data_path}")

        # Convert the comma separated labels and concepts to lists
        labels_list = _split_csv_option(labels)
        concepts_list = _split_csv_option(concepts)

        # Convert comma separated list of versions to a list
        if version:
            version_list = [name.strip() for name in version.split(",")]
        else:
            # If no version is specified, download all versions
            version_list = [v.name for v in api.get_version_list(project.id)]

        return download_full(
            api,
            project_id=project.id,
            group=group,
            depth=depth,
            section=section,
            min_saliency=min_saliency,
            max_saliency=max_saliency,
            min_score=min_score,
            version_list=version_list,
            verified=verified,
            unverified=unverified,
            generator=generator,
            output_path=data_path,
            labels_list=labels_list,
            concepts_list=concepts_list,
            single_class=single_class,
            skip_image_download=skip_image_download,
            save_score=save_score,
            cifar_size=cifar_size,
            voc=voc,
            coco=coco,
            cifar=cifar,
            crop_roi=crop_roi,
            resize=resize,
        )
    except Exception as e:
        exception(f"Error: {e}")
        return False
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
if __name__ == "__main__":
    import os

    # Manual smoke test; requires the TATOR_TOKEN environment variable.
    # TODO: move this to pytest
    os.environ["ENVIRONMENT"] = "TESTING"
    tests_dir = Path(__file__).parent.parent.parent / "tests"
    yaml_path = tests_dir / "config" / "config_i2map.yml"
    base_path = tests_dir / "data" / "download"
    tator_token = os.getenv("TATOR_TOKEN")

    # ``download`` is a click Command, which cannot be called with keyword
    # arguments. Its ``callback`` attribute is the undecorated function; the
    # fallback keeps this working if the click decorators are commented out.
    run_download = getattr(download, "callback", download)
    run_download(
        token=tator_token,
        config=yaml_path.as_posix(),
        version="dino_vits8_20240205_225539,dino_vits8_20240207_022529,dinov2_vits14_hdbscan_",
        base_path=base_path,
        voc=True,
        labels="Acanthamunnopsis milleri,Euphausiacea1,Pyrosoma1,Pyrosoma2",
        concepts="",
        cifar=True,
        coco=True,
        save_score=False,
        skip_image_download=False,
        group="",
        depth=None,  # int option; None is what click passes when it is omitted
        generator="",
        cifar_size=32,
        # The remaining parameters take the values click supplies when the
        # corresponding options are not given on the command line.
        section=None,
        min_saliency=None,
        max_saliency=None,
        min_score=None,
        crop_roi=False,
        resize=None,
        single_class=None,
        verified=False,
        unverified=False,
    )
|