birder 0.4.2__py3-none-any.whl → 0.4.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- birder/__init__.py +2 -0
- birder/common/fs_ops.py +81 -1
- birder/common/training_cli.py +6 -1
- birder/common/training_utils.py +4 -0
- birder/data/collators/detection.py +3 -1
- birder/datahub/_lib.py +15 -6
- birder/datahub/evaluation.py +591 -0
- birder/eval/__init__.py +0 -0
- birder/eval/__main__.py +74 -0
- birder/eval/_embeddings.py +50 -0
- birder/eval/adversarial.py +315 -0
- birder/eval/benchmarks/__init__.py +0 -0
- birder/eval/benchmarks/awa2.py +357 -0
- birder/eval/benchmarks/bioscan5m.py +198 -0
- birder/eval/benchmarks/fishnet.py +318 -0
- birder/eval/benchmarks/flowers102.py +210 -0
- birder/eval/benchmarks/fungiclef.py +261 -0
- birder/eval/benchmarks/nabirds.py +202 -0
- birder/eval/benchmarks/newt.py +262 -0
- birder/eval/benchmarks/plankton.py +255 -0
- birder/eval/benchmarks/plantdoc.py +259 -0
- birder/eval/benchmarks/plantnet.py +252 -0
- birder/eval/classification.py +235 -0
- birder/eval/methods/__init__.py +0 -0
- birder/eval/methods/ami.py +78 -0
- birder/eval/methods/knn.py +71 -0
- birder/eval/methods/linear.py +152 -0
- birder/eval/methods/mlp.py +178 -0
- birder/eval/methods/simpleshot.py +100 -0
- birder/eval/methods/svm.py +92 -0
- birder/inference/classification.py +23 -2
- birder/inference/detection.py +35 -15
- birder/net/cswin_transformer.py +2 -1
- birder/net/detection/base.py +41 -18
- birder/net/detection/deformable_detr.py +63 -39
- birder/net/detection/detr.py +23 -20
- birder/net/detection/efficientdet.py +42 -25
- birder/net/detection/faster_rcnn.py +53 -21
- birder/net/detection/fcos.py +42 -23
- birder/net/detection/lw_detr.py +58 -35
- birder/net/detection/plain_detr.py +54 -43
- birder/net/detection/retinanet.py +46 -34
- birder/net/detection/rt_detr_v1.py +41 -38
- birder/net/detection/rt_detr_v2.py +50 -40
- birder/net/detection/ssd.py +47 -31
- birder/net/detection/yolo_v2.py +33 -18
- birder/net/detection/yolo_v3.py +35 -33
- birder/net/detection/yolo_v4.py +35 -20
- birder/net/detection/yolo_v4_tiny.py +1 -2
- birder/net/hiera.py +44 -67
- birder/net/maxvit.py +2 -2
- birder/net/mim/fcmae.py +2 -2
- birder/net/mim/mae_hiera.py +9 -16
- birder/net/nextvit.py +4 -4
- birder/net/rope_deit3.py +1 -1
- birder/net/rope_flexivit.py +1 -1
- birder/net/rope_vit.py +1 -1
- birder/net/squeezenet.py +1 -1
- birder/net/ssl/capi.py +32 -25
- birder/net/ssl/dino_v2.py +12 -15
- birder/net/ssl/franca.py +26 -19
- birder/net/van.py +2 -2
- birder/net/xcit.py +1 -1
- birder/ops/msda.py +46 -16
- birder/scripts/benchmark.py +35 -8
- birder/scripts/predict.py +14 -1
- birder/scripts/predict_detection.py +7 -1
- birder/scripts/train.py +15 -3
- birder/scripts/train_detection.py +16 -6
- birder/scripts/train_franca.py +10 -2
- birder/scripts/train_kd.py +16 -3
- birder/tools/adversarial.py +5 -0
- birder/tools/convert_model.py +101 -43
- birder/tools/quantize_model.py +33 -16
- birder/version.py +1 -1
- {birder-0.4.2.dist-info → birder-0.4.4.dist-info}/METADATA +16 -9
- {birder-0.4.2.dist-info → birder-0.4.4.dist-info}/RECORD +81 -58
- birder/scripts/evaluate.py +0 -176
- {birder-0.4.2.dist-info → birder-0.4.4.dist-info}/WHEEL +0 -0
- {birder-0.4.2.dist-info → birder-0.4.4.dist-info}/entry_points.txt +0 -0
- {birder-0.4.2.dist-info → birder-0.4.4.dist-info}/licenses/LICENSE +0 -0
- {birder-0.4.2.dist-info → birder-0.4.4.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,591 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
from birder.datahub._lib import download_url
|
|
4
|
+
from birder.datahub._lib import extract_archive
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class AwA2:
|
|
8
|
+
"""
|
|
9
|
+
Name: AwA2 (Animals with Attributes 2)
|
|
10
|
+
Link: https://cvml.ista.ac.at/AwA2/
|
|
11
|
+
Size: 50 animal classes, 37,322 images, 85 binary attributes per class
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
attribute_names = [
|
|
15
|
+
"black",
|
|
16
|
+
"white",
|
|
17
|
+
"blue",
|
|
18
|
+
"brown",
|
|
19
|
+
"gray",
|
|
20
|
+
"orange",
|
|
21
|
+
"red",
|
|
22
|
+
"yellow",
|
|
23
|
+
"patches",
|
|
24
|
+
"spots",
|
|
25
|
+
"stripes",
|
|
26
|
+
"furry",
|
|
27
|
+
"hairless",
|
|
28
|
+
"toughskin",
|
|
29
|
+
"big",
|
|
30
|
+
"small",
|
|
31
|
+
"bulbous",
|
|
32
|
+
"lean",
|
|
33
|
+
"flippers",
|
|
34
|
+
"hands",
|
|
35
|
+
"hooves",
|
|
36
|
+
"pads",
|
|
37
|
+
"paws",
|
|
38
|
+
"longleg",
|
|
39
|
+
"longneck",
|
|
40
|
+
"tail",
|
|
41
|
+
"chewteeth",
|
|
42
|
+
"meatteeth",
|
|
43
|
+
"buckteeth",
|
|
44
|
+
"strainteeth",
|
|
45
|
+
"horns",
|
|
46
|
+
"claws",
|
|
47
|
+
"tusks",
|
|
48
|
+
"smelly",
|
|
49
|
+
"flys",
|
|
50
|
+
"hops",
|
|
51
|
+
"swims",
|
|
52
|
+
"tunnels",
|
|
53
|
+
"walks",
|
|
54
|
+
"fast",
|
|
55
|
+
"slow",
|
|
56
|
+
"strong",
|
|
57
|
+
"weak",
|
|
58
|
+
"muscle",
|
|
59
|
+
"bipedal",
|
|
60
|
+
"quadrapedal",
|
|
61
|
+
"active",
|
|
62
|
+
"inactive",
|
|
63
|
+
"nocturnal",
|
|
64
|
+
"hibernate",
|
|
65
|
+
"agility",
|
|
66
|
+
"fish",
|
|
67
|
+
"meat",
|
|
68
|
+
"plankton",
|
|
69
|
+
"vegetation",
|
|
70
|
+
"insects",
|
|
71
|
+
"forager",
|
|
72
|
+
"grazer",
|
|
73
|
+
"hunter",
|
|
74
|
+
"scavenger",
|
|
75
|
+
"skimmer",
|
|
76
|
+
"stalker",
|
|
77
|
+
"newworld",
|
|
78
|
+
"oldworld",
|
|
79
|
+
"arctic",
|
|
80
|
+
"coastal",
|
|
81
|
+
"desert",
|
|
82
|
+
"bush",
|
|
83
|
+
"plains",
|
|
84
|
+
"forest",
|
|
85
|
+
"fields",
|
|
86
|
+
"jungle",
|
|
87
|
+
"mountains",
|
|
88
|
+
"ocean",
|
|
89
|
+
"ground",
|
|
90
|
+
"water",
|
|
91
|
+
"tree",
|
|
92
|
+
"cave",
|
|
93
|
+
"fierce",
|
|
94
|
+
"timid",
|
|
95
|
+
"smart",
|
|
96
|
+
"group",
|
|
97
|
+
"solitary",
|
|
98
|
+
"nestspot",
|
|
99
|
+
"domestic",
|
|
100
|
+
]
|
|
101
|
+
|
|
102
|
+
def __init__(self, root: str | Path, download: bool = False, progress_bar: bool = True) -> None:
|
|
103
|
+
if isinstance(root, str):
|
|
104
|
+
root = Path(root)
|
|
105
|
+
|
|
106
|
+
self._root = root
|
|
107
|
+
|
|
108
|
+
if download is True:
|
|
109
|
+
archive_path = root.parent.joinpath("AwA2-data.zip")
|
|
110
|
+
downloaded = download_url(
|
|
111
|
+
"https://cvml.ista.ac.at/AwA2/AwA2-data.zip",
|
|
112
|
+
archive_path,
|
|
113
|
+
sha256="cc5a849879165acaa2b52f1de3f146ffcd1c475f6ef85bab0152c763e573744f",
|
|
114
|
+
progress_bar=progress_bar,
|
|
115
|
+
)
|
|
116
|
+
if downloaded is True or self._root.exists() is False:
|
|
117
|
+
extract_archive(archive_path, root.parent)
|
|
118
|
+
|
|
119
|
+
else:
|
|
120
|
+
if self._root.exists() is False or self._root.is_dir() is False:
|
|
121
|
+
raise RuntimeError("Dataset not found, try download=True to download it")
|
|
122
|
+
|
|
123
|
+
if self.images_dir.exists() is False:
|
|
124
|
+
raise RuntimeError("Dataset seems corrupted: JPEGImages directory not found")
|
|
125
|
+
|
|
126
|
+
if self.predicate_matrix_binary_path.exists() is False:
|
|
127
|
+
raise RuntimeError("Dataset seems corrupted: predicate-matrix-binary.txt not found")
|
|
128
|
+
|
|
129
|
+
@property
|
|
130
|
+
def images_dir(self) -> Path:
|
|
131
|
+
return self._root.joinpath("JPEGImages")
|
|
132
|
+
|
|
133
|
+
@property
|
|
134
|
+
def classes_path(self) -> Path:
|
|
135
|
+
return self._root.joinpath("classes.txt")
|
|
136
|
+
|
|
137
|
+
@property
|
|
138
|
+
def predicates_path(self) -> Path:
|
|
139
|
+
return self._root.joinpath("predicates.txt")
|
|
140
|
+
|
|
141
|
+
@property
|
|
142
|
+
def predicate_matrix_binary_path(self) -> Path:
|
|
143
|
+
return self._root.joinpath("predicate-matrix-binary.txt")
|
|
144
|
+
|
|
145
|
+
@property
|
|
146
|
+
def predicate_matrix_continuous_path(self) -> Path:
|
|
147
|
+
return self._root.joinpath("predicate-matrix-continuous.txt")
|
|
148
|
+
|
|
149
|
+
@property
|
|
150
|
+
def trainclasses_path(self) -> Path:
|
|
151
|
+
return self._root.joinpath("trainclasses.txt")
|
|
152
|
+
|
|
153
|
+
@property
|
|
154
|
+
def testclasses_path(self) -> Path:
|
|
155
|
+
return self._root.joinpath("testclasses.txt")
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
class FishNet:
|
|
159
|
+
"""
|
|
160
|
+
Name: FishNet
|
|
161
|
+
Link: https://fishnet-2023.github.io/
|
|
162
|
+
Size: 94,532 images, 17,357 aquatic species, 9 binary traits
|
|
163
|
+
|
|
164
|
+
Traits:
|
|
165
|
+
- FeedingPath (benthic=0, pelagic=1)
|
|
166
|
+
- Tropical, Temperate, Subtropical, Boreal, Polar (habitat, 0/1)
|
|
167
|
+
- freshwater, saltwater, brackish (water type, 0/1)
|
|
168
|
+
|
|
169
|
+
Note: This dataset requires manual download from Google Drive.
|
|
170
|
+
"""
|
|
171
|
+
|
|
172
|
+
trait_columns = [
|
|
173
|
+
"FeedingPath",
|
|
174
|
+
"Tropical",
|
|
175
|
+
"Temperate",
|
|
176
|
+
"Subtropical",
|
|
177
|
+
"Boreal",
|
|
178
|
+
"Polar",
|
|
179
|
+
"freshwater",
|
|
180
|
+
"saltwater",
|
|
181
|
+
"brackish",
|
|
182
|
+
]
|
|
183
|
+
|
|
184
|
+
def __init__(self, root: str | Path) -> None:
|
|
185
|
+
if isinstance(root, str):
|
|
186
|
+
root = Path(root)
|
|
187
|
+
|
|
188
|
+
self._root = root
|
|
189
|
+
|
|
190
|
+
# Verify dataset exists
|
|
191
|
+
if self._root.exists() is False or self._root.is_dir() is False:
|
|
192
|
+
raise RuntimeError(f"Dataset not found at {self._root}. Download it from https://fishnet-2023.github.io/")
|
|
193
|
+
|
|
194
|
+
if self.images_dir.exists() is False:
|
|
195
|
+
raise RuntimeError("Dataset seems corrupted: images directory not found")
|
|
196
|
+
|
|
197
|
+
if self.train_csv.exists() is False:
|
|
198
|
+
raise RuntimeError("Dataset seems corrupted: train.csv not found")
|
|
199
|
+
|
|
200
|
+
if self.test_csv.exists() is False:
|
|
201
|
+
raise RuntimeError("Dataset seems corrupted: test.csv not found")
|
|
202
|
+
|
|
203
|
+
@property
|
|
204
|
+
def images_dir(self) -> Path:
|
|
205
|
+
return self._root.joinpath("images")
|
|
206
|
+
|
|
207
|
+
@property
|
|
208
|
+
def train_csv(self) -> Path:
|
|
209
|
+
return self._root.joinpath("train.csv")
|
|
210
|
+
|
|
211
|
+
@property
|
|
212
|
+
def test_csv(self) -> Path:
|
|
213
|
+
return self._root.joinpath("test.csv")
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
class FungiCLEF2023:
|
|
217
|
+
"""
|
|
218
|
+
Name: FungiCLEF2023
|
|
219
|
+
Link: https://www.imageclef.org/FungiCLEF2023
|
|
220
|
+
Size: 1,604 species, ~417K images (train + val + test)
|
|
221
|
+
"""
|
|
222
|
+
|
|
223
|
+
def __init__(self, root: str | Path, download: bool = False, progress_bar: bool = True) -> None:
|
|
224
|
+
if isinstance(root, str):
|
|
225
|
+
root = Path(root)
|
|
226
|
+
|
|
227
|
+
self._root = root
|
|
228
|
+
|
|
229
|
+
if download is True:
|
|
230
|
+
self._root.mkdir(parents=True, exist_ok=True)
|
|
231
|
+
|
|
232
|
+
train_images_archive = self._root.joinpath("DF20-300px.tar.gz")
|
|
233
|
+
downloaded_train_images = download_url(
|
|
234
|
+
"http://ptak.felk.cvut.cz/plants/DanishFungiDataset/DF20-300px.tar.gz",
|
|
235
|
+
train_images_archive,
|
|
236
|
+
sha256="b7b572179c3e99dfdfaed4b75872cb6cc59ad8d7dccab331906687ca6bce3b5a",
|
|
237
|
+
progress_bar=progress_bar,
|
|
238
|
+
)
|
|
239
|
+
if downloaded_train_images is True or self.train_images_dir.exists() is False:
|
|
240
|
+
extract_archive(train_images_archive, self._root)
|
|
241
|
+
|
|
242
|
+
val_test_images_archive = self._root.joinpath("DF21_300px.tar.gz")
|
|
243
|
+
downloaded_val_test_images = download_url(
|
|
244
|
+
"http://ptak.felk.cvut.cz/plants/DanishFungiDataset/DF21_300px.tar.gz",
|
|
245
|
+
val_test_images_archive,
|
|
246
|
+
sha256="c0194d3314370a22fb01fb0800330c2e18c90d83f97def55dea84cb5abc2fc3e",
|
|
247
|
+
progress_bar=progress_bar,
|
|
248
|
+
)
|
|
249
|
+
if downloaded_val_test_images is True or self.val_test_images_dir.exists() is False:
|
|
250
|
+
extract_archive(val_test_images_archive, self._root)
|
|
251
|
+
|
|
252
|
+
download_url(
|
|
253
|
+
"http://ptak.felk.cvut.cz/plants/DanishFungiDataset/FungiCLEF2023_train_metadata_PRODUCTION.csv",
|
|
254
|
+
self.train_metadata_path,
|
|
255
|
+
sha256="dc17fc1ab48f0876947402965ee9c25e437c1622f134edab5c7da6c9b853d907",
|
|
256
|
+
progress_bar=progress_bar,
|
|
257
|
+
)
|
|
258
|
+
download_url(
|
|
259
|
+
"http://ptak.felk.cvut.cz/plants/DanishFungiDataset/FungiCLEF2023_val_metadata_PRODUCTION.csv",
|
|
260
|
+
self.val_metadata_path,
|
|
261
|
+
sha256="9573102de721bc93f36e5e03e878cd50cc7f6031a7a3bc82ed0642ec4c691c2a",
|
|
262
|
+
progress_bar=progress_bar,
|
|
263
|
+
)
|
|
264
|
+
download_url(
|
|
265
|
+
"http://ptak.felk.cvut.cz/plants/DanishFungiDataset/FungiCLEF2023_public_test_metadata_PRODUCTION.csv",
|
|
266
|
+
self.test_metadata_path,
|
|
267
|
+
sha256="56ae171d5abf2a99a3ccf8cd96cb685d0f96a7bda055a37afd2fda3e943d991c",
|
|
268
|
+
progress_bar=progress_bar,
|
|
269
|
+
)
|
|
270
|
+
|
|
271
|
+
else:
|
|
272
|
+
if self._root.exists() is False or self._root.is_dir() is False:
|
|
273
|
+
raise RuntimeError("Dataset not found, try download=True to download it")
|
|
274
|
+
|
|
275
|
+
if self.train_images_dir.exists() is False:
|
|
276
|
+
raise RuntimeError("Dataset seems corrupted: DF20_300 directory not found")
|
|
277
|
+
|
|
278
|
+
if self.val_test_images_dir.exists() is False:
|
|
279
|
+
raise RuntimeError("Dataset seems corrupted: DF21_300 directory not found")
|
|
280
|
+
|
|
281
|
+
if self.train_metadata_path.exists() is False:
|
|
282
|
+
raise RuntimeError("Dataset seems corrupted: train metadata CSV not found")
|
|
283
|
+
|
|
284
|
+
if self.val_metadata_path.exists() is False:
|
|
285
|
+
raise RuntimeError("Dataset seems corrupted: validation metadata CSV not found")
|
|
286
|
+
|
|
287
|
+
if self.test_metadata_path.exists() is False:
|
|
288
|
+
raise RuntimeError("Dataset seems corrupted: test metadata CSV not found")
|
|
289
|
+
|
|
290
|
+
@property
|
|
291
|
+
def train_images_dir(self) -> Path:
|
|
292
|
+
return self._root.joinpath("DF20_300")
|
|
293
|
+
|
|
294
|
+
@property
|
|
295
|
+
def val_test_images_dir(self) -> Path:
|
|
296
|
+
return self._root.joinpath("DF21_300")
|
|
297
|
+
|
|
298
|
+
@property
|
|
299
|
+
def val_images_dir(self) -> Path:
|
|
300
|
+
return self.val_test_images_dir
|
|
301
|
+
|
|
302
|
+
@property
|
|
303
|
+
def test_images_dir(self) -> Path:
|
|
304
|
+
return self.val_test_images_dir
|
|
305
|
+
|
|
306
|
+
@property
|
|
307
|
+
def train_metadata_path(self) -> Path:
|
|
308
|
+
return self._root.joinpath("FungiCLEF2023_train_metadata_PRODUCTION.csv")
|
|
309
|
+
|
|
310
|
+
@property
|
|
311
|
+
def val_metadata_path(self) -> Path:
|
|
312
|
+
return self._root.joinpath("FungiCLEF2023_val_metadata_PRODUCTION.csv")
|
|
313
|
+
|
|
314
|
+
@property
|
|
315
|
+
def test_metadata_path(self) -> Path:
|
|
316
|
+
return self._root.joinpath("FungiCLEF2023_public_test_metadata_PRODUCTION.csv")
|
|
317
|
+
|
|
318
|
+
|
|
319
|
+
class NABirds:
|
|
320
|
+
"""
|
|
321
|
+
Name: NABirds
|
|
322
|
+
Link: https://dl.allaboutbirds.org/nabirds
|
|
323
|
+
Size: 555 visual categories, ~48K images
|
|
324
|
+
|
|
325
|
+
Note: This dataset requires manual download. Visit the link above.
|
|
326
|
+
"""
|
|
327
|
+
|
|
328
|
+
def __init__(self, root: str | Path) -> None:
|
|
329
|
+
if isinstance(root, str):
|
|
330
|
+
root = Path(root)
|
|
331
|
+
|
|
332
|
+
self._root = root
|
|
333
|
+
|
|
334
|
+
# Verify dataset exists
|
|
335
|
+
if self._root.exists() is False or self._root.is_dir() is False:
|
|
336
|
+
raise RuntimeError(
|
|
337
|
+
f"Dataset not found at {self._root}. Download it from https://dl.allaboutbirds.org/nabirds"
|
|
338
|
+
)
|
|
339
|
+
|
|
340
|
+
if self.images_dir.exists() is False:
|
|
341
|
+
raise RuntimeError("Dataset seems corrupted: images directory not found")
|
|
342
|
+
|
|
343
|
+
@property
|
|
344
|
+
def images_dir(self) -> Path:
|
|
345
|
+
return self._root.joinpath("images")
|
|
346
|
+
|
|
347
|
+
@property
|
|
348
|
+
def images_path(self) -> Path:
|
|
349
|
+
return self._root.joinpath("images.txt")
|
|
350
|
+
|
|
351
|
+
@property
|
|
352
|
+
def classes_path(self) -> Path:
|
|
353
|
+
return self._root.joinpath("classes.txt")
|
|
354
|
+
|
|
355
|
+
@property
|
|
356
|
+
def labels_path(self) -> Path:
|
|
357
|
+
return self._root.joinpath("image_class_labels.txt")
|
|
358
|
+
|
|
359
|
+
@property
|
|
360
|
+
def train_test_split_path(self) -> Path:
|
|
361
|
+
return self._root.joinpath("train_test_split.txt")
|
|
362
|
+
|
|
363
|
+
@property
|
|
364
|
+
def hierarchy_path(self) -> Path:
|
|
365
|
+
return self._root.joinpath("hierarchy.txt")
|
|
366
|
+
|
|
367
|
+
|
|
368
|
+
class NeWT:
|
|
369
|
+
"""
|
|
370
|
+
Name: NeWT (Natural World Tasks)
|
|
371
|
+
Link: https://github.com/visipedia/newt
|
|
372
|
+
Size: 164 binary classification tasks, ~36K images
|
|
373
|
+
"""
|
|
374
|
+
|
|
375
|
+
def __init__(self, root: str | Path, download: bool = False, progress_bar: bool = True) -> None:
|
|
376
|
+
if isinstance(root, str):
|
|
377
|
+
root = Path(root)
|
|
378
|
+
|
|
379
|
+
self._root = root
|
|
380
|
+
|
|
381
|
+
if download is True:
|
|
382
|
+
self._root.mkdir(parents=True, exist_ok=True)
|
|
383
|
+
|
|
384
|
+
# Download images
|
|
385
|
+
images_src = root.parent.joinpath("newt2021_images.tar.gz")
|
|
386
|
+
downloaded_images = download_url(
|
|
387
|
+
"https://ml-inat-competition-datasets.s3.amazonaws.com/newt/newt2021_images.tar.gz",
|
|
388
|
+
images_src,
|
|
389
|
+
sha256="8d40958a867c1296f92b5e125f1f1d8ddaa59f249315740fc366fc606995c055",
|
|
390
|
+
progress_bar=progress_bar,
|
|
391
|
+
)
|
|
392
|
+
if downloaded_images is True or self.images_dir.exists() is False:
|
|
393
|
+
extract_archive(images_src, self._root)
|
|
394
|
+
|
|
395
|
+
# Download labels
|
|
396
|
+
labels_src = root.parent.joinpath("newt2021_labels.csv.tar.gz")
|
|
397
|
+
downloaded_labels = download_url(
|
|
398
|
+
"https://ml-inat-competition-datasets.s3.amazonaws.com/newt/newt2021_labels.csv.tar.gz",
|
|
399
|
+
labels_src,
|
|
400
|
+
sha256="e09807842485ef49ccf51d74ac9f6072c599fc16cf5ee755fdf4064f2e4c3828",
|
|
401
|
+
progress_bar=progress_bar,
|
|
402
|
+
)
|
|
403
|
+
if downloaded_labels is True or self.labels_path.exists() is False:
|
|
404
|
+
extract_archive(labels_src, self._root)
|
|
405
|
+
|
|
406
|
+
else:
|
|
407
|
+
# Some sanity checks
|
|
408
|
+
if self._root.exists() is False or self._root.is_dir() is False:
|
|
409
|
+
raise RuntimeError("Dataset not found, try download=True to download it")
|
|
410
|
+
|
|
411
|
+
if self.images_dir.exists() is False:
|
|
412
|
+
raise RuntimeError("Dataset seems corrupted: images directory not found")
|
|
413
|
+
|
|
414
|
+
if self.labels_path.exists() is False:
|
|
415
|
+
raise RuntimeError("Dataset seems corrupted: labels CSV not found")
|
|
416
|
+
|
|
417
|
+
@property
|
|
418
|
+
def images_dir(self) -> Path:
|
|
419
|
+
return self._root.joinpath("newt2021_images")
|
|
420
|
+
|
|
421
|
+
@property
|
|
422
|
+
def labels_path(self) -> Path:
|
|
423
|
+
return self._root.joinpath("newt2021_labels.csv")
|
|
424
|
+
|
|
425
|
+
|
|
426
|
+
class Plankton:
|
|
427
|
+
"""
|
|
428
|
+
Name: SYKE-plankton_IFCB_2022
|
|
429
|
+
Link: https://b2share.eudat.eu/records/xvnrp-7ga56
|
|
430
|
+
Size: 50 phytoplankton classes, ~214K images (train + val)
|
|
431
|
+
"""
|
|
432
|
+
|
|
433
|
+
def __init__(self, root: str | Path, download: bool = False, progress_bar: bool = True) -> None:
|
|
434
|
+
if isinstance(root, str):
|
|
435
|
+
root = Path(root)
|
|
436
|
+
|
|
437
|
+
self._root = root
|
|
438
|
+
|
|
439
|
+
if download is True:
|
|
440
|
+
self._root.mkdir(parents=True, exist_ok=True)
|
|
441
|
+
|
|
442
|
+
train_archive = root.parent.joinpath("phytoplankton_labeled.zip")
|
|
443
|
+
downloaded_train = download_url(
|
|
444
|
+
"https://b2share.eudat.eu/records/xvnrp-7ga56/files/phytoplankton_labeled.zip",
|
|
445
|
+
train_archive,
|
|
446
|
+
sha256="0c47acd8dfad46829fe42758a6c24adcdb5e6f2456be4ced975cbb9de9644704",
|
|
447
|
+
progress_bar=progress_bar,
|
|
448
|
+
)
|
|
449
|
+
if downloaded_train is True or self.train_dir.exists() is False:
|
|
450
|
+
extract_archive(train_archive, self._root)
|
|
451
|
+
|
|
452
|
+
val_archive = root.parent.joinpath("phytoplankton_Uto_2021_labeled.zip")
|
|
453
|
+
downloaded_val = download_url(
|
|
454
|
+
"https://b2share.eudat.eu/records/w7y96-6jd66/files/phytoplankton_Ut%C3%B6_2021_labeled.zip",
|
|
455
|
+
val_archive,
|
|
456
|
+
sha256="b017809515c3d58171ecbfd196d6725239e9380c2a22ae880ac56e878bbfcfa4",
|
|
457
|
+
progress_bar=progress_bar,
|
|
458
|
+
)
|
|
459
|
+
if downloaded_val is True or self.val_dir.exists() is False:
|
|
460
|
+
extract_archive(val_archive, self._root)
|
|
461
|
+
# Rename extracted directory to avoid non-ASCII character
|
|
462
|
+
extracted_dir = self._root.joinpath("phytoplankton_Utö_2021_labeled")
|
|
463
|
+
if extracted_dir.exists():
|
|
464
|
+
extracted_dir.rename(self.val_dir)
|
|
465
|
+
|
|
466
|
+
else:
|
|
467
|
+
if self._root.exists() is False or self._root.is_dir() is False:
|
|
468
|
+
raise RuntimeError("Dataset not found, try download=True to download it")
|
|
469
|
+
|
|
470
|
+
if self.train_dir.exists() is False:
|
|
471
|
+
raise RuntimeError("Dataset seems corrupted: train directory not found")
|
|
472
|
+
|
|
473
|
+
if self.val_dir.exists() is False:
|
|
474
|
+
raise RuntimeError("Dataset seems corrupted: val directory not found")
|
|
475
|
+
|
|
476
|
+
@property
|
|
477
|
+
def train_dir(self) -> Path:
|
|
478
|
+
return self._root.joinpath("labeled_20201020")
|
|
479
|
+
|
|
480
|
+
@property
|
|
481
|
+
def val_dir(self) -> Path:
|
|
482
|
+
return self._root.joinpath("phytoplankton_Uto_2021_labeled")
|
|
483
|
+
|
|
484
|
+
|
|
485
|
+
class PlantDoc:
|
|
486
|
+
"""
|
|
487
|
+
Name: PlantDoc
|
|
488
|
+
Link: https://github.com/pratikkayal/PlantDoc-Dataset
|
|
489
|
+
Paper: https://arxiv.org/abs/1911.10317
|
|
490
|
+
Size: 27 classes (13 plant species, 17 disease categories), 2,598 images
|
|
491
|
+
"""
|
|
492
|
+
|
|
493
|
+
_archive_dir_name = "PlantDoc-Dataset-5467f6012d78d1c446145d5f582da6096f852ae8"
|
|
494
|
+
|
|
495
|
+
def __init__(self, root: str | Path, download: bool = False, progress_bar: bool = True) -> None:
|
|
496
|
+
if isinstance(root, str):
|
|
497
|
+
root = Path(root)
|
|
498
|
+
|
|
499
|
+
self._root = root
|
|
500
|
+
|
|
501
|
+
if download is True:
|
|
502
|
+
archive_path = root.parent.joinpath("plantdoc.zip")
|
|
503
|
+
downloaded = download_url(
|
|
504
|
+
"https://github.com/pratikkayal/PlantDoc-Dataset/archive/"
|
|
505
|
+
"5467f6012d78d1c446145d5f582da6096f852ae8.zip",
|
|
506
|
+
archive_path,
|
|
507
|
+
sha256="94e2b99a500a63efbd48923ed48588fbb01f9b1db66a2d3b5c24eed6466da20f",
|
|
508
|
+
progress_bar=progress_bar,
|
|
509
|
+
)
|
|
510
|
+
if downloaded is True or self._root.exists() is False:
|
|
511
|
+
extract_archive(archive_path, root.parent)
|
|
512
|
+
# Rename extracted directory from commit hash name to friendly name
|
|
513
|
+
extracted_dir = root.parent.joinpath(self._archive_dir_name)
|
|
514
|
+
extracted_dir.rename(self._root)
|
|
515
|
+
|
|
516
|
+
else:
|
|
517
|
+
if self._root.exists() is False or self._root.is_dir() is False:
|
|
518
|
+
raise RuntimeError("Dataset not found, try download=True to download it")
|
|
519
|
+
|
|
520
|
+
if self.train_dir.exists() is False:
|
|
521
|
+
raise RuntimeError("Dataset seems corrupted: train directory not found")
|
|
522
|
+
|
|
523
|
+
if self.test_dir.exists() is False:
|
|
524
|
+
raise RuntimeError("Dataset seems corrupted: test directory not found")
|
|
525
|
+
|
|
526
|
+
@property
|
|
527
|
+
def train_dir(self) -> Path:
|
|
528
|
+
return self._root.joinpath("train")
|
|
529
|
+
|
|
530
|
+
@property
|
|
531
|
+
def test_dir(self) -> Path:
|
|
532
|
+
return self._root.joinpath("test")
|
|
533
|
+
|
|
534
|
+
|
|
535
|
+
class PlantNet:
|
|
536
|
+
"""
|
|
537
|
+
Name: PlantNet-300K
|
|
538
|
+
Link: https://plantnet.org/en/2021/03/30/a-plntnet-dataset-for-machine-learning-researchers/
|
|
539
|
+
Size: 1081 species, ~300K images
|
|
540
|
+
"""
|
|
541
|
+
|
|
542
|
+
def __init__(self, root: str | Path, download: bool = False, progress_bar: bool = True) -> None:
|
|
543
|
+
if isinstance(root, str):
|
|
544
|
+
root = Path(root)
|
|
545
|
+
|
|
546
|
+
self._root = root
|
|
547
|
+
|
|
548
|
+
if download is True:
|
|
549
|
+
archive_path = root.parent.joinpath("plantnet_300K.zip")
|
|
550
|
+
downloaded = download_url(
|
|
551
|
+
"https://zenodo.org/records/5645731/files/plantnet_300K.zip?download=1",
|
|
552
|
+
archive_path,
|
|
553
|
+
sha256="3a079076c8ad4476beac54d89ea344958256a999428937eba47ec352dadce00d",
|
|
554
|
+
progress_bar=progress_bar,
|
|
555
|
+
)
|
|
556
|
+
if downloaded is True or self._root.exists() is False:
|
|
557
|
+
extract_archive(archive_path, root.parent)
|
|
558
|
+
|
|
559
|
+
else:
|
|
560
|
+
if self._root.exists() is False or self._root.is_dir() is False:
|
|
561
|
+
raise RuntimeError("Dataset not found, try download=True to download it")
|
|
562
|
+
|
|
563
|
+
if self.images_dir.exists() is False:
|
|
564
|
+
raise RuntimeError("Dataset seems corrupted: images directory not found")
|
|
565
|
+
|
|
566
|
+
if self.species_id_to_name_path.exists() is False:
|
|
567
|
+
raise RuntimeError("Dataset seems corrupted: species_id_2_name.json not found")
|
|
568
|
+
|
|
569
|
+
@property
|
|
570
|
+
def images_dir(self) -> Path:
|
|
571
|
+
return self._root.joinpath("images")
|
|
572
|
+
|
|
573
|
+
@property
|
|
574
|
+
def train_dir(self) -> Path:
|
|
575
|
+
return self.images_dir.joinpath("train")
|
|
576
|
+
|
|
577
|
+
@property
|
|
578
|
+
def val_dir(self) -> Path:
|
|
579
|
+
return self.images_dir.joinpath("val")
|
|
580
|
+
|
|
581
|
+
@property
|
|
582
|
+
def test_dir(self) -> Path:
|
|
583
|
+
return self.images_dir.joinpath("test")
|
|
584
|
+
|
|
585
|
+
@property
|
|
586
|
+
def species_id_to_name_path(self) -> Path:
|
|
587
|
+
return self._root.joinpath("plantnet300K_species_id_2_name.json")
|
|
588
|
+
|
|
589
|
+
@property
|
|
590
|
+
def metadata_path(self) -> Path:
|
|
591
|
+
return self._root.joinpath("plantnet300K_metadata.json")
|
birder/eval/__init__.py
ADDED
|
File without changes
|
birder/eval/__main__.py
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
|
|
3
|
+
from birder.common import cli
|
|
4
|
+
from birder.eval import adversarial
|
|
5
|
+
from birder.eval import classification
|
|
6
|
+
from birder.eval.benchmarks import awa2
|
|
7
|
+
from birder.eval.benchmarks import bioscan5m
|
|
8
|
+
from birder.eval.benchmarks import fishnet
|
|
9
|
+
from birder.eval.benchmarks import flowers102
|
|
10
|
+
from birder.eval.benchmarks import fungiclef
|
|
11
|
+
from birder.eval.benchmarks import nabirds
|
|
12
|
+
from birder.eval.benchmarks import newt
|
|
13
|
+
from birder.eval.benchmarks import plankton
|
|
14
|
+
from birder.eval.benchmarks import plantdoc
|
|
15
|
+
from birder.eval.benchmarks import plantnet
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def main() -> None:
    """Entry point for ``python -m birder.eval``: build the CLI and dispatch to a sub-command."""
    parser = argparse.ArgumentParser(
        prog="python -m birder.eval",
        allow_abbrev=False,
        description="Evaluation module",
        epilog=(
            "Usage examples:\n"
            "python -m birder.eval adversarial -n resnet_v2_50 -t il-all -e 100 --method pgd "
            "--gpu data/validation_il-all_packed\n"
            "python -m birder.eval classification --filter '*il-all*' --gpu data/validation_il-all\n"
            "---\n"
            "python -m birder.eval awa2 --embeddings "
            "results/awa2/*.parquet --dataset-path ~/Datasets/Animals_with_Attributes2 --gpu\n"
            "python -m birder.eval bioscan5m --embeddings "
            "results/bioscan5m/*.parquet --data-path ~/Datasets/BIOSCAN-5M/species/testing_unseen\n"
            "python -m birder.eval fishnet --embeddings "
            "results/vit_b16_224px_embeddings.parquet --dataset-path ~/Datasets/fishnet --gpu\n"
            "python -m birder.eval flowers102 --embeddings "
            "results/flowers102_rope_i_vit_s16_pn_aps_c1_pe-core_0_384px_crop1.0_8189_sc_embeddings.parquet "
            "--dataset-path ~/Datasets/Flowers102\n"
            "python -m birder.eval fungiclef --embeddings "
            "results/fungiclef/*.parquet --dataset-path ~/Datasets/FungiCLEF2023\n"
            "python -m birder.eval nabirds --embeddings "
            "results/vit_b16_224px_crop1.0_48562_embeddings.parquet --dataset-path ~/Datasets/nabirds\n"
            "python -m birder.eval newt --embeddings "
            "results/vit_reg4_so150m_p14_ls_dino-v2-bio_0_e45_224px_crop1.0_36032_output.parquet "
            "--dataset-path ~/Datasets/NeWT\n"
            "python -m birder.eval plankton --embeddings "
            "results/plankton/*.parquet --dataset-path ~/Datasets/plankton --gpu\n"
            "python -m birder.eval plantdoc --embeddings "
            "results/plantdoc_embeddings.parquet --dataset-path ~/Datasets/PlantDoc\n"
            "python -m birder.eval plantnet --embeddings "
            "results/plantnet_embeddings.parquet --dataset-path ~/Datasets/plantnet_300K\n"
        ),
        formatter_class=cli.ArgumentHelpFormatter,
    )
    subparsers = parser.add_subparsers(dest="cmd", required=True)

    # Register every sub-command; tuple order controls --help listing order
    for command_module in (
        adversarial,
        classification,
        awa2,
        bioscan5m,
        fishnet,
        flowers102,
        fungiclef,
        nabirds,
        newt,
        plankton,
        plantdoc,
        plantnet,
    ):
        command_module.set_parser(subparsers)

    args = parser.parse_args()
    args.func(args)


if __name__ == "__main__":
    main()
|