birder 0.4.2__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. birder/__init__.py +2 -0
  2. birder/common/fs_ops.py +81 -1
  3. birder/common/training_cli.py +6 -1
  4. birder/common/training_utils.py +4 -0
  5. birder/data/collators/detection.py +3 -1
  6. birder/datahub/_lib.py +15 -6
  7. birder/datahub/evaluation.py +591 -0
  8. birder/eval/__init__.py +0 -0
  9. birder/eval/__main__.py +74 -0
  10. birder/eval/_embeddings.py +50 -0
  11. birder/eval/adversarial.py +315 -0
  12. birder/eval/benchmarks/__init__.py +0 -0
  13. birder/eval/benchmarks/awa2.py +357 -0
  14. birder/eval/benchmarks/bioscan5m.py +198 -0
  15. birder/eval/benchmarks/fishnet.py +318 -0
  16. birder/eval/benchmarks/flowers102.py +210 -0
  17. birder/eval/benchmarks/fungiclef.py +261 -0
  18. birder/eval/benchmarks/nabirds.py +202 -0
  19. birder/eval/benchmarks/newt.py +262 -0
  20. birder/eval/benchmarks/plankton.py +255 -0
  21. birder/eval/benchmarks/plantdoc.py +259 -0
  22. birder/eval/benchmarks/plantnet.py +252 -0
  23. birder/eval/classification.py +235 -0
  24. birder/eval/methods/__init__.py +0 -0
  25. birder/eval/methods/ami.py +78 -0
  26. birder/eval/methods/knn.py +71 -0
  27. birder/eval/methods/linear.py +152 -0
  28. birder/eval/methods/mlp.py +178 -0
  29. birder/eval/methods/simpleshot.py +100 -0
  30. birder/eval/methods/svm.py +92 -0
  31. birder/inference/classification.py +23 -2
  32. birder/inference/detection.py +35 -15
  33. birder/net/cswin_transformer.py +2 -1
  34. birder/net/detection/base.py +41 -18
  35. birder/net/detection/deformable_detr.py +63 -39
  36. birder/net/detection/detr.py +23 -20
  37. birder/net/detection/efficientdet.py +42 -25
  38. birder/net/detection/faster_rcnn.py +53 -21
  39. birder/net/detection/fcos.py +42 -23
  40. birder/net/detection/lw_detr.py +58 -35
  41. birder/net/detection/plain_detr.py +54 -43
  42. birder/net/detection/retinanet.py +46 -34
  43. birder/net/detection/rt_detr_v1.py +41 -38
  44. birder/net/detection/rt_detr_v2.py +50 -40
  45. birder/net/detection/ssd.py +47 -31
  46. birder/net/detection/yolo_v2.py +33 -18
  47. birder/net/detection/yolo_v3.py +35 -33
  48. birder/net/detection/yolo_v4.py +35 -20
  49. birder/net/detection/yolo_v4_tiny.py +1 -2
  50. birder/net/hiera.py +44 -67
  51. birder/net/maxvit.py +2 -2
  52. birder/net/mim/fcmae.py +2 -2
  53. birder/net/mim/mae_hiera.py +9 -16
  54. birder/net/nextvit.py +4 -4
  55. birder/net/rope_deit3.py +1 -1
  56. birder/net/rope_flexivit.py +1 -1
  57. birder/net/rope_vit.py +1 -1
  58. birder/net/squeezenet.py +1 -1
  59. birder/net/ssl/capi.py +32 -25
  60. birder/net/ssl/dino_v2.py +12 -15
  61. birder/net/ssl/franca.py +26 -19
  62. birder/net/van.py +2 -2
  63. birder/net/xcit.py +1 -1
  64. birder/ops/msda.py +46 -16
  65. birder/scripts/benchmark.py +35 -8
  66. birder/scripts/predict.py +14 -1
  67. birder/scripts/predict_detection.py +7 -1
  68. birder/scripts/train.py +15 -3
  69. birder/scripts/train_detection.py +16 -6
  70. birder/scripts/train_franca.py +10 -2
  71. birder/scripts/train_kd.py +16 -3
  72. birder/tools/adversarial.py +5 -0
  73. birder/tools/convert_model.py +101 -43
  74. birder/tools/quantize_model.py +33 -16
  75. birder/version.py +1 -1
  76. {birder-0.4.2.dist-info → birder-0.4.4.dist-info}/METADATA +16 -9
  77. {birder-0.4.2.dist-info → birder-0.4.4.dist-info}/RECORD +81 -58
  78. birder/scripts/evaluate.py +0 -176
  79. {birder-0.4.2.dist-info → birder-0.4.4.dist-info}/WHEEL +0 -0
  80. {birder-0.4.2.dist-info → birder-0.4.4.dist-info}/entry_points.txt +0 -0
  81. {birder-0.4.2.dist-info → birder-0.4.4.dist-info}/licenses/LICENSE +0 -0
  82. {birder-0.4.2.dist-info → birder-0.4.4.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,591 @@
1
+ from pathlib import Path
2
+
3
+ from birder.datahub._lib import download_url
4
+ from birder.datahub._lib import extract_archive
5
+
6
+
7
+ class AwA2:
8
+ """
9
+ Name: AwA2 (Animals with Attributes 2)
10
+ Link: https://cvml.ista.ac.at/AwA2/
11
+ Size: 50 animal classes, 37,322 images, 85 binary attributes per class
12
+ """
13
+
14
+ attribute_names = [
15
+ "black",
16
+ "white",
17
+ "blue",
18
+ "brown",
19
+ "gray",
20
+ "orange",
21
+ "red",
22
+ "yellow",
23
+ "patches",
24
+ "spots",
25
+ "stripes",
26
+ "furry",
27
+ "hairless",
28
+ "toughskin",
29
+ "big",
30
+ "small",
31
+ "bulbous",
32
+ "lean",
33
+ "flippers",
34
+ "hands",
35
+ "hooves",
36
+ "pads",
37
+ "paws",
38
+ "longleg",
39
+ "longneck",
40
+ "tail",
41
+ "chewteeth",
42
+ "meatteeth",
43
+ "buckteeth",
44
+ "strainteeth",
45
+ "horns",
46
+ "claws",
47
+ "tusks",
48
+ "smelly",
49
+ "flys",
50
+ "hops",
51
+ "swims",
52
+ "tunnels",
53
+ "walks",
54
+ "fast",
55
+ "slow",
56
+ "strong",
57
+ "weak",
58
+ "muscle",
59
+ "bipedal",
60
+ "quadrapedal",
61
+ "active",
62
+ "inactive",
63
+ "nocturnal",
64
+ "hibernate",
65
+ "agility",
66
+ "fish",
67
+ "meat",
68
+ "plankton",
69
+ "vegetation",
70
+ "insects",
71
+ "forager",
72
+ "grazer",
73
+ "hunter",
74
+ "scavenger",
75
+ "skimmer",
76
+ "stalker",
77
+ "newworld",
78
+ "oldworld",
79
+ "arctic",
80
+ "coastal",
81
+ "desert",
82
+ "bush",
83
+ "plains",
84
+ "forest",
85
+ "fields",
86
+ "jungle",
87
+ "mountains",
88
+ "ocean",
89
+ "ground",
90
+ "water",
91
+ "tree",
92
+ "cave",
93
+ "fierce",
94
+ "timid",
95
+ "smart",
96
+ "group",
97
+ "solitary",
98
+ "nestspot",
99
+ "domestic",
100
+ ]
101
+
102
+ def __init__(self, root: str | Path, download: bool = False, progress_bar: bool = True) -> None:
103
+ if isinstance(root, str):
104
+ root = Path(root)
105
+
106
+ self._root = root
107
+
108
+ if download is True:
109
+ archive_path = root.parent.joinpath("AwA2-data.zip")
110
+ downloaded = download_url(
111
+ "https://cvml.ista.ac.at/AwA2/AwA2-data.zip",
112
+ archive_path,
113
+ sha256="cc5a849879165acaa2b52f1de3f146ffcd1c475f6ef85bab0152c763e573744f",
114
+ progress_bar=progress_bar,
115
+ )
116
+ if downloaded is True or self._root.exists() is False:
117
+ extract_archive(archive_path, root.parent)
118
+
119
+ else:
120
+ if self._root.exists() is False or self._root.is_dir() is False:
121
+ raise RuntimeError("Dataset not found, try download=True to download it")
122
+
123
+ if self.images_dir.exists() is False:
124
+ raise RuntimeError("Dataset seems corrupted: JPEGImages directory not found")
125
+
126
+ if self.predicate_matrix_binary_path.exists() is False:
127
+ raise RuntimeError("Dataset seems corrupted: predicate-matrix-binary.txt not found")
128
+
129
+ @property
130
+ def images_dir(self) -> Path:
131
+ return self._root.joinpath("JPEGImages")
132
+
133
+ @property
134
+ def classes_path(self) -> Path:
135
+ return self._root.joinpath("classes.txt")
136
+
137
+ @property
138
+ def predicates_path(self) -> Path:
139
+ return self._root.joinpath("predicates.txt")
140
+
141
+ @property
142
+ def predicate_matrix_binary_path(self) -> Path:
143
+ return self._root.joinpath("predicate-matrix-binary.txt")
144
+
145
+ @property
146
+ def predicate_matrix_continuous_path(self) -> Path:
147
+ return self._root.joinpath("predicate-matrix-continuous.txt")
148
+
149
+ @property
150
+ def trainclasses_path(self) -> Path:
151
+ return self._root.joinpath("trainclasses.txt")
152
+
153
+ @property
154
+ def testclasses_path(self) -> Path:
155
+ return self._root.joinpath("testclasses.txt")
156
+
157
+
158
+ class FishNet:
159
+ """
160
+ Name: FishNet
161
+ Link: https://fishnet-2023.github.io/
162
+ Size: 94,532 images, 17,357 aquatic species, 9 binary traits
163
+
164
+ Traits:
165
+ - FeedingPath (benthic=0, pelagic=1)
166
+ - Tropical, Temperate, Subtropical, Boreal, Polar (habitat, 0/1)
167
+ - freshwater, saltwater, brackish (water type, 0/1)
168
+
169
+ Note: This dataset requires manual download from Google Drive.
170
+ """
171
+
172
+ trait_columns = [
173
+ "FeedingPath",
174
+ "Tropical",
175
+ "Temperate",
176
+ "Subtropical",
177
+ "Boreal",
178
+ "Polar",
179
+ "freshwater",
180
+ "saltwater",
181
+ "brackish",
182
+ ]
183
+
184
+ def __init__(self, root: str | Path) -> None:
185
+ if isinstance(root, str):
186
+ root = Path(root)
187
+
188
+ self._root = root
189
+
190
+ # Verify dataset exists
191
+ if self._root.exists() is False or self._root.is_dir() is False:
192
+ raise RuntimeError(f"Dataset not found at {self._root}. Download it from https://fishnet-2023.github.io/")
193
+
194
+ if self.images_dir.exists() is False:
195
+ raise RuntimeError("Dataset seems corrupted: images directory not found")
196
+
197
+ if self.train_csv.exists() is False:
198
+ raise RuntimeError("Dataset seems corrupted: train.csv not found")
199
+
200
+ if self.test_csv.exists() is False:
201
+ raise RuntimeError("Dataset seems corrupted: test.csv not found")
202
+
203
+ @property
204
+ def images_dir(self) -> Path:
205
+ return self._root.joinpath("images")
206
+
207
+ @property
208
+ def train_csv(self) -> Path:
209
+ return self._root.joinpath("train.csv")
210
+
211
+ @property
212
+ def test_csv(self) -> Path:
213
+ return self._root.joinpath("test.csv")
214
+
215
+
216
+ class FungiCLEF2023:
217
+ """
218
+ Name: FungiCLEF2023
219
+ Link: https://www.imageclef.org/FungiCLEF2023
220
+ Size: 1,604 species, ~417K images (train + val + test)
221
+ """
222
+
223
+ def __init__(self, root: str | Path, download: bool = False, progress_bar: bool = True) -> None:
224
+ if isinstance(root, str):
225
+ root = Path(root)
226
+
227
+ self._root = root
228
+
229
+ if download is True:
230
+ self._root.mkdir(parents=True, exist_ok=True)
231
+
232
+ train_images_archive = self._root.joinpath("DF20-300px.tar.gz")
233
+ downloaded_train_images = download_url(
234
+ "http://ptak.felk.cvut.cz/plants/DanishFungiDataset/DF20-300px.tar.gz",
235
+ train_images_archive,
236
+ sha256="b7b572179c3e99dfdfaed4b75872cb6cc59ad8d7dccab331906687ca6bce3b5a",
237
+ progress_bar=progress_bar,
238
+ )
239
+ if downloaded_train_images is True or self.train_images_dir.exists() is False:
240
+ extract_archive(train_images_archive, self._root)
241
+
242
+ val_test_images_archive = self._root.joinpath("DF21_300px.tar.gz")
243
+ downloaded_val_test_images = download_url(
244
+ "http://ptak.felk.cvut.cz/plants/DanishFungiDataset/DF21_300px.tar.gz",
245
+ val_test_images_archive,
246
+ sha256="c0194d3314370a22fb01fb0800330c2e18c90d83f97def55dea84cb5abc2fc3e",
247
+ progress_bar=progress_bar,
248
+ )
249
+ if downloaded_val_test_images is True or self.val_test_images_dir.exists() is False:
250
+ extract_archive(val_test_images_archive, self._root)
251
+
252
+ download_url(
253
+ "http://ptak.felk.cvut.cz/plants/DanishFungiDataset/FungiCLEF2023_train_metadata_PRODUCTION.csv",
254
+ self.train_metadata_path,
255
+ sha256="dc17fc1ab48f0876947402965ee9c25e437c1622f134edab5c7da6c9b853d907",
256
+ progress_bar=progress_bar,
257
+ )
258
+ download_url(
259
+ "http://ptak.felk.cvut.cz/plants/DanishFungiDataset/FungiCLEF2023_val_metadata_PRODUCTION.csv",
260
+ self.val_metadata_path,
261
+ sha256="9573102de721bc93f36e5e03e878cd50cc7f6031a7a3bc82ed0642ec4c691c2a",
262
+ progress_bar=progress_bar,
263
+ )
264
+ download_url(
265
+ "http://ptak.felk.cvut.cz/plants/DanishFungiDataset/FungiCLEF2023_public_test_metadata_PRODUCTION.csv",
266
+ self.test_metadata_path,
267
+ sha256="56ae171d5abf2a99a3ccf8cd96cb685d0f96a7bda055a37afd2fda3e943d991c",
268
+ progress_bar=progress_bar,
269
+ )
270
+
271
+ else:
272
+ if self._root.exists() is False or self._root.is_dir() is False:
273
+ raise RuntimeError("Dataset not found, try download=True to download it")
274
+
275
+ if self.train_images_dir.exists() is False:
276
+ raise RuntimeError("Dataset seems corrupted: DF20_300 directory not found")
277
+
278
+ if self.val_test_images_dir.exists() is False:
279
+ raise RuntimeError("Dataset seems corrupted: DF21_300 directory not found")
280
+
281
+ if self.train_metadata_path.exists() is False:
282
+ raise RuntimeError("Dataset seems corrupted: train metadata CSV not found")
283
+
284
+ if self.val_metadata_path.exists() is False:
285
+ raise RuntimeError("Dataset seems corrupted: validation metadata CSV not found")
286
+
287
+ if self.test_metadata_path.exists() is False:
288
+ raise RuntimeError("Dataset seems corrupted: test metadata CSV not found")
289
+
290
+ @property
291
+ def train_images_dir(self) -> Path:
292
+ return self._root.joinpath("DF20_300")
293
+
294
+ @property
295
+ def val_test_images_dir(self) -> Path:
296
+ return self._root.joinpath("DF21_300")
297
+
298
+ @property
299
+ def val_images_dir(self) -> Path:
300
+ return self.val_test_images_dir
301
+
302
+ @property
303
+ def test_images_dir(self) -> Path:
304
+ return self.val_test_images_dir
305
+
306
+ @property
307
+ def train_metadata_path(self) -> Path:
308
+ return self._root.joinpath("FungiCLEF2023_train_metadata_PRODUCTION.csv")
309
+
310
+ @property
311
+ def val_metadata_path(self) -> Path:
312
+ return self._root.joinpath("FungiCLEF2023_val_metadata_PRODUCTION.csv")
313
+
314
+ @property
315
+ def test_metadata_path(self) -> Path:
316
+ return self._root.joinpath("FungiCLEF2023_public_test_metadata_PRODUCTION.csv")
317
+
318
+
319
+ class NABirds:
320
+ """
321
+ Name: NABirds
322
+ Link: https://dl.allaboutbirds.org/nabirds
323
+ Size: 555 visual categories, ~48K images
324
+
325
+ Note: This dataset requires manual download. Visit the link above.
326
+ """
327
+
328
+ def __init__(self, root: str | Path) -> None:
329
+ if isinstance(root, str):
330
+ root = Path(root)
331
+
332
+ self._root = root
333
+
334
+ # Verify dataset exists
335
+ if self._root.exists() is False or self._root.is_dir() is False:
336
+ raise RuntimeError(
337
+ f"Dataset not found at {self._root}. Download it from https://dl.allaboutbirds.org/nabirds"
338
+ )
339
+
340
+ if self.images_dir.exists() is False:
341
+ raise RuntimeError("Dataset seems corrupted: images directory not found")
342
+
343
+ @property
344
+ def images_dir(self) -> Path:
345
+ return self._root.joinpath("images")
346
+
347
+ @property
348
+ def images_path(self) -> Path:
349
+ return self._root.joinpath("images.txt")
350
+
351
+ @property
352
+ def classes_path(self) -> Path:
353
+ return self._root.joinpath("classes.txt")
354
+
355
+ @property
356
+ def labels_path(self) -> Path:
357
+ return self._root.joinpath("image_class_labels.txt")
358
+
359
+ @property
360
+ def train_test_split_path(self) -> Path:
361
+ return self._root.joinpath("train_test_split.txt")
362
+
363
+ @property
364
+ def hierarchy_path(self) -> Path:
365
+ return self._root.joinpath("hierarchy.txt")
366
+
367
+
368
+ class NeWT:
369
+ """
370
+ Name: NeWT (Natural World Tasks)
371
+ Link: https://github.com/visipedia/newt
372
+ Size: 164 binary classification tasks, ~36K images
373
+ """
374
+
375
+ def __init__(self, root: str | Path, download: bool = False, progress_bar: bool = True) -> None:
376
+ if isinstance(root, str):
377
+ root = Path(root)
378
+
379
+ self._root = root
380
+
381
+ if download is True:
382
+ self._root.mkdir(parents=True, exist_ok=True)
383
+
384
+ # Download images
385
+ images_src = root.parent.joinpath("newt2021_images.tar.gz")
386
+ downloaded_images = download_url(
387
+ "https://ml-inat-competition-datasets.s3.amazonaws.com/newt/newt2021_images.tar.gz",
388
+ images_src,
389
+ sha256="8d40958a867c1296f92b5e125f1f1d8ddaa59f249315740fc366fc606995c055",
390
+ progress_bar=progress_bar,
391
+ )
392
+ if downloaded_images is True or self.images_dir.exists() is False:
393
+ extract_archive(images_src, self._root)
394
+
395
+ # Download labels
396
+ labels_src = root.parent.joinpath("newt2021_labels.csv.tar.gz")
397
+ downloaded_labels = download_url(
398
+ "https://ml-inat-competition-datasets.s3.amazonaws.com/newt/newt2021_labels.csv.tar.gz",
399
+ labels_src,
400
+ sha256="e09807842485ef49ccf51d74ac9f6072c599fc16cf5ee755fdf4064f2e4c3828",
401
+ progress_bar=progress_bar,
402
+ )
403
+ if downloaded_labels is True or self.labels_path.exists() is False:
404
+ extract_archive(labels_src, self._root)
405
+
406
+ else:
407
+ # Some sanity checks
408
+ if self._root.exists() is False or self._root.is_dir() is False:
409
+ raise RuntimeError("Dataset not found, try download=True to download it")
410
+
411
+ if self.images_dir.exists() is False:
412
+ raise RuntimeError("Dataset seems corrupted: images directory not found")
413
+
414
+ if self.labels_path.exists() is False:
415
+ raise RuntimeError("Dataset seems corrupted: labels CSV not found")
416
+
417
+ @property
418
+ def images_dir(self) -> Path:
419
+ return self._root.joinpath("newt2021_images")
420
+
421
+ @property
422
+ def labels_path(self) -> Path:
423
+ return self._root.joinpath("newt2021_labels.csv")
424
+
425
+
426
+ class Plankton:
427
+ """
428
+ Name: SYKE-plankton_IFCB_2022
429
+ Link: https://b2share.eudat.eu/records/xvnrp-7ga56
430
+ Size: 50 phytoplankton classes, ~214K images (train + val)
431
+ """
432
+
433
+ def __init__(self, root: str | Path, download: bool = False, progress_bar: bool = True) -> None:
434
+ if isinstance(root, str):
435
+ root = Path(root)
436
+
437
+ self._root = root
438
+
439
+ if download is True:
440
+ self._root.mkdir(parents=True, exist_ok=True)
441
+
442
+ train_archive = root.parent.joinpath("phytoplankton_labeled.zip")
443
+ downloaded_train = download_url(
444
+ "https://b2share.eudat.eu/records/xvnrp-7ga56/files/phytoplankton_labeled.zip",
445
+ train_archive,
446
+ sha256="0c47acd8dfad46829fe42758a6c24adcdb5e6f2456be4ced975cbb9de9644704",
447
+ progress_bar=progress_bar,
448
+ )
449
+ if downloaded_train is True or self.train_dir.exists() is False:
450
+ extract_archive(train_archive, self._root)
451
+
452
+ val_archive = root.parent.joinpath("phytoplankton_Uto_2021_labeled.zip")
453
+ downloaded_val = download_url(
454
+ "https://b2share.eudat.eu/records/w7y96-6jd66/files/phytoplankton_Ut%C3%B6_2021_labeled.zip",
455
+ val_archive,
456
+ sha256="b017809515c3d58171ecbfd196d6725239e9380c2a22ae880ac56e878bbfcfa4",
457
+ progress_bar=progress_bar,
458
+ )
459
+ if downloaded_val is True or self.val_dir.exists() is False:
460
+ extract_archive(val_archive, self._root)
461
+ # Rename extracted directory to avoid non-ASCII character
462
+ extracted_dir = self._root.joinpath("phytoplankton_Utö_2021_labeled")
463
+ if extracted_dir.exists():
464
+ extracted_dir.rename(self.val_dir)
465
+
466
+ else:
467
+ if self._root.exists() is False or self._root.is_dir() is False:
468
+ raise RuntimeError("Dataset not found, try download=True to download it")
469
+
470
+ if self.train_dir.exists() is False:
471
+ raise RuntimeError("Dataset seems corrupted: train directory not found")
472
+
473
+ if self.val_dir.exists() is False:
474
+ raise RuntimeError("Dataset seems corrupted: val directory not found")
475
+
476
+ @property
477
+ def train_dir(self) -> Path:
478
+ return self._root.joinpath("labeled_20201020")
479
+
480
+ @property
481
+ def val_dir(self) -> Path:
482
+ return self._root.joinpath("phytoplankton_Uto_2021_labeled")
483
+
484
+
485
+ class PlantDoc:
486
+ """
487
+ Name: PlantDoc
488
+ Link: https://github.com/pratikkayal/PlantDoc-Dataset
489
+ Paper: https://arxiv.org/abs/1911.10317
490
+ Size: 27 classes (13 plant species, 17 disease categories), 2,598 images
491
+ """
492
+
493
+ _archive_dir_name = "PlantDoc-Dataset-5467f6012d78d1c446145d5f582da6096f852ae8"
494
+
495
+ def __init__(self, root: str | Path, download: bool = False, progress_bar: bool = True) -> None:
496
+ if isinstance(root, str):
497
+ root = Path(root)
498
+
499
+ self._root = root
500
+
501
+ if download is True:
502
+ archive_path = root.parent.joinpath("plantdoc.zip")
503
+ downloaded = download_url(
504
+ "https://github.com/pratikkayal/PlantDoc-Dataset/archive/"
505
+ "5467f6012d78d1c446145d5f582da6096f852ae8.zip",
506
+ archive_path,
507
+ sha256="94e2b99a500a63efbd48923ed48588fbb01f9b1db66a2d3b5c24eed6466da20f",
508
+ progress_bar=progress_bar,
509
+ )
510
+ if downloaded is True or self._root.exists() is False:
511
+ extract_archive(archive_path, root.parent)
512
+ # Rename extracted directory from commit hash name to friendly name
513
+ extracted_dir = root.parent.joinpath(self._archive_dir_name)
514
+ extracted_dir.rename(self._root)
515
+
516
+ else:
517
+ if self._root.exists() is False or self._root.is_dir() is False:
518
+ raise RuntimeError("Dataset not found, try download=True to download it")
519
+
520
+ if self.train_dir.exists() is False:
521
+ raise RuntimeError("Dataset seems corrupted: train directory not found")
522
+
523
+ if self.test_dir.exists() is False:
524
+ raise RuntimeError("Dataset seems corrupted: test directory not found")
525
+
526
+ @property
527
+ def train_dir(self) -> Path:
528
+ return self._root.joinpath("train")
529
+
530
+ @property
531
+ def test_dir(self) -> Path:
532
+ return self._root.joinpath("test")
533
+
534
+
535
+ class PlantNet:
536
+ """
537
+ Name: PlantNet-300K
538
+ Link: https://plantnet.org/en/2021/03/30/a-plntnet-dataset-for-machine-learning-researchers/
539
+ Size: 1081 species, ~300K images
540
+ """
541
+
542
+ def __init__(self, root: str | Path, download: bool = False, progress_bar: bool = True) -> None:
543
+ if isinstance(root, str):
544
+ root = Path(root)
545
+
546
+ self._root = root
547
+
548
+ if download is True:
549
+ archive_path = root.parent.joinpath("plantnet_300K.zip")
550
+ downloaded = download_url(
551
+ "https://zenodo.org/records/5645731/files/plantnet_300K.zip?download=1",
552
+ archive_path,
553
+ sha256="3a079076c8ad4476beac54d89ea344958256a999428937eba47ec352dadce00d",
554
+ progress_bar=progress_bar,
555
+ )
556
+ if downloaded is True or self._root.exists() is False:
557
+ extract_archive(archive_path, root.parent)
558
+
559
+ else:
560
+ if self._root.exists() is False or self._root.is_dir() is False:
561
+ raise RuntimeError("Dataset not found, try download=True to download it")
562
+
563
+ if self.images_dir.exists() is False:
564
+ raise RuntimeError("Dataset seems corrupted: images directory not found")
565
+
566
+ if self.species_id_to_name_path.exists() is False:
567
+ raise RuntimeError("Dataset seems corrupted: species_id_2_name.json not found")
568
+
569
+ @property
570
+ def images_dir(self) -> Path:
571
+ return self._root.joinpath("images")
572
+
573
+ @property
574
+ def train_dir(self) -> Path:
575
+ return self.images_dir.joinpath("train")
576
+
577
+ @property
578
+ def val_dir(self) -> Path:
579
+ return self.images_dir.joinpath("val")
580
+
581
+ @property
582
+ def test_dir(self) -> Path:
583
+ return self.images_dir.joinpath("test")
584
+
585
+ @property
586
+ def species_id_to_name_path(self) -> Path:
587
+ return self._root.joinpath("plantnet300K_species_id_2_name.json")
588
+
589
+ @property
590
+ def metadata_path(self) -> Path:
591
+ return self._root.joinpath("plantnet300K_metadata.json")
File without changes
@@ -0,0 +1,74 @@
1
+ import argparse
2
+
3
+ from birder.common import cli
4
+ from birder.eval import adversarial
5
+ from birder.eval import classification
6
+ from birder.eval.benchmarks import awa2
7
+ from birder.eval.benchmarks import bioscan5m
8
+ from birder.eval.benchmarks import fishnet
9
+ from birder.eval.benchmarks import flowers102
10
+ from birder.eval.benchmarks import fungiclef
11
+ from birder.eval.benchmarks import nabirds
12
+ from birder.eval.benchmarks import newt
13
+ from birder.eval.benchmarks import plankton
14
+ from birder.eval.benchmarks import plantdoc
15
+ from birder.eval.benchmarks import plantnet
16
+
17
+
18
def main() -> None:
    """
    Entry point for ``python -m birder.eval``.

    Builds the top-level argument parser, registers every evaluation
    sub-command (adversarial, classification, and the embedding-based
    benchmarks), then dispatches to the handler the chosen sub-command
    attached via ``set_parser``.
    """
    parser = argparse.ArgumentParser(
        prog="python -m birder.eval",
        allow_abbrev=False,
        description="Evaluation module",
        epilog=(
            "Usage examples:\n"
            "python -m birder.eval adversarial -n resnet_v2_50 -t il-all -e 100 --method pgd "
            "--gpu data/validation_il-all_packed\n"
            "python -m birder.eval classification --filter '*il-all*' --gpu data/validation_il-all\n"
            "---\n"
            "python -m birder.eval awa2 --embeddings "
            "results/awa2/*.parquet --dataset-path ~/Datasets/Animals_with_Attributes2 --gpu\n"
            "python -m birder.eval bioscan5m --embeddings "
            "results/bioscan5m/*.parquet --data-path ~/Datasets/BIOSCAN-5M/species/testing_unseen\n"
            "python -m birder.eval fishnet --embeddings "
            "results/vit_b16_224px_embeddings.parquet --dataset-path ~/Datasets/fishnet --gpu\n"
            "python -m birder.eval flowers102 --embeddings "
            "results/flowers102_rope_i_vit_s16_pn_aps_c1_pe-core_0_384px_crop1.0_8189_sc_embeddings.parquet "
            "--dataset-path ~/Datasets/Flowers102\n"
            "python -m birder.eval fungiclef --embeddings "
            "results/fungiclef/*.parquet --dataset-path ~/Datasets/FungiCLEF2023\n"
            "python -m birder.eval nabirds --embeddings "
            "results/vit_b16_224px_crop1.0_48562_embeddings.parquet --dataset-path ~/Datasets/nabirds\n"
            "python -m birder.eval newt --embeddings "
            "results/vit_reg4_so150m_p14_ls_dino-v2-bio_0_e45_224px_crop1.0_36032_output.parquet "
            "--dataset-path ~/Datasets/NeWT\n"
            "python -m birder.eval plankton --embeddings "
            "results/plankton/*.parquet --dataset-path ~/Datasets/plankton --gpu\n"
            "python -m birder.eval plantdoc --embeddings "
            "results/plantdoc_embeddings.parquet --dataset-path ~/Datasets/PlantDoc\n"
            "python -m birder.eval plantnet --embeddings "
            "results/plantnet_embeddings.parquet --dataset-path ~/Datasets/plantnet_300K\n"
        ),
        formatter_class=cli.ArgumentHelpFormatter,
    )
    # Each module registers its own sub-parser and sets args.func to its handler
    subparsers = parser.add_subparsers(dest="cmd", required=True)
    adversarial.set_parser(subparsers)
    classification.set_parser(subparsers)

    # Embedding-based benchmark evaluations
    awa2.set_parser(subparsers)
    bioscan5m.set_parser(subparsers)
    fishnet.set_parser(subparsers)
    flowers102.set_parser(subparsers)
    fungiclef.set_parser(subparsers)
    nabirds.set_parser(subparsers)
    newt.set_parser(subparsers)
    plankton.set_parser(subparsers)
    plantdoc.set_parser(subparsers)
    plantnet.set_parser(subparsers)

    args = parser.parse_args()
    # Dispatch: args.func was assigned by the selected sub-command's set_parser
    args.func(args)


if __name__ == "__main__":
    main()