birdnet_analyzer-2.0.0-py3-none-any.whl → birdnet_analyzer-2.0.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122)
  1. birdnet_analyzer/__init__.py +9 -8
  2. birdnet_analyzer/analyze/__init__.py +5 -5
  3. birdnet_analyzer/analyze/__main__.py +3 -4
  4. birdnet_analyzer/analyze/cli.py +25 -25
  5. birdnet_analyzer/analyze/core.py +241 -245
  6. birdnet_analyzer/analyze/utils.py +692 -701
  7. birdnet_analyzer/audio.py +368 -372
  8. birdnet_analyzer/cli.py +709 -707
  9. birdnet_analyzer/config.py +242 -242
  10. birdnet_analyzer/eBird_taxonomy_codes_2021E.json +25279 -25279
  11. birdnet_analyzer/embeddings/__init__.py +3 -4
  12. birdnet_analyzer/embeddings/__main__.py +3 -3
  13. birdnet_analyzer/embeddings/cli.py +12 -13
  14. birdnet_analyzer/embeddings/core.py +69 -70
  15. birdnet_analyzer/embeddings/utils.py +179 -193
  16. birdnet_analyzer/evaluation/__init__.py +196 -195
  17. birdnet_analyzer/evaluation/__main__.py +3 -3
  18. birdnet_analyzer/evaluation/assessment/__init__.py +0 -0
  19. birdnet_analyzer/evaluation/assessment/metrics.py +388 -0
  20. birdnet_analyzer/evaluation/assessment/performance_assessor.py +409 -0
  21. birdnet_analyzer/evaluation/assessment/plotting.py +379 -0
  22. birdnet_analyzer/evaluation/preprocessing/__init__.py +0 -0
  23. birdnet_analyzer/evaluation/preprocessing/data_processor.py +631 -0
  24. birdnet_analyzer/evaluation/preprocessing/utils.py +98 -0
  25. birdnet_analyzer/gui/__init__.py +19 -23
  26. birdnet_analyzer/gui/__main__.py +3 -3
  27. birdnet_analyzer/gui/analysis.py +175 -174
  28. birdnet_analyzer/gui/assets/arrow_down.svg +4 -4
  29. birdnet_analyzer/gui/assets/arrow_left.svg +4 -4
  30. birdnet_analyzer/gui/assets/arrow_right.svg +4 -4
  31. birdnet_analyzer/gui/assets/arrow_up.svg +4 -4
  32. birdnet_analyzer/gui/assets/gui.css +28 -28
  33. birdnet_analyzer/gui/assets/gui.js +93 -93
  34. birdnet_analyzer/gui/embeddings.py +619 -620
  35. birdnet_analyzer/gui/evaluation.py +795 -813
  36. birdnet_analyzer/gui/localization.py +75 -68
  37. birdnet_analyzer/gui/multi_file.py +245 -246
  38. birdnet_analyzer/gui/review.py +519 -527
  39. birdnet_analyzer/gui/segments.py +191 -191
  40. birdnet_analyzer/gui/settings.py +128 -129
  41. birdnet_analyzer/gui/single_file.py +267 -269
  42. birdnet_analyzer/gui/species.py +95 -95
  43. birdnet_analyzer/gui/train.py +696 -698
  44. birdnet_analyzer/gui/utils.py +810 -808
  45. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_af.txt +6522 -6522
  46. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ar.txt +6522 -6522
  47. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_bg.txt +6522 -6522
  48. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ca.txt +6522 -6522
  49. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_cs.txt +6522 -6522
  50. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_da.txt +6522 -6522
  51. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_de.txt +6522 -6522
  52. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_el.txt +6522 -6522
  53. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_en_uk.txt +6522 -6522
  54. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_es.txt +6522 -6522
  55. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_fi.txt +6522 -6522
  56. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_fr.txt +6522 -6522
  57. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_he.txt +6522 -6522
  58. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_hr.txt +6522 -6522
  59. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_hu.txt +6522 -6522
  60. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_in.txt +6522 -6522
  61. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_is.txt +6522 -6522
  62. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_it.txt +6522 -6522
  63. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ja.txt +6522 -6522
  64. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ko.txt +6522 -6522
  65. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_lt.txt +6522 -6522
  66. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ml.txt +6522 -6522
  67. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_nl.txt +6522 -6522
  68. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_no.txt +6522 -6522
  69. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_pl.txt +6522 -6522
  70. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_pt_BR.txt +6522 -6522
  71. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_pt_PT.txt +6522 -6522
  72. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ro.txt +6522 -6522
  73. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ru.txt +6522 -6522
  74. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sk.txt +6522 -6522
  75. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sl.txt +6522 -6522
  76. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sr.txt +6522 -6522
  77. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sv.txt +6522 -6522
  78. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_th.txt +6522 -6522
  79. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_tr.txt +6522 -6522
  80. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_uk.txt +6522 -6522
  81. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_zh.txt +6522 -6522
  82. birdnet_analyzer/lang/de.json +334 -334
  83. birdnet_analyzer/lang/en.json +334 -334
  84. birdnet_analyzer/lang/fi.json +334 -334
  85. birdnet_analyzer/lang/fr.json +334 -334
  86. birdnet_analyzer/lang/id.json +334 -334
  87. birdnet_analyzer/lang/pt-br.json +334 -334
  88. birdnet_analyzer/lang/ru.json +334 -334
  89. birdnet_analyzer/lang/se.json +334 -334
  90. birdnet_analyzer/lang/tlh.json +334 -334
  91. birdnet_analyzer/lang/zh_TW.json +334 -334
  92. birdnet_analyzer/model.py +1212 -1243
  93. birdnet_analyzer/playground.py +5 -0
  94. birdnet_analyzer/search/__init__.py +3 -3
  95. birdnet_analyzer/search/__main__.py +3 -3
  96. birdnet_analyzer/search/cli.py +11 -12
  97. birdnet_analyzer/search/core.py +78 -78
  98. birdnet_analyzer/search/utils.py +107 -111
  99. birdnet_analyzer/segments/__init__.py +3 -3
  100. birdnet_analyzer/segments/__main__.py +3 -3
  101. birdnet_analyzer/segments/cli.py +13 -14
  102. birdnet_analyzer/segments/core.py +81 -78
  103. birdnet_analyzer/segments/utils.py +383 -394
  104. birdnet_analyzer/species/__init__.py +3 -3
  105. birdnet_analyzer/species/__main__.py +3 -3
  106. birdnet_analyzer/species/cli.py +13 -14
  107. birdnet_analyzer/species/core.py +35 -35
  108. birdnet_analyzer/species/utils.py +74 -75
  109. birdnet_analyzer/train/__init__.py +3 -3
  110. birdnet_analyzer/train/__main__.py +3 -3
  111. birdnet_analyzer/train/cli.py +13 -14
  112. birdnet_analyzer/train/core.py +113 -113
  113. birdnet_analyzer/train/utils.py +877 -847
  114. birdnet_analyzer/translate.py +133 -104
  115. birdnet_analyzer/utils.py +426 -419
  116. {birdnet_analyzer-2.0.0.dist-info → birdnet_analyzer-2.0.1.dist-info}/METADATA +137 -129
  117. birdnet_analyzer-2.0.1.dist-info/RECORD +125 -0
  118. {birdnet_analyzer-2.0.0.dist-info → birdnet_analyzer-2.0.1.dist-info}/WHEEL +1 -1
  119. {birdnet_analyzer-2.0.0.dist-info → birdnet_analyzer-2.0.1.dist-info}/licenses/LICENSE +18 -18
  120. birdnet_analyzer-2.0.0.dist-info/RECORD +0 -117
  121. {birdnet_analyzer-2.0.0.dist-info → birdnet_analyzer-2.0.1.dist-info}/entry_points.txt +0 -0
  122. {birdnet_analyzer-2.0.0.dist-info → birdnet_analyzer-2.0.1.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,3 @@
1
- from birdnet_analyzer.embeddings.core import embeddings
2
-
3
-
4
- __all__ = ["embeddings"]
1
+ from birdnet_analyzer.embeddings.core import embeddings
2
+
3
+ __all__ = ["embeddings"]
@@ -1,3 +1,3 @@
1
- from birdnet_analyzer.embeddings.cli import main
2
-
3
- main()
1
+ from birdnet_analyzer.embeddings.cli import main
2
+
3
+ main()
@@ -1,13 +1,12 @@
1
- from birdnet_analyzer.utils import runtime_error_handler
2
-
3
- from birdnet_analyzer import embeddings
4
-
5
-
6
- @runtime_error_handler
7
- def main():
8
- import birdnet_analyzer.cli as cli
9
-
10
- parser = cli.embeddings_parser()
11
- args = parser.parse_args()
12
-
13
- embeddings(**vars(args))
1
+ from birdnet_analyzer import embeddings
2
+ from birdnet_analyzer.utils import runtime_error_handler
3
+
4
+
5
+ @runtime_error_handler
6
+ def main():
7
+ from birdnet_analyzer import cli
8
+
9
+ parser = cli.embeddings_parser()
10
+ args = parser.parse_args()
11
+
12
+ embeddings(**vars(args))
@@ -1,70 +1,69 @@
1
- def embeddings(
2
- input: str,
3
- database: str,
4
- *,
5
- overlap: float = 0.0,
6
- audio_speed: float = 1.0,
7
- fmin: int = 0,
8
- fmax: int = 15000,
9
- threads: int = 8,
10
- batch_size: int = 1,
11
- ):
12
- """
13
- Generates embeddings for audio files using the BirdNET-Analyzer.
14
- This function processes audio files to extract embeddings, which are
15
- representations of audio features. The embeddings can be used for
16
- further analysis or comparison.
17
- Args:
18
- input (str): Path to the input audio file or directory containing audio files.
19
- database (str): Path to the database where embeddings will be stored.
20
- overlap (float, optional): Overlap between consecutive audio segments in seconds. Defaults to 0.0.
21
- audio_speed (float, optional): Speed factor for audio processing. Defaults to 1.0.
22
- fmin (int, optional): Minimum frequency (in Hz) for audio analysis. Defaults to 0.
23
- fmax (int, optional): Maximum frequency (in Hz) for audio analysis. Defaults to 15000.
24
- threads (int, optional): Number of threads to use for processing. Defaults to 8.
25
- batch_size (int, optional): Number of audio segments to process in a single batch. Defaults to 1.
26
- Raises:
27
- FileNotFoundError: If the input path or database path does not exist.
28
- ValueError: If any of the parameters are invalid.
29
- Note:
30
- Ensure that the required model files are downloaded and available before
31
- calling this function. The `ensure_model_exists` function is used to
32
- verify this.
33
- Example:
34
- embeddings(
35
- input="path/to/audio",
36
- database="path/to/database",
37
- overlap=0.5,
38
- audio_speed=1.0,
39
- fmin=500,
40
- fmax=10000,
41
- threads=4,
42
- batch_size=2
43
- )
44
- """
45
- from birdnet_analyzer.embeddings.utils import run
46
- from birdnet_analyzer.utils import ensure_model_exists
47
-
48
- ensure_model_exists()
49
- run(input, database, overlap, audio_speed, fmin, fmax, threads, batch_size)
50
-
51
-
52
- def get_database(db_path: str):
53
- """Get the database object. Creates or opens the databse.
54
- Args:
55
- db: The path to the database.
56
- Returns:
57
- The database object.
58
- """
59
- import os
60
-
61
- from perch_hoplite.db import sqlite_usearch_impl
62
-
63
- if not os.path.exists(db_path):
64
- os.makedirs(os.path.dirname(db_path), exist_ok=True)
65
- db = sqlite_usearch_impl.SQLiteUsearchDB.create(
66
- db_path=db_path,
67
- usearch_cfg=sqlite_usearch_impl.get_default_usearch_config(embedding_dim=1024), # TODO dont hardcode this
68
- )
69
- return db
70
- return sqlite_usearch_impl.SQLiteUsearchDB.create(db_path=db_path)
1
+ def embeddings(
2
+ audio_input: str,
3
+ database: str,
4
+ *,
5
+ overlap: float = 0.0,
6
+ audio_speed: float = 1.0,
7
+ fmin: int = 0,
8
+ fmax: int = 15000,
9
+ threads: int = 8,
10
+ batch_size: int = 1,
11
+ ):
12
+ """
13
+ Generates embeddings for audio files using the BirdNET-Analyzer.
14
+ This function processes audio files to extract embeddings, which are
15
+ representations of audio features. The embeddings can be used for
16
+ further analysis or comparison.
17
+ Args:
18
+ audio_input (str): Path to the input audio file or directory containing audio files.
19
+ database (str): Path to the database where embeddings will be stored.
20
+ overlap (float, optional): Overlap between consecutive audio segments in seconds. Defaults to 0.0.
21
+ audio_speed (float, optional): Speed factor for audio processing. Defaults to 1.0.
22
+ fmin (int, optional): Minimum frequency (in Hz) for audio analysis. Defaults to 0.
23
+ fmax (int, optional): Maximum frequency (in Hz) for audio analysis. Defaults to 15000.
24
+ threads (int, optional): Number of threads to use for processing. Defaults to 8.
25
+ batch_size (int, optional): Number of audio segments to process in a single batch. Defaults to 1.
26
+ Raises:
27
+ FileNotFoundError: If the input path or database path does not exist.
28
+ ValueError: If any of the parameters are invalid.
29
+ Note:
30
+ Ensure that the required model files are downloaded and available before
31
+ calling this function. The `ensure_model_exists` function is used to
32
+ verify this.
33
+ Example:
34
+ embeddings(
35
+ "path/to/audio",
36
+ "path/to/database",
37
+ overlap=0.5,
38
+ audio_speed=1.0,
39
+ fmin=500,
40
+ fmax=10000,
41
+ threads=4,
42
+ batch_size=2
43
+ )
44
+ """
45
+ from birdnet_analyzer.embeddings.utils import run
46
+ from birdnet_analyzer.utils import ensure_model_exists
47
+
48
+ ensure_model_exists()
49
+ run(audio_input, database, overlap, audio_speed, fmin, fmax, threads, batch_size)
50
+
51
+
52
+ def get_database(db_path: str):
53
+ """Get the database object. Creates or opens the database.
54
+ Args:
55
+ db_path: The path to the database.
56
+ Returns:
57
+ The database object.
58
+ """
59
+ import os
60
+
61
+ from perch_hoplite.db import sqlite_usearch_impl
62
+
63
+ if not os.path.exists(db_path):
64
+ os.makedirs(os.path.dirname(db_path), exist_ok=True)
65
+ return sqlite_usearch_impl.SQLiteUsearchDB.create(
66
+ db_path=db_path,
67
+ usearch_cfg=sqlite_usearch_impl.get_default_usearch_config(embedding_dim=1024), # TODO: dont hardcode this
68
+ )
69
+ return sqlite_usearch_impl.SQLiteUsearchDB.create(db_path=db_path)
@@ -1,193 +1,179 @@
1
- """Module used to extract embeddings for samples."""
2
-
3
- import datetime
4
- import os
5
-
6
- import numpy as np
7
-
8
- import birdnet_analyzer.audio as audio
9
- import birdnet_analyzer.config as cfg
10
- import birdnet_analyzer.model as model
11
- import birdnet_analyzer.utils as utils
12
- from birdnet_analyzer.analyze.utils import get_raw_audio_from_file
13
- from birdnet_analyzer.embeddings.core import get_database
14
-
15
-
16
- from perch_hoplite.db import sqlite_usearch_impl
17
- from perch_hoplite.db import interface as hoplite
18
- from ml_collections import ConfigDict
19
- from functools import partial
20
- from tqdm import tqdm
21
- from multiprocessing import Pool
22
-
23
-
24
- DATASET_NAME: str = "birdnet_analyzer_dataset"
25
-
26
-
27
- def analyze_file(item, db: sqlite_usearch_impl.SQLiteUsearchDB):
28
- """Extracts the embeddings for a file.
29
-
30
- Args:
31
- item: (filepath, config)
32
- """
33
- # Get file path and restore cfg
34
- fpath: str = item[0]
35
- cfg.set_config(item[1])
36
-
37
- offset = 0
38
- duration = cfg.FILE_SPLITTING_DURATION
39
-
40
- try:
41
- fileLengthSeconds = int(audio.get_audio_file_length(fpath))
42
- except Exception as ex:
43
- # Write error log
44
- print(f"Error: Cannot analyze audio file {fpath}. File corrupt?\n", flush=True)
45
- utils.write_error_log(ex)
46
-
47
- return None
48
-
49
- # Start time
50
- start_time = datetime.datetime.now()
51
-
52
- # Status
53
- print(f"Analyzing {fpath}", flush=True)
54
-
55
- source_id = fpath
56
-
57
- # Process each chunk
58
- try:
59
- while offset < fileLengthSeconds:
60
- chunks = get_raw_audio_from_file(fpath, offset, duration)
61
- start, end = offset, cfg.SIG_LENGTH + offset
62
- samples = []
63
- timestamps = []
64
-
65
- for c in range(len(chunks)):
66
- # Add to batch
67
- samples.append(chunks[c])
68
- timestamps.append([start, end])
69
-
70
- # Advance start and end
71
- start += cfg.SIG_LENGTH - cfg.SIG_OVERLAP
72
- end = start + cfg.SIG_LENGTH
73
-
74
- # Check if batch is full or last chunk
75
- if len(samples) < cfg.BATCH_SIZE and c < len(chunks) - 1:
76
- continue
77
-
78
- # Prepare sample and pass through model
79
- data = np.array(samples, dtype="float32")
80
- e = model.embeddings(data)
81
-
82
- # Add to results
83
- for i in range(len(samples)):
84
- # Get timestamp
85
- s_start, s_end = timestamps[i]
86
-
87
- # Check if embedding already exists
88
- existing_embedding = db.get_embeddings_by_source(
89
- DATASET_NAME, source_id, np.array([s_start, s_end])
90
- )
91
-
92
- if existing_embedding.size == 0:
93
- # Get prediction
94
- embeddings = e[i]
95
-
96
- # Store embeddings
97
- embeddings_source = hoplite.EmbeddingSource(DATASET_NAME, source_id, np.array([s_start, s_end]))
98
-
99
- # Insert into database
100
- db.insert_embedding(embeddings, embeddings_source)
101
- db.commit()
102
-
103
- # Reset batch
104
- samples = []
105
- timestamps = []
106
-
107
- offset = offset + duration
108
-
109
- except Exception as ex:
110
- # Write error log
111
- print(f"Error: Cannot analyze audio file {fpath}.", flush=True)
112
- utils.write_error_log(ex)
113
-
114
- return
115
-
116
- delta_time = (datetime.datetime.now() - start_time).total_seconds()
117
- print("Finished {} in {:.2f} seconds".format(fpath, delta_time), flush=True)
118
-
119
-
120
- def check_database_settings(db: sqlite_usearch_impl.SQLiteUsearchDB):
121
- try:
122
- settings = db.get_metadata("birdnet_analyzer_settings")
123
- if (
124
- settings["BANDPASS_FMIN"] != cfg.BANDPASS_FMIN
125
- or settings["BANDPASS_FMAX"] != cfg.BANDPASS_FMAX
126
- or settings["AUDIO_SPEED"] != cfg.AUDIO_SPEED
127
- ):
128
- raise ValueError(
129
- "Database settings do not match current configuration. DB Settings are: fmin: {}, fmax: {}, audio_speed: {}".format(
130
- settings["BANDPASS_FMIN"], settings["BANDPASS_FMAX"], settings["AUDIO_SPEED"]
131
- )
132
- )
133
- except KeyError:
134
- settings = ConfigDict(
135
- {"BANDPASS_FMIN": cfg.BANDPASS_FMIN, "BANDPASS_FMAX": cfg.BANDPASS_FMAX, "AUDIO_SPEED": cfg.AUDIO_SPEED}
136
- )
137
- db.insert_metadata("birdnet_analyzer_settings", settings)
138
- db.commit()
139
-
140
-
141
- def run(input, database, overlap, audio_speed, fmin, fmax, threads, batchsize):
142
- ### Make sure to comment out appropriately if you are not using args. ###
143
-
144
- # Set input and output path
145
- cfg.INPUT_PATH = input
146
-
147
- # Parse input files
148
- if os.path.isdir(cfg.INPUT_PATH):
149
- cfg.FILE_LIST = utils.collect_audio_files(cfg.INPUT_PATH)
150
- else:
151
- cfg.FILE_LIST = [cfg.INPUT_PATH]
152
-
153
- # Set overlap
154
- cfg.SIG_OVERLAP = max(0.0, min(2.9, float(overlap)))
155
-
156
- # Set audio speed
157
- cfg.AUDIO_SPEED = max(0.01, audio_speed)
158
-
159
- # Set bandpass frequency range
160
- cfg.BANDPASS_FMIN = max(0, min(cfg.SIG_FMAX, int(fmin)))
161
- cfg.BANDPASS_FMAX = max(cfg.SIG_FMIN, min(cfg.SIG_FMAX, int(fmax)))
162
-
163
- # Set number of threads
164
- if os.path.isdir(cfg.INPUT_PATH):
165
- cfg.CPU_THREADS = max(1, int(threads))
166
- cfg.TFLITE_THREADS = 1
167
- else:
168
- cfg.CPU_THREADS = 1
169
- cfg.TFLITE_THREADS = max(1, int(threads))
170
-
171
- cfg.CPU_THREADS = 1 # TODO: with the current implementation, we can't use more than 1 thread
172
-
173
- # Set batch size
174
- cfg.BATCH_SIZE = max(1, int(batchsize))
175
-
176
- # Add config items to each file list entry.
177
- # We have to do this for Windows which does not
178
- # support fork() and thus each process has to
179
- # have its own config. USE LINUX!
180
- flist = [(f, cfg.get_config()) for f in cfg.FILE_LIST]
181
-
182
- db = get_database(database)
183
- check_database_settings(db)
184
-
185
- # Analyze files
186
- if cfg.CPU_THREADS < 2:
187
- for entry in tqdm(flist):
188
- analyze_file(entry, db)
189
- else:
190
- with Pool(cfg.CPU_THREADS) as p:
191
- tqdm(p.imap(partial(analyze_file, db=db), flist))
192
-
193
- db.db.close()
1
+ """Module used to extract embeddings for samples."""
2
+
3
+ import datetime
4
+ import os
5
+ from functools import partial
6
+ from multiprocessing import Pool
7
+
8
+ import numpy as np
9
+ from ml_collections import ConfigDict
10
+ from perch_hoplite.db import interface as hoplite
11
+ from perch_hoplite.db import sqlite_usearch_impl
12
+ from tqdm import tqdm
13
+
14
+ import birdnet_analyzer.config as cfg
15
+ from birdnet_analyzer import audio, model, utils
16
+ from birdnet_analyzer.analyze.utils import get_raw_audio_from_file
17
+ from birdnet_analyzer.embeddings.core import get_database
18
+
19
+ DATASET_NAME: str = "birdnet_analyzer_dataset"
20
+
21
+
22
+ def analyze_file(item, db: sqlite_usearch_impl.SQLiteUsearchDB):
23
+ """Extracts the embeddings for a file.
24
+
25
+ Args:
26
+ item: (filepath, config)
27
+ """
28
+ # Get file path and restore cfg
29
+ fpath: str = item[0]
30
+ cfg.set_config(item[1])
31
+
32
+ offset = 0
33
+ duration = cfg.FILE_SPLITTING_DURATION
34
+
35
+ try:
36
+ fileLengthSeconds = int(audio.get_audio_file_length(fpath))
37
+ except Exception as ex:
38
+ # Write error log
39
+ print(f"Error: Cannot analyze audio file {fpath}. File corrupt?\n", flush=True)
40
+ utils.write_error_log(ex)
41
+
42
+ return
43
+
44
+ # Start time
45
+ start_time = datetime.datetime.now()
46
+
47
+ # Status
48
+ print(f"Analyzing {fpath}", flush=True)
49
+
50
+ source_id = fpath
51
+
52
+ # Process each chunk
53
+ try:
54
+ while offset < fileLengthSeconds:
55
+ chunks = get_raw_audio_from_file(fpath, offset, duration)
56
+ start, end = offset, cfg.SIG_LENGTH + offset
57
+ samples = []
58
+ timestamps = []
59
+
60
+ for c in range(len(chunks)):
61
+ # Add to batch
62
+ samples.append(chunks[c])
63
+ timestamps.append([start, end])
64
+
65
+ # Advance start and end
66
+ start += cfg.SIG_LENGTH - cfg.SIG_OVERLAP
67
+ end = start + cfg.SIG_LENGTH
68
+
69
+ # Check if batch is full or last chunk
70
+ if len(samples) < cfg.BATCH_SIZE and c < len(chunks) - 1:
71
+ continue
72
+
73
+ # Prepare sample and pass through model
74
+ data = np.array(samples, dtype="float32")
75
+ e = model.embeddings(data)
76
+
77
+ # Add to results
78
+ for i in range(len(samples)):
79
+ # Get timestamp
80
+ s_start, s_end = timestamps[i]
81
+
82
+ # Check if embedding already exists
83
+ existing_embedding = db.get_embeddings_by_source(DATASET_NAME, source_id, np.array([s_start, s_end]))
84
+
85
+ if existing_embedding.size == 0:
86
+ # Get prediction
87
+ embeddings = e[i]
88
+
89
+ # Store embeddings
90
+ embeddings_source = hoplite.EmbeddingSource(DATASET_NAME, source_id, np.array([s_start, s_end]))
91
+
92
+ # Insert into database
93
+ db.insert_embedding(embeddings, embeddings_source)
94
+ db.commit()
95
+
96
+ # Reset batch
97
+ samples = []
98
+ timestamps = []
99
+
100
+ offset = offset + duration
101
+
102
+ except Exception as ex:
103
+ # Write error log
104
+ print(f"Error: Cannot analyze audio file {fpath}.", flush=True)
105
+ utils.write_error_log(ex)
106
+
107
+ return
108
+
109
+ delta_time = (datetime.datetime.now() - start_time).total_seconds()
110
+ print(f"Finished {fpath} in {delta_time:.2f} seconds", flush=True)
111
+
112
+
113
+ def check_database_settings(db: sqlite_usearch_impl.SQLiteUsearchDB):
114
+ try:
115
+ settings = db.get_metadata("birdnet_analyzer_settings")
116
+ if settings["BANDPASS_FMIN"] != cfg.BANDPASS_FMIN or settings["BANDPASS_FMAX"] != cfg.BANDPASS_FMAX or settings["AUDIO_SPEED"] != cfg.AUDIO_SPEED:
117
+ raise ValueError(
118
+ "Database settings do not match current configuration. DB Settings are: fmin:"
119
+ + f"{settings['BANDPASS_FMIN']}, fmax: {settings['BANDPASS_FMAX']}, audio_speed: {settings['AUDIO_SPEED']}"
120
+ )
121
+ except KeyError:
122
+ settings = ConfigDict({"BANDPASS_FMIN": cfg.BANDPASS_FMIN, "BANDPASS_FMAX": cfg.BANDPASS_FMAX, "AUDIO_SPEED": cfg.AUDIO_SPEED})
123
+ db.insert_metadata("birdnet_analyzer_settings", settings)
124
+ db.commit()
125
+
126
+
127
+ def run(audio_input, database, overlap, audio_speed, fmin, fmax, threads, batchsize):
128
+ ### Make sure to comment out appropriately if you are not using args. ###
129
+
130
+ # Set input and output path
131
+ cfg.INPUT_PATH = audio_input
132
+
133
+ # Parse input files
134
+ if os.path.isdir(cfg.INPUT_PATH):
135
+ cfg.FILE_LIST = utils.collect_audio_files(cfg.INPUT_PATH)
136
+ else:
137
+ cfg.FILE_LIST = [cfg.INPUT_PATH]
138
+
139
+ # Set overlap
140
+ cfg.SIG_OVERLAP = max(0.0, min(2.9, float(overlap)))
141
+
142
+ # Set audio speed
143
+ cfg.AUDIO_SPEED = max(0.01, audio_speed)
144
+
145
+ # Set bandpass frequency range
146
+ cfg.BANDPASS_FMIN = max(0, min(cfg.SIG_FMAX, int(fmin)))
147
+ cfg.BANDPASS_FMAX = max(cfg.SIG_FMIN, min(cfg.SIG_FMAX, int(fmax)))
148
+
149
+ # Set number of threads
150
+ if os.path.isdir(cfg.INPUT_PATH):
151
+ cfg.CPU_THREADS = max(1, int(threads))
152
+ cfg.TFLITE_THREADS = 1
153
+ else:
154
+ cfg.CPU_THREADS = 1
155
+ cfg.TFLITE_THREADS = max(1, int(threads))
156
+
157
+ cfg.CPU_THREADS = 1 # TODO: with the current implementation, we can't use more than 1 thread
158
+
159
+ # Set batch size
160
+ cfg.BATCH_SIZE = max(1, int(batchsize))
161
+
162
+ # Add config items to each file list entry.
163
+ # We have to do this for Windows which does not
164
+ # support fork() and thus each process has to
165
+ # have its own config. USE LINUX!
166
+ flist = [(f, cfg.get_config()) for f in cfg.FILE_LIST]
167
+
168
+ db = get_database(database)
169
+ check_database_settings(db)
170
+
171
+ # Analyze files
172
+ if cfg.CPU_THREADS < 2:
173
+ for entry in tqdm(flist):
174
+ analyze_file(entry, db)
175
+ else:
176
+ with Pool(cfg.CPU_THREADS) as p:
177
+ tqdm(p.imap(partial(analyze_file, db=db), flist))
178
+
179
+ db.db.close()