birdnet-analyzer 2.0.1__py3-none-any.whl → 2.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121)
  1. birdnet_analyzer/__init__.py +9 -9
  2. birdnet_analyzer/analyze/__init__.py +19 -5
  3. birdnet_analyzer/analyze/__main__.py +3 -3
  4. birdnet_analyzer/analyze/cli.py +30 -25
  5. birdnet_analyzer/analyze/core.py +268 -241
  6. birdnet_analyzer/analyze/utils.py +700 -692
  7. birdnet_analyzer/audio.py +368 -368
  8. birdnet_analyzer/cli.py +732 -709
  9. birdnet_analyzer/config.py +243 -242
  10. birdnet_analyzer/eBird_taxonomy_codes_2024E.json +13046 -0
  11. birdnet_analyzer/embeddings/__init__.py +3 -3
  12. birdnet_analyzer/embeddings/__main__.py +3 -3
  13. birdnet_analyzer/embeddings/cli.py +12 -12
  14. birdnet_analyzer/embeddings/core.py +70 -69
  15. birdnet_analyzer/embeddings/utils.py +173 -179
  16. birdnet_analyzer/evaluation/__init__.py +189 -196
  17. birdnet_analyzer/evaluation/__main__.py +3 -3
  18. birdnet_analyzer/evaluation/assessment/metrics.py +388 -388
  19. birdnet_analyzer/evaluation/assessment/performance_assessor.py +364 -409
  20. birdnet_analyzer/evaluation/assessment/plotting.py +378 -379
  21. birdnet_analyzer/evaluation/preprocessing/data_processor.py +631 -631
  22. birdnet_analyzer/evaluation/preprocessing/utils.py +98 -98
  23. birdnet_analyzer/gui/__init__.py +19 -19
  24. birdnet_analyzer/gui/__main__.py +3 -3
  25. birdnet_analyzer/gui/analysis.py +179 -175
  26. birdnet_analyzer/gui/assets/arrow_down.svg +4 -4
  27. birdnet_analyzer/gui/assets/arrow_left.svg +4 -4
  28. birdnet_analyzer/gui/assets/arrow_right.svg +4 -4
  29. birdnet_analyzer/gui/assets/arrow_up.svg +4 -4
  30. birdnet_analyzer/gui/assets/gui.css +36 -28
  31. birdnet_analyzer/gui/assets/gui.js +89 -93
  32. birdnet_analyzer/gui/embeddings.py +638 -619
  33. birdnet_analyzer/gui/evaluation.py +801 -795
  34. birdnet_analyzer/gui/localization.py +75 -75
  35. birdnet_analyzer/gui/multi_file.py +265 -245
  36. birdnet_analyzer/gui/review.py +472 -519
  37. birdnet_analyzer/gui/segments.py +191 -191
  38. birdnet_analyzer/gui/settings.py +149 -128
  39. birdnet_analyzer/gui/single_file.py +264 -267
  40. birdnet_analyzer/gui/species.py +95 -95
  41. birdnet_analyzer/gui/train.py +687 -696
  42. birdnet_analyzer/gui/utils.py +803 -810
  43. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_af.txt +6522 -6522
  44. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ar.txt +6522 -6522
  45. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_bg.txt +6522 -6522
  46. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ca.txt +6522 -6522
  47. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_cs.txt +6522 -6522
  48. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_da.txt +6522 -6522
  49. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_de.txt +6522 -6522
  50. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_el.txt +6522 -6522
  51. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_en_uk.txt +6522 -6522
  52. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_es.txt +6522 -6522
  53. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_fi.txt +6522 -6522
  54. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_fr.txt +6522 -6522
  55. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_he.txt +6522 -6522
  56. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_hr.txt +6522 -6522
  57. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_hu.txt +6522 -6522
  58. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_in.txt +6522 -6522
  59. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_is.txt +6522 -6522
  60. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_it.txt +6522 -6522
  61. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ja.txt +6522 -6522
  62. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ko.txt +6522 -6522
  63. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_lt.txt +6522 -6522
  64. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ml.txt +6522 -6522
  65. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_nl.txt +6522 -6522
  66. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_no.txt +6522 -6522
  67. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_pl.txt +6522 -6522
  68. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_pt_BR.txt +6522 -6522
  69. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_pt_PT.txt +6522 -6522
  70. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ro.txt +6522 -6522
  71. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ru.txt +6522 -6522
  72. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sk.txt +6522 -6522
  73. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sl.txt +6522 -6522
  74. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sr.txt +6522 -6522
  75. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sv.txt +6522 -6522
  76. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_th.txt +6522 -6522
  77. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_tr.txt +6522 -6522
  78. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_uk.txt +6522 -6522
  79. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_zh.txt +6522 -6522
  80. birdnet_analyzer/lang/de.json +342 -334
  81. birdnet_analyzer/lang/en.json +342 -334
  82. birdnet_analyzer/lang/fi.json +342 -334
  83. birdnet_analyzer/lang/fr.json +342 -334
  84. birdnet_analyzer/lang/id.json +342 -334
  85. birdnet_analyzer/lang/pt-br.json +342 -334
  86. birdnet_analyzer/lang/ru.json +342 -334
  87. birdnet_analyzer/lang/se.json +342 -334
  88. birdnet_analyzer/lang/tlh.json +342 -334
  89. birdnet_analyzer/lang/zh_TW.json +342 -334
  90. birdnet_analyzer/model.py +1213 -1212
  91. birdnet_analyzer/search/__init__.py +3 -3
  92. birdnet_analyzer/search/__main__.py +3 -3
  93. birdnet_analyzer/search/cli.py +11 -11
  94. birdnet_analyzer/search/core.py +78 -78
  95. birdnet_analyzer/search/utils.py +104 -107
  96. birdnet_analyzer/segments/__init__.py +3 -3
  97. birdnet_analyzer/segments/__main__.py +3 -3
  98. birdnet_analyzer/segments/cli.py +13 -13
  99. birdnet_analyzer/segments/core.py +81 -81
  100. birdnet_analyzer/segments/utils.py +383 -383
  101. birdnet_analyzer/species/__init__.py +3 -3
  102. birdnet_analyzer/species/__main__.py +3 -3
  103. birdnet_analyzer/species/cli.py +13 -13
  104. birdnet_analyzer/species/core.py +35 -35
  105. birdnet_analyzer/species/utils.py +73 -74
  106. birdnet_analyzer/train/__init__.py +3 -3
  107. birdnet_analyzer/train/__main__.py +3 -3
  108. birdnet_analyzer/train/cli.py +13 -13
  109. birdnet_analyzer/train/core.py +113 -113
  110. birdnet_analyzer/train/utils.py +878 -877
  111. birdnet_analyzer/translate.py +132 -133
  112. birdnet_analyzer/utils.py +425 -426
  113. {birdnet_analyzer-2.0.1.dist-info → birdnet_analyzer-2.1.1.dist-info}/METADATA +147 -137
  114. birdnet_analyzer-2.1.1.dist-info/RECORD +124 -0
  115. {birdnet_analyzer-2.0.1.dist-info → birdnet_analyzer-2.1.1.dist-info}/WHEEL +1 -1
  116. {birdnet_analyzer-2.0.1.dist-info → birdnet_analyzer-2.1.1.dist-info}/licenses/LICENSE +18 -18
  117. birdnet_analyzer/eBird_taxonomy_codes_2021E.json +0 -25280
  118. birdnet_analyzer/playground.py +0 -5
  119. birdnet_analyzer-2.0.1.dist-info/RECORD +0 -125
  120. {birdnet_analyzer-2.0.1.dist-info → birdnet_analyzer-2.1.1.dist-info}/entry_points.txt +0 -0
  121. {birdnet_analyzer-2.0.1.dist-info → birdnet_analyzer-2.1.1.dist-info}/top_level.txt +0 -0
@@ -1,619 +1,638 @@
1
- import os
2
- from functools import partial
3
-
4
- import gradio as gr
5
-
6
- import birdnet_analyzer.config as cfg
7
- import birdnet_analyzer.gui.localization as loc
8
- import birdnet_analyzer.gui.utils as gu
9
- from birdnet_analyzer.embeddings.core import get_database as get_embeddings_database
10
- from birdnet_analyzer.search.core import get_database as get_search_database
11
-
12
- SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__))
13
- PAGE_SIZE = 4
14
-
15
-
16
- def play_audio(audio_infos):
17
- from birdnet_analyzer import audio
18
-
19
- arr, sr = audio.open_audio_file(
20
- audio_infos[0],
21
- offset=audio_infos[1],
22
- duration=audio_infos[2],
23
- speed=audio_infos[5],
24
- fmin=audio_infos[6],
25
- fmax=audio_infos[7],
26
- )
27
-
28
- return sr, arr
29
-
30
-
31
- def update_export_state(audio_infos, checkbox_value, export_state: dict):
32
- if checkbox_value:
33
- export_state[audio_infos[3]] = audio_infos
34
- else:
35
- export_state.pop(audio_infos[3], None)
36
-
37
- return export_state
38
-
39
-
40
- def rum_embeddings_with_tqdm_tracking(
41
- input_path,
42
- db_directory,
43
- db_name,
44
- overlap,
45
- threads,
46
- batch_size,
47
- audio_speed,
48
- fmin,
49
- fmax,
50
- progress=gr.Progress(track_tqdm=True),
51
- ):
52
- return run_embeddings(
53
- input_path,
54
- db_directory,
55
- db_name,
56
- overlap,
57
- threads,
58
- batch_size,
59
- audio_speed,
60
- fmin,
61
- fmax,
62
- progress,
63
- )
64
-
65
-
66
- @gu.gui_runtime_error_handler
67
- def run_embeddings(
68
- input_path,
69
- db_directory,
70
- db_name,
71
- overlap,
72
- threads,
73
- batch_size,
74
- audio_speed,
75
- fmin,
76
- fmax,
77
- progress,
78
- ):
79
- from birdnet_analyzer.embeddings.utils import run
80
-
81
- gu.validate(input_path, loc.localize("embeddings-input-dir-validation-message"))
82
- gu.validate(db_directory, loc.localize("embeddings-db-dir-validation-message"))
83
- gu.validate(db_name, loc.localize("embeddings-db-name-validation-message"))
84
- db_path = os.path.join(db_directory, db_name)
85
-
86
- db = get_embeddings_database(db_path)
87
-
88
- try:
89
- settings = db.get_metadata("birdnet_analyzer_settings")
90
- db.db.close()
91
- run(
92
- input_path,
93
- db_path,
94
- overlap,
95
- settings["AUDIO_SPEED"],
96
- settings["BANDPASS_FMIN"],
97
- settings["BANDPASS_FMAX"],
98
- threads,
99
- batch_size,
100
- )
101
- except Exception as e:
102
- db.db.close()
103
- # Transform audiospeed from slider to float
104
- audio_speed = max(0.1, 1.0 / (audio_speed * -1)) if audio_speed < 0 else max(1.0, float(audio_speed))
105
-
106
- if fmin is None or fmax is None or fmin < cfg.SIG_FMIN or fmax > cfg.SIG_FMAX or fmin > fmax:
107
- raise gr.Error(f"{loc.localize('validation-no-valid-frequency')} [{cfg.SIG_FMIN}, {cfg.SIG_FMAX}]") from e
108
-
109
- run(input_path, db_path, overlap, audio_speed, fmin, fmax, threads, batch_size)
110
-
111
- gr.Info(f"{loc.localize('embeddings-tab-finish-info')} {db_path}")
112
-
113
- return gr.Plot(), gr.Slider(visible=False), gr.Number(visible=False), gr.Number(visible=False)
114
-
115
-
116
- @gu.gui_runtime_error_handler
117
- def run_search(db_path, query_path, max_samples, score_fn, crop_mode, crop_overlap):
118
- from birdnet_analyzer.search.utils import get_search_results
119
-
120
- gu.validate(db_path, loc.localize("embeddings-search-db-validation-message"))
121
- gu.validate(query_path, loc.localize("embeddings-search-query-validation-message"))
122
- gu.validate(max_samples, loc.localize("embeddings-search-max-samples-validation-message"))
123
-
124
- db = get_search_database(db_path)
125
- settings = db.get_metadata("birdnet_analyzer_settings")
126
-
127
- results = get_search_results(
128
- query_path,
129
- db,
130
- max_samples,
131
- settings["AUDIO_SPEED"],
132
- settings["BANDPASS_FMIN"],
133
- settings["BANDPASS_FMAX"],
134
- score_fn,
135
- crop_mode,
136
- crop_overlap,
137
- )
138
- db.db.close() # Close the database connection to avoid having wal/shm files
139
-
140
- chunks = [results[i : i + PAGE_SIZE] for i in range(0, len(results), PAGE_SIZE)]
141
-
142
- return chunks, 0, gr.Button(interactive=True), {}
143
-
144
-
145
- def run_export(export_state: dict):
146
- from birdnet_analyzer import audio
147
-
148
- if len(export_state.items()) > 0:
149
- export_folder = gu.select_folder(state_key="embeddings-search-export-folder")
150
-
151
- if export_folder:
152
- for file in export_state.values():
153
- filebasename = os.path.basename(file[0])
154
- filebasename = os.path.splitext(filebasename)[0]
155
- dest = os.path.join(export_folder, f"{file[4]:.5f}_{filebasename}_{file[1]}_{file[1] + file[2]}.wav")
156
- # @mamau: Missing audio speed?
157
- sig, rate = audio.open_audio_file(file[0], offset=file[1], duration=file[2], sample_rate=None)
158
- audio.save_signal(sig, dest, rate)
159
-
160
- gr.Info(f"{loc.localize('embeddings-search-export-finish-info')} {export_folder}")
161
- else:
162
- gr.Info(loc.localize("embeddings-search-export-no-results-info"))
163
-
164
-
165
- def _build_extract_tab():
166
- with gr.Tab(loc.localize("embeddings-extract-tab-title")):
167
- input_directory_state = gr.State()
168
- db_directory_state = gr.State()
169
-
170
- def select_directory_to_state_and_tb(state_key):
171
- return (gu.select_directory(collect_files=False, state_key=state_key),) * 2
172
-
173
- with gr.Row():
174
- select_audio_directory_btn = gr.Button(loc.localize("embeddings-tab-select-input-directory-button-label"))
175
- selected_audio_directory_tb = gr.Textbox(show_label=False, interactive=False)
176
- select_audio_directory_btn.click(
177
- partial(select_directory_to_state_and_tb, state_key="embeddings-input-dir"),
178
- outputs=[selected_audio_directory_tb, input_directory_state],
179
- show_progress=False,
180
- )
181
-
182
- with gr.Row():
183
- select_db_directory_btn = gr.Button(loc.localize("embeddings-tab-select-db-directory-button-label"))
184
-
185
- with gr.Row():
186
- db_name_tb = gr.Textbox(
187
- "embeddings_database",
188
- visible=False,
189
- interactive=True,
190
- info=loc.localize("embeddings-tab-db-info"),
191
- )
192
-
193
- with gr.Accordion(loc.localize("embedding-settings-accordion-label"), open=False):
194
- with gr.Row():
195
- overlap_slider = gr.Slider(
196
- minimum=0,
197
- maximum=2.9,
198
- value=0,
199
- step=0.1,
200
- label=loc.localize("embedding-settings-overlap-slider-label"),
201
- info=loc.localize("embedding-settings-overlap-slider-info"),
202
- )
203
- batch_size_number = gr.Number(
204
- precision=1,
205
- label=loc.localize("embedding-settings-batchsize-number-label"),
206
- value=1,
207
- info=loc.localize("embedding-settings-batchsize-number-info"),
208
- minimum=1,
209
- interactive=True,
210
- )
211
- threads_number = gr.Number(
212
- precision=1,
213
- label=loc.localize("embedding-settings-threads-number-label"),
214
- value=4,
215
- info=loc.localize("embedding-settings-threads-number-info"),
216
- minimum=1,
217
- interactive=True,
218
- )
219
-
220
- with gr.Row():
221
- audio_speed_slider = gr.Slider(
222
- minimum=-10,
223
- maximum=10,
224
- value=0,
225
- step=1,
226
- label=loc.localize("embedding-settings-audio-speed-slider-label"),
227
- info=loc.localize("embedding-settings-audio-speed-slider-info"),
228
- )
229
- with gr.Row():
230
- fmin_number = gr.Number(
231
- cfg.SIG_FMIN,
232
- minimum=0,
233
- label=loc.localize("embedding-settings-fmin-number-label"),
234
- info=loc.localize("embedding-settings-fmin-number-info"),
235
- interactive=True,
236
- )
237
- fmax_number = gr.Number(
238
- cfg.SIG_FMAX,
239
- minimum=0,
240
- label=loc.localize("embedding-settings-fmax-number-label"),
241
- info=loc.localize("embedding-settings-fmax-number-info"),
242
- interactive=True,
243
- )
244
-
245
- def select_directory_and_update_tb(db_name):
246
- dir_name = gu.select_directory(state_key="embeddings-db-dir", collect_files=False)
247
-
248
- if dir_name:
249
- db_path = os.path.join(dir_name, db_name)
250
-
251
- if os.path.exists(db_path):
252
- db = get_embeddings_database(db_path)
253
-
254
- try:
255
- settings = db.get_metadata("birdnet_analyzer_settings")
256
- gr.Info(loc.localize("embeddings-db-already-exists-info"))
257
-
258
- return (
259
- dir_name,
260
- gr.Textbox(label=dir_name, visible=True),
261
- gr.Slider(value=settings["AUDIO_SPEED"], interactive=False),
262
- gr.Number(value=settings["BANDPASS_FMIN"], interactive=False),
263
- gr.Number(value=settings["BANDPASS_FMAX"], interactive=False),
264
- )
265
- except KeyError:
266
- pass
267
- finally:
268
- db.db.close()
269
-
270
- return (
271
- dir_name,
272
- gr.Textbox(label=dir_name, visible=True),
273
- gr.Slider(interactive=True),
274
- gr.Number(interactive=True),
275
- gr.Number(interactive=True),
276
- )
277
-
278
- return None, None, gr.Slider(interactive=True), gr.Number(interactive=True), gr.Number(interactive=True)
279
-
280
- select_db_directory_btn.click(
281
- select_directory_and_update_tb,
282
- inputs=[db_name_tb],
283
- outputs=[db_directory_state, db_name_tb, audio_speed_slider, fmin_number, fmax_number],
284
- show_progress=False,
285
- )
286
-
287
- def check_settings(dir_name, db_name):
288
- db_path = os.path.join(dir_name, db_name)
289
-
290
- if db_name and os.path.exists(db_path):
291
- db = get_embeddings_database(db_path)
292
-
293
- try:
294
- settings = db.get_metadata("birdnet_analyzer_settings")
295
-
296
- return (
297
- gr.Slider(value=settings["AUDIO_SPEED"], interactive=False),
298
- gr.Number(value=settings["BANDPASS_FMIN"], interactive=False),
299
- gr.Number(value=settings["BANDPASS_FMAX"], interactive=False),
300
- )
301
- except KeyError:
302
- pass
303
- finally:
304
- db.db.close()
305
-
306
- return gr.Slider(interactive=True), gr.Number(interactive=True), gr.Number(interactive=True)
307
-
308
- db_name_tb.change(
309
- check_settings,
310
- inputs=[db_directory_state, db_name_tb],
311
- outputs=[audio_speed_slider, fmin_number, fmax_number],
312
- show_progress=False,
313
- )
314
-
315
- progress_plot = gr.Plot()
316
- start_btn = gr.Button(loc.localize("embeddings-tab-start-button-label"), variant="huggingface")
317
-
318
- start_btn.click(
319
- rum_embeddings_with_tqdm_tracking,
320
- inputs=[
321
- input_directory_state,
322
- db_directory_state,
323
- db_name_tb,
324
- overlap_slider,
325
- batch_size_number,
326
- threads_number,
327
- audio_speed_slider,
328
- fmin_number,
329
- fmax_number,
330
- ],
331
- outputs=[progress_plot, audio_speed_slider, fmin_number, fmax_number],
332
- show_progress_on=progress_plot,
333
- show_progress=True,
334
- )
335
-
336
-
337
- def _build_search_tab():
338
- from birdnet_analyzer import audio, utils
339
-
340
- with gr.Tab(loc.localize("embeddings-search-tab-title")):
341
- results_state = gr.State([])
342
- page_state = gr.State(0)
343
- export_state = gr.State({})
344
- hidden_audio = gr.Audio(visible=False, autoplay=True, type="numpy")
345
-
346
- with gr.Row():
347
- with gr.Column():
348
- db_selection_button = gr.Button(loc.localize("embeddings-search-db-selection-button-label"))
349
- with gr.Group():
350
- with gr.Row():
351
- db_selection_tb = gr.Textbox(
352
- label=loc.localize("embeddings-search-db-selection-textbox-label"),
353
- max_lines=3,
354
- interactive=False,
355
- visible=False,
356
- )
357
- db_embedding_count_number = gr.Number(
358
- interactive=False,
359
- visible=False,
360
- label=loc.localize("embeddings-search-db-embedding-count-number-label"),
361
- )
362
- with gr.Row():
363
- db_bandpass_frequencies_tb = gr.Textbox(
364
- label=loc.localize("embeddings-search-db-bandpass-frequencies-label"),
365
- interactive=False,
366
- visible=False,
367
- )
368
- db_audio_speed_number = gr.Number(
369
- interactive=False,
370
- visible=False,
371
- label=loc.localize("embeddings-search-db-audio-speed-number-label"),
372
- )
373
- query_spectrogram = gr.Plot(show_label=False)
374
- select_query_btn = gr.Button(loc.localize("embeddings-search-select-query-button-label"))
375
- query_sample_tb = gr.Textbox(
376
- label=loc.localize("embeddings-search-query-sample-textbox-label"),
377
- visible=False,
378
- interactive=False,
379
- )
380
-
381
- crop_mode = gr.Radio(
382
- [
383
- (loc.localize("training-tab-crop-mode-radio-option-center"), "center"),
384
- (loc.localize("training-tab-crop-mode-radio-option-first"), "first"),
385
- (loc.localize("training-tab-crop-mode-radio-option-segments"), "segments"),
386
- ],
387
- value="center",
388
- label=loc.localize("training-tab-crop-mode-radio-label"),
389
- info=loc.localize("embeddings-search-crop-mode-radio-info"),
390
- )
391
-
392
- crop_overlap = gr.Slider(
393
- minimum=0,
394
- maximum=2.9,
395
- value=0,
396
- step=0.1,
397
- label=loc.localize("training-tab-crop-overlap-number-label"),
398
- info=loc.localize("embeddings-search-crop-overlap-number-info"),
399
- visible=False,
400
- )
401
- max_samples_number = gr.Number(
402
- label=loc.localize("embeddings-search-max-samples-number-label"),
403
- value=10,
404
- interactive=True,
405
- )
406
- score_fn_select = gr.Radio(
407
- label=loc.localize("embeddings-search-score-fn-select-label"),
408
- choices=["cosine", "dot", "euclidean"],
409
- value="cosine",
410
- interactive=True,
411
- )
412
- max_samples_number = gr.Number(
413
- label=loc.localize("embeddings-search-max-samples-number-label"),
414
- value=10,
415
- interactive=True,
416
- )
417
- score_fn_select = gr.Radio(
418
- label=loc.localize("embeddings-search-score-fn-select-label"),
419
- choices=["cosine", "dot", "euclidean"],
420
- value="cosine",
421
- interactive=True,
422
- )
423
- search_btn = gr.Button(loc.localize("embeddings-search-start-button-label"), variant="huggingface")
424
-
425
- with gr.Column():
426
- with gr.Column(elem_id="embeddings-search-results"):
427
-
428
- @gr.render(
429
- inputs=[results_state, page_state, db_selection_tb, export_state],
430
- triggers=[results_state.change, page_state.change, db_selection_tb.change],
431
- )
432
- def render_results(results, page, db_path, exports):
433
- with gr.Row():
434
- if db_path is not None and len(results) > 0:
435
- db = get_search_database(db_path)
436
- settings = db.get_metadata("birdnet_analyzer_settings")
437
-
438
- for i, r in enumerate(results[page]):
439
- with gr.Column():
440
- index = i + page * PAGE_SIZE
441
- embedding_source = db.get_embedding_source(r.embedding_id)
442
- file = embedding_source.source_id
443
- offset = embedding_source.offsets[0] * settings["AUDIO_SPEED"]
444
- duration = 3 * settings["AUDIO_SPEED"]
445
- spec = utils.spectrogram_from_file(
446
- file,
447
- offset=offset,
448
- duration=duration,
449
- speed=settings["AUDIO_SPEED"],
450
- fmin=settings["BANDPASS_FMIN"],
451
- fmax=settings["BANDPASS_FMAX"],
452
- fig_size=(6, 3),
453
- )
454
- plot_audio_state = gr.State(
455
- [
456
- file,
457
- offset,
458
- duration,
459
- index,
460
- r.sort_score,
461
- settings["AUDIO_SPEED"],
462
- settings["BANDPASS_FMIN"],
463
- settings["BANDPASS_FMAX"],
464
- ]
465
- )
466
- with gr.Row():
467
- gr.Plot(spec, label=f"{index + 1}_score: {r.sort_score:.2f}")
468
-
469
- with gr.Row():
470
- play_btn = gr.Button("")
471
- play_btn.click(play_audio, inputs=plot_audio_state, outputs=hidden_audio)
472
- checkbox = gr.Checkbox(label="Export", value=(index in exports))
473
- checkbox.change(
474
- update_export_state,
475
- inputs=[plot_audio_state, checkbox, export_state],
476
- outputs=export_state,
477
- )
478
- db.db.close() # Close the database connection to avoid having wal/shm files
479
-
480
- with gr.Row():
481
- prev_btn = gr.Button("Previous Page", interactive=page > 0)
482
- next_btn = gr.Button("Next Page", interactive=page < len(results) - 1)
483
-
484
- def prev_page(page):
485
- return page - 1 if page > 0 else 0
486
-
487
- def next_page(page):
488
- return page + 1
489
-
490
- prev_btn.click(prev_page, inputs=[page_state], outputs=[page_state])
491
- next_btn.click(next_page, inputs=[page_state], outputs=[page_state])
492
-
493
- export_btn = gr.Button(
494
- loc.localize("embeddings-search-export-button-label"), variant="huggingface", interactive=False
495
- )
496
-
497
- def on_db_selection_click():
498
- folder = gu.select_folder(state_key="embeddings_search_db")
499
-
500
- try:
501
- db = get_embeddings_database(folder)
502
- except ValueError as e:
503
- raise gr.Error(loc.localize("embeddings-search-db-selection-error")) from e
504
-
505
- embedding_count = db.count_embeddings()
506
- settings = db.get_metadata("birdnet_analyzer_settings")
507
- frequencies = f"{settings['BANDPASS_FMIN']} - {settings['BANDPASS_FMAX']} Hz"
508
- speed = settings["AUDIO_SPEED"]
509
- db.db.close()
510
-
511
- if folder:
512
- return (
513
- gr.Textbox(value=folder, visible=True),
514
- gr.Number(value=embedding_count, visible=True),
515
- gr.Textbox(visible=True, value=frequencies),
516
- gr.Number(visible=True, value=speed),
517
- [],
518
- {},
519
- gr.Button(visible=True),
520
- gr.Textbox(value=None, visible=False),
521
- )
522
-
523
- return None, None, None, None, [], {}, gr.Button(visible=False), gr.Textbox(visible=False)
524
-
525
- def select_query_sample():
526
- file = gu.select_file(state_key="query_sample")
527
- return gr.Textbox(file, visible=True)
528
-
529
- select_query_btn.click(select_query_sample, outputs=[query_sample_tb])
530
-
531
- def on_crop_select(new_crop_mode):
532
- return gr.Number(visible=new_crop_mode == "segments", interactive=new_crop_mode == "segments")
533
-
534
- crop_mode.change(on_crop_select, inputs=crop_mode, outputs=crop_overlap)
535
-
536
- def update_query_spectrogram(audiofilepath, db_selection, crop_mode, crop_overlap):
537
- import numpy as np
538
-
539
- if audiofilepath and db_selection:
540
- db = get_embeddings_database(db_selection)
541
- settings = db.get_metadata("birdnet_analyzer_settings")
542
- audio_speed = settings["AUDIO_SPEED"]
543
- fmin = settings["BANDPASS_FMIN"]
544
- fmax = settings["BANDPASS_FMAX"]
545
- db.db.close()
546
-
547
- sig, rate = audio.open_audio_file(
548
- audiofilepath,
549
- duration=cfg.SIG_LENGTH * audio_speed if crop_mode == "first" else None,
550
- fmin=fmin,
551
- fmax=fmax,
552
- speed=audio_speed,
553
- )
554
-
555
- # Crop query audio
556
- if crop_mode == "center":
557
- sig = [audio.crop_center(sig, rate, cfg.SIG_LENGTH)][0]
558
- elif crop_mode == "first":
559
- sig = [audio.split_signal(sig, rate, cfg.SIG_LENGTH, crop_overlap, cfg.SIG_MINLEN)[0]][0]
560
-
561
- sig = np.array(sig, dtype="float32")
562
- spec = utils.spectrogram_from_audio(sig, rate, fig_size=(10, 4))
563
-
564
- return spec, [], {}
565
-
566
- return None, [], {}
567
-
568
- crop_mode.change(
569
- update_query_spectrogram,
570
- inputs=[query_sample_tb, db_selection_tb, crop_mode, crop_overlap],
571
- outputs=[query_spectrogram, results_state, export_state],
572
- preprocess=False,
573
- )
574
- query_sample_tb.change(
575
- update_query_spectrogram,
576
- inputs=[query_sample_tb, db_selection_tb, crop_mode, crop_overlap],
577
- outputs=[query_spectrogram, results_state, export_state],
578
- preprocess=False,
579
- )
580
-
581
- db_selection_button.click(
582
- on_db_selection_click,
583
- outputs=[
584
- db_selection_tb,
585
- db_embedding_count_number,
586
- db_bandpass_frequencies_tb,
587
- db_audio_speed_number,
588
- results_state,
589
- export_state,
590
- select_query_btn,
591
- query_sample_tb,
592
- ],
593
- show_progress=False,
594
- )
595
-
596
- search_btn.click(
597
- run_search,
598
- inputs=[
599
- db_selection_tb,
600
- query_sample_tb,
601
- max_samples_number,
602
- score_fn_select,
603
- crop_mode,
604
- crop_overlap,
605
- ],
606
- outputs=[results_state, page_state, export_btn, export_state],
607
- show_progress_on=export_btn,
608
- )
609
-
610
- export_btn.click(
611
- run_export,
612
- inputs=[export_state],
613
- )
614
-
615
-
616
- def build_embeddings_tab():
617
- with gr.Tab(loc.localize("embeddings-tab-title")):
618
- _build_extract_tab()
619
- _build_search_tab()
1
+ import os
2
+ from functools import partial
3
+
4
+ import gradio as gr
5
+
6
+ import birdnet_analyzer.config as cfg
7
+ import birdnet_analyzer.gui.localization as loc
8
+ import birdnet_analyzer.gui.utils as gu
9
+ from birdnet_analyzer.embeddings.core import get_database as get_embeddings_database
10
+ from birdnet_analyzer.search.core import get_database as get_search_database
11
+
12
+ SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__))
13
+ PAGE_SIZE = 6
14
+
15
+
16
+ def play_audio(audio_infos):
17
+ from birdnet_analyzer import audio
18
+
19
+ arr, sr = audio.open_audio_file(
20
+ audio_infos[0],
21
+ offset=audio_infos[1],
22
+ duration=audio_infos[2],
23
+ speed=audio_infos[5],
24
+ fmin=audio_infos[6],
25
+ fmax=audio_infos[7],
26
+ )
27
+
28
+ return sr, arr
29
+
30
+
31
+ def update_export_state(audio_infos, checkbox_value, export_state: dict):
32
+ if checkbox_value:
33
+ export_state[audio_infos[3]] = audio_infos
34
+ else:
35
+ export_state.pop(audio_infos[3], None)
36
+
37
+ return export_state
38
+
39
+
40
+ def run_embeddings_with_tqdm_tracking(
41
+ input_path,
42
+ db_directory,
43
+ db_name,
44
+ overlap,
45
+ threads,
46
+ batch_size,
47
+ audio_speed,
48
+ fmin,
49
+ fmax,
50
+ file_output,
51
+ progress=gr.Progress(track_tqdm=True),
52
+ ):
53
+ return run_embeddings(
54
+ input_path,
55
+ db_directory,
56
+ db_name,
57
+ overlap,
58
+ threads,
59
+ batch_size,
60
+ audio_speed,
61
+ fmin,
62
+ fmax,
63
+ file_output,
64
+ progress,
65
+ )
66
+
67
+
68
+ @gu.gui_runtime_error_handler
69
+ def run_embeddings(
70
+ input_path,
71
+ db_directory,
72
+ db_name,
73
+ overlap,
74
+ threads,
75
+ batch_size,
76
+ audio_speed,
77
+ fmin,
78
+ fmax,
79
+ file_output,
80
+ progress,
81
+ ):
82
+ from birdnet_analyzer.embeddings.utils import run
83
+
84
+ gu.validate(input_path, loc.localize("embeddings-input-dir-validation-message"))
85
+ gu.validate(db_directory, loc.localize("embeddings-db-dir-validation-message"))
86
+ gu.validate(db_name, loc.localize("embeddings-db-name-validation-message"))
87
+ db_path = os.path.join(db_directory, db_name)
88
+
89
+ db = get_embeddings_database(db_path)
90
+
91
+ try:
92
+ settings = db.get_metadata("birdnet_analyzer_settings")
93
+ db.db.close()
94
+ run(
95
+ input_path,
96
+ db_path,
97
+ overlap,
98
+ settings["AUDIO_SPEED"],
99
+ settings["BANDPASS_FMIN"],
100
+ settings["BANDPASS_FMAX"],
101
+ threads,
102
+ batch_size,
103
+ file_output,
104
+ )
105
+ except Exception as e:
106
+ db.db.close()
107
+ # Transform audiospeed from slider to float
108
+ audio_speed = max(0.1, 1.0 / (audio_speed * -1)) if audio_speed < 0 else max(1.0, float(audio_speed))
109
+
110
+ if fmin is None or fmax is None or fmin < cfg.SIG_FMIN or fmax > cfg.SIG_FMAX or fmin > fmax:
111
+ raise gr.Error(f"{loc.localize('validation-no-valid-frequency')} [{cfg.SIG_FMIN}, {cfg.SIG_FMAX}]") from e
112
+
113
+ run(input_path, db_path, overlap, audio_speed, fmin, fmax, threads, batch_size, file_output)
114
+
115
+ gr.Info(f"{loc.localize('embeddings-tab-finish-info')} {db_path}")
116
+
117
+ return gr.Plot(), gr.Slider(interactive=False), gr.Number(interactive=False), gr.Number(interactive=False)
118
+
119
+
120
@gu.gui_runtime_error_handler
def run_search(db_path, query_path, max_samples, score_fn, crop_mode, crop_overlap):
    """Search an embeddings database for the entries closest to a query file.

    The audio speed and bandpass limits are not taken from the UI; they are read
    from the database's "birdnet_analyzer_settings" metadata entry and forwarded
    to the search.

    Args:
        db_path: Path of the embeddings database to search.
        query_path: Path of the query audio file.
        max_samples: Forwarded to ``get_search_results``.
        score_fn: Forwarded to ``get_search_results``.
        crop_mode: Forwarded to ``get_search_results``.
        crop_overlap: Forwarded to ``get_search_results``.

    Returns:
        A 4-tuple: the results split into pages of ``PAGE_SIZE`` entries, the
        page index ``0``, an interactive ``gr.Button`` update and an empty dict.
    """
    from birdnet_analyzer.search.utils import get_search_results

    gu.validate(db_path, loc.localize("embeddings-search-db-validation-message"))
    gu.validate(query_path, loc.localize("embeddings-search-query-validation-message"))
    gu.validate(max_samples, loc.localize("embeddings-search-max-samples-validation-message"))

    db = get_search_database(db_path)

    try:
        settings = db.get_metadata("birdnet_analyzer_settings")

        results = get_search_results(
            query_path,
            db,
            max_samples,
            settings["AUDIO_SPEED"],
            settings["BANDPASS_FMIN"],
            settings["BANDPASS_FMAX"],
            score_fn,
            crop_mode,
            crop_overlap,
        )
    finally:
        # Close the database connection to avoid having wal/shm files, even
        # when reading the settings or the search itself fails.
        db.db.close()

    chunks = [results[i : i + PAGE_SIZE] for i in range(0, len(results), PAGE_SIZE)]

    return chunks, 0, gr.Button(interactive=True), {}
147
+
148
+
149
def run_export(export_state: dict):
    """Write every selected search result to a user-chosen folder as a WAV file.

    Each value of ``export_state`` is indexed positionally: ``[0]`` is the source
    audio path, ``[1]`` the offset and ``[2]`` the duration handed to
    ``audio.open_audio_file``; ``[4]`` is formatted with five decimals as the
    file name prefix (presumably the match score; verify against the caller).

    Args:
        export_state: Mapping of the results marked for export.
    """
    from birdnet_analyzer import audio

    # Nothing selected: tell the user and stop before opening a folder dialog.
    if not export_state.items():
        gr.Info(loc.localize("embeddings-search-export-no-results-info"))
        return

    export_folder = gu.select_folder(state_key="embeddings-search-export-folder")

    # No folder returned by the dialog: nothing is exported and no message is shown.
    if not export_folder:
        return

    for file in export_state.values():
        filebasename, _ = os.path.splitext(os.path.basename(file[0]))
        dest = os.path.join(export_folder, f"{file[4]:.5f}_{filebasename}_{file[1]}_{file[1] + file[2]}.wav")
        # @mamau: Missing audio speed?
        sig, rate = audio.open_audio_file(file[0], offset=file[1], duration=file[2], sample_rate=None)
        audio.save_signal(sig, dest, rate)

    gr.Info(f"{loc.localize('embeddings-search-export-finish-info')} {export_folder}")
167
+
168
+
169
def _build_extract_tab():
    """Build the "extract" sub-tab used to compute embeddings into a database.

    Creates the input/database directory pickers, the extraction settings
    (overlap, batch size, threads, audio speed, bandpass limits), the optional
    file output directory and the start button, and wires their events.

    When the chosen directory/name pair points at an existing database that
    stores a "birdnet_analyzer_settings" metadata entry, the audio speed and
    bandpass inputs are set to the stored values and made read-only.
    """
    with gr.Tab(loc.localize("embeddings-extract-tab-title")):
        input_directory_state = gr.State()
        db_directory_state = gr.State()

        def select_directory_to_state_and_tb(state_key):
            # The same selection is returned twice: once for the textbox, once for the state.
            return (gu.select_directory(collect_files=False, state_key=state_key),) * 2

        with gr.Row():
            select_audio_directory_btn = gr.Button(loc.localize("embeddings-tab-select-input-directory-button-label"))
            selected_audio_directory_tb = gr.Textbox(show_label=False, interactive=False)
            select_audio_directory_btn.click(
                partial(select_directory_to_state_and_tb, state_key="embeddings-input-dir"),
                outputs=[selected_audio_directory_tb, input_directory_state],
                show_progress="hidden",
            )

        with gr.Row():
            select_db_directory_btn = gr.Button(loc.localize("embeddings-tab-select-db-directory-button-label"))

        with gr.Row():
            db_name_tb = gr.Textbox(
                "embeddings_database",
                visible=False,
                interactive=True,
                info=loc.localize("embeddings-tab-db-info"),
            )

        with gr.Accordion(loc.localize("embedding-settings-accordion-label"), open=False):
            with gr.Row():
                overlap_slider = gr.Slider(
                    minimum=0,
                    maximum=2.9,
                    value=0,
                    step=0.1,
                    label=loc.localize("embedding-settings-overlap-slider-label"),
                    info=loc.localize("embedding-settings-overlap-slider-info"),
                )
                batch_size_number = gr.Number(
                    precision=1,
                    label=loc.localize("embedding-settings-batchsize-number-label"),
                    value=1,
                    info=loc.localize("embedding-settings-batchsize-number-info"),
                    minimum=1,
                    interactive=True,
                )
                threads_number = gr.Number(
                    precision=1,
                    label=loc.localize("embedding-settings-threads-number-label"),
                    value=4,
                    info=loc.localize("embedding-settings-threads-number-info"),
                    minimum=1,
                    interactive=True,
                )

            with gr.Row():
                audio_speed_slider = gr.Slider(
                    minimum=-10,
                    maximum=10,
                    value=0,
                    step=1,
                    label=loc.localize("embedding-settings-audio-speed-slider-label"),
                    info=loc.localize("embedding-settings-audio-speed-slider-info"),
                )
            with gr.Row():
                fmin_number = gr.Number(
                    cfg.SIG_FMIN,
                    minimum=0,
                    label=loc.localize("embedding-settings-fmin-number-label"),
                    info=loc.localize("embedding-settings-fmin-number-info"),
                    interactive=True,
                )
                fmax_number = gr.Number(
                    cfg.SIG_FMAX,
                    minimum=0,
                    label=loc.localize("embedding-settings-fmax-number-label"),
                    info=loc.localize("embedding-settings-fmax-number-info"),
                    interactive=True,
                )

        def locked_setting_updates(dir_name, db_name):
            """Return read-only updates for (audio speed, fmin, fmax), or None.

            None means the inputs should stay editable: no directory or name was
            given, nothing exists at the joined path, or the stored settings
            could not be read (KeyError).
            """
            # An empty name would make os.path.join() resolve to the directory
            # itself (which exists), and a missing directory would make it raise.
            if not dir_name or not db_name:
                return None

            db_path = os.path.join(dir_name, db_name)

            if not os.path.exists(db_path):
                return None

            db = get_embeddings_database(db_path)

            try:
                settings = db.get_metadata("birdnet_analyzer_settings")

                return (
                    gr.Slider(value=settings["AUDIO_SPEED"], interactive=False),
                    gr.Number(value=settings["BANDPASS_FMIN"], interactive=False),
                    gr.Number(value=settings["BANDPASS_FMAX"], interactive=False),
                )
            except KeyError:
                # Deliberate best effort: without readable settings the inputs stay editable.
                return None
            finally:
                db.db.close()

        def select_directory_and_update_tb(db_name):
            """Pick the database directory and refresh the dependent inputs."""
            dir_name = gu.select_directory(state_key="embeddings-db-dir", collect_files=False)

            if not dir_name:
                return None, gr.Textbox(visible=False), gr.Slider(interactive=True), gr.Number(interactive=True), gr.Number(interactive=True)

            locked = locked_setting_updates(dir_name, db_name)

            if locked is not None:
                gr.Info(loc.localize("embeddings-db-already-exists-info"))

                return (dir_name, gr.Textbox(label=dir_name, visible=True), *locked)

            return (
                dir_name,
                gr.Textbox(label=dir_name, visible=True),
                gr.Slider(interactive=True),
                gr.Number(interactive=True),
                gr.Number(interactive=True),
            )

        select_db_directory_btn.click(
            select_directory_and_update_tb,
            inputs=[db_name_tb],
            outputs=[db_directory_state, db_name_tb, audio_speed_slider, fmin_number, fmax_number],
            show_progress="hidden",
        )

        with gr.Accordion(loc.localize("embedding-file-output-accordion-label"), open=False):
            with gr.Row():
                select_file_output_directory_btn = gr.Button(loc.localize("embeddings-select-file-output-directory-button-label"))

            with gr.Row():
                file_output_tb = gr.Textbox(
                    value=None,
                    placeholder=loc.localize("embeddings-tab-file-output-directory-textbox-placeholder"),
                    interactive=True,
                    label=loc.localize("embeddings-tab-file-output-directory-textbox-label"),
                )

            def select_file_output_directory_and_update_tb():
                dir_name = gu.select_directory(state_key="embeddings-file-output-dir", collect_files=False)

                # Any falsy selection is normalized to None.
                return dir_name or None

            select_file_output_directory_btn.click(
                select_file_output_directory_and_update_tb,
                inputs=[],
                outputs=[file_output_tb],
            )

        def check_settings(dir_name, db_name):
            """Re-evaluate the setting locks whenever the database name changes."""
            locked = locked_setting_updates(dir_name, db_name)

            if locked is not None:
                return locked

            return gr.Slider(interactive=True), gr.Number(interactive=True), gr.Number(interactive=True)

        db_name_tb.change(
            check_settings,
            inputs=[db_directory_state, db_name_tb],
            outputs=[audio_speed_slider, fmin_number, fmax_number],
            show_progress="hidden",
        )

        progress_plot = gr.Plot()
        start_btn = gr.Button(loc.localize("embeddings-tab-start-button-label"), variant="huggingface")

        start_btn.click(
            run_embeddings_with_tqdm_tracking,
            inputs=[
                input_directory_state,
                db_directory_state,
                db_name_tb,
                overlap_slider,
                batch_size_number,
                threads_number,
                audio_speed_slider,
                fmin_number,
                fmax_number,
                file_output_tb,
            ],
            outputs=[progress_plot, audio_speed_slider, fmin_number, fmax_number],
            show_progress_on=progress_plot,
        )
365
+
366
+
367
def _build_search_tab():
    """Build the "search" sub-tab used to query an embeddings database.

    The left column holds the database picker, the query sample picker with its
    spectrogram preview and the search options; the right column renders the
    paged results (spectrogram, play button and export checkbox per result) and
    the export button.

    Audio speed and bandpass limits always come from the selected database's
    "birdnet_analyzer_settings" metadata entry.
    """
    from birdnet_analyzer import audio, utils

    with gr.Tab(loc.localize("embeddings-search-tab-title")):
        results_state = gr.State([])
        page_state = gr.State(0)
        export_state = gr.State({})
        hidden_audio = gr.Audio(visible=False, autoplay=True, type="numpy")

        with gr.Row():
            with gr.Column():
                db_selection_button = gr.Button(loc.localize("embeddings-search-db-selection-button-label"))
                with gr.Group():
                    with gr.Row():
                        db_selection_tb = gr.Textbox(
                            label=loc.localize("embeddings-search-db-selection-textbox-label"),
                            max_lines=3,
                            interactive=False,
                            visible=False,
                        )
                        db_embedding_count_number = gr.Number(
                            interactive=False,
                            visible=False,
                            label=loc.localize("embeddings-search-db-embedding-count-number-label"),
                        )
                    with gr.Row():
                        db_bandpass_frequencies_tb = gr.Textbox(
                            label=loc.localize("embeddings-search-db-bandpass-frequencies-label"),
                            interactive=False,
                            visible=False,
                        )
                        db_audio_speed_number = gr.Number(
                            interactive=False,
                            visible=False,
                            label=loc.localize("embeddings-search-db-audio-speed-number-label"),
                        )
                query_spectrogram = gr.Plot(show_label=False)
                select_query_btn = gr.Button(loc.localize("embeddings-search-select-query-button-label"))
                query_sample_tb = gr.Textbox(
                    label=loc.localize("embeddings-search-query-sample-textbox-label"),
                    visible=False,
                    interactive=False,
                )

                crop_mode = gr.Radio(
                    [
                        (loc.localize("training-tab-crop-mode-radio-option-center"), "center"),
                        (loc.localize("training-tab-crop-mode-radio-option-first"), "first"),
                        (loc.localize("training-tab-crop-mode-radio-option-segments"), "segments"),
                    ],
                    value="center",
                    label=loc.localize("training-tab-crop-mode-radio-label"),
                    info=loc.localize("embeddings-search-crop-mode-radio-info"),
                )

                crop_overlap = gr.Slider(
                    minimum=0,
                    maximum=2.9,
                    value=0,
                    step=0.1,
                    label=loc.localize("training-tab-crop-overlap-number-label"),
                    info=loc.localize("embeddings-search-crop-overlap-number-info"),
                    visible=False,
                )
                max_samples_number = gr.Number(
                    label=loc.localize("embeddings-search-max-samples-number-label"),
                    value=10,
                    interactive=True,
                )
                score_fn_select = gr.Radio(
                    label=loc.localize("embeddings-search-score-fn-select-label"),
                    choices=["cosine", "dot", "euclidean"],
                    value="cosine",
                    interactive=True,
                )
                search_btn = gr.Button(loc.localize("embeddings-search-start-button-label"), variant="huggingface")

            with gr.Column():
                with gr.Column(elem_id="embeddings-search-results"):

                    @gr.render(
                        inputs=[results_state, page_state, db_selection_tb, export_state],
                        triggers=[results_state.change, page_state.change, db_selection_tb.change],
                    )
                    def render_results(results, page, db_path, exports):
                        """Render the current results page plus the paging buttons."""
                        with gr.Row():
                            if db_path is not None and len(results) > 0:
                                db = get_search_database(db_path)

                                try:
                                    settings = db.get_metadata("birdnet_analyzer_settings")

                                    for i, r in enumerate(results[page]):
                                        with gr.Column():
                                            # Position of this result across all pages.
                                            index = i + page * PAGE_SIZE
                                            embedding_source = db.get_embedding_source(r.embedding_id)
                                            file = embedding_source.source_id
                                            offset = embedding_source.offsets[0]
                                            duration = cfg.SIG_LENGTH * settings["AUDIO_SPEED"]
                                            spec = utils.spectrogram_from_file(
                                                file,
                                                offset=offset,
                                                duration=duration,
                                                speed=settings["AUDIO_SPEED"],
                                                fmin=settings["BANDPASS_FMIN"],
                                                fmax=settings["BANDPASS_FMAX"],
                                                fig_size=(6, 3),
                                            )
                                            # Positional record consumed by play_audio and
                                            # update_export_state; run_export indexes into it.
                                            plot_audio_state = gr.State(
                                                [
                                                    file,
                                                    offset,
                                                    duration,
                                                    index,
                                                    r.sort_score,
                                                    settings["AUDIO_SPEED"],
                                                    settings["BANDPASS_FMIN"],
                                                    settings["BANDPASS_FMAX"],
                                                ]
                                            )
                                            with gr.Row():
                                                gr.Plot(spec, label=f"{index + 1}_score: {r.sort_score:.2f}")

                                            with gr.Row():
                                                play_btn = gr.Button("▶")
                                                play_btn.click(play_audio, inputs=plot_audio_state, outputs=hidden_audio)
                                                checkbox = gr.Checkbox(label="Export", value=(index in exports))
                                                checkbox.change(
                                                    update_export_state,
                                                    inputs=[plot_audio_state, checkbox, export_state],
                                                    outputs=export_state,
                                                )
                                finally:
                                    # Close the database connection to avoid having wal/shm files,
                                    # also when rendering a result fails.
                                    db.db.close()

                        with gr.Row():
                            prev_btn = gr.Button("Previous Page", interactive=page > 0)
                            next_btn = gr.Button("Next Page", interactive=page < len(results) - 1)

                            def prev_page(page):
                                return page - 1 if page > 0 else 0

                            def next_page(page):
                                # No upper bound needed: the button is disabled on the last page.
                                return page + 1

                            prev_btn.click(prev_page, inputs=[page_state], outputs=[page_state])
                            next_btn.click(next_page, inputs=[page_state], outputs=[page_state])

                export_btn = gr.Button(
                    loc.localize("embeddings-search-export-button-label"), variant="huggingface", interactive=False
                )

        def on_db_selection_click():
            """Let the user pick a database folder and show its key figures.

            Raises:
                gr.Error: If opening the selected folder raises ValueError.
            """
            folder = gu.select_folder(state_key="embeddings_search_db")

            # Check for a cancelled dialog before touching the database; opening
            # it first would be attempted with an empty path.
            if not folder:
                return None, None, None, None, [], {}, gr.Button(visible=False), gr.Textbox(visible=False)

            try:
                db = get_embeddings_database(folder)
            except ValueError as e:
                raise gr.Error(loc.localize("embeddings-search-db-selection-error")) from e

            try:
                embedding_count = db.count_embeddings()
                settings = db.get_metadata("birdnet_analyzer_settings")
                frequencies = f"{settings['BANDPASS_FMIN']} - {settings['BANDPASS_FMAX']} Hz"
                speed = settings["AUDIO_SPEED"]
            finally:
                db.db.close()

            return (
                gr.Textbox(value=folder, visible=True),
                gr.Number(value=embedding_count, visible=True),
                gr.Textbox(visible=True, value=frequencies),
                gr.Number(visible=True, value=speed),
                [],
                {},
                gr.Button(visible=True),
                gr.Textbox(value=None, visible=False),
            )

        def select_query_sample():
            file = gu.select_file(state_key="query_sample")
            return gr.Textbox(file, visible=True)

        select_query_btn.click(select_query_sample, outputs=[query_sample_tb])

        def on_crop_select(new_crop_mode):
            # The overlap slider only applies to the "segments" crop mode.
            show_overlap = new_crop_mode == "segments"
            return gr.Slider(visible=show_overlap, interactive=show_overlap)

        crop_mode.change(on_crop_select, inputs=crop_mode, outputs=crop_overlap)

        def update_query_spectrogram(audiofilepath, db_selection, crop_mode, crop_overlap):
            """Recompute the query preview and clear previous results and exports.

            Returns:
                (spectrogram or None, empty results list, empty export dict).
            """
            import numpy as np

            if not (audiofilepath and db_selection):
                return None, [], {}

            db = get_embeddings_database(db_selection)

            try:
                settings = db.get_metadata("birdnet_analyzer_settings")
                audio_speed = settings["AUDIO_SPEED"]
                fmin = settings["BANDPASS_FMIN"]
                fmax = settings["BANDPASS_FMAX"]
            finally:
                db.db.close()

            sig, rate = audio.open_audio_file(
                audiofilepath,
                duration=cfg.SIG_LENGTH * audio_speed if crop_mode == "first" else None,
                fmin=fmin,
                fmax=fmax,
                speed=audio_speed,
            )

            # Crop query audio; any other mode previews the whole signal.
            if crop_mode == "center":
                sig = audio.crop_center(sig, rate, cfg.SIG_LENGTH)
            elif crop_mode == "first":
                sig = audio.split_signal(sig, rate, cfg.SIG_LENGTH, crop_overlap, cfg.SIG_MINLEN)[0]

            sig = np.array(sig, dtype="float32")
            spec = utils.spectrogram_from_audio(sig, rate, fig_size=(10, 4))

            return spec, [], {}

        crop_mode.change(
            update_query_spectrogram,
            inputs=[query_sample_tb, db_selection_tb, crop_mode, crop_overlap],
            outputs=[query_spectrogram, results_state, export_state],
            preprocess=False,
        )
        query_sample_tb.change(
            update_query_spectrogram,
            inputs=[query_sample_tb, db_selection_tb, crop_mode, crop_overlap],
            outputs=[query_spectrogram, results_state, export_state],
            preprocess=False,
        )

        db_selection_button.click(
            on_db_selection_click,
            outputs=[
                db_selection_tb,
                db_embedding_count_number,
                db_bandpass_frequencies_tb,
                db_audio_speed_number,
                results_state,
                export_state,
                select_query_btn,
                query_sample_tb,
            ],
            show_progress="hidden",
        )

        search_btn.click(
            run_search,
            inputs=[
                db_selection_tb,
                query_sample_tb,
                max_samples_number,
                score_fn_select,
                crop_mode,
                crop_overlap,
            ],
            outputs=[results_state, page_state, export_btn, export_state],
            show_progress_on=export_btn,
        )

        export_btn.click(
            run_export,
            inputs=[export_state],
        )
633
+
634
+
635
def build_embeddings_tab():
    """Create the embeddings tab with its extract and search sub-tabs, in that order."""
    tab_title = loc.localize("embeddings-tab-title")

    with gr.Tab(tab_title):
        for build_sub_tab in (_build_extract_tab, _build_search_tab):
            build_sub_tab()