birdnet-analyzer 2.0.0__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122)
  1. birdnet_analyzer/__init__.py +9 -8
  2. birdnet_analyzer/analyze/__init__.py +5 -5
  3. birdnet_analyzer/analyze/__main__.py +3 -4
  4. birdnet_analyzer/analyze/cli.py +25 -25
  5. birdnet_analyzer/analyze/core.py +241 -245
  6. birdnet_analyzer/analyze/utils.py +692 -701
  7. birdnet_analyzer/audio.py +368 -372
  8. birdnet_analyzer/cli.py +709 -707
  9. birdnet_analyzer/config.py +242 -242
  10. birdnet_analyzer/eBird_taxonomy_codes_2021E.json +25279 -25279
  11. birdnet_analyzer/embeddings/__init__.py +3 -4
  12. birdnet_analyzer/embeddings/__main__.py +3 -3
  13. birdnet_analyzer/embeddings/cli.py +12 -13
  14. birdnet_analyzer/embeddings/core.py +69 -70
  15. birdnet_analyzer/embeddings/utils.py +179 -193
  16. birdnet_analyzer/evaluation/__init__.py +196 -195
  17. birdnet_analyzer/evaluation/__main__.py +3 -3
  18. birdnet_analyzer/evaluation/assessment/__init__.py +0 -0
  19. birdnet_analyzer/evaluation/assessment/metrics.py +388 -0
  20. birdnet_analyzer/evaluation/assessment/performance_assessor.py +409 -0
  21. birdnet_analyzer/evaluation/assessment/plotting.py +379 -0
  22. birdnet_analyzer/evaluation/preprocessing/__init__.py +0 -0
  23. birdnet_analyzer/evaluation/preprocessing/data_processor.py +631 -0
  24. birdnet_analyzer/evaluation/preprocessing/utils.py +98 -0
  25. birdnet_analyzer/gui/__init__.py +19 -23
  26. birdnet_analyzer/gui/__main__.py +3 -3
  27. birdnet_analyzer/gui/analysis.py +175 -174
  28. birdnet_analyzer/gui/assets/arrow_down.svg +4 -4
  29. birdnet_analyzer/gui/assets/arrow_left.svg +4 -4
  30. birdnet_analyzer/gui/assets/arrow_right.svg +4 -4
  31. birdnet_analyzer/gui/assets/arrow_up.svg +4 -4
  32. birdnet_analyzer/gui/assets/gui.css +28 -28
  33. birdnet_analyzer/gui/assets/gui.js +93 -93
  34. birdnet_analyzer/gui/embeddings.py +619 -620
  35. birdnet_analyzer/gui/evaluation.py +795 -813
  36. birdnet_analyzer/gui/localization.py +75 -68
  37. birdnet_analyzer/gui/multi_file.py +245 -246
  38. birdnet_analyzer/gui/review.py +519 -527
  39. birdnet_analyzer/gui/segments.py +191 -191
  40. birdnet_analyzer/gui/settings.py +128 -129
  41. birdnet_analyzer/gui/single_file.py +267 -269
  42. birdnet_analyzer/gui/species.py +95 -95
  43. birdnet_analyzer/gui/train.py +696 -698
  44. birdnet_analyzer/gui/utils.py +810 -808
  45. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_af.txt +6522 -6522
  46. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ar.txt +6522 -6522
  47. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_bg.txt +6522 -6522
  48. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ca.txt +6522 -6522
  49. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_cs.txt +6522 -6522
  50. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_da.txt +6522 -6522
  51. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_de.txt +6522 -6522
  52. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_el.txt +6522 -6522
  53. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_en_uk.txt +6522 -6522
  54. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_es.txt +6522 -6522
  55. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_fi.txt +6522 -6522
  56. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_fr.txt +6522 -6522
  57. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_he.txt +6522 -6522
  58. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_hr.txt +6522 -6522
  59. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_hu.txt +6522 -6522
  60. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_in.txt +6522 -6522
  61. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_is.txt +6522 -6522
  62. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_it.txt +6522 -6522
  63. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ja.txt +6522 -6522
  64. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ko.txt +6522 -6522
  65. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_lt.txt +6522 -6522
  66. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ml.txt +6522 -6522
  67. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_nl.txt +6522 -6522
  68. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_no.txt +6522 -6522
  69. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_pl.txt +6522 -6522
  70. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_pt_BR.txt +6522 -6522
  71. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_pt_PT.txt +6522 -6522
  72. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ro.txt +6522 -6522
  73. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ru.txt +6522 -6522
  74. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sk.txt +6522 -6522
  75. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sl.txt +6522 -6522
  76. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sr.txt +6522 -6522
  77. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sv.txt +6522 -6522
  78. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_th.txt +6522 -6522
  79. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_tr.txt +6522 -6522
  80. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_uk.txt +6522 -6522
  81. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_zh.txt +6522 -6522
  82. birdnet_analyzer/lang/de.json +334 -334
  83. birdnet_analyzer/lang/en.json +334 -334
  84. birdnet_analyzer/lang/fi.json +334 -334
  85. birdnet_analyzer/lang/fr.json +334 -334
  86. birdnet_analyzer/lang/id.json +334 -334
  87. birdnet_analyzer/lang/pt-br.json +334 -334
  88. birdnet_analyzer/lang/ru.json +334 -334
  89. birdnet_analyzer/lang/se.json +334 -334
  90. birdnet_analyzer/lang/tlh.json +334 -334
  91. birdnet_analyzer/lang/zh_TW.json +334 -334
  92. birdnet_analyzer/model.py +1212 -1243
  93. birdnet_analyzer/playground.py +5 -0
  94. birdnet_analyzer/search/__init__.py +3 -3
  95. birdnet_analyzer/search/__main__.py +3 -3
  96. birdnet_analyzer/search/cli.py +11 -12
  97. birdnet_analyzer/search/core.py +78 -78
  98. birdnet_analyzer/search/utils.py +107 -111
  99. birdnet_analyzer/segments/__init__.py +3 -3
  100. birdnet_analyzer/segments/__main__.py +3 -3
  101. birdnet_analyzer/segments/cli.py +13 -14
  102. birdnet_analyzer/segments/core.py +81 -78
  103. birdnet_analyzer/segments/utils.py +383 -394
  104. birdnet_analyzer/species/__init__.py +3 -3
  105. birdnet_analyzer/species/__main__.py +3 -3
  106. birdnet_analyzer/species/cli.py +13 -14
  107. birdnet_analyzer/species/core.py +35 -35
  108. birdnet_analyzer/species/utils.py +74 -75
  109. birdnet_analyzer/train/__init__.py +3 -3
  110. birdnet_analyzer/train/__main__.py +3 -3
  111. birdnet_analyzer/train/cli.py +13 -14
  112. birdnet_analyzer/train/core.py +113 -113
  113. birdnet_analyzer/train/utils.py +877 -847
  114. birdnet_analyzer/translate.py +133 -104
  115. birdnet_analyzer/utils.py +426 -419
  116. {birdnet_analyzer-2.0.0.dist-info → birdnet_analyzer-2.0.1.dist-info}/METADATA +137 -129
  117. birdnet_analyzer-2.0.1.dist-info/RECORD +125 -0
  118. {birdnet_analyzer-2.0.0.dist-info → birdnet_analyzer-2.0.1.dist-info}/WHEEL +1 -1
  119. {birdnet_analyzer-2.0.0.dist-info → birdnet_analyzer-2.0.1.dist-info}/licenses/LICENSE +18 -18
  120. birdnet_analyzer-2.0.0.dist-info/RECORD +0 -117
  121. {birdnet_analyzer-2.0.0.dist-info → birdnet_analyzer-2.0.1.dist-info}/entry_points.txt +0 -0
  122. {birdnet_analyzer-2.0.0.dist-info → birdnet_analyzer-2.0.1.dist-info}/top_level.txt +0 -0
@@ -1,620 +1,619 @@
1
- import os
2
- from functools import partial
3
-
4
- import gradio as gr
5
-
6
- import birdnet_analyzer.config as cfg
7
- import birdnet_analyzer.gui.localization as loc
8
- import birdnet_analyzer.gui.utils as gu
9
- from birdnet_analyzer.embeddings.core import get_database as get_embeddings_database
10
- from birdnet_analyzer.search.core import get_database as get_search_database
11
-
12
- SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__))
13
- PAGE_SIZE = 4
14
-
15
-
16
- def play_audio(audio_infos):
17
- import birdnet_analyzer.audio as audio
18
-
19
- arr, sr = audio.open_audio_file(
20
- audio_infos[0],
21
- offset=audio_infos[1],
22
- duration=audio_infos[2],
23
- speed=audio_infos[5],
24
- fmin=audio_infos[6],
25
- fmax=audio_infos[7],
26
- )
27
-
28
- return sr, arr
29
-
30
-
31
- def update_export_state(audio_infos, checkbox_value, export_state: dict):
32
- if checkbox_value:
33
- export_state[audio_infos[3]] = audio_infos
34
- else:
35
- export_state.pop(audio_infos[3], None)
36
-
37
- return export_state
38
-
39
-
40
- def rum_embeddings_with_tqdm_tracking(
41
- input_path,
42
- db_directory,
43
- db_name,
44
- overlap,
45
- threads,
46
- batch_size,
47
- audio_speed,
48
- fmin,
49
- fmax,
50
- progress=gr.Progress(track_tqdm=True),
51
- ):
52
- return run_embeddings(
53
- input_path,
54
- db_directory,
55
- db_name,
56
- overlap,
57
- threads,
58
- batch_size,
59
- audio_speed,
60
- fmin,
61
- fmax,
62
- progress,
63
- )
64
-
65
-
66
- @gu.gui_runtime_error_handler
67
- def run_embeddings(
68
- input_path,
69
- db_directory,
70
- db_name,
71
- overlap,
72
- threads,
73
- batch_size,
74
- audio_speed,
75
- fmin,
76
- fmax,
77
- progress,
78
- ):
79
- from birdnet_analyzer.embeddings.utils import run
80
-
81
- gu.validate(input_path, loc.localize("embeddings-input-dir-validation-message"))
82
- gu.validate(db_directory, loc.localize("embeddings-db-dir-validation-message"))
83
- gu.validate(db_name, loc.localize("embeddings-db-name-validation-message"))
84
- db_path = os.path.join(db_directory, db_name)
85
-
86
- db = get_embeddings_database(db_path)
87
-
88
- try:
89
- settings = db.get_metadata("birdnet_analyzer_settings")
90
- db.db.close()
91
- run(
92
- input_path,
93
- db_path,
94
- overlap,
95
- settings["AUDIO_SPEED"],
96
- settings["BANDPASS_FMIN"],
97
- settings["BANDPASS_FMAX"],
98
- threads,
99
- batch_size,
100
- )
101
- except:
102
- db.db.close()
103
- # Transform audiospeed from slider to float
104
- audio_speed = max(0.1, 1.0 / (audio_speed * -1)) if audio_speed < 0 else max(1.0, float(audio_speed))
105
-
106
- if fmin is None or fmax is None or fmin < cfg.SIG_FMIN or fmax > cfg.SIG_FMAX or fmin > fmax:
107
- raise gr.Error(f"{loc.localize('validation-no-valid-frequency')} [{cfg.SIG_FMIN}, {cfg.SIG_FMAX}]")
108
-
109
- run(input_path, db_path, overlap, audio_speed, fmin, fmax, threads, batch_size)
110
-
111
- gr.Info(f"{loc.localize('embeddings-tab-finish-info')} {db_path}")
112
-
113
- return gr.Plot(), gr.Slider(visible=False), gr.Number(visible=False), gr.Number(visible=False)
114
-
115
-
116
- @gu.gui_runtime_error_handler
117
- def run_search(db_path, query_path, max_samples, score_fn, crop_mode, crop_overlap):
118
- from birdnet_analyzer.search.utils import get_search_results
119
-
120
- gu.validate(db_path, loc.localize("embeddings-search-db-validation-message"))
121
- gu.validate(query_path, loc.localize("embeddings-search-query-validation-message"))
122
- gu.validate(max_samples, loc.localize("embeddings-search-max-samples-validation-message"))
123
-
124
- db = get_search_database(db_path)
125
- settings = db.get_metadata("birdnet_analyzer_settings")
126
-
127
- results = get_search_results(
128
- query_path,
129
- db,
130
- max_samples,
131
- settings["AUDIO_SPEED"],
132
- settings["BANDPASS_FMIN"],
133
- settings["BANDPASS_FMAX"],
134
- score_fn,
135
- crop_mode,
136
- crop_overlap,
137
- )
138
- db.db.close() # Close the database connection to avoid having wal/shm files
139
-
140
- chunks = [results[i : i + PAGE_SIZE] for i in range(0, len(results), PAGE_SIZE)]
141
-
142
- return chunks, 0, gr.Button(interactive=True), {}
143
-
144
-
145
- def run_export(export_state):
146
- import birdnet_analyzer.audio as audio
147
-
148
- if len(export_state.items()) > 0:
149
- export_folder = gu.select_folder(state_key="embeddings-search-export-folder")
150
-
151
- if export_folder:
152
- for index, file in export_state.items():
153
- filebasename = os.path.basename(file[0])
154
- filebasename = os.path.splitext(filebasename)[0]
155
- dest = os.path.join(export_folder, f"{file[4]:.5f}_{filebasename}_{file[1]}_{file[1] + file[2]}.wav")
156
- # @mamau: Missing audio speed?
157
- sig, rate = audio.open_audio_file(file[0], offset=file[1], duration=file[2], sample_rate=None)
158
- audio.save_signal(sig, dest, rate)
159
-
160
- gr.Info(f"{loc.localize('embeddings-search-export-finish-info')} {export_folder}")
161
- else:
162
- gr.Info(loc.localize("embeddings-search-export-no-results-info"))
163
-
164
-
165
- def _build_extract_tab():
166
- with gr.Tab(loc.localize("embeddings-extract-tab-title")):
167
- input_directory_state = gr.State()
168
- db_directory_state = gr.State()
169
-
170
- def select_directory_to_state_and_tb(state_key):
171
- return (gu.select_directory(collect_files=False, state_key=state_key),) * 2
172
-
173
- with gr.Row():
174
- select_audio_directory_btn = gr.Button(loc.localize("embeddings-tab-select-input-directory-button-label"))
175
- selected_audio_directory_tb = gr.Textbox(show_label=False, interactive=False)
176
- select_audio_directory_btn.click(
177
- partial(select_directory_to_state_and_tb, state_key="embeddings-input-dir"),
178
- outputs=[selected_audio_directory_tb, input_directory_state],
179
- show_progress=False,
180
- )
181
-
182
- with gr.Row():
183
- select_db_directory_btn = gr.Button(loc.localize("embeddings-tab-select-db-directory-button-label"))
184
-
185
- with gr.Row():
186
- db_name_tb = gr.Textbox(
187
- "embeddings_database",
188
- visible=False,
189
- interactive=True,
190
- info=loc.localize("embeddings-tab-db-info"),
191
- )
192
-
193
- with gr.Accordion(loc.localize("embedding-settings-accordion-label"), open=False):
194
- with gr.Row():
195
- overlap_slider = gr.Slider(
196
- minimum=0,
197
- maximum=2.9,
198
- value=0,
199
- step=0.1,
200
- label=loc.localize("embedding-settings-overlap-slider-label"),
201
- info=loc.localize("embedding-settings-overlap-slider-info"),
202
- )
203
- batch_size_number = gr.Number(
204
- precision=1,
205
- label=loc.localize("embedding-settings-batchsize-number-label"),
206
- value=1,
207
- info=loc.localize("embedding-settings-batchsize-number-info"),
208
- minimum=1,
209
- interactive=True,
210
- )
211
- threads_number = gr.Number(
212
- precision=1,
213
- label=loc.localize("embedding-settings-threads-number-label"),
214
- value=4,
215
- info=loc.localize("embedding-settings-threads-number-info"),
216
- minimum=1,
217
- interactive=True,
218
- )
219
-
220
- with gr.Row():
221
- audio_speed_slider = gr.Slider(
222
- minimum=-10,
223
- maximum=10,
224
- value=0,
225
- step=1,
226
- label=loc.localize("embedding-settings-audio-speed-slider-label"),
227
- info=loc.localize("embedding-settings-audio-speed-slider-info"),
228
- )
229
- with gr.Row():
230
- fmin_number = gr.Number(
231
- cfg.SIG_FMIN,
232
- minimum=0,
233
- label=loc.localize("embedding-settings-fmin-number-label"),
234
- info=loc.localize("embedding-settings-fmin-number-info"),
235
- interactive=True,
236
- )
237
- fmax_number = gr.Number(
238
- cfg.SIG_FMAX,
239
- minimum=0,
240
- label=loc.localize("embedding-settings-fmax-number-label"),
241
- info=loc.localize("embedding-settings-fmax-number-info"),
242
- interactive=True,
243
- )
244
-
245
- def select_directory_and_update_tb(db_name):
246
- dir_name = gu.select_directory(state_key="embeddings-db-dir", collect_files=False)
247
-
248
- if dir_name:
249
- db_path = os.path.join(dir_name, db_name)
250
-
251
- if os.path.exists(db_path):
252
- db = get_embeddings_database(db_path)
253
-
254
- try:
255
- settings = db.get_metadata("birdnet_analyzer_settings")
256
- gr.Info(loc.localize("embeddings-db-already-exists-info"))
257
-
258
- return (
259
- dir_name,
260
- gr.Textbox(label=dir_name, visible=True),
261
- gr.Slider(value=settings["AUDIO_SPEED"], interactive=False),
262
- gr.Number(value=settings["BANDPASS_FMIN"], interactive=False),
263
- gr.Number(value=settings["BANDPASS_FMAX"], interactive=False),
264
- )
265
- except KeyError:
266
- pass
267
- finally:
268
- db.db.close()
269
-
270
- return (
271
- dir_name,
272
- gr.Textbox(label=dir_name, visible=True),
273
- gr.Slider(interactive=True),
274
- gr.Number(interactive=True),
275
- gr.Number(interactive=True),
276
- )
277
-
278
- return None, None, gr.Slider(interactive=True), gr.Number(interactive=True), gr.Number(interactive=True)
279
-
280
- select_db_directory_btn.click(
281
- select_directory_and_update_tb,
282
- inputs=[db_name_tb],
283
- outputs=[db_directory_state, db_name_tb, audio_speed_slider, fmin_number, fmax_number],
284
- show_progress=False,
285
- )
286
-
287
- def check_settings(dir_name, db_name):
288
- db_path = os.path.join(dir_name, db_name)
289
-
290
- if db_name and os.path.exists(db_path):
291
- db = get_embeddings_database(db_path)
292
-
293
- try:
294
- settings = db.get_metadata("birdnet_analyzer_settings")
295
-
296
- return (
297
- gr.Slider(value=settings["AUDIO_SPEED"], interactive=False),
298
- gr.Number(value=settings["BANDPASS_FMIN"], interactive=False),
299
- gr.Number(value=settings["BANDPASS_FMAX"], interactive=False),
300
- )
301
- except KeyError:
302
- pass
303
- finally:
304
- db.db.close()
305
-
306
- return gr.Slider(interactive=True), gr.Number(interactive=True), gr.Number(interactive=True)
307
-
308
- db_name_tb.change(
309
- check_settings,
310
- inputs=[db_directory_state, db_name_tb],
311
- outputs=[audio_speed_slider, fmin_number, fmax_number],
312
- show_progress=False,
313
- )
314
-
315
- progress_plot = gr.Plot()
316
- start_btn = gr.Button(loc.localize("embeddings-tab-start-button-label"), variant="huggingface")
317
-
318
- start_btn.click(
319
- rum_embeddings_with_tqdm_tracking,
320
- inputs=[
321
- input_directory_state,
322
- db_directory_state,
323
- db_name_tb,
324
- overlap_slider,
325
- batch_size_number,
326
- threads_number,
327
- audio_speed_slider,
328
- fmin_number,
329
- fmax_number,
330
- ],
331
- outputs=[progress_plot, audio_speed_slider, fmin_number, fmax_number],
332
- show_progress_on=progress_plot,
333
- show_progress=True,
334
- )
335
-
336
-
337
- def _build_search_tab():
338
- import birdnet_analyzer.audio as audio
339
- import birdnet_analyzer.utils as utils
340
-
341
- with gr.Tab(loc.localize("embeddings-search-tab-title")):
342
- results_state = gr.State([])
343
- page_state = gr.State(0)
344
- export_state = gr.State({})
345
- hidden_audio = gr.Audio(visible=False, autoplay=True, type="numpy")
346
-
347
- with gr.Row():
348
- with gr.Column():
349
- db_selection_button = gr.Button(loc.localize("embeddings-search-db-selection-button-label"))
350
- with gr.Group():
351
- with gr.Row():
352
- db_selection_tb = gr.Textbox(
353
- label=loc.localize("embeddings-search-db-selection-textbox-label"),
354
- max_lines=3,
355
- interactive=False,
356
- visible=False,
357
- )
358
- db_embedding_count_number = gr.Number(
359
- interactive=False,
360
- visible=False,
361
- label=loc.localize("embeddings-search-db-embedding-count-number-label"),
362
- )
363
- with gr.Row():
364
- db_bandpass_frequencies_tb = gr.Textbox(
365
- label=loc.localize("embeddings-search-db-bandpass-frequencies-label"),
366
- interactive=False,
367
- visible=False,
368
- )
369
- db_audio_speed_number = gr.Number(
370
- interactive=False,
371
- visible=False,
372
- label=loc.localize("embeddings-search-db-audio-speed-number-label"),
373
- )
374
- query_spectrogram = gr.Plot(show_label=False)
375
- select_query_btn = gr.Button(loc.localize("embeddings-search-select-query-button-label"))
376
- query_sample_tb = gr.Textbox(
377
- label=loc.localize("embeddings-search-query-sample-textbox-label"),
378
- visible=False,
379
- interactive=False,
380
- )
381
-
382
- crop_mode = gr.Radio(
383
- [
384
- (loc.localize("training-tab-crop-mode-radio-option-center"), "center"),
385
- (loc.localize("training-tab-crop-mode-radio-option-first"), "first"),
386
- (loc.localize("training-tab-crop-mode-radio-option-segments"), "segments"),
387
- ],
388
- value="center",
389
- label=loc.localize("training-tab-crop-mode-radio-label"),
390
- info=loc.localize("embeddings-search-crop-mode-radio-info"),
391
- )
392
-
393
- crop_overlap = gr.Slider(
394
- minimum=0,
395
- maximum=2.9,
396
- value=0,
397
- step=0.1,
398
- label=loc.localize("training-tab-crop-overlap-number-label"),
399
- info=loc.localize("embeddings-search-crop-overlap-number-info"),
400
- visible=False,
401
- )
402
- max_samples_number = gr.Number(
403
- label=loc.localize("embeddings-search-max-samples-number-label"),
404
- value=10,
405
- interactive=True,
406
- )
407
- score_fn_select = gr.Radio(
408
- label=loc.localize("embeddings-search-score-fn-select-label"),
409
- choices=["cosine", "dot", "euclidean"],
410
- value="cosine",
411
- interactive=True,
412
- )
413
- max_samples_number = gr.Number(
414
- label=loc.localize("embeddings-search-max-samples-number-label"),
415
- value=10,
416
- interactive=True,
417
- )
418
- score_fn_select = gr.Radio(
419
- label=loc.localize("embeddings-search-score-fn-select-label"),
420
- choices=["cosine", "dot", "euclidean"],
421
- value="cosine",
422
- interactive=True,
423
- )
424
- search_btn = gr.Button(loc.localize("embeddings-search-start-button-label"), variant="huggingface")
425
-
426
- with gr.Column():
427
- with gr.Column(elem_id="embeddings-search-results"):
428
-
429
- @gr.render(
430
- inputs=[results_state, page_state, db_selection_tb, export_state],
431
- triggers=[results_state.change, page_state.change, db_selection_tb.change],
432
- )
433
- def render_results(results, page, db_path, exports):
434
- with gr.Row():
435
- if db_path is not None and len(results) > 0:
436
- db = get_search_database(db_path)
437
- settings = db.get_metadata("birdnet_analyzer_settings")
438
-
439
- for i, r in enumerate(results[page]):
440
- with gr.Column():
441
- index = i + page * PAGE_SIZE
442
- embedding_source = db.get_embedding_source(r.embedding_id)
443
- file = embedding_source.source_id
444
- offset = embedding_source.offsets[0] * settings["AUDIO_SPEED"]
445
- duration = 3 * settings["AUDIO_SPEED"]
446
- spec = utils.spectrogram_from_file(
447
- file,
448
- offset=offset,
449
- duration=duration,
450
- speed=settings["AUDIO_SPEED"],
451
- fmin=settings["BANDPASS_FMIN"],
452
- fmax=settings["BANDPASS_FMAX"],
453
- fig_size=(6, 3),
454
- )
455
- plot_audio_state = gr.State(
456
- [
457
- file,
458
- offset,
459
- duration,
460
- index,
461
- r.sort_score,
462
- settings["AUDIO_SPEED"],
463
- settings["BANDPASS_FMIN"],
464
- settings["BANDPASS_FMAX"],
465
- ]
466
- )
467
- with gr.Row():
468
- gr.Plot(spec, label=f"{index + 1}_score: {r.sort_score:.2f}")
469
-
470
- with gr.Row():
471
- play_btn = gr.Button("▶")
472
- play_btn.click(play_audio, inputs=plot_audio_state, outputs=hidden_audio)
473
- checkbox = gr.Checkbox(label="Export", value=(index in exports.keys()))
474
- checkbox.change(
475
- update_export_state,
476
- inputs=[plot_audio_state, checkbox, export_state],
477
- outputs=export_state,
478
- )
479
- db.db.close() # Close the database connection to avoid having wal/shm files
480
-
481
- with gr.Row():
482
- prev_btn = gr.Button("Previous Page", interactive=page > 0)
483
- next_btn = gr.Button("Next Page", interactive=page < len(results) - 1)
484
-
485
- def prev_page(page):
486
- return page - 1 if page > 0 else 0
487
-
488
- def next_page(page):
489
- return page + 1
490
-
491
- prev_btn.click(prev_page, inputs=[page_state], outputs=[page_state])
492
- next_btn.click(next_page, inputs=[page_state], outputs=[page_state])
493
-
494
- export_btn = gr.Button(
495
- loc.localize("embeddings-search-export-button-label"), variant="huggingface", interactive=False
496
- )
497
-
498
- def on_db_selection_click():
499
- folder = gu.select_folder(state_key="embeddings_search_db")
500
-
501
- try:
502
- db = get_embeddings_database(folder)
503
- except ValueError as e:
504
- raise gr.Error(loc.localize("embeddings-search-db-selection-error")) from e
505
-
506
- embedding_count = db.count_embeddings()
507
- settings = db.get_metadata("birdnet_analyzer_settings")
508
- frequencies = f"{settings['BANDPASS_FMIN']} - {settings['BANDPASS_FMAX']} Hz"
509
- speed = settings["AUDIO_SPEED"]
510
- db.db.close()
511
-
512
- if folder:
513
- return (
514
- gr.Textbox(value=folder, visible=True),
515
- gr.Number(value=embedding_count, visible=True),
516
- gr.Textbox(visible=True, value=frequencies),
517
- gr.Number(visible=True, value=speed),
518
- [],
519
- {},
520
- gr.Button(visible=True),
521
- gr.Textbox(value=None, visible=False),
522
- )
523
-
524
- return None, None, None, None, [], {}, gr.Button(visible=False), gr.Textbox(visible=False)
525
-
526
- def select_query_sample():
527
- file = gu.select_file(state_key="query_sample")
528
- return gr.Textbox(file, visible=True)
529
-
530
- select_query_btn.click(select_query_sample, outputs=[query_sample_tb])
531
-
532
- def on_crop_select(new_crop_mode):
533
- return gr.Number(visible=new_crop_mode == "segments", interactive=new_crop_mode == "segments")
534
-
535
- crop_mode.change(on_crop_select, inputs=crop_mode, outputs=crop_overlap)
536
-
537
- def update_query_spectrogram(audiofilepath, db_selection, crop_mode, crop_overlap):
538
- import numpy as np
539
-
540
- if audiofilepath and db_selection:
541
- db = get_embeddings_database(db_selection)
542
- settings = db.get_metadata("birdnet_analyzer_settings")
543
- audio_speed = settings["AUDIO_SPEED"]
544
- fmin = settings["BANDPASS_FMIN"]
545
- fmax = settings["BANDPASS_FMAX"]
546
- db.db.close()
547
-
548
- sig, rate = audio.open_audio_file(
549
- audiofilepath,
550
- duration=cfg.SIG_LENGTH * audio_speed if crop_mode == "first" else None,
551
- fmin=fmin,
552
- fmax=fmax,
553
- speed=audio_speed,
554
- )
555
-
556
- # Crop query audio
557
- if crop_mode == "center":
558
- sig = [audio.crop_center(sig, rate, cfg.SIG_LENGTH)][0]
559
- elif crop_mode == "first":
560
- sig = [audio.split_signal(sig, rate, cfg.SIG_LENGTH, crop_overlap, cfg.SIG_MINLEN)[0]][0]
561
-
562
- sig = np.array(sig, dtype="float32")
563
- spec = utils.spectrogram_from_audio(sig, rate, fig_size=(10, 4))
564
-
565
- return spec, [], {}
566
- else:
567
- return None, [], {}
568
-
569
- crop_mode.change(
570
- update_query_spectrogram,
571
- inputs=[query_sample_tb, db_selection_tb, crop_mode, crop_overlap],
572
- outputs=[query_spectrogram, results_state, export_state],
573
- preprocess=False,
574
- )
575
- query_sample_tb.change(
576
- update_query_spectrogram,
577
- inputs=[query_sample_tb, db_selection_tb, crop_mode, crop_overlap],
578
- outputs=[query_spectrogram, results_state, export_state],
579
- preprocess=False,
580
- )
581
-
582
- db_selection_button.click(
583
- on_db_selection_click,
584
- outputs=[
585
- db_selection_tb,
586
- db_embedding_count_number,
587
- db_bandpass_frequencies_tb,
588
- db_audio_speed_number,
589
- results_state,
590
- export_state,
591
- select_query_btn,
592
- query_sample_tb,
593
- ],
594
- show_progress=False,
595
- )
596
-
597
- search_btn.click(
598
- run_search,
599
- inputs=[
600
- db_selection_tb,
601
- query_sample_tb,
602
- max_samples_number,
603
- score_fn_select,
604
- crop_mode,
605
- crop_overlap,
606
- ],
607
- outputs=[results_state, page_state, export_btn, export_state],
608
- show_progress_on=export_btn,
609
- )
610
-
611
- export_btn.click(
612
- run_export,
613
- inputs=[export_state],
614
- )
615
-
616
-
617
- def build_embeddings_tab():
618
- with gr.Tab(loc.localize("embeddings-tab-title")):
619
- _build_extract_tab()
620
- _build_search_tab()
1
+ import os
2
+ from functools import partial
3
+
4
+ import gradio as gr
5
+
6
+ import birdnet_analyzer.config as cfg
7
+ import birdnet_analyzer.gui.localization as loc
8
+ import birdnet_analyzer.gui.utils as gu
9
+ from birdnet_analyzer.embeddings.core import get_database as get_embeddings_database
10
+ from birdnet_analyzer.search.core import get_database as get_search_database
11
+
12
+ SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__))
13
+ PAGE_SIZE = 4
14
+
15
+
16
+ def play_audio(audio_infos):
17
+ from birdnet_analyzer import audio
18
+
19
+ arr, sr = audio.open_audio_file(
20
+ audio_infos[0],
21
+ offset=audio_infos[1],
22
+ duration=audio_infos[2],
23
+ speed=audio_infos[5],
24
+ fmin=audio_infos[6],
25
+ fmax=audio_infos[7],
26
+ )
27
+
28
+ return sr, arr
29
+
30
+
31
+ def update_export_state(audio_infos, checkbox_value, export_state: dict):
32
+ if checkbox_value:
33
+ export_state[audio_infos[3]] = audio_infos
34
+ else:
35
+ export_state.pop(audio_infos[3], None)
36
+
37
+ return export_state
38
+
39
+
40
+ def rum_embeddings_with_tqdm_tracking(
41
+ input_path,
42
+ db_directory,
43
+ db_name,
44
+ overlap,
45
+ threads,
46
+ batch_size,
47
+ audio_speed,
48
+ fmin,
49
+ fmax,
50
+ progress=gr.Progress(track_tqdm=True),
51
+ ):
52
+ return run_embeddings(
53
+ input_path,
54
+ db_directory,
55
+ db_name,
56
+ overlap,
57
+ threads,
58
+ batch_size,
59
+ audio_speed,
60
+ fmin,
61
+ fmax,
62
+ progress,
63
+ )
64
+
65
+
66
+ @gu.gui_runtime_error_handler
67
+ def run_embeddings(
68
+ input_path,
69
+ db_directory,
70
+ db_name,
71
+ overlap,
72
+ threads,
73
+ batch_size,
74
+ audio_speed,
75
+ fmin,
76
+ fmax,
77
+ progress,
78
+ ):
79
+ from birdnet_analyzer.embeddings.utils import run
80
+
81
+ gu.validate(input_path, loc.localize("embeddings-input-dir-validation-message"))
82
+ gu.validate(db_directory, loc.localize("embeddings-db-dir-validation-message"))
83
+ gu.validate(db_name, loc.localize("embeddings-db-name-validation-message"))
84
+ db_path = os.path.join(db_directory, db_name)
85
+
86
+ db = get_embeddings_database(db_path)
87
+
88
+ try:
89
+ settings = db.get_metadata("birdnet_analyzer_settings")
90
+ db.db.close()
91
+ run(
92
+ input_path,
93
+ db_path,
94
+ overlap,
95
+ settings["AUDIO_SPEED"],
96
+ settings["BANDPASS_FMIN"],
97
+ settings["BANDPASS_FMAX"],
98
+ threads,
99
+ batch_size,
100
+ )
101
+ except Exception as e:
102
+ db.db.close()
103
+ # Transform audiospeed from slider to float
104
+ audio_speed = max(0.1, 1.0 / (audio_speed * -1)) if audio_speed < 0 else max(1.0, float(audio_speed))
105
+
106
+ if fmin is None or fmax is None or fmin < cfg.SIG_FMIN or fmax > cfg.SIG_FMAX or fmin > fmax:
107
+ raise gr.Error(f"{loc.localize('validation-no-valid-frequency')} [{cfg.SIG_FMIN}, {cfg.SIG_FMAX}]") from e
108
+
109
+ run(input_path, db_path, overlap, audio_speed, fmin, fmax, threads, batch_size)
110
+
111
+ gr.Info(f"{loc.localize('embeddings-tab-finish-info')} {db_path}")
112
+
113
+ return gr.Plot(), gr.Slider(visible=False), gr.Number(visible=False), gr.Number(visible=False)
114
+
115
+
116
@gu.gui_runtime_error_handler
def run_search(db_path, query_path, max_samples, score_fn, crop_mode, crop_overlap):
    """Search an embeddings database for samples similar to a query file.

    The three required inputs are passed through ``gu.validate`` together with
    a localized message. The audio speed and bandpass limits are not taken
    from the UI; they are read from the database's
    ``"birdnet_analyzer_settings"`` metadata and handed to
    ``get_search_results`` along with the query and scoring options.

    Args:
        db_path: Location of the embeddings database to search.
        query_path: Audio file used as the query sample.
        max_samples: Maximum number of results requested from the search.
        score_fn: Scoring function identifier forwarded to the search.
        crop_mode: Crop mode forwarded to the search.
        crop_overlap: Crop overlap forwarded to the search.

    Returns:
        A 4-tuple ``(pages, 0, gr.Button(interactive=True), {})`` where
        ``pages`` is the result sequence split into lists of at most
        ``PAGE_SIZE`` items.
    """
    from birdnet_analyzer.search.utils import get_search_results

    gu.validate(db_path, loc.localize("embeddings-search-db-validation-message"))
    gu.validate(query_path, loc.localize("embeddings-search-query-validation-message"))
    gu.validate(max_samples, loc.localize("embeddings-search-max-samples-validation-message"))

    db = get_search_database(db_path)

    try:
        settings = db.get_metadata("birdnet_analyzer_settings")
        results = get_search_results(
            query_path,
            db,
            max_samples,
            settings["AUDIO_SPEED"],
            settings["BANDPASS_FMIN"],
            settings["BANDPASS_FMAX"],
            score_fn,
            crop_mode,
            crop_overlap,
        )
    finally:
        # Close the database connection to avoid having wal/shm files.
        # Done in `finally` so a failing metadata lookup or search does not
        # leave the connection open.
        db.db.close()

    chunks = [results[i : i + PAGE_SIZE] for i in range(0, len(results), PAGE_SIZE)]

    return chunks, 0, gr.Button(interactive=True), {}
143
+
144
+
145
def run_export(export_state: dict):
    """Write every selected search result to a WAV file in a chosen folder.

    When ``export_state`` is empty an info message is shown and nothing else
    happens. Otherwise a folder is requested through ``gu.select_folder``; if
    one is returned, each entry is loaded with ``audio.open_audio_file`` and
    written with ``audio.save_signal``, followed by a completion message.

    Args:
        export_state: Mapping whose values are indexable records. Positions
            0, 1, 2 and 4 are used here as source path, offset, duration and
            score (presumably the list built for each result in the search
            tab; verify against ``update_export_state``).
    """
    from birdnet_analyzer import audio

    # Nothing was ticked for export: tell the user and stop.
    if len(export_state.items()) == 0:
        gr.Info(loc.localize("embeddings-search-export-no-results-info"))
        return

    export_folder = gu.select_folder(state_key="embeddings-search-export-folder")

    # No folder returned: leave silently, as before.
    if not export_folder:
        return

    for entry in export_state.values():
        stem = os.path.splitext(os.path.basename(entry[0]))[0]
        target_name = f"{entry[4]:.5f}_{stem}_{entry[1]}_{entry[1] + entry[2]}.wav"
        dest = os.path.join(export_folder, target_name)
        # @mamau: Missing audio speed?
        sig, rate = audio.open_audio_file(entry[0], offset=entry[1], duration=entry[2], sample_rate=None)
        audio.save_signal(sig, dest, rate)

    gr.Info(f"{loc.localize('embeddings-search-export-finish-info')} {export_folder}")
163
+
164
+
165
def _build_extract_tab():
    """Build the "extract embeddings" sub-tab and wire up its events.

    The tab contains an input-directory picker, a database-directory picker
    with a database-name textbox, an accordion with the extraction settings
    (overlap, batch size, threads, audio speed, bandpass limits), a progress
    plot and a start button.

    When the selected directory/name pair points at an existing database that
    stores ``"birdnet_analyzer_settings"`` metadata, the audio speed and
    bandpass controls are set from that metadata and made non-interactive;
    otherwise they are (re-)enabled.
    """
    with gr.Tab(loc.localize("embeddings-extract-tab-title")):
        input_directory_state = gr.State()
        db_directory_state = gr.State()

        def select_directory_to_state_and_tb(state_key):
            """Return the chosen directory twice: once for the textbox, once for the state."""
            return (gu.select_directory(collect_files=False, state_key=state_key),) * 2

        with gr.Row():
            select_audio_directory_btn = gr.Button(loc.localize("embeddings-tab-select-input-directory-button-label"))
            selected_audio_directory_tb = gr.Textbox(show_label=False, interactive=False)
            select_audio_directory_btn.click(
                partial(select_directory_to_state_and_tb, state_key="embeddings-input-dir"),
                outputs=[selected_audio_directory_tb, input_directory_state],
                show_progress=False,
            )

        with gr.Row():
            select_db_directory_btn = gr.Button(loc.localize("embeddings-tab-select-db-directory-button-label"))

        with gr.Row():
            db_name_tb = gr.Textbox(
                "embeddings_database",
                visible=False,
                interactive=True,
                info=loc.localize("embeddings-tab-db-info"),
            )

        with gr.Accordion(loc.localize("embedding-settings-accordion-label"), open=False):
            with gr.Row():
                overlap_slider = gr.Slider(
                    minimum=0,
                    maximum=2.9,
                    value=0,
                    step=0.1,
                    label=loc.localize("embedding-settings-overlap-slider-label"),
                    info=loc.localize("embedding-settings-overlap-slider-info"),
                )
                batch_size_number = gr.Number(
                    precision=1,
                    label=loc.localize("embedding-settings-batchsize-number-label"),
                    value=1,
                    info=loc.localize("embedding-settings-batchsize-number-info"),
                    minimum=1,
                    interactive=True,
                )
                threads_number = gr.Number(
                    precision=1,
                    label=loc.localize("embedding-settings-threads-number-label"),
                    value=4,
                    info=loc.localize("embedding-settings-threads-number-info"),
                    minimum=1,
                    interactive=True,
                )

            with gr.Row():
                audio_speed_slider = gr.Slider(
                    minimum=-10,
                    maximum=10,
                    value=0,
                    step=1,
                    label=loc.localize("embedding-settings-audio-speed-slider-label"),
                    info=loc.localize("embedding-settings-audio-speed-slider-info"),
                )
            with gr.Row():
                fmin_number = gr.Number(
                    cfg.SIG_FMIN,
                    minimum=0,
                    label=loc.localize("embedding-settings-fmin-number-label"),
                    info=loc.localize("embedding-settings-fmin-number-info"),
                    interactive=True,
                )
                fmax_number = gr.Number(
                    cfg.SIG_FMAX,
                    minimum=0,
                    label=loc.localize("embedding-settings-fmax-number-label"),
                    info=loc.localize("embedding-settings-fmax-number-info"),
                    interactive=True,
                )

        def select_directory_and_update_tb(db_name):
            """Ask for the database directory and sync the settings controls.

            Returns updates for, in order: the directory state, the database
            name textbox, the audio speed slider and the two bandpass numbers.
            """
            dir_name = gu.select_directory(state_key="embeddings-db-dir", collect_files=False)

            if dir_name:
                # An empty/None name cannot identify a database; joining it
                # would either raise (None) or resolve to the directory itself
                # (""), so skip the existing-database check in that case.
                db_path = os.path.join(dir_name, db_name) if db_name else None

                if db_path and os.path.exists(db_path):
                    db = get_embeddings_database(db_path)

                    try:
                        settings = db.get_metadata("birdnet_analyzer_settings")
                        gr.Info(loc.localize("embeddings-db-already-exists-info"))

                        # Existing database: show its stored settings read-only.
                        return (
                            dir_name,
                            gr.Textbox(label=dir_name, visible=True),
                            gr.Slider(value=settings["AUDIO_SPEED"], interactive=False),
                            gr.Number(value=settings["BANDPASS_FMIN"], interactive=False),
                            gr.Number(value=settings["BANDPASS_FMAX"], interactive=False),
                        )
                    except KeyError:
                        # No usable settings metadata: fall through and leave
                        # the controls editable.
                        pass
                    finally:
                        db.db.close()

                return (
                    dir_name,
                    gr.Textbox(label=dir_name, visible=True),
                    gr.Slider(interactive=True),
                    gr.Number(interactive=True),
                    gr.Number(interactive=True),
                )

            return None, None, gr.Slider(interactive=True), gr.Number(interactive=True), gr.Number(interactive=True)

        select_db_directory_btn.click(
            select_directory_and_update_tb,
            inputs=[db_name_tb],
            outputs=[db_directory_state, db_name_tb, audio_speed_slider, fmin_number, fmax_number],
            show_progress=False,
        )

        def check_settings(dir_name, db_name):
            """Lock or unlock the settings controls when the database name changes.

            Returns updates for the audio speed slider and the two bandpass
            numbers.
            """
            # Both values may be None: the directory state starts empty and
            # select_directory_and_update_tb writes None to the state and the
            # textbox when no directory is chosen. Only build a path when both
            # parts are present, otherwise os.path.join raises TypeError.
            if dir_name and db_name:
                db_path = os.path.join(dir_name, db_name)

                if os.path.exists(db_path):
                    db = get_embeddings_database(db_path)

                    try:
                        settings = db.get_metadata("birdnet_analyzer_settings")

                        return (
                            gr.Slider(value=settings["AUDIO_SPEED"], interactive=False),
                            gr.Number(value=settings["BANDPASS_FMIN"], interactive=False),
                            gr.Number(value=settings["BANDPASS_FMAX"], interactive=False),
                        )
                    except KeyError:
                        # No usable settings metadata: keep the controls editable.
                        pass
                    finally:
                        db.db.close()

            return gr.Slider(interactive=True), gr.Number(interactive=True), gr.Number(interactive=True)

        db_name_tb.change(
            check_settings,
            inputs=[db_directory_state, db_name_tb],
            outputs=[audio_speed_slider, fmin_number, fmax_number],
            show_progress=False,
        )

        progress_plot = gr.Plot()
        start_btn = gr.Button(loc.localize("embeddings-tab-start-button-label"), variant="huggingface")

        start_btn.click(
            rum_embeddings_with_tqdm_tracking,
            inputs=[
                input_directory_state,
                db_directory_state,
                db_name_tb,
                overlap_slider,
                batch_size_number,
                threads_number,
                audio_speed_slider,
                fmin_number,
                fmax_number,
            ],
            outputs=[progress_plot, audio_speed_slider, fmin_number, fmax_number],
            show_progress_on=progress_plot,
            show_progress=True,
        )
335
+
336
+
337
def _build_search_tab():
    """Build the "search embeddings" sub-tab and wire up its events.

    Left column: database picker with read-only database info, query sample
    picker with spectrogram preview, crop options, maximum sample count,
    score function and the search button. Right column: a dynamically
    rendered, paged result list (spectrogram, play button and export checkbox
    per hit) and the export button.
    """
    from birdnet_analyzer import audio, utils

    with gr.Tab(loc.localize("embeddings-search-tab-title")):
        results_state = gr.State([])
        page_state = gr.State(0)
        export_state = gr.State({})
        hidden_audio = gr.Audio(visible=False, autoplay=True, type="numpy")

        with gr.Row():
            with gr.Column():
                db_selection_button = gr.Button(loc.localize("embeddings-search-db-selection-button-label"))
                with gr.Group():
                    with gr.Row():
                        db_selection_tb = gr.Textbox(
                            label=loc.localize("embeddings-search-db-selection-textbox-label"),
                            max_lines=3,
                            interactive=False,
                            visible=False,
                        )
                        db_embedding_count_number = gr.Number(
                            interactive=False,
                            visible=False,
                            label=loc.localize("embeddings-search-db-embedding-count-number-label"),
                        )
                    with gr.Row():
                        db_bandpass_frequencies_tb = gr.Textbox(
                            label=loc.localize("embeddings-search-db-bandpass-frequencies-label"),
                            interactive=False,
                            visible=False,
                        )
                        db_audio_speed_number = gr.Number(
                            interactive=False,
                            visible=False,
                            label=loc.localize("embeddings-search-db-audio-speed-number-label"),
                        )
                query_spectrogram = gr.Plot(show_label=False)
                select_query_btn = gr.Button(loc.localize("embeddings-search-select-query-button-label"))
                query_sample_tb = gr.Textbox(
                    label=loc.localize("embeddings-search-query-sample-textbox-label"),
                    visible=False,
                    interactive=False,
                )

                crop_mode = gr.Radio(
                    [
                        (loc.localize("training-tab-crop-mode-radio-option-center"), "center"),
                        (loc.localize("training-tab-crop-mode-radio-option-first"), "first"),
                        (loc.localize("training-tab-crop-mode-radio-option-segments"), "segments"),
                    ],
                    value="center",
                    label=loc.localize("training-tab-crop-mode-radio-label"),
                    info=loc.localize("embeddings-search-crop-mode-radio-info"),
                )

                crop_overlap = gr.Slider(
                    minimum=0,
                    maximum=2.9,
                    value=0,
                    step=0.1,
                    label=loc.localize("training-tab-crop-overlap-number-label"),
                    info=loc.localize("embeddings-search-crop-overlap-number-info"),
                    visible=False,
                )
                # These two controls used to be constructed twice in a row,
                # which rendered an extra, unwired copy of each. They are
                # created exactly once here.
                max_samples_number = gr.Number(
                    label=loc.localize("embeddings-search-max-samples-number-label"),
                    value=10,
                    interactive=True,
                )
                score_fn_select = gr.Radio(
                    label=loc.localize("embeddings-search-score-fn-select-label"),
                    choices=["cosine", "dot", "euclidean"],
                    value="cosine",
                    interactive=True,
                )
                search_btn = gr.Button(loc.localize("embeddings-search-start-button-label"), variant="huggingface")

            with gr.Column():
                with gr.Column(elem_id="embeddings-search-results"):

                    @gr.render(
                        inputs=[results_state, page_state, db_selection_tb, export_state],
                        triggers=[results_state.change, page_state.change, db_selection_tb.change],
                    )
                    def render_results(results, page, db_path, exports):
                        """Render the current result page plus the paging buttons.

                        ``results`` is the list of pages, ``page`` the index of
                        the page to show, ``exports`` the current export
                        selection (used to pre-tick the checkboxes).
                        """
                        with gr.Row():
                            if db_path is not None and len(results) > 0:
                                db = get_search_database(db_path)

                                try:
                                    settings = db.get_metadata("birdnet_analyzer_settings")

                                    for i, r in enumerate(results[page]):
                                        with gr.Column():
                                            # Position of this hit across all pages.
                                            index = i + page * PAGE_SIZE
                                            embedding_source = db.get_embedding_source(r.embedding_id)
                                            file = embedding_source.source_id
                                            # Stored offset and the 3-unit window are
                                            # scaled by the database's audio speed.
                                            offset = embedding_source.offsets[0] * settings["AUDIO_SPEED"]
                                            duration = 3 * settings["AUDIO_SPEED"]
                                            spec = utils.spectrogram_from_file(
                                                file,
                                                offset=offset,
                                                duration=duration,
                                                speed=settings["AUDIO_SPEED"],
                                                fmin=settings["BANDPASS_FMIN"],
                                                fmax=settings["BANDPASS_FMAX"],
                                                fig_size=(6, 3),
                                            )
                                            # Per-result record handed to the play
                                            # and export handlers.
                                            plot_audio_state = gr.State(
                                                [
                                                    file,
                                                    offset,
                                                    duration,
                                                    index,
                                                    r.sort_score,
                                                    settings["AUDIO_SPEED"],
                                                    settings["BANDPASS_FMIN"],
                                                    settings["BANDPASS_FMAX"],
                                                ]
                                            )
                                            with gr.Row():
                                                gr.Plot(spec, label=f"{index + 1}_score: {r.sort_score:.2f}")

                                            with gr.Row():
                                                play_btn = gr.Button("▶")
                                                play_btn.click(play_audio, inputs=plot_audio_state, outputs=hidden_audio)
                                                checkbox = gr.Checkbox(label="Export", value=(index in exports))
                                                checkbox.change(
                                                    update_export_state,
                                                    inputs=[plot_audio_state, checkbox, export_state],
                                                    outputs=export_state,
                                                )
                                finally:
                                    # Close the database connection to avoid having
                                    # wal/shm files, even if rendering a hit fails.
                                    db.db.close()

                        with gr.Row():
                            prev_btn = gr.Button("Previous Page", interactive=page > 0)
                            next_btn = gr.Button("Next Page", interactive=page < len(results) - 1)

                            def prev_page(page):
                                return page - 1 if page > 0 else 0

                            def next_page(page):
                                return page + 1

                            prev_btn.click(prev_page, inputs=[page_state], outputs=[page_state])
                            next_btn.click(next_page, inputs=[page_state], outputs=[page_state])

                export_btn = gr.Button(
                    loc.localize("embeddings-search-export-button-label"), variant="huggingface", interactive=False
                )

        def on_db_selection_click():
            """Ask for a database folder and show its embedding count and settings.

            Returns updates for, in order: database path textbox, embedding
            count, bandpass textbox, audio speed number, results state, export
            state, query selection button and query sample textbox.

            Raises:
                gr.Error: If ``get_embeddings_database`` raises ``ValueError``
                    for the chosen folder.
            """
            folder = gu.select_folder(state_key="embeddings_search_db")

            # Nothing selected: reset the view without trying to open a
            # database at an empty location.
            if not folder:
                return None, None, None, None, [], {}, gr.Button(visible=False), gr.Textbox(visible=False)

            try:
                db = get_embeddings_database(folder)
            except ValueError as e:
                raise gr.Error(loc.localize("embeddings-search-db-selection-error")) from e

            try:
                embedding_count = db.count_embeddings()
                settings = db.get_metadata("birdnet_analyzer_settings")
                frequencies = f"{settings['BANDPASS_FMIN']} - {settings['BANDPASS_FMAX']} Hz"
                speed = settings["AUDIO_SPEED"]
            finally:
                db.db.close()

            return (
                gr.Textbox(value=folder, visible=True),
                gr.Number(value=embedding_count, visible=True),
                gr.Textbox(visible=True, value=frequencies),
                gr.Number(visible=True, value=speed),
                [],
                {},
                gr.Button(visible=True),
                gr.Textbox(value=None, visible=False),
            )

        def select_query_sample():
            """Ask for a query audio file and show its path in the textbox."""
            file = gu.select_file(state_key="query_sample")
            return gr.Textbox(file, visible=True)

        select_query_btn.click(select_query_sample, outputs=[query_sample_tb])

        def on_crop_select(new_crop_mode):
            """Show the overlap control only for the "segments" crop mode."""
            # The output component (crop_overlap) is a Slider, so return a
            # Slider update.
            return gr.Slider(visible=new_crop_mode == "segments", interactive=new_crop_mode == "segments")

        crop_mode.change(on_crop_select, inputs=crop_mode, outputs=crop_overlap)

        def update_query_spectrogram(audiofilepath, db_selection, crop_mode, crop_overlap):
            """Recompute the query spectrogram and clear results and exports.

            The audio speed and bandpass limits come from the selected
            database's settings metadata. Returns ``(figure_or_None, [], {})``
            for the spectrogram plot, results state and export state.
            """
            import numpy as np

            if audiofilepath and db_selection:
                db = get_embeddings_database(db_selection)

                try:
                    settings = db.get_metadata("birdnet_analyzer_settings")
                    audio_speed = settings["AUDIO_SPEED"]
                    fmin = settings["BANDPASS_FMIN"]
                    fmax = settings["BANDPASS_FMAX"]
                finally:
                    db.db.close()

                sig, rate = audio.open_audio_file(
                    audiofilepath,
                    # Only the first segment is needed in "first" mode.
                    duration=cfg.SIG_LENGTH * audio_speed if crop_mode == "first" else None,
                    fmin=fmin,
                    fmax=fmax,
                    speed=audio_speed,
                )

                # Crop query audio; any other mode keeps the whole signal.
                if crop_mode == "center":
                    sig = audio.crop_center(sig, rate, cfg.SIG_LENGTH)
                elif crop_mode == "first":
                    sig = audio.split_signal(sig, rate, cfg.SIG_LENGTH, crop_overlap, cfg.SIG_MINLEN)[0]

                sig = np.array(sig, dtype="float32")
                spec = utils.spectrogram_from_audio(sig, rate, fig_size=(10, 4))

                return spec, [], {}

            return None, [], {}

        crop_mode.change(
            update_query_spectrogram,
            inputs=[query_sample_tb, db_selection_tb, crop_mode, crop_overlap],
            outputs=[query_spectrogram, results_state, export_state],
            preprocess=False,
        )
        query_sample_tb.change(
            update_query_spectrogram,
            inputs=[query_sample_tb, db_selection_tb, crop_mode, crop_overlap],
            outputs=[query_spectrogram, results_state, export_state],
            preprocess=False,
        )

        db_selection_button.click(
            on_db_selection_click,
            outputs=[
                db_selection_tb,
                db_embedding_count_number,
                db_bandpass_frequencies_tb,
                db_audio_speed_number,
                results_state,
                export_state,
                select_query_btn,
                query_sample_tb,
            ],
            show_progress=False,
        )

        search_btn.click(
            run_search,
            inputs=[
                db_selection_tb,
                query_sample_tb,
                max_samples_number,
                score_fn_select,
                crop_mode,
                crop_overlap,
            ],
            outputs=[results_state, page_state, export_btn, export_state],
            show_progress_on=export_btn,
        )

        export_btn.click(
            run_export,
            inputs=[export_state],
        )
614
+
615
+
616
def build_embeddings_tab():
    """Create the top-level embeddings tab containing the extract and search sub-tabs."""
    tab_title = loc.localize("embeddings-tab-title")

    with gr.Tab(tab_title):
        # Sub-tabs are added in display order: extraction first, then search.
        _build_extract_tab()
        _build_search_tab()