birdnet-analyzer 2.0.1__py3-none-any.whl → 2.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121)
  1. birdnet_analyzer/__init__.py +9 -9
  2. birdnet_analyzer/analyze/__init__.py +19 -5
  3. birdnet_analyzer/analyze/__main__.py +3 -3
  4. birdnet_analyzer/analyze/cli.py +30 -25
  5. birdnet_analyzer/analyze/core.py +268 -241
  6. birdnet_analyzer/analyze/utils.py +700 -692
  7. birdnet_analyzer/audio.py +368 -368
  8. birdnet_analyzer/cli.py +732 -709
  9. birdnet_analyzer/config.py +243 -242
  10. birdnet_analyzer/eBird_taxonomy_codes_2024E.json +13046 -0
  11. birdnet_analyzer/embeddings/__init__.py +3 -3
  12. birdnet_analyzer/embeddings/__main__.py +3 -3
  13. birdnet_analyzer/embeddings/cli.py +12 -12
  14. birdnet_analyzer/embeddings/core.py +70 -69
  15. birdnet_analyzer/embeddings/utils.py +173 -179
  16. birdnet_analyzer/evaluation/__init__.py +189 -196
  17. birdnet_analyzer/evaluation/__main__.py +3 -3
  18. birdnet_analyzer/evaluation/assessment/metrics.py +388 -388
  19. birdnet_analyzer/evaluation/assessment/performance_assessor.py +364 -409
  20. birdnet_analyzer/evaluation/assessment/plotting.py +378 -379
  21. birdnet_analyzer/evaluation/preprocessing/data_processor.py +631 -631
  22. birdnet_analyzer/evaluation/preprocessing/utils.py +98 -98
  23. birdnet_analyzer/gui/__init__.py +19 -19
  24. birdnet_analyzer/gui/__main__.py +3 -3
  25. birdnet_analyzer/gui/analysis.py +179 -175
  26. birdnet_analyzer/gui/assets/arrow_down.svg +4 -4
  27. birdnet_analyzer/gui/assets/arrow_left.svg +4 -4
  28. birdnet_analyzer/gui/assets/arrow_right.svg +4 -4
  29. birdnet_analyzer/gui/assets/arrow_up.svg +4 -4
  30. birdnet_analyzer/gui/assets/gui.css +36 -28
  31. birdnet_analyzer/gui/assets/gui.js +89 -93
  32. birdnet_analyzer/gui/embeddings.py +638 -619
  33. birdnet_analyzer/gui/evaluation.py +801 -795
  34. birdnet_analyzer/gui/localization.py +75 -75
  35. birdnet_analyzer/gui/multi_file.py +265 -245
  36. birdnet_analyzer/gui/review.py +472 -519
  37. birdnet_analyzer/gui/segments.py +191 -191
  38. birdnet_analyzer/gui/settings.py +149 -128
  39. birdnet_analyzer/gui/single_file.py +264 -267
  40. birdnet_analyzer/gui/species.py +95 -95
  41. birdnet_analyzer/gui/train.py +687 -696
  42. birdnet_analyzer/gui/utils.py +803 -810
  43. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_af.txt +6522 -6522
  44. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ar.txt +6522 -6522
  45. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_bg.txt +6522 -6522
  46. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ca.txt +6522 -6522
  47. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_cs.txt +6522 -6522
  48. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_da.txt +6522 -6522
  49. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_de.txt +6522 -6522
  50. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_el.txt +6522 -6522
  51. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_en_uk.txt +6522 -6522
  52. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_es.txt +6522 -6522
  53. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_fi.txt +6522 -6522
  54. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_fr.txt +6522 -6522
  55. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_he.txt +6522 -6522
  56. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_hr.txt +6522 -6522
  57. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_hu.txt +6522 -6522
  58. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_in.txt +6522 -6522
  59. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_is.txt +6522 -6522
  60. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_it.txt +6522 -6522
  61. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ja.txt +6522 -6522
  62. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ko.txt +6522 -6522
  63. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_lt.txt +6522 -6522
  64. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ml.txt +6522 -6522
  65. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_nl.txt +6522 -6522
  66. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_no.txt +6522 -6522
  67. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_pl.txt +6522 -6522
  68. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_pt_BR.txt +6522 -6522
  69. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_pt_PT.txt +6522 -6522
  70. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ro.txt +6522 -6522
  71. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ru.txt +6522 -6522
  72. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sk.txt +6522 -6522
  73. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sl.txt +6522 -6522
  74. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sr.txt +6522 -6522
  75. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sv.txt +6522 -6522
  76. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_th.txt +6522 -6522
  77. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_tr.txt +6522 -6522
  78. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_uk.txt +6522 -6522
  79. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_zh.txt +6522 -6522
  80. birdnet_analyzer/lang/de.json +342 -334
  81. birdnet_analyzer/lang/en.json +342 -334
  82. birdnet_analyzer/lang/fi.json +342 -334
  83. birdnet_analyzer/lang/fr.json +342 -334
  84. birdnet_analyzer/lang/id.json +342 -334
  85. birdnet_analyzer/lang/pt-br.json +342 -334
  86. birdnet_analyzer/lang/ru.json +342 -334
  87. birdnet_analyzer/lang/se.json +342 -334
  88. birdnet_analyzer/lang/tlh.json +342 -334
  89. birdnet_analyzer/lang/zh_TW.json +342 -334
  90. birdnet_analyzer/model.py +1213 -1212
  91. birdnet_analyzer/search/__init__.py +3 -3
  92. birdnet_analyzer/search/__main__.py +3 -3
  93. birdnet_analyzer/search/cli.py +11 -11
  94. birdnet_analyzer/search/core.py +78 -78
  95. birdnet_analyzer/search/utils.py +104 -107
  96. birdnet_analyzer/segments/__init__.py +3 -3
  97. birdnet_analyzer/segments/__main__.py +3 -3
  98. birdnet_analyzer/segments/cli.py +13 -13
  99. birdnet_analyzer/segments/core.py +81 -81
  100. birdnet_analyzer/segments/utils.py +383 -383
  101. birdnet_analyzer/species/__init__.py +3 -3
  102. birdnet_analyzer/species/__main__.py +3 -3
  103. birdnet_analyzer/species/cli.py +13 -13
  104. birdnet_analyzer/species/core.py +35 -35
  105. birdnet_analyzer/species/utils.py +73 -74
  106. birdnet_analyzer/train/__init__.py +3 -3
  107. birdnet_analyzer/train/__main__.py +3 -3
  108. birdnet_analyzer/train/cli.py +13 -13
  109. birdnet_analyzer/train/core.py +113 -113
  110. birdnet_analyzer/train/utils.py +878 -877
  111. birdnet_analyzer/translate.py +132 -133
  112. birdnet_analyzer/utils.py +425 -426
  113. {birdnet_analyzer-2.0.1.dist-info → birdnet_analyzer-2.1.1.dist-info}/METADATA +147 -137
  114. birdnet_analyzer-2.1.1.dist-info/RECORD +124 -0
  115. {birdnet_analyzer-2.0.1.dist-info → birdnet_analyzer-2.1.1.dist-info}/WHEEL +1 -1
  116. {birdnet_analyzer-2.0.1.dist-info → birdnet_analyzer-2.1.1.dist-info}/licenses/LICENSE +18 -18
  117. birdnet_analyzer/eBird_taxonomy_codes_2021E.json +0 -25280
  118. birdnet_analyzer/playground.py +0 -5
  119. birdnet_analyzer-2.0.1.dist-info/RECORD +0 -125
  120. {birdnet_analyzer-2.0.1.dist-info → birdnet_analyzer-2.1.1.dist-info}/entry_points.txt +0 -0
  121. {birdnet_analyzer-2.0.1.dist-info → birdnet_analyzer-2.1.1.dist-info}/top_level.txt +0 -0
@@ -1,692 +1,700 @@
1
- """Module to analyze audio samples."""
2
-
3
- import datetime
4
- import json
5
- import operator
6
- import os
7
-
8
- import numpy as np
9
-
10
- import birdnet_analyzer.config as cfg
11
- from birdnet_analyzer import audio, model, utils
12
-
13
- RAVEN_TABLE_HEADER = "Selection\tView\tChannel\tBegin Time (s)\tEnd Time (s)\tLow Freq (Hz)\tHigh Freq (Hz)\tCommon Name\tSpecies Code\tConfidence\tBegin Path\tFile Offset (s)\n" # noqa: E501
14
- RTABLE_HEADER = "filepath,start,end,scientific_name,common_name,confidence,lat,lon,week,overlap,sensitivity,min_conf,species_list,model\n"
15
- KALEIDOSCOPE_HEADER = (
16
- "INDIR,FOLDER,IN FILE,OFFSET,DURATION,scientific_name,common_name,confidence,lat,lon,week,overlap,sensitivity\n"
17
- )
18
- CSV_HEADER = "Start (s),End (s),Scientific name,Common name,Confidence,File\n"
19
- SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__))
20
-
21
-
22
- def save_analysis_params(path):
23
- utils.save_params(
24
- path,
25
- (
26
- "File splitting duration",
27
- "Segment length",
28
- "Sample rate",
29
- "Segment overlap",
30
- "Minimum Segment length",
31
- "Bandpass filter minimum",
32
- "Bandpass filter maximum",
33
- "Merge consecutive detections",
34
- "Audio speed",
35
- "Custom classifier path",
36
- ),
37
- (
38
- cfg.FILE_SPLITTING_DURATION,
39
- cfg.SIG_LENGTH,
40
- cfg.SAMPLE_RATE,
41
- cfg.SIG_OVERLAP,
42
- cfg.SIG_MINLEN,
43
- cfg.BANDPASS_FMIN,
44
- cfg.BANDPASS_FMAX,
45
- cfg.MERGE_CONSECUTIVE,
46
- cfg.AUDIO_SPEED,
47
- cfg.CUSTOM_CLASSIFIER,
48
- ),
49
- )
50
-
51
-
52
- def load_codes():
53
- """Loads the eBird codes.
54
-
55
- Returns:
56
- A dictionary containing the eBird codes.
57
- """
58
- with open(os.path.join(SCRIPT_DIR, cfg.CODES_FILE)) as cfile:
59
- return json.load(cfile)
60
-
61
-
62
- def generate_raven_table(timestamps: list[str], result: dict[str, list], afile_path: str, result_path: str):
63
- """
64
- Generates a Raven selection table from the given timestamps and prediction results.
65
-
66
- Args:
67
- timestamps (list[str]): List of timestamp strings in the format "start-end".
68
- result (dict[str, list]): Dictionary where keys are timestamp strings and values are lists of predictions.
69
- afile_path (str): Path to the audio file being analyzed.
70
- result_path (str): Path where the resulting Raven selection table will be saved.
71
-
72
- Returns:
73
- None
74
- """
75
- selection_id = 0
76
- out_string = RAVEN_TABLE_HEADER
77
-
78
- # Read native sample rate
79
- high_freq = audio.get_sample_rate(afile_path) / 2
80
-
81
- high_freq = min(high_freq, int(cfg.SIG_FMAX / cfg.AUDIO_SPEED))
82
-
83
- high_freq = min(high_freq, int(cfg.BANDPASS_FMAX / cfg.AUDIO_SPEED))
84
- low_freq = max(cfg.SIG_FMIN, int(cfg.BANDPASS_FMIN / cfg.AUDIO_SPEED))
85
-
86
- # Extract valid predictions for every timestamp
87
- for timestamp in timestamps:
88
- rstring = ""
89
- start, end = timestamp.split("-", 1)
90
-
91
- for c in result[timestamp]:
92
- selection_id += 1
93
- label = cfg.TRANSLATED_LABELS[cfg.LABELS.index(c[0])]
94
- code = cfg.CODES[c[0]] if c[0] in cfg.CODES else c[0]
95
- rstring += f"{selection_id}\tSpectrogram 1\t1\t{start}\t{end}\t{low_freq}\t{high_freq}\t{label.split('_', 1)[-1]}\t{code}\t{c[1]:.4f}\t{afile_path}\t{start}\n" # noqa: E501
96
-
97
- # Write result string to file
98
- out_string += rstring
99
-
100
- # If we don't have any valid predictions, we still need to add a line to the selection table
101
- # in case we want to combine results
102
- # TODO: That's a weird way to do it, but it works for now. It would be better to keep track
103
- # of file durations during the analysis.
104
- if len(out_string) == len(RAVEN_TABLE_HEADER) and cfg.OUTPUT_PATH is not None:
105
- selection_id += 1
106
- out_string += (
107
- f"{selection_id}\tSpectrogram 1\t1\t0\t3\t{low_freq}\t{high_freq}\tnocall\tnocall\t1.0\t{afile_path}\t0\n"
108
- )
109
-
110
- utils.save_result_file(result_path, out_string)
111
-
112
-
113
- def generate_audacity(timestamps: list[str], result: dict[str, list], result_path: str):
114
- """
115
- Generates an Audacity timeline label file from the given timestamps and results.
116
-
117
- Args:
118
- timestamps (list[str]): A list of timestamp strings.
119
- result (dict[str, list]): A dictionary where keys are timestamps and values are lists of tuples,
120
- each containing a label and a confidence score.
121
- result_path (str): The file path where the result string will be saved.
122
-
123
- Returns:
124
- None
125
- """
126
- out_string = ""
127
-
128
- # Audacity timeline labels
129
- for timestamp in timestamps:
130
- rstring = ""
131
-
132
- for c in result[timestamp]:
133
- label = cfg.TRANSLATED_LABELS[cfg.LABELS.index(c[0])]
134
- ts = timestamp.replace("-", "\t")
135
- lbl = label.replace("_", ", ")
136
- rstring += f"{ts}\t{lbl}\t{c[1]:.4f}\n"
137
-
138
- # Write result string to file
139
- out_string += rstring
140
-
141
- utils.save_result_file(result_path, out_string)
142
-
143
-
144
- def generate_kaleidoscope(timestamps: list[str], result: dict[str, list], afile_path: str, result_path: str):
145
- """
146
- Generates a Kaleidoscope-compatible CSV string from the given timestamps and results, and saves it to a file.
147
-
148
- Args:
149
- timestamps (list[str]): List of timestamp strings in the format "start-end".
150
- result (dict[str, list]): Dictionary where keys are timestamp strings and values are lists of tuples containing
151
- species label and confidence score.
152
- afile_path (str): Path to the audio file being analyzed.
153
- result_path (str): Path where the resulting CSV file will be saved.
154
-
155
- Returns:
156
- None
157
- """
158
- out_string = KALEIDOSCOPE_HEADER
159
-
160
- folder_path, filename = os.path.split(afile_path)
161
- parent_folder, folder_name = os.path.split(folder_path)
162
-
163
- for timestamp in timestamps:
164
- rstring = ""
165
- start, end = timestamp.split("-", 1)
166
-
167
- for c in result[timestamp]:
168
- label = cfg.TRANSLATED_LABELS[cfg.LABELS.index(c[0])]
169
- rstring += "{},{},{},{},{},{},{},{:.4f},{:.4f},{:.4f},{},{},{}\n".format(
170
- parent_folder.rstrip("/"),
171
- folder_name,
172
- filename,
173
- start,
174
- float(end) - float(start),
175
- label.split("_", 1)[0],
176
- label.split("_", 1)[-1],
177
- c[1],
178
- cfg.LATITUDE,
179
- cfg.LONGITUDE,
180
- cfg.WEEK,
181
- cfg.SIG_OVERLAP,
182
- cfg.SIGMOID_SENSITIVITY,
183
- )
184
-
185
- # Write result string to file
186
- out_string += rstring
187
-
188
- utils.save_result_file(result_path, out_string)
189
-
190
-
191
- def generate_csv(timestamps: list[str], result: dict[str, list], afile_path: str, result_path: str):
192
- """
193
- Generates a CSV file from the given timestamps and results.
194
-
195
- Args:
196
- timestamps (list[str]): A list of timestamp strings in the format "start-end".
197
- result (dict[str, list]): A dictionary where keys are timestamp strings and values are lists of tuples.
198
- Each tuple contains a label and a confidence score.
199
- afile_path (str): The file path of the audio file being analyzed.
200
- result_path (str): The file path where the resulting CSV file will be saved.
201
-
202
- Returns:
203
- None
204
- """
205
- out_string = CSV_HEADER
206
-
207
- for timestamp in timestamps:
208
- rstring = ""
209
-
210
- for c in result[timestamp]:
211
- start, end = timestamp.split("-", 1)
212
- label = cfg.TRANSLATED_LABELS[cfg.LABELS.index(c[0])]
213
- rstring += f"{start},{end},{label.split('_', 1)[0]},{label.split('_', 1)[-1]},{c[1]:.4f},{afile_path}\n"
214
-
215
- # Write result string to file
216
- out_string += rstring
217
-
218
- utils.save_result_file(result_path, out_string)
219
-
220
-
221
- def save_result_files(r: dict[str, list], result_files: dict[str, str], afile_path: str):
222
- """
223
- Saves the result files in various formats based on the provided configuration.
224
-
225
- Args:
226
- r (dict[str, list]): A dictionary containing the analysis results with timestamps as keys.
227
- result_files (dict[str, str]): A dictionary mapping result types to their respective file paths.
228
- afile_path (str): The path to the audio file being analyzed.
229
-
230
- Returns:
231
- None
232
- """
233
-
234
- os.makedirs(cfg.OUTPUT_PATH, exist_ok=True)
235
-
236
- # Merge consecutive detections of the same species
237
- r_merged = merge_consecutive_detections(r, cfg.MERGE_CONSECUTIVE)
238
-
239
- # Selection table
240
- timestamps = get_sorted_timestamps(r_merged)
241
-
242
- if "table" in result_files:
243
- generate_raven_table(timestamps, r_merged, afile_path, result_files["table"])
244
-
245
- if "audacity" in cfg.RESULT_TYPES:
246
- generate_audacity(timestamps, r_merged, result_files["audacity"])
247
-
248
- # if "r" in cfg.RESULT_TYPES:
249
- # generate_rtable(timestamps, r, afile_path, result_files["r"])
250
-
251
- if "kaleidoscope" in cfg.RESULT_TYPES:
252
- generate_kaleidoscope(timestamps, r_merged, afile_path, result_files["kaleidoscope"])
253
-
254
- if "csv" in cfg.RESULT_TYPES:
255
- generate_csv(timestamps, r_merged, afile_path, result_files["csv"])
256
-
257
-
258
- def combine_raven_tables(saved_results: list[str]):
259
- """
260
- Combines multiple Raven selection table files into a single file and adjusts the selection IDs and times.
261
-
262
- Args:
263
- saved_results (list[str]): List of file paths to the Raven selection table files to be combined.
264
-
265
- Returns:
266
- None
267
- """
268
- # Combine all files
269
- s_id = 1
270
- time_offset = 0
271
- audiofiles = []
272
-
273
- with open(os.path.join(cfg.OUTPUT_PATH, cfg.OUTPUT_RAVEN_FILENAME), "w", encoding="utf-8") as f:
274
- f.write(RAVEN_TABLE_HEADER)
275
-
276
- for rfile in saved_results:
277
- if not rfile:
278
- continue
279
- with open(rfile, encoding="utf-8") as rf:
280
- try:
281
- lines = rf.readlines()
282
-
283
- # make sure it's a selection table
284
- if "Selection" not in lines[0] or "File Offset" not in lines[0]:
285
- continue
286
-
287
- # skip header and add to file
288
- f_name = lines[1].split("\t")[10]
289
- f_duration = audio.get_audio_file_length(f_name)
290
-
291
- audiofiles.append(f_name)
292
-
293
- for line in lines[1:]:
294
- # empty line?
295
- if not line.strip():
296
- continue
297
-
298
- # Is species code and common name == 'nocall'?
299
- # If so, that's a dummy line and we can skip it
300
- if line.split("\t")[7] == "nocall" and line.split("\t")[8] == "nocall":
301
- continue
302
-
303
- # adjust selection id
304
- line_elements = line.split("\t")
305
- line_elements[0] = str(s_id)
306
- s_id += 1
307
-
308
- # adjust time
309
- line_elements[3] = str(float(line_elements[3]) + time_offset)
310
- line_elements[4] = str(float(line_elements[4]) + time_offset)
311
-
312
- # write line
313
- f.write("\t".join(line_elements))
314
-
315
- # adjust time offset
316
- time_offset += f_duration
317
-
318
- except Exception as ex:
319
- print(f"Error: Cannot combine results from {rfile}.\n", flush=True)
320
- utils.write_error_log(ex)
321
-
322
- listfilesname = cfg.OUTPUT_RAVEN_FILENAME.rsplit(".", 1)[0] + ".list.txt"
323
-
324
- with open(os.path.join(cfg.OUTPUT_PATH, listfilesname), "w", encoding="utf-8") as f:
325
- f.writelines(f + "\n" for f in audiofiles)
326
-
327
-
328
- def combine_kaleidoscope_files(saved_results: list[str]):
329
- """
330
- Combines multiple Kaleidoscope result files into a single file.
331
-
332
- Args:
333
- saved_results (list[str]): A list of file paths to the saved Kaleidoscope result files.
334
-
335
- Returns:
336
- None
337
- """
338
- # Combine all files
339
- with open(os.path.join(cfg.OUTPUT_PATH, cfg.OUTPUT_KALEIDOSCOPE_FILENAME), "w", encoding="utf-8") as f:
340
- f.write(KALEIDOSCOPE_HEADER)
341
-
342
- for rfile in saved_results:
343
- with open(rfile, encoding="utf-8") as rf:
344
- try:
345
- lines = rf.readlines()
346
-
347
- # make sure it's a selection table
348
- if "INDIR" not in lines[0] or "sensitivity" not in lines[0]:
349
- continue
350
-
351
- # skip header and add to file
352
- for line in lines[1:]:
353
- f.write(line)
354
-
355
- except Exception as ex:
356
- print(f"Error: Cannot combine results from {rfile}.\n", flush=True)
357
- utils.write_error_log(ex)
358
-
359
-
360
- def combine_csv_files(saved_results: list[str]):
361
- """
362
- Combines multiple CSV files into a single CSV file.
363
-
364
- Args:
365
- saved_results (list[str]): A list of file paths to the CSV files to be combined.
366
- """
367
- # Combine all files
368
- with open(os.path.join(cfg.OUTPUT_PATH, cfg.OUTPUT_CSV_FILENAME), "w", encoding="utf-8") as f:
369
- f.write(CSV_HEADER)
370
-
371
- for rfile in saved_results:
372
- with open(rfile, encoding="utf-8") as rf:
373
- try:
374
- lines = rf.readlines()
375
-
376
- # make sure it's a selection table
377
- if "Start (s)" not in lines[0] or "Confidence" not in lines[0]:
378
- continue
379
-
380
- # skip header and add to file
381
- for line in lines[1:]:
382
- f.write(line)
383
-
384
- except Exception as ex:
385
- print(f"Error: Cannot combine results from {rfile}.\n", flush=True)
386
- utils.write_error_log(ex)
387
-
388
-
389
- def combine_results(saved_results: list[dict[str, str]]):
390
- """
391
- Combines various types of result files based on the configuration settings.
392
- This function checks the types of results specified in the configuration
393
- and combines the corresponding files from the saved results list.
394
-
395
- Args:
396
- saved_results (list[dict[str, str]]): A list of dictionaries containing
397
- file paths for different result types. Each dictionary represents
398
- a set of result files for a particular analysis.
399
-
400
- Returns:
401
- None
402
- """
403
- if "table" in cfg.RESULT_TYPES:
404
- combine_raven_tables([f["table"] for f in saved_results if f])
405
-
406
- # if "r" in cfg.RESULT_TYPES:
407
- # combine_rtable_files([f["r"] for f in saved_results if f])
408
-
409
- if "kaleidoscope" in cfg.RESULT_TYPES:
410
- combine_kaleidoscope_files([f["kaleidoscope"] for f in saved_results if f])
411
-
412
- if "csv" in cfg.RESULT_TYPES:
413
- combine_csv_files([f["csv"] for f in saved_results if f])
414
-
415
-
416
- def merge_consecutive_detections(results: dict[str, list], max_consecutive: int | None = None):
417
- """Merges consecutive detections of the same species.
418
- Uses the mean of the top-3 highest scoring predictions as
419
- confidence score for the merged detection.
420
-
421
- Args:
422
- results: The dictionary with {segment: scores}.
423
- max_consecutive: The maximum number of consecutive detections to merge.
424
- If None, merge all consecutive detections.
425
-
426
- Returns:
427
- The dictionary with merged detections.
428
- """
429
-
430
- # If max_consecutive is 0 or 1, return original results
431
- if max_consecutive is not None and max_consecutive <= 1:
432
- return results
433
-
434
- # For each species, make list of timestamps and scores
435
- species = {}
436
- for timestamp, scores in results.items():
437
- for label, score in scores:
438
- if label not in species:
439
- species[label] = []
440
- species[label].append((timestamp, score))
441
-
442
- # Sort timestamps by start time for each species
443
- for label, timestamps in species.items():
444
- species[label] = sorted(timestamps, key=lambda t: float(t[0].split("-", 1)[0]))
445
-
446
- # Merge consecutive detections
447
- merged_results = {}
448
- for label in species:
449
- timestamps = species[label]
450
-
451
- # Check if end time of current detection is within the start time of the next detection
452
- i = 0
453
- while i < len(timestamps) - 1:
454
- start, end = timestamps[i][0].split("-", 1)
455
- next_start, next_end = timestamps[i + 1][0].split("-", 1)
456
-
457
- if float(end) >= float(next_start):
458
- # Merge detections
459
- merged_scores = [timestamps[i][1], timestamps[i + 1][1]]
460
- timestamps.pop(i)
461
-
462
- while i < len(timestamps) - 1 and float(next_end) >= float(timestamps[i + 1][0].split("-", 1)[0]):
463
- if max_consecutive and len(merged_scores) >= max_consecutive:
464
- break
465
- merged_scores.append(timestamps[i + 1][1])
466
- next_end = timestamps[i + 1][0].split("-", 1)[1]
467
- timestamps.pop(i + 1)
468
-
469
- # Calculate mean of top 3 scores
470
- top_3_scores = sorted(merged_scores, reverse=True)[:3]
471
- merged_score = sum(top_3_scores) / len(top_3_scores)
472
-
473
- timestamps[i] = (f"{start}-{next_end}", merged_score)
474
-
475
- i += 1
476
-
477
- merged_results[label] = timestamps
478
-
479
- # Restore original format
480
- results = {}
481
- for label, timestamps in merged_results.items():
482
- for timestamp, score in timestamps:
483
- if timestamp not in results:
484
- results[timestamp] = []
485
- results[timestamp].append((label, score))
486
-
487
- return results
488
-
489
-
490
- def get_sorted_timestamps(results: dict[str, list]):
491
- """Sorts the results based on the segments.
492
-
493
- Args:
494
- results: The dictionary with {segment: scores}.
495
-
496
- Returns:
497
- Returns the sorted list of segments and their scores.
498
- """
499
- return sorted(results, key=lambda t: float(t.split("-", 1)[0]))
500
-
501
-
502
- def get_raw_audio_from_file(fpath: str, offset, duration):
503
- """Reads an audio file and splits the signal into chunks.
504
-
505
- Args:
506
- fpath: Path to the audio file.
507
-
508
- Returns:
509
- The signal split into a list of chunks.
510
- """
511
- # Open file
512
- sig, rate = audio.open_audio_file(
513
- fpath, cfg.SAMPLE_RATE, offset, duration, cfg.BANDPASS_FMIN, cfg.BANDPASS_FMAX, cfg.AUDIO_SPEED
514
- )
515
-
516
- # Split into raw audio chunks
517
- return audio.split_signal(sig, rate, cfg.SIG_LENGTH, cfg.SIG_OVERLAP, cfg.SIG_MINLEN)
518
-
519
-
520
- def predict(samples):
521
- """Predicts the classes for the given samples.
522
-
523
- Args:
524
- samples: Samples to be predicted.
525
-
526
- Returns:
527
- The prediction scores.
528
- """
529
- # Prepare sample and pass through model
530
- data = np.array(samples, dtype="float32")
531
- prediction = model.predict(data)
532
-
533
- # Logits or sigmoid activations?
534
- if cfg.APPLY_SIGMOID:
535
- prediction = model.flat_sigmoid(np.array(prediction), sensitivity=-1, bias=cfg.SIGMOID_SENSITIVITY)
536
-
537
- return prediction
538
-
539
-
540
- def get_result_file_names(fpath: str):
541
- """
542
- Generates a dictionary of result file names based on the input file path and configured result types.
543
-
544
- Args:
545
- fpath (str): The file path of the input file.
546
-
547
- Returns:
548
- dict: A dictionary where the keys are result types (e.g., "table", "audacity", "r", "kaleidoscope", "csv")
549
- and the values are the corresponding output file paths.
550
- """
551
- result_names = {}
552
-
553
- rpath = fpath.replace(cfg.INPUT_PATH, "")
554
-
555
- rpath = (rpath[1:] if rpath[0] in ["/", "\\"] else rpath) if rpath else os.path.basename(fpath)
556
-
557
- file_shorthand = rpath.rsplit(".", 1)[0]
558
-
559
- if "table" in cfg.RESULT_TYPES:
560
- result_names["table"] = os.path.join(cfg.OUTPUT_PATH, file_shorthand + ".BirdNET.selection.table.txt")
561
- if "audacity" in cfg.RESULT_TYPES:
562
- result_names["audacity"] = os.path.join(cfg.OUTPUT_PATH, file_shorthand + ".BirdNET.results.txt")
563
- # if "r" in cfg.RESULT_TYPES:
564
- # result_names["r"] = os.path.join(cfg.OUTPUT_PATH, file_shorthand + ".BirdNET.results.r.csv")
565
- if "kaleidoscope" in cfg.RESULT_TYPES:
566
- result_names["kaleidoscope"] = os.path.join(
567
- cfg.OUTPUT_PATH, file_shorthand + ".BirdNET.results.kaleidoscope.csv"
568
- )
569
- if "csv" in cfg.RESULT_TYPES:
570
- result_names["csv"] = os.path.join(cfg.OUTPUT_PATH, file_shorthand + ".BirdNET.results.csv")
571
-
572
- return result_names
573
-
574
-
575
- def analyze_file(item):
576
- """
577
- Analyzes an audio file and generates prediction results.
578
-
579
- Args:
580
- item (tuple): A tuple containing the file path (str) and configuration settings.
581
-
582
- Returns:
583
- dict or None: A dictionary of result file names if analysis is successful,
584
- None if the file is skipped or an error occurs.
585
- Raises:
586
- Exception: If there is an error in reading the audio file or saving the results.
587
- """
588
- # Get file path and restore cfg
589
- fpath: str = item[0]
590
- cfg.set_config(item[1])
591
-
592
- result_file_names = get_result_file_names(fpath)
593
-
594
- if cfg.SKIP_EXISTING_RESULTS and all(os.path.exists(f) for f in result_file_names.values()):
595
- print(f"Skipping {fpath} as it has already been analyzed", flush=True)
596
- return None # or return path to combine later? TODO
597
-
598
- # Start time
599
- start_time = datetime.datetime.now()
600
- offset = 0
601
- duration = int(cfg.FILE_SPLITTING_DURATION / cfg.AUDIO_SPEED)
602
- start, end = 0, cfg.SIG_LENGTH
603
- results = {}
604
-
605
- # Status
606
- print(f"Analyzing {fpath}", flush=True)
607
-
608
- try:
609
- fileLengthSeconds = int(audio.get_audio_file_length(fpath) / cfg.AUDIO_SPEED)
610
- except Exception as ex:
611
- # Write error log
612
- print(f"Error: Cannot analyze audio file {fpath}. File corrupt?\n", flush=True)
613
- utils.write_error_log(ex)
614
-
615
- return None
616
-
617
- # Process each chunk
618
- try:
619
- while offset < fileLengthSeconds:
620
- chunks = get_raw_audio_from_file(fpath, offset, duration)
621
- samples = []
622
- timestamps = []
623
-
624
- for chunk_index, chunk in enumerate(chunks):
625
- # Add to batch
626
- samples.append(chunk)
627
- timestamps.append([round(start * cfg.AUDIO_SPEED, 1), round(end * cfg.AUDIO_SPEED, 1)])
628
-
629
- # Advance start and end
630
- start += cfg.SIG_LENGTH - cfg.SIG_OVERLAP
631
- end = start + cfg.SIG_LENGTH
632
-
633
- # Check if batch is full or last chunk
634
- if len(samples) < cfg.BATCH_SIZE and chunk_index < len(chunks) - 1:
635
- continue
636
-
637
- # Predict
638
- p = predict(samples)
639
-
640
- # Add to results
641
- for i in range(len(samples)):
642
- # Get timestamp
643
- s_start, s_end = timestamps[i]
644
-
645
- # Get prediction
646
- pred = p[i]
647
-
648
- # Assign scores to labels
649
- p_labels = [
650
- p
651
- for p in zip(cfg.LABELS, pred, strict=True)
652
- if (cfg.TOP_N or p[1] >= cfg.MIN_CONFIDENCE)
653
- and (not cfg.SPECIES_LIST or p[0] in cfg.SPECIES_LIST)
654
- ]
655
-
656
- # Sort by score
657
- p_sorted = sorted(p_labels, key=operator.itemgetter(1), reverse=True)
658
-
659
- if cfg.TOP_N:
660
- p_sorted = p_sorted[: cfg.TOP_N]
661
-
662
- # TODO: hier schon top n oder min conf raussortieren
663
- # Store top 5 results and advance indices
664
- results[str(s_start) + "-" + str(s_end)] = p_sorted
665
-
666
- # Clear batch
667
- samples = []
668
- timestamps = []
669
- offset = offset + duration
670
-
671
- except Exception as ex:
672
- # Write error log
673
- print(f"Error: Cannot analyze audio file {fpath}.\n", flush=True)
674
- utils.write_error_log(ex)
675
-
676
- return None
677
-
678
- # Save as selection table
679
- try:
680
- save_result_files(results, result_file_names, fpath)
681
-
682
- except Exception as ex:
683
- # Write error log
684
- print(f"Error: Cannot save result for {fpath}.\n", flush=True)
685
- utils.write_error_log(ex)
686
-
687
- return None
688
-
689
- delta_time = (datetime.datetime.now() - start_time).total_seconds()
690
- print(f"Finished {fpath} in {delta_time:.2f} seconds", flush=True)
691
-
692
- return result_file_names
1
"""Module to analyze audio samples."""

import datetime
import json
import operator
import os
from collections.abc import Sequence

import numpy as np

import birdnet_analyzer.config as cfg
from birdnet_analyzer import audio, model, utils

# Column header for Raven selection tables (tab-separated).
RAVEN_TABLE_HEADER = (
    "Selection\tView\tChannel\tBegin Time (s)\tEnd Time (s)\tLow Freq (Hz)\tHigh Freq (Hz)\tCommon Name\tSpecies Code\tConfidence\tBegin Path\tFile Offset (s)\n"
)
# Column header for Kaleidoscope-compatible CSV output.
KALEIDOSCOPE_HEADER = "INDIR,FOLDER,IN FILE,OFFSET,DURATION,scientific_name,common_name,confidence,lat,lon,week,overlap,sensitivity\n"
# Column header for the plain CSV result format.
CSV_HEADER = "Start (s),End (s),Scientific name,Common name,Confidence,File\n"
# Absolute directory of this module; used to resolve bundled data files (e.g. the eBird codes JSON).
SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__))
20
+
21
+
22
def save_analysis_params(path):
    """Persist the current analysis configuration to *path*.

    Writes the parameter names and their current values from the config
    module via ``utils.save_params``.

    Args:
        path: Destination path for the parameter file.
    """
    # Insertion order of this dict defines the output order of the parameters.
    params = {
        "File splitting duration": cfg.FILE_SPLITTING_DURATION,
        "Segment length": cfg.SIG_LENGTH,
        "Sample rate": cfg.SAMPLE_RATE,
        "Segment overlap": cfg.SIG_OVERLAP,
        "Minimum Segment length": cfg.SIG_MINLEN,
        "Bandpass filter minimum": cfg.BANDPASS_FMIN,
        "Bandpass filter maximum": cfg.BANDPASS_FMAX,
        "Merge consecutive detections": cfg.MERGE_CONSECUTIVE,
        "Audio speed": cfg.AUDIO_SPEED,
        "Custom classifier path": cfg.CUSTOM_CLASSIFIER,
    }

    utils.save_params(path, tuple(params.keys()), tuple(params.values()))
50
+
51
+
52
def load_codes():
    """Load the eBird species code mapping shipped with the package.

    Returns:
        dict: The parsed contents of the eBird codes JSON file.
    """
    codes_path = os.path.join(SCRIPT_DIR, cfg.CODES_FILE)

    with open(codes_path) as codes_file:
        return json.load(codes_file)
60
+
61
+
62
def generate_raven_table(timestamps: list[str], result: dict[str, list], afile_path: str, result_path: str):
    """
    Generates a Raven selection table from the given timestamps and prediction results.

    Args:
        timestamps (list[str]): List of timestamp strings in the format "start-end".
        result (dict[str, list]): Dictionary where keys are timestamp strings and values are lists of predictions.
        afile_path (str): Path to the audio file being analyzed.
        result_path (str): Path where the resulting Raven selection table will be saved.

    Returns:
        None
    """
    selection_id = 0
    out_string = RAVEN_TABLE_HEADER

    # Read native sample rate; half of it (Nyquist) is the highest
    # frequency the recording can contain.
    high_freq = audio.get_sample_rate(afile_path) / 2

    # Clamp to the model's maximum signal frequency, corrected for audio speed
    high_freq = min(high_freq, int(cfg.SIG_FMAX / cfg.AUDIO_SPEED))

    # Clamp further to the configured bandpass range (also speed-corrected)
    high_freq = int(min(high_freq, int(cfg.BANDPASS_FMAX / cfg.AUDIO_SPEED)))
    low_freq = max(cfg.SIG_FMIN, int(cfg.BANDPASS_FMIN / cfg.AUDIO_SPEED))

    # Extract valid predictions for every timestamp
    for timestamp in timestamps:
        rstring = ""
        start, end = timestamp.split("-", 1)

        for c in result[timestamp]:
            selection_id += 1
            # Use the translated label when a translation table is loaded
            label = cfg.TRANSLATED_LABELS[cfg.LABELS.index(c[0])] if cfg.TRANSLATED_LABELS else c[0]
            # Fall back to the raw label when no eBird code is known for it
            code = cfg.CODES[c[0]] if c[0] in cfg.CODES else c[0]
            rstring += (
                f"{selection_id}\tSpectrogram 1\t1\t{start}\t{end}\t{low_freq}\t{high_freq}\t{label.split('_', 1)[-1]}\t{code}\t{c[1]:.4f}\t{afile_path}\t{start}\n"
            )

        # Write result string to file
        out_string += rstring

    # If we don't have any valid predictions, we still need to add a line to the selection table
    # in case we want to combine results
    # TODO: That's a weird way to do it, but it works for now. It would be better to keep track
    # of file durations during the analysis.
    if len(out_string) == len(RAVEN_TABLE_HEADER) and cfg.OUTPUT_PATH is not None:
        selection_id += 1
        out_string += f"{selection_id}\tSpectrogram 1\t1\t0\t3\t{low_freq}\t{high_freq}\tnocall\tnocall\t1.0\t{afile_path}\t0\n"

    utils.save_result_file(result_path, out_string)
111
+
112
+
113
def generate_audacity(timestamps: list[str], result: dict[str, list], result_path: str):
    """
    Generates an Audacity timeline label file from the given timestamps and results.

    Args:
        timestamps (list[str]): A list of timestamp strings.
        result (dict[str, list]): A dictionary where keys are timestamps and values are lists of tuples,
                                  each containing a label and a confidence score.
        result_path (str): The file path where the result string will be saved.

    Returns:
        None
    """
    lines = []

    # One Audacity label row per detection: "start\tend\tScientific, Common\tconf"
    for timestamp in timestamps:
        time_range = timestamp.replace("-", "\t")

        for raw_label, confidence in result[timestamp]:
            if cfg.TRANSLATED_LABELS:
                display_label = cfg.TRANSLATED_LABELS[cfg.LABELS.index(raw_label)]
            else:
                display_label = raw_label

            lines.append(f"{time_range}\t{display_label.replace('_', ', ')}\t{confidence:.4f}\n")

    utils.save_result_file(result_path, "".join(lines))
142
+
143
+
144
def generate_kaleidoscope(timestamps: list[str], result: dict[str, list], afile_path: str, result_path: str):
    """
    Generates a Kaleidoscope-compatible CSV string from the given timestamps and results, and saves it to a file.

    Args:
        timestamps (list[str]): List of timestamp strings in the format "start-end".
        result (dict[str, list]): Dictionary where keys are timestamp strings and values are lists of tuples containing
                                  species label and confidence score.
        afile_path (str): Path to the audio file being analyzed.
        result_path (str): Path where the resulting CSV file will be saved.

    Returns:
        None
    """
    out_string = KALEIDOSCOPE_HEADER

    # Kaleidoscope expects the path split into parent dir / folder / file name
    folder_path, filename = os.path.split(afile_path)
    parent_folder, folder_name = os.path.split(folder_path)

    for timestamp in timestamps:
        rstring = ""
        start, end = timestamp.split("-", 1)

        for c in result[timestamp]:
            # Use the translated label when a translation table is loaded
            label = cfg.TRANSLATED_LABELS[cfg.LABELS.index(c[0])] if cfg.TRANSLATED_LABELS else c[0]
            # Labels look like "Scientific_Common"; split into the two name columns
            rstring += "{},{},{},{},{},{},{},{:.4f},{:.4f},{:.4f},{},{},{}\n".format(
                parent_folder.rstrip("/"),
                folder_name,
                filename,
                start,
                float(end) - float(start),  # DURATION column: length of the detection
                label.split("_", 1)[0],
                label.split("_", 1)[-1],
                c[1],
                cfg.LATITUDE,
                cfg.LONGITUDE,
                cfg.WEEK,
                cfg.SIG_OVERLAP,
                cfg.SIGMOID_SENSITIVITY,
            )

        # Write result string to file
        out_string += rstring

    utils.save_result_file(result_path, out_string)
189
+
190
+
191
def generate_csv(timestamps: list[str], result: dict[str, list], afile_path: str, result_path: str):
    """
    Generates a CSV file from the given timestamps and results.

    Args:
        timestamps (list[str]): A list of timestamp strings in the format "start-end".
        result (dict[str, list]): A dictionary where keys are timestamp strings and values are lists of tuples.
                                  Each tuple contains a label and a confidence score.
        afile_path (str): The file path of the audio file being analyzed.
        result_path (str): The file path where the resulting CSV file will be saved.

    Returns:
        None
    """
    # Local import — presumably to avoid a circular import with the package
    # __init__; TODO confirm.
    from birdnet_analyzer.analyze import POSSIBLE_ADDITIONAL_COLUMNS_MAP

    out_string = CSV_HEADER
    columns_map = {}

    # Resolve the requested optional columns; map values are callables that
    # produce the column value once per file.
    if cfg.ADDITIONAL_COLUMNS:
        for col in cfg.ADDITIONAL_COLUMNS:
            if col in POSSIBLE_ADDITIONAL_COLUMNS_MAP:
                columns_map[col] = POSSIBLE_ADDITIONAL_COLUMNS_MAP[col]()

    if columns_map:
        # Append extra column names to the header (strip the trailing newline first)
        out_string = out_string[:-1] + "," + ",".join(columns_map) + "\n"

    for timestamp in timestamps:
        rstring = ""

        for c in result[timestamp]:
            start, end = timestamp.split("-", 1)
            # Use the translated label when a translation table is loaded
            label = cfg.TRANSLATED_LABELS[cfg.LABELS.index(c[0])] if cfg.TRANSLATED_LABELS else c[0]
            rstring += f"{start},{end},{label.split('_', 1)[0]},{label.split('_', 1)[-1]},{c[1]:.4f},{afile_path}"

            if columns_map:
                rstring += "," + ",".join(str(val) for val in columns_map.values())

            rstring += "\n"

        # Write result string to file
        out_string += rstring

    utils.save_result_file(result_path, out_string)
235
+
236
+
237
def save_result_files(r: dict[str, list], result_files: dict[str, str], afile_path: str):
    """
    Saves the result files in various formats based on the provided configuration.

    Args:
        r (dict[str, list]): A dictionary containing the analysis results with timestamps as keys.
        result_files (dict[str, str]): A dictionary mapping result types to their respective file paths.
        afile_path (str): The path to the audio file being analyzed.

    Returns:
        None
    """

    os.makedirs(cfg.OUTPUT_PATH, exist_ok=True)

    # Merge consecutive detections of the same species before writing
    r_merged = merge_consecutive_detections(r, cfg.MERGE_CONSECUTIVE)

    # Process segments in chronological order
    timestamps = get_sorted_timestamps(r_merged)

    # Guard every format on result_files (not cfg.RESULT_TYPES): the keys are
    # derived from cfg.RESULT_TYPES in get_result_file_names(), and checking
    # the dict avoids a KeyError if a type has no file path. Previously only
    # "table" was checked this way.
    if "table" in result_files:
        generate_raven_table(timestamps, r_merged, afile_path, result_files["table"])

    if "audacity" in result_files:
        generate_audacity(timestamps, r_merged, result_files["audacity"])

    # if "r" in result_files:
    #     generate_rtable(timestamps, r, afile_path, result_files["r"])

    if "kaleidoscope" in result_files:
        generate_kaleidoscope(timestamps, r_merged, afile_path, result_files["kaleidoscope"])

    if "csv" in result_files:
        generate_csv(timestamps, r_merged, afile_path, result_files["csv"])
272
+
273
+
274
def combine_raven_tables(saved_results: list[str]):
    """
    Combines multiple Raven selection table files into a single file and adjusts the selection IDs and times.

    Args:
        saved_results (list[str]): List of file paths to the Raven selection table files to be combined.

    Returns:
        None
    """
    # Combine all files
    s_id = 1
    time_offset = 0
    audiofiles = []

    with open(os.path.join(cfg.OUTPUT_PATH, cfg.OUTPUT_RAVEN_FILENAME), "w", encoding="utf-8") as f:
        f.write(RAVEN_TABLE_HEADER)

        for rfile in saved_results:
            if not rfile:
                continue
            with open(rfile, encoding="utf-8") as rf:
                try:
                    lines = rf.readlines()

                    # make sure it's a selection table
                    if not lines or "Selection" not in lines[0] or "File Offset" not in lines[0]:
                        continue

                    # header-only file: nothing to combine (avoids an
                    # IndexError below that would be logged as an error)
                    if len(lines) < 2:
                        continue

                    # Column 10 is "Begin Path" — the source audio file
                    f_name = lines[1].split("\t")[10]
                    f_duration = audio.get_audio_file_length(f_name)

                    audiofiles.append(f_name)

                    for line in lines[1:]:
                        # empty line?
                        if not line.strip():
                            continue

                        # Is species code and common name == 'nocall'?
                        # If so, that's a dummy line and we can skip it
                        if line.split("\t")[7] == "nocall" and line.split("\t")[8] == "nocall":
                            continue

                        # adjust selection id
                        line_elements = line.split("\t")
                        line_elements[0] = str(s_id)
                        s_id += 1

                        # shift begin/end times by the accumulated offset of
                        # all previously combined files
                        line_elements[3] = str(float(line_elements[3]) + time_offset)
                        line_elements[4] = str(float(line_elements[4]) + time_offset)

                        # write line
                        f.write("\t".join(line_elements))

                    # adjust time offset
                    time_offset += f_duration

                except Exception as ex:
                    print(f"Error: Cannot combine results from {rfile}.\n", flush=True)
                    utils.write_error_log(ex)

    listfilesname = cfg.OUTPUT_RAVEN_FILENAME.rsplit(".", 1)[0] + ".list.txt"

    with open(os.path.join(cfg.OUTPUT_PATH, listfilesname), "w", encoding="utf-8") as f:
        # Use a distinct loop variable: the original generator expression
        # reused "f", shadowing the file handle inside the genexp.
        f.writelines(audio_file + "\n" for audio_file in audiofiles)
342
+
343
+
344
def combine_kaleidoscope_files(saved_results: list[str]):
    """
    Merge several Kaleidoscope CSV result files into one combined file.

    Args:
        saved_results (list[str]): A list of file paths to the saved Kaleidoscope result files.

    Returns:
        None
    """
    combined_path = os.path.join(cfg.OUTPUT_PATH, cfg.OUTPUT_KALEIDOSCOPE_FILENAME)

    with open(combined_path, "w", encoding="utf-8") as out_file:
        out_file.write(KALEIDOSCOPE_HEADER)

        for rfile in saved_results:
            with open(rfile, encoding="utf-8") as in_file:
                try:
                    content = in_file.readlines()

                    # Sanity check: only merge files that look like Kaleidoscope output
                    if "INDIR" not in content[0] or "sensitivity" not in content[0]:
                        continue

                    # Append everything except the header row
                    out_file.writelines(content[1:])

                except Exception as ex:
                    print(f"Error: Cannot combine results from {rfile}.\n", flush=True)
                    utils.write_error_log(ex)
373
+
374
+
375
def combine_csv_files(saved_results: list[str]):
    """
    Concatenate several CSV result files into one combined CSV file.

    The header row is kept only from the first file that is read
    successfully; later files contribute only their data rows.

    Args:
        saved_results (list[str]): A list of file paths to the CSV files to be combined.
    """
    out_string = ""

    for rfile in saved_results:
        try:
            with open(rfile, encoding="utf-8") as rf:
                content = rf.readlines()

            # Skip the header line for every file after the first
            out_string += "".join(content if not out_string else content[1:])

        except Exception as ex:
            print(f"Error: Cannot combine results from {rfile}.\n", flush=True)
            utils.write_error_log(ex)

    with open(os.path.join(cfg.OUTPUT_PATH, cfg.OUTPUT_CSV_FILENAME), "w", encoding="utf-8") as f:
        f.write(out_string)
396
+
397
+
398
def combine_results(saved_results: Sequence[dict[str, str] | None]):
    """
    Combine the per-file result outputs into single files, one per enabled format.

    Entries in *saved_results* that are ``None`` (skipped or failed files)
    are ignored; only formats listed in ``cfg.RESULT_TYPES`` are combined.

    Args:
        saved_results: One dictionary of result-file paths per analyzed file,
            or ``None`` for files that produced no results.

    Returns:
        None
    """
    valid_entries = [entry for entry in saved_results if entry]

    if "table" in cfg.RESULT_TYPES:
        combine_raven_tables([entry["table"] for entry in valid_entries])

    if "kaleidoscope" in cfg.RESULT_TYPES:
        combine_kaleidoscope_files([entry["kaleidoscope"] for entry in valid_entries])

    if "csv" in cfg.RESULT_TYPES:
        combine_csv_files([entry["csv"] for entry in valid_entries])
420
+
421
+
422
+ def merge_consecutive_detections(results: dict[str, list], max_consecutive: int | None = None):
423
+ """Merges consecutive detections of the same species.
424
+ Uses the mean of the top-3 highest scoring predictions as
425
+ confidence score for the merged detection.
426
+
427
+ Args:
428
+ results: The dictionary with {segment: scores}.
429
+ max_consecutive: The maximum number of consecutive detections to merge.
430
+ If None, merge all consecutive detections.
431
+
432
+ Returns:
433
+ The dictionary with merged detections.
434
+ """
435
+
436
+ # If max_consecutive is 0 or 1, return original results
437
+ if max_consecutive is not None and max_consecutive <= 1:
438
+ return results
439
+
440
+ # For each species, make list of timestamps and scores
441
+ species = {}
442
+ for timestamp, scores in results.items():
443
+ for label, score in scores:
444
+ if label not in species:
445
+ species[label] = []
446
+ species[label].append((timestamp, score))
447
+
448
+ # Sort timestamps by start time for each species
449
+ for label, timestamps in species.items():
450
+ species[label] = sorted(timestamps, key=lambda t: float(t[0].split("-", 1)[0]))
451
+
452
+ # Merge consecutive detections
453
+ merged_results = {}
454
+ for label in species:
455
+ timestamps = species[label]
456
+
457
+ # Check if end time of current detection is within the start time of the next detection
458
+ i = 0
459
+ while i < len(timestamps) - 1:
460
+ start, end = timestamps[i][0].split("-", 1)
461
+ next_start, next_end = timestamps[i + 1][0].split("-", 1)
462
+
463
+ if float(end) >= float(next_start):
464
+ # Merge detections
465
+ merged_scores = [timestamps[i][1], timestamps[i + 1][1]]
466
+ timestamps.pop(i)
467
+
468
+ while i < len(timestamps) - 1 and float(next_end) >= float(timestamps[i + 1][0].split("-", 1)[0]):
469
+ if max_consecutive and len(merged_scores) >= max_consecutive:
470
+ break
471
+ merged_scores.append(timestamps[i + 1][1])
472
+ next_end = timestamps[i + 1][0].split("-", 1)[1]
473
+ timestamps.pop(i + 1)
474
+
475
+ # Calculate mean of top 3 scores
476
+ top_3_scores = sorted(merged_scores, reverse=True)[:3]
477
+ merged_score = sum(top_3_scores) / len(top_3_scores)
478
+
479
+ timestamps[i] = (f"{start}-{next_end}", merged_score)
480
+
481
+ i += 1
482
+
483
+ merged_results[label] = timestamps
484
+
485
+ # Restore original format
486
+ results = {}
487
+ for label, timestamps in merged_results.items():
488
+ for timestamp, score in timestamps:
489
+ if timestamp not in results:
490
+ results[timestamp] = []
491
+ results[timestamp].append((label, score))
492
+
493
+ return results
494
+
495
+
496
def get_sorted_timestamps(results: dict[str, list]):
    """Return the segment keys of *results* ordered by start time.

    Args:
        results: The dictionary with {segment: scores}.

    Returns:
        The segment strings sorted ascending by their numeric start time.
    """

    def start_time(segment: str) -> float:
        return float(segment.split("-", 1)[0])

    return sorted(results, key=start_time)
506
+
507
+
508
def get_raw_audio_from_file(fpath: str, offset, duration):
    """Read a section of an audio file and split it into analysis chunks.

    Args:
        fpath: Path to the audio file.
        offset: Start position within the file, in seconds.
        duration: Number of seconds to read.

    Returns:
        The signal split into a list of chunks.
    """
    # Load the requested section (bandpassed and speed-adjusted)
    signal, sample_rate = audio.open_audio_file(
        fpath, cfg.SAMPLE_RATE, offset, duration, cfg.BANDPASS_FMIN, cfg.BANDPASS_FMAX, cfg.AUDIO_SPEED
    )

    # Cut the signal into (possibly overlapping) chunks
    return audio.split_signal(signal, sample_rate, cfg.SIG_LENGTH, cfg.SIG_OVERLAP, cfg.SIG_MINLEN)
522
+
523
+
524
def iterate_audio_chunks(fpath: str, embeddings: bool = False):
    """Iterates over audio chunks from a file.

    Reads the file in sections of ``cfg.FILE_SPLITTING_DURATION`` seconds,
    splits each section into chunks, runs them through the model in batches
    of ``cfg.BATCH_SIZE`` and yields one result per chunk.

    Args:
        fpath: Path to the audio file.
        embeddings: If True, yield model embeddings instead of prediction scores.

    Yields:
        Tuples of (start_seconds, end_seconds, prediction) per chunk.
    """
    fileLengthSeconds = audio.get_audio_file_length(fpath)
    start, end = 0, cfg.SIG_LENGTH * cfg.AUDIO_SPEED
    duration = int(cfg.FILE_SPLITTING_DURATION / cfg.AUDIO_SPEED)

    # np.isclose guards against float rounding right at the end of the file
    while start < fileLengthSeconds and not np.isclose(start, fileLengthSeconds):
        chunks = get_raw_audio_from_file(fpath, start, duration)
        samples = []
        timestamps = []

        if not chunks:
            break

        for chunk_index, chunk in enumerate(chunks):
            # Timestamps are expressed in original-file seconds, hence the
            # multiplication by cfg.AUDIO_SPEED
            t_start = start + (chunk_index * (cfg.SIG_LENGTH - cfg.SIG_OVERLAP) * cfg.AUDIO_SPEED)
            end = min(t_start + cfg.SIG_LENGTH * cfg.AUDIO_SPEED, fileLengthSeconds)

            # Add to batch
            samples.append(chunk)
            timestamps.append([round(t_start, 2), round(end, 2)])

            # Check if batch is full or last chunk
            if len(samples) < cfg.BATCH_SIZE and chunk_index < len(chunks) - 1:
                continue

            # Predict
            p = model.embeddings(samples) if embeddings else predict(samples)

            # Add to results
            for i in range(len(samples)):
                # Get timestamp
                s_start, s_end = timestamps[i]

                yield s_start, s_end, p[i]

            # Clear batch
            samples = []
            timestamps = []

        # Advance to the next file section
        start += len(chunks) * (cfg.SIG_LENGTH - cfg.SIG_OVERLAP) * cfg.AUDIO_SPEED
573
+
574
+
575
def predict(samples):
    """Run the model on a batch of audio chunks.

    Args:
        samples: Samples to be predicted.

    Returns:
        The prediction scores.
    """
    # Convert the batch to float32 and pass it through the model
    batch = np.array(samples, dtype="float32")
    scores = model.predict(batch)

    # Optionally squash logits through a sigmoid to get confidence-like scores
    if cfg.APPLY_SIGMOID:
        scores = model.flat_sigmoid(np.array(scores), sensitivity=-1, bias=cfg.SIGMOID_SENSITIVITY)

    return scores
593
+
594
+
595
def get_result_file_names(fpath: str):
    """
    Generates a dictionary of result file names based on the input file path and configured result types.

    Args:
        fpath (str): The file path of the input file.

    Returns:
        dict: A dictionary where the keys are result types (e.g., "table", "audacity", "kaleidoscope", "csv")
              and the values are the corresponding output file paths.
    """
    result_names = {}

    # Strip only the leading input-directory prefix. The previous
    # str.replace(cfg.INPUT_PATH, "") removed ALL occurrences and could
    # mangle paths like "/data/x/data/y.wav".
    rpath = fpath.removeprefix(cfg.INPUT_PATH)

    # Drop a leading path separator; fall back to the bare file name when the
    # input path and the file path were identical.
    rpath = (rpath[1:] if rpath[0] in ["/", "\\"] else rpath) if rpath else os.path.basename(fpath)

    # Relative path without the file extension
    file_shorthand = rpath.rsplit(".", 1)[0]

    if "table" in cfg.RESULT_TYPES:
        result_names["table"] = os.path.join(cfg.OUTPUT_PATH, file_shorthand + ".BirdNET.selection.table.txt")
    if "audacity" in cfg.RESULT_TYPES:
        result_names["audacity"] = os.path.join(cfg.OUTPUT_PATH, file_shorthand + ".BirdNET.results.txt")
    # if "r" in cfg.RESULT_TYPES:
    #     result_names["r"] = os.path.join(cfg.OUTPUT_PATH, file_shorthand + ".BirdNET.results.r.csv")
    if "kaleidoscope" in cfg.RESULT_TYPES:
        result_names["kaleidoscope"] = os.path.join(cfg.OUTPUT_PATH, file_shorthand + ".BirdNET.results.kaleidoscope.csv")
    if "csv" in cfg.RESULT_TYPES:
        result_names["csv"] = os.path.join(cfg.OUTPUT_PATH, file_shorthand + ".BirdNET.results.csv")

    return result_names
626
+
627
+
628
def analyze_file(item) -> dict[str, str] | None:
    """
    Analyzes an audio file and generates prediction results.

    Args:
        item (tuple): A tuple containing the file path (str) and the
            configuration to restore in this (worker) process.

    Returns:
        dict or None: A dictionary of result file names if analysis is successful,
        None if the file is skipped or an error occurs. Errors are logged via
        ``utils.write_error_log`` rather than raised.
    """
    # Get file path and restore cfg (workers receive the config as plain data)
    fpath: str = item[0]
    cfg.set_config(item[1])

    result_file_names = get_result_file_names(fpath)

    # Skip the file when every configured result file already exists
    if cfg.SKIP_EXISTING_RESULTS and all(os.path.exists(f) for f in result_file_names.values()):
        print(f"Skipping {fpath} as it has already been analyzed", flush=True)
        return None  # or return path to combine later? TODO

    # Start time
    start_time = datetime.datetime.now()
    results = {}

    # Status
    print(f"Analyzing {fpath}", flush=True)

    # Process each chunk
    try:
        for s_start, s_end, pred in iterate_audio_chunks(fpath):
            # Without a label file, fall back to generic placeholder labels
            if not cfg.LABELS:
                cfg.LABELS = [f"Species-{i}_Species-{i}" for i in range(len(pred))]

            # Assign scores to labels; with TOP_N set, the confidence filter is
            # skipped and the species list filter still applies
            p_labels = [
                p for p in zip(cfg.LABELS, pred, strict=True) if (cfg.TOP_N or p[1] >= cfg.MIN_CONFIDENCE) and (not cfg.SPECIES_LIST or p[0] in cfg.SPECIES_LIST)
            ]

            # Sort by score
            p_sorted = sorted(p_labels, key=operator.itemgetter(1), reverse=True)

            if cfg.TOP_N:
                p_sorted = p_sorted[: cfg.TOP_N]

            # TODO: filter by top-n or min-confidence earlier (in the chunk loop)
            # Store results keyed by "start-end" segment string
            results[str(s_start) + "-" + str(s_end)] = p_sorted

    except Exception as ex:
        # Write error log
        print(f"Error: Cannot analyze audio file {fpath}.\n", flush=True)
        utils.write_error_log(ex)

        return None

    # Save as selection table
    try:
        save_result_files(results, result_file_names, fpath)

    except Exception as ex:
        # Write error log
        print(f"Error: Cannot save result for {fpath}.\n", flush=True)
        utils.write_error_log(ex)

        return None

    delta_time = (datetime.datetime.now() - start_time).total_seconds()
    print(f"Finished {fpath} in {delta_time:.2f} seconds", flush=True)

    return result_file_names