birdnet-analyzer 2.0.0__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122)
  1. birdnet_analyzer/__init__.py +9 -8
  2. birdnet_analyzer/analyze/__init__.py +5 -5
  3. birdnet_analyzer/analyze/__main__.py +3 -4
  4. birdnet_analyzer/analyze/cli.py +25 -25
  5. birdnet_analyzer/analyze/core.py +241 -245
  6. birdnet_analyzer/analyze/utils.py +692 -701
  7. birdnet_analyzer/audio.py +368 -372
  8. birdnet_analyzer/cli.py +709 -707
  9. birdnet_analyzer/config.py +242 -242
  10. birdnet_analyzer/eBird_taxonomy_codes_2021E.json +25279 -25279
  11. birdnet_analyzer/embeddings/__init__.py +3 -4
  12. birdnet_analyzer/embeddings/__main__.py +3 -3
  13. birdnet_analyzer/embeddings/cli.py +12 -13
  14. birdnet_analyzer/embeddings/core.py +69 -70
  15. birdnet_analyzer/embeddings/utils.py +179 -193
  16. birdnet_analyzer/evaluation/__init__.py +196 -195
  17. birdnet_analyzer/evaluation/__main__.py +3 -3
  18. birdnet_analyzer/evaluation/assessment/__init__.py +0 -0
  19. birdnet_analyzer/evaluation/assessment/metrics.py +388 -0
  20. birdnet_analyzer/evaluation/assessment/performance_assessor.py +409 -0
  21. birdnet_analyzer/evaluation/assessment/plotting.py +379 -0
  22. birdnet_analyzer/evaluation/preprocessing/__init__.py +0 -0
  23. birdnet_analyzer/evaluation/preprocessing/data_processor.py +631 -0
  24. birdnet_analyzer/evaluation/preprocessing/utils.py +98 -0
  25. birdnet_analyzer/gui/__init__.py +19 -23
  26. birdnet_analyzer/gui/__main__.py +3 -3
  27. birdnet_analyzer/gui/analysis.py +175 -174
  28. birdnet_analyzer/gui/assets/arrow_down.svg +4 -4
  29. birdnet_analyzer/gui/assets/arrow_left.svg +4 -4
  30. birdnet_analyzer/gui/assets/arrow_right.svg +4 -4
  31. birdnet_analyzer/gui/assets/arrow_up.svg +4 -4
  32. birdnet_analyzer/gui/assets/gui.css +28 -28
  33. birdnet_analyzer/gui/assets/gui.js +93 -93
  34. birdnet_analyzer/gui/embeddings.py +619 -620
  35. birdnet_analyzer/gui/evaluation.py +795 -813
  36. birdnet_analyzer/gui/localization.py +75 -68
  37. birdnet_analyzer/gui/multi_file.py +245 -246
  38. birdnet_analyzer/gui/review.py +519 -527
  39. birdnet_analyzer/gui/segments.py +191 -191
  40. birdnet_analyzer/gui/settings.py +128 -129
  41. birdnet_analyzer/gui/single_file.py +267 -269
  42. birdnet_analyzer/gui/species.py +95 -95
  43. birdnet_analyzer/gui/train.py +696 -698
  44. birdnet_analyzer/gui/utils.py +810 -808
  45. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_af.txt +6522 -6522
  46. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ar.txt +6522 -6522
  47. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_bg.txt +6522 -6522
  48. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ca.txt +6522 -6522
  49. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_cs.txt +6522 -6522
  50. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_da.txt +6522 -6522
  51. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_de.txt +6522 -6522
  52. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_el.txt +6522 -6522
  53. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_en_uk.txt +6522 -6522
  54. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_es.txt +6522 -6522
  55. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_fi.txt +6522 -6522
  56. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_fr.txt +6522 -6522
  57. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_he.txt +6522 -6522
  58. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_hr.txt +6522 -6522
  59. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_hu.txt +6522 -6522
  60. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_in.txt +6522 -6522
  61. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_is.txt +6522 -6522
  62. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_it.txt +6522 -6522
  63. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ja.txt +6522 -6522
  64. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ko.txt +6522 -6522
  65. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_lt.txt +6522 -6522
  66. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ml.txt +6522 -6522
  67. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_nl.txt +6522 -6522
  68. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_no.txt +6522 -6522
  69. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_pl.txt +6522 -6522
  70. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_pt_BR.txt +6522 -6522
  71. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_pt_PT.txt +6522 -6522
  72. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ro.txt +6522 -6522
  73. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ru.txt +6522 -6522
  74. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sk.txt +6522 -6522
  75. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sl.txt +6522 -6522
  76. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sr.txt +6522 -6522
  77. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sv.txt +6522 -6522
  78. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_th.txt +6522 -6522
  79. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_tr.txt +6522 -6522
  80. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_uk.txt +6522 -6522
  81. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_zh.txt +6522 -6522
  82. birdnet_analyzer/lang/de.json +334 -334
  83. birdnet_analyzer/lang/en.json +334 -334
  84. birdnet_analyzer/lang/fi.json +334 -334
  85. birdnet_analyzer/lang/fr.json +334 -334
  86. birdnet_analyzer/lang/id.json +334 -334
  87. birdnet_analyzer/lang/pt-br.json +334 -334
  88. birdnet_analyzer/lang/ru.json +334 -334
  89. birdnet_analyzer/lang/se.json +334 -334
  90. birdnet_analyzer/lang/tlh.json +334 -334
  91. birdnet_analyzer/lang/zh_TW.json +334 -334
  92. birdnet_analyzer/model.py +1212 -1243
  93. birdnet_analyzer/playground.py +5 -0
  94. birdnet_analyzer/search/__init__.py +3 -3
  95. birdnet_analyzer/search/__main__.py +3 -3
  96. birdnet_analyzer/search/cli.py +11 -12
  97. birdnet_analyzer/search/core.py +78 -78
  98. birdnet_analyzer/search/utils.py +107 -111
  99. birdnet_analyzer/segments/__init__.py +3 -3
  100. birdnet_analyzer/segments/__main__.py +3 -3
  101. birdnet_analyzer/segments/cli.py +13 -14
  102. birdnet_analyzer/segments/core.py +81 -78
  103. birdnet_analyzer/segments/utils.py +383 -394
  104. birdnet_analyzer/species/__init__.py +3 -3
  105. birdnet_analyzer/species/__main__.py +3 -3
  106. birdnet_analyzer/species/cli.py +13 -14
  107. birdnet_analyzer/species/core.py +35 -35
  108. birdnet_analyzer/species/utils.py +74 -75
  109. birdnet_analyzer/train/__init__.py +3 -3
  110. birdnet_analyzer/train/__main__.py +3 -3
  111. birdnet_analyzer/train/cli.py +13 -14
  112. birdnet_analyzer/train/core.py +113 -113
  113. birdnet_analyzer/train/utils.py +877 -847
  114. birdnet_analyzer/translate.py +133 -104
  115. birdnet_analyzer/utils.py +426 -419
  116. {birdnet_analyzer-2.0.0.dist-info → birdnet_analyzer-2.0.1.dist-info}/METADATA +137 -129
  117. birdnet_analyzer-2.0.1.dist-info/RECORD +125 -0
  118. {birdnet_analyzer-2.0.0.dist-info → birdnet_analyzer-2.0.1.dist-info}/WHEEL +1 -1
  119. {birdnet_analyzer-2.0.0.dist-info → birdnet_analyzer-2.0.1.dist-info}/licenses/LICENSE +18 -18
  120. birdnet_analyzer-2.0.0.dist-info/RECORD +0 -117
  121. {birdnet_analyzer-2.0.0.dist-info → birdnet_analyzer-2.0.1.dist-info}/entry_points.txt +0 -0
  122. {birdnet_analyzer-2.0.0.dist-info → birdnet_analyzer-2.0.1.dist-info}/top_level.txt +0 -0
birdnet_analyzer/segments/utils.py
@@ -1,394 +1,383 @@
1
- """Extract segments from audio files based on BirdNET detections.
2
-
3
- Can be used to save the segments of the audio files for each detection.
4
- """
5
-
6
- import os
7
-
8
- import numpy as np
9
-
10
- import birdnet_analyzer.audio as audio
11
- import birdnet_analyzer.config as cfg
12
- import birdnet_analyzer.utils as utils
13
-
14
- # Set numpy random seed
15
- np.random.seed(cfg.RANDOM_SEED)
16
- SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__))
17
-
18
-
19
- def detect_rtype(line: str):
20
- """Detects the type of result file.
21
-
22
- Args:
23
- line: First line of text.
24
-
25
- Returns:
26
- Either "table", "kaleidoscope", "csv" or "audacity".
27
- """
28
- if line.lower().startswith("selection"):
29
- return "table"
30
- # elif line.lower().startswith("filepath"):
31
- # return "r"
32
- elif line.lower().startswith("indir"):
33
- return "kaleidoscope"
34
- elif line.lower().startswith("start (s)"):
35
- return "csv"
36
- else:
37
- return "audacity"
38
-
39
-
40
- def get_header_mapping(line: str) -> dict:
41
- """
42
- Parses a header line and returns a mapping of column names to their indices.
43
-
44
- Args:
45
- line (str): A string representing the header line of a file.
46
-
47
- Returns:
48
- dict: A dictionary where the keys are column names and the values are their respective indices.
49
- """
50
- rtype = detect_rtype(line)
51
-
52
- if rtype == "table" or rtype == "audacity":
53
- sep = "\t"
54
- else:
55
- sep = ","
56
-
57
- cols = line.split(sep)
58
-
59
- mapping = {}
60
-
61
- for i, col in enumerate(cols):
62
- mapping[col] = i
63
-
64
- return mapping
65
-
66
-
67
- def parse_folders(apath: str, rpath: str, allowed_result_filetypes: list[str] = ["txt", "csv"]) -> list[dict]:
68
- """Read audio and result files.
69
-
70
- Reads all audio files and BirdNET output inside directory recursively.
71
-
72
- Args:
73
- apath (str): Path to search for audio files.
74
- rpath (str): Path to search for result files.
75
- allowed_result_filetypes (list[str]): List of extensions for the result files.
76
-
77
- Returns:
78
- list[dict]: A list of {"audio": path_to_audio, "result": path_to_result }.
79
- """
80
- data = {}
81
- apath = apath.replace("/", os.sep).replace("\\", os.sep)
82
- rpath = rpath.replace("/", os.sep).replace("\\", os.sep)
83
-
84
- # Check if combined selection table is present and read that.
85
- if os.path.exists(os.path.join(rpath, cfg.OUTPUT_RAVEN_FILENAME)):
86
- # Read combined Raven selection table
87
- rfile = os.path.join(rpath, cfg.OUTPUT_RAVEN_FILENAME)
88
- data["combined"] = {"isCombinedFile": True, "result": rfile}
89
- elif os.path.exists(os.path.join(rpath, cfg.OUTPUT_CSV_FILENAME)):
90
- rfile = os.path.join(rpath, cfg.OUTPUT_CSV_FILENAME)
91
- data["combined"] = {"isCombinedFile": True, "result": rfile}
92
- elif os.path.exists(os.path.join(rpath, cfg.OUTPUT_KALEIDOSCOPE_FILENAME)):
93
- rfile = os.path.join(rpath, cfg.OUTPUT_KALEIDOSCOPE_FILENAME)
94
- data["combined"] = {"isCombinedFile": True, "result": rfile}
95
- else:
96
- # Get all audio files
97
- for root, _, files in os.walk(apath):
98
- for f in files:
99
- if f.rsplit(".", 1)[-1].lower() in cfg.ALLOWED_FILETYPES and not f.startswith("."):
100
- table_key = os.path.join(root.strip(apath), f.rsplit(".", 1)[0])
101
- data[table_key] = {"audio": os.path.join(root, f), "result": ""}
102
-
103
- # Get all result files
104
- for root, _, files in os.walk(rpath):
105
- for f in files:
106
- if f.rsplit(".", 1)[-1] in allowed_result_filetypes and ".BirdNET." in f:
107
- table_key = os.path.join(root.strip(rpath), f.split(".BirdNET.", 1)[0])
108
- if table_key in data:
109
- data[table_key]["result"] = os.path.join(root, f)
110
-
111
- # Convert to list
112
- flist = [f for f in data.values() if f["result"]]
113
-
114
- print(f"Found {len(flist)} audio files with valid result file.")
115
-
116
- return flist
117
-
118
-
119
- def parse_files(flist: list[dict], max_segments=100):
120
- """
121
- Parses a list of files to extract and organize bird call segments by species.
122
-
123
- Args:
124
- flist (list[dict]): A list of dictionaries, each containing 'audio' and 'result' file paths.
125
- Optionally, a dictionary can have 'isCombinedFile' set to True to indicate
126
- that it is a combined result file.
127
- max_segments (int, optional): The maximum number of segments to retain per species. Defaults to 100.
128
- Returns:
129
- list[tuple]: A list of tuples where each tuple contains an audio file path and a list of segments
130
- associated with that audio file.
131
- Raises:
132
- KeyError: If the dictionaries in flist do not contain the required keys ('audio' and 'result').
133
- Example:
134
- flist = [
135
- {"audio": "path/to/audio1.wav", "result": "path/to/result1.csv"},
136
- {"audio": "path/to/audio2.wav", "result": "path/to/result2.csv"}
137
- ]
138
- segments = parseFiles(flist, max_segments=50)
139
- """
140
- species_segments: dict[str, list] = {}
141
-
142
- is_combined_rfile = len(flist) == 1 and flist[0].get("isCombinedFile", False)
143
-
144
- if is_combined_rfile:
145
- rfile = flist[0]["result"]
146
- segments = find_segments_from_combined(rfile)
147
-
148
- # Parse segments by species
149
- for s in segments:
150
- if s["species"] not in species_segments:
151
- species_segments[s["species"]] = []
152
-
153
- species_segments[s["species"]].append(s)
154
- else:
155
- for f in flist:
156
- # Paths
157
- afile = f["audio"]
158
- rfile = f["result"]
159
-
160
- # Get all segments for result file
161
- segments = find_segments(afile, rfile)
162
-
163
- # Parse segments by species
164
- for s in segments:
165
- if s["species"] not in species_segments:
166
- species_segments[s["species"]] = []
167
-
168
- species_segments[s["species"]].append(s)
169
-
170
- # Shuffle segments for each species and limit to max_segments
171
- for s in species_segments:
172
- np.random.shuffle(species_segments[s])
173
- species_segments[s] = species_segments[s][:max_segments]
174
-
175
- # Make dict of segments per audio file
176
- segments: dict[str, list] = {}
177
- seg_cnt = 0
178
-
179
- for s in species_segments:
180
- for seg in species_segments[s]:
181
- if seg["audio"] not in segments:
182
- segments[seg["audio"]] = []
183
-
184
- segments[seg["audio"]].append(seg)
185
- seg_cnt += 1
186
-
187
- print(f"Found {seg_cnt} segments in {len(segments)} audio files.")
188
-
189
- # Convert to list
190
- flist = [tuple(e) for e in segments.items()]
191
-
192
- return flist
193
-
194
-
195
- def find_segments_from_combined(rfile: str) -> list[dict]:
196
- """Extracts the segments from a combined results file
197
-
198
- Args:
199
- rfile (str): Path to the result file.
200
-
201
- Returns:
202
- list[dict]: A list of dicts in the form of
203
- {"audio": afile, "start": start, "end": end, "species": species, "confidence": confidence}
204
- """
205
- segments: list[dict] = []
206
-
207
- # Open and parse result file
208
- lines = utils.read_lines(rfile)
209
-
210
- # Auto-detect result type
211
- rtype = detect_rtype(lines[0])
212
-
213
- if rtype == "audacity":
214
- raise Exception("Audacity files are not supported for combined results.")
215
-
216
- # Get mapping from the header column
217
- header_mapping = get_header_mapping(lines[0])
218
-
219
- # Get start and end times based on rtype
220
- confidence = 0
221
- start = end = 0.0
222
- species = ""
223
- afile = ""
224
-
225
- for i, line in enumerate(lines):
226
- if rtype == "table" and i > 0:
227
- d = line.split("\t")
228
- file_offset = float(d[header_mapping["File Offset (s)"]])
229
- start = file_offset
230
- end = file_offset + (float(d[header_mapping["End Time (s)"]]) - float(d[header_mapping["Begin Time (s)"]]))
231
- species = d[header_mapping["Common Name"]]
232
- confidence = float(d[header_mapping["Confidence"]])
233
- afile = d[header_mapping["Begin Path"]].replace("/", os.sep).replace("\\", os.sep)
234
-
235
- elif rtype == "kaleidoscope" and i > 0:
236
- d = line.split(",")
237
- start = float(d[header_mapping["OFFSET"]])
238
- end = float(d[header_mapping["DURATION"]]) + start
239
- species = d[header_mapping["scientific_name"]]
240
- confidence = float(d[header_mapping["confidence"]])
241
- in_dir = d[header_mapping["INDIR"]]
242
- folder = d[header_mapping["FOLDER"]]
243
- in_file = d[header_mapping["IN FILE"]]
244
- afile = os.path.join(in_dir, folder, in_file).replace("/", os.sep).replace("\\", os.sep)
245
-
246
- elif rtype == "csv" and i > 0:
247
- d = line.split(",")
248
- start = float(d[header_mapping["Start (s)"]])
249
- end = float(d[header_mapping["End (s)"]])
250
- species = d[header_mapping["Common name"]]
251
- confidence = float(d[header_mapping["Confidence"]])
252
- afile = d[header_mapping["File"]].replace("/", os.sep).replace("\\", os.sep)
253
-
254
- # Check if confidence is high enough and label is not "nocall"
255
- if confidence >= cfg.MIN_CONFIDENCE and species.lower() != "nocall" and afile:
256
- segments.append({"audio": afile, "start": start, "end": end, "species": species, "confidence": confidence})
257
-
258
- return segments
259
-
260
-
261
- def find_segments(afile: str, rfile: str):
262
- """Extracts the segments for an audio file from the results file
263
-
264
- Args:
265
- afile: Path to the audio file.
266
- rfile: Path to the result file.
267
-
268
- Returns:
269
- A list of dicts in the form of
270
- {"audio": afile, "start": start, "end": end, "species": species, "confidence": confidence}
271
- """
272
- segments: list[dict] = []
273
-
274
- # Open and parse result file
275
- lines = utils.read_lines(rfile)
276
-
277
- # Auto-detect result type
278
- rtype = detect_rtype(lines[0])
279
-
280
- # Get mapping from the header column
281
- header_mapping = get_header_mapping(lines[0])
282
-
283
- # Get start and end times based on rtype
284
- confidence = 0
285
- start = end = 0.0
286
- species = ""
287
-
288
- for i, line in enumerate(lines):
289
- if rtype == "table" and i > 0:
290
- d = line.split("\t")
291
- start = float(d[header_mapping["Begin Time (s)"]])
292
- end = float(d[header_mapping["End Time (s)"]])
293
- species = d[header_mapping["Common Name"]]
294
- confidence = float(d[header_mapping["Confidence"]])
295
-
296
- elif rtype == "audacity":
297
- d = line.split("\t")
298
- start = float(d[0])
299
- end = float(d[1])
300
- species = d[2].split(", ")[1]
301
- confidence = float(d[-1])
302
-
303
- elif rtype == "kaleidoscope" and i > 0:
304
- d = line.split(",")
305
- start = float(d[header_mapping["OFFSET"]])
306
- end = float(d[header_mapping["DURATION"]]) + start
307
- species = d[header_mapping["scientific_name"]]
308
- confidence = float(d[header_mapping["confidence"]])
309
-
310
- elif rtype == "csv" and i > 0:
311
- d = line.split(",")
312
- start = float(d[header_mapping["Start (s)"]])
313
- end = float(d[header_mapping["End (s)"]])
314
- species = d[header_mapping["Common name"]]
315
- confidence = float(d[header_mapping["Confidence"]])
316
-
317
- # Check if confidence is high enough and label is not "nocall"
318
- if confidence >= cfg.MIN_CONFIDENCE and species.lower() != "nocall":
319
- segments.append({"audio": afile, "start": start, "end": end, "species": species, "confidence": confidence})
320
-
321
- return segments
322
-
323
-
324
- def extract_segments(item: tuple[tuple[str, list[dict]], float, dict[str]]):
325
- """
326
- Extracts audio segments from a given audio file based on provided segment information.
327
- Args:
328
- item (tuple): A tuple containing:
329
- - A tuple with:
330
- - A string representing the path to the audio file.
331
- - A list of dictionaries, each containing segment information with keys "start", "end", "species", "confidence", and "audio".
332
- - A float representing the segment length.
333
- - A dictionary containing configuration settings.
334
- Returns:
335
- bool: True if segments were successfully extracted, False otherwise.
336
- Raises:
337
- Exception: If there is an error opening the audio file or extracting segments.
338
- """
339
- # Paths and config
340
- afile = item[0][0]
341
- segments = item[0][1]
342
- seg_length = item[1]
343
- cfg.set_config(item[2])
344
-
345
- # Status
346
- print(f"Extracting segments from {afile}")
347
-
348
- try:
349
- # Open audio file
350
- sig, rate = audio.open_audio_file(afile, cfg.SAMPLE_RATE, speed=cfg.AUDIO_SPEED)
351
- except Exception as ex:
352
- print(f"Error: Cannot open audio file {afile}", flush=True)
353
- utils.write_error_log(ex)
354
-
355
- return
356
-
357
- # Extract segments
358
- for seg_cnt, seg in enumerate(segments, 1):
359
- try:
360
- # Get start and end times
361
- start = int((seg["start"] * rate) / cfg.AUDIO_SPEED)
362
- end = int((seg["end"] * rate) / cfg.AUDIO_SPEED)
363
-
364
- offset = max(0, ((seg_length * rate) - (end - start)) // 2)
365
- start = max(0, start - offset)
366
- end = min(len(sig), end + offset)
367
-
368
- # Make sure segment is long enough
369
- if end > start:
370
- # Get segment raw audio from signal
371
- seg_sig = sig[int(start) : int(end)]
372
-
373
- # Make output path
374
- outpath = os.path.join(cfg.OUTPUT_PATH, seg["species"])
375
- os.makedirs(outpath, exist_ok=True)
376
-
377
- # Save segment
378
- seg_name = "{:.3f}_{}_{}_{:.1f}s_{:.1f}s.wav".format(
379
- seg["confidence"],
380
- seg_cnt,
381
- seg["audio"].rsplit(os.sep, 1)[-1].rsplit(".", 1)[0],
382
- seg["start"],
383
- seg["end"],
384
- )
385
- seg_path = os.path.join(outpath, seg_name)
386
- audio.save_signal(seg_sig, seg_path, rate)
387
-
388
- except Exception as ex:
389
- # Write error log
390
- print(f"Error: Cannot extract segments from {afile}.", flush=True)
391
- utils.write_error_log(ex)
392
- return False
393
-
394
- return True
1
+ """Extract segments from audio files based on BirdNET detections.
2
+
3
+ Can be used to save the segments of the audio files for each detection.
4
+ """
5
+
6
+ import os
7
+
8
+ import numpy as np
9
+
10
+ import birdnet_analyzer.config as cfg
11
+ from birdnet_analyzer import audio, utils
12
+
13
+ # Set numpy random seed
14
+ RNG = np.random.default_rng(cfg.RANDOM_SEED)
15
+ SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__))
16
+
17
+
18
+ def detect_rtype(line: str):
19
+ """Detects the type of result file.
20
+
21
+ Args:
22
+ line: First line of text.
23
+
24
+ Returns:
25
+ Either "table", "kaleidoscope", "csv" or "audacity".
26
+ """
27
+ if line.lower().startswith("selection"):
28
+ return "table"
29
+
30
+ if line.lower().startswith("indir"):
31
+ return "kaleidoscope"
32
+
33
+ if line.lower().startswith("start (s)"):
34
+ return "csv"
35
+
36
+ return "audacity"
37
+
38
+
39
+ def get_header_mapping(line: str) -> dict:
40
+ """
41
+ Parses a header line and returns a mapping of column names to their indices.
42
+
43
+ Args:
44
+ line (str): A string representing the header line of a file.
45
+
46
+ Returns:
47
+ dict: A dictionary where the keys are column names and the values are their respective indices.
48
+ """
49
+ rtype = detect_rtype(line)
50
+
51
+ sep = "\t" if rtype in ("table", "audacity") else ","
52
+
53
+ cols = line.split(sep)
54
+
55
+ return {col: i for i, col in enumerate(cols)}
56
+
57
+
58
+ def parse_folders(apath: str, rpath: str, allowed_result_filetypes: tuple[str] = ("txt", "csv")) -> list[dict]:
59
+ """Read audio and result files.
60
+
61
+ Reads all audio files and BirdNET output inside directory recursively.
62
+
63
+ Args:
64
+ apath (str): Path to search for audio files.
65
+ rpath (str): Path to search for result files.
66
+ allowed_result_filetypes (tuple[str]): List of extensions for the result files.
67
+
68
+ Returns:
69
+ list[dict]: A list of {"audio": path_to_audio, "result": path_to_result }.
70
+ """
71
+ data = {}
72
+ apath = apath.replace("/", os.sep).replace("\\", os.sep)
73
+ rpath = rpath.replace("/", os.sep).replace("\\", os.sep)
74
+
75
+ # Check if combined selection table is present and read that.
76
+ if os.path.exists(os.path.join(rpath, cfg.OUTPUT_RAVEN_FILENAME)):
77
+ # Read combined Raven selection table
78
+ rfile = os.path.join(rpath, cfg.OUTPUT_RAVEN_FILENAME)
79
+ data["combined"] = {"isCombinedFile": True, "result": rfile}
80
+ elif os.path.exists(os.path.join(rpath, cfg.OUTPUT_CSV_FILENAME)):
81
+ rfile = os.path.join(rpath, cfg.OUTPUT_CSV_FILENAME)
82
+ data["combined"] = {"isCombinedFile": True, "result": rfile}
83
+ elif os.path.exists(os.path.join(rpath, cfg.OUTPUT_KALEIDOSCOPE_FILENAME)):
84
+ rfile = os.path.join(rpath, cfg.OUTPUT_KALEIDOSCOPE_FILENAME)
85
+ data["combined"] = {"isCombinedFile": True, "result": rfile}
86
+ else:
87
+ # Get all audio files
88
+ for root, _, files in os.walk(apath):
89
+ for f in files:
90
+ if f.rsplit(".", 1)[-1].lower() in cfg.ALLOWED_FILETYPES and not f.startswith("."):
91
+ table_key = os.path.join(root.strip(apath), f.rsplit(".", 1)[0])
92
+ data[table_key] = {"audio": os.path.join(root, f), "result": ""}
93
+
94
+ # Get all result files
95
+ for root, _, files in os.walk(rpath):
96
+ for f in files:
97
+ if f.rsplit(".", 1)[-1] in allowed_result_filetypes and ".BirdNET." in f:
98
+ table_key = os.path.join(root.strip(rpath), f.split(".BirdNET.", 1)[0])
99
+ if table_key in data:
100
+ data[table_key]["result"] = os.path.join(root, f)
101
+
102
+ # Convert to list
103
+ flist = [f for f in data.values() if f["result"]]
104
+
105
+ print(f"Found {len(flist)} audio files with valid result file.")
106
+
107
+ return flist
108
+
109
+
110
+ def parse_files(flist: list[dict], max_segments=100):
111
+ """
112
+ Parses a list of files to extract and organize bird call segments by species.
113
+
114
+ Args:
115
+ flist (list[dict]): A list of dictionaries, each containing 'audio' and 'result' file paths.
116
+ Optionally, a dictionary can have 'isCombinedFile' set to True to indicate
117
+ that it is a combined result file.
118
+ max_segments (int, optional): The maximum number of segments to retain per species. Defaults to 100.
119
+ Returns:
120
+ list[tuple]: A list of tuples where each tuple contains an audio file path and a list of segments
121
+ associated with that audio file.
122
+ Raises:
123
+ KeyError: If the dictionaries in flist do not contain the required keys ('audio' and 'result').
124
+ Example:
125
+ flist = [
126
+ {"audio": "path/to/audio1.wav", "result": "path/to/result1.csv"},
127
+ {"audio": "path/to/audio2.wav", "result": "path/to/result2.csv"}
128
+ ]
129
+ segments = parseFiles(flist, max_segments=50)
130
+ """
131
+ species_segments: dict[str, list] = {}
132
+
133
+ is_combined_rfile = len(flist) == 1 and flist[0].get("isCombinedFile", False)
134
+
135
+ if is_combined_rfile:
136
+ rfile = flist[0]["result"]
137
+ segments = find_segments_from_combined(rfile)
138
+
139
+ # Parse segments by species
140
+ for s in segments:
141
+ if s["species"] not in species_segments:
142
+ species_segments[s["species"]] = []
143
+
144
+ species_segments[s["species"]].append(s)
145
+ else:
146
+ for f in flist:
147
+ # Paths
148
+ afile = f["audio"]
149
+ rfile = f["result"]
150
+
151
+ # Get all segments for result file
152
+ segments = find_segments(afile, rfile)
153
+
154
+ # Parse segments by species
155
+ for s in segments:
156
+ if s["species"] not in species_segments:
157
+ species_segments[s["species"]] = []
158
+
159
+ species_segments[s["species"]].append(s)
160
+
161
+ # Shuffle segments for each species and limit to max_segments
162
+ for s in species_segments:
163
+ RNG.shuffle(species_segments[s])
164
+ species_segments[s] = species_segments[s][:max_segments]
165
+
166
+ # Make dict of segments per audio file
167
+ segments: dict[str, list] = {}
168
+ seg_cnt = 0
169
+
170
+ for s in species_segments:
171
+ for seg in species_segments[s]:
172
+ if seg["audio"] not in segments:
173
+ segments[seg["audio"]] = []
174
+
175
+ segments[seg["audio"]].append(seg)
176
+ seg_cnt += 1
177
+
178
+ print(f"Found {seg_cnt} segments in {len(segments)} audio files.")
179
+
180
+ # Convert to list
181
+ return [tuple(e) for e in segments.items()]
182
+
183
+
184
+ def find_segments_from_combined(rfile: str) -> list[dict]:
185
+ """Extracts the segments from a combined results file
186
+
187
+ Args:
188
+ rfile (str): Path to the result file.
189
+
190
+ Returns:
191
+ list[dict]: A list of dicts in the form of
192
+ {"audio": afile, "start": start, "end": end, "species": species, "confidence": confidence}
193
+ """
194
+ segments: list[dict] = []
195
+
196
+ # Open and parse result file
197
+ lines = utils.read_lines(rfile)
198
+
199
+ # Auto-detect result type
200
+ rtype = detect_rtype(lines[0])
201
+
202
+ if rtype == "audacity":
203
+ raise Exception("Audacity files are not supported for combined results.")
204
+
205
+ # Get mapping from the header column
206
+ header_mapping = get_header_mapping(lines[0])
207
+
208
+ # Get start and end times based on rtype
209
+ confidence = 0
210
+ start = end = 0.0
211
+ species = ""
212
+ afile = ""
213
+
214
+ for i, line in enumerate(lines):
215
+ if rtype == "table" and i > 0:
216
+ d = line.split("\t")
217
+ file_offset = float(d[header_mapping["File Offset (s)"]])
218
+ start = file_offset
219
+ end = file_offset + (float(d[header_mapping["End Time (s)"]]) - float(d[header_mapping["Begin Time (s)"]]))
220
+ species = d[header_mapping["Common Name"]]
221
+ confidence = float(d[header_mapping["Confidence"]])
222
+ afile = d[header_mapping["Begin Path"]].replace("/", os.sep).replace("\\", os.sep)
223
+
224
+ elif rtype == "kaleidoscope" and i > 0:
225
+ d = line.split(",")
226
+ start = float(d[header_mapping["OFFSET"]])
227
+ end = float(d[header_mapping["DURATION"]]) + start
228
+ species = d[header_mapping["scientific_name"]]
229
+ confidence = float(d[header_mapping["confidence"]])
230
+ in_dir = d[header_mapping["INDIR"]]
231
+ folder = d[header_mapping["FOLDER"]]
232
+ in_file = d[header_mapping["IN FILE"]]
233
+ afile = os.path.join(in_dir, folder, in_file).replace("/", os.sep).replace("\\", os.sep)
234
+
235
+ elif rtype == "csv" and i > 0:
236
+ d = line.split(",")
237
+ start = float(d[header_mapping["Start (s)"]])
238
+ end = float(d[header_mapping["End (s)"]])
239
+ species = d[header_mapping["Common name"]]
240
+ confidence = float(d[header_mapping["Confidence"]])
241
+ afile = d[header_mapping["File"]].replace("/", os.sep).replace("\\", os.sep)
242
+
243
+ # Check if confidence is high enough and label is not "nocall"
244
+ if confidence >= cfg.MIN_CONFIDENCE and species.lower() != "nocall" and afile:
245
+ segments.append({"audio": afile, "start": start, "end": end, "species": species, "confidence": confidence})
246
+
247
+ return segments
248
+
249
+
250
+ def find_segments(afile: str, rfile: str):
251
+ """Extracts the segments for an audio file from the results file
252
+
253
+ Args:
254
+ afile: Path to the audio file.
255
+ rfile: Path to the result file.
256
+
257
+ Returns:
258
+ A list of dicts in the form of
259
+ {"audio": afile, "start": start, "end": end, "species": species, "confidence": confidence}
260
+ """
261
+ segments: list[dict] = []
262
+
263
+ # Open and parse result file
264
+ lines = utils.read_lines(rfile)
265
+
266
+ # Auto-detect result type
267
+ rtype = detect_rtype(lines[0])
268
+
269
+ # Get mapping from the header column
270
+ header_mapping = get_header_mapping(lines[0])
271
+
272
+ # Get start and end times based on rtype
273
+ confidence = 0
274
+ start = end = 0.0
275
+ species = ""
276
+
277
+ for i, line in enumerate(lines):
278
+ if rtype == "table" and i > 0:
279
+ d = line.split("\t")
280
+ start = float(d[header_mapping["Begin Time (s)"]])
281
+ end = float(d[header_mapping["End Time (s)"]])
282
+ species = d[header_mapping["Common Name"]]
283
+ confidence = float(d[header_mapping["Confidence"]])
284
+
285
+ elif rtype == "audacity":
286
+ d = line.split("\t")
287
+ start = float(d[0])
288
+ end = float(d[1])
289
+ species = d[2].split(", ")[1]
290
+ confidence = float(d[-1])
291
+
292
+ elif rtype == "kaleidoscope" and i > 0:
293
+ d = line.split(",")
294
+ start = float(d[header_mapping["OFFSET"]])
295
+ end = float(d[header_mapping["DURATION"]]) + start
296
+ species = d[header_mapping["scientific_name"]]
297
+ confidence = float(d[header_mapping["confidence"]])
298
+
299
+ elif rtype == "csv" and i > 0:
300
+ d = line.split(",")
301
+ start = float(d[header_mapping["Start (s)"]])
302
+ end = float(d[header_mapping["End (s)"]])
303
+ species = d[header_mapping["Common name"]]
304
+ confidence = float(d[header_mapping["Confidence"]])
305
+
306
+ # Check if confidence is high enough and label is not "nocall"
307
+ if confidence >= cfg.MIN_CONFIDENCE and species.lower() != "nocall":
308
+ segments.append({"audio": afile, "start": start, "end": end, "species": species, "confidence": confidence})
309
+
310
+ return segments
311
+
312
+
313
+ def extract_segments(item: tuple[tuple[str, list[dict]], float, dict[str]]):
314
+ """
315
+ Extracts audio segments from a given audio file based on provided segment information.
316
+ Args:
317
+ item (tuple): A tuple containing:
318
+ - A tuple with:
319
+ - A string representing the path to the audio file.
320
+ - A list of dictionaries, each containing segment information with keys "start", "end", "species", "confidence", and "audio".
321
+ - A float representing the segment length.
322
+ - A dictionary containing configuration settings.
323
+ Returns:
324
+ bool: True if segments were successfully extracted, False otherwise.
325
+ Raises:
326
+ Exception: If there is an error opening the audio file or extracting segments.
327
+ """
328
+ # Paths and config
329
+ afile = item[0][0]
330
+ segments = item[0][1]
331
+ seg_length = item[1]
332
+ cfg.set_config(item[2])
333
+
334
+ # Status
335
+ print(f"Extracting segments from {afile}")
336
+
337
+ try:
338
+ # Open audio file
339
+ sig, rate = audio.open_audio_file(afile, cfg.SAMPLE_RATE, speed=cfg.AUDIO_SPEED)
340
+ except Exception as ex:
341
+ print(f"Error: Cannot open audio file {afile}", flush=True)
342
+ utils.write_error_log(ex)
343
+
344
+ return None
345
+
346
+ # Extract segments
347
+ for seg_cnt, seg in enumerate(segments, 1):
348
+ try:
349
+ # Get start and end times
350
+ start = int((seg["start"] * rate) / cfg.AUDIO_SPEED)
351
+ end = int((seg["end"] * rate) / cfg.AUDIO_SPEED)
352
+
353
+ offset = max(0, ((seg_length * rate) - (end - start)) // 2)
354
+ start = max(0, start - offset)
355
+ end = min(len(sig), end + offset)
356
+
357
+ # Make sure segment is long enough
358
+ if end > start:
359
+ # Get segment raw audio from signal
360
+ seg_sig = sig[int(start) : int(end)]
361
+
362
+ # Make output path
363
+ outpath = os.path.join(cfg.OUTPUT_PATH, seg["species"])
364
+ os.makedirs(outpath, exist_ok=True)
365
+
366
+ # Save segment
367
+ seg_name = "{:.3f}_{}_{}_{:.1f}s_{:.1f}s.wav".format(
368
+ seg["confidence"],
369
+ seg_cnt,
370
+ seg["audio"].rsplit(os.sep, 1)[-1].rsplit(".", 1)[0],
371
+ seg["start"],
372
+ seg["end"],
373
+ )
374
+ seg_path = os.path.join(outpath, seg_name)
375
+ audio.save_signal(seg_sig, seg_path, rate)
376
+
377
+ except Exception as ex:
378
+ # Write error log
379
+ print(f"Error: Cannot extract segments from {afile}.", flush=True)
380
+ utils.write_error_log(ex)
381
+ return False
382
+
383
+ return True