britekit 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of britekit might be problematic.
- britekit/__about__.py +1 -1
- britekit/cli.py +6 -1
- britekit/commands/__init__.py +2 -1
- britekit/commands/_analyze.py +37 -11
- britekit/commands/_audioset.py +8 -8
- britekit/commands/_calibrate.py +8 -8
- britekit/commands/_ckpt_ops.py +6 -6
- britekit/commands/_db_add.py +12 -12
- britekit/commands/_db_delete.py +15 -15
- britekit/commands/_embed.py +4 -4
- britekit/commands/_ensemble.py +7 -7
- britekit/commands/_extract.py +158 -19
- britekit/commands/_find_dup.py +5 -5
- britekit/commands/_inat.py +4 -4
- britekit/commands/_init.py +1 -1
- britekit/commands/_pickle.py +13 -7
- britekit/commands/_plot.py +26 -26
- britekit/commands/_reextract.py +6 -6
- britekit/commands/_reports.py +22 -22
- britekit/commands/_search.py +12 -12
- britekit/commands/_train.py +6 -6
- britekit/commands/_tune.py +13 -13
- britekit/commands/_wav2mp3.py +2 -2
- britekit/commands/_xeno.py +7 -7
- britekit/commands/_youtube.py +3 -3
- britekit/core/analyzer.py +43 -13
- britekit/core/audio.py +14 -14
- britekit/core/augmentation.py +24 -0
- britekit/core/data_module.py +2 -2
- britekit/core/dataset.py +1 -4
- britekit/core/plot.py +8 -8
- britekit/core/predictor.py +51 -23
- britekit/core/reextractor.py +6 -6
- britekit/core/util.py +44 -8
- britekit/models/base_model.py +0 -1
- britekit/occurrence_db/occurrence_data_provider.py +13 -13
- britekit/testing/per_recording_tester.py +2 -2
- britekit/training_db/extractor.py +65 -30
- britekit/training_db/training_data_provider.py +1 -1
- britekit/training_db/training_db.py +97 -100
- britekit-0.1.5.dist-info/METADATA +299 -0
- {britekit-0.1.3.dist-info → britekit-0.1.5.dist-info}/RECORD +45 -45
- britekit-0.1.3.dist-info/METADATA +0 -290
- {britekit-0.1.3.dist-info → britekit-0.1.5.dist-info}/WHEEL +0 -0
- {britekit-0.1.3.dist-info → britekit-0.1.5.dist-info}/entry_points.txt +0 -0
- {britekit-0.1.3.dist-info → britekit-0.1.5.dist-info}/licenses/LICENSE.txt +0 -0
britekit/core/plot.py
CHANGED

@@ -16,14 +16,14 @@ def plot_spec(
     Plot and save a spectrogram image.
 
     Args:
-
-
-
-
-
-
-
-
+        spec (np.ndarray): Spectrogram of shape (height, width)
+        output_path (str): Path to save the image (e.g., "output.png")
+        show_dims (bool): Whether to show frequency and time scales
+        spec_duration (float, optional): Number of seconds represented.
+        height (int, optional): Output image height in pixels. If not specified,
+            the existing square behavior is preserved.
+        width (int, optional): Output image width in pixels. If not specified,
+            the existing square behavior is preserved.
     """
     import matplotlib.pyplot as plt
     import numpy as np
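For reference, a call using the newly documented parameters might look like the following. This is a hypothetical sketch based only on the docstring above; the exact signature and defaults of plot_spec are not shown in this diff.

    # Hypothetical usage of plot_spec, derived from the docstring above.
    import numpy as np
    from britekit.core.plot import plot_spec

    spec = np.random.rand(192, 384)  # (height, width) spectrogram
    plot_spec(
        spec,
        "output.png",        # output_path
        show_dims=True,      # draw frequency and time scales
        spec_duration=3.0,   # seconds represented by the spectrogram
        height=192,          # output image height in pixels
        width=384,           # output image width in pixels
    )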
britekit/core/predictor.py
CHANGED

@@ -1,10 +1,13 @@
 # Defer some imports to improve initialization performance.
+from copy import deepcopy
 import importlib.util
 import logging
 import math
 import os
 from typing import Sequence, Optional, List
 
+import numpy as np
+
 from britekit.core.config_loader import get_config
 from britekit.core.exceptions import InferenceError
 from britekit.core import util

@@ -30,10 +33,10 @@ class Predictor:
         Initialize the Predictor with a model or ensemble of models.
 
         Args:
-
-
-
-
+            model_path (str): Path to a checkpoint (.ckpt) or ONNX (.onnx) file,
+                or a directory containing multiple checkpoint/ONNX files for an ensemble.
+            device (str, optional): Device to use for inference ('cuda', 'cpu', or 'mps').
+                If None, automatically selects the best available device.
         """
         from britekit.core.audio import Audio

@@ -62,12 +65,13 @@ class Predictor:
 
         self._load_models(model_path)
 
-    def get_raw_scores(self, recording_path: str):
+    def get_raw_scores(self, recording_path: str, start_seconds: float = 0):
         """
         Get scores in array format from the loaded models for the given recording.
 
         Args:
-
+            recording_path (str): Path to the audio recording file.
+            start_seconds (float): Where to start processing the recording, in seconds from the start.
 
         Returns:
             tuple: A tuple containing:

@@ -94,7 +98,7 @@ class Predictor:
 
         increment = max(0.5, self.cfg.audio.spec_duration - self.cfg.infer.overlap)
         end_offset = max(increment, audio_duration - increment)
-        start_times = util.get_range(
+        start_times = util.get_range(start_seconds, end_offset, increment)
         specs, _ = self.audio.get_spectrograms(start_times)
         if specs is None or len(specs) == 0:
             return None, None, []

@@ -139,8 +143,8 @@ class Predictor:
         Given an array of raw segment-level scores, return dict of labels.
 
         Args:
-
-
+            scores (np.ndarray): Array of scores of shape (num_spectrograms, num_species).
+            start_times (list[float]): Start time in seconds for each spectrogram.
 
         Returns:
             dict[str, list]: Dictionary mapping species names to lists of Label objects.

@@ -187,7 +191,7 @@ class Predictor:
         Given a frame map, return dict of labels.
 
         Args:
-
+            frame_map (np.ndarray): Array of scores of shape (num_frames, num_species).
 
         Returns:
             dict[str, list]: Dictionary mapping species names to lists of Label objects.

@@ -283,11 +287,11 @@ class Predictor:
         Given an array of raw scores, return as a pandas dataframe.
 
         Args:
-
-
-
-
-
+            score_array (np.ndarray): Array of scores of shape (num_spectrograms, num_species).
+            frame_map (np.ndarray, optional): Frame-level scores of shape (num_frames, num_species).
+                If provided, uses frame-level labels; otherwise uses segment-level labels.
+            start_times (list[float]): Start time in seconds for each spectrogram.
+            recording_name (str): Name of the recording for the dataframe.
 
         Returns:
             pd.DataFrame: DataFrame with columns ['recording', 'name', 'start_time', 'end_time', 'score']

@@ -321,6 +325,30 @@ class Predictor:
         df["score"] = score_list
         return df
 
+    def log_scores(self, scores):
+        """
+        Given an array of raw segment-level scores, log them by descending score.
+
+        Args:
+            scores (np.ndarray): Array of scores of shape (num_spectrograms, num_species).
+        """
+        assert self.class_names is not None
+
+        labels: dict[str, list] = {}  # name -> [(score, start_time, end_time)]
+        if scores is None or len(scores) == 0:
+            return labels
+
+        names = self._get_names()
+
+        # ensure labels are sorted by name/code before start_time,
+        # which is useful when inspecting label files during testing
+        num_classes = scores.shape[1]
+        scores = deepcopy(scores[0])
+        for i in range(min(num_classes, 10)):
+            j = np.argmax(scores)
+            logging.info(f"{names[j]}: {scores[j]:.4f}")
+            scores[j] = 0
+
     def save_audacity_labels(
         self,
         scores,

@@ -332,11 +360,11 @@ class Predictor:
         Given an array of raw scores, convert to Audacity labels and save in the given file.
 
         Args:
-
-
-
-
-
+            scores (np.ndarray): Segment-level scores of shape (num_spectrograms, num_species).
+            frame_map (np.ndarray, optional): Frame-level scores of shape (num_frames, num_species).
+                If provided, uses frame-level labels; otherwise uses segment-level labels.
+            start_times (list[float]): Start time in seconds for each spectrogram.
+            file_path (str): Output path for the Audacity label file.
 
         Returns:
             None: Writes the labels directly to the specified file.

@@ -369,9 +397,9 @@ class Predictor:
         Use mean rather than max or weighted values.
 
         Args:
-
-
-
+            frame_scores: (num_specs, num_classes, T_spec) scores in [0, 1].
+            offsets_sec: start time (s) for each spectrogram within the recording.
+            recording_duration_sec: total recording length in seconds.
 
         Returns:
             global_frames: (num_classes, T_global) tensor of scores in [0, 1].
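Taken together, the new start_seconds parameter and the new log_scores method suggest usage along these lines. This is a hedged sketch: it assumes get_raw_scores returns (scores, frame_map, start_times), as implied by the "return None, None, []" early exit shown above, and the save_audacity_labels argument order is taken from its docstring rather than a full signature, which this diff does not show.

    # Sketch only; return-value unpacking and argument order are assumptions
    # based on the docstrings and code fragments in this diff.
    from britekit.core.predictor import Predictor

    predictor = Predictor("models/ensemble_dir", device="cpu")
    scores, frame_map, start_times = predictor.get_raw_scores(
        "recordings/example.wav",
        start_seconds=30,  # new in 0.1.5: skip the first 30 seconds
    )
    if scores is not None:
        predictor.log_scores(scores)  # logs the ten highest scores for the first segment
        predictor.save_audacity_labels(scores, frame_map, start_times, "example.txt")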
britekit/core/reextractor.py
CHANGED

@@ -22,12 +22,12 @@ class Reextractor:
     updating the database.
 
     Args:
-
-
-
-
-
-
+        cfg_path (str, optional): Path to YAML file defining configuration overrides.
+        db_path (str, optional): Path to the training database. Defaults to cfg.train.training_db.
+        class_name (str, optional): Name of a specific class to reextract. If omitted, processes all classes.
+        classes_path (str, optional): Path to CSV file listing classes to reextract. Alternative to class_name.
+        check (bool): If True, only check that all recording paths are accessible without updating database.
+        spec_group (str): Spectrogram group name for storing the extracted spectrograms. Defaults to 'default'.
     """
 
     def __init__(
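Based only on the documented keyword arguments, a check-only run might be constructed roughly as follows. The full __init__ signature and the method that actually triggers reextraction are not visible in this diff, so this is illustrative only.

    # Hypothetical construction; keyword names come from the docstring above.
    from britekit.core.reextractor import Reextractor

    reextractor = Reextractor(
        cfg_path="overrides.yaml",   # optional YAML configuration overrides
        db_path="training.db",       # defaults to cfg.train.training_db when omitted
        class_name="Common Loon",    # reextract a single class
        check=True,                  # only verify recording paths; no database updates
    )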
britekit/core/util.py
CHANGED

@@ -135,6 +135,42 @@ def get_range(min_val: float, max_val: float, incr: float) -> List[float]:
     return [float(v) for v in values]
 
 
+def _get_seconds_from_time_string(time_str: str) -> int:
+    """
+    Convert a time string into an integer number of seconds.
+
+    Supports the following formats:
+    - "71" → 71 seconds
+    - "1:11" → 71 seconds
+    - "0:01:11" → 71 seconds
+    - "1:02:03" → 3723 seconds (1 hour, 2 minutes, 3 seconds)
+
+    Args:
+        time_str (str): Time string in seconds or colon-separated format.
+
+    Returns:
+        int: Total number of seconds.
+    """
+    parts = time_str.strip().split(":")
+
+    # Only seconds provided
+    if len(parts) == 1:
+        return int(float(parts[0]))
+
+    # Minutes and seconds
+    elif len(parts) == 2:
+        minutes, seconds = map(float, parts)
+        return int(minutes * 60 + seconds)
+
+    # Hours, minutes, and seconds
+    elif len(parts) == 3:
+        hours, minutes, seconds = map(float, parts)
+        return int(hours * 3600 + minutes * 60 + seconds)
+
+    else:
+        raise ValueError(f"Unrecognized time format: '{time_str}'")
+
+
 def set_logging(level=logging.INFO, timestamp=False):
     """Initialize logging."""
     if timestamp:

@@ -166,7 +202,7 @@ def cfg_to_pure(obj: Any) -> JSONValue:
     str, int, float, bool) that can be safely serialized.
 
     Args:
-
+        obj: Any object to convert to JSON-serializable format
 
     Returns:
         JSON-serializable representation of the input object

@@ -284,8 +320,8 @@ def get_audio_files(path: str, short_names: bool = False) -> List[str]:
     Return list of audio files in the given directory.
 
     Args:
-
-
+        path (str): Directory path
+        short_names (bool): If true, return file names, else return full paths
 
     Returns:
         List of audio files in the given directory

@@ -325,8 +361,8 @@ def get_file_lines(path: str, encoding: str = "utf-8") -> List[str]:
     and lines that start with #.
 
     Args:
-
-
+        path: Path to text file
+        encoding: File encoding (default: utf-8)
 
     Returns:
         List of lines

@@ -354,7 +390,7 @@ def get_source_name(filename: str) -> str:
     Return a source name given a recording file name.
 
     Args:
-
+        filename: Recording file name
 
     Returns:
         Source name

@@ -390,7 +426,7 @@ def compress_spectrogram(spec) -> bytes:
     Compress a spectrogram in preparation for inserting into database.
 
     Args:
-
+        spec: Uncompressed spectrogram
 
     Returns:
         Compressed spectrogram

@@ -421,7 +457,7 @@ def expand_spectrogram(spec: bytes):
     Decompress a spectrogram, then convert from bytes to floats and reshape it.
 
     Args:
-
+        spec: Compressed spectrogram
 
     Returns:
         Uncompressed spectrogram
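The accepted time-string formats map to seconds exactly as listed in the new docstring. As a quick illustration (the helper is private to britekit.core.util, so calling it directly is for demonstration only):

    # Examples reproduced from the _get_seconds_from_time_string docstring above.
    from britekit.core.util import _get_seconds_from_time_string

    assert _get_seconds_from_time_string("71") == 71         # bare seconds
    assert _get_seconds_from_time_string("1:11") == 71       # minutes:seconds
    assert _get_seconds_from_time_string("0:01:11") == 71    # hours:minutes:seconds
    assert _get_seconds_from_time_string("1:02:03") == 3723  # 1 h + 2 min + 3 s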
britekit/models/base_model.py
CHANGED

@@ -252,7 +252,6 @@ class BaseModel(pl.LightningModule):
         }
 
     def on_save_checkpoint(self, checkpoint):
-        print("on_save_checkpoint")
         """Save model metadata to checkpoint."""
         if not hasattr(self, "identifier"):
             self.identifier = str(uuid.uuid4()).upper()
britekit/occurrence_db/occurrence_data_provider.py
CHANGED

@@ -10,7 +10,7 @@ class OccurrenceDataProvider:
     you must call the refresh method.
 
     Args:
-
+        db (OccurrenceDatabase): The database object.
     """
 
     def __init__(self, db: OccurrenceDatabase):

@@ -31,8 +31,8 @@ class OccurrenceDataProvider:
     Return county info for a given latitude/longitude, or None if not found.
 
     Args:
-
-
+        latitude (float): Latitude.
+        longitude (float): Longitude.
 
     Returns:
         County object, or None if not found.

@@ -54,8 +54,8 @@ class OccurrenceDataProvider:
     For each week, return the maximum of it and the adjacent weeks.
 
     Args:
-
-
+        county_code (str): County code
+        class_name (str): Class name
 
     Returns:
         List of smoothed occurrence values.

@@ -75,8 +75,8 @@ class OccurrenceDataProvider:
     Return list of occurrence values for given county code and class name.
 
     Args:
-
-
+        county_code (str): County code
+        class_name (str): Class name
 
     Returns:
         List of occurrence values.

@@ -97,9 +97,9 @@ class OccurrenceDataProvider:
     If area_weight = True, weight each county by its area.
 
     Args:
-
-
-
+        county_prefix (str): County code prefix
+        class_name (str): Class name
+        area_weight (bool, Optional): If true, weight by county area (default = False)
 
     Returns:
         Numpy array of 48 average occurrence values (one per week, using 4-week months).

@@ -139,9 +139,9 @@ class OccurrenceDataProvider:
     county don't occur in the same week.
 
     Args:
-
-
-
+        county_prefix (str): County code prefix
+        class_name (str): Class name
+        area_weight (bool, Optional): If true, weight by county area (default = False)
 
     Returns:
         Numpy average maximum occurrence value.
britekit/testing/per_recording_tester.py
CHANGED

@@ -329,10 +329,10 @@ class PerRecordingTester(BaseTester):
         rpt.append(
             f" Recall (recording) = {100 * self.details_dict['recall_annotated']:.2f}%\n"
         )
-
+        logging.info("")
         with open(os.path.join(self.output_dir, "summary_report.txt"), "w") as summary:
             for rpt_line in rpt:
-
+                logging.info(rpt_line[:-1])
                 summary.write(rpt_line)
 
         # write recording details (row per segment)
britekit/training_db/extractor.py
CHANGED

@@ -109,13 +109,45 @@ class Extractor:
 
         return offsets_per_file
 
+    def _insert_by_dict(self, recording_dir, destination_dir, offsets_per_file):
+        """
+        Given a recording directory and a dict from recording stems to offsets,
+        insert the corresponding spectrograms.
+        """
+        num_inserted = 0
+        recording_paths = util.get_audio_files(recording_dir)
+        for recording_dir in recording_paths:
+            filename = Path(recording_dir).stem
+            if filename not in offsets_per_file:
+                continue
+
+            if destination_dir is not None:
+                dest_path = os.path.join(destination_dir, Path(recording_dir).name)
+                if not os.path.exists(dest_path):
+                    shutil.copy(recording_dir, dest_path)
+
+                recording_dir = dest_path
+
+            logging.info(f"Processing {recording_dir}")
+            try:
+                self.audio.load(recording_dir)
+            except Exception as e:
+                logging.error(f"Caught exception: {e}")
+                continue
+
+            num_inserted += self.insert_spectrograms(
+                recording_dir, offsets_per_file[filename]
+            )
+
+        return num_inserted
+
     def insert_spectrograms(self, recording_path, offsets):
         """
         Insert a spectrogram at each of the given offsets of the specified file.
 
         Args:
-
-
+            recording_path (str): Path to audio recording.
+            offsets (list[float]): List of offsets, where each represents number of seconds to start of spectrogram.
 
         Returns:
             Number of spectrograms inserted.

@@ -156,7 +188,7 @@ class Extractor:
         Extract spectrograms for all recordings in the given directory.
 
         Args:
-
+            dir_path (str): Directory containing recordings.
 
         Returns:
             Number of spectrograms inserted.

@@ -187,45 +219,48 @@ class Extractor:
 
         return num_inserted
 
-    def
-        self, rec_dir: str,
+    def extract_by_csv(
+        self, rec_dir: str, csv_path: str, dest_dir: Optional[str] = None
     ):
         """
         Extract spectrograms that match names of spectrogram images in a given directory.
         Typically the spectrograms were generated using the 'search' or 'plot-db' commands.
 
         Args:
-
-
-
+            rec_dir (str): Directory containing recordings.
+            csv_path (str): Path to CSV file containing two columns (recording and offset) to identify segments to extract.
+            dest_dir (str, optional): Optionally copy used recordings to this directory.
 
         Returns:
             Number of spectrograms inserted.
         """
-
-        num_inserted = 0
-        recording_paths = util.get_audio_files(rec_dir)
-        for recording_path in recording_paths:
-            filename = Path(recording_path).stem
-            if filename not in offsets_per_file:
-                continue
+        import pandas as pd
 
-
-
-
-
+        df = pd.read_csv(csv_path)
+        offsets_per_file: dict[str, list] = {}
+        for i, row in df.iterrows():
+            recording = row["recording"]
+            if recording not in offsets_per_file:
+                offsets_per_file[recording] = []
 
-
+            offsets_per_file[recording].append(row["offset"])
 
-
-        try:
-            self.audio.load(recording_path)
-        except Exception as e:
-            logging.error(f"Caught exception: {e}")
-            continue
+        return self._insert_by_dict(rec_dir, dest_dir, offsets_per_file)
 
-
-
-
+    def extract_by_image(
+        self, rec_dir: str, spec_dir: str, dest_dir: Optional[str] = None
+    ):
+        """
+        Extract spectrograms that match names of spectrogram images in a given directory.
+        Typically the spectrograms were generated using the 'search' or 'plot-db' commands.
 
-
+        Args:
+            rec_dir (str): Directory containing recordings.
+            spec_dir (str): Directory containing spectrogram images.
+            dest_dir (str, optional): Optionally copy used recordings to this directory.
+
+        Returns:
+            Number of spectrograms inserted.
+        """
+        offsets_per_file = self._process_image_dir(spec_dir)
+        return self._insert_by_dict(rec_dir, dest_dir, offsets_per_file)