britekit 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of britekit might be problematic. Click here for more details.
- britekit/__about__.py +1 -1
- britekit/cli.py +6 -1
- britekit/commands/__init__.py +2 -1
- britekit/commands/_analyze.py +9 -9
- britekit/commands/_audioset.py +8 -8
- britekit/commands/_calibrate.py +8 -8
- britekit/commands/_ckpt_ops.py +6 -6
- britekit/commands/_db_add.py +12 -12
- britekit/commands/_db_delete.py +15 -15
- britekit/commands/_embed.py +4 -4
- britekit/commands/_ensemble.py +7 -7
- britekit/commands/_extract.py +158 -19
- britekit/commands/_find_dup.py +5 -5
- britekit/commands/_inat.py +4 -4
- britekit/commands/_init.py +1 -1
- britekit/commands/_pickle.py +7 -7
- britekit/commands/_plot.py +26 -26
- britekit/commands/_reextract.py +6 -6
- britekit/commands/_reports.py +22 -22
- britekit/commands/_search.py +12 -12
- britekit/commands/_train.py +6 -6
- britekit/commands/_tune.py +12 -12
- britekit/commands/_wav2mp3.py +2 -2
- britekit/commands/_xeno.py +7 -7
- britekit/commands/_youtube.py +3 -3
- britekit/core/analyzer.py +8 -8
- britekit/core/audio.py +14 -14
- britekit/core/data_module.py +2 -2
- britekit/core/plot.py +8 -8
- britekit/core/predictor.py +21 -21
- britekit/core/reextractor.py +6 -6
- britekit/core/util.py +8 -8
- britekit/occurrence_db/occurrence_data_provider.py +13 -13
- britekit/training_db/extractor.py +65 -30
- britekit/training_db/training_data_provider.py +1 -1
- britekit/training_db/training_db.py +97 -100
- britekit-0.1.4.dist-info/METADATA +299 -0
- {britekit-0.1.3.dist-info → britekit-0.1.4.dist-info}/RECORD +41 -41
- britekit-0.1.3.dist-info/METADATA +0 -290
- {britekit-0.1.3.dist-info → britekit-0.1.4.dist-info}/WHEEL +0 -0
- {britekit-0.1.3.dist-info → britekit-0.1.4.dist-info}/entry_points.txt +0 -0
- {britekit-0.1.3.dist-info → britekit-0.1.4.dist-info}/licenses/LICENSE.txt +0 -0
|
@@ -10,7 +10,7 @@ class OccurrenceDataProvider:
|
|
|
10
10
|
you must call the refresh method.
|
|
11
11
|
|
|
12
12
|
Args:
|
|
13
|
-
|
|
13
|
+
- db (OccurrenceDatabase): The database object.
|
|
14
14
|
"""
|
|
15
15
|
|
|
16
16
|
def __init__(self, db: OccurrenceDatabase):
|
|
@@ -31,8 +31,8 @@ class OccurrenceDataProvider:
|
|
|
31
31
|
Return county info for a given latitude/longitude, or None if not found.
|
|
32
32
|
|
|
33
33
|
Args:
|
|
34
|
-
|
|
35
|
-
|
|
34
|
+
- latitude (float): Latitude.
|
|
35
|
+
- longitude (float): Longitude.
|
|
36
36
|
|
|
37
37
|
Returns:
|
|
38
38
|
County object, or None if not found.
|
|
@@ -54,8 +54,8 @@ class OccurrenceDataProvider:
|
|
|
54
54
|
For each week, return the maximum of it and the adjacent weeks.
|
|
55
55
|
|
|
56
56
|
Args:
|
|
57
|
-
|
|
58
|
-
|
|
57
|
+
- county_code (str): County code
|
|
58
|
+
- class_name (str): Class name
|
|
59
59
|
|
|
60
60
|
Returns:
|
|
61
61
|
List of smoothed occurrence values.
|
|
@@ -75,8 +75,8 @@ class OccurrenceDataProvider:
|
|
|
75
75
|
Return list of occurrence values for given county code and class name.
|
|
76
76
|
|
|
77
77
|
Args:
|
|
78
|
-
|
|
79
|
-
|
|
78
|
+
- county_code (str): County code
|
|
79
|
+
- class_name (str): Class name
|
|
80
80
|
|
|
81
81
|
Returns:
|
|
82
82
|
List of occurrence values.
|
|
@@ -97,9 +97,9 @@ class OccurrenceDataProvider:
|
|
|
97
97
|
If area_weight = True, weight each county by its area.
|
|
98
98
|
|
|
99
99
|
Args:
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
100
|
+
- county_prefix (str): County code prefix
|
|
101
|
+
- class_name (str): Class name
|
|
102
|
+
- area_weight (bool, Optional): If true, weight by county area (default = False)
|
|
103
103
|
|
|
104
104
|
Returns:
|
|
105
105
|
Numpy array of 48 average occurrence values (one per week, using 4-week months).
|
|
@@ -139,9 +139,9 @@ class OccurrenceDataProvider:
|
|
|
139
139
|
county don't occur in the same week.
|
|
140
140
|
|
|
141
141
|
Args:
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
142
|
+
- county_prefix (str): County code prefix
|
|
143
|
+
- class_name (str): Class name
|
|
144
|
+
- area_weight (bool, Optional): If true, weight by county area (default = False)
|
|
145
145
|
|
|
146
146
|
Returns:
|
|
147
147
|
Numpy average maximum occurrence value.
|
|
@@ -109,13 +109,45 @@ class Extractor:
|
|
|
109
109
|
|
|
110
110
|
return offsets_per_file
|
|
111
111
|
|
|
112
|
+
def _insert_by_dict(self, recording_dir, destination_dir, offsets_per_file):
|
|
113
|
+
"""
|
|
114
|
+
Given a recording directory and a dict from recording stems to offsets,
|
|
115
|
+
insert the corresponding spectrograms.
|
|
116
|
+
"""
|
|
117
|
+
num_inserted = 0
|
|
118
|
+
recording_paths = util.get_audio_files(recording_dir)
|
|
119
|
+
for recording_dir in recording_paths:
|
|
120
|
+
filename = Path(recording_dir).stem
|
|
121
|
+
if filename not in offsets_per_file:
|
|
122
|
+
continue
|
|
123
|
+
|
|
124
|
+
if destination_dir is not None:
|
|
125
|
+
dest_path = os.path.join(destination_dir, Path(recording_dir).name)
|
|
126
|
+
if not os.path.exists(dest_path):
|
|
127
|
+
shutil.copy(recording_dir, dest_path)
|
|
128
|
+
|
|
129
|
+
recording_dir = dest_path
|
|
130
|
+
|
|
131
|
+
logging.info(f"Processing {recording_dir}")
|
|
132
|
+
try:
|
|
133
|
+
self.audio.load(recording_dir)
|
|
134
|
+
except Exception as e:
|
|
135
|
+
logging.error(f"Caught exception: {e}")
|
|
136
|
+
continue
|
|
137
|
+
|
|
138
|
+
num_inserted += self.insert_spectrograms(
|
|
139
|
+
recording_dir, offsets_per_file[filename]
|
|
140
|
+
)
|
|
141
|
+
|
|
142
|
+
return num_inserted
|
|
143
|
+
|
|
112
144
|
def insert_spectrograms(self, recording_path, offsets):
|
|
113
145
|
"""
|
|
114
146
|
Insert a spectrogram at each of the given offsets of the specified file.
|
|
115
147
|
|
|
116
148
|
Args:
|
|
117
|
-
|
|
118
|
-
|
|
149
|
+
- recording_path (str): Path to audio recording.
|
|
150
|
+
- offsets (list[float]): List of offsets, where each represents number of seconds to start of spectrogram.
|
|
119
151
|
|
|
120
152
|
Returns:
|
|
121
153
|
Number of spectrograms inserted.
|
|
@@ -156,7 +188,7 @@ class Extractor:
|
|
|
156
188
|
Extract spectrograms for all recordings in the given directory.
|
|
157
189
|
|
|
158
190
|
Args:
|
|
159
|
-
|
|
191
|
+
- dir_path (str): Directory containing recordings.
|
|
160
192
|
|
|
161
193
|
Returns:
|
|
162
194
|
Number of spectrograms inserted.
|
|
@@ -187,45 +219,48 @@ class Extractor:
|
|
|
187
219
|
|
|
188
220
|
return num_inserted
|
|
189
221
|
|
|
190
|
-
def
|
|
191
|
-
self, rec_dir: str,
|
|
222
|
+
def extract_by_csv(
|
|
223
|
+
self, rec_dir: str, csv_path: str, dest_dir: Optional[str] = None
|
|
192
224
|
):
|
|
193
225
|
"""
|
|
194
226
|
Extract spectrograms that match names of spectrogram images in a given directory.
|
|
195
227
|
Typically the spectrograms were generated using the 'search' or 'plot-db' commands.
|
|
196
228
|
|
|
197
229
|
Args:
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
230
|
+
- rec_dir (str): Directory containing recordings.
|
|
231
|
+
- csv_path (str): Path to CSV file containing two columns (recording and offset) to identify segments to extract.
|
|
232
|
+
- dest_dir (str, optional): Optionally copy used recordings to this directory.
|
|
201
233
|
|
|
202
234
|
Returns:
|
|
203
235
|
Number of spectrograms inserted.
|
|
204
236
|
"""
|
|
205
|
-
|
|
206
|
-
num_inserted = 0
|
|
207
|
-
recording_paths = util.get_audio_files(rec_dir)
|
|
208
|
-
for recording_path in recording_paths:
|
|
209
|
-
filename = Path(recording_path).stem
|
|
210
|
-
if filename not in offsets_per_file:
|
|
211
|
-
continue
|
|
237
|
+
import pandas as pd
|
|
212
238
|
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
239
|
+
df = pd.read_csv(csv_path)
|
|
240
|
+
offsets_per_file: dict[str, list] = {}
|
|
241
|
+
for i, row in df.iterrows():
|
|
242
|
+
recording = row["recording"]
|
|
243
|
+
if recording not in offsets_per_file:
|
|
244
|
+
offsets_per_file[recording] = []
|
|
217
245
|
|
|
218
|
-
|
|
246
|
+
offsets_per_file[recording].append(row["offset"])
|
|
219
247
|
|
|
220
|
-
|
|
221
|
-
try:
|
|
222
|
-
self.audio.load(recording_path)
|
|
223
|
-
except Exception as e:
|
|
224
|
-
logging.error(f"Caught exception: {e}")
|
|
225
|
-
continue
|
|
248
|
+
return self._insert_by_dict(rec_dir, dest_dir, offsets_per_file)
|
|
226
249
|
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
250
|
+
def extract_by_image(
|
|
251
|
+
self, rec_dir: str, spec_dir: str, dest_dir: Optional[str] = None
|
|
252
|
+
):
|
|
253
|
+
"""
|
|
254
|
+
Extract spectrograms that match names of spectrogram images in a given directory.
|
|
255
|
+
Typically the spectrograms were generated using the 'search' or 'plot-db' commands.
|
|
230
256
|
|
|
231
|
-
|
|
257
|
+
Args:
|
|
258
|
+
- rec_dir (str): Directory containing recordings.
|
|
259
|
+
- spec_dir (str): Directory containing spectrogram images.
|
|
260
|
+
- dest_dir (str, optional): Optionally copy used recordings to this directory.
|
|
261
|
+
|
|
262
|
+
Returns:
|
|
263
|
+
Number of spectrograms inserted.
|
|
264
|
+
"""
|
|
265
|
+
offsets_per_file = self._process_image_dir(spec_dir)
|
|
266
|
+
return self._insert_by_dict(rec_dir, dest_dir, offsets_per_file)
|