britekit 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of britekit might be problematic. Click here for more details.

Files changed (42)
  1. britekit/__about__.py +1 -1
  2. britekit/cli.py +6 -1
  3. britekit/commands/__init__.py +2 -1
  4. britekit/commands/_analyze.py +9 -9
  5. britekit/commands/_audioset.py +8 -8
  6. britekit/commands/_calibrate.py +8 -8
  7. britekit/commands/_ckpt_ops.py +6 -6
  8. britekit/commands/_db_add.py +12 -12
  9. britekit/commands/_db_delete.py +15 -15
  10. britekit/commands/_embed.py +4 -4
  11. britekit/commands/_ensemble.py +7 -7
  12. britekit/commands/_extract.py +158 -19
  13. britekit/commands/_find_dup.py +5 -5
  14. britekit/commands/_inat.py +4 -4
  15. britekit/commands/_init.py +1 -1
  16. britekit/commands/_pickle.py +7 -7
  17. britekit/commands/_plot.py +26 -26
  18. britekit/commands/_reextract.py +6 -6
  19. britekit/commands/_reports.py +22 -22
  20. britekit/commands/_search.py +12 -12
  21. britekit/commands/_train.py +6 -6
  22. britekit/commands/_tune.py +12 -12
  23. britekit/commands/_wav2mp3.py +2 -2
  24. britekit/commands/_xeno.py +7 -7
  25. britekit/commands/_youtube.py +3 -3
  26. britekit/core/analyzer.py +8 -8
  27. britekit/core/audio.py +14 -14
  28. britekit/core/data_module.py +2 -2
  29. britekit/core/plot.py +8 -8
  30. britekit/core/predictor.py +21 -21
  31. britekit/core/reextractor.py +6 -6
  32. britekit/core/util.py +8 -8
  33. britekit/occurrence_db/occurrence_data_provider.py +13 -13
  34. britekit/training_db/extractor.py +65 -30
  35. britekit/training_db/training_data_provider.py +1 -1
  36. britekit/training_db/training_db.py +97 -100
  37. britekit-0.1.4.dist-info/METADATA +299 -0
  38. {britekit-0.1.3.dist-info → britekit-0.1.4.dist-info}/RECORD +41 -41
  39. britekit-0.1.3.dist-info/METADATA +0 -290
  40. {britekit-0.1.3.dist-info → britekit-0.1.4.dist-info}/WHEEL +0 -0
  41. {britekit-0.1.3.dist-info → britekit-0.1.4.dist-info}/entry_points.txt +0 -0
  42. {britekit-0.1.3.dist-info → britekit-0.1.4.dist-info}/licenses/LICENSE.txt +0 -0
@@ -10,7 +10,7 @@ class OccurrenceDataProvider:
10
10
  you must call the refresh method.
11
11
 
12
12
  Args:
13
- db (OccurrenceDatabase): The database object.
13
+ - db (OccurrenceDatabase): The database object.
14
14
  """
15
15
 
16
16
  def __init__(self, db: OccurrenceDatabase):
@@ -31,8 +31,8 @@ class OccurrenceDataProvider:
31
31
  Return county info for a given latitude/longitude, or None if not found.
32
32
 
33
33
  Args:
34
- latitude (float): Latitude.
35
- longitude (float): Longitude.
34
+ - latitude (float): Latitude.
35
+ - longitude (float): Longitude.
36
36
 
37
37
  Returns:
38
38
  County object, or None if not found.
@@ -54,8 +54,8 @@ class OccurrenceDataProvider:
54
54
  For each week, return the maximum of it and the adjacent weeks.
55
55
 
56
56
  Args:
57
- county_code (str): County code
58
- class_name (str): Class name
57
+ - county_code (str): County code
58
+ - class_name (str): Class name
59
59
 
60
60
  Returns:
61
61
  List of smoothed occurrence values.
@@ -75,8 +75,8 @@ class OccurrenceDataProvider:
75
75
  Return list of occurrence values for given county code and class name.
76
76
 
77
77
  Args:
78
- county_code (str): County code
79
- class_name (str): Class name
78
+ - county_code (str): County code
79
+ - class_name (str): Class name
80
80
 
81
81
  Returns:
82
82
  List of occurrence values.
@@ -97,9 +97,9 @@ class OccurrenceDataProvider:
97
97
  If area_weight = True, weight each county by its area.
98
98
 
99
99
  Args:
100
- county_prefix (str): County code prefix
101
- class_name (str): Class name
102
- area_weight (bool, Optional): If true, weight by county area (default = False)
100
+ - county_prefix (str): County code prefix
101
+ - class_name (str): Class name
102
+ - area_weight (bool, Optional): If true, weight by county area (default = False)
103
103
 
104
104
  Returns:
105
105
  Numpy array of 48 average occurrence values (one per week, using 4-week months).
@@ -139,9 +139,9 @@ class OccurrenceDataProvider:
139
139
  county don't occur in the same week.
140
140
 
141
141
  Args:
142
- county_prefix (str): County code prefix
143
- class_name (str): Class name
144
- area_weight (bool, Optional): If true, weight by county area (default = False)
142
+ - county_prefix (str): County code prefix
143
+ - class_name (str): Class name
144
+ - area_weight (bool, Optional): If true, weight by county area (default = False)
145
145
 
146
146
  Returns:
147
147
  Numpy average maximum occurrence value.
@@ -109,13 +109,45 @@ class Extractor:
109
109
 
110
110
  return offsets_per_file
111
111
 
112
+ def _insert_by_dict(self, recording_dir, destination_dir, offsets_per_file):
113
+ """
114
+ Given a recording directory and a dict from recording stems to offsets,
115
+ insert the corresponding spectrograms.
116
+ """
117
+ num_inserted = 0
118
+ recording_paths = util.get_audio_files(recording_dir)
119
+ for recording_dir in recording_paths:
120
+ filename = Path(recording_dir).stem
121
+ if filename not in offsets_per_file:
122
+ continue
123
+
124
+ if destination_dir is not None:
125
+ dest_path = os.path.join(destination_dir, Path(recording_dir).name)
126
+ if not os.path.exists(dest_path):
127
+ shutil.copy(recording_dir, dest_path)
128
+
129
+ recording_dir = dest_path
130
+
131
+ logging.info(f"Processing {recording_dir}")
132
+ try:
133
+ self.audio.load(recording_dir)
134
+ except Exception as e:
135
+ logging.error(f"Caught exception: {e}")
136
+ continue
137
+
138
+ num_inserted += self.insert_spectrograms(
139
+ recording_dir, offsets_per_file[filename]
140
+ )
141
+
142
+ return num_inserted
143
+
112
144
  def insert_spectrograms(self, recording_path, offsets):
113
145
  """
114
146
  Insert a spectrogram at each of the given offsets of the specified file.
115
147
 
116
148
  Args:
117
- recording_path (str): Path to audio recording.
118
- offsets (list[float]): List of offsets, where each represents number of seconds to start of spectrogram.
149
+ - recording_path (str): Path to audio recording.
150
+ - offsets (list[float]): List of offsets, where each represents number of seconds to start of spectrogram.
119
151
 
120
152
  Returns:
121
153
  Number of spectrograms inserted.
@@ -156,7 +188,7 @@ class Extractor:
156
188
  Extract spectrograms for all recordings in the given directory.
157
189
 
158
190
  Args:
159
- dir_path (str): Directory containing recordings.
191
+ - dir_path (str): Directory containing recordings.
160
192
 
161
193
  Returns:
162
194
  Number of spectrograms inserted.
@@ -187,45 +219,48 @@ class Extractor:
187
219
 
188
220
  return num_inserted
189
221
 
190
- def extract_by_image(
191
- self, rec_dir: str, spec_dir: str, dest_dir: Optional[str] = None
222
+ def extract_by_csv(
223
+ self, rec_dir: str, csv_path: str, dest_dir: Optional[str] = None
192
224
  ):
193
225
  """
194
226
  Extract spectrograms that match names of spectrogram images in a given directory.
195
227
  Typically the spectrograms were generated using the 'search' or 'plot-db' commands.
196
228
 
197
229
  Args:
198
- rec_dir (str): Directory containing recordings.
199
- spec_dir (str): Directory containing spectrogram images.
200
- dest_dir (str, optional): Optionally copy used recordings to this directory.
230
+ - rec_dir (str): Directory containing recordings.
231
+ - csv_path (str): Path to CSV file containing two columns (recording and offset) to identify segments to extract.
232
+ - dest_dir (str, optional): Optionally copy used recordings to this directory.
201
233
 
202
234
  Returns:
203
235
  Number of spectrograms inserted.
204
236
  """
205
- offsets_per_file = self._process_image_dir(spec_dir)
206
- num_inserted = 0
207
- recording_paths = util.get_audio_files(rec_dir)
208
- for recording_path in recording_paths:
209
- filename = Path(recording_path).stem
210
- if filename not in offsets_per_file:
211
- continue
237
+ import pandas as pd
212
238
 
213
- if dest_dir is not None:
214
- dest_path = os.path.join(dest_dir, Path(recording_path).name)
215
- if not os.path.exists(dest_path):
216
- shutil.copy(recording_path, dest_path)
239
+ df = pd.read_csv(csv_path)
240
+ offsets_per_file: dict[str, list] = {}
241
+ for i, row in df.iterrows():
242
+ recording = row["recording"]
243
+ if recording not in offsets_per_file:
244
+ offsets_per_file[recording] = []
217
245
 
218
- recording_path = dest_path
246
+ offsets_per_file[recording].append(row["offset"])
219
247
 
220
- logging.info(f"Processing {recording_path}")
221
- try:
222
- self.audio.load(recording_path)
223
- except Exception as e:
224
- logging.error(f"Caught exception: {e}")
225
- continue
248
+ return self._insert_by_dict(rec_dir, dest_dir, offsets_per_file)
226
249
 
227
- num_inserted += self.insert_spectrograms(
228
- recording_path, offsets_per_file[filename]
229
- )
250
+ def extract_by_image(
251
+ self, rec_dir: str, spec_dir: str, dest_dir: Optional[str] = None
252
+ ):
253
+ """
254
+ Extract spectrograms that match names of spectrogram images in a given directory.
255
+ Typically the spectrograms were generated using the 'search' or 'plot-db' commands.
230
256
 
231
- return num_inserted
257
+ Args:
258
+ - rec_dir (str): Directory containing recordings.
259
+ - spec_dir (str): Directory containing spectrogram images.
260
+ - dest_dir (str, optional): Optionally copy used recordings to this directory.
261
+
262
+ Returns:
263
+ Number of spectrograms inserted.
264
+ """
265
+ offsets_per_file = self._process_image_dir(spec_dir)
266
+ return self._insert_by_dict(rec_dir, dest_dir, offsets_per_file)
@@ -8,7 +8,7 @@ class TrainingDataProvider:
8
8
  Data access layer on top of TrainingDatabase.
9
9
 
10
10
  Args:
11
- db (TrainingDatabase): The database object.
11
+ - db (TrainingDatabase): The database object.
12
12
  """
13
13
 
14
14
  def __init__(self, db: TrainingDatabase):