britekit 0.0.11__py3-none-any.whl → 0.0.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of britekit might be problematic. Click here for more details.

britekit/__about__.py CHANGED
@@ -1,4 +1,4 @@
1
1
  # SPDX-FileCopyrightText: 2025-present Jan Huus <jhuus1@gmail.com>
2
2
  #
3
3
  # SPDX-License-Identifier: MIT
4
- __version__ = "0.0.11"
4
+ __version__ = "0.0.12"
@@ -4,6 +4,7 @@ import json
4
4
  import logging
5
5
  import os
6
6
  from typing import Optional
7
+ from urllib.parse import quote
7
8
 
8
9
  import click
9
10
 
@@ -101,6 +102,8 @@ def xeno(
101
102
  else:
102
103
  name = f'en:"={name.lower()}"'
103
104
 
105
+ name = quote(name)
106
+
104
107
  # get list of recordings
105
108
  recordings: list = []
106
109
  page = 0
@@ -137,6 +140,9 @@ def xeno(
137
140
  if not os.path.exists(outfile):
138
141
  logging.info(f"Downloading {outfile}")
139
142
  url = recording["file"]
143
+ if not url:
144
+ url = f"https:{recording['url']}/download"
145
+
140
146
  response = requests.get(url)
141
147
  with open(outfile, "wb") as mp3:
142
148
  mp3.write(response.content)
britekit/core/audio.py CHANGED
@@ -264,7 +264,6 @@ class Audio:
264
264
  import numpy as np
265
265
  import torch
266
266
 
267
- """Create a filterbank for log spectrograms."""
268
267
  f_min = self.cfg.audio.min_freq
269
268
  f_max = self.cfg.audio.max_freq
270
269
  n_bins = self.cfg.audio.spec_height
@@ -282,13 +281,19 @@ class Audio:
282
281
 
283
282
  filters = []
284
283
  for log_cf in log2_centers:
284
+ # Gaussian filter in log2(f)
285
285
  weight = np.exp(-0.5 * ((fft_log2 - log_cf) / sigma_log2) ** 2)
286
- weight_sum = np.sum(weight)
287
- if weight_sum > 0:
288
- filters.append(weight / weight_sum)
289
- else:
290
- # Fallback: use uniform weights if sum is zero
291
- filters.append(weight)
286
+
287
+ # Normalize per filter (contrast-preserving)
288
+ weight /= np.sum(weight) + 1e-12
289
+
290
+ # Boost power in higher frequencies, so more like mel scale;
291
+ # increase log_freq_gain to increase the boost
292
+ cf_hz = 2**log_cf
293
+ gain = (cf_hz / f_min) ** self.cfg.audio.log_freq_gain
294
+ weight *= gain
295
+
296
+ filters.append(weight)
292
297
 
293
298
  filters = np.array(filters)
294
299
  return torch.tensor(filters, dtype=torch.float32).to(self.device)
@@ -323,28 +328,41 @@ class Audio:
323
328
  tensor = torch.from_numpy(signal).to(self.device)
324
329
 
325
330
  if freq_scale == "log":
326
- spec = self.linear_transform(tensor)
327
- spec = torch.matmul(self.log2_filterbank, spec.squeeze(0)).unsqueeze(0)
331
+ spec = self.linear_transform(tensor) # [1, n_freqs, n_frames]
332
+ spec = torch.matmul(
333
+ self.log2_filterbank, spec.squeeze(0)
334
+ ) # [n_mels, n_frames]
335
+ spec = spec.unsqueeze(0).unsqueeze(1) # [1, 1, n_mels, n_frames]
336
+
328
337
  elif freq_scale == "mel":
329
- spec = self.mel_transform(tensor)
338
+ spec = self.mel_transform(tensor).unsqueeze(1) # [1, 1, n_mels, T]
339
+
330
340
  elif freq_scale == "linear":
331
341
  spec = self.linear_transform(tensor)
332
-
333
342
  freqs = torch.fft.rfftfreq(
334
343
  2 * self.win_length, d=1 / self.cfg.audio.sampling_rate
335
- ) # [freq_bins]
344
+ )
336
345
  mask = (freqs >= self.cfg.audio.min_freq) & (
337
346
  freqs <= self.cfg.audio.max_freq
338
347
  )
339
- spec = spec[:, mask, :] # shape: [channel, selected_freq_bins, time_frames]
340
- spec = spec.unsqueeze(1)
341
- spec = F.interpolate(
342
- spec,
343
- size=(self.cfg.audio.spec_height, self.cfg.audio.spec_width),
344
- mode="bilinear",
345
- align_corners=False,
346
- )
347
- spec = spec.squeeze(1)
348
+ spec = spec[:, mask, :].unsqueeze(1) # [1, 1, F_sel, T]
349
+
350
+ # downsample frequency to spec_height (energy-preserving)
351
+ spec = F.interpolate(
352
+ spec,
353
+ size=(self.cfg.audio.spec_height, spec.shape[-1]),
354
+ mode="area",
355
+ )
356
+
357
+ # pad or crop to spec_width
358
+ T = spec.shape[-1]
359
+ if T < self.cfg.audio.spec_width:
360
+ pad_width = self.cfg.audio.spec_width - T
361
+ spec = F.pad(spec, (0, pad_width)) # pad on the right (time axis)
362
+ else:
363
+ spec = spec[..., : self.cfg.audio.spec_width]
364
+
365
+ spec = spec.squeeze(1)
348
366
 
349
367
  if decibels:
350
368
  spec = ta.transforms.AmplitudeToDB(stype="power", top_db=top_db)(spec)
@@ -23,6 +23,7 @@ class Audio:
23
23
  decibels: bool = False # Use decibel amplitude scale?
24
24
  top_db: float = 80 # Parameter to decibel conversion
25
25
  db_power: float = 1.0 # Raise to this exponent after convert to decibels
26
+ log_freq_gain: float = 0.6 # Boost loudness of higher frequencies with log scale
26
27
 
27
28
 
28
29
  @dataclass
britekit/core/trainer.py CHANGED
@@ -47,7 +47,9 @@ class Trainer:
47
47
  val_rocs = []
48
48
  for k in range(self.cfg.train.num_folds):
49
49
  logger = TensorBoardLogger(
50
- save_dir="logs", name=f"fold-{k}", default_hp_metric=False
50
+ save_dir="logs",
51
+ name=None if self.cfg.train.num_folds == 1 else f"fold-{k}",
52
+ default_hp_metric=False
51
53
  )
52
54
  version = (
53
55
  logger.version
@@ -5,6 +5,7 @@ audio: !!python/object:britekit.core.base_config.Audio
5
5
  db_power: 1.0
6
6
  decibels: false
7
7
  freq_scale: mel
8
+ log_freq_gain: 0.6
8
9
  max_freq: 8000
9
10
  min_freq: 100
10
11
  power: 1.0
@@ -177,7 +177,7 @@ class BaseModel(pl.LightningModule):
177
177
  else:
178
178
  preds = torch.softmax(seg_logits, dim=1)
179
179
 
180
- self.log("val_loss", loss, on_step=False, on_epoch=True, prog_bar=True)
180
+ self.log("val_loss", loss, on_step=False, on_epoch=True, prog_bar=False)
181
181
 
182
182
  roc_auc = metrics.roc_auc_score(y.cpu(), preds.cpu(), average="micro")
183
183
  self.log("val_roc", roc_auc, on_step=False, on_epoch=True, prog_bar=True)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: britekit
3
- Version: 0.0.11
3
+ Version: 0.0.12
4
4
  Summary: Core functions for bioacoustic recognizers.
5
5
  Project-URL: Documentation, https://github.com/jhuus/BriteKit#readme
6
6
  Project-URL: Issues, https://github.com/jhuus/BriteKit/issues
@@ -1,8 +1,8 @@
1
1
  britekit/cli.py,sha256=nnrCMfw3-1GJ4rKFpqTLu8JcBGxTocMn7nwzU4OSaew,3080
2
2
  britekit/core/analyzer.py,sha256=0OYVxuwYx_R36MZwIlVgPMWQ1udZ_tdgYODJyEVDJm0,5604
3
- britekit/core/audio.py,sha256=Ju1SBO9c8GUM0RZ-NuolZoPHF6hATLvayVEvKVOgxOw,15850
3
+ britekit/core/audio.py,sha256=2WINXunumjcbFlTd0Yngg8MmBy9O3dQWGosVCJmkryI,16397
4
4
  britekit/core/augmentation.py,sha256=5_wyB-6gt7uM68Zl-rO_fPu1D6tlsd2m5oWhA6l0W9Q,5721
5
- britekit/core/base_config.py,sha256=wbCJI9cEH9mktUTSfLSCnU5AhJT6xhxhwZS4QWRYTpM,8744
5
+ britekit/core/base_config.py,sha256=KNcmPhvbl6OGDZ5OvHShE9JCnuyhSvIQq5pi3AU1gVk,8830
6
6
  britekit/core/config_loader.py,sha256=mDmLwKYXsWb9FRk12q9Rj2rVdvbCVCI3QTdsZSnUBCY,1339
7
7
  britekit/core/data_module.py,sha256=0DkOZTTGymZ7JjukucDuAq-nXS_KPhqV7KFPhmMoQDw,9105
8
8
  britekit/core/dataset.py,sha256=R_NDaFljl4IMKxGWWWm1PMkYCcfrEaiJU1NrVswt6eM,5801
@@ -11,10 +11,10 @@ britekit/core/pickler.py,sha256=72hiqOiIbBeXvHGwmHAOODr9wGdyBwj4NUYecxIhtCE,5775
11
11
  britekit/core/plot.py,sha256=H-SB2ooe2LT0j1KoXs2PgT3t05oAB4CWkYX_HvGMO_c,5329
12
12
  britekit/core/predictor.py,sha256=9Heue3ku9rw4OfE0SWuWLKBLW-aMw8PXqWlBB4GUCbw,22911
13
13
  britekit/core/reextractor.py,sha256=UqWLapReNhEne0dykBCN_UIpLOHkZxJMw4RAlG70nzI,8393
14
- britekit/core/trainer.py,sha256=vFIVyFCYhnV0zdRjNU93wzd7_HLtiaXW8mAiK7BoJPA,6437
14
+ britekit/core/trainer.py,sha256=XCa4yOkKeX2CBPPyH3gsJBalhrsebRboH8EC4rLdceA,6512
15
15
  britekit/core/tuner.py,sha256=LoQi7jwIwqv2DY7MgupG32PdD6QiuneQrgRgxq8ANlk,16422
16
16
  britekit/core/util.py,sha256=JKT-yqFPA9FBMT1-YsGvNE8E21242MYAsXvRGSgTNYg,19163
17
- britekit/models/base_model.py,sha256=VpY2aMb7P3QWGzhIdJRGxdjT6z-o7XE5sJxnupeZtxQ,16880
17
+ britekit/models/base_model.py,sha256=jVYSIrXAmeAer6MTFg177YoDBn00_HzluDdEAZH1qpQ,16881
18
18
  britekit/models/dla.py,sha256=ALMY997AbERN7-sHqQuE5e43llRjpUDPZSFGL-Flv4M,3137
19
19
  britekit/models/effnet.py,sha256=e7WdZMsLPXe8jcWChk6n97c8DMV0YyGV6lDP_Jv6Wz4,3129
20
20
  britekit/models/gernet.py,sha256=7MEUZaDTfr-6oa8eE8dyDQb2LgahGBOEp1pTZSu1KOE,7022
@@ -32,7 +32,7 @@ britekit/testing/per_segment_tester.py,sha256=5zFpe8wVEEtVjthKKFRb0dwE9T0v4JIll9
32
32
  britekit/training_db/extractor.py,sha256=yblYTco9b-bZhBTOkGkNANOabsNo8IfQGJxPvh_eJyo,8406
33
33
  britekit/training_db/training_data_provider.py,sha256=V5aBjsCvrWViZ0Jv05hgcKRizcAXmqoj4q3hAHedoD8,5651
34
34
  britekit/training_db/training_db.py,sha256=xZqN1sMC2yFMEDm9rOrCigN3CUfUzTaTUkX3IZ_zHRc,64922
35
- britekit/__about__.py,sha256=CGewA3qxQM5QGFXz6GxYj1rsR4YlBI3pPly5AWErQFI,123
35
+ britekit/__about__.py,sha256=BYHVclZkphegQAtQMaCBTllSQbN2DGte-0mtbt9t5SM,123
36
36
  britekit/__init__.py,sha256=RpruzdjbvTcFNf21zJYY8HrAhJei91FtNNLjIBmw-kw,1857
37
37
  britekit/install/data/classes.csv,sha256=OdTZ8oQdx7N-HKyhftxZStGZYsjhCy4UbanwtQJ2wBM,54
38
38
  britekit/install/data/ignore.txt,sha256=RbKvEHtUCbgRYolwR1IucClwyD3q7l2s6QuRjph-Us4,68
@@ -52,7 +52,7 @@ britekit/install/data/audioset/curated/siren.csv,sha256=_EbIBGHJPsR7VoA0gEYG7DCc
52
52
  britekit/install/data/audioset/curated/speech.csv,sha256=eBGVyfFTPHr34XrEIQIy3CWQfXMjQP5JqjryRI3IF8c,2439
53
53
  britekit/install/data/audioset/curated/truck.csv,sha256=i0cTXlqGQwv2ULnrZTJ68p3EV8UWccU0saw7fqQq_8E,1667
54
54
  britekit/install/data/audioset/curated/wind.csv,sha256=77_YrP5LttXjuwahndUiPagjwQB9Uh2tpk3Xa8-JFY4,4482
55
- britekit/install/yaml/base_config.yaml,sha256=vQ1xtglVkFW2qUo8ls8llpk_1YjdfSdxBJSXPmIYlUU,2744
55
+ britekit/install/yaml/base_config.yaml,sha256=CDgTJWzIL1qSO-prJ9sMFQe1naVbXEEPDVy41VQqXOo,2765
56
56
  britekit/install/yaml/samples/cfg_infer.yaml,sha256=GsBqkL5xPlS1V-bZmXNXGFDS1_hBEGkuer8sqO3CiQw,65
57
57
  britekit/install/yaml/samples/train_dla.yaml,sha256=D2xHIgcPMOsoK2doPsF8PmoAVtkl0v2ojuuKfzYv8XI,228
58
58
  britekit/install/yaml/samples/train_effnet.yaml,sha256=MD6LEL94FN3Hd05j8N3CHn88LIOp-sERdbyWBSKfk4g,323
@@ -85,11 +85,11 @@ britekit/commands/_search.py,sha256=b7cIFSI3AuPhreYGO8HGqX12TKaOJ5uoc1uiqid04Mk,
85
85
  britekit/commands/_train.py,sha256=40Zdu9FZkmdA0bIuTK-oADw-4X6aMQYS1XGigheocdo,4152
86
86
  britekit/commands/_tune.py,sha256=CFEY08SsPF_2s_fknUHJLEsho6vL_it8NoOhB2RoVU0,7468
87
87
  britekit/commands/_wav2mp3.py,sha256=2Q4cjT6OhJmBPTNzGRMrDd6dSdBBufuQdjhH1V8ghLo,2167
88
- britekit/commands/_xeno.py,sha256=_6YxQ7xFdaSy5DNUaigkbYp3E8EhtOhTC9b6OFS0MFA,6026
88
+ britekit/commands/_xeno.py,sha256=4qS2iXeGMl0jYvTAvhSvX0VvIlp7STlX27o273WwBHk,6165
89
89
  britekit/commands/_youtube.py,sha256=_u1LrwY_2GxllKd505N_2ArFMbACQ_PtVxuqUCYxFe0,2214
90
90
  britekit/core/__init__.py,sha256=QcjcFyvO5KqJLF_HBeqiCk925uU5jTUjIV5lJix9XY4,556
91
- britekit-0.0.11.dist-info/METADATA,sha256=d1VgnfrMq3-tnSABq7aXN8SAMsuZI5ifbPM9MB7yDtE,18553
92
- britekit-0.0.11.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
93
- britekit-0.0.11.dist-info/entry_points.txt,sha256=ycnPy5DLX14RTf7lKfkQAVyIf1B1zTL1gMsHm455wmg,46
94
- britekit-0.0.11.dist-info/licenses/LICENSE.txt,sha256=kPoHm6iop8-CUa_720Tt8gqyvLD6D_7218u1hCCpErk,1092
95
- britekit-0.0.11.dist-info/RECORD,,
91
+ britekit-0.0.12.dist-info/METADATA,sha256=-D6Wr6aoJZ7Vy23qV4_dgxSdEBD1ZqZ-ObGVDPLFmsk,18553
92
+ britekit-0.0.12.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
93
+ britekit-0.0.12.dist-info/entry_points.txt,sha256=ycnPy5DLX14RTf7lKfkQAVyIf1B1zTL1gMsHm455wmg,46
94
+ britekit-0.0.12.dist-info/licenses/LICENSE.txt,sha256=kPoHm6iop8-CUa_720Tt8gqyvLD6D_7218u1hCCpErk,1092
95
+ britekit-0.0.12.dist-info/RECORD,,