birdnet-analyzer 2.0.0__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. birdnet_analyzer/__init__.py +9 -8
  2. birdnet_analyzer/analyze/__init__.py +5 -5
  3. birdnet_analyzer/analyze/__main__.py +3 -4
  4. birdnet_analyzer/analyze/cli.py +25 -25
  5. birdnet_analyzer/analyze/core.py +241 -245
  6. birdnet_analyzer/analyze/utils.py +692 -701
  7. birdnet_analyzer/audio.py +368 -372
  8. birdnet_analyzer/cli.py +709 -707
  9. birdnet_analyzer/config.py +242 -242
  10. birdnet_analyzer/eBird_taxonomy_codes_2021E.json +25279 -25279
  11. birdnet_analyzer/embeddings/__init__.py +3 -4
  12. birdnet_analyzer/embeddings/__main__.py +3 -3
  13. birdnet_analyzer/embeddings/cli.py +12 -13
  14. birdnet_analyzer/embeddings/core.py +69 -70
  15. birdnet_analyzer/embeddings/utils.py +179 -193
  16. birdnet_analyzer/evaluation/__init__.py +196 -195
  17. birdnet_analyzer/evaluation/__main__.py +3 -3
  18. birdnet_analyzer/evaluation/assessment/__init__.py +0 -0
  19. birdnet_analyzer/evaluation/assessment/metrics.py +388 -0
  20. birdnet_analyzer/evaluation/assessment/performance_assessor.py +409 -0
  21. birdnet_analyzer/evaluation/assessment/plotting.py +379 -0
  22. birdnet_analyzer/evaluation/preprocessing/__init__.py +0 -0
  23. birdnet_analyzer/evaluation/preprocessing/data_processor.py +631 -0
  24. birdnet_analyzer/evaluation/preprocessing/utils.py +98 -0
  25. birdnet_analyzer/gui/__init__.py +19 -23
  26. birdnet_analyzer/gui/__main__.py +3 -3
  27. birdnet_analyzer/gui/analysis.py +175 -174
  28. birdnet_analyzer/gui/assets/arrow_down.svg +4 -4
  29. birdnet_analyzer/gui/assets/arrow_left.svg +4 -4
  30. birdnet_analyzer/gui/assets/arrow_right.svg +4 -4
  31. birdnet_analyzer/gui/assets/arrow_up.svg +4 -4
  32. birdnet_analyzer/gui/assets/gui.css +28 -28
  33. birdnet_analyzer/gui/assets/gui.js +93 -93
  34. birdnet_analyzer/gui/embeddings.py +619 -620
  35. birdnet_analyzer/gui/evaluation.py +795 -813
  36. birdnet_analyzer/gui/localization.py +75 -68
  37. birdnet_analyzer/gui/multi_file.py +245 -246
  38. birdnet_analyzer/gui/review.py +519 -527
  39. birdnet_analyzer/gui/segments.py +191 -191
  40. birdnet_analyzer/gui/settings.py +128 -129
  41. birdnet_analyzer/gui/single_file.py +267 -269
  42. birdnet_analyzer/gui/species.py +95 -95
  43. birdnet_analyzer/gui/train.py +696 -698
  44. birdnet_analyzer/gui/utils.py +810 -808
  45. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_af.txt +6522 -6522
  46. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ar.txt +6522 -6522
  47. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_bg.txt +6522 -6522
  48. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ca.txt +6522 -6522
  49. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_cs.txt +6522 -6522
  50. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_da.txt +6522 -6522
  51. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_de.txt +6522 -6522
  52. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_el.txt +6522 -6522
  53. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_en_uk.txt +6522 -6522
  54. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_es.txt +6522 -6522
  55. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_fi.txt +6522 -6522
  56. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_fr.txt +6522 -6522
  57. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_he.txt +6522 -6522
  58. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_hr.txt +6522 -6522
  59. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_hu.txt +6522 -6522
  60. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_in.txt +6522 -6522
  61. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_is.txt +6522 -6522
  62. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_it.txt +6522 -6522
  63. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ja.txt +6522 -6522
  64. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ko.txt +6522 -6522
  65. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_lt.txt +6522 -6522
  66. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ml.txt +6522 -6522
  67. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_nl.txt +6522 -6522
  68. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_no.txt +6522 -6522
  69. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_pl.txt +6522 -6522
  70. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_pt_BR.txt +6522 -6522
  71. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_pt_PT.txt +6522 -6522
  72. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ro.txt +6522 -6522
  73. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ru.txt +6522 -6522
  74. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sk.txt +6522 -6522
  75. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sl.txt +6522 -6522
  76. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sr.txt +6522 -6522
  77. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sv.txt +6522 -6522
  78. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_th.txt +6522 -6522
  79. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_tr.txt +6522 -6522
  80. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_uk.txt +6522 -6522
  81. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_zh.txt +6522 -6522
  82. birdnet_analyzer/lang/de.json +334 -334
  83. birdnet_analyzer/lang/en.json +334 -334
  84. birdnet_analyzer/lang/fi.json +334 -334
  85. birdnet_analyzer/lang/fr.json +334 -334
  86. birdnet_analyzer/lang/id.json +334 -334
  87. birdnet_analyzer/lang/pt-br.json +334 -334
  88. birdnet_analyzer/lang/ru.json +334 -334
  89. birdnet_analyzer/lang/se.json +334 -334
  90. birdnet_analyzer/lang/tlh.json +334 -334
  91. birdnet_analyzer/lang/zh_TW.json +334 -334
  92. birdnet_analyzer/model.py +1212 -1243
  93. birdnet_analyzer/playground.py +5 -0
  94. birdnet_analyzer/search/__init__.py +3 -3
  95. birdnet_analyzer/search/__main__.py +3 -3
  96. birdnet_analyzer/search/cli.py +11 -12
  97. birdnet_analyzer/search/core.py +78 -78
  98. birdnet_analyzer/search/utils.py +107 -111
  99. birdnet_analyzer/segments/__init__.py +3 -3
  100. birdnet_analyzer/segments/__main__.py +3 -3
  101. birdnet_analyzer/segments/cli.py +13 -14
  102. birdnet_analyzer/segments/core.py +81 -78
  103. birdnet_analyzer/segments/utils.py +383 -394
  104. birdnet_analyzer/species/__init__.py +3 -3
  105. birdnet_analyzer/species/__main__.py +3 -3
  106. birdnet_analyzer/species/cli.py +13 -14
  107. birdnet_analyzer/species/core.py +35 -35
  108. birdnet_analyzer/species/utils.py +74 -75
  109. birdnet_analyzer/train/__init__.py +3 -3
  110. birdnet_analyzer/train/__main__.py +3 -3
  111. birdnet_analyzer/train/cli.py +13 -14
  112. birdnet_analyzer/train/core.py +113 -113
  113. birdnet_analyzer/train/utils.py +877 -847
  114. birdnet_analyzer/translate.py +133 -104
  115. birdnet_analyzer/utils.py +426 -419
  116. {birdnet_analyzer-2.0.0.dist-info → birdnet_analyzer-2.0.1.dist-info}/METADATA +137 -129
  117. birdnet_analyzer-2.0.1.dist-info/RECORD +125 -0
  118. {birdnet_analyzer-2.0.0.dist-info → birdnet_analyzer-2.0.1.dist-info}/WHEEL +1 -1
  119. {birdnet_analyzer-2.0.0.dist-info → birdnet_analyzer-2.0.1.dist-info}/licenses/LICENSE +18 -18
  120. birdnet_analyzer-2.0.0.dist-info/RECORD +0 -117
  121. {birdnet_analyzer-2.0.0.dist-info → birdnet_analyzer-2.0.1.dist-info}/entry_points.txt +0 -0
  122. {birdnet_analyzer-2.0.0.dist-info → birdnet_analyzer-2.0.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,5 @@
1
+ if __name__ == "__main__":
2
+ from birdnet_analyzer import train
3
+
4
+ p = r"C:\Users\johau\data\custom_classifier\train_data_small"
5
+ train(p)
@@ -1,3 +1,3 @@
1
- from birdnet_analyzer.search.core import search
2
-
3
- __all__ = ["search"]
1
+ from birdnet_analyzer.search.core import search
2
+
3
+ __all__ = ["search"]
@@ -1,3 +1,3 @@
1
- from birdnet_analyzer.search.cli import main
2
-
3
- main()
1
+ from birdnet_analyzer.search.cli import main
2
+
3
+ main()
@@ -1,12 +1,11 @@
1
- import birdnet_analyzer.utils as utils
2
-
3
-
4
- @utils.runtime_error_handler
5
- def main():
6
- import birdnet_analyzer.cli as cli
7
- from birdnet_analyzer import search
8
-
9
- parser = cli.search_parser()
10
- args = parser.parse_args()
11
-
12
- search(**vars(args))
1
+ from birdnet_analyzer import utils
2
+
3
+
4
+ @utils.runtime_error_handler
5
+ def main():
6
+ from birdnet_analyzer import cli, search
7
+
8
+ parser = cli.search_parser()
9
+ args = parser.parse_args()
10
+
11
+ search(**vars(args))
@@ -1,78 +1,78 @@
1
- from typing import Literal
2
-
3
-
4
- def search(
5
- output: str,
6
- database: str,
7
- queryfile: str,
8
- *,
9
- n_results: int = 10,
10
- score_function: Literal["cosine", "euclidean", "dot"] = "cosine",
11
- crop_mode: Literal["center", "first", "segments"] = "center",
12
- overlap: float = 0.0,
13
- ):
14
- """
15
- Executes a search query on a given database and saves the results as audio files.
16
- Args:
17
- output (str): Path to the output directory where the results will be saved.
18
- database (str): Path to the database file to search in.
19
- queryfile (str): Path to the query file containing the search input.
20
- n_results (int, optional): Number of top results to return. Defaults to 10.
21
- score_function (Literal["cosine", "euclidean", "dot"], optional):
22
- Scoring function to use for similarity calculation. Defaults to "cosine".
23
- crop_mode (Literal["center", "first", "segments"], optional):
24
- Mode for cropping audio segments. Defaults to "center".
25
- overlap (float, optional): Overlap ratio for audio segments. Defaults to 0.0.
26
- Raises:
27
- ValueError: If the database does not contain the required settings metadata.
28
- Notes:
29
- - The function creates the output directory if it does not exist.
30
- - It retrieves metadata from the database to configure the search, including
31
- bandpass filter settings and audio speed.
32
- - The results are saved as audio files in the specified output directory, with
33
- filenames containing the score, source file name, and time offsets.
34
- Returns:
35
- None
36
- """
37
- import os
38
-
39
- import birdnet_analyzer.audio as audio
40
- import birdnet_analyzer.config as cfg
41
- from birdnet_analyzer.search.utils import get_search_results
42
-
43
- # Create output folder
44
- if not os.path.exists(output):
45
- os.makedirs(output)
46
-
47
- # Load the database
48
- db = get_database(database)
49
-
50
- try:
51
- settings = db.get_metadata("birdnet_analyzer_settings")
52
- except:
53
- raise ValueError("No settings present in database.")
54
-
55
- fmin = settings["BANDPASS_FMIN"]
56
- fmax = settings["BANDPASS_FMAX"]
57
- audio_speed = settings["AUDIO_SPEED"]
58
-
59
- # Execute the search
60
- results = get_search_results(queryfile, db, n_results, audio_speed, fmin, fmax, score_function, crop_mode, overlap)
61
-
62
- # Save the results
63
- for i, r in enumerate(results):
64
- embedding_source = db.get_embedding_source(r.embedding_id)
65
- file = embedding_source.source_id
66
- filebasename = os.path.basename(file)
67
- filebasename = os.path.splitext(filebasename)[0]
68
- offset = embedding_source.offsets[0] * audio_speed
69
- duration = cfg.SIG_LENGTH * audio_speed
70
- sig, rate = audio.open_audio_file(file, offset=offset, duration=duration, sample_rate=None)
71
- result_path = os.path.join(output, f"{file[4]:.5f}_{filebasename}_{offset}_{offset + duration}.wav")
72
- audio.save_signal(sig, result_path, rate)
73
-
74
-
75
- def get_database(database_path):
76
- from perch_hoplite.db import sqlite_usearch_impl
77
-
78
- return sqlite_usearch_impl.SQLiteUsearchDB.create(database_path).thread_split()
1
+ from typing import Literal
2
+
3
+
4
+ def search(
5
+ output: str,
6
+ database: str,
7
+ queryfile: str,
8
+ *,
9
+ n_results: int = 10,
10
+ score_function: Literal["cosine", "euclidean", "dot"] = "cosine",
11
+ crop_mode: Literal["center", "first", "segments"] = "center",
12
+ overlap: float = 0.0,
13
+ ):
14
+ """
15
+ Executes a search query on a given database and saves the results as audio files.
16
+ Args:
17
+ output (str): Path to the output directory where the results will be saved.
18
+ database (str): Path to the database file to search in.
19
+ queryfile (str): Path to the query file containing the search input.
20
+ n_results (int, optional): Number of top results to return. Defaults to 10.
21
+ score_function (Literal["cosine", "euclidean", "dot"], optional):
22
+ Scoring function to use for similarity calculation. Defaults to "cosine".
23
+ crop_mode (Literal["center", "first", "segments"], optional):
24
+ Mode for cropping audio segments. Defaults to "center".
25
+ overlap (float, optional): Overlap ratio for audio segments. Defaults to 0.0.
26
+ Raises:
27
+ ValueError: If the database does not contain the required settings metadata.
28
+ Notes:
29
+ - The function creates the output directory if it does not exist.
30
+ - It retrieves metadata from the database to configure the search, including
31
+ bandpass filter settings and audio speed.
32
+ - The results are saved as audio files in the specified output directory, with
33
+ filenames containing the score, source file name, and time offsets.
34
+ Returns:
35
+ None
36
+ """
37
+ import os
38
+
39
+ import birdnet_analyzer.config as cfg
40
+ from birdnet_analyzer import audio
41
+ from birdnet_analyzer.search.utils import get_search_results
42
+
43
+ # Create output folder
44
+ if not os.path.exists(output):
45
+ os.makedirs(output)
46
+
47
+ # Load the database
48
+ db = get_database(database)
49
+
50
+ try:
51
+ settings = db.get_metadata("birdnet_analyzer_settings")
52
+ except KeyError as e:
53
+ raise ValueError("No settings present in database.") from e
54
+
55
+ fmin = settings["BANDPASS_FMIN"]
56
+ fmax = settings["BANDPASS_FMAX"]
57
+ audio_speed = settings["AUDIO_SPEED"]
58
+
59
+ # Execute the search
60
+ results = get_search_results(queryfile, db, n_results, audio_speed, fmin, fmax, score_function, crop_mode, overlap)
61
+
62
+ # Save the results
63
+ for r in results:
64
+ embedding_source = db.get_embedding_source(r.embedding_id)
65
+ file = embedding_source.source_id
66
+ filebasename = os.path.basename(file)
67
+ filebasename = os.path.splitext(filebasename)[0]
68
+ offset = embedding_source.offsets[0] * audio_speed
69
+ duration = cfg.SIG_LENGTH * audio_speed
70
+ sig, rate = audio.open_audio_file(file, offset=offset, duration=duration, sample_rate=None)
71
+ result_path = os.path.join(output, f"{file[4]:.5f}_{filebasename}_{offset}_{offset + duration}.wav")
72
+ audio.save_signal(sig, result_path, rate)
73
+
74
+
75
+ def get_database(database_path):
76
+ from perch_hoplite.db import sqlite_usearch_impl
77
+
78
+ return sqlite_usearch_impl.SQLiteUsearchDB.create(database_path).thread_split()
@@ -1,111 +1,107 @@
1
- import numpy as np
2
- from perch_hoplite.db import brutalism
3
- from perch_hoplite.db.search_results import SearchResult
4
- from scipy.spatial.distance import euclidean
5
-
6
- import birdnet_analyzer.audio as audio
7
- import birdnet_analyzer.config as cfg
8
- import birdnet_analyzer.model as model
9
-
10
-
11
- def cosine_sim(a, b):
12
- if a.ndim == 2:
13
- return np.array([cosine_sim(a[i], b) for i in range(a.shape[0])])
14
- return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
15
-
16
-
17
- def euclidean_scoring(a, b):
18
- if a.ndim == 2:
19
- return np.array([euclidean_scoring(a[i], b) for i in range(a.shape[0])])
20
- return euclidean(a, b)
21
-
22
-
23
- def euclidean_scoring_inverse(a, b):
24
- return -euclidean_scoring(a, b)
25
-
26
-
27
- def get_query_embedding(queryfile_path):
28
- """
29
- Extracts the embedding for a query file. Reads only the first 3 seconds
30
- Args:
31
- queryfile_path: The path to the query file.
32
- Returns:
33
- The query embedding.
34
- """
35
-
36
- # Load audio
37
- sig, rate = audio.open_audio_file(
38
- queryfile_path,
39
- duration=cfg.SIG_LENGTH * cfg.AUDIO_SPEED if cfg.SAMPLE_CROP_MODE == "first" else None,
40
- fmin=cfg.BANDPASS_FMIN,
41
- fmax=cfg.BANDPASS_FMAX,
42
- speed=cfg.AUDIO_SPEED,
43
- )
44
-
45
- # Crop query audio
46
- if cfg.SAMPLE_CROP_MODE == "center":
47
- sig_splits = [audio.crop_center(sig, rate, cfg.SIG_LENGTH)]
48
- elif cfg.SAMPLE_CROP_MODE == "first":
49
- sig_splits = [audio.split_signal(sig, rate, cfg.SIG_LENGTH, cfg.SIG_OVERLAP, cfg.SIG_MINLEN)[0]]
50
- else:
51
- sig_splits = audio.split_signal(sig, rate, cfg.SIG_LENGTH, cfg.SIG_OVERLAP, cfg.SIG_MINLEN)
52
-
53
- samples = sig_splits
54
- data = np.array(samples, dtype="float32")
55
- query = model.embeddings(data)
56
- return query
57
-
58
-
59
- def get_search_results(
60
- queryfile_path, db, n_results, audio_speed, fmin, fmax, score_function: str, crop_mode, crop_overlap
61
- ):
62
- # Set bandpass frequency range
63
- cfg.BANDPASS_FMIN = max(0, min(cfg.SIG_FMAX, int(fmin)))
64
- cfg.BANDPASS_FMAX = max(cfg.SIG_FMIN, min(cfg.SIG_FMAX, int(fmax)))
65
- cfg.AUDIO_SPEED = max(0.01, audio_speed)
66
- cfg.SAMPLE_CROP_MODE = crop_mode
67
- cfg.SIG_OVERLAP = max(0.0, min(2.9, float(crop_overlap)))
68
-
69
- # Get query embedding
70
- query_embeddings = get_query_embedding(queryfile_path)
71
-
72
- # Set score function
73
- if score_function == "cosine":
74
- score_fn = cosine_sim
75
- elif score_function == "dot":
76
- score_fn = np.dot
77
- elif score_function == "euclidean":
78
- score_fn = euclidean_scoring_inverse # TODO: this is a bit hacky since the search function expects the score to be high for similar embeddings
79
- else:
80
- raise ValueError("Invalid score function. Choose 'cosine', 'euclidean' or 'dot'.")
81
-
82
- db_embeddings_count = db.count_embeddings()
83
-
84
- if n_results > db_embeddings_count - 1:
85
- n_results = db_embeddings_count - 1
86
-
87
- scores_by_embedding_id = {}
88
-
89
- for embedding in query_embeddings:
90
- results, scores = brutalism.threaded_brute_search(db, embedding, n_results, score_fn)
91
- sorted_results = results.search_results
92
-
93
- if score_function == "euclidean":
94
- for result in sorted_results:
95
- result.sort_score *= -1
96
-
97
- for result in sorted_results:
98
- if result.embedding_id not in scores_by_embedding_id:
99
- scores_by_embedding_id[result.embedding_id] = []
100
- scores_by_embedding_id[result.embedding_id].append(result.sort_score)
101
-
102
- results = []
103
-
104
- for embedding_id, scores in scores_by_embedding_id.items():
105
- results.append(SearchResult(embedding_id, np.sum(scores) / len(query_embeddings)))
106
-
107
- reverse = score_function != "euclidean"
108
-
109
- results.sort(key=lambda x: x.sort_score, reverse=reverse)
110
-
111
- return results[0:n_results]
1
+ import numpy as np
2
+ from perch_hoplite.db import brutalism
3
+ from perch_hoplite.db.search_results import SearchResult
4
+ from scipy.spatial.distance import euclidean
5
+
6
+ import birdnet_analyzer.config as cfg
7
+ from birdnet_analyzer import audio, model
8
+
9
+
10
+ def cosine_sim(a, b):
11
+ if a.ndim == 2:
12
+ return np.array([cosine_sim(a[i], b) for i in range(a.shape[0])])
13
+ return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
14
+
15
+
16
+ def euclidean_scoring(a, b):
17
+ if a.ndim == 2:
18
+ return np.array([euclidean_scoring(a[i], b) for i in range(a.shape[0])])
19
+ return euclidean(a, b)
20
+
21
+
22
+ def euclidean_scoring_inverse(a, b):
23
+ return -euclidean_scoring(a, b)
24
+
25
+
26
+ def get_query_embedding(queryfile_path):
27
+ """
28
+ Extracts the embedding for a query file. Reads only the first 3 seconds
29
+ Args:
30
+ queryfile_path: The path to the query file.
31
+ Returns:
32
+ The query embedding.
33
+ """
34
+
35
+ # Load audio
36
+ sig, rate = audio.open_audio_file(
37
+ queryfile_path,
38
+ duration=cfg.SIG_LENGTH * cfg.AUDIO_SPEED if cfg.SAMPLE_CROP_MODE == "first" else None,
39
+ fmin=cfg.BANDPASS_FMIN,
40
+ fmax=cfg.BANDPASS_FMAX,
41
+ speed=cfg.AUDIO_SPEED,
42
+ )
43
+
44
+ # Crop query audio
45
+ if cfg.SAMPLE_CROP_MODE == "center":
46
+ sig_splits = [audio.crop_center(sig, rate, cfg.SIG_LENGTH)]
47
+ elif cfg.SAMPLE_CROP_MODE == "first":
48
+ sig_splits = [audio.split_signal(sig, rate, cfg.SIG_LENGTH, cfg.SIG_OVERLAP, cfg.SIG_MINLEN)[0]]
49
+ else:
50
+ sig_splits = audio.split_signal(sig, rate, cfg.SIG_LENGTH, cfg.SIG_OVERLAP, cfg.SIG_MINLEN)
51
+
52
+ samples = sig_splits
53
+ data = np.array(samples, dtype="float32")
54
+
55
+ return model.embeddings(data)
56
+
57
+
58
+ def get_search_results(
59
+ queryfile_path, db, n_results, audio_speed, fmin, fmax, score_function: str, crop_mode, crop_overlap
60
+ ):
61
+ # Set bandpass frequency range
62
+ cfg.BANDPASS_FMIN = max(0, min(cfg.SIG_FMAX, int(fmin)))
63
+ cfg.BANDPASS_FMAX = max(cfg.SIG_FMIN, min(cfg.SIG_FMAX, int(fmax)))
64
+ cfg.AUDIO_SPEED = max(0.01, audio_speed)
65
+ cfg.SAMPLE_CROP_MODE = crop_mode
66
+ cfg.SIG_OVERLAP = max(0.0, min(2.9, float(crop_overlap)))
67
+
68
+ # Get query embedding
69
+ query_embeddings = get_query_embedding(queryfile_path)
70
+
71
+ # Set score function
72
+ if score_function == "cosine":
73
+ score_fn = cosine_sim
74
+ elif score_function == "dot":
75
+ score_fn = np.dot
76
+ elif score_function == "euclidean":
77
+ score_fn = euclidean_scoring_inverse # TODO: this is a bit hacky since the search function expects the score to be high for similar embeddings
78
+ else:
79
+ raise ValueError("Invalid score function. Choose 'cosine', 'euclidean' or 'dot'.")
80
+
81
+ db_embeddings_count = db.count_embeddings()
82
+ n_results = min(n_results, db_embeddings_count - 1)
83
+ scores_by_embedding_id = {}
84
+
85
+ for embedding in query_embeddings:
86
+ results, scores = brutalism.threaded_brute_search(db, embedding, n_results, score_fn)
87
+ sorted_results = results.search_results
88
+
89
+ if score_function == "euclidean":
90
+ for result in sorted_results:
91
+ result.sort_score *= -1
92
+
93
+ for result in sorted_results:
94
+ if result.embedding_id not in scores_by_embedding_id:
95
+ scores_by_embedding_id[result.embedding_id] = []
96
+ scores_by_embedding_id[result.embedding_id].append(result.sort_score)
97
+
98
+ results = []
99
+
100
+ for embedding_id, scores in scores_by_embedding_id.items():
101
+ results.append(SearchResult(embedding_id, np.sum(scores) / len(query_embeddings)))
102
+
103
+ reverse = score_function != "euclidean"
104
+
105
+ results.sort(key=lambda x: x.sort_score, reverse=reverse)
106
+
107
+ return results[0:n_results]
@@ -1,3 +1,3 @@
1
- from birdnet_analyzer.segments.core import segments
2
-
3
- __all__ = ["segments"]
1
+ from birdnet_analyzer.segments.core import segments
2
+
3
+ __all__ = ["segments"]
@@ -1,3 +1,3 @@
1
- from birdnet_analyzer.segments.cli import main
2
-
3
- main()
1
+ from birdnet_analyzer.segments.cli import main
2
+
3
+ main()
@@ -1,14 +1,13 @@
1
- from birdnet_analyzer.utils import runtime_error_handler
2
-
3
-
4
- @runtime_error_handler
5
- def main():
6
- import birdnet_analyzer.cli as cli
7
- from birdnet_analyzer import segments
8
-
9
- # Parse arguments
10
- parser = cli.segments_parser()
11
-
12
- args = parser.parse_args()
13
-
14
- segments(**vars(args))
1
+ from birdnet_analyzer.utils import runtime_error_handler
2
+
3
+
4
+ @runtime_error_handler
5
+ def main():
6
+ from birdnet_analyzer import cli, segments
7
+
8
+ # Parse arguments
9
+ parser = cli.segments_parser()
10
+
11
+ args = parser.parse_args()
12
+
13
+ segments(**vars(args))
@@ -1,78 +1,81 @@
1
- def segments(
2
- input: str,
3
- output: str | None = None,
4
- results: str | None = None,
5
- *,
6
- min_conf: float = 0.25,
7
- max_segments: int = 100,
8
- audio_speed: float = 1.0,
9
- seg_length: float = 3.0,
10
- threads: int = 1,
11
- ):
12
- """
13
- Processes audio files to extract segments based on detection results.
14
- Args:
15
- input (str): Path to the input folder containing audio files.
16
- output (str | None, optional): Path to the output folder where segments will be saved.
17
- If not provided, the input folder will be used as the output folder. Defaults to None.
18
- results (str | None, optional): Path to the folder containing detection result files.
19
- If not provided, the input folder will be used. Defaults to None.
20
- min_conf (float, optional): Minimum confidence threshold for detections to be considered.
21
- Defaults to 0.25.
22
- max_segments (int, optional): Maximum number of segments to extract per audio file.
23
- Defaults to 100.
24
- audio_speed (float, optional): Speed factor for audio processing. Defaults to 1.0.
25
- seg_length (float, optional): Length of each audio segment in seconds. Defaults to 3.0.
26
- threads (int, optional): Number of CPU threads to use for parallel processing.
27
- Defaults to 1.
28
- Returns:
29
- None
30
- Notes:
31
- - The function uses multiprocessing for parallel processing if `threads` is greater than 1.
32
- - On Windows, due to the lack of `fork()` support, configuration items are passed to each
33
- process explicitly.
34
- - It is recommended to use this function on Linux for better performance.
35
- """
36
- from multiprocessing import Pool
37
-
38
- import birdnet_analyzer.config as cfg
39
-
40
- from birdnet_analyzer.segments.utils import extract_segments, parse_folders, parse_files # noqa: E402
41
-
42
- cfg.INPUT_PATH = input
43
-
44
- if not output:
45
- cfg.OUTPUT_PATH = cfg.INPUT_PATH
46
- else:
47
- cfg.OUTPUT_PATH = output
48
-
49
- results = results if results else cfg.INPUT_PATH
50
-
51
- # Parse audio and result folders
52
- cfg.FILE_LIST = parse_folders(input, results)
53
-
54
- # Set number of threads
55
- cfg.CPU_THREADS = threads
56
-
57
- # Set confidence threshold
58
- cfg.MIN_CONFIDENCE = min_conf
59
-
60
- # Parse file list and make list of segments
61
- cfg.FILE_LIST = parse_files(cfg.FILE_LIST, max_segments)
62
-
63
- # Set audio speed
64
- cfg.AUDIO_SPEED = audio_speed
65
-
66
- # Add config items to each file list entry.
67
- # We have to do this for Windows which does not
68
- # support fork() and thus each process has to
69
- # have its own config. USE LINUX!
70
- flist = [(entry, seg_length, cfg.get_config()) for entry in cfg.FILE_LIST]
71
-
72
- # Extract segments
73
- if cfg.CPU_THREADS < 2:
74
- for entry in flist:
75
- extract_segments(entry)
76
- else:
77
- with Pool(cfg.CPU_THREADS) as p:
78
- p.map(extract_segments, flist)
1
+ def segments(
2
+ audio_input: str,
3
+ output: str | None = None,
4
+ results: str | None = None,
5
+ *,
6
+ min_conf: float = 0.25,
7
+ max_segments: int = 100,
8
+ audio_speed: float = 1.0,
9
+ seg_length: float = 3.0,
10
+ threads: int = 1,
11
+ ):
12
+ """
13
+ Processes audio files to extract segments based on detection results.
14
+ Args:
15
+ audio_input (str): Path to the input folder containing audio files.
16
+ output (str | None, optional): Path to the output folder where segments will be saved.
17
+ If not provided, the input folder will be used as the output folder. Defaults to None.
18
+ results (str | None, optional): Path to the folder containing detection result files.
19
+ If not provided, the input folder will be used. Defaults to None.
20
+ min_conf (float, optional): Minimum confidence threshold for detections to be considered.
21
+ Defaults to 0.25.
22
+ max_segments (int, optional): Maximum number of segments to extract per audio file.
23
+ Defaults to 100.
24
+ audio_speed (float, optional): Speed factor for audio processing. Defaults to 1.0.
25
+ seg_length (float, optional): Length of each audio segment in seconds. Defaults to 3.0.
26
+ threads (int, optional): Number of CPU threads to use for parallel processing.
27
+ Defaults to 1.
28
+ Returns:
29
+ None
30
+ Notes:
31
+ - The function uses multiprocessing for parallel processing if `threads` is greater than 1.
32
+ - On Windows, due to the lack of `fork()` support, configuration items are passed to each
33
+ process explicitly.
34
+ - It is recommended to use this function on Linux for better performance.
35
+ """
36
+ from multiprocessing import Pool
37
+
38
+ import birdnet_analyzer.config as cfg
39
+ from birdnet_analyzer.segments.utils import (
40
+ extract_segments,
41
+ parse_files,
42
+ parse_folders,
43
+ )
44
+
45
+ cfg.INPUT_PATH = audio_input
46
+
47
+ if not output:
48
+ cfg.OUTPUT_PATH = cfg.INPUT_PATH
49
+ else:
50
+ cfg.OUTPUT_PATH = output
51
+
52
+ results = results if results else cfg.INPUT_PATH
53
+
54
+ # Parse audio and result folders
55
+ cfg.FILE_LIST = parse_folders(audio_input, results)
56
+
57
+ # Set number of threads
58
+ cfg.CPU_THREADS = threads
59
+
60
+ # Set confidence threshold
61
+ cfg.MIN_CONFIDENCE = min_conf
62
+
63
+ # Parse file list and make list of segments
64
+ cfg.FILE_LIST = parse_files(cfg.FILE_LIST, max_segments)
65
+
66
+ # Set audio speed
67
+ cfg.AUDIO_SPEED = audio_speed
68
+
69
+ # Add config items to each file list entry.
70
+ # We have to do this for Windows which does not
71
+ # support fork() and thus each process has to
72
+ # have its own config. USE LINUX!
73
+ flist = [(entry, seg_length, cfg.get_config()) for entry in cfg.FILE_LIST]
74
+
75
+ # Extract segments
76
+ if cfg.CPU_THREADS < 2:
77
+ for entry in flist:
78
+ extract_segments(entry)
79
+ else:
80
+ with Pool(cfg.CPU_THREADS) as p:
81
+ p.map(extract_segments, flist)