birdnet-analyzer 2.0.1__py3-none-any.whl → 2.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121) hide show
  1. birdnet_analyzer/__init__.py +9 -9
  2. birdnet_analyzer/analyze/__init__.py +19 -5
  3. birdnet_analyzer/analyze/__main__.py +3 -3
  4. birdnet_analyzer/analyze/cli.py +30 -25
  5. birdnet_analyzer/analyze/core.py +268 -241
  6. birdnet_analyzer/analyze/utils.py +700 -692
  7. birdnet_analyzer/audio.py +368 -368
  8. birdnet_analyzer/cli.py +732 -709
  9. birdnet_analyzer/config.py +243 -242
  10. birdnet_analyzer/eBird_taxonomy_codes_2024E.json +13046 -0
  11. birdnet_analyzer/embeddings/__init__.py +3 -3
  12. birdnet_analyzer/embeddings/__main__.py +3 -3
  13. birdnet_analyzer/embeddings/cli.py +12 -12
  14. birdnet_analyzer/embeddings/core.py +70 -69
  15. birdnet_analyzer/embeddings/utils.py +173 -179
  16. birdnet_analyzer/evaluation/__init__.py +189 -196
  17. birdnet_analyzer/evaluation/__main__.py +3 -3
  18. birdnet_analyzer/evaluation/assessment/metrics.py +388 -388
  19. birdnet_analyzer/evaluation/assessment/performance_assessor.py +364 -409
  20. birdnet_analyzer/evaluation/assessment/plotting.py +378 -379
  21. birdnet_analyzer/evaluation/preprocessing/data_processor.py +631 -631
  22. birdnet_analyzer/evaluation/preprocessing/utils.py +98 -98
  23. birdnet_analyzer/gui/__init__.py +19 -19
  24. birdnet_analyzer/gui/__main__.py +3 -3
  25. birdnet_analyzer/gui/analysis.py +179 -175
  26. birdnet_analyzer/gui/assets/arrow_down.svg +4 -4
  27. birdnet_analyzer/gui/assets/arrow_left.svg +4 -4
  28. birdnet_analyzer/gui/assets/arrow_right.svg +4 -4
  29. birdnet_analyzer/gui/assets/arrow_up.svg +4 -4
  30. birdnet_analyzer/gui/assets/gui.css +36 -28
  31. birdnet_analyzer/gui/assets/gui.js +89 -93
  32. birdnet_analyzer/gui/embeddings.py +638 -619
  33. birdnet_analyzer/gui/evaluation.py +801 -795
  34. birdnet_analyzer/gui/localization.py +75 -75
  35. birdnet_analyzer/gui/multi_file.py +265 -245
  36. birdnet_analyzer/gui/review.py +472 -519
  37. birdnet_analyzer/gui/segments.py +191 -191
  38. birdnet_analyzer/gui/settings.py +149 -128
  39. birdnet_analyzer/gui/single_file.py +264 -267
  40. birdnet_analyzer/gui/species.py +95 -95
  41. birdnet_analyzer/gui/train.py +687 -696
  42. birdnet_analyzer/gui/utils.py +803 -810
  43. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_af.txt +6522 -6522
  44. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ar.txt +6522 -6522
  45. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_bg.txt +6522 -6522
  46. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ca.txt +6522 -6522
  47. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_cs.txt +6522 -6522
  48. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_da.txt +6522 -6522
  49. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_de.txt +6522 -6522
  50. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_el.txt +6522 -6522
  51. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_en_uk.txt +6522 -6522
  52. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_es.txt +6522 -6522
  53. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_fi.txt +6522 -6522
  54. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_fr.txt +6522 -6522
  55. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_he.txt +6522 -6522
  56. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_hr.txt +6522 -6522
  57. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_hu.txt +6522 -6522
  58. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_in.txt +6522 -6522
  59. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_is.txt +6522 -6522
  60. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_it.txt +6522 -6522
  61. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ja.txt +6522 -6522
  62. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ko.txt +6522 -6522
  63. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_lt.txt +6522 -6522
  64. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ml.txt +6522 -6522
  65. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_nl.txt +6522 -6522
  66. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_no.txt +6522 -6522
  67. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_pl.txt +6522 -6522
  68. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_pt_BR.txt +6522 -6522
  69. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_pt_PT.txt +6522 -6522
  70. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ro.txt +6522 -6522
  71. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ru.txt +6522 -6522
  72. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sk.txt +6522 -6522
  73. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sl.txt +6522 -6522
  74. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sr.txt +6522 -6522
  75. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sv.txt +6522 -6522
  76. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_th.txt +6522 -6522
  77. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_tr.txt +6522 -6522
  78. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_uk.txt +6522 -6522
  79. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_zh.txt +6522 -6522
  80. birdnet_analyzer/lang/de.json +342 -334
  81. birdnet_analyzer/lang/en.json +342 -334
  82. birdnet_analyzer/lang/fi.json +342 -334
  83. birdnet_analyzer/lang/fr.json +342 -334
  84. birdnet_analyzer/lang/id.json +342 -334
  85. birdnet_analyzer/lang/pt-br.json +342 -334
  86. birdnet_analyzer/lang/ru.json +342 -334
  87. birdnet_analyzer/lang/se.json +342 -334
  88. birdnet_analyzer/lang/tlh.json +342 -334
  89. birdnet_analyzer/lang/zh_TW.json +342 -334
  90. birdnet_analyzer/model.py +1213 -1212
  91. birdnet_analyzer/search/__init__.py +3 -3
  92. birdnet_analyzer/search/__main__.py +3 -3
  93. birdnet_analyzer/search/cli.py +11 -11
  94. birdnet_analyzer/search/core.py +78 -78
  95. birdnet_analyzer/search/utils.py +104 -107
  96. birdnet_analyzer/segments/__init__.py +3 -3
  97. birdnet_analyzer/segments/__main__.py +3 -3
  98. birdnet_analyzer/segments/cli.py +13 -13
  99. birdnet_analyzer/segments/core.py +81 -81
  100. birdnet_analyzer/segments/utils.py +383 -383
  101. birdnet_analyzer/species/__init__.py +3 -3
  102. birdnet_analyzer/species/__main__.py +3 -3
  103. birdnet_analyzer/species/cli.py +13 -13
  104. birdnet_analyzer/species/core.py +35 -35
  105. birdnet_analyzer/species/utils.py +73 -74
  106. birdnet_analyzer/train/__init__.py +3 -3
  107. birdnet_analyzer/train/__main__.py +3 -3
  108. birdnet_analyzer/train/cli.py +13 -13
  109. birdnet_analyzer/train/core.py +113 -113
  110. birdnet_analyzer/train/utils.py +878 -877
  111. birdnet_analyzer/translate.py +132 -133
  112. birdnet_analyzer/utils.py +425 -426
  113. {birdnet_analyzer-2.0.1.dist-info → birdnet_analyzer-2.1.1.dist-info}/METADATA +147 -137
  114. birdnet_analyzer-2.1.1.dist-info/RECORD +124 -0
  115. {birdnet_analyzer-2.0.1.dist-info → birdnet_analyzer-2.1.1.dist-info}/WHEEL +1 -1
  116. {birdnet_analyzer-2.0.1.dist-info → birdnet_analyzer-2.1.1.dist-info}/licenses/LICENSE +18 -18
  117. birdnet_analyzer/eBird_taxonomy_codes_2021E.json +0 -25280
  118. birdnet_analyzer/playground.py +0 -5
  119. birdnet_analyzer-2.0.1.dist-info/RECORD +0 -125
  120. {birdnet_analyzer-2.0.1.dist-info → birdnet_analyzer-2.1.1.dist-info}/entry_points.txt +0 -0
  121. {birdnet_analyzer-2.0.1.dist-info → birdnet_analyzer-2.1.1.dist-info}/top_level.txt +0 -0
@@ -1,3 +1,3 @@
1
- from birdnet_analyzer.search.core import search
2
-
3
- __all__ = ["search"]
1
+ from birdnet_analyzer.search.core import search
2
+
3
+ __all__ = ["search"]
@@ -1,3 +1,3 @@
1
- from birdnet_analyzer.search.cli import main
2
-
3
- main()
1
+ from birdnet_analyzer.search.cli import main
2
+
3
+ main()
@@ -1,11 +1,11 @@
1
- from birdnet_analyzer import utils
2
-
3
-
4
- @utils.runtime_error_handler
5
- def main():
6
- from birdnet_analyzer import cli, search
7
-
8
- parser = cli.search_parser()
9
- args = parser.parse_args()
10
-
11
- search(**vars(args))
1
+ from birdnet_analyzer import utils
2
+
3
+
4
+ @utils.runtime_error_handler
5
+ def main():
6
+ from birdnet_analyzer import cli, search
7
+
8
+ parser = cli.search_parser()
9
+ args = parser.parse_args()
10
+
11
+ search(**vars(args))
@@ -1,78 +1,78 @@
1
- from typing import Literal
2
-
3
-
4
- def search(
5
- output: str,
6
- database: str,
7
- queryfile: str,
8
- *,
9
- n_results: int = 10,
10
- score_function: Literal["cosine", "euclidean", "dot"] = "cosine",
11
- crop_mode: Literal["center", "first", "segments"] = "center",
12
- overlap: float = 0.0,
13
- ):
14
- """
15
- Executes a search query on a given database and saves the results as audio files.
16
- Args:
17
- output (str): Path to the output directory where the results will be saved.
18
- database (str): Path to the database file to search in.
19
- queryfile (str): Path to the query file containing the search input.
20
- n_results (int, optional): Number of top results to return. Defaults to 10.
21
- score_function (Literal["cosine", "euclidean", "dot"], optional):
22
- Scoring function to use for similarity calculation. Defaults to "cosine".
23
- crop_mode (Literal["center", "first", "segments"], optional):
24
- Mode for cropping audio segments. Defaults to "center".
25
- overlap (float, optional): Overlap ratio for audio segments. Defaults to 0.0.
26
- Raises:
27
- ValueError: If the database does not contain the required settings metadata.
28
- Notes:
29
- - The function creates the output directory if it does not exist.
30
- - It retrieves metadata from the database to configure the search, including
31
- bandpass filter settings and audio speed.
32
- - The results are saved as audio files in the specified output directory, with
33
- filenames containing the score, source file name, and time offsets.
34
- Returns:
35
- None
36
- """
37
- import os
38
-
39
- import birdnet_analyzer.config as cfg
40
- from birdnet_analyzer import audio
41
- from birdnet_analyzer.search.utils import get_search_results
42
-
43
- # Create output folder
44
- if not os.path.exists(output):
45
- os.makedirs(output)
46
-
47
- # Load the database
48
- db = get_database(database)
49
-
50
- try:
51
- settings = db.get_metadata("birdnet_analyzer_settings")
52
- except KeyError as e:
53
- raise ValueError("No settings present in database.") from e
54
-
55
- fmin = settings["BANDPASS_FMIN"]
56
- fmax = settings["BANDPASS_FMAX"]
57
- audio_speed = settings["AUDIO_SPEED"]
58
-
59
- # Execute the search
60
- results = get_search_results(queryfile, db, n_results, audio_speed, fmin, fmax, score_function, crop_mode, overlap)
61
-
62
- # Save the results
63
- for r in results:
64
- embedding_source = db.get_embedding_source(r.embedding_id)
65
- file = embedding_source.source_id
66
- filebasename = os.path.basename(file)
67
- filebasename = os.path.splitext(filebasename)[0]
68
- offset = embedding_source.offsets[0] * audio_speed
69
- duration = cfg.SIG_LENGTH * audio_speed
70
- sig, rate = audio.open_audio_file(file, offset=offset, duration=duration, sample_rate=None)
71
- result_path = os.path.join(output, f"{file[4]:.5f}_{filebasename}_{offset}_{offset + duration}.wav")
72
- audio.save_signal(sig, result_path, rate)
73
-
74
-
75
- def get_database(database_path):
76
- from perch_hoplite.db import sqlite_usearch_impl
77
-
78
- return sqlite_usearch_impl.SQLiteUsearchDB.create(database_path).thread_split()
1
+ from typing import Literal
2
+
3
+
4
+ def search(
5
+ output: str,
6
+ database: str,
7
+ queryfile: str,
8
+ *,
9
+ n_results: int = 10,
10
+ score_function: Literal["cosine", "euclidean", "dot"] = "cosine",
11
+ crop_mode: Literal["center", "first", "segments"] = "center",
12
+ overlap: float = 0.0,
13
+ ):
14
+ """
15
+ Executes a search query on a given database and saves the results as audio files.
16
+ Args:
17
+ output (str): Path to the output directory where the results will be saved.
18
+ database (str): Path to the database file to search in.
19
+ queryfile (str): Path to the query file containing the search input.
20
+ n_results (int, optional): Number of top results to return. Defaults to 10.
21
+ score_function (Literal["cosine", "euclidean", "dot"], optional):
22
+ Scoring function to use for similarity calculation. Defaults to "cosine".
23
+ crop_mode (Literal["center", "first", "segments"], optional):
24
+ Mode for cropping audio segments. Defaults to "center".
25
+ overlap (float, optional): Overlap ratio for audio segments. Defaults to 0.0.
26
+ Raises:
27
+ ValueError: If the database does not contain the required settings metadata.
28
+ Notes:
29
+ - The function creates the output directory if it does not exist.
30
+ - It retrieves metadata from the database to configure the search, including
31
+ bandpass filter settings and audio speed.
32
+ - The results are saved as audio files in the specified output directory, with
33
+ filenames containing the score, source file name, and time offsets.
34
+ Returns:
35
+ None
36
+ """
37
+ import os
38
+
39
+ import birdnet_analyzer.config as cfg
40
+ from birdnet_analyzer import audio
41
+ from birdnet_analyzer.search.utils import get_search_results
42
+
43
+ # Create output folder
44
+ if not os.path.exists(output):
45
+ os.makedirs(output)
46
+
47
+ # Load the database
48
+ db = get_database(database)
49
+
50
+ try:
51
+ settings = db.get_metadata("birdnet_analyzer_settings")
52
+ except KeyError as e:
53
+ raise ValueError("No settings present in database.") from e
54
+
55
+ fmin = settings["BANDPASS_FMIN"]
56
+ fmax = settings["BANDPASS_FMAX"]
57
+ audio_speed = settings["AUDIO_SPEED"]
58
+
59
+ # Execute the search
60
+ results = get_search_results(queryfile, db, n_results, audio_speed, fmin, fmax, score_function, crop_mode, overlap)
61
+
62
+ # Save the results
63
+ for r in results:
64
+ embedding_source = db.get_embedding_source(r.embedding_id)
65
+ file = embedding_source.source_id
66
+ filebasename = os.path.basename(file)
67
+ filebasename = os.path.splitext(filebasename)[0]
68
+ offset = embedding_source.offsets[0]
69
+ duration = cfg.SIG_LENGTH * audio_speed
70
+ sig, rate = audio.open_audio_file(file, offset=offset, duration=duration, sample_rate=None)
71
+ result_path = os.path.join(output, f"{r.sort_score:.5f}_{filebasename}_{offset}_{offset + duration}.wav")
72
+ audio.save_signal(sig, result_path, rate)
73
+
74
+
75
+ def get_database(database_path):
76
+ from perch_hoplite.db import sqlite_usearch_impl
77
+
78
+ return sqlite_usearch_impl.SQLiteUsearchDB.create(database_path).thread_split()
@@ -1,107 +1,104 @@
1
- import numpy as np
2
- from perch_hoplite.db import brutalism
3
- from perch_hoplite.db.search_results import SearchResult
4
- from scipy.spatial.distance import euclidean
5
-
6
- import birdnet_analyzer.config as cfg
7
- from birdnet_analyzer import audio, model
8
-
9
-
10
- def cosine_sim(a, b):
11
- if a.ndim == 2:
12
- return np.array([cosine_sim(a[i], b) for i in range(a.shape[0])])
13
- return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
14
-
15
-
16
- def euclidean_scoring(a, b):
17
- if a.ndim == 2:
18
- return np.array([euclidean_scoring(a[i], b) for i in range(a.shape[0])])
19
- return euclidean(a, b)
20
-
21
-
22
- def euclidean_scoring_inverse(a, b):
23
- return -euclidean_scoring(a, b)
24
-
25
-
26
- def get_query_embedding(queryfile_path):
27
- """
28
- Extracts the embedding for a query file. Reads only the first 3 seconds
29
- Args:
30
- queryfile_path: The path to the query file.
31
- Returns:
32
- The query embedding.
33
- """
34
-
35
- # Load audio
36
- sig, rate = audio.open_audio_file(
37
- queryfile_path,
38
- duration=cfg.SIG_LENGTH * cfg.AUDIO_SPEED if cfg.SAMPLE_CROP_MODE == "first" else None,
39
- fmin=cfg.BANDPASS_FMIN,
40
- fmax=cfg.BANDPASS_FMAX,
41
- speed=cfg.AUDIO_SPEED,
42
- )
43
-
44
- # Crop query audio
45
- if cfg.SAMPLE_CROP_MODE == "center":
46
- sig_splits = [audio.crop_center(sig, rate, cfg.SIG_LENGTH)]
47
- elif cfg.SAMPLE_CROP_MODE == "first":
48
- sig_splits = [audio.split_signal(sig, rate, cfg.SIG_LENGTH, cfg.SIG_OVERLAP, cfg.SIG_MINLEN)[0]]
49
- else:
50
- sig_splits = audio.split_signal(sig, rate, cfg.SIG_LENGTH, cfg.SIG_OVERLAP, cfg.SIG_MINLEN)
51
-
52
- samples = sig_splits
53
- data = np.array(samples, dtype="float32")
54
-
55
- return model.embeddings(data)
56
-
57
-
58
- def get_search_results(
59
- queryfile_path, db, n_results, audio_speed, fmin, fmax, score_function: str, crop_mode, crop_overlap
60
- ):
61
- # Set bandpass frequency range
62
- cfg.BANDPASS_FMIN = max(0, min(cfg.SIG_FMAX, int(fmin)))
63
- cfg.BANDPASS_FMAX = max(cfg.SIG_FMIN, min(cfg.SIG_FMAX, int(fmax)))
64
- cfg.AUDIO_SPEED = max(0.01, audio_speed)
65
- cfg.SAMPLE_CROP_MODE = crop_mode
66
- cfg.SIG_OVERLAP = max(0.0, min(2.9, float(crop_overlap)))
67
-
68
- # Get query embedding
69
- query_embeddings = get_query_embedding(queryfile_path)
70
-
71
- # Set score function
72
- if score_function == "cosine":
73
- score_fn = cosine_sim
74
- elif score_function == "dot":
75
- score_fn = np.dot
76
- elif score_function == "euclidean":
77
- score_fn = euclidean_scoring_inverse # TODO: this is a bit hacky since the search function expects the score to be high for similar embeddings
78
- else:
79
- raise ValueError("Invalid score function. Choose 'cosine', 'euclidean' or 'dot'.")
80
-
81
- db_embeddings_count = db.count_embeddings()
82
- n_results = min(n_results, db_embeddings_count - 1)
83
- scores_by_embedding_id = {}
84
-
85
- for embedding in query_embeddings:
86
- results, scores = brutalism.threaded_brute_search(db, embedding, n_results, score_fn)
87
- sorted_results = results.search_results
88
-
89
- if score_function == "euclidean":
90
- for result in sorted_results:
91
- result.sort_score *= -1
92
-
93
- for result in sorted_results:
94
- if result.embedding_id not in scores_by_embedding_id:
95
- scores_by_embedding_id[result.embedding_id] = []
96
- scores_by_embedding_id[result.embedding_id].append(result.sort_score)
97
-
98
- results = []
99
-
100
- for embedding_id, scores in scores_by_embedding_id.items():
101
- results.append(SearchResult(embedding_id, np.sum(scores) / len(query_embeddings)))
102
-
103
- reverse = score_function != "euclidean"
104
-
105
- results.sort(key=lambda x: x.sort_score, reverse=reverse)
106
-
107
- return results[0:n_results]
1
+ import numpy as np
2
+ from perch_hoplite.db import brutalism
3
+ from perch_hoplite.db.search_results import SearchResult
4
+ from scipy.spatial.distance import euclidean
5
+
6
+ import birdnet_analyzer.config as cfg
7
+ from birdnet_analyzer import audio, model
8
+
9
+
10
+ def cosine_sim(a, b):
11
+ if a.ndim == 2:
12
+ return np.array([cosine_sim(a[i], b) for i in range(a.shape[0])])
13
+ return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
14
+
15
+
16
+ def euclidean_scoring(a, b):
17
+ if a.ndim == 2:
18
+ return np.array([euclidean_scoring(a[i], b) for i in range(a.shape[0])])
19
+ return euclidean(a, b)
20
+
21
+
22
+ def euclidean_scoring_inverse(a, b):
23
+ return -euclidean_scoring(a, b)
24
+
25
+
26
+ def get_query_embedding(queryfile_path):
27
+ """
28
+ Extracts the embedding for a query file. Reads only the first 3 seconds
29
+ Args:
30
+ queryfile_path: The path to the query file.
31
+ Returns:
32
+ The query embedding.
33
+ """
34
+
35
+ # Load audio
36
+ sig, rate = audio.open_audio_file(
37
+ queryfile_path,
38
+ duration=cfg.SIG_LENGTH * cfg.AUDIO_SPEED if cfg.SAMPLE_CROP_MODE == "first" else None,
39
+ fmin=cfg.BANDPASS_FMIN,
40
+ fmax=cfg.BANDPASS_FMAX,
41
+ speed=cfg.AUDIO_SPEED,
42
+ )
43
+
44
+ # Crop query audio
45
+ if cfg.SAMPLE_CROP_MODE == "center":
46
+ sig_splits = [audio.crop_center(sig, rate, cfg.SIG_LENGTH)]
47
+ elif cfg.SAMPLE_CROP_MODE == "first":
48
+ sig_splits = [audio.split_signal(sig, rate, cfg.SIG_LENGTH, cfg.SIG_OVERLAP, cfg.SIG_MINLEN)[0]]
49
+ else:
50
+ sig_splits = audio.split_signal(sig, rate, cfg.SIG_LENGTH, cfg.SIG_OVERLAP, cfg.SIG_MINLEN)
51
+
52
+ return model.embeddings(sig_splits)
53
+
54
+
55
+ def get_search_results(
56
+ queryfile_path, db, n_results, audio_speed, fmin, fmax, score_function: str, crop_mode, crop_overlap
57
+ ):
58
+ # Set bandpass frequency range
59
+ cfg.BANDPASS_FMIN = max(0, min(cfg.SIG_FMAX, int(fmin)))
60
+ cfg.BANDPASS_FMAX = max(cfg.SIG_FMIN, min(cfg.SIG_FMAX, int(fmax)))
61
+ cfg.AUDIO_SPEED = max(0.01, audio_speed)
62
+ cfg.SAMPLE_CROP_MODE = crop_mode
63
+ cfg.SIG_OVERLAP = max(0.0, min(2.9, float(crop_overlap)))
64
+
65
+ # Get query embedding
66
+ query_embeddings = get_query_embedding(queryfile_path)
67
+
68
+ # Set score function
69
+ if score_function == "cosine":
70
+ score_fn = cosine_sim
71
+ elif score_function == "dot":
72
+ score_fn = np.dot
73
+ elif score_function == "euclidean":
74
+ score_fn = euclidean_scoring_inverse # TODO: this is a bit hacky since the search function expects the score to be high for similar embeddings
75
+ else:
76
+ raise ValueError("Invalid score function. Choose 'cosine', 'euclidean' or 'dot'.")
77
+
78
+ db_embeddings_count = db.count_embeddings()
79
+ n_results = min(n_results, db_embeddings_count - 1)
80
+ scores_by_embedding_id = {}
81
+
82
+ for embedding in query_embeddings:
83
+ results, scores = brutalism.threaded_brute_search(db, embedding, n_results, score_fn)
84
+ sorted_results = results.search_results
85
+
86
+ if score_function == "euclidean":
87
+ for result in sorted_results:
88
+ result.sort_score *= -1
89
+
90
+ for result in sorted_results:
91
+ if result.embedding_id not in scores_by_embedding_id:
92
+ scores_by_embedding_id[result.embedding_id] = []
93
+ scores_by_embedding_id[result.embedding_id].append(result.sort_score)
94
+
95
+ results = []
96
+
97
+ for embedding_id, scores in scores_by_embedding_id.items():
98
+ results.append(SearchResult(embedding_id, np.sum(scores) / len(query_embeddings)))
99
+
100
+ reverse = score_function != "euclidean"
101
+
102
+ results.sort(key=lambda x: x.sort_score, reverse=reverse)
103
+
104
+ return results[0:n_results]
@@ -1,3 +1,3 @@
1
- from birdnet_analyzer.segments.core import segments
2
-
3
- __all__ = ["segments"]
1
+ from birdnet_analyzer.segments.core import segments
2
+
3
+ __all__ = ["segments"]
@@ -1,3 +1,3 @@
1
- from birdnet_analyzer.segments.cli import main
2
-
3
- main()
1
+ from birdnet_analyzer.segments.cli import main
2
+
3
+ main()
@@ -1,13 +1,13 @@
1
- from birdnet_analyzer.utils import runtime_error_handler
2
-
3
-
4
- @runtime_error_handler
5
- def main():
6
- from birdnet_analyzer import cli, segments
7
-
8
- # Parse arguments
9
- parser = cli.segments_parser()
10
-
11
- args = parser.parse_args()
12
-
13
- segments(**vars(args))
1
+ from birdnet_analyzer.utils import runtime_error_handler
2
+
3
+
4
+ @runtime_error_handler
5
+ def main():
6
+ from birdnet_analyzer import cli, segments
7
+
8
+ # Parse arguments
9
+ parser = cli.segments_parser()
10
+
11
+ args = parser.parse_args()
12
+
13
+ segments(**vars(args))
@@ -1,81 +1,81 @@
1
- def segments(
2
- audio_input: str,
3
- output: str | None = None,
4
- results: str | None = None,
5
- *,
6
- min_conf: float = 0.25,
7
- max_segments: int = 100,
8
- audio_speed: float = 1.0,
9
- seg_length: float = 3.0,
10
- threads: int = 1,
11
- ):
12
- """
13
- Processes audio files to extract segments based on detection results.
14
- Args:
15
- audio_input (str): Path to the input folder containing audio files.
16
- output (str | None, optional): Path to the output folder where segments will be saved.
17
- If not provided, the input folder will be used as the output folder. Defaults to None.
18
- results (str | None, optional): Path to the folder containing detection result files.
19
- If not provided, the input folder will be used. Defaults to None.
20
- min_conf (float, optional): Minimum confidence threshold for detections to be considered.
21
- Defaults to 0.25.
22
- max_segments (int, optional): Maximum number of segments to extract per audio file.
23
- Defaults to 100.
24
- audio_speed (float, optional): Speed factor for audio processing. Defaults to 1.0.
25
- seg_length (float, optional): Length of each audio segment in seconds. Defaults to 3.0.
26
- threads (int, optional): Number of CPU threads to use for parallel processing.
27
- Defaults to 1.
28
- Returns:
29
- None
30
- Notes:
31
- - The function uses multiprocessing for parallel processing if `threads` is greater than 1.
32
- - On Windows, due to the lack of `fork()` support, configuration items are passed to each
33
- process explicitly.
34
- - It is recommended to use this function on Linux for better performance.
35
- """
36
- from multiprocessing import Pool
37
-
38
- import birdnet_analyzer.config as cfg
39
- from birdnet_analyzer.segments.utils import (
40
- extract_segments,
41
- parse_files,
42
- parse_folders,
43
- )
44
-
45
- cfg.INPUT_PATH = audio_input
46
-
47
- if not output:
48
- cfg.OUTPUT_PATH = cfg.INPUT_PATH
49
- else:
50
- cfg.OUTPUT_PATH = output
51
-
52
- results = results if results else cfg.INPUT_PATH
53
-
54
- # Parse audio and result folders
55
- cfg.FILE_LIST = parse_folders(audio_input, results)
56
-
57
- # Set number of threads
58
- cfg.CPU_THREADS = threads
59
-
60
- # Set confidence threshold
61
- cfg.MIN_CONFIDENCE = min_conf
62
-
63
- # Parse file list and make list of segments
64
- cfg.FILE_LIST = parse_files(cfg.FILE_LIST, max_segments)
65
-
66
- # Set audio speed
67
- cfg.AUDIO_SPEED = audio_speed
68
-
69
- # Add config items to each file list entry.
70
- # We have to do this for Windows which does not
71
- # support fork() and thus each process has to
72
- # have its own config. USE LINUX!
73
- flist = [(entry, seg_length, cfg.get_config()) for entry in cfg.FILE_LIST]
74
-
75
- # Extract segments
76
- if cfg.CPU_THREADS < 2:
77
- for entry in flist:
78
- extract_segments(entry)
79
- else:
80
- with Pool(cfg.CPU_THREADS) as p:
81
- p.map(extract_segments, flist)
1
+ def segments(
2
+ audio_input: str,
3
+ output: str | None = None,
4
+ results: str | None = None,
5
+ *,
6
+ min_conf: float = 0.25,
7
+ max_segments: int = 100,
8
+ audio_speed: float = 1.0,
9
+ seg_length: float = 3.0,
10
+ threads: int = 1,
11
+ ):
12
+ """
13
+ Processes audio files to extract segments based on detection results.
14
+ Args:
15
+ audio_input (str): Path to the input folder containing audio files.
16
+ output (str | None, optional): Path to the output folder where segments will be saved.
17
+ If not provided, the input folder will be used as the output folder. Defaults to None.
18
+ results (str | None, optional): Path to the folder containing detection result files.
19
+ If not provided, the input folder will be used. Defaults to None.
20
+ min_conf (float, optional): Minimum confidence threshold for detections to be considered.
21
+ Defaults to 0.25.
22
+ max_segments (int, optional): Maximum number of segments to extract per audio file.
23
+ Defaults to 100.
24
+ audio_speed (float, optional): Speed factor for audio processing. Defaults to 1.0.
25
+ seg_length (float, optional): Length of each audio segment in seconds. Defaults to 3.0.
26
+ threads (int, optional): Number of CPU threads to use for parallel processing.
27
+ Defaults to 1.
28
+ Returns:
29
+ None
30
+ Notes:
31
+ - The function uses multiprocessing for parallel processing if `threads` is greater than 1.
32
+ - On Windows, due to the lack of `fork()` support, configuration items are passed to each
33
+ process explicitly.
34
+ - It is recommended to use this function on Linux for better performance.
35
+ """
36
+ from multiprocessing import Pool
37
+
38
+ import birdnet_analyzer.config as cfg
39
+ from birdnet_analyzer.segments.utils import (
40
+ extract_segments,
41
+ parse_files,
42
+ parse_folders,
43
+ )
44
+
45
+ cfg.INPUT_PATH = audio_input
46
+
47
+ if not output:
48
+ cfg.OUTPUT_PATH = cfg.INPUT_PATH
49
+ else:
50
+ cfg.OUTPUT_PATH = output
51
+
52
+ results = results if results else cfg.INPUT_PATH
53
+
54
+ # Parse audio and result folders
55
+ cfg.FILE_LIST = parse_folders(audio_input, results)
56
+
57
+ # Set number of threads
58
+ cfg.CPU_THREADS = threads
59
+
60
+ # Set confidence threshold
61
+ cfg.MIN_CONFIDENCE = min_conf
62
+
63
+ # Parse file list and make list of segments
64
+ cfg.FILE_LIST = parse_files(cfg.FILE_LIST, max_segments)
65
+
66
+ # Set audio speed
67
+ cfg.AUDIO_SPEED = audio_speed
68
+
69
+ # Add config items to each file list entry.
70
+ # We have to do this for Windows which does not
71
+ # support fork() and thus each process has to
72
+ # have its own config. USE LINUX!
73
+ flist = [(entry, seg_length, cfg.get_config()) for entry in cfg.FILE_LIST]
74
+
75
+ # Extract segments
76
+ if cfg.CPU_THREADS < 2:
77
+ for entry in flist:
78
+ extract_segments(entry)
79
+ else:
80
+ with Pool(cfg.CPU_THREADS) as p:
81
+ p.map(extract_segments, flist)