birdnet-analyzer 2.0.0__py3-none-any.whl → 2.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123)
  1. birdnet_analyzer/__init__.py +9 -8
  2. birdnet_analyzer/analyze/__init__.py +19 -5
  3. birdnet_analyzer/analyze/__main__.py +3 -4
  4. birdnet_analyzer/analyze/cli.py +30 -25
  5. birdnet_analyzer/analyze/core.py +246 -245
  6. birdnet_analyzer/analyze/utils.py +694 -701
  7. birdnet_analyzer/audio.py +368 -372
  8. birdnet_analyzer/cli.py +732 -707
  9. birdnet_analyzer/config.py +243 -242
  10. birdnet_analyzer/eBird_taxonomy_codes_2024E.json +13046 -0
  11. birdnet_analyzer/embeddings/__init__.py +3 -4
  12. birdnet_analyzer/embeddings/__main__.py +3 -3
  13. birdnet_analyzer/embeddings/cli.py +12 -13
  14. birdnet_analyzer/embeddings/core.py +70 -70
  15. birdnet_analyzer/embeddings/utils.py +220 -193
  16. birdnet_analyzer/evaluation/__init__.py +189 -195
  17. birdnet_analyzer/evaluation/__main__.py +3 -3
  18. birdnet_analyzer/evaluation/assessment/__init__.py +0 -0
  19. birdnet_analyzer/evaluation/assessment/metrics.py +388 -0
  20. birdnet_analyzer/evaluation/assessment/performance_assessor.py +364 -0
  21. birdnet_analyzer/evaluation/assessment/plotting.py +378 -0
  22. birdnet_analyzer/evaluation/preprocessing/__init__.py +0 -0
  23. birdnet_analyzer/evaluation/preprocessing/data_processor.py +631 -0
  24. birdnet_analyzer/evaluation/preprocessing/utils.py +98 -0
  25. birdnet_analyzer/gui/__init__.py +19 -23
  26. birdnet_analyzer/gui/__main__.py +3 -3
  27. birdnet_analyzer/gui/analysis.py +179 -174
  28. birdnet_analyzer/gui/assets/arrow_down.svg +4 -4
  29. birdnet_analyzer/gui/assets/arrow_left.svg +4 -4
  30. birdnet_analyzer/gui/assets/arrow_right.svg +4 -4
  31. birdnet_analyzer/gui/assets/arrow_up.svg +4 -4
  32. birdnet_analyzer/gui/assets/gui.css +36 -28
  33. birdnet_analyzer/gui/assets/gui.js +93 -93
  34. birdnet_analyzer/gui/embeddings.py +638 -620
  35. birdnet_analyzer/gui/evaluation.py +801 -813
  36. birdnet_analyzer/gui/localization.py +75 -68
  37. birdnet_analyzer/gui/multi_file.py +265 -246
  38. birdnet_analyzer/gui/review.py +472 -527
  39. birdnet_analyzer/gui/segments.py +191 -191
  40. birdnet_analyzer/gui/settings.py +149 -129
  41. birdnet_analyzer/gui/single_file.py +264 -269
  42. birdnet_analyzer/gui/species.py +95 -95
  43. birdnet_analyzer/gui/train.py +687 -698
  44. birdnet_analyzer/gui/utils.py +797 -808
  45. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_af.txt +6522 -6522
  46. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ar.txt +6522 -6522
  47. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_bg.txt +6522 -6522
  48. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ca.txt +6522 -6522
  49. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_cs.txt +6522 -6522
  50. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_da.txt +6522 -6522
  51. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_de.txt +6522 -6522
  52. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_el.txt +6522 -6522
  53. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_en_uk.txt +6522 -6522
  54. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_es.txt +6522 -6522
  55. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_fi.txt +6522 -6522
  56. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_fr.txt +6522 -6522
  57. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_he.txt +6522 -6522
  58. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_hr.txt +6522 -6522
  59. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_hu.txt +6522 -6522
  60. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_in.txt +6522 -6522
  61. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_is.txt +6522 -6522
  62. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_it.txt +6522 -6522
  63. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ja.txt +6522 -6522
  64. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ko.txt +6522 -6522
  65. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_lt.txt +6522 -6522
  66. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ml.txt +6522 -6522
  67. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_nl.txt +6522 -6522
  68. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_no.txt +6522 -6522
  69. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_pl.txt +6522 -6522
  70. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_pt_BR.txt +6522 -6522
  71. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_pt_PT.txt +6522 -6522
  72. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ro.txt +6522 -6522
  73. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ru.txt +6522 -6522
  74. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sk.txt +6522 -6522
  75. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sl.txt +6522 -6522
  76. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sr.txt +6522 -6522
  77. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sv.txt +6522 -6522
  78. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_th.txt +6522 -6522
  79. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_tr.txt +6522 -6522
  80. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_uk.txt +6522 -6522
  81. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_zh.txt +6522 -6522
  82. birdnet_analyzer/lang/de.json +341 -334
  83. birdnet_analyzer/lang/en.json +341 -334
  84. birdnet_analyzer/lang/fi.json +341 -334
  85. birdnet_analyzer/lang/fr.json +341 -334
  86. birdnet_analyzer/lang/id.json +341 -334
  87. birdnet_analyzer/lang/pt-br.json +341 -334
  88. birdnet_analyzer/lang/ru.json +341 -334
  89. birdnet_analyzer/lang/se.json +341 -334
  90. birdnet_analyzer/lang/tlh.json +341 -334
  91. birdnet_analyzer/lang/zh_TW.json +341 -334
  92. birdnet_analyzer/model.py +1212 -1243
  93. birdnet_analyzer/playground.py +5 -0
  94. birdnet_analyzer/search/__init__.py +3 -3
  95. birdnet_analyzer/search/__main__.py +3 -3
  96. birdnet_analyzer/search/cli.py +11 -12
  97. birdnet_analyzer/search/core.py +78 -78
  98. birdnet_analyzer/search/utils.py +107 -111
  99. birdnet_analyzer/segments/__init__.py +3 -3
  100. birdnet_analyzer/segments/__main__.py +3 -3
  101. birdnet_analyzer/segments/cli.py +13 -14
  102. birdnet_analyzer/segments/core.py +81 -78
  103. birdnet_analyzer/segments/utils.py +383 -394
  104. birdnet_analyzer/species/__init__.py +3 -3
  105. birdnet_analyzer/species/__main__.py +3 -3
  106. birdnet_analyzer/species/cli.py +13 -14
  107. birdnet_analyzer/species/core.py +35 -35
  108. birdnet_analyzer/species/utils.py +74 -75
  109. birdnet_analyzer/train/__init__.py +3 -3
  110. birdnet_analyzer/train/__main__.py +3 -3
  111. birdnet_analyzer/train/cli.py +13 -14
  112. birdnet_analyzer/train/core.py +113 -113
  113. birdnet_analyzer/train/utils.py +877 -847
  114. birdnet_analyzer/translate.py +133 -104
  115. birdnet_analyzer/utils.py +425 -419
  116. {birdnet_analyzer-2.0.0.dist-info → birdnet_analyzer-2.1.0.dist-info}/METADATA +146 -129
  117. birdnet_analyzer-2.1.0.dist-info/RECORD +125 -0
  118. {birdnet_analyzer-2.0.0.dist-info → birdnet_analyzer-2.1.0.dist-info}/WHEEL +1 -1
  119. {birdnet_analyzer-2.0.0.dist-info → birdnet_analyzer-2.1.0.dist-info}/licenses/LICENSE +18 -18
  120. birdnet_analyzer/eBird_taxonomy_codes_2021E.json +0 -25280
  121. birdnet_analyzer-2.0.0.dist-info/RECORD +0 -117
  122. {birdnet_analyzer-2.0.0.dist-info → birdnet_analyzer-2.1.0.dist-info}/entry_points.txt +0 -0
  123. {birdnet_analyzer-2.0.0.dist-info → birdnet_analyzer-2.1.0.dist-info}/top_level.txt +0 -0

birdnet_analyzer/embeddings/__init__.py
@@ -1,4 +1,3 @@
- from birdnet_analyzer.embeddings.core import embeddings
-
-
- __all__ = ["embeddings"]
+ from birdnet_analyzer.embeddings.core import embeddings
+
+ __all__ = ["embeddings"]

birdnet_analyzer/embeddings/__main__.py
@@ -1,3 +1,3 @@
- from birdnet_analyzer.embeddings.cli import main
-
- main()
+ from birdnet_analyzer.embeddings.cli import main
+
+ main()

birdnet_analyzer/embeddings/cli.py
@@ -1,13 +1,12 @@
- from birdnet_analyzer.utils import runtime_error_handler
-
- from birdnet_analyzer import embeddings
-
-
- @runtime_error_handler
- def main():
-     import birdnet_analyzer.cli as cli
-
-     parser = cli.embeddings_parser()
-     args = parser.parse_args()
-
-     embeddings(**vars(args))
+ from birdnet_analyzer import embeddings
+ from birdnet_analyzer.utils import runtime_error_handler
+
+
+ @runtime_error_handler
+ def main():
+     from birdnet_analyzer import cli
+
+     parser = cli.embeddings_parser()
+     args = parser.parse_args()
+
+     embeddings(**vars(args))

birdnet_analyzer/embeddings/core.py
@@ -1,70 +1,70 @@
- def embeddings(
-     input: str,
-     database: str,
-     *,
-     overlap: float = 0.0,
-     audio_speed: float = 1.0,
-     fmin: int = 0,
-     fmax: int = 15000,
-     threads: int = 8,
-     batch_size: int = 1,
- ):
-     """
-     Generates embeddings for audio files using the BirdNET-Analyzer.
-     This function processes audio files to extract embeddings, which are
-     representations of audio features. The embeddings can be used for
-     further analysis or comparison.
-     Args:
-         input (str): Path to the input audio file or directory containing audio files.
-         database (str): Path to the database where embeddings will be stored.
-         overlap (float, optional): Overlap between consecutive audio segments in seconds. Defaults to 0.0.
-         audio_speed (float, optional): Speed factor for audio processing. Defaults to 1.0.
-         fmin (int, optional): Minimum frequency (in Hz) for audio analysis. Defaults to 0.
-         fmax (int, optional): Maximum frequency (in Hz) for audio analysis. Defaults to 15000.
-         threads (int, optional): Number of threads to use for processing. Defaults to 8.
-         batch_size (int, optional): Number of audio segments to process in a single batch. Defaults to 1.
-     Raises:
-         FileNotFoundError: If the input path or database path does not exist.
-         ValueError: If any of the parameters are invalid.
-     Note:
-         Ensure that the required model files are downloaded and available before
-         calling this function. The `ensure_model_exists` function is used to
-         verify this.
-     Example:
-         embeddings(
-             input="path/to/audio",
-             database="path/to/database",
-             overlap=0.5,
-             audio_speed=1.0,
-             fmin=500,
-             fmax=10000,
-             threads=4,
-             batch_size=2
-         )
-     """
-     from birdnet_analyzer.embeddings.utils import run
-     from birdnet_analyzer.utils import ensure_model_exists
-
-     ensure_model_exists()
-     run(input, database, overlap, audio_speed, fmin, fmax, threads, batch_size)
-
-
- def get_database(db_path: str):
-     """Get the database object. Creates or opens the databse.
-     Args:
-         db: The path to the database.
-     Returns:
-         The database object.
-     """
-     import os
-
-     from perch_hoplite.db import sqlite_usearch_impl
-
-     if not os.path.exists(db_path):
-         os.makedirs(os.path.dirname(db_path), exist_ok=True)
-         db = sqlite_usearch_impl.SQLiteUsearchDB.create(
-             db_path=db_path,
-             usearch_cfg=sqlite_usearch_impl.get_default_usearch_config(embedding_dim=1024), # TODO dont hardcode this
-         )
-         return db
-     return sqlite_usearch_impl.SQLiteUsearchDB.create(db_path=db_path)
+ def embeddings(
+     audio_input: str,
+     database: str,
+     *,
+     overlap: float = 0.0,
+     audio_speed: float = 1.0,
+     fmin: int = 0,
+     fmax: int = 15000,
+     threads: int = 8,
+     batch_size: int = 1,
+     file_output: str | None = None,
+ ):
+     """
+     Generates embeddings for audio files using the BirdNET-Analyzer.
+     This function processes audio files to extract embeddings, which are
+     representations of audio features. The embeddings can be used for
+     further analysis or comparison.
+     Args:
+         audio_input (str): Path to the input audio file or directory containing audio files.
+         database (str): Path to the database where embeddings will be stored.
+         overlap (float, optional): Overlap between consecutive audio segments in seconds. Defaults to 0.0.
+         audio_speed (float, optional): Speed factor for audio processing. Defaults to 1.0.
+         fmin (int, optional): Minimum frequency (in Hz) for audio analysis. Defaults to 0.
+         fmax (int, optional): Maximum frequency (in Hz) for audio analysis. Defaults to 15000.
+         threads (int, optional): Number of threads to use for processing. Defaults to 8.
+         batch_size (int, optional): Number of audio segments to process in a single batch. Defaults to 1.
+     Raises:
+         FileNotFoundError: If the input path or database path does not exist.
+         ValueError: If any of the parameters are invalid.
+     Note:
+         Ensure that the required model files are downloaded and available before
+         calling this function. The `ensure_model_exists` function is used to
+         verify this.
+     Example:
+         embeddings(
+             "path/to/audio",
+             "path/to/database",
+             overlap=0.5,
+             audio_speed=1.0,
+             fmin=500,
+             fmax=10000,
+             threads=4,
+             batch_size=2
+         )
+     """
+     from birdnet_analyzer.embeddings.utils import run
+     from birdnet_analyzer.utils import ensure_model_exists
+
+     ensure_model_exists()
+     run(audio_input, database, overlap, audio_speed, fmin, fmax, threads, batch_size, file_output)
+
+
+ def get_database(db_path: str):
+     """Get the database object. Creates or opens the databse.
+     Args:
+         db: The path to the database.
+     Returns:
+         The database object.
+     """
+     import os
+
+     from perch_hoplite.db import sqlite_usearch_impl
+
+     if not os.path.exists(db_path):
+         os.makedirs(os.path.dirname(db_path), exist_ok=True)
+         return sqlite_usearch_impl.SQLiteUsearchDB.create(
+             db_path=db_path,
+             usearch_cfg=sqlite_usearch_impl.get_default_usearch_config(embedding_dim=1024), # TODO: dont hardcode this
+         )
+     return sqlite_usearch_impl.SQLiteUsearchDB.create(db_path=db_path)
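
The hunk above renames the first parameter of embeddings() from input to audio_input (it no longer shadows the Python builtin) and adds an optional keyword-only file_output argument that is forwarded to run(). A minimal usage sketch against the new 2.1.0 signature; the paths below are placeholders, not files shipped with the package:

    from birdnet_analyzer.embeddings.core import embeddings

    # Placeholders: point these at your own recordings, database location and export folder.
    embeddings(
        "path/to/audio",                # audio_input (was `input` in 2.0.0)
        "path/to/database",             # usearch/SQLite database, created on first use via get_database()
        overlap=0.5,
        fmin=500,
        fmax=10000,
        threads=4,
        batch_size=2,
        file_output="path/to/exports",  # new in 2.1.0: also write per-segment embedding text files
    )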

birdnet_analyzer/embeddings/utils.py
@@ -1,193 +1,220 @@
- """Module used to extract embeddings for samples."""
-
- import datetime
- import os
-
- import numpy as np
-
- import birdnet_analyzer.audio as audio
- import birdnet_analyzer.config as cfg
- import birdnet_analyzer.model as model
- import birdnet_analyzer.utils as utils
- from birdnet_analyzer.analyze.utils import get_raw_audio_from_file
- from birdnet_analyzer.embeddings.core import get_database
-
-
- from perch_hoplite.db import sqlite_usearch_impl
- from perch_hoplite.db import interface as hoplite
- from ml_collections import ConfigDict
- from functools import partial
- from tqdm import tqdm
- from multiprocessing import Pool
-
-
- DATASET_NAME: str = "birdnet_analyzer_dataset"
-
-
- def analyze_file(item, db: sqlite_usearch_impl.SQLiteUsearchDB):
-     """Extracts the embeddings for a file.
-
-     Args:
-         item: (filepath, config)
-     """
-     # Get file path and restore cfg
-     fpath: str = item[0]
-     cfg.set_config(item[1])
-
-     offset = 0
-     duration = cfg.FILE_SPLITTING_DURATION
-
-     try:
-         fileLengthSeconds = int(audio.get_audio_file_length(fpath))
-     except Exception as ex:
-         # Write error log
-         print(f"Error: Cannot analyze audio file {fpath}. File corrupt?\n", flush=True)
-         utils.write_error_log(ex)
-
-         return None
-
-     # Start time
-     start_time = datetime.datetime.now()
-
-     # Status
-     print(f"Analyzing {fpath}", flush=True)
-
-     source_id = fpath
-
-     # Process each chunk
-     try:
-         while offset < fileLengthSeconds:
-             chunks = get_raw_audio_from_file(fpath, offset, duration)
-             start, end = offset, cfg.SIG_LENGTH + offset
-             samples = []
-             timestamps = []
-
-             for c in range(len(chunks)):
-                 # Add to batch
-                 samples.append(chunks[c])
-                 timestamps.append([start, end])
-
-                 # Advance start and end
-                 start += cfg.SIG_LENGTH - cfg.SIG_OVERLAP
-                 end = start + cfg.SIG_LENGTH
-
-                 # Check if batch is full or last chunk
-                 if len(samples) < cfg.BATCH_SIZE and c < len(chunks) - 1:
-                     continue
-
-                 # Prepare sample and pass through model
-                 data = np.array(samples, dtype="float32")
-                 e = model.embeddings(data)
-
-                 # Add to results
-                 for i in range(len(samples)):
-                     # Get timestamp
-                     s_start, s_end = timestamps[i]
-
-                     # Check if embedding already exists
-                     existing_embedding = db.get_embeddings_by_source(
-                         DATASET_NAME, source_id, np.array([s_start, s_end])
-                     )
-
-                     if existing_embedding.size == 0:
-                         # Get prediction
-                         embeddings = e[i]
-
-                         # Store embeddings
-                         embeddings_source = hoplite.EmbeddingSource(DATASET_NAME, source_id, np.array([s_start, s_end]))
-
-                         # Insert into database
-                         db.insert_embedding(embeddings, embeddings_source)
-                         db.commit()
-
-                 # Reset batch
-                 samples = []
-                 timestamps = []
-
-             offset = offset + duration
-
-     except Exception as ex:
-         # Write error log
-         print(f"Error: Cannot analyze audio file {fpath}.", flush=True)
-         utils.write_error_log(ex)
-
-         return
-
-     delta_time = (datetime.datetime.now() - start_time).total_seconds()
-     print("Finished {} in {:.2f} seconds".format(fpath, delta_time), flush=True)
-
-
- def check_database_settings(db: sqlite_usearch_impl.SQLiteUsearchDB):
-     try:
-         settings = db.get_metadata("birdnet_analyzer_settings")
-         if (
-             settings["BANDPASS_FMIN"] != cfg.BANDPASS_FMIN
-             or settings["BANDPASS_FMAX"] != cfg.BANDPASS_FMAX
-             or settings["AUDIO_SPEED"] != cfg.AUDIO_SPEED
-         ):
-             raise ValueError(
-                 "Database settings do not match current configuration. DB Settings are: fmin: {}, fmax: {}, audio_speed: {}".format(
-                     settings["BANDPASS_FMIN"], settings["BANDPASS_FMAX"], settings["AUDIO_SPEED"]
-                 )
-             )
-     except KeyError:
-         settings = ConfigDict(
-             {"BANDPASS_FMIN": cfg.BANDPASS_FMIN, "BANDPASS_FMAX": cfg.BANDPASS_FMAX, "AUDIO_SPEED": cfg.AUDIO_SPEED}
-         )
-         db.insert_metadata("birdnet_analyzer_settings", settings)
-         db.commit()
-
-
- def run(input, database, overlap, audio_speed, fmin, fmax, threads, batchsize):
-     ### Make sure to comment out appropriately if you are not using args. ###
-
-     # Set input and output path
-     cfg.INPUT_PATH = input
-
-     # Parse input files
-     if os.path.isdir(cfg.INPUT_PATH):
-         cfg.FILE_LIST = utils.collect_audio_files(cfg.INPUT_PATH)
-     else:
-         cfg.FILE_LIST = [cfg.INPUT_PATH]
-
-     # Set overlap
-     cfg.SIG_OVERLAP = max(0.0, min(2.9, float(overlap)))
-
-     # Set audio speed
-     cfg.AUDIO_SPEED = max(0.01, audio_speed)
-
-     # Set bandpass frequency range
-     cfg.BANDPASS_FMIN = max(0, min(cfg.SIG_FMAX, int(fmin)))
-     cfg.BANDPASS_FMAX = max(cfg.SIG_FMIN, min(cfg.SIG_FMAX, int(fmax)))
-
-     # Set number of threads
-     if os.path.isdir(cfg.INPUT_PATH):
-         cfg.CPU_THREADS = max(1, int(threads))
-         cfg.TFLITE_THREADS = 1
-     else:
-         cfg.CPU_THREADS = 1
-         cfg.TFLITE_THREADS = max(1, int(threads))
-
-     cfg.CPU_THREADS = 1 # TODO: with the current implementation, we can't use more than 1 thread
-
-     # Set batch size
-     cfg.BATCH_SIZE = max(1, int(batchsize))
-
-     # Add config items to each file list entry.
-     # We have to do this for Windows which does not
-     # support fork() and thus each process has to
-     # have its own config. USE LINUX!
-     flist = [(f, cfg.get_config()) for f in cfg.FILE_LIST]
-
-     db = get_database(database)
-     check_database_settings(db)
-
-     # Analyze files
-     if cfg.CPU_THREADS < 2:
-         for entry in tqdm(flist):
-             analyze_file(entry, db)
-     else:
-         with Pool(cfg.CPU_THREADS) as p:
-             tqdm(p.imap(partial(analyze_file, db=db), flist))
-
-     db.db.close()
+ """Module used to extract embeddings for samples."""
+
+ import datetime
+ import os
+ from functools import partial
+ from multiprocessing import Pool
+
+ import numpy as np
+ from ml_collections import ConfigDict
+ from perch_hoplite.db import interface as hoplite
+ from perch_hoplite.db import sqlite_usearch_impl
+ from tqdm import tqdm
+
+ import birdnet_analyzer.config as cfg
+ from birdnet_analyzer import audio, model, utils
+ from birdnet_analyzer.analyze.utils import get_raw_audio_from_file
+ from birdnet_analyzer.embeddings.core import get_database
+
+ DATASET_NAME: str = "birdnet_analyzer_dataset"
+
+
+ def analyze_file(item, db: sqlite_usearch_impl.SQLiteUsearchDB):
+     """Extracts the embeddings for a file.
+
+     Args:
+         item: (filepath, config)
+     """
+
+     # Get file path and restore cfg
+     fpath: str = item[0]
+     cfg.set_config(item[1])
+
+     offset = 0
+     duration = cfg.FILE_SPLITTING_DURATION
+
+     try:
+         fileLengthSeconds = int(audio.get_audio_file_length(fpath))
+     except Exception as ex:
+         # Write error log
+         print(f"Error: Cannot analyze audio file {fpath}. File corrupt?\n", flush=True)
+         utils.write_error_log(ex)
+
+         return
+
+     # Start time
+     start_time = datetime.datetime.now()
+
+     # Status
+     print(f"Analyzing {fpath}", flush=True)
+
+     source_id = fpath
+
+     # Process each chunk
+     try:
+         while offset < fileLengthSeconds:
+             chunks = get_raw_audio_from_file(fpath, offset, duration)
+             start, end = offset, cfg.SIG_LENGTH + offset
+             samples = []
+             timestamps = []
+
+             for c in range(len(chunks)):
+                 # Add to batch
+                 samples.append(chunks[c])
+                 timestamps.append([start, end])
+
+                 # Advance start and end
+                 start += cfg.SIG_LENGTH - cfg.SIG_OVERLAP
+                 end = start + cfg.SIG_LENGTH
+
+                 # Check if batch is full or last chunk
+                 if len(samples) < cfg.BATCH_SIZE and c < len(chunks) - 1:
+                     continue
+
+                 # Prepare sample and pass through model
+                 data = np.array(samples, dtype="float32")
+                 e = model.embeddings(data)
+
+                 # Add to results
+                 for i in range(len(samples)):
+                     # Get timestamp
+                     s_start, s_end = timestamps[i]
+
+                     # Check if embedding already exists
+                     existing_embedding = db.get_embeddings_by_source(DATASET_NAME, source_id, np.array([s_start, s_end]))
+
+                     if existing_embedding.size == 0:
+                         # Get prediction
+                         embeddings = e[i]
+
+                         # Store embeddings
+                         embeddings_source = hoplite.EmbeddingSource(DATASET_NAME, source_id, np.array([s_start, s_end]))
+
+                         # Insert into database
+                         db.insert_embedding(embeddings, embeddings_source)
+                         db.commit()
+
+                 # Reset batch
+                 samples = []
+                 timestamps = []
+
+             offset = offset + duration
+
+     except Exception as ex:
+         # Write error log
+         print(f"Error: Cannot analyze audio file {fpath}.", flush=True)
+         utils.write_error_log(ex)
+
+         return
+
+     delta_time = (datetime.datetime.now() - start_time).total_seconds()
+     print(f"Finished {fpath} in {delta_time:.2f} seconds", flush=True)
+
+
+ def check_database_settings(db: sqlite_usearch_impl.SQLiteUsearchDB):
+     try:
+         settings = db.get_metadata("birdnet_analyzer_settings")
+         if settings["BANDPASS_FMIN"] != cfg.BANDPASS_FMIN or settings["BANDPASS_FMAX"] != cfg.BANDPASS_FMAX or settings["AUDIO_SPEED"] != cfg.AUDIO_SPEED:
+             raise ValueError(
+                 "Database settings do not match current configuration. DB Settings are: fmin:"
+                 + f"{settings['BANDPASS_FMIN']}, fmax: {settings['BANDPASS_FMAX']}, audio_speed: {settings['AUDIO_SPEED']}"
+             )
+     except KeyError:
+         settings = ConfigDict({"BANDPASS_FMIN": cfg.BANDPASS_FMIN, "BANDPASS_FMAX": cfg.BANDPASS_FMAX, "AUDIO_SPEED": cfg.AUDIO_SPEED})
+         db.insert_metadata("birdnet_analyzer_settings", settings)
+         db.commit()
+
+
+ def create_file_output(output_path: str, db: sqlite_usearch_impl.SQLiteUsearchDB):
+     """Creates a file output for the database.
+
+     Args:
+         output_path: Path to the output file.
+         db: Database object.
+     """
+     # Check if output path exists
+     if not os.path.exists(output_path):
+         os.makedirs(output_path)
+     # Get all embeddings
+     embedding_ids = db.get_embedding_ids()
+
+     # Write embeddings to file
+     for embedding_id in embedding_ids:
+         embedding = db.get_embedding(embedding_id)
+         source = db.get_embedding_source(embedding_id)
+
+         # Get start and end time
+         start, end = source.offsets
+
+         source_id = source.source_id.rsplit(".", 1)[0]
+
+         filename = f"{source_id}_{start}_{end}.birdnet.embeddings.txt"
+
+         # Get the common prefix between the output path and the filename
+         common_prefix = os.path.commonpath([output_path, os.path.dirname(filename)])
+         relative_filename = os.path.relpath(filename, common_prefix)
+         target_path = os.path.join(output_path, relative_filename)
+
+         # Ensure the target directory exists
+         os.makedirs(os.path.dirname(target_path), exist_ok=True)
+
+         # Write embedding values to a text file
+         with open(target_path, "w") as f:
+             f.write(",".join(map(str, embedding.tolist())))
+
+ def run(audio_input, database, overlap, audio_speed, fmin, fmax, threads, batchsize, file_output):
+     ### Make sure to comment out appropriately if you are not using args. ###
+
+     # Set input and output path
+     cfg.INPUT_PATH = audio_input
+
+     # Parse input files
+     if os.path.isdir(cfg.INPUT_PATH):
+         cfg.FILE_LIST = utils.collect_audio_files(cfg.INPUT_PATH)
+     else:
+         cfg.FILE_LIST = [cfg.INPUT_PATH]
+
+     # Set overlap
+     cfg.SIG_OVERLAP = max(0.0, min(2.9, float(overlap)))
+
+     # Set audio speed
+     cfg.AUDIO_SPEED = max(0.01, audio_speed)
+
+     # Set bandpass frequency range
+     cfg.BANDPASS_FMIN = max(0, min(cfg.SIG_FMAX, int(fmin)))
+     cfg.BANDPASS_FMAX = max(cfg.SIG_FMIN, min(cfg.SIG_FMAX, int(fmax)))
+
+     # Set number of threads
+     if os.path.isdir(cfg.INPUT_PATH):
+         cfg.CPU_THREADS = max(1, int(threads))
+         cfg.TFLITE_THREADS = 1
+     else:
+         cfg.CPU_THREADS = 1
+         cfg.TFLITE_THREADS = max(1, int(threads))
+
+     cfg.CPU_THREADS = 1 # TODO: with the current implementation, we can't use more than 1 thread
+
+     # Set batch size
+     cfg.BATCH_SIZE = max(1, int(batchsize))
+
+     # Add config items to each file list entry.
+     # We have to do this for Windows which does not
+     # support fork() and thus each process has to
+     # have its own config. USE LINUX!
+     flist = [(f, cfg.get_config()) for f in cfg.FILE_LIST]
+
+     db = get_database(database)
+     check_database_settings(db)
+
+     # Analyze files
+     if cfg.CPU_THREADS < 2:
+         for entry in tqdm(flist):
+             analyze_file(entry, db)
+     else:
+         with Pool(cfg.CPU_THREADS) as p:
+             tqdm(p.imap(partial(analyze_file, db=db), flist))
+
+     if file_output:
+         create_file_output(file_output, db)
+
+     db.db.close()
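
When file_output is set, run() now finishes by calling create_file_output(), which writes each stored embedding as a single line of comma-separated values to a <source>_<start>_<end>.birdnet.embeddings.txt file under the export folder. A small sketch for loading such an export back into a NumPy vector; the path below is a placeholder and assumes an export has already been produced:

    import numpy as np

    # Placeholder path following the naming scheme used by create_file_output().
    export_path = "path/to/exports/recording_0.0_3.0.birdnet.embeddings.txt"

    with open(export_path) as f:
        # One line of comma-separated floats, as written by ",".join(map(str, embedding.tolist()))
        vector = np.array([float(v) for v in f.read().split(",")], dtype="float32")

    print(vector.shape)  # expected (1024,), matching embedding_dim=1024 in get_database()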