acidcat 0.9.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. acidcat-0.9.0/LICENSE +21 -0
  2. acidcat-0.9.0/PKG-INFO +320 -0
  3. acidcat-0.9.0/README.md +270 -0
  4. acidcat-0.9.0/pyproject.toml +70 -0
  5. acidcat-0.9.0/setup.cfg +4 -0
  6. acidcat-0.9.0/src/acidcat/__init__.py +3 -0
  7. acidcat-0.9.0/src/acidcat/__main__.py +6 -0
  8. acidcat-0.9.0/src/acidcat/cli.py +134 -0
  9. acidcat-0.9.0/src/acidcat/commands/__init__.py +0 -0
  10. acidcat-0.9.0/src/acidcat/commands/chunks.py +96 -0
  11. acidcat-0.9.0/src/acidcat/commands/detect.py +88 -0
  12. acidcat-0.9.0/src/acidcat/commands/dump.py +106 -0
  13. acidcat-0.9.0/src/acidcat/commands/features.py +106 -0
  14. acidcat-0.9.0/src/acidcat/commands/index.py +1165 -0
  15. acidcat-0.9.0/src/acidcat/commands/info.py +415 -0
  16. acidcat-0.9.0/src/acidcat/commands/inspect.py +1578 -0
  17. acidcat-0.9.0/src/acidcat/commands/query.py +255 -0
  18. acidcat-0.9.0/src/acidcat/commands/scan.py +274 -0
  19. acidcat-0.9.0/src/acidcat/commands/search.py +264 -0
  20. acidcat-0.9.0/src/acidcat/commands/similar.py +209 -0
  21. acidcat-0.9.0/src/acidcat/commands/survey.py +114 -0
  22. acidcat-0.9.0/src/acidcat/core/__init__.py +0 -0
  23. acidcat-0.9.0/src/acidcat/core/aiff.py +295 -0
  24. acidcat-0.9.0/src/acidcat/core/camelot.py +206 -0
  25. acidcat-0.9.0/src/acidcat/core/detect.py +281 -0
  26. acidcat-0.9.0/src/acidcat/core/features.py +97 -0
  27. acidcat-0.9.0/src/acidcat/core/flac.py +61 -0
  28. acidcat-0.9.0/src/acidcat/core/formats.py +88 -0
  29. acidcat-0.9.0/src/acidcat/core/index.py +477 -0
  30. acidcat-0.9.0/src/acidcat/core/midi.py +251 -0
  31. acidcat-0.9.0/src/acidcat/core/mp3.py +182 -0
  32. acidcat-0.9.0/src/acidcat/core/paths.py +167 -0
  33. acidcat-0.9.0/src/acidcat/core/registry.py +410 -0
  34. acidcat-0.9.0/src/acidcat/core/riff.py +417 -0
  35. acidcat-0.9.0/src/acidcat/core/serum.py +53 -0
  36. acidcat-0.9.0/src/acidcat/core/tagged.py +255 -0
  37. acidcat-0.9.0/src/acidcat/mcp_server.py +1686 -0
  38. acidcat-0.9.0/src/acidcat/util/__init__.py +0 -0
  39. acidcat-0.9.0/src/acidcat/util/csv_helpers.py +20 -0
  40. acidcat-0.9.0/src/acidcat/util/deps.py +25 -0
  41. acidcat-0.9.0/src/acidcat/util/midi.py +24 -0
  42. acidcat-0.9.0/src/acidcat/util/stdin.py +29 -0
  43. acidcat-0.9.0/src/acidcat.egg-info/PKG-INFO +320 -0
  44. acidcat-0.9.0/src/acidcat.egg-info/SOURCES.txt +61 -0
  45. acidcat-0.9.0/src/acidcat.egg-info/dependency_links.txt +1 -0
  46. acidcat-0.9.0/src/acidcat.egg-info/entry_points.txt +3 -0
  47. acidcat-0.9.0/src/acidcat.egg-info/requires.txt +27 -0
  48. acidcat-0.9.0/src/acidcat.egg-info/top_level.txt +1 -0
  49. acidcat-0.9.0/tests/test_aiff.py +125 -0
  50. acidcat-0.9.0/tests/test_camelot.py +109 -0
  51. acidcat-0.9.0/tests/test_commands.py +373 -0
  52. acidcat-0.9.0/tests/test_detect.py +143 -0
  53. acidcat-0.9.0/tests/test_format_dispatch.py +77 -0
  54. acidcat-0.9.0/tests/test_index.py +1023 -0
  55. acidcat-0.9.0/tests/test_inspect.py +499 -0
  56. acidcat-0.9.0/tests/test_mcp_server.py +567 -0
  57. acidcat-0.9.0/tests/test_midi.py +130 -0
  58. acidcat-0.9.0/tests/test_paths.py +156 -0
  59. acidcat-0.9.0/tests/test_query.py +231 -0
  60. acidcat-0.9.0/tests/test_registry.py +342 -0
  61. acidcat-0.9.0/tests/test_riff.py +246 -0
  62. acidcat-0.9.0/tests/test_schema_version.py +62 -0
  63. acidcat-0.9.0/tests/test_tagged.py +151 -0
acidcat-0.9.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 hed0rah
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
acidcat-0.9.0/PKG-INFO ADDED
@@ -0,0 +1,320 @@
1
+ Metadata-Version: 2.4
2
+ Name: acidcat
3
+ Version: 0.9.0
4
+ Summary: Audio metadata explorer and analysis tool, like exiftool but for audio
5
+ Author-email: hed0rah <18272116+hed0rah@users.noreply.github.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/hed0rah/acidcat
8
+ Project-URL: Repository, https://github.com/hed0rah/acidcat
9
+ Project-URL: Issues, https://github.com/hed0rah/acidcat/issues
10
+ Keywords: audio,wav,aiff,midi,metadata,riff,acid,bpm,key,camelot,harmonic-mixing,librosa,music,samples,mcp
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Environment :: Console
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Intended Audience :: End Users/Desktop
15
+ Classifier: Topic :: Multimedia :: Sound/Audio :: Analysis
16
+ Classifier: Topic :: Multimedia :: Sound/Audio
17
+ Classifier: License :: OSI Approved :: MIT License
18
+ Classifier: Operating System :: OS Independent
19
+ Classifier: Programming Language :: Python :: 3
20
+ Classifier: Programming Language :: Python :: 3.9
21
+ Classifier: Programming Language :: Python :: 3.10
22
+ Classifier: Programming Language :: Python :: 3.11
23
+ Classifier: Programming Language :: Python :: 3.12
24
+ Classifier: Programming Language :: Python :: 3.13
25
+ Requires-Python: >=3.9
26
+ Description-Content-Type: text/markdown
27
+ License-File: LICENSE
28
+ Requires-Dist: mutagen>=1.47
29
+ Provides-Extra: analysis
30
+ Requires-Dist: librosa>=0.10.1; extra == "analysis"
31
+ Requires-Dist: numpy>=1.26; extra == "analysis"
32
+ Requires-Dist: scipy>=1.13; extra == "analysis"
33
+ Requires-Dist: soundfile>=0.12; extra == "analysis"
34
+ Provides-Extra: ml
35
+ Requires-Dist: acidcat[analysis]; extra == "ml"
36
+ Requires-Dist: pandas>=2.0; extra == "ml"
37
+ Requires-Dist: scikit-learn>=1.3; extra == "ml"
38
+ Provides-Extra: viz
39
+ Requires-Dist: acidcat[ml]; extra == "viz"
40
+ Requires-Dist: matplotlib>=3.7; extra == "viz"
41
+ Requires-Dist: seaborn>=0.12; extra == "viz"
42
+ Provides-Extra: notebook
43
+ Requires-Dist: jupyter>=1.0; extra == "notebook"
44
+ Requires-Dist: ipykernel>=6.0; extra == "notebook"
45
+ Provides-Extra: mcp
46
+ Requires-Dist: mcp>=1.0; extra == "mcp"
47
+ Provides-Extra: all
48
+ Requires-Dist: acidcat[mcp,ml,notebook,viz]; extra == "all"
49
+ Dynamic: license-file
50
+
51
+ <p align="center">
52
+ <img src="docs/logo2.svg" alt="acidcat logo" width="240">
53
+ </p>
54
+
55
+ # acidcat
56
+
57
+ Audio metadata explorer and analysis tool -- like exiftool, but for audio.
58
+
59
+ Reads BPM, key, duration, tags, and format info from WAV, AIFF, MP3, FLAC,
60
+ OGG, Opus, M4A, MIDI, and Serum presets. Core metadata needs only mutagen.
61
+ Optional librosa analysis for BPM/key detection and ML feature extraction.
62
+
63
+ Also ships per-library SQLite indexes (`acidcat index`) tracked in a
64
+ small global registry, plus an MCP server (`acidcat-mcp`) so an LLM can
65
+ query your whole collection across libraries by bpm, key, tags, or
66
+ full-text.
67
+
68
+ ## Install
69
+
70
+ git clone https://github.com/hed0rah/acidcat.git
71
+ cd acidcat
72
+ pip install -e . # core + mutagen (WAV/AIFF/MIDI/Serum/MP3/FLAC/OGG/Opus/M4A)
73
+ pip install -e .[analysis] # + librosa BPM/key detection
74
+ pip install -e .[ml] # + sklearn similarity/clustering
75
+ pip install -e .[mcp] # + MCP server (acidcat-mcp)
76
+ pip install -e .[all] # everything
77
+
78
+ ## Quick Start
79
+
80
+ # single file -- instant metadata
81
+ acidcat kick_808.wav
82
+ acidcat loop.mp3
83
+ acidcat pad.flac
84
+
85
+ # pipe from stdin
86
+ cat file.wav | acidcat
87
+ curl https://example.com/loop.mp3 | acidcat -
88
+
89
+ # JSON output for piping
90
+ acidcat kick_808.wav -f json | jq .BPM
91
+
92
+ # deep analysis with librosa
93
+ acidcat kick_808.wav --deep
94
+
95
+ # scan a mixed-format directory
96
+ acidcat scan ~/Samples/Breaks -n 200
97
+
98
+ ## Supported Formats
99
+
100
+ | Format | Extension | What acidcat reads |
101
+ |--------|-----------|-------------------|
102
+ | WAV | `.wav` | BPM, key, loop points, beats, ACID/SMPL chunks, LIST/INFO |
103
+ | AIFF | `.aif` | Duration, format, name, author, copyright, markers |
104
+ | MP3 | `.mp3` | BPM, key, title, artist, album, genre, comment (ID3v2) |
105
+ | FLAC | `.flac` | BPM, key, title, artist, album, genre (Vorbis Comment) |
106
+ | OGG | `.ogg` | BPM, key, title, artist, album, genre (Vorbis Comment) |
107
+ | Opus | `.opus` | BPM, key, title, artist (Vorbis Comment) |
108
+ | M4A | `.m4a` | BPM, key, title, artist, album, genre (iTunes atoms) |
109
+ | MIDI | `.mid` | BPM, key sig, time sig, tracks, note count/range |
110
+ | Serum | `.SerumPreset` | Preset name, author, tags, description |
111
+
112
+ ## Commands
113
+
114
+ | Command | Description |
115
+ |---------|-------------|
116
+ | `acidcat FILE` | Show metadata for a single file (auto-detected) |
117
+ | `acidcat DIR` | Batch-scan a directory (auto-detected) |
118
+ | `acidcat -` | Read from stdin |
119
+ | `acidcat info FILE` | Explicit single-file metadata dump |
120
+ | `acidcat scan DIR` | Batch-scan with CSV output |
121
+ | `acidcat chunks FILE` | Walk RIFF chunks -- offsets, sizes, parsed fields |
122
+ | `acidcat survey DIR` | Count chunk types across a directory tree |
123
+ | `acidcat detect FILE\|DIR` | Estimate BPM/key using librosa |
124
+ | `acidcat features DIR` | Extract 50+ audio features for ML |
125
+ | `acidcat similar CSV find TARGET` | Find similar samples by features |
126
+ | `acidcat similar CSV cluster` | Cluster samples by audio characteristics |
127
+ | `acidcat search CSV query TEXT` | Text-based sample search (legacy CSV) |
128
+ | `acidcat dump FILE CHUNK [...]` | Hex-dump specific RIFF chunks |
129
+ | `acidcat inspect FILE [--hex] [--frames]` | readelf-style structural dump (WAV, RF64, AIFF, MIDI, Serum, MP3, FLAC) with lint warnings; `--frames` for a per-frame/event dump |
130
+ | `acidcat index DIR` | Register DIR as a library and upsert it into its per-library SQLite index |
131
+ | `acidcat query [flags]` | Filter across all registered libraries by bpm/key/tag/text |
132
+
133
+ ## Global Flags
134
+
135
+ -f, --format FMT Output format (default varies by command)
136
+ -o, --output FILE Write output to file
137
+ -q, --quiet Suppress progress output
138
+ -v, --verbose Extra detail
139
+ -n, --num N Max files to scan (default: 500)
140
+ --has CHUNKS Filter by chunk IDs (comma-separated)
141
+ --deep Include librosa analysis
142
+
143
+ Most commands accept `table`, `json`, and `csv` (default `table`, but
144
+ `scan` and `features` default to `csv`). Two differ: `inspect` is
145
+ `table`/`json`, and `dump` is `hex`/`json`.
146
+
147
+ ## Dependency Groups
148
+
149
+ | Group | What it adds | Commands enabled |
150
+ |-------|-------------|-----------------|
151
+ | (none) | mutagen (base) | info, scan, chunks, survey, dump for WAV/AIFF/MIDI/Serum/MP3/FLAC/OGG/Opus/M4A |
152
+ | `[analysis]` | librosa, numpy, scipy, soundfile | detect, info --deep |
153
+ | `[ml]` | + pandas, scikit-learn | features, similar, search |
154
+ | `[viz]` | + matplotlib, seaborn | optional plotting |
155
+ | `[notebook]` | + jupyter, ipykernel | optional notebook env |
156
+ | `[mcp]` | mcp SDK | `acidcat-mcp` stdio server |
157
+ | `[all]` | everything | all commands, all formats |
158
+
159
+ ## Examples
160
+
161
+ ### Metadata Exploration
162
+
163
+ # what chunks exist in your sample library?
164
+ acidcat survey ~/Samples/Loops -n 5000
165
+
166
+ # walk all chunks in a specific file
167
+ acidcat chunks ~/Samples/Loops/breakbeat.wav
168
+
169
+ # hex-dump the ACID and SMPL chunks
170
+ acidcat dump ~/Samples/Loops/breakbeat.wav acid smpl
171
+
172
+ # scan only files with ACID metadata
173
+ acidcat scan ~/Samples/Loops --has acid -n 200
174
+
175
+ # scan a directory with mixed formats (WAV, MP3, FLAC, etc.)
176
+ acidcat scan ~/Samples -n 500
177
+
178
+ ### BPM / Key Detection
179
+
180
+ # estimate BPM/key with librosa (for files without metadata)
181
+ acidcat detect ~/Samples/OneShots
182
+
183
+ # scan with librosa fallback for missing metadata
184
+ acidcat scan ~/Samples/Loops --fallback -n 100
185
+
186
+ ### ML Feature Extraction
187
+
188
+ # extract 50+ audio features to CSV
189
+ acidcat features ~/Samples/Loops -n 500
190
+
191
+ # generate normalized (StandardScaler) ML-ready dataset
192
+ acidcat features ~/Samples/Loops --ml-ready -n 500
193
+
194
+ ### Similarity & Clustering
195
+
196
+ # find 5 samples similar to index 0
197
+ acidcat similar features.csv find 0 -n 5
198
+
199
+ # k-means clustering
200
+ acidcat similar features.csv cluster -k 10 -o clustered.csv
201
+
202
+ ## Libraries (per-directory indexes)
203
+
204
+ `acidcat scan` writes a one-off CSV. `acidcat index` is the persistent
205
+ path: each directory you index becomes a *library* with its own SQLite
206
+ file, and a small global registry at `~/.acidcat/registry.db` lets reads
207
+ fan out across every library you have registered.
208
+
209
+ By default the per-library DB lives centrally at
210
+ `~/.acidcat/libraries/<label>_<hash>.db`. Pass `--in-tree` if you'd
211
+ rather have the DB travel with the data at
212
+ `<library>/.acidcat/index.db`.
213
+
214
+ # register and index a library (label defaults to basename of DIR)
215
+ acidcat index ~/Samples/Loops --label loops
216
+ acidcat index ~/Samples/OneShots --label oneshots
217
+
218
+ # show every registered library
219
+ acidcat index --list
220
+
221
+ # per-library stats
222
+ acidcat index --stats loops
223
+
224
+ # extract librosa features during indexing (slower, enables similarity)
225
+ acidcat index ~/Samples/Loops --label loops --features
226
+
227
+ # rebuild a library's DB from scratch
228
+ acidcat index ~/Samples/Loops --label loops --rebuild
229
+
230
+ # forget a library (registry only) vs remove it (deletes the DB file)
231
+ acidcat index --forget loops
232
+ acidcat index --remove loops
233
+
234
+ # list registered libraries whose DB file is missing on disk
235
+ acidcat index --orphans
236
+
237
+ # import a legacy <name>_tags.json into a library
238
+ acidcat index ~/Samples --label samples --import-tags old_tags.json
239
+
240
+ Nested libraries are rejected at registration time: if you've registered
241
+ `~/Samples`, you can't also register `~/Samples/Loops` until you forget
242
+ the parent.
243
+
244
+ ### Discovery
245
+
246
+ For users with many scattered packs, `--discover` walks a tree and
247
+ registers every qualifying subdirectory as its own library in one pass.
248
+
249
+ # preview what would get registered (no writes)
250
+ acidcat index --discover ~/Samples --dry-run
251
+
252
+ # actually register them
253
+ acidcat index --discover ~/Samples
254
+
255
+ # tighter threshold and namespacing for a subset of your collection
256
+ acidcat index --discover /mnt/external/old_drives \
257
+ --min-samples 50 --label-prefix "ext_"
258
+
259
+ A directory qualifies if its subtree (within `--max-depth`, default 3)
260
+ contains at least `--min-samples` audio files (default 20).
261
+ Non-qualifying parents are recursed into so packs nested inside catch-all
262
+ folders still surface. Already-registered roots are skipped. The home
263
+ directory is refused as a discover root to prevent runaway registration.
264
+
265
+ ### Querying
266
+
267
+ By default `acidcat query` fans out across every registered library and
268
+ merges the results.
269
+
270
+ acidcat query --bpm 120:130 --key Am
271
+ acidcat query --tag drums --tag punchy --duration :1
272
+ acidcat query --text "dusty lofi" --limit 20
273
+ acidcat query --format mp3 --root loops
274
+ acidcat query --root loops,oneshots --bpm 128
275
+ acidcat query --bpm 128 --paths-only | xargs -I {} cp {} out/
276
+
277
+ `--root` accepts a label, an absolute path, or a comma-separated list.
278
+ Override the registry on any command with `--registry PATH` or the
279
+ `ACIDCAT_REGISTRY` environment variable.
280
+
281
+ ## MCP Server
282
+
283
+ `acidcat-mcp` is a stdio MCP server that exposes the registered libraries
284
+ as structured tools. An LLM can ask "what libraries do I have?",
285
+ search across them by metadata, find compatible keys via Camelot, or
286
+ (with `[analysis]` installed) find similar samples by cosine similarity
287
+ over librosa features.
288
+
289
+ pip install -e .[mcp] # minimum for discovery + writes
290
+ pip install -e .[analysis,mcp] # unlock find_similar / analyze_*
291
+
292
+ Claude Desktop / Claude Code config:
293
+
294
+ {
295
+ "mcpServers": {
296
+ "acidcat": {
297
+ "command": "acidcat-mcp"
298
+ }
299
+ }
300
+ }
301
+
302
+ Optional: pass `--registry PATH` on the server process or set
303
+ `ACIDCAT_REGISTRY` if your registry lives outside the default location.
304
+
305
+ Tool tiers (each tool description starts with `Fast.`, `SLOW.`, or
306
+ `VERY SLOW.` so the model self-selects):
307
+
308
+ - **Fast (SQLite only)**: `search_samples`, `get_sample`, `locate_sample`,
309
+ `list_libraries`, `list_tags`, `list_keys`, `list_formats`,
310
+ `index_stats`, `find_compatible`
311
+ - **Slow analysis** (needs `[analysis]`): `find_similar`, `analyze_sample`,
312
+ `detect_bpm_key`
313
+ - **Index management**: `reindex`, `reindex_features`,
314
+ `discover_libraries`
315
+ - **Write** (marked destructive): `register_library`, `forget_library`,
316
+ `tag_sample`, `set_sample_description`
317
+
318
+ ## License
319
+
320
+ MIT
@@ -0,0 +1,270 @@
1
+ <p align="center">
2
+ <img src="docs/logo2.svg" alt="acidcat logo" width="240">
3
+ </p>
4
+
5
+ # acidcat
6
+
7
+ Audio metadata explorer and analysis tool -- like exiftool, but for audio.
8
+
9
+ Reads BPM, key, duration, tags, and format info from WAV, AIFF, MP3, FLAC,
10
+ OGG, Opus, M4A, MIDI, and Serum presets. Core metadata needs only mutagen.
11
+ Optional librosa analysis for BPM/key detection and ML feature extraction.
12
+
13
+ Also ships per-library SQLite indexes (`acidcat index`) tracked in a
14
+ small global registry, plus an MCP server (`acidcat-mcp`) so an LLM can
15
+ query your whole collection across libraries by bpm, key, tags, or
16
+ full-text.
17
+
18
+ ## Install
19
+
20
+ git clone https://github.com/hed0rah/acidcat.git
21
+ cd acidcat
22
+ pip install -e . # core + mutagen (WAV/AIFF/MIDI/Serum/MP3/FLAC/OGG/Opus/M4A)
23
+ pip install -e .[analysis] # + librosa BPM/key detection
24
+ pip install -e .[ml] # + sklearn similarity/clustering
25
+ pip install -e .[mcp] # + MCP server (acidcat-mcp)
26
+ pip install -e .[all] # everything
27
+
28
+ ## Quick Start
29
+
30
+ # single file -- instant metadata
31
+ acidcat kick_808.wav
32
+ acidcat loop.mp3
33
+ acidcat pad.flac
34
+
35
+ # pipe from stdin
36
+ cat file.wav | acidcat
37
+ curl https://example.com/loop.mp3 | acidcat -
38
+
39
+ # JSON output for piping
40
+ acidcat kick_808.wav -f json | jq .BPM
41
+
42
+ # deep analysis with librosa
43
+ acidcat kick_808.wav --deep
44
+
45
+ # scan a mixed-format directory
46
+ acidcat scan ~/Samples/Breaks -n 200
47
+
48
+ ## Supported Formats
49
+
50
+ | Format | Extension | What acidcat reads |
51
+ |--------|-----------|-------------------|
52
+ | WAV | `.wav` | BPM, key, loop points, beats, ACID/SMPL chunks, LIST/INFO |
53
+ | AIFF | `.aif` | Duration, format, name, author, copyright, markers |
54
+ | MP3 | `.mp3` | BPM, key, title, artist, album, genre, comment (ID3v2) |
55
+ | FLAC | `.flac` | BPM, key, title, artist, album, genre (Vorbis Comment) |
56
+ | OGG | `.ogg` | BPM, key, title, artist, album, genre (Vorbis Comment) |
57
+ | Opus | `.opus` | BPM, key, title, artist (Vorbis Comment) |
58
+ | M4A | `.m4a` | BPM, key, title, artist, album, genre (iTunes atoms) |
59
+ | MIDI | `.mid` | BPM, key sig, time sig, tracks, note count/range |
60
+ | Serum | `.SerumPreset` | Preset name, author, tags, description |
61
+
62
+ ## Commands
63
+
64
+ | Command | Description |
65
+ |---------|-------------|
66
+ | `acidcat FILE` | Show metadata for a single file (auto-detected) |
67
+ | `acidcat DIR` | Batch-scan a directory (auto-detected) |
68
+ | `acidcat -` | Read from stdin |
69
+ | `acidcat info FILE` | Explicit single-file metadata dump |
70
+ | `acidcat scan DIR` | Batch-scan with CSV output |
71
+ | `acidcat chunks FILE` | Walk RIFF chunks -- offsets, sizes, parsed fields |
72
+ | `acidcat survey DIR` | Count chunk types across a directory tree |
73
+ | `acidcat detect FILE\|DIR` | Estimate BPM/key using librosa |
74
+ | `acidcat features DIR` | Extract 50+ audio features for ML |
75
+ | `acidcat similar CSV find TARGET` | Find similar samples by features |
76
+ | `acidcat similar CSV cluster` | Cluster samples by audio characteristics |
77
+ | `acidcat search CSV query TEXT` | Text-based sample search (legacy CSV) |
78
+ | `acidcat dump FILE CHUNK [...]` | Hex-dump specific RIFF chunks |
79
+ | `acidcat inspect FILE [--hex] [--frames]` | readelf-style structural dump (WAV, RF64, AIFF, MIDI, Serum, MP3, FLAC) with lint warnings; `--frames` for a per-frame/event dump |
80
+ | `acidcat index DIR` | Register DIR as a library and upsert it into its per-library SQLite index |
81
+ | `acidcat query [flags]` | Filter across all registered libraries by bpm/key/tag/text |
82
+
83
+ ## Global Flags
84
+
85
+ -f, --format FMT Output format (default varies by command)
86
+ -o, --output FILE Write output to file
87
+ -q, --quiet Suppress progress output
88
+ -v, --verbose Extra detail
89
+ -n, --num N Max files to scan (default: 500)
90
+ --has CHUNKS Filter by chunk IDs (comma-separated)
91
+ --deep Include librosa analysis
92
+
93
+ Most commands accept `table`, `json`, and `csv` (default `table`, but
94
+ `scan` and `features` default to `csv`). Two differ: `inspect` is
95
+ `table`/`json`, and `dump` is `hex`/`json`.
96
+
97
+ ## Dependency Groups
98
+
99
+ | Group | What it adds | Commands enabled |
100
+ |-------|-------------|-----------------|
101
+ | (none) | mutagen (base) | info, scan, chunks, survey, dump for WAV/AIFF/MIDI/Serum/MP3/FLAC/OGG/Opus/M4A |
102
+ | `[analysis]` | librosa, numpy, scipy, soundfile | detect, info --deep |
103
+ | `[ml]` | + pandas, scikit-learn | features, similar, search |
104
+ | `[viz]` | + matplotlib, seaborn | optional plotting |
105
+ | `[notebook]` | + jupyter, ipykernel | optional notebook env |
106
+ | `[mcp]` | mcp SDK | `acidcat-mcp` stdio server |
107
+ | `[all]` | everything | all commands, all formats |
108
+
109
+ ## Examples
110
+
111
+ ### Metadata Exploration
112
+
113
+ # what chunks exist in your sample library?
114
+ acidcat survey ~/Samples/Loops -n 5000
115
+
116
+ # walk all chunks in a specific file
117
+ acidcat chunks ~/Samples/Loops/breakbeat.wav
118
+
119
+ # hex-dump the ACID and SMPL chunks
120
+ acidcat dump ~/Samples/Loops/breakbeat.wav acid smpl
121
+
122
+ # scan only files with ACID metadata
123
+ acidcat scan ~/Samples/Loops --has acid -n 200
124
+
125
+ # scan a directory with mixed formats (WAV, MP3, FLAC, etc.)
126
+ acidcat scan ~/Samples -n 500
127
+
128
+ ### BPM / Key Detection
129
+
130
+ # estimate BPM/key with librosa (for files without metadata)
131
+ acidcat detect ~/Samples/OneShots
132
+
133
+ # scan with librosa fallback for missing metadata
134
+ acidcat scan ~/Samples/Loops --fallback -n 100
135
+
136
+ ### ML Feature Extraction
137
+
138
+ # extract 50+ audio features to CSV
139
+ acidcat features ~/Samples/Loops -n 500
140
+
141
+ # generate normalized (StandardScaler) ML-ready dataset
142
+ acidcat features ~/Samples/Loops --ml-ready -n 500
143
+
144
+ ### Similarity & Clustering
145
+
146
+ # find 5 samples similar to index 0
147
+ acidcat similar features.csv find 0 -n 5
148
+
149
+ # k-means clustering
150
+ acidcat similar features.csv cluster -k 10 -o clustered.csv
151
+
152
+ ## Libraries (per-directory indexes)
153
+
154
+ `acidcat scan` writes a one-off CSV. `acidcat index` is the persistent
155
+ path: each directory you index becomes a *library* with its own SQLite
156
+ file, and a small global registry at `~/.acidcat/registry.db` lets reads
157
+ fan out across every library you have registered.
158
+
159
+ By default the per-library DB lives centrally at
160
+ `~/.acidcat/libraries/<label>_<hash>.db`. Pass `--in-tree` if you'd
161
+ rather have the DB travel with the data at
162
+ `<library>/.acidcat/index.db`.
163
+
164
+ # register and index a library (label defaults to basename of DIR)
165
+ acidcat index ~/Samples/Loops --label loops
166
+ acidcat index ~/Samples/OneShots --label oneshots
167
+
168
+ # show every registered library
169
+ acidcat index --list
170
+
171
+ # per-library stats
172
+ acidcat index --stats loops
173
+
174
+ # extract librosa features during indexing (slower, enables similarity)
175
+ acidcat index ~/Samples/Loops --label loops --features
176
+
177
+ # rebuild a library's DB from scratch
178
+ acidcat index ~/Samples/Loops --label loops --rebuild
179
+
180
+ # forget a library (registry only) vs remove it (deletes the DB file)
181
+ acidcat index --forget loops
182
+ acidcat index --remove loops
183
+
184
+ # list registered libraries whose DB file is missing on disk
185
+ acidcat index --orphans
186
+
187
+ # import a legacy <name>_tags.json into a library
188
+ acidcat index ~/Samples --label samples --import-tags old_tags.json
189
+
190
+ Nested libraries are rejected at registration time: if you've registered
191
+ `~/Samples`, you can't also register `~/Samples/Loops` until you forget
192
+ the parent.
193
+
194
+ ### Discovery
195
+
196
+ For users with many scattered packs, `--discover` walks a tree and
197
+ registers every qualifying subdirectory as its own library in one pass.
198
+
199
+ # preview what would get registered (no writes)
200
+ acidcat index --discover ~/Samples --dry-run
201
+
202
+ # actually register them
203
+ acidcat index --discover ~/Samples
204
+
205
+ # tighter threshold and namespacing for a subset of your collection
206
+ acidcat index --discover /mnt/external/old_drives \
207
+ --min-samples 50 --label-prefix "ext_"
208
+
209
+ A directory qualifies if its subtree (within `--max-depth`, default 3)
210
+ contains at least `--min-samples` audio files (default 20).
211
+ Non-qualifying parents are recursed into so packs nested inside catch-all
212
+ folders still surface. Already-registered roots are skipped. The home
213
+ directory is refused as a discover root to prevent runaway registration.
214
+
215
+ ### Querying
216
+
217
+ By default `acidcat query` fans out across every registered library and
218
+ merges the results.
219
+
220
+ acidcat query --bpm 120:130 --key Am
221
+ acidcat query --tag drums --tag punchy --duration :1
222
+ acidcat query --text "dusty lofi" --limit 20
223
+ acidcat query --format mp3 --root loops
224
+ acidcat query --root loops,oneshots --bpm 128
225
+ acidcat query --bpm 128 --paths-only | xargs -I {} cp {} out/
226
+
227
+ `--root` accepts a label, an absolute path, or a comma-separated list.
228
+ Override the registry on any command with `--registry PATH` or the
229
+ `ACIDCAT_REGISTRY` environment variable.
230
+
231
+ ## MCP Server
232
+
233
+ `acidcat-mcp` is a stdio MCP server that exposes the registered libraries
234
+ as structured tools. An LLM can ask "what libraries do I have?",
235
+ search across them by metadata, find compatible keys via Camelot, or
236
+ (with `[analysis]` installed) find similar samples by cosine similarity
237
+ over librosa features.
238
+
239
+ pip install -e .[mcp] # minimum for discovery + writes
240
+ pip install -e .[analysis,mcp] # unlock find_similar / analyze_*
241
+
242
+ Claude Desktop / Claude Code config:
243
+
244
+ {
245
+ "mcpServers": {
246
+ "acidcat": {
247
+ "command": "acidcat-mcp"
248
+ }
249
+ }
250
+ }
251
+
252
+ Optional: pass `--registry PATH` on the server process or set
253
+ `ACIDCAT_REGISTRY` if your registry lives outside the default location.
254
+
255
+ Tool tiers (each tool description starts with `Fast.`, `SLOW.`, or
256
+ `VERY SLOW.` so the model self-selects):
257
+
258
+ - **Fast (SQLite only)**: `search_samples`, `get_sample`, `locate_sample`,
259
+ `list_libraries`, `list_tags`, `list_keys`, `list_formats`,
260
+ `index_stats`, `find_compatible`
261
+ - **Slow analysis** (needs `[analysis]`): `find_similar`, `analyze_sample`,
262
+ `detect_bpm_key`
263
+ - **Index management**: `reindex`, `reindex_features`,
264
+ `discover_libraries`
265
+ - **Write** (marked destructive): `register_library`, `forget_library`,
266
+ `tag_sample`, `set_sample_description`
267
+
268
+ ## License
269
+
270
+ MIT
@@ -0,0 +1,70 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "acidcat"
7
+ version = "0.9.0"
8
+ description = "Audio metadata explorer and analysis tool, like exiftool but for audio"
9
+ readme = {file = "README.md", content-type = "text/markdown"}
10
+ license = {text = "MIT"}
11
+ requires-python = ">=3.9"
12
+ authors = [
13
+ {name = "hed0rah", email = "18272116+hed0rah@users.noreply.github.com"},
14
+ ]
15
+ keywords = [
16
+ "audio", "wav", "aiff", "midi", "metadata", "riff",
17
+ "acid", "bpm", "key", "camelot", "harmonic-mixing",
18
+ "librosa", "music", "samples", "mcp",
19
+ ]
20
+ classifiers = [
21
+ "Development Status :: 4 - Beta",
22
+ "Environment :: Console",
23
+ "Intended Audience :: Developers",
24
+ "Intended Audience :: End Users/Desktop",
25
+ "Topic :: Multimedia :: Sound/Audio :: Analysis",
26
+ "Topic :: Multimedia :: Sound/Audio",
27
+ "License :: OSI Approved :: MIT License",
28
+ "Operating System :: OS Independent",
29
+ "Programming Language :: Python :: 3",
30
+ "Programming Language :: Python :: 3.9",
31
+ "Programming Language :: Python :: 3.10",
32
+ "Programming Language :: Python :: 3.11",
33
+ "Programming Language :: Python :: 3.12",
34
+ "Programming Language :: Python :: 3.13",
35
+ ]
36
+ # mutagen is a base dep because the index dispatcher routes mp3/flac/ogg/m4a
37
+ # through it; a fresh user pointing acidcat at a modern sample collection
38
+ # without mutagen would see those files silently skipped.
39
+ dependencies = [
40
+ "mutagen>=1.47",
41
+ ]
42
+
43
+ [project.urls]
44
+ Homepage = "https://github.com/hed0rah/acidcat"
45
+ Repository = "https://github.com/hed0rah/acidcat"
46
+ Issues = "https://github.com/hed0rah/acidcat/issues"
47
+
48
+ [project.optional-dependencies]
49
+ analysis = [
50
+ "librosa>=0.10.1",
51
+ "numpy>=1.26",
52
+ "scipy>=1.13",
53
+ "soundfile>=0.12",
54
+ ]
55
+ ml = [
56
+ "acidcat[analysis]",
57
+ "pandas>=2.0",
58
+ "scikit-learn>=1.3",
59
+ ]
60
+ viz = ["acidcat[ml]", "matplotlib>=3.7", "seaborn>=0.12"]
61
+ notebook = ["jupyter>=1.0", "ipykernel>=6.0"]
62
+ mcp = ["mcp>=1.0"]
63
+ all = ["acidcat[ml,viz,notebook,mcp]"]
64
+
65
+ [project.scripts]
66
+ acidcat = "acidcat.cli:main"
67
+ acidcat-mcp = "acidcat.mcp_server:main"
68
+
69
+ [tool.setuptools.packages.find]
70
+ where = ["src"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,3 @@
1
+ """acidcat -- audio metadata explorer and analysis tool."""
2
+
3
+ __version__ = "0.9.0"