livepilot 1.23.2 → 1.23.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. package/CHANGELOG.md +124 -0
  2. package/README.md +108 -10
  3. package/m4l_device/LivePilot_Analyzer.amxd +0 -0
  4. package/m4l_device/livepilot_bridge.js +39 -1
  5. package/mcp_server/__init__.py +1 -1
  6. package/mcp_server/atlas/cross_pack_chain.py +658 -0
  7. package/mcp_server/atlas/demo_story.py +700 -0
  8. package/mcp_server/atlas/extract_chain.py +786 -0
  9. package/mcp_server/atlas/macro_fingerprint.py +554 -0
  10. package/mcp_server/atlas/overlays.py +95 -3
  11. package/mcp_server/atlas/pack_aware_compose.py +1255 -0
  12. package/mcp_server/atlas/preset_resolver.py +238 -0
  13. package/mcp_server/atlas/tools.py +1001 -31
  14. package/mcp_server/atlas/transplant.py +1177 -0
  15. package/mcp_server/mix_engine/state_builder.py +44 -1
  16. package/mcp_server/runtime/capability_state.py +34 -3
  17. package/mcp_server/runtime/remote_commands.py +10 -0
  18. package/mcp_server/server.py +45 -24
  19. package/mcp_server/tools/agent_os.py +33 -9
  20. package/mcp_server/tools/analyzer.py +84 -23
  21. package/mcp_server/tools/browser.py +20 -1
  22. package/mcp_server/tools/devices.py +78 -11
  23. package/mcp_server/tools/perception.py +5 -1
  24. package/mcp_server/tools/tracks.py +39 -2
  25. package/mcp_server/user_corpus/__init__.py +48 -0
  26. package/mcp_server/user_corpus/manifest.py +142 -0
  27. package/mcp_server/user_corpus/plugin_engine/__init__.py +39 -0
  28. package/mcp_server/user_corpus/plugin_engine/detector.py +579 -0
  29. package/mcp_server/user_corpus/plugin_engine/manual.py +347 -0
  30. package/mcp_server/user_corpus/plugin_engine/research.py +247 -0
  31. package/mcp_server/user_corpus/runner.py +261 -0
  32. package/mcp_server/user_corpus/scanner.py +115 -0
  33. package/mcp_server/user_corpus/scanners/__init__.py +18 -0
  34. package/mcp_server/user_corpus/scanners/adg.py +79 -0
  35. package/mcp_server/user_corpus/scanners/als.py +144 -0
  36. package/mcp_server/user_corpus/scanners/amxd.py +374 -0
  37. package/mcp_server/user_corpus/scanners/plugin_preset.py +202 -0
  38. package/mcp_server/user_corpus/tools.py +904 -0
  39. package/mcp_server/user_corpus/wizard.py +224 -0
  40. package/package.json +2 -2
  41. package/remote_script/LivePilot/__init__.py +1 -1
  42. package/remote_script/LivePilot/browser.py +7 -2
  43. package/remote_script/LivePilot/devices.py +9 -0
  44. package/remote_script/LivePilot/simpler_sample.py +98 -0
  45. package/requirements.txt +3 -3
  46. package/server.json +2 -2
package/mcp_server/atlas/macro_fingerprint.py
@@ -0,0 +1,554 @@
+"""Macro-fingerprint similarity matching for Pack-Atlas Phase D.
+
+Finds presets with similar macro state to a source preset by computing
+macro-name overlap (synonym-aware) + value distance. Returns top-K matches
+scored and ranked, each with a generated rationale.
+
+All data comes from the _preset_parses JSON sidecar layer — no Live
+connection required.
+"""
+
+from __future__ import annotations
+
+import json
+import re
+import unicodedata
+from functools import lru_cache
+from pathlib import Path
+from typing import Generator
+
+# ─── Paths ───────────────────────────────────────────────────────────────────
+
+PRESET_PARSES_ROOT = (
+    Path.home() / ".livepilot" / "atlas-overlays" / "packs" / "_preset_parses"
+)
+
+# ─── Synonym Dictionary ───────────────────────────────────────────────────────
+# Maps a canonical key → list of raw macro name variants (all lowercase).
+# Built from a corpus survey of 3,813 sidecars / 26,433 named macros.
+# Top 5 raw names in corpus: volume (2451), attack (1997), tone (1383),
+# filter cutoff (1249), pitch (1218).
+
+MACRO_SYNONYMS: dict[str, list[str]] = {
+    # ── Filter ────────────────────────────────────────────────────────────────
+    "filter_cutoff": [
+        "filter cutoff", "filter freq", "filter control", "cutoff",
+        "lowpass", "lp cut", "filter sweep", "cutoff freq",
+        "filter frequency", "filiter cutoff", "filt cutoff",
+        "filter 1 cutoff", "filter 2 cutoff", "input cutoff",
+        "output cutoff", "creak filter", "rumble filter",
+        "bass cutoff", "high cutoff", "low cutoff",
+        "filter drift", "keyhole filter",
+    ],
+    "filter_resonance": [
+        "filter resonance", "filter reso", "resonance", "reso",
+        "filter q", "filter res", "filter r", "resonant freq",
+        "reso note", "filter rezo", "rezo",
+    ],
+    "filter_envelope": [
+        "filter envelope", "filter env", "filter attack", "filter decay",
+        "filter mod", "filter lfo", "fe < env", "fliter env",
+        "filter env amount", "filter env decay",
+    ],
+    "low_cut": [
+        "low cut", "lo cut", "high pass", "hi pass", "hp cut",
+        "highpass", "lo pass", "low cut filter", "high cut filter",
+        "bass cut",
+    ],
+    "high_cut": [
+        "high cut", "hi cut", "low pass", "lp", "treble cut",
+        "tone", "brightness", "brighten", "dull to bright",
+        "dark to light", "tonal balance",
+    ],
+
+    # ── Amplitude / Volume ────────────────────────────────────────────────────
+    "volume": [
+        "volume", "vol", "output", "output vol", "level",
+        "output volume", "release volume", "rack volume",
+        "master vol", "gain", "output level", " volume",
+    ],
+    "attack": [
+        "attack", "attack time", "env attack", "legato attack",
+        " attack", "fade in", "fade in <<< o",
+    ],
+    "decay": [
+        "decay", "decay time", "env decay", "reso decay",
+        "decay release", "decay 1", "decay 2", "decay 3",
+    ],
+    "release": [
+        "release", "release time", "tail", "decay time",
+        "env release", "key off volume", "k & s release",
+        "legato release", "press release", "release level",
+    ],
+    "sustain": [
+        "sustain", "sus",
+    ],
+
+    # ── Space / Reverb ────────────────────────────────────────────────────────
+    "reverb": [
+        "reverb", "verb", "reverb amount", "reverb mix", "reverb dry/wet",
+        "reverb time", "reverb decay", "reverb level", "reverb blend",
+        "reverb vol", "ambience", "reverb d/w", "space", "space amount",
+        "space 1", "space 2", "room", "into space", "orbit reverb",
+        "ambient amount", "warehouse verb", "reclusive verb",
+        "dizzy verb", "ghosts wet/dry", "verb level",
+    ],
+    "reverb_time": [
+        "reverb time", "reverb decay", "reverb length", "room",
+        "reverb d/w",
+    ],
+
+    # ── Delay / Echo ──────────────────────────────────────────────────────────
+    "delay": [
+        "delay", "echo", "delay amount", "delay mix", "delay length",
+        "delay time", "delay rate", "delay feedback", "delay crank",
+        "delay vol", "delay d/w", "grain delay", "swarm delay",
+        "echo amount", "echo d/w", "roomy delay",
+    ],
+    "feedback": [
+        "feedback", "fdback", "fdbk", "feed back",
+        "delay feedback", "phaser fdback",
+    ],
+
+    # ── Modulation / Movement ─────────────────────────────────────────────────
+    "movement": [
+        "movement", "motion", "lfo amount", "modulation", "mod",
+        "mod amount", "motion speed", "wobble", "pulse rate",
+        "lfo rate", "mod rate", "mod freq",
+    ],
+    "lfo_rate": [
+        "lfo rate", "lfo sync rate", "mod rate", "motion speed",
+        "pulse rate", "tremolo speed", "rate of flux",
+        "arp rate",
+    ],
+    "lfo_amount": [
+        "lfo amount", "mod amount", "modulation", "vibrato",
+        "tremolo amount", "wobble",
+    ],
+
+    # ── Drive / Distortion ────────────────────────────────────────────────────
+    "drive": [
+        "drive", "overdrive", "saturation", "drive amount",
+        "saturate", "distort", "distortion", "grit",
+        "amp drive", "saturator amount", "over drive",
+        "vinyl distortion", "stormy overdrive", "bit crush",
+        "erosion", "tube", "warmth", "low intensity",
+    ],
+    "compressor": [
+        "comp", "compressor", "compress", "glue", "squash",
+        "tighten", "comp amount", "comp thresh", "comp depth",
+        "squeeze", "heavy comp", "drum buss",
+    ],
+
+    # ── Chorus / Ensemble ─────────────────────────────────────────────────────
+    "chorus": [
+        "chorus", "chorus amount", "ensemble", "shimmer chorus",
+        "luster chorus", "chrus fb", "chours amount",
+        "ricochet chorus",
+    ],
+
+    # ── Pitch / Tune ──────────────────────────────────────────────────────────
+    "pitch": [
+        "pitch", "pitch shift", "transpose", "tune",
+        "transp.", "osc 2 transp.", "fine tuning",
+        "pitch env", "pitch envelope", "pitch mod",
+        "pitch decay", "pitch drift", "pitch spring",
+        "pitch tone", "pitch warp", "pitch drag l", "pitch drag r",
+    ],
+    "detune": [
+        "detune", "spread", "stereo spread", "unison", "detuning",
+        "melt detune", "headache detune", "drunkenness",
+    ],
+
+    # ── Stereo / Width ────────────────────────────────────────────────────────
+    "spread": [
+        "spread", "stereo", "width", "stereo spread", "stereo width",
+        "narrow to wide", "pan mod", "random pan", "pan random",
+    ],
+    "dry_wet": [
+        "dry/wet", "dry-wet", "mix", "wet", "effect mix",
+        "rack dry/wet", "spectral d/w", "effect dry/wet",
+        "trails d/w", "chorus d/w", "reverb d/w",
+    ],
+
+    # ── Spectral / Texture ────────────────────────────────────────────────────
+    "spectral": [
+        "spectral amount", "spectral d/w", "spectral note",
+        "spectral shift", "spectral stretch", "spectral time",
+        "freq shift", "bandwidth", "grain delay", "grain pitch",
+        "grain spread",
+    ],
+    "texture": [
+        "texture type", "texture amount", "texture tune",
+        "noise", "noise floor", "noise level", "shaper",
+        "oxide", "color", "timbre",
+    ],
+
+    # ── Noise / Grain ─────────────────────────────────────────────────────────
+    "noise": [
+        "noise", "noise floor", "noise level", "hiss tone",
+        "crackle volume", "crackle density", "nøize ω",
+    ],
+    "grain": [
+        "grain delay", "grain pitch", "grain spread", "grains",
+        "grain size", "sample start", "sample shift",
+    ],
+
+    # ── Drone-specific ────────────────────────────────────────────────────────
+    "drone_density": [
+        "drone density", "root note", "sub level",
+        "sine drone", "xl sub", "boom",
+    ],
+    "sub": [
+        "sub level", "sub osc", "xl sub", "boom",
+        "low end", "bass boost", "bass gain",
+    ],
+
+    # ── Frequency band shaping ────────────────────────────────────────────────
+    "eq_low": [
+        "low shelf", "low gain", "low end", "bass cutoff",
+        "tilt eq", "low freq", "lo gain",
+    ],
+    "eq_high": [
+        "high shelf", "high gain", "treble boost",
+        "top boost", "hi freq", "hi gain",
+    ],
+    "eq_mid": [
+        "mid gain", "mid range", "mid scoop", "master eq",
+        "tilt eq",
+    ],
+
+    # ── Phaser / Flanger ──────────────────────────────────────────────────────
+    "phaser": [
+        "phaser", "phaze", "phase", "flanger", "hat flanger",
+        "churn phaser",
+    ],
+    "ring_mod": [
+        "ring mod", "ring mod vol", "ringmod 1 freq",
+        "ringmod 1 amnt", "ringmod 2 freq", "ringmod 2 amnt",
+        "fm amount", "fm",
+    ],
+}
+
+# Build reverse lookup: raw_name_lower → canonical_key
+_SYNONYM_REVERSE: dict[str, str] = {}
+for _canonical, _variants in MACRO_SYNONYMS.items():
+    for _v in _variants:
+        _SYNONYM_REVERSE[_v.lower().strip()] = _canonical
+
+
+# ─── Schema helpers ───────────────────────────────────────────────────────────
+
+def _is_producer_named(name: str) -> bool:
+    """True when the macro has a producer-assigned name (not 'Macro N')."""
+    if not name:
+        return False
+    return not re.match(r"^Macro\s+\d+$", name.strip())
+
+
+def _ascii_fold(name: str) -> str:
+    """Normalize a producer-stylized macro name for synonym matching.
+
+    1. Apply explicit substitutions for stylized glyphs that NFKD cannot
+       decompose (e.g. † → t, ø → o, Ω → dropped).
+    2. NFKD-decompose and drop any remaining non-ASCII bytes, so accented
+       letters reduce to their base letters.
+    3. Lowercase and strip.
+
+    Examples:
+        "Nøize Ω"         → "noize"
+        "MØD Rate"        → "mod rate"
+        "Fil†er Amount"   → "filter amount"
+        " Spark Entities" → "spark entities"
+    """
+    # Explicit substitutions before decomposition
+    _GLYPH_SUB = str.maketrans({
+        "†": "t",   # dagger → t (e.g. Fil†er → Filter)
+        "Ω": "",    # Greek omega — purely decorative, drop
+        "ω": "",    # lowercase omega
+        "Ø": "o",   # Nordic Ø has no NFKD decomposition, so substitute explicitly
+        "ø": "o",
+        "Æ": "ae",
+        "æ": "ae",
+        "+": " ",   # BLASTS ++ → blasts
+        "≈": "",
+        "→": "",
+    })
+    s = name.translate(_GLYPH_SUB)
+    # NFKD decompose, then drop non-ASCII combining marks
+    s = unicodedata.normalize("NFKD", s).encode("ASCII", "ignore").decode("ASCII")
+    return s.lower().strip()
+
+
+def _canonicalize_macro_name(name: str) -> str:
+    """Return canonical synonym key for name, or normalised raw name as fallback.
+
+    Performs ASCII-fold + Unicode glyph substitution before synonym lookup so
+    producer-stylized names like "Nøize Ω" and "MØD Rate" resolve correctly.
+    """
+    clean = name.lower().strip()
+    # 1) Try exact match first (covers most common names cheaply)
+    if clean in _SYNONYM_REVERSE:
+        return _SYNONYM_REVERSE[clean]
+    # 2) ASCII-fold and try again (handles Unicode-decorated producer names)
+    folded = _ascii_fold(name)
+    if folded in _SYNONYM_REVERSE:
+        return _SYNONYM_REVERSE[folded]
+    # 3) Return the folded form as the fallback canonical key so that
+    #    two presets with identical stylized names still match each other.
+    return folded if folded else clean
+
+
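To make the lookup order concrete, here are a few hand-traced calls (illustrative only, not part of the package source). A variant listed under two canonical keys, such as "mod rate" under both "movement" and "lfo_rate", resolves to whichever key comes later in MACRO_SYNONYMS, because the reverse map is built with last-write-wins:

    _canonicalize_macro_name("Filter Cutoff")   # → "filter_cutoff" (exact variant hit)
    _canonicalize_macro_name("MØD Rate")        # → "lfo_rate" (ASCII-folds to "mod rate";
                                                #   the later "lfo_rate" entry wins)
    _canonicalize_macro_name("Spark Entities")  # → "spark entities" (no synonym hit, so
                                                #   the folded raw name is the fallback key)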
+def _normalize_value(value_raw: str | float | int) -> float:
+    """Normalise a macro value to [0, 1] assuming the 0-127 MIDI range."""
+    try:
+        v = float(value_raw)
+        return max(0.0, min(1.0, v / 127.0))
+    except (TypeError, ValueError):
+        return 0.5
+
+
+# ─── Sidecar loaders ─────────────────────────────────────────────────────────
+
+@lru_cache(maxsize=None)
+def _load_preset_sidecar(pack_slug: str, preset_path_slug: str) -> dict | None:
+    """Load a single preset sidecar. Returns None if the path doesn't exist.
+
+    preset_path_slug is the filename stem, e.g.
+    "instruments_laboratory_razor-wire-drone".
+    """
+    p = PRESET_PARSES_ROOT / pack_slug / f"{preset_path_slug}.json"
+    if not p.exists():
+        return None
+    with p.open() as fh:
+        return json.load(fh)
+
+
+def _iter_all_preset_sidecars() -> Generator[tuple[str, str, dict], None, None]:
+    """Yield (pack_slug, preset_path_slug, sidecar_dict) for every sidecar on disk."""
+    if not PRESET_PARSES_ROOT.exists():
+        return
+    for pack_dir in sorted(PRESET_PARSES_ROOT.iterdir()):
+        if not pack_dir.is_dir():
+            continue
+        pack_slug = pack_dir.name
+        for sidecar_path in sorted(pack_dir.glob("*.json")):
+            preset_path_slug = sidecar_path.stem
+            try:
+                sidecar = json.loads(sidecar_path.read_text())
+            except (json.JSONDecodeError, OSError):
+                continue
+            yield pack_slug, preset_path_slug, sidecar
+
+
+# ─── Fingerprint extraction ───────────────────────────────────────────────────
+
+def _extract_fingerprint(
+    sidecar_dict: dict,
+) -> list[dict]:
+    """Return list of fingerprint entries for non-default named macros.
+
+    Each entry: {name_canonical, name_raw, value, value_normalized}.
+    Skips macros named "Macro N" (default placeholders); unparseable
+    values fall back to 0.0.
+    """
+    fp = []
+    for macro in sidecar_dict.get("macros", []):
+        raw_name = macro.get("name", "")
+        if not _is_producer_named(raw_name):
+            continue
+        raw_value = macro.get("value", 0)
+        try:
+            value_f = float(raw_value)
+        except (TypeError, ValueError):
+            value_f = 0.0
+        canonical = _canonicalize_macro_name(raw_name)
+        fp.append({
+            "name_canonical": canonical,
+            "name_raw": raw_name,
+            "value": value_f,
+            "value_normalized": _normalize_value(raw_value),
+        })
+    return fp
+
+
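For a sidecar macro such as {"name": "Filter Cutoff", "value": 95} (hypothetical data), the entry appended above would be:

    {
        "name_canonical": "filter_cutoff",
        "name_raw": "Filter Cutoff",
        "value": 95.0,
        "value_normalized": 0.7480...,  # 95 / 127, clamped to [0, 1], stored unrounded
    }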
+# ─── Similarity computation ───────────────────────────────────────────────────
+
+def _compute_similarity(
+    source_fp: list[dict],
+    candidate_fp: list[dict],
+) -> tuple[float, list[dict]]:
+    """Compute similarity between two fingerprints.
+
+    Returns (score: float, matching_macros: list[dict]).
+
+    Algorithm (per spec §Phase D):
+        name_overlap_ratio = matched_pairs / max(len(source), len(candidate))
+        value_distance     = mean |v_src - v_cand| over matched pairs
+        score              = 0.6 * name_overlap_ratio + 0.4 * (1 - value_distance)
+    """
+    if not source_fp or not candidate_fp:
+        return 0.0, []
+
+    # Build name → entry maps for the candidate (last wins on dupes)
+    cand_by_canonical: dict[str, dict] = {}
+    cand_by_raw: dict[str, dict] = {}
+    for entry in candidate_fp:
+        cand_by_canonical[entry["name_canonical"]] = entry
+        cand_by_raw[entry["name_raw"].lower().strip()] = entry
+
+    matching_macros = []
+    for src_entry in source_fp:
+        cand_entry = None
+        match_label = ""
+
+        # 1) Canonical match (catches synonyms collapsing to the same bucket)
+        if src_entry["name_canonical"] in cand_by_canonical:
+            cand_entry = cand_by_canonical[src_entry["name_canonical"]]
+            src_raw = src_entry["name_raw"]
+            cand_raw = cand_entry["name_raw"]
+            if src_raw.lower().strip() == cand_raw.lower().strip():
+                match_label = f"{src_raw} = {cand_raw}"
+            else:
+                match_label = f"{src_raw} ≈ {cand_raw}"
+
+        # 2) Exact raw name match (handles identical naming within same pack)
+        if cand_entry is None:
+            raw_lower = src_entry["name_raw"].lower().strip()
+            if raw_lower in cand_by_raw:
+                cand_entry = cand_by_raw[raw_lower]
+                match_label = f"{src_entry['name_raw']} = {cand_entry['name_raw']}"
+
+        if cand_entry is None:
+            continue
+
+        value_dist = abs(
+            src_entry["value_normalized"] - cand_entry["value_normalized"]
+        )
+        matching_macros.append({
+            "name_overlap": match_label,
+            "src_name": src_entry["name_raw"],
+            "cand_name": cand_entry["name_raw"],
+            "src_value": src_entry["value"],
+            "cand_value": cand_entry["value"],
+            "value_distance": round(value_dist, 4),
+        })
+
+    if not matching_macros:
+        return 0.0, []
+
+    n_source = len(source_fp)
+    n_cand = len(candidate_fp)
+    name_overlap_ratio = len(matching_macros) / max(n_source, n_cand)
+    mean_value_dist = sum(m["value_distance"] for m in matching_macros) / len(
+        matching_macros
+    )
+
+    score = 0.6 * name_overlap_ratio + 0.4 * (1.0 - mean_value_dist)
+    return round(min(score, 1.0), 4), matching_macros
+
+
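A hand-worked instance of the scoring formula, using hypothetical fingerprints: a source with 4 named macros, a candidate with 5, and 3 matched pairs at normalized value distances 0.05, 0.10, and 0.15:

    name_overlap_ratio = 3 / max(4, 5)                   # = 0.6
    mean_value_dist    = (0.05 + 0.10 + 0.15) / 3        # = 0.1
    score              = 0.6 * 0.6 + 0.4 * (1.0 - 0.1)   # = 0.36 + 0.36 = 0.72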
+# ─── Pack metadata helpers ───────────────────────────────────────────────────
+
+# Curated pack genre / aesthetic tags — used in rationale generation.
+_PACK_GENRE_TAGS: dict[str, list[str]] = {
+    "drone-lab": ["drone", "ambient", "experimental", "generative"],
+    "mood-reel": ["ambient", "cinematic", "atmospheric"],
+    "inspired-by-nature-by-dillon-bastan": ["generative", "ambient", "spectral"],
+    "lost-and-found": ["lo-fi", "vintage", "experimental"],
+    "synth-essentials": ["synth", "subtractive", "classic"],
+    "drive-and-glow": ["distortion", "saturation", "character"],
+    "glitch-and-wash": ["glitch", "ambient", "experimental", "spectral"],
+    "skitter-and-step": ["rhythmic", "percussion", "glitch"],
+    "voice-box": ["vocal", "choral", "processing"],
+    "build-and-drop": ["edm", "drop", "impact"],
+    "creative-extensions": ["generative", "experimental", "modular"],
+    "beat-tools": ["percussion", "drums", "hip-hop"],
+    "chop-and-swing": ["hip-hop", "funk", "sampling"],
+    "electric-keyboards": ["keys", "vintage", "classic"],
+    "grand-piano": ["piano", "acoustic", "classical"],
+    "guitar-and-bass": ["guitar", "bass", "live-instrument"],
+    "punch-and-tilt": ["percussion", "edm", "impact"],
+    "cv-tools": ["modular", "cv", "generative"],
+    "sequencers": ["generative", "arp", "modular"],
+    "golden-era-hip-hop-drums-by-sound-oracle": ["hip-hop", "sampling", "vintage"],
+    "trap-drums-by-sound-oracle": ["trap", "hip-hop", "drums"],
+    "drum-booth": ["acoustic", "drums", "percussion"],
+    "drum-essentials": ["drums", "percussion", "classic"],
+    "session-drums-club": ["drums", "club", "house"],
+    "session-drums-studio": ["drums", "studio", "live"],
+    "latin-percussion": ["latin", "percussion", "world"],
+    "orchestral-brass": ["orchestral", "brass", "cinematic"],
+    "orchestral-mallets": ["orchestral", "mallets", "classical"],
+    "orchestral-strings": ["orchestral", "strings", "cinematic"],
+    "orchestral-woodwinds": ["orchestral", "woodwinds", "classical"],
+    "brass-quartet-by-spitfire-audio": ["orchestral", "brass", "spitfire"],
+    "string-quartet-by-spitfire-audio": ["orchestral", "strings", "spitfire"],
+    "upright-piano-by-spitfire-audio": ["piano", "acoustic", "spitfire"],
+}
+
+_SPECTRAL_PACKS = frozenset([
+    "drone-lab", "inspired-by-nature-by-dillon-bastan",
+    "glitch-and-wash", "creative-extensions",
+])
+_DUB_TECHNO_PACKS = frozenset([
+    "drone-lab", "mood-reel", "glitch-and-wash",
+])
+
+
+def _generate_rationale(
+    source_pack: str,
+    source_name: str,
+    cand_pack: str,
+    cand_name: str,
+    matching_macros: list[dict],
+) -> str:
+    """Generate a short prose rationale for a match."""
+    parts = []
+
+    # Pack relationship
+    if source_pack == cand_pack:
+        parts.append(f"Same {source_pack!r} pack")
+    else:
+        src_tags = set(_PACK_GENRE_TAGS.get(source_pack, []))
+        cand_tags = set(_PACK_GENRE_TAGS.get(cand_pack, []))
+        shared = src_tags & cand_tags
+        if shared:
+            parts.append(
+                f"Different pack but shared aesthetic: {', '.join(sorted(shared))}"
+            )
+        else:
+            parts.append(f"Cross-pack match ({cand_pack!r})")
+
+    # Spectral / dub-techno tagging
+    if cand_pack in _SPECTRAL_PACKS:
+        parts.append("spectral processing topology")
+    if cand_pack in _DUB_TECHNO_PACKS:
+        parts.append("dub-techno compatible")
+
+    # Describe macro overlaps
+    overlap_names = [m["cand_name"] for m in matching_macros[:3]]
+    if overlap_names:
+        parts.append(f"matching macros: {', '.join(overlap_names)}")
+
+    # Value proximity bonus
+    close = [m for m in matching_macros if m["value_distance"] < 0.1]
+    if len(close) >= 2:
+        parts.append("similar parameter values")
+
+    return "; ".join(parts)
+
+
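Tracing the branches above for a hypothetical drone-lab → glitch-and-wash match with three overlapping macros, two of them closer than 0.1 in normalized value, the returned string would read:

    Different pack but shared aesthetic: ambient, experimental; spectral processing topology; dub-techno compatible; matching macros: Reverb, Texture Amount, Grain Size; similar parameter values

(The macro names are invented; the shared tags and pack-set memberships follow from the tables above.)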
+# ─── Fingerprint strength ─────────────────────────────────────────────────────
+
+def _fingerprint_strength(n_named: int) -> str:
+    if n_named >= 6:
+        return "strong"
+    if n_named >= 3:
+        return "moderate"
+    return "weak"
package/mcp_server/atlas/overlays.py
@@ -11,6 +11,7 @@ Per spec: docs/superpowers/specs/2026-04-25-user-local-extensions-design.md
 from __future__ import annotations
 
 import logging
+import re
 from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Optional
@@ -20,6 +21,87 @@ import yaml
 logger = logging.getLogger(__name__)
 
 
+# ─── Tokenizer (used by OverlayIndex.search) ─────────────────────────────────
+# Producer-style queries use natural language with hyphenated phrases
+# ("Kraftwerk-style bass"), apostrophes ("J Dilla's vibe"), and stylistic
+# suffix words that pad meaning ("style", "vibe", "tone", "mood"). The
+# whitespace-only split + AND-match used to reject those queries because:
+#   - "kraftwerk-style" (one token) wouldn't substring-match "kraftwerk" (in
+#     the artist tag);
+#   - "vibe" (always present in producer queries) would never match any
+#     indexed field, so the AND-clause failed.
+#
+# Fix: tokenize on whitespace + hyphens + apostrophes, drop stop words +
+# stylistic suffix words, drop tokens shorter than 3 chars (single letters
+# substring-match everything → noise). Score logic is unchanged.
+
+# Words that carry no content for music-search queries.
+_STOP_WORDS = frozenset({
+    # articles + determiners
+    "a", "an", "the", "this", "that", "these", "those",
+    # prepositions
+    "of", "in", "on", "with", "for", "to", "at", "by", "from", "as", "into",
+    # possessives + pronouns
+    "my", "your", "his", "her", "its", "our", "their", "i", "we", "you",
+    # stylistic / vibe-coded suffixes — always present in producer queries
+    "style", "styled", "sound", "sounding", "vibe", "vibes", "tone", "toned",
+    "mood", "moody", "era", "school", "esque", "like", "kind", "type",
+    "feel", "feels", "feeling",
+    # generic verbs
+    "is", "was", "are", "were", "has", "have", "had", "get", "gets", "make",
+    "makes", "making", "want", "need", "give",
+    # common modifiers
+    "very", "really", "kinda", "sorta", "more", "less", "some", "any", "all",
+    "just", "only", "also", "too",
+    # music-specific noise
+    "track", "song", "audio", "music", "musical",
+})
+
+
+def _tokenize(query: str) -> list[str]:
+    """Tokenize a search query for OverlayIndex.search.
+
+    - Split on whitespace + hyphens + apostrophes + slashes.
+    - Lowercase.
+    - Drop stop words.
+    - Drop tokens < 3 chars (single-letter tokens substring-match every
+      field — pure noise; 2-char tokens are still mostly noise except a few
+      domain terms like "fm" / "eq" — see _PRESERVED_SHORT_TOKENS).
+
+    Returns a deduplicated list (insertion order preserved).
+    """
+    if not query:
+        return []
+    raw = re.split(r"[\s\-'’/]+", query.lower())
+    seen: dict[str, None] = {}
+    for tok in raw:
+        if not tok:
+            continue
+        if tok in _STOP_WORDS:
+            continue
+        if len(tok) < 3 and tok not in _PRESERVED_SHORT_TOKENS:
+            continue
+        if tok not in seen:
+            seen[tok] = None
+    return list(seen.keys())
+
+
+# A small whitelist of short music-domain terms worth keeping as tokens.
+_PRESERVED_SHORT_TOKENS = frozenset({
+    "fm",   # frequency modulation
+    "am",   # amplitude modulation
+    "eq",   # equalizer
+    "lo",   # lo-fi ("fi" falls to the length rule; "lo" stays a useful tag substring)
+    "hi",   # hi-fi / hi-hat
+    "ot",   # rare but appears in tag suffixes
+    "808", "303", "707", "909", "606",  # iconic drum machines
+    "tr",   # often appears in tags like "tr-808"
+    "dx",   # DX7 et al.
+    "cs",   # CS-80
+    "vc",   # vocoder shortform
+})
+
+
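Hand-traced behaviour on the queries cited in the comment block above (illustrative only):

    _tokenize("Kraftwerk-style bass")
    # split → ["kraftwerk", "style", "bass"]; "style" is a stop word
    # → ["kraftwerk", "bass"]

    _tokenize("J Dilla's SP-404 vibe")
    # split → ["j", "dilla", "s", "sp", "404", "vibe"]
    # "j"/"s" too short; "sp" short and not whitelisted; "vibe" is a stop word
    # → ["dilla", "404"]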
 @dataclass
 class OverlayEntry:
     """A single overlay entity loaded from a YAML file under a namespace.
@@ -108,11 +190,16 @@ class OverlayIndex:
         Sorts by descending score, then by entity_id for stable ties.
         Filters by namespace and/or entity_type if provided.
         Empty query returns empty list.
+
+        Tokenization (v1.23.4+): see module-level _tokenize() — splits on
+        whitespace + hyphens + apostrophes; drops stop words + stylistic
+        suffixes ("style", "vibe", "mood") so producer-vocabulary queries
+        like "Kraftwerk-style bass" or "J Dilla SP-404 vibe" route to the
+        right plugins instead of getting AND-rejected by noise tokens.
         """
-        q = (query or "").strip().lower()
-        if not q:
+        if not query:
             return []
-        tokens = q.split()
+        tokens = _tokenize(query)
         if not tokens:
             return []
 
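With the tokenizer wired in, a producer-vocabulary query reduces to its content words before the AND-match runs. A quick sketch of the intended call (index construction and the optional namespace/entity_type filters, mentioned in the docstring, are elided):

    hits = index.search("Kraftwerk-style bass")
    # _tokenize() yields ["kraftwerk", "bass"]; "style" can no longer
    # AND-reject an entry that matches the remaining tokens.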
@@ -243,6 +330,11 @@ def load_overlays(root: Optional[Path] = None,
         namespace = ns_dir.name
         for yaml_path in sorted(list(ns_dir.rglob("*.yaml")) +
                                 list(ns_dir.rglob("*.yml"))):
+            # Convention: filenames starting with "_" or "manifest.yaml"
+            # are internal config / cache files, not knowledge entries.
+            # The user_corpus pipeline writes its manifest + sidecars there.
+            if yaml_path.name.startswith("_") or yaml_path.name == "manifest.yaml":
+                continue
             try:
                 with yaml_path.open("r") as f:
                     parsed = yaml.safe_load(f)