omnius 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. package/README.md +4959 -0
  2. package/dist/index.d.ts +6 -0
  3. package/dist/index.js +630665 -0
  4. package/dist/launcher.cjs +78 -0
  5. package/dist/postinstall-daemon.cjs +776 -0
  6. package/dist/preinstall.cjs +92 -0
  7. package/dist/scripts/autoresearch-prepare.py +459 -0
  8. package/dist/scripts/autoresearch-train.py +661 -0
  9. package/dist/scripts/crawlee-scraper.py +358 -0
  10. package/dist/scripts/live-nemotron.py +478 -0
  11. package/dist/scripts/live-whisper.py +242 -0
  12. package/dist/scripts/ocr-advanced.py +571 -0
  13. package/dist/scripts/start-moondream.py +112 -0
  14. package/dist/scripts/tor/UPSTREAM-README.md +148 -0
  15. package/dist/scripts/tor/destroy_tor.sh +29 -0
  16. package/dist/scripts/tor/tor_setup.sh +163 -0
  17. package/dist/scripts/transcribe-file.py +63 -0
  18. package/dist/scripts/web_scrape.py +1295 -0
  19. package/npm-shrinkwrap.json +7412 -0
  20. package/package.json +142 -0
  21. package/prompts/agentic/system-large.md +569 -0
  22. package/prompts/agentic/system-medium.md +211 -0
  23. package/prompts/agentic/system-small.md +114 -0
  24. package/prompts/compaction/context-compaction.md +44 -0
  25. package/prompts/personality/level-1-minimal.md +3 -0
  26. package/prompts/personality/level-2-concise.md +3 -0
  27. package/prompts/personality/level-4-explanatory.md +3 -0
  28. package/prompts/personality/level-5-thorough.md +3 -0
  29. package/prompts/personality/level-autist.md +3 -0
  30. package/prompts/personality/level-stark.md +3 -0
  31. package/prompts/runners/dispatcher.md +24 -0
  32. package/prompts/runners/editor.md +44 -0
  33. package/prompts/runners/evaluator.md +30 -0
  34. package/prompts/runners/merge-summary.md +9 -0
  35. package/prompts/runners/normalizer.md +23 -0
  36. package/prompts/runners/planner.md +33 -0
  37. package/prompts/runners/scout.md +39 -0
  38. package/prompts/runners/verifier.md +36 -0
  39. package/prompts/skill-builder/seed-analysis.md +30 -0
  40. package/prompts/skill-builder/skill-expansion.md +76 -0
  41. package/prompts/skill-builder/skill-validation.md +31 -0
  42. package/prompts/templates/analysis.md +14 -0
  43. package/prompts/templates/code-review.md +16 -0
  44. package/prompts/templates/code.md +13 -0
  45. package/prompts/templates/document.md +13 -0
  46. package/prompts/templates/error-diagnosis.md +14 -0
  47. package/prompts/templates/general.md +9 -0
  48. package/prompts/templates/plan.md +15 -0
  49. package/prompts/templates/system.md +16 -0
  50. package/prompts/tui/dmn-gather.md +128 -0
  51. package/prompts/tui/dream-consolidate.md +48 -0
  52. package/prompts/tui/dream-lucid-eval.md +17 -0
  53. package/prompts/tui/dream-lucid-implement.md +14 -0
  54. package/prompts/tui/dream-stages.md +19 -0
  55. package/prompts/tui/emotion-behavioral.md +2 -0
  56. package/prompts/tui/emotion-center.md +12 -0
  57. package/voices/personaplex/OverBarn.pt +0 -0
  58. package/voices/personaplex/clone-voice.py +384 -0
  59. package/voices/personaplex/dequant-loader.py +174 -0
  60. package/voices/personaplex/quantize-weights.py +167 -0
@@ -0,0 +1,478 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ live-nemotron.py — Self-contained streaming ASR worker using NVIDIA's
4
+ nvidia/nemotron-speech-streaming-en-0.6b model.
5
+
6
+ Parallel to live-whisper.py. Same stdin/stdout protocol so the same
7
+ pipelines (nexus voice subsystem, asr_listen tool, eval harness) can
8
+ swap backends by pointing at a different script.
9
+
10
+ Protocol:
11
+ stdin — raw PCM16 (16kHz, mono, 16-bit signed little-endian)
12
+ stdout — JSON lines:
13
+ {"type":"status","message":"Creating venv..."}
14
+ {"type":"status","message":"Installing dependencies..."}
15
+ {"type":"status","message":"Loading model..."}
16
+ {"type":"ready"}
17
+ {"type":"transcript","text":"hello world","isFinal":false}
18
+ {"type":"transcript","text":"hello world how are you","isFinal":true}
19
+ {"type":"error","message":"..."}
20
+
21
+ Usage:
22
+ # Live stream from mic:
23
+ arecord -f S16_LE -r 16000 -c 1 -t raw -q - | python3 live-nemotron.py
24
+ # Single file transcription (write path + read transcript JSON):
25
+ python3 live-nemotron.py --file recording.wav
26
+
27
+ Backend selection:
28
+ 1. NeMo toolkit (nvidia NeMo) — native streaming support for Parakeet-
29
+ style models. Preferred when available.
30
+ 2. transformers + torchaudio — fallback via HuggingFace's generic
31
+ ASR pipeline. Works for file-based transcription even when NeMo
32
+ install fails (common on macOS / no-CUDA setups). Does NOT do
33
+ streaming — buffers the full window each chunk.
34
+ """
35
+
36
+ import sys
37
+ import os
38
+ import json
39
+ import subprocess
40
+ import struct
41
+ import time
42
+ import threading
43
+ import argparse
44
+ from pathlib import Path
45
+
46
+ # ---------------------------------------------------------------------------
47
+ # Configuration
48
+ # ---------------------------------------------------------------------------
49
+
50
# Filesystem layout: the private venv lives next to this script.
SCRIPT_DIR = Path(__file__).resolve().parent
VENV = SCRIPT_DIR.joinpath(".nemotron-venv")
PY = VENV.joinpath("bin", "python")
PIP = VENV.joinpath("bin", "pip")

# Audio format expected on stdin (see the protocol in the module docstring).
SAMPLE_RATE = 16_000
CHANNELS = 1
SAMPLE_WIDTH = 2  # bytes per sample (16-bit)

# Streaming cadence: how often to transcribe, and how much trailing audio
# each partial transcription sees.
CHUNK_SECONDS = 2.0  # Nemotron is a streaming model — shorter chunks than whisper
WINDOW_SECONDS = 8.0

# HuggingFace model identifier
MODEL_ID = "nvidia/nemotron-speech-streaming-en-0.6b"
63
+
64
+ # ---------------------------------------------------------------------------
65
+ # Output helpers (JSON lines to stdout)
66
+ # ---------------------------------------------------------------------------
67
+
68
def emit(event: dict):
    """Serialize *event* as one JSON line on stdout and flush immediately.

    The line is written with a single ``write`` call so the consumer always
    sees whole lines.
    """
    line = f"{json.dumps(event)}\n"
    out = sys.stdout
    out.write(line)
    out.flush()
71
+
72
+
73
def emit_status(msg: str):
    """Emit a ``status`` event carrying the progress message *msg*."""
    payload = {"type": "status", "message": msg}
    emit(payload)
75
+
76
+
77
def emit_error(msg: str):
    """Emit an ``error`` event carrying the failure description *msg*."""
    payload = {"type": "error", "message": msg}
    emit(payload)
79
+
80
+
81
def emit_transcript(text: str, is_final: bool = False, backend: str = "nemotron"):
    """Emit a ``transcript`` event.

    Args:
        text: Recognized text.
        is_final: Sent as ``isFinal``; ``False`` marks a partial result.
        backend: Label of the engine that produced the text.
    """
    payload = {
        "type": "transcript",
        "text": text,
        "isFinal": is_final,
        "backend": backend,
    }
    emit(payload)
83
+
84
+ # ---------------------------------------------------------------------------
85
+ # Venv bootstrap (same pattern as live-whisper.py)
86
+ # ---------------------------------------------------------------------------
87
+
88
def _in_venv() -> bool:
    """Return True when the running interpreter belongs to this script's venv.

    The previous substring test (``str(SCRIPT_DIR) in sys.prefix``) also
    matched any other venv located under the script directory and any prefix
    that merely starts with the same characters (``/opt/app`` vs
    ``/opt/app2/venv``). In those cases the bootstrap would skip the re-exec
    and then install into a venv that is not the one running. Compare against
    ``VENV`` itself instead.
    """
    if sys.prefix == sys.base_prefix:
        # Not running inside any virtual environment.
        return False
    try:
        # samefile compares the underlying directory, so symlinks and
        # differently spelled paths still match.
        return os.path.samefile(sys.prefix, VENV)
    except OSError:
        # VENV (or the prefix) does not exist — certainly not our venv.
        return False
90
+
91
+
92
def _ensure_venv():
    """Create the private venv at ``VENV`` unless its interpreter already exists.

    Checking for the interpreter (``PY``) rather than the directory means a
    venv left half-built by an interrupted earlier run is rebuilt instead of
    being treated as usable (which would make the later ``os.execv`` fail).
    """
    if PY.exists():
        return
    emit_status("Creating Python venv for Nemotron ASR...")
    import venv

    # clear=True wipes any partial directory from a previous failed attempt.
    venv.EnvBuilder(with_pip=True, clear=True).create(str(VENV))
    try:
        subprocess.check_call(
            [str(PY), "-m", "pip", "install", "--upgrade", "pip", "wheel"],
            stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
        )
    except (subprocess.CalledProcessError, OSError) as e:
        # Upgrading pip/wheel is an optimisation; the bundled pip still
        # works, so report and carry on rather than dying with a traceback.
        emit_status(f"pip/wheel upgrade skipped ({e})")
102
+
103
+
104
def _ensure_deps():
    """Install torch + either nemo_toolkit[asr] or transformers as fallback.

    Core packages (numpy, torch, soundfile, transformers) are mandatory: if
    any is missing and cannot be installed, an ``error`` event is emitted and
    the process exits with status 1. NeMo is optional: an install failure or
    timeout is reported as a ``status`` event and execution continues.
    """
    import importlib

    def _pip_install(packages, timeout=None):
        # subprocess.run drains stderr via communicate(); check_call with
        # stderr=PIPE never reads the pipe and can deadlock once pip fills it.
        subprocess.run(
            [str(PIP), "install", *packages],
            stdout=subprocess.DEVNULL, stderr=subprocess.PIPE,
            timeout=timeout, check=True,
        )

    def _stderr_tail(exc, limit=500):
        # Last part of pip's stderr, if any was captured.
        raw = getattr(exc, "stderr", None)
        if not raw:
            return ""
        if isinstance(raw, bytes):
            raw = raw.decode("utf-8", errors="replace")
        return ": " + raw.strip()[-limit:]

    need = []
    for mod in ("numpy", "torch", "soundfile", "transformers"):
        try:
            importlib.import_module(mod)
        except ImportError:
            need.append(mod)

    if need:
        emit_status(f"Installing core deps: {', '.join(need)}...")
        try:
            _pip_install(need)
        except (subprocess.CalledProcessError, OSError) as e:
            emit_error(f"pip install failed: {e}{_stderr_tail(e)}")
            sys.exit(1)
        # Only forget modules that failed to import. Evicting ones that
        # loaded fine (torch, numpy) would force a second initialisation of
        # their C extensions on the next import.
        for mod in need:
            sys.modules.pop(mod, None)
        # Make the import system notice the freshly installed packages.
        importlib.invalidate_caches()

    # NeMo toolkit is large and optional — try to install it but fall
    # back gracefully if it's unavailable on this platform.
    try:
        importlib.import_module("nemo.collections.asr")
    except ImportError:
        emit_status("Installing nemo_toolkit[asr] (large — may take a few minutes)...")
        try:
            _pip_install(["nemo_toolkit[asr]"], timeout=600)
        except (subprocess.CalledProcessError, subprocess.TimeoutExpired, OSError) as e:
            emit_status(f"NeMo install skipped ({e}) — will use transformers fallback")
        else:
            importlib.invalidate_caches()
153
+
154
+ # ---------------------------------------------------------------------------
155
+ # Bootstrap: re-exec inside venv
156
+ # ---------------------------------------------------------------------------
157
+
158
# ``--check`` fast path. Handled on the host interpreter before any venv or
# dependency work, so CI and smoke tests can confirm the script parses and
# is callable without triggering a 5-minute NeMo download.
if "--check" in sys.argv:
    _script_path = str(Path(__file__).resolve())
    emit({"type": "check", "ok": True, "script": _script_path})
    sys.exit(0)

# Not yet inside the private venv: make sure it exists, then replace this
# process with the venv interpreter running the same command line.
if not _in_venv():
    _ensure_venv()
    _venv_python = str(PY)
    os.execv(_venv_python, [_venv_python, *sys.argv])

# Only reached inside the venv.
_ensure_deps()

# Now safe to import
import numpy as np  # noqa: E402
173
+
174
+ # ---------------------------------------------------------------------------
175
+ # Backend loaders
176
+ # ---------------------------------------------------------------------------
177
+
178
def _load_nemo_model(model_id: str = MODEL_ID, force_cpu: bool = False):
    """Try to load via NeMo toolkit. Returns (model, device) or (None, None).

    Args:
        model_id: Model name passed to ``ASRModel.from_pretrained``.
        force_cpu: Load on CPU even when CUDA is available.

    Returns:
        ``(model, "cpu" | "cuda")`` on success; ``(None, None)`` when NeMo or
        torch cannot be imported or the load fails.

    A load failure whose message looks like a cuDNN / CUDA compatibility
    problem is retried once on CPU. This is the common failure mode on older
    NVIDIA GPUs where the installed torch has a newer cuDNN than the hardware
    supports.
    """
    # Hide the GPU from anything imported from here on. This cannot help on
    # the retry path (torch has already initialised CUDA by then), which is
    # why the CPU load below also passes an explicit map_location.
    if force_cpu:
        os.environ["CUDA_VISIBLE_DEVICES"] = ""
    try:
        import nemo.collections.asr as nemo_asr
        import torch
    except ImportError:
        return (None, None)
    try:
        emit_status(f"Loading NeMo model {model_id}...")
        use_cpu = force_cpu or not torch.cuda.is_available()
        if use_cpu:
            # Without map_location, from_pretrained picks the GPU whenever
            # torch still sees one, which would repeat the original failure.
            model = nemo_asr.models.ASRModel.from_pretrained(
                model_id, map_location=torch.device("cpu")
            )
        else:
            model = nemo_asr.models.ASRModel.from_pretrained(model_id)
        model.eval()
        if use_cpu:
            try:
                # Best-effort: the weights should already be on CPU.
                model = model.cpu()
            except Exception:
                pass
        return (model, "cpu" if use_cpu else "cuda")
    except Exception as e:
        msg = str(e)
        emit_status(f"NeMo load failed: {msg[:200]}")
        # Retry on CPU if the error looks like a cuDNN / device compat issue
        if not force_cpu and any(k in msg for k in ("cuDNN", "SM <", "CUDA", "device side")):
            emit_status("Retrying NeMo load on CPU only...")
            return _load_nemo_model(model_id, force_cpu=True)
        return (None, None)
213
+
214
+
215
def _load_transformers_model(model_id: str = MODEL_ID):
    """Fallback loader: build a HuggingFace ASR pipeline for *model_id*.

    Returns the pipeline object, or ``None`` when ``transformers`` is not
    importable or pipeline construction raises (the failure is reported as a
    ``status`` event).
    """
    try:
        from transformers import pipeline as build_pipeline
    except ImportError:
        return None

    try:
        emit_status(f"Loading transformers pipeline for {model_id}...")

        # Device index: 0 when torch reports CUDA, otherwise -1.
        device = -1
        try:
            import torch
            device = 0 if torch.cuda.is_available() else -1
        except ImportError:
            pass

        options = {
            "task": "automatic-speech-recognition",
            "model": model_id,
            "device": device,
            "return_timestamps": False,
            "chunk_length_s": 30,
            "stride_length_s": 5,
        }
        return build_pipeline(**options)
    except Exception as e:
        emit_status(f"transformers load failed: {e}")
        return None
242
+
243
+
244
def _extract_hypothesis_text(r0) -> str:
    """Pull the transcript string out of one NeMo result item.

    Accepts a plain string, an object with a ``text`` attribute, an object
    with a ``best_hypothesis()`` method, or (possibly nested) lists of any of
    these, in which case the first element is used. Anything else, including
    ``None`` and empty lists, yields ``""`` so silent input never leaks an
    object repr into the transcript.
    """
    candidate = r0

    # Peel nested lists (some RNNT decoders) down to their first element.
    while isinstance(candidate, list):
        if not candidate:
            return ""
        candidate = candidate[0]

    if candidate is None:
        return ""
    if isinstance(candidate, str):
        return candidate.strip()

    # An empty or None ``text`` is a valid (silent) transcript.
    if hasattr(candidate, "text"):
        return str(candidate.text or "").strip()

    # best_hypothesis() method (rare)
    if hasattr(candidate, "best_hypothesis"):
        try:
            best = candidate.best_hypothesis()
            if best and hasattr(best, "text"):
                return str(best.text or "").strip()
        except Exception:
            pass

    return ""
272
+
273
+
274
def _transcribe_buffer_nemo(model, audio: np.ndarray) -> str:
    """Transcribe a 16kHz mono float32 numpy array via NeMo.

    Tries multiple invocation signatures across NeMo versions:
      - transcribe([np.ndarray])                    — newest
      - transcribe(paths2audio_files=["file.wav"])  — legacy, requires tmp wav

    Returns the transcript, or ``""`` when nothing was recognized or an error
    occurred (errors are reported as ``error`` events).
    """
    try:
        # Newest API: pass audio arrays directly
        result = model.transcribe([audio], batch_size=1, verbose=False)
        if not result:
            return ""
        return _extract_hypothesis_text(result[0])
    except TypeError:
        # Older NeMo — fall through to file path invocation
        pass
    except Exception as e:
        emit_error(f"NeMo transcribe error: {e}")
        return ""

    # Fallback: write audio to a temp WAV and pass the path.
    tmp_path = None
    try:
        import soundfile as sf
        import tempfile

        # Create and close the file before writing by name, so no second
        # handle is open on it. Because tmp_path is set before any step that
        # can fail, the finally block always removes the file — previously a
        # failing sf.write left it behind.
        fd, tmp_path = tempfile.mkstemp(suffix=".wav")
        os.close(fd)
        sf.write(tmp_path, audio, SAMPLE_RATE, subtype="PCM_16")
        result = model.transcribe(paths2audio_files=[tmp_path], batch_size=1, verbose=False)
        if result and result[0] is not None:
            return _extract_hypothesis_text(result[0])
    except Exception as e:
        emit_error(f"NeMo legacy transcribe error: {e}")
    finally:
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                pass
    return ""
313
+
314
+
315
def _transcribe_buffer_transformers(pipe, audio: np.ndarray) -> str:
    """Run *audio* through the transformers pipeline and return its text.

    The pipeline may answer with a dict or a list of dicts; only the first
    entry's ``"text"`` is used. Any other shape yields ``""``. Exceptions are
    reported as ``error`` events and also yield ``""``.
    """
    try:
        output = pipe({"array": audio, "sampling_rate": SAMPLE_RATE})
        # Reduce a non-empty list to its first entry, then treat it like the
        # single-dict case.
        if isinstance(output, list) and output:
            output = output[0]
        if isinstance(output, dict):
            return str(output.get("text", "")).strip()
        return ""
    except Exception as e:
        emit_error(f"transformers transcribe error: {e}")
        return ""
327
+
328
+ # ---------------------------------------------------------------------------
329
+ # File transcription mode (single-shot)
330
+ # ---------------------------------------------------------------------------
331
+
332
def transcribe_file(path: str, language: str = "en", model_id: str = MODEL_ID) -> int:
    """Single-file transcription — reads an audio file, prints one transcript
    JSON line, exits.

    Args:
        path: Audio file readable by ``soundfile``.
        language: Accepted for interface compatibility; not used here.
        model_id: Model to load (defaults to ``MODEL_ID``).

    Returns:
        0 on success (including silent audio), 1 when the file cannot be
        loaded or no backend is available.
    """
    try:
        import soundfile as sf
        audio, sr = sf.read(path, dtype="float32")
        if audio.ndim > 1:
            audio = audio.mean(axis=1)  # downmix to mono
        if sr != SAMPLE_RATE:
            # Resample via simple linear interpolation (avoids scipy dep).
            # The index grids are float64: float32 cannot represent integers
            # above 2**24, so for recordings longer than ~6 min at 44.1 kHz
            # the source positions would collapse onto each other and
            # np.interp would return garbage.
            new_len = int(len(audio) * (SAMPLE_RATE / sr))
            src_pos = np.arange(len(audio), dtype=np.float64)
            dst_pos = np.linspace(0, len(audio) - 1, new_len, dtype=np.float64)
            audio = np.interp(dst_pos, src_pos, audio).astype(np.float32)
    except Exception as e:
        emit_error(f"Failed to load audio file {path}: {e}")
        return 1

    (model, device) = _load_nemo_model(model_id)
    backend = "nemo"
    if model is None:
        model = _load_transformers_model(model_id)
        backend = "transformers"
    if model is None:
        emit_error("No nemotron backend available (tried NeMo + transformers)")
        return 1

    # device is None on the transformers path; it is reported as "cpu" then.
    emit({"type": "ready", "backend": backend, "device": device or "cpu"})

    t0 = time.time()
    if backend == "nemo":
        text = _transcribe_buffer_nemo(model, audio)
    else:
        text = _transcribe_buffer_transformers(model, audio)
    elapsed = time.time() - t0

    # Silent / no-speech audio is NOT an error — it's a valid transcript
    # (empty string). Emit the full envelope so the caller can distinguish
    # "no speech" from "engine crashed". Exit 0 either way.
    emit({
        "type": "transcript",
        "text": text or "",
        "isFinal": True,
        "backend": f"nemotron-{backend}",
        "latencyMs": int(elapsed * 1000),
        "audioSeconds": float(len(audio) / SAMPLE_RATE),
        "empty": not bool(text),
    })
    return 0
382
+
383
+ # ---------------------------------------------------------------------------
384
+ # Streaming mode (stdin → transcripts)
385
+ # ---------------------------------------------------------------------------
386
+
387
def stream_stdin(args) -> int:
    """Stream raw PCM16 from stdin and emit partial, then final, transcripts.

    A reader thread collects audio from stdin. Every ``args.chunk_seconds``
    the main loop transcribes the trailing ``args.window_seconds`` of audio
    and emits a non-final transcript when the text changed. After stdin
    closes (or on Ctrl-C) the whole recording is transcribed once more and
    emitted as the final transcript if non-empty.

    Args:
        args: Parsed CLI namespace; reads ``model``, ``chunk_seconds`` and
            ``window_seconds``.

    Returns:
        0 on normal completion, 1 when no backend could be loaded.
    """
    (model, _device) = _load_nemo_model(args.model)
    backend = "nemo"
    if model is None:
        model = _load_transformers_model(args.model)
        backend = "transformers"
    if model is None:
        emit_error("No nemotron backend available (tried NeMo + transformers)")
        return 1

    def transcribe(samples) -> str:
        # Dispatch to whichever backend was loaded above.
        if backend == "nemo":
            return _transcribe_buffer_nemo(model, samples)
        return _transcribe_buffer_transformers(model, samples)

    emit({"type": "ready"})

    # Audio is kept as a list of chunks. Re-concatenating one growing array
    # on every read copies the whole session each time (quadratic).
    chunks = []
    total_samples = 0
    buf_lock = threading.Lock()
    # Read size: a whole number of samples, and never zero (read(0) returns
    # b"" which would look like EOF).
    chunk_bytes = int(args.chunk_seconds * SAMPLE_RATE * SAMPLE_WIDTH)
    chunk_bytes = max(SAMPLE_WIDTH, chunk_bytes - chunk_bytes % SAMPLE_WIDTH)
    window_samples = int(args.window_seconds * SAMPLE_RATE)
    last_text = ""
    running = True

    def tail_locked(count: int):
        # Newest `count` samples (everything when count <= 0 or when fewer
        # are buffered). Caller must hold buf_lock and chunks is non-empty.
        picked = []
        have = 0
        for piece in reversed(chunks):
            picked.append(piece)
            have += len(piece)
            if 0 < count <= have:
                break
        picked.reverse()
        joined = np.concatenate(picked)
        return joined[-count:] if 0 < count < len(joined) else joined

    def read_stdin():
        nonlocal total_samples, running
        leftover = b""
        try:
            while running:
                data = sys.stdin.buffer.read(chunk_bytes)
                if not data:
                    break
                # A read can end mid-sample (e.g. at EOF). Carry the odd
                # byte over instead of letting np.frombuffer raise and
                # silently end the stream.
                data = leftover + data
                usable = len(data) - (len(data) % SAMPLE_WIDTH)
                leftover = data[usable:]
                if not usable:
                    continue
                # "<i2": the protocol is little-endian regardless of host.
                samples = np.frombuffer(data[:usable], dtype="<i2").astype(np.float32) / 32768.0
                with buf_lock:
                    chunks.append(samples)
                    total_samples += len(samples)
        except Exception as e:
            # Surface the failure; the stream still ends cleanly below.
            emit_error(f"stdin reader failed: {e}")
        finally:
            running = False

    reader = threading.Thread(target=read_stdin, daemon=True)
    reader.start()

    try:
        while running:
            time.sleep(args.chunk_seconds)
            with buf_lock:
                # Wait for at least one second of audio before transcribing.
                if total_samples < SAMPLE_RATE:
                    continue
                window = tail_locked(window_samples)
            text = transcribe(window)
            if text and text != last_text:
                last_text = text
                emit_transcript(text, is_final=False, backend=f"nemotron-{backend}")
    except KeyboardInterrupt:
        pass

    with buf_lock:
        full_audio = np.concatenate(chunks) if chunks else np.zeros(0, dtype=np.float32)
    if len(full_audio) >= SAMPLE_RATE:
        text = transcribe(full_audio)
        if text:
            emit_transcript(text, is_final=True, backend=f"nemotron-{backend}")
    # Tell the reader thread to stop if it is still looping (Ctrl-C path).
    running = False
    return 0
452
+
453
+ # ---------------------------------------------------------------------------
454
+ # Main
455
+ # ---------------------------------------------------------------------------
456
+
457
def main():
    """Parse the command line and run the selected mode.

    Returns the process exit status: ``--check`` emits a ``check`` event and
    returns 0; ``--file`` runs single-file transcription; otherwise audio is
    streamed from stdin.
    """
    cli = argparse.ArgumentParser(description="Nemotron streaming ASR worker")
    cli.add_argument(
        "--model",
        default=MODEL_ID,
        help="HuggingFace model id (default: nvidia/nemotron-speech-streaming-en-0.6b)",
    )
    cli.add_argument(
        "--file",
        default=None,
        help="Transcribe a single audio file instead of stdin",
    )
    cli.add_argument("--language", default="en", help="Language code")
    cli.add_argument(
        "--chunk-seconds",
        type=float,
        default=CHUNK_SECONDS,
        help="Transcribe interval",
    )
    cli.add_argument(
        "--window-seconds",
        type=float,
        default=WINDOW_SECONDS,
        help="Sliding window size",
    )
    cli.add_argument(
        "--stdin",
        action="store_true",
        help="Explicit stdin mode (default when no --file)",
    )
    cli.add_argument(
        "--check",
        action="store_true",
        help="Just verify the script parses + imports; no model load",
    )
    args = cli.parse_args()

    if args.check:
        script_path = str(Path(__file__).resolve())
        emit({"type": "check", "ok": True, "script": script_path})
        return 0

    # No file given: stream from stdin.
    if not args.file:
        return stream_stdin(args)
    return transcribe_file(args.file, args.language)
475
+
476
+
477
# Script entry point: main()'s return value becomes the exit status.
if __name__ == "__main__":
    raise SystemExit(main())