omnius 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. package/README.md +4959 -0
  2. package/dist/index.d.ts +6 -0
  3. package/dist/index.js +630665 -0
  4. package/dist/launcher.cjs +78 -0
  5. package/dist/postinstall-daemon.cjs +776 -0
  6. package/dist/preinstall.cjs +92 -0
  7. package/dist/scripts/autoresearch-prepare.py +459 -0
  8. package/dist/scripts/autoresearch-train.py +661 -0
  9. package/dist/scripts/crawlee-scraper.py +358 -0
  10. package/dist/scripts/live-nemotron.py +478 -0
  11. package/dist/scripts/live-whisper.py +242 -0
  12. package/dist/scripts/ocr-advanced.py +571 -0
  13. package/dist/scripts/start-moondream.py +112 -0
  14. package/dist/scripts/tor/UPSTREAM-README.md +148 -0
  15. package/dist/scripts/tor/destroy_tor.sh +29 -0
  16. package/dist/scripts/tor/tor_setup.sh +163 -0
  17. package/dist/scripts/transcribe-file.py +63 -0
  18. package/dist/scripts/web_scrape.py +1295 -0
  19. package/npm-shrinkwrap.json +7412 -0
  20. package/package.json +142 -0
  21. package/prompts/agentic/system-large.md +569 -0
  22. package/prompts/agentic/system-medium.md +211 -0
  23. package/prompts/agentic/system-small.md +114 -0
  24. package/prompts/compaction/context-compaction.md +44 -0
  25. package/prompts/personality/level-1-minimal.md +3 -0
  26. package/prompts/personality/level-2-concise.md +3 -0
  27. package/prompts/personality/level-4-explanatory.md +3 -0
  28. package/prompts/personality/level-5-thorough.md +3 -0
  29. package/prompts/personality/level-autist.md +3 -0
  30. package/prompts/personality/level-stark.md +3 -0
  31. package/prompts/runners/dispatcher.md +24 -0
  32. package/prompts/runners/editor.md +44 -0
  33. package/prompts/runners/evaluator.md +30 -0
  34. package/prompts/runners/merge-summary.md +9 -0
  35. package/prompts/runners/normalizer.md +23 -0
  36. package/prompts/runners/planner.md +33 -0
  37. package/prompts/runners/scout.md +39 -0
  38. package/prompts/runners/verifier.md +36 -0
  39. package/prompts/skill-builder/seed-analysis.md +30 -0
  40. package/prompts/skill-builder/skill-expansion.md +76 -0
  41. package/prompts/skill-builder/skill-validation.md +31 -0
  42. package/prompts/templates/analysis.md +14 -0
  43. package/prompts/templates/code-review.md +16 -0
  44. package/prompts/templates/code.md +13 -0
  45. package/prompts/templates/document.md +13 -0
  46. package/prompts/templates/error-diagnosis.md +14 -0
  47. package/prompts/templates/general.md +9 -0
  48. package/prompts/templates/plan.md +15 -0
  49. package/prompts/templates/system.md +16 -0
  50. package/prompts/tui/dmn-gather.md +128 -0
  51. package/prompts/tui/dream-consolidate.md +48 -0
  52. package/prompts/tui/dream-lucid-eval.md +17 -0
  53. package/prompts/tui/dream-lucid-implement.md +14 -0
  54. package/prompts/tui/dream-stages.md +19 -0
  55. package/prompts/tui/emotion-behavioral.md +2 -0
  56. package/prompts/tui/emotion-center.md +12 -0
  57. package/voices/personaplex/OverBarn.pt +0 -0
  58. package/voices/personaplex/clone-voice.py +384 -0
  59. package/voices/personaplex/dequant-loader.py +174 -0
  60. package/voices/personaplex/quantize-weights.py +167 -0
@@ -0,0 +1,167 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ quantize-weights.py — Quantize PersonaPlex 7B weights to INT4 (NF4) for edge devices.
4
+
5
+ Creates a ~3.5GB quantized checkpoint from the ~14GB bf16 weights.
6
+ The quantized model runs 3-4x faster on memory-bandwidth-limited devices
7
+ like Jetson AGX Orin while maintaining voice quality.
8
+
9
+ Usage:
10
+ python quantize-weights.py [--device cuda] [--output personaplex-7b-nf4.safetensors]
11
+
12
+ Requirements:
13
+ pip install bitsandbytes safetensors torch huggingface_hub
14
+ """
15
+
16
+ import argparse
17
+ import os
18
+ import sys
19
+ import logging
20
+
21
# Emit bare messages (no level/name prefix) at INFO and above so the script's
# progress output reads like plain console text.
logging.basicConfig(format="%(message)s", level=logging.INFO)
log = logging.getLogger(__name__)
23
+
24
+
25
+ def quantize_model(device: str = "cuda", output_path: str = None):
26
+ """Quantize PersonaPlex 7B to NF4 (4-bit Normal Float)"""
27
+ import torch
28
+ from huggingface_hub import hf_hub_download
29
+ from safetensors.torch import load_file, save_file
30
+
31
+ hf_repo = "nvidia/personaplex-7b-v1"
32
+
33
+ # 1) Download original weights
34
+ log.info("Downloading PersonaPlex 7B weights...")
35
+ weight_path = hf_hub_download(hf_repo, "model.safetensors")
36
+ log.info(f" Weights: {weight_path}")
37
+ log.info(f" Size: {os.path.getsize(weight_path) / 1024**3:.1f} GB")
38
+
39
+ # 2) Load state dict
40
+ log.info("Loading state dict...")
41
+ state_dict = load_file(weight_path, device="cpu")
42
+ log.info(f" Loaded {len(state_dict)} tensors")
43
+
44
+ # 3) Quantize each weight tensor to INT4 using block-wise NF4
45
+ try:
46
+ import bitsandbytes as bnb
47
+ from bitsandbytes.functional import quantize_nf4, dequantize_nf4
48
+ HAS_BNB = True
49
+ except ImportError:
50
+ HAS_BNB = False
51
+ log.info(" bitsandbytes not available — using manual INT4 quantization")
52
+
53
+ quantized_state = {}
54
+ quant_meta = {} # Store quantization parameters for dequantization
55
+ total_original = 0
56
+ total_quantized = 0
57
+ skipped = 0
58
+
59
+ for name, tensor in state_dict.items():
60
+ original_bytes = tensor.numel() * tensor.element_size()
61
+ total_original += original_bytes
62
+
63
+ # Only quantize large weight matrices (≥1024 elements, 2D)
64
+ # Skip biases, norms, embeddings, small tensors
65
+ should_quantize = (
66
+ tensor.ndim >= 2
67
+ and tensor.numel() >= 1024
68
+ and not any(skip in name for skip in [
69
+ "norm", "bias", "embed", "positional", "rope",
70
+ "depformer_emb", "depformer_in",
71
+ ])
72
+ )
73
+
74
+ if not should_quantize:
75
+ quantized_state[name] = tensor.to(torch.float16).contiguous()
76
+ total_quantized += tensor.numel() * 2 # fp16
77
+ skipped += 1
78
+ continue
79
+
80
+ # Reshape to 2D for quantization
81
+ orig_shape = tensor.shape
82
+ flat = tensor.reshape(-1).float()
83
+
84
+ if HAS_BNB:
85
+ # Use bitsandbytes NF4 quantization
86
+ quant_tensor, quant_state = bnb.functional.quantize_4bit(
87
+ flat, quant_type="nf4", compress_statistics=True,
88
+ )
89
+ # Store the quantized bytes + metadata for reconstruction
90
+ quantized_state[name] = quant_tensor.contiguous()
91
+ quant_meta[f"{name}.__quant_state__"] = torch.tensor(
92
+ list(orig_shape) + [0] * (4 - len(orig_shape)),
93
+ dtype=torch.int64,
94
+ )
95
+ # Store absmax for dequantization
96
+ if hasattr(quant_state, 'absmax'):
97
+ quantized_state[f"{name}.__absmax__"] = quant_state.absmax.contiguous()
98
+ if hasattr(quant_state, 'quant_map'):
99
+ quantized_state[f"{name}.__quant_map__"] = quant_state.quant_map.contiguous()
100
+ total_quantized += quant_tensor.numel()
101
+ else:
102
+ # Manual symmetric INT4 quantization (no bitsandbytes)
103
+ # Block size 64 for good accuracy
104
+ block_size = 64
105
+ n_blocks = (flat.numel() + block_size - 1) // block_size
106
+ padded = torch.zeros(n_blocks * block_size)
107
+ padded[:flat.numel()] = flat
108
+
109
+ blocks = padded.reshape(n_blocks, block_size)
110
+ scales = blocks.abs().max(dim=1).values / 7.0 # INT4 range: -8 to 7
111
+ scales = scales.clamp(min=1e-8)
112
+
113
+ # Quantize to INT4 (stored as INT8 pairs)
114
+ quantized_blocks = torch.round(blocks / scales.unsqueeze(1)).clamp(-8, 7).to(torch.int8)
115
+
116
+ # Pack two INT4 values into one INT8
117
+ packed = torch.zeros(n_blocks, block_size // 2, dtype=torch.uint8)
118
+ for i in range(block_size // 2):
119
+ low = (quantized_blocks[:, 2 * i] + 8).to(torch.uint8)
120
+ high = (quantized_blocks[:, 2 * i + 1] + 8).to(torch.uint8)
121
+ packed[:, i] = low | (high << 4)
122
+
123
+ quantized_state[name] = packed.reshape(-1).contiguous()
124
+ quantized_state[f"{name}.__scales__"] = scales.to(torch.float16).contiguous()
125
+ quant_meta[f"{name}.__quant_state__"] = torch.tensor(
126
+ list(orig_shape) + [0] * (4 - len(orig_shape)) + [block_size, flat.numel()],
127
+ dtype=torch.int64,
128
+ )
129
+ total_quantized += packed.numel() + scales.numel() * 2
130
+
131
+ # Add metadata tensors
132
+ quantized_state.update(quant_meta)
133
+
134
+ # 4) Save quantized weights
135
+ if output_path is None:
136
+ output_path = os.path.join(os.path.dirname(weight_path), "model-nf4.safetensors")
137
+
138
+ log.info(f"\nSaving quantized weights to: {output_path}")
139
+ save_file(quantized_state, output_path)
140
+
141
+ final_size = os.path.getsize(output_path)
142
+ compression = total_original / max(final_size, 1)
143
+
144
+ log.info(f"\nQuantization complete!")
145
+ log.info(f" Original: {total_original / 1024**3:.1f} GB (bf16)")
146
+ log.info(f" Quantized: {final_size / 1024**3:.1f} GB (NF4)")
147
+ log.info(f" Compression: {compression:.1f}x")
148
+ log.info(f" Tensors quantized: {len(state_dict) - skipped}/{len(state_dict)}")
149
+ log.info(f" Tensors kept fp16: {skipped} (norms, biases, embeddings)")
150
+ log.info(f"\nUse --quantized flag with PersonaPlex server for INT4 inference")
151
+
152
+ return output_path
153
+
154
+
155
def main():
    """Command-line entry point.

    Parses ``--device`` and ``--output``/``-o``, then runs the quantization
    with autograd disabled.
    """
    cli = argparse.ArgumentParser(description="Quantize PersonaPlex 7B to INT4 NF4")
    cli.add_argument("--device", default="cuda", help="Device for quantization")
    cli.add_argument("--output", "-o", default=None, help="Output path for quantized weights")
    opts = cli.parse_args()

    # Imported only after argument parsing so `--help` works without torch.
    from torch import no_grad

    with no_grad():
        quantize_model(device=opts.device, output_path=opts.output)


if __name__ == "__main__":
    main()