bit-ttt-engine 0.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. bit_ttt_engine-0.6.0/PKG-INFO +91 -0
  2. bit_ttt_engine-0.6.0/README_PYPI.md +73 -0
  3. bit_ttt_engine-0.6.0/crates/rust_engine/.cargo/config.toml +2 -0
  4. bit_ttt_engine-0.6.0/crates/rust_engine/CHANGELOG.md +99 -0
  5. bit_ttt_engine-0.6.0/crates/rust_engine/Cargo.lock +249 -0
  6. bit_ttt_engine-0.6.0/crates/rust_engine/Cargo.toml +100 -0
  7. bit_ttt_engine-0.6.0/crates/rust_engine/LICENSE +21 -0
  8. bit_ttt_engine-0.6.0/crates/rust_engine/README.md +61 -0
  9. bit_ttt_engine-0.6.0/crates/rust_engine/README_PYPI.md +73 -0
  10. bit_ttt_engine-0.6.0/crates/rust_engine/build.rs +139 -0
  11. bit_ttt_engine-0.6.0/crates/rust_engine/cortex_rust.pyi +100 -0
  12. bit_ttt_engine-0.6.0/crates/rust_engine/docs/paged_attention_quality_investigation.md +213 -0
  13. bit_ttt_engine-0.6.0/crates/rust_engine/examples/benchmark.rs +251 -0
  14. bit_ttt_engine-0.6.0/crates/rust_engine/examples/cuda_test.rs +54 -0
  15. bit_ttt_engine-0.6.0/crates/rust_engine/examples/debug_load.rs +45 -0
  16. bit_ttt_engine-0.6.0/crates/rust_engine/examples/e2e_benchmark.rs +273 -0
  17. bit_ttt_engine-0.6.0/crates/rust_engine/examples/python_sanity_check.py +48 -0
  18. bit_ttt_engine-0.6.0/crates/rust_engine/examples/ttt_benchmark.rs +225 -0
  19. bit_ttt_engine-0.6.0/crates/rust_engine/src/bin/bench_4bit_gpu.rs +151 -0
  20. bit_ttt_engine-0.6.0/crates/rust_engine/src/bin/bench_cpu_kernel.rs +100 -0
  21. bit_ttt_engine-0.6.0/crates/rust_engine/src/bin/bench_gemm_4bit.rs +120 -0
  22. bit_ttt_engine-0.6.0/crates/rust_engine/src/bin/bench_sizes.rs +71 -0
  23. bit_ttt_engine-0.6.0/crates/rust_engine/src/bin/bench_tinyllama.rs +101 -0
  24. bit_ttt_engine-0.6.0/crates/rust_engine/src/bin/detect_model.rs +118 -0
  25. bit_ttt_engine-0.6.0/crates/rust_engine/src/bin/fast_download.rs +161 -0
  26. bit_ttt_engine-0.6.0/crates/rust_engine/src/bin/quick_gen.rs +138 -0
  27. bit_ttt_engine-0.6.0/crates/rust_engine/src/bin/run_4bit_llama.rs +163 -0
  28. bit_ttt_engine-0.6.0/crates/rust_engine/src/bin/test_13b.rs +95 -0
  29. bit_ttt_engine-0.6.0/crates/rust_engine/src/bin/test_4bit_inference.rs +300 -0
  30. bit_ttt_engine-0.6.0/crates/rust_engine/src/bin/test_cuda_gemm.rs +80 -0
  31. bit_ttt_engine-0.6.0/crates/rust_engine/src/bin/test_memory.rs +80 -0
  32. bit_ttt_engine-0.6.0/crates/rust_engine/src/device_utils.rs +343 -0
  33. bit_ttt_engine-0.6.0/crates/rust_engine/src/download.rs +484 -0
  34. bit_ttt_engine-0.6.0/crates/rust_engine/src/error.rs +104 -0
  35. bit_ttt_engine-0.6.0/crates/rust_engine/src/eval/mod.rs +8 -0
  36. bit_ttt_engine-0.6.0/crates/rust_engine/src/eval/perplexity.rs +262 -0
  37. bit_ttt_engine-0.6.0/crates/rust_engine/src/kernels/adaptive_bit_op.cu +135 -0
  38. bit_ttt_engine-0.6.0/crates/rust_engine/src/kernels/adaptive_bit_op.ptx +214 -0
  39. bit_ttt_engine-0.6.0/crates/rust_engine/src/kernels/bit_op.cu +45 -0
  40. bit_ttt_engine-0.6.0/crates/rust_engine/src/kernels/bit_op.ptx +223 -0
  41. bit_ttt_engine-0.6.0/crates/rust_engine/src/kernels/cpu.rs +317 -0
  42. bit_ttt_engine-0.6.0/crates/rust_engine/src/kernels/cuda.rs +365 -0
  43. bit_ttt_engine-0.6.0/crates/rust_engine/src/kernels/fused_ops.cu +261 -0
  44. bit_ttt_engine-0.6.0/crates/rust_engine/src/kernels/fused_ops.ptx +498 -0
  45. bit_ttt_engine-0.6.0/crates/rust_engine/src/kernels/fused_ops.rs +366 -0
  46. bit_ttt_engine-0.6.0/crates/rust_engine/src/kernels/matmul_4bit.cu +541 -0
  47. bit_ttt_engine-0.6.0/crates/rust_engine/src/kernels/matmul_4bit.ptx +3457 -0
  48. bit_ttt_engine-0.6.0/crates/rust_engine/src/kernels/matmul_4bit.rs +886 -0
  49. bit_ttt_engine-0.6.0/crates/rust_engine/src/kernels/mod.rs +7 -0
  50. bit_ttt_engine-0.6.0/crates/rust_engine/src/kernels/packing.rs +650 -0
  51. bit_ttt_engine-0.6.0/crates/rust_engine/src/kernels/packing_4bit.rs +503 -0
  52. bit_ttt_engine-0.6.0/crates/rust_engine/src/kernels/paged_attention.cu +294 -0
  53. bit_ttt_engine-0.6.0/crates/rust_engine/src/kernels/paged_attention.ptx +975 -0
  54. bit_ttt_engine-0.6.0/crates/rust_engine/src/kernels/paged_attention.rs +373 -0
  55. bit_ttt_engine-0.6.0/crates/rust_engine/src/layers/adaptive_linear.rs +364 -0
  56. bit_ttt_engine-0.6.0/crates/rust_engine/src/layers/attention.rs +459 -0
  57. bit_ttt_engine-0.6.0/crates/rust_engine/src/layers/bit_linear.rs +355 -0
  58. bit_ttt_engine-0.6.0/crates/rust_engine/src/layers/flash_attention.rs +376 -0
  59. bit_ttt_engine-0.6.0/crates/rust_engine/src/layers/isomorphic.rs +428 -0
  60. bit_ttt_engine-0.6.0/crates/rust_engine/src/layers/kv_cache/mod.rs +1196 -0
  61. bit_ttt_engine-0.6.0/crates/rust_engine/src/layers/linear_4bit.rs +522 -0
  62. bit_ttt_engine-0.6.0/crates/rust_engine/src/layers/rms_norm.rs +78 -0
  63. bit_ttt_engine-0.6.0/crates/rust_engine/src/layers/swiglu.rs +89 -0
  64. bit_ttt_engine-0.6.0/crates/rust_engine/src/layers/ttt.rs +255 -0
  65. bit_ttt_engine-0.6.0/crates/rust_engine/src/layers.rs +69 -0
  66. bit_ttt_engine-0.6.0/crates/rust_engine/src/lib.rs +80 -0
  67. bit_ttt_engine-0.6.0/crates/rust_engine/src/model/block.rs +240 -0
  68. bit_ttt_engine-0.6.0/crates/rust_engine/src/model/config.rs +292 -0
  69. bit_ttt_engine-0.6.0/crates/rust_engine/src/model/config_common.rs +30 -0
  70. bit_ttt_engine-0.6.0/crates/rust_engine/src/model/detector.rs +734 -0
  71. bit_ttt_engine-0.6.0/crates/rust_engine/src/model/gguf_loader.rs +474 -0
  72. bit_ttt_engine-0.6.0/crates/rust_engine/src/model/gguf_model.rs +700 -0
  73. bit_ttt_engine-0.6.0/crates/rust_engine/src/model/llama/bitllama.rs +559 -0
  74. bit_ttt_engine-0.6.0/crates/rust_engine/src/model/llama/llama_fp16.rs +279 -0
  75. bit_ttt_engine-0.6.0/crates/rust_engine/src/model/llama/mod.rs +20 -0
  76. bit_ttt_engine-0.6.0/crates/rust_engine/src/model/llama_4bit.rs +1568 -0
  77. bit_ttt_engine-0.6.0/crates/rust_engine/src/model/unified.rs +297 -0
  78. bit_ttt_engine-0.6.0/crates/rust_engine/src/model.rs +33 -0
  79. bit_ttt_engine-0.6.0/crates/rust_engine/src/optim/mod.rs +1 -0
  80. bit_ttt_engine-0.6.0/crates/rust_engine/src/optim/schedule_free.rs +185 -0
  81. bit_ttt_engine-0.6.0/crates/rust_engine/src/pack/install.rs +302 -0
  82. bit_ttt_engine-0.6.0/crates/rust_engine/src/pack/lib.rs +80 -0
  83. bit_ttt_engine-0.6.0/crates/rust_engine/src/pack/reader.rs +209 -0
  84. bit_ttt_engine-0.6.0/crates/rust_engine/src/pack/types.rs +204 -0
  85. bit_ttt_engine-0.6.0/crates/rust_engine/src/pack/verify.rs +232 -0
  86. bit_ttt_engine-0.6.0/crates/rust_engine/src/pack/writer.rs +166 -0
  87. bit_ttt_engine-0.6.0/crates/rust_engine/src/paged_attention/block_manager.rs +176 -0
  88. bit_ttt_engine-0.6.0/crates/rust_engine/src/paged_attention/cache_engine.rs +165 -0
  89. bit_ttt_engine-0.6.0/crates/rust_engine/src/paged_attention/mod.rs +10 -0
  90. bit_ttt_engine-0.6.0/crates/rust_engine/src/python.rs +432 -0
  91. bit_ttt_engine-0.6.0/crates/rust_engine/src/scheduler/mod.rs +377 -0
  92. bit_ttt_engine-0.6.0/crates/rust_engine/src/speculative/mod.rs +369 -0
  93. bit_ttt_engine-0.6.0/crates/rust_engine/src/tests/attention_test.rs +376 -0
  94. bit_ttt_engine-0.6.0/crates/rust_engine/src/tests/bit_linear_test.rs +281 -0
  95. bit_ttt_engine-0.6.0/crates/rust_engine/src/tests/format_diagnosis.rs +404 -0
  96. bit_ttt_engine-0.6.0/crates/rust_engine/src/tests/isomorphic_test.rs +272 -0
  97. bit_ttt_engine-0.6.0/crates/rust_engine/src/tests/ttt_test.rs +426 -0
  98. bit_ttt_engine-0.6.0/crates/rust_engine/src/wasm.rs +366 -0
  99. bit_ttt_engine-0.6.0/crates/rust_engine/tests/accuracy_test.rs +107 -0
  100. bit_ttt_engine-0.6.0/crates/rust_engine/tests/bitllama_e2e.rs +57 -0
  101. bit_ttt_engine-0.6.0/crates/rust_engine/tests/common.rs +68 -0
  102. bit_ttt_engine-0.6.0/crates/rust_engine/tests/gguf_e2e.rs +103 -0
  103. bit_ttt_engine-0.6.0/crates/rust_engine/tests/load_direct_benchmark.rs +188 -0
  104. bit_ttt_engine-0.6.0/crates/rust_engine/tests/load_packed_e2e.rs +106 -0
  105. bit_ttt_engine-0.6.0/pyproject.toml +26 -0
@@ -0,0 +1,91 @@
1
+ Metadata-Version: 2.4
2
+ Name: bit-ttt-engine
3
+ Version: 0.6.0
4
+ Classifier: Development Status :: 4 - Beta
5
+ Classifier: Programming Language :: Rust
6
+ Classifier: Programming Language :: Python :: Implementation :: CPython
7
+ Classifier: Programming Language :: Python :: 3
8
+ Classifier: License :: OSI Approved :: MIT License
9
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
10
+ License-File: LICENSE
11
+ Summary: 1.58-bit Quantization + Test-Time Training (TTT) Implementation in Pure Rust
12
+ Keywords: llm,rust,ttt,quantization,ai
13
+ Author: imonoonoko
14
+ License: MIT
15
+ Requires-Python: >=3.8
16
+ Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
17
+
18
+ # Bit-TTT Engine: High-Performance Brain Core
19
+
20
+ [![Rust](https://img.shields.io/badge/rust-1.70+-orange.svg)](https://www.rust-lang.org/)
21
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
22
+ [![PyPI](https://img.shields.io/pypi/v/bit-ttt-engine.svg)](https://pypi.org/project/bit-ttt-engine/)
23
+
24
+ **1.58-bit Quantization + Test-Time Training (TTT)** Implementation in Pure Rust.
25
+
26
+ This package provides Python bindings for the Bit-TTT Engine, allowing you to run ultra-light ternary LLMs with real-time adaptation.
27
+
28
+ ## ✨ Features
29
+ 1. **Ultra-Light**: Runs large LLMs on cheap hardware using **1.58-bit (ternary) weights**.
30
+ 2. **Adaptive (TTT)**: Learns *while* inferring, adapting to context in real-time.
31
+ 3. **Pure Rust**: High performance with minimal dependencies.
32
+
33
+ ## 🚀 Installation
34
+
35
+ ```bash
36
+ pip install bit-ttt-engine
37
+ ```
38
+
39
+ ## 💻 Usage
40
+
41
+ ```python
42
+ import cortex_rust
43
+ import json
44
+
45
+ # Initialize Configuration
46
+ config = cortex_rust.BitLlamaConfig(
47
+ vocab_size=32000,
48
+ hidden_dim=512,
49
+ num_layers=12,
50
+ inner_lr=0.001
51
+ )
52
+
53
+ # Initialize Model (Inference)
54
+ model = cortex_rust.BitLlama(
55
+ config=config,
56
+ checkpoint_path="path/to/model.safetensors",
57
+ device="cpu", # or "cuda"
58
+ tokenizer_path="path/to/tokenizer.json"
59
+ )
60
+
61
+ # Generate Text
62
+ output = model.generate(prompt="Hello, world!", max_tokens=50)
63
+ print(output)
64
+ ```
65
+
66
+ ## 🏗️ Training (TTT)
67
+
68
+ ```python
69
+ trainer = cortex_rust.PyTrainer(
70
+ config=config,
71
+ checkpoint_path="path/to/model.safetensors",
72
+ device="cuda"
73
+ )
74
+
75
+ # Single training step
76
+ loss = trainer.train_step(input_ids=[...], targets=[...])
77
+ print(f"Loss: {loss}")
78
+
79
+ # Save checkpoint
80
+ trainer.save_checkpoint("model_updated.safetensors")
81
+ ```
82
+
83
+ ## 📖 Documentation
84
+ For more details, please visit the [GitHub repository](https://github.com/imonoonoko/Bit-TTT-Engine).
85
+
86
+ ## 🙏 Acknowledgments
87
+ This project incorporates ideas and techniques inspired by the DroPE method published by Sakana AI.
88
+
89
+ ## 💖 License
90
+ MIT License
91
+
@@ -0,0 +1,73 @@
1
+ # Bit-TTT Engine: High-Performance Brain Core
2
+
3
+ [![Rust](https://img.shields.io/badge/rust-1.70+-orange.svg)](https://www.rust-lang.org/)
4
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
5
+ [![PyPI](https://img.shields.io/pypi/v/bit-ttt-engine.svg)](https://pypi.org/project/bit-ttt-engine/)
6
+
7
+ **1.58-bit Quantization + Test-Time Training (TTT)** Implementation in Pure Rust.
8
+
9
+ This package provides Python bindings for the Bit-TTT Engine, allowing you to run ultra-light ternary LLMs with real-time adaptation.
10
+
11
+ ## ✨ Features
12
+ 1. **Ultra-Light**: Runs large LLMs on cheap hardware using **1.58-bit (ternary) weights**.
13
+ 2. **Adaptive (TTT)**: Learns *while* inferring, adapting to context in real-time.
14
+ 3. **Pure Rust**: High performance with minimal dependencies.
15
+
16
+ ## 🚀 Installation
17
+
18
+ ```bash
19
+ pip install bit-ttt-engine
20
+ ```
21
+
22
+ ## 💻 Usage
23
+
24
+ ```python
25
+ import cortex_rust
26
+ import json
27
+
28
+ # Initialize Configuration
29
+ config = cortex_rust.BitLlamaConfig(
30
+ vocab_size=32000,
31
+ hidden_dim=512,
32
+ num_layers=12,
33
+ inner_lr=0.001
34
+ )
35
+
36
+ # Initialize Model (Inference)
37
+ model = cortex_rust.BitLlama(
38
+ config=config,
39
+ checkpoint_path="path/to/model.safetensors",
40
+ device="cpu", # or "cuda"
41
+ tokenizer_path="path/to/tokenizer.json"
42
+ )
43
+
44
+ # Generate Text
45
+ output = model.generate(prompt="Hello, world!", max_tokens=50)
46
+ print(output)
47
+ ```
48
+
49
+ ## 🏗️ Training (TTT)
50
+
51
+ ```python
52
+ trainer = cortex_rust.PyTrainer(
53
+ config=config,
54
+ checkpoint_path="path/to/model.safetensors",
55
+ device="cuda"
56
+ )
57
+
58
+ # Single training step
59
+ loss = trainer.train_step(input_ids=[...], targets=[...])
60
+ print(f"Loss: {loss}")
61
+
62
+ # Save checkpoint
63
+ trainer.save_checkpoint("model_updated.safetensors")
64
+ ```
65
+
66
+ ## 📖 Documentation
67
+ For more details, please visit the [GitHub repository](https://github.com/imonoonoko/Bit-TTT-Engine).
68
+
69
+ ## 🙏 Acknowledgments
70
+ This project incorporates ideas and techniques inspired by the DroPE method published by Sakana AI.
71
+
72
+ ## 💖 License
73
+ MIT License
@@ -0,0 +1,2 @@
1
+ [build]
2
+ rustflags = ["-C", "target-cpu=native"]
@@ -0,0 +1,99 @@
1
+ # Changelog
2
+
3
+ All notable changes to `cortex_rust` will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [Unreleased]
9
+
10
+ ## [0.5.0] - 2026-01-28
11
+
12
+ ### 🚨 Breaking Changes
13
+ - `paged_attention_v1()` signature changed: `num_kv_heads` now required for GQA support
14
+ - `CacheConfig::new()` parameter order reorganized for clarity
15
+
16
+ ### Added
17
+ - **GQA (Grouped Query Attention) support** in PagedAttention kernel
18
+ - `num_kv_heads` parameter for proper KV head mapping
19
+ - Detailed profiling methods: `forward_profiled()`, `forward_timed()`
20
+ - `PreallocKVCache` - pre-allocated KV cache to reduce allocation overhead
21
+
22
+ ### Fixed
23
+ - **[CRITICAL] Long sequence quality degradation** - see "Known Issues Resolved" below
24
+ - Shared memory overflow for sequences > 2048 tokens
25
+ - Warp reduction bug in softmax max computation
26
+ - Context length calculation in block manager (off-by-one)
27
+
28
+ ### Changed
29
+ - Increased default `max_context_len` to 4096 for shared memory allocation
30
+ - Improved error messages with actual tensor dimensions
31
+ - Block size default changed from 16 to 32 for better memory efficiency
32
+
33
+ ### Known Issues Resolved
34
+ #### Long Sequence Quality Degradation Root Causes
35
+
36
+ 1. **Query Loading Bug in PagedAttention** (`paged_attention.cu`)
37
+ - Query was loaded into per-thread local array `float q[128]`
38
+ - Each thread only loaded some elements, but inner loop accessed all
39
+ - **Result**: Reading uninitialized memory → garbage attention scores
40
+ - **Fix**: Load query into shared memory (`q_shared`)
41
+ - **Commit**: a6f14c8
42
+
43
+ 2. **Block Manager Token Count** (`block_manager.rs`)
44
+ - `allocate_slots()` used `blocks.len() * block_size` for current position
45
+ - With block_size=16, after 5 tokens: calculated 16 instead of 5
46
+ - **Result**: New tokens written to wrong slots (e.g., slot 16 instead of 5)
47
+ - **Fix**: Added `seq_to_num_tokens` HashMap to track actual token count
48
+ - **Commit**: f556c15
49
+
50
+ #### Additional Fixes in 0.5.0
51
+ 3. **unpack_4bit offset bug** (`linear_4bit.rs`)
52
+ - Old: `if w > 7 { w - 16 } else { w }` (wrong)
53
+ - New: `w - 8` (Python compatible)
54
+ - **Commit**: 7ffcdc2
55
+
56
+ 4. **Dynamic shared memory** (`paged_attention.rs`)
57
+ - Old: Hardcoded `max_context_len = 2048`
58
+ - New: Calculated from actual `context_lens` tensor
59
+ - **Commit**: 9a748ec
60
+
61
+ #### Verified (No Bug)
62
+ - Warp reduction: Works correctly with `blockDim.x = 32`
63
+
64
+ ## [0.4.0] - 2025-01-XX
65
+
66
+ ### Added
67
+ - PagedAttention CUDA kernels (`paged_attention.cu`)
68
+ - `BlockManager` for cache block allocation
69
+ - `CacheEngine` for GPU memory management
70
+ - `generate_paged()` for memory-efficient inference
71
+ - 4-bit quantized model support (`Llama4Bit`)
72
+ - Fused CUDA kernels: `fused_silu_mul_cuda`, `softmax_cuda`
73
+
74
+ ### Changed
75
+ - RoPE implementation moved to precomputed caches
76
+ - KV cache supports both traditional (`Tensor::cat`) and paged modes
77
+
78
+ ### Fixed
79
+ - Causal mask offset calculation for decode phase
80
+
81
+ ## [0.3.0] - 2025-01-XX
82
+
83
+ ### Added
84
+ - Initial CUDA kernel infrastructure
85
+ - 4-bit GEMM implementation (`gemm_4bit`)
86
+ - RMSNorm layer
87
+
88
+ ## [0.2.0] - 2025-01-XX
89
+
90
+ ### Added
91
+ - Basic transformer block implementation
92
+ - Embedding and LM head layers
93
+
94
+ ## [0.1.0] - 2025-01-XX
95
+
96
+ ### Added
97
+ - Initial project structure
98
+ - Candle-core integration
99
+ - Basic tensor operations
@@ -0,0 +1,249 @@
1
+ # This file is automatically @generated by Cargo.
2
+ # It is not intended for manual editing.
3
+ version = 4
4
+
5
+ [[package]]
6
+ name = "autocfg"
7
+ version = "1.5.0"
8
+ source = "registry+https://github.com/rust-lang/crates.io-index"
9
+ checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
10
+
11
+ [[package]]
12
+ name = "cfg-if"
13
+ version = "1.0.4"
14
+ source = "registry+https://github.com/rust-lang/crates.io-index"
15
+ checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
16
+
17
+ [[package]]
18
+ name = "cortex_rust"
19
+ version = "0.1.0"
20
+ dependencies = [
21
+ "libc",
22
+ "ndarray",
23
+ "rand",
24
+ ]
25
+
26
+ [[package]]
27
+ name = "crossbeam-deque"
28
+ version = "0.8.6"
29
+ source = "registry+https://github.com/rust-lang/crates.io-index"
30
+ checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
31
+ dependencies = [
32
+ "crossbeam-epoch",
33
+ "crossbeam-utils",
34
+ ]
35
+
36
+ [[package]]
37
+ name = "crossbeam-epoch"
38
+ version = "0.9.18"
39
+ source = "registry+https://github.com/rust-lang/crates.io-index"
40
+ checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
41
+ dependencies = [
42
+ "crossbeam-utils",
43
+ ]
44
+
45
+ [[package]]
46
+ name = "crossbeam-utils"
47
+ version = "0.8.21"
48
+ source = "registry+https://github.com/rust-lang/crates.io-index"
49
+ checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
50
+
51
+ [[package]]
52
+ name = "either"
53
+ version = "1.15.0"
54
+ source = "registry+https://github.com/rust-lang/crates.io-index"
55
+ checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
56
+
57
+ [[package]]
58
+ name = "getrandom"
59
+ version = "0.2.16"
60
+ source = "registry+https://github.com/rust-lang/crates.io-index"
61
+ checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592"
62
+ dependencies = [
63
+ "cfg-if",
64
+ "libc",
65
+ "wasi",
66
+ ]
67
+
68
+ [[package]]
69
+ name = "libc"
70
+ version = "0.2.179"
71
+ source = "registry+https://github.com/rust-lang/crates.io-index"
72
+ checksum = "c5a2d376baa530d1238d133232d15e239abad80d05838b4b59354e5268af431f"
73
+
74
+ [[package]]
75
+ name = "matrixmultiply"
76
+ version = "0.3.10"
77
+ source = "registry+https://github.com/rust-lang/crates.io-index"
78
+ checksum = "a06de3016e9fae57a36fd14dba131fccf49f74b40b7fbdb472f96e361ec71a08"
79
+ dependencies = [
80
+ "autocfg",
81
+ "rawpointer",
82
+ ]
83
+
84
+ [[package]]
85
+ name = "ndarray"
86
+ version = "0.15.6"
87
+ source = "registry+https://github.com/rust-lang/crates.io-index"
88
+ checksum = "adb12d4e967ec485a5f71c6311fe28158e9d6f4bc4a447b474184d0f91a8fa32"
89
+ dependencies = [
90
+ "matrixmultiply",
91
+ "num-complex",
92
+ "num-integer",
93
+ "num-traits",
94
+ "rawpointer",
95
+ "rayon",
96
+ ]
97
+
98
+ [[package]]
99
+ name = "num-complex"
100
+ version = "0.4.6"
101
+ source = "registry+https://github.com/rust-lang/crates.io-index"
102
+ checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495"
103
+ dependencies = [
104
+ "num-traits",
105
+ ]
106
+
107
+ [[package]]
108
+ name = "num-integer"
109
+ version = "0.1.46"
110
+ source = "registry+https://github.com/rust-lang/crates.io-index"
111
+ checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f"
112
+ dependencies = [
113
+ "num-traits",
114
+ ]
115
+
116
+ [[package]]
117
+ name = "num-traits"
118
+ version = "0.2.19"
119
+ source = "registry+https://github.com/rust-lang/crates.io-index"
120
+ checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
121
+ dependencies = [
122
+ "autocfg",
123
+ ]
124
+
125
+ [[package]]
126
+ name = "ppv-lite86"
127
+ version = "0.2.21"
128
+ source = "registry+https://github.com/rust-lang/crates.io-index"
129
+ checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9"
130
+ dependencies = [
131
+ "zerocopy",
132
+ ]
133
+
134
+ [[package]]
135
+ name = "proc-macro2"
136
+ version = "1.0.105"
137
+ source = "registry+https://github.com/rust-lang/crates.io-index"
138
+ checksum = "535d180e0ecab6268a3e718bb9fd44db66bbbc256257165fc699dadf70d16fe7"
139
+ dependencies = [
140
+ "unicode-ident",
141
+ ]
142
+
143
+ [[package]]
144
+ name = "quote"
145
+ version = "1.0.43"
146
+ source = "registry+https://github.com/rust-lang/crates.io-index"
147
+ checksum = "dc74d9a594b72ae6656596548f56f667211f8a97b3d4c3d467150794690dc40a"
148
+ dependencies = [
149
+ "proc-macro2",
150
+ ]
151
+
152
+ [[package]]
153
+ name = "rand"
154
+ version = "0.8.5"
155
+ source = "registry+https://github.com/rust-lang/crates.io-index"
156
+ checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
157
+ dependencies = [
158
+ "libc",
159
+ "rand_chacha",
160
+ "rand_core",
161
+ ]
162
+
163
+ [[package]]
164
+ name = "rand_chacha"
165
+ version = "0.3.1"
166
+ source = "registry+https://github.com/rust-lang/crates.io-index"
167
+ checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
168
+ dependencies = [
169
+ "ppv-lite86",
170
+ "rand_core",
171
+ ]
172
+
173
+ [[package]]
174
+ name = "rand_core"
175
+ version = "0.6.4"
176
+ source = "registry+https://github.com/rust-lang/crates.io-index"
177
+ checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
178
+ dependencies = [
179
+ "getrandom",
180
+ ]
181
+
182
+ [[package]]
183
+ name = "rawpointer"
184
+ version = "0.2.1"
185
+ source = "registry+https://github.com/rust-lang/crates.io-index"
186
+ checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3"
187
+
188
+ [[package]]
189
+ name = "rayon"
190
+ version = "1.11.0"
191
+ source = "registry+https://github.com/rust-lang/crates.io-index"
192
+ checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f"
193
+ dependencies = [
194
+ "either",
195
+ "rayon-core",
196
+ ]
197
+
198
+ [[package]]
199
+ name = "rayon-core"
200
+ version = "1.13.0"
201
+ source = "registry+https://github.com/rust-lang/crates.io-index"
202
+ checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91"
203
+ dependencies = [
204
+ "crossbeam-deque",
205
+ "crossbeam-utils",
206
+ ]
207
+
208
+ [[package]]
209
+ name = "syn"
210
+ version = "2.0.114"
211
+ source = "registry+https://github.com/rust-lang/crates.io-index"
212
+ checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a"
213
+ dependencies = [
214
+ "proc-macro2",
215
+ "quote",
216
+ "unicode-ident",
217
+ ]
218
+
219
+ [[package]]
220
+ name = "unicode-ident"
221
+ version = "1.0.22"
222
+ source = "registry+https://github.com/rust-lang/crates.io-index"
223
+ checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5"
224
+
225
+ [[package]]
226
+ name = "wasi"
227
+ version = "0.11.1+wasi-snapshot-preview1"
228
+ source = "registry+https://github.com/rust-lang/crates.io-index"
229
+ checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
230
+
231
+ [[package]]
232
+ name = "zerocopy"
233
+ version = "0.8.32"
234
+ source = "registry+https://github.com/rust-lang/crates.io-index"
235
+ checksum = "1fabae64378cb18147bb18bca364e63bdbe72a0ffe4adf0addfec8aa166b2c56"
236
+ dependencies = [
237
+ "zerocopy-derive",
238
+ ]
239
+
240
+ [[package]]
241
+ name = "zerocopy-derive"
242
+ version = "0.8.32"
243
+ source = "registry+https://github.com/rust-lang/crates.io-index"
244
+ checksum = "c9c2d862265a8bb4471d87e033e730f536e2a285cc7cb05dbce09a2a97075f90"
245
+ dependencies = [
246
+ "proc-macro2",
247
+ "quote",
248
+ "syn",
249
+ ]
@@ -0,0 +1,100 @@
1
+ [package]
2
+ name = "cortex_rust"
3
+ version = "0.6.0"
4
+ edition = "2021"
5
+ readme = "README.md"
6
+
7
+ [lib]
8
+ crate-type = ["cdylib", "rlib"]
9
+
10
+ [dependencies]
11
+ rand = "0.8"
12
+ libc = "0.2"
13
+ candle-core = { version = "=0.8.4", default-features = false }
14
+ candle-nn = { version = "=0.8.4", default-features = false }
15
+ anyhow = "1.0"
16
+ thiserror = "2.0"
17
+ serde = { version = "1.0", features = ["derive"] }
18
+ serde_json = "1.0"
19
+
20
+ # WASM specific dependencies
21
+ wasm-bindgen = { version = "0.2", optional = true }
22
+ js-sys = { version = "0.3", optional = true }
23
+ web-sys = { version = "0.3", optional = true, features = ["console"] }
24
+ console_error_panic_hook = { version = "0.1", optional = true }
25
+ getrandom = { version = "0.3", optional = true, features = ["wasm_js"] }
26
+
27
+ # CUDA specific dependencies
28
+ cuda-runtime-sys = { version = "0.3.0-alpha.1", optional = true }
29
+
30
+ # General dependencies (entries marked `optional = true` are enabled through [features])
31
+ pyo3 = { version = "0.24", features = ["extension-module", "macros"], optional = true }
32
+ byteorder = "1.5.0"
33
+ half = "2.3"
34
+ rayon = "1.8"
35
+ tracing = "0.1"
36
+ tokenizers = { version = "0.22", optional = true }
37
+ safetensors = { version = "0.5", optional = true }
38
+ env_logger = { version = "0.11", optional = true }
39
+ windows-sys = { version = "0.59", features = ["Win32_System_ProcessStatus", "Win32_System_Threading"], optional = true }
40
+ reqwest = { version = "0.12", features = ["blocking"] }
41
+
42
+ [build-dependencies]
43
+ cc = "1.0"
44
+ anyhow = "1.0"
45
+ glob = "0.3"
46
+
47
+ # Binaries with extra features (require tokenizers)
48
+ [[bin]]
49
+ name = "test_13b"
50
+ path = "src/bin/test_13b.rs"
51
+ required-features = ["tokenizers"]
52
+
53
+ [[bin]]
54
+ name = "bench_tinyllama"
55
+ path = "src/bin/bench_tinyllama.rs"
56
+ required-features = ["tokenizers"]
57
+
58
+ [[bin]]
59
+ name = "quick_gen"
60
+ path = "src/bin/quick_gen.rs"
61
+ required-features = ["tokenizers"]
62
+
63
+ [[bin]]
64
+ name = "bench_4bit_gpu"
65
+ path = "src/bin/bench_4bit_gpu.rs"
66
+ required-features = ["tokenizers"]
67
+
68
+ [[bin]]
69
+ name = "run_4bit_llama"
70
+ path = "src/bin/run_4bit_llama.rs"
71
+ required-features = ["tokenizers"]
72
+
73
+ [[bin]]
74
+ name = "test_4bit_inference"
75
+ path = "src/bin/test_4bit_inference.rs"
76
+ required-features = ["tokenizers"]
77
+
78
+ [features]
79
+ default = ["python", "tokenizers"]
80
+ python = ["dep:pyo3"]
81
+ safetensors = ["dep:safetensors"]
82
+ tokenizers = ["dep:tokenizers"]
83
+ dev-bins = ["dep:safetensors", "dep:env_logger", "dep:windows-sys", "tokenizers"]
84
+ wasm = [
85
+ "dep:wasm-bindgen",
86
+ "dep:js-sys",
87
+ "dep:web-sys",
88
+ "dep:console_error_panic_hook",
89
+ "dep:getrandom"
90
+ ]
91
+ cuda = [
92
+ "dep:cuda-runtime-sys",
93
+ "candle-core/cuda"
94
+ ]
95
+ # Flash Attention for optimized attention computation
96
+ flash-attention = []
97
+
98
+ # WASM target-specific dependencies
99
+ [target.'cfg(target_arch = "wasm32")'.dependencies]
100
+ getrandom = { version = "0.2", features = ["js"] }
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 imonoonoko
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,61 @@
1
+ # Cortex Rust Engine - WebAssembly Support
2
+
3
+ ## Prerequisites
4
+ - Rust (latest stable version)
5
+ - wasm-pack
6
+ - Node.js and npm
7
+
8
+ ## Installation
9
+ 1. Install wasm-pack:
10
+ ```bash
11
+ cargo install wasm-pack
12
+ ```
13
+
14
+ 2. Add WebAssembly target:
15
+ ```bash
16
+ rustup target add wasm32-unknown-unknown
17
+ ```
18
+
19
+ ## Building the WebAssembly Package
20
+ ```bash
21
+ # Build WASM package
22
+ wasm-pack build --target web --features wasm
23
+
24
+ # Build npm package
25
+ wasm-pack build --target bundler --features wasm
26
+ ```
27
+
28
+ ## Usage in Browser
29
+ ```javascript
30
+ import init, { wasm_infer, wasm_log, init_panic_hook } from './pkg/cortex_rust.js';
31
+
32
+ async function run() {
33
+ // Initialize WASM module
34
+ await init();
35
+
36
+ // Set up panic hook for better error reporting
37
+ init_panic_hook();
38
+
39
+ // Basic inference
40
+ try {
41
+ const result = wasm_infer("Your input text here");
42
+ console.log(result);
43
+ } catch (error) {
44
+ console.error("Inference error:", error);
45
+ }
46
+
47
+ // Optional logging
48
+ wasm_log("WebAssembly module initialized");
49
+ }
50
+
51
+ run();
52
+ ```
53
+
54
+ ## Limitations
55
+ - No CUDA support (CPU-only)
56
+ - Limited file I/O capabilities
57
+ - Memory constraints inherent to WebAssembly
58
+
59
+ ## Notes
60
+ - Requires modern browser with WebAssembly support
61
+ - Performance may differ from native Rust execution