natc 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
natc-0.1.0/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 NATC Contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
22
+
natc-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,108 @@
1
+ Metadata-Version: 2.4
2
+ Name: natc
3
+ Version: 0.1.0
4
+ Summary: NeuroSymbolic Adaptive Tensor Compression for CPU-first dynamic inference.
5
+ Author: NATC Contributors
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/Jatinverma0786/NATC
8
+ Project-URL: Documentation, https://github.com/Jatinverma0786/NATC#readme
9
+ Project-URL: Repository, https://github.com/Jatinverma0786/NATC
10
+ Keywords: tensor-compression,inference,llm,sparse-attention,neural-cache
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
19
+ Requires-Python: >=3.11
20
+ Description-Content-Type: text/markdown
21
+ License-File: LICENSE
22
+ Requires-Dist: numpy>=1.24
23
+ Requires-Dist: torch>=2.1
24
+ Requires-Dist: transformers>=4.40
25
+ Requires-Dist: accelerate>=0.28
26
+ Requires-Dist: sentence-transformers>=2.6
27
+ Requires-Dist: faiss-cpu>=1.8
28
+ Requires-Dist: numba>=0.59
29
+ Requires-Dist: onnxruntime>=1.17
30
+ Requires-Dist: safetensors>=0.4
31
+ Requires-Dist: scipy>=1.10
32
+ Provides-Extra: openvino
33
+ Requires-Dist: openvino>=2024.0; extra == "openvino"
34
+ Provides-Extra: triton
35
+ Requires-Dist: triton>=2.3; extra == "triton"
36
+ Provides-Extra: llama-cpp
37
+ Requires-Dist: llama-cpp-python>=0.2; extra == "llama-cpp"
38
+ Provides-Extra: dev
39
+ Requires-Dist: pytest>=8.0; extra == "dev"
40
+ Requires-Dist: pytest-cov>=5.0; extra == "dev"
41
+ Requires-Dist: ruff>=0.4; extra == "dev"
42
+ Requires-Dist: mypy>=1.8; extra == "dev"
43
+ Requires-Dist: build>=1.2; extra == "dev"
44
+ Requires-Dist: twine>=5.0; extra == "dev"
45
+ Dynamic: license-file
46
+
47
+ # NATC
48
+
49
+ NATC, short for NeuroSymbolic Adaptive Tensor Compression, is a Python framework for
50
+ experimenting with a CPU-first dynamic inference architecture:
51
+
52
+ - latent Knowledge DNA encoding for tensors and model state dictionaries
53
+ - on-demand weight reconstruction and sparse materialization
54
+ - prompt-routed reasoning capsules
55
+ - predictive sparse attention routing
56
+ - persistent neural fragment caching
57
+ - recursive fractal tensor storage
58
+ - prompt compilation into execution plans
59
+ - CPU-oriented kernels and benchmark utilities
60
+
61
+ Install from a local checkout:
62
+
63
+ ```bash
64
+ pip install -e ".[dev]"
65
+ ```
66
+
67
+ Public API:
68
+
69
+ ```python
70
+ from natc import NATCModel
71
+
72
+ model = NATCModel.from_pretrained("distilgpt2")
73
+ model.enable_dna()
74
+ model.enable_capsules()
75
+ model.enable_sparse_attention()
76
+ model.enable_cache()
77
+ model.enable_cpu_acceleration()
78
+
79
+ print(model.generate("Explain quantum mechanics"))
80
+ ```
81
+
82
+ The HuggingFace backend is loaded lazily. If a remote model cannot be downloaded in
83
+ the current environment, NATC falls back to a deterministic local text backend so the
84
+ compression, routing, caching, and compiler pipeline remains testable offline.
85
+
86
+ ## Quick Encoder Example
87
+
88
+ ```python
89
+ import numpy as np
90
+ from natc.dna import encode_model, decode_model
91
+
92
+ state = {"linear.weight": np.random.default_rng(0).normal(size=(64, 32))}
93
+ dna = encode_model(state, rank=8)
94
+ reconstructed = decode_model(dna)
95
+
96
+ print(dna.compression_ratio())
97
+ print(reconstructed["linear.weight"].shape)
98
+ ```
99
+
100
+ ## Benchmark
101
+
102
+ ```bash
103
+ natc-benchmark --layers 4 --rows 128 --cols 128 --rank 16
104
+ ```
105
+
106
+ The benchmark reports compression ratio, memory saved, synthetic tokens/sec, cache hit
107
+ ratio, latency, throughput, and CPU efficiency in JSON.
108
+
natc-0.1.0/README.md ADDED
@@ -0,0 +1,62 @@
1
+ # NATC
2
+
3
+ NATC, short for NeuroSymbolic Adaptive Tensor Compression, is a Python framework for
4
+ experimenting with a CPU-first dynamic inference architecture:
5
+
6
+ - latent Knowledge DNA encoding for tensors and model state dictionaries
7
+ - on-demand weight reconstruction and sparse materialization
8
+ - prompt-routed reasoning capsules
9
+ - predictive sparse attention routing
10
+ - persistent neural fragment caching
11
+ - recursive fractal tensor storage
12
+ - prompt compilation into execution plans
13
+ - CPU-oriented kernels and benchmark utilities
14
+
15
+ Install from a local checkout:
16
+
17
+ ```bash
18
+ pip install -e ".[dev]"
19
+ ```
20
+
21
+ Public API:
22
+
23
+ ```python
24
+ from natc import NATCModel
25
+
26
+ model = NATCModel.from_pretrained("distilgpt2")
27
+ model.enable_dna()
28
+ model.enable_capsules()
29
+ model.enable_sparse_attention()
30
+ model.enable_cache()
31
+ model.enable_cpu_acceleration()
32
+
33
+ print(model.generate("Explain quantum mechanics"))
34
+ ```
35
+
36
+ The HuggingFace backend is loaded lazily. If a remote model cannot be downloaded in
37
+ the current environment, NATC falls back to a deterministic local text backend so the
38
+ compression, routing, caching, and compiler pipeline remains testable offline.
39
+
40
+ ## Quick Encoder Example
41
+
42
+ ```python
43
+ import numpy as np
44
+ from natc.dna import encode_model, decode_model
45
+
46
+ state = {"linear.weight": np.random.default_rng(0).normal(size=(64, 32))}
47
+ dna = encode_model(state, rank=8)
48
+ reconstructed = decode_model(dna)
49
+
50
+ print(dna.compression_ratio())
51
+ print(reconstructed["linear.weight"].shape)
52
+ ```
53
+
54
+ ## Benchmark
55
+
56
+ ```bash
57
+ natc-benchmark --layers 4 --rows 128 --cols 128 --rank 16
58
+ ```
59
+
60
+ The benchmark reports compression ratio, memory saved, synthetic tokens/sec, cache hit
61
+ ratio, latency, throughput, and CPU efficiency in JSON.
62
+
@@ -0,0 +1,7 @@
1
+ """Public package interface for NATC."""
2
+
3
+ from natc.config import NATCConfig
4
+ from natc.model import NATCModel
5
+
6
+ __all__ = ["NATCConfig", "NATCModel"]
7
+
@@ -0,0 +1,123 @@
1
+ """Predictive sparse attention routing."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+
7
+ import numpy as np
8
+
9
+ from natc.utils import text_embedding
10
+
11
+
12
+ @dataclass(slots=True)
13
+ class SparseAttentionResult:
14
+ """Sparse attention output and its selected graph."""
15
+
16
+ output: np.ndarray
17
+ weights: np.ndarray
18
+ adjacency: np.ndarray
19
+
20
+
21
class SparseGraphBuilder:
    """Turn a similarity matrix into a boolean top-k adjacency mask."""

    def build(self, scores: np.ndarray, *, top_k: int) -> np.ndarray:
        """Flag the ``top_k`` highest-scoring columns of every row.

        Args:
            scores: 2D score matrix; one row per source, one column per target.
            top_k: Requested edges per row, clamped to ``[1, number of columns]``.

        Returns:
            Boolean array shaped like ``scores`` with ``True`` on kept edges.

        Raises:
            ValueError: If ``scores`` is not two-dimensional.
        """
        if scores.ndim != 2:
            raise ValueError("scores must be a 2D matrix")
        n_rows, n_cols = scores.shape
        keep = min(top_k, n_cols)
        if keep < 1:
            keep = 1
        # argpartition only guarantees that the last `keep` slots of each row
        # hold the largest entries; their internal order does not matter here.
        selected = np.argpartition(scores, -keep, axis=1)[:, -keep:]
        mask = np.zeros_like(scores, dtype=bool)
        mask[np.arange(n_rows)[:, None], selected] = True
        return mask
33
+
34
+
35
+ class AttentionRouter:
36
+ """Predict likely attention paths before dense attention execution."""
37
+
38
+ def __init__(self, *, top_k: int = 32) -> None:
39
+ self.top_k = top_k
40
+ self.graph_builder = SparseGraphBuilder()
41
+
42
+ def route_tokens(self, tokens: list[str], *, top_k: int | None = None) -> np.ndarray:
43
+ if not tokens:
44
+ return np.zeros((0, 0), dtype=bool)
45
+ embeddings = np.stack([text_embedding(token, dimensions=64) for token in tokens])
46
+ scores = embeddings @ embeddings.T
47
+ return self.graph_builder.build(scores, top_k=top_k or self.top_k)
48
+
49
+ def route_embeddings(self, embeddings: np.ndarray, *, top_k: int | None = None) -> np.ndarray:
50
+ embeddings = _as_2d(embeddings)
51
+ scores = embeddings @ embeddings.T / max(1.0, float(embeddings.shape[-1]) ** 0.5)
52
+ return self.graph_builder.build(scores, top_k=top_k or self.top_k)
53
+
54
+
55
class PredictiveAttention:
    """Compute sparse scaled dot-product attention using predicted top-k edges."""

    def __init__(self, *, top_k: int = 32) -> None:
        """Create the router that holds the default ``top_k`` and graph builder."""
        self.router = AttentionRouter(top_k=top_k)

    def __call__(
        self,
        query: np.ndarray,
        key: np.ndarray,
        value: np.ndarray,
        *,
        top_k: int | None = None,
    ) -> SparseAttentionResult:
        """Alias for :meth:`forward`."""
        return self.forward(query, key, value, top_k=top_k)

    def forward(
        self,
        query: np.ndarray,
        key: np.ndarray,
        value: np.ndarray,
        *,
        top_k: int | None = None,
    ) -> SparseAttentionResult:
        """Attend from ``query`` to ``key``/``value`` over the top-k scoring keys.

        Args:
            query: 1D or 2D array; a 1D input is treated as a single row.
            key: 1D or 2D array whose last dimension matches ``query``.
            value: 1D or 2D array with one row per key row.
            top_k: Keys kept per query row; ``None`` uses the router default.

        Returns:
            ``SparseAttentionResult`` with float32 ``output`` and ``weights``
            and the boolean ``adjacency`` mask of kept edges.

        Raises:
            ValueError: If the query/key feature sizes or the key/value row
                counts disagree, or an input is not 1D/2D.
        """
        query = _as_2d(query)
        key = _as_2d(key)
        value = _as_2d(value)
        if query.shape[-1] != key.shape[-1]:
            raise ValueError("query and key dimensions must match")
        if key.shape[0] != value.shape[0]:
            raise ValueError("key and value token counts must match")

        scores = query @ key.T / max(1.0, float(query.shape[-1]) ** 0.5)
        # Only None means "use the default"; an explicit 0 is passed through
        # and clamped by the builder instead of silently becoming the default.
        effective_top_k = self.router.top_k if top_k is None else top_k
        # Reuse the router's builder rather than allocating one per call.
        adjacency = self.router.graph_builder.build(scores, top_k=effective_top_k)
        # Dropped edges get -inf so the softmax assigns them zero weight.
        masked_scores = np.where(adjacency, scores, -np.inf)
        weights = _softmax(masked_scores, axis=1)
        output = weights @ value
        return SparseAttentionResult(
            output=output.astype(np.float32, copy=False),
            weights=weights.astype(np.float32, copy=False),
            adjacency=adjacency,
        )
97
+
98
+
99
+ def _softmax(values: np.ndarray, *, axis: int) -> np.ndarray:
100
+ maximum = np.max(values, axis=axis, keepdims=True)
101
+ shifted = np.exp(values - maximum)
102
+ shifted[~np.isfinite(values)] = 0.0
103
+ denom = shifted.sum(axis=axis, keepdims=True)
104
+ denom[denom == 0.0] = 1.0
105
+ return shifted / denom
106
+
107
+
108
+ def _as_2d(array: np.ndarray) -> np.ndarray:
109
+ array = np.asarray(array, dtype=np.float32)
110
+ if array.ndim == 1:
111
+ return array.reshape(1, -1)
112
+ if array.ndim != 2:
113
+ raise ValueError("attention arrays must be 1D or 2D")
114
+ return array
115
+
116
+
117
+ __all__ = [
118
+ "AttentionRouter",
119
+ "PredictiveAttention",
120
+ "SparseAttentionResult",
121
+ "SparseGraphBuilder",
122
+ ]
123
+
@@ -0,0 +1,135 @@
1
+ """Benchmark suite for NATC."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import tempfile
7
+ import time
8
+ import tracemalloc
9
+ from dataclasses import dataclass
10
+ from typing import Any
11
+
12
+ import numpy as np
13
+
14
+ from natc.cache import NeuralCache
15
+ from natc.dna import decode_model, encode_model
16
+ from natc.model import NATCModel
17
+
18
+
19
+ @dataclass(slots=True)
20
+ class BenchmarkResult:
21
+ """NATC benchmark metrics."""
22
+
23
+ compression_ratio: float
24
+ memory_saved: float
25
+ speedup: float
26
+ cpu_efficiency: float
27
+ tokens_per_sec: float
28
+ cache_hit_ratio: float
29
+ latency: float
30
+ throughput: float
31
+
32
+ def to_dict(self) -> dict[str, float]:
33
+ return {
34
+ "compression_ratio": self.compression_ratio,
35
+ "memory_saved": self.memory_saved,
36
+ "speedup": self.speedup,
37
+ "cpu_efficiency": self.cpu_efficiency,
38
+ "tokens_per_sec": self.tokens_per_sec,
39
+ "cache_hit_ratio": self.cache_hit_ratio,
40
+ "latency": self.latency,
41
+ "throughput": self.throughput,
42
+ }
43
+
44
+ def to_json(self) -> str:
45
+ return json.dumps(self.to_dict(), indent=2, sort_keys=True)
46
+
47
+
48
class BenchmarkRunner:
    """Run synthetic compression and inference benchmarks."""

    def __init__(
        self,
        *,
        layers: int = 4,
        rows: int = 128,
        cols: int = 128,
        rank: int = 16,
        seed: int = 13,
    ) -> None:
        """Record the shape of the synthetic workload.

        Args:
            layers: Number of synthetic weight matrices.
            rows: Row count of each matrix.
            cols: Column count of each matrix.
            rank: Rank of the shared low-rank base; also passed to the encoder.
            seed: Seed for the random generator that builds the matrices.
        """
        self.layers = layers
        self.rows = rows
        self.cols = cols
        self.rank = rank
        self.seed = seed

    def run(self) -> BenchmarkResult:
        """Execute the benchmark once and return its metrics.

        Raises:
            ValueError: If ``layers`` is below 1; without a weight matrix the
                dense reference pass has nothing to measure.
        """
        if self.layers < 1:
            raise ValueError("layers must be at least 1")
        state = self._synthetic_state()

        # Only start/stop tracing when this call owns the session, so a caller
        # that is already profiling with tracemalloc is left undisturbed.
        owns_tracing = not tracemalloc.is_tracing()
        if owns_tracing:
            tracemalloc.start()
        try:
            start = time.perf_counter()
            # Stays referenced so its allocations are still live at the later
            # memory reading.
            dense_outputs = _dense_reference(state)
            dense_latency = time.perf_counter() - start
            dense_current, dense_peak = tracemalloc.get_traced_memory()

            start = time.perf_counter()
            dna = encode_model(state, rank=self.rank)
            reconstructed = decode_model(dna)
            compressed_outputs = _dense_reference(reconstructed)
            compressed_latency = time.perf_counter() - start
            compressed_current, compressed_peak = tracemalloc.get_traced_memory()
        finally:
            # Never leave tracing enabled if encode/decode raises.
            if owns_tracing:
                tracemalloc.stop()

        with tempfile.TemporaryDirectory() as tmpdir:
            cache = NeuralCache(tmpdir)
            cache.put("python pattern", {"value": "cached"})
            # One lookup of the stored key and one of a near-miss key.
            cache.get("python pattern")
            cache.get("python patterns")
            cache_hit_ratio = cache.hit_ratio()

        model = NATCModel.from_state_dict(state, rank=self.rank)
        generated_start = time.perf_counter()
        output = model.generate("Explain a Python matrix multiplication pattern", max_new_tokens=32)
        generated_latency = max(time.perf_counter() - generated_start, 1e-9)
        # Whitespace-separated words stand in for tokens.
        token_count = max(1, len(output.split()))

        # The traced peak is cumulative, so the compressed phase is estimated as
        # the growth beyond the dense-phase peak; floors of 1 avoid divide-by-zero.
        dense_memory = max(dense_peak, dense_current, 1)
        compressed_memory = max(compressed_peak - dense_peak, compressed_current, 1)
        memory_saved = max(0.0, 1.0 - (compressed_memory / dense_memory))
        speedup = dense_latency / max(compressed_latency, 1e-9)
        throughput = len(compressed_outputs) / max(compressed_latency, 1e-9)
        cpu_efficiency = min(1.0, speedup / max(1.0, self.rank))

        return BenchmarkResult(
            compression_ratio=dna.compression_ratio(),
            memory_saved=memory_saved,
            speedup=speedup,
            cpu_efficiency=cpu_efficiency,
            tokens_per_sec=token_count / generated_latency,
            cache_hit_ratio=cache_hit_ratio,
            latency=compressed_latency,
            throughput=throughput,
        )

    def _synthetic_state(self) -> dict[str, np.ndarray]:
        """Build ``layers`` float32 matrices sharing one low-rank base plus noise."""
        rng = np.random.default_rng(self.seed)
        state: dict[str, np.ndarray] = {}
        base_left = rng.normal(size=(self.rows, self.rank)).astype(np.float32)
        base_right = rng.normal(size=(self.rank, self.cols)).astype(np.float32)
        for layer in range(self.layers):
            # Small per-layer noise keeps the matrices close to rank `rank`.
            noise = rng.normal(scale=0.01, size=(self.rows, self.cols)).astype(np.float32)
            state[f"layers.{layer}.weight"] = base_left @ base_right + noise
        return state
123
+
124
+
125
def run_benchmark(**kwargs: Any) -> dict[str, float]:
    """Run one benchmark and return its metrics as a plain dictionary.

    Keyword arguments are forwarded unchanged to ``BenchmarkRunner``.
    """
    runner = BenchmarkRunner(**kwargs)
    result = runner.run()
    return result.to_dict()
127
+
128
+
129
+ def _dense_reference(state: dict[str, np.ndarray]) -> list[np.ndarray]:
130
+ vector = np.ones((next(iter(state.values())).shape[1], 1), dtype=np.float32)
131
+ return [weight @ vector for weight in state.values()]
132
+
133
+
134
+ __all__ = ["BenchmarkResult", "BenchmarkRunner", "run_benchmark"]
135
+
@@ -0,0 +1,30 @@
1
+ """Command-line benchmark runner."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+
7
+ from natc.benchmark import BenchmarkRunner
8
+
9
+
10
def main() -> None:
    """Parse benchmark options from the command line and print the metrics as JSON."""
    parser = argparse.ArgumentParser(description="Run NATC benchmark suite.")
    # Every option is an integer; only the flag name and default differ.
    integer_options = (
        ("--layers", 4),
        ("--rows", 128),
        ("--cols", 128),
        ("--rank", 16),
        ("--seed", 13),
    )
    for flag, default in integer_options:
        parser.add_argument(flag, type=int, default=default)
    options = parser.parse_args()
    runner = BenchmarkRunner(
        layers=options.layers,
        rows=options.rows,
        cols=options.cols,
        rank=options.rank,
        seed=options.seed,
    )
    print(runner.run().to_json())


if __name__ == "__main__":
    main()
30
+