pre-reasoning 2.5.2__tar.gz → 2.5.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25)
  1. {pre_reasoning-2.5.2 → pre_reasoning-2.5.4}/MANIFEST.in +4 -2
  2. {pre_reasoning-2.5.2/pre_reasoning.egg-info → pre_reasoning-2.5.4}/PKG-INFO +21 -11
  3. {pre_reasoning-2.5.2 → pre_reasoning-2.5.4}/README.md +33 -23
  4. pre_reasoning-2.5.4/derive_expert/__init__.py +36 -0
  5. pre_reasoning-2.5.4/derive_expert/core.py +276 -0
  6. pre_reasoning-2.5.4/derive_expert/model.py +186 -0
  7. pre_reasoning-2.5.4/derive_expert/weights/thin_expert_d128L3.safetensors +0 -0
  8. {pre_reasoning-2.5.2 → pre_reasoning-2.5.4}/pre_reasoning/__init__.py +19 -15
  9. {pre_reasoning-2.5.2 → pre_reasoning-2.5.4}/pre_reasoning/cli.py +3 -3
  10. {pre_reasoning-2.5.2 → pre_reasoning-2.5.4}/pre_reasoning/pre_reasoning_v2_5.py +1 -6
  11. pre_reasoning-2.5.4/pre_reasoning/pre_reasoning_v2_5_2.py +701 -0
  12. {pre_reasoning-2.5.2 → pre_reasoning-2.5.4/pre_reasoning.egg-info}/PKG-INFO +21 -11
  13. {pre_reasoning-2.5.2 → pre_reasoning-2.5.4}/pre_reasoning.egg-info/SOURCES.txt +5 -0
  14. {pre_reasoning-2.5.2 → pre_reasoning-2.5.4}/pre_reasoning.egg-info/top_level.txt +1 -0
  15. {pre_reasoning-2.5.2 → pre_reasoning-2.5.4}/pyproject.toml +4 -3
  16. {pre_reasoning-2.5.2 → pre_reasoning-2.5.4}/LICENSE +0 -0
  17. {pre_reasoning-2.5.2 → pre_reasoning-2.5.4}/pre_reasoning/checkpoints/__init__.py +0 -0
  18. {pre_reasoning-2.5.2 → pre_reasoning-2.5.4}/pre_reasoning/checkpoints/pre-reasoning-3m-v2.5.safetensors +0 -0
  19. {pre_reasoning-2.5.2 → pre_reasoning-2.5.4}/pre_reasoning/heuristic.py +0 -0
  20. {pre_reasoning-2.5.2 → pre_reasoning-2.5.4}/pre_reasoning/inference.py +0 -0
  21. {pre_reasoning-2.5.2 → pre_reasoning-2.5.4}/pre_reasoning.egg-info/dependency_links.txt +0 -0
  22. {pre_reasoning-2.5.2 → pre_reasoning-2.5.4}/pre_reasoning.egg-info/entry_points.txt +0 -0
  23. {pre_reasoning-2.5.2 → pre_reasoning-2.5.4}/pre_reasoning.egg-info/requires.txt +0 -0
  24. {pre_reasoning-2.5.2 → pre_reasoning-2.5.4}/setup.cfg +0 -0
  25. {pre_reasoning-2.5.2 → pre_reasoning-2.5.4}/tests/test_engine.py +0 -0
@@ -1,5 +1,7 @@
1
- include pre_reasoning/checkpoints/*.safetensors
2
- exclude checkpoints/*.pt
1
+ include pre_reasoning/checkpoints/*.safetensors
2
+ include derive_expert/weights/*.safetensors
3
+ exclude checkpoints/*.pt
4
+ exclude derive_expert/weights/*.pt
3
5
  prune __pycache__
4
6
  global-exclude *.py[cod]
5
7
  global-exclude .DS_Store
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pre-reasoning
3
- Version: 2.5.2
3
+ Version: 2.5.4
4
4
  Summary: 3M-parameter neural pre-reasoning engine for grounding LLMs before they answer.
5
5
  Author: Luis Lozano, Dr. Shannon (Mia Labs AI co-researcher), Mia Labs
6
6
  License-Expression: MIT
@@ -25,9 +25,9 @@ Dynamic: license-file
25
25
 
26
26
  # Pre-Reasoning
27
27
 
28
- Pre-Reasoning is a Mia Labs structural analysis engine that grounds an LLM before it answers. It uses a 3M-parameter neural model (V3) to surface dependencies, root blockers, unlock order, parallel work, cycles, and conflicts from problem text.
28
+ Pre-Reasoning is a Mia Labs structural analysis engine that grounds an LLM before it answers. It uses a 3M-parameter neural perception model plus a bundled tiny derive expert to surface dependencies, derived assumptions, root blockers, unlock order, parallel work, cycles, and conflicts from problem text.
29
29
 
30
- The engine ships with bundled safetensors weights and torch -- install and run, no downloads needed.
30
+ The engine ships with bundled weights and declares its torch dependency -- install and run, no model download needed.
31
31
 
32
32
  ## What It Does
33
33
 
@@ -39,6 +39,8 @@ Given natural-language problem text, the engine returns:
39
39
  - CYCLES: circular dependencies that cannot be solved sequentially
40
40
  - CONFLICTS: competing positions or incompatible entities
41
41
  - REQUIREMENTS: numeric or threshold requirements
42
+ - DERIVED ASSUMPTIONS: transitive dependency assumptions inferred by the tiny derive expert
43
+ - BLOCK OUTPUT: direct and derived structural blocks used by the graph reasoner
42
44
 
43
45
  ## Install
44
46
 
@@ -59,6 +61,7 @@ from pre_reasoning import analyze, pulse
59
61
 
60
62
  result = analyze("Frontend depends on API. API depends on Auth.")
61
63
  print(result["trace"])
64
+ print(result["derived_assumptions"])
62
65
 
63
66
  check = pulse(
64
67
  "Frontend depends on API. API depends on Auth.",
@@ -93,9 +96,11 @@ These are product-research notes, not benchmark claims.
93
96
 
94
97
  ```text
95
98
  User text
96
- -> V3 neural perception (3M params, safetensors)
99
+ -> neural perception (3M params, safetensors)
97
100
  -> neural findings converted to structural blocks
98
- -> V2 heuristic graph analysis
101
+ -> tiny derive expert infers transitive assumptions
102
+ -> derived assumptions appended as dependency blocks
103
+ -> graph reasoning
99
104
  -> structural trace
100
105
  ```
101
106
 
@@ -104,19 +109,24 @@ User text
104
109
  | Path | Purpose |
105
110
  |---|---|
106
111
  | `pre_reasoning/` | Installable Python package and CLI entry point |
107
- | `pre_reasoning/inference.py` | 3M-parameter V3 neural perception layer |
108
- | `pre_reasoning/heuristic.py` | Deterministic graph-reasoning core (fallback) |
109
- | `pre_reasoning/pre_reasoning_v2_5.py` | v2.5 orchestrator: V3 neural + heuristic |
110
- | `pre_reasoning/checkpoints/pre-reasoning-3m-v2.5.safetensors` | Bundled V3 weights (11MB) |
112
+ | `pre_reasoning/inference.py` | 3M-parameter neural perception layer |
113
+ | `pre_reasoning/heuristic.py` | Graph-reasoning core |
114
+ | `pre_reasoning/pre_reasoning_v2_5_2.py` | Default v2.5.4 engine: neural perception + derive expert + graph reasoning |
115
+ | `pre_reasoning/pre_reasoning_v2_5.py` | Legacy v2.5 engine: neural perception + graph reasoning |
116
+ | `pre_reasoning/checkpoints/pre-reasoning-3m-v2.5.safetensors` | Bundled model weights (11MB) |
117
+ | `derive_expert/` | Tiny derive expert used for transitive-closure enrichment |
118
+ | `derive_expert/weights/thin_expert_d128L3.safetensors` | Bundled derive expert weights (2.5MB) |
111
119
  | `examples/` | Runnable usage examples |
112
120
  | `tests/` | Pytest suite |
113
121
  | `skill/SKILL.md` | Agent skill descriptor for model adoption |
114
- | `CLAUDE.md` | Optional Claude Code hooks configuration |
122
+ | `hooks/` | Claude Code before/after hooks for enforced pre-reasoning |
123
+ | `INSTALL.md` | Manual install and hook setup guide |
124
+ | `CLAUDE.md` | Claude Code adoption and grounding-hook guide |
115
125
  | `WHY_TRACES_WORK.md` | Literature connection, 9 cited papers |
116
126
 
117
127
  ## Weights Policy
118
128
 
119
- The raw training checkpoint is not part of the release. The package bundles `pre_reasoning/checkpoints/pre-reasoning-3m-v2.5.safetensors`, a weights-only inference artifact. It ships no training metadata: no optimizer state, LR schedules, step counters, RNG state, training config, or raw checkpoint provenance.
129
+ The raw training checkpoint is not part of the release. The package bundles `pre_reasoning/checkpoints/pre-reasoning-3m-v2.5.safetensors` for neural perception and `derive_expert/weights/thin_expert_d128L3.safetensors` for the tiny derive expert. These are inference artifacts. They ship no optimizer state, LR schedules, step counters, RNG state, training config, or raw checkpoint provenance.
120
130
 
121
131
  ## License
122
132
 
@@ -1,8 +1,8 @@
1
1
  # Pre-Reasoning
2
2
 
3
- Pre-Reasoning is a Mia Labs structural analysis engine that grounds an LLM before it answers. It uses a 3M-parameter neural model (V3) to surface dependencies, root blockers, unlock order, parallel work, cycles, and conflicts from problem text.
4
-
5
- The engine ships with bundled safetensors weights and torch -- install and run, no downloads needed.
3
+ Pre-Reasoning is a Mia Labs structural analysis engine that grounds an LLM before it answers. It uses a 3M-parameter neural perception model plus a bundled tiny derive expert to surface dependencies, derived assumptions, root blockers, unlock order, parallel work, cycles, and conflicts from problem text.
4
+
5
+ The engine ships with bundled weights and declares its torch dependency -- install and run, no model download needed.
6
6
 
7
7
  ## What It Does
8
8
 
@@ -12,8 +12,10 @@ Given natural-language problem text, the engine returns:
12
12
  - UNLOCK SEQUENCE: a dependency-aware resolution order
13
13
  - PARALLEL WORK: independent items that can proceed now
14
14
  - CYCLES: circular dependencies that cannot be solved sequentially
15
- - CONFLICTS: competing positions or incompatible entities
16
- - REQUIREMENTS: numeric or threshold requirements
15
+ - CONFLICTS: competing positions or incompatible entities
16
+ - REQUIREMENTS: numeric or threshold requirements
17
+ - DERIVED ASSUMPTIONS: transitive dependency assumptions inferred by the tiny derive expert
18
+ - BLOCK OUTPUT: direct and derived structural blocks used by the graph reasoner
17
19
 
18
20
  ## Install
19
21
 
@@ -32,8 +34,9 @@ pip install -e .
32
34
  ```python
33
35
  from pre_reasoning import analyze, pulse
34
36
 
35
- result = analyze("Frontend depends on API. API depends on Auth.")
36
- print(result["trace"])
37
+ result = analyze("Frontend depends on API. API depends on Auth.")
38
+ print(result["trace"])
39
+ print(result["derived_assumptions"])
37
40
 
38
41
  check = pulse(
39
42
  "Frontend depends on API. API depends on Auth.",
@@ -67,31 +70,38 @@ These are product-research notes, not benchmark claims.
67
70
  ## Architecture
68
71
 
69
72
  ```text
70
- User text
71
- -> V3 neural perception (3M params, safetensors)
72
- -> neural findings converted to structural blocks
73
- -> V2 heuristic graph analysis
74
- -> structural trace
75
- ```
73
+ User text
74
+ -> neural perception (3M params, safetensors)
75
+ -> neural findings converted to structural blocks
76
+ -> tiny derive expert infers transitive assumptions
77
+ -> derived assumptions appended as dependency blocks
78
+ -> graph reasoning
79
+ -> structural trace
80
+ ```
76
81
 
77
82
  ## File Map
78
83
 
79
84
  | Path | Purpose |
80
85
  |---|---|
81
86
  | `pre_reasoning/` | Installable Python package and CLI entry point |
82
- | `pre_reasoning/inference.py` | 3M-parameter V3 neural perception layer |
83
- | `pre_reasoning/heuristic.py` | Deterministic graph-reasoning core (fallback) |
84
- | `pre_reasoning/pre_reasoning_v2_5.py` | v2.5 orchestrator: V3 neural + heuristic |
85
- | `pre_reasoning/checkpoints/pre-reasoning-3m-v2.5.safetensors` | Bundled V3 weights (11MB) |
86
- | `examples/` | Runnable usage examples |
87
- | `tests/` | Pytest suite |
88
- | `skill/SKILL.md` | Agent skill descriptor for model adoption |
89
- | `CLAUDE.md` | Optional Claude Code hooks configuration |
90
- | `WHY_TRACES_WORK.md` | Literature connection, 9 cited papers |
87
+ | `pre_reasoning/inference.py` | 3M-parameter neural perception layer |
88
+ | `pre_reasoning/heuristic.py` | Graph-reasoning core |
89
+ | `pre_reasoning/pre_reasoning_v2_5_2.py` | Default v2.5.4 engine: neural perception + derive expert + graph reasoning |
90
+ | `pre_reasoning/pre_reasoning_v2_5.py` | Legacy v2.5 engine: neural perception + graph reasoning |
91
+ | `pre_reasoning/checkpoints/pre-reasoning-3m-v2.5.safetensors` | Bundled model weights (11MB) |
92
+ | `derive_expert/` | Tiny derive expert used for transitive-closure enrichment |
93
+ | `derive_expert/weights/thin_expert_d128L3.safetensors` | Bundled derive expert weights (2.5MB) |
94
+ | `examples/` | Runnable usage examples |
95
+ | `tests/` | Pytest suite |
96
+ | `skill/SKILL.md` | Agent skill descriptor for model adoption |
97
+ | `hooks/` | Claude Code before/after hooks for enforced pre-reasoning |
98
+ | `INSTALL.md` | Manual install and hook setup guide |
99
+ | `CLAUDE.md` | Claude Code adoption and grounding-hook guide |
100
+ | `WHY_TRACES_WORK.md` | Literature connection, 9 cited papers |
91
101
 
92
102
  ## Weights Policy
93
103
 
94
- The raw training checkpoint is not part of the release. The package bundles `pre_reasoning/checkpoints/pre-reasoning-3m-v2.5.safetensors`, a weights-only inference artifact. It ships no training metadata: no optimizer state, LR schedules, step counters, RNG state, training config, or raw checkpoint provenance.
104
+ The raw training checkpoint is not part of the release. The package bundles `pre_reasoning/checkpoints/pre-reasoning-3m-v2.5.safetensors` for neural perception and `derive_expert/weights/thin_expert_d128L3.safetensors` for the tiny derive expert. These are inference artifacts. They ship no optimizer state, LR schedules, step counters, RNG state, training config, or raw checkpoint provenance.
95
105
 
96
106
  ## License
97
107
 
@@ -0,0 +1,36 @@
1
+ """
2
+ derive-expert
3
+ =============
4
+
5
+ Transitive-closure / assumption-derivation with the bundled ThinExpert NN.
6
+
7
+ Quick start::
8
+
9
+ from derive_expert import derive_assumptions, derive_report, strategy_available
10
+
11
+ print(strategy_available())
12
+ # {'nn': True, 'bfs': False, 'error': None}
13
+
14
+ pairs = derive_assumptions([("auth", "session"), ("session", "dashboard")])
15
+ # [('auth', 'dashboard')]
16
+
17
+ report = derive_report([("A", "B"), ("B", "C"), ("C", "D")])
18
+ # {'derived': [('A','C'),('A','D'),('B','D')], 'strategy': 'nn', ...}
19
+
20
+ See derive_expert.core for full documentation.
21
+ """
22
+
23
+ from derive_expert.core import (
24
+ derive_assumptions,
25
+ derive_report,
26
+ strategy_available,
27
+ )
28
+
29
# Public API of the package: the three entry points imported from
# derive_expert.core above.
__all__ = [
    "derive_assumptions",
    "derive_report",
    "strategy_available",
]

# Metadata for the derive_expert package itself.
__version__ = "0.1.0"
__author__ = "Mia Labs / Dr. Shannon"
@@ -0,0 +1,276 @@
1
+ """
2
+ derive_expert.core
3
+ ==================
4
+
5
+ Public API for the derive-expert package.
6
+
7
+ NN STRATEGY
8
+ -----------
9
+ The package uses ThinExpert, a ~625K-parameter causal transformer that scores
10
+ 6/6 on its benchmark bar set. Torch and bundled safetensors weights are required. There is no
11
+ availability fallback: if the NN cannot load or the graph exceeds the model's
12
+ 10-entity window, public derive calls fail visibly.
13
+
14
+ The surrounding pre-reasoning engine feeds the expert small 2-hop windows and
15
+ uses exact graph traversal only as a verifier/guard for those windows, not as a
16
+ replacement for a missing expert.
17
+
18
+ Public surface
19
+ --------------
20
+ derive_assumptions(edges) -> list[tuple[str, str]]
21
+ derive_report(edges) -> dict
22
+ strategy_available() -> dict (probe without running inference)
23
+ """
24
+
25
+ from __future__ import annotations
26
+
27
+ import logging
28
+ import os
29
+ from pathlib import Path
30
+ from typing import Dict, List, Set, Tuple
31
+
32
+ logger = logging.getLogger(__name__)
33
+
34
# ---------------------------------------------------------------------------
# Locate bundled weights
# ---------------------------------------------------------------------------
# Path of the safetensors checkpoint, resolved relative to this module's own
# location rather than the current working directory.
_WEIGHTS_PATH = Path(__file__).resolve().parent / "weights" / "thin_expert_d128L3.safetensors"


# ---------------------------------------------------------------------------
# Lazy NN model singleton
# ---------------------------------------------------------------------------
# Filled in by _try_load_nn() on first use; remains None if loading fails.
_nn_model = None  # ThinExpert instance or None
# Flipped to True by the first call to _try_load_nn(), before the load is
# attempted, so a load is tried at most once per process.
_nn_load_attempted = False
45
+
46
+
47
def _try_load_nn():
    """
    Load the bundled ThinExpert model into the module-level singleton, once.

    The first call imports the model definition, checks that torch and the
    weights file are present, loads the safetensors state dict on CPU and puts
    the model in eval mode. Later calls return immediately when a model is
    already loaded.

    A failed load is not retried. Instead, the first failure is remembered and
    every later call raises a RuntimeError that quotes it and chains it as
    ``__cause__``, so the real reason (missing torch, missing weights, ...)
    is never replaced by a generic message.

    Raises:
        RuntimeError: torch is not installed, or an earlier load attempt in
            this process already failed.
        FileNotFoundError: the bundled weights file does not exist.
        Exception: anything raised while importing safetensors or loading the
            state dict is propagated unchanged on the first attempt.
    """
    global _nn_model, _nn_load_attempted, _nn_load_error
    if _nn_load_attempted:
        if _nn_model is None:
            # .get() keeps this safe even if the flag was set without a
            # recorded failure.
            first_failure = globals().get("_nn_load_error")
            message = "derive-expert: NN load was attempted but no model is available"
            if first_failure is not None:
                message = f"{message} ({first_failure})"
            raise RuntimeError(message) from first_failure
        return
    _nn_load_attempted = True

    try:
        from derive_expert.model import TORCH_AVAILABLE, ThinExpert
        if not TORCH_AVAILABLE:
            raise RuntimeError("derive-expert: torch is required for ThinExpert")

        if not _WEIGHTS_PATH.exists():
            raise FileNotFoundError(f"derive-expert: weights not found at {_WEIGHTS_PATH}")

        from safetensors.torch import load_file
        m = ThinExpert()
        sd = load_file(str(_WEIGHTS_PATH), device="cpu")
        m.load_state_dict(sd)
        m.eval()
    except Exception as exc:
        # Remember why the one-and-only load attempt failed, then let the
        # original exception propagate to the caller unchanged.
        _nn_load_error = exc
        raise
    else:
        _nn_model = m
        param_count = sum(p.numel() for p in m.parameters())
        logger.info(
            "derive-expert: ThinExpert loaded (%s params) from %s",
            f"{param_count:,}",
            _WEIGHTS_PATH,
        )
78
+
79
+
80
+ # ---------------------------------------------------------------------------
81
+ # NN path (uses loaded ThinExpert)
82
+ # ---------------------------------------------------------------------------
83
+
84
def _nn_derive(
    int_edges: List[Tuple[int, int]],
    n_entities: int,
    direct_set: Set[Tuple[int, int]],
    id_to_name: Dict[int, str],
) -> Tuple[List[Tuple[str, str]], str]:
    """
    Run ThinExpert on the integer-encoded graph and translate its output.

    The prompt is ``[BOS] <edge tokens> [SEP]`` where each edge ``(a, b)``
    becomes the two tokens ``ENT_OFFSET + a`` and ``ENT_OFFSET + b``. Edges are
    sorted and each distinct edge is encoded exactly once: a repeated edge adds
    no reachability information and would only lengthen the prompt toward the
    model's sequence limit.

    Args:
        int_edges: direct edges as (src_id, tgt_id) pairs; may contain repeats.
        n_entities: number of distinct entities (kept for interface
            compatibility; not used here).
        direct_set: set of direct edges, used to drop model outputs that merely
            restate an input edge.
        id_to_name: entity id -> original name; pairs mentioning an id missing
            from this map are discarded.

    Returns:
        (derived_pairs, strategy_label) where derived_pairs holds name pairs
        ordered by their integer ids and strategy_label is always "nn".

    The module-level model must already be loaded (see _try_load_nn).
    """
    from derive_expert.model import (
        BOS, ENT_OFFSET, SEP,
        free_run, parse_output,
    )

    prompt = [BOS]
    for a, b in sorted(set(int_edges)):
        prompt.extend((ENT_OFFSET + a, ENT_OFFSET + b))
    prompt.append(SEP)

    generated = free_run(_nn_model, prompt, device="cpu")
    has_derived, derived_int_pairs = parse_output(generated)

    if not has_derived:
        return [], "nn"

    # Keep only genuinely new pairs whose endpoints are known entities.
    named_pairs: List[Tuple[str, str]] = [
        (id_to_name[a], id_to_name[b])
        for a, b in sorted(derived_int_pairs)
        if (a, b) not in direct_set and a in id_to_name and b in id_to_name
    ]
    return named_pairs, "nn"
117
+
118
+
119
+ # ---------------------------------------------------------------------------
120
+ # Shared edge-parsing helper
121
+ # ---------------------------------------------------------------------------
122
+
123
+ def _parse_edges(
124
+ edges: List[Tuple[str, str]],
125
+ ) -> Tuple[
126
+ List[Tuple[int, int]],
127
+ int,
128
+ Set[Tuple[int, int]],
129
+ Dict[int, str],
130
+ ]:
131
+ """
132
+ Map string entity names to contiguous integer ids.
133
+
134
+ Returns:
135
+ int_edges, n_entities, direct_set, id_to_name
136
+ """
137
+ seen: Dict[str, int] = {}
138
+ ordered: List[str] = []
139
+ for src, tgt in edges:
140
+ for name in (src, tgt):
141
+ if name not in seen:
142
+ seen[name] = len(seen)
143
+ ordered.append(name)
144
+
145
+ n_entities = len(seen)
146
+ id_to_name = {i: name for i, name in enumerate(ordered)}
147
+
148
+ int_edges: List[Tuple[int, int]] = []
149
+ direct_set: Set[Tuple[int, int]] = set()
150
+ for src, tgt in edges:
151
+ a, b = seen[src], seen[tgt]
152
+ int_edges.append((a, b))
153
+ direct_set.add((a, b))
154
+
155
+ return int_edges, n_entities, direct_set, id_to_name
156
+
157
+
158
+ # ---------------------------------------------------------------------------
159
+ # Public API
160
+ # ---------------------------------------------------------------------------
161
+
162
def derive_assumptions(
    edges: List[Tuple[str, str]],
) -> List[Tuple[str, str]]:
    """
    Return the transitively implied pairs for a set of dependency edges.

    Given directed edges as (src_name, tgt_name) string tuples, return the
    (a, c) pairs that ThinExpert derives as reachable in >=2 hops and that are
    NOT already direct edges. This is a thin wrapper over derive_report() so
    both entry points share one validation / loading / inference path.

    NN STRATEGY:
      - ThinExpert (learned, ~625K params) is required.
      - The graph must fit the model's 10-entity window.
      - No availability fallback is used.

    Args:
        edges: e.g. [("auth", "session"), ("session", "dashboard")].
            Entities are numbered by first appearance in this list, and that
            numbering determines both the model input and the order of the
            returned pairs.

    Returns:
        List of (src_name, tgt_name) derived pairs, ordered by the entities'
        first-appearance ids (not alphabetically).
        Returns [] for an empty graph or when the model reports no derived
        pairs.

    Raises:
        ValueError: more distinct entities than the model supports.
        RuntimeError / FileNotFoundError: the model cannot be loaded.

    Examples:
        >>> derive_assumptions([("A","B"), ("B","C")])
        [('A', 'C')]
        >>> derive_assumptions([])
        []
        >>> derive_assumptions([("A","B"), ("A","C"), ("B","D"), ("C","D")])
        [('A', 'D')]
    """
    # Fast path: nothing to derive, and no need to touch the model.
    if not edges:
        return []

    return derive_report(edges)["derived"]
207
+
208
+
209
def derive_report(
    edges: List[Tuple[str, str]],
) -> Dict:
    """
    Derive transitive pairs and report how the result was produced.

    Same inference as derive_assumptions(), but the result is wrapped in a
    dict with the strategy label and a few counts.

    Returned keys:
        derived   : list[tuple[str, str]] — the derived pairs
        strategy  : str — "nn", or "none" when ``edges`` is empty
        n_entities: int — number of distinct named entities
        n_direct  : int — number of direct edges supplied
        n_derived : int — len(derived)

    Raises:
        ValueError: more distinct entities than the model supports.
        RuntimeError / FileNotFoundError: the model cannot be loaded.

    Examples:
        >>> derive_report([("A","B"), ("B","C")])
        {'derived': [('A', 'C')], 'strategy': 'nn', 'n_entities': 3, 'n_direct': 2, 'n_derived': 1}
    """
    # Start from the "nothing to do" report and overwrite it after inference.
    report: Dict = {
        "derived": [],
        "strategy": "none",
        "n_entities": 0,
        "n_direct": 0,
        "n_derived": 0,
    }
    if not edges:
        return report

    edge_ids, entity_count, known_pairs, names_by_id = _parse_edges(edges)

    from derive_expert.model import N_ENTITIES as MODEL_CAP

    if entity_count > MODEL_CAP:
        raise ValueError(
            f"derive-expert: {entity_count} entities exceeds ThinExpert cap of {MODEL_CAP}"
        )

    _try_load_nn()
    pairs, label = _nn_derive(edge_ids, entity_count, known_pairs, names_by_id)

    report["derived"] = pairs
    report["strategy"] = label
    report["n_entities"] = entity_count
    report["n_direct"] = len(edge_ids)
    report["n_derived"] = len(pairs)
    return report
255
+
256
+
257
def strategy_available() -> Dict[str, object]:
    """
    Probe whether ThinExpert can load, without running inference.

    Triggers the lazy model load; any exception it raises is captured and
    reported as text instead of propagating, so this function itself does not
    raise on a load failure.

    Returns:
        {
            "nn":    True/False — True when the model singleton is loaded
            "bfs":   False      — always; there is no availability fallback
            "error": str | None — message of the load failure, None on success
        }
    """
    error = None
    try:
        _try_load_nn()
    except Exception as exc:
        # Deliberately broad: this is a diagnostic probe and must report any
        # load problem rather than crash.
        error = str(exc)
    return {
        "nn": _nn_model is not None,
        "bfs": False,
        "error": error,
    }
@@ -0,0 +1,186 @@
1
+ """
2
+ derive_expert.model
3
+ ===================
4
+
5
+ ThinExpert — a tiny causal transformer (d=128, L=3, H=4, ~625K params) trained
6
+ to solve the transitive-closure / assumption-derivation task over up to 10 named
7
+ entities.
8
+
9
+ Architecture is fixed to match the WINNER checkpoint
10
+ (thin_expert_d128L3.safetensors).
11
+ Do NOT change any constant here without retraining.
12
+
13
+ This module is intentionally free of public API surface — use derive_expert.core.
14
+
15
+ Constants:
16
+ D_MODEL=128, N_LAYERS=3, N_HEADS=4
17
+ VOCAB=16 (PAD=0 BOS=1 EOS=2 SEP=3 HAS_YES=4 HAS_NO=5 ENT_OFFSET=6)
18
+ N_ENTITIES=10 (model hard limit)
19
+ MAX_SEQ=200, MAX_GEN=150
20
+ """
21
+
22
+ from __future__ import annotations
23
+
24
+ from typing import List
25
+
26
+ # ---------------------------------------------------------------------------
27
+ # Attempt torch import. core.py requires this to be available for public
28
+ # derive calls; stubs below only keep import errors explicit and controlled.
29
+ # ---------------------------------------------------------------------------
30
+ try:
31
+ import torch
32
+ import torch.nn as nn
33
+ _TORCH_OK = True
34
+ except ImportError:
35
+ _TORCH_OK = False
36
+
37
# ── Model constants (must match checkpoint exactly) ─────────────────────────
D_MODEL = 128   # embedding / hidden width used by every layer
N_LAYERS = 3    # number of stacked Block modules
N_HEADS = 4     # attention heads per Block
DROPOUT = 0.0   # dropout passed to the attention layers

# Special token ids; entity i is encoded as token ENT_OFFSET + i.
PAD = 0
BOS = 1
EOS = 2
SEP = 3
HAS_YES = 4
HAS_NO = 5
ENT_OFFSET = 6
N_ENTITIES = 10  # hard model cap
VOCAB = ENT_OFFSET + N_ENTITIES  # 16

MAX_SEQ = 200   # size of the positional-embedding table
MAX_GEN = 150   # maximum number of decode steps in free_run

# Exported so core.py can reference them without importing torch directly
__all__ = [
    "ThinExpert", "Block",
    "free_run", "parse_output",
    "TORCH_AVAILABLE",
    "D_MODEL", "N_LAYERS", "N_HEADS",
    "PAD", "BOS", "EOS", "SEP", "HAS_YES", "HAS_NO",
    "ENT_OFFSET", "N_ENTITIES", "VOCAB",
    "MAX_SEQ", "MAX_GEN",
]

# Public alias of the import probe result; core.py checks this before loading.
TORCH_AVAILABLE: bool = _TORCH_OK
68
+
69
+
70
if _TORCH_OK:
    class Block(nn.Module):
        """
        Single causal self-attention + FFN block.

        LayerNorm is applied before each sub-layer and the sub-layer output is
        added back to its input. Attribute names are part of the checkpoint's
        state-dict keys and must not be renamed.
        """

        def __init__(self) -> None:
            super().__init__()
            self.ln1 = nn.LayerNorm(D_MODEL)
            self.ln2 = nn.LayerNorm(D_MODEL)
            self.attn = nn.MultiheadAttention(
                D_MODEL, N_HEADS, dropout=DROPOUT, batch_first=True
            )
            self.ff = nn.Sequential(
                nn.Linear(D_MODEL, D_MODEL * 4),
                nn.GELU(),
                nn.Linear(D_MODEL * 4, D_MODEL),
            )

        def forward(self, x: "torch.Tensor") -> "torch.Tensor":
            """Apply masked self-attention then the feed-forward net to ``x``."""
            T = x.size(1)
            # Additive mask: -inf strictly above the diagonal, so position t
            # cannot attend to positions after t.
            mask = torch.triu(torch.full((T, T), float("-inf"), device=x.device), 1)
            h = self.ln1(x)
            h, _ = self.attn(h, h, h, attn_mask=mask, is_causal=False)
            x = x + h
            return x + self.ff(self.ln2(x))

    class ThinExpert(nn.Module):
        """
        Causal transformer for transitive-closure inference.

        Input token sequence:
            [BOS] <edge_pairs sorted> [SEP]
        where each edge (a, b) is encoded as two tokens:
            ENT_OFFSET+a  ENT_OFFSET+b

        Output greedy-decoded sequence:
            HAS_YES <derived_pair_tokens...>   -- if derived pairs exist
            HAS_NO                             -- if no derived pairs

        Instantiate then call load_state_dict with the bundled checkpoint.
        Attribute names are part of the checkpoint's state-dict keys.
        """

        def __init__(self) -> None:
            super().__init__()
            self.tok = nn.Embedding(VOCAB, D_MODEL, padding_idx=PAD)
            self.pos = nn.Embedding(MAX_SEQ, D_MODEL)
            self.blocks = nn.Sequential(*[Block() for _ in range(N_LAYERS)])
            self.ln = nn.LayerNorm(D_MODEL)
            self.head = nn.Linear(D_MODEL, VOCAB, bias=False)

        def forward(self, ids: "torch.Tensor") -> "torch.Tensor":
            """ids: (1, T) int64 -> logits: (1, T, VOCAB). Requires T <= MAX_SEQ."""
            T = ids.size(1)
            # Learned absolute positions 0..T-1 are added to the token embeddings.
            x = self.tok(ids) + self.pos(torch.arange(T, device=ids.device))
            x = self.blocks(x)
            x = self.ln(x)
            return self.head(x)

    @torch.no_grad()
    def free_run(model: ThinExpert, inp_ids: List[int], device: str = "cpu") -> List[int]:
        """
        Greedy autoregressive decode.

        Args:
            model: loaded ThinExpert in eval mode
            inp_ids: integer token list [BOS, ...edge tokens..., SEP]
            device: torch device on which the id tensors are created; the
                model is expected to live on the same device. Defaults to 'cpu'.

        Returns:
            List of generated token ids (excluding EOS / stopping token).
            Decoding stops at EOS, after MAX_GEN tokens, or once the running
            sequence reaches MAX_SEQ - 1 tokens.

        Raises:
            ValueError: the prompt is longer than MAX_SEQ, the size of the
                positional-embedding table.
        """
        # Fail with a clear message instead of an opaque embedding index error
        # from the first forward pass.
        if len(inp_ids) > MAX_SEQ:
            raise ValueError(
                f"derive-expert: input of {len(inp_ids)} tokens exceeds "
                f"ThinExpert MAX_SEQ of {MAX_SEQ}"
            )

        ids = torch.tensor(inp_ids, dtype=torch.long, device=device).unsqueeze(0)
        gen: List[int] = []
        for _ in range(MAX_GEN):
            # Logits for the last position only; argmax gives the greedy choice.
            logits = model(ids)[0, -1]
            nxt = logits.argmax().item()
            if nxt == EOS or ids.size(1) >= MAX_SEQ - 1:
                break
            gen.append(nxt)
            ids = torch.cat([ids, torch.tensor([[nxt]], device=device)], 1)
        return gen

    def parse_output(gen: List[int]):
        """
        Parse greedy-decoded token list.

        The first token decides the verdict: anything other than HAS_YES means
        "no derived pairs". After HAS_YES the remaining tokens are read two at
        a time as (source, target); a pair with either token outside the
        entity range is dropped, and a trailing unpaired token is ignored.

        Returns:
            (has_derived: bool, derived_pairs: set of (int, int))
            Pairs are integer entity indices (0-based, not token ids).
        """
        if not gen:
            return False, set()
        detected = gen[0] == HAS_YES
        pairs: set = set()
        if detected:
            i = 1
            while i + 1 < len(gen):
                a = gen[i] - ENT_OFFSET
                b = gen[i + 1] - ENT_OFFSET
                if 0 <= a < N_ENTITIES and 0 <= b < N_ENTITIES:
                    pairs.add((a, b))
                i += 2
        return detected, pairs

else:
    # Stub classes so `from derive_expert.model import ThinExpert` never raises
    # ImportError. core.py checks TORCH_AVAILABLE and raises a clear error.
    class Block:  # type: ignore[no-redef]
        pass

    class ThinExpert:  # type: ignore[no-redef]
        pass

    def free_run(*args, **kwargs):  # type: ignore[misc]
        raise RuntimeError("torch not installed; NN path unavailable")

    def parse_output(*args, **kwargs):  # type: ignore[misc]
        raise RuntimeError("torch not installed; NN path unavailable")