pre-reasoning 2.5.2__tar.gz → 2.5.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pre_reasoning-2.5.2 → pre_reasoning-2.5.4}/MANIFEST.in +4 -2
- {pre_reasoning-2.5.2/pre_reasoning.egg-info → pre_reasoning-2.5.4}/PKG-INFO +21 -11
- {pre_reasoning-2.5.2 → pre_reasoning-2.5.4}/README.md +33 -23
- pre_reasoning-2.5.4/derive_expert/__init__.py +36 -0
- pre_reasoning-2.5.4/derive_expert/core.py +276 -0
- pre_reasoning-2.5.4/derive_expert/model.py +186 -0
- pre_reasoning-2.5.4/derive_expert/weights/thin_expert_d128L3.safetensors +0 -0
- {pre_reasoning-2.5.2 → pre_reasoning-2.5.4}/pre_reasoning/__init__.py +19 -15
- {pre_reasoning-2.5.2 → pre_reasoning-2.5.4}/pre_reasoning/cli.py +3 -3
- {pre_reasoning-2.5.2 → pre_reasoning-2.5.4}/pre_reasoning/pre_reasoning_v2_5.py +1 -6
- pre_reasoning-2.5.4/pre_reasoning/pre_reasoning_v2_5_2.py +701 -0
- {pre_reasoning-2.5.2 → pre_reasoning-2.5.4/pre_reasoning.egg-info}/PKG-INFO +21 -11
- {pre_reasoning-2.5.2 → pre_reasoning-2.5.4}/pre_reasoning.egg-info/SOURCES.txt +5 -0
- {pre_reasoning-2.5.2 → pre_reasoning-2.5.4}/pre_reasoning.egg-info/top_level.txt +1 -0
- {pre_reasoning-2.5.2 → pre_reasoning-2.5.4}/pyproject.toml +4 -3
- {pre_reasoning-2.5.2 → pre_reasoning-2.5.4}/LICENSE +0 -0
- {pre_reasoning-2.5.2 → pre_reasoning-2.5.4}/pre_reasoning/checkpoints/__init__.py +0 -0
- {pre_reasoning-2.5.2 → pre_reasoning-2.5.4}/pre_reasoning/checkpoints/pre-reasoning-3m-v2.5.safetensors +0 -0
- {pre_reasoning-2.5.2 → pre_reasoning-2.5.4}/pre_reasoning/heuristic.py +0 -0
- {pre_reasoning-2.5.2 → pre_reasoning-2.5.4}/pre_reasoning/inference.py +0 -0
- {pre_reasoning-2.5.2 → pre_reasoning-2.5.4}/pre_reasoning.egg-info/dependency_links.txt +0 -0
- {pre_reasoning-2.5.2 → pre_reasoning-2.5.4}/pre_reasoning.egg-info/entry_points.txt +0 -0
- {pre_reasoning-2.5.2 → pre_reasoning-2.5.4}/pre_reasoning.egg-info/requires.txt +0 -0
- {pre_reasoning-2.5.2 → pre_reasoning-2.5.4}/setup.cfg +0 -0
- {pre_reasoning-2.5.2 → pre_reasoning-2.5.4}/tests/test_engine.py +0 -0
|
@@ -1,5 +1,7 @@
|
|
|
1
|
-
include pre_reasoning/checkpoints/*.safetensors
|
|
2
|
-
|
|
1
|
+
include pre_reasoning/checkpoints/*.safetensors
|
|
2
|
+
include derive_expert/weights/*.safetensors
|
|
3
|
+
exclude checkpoints/*.pt
|
|
4
|
+
exclude derive_expert/weights/*.pt
|
|
3
5
|
prune __pycache__
|
|
4
6
|
global-exclude *.py[cod]
|
|
5
7
|
global-exclude .DS_Store
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pre-reasoning
|
|
3
|
-
Version: 2.5.
|
|
3
|
+
Version: 2.5.4
|
|
4
4
|
Summary: 3M-parameter neural pre-reasoning engine for grounding LLMs before they answer.
|
|
5
5
|
Author: Luis Lozano, Dr. Shannon (Mia Labs AI co-researcher), Mia Labs
|
|
6
6
|
License-Expression: MIT
|
|
@@ -25,9 +25,9 @@ Dynamic: license-file
|
|
|
25
25
|
|
|
26
26
|
# Pre-Reasoning
|
|
27
27
|
|
|
28
|
-
Pre-Reasoning is a Mia Labs structural analysis engine that grounds an LLM before it answers. It uses a 3M-parameter neural model
|
|
28
|
+
Pre-Reasoning is a Mia Labs structural analysis engine that grounds an LLM before it answers. It uses a 3M-parameter neural perception model plus a bundled tiny derive expert to surface dependencies, derived assumptions, root blockers, unlock order, parallel work, cycles, and conflicts from problem text.
|
|
29
29
|
|
|
30
|
-
The engine ships with bundled
|
|
30
|
+
The engine ships with bundled weights and declares its torch dependency -- install and run, no model download needed.
|
|
31
31
|
|
|
32
32
|
## What It Does
|
|
33
33
|
|
|
@@ -39,6 +39,8 @@ Given natural-language problem text, the engine returns:
|
|
|
39
39
|
- CYCLES: circular dependencies that cannot be solved sequentially
|
|
40
40
|
- CONFLICTS: competing positions or incompatible entities
|
|
41
41
|
- REQUIREMENTS: numeric or threshold requirements
|
|
42
|
+
- DERIVED ASSUMPTIONS: transitive dependency assumptions inferred by the tiny derive expert
|
|
43
|
+
- BLOCK OUTPUT: direct and derived structural blocks used by the graph reasoner
|
|
42
44
|
|
|
43
45
|
## Install
|
|
44
46
|
|
|
@@ -59,6 +61,7 @@ from pre_reasoning import analyze, pulse
|
|
|
59
61
|
|
|
60
62
|
result = analyze("Frontend depends on API. API depends on Auth.")
|
|
61
63
|
print(result["trace"])
|
|
64
|
+
print(result["derived_assumptions"])
|
|
62
65
|
|
|
63
66
|
check = pulse(
|
|
64
67
|
"Frontend depends on API. API depends on Auth.",
|
|
@@ -93,9 +96,11 @@ These are product-research notes, not benchmark claims.
|
|
|
93
96
|
|
|
94
97
|
```text
|
|
95
98
|
User text
|
|
96
|
-
->
|
|
99
|
+
-> neural perception (3M params, safetensors)
|
|
97
100
|
-> neural findings converted to structural blocks
|
|
98
|
-
->
|
|
101
|
+
-> tiny derive expert infers transitive assumptions
|
|
102
|
+
-> derived assumptions appended as dependency blocks
|
|
103
|
+
-> graph reasoning
|
|
99
104
|
-> structural trace
|
|
100
105
|
```
|
|
101
106
|
|
|
@@ -104,19 +109,24 @@ User text
|
|
|
104
109
|
| Path | Purpose |
|
|
105
110
|
|---|---|
|
|
106
111
|
| `pre_reasoning/` | Installable Python package and CLI entry point |
|
|
107
|
-
| `pre_reasoning/inference.py` | 3M-parameter
|
|
108
|
-
| `pre_reasoning/heuristic.py` |
|
|
109
|
-
| `pre_reasoning/
|
|
110
|
-
| `pre_reasoning/
|
|
112
|
+
| `pre_reasoning/inference.py` | 3M-parameter neural perception layer |
|
|
113
|
+
| `pre_reasoning/heuristic.py` | Graph-reasoning core |
|
|
114
|
+
| `pre_reasoning/pre_reasoning_v2_5_2.py` | Default v2.5.4 engine: neural perception + derive expert + graph reasoning |
|
|
115
|
+
| `pre_reasoning/pre_reasoning_v2_5.py` | Legacy v2.5 engine: neural perception + graph reasoning |
|
|
116
|
+
| `pre_reasoning/checkpoints/pre-reasoning-3m-v2.5.safetensors` | Bundled model weights (11MB) |
|
|
117
|
+
| `derive_expert/` | Tiny derive expert used for transitive-closure enrichment |
|
|
118
|
+
| `derive_expert/weights/thin_expert_d128L3.safetensors` | Bundled derive expert weights (2.5MB) |
|
|
111
119
|
| `examples/` | Runnable usage examples |
|
|
112
120
|
| `tests/` | Pytest suite |
|
|
113
121
|
| `skill/SKILL.md` | Agent skill descriptor for model adoption |
|
|
114
|
-
| `
|
|
122
|
+
| `hooks/` | Claude Code before/after hooks for enforced pre-reasoning |
|
|
123
|
+
| `INSTALL.md` | Manual install and hook setup guide |
|
|
124
|
+
| `CLAUDE.md` | Claude Code adoption and grounding-hook guide |
|
|
115
125
|
| `WHY_TRACES_WORK.md` | Literature connection, 9 cited papers |
|
|
116
126
|
|
|
117
127
|
## Weights Policy
|
|
118
128
|
|
|
119
|
-
The raw training checkpoint is not part of the release. The package bundles `pre_reasoning/checkpoints/pre-reasoning-3m-v2.5.safetensors
|
|
129
|
+
The raw training checkpoint is not part of the release. The package bundles `pre_reasoning/checkpoints/pre-reasoning-3m-v2.5.safetensors` for neural perception and `derive_expert/weights/thin_expert_d128L3.safetensors` for the tiny derive expert. These are inference artifacts. They ship no optimizer state, LR schedules, step counters, RNG state, training config, or raw checkpoint provenance.
|
|
120
130
|
|
|
121
131
|
## License
|
|
122
132
|
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
# Pre-Reasoning
|
|
2
2
|
|
|
3
|
-
Pre-Reasoning is a Mia Labs structural analysis engine that grounds an LLM before it answers. It uses a 3M-parameter neural model
|
|
4
|
-
|
|
5
|
-
The engine ships with bundled
|
|
3
|
+
Pre-Reasoning is a Mia Labs structural analysis engine that grounds an LLM before it answers. It uses a 3M-parameter neural perception model plus a bundled tiny derive expert to surface dependencies, derived assumptions, root blockers, unlock order, parallel work, cycles, and conflicts from problem text.
|
|
4
|
+
|
|
5
|
+
The engine ships with bundled weights and declares its torch dependency -- install and run, no model download needed.
|
|
6
6
|
|
|
7
7
|
## What It Does
|
|
8
8
|
|
|
@@ -12,8 +12,10 @@ Given natural-language problem text, the engine returns:
|
|
|
12
12
|
- UNLOCK SEQUENCE: a dependency-aware resolution order
|
|
13
13
|
- PARALLEL WORK: independent items that can proceed now
|
|
14
14
|
- CYCLES: circular dependencies that cannot be solved sequentially
|
|
15
|
-
- CONFLICTS: competing positions or incompatible entities
|
|
16
|
-
- REQUIREMENTS: numeric or threshold requirements
|
|
15
|
+
- CONFLICTS: competing positions or incompatible entities
|
|
16
|
+
- REQUIREMENTS: numeric or threshold requirements
|
|
17
|
+
- DERIVED ASSUMPTIONS: transitive dependency assumptions inferred by the tiny derive expert
|
|
18
|
+
- BLOCK OUTPUT: direct and derived structural blocks used by the graph reasoner
|
|
17
19
|
|
|
18
20
|
## Install
|
|
19
21
|
|
|
@@ -32,8 +34,9 @@ pip install -e .
|
|
|
32
34
|
```python
|
|
33
35
|
from pre_reasoning import analyze, pulse
|
|
34
36
|
|
|
35
|
-
result = analyze("Frontend depends on API. API depends on Auth.")
|
|
36
|
-
print(result["trace"])
|
|
37
|
+
result = analyze("Frontend depends on API. API depends on Auth.")
|
|
38
|
+
print(result["trace"])
|
|
39
|
+
print(result["derived_assumptions"])
|
|
37
40
|
|
|
38
41
|
check = pulse(
|
|
39
42
|
"Frontend depends on API. API depends on Auth.",
|
|
@@ -67,31 +70,38 @@ These are product-research notes, not benchmark claims.
|
|
|
67
70
|
## Architecture
|
|
68
71
|
|
|
69
72
|
```text
|
|
70
|
-
User text
|
|
71
|
-
->
|
|
72
|
-
-> neural findings converted to structural blocks
|
|
73
|
-
->
|
|
74
|
-
->
|
|
75
|
-
|
|
73
|
+
User text
|
|
74
|
+
-> neural perception (3M params, safetensors)
|
|
75
|
+
-> neural findings converted to structural blocks
|
|
76
|
+
-> tiny derive expert infers transitive assumptions
|
|
77
|
+
-> derived assumptions appended as dependency blocks
|
|
78
|
+
-> graph reasoning
|
|
79
|
+
-> structural trace
|
|
80
|
+
```
|
|
76
81
|
|
|
77
82
|
## File Map
|
|
78
83
|
|
|
79
84
|
| Path | Purpose |
|
|
80
85
|
|---|---|
|
|
81
86
|
| `pre_reasoning/` | Installable Python package and CLI entry point |
|
|
82
|
-
| `pre_reasoning/inference.py` | 3M-parameter
|
|
83
|
-
| `pre_reasoning/heuristic.py` |
|
|
84
|
-
| `pre_reasoning/
|
|
85
|
-
| `pre_reasoning/
|
|
86
|
-
| `
|
|
87
|
-
| `
|
|
88
|
-
| `
|
|
89
|
-
| `
|
|
90
|
-
| `
|
|
87
|
+
| `pre_reasoning/inference.py` | 3M-parameter neural perception layer |
|
|
88
|
+
| `pre_reasoning/heuristic.py` | Graph-reasoning core |
|
|
89
|
+
| `pre_reasoning/pre_reasoning_v2_5_2.py` | Default v2.5.4 engine: neural perception + derive expert + graph reasoning |
|
|
90
|
+
| `pre_reasoning/pre_reasoning_v2_5.py` | Legacy v2.5 engine: neural perception + graph reasoning |
|
|
91
|
+
| `pre_reasoning/checkpoints/pre-reasoning-3m-v2.5.safetensors` | Bundled model weights (11MB) |
|
|
92
|
+
| `derive_expert/` | Tiny derive expert used for transitive-closure enrichment |
|
|
93
|
+
| `derive_expert/weights/thin_expert_d128L3.safetensors` | Bundled derive expert weights (2.5MB) |
|
|
94
|
+
| `examples/` | Runnable usage examples |
|
|
95
|
+
| `tests/` | Pytest suite |
|
|
96
|
+
| `skill/SKILL.md` | Agent skill descriptor for model adoption |
|
|
97
|
+
| `hooks/` | Claude Code before/after hooks for enforced pre-reasoning |
|
|
98
|
+
| `INSTALL.md` | Manual install and hook setup guide |
|
|
99
|
+
| `CLAUDE.md` | Claude Code adoption and grounding-hook guide |
|
|
100
|
+
| `WHY_TRACES_WORK.md` | Literature connection, 9 cited papers |
|
|
91
101
|
|
|
92
102
|
## Weights Policy
|
|
93
103
|
|
|
94
|
-
The raw training checkpoint is not part of the release. The package bundles `pre_reasoning/checkpoints/pre-reasoning-3m-v2.5.safetensors
|
|
104
|
+
The raw training checkpoint is not part of the release. The package bundles `pre_reasoning/checkpoints/pre-reasoning-3m-v2.5.safetensors` for neural perception and `derive_expert/weights/thin_expert_d128L3.safetensors` for the tiny derive expert. These are inference artifacts. They ship no optimizer state, LR schedules, step counters, RNG state, training config, or raw checkpoint provenance.
|
|
95
105
|
|
|
96
106
|
## License
|
|
97
107
|
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"""
|
|
2
|
+
derive-expert
|
|
3
|
+
=============
|
|
4
|
+
|
|
5
|
+
Transitive-closure / assumption-derivation with the bundled ThinExpert NN.
|
|
6
|
+
|
|
7
|
+
Quick start::
|
|
8
|
+
|
|
9
|
+
from derive_expert import derive_assumptions, derive_report, strategy_available
|
|
10
|
+
|
|
11
|
+
print(strategy_available())
|
|
12
|
+
# {'nn': True, 'bfs': False, 'error': None}
|
|
13
|
+
|
|
14
|
+
pairs = derive_assumptions([("auth", "session"), ("session", "dashboard")])
|
|
15
|
+
# [('auth', 'dashboard')]
|
|
16
|
+
|
|
17
|
+
report = derive_report([("A", "B"), ("B", "C"), ("C", "D")])
|
|
18
|
+
# {'derived': [('A','C'),('A','D'),('B','D')], 'strategy': 'nn', ...}
|
|
19
|
+
|
|
20
|
+
See derive_expert.core for full documentation.
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
from derive_expert.core import (
|
|
24
|
+
derive_assumptions,
|
|
25
|
+
derive_report,
|
|
26
|
+
strategy_available,
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
__all__ = [
|
|
30
|
+
"derive_assumptions",
|
|
31
|
+
"derive_report",
|
|
32
|
+
"strategy_available",
|
|
33
|
+
]
|
|
34
|
+
|
|
35
|
+
__version__ = "0.1.0"
|
|
36
|
+
__author__ = "Mia Labs / Dr. Shannon"
|
|
@@ -0,0 +1,276 @@
|
|
|
1
|
+
"""
|
|
2
|
+
derive_expert.core
|
|
3
|
+
==================
|
|
4
|
+
|
|
5
|
+
Public API for the derive-expert package.
|
|
6
|
+
|
|
7
|
+
NN STRATEGY
|
|
8
|
+
-----------
|
|
9
|
+
The package uses ThinExpert, a 625K-param causal transformer that scores 6/6 on its
|
|
10
|
+
benchmark bar set. Torch and bundled safetensors weights are required. There is no
|
|
11
|
+
availability fallback: if the NN cannot load or the graph exceeds the model's
|
|
12
|
+
10-entity window, public derive calls fail visibly.
|
|
13
|
+
|
|
14
|
+
The surrounding pre-reasoning engine feeds the expert small 2-hop windows and
|
|
15
|
+
uses exact graph traversal only as a verifier/guard for those windows, not as a
|
|
16
|
+
replacement for a missing expert.
|
|
17
|
+
|
|
18
|
+
Public surface
|
|
19
|
+
--------------
|
|
20
|
+
derive_assumptions(edges) -> list[tuple[str, str]]
|
|
21
|
+
derive_report(edges) -> dict
|
|
22
|
+
strategy_available() -> dict (probe without running inference)
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
from __future__ import annotations
|
|
26
|
+
|
|
27
|
+
import logging
|
|
28
|
+
import os
|
|
29
|
+
from pathlib import Path
|
|
30
|
+
from typing import Dict, List, Set, Tuple
|
|
31
|
+
|
|
32
|
+
logger = logging.getLogger(__name__)
|
|
33
|
+
|
|
34
|
+
# ---------------------------------------------------------------------------
|
|
35
|
+
# Locate bundled weights
|
|
36
|
+
# ---------------------------------------------------------------------------
|
|
37
|
+
_WEIGHTS_PATH = Path(__file__).resolve().parent / "weights" / "thin_expert_d128L3.safetensors"
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# ---------------------------------------------------------------------------
|
|
41
|
+
# Lazy NN model singleton
|
|
42
|
+
# ---------------------------------------------------------------------------
|
|
43
|
+
_nn_model = None # ThinExpert instance or None
|
|
44
|
+
_nn_load_attempted = False
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _try_load_nn():
    """
    Load ThinExpert once and cache it in the module-level ``_nn_model``.

    Torch and the bundled safetensors weights are required; failures are
    raised instead of silently changing strategy. Only one real load is ever
    attempted per process (``_nn_load_attempted`` is set before loading).

    If that single attempt failed, every later call raises a RuntimeError
    that names the original failure and chains it as ``__cause__``, so the
    root cause is still visible after the first call.

    Raises:
        RuntimeError: torch is unavailable, or an earlier load attempt failed.
        FileNotFoundError: the bundled weights file does not exist.
        Exception: any error raised while importing safetensors or loading
            the state dict on the first attempt propagates unchanged.
    """
    global _nn_model, _nn_load_attempted
    if _nn_load_attempted:
        if _nn_model is None:
            # The first failure is stored on the function object so this
            # block needs no extra module-level state.
            cause = getattr(_try_load_nn, "_load_error", None)
            message = "derive-expert: NN load was attempted but no model is available"
            if cause is not None:
                message = f"{message} ({type(cause).__name__}: {cause})"
            raise RuntimeError(message) from cause
        return
    _nn_load_attempted = True

    try:
        from derive_expert.model import TORCH_AVAILABLE, ThinExpert
        if not TORCH_AVAILABLE:
            raise RuntimeError("derive-expert: torch is required for ThinExpert")

        if not _WEIGHTS_PATH.exists():
            raise FileNotFoundError(f"derive-expert: weights not found at {_WEIGHTS_PATH}")

        from safetensors.torch import load_file
        m = ThinExpert()
        sd = load_file(str(_WEIGHTS_PATH), device="cpu")
        m.load_state_dict(sd)
        m.eval()
    except Exception as exc:
        # Remember why the one-and-only attempt failed, then fail visibly.
        _try_load_nn._load_error = exc
        raise

    _nn_model = m
    param_count = sum(p.numel() for p in m.parameters())
    logger.info(
        "derive-expert: ThinExpert loaded (%s params) from %s",
        f"{param_count:,}",
        _WEIGHTS_PATH,
    )
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
# ---------------------------------------------------------------------------
|
|
81
|
+
# NN path (uses loaded ThinExpert)
|
|
82
|
+
# ---------------------------------------------------------------------------
|
|
83
|
+
|
|
84
|
+
def _nn_derive(
    int_edges: List[Tuple[int, int]],
    n_entities: int,
    direct_set: Set[Tuple[int, int]],
    id_to_name: Dict[int, str],
) -> Tuple[List[Tuple[str, str]], str]:
    """
    Run NN inference and return (derived_pairs_str, strategy_label).

    The prompt is ``[BOS] <edge tokens> [SEP]`` where every distinct edge
    (a, b) contributes the two tokens ``ENT_OFFSET + a`` and
    ``ENT_OFFSET + b``, in sorted edge order. The module-level ``_nn_model``
    must already be loaded (callers invoke ``_try_load_nn()`` first).

    Args:
        int_edges: direct edges as integer id pairs; duplicates are allowed
            and are encoded only once.
        n_entities: number of distinct entities (not used by this function;
            kept for signature compatibility).
        direct_set: set of direct edges, used to drop pairs the model
            echoes back that are already direct.
        id_to_name: integer id -> entity name; pairs that mention an id
            missing from this map are dropped.

    Returns:
        (pairs, "nn"), where pairs are name tuples ordered by their integer
        ids. pairs is empty when the model reports no derived pairs.
    """
    from derive_expert.model import (
        BOS, ENT_OFFSET, SEP,
        free_run, parse_output,
    )

    # Encode each distinct edge exactly once: repeated input edges add no
    # information and would only lengthen the prompt.
    inp = [BOS]
    for a, b in sorted(set(int_edges)):
        inp.append(ENT_OFFSET + a)
        inp.append(ENT_OFFSET + b)
    inp.append(SEP)

    gen = free_run(_nn_model, inp, device="cpu")
    has_derived, derived_int_pairs = parse_output(gen)

    if not has_derived:
        return [], "nn"

    # Keep only genuinely derived pairs whose endpoints are known entities.
    result: List[Tuple[str, str]] = [
        (id_to_name[a], id_to_name[b])
        for a, b in sorted(derived_int_pairs)
        if (a, b) not in direct_set and a in id_to_name and b in id_to_name
    ]

    return result, "nn"
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
# ---------------------------------------------------------------------------
|
|
120
|
+
# Shared edge-parsing helper
|
|
121
|
+
# ---------------------------------------------------------------------------
|
|
122
|
+
|
|
123
|
+
def _parse_edges(
|
|
124
|
+
edges: List[Tuple[str, str]],
|
|
125
|
+
) -> Tuple[
|
|
126
|
+
List[Tuple[int, int]],
|
|
127
|
+
int,
|
|
128
|
+
Set[Tuple[int, int]],
|
|
129
|
+
Dict[int, str],
|
|
130
|
+
]:
|
|
131
|
+
"""
|
|
132
|
+
Map string entity names to contiguous integer ids.
|
|
133
|
+
|
|
134
|
+
Returns:
|
|
135
|
+
int_edges, n_entities, direct_set, id_to_name
|
|
136
|
+
"""
|
|
137
|
+
seen: Dict[str, int] = {}
|
|
138
|
+
ordered: List[str] = []
|
|
139
|
+
for src, tgt in edges:
|
|
140
|
+
for name in (src, tgt):
|
|
141
|
+
if name not in seen:
|
|
142
|
+
seen[name] = len(seen)
|
|
143
|
+
ordered.append(name)
|
|
144
|
+
|
|
145
|
+
n_entities = len(seen)
|
|
146
|
+
id_to_name = {i: name for i, name in enumerate(ordered)}
|
|
147
|
+
|
|
148
|
+
int_edges: List[Tuple[int, int]] = []
|
|
149
|
+
direct_set: Set[Tuple[int, int]] = set()
|
|
150
|
+
for src, tgt in edges:
|
|
151
|
+
a, b = seen[src], seen[tgt]
|
|
152
|
+
int_edges.append((a, b))
|
|
153
|
+
direct_set.add((a, b))
|
|
154
|
+
|
|
155
|
+
return int_edges, n_entities, direct_set, id_to_name
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
# ---------------------------------------------------------------------------
|
|
159
|
+
# Public API
|
|
160
|
+
# ---------------------------------------------------------------------------
|
|
161
|
+
|
|
162
|
+
def derive_assumptions(
    edges: List[Tuple[str, str]],
) -> List[Tuple[str, str]]:
    """
    Given directed dependency edges as (src_name, tgt_name) string tuples,
    return the transitively implied (a, c) pairs reachable in >=2 hops that
    are NOT already direct edges.

    This is the ``"derived"`` entry of ``derive_report(edges)``; the two
    functions share one pipeline so their validation cannot drift apart.

    NN STRATEGY:
      - ThinExpert (learned, ~625K params) is required.
      - The graph must fit the model's 10-entity window.
      - No availability fallback is used.

    Args:
        edges: e.g. [("auth", "session"), ("session", "dashboard")]
               Order within the list does not matter.

    Returns:
        List of (src_name, tgt_name) derived pairs, ordered by the entities'
        integer ids (ids follow first appearance in ``edges``), which is not
        necessarily alphabetical.
        Returns [] for an empty graph or a graph with no multi-hop paths.

    Raises:
        ValueError: the graph has more distinct entities than the model cap.
        RuntimeError / FileNotFoundError: the NN could not be loaded.

    Examples:
        >>> derive_assumptions([("A","B"), ("B","C")])
        [('A', 'C')]
        >>> derive_assumptions([])
        []
        >>> derive_assumptions([("A","B"), ("A","C"), ("B","D"), ("C","D")])
        [('A', 'D')]
    """
    if not edges:
        # Empty graph: nothing to derive and no need to touch the model.
        return []

    return derive_report(edges)["derived"]
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def derive_report(
    edges: List[Tuple[str, str]],
) -> Dict:
    """
    Derive transitive pairs like derive_assumptions(), but wrap them in a
    dict that also exposes the strategy used and basic graph statistics.

    Keys of the returned dict:
        derived    : list[tuple[str, str]] -- the derived pairs
        strategy   : str -- "nn", or "none" for an empty edge list
        n_entities : int -- number of distinct named entities
        n_direct   : int -- number of direct edges supplied
        n_derived  : int -- len(derived)

    Raises:
        ValueError: more distinct entities than the ThinExpert cap.

    Examples:
        >>> derive_report([("A","B"), ("B","C")])
        {'derived': [('A', 'C')], 'strategy': 'nn', 'n_entities': 3, 'n_direct': 2, 'n_derived': 1}
    """
    if not edges:
        # Empty input short-circuits before any model work.
        return dict(derived=[], strategy="none", n_entities=0, n_direct=0, n_derived=0)

    int_edges, n_entities, direct_set, id_to_name = _parse_edges(edges)

    from derive_expert.model import N_ENTITIES as MODEL_CAP

    over_cap = n_entities > MODEL_CAP
    if over_cap:
        raise ValueError(
            f"derive-expert: {n_entities} entities exceeds ThinExpert cap of {MODEL_CAP}"
        )

    _try_load_nn()
    pairs, label = _nn_derive(int_edges, n_entities, direct_set, id_to_name)

    report: Dict = {"derived": pairs, "strategy": label}
    report.update(
        n_entities=n_entities,
        n_direct=len(int_edges),
        n_derived=len(pairs),
    )
    return report
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
def strategy_available() -> Dict[str, object]:
    """
    Probe whether ThinExpert can load.

    This triggers the one-time model load via ``_try_load_nn()`` and never
    raises: any load failure is captured and reported in the result.

    Returns:
        {
          "nn":    True/False   -- a ThinExpert model is loaded
          "bfs":   False        -- no availability fallback
          "error": None or str  -- message of the load failure, if any
        }
    """
    error = None
    try:
        _try_load_nn()
    except Exception as exc:
        # Probe boundary: report the failure instead of propagating it.
        error = str(exc)
    return {
        "nn": _nn_model is not None,
        "bfs": False,
        "error": error,
    }
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
"""
|
|
2
|
+
derive_expert.model
|
|
3
|
+
===================
|
|
4
|
+
|
|
5
|
+
ThinExpert — a tiny causal transformer (d=128, L=3, H=4, ~625K params) trained
|
|
6
|
+
to solve the transitive-closure / assumption-derivation task over up to 10 named
|
|
7
|
+
entities.
|
|
8
|
+
|
|
9
|
+
Architecture is fixed to match the WINNER checkpoint
|
|
10
|
+
(thin_expert_d128L3.safetensors).
|
|
11
|
+
Do NOT change any constant here without retraining.
|
|
12
|
+
|
|
13
|
+
This module is intentionally free of public API surface — use derive_expert.core.
|
|
14
|
+
|
|
15
|
+
Constants:
|
|
16
|
+
D_MODEL=128, N_LAYERS=3, N_HEADS=4
|
|
17
|
+
VOCAB=16 (PAD=0 BOS=1 EOS=2 SEP=3 HAS_YES=4 HAS_NO=5 ENT_OFFSET=6)
|
|
18
|
+
N_ENTITIES=10 (model hard limit)
|
|
19
|
+
MAX_SEQ=200, MAX_GEN=150
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
24
|
+
from typing import List
|
|
25
|
+
|
|
26
|
+
# ---------------------------------------------------------------------------
|
|
27
|
+
# Attempt torch import. core.py requires this to be available for public
|
|
28
|
+
# derive calls; stubs below only keep import errors explicit and controlled.
|
|
29
|
+
# ---------------------------------------------------------------------------
|
|
30
|
+
try:
    import torch
    import torch.nn as nn
    _TORCH_OK = True
except ImportError:
    _TORCH_OK = False

# ── Model constants (must match checkpoint exactly) ─────────────────────────
D_MODEL = 128
N_LAYERS = 3
N_HEADS = 4
DROPOUT = 0.0

# Token ids: special tokens first, then one token per entity slot.
PAD = 0
BOS = 1
EOS = 2
SEP = 3
HAS_YES = 4
HAS_NO = 5
ENT_OFFSET = 6
N_ENTITIES = 10  # hard model cap
VOCAB = ENT_OFFSET + N_ENTITIES  # 16

MAX_SEQ = 200  # size of the positional-embedding table
MAX_GEN = 150  # upper bound on greedy decode steps

# Exported so core.py can reference them without importing torch directly
__all__ = [
    "ThinExpert", "Block",
    "free_run", "parse_output",
    "TORCH_AVAILABLE",
    "D_MODEL", "N_LAYERS", "N_HEADS",
    "PAD", "BOS", "EOS", "SEP", "HAS_YES", "HAS_NO",
    "ENT_OFFSET", "N_ENTITIES", "VOCAB",
    "MAX_SEQ", "MAX_GEN",
]

TORCH_AVAILABLE: bool = _TORCH_OK


def parse_output(gen: List[int]) -> "tuple[bool, set[tuple[int, int]]]":
    """
    Parse greedy-decoded token list.

    The first token decides the verdict (HAS_YES means derived pairs
    follow). The remaining tokens are read two at a time as entity tokens;
    a pair is kept only when both tokens map to entity indices inside
    [0, N_ENTITIES). A trailing unpaired token is ignored.

    This is pure Python, so it is defined whether or not torch is installed.

    Returns:
        (has_derived: bool, derived_pairs: set of (int, int))
        Pairs are integer entity indices (0-based, not token ids).
    """
    if not gen:
        return False, set()
    detected = gen[0] == HAS_YES
    pairs: set = set()
    if detected:
        i = 1
        while i + 1 < len(gen):
            a = gen[i] - ENT_OFFSET
            b = gen[i + 1] - ENT_OFFSET
            if 0 <= a < N_ENTITIES and 0 <= b < N_ENTITIES:
                pairs.add((a, b))
            i += 2
    return detected, pairs


if _TORCH_OK:
    class Block(nn.Module):
        """Single causal self-attention + FFN block (pre-LayerNorm, residual)."""

        def __init__(self) -> None:
            super().__init__()
            # Attribute names are part of the checkpoint's state-dict keys.
            self.ln1 = nn.LayerNorm(D_MODEL)
            self.ln2 = nn.LayerNorm(D_MODEL)
            self.attn = nn.MultiheadAttention(
                D_MODEL, N_HEADS, dropout=DROPOUT, batch_first=True
            )
            self.ff = nn.Sequential(
                nn.Linear(D_MODEL, D_MODEL * 4),
                nn.GELU(),
                nn.Linear(D_MODEL * 4, D_MODEL),
            )

        def forward(self, x: "torch.Tensor") -> "torch.Tensor":
            T = x.size(1)
            # Additive causal mask: -inf strictly above the diagonal, so a
            # position cannot attend to later positions.
            mask = torch.triu(torch.full((T, T), float("-inf"), device=x.device), 1)
            h = self.ln1(x)
            h, _ = self.attn(h, h, h, attn_mask=mask, is_causal=False)
            x = x + h
            return x + self.ff(self.ln2(x))

    class ThinExpert(nn.Module):
        """
        Causal transformer for transitive-closure inference.

        Input token sequence:
            [BOS]  <edge_pairs sorted>  [SEP]
        where each edge (a, b) is encoded as two tokens:
            ENT_OFFSET+a  ENT_OFFSET+b

        Output greedy-decoded sequence:
            HAS_YES  <derived_pair_tokens...>   -- if derived pairs exist
            HAS_NO                               -- if no derived pairs

        Instantiate then call load_state_dict with the bundled checkpoint.
        """

        def __init__(self) -> None:
            super().__init__()
            # Attribute names are part of the checkpoint's state-dict keys.
            self.tok = nn.Embedding(VOCAB, D_MODEL, padding_idx=PAD)
            self.pos = nn.Embedding(MAX_SEQ, D_MODEL)
            self.blocks = nn.Sequential(*[Block() for _ in range(N_LAYERS)])
            self.ln = nn.LayerNorm(D_MODEL)
            self.head = nn.Linear(D_MODEL, VOCAB, bias=False)

        def forward(self, ids: "torch.Tensor") -> "torch.Tensor":
            """ids: (1, T) int64 -> logits: (1, T, VOCAB)"""
            T = ids.size(1)
            # The positional table has MAX_SEQ rows, so T must not exceed it.
            x = self.tok(ids) + self.pos(torch.arange(T, device=ids.device))
            x = self.blocks(x)
            x = self.ln(x)
            return self.head(x)

    @torch.no_grad()
    def free_run(model: ThinExpert, inp_ids: List[int], device: str = "cpu") -> List[int]:
        """
        Greedy autoregressive decode.

        Args:
            model:   loaded ThinExpert in eval mode
            inp_ids: integer token list [BOS, ...edge tokens..., SEP]
            device:  device the prompt tensor is created on (default 'cpu');
                     presumably the model must live on the same device.

        Returns:
            List of generated token ids (excluding EOS / stopping token).
            Decoding stops at EOS, after MAX_GEN tokens, or once the running
            sequence reaches MAX_SEQ - 1 tokens.

        Raises:
            ValueError: inp_ids is empty or longer than MAX_SEQ, the size of
                the model's positional table. Without this check the
                failure would be an opaque indexing error inside the model.
        """
        if not inp_ids:
            raise ValueError("free_run: inp_ids must not be empty")
        if len(inp_ids) > MAX_SEQ:
            raise ValueError(
                f"free_run: prompt of {len(inp_ids)} tokens exceeds MAX_SEQ={MAX_SEQ}"
            )

        ids = torch.tensor(inp_ids, dtype=torch.long, device=device).unsqueeze(0)
        gen: List[int] = []
        for _ in range(MAX_GEN):
            logits = model(ids)[0, -1]  # logits for the next token only
            nxt = logits.argmax().item()
            if nxt == EOS or ids.size(1) >= MAX_SEQ - 1:
                break
            gen.append(nxt)
            ids = torch.cat([ids, torch.tensor([[nxt]], device=device)], 1)
        return gen

else:
    # Stub classes so `from derive_expert.model import ThinExpert` never raises
    # ImportError. core.py checks TORCH_AVAILABLE and raises a clear error.
    class Block:  # type: ignore[no-redef]
        pass

    class ThinExpert:  # type: ignore[no-redef]
        pass

    def free_run(*args, **kwargs):  # type: ignore[misc]
        raise RuntimeError("torch not installed; NN path unavailable")
|
|
Binary file
|