phantomreason-0.1.0.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- phantomreason-0.1.0/LICENSE +21 -0
- phantomreason-0.1.0/MANIFEST.in +4 -0
- phantomreason-0.1.0/PKG-INFO +283 -0
- phantomreason-0.1.0/README.md +255 -0
- phantomreason-0.1.0/deployment/nginx.conf.example +21 -0
- phantomreason-0.1.0/deployment/phantom-agent.service.example +16 -0
- phantomreason-0.1.0/phantomreason/__init__.py +20 -0
- phantomreason-0.1.0/phantomreason/corpus.py +119 -0
- phantomreason-0.1.0/phantomreason/evaluate.py +89 -0
- phantomreason-0.1.0/phantomreason/model.py +1532 -0
- phantomreason-0.1.0/phantomreason/py.typed +0 -0
- phantomreason-0.1.0/phantomreason/service.py +329 -0
- phantomreason-0.1.0/phantomreason/storage.py +55 -0
- phantomreason-0.1.0/phantomreason/stores.py +161 -0
- phantomreason-0.1.0/phantomreason/traces.py +93 -0
- phantomreason-0.1.0/phantomreason.egg-info/PKG-INFO +283 -0
- phantomreason-0.1.0/phantomreason.egg-info/SOURCES.txt +22 -0
- phantomreason-0.1.0/phantomreason.egg-info/dependency_links.txt +1 -0
- phantomreason-0.1.0/phantomreason.egg-info/entry_points.txt +3 -0
- phantomreason-0.1.0/phantomreason.egg-info/requires.txt +1 -0
- phantomreason-0.1.0/phantomreason.egg-info/top_level.txt +1 -0
- phantomreason-0.1.0/pyproject.toml +61 -0
- phantomreason-0.1.0/seed_lexicon.txt +60 -0
- phantomreason-0.1.0/setup.cfg +4 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 PhantomTrace Agent Maintainers
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,283 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: phantomreason
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Symbolic AI reasoning engine built on PhantomTrace absence arithmetic. Trace-based memory, fact management, and semantic retrieval for standalone use or as an augmentation layer over LLMs.
|
|
5
|
+
Author: PhantomTrace Agent Maintainers
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/phantomtrace/phantomreason
|
|
8
|
+
Project-URL: Documentation, https://github.com/phantomtrace/phantomreason#readme
|
|
9
|
+
Project-URL: Repository, https://github.com/phantomtrace/phantomreason
|
|
10
|
+
Project-URL: Issues, https://github.com/phantomtrace/phantomreason/issues
|
|
11
|
+
Keywords: phantomtrace,phantomreason,symbolic-ai,reasoning,memory,agent,absence-arithmetic,trace-vector,fact-store,llm-augmentation
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
18
|
+
Classifier: Operating System :: OS Independent
|
|
19
|
+
Classifier: Intended Audience :: Developers
|
|
20
|
+
Classifier: Intended Audience :: Science/Research
|
|
21
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
22
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
23
|
+
Requires-Python: >=3.11
|
|
24
|
+
Description-Content-Type: text/markdown
|
|
25
|
+
License-File: LICENSE
|
|
26
|
+
Requires-Dist: absence-calculator>=0.5.0
|
|
27
|
+
Dynamic: license-file
|
|
28
|
+
|
|
29
|
+
# PhantomReason
|
|
30
|
+
|
|
31
|
+
A symbolic AI reasoning engine built on [PhantomTrace](https://pypi.org/project/absence-calculator/) absence arithmetic.
|
|
32
|
+
|
|
33
|
+
PhantomReason is not a neural network. It represents knowledge as sparse binary vectors where each dimension is either *present* or *absent* -- the two fundamental states of PhantomTrace arithmetic. Learning happens by toggling these states through the same operations that define the algebra: `combine`, `compare`, `add`, `erase`, and `toggle`.
|
|
34
|
+
|
|
35
|
+
The result is a lightweight symbolic system that can store facts, answer questions, parse sentence structure, and generate constrained text -- all without gradient descent, matrix multiplication, or floating-point weights.
|
|
36
|
+
|
|
37
|
+
## Why This Exists
|
|
38
|
+
|
|
39
|
+
Large language models are powerful but opaque. They hallucinate, forget instructions, and offer no mechanism to inspect *why* they produced a given answer.
|
|
40
|
+
|
|
41
|
+
PhantomReason takes a different approach:
|
|
42
|
+
|
|
43
|
+
- **Every fact has a traceable strength** stored as a PhantomTrace number. You can inspect it, reinforce it with `add`, weaken it with `erase`, and watch it decay over time.
|
|
44
|
+
- **Predictions are distance-based.** The model ranks candidates by how close their trace vectors are to the current context. The scoring is transparent and deterministic.
|
|
45
|
+
- **Contradictions are resolved symbolically.** When a new fact conflicts with an old one, the old fact's strength is erased rather than silently overwritten.
|
|
46
|
+
- **Memory is explicit.** Episodes, facts, and symbols each have their own trace store with named banks. Nothing is hidden in a billion-parameter matrix.
|
|
47
|
+
|
|
48
|
+
This makes PhantomReason suitable as a standalone symbolic reasoner for constrained domains, or as an **augmentation layer** that can sit alongside an LLM to provide grounded fact memory, symbolic confidence tracking, and interpretable retrieval.
|
|
49
|
+
|
|
50
|
+
## Installation
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
pip install phantomreason
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
Requires Python 3.11+ and [`absence-calculator`](https://pypi.org/project/absence-calculator/) (installed automatically).
|
|
57
|
+
|
|
58
|
+
## Performance
|
|
59
|
+
|
|
60
|
+
Initial model setup involves computing sparse trace vectors for the vocabulary and training corpus. On a typical machine:
|
|
61
|
+
|
|
62
|
+
| Operation | Time |
|
|
63
|
+
|-----------|------|
|
|
64
|
+
| Import + model init (dim=512) | ~1-2 seconds |
|
|
65
|
+
| Training on a few sentences | ~30-40 seconds |
|
|
66
|
+
| Subsequent startup with persisted state | ~3-5 seconds |
|
|
67
|
+
| Individual predictions | <1 second (warm cache) |
|
|
68
|
+
| `route_prompt` queries | 0.1-4 seconds |
|
|
69
|
+
|
|
70
|
+
The first run is the slowest because trace vectors must be computed for every word in the training data. After the model saves its state, restarts are fast because the vectors are loaded from disk rather than recomputed.
|
|
71
|
+
|
|
72
|
+
## Quick Start
|
|
73
|
+
|
|
74
|
+
```python
|
|
75
|
+
from phantomreason import PhantomLanguageModel
|
|
76
|
+
|
|
77
|
+
model = PhantomLanguageModel(dim=512, sparsity=47)
|
|
78
|
+
|
|
79
|
+
model.train_on_text(
|
|
80
|
+
"aurora paints dawn softly. chefs simmer herbs slowly. "
|
|
81
|
+
"gardeners water orchids gently.",
|
|
82
|
+
epochs=1,
|
|
83
|
+
)
|
|
84
|
+
|
|
85
|
+
model.register_fact("chefs", "simmer", ["herbs", "slowly"])
|
|
86
|
+
model.register_fact("gardeners", "water", ["orchids", "gently"])
|
|
87
|
+
|
|
88
|
+
routed = model.route_prompt("what do chefs simmer?")
|
|
89
|
+
print(routed["fact_answer"]) # "herbs slowly"
|
|
90
|
+
print(routed["sample"]) # "chefs simmer herbs slowly."
|
|
91
|
+
|
|
92
|
+
prediction = model.predict_next(model.tokenize("aurora paints"))
|
|
93
|
+
print(prediction) # "dawn"
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
**Note on fact recall:** `route_prompt` matches the question's predicate against stored fact predicates. The words in the question must match the stored forms exactly: with the fact registered above as `("chefs", "simmer", ...)`, ask "what do chefs simmer?", not "what does chef simmer?" (the singular "chef" is not the stored subject "chefs"). Verb form normalization covers common auxiliaries (is/are/was/were) but not all inflections.
|
|
97
|
+
|
|
98
|
+
## Core Concepts
|
|
99
|
+
|
|
100
|
+
### Trace Vectors
|
|
101
|
+
|
|
102
|
+
Every word the model knows is represented as a sparse vector of `AbsentNumber` objects. Each slot is either *present* (the word is associated with that dimension) or *absent* (it is not). The vector has a fixed number of present slots controlled by the `sparsity` parameter.
|
|
103
|
+
|
|
104
|
+
### Operations
|
|
105
|
+
|
|
106
|
+
All reasoning uses PhantomTrace operations from the `absence-calculator` library:
|
|
107
|
+
|
|
108
|
+
| Operation | PhantomTrace | Use in Agent |
|
|
109
|
+
|-----------|-------------|--------------|
|
|
110
|
+
| `combine(a, b)` | State overlap | Building context signatures from word vectors |
|
|
111
|
+
| `compare(a, b)` | Directional difference | Measuring distance between vectors |
|
|
112
|
+
| `add(a, b)` | State accumulation | Composing semantic probes, strengthening facts |
|
|
113
|
+
| `erase(a, b)` | State removal with flip | Weakening facts, resolving contradictions |
|
|
114
|
+
| `toggle(x)` | Flip present/absent | Learning updates, vector modification |
|
|
115
|
+
| `n(value)` | Create a present number | Fact strength initialization |
|
|
116
|
+
|
|
117
|
+
### Trace Stores
|
|
118
|
+
|
|
119
|
+
The model maintains four separate trace stores:
|
|
120
|
+
|
|
121
|
+
- **Trace store** -- word vectors with context and topic memory banks
|
|
122
|
+
- **Symbol store** -- intent, action, form, and role classifications
|
|
123
|
+
- **Episode store** -- interaction history for episodic memory retrieval
|
|
124
|
+
- **Fact store** -- subject-predicate-object triples with forward and inverse lookup
|
|
125
|
+
|
|
126
|
+
Each store holds a primary vector and one or more named banks per entry, all subject to the same sparsity constraint.
|
|
127
|
+
|
|
128
|
+
### Fact Lifecycle
|
|
129
|
+
|
|
130
|
+
Facts have a strength value stored as a PhantomTrace `AbsentNumber`:
|
|
131
|
+
|
|
132
|
+
1. A new fact starts with strength `n(1)` -- a present 1.
|
|
133
|
+
2. Teaching the same fact again adds `n(1)` to its strength.
|
|
134
|
+
3. A contradicting fact (same subject + predicate, different object) erases the old fact's strength.
|
|
135
|
+
4. Periodic decay erases `n(1)` from old facts, letting stale knowledge fade.
|
|
136
|
+
5. A fact becomes inactive when its strength drops to an absent state.
|
|
137
|
+
|
|
138
|
+
This mirrors how PhantomTrace arithmetic treats presence and absence: knowledge does not disappear, it transitions from present to absent.
|
|
139
|
+
|
|
140
|
+
## Training and Ingestion
|
|
141
|
+
|
|
142
|
+
```python
|
|
143
|
+
model.train_on_text("gardeners water orchids gently.", epochs=1)
|
|
144
|
+
|
|
145
|
+
model.ingest_text_corpus(long_text, trace_budget_per_sentence=96)
|
|
146
|
+
|
|
147
|
+
model.ingest_file("corpus.txt")
|
|
148
|
+
|
|
149
|
+
model.ingest_url("https://example.com/article")
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
The `seed_lexicon.txt` file included in the repository provides 60 dictionary-style definitions that bootstrap the model's vocabulary and fact base. Ingesting the full lexicon at dim=512 takes several minutes.
|
|
153
|
+
|
|
154
|
+
## HTTP Service
|
|
155
|
+
|
|
156
|
+
PhantomReason includes an HTTP service:
|
|
157
|
+
|
|
158
|
+
```bash
|
|
159
|
+
phantomreason-serve --host 127.0.0.1 --port 8080 --dim 512 --sparsity 47
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
To enable bearer-token authentication, set `PHANTOM_AGENT_API_TOKEN` before starting the service:
|
|
163
|
+
|
|
164
|
+
```bash
|
|
165
|
+
export PHANTOM_AGENT_API_TOKEN='your-secret-token'
|
|
166
|
+
phantomreason-serve --host 127.0.0.1 --port 8080
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
### Endpoints
|
|
170
|
+
|
|
171
|
+
| Method | Path | Auth | Description |
|
|
172
|
+
|--------|------|------|-------------|
|
|
173
|
+
| `GET` | `/health` | No | Service and model status |
|
|
174
|
+
| `GET` | `/evaluate` | Yes | Run the built-in evaluation harness |
|
|
175
|
+
| `POST` | `/query` | Yes | Route a prompt through the reasoning pipeline |
|
|
176
|
+
| `POST` | `/teach` | Yes | Train on new text |
|
|
177
|
+
| `POST` | `/ingest` | Yes | Ingest text, file, or URL |
|
|
178
|
+
| `POST` | `/focus` | Yes | Set focus mode or focus text |
|
|
179
|
+
| `POST` | `/checkpoint` | Yes | Save model state to disk |
|
|
180
|
+
|
|
181
|
+
### Example Requests
|
|
182
|
+
|
|
183
|
+
```bash
|
|
184
|
+
# Query
|
|
185
|
+
curl -X POST http://localhost:8080/query \
|
|
186
|
+
-H "Authorization: Bearer $PHANTOM_AGENT_API_TOKEN" \
|
|
187
|
+
-H "Content-Type: application/json" \
|
|
188
|
+
-d '{"prompt": "what do chefs simmer?"}'
|
|
189
|
+
|
|
190
|
+
# Teach
|
|
191
|
+
curl -X POST http://localhost:8080/teach \
|
|
192
|
+
-H "Authorization: Bearer $PHANTOM_AGENT_API_TOKEN" \
|
|
193
|
+
-H "Content-Type: application/json" \
|
|
194
|
+
-d '{"text": "vector means an ordered list used for state", "epochs": 1}'
|
|
195
|
+
|
|
196
|
+
# Ingest from URL
|
|
197
|
+
curl -X POST http://localhost:8080/ingest \
|
|
198
|
+
-H "Authorization: Bearer $PHANTOM_AGENT_API_TOKEN" \
|
|
199
|
+
-H "Content-Type: application/json" \
|
|
200
|
+
-d '{"url": "https://example.com/article"}'
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
## Data Storage
|
|
204
|
+
|
|
205
|
+
Runtime state is stored in the current working directory by default:
|
|
206
|
+
|
|
207
|
+
- `words` -- the learned vocabulary, one word per line
|
|
208
|
+
- `phantom_model_state.json` -- all vectors, banks, facts, episodes, and symbols
|
|
209
|
+
|
|
210
|
+
Set `PHANTOM_DATA_DIR` to store state in a custom location:
|
|
211
|
+
|
|
212
|
+
```bash
|
|
213
|
+
export PHANTOM_DATA_DIR=/var/lib/phantomreason
|
|
214
|
+
phantomreason-serve --host 127.0.0.1 --port 8080
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
## Evaluation
|
|
218
|
+
|
|
219
|
+
Run the built-in evaluation harness to verify the model works correctly:
|
|
220
|
+
|
|
221
|
+
```bash
|
|
222
|
+
phantomreason-eval
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
This tests fact recall, contradiction handling, decay behavior, and sentence parsing.
|
|
226
|
+
|
|
227
|
+
## As an LLM Augmentation Layer
|
|
228
|
+
|
|
229
|
+
PhantomReason is designed to complement large language models, not replace them. A future integration package can use the agent as:
|
|
230
|
+
|
|
231
|
+
- **A grounded fact store.** Teach the agent verified facts and query it before passing context to an LLM. The agent's symbolic confidence scores tell you which facts are reliable.
|
|
232
|
+
- **A contradiction detector.** When new information conflicts with stored knowledge, the agent's erasure mechanics make the conflict explicit rather than silently overwriting.
|
|
233
|
+
- **An interpretable memory layer.** The agent's trace vectors and fact strengths are fully inspectable -- you can trace *why* a particular answer was retrieved.
|
|
234
|
+
- **A retrieval filter.** Use the agent's `rank_candidates` and `retrieve_fact` to select relevant context for an LLM prompt, with transparent distance scores.
|
|
235
|
+
|
|
236
|
+
The boundary is clean: PhantomReason handles memory, confidence, and symbolic retrieval. The LLM handles natural language generation and broad world knowledge.
|
|
237
|
+
|
|
238
|
+
## Interactive Mode
|
|
239
|
+
|
|
240
|
+
For experimentation and debugging:
|
|
241
|
+
|
|
242
|
+
```bash
|
|
243
|
+
python -c "from phantomreason import PhantomLanguageModel, run_interactive_test; run_interactive_test(PhantomLanguageModel(dim=512, sparsity=47))"
|
|
244
|
+
```
|
|
245
|
+
|
|
246
|
+
The first run takes 30-40 seconds while trace vectors are computed. Subsequent runs with persisted state start in under 5 seconds.
|
|
247
|
+
|
|
248
|
+
Commands in interactive mode:
|
|
249
|
+
|
|
250
|
+
| Command | Description |
|
|
251
|
+
|---------|-------------|
|
|
252
|
+
| `teach <text>` | Train on new text |
|
|
253
|
+
| `ingest <file>` | Ingest a local text file |
|
|
254
|
+
| `scrape <url>` | Ingest from a URL |
|
|
255
|
+
| `focus <text>` | Prime focus mode on specific text |
|
|
256
|
+
| `focus on` / `focus off` | Toggle focus mode |
|
|
257
|
+
| `inspect <prompt>` | Show the top-ranked candidate and its trace comparison |
|
|
258
|
+
| Any other text | Route through the full reasoning pipeline |
|
|
259
|
+
|
|
260
|
+
## Project Structure
|
|
261
|
+
|
|
262
|
+
```
|
|
263
|
+
phantomreason/
|
|
264
|
+
__init__.py Package exports
|
|
265
|
+
model.py Core reasoning engine (PhantomLanguageModel)
|
|
266
|
+
traces.py PhantomTrace operation wrappers
|
|
267
|
+
stores.py Sparse trace vector storage (TraceStore)
|
|
268
|
+
storage.py Vocabulary and state path management
|
|
269
|
+
corpus.py Text normalization, sentence splitting, URL fetching
|
|
270
|
+
service.py HTTP service with auth and metrics
|
|
271
|
+
evaluate.py Built-in evaluation harness
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
## Requirements
|
|
275
|
+
|
|
276
|
+
- Python 3.11+
|
|
277
|
+
- [`absence-calculator`](https://pypi.org/project/absence-calculator/) >= 0.5.0
|
|
278
|
+
|
|
279
|
+
No other dependencies. No PyTorch, TensorFlow, NumPy, or any ML framework.
|
|
280
|
+
|
|
281
|
+
## License
|
|
282
|
+
|
|
283
|
+
MIT
|
|
@@ -0,0 +1,255 @@
|
|
|
1
|
+
# PhantomReason
|
|
2
|
+
|
|
3
|
+
A symbolic AI reasoning engine built on [PhantomTrace](https://pypi.org/project/absence-calculator/) absence arithmetic.
|
|
4
|
+
|
|
5
|
+
PhantomReason is not a neural network. It represents knowledge as sparse binary vectors where each dimension is either *present* or *absent* -- the two fundamental states of PhantomTrace arithmetic. Learning happens by toggling these states through the same operations that define the algebra: `combine`, `compare`, `add`, `erase`, and `toggle`.
|
|
6
|
+
|
|
7
|
+
The result is a lightweight symbolic system that can store facts, answer questions, parse sentence structure, and generate constrained text -- all without gradient descent, matrix multiplication, or floating-point weights.
|
|
8
|
+
|
|
9
|
+
## Why This Exists
|
|
10
|
+
|
|
11
|
+
Large language models are powerful but opaque. They hallucinate, forget instructions, and offer no mechanism to inspect *why* they produced a given answer.
|
|
12
|
+
|
|
13
|
+
PhantomReason takes a different approach:
|
|
14
|
+
|
|
15
|
+
- **Every fact has a traceable strength** stored as a PhantomTrace number. You can inspect it, reinforce it with `add`, weaken it with `erase`, and watch it decay over time.
|
|
16
|
+
- **Predictions are distance-based.** The model ranks candidates by how close their trace vectors are to the current context. The scoring is transparent and deterministic.
|
|
17
|
+
- **Contradictions are resolved symbolically.** When a new fact conflicts with an old one, the old fact's strength is erased rather than silently overwritten.
|
|
18
|
+
- **Memory is explicit.** Episodes, facts, and symbols each have their own trace store with named banks. Nothing is hidden in a billion-parameter matrix.
|
|
19
|
+
|
|
20
|
+
This makes PhantomReason suitable as a standalone symbolic reasoner for constrained domains, or as an **augmentation layer** that can sit alongside an LLM to provide grounded fact memory, symbolic confidence tracking, and interpretable retrieval.
|
|
21
|
+
|
|
22
|
+
## Installation
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
pip install phantomreason
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
Requires Python 3.11+ and [`absence-calculator`](https://pypi.org/project/absence-calculator/) (installed automatically).
|
|
29
|
+
|
|
30
|
+
## Performance
|
|
31
|
+
|
|
32
|
+
Initial model setup involves computing sparse trace vectors for the vocabulary and training corpus. On a typical machine:
|
|
33
|
+
|
|
34
|
+
| Operation | Time |
|
|
35
|
+
|-----------|------|
|
|
36
|
+
| Import + model init (dim=512) | ~1-2 seconds |
|
|
37
|
+
| Training on a few sentences | ~30-40 seconds |
|
|
38
|
+
| Subsequent startup with persisted state | ~3-5 seconds |
|
|
39
|
+
| Individual predictions | <1 second (warm cache) |
|
|
40
|
+
| `route_prompt` queries | 0.1-4 seconds |
|
|
41
|
+
|
|
42
|
+
The first run is the slowest because trace vectors must be computed for every word in the training data. After the model saves its state, restarts are fast because the vectors are loaded from disk rather than recomputed.
|
|
43
|
+
|
|
44
|
+
## Quick Start
|
|
45
|
+
|
|
46
|
+
```python
|
|
47
|
+
from phantomreason import PhantomLanguageModel
|
|
48
|
+
|
|
49
|
+
model = PhantomLanguageModel(dim=512, sparsity=47)
|
|
50
|
+
|
|
51
|
+
model.train_on_text(
|
|
52
|
+
"aurora paints dawn softly. chefs simmer herbs slowly. "
|
|
53
|
+
"gardeners water orchids gently.",
|
|
54
|
+
epochs=1,
|
|
55
|
+
)
|
|
56
|
+
|
|
57
|
+
model.register_fact("chefs", "simmer", ["herbs", "slowly"])
|
|
58
|
+
model.register_fact("gardeners", "water", ["orchids", "gently"])
|
|
59
|
+
|
|
60
|
+
routed = model.route_prompt("what do chefs simmer?")
|
|
61
|
+
print(routed["fact_answer"]) # "herbs slowly"
|
|
62
|
+
print(routed["sample"]) # "chefs simmer herbs slowly."
|
|
63
|
+
|
|
64
|
+
prediction = model.predict_next(model.tokenize("aurora paints"))
|
|
65
|
+
print(prediction) # "dawn"
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
**Note on fact recall:** `route_prompt` matches the question's predicate against stored fact predicates. The words in the question must match the stored forms exactly: with the fact registered above as `("chefs", "simmer", ...)`, ask "what do chefs simmer?", not "what does chef simmer?" (the singular "chef" is not the stored subject "chefs"). Verb form normalization covers common auxiliaries (is/are/was/were) but not all inflections.
|
|
69
|
+
|
|
70
|
+
## Core Concepts
|
|
71
|
+
|
|
72
|
+
### Trace Vectors
|
|
73
|
+
|
|
74
|
+
Every word the model knows is represented as a sparse vector of `AbsentNumber` objects. Each slot is either *present* (the word is associated with that dimension) or *absent* (it is not). The vector has a fixed number of present slots controlled by the `sparsity` parameter.
|
|
75
|
+
|
|
76
|
+
### Operations
|
|
77
|
+
|
|
78
|
+
All reasoning uses PhantomTrace operations from the `absence-calculator` library:
|
|
79
|
+
|
|
80
|
+
| Operation | PhantomTrace | Use in Agent |
|
|
81
|
+
|-----------|-------------|--------------|
|
|
82
|
+
| `combine(a, b)` | State overlap | Building context signatures from word vectors |
|
|
83
|
+
| `compare(a, b)` | Directional difference | Measuring distance between vectors |
|
|
84
|
+
| `add(a, b)` | State accumulation | Composing semantic probes, strengthening facts |
|
|
85
|
+
| `erase(a, b)` | State removal with flip | Weakening facts, resolving contradictions |
|
|
86
|
+
| `toggle(x)` | Flip present/absent | Learning updates, vector modification |
|
|
87
|
+
| `n(value)` | Create a present number | Fact strength initialization |
|
|
88
|
+
|
|
89
|
+
### Trace Stores
|
|
90
|
+
|
|
91
|
+
The model maintains four separate trace stores:
|
|
92
|
+
|
|
93
|
+
- **Trace store** -- word vectors with context and topic memory banks
|
|
94
|
+
- **Symbol store** -- intent, action, form, and role classifications
|
|
95
|
+
- **Episode store** -- interaction history for episodic memory retrieval
|
|
96
|
+
- **Fact store** -- subject-predicate-object triples with forward and inverse lookup
|
|
97
|
+
|
|
98
|
+
Each store holds a primary vector and one or more named banks per entry, all subject to the same sparsity constraint.
|
|
99
|
+
|
|
100
|
+
### Fact Lifecycle
|
|
101
|
+
|
|
102
|
+
Facts have a strength value stored as a PhantomTrace `AbsentNumber`:
|
|
103
|
+
|
|
104
|
+
1. A new fact starts with strength `n(1)` -- a present 1.
|
|
105
|
+
2. Teaching the same fact again adds `n(1)` to its strength.
|
|
106
|
+
3. A contradicting fact (same subject + predicate, different object) erases the old fact's strength.
|
|
107
|
+
4. Periodic decay erases `n(1)` from old facts, letting stale knowledge fade.
|
|
108
|
+
5. A fact becomes inactive when its strength drops to an absent state.
|
|
109
|
+
|
|
110
|
+
This mirrors how PhantomTrace arithmetic treats presence and absence: knowledge does not disappear, it transitions from present to absent.
|
|
111
|
+
|
|
112
|
+
## Training and Ingestion
|
|
113
|
+
|
|
114
|
+
```python
|
|
115
|
+
model.train_on_text("gardeners water orchids gently.", epochs=1)
|
|
116
|
+
|
|
117
|
+
model.ingest_text_corpus(long_text, trace_budget_per_sentence=96)
|
|
118
|
+
|
|
119
|
+
model.ingest_file("corpus.txt")
|
|
120
|
+
|
|
121
|
+
model.ingest_url("https://example.com/article")
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
The `seed_lexicon.txt` file included in the repository provides 60 dictionary-style definitions that bootstrap the model's vocabulary and fact base. Ingesting the full lexicon at dim=512 takes several minutes.
|
|
125
|
+
|
|
126
|
+
## HTTP Service
|
|
127
|
+
|
|
128
|
+
PhantomReason includes an HTTP service:
|
|
129
|
+
|
|
130
|
+
```bash
|
|
131
|
+
phantomreason-serve --host 127.0.0.1 --port 8080 --dim 512 --sparsity 47
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
To enable bearer-token authentication, set `PHANTOM_AGENT_API_TOKEN` before starting the service:
|
|
135
|
+
|
|
136
|
+
```bash
|
|
137
|
+
export PHANTOM_AGENT_API_TOKEN='your-secret-token'
|
|
138
|
+
phantomreason-serve --host 127.0.0.1 --port 8080
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
### Endpoints
|
|
142
|
+
|
|
143
|
+
| Method | Path | Auth | Description |
|
|
144
|
+
|--------|------|------|-------------|
|
|
145
|
+
| `GET` | `/health` | No | Service and model status |
|
|
146
|
+
| `GET` | `/evaluate` | Yes | Run the built-in evaluation harness |
|
|
147
|
+
| `POST` | `/query` | Yes | Route a prompt through the reasoning pipeline |
|
|
148
|
+
| `POST` | `/teach` | Yes | Train on new text |
|
|
149
|
+
| `POST` | `/ingest` | Yes | Ingest text, file, or URL |
|
|
150
|
+
| `POST` | `/focus` | Yes | Set focus mode or focus text |
|
|
151
|
+
| `POST` | `/checkpoint` | Yes | Save model state to disk |
|
|
152
|
+
|
|
153
|
+
### Example Requests
|
|
154
|
+
|
|
155
|
+
```bash
|
|
156
|
+
# Query
|
|
157
|
+
curl -X POST http://localhost:8080/query \
|
|
158
|
+
-H "Authorization: Bearer $PHANTOM_AGENT_API_TOKEN" \
|
|
159
|
+
-H "Content-Type: application/json" \
|
|
160
|
+
-d '{"prompt": "what do chefs simmer?"}'
|
|
161
|
+
|
|
162
|
+
# Teach
|
|
163
|
+
curl -X POST http://localhost:8080/teach \
|
|
164
|
+
-H "Authorization: Bearer $PHANTOM_AGENT_API_TOKEN" \
|
|
165
|
+
-H "Content-Type: application/json" \
|
|
166
|
+
-d '{"text": "vector means an ordered list used for state", "epochs": 1}'
|
|
167
|
+
|
|
168
|
+
# Ingest from URL
|
|
169
|
+
curl -X POST http://localhost:8080/ingest \
|
|
170
|
+
-H "Authorization: Bearer $PHANTOM_AGENT_API_TOKEN" \
|
|
171
|
+
-H "Content-Type: application/json" \
|
|
172
|
+
-d '{"url": "https://example.com/article"}'
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
## Data Storage
|
|
176
|
+
|
|
177
|
+
Runtime state is stored in the current working directory by default:
|
|
178
|
+
|
|
179
|
+
- `words` -- the learned vocabulary, one word per line
|
|
180
|
+
- `phantom_model_state.json` -- all vectors, banks, facts, episodes, and symbols
|
|
181
|
+
|
|
182
|
+
Set `PHANTOM_DATA_DIR` to store state in a custom location:
|
|
183
|
+
|
|
184
|
+
```bash
|
|
185
|
+
export PHANTOM_DATA_DIR=/var/lib/phantomreason
|
|
186
|
+
phantomreason-serve --host 127.0.0.1 --port 8080
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
## Evaluation
|
|
190
|
+
|
|
191
|
+
Run the built-in evaluation harness to verify the model works correctly:
|
|
192
|
+
|
|
193
|
+
```bash
|
|
194
|
+
phantomreason-eval
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
This tests fact recall, contradiction handling, decay behavior, and sentence parsing.
|
|
198
|
+
|
|
199
|
+
## As an LLM Augmentation Layer
|
|
200
|
+
|
|
201
|
+
PhantomReason is designed to complement large language models, not replace them. A future integration package can use the agent as:
|
|
202
|
+
|
|
203
|
+
- **A grounded fact store.** Teach the agent verified facts and query it before passing context to an LLM. The agent's symbolic confidence scores tell you which facts are reliable.
|
|
204
|
+
- **A contradiction detector.** When new information conflicts with stored knowledge, the agent's erasure mechanics make the conflict explicit rather than silently overwriting.
|
|
205
|
+
- **An interpretable memory layer.** The agent's trace vectors and fact strengths are fully inspectable -- you can trace *why* a particular answer was retrieved.
|
|
206
|
+
- **A retrieval filter.** Use the agent's `rank_candidates` and `retrieve_fact` to select relevant context for an LLM prompt, with transparent distance scores.
|
|
207
|
+
|
|
208
|
+
The boundary is clean: PhantomReason handles memory, confidence, and symbolic retrieval. The LLM handles natural language generation and broad world knowledge.
|
|
209
|
+
|
|
210
|
+
## Interactive Mode
|
|
211
|
+
|
|
212
|
+
For experimentation and debugging:
|
|
213
|
+
|
|
214
|
+
```bash
|
|
215
|
+
python -c "from phantomreason import PhantomLanguageModel, run_interactive_test; run_interactive_test(PhantomLanguageModel(dim=512, sparsity=47))"
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
The first run takes 30-40 seconds while trace vectors are computed. Subsequent runs with persisted state start in under 5 seconds.
|
|
219
|
+
|
|
220
|
+
Commands in interactive mode:
|
|
221
|
+
|
|
222
|
+
| Command | Description |
|
|
223
|
+
|---------|-------------|
|
|
224
|
+
| `teach <text>` | Train on new text |
|
|
225
|
+
| `ingest <file>` | Ingest a local text file |
|
|
226
|
+
| `scrape <url>` | Ingest from a URL |
|
|
227
|
+
| `focus <text>` | Prime focus mode on specific text |
|
|
228
|
+
| `focus on` / `focus off` | Toggle focus mode |
|
|
229
|
+
| `inspect <prompt>` | Show the top-ranked candidate and its trace comparison |
|
|
230
|
+
| Any other text | Route through the full reasoning pipeline |
|
|
231
|
+
|
|
232
|
+
## Project Structure
|
|
233
|
+
|
|
234
|
+
```
|
|
235
|
+
phantomreason/
|
|
236
|
+
__init__.py Package exports
|
|
237
|
+
model.py Core reasoning engine (PhantomLanguageModel)
|
|
238
|
+
traces.py PhantomTrace operation wrappers
|
|
239
|
+
stores.py Sparse trace vector storage (TraceStore)
|
|
240
|
+
storage.py Vocabulary and state path management
|
|
241
|
+
corpus.py Text normalization, sentence splitting, URL fetching
|
|
242
|
+
service.py HTTP service with auth and metrics
|
|
243
|
+
evaluate.py Built-in evaluation harness
|
|
244
|
+
```
|
|
245
|
+
|
|
246
|
+
## Requirements
|
|
247
|
+
|
|
248
|
+
- Python 3.11+
|
|
249
|
+
- [`absence-calculator`](https://pypi.org/project/absence-calculator/) >= 0.5.0
|
|
250
|
+
|
|
251
|
+
No other dependencies. No PyTorch, TensorFlow, NumPy, or any ML framework.
|
|
252
|
+
|
|
253
|
+
## License
|
|
254
|
+
|
|
255
|
+
MIT
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
server {
|
|
2
|
+
listen 443 ssl http2;
|
|
3
|
+
server_name phantom-agent.example.com;
|
|
4
|
+
|
|
5
|
+
ssl_certificate /etc/letsencrypt/live/phantom-agent.example.com/fullchain.pem;
|
|
6
|
+
ssl_certificate_key /etc/letsencrypt/live/phantom-agent.example.com/privkey.pem;
|
|
7
|
+
|
|
8
|
+
client_max_body_size 256k;
|
|
9
|
+
|
|
10
|
+
location / {
|
|
11
|
+
proxy_pass http://127.0.0.1:8080;
|
|
12
|
+
proxy_http_version 1.1;
|
|
13
|
+
proxy_set_header Host $host;
|
|
14
|
+
proxy_set_header X-Real-IP $remote_addr;
|
|
15
|
+
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
|
16
|
+
proxy_set_header X-Forwarded-Proto $scheme;
|
|
17
|
+
proxy_set_header Connection "";
|
|
18
|
+
proxy_read_timeout 60s;
|
|
19
|
+
proxy_send_timeout 60s;
|
|
20
|
+
}
|
|
21
|
+
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
[Unit]
|
|
2
|
+
Description=PhantomTrace Agent Service
|
|
3
|
+
After=network.target
|
|
4
|
+
|
|
5
|
+
[Service]
|
|
6
|
+
Type=simple
|
|
7
|
+
WorkingDirectory=/opt/phantom-agent
|
|
8
|
+
Environment=PHANTOM_AGENT_API_TOKEN=replace-with-long-random-token
|
|
9
|
+
ExecStart=/usr/bin/python3 /opt/phantom-agent/serve.py --host 127.0.0.1 --port 8080 --dim 512 --sparsity 47
|
|
10
|
+
Restart=always
|
|
11
|
+
RestartSec=2
|
|
12
|
+
User=phantom
|
|
13
|
+
Group=phantom
|
|
14
|
+
|
|
15
|
+
[Install]
|
|
16
|
+
WantedBy=multi-user.target
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
from .model import PhantomLanguageModel, run_interactive_test
|
|
2
|
+
from .evaluate import run_evaluation
|
|
3
|
+
from .service import PhantomAgentService, build_server
|
|
4
|
+
from .storage import EXTRA_WORDS_PATH, GLOBAL_INDEX, GLOBAL_VOCAB, MODEL_STATE_PATH, vocabadder
|
|
5
|
+
|
|
6
|
+
__version__ = "0.1.0"
|
|
7
|
+
|
|
8
|
+
__all__ = [
|
|
9
|
+
"EXTRA_WORDS_PATH",
|
|
10
|
+
"GLOBAL_INDEX",
|
|
11
|
+
"GLOBAL_VOCAB",
|
|
12
|
+
"MODEL_STATE_PATH",
|
|
13
|
+
"PhantomAgentService",
|
|
14
|
+
"PhantomLanguageModel",
|
|
15
|
+
"__version__",
|
|
16
|
+
"build_server",
|
|
17
|
+
"run_evaluation",
|
|
18
|
+
"run_interactive_test",
|
|
19
|
+
"vocabadder",
|
|
20
|
+
]
|