PhantomReason 0.1.2 (py3-none-any wheel)

This diff shows the content of package versions publicly released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in the registry.
@@ -0,0 +1,20 @@
+ from .model import PhantomLanguageModel, run_interactive_test
+ from .evaluate import run_evaluation
+ from .service import PhantomAgentService, build_server
+ from .storage import EXTRA_WORDS_PATH, GLOBAL_INDEX, GLOBAL_VOCAB, MODEL_STATE_PATH, vocabadder
+
+ __version__ = "0.1.2"
+
+ __all__ = [
+     "EXTRA_WORDS_PATH",
+     "GLOBAL_INDEX",
+     "GLOBAL_VOCAB",
+     "MODEL_STATE_PATH",
+     "PhantomAgentService",
+     "PhantomLanguageModel",
+     "__version__",
+     "build_server",
+     "run_evaluation",
+     "run_interactive_test",
+     "vocabadder",
+ ]
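
The first hunk is the package's top-level __init__.py (the diff omits file headers, but the relative imports and the __version__ assignment identify it): it re-exports the model, evaluation, service, and storage entry points. A minimal usage sketch, assuming the distribution installs under the import name phantomreason; that name is inferred from the wheel title and the DEFAULT_USER_AGENT string in the next hunk, not confirmed by the diff:

    import phantomreason  # assumed import name, not shown in the diff

    print(phantomreason.__version__)  # "0.1.2"

    # Constructor arguments copied from the evaluation hunk further down.
    model = phantomreason.PhantomLanguageModel(dim=512, sparsity=47)
    report = phantomreason.run_evaluation()  # returns a status dict (third hunk)
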
@@ -0,0 +1,119 @@
+ from __future__ import annotations
+
+ import html
+ import re
+ import urllib.error
+ import urllib.request
+ from pathlib import Path
+
+
+ DEFAULT_USER_AGENT = "phantomreason/1.0 (+symbolic-trace-ingest)"
+ DEFAULT_TIMEOUT_SECONDS = 15.0
+
+
+ def normalize_text(raw_text: str) -> str:
+     cleaned = html.unescape(raw_text)
+     cleaned = re.sub(r"(?is)<(script|style).*?>.*?</\1>", " ", cleaned)
+     cleaned = re.sub(r"(?s)<[^>]+>", " ", cleaned)
+     cleaned = cleaned.replace("\r", "\n")
+     cleaned = re.sub(r"[^\S\n]+", " ", cleaned)
+     cleaned = re.sub(r"\n{3,}", "\n\n", cleaned)
+     return cleaned.strip()
+
+
+ def tokenize_words(text: str) -> list[str]:
+     return re.findall(r"[a-zA-Z']+", text.lower())
+
+
+ def iter_sentences(
+     text: str,
+     min_words: int = 3,
+     max_words: int = 24,
+     max_sentences: int | None = None,
+ ) -> list[str]:
+     normalized = normalize_text(text)
+     if not normalized:
+         return []
+     sentence_like = re.split(r"(?<=[.!?])\s+|\n+", normalized)
+     collected: list[str] = []
+     seen: set[str] = set()
+     for part in sentence_like:
+         words = tokenize_words(part)
+         if len(words) < min_words:
+             continue
+         while words:
+             chunk = words[:max_words]
+             words = words[max_words:]
+             if len(chunk) < min_words:
+                 break
+             sentence = " ".join(chunk)
+             if sentence in seen:
+                 continue
+             seen.add(sentence)
+             collected.append(sentence)
+             if max_sentences is not None and len(collected) >= max_sentences:
+                 return collected
+     return collected
+
+
+ def load_text_file(path: str | Path) -> str:
+     return Path(path).read_text(encoding="utf-8")
+
+
+ def fetch_url_text(
+     url: str,
+     timeout: float = DEFAULT_TIMEOUT_SECONDS,
+     user_agent: str = DEFAULT_USER_AGENT,
+ ) -> str:
+     request = urllib.request.Request(url, headers={"User-Agent": user_agent})
+     with urllib.request.urlopen(request, timeout=timeout) as response:
+         charset = response.headers.get_content_charset() or "utf-8"
+         payload = response.read()
+         return payload.decode(charset, errors="replace")
+
+
+ def extract_dictionary_facts(text: str, max_entries: int = 256) -> list[str]:
+     facts: list[str] = []
+     seen: set[str] = set()
+     for raw_line in normalize_text(text).splitlines():
+         line = raw_line.strip(" -:\t")
+         if not line:
+             continue
+         match = re.match(r"^([A-Za-z][A-Za-z' -]{1,48})\s*[:\-]\s*(.+)$", line)
+         if not match:
+             continue
+         term = " ".join(tokenize_words(match.group(1)))
+         definition_tokens = tokenize_words(match.group(2))
+         if not term or len(definition_tokens) < 2:
+             continue
+         definition = " ".join(definition_tokens[:10])
+         sentence = f"{term} means {definition}"
+         if sentence in seen:
+             continue
+         seen.add(sentence)
+         facts.append(sentence)
+         if len(facts) >= max_entries:
+             break
+     return facts
+
+
+ def load_local_corpus_candidates(root: Path) -> list[Path]:
+     candidates: list[Path] = []
+     for relative in ("words.txt", "words"):
+         path = root / relative
+         if path.exists() and path.is_file():
+             candidates.append(path)
+     return candidates
+
+
+ __all__ = [
+     "DEFAULT_TIMEOUT_SECONDS",
+     "DEFAULT_USER_AGENT",
+     "extract_dictionary_facts",
+     "fetch_url_text",
+     "iter_sentences",
+     "load_local_corpus_candidates",
+     "load_text_file",
+     "normalize_text",
+     "tokenize_words",
+ ]
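
The second hunk is a text-ingestion helper module (its file name is not shown in the diff). normalize_text unescapes HTML entities, drops script/style blocks and any remaining tags, and collapses whitespace; iter_sentences splits the normalized text on sentence punctuation and re-chunks anything longer than max_words into fixed-size windows, deduplicating as it goes; extract_dictionary_facts turns "term: definition" lines into "term means definition" sentences, keeping at most ten definition tokens. A short sketch of how these pure functions compose, with the import path assumed since the file name is elided:

    # The module path "phantomreason.ingest" is an assumption; the diff
    # does not name the file these functions live in.
    from phantomreason.ingest import (
        extract_dictionary_facts,
        iter_sentences,
        normalize_text,
    )

    raw = "<p>The sun rises in the east.&nbsp; It sets in the west.</p>"
    print(normalize_text(raw))
    # -> "The sun rises in the east. It sets in the west."
    print(iter_sentences(raw))
    # -> ["the sun rises in the east", "it sets in the west"]
    print(extract_dictionary_facts("vector: an ordered list of numbers"))
    # -> ["vector means an ordered list of numbers"]
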
@@ -0,0 +1,89 @@
+ from __future__ import annotations
+
+ from .model import PhantomLanguageModel
+
+
+ def require(condition: bool, label: str, failures: list[str]) -> None:
+     if not condition:
+         failures.append(label)
+
+
+ def run_evaluation() -> dict[str, object]:
+     model = PhantomLanguageModel(dim=512, sparsity=47)
+     training_text = (
+         "vector means an ordered list used for state. "
+         "artist is a person who creates stories. "
+         "sun rises east. "
+         "north star guides sailors nightly. "
+         "gardeners water orchids daily."
+     )
+     model.train_on_text(training_text, epochs=1, persist=False)
+     failures: list[str] = []
+
+     parsed = model.understand_input("north star guides sailors nightly")
+     require(parsed["subject"] == "north star", "parse subject for two-word noun phrase", failures)
+     require(parsed["predicate"] == "guides", "parse predicate for two-word noun phrase", failures)
+
+     parsed = model.understand_input("what is vector?")
+     require(parsed["subject"] == "vector", "parse copular question subject", failures)
+     require(parsed["predicate"] == "is", "parse copular question predicate", failures)
+
+     routed = model.route_prompt("what is vector?")
+     require(routed["fact_answer"] is not None, "fact recall for learned definition", failures)
+
+     model.register_fact("vector", "means", ["symbolic", "state"])
+     require(
+         not model._fact_is_active("fact:vector|means|ordered list"),
+         "older contradictory fact should be inactive",
+         failures,
+     )
+     require(
+         model._fact_is_active("fact:vector|means|symbolic state"),
+         "new contradictory fact should stay active",
+         failures,
+     )
+     latest = model._direct_fact_match(subject="vector", predicate="means")
+     require(
+         latest is not None and latest["object_tokens"] == ["symbolic", "state"],
+         "latest contradictory fact should dominate recall",
+         failures,
+     )
+
+     before_decay = model._fact_strength_value("fact:vector|means|symbolic state")
+     model.apply_fact_decay(keep_recent=0, amount=1)
+     after_decay = model._fact_strength_value("fact:vector|means|symbolic state")
+     require(after_decay <= before_decay, "decay should not increase fact strength", failures)
+
+     if failures:
+         return {
+             "status": "FAIL",
+             "failures": failures,
+             "fact_count": model.fact_count(),
+             "fact_answer": routed["fact_answer"],
+             "parse": model.render_understanding(model.understand_input("north star guides sailors nightly")),
+         }
+
+     return {
+         "status": "PASS",
+         "failures": [],
+         "fact_count": model.fact_count(),
+         "fact_answer": routed["fact_answer"],
+         "parse": model.render_understanding(model.understand_input("north star guides sailors nightly")),
+     }
+
+
+ def main() -> int:
+     result = run_evaluation()
+     print(result["status"])
+     for item in result["failures"]:
+         print(f"- {item}")
+     if result["status"] == "PASS":
+         print(f"fact_count={result['fact_count']}")
+         print(f"fact_answer={result['fact_answer']}")
+         print(f"parse={result['parse']}")
+         return 0
+     return 1
+
+
+ if __name__ == "__main__":
+     raise SystemExit(main())
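
The third hunk is the evaluate module behind the run_evaluation re-export in the first hunk. It trains a PhantomLanguageModel on five fixed sentences, then checks parsing of a two-word noun phrase, copular-question handling, fact recall, contradiction supersession (the newer "vector means symbolic state" fact should deactivate the older "ordered list" one), and fact-strength decay; main prints the report and returns a non-zero exit code on any failure, so the module doubles as a smoke test. Assuming the phantomreason import name, it can be driven from code:

    # Import name is assumed; the diff shows only relative imports.
    from phantomreason import run_evaluation

    report = run_evaluation()
    print(report["status"])    # "PASS" or "FAIL"
    print(report["failures"])  # empty when every check passed

Or, under the same naming assumption, from the command line: python -m phantomreason.evaluate (exit status 0 on PASS, 1 on FAIL).
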