celltype-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- celltype_cli-0.1.0.dist-info/METADATA +267 -0
- celltype_cli-0.1.0.dist-info/RECORD +89 -0
- celltype_cli-0.1.0.dist-info/WHEEL +4 -0
- celltype_cli-0.1.0.dist-info/entry_points.txt +2 -0
- celltype_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
- ct/__init__.py +3 -0
- ct/agent/__init__.py +0 -0
- ct/agent/case_studies.py +426 -0
- ct/agent/config.py +523 -0
- ct/agent/doctor.py +544 -0
- ct/agent/knowledge.py +523 -0
- ct/agent/loop.py +99 -0
- ct/agent/mcp_server.py +478 -0
- ct/agent/orchestrator.py +733 -0
- ct/agent/runner.py +656 -0
- ct/agent/sandbox.py +481 -0
- ct/agent/session.py +145 -0
- ct/agent/system_prompt.py +186 -0
- ct/agent/trace_store.py +228 -0
- ct/agent/trajectory.py +169 -0
- ct/agent/types.py +182 -0
- ct/agent/workflows.py +462 -0
- ct/api/__init__.py +1 -0
- ct/api/app.py +211 -0
- ct/api/config.py +120 -0
- ct/api/engine.py +124 -0
- ct/cli.py +1448 -0
- ct/data/__init__.py +0 -0
- ct/data/compute_providers.json +59 -0
- ct/data/cro_database.json +395 -0
- ct/data/downloader.py +238 -0
- ct/data/loaders.py +252 -0
- ct/kb/__init__.py +5 -0
- ct/kb/benchmarks.py +147 -0
- ct/kb/governance.py +106 -0
- ct/kb/ingest.py +415 -0
- ct/kb/reasoning.py +129 -0
- ct/kb/schema_monitor.py +162 -0
- ct/kb/substrate.py +387 -0
- ct/models/__init__.py +0 -0
- ct/models/llm.py +370 -0
- ct/tools/__init__.py +195 -0
- ct/tools/_compound_resolver.py +297 -0
- ct/tools/biomarker.py +368 -0
- ct/tools/cellxgene.py +282 -0
- ct/tools/chemistry.py +1371 -0
- ct/tools/claude.py +390 -0
- ct/tools/clinical.py +1153 -0
- ct/tools/clue.py +249 -0
- ct/tools/code.py +1069 -0
- ct/tools/combination.py +397 -0
- ct/tools/compute.py +402 -0
- ct/tools/cro.py +413 -0
- ct/tools/data_api.py +2114 -0
- ct/tools/design.py +295 -0
- ct/tools/dna.py +575 -0
- ct/tools/experiment.py +604 -0
- ct/tools/expression.py +655 -0
- ct/tools/files.py +957 -0
- ct/tools/genomics.py +1387 -0
- ct/tools/http_client.py +146 -0
- ct/tools/imaging.py +319 -0
- ct/tools/intel.py +223 -0
- ct/tools/literature.py +743 -0
- ct/tools/network.py +422 -0
- ct/tools/notification.py +111 -0
- ct/tools/omics.py +3330 -0
- ct/tools/ops.py +1230 -0
- ct/tools/parity.py +649 -0
- ct/tools/pk.py +245 -0
- ct/tools/protein.py +678 -0
- ct/tools/regulatory.py +643 -0
- ct/tools/remote_data.py +179 -0
- ct/tools/report.py +181 -0
- ct/tools/repurposing.py +376 -0
- ct/tools/safety.py +1280 -0
- ct/tools/shell.py +178 -0
- ct/tools/singlecell.py +533 -0
- ct/tools/statistics.py +552 -0
- ct/tools/structure.py +882 -0
- ct/tools/target.py +901 -0
- ct/tools/translational.py +123 -0
- ct/tools/viability.py +218 -0
- ct/ui/__init__.py +0 -0
- ct/ui/markdown.py +31 -0
- ct/ui/status.py +258 -0
- ct/ui/suggestions.py +567 -0
- ct/ui/terminal.py +1456 -0
- ct/ui/traces.py +112 -0
ct/kb/schema_monitor.py
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Schema drift monitor for external API/data integrations.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from dataclasses import asdict, dataclass
|
|
8
|
+
import json
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any, Callable
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass
class SchemaCheckResult:
    """Outcome of comparing one monitor's live payload schema to its baseline."""

    # Name of the probe that produced this result (a key of SchemaMonitor.monitors).
    monitor: str
    status: str  # ok | drift | new | error
    # Typed paths observed now but absent from the stored baseline.
    added_paths: list[str]
    # Typed paths present in the stored baseline but missing from the current payload.
    removed_paths: list[str]
    # Number of typed paths recorded in the baseline for this monitor.
    baseline_size: int
    # Number of typed paths extracted from the current probe payload (0 on error).
    current_size: int
    # Stringified exception when status == "error"; empty otherwise.
    error: str = ""
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _schema_paths(obj: Any, prefix: str = "$") -> set[str]:
|
|
25
|
+
"""Flatten JSON-like object into typed path signatures."""
|
|
26
|
+
paths = set()
|
|
27
|
+
if isinstance(obj, dict):
|
|
28
|
+
paths.add(f"{prefix}:object")
|
|
29
|
+
for key, value in obj.items():
|
|
30
|
+
child = f"{prefix}.{key}"
|
|
31
|
+
paths.update(_schema_paths(value, child))
|
|
32
|
+
return paths
|
|
33
|
+
if isinstance(obj, list):
|
|
34
|
+
paths.add(f"{prefix}:array")
|
|
35
|
+
if obj:
|
|
36
|
+
# Sample first few elements for schema signature.
|
|
37
|
+
for item in obj[:3]:
|
|
38
|
+
paths.update(_schema_paths(item, f"{prefix}[]"))
|
|
39
|
+
return paths
|
|
40
|
+
if obj is None:
|
|
41
|
+
paths.add(f"{prefix}:null")
|
|
42
|
+
return paths
|
|
43
|
+
typename = type(obj).__name__
|
|
44
|
+
paths.add(f"{prefix}:{typename}")
|
|
45
|
+
return paths
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class SchemaMonitor:
    """Capture and compare tool output schemas against baselines."""

    def __init__(
        self,
        baseline_path: Path | None = None,
        monitors: dict[str, Callable[[], Any]] | None = None,
    ):
        default_path = Path.home() / ".ct" / "knowledge" / "schema_baselines.json"
        self.baseline_path = baseline_path or default_path
        self.monitors = monitors or self._default_monitors()
        self._baseline = self._load_baseline()

    def _default_monitors(self) -> dict[str, Callable[[], Any]]:
        """Build the stock probe set against live literature/data-API tools."""
        # Imported lazily so constructing a monitor with explicit probes
        # never touches the heavier tool modules.
        from ct.tools.data_api import opentargets_search, uniprot_lookup
        from ct.tools.literature import openalex_search, pubmed_search

        return {
            "literature.pubmed_search": lambda: pubmed_search("TP53 cancer", max_results=1),
            "literature.openalex_search": lambda: openalex_search("TP53 cancer", max_results=1),
            "data_api.uniprot_lookup": lambda: uniprot_lookup("P04637"),
            "data_api.opentargets_search": lambda: opentargets_search("TP53", entity_type="target"),
        }

    def _load_baseline(self) -> dict[str, Any]:
        """Read the baseline file; fall back to an empty baseline on any problem."""
        fallback: dict[str, Any] = {"version": 1, "monitors": {}}
        if not self.baseline_path.exists():
            return fallback
        try:
            loaded = json.loads(self.baseline_path.read_text(encoding="utf-8"))
        except (OSError, json.JSONDecodeError):
            return fallback
        if not isinstance(loaded, dict):
            return fallback
        # Backfill required keys on older/partial baseline files.
        loaded.setdefault("version", 1)
        loaded.setdefault("monitors", {})
        return loaded

    def save_baseline(self):
        """Persist the in-memory baseline to disk, creating parent dirs as needed."""
        self.baseline_path.parent.mkdir(parents=True, exist_ok=True)
        serialized = json.dumps(self._baseline, ensure_ascii=True, indent=2)
        self.baseline_path.write_text(serialized, encoding="utf-8")

    def update_baseline(self, *, monitor: str | None = None) -> list[SchemaCheckResult]:
        """Capture current schema(s) as new baseline."""
        outcome = self.check(update_baseline=True, monitor=monitor)
        self.save_baseline()
        return outcome

    def check(
        self,
        *,
        update_baseline: bool = False,
        monitor: str | None = None,
    ) -> list[SchemaCheckResult]:
        """Run schema checks and return diff results.

        When *monitor* is given, only that probe runs; when *update_baseline*
        is true, the in-memory baseline is refreshed for every successful probe
        (the caller is responsible for persisting it).
        """
        findings: list[SchemaCheckResult] = []
        for name, probe in self.monitors.items():
            if monitor and name != monitor:
                continue

            known = set(self._baseline["monitors"].get(name, {}).get("paths", []))
            try:
                payload = probe()
            except Exception as exc:
                # A failed probe is reported but never overwrites the baseline.
                findings.append(
                    SchemaCheckResult(
                        monitor=name,
                        status="error",
                        added_paths=[],
                        removed_paths=[],
                        baseline_size=len(known),
                        current_size=0,
                        error=str(exc),
                    )
                )
                continue

            observed = _schema_paths(payload)
            gained = sorted(observed - known)
            lost = sorted(known - observed)
            if not known:
                verdict = "new"
            elif gained or lost:
                verdict = "drift"
            else:
                verdict = "ok"

            findings.append(
                SchemaCheckResult(
                    monitor=name,
                    status=verdict,
                    added_paths=gained,
                    removed_paths=lost,
                    baseline_size=len(known),
                    current_size=len(observed),
                )
            )

            if update_baseline:
                self._baseline["monitors"][name] = {"paths": sorted(observed)}
        return findings

    @staticmethod
    def summarize(results: list[SchemaCheckResult]) -> dict[str, Any]:
        """Aggregate results into status counts plus serialized details."""
        tally: dict[str, int] = {}
        for item in results:
            tally[item.status] = tally.get(item.status, 0) + 1
        return {
            "total": len(results),
            "counts": tally,
            "results": [asdict(r) for r in results],
        }
|
ct/kb/substrate.py
ADDED
|
@@ -0,0 +1,387 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Canonical R&D knowledge substrate.
|
|
3
|
+
|
|
4
|
+
Stores normalized entities, evidence, and typed relations in a local JSON store.
|
|
5
|
+
This is the foundational layer for cross-modal pharma knowledge accumulation.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from dataclasses import asdict, dataclass, field
|
|
11
|
+
import json
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
import re
|
|
14
|
+
import time
|
|
15
|
+
from typing import Any
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
_GENE_RE = re.compile(r"^[A-Z][A-Z0-9-]{1,9}$")
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass
class KBEntity:
    """Canonical normalized entity record (gene, disease, compound, ...)."""

    # Canonical id of the form "<entity_type>:<normalized identifier>".
    id: str
    # Lowercased coarse type, e.g. "gene", "disease", "compound".
    entity_type: str
    name: str
    # Alternate surface forms; the upsert path folds in name and identifier.
    synonyms: list[str] = field(default_factory=list)
    # Free-form attributes, merged (dict.update) across repeated upserts.
    metadata: dict[str, Any] = field(default_factory=dict)
    # Unix timestamps of first and most recent observation.
    first_seen: float = field(default_factory=time.time)
    last_seen: float = field(default_factory=time.time)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass
class KBEvidence:
    """A single evidence statement with provenance and confidence."""

    # Sequential id of the form "e:<n>", assigned by add_evidence.
    id: str
    # Lowercased origin category (add_evidence defaults empty input to "unknown").
    source_type: str
    # External reference string; may be empty.
    source_ref: str
    # Human-readable summary; truncated to 2000 chars on ingest.
    summary: str
    # Confidence in [0, 1]; clamped on ingest.
    score: float = 0.5
    tags: list[str] = field(default_factory=list)
    # Unix timestamp when the evidence was recorded.
    timestamp: float = field(default_factory=time.time)
    metadata: dict[str, Any] = field(default_factory=dict)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@dataclass
class KBClaim:
    """Links one evidence record to a relation with a stance and weight."""

    # Id of the KBEvidence backing this claim.
    evidence_id: str
    polarity: str = "support"  # support | contradict | neutral
    # Claim strength in [0, 1]; clamped by link_entities.
    score: float = 0.5
    # Unix timestamp when the claim was attached to its relation.
    timestamp: float = field(default_factory=time.time)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@dataclass
class KBRelation:
    """Typed edge between two entities, carrying its evidence claims."""

    # Composite key "<subject_id>|<predicate>|<object_id>".
    id: str
    subject_id: str
    predicate: str
    object_id: str
    # Evidence-backed claims, accumulated over repeated link_entities calls.
    claims: list[KBClaim] = field(default_factory=list)
    metadata: dict[str, Any] = field(default_factory=dict)
    # Unix timestamps of edge creation and of the latest claim addition.
    first_seen: float = field(default_factory=time.time)
    last_seen: float = field(default_factory=time.time)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class KnowledgeSubstrate:
    """Local persistent knowledge substrate.

    Entities, evidence, and relations live in one in-memory dict (``_data``)
    mirroring the on-disk JSON layout; mutating methods update ``_data`` only,
    and callers must invoke :meth:`save` to persist.
    """

    SCHEMA_VERSION = 1

    def __init__(self, path: Path | None = None):
        # Default store location under the user's home directory.
        self.path = path or (Path.home() / ".ct" / "knowledge" / "substrate.json")
        self._data = self._load()

    @staticmethod
    def normalize_identifier(entity_type: str, value: str) -> str:
        """Normalize external identifiers into stable canonical keys."""
        et = (entity_type or "unknown").strip().lower()
        raw = (value or "").strip()
        if not raw:
            raw = "unknown"

        # Genes: strip non-alphanumerics (keep hyphen), uppercase.
        if et == "gene":
            norm = re.sub(r"[^A-Za-z0-9-]", "", raw).upper()
            return norm or "UNKNOWN"
        # Descriptive terms: lowercase and collapse whitespace.
        if et in {"disease", "indication", "pathway", "phenotype"}:
            norm = re.sub(r"\s+", " ", raw.lower()).strip()
            return norm or "unknown"
        # Compounds/drugs: keep case (names can be case-sensitive), collapse whitespace.
        if et in {"compound", "drug"}:
            norm = re.sub(r"\s+", " ", raw).strip()
            return norm
        # Accessions (PMID/NCT style) are uppercased verbatim.
        if et in {"publication", "trial"}:
            return raw.upper()
        return re.sub(r"\s+", " ", raw).strip()

    @staticmethod
    def infer_entity_type(text: str) -> str:
        """Infer coarse entity type from surface form.

        Heuristic only: checks accession prefixes, then a gene-symbol regex,
        then rough compound/disease cues; returns "unknown" when nothing fits.
        """
        token = (text or "").strip()
        if not token:
            return "unknown"
        if token.upper().startswith("PMID"):
            return "publication"
        if token.upper().startswith("NCT") and token[3:].isdigit():
            return "trial"
        if _GENE_RE.match(token):
            return "gene"
        # Digits plus a hyphen loosely signals a chemical identifier.
        if any(c.isdigit() for c in token) and "-" in token:
            return "compound"
        # Multi-word phrases default to disease-like terms.
        if len(token.split()) >= 2:
            return "disease"
        return "unknown"

    def _default(self) -> dict[str, Any]:
        """Return a fresh, empty store structure."""
        return {
            "schema_version": self.SCHEMA_VERSION,
            "created_at": time.time(),
            "updated_at": time.time(),
            "next_evidence_id": 1,
            "entities": {},
            "evidence": {},
            "relations": {},
        }

    def _load(self) -> dict[str, Any]:
        """Load the store from disk, falling back to a fresh store on any problem."""
        if not self.path.exists():
            return self._default()
        try:
            payload = json.loads(self.path.read_text(encoding="utf-8"))
        except (OSError, json.JSONDecodeError):
            return self._default()

        if not isinstance(payload, dict):
            return self._default()
        # Backfill required keys so older/partial files stay usable.
        payload.setdefault("schema_version", self.SCHEMA_VERSION)
        payload.setdefault("created_at", time.time())
        payload.setdefault("updated_at", time.time())
        payload.setdefault("next_evidence_id", 1)
        payload.setdefault("entities", {})
        payload.setdefault("evidence", {})
        payload.setdefault("relations", {})
        return payload

    def save(self):
        """Persist substrate to disk."""
        self.path.parent.mkdir(parents=True, exist_ok=True)
        self._data["updated_at"] = time.time()
        self.path.write_text(
            json.dumps(self._data, ensure_ascii=True, indent=2),
            encoding="utf-8",
        )

    def upsert_entity(
        self,
        *,
        entity_type: str,
        name: str,
        identifier: str | None = None,
        synonyms: list[str] | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> KBEntity:
        """Insert/update entity and return canonical record.

        The canonical key is derived from *identifier* when given, else *name*.
        On update, synonyms are unioned and metadata is shallow-merged; the
        stored name and first_seen are left untouched.
        """
        canonical = self.normalize_identifier(entity_type, identifier or name)
        entity_id = f"{entity_type.lower()}:{canonical}"
        now = time.time()
        existing = self._data["entities"].get(entity_id)
        # Name (and identifier, when supplied) always become synonyms.
        syn_items = list(synonyms or []) + [name]
        if identifier:
            syn_items.append(str(identifier))
        syn = sorted(set(syn_items))
        if existing:
            existing_syn = set(existing.get("synonyms", []))
            existing["synonyms"] = sorted(existing_syn | set(syn))
            if metadata:
                existing_meta = existing.get("metadata", {})
                existing_meta.update(metadata)
                existing["metadata"] = existing_meta
            existing["last_seen"] = now
            self._data["entities"][entity_id] = existing
            return KBEntity(**existing)

        entity = KBEntity(
            id=entity_id,
            entity_type=entity_type.lower(),
            name=name,
            synonyms=syn,
            metadata=metadata or {},
            first_seen=now,
            last_seen=now,
        )
        self._data["entities"][entity_id] = asdict(entity)
        return entity

    def get_entity(self, entity_id: str) -> KBEntity | None:
        """Fetch one entity by canonical id, or None if absent."""
        rec = self._data["entities"].get(entity_id)
        if not rec:
            return None
        return KBEntity(**rec)

    def add_evidence(
        self,
        *,
        source_type: str,
        source_ref: str,
        summary: str,
        score: float = 0.5,
        tags: list[str] | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> KBEvidence:
        """Record evidence statement with provenance.

        Assigns the next sequential id ("e:<n>"), clamps score to [0, 1],
        and truncates the summary to 2000 characters.
        """
        next_id = int(self._data.get("next_evidence_id", 1))
        evidence_id = f"e:{next_id}"
        self._data["next_evidence_id"] = next_id + 1
        ev = KBEvidence(
            id=evidence_id,
            source_type=(source_type or "unknown").lower(),
            source_ref=source_ref or "",
            summary=(summary or "")[:2000],
            score=max(0.0, min(1.0, float(score))),
            tags=tags or [],
            metadata=metadata or {},
        )
        self._data["evidence"][evidence_id] = asdict(ev)
        return ev

    def get_evidence(self, evidence_id: str) -> KBEvidence | None:
        """Fetch one evidence record by id, or None if absent."""
        rec = self._data["evidence"].get(evidence_id)
        if not rec:
            return None
        return KBEvidence(**rec)

    def link_entities(
        self,
        *,
        subject_id: str,
        predicate: str,
        object_id: str,
        evidence_id: str,
        polarity: str = "support",
        score: float = 0.5,
        metadata: dict[str, Any] | None = None,
    ) -> KBRelation:
        """Create or update relation edge with evidence claim.

        The edge is keyed by (subject, predicate, object); a new claim is
        appended on every call. Unrecognized polarity values are coerced to
        "neutral" and score is clamped to [0, 1].
        """
        relation_id = f"{subject_id}|{predicate}|{object_id}"
        now = time.time()
        claim = KBClaim(
            evidence_id=evidence_id,
            polarity=polarity if polarity in {"support", "contradict", "neutral"} else "neutral",
            score=max(0.0, min(1.0, float(score))),
            timestamp=now,
        )
        existing = self._data["relations"].get(relation_id)
        if existing:
            # Stored claims are plain dicts; tolerate either form when re-reading.
            existing_claims = [
                KBClaim(**c) if isinstance(c, dict) else c for c in existing.get("claims", [])
            ]
            existing_claims.append(claim)
            existing["claims"] = [asdict(c) for c in existing_claims]
            if metadata:
                existing_meta = existing.get("metadata", {})
                existing_meta.update(metadata)
                existing["metadata"] = existing_meta
            existing["last_seen"] = now
            self._data["relations"][relation_id] = existing
            return KBRelation(
                id=relation_id,
                subject_id=existing["subject_id"],
                predicate=existing["predicate"],
                object_id=existing["object_id"],
                claims=[KBClaim(**c) for c in existing["claims"]],
                metadata=existing.get("metadata", {}),
                first_seen=float(existing.get("first_seen", now)),
                last_seen=float(existing.get("last_seen", now)),
            )

        rel = KBRelation(
            id=relation_id,
            subject_id=subject_id,
            predicate=predicate,
            object_id=object_id,
            claims=[claim],
            metadata=metadata or {},
            first_seen=now,
            last_seen=now,
        )
        self._data["relations"][relation_id] = {
            "id": rel.id,
            "subject_id": rel.subject_id,
            "predicate": rel.predicate,
            "object_id": rel.object_id,
            "claims": [asdict(claim)],
            "metadata": rel.metadata,
            "first_seen": rel.first_seen,
            "last_seen": rel.last_seen,
        }
        return rel

    def search_entities(self, query: str, limit: int = 20) -> list[KBEntity]:
        """Simple text search by canonical name/synonyms.

        Substring hits score 1.0; otherwise score is the fraction of query
        tokens found among the entity's name/synonym tokens. Results are
        ordered by score, then recency.
        """
        q = (query or "").strip().lower()
        if not q:
            return []
        hits: list[tuple[float, KBEntity]] = []
        terms = set(re.findall(r"[a-z0-9-]{2,}", q))
        for rec in self._data["entities"].values():
            entity = KBEntity(**rec)
            haystack = " ".join([entity.name] + entity.synonyms).lower()
            if q in haystack:
                score = 1.0
            else:
                tokens = set(re.findall(r"[a-z0-9-]{2,}", haystack))
                score = len(terms & tokens) / max(len(terms), 1)
            if score <= 0:
                continue
            hits.append((score, entity))
        hits.sort(key=lambda x: (x[0], x[1].last_seen), reverse=True)
        return [h[1] for h in hits[: max(limit, 0)]]

    def related_entities(
        self,
        entity_id: str,
        *,
        predicate: str | None = None,
        limit: int = 20,
    ) -> list[dict[str, Any]]:
        """List entities connected via relations with aggregate claim stats.

        Matches edges where *entity_id* is subject or object, optionally
        filtered by predicate; rows are ranked by net support, then average
        claim score, then recency.
        """
        rows = []
        for rel in self._data["relations"].values():
            if rel.get("subject_id") != entity_id and rel.get("object_id") != entity_id:
                continue
            if predicate and rel.get("predicate") != predicate:
                continue
            claims = [KBClaim(**c) for c in rel.get("claims", [])]
            support = sum(1 for c in claims if c.polarity == "support")
            contradict = sum(1 for c in claims if c.polarity == "contradict")
            neutral = sum(1 for c in claims if c.polarity == "neutral")
            avg_score = sum(c.score for c in claims) / max(len(claims), 1)
            # The "other" endpoint is whichever side is not the query entity.
            other = rel["object_id"] if rel["subject_id"] == entity_id else rel["subject_id"]
            rows.append(
                {
                    "relation_id": rel["id"],
                    "predicate": rel["predicate"],
                    "other_entity_id": other,
                    "support_claims": support,
                    "contradict_claims": contradict,
                    "neutral_claims": neutral,
                    "claim_count": len(claims),
                    "average_claim_score": round(avg_score, 4),
                    "last_seen": float(rel.get("last_seen", 0)),
                }
            )
        rows.sort(
            key=lambda r: (r["support_claims"] - r["contradict_claims"], r["average_claim_score"], r["last_seen"]),
            reverse=True,
        )
        return rows[: max(limit, 0)]

    def list_relations(self) -> list[KBRelation]:
        """Materialize every stored relation as a KBRelation."""
        items = []
        for rec in self._data["relations"].values():
            items.append(
                KBRelation(
                    id=rec["id"],
                    subject_id=rec["subject_id"],
                    predicate=rec["predicate"],
                    object_id=rec["object_id"],
                    claims=[KBClaim(**c) for c in rec.get("claims", [])],
                    metadata=rec.get("metadata", {}),
                    first_seen=float(rec.get("first_seen", 0)),
                    last_seen=float(rec.get("last_seen", 0)),
                )
            )
        return items

    def summary(self) -> dict[str, Any]:
        """High-level substrate stats."""
        entity_types: dict[str, int] = {}
        for rec in self._data["entities"].values():
            et = rec.get("entity_type", "unknown")
            entity_types[et] = entity_types.get(et, 0) + 1
        return {
            "path": str(self.path),
            "schema_version": self._data.get("schema_version", self.SCHEMA_VERSION),
            "n_entities": len(self._data["entities"]),
            "n_relations": len(self._data["relations"]),
            "n_evidence": len(self._data["evidence"]),
            "entity_types": entity_types,
            "updated_at": self._data.get("updated_at"),
        }
|
ct/models/__init__.py
ADDED
|
File without changes
|