@geravant/sinain 1.1.0 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,210 @@
1
+ #!/usr/bin/env python3
2
+ """Graph Query — entity-based lookup of knowledge graph facts.
3
+
4
+ Thin wrapper around triplestore.py for querying facts by entity/domain.
5
+ Used by sinain-core (via HTTP endpoint) and sinain-mcp-server (via subprocess).
6
+
7
+ Usage:
8
+ python3 graph_query.py --db memory/knowledge-graph.db \
9
+ --entities '["react-native", "metro-bundler"]' \
10
+ [--max-facts 5] [--format text|json]
11
+ """
12
+
13
+ import argparse
14
+ import json
15
+ import sys
16
+ from pathlib import Path
17
+
18
+
19
def _confidence_score(fact: dict) -> float:
    """Return a numeric confidence for *fact*, tolerating odd stored values.

    ``confidence`` may be missing, hold several values (the attribute is kept
    as a sequence when it is not single-valued) or be non-numeric. Values
    that cannot be parsed are ignored; with nothing parsable the score is 0.0,
    and with several parsable values the highest one wins.
    """
    raw = fact.get("confidence", "0")
    candidates = raw if isinstance(raw, (list, tuple)) else [raw]
    parsed = []
    for candidate in candidates:
        try:
            score = float(candidate)
        except (TypeError, ValueError):
            continue
        if score == score:  # NaN != NaN; skip it so the sort order stays well defined
            parsed.append(score)
    return max(parsed) if parsed else 0.0


def query_facts_by_entities(
    db_path: str,
    entities: list[str],
    max_facts: int = 5,
) -> list[dict]:
    """Return up to *max_facts* facts related to the given entity keywords.

    Keywords are normalised (trimmed, lower-cased, spaces replaced by
    hyphens) and matched against non-retracted ``tag`` triples; entities are
    ranked by how many of their tags matched. If that yields fewer than
    *max_facts* ids, the remainder is filled from entities whose ``domain``
    equals a keyword or whose id starts with ``fact:<keyword>``. The loaded
    facts are finally ordered by confidence, highest first.

    Args:
        db_path: Path to the knowledge-graph database file.
        entities: Entity/domain names to look up.
        max_facts: Maximum number of facts to return.

    Returns:
        A list of dicts, each holding ``entityId`` plus the entity's
        attributes except ``tag`` (a single value is unwrapped, anything else
        is kept as returned by the store). The list is empty when the
        database file is missing, no usable keyword is given, *max_facts* is
        not positive, or the query fails (a warning is then written to
        stderr).
    """
    # Normalise, drop blanks (a blank keyword would turn the fallback LIKE
    # into 'fact:%' and match every fact) and de-duplicate, keeping order.
    keywords = list(dict.fromkeys(
        kw
        for kw in (str(e).strip().lower().replace(" ", "-") for e in entities or [])
        if kw
    ))
    if not keywords or max_facts <= 0 or not Path(db_path).exists():
        return []

    try:
        from triplestore import TripleStore
        store = TripleStore(db_path)
        try:
            conn = store._conn
            placeholders = ",".join(["?"] * len(keywords))

            # Primary: tag matches, ranked by number of matching tags.
            # Over-fetch (3x) so the confidence sort below has candidates
            # to choose from.
            tag_rows = conn.execute(
                f"""SELECT entity_id, COUNT(*) AS matches
                    FROM triples
                    WHERE attribute = 'tag' AND NOT retracted
                      AND value IN ({placeholders})
                    GROUP BY entity_id
                    ORDER BY matches DESC
                    LIMIT ?""",
                (*keywords, max_facts * 3),
            ).fetchall()
            fact_ids = [row["entity_id"] for row in tag_rows]

            # Fallback for untagged facts: match on domain or entity-id prefix.
            # NOTE: '%' and '_' inside a keyword act as LIKE wildcards here.
            if len(fact_ids) < max_facts:
                # "''" keeps the NOT IN list non-empty when nothing was found.
                exclude = ",".join(["?"] * len(fact_ids)) or "''"
                like_clauses = " OR ".join(["entity_id LIKE ?"] * len(keywords))
                entity_likes = [f"fact:{kw}%" for kw in keywords]

                fallback_rows = conn.execute(
                    f"""SELECT DISTINCT entity_id FROM triples
                        WHERE NOT retracted AND entity_id NOT IN ({exclude})
                          AND (
                            (attribute = 'domain' AND value IN ({placeholders}))
                            OR ({like_clauses})
                          )
                        LIMIT ?""",
                    (*fact_ids, *keywords, *entity_likes, max_facts - len(fact_ids)),
                ).fetchall()
                fact_ids.extend(row["entity_id"] for row in fallback_rows)

            # Load the full attribute set of every candidate.
            facts = []
            for fid in fact_ids:
                attrs = store.entity(fid)
                if not attrs:
                    continue
                fact = {"entityId": fid}
                for attr_name, values in attrs.items():
                    if attr_name == "tag":
                        continue  # tags are only used for lookup; omit them from output
                    fact[attr_name] = values[0] if len(values) == 1 else values
                facts.append(fact)

            # Stable sort: facts with equal confidence keep their lookup order.
            facts.sort(key=_confidence_score, reverse=True)
            return facts[:max_facts]
        finally:
            # Always release the store, including when a query above raised.
            store.close()
    except Exception as e:
        print(f"[warn] Graph query failed: {e}", file=sys.stderr)
        return []
93
+
94
+
95
def query_top_facts(db_path: str, limit: int = 30) -> list[dict]:
    """Return the *limit* highest-confidence facts with all their attributes.

    Only entities whose id starts with ``fact:`` and that have a non-retracted
    ``confidence`` triple are considered. Each entity is listed once, ranked
    by its highest confidence value.

    Args:
        db_path: Path to the knowledge-graph database file.
        limit: Maximum number of facts, passed to the SQL ``LIMIT`` clause.

    Returns:
        A list of dicts, each holding ``entityId`` plus every attribute of the
        entity (a single value is unwrapped, anything else is kept as returned
        by the store), ordered by confidence descending. The list is empty
        when the database file is missing or the query fails (a warning is
        then written to stderr).
    """
    if not Path(db_path).exists():
        return []

    try:
        from triplestore import TripleStore
        store = TripleStore(db_path)
        try:
            # GROUP BY keeps an entity with several live confidence triples
            # from appearing more than once in the result.
            rows = store._conn.execute(
                """SELECT entity_id, MAX(CAST(value AS REAL)) AS conf
                   FROM triples
                   WHERE attribute = 'confidence' AND NOT retracted
                     AND entity_id LIKE 'fact:%'
                   GROUP BY entity_id
                   ORDER BY conf DESC
                   LIMIT ?""",
                (limit,),
            ).fetchall()

            facts = []
            for row in rows:
                fid = row["entity_id"]
                attrs = store.entity(fid)
                if not attrs:
                    continue
                fact = {"entityId": fid}
                for attr_name, values in attrs.items():
                    fact[attr_name] = values[0] if len(values) == 1 else values
                facts.append(fact)
            return facts
        finally:
            # Always release the store, including when the query raised.
            store.close()
    except Exception as e:
        print(f"[warn] Graph top-facts query failed: {e}", file=sys.stderr)
        return []
130
+
131
+
132
def format_facts_text(facts: list[dict], max_chars: int = 500) -> str:
    """Render facts as a bullet list, one fact per line.

    Each line reads ``- [domain] value (confidence: C, confirmed Nx)``; the
    ``[domain]`` part is omitted when the fact has no domain. Missing fields
    fall back to ``""`` (value), ``"?"`` (confidence) and ``"1"`` (count).

    Args:
        facts: Fact dicts, read via the keys ``value``, ``confidence``,
            ``reinforce_count`` and ``domain``.
        max_chars: Upper bound on the length of the returned text, including
            the newline separators between lines.

    Returns:
        The joined lines. Rendering stops at the first fact that would push
        the text past *max_chars*, so the result may be empty.
    """
    if not facts:
        return ""

    lines = []
    used = 0
    for fact in facts:
        value = fact.get("value", "")
        conf = fact.get("confidence", "?")
        count = fact.get("reinforce_count", "1")
        domain = fact.get("domain", "")

        prefix = f"[{domain}] " if domain else ""
        line = f"- {prefix}{value} (confidence: {conf}, confirmed {count}x)"

        # Every line after the first also costs one newline in the joined text.
        cost = len(line) + (1 if lines else 0)
        if used + cost > max_chars:
            break
        lines.append(line)
        used += cost

    return "\n".join(lines)
155
+
156
+
157
def domain_fact_counts(db_path: str) -> dict[str, int]:
    """Count distinct entities per ``domain`` value.

    Only non-retracted ``domain`` triples are counted.

    Args:
        db_path: Path to the knowledge-graph database file.

    Returns:
        A mapping of domain value to the number of distinct entities carrying
        it, built from rows ordered by count descending. The mapping is empty
        when the database file is missing or the query fails (a warning is
        then written to stderr, as the other query functions do).
    """
    if not Path(db_path).exists():
        return {}

    try:
        from triplestore import TripleStore
        store = TripleStore(db_path)
        try:
            rows = store._conn.execute(
                """SELECT value, COUNT(DISTINCT entity_id) AS cnt
                   FROM triples
                   WHERE attribute = 'domain' AND NOT retracted
                   GROUP BY value
                   ORDER BY cnt DESC""",
            ).fetchall()
            return {row["value"]: row["cnt"] for row in rows}
        finally:
            # Always release the store, including when the query raised.
            store.close()
    except Exception as e:
        print(f"[warn] Graph domain-count query failed: {e}", file=sys.stderr)
        return {}
178
+
179
+
180
def main() -> None:
    """Command-line entry point: run one query and print the result.

    Modes, in order of precedence:

    1. ``--domain-counts``: print per-domain fact counts as JSON and exit
       (``--format`` is not consulted).
    2. ``--top N``: top-N facts by confidence.
    3. ``--entities JSON``: facts matching the given names, capped by
       ``--max-facts``.
    4. Otherwise: top facts by confidence, capped by ``--max-facts``.

    Facts are printed as text or as a JSON object with ``facts`` and
    ``count``. An ``--entities`` value that is not valid JSON, or is neither
    a string nor an array of strings, ends the program with a usage error.
    """
    parser = argparse.ArgumentParser(description="Graph Query")
    parser.add_argument("--db", required=True, help="Path to knowledge-graph.db")
    parser.add_argument("--entities", default=None, help="JSON array of entity/domain names")
    parser.add_argument("--top", type=int, default=None, help="Query top-N facts by confidence")
    parser.add_argument("--domain-counts", action="store_true", help="Show fact counts per domain")
    parser.add_argument("--max-facts", type=int, default=5, help="Maximum facts to return")
    parser.add_argument("--format", choices=["text", "json"], default="json", help="Output format")
    args = parser.parse_args()

    if args.domain_counts:
        print(json.dumps(domain_fact_counts(args.db), indent=2))
        return

    if args.top is not None:
        facts = query_top_facts(args.db, limit=args.top)
    elif args.entities:
        # parser.error() prints the usage message and exits, so a malformed
        # argument no longer surfaces as a raw traceback.
        try:
            entities = json.loads(args.entities)
        except json.JSONDecodeError as exc:
            parser.error(f"--entities is not valid JSON: {exc}")
        if isinstance(entities, str):
            # Accept a single JSON string as a one-element list instead of
            # letting it be iterated character by character.
            entities = [entities]
        if not isinstance(entities, list) or not all(isinstance(e, str) for e in entities):
            parser.error("--entities must be a JSON array of strings")
        facts = query_facts_by_entities(args.db, entities, max_facts=args.max_facts)
    else:
        facts = query_top_facts(args.db, limit=args.max_facts)

    if args.format == "text":
        print(format_facts_text(facts))
    else:
        print(json.dumps({"facts": facts, "count": len(facts)}, indent=2, ensure_ascii=False))


# Run the CLI only when executed as a script; importing the module has no side effects.
if __name__ == "__main__":
    main()