@geravant/sinain 1.11.0 → 1.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. package/package.json +1 -1
  2. package/sinain-core/package-lock.json +963 -0
  3. package/sinain-core/package.json +1 -0
  4. package/sinain-core/src/buffers/feed-buffer.ts +32 -0
  5. package/sinain-core/src/embedding/service.ts +66 -0
  6. package/sinain-core/src/escalation/escalator.ts +1 -0
  7. package/sinain-core/src/escalation/message-builder.ts +45 -118
  8. package/sinain-core/src/index.ts +19 -2
  9. package/sinain-core/src/learning/local-curation.ts +137 -7
  10. package/sinain-core/src/overlay/commands.ts +16 -3
  11. package/sinain-core/src/overlay/ws-handler.ts +4 -1
  12. package/sinain-core/src/server.ts +31 -0
  13. package/sinain-core/src/types.ts +3 -0
  14. package/sinain-memory/README.md +105 -0
  15. package/sinain-memory/__pycache__/common.cpython-312.pyc +0 -0
  16. package/sinain-memory/__pycache__/embed_client.cpython-312.pyc +0 -0
  17. package/sinain-memory/__pycache__/graph_query.cpython-312.pyc +0 -0
  18. package/sinain-memory/__pycache__/knowledge_integrator.cpython-312.pyc +0 -0
  19. package/sinain-memory/__pycache__/session_distiller.cpython-312.pyc +0 -0
  20. package/sinain-memory/__pycache__/triplestore.cpython-312.pyc +0 -0
  21. package/sinain-memory/embed_client.py +117 -0
  22. package/sinain-memory/eval/__pycache__/__init__.cpython-312.pyc +0 -0
  23. package/sinain-memory/eval/benchmarks/__init__.py +0 -0
  24. package/sinain-memory/eval/benchmarks/__pycache__/__init__.cpython-312.pyc +0 -0
  25. package/sinain-memory/eval/benchmarks/__pycache__/base_adapter.cpython-312.pyc +0 -0
  26. package/sinain-memory/eval/benchmarks/__pycache__/config.cpython-312.pyc +0 -0
  27. package/sinain-memory/eval/benchmarks/__pycache__/evaluate.cpython-312.pyc +0 -0
  28. package/sinain-memory/eval/benchmarks/__pycache__/ingest.cpython-312.pyc +0 -0
  29. package/sinain-memory/eval/benchmarks/__pycache__/longmemeval_adapter.cpython-312.pyc +0 -0
  30. package/sinain-memory/eval/benchmarks/__pycache__/meeting_adapter.cpython-312.pyc +0 -0
  31. package/sinain-memory/eval/benchmarks/__pycache__/meeting_runner.cpython-312.pyc +0 -0
  32. package/sinain-memory/eval/benchmarks/__pycache__/query.cpython-312.pyc +0 -0
  33. package/sinain-memory/eval/benchmarks/__pycache__/report.cpython-312.pyc +0 -0
  34. package/sinain-memory/eval/benchmarks/__pycache__/runner.cpython-312.pyc +0 -0
  35. package/sinain-memory/eval/benchmarks/base_adapter.py +43 -0
  36. package/sinain-memory/eval/benchmarks/config.py +23 -0
  37. package/sinain-memory/eval/benchmarks/evaluate.py +146 -0
  38. package/sinain-memory/eval/benchmarks/ingest.py +152 -0
  39. package/sinain-memory/eval/benchmarks/judges/__init__.py +0 -0
  40. package/sinain-memory/eval/benchmarks/judges/__pycache__/__init__.cpython-312.pyc +0 -0
  41. package/sinain-memory/eval/benchmarks/judges/__pycache__/qa_judge.cpython-312.pyc +0 -0
  42. package/sinain-memory/eval/benchmarks/judges/qa_judge.py +81 -0
  43. package/sinain-memory/eval/benchmarks/longmemeval_adapter.py +177 -0
  44. package/sinain-memory/eval/benchmarks/meeting_adapter.py +81 -0
  45. package/sinain-memory/eval/benchmarks/meeting_runner.py +230 -0
  46. package/sinain-memory/eval/benchmarks/query.py +193 -0
  47. package/sinain-memory/eval/benchmarks/report.py +87 -0
  48. package/sinain-memory/eval/benchmarks/run_meeting_bench.sh +318 -0
  49. package/sinain-memory/eval/benchmarks/runner.py +283 -0
  50. package/sinain-memory/graph_query.py +257 -15
  51. package/sinain-memory/knowledge_integrator.py +365 -72
  52. package/sinain-memory/koog-config.json +11 -0
  53. package/sinain-memory/memory-config.json +1 -1
  54. package/sinain-memory/session_distiller.py +43 -19
  55. package/sinain-memory/triplestore.py +60 -0
@@ -28,7 +28,7 @@ from common import (
28
28
 
29
29
  SYSTEM_PROMPT = """\
30
30
  You are a session distiller for a personal AI overlay system (sinain).
31
- Your job: analyze a session transcript and extract structured knowledge.
31
+ Your job: analyze a session transcript and extract ALL knowledge worth remembering.
32
32
 
33
33
  The transcript contains feed items from sinain-core:
34
34
  - audio: transcribed speech from the user's environment
@@ -37,24 +37,42 @@ The transcript contains feed items from sinain-core:
37
37
  - system: system events and status messages
38
38
 
39
39
  Extract:
40
- 1. whatHappened: 2-3 sentences summarizing what was accomplished in this session
41
- 2. patterns: up to 5 reusable patterns discovered (things that worked, techniques used)
42
- 3. antiPatterns: up to 3 things that failed and why
43
- 4. preferences: up to 3 user preferences or workflow habits observed
44
- 5. entities: key domains, tools, technologies, or topics worked with (for graph linking)
45
- 6. toolInsights: tool usage insights (e.g., "grep before read reduces misses")
46
-
47
- Focus on ACTIONABLE knowledge that would help a future agent in similar contexts.
48
- Skip trivial observations. If the session was idle or empty, say so briefly.
40
+ 1. whatHappened: 2-3 sentences summarizing what occurred in this session
41
+ 2. facts: up to 15 concrete factual claims. Each must be a self-contained sentence. \
42
+ IMPORTANT spread across these dimensions (do not let one theme dominate):
43
+ - WHO: people mentioned, their roles, backgrounds, relationships to each other
44
+ - WHAT: specific claims, properties, descriptions of things discussed
45
+ - HOW MUCH: any numbers, quantities, dates, durations, counts stated
46
+ - WHAT CHANGED: decisions made, agreements reached, state changes
47
+ - WHAT'S NEXT: commitments, action items, plans, deadlines
48
+ If you have 5+ facts about one dimension and 0 about another that was discussed, \
49
+ you are missing something. Breadth over depth.
50
+ Good: "The CTO of Al-Futaim previously worked at Citibank for 17 years as Director of IT in Singapore"
51
+ Good: "Citibank has 2400 IntelliJ subscriptions and heavy TeamCity usage"
52
+ Good: "The meeting is 45 minutes, scheduled for Tuesday"
53
+ Bad: "client-understanding-key: True"
54
+ Bad: five variations of "Al-Futaim is moving to the cloud"
55
+ 3. decisions: up to 5 decisions or agreements made (who decided what, with any deadline)
56
+ 4. entities: named things discussed or interacted with — as objects with name \
57
+ (lowercase-hyphenated slug) and type (freeform — person, org, tool, file, concept, \
58
+ service, framework, error, whatever fits the context).
59
+ Examples: {"name": "citibank", "type": "org"}, {"name": "auth-module", "type": "file"}, \
60
+ {"name": "react-native", "type": "framework"}
61
+ 5. patterns: up to 3 reusable techniques or workflows (if any — skip if none)
62
+ 6. preferences: up to 3 user preferences or habits observed
63
+
64
+ If existing entities are provided, reference them by name to enable reinforcement.
65
+ Focus on CONCRETE, SPECIFIC knowledge. Skip vague observations.
66
+ If the session was idle or empty, say so briefly.
49
67
 
50
68
  Respond with ONLY a JSON object:
51
69
  {
52
70
  "whatHappened": "string",
53
- "patterns": ["string", ...],
54
- "antiPatterns": ["string", ...],
55
- "preferences": ["string", ...],
56
- "entities": ["string", ...],
57
- "toolInsights": ["string", ...],
71
+ "facts": ["self-contained factual sentence", ...],
72
+ "decisions": ["decision sentence with who/what/when", ...],
73
+ "entities": [{"name": "citibank", "type": "org"}, {"name": "artom", "type": "person"}, ...],
74
+ "patterns": ["reusable technique or workflow", ...],
75
+ "preferences": ["user preference or habit", ...],
58
76
  "isEmpty": false
59
77
  }"""
60
78
 
@@ -95,6 +113,7 @@ def main() -> None:
95
113
  parser.add_argument("--memory-dir", required=True, help="Path to memory/ directory")
96
114
  parser.add_argument("--transcript", required=True, help="JSON array of feed items")
97
115
  parser.add_argument("--session-meta", default="{}", help="JSON session metadata")
116
+ parser.add_argument("--existing-entities", default="", help="Compact summary of existing knowledge graph entities")
98
117
  args = parser.parse_args()
99
118
 
100
119
  # Parse inputs
@@ -111,11 +130,11 @@ def main() -> None:
111
130
  if not items or len(items) < 2:
112
131
  output_json({
113
132
  "whatHappened": "Empty or trivial session",
133
+ "facts": [],
134
+ "decisions": [],
135
+ "entities": [],
114
136
  "patterns": [],
115
- "antiPatterns": [],
116
137
  "preferences": [],
117
- "entities": [],
118
- "toolInsights": [],
119
138
  "isEmpty": True,
120
139
  })
121
140
  return
@@ -130,11 +149,16 @@ def main() -> None:
130
149
  lines = [l for l in playbook.splitlines() if l.strip() and not l.startswith("<!--")]
131
150
  playbook_summary = f"\n\n## Current Playbook (for reference — don't repeat known patterns)\n{chr(10).join(lines[:30])}"
132
151
 
152
+ # Include existing entities for retrieve-before-extract (Mem0 pattern)
153
+ existing_section = ""
154
+ if args.existing_entities and args.existing_entities.strip():
155
+ existing_section = f"\n\n## Existing Knowledge (reinforce or update these if the session confirms/changes them)\n{args.existing_entities}"
156
+
133
157
  user_prompt = f"""## Session Transcript ({len(items)} items)
134
158
  {transcript_text}
135
159
 
136
160
  ## Session Metadata
137
- {json.dumps(meta, indent=2)}{playbook_summary}"""
161
+ {json.dumps(meta, indent=2)}{playbook_summary}{existing_section}"""
138
162
 
139
163
  try:
140
164
  raw = call_llm_with_fallback(
@@ -79,6 +79,40 @@ CREATE INDEX IF NOT EXISTS idx_avet
79
79
  ON triples(attribute, value, entity_id, tx_id);
80
80
  """
81
81
 
82
+ _FTS_SQL = """
83
+ -- Full-text search on fact values (for hybrid retrieval)
84
+ CREATE VIRTUAL TABLE IF NOT EXISTS triples_fts
85
+ USING fts5(entity_id, value, content=triples, content_rowid=id);
86
+
87
+ -- Triggers to keep FTS in sync with triples table
88
+ CREATE TRIGGER IF NOT EXISTS triples_ai AFTER INSERT ON triples BEGIN
89
+ INSERT INTO triples_fts(rowid, entity_id, value) VALUES (new.id, new.entity_id, new.value);
90
+ END;
91
+
92
+ CREATE TRIGGER IF NOT EXISTS triples_ad AFTER DELETE ON triples BEGIN
93
+ INSERT INTO triples_fts(triples_fts, rowid, entity_id, value) VALUES ('delete', old.id, old.entity_id, old.value);
94
+ END;
95
+
96
+ CREATE TRIGGER IF NOT EXISTS triples_au AFTER UPDATE ON triples BEGIN
97
+ INSERT INTO triples_fts(triples_fts, rowid, entity_id, value) VALUES ('delete', old.id, old.entity_id, old.value);
98
+ INSERT INTO triples_fts(rowid, entity_id, value) VALUES (new.id, new.entity_id, new.value);
99
+ END;
100
+ """
101
+
102
+ _TOUCHED_SQL = """
103
+ -- Track which entities are modified per transaction (for fast novelty checks)
104
+ CREATE TABLE IF NOT EXISTS touched_entities (
105
+ tx_id INTEGER NOT NULL,
106
+ entity_id TEXT NOT NULL,
107
+ PRIMARY KEY (tx_id, entity_id)
108
+ );
109
+
110
+ CREATE TRIGGER IF NOT EXISTS track_touched AFTER INSERT ON triples BEGIN
111
+ INSERT OR IGNORE INTO touched_entities (tx_id, entity_id)
112
+ VALUES (new.tx_id, new.entity_id);
113
+ END;
114
+ """
115
+
82
116
 
83
117
  def _now_iso() -> str:
84
118
  return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%fZ")
@@ -119,6 +153,14 @@ class TripleStore:
119
153
  self._conn.execute("PRAGMA journal_mode=WAL")
120
154
  self._conn.execute("PRAGMA busy_timeout=10000")
121
155
  self._conn.executescript(_SCHEMA_SQL)
156
+ try:
157
+ self._conn.executescript(_FTS_SQL)
158
+ except sqlite3.OperationalError:
159
+ pass # FTS5 not available on this Python build — degrade gracefully
160
+ try:
161
+ self._conn.executescript(_TOUCHED_SQL)
162
+ except sqlite3.OperationalError:
163
+ pass
122
164
  self._migrate()
123
165
  self._conn.commit()
124
166
 
@@ -409,6 +451,24 @@ class TripleStore:
409
451
  ).fetchall()
410
452
  return [dict(r) for r in rows]
411
453
 
454
+ # ----- Touched entities (fast novelty check) -----
455
+
456
+ def was_touched(self, entity_id: str, since_tx: int) -> bool:
457
+ """Return True if touched_entities holds a row for entity_id with tx_id strictly greater than since_tx. NOTE(review): the table's only declared key is PRIMARY KEY (tx_id, entity_id), so this is a tx_id range scan filtered by entity_id, not a constant-time lookup — confirm whether an entity_id-leading index is wanted."""
458
+ row = self._conn.execute(
459
+ "SELECT 1 FROM touched_entities WHERE entity_id = ? AND tx_id > ? LIMIT 1",
460
+ (entity_id, since_tx),
461
+ ).fetchone()
462
+ return row is not None
463
+
464
+ def touched_entities_since(self, since_tx: int) -> list[str]:
465
+ """Return the distinct entity_ids recorded in touched_entities with tx_id strictly greater than since_tx; the query has no ORDER BY, so the order of the returned list is unspecified."""
466
+ rows = self._conn.execute(
467
+ "SELECT DISTINCT entity_id FROM touched_entities WHERE tx_id > ?",
468
+ (since_tx,),
469
+ ).fetchall()
470
+ return [r[0] for r in rows]
471
+
412
472
  # ----- Stats -----
413
473
 
414
474
  def stats(self) -> dict: