@pentatonic-ai/ai-agent-sdk 0.9.6 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127) hide show
  1. package/README.md +3 -3
  2. package/bin/cli.js +1 -1
  3. package/bin/commands/config.js +1 -1
  4. package/dist/index.cjs +1 -1
  5. package/dist/index.js +1 -1
  6. package/package.json +2 -2
  7. package/packages/doctor/src/checks/local-memory.js +2 -2
  8. package/packages/memory/README.md +2 -2
  9. package/packages/memory/openclaw-plugin/README.md +2 -2
  10. package/packages/memory/openclaw-plugin/openclaw.plugin.json +1 -1
  11. package/packages/memory/src/server.js +2 -2
  12. package/packages/memory-engine-v2/.env.example +30 -0
  13. package/packages/memory-engine-v2/README.md +125 -0
  14. package/packages/memory-engine-v2/compat/Dockerfile +11 -0
  15. package/packages/memory-engine-v2/compat/requirements.txt +6 -0
  16. package/packages/memory-engine-v2/compat/server.py +1047 -0
  17. package/packages/memory-engine-v2/docker-compose.aws.yml +78 -0
  18. package/packages/memory-engine-v2/docker-compose.yml +206 -0
  19. package/packages/memory-engine-v2/extractor-async/Dockerfile +14 -0
  20. package/packages/memory-engine-v2/extractor-async/confidence.py +62 -0
  21. package/packages/memory-engine-v2/extractor-async/noise_filter.py +144 -0
  22. package/packages/memory-engine-v2/extractor-async/requirements.txt +2 -0
  23. package/packages/memory-engine-v2/extractor-async/test_confidence.py +76 -0
  24. package/packages/memory-engine-v2/extractor-async/test_noise_filter.py +177 -0
  25. package/packages/memory-engine-v2/extractor-async/worker.py +797 -0
  26. package/packages/memory-engine-v2/extractor-sync/Dockerfile +11 -0
  27. package/packages/memory-engine-v2/extractor-sync/requirements.txt +4 -0
  28. package/packages/memory-engine-v2/extractor-sync/server.py +424 -0
  29. package/packages/memory-engine-v2/org-model/migrations/001_init.sql +390 -0
  30. package/packages/memory-engine-v2/tests/e2e_smoke.py +356 -0
  31. package/packages/memory-engine-v2/tests/fixtures/generate_synthetic_corpus.py +758 -0
  32. package/packages/memory-engine/.env.example +0 -13
  33. package/packages/memory-engine/MIGRATION.md +0 -219
  34. package/packages/memory-engine/README.md +0 -145
  35. package/packages/memory-engine/bench/README.md +0 -99
  36. package/packages/memory-engine/bench/scorecards-engine/agent-coding__pentatonic-baseline__20260427-142523.json +0 -1115
  37. package/packages/memory-engine/bench/scorecards-engine/chat-recall__pentatonic-baseline__20260427-142648.json +0 -819
  38. package/packages/memory-engine/bench/scorecards-engine/circular-economy__pentatonic-baseline__20260427-142757.json +0 -1278
  39. package/packages/memory-engine/bench/scorecards-engine/customer-support__pentatonic-baseline__20260427-142900.json +0 -1018
  40. package/packages/memory-engine/bench/scorecards-engine/marketplace-ops__pentatonic-baseline__20260427-142957.json +0 -1038
  41. package/packages/memory-engine/bench/scorecards-engine/product-catalogue__pentatonic-baseline__20260427-143122.json +0 -961
  42. package/packages/memory-engine/bench/scorecards-engine-via-docker/agent-coding__pentatonic-memory__20260427-161812.json +0 -1115
  43. package/packages/memory-engine/bench/scorecards-engine-via-docker/chat-recall__pentatonic-memory__20260427-161701.json +0 -819
  44. package/packages/memory-engine/bench/scorecards-engine-via-docker/circular-economy__pentatonic-memory__20260427-161713.json +0 -1278
  45. package/packages/memory-engine/bench/scorecards-engine-via-docker/customer-support__pentatonic-memory__20260427-161723.json +0 -1018
  46. package/packages/memory-engine/bench/scorecards-engine-via-docker/marketplace-ops__pentatonic-memory__20260427-161732.json +0 -1038
  47. package/packages/memory-engine/bench/scorecards-engine-via-docker/product-catalogue__pentatonic-memory__20260427-161741.json +0 -937
  48. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/agent-coding__pentatonic-memory__20260427-184718.json +0 -1115
  49. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/chat-recall__pentatonic-memory__20260427-184614.json +0 -819
  50. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/circular-economy__pentatonic-memory__20260427-184809.json +0 -1278
  51. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/customer-support__pentatonic-memory__20260427-184854.json +0 -1018
  52. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/marketplace-ops__pentatonic-memory__20260427-184929.json +0 -1038
  53. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/product-catalogue__pentatonic-memory__20260427-185015.json +0 -961
  54. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/agent-coding__pentatonic-memory__20260427-175252.json +0 -1115
  55. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/chat-recall__pentatonic-memory__20260427-175312.json +0 -819
  56. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/circular-economy__pentatonic-memory__20260427-175335.json +0 -1278
  57. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/customer-support__pentatonic-memory__20260427-175355.json +0 -1018
  58. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/marketplace-ops__pentatonic-memory__20260427-175413.json +0 -1038
  59. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/product-catalogue__pentatonic-memory__20260427-175430.json +0 -883
  60. package/packages/memory-engine/bench/scorecards-engine-via-shim/agent-coding__pentatonic-memory__20260427-155409.json +0 -1115
  61. package/packages/memory-engine/bench/scorecards-engine-via-shim/chat-recall__pentatonic-memory__20260427-155421.json +0 -819
  62. package/packages/memory-engine/bench/scorecards-engine-via-shim/circular-economy__pentatonic-memory__20260427-155433.json +0 -1278
  63. package/packages/memory-engine/bench/scorecards-engine-via-shim/customer-support__pentatonic-memory__20260427-155443.json +0 -1018
  64. package/packages/memory-engine/bench/scorecards-engine-via-shim/marketplace-ops__pentatonic-memory__20260427-155453.json +0 -1038
  65. package/packages/memory-engine/bench/scorecards-engine-via-shim/product-catalogue__pentatonic-memory__20260427-155503.json +0 -937
  66. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/agent-coding__pentatonic-memory-latest__20260427-145103.json +0 -1115
  67. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/agent-coding__pentatonic-memory__20260427-144909.json +0 -1115
  68. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/chat-recall__pentatonic-memory-latest__20260427-145153.json +0 -819
  69. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/chat-recall__pentatonic-memory__20260427-145120.json +0 -542
  70. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/circular-economy__pentatonic-memory-latest__20260427-145313.json +0 -1278
  71. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/circular-economy__pentatonic-memory__20260427-145207.json +0 -894
  72. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/customer-support__pentatonic-memory-latest__20260427-145412.json +0 -1018
  73. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/customer-support__pentatonic-memory__20260427-145327.json +0 -680
  74. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/marketplace-ops__pentatonic-memory-latest__20260427-145517.json +0 -1038
  75. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/marketplace-ops__pentatonic-memory__20260427-145422.json +0 -693
  76. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/product-catalogue__pentatonic-memory-latest__20260427-145616.json +0 -961
  77. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/product-catalogue__pentatonic-memory__20260427-145528.json +0 -727
  78. package/packages/memory-engine/compat/Dockerfile +0 -22
  79. package/packages/memory-engine/compat/server.py +0 -1255
  80. package/packages/memory-engine/docker-compose.test.yml +0 -59
  81. package/packages/memory-engine/docker-compose.yml +0 -255
  82. package/packages/memory-engine/engine/README.md +0 -52
  83. package/packages/memory-engine/engine/l2-hybridrag-proxy.py +0 -1543
  84. package/packages/memory-engine/engine/l5-comms-layer.py +0 -663
  85. package/packages/memory-engine/engine/l6-document-store.py +0 -1018
  86. package/packages/memory-engine/engine/services/_shared/__init__.py +0 -1
  87. package/packages/memory-engine/engine/services/_shared/embed_provider.py +0 -562
  88. package/packages/memory-engine/engine/services/l2/Dockerfile +0 -50
  89. package/packages/memory-engine/engine/services/l2/init_databases.py +0 -81
  90. package/packages/memory-engine/engine/services/l2/l2-hybridrag-proxy.py +0 -2721
  91. package/packages/memory-engine/engine/services/l5/Dockerfile +0 -11
  92. package/packages/memory-engine/engine/services/l5/l5-comms-layer.py +0 -808
  93. package/packages/memory-engine/engine/services/l6/Dockerfile +0 -30
  94. package/packages/memory-engine/engine/services/l6/l6-document-store.py +0 -1221
  95. package/packages/memory-engine/engine/services/nv-embed/Dockerfile +0 -28
  96. package/packages/memory-engine/engine/services/nv-embed/server.py +0 -152
  97. package/packages/memory-engine/pme_memory/__init__.py +0 -0
  98. package/packages/memory-engine/pme_memory/__main__.py +0 -129
  99. package/packages/memory-engine/pme_memory/artifacts.py +0 -95
  100. package/packages/memory-engine/pme_memory/embed.py +0 -74
  101. package/packages/memory-engine/pme_memory/health.py +0 -36
  102. package/packages/memory-engine/pme_memory/hygiene.py +0 -159
  103. package/packages/memory-engine/pme_memory/indexer.py +0 -200
  104. package/packages/memory-engine/pme_memory/needs.py +0 -55
  105. package/packages/memory-engine/pme_memory/provenance.py +0 -80
  106. package/packages/memory-engine/pme_memory/scoring.py +0 -168
  107. package/packages/memory-engine/pme_memory/search.py +0 -52
  108. package/packages/memory-engine/pme_memory/store.py +0 -86
  109. package/packages/memory-engine/pme_memory/synthesis.py +0 -114
  110. package/packages/memory-engine/pyproject.toml +0 -65
  111. package/packages/memory-engine/scripts/kg-extractor.py +0 -557
  112. package/packages/memory-engine/scripts/kg-preflexor-v2.py +0 -738
  113. package/packages/memory-engine/scripts/wipe-legacy-l3-entities.py +0 -128
  114. package/packages/memory-engine/tests/e2e_arena.sh +0 -259
  115. package/packages/memory-engine/tests/embed_stub/Dockerfile +0 -13
  116. package/packages/memory-engine/tests/embed_stub/server.py +0 -80
  117. package/packages/memory-engine/tests/test_aggregate.py +0 -333
  118. package/packages/memory-engine/tests/test_api_contract.sh +0 -57
  119. package/packages/memory-engine/tests/test_arena_safety.py +0 -232
  120. package/packages/memory-engine/tests/test_channel_stat_reader.py +0 -437
  121. package/packages/memory-engine/tests/test_channel_stat_rollups.py +0 -308
  122. package/packages/memory-engine/tests/test_compat_nv_embed_probe.py +0 -48
  123. package/packages/memory-engine/tests/test_embed_provider.py +0 -693
  124. package/packages/memory-engine/tests/test_l2_qmd_vec_search.py +0 -280
  125. package/packages/memory-engine/tests/test_l3_arena_isolation.py +0 -412
  126. package/packages/memory-engine/tests/test_l6_module_load.py +0 -84
  127. package/packages/memory-engine/tests/test_people_list_reader.py +0 -432
@@ -0,0 +1,797 @@
1
+ """extractor-async — LLM distillation worker.
2
+
3
+ Polls org-model's distillation_queue, claims pending items, runs an
4
+ LLM extraction pass against each event's content, writes entities,
5
+ facts and relationships back, marks the queue item done.
6
+
7
+ Wire as of 2026-05-17: LLM_ENDPOINT points at the pentatonic-gateway
8
+ chat-completions endpoint (vLLM-served Qwen3-VL-30B-A3B-Instruct on
9
+ the GH200). Prompts ask for pipe-delimited, line-oriented records
10
+ (not JSON); we parse them, then upsert entities first, then
11
+ facts/relationships keyed on those entity IDs.
12
+
13
+ Architecture notes:
14
+
15
+ - Single worker process per container; horizontal scaling = N
16
+ containers. Each container claims with its hostname, so
17
+ claim_expires_at lets a crashed container's items re-surface
18
+ after the TTL.
19
+ - Polling instead of LISTEN/NOTIFY: simpler, works with any
20
+ Postgres deployment, doesn't require keeping a long-lived
21
+ notification channel open.
22
+ - Idempotent: entities/facts/relationships are content-hash IDed,
23
+ so re-extraction of the same event converges to the same rows.
24
+ """
25
+
26
+ from __future__ import annotations
27
+
28
+ import asyncio
29
+ import hashlib
30
+ import json
31
+ import logging
32
+ import os
33
+ import re
34
+ import socket
35
+ import time
36
+ from typing import Any
37
+
38
+ import httpx
39
+ import psycopg
40
+ import psycopg.rows
41
+
42
+ from confidence import corroborated_confidence
43
+ from noise_filter import is_noise_entity_name
44
+
45
# Process-wide logging: INFO level, timestamped single-line records.
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
log = logging.getLogger("extractor-async")

# Runtime configuration. Every value is read from the environment once,
# at import time; changing an env var requires a process restart.
#
# PG_DSN            — Postgres connection string for the org-model DB.
#                     The default embeds a local-dev password; override it
#                     in any real deployment.
# LLM_ENDPOINT      — URL that call_llm_batch() POSTs chat-completion
#                     requests to. Empty by default, so it must be set.
# LLM_API_KEY       — sent as both `X-API-Key` and `Authorization: Bearer`
#                     when non-empty.
# LLM_MODEL         — value of the `model` field in the request body.
# LLM_TIMEOUT_SEC, POLL_INTERVAL_SEC, CLAIM_TTL_SEC, MAX_ATTEMPTS — consumed
#                     outside this part of the file; presumably the HTTP
#                     timeout, the idle poll delay, the queue-claim lease
#                     length and the retry ceiling — TODO confirm.
# MAX_CONTENT_CHARS — per-event truncation applied by build_event_block().
PG_DSN = os.environ.get("PG_DSN", "postgresql://pme:local-dev-pw@org-model:5432/org_model")
LLM_ENDPOINT = os.environ.get("LLM_ENDPOINT", "")
LLM_API_KEY = os.environ.get("LLM_API_KEY", "")
LLM_MODEL = os.environ.get("LLM_MODEL", "Qwen/Qwen3-VL-30B-A3B-Instruct")
LLM_TIMEOUT_SEC = float(os.environ.get("LLM_TIMEOUT_SEC", "180"))
POLL_INTERVAL_SEC = float(os.environ.get("POLL_INTERVAL_SEC", "5"))
CLAIM_TTL_SEC = int(os.environ.get("CLAIM_TTL_SEC", "300"))
MAX_ATTEMPTS = int(os.environ.get("MAX_ATTEMPTS", "3"))
MAX_CONTENT_CHARS = int(os.environ.get("MAX_CONTENT_CHARS", "1200"))
# Multi-event batching. Each LLM call distills EVENTS_PER_LLM_CALL
# events in a single chat-completion request; CONCURRENT_LLM_CALLS such
# requests run in parallel against vLLM's continuous batcher. Together
# they cap how many events one worker has in-flight. The benchmarks on
# Qwen3-VL-30B-A3B on the GH200 (2026-05-18) showed batch=15 is the
# yield/latency sweet spot and concurrent=6 keeps max_num_seqs=32
# saturated without queueing inside vLLM.
EVENTS_PER_LLM_CALL = int(os.environ.get("EVENTS_PER_LLM_CALL", "15"))
CONCURRENT_LLM_CALLS = int(os.environ.get("CONCURRENT_LLM_CALLS", "6"))
# BATCH_SIZE defaults to the product of the two knobs above (90 with the
# stock values) but can be pinned independently through the environment.
BATCH_SIZE = int(
    os.environ.get("BATCH_SIZE", str(EVENTS_PER_LLM_CALL * CONCURRENT_LLM_CALLS))
)
# KV-text output averages ~200 tokens per event (8 ENT, 6 FCT, 6 REL
# capped). 300 leaves margin for verbose entities and for the per-event
# header overhead. At EVENTS_PER_LLM_CALL=15 that's 4500 max_tokens —
# fits comfortably under vLLM's 16k max-model-len with ~7k of input.
# call_llm_batch() multiplies this by the number of events in the request.
LLM_MAX_TOKENS_PER_EVENT = int(os.environ.get("LLM_MAX_TOKENS_PER_EVENT", "300"))

# "<hostname>:<pid>" — identifies this worker process. Per the module
# docstring, queue claims are tagged with the claiming container's identity.
WORKER_ID = f"{socket.gethostname()}:{os.getpid()}"
76
+
77
+
78
# KV-text output format constants. We dropped JSON output (and the
# `guided_json` schema enforcement that went with it) because a single
# invalid char inside a 13k-character JSON blob nukes the whole 10-event
# chunk via a JSONDecodeError. Pipe-delimited records, one per line,
# recover at line granularity — a malformed line skips itself, the rest
# of the chunk lands. See 2026-05-18 ops notes.
#
# EVENT_HEADER_RE matches a whole (already stripped) line of the form
# `=== event 3 ===`. It is deliberately forgiving: two or three `=` on
# either side, optional spacing around the word, any letter case. Group 1
# captures the decimal event index.
EVENT_HEADER_RE = re.compile(r"^===?\s*event\s+(\d+)\s*===?\s*$", re.IGNORECASE)
# Entity types the prompt allows on ENT lines (same list as the `type ∈`
# rule in BATCH_SYSTEM_PROMPT).
ALLOWED_ENT_TYPES = {
    "person", "org", "product", "place", "project",
    "concept", "topic", "date", "other",
}
# Fact categories the prompt allows on FCT lines (same list as the
# `category ∈` rule in BATCH_SYSTEM_PROMPT).
ALLOWED_FCT_CATEGORIES = {
    "decision", "commitment", "state", "mention",
    "observation", "preference",
}
93
+
94
+
95
+ # --------------------------------------------------------------------
96
+ # LLM extraction prompt
97
+ # --------------------------------------------------------------------
98
+
99
# Pipe-delimited, line-oriented output. No JSON. Each record stands
# on its own line so a single malformed line skips itself instead of
# nuking the whole batch (as a broken char in a 13k-char JSON blob
# previously did). Prompt is deliberately repetitive about format —
# the model needs anchoring without a schema enforcer.
#
# A trailing backslash inside the literal joins two source lines into one
# prompt line; every other newline is sent to the model as-is.
#
# NOTE(review): the closing rule says "No header, no footer" while an
# earlier rule makes the `=== event K ===` header mandatory. Presumably
# "header" there means a preamble before the first record — consider
# rewording so the model cannot read it as "omit event headers".
BATCH_SYSTEM_PROMPT = """You extract structured knowledge from N \
events for a personal-memory graph.

You will receive N events, each prefixed with `[event K]`. For EACH \
event, emit extractions in PIPE-DELIMITED TEXT (NOT JSON). Be \
conservative — only emit things explicitly stated.

OUTPUT FORMAT (exact, line-oriented):

=== event 0 ===
ENT|<type>|<name>
ENT|<type>|<name>
FCT|<category>|<subject>|<predicate>|<object>|<statement>
REL|<from>|<to>|<rel_type>

=== event 1 ===
ENT|...

RULES:
- One record per line. NO JSON. NO markdown. NO prose between records.
- Each event MUST start with a `=== event K ===` header (zero-indexed, \
matching the input index). NEVER skip an event — if an event has \
nothing to extract, emit ONLY the header.
- ENT lines have exactly 3 fields: literal `ENT`, type, name.
type ∈ {person, org, product, place, project, concept, topic, date, other}
- FCT lines have exactly 6 fields: `FCT`, category, subject, \
predicate, object, statement.
category ∈ {decision, commitment, state, mention, observation, preference}
subject MUST be an entity name declared in THIS event's ENT lines.
object MAY be an entity name OR a literal string OR `-` if absent.
statement ≤ 140 characters.
- REL lines have exactly 4 fields: `REL`, from, to, rel_type.
from and to MUST be entity names declared in THIS event's ENT lines.
rel_type is a short verb / preposition phrase.
- Pipes (`|`) inside values are FORBIDDEN — replace any `|` in source \
text with `/`. Newlines inside values are FORBIDDEN — replace with `; `.
- HARD CAPS per event: 8 ENT, 6 FCT, 6 REL. Pick the most salient.
- For code / technical content: extract only top-level services, \
modules, or domain concepts. NOT variables, types, or method names. \
A whole file is one entity, not twenty.
- Output ONLY the formatted records. No header, no footer, no prose."""
145
+
146
+
147
def build_event_block(
    idx: int, event: dict[str, Any], *, max_chars: int | None = None
) -> str:
    """Render one event as the text block sent to the LLM.

    Layout (one item per line)::

        [event K]
        source_kind: <kind>
        when: <timestamp>      only if attributes carry one
        author: <author>       only if attributes carry one
        ---
        <content, truncated>

    Args:
        idx: Zero-based position of the event within the batch; echoed in
            the ``[event K]`` marker so the model can label its output.
        event: Event row. Reads ``source_kind``, ``content`` and
            ``attributes`` (``emitted_at``/``timestamp`` and
            ``author``/``user_id``); missing or null values are tolerated.
        max_chars: Optional non-negative override for the content
            truncation length. Defaults to the module-level
            ``MAX_CONTENT_CHARS``.

    Returns:
        The rendered block, without a trailing newline.
    """
    limit = MAX_CONTENT_CHARS if max_chars is None else max_chars
    # `or` rather than a .get() default: a key that is present but null
    # must fall back too, otherwise the model is shown the literal "None".
    src = event.get("source_kind") or "unknown"
    content = (event.get("content") or "")[:limit]
    attrs = event.get("attributes") or {}
    when = attrs.get("emitted_at") or attrs.get("timestamp")
    author = attrs.get("author") or attrs.get("user_id")

    header = [f"[event {idx}]", f"source_kind: {src}"]
    if when:
        header.append(f"when: {when}")
    if author:
        header.append(f"author: {author}")
    return "\n".join(header) + "\n---\n" + content
160
+
161
+
162
def _parse_kv_records(text: str, expected_n: int) -> list[dict[str, Any]]:
    """Parse pipe-delimited KV output into per-event extraction dicts.

    Format (per BATCH_SYSTEM_PROMPT):

        === event 0 ===
        ENT|person|Phil Hauser
        FCT|mention|Phil Hauser|works at|Pentatonic|Phil works at Pentatonic
        REL|Phil Hauser|Pentatonic|works_at

    Lenient by design: nothing in the model output can fail the chunk.

    - Blank lines and surrounding whitespace are ignored.
    - A leading ``- `` / ``* `` bullet is removed from any line, headers
      included, so a bulleted header still switches the current event
      instead of leaving its records attributed to the previous one.
    - Lines before the first header, and lines under a header whose index
      is outside ``[0, expected_n)``, are dropped.
    - Lines with an unknown tag or too few fields are dropped. Surplus
      pipes are NOT an error: the split is capped, so they stay inside the
      final field (name / statement / rel_type).
    - ENT needs a non-blank name, FCT a non-blank statement, REL non-blank
      from/to/type.
    - Entity types and fact categories are lowercased but not validated
      here.
    - A FCT object of ``""``, ``-``, ``null`` or ``None`` becomes ``None``.

    Args:
        text: Raw completion text from the model.
        expected_n: Number of events that were sent in the request.

    Returns:
        Exactly ``expected_n`` dicts, in input order, each with
        ``entities``, ``facts`` and ``relationships`` lists. Events the
        model omitted come back empty so the queue still drains.
    """
    results: list[dict[str, Any]] = [
        {"entities": [], "facts": [], "relationships": []} for _ in range(expected_n)
    ]
    current: dict[str, Any] | None = None
    for raw in text.splitlines():
        line = raw.strip()
        # Strip a stray leading bullet BEFORE header detection; otherwise
        # "- === event 1 ===" is ignored and event 1's records get filed
        # under event 0.
        if line.startswith(("- ", "* ")):
            line = line[2:].lstrip()
        if not line:
            continue
        m = EVENT_HEADER_RE.match(line)
        if m:
            idx = int(m.group(1))
            current = results[idx] if 0 <= idx < expected_n else None
            continue
        if current is None:
            # Model preamble, or records under an out-of-range header.
            continue
        # maxsplit keeps any surplus pipes inside the last field rather
        # than discarding the record.
        if line.startswith("ENT|"):
            parts = line.split("|", 2)
            if len(parts) == 3 and parts[2].strip():
                current["entities"].append(
                    {"type": parts[1].strip().lower(), "name": parts[2].strip()}
                )
        elif line.startswith("FCT|"):
            parts = line.split("|", 5)
            if len(parts) == 6 and parts[5].strip():
                obj = parts[4].strip()
                current["facts"].append(
                    {
                        "category": parts[1].strip().lower(),
                        "subject": parts[2].strip(),
                        "predicate": parts[3].strip(),
                        "object": None if obj in ("", "-", "null", "None") else obj,
                        "statement": parts[5].strip(),
                    }
                )
        elif line.startswith("REL|"):
            parts = line.split("|", 3)
            if len(parts) == 4 and all(p.strip() for p in parts[1:]):
                current["relationships"].append(
                    {
                        "from": parts[1].strip(),
                        "to": parts[2].strip(),
                        "type": parts[3].strip(),
                    }
                )
        # else: ignore unrecognised line (prose, malformed record).
    return results
233
+
234
+
235
def _completion_text(data: Any) -> str:
    """Return the assistant text from a chat-completion response body.

    Looks at the OpenAI-style ``choices[0].message.content`` first, then at
    a top-level ``message.content``. Null or wrongly-typed intermediate
    values are treated as absent, and blank/whitespace-only content counts
    as missing. Returns ``""`` when no usable text is found.
    """
    if not isinstance(data, dict):
        return ""
    choices = data.get("choices")
    first = choices[0] if isinstance(choices, list) and choices else None
    for holder in (first, data):
        message = holder.get("message") if isinstance(holder, dict) else None
        content = message.get("content") if isinstance(message, dict) else None
        if isinstance(content, str) and content.strip():
            return content
    return ""


async def call_llm_batch(
    client: httpx.AsyncClient, events: list[dict[str, Any]]
) -> list[dict[str, Any]]:
    """Distill N events with a single chat-completion call.

    The model emits pipe-delimited KV records (see BATCH_SYSTEM_PROMPT);
    the parser is line-tolerant, so a malformed record skips itself rather
    than failing the chunk.

    Args:
        client: HTTP client used for the POST to ``LLM_ENDPOINT``.
        events: Event rows to distill; rendered with build_event_block().

    Returns:
        One extraction dict per input event, in input order. An empty
        input returns ``[]`` without making a request.

    Raises:
        httpx.HTTPError: transport failure or a non-2xx response.
        RuntimeError: the response carried no usable text — missing, null
            or whitespace-only content. Failing here (instead of parsing
            blank output into N empty extractions) keeps the events from
            being marked as distilled with nothing extracted.
    """
    n = len(events)
    if n == 0:
        return []

    headers = {"Content-Type": "application/json"}
    if LLM_API_KEY:
        # pentatonic-gateway uses X-API-Key; OpenAI-style endpoints
        # use Authorization Bearer. Send both — the gateway ignores
        # the one it doesn't care about.
        headers["X-API-Key"] = LLM_API_KEY
        headers["Authorization"] = f"Bearer {LLM_API_KEY}"

    user_prompt = "\n\n---\n\n".join(
        build_event_block(i, ev) for i, ev in enumerate(events)
    )

    body: dict[str, Any] = {
        "model": LLM_MODEL,
        "messages": [
            {"role": "system", "content": BATCH_SYSTEM_PROMPT},
            {"role": "user", "content": user_prompt},
        ],
        "temperature": 0.0,
        # Output budget scales with the number of events in the request.
        "max_tokens": LLM_MAX_TOKENS_PER_EVENT * n,
        # KV-text output — no guided_json / response_format. The
        # benefit of structured-output enforcement was already
        # half-ignored by VL upstream, and the parser now recovers
        # from per-line drift so the schema enforcement isn't worth
        # the JSON brittleness it brought.
    }
    r = await client.post(LLM_ENDPOINT, json=body, headers=headers)
    r.raise_for_status()
    data = r.json()
    text = _completion_text(data)
    if not text:
        raise RuntimeError(f"llm returned no content: {json.dumps(data)[:300]}")
    return _parse_kv_records(text, n)
283
+
284
+
285
+ # --------------------------------------------------------------------
286
+ # Upsert helpers (mirror extractor-sync's idempotent shape)
287
+ # --------------------------------------------------------------------
288
+
289
+
290
+ def _content_id(*parts: str) -> str:
291
+ return hashlib.sha256("\x1f".join(parts).encode()).hexdigest()[:32]
292
+
293
+
294
def upsert_entities(
    conn: psycopg.Connection,
    arena: str,
    event_id: str,
    participant_set: list[str],
    disclosure_class: str,
    entities: list[dict],
) -> dict[str, str]:
    """Insert or merge extracted entities and map their names to row IDs.

    Each entity's ID is ``_content_id(arena, entity_type, name)``, so the
    same (type, name) pair in the same arena lands on the same row no
    matter which event produced it. On conflict the row's aliases and
    provenance_event_ids are unioned with the incoming values (never
    replaced) and last_seen is refreshed.

    Entities with a blank name, or a name rejected by
    is_noise_entity_name(), are skipped and do not appear in the result.

    Args:
        conn: Open database connection; a cursor is opened on it only when
            there is at least one entity to process.
        arena: Arena the entities belong to; part of the ID.
        event_id: Source event, recorded as provenance.
        participant_set: Stored on newly inserted rows.
        disclosure_class: Stored on newly inserted rows (cast to the
            ``disclosure_class`` SQL type).
        entities: Dicts with ``type``, ``name`` and optional ``aliases``.

    Returns:
        Mapping of entity name → entity ID for every entity written, for
        facts and relationships to link against.
    """
    resolved: dict[str, str] = {}
    if not entities:
        return resolved

    statement = """
        INSERT INTO entities (
        id, arena, entity_type, canonical_name, aliases,
        provenance_event_ids, participant_set, disclosure_class
        ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s::disclosure_class)
        ON CONFLICT (id) DO UPDATE SET
        aliases = (
        SELECT ARRAY(SELECT DISTINCT UNNEST(entities.aliases || EXCLUDED.aliases))
        ),
        provenance_event_ids = (
        SELECT ARRAY(SELECT DISTINCT UNNEST(entities.provenance_event_ids || EXCLUDED.provenance_event_ids))
        ),
        last_seen = NOW()
        """

    with conn.cursor() as cur:
        for entity in entities:
            kind = (entity.get("type") or "other").lower()
            label = (entity.get("name") or "").strip()
            # Junk names (see noise_filter.py) never enter the graph.
            # Because they are also left out of the returned map, any
            # relationship that references one is skipped by
            # upsert_relationships, and facts cannot link to it.
            if not label or is_noise_entity_name(kind, label):
                continue
            alias_list = list(filter(None, entity.get("aliases") or []))
            entity_id = _content_id(arena, kind, label)
            resolved[label] = entity_id
            row = (
                entity_id, arena, kind, label, alias_list,
                [event_id], participant_set, disclosure_class,
            )
            cur.execute(statement, row)
    return resolved
351
+
352
+
353
def upsert_facts(
    conn: psycopg.Connection,
    arena: str,
    event_id: str,
    participant_set: list[str],
    disclosure_class: str,
    facts: list[dict],
    name_to_id: dict[str, str],
) -> int:
    """Insert or merge extracted facts; return how many were written.

    Facts are content-hashed on (arena, statement). Same statement
    extracted from any event in the arena converges to the same row,
    with `provenance_event_ids` accumulating the sources.

    This is a forward-only change from the prior `(arena, event_id,
    statement)` hash — historical rows extracted under the old key
    will not back-merge with new extractions. Over a 30-day extraction
    cycle the new rows dominate; until then the old + new shapes
    co-exist.

    Confidence promotes with corroboration on conflict: a statement
    that appears in N events lands at `corroborated_confidence(N)`
    (see confidence.py — caps at 0.9 to reserve [0.9, 1.0] for
    `stage = 'verified'` which only a human can produce). Stage stays
    `provisional`; corroboration is a signal, not a graduation.

    A fact is skipped when its statement is blank, or when its subject
    does not resolve through ``name_to_id``. The prompt requires every
    subject to be a declared entity, so an unresolved subject means the
    entity was either never declared or was dropped as noise; writing the
    fact anyway would put a subject-less row about a filtered entity into
    the graph. An unresolved *object* is fine — objects may be literals.

    Args:
        conn: Open database connection.
        arena: Arena the facts belong to; part of the ID.
        event_id: Source event, recorded as provenance.
        participant_set: Stored on newly inserted rows.
        disclosure_class: Stored on newly inserted rows.
        facts: Dicts with ``statement``, ``subject`` and optional
            ``category``, ``predicate``, ``object``, ``confidence``.
        name_to_id: Entity name → ID map from upsert_entities().

    Returns:
        Number of facts sent to the database (inserted or merged).
    """
    if not facts:
        return 0
    inserted = 0
    with conn.cursor() as cur:
        for f in facts:
            stmt = (f.get("statement") or "").strip()
            if not stmt:
                continue
            subject_id = name_to_id.get(f.get("subject"))
            if subject_id is None:
                # Subject was not declared, or was filtered out as noise.
                continue
            cur.execute(
                """
                INSERT INTO facts (
                id, arena, category, subject_entity_id, predicate,
                object_entity_id, statement, provenance_event_ids,
                stage, confidence, participant_set, disclosure_class
                ) VALUES (
                %s, %s, %s, %s, %s, %s, %s, %s,
                'provisional'::extraction_stage, %s, %s, %s::disclosure_class
                )
                ON CONFLICT (id) DO UPDATE SET
                provenance_event_ids = (
                SELECT ARRAY(SELECT DISTINCT UNNEST(
                facts.provenance_event_ids || EXCLUDED.provenance_event_ids
                ))
                ),
                -- Confidence bumps with each additional independent
                -- source. The cardinality of the merged provenance
                -- array IS the corroboration count, so the formula
                -- lives inline rather than round-tripping through
                -- the worker. LEAST() guards the CHECK(<=1.0)
                -- constraint defensively even though the helper's
                -- cap is 0.9.
                confidence = LEAST(
                GREATEST(
                facts.confidence,
                0.5 + 0.15 * (
                cardinality(ARRAY(SELECT DISTINCT UNNEST(
                facts.provenance_event_ids
                || EXCLUDED.provenance_event_ids
                ))) - 1
                )
                ),
                0.9
                )
                """,
                (
                    _content_id(arena, stmt),
                    arena,
                    (f.get("category") or "observation").lower(),
                    subject_id,
                    f.get("predicate"),
                    name_to_id.get(f.get("object")),  # may be None if object is a literal
                    stmt,
                    [event_id],
                    float(f.get("confidence") or corroborated_confidence(1)),
                    participant_set,
                    disclosure_class,
                ),
            )
            inserted += 1
    return inserted
440
+
441
+
442
def upsert_relationships(
    conn: psycopg.Connection,
    arena: str,
    event_id: str,
    participant_set: list[str],
    disclosure_class: str,
    relationships: list[dict],
    name_to_id: dict[str, str],
) -> int:
    """Insert or merge extracted relationships; return how many were written.

    Edge identity is (arena, from, to, type). On conflict the existing row
    is updated rather than duplicated: provenance is unioned, last_seen is
    refreshed, and weight grows by the incoming weight — but only when the
    incoming event is not already in the row's provenance. Without that
    guard, re-extracting an event (for example after an expired claim) or
    a model repeating the same REL line would inflate the weight, which
    breaks the worker's promise that re-extraction converges to the same
    rows.

    A relationship is skipped when either endpoint does not resolve
    through ``name_to_id`` or its type is blank.

    Args:
        conn: Open database connection.
        arena: Arena the edges belong to; part of the ID.
        event_id: Source event, recorded as provenance.
        participant_set: Stored on newly inserted rows.
        disclosure_class: Stored on newly inserted rows.
        relationships: Dicts with ``from``, ``to``, ``type`` and an
            optional ``confidence`` used as the weight (default 0.5).
        name_to_id: Entity name → ID map from upsert_entities().

    Returns:
        Number of relationships sent to the database (inserted or merged).
    """
    if not relationships:
        return 0
    inserted = 0
    with conn.cursor() as cur:
        for r in relationships:
            from_id = name_to_id.get(r.get("from"))
            to_id = name_to_id.get(r.get("to"))
            rtype = (r.get("type") or "").strip()
            if not from_id or not to_id or not rtype:
                continue
            rid = _content_id(arena, from_id, to_id, rtype)
            cur.execute(
                """
                INSERT INTO relationships (
                id, arena, from_entity_id, to_entity_id, relationship_type,
                weight, provenance_event_ids, participant_set, disclosure_class
                ) VALUES (
                %s, %s, %s, %s, %s, %s, %s, %s, %s::disclosure_class
                )
                ON CONFLICT (id) DO UPDATE SET
                -- Only count an event once: if every incoming provenance
                -- id is already recorded, this is a replay of the same
                -- event and the weight must not grow.
                weight = relationships.weight + CASE
                WHEN EXCLUDED.provenance_event_ids <@ relationships.provenance_event_ids
                THEN 0
                ELSE EXCLUDED.weight
                END,
                provenance_event_ids = (
                SELECT ARRAY(SELECT DISTINCT UNNEST(relationships.provenance_event_ids || EXCLUDED.provenance_event_ids))
                ),
                last_seen = NOW()
                """,
                (
                    rid, arena, from_id, to_id, rtype,
                    float(r.get("confidence") or 0.5),
                    [event_id], participant_set, disclosure_class,
                ),
            )
            inserted += 1
    return inserted
487
+
488
+
489
+ # --------------------------------------------------------------------
490
+ # Queue mechanics
491
+ # --------------------------------------------------------------------
492
+
493
+
494
+ # Distillation filters — applied at claim time so the worker never
495
+ # wastes an LLM call on noise we don't want in the graph. Tunable via
496
+ # env so we can revisit per-source value over time.
497
+ #
498
+ # Skip rules:
499
+ # - source attribute matches a known code-only ingest (pip-code-ingest
500
+ # and friends). Code chunks generate noisy entities — class names,
501
+ # file paths, variables — that pollute the graph and don't surface
502
+ # in human-memory queries.
503
+ # - received_at older than DISTILL_MAX_AGE_DAYS. Stale events have low
504
+ # facet value and burn LLM budget. Forward-only + 90-day window is
505
+ # the right default; old events stay vector-searchable.
506
# Comma-separated DISTILL_SKIP_SOURCES -> set of source names; surrounding
# whitespace is trimmed and empty entries are dropped.
SKIP_ATTRIBUTE_SOURCES = set(
    filter(
        None,
        map(
            str.strip,
            os.environ.get("DISTILL_SKIP_SOURCES", "pip-code-ingest").split(","),
        ),
    )
)
# Age cut-off, in days, for events that are still worth distilling.
DISTILL_MAX_AGE_DAYS = int(os.environ.get("DISTILL_MAX_AGE_DAYS", "90"))
514
+
515
+
516
def claim_next_batch(conn: psycopg.Connection) -> list[dict[str, Any]]:
    """Atomically claim up to BATCH_SIZE queue items for this worker.

    Three housekeeping UPDATEs run before the claim so the worker never
    spends an LLM call on rows that should not be processed:

    1. Pending rows whose event has ``attributes.source`` in
       SKIP_ATTRIBUTE_SOURCES are set to 'done' with
       ``last_error = 'filtered: source=<source>'``.
    2. Pending rows whose event was received more than
       DISTILL_MAX_AGE_DAYS days ago are set to 'done' with
       ``last_error = 'filtered: age><N>d'``.
    3. Rows still 'claimed' whose claim has expired and whose attempts are
       already at MAX_ATTEMPTS are set to 'failed'. The claim query below
       only takes rows with ``attempts < MAX_ATTEMPTS``, so without this
       sweep a row whose final attempt died mid-flight would stay
       'claimed' forever.

    The claim itself picks pending rows, plus claimed rows whose claim has
    expired, in id order using FOR UPDATE SKIP LOCKED so concurrent workers
    do not claim the same row. Each claimed row gets this worker's id, a
    claim expiry CLAIM_TTL_SEC seconds ahead, and ``attempts + 1``.

    Returns one dict per claimed row with keys ``id``, ``event_id`` and
    ``attempts`` (the already-incremented value).
    """
    with conn.cursor() as cur:
        # Pre-filter: skip-source events.
        if SKIP_ATTRIBUTE_SOURCES:
            cur.execute(
                """
                UPDATE distillation_queue dq SET
                    status = 'done',
                    completed_at = NOW(),
                    last_error = 'filtered: source=' || (e.attributes->>'source')
                FROM events e
                WHERE dq.event_id = e.id
                  AND dq.status = 'pending'
                  AND e.attributes->>'source' = ANY(%s)
                """,
                (list(SKIP_ATTRIBUTE_SOURCES),),
            )
        # Pre-filter: events older than the window.
        cur.execute(
            """
            UPDATE distillation_queue dq SET
                status = 'done',
                completed_at = NOW(),
                last_error = 'filtered: age>' || %s || 'd'
            FROM events e
            WHERE dq.event_id = e.id
              AND dq.status = 'pending'
              AND e.received_at < NOW() - (%s || ' days')::interval
            """,
            (DISTILL_MAX_AGE_DAYS, DISTILL_MAX_AGE_DAYS),
        )
        # Sweep: an expired claim with no attempts left can never be
        # re-claimed, so surface it as 'failed' instead of leaving it
        # stranded. Any error recorded by an earlier attempt is kept.
        cur.execute(
            """
            UPDATE distillation_queue SET
                status = 'failed',
                last_error = COALESCE(last_error, 'claim expired after max attempts')
            WHERE status = 'claimed'
              AND claim_expires_at < NOW()
              AND attempts >= %s
            """,
            (MAX_ATTEMPTS,),
        )

    with conn.cursor(row_factory=psycopg.rows.dict_row) as cur:
        cur.execute(
            """
            UPDATE distillation_queue SET
                status = 'claimed',
                claimed_by = %s,
                claimed_at = NOW(),
                claim_expires_at = NOW() + (%s || ' seconds')::interval,
                attempts = attempts + 1
            WHERE id IN (
                SELECT id FROM distillation_queue
                WHERE (
                    status = 'pending'
                    OR (status = 'claimed' AND claim_expires_at < NOW())
                ) AND attempts < %s
                ORDER BY id
                FOR UPDATE SKIP LOCKED
                LIMIT %s
            )
            RETURNING id, event_id, attempts
            """,
            (WORKER_ID, CLAIM_TTL_SEC, MAX_ATTEMPTS, BATCH_SIZE),
        )
        return cur.fetchall()
582
+
583
+
584
def fetch_event(conn: psycopg.Connection, event_id: str) -> dict[str, Any] | None:
    """Load the event columns the worker uses, as a dict row.

    Selects id, arena, source_kind, content, attributes, participant_set
    and disclosure_class for ``event_id``. Returns None when the query
    yields no row.
    """
    query = (
        "SELECT id, arena, source_kind, content, attributes, participant_set, "
        "disclosure_class FROM events WHERE id = %s"
    )
    with conn.cursor(row_factory=psycopg.rows.dict_row) as cur:
        cur.execute(query, (event_id,))
        row = cur.fetchone()
    return row
592
+
593
+
594
def mark_done(conn: psycopg.Connection, queue_id: int) -> None:
    """Set the queue row's status to 'done' and stamp completed_at."""
    statement = (
        "UPDATE distillation_queue SET status = 'done', completed_at = NOW() WHERE id = %s"
    )
    with conn.cursor() as cur:
        cur.execute(statement, (queue_id,))
600
+
601
+
602
def mark_failed(conn: psycopg.Connection, queue_id: int, error: str) -> None:
    """Set the queue row's status to 'failed' and record the error.

    Only the first 1024 characters of ``error`` are stored.
    """
    statement = (
        "UPDATE distillation_queue SET status = 'failed', last_error = %s WHERE id = %s"
    )
    truncated = error[:1024]
    with conn.cursor() as cur:
        cur.execute(statement, (truncated, queue_id))
608
+
609
+
610
def release_claim(conn: psycopg.Connection, queue_id: int, error: str) -> None:
    """Return a claimed queue row to 'pending' so it can be claimed again.

    Clears claimed_by, claimed_at and claim_expires_at, and stores the
    first 1024 characters of ``error`` in last_error. The attempts counter
    is left untouched.
    """
    release_sql = """
        UPDATE distillation_queue SET
            status = 'pending',
            claimed_by = NULL,
            claimed_at = NULL,
            claim_expires_at = NULL,
            last_error = %s
        WHERE id = %s
        """
    truncated = error[:1024]
    with conn.cursor() as cur:
        cur.execute(release_sql, (truncated, queue_id))
624
+
625
+
626
+ # --------------------------------------------------------------------
627
+ # Main loop
628
+ # --------------------------------------------------------------------
629
+
630
+
631
async def _extract_chunk(
    http: httpx.AsyncClient,
    chunk_items: list[dict[str, Any]],
    chunk_events: list[dict[str, Any]],
    stub_mode: bool,
) -> tuple[list[dict[str, Any] | Exception], float]:
    """Run a single multi-event LLM call for one chunk.

    Returns ``(per_item_results, llm_ms)`` where ``per_item_results`` has
    one entry per item in ``chunk_items``: either that event's extraction
    dict or an Exception. When the call itself raises, or returns a
    different number of results than there are items, every item receives
    the same Exception so the caller can release all of them.

    In stub mode no call is made: every item gets an empty extraction and
    ``llm_ms`` is 0.0.
    """
    expected = len(chunk_items)

    if stub_mode:
        empty = {"entities": [], "facts": [], "relationships": []}
        return [empty] * expected, 0.0

    started = time.perf_counter()
    try:
        results = await call_llm_batch(http, chunk_events)
    except Exception as exc:
        elapsed_ms = (time.perf_counter() - started) * 1000
        return [exc] * expected, elapsed_ms
    elapsed_ms = (time.perf_counter() - started) * 1000

    # call_llm_batch is expected to return one result per event; check
    # anyway so a server quirk fails the chunk instead of the worker.
    if len(results) == expected:
        return list(results), elapsed_ms

    mismatch = RuntimeError(
        f"chunk result count mismatch: got {len(results)}, expected {len(chunk_items)}"
    )
    return [mismatch] * expected, elapsed_ms
658
+
659
+
660
def _fail_or_release(
    conn: psycopg.Connection, queue_id: int, attempts: int, err: str
) -> None:
    """Mark the queue item failed once its attempts are used up; otherwise
    return it to 'pending' so it is retried."""
    if attempts >= MAX_ATTEMPTS:
        mark_failed(conn, queue_id, err)
    else:
        release_claim(conn, queue_id, err)


async def process_batch(
    http: httpx.AsyncClient,
    conn: psycopg.Connection,
    items: list[dict[str, Any]],
    stub_mode: bool,
) -> None:
    """Process one claimed batch using multi-event LLM calls.

    Items are split into chunks of EVENTS_PER_LLM_CALL; each chunk is one
    LLM call and at most CONCURRENT_LLM_CALLS calls run at the same time.
    Database writes happen sequentially after all calls have finished, so
    the psycopg connection is only ever used from one place at a time.

    Per item:
    - event row missing -> the queue item is marked done, no LLM call;
    - extraction failed -> marked failed when ``attempts`` has reached
      MAX_ATTEMPTS, otherwise released back to pending;
    - extraction succeeded -> entities, facts and relationships are
      upserted and the item is marked done inside ONE transaction, so a
      failure part-way through leaves no partial graph rows behind (a
      retry would otherwise re-apply the additive relationship-weight
      bump). Any exception here, including a malformed per-item result,
      is handled as a per-item failure rather than aborting the batch.

    Args:
        http: shared async HTTP client handed to the LLM call.
        conn: psycopg connection used for all reads and writes.
        items: claimed queue rows with ``id``, ``event_id``, ``attempts``.
        stub_mode: when true no LLM call is made and every event yields an
            empty extraction.
    """
    # Pre-fetch all events from DB (sync, fast).
    events_by_qid: dict[int, dict[str, Any] | None] = {
        item["id"]: fetch_event(conn, item["event_id"]) for item in items
    }

    # Drop items whose event is missing (mark done up-front, no LLM call).
    callable_items: list[dict[str, Any]] = []
    for item in items:
        if events_by_qid[item["id"]] is None:
            log.warning(
                f"event {item['event_id']} missing — marking queue {item['id']} done"
            )
            mark_done(conn, item["id"])
        else:
            callable_items.append(item)

    if not callable_items:
        return

    # Build chunks of EVENTS_PER_LLM_CALL items each (last chunk may be
    # short). Each chunk -> one LLM call.
    chunks: list[tuple[list[dict[str, Any]], list[dict[str, Any]]]] = []
    for start in range(0, len(callable_items), EVENTS_PER_LLM_CALL):
        chunk_items = callable_items[start : start + EVENTS_PER_LLM_CALL]
        chunk_events = [events_by_qid[i["id"]] for i in chunk_items]
        chunks.append((chunk_items, chunk_events))

    # Bound parallelism with a semaphore: every chunk coroutine is started
    # by gather, but only CONCURRENT_LLM_CALLS hold a slot at once.
    sem = asyncio.Semaphore(CONCURRENT_LLM_CALLS)

    async def run_one(chunk_items, chunk_events):
        async with sem:
            return await _extract_chunk(http, chunk_items, chunk_events, stub_mode)

    chunk_outcomes = await asyncio.gather(*(run_one(ci, ce) for ci, ce in chunks))

    # Flatten chunk_outcomes back to per-item results, paired with items.
    for (chunk_items, _chunk_events), (per_item, llm_ms) in zip(chunks, chunk_outcomes):
        for item, result in zip(chunk_items, per_item):
            queue_id = item["id"]
            event_id = item["event_id"]
            attempts = item["attempts"]
            event = events_by_qid[queue_id]

            if isinstance(result, Exception):
                err = f"{type(result).__name__}: {result}"
                log.warning(
                    f"extraction failed queue_id={queue_id} attempts={attempts}: {err}"
                )
                _fail_or_release(conn, queue_id, attempts, err)
                continue

            try:
                ents = result.get("entities") or []
                facts = result.get("facts") or []
                rels = result.get("relationships") or []
                arena = event["arena"]
                participant_set = event.get("participant_set") or [arena]
                disclosure = event.get("disclosure_class") or "private"

                # All graph writes for this event plus the queue update
                # commit or roll back together.
                with conn.transaction():
                    name_to_id = upsert_entities(
                        conn, arena, event_id, participant_set, disclosure, ents
                    )
                    n_facts = upsert_facts(
                        conn, arena, event_id, participant_set, disclosure, facts, name_to_id,
                    )
                    n_rels = upsert_relationships(
                        conn, arena, event_id, participant_set, disclosure, rels, name_to_id,
                    )
                    mark_done(conn, queue_id)
            except Exception as exc:
                err = f"{type(exc).__name__}: {exc}"
                log.warning(
                    f"db upsert failed queue_id={queue_id} attempts={attempts}: {err}"
                )
                _fail_or_release(conn, queue_id, attempts, err)
            else:
                log.info(
                    f"completed queue_id={queue_id} event_id={event_id} "
                    f"entities={len(name_to_id)} facts={n_facts} "
                    f"relationships={n_rels}"
                    + (f" llm_ms={llm_ms:.0f}/chunk" if not stub_mode else "")
                )
763
+
764
+
765
async def amain():
    """Run the worker loop forever: claim a batch, process it, repeat.

    Runs in stub mode (no extraction) when LLM_ENDPOINT is not set. One
    HTTP client is shared for the life of the process. One PostgreSQL
    connection (autocommit) is reused across iterations; it is only
    re-opened after an error. When the queue is empty the loop sleeps
    POLL_INTERVAL_SEC; after any error it logs the traceback, sleeps twice
    that long and reconnects.
    """
    log.info(
        f"extractor-async starting (worker_id={WORKER_ID}, "
        f"endpoint={LLM_ENDPOINT or '(stub)'}, model={LLM_MODEL}, "
        f"poll={POLL_INTERVAL_SEC}s, claim={BATCH_SIZE}, "
        f"events_per_call={EVENTS_PER_LLM_CALL}, "
        f"concurrent_calls={CONCURRENT_LLM_CALLS})"
    )
    stub_mode = not LLM_ENDPOINT
    if stub_mode:
        log.warning("LLM_ENDPOINT not set — running in stub mode (no extraction).")

    # Single async client across the loop. In-flight LLM calls are bounded
    # by CONCURRENT_LLM_CALLS inside process_batch.
    async with httpx.AsyncClient(timeout=LLM_TIMEOUT_SEC) as http:
        while True:
            try:
                # Keep one connection for as long as it works instead of
                # reconnecting on every poll; any error drops out of the
                # `with`, which closes it, and the outer loop reconnects.
                with psycopg.connect(PG_DSN, autocommit=True) as conn:
                    while True:
                        items = claim_next_batch(conn)
                        if not items:
                            await asyncio.sleep(POLL_INTERVAL_SEC)
                            continue

                        log.info(f"claimed {len(items)} item(s)")
                        await process_batch(http, conn, items, stub_mode)
            except Exception as exc:
                # log.exception keeps the traceback, which a plain
                # str(exc) message loses.
                log.exception(f"worker loop error: {exc}")
                await asyncio.sleep(POLL_INTERVAL_SEC * 2)
794
+
795
+
796
# Script entry point: start the async worker loop when run directly.
if __name__ == "__main__":
    asyncio.run(amain())