@pentatonic-ai/ai-agent-sdk 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +170 -69
- package/bin/__tests__/callback-server.test.js +4 -1
- package/bin/cli.js +41 -164
- package/bin/commands/config.js +251 -0
- package/package.json +2 -1
- package/packages/doctor/__tests__/detect.test.js +2 -6
- package/packages/doctor/src/checks/local-memory.js +164 -196
- package/packages/doctor/src/detect.js +11 -3
- package/packages/memory/src/corpus/adapters.js +104 -0
- package/packages/memory/src/corpus/cli.js +72 -7
- package/packages/memory/src/corpus/index.js +1 -1
- package/packages/memory-engine/.env.example +13 -0
- package/packages/memory-engine/README.md +131 -0
- package/packages/memory-engine/bench/README.md +99 -0
- package/packages/memory-engine/bench/scorecards-engine/agent-coding__pentatonic-baseline__20260427-142523.json +1115 -0
- package/packages/memory-engine/bench/scorecards-engine/chat-recall__pentatonic-baseline__20260427-142648.json +819 -0
- package/packages/memory-engine/bench/scorecards-engine/circular-economy__pentatonic-baseline__20260427-142757.json +1278 -0
- package/packages/memory-engine/bench/scorecards-engine/customer-support__pentatonic-baseline__20260427-142900.json +1018 -0
- package/packages/memory-engine/bench/scorecards-engine/marketplace-ops__pentatonic-baseline__20260427-142957.json +1038 -0
- package/packages/memory-engine/bench/scorecards-engine/product-catalogue__pentatonic-baseline__20260427-143122.json +961 -0
- package/packages/memory-engine/bench/scorecards-engine-via-docker/agent-coding__pentatonic-memory__20260427-161812.json +1115 -0
- package/packages/memory-engine/bench/scorecards-engine-via-docker/chat-recall__pentatonic-memory__20260427-161701.json +819 -0
- package/packages/memory-engine/bench/scorecards-engine-via-docker/circular-economy__pentatonic-memory__20260427-161713.json +1278 -0
- package/packages/memory-engine/bench/scorecards-engine-via-docker/customer-support__pentatonic-memory__20260427-161723.json +1018 -0
- package/packages/memory-engine/bench/scorecards-engine-via-docker/marketplace-ops__pentatonic-memory__20260427-161732.json +1038 -0
- package/packages/memory-engine/bench/scorecards-engine-via-docker/product-catalogue__pentatonic-memory__20260427-161741.json +937 -0
- package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/agent-coding__pentatonic-memory__20260427-184718.json +1115 -0
- package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/chat-recall__pentatonic-memory__20260427-184614.json +819 -0
- package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/circular-economy__pentatonic-memory__20260427-184809.json +1278 -0
- package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/customer-support__pentatonic-memory__20260427-184854.json +1018 -0
- package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/marketplace-ops__pentatonic-memory__20260427-184929.json +1038 -0
- package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/product-catalogue__pentatonic-memory__20260427-185015.json +961 -0
- package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/agent-coding__pentatonic-memory__20260427-175252.json +1115 -0
- package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/chat-recall__pentatonic-memory__20260427-175312.json +819 -0
- package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/circular-economy__pentatonic-memory__20260427-175335.json +1278 -0
- package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/customer-support__pentatonic-memory__20260427-175355.json +1018 -0
- package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/marketplace-ops__pentatonic-memory__20260427-175413.json +1038 -0
- package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/product-catalogue__pentatonic-memory__20260427-175430.json +883 -0
- package/packages/memory-engine/bench/scorecards-engine-via-shim/agent-coding__pentatonic-memory__20260427-155409.json +1115 -0
- package/packages/memory-engine/bench/scorecards-engine-via-shim/chat-recall__pentatonic-memory__20260427-155421.json +819 -0
- package/packages/memory-engine/bench/scorecards-engine-via-shim/circular-economy__pentatonic-memory__20260427-155433.json +1278 -0
- package/packages/memory-engine/bench/scorecards-engine-via-shim/customer-support__pentatonic-memory__20260427-155443.json +1018 -0
- package/packages/memory-engine/bench/scorecards-engine-via-shim/marketplace-ops__pentatonic-memory__20260427-155453.json +1038 -0
- package/packages/memory-engine/bench/scorecards-engine-via-shim/product-catalogue__pentatonic-memory__20260427-155503.json +937 -0
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/agent-coding__pentatonic-memory-latest__20260427-145103.json +1115 -0
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/agent-coding__pentatonic-memory__20260427-144909.json +1115 -0
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/chat-recall__pentatonic-memory-latest__20260427-145153.json +819 -0
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/chat-recall__pentatonic-memory__20260427-145120.json +542 -0
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/circular-economy__pentatonic-memory-latest__20260427-145313.json +1278 -0
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/circular-economy__pentatonic-memory__20260427-145207.json +894 -0
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/customer-support__pentatonic-memory-latest__20260427-145412.json +1018 -0
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/customer-support__pentatonic-memory__20260427-145327.json +680 -0
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/marketplace-ops__pentatonic-memory-latest__20260427-145517.json +1038 -0
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/marketplace-ops__pentatonic-memory__20260427-145422.json +693 -0
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/product-catalogue__pentatonic-memory-latest__20260427-145616.json +961 -0
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/product-catalogue__pentatonic-memory__20260427-145528.json +727 -0
- package/packages/memory-engine/compat/Dockerfile +11 -0
- package/packages/memory-engine/compat/server.py +680 -0
- package/packages/memory-engine/docker-compose.yml +243 -0
- package/packages/memory-engine/docs/MIGRATION.md +178 -0
- package/packages/memory-engine/docs/RUNBOOK-AWS.md +375 -0
- package/packages/memory-engine/docs/why-v05-underperforms.md +138 -0
- package/packages/memory-engine/engine/README.md +52 -0
- package/packages/memory-engine/engine/l2-hybridrag-proxy.py +1543 -0
- package/packages/memory-engine/engine/l5-comms-layer.py +663 -0
- package/packages/memory-engine/engine/l6-document-store.py +1018 -0
- package/packages/memory-engine/engine/services/l2/Dockerfile +41 -0
- package/packages/memory-engine/engine/services/l2/init_databases.py +81 -0
- package/packages/memory-engine/engine/services/l2/l2-hybridrag-proxy.py +1543 -0
- package/packages/memory-engine/engine/services/l4/Dockerfile +15 -0
- package/packages/memory-engine/engine/services/l4/server.py +235 -0
- package/packages/memory-engine/engine/services/l5/Dockerfile +9 -0
- package/packages/memory-engine/engine/services/l5/l5-comms-layer.py +678 -0
- package/packages/memory-engine/engine/services/l6/Dockerfile +11 -0
- package/packages/memory-engine/engine/services/l6/l6-document-store.py +1016 -0
- package/packages/memory-engine/engine/services/nv-embed/Dockerfile +28 -0
- package/packages/memory-engine/engine/services/nv-embed/server.py +152 -0
- package/packages/memory-engine/pme_memory/__init__.py +0 -0
- package/packages/memory-engine/pme_memory/__main__.py +129 -0
- package/packages/memory-engine/pme_memory/artifacts.py +95 -0
- package/packages/memory-engine/pme_memory/embed.py +74 -0
- package/packages/memory-engine/pme_memory/health.py +36 -0
- package/packages/memory-engine/pme_memory/hygiene.py +159 -0
- package/packages/memory-engine/pme_memory/indexer.py +200 -0
- package/packages/memory-engine/pme_memory/needs.py +55 -0
- package/packages/memory-engine/pme_memory/provenance.py +80 -0
- package/packages/memory-engine/pme_memory/scoring.py +168 -0
- package/packages/memory-engine/pme_memory/search.py +52 -0
- package/packages/memory-engine/pme_memory/store.py +86 -0
- package/packages/memory-engine/pme_memory/synthesis.py +114 -0
- package/packages/memory-engine/pyproject.toml +65 -0
- package/packages/memory-engine/scripts/kg-extractor.py +557 -0
- package/packages/memory-engine/scripts/kg-preflexor-v2.py +738 -0
- package/packages/memory-engine/tests/test_api_contract.sh +57 -0
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
"""
|
|
2
|
+
pme_memory.indexer — Index life data into Milvus collections
|
|
3
|
+
|
|
4
|
+
Scans workspace for chat transcripts (JSONL), email archives, people
|
|
5
|
+
profiles, contacts, and memory files. Chunks text and embeds via Ollama.
|
|
6
|
+
|
|
7
|
+
Collections:
|
|
8
|
+
chats: JSONL chat transcripts + markdown chat summaries
|
|
9
|
+
emails: Email archive markdown files
|
|
10
|
+
contacts: People profiles + contact records
|
|
11
|
+
memory: Daily notes, project docs, research (excludes evolution run logs)
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import glob
|
|
15
|
+
import hashlib
|
|
16
|
+
import json
|
|
17
|
+
import os
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
|
|
20
|
+
from .embed import embed_texts, BATCH_SIZE
|
|
21
|
+
from .store import CommsStore
|
|
22
|
+
|
|
23
|
+
CHUNK_SIZE = 512  # default chunk length, in characters
CHUNK_OVERLAP = 64  # default number of characters shared by neighbouring chunks


def chunk_text(text: str, chunk_size=CHUNK_SIZE, overlap=CHUNK_OVERLAP) -> list[str]:
    """Split text into overlapping chunks.

    Text no longer than ``chunk_size`` is returned whole (unstripped) as a
    single chunk, or as no chunks when it is blank. Longer text is cut into
    windows of ``chunk_size`` characters whose start positions are
    ``chunk_size - overlap`` apart; each window is stripped of surrounding
    whitespace and dropped when nothing remains.

    Args:
        text: The text to split.
        chunk_size: Maximum length of a chunk, in characters.
        overlap: Number of characters a window shares with the previous one.

    Returns:
        The chunks, in document order.

    Raises:
        ValueError: If the text has to be split and ``overlap`` is not
            smaller than ``chunk_size`` (the window could never advance).
    """
    if len(text) <= chunk_size:
        return [text] if text.strip() else []

    step = chunk_size - overlap
    if step <= 0:
        # Without this guard the start position never moves forward and the
        # loop below would run forever.
        raise ValueError(
            f"overlap ({overlap}) must be smaller than chunk_size ({chunk_size})"
        )

    chunks = []
    for start in range(0, len(text), step):
        end = start + chunk_size
        chunk = text[start:end].strip()
        if chunk:
            chunks.append(chunk)
        if end >= len(text):
            # This window already reached the end of the text; another
            # window would only repeat characters that were just emitted.
            break
    return chunks
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def text_id(text: str, source: str) -> str:
    """Return a hex MD5 id derived from the source and the text's first 200 characters."""
    fingerprint = f"{source}:{text[:200]}"
    digest = hashlib.md5(fingerprint.encode())
    return digest.hexdigest()
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _upsert_batch(store, collection, batch):
    """Embed a batch of documents, attach the vectors, and upsert them.

    Each document's "text" is embedded with ``embed_texts`` and the result is
    stored under the document's "vector" key (the dicts are modified in
    place) before the batch is handed to ``store.upsert``.

    Returns:
        The number of documents in the batch; 0 for an empty batch, which is
        neither embedded nor upserted.
    """
    if batch:
        texts = [doc["text"] for doc in batch]
        # NOTE(review): assumes embed_texts returns one vector per text; zip
        # stops at the shorter sequence, so extra documents would be upserted
        # without a "vector" key - confirm against embed.py.
        for doc, vec in zip(batch, embed_texts(texts)):
            doc["vector"] = vec
        store.upsert(collection, batch)
        return len(batch)
    return 0
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def index_chats(store: CommsStore, workspace: Path) -> int:
    """Index JSONL chat transcripts and markdown chat summaries.

    Transcripts are read from ``workspace/chats/**/*.jsonl`` (one JSON object
    per line) and summaries from ``workspace/memory/chats/<channel>/*.md``.
    Both are chunked and upserted into the "chats" collection. Errors are
    handled per file: the file is reported on stdout and indexing continues
    with the next one.

    Args:
        store: Store that receives the embedded chunks.
        workspace: Root directory that contains ``chats/`` and ``memory/``.

    Returns:
        The number of chunks upserted.
    """
    total = 0

    # JSONL files. A missing transcripts directory only skips this section:
    # the markdown summaries below live under a different directory.
    chats_dir = workspace / "chats"
    jsonl_files = chats_dir.rglob("*.jsonl") if chats_dir.exists() else ()
    for f in jsonl_files:
        try:
            source = str(f.relative_to(workspace))
            lines = f.read_text(errors="replace").strip().split("\n")
            batch = []
            for line in lines:
                try:
                    msg = json.loads(line)
                except json.JSONDecodeError:
                    continue
                # Valid JSON that is not an object (list, string, number) has
                # no message fields; skip the line instead of aborting the file.
                if not isinstance(msg, dict):
                    continue
                text = msg.get("text", "")
                if not isinstance(text, str) or len(text) < 10:
                    continue
                for chunk in chunk_text(text):
                    batch.append({
                        "id": text_id(chunk, source),
                        "text": chunk[:8000],
                        "source": source[:500],
                        "channel": str(msg.get("channel", "unknown"))[:60],
                        "contact": str(msg.get("contact", msg.get("sender", "")))[:250],
                        "timestamp": str(msg.get("timestamp", ""))[:30],
                    })
                    # Flush full batches so one large file is embedded in
                    # BATCH_SIZE pieces.
                    if len(batch) >= BATCH_SIZE:
                        total += _upsert_batch(store, "chats", batch)
                        batch = []
            total += _upsert_batch(store, "chats", batch)
        except Exception as e:
            print(f" Error indexing {f}: {e}")

    # Markdown chat summaries
    for channel in ["telegram", "whatsapp", "imessage", "slack", "unknown"]:
        chat_dir = workspace / "memory" / "chats" / channel
        if not chat_dir.exists():
            continue
        for f in chat_dir.glob("*.md"):
            try:
                text = f.read_text(errors="replace")
                if len(text) < 20:
                    continue
                source = str(f.relative_to(workspace))
                batch = [{"id": text_id(c, source), "text": c[:8000], "source": source[:500],
                          "channel": channel, "contact": f.stem[:250], "timestamp": ""}
                         for c in chunk_text(text)]
                total += _upsert_batch(store, "chats", batch)
            except Exception as e:
                # Name the file, as the JSONL branch does, so failures can be traced.
                print(f" Error indexing {f}: {e}")

    return total
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def index_emails(store: CommsStore, workspace: Path) -> int:
    """Index the markdown email archive into the "emails" collection.

    Reads ``workspace/memory/chats/email/*.md``, skips files shorter than 20
    characters, and uses the file stem (underscores replaced by spaces) as
    the contact. Per-file errors are printed and do not stop the run.

    Returns:
        The number of chunks upserted; 0 when the directory does not exist.
    """
    emails_dir = workspace / "memory" / "chats" / "email"
    if not emails_dir.exists():
        return 0

    indexed = 0
    for path in emails_dir.glob("*.md"):
        try:
            body = path.read_text(errors="replace")
            if len(body) < 20:
                continue
            source = str(path.relative_to(workspace))
            contact = path.stem.replace("_", " ")[:250]
            docs = []
            for piece in chunk_text(body):
                docs.append({
                    "id": text_id(piece, source),
                    "text": piece[:8000],
                    "source": source[:500],
                    "channel": "email",
                    "contact": contact,
                    "timestamp": "",
                })
            indexed += _upsert_batch(store, "emails", docs)
        except Exception as e:
            print(f" Error: {e}")
    return indexed
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def index_contacts(store: CommsStore, workspace: Path) -> int:
    """Index people profiles and contact records into the "contacts" collection.

    Files directly inside ``workspace/memory/people`` are tagged with channel
    "profile"; files inside ``workspace/memory/contacts`` are tagged
    "contacts" and chunked at 1024 characters instead of ``CHUNK_SIZE``.
    Files shorter than 20 characters are skipped; per-file errors are printed.

    Returns:
        The number of chunks upserted.
    """
    memory_root = workspace / "memory"
    folders = (
        (memory_root / "people", "profile"),
        (memory_root / "contacts", "contacts"),
    )

    indexed = 0
    for folder, label in folders:
        if not folder.exists():
            continue
        # Contact records use larger chunks than profiles.
        size = 1024 if label == "contacts" else CHUNK_SIZE
        for entry in folder.glob("*"):
            if not entry.is_file():
                continue
            try:
                body = entry.read_text(errors="replace")
                if len(body) < 20:
                    continue
                source = str(entry.relative_to(workspace))
                docs = []
                for piece in chunk_text(body, chunk_size=size):
                    docs.append({
                        "id": text_id(piece, source),
                        "text": piece[:8000],
                        "source": source[:500],
                        "channel": label,
                        "contact": entry.stem[:250],
                        "timestamp": "",
                    })
                indexed += _upsert_batch(store, "contacts", docs)
            except Exception as e:
                print(f" Error: {e}")
    return indexed
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def index_memory(store: CommsStore, workspace: Path) -> int:
    """Index markdown files under ``workspace/memory`` into the "memory" collection.

    Files whose workspace-relative path contains one of the skip markers
    (chat archives and evolution run logs) are ignored, as are files shorter
    than 30 characters. Per-file errors are printed and do not stop the run.

    Returns:
        The number of chunks upserted.
    """
    # NOTE(review): markers use "/" while the path string uses the OS
    # separator, so the skip may not match on Windows - confirm.
    excluded = ("chats/", "evolution/loop-run-", "evolution/v3/runs/")

    indexed = 0
    for path in (workspace / "memory").rglob("*.md"):
        source = str(path.relative_to(workspace))
        if any(marker in source for marker in excluded):
            continue
        try:
            body = path.read_text(errors="replace")
            if len(body) < 30:
                continue
            docs = []
            for piece in chunk_text(body):
                docs.append({
                    "id": text_id(piece, source),
                    "text": piece[:8000],
                    "source": source[:500],
                    "channel": "memory",
                    "contact": "",
                    "timestamp": "",
                })
            indexed += _upsert_batch(store, "memory", docs)
        except Exception as e:
            print(f" Error: {e}")
    return indexed
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def index_all(store: CommsStore, workspace: Path, targets=None) -> dict:
    """Run the indexers for the requested targets and report chunk counts.

    Args:
        store: Store that receives the embedded chunks.
        workspace: Root directory passed to every indexer.
        targets: Names among "chats", "emails", "contacts", "memory"; all four
            when None. Names without an indexer are ignored.

    Returns:
        A dict mapping each indexed target to the number of chunks upserted.
    """
    dispatch = {
        "chats": index_chats,
        "emails": index_emails,
        "contacts": index_contacts,
        "memory": index_memory,
    }
    selected = ["chats", "emails", "contacts", "memory"] if targets is None else targets

    counts = {}
    for name in selected:
        indexer = dispatch.get(name)
        if indexer is None:
            continue
        print(f"Indexing {name}...")
        counts[name] = indexer(store, workspace)
        print(f" {counts[name]:,} chunks")
    return counts
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from dataclasses import dataclass, field
|
|
5
|
+
from datetime import datetime, timezone
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any, Dict, List
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _utc_now() -> str:
    """Return the current UTC time as a timezone-aware ISO 8601 string."""
    current = datetime.now(tz=timezone.utc)
    return current.isoformat()
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
class NeedSignal:
    """One need record, as written to the needs index."""

    need_type: str
    produced_by_artifact: str
    producer: str
    priority_hint: str = "normal"
    topic: str | None = None
    # Filled with the current UTC time when not supplied.
    created_at: str = field(default_factory=_utc_now)

    def to_dict(self) -> Dict[str, Any]:
        """Return the signal's fields as a plain dict."""
        keys = (
            "need_type",
            "produced_by_artifact",
            "producer",
            "priority_hint",
            "topic",
            "created_at",
        )
        return {key: getattr(self, key) for key in keys}
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class NeedIndex:
    """Append-only machine-readable unresolved needs index (JSONL)."""

    def __init__(self, path: str | Path):
        """Bind the index to ``path``, creating parent directories and an
        empty file when they do not exist yet."""
        self.path = Path(path)
        self.path.parent.mkdir(parents=True, exist_ok=True)
        if not self.path.exists():
            self.path.touch()

    def publish(self, signal: NeedSignal) -> None:
        """Append ``signal`` to the index as one JSON line with sorted keys."""
        with self.path.open("a", encoding="utf-8") as f:
            f.write(json.dumps(signal.to_dict(), sort_keys=True) + "\n")

    def latest(self, limit: int = 50) -> List[Dict[str, Any]]:
        """Return the records parsed from the last ``limit`` lines, oldest first.

        Lines that are not valid JSON are skipped, so fewer than ``limit``
        records may be returned. A ``limit`` of zero or less yields an empty
        list.
        """
        if limit <= 0:
            # lines[-0:] is the whole list, so zero needs explicit handling.
            return []
        lines = self.path.read_text(encoding="utf-8").splitlines()[-limit:]
        out: List[Dict[str, Any]] = []
        for line in lines:
            try:
                out.append(json.loads(line))
            except json.JSONDecodeError:
                continue
        return out
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
"""
|
|
2
|
+
pme_memory.provenance — Provenance-first output rendering
|
|
3
|
+
|
|
4
|
+
Renders artifact lineage chains for human-readable output.
|
|
5
|
+
Given an artifact, walks parent pointers to build a full
|
|
6
|
+
provenance trail with sources, tools, and timestamps.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import json
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Any, Dict, List, Optional
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def load_artifact_index(store_path: str | Path) -> Dict[str, Dict[str, Any]]:
    """Load all artifacts from a JSONL file into a dict keyed by artifact_id.

    Lines that are not valid JSON, are not JSON objects, or have no
    "artifact_id" are skipped. When an id occurs more than once the last
    occurrence wins.

    Args:
        store_path: Path of the artifact store file.

    Returns:
        The index; empty when the file does not exist.
    """
    store_path = Path(store_path)
    index: Dict[str, Dict[str, Any]] = {}
    if not store_path.exists():
        return index
    for line in store_path.read_text(encoding="utf-8").splitlines():
        try:
            art = json.loads(line)
            index[art["artifact_id"]] = art
        except (json.JSONDecodeError, KeyError, TypeError):
            # TypeError covers valid JSON that is not an object (a list,
            # string or number cannot be indexed by "artifact_id") and an
            # unhashable id; one bad line must not abort the whole load.
            continue
    return index
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def trace_lineage(
    artifact_id: str,
    index: Dict[str, Dict[str, Any]],
    max_depth: int = 20,
) -> List[Dict[str, Any]]:
    """Follow "parents" pointers breadth-first from ``artifact_id``.

    Each artifact appears at most once. An id with no (or an empty) entry in
    ``index`` is reported as ``{"artifact_id": id, "status": "missing"}``.

    Args:
        artifact_id: Id of the artifact to start from.
        index: Artifacts keyed by artifact id.
        max_depth: Maximum number of entries in the returned chain.

    Returns:
        The chain, starting with the requested artifact.
    """
    lineage: List[Dict[str, Any]] = []
    seen: set = set()
    pending = [artifact_id]
    cursor = 0  # position of the next id to visit in ``pending``

    while cursor < len(pending) and len(lineage) < max_depth:
        current = pending[cursor]
        cursor += 1
        if current in seen:
            continue
        seen.add(current)

        record = index.get(current)
        if not record:
            lineage.append({"artifact_id": current, "status": "missing"})
            continue

        lineage.append(record)
        pending.extend(
            parent for parent in record.get("parents", []) if parent not in seen
        )

    return lineage
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def render_lineage_text(
    artifact_id: str,
    index: Dict[str, Dict[str, Any]],
    max_depth: int = 20,
) -> str:
    """Render the lineage of ``artifact_id`` as indented, human-readable text.

    The first line is a header; each following line describes one entry of
    ``trace_lineage``: a shortened id, the artifact type, the source tool and
    the creation time, or a ``[missing]`` marker for ids not in ``index``.

    Returns:
        The rendered text, or a "No lineage found" message when the chain is
        empty.
    """
    chain = trace_lineage(artifact_id, index, max_depth)
    if not chain:
        return f"No lineage found for {artifact_id}"

    def describe(depth: int, art: Dict[str, Any]) -> str:
        # Entries after the first are indented by their position and get a branch marker.
        indent = " " * depth
        if depth > 0:
            indent += "└─ "
        if art.get("status") == "missing":
            return f"{indent}[missing] {art['artifact_id'][:12]}..."
        tool = art.get("source_tool", "?")
        atype = art.get("artifact_type", "?")
        stamp = art.get("created_at", "?")[:19]  # keep date and time to the second
        short_id = art["artifact_id"][:12]
        return f"{indent}{short_id}... | {atype} | tool={tool} | {stamp}"

    rendered = [f"Provenance for {artifact_id[:12]}...\n"]
    for position, entry in enumerate(chain):
        rendered.append(describe(position, entry))
    return "\n".join(rendered)
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
"""
|
|
2
|
+
pme_memory.scoring — Pressure Scoring for Need Signals
|
|
3
|
+
|
|
4
|
+
Ranks unresolved needs by four dimensions:
|
|
5
|
+
- recency: how fresh the need is (exponential decay)
|
|
6
|
+
- novelty: inverse frequency of this need_type in the index
|
|
7
|
+
- centrality: how many artifacts reference the producing artifact
|
|
8
|
+
- priority: explicit priority_hint weight (critical > high > normal > low)
|
|
9
|
+
|
|
10
|
+
Output: sorted list of needs with composite pressure score (0-1).
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import json
|
|
16
|
+
import math
|
|
17
|
+
from collections import Counter
|
|
18
|
+
from dataclasses import dataclass
|
|
19
|
+
from datetime import datetime, timezone
|
|
20
|
+
from pathlib import Path
|
|
21
|
+
from typing import Any, Dict, List, Optional
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# --- Weight configuration (tunable) ---
# Share of each dimension in the composite pressure score. The four weights
# add up to 1.0, so the composite stays within 0-1 as long as every
# component score does.
WEIGHT_RECENCY = 0.30
WEIGHT_NOVELTY = 0.25
WEIGHT_CENTRALITY = 0.25
WEIGHT_PRIORITY = 0.20

# Score assigned to each priority_hint label.
PRIORITY_SCORES = {
    "critical": 1.0,
    "high": 0.75,
    "normal": 0.5,
    "low": 0.25,
}

# Recency half-life in hours (need loses half its recency score after this)
RECENCY_HALF_LIFE_H = 12.0


def _parse_iso(ts: str) -> datetime:
    """Parse an ISO 8601 timestamp string into a datetime."""
    return datetime.fromisoformat(ts)


def _recency_score(created_at: str, now: datetime) -> float:
    """Exponential decay: score = 2^(-age_hours / half_life), within 0-1.

    Args:
        created_at: ISO 8601 creation timestamp of the need.
        now: Reference time the age is measured against.

    Returns:
        1.0 for a need created at (or after) ``now``, halving every
        ``RECENCY_HALF_LIFE_H`` hours. 0.0 when ``created_at`` cannot be
        parsed or cannot be subtracted from ``now`` (for example a timestamp
        without a UTC offset compared against a timezone-aware ``now``).
    """
    try:
        age = (now - _parse_iso(created_at)).total_seconds() / 3600.0
    except (ValueError, TypeError):
        return 0.0
    # A timestamp ahead of ``now`` (clock skew, bad data) has a negative age.
    # Unclamped, that gives a score above 1, and math.pow raises
    # OverflowError for far-future dates.
    age = max(age, 0.0)
    return math.pow(2, -age / RECENCY_HALF_LIFE_H)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _novelty_scores(needs: List[Dict[str, Any]]) -> Dict[str, float]:
    """Score each need_type by rarity: 1 minus its share of all needs.

    Needs without a "need_type" are counted under the empty string.

    Returns:
        A dict mapping need_type to its novelty score.
    """
    total = len(needs) or 1  # avoid dividing by zero for an empty list
    frequency = Counter()
    for need in needs:
        frequency[need.get("need_type", "")] += 1
    return {
        need_type: 1.0 - (seen / total)
        for need_type, seen in frequency.items()
    }
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _centrality_map(artifact_store_path: Path) -> Dict[str, int]:
    """Count how many artifacts reference each artifact_id as a parent.

    Reads the artifact store as JSONL. Lines that are not valid JSON or are
    not JSON objects are skipped, and a missing or null "parents" value
    counts as no parents.

    Args:
        artifact_store_path: Path of the artifact store file.

    Returns:
        A dict mapping a parent id to the number of artifacts listing it;
        empty when the file does not exist.
    """
    refs: Dict[str, int] = {}
    if not artifact_store_path.exists():
        return refs
    for line in artifact_store_path.read_text(encoding="utf-8").splitlines():
        try:
            art = json.loads(line)
        except json.JSONDecodeError:
            continue
        # json.loads also accepts lists, strings and numbers, which have no
        # "parents"; skip them instead of failing on .get().
        if not isinstance(art, dict):
            continue
        # "parents": null would otherwise fail in the loop below.
        for pid in art.get("parents") or []:
            refs[pid] = refs.get(pid, 0) + 1
    return refs
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
@dataclass
class ScoredNeed:
    """A need record together with its component scores and composite pressure."""

    need_type: str
    produced_by_artifact: str
    producer: str
    topic: Optional[str]
    created_at: str
    priority_hint: str
    recency: float
    novelty: float
    centrality: float
    priority: float
    pressure: float

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain dict with the need's fields, a nested "scores" dict
        and the pressure, all scores rounded to four decimals."""
        payload: Dict[str, Any] = {
            name: getattr(self, name)
            for name in (
                "need_type",
                "produced_by_artifact",
                "producer",
                "topic",
                "created_at",
                "priority_hint",
            )
        }
        payload["scores"] = {
            name: round(getattr(self, name), 4)
            for name in ("recency", "novelty", "centrality", "priority")
        }
        payload["pressure"] = round(self.pressure, 4)
        return payload
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def rank_needs(
    needs_path: str | Path,
    artifact_store_path: str | Path,
    limit: int = 50,
) -> List[ScoredNeed]:
    """Score and rank unresolved needs by composite pressure.

    Novelty is computed over every need in the index; only the last
    ``limit`` needs are scored and returned.

    Args:
        needs_path: JSONL file of need records.
        artifact_store_path: JSONL artifact store used for centrality.
        limit: Number of most recent needs to score. Zero or less scores
            nothing.

    Returns:
        The scored needs, highest pressure first. Empty when the needs file
        is missing, holds no JSON objects, or ``limit`` is not positive.
    """
    needs_path = Path(needs_path)
    artifact_store_path = Path(artifact_store_path)

    if not needs_path.exists():
        return []

    raw = []
    for line in needs_path.read_text(encoding="utf-8").splitlines():
        try:
            record = json.loads(line)
        except json.JSONDecodeError:
            continue
        # Only JSON objects are need records; a stray list, string or number
        # would fail on .get() further down.
        if isinstance(record, dict):
            raw.append(record)

    # raw[-0:] is the whole list, so a non-positive limit needs explicit handling.
    if not raw or limit <= 0:
        return []

    now = datetime.now(timezone.utc)
    novelty_map = _novelty_scores(raw)
    centrality_map = _centrality_map(artifact_store_path)
    max_centrality = max(centrality_map.values()) if centrality_map else 1

    scored: List[ScoredNeed] = []
    for n in raw[-limit:]:
        rec = _recency_score(n.get("created_at", ""), now)
        nov = novelty_map.get(n.get("need_type", ""), 0.0)
        art_id = n.get("produced_by_artifact", "")
        # Normalise by the most-referenced artifact so centrality is in 0-1.
        cen = (centrality_map.get(art_id, 0) / max_centrality) if max_centrality else 0.0
        pri = PRIORITY_SCORES.get(n.get("priority_hint", "normal"), 0.5)

        pressure = (
            WEIGHT_RECENCY * rec
            + WEIGHT_NOVELTY * nov
            + WEIGHT_CENTRALITY * cen
            + WEIGHT_PRIORITY * pri
        )

        scored.append(ScoredNeed(
            need_type=n.get("need_type", ""),
            produced_by_artifact=art_id,
            producer=n.get("producer", ""),
            topic=n.get("topic"),
            created_at=n.get("created_at", ""),
            priority_hint=n.get("priority_hint", "normal"),
            recency=rec,
            novelty=nov,
            centrality=cen,
            priority=pri,
            pressure=pressure,
        ))

    scored.sort(key=lambda s: s.pressure, reverse=True)
    return scored
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
"""
|
|
2
|
+
pme_memory.search — Semantic search across communications collections
|
|
3
|
+
|
|
4
|
+
Searches across chats, emails, contacts, and memory using vector similarity.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from .embed import embed_query
|
|
8
|
+
from .store import CommsStore, COLLECTIONS
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def search(query: str, store: CommsStore = None, collection: str = None,
           limit: int = 10) -> list[dict]:
    """Run a vector search over one collection or over all of them.

    The query is embedded once and sent to every target collection; hits are
    merged, sorted by score from highest to lowest, and cut to ``limit``. A
    failure in one collection is printed and the other collections are still
    searched.

    Args:
        query: Text to search for.
        store: Store to query; a default ``CommsStore`` is created when None.
        collection: Collection name; every name in ``COLLECTIONS`` when falsy.
        limit: Maximum hits per collection and in the merged result.

    Returns:
        Dicts with: collection, score, text, source, channel, contact,
        timestamp. Empty when the query embedding is all zeros.
    """
    store = CommsStore() if store is None else store

    vector = embed_query(query)
    if not any(v != 0.0 for v in vector):
        # NOTE(review): presumably embed_query signals failure with an
        # all-zero vector - confirm in embed.py.
        return []

    def as_row(coll, hit):
        # Flatten one raw hit into the documented result shape.
        entity = hit.get("entity", {})
        return {
            "collection": coll,
            "score": round(hit.get("distance", 0), 4),
            "text": entity.get("text", ""),
            "source": entity.get("source", ""),
            "channel": entity.get("channel", ""),
            "contact": entity.get("contact", ""),
            "timestamp": entity.get("timestamp", ""),
        }

    targets = [collection] if collection else COLLECTIONS
    merged = []
    for coll in targets:
        try:
            for hits in store.search(coll, vector, limit=limit):
                for hit in hits:
                    merged.append(as_row(coll, hit))
        except Exception as e:
            print(f" Search error in {coll}: {e}")

    ranked = sorted(merged, key=lambda row: row["score"], reverse=True)
    return ranked[:limit]
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def search_collection(query: str, collection: str, store: CommsStore = None,
                      limit: int = 10) -> list[dict]:
    """Search one named collection; a thin wrapper around ``search``."""
    hits = search(query, store=store, collection=collection, limit=limit)
    return hits
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
"""
|
|
2
|
+
pme_memory.store — Milvus connection and collection management
|
|
3
|
+
|
|
4
|
+
Supports Milvus Lite (local .db file) and full Milvus server.
|
|
5
|
+
Collections: chats, emails, contacts, memory.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import os
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from pymilvus import MilvusClient, DataType
|
|
11
|
+
|
|
12
|
+
# Names of the collections managed by this module.
COLLECTIONS = ["chats", "emails", "contacts", "memory"]
# Embedding vector dimension; overridable through the PME_EMBED_DIM variable.
EMBED_DIM = int(os.environ.get("PME_EMBED_DIM", "4096"))


def _default_db_path():
    """Return the default local database file path, creating its directory.

    The base directory is taken from the PME_DIR environment variable and
    falls back to ``~/pentatonic``; the file is ``memory/l5/comms.db``
    beneath it.
    """
    fallback = os.path.expanduser("~/pentatonic")
    base = Path(os.environ.get("PME_DIR", fallback))
    target_dir = base / "memory" / "l5"
    target_dir.mkdir(parents=True, exist_ok=True)
    db_file = target_dir / "comms.db"
    return str(db_file)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class CommsStore:
    """Manages Milvus collections for the communications layer."""

    def __init__(self, uri=None):
        """Choose the Milvus URI; the client itself is created on first use.

        The explicit ``uri`` wins, then a non-empty MILVUS_URI environment
        variable, then the default local database path.
        """
        # Resolved lazily: _default_db_path() creates directories on disk, so
        # it must only run when no other URI was configured.
        self.uri = uri or os.environ.get("MILVUS_URI") or _default_db_path()
        self._client = None

    @property
    def client(self):
        """The MilvusClient for ``self.uri``, created on first access."""
        if self._client is None:
            self._client = MilvusClient(uri=self.uri)
        return self._client

    def ensure_collection(self, name: str):
        """Create collection if it doesn't exist.

        New collections have a VARCHAR primary key "id", a FLOAT_VECTOR
        "vector" of EMBED_DIM dimensions, VARCHAR metadata fields, dynamic
        fields enabled, and a FLAT index with the COSINE metric on "vector".
        """
        if self.client.has_collection(name):
            return
        schema = self.client.create_schema(auto_id=False, enable_dynamic_field=True)
        schema.add_field("id", DataType.VARCHAR, is_primary=True, max_length=64)
        schema.add_field("vector", DataType.FLOAT_VECTOR, dim=EMBED_DIM)
        schema.add_field("text", DataType.VARCHAR, max_length=8192)
        schema.add_field("source", DataType.VARCHAR, max_length=512)
        schema.add_field("channel", DataType.VARCHAR, max_length=64)
        schema.add_field("contact", DataType.VARCHAR, max_length=256)
        schema.add_field("timestamp", DataType.VARCHAR, max_length=32)

        index_params = self.client.prepare_index_params()
        index_params.add_index(field_name="vector", index_type="FLAT", metric_type="COSINE")
        self.client.create_collection(collection_name=name, schema=schema, index_params=index_params)

    def upsert(self, collection: str, data: list[dict]):
        """Upsert documents into a collection, creating it first if needed."""
        self.ensure_collection(collection)
        self.client.upsert(collection_name=collection, data=data)

    def search(self, collection: str, vector: list[float], limit: int = 10,
               output_fields=None):
        """Search a collection by vector similarity.

        Args:
            collection: Collection to query.
            vector: Query embedding.
            limit: Maximum number of hits requested.
            output_fields: Fields returned with each hit; defaults to text,
                source, channel, contact and timestamp.

        Returns:
            The client's search result for the single query vector, or an
            empty list when the collection does not exist.
        """
        if not self.client.has_collection(collection):
            return []
        if output_fields is None:
            output_fields = ["text", "source", "channel", "contact", "timestamp"]
        results = self.client.search(
            collection_name=collection,
            data=[vector],
            limit=limit,
            output_fields=output_fields,
        )
        return results

    def collection_stats(self):
        """Get stats for all collections.

        Returns:
            A dict mapping each name in COLLECTIONS to
            ``{"exists": bool, "count": int}``.
        """
        stats = {}
        for name in COLLECTIONS:
            if self.client.has_collection(name):
                s = self.client.get_collection_stats(name)
                stats[name] = {"exists": True, "count": s.get("row_count", 0)}
            else:
                stats[name] = {"exists": False, "count": 0}
        return stats

    def total_chunks(self):
        """Return the summed row count of all collections."""
        stats = self.collection_stats()
        return sum(c["count"] for c in stats.values())
|