@pentatonic-ai/ai-agent-sdk 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. package/README.md +170 -69
  2. package/bin/__tests__/callback-server.test.js +4 -1
  3. package/bin/cli.js +41 -164
  4. package/bin/commands/config.js +251 -0
  5. package/package.json +2 -1
  6. package/packages/doctor/__tests__/detect.test.js +2 -6
  7. package/packages/doctor/src/checks/local-memory.js +164 -196
  8. package/packages/doctor/src/detect.js +11 -3
  9. package/packages/memory/src/corpus/adapters.js +104 -0
  10. package/packages/memory/src/corpus/cli.js +72 -7
  11. package/packages/memory/src/corpus/index.js +1 -1
  12. package/packages/memory-engine/.env.example +13 -0
  13. package/packages/memory-engine/README.md +131 -0
  14. package/packages/memory-engine/bench/README.md +99 -0
  15. package/packages/memory-engine/bench/scorecards-engine/agent-coding__pentatonic-baseline__20260427-142523.json +1115 -0
  16. package/packages/memory-engine/bench/scorecards-engine/chat-recall__pentatonic-baseline__20260427-142648.json +819 -0
  17. package/packages/memory-engine/bench/scorecards-engine/circular-economy__pentatonic-baseline__20260427-142757.json +1278 -0
  18. package/packages/memory-engine/bench/scorecards-engine/customer-support__pentatonic-baseline__20260427-142900.json +1018 -0
  19. package/packages/memory-engine/bench/scorecards-engine/marketplace-ops__pentatonic-baseline__20260427-142957.json +1038 -0
  20. package/packages/memory-engine/bench/scorecards-engine/product-catalogue__pentatonic-baseline__20260427-143122.json +961 -0
  21. package/packages/memory-engine/bench/scorecards-engine-via-docker/agent-coding__pentatonic-memory__20260427-161812.json +1115 -0
  22. package/packages/memory-engine/bench/scorecards-engine-via-docker/chat-recall__pentatonic-memory__20260427-161701.json +819 -0
  23. package/packages/memory-engine/bench/scorecards-engine-via-docker/circular-economy__pentatonic-memory__20260427-161713.json +1278 -0
  24. package/packages/memory-engine/bench/scorecards-engine-via-docker/customer-support__pentatonic-memory__20260427-161723.json +1018 -0
  25. package/packages/memory-engine/bench/scorecards-engine-via-docker/marketplace-ops__pentatonic-memory__20260427-161732.json +1038 -0
  26. package/packages/memory-engine/bench/scorecards-engine-via-docker/product-catalogue__pentatonic-memory__20260427-161741.json +937 -0
  27. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/agent-coding__pentatonic-memory__20260427-184718.json +1115 -0
  28. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/chat-recall__pentatonic-memory__20260427-184614.json +819 -0
  29. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/circular-economy__pentatonic-memory__20260427-184809.json +1278 -0
  30. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/customer-support__pentatonic-memory__20260427-184854.json +1018 -0
  31. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/marketplace-ops__pentatonic-memory__20260427-184929.json +1038 -0
  32. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/product-catalogue__pentatonic-memory__20260427-185015.json +961 -0
  33. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/agent-coding__pentatonic-memory__20260427-175252.json +1115 -0
  34. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/chat-recall__pentatonic-memory__20260427-175312.json +819 -0
  35. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/circular-economy__pentatonic-memory__20260427-175335.json +1278 -0
  36. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/customer-support__pentatonic-memory__20260427-175355.json +1018 -0
  37. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/marketplace-ops__pentatonic-memory__20260427-175413.json +1038 -0
  38. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/product-catalogue__pentatonic-memory__20260427-175430.json +883 -0
  39. package/packages/memory-engine/bench/scorecards-engine-via-shim/agent-coding__pentatonic-memory__20260427-155409.json +1115 -0
  40. package/packages/memory-engine/bench/scorecards-engine-via-shim/chat-recall__pentatonic-memory__20260427-155421.json +819 -0
  41. package/packages/memory-engine/bench/scorecards-engine-via-shim/circular-economy__pentatonic-memory__20260427-155433.json +1278 -0
  42. package/packages/memory-engine/bench/scorecards-engine-via-shim/customer-support__pentatonic-memory__20260427-155443.json +1018 -0
  43. package/packages/memory-engine/bench/scorecards-engine-via-shim/marketplace-ops__pentatonic-memory__20260427-155453.json +1038 -0
  44. package/packages/memory-engine/bench/scorecards-engine-via-shim/product-catalogue__pentatonic-memory__20260427-155503.json +937 -0
  45. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/agent-coding__pentatonic-memory-latest__20260427-145103.json +1115 -0
  46. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/agent-coding__pentatonic-memory__20260427-144909.json +1115 -0
  47. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/chat-recall__pentatonic-memory-latest__20260427-145153.json +819 -0
  48. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/chat-recall__pentatonic-memory__20260427-145120.json +542 -0
  49. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/circular-economy__pentatonic-memory-latest__20260427-145313.json +1278 -0
  50. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/circular-economy__pentatonic-memory__20260427-145207.json +894 -0
  51. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/customer-support__pentatonic-memory-latest__20260427-145412.json +1018 -0
  52. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/customer-support__pentatonic-memory__20260427-145327.json +680 -0
  53. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/marketplace-ops__pentatonic-memory-latest__20260427-145517.json +1038 -0
  54. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/marketplace-ops__pentatonic-memory__20260427-145422.json +693 -0
  55. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/product-catalogue__pentatonic-memory-latest__20260427-145616.json +961 -0
  56. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/product-catalogue__pentatonic-memory__20260427-145528.json +727 -0
  57. package/packages/memory-engine/compat/Dockerfile +11 -0
  58. package/packages/memory-engine/compat/server.py +680 -0
  59. package/packages/memory-engine/docker-compose.yml +243 -0
  60. package/packages/memory-engine/docs/MIGRATION.md +178 -0
  61. package/packages/memory-engine/docs/RUNBOOK-AWS.md +375 -0
  62. package/packages/memory-engine/docs/why-v05-underperforms.md +138 -0
  63. package/packages/memory-engine/engine/README.md +52 -0
  64. package/packages/memory-engine/engine/l2-hybridrag-proxy.py +1543 -0
  65. package/packages/memory-engine/engine/l5-comms-layer.py +663 -0
  66. package/packages/memory-engine/engine/l6-document-store.py +1018 -0
  67. package/packages/memory-engine/engine/services/l2/Dockerfile +41 -0
  68. package/packages/memory-engine/engine/services/l2/init_databases.py +81 -0
  69. package/packages/memory-engine/engine/services/l2/l2-hybridrag-proxy.py +1543 -0
  70. package/packages/memory-engine/engine/services/l4/Dockerfile +15 -0
  71. package/packages/memory-engine/engine/services/l4/server.py +235 -0
  72. package/packages/memory-engine/engine/services/l5/Dockerfile +9 -0
  73. package/packages/memory-engine/engine/services/l5/l5-comms-layer.py +678 -0
  74. package/packages/memory-engine/engine/services/l6/Dockerfile +11 -0
  75. package/packages/memory-engine/engine/services/l6/l6-document-store.py +1016 -0
  76. package/packages/memory-engine/engine/services/nv-embed/Dockerfile +28 -0
  77. package/packages/memory-engine/engine/services/nv-embed/server.py +152 -0
  78. package/packages/memory-engine/pme_memory/__init__.py +0 -0
  79. package/packages/memory-engine/pme_memory/__main__.py +129 -0
  80. package/packages/memory-engine/pme_memory/artifacts.py +95 -0
  81. package/packages/memory-engine/pme_memory/embed.py +74 -0
  82. package/packages/memory-engine/pme_memory/health.py +36 -0
  83. package/packages/memory-engine/pme_memory/hygiene.py +159 -0
  84. package/packages/memory-engine/pme_memory/indexer.py +200 -0
  85. package/packages/memory-engine/pme_memory/needs.py +55 -0
  86. package/packages/memory-engine/pme_memory/provenance.py +80 -0
  87. package/packages/memory-engine/pme_memory/scoring.py +168 -0
  88. package/packages/memory-engine/pme_memory/search.py +52 -0
  89. package/packages/memory-engine/pme_memory/store.py +86 -0
  90. package/packages/memory-engine/pme_memory/synthesis.py +114 -0
  91. package/packages/memory-engine/pyproject.toml +65 -0
  92. package/packages/memory-engine/scripts/kg-extractor.py +557 -0
  93. package/packages/memory-engine/scripts/kg-preflexor-v2.py +738 -0
  94. package/packages/memory-engine/tests/test_api_contract.sh +57 -0
@@ -0,0 +1,200 @@
1
+ """
2
+ pme_memory.indexer — Index life data into Milvus collections
3
+
4
+ Scans workspace for chat transcripts (JSONL), email archives, people
5
+ profiles, contacts, and memory files. Chunks text and embeds via Ollama.
6
+
7
+ Collections:
8
+ chats: JSONL chat transcripts + markdown chat summaries
9
+ emails: Email archive markdown files
10
+ contacts: People profiles + contact records
11
+ memory: Daily notes, project docs, research (excludes evolution run logs)
12
+ """
13
+
14
+ import glob
15
+ import hashlib
16
+ import json
17
+ import os
18
+ from pathlib import Path
19
+
20
+ from .embed import embed_texts, BATCH_SIZE
21
+ from .store import CommsStore
22
+
23
# Default chunking parameters, in characters (chunk_text slices a str).
CHUNK_SIZE = 512
CHUNK_OVERLAP = 64


def chunk_text(text: str, chunk_size=CHUNK_SIZE, overlap=CHUNK_OVERLAP) -> list[str]:
    """Split text into overlapping chunks.

    Text no longer than ``chunk_size`` is returned whole (unstripped) as a
    single chunk, or as no chunk at all when it is blank. Longer text is cut
    into windows of ``chunk_size`` characters whose start positions advance
    by ``chunk_size - overlap``; each window is stripped and empty windows
    are dropped.

    Args:
        text: The text to split.
        chunk_size: Maximum number of characters per chunk.
        overlap: Number of characters shared by consecutive windows.

    Returns:
        The chunks in document order.

    Raises:
        ValueError: If the text needs splitting but ``overlap`` is not
            smaller than ``chunk_size`` (the window would never advance).
    """
    if len(text) <= chunk_size:
        return [text] if text.strip() else []

    step = chunk_size - overlap
    if step <= 0:
        # Without this guard the start position never moves forward and the
        # loop appends chunks until memory runs out.
        raise ValueError(
            f"overlap ({overlap}) must be smaller than chunk_size ({chunk_size})"
        )

    chunks = []
    for start in range(0, len(text), step):
        chunk = text[start:start + chunk_size].strip()
        if chunk:
            chunks.append(chunk)
    return chunks
40
+
41
+
42
def text_id(text: str, source: str) -> str:
    """Return a deterministic 32-character hex id for a chunk.

    The id is the MD5 digest of ``"<source>:<first 200 chars of text>"``, so
    two chunks from the same source that share their first 200 characters
    receive the same id.
    """
    key = f"{source}:{text[:200]}"
    digest = hashlib.md5()
    digest.update(key.encode())
    return digest.hexdigest()
44
+
45
+
46
def _upsert_batch(store, collection, batch):
    """Embed a batch of documents and upsert it into ``collection``.

    Each document's ``"text"`` is embedded and the resulting vector is stored
    on the same dict under ``"vector"`` (the dicts are modified in place)
    before the whole batch is passed to ``store.upsert``.

    Returns:
        The number of documents in the batch; 0 for an empty batch, in which
        case nothing is embedded or written.
    """
    if not batch:
        return 0
    texts = [doc["text"] for doc in batch]
    for doc, embedding in zip(batch, embed_texts(texts)):
        doc["vector"] = embedding
    store.upsert(collection, batch)
    return len(batch)
55
+
56
+
57
def _chat_record_docs(line: str, source: str) -> list[dict]:
    """Turn one JSONL line into chunk documents; [] when the line is unusable."""
    try:
        msg = json.loads(line)
    except json.JSONDecodeError:
        return []
    # A line can be valid JSON without being an object (e.g. a list); skip it
    # instead of letting .get() raise and abort the rest of the file.
    if not isinstance(msg, dict):
        return []
    text = msg.get("text", "")
    if not isinstance(text, str) or len(text) < 10:
        return []
    channel = str(msg.get("channel", "unknown"))[:60]
    contact = str(msg.get("contact", msg.get("sender", "")))[:250]
    timestamp = str(msg.get("timestamp", ""))[:30]
    return [
        {
            "id": text_id(chunk, source),
            "text": chunk[:8000],
            "source": source[:500],
            "channel": channel,
            "contact": contact,
            "timestamp": timestamp,
        }
        for chunk in chunk_text(text)
    ]


def _index_chat_transcripts(store, workspace: Path) -> int:
    """Index every ``*.jsonl`` file found under ``<workspace>/chats``."""
    chats_dir = workspace / "chats"
    if not chats_dir.exists():
        return 0
    total = 0
    for f in chats_dir.rglob("*.jsonl"):
        try:
            source = str(f.relative_to(workspace))
            batch = []
            # Stream the file line by line; decode as UTF-8 regardless of the
            # host locale and replace undecodable bytes.
            with f.open(encoding="utf-8", errors="replace") as handle:
                for line in handle:
                    for doc in _chat_record_docs(line, source):
                        batch.append(doc)
                        if len(batch) >= BATCH_SIZE:
                            total += _upsert_batch(store, "chats", batch)
                            batch = []
            total += _upsert_batch(store, "chats", batch)
        except Exception as e:
            # Best effort: report the file and carry on with the next one.
            print(f" Error indexing {f}: {e}")
    return total


def _index_chat_summaries(store, workspace: Path) -> int:
    """Index ``*.md`` summaries under ``<workspace>/memory/chats/<channel>``."""
    total = 0
    for channel in ["telegram", "whatsapp", "imessage", "slack", "unknown"]:
        chat_dir = workspace / "memory" / "chats" / channel
        if not chat_dir.exists():
            continue
        for f in chat_dir.glob("*.md"):
            try:
                text = f.read_text(encoding="utf-8", errors="replace")
                if len(text) < 20:
                    continue
                source = str(f.relative_to(workspace))
                batch = [
                    {
                        "id": text_id(c, source),
                        "text": c[:8000],
                        "source": source[:500],
                        "channel": channel,
                        "contact": f.stem[:250],
                        "timestamp": "",
                    }
                    for c in chunk_text(text)
                ]
                total += _upsert_batch(store, "chats", batch)
            except Exception as e:
                print(f" Error: {e}")
    return total


def index_chats(store: CommsStore, workspace: Path) -> int:
    """Index JSONL chat transcripts and markdown chat summaries.

    Two independent sources are upserted into the ``chats`` collection:

    * ``<workspace>/chats/**/*.jsonl`` -- one JSON object per line. The
      ``text`` field (at least 10 characters) is chunked and stored with the
      record's ``channel``, ``contact`` (falling back to ``sender``) and
      ``timestamp``. Lines that are not JSON objects are skipped.
    * ``<workspace>/memory/chats/<channel>/*.md`` for a fixed list of
      channels; the file stem is stored as the contact.

    A missing ``<workspace>/chats`` directory only skips the transcripts; the
    markdown summaries are still indexed. A failure on one file is printed
    and does not stop the run.

    Args:
        store: Store that receives the embedded chunks.
        workspace: Root directory that is scanned.

    Returns:
        Number of chunks upserted.
    """
    return _index_chat_transcripts(store, workspace) + _index_chat_summaries(store, workspace)
113
+
114
+
115
def index_emails(store: CommsStore, workspace: Path) -> int:
    """Index email archive markdown files.

    Reads every ``*.md`` file directly under
    ``<workspace>/memory/chats/email``, chunks it and upserts the chunks into
    the ``emails`` collection. The file stem, with underscores replaced by
    spaces, is stored as the contact. Files shorter than 20 characters are
    skipped; a failure on one file is printed and does not stop the run.

    Args:
        store: Store that receives the embedded chunks.
        workspace: Root directory that is scanned.

    Returns:
        Number of chunks upserted.
    """
    emails_dir = workspace / "memory" / "chats" / "email"
    if not emails_dir.exists():
        return 0
    total = 0
    for f in emails_dir.glob("*.md"):
        try:
            # Decode explicitly as UTF-8 so the result does not depend on the
            # host locale; undecodable bytes are replaced.
            text = f.read_text(encoding="utf-8", errors="replace")
            if len(text) < 20:
                continue
            source = str(f.relative_to(workspace))
            contact = f.stem.replace("_", " ")[:250]
            batch = [
                {
                    "id": text_id(c, source),
                    "text": c[:8000],
                    "source": source[:500],
                    "channel": "email",
                    "contact": contact,
                    "timestamp": "",
                }
                for c in chunk_text(text)
            ]
            total += _upsert_batch(store, "emails", batch)
        except Exception as e:
            print(f" Error: {e}")
    return total
135
+
136
+
137
def index_contacts(store: CommsStore, workspace: Path) -> int:
    """Index people profiles and contact records.

    Scans the regular files directly under ``<workspace>/memory/people``
    (stored with channel ``profile``) and ``<workspace>/memory/contacts``
    (channel ``contacts``, chunked at 1024 characters instead of the default
    size). Everything goes into the ``contacts`` collection with the file
    stem as the contact. Files shorter than 20 characters are skipped; a
    failure on one file is printed and does not stop the run.

    Args:
        store: Store that receives the embedded chunks.
        workspace: Root directory that is scanned.

    Returns:
        Number of chunks upserted.
    """
    total = 0
    sources = [
        (workspace / "memory" / "people", "profile"),
        (workspace / "memory" / "contacts", "contacts"),
    ]
    for dir_path, channel in sources:
        if not dir_path.exists():
            continue
        # The chunk size depends only on the channel, so pick it once per directory.
        size = 1024 if channel == "contacts" else CHUNK_SIZE
        for f in dir_path.glob("*"):
            if not f.is_file():
                continue
            try:
                # Decode explicitly as UTF-8 so the result does not depend on
                # the host locale; undecodable bytes are replaced.
                text = f.read_text(encoding="utf-8", errors="replace")
                if len(text) < 20:
                    continue
                source = str(f.relative_to(workspace))
                batch = [
                    {
                        "id": text_id(c, source),
                        "text": c[:8000],
                        "source": source[:500],
                        "channel": channel,
                        "contact": f.stem[:250],
                        "timestamp": "",
                    }
                    for c in chunk_text(text, chunk_size=size)
                ]
                total += _upsert_batch(store, "contacts", batch)
            except Exception as e:
                print(f" Error: {e}")
    return total
159
+
160
+
161
def index_memory(store: CommsStore, workspace: Path) -> int:
    """Index memory markdown files (excludes chats and evolution run logs).

    Walks ``<workspace>/memory`` recursively for ``*.md`` files and upserts
    their chunks into the ``memory`` collection. A file is skipped when its
    workspace-relative path contains one of the skip patterns, or when it is
    shorter than 30 characters. A failure on one file is printed and does not
    stop the run.

    Args:
        store: Store that receives the embedded chunks.
        workspace: Root directory that is scanned.

    Returns:
        Number of chunks upserted.
    """
    total = 0
    memory_dir = workspace / "memory"
    skip_patterns = ("chats/", "evolution/loop-run-", "evolution/v3/runs/")

    for f in memory_dir.rglob("*.md"):
        relative = f.relative_to(workspace)
        # The patterns use "/" separators, so match against the POSIX form of
        # the path; str() would use "\" on Windows and never match.
        posix_path = relative.as_posix()
        if any(pattern in posix_path for pattern in skip_patterns):
            continue
        source = str(relative)
        try:
            # Decode explicitly as UTF-8 so the result does not depend on the
            # host locale; undecodable bytes are replaced.
            text = f.read_text(encoding="utf-8", errors="replace")
            if len(text) < 30:
                continue
            batch = [
                {
                    "id": text_id(c, source),
                    "text": c[:8000],
                    "source": source[:500],
                    "channel": "memory",
                    "contact": "",
                    "timestamp": "",
                }
                for c in chunk_text(text)
            ]
            total += _upsert_batch(store, "memory", batch)
        except Exception as e:
            print(f" Error: {e}")
    return total
182
+
183
+
184
def index_all(store: CommsStore, workspace: Path, targets=None) -> dict:
    """Run the indexers for the requested targets and report chunk counts.

    Args:
        store: Store passed through to each indexer.
        workspace: Root directory passed through to each indexer.
        targets: Collection names to index; ``None`` means all four.
            Names without a matching indexer are ignored.

    Returns:
        Mapping of each indexed target to the number of chunks it produced.
    """
    indexers = {
        "chats": index_chats,
        "emails": index_emails,
        "contacts": index_contacts,
        "memory": index_memory,
    }
    wanted = ["chats", "emails", "contacts", "memory"] if targets is None else targets

    counts = {}
    for name in wanted:
        run = indexers.get(name)
        if run is None:
            continue
        print(f"Indexing {name}...")
        counts[name] = run(store, workspace)
        print(f" {counts[name]:,} chunks")
    return counts
@@ -0,0 +1,55 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from dataclasses import dataclass, field
5
+ from datetime import datetime, timezone
6
+ from pathlib import Path
7
+ from typing import Any, Dict, List
8
+
9
+
10
def _utc_now() -> str:
    """Return the current UTC time as an ISO-8601 string with a UTC offset."""
    moment = datetime.now(tz=timezone.utc)
    return moment.isoformat()
12
+
13
+
14
@dataclass
class NeedSignal:
    """A single need record, serialisable to a flat dict via ``to_dict``."""

    need_type: str
    produced_by_artifact: str
    producer: str
    priority_hint: str = "normal"
    topic: str | None = None
    # Filled in with the current UTC timestamp when not supplied.
    created_at: str = field(default_factory=_utc_now)

    def to_dict(self) -> Dict[str, Any]:
        """Return the signal as a plain dict with one key per field."""
        keys = (
            "need_type",
            "produced_by_artifact",
            "producer",
            "priority_hint",
            "topic",
            "created_at",
        )
        return {key: getattr(self, key) for key in keys}
32
+
33
+
34
class NeedIndex:
    """Append-only machine-readable unresolved needs index (JSONL).

    Every published signal is stored as one JSON object per line. Creating
    the index creates the file, and any missing parent directories, if the
    file does not exist yet.
    """

    def __init__(self, path: str | Path):
        """Open (and if necessary create) the index file at ``path``."""
        self.path = Path(path)
        self.path.parent.mkdir(parents=True, exist_ok=True)
        if not self.path.exists():
            self.path.touch()

    def publish(self, signal: NeedSignal) -> None:
        """Append ``signal.to_dict()`` to the index as one JSON line."""
        with self.path.open("a", encoding="utf-8") as f:
            f.write(json.dumps(signal.to_dict(), sort_keys=True) + "\n")

    def latest(self, limit: int = 50) -> List[Dict[str, Any]]:
        """Return the records parsed from the last ``limit`` lines, oldest first.

        The limit is applied to raw lines before parsing, so lines that are
        not valid JSON are dropped and the result may hold fewer than
        ``limit`` records. A ``limit`` of zero or less yields an empty list.
        """
        if limit <= 0:
            # lines[-0:] would be the whole file and a negative limit would
            # drop leading lines instead; neither is a sensible "latest N".
            return []
        lines = self.path.read_text(encoding="utf-8").splitlines()[-limit:]
        out: List[Dict[str, Any]] = []
        for line in lines:
            try:
                out.append(json.loads(line))
            except json.JSONDecodeError:
                continue
        return out
@@ -0,0 +1,80 @@
1
+ """
2
+ pme_memory.provenance — Provenance-first output rendering
3
+
4
+ Renders artifact lineage chains for human-readable output.
5
+ Given an artifact, walks parent pointers to build a full
6
+ provenance trail with sources, tools, and timestamps.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import json
12
+ from pathlib import Path
13
+ from typing import Any, Dict, List, Optional
14
+
15
+
16
def load_artifact_index(store_path: str | Path) -> Dict[str, Dict[str, Any]]:
    """Load all artifacts from a JSONL store into a dict keyed by artifact_id.

    Lines that are not valid JSON, are not JSON objects, or have no
    ``artifact_id`` key are skipped. When an id occurs more than once the
    last occurrence wins.

    Args:
        store_path: Path of the JSONL artifact store.

    Returns:
        Mapping of artifact id to the artifact record; empty when the store
        does not exist.
    """
    store_path = Path(store_path)
    index: Dict[str, Dict[str, Any]] = {}
    if not store_path.exists():
        return index
    for line in store_path.read_text(encoding="utf-8").splitlines():
        try:
            art = json.loads(line)
        except json.JSONDecodeError:
            continue
        # Valid JSON that is not an object (a list, string, number, null)
        # cannot be an artifact; subscripting it would raise TypeError.
        if not isinstance(art, dict) or "artifact_id" not in art:
            continue
        index[art["artifact_id"]] = art
    return index
29
+
30
+
31
def trace_lineage(
    artifact_id: str,
    index: Dict[str, Dict[str, Any]],
    max_depth: int = 20,
) -> List[Dict[str, Any]]:
    """Walk parent pointers and return the lineage chain.

    The walk is breadth-first: the starting artifact comes first, followed by
    its parents, then their parents, and so on. Each id is visited once, so
    cycles terminate. An id that is absent from ``index`` (or maps to an
    empty record) is reported as ``{"artifact_id": id, "status": "missing"}``.

    Args:
        artifact_id: Id of the artifact to start from.
        index: Mapping of artifact id to artifact record.
        max_depth: Maximum number of entries in the returned chain (a cap on
            the chain length, not on the number of generations).

    Returns:
        The visited records in visit order.
    """
    chain: List[Dict[str, Any]] = []
    visited: set = set()
    queue = [artifact_id]

    while queue and len(chain) < max_depth:
        aid = queue.pop(0)
        if aid in visited:
            continue
        visited.add(aid)
        art = index.get(aid)
        if not art:
            chain.append({"artifact_id": aid, "status": "missing"})
            continue
        chain.append(art)
        # "parents" may be absent or explicitly null; both mean no parents.
        for pid in art.get("parents") or ():
            if pid not in visited:
                queue.append(pid)

    return chain
56
+
57
+
58
def render_lineage_text(
    artifact_id: str,
    index: Dict[str, Dict[str, Any]],
    max_depth: int = 20,
) -> str:
    """Render the lineage of ``artifact_id`` as indented text.

    The first line is a header; each following line shows one entry of the
    chain returned by ``trace_lineage``, indented one step further than the
    previous entry. Ids are shortened to 12 characters and timestamps to 19.
    """
    entries = trace_lineage(artifact_id, index, max_depth)
    if not entries:
        return f"No lineage found for {artifact_id}"

    rendered = [f"Provenance for {artifact_id[:12]}...\n"]
    for position, entry in enumerate(entries):
        indent = " " * position
        if position > 0:
            indent += "└─ "

        if entry.get("status") == "missing":
            rendered.append(f"{indent}[missing] {entry['artifact_id'][:12]}...")
            continue

        tool = entry.get("source_tool", "?")
        kind = entry.get("artifact_type", "?")
        stamp = entry.get("created_at", "?")[:19]
        short_id = entry["artifact_id"][:12]
        rendered.append(f"{indent}{short_id}... | {kind} | tool={tool} | {stamp}")

    return "\n".join(rendered)
@@ -0,0 +1,168 @@
1
+ """
2
+ pme_memory.scoring — Pressure Scoring for Need Signals
3
+
4
+ Ranks unresolved needs by four dimensions:
5
+ - recency: how fresh the need is (exponential decay)
6
+ - novelty: inverse frequency of this need_type in the index
7
+ - centrality: how many artifacts reference the producing artifact
8
+ - priority: explicit priority_hint weight (critical > high > normal > low)
9
+
10
+ Output: sorted list of needs with composite pressure score (0-1).
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import json
16
+ import math
17
+ from collections import Counter
18
+ from dataclasses import dataclass
19
+ from datetime import datetime, timezone
20
+ from pathlib import Path
21
+ from typing import Any, Dict, List, Optional
22
+
23
+
24
# --- Weight configuration (tunable) ---
# Share of each dimension in the composite pressure score. The four weights
# sum to 1.0, so a need scoring 1.0 on every dimension has pressure 1.0.
WEIGHT_RECENCY = 0.30
WEIGHT_NOVELTY = 0.25
WEIGHT_CENTRALITY = 0.25
WEIGHT_PRIORITY = 0.20

# Score assigned to each recognised priority_hint value.
PRIORITY_SCORES = {
    "critical": 1.0,
    "high": 0.75,
    "normal": 0.5,
    "low": 0.25,
}
36
+
37
# Recency half-life in hours (need loses half its recency score after this)
RECENCY_HALF_LIFE_H = 12.0


def _parse_iso(ts: str) -> datetime:
    """Parse an ISO-8601 timestamp, accepting a trailing ``Z`` for UTC.

    ``datetime.fromisoformat`` only understands the ``Z`` suffix from
    Python 3.11 on, so it is rewritten to ``+00:00`` before parsing.

    Raises:
        ValueError: If the string is not a valid ISO-8601 timestamp.
        TypeError: If ``ts`` is not a string.
    """
    if isinstance(ts, str) and ts.endswith("Z"):
        ts = ts[:-1] + "+00:00"
    return datetime.fromisoformat(ts)


def _recency_score(created_at: str, now: datetime) -> float:
    """Exponential decay: score = 2^(-age_hours / half_life), within [0, 1].

    Args:
        created_at: ISO-8601 creation timestamp of the need.
        now: Reference time the age is measured against.

    Returns:
        1.0 for a need created at (or after) ``now``, halving every
        ``RECENCY_HALF_LIFE_H`` hours. 0.0 when the timestamp cannot be
        parsed or cannot be subtracted from ``now`` (for example a timestamp
        without a UTC offset compared with an offset-aware ``now``).
    """
    try:
        age = (now - _parse_iso(created_at)).total_seconds() / 3600.0
    except (ValueError, TypeError):
        return 0.0
    # A timestamp in the future (clock skew) would give a negative age and a
    # score above 1.0; treat it as brand new instead.
    age = max(age, 0.0)
    return math.pow(2, -age / RECENCY_HALF_LIFE_H)
52
+
53
+
54
def _novelty_scores(needs: List[Dict[str, Any]]) -> Dict[str, float]:
    """Score each need_type by rarity: ``1 - (occurrences / total needs)``.

    Needs without a ``need_type`` are counted under the empty string.
    Returns a mapping of need_type to score; empty for an empty input.
    """
    population = len(needs) or 1
    frequency = Counter()
    for need in needs:
        frequency[need.get("need_type", "")] += 1

    scores = {}
    for kind, seen in frequency.items():
        scores[kind] = 1.0 - (seen / population)
    return scores
62
+
63
+
64
def _centrality_map(artifact_store_path: Path) -> Dict[str, int]:
    """Count how many artifacts reference each artifact_id as a parent.

    Reads the JSONL artifact store line by line. Lines that are not valid
    JSON or are not JSON objects are skipped, and an absent or null
    ``parents`` field counts as no parents.

    Returns:
        Mapping of parent id to the number of records listing it; empty when
        the store does not exist.
    """
    refs: Dict[str, int] = {}
    if not artifact_store_path.exists():
        return refs
    for line in artifact_store_path.read_text(encoding="utf-8").splitlines():
        try:
            art = json.loads(line)
        except json.JSONDecodeError:
            continue
        # Valid JSON that is not an object has no .get(); skip it rather than crash.
        if not isinstance(art, dict):
            continue
        for pid in art.get("parents") or ():
            refs[pid] = refs.get(pid, 0) + 1
    return refs
77
+
78
+
79
@dataclass
class ScoredNeed:
    """A need together with its per-dimension scores and composite pressure."""

    need_type: str
    produced_by_artifact: str
    producer: str
    topic: Optional[str]
    created_at: str
    priority_hint: str
    recency: float
    novelty: float
    centrality: float
    priority: float
    pressure: float

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain dict; scores are nested under ``"scores"`` and every
        score, including ``"pressure"``, is rounded to 4 decimal places."""
        descriptive = (
            "need_type",
            "produced_by_artifact",
            "producer",
            "topic",
            "created_at",
            "priority_hint",
        )
        dimensions = ("recency", "novelty", "centrality", "priority")

        payload: Dict[str, Any] = {name: getattr(self, name) for name in descriptive}
        payload["scores"] = {axis: round(getattr(self, axis), 4) for axis in dimensions}
        payload["pressure"] = round(self.pressure, 4)
        return payload
109
+
110
+
111
def rank_needs(
    needs_path: str | Path,
    artifact_store_path: str | Path,
    limit: int = 50,
) -> List[ScoredNeed]:
    """Score and rank unresolved needs by composite pressure.

    Novelty is computed over every record in the needs file, while only the
    last ``limit`` records are scored and returned. Lines that are not valid
    JSON or are not JSON objects are ignored.

    Args:
        needs_path: JSONL file of need records.
        artifact_store_path: JSONL artifact store used for centrality.
        limit: Number of most recent needs to score; zero or less scores none.

    Returns:
        The scored needs, highest pressure first. Empty when the needs file
        is missing or holds no usable record.
    """
    if limit <= 0:
        # raw[-0:] would select every record rather than none.
        return []

    needs_path = Path(needs_path)
    artifact_store_path = Path(artifact_store_path)

    if not needs_path.exists():
        return []

    raw = []
    for line in needs_path.read_text(encoding="utf-8").splitlines():
        try:
            record = json.loads(line)
        except json.JSONDecodeError:
            continue
        # Only JSON objects can be needs; other values would crash on .get().
        if isinstance(record, dict):
            raw.append(record)

    if not raw:
        return []

    now = datetime.now(timezone.utc)
    novelty_map = _novelty_scores(raw)
    centrality_map = _centrality_map(artifact_store_path)
    # Normalise reference counts by the most-referenced artifact.
    max_centrality = max(centrality_map.values()) if centrality_map else 1

    scored: List[ScoredNeed] = []
    for n in raw[-limit:]:
        rec = _recency_score(n.get("created_at", ""), now)
        nov = novelty_map.get(n.get("need_type", ""), 0.0)
        art_id = n.get("produced_by_artifact", "")
        cen = (centrality_map.get(art_id, 0) / max_centrality) if max_centrality else 0.0
        pri = PRIORITY_SCORES.get(n.get("priority_hint", "normal"), 0.5)

        pressure = (
            WEIGHT_RECENCY * rec
            + WEIGHT_NOVELTY * nov
            + WEIGHT_CENTRALITY * cen
            + WEIGHT_PRIORITY * pri
        )

        scored.append(ScoredNeed(
            need_type=n.get("need_type", ""),
            produced_by_artifact=art_id,
            producer=n.get("producer", ""),
            topic=n.get("topic"),
            created_at=n.get("created_at", ""),
            priority_hint=n.get("priority_hint", "normal"),
            recency=rec,
            novelty=nov,
            centrality=cen,
            priority=pri,
            pressure=pressure,
        ))

    scored.sort(key=lambda s: s.pressure, reverse=True)
    return scored
@@ -0,0 +1,52 @@
1
+ """
2
+ pme_memory.search — Semantic search across communications collections
3
+
4
+ Searches across chats, emails, contacts, and memory using vector similarity.
5
+ """
6
+
7
+ from .embed import embed_query
8
+ from .store import CommsStore, COLLECTIONS
9
+
10
+
11
def search(query: str, store: CommsStore = None, collection: str = None,
           limit: int = 10) -> list[dict]:
    """Run a vector search over one collection or all of them.

    The query is embedded once and searched in ``collection`` when given,
    otherwise in every collection in ``COLLECTIONS``. Hits are merged, sorted
    by score in descending order and cut to ``limit``. An all-zero query
    vector yields no results. An error in one collection is printed and the
    remaining collections are still searched.

    Returns list of dicts with: collection, score, text, source, channel, contact, timestamp
    """
    backend = CommsStore() if store is None else store

    query_vector = embed_query(query)
    if not any(component != 0.0 for component in query_vector):
        return []

    targets = COLLECTIONS if not collection else [collection]
    merged = []

    for name in targets:
        try:
            for hits in backend.search(name, query_vector, limit=limit):
                for hit in hits:
                    entity = hit.get("entity", {})
                    row = {
                        "collection": name,
                        "score": round(hit.get("distance", 0), 4),
                    }
                    for key in ("text", "source", "channel", "contact", "timestamp"):
                        row[key] = entity.get(key, "")
                    merged.append(row)
        except Exception as e:
            print(f" Search error in {name}: {e}")

    ranked = sorted(merged, key=lambda row: row["score"], reverse=True)
    return ranked[:limit]
47
+
48
+
49
def search_collection(query: str, collection: str, store: CommsStore = None,
                      limit: int = 10) -> list[dict]:
    """Search a single collection; delegates to ``search`` with ``collection`` set."""
    return search(query, store, collection, limit)
@@ -0,0 +1,86 @@
1
+ """
2
+ pme_memory.store — Milvus connection and collection management
3
+
4
+ Supports Milvus Lite (local .db file) and full Milvus server.
5
+ Collections: chats, emails, contacts, memory.
6
+ """
7
+
8
+ import os
9
+ from pathlib import Path
10
+ from pymilvus import MilvusClient, DataType
11
+
12
# Names of the collections this module works with.
COLLECTIONS = ["chats", "emails", "contacts", "memory"]
# Embedding vector dimension; read from PME_EMBED_DIM, defaulting to 4096.
EMBED_DIM = int(os.environ.get("PME_EMBED_DIM", "4096"))
14
+
15
+
16
def _default_db_path():
    """Return the default database file path, creating its directory.

    The base directory is ``$PME_DIR`` when set, otherwise ``~/pentatonic``;
    the database file is ``<base>/memory/l5/comms.db``.
    """
    base = os.environ.get("PME_DIR")
    if base is None:
        base = os.path.expanduser("~/pentatonic")
    target_dir = Path(base).joinpath("memory", "l5")
    target_dir.mkdir(parents=True, exist_ok=True)
    return str(target_dir / "comms.db")
21
+
22
+
23
class CommsStore:
    """Manages Milvus collections for the communications layer.

    The Milvus client is created lazily on first use, so constructing a
    store does not open a connection.
    """

    def __init__(self, uri=None):
        """Pick the Milvus URI.

        Precedence: the explicit ``uri`` argument, then the ``MILVUS_URI``
        environment variable, then the default local database path.
        """
        # _default_db_path() creates directories on disk, so it is only
        # called when neither an explicit uri nor MILVUS_URI is available.
        self.uri = uri or os.environ.get("MILVUS_URI") or _default_db_path()
        self._client = None

    @property
    def client(self):
        """The ``MilvusClient`` for ``self.uri``, created on first access."""
        if self._client is None:
            self._client = MilvusClient(uri=self.uri)
        return self._client

    def ensure_collection(self, name: str):
        """Create collection if it doesn't exist.

        The schema has a string primary key ``id``, an ``EMBED_DIM``-wide
        float ``vector`` and the string fields ``text``, ``source``,
        ``channel``, ``contact`` and ``timestamp``; the vector field gets a
        FLAT index with the COSINE metric.
        """
        if self.client.has_collection(name):
            return
        schema = self.client.create_schema(auto_id=False, enable_dynamic_field=True)
        schema.add_field("id", DataType.VARCHAR, is_primary=True, max_length=64)
        schema.add_field("vector", DataType.FLOAT_VECTOR, dim=EMBED_DIM)
        # NOTE(review): confirm whether max_length is counted in bytes or in
        # characters; long non-ASCII text could exceed it if it is bytes.
        schema.add_field("text", DataType.VARCHAR, max_length=8192)
        schema.add_field("source", DataType.VARCHAR, max_length=512)
        schema.add_field("channel", DataType.VARCHAR, max_length=64)
        schema.add_field("contact", DataType.VARCHAR, max_length=256)
        schema.add_field("timestamp", DataType.VARCHAR, max_length=32)

        index_params = self.client.prepare_index_params()
        index_params.add_index(field_name="vector", index_type="FLAT", metric_type="COSINE")
        self.client.create_collection(collection_name=name, schema=schema, index_params=index_params)

    def upsert(self, collection: str, data: list[dict]):
        """Upsert documents into a collection, creating it first if needed."""
        self.ensure_collection(collection)
        self.client.upsert(collection_name=collection, data=data)

    def search(self, collection: str, vector: list[float], limit: int = 10,
               output_fields=None):
        """Search a collection by vector similarity.

        Args:
            collection: Name of the collection to search.
            vector: Query embedding.
            limit: Maximum number of hits requested.
            output_fields: Fields returned with each hit; defaults to text,
                source, channel, contact and timestamp.

        Returns:
            Whatever ``MilvusClient.search`` returns for the single query
            vector, or ``[]`` when the collection does not exist.
        """
        if not self.client.has_collection(collection):
            return []
        if output_fields is None:
            output_fields = ["text", "source", "channel", "contact", "timestamp"]
        results = self.client.search(
            collection_name=collection,
            data=[vector],
            limit=limit,
            output_fields=output_fields,
        )
        return results

    def collection_stats(self):
        """Get stats for all collections.

        Returns:
            Mapping of each name in ``COLLECTIONS`` to
            ``{"exists": bool, "count": int}``; the count is the reported
            ``row_count`` (0 when absent or when the collection is missing).
        """
        stats = {}
        for name in COLLECTIONS:
            if self.client.has_collection(name):
                s = self.client.get_collection_stats(name)
                stats[name] = {"exists": True, "count": s.get("row_count", 0)}
            else:
                stats[name] = {"exists": False, "count": 0}
        return stats

    def total_chunks(self):
        """Return the sum of the row counts of all collections."""
        stats = self.collection_stats()
        return sum(c["count"] for c in stats.values())