superlocalmemory 3.4.9 → 3.4.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +23 -3
- package/docs/cloud-backup.md +174 -0
- package/docs/skill-evolution.md +256 -0
- package/ide/hooks/tool-event-hook.sh +101 -11
- package/package.json +1 -1
- package/pyproject.toml +3 -2
- package/src/superlocalmemory/cli/commands.py +359 -0
- package/src/superlocalmemory/cli/ingest_cmd.py +81 -29
- package/src/superlocalmemory/cli/main.py +32 -0
- package/src/superlocalmemory/cli/setup_wizard.py +54 -11
- package/src/superlocalmemory/core/config.py +35 -0
- package/src/superlocalmemory/core/consolidation_engine.py +138 -0
- package/src/superlocalmemory/core/embedding_worker.py +1 -1
- package/src/superlocalmemory/core/engine.py +19 -0
- package/src/superlocalmemory/core/fact_consolidator.py +425 -0
- package/src/superlocalmemory/core/graph_pruner.py +290 -0
- package/src/superlocalmemory/core/maintenance_scheduler.py +44 -3
- package/src/superlocalmemory/core/recall_pipeline.py +9 -0
- package/src/superlocalmemory/core/tier_manager.py +325 -0
- package/src/superlocalmemory/encoding/entity_resolver.py +96 -28
- package/src/superlocalmemory/evolution/__init__.py +29 -0
- package/src/superlocalmemory/evolution/blind_verifier.py +115 -0
- package/src/superlocalmemory/evolution/evolution_store.py +302 -0
- package/src/superlocalmemory/evolution/mutation_generator.py +181 -0
- package/src/superlocalmemory/evolution/skill_evolver.py +555 -0
- package/src/superlocalmemory/evolution/triggers.py +367 -0
- package/src/superlocalmemory/evolution/types.py +92 -0
- package/src/superlocalmemory/hooks/hook_handlers.py +13 -0
- package/src/superlocalmemory/infra/backup.py +63 -20
- package/src/superlocalmemory/infra/cloud_backup.py +703 -0
- package/src/superlocalmemory/learning/skill_performance_miner.py +422 -0
- package/src/superlocalmemory/mcp/server.py +4 -0
- package/src/superlocalmemory/mcp/tools_evolution.py +338 -0
- package/src/superlocalmemory/retrieval/engine.py +64 -4
- package/src/superlocalmemory/retrieval/forgetting_filter.py +22 -7
- package/src/superlocalmemory/retrieval/strategy.py +2 -2
- package/src/superlocalmemory/server/routes/backup.py +512 -8
- package/src/superlocalmemory/server/routes/behavioral.py +39 -17
- package/src/superlocalmemory/server/routes/evolution.py +213 -0
- package/src/superlocalmemory/server/routes/tiers.py +195 -0
- package/src/superlocalmemory/server/unified_daemon.py +36 -5
- package/src/superlocalmemory/storage/schema_v3410.py +159 -0
- package/src/superlocalmemory/storage/schema_v3411.py +149 -0
- package/src/superlocalmemory/ui/index.html +59 -3
- package/src/superlocalmemory/ui/js/core.js +3 -0
- package/src/superlocalmemory/ui/js/lifecycle.js +83 -0
- package/src/superlocalmemory/ui/js/ng-entities.js +27 -3
- package/src/superlocalmemory/ui/js/ng-shell.js +33 -0
- package/src/superlocalmemory/ui/js/ng-skills.js +611 -0
- package/src/superlocalmemory/ui/js/settings.js +311 -1
- package/src/superlocalmemory.egg-info/PKG-INFO +16 -1
- package/src/superlocalmemory.egg-info/SOURCES.txt +18 -0
|
@@ -0,0 +1,425 @@
|
|
|
1
|
+
# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
|
|
2
|
+
# Licensed under AGPL-3.0-or-later - see LICENSE file
|
|
3
|
+
# Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
|
|
4
|
+
|
|
5
|
+
"""SuperLocalMemory V3.4.11 "Scale-Ready" — Fact Consolidation Engine.
|
|
6
|
+
|
|
7
|
+
Merges clusters of related facts about the same entity into single
|
|
8
|
+
comprehensive summary facts. Original facts move to 'archived' tier
|
|
9
|
+
but are NEVER deleted — searchable via deep recall.
|
|
10
|
+
|
|
11
|
+
Uses Mode B (Ollama LLM) for summarization, with Mode A (extractive)
|
|
12
|
+
fallback if LLM is unavailable. Mode C (cloud LLM) falls back to Mode B, then Mode A.
|
|
13
|
+
|
|
14
|
+
CRITICAL RULES:
|
|
15
|
+
1. NEVER delete original facts
|
|
16
|
+
2. Original facts → lifecycle='archived' (not deleted)
|
|
17
|
+
3. Consolidated fact links back to originals via fact_consolidations table
|
|
18
|
+
4. Only consolidates facts that are already 'warm' or 'cold' tier
|
|
19
|
+
5. Never touches 'active' or 'pinned' facts
|
|
20
|
+
6. All writes per cluster wrapped in SAVEPOINT for atomicity
|
|
21
|
+
7. Entity ID LIKE patterns use JSON-boundary quoting to prevent
|
|
22
|
+
substring false positives
|
|
23
|
+
|
|
24
|
+
Part of Qualixar | Author: Varun Pratap Bhardwaj
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
from __future__ import annotations
|
|
28
|
+
|
|
29
|
+
import json
|
|
30
|
+
import logging
|
|
31
|
+
import sqlite3
|
|
32
|
+
import uuid
|
|
33
|
+
from datetime import datetime, timezone
|
|
34
|
+
from pathlib import Path
|
|
35
|
+
|
|
36
|
+
logger = logging.getLogger("superlocalmemory.fact_consolidator")
|
|
37
|
+
|
|
38
|
+
_MAX_CLUSTER_SIZE = 10 # Max facts to merge into one
|
|
39
|
+
_MIN_CLUSTER_SIZE = 3 # Need at least 3 related facts to consolidate
|
|
40
|
+
_MAX_CONSOLIDATED_CHARS = 2000
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def consolidate_facts(
    db_path: str | Path,
    profile_id: str = "default",
    max_clusters: int = 20,
    dry_run: bool = False,
    config: object | None = None,
) -> dict:
    """Find and consolidate clusters of related facts.

    Mode behavior (read from ``config.mode``; ``"a"`` when unset):
    - Mode A: Extractive only (no LLM). Always available.
    - Mode B: Ollama LLM summarization. Falls back to extractive.
    - Mode C: Cloud LLM. Falls back to Ollama, then to extractive.

    Args:
        db_path: Path of the SQLite database to operate on.
        profile_id: Profile whose facts are considered.
        max_clusters: Maximum number of entity clusters to process.
        dry_run: When true nothing is committed; the counters then report
            what would have been merged.
        config: Optional configuration object; only its ``mode`` attribute
            is read here, the object is passed through to the summarizers.

    Returns:
        A stats dict with the keys ``clusters_found``, ``consolidated``,
        ``facts_archived``, ``errors``, ``error_detail`` and ``mode``.
        A failure in one cluster is logged and counted without aborting
        the remaining clusters. Errors raised while opening or configuring
        the connection propagate to the caller.
    """
    stats = {
        "clusters_found": 0,
        "consolidated": 0,
        "facts_archived": 0,
        "errors": 0,
        "error_detail": "",
        "mode": "a",
    }

    mode = getattr(config, "mode", None) if config else None
    if mode:
        # Accept both enum-like objects (with .value) and plain strings.
        stats["mode"] = getattr(mode, "value", str(mode)).lower()

    conn = sqlite3.connect(str(db_path))
    try:
        # Connection setup lives inside the try/finally so that a failing
        # PRAGMA can no longer leak the open connection.
        wal_mode = conn.execute("PRAGMA journal_mode=WAL").fetchone()
        if wal_mode and wal_mode[0] != "wal":
            logger.warning("WAL mode not active, got: %s", wal_mode[0])
        conn.execute("PRAGMA busy_timeout=10000")
        # The helpers read result rows by column name.
        conn.row_factory = sqlite3.Row

        try:
            clusters = _find_consolidation_clusters(
                conn, profile_id, max_clusters,
            )
            stats["clusters_found"] = len(clusters)

            for entity_id, entity_name, fact_ids in clusters:
                try:
                    result = _consolidate_cluster(
                        conn, profile_id, entity_id, entity_name,
                        fact_ids, dry_run, config,
                    )
                    if result:
                        stats["consolidated"] += 1
                        stats["facts_archived"] += len(fact_ids)
                except Exception as exc:
                    # One bad cluster must not stop the others.
                    logger.warning(
                        "Consolidation failed for %s: %s",
                        entity_name, exc, exc_info=True,
                    )
                    stats["errors"] += 1

            if not dry_run:
                conn.commit()

            if stats["consolidated"] > 0:
                logger.info(
                    "Fact consolidation: %d clusters merged, %d facts archived",
                    stats["consolidated"], stats["facts_archived"],
                )
        except Exception as exc:
            logger.error("Fact consolidation failed: %s", exc, exc_info=True)
            stats["errors"] += 1
            stats["error_detail"] = str(exc)
    finally:
        conn.close()

    return stats
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def _find_consolidation_clusters(
    conn: sqlite3.Connection,
    profile_id: str,
    max_clusters: int,
) -> list[tuple[str, str, list[str]]]:
    """Collect per-entity clusters of warm/cold, non-pinned facts.

    An entity is matched against ``canonical_entities_json`` with the
    quoted pattern ``%"<entity_id>"%`` so that one ID cannot match as a
    substring of another. Both the candidate query and the per-entity
    fact query are restricted to ``profile_id``.

    Rows are read by column name, so ``conn`` must use a name-addressable
    row factory such as ``sqlite3.Row``.

    Returns:
        A list of ``(entity_id, canonical_name, fact_ids)`` tuples, at most
        ``max_clusters`` long; each ``fact_ids`` list holds between
        ``_MIN_CLUSTER_SIZE`` and ``_MAX_CLUSTER_SIZE`` IDs.
    """
    cursor = conn.cursor()

    # Entities that have enough eligible facts, busiest entities first.
    candidate_sql = """
        SELECT ce.entity_id, ce.canonical_name, COUNT(af.fact_id) as fact_count
        FROM canonical_entities ce
        JOIN atomic_facts af
            ON af.canonical_entities_json LIKE '%"' || ce.entity_id || '"%'
            AND af.profile_id = ?
        WHERE ce.profile_id = ?
            AND af.lifecycle IN ('warm', 'cold')
            AND af.fact_id NOT IN (
                SELECT fact_id FROM pinned_facts WHERE profile_id = ?
            )
        GROUP BY ce.entity_id
        HAVING COUNT(af.fact_id) >= ?
        ORDER BY COUNT(af.fact_id) DESC
        LIMIT ?
    """

    # Eligible facts of one entity, most confident and most recent first.
    member_sql = """
        SELECT af.fact_id FROM atomic_facts af
        WHERE af.canonical_entities_json LIKE ?
            AND af.profile_id = ?
            AND af.lifecycle IN ('warm', 'cold')
            AND af.fact_id NOT IN (
                SELECT fact_id FROM pinned_facts WHERE profile_id = ?
            )
        ORDER BY af.confidence DESC, af.created_at DESC
        LIMIT ?
    """

    # Materialise the candidates first: the same cursor is reused below.
    candidates = cursor.execute(
        candidate_sql,
        (profile_id, profile_id, profile_id, _MIN_CLUSTER_SIZE, max_clusters),
    ).fetchall()

    found = []
    for row in candidates:
        entity_key = row["entity_id"]
        member_rows = cursor.execute(
            member_sql,
            (f'%"{entity_key}"%', profile_id, profile_id, _MAX_CLUSTER_SIZE),
        ).fetchall()
        member_ids = [member["fact_id"] for member in member_rows]
        if len(member_ids) < _MIN_CLUSTER_SIZE:
            continue
        found.append((entity_key, row["canonical_name"], member_ids))

    return found
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def _consolidate_cluster(
    conn: sqlite3.Connection,
    profile_id: str,
    entity_id: str,
    entity_name: str,
    fact_ids: list[str],
    dry_run: bool,
    config: object | None = None,
) -> dict | None:
    """Merge a cluster of facts into one consolidated fact.

    All writes are wrapped in a SAVEPOINT for atomicity: if any step fails,
    the whole cluster consolidation is rolled back and the exception is
    re-raised. The function does not commit; that is left to the caller.

    Args:
        conn: Open connection; rows are read by column name, so it must use
            a name-addressable row factory such as ``sqlite3.Row``.
        profile_id: Profile that owns the new fact and the archived facts.
        entity_id: ID of the cluster's entity (not read by this function).
        entity_name: Name used in the summary and in log messages.
        fact_ids: IDs of the facts to merge.
        dry_run: When true, return a preview dict without writing.
        config: Optional configuration passed on to ``_generate_summary``.

    Returns:
        ``None`` when fewer than ``_MIN_CLUSTER_SIZE`` facts could be loaded
        or no summary was produced. In dry-run mode a dict with ``entity``,
        ``facts`` and ``summary_len``; otherwise a dict with ``entity``,
        ``facts`` and ``new_fact_id``.
    """
    c = conn.cursor()

    # Load the source facts (oldest first), including canonical_entities_json.
    placeholders = ",".join("?" * len(fact_ids))
    facts = c.execute(
        "SELECT fact_id, content, confidence, created_at, canonical_entities_json "
        "FROM atomic_facts "
        f"WHERE fact_id IN ({placeholders}) ORDER BY created_at",
        fact_ids,
    ).fetchall()

    if len(facts) < _MIN_CLUSTER_SIZE:
        return None

    summary = _generate_summary(entity_name, facts, config)
    if not summary:
        return None

    if dry_run:
        return {
            "entity": entity_name,
            "facts": len(facts),
            "summary_len": len(summary),
        }

    # Use SAVEPOINT for atomic multi-step write
    savepoint_name = f"consolidate_{uuid.uuid4().hex[:8]}"
    c.execute(f"SAVEPOINT {savepoint_name}")

    try:
        new_fact_id = uuid.uuid4().hex[:16]
        now = datetime.now(timezone.utc).isoformat()

        # Only a missing (NULL) confidence defaults to 0.5; a stored 0.0 is
        # a real value and must not be inflated.
        confidences = [
            0.5 if f["confidence"] is None else f["confidence"]
            for f in facts
        ]
        avg_confidence = sum(confidences) / len(confidences)

        # Collect entities from ALL source facts (already in the SELECT).
        all_entities = set()
        for f in facts:
            cej = f["canonical_entities_json"]
            if not cej:
                continue
            try:
                all_entities.update(json.loads(cej))
            except (json.JSONDecodeError, TypeError):
                # Bad entity JSON on one source fact must not block the merge.
                logger.debug(
                    "Skipping unparseable canonical_entities_json on fact %s",
                    f["fact_id"],
                )

        # Sort for a deterministic stored value; the same JSON is written to
        # both entities_json and canonical_entities_json.
        entities_json = json.dumps(sorted(all_entities, key=str))

        c.execute("""
            INSERT INTO atomic_facts
                (fact_id, memory_id, profile_id, content, fact_type,
                 entities_json, canonical_entities_json,
                 confidence, importance, evidence_count, access_count,
                 created_at, lifecycle)
            VALUES (?, '', ?, ?, 'semantic', ?, ?, ?, 0.8, ?, 0, ?, 'active')
        """, (
            new_fact_id, profile_id, summary,
            entities_json,
            entities_json,
            round(avg_confidence, 3), len(facts), now,
        ))

        # Record the consolidation so the summary links back to its sources.
        consolidation_id = uuid.uuid4().hex[:16]
        c.execute("""
            INSERT INTO fact_consolidations
                (consolidation_id, profile_id, consolidated_fact_id,
                 source_fact_ids, strategy, created_at)
            VALUES (?, ?, ?, ?, 'entity_cluster', ?)
        """, (consolidation_id, profile_id, new_fact_id,
              json.dumps(fact_ids), now))

        # Archive the original facts (NEVER delete) — scoped to profile_id
        c.execute(
            f"UPDATE atomic_facts SET lifecycle = 'archived' "
            f"WHERE fact_id IN ({placeholders}) AND profile_id = ?",
            (*fact_ids, profile_id),
        )

        c.execute(f"RELEASE SAVEPOINT {savepoint_name}")

    except Exception:
        # ROLLBACK TO undoes the writes but keeps the savepoint on the
        # stack; release it too so a failed cluster does not leave a
        # dangling savepoint for the clusters that follow.
        c.execute(f"ROLLBACK TO SAVEPOINT {savepoint_name}")
        c.execute(f"RELEASE SAVEPOINT {savepoint_name}")
        raise

    logger.info(
        "Consolidated %d facts about '%s' → %s (%d chars)",
        len(facts), entity_name, new_fact_id[:8], len(summary),
    )

    return {"entity": entity_name, "facts": len(facts), "new_fact_id": new_fact_id}
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
def _generate_summary(
    entity_name: str,
    facts: list,
    config: object | None = None,
) -> str | None:
    """Produce the consolidated summary for the configured mode.

    The mode is read from ``config.mode`` (``"a"`` when unset). Each mode
    maps to an ordered chain of summarizers; the first one that returns a
    non-empty result wins. Unknown modes use the extractive summarizer.
    Whatever the mode, the result is capped at ``_MAX_CONSOLIDATED_CHARS``.
    """
    mode_key = "a"
    configured = getattr(config, 'mode', None) if config else None
    if configured:
        mode_key = getattr(configured, 'value', str(configured)).lower()

    # (callable, arguments) pairs so each step is only invoked when reached.
    extractive = (_summarize_extractive, (entity_name, facts))
    ollama = (_summarize_with_ollama, (entity_name, facts, config))
    cloud = (_summarize_with_cloud_llm, (entity_name, facts, config))

    fallback_chains = {
        "a": [extractive],
        "b": [ollama, extractive],
        "c": [cloud, ollama, extractive],
    }

    summary = None
    for summarizer, args in fallback_chains.get(mode_key, [extractive]):
        summary = summarizer(*args)
        if summary:
            break

    # Uniform cap across all modes
    if summary and len(summary) > _MAX_CONSOLIDATED_CHARS:
        summary = summary[:_MAX_CONSOLIDATED_CHARS - 3] + "..."

    return summary
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
def _summarize_with_ollama(
    entity_name: str,
    facts: list,
    config: object | None = None,
) -> str | None:
    """Mode B: Summarize using local Ollama LLM.

    Posts a single non-streaming request to ``<api_base>/api/generate``.
    Defaults (``http://localhost:11434``, model ``llama3.2``, 30 second
    timeout) are overridden by truthy ``api_base`` / ``model`` / ``timeout``
    attributes on ``config.llm`` when present.

    Returns:
        The stripped response text when it is longer than 50 characters,
        otherwise ``None``. Any exception is logged as a warning and also
        yields ``None`` so the caller can fall back to another summarizer.
    """
    try:
        import urllib.request

        api_base = "http://localhost:11434"
        model = "llama3.2"
        timeout = 30

        if config and hasattr(config, 'llm'):
            api_base = getattr(config.llm, 'api_base', api_base) or api_base
            model = getattr(config.llm, 'model', model) or model
            timeout = getattr(config.llm, 'timeout', timeout) or timeout

        # Only the first _MAX_CLUSTER_SIZE facts are sent; the count quoted
        # in the prompt must match what is actually listed.
        selected = facts[:_MAX_CLUSTER_SIZE]
        fact_texts = "\n".join(f"- {f['content']}" for f in selected)
        prompt = (
            f"Merge these {len(selected)} facts about '{entity_name}' into ONE concise "
            f"summary paragraph. Keep all key information. Maximum 500 words. "
            f"No preamble.\n\nFacts:\n{fact_texts}"
        )

        payload = json.dumps({
            "model": model,
            "prompt": prompt,
            "stream": False,
            "options": {"num_predict": 600},
        }).encode()

        # Tolerate a configured base URL that ends with a slash.
        endpoint = f"{str(api_base).rstrip('/')}/api/generate"
        req = urllib.request.Request(
            endpoint,
            data=payload,
            headers={"Content-Type": "application/json"},
        )
        # Context manager closes the HTTP response even if decoding fails.
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            result = json.loads(resp.read().decode())

        # A missing or null "response" field is treated as empty text.
        text = (result.get("response") or "").strip()
        return text if text and len(text) > 50 else None
    except Exception as exc:
        logger.warning("Ollama summarization failed: %s", exc)
        return None
|
|
357
|
+
|
|
358
|
+
|
|
359
|
+
def _summarize_with_cloud_llm(
|
|
360
|
+
entity_name: str,
|
|
361
|
+
facts: list,
|
|
362
|
+
config: object | None = None,
|
|
363
|
+
) -> str | None:
|
|
364
|
+
"""Mode C: Summarize using the user's configured cloud LLM provider."""
|
|
365
|
+
if not config or not hasattr(config, 'llm'):
|
|
366
|
+
return None
|
|
367
|
+
|
|
368
|
+
llm_config = config.llm
|
|
369
|
+
provider = getattr(llm_config, 'provider', '')
|
|
370
|
+
if not provider:
|
|
371
|
+
return None
|
|
372
|
+
|
|
373
|
+
try:
|
|
374
|
+
from superlocalmemory.llm.backbone import LLMBackbone
|
|
375
|
+
llm = LLMBackbone(llm_config)
|
|
376
|
+
if not llm.is_available():
|
|
377
|
+
return None
|
|
378
|
+
|
|
379
|
+
fact_texts = "\n".join(f"- {f['content']}" for f in facts[:_MAX_CLUSTER_SIZE])
|
|
380
|
+
prompt = (
|
|
381
|
+
f"Merge these {len(facts)} facts about '{entity_name}' into ONE concise "
|
|
382
|
+
f"summary paragraph. Keep all key information. Maximum 500 words. "
|
|
383
|
+
f"No preamble.\n\nFacts:\n{fact_texts}"
|
|
384
|
+
)
|
|
385
|
+
|
|
386
|
+
response = llm.generate(
|
|
387
|
+
prompt=prompt,
|
|
388
|
+
system="You are a precise fact summarizer. Output only the merged summary.",
|
|
389
|
+
max_tokens=600,
|
|
390
|
+
temperature=0.1,
|
|
391
|
+
)
|
|
392
|
+
text = response.strip() if response else None
|
|
393
|
+
return text if text and len(text) > 50 else None
|
|
394
|
+
except Exception as exc:
|
|
395
|
+
logger.warning("Cloud LLM summarization failed: %s", exc)
|
|
396
|
+
return None
|
|
397
|
+
|
|
398
|
+
|
|
399
|
+
def _summarize_extractive(entity_name: str, facts: list) -> str:
    """Extractive summary — all sentences from all facts, deduped.

    Every fact's content is split on ``". "`` and ALL resulting sentences
    are kept (not just the first one) to preserve complete information.
    Duplicate sentences are dropped case-insensitively, keeping the first
    occurrence. Facts whose content is empty or ``None`` are skipped.

    Returns:
        ``"<entity_name>: "`` followed by the space-joined sentences,
        truncated to ``_MAX_CONSOLIDATED_CHARS`` characters with a trailing
        ``"..."`` when it would otherwise be longer.
    """
    header = f"{entity_name}: "
    seen = set()
    sentences = []

    for f in facts:
        content = f["content"]
        if not content:
            # Nothing to extract; also avoids calling .split on None.
            continue
        for raw in content.split(". "):
            sent = raw.strip()
            if not sent:
                continue
            # Restore the full stop removed by the split, but leave
            # sentences that already end in terminal punctuation alone
            # (previously "Really?" became "Really?.").
            if not sent.endswith((".", "!", "?")):
                sent += "."
            normalized = sent.lower()
            if normalized not in seen:
                seen.add(normalized)
                sentences.append(sent)

    result = header + " ".join(sentences)
    if len(result) > _MAX_CONSOLIDATED_CHARS:
        result = result[:_MAX_CONSOLIDATED_CHARS - 3] + "..."
    return result
|