agent-sin 0.1.11 → 0.1.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +79 -0
- package/README.md +2 -1
- package/builtin-skills/_shared/_todo_lib.py +290 -0
- package/builtin-skills/even-g2-setup/main.ts +896 -0
- package/builtin-skills/even-g2-setup/skill.yaml +133 -0
- package/builtin-skills/memo-delete/main.py +28 -107
- package/builtin-skills/memo-delete/skill.yaml +10 -21
- package/builtin-skills/memo-index/main.py +96 -64
- package/builtin-skills/memo-index/skill.yaml +4 -10
- package/builtin-skills/memo-list/main.py +179 -0
- package/builtin-skills/memo-list/skill.yaml +51 -0
- package/builtin-skills/memo-save/main.py +191 -25
- package/builtin-skills/memo-save/skill.yaml +29 -5
- package/builtin-skills/memo-search/main.py +38 -18
- package/builtin-skills/memo-vector-search/main.py +11 -6
- package/builtin-skills/nightly-topic-knowledge/_feedback_lib.py +391 -0
- package/builtin-skills/nightly-topic-knowledge/_topics_lib.py +415 -0
- package/builtin-skills/nightly-topic-knowledge/main.py +403 -0
- package/builtin-skills/nightly-topic-knowledge/skill.yaml +88 -0
- package/builtin-skills/schedule-add/main.py +26 -0
- package/builtin-skills/service-restart/main.ts +249 -0
- package/builtin-skills/service-restart/skill.yaml +49 -0
- package/builtin-skills/todo-add/main.py +3 -1
- package/builtin-skills/todo-delete/main.py +3 -1
- package/builtin-skills/todo-done/main.py +3 -1
- package/builtin-skills/todo-list/main.py +4 -1
- package/builtin-skills/todo-tick/main.py +3 -1
- package/builtin-skills/topic-knowledge-read/main.py +118 -0
- package/builtin-skills/topic-knowledge-read/skill.yaml +49 -0
- package/dist/builder/build-action-classifier.d.ts +18 -0
- package/dist/builder/build-action-classifier.js +82 -1
- package/dist/builder/build-flow.d.ts +33 -4
- package/dist/builder/build-flow.js +251 -89
- package/dist/builder/builder-session.d.ts +1 -1
- package/dist/builder/builder-session.js +112 -7
- package/dist/builder/conversation-router.d.ts +4 -2
- package/dist/builder/conversation-router.js +19 -2
- package/dist/cli/index.js +323 -20
- package/dist/core/ai-provider.d.ts +1 -0
- package/dist/core/ai-provider.js +8 -3
- package/dist/core/chat-engine.d.ts +10 -3
- package/dist/core/chat-engine.js +1563 -197
- package/dist/core/config.d.ts +4 -0
- package/dist/core/config.js +82 -0
- package/dist/core/daily-memory-promotion.d.ts +7 -0
- package/dist/core/daily-memory-promotion.js +568 -14
- package/dist/core/image-attachments.d.ts +31 -0
- package/dist/core/image-attachments.js +237 -0
- package/dist/core/logger.d.ts +2 -1
- package/dist/core/logger.js +77 -1
- package/dist/core/memo-migration.d.ts +3 -0
- package/dist/core/memo-migration.js +422 -0
- package/dist/core/native-modules.d.ts +24 -0
- package/dist/core/native-modules.js +99 -0
- package/dist/core/notifier.d.ts +8 -3
- package/dist/core/notifier.js +191 -17
- package/dist/core/obsidian-vault.d.ts +19 -0
- package/dist/core/obsidian-vault.js +477 -0
- package/dist/core/operating-model.d.ts +2 -0
- package/dist/core/operating-model.js +15 -0
- package/dist/core/output-writer.d.ts +3 -2
- package/dist/core/output-writer.js +108 -7
- package/dist/core/profile-memory.js +22 -1
- package/dist/core/runtime.d.ts +2 -0
- package/dist/core/runtime.js +9 -1
- package/dist/core/secrets.d.ts +4 -0
- package/dist/core/secrets.js +34 -0
- package/dist/core/skill-history.d.ts +44 -0
- package/dist/core/skill-history.js +329 -0
- package/dist/core/skill-registry.d.ts +5 -0
- package/dist/core/skill-registry.js +11 -0
- package/dist/discord/bot.d.ts +13 -0
- package/dist/discord/bot.js +542 -10
- package/dist/even-g2/gateway.d.ts +15 -0
- package/dist/even-g2/gateway.js +868 -0
- package/dist/runtimes/codex-app-server.d.ts +5 -1
- package/dist/runtimes/codex-app-server.js +147 -8
- package/dist/runtimes/python-runner.js +82 -0
- package/dist/runtimes/typescript-runner.js +13 -1
- package/dist/skills-sdk/types.d.ts +19 -4
- package/dist/telegram/bot.d.ts +1 -0
- package/dist/telegram/bot.js +122 -31
- package/package.json +3 -1
- package/templates/even-g2-agent/README.md +83 -0
- package/templates/even-g2-agent/app.json +20 -0
- package/templates/even-g2-agent/index.html +31 -0
- package/templates/even-g2-agent/package-lock.json +1836 -0
- package/templates/even-g2-agent/package.json +22 -0
- package/templates/even-g2-agent/scripts/qr-auto.mjs +182 -0
- package/templates/even-g2-agent/src/embedded-config.ts +4 -0
- package/templates/even-g2-agent/src/main.ts +539 -0
- package/templates/even-g2-agent/src/style.css +70 -0
- package/templates/even-g2-agent/tsconfig.json +11 -0
- package/templates/skill-python/main.py +20 -2
- package/templates/skill-python/skill.yaml +9 -0
- package/templates/skill-typescript/main.ts +40 -5
- package/templates/skill-typescript/skill.yaml +9 -0
|
@@ -0,0 +1,415 @@
|
|
|
1
|
+
"""Helpers for the nightly-topic-knowledge skill.
|
|
2
|
+
|
|
3
|
+
Reads daily memory / conversation log / notes, builds the LLM prompt, parses
|
|
4
|
+
the JSON response, and merges results into per-topic knowledge files with caps.
|
|
5
|
+
File I/O is deliberately kept here so main.py stays focused on orchestration.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
import os
|
|
12
|
+
import re
|
|
13
|
+
from datetime import datetime, timedelta, timezone
|
|
14
|
+
from typing import Any
|
|
15
|
+
from zoneinfo import ZoneInfo
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# Default IANA timezone name and default character budget.
# NOTE(review): neither constant is referenced in this part of the file —
# presumably main.py passes them to resolve_target_date / bundle_sources; confirm.
DEFAULT_TIMEZONE = "Asia/Tokyo"
DEFAULT_MAX_CHARS = 60000

# Topic ids are now open-ended — the model invents them per user so that
# someone who suddenly gets into movies ends up with a movies.json file
# instead of an "other" bucket. Style examples shown to the model below.
# Longest topic id kept by sanitize_topic_id (longer ids are cut to this length).
TOPIC_ID_MAX_LEN = 48
# Kebab-case: lowercase alphanumeric runs separated by single hyphens.
TOPIC_ID_PATTERN = re.compile(r"[a-z0-9]+(?:-[a-z0-9]+)*$")
# Sample ids interpolated into the prompt by build_prompt.
TOPIC_STYLE_EXAMPLES = [
    "agent-sin",
    "movies",
    "ai-models",
    "health",
    "english-learning",
    "personal-finance",
]

# Maximum number of items merge_topic keeps for each list-valued topic field.
FIELD_CAPS: dict[str, int] = {
    "stable_facts": 12,
    "preferences": 12,
    "recent_focus": 8,
    "open_questions": 8,
    "useful_context": 10,
    "avoid_assumptions": 8,
}

# Names of the list-valued topic fields, in FIELD_CAPS order.
LIST_FIELDS = list(FIELD_CAPS.keys())
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
# ---------- date helpers ----------
|
|
48
|
+
|
|
49
|
+
def resolve_target_date(date_arg: str | None, tz_name: str) -> str:
    """Return the date to process as a zero-padded ``YYYY-MM-DD`` string.

    Args:
        date_arg: Explicit date in ``%Y-%m-%d`` form. When None or empty, the
            day before "now" in ``tz_name`` is used.
        tz_name: IANA timezone name; only consulted when ``date_arg`` is empty.

    Returns:
        The ISO formatted date.

    Raises:
        ValueError: If ``date_arg`` is not a valid ``%Y-%m-%d`` date.
        zoneinfo.ZoneInfoNotFoundError: If ``tz_name`` is unknown (only when
            ``date_arg`` is empty).
    """
    if date_arg:
        # strptime also accepts non-padded input such as "2024-1-5". Return the
        # re-formatted value instead of the raw argument so that the month
        # folder and file name derived from it are always zero-padded.
        return datetime.strptime(date_arg, "%Y-%m-%d").date().isoformat()
    now = datetime.now(ZoneInfo(tz_name))
    return (now - timedelta(days=1)).date().isoformat()
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def split_date(date_str: str) -> tuple[str, str, str]:
    """Break a ``YYYY-MM-DD`` string into its (year, month, day) text parts.

    Raises:
        ValueError: If the string does not consist of exactly three
            "-"-separated parts.
    """
    year, month, day = date_str.split("-")
    return (year, month, day)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
# ---------- source readers ----------
|
|
66
|
+
|
|
67
|
+
def read_daily_memory(memory_dir: str, date_str: str) -> tuple[str | None, str]:
    """Load the daily-memory markdown file for ``date_str``.

    The file is looked up at ``<memory_dir>/daily/<yyyy>/<mm>/<date_str>.md``.

    Returns:
        ``(path, text)``. ``path`` is None when no text was read (missing,
        unreadable or empty file), in which case ``text`` is "".
    """
    year, month, _day = split_date(date_str)
    md_path = os.path.join(memory_dir, "daily", year, month, f"{date_str}.md")
    content = _read_text(md_path)
    if content:
        return md_path, content
    return None, content
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def read_conversation_log(logs_dir: str, date_str: str) -> tuple[str | None, str]:
    """Render the day's JSONL conversation log as plain text.

    The log is read from ``<logs_dir>/conversations/<date_str>.jsonl``. Each
    usable line becomes a ``[<ts> <role>]`` header followed by its content;
    entries are separated by a blank line.

    Returns:
        ``(path, body)``. ``path`` is None and ``body`` is "" when the file is
        missing/empty or contains no usable entries.
    """
    path = os.path.join(logs_dir, "conversations", f"{date_str}.jsonl")
    raw = _read_text(path)
    if not raw:
        return (None, "")
    blocks: list[str] = []
    for line in raw.splitlines():
        line = line.strip()
        if not line:
            continue
        try:
            entry = json.loads(line)
        except (ValueError, RecursionError):
            # Best effort: a corrupt line must not discard the rest of the log.
            continue
        if not isinstance(entry, dict):
            # Valid JSON that is not an object (list, number, string, ...) has
            # no role/content keys; skip it instead of failing on .get().
            continue
        role = str(entry.get("role") or entry.get("source") or "").strip()
        content = str(entry.get("content") or "").strip()
        if not content:
            continue
        ts = str(entry.get("ts") or "").strip()
        header = " ".join(part for part in (ts, role) if part)
        blocks.append(f"[{header}]\n{content}".rstrip())
    body = "\n\n".join(blocks)
    return (path if body else None), body
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def read_notes(notes_dir: str, date_str: str) -> tuple[str | None, str]:
    """Load the notes markdown file for ``date_str``.

    The file is looked up at ``<notes_dir>/<yyyy>/<mm>/<date_str>.md``.

    Returns:
        ``(path, text)``. ``path`` is None when no text was read, in which
        case ``text`` is "".
    """
    year, month, _day = split_date(date_str)
    note_path = os.path.join(notes_dir, year, month, f"{date_str}.md")
    content = _read_text(note_path)
    if content:
        return note_path, content
    return None, content
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _read_text(path: str) -> str:
|
|
107
|
+
if not path or not os.path.isfile(path):
|
|
108
|
+
return ""
|
|
109
|
+
try:
|
|
110
|
+
with open(path, "r", encoding="utf-8") as f:
|
|
111
|
+
return f.read()
|
|
112
|
+
except Exception:
|
|
113
|
+
return ""
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def bundle_sources(parts: list[tuple[str, str]], max_chars: int) -> tuple[str, int]:
    """Concatenate (label, text) pairs into a single bounded blob.

    Each non-empty text is emitted as ``## <label>`` followed by the stripped
    text; sections are separated by a blank line. When a section does not fit
    into what is left of ``max_chars`` it is cut, a ``... [truncated]`` marker
    is appended, and no further sections are added.

    Args:
        parts: ``(label, text)`` pairs in priority order; empty or None texts
            are skipped.
        max_chars: Upper bound for the length of the returned blob.

    Returns:
        ``(blob, length)`` where ``length == len(blob) <= max_chars``.
    """
    marker = "\n... [truncated]"
    joiner = "\n\n"
    chunks: list[str] = []
    used = 0
    for label, text in parts:
        text = (text or "").strip()
        if not text:
            continue
        header = f"## {label}\n"
        separator = len(joiner) if chunks else 0
        room = max_chars - used - separator - len(header)
        if room <= 0:
            break
        truncated = len(text) > room
        if truncated:
            # Reserve space for the marker so the blob stays within max_chars;
            # the final slice covers budgets smaller than the marker itself.
            text = (text[: max(0, room - len(marker))] + marker)[:room]
        chunks.append(header + text)
        used += separator + len(header) + len(text)
        if truncated:
            # The budget is exhausted once a section had to be cut.
            break
    return (joiner.join(chunks), used)
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
# ---------- LLM prompt + parsing ----------
|
|
137
|
+
|
|
138
|
+
def build_prompt(date_str: str, source_bundle: str, known_topic_ids: list[str] | None = None) -> str:
    """Assemble the distillation prompt sent to the LLM.

    The prompt consists of the fixed rules, the target date, an optional list
    of already existing topic ids, the expected JSON output shape and finally
    the bundled sources (or "(empty)" when there are none).

    Args:
        date_str: Date label inserted into the prompt.
        source_bundle: Pre-bundled source text.
        known_topic_ids: Existing topic ids the model should prefer to reuse.

    Returns:
        The prompt as one newline-joined string.
    """
    # Insertion order matters: json.dumps renders keys in this order.
    topic_shape = {
        "topic_id": "<short kebab-case ASCII id you choose for this theme>",
        "summary": "<1-3 short sentences capturing the current understanding>",
        "recent_focus": ["<recently emphasized concrete sub-themes>"],
        "stable_facts": ["<long-lasting, non-time-sensitive facts>"],
        "preferences": ["<user preferences or decision criteria>"],
        "open_questions": ["<unresolved points to revisit>"],
        "useful_context": ["<short context lines that help future replies>"],
        "avoid_assumptions": ["<things to NOT assume>"],
    }
    output_shape = {"topics": [topic_shape]}
    example_ids = ", ".join(TOPIC_STYLE_EXAMPLES)

    rules = [
        "You are condensing one day of an individual user's conversations, daily memory, and notes",
        "into per-topic knowledge that an assistant can re-use in future chats.",
        "Hard rules:",
        "- Output ONE JSON object only. No prose, no markdown fences.",
        "- Choose topic_ids freely — there is no allow-list. One topic per coherent theme.",
        "- topic_id must be short kebab-case ASCII (lowercase a-z, 0-9, hyphen). 1-48 chars.",
        f" Examples: {example_ids}.",
        "- Reuse an existing topic_id when the theme matches (see the list below). Invent a new id only for genuinely new themes.",
        "- Do NOT preserve everything. Keep only items that will still be useful in future conversations.",
        "- Drop small talk, transient emotions, short-term task progress, and any secrets / credentials.",
        "- Prefer the user's continuing interests, judgement criteria, preferences, and spec decisions.",
        "- Each list item must be a short standalone sentence (one line, < 200 chars).",
        "- Newer signals matter more than older ones.",
        "- Empty arrays are fine; omit fields you cannot fill with care.",
        "- Respond in the same language the user mostly used in the sources (Japanese in most cases).",
        "",
        f"Date: {date_str}",
        "",
    ]

    # The "existing ids" section is only present when there is something to list.
    known_section: list[str] = []
    if known_topic_ids:
        known_section.append("Existing topic ids (prefer reusing these when they fit):")
        known_section += [f"- {tid}" for tid in known_topic_ids]
        known_section.append("")

    closing = [
        "JSON schema (output shape):",
        json.dumps(output_shape, ensure_ascii=False, indent=2),
        "",
        "Sources:",
        source_bundle or "(empty)",
    ]
    return "\n".join(rules + known_section + closing)
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def sanitize_topic_id(raw: object) -> str | None:
    """Turn a model-supplied topic id into safe kebab-case.

    Returns None when ``raw`` is not a string or nothing usable remains after
    normalisation.
    """
    if not isinstance(raw, str):
        return None
    # Every run of characters outside a-z / 0-9 becomes a single hyphen;
    # hyphens at either end are then removed.
    slug = re.sub(r"[^a-z0-9]+", "-", raw.strip().lower()).strip("-")
    if not slug:
        return None
    # Cutting at the length limit may leave a trailing hyphen; drop it again.
    slug = slug[:TOPIC_ID_MAX_LEN].rstrip("-")
    return slug if TOPIC_ID_PATTERN.match(slug) else None
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
# Locates the first "{" that is followed somewhere by a "}". Only the start
# offset of the match is used; the greedy span itself is not parsed.
_JSON_OBJECT_PATTERN = re.compile(r"\{[\s\S]*\}")


def parse_llm_response(text: str) -> dict[str, Any]:
    """Extract the JSON object from an LLM reply.

    The reply is parsed strictly first (after removing a surrounding code
    fence). If that does not yield an object, the first JSON object embedded
    in the text is decoded and anything after it is ignored.

    Args:
        text: Raw model output.

    Returns:
        The decoded JSON object.

    Raises:
        ValueError: If the reply is empty, contains no JSON object, or the
            embedded object is malformed (``json.JSONDecodeError`` is a
            ``ValueError`` subclass).
    """
    raw = (text or "").strip()
    if not raw:
        raise ValueError("Empty LLM response")
    # Drop common code-fence wrappers before trying strict parse.
    fenced = re.match(r"^```(?:json)?\s*([\s\S]*?)\s*```$", raw)
    if fenced:
        raw = fenced.group(1).strip()
    try:
        parsed = json.loads(raw)
    except ValueError:
        parsed = None
    if isinstance(parsed, dict):
        return parsed
    # Fall back to the first JSON object in the text. raw_decode stops at the
    # end of that object, so trailing prose (even prose containing braces)
    # does not break parsing the way a greedy "{...}" slice would.
    match = _JSON_OBJECT_PATTERN.search(raw)
    if not match:
        raise ValueError("LLM response did not contain JSON")
    obj, _end = json.JSONDecoder().raw_decode(raw, match.start())
    return obj
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
def normalize_topics(parsed: dict[str, Any]) -> list[dict[str, Any]]:
    """Validate and clean the ``topics`` array of a parsed LLM response.

    Entries that are not objects, have no usable ``topic_id``, or repeat an id
    that was already seen are dropped. ``summary`` is kept when it is a
    non-blank string. For each field in ``LIST_FIELDS`` only non-blank string
    items are kept, each stripped and cut to 300 characters; fields that end
    up empty are omitted.

    Args:
        parsed: Decoded LLM response.

    Returns:
        Cleaned topic dicts in input order; empty when ``parsed`` is not a
        dict or has no ``topics`` list.
    """
    # The decoded payload comes from a model and may be any JSON value.
    if not isinstance(parsed, dict):
        return []
    raw_topics = parsed.get("topics")
    if not isinstance(raw_topics, list):
        return []
    out: list[dict[str, Any]] = []
    seen_ids: set[str] = set()
    for entry in raw_topics:
        if not isinstance(entry, dict):
            continue
        tid = sanitize_topic_id(entry.get("topic_id"))
        if not tid or tid in seen_ids:
            continue
        seen_ids.add(tid)
        topic: dict[str, Any] = {"topic_id": tid}
        summary = entry.get("summary")
        if isinstance(summary, str) and summary.strip():
            topic["summary"] = summary.strip()
        for field in LIST_FIELDS:
            items = entry.get(field)
            if not isinstance(items, list):
                continue
            stripped = (item.strip() for item in items if isinstance(item, str))
            cleaned = [value[:300] for value in stripped if value]
            if cleaned:
                topic[field] = cleaned
        out.append(topic)
    return out
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
def list_existing_topic_ids(memory_dir: str) -> list[str]:
    """List the ids of the topic files stored under ``memory_dir``.

    Looks at ``<memory_dir>/topic-knowledge/topics`` and returns the stem of
    every ``*.json`` entry whose stem is a well-formed topic id.

    Returns:
        Sorted topic ids; empty when the folder does not exist.
    """
    folder = os.path.join(memory_dir, "topic-knowledge", "topics")
    if not os.path.isdir(folder):
        return []
    suffix = ".json"
    ids: list[str] = []
    for name in os.listdir(folder):
        if not name.endswith(suffix):
            continue
        tid = name[: -len(suffix)]
        # fullmatch rather than match: the pattern ends in "$", which would
        # also accept a stem that ends with a newline character.
        if TOPIC_ID_PATTERN.fullmatch(tid):
            ids.append(tid)
    return sorted(ids)
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
# ---------- merge logic ----------
|
|
277
|
+
|
|
278
|
+
def topic_path(memory_dir: str, topic_id: str) -> str:
    """Return the path of the knowledge file for ``topic_id``."""
    filename = f"{topic_id}.json"
    return os.path.join(memory_dir, "topic-knowledge", "topics", filename)
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
def index_path(memory_dir: str) -> str:
    """Return the path of the topic-knowledge index file."""
    knowledge_root = os.path.join(memory_dir, "topic-knowledge")
    return os.path.join(knowledge_root, "index.json")
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
def run_path(memory_dir: str, date_str: str) -> str:
    """Return the path of the run record for ``date_str``."""
    filename = f"{date_str}.json"
    return os.path.join(memory_dir, "topic-knowledge", "runs", filename)
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
def load_json(path: str) -> dict[str, Any] | None:
    """Parse the JSON file at ``path``.

    Returns:
        The decoded value, or None when ``path`` is not a regular file or the
        file cannot be read or parsed.
    """
    if os.path.isfile(path):
        try:
            with open(path, "r", encoding="utf-8") as handle:
                payload = json.load(handle)
        except Exception:
            # Best effort: an unreadable or corrupt file is treated as absent.
            payload = None
        return payload
    return None
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
def write_json(path: str, data: dict[str, Any]) -> None:
    """Write ``data`` to ``path`` as pretty-printed UTF-8 JSON.

    The content is first written to ``<path>.tmp`` and then moved over the
    target with ``os.replace`` so readers never see a half-written file.
    Missing parent directories are created.

    Args:
        path: Destination file path.
        data: JSON-serialisable payload.

    Raises:
        OSError: If the directory or file cannot be written.
        TypeError, ValueError: If ``data`` cannot be serialised.
    """
    parent = os.path.dirname(path)
    if parent:
        # A bare filename has no directory part, and os.makedirs("") raises.
        os.makedirs(parent, exist_ok=True)
    tmp = path + ".tmp"
    try:
        with open(tmp, "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False, indent=2)
            f.write("\n")
        os.replace(tmp, path)
    except BaseException:
        # Do not leave a partial temp file behind when the write fails.
        try:
            os.remove(tmp)
        except OSError:
            pass
        raise
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
def merge_topic(
    existing: dict[str, Any] | None,
    update: dict[str, Any],
    date_str: str,
    now_iso: str,
) -> dict[str, Any]:
    """Merge one freshly distilled topic into its stored knowledge record.

    Args:
        existing: Previously stored topic record, or None for a new topic.
            It is not modified.
        update: Normalised topic produced for this run; must contain
            ``topic_id``.
        date_str: ``YYYY-MM-DD`` date of the sources behind ``update``.
        now_iso: Timestamp stored as ``last_updated``.

    Returns:
        A new record containing the merged list fields (capped per
        ``FIELD_CAPS``), the summary, the last 30 source dates and the
        recomputed ``freshness_score`` / ``confidence``.
    """
    topic_id = update["topic_id"]
    base: dict[str, Any] = dict(existing) if isinstance(existing, dict) else {}
    base["topic_id"] = topic_id
    base.setdefault("name", topic_id)

    # New summary always wins (newer signals matter more), but keep the old
    # one if the LLM did not produce a fresh summary this round.
    if "summary" in update:
        base["summary"] = update["summary"]
    else:
        base.setdefault("summary", "")

    # Merge each list field with caps; new items come first so the most recent
    # distillation surfaces first, then older items fill remaining slots.
    for field, cap in FIELD_CAPS.items():
        older = base.get(field)
        if not isinstance(older, list):
            older = []
        newer = update.get(field)
        if not isinstance(newer, (list, tuple)):
            newer = []
        merged: list[str] = []
        seen: set[str] = set()
        for item in [*newer, *older]:
            if not isinstance(item, str):
                continue
            # Duplicates are detected ignoring case and whitespace differences.
            key = re.sub(r"\s+", " ", item).strip().lower()
            if not key or key in seen:
                continue
            seen.add(key)
            merged.append(item.strip())
            if len(merged) >= cap:
                break
        base[field] = merged

    # Build a fresh collection instead of appending to the stored list: `base`
    # is only a shallow copy, so appending would modify the caller's `existing`
    # record. Non-string entries are dropped because they cannot be sorted
    # together with date strings.
    stored_dates = base.get("source_dates")
    dates = {d for d in stored_dates if isinstance(d, str)} if isinstance(stored_dates, list) else set()
    dates.add(date_str)
    # keep last 30 source dates only
    base["source_dates"] = sorted(dates)[-30:]

    base["last_updated"] = now_iso
    base["freshness_score"] = _freshness_score(base["source_dates"], date_str)
    base["confidence"] = _confidence(update)
    return base
|
|
360
|
+
|
|
361
|
+
|
|
362
|
+
def _freshness_score(source_dates: list[str], reference: str) -> float:
|
|
363
|
+
if not source_dates:
|
|
364
|
+
return 0.0
|
|
365
|
+
try:
|
|
366
|
+
ref = datetime.strptime(reference, "%Y-%m-%d").date()
|
|
367
|
+
except Exception:
|
|
368
|
+
return 0.0
|
|
369
|
+
score = 0.0
|
|
370
|
+
for d in source_dates[-14:]:
|
|
371
|
+
try:
|
|
372
|
+
day = datetime.strptime(d, "%Y-%m-%d").date()
|
|
373
|
+
except Exception:
|
|
374
|
+
continue
|
|
375
|
+
age_days = (ref - day).days
|
|
376
|
+
if age_days < 0:
|
|
377
|
+
continue
|
|
378
|
+
score += max(0.0, 1.0 - age_days / 14.0)
|
|
379
|
+
return round(min(1.0, score / 7.0), 3)
|
|
380
|
+
|
|
381
|
+
|
|
382
|
+
def _confidence(update: dict[str, Any]) -> float:
    """Score in [0.0, 1.0] for how much of the update the model filled in.

    Counts the non-empty list fields of ``update`` plus one for a present
    ``summary`` key; five or more populated parts give 1.0.
    """
    filled = [field for field in LIST_FIELDS if update.get(field)]
    count = len(filled) + (1 if "summary" in update else 0)
    return round(min(1.0, count / 5.0), 3)
|
|
387
|
+
|
|
388
|
+
|
|
389
|
+
def build_index(
    existing: dict[str, Any] | None,
    topic_files: list[dict[str, Any]],
    now_iso: str,
) -> dict[str, Any]:
    """Build the topic index from the previous index and this run's topics.

    Entries of the previous index are carried over unchanged unless the same
    ``topic_id`` appears in ``topic_files``, in which case a fresh summary
    entry replaces them.

    Args:
        existing: Previously stored index, or None.
        topic_files: Topic records written during the latest run.
        now_iso: Timestamp stored as ``updated_at`` and used as fallback
            ``last_updated``.

    Returns:
        The index with ``topics`` sorted by id and ``recent`` listing the ids
        from ``topic_files`` in the given order.
    """
    catalog: dict[str, dict[str, Any]] = {}
    carried_over = (existing.get("topics") or []) if isinstance(existing, dict) else []
    for record in carried_over:
        if isinstance(record, dict) and record.get("topic_id"):
            catalog[record["topic_id"]] = record

    # recent = topics touched in the latest run (passed in via topic_files)
    touched: list[str] = []
    for doc in topic_files:
        key = doc["topic_id"]
        touched.append(key)
        catalog[key] = {
            "topic_id": key,
            "name": doc.get("name", key),
            "summary": doc.get("summary", ""),
            "last_updated": doc.get("last_updated", now_iso),
        }

    return {
        "version": 1,
        "updated_at": now_iso,
        "topics": sorted(catalog.values(), key=lambda row: row["topic_id"]),
        "recent": touched,
    }
|