easyoref 1.13.0 → 1.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. package/dist/agent/auth.d.ts +11 -0
  2. package/dist/agent/auth.d.ts.map +1 -0
  3. package/dist/agent/auth.js +54 -0
  4. package/dist/agent/auth.js.map +1 -0
  5. package/dist/agent/clarify.d.ts +44 -0
  6. package/dist/agent/clarify.d.ts.map +1 -0
  7. package/dist/agent/clarify.js +283 -0
  8. package/dist/agent/clarify.js.map +1 -0
  9. package/dist/agent/dry-run.d.ts +12 -0
  10. package/dist/agent/dry-run.d.ts.map +1 -0
  11. package/dist/agent/dry-run.js +229 -0
  12. package/dist/agent/dry-run.js.map +1 -0
  13. package/dist/agent/gramjs-monitor.d.ts +26 -0
  14. package/dist/agent/gramjs-monitor.d.ts.map +1 -0
  15. package/dist/agent/gramjs-monitor.js +320 -0
  16. package/dist/agent/gramjs-monitor.js.map +1 -0
  17. package/dist/agent/graph.d.ts +111 -0
  18. package/dist/agent/graph.d.ts.map +1 -0
  19. package/dist/agent/graph.js +1142 -0
  20. package/dist/agent/graph.js.map +1 -0
  21. package/dist/agent/queue.d.ts +15 -0
  22. package/dist/agent/queue.d.ts.map +1 -0
  23. package/dist/agent/queue.js +41 -0
  24. package/dist/agent/queue.js.map +1 -0
  25. package/dist/agent/redis.d.ts +8 -0
  26. package/dist/agent/redis.d.ts.map +1 -0
  27. package/dist/agent/redis.js +33 -0
  28. package/dist/agent/redis.js.map +1 -0
  29. package/dist/agent/store.d.ts +71 -0
  30. package/dist/agent/store.d.ts.map +1 -0
  31. package/dist/agent/store.js +104 -0
  32. package/dist/agent/store.js.map +1 -0
  33. package/dist/agent/tools.d.ts +159 -0
  34. package/dist/agent/tools.d.ts.map +1 -0
  35. package/dist/agent/tools.js +439 -0
  36. package/dist/agent/tools.js.map +1 -0
  37. package/dist/agent/types.d.ts +163 -0
  38. package/dist/agent/types.d.ts.map +1 -0
  39. package/dist/agent/types.js +26 -0
  40. package/dist/agent/types.js.map +1 -0
  41. package/dist/agent/worker.d.ts +14 -0
  42. package/dist/agent/worker.d.ts.map +1 -0
  43. package/dist/agent/worker.js +90 -0
  44. package/dist/agent/worker.js.map +1 -0
  45. package/dist/bin.d.ts +17 -0
  46. package/dist/bin.d.ts.map +1 -0
  47. package/dist/bin.js +82 -0
  48. package/dist/bin.js.map +1 -0
  49. package/dist/bot.d.ts +16 -0
  50. package/dist/bot.d.ts.map +1 -0
  51. package/dist/bot.js +600 -0
  52. package/dist/bot.js.map +1 -0
  53. package/dist/config.d.ts +125 -0
  54. package/dist/config.d.ts.map +1 -0
  55. package/dist/config.js +145 -0
  56. package/dist/config.js.map +1 -0
  57. package/dist/gif-state.d.ts +17 -0
  58. package/dist/gif-state.d.ts.map +1 -0
  59. package/dist/gif-state.js +67 -0
  60. package/dist/gif-state.js.map +1 -0
  61. package/dist/i18n.d.ts +49 -0
  62. package/dist/i18n.d.ts.map +1 -0
  63. package/dist/i18n.js +229 -0
  64. package/dist/i18n.js.map +1 -0
  65. package/dist/init.d.ts +7 -0
  66. package/dist/init.d.ts.map +1 -0
  67. package/dist/init.js +163 -0
  68. package/dist/init.js.map +1 -0
  69. package/dist/logger.d.ts +14 -0
  70. package/dist/logger.d.ts.map +1 -0
  71. package/dist/logger.js +45 -0
  72. package/dist/logger.js.map +1 -0
  73. package/dist/service.d.ts +19 -0
  74. package/dist/service.d.ts.map +1 -0
  75. package/dist/service.js +165 -0
  76. package/dist/service.js.map +1 -0
  77. package/package.json +1 -1
@@ -0,0 +1,1142 @@
1
+ /**
2
+ * LangGraph.js enrichment pipeline — phase-aware, time-validated.
3
+ *
4
+ * KEY DESIGN PRINCIPLES:
5
+ * 1. TIME IS KING — every post is validated against the alert time window.
6
+ * LLM receives alert time + post time and scores time_relevance.
7
+ * Posts about previous/different attacks are rejected.
8
+ * 2. PHASE-AWARE — each phase extracts only what's relevant:
9
+ * - early_warning: origin, ETA, rocket count, cassette
10
+ * - siren: carries early data + adds interception, impacts
11
+ * - resolved: carries all + adds casualties, injuries, final stats
12
+ * 3. CARRY-FORWARD — results persist in Redis (EnrichmentData).
13
+ * Each phase inherits previous phase's findings.
14
+ * 4. INLINE CITATIONS — no superscripts, no footer sources.
15
+ * Format: <a href="url">[1]</a> (HTML anchor) right after each data point.
16
+ * 5. DEDUP EDITS — hash-based check prevents "message not modified" spam.
17
+ *
18
+ * Pipeline:
19
+ * preFilter → extractAndValidate → postFilter → vote → [clarify] → editMessage
20
+ */
21
+ import { Annotation, MemorySaver, StateGraph } from "@langchain/langgraph";
22
+ import { ChatOpenAI } from "@langchain/openai";
23
+ import { Bot } from "grammy";
24
+ import { createHash } from "node:crypto";
25
+ import { config } from "../config.js";
26
+ import * as logger from "../logger.js";
27
+ import { runClarify } from "./clarify.js";
28
+ import { getActiveSession, getChannelPosts, getEnrichmentData, saveEnrichmentData, } from "./store.js";
29
+ import { emptyEnrichmentData } from "./types.js";
30
+ // ── State ──────────────────────────────────────────────
31
/**
 * Creates a state channel whose reducer ignores the previous value and keeps
 * whatever was written last.
 */
const lastWriteWins = () => Annotation({ reducer: (_previous, next) => next });

/**
 * LangGraph state schema for the enrichment pipeline.
 * Every channel uses the same "latest write replaces the old value" reducer.
 */
const AgentState = Annotation.Root({
  alertId: lastWriteWins(),
  alertTs: lastWriteWins(),
  alertType: lastWriteWins(),
  alertAreas: lastWriteWins(),
  chatId: lastWriteWins(),
  messageId: lastWriteWins(),
  isCaption: lastWriteWins(),
  currentText: lastWriteWins(),
  channelPosts: lastWriteWins(),
  filteredPosts: lastWriteWins(),
  extractions: lastWriteWins(),
  votedResult: lastWriteWins(),
  /** Tracks whether clarify has already run (prevents infinite loop) */
  clarifyAttempted: lastWriteWins(),
  /** Cross-phase enrichment data loaded at start */
  previousEnrichment: lastWriteWins(),
  /** Session start timestamp for time window calculations */
  sessionStartTs: lastWriteWins(),
  /** Phase start timestamp */
  phaseStartTs: lastWriteWins(),
});
53
+ // ── LLM ───────────────────────────────────────────────
54
/**
 * Creates a ChatOpenAI client that talks to the OpenRouter endpoint using the
 * model and API key from `config.agent`, with temperature 0 and a 500-token
 * output cap.
 *
 * @returns {ChatOpenAI} A new client instance (one per call).
 */
function getLLM() {
  const openRouterEndpoint = {
    baseURL: "https://openrouter.ai/api/v1",
    defaultHeaders: {
      "HTTP-Referer": "https://github.com/mikhailkogan17/EasyOref",
      "X-Title": "EasyOref",
    },
  };
  const clientOptions = {
    model: config.agent.model,
    configuration: openRouterEndpoint,
    apiKey: config.agent.apiKey,
    temperature: 0,
    maxTokens: 500,
  };
  return new ChatOpenAI(clientOptions);
}
69
+ // ── Region keywords (Hebrew + transliterations) ────────
70
/**
 * Builds the lower-cased, de-duplicated keyword list used by the pre-filter.
 *
 * Sources, in order: each configured area name plus its first word (when at
 * least 2 characters), each `areaLabels` key plus every label word of at
 * least 3 characters, and a fixed set of attack-related terms.
 *
 * @returns {string[]} Unique keywords in first-seen order.
 */
function buildRegionKeywords() {
  const unique = new Set();

  for (const areaName of config.areas) {
    unique.add(areaName.toLowerCase());
    const [leadingWord] = areaName.split(" ");
    if (leadingWord && leadingWord.length >= 2) {
      unique.add(leadingWord.toLowerCase());
    }
  }

  Object.entries(config.agent.areaLabels).forEach(([hebrewName, label]) => {
    unique.add(hebrewName.toLowerCase());
    label
      .split(/\s+/)
      .filter((word) => word.length >= 3)
      .forEach((word) => unique.add(word.toLowerCase()));
  });

  // Common attack-related keywords (always relevant)
  const attackTerms = [
    "ישראל",
    "israel",
    "израиль",
    "ракет",
    "rocket",
    "missile",
    "iron dome",
    "כיפת ברזל",
    "перехват",
    "intercept",
    "צבע אדום",
    "red alert",
  ];
  attackTerms.forEach((term) => unique.add(term));

  return Array.from(unique);
}
89
+ // ── Launch detection keywords (strict — early_warning only) ──
90
/**
 * Phrases that indicate a launch report. Matched as lower-cased substrings
 * against post text by the early_warning pre-filter.
 */
const LAUNCH_KEYWORDS = [
  // Hebrew
  [
    "שיגור",
    "שיגורים",
    "שוגרו",
    "נורו",
  ],
  // Russian
  [
    "зафиксированы запуски",
    "обнаружены запуски",
    "запуски ракет",
    "запуск ракет",
    "пуски ракет",
    "ракетный обстрел",
    "ракетная атака",
  ],
  // English
  [
    "missile launch",
    "rocket launch",
    "barrage",
    "fired towards",
    "launches detected",
    "missiles fired",
  ],
  // Hebrew (phrases)
  [
    "שיגורים לישראל",
    "ירי טילים",
    "ירי רקטות",
  ],
  // Arabic
  [
    "إطلاق صواريخ",
  ],
].flatMap((group) => group.map((phrase) => phrase.toLowerCase()));
113
+ // ── Time window per phase (ms before alertTs to accept posts) ──
114
/**
 * How far before the alert timestamp a post may be and still be accepted,
 * per phase, in milliseconds (declared below in minutes).
 */
const TIME_WINDOW_MS = Object.fromEntries(
  Object.entries({
    early_warning: 5, // 5 min before alert
    siren: 10, // 10 min (includes early_warning period)
    resolved: 30, // 30 min (full session window)
  }).map(([phase, minutes]) => [phase, minutes * 60 * 1000]),
);
119
+ // ── Helpers ────────────────────────────────────────────
120
/**
 * Renders a timestamp as a two-digit hour:minute clock string in the
 * Asia/Jerusalem time zone using the he-IL locale.
 *
 * @param {number} ts Value accepted by the `Date` constructor (epoch ms here).
 * @returns {string} Formatted time of day.
 */
function toIsraelTime(ts) {
  const clockOptions = {
    hour: "2-digit",
    minute: "2-digit",
    timeZone: "Asia/Jerusalem",
  };
  const moment = new Date(ts);
  return moment.toLocaleTimeString("he-IL", clockOptions);
}
128
/**
 * Hex-encoded MD5 digest of a message text, used to detect unchanged edits
 * (deduplication only, not a security control).
 *
 * @param {string} text Text to fingerprint.
 * @returns {string} 32-character lowercase hex digest.
 */
function textHash(text) {
  const hasher = createHash("md5");
  hasher.update(text);
  return hasher.digest("hex");
}
132
+ // ─────────────────────────────────────────────────────────
133
+ // Tier 0: Pre-filter (phase-aware, time-bounded, 0 tokens)
134
+ // ─────────────────────────────────────────────────────────
135
/**
 * Tier 0 pre-filter (no LLM tokens): loads the session's channel posts and
 * narrows them to candidates worth sending to extraction.
 *
 * - Every phase drops posts older than `alertTs - TIME_WINDOW_MS[alertType]`.
 *   An unrecognized `alertType` falls back to the `resolved` window instead
 *   of producing a NaN cutoff that would disable time filtering entirely.
 * - `early_warning` keeps only region-keyword posts from channels that have
 *   published a launch keyword inside the window, starting from that
 *   channel's first launch post.
 * - Other phases keep any in-window post that mentions a region keyword.
 * - Posts without text are treated as empty text (they match no keyword).
 *
 * @param {object} state Graph state; reads `alertId`, `alertType`, `alertTs`.
 * @returns {Promise<object>} Partial state: `channelPosts`, `filteredPosts`,
 *   `previousEnrichment`, `sessionStartTs`, `phaseStartTs`.
 */
async function collectAndPreFilter(state) {
  const { alertId, alertType, alertTs } = state;

  // The three store reads are independent of each other, so run them together.
  const [posts, prevEnrichment, session] = await Promise.all([
    getChannelPosts(alertId),
    getEnrichmentData(),
    getActiveSession(),
  ]);

  const carried = {
    channelPosts: posts,
    previousEnrichment: prevEnrichment,
    sessionStartTs: session?.sessionStartTs ?? alertTs,
    phaseStartTs: session?.phaseStartTs ?? alertTs,
  };

  if (posts.length === 0) {
    logger.info("Agent: no posts in session", { alertId });
    return { ...carried, filteredPosts: [] };
  }

  let windowMs = TIME_WINDOW_MS[alertType];
  if (windowMs === undefined) {
    logger.warn("Agent: unknown alert type, using resolved time window", {
      alertId,
      alertType,
    });
    windowMs = TIME_WINDOW_MS.resolved;
  }
  const cutoffTs = alertTs - windowMs;

  const keywords = buildRegionKeywords();
  const lowerText = (post) => (post.text ?? "").toLowerCase();
  const isTooOld = (post) => post.ts < cutoffTs;
  const mentionsRegion = (post) => {
    const text = lowerText(post);
    return keywords.some((kw) => text.includes(kw));
  };

  let filtered;
  if (alertType === "early_warning") {
    // Step 1: in-window posts that contain a launch keyword.
    const launchPosts = posts.filter((post) => {
      if (isTooOld(post)) {
        return false;
      }
      const text = lowerText(post);
      return LAUNCH_KEYWORDS.some((kw) => text.includes(kw));
    });

    // Step 2: earliest launch post timestamp per channel.
    const channelFirstLaunchTs = new Map();
    for (const post of launchPosts) {
      const earliest = channelFirstLaunchTs.get(post.channel);
      if (earliest === undefined || post.ts < earliest) {
        channelFirstLaunchTs.set(post.channel, post.ts);
      }
    }

    // Step 3: region posts from launch channels, from their first launch post on.
    filtered = posts.filter((post) => {
      if (isTooOld(post) || !mentionsRegion(post)) {
        return false;
      }
      const firstLaunch = channelFirstLaunchTs.get(post.channel);
      return firstLaunch !== undefined && post.ts >= firstLaunch;
    });

    logger.info("Agent: pre-filter (early_warning)", {
      alertId,
      total: posts.length,
      launch_posts: launchPosts.length,
      launch_channels: channelFirstLaunchTs.size,
      after_filter: filtered.length,
      cutoff: toIsraelTime(cutoffTs),
    });
  } else {
    // Siren & resolved: broader filter, still time-bounded.
    filtered = posts.filter((post) => !isTooOld(post) && mentionsRegion(post));

    logger.info("Agent: pre-filter", {
      alertId,
      alertType,
      total: posts.length,
      after_filter: filtered.length,
      cutoff: toIsraelTime(cutoffTs),
    });
  }

  return { ...carried, filteredPosts: filtered };
}
221
+ // ─────────────────────────────────────────────────────────
222
+ // Tier 1: Extract + validate (1 LLM call per post)
223
+ // Phase-aware prompts — agent knows what to look for.
224
+ // TIME CONTEXT — agent sees alert time + post time.
225
+ // ─────────────────────────────────────────────────────────
226
/**
 * The allowed qualitative-count labels, rendered as a `"a"|"b"|…` union for
 * interpolation into the extraction prompt's JSON schema.
 */
const QUAL_VALUES = [
  "all",
  "most",
  "many",
  "few",
  "exists",
  "none",
  "more_than",
  "less_than",
]
  .map((label) => `"${label}"`)
  .join("|");
227
/**
 * Returns the phase-specific block appended to the extraction system prompt.
 *
 * @param {string} alertType One of "early_warning", "siren", "resolved".
 * @returns {string} Instruction text for that phase. An unrecognized phase
 *   yields an empty string so the caller's string concatenation never injects
 *   the literal text "undefined" into the prompt.
 */
function getPhaseInstructions(alertType) {
  switch (alertType) {
    case "early_warning":
      return `PHASE: EARLY WARNING (radar detected launches, sirens not yet).
Focus on: country_origin (WHERE were rockets launched from?), eta_refined_minutes, rocket_count, is_cassette.
Do NOT extract: intercepted, sea_impact, open_area_impact, hits_confirmed, casualties, injuries — these are IMPOSSIBLE at this stage.
If a message discusses interception results, it is about a PREVIOUS attack — set time_relevance=0.`;
    case "siren":
      return `PHASE: SIREN (rockets incoming, impact imminent).
Focus on: country_origin (if not known yet), rocket_count, intercepted, sea_impact, open_area_impact, is_cassette.
Do NOT extract: hits_confirmed, casualties, injuries — too early for confirmed damage reports.
If a message discusses casualties or confirmed hits, verify the timing carefully - it may be about a previous attack.`;
    case "resolved":
      return `PHASE: RESOLVED (incident over, assessing damage).
Focus on: intercepted (final count), hits_confirmed, casualties, injuries, open_area_impact.
All fields are valid at this stage. Prioritize confirmed official reports.`;
    default:
      // No phase guidance available; the base prompt alone still applies.
      return "";
  }
}
246
/**
 * Phase-independent system prompt for per-post extraction.
 *
 * It tells the model to (1) judge whether the post concerns the current
 * attack via `time_relevance`, (2) return a single JSON object with the
 * score fields and the nullable data fields listed below, and (3) follow the
 * extraction rules at the end. QUAL_VALUES is interpolated as the allowed
 * set for every `*_qual` field. The caller appends the phase-specific
 * instructions after this text.
 */
const SYSTEM_PROMPT_BASE = `You analyze Telegram channel messages about a missile/rocket attack on Israel.
Your job: extract factual data, assess quality, AND validate temporal relevance.

CRITICAL — TIME VALIDATION:
You will receive the alert time and the post time. You MUST determine if this post
is about the CURRENT attack or about a previous/different event.
- If post discusses events clearly BEFORE the alert time → time_relevance=0
- If post is generic military news not specific to this attack → time_relevance=0.2
- If post discusses the current attack → time_relevance=1.0
- If uncertain → time_relevance=0.5 (the system will use alert_history to verify)

Return ONLY valid JSON (no markdown, no explanation):
{
"region_relevance": float, // 0–1: does this message discuss the specified alert region?
"source_trust": float, // 0–1: factual reporting (1.0) vs unverified rumors/panic (0.0)
"tone": "calm"|"neutral"|"alarmist",
"time_relevance": float, // 0–1: is this post about the CURRENT attack? (see rules above)
"country_origin": string|null, // "Iran","Yemen","Lebanon","Gaza","Iraq","Syria" or null
"rocket_count": int|null,
"is_cassette": bool|null,
"intercepted": int|null,
"intercepted_qual": ${QUAL_VALUES}|null,
"intercepted_qual_num": int|null,
"sea_impact": int|null,
"sea_impact_qual": ${QUAL_VALUES}|null,
"sea_impact_qual_num": int|null,
"open_area_impact": int|null,
"open_area_impact_qual": ${QUAL_VALUES}|null,
"open_area_impact_qual_num": int|null,
"hits_confirmed": int|null,
"casualties": int|null,
"injuries": int|null,
"eta_refined_minutes": int|null,
"confidence": float
}

Rules:
- If unrelated to the alert region, set region_relevance=0 and all data fields to null.
- If message is speculative/unconfirmed rumor, set source_trust < 0.4.
- If message uses excessive caps, exclamation marks, panic language → tone="alarmist".
- Only extract concrete numbers explicitly stated in the text. Never guess.
- *_qual fields: use ONLY when NO exact count is given. If exact number present, set *_qual=null.
- "none" qual is only valid if explicitly stated (e.g., "все перехвачены", "не упало в море").
- For IDF (@idf_telegram) posts about ongoing operations (not this specific attack) → time_relevance=0.
- LANGUAGE NEUTRALITY: Posts may be in Hebrew, Russian, Arabic, or English. The language of the post
MUST NOT affect source_trust or confidence. Russian-language Israeli channels are equally reliable
and often break news faster than Hebrew ones. Judge ONLY by factual content and tone.`;
293
/** Nullable data fields of an extraction; anything the LLM omits is stored as null. */
const EXTRACTION_DATA_FIELDS = [
  "country_origin",
  "rocket_count",
  "is_cassette",
  "intercepted",
  "intercepted_qual",
  "intercepted_qual_num",
  "sea_impact",
  "sea_impact_qual",
  "sea_impact_qual_num",
  "open_area_impact",
  "open_area_impact_qual",
  "open_area_impact_qual_num",
  "hits_confirmed",
  "casualties",
  "injuries",
  "eta_refined_minutes",
];

/**
 * Turns the parsed LLM reply into a complete extraction record.
 * Missing data fields become null and missing or non-numeric scores get
 * conservative defaults, so the deterministic filters that follow never
 * compare against `undefined` (which would pass every `< threshold` check).
 * Throws when the reply is not a JSON object.
 */
function normalizeExtraction(parsed, post) {
  if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
    throw new TypeError("LLM reply is not a JSON object");
  }
  const scoreOr = (value, fallback) =>
    typeof value === "number" && Number.isFinite(value) ? value : fallback;
  const dataFields = Object.fromEntries(
    EXTRACTION_DATA_FIELDS.map((field) => [field, parsed[field] ?? null]),
  );
  return {
    ...parsed,
    ...dataFields,
    region_relevance: scoreOr(parsed.region_relevance, 0),
    source_trust: scoreOr(parsed.source_trust, 0),
    tone: parsed.tone ?? "neutral",
    // 0.5 is the prompt's own "uncertain" value and still passes the time check.
    time_relevance: scoreOr(parsed.time_relevance, 0.5),
    confidence: scoreOr(parsed.confidence, 0),
    channel: post.channel,
    messageUrl: post.messageUrl,
    valid: true,
  };
}

/** Placeholder record for a post whose LLM call or JSON parsing failed. */
function failedExtraction(post) {
  return {
    channel: post.channel,
    messageUrl: post.messageUrl,
    region_relevance: 0,
    source_trust: 0,
    tone: "neutral",
    time_relevance: 0,
    ...Object.fromEntries(EXTRACTION_DATA_FIELDS.map((field) => [field, null])),
    confidence: 0,
    valid: false,
    reject_reason: "extraction_error",
  };
}

/**
 * Tier 1: runs one LLM call per filtered post (first 8 posts only, all in
 * parallel) and returns one extraction record per post.
 *
 * Each call receives the base system prompt plus the phase instructions, and
 * a user message with the alert time, post time, current time, alert region,
 * UI language, channel name and the first 800 characters of the post.
 * A failed call or unparsable reply is logged and yields an invalid record
 * with `reject_reason: "extraction_error"` rather than failing the batch.
 *
 * @param {object} state Graph state; reads `filteredPosts`, `alertAreas`,
 *   `alertTs`, `alertType`, `alertId`.
 * @returns {Promise<{extractions: object[]}>} Partial state update.
 */
async function extractAndValidate(state) {
  if (state.filteredPosts.length === 0) {
    logger.info("Agent: no filtered posts to extract", {
      alertId: state.alertId,
    });
    return { extractions: [] };
  }

  const llm = getLLM();
  const posts = state.filteredPosts.slice(0, 8); // max 8 posts
  const regionHint =
    state.alertAreas.length > 0
      ? state.alertAreas.join(", ")
      : Object.keys(config.agent.areaLabels).join(", ") || "Israel";
  const alertTimeIL = toIsraelTime(state.alertTs);
  const nowIL = toIsraelTime(Date.now());
  const systemPrompt = `${SYSTEM_PROMPT_BASE}\n\n${getPhaseInstructions(state.alertType)}`;

  /** Human-readable offset of the post relative to the alert. */
  const describeOffset = (postTs) => {
    const minutesBefore = Math.round((state.alertTs - postTs) / 60_000);
    if (minutesBefore > 0) {
      return `(${minutesBefore} min BEFORE alert)`;
    }
    if (minutesBefore < 0) {
      return `(${Math.abs(minutesBefore)} min AFTER alert)`;
    }
    return "(same time as alert)";
  };

  const results = await Promise.all(
    posts.map(async (post) => {
      const contextHeader =
        `Alert time: ${alertTimeIL} (Israel)\n` +
        `Post time: ${toIsraelTime(post.ts)} (Israel) ${describeOffset(post.ts)}\n` +
        `Current time: ${nowIL} (Israel)\n` +
        `Alert region: ${regionHint}\n` +
        `UI language: ${config.language}\n`;
      try {
        const response = await llm.invoke([
          { role: "system", content: systemPrompt },
          {
            role: "user",
            content: `${contextHeader}Channel: ${post.channel}\n\nMessage:\n${post.text.slice(0, 800)}`,
          },
        ]);
        const raw =
          typeof response.content === "string"
            ? response.content
            : JSON.stringify(response.content);
        // Models sometimes wrap the JSON in a Markdown code fence; strip it.
        const text = raw
          .replace(/^```(?:json)?\s*\n?/i, "")
          .replace(/\n?```\s*$/i, "");
        return normalizeExtraction(JSON.parse(text.trim()), post);
      } catch (err) {
        logger.warn("Agent: extraction failed", {
          channel: post.channel,
          error: String(err),
        });
        return failedExtraction(post);
      }
    }),
  );

  logger.info("Agent: extracted", {
    alertId: state.alertId,
    count: results.length,
    timeRelevance: results.map((r) => ({
      ch: r.channel,
      tr: r.time_relevance,
    })),
  });
  return { extractions: results };
}
388
+ // ─────────────────────────────────────────────────────────
389
+ // Tier 2: Post-filter (deterministic, 0 tokens)
390
+ // Now includes TIME RELEVANCE check.
391
+ // ─────────────────────────────────────────────────────────
392
/** Fields that count as "this post reported something" for the no-data check. */
const POST_FILTER_DATA_FIELDS = [
  "country_origin",
  "rocket_count",
  "is_cassette",
  "intercepted",
  "intercepted_qual",
  "sea_impact",
  "sea_impact_qual",
  "open_area_impact",
  "open_area_impact_qual",
  "hits_confirmed",
  "casualties",
  "injuries",
  "eta_refined_minutes",
];

/**
 * Tier 2: deterministic validation of every extraction (no LLM tokens).
 *
 * Checks run in order and the first failure sets `reject_reason`:
 * time relevance (>= 0.5), region relevance (>= 0.5), source trust (>= 0.4),
 * non-alarmist tone, at least one reported data field, confidence (>= 0.3).
 * A missing or non-numeric score fails its check. Records already marked
 * invalid upstream keep their original `reject_reason`.
 *
 * @param {object} state Graph state; reads `extractions` and `alertId`.
 * @returns {{extractions: object[]}} All records, each with `valid` set and,
 *   when rejected, `reject_reason`.
 */
function postFilter(state) {
  const rejectReasonFor = (ext) => {
    // `!(x >= limit)` also rejects undefined/NaN, which `x < limit` lets through.
    if (!(ext.time_relevance >= 0.5)) {
      return "stale_post";
    }
    if (!(ext.region_relevance >= 0.5)) {
      return "region_irrelevant";
    }
    if (!(ext.source_trust >= 0.4)) {
      return "untrusted_source";
    }
    if (ext.tone === "alarmist") {
      return "alarmist_tone";
    }
    if (!POST_FILTER_DATA_FIELDS.some((field) => ext[field] != null)) {
      return "no_data";
    }
    if (!(ext.confidence >= 0.3)) {
      return "low_confidence";
    }
    return null;
  };

  const validated = state.extractions.map((ext) => {
    if (ext.valid === false && ext.reject_reason) {
      // Keep the upstream reason (e.g. "extraction_error") visible in logs.
      return { ...ext };
    }
    const reason = rejectReasonFor(ext);
    return reason === null
      ? { ...ext, valid: true }
      : { ...ext, valid: false, reject_reason: reason };
  });

  const rejected = validated.filter((e) => !e.valid);
  logger.info("Agent: post-filter", {
    alertId: state.alertId,
    passed: validated.length - rejected.length,
    rejected: rejected.length,
    reasons: rejected.map((r) => r.reject_reason),
  });
  return { extractions: validated };
}
439
+ // ─────────────────────────────────────────────────────────
440
+ // Tier 3: Vote (deterministic, 0 tokens)
441
+ // ─────────────────────────────────────────────────────────
442
/** Upper median of a numeric list (element at floor(n/2) after sorting); null when empty. */
function voteUpperMedian(values) {
  if (values.length === 0) {
    return null;
  }
  const sorted = [...values].sort((a, b) => a - b);
  return sorted[Math.floor(sorted.length / 2)];
}

/** Most frequent value; ties go to the value seen first. Null when empty. */
function voteMostFrequent(values) {
  const counts = new Map();
  for (const value of values) {
    counts.set(value, (counts.get(value) ?? 0) + 1);
  }
  let winner = null;
  let winnerCount = 0;
  for (const [value, count] of counts) {
    if (count > winnerCount) {
      winner = value;
      winnerCount = count;
    }
  }
  return winner;
}

/** Mean of source_trust × confidence over the given sources; 0 when there are none. */
function voteFieldConfidence(sources) {
  if (sources.length === 0) {
    return 0;
  }
  const total = sources.reduce((sum, e) => sum + e.source_trust * e.confidence, 0);
  return total / sources.length;
}

/**
 * Votes one "exact count or qualitative label" field family
 * (`<key>`, `<key>_qual`, `<key>_qual_num`).
 * An exact count (upper median) wins over qualifiers. Otherwise the most
 * frequent qualifier is used, and its number is the upper median of the
 * numbers reported by sources that chose that same qualifier.
 */
function voteCountOrQual(sources, key) {
  const qualKey = `${key}_qual`;
  const qualNumKey = `${key}_qual_num`;
  const exactSrcs = sources.filter((e) => e[key] != null);
  const qualSrcs = sources.filter((e) => e[qualKey] != null);

  const value = voteUpperMedian(exactSrcs.map((e) => e[key]));
  const qual = value === null ? voteMostFrequent(qualSrcs.map((e) => e[qualKey])) : null;
  const qualNum =
    qual === null
      ? null
      : voteUpperMedian(
          qualSrcs
            // Numbers attached to a different qualifier would misstate the winner.
            .filter((e) => e[qualKey] === qual)
            .map((e) => e[qualNumKey])
            .filter((n) => n != null),
        );
  const confidence = voteFieldConfidence(exactSrcs.length > 0 ? exactSrcs : qualSrcs);
  return { value, qual, qualNum, confidence };
}

/**
 * Tier 3: merges all valid extractions into one voted result (no LLM tokens).
 *
 * Valid extractions get 1-based citation indices in their current order.
 * Aggregation rules: ETA from the highest-confidence source; origins grouped
 * by name with their citations; rocket count as a min–max range; cassette by
 * strict majority; counts by upper median; casualties and injuries only from
 * sources reporting a positive number; overall confidence is the mean of
 * source_trust × confidence rounded to two decimals.
 *
 * @param {object} state Graph state; reads `extractions` and `alertId`.
 * @returns {{votedResult: object|null}} `null` when nothing valid remains.
 */
function vote(state) {
  const valid = state.extractions.filter((e) => e.valid);
  if (valid.length === 0) {
    return { votedResult: null };
  }

  // Assign 1-based citation indices
  const indexed = valid.map((e, i) => ({ ...e, idx: i + 1 }));
  const citedSources = indexed.map((e) => ({
    index: e.idx,
    channel: e.channel,
    messageUrl: e.messageUrl ?? null,
  }));
  const indicesOf = (sources) => sources.map((e) => e.idx);

  // ETA: highest confidence source
  const bestEta =
    indexed
      .filter((e) => e.eta_refined_minutes != null)
      .sort((a, b) => b.confidence - a.confidence)[0] ?? null;

  // Country: group unique values, remembering who cited each
  const countryMap = new Map();
  for (const e of indexed) {
    if (e.country_origin) {
      const citations = countryMap.get(e.country_origin) ?? [];
      citations.push(e.idx);
      countryMap.set(e.country_origin, citations);
    }
  }
  const country_origins =
    countryMap.size > 0
      ? Array.from(countryMap, ([name, citations]) => ({ name, citations }))
      : null;

  // Rocket count: range
  const rocketSrcs = indexed.filter((e) => e.rocket_count != null);
  const rocketVals = rocketSrcs.map((e) => e.rocket_count);
  const rocket_count_min = rocketVals.length > 0 ? Math.min(...rocketVals) : null;
  const rocket_count_max = rocketVals.length > 0 ? Math.max(...rocketVals) : null;

  // Cassette: strict majority of sources that stated it either way
  const cassSrcs = indexed.filter((e) => e.is_cassette != null);
  const is_cassette =
    cassSrcs.length > 0
      ? cassSrcs.filter((e) => Boolean(e.is_cassette)).length > cassSrcs.length / 2
      : null;

  // Hits: median over every report (zeros included); cite only positive reports
  const hitsSrcs = indexed.filter((e) => e.hits_confirmed != null && e.hits_confirmed > 0);
  const hits_confirmed = voteUpperMedian(
    indexed.filter((e) => e.hits_confirmed != null).map((e) => e.hits_confirmed),
  );

  const interceptedVote = voteCountOrQual(indexed, "intercepted");
  const seaVote = voteCountOrQual(indexed, "sea_impact");
  const openVote = voteCountOrQual(indexed, "open_area_impact");

  // Casualties / injuries: only sources reporting a positive number
  const casualtySrcs = indexed.filter((e) => e.casualties != null && e.casualties > 0);
  const injurySrcs = indexed.filter((e) => e.injuries != null && e.injuries > 0);

  // Overall weighted confidence
  const weightedConf = voteFieldConfidence(indexed);

  const voted = {
    eta_refined_minutes: bestEta?.eta_refined_minutes ?? null,
    eta_citations: bestEta ? [bestEta.idx] : [],
    country_origins,
    rocket_count_min,
    rocket_count_max,
    rocket_citations: indicesOf(rocketSrcs),
    rocket_confidence: voteFieldConfidence(rocketSrcs),
    is_cassette,
    is_cassette_confidence: voteFieldConfidence(cassSrcs),
    intercepted: interceptedVote.value,
    intercepted_qual: interceptedVote.qual,
    intercepted_qual_num: interceptedVote.qualNum,
    intercepted_confidence: interceptedVote.confidence,
    sea_impact: seaVote.value,
    sea_impact_qual: seaVote.qual,
    sea_impact_qual_num: seaVote.qualNum,
    sea_confidence: seaVote.confidence,
    open_area_impact: openVote.value,
    open_area_impact_qual: openVote.qual,
    open_area_impact_qual_num: openVote.qualNum,
    open_area_confidence: openVote.confidence,
    hits_confirmed,
    hits_citations: indicesOf(hitsSrcs),
    hits_confidence: voteFieldConfidence(hitsSrcs),
    casualties: voteUpperMedian(casualtySrcs.map((e) => e.casualties)),
    casualties_citations: indicesOf(casualtySrcs),
    casualties_confidence: voteFieldConfidence(casualtySrcs),
    injuries: voteUpperMedian(injurySrcs.map((e) => e.injuries)),
    injuries_citations: indicesOf(injurySrcs),
    injuries_confidence: voteFieldConfidence(injurySrcs),
    confidence: Math.round(weightedConf * 100) / 100,
    sources_count: indexed.length,
    citedSources,
  };
  logger.info("Agent: voted", { alertId: state.alertId, voted });
  return { votedResult: voted };
}
626
+ // ─────────────────────────────────────────────────────────
627
+ // Tier 4: Edit message — inline citations, carry-forward
628
+ // ─────────────────────────────────────────────────────────
629
/**
 * English origin name → Russian display name.
 *
 * Created without a prototype because it is indexed with names taken from
 * LLM output: on a plain object literal a name such as "toString" or
 * "constructor" would resolve to an inherited Object.prototype member instead
 * of being reported as unknown.
 */
const COUNTRY_RU = {
  __proto__: null,
  Iran: "Иран",
  Yemen: "Йемен",
  Lebanon: "Ливан",
  Gaza: "Газа",
  Iraq: "Ирак",
  Syria: "Сирия",
  Hezbollah: "Хезболла",
};
639
/**
 * Formats citation indices as inline HTML links:
 * ` <a href="url">[1]</a>, <a href="url">[2]</a>`.
 *
 * Indices with no matching source, or whose source has no `messageUrl`, are
 * skipped. URLs are HTML-escaped before being placed in the `href` attribute
 * so that `&`, quotes or angle brackets cannot break the markup.
 *
 * @param {number[]} indices Citation indices to render, in output order.
 * @param {{index: number, messageUrl: (string|null)}[]} citedSources Sources
 *   to resolve indices against; the first source with a given index wins.
 * @returns {string} Links prefixed with one space, or "" when none resolve.
 */
function inlineCites(indices, citedSources) {
  const escapeAttr = (value) =>
    String(value)
      .replace(/&/g, "&amp;")
      .replace(/"/g, "&quot;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;");

  // One pass to index the sources instead of a linear search per citation.
  const urlByIndex = new Map();
  for (const src of citedSources) {
    if (!urlByIndex.has(src.index)) {
      urlByIndex.set(src.index, src.messageUrl);
    }
  }

  const links = [];
  for (const idx of indices) {
    const url = urlByIndex.get(idx);
    if (url) {
      links.push(`<a href="${escapeAttr(url)}">[${idx}]</a>`);
    }
  }
  return links.length > 0 ? ` ${links.join(", ")}` : "";
}
650
/**
 * Resolves citation indices to `{ url, channel }` records.
 * Indices with no matching source, or whose source lacks a `messageUrl`,
 * contribute nothing. Output order follows `indices`.
 */
function extractCites(indices, citedSources) {
  return indices.flatMap((citationIndex) => {
    const match = citedSources.find((candidate) => candidate.index === citationIndex);
    return match?.messageUrl
      ? [{ url: match.messageUrl, channel: match.channel }]
      : [];
  });
}
661
/**
 * Formats stored citations (carry-forward data) as inline HTML links,
 * numbered 1..n by position: ` <a href="url">[1]</a>, <a href="url">[2]</a>`.
 * URLs are HTML-escaped before being placed in the `href` attribute so that
 * `&`, quotes or angle brackets cannot break the markup.
 *
 * @param {{url: string}[]} cites Stored citations.
 * @returns {string} Links prefixed with one space, or "" for an empty list.
 */
function inlineCitesFromData(cites) {
  if (cites.length === 0) {
    return "";
  }
  const escapeAttr = (value) =>
    String(value)
      .replace(/&/g, "&amp;")
      .replace(/"/g, "&quot;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;");
  const links = cites.map(
    (cite, position) => `<a href="${escapeAttr(cite.url)}">[${position + 1}]</a>`,
  );
  return ` ${links.join(", ")}`;
}
667
// Confidence thresholds used when rendering voted fields.

/** Below this confidence a breakdown item is omitted entirely. */
const SKIP = 0.6;

/** Below this confidence a shown value gets the " (?)" uncertainty marker. */
const UNCERTAIN = 0.75;

/** Minimum confidence required before a "none" qualifier is displayed. */
const CERTAIN = 0.95;
671
/**
 * Maps a qualitative-count label to its Russian display text.
 *
 * @param {string|null|undefined} qual Qualifier label from the voted result.
 * @param {number|null|undefined} qualNum Number attached to
 *   "more_than"/"less_than", if any.
 * @param {number} conf Field confidence; "none" is shown only at or above
 *   CERTAIN.
 * @returns {string|null} Display text, or null when there is nothing to
 *   show. That includes a missing qualifier, an under-confident "none", and
 *   any unrecognized label, so callers never receive `undefined`.
 */
function qualDisplay(qual, qualNum, conf) {
  switch (qual) {
    case "none":
      return conf >= CERTAIN ? "нет" : null;
    case "all":
      return "все";
    case "most":
      return "большинство";
    case "many":
      return "много";
    case "few":
      return "несколько";
    case "exists":
      return "есть";
    case "more_than":
      return qualNum != null ? `>${qualNum}` : ">1";
    case "less_than":
      return qualNum != null ? `<${qualNum}` : "<нескольких";
    default:
      // null, undefined, or a label the model invented.
      return null;
  }
}
688
/**
 * Renders one "label — value" breakdown entry.
 *
 * Returns null when confidence is below SKIP or when there is neither an
 * exact number nor a displayable qualifier. A value shown with confidence
 * below UNCERTAIN gets a trailing " (?)". A missing (`undefined`) number or
 * qualifier text is treated like null instead of being printed literally.
 *
 * @param {string} label Entry label.
 * @param {number|null|undefined} num Exact count, preferred when present.
 * @param {string|null|undefined} qual Qualifier used when there is no count.
 * @param {number|null|undefined} qualNum Number attached to the qualifier.
 * @param {number} conf Field confidence.
 * @returns {string|null} Formatted entry or null.
 */
function breakdownItem(label, num, qual, qualNum, conf) {
  if (conf < SKIP) {
    return null;
  }
  const uncertainMark = conf < UNCERTAIN ? " (?)" : "";
  if (num != null) {
    return `${label} — ${num}${uncertainMark}`;
  }
  const qualText = qualDisplay(qual, qualNum, conf);
  if (qualText == null) {
    return null;
  }
  return `${label} — ${qualText}${uncertainMark}`;
}
699
/**
 * Build enrichment data from the current vote plus the previous enrichment (carry-forward).
 *
 * Starts from a shallow copy of `prev` and overwrites only the fields the vote
 * supplies; `prev` itself is never assigned to. Absent vote values (`null` or
 * `undefined`) leave the carried-forward value in place.
 *
 * @param {object} r - voted result (`country_origins`, `eta_*`, `rocket_*`, `intercepted*`,
 *   `hits_*`, `casualties*`, `injuries*`, `citedSources`).
 * @param {object} prev - enrichment data from the previous run.
 * @param {string} alertType - alert phase; ETA is only refined for "early_warning" and "siren".
 * @param {number} alertTs - alert timestamp in epoch milliseconds.
 * @returns {object} the updated enrichment data (the caller persists it).
 */
function buildEnrichmentFromVote(r, prev, alertType, alertTs) {
    const data = { ...prev };
    const sources = r.citedSources ?? [];
    const uncertainMark = (conf) => (conf < UNCERTAIN ? " (?)" : "");
    // Own-property lookup: a name such as "constructor" must not resolve to an
    // inherited Object.prototype member.
    const toRussian = (name) => (Object.prototype.hasOwnProperty.call(COUNTRY_RU, name)
        ? COUNTRY_RU[name]
        : name);

    // Origin — update if the vote has it
    if (r.country_origins && r.country_origins.length > 0) {
        data.origin = r.country_origins.map((c) => toRussian(c.name)).join(" + ");
        data.originCites = r.country_origins.flatMap((c) => extractCites(c.citations, sources));
    }

    // ETA — only for early_warning/siren
    if (r.eta_refined_minutes != null &&
        (alertType === "early_warning" || alertType === "siren")) {
        const eta = new Date(alertTs + r.eta_refined_minutes * 60_000);
        // Skip values that do not form a valid date instead of rendering "~Invalid Date".
        if (!Number.isNaN(eta.getTime())) {
            const absTime = eta.toLocaleTimeString("he-IL", {
                hour: "2-digit",
                minute: "2-digit",
                timeZone: "Asia/Jerusalem",
            });
            data.etaAbsolute = `~${absTime}`;
            data.etaCites = extractCites(r.eta_citations, sources);
        }
    }

    // Rocket count — exact number, or "~min–max" range
    if (r.rocket_count_min != null && r.rocket_count_max != null) {
        const u = uncertainMark(r.rocket_confidence);
        data.rocketCount =
            r.rocket_count_min === r.rocket_count_max
                ? `${r.rocket_count_min}${u}`
                : `~${r.rocket_count_min}–${r.rocket_count_max}${u}`;
        data.rocketCites = extractCites(r.rocket_citations, sources);
    }

    // Cassette
    if (r.is_cassette != null && r.is_cassette_confidence >= SKIP) {
        data.isCassette = r.is_cassette;
    }

    // Intercepted — exact number first, qualitative amount as fallback
    if (r.intercepted != null && r.intercepted_confidence >= SKIP) {
        data.intercepted = `${r.intercepted}${uncertainMark(r.intercepted_confidence)}`;
        // NOTE(review): unlike the other fields, this attaches every cited source.
        // If the vote carries an interception-specific citation list, it should
        // probably be used here — confirm.
        data.interceptedCites = extractCites(sources.map((s) => s.index), sources);
    }
    else if (r.intercepted_qual != null && r.intercepted_confidence >= SKIP) {
        const qs = qualDisplay(r.intercepted_qual, r.intercepted_qual_num, r.intercepted_confidence);
        if (qs) {
            data.intercepted = qs;
        }
    }

    // Hits
    if (r.hits_confirmed != null &&
        r.hits_confirmed > 0 &&
        r.hits_confidence >= SKIP) {
        data.hitsConfirmed = `${r.hits_confirmed}${uncertainMark(r.hits_confidence)}`;
        data.hitsCites = extractCites(r.hits_citations, sources);
    }

    // Casualties
    if (r.casualties != null &&
        r.casualties > 0 &&
        r.casualties_confidence >= SKIP) {
        data.casualties = `${r.casualties}${uncertainMark(r.casualties_confidence)}`;
        data.casualtiesCites = extractCites(r.casualties_citations, sources);
    }

    // Injuries
    if (r.injuries != null && r.injuries > 0 && r.injuries_confidence >= SKIP) {
        data.injuries = `${r.injuries}${uncertainMark(r.injuries_confidence)}`;
        data.injuriesCites = extractCites(r.injuries_citations, sources);
    }

    // Early warning time — record when the first early_warning was received
    if (alertType === "early_warning" && !data.earlyWarningTime) {
        data.earlyWarningTime = toIsraelTime(alertTs);
    }
    return data;
}
780
/**
 * Build the enriched message text from the current message and the enrichment data.
 *
 * The output is HTML: each enrichment line is a `<b>label:</b> value` row followed by
 * inline `<a href>` citations, inserted via insertBeforeTimeLine. No footer sources.
 * Enrichment values are HTML-escaped here, so stored values may contain characters
 * such as "<" or ">" (e.g. "<5") without breaking the markup.
 *
 * @param {string} currentText - message text to enrich.
 * @param {string} alertType - alert phase ("early_warning", "siren", "resolved", …);
 *   it decides which lines are shown.
 * @param {number} alertTs - alert timestamp; not used by this function, kept for the
 *   existing call signature.
 * @param {object} enrichment - enrichment data (origin, etaAbsolute, rocketCount, … and
 *   the matching `*Cites` lists).
 * @returns {string} the enriched message text.
 */
function buildEnrichedMessage(currentText, alertType, alertTs, enrichment) {
    const esc = (value) => String(value)
        .replaceAll("&", "&amp;")
        .replaceAll("<", "&lt;")
        .replaceAll(">", "&gt;");
    // A field may be present without its citation list; render that as "no citations".
    const citesFor = (cites) => inlineCitesFromData(cites ?? []);
    const isEarlyWarning = alertType === "early_warning";
    let text = currentText;

    // ── Refine ETA in-place ──
    if (enrichment.etaAbsolute && (isEarlyWarning || alertType === "siren")) {
        const refined = `${esc(enrichment.etaAbsolute)}${citesFor(enrichment.etaCites)}`;
        const etaPatterns = [
            /~\d+[–-]\d+\s*мин/, // ~5–12 мин
            /~\d+[–-]\d+\s*min/, // ~5–12 min
            /~\d+[–-]\d+\s*דקות/, // ~5–12 דקות
            /~\d+[–-]\d+\s*دقيقة/, // ~5–12 دقيقة
            /1\.5\s*мин/, // 1.5 мин (siren)
            /1\.5\s*min/,
            /1\.5\s*דקות/,
            /1\.5\s*دقيقة/,
        ];
        const matched = etaPatterns.find((pattern) => pattern.test(text));
        if (matched) {
            // A replacer function keeps "$" sequences in the citation URLs literal.
            text = text.replace(matched, () => refined);
        }
    }

    // ── Siren: show "Раннее предупреждение: было в HH:MM" ──
    if (alertType === "siren" && enrichment.earlyWarningTime) {
        text = insertBeforeTimeLine(text, `<b>Раннее предупреждение:</b> было в ${esc(enrichment.earlyWarningTime)}`);
    }

    // ── Origin ── (the leading "\n" puts a blank line before the enrichment lines)
    if (enrichment.origin) {
        const citeStr = citesFor(enrichment.originCites);
        text = insertBeforeTimeLine(text, `\n<b>Откуда:</b> ${esc(enrichment.origin)}${citeStr}`);
    }

    // ── Rocket count + breakdown ──
    if (enrichment.rocketCount) {
        const citeStr = citesFor(enrichment.rocketCites);
        const cassette = enrichment.isCassette ? ", есть кассетные" : "";
        const bParts = [];
        if (enrichment.intercepted) {
            bParts.push(`перехвачено — ${esc(enrichment.intercepted)}`);
        }
        if (enrichment.seaImpact) {
            bParts.push(`упали в море — ${esc(enrichment.seaImpact)}`);
        }
        if (enrichment.openAreaImpact) {
            bParts.push(`открытая местность — ${esc(enrichment.openAreaImpact)}`);
        }
        const breakdown = bParts.length > 0 ? `, из них: ${bParts.join(", ")}` : "";
        text = insertBeforeTimeLine(text, `<b>Ракет:</b> ${esc(enrichment.rocketCount)}${breakdown}${cassette}${citeStr}`);
    }
    else if (enrichment.intercepted && !isEarlyWarning) {
        // No rocket count but have interception data
        const citeStr = citesFor(enrichment.interceptedCites);
        text = insertBeforeTimeLine(text, `<b>Перехвачено:</b> ${esc(enrichment.intercepted)}${citeStr}`);
    }

    // ── Hits ──
    if (enrichment.hitsConfirmed && !isEarlyWarning) {
        const areaLabel = Object.values(config.agent.areaLabels ?? {})[0] ?? "район";
        const citeStr = citesFor(enrichment.hitsCites);
        text = insertBeforeTimeLine(text, `<b>Попадания (${esc(areaLabel)}):</b> ${esc(enrichment.hitsConfirmed)}${citeStr}`);
    }

    // ── Casualties / Injuries (resolved only) ──
    if (enrichment.casualties && alertType === "resolved") {
        const citeStr = citesFor(enrichment.casualtiesCites);
        text = insertBeforeTimeLine(text, `<b>Погибшие:</b> ${esc(enrichment.casualties)}${citeStr}`);
    }
    if (enrichment.injuries && alertType === "resolved") {
        const citeStr = citesFor(enrichment.injuriesCites);
        text = insertBeforeTimeLine(text, `<b>Пострадавшие:</b> ${esc(enrichment.injuries)}${citeStr}`);
    }
    return text;
}
858
/**
 * Insert `line` (followed by a newline) in front of the alert-time label.
 *
 * The label is the first occurrence of the bold "alert time" caption in any of the
 * supported languages. When the text has no such label, `line` is inserted as a new
 * row directly before the last row of the text.
 *
 * @param {string} text - message text.
 * @param {string} line - row to insert.
 * @returns {string} the text with `line` inserted.
 */
function insertBeforeTimeLine(text, line) {
    const anchor = text.search(/(<b>(?:Время оповещения|Alert time|שעת ההתרעה|وقت الإنذار):<\/b>)/);
    if (anchor >= 0) {
        const head = text.slice(0, anchor);
        const tail = text.slice(anchor);
        return `${head}${line}\n${tail}`;
    }
    const rows = text.split("\n");
    const at = Math.max(rows.length - 1, 0);
    return [...rows.slice(0, at), line, ...rows.slice(at)].join("\n");
}
871
/**
 * Graph node: push the enriched text to the already-sent alert message.
 *
 * With a voted result, the vote is merged into the previous enrichment and the
 * message is rebuilt from that. Without one, the message is rebuilt from the
 * previous enrichment alone, and only when it carries an origin or interception
 * value. An edit is skipped when the new text hashes to `lastEditHash`. After a
 * successful edit, or a "message is not modified" reply, the hash is stored on the
 * enrichment and the enrichment is saved. Other edit failures are logged, not thrown.
 *
 * @param {object} state - agent state (votedResult, previousEnrichment, currentText,
 *   alertType, alertTs, alertId, chatId, messageId, isCaption).
 * @returns {Promise<object>} always an empty state update.
 */
async function editMessage(state) {
    const { votedResult } = state;
    if (!config.botToken) {
        return {};
    }
    const tgBot = new Bot(config.botToken);
    const carried = state.previousEnrichment ?? emptyEnrichmentData();

    // True (and logged) when the rebuilt text equals what the last edit produced.
    const isDuplicate = (hash, enrichment) => {
        if (hash !== enrichment.lastEditHash) {
            return false;
        }
        logger.info("Agent: no change in message (dedup) — skipping edit", {
            alertId: state.alertId,
        });
        return true;
    };

    // Records the edit hash on the enrichment and persists it.
    const remember = async (enrichment, hash) => {
        enrichment.lastEditHash = hash;
        await saveEnrichmentData(enrichment);
    };

    // Sends the edit (caption or text) and persists the enrichment afterwards.
    const pushEdit = async (enrichment, newText, hash, successMessage, successMeta) => {
        try {
            if (state.isCaption) {
                await tgBot.api.editMessageCaption(state.chatId, state.messageId, {
                    caption: newText,
                    parse_mode: "HTML",
                });
            }
            else {
                await tgBot.api.editMessageText(state.chatId, state.messageId, newText, {
                    parse_mode: "HTML",
                });
            }
            await remember(enrichment, hash);
            logger.info(successMessage, successMeta);
        }
        catch (err) {
            const errStr = String(err);
            if (!errStr.includes("message is not modified")) {
                logger.error("Agent: failed to edit message", {
                    alertId: state.alertId,
                    error: errStr,
                });
                return;
            }
            // The message already shows this text: remember the hash so later runs dedup.
            await remember(enrichment, hash);
            logger.info("Agent: message already up-to-date (dedup)", {
                alertId: state.alertId,
            });
        }
    };

    if (!votedResult) {
        // No new data from channels — only carry-forward data can be shown.
        if (!(carried.origin || carried.intercepted)) {
            logger.info("Agent: no voted result — skipping edit", {
                alertId: state.alertId,
            });
            return {};
        }
        const carriedText = buildEnrichedMessage(state.currentText, state.alertType, state.alertTs, carried);
        const carriedHash = textHash(carriedText);
        if (isDuplicate(carriedHash, carried)) {
            return {};
        }
        await pushEdit(carried, carriedText, carriedHash, "Agent: message enriched (carry-forward only)", {
            alertId: state.alertId,
            messageId: state.messageId,
        });
        return {};
    }

    // Build enrichment data: merge vote + previous
    const enrichment = buildEnrichmentFromVote(votedResult, carried, state.alertType, state.alertTs);
    const newText = buildEnrichedMessage(state.currentText, state.alertType, state.alertTs, enrichment);
    const hash = textHash(newText);
    if (isDuplicate(hash, enrichment)) {
        return {};
    }

    // Low confidence: log but still show data with (?) markers
    if (votedResult.confidence < config.agent.confidenceThreshold) {
        logger.info("Agent: confidence below threshold — editing with (?) markers", {
            alertId: state.alertId,
            confidence: votedResult.confidence,
            threshold: config.agent.confidenceThreshold,
        });
    }

    await pushEdit(enrichment, newText, hash, "Agent: message enriched", {
        alertId: state.alertId,
        messageId: state.messageId,
        confidence: votedResult.confidence,
        sources: votedResult.sources_count,
        phase: state.alertType,
    });
    return {};
}
990
+ // ─────────────────────────────────────────────────────────
991
+ // Clarify Node — MCP tool calling via ReAct (conditional)
992
+ // ─────────────────────────────────────────────────────────
993
/**
 * Graph node: run the clarify step and queue its findings for a re-vote.
 *
 * Without a voted result the step is skipped. Otherwise runClarify is called with
 * the alert context; its new extractions are appended to the existing ones and
 * `votedResult` is reset to null. Failures are logged and leave the state untouched.
 * Every path marks `clarifyAttempted`.
 *
 * @param {object} state - agent state.
 * @returns {Promise<object>} partial state update.
 */
async function clarifyNode(state) {
    const { votedResult, extractions, alertId, alertType } = state;
    if (!votedResult) {
        logger.info("Agent: clarify skipped — no voted result", { alertId });
        return { clarifyAttempted: true };
    }
    logger.info("Agent: clarify triggered", {
        alertId,
        confidence: votedResult.confidence,
        threshold: config.agent.confidenceThreshold,
        phase: alertType,
    });
    try {
        const request = {
            alertId,
            alertAreas: state.alertAreas,
            alertType,
            alertTs: state.alertTs,
            messageId: state.messageId,
            currentText: state.currentText,
            extractions,
            votedResult,
        };
        const outcome = await runClarify(request);
        const combined = [...extractions, ...outcome.newExtractions];
        logger.info("Agent: clarify completed", {
            alertId,
            toolCalls: outcome.toolCallCount,
            clarified: outcome.clarified,
            newExtractions: outcome.newExtractions.length,
            newPosts: outcome.newPosts.length,
        });
        // Clearing votedResult hands the merged extractions to the follow-up vote.
        return {
            extractions: combined,
            votedResult: null,
            clarifyAttempted: true,
        };
    }
    catch (err) {
        logger.error("Agent: clarify failed", {
            alertId,
            error: String(err),
        });
        return { clarifyAttempted: true };
    }
}
1038
+ // ── Conditional routing after vote ─────────────────────
1039
/**
 * Router after the vote: choose between the "clarify" and "editMessage" nodes.
 *
 * Clarify is chosen at most once, only when MCP tools are enabled and a voted
 * result exists, and only when either the overall confidence is below the
 * configured threshold or the origin looks suspicious: a single-source "Lebanon"
 * attribution for an alert covering central Israel.
 *
 * @param {object} state - agent state.
 * @returns {"clarify"|"editMessage"} name of the next node.
 */
function shouldClarify(state) {
    const vote = state.votedResult;
    if (state.clarifyAttempted || !config.agent.mcpTools || !vote) {
        return "editMessage";
    }

    // Low confidence → clarify
    if (vote.confidence < config.agent.confidenceThreshold) {
        logger.info("Agent: routing to clarify (low confidence)", {
            confidence: vote.confidence,
            threshold: config.agent.confidenceThreshold,
        });
        return "clarify";
    }

    // Suspicious origin: one source naming one country that is unexpected for the
    // region, e.g. "Lebanon" on a Tel Aviv alert when the real attack is from Iran/Yemen.
    const origins = vote.country_origins;
    const isSingleSourced = Boolean(origins) && origins.length === 1 && vote.sources_count === 1;
    if (!isSingleSourced) {
        return "editMessage";
    }
    const singleOrigin = origins[0].name;
    // Lebanon attacks typically don't reach central Israel
    const centralMarkers = ["תל אביב", "גוש דן", "שרון", "מרכז"];
    const coversCentre = singleOrigin === "Lebanon" &&
        state.alertAreas.some((area) => centralMarkers.some((marker) => area.includes(marker)));
    if (coversCentre) {
        logger.info("Agent: routing to clarify (suspicious single source: Lebanon for central Israel)", { origin: singleOrigin });
        return "clarify";
    }
    return "editMessage";
}
1074
+ // ── Build graph ────────────────────────────────────────
1075
// Single checkpointer instance shared by every graph compiled in this module.
const checkpointer = new MemorySaver();

/**
 * Assemble and compile the enrichment graph.
 *
 * Flow: collectAndPreFilter → extractAndValidate → postFilter → vote, then either
 * straight to editMessage or through clarify → revote → editMessage, as decided by
 * shouldClarify. "revote" runs the same handler as "vote".
 *
 * @returns {object} the compiled graph, bound to the shared checkpointer.
 */
function buildGraph() {
    const withNodes = new StateGraph(AgentState)
        .addNode("collectAndPreFilter", collectAndPreFilter)
        .addNode("extractAndValidate", extractAndValidate)
        .addNode("postFilter", postFilter)
        .addNode("vote", vote)
        .addNode("clarify", clarifyNode)
        .addNode("revote", vote)
        .addNode("editMessage", editMessage);

    // Linear part of the pipeline, up to the vote.
    const withPipeline = withNodes
        .addEdge("__start__", "collectAndPreFilter")
        .addEdge("collectAndPreFilter", "extractAndValidate")
        .addEdge("extractAndValidate", "postFilter")
        .addEdge("postFilter", "vote");

    // Branch after the vote, then rejoin at editMessage.
    const routes = {
        clarify: "clarify",
        editMessage: "editMessage",
    };
    const wired = withPipeline
        .addConditionalEdges("vote", shouldClarify, routes)
        .addEdge("clarify", "revote")
        .addEdge("revote", "editMessage")
        .addEdge("editMessage", "__end__");

    return wired.compile({ checkpointer });
}
1098
/**
 * Run the enrichment graph once for a single alert message.
 *
 * A fresh graph is built for each call and invoked with the alert fields from
 * `input` plus empty working state. The alert id is used as the checkpoint thread id.
 *
 * @param {object} input - alertId, alertTs, alertType, alertAreas, chatId, messageId,
 *   isCaption, currentText.
 * @returns {Promise<void>} resolves when the graph run has finished.
 */
export async function runEnrichment(input) {
    const app = buildGraph();
    const { alertId, alertTs, alertType, alertAreas, chatId, messageId, isCaption, currentText } = input;
    const initialState = {
        alertId,
        alertTs,
        alertType,
        alertAreas,
        chatId,
        messageId,
        isCaption,
        currentText,
        // Working state, starting empty for every run.
        channelPosts: [],
        filteredPosts: [],
        extractions: [],
        votedResult: null,
        clarifyAttempted: false,
        previousEnrichment: emptyEnrichmentData(),
        sessionStartTs: 0,
        phaseStartTs: 0,
    };
    const runOptions = { configurable: { thread_id: alertId } };
    await app.invoke(initialState, runOptions);
}
1119
+ // ── Exported for testing ───────────────────────────────
1120
/**
 * Module internals re-exported under one object so tests can reach them.
 * Not part of the public API.
 */
export const _test = {
    // LLM / filtering helpers
    getLLM,
    buildRegionKeywords,
    LAUNCH_KEYWORDS,
    TIME_WINDOW_MS,
    toIsraelTime,
    textHash,
    // graph nodes
    postFilter,
    vote,
    // message building
    buildEnrichmentFromVote,
    buildEnrichedMessage,
    insertBeforeTimeLine,
    inlineCites,
    inlineCitesFromData,
    extractCites,
    COUNTRY_RU,
    // prompts
    SYSTEM_PROMPT_BASE,
    getPhaseInstructions,
    // confidence thresholds
    SKIP,
    UNCERTAIN,
    CERTAIN,
};
1142
+ //# sourceMappingURL=graph.js.map