easyoref 1.14.1 → 1.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,32 +1,27 @@
1
1
  /**
2
2
  * LangGraph.js enrichment pipeline — phase-aware, time-validated.
3
3
  *
4
- * KEY DESIGN PRINCIPLES:
5
- * 1. TIME IS KING — every post is validated against the alert time window.
6
- * LLM receives alert time + post time and scores time_relevance.
7
- * Posts about previous/different attacks are rejected.
8
- * 2. PHASE-AWARE — each phase extracts only what's relevant:
9
- * - early_warning: origin, ETA, rocket count, cassette
10
- * - siren: carries early data + adds interception, impacts
11
- * - resolved: carries all + adds casualties, injuries, final stats
12
- * 3. CARRY-FORWARD — results persist in Redis (EnrichmentData).
13
- * Each phase inherits previous phase's findings.
14
- * 4. INLINE CITATIONS — no superscripts, no footer sources.
15
- * Format: [[1]](url) right after each data point.
16
- * 5. DEDUP EDITS — hash-based check prevents "message not modified" spam.
4
+ * Lean orchestrator: connects filter → extract → vote → edit.
5
+ * All logic lives in dedicated modules:
6
+ * - filters.ts: deterministic noise filter, channel tracking
7
+ * - extract.ts: cheap LLM pre-filter, expensive extraction, post-filter
8
+ * - vote.ts: consensus voting (deterministic)
9
+ * - message.ts: message building, Telegram editing
10
+ * - helpers.ts: toIsraelTime, textHash
17
11
  *
18
12
  * Pipeline:
19
- * preFilter → extractAndValidate → postFilter → vote → [clarify] → editMessage
13
+ * collectAndFilter → extract → vote → [clarify → revote] → editMessage
20
14
  */
21
15
  import { Annotation, MemorySaver, StateGraph } from "@langchain/langgraph";
22
- import { ChatOpenAI } from "@langchain/openai";
23
- import { Bot } from "grammy";
24
- import { createHash } from "node:crypto";
25
16
  import { config } from "../config.js";
26
17
  import * as logger from "../logger.js";
27
18
  import { runClarify } from "./clarify.js";
28
- import { getActiveSession, getChannelPosts, getEnrichmentData, saveEnrichmentData, } from "./store.js";
19
+ import { extractPosts, filterChannelsCheap, postFilter, } from "./extract.js";
20
+ import { buildChannelTracking } from "./filters.js";
21
+ import { editMessage } from "./message.js";
22
+ import { getActiveSession, getChannelPosts, getEnrichmentData, getLastUpdateTs, setLastUpdateTs, } from "./store.js";
29
23
  import { emptyEnrichmentData } from "./types.js";
24
+ import { vote } from "./vote.js";
30
25
  // ── State ──────────────────────────────────────────────
31
26
  const AgentState = Annotation.Root({
32
27
  alertId: Annotation({ reducer: (_, b) => b }),
@@ -37,1049 +32,133 @@ const AgentState = Annotation.Root({
37
32
  messageId: Annotation({ reducer: (_, b) => b }),
38
33
  isCaption: Annotation({ reducer: (_, b) => b }),
39
34
  currentText: Annotation({ reducer: (_, b) => b }),
40
- channelPosts: Annotation({ reducer: (_, b) => b }),
41
- filteredPosts: Annotation({ reducer: (_, b) => b }),
35
+ tracking: Annotation({ reducer: (_, b) => b }),
42
36
  extractions: Annotation({ reducer: (_, b) => b }),
43
37
  votedResult: Annotation({ reducer: (_, b) => b }),
44
- /** Tracks whether clarify has already run (prevents infinite loop) */
45
38
  clarifyAttempted: Annotation({ reducer: (_, b) => b }),
46
- /** Cross-phase enrichment data loaded at start */
47
39
  previousEnrichment: Annotation({ reducer: (_, b) => b }),
48
- /** Session start timestamp for time window calculations */
49
- sessionStartTs: Annotation({ reducer: (_, b) => b }),
50
- /** Phase start timestamp */
51
- phaseStartTs: Annotation({ reducer: (_, b) => b }),
52
40
  });
53
- // ── LLM ───────────────────────────────────────────────
54
- function getLLM() {
55
- return new ChatOpenAI({
56
- model: config.agent.model,
57
- configuration: {
58
- baseURL: "https://openrouter.ai/api/v1",
59
- defaultHeaders: {
60
- "HTTP-Referer": "https://github.com/mikhailkogan17/EasyOref",
61
- "X-Title": "EasyOref",
62
- },
63
- },
64
- apiKey: config.agent.apiKey,
65
- temperature: 0,
66
- maxTokens: 500,
67
- });
68
- }
69
- // ── Region keywords (Hebrew + transliterations) ────────
70
- function buildRegionKeywords() {
71
- const keywords = [];
72
- for (const area of config.areas) {
73
- keywords.push(area.toLowerCase());
74
- const first = area.split(" ")[0];
75
- if (first && first.length >= 2)
76
- keywords.push(first.toLowerCase());
77
- }
78
- for (const [he, label] of Object.entries(config.agent.areaLabels)) {
79
- keywords.push(he.toLowerCase());
80
- for (const word of label.split(/\s+/)) {
81
- if (word.length >= 3)
82
- keywords.push(word.toLowerCase());
83
- }
84
- }
85
- // Common attack-related keywords (always relevant)
86
- keywords.push("ישראל", "israel", "израиль", "ракет", "rocket", "missile", "iron dome", "כיפת ברזל", "перехват", "intercept", "צבע אדום", "red alert");
87
- return [...new Set(keywords)];
88
- }
89
- // ── Hard-ignore: Pikud HaOref official area lists ("простыня") ──
90
- const OREF_LINK_PATTERN = /oref\.org\.il/i;
91
- const OREF_OFFICIAL_CHANNEL_PATTERN = /pikud|פיקוד|oref/i;
92
- /** Detect posts that are just official Pikud HaOref area list links */
93
- function isOrefAreaList(post) {
94
- const text = post.text;
95
- // Link to official Pikud HaOref page
96
- if (OREF_LINK_PATTERN.test(text))
97
- return true;
98
- // Official channels that just echo the area list (very long, >300 chars, mostly city names)
99
- if (OREF_OFFICIAL_CHANNEL_PATTERN.test(post.channel) && text.length > 300)
100
- return true;
101
- return false;
102
- }
103
- // ── Launch detection keywords (strict — early_warning only) ──
104
- const LAUNCH_KEYWORDS = [
105
- "שיגור",
106
- "שיגורים",
107
- "שוגרו",
108
- "נורו",
109
- "зафиксированы запуски",
110
- "обнаружены запуски",
111
- "запуски ракет",
112
- "запуск ракет",
113
- "пуски ракет",
114
- "ракетный обстрел",
115
- "ракетная атака",
116
- "missile launch",
117
- "rocket launch",
118
- "barrage",
119
- "fired towards",
120
- "launches detected",
121
- "missiles fired",
122
- "שיגורים לישראל",
123
- "ירי טילים",
124
- "ירי רקטות",
125
- "إطلاق صواريخ",
126
- ].map((kw) => kw.toLowerCase());
127
- // ── Time window per phase (ms before alertTs to accept posts) ──
128
- const TIME_WINDOW_MS = {
129
- early_warning: 5 * 60 * 1000, // 5 min before alert
130
- siren: 10 * 60 * 1000, // 10 min (includes early_warning period)
131
- resolved: 30 * 60 * 1000, // 30 min (full session window)
132
- };
133
- // ── Helpers ────────────────────────────────────────────
134
- /** Format timestamp as HH:MM Israel time */
135
- function toIsraelTime(ts) {
136
- return new Date(ts).toLocaleTimeString("he-IL", {
137
- hour: "2-digit",
138
- minute: "2-digit",
139
- timeZone: "Asia/Jerusalem",
140
- });
141
- }
142
- /** MD5 hash for edit dedup */
143
- function textHash(text) {
144
- return createHash("md5").update(text).digest("hex");
145
- }
146
- // ─────────────────────────────────────────────────────────
147
- // Tier 0: Pre-filter (phase-aware, time-bounded, 0 tokens)
148
- // ─────────────────────────────────────────────────────────
149
- async function collectAndPreFilter(state) {
41
+ // ── Node: collect posts + deterministic noise filter ───
42
+ async function collectAndFilter(state) {
150
43
  const posts = await getChannelPosts(state.alertId);
151
44
  const prevEnrichment = await getEnrichmentData();
152
- // Load session for time boundaries
153
45
  const session = await getActiveSession();
154
46
  const sessionStartTs = session?.sessionStartTs ?? state.alertTs;
155
- const phaseStartTs = session?.phaseStartTs ?? state.alertTs;
47
+ const lastUpdateTs = await getLastUpdateTs();
156
48
  if (posts.length === 0) {
157
- logger.info("Agent: no posts in session", { alertId: state.alertId });
158
- return {
159
- channelPosts: posts,
160
- filteredPosts: [],
161
- previousEnrichment: prevEnrichment,
162
- sessionStartTs,
163
- phaseStartTs,
164
- };
49
+ logger.info("Agent: no posts", { alertId: state.alertId });
50
+ return { tracking: null, previousEnrichment: prevEnrichment };
165
51
  }
166
- const keywords = buildRegionKeywords();
167
- const alertType = state.alertType;
168
- const alertTs = state.alertTs;
169
- // Time window: reject posts older than window before alertTs
170
- const windowMs = TIME_WINDOW_MS[alertType];
171
- const cutoffTs = alertTs - windowMs;
172
- let filtered;
173
- if (alertType === "early_warning") {
174
- // ── STRICT launch-only filter for early warning ──
175
- // Step 1: Find posts with launch keywords, within time window
176
- const launchPosts = posts.filter((post) => {
177
- if (post.ts < cutoffTs)
178
- return false;
179
- if (isOrefAreaList(post))
180
- return false;
181
- const text = post.text.toLowerCase();
182
- return LAUNCH_KEYWORDS.some((kw) => text.includes(kw));
183
- });
184
- // Step 2: Get channels that posted about launches
185
- const channelFirstLaunchTs = new Map();
186
- for (const post of launchPosts) {
187
- const current = channelFirstLaunchTs.get(post.channel);
188
- if (current === undefined || post.ts < current) {
189
- channelFirstLaunchTs.set(post.channel, post.ts);
190
- }
191
- }
192
- // Step 3: Accept follow-up posts from launch channels only (within window)
193
- filtered = posts.filter((post) => {
194
- if (post.ts < cutoffTs)
195
- return false;
196
- const text = post.text.toLowerCase();
197
- if (!keywords.some((kw) => text.includes(kw)))
198
- return false;
199
- const firstLaunch = channelFirstLaunchTs.get(post.channel);
200
- if (firstLaunch === undefined)
201
- return false;
202
- return post.ts >= firstLaunch;
203
- });
204
- logger.info("Agent: pre-filter (early_warning)", {
52
+ const tracking = buildChannelTracking(posts, sessionStartTs, lastUpdateTs);
53
+ logger.info("Agent: channel tracking", {
54
+ alertId: state.alertId,
55
+ total_posts: posts.length,
56
+ channels_with_updates: tracking.channels_with_updates.length,
57
+ total_new_posts: tracking.channels_with_updates.reduce((s, c) => s + c.last_tracked_messages.length, 0),
58
+ });
59
+ return { tracking, previousEnrichment: prevEnrichment };
60
+ }
61
+ // ── Node: cheap LLM channel filter + expensive extraction ──
62
+ async function extractNode(state) {
63
+ if (!state.tracking || state.tracking.channels_with_updates.length === 0) {
64
+ logger.info("Agent: no channels with updates", {
205
65
  alertId: state.alertId,
206
- total: posts.length,
207
- launch_posts: launchPosts.length,
208
- launch_channels: channelFirstLaunchTs.size,
209
- after_filter: filtered.length,
210
- cutoff: toIsraelTime(cutoffTs),
211
- channels_breakdown: Object.fromEntries([...new Set(posts.map((p) => p.channel))].map((ch) => [
212
- ch,
213
- {
214
- total: posts.filter((p) => p.channel === ch).length,
215
- passed: filtered.filter((p) => p.channel === ch).length,
216
- },
217
- ])),
218
- oref_filtered: posts.filter(isOrefAreaList).length,
219
66
  });
67
+ return { extractions: [] };
220
68
  }
221
- else {
222
- // ── Siren & Resolved: broader filter, time-bounded ──
223
- filtered = posts.filter((post) => {
224
- if (post.ts < cutoffTs)
225
- return false;
226
- if (isOrefAreaList(post))
227
- return false;
228
- const text = post.text.toLowerCase();
229
- return keywords.some((kw) => text.includes(kw));
230
- });
231
- logger.info("Agent: pre-filter", {
69
+ // Step 1: cheap LLM — which channels have important military intel?
70
+ const relevantChannels = await filterChannelsCheap(state.tracking, state.alertAreas, state.alertTs, state.alertType);
71
+ if (relevantChannels.length === 0) {
72
+ logger.info("Agent: no relevant channels after cheap filter", {
232
73
  alertId: state.alertId,
233
- alertType,
234
- total: posts.length,
235
- after_filter: filtered.length,
236
- cutoff: toIsraelTime(cutoffTs),
237
- channels_breakdown: Object.fromEntries([...new Set(posts.map((p) => p.channel))].map((ch) => [
238
- ch,
239
- {
240
- total: posts.filter((p) => p.channel === ch).length,
241
- passed: filtered.filter((p) => p.channel === ch).length,
242
- },
243
- ])),
244
- oref_filtered: posts.filter(isOrefAreaList).length,
245
74
  });
75
+ return { extractions: [] };
246
76
  }
247
- return {
248
- channelPosts: posts,
249
- filteredPosts: filtered,
250
- previousEnrichment: prevEnrichment,
251
- sessionStartTs,
252
- phaseStartTs,
253
- };
254
- }
255
- // ─────────────────────────────────────────────────────────
256
- // Tier 1: Extract + validate (1 LLM call per post)
257
- // Phase-aware prompts — agent knows what to look for.
258
- // TIME CONTEXT — agent sees alert time + post time.
259
- // ─────────────────────────────────────────────────────────
260
- const QUAL_VALUES = '"all"|"most"|"many"|"few"|"exists"|"none"|"more_than"|"less_than"';
261
- /** Phase-specific extraction instructions */
262
- function getPhaseInstructions(alertType) {
263
- switch (alertType) {
264
- case "early_warning":
265
- return `PHASE: EARLY WARNING (radar detected launches, sirens not yet).
266
- Focus on: country_origin (WHERE were rockets launched from?), eta_refined_minutes, rocket_count, is_cassette.
267
- Do NOT extract: intercepted, sea_impact, open_area_impact, hits_confirmed, casualties, injuries — these are IMPOSSIBLE at this stage.
268
- If a message discusses interception results, it is about a PREVIOUS attack — set time_relevance=0.`;
269
- case "siren":
270
- return `PHASE: SIREN (rockets incoming, impact imminent).
271
- Focus on: country_origin (if not known yet), rocket_count, intercepted, sea_impact, open_area_impact, is_cassette.
272
- Do NOT extract: hits_confirmed, casualties, injuries — too early for confirmed damage reports.
273
- If a message discusses casualties or confirmed hits, verify the timing carefully - it may be about a previous attack.`;
274
- case "resolved":
275
- return `PHASE: RESOLVED (incident over, assessing damage).
276
- Focus on: intercepted (final count), hits_confirmed, casualties, injuries, open_area_impact.
277
- All fields are valid at this stage. Prioritize confirmed official reports.`;
77
+ // Step 2: collect posts from relevant channels only
78
+ const postsToExtract = [];
79
+ for (const ch of state.tracking.channels_with_updates) {
80
+ const match = relevantChannels.some((rc) => rc === ch.channel || rc === `@${ch.channel}` || `@${rc}` === ch.channel);
81
+ if (match) {
82
+ postsToExtract.push(...ch.last_tracked_messages);
83
+ }
278
84
  }
279
- }
280
- const SYSTEM_PROMPT_BASE = `You analyze Telegram channel messages about a missile/rocket attack on Israel.
281
- Your job: extract factual data, assess quality, AND validate temporal relevance.
282
-
283
- CRITICAL — TIME VALIDATION:
284
- You will receive the alert time and the post time. You MUST determine if this post
285
- is about the CURRENT attack or about a previous/different event.
286
- - If post discusses events clearly BEFORE the alert time → time_relevance=0
287
- - If post is generic military news not specific to this attack → time_relevance=0.2
288
- - If post discusses the current attack → time_relevance=1.0
289
- - If uncertain → time_relevance=0.5 (the system will use alert_history to verify)
290
-
291
- Return ONLY valid JSON (no markdown, no explanation):
292
- {
293
- "region_relevance": float, // 0–1: does this message discuss the specified alert region?
294
- "source_trust": float, // 0–1: factual reporting (1.0) vs unverified rumors/panic (0.0)
295
- "tone": "calm"|"neutral"|"alarmist",
296
- "time_relevance": float, // 0–1: is this post about the CURRENT attack? (see rules above)
297
- "country_origin": string|null, // "Iran","Yemen","Lebanon","Gaza","Iraq","Syria" or null
298
- "rocket_count": int|null,
299
- "is_cassette": bool|null,
300
- "intercepted": int|null,
301
- "intercepted_qual": ${QUAL_VALUES}|null,
302
- "intercepted_qual_num": int|null,
303
- "sea_impact": int|null,
304
- "sea_impact_qual": ${QUAL_VALUES}|null,
305
- "sea_impact_qual_num": int|null,
306
- "open_area_impact": int|null,
307
- "open_area_impact_qual": ${QUAL_VALUES}|null,
308
- "open_area_impact_qual_num": int|null,
309
- "hits_confirmed": int|null,
310
- "casualties": int|null,
311
- "injuries": int|null,
312
- "eta_refined_minutes": int|null,
313
- "confidence": float
314
- }
315
-
316
- Rules:
317
- - If unrelated to the alert region, set region_relevance=0 and all data fields to null.
318
- - If message is speculative/unconfirmed rumor, set source_trust < 0.4.
319
- - If message uses excessive caps, exclamation marks, panic language → tone="alarmist".
320
- - Only extract concrete numbers explicitly stated in the text. Never guess.
321
- - *_qual fields: use ONLY when NO exact count is given. If exact number present, set *_qual=null.
322
- - "none" qual is only valid if explicitly stated (e.g., "все перехвачены", "не упало в море").
323
- - For IDF (@idf_telegram) posts about ongoing operations (not this specific attack) → time_relevance=0.
324
- - LANGUAGE NEUTRALITY: Posts may be in Hebrew, Russian, Arabic, or English. The language of the post
325
- MUST NOT affect source_trust or confidence. Russian-language Israeli channels are equally reliable
326
- and often break news faster than Hebrew ones. Judge ONLY by factual content and tone.
327
- - TRUST INTERCEPTION & IMPACT REPORTS: When a channel explicitly states interception results
328
- (e.g., "перехвачены", "intercepted", "יירוט", "упали в море", "fell in the sea", "נפלו בים",
329
- "open area impact", "שטח פתוח"), trust these claims with source_trust >= 0.7 and confidence >= 0.7.
330
- Israeli Telegram channels often report interception results before official confirmation,
331
- and these reports are typically accurate. Do NOT downgrade these just because they lack official source.`;
332
- async function extractAndValidate(state) {
333
- if (state.filteredPosts.length === 0) {
334
- logger.info("Agent: no filtered posts to extract", {
85
+ if (postsToExtract.length === 0) {
86
+ logger.info("Agent: no posts from relevant channels", {
335
87
  alertId: state.alertId,
336
88
  });
337
89
  return { extractions: [] };
338
90
  }
339
- const llm = getLLM();
340
- const posts = state.filteredPosts.slice(0, 8); // max 8 posts
341
- const regionHint = state.alertAreas.length > 0
342
- ? state.alertAreas.join(", ")
343
- : Object.keys(config.agent.areaLabels).join(", ") || "Israel";
344
- const alertTimeIL = toIsraelTime(state.alertTs);
345
- const nowIL = toIsraelTime(Date.now());
346
- const phaseInstructions = getPhaseInstructions(state.alertType);
347
- const systemPrompt = SYSTEM_PROMPT_BASE + "\n\n" + phaseInstructions;
348
- const results = await Promise.all(posts.map(async (post) => {
349
- const postTimeIL = toIsraelTime(post.ts);
350
- const postAgeMin = Math.round((state.alertTs - post.ts) / 60_000);
351
- const postAgeSuffix = postAgeMin > 0
352
- ? `(${postAgeMin} min BEFORE alert)`
353
- : postAgeMin < 0
354
- ? `(${Math.abs(postAgeMin)} min AFTER alert)`
355
- : "(same time as alert)";
356
- const contextHeader = `Alert time: ${alertTimeIL} (Israel)\n` +
357
- `Post time: ${postTimeIL} (Israel) ${postAgeSuffix}\n` +
358
- `Current time: ${nowIL} (Israel)\n` +
359
- `Alert region: ${regionHint}\n` +
360
- `UI language: ${config.language}\n`;
361
- try {
362
- const response = await llm.invoke([
363
- { role: "system", content: systemPrompt },
364
- {
365
- role: "user",
366
- content: `${contextHeader}Channel: ${post.channel}\n\nMessage:\n${post.text.slice(0, 800)}`,
367
- },
368
- ]);
369
- const raw = typeof response.content === "string"
370
- ? response.content
371
- : JSON.stringify(response.content);
372
- const text = raw
373
- .replace(/^```(?:json)?\s*\n?/i, "")
374
- .replace(/\n?```\s*$/i, "");
375
- const parsed = JSON.parse(text.trim());
376
- return {
377
- ...parsed,
378
- channel: post.channel,
379
- messageUrl: post.messageUrl,
380
- time_relevance: parsed.time_relevance ?? 0.5,
381
- valid: true,
382
- };
383
- }
384
- catch (err) {
385
- logger.warn("Agent: extraction failed", {
386
- channel: post.channel,
387
- error: String(err),
388
- });
389
- return {
390
- channel: post.channel,
391
- region_relevance: 0,
392
- source_trust: 0,
393
- tone: "neutral",
394
- time_relevance: 0,
395
- country_origin: null,
396
- rocket_count: null,
397
- is_cassette: null,
398
- intercepted: null,
399
- intercepted_qual: null,
400
- intercepted_qual_num: null,
401
- sea_impact: null,
402
- sea_impact_qual: null,
403
- sea_impact_qual_num: null,
404
- open_area_impact: null,
405
- open_area_impact_qual: null,
406
- open_area_impact_qual_num: null,
407
- hits_confirmed: null,
408
- casualties: null,
409
- injuries: null,
410
- eta_refined_minutes: null,
411
- confidence: 0,
412
- valid: false,
413
- reject_reason: "extraction_error",
414
- };
415
- }
416
- }));
417
- logger.info("Agent: extracted", {
91
+ // Step 3: expensive extraction with post-level dedup
92
+ const ctx = {
93
+ alertTs: state.alertTs,
94
+ alertType: state.alertType,
95
+ alertAreas: state.alertAreas,
418
96
  alertId: state.alertId,
419
- count: results.length,
420
- timeRelevance: results.map((r) => ({
421
- ch: r.channel,
422
- tr: r.time_relevance,
423
- conf: r.confidence,
424
- origin: r.country_origin,
425
- intercepted: r.intercepted ?? r.intercepted_qual,
426
- sea: r.sea_impact ?? r.sea_impact_qual,
427
- valid: r.valid,
428
- })),
429
- });
430
- return { extractions: results };
431
- }
432
- // ─────────────────────────────────────────────────────────
433
- // Tier 2: Post-filter (deterministic, 0 tokens)
434
- // Now includes TIME RELEVANCE check.
435
- // ─────────────────────────────────────────────────────────
436
- function postFilter(state) {
437
- const validated = state.extractions.map((ext) => {
438
- // V0: TIME RELEVANCE — the most important check
439
- if (ext.time_relevance < 0.5) {
440
- return { ...ext, valid: false, reject_reason: "stale_post" };
441
- }
442
- // V1: region relevance
443
- if (ext.region_relevance < 0.5) {
444
- return { ...ext, valid: false, reject_reason: "region_irrelevant" };
445
- }
446
- // V2: source trust
447
- if (ext.source_trust < 0.4) {
448
- return { ...ext, valid: false, reject_reason: "untrusted_source" };
449
- }
450
- // V3: tone — reject alarmist
451
- if (ext.tone === "alarmist") {
452
- return { ...ext, valid: false, reject_reason: "alarmist_tone" };
453
- }
454
- // V4: at least one data field must be non-null
455
- const hasData = ext.country_origin !== null ||
456
- ext.rocket_count !== null ||
457
- ext.is_cassette !== null ||
458
- ext.intercepted !== null ||
459
- ext.intercepted_qual !== null ||
460
- ext.hits_confirmed !== null ||
461
- ext.casualties !== null ||
462
- ext.injuries !== null ||
463
- ext.eta_refined_minutes !== null;
464
- if (!hasData) {
465
- return { ...ext, valid: false, reject_reason: "no_data" };
466
- }
467
- // V5: overall confidence floor
468
- if (ext.confidence < 0.3) {
469
- return { ...ext, valid: false, reject_reason: "low_confidence" };
470
- }
471
- return { ...ext, valid: true };
472
- });
473
- const passed = validated.filter((e) => e.valid);
474
- const rejected = validated.filter((e) => !e.valid);
475
- logger.info("Agent: post-filter", {
476
- alertId: state.alertId,
477
- passed: passed.length,
478
- rejected: rejected.length,
479
- reasons: rejected.map((r) => `${r.channel}:${r.reject_reason}`),
480
- passed_channels: passed.map((p) => p.channel),
481
- });
482
- return { extractions: validated };
483
- }
484
- // ─────────────────────────────────────────────────────────
485
- // Tier 3: Vote (deterministic, 0 tokens)
486
- // ─────────────────────────────────────────────────────────
487
- function vote(state) {
488
- const valid = state.extractions.filter((e) => e.valid);
489
- if (valid.length === 0) {
490
- return { votedResult: null };
491
- }
492
- // Assign 1-based citation indices
493
- const indexed = valid.map((e, i) => ({ ...e, idx: i + 1 }));
494
- const citedSources = indexed.map((e) => ({
495
- index: e.idx,
496
- channel: e.channel,
497
- messageUrl: e.messageUrl ?? null,
498
- }));
499
- // ETA: highest confidence source
500
- const withEta = indexed
501
- .filter((e) => e.eta_refined_minutes !== null)
502
- .sort((a, b) => b.confidence - a.confidence);
503
- const bestEta = withEta[0] ?? null;
504
- // Country: group unique values
505
- const countryMap = new Map();
506
- for (const e of indexed) {
507
- if (e.country_origin) {
508
- const list = countryMap.get(e.country_origin) ?? [];
509
- list.push(e.idx);
510
- countryMap.set(e.country_origin, list);
511
- }
512
- }
513
- const country_origins = countryMap.size > 0
514
- ? Array.from(countryMap.entries()).map(([name, citations]) => ({
515
- name,
516
- citations,
517
- }))
518
- : null;
519
- // Rocket count: range
520
- const rocketSrcs = indexed.filter((e) => e.rocket_count !== null);
521
- const rocketVals = rocketSrcs.map((e) => e.rocket_count);
522
- const rocket_count_min = rocketVals.length > 0 ? Math.min(...rocketVals) : null;
523
- const rocket_count_max = rocketVals.length > 0 ? Math.max(...rocketVals) : null;
524
- const rocket_citations = rocketSrcs.map((e) => e.idx);
525
- // Helper: avg weighted confidence
526
- function fieldConf(srcs) {
527
- if (srcs.length === 0)
528
- return 0;
529
- return (srcs.reduce((s, e) => s + e.source_trust * e.confidence, 0) / srcs.length);
530
- }
531
- // Helper: mode for QualCount
532
- function modeQual(srcs, key) {
533
- const vals = srcs
534
- .map((e) => e[key])
535
- .filter((v) => v !== null);
536
- if (vals.length === 0)
537
- return null;
538
- const freq = new Map();
539
- for (const v of vals)
540
- freq.set(v, (freq.get(v) ?? 0) + 1);
541
- return [...freq.entries()].sort((a, b) => b[1] - a[1])[0][0];
542
- }
543
- function medianQualNum(srcs, key) {
544
- const vals = srcs
545
- .map((e) => e[key])
546
- .filter((v) => v !== null)
547
- .sort((a, b) => a - b);
548
- return vals.length > 0 ? vals[Math.floor(vals.length / 2)] : null;
549
- }
550
- // Cassette: majority
551
- const cassSrcs = indexed.filter((e) => e.is_cassette !== null);
552
- const cassVals = cassSrcs.map((e) => e.is_cassette);
553
- const is_cassette = cassVals.length > 0
554
- ? cassVals.filter(Boolean).length > cassVals.length / 2
555
- : null;
556
- const is_cassette_confidence = fieldConf(cassSrcs);
557
- // Hits: median
558
- const hitsSrcs = indexed.filter((e) => e.hits_confirmed !== null && e.hits_confirmed > 0);
559
- const hitsVals = indexed
560
- .filter((e) => e.hits_confirmed !== null)
561
- .map((e) => e.hits_confirmed)
562
- .sort((a, b) => a - b);
563
- const hits_confirmed = hitsVals.length > 0 ? hitsVals[Math.floor(hitsVals.length / 2)] : null;
564
- const hits_citations = hitsSrcs.map((e) => e.idx);
565
- const hits_confidence = fieldConf(hitsSrcs);
566
- // Intercepted: median / qual
567
- const interceptedSrcs = indexed.filter((e) => e.intercepted !== null);
568
- const interceptedQualSrcs = indexed.filter((e) => e.intercepted_qual !== null);
569
- const interceptedVals = interceptedSrcs
570
- .map((e) => e.intercepted)
571
- .sort((a, b) => a - b);
572
- const intercepted = interceptedVals.length > 0
573
- ? interceptedVals[Math.floor(interceptedVals.length / 2)]
574
- : null;
575
- const intercepted_qual = intercepted === null
576
- ? modeQual(interceptedQualSrcs, "intercepted_qual")
577
- : null;
578
- const intercepted_qual_num = intercepted_qual !== null
579
- ? medianQualNum(interceptedQualSrcs, "intercepted_qual_num")
580
- : null;
581
- const intercepted_confidence = fieldConf(interceptedSrcs.length > 0 ? interceptedSrcs : interceptedQualSrcs);
582
- // Sea impact: median / qual
583
- const seaSrcs = indexed.filter((e) => e.sea_impact !== null);
584
- const seaQualSrcs = indexed.filter((e) => e.sea_impact_qual !== null);
585
- const seaVals = seaSrcs
586
- .map((e) => e.sea_impact)
587
- .sort((a, b) => a - b);
588
- const sea_impact = seaVals.length > 0 ? seaVals[Math.floor(seaVals.length / 2)] : null;
589
- const sea_impact_qual = sea_impact === null ? modeQual(seaQualSrcs, "sea_impact_qual") : null;
590
- const sea_impact_qual_num = sea_impact_qual !== null
591
- ? medianQualNum(seaQualSrcs, "sea_impact_qual_num")
592
- : null;
593
- const sea_confidence = fieldConf(seaSrcs.length > 0 ? seaSrcs : seaQualSrcs);
594
- // Open area impact: median / qual
595
- const openSrcs = indexed.filter((e) => e.open_area_impact !== null);
596
- const openQualSrcs = indexed.filter((e) => e.open_area_impact_qual !== null);
597
- const openVals = openSrcs
598
- .map((e) => e.open_area_impact)
599
- .sort((a, b) => a - b);
600
- const open_area_impact = openVals.length > 0 ? openVals[Math.floor(openVals.length / 2)] : null;
601
- const open_area_impact_qual = open_area_impact === null
602
- ? modeQual(openQualSrcs, "open_area_impact_qual")
603
- : null;
604
- const open_area_impact_qual_num = open_area_impact_qual !== null
605
- ? medianQualNum(openQualSrcs, "open_area_impact_qual_num")
606
- : null;
607
- const open_area_confidence = fieldConf(openSrcs.length > 0 ? openSrcs : openQualSrcs);
608
- // Casualties
609
- const casualtySrcs = indexed.filter((e) => e.casualties !== null && e.casualties > 0);
610
- const casualtyVals = casualtySrcs
611
- .map((e) => e.casualties)
612
- .sort((a, b) => a - b);
613
- const casualties = casualtyVals.length > 0
614
- ? casualtyVals[Math.floor(casualtyVals.length / 2)]
615
- : null;
616
- const casualties_citations = casualtySrcs.map((e) => e.idx);
617
- const casualties_confidence = fieldConf(casualtySrcs);
618
- // Injuries
619
- const injurySrcs = indexed.filter((e) => e.injuries !== null && e.injuries > 0);
620
- const injuryVals = injurySrcs
621
- .map((e) => e.injuries)
622
- .sort((a, b) => a - b);
623
- const injuries = injuryVals.length > 0
624
- ? injuryVals[Math.floor(injuryVals.length / 2)]
625
- : null;
626
- const injuries_citations = injurySrcs.map((e) => e.idx);
627
- const injuries_confidence = fieldConf(injurySrcs);
628
- // Rocket confidence
629
- const rocket_confidence = fieldConf(rocketSrcs);
630
- // Overall weighted confidence
631
- const totalWeight = indexed.reduce((s, e) => s + e.source_trust * e.confidence, 0);
632
- const weightedConf = totalWeight / indexed.length;
633
- const voted = {
634
- eta_refined_minutes: bestEta?.eta_refined_minutes ?? null,
635
- eta_citations: bestEta ? [bestEta.idx] : [],
636
- country_origins,
637
- rocket_count_min,
638
- rocket_count_max,
639
- rocket_citations,
640
- rocket_confidence,
641
- is_cassette,
642
- is_cassette_confidence,
643
- intercepted,
644
- intercepted_qual,
645
- intercepted_qual_num,
646
- intercepted_confidence,
647
- sea_impact,
648
- sea_impact_qual,
649
- sea_impact_qual_num,
650
- sea_confidence,
651
- open_area_impact,
652
- open_area_impact_qual,
653
- open_area_impact_qual_num,
654
- open_area_confidence,
655
- hits_confirmed,
656
- hits_citations,
657
- hits_confidence,
658
- casualties,
659
- casualties_citations,
660
- casualties_confidence,
661
- injuries,
662
- injuries_citations,
663
- injuries_confidence,
664
- confidence: Math.round(weightedConf * 100) / 100,
665
- sources_count: indexed.length,
666
- citedSources,
97
+ language: config.language,
667
98
  };
668
- logger.info("Agent: voted", { alertId: state.alertId, voted });
669
- return { votedResult: voted };
670
- }
671
- // ─────────────────────────────────────────────────────────
672
- // Tier 4: Edit message — inline citations, carry-forward
673
- // ─────────────────────────────────────────────────────────
674
- /** EN country name → Russian */
675
- const COUNTRY_RU = {
676
- Iran: "Иран",
677
- Yemen: "Йемен",
678
- Lebanon: "Ливан",
679
- Gaza: "Газа",
680
- Iraq: "Ирак",
681
- Syria: "Сирия",
682
- Hezbollah: "Хезболла",
683
- };
684
- /** Format inline citations: [[1]](url), [[2]](url) */
685
- function inlineCites(indices, citedSources) {
686
- const parts = [];
687
- for (const idx of indices) {
688
- const src = citedSources.find((s) => s.index === idx);
689
- if (src?.messageUrl) {
690
- parts.push(`<a href="${src.messageUrl}">[${idx}]</a>`);
691
- }
692
- }
693
- return parts.length > 0 ? " " + parts.join(", ") : "";
694
- }
695
- /** Get InlineCite[] from citation indices */
696
- function extractCites(indices, citedSources) {
697
- const cites = [];
698
- for (const idx of indices) {
699
- const src = citedSources.find((s) => s.index === idx);
700
- if (src?.messageUrl) {
701
- cites.push({ url: src.messageUrl, channel: src.channel });
702
- }
703
- }
704
- return cites;
705
- }
706
- /** Format inline citations from InlineCite[] (for carry-forward data) */
707
- function inlineCitesFromData(cites) {
708
- if (cites.length === 0)
709
- return "";
710
- return (" " + cites.map((c, i) => `<a href="${c.url}">[${i + 1}]</a>`).join(", "));
711
- }
712
- // Confidence thresholds
713
- const SKIP = 0.6;
714
- const UNCERTAIN = 0.75;
715
- const CERTAIN = 0.95;
716
- function qualDisplay(qual, qualNum, conf) {
717
- if (qual === null)
718
- return null;
719
- if (qual === "none")
720
- return conf >= CERTAIN ? "нет" : null;
721
- const map = {
722
- all: "все",
723
- most: "большинство",
724
- many: "много",
725
- few: "несколько",
726
- exists: "есть",
727
- none: "нет",
728
- more_than: qualNum != null ? `>${qualNum}` : ">1",
729
- less_than: qualNum != null ? `<${qualNum}` : "<нескольких",
730
- };
731
- return map[qual];
732
- }
733
- function breakdownItem(label, num, qual, qualNum, conf) {
734
- if (conf < SKIP)
735
- return null;
736
- const u = conf < UNCERTAIN ? " (?)" : "";
737
- if (num !== null)
738
- return `${label} — ${num}${u}`;
739
- const qs = qualDisplay(qual, qualNum, conf);
740
- if (qs === null)
741
- return null;
742
- return `${label} — ${qs}${u}`;
743
- }
744
- /**
745
- * Build enrichment data from current vote + previous enrichment (carry-forward).
746
- * Returns updated EnrichmentData for Redis persistence.
747
- */
748
- function buildEnrichmentFromVote(r, prev, alertType, alertTs) {
749
- const data = { ...prev };
750
- // Origin — update if voted has it
751
- if (r.country_origins && r.country_origins.length > 0) {
752
- data.origin = r.country_origins
753
- .map((c) => COUNTRY_RU[c.name] ?? c.name)
754
- .join(" + ");
755
- data.originCites = r.country_origins.flatMap((c) => extractCites(c.citations, r.citedSources));
756
- }
757
- // ETA — only for early_warning/siren
758
- if (r.eta_refined_minutes !== null &&
759
- (alertType === "early_warning" || alertType === "siren")) {
760
- const absTime = new Date(alertTs + r.eta_refined_minutes * 60_000).toLocaleTimeString("he-IL", {
761
- hour: "2-digit",
762
- minute: "2-digit",
763
- timeZone: "Asia/Jerusalem",
764
- });
765
- data.etaAbsolute = `~${absTime}`;
766
- data.etaCites = extractCites(r.eta_citations, r.citedSources);
767
- }
768
- // Rocket count
769
- if (r.rocket_count_min !== null && r.rocket_count_max !== null) {
770
- const u = r.rocket_confidence < UNCERTAIN ? " (?)" : "";
771
- data.rocketCount =
772
- r.rocket_count_min === r.rocket_count_max
773
- ? `${r.rocket_count_min}${u}`
774
- : `~${r.rocket_count_min}–${r.rocket_count_max}${u}`;
775
- data.rocketCites = extractCites(r.rocket_citations, r.citedSources);
776
- }
777
- // Cassette
778
- if (r.is_cassette !== null && r.is_cassette_confidence >= SKIP) {
779
- data.isCassette = r.is_cassette;
780
- }
781
- // Intercepted
782
- if (r.intercepted !== null && r.intercepted_confidence >= SKIP) {
783
- const u = r.intercepted_confidence < UNCERTAIN ? " (?)" : "";
784
- data.intercepted = `${r.intercepted}${u}`;
785
- data.interceptedCites = extractCites(r.citedSources
786
- .filter((s) => {
787
- const ext = r.citedSources.find((cs) => cs.index === s.index);
788
- return ext !== undefined;
789
- })
790
- .map((s) => s.index), r.citedSources);
791
- }
792
- else if (r.intercepted_qual !== null && r.intercepted_confidence >= SKIP) {
793
- const qs = qualDisplay(r.intercepted_qual, r.intercepted_qual_num, r.intercepted_confidence);
794
- if (qs)
795
- data.intercepted = qs;
796
- }
797
- // Hits
798
- if (r.hits_confirmed !== null &&
799
- r.hits_confirmed > 0 &&
800
- r.hits_confidence >= SKIP) {
801
- const u = r.hits_confidence < UNCERTAIN ? " (?)" : "";
802
- data.hitsConfirmed = `${r.hits_confirmed}${u}`;
803
- data.hitsCites = extractCites(r.hits_citations, r.citedSources);
804
- }
805
- // Casualties
806
- if (r.casualties !== null &&
807
- r.casualties > 0 &&
808
- r.casualties_confidence >= SKIP) {
809
- const u = r.casualties_confidence < UNCERTAIN ? " (?)" : "";
810
- data.casualties = `${r.casualties}${u}`;
811
- data.casualtiesCites = extractCites(r.casualties_citations, r.citedSources);
812
- }
813
- // Injuries
814
- if (r.injuries !== null && r.injuries > 0 && r.injuries_confidence >= SKIP) {
815
- const u = r.injuries_confidence < UNCERTAIN ? " (?)" : "";
816
- data.injuries = `${r.injuries}${u}`;
817
- data.injuriesCites = extractCites(r.injuries_citations, r.citedSources);
818
- }
819
- // Early warning time — record when first early_warning was received
820
- if (alertType === "early_warning" && !data.earlyWarningTime) {
821
- data.earlyWarningTime = toIsraelTime(alertTs);
822
- }
823
- return data;
824
- }
825
- /**
826
- * Build the enriched message text from current message + enrichment data.
827
- * Uses inline [[1]](url) citations. No superscripts. No footer sources.
828
- */
829
- function buildEnrichedMessage(currentText, alertType, alertTs, enrichment) {
830
- let text = currentText;
831
- // ── Refine ETA in-place ──
832
- if (enrichment.etaAbsolute &&
833
- (alertType === "early_warning" || alertType === "siren")) {
834
- const etaCiteStr = inlineCitesFromData(enrichment.etaCites);
835
- const refined = `${enrichment.etaAbsolute}${etaCiteStr}`;
836
- const etaPatterns = [
837
- /~\d+[–-]\d+\s*мин/, // ~5–12 мин
838
- /~\d+[–-]\d+\s*min/, // ~5–12 min
839
- /~\d+[–-]\d+\s*דקות/, // ~5–12 דקות
840
- /~\d+[–-]\d+\s*دقيقة/, // ~5–12 دقيقة
841
- /1\.5\s*мин/, // 1.5 мин (siren)
842
- /1\.5\s*min/,
843
- /1\.5\s*דקות/,
844
- /1\.5\s*دقيقة/,
845
- ];
846
- for (const pattern of etaPatterns) {
847
- if (pattern.test(text)) {
848
- text = text.replace(pattern, refined);
849
- break;
850
- }
851
- }
852
- }
853
- // ── Siren: show "Раннее предупреждение: было в HH:MM" ──
854
- if (alertType === "siren" && enrichment.earlyWarningTime) {
855
- text = insertBeforeTimeLine(text, `<b>Раннее предупреждение:</b> было в ${enrichment.earlyWarningTime}`);
856
- }
857
- // ── Origin ──
858
- if (enrichment.origin) {
859
- const citeStr = inlineCitesFromData(enrichment.originCites);
860
- text = insertBeforeTimeLine(text, `\n<b>Откуда:</b> ${enrichment.origin}${citeStr}`);
861
- }
862
- // ── Rocket count + breakdown ──
863
- if (enrichment.rocketCount) {
864
- const citeStr = inlineCitesFromData(enrichment.rocketCites);
865
- const cassette = enrichment.isCassette ? ", есть кассетные" : "";
866
- let breakdown = "";
867
- const bParts = [];
868
- if (enrichment.intercepted) {
869
- bParts.push(`перехвачено — ${enrichment.intercepted}`);
870
- }
871
- if (enrichment.seaImpact) {
872
- bParts.push(`упали в море — ${enrichment.seaImpact}`);
873
- }
874
- if (enrichment.openAreaImpact) {
875
- bParts.push(`открытая местность — ${enrichment.openAreaImpact}`);
876
- }
877
- if (bParts.length > 0)
878
- breakdown = `, из них: ${bParts.join(", ")}`;
879
- text = insertBeforeTimeLine(text, `<b>Ракет:</b> ${enrichment.rocketCount}${breakdown}${cassette}${citeStr}`);
880
- }
881
- else if (enrichment.intercepted && alertType !== "early_warning") {
882
- // No rocket count but have interception data
883
- const citeStr = inlineCitesFromData(enrichment.interceptedCites);
884
- text = insertBeforeTimeLine(text, `<b>Перехвачено:</b> ${enrichment.intercepted}${citeStr}`);
885
- }
886
- // ── Hits ──
887
- if (enrichment.hitsConfirmed && alertType !== "early_warning") {
888
- const areaLabel = Object.values(config.agent.areaLabels)[0] ?? "район";
889
- const citeStr = inlineCitesFromData(enrichment.hitsCites);
890
- text = insertBeforeTimeLine(text, `<b>Попадания (${areaLabel}):</b> ${enrichment.hitsConfirmed}${citeStr}`);
891
- }
892
- // ── Casualties / Injuries (resolved only) ──
893
- if (enrichment.casualties && alertType === "resolved") {
894
- const citeStr = inlineCitesFromData(enrichment.casualtiesCites);
895
- text = insertBeforeTimeLine(text, `<b>Погибшие:</b> ${enrichment.casualties}${citeStr}`);
896
- }
897
- if (enrichment.injuries && alertType === "resolved") {
898
- const citeStr = inlineCitesFromData(enrichment.injuriesCites);
899
- text = insertBeforeTimeLine(text, `<b>Пострадавшие:</b> ${enrichment.injuries}${citeStr}`);
900
- }
901
- return text;
902
- }
903
- /**
904
- * Insert a line before the time line (last "Время" / "Time" / "שעת" line).
905
- */
906
- function insertBeforeTimeLine(text, line) {
907
- const timePattern = /(<b>(?:Время оповещения|Alert time|שעת ההתרעה|وقت الإنذار):<\/b>)/;
908
- const match = text.match(timePattern);
909
- if (match?.index !== undefined) {
910
- return text.slice(0, match.index) + line + "\n" + text.slice(match.index);
911
- }
912
- const lines = text.split("\n");
913
- lines.splice(Math.max(lines.length - 1, 0), 0, line);
914
- return lines.join("\n");
915
- }
916
- async function editMessage(state) {
917
- const { votedResult } = state;
918
- if (!config.botToken)
919
- return {};
920
- const tgBot = new Bot(config.botToken);
921
- // No valid sources — carry forward previous data only
922
- const prevEnrichment = state.previousEnrichment ?? emptyEnrichmentData();
923
- if (!votedResult) {
924
- // No new data from channels — still try to build message from carry-forward
925
- if (prevEnrichment.origin || prevEnrichment.intercepted) {
926
- // Have carry-forward data, build message
927
- const newText = buildEnrichedMessage(state.currentText, state.alertType, state.alertTs, prevEnrichment);
928
- const hash = textHash(newText);
929
- if (hash === prevEnrichment.lastEditHash) {
930
- logger.info("Agent: no change in message (dedup) — skipping edit", {
931
- alertId: state.alertId,
932
- });
933
- return {};
934
- }
935
- try {
936
- if (state.isCaption) {
937
- await tgBot.api.editMessageCaption(state.chatId, state.messageId, {
938
- caption: newText,
939
- parse_mode: "HTML",
940
- });
941
- }
942
- else {
943
- await tgBot.api.editMessageText(state.chatId, state.messageId, newText, { parse_mode: "HTML" });
944
- }
945
- prevEnrichment.lastEditHash = hash;
946
- await saveEnrichmentData(prevEnrichment);
947
- logger.info("Agent: message enriched (carry-forward only)", {
948
- alertId: state.alertId,
949
- messageId: state.messageId,
950
- });
951
- }
952
- catch (err) {
953
- const errStr = String(err);
954
- if (errStr.includes("message is not modified")) {
955
- prevEnrichment.lastEditHash = hash;
956
- await saveEnrichmentData(prevEnrichment);
957
- logger.info("Agent: message already up-to-date (dedup)", {
958
- alertId: state.alertId,
959
- });
960
- }
961
- else {
962
- logger.error("Agent: failed to edit message", {
963
- alertId: state.alertId,
964
- error: errStr,
965
- });
966
- }
967
- }
968
- }
969
- else {
970
- logger.info("Agent: no voted result — skipping edit", {
971
- alertId: state.alertId,
972
- });
973
- }
974
- return {};
975
- }
976
- // Build enrichment data: merge vote + previous
977
- const enrichment = buildEnrichmentFromVote(votedResult, prevEnrichment, state.alertType, state.alertTs);
978
- const newText = buildEnrichedMessage(state.currentText, state.alertType, state.alertTs, enrichment);
979
- // Dedup: skip if text hasn't changed
980
- const hash = textHash(newText);
981
- if (hash === enrichment.lastEditHash) {
982
- logger.info("Agent: no change in message (dedup) — skipping edit", {
983
- alertId: state.alertId,
984
- });
985
- return {};
986
- }
987
- // Low confidence: log but still show data with (?) markers
988
- if (votedResult.confidence < config.agent.confidenceThreshold) {
989
- logger.info("Agent: confidence below threshold — editing with (?) markers", {
990
- alertId: state.alertId,
991
- confidence: votedResult.confidence,
992
- threshold: config.agent.confidenceThreshold,
993
- });
994
- }
995
- try {
996
- if (state.isCaption) {
997
- await tgBot.api.editMessageCaption(state.chatId, state.messageId, {
998
- caption: newText,
999
- parse_mode: "HTML",
1000
- });
1001
- }
1002
- else {
1003
- await tgBot.api.editMessageText(state.chatId, state.messageId, newText, {
1004
- parse_mode: "HTML",
1005
- });
1006
- }
1007
- enrichment.lastEditHash = hash;
1008
- await saveEnrichmentData(enrichment);
1009
- logger.info("Agent: message enriched", {
99
+ const raw = await extractPosts(postsToExtract, ctx);
100
+ // Step 4: deterministic post-filter
101
+ const filtered = postFilter(raw, state.alertId);
102
+ // Update timestamp for next job's dedup split
103
+ await setLastUpdateTs(Date.now());
104
+ return { extractions: filtered };
105
+ }
106
+ // ── Node: vote ─────────────────────────────────────────
107
+ function voteNode(state) {
108
+ return { votedResult: vote(state.extractions, state.alertId) };
109
+ }
110
+ // ── Node: clarify (MCP tool calling) ───────────────────
111
+ async function clarifyNode(state) {
112
+ if (!state.votedResult) {
113
+ logger.info("Agent: clarify skipped — no voted result", {
1010
114
  alertId: state.alertId,
1011
- messageId: state.messageId,
1012
- confidence: votedResult.confidence,
1013
- sources: votedResult.sources_count,
1014
- phase: state.alertType,
1015
115
  });
1016
- }
1017
- catch (err) {
1018
- const errStr = String(err);
1019
- if (errStr.includes("message is not modified")) {
1020
- enrichment.lastEditHash = hash;
1021
- await saveEnrichmentData(enrichment);
1022
- logger.info("Agent: message already up-to-date (dedup)", {
1023
- alertId: state.alertId,
1024
- });
1025
- }
1026
- else {
1027
- logger.error("Agent: failed to edit message", {
1028
- alertId: state.alertId,
1029
- error: errStr,
1030
- });
1031
- }
1032
- }
1033
- return {};
1034
- }
1035
- // ─────────────────────────────────────────────────────────
1036
- // Clarify Node — MCP tool calling via ReAct (conditional)
1037
- // ─────────────────────────────────────────────────────────
1038
- async function clarifyNode(state) {
1039
- const { votedResult, extractions, alertId, alertAreas, alertType, alertTs, messageId, currentText, } = state;
1040
- if (!votedResult) {
1041
- logger.info("Agent: clarify skipped — no voted result", { alertId });
1042
116
  return { clarifyAttempted: true };
1043
117
  }
1044
118
  logger.info("Agent: clarify triggered", {
1045
- alertId,
1046
- confidence: votedResult.confidence,
1047
- threshold: config.agent.confidenceThreshold,
1048
- phase: alertType,
119
+ alertId: state.alertId,
120
+ confidence: state.votedResult.confidence,
1049
121
  });
1050
122
  try {
1051
123
  const result = await runClarify({
1052
- alertId,
1053
- alertAreas,
1054
- alertType,
1055
- alertTs,
1056
- messageId,
1057
- currentText,
1058
- extractions,
1059
- votedResult,
1060
- });
1061
- const mergedExtractions = [...extractions, ...result.newExtractions];
1062
- logger.info("Agent: clarify completed", {
1063
- alertId,
1064
- toolCalls: result.toolCallCount,
1065
- clarified: result.clarified,
1066
- newExtractions: result.newExtractions.length,
1067
- newPosts: result.newPosts.length,
124
+ alertId: state.alertId,
125
+ alertAreas: state.alertAreas,
126
+ alertType: state.alertType,
127
+ alertTs: state.alertTs,
128
+ messageId: state.messageId,
129
+ currentText: state.currentText,
130
+ extractions: state.extractions,
131
+ votedResult: state.votedResult,
1068
132
  });
1069
133
  return {
1070
- extractions: mergedExtractions,
134
+ extractions: [...state.extractions, ...result.newExtractions],
1071
135
  votedResult: null,
1072
136
  clarifyAttempted: true,
1073
137
  };
1074
138
  }
1075
139
  catch (err) {
1076
140
  logger.error("Agent: clarify failed", {
1077
- alertId,
141
+ alertId: state.alertId,
1078
142
  error: String(err),
1079
143
  });
1080
144
  return { clarifyAttempted: true };
1081
145
  }
1082
146
  }
147
+ // ── Node: edit Telegram message ────────────────────────
148
+ async function editNode(state) {
149
+ await editMessage({
150
+ alertId: state.alertId,
151
+ alertTs: state.alertTs,
152
+ alertType: state.alertType,
153
+ chatId: state.chatId,
154
+ messageId: state.messageId,
155
+ isCaption: state.isCaption,
156
+ currentText: state.currentText,
157
+ votedResult: state.votedResult,
158
+ previousEnrichment: state.previousEnrichment ?? emptyEnrichmentData(),
159
+ });
160
+ return {};
161
+ }
1083
162
  // ── Conditional routing after vote ─────────────────────
1084
163
  function shouldClarify(state) {
1085
164
  if (state.clarifyAttempted)
@@ -1088,29 +167,23 @@ function shouldClarify(state) {
1088
167
  return "editMessage";
1089
168
  if (!state.votedResult)
1090
169
  return "editMessage";
1091
- // Low confidence → clarify (may use Oref tool for time validation)
1092
170
  if (state.votedResult.confidence < config.agent.confidenceThreshold) {
1093
171
  logger.info("Agent: routing to clarify (low confidence)", {
1094
172
  confidence: state.votedResult.confidence,
1095
- threshold: config.agent.confidenceThreshold,
1096
173
  });
1097
174
  return "clarify";
1098
175
  }
1099
- // Suspicious time: if the only country is unexpected for the region, verify
1100
- // This catches cases like "Lebanon" appearing on a Tel Aviv alert
1101
- // when the real attack is from Iran/Yemen
176
+ // Suspicious single-source: Lebanon for central Israel → verify
1102
177
  const origins = state.votedResult.country_origins;
1103
178
  if (origins &&
1104
179
  origins.length === 1 &&
1105
180
  state.votedResult.sources_count === 1) {
1106
- const singleOrigin = origins[0].name;
1107
- // Lebanon attacks typically don't reach central Israel
1108
- if (singleOrigin === "Lebanon" &&
181
+ if (origins[0].name === "Lebanon" &&
1109
182
  state.alertAreas.some((a) => a.includes("תל אביב") ||
1110
183
  a.includes("גוש דן") ||
1111
184
  a.includes("שרון") ||
1112
185
  a.includes("מרכז"))) {
1113
- logger.info("Agent: routing to clarify (suspicious single source: Lebanon for central Israel)", { origin: singleOrigin });
186
+ logger.info("Agent: routing to clarify (suspicious Lebanon origin)", {});
1114
187
  return "clarify";
1115
188
  }
1116
189
  }
@@ -1119,30 +192,27 @@ function shouldClarify(state) {
1119
192
  // ── Build graph ────────────────────────────────────────
1120
193
  const checkpointer = new MemorySaver();
1121
194
  function buildGraph() {
1122
- const graph = new StateGraph(AgentState)
1123
- .addNode("collectAndPreFilter", collectAndPreFilter)
1124
- .addNode("extractAndValidate", extractAndValidate)
1125
- .addNode("postFilter", postFilter)
1126
- .addNode("vote", vote)
195
+ return new StateGraph(AgentState)
196
+ .addNode("collectAndFilter", collectAndFilter)
197
+ .addNode("extract", extractNode)
198
+ .addNode("vote", voteNode)
1127
199
  .addNode("clarify", clarifyNode)
1128
- .addNode("revote", vote)
1129
- .addNode("editMessage", editMessage)
1130
- .addEdge("__start__", "collectAndPreFilter")
1131
- .addEdge("collectAndPreFilter", "extractAndValidate")
1132
- .addEdge("extractAndValidate", "postFilter")
1133
- .addEdge("postFilter", "vote")
200
+ .addNode("revote", voteNode)
201
+ .addNode("editMessage", editNode)
202
+ .addEdge("__start__", "collectAndFilter")
203
+ .addEdge("collectAndFilter", "extract")
204
+ .addEdge("extract", "vote")
1134
205
  .addConditionalEdges("vote", shouldClarify, {
1135
206
  clarify: "clarify",
1136
207
  editMessage: "editMessage",
1137
208
  })
1138
209
  .addEdge("clarify", "revote")
1139
210
  .addEdge("revote", "editMessage")
1140
- .addEdge("editMessage", "__end__");
1141
- return graph.compile({ checkpointer });
211
+ .addEdge("editMessage", "__end__")
212
+ .compile({ checkpointer });
1142
213
  }
1143
214
  export async function runEnrichment(input) {
1144
- const app = buildGraph();
1145
- await app.invoke({
215
+ await buildGraph().invoke({
1146
216
  alertId: input.alertId,
1147
217
  alertTs: input.alertTs,
1148
218
  alertType: input.alertType,
@@ -1151,37 +221,11 @@ export async function runEnrichment(input) {
1151
221
  messageId: input.messageId,
1152
222
  isCaption: input.isCaption,
1153
223
  currentText: input.currentText,
1154
- channelPosts: [],
1155
- filteredPosts: [],
224
+ tracking: null,
1156
225
  extractions: [],
1157
226
  votedResult: null,
1158
227
  clarifyAttempted: false,
1159
228
  previousEnrichment: emptyEnrichmentData(),
1160
- sessionStartTs: 0,
1161
- phaseStartTs: 0,
1162
229
  }, { configurable: { thread_id: input.alertId } });
1163
230
  }
1164
- // ── Exported for testing ───────────────────────────────
1165
- export const _test = {
1166
- getLLM,
1167
- buildRegionKeywords,
1168
- LAUNCH_KEYWORDS,
1169
- TIME_WINDOW_MS,
1170
- toIsraelTime,
1171
- textHash,
1172
- postFilter,
1173
- vote,
1174
- buildEnrichmentFromVote,
1175
- buildEnrichedMessage,
1176
- insertBeforeTimeLine,
1177
- inlineCites,
1178
- inlineCitesFromData,
1179
- extractCites,
1180
- COUNTRY_RU,
1181
- SYSTEM_PROMPT_BASE,
1182
- getPhaseInstructions,
1183
- SKIP,
1184
- UNCERTAIN,
1185
- CERTAIN,
1186
- };
1187
231
  //# sourceMappingURL=graph.js.map