easyoref 1.14.2 → 1.15.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/dist/agent/clarify.js +1 -1
  2. package/dist/agent/clarify.js.map +1 -1
  3. package/dist/agent/extract.d.ts +48 -0
  4. package/dist/agent/extract.d.ts.map +1 -0
  5. package/dist/agent/extract.js +375 -0
  6. package/dist/agent/extract.js.map +1 -0
  7. package/dist/agent/filters.d.ts +48 -0
  8. package/dist/agent/filters.d.ts.map +1 -0
  9. package/dist/agent/filters.js +124 -0
  10. package/dist/agent/filters.js.map +1 -0
  11. package/dist/agent/graph.d.ts +9 -93
  12. package/dist/agent/graph.d.ts.map +1 -1
  13. package/dist/agent/graph.js +110 -1118
  14. package/dist/agent/graph.js.map +1 -1
  15. package/dist/agent/helpers.d.ts +6 -0
  16. package/dist/agent/helpers.d.ts.map +1 -0
  17. package/dist/agent/helpers.js +15 -0
  18. package/dist/agent/helpers.js.map +1 -0
  19. package/dist/agent/message.d.ts +48 -0
  20. package/dist/agent/message.d.ts.map +1 -0
  21. package/dist/agent/message.js +353 -0
  22. package/dist/agent/message.js.map +1 -0
  23. package/dist/agent/store.d.ts +2 -0
  24. package/dist/agent/store.d.ts.map +1 -1
  25. package/dist/agent/store.js +12 -1
  26. package/dist/agent/store.js.map +1 -1
  27. package/dist/agent/types.d.ts +18 -0
  28. package/dist/agent/types.d.ts.map +1 -1
  29. package/dist/agent/types.js.map +1 -1
  30. package/dist/agent/vote.d.ts +13 -0
  31. package/dist/agent/vote.d.ts.map +1 -0
  32. package/dist/agent/vote.js +197 -0
  33. package/dist/agent/vote.js.map +1 -0
  34. package/dist/bot.js +1 -1
  35. package/dist/bot.js.map +1 -1
  36. package/dist/config.d.ts +5 -3
  37. package/dist/config.d.ts.map +1 -1
  38. package/dist/config.js +3 -2
  39. package/dist/config.js.map +1 -1
  40. package/package.json +1 -1
@@ -1,32 +1,27 @@
1
1
  /**
2
2
  * LangGraph.js enrichment pipeline — phase-aware, time-validated.
3
3
  *
4
- * KEY DESIGN PRINCIPLES:
5
- * 1. TIME IS KING — every post is validated against the alert time window.
6
- * LLM receives alert time + post time and scores time_relevance.
7
- * Posts about previous/different attacks are rejected.
8
- * 2. PHASE-AWARE — each phase extracts only what's relevant:
9
- * - early_warning: origin, ETA, rocket count, cassette
10
- * - siren: carries early data + adds interception, impacts
11
- * - resolved: carries all + adds casualties, injuries, final stats
12
- * 3. CARRY-FORWARD — results persist in Redis (EnrichmentData).
13
- * Each phase inherits previous phase's findings.
14
- * 4. INLINE CITATIONS — no superscripts, no footer sources.
15
- * Format: [[1]](url) right after each data point.
16
- * 5. DEDUP EDITS — hash-based check prevents "message not modified" spam.
4
+ * Lean orchestrator: connects filter → extract → vote → edit.
5
+ * All logic lives in dedicated modules:
6
+ * - filters.ts: deterministic noise filter, channel tracking
7
+ * - extract.ts: cheap LLM pre-filter, expensive extraction, post-filter
8
+ * - vote.ts: consensus voting (deterministic)
9
+ * - message.ts: message building, Telegram editing
10
+ * - helpers.ts: toIsraelTime, textHash
17
11
  *
18
12
  * Pipeline:
19
- * preFilter → extractAndValidate → postFilter → vote → [clarify] → editMessage
13
+ * collectAndFilter → extract → vote → [clarify → revote] → editMessage
20
14
  */
21
15
  import { Annotation, MemorySaver, StateGraph } from "@langchain/langgraph";
22
- import { ChatOpenAI } from "@langchain/openai";
23
- import { Bot } from "grammy";
24
- import { createHash } from "node:crypto";
25
16
  import { config } from "../config.js";
26
17
  import * as logger from "../logger.js";
27
18
  import { runClarify } from "./clarify.js";
28
- import { getActiveSession, getCachedExtractions, getChannelPosts, getEnrichmentData, saveCachedExtractions, saveEnrichmentData, } from "./store.js";
19
+ import { extractPosts, filterChannelsCheap, postFilter, } from "./extract.js";
20
+ import { buildChannelTracking } from "./filters.js";
21
+ import { editMessage } from "./message.js";
22
+ import { getActiveSession, getChannelPosts, getEnrichmentData, getLastUpdateTs, setLastUpdateTs, } from "./store.js";
29
23
  import { emptyEnrichmentData } from "./types.js";
24
+ import { vote } from "./vote.js";
30
25
  // ── State ──────────────────────────────────────────────
31
26
  const AgentState = Annotation.Root({
32
27
  alertId: Annotation({ reducer: (_, b) => b }),
@@ -37,1101 +32,133 @@ const AgentState = Annotation.Root({
37
32
  messageId: Annotation({ reducer: (_, b) => b }),
38
33
  isCaption: Annotation({ reducer: (_, b) => b }),
39
34
  currentText: Annotation({ reducer: (_, b) => b }),
40
- channelPosts: Annotation({ reducer: (_, b) => b }),
41
- filteredPosts: Annotation({ reducer: (_, b) => b }),
35
+ tracking: Annotation({ reducer: (_, b) => b }),
42
36
  extractions: Annotation({ reducer: (_, b) => b }),
43
37
  votedResult: Annotation({ reducer: (_, b) => b }),
44
- /** Tracks whether clarify has already run (prevents infinite loop) */
45
38
  clarifyAttempted: Annotation({ reducer: (_, b) => b }),
46
- /** Cross-phase enrichment data loaded at start */
47
39
  previousEnrichment: Annotation({ reducer: (_, b) => b }),
48
- /** Session start timestamp for time window calculations */
49
- sessionStartTs: Annotation({ reducer: (_, b) => b }),
50
- /** Phase start timestamp */
51
- phaseStartTs: Annotation({ reducer: (_, b) => b }),
52
40
  });
53
- // ── LLM ───────────────────────────────────────────────
54
- function getLLM() {
55
- return new ChatOpenAI({
56
- model: config.agent.model,
57
- configuration: {
58
- baseURL: "https://openrouter.ai/api/v1",
59
- defaultHeaders: {
60
- "HTTP-Referer": "https://github.com/mikhailkogan17/EasyOref",
61
- "X-Title": "EasyOref",
62
- },
63
- },
64
- apiKey: config.agent.apiKey,
65
- temperature: 0,
66
- maxTokens: 500,
67
- });
68
- }
69
- // ── Region keywords (Hebrew + transliterations) ────────
70
- function buildRegionKeywords() {
71
- const keywords = [];
72
- for (const area of config.areas) {
73
- keywords.push(area.toLowerCase());
74
- const first = area.split(" ")[0];
75
- if (first && first.length >= 2)
76
- keywords.push(first.toLowerCase());
77
- }
78
- for (const [he, label] of Object.entries(config.agent.areaLabels)) {
79
- keywords.push(he.toLowerCase());
80
- for (const word of label.split(/\s+/)) {
81
- if (word.length >= 3)
82
- keywords.push(word.toLowerCase());
83
- }
84
- }
85
- // Common attack-related keywords (always relevant)
86
- keywords.push("ישראל", "israel", "израиль", "ракет", "rocket", "missile", "iron dome", "כיפת ברזל", "перехват", "intercept", "צבע אדום", "red alert");
87
- return [...new Set(keywords)];
88
- }
89
- // ── Hard-ignore: Pikud HaOref official area lists ("простыня") ──
90
- const OREF_LINK_PATTERN = /oref\.org\.il/i;
91
- const OREF_OFFICIAL_CHANNEL_PATTERN = /pikud|פיקוד|oref/i;
92
- /** Detect posts that are just official Pikud HaOref area list links */
93
- function isOrefAreaList(post) {
94
- const text = post.text;
95
- // Link to official Pikud HaOref page
96
- if (OREF_LINK_PATTERN.test(text))
97
- return true;
98
- // Official channels that just echo the area list (very long, >300 chars, mostly city names)
99
- if (OREF_OFFICIAL_CHANNEL_PATTERN.test(post.channel) && text.length > 300)
100
- return true;
101
- return false;
102
- }
103
- // ── Launch detection keywords (strict — early_warning only) ──
104
- const LAUNCH_KEYWORDS = [
105
- "שיגור",
106
- "שיגורים",
107
- "שוגרו",
108
- "נורו",
109
- "зафиксированы запуски",
110
- "обнаружены запуски",
111
- "запуски ракет",
112
- "запуск ракет",
113
- "пуски ракет",
114
- "ракетный обстрел",
115
- "ракетная атака",
116
- "missile launch",
117
- "rocket launch",
118
- "barrage",
119
- "fired towards",
120
- "launches detected",
121
- "missiles fired",
122
- "שיגורים לישראל",
123
- "ירי טילים",
124
- "ירי רקטות",
125
- "إطلاق صواريخ",
126
- ].map((kw) => kw.toLowerCase());
127
- // ── Time window per phase (ms before alertTs to accept posts) ──
128
- const TIME_WINDOW_MS = {
129
- early_warning: 5 * 60 * 1000, // 5 min before alert
130
- siren: 10 * 60 * 1000, // 10 min (includes early_warning period)
131
- resolved: 30 * 60 * 1000, // 30 min (full session window)
132
- };
133
- // ── Helpers ────────────────────────────────────────────
134
- /** Format timestamp as HH:MM Israel time */
135
- function toIsraelTime(ts) {
136
- return new Date(ts).toLocaleTimeString("he-IL", {
137
- hour: "2-digit",
138
- minute: "2-digit",
139
- timeZone: "Asia/Jerusalem",
140
- });
141
- }
142
- /** MD5 hash for edit dedup */
143
- function textHash(text) {
144
- return createHash("md5").update(text).digest("hex");
145
- }
146
- // ─────────────────────────────────────────────────────────
147
- // Tier 0: Pre-filter (phase-aware, time-bounded, 0 tokens)
148
- // ─────────────────────────────────────────────────────────
149
- async function collectAndPreFilter(state) {
41
+ // ── Node: collect posts + deterministic noise filter ───
42
+ async function collectAndFilter(state) {
150
43
  const posts = await getChannelPosts(state.alertId);
151
44
  const prevEnrichment = await getEnrichmentData();
152
- // Load session for time boundaries
153
45
  const session = await getActiveSession();
154
46
  const sessionStartTs = session?.sessionStartTs ?? state.alertTs;
155
- const phaseStartTs = session?.phaseStartTs ?? state.alertTs;
47
+ const lastUpdateTs = await getLastUpdateTs();
156
48
  if (posts.length === 0) {
157
- logger.info("Agent: no posts in session", { alertId: state.alertId });
158
- return {
159
- channelPosts: posts,
160
- filteredPosts: [],
161
- previousEnrichment: prevEnrichment,
162
- sessionStartTs,
163
- phaseStartTs,
164
- };
49
+ logger.info("Agent: no posts", { alertId: state.alertId });
50
+ return { tracking: null, previousEnrichment: prevEnrichment };
165
51
  }
166
- const keywords = buildRegionKeywords();
167
- const alertType = state.alertType;
168
- const alertTs = state.alertTs;
169
- // Time window: reject posts older than window before alertTs
170
- const windowMs = TIME_WINDOW_MS[alertType];
171
- const cutoffTs = alertTs - windowMs;
172
- let filtered;
173
- if (alertType === "early_warning") {
174
- // ── STRICT launch-only filter for early warning ──
175
- // Step 1: Find posts with launch keywords, within time window
176
- const launchPosts = posts.filter((post) => {
177
- if (post.ts < cutoffTs)
178
- return false;
179
- if (isOrefAreaList(post))
180
- return false;
181
- const text = post.text.toLowerCase();
182
- return LAUNCH_KEYWORDS.some((kw) => text.includes(kw));
183
- });
184
- // Step 2: Get channels that posted about launches
185
- const channelFirstLaunchTs = new Map();
186
- for (const post of launchPosts) {
187
- const current = channelFirstLaunchTs.get(post.channel);
188
- if (current === undefined || post.ts < current) {
189
- channelFirstLaunchTs.set(post.channel, post.ts);
190
- }
191
- }
192
- // Step 3: Accept follow-up posts from launch channels only (within window)
193
- filtered = posts.filter((post) => {
194
- if (post.ts < cutoffTs)
195
- return false;
196
- const text = post.text.toLowerCase();
197
- if (!keywords.some((kw) => text.includes(kw)))
198
- return false;
199
- const firstLaunch = channelFirstLaunchTs.get(post.channel);
200
- if (firstLaunch === undefined)
201
- return false;
202
- return post.ts >= firstLaunch;
203
- });
204
- logger.info("Agent: pre-filter (early_warning)", {
205
- alertId: state.alertId,
206
- total: posts.length,
207
- launch_posts: launchPosts.length,
208
- launch_channels: channelFirstLaunchTs.size,
209
- after_filter: filtered.length,
210
- cutoff: toIsraelTime(cutoffTs),
211
- channels_breakdown: Object.fromEntries([...new Set(posts.map((p) => p.channel))].map((ch) => [
212
- ch,
213
- {
214
- total: posts.filter((p) => p.channel === ch).length,
215
- passed: filtered.filter((p) => p.channel === ch).length,
216
- },
217
- ])),
218
- oref_filtered: posts.filter(isOrefAreaList).length,
219
- });
220
- }
221
- else {
222
- // ── Siren & Resolved: broader filter, time-bounded ──
223
- filtered = posts.filter((post) => {
224
- if (post.ts < cutoffTs)
225
- return false;
226
- if (isOrefAreaList(post))
227
- return false;
228
- const text = post.text.toLowerCase();
229
- return keywords.some((kw) => text.includes(kw));
230
- });
231
- logger.info("Agent: pre-filter", {
52
+ const tracking = buildChannelTracking(posts, sessionStartTs, lastUpdateTs);
53
+ logger.info("Agent: channel tracking", {
54
+ alertId: state.alertId,
55
+ total_posts: posts.length,
56
+ channels_with_updates: tracking.channels_with_updates.length,
57
+ total_new_posts: tracking.channels_with_updates.reduce((s, c) => s + c.last_tracked_messages.length, 0),
58
+ });
59
+ return { tracking, previousEnrichment: prevEnrichment };
60
+ }
61
+ // ── Node: cheap LLM channel filter + expensive extraction ──
62
+ async function extractNode(state) {
63
+ if (!state.tracking || state.tracking.channels_with_updates.length === 0) {
64
+ logger.info("Agent: no channels with updates", {
232
65
  alertId: state.alertId,
233
- alertType,
234
- total: posts.length,
235
- after_filter: filtered.length,
236
- cutoff: toIsraelTime(cutoffTs),
237
- channels_breakdown: Object.fromEntries([...new Set(posts.map((p) => p.channel))].map((ch) => [
238
- ch,
239
- {
240
- total: posts.filter((p) => p.channel === ch).length,
241
- passed: filtered.filter((p) => p.channel === ch).length,
242
- },
243
- ])),
244
- oref_filtered: posts.filter(isOrefAreaList).length,
245
66
  });
67
+ return { extractions: [] };
246
68
  }
247
- return {
248
- channelPosts: posts,
249
- filteredPosts: filtered,
250
- previousEnrichment: prevEnrichment,
251
- sessionStartTs,
252
- phaseStartTs,
253
- };
254
- }
255
- // ─────────────────────────────────────────────────────────
256
- // Tier 1: Extract + validate (1 LLM call per post)
257
- // Phase-aware prompts — agent knows what to look for.
258
- // TIME CONTEXT — agent sees alert time + post time.
259
- // ─────────────────────────────────────────────────────────
260
- const QUAL_VALUES = '"all"|"most"|"many"|"few"|"exists"|"none"|"more_than"|"less_than"';
261
- /** Phase-specific extraction instructions */
262
- function getPhaseInstructions(alertType) {
263
- switch (alertType) {
264
- case "early_warning":
265
- return `PHASE: EARLY WARNING (radar detected launches, sirens not yet).
266
- Focus on: country_origin (WHERE were rockets launched from?), eta_refined_minutes, rocket_count, is_cassette.
267
- Do NOT extract: intercepted, sea_impact, open_area_impact, hits_confirmed, casualties, injuries — these are IMPOSSIBLE at this stage.
268
- If a message discusses interception results, it is about a PREVIOUS attack — set time_relevance=0.`;
269
- case "siren":
270
- return `PHASE: SIREN (rockets incoming, impact imminent).
271
- Focus on: country_origin (if not known yet), rocket_count, intercepted, sea_impact, open_area_impact, is_cassette.
272
- Do NOT extract: hits_confirmed, casualties, injuries — too early for confirmed damage reports.
273
- If a message discusses casualties or confirmed hits, verify the timing carefully - it may be about a previous attack.`;
274
- case "resolved":
275
- return `PHASE: RESOLVED (incident over, assessing damage).
276
- Focus on: intercepted (final count), hits_confirmed, casualties, injuries, open_area_impact.
277
- All fields are valid at this stage. Prioritize confirmed official reports.`;
278
- }
279
- }
280
- const SYSTEM_PROMPT_BASE = `You analyze Telegram channel messages about a missile/rocket attack on Israel.
281
- Your job: extract factual data, assess quality, AND validate temporal relevance.
282
-
283
- CRITICAL — TIME VALIDATION:
284
- You will receive the alert time and the post time. You MUST determine if this post
285
- is about the CURRENT attack or about a previous/different event.
286
- - If post discusses events clearly BEFORE the alert time → time_relevance=0
287
- - If post is generic military news not specific to this attack → time_relevance=0.2
288
- - If post discusses the current attack → time_relevance=1.0
289
- - If uncertain → time_relevance=0.5 (the system will use alert_history to verify)
290
-
291
- Return ONLY valid JSON (no markdown, no explanation):
292
- {
293
- "region_relevance": float, // 0–1: does this message discuss the specified alert region?
294
- "source_trust": float, // 0–1: factual reporting (1.0) vs unverified rumors/panic (0.0)
295
- "tone": "calm"|"neutral"|"alarmist",
296
- "time_relevance": float, // 0–1: is this post about the CURRENT attack? (see rules above)
297
- "country_origin": string|null, // "Iran","Yemen","Lebanon","Gaza","Iraq","Syria" or null
298
- "rocket_count": int|null,
299
- "is_cassette": bool|null,
300
- "intercepted": int|null,
301
- "intercepted_qual": ${QUAL_VALUES}|null,
302
- "intercepted_qual_num": int|null,
303
- "sea_impact": int|null,
304
- "sea_impact_qual": ${QUAL_VALUES}|null,
305
- "sea_impact_qual_num": int|null,
306
- "open_area_impact": int|null,
307
- "open_area_impact_qual": ${QUAL_VALUES}|null,
308
- "open_area_impact_qual_num": int|null,
309
- "hits_confirmed": int|null,
310
- "casualties": int|null,
311
- "injuries": int|null,
312
- "eta_refined_minutes": int|null,
313
- "confidence": float
314
- }
315
-
316
- Rules:
317
- - If unrelated to the alert region, set region_relevance=0 and all data fields to null.
318
- - If message is speculative/unconfirmed rumor, set source_trust < 0.4.
319
- - If message uses excessive caps, exclamation marks, panic language → tone="alarmist".
320
- - Only extract concrete numbers explicitly stated in the text. Never guess.
321
- - *_qual fields: use ONLY when NO exact count is given. If exact number present, set *_qual=null.
322
- - "none" qual is only valid if explicitly stated (e.g., "все перехвачены", "не упало в море").
323
- - For IDF (@idf_telegram) posts about ongoing operations (not this specific attack) → time_relevance=0.
324
- - LANGUAGE NEUTRALITY: Posts may be in Hebrew, Russian, Arabic, or English. The language of the post
325
- MUST NOT affect source_trust or confidence. Russian-language Israeli channels are equally reliable
326
- and often break news faster than Hebrew ones. Judge ONLY by factual content and tone.
327
- - TRUST INTERCEPTION & IMPACT REPORTS: When a channel explicitly states interception results
328
- (e.g., "перехвачены", "intercepted", "יירוט", "упали в море", "fell in the sea", "נפלו בים",
329
- "open area impact", "שטח פתוח"), trust these claims with source_trust >= 0.7 and confidence >= 0.7.
330
- Israeli Telegram channels often report interception results before official confirmation,
331
- and these reports are typically accurate. Do NOT downgrade these just because they lack official source.`;
332
- async function extractAndValidate(state) {
333
- if (state.filteredPosts.length === 0) {
334
- logger.info("Agent: no filtered posts to extract", {
69
+ // Step 1: cheap LLM — which channels have important military intel?
70
+ const relevantChannels = await filterChannelsCheap(state.tracking, state.alertAreas, state.alertTs, state.alertType);
71
+ if (relevantChannels.length === 0) {
72
+ logger.info("Agent: no relevant channels after cheap filter", {
335
73
  alertId: state.alertId,
336
74
  });
337
75
  return { extractions: [] };
338
76
  }
339
- // ── Post-level dedup: only send NEW posts to LLM ───────
340
- const posts = state.filteredPosts;
341
- // Compute hash for each post (channel + text content)
342
- const postHashMap = new Map(); // hash serialized post key
343
- const hashToPost = new Map();
344
- for (const post of posts) {
345
- const hash = textHash(post.channel + "|" + post.text.slice(0, 800));
346
- postHashMap.set(hash, post.channel);
347
- hashToPost.set(hash, post);
348
- }
349
- // Fetch cached extractions from previous jobs in this session
350
- const allHashes = [...postHashMap.keys()];
351
- const cached = await getCachedExtractions(allHashes);
352
- // Split: cached results vs posts needing LLM
353
- const cachedResults = [];
354
- const newPosts = [];
355
- const newHashes = [];
356
- for (const [hash, post] of hashToPost) {
357
- const cachedJson = cached.get(hash);
358
- if (cachedJson) {
359
- cachedResults.push(JSON.parse(cachedJson));
360
- }
361
- else {
362
- newPosts.push(post);
363
- newHashes.push(hash);
77
+ // Step 2: collect posts from relevant channels only
78
+ const postsToExtract = [];
79
+ for (const ch of state.tracking.channels_with_updates) {
80
+ const match = relevantChannels.some((rc) => rc === ch.channel || rc === `@${ch.channel}` || `@${rc}` === ch.channel);
81
+ if (match) {
82
+ postsToExtract.push(...ch.last_tracked_messages);
364
83
  }
365
84
  }
366
- logger.info("Agent: extraction dedup", {
367
- alertId: state.alertId,
368
- total: posts.length,
369
- cached: cachedResults.length,
370
- new: newPosts.length,
371
- });
372
- // If all posts are cached, return immediately (0 LLM calls)
373
- if (newPosts.length === 0) {
374
- logger.info("Agent: extracted (all cached)", {
85
+ if (postsToExtract.length === 0) {
86
+ logger.info("Agent: no posts from relevant channels", {
375
87
  alertId: state.alertId,
376
- count: cachedResults.length,
377
88
  });
378
- return { extractions: cachedResults };
89
+ return { extractions: [] };
379
90
  }
380
- const llm = getLLM();
381
- const regionHint = state.alertAreas.length > 0
382
- ? state.alertAreas.join(", ")
383
- : Object.keys(config.agent.areaLabels).join(", ") || "Israel";
384
- const alertTimeIL = toIsraelTime(state.alertTs);
385
- const nowIL = toIsraelTime(Date.now());
386
- const phaseInstructions = getPhaseInstructions(state.alertType);
387
- const systemPrompt = SYSTEM_PROMPT_BASE + "\n\n" + phaseInstructions;
388
- // Only extract NEW posts (not seen in previous jobs)
389
- const newResults = await Promise.all(newPosts.map(async (post) => {
390
- const postTimeIL = toIsraelTime(post.ts);
391
- const postAgeMin = Math.round((state.alertTs - post.ts) / 60_000);
392
- const postAgeSuffix = postAgeMin > 0
393
- ? `(${postAgeMin} min BEFORE alert)`
394
- : postAgeMin < 0
395
- ? `(${Math.abs(postAgeMin)} min AFTER alert)`
396
- : "(same time as alert)";
397
- const contextHeader = `Alert time: ${alertTimeIL} (Israel)\n` +
398
- `Post time: ${postTimeIL} (Israel) ${postAgeSuffix}\n` +
399
- `Current time: ${nowIL} (Israel)\n` +
400
- `Alert region: ${regionHint}\n` +
401
- `UI language: ${config.language}\n`;
402
- try {
403
- const response = await llm.invoke([
404
- { role: "system", content: systemPrompt },
405
- {
406
- role: "user",
407
- content: `${contextHeader}Channel: ${post.channel}\n\nMessage:\n${post.text.slice(0, 800)}`,
408
- },
409
- ]);
410
- const raw = typeof response.content === "string"
411
- ? response.content
412
- : JSON.stringify(response.content);
413
- const text = raw
414
- .replace(/^```(?:json)?\s*\n?/i, "")
415
- .replace(/\n?```\s*$/i, "");
416
- const parsed = JSON.parse(text.trim());
417
- return {
418
- ...parsed,
419
- channel: post.channel,
420
- messageUrl: post.messageUrl,
421
- time_relevance: parsed.time_relevance ?? 0.5,
422
- valid: true,
423
- };
424
- }
425
- catch (err) {
426
- logger.warn("Agent: extraction failed", {
427
- channel: post.channel,
428
- error: String(err),
429
- });
430
- return {
431
- channel: post.channel,
432
- region_relevance: 0,
433
- source_trust: 0,
434
- tone: "neutral",
435
- time_relevance: 0,
436
- country_origin: null,
437
- rocket_count: null,
438
- is_cassette: null,
439
- intercepted: null,
440
- intercepted_qual: null,
441
- intercepted_qual_num: null,
442
- sea_impact: null,
443
- sea_impact_qual: null,
444
- sea_impact_qual_num: null,
445
- open_area_impact: null,
446
- open_area_impact_qual: null,
447
- open_area_impact_qual_num: null,
448
- hits_confirmed: null,
449
- casualties: null,
450
- injuries: null,
451
- eta_refined_minutes: null,
452
- confidence: 0,
453
- valid: false,
454
- reject_reason: "extraction_error",
455
- };
456
- }
457
- }));
458
- // Save new extractions to Redis cache for future jobs
459
- const cacheEntries = {};
460
- newPosts.forEach((post, i) => {
461
- const hash = textHash(post.channel + "|" + post.text.slice(0, 800));
462
- cacheEntries[hash] = JSON.stringify(newResults[i]);
463
- });
464
- await saveCachedExtractions(cacheEntries);
465
- // Merge cached + new results
466
- const results = [...cachedResults, ...newResults];
467
- logger.info("Agent: extracted", {
468
- alertId: state.alertId,
469
- count: results.length,
470
- newLLMCalls: newResults.length,
471
- cachedReused: cachedResults.length,
472
- timeRelevance: results.map((r) => ({
473
- ch: r.channel,
474
- tr: r.time_relevance,
475
- conf: r.confidence,
476
- origin: r.country_origin,
477
- intercepted: r.intercepted ?? r.intercepted_qual,
478
- sea: r.sea_impact ?? r.sea_impact_qual,
479
- valid: r.valid,
480
- })),
481
- });
482
- return { extractions: results };
483
- }
484
- // ─────────────────────────────────────────────────────────
485
- // Tier 2: Post-filter (deterministic, 0 tokens)
486
- // Now includes TIME RELEVANCE check.
487
- // ─────────────────────────────────────────────────────────
488
- function postFilter(state) {
489
- const validated = state.extractions.map((ext) => {
490
- // V0: TIME RELEVANCE — the most important check
491
- if (ext.time_relevance < 0.5) {
492
- return { ...ext, valid: false, reject_reason: "stale_post" };
493
- }
494
- // V1: region relevance
495
- if (ext.region_relevance < 0.5) {
496
- return { ...ext, valid: false, reject_reason: "region_irrelevant" };
497
- }
498
- // V2: source trust
499
- if (ext.source_trust < 0.4) {
500
- return { ...ext, valid: false, reject_reason: "untrusted_source" };
501
- }
502
- // V3: tone — reject alarmist
503
- if (ext.tone === "alarmist") {
504
- return { ...ext, valid: false, reject_reason: "alarmist_tone" };
505
- }
506
- // V4: at least one data field must be non-null
507
- const hasData = ext.country_origin !== null ||
508
- ext.rocket_count !== null ||
509
- ext.is_cassette !== null ||
510
- ext.intercepted !== null ||
511
- ext.intercepted_qual !== null ||
512
- ext.hits_confirmed !== null ||
513
- ext.casualties !== null ||
514
- ext.injuries !== null ||
515
- ext.eta_refined_minutes !== null;
516
- if (!hasData) {
517
- return { ...ext, valid: false, reject_reason: "no_data" };
518
- }
519
- // V5: overall confidence floor
520
- if (ext.confidence < 0.3) {
521
- return { ...ext, valid: false, reject_reason: "low_confidence" };
522
- }
523
- return { ...ext, valid: true };
524
- });
525
- const passed = validated.filter((e) => e.valid);
526
- const rejected = validated.filter((e) => !e.valid);
527
- logger.info("Agent: post-filter", {
91
+ // Step 3: expensive extraction with post-level dedup
92
+ const ctx = {
93
+ alertTs: state.alertTs,
94
+ alertType: state.alertType,
95
+ alertAreas: state.alertAreas,
528
96
  alertId: state.alertId,
529
- passed: passed.length,
530
- rejected: rejected.length,
531
- reasons: rejected.map((r) => `${r.channel}:${r.reject_reason}`),
532
- passed_channels: passed.map((p) => p.channel),
533
- });
534
- return { extractions: validated };
535
- }
536
- // ─────────────────────────────────────────────────────────
537
- // Tier 3: Vote (deterministic, 0 tokens)
538
- // ─────────────────────────────────────────────────────────
539
- function vote(state) {
540
- const valid = state.extractions.filter((e) => e.valid);
541
- if (valid.length === 0) {
542
- return { votedResult: null };
543
- }
544
- // Assign 1-based citation indices
545
- const indexed = valid.map((e, i) => ({ ...e, idx: i + 1 }));
546
- const citedSources = indexed.map((e) => ({
547
- index: e.idx,
548
- channel: e.channel,
549
- messageUrl: e.messageUrl ?? null,
550
- }));
551
- // ETA: highest confidence source
552
- const withEta = indexed
553
- .filter((e) => e.eta_refined_minutes !== null)
554
- .sort((a, b) => b.confidence - a.confidence);
555
- const bestEta = withEta[0] ?? null;
556
- // Country: group unique values
557
- const countryMap = new Map();
558
- for (const e of indexed) {
559
- if (e.country_origin) {
560
- const list = countryMap.get(e.country_origin) ?? [];
561
- list.push(e.idx);
562
- countryMap.set(e.country_origin, list);
563
- }
564
- }
565
- const country_origins = countryMap.size > 0
566
- ? Array.from(countryMap.entries()).map(([name, citations]) => ({
567
- name,
568
- citations,
569
- }))
570
- : null;
571
- // Rocket count: range
572
- const rocketSrcs = indexed.filter((e) => e.rocket_count !== null);
573
- const rocketVals = rocketSrcs.map((e) => e.rocket_count);
574
- const rocket_count_min = rocketVals.length > 0 ? Math.min(...rocketVals) : null;
575
- const rocket_count_max = rocketVals.length > 0 ? Math.max(...rocketVals) : null;
576
- const rocket_citations = rocketSrcs.map((e) => e.idx);
577
- // Helper: avg weighted confidence
578
- function fieldConf(srcs) {
579
- if (srcs.length === 0)
580
- return 0;
581
- return (srcs.reduce((s, e) => s + e.source_trust * e.confidence, 0) / srcs.length);
582
- }
583
- // Helper: mode for QualCount
584
- function modeQual(srcs, key) {
585
- const vals = srcs
586
- .map((e) => e[key])
587
- .filter((v) => v !== null);
588
- if (vals.length === 0)
589
- return null;
590
- const freq = new Map();
591
- for (const v of vals)
592
- freq.set(v, (freq.get(v) ?? 0) + 1);
593
- return [...freq.entries()].sort((a, b) => b[1] - a[1])[0][0];
594
- }
595
- function medianQualNum(srcs, key) {
596
- const vals = srcs
597
- .map((e) => e[key])
598
- .filter((v) => v !== null)
599
- .sort((a, b) => a - b);
600
- return vals.length > 0 ? vals[Math.floor(vals.length / 2)] : null;
601
- }
602
- // Cassette: majority
603
- const cassSrcs = indexed.filter((e) => e.is_cassette !== null);
604
- const cassVals = cassSrcs.map((e) => e.is_cassette);
605
- const is_cassette = cassVals.length > 0
606
- ? cassVals.filter(Boolean).length > cassVals.length / 2
607
- : null;
608
- const is_cassette_confidence = fieldConf(cassSrcs);
609
- // Hits: median
610
- const hitsSrcs = indexed.filter((e) => e.hits_confirmed !== null && e.hits_confirmed > 0);
611
- const hitsVals = indexed
612
- .filter((e) => e.hits_confirmed !== null)
613
- .map((e) => e.hits_confirmed)
614
- .sort((a, b) => a - b);
615
- const hits_confirmed = hitsVals.length > 0 ? hitsVals[Math.floor(hitsVals.length / 2)] : null;
616
- const hits_citations = hitsSrcs.map((e) => e.idx);
617
- const hits_confidence = fieldConf(hitsSrcs);
618
- // Intercepted: median / qual
619
- const interceptedSrcs = indexed.filter((e) => e.intercepted !== null);
620
- const interceptedQualSrcs = indexed.filter((e) => e.intercepted_qual !== null);
621
- const interceptedVals = interceptedSrcs
622
- .map((e) => e.intercepted)
623
- .sort((a, b) => a - b);
624
- const intercepted = interceptedVals.length > 0
625
- ? interceptedVals[Math.floor(interceptedVals.length / 2)]
626
- : null;
627
- const intercepted_qual = intercepted === null
628
- ? modeQual(interceptedQualSrcs, "intercepted_qual")
629
- : null;
630
- const intercepted_qual_num = intercepted_qual !== null
631
- ? medianQualNum(interceptedQualSrcs, "intercepted_qual_num")
632
- : null;
633
- const intercepted_confidence = fieldConf(interceptedSrcs.length > 0 ? interceptedSrcs : interceptedQualSrcs);
634
- // Sea impact: median / qual
635
- const seaSrcs = indexed.filter((e) => e.sea_impact !== null);
636
- const seaQualSrcs = indexed.filter((e) => e.sea_impact_qual !== null);
637
- const seaVals = seaSrcs
638
- .map((e) => e.sea_impact)
639
- .sort((a, b) => a - b);
640
- const sea_impact = seaVals.length > 0 ? seaVals[Math.floor(seaVals.length / 2)] : null;
641
- const sea_impact_qual = sea_impact === null ? modeQual(seaQualSrcs, "sea_impact_qual") : null;
642
- const sea_impact_qual_num = sea_impact_qual !== null
643
- ? medianQualNum(seaQualSrcs, "sea_impact_qual_num")
644
- : null;
645
- const sea_confidence = fieldConf(seaSrcs.length > 0 ? seaSrcs : seaQualSrcs);
646
- // Open area impact: median / qual
647
- const openSrcs = indexed.filter((e) => e.open_area_impact !== null);
648
- const openQualSrcs = indexed.filter((e) => e.open_area_impact_qual !== null);
649
- const openVals = openSrcs
650
- .map((e) => e.open_area_impact)
651
- .sort((a, b) => a - b);
652
- const open_area_impact = openVals.length > 0 ? openVals[Math.floor(openVals.length / 2)] : null;
653
- const open_area_impact_qual = open_area_impact === null
654
- ? modeQual(openQualSrcs, "open_area_impact_qual")
655
- : null;
656
- const open_area_impact_qual_num = open_area_impact_qual !== null
657
- ? medianQualNum(openQualSrcs, "open_area_impact_qual_num")
658
- : null;
659
- const open_area_confidence = fieldConf(openSrcs.length > 0 ? openSrcs : openQualSrcs);
660
- // Casualties
661
- const casualtySrcs = indexed.filter((e) => e.casualties !== null && e.casualties > 0);
662
- const casualtyVals = casualtySrcs
663
- .map((e) => e.casualties)
664
- .sort((a, b) => a - b);
665
- const casualties = casualtyVals.length > 0
666
- ? casualtyVals[Math.floor(casualtyVals.length / 2)]
667
- : null;
668
- const casualties_citations = casualtySrcs.map((e) => e.idx);
669
- const casualties_confidence = fieldConf(casualtySrcs);
670
- // Injuries
671
- const injurySrcs = indexed.filter((e) => e.injuries !== null && e.injuries > 0);
672
- const injuryVals = injurySrcs
673
- .map((e) => e.injuries)
674
- .sort((a, b) => a - b);
675
- const injuries = injuryVals.length > 0
676
- ? injuryVals[Math.floor(injuryVals.length / 2)]
677
- : null;
678
- const injuries_citations = injurySrcs.map((e) => e.idx);
679
- const injuries_confidence = fieldConf(injurySrcs);
680
- // Rocket confidence
681
- const rocket_confidence = fieldConf(rocketSrcs);
682
- // Overall weighted confidence
683
- const totalWeight = indexed.reduce((s, e) => s + e.source_trust * e.confidence, 0);
684
- const weightedConf = totalWeight / indexed.length;
685
- const voted = {
686
- eta_refined_minutes: bestEta?.eta_refined_minutes ?? null,
687
- eta_citations: bestEta ? [bestEta.idx] : [],
688
- country_origins,
689
- rocket_count_min,
690
- rocket_count_max,
691
- rocket_citations,
692
- rocket_confidence,
693
- is_cassette,
694
- is_cassette_confidence,
695
- intercepted,
696
- intercepted_qual,
697
- intercepted_qual_num,
698
- intercepted_confidence,
699
- sea_impact,
700
- sea_impact_qual,
701
- sea_impact_qual_num,
702
- sea_confidence,
703
- open_area_impact,
704
- open_area_impact_qual,
705
- open_area_impact_qual_num,
706
- open_area_confidence,
707
- hits_confirmed,
708
- hits_citations,
709
- hits_confidence,
710
- casualties,
711
- casualties_citations,
712
- casualties_confidence,
713
- injuries,
714
- injuries_citations,
715
- injuries_confidence,
716
- confidence: Math.round(weightedConf * 100) / 100,
717
- sources_count: indexed.length,
718
- citedSources,
719
- };
720
- logger.info("Agent: voted", { alertId: state.alertId, voted });
721
- return { votedResult: voted };
722
- }
723
- // ─────────────────────────────────────────────────────────
724
- // Tier 4: Edit message — inline citations, carry-forward
725
- // ─────────────────────────────────────────────────────────
726
- /** EN country name → Russian */
727
- const COUNTRY_RU = {
728
- Iran: "Иран",
729
- Yemen: "Йемен",
730
- Lebanon: "Ливан",
731
- Gaza: "Газа",
732
- Iraq: "Ирак",
733
- Syria: "Сирия",
734
- Hezbollah: "Хезболла",
735
- };
736
- /** Format inline citations: [[1]](url), [[2]](url) */
737
- function inlineCites(indices, citedSources) {
738
- const parts = [];
739
- for (const idx of indices) {
740
- const src = citedSources.find((s) => s.index === idx);
741
- if (src?.messageUrl) {
742
- parts.push(`<a href="${src.messageUrl}">[${idx}]</a>`);
743
- }
744
- }
745
- return parts.length > 0 ? " " + parts.join(", ") : "";
746
- }
747
- /** Get InlineCite[] from citation indices */
748
- function extractCites(indices, citedSources) {
749
- const cites = [];
750
- for (const idx of indices) {
751
- const src = citedSources.find((s) => s.index === idx);
752
- if (src?.messageUrl) {
753
- cites.push({ url: src.messageUrl, channel: src.channel });
754
- }
755
- }
756
- return cites;
757
- }
758
- /** Format inline citations from InlineCite[] (for carry-forward data) */
759
- function inlineCitesFromData(cites) {
760
- if (cites.length === 0)
761
- return "";
762
- return (" " + cites.map((c, i) => `<a href="${c.url}">[${i + 1}]</a>`).join(", "));
763
- }
764
- // Confidence thresholds
765
- const SKIP = 0.6;
766
- const UNCERTAIN = 0.75;
767
- const CERTAIN = 0.95;
768
- function qualDisplay(qual, qualNum, conf) {
769
- if (qual === null)
770
- return null;
771
- if (qual === "none")
772
- return conf >= CERTAIN ? "нет" : null;
773
- const map = {
774
- all: "все",
775
- most: "большинство",
776
- many: "много",
777
- few: "несколько",
778
- exists: "есть",
779
- none: "нет",
780
- more_than: qualNum != null ? `>${qualNum}` : ">1",
781
- less_than: qualNum != null ? `<${qualNum}` : "<нескольких",
97
+ language: config.language,
782
98
  };
783
- return map[qual];
784
- }
785
- function breakdownItem(label, num, qual, qualNum, conf) {
786
- if (conf < SKIP)
787
- return null;
788
- const u = conf < UNCERTAIN ? " (?)" : "";
789
- if (num !== null)
790
- return `${label} ${num}${u}`;
791
- const qs = qualDisplay(qual, qualNum, conf);
792
- if (qs === null)
793
- return null;
794
- return `${label} ${qs}${u}`;
795
- }
796
- /**
797
- * Build enrichment data from current vote + previous enrichment (carry-forward).
798
- * Returns updated EnrichmentData for Redis persistence.
799
- */
800
- function buildEnrichmentFromVote(r, prev, alertType, alertTs) {
801
- const data = { ...prev };
802
- // Origin — update if voted has it
803
- if (r.country_origins && r.country_origins.length > 0) {
804
- data.origin = r.country_origins
805
- .map((c) => COUNTRY_RU[c.name] ?? c.name)
806
- .join(" + ");
807
- data.originCites = r.country_origins.flatMap((c) => extractCites(c.citations, r.citedSources));
808
- }
809
- // ETA — only for early_warning/siren
810
- if (r.eta_refined_minutes !== null &&
811
- (alertType === "early_warning" || alertType === "siren")) {
812
- const absTime = new Date(alertTs + r.eta_refined_minutes * 60_000).toLocaleTimeString("he-IL", {
813
- hour: "2-digit",
814
- minute: "2-digit",
815
- timeZone: "Asia/Jerusalem",
816
- });
817
- data.etaAbsolute = `~${absTime}`;
818
- data.etaCites = extractCites(r.eta_citations, r.citedSources);
819
- }
820
- // Rocket count
821
- if (r.rocket_count_min !== null && r.rocket_count_max !== null) {
822
- const u = r.rocket_confidence < UNCERTAIN ? " (?)" : "";
823
- data.rocketCount =
824
- r.rocket_count_min === r.rocket_count_max
825
- ? `${r.rocket_count_min}${u}`
826
- : `~${r.rocket_count_min}–${r.rocket_count_max}${u}`;
827
- data.rocketCites = extractCites(r.rocket_citations, r.citedSources);
828
- }
829
- // Cassette
830
- if (r.is_cassette !== null && r.is_cassette_confidence >= SKIP) {
831
- data.isCassette = r.is_cassette;
832
- }
833
- // Intercepted
834
- if (r.intercepted !== null && r.intercepted_confidence >= SKIP) {
835
- const u = r.intercepted_confidence < UNCERTAIN ? " (?)" : "";
836
- data.intercepted = `${r.intercepted}${u}`;
837
- data.interceptedCites = extractCites(r.citedSources
838
- .filter((s) => {
839
- const ext = r.citedSources.find((cs) => cs.index === s.index);
840
- return ext !== undefined;
841
- })
842
- .map((s) => s.index), r.citedSources);
843
- }
844
- else if (r.intercepted_qual !== null && r.intercepted_confidence >= SKIP) {
845
- const qs = qualDisplay(r.intercepted_qual, r.intercepted_qual_num, r.intercepted_confidence);
846
- if (qs)
847
- data.intercepted = qs;
848
- }
849
- // Hits
850
- if (r.hits_confirmed !== null &&
851
- r.hits_confirmed > 0 &&
852
- r.hits_confidence >= SKIP) {
853
- const u = r.hits_confidence < UNCERTAIN ? " (?)" : "";
854
- data.hitsConfirmed = `${r.hits_confirmed}${u}`;
855
- data.hitsCites = extractCites(r.hits_citations, r.citedSources);
856
- }
857
- // Casualties
858
- if (r.casualties !== null &&
859
- r.casualties > 0 &&
860
- r.casualties_confidence >= SKIP) {
861
- const u = r.casualties_confidence < UNCERTAIN ? " (?)" : "";
862
- data.casualties = `${r.casualties}${u}`;
863
- data.casualtiesCites = extractCites(r.casualties_citations, r.citedSources);
864
- }
865
- // Injuries
866
- if (r.injuries !== null && r.injuries > 0 && r.injuries_confidence >= SKIP) {
867
- const u = r.injuries_confidence < UNCERTAIN ? " (?)" : "";
868
- data.injuries = `${r.injuries}${u}`;
869
- data.injuriesCites = extractCites(r.injuries_citations, r.citedSources);
870
- }
871
- // Early warning time — record when first early_warning was received
872
- if (alertType === "early_warning" && !data.earlyWarningTime) {
873
- data.earlyWarningTime = toIsraelTime(alertTs);
874
- }
875
- return data;
876
- }
877
- /**
878
- * Build the enriched message text from current message + enrichment data.
879
- * Uses inline [[1]](url) citations. No superscripts. No footer sources.
880
- */
881
- function buildEnrichedMessage(currentText, alertType, alertTs, enrichment) {
882
- let text = currentText;
883
- // ── Refine ETA in-place ──
884
- if (enrichment.etaAbsolute &&
885
- (alertType === "early_warning" || alertType === "siren")) {
886
- const etaCiteStr = inlineCitesFromData(enrichment.etaCites);
887
- const refined = `${enrichment.etaAbsolute}${etaCiteStr}`;
888
- const etaPatterns = [
889
- /~\d+[–-]\d+\s*мин/, // ~5–12 мин
890
- /~\d+[–-]\d+\s*min/, // ~5–12 min
891
- /~\d+[–-]\d+\s*דקות/, // ~5–12 דקות
892
- /~\d+[–-]\d+\s*دقيقة/, // ~5–12 دقيقة
893
- /1\.5\s*мин/, // 1.5 мин (siren)
894
- /1\.5\s*min/,
895
- /1\.5\s*דקות/,
896
- /1\.5\s*دقيقة/,
897
- ];
898
- for (const pattern of etaPatterns) {
899
- if (pattern.test(text)) {
900
- text = text.replace(pattern, refined);
901
- break;
902
- }
903
- }
904
- }
905
- // ── Siren: show "Раннее предупреждение: было в HH:MM" ──
906
- if (alertType === "siren" && enrichment.earlyWarningTime) {
907
- text = insertBeforeTimeLine(text, `<b>Раннее предупреждение:</b> было в ${enrichment.earlyWarningTime}`);
908
- }
909
- // ── Origin ──
910
- if (enrichment.origin) {
911
- const citeStr = inlineCitesFromData(enrichment.originCites);
912
- text = insertBeforeTimeLine(text, `\n<b>Откуда:</b> ${enrichment.origin}${citeStr}`);
913
- }
914
- // ── Rocket count + breakdown ──
915
- if (enrichment.rocketCount) {
916
- const citeStr = inlineCitesFromData(enrichment.rocketCites);
917
- const cassette = enrichment.isCassette ? ", есть кассетные" : "";
918
- let breakdown = "";
919
- const bParts = [];
920
- if (enrichment.intercepted) {
921
- bParts.push(`перехвачено — ${enrichment.intercepted}`);
922
- }
923
- if (enrichment.seaImpact) {
924
- bParts.push(`упали в море — ${enrichment.seaImpact}`);
925
- }
926
- if (enrichment.openAreaImpact) {
927
- bParts.push(`открытая местность — ${enrichment.openAreaImpact}`);
928
- }
929
- if (bParts.length > 0)
930
- breakdown = `, из них: ${bParts.join(", ")}`;
931
- text = insertBeforeTimeLine(text, `<b>Ракет:</b> ${enrichment.rocketCount}${breakdown}${cassette}${citeStr}`);
932
- }
933
- else if (enrichment.intercepted && alertType !== "early_warning") {
934
- // No rocket count but have interception data
935
- const citeStr = inlineCitesFromData(enrichment.interceptedCites);
936
- text = insertBeforeTimeLine(text, `<b>Перехвачено:</b> ${enrichment.intercepted}${citeStr}`);
937
- }
938
- // ── Hits ──
939
- if (enrichment.hitsConfirmed && alertType !== "early_warning") {
940
- const areaLabel = Object.values(config.agent.areaLabels)[0] ?? "район";
941
- const citeStr = inlineCitesFromData(enrichment.hitsCites);
942
- text = insertBeforeTimeLine(text, `<b>Попадания (${areaLabel}):</b> ${enrichment.hitsConfirmed}${citeStr}`);
943
- }
944
- // ── Casualties / Injuries (resolved only) ──
945
- if (enrichment.casualties && alertType === "resolved") {
946
- const citeStr = inlineCitesFromData(enrichment.casualtiesCites);
947
- text = insertBeforeTimeLine(text, `<b>Погибшие:</b> ${enrichment.casualties}${citeStr}`);
948
- }
949
- if (enrichment.injuries && alertType === "resolved") {
950
- const citeStr = inlineCitesFromData(enrichment.injuriesCites);
951
- text = insertBeforeTimeLine(text, `<b>Пострадавшие:</b> ${enrichment.injuries}${citeStr}`);
952
- }
953
- return text;
954
- }
955
- /**
956
- * Insert a line before the time line (last "Время" / "Time" / "שעת" line).
957
- */
958
- function insertBeforeTimeLine(text, line) {
959
- const timePattern = /(<b>(?:Время оповещения|Alert time|שעת ההתרעה|وقت الإنذار):<\/b>)/;
960
- const match = text.match(timePattern);
961
- if (match?.index !== undefined) {
962
- return text.slice(0, match.index) + line + "\n" + text.slice(match.index);
963
- }
964
- const lines = text.split("\n");
965
- lines.splice(Math.max(lines.length - 1, 0), 0, line);
966
- return lines.join("\n");
967
- }
968
- async function editMessage(state) {
969
- const { votedResult } = state;
970
- if (!config.botToken)
971
- return {};
972
- const tgBot = new Bot(config.botToken);
973
- // No valid sources — carry forward previous data only
974
- const prevEnrichment = state.previousEnrichment ?? emptyEnrichmentData();
975
- if (!votedResult) {
976
- // No new data from channels — still try to build message from carry-forward
977
- if (prevEnrichment.origin || prevEnrichment.intercepted) {
978
- // Have carry-forward data, build message
979
- const newText = buildEnrichedMessage(state.currentText, state.alertType, state.alertTs, prevEnrichment);
980
- const hash = textHash(newText);
981
- if (hash === prevEnrichment.lastEditHash) {
982
- logger.info("Agent: no change in message (dedup) — skipping edit", {
983
- alertId: state.alertId,
984
- });
985
- return {};
986
- }
987
- try {
988
- if (state.isCaption) {
989
- await tgBot.api.editMessageCaption(state.chatId, state.messageId, {
990
- caption: newText,
991
- parse_mode: "HTML",
992
- });
993
- }
994
- else {
995
- await tgBot.api.editMessageText(state.chatId, state.messageId, newText, { parse_mode: "HTML" });
996
- }
997
- prevEnrichment.lastEditHash = hash;
998
- await saveEnrichmentData(prevEnrichment);
999
- logger.info("Agent: message enriched (carry-forward only)", {
1000
- alertId: state.alertId,
1001
- messageId: state.messageId,
1002
- });
1003
- }
1004
- catch (err) {
1005
- const errStr = String(err);
1006
- if (errStr.includes("message is not modified")) {
1007
- prevEnrichment.lastEditHash = hash;
1008
- await saveEnrichmentData(prevEnrichment);
1009
- logger.info("Agent: message already up-to-date (dedup)", {
1010
- alertId: state.alertId,
1011
- });
1012
- }
1013
- else {
1014
- logger.error("Agent: failed to edit message", {
1015
- alertId: state.alertId,
1016
- error: errStr,
1017
- });
1018
- }
1019
- }
1020
- }
1021
- else {
1022
- logger.info("Agent: no voted result — skipping edit", {
1023
- alertId: state.alertId,
1024
- });
1025
- }
1026
- return {};
1027
- }
1028
- // Build enrichment data: merge vote + previous
1029
- const enrichment = buildEnrichmentFromVote(votedResult, prevEnrichment, state.alertType, state.alertTs);
1030
- const newText = buildEnrichedMessage(state.currentText, state.alertType, state.alertTs, enrichment);
1031
- // Dedup: skip if text hasn't changed
1032
- const hash = textHash(newText);
1033
- if (hash === enrichment.lastEditHash) {
1034
- logger.info("Agent: no change in message (dedup) — skipping edit", {
1035
- alertId: state.alertId,
1036
- });
1037
- return {};
1038
- }
1039
- // Low confidence: log but still show data with (?) markers
1040
- if (votedResult.confidence < config.agent.confidenceThreshold) {
1041
- logger.info("Agent: confidence below threshold — editing with (?) markers", {
1042
- alertId: state.alertId,
1043
- confidence: votedResult.confidence,
1044
- threshold: config.agent.confidenceThreshold,
1045
- });
1046
- }
1047
- try {
1048
- if (state.isCaption) {
1049
- await tgBot.api.editMessageCaption(state.chatId, state.messageId, {
1050
- caption: newText,
1051
- parse_mode: "HTML",
1052
- });
1053
- }
1054
- else {
1055
- await tgBot.api.editMessageText(state.chatId, state.messageId, newText, {
1056
- parse_mode: "HTML",
1057
- });
1058
- }
1059
- enrichment.lastEditHash = hash;
1060
- await saveEnrichmentData(enrichment);
1061
- logger.info("Agent: message enriched", {
99
+ const raw = await extractPosts(postsToExtract, ctx);
100
+ // Step 4: deterministic post-filter
101
+ const filtered = postFilter(raw, state.alertId);
102
+ // Update timestamp for next job's dedup split
103
+ await setLastUpdateTs(Date.now());
104
+ return { extractions: filtered };
105
+ }
106
+ // ── Node: vote ─────────────────────────────────────────
107
+ function voteNode(state) {
108
+ return { votedResult: vote(state.extractions, state.alertId) };
109
+ }
110
+ // ── Node: clarify (MCP tool calling) ───────────────────
111
+ async function clarifyNode(state) {
112
+ if (!state.votedResult) {
113
+ logger.info("Agent: clarify skipped no voted result", {
1062
114
  alertId: state.alertId,
1063
- messageId: state.messageId,
1064
- confidence: votedResult.confidence,
1065
- sources: votedResult.sources_count,
1066
- phase: state.alertType,
1067
115
  });
1068
- }
1069
- catch (err) {
1070
- const errStr = String(err);
1071
- if (errStr.includes("message is not modified")) {
1072
- enrichment.lastEditHash = hash;
1073
- await saveEnrichmentData(enrichment);
1074
- logger.info("Agent: message already up-to-date (dedup)", {
1075
- alertId: state.alertId,
1076
- });
1077
- }
1078
- else {
1079
- logger.error("Agent: failed to edit message", {
1080
- alertId: state.alertId,
1081
- error: errStr,
1082
- });
1083
- }
1084
- }
1085
- return {};
1086
- }
1087
- // ─────────────────────────────────────────────────────────
1088
- // Clarify Node — MCP tool calling via ReAct (conditional)
1089
- // ─────────────────────────────────────────────────────────
1090
- async function clarifyNode(state) {
1091
- const { votedResult, extractions, alertId, alertAreas, alertType, alertTs, messageId, currentText, } = state;
1092
- if (!votedResult) {
1093
- logger.info("Agent: clarify skipped — no voted result", { alertId });
1094
116
  return { clarifyAttempted: true };
1095
117
  }
1096
118
  logger.info("Agent: clarify triggered", {
1097
- alertId,
1098
- confidence: votedResult.confidence,
1099
- threshold: config.agent.confidenceThreshold,
1100
- phase: alertType,
119
+ alertId: state.alertId,
120
+ confidence: state.votedResult.confidence,
1101
121
  });
1102
122
  try {
1103
123
  const result = await runClarify({
1104
- alertId,
1105
- alertAreas,
1106
- alertType,
1107
- alertTs,
1108
- messageId,
1109
- currentText,
1110
- extractions,
1111
- votedResult,
1112
- });
1113
- const mergedExtractions = [...extractions, ...result.newExtractions];
1114
- logger.info("Agent: clarify completed", {
1115
- alertId,
1116
- toolCalls: result.toolCallCount,
1117
- clarified: result.clarified,
1118
- newExtractions: result.newExtractions.length,
1119
- newPosts: result.newPosts.length,
124
+ alertId: state.alertId,
125
+ alertAreas: state.alertAreas,
126
+ alertType: state.alertType,
127
+ alertTs: state.alertTs,
128
+ messageId: state.messageId,
129
+ currentText: state.currentText,
130
+ extractions: state.extractions,
131
+ votedResult: state.votedResult,
1120
132
  });
1121
133
  return {
1122
- extractions: mergedExtractions,
134
+ extractions: [...state.extractions, ...result.newExtractions],
1123
135
  votedResult: null,
1124
136
  clarifyAttempted: true,
1125
137
  };
1126
138
  }
1127
139
  catch (err) {
1128
140
  logger.error("Agent: clarify failed", {
1129
- alertId,
141
+ alertId: state.alertId,
1130
142
  error: String(err),
1131
143
  });
1132
144
  return { clarifyAttempted: true };
1133
145
  }
1134
146
  }
147
+ // ── Node: edit Telegram message ────────────────────────
148
+ async function editNode(state) {
149
+ await editMessage({
150
+ alertId: state.alertId,
151
+ alertTs: state.alertTs,
152
+ alertType: state.alertType,
153
+ chatId: state.chatId,
154
+ messageId: state.messageId,
155
+ isCaption: state.isCaption,
156
+ currentText: state.currentText,
157
+ votedResult: state.votedResult,
158
+ previousEnrichment: state.previousEnrichment ?? emptyEnrichmentData(),
159
+ });
160
+ return {};
161
+ }
1135
162
  // ── Conditional routing after vote ─────────────────────
1136
163
  function shouldClarify(state) {
1137
164
  if (state.clarifyAttempted)
@@ -1140,29 +167,23 @@ function shouldClarify(state) {
1140
167
  return "editMessage";
1141
168
  if (!state.votedResult)
1142
169
  return "editMessage";
1143
- // Low confidence → clarify (may use Oref tool for time validation)
1144
170
  if (state.votedResult.confidence < config.agent.confidenceThreshold) {
1145
171
  logger.info("Agent: routing to clarify (low confidence)", {
1146
172
  confidence: state.votedResult.confidence,
1147
- threshold: config.agent.confidenceThreshold,
1148
173
  });
1149
174
  return "clarify";
1150
175
  }
1151
- // Suspicious time: if the only country is unexpected for the region, verify
1152
- // This catches cases like "Lebanon" appearing on a Tel Aviv alert
1153
- // when the real attack is from Iran/Yemen
176
+ // Suspicious single-source: Lebanon for central Israel verify
1154
177
  const origins = state.votedResult.country_origins;
1155
178
  if (origins &&
1156
179
  origins.length === 1 &&
1157
180
  state.votedResult.sources_count === 1) {
1158
- const singleOrigin = origins[0].name;
1159
- // Lebanon attacks typically don't reach central Israel
1160
- if (singleOrigin === "Lebanon" &&
181
+ if (origins[0].name === "Lebanon" &&
1161
182
  state.alertAreas.some((a) => a.includes("תל אביב") ||
1162
183
  a.includes("גוש דן") ||
1163
184
  a.includes("שרון") ||
1164
185
  a.includes("מרכז"))) {
1165
- logger.info("Agent: routing to clarify (suspicious single source: Lebanon for central Israel)", { origin: singleOrigin });
186
+ logger.info("Agent: routing to clarify (suspicious Lebanon origin)", {});
1166
187
  return "clarify";
1167
188
  }
1168
189
  }
@@ -1171,30 +192,27 @@ function shouldClarify(state) {
1171
192
  // ── Build graph ────────────────────────────────────────
1172
193
  const checkpointer = new MemorySaver();
1173
194
  function buildGraph() {
1174
- const graph = new StateGraph(AgentState)
1175
- .addNode("collectAndPreFilter", collectAndPreFilter)
1176
- .addNode("extractAndValidate", extractAndValidate)
1177
- .addNode("postFilter", postFilter)
1178
- .addNode("vote", vote)
195
+ return new StateGraph(AgentState)
196
+ .addNode("collectAndFilter", collectAndFilter)
197
+ .addNode("extract", extractNode)
198
+ .addNode("vote", voteNode)
1179
199
  .addNode("clarify", clarifyNode)
1180
- .addNode("revote", vote)
1181
- .addNode("editMessage", editMessage)
1182
- .addEdge("__start__", "collectAndPreFilter")
1183
- .addEdge("collectAndPreFilter", "extractAndValidate")
1184
- .addEdge("extractAndValidate", "postFilter")
1185
- .addEdge("postFilter", "vote")
200
+ .addNode("revote", voteNode)
201
+ .addNode("editMessage", editNode)
202
+ .addEdge("__start__", "collectAndFilter")
203
+ .addEdge("collectAndFilter", "extract")
204
+ .addEdge("extract", "vote")
1186
205
  .addConditionalEdges("vote", shouldClarify, {
1187
206
  clarify: "clarify",
1188
207
  editMessage: "editMessage",
1189
208
  })
1190
209
  .addEdge("clarify", "revote")
1191
210
  .addEdge("revote", "editMessage")
1192
- .addEdge("editMessage", "__end__");
1193
- return graph.compile({ checkpointer });
211
+ .addEdge("editMessage", "__end__")
212
+ .compile({ checkpointer });
1194
213
  }
1195
214
  export async function runEnrichment(input) {
1196
- const app = buildGraph();
1197
- await app.invoke({
215
+ await buildGraph().invoke({
1198
216
  alertId: input.alertId,
1199
217
  alertTs: input.alertTs,
1200
218
  alertType: input.alertType,
@@ -1203,37 +221,11 @@ export async function runEnrichment(input) {
1203
221
  messageId: input.messageId,
1204
222
  isCaption: input.isCaption,
1205
223
  currentText: input.currentText,
1206
- channelPosts: [],
1207
- filteredPosts: [],
224
+ tracking: null,
1208
225
  extractions: [],
1209
226
  votedResult: null,
1210
227
  clarifyAttempted: false,
1211
228
  previousEnrichment: emptyEnrichmentData(),
1212
- sessionStartTs: 0,
1213
- phaseStartTs: 0,
1214
229
  }, { configurable: { thread_id: input.alertId } });
1215
230
  }
1216
- // ── Exported for testing ───────────────────────────────
1217
- export const _test = {
1218
- getLLM,
1219
- buildRegionKeywords,
1220
- LAUNCH_KEYWORDS,
1221
- TIME_WINDOW_MS,
1222
- toIsraelTime,
1223
- textHash,
1224
- postFilter,
1225
- vote,
1226
- buildEnrichmentFromVote,
1227
- buildEnrichedMessage,
1228
- insertBeforeTimeLine,
1229
- inlineCites,
1230
- inlineCitesFromData,
1231
- extractCites,
1232
- COUNTRY_RU,
1233
- SYSTEM_PROMPT_BASE,
1234
- getPhaseInstructions,
1235
- SKIP,
1236
- UNCERTAIN,
1237
- CERTAIN,
1238
- };
1239
231
  //# sourceMappingURL=graph.js.map