easyoref 1.13.1 → 1.14.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,47 +1,32 @@
1
1
  /**
2
- * LangGraph.js enrichment pipeline — tiered validation + tool calling.
2
+ * LangGraph.js enrichment pipeline — phase-aware, time-validated.
3
3
  *
4
- * Design: minimize tokens, maximize confidence.
5
- * When confidence is low, offer tools agent decides if they help.
4
+ * KEY DESIGN PRINCIPLES:
5
+ * 1. TIME IS KING — every post is validated against the alert time window.
6
+ * LLM receives alert time + post time and scores time_relevance.
7
+ * Posts about previous/different attacks are rejected.
8
+ * 2. PHASE-AWARE — each phase extracts only what's relevant:
9
+ * - early_warning: origin, ETA, rocket count, cassette
10
+ * - siren: carries early data + adds interception, impacts
11
+ * - resolved: carries all + adds casualties, injuries, final stats
12
+ * 3. CARRY-FORWARD — results persist in Redis (EnrichmentData).
13
+ * Each phase inherits previous phase's findings.
14
+ * 4. INLINE CITATIONS — no superscripts, no footer sources.
15
+ * Format: [[1]](url) right after each data point.
16
+ * 5. DEDUP EDITS — hash-based check prevents "message not modified" spam.
6
17
  *
7
- * ┌──────────────────────────────────────────────────────────────┐
8
- * │ Tier 0: preFilter (deterministic, 0 tokens) │
9
- * │ → keyword + region check on raw post text │
10
- * │ │
11
- * │ Tier 1: extractAndValidate (1 LLM call per post) │
12
- * │ → combined extraction + 3 validators in single JSON │
13
- * │ │
14
- * │ Tier 2: postFilter (deterministic, 0 tokens) │
15
- * │ → reject low relevance / trust / alarmist / empty │
16
- * │ │
17
- * │ Tier 3: vote (deterministic, 0 tokens) │
18
- * │ → majority consensus across validated sources │
19
- * │ │
20
- * │ Tier 3.5: shouldClarify (conditional edge) │
21
- * │ → if confidence < threshold AND tools enabled: │
22
- * │ → clarify: LLM sees voted result + 4 tools │
23
- * │ • read_telegram_sources (1-4 channel posts) │
24
- * │ • alert_history (Oref history verification) │
25
- * │ • resolve_area (defense-zone proximity check) │
26
- * │ • betterstack_log (query recent pipeline logs) │
27
- * │ LLM decides: call 0, 1, 2, or 3+ tools. │
28
- * │ → revote with merged extractions │
29
- * │ → else: proceed to editMessage │
30
- * │ │
31
- * │ Tier 4: editMessage (deterministic, 0 tokens) │
32
- * │ → inline update of existing key:value pairs │
33
- * └──────────────────────────────────────────────────────────────┘
34
- *
35
- * Checkpointer: MemorySaver — session-level state persistence.
36
- * Total LLM cost: 1 call × N posts + (optional) 1 clarify call + 0-N tools.
18
+ * Pipeline:
19
+ * preFilter → extractAndValidate → postFilter → vote → [clarify] → editMessage
37
20
  */
38
21
  import { Annotation, MemorySaver, StateGraph } from "@langchain/langgraph";
39
22
  import { ChatOpenAI } from "@langchain/openai";
40
23
  import { Bot } from "grammy";
24
+ import { createHash } from "node:crypto";
41
25
  import { config } from "../config.js";
42
26
  import * as logger from "../logger.js";
43
27
  import { runClarify } from "./clarify.js";
44
- import { getChannelPosts } from "./store.js";
28
+ import { getActiveSession, getChannelPosts, getEnrichmentData, saveEnrichmentData, } from "./store.js";
29
+ import { emptyEnrichmentData } from "./types.js";
45
30
  // ── State ──────────────────────────────────────────────
46
31
  const AgentState = Annotation.Root({
47
32
  alertId: Annotation({ reducer: (_, b) => b }),
@@ -58,6 +43,12 @@ const AgentState = Annotation.Root({
58
43
  votedResult: Annotation({ reducer: (_, b) => b }),
59
44
  /** Tracks whether clarify has already run (prevents infinite loop) */
60
45
  clarifyAttempted: Annotation({ reducer: (_, b) => b }),
46
+ /** Cross-phase enrichment data loaded at start */
47
+ previousEnrichment: Annotation({ reducer: (_, b) => b }),
48
+ /** Session start timestamp for time window calculations */
49
+ sessionStartTs: Annotation({ reducer: (_, b) => b }),
50
+ /** Phase start timestamp */
51
+ phaseStartTs: Annotation({ reducer: (_, b) => b }),
61
52
  });
62
53
  // ── LLM ───────────────────────────────────────────────
63
54
  function getLLM() {
@@ -72,85 +63,254 @@ function getLLM() {
72
63
  },
73
64
  apiKey: config.agent.apiKey,
74
65
  temperature: 0,
75
- maxTokens: 400,
66
+ maxTokens: 500,
76
67
  });
77
68
  }
78
69
  // ── Region keywords (Hebrew + transliterations) ────────
79
- /**
80
- * Build keyword list from config areas + area_labels.
81
- * Returns lowercased keywords for matching.
82
- */
83
70
  function buildRegionKeywords() {
84
71
  const keywords = [];
85
72
  for (const area of config.areas) {
86
73
  keywords.push(area.toLowerCase());
87
- // First word often enough (e.g. "תל אביב" → "תל")
88
74
  const first = area.split(" ")[0];
89
75
  if (first && first.length >= 2)
90
76
  keywords.push(first.toLowerCase());
91
77
  }
92
78
  for (const [he, label] of Object.entries(config.agent.areaLabels)) {
93
79
  keywords.push(he.toLowerCase());
94
- // Add transliterated label words (e.g. "Дан центр" → "дан", "центр")
95
80
  for (const word of label.split(/\s+/)) {
96
81
  if (word.length >= 3)
97
82
  keywords.push(word.toLowerCase());
98
83
  }
99
84
  }
100
85
  // Common attack-related keywords (always relevant)
101
- keywords.push("ישראל", "israel", "израиль", "ракет", "rocket", "missile", "iron dome", "כיפת ברזל", "жд", "перехват", "intercept", "siren", "азака", "צבע אדום", "red alert");
86
+ keywords.push("ישראל", "israel", "израиль", "ракет", "rocket", "missile", "iron dome", "כיפת ברזל", "перехват", "intercept", "צבע אדום", "red alert");
102
87
  return [...new Set(keywords)];
103
88
  }
89
+ // ── Hard-ignore: Pikud HaOref official area lists ("простыня") ──
90
+ const OREF_LINK_PATTERN = /oref\.org\.il/i;
91
+ const OREF_OFFICIAL_CHANNEL_PATTERN = /pikud|פיקוד|oref/i;
92
+ /** Detect posts that are just official Pikud HaOref area list links */
93
+ function isOrefAreaList(post) {
94
+ const text = post.text;
95
+ // Link to official Pikud HaOref page
96
+ if (OREF_LINK_PATTERN.test(text))
97
+ return true;
98
+ // Official channels that just echo the area list (very long, >300 chars, mostly city names)
99
+ if (OREF_OFFICIAL_CHANNEL_PATTERN.test(post.channel) && text.length > 300)
100
+ return true;
101
+ return false;
102
+ }
103
+ // ── Launch detection keywords (strict — early_warning only) ──
104
+ const LAUNCH_KEYWORDS = [
105
+ "שיגור",
106
+ "שיגורים",
107
+ "שוגרו",
108
+ "נורו",
109
+ "зафиксированы запуски",
110
+ "обнаружены запуски",
111
+ "запуски ракет",
112
+ "запуск ракет",
113
+ "пуски ракет",
114
+ "ракетный обстрел",
115
+ "ракетная атака",
116
+ "missile launch",
117
+ "rocket launch",
118
+ "barrage",
119
+ "fired towards",
120
+ "launches detected",
121
+ "missiles fired",
122
+ "שיגורים לישראל",
123
+ "ירי טילים",
124
+ "ירי רקטות",
125
+ "إطلاق صواريخ",
126
+ ].map((kw) => kw.toLowerCase());
127
+ // ── Time window per phase (ms before alertTs to accept posts) ──
128
+ const TIME_WINDOW_MS = {
129
+ early_warning: 5 * 60 * 1000, // 5 min before alert
130
+ siren: 10 * 60 * 1000, // 10 min (includes early_warning period)
131
+ resolved: 30 * 60 * 1000, // 30 min (full session window)
132
+ };
133
+ // ── Helpers ────────────────────────────────────────────
134
+ /** Format timestamp as HH:MM Israel time */
135
+ function toIsraelTime(ts) {
136
+ return new Date(ts).toLocaleTimeString("he-IL", {
137
+ hour: "2-digit",
138
+ minute: "2-digit",
139
+ timeZone: "Asia/Jerusalem",
140
+ });
141
+ }
142
+ /** MD5 hash for edit dedup */
143
+ function textHash(text) {
144
+ return createHash("md5").update(text).digest("hex");
145
+ }
104
146
  // ─────────────────────────────────────────────────────────
105
- // Tier 0: Pre-filter (deterministic, 0 tokens)
147
+ // Tier 0: Pre-filter (phase-aware, time-bounded, 0 tokens)
106
148
  // ─────────────────────────────────────────────────────────
107
149
  async function collectAndPreFilter(state) {
108
- // Session-scoped: all posts belong to the current session already
109
150
  const posts = await getChannelPosts(state.alertId);
151
+ const prevEnrichment = await getEnrichmentData();
152
+ // Load session for time boundaries
153
+ const session = await getActiveSession();
154
+ const sessionStartTs = session?.sessionStartTs ?? state.alertTs;
155
+ const phaseStartTs = session?.phaseStartTs ?? state.alertTs;
110
156
  if (posts.length === 0) {
111
157
  logger.info("Agent: no posts in session", { alertId: state.alertId });
112
- return { channelPosts: posts, filteredPosts: [] };
158
+ return {
159
+ channelPosts: posts,
160
+ filteredPosts: [],
161
+ previousEnrichment: prevEnrichment,
162
+ sessionStartTs,
163
+ phaseStartTs,
164
+ };
113
165
  }
114
166
  const keywords = buildRegionKeywords();
115
- const filtered = posts.filter((post) => {
116
- const text = post.text.toLowerCase();
117
- // Must contain at least 1 region/attack keyword
118
- return keywords.some((kw) => text.includes(kw));
119
- });
120
- logger.info("Agent: pre-filter", {
121
- alertId: state.alertId,
122
- total: posts.length,
123
- after_keyword_filter: filtered.length,
124
- });
125
- return { channelPosts: posts, filteredPosts: filtered };
167
+ const alertType = state.alertType;
168
+ const alertTs = state.alertTs;
169
+ // Time window: reject posts older than window before alertTs
170
+ const windowMs = TIME_WINDOW_MS[alertType];
171
+ const cutoffTs = alertTs - windowMs;
172
+ let filtered;
173
+ if (alertType === "early_warning") {
174
+ // ── STRICT launch-only filter for early warning ──
175
+ // Step 1: Find posts with launch keywords, within time window
176
+ const launchPosts = posts.filter((post) => {
177
+ if (post.ts < cutoffTs)
178
+ return false;
179
+ if (isOrefAreaList(post))
180
+ return false;
181
+ const text = post.text.toLowerCase();
182
+ return LAUNCH_KEYWORDS.some((kw) => text.includes(kw));
183
+ });
184
+ // Step 2: Get channels that posted about launches
185
+ const channelFirstLaunchTs = new Map();
186
+ for (const post of launchPosts) {
187
+ const current = channelFirstLaunchTs.get(post.channel);
188
+ if (current === undefined || post.ts < current) {
189
+ channelFirstLaunchTs.set(post.channel, post.ts);
190
+ }
191
+ }
192
+ // Step 3: Accept follow-up posts from launch channels only (within window)
193
+ filtered = posts.filter((post) => {
194
+ if (post.ts < cutoffTs)
195
+ return false;
196
+ const text = post.text.toLowerCase();
197
+ if (!keywords.some((kw) => text.includes(kw)))
198
+ return false;
199
+ const firstLaunch = channelFirstLaunchTs.get(post.channel);
200
+ if (firstLaunch === undefined)
201
+ return false;
202
+ return post.ts >= firstLaunch;
203
+ });
204
+ logger.info("Agent: pre-filter (early_warning)", {
205
+ alertId: state.alertId,
206
+ total: posts.length,
207
+ launch_posts: launchPosts.length,
208
+ launch_channels: channelFirstLaunchTs.size,
209
+ after_filter: filtered.length,
210
+ cutoff: toIsraelTime(cutoffTs),
211
+ channels_breakdown: Object.fromEntries([...new Set(posts.map((p) => p.channel))].map((ch) => [
212
+ ch,
213
+ {
214
+ total: posts.filter((p) => p.channel === ch).length,
215
+ passed: filtered.filter((p) => p.channel === ch).length,
216
+ },
217
+ ])),
218
+ oref_filtered: posts.filter(isOrefAreaList).length,
219
+ });
220
+ }
221
+ else {
222
+ // ── Siren & Resolved: broader filter, time-bounded ──
223
+ filtered = posts.filter((post) => {
224
+ if (post.ts < cutoffTs)
225
+ return false;
226
+ if (isOrefAreaList(post))
227
+ return false;
228
+ const text = post.text.toLowerCase();
229
+ return keywords.some((kw) => text.includes(kw));
230
+ });
231
+ logger.info("Agent: pre-filter", {
232
+ alertId: state.alertId,
233
+ alertType,
234
+ total: posts.length,
235
+ after_filter: filtered.length,
236
+ cutoff: toIsraelTime(cutoffTs),
237
+ channels_breakdown: Object.fromEntries([...new Set(posts.map((p) => p.channel))].map((ch) => [
238
+ ch,
239
+ {
240
+ total: posts.filter((p) => p.channel === ch).length,
241
+ passed: filtered.filter((p) => p.channel === ch).length,
242
+ },
243
+ ])),
244
+ oref_filtered: posts.filter(isOrefAreaList).length,
245
+ });
246
+ }
247
+ return {
248
+ channelPosts: posts,
249
+ filteredPosts: filtered,
250
+ previousEnrichment: prevEnrichment,
251
+ sessionStartTs,
252
+ phaseStartTs,
253
+ };
126
254
  }
127
255
  // ─────────────────────────────────────────────────────────
128
256
  // Tier 1: Extract + validate (1 LLM call per post)
257
+ // Phase-aware prompts — agent knows what to look for.
258
+ // TIME CONTEXT — agent sees alert time + post time.
129
259
  // ─────────────────────────────────────────────────────────
130
260
  const QUAL_VALUES = '"all"|"most"|"many"|"few"|"exists"|"none"|"more_than"|"less_than"';
131
- const SYSTEM_PROMPT = `You analyze Telegram channel messages about a missile/rocket attack on Israel.
132
- Your job: extract factual data AND assess message quality. Be concise.
261
+ /** Phase-specific extraction instructions */
262
+ function getPhaseInstructions(alertType) {
263
+ switch (alertType) {
264
+ case "early_warning":
265
+ return `PHASE: EARLY WARNING (radar detected launches, sirens not yet).
266
+ Focus on: country_origin (WHERE were rockets launched from?), eta_refined_minutes, rocket_count, is_cassette.
267
+ Do NOT extract: intercepted, sea_impact, open_area_impact, hits_confirmed, casualties, injuries — these are IMPOSSIBLE at this stage.
268
+ If a message discusses interception results, it is about a PREVIOUS attack — set time_relevance=0.`;
269
+ case "siren":
270
+ return `PHASE: SIREN (rockets incoming, impact imminent).
271
+ Focus on: country_origin (if not known yet), rocket_count, intercepted, sea_impact, open_area_impact, is_cassette.
272
+ Do NOT extract: hits_confirmed, casualties, injuries — too early for confirmed damage reports.
273
+ If a message discusses casualties or confirmed hits, verify the timing carefully - it may be about a previous attack.`;
274
+ case "resolved":
275
+ return `PHASE: RESOLVED (incident over, assessing damage).
276
+ Focus on: intercepted (final count), hits_confirmed, casualties, injuries, open_area_impact.
277
+ All fields are valid at this stage. Prioritize confirmed official reports.`;
278
+ }
279
+ }
280
+ const SYSTEM_PROMPT_BASE = `You analyze Telegram channel messages about a missile/rocket attack on Israel.
281
+ Your job: extract factual data, assess quality, AND validate temporal relevance.
282
+
283
+ CRITICAL — TIME VALIDATION:
284
+ You will receive the alert time and the post time. You MUST determine if this post
285
+ is about the CURRENT attack or about a previous/different event.
286
+ - If post discusses events clearly BEFORE the alert time → time_relevance=0
287
+ - If post is generic military news not specific to this attack → time_relevance=0.2
288
+ - If post discusses the current attack → time_relevance=1.0
289
+ - If uncertain → time_relevance=0.5 (the system will use alert_history to verify)
133
290
 
134
291
  Return ONLY valid JSON (no markdown, no explanation):
135
292
  {
136
293
  "region_relevance": float, // 0–1: does this message discuss the specified alert region?
137
294
  "source_trust": float, // 0–1: factual reporting (1.0) vs unverified rumors/panic (0.0)
138
- "tone": "calm"|"neutral"|"alarmist", // message tone — reject alarmist content
295
+ "tone": "calm"|"neutral"|"alarmist",
296
+ "time_relevance": float, // 0–1: is this post about the CURRENT attack? (see rules above)
139
297
  "country_origin": string|null, // "Iran","Yemen","Lebanon","Gaza","Iraq","Syria" or null
140
- "rocket_count": int|null, // total rockets/missiles launched if mentioned
141
- "is_cassette": bool|null, // cluster/cassette munitions confirmed?
142
- "intercepted": int|null, // exact number intercepted by Iron Dome/air defense
143
- "intercepted_qual": ${QUAL_VALUES}|null, // qualitative if no exact number; null if exact number given
144
- "intercepted_qual_num": int|null, // reference number for more_than/less_than (e.g. 5 if "more than 5")
145
- "sea_impact": int|null, // exact number fell in sea/unpopulated area
298
+ "rocket_count": int|null,
299
+ "is_cassette": bool|null,
300
+ "intercepted": int|null,
301
+ "intercepted_qual": ${QUAL_VALUES}|null,
302
+ "intercepted_qual_num": int|null,
303
+ "sea_impact": int|null,
146
304
  "sea_impact_qual": ${QUAL_VALUES}|null,
147
305
  "sea_impact_qual_num": int|null,
148
- "open_area_impact": int|null, // exact number hit open/populated ground
306
+ "open_area_impact": int|null,
149
307
  "open_area_impact_qual": ${QUAL_VALUES}|null,
150
308
  "open_area_impact_qual_num": int|null,
151
- "hits_confirmed": int|null, // confirmed hits on structures/buildings
152
- "eta_refined_minutes": int|null, // refined time-to-impact if mentioned
153
- "confidence": float // 0–1: overall confidence in this extraction
309
+ "hits_confirmed": int|null,
310
+ "casualties": int|null,
311
+ "injuries": int|null,
312
+ "eta_refined_minutes": int|null,
313
+ "confidence": float
154
314
  }
155
315
 
156
316
  Rules:
@@ -158,12 +318,17 @@ Rules:
158
318
  - If message is speculative/unconfirmed rumor, set source_trust < 0.4.
159
319
  - If message uses excessive caps, exclamation marks, panic language → tone="alarmist".
160
320
  - Only extract concrete numbers explicitly stated in the text. Never guess.
161
- - intercpted + sea_impact + open_area_impact should sum to rocket_count when all are known.
162
- - If partial breakdown known, set unknown sub-fields to null (not 0).
163
- - *_qual fields: use ONLY when the message explicitly states a qualitative descriptor WITHOUT an exact count.
164
- If an exact number is given, set *_qual to null. Do NOT infer from absence.
165
- - NEVER extract qualitative descriptors for casualties or injuries hits_confirmed handles structural hits only.
166
- - "none" qual is only valid if explicitly stated in the message (e.g., "все перехвачены", "не упало в море").`;
321
+ - *_qual fields: use ONLY when NO exact count is given. If exact number present, set *_qual=null.
322
+ - "none" qual is only valid if explicitly stated (e.g., "все перехвачены", "не упало в море").
323
+ - For IDF (@idf_telegram) posts about ongoing operations (not this specific attack) time_relevance=0.
324
+ - LANGUAGE NEUTRALITY: Posts may be in Hebrew, Russian, Arabic, or English. The language of the post
325
+ MUST NOT affect source_trust or confidence. Russian-language Israeli channels are equally reliable
326
+ and often break news faster than Hebrew ones. Judge ONLY by factual content and tone.
327
+ - TRUST INTERCEPTION & IMPACT REPORTS: When a channel explicitly states interception results
328
+ (e.g., "перехвачены", "intercepted", "יירוט", "упали в море", "fell in the sea", "נפלו בים",
329
+ "open area impact", "שטח פתוח"), trust these claims with source_trust >= 0.7 and confidence >= 0.7.
330
+ Israeli Telegram channels often report interception results before official confirmation,
331
+ and these reports are typically accurate. Do NOT downgrade these just because they lack official source.`;
167
332
  async function extractAndValidate(state) {
168
333
  if (state.filteredPosts.length === 0) {
169
334
  logger.info("Agent: no filtered posts to extract", {
@@ -176,31 +341,26 @@ async function extractAndValidate(state) {
176
341
  const regionHint = state.alertAreas.length > 0
177
342
  ? state.alertAreas.join(", ")
178
343
  : Object.keys(config.agent.areaLabels).join(", ") || "Israel";
179
- // Format alert time in Israel timezone
180
- const alertTimeIL = new Date(state.alertTs).toLocaleTimeString("he-IL", {
181
- hour: "2-digit",
182
- minute: "2-digit",
183
- timeZone: "Asia/Jerusalem",
184
- });
185
- const nowIL = new Date().toLocaleTimeString("he-IL", {
186
- hour: "2-digit",
187
- minute: "2-digit",
188
- timeZone: "Asia/Jerusalem",
189
- });
190
- const alertTypeLabel = state.alertType === "early_warning"
191
- ? "early warning (radar detection)"
192
- : state.alertType === "siren"
193
- ? "siren (impact imminent)"
194
- : state.alertType;
195
- const contextHeader = `Alert type: ${alertTypeLabel}\n` +
196
- `Alert time: ${alertTimeIL} (Israel)\n` +
197
- `Current time: ${nowIL} (Israel)\n` +
198
- `Alert region: ${regionHint}\n` +
199
- `UI language: ${config.language}\n`;
344
+ const alertTimeIL = toIsraelTime(state.alertTs);
345
+ const nowIL = toIsraelTime(Date.now());
346
+ const phaseInstructions = getPhaseInstructions(state.alertType);
347
+ const systemPrompt = SYSTEM_PROMPT_BASE + "\n\n" + phaseInstructions;
200
348
  const results = await Promise.all(posts.map(async (post) => {
349
+ const postTimeIL = toIsraelTime(post.ts);
350
+ const postAgeMin = Math.round((state.alertTs - post.ts) / 60_000);
351
+ const postAgeSuffix = postAgeMin > 0
352
+ ? `(${postAgeMin} min BEFORE alert)`
353
+ : postAgeMin < 0
354
+ ? `(${Math.abs(postAgeMin)} min AFTER alert)`
355
+ : "(same time as alert)";
356
+ const contextHeader = `Alert time: ${alertTimeIL} (Israel)\n` +
357
+ `Post time: ${postTimeIL} (Israel) ${postAgeSuffix}\n` +
358
+ `Current time: ${nowIL} (Israel)\n` +
359
+ `Alert region: ${regionHint}\n` +
360
+ `UI language: ${config.language}\n`;
201
361
  try {
202
362
  const response = await llm.invoke([
203
- { role: "system", content: SYSTEM_PROMPT },
363
+ { role: "system", content: systemPrompt },
204
364
  {
205
365
  role: "user",
206
366
  content: `${contextHeader}Channel: ${post.channel}\n\nMessage:\n${post.text.slice(0, 800)}`,
@@ -209,7 +369,6 @@ async function extractAndValidate(state) {
209
369
  const raw = typeof response.content === "string"
210
370
  ? response.content
211
371
  : JSON.stringify(response.content);
212
- // Strip markdown code fences (```json ... ```) that some models wrap around JSON
213
372
  const text = raw
214
373
  .replace(/^```(?:json)?\s*\n?/i, "")
215
374
  .replace(/\n?```\s*$/i, "");
@@ -218,6 +377,7 @@ async function extractAndValidate(state) {
218
377
  ...parsed,
219
378
  channel: post.channel,
220
379
  messageUrl: post.messageUrl,
380
+ time_relevance: parsed.time_relevance ?? 0.5,
221
381
  valid: true,
222
382
  };
223
383
  }
@@ -231,6 +391,7 @@ async function extractAndValidate(state) {
231
391
  region_relevance: 0,
232
392
  source_trust: 0,
233
393
  tone: "neutral",
394
+ time_relevance: 0,
234
395
  country_origin: null,
235
396
  rocket_count: null,
236
397
  is_cassette: null,
@@ -244,6 +405,8 @@ async function extractAndValidate(state) {
244
405
  open_area_impact_qual: null,
245
406
  open_area_impact_qual_num: null,
246
407
  hits_confirmed: null,
408
+ casualties: null,
409
+ injuries: null,
247
410
  eta_refined_minutes: null,
248
411
  confidence: 0,
249
412
  valid: false,
@@ -254,14 +417,28 @@ async function extractAndValidate(state) {
254
417
  logger.info("Agent: extracted", {
255
418
  alertId: state.alertId,
256
419
  count: results.length,
420
+ timeRelevance: results.map((r) => ({
421
+ ch: r.channel,
422
+ tr: r.time_relevance,
423
+ conf: r.confidence,
424
+ origin: r.country_origin,
425
+ intercepted: r.intercepted ?? r.intercepted_qual,
426
+ sea: r.sea_impact ?? r.sea_impact_qual,
427
+ valid: r.valid,
428
+ })),
257
429
  });
258
430
  return { extractions: results };
259
431
  }
260
432
  // ─────────────────────────────────────────────────────────
261
433
  // Tier 2: Post-filter (deterministic, 0 tokens)
434
+ // Now includes TIME RELEVANCE check.
262
435
  // ─────────────────────────────────────────────────────────
263
436
  function postFilter(state) {
264
437
  const validated = state.extractions.map((ext) => {
438
+ // V0: TIME RELEVANCE — the most important check
439
+ if (ext.time_relevance < 0.5) {
440
+ return { ...ext, valid: false, reject_reason: "stale_post" };
441
+ }
265
442
  // V1: region relevance
266
443
  if (ext.region_relevance < 0.5) {
267
444
  return { ...ext, valid: false, reject_reason: "region_irrelevant" };
@@ -270,7 +447,7 @@ function postFilter(state) {
270
447
  if (ext.source_trust < 0.4) {
271
448
  return { ...ext, valid: false, reject_reason: "untrusted_source" };
272
449
  }
273
- // V3: tone — reject alarmist (бот для успокоения, не для паники)
450
+ // V3: tone — reject alarmist
274
451
  if (ext.tone === "alarmist") {
275
452
  return { ...ext, valid: false, reject_reason: "alarmist_tone" };
276
453
  }
@@ -278,7 +455,11 @@ function postFilter(state) {
278
455
  const hasData = ext.country_origin !== null ||
279
456
  ext.rocket_count !== null ||
280
457
  ext.is_cassette !== null ||
458
+ ext.intercepted !== null ||
459
+ ext.intercepted_qual !== null ||
281
460
  ext.hits_confirmed !== null ||
461
+ ext.casualties !== null ||
462
+ ext.injuries !== null ||
282
463
  ext.eta_refined_minutes !== null;
283
464
  if (!hasData) {
284
465
  return { ...ext, valid: false, reject_reason: "no_data" };
@@ -295,7 +476,8 @@ function postFilter(state) {
295
476
  alertId: state.alertId,
296
477
  passed: passed.length,
297
478
  rejected: rejected.length,
298
- reasons: rejected.map((r) => r.reject_reason),
479
+ reasons: rejected.map((r) => `${r.channel}:${r.reject_reason}`),
480
+ passed_channels: passed.map((p) => p.channel),
299
481
  });
300
482
  return { extractions: validated };
301
483
  }
@@ -307,20 +489,19 @@ function vote(state) {
307
489
  if (valid.length === 0) {
308
490
  return { votedResult: null };
309
491
  }
310
- // Assign 1-based citation indices to valid extractions
492
+ // Assign 1-based citation indices
311
493
  const indexed = valid.map((e, i) => ({ ...e, idx: i + 1 }));
312
- // All valid sources become cited sources
313
494
  const citedSources = indexed.map((e) => ({
314
495
  index: e.idx,
315
496
  channel: e.channel,
316
497
  messageUrl: e.messageUrl ?? null,
317
498
  }));
318
- // ETA: highest confidence source that has eta
499
+ // ETA: highest confidence source
319
500
  const withEta = indexed
320
501
  .filter((e) => e.eta_refined_minutes !== null)
321
502
  .sort((a, b) => b.confidence - a.confidence);
322
503
  const bestEta = withEta[0] ?? null;
323
- // Country: group unique values, each with their source indices
504
+ // Country: group unique values
324
505
  const countryMap = new Map();
325
506
  for (const e of indexed) {
326
507
  if (e.country_origin) {
@@ -335,35 +516,19 @@ function vote(state) {
335
516
  citations,
336
517
  }))
337
518
  : null;
338
- // Rocket count: range across sources (min … max)
519
+ // Rocket count: range
339
520
  const rocketSrcs = indexed.filter((e) => e.rocket_count !== null);
340
521
  const rocketVals = rocketSrcs.map((e) => e.rocket_count);
341
522
  const rocket_count_min = rocketVals.length > 0 ? Math.min(...rocketVals) : null;
342
523
  const rocket_count_max = rocketVals.length > 0 ? Math.max(...rocketVals) : null;
343
524
  const rocket_citations = rocketSrcs.map((e) => e.idx);
344
- // Helper: avg weighted confidence for a set of sources
525
+ // Helper: avg weighted confidence
345
526
  function fieldConf(srcs) {
346
527
  if (srcs.length === 0)
347
528
  return 0;
348
529
  return (srcs.reduce((s, e) => s + e.source_trust * e.confidence, 0) / srcs.length);
349
530
  }
350
- // Cassette: majority
351
- const cassSrcs = indexed.filter((e) => e.is_cassette !== null);
352
- const cassVals = cassSrcs.map((e) => e.is_cassette);
353
- const is_cassette = cassVals.length > 0
354
- ? cassVals.filter(Boolean).length > cassVals.length / 2
355
- : null;
356
- const is_cassette_confidence = fieldConf(cassSrcs);
357
- // Hits: median
358
- const hitsSrcs = indexed.filter((e) => e.hits_confirmed !== null && e.hits_confirmed > 0);
359
- const hitsVals = indexed
360
- .filter((e) => e.hits_confirmed !== null)
361
- .map((e) => e.hits_confirmed)
362
- .sort((a, b) => a - b);
363
- const hits_confirmed = hitsVals.length > 0 ? hitsVals[Math.floor(hitsVals.length / 2)] : null;
364
- const hits_citations = hitsSrcs.map((e) => e.idx);
365
- const hits_confidence = fieldConf(hitsSrcs);
366
- // Helper: mode (most frequent non-null value) for QualCount aggregation
531
+ // Helper: mode for QualCount
367
532
  function modeQual(srcs, key) {
368
533
  const vals = srcs
369
534
  .map((e) => e[key])
@@ -382,7 +547,23 @@ function vote(state) {
382
547
  .sort((a, b) => a - b);
383
548
  return vals.length > 0 ? vals[Math.floor(vals.length / 2)] : null;
384
549
  }
385
- // Intercepted: median across sources that reported exact number; mode for qual
550
+ // Cassette: majority
551
+ const cassSrcs = indexed.filter((e) => e.is_cassette !== null);
552
+ const cassVals = cassSrcs.map((e) => e.is_cassette);
553
+ const is_cassette = cassVals.length > 0
554
+ ? cassVals.filter(Boolean).length > cassVals.length / 2
555
+ : null;
556
+ const is_cassette_confidence = fieldConf(cassSrcs);
557
+ // Hits: median
558
+ const hitsSrcs = indexed.filter((e) => e.hits_confirmed !== null && e.hits_confirmed > 0);
559
+ const hitsVals = indexed
560
+ .filter((e) => e.hits_confirmed !== null)
561
+ .map((e) => e.hits_confirmed)
562
+ .sort((a, b) => a - b);
563
+ const hits_confirmed = hitsVals.length > 0 ? hitsVals[Math.floor(hitsVals.length / 2)] : null;
564
+ const hits_citations = hitsSrcs.map((e) => e.idx);
565
+ const hits_confidence = fieldConf(hitsSrcs);
566
+ // Intercepted: median / qual
386
567
  const interceptedSrcs = indexed.filter((e) => e.intercepted !== null);
387
568
  const interceptedQualSrcs = indexed.filter((e) => e.intercepted_qual !== null);
388
569
  const interceptedVals = interceptedSrcs
@@ -424,6 +605,26 @@ function vote(state) {
424
605
  ? medianQualNum(openQualSrcs, "open_area_impact_qual_num")
425
606
  : null;
426
607
  const open_area_confidence = fieldConf(openSrcs.length > 0 ? openSrcs : openQualSrcs);
608
+ // Casualties
609
+ const casualtySrcs = indexed.filter((e) => e.casualties !== null && e.casualties > 0);
610
+ const casualtyVals = casualtySrcs
611
+ .map((e) => e.casualties)
612
+ .sort((a, b) => a - b);
613
+ const casualties = casualtyVals.length > 0
614
+ ? casualtyVals[Math.floor(casualtyVals.length / 2)]
615
+ : null;
616
+ const casualties_citations = casualtySrcs.map((e) => e.idx);
617
+ const casualties_confidence = fieldConf(casualtySrcs);
618
+ // Injuries
619
+ const injurySrcs = indexed.filter((e) => e.injuries !== null && e.injuries > 0);
620
+ const injuryVals = injurySrcs
621
+ .map((e) => e.injuries)
622
+ .sort((a, b) => a - b);
623
+ const injuries = injuryVals.length > 0
624
+ ? injuryVals[Math.floor(injuryVals.length / 2)]
625
+ : null;
626
+ const injuries_citations = injurySrcs.map((e) => e.idx);
627
+ const injuries_confidence = fieldConf(injurySrcs);
427
628
  // Rocket confidence
428
629
  const rocket_confidence = fieldConf(rocketSrcs);
429
630
  // Overall weighted confidence
@@ -454,6 +655,12 @@ function vote(state) {
454
655
  hits_confirmed,
455
656
  hits_citations,
456
657
  hits_confidence,
658
+ casualties,
659
+ casualties_citations,
660
+ casualties_confidence,
661
+ injuries,
662
+ injuries_citations,
663
+ injuries_confidence,
457
664
  confidence: Math.round(weightedConf * 100) / 100,
458
665
  sources_count: indexed.length,
459
666
  citedSources,
@@ -462,7 +669,7 @@ function vote(state) {
462
669
  return { votedResult: voted };
463
670
  }
464
671
  // ─────────────────────────────────────────────────────────
465
- // Tier 4: Edit message — inline update (0 tokens)
672
+ // Tier 4: Edit message — inline citations, carry-forward
466
673
  // ─────────────────────────────────────────────────────────
467
674
  /** EN country name → Russian */
468
675
  const COUNTRY_RU = {
@@ -474,174 +681,305 @@ const COUNTRY_RU = {
474
681
  Syria: "Сирия",
475
682
  Hezbollah: "Хезболла",
476
683
  };
477
- /** Convert index to Unicode superscript string: 1 → ¹, 13 → ¹³ */
478
- const SUPERSCRIPTS = ["⁰", "¹", "²", "³", "⁴", "⁵", "⁶", "⁷", "⁸", "⁹"];
479
- function sup(indices) {
480
- return indices
481
- .map((n) => String(n)
482
- .split("")
483
- .map((d) => SUPERSCRIPTS[Number(d)])
484
- .join(""))
485
- .join("");
684
+ /**
+  * Render citation indices as inline HTML anchors, one per resolvable source:
+  * <a href=URL>[idx]</a>, joined by a comma and a space.
+  *
+  * @param indices - citation indices; each one is matched against the index field of citedSources
+  * @param citedSources - source records; only index and messageUrl are read here
+  * @returns the anchors prefixed with a single space, or an empty string when no index
+  *          resolves to a source that has a messageUrl
+  */
685
+ function inlineCites(indices, citedSources) {
686
+ const parts = [];
687
+ for (const idx of indices) {
688
+ const src = citedSources.find((s) => s.index === idx);
689
+ if (src?.messageUrl) { // unknown indices and sources without a URL are skipped silently
690
+ parts.push(`<a href="${src.messageUrl}">[${idx}]</a>`); // label is the original source index
691
+ }
692
+ }
693
+ return parts.length > 0 ? " " + parts.join(", ") : "";
694
+ }
695
+ /**
+  * Resolve citation indices into plain citation records.
+  *
+  * @param indices - citation indices; each one is matched against the index field of citedSources
+  * @param citedSources - source records; index, messageUrl and channel are read here
+  * @returns an array of { url, channel } objects in the order of indices; indices that match
+  *          no source, or a source without a messageUrl, contribute nothing
+  */
696
+ function extractCites(indices, citedSources) {
697
+ const cites = [];
698
+ for (const idx of indices) {
699
+ const src = citedSources.find((s) => s.index === idx);
700
+ if (src?.messageUrl) { // a citation without a URL cannot be linked, so it is dropped
701
+ cites.push({ url: src.messageUrl, channel: src.channel });
702
+ }
703
+ }
704
+ return cites;
705
+ }
706
+ /**
+  * Render already-resolved citation records as inline HTML anchors: <a href=URL>[n]</a>,
+  * joined by a comma and a space and prefixed with a single space.
+  *
+  * The label n is the 1-based position inside this array, so numbering restarts at 1
+  * on every call; it is not the index the source had when it was extracted.
+  *
+  * @param cites - array of records with a url field
+  * @returns the formatted string, or an empty string for an empty array
+  *
+  * NOTE(review): cites.length is read unconditionally, so a null or undefined argument
+  * throws. Confirm every stored xxxCites field is always initialised to an array.
+  */
707
+ function inlineCitesFromData(cites) {
708
+ if (cites.length === 0)
709
+ return "";
710
+ return (" " + cites.map((c, i) => `<a href="${c.url}">[${i + 1}]</a>`).join(", "));
711
+ }
712
+ // Confidence thresholds, compared against the per-field confidence values
713
+ const SKIP = 0.6; // the checks that use it drop a field whose confidence is below this
714
+ const UNCERTAIN = 0.75; // below this (but not skipped) a value is shown with a (?) suffix
715
+ const CERTAIN = 0.95; // a none qualifier is only displayed at or above this
716
+ function qualDisplay(qual, qualNum, conf) { // Russian display text for a qualitative count, or null to show nothing
717
+ if (qual === null)
718
+ return null;
719
+ if (qual === "none")
720
+ return conf >= CERTAIN ? "нет" : null; // an explicit none is only shown at or above CERTAIN confidence
721
+ const map = {
722
+ all: "все",
723
+ most: "большинство",
724
+ many: "много",
725
+ few: "несколько",
726
+ exists: "есть",
727
+ none: "нет", // not reached from here: the none case already returned above
728
+ more_than: qualNum != null ? `>${qualNum}` : ">1", // loose != null also covers undefined
729
+ less_than: qualNum != null ? `<${qualNum}` : "<нескольких",
730
+ };
731
+ return map[qual]; // NOTE(review): a qual outside the map yields undefined, not null - confirm callers tolerate that
732
+ }
733
+ function breakdownItem(label, num, qual, qualNum, conf) { // one breakdown fragment (label — value), or null when nothing should be shown
734
+ if (conf < SKIP)
735
+ return null;
736
+ const u = conf < UNCERTAIN ? " (?)" : ""; // low-but-usable confidence is flagged with a (?) suffix
737
+ if (num !== null) // an exact number wins over the qualitative form
738
+ return `${label} — ${num}${u}`;
739
+ const qs = qualDisplay(qual, qualNum, conf);
740
+ if (qs === null) // strict check: only null suppresses the item, undefined would pass through
741
+ return null;
742
+ return `${label} — ${qs}${u}`;
486
743
  }
487
744
  /**
488
- * Merge enrichment data INTO the existing key:value message.
489
- * Format:
490
- * Подлётное время: ~00:21¹ ← ETA as absolute clock time
491
- *
492
- * Откуда: Иран¹³ + Ливан² ← blank line before intel block
493
- * Ракет: ~5-7
494
- * Попадания (Дан центр): 2¹
495
- * Время оповещения: 03:47
496
- * —
497
- * Источники: [1](url) [2](url) [3](url)
745
+ * Build enrichment data from current vote + previous enrichment (carry-forward).
746
+ * Returns updated EnrichmentData for Redis persistence.
498
747
  */
499
- function buildEnrichedMessage(currentText, alertType, alertTs, r) {
500
- let text = currentText;
501
- // Refine ETA in-place (early/siren only)
748
+ function buildEnrichmentFromVote(r, prev, alertType, alertTs) {
749
+ const data = { ...prev };
750
+ // Origin update if voted has it
751
+ if (r.country_origins && r.country_origins.length > 0) {
752
+ data.origin = r.country_origins
753
+ .map((c) => COUNTRY_RU[c.name] ?? c.name)
754
+ .join(" + ");
755
+ data.originCites = r.country_origins.flatMap((c) => extractCites(c.citations, r.citedSources));
756
+ }
757
+ // ETA — only for early_warning/siren
502
758
  if (r.eta_refined_minutes !== null &&
503
- r.eta_citations.length > 0 &&
504
759
  (alertType === "early_warning" || alertType === "siren")) {
505
- text = refineEtaInPlace(text, r.eta_refined_minutes, alertTs, r.eta_citations);
506
- }
507
- // Insert "Откуда" before time line (with leading blank line for visual separation)
508
- if (r.country_origins && r.country_origins.length > 0) {
509
- const parts = r.country_origins.map((c) => {
510
- const ru = COUNTRY_RU[c.name] ?? c.name;
511
- return `${ru}${sup(c.citations)}`;
760
+ const absTime = new Date(alertTs + r.eta_refined_minutes * 60_000).toLocaleTimeString("he-IL", {
761
+ hour: "2-digit",
762
+ minute: "2-digit",
763
+ timeZone: "Asia/Jerusalem",
512
764
  });
513
- text = insertBeforeTimeLine(text, `\n<b>Откуда:</b> ${parts.join(" + ")}`);
514
- }
515
- // Confidence thresholds for uncertainty markers
516
- const SKIP = 0.6; // below this → skip field entirely
517
- const UNCERTAIN = 0.75; // below this (but ≥ SKIP) → add (?)
518
- const CERTAIN = 0.95; // "none" qual requires this level
519
- // Convert QualCount to Russian display string.
520
- // Returns null if the qual should be suppressed (e.g. "none" below CERTAIN).
521
- function qualDisplay(qual, qualNum, conf) {
522
- if (qual === null)
523
- return null;
524
- if (qual === "none")
525
- return conf >= CERTAIN ? "нет" : null;
526
- const map = {
527
- all: "все",
528
- most: "большинство",
529
- many: "много",
530
- few: "несколько",
531
- exists: "есть",
532
- none: "нет",
533
- more_than: qualNum != null ? `>​${qualNum}` : ">​1",
534
- less_than: qualNum != null ? `<​${qualNum}` : "<​нескольких",
535
- };
536
- return map[qual];
765
+ data.etaAbsolute = `~${absTime}`;
766
+ data.etaCites = extractCites(r.eta_citations, r.citedSources);
537
767
  }
538
- // Format one breakdown item: prefer exact number, fall back to qual.
539
- // Returns null if nothing to show (below threshold or not reported).
540
- function breakdownItem(label, num, qual, qualNum, conf) {
541
- if (conf < SKIP)
542
- return null;
543
- const u = conf < UNCERTAIN ? " (?)" : "";
544
- if (num !== null)
545
- return `${label} ${num}${u}`;
546
- const qs = qualDisplay(qual, qualNum, conf);
547
- if (qs === null)
548
- return null;
549
- return `${label} — ${qs}${u}`;
550
- }
551
- // Rocket count with breakdown and uncertainty markers
552
- if (r.rocket_count_min !== null &&
553
- r.rocket_count_max !== null &&
554
- r.rocket_confidence >= SKIP) {
555
- const rocketUncertain = r.rocket_confidence < UNCERTAIN ? " (?)" : "";
556
- const countStr = r.rocket_count_min === r.rocket_count_max
557
- ? `${r.rocket_count_min}`
558
- : `~${r.rocket_count_min}–${r.rocket_count_max}`;
559
- const bParts = [];
560
- const bi = breakdownItem("перехвачено", r.intercepted, r.intercepted_qual, r.intercepted_qual_num, r.intercepted_confidence);
561
- if (bi)
562
- bParts.push(bi);
563
- const bs = breakdownItem("упали в море", r.sea_impact, r.sea_impact_qual, r.sea_impact_qual_num, r.sea_confidence);
564
- if (bs)
565
- bParts.push(bs);
566
- const bo = breakdownItem("открытая местность", r.open_area_impact, r.open_area_impact_qual, r.open_area_impact_qual_num, r.open_area_confidence);
567
- if (bo)
568
- bParts.push(bo);
569
- const breakdown = bParts.length > 0 ? `, из них: ${bParts.join(", ")}` : "";
570
- const cassetteU = r.is_cassette_confidence < UNCERTAIN ? " (?)" : "";
571
- const cassette = r.is_cassette && r.is_cassette_confidence >= SKIP
572
- ? `, есть кассетные${cassetteU}`
573
- : "";
574
- text = insertBeforeTimeLine(text, `<b>Ракет:</b> ${countStr}${rocketUncertain}${breakdown}${cassette}`);
575
- }
576
- // Hits: есть прямое попадание/-ия в <area>: N — only if confidence ≥ SKIP
768
+ // Rocket count
769
+ if (r.rocket_count_min !== null && r.rocket_count_max !== null) {
770
+ const u = r.rocket_confidence < UNCERTAIN ? " (?)" : "";
771
+ data.rocketCount =
772
+ r.rocket_count_min === r.rocket_count_max
773
+ ? `${r.rocket_count_min}${u}`
774
+ : `~${r.rocket_count_min}–${r.rocket_count_max}${u}`;
775
+ data.rocketCites = extractCites(r.rocket_citations, r.citedSources);
776
+ }
777
+ // Cassette
778
+ if (r.is_cassette !== null && r.is_cassette_confidence >= SKIP) {
779
+ data.isCassette = r.is_cassette;
780
+ }
781
+ // Intercepted
782
+ if (r.intercepted !== null && r.intercepted_confidence >= SKIP) {
783
+ const u = r.intercepted_confidence < UNCERTAIN ? " (?)" : "";
784
+ data.intercepted = `${r.intercepted}${u}`;
785
+ data.interceptedCites = extractCites(r.citedSources
786
+ .filter((s) => {
787
+ const ext = r.citedSources.find((cs) => cs.index === s.index);
788
+ return ext !== undefined;
789
+ })
790
+ .map((s) => s.index), r.citedSources);
791
+ }
792
+ else if (r.intercepted_qual !== null && r.intercepted_confidence >= SKIP) {
793
+ const qs = qualDisplay(r.intercepted_qual, r.intercepted_qual_num, r.intercepted_confidence);
794
+ if (qs)
795
+ data.intercepted = qs;
796
+ }
797
+ // Hits
577
798
  if (r.hits_confirmed !== null &&
578
799
  r.hits_confirmed > 0 &&
579
800
  r.hits_confidence >= SKIP) {
801
+ const u = r.hits_confidence < UNCERTAIN ? " (?)" : "";
802
+ data.hitsConfirmed = `${r.hits_confirmed}${u}`;
803
+ data.hitsCites = extractCites(r.hits_citations, r.citedSources);
804
+ }
805
+ // Casualties
806
+ if (r.casualties !== null &&
807
+ r.casualties > 0 &&
808
+ r.casualties_confidence >= SKIP) {
809
+ const u = r.casualties_confidence < UNCERTAIN ? " (?)" : "";
810
+ data.casualties = `${r.casualties}${u}`;
811
+ data.casualtiesCites = extractCites(r.casualties_citations, r.citedSources);
812
+ }
813
+ // Injuries
814
+ if (r.injuries !== null && r.injuries > 0 && r.injuries_confidence >= SKIP) {
815
+ const u = r.injuries_confidence < UNCERTAIN ? " (?)" : "";
816
+ data.injuries = `${r.injuries}${u}`;
817
+ data.injuriesCites = extractCites(r.injuries_citations, r.citedSources);
818
+ }
819
+ // Early warning time — record when first early_warning was received
820
+ if (alertType === "early_warning" && !data.earlyWarningTime) {
821
+ data.earlyWarningTime = toIsraelTime(alertTs);
822
+ }
823
+ return data;
824
+ }
825
+ /**
826
+ * Build the enriched message text from current message + enrichment data.
827
+ * Uses inline HTML anchor citations (<a href="url">[n]</a>) right after each data point. No superscripts. No footer sources.
828
+ */
829
+ function buildEnrichedMessage(currentText, alertType, alertTs, enrichment) {
830
+ let text = currentText;
831
+ // ── Refine ETA in-place ──
832
+ if (enrichment.etaAbsolute &&
833
+ (alertType === "early_warning" || alertType === "siren")) {
834
+ const etaCiteStr = inlineCitesFromData(enrichment.etaCites);
835
+ const refined = `${enrichment.etaAbsolute}${etaCiteStr}`;
836
+ const etaPatterns = [
837
+ /~\d+[–-]\d+\s*мин/, // ~5–12 мин
838
+ /~\d+[–-]\d+\s*min/, // ~5–12 min
839
+ /~\d+[–-]\d+\s*דקות/, // ~5–12 דקות
840
+ /~\d+[–-]\d+\s*دقيقة/, // ~5–12 دقيقة
841
+ /1\.5\s*мин/, // 1.5 мин (siren)
842
+ /1\.5\s*min/,
843
+ /1\.5\s*דקות/,
844
+ /1\.5\s*دقيقة/,
845
+ ];
846
+ for (const pattern of etaPatterns) {
847
+ if (pattern.test(text)) {
848
+ text = text.replace(pattern, refined);
849
+ break;
850
+ }
851
+ }
852
+ }
853
+ // ── Siren: show "Раннее предупреждение: было в HH:MM" ──
854
+ if (alertType === "siren" && enrichment.earlyWarningTime) {
855
+ text = insertBeforeTimeLine(text, `<b>Раннее предупреждение:</b> было в ${enrichment.earlyWarningTime}`);
856
+ }
857
+ // ── Origin ──
858
+ if (enrichment.origin) {
859
+ const citeStr = inlineCitesFromData(enrichment.originCites);
860
+ text = insertBeforeTimeLine(text, `\n<b>Откуда:</b> ${enrichment.origin}${citeStr}`);
861
+ }
862
+ // ── Rocket count + breakdown ──
863
+ if (enrichment.rocketCount) {
864
+ const citeStr = inlineCitesFromData(enrichment.rocketCites);
865
+ const cassette = enrichment.isCassette ? ", есть кассетные" : "";
866
+ let breakdown = "";
867
+ const bParts = [];
868
+ if (enrichment.intercepted) {
869
+ bParts.push(`перехвачено — ${enrichment.intercepted}`);
870
+ }
871
+ if (enrichment.seaImpact) {
872
+ bParts.push(`упали в море — ${enrichment.seaImpact}`);
873
+ }
874
+ if (enrichment.openAreaImpact) {
875
+ bParts.push(`открытая местность — ${enrichment.openAreaImpact}`);
876
+ }
877
+ if (bParts.length > 0)
878
+ breakdown = `, из них: ${bParts.join(", ")}`;
879
+ text = insertBeforeTimeLine(text, `<b>Ракет:</b> ${enrichment.rocketCount}${breakdown}${cassette}${citeStr}`);
880
+ }
881
+ else if (enrichment.intercepted && alertType !== "early_warning") {
882
+ // No rocket count but have interception data
883
+ const citeStr = inlineCitesFromData(enrichment.interceptedCites);
884
+ text = insertBeforeTimeLine(text, `<b>Перехвачено:</b> ${enrichment.intercepted}${citeStr}`);
885
+ }
886
+ // ── Hits ──
887
+ if (enrichment.hitsConfirmed && alertType !== "early_warning") {
580
888
  const areaLabel = Object.values(config.agent.areaLabels)[0] ?? "район";
581
- const hitWord = r.hits_confirmed === 1 ? "попадание" : "попадания";
582
- const hitsCite = r.hits_citations.length > 0 ? sup(r.hits_citations) : "";
583
- const hitsU = r.hits_confidence < UNCERTAIN ? " (?)" : "";
584
- text = insertBeforeTimeLine(text, `есть прямое ${hitWord} в ${areaLabel}: ${r.hits_confirmed}${hitsCite}${hitsU}`);
585
- }
586
- // Sources footer: [1](url) [2](url) ...
587
- const sourcesWithUrl = r.citedSources.filter((s) => s.messageUrl);
588
- if (sourcesWithUrl.length > 0) {
589
- const links = sourcesWithUrl
590
- .map((s) => `<a href="${s.messageUrl}">[${s.index}]</a>`)
591
- .join(" ");
592
- text += `\n—\n<i>Источники: ${links}</i>`;
889
+ const citeStr = inlineCitesFromData(enrichment.hitsCites);
890
+ text = insertBeforeTimeLine(text, `<b>Попадания (${areaLabel}):</b> ${enrichment.hitsConfirmed}${citeStr}`);
891
+ }
892
+ // ── Casualties / Injuries (resolved only) ──
893
+ if (enrichment.casualties && alertType === "resolved") {
894
+ const citeStr = inlineCitesFromData(enrichment.casualtiesCites);
895
+ text = insertBeforeTimeLine(text, `<b>Погибшие:</b> ${enrichment.casualties}${citeStr}`);
896
+ }
897
+ if (enrichment.injuries && alertType === "resolved") {
898
+ const citeStr = inlineCitesFromData(enrichment.injuriesCites);
899
+ text = insertBeforeTimeLine(text, `<b>Пострадавшие:</b> ${enrichment.injuries}${citeStr}`);
593
900
  }
594
901
  return text;
595
902
  }
596
903
  /**
597
904
  * Insert a line before the time line (last "Время" / "Time" / "שעת" line).
598
- * This keeps new data visually grouped with existing fields.
599
905
  */
600
906
  function insertBeforeTimeLine(text, line) {
601
- // Match "Время оповещения" / "Alert time" / "שעת ההתרעה" / "وقت الإنذار"
602
907
  const timePattern = /(<b>(?:Время оповещения|Alert time|שעת ההתרעה|وقت الإنذار):<\/b>)/;
603
908
  const match = text.match(timePattern);
604
909
  if (match?.index !== undefined) {
605
910
  return text.slice(0, match.index) + line + "\n" + text.slice(match.index);
606
911
  }
607
- // Fallback: append before last line
608
912
  const lines = text.split("\n");
609
913
  lines.splice(Math.max(lines.length - 1, 0), 0, line);
610
914
  return lines.join("\n");
611
915
  }
612
- /**
613
- * Replace the default ETA range with absolute impact time + superscript citation.
614
- * "~5–12 мин" → "~00:21¹"
615
- */
616
- function refineEtaInPlace(text, minutes, alertTs, citations) {
617
- // Compute absolute impact time in Israel timezone
618
- const absTime = new Date(alertTs + minutes * 60_000).toLocaleTimeString("he-IL", { hour: "2-digit", minute: "2-digit", timeZone: "Asia/Jerusalem" });
619
- const refined = `~${absTime}${sup(citations)}`;
620
- const etaPatterns = [
621
- /~\d+[–-]\d+\s*мин/, // ~5–12 мин
622
- /~\d+[–-]\d+\s*min/, // ~5–12 min
623
- /~\d+[–-]\d+\s*דקות/, // ~5–12 דקות
624
- /~\d+[–-]\d+\s*دقائق/, // ~5–12 دقائق
625
- /1\.5\s*мин/, // 1.5 мин (siren)
626
- /1\.5\s*min/, // 1.5 min
627
- /1\.5\s*דקות/, // 1.5 דקות
628
- /1\.5\s*دقائق/, // 1.5 دقائق
629
- ];
630
- for (const pattern of etaPatterns) {
631
- if (pattern.test(text)) {
632
- return text.replace(pattern, refined);
633
- }
634
- }
635
- return text;
636
- }
637
916
  async function editMessage(state) {
638
917
  const { votedResult } = state;
639
918
  if (!config.botToken)
640
919
  return {};
641
920
  const tgBot = new Bot(config.botToken);
642
- // No valid sources found — silently skip (don't touch the message)
921
+ // No valid sources — carry forward previous data only
922
+ const prevEnrichment = state.previousEnrichment ?? emptyEnrichmentData();
643
923
  if (!votedResult) {
644
- logger.info("Agent: no voted resultskipping edit", {
924
+ // No new data from channels — still try to build message from carry-forward
925
+ if (prevEnrichment.origin || prevEnrichment.intercepted) {
926
+ // Have carry-forward data, build message
927
+ const newText = buildEnrichedMessage(state.currentText, state.alertType, state.alertTs, prevEnrichment);
928
+ const hash = textHash(newText);
929
+ if (hash === prevEnrichment.lastEditHash) {
930
+ logger.info("Agent: no change in message (dedup) — skipping edit", {
931
+ alertId: state.alertId,
932
+ });
933
+ return {};
934
+ }
935
+ try {
936
+ if (state.isCaption) {
937
+ await tgBot.api.editMessageCaption(state.chatId, state.messageId, {
938
+ caption: newText,
939
+ parse_mode: "HTML",
940
+ });
941
+ }
942
+ else {
943
+ await tgBot.api.editMessageText(state.chatId, state.messageId, newText, { parse_mode: "HTML" });
944
+ }
945
+ prevEnrichment.lastEditHash = hash;
946
+ await saveEnrichmentData(prevEnrichment);
947
+ logger.info("Agent: message enriched (carry-forward only)", {
948
+ alertId: state.alertId,
949
+ messageId: state.messageId,
950
+ });
951
+ }
952
+ catch (err) {
953
+ const errStr = String(err);
954
+ if (errStr.includes("message is not modified")) {
955
+ prevEnrichment.lastEditHash = hash;
956
+ await saveEnrichmentData(prevEnrichment);
957
+ logger.info("Agent: message already up-to-date (dedup)", {
958
+ alertId: state.alertId,
959
+ });
960
+ }
961
+ else {
962
+ logger.error("Agent: failed to edit message", {
963
+ alertId: state.alertId,
964
+ error: errStr,
965
+ });
966
+ }
967
+ }
968
+ }
969
+ else {
970
+ logger.info("Agent: no voted result — skipping edit", {
971
+ alertId: state.alertId,
972
+ });
973
+ }
974
+ return {};
975
+ }
976
+ // Build enrichment data: merge vote + previous
977
+ const enrichment = buildEnrichmentFromVote(votedResult, prevEnrichment, state.alertType, state.alertTs);
978
+ const newText = buildEnrichedMessage(state.currentText, state.alertType, state.alertTs, enrichment);
979
+ // Dedup: skip if text hasn't changed
980
+ const hash = textHash(newText);
981
+ if (hash === enrichment.lastEditHash) {
982
+ logger.info("Agent: no change in message (dedup) — skipping edit", {
645
983
  alertId: state.alertId,
646
984
  });
647
985
  return {};
@@ -654,7 +992,6 @@ async function editMessage(state) {
654
992
  threshold: config.agent.confidenceThreshold,
655
993
  });
656
994
  }
657
- const newText = buildEnrichedMessage(state.currentText, state.alertType, state.alertTs, votedResult);
658
995
  try {
659
996
  if (state.isCaption) {
660
997
  await tgBot.api.editMessageCaption(state.chatId, state.messageId, {
@@ -667,18 +1004,31 @@ async function editMessage(state) {
667
1004
  parse_mode: "HTML",
668
1005
  });
669
1006
  }
1007
+ enrichment.lastEditHash = hash;
1008
+ await saveEnrichmentData(enrichment);
670
1009
  logger.info("Agent: message enriched", {
671
1010
  alertId: state.alertId,
672
1011
  messageId: state.messageId,
673
1012
  confidence: votedResult.confidence,
674
1013
  sources: votedResult.sources_count,
1014
+ phase: state.alertType,
675
1015
  });
676
1016
  }
677
1017
  catch (err) {
678
- logger.error("Agent: failed to edit message", {
679
- alertId: state.alertId,
680
- error: String(err),
681
- });
1018
+ const errStr = String(err);
1019
+ if (errStr.includes("message is not modified")) {
1020
+ enrichment.lastEditHash = hash;
1021
+ await saveEnrichmentData(enrichment);
1022
+ logger.info("Agent: message already up-to-date (dedup)", {
1023
+ alertId: state.alertId,
1024
+ });
1025
+ }
1026
+ else {
1027
+ logger.error("Agent: failed to edit message", {
1028
+ alertId: state.alertId,
1029
+ error: errStr,
1030
+ });
1031
+ }
682
1032
  }
683
1033
  return {};
684
1034
  }
@@ -686,7 +1036,7 @@ async function editMessage(state) {
686
1036
  // Clarify Node — MCP tool calling via ReAct (conditional)
687
1037
  // ─────────────────────────────────────────────────────────
688
1038
  async function clarifyNode(state) {
689
- const { votedResult, extractions, alertId, alertAreas, alertType, messageId, currentText, } = state;
1039
+ const { votedResult, extractions, alertId, alertAreas, alertType, alertTs, messageId, currentText, } = state;
690
1040
  if (!votedResult) {
691
1041
  logger.info("Agent: clarify skipped — no voted result", { alertId });
692
1042
  return { clarifyAttempted: true };
@@ -695,18 +1045,19 @@ async function clarifyNode(state) {
695
1045
  alertId,
696
1046
  confidence: votedResult.confidence,
697
1047
  threshold: config.agent.confidenceThreshold,
1048
+ phase: alertType,
698
1049
  });
699
1050
  try {
700
1051
  const result = await runClarify({
701
1052
  alertId,
702
1053
  alertAreas,
703
1054
  alertType,
1055
+ alertTs,
704
1056
  messageId,
705
1057
  currentText,
706
1058
  extractions,
707
1059
  votedResult,
708
1060
  });
709
- // Merge new extractions with existing valid ones
710
1061
  const mergedExtractions = [...extractions, ...result.newExtractions];
711
1062
  logger.info("Agent: clarify completed", {
712
1063
  alertId,
@@ -717,7 +1068,6 @@ async function clarifyNode(state) {
717
1068
  });
718
1069
  return {
719
1070
  extractions: mergedExtractions,
720
- // Reset votedResult so vote() re-runs with merged data
721
1071
  votedResult: null,
722
1072
  clarifyAttempted: true,
723
1073
  };
@@ -732,19 +1082,13 @@ async function clarifyNode(state) {
732
1082
  }
733
1083
  // ── Conditional routing after vote ─────────────────────
734
1084
  function shouldClarify(state) {
735
- // Only clarify once per pipeline run (prevents infinite loop)
736
- if (state.clarifyAttempted) {
1085
+ if (state.clarifyAttempted)
737
1086
  return "editMessage";
738
- }
739
- // MCP tools must be enabled
740
- if (!config.agent.mcpTools) {
1087
+ if (!config.agent.mcpTools)
741
1088
  return "editMessage";
742
- }
743
- // No voted result → nothing to clarify
744
- if (!state.votedResult) {
1089
+ if (!state.votedResult)
745
1090
  return "editMessage";
746
- }
747
- // Confidence below threshold → clarify
1091
+ // Low confidence → clarify (may use Oref tool for time validation)
748
1092
  if (state.votedResult.confidence < config.agent.confidenceThreshold) {
749
1093
  logger.info("Agent: routing to clarify (low confidence)", {
750
1094
  confidence: state.votedResult.confidence,
@@ -752,10 +1096,27 @@ function shouldClarify(state) {
752
1096
  });
753
1097
  return "clarify";
754
1098
  }
1099
+ // Suspicious origin: if a single source names a single country that is unexpected for the region, verify
1100
+ // This catches cases like "Lebanon" appearing on a Tel Aviv alert
1101
+ // when the real attack is from Iran/Yemen
1102
+ const origins = state.votedResult.country_origins;
1103
+ if (origins &&
1104
+ origins.length === 1 &&
1105
+ state.votedResult.sources_count === 1) {
1106
+ const singleOrigin = origins[0].name;
1107
+ // Lebanon attacks typically don't reach central Israel
1108
+ if (singleOrigin === "Lebanon" &&
1109
+ state.alertAreas.some((a) => a.includes("תל אביב") ||
1110
+ a.includes("גוש דן") ||
1111
+ a.includes("שרון") ||
1112
+ a.includes("מרכז"))) {
1113
+ logger.info("Agent: routing to clarify (suspicious single source: Lebanon for central Israel)", { origin: singleOrigin });
1114
+ return "clarify";
1115
+ }
1116
+ }
755
1117
  return "editMessage";
756
1118
  }
757
1119
  // ── Build graph ────────────────────────────────────────
758
- /** MemorySaver checkpointer — session-level state persistence */
759
1120
  const checkpointer = new MemorySaver();
760
1121
  function buildGraph() {
761
1122
  const graph = new StateGraph(AgentState)
@@ -764,13 +1125,12 @@ function buildGraph() {
764
1125
  .addNode("postFilter", postFilter)
765
1126
  .addNode("vote", vote)
766
1127
  .addNode("clarify", clarifyNode)
767
- .addNode("revote", vote) // Re-run vote after clarify with merged data
1128
+ .addNode("revote", vote)
768
1129
  .addNode("editMessage", editMessage)
769
1130
  .addEdge("__start__", "collectAndPreFilter")
770
1131
  .addEdge("collectAndPreFilter", "extractAndValidate")
771
1132
  .addEdge("extractAndValidate", "postFilter")
772
1133
  .addEdge("postFilter", "vote")
773
- // Conditional edge: vote → clarify (low conf) or editMessage (high conf)
774
1134
  .addConditionalEdges("vote", shouldClarify, {
775
1135
  clarify: "clarify",
776
1136
  editMessage: "editMessage",
@@ -796,8 +1156,32 @@ export async function runEnrichment(input) {
796
1156
  extractions: [],
797
1157
  votedResult: null,
798
1158
  clarifyAttempted: false,
799
- },
800
- // Thread ID for MemorySaver — enables session-level state persistence
801
- { configurable: { thread_id: input.alertId } });
1159
+ previousEnrichment: emptyEnrichmentData(),
1160
+ sessionStartTs: 0,
1161
+ phaseStartTs: 0,
1162
+ }, { configurable: { thread_id: input.alertId } });
802
1163
  }
1164
+ // ── Exported for testing ───────────────────────────────
1165
+ export const _test = { // helpers and constants gathered under one named export so tests can reach them
1166
+ getLLM,
1167
+ buildRegionKeywords,
1168
+ LAUNCH_KEYWORDS,
1169
+ TIME_WINDOW_MS,
1170
+ toIsraelTime,
1171
+ textHash,
1172
+ postFilter,
1173
+ vote,
1174
+ buildEnrichmentFromVote,
1175
+ buildEnrichedMessage,
1176
+ insertBeforeTimeLine,
1177
+ inlineCites,
1178
+ inlineCitesFromData,
1179
+ extractCites,
1180
+ COUNTRY_RU,
1181
+ SYSTEM_PROMPT_BASE,
1182
+ getPhaseInstructions,
1183
+ SKIP,
1184
+ UNCERTAIN,
1185
+ CERTAIN,
1186
+ };
803
1187
  //# sourceMappingURL=graph.js.map