easyoref 1.13.1 → 1.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,47 +1,32 @@
1
1
  /**
2
- * LangGraph.js enrichment pipeline — tiered validation + tool calling.
2
+ * LangGraph.js enrichment pipeline — phase-aware, time-validated.
3
3
  *
4
- * Design: minimize tokens, maximize confidence.
5
- * When confidence is low, offer tools — agent decides if they help.
4
+ * KEY DESIGN PRINCIPLES:
5
+ * 1. TIME IS KING — every post is validated against the alert time window.
6
+ * LLM receives alert time + post time and scores time_relevance.
7
+ * Posts about previous/different attacks are rejected.
8
+ * 2. PHASE-AWARE — each phase extracts only what's relevant:
9
+ * - early_warning: origin, ETA, rocket count, cassette
10
+ * - siren: carries early data + adds interception, impacts
11
+ * - resolved: carries all + adds casualties, injuries, final stats
12
+ * 3. CARRY-FORWARD — results persist in Redis (EnrichmentData).
13
+ * Each phase inherits previous phase's findings.
14
+ * 4. INLINE CITATIONS — no superscripts, no footer sources.
15
+ * Format: [[1]](url) right after each data point.
16
+ * 5. DEDUP EDITS — hash-based check prevents "message not modified" spam.
6
17
  *
7
- * ┌──────────────────────────────────────────────────────────────┐
8
- * │ Tier 0: preFilter (deterministic, 0 tokens) │
9
- * │ → keyword + region check on raw post text │
10
- * │ │
11
- * │ Tier 1: extractAndValidate (1 LLM call per post) │
12
- * │ → combined extraction + 3 validators in single JSON │
13
- * │ │
14
- * │ Tier 2: postFilter (deterministic, 0 tokens) │
15
- * │ → reject low relevance / trust / alarmist / empty │
16
- * │ │
17
- * │ Tier 3: vote (deterministic, 0 tokens) │
18
- * │ → majority consensus across validated sources │
19
- * │ │
20
- * │ Tier 3.5: shouldClarify (conditional edge) │
21
- * │ → if confidence < threshold AND tools enabled: │
22
- * │ → clarify: LLM sees voted result + 4 tools │
23
- * │ • read_telegram_sources (1-4 channel posts) │
24
- * │ • alert_history (Oref history verification) │
25
- * │ • resolve_area (defense-zone proximity check) │
26
- * │ • betterstack_log (query recent pipeline logs) │
27
- * │ LLM decides: call 0, 1, 2, or 3+ tools. │
28
- * │ → revote with merged extractions │
29
- * │ → else: proceed to editMessage │
30
- * │ │
31
- * │ Tier 4: editMessage (deterministic, 0 tokens) │
32
- * │ → inline update of existing key:value pairs │
33
- * └──────────────────────────────────────────────────────────────┘
34
- *
35
- * Checkpointer: MemorySaver — session-level state persistence.
36
- * Total LLM cost: 1 call × N posts + (optional) 1 clarify call + 0-N tools.
18
+ * Pipeline:
19
+ * preFilter → extractAndValidate → postFilter → vote → [clarify] → editMessage
37
20
  */
38
21
  import { Annotation, MemorySaver, StateGraph } from "@langchain/langgraph";
39
22
  import { ChatOpenAI } from "@langchain/openai";
40
23
  import { Bot } from "grammy";
24
+ import { createHash } from "node:crypto";
41
25
  import { config } from "../config.js";
42
26
  import * as logger from "../logger.js";
43
27
  import { runClarify } from "./clarify.js";
44
- import { getChannelPosts } from "./store.js";
28
+ import { getActiveSession, getChannelPosts, getEnrichmentData, saveEnrichmentData, } from "./store.js";
29
+ import { emptyEnrichmentData } from "./types.js";
45
30
  // ── State ──────────────────────────────────────────────
46
31
  const AgentState = Annotation.Root({
47
32
  alertId: Annotation({ reducer: (_, b) => b }),
@@ -58,6 +43,12 @@ const AgentState = Annotation.Root({
58
43
  votedResult: Annotation({ reducer: (_, b) => b }),
59
44
  /** Tracks whether clarify has already run (prevents infinite loop) */
60
45
  clarifyAttempted: Annotation({ reducer: (_, b) => b }),
46
+ /** Cross-phase enrichment data loaded at start */
47
+ previousEnrichment: Annotation({ reducer: (_, b) => b }),
48
+ /** Session start timestamp for time window calculations */
49
+ sessionStartTs: Annotation({ reducer: (_, b) => b }),
50
+ /** Phase start timestamp */
51
+ phaseStartTs: Annotation({ reducer: (_, b) => b }),
61
52
  });
62
53
  // ── LLM ───────────────────────────────────────────────
63
54
  function getLLM() {
@@ -72,85 +63,220 @@ function getLLM() {
72
63
  },
73
64
  apiKey: config.agent.apiKey,
74
65
  temperature: 0,
75
- maxTokens: 400,
66
+ maxTokens: 500,
76
67
  });
77
68
  }
78
69
  // ── Region keywords (Hebrew + transliterations) ────────
79
- /**
80
- * Build keyword list from config areas + area_labels.
81
- * Returns lowercased keywords for matching.
82
- */
83
70
  function buildRegionKeywords() {
84
71
  const keywords = [];
85
72
  for (const area of config.areas) {
86
73
  keywords.push(area.toLowerCase());
87
- // First word often enough (e.g. "תל אביב" → "תל")
88
74
  const first = area.split(" ")[0];
89
75
  if (first && first.length >= 2)
90
76
  keywords.push(first.toLowerCase());
91
77
  }
92
78
  for (const [he, label] of Object.entries(config.agent.areaLabels)) {
93
79
  keywords.push(he.toLowerCase());
94
- // Add transliterated label words (e.g. "Дан центр" → "дан", "центр")
95
80
  for (const word of label.split(/\s+/)) {
96
81
  if (word.length >= 3)
97
82
  keywords.push(word.toLowerCase());
98
83
  }
99
84
  }
100
85
  // Common attack-related keywords (always relevant)
101
- keywords.push("ישראל", "israel", "израиль", "ракет", "rocket", "missile", "iron dome", "כיפת ברזל", "жд", "перехват", "intercept", "siren", "азака", "צבע אדום", "red alert");
86
+ keywords.push("ישראל", "israel", "израиль", "ракет", "rocket", "missile", "iron dome", "כיפת ברזל", "перехват", "intercept", "צבע אדום", "red alert");
102
87
  return [...new Set(keywords)];
103
88
  }
89
+ // ── Launch detection keywords (strict — early_warning only) ──
90
+ const LAUNCH_KEYWORDS = [
91
+ "שיגור",
92
+ "שיגורים",
93
+ "שוגרו",
94
+ "נורו",
95
+ "зафиксированы запуски",
96
+ "обнаружены запуски",
97
+ "запуски ракет",
98
+ "запуск ракет",
99
+ "пуски ракет",
100
+ "ракетный обстрел",
101
+ "ракетная атака",
102
+ "missile launch",
103
+ "rocket launch",
104
+ "barrage",
105
+ "fired towards",
106
+ "launches detected",
107
+ "missiles fired",
108
+ "שיגורים לישראל",
109
+ "ירי טילים",
110
+ "ירי רקטות",
111
+ "إطلاق صواريخ",
112
+ ].map((kw) => kw.toLowerCase());
113
+ // ── Time window per phase (ms before alertTs to accept posts) ──
114
+ const TIME_WINDOW_MS = {
115
+ early_warning: 5 * 60 * 1000, // 5 min before alert
116
+ siren: 10 * 60 * 1000, // 10 min (includes early_warning period)
117
+ resolved: 30 * 60 * 1000, // 30 min (full session window)
118
+ };
119
+ // ── Helpers ────────────────────────────────────────────
120
+ /** Format timestamp as HH:MM Israel time */
121
+ function toIsraelTime(ts) {
122
+ return new Date(ts).toLocaleTimeString("he-IL", {
123
+ hour: "2-digit",
124
+ minute: "2-digit",
125
+ timeZone: "Asia/Jerusalem",
126
+ });
127
+ }
128
+ /** MD5 hash for edit dedup */
129
+ function textHash(text) {
130
+ return createHash("md5").update(text).digest("hex");
131
+ }
104
132
  // ─────────────────────────────────────────────────────────
105
- // Tier 0: Pre-filter (deterministic, 0 tokens)
133
+ // Tier 0: Pre-filter (phase-aware, time-bounded, 0 tokens)
106
134
  // ─────────────────────────────────────────────────────────
107
135
  async function collectAndPreFilter(state) {
108
- // Session-scoped: all posts belong to the current session already
109
136
  const posts = await getChannelPosts(state.alertId);
137
+ const prevEnrichment = await getEnrichmentData();
138
+ // Load session for time boundaries
139
+ const session = await getActiveSession();
140
+ const sessionStartTs = session?.sessionStartTs ?? state.alertTs;
141
+ const phaseStartTs = session?.phaseStartTs ?? state.alertTs;
110
142
  if (posts.length === 0) {
111
143
  logger.info("Agent: no posts in session", { alertId: state.alertId });
112
- return { channelPosts: posts, filteredPosts: [] };
144
+ return {
145
+ channelPosts: posts,
146
+ filteredPosts: [],
147
+ previousEnrichment: prevEnrichment,
148
+ sessionStartTs,
149
+ phaseStartTs,
150
+ };
113
151
  }
114
152
  const keywords = buildRegionKeywords();
115
- const filtered = posts.filter((post) => {
116
- const text = post.text.toLowerCase();
117
- // Must contain at least 1 region/attack keyword
118
- return keywords.some((kw) => text.includes(kw));
119
- });
120
- logger.info("Agent: pre-filter", {
121
- alertId: state.alertId,
122
- total: posts.length,
123
- after_keyword_filter: filtered.length,
124
- });
125
- return { channelPosts: posts, filteredPosts: filtered };
153
+ const alertType = state.alertType;
154
+ const alertTs = state.alertTs;
155
+ // Time window: reject posts older than window before alertTs
156
+ const windowMs = TIME_WINDOW_MS[alertType];
157
+ const cutoffTs = alertTs - windowMs;
158
+ let filtered;
159
+ if (alertType === "early_warning") {
160
+ // ── STRICT launch-only filter for early warning ──
161
+ // Step 1: Find posts with launch keywords, within time window
162
+ const launchPosts = posts.filter((post) => {
163
+ if (post.ts < cutoffTs)
164
+ return false;
165
+ const text = post.text.toLowerCase();
166
+ return LAUNCH_KEYWORDS.some((kw) => text.includes(kw));
167
+ });
168
+ // Step 2: Get channels that posted about launches
169
+ const channelFirstLaunchTs = new Map();
170
+ for (const post of launchPosts) {
171
+ const current = channelFirstLaunchTs.get(post.channel);
172
+ if (current === undefined || post.ts < current) {
173
+ channelFirstLaunchTs.set(post.channel, post.ts);
174
+ }
175
+ }
176
+ // Step 3: Accept follow-up posts from launch channels only (within window)
177
+ filtered = posts.filter((post) => {
178
+ if (post.ts < cutoffTs)
179
+ return false;
180
+ const text = post.text.toLowerCase();
181
+ if (!keywords.some((kw) => text.includes(kw)))
182
+ return false;
183
+ const firstLaunch = channelFirstLaunchTs.get(post.channel);
184
+ if (firstLaunch === undefined)
185
+ return false;
186
+ return post.ts >= firstLaunch;
187
+ });
188
+ logger.info("Agent: pre-filter (early_warning)", {
189
+ alertId: state.alertId,
190
+ total: posts.length,
191
+ launch_posts: launchPosts.length,
192
+ launch_channels: channelFirstLaunchTs.size,
193
+ after_filter: filtered.length,
194
+ cutoff: toIsraelTime(cutoffTs),
195
+ });
196
+ }
197
+ else {
198
+ // ── Siren & Resolved: broader filter, time-bounded ──
199
+ filtered = posts.filter((post) => {
200
+ if (post.ts < cutoffTs)
201
+ return false;
202
+ const text = post.text.toLowerCase();
203
+ return keywords.some((kw) => text.includes(kw));
204
+ });
205
+ logger.info("Agent: pre-filter", {
206
+ alertId: state.alertId,
207
+ alertType,
208
+ total: posts.length,
209
+ after_filter: filtered.length,
210
+ cutoff: toIsraelTime(cutoffTs),
211
+ });
212
+ }
213
+ return {
214
+ channelPosts: posts,
215
+ filteredPosts: filtered,
216
+ previousEnrichment: prevEnrichment,
217
+ sessionStartTs,
218
+ phaseStartTs,
219
+ };
126
220
  }
127
221
  // ─────────────────────────────────────────────────────────
128
222
  // Tier 1: Extract + validate (1 LLM call per post)
223
+ // Phase-aware prompts — agent knows what to look for.
224
+ // TIME CONTEXT — agent sees alert time + post time.
129
225
  // ─────────────────────────────────────────────────────────
130
226
  const QUAL_VALUES = '"all"|"most"|"many"|"few"|"exists"|"none"|"more_than"|"less_than"';
131
- const SYSTEM_PROMPT = `You analyze Telegram channel messages about a missile/rocket attack on Israel.
132
- Your job: extract factual data AND assess message quality. Be concise.
227
+ /** Phase-specific extraction instructions */
228
+ function getPhaseInstructions(alertType) {
229
+ switch (alertType) {
230
+ case "early_warning":
231
+ return `PHASE: EARLY WARNING (radar detected launches, sirens not yet).
232
+ Focus on: country_origin (WHERE were rockets launched from?), eta_refined_minutes, rocket_count, is_cassette.
233
+ Do NOT extract: intercepted, sea_impact, open_area_impact, hits_confirmed, casualties, injuries — these are IMPOSSIBLE at this stage.
234
+ If a message discusses interception results, it is about a PREVIOUS attack — set time_relevance=0.`;
235
+ case "siren":
236
+ return `PHASE: SIREN (rockets incoming, impact imminent).
237
+ Focus on: country_origin (if not known yet), rocket_count, intercepted, sea_impact, open_area_impact, is_cassette.
238
+ Do NOT extract: hits_confirmed, casualties, injuries — too early for confirmed damage reports.
239
+ If a message discusses casualties or confirmed hits, verify the timing carefully - it may be about a previous attack.`;
240
+ case "resolved":
241
+ return `PHASE: RESOLVED (incident over, assessing damage).
242
+ Focus on: intercepted (final count), hits_confirmed, casualties, injuries, open_area_impact.
243
+ All fields are valid at this stage. Prioritize confirmed official reports.`;
244
+ }
245
+ }
246
+ const SYSTEM_PROMPT_BASE = `You analyze Telegram channel messages about a missile/rocket attack on Israel.
247
+ Your job: extract factual data, assess quality, AND validate temporal relevance.
248
+
249
+ CRITICAL — TIME VALIDATION:
250
+ You will receive the alert time and the post time. You MUST determine if this post
251
+ is about the CURRENT attack or about a previous/different event.
252
+ - If post discusses events clearly BEFORE the alert time → time_relevance=0
253
+ - If post is generic military news not specific to this attack → time_relevance=0.2
254
+ - If post discusses the current attack → time_relevance=1.0
255
+ - If uncertain → time_relevance=0.5 (the system will use alert_history to verify)
133
256
 
134
257
  Return ONLY valid JSON (no markdown, no explanation):
135
258
  {
136
259
  "region_relevance": float, // 0–1: does this message discuss the specified alert region?
137
260
  "source_trust": float, // 0–1: factual reporting (1.0) vs unverified rumors/panic (0.0)
138
- "tone": "calm"|"neutral"|"alarmist", // message tone — reject alarmist content
261
+ "tone": "calm"|"neutral"|"alarmist",
262
+ "time_relevance": float, // 0–1: is this post about the CURRENT attack? (see rules above)
139
263
  "country_origin": string|null, // "Iran","Yemen","Lebanon","Gaza","Iraq","Syria" or null
140
- "rocket_count": int|null, // total rockets/missiles launched if mentioned
141
- "is_cassette": bool|null, // cluster/cassette munitions confirmed?
142
- "intercepted": int|null, // exact number intercepted by Iron Dome/air defense
143
- "intercepted_qual": ${QUAL_VALUES}|null, // qualitative if no exact number; null if exact number given
144
- "intercepted_qual_num": int|null, // reference number for more_than/less_than (e.g. 5 if "more than 5")
145
- "sea_impact": int|null, // exact number fell in sea/unpopulated area
264
+ "rocket_count": int|null,
265
+ "is_cassette": bool|null,
266
+ "intercepted": int|null,
267
+ "intercepted_qual": ${QUAL_VALUES}|null,
268
+ "intercepted_qual_num": int|null,
269
+ "sea_impact": int|null,
146
270
  "sea_impact_qual": ${QUAL_VALUES}|null,
147
271
  "sea_impact_qual_num": int|null,
148
- "open_area_impact": int|null, // exact number hit open/populated ground
272
+ "open_area_impact": int|null,
149
273
  "open_area_impact_qual": ${QUAL_VALUES}|null,
150
274
  "open_area_impact_qual_num": int|null,
151
- "hits_confirmed": int|null, // confirmed hits on structures/buildings
152
- "eta_refined_minutes": int|null, // refined time-to-impact if mentioned
153
- "confidence": float // 0–1: overall confidence in this extraction
275
+ "hits_confirmed": int|null,
276
+ "casualties": int|null,
277
+ "injuries": int|null,
278
+ "eta_refined_minutes": int|null,
279
+ "confidence": float
154
280
  }
155
281
 
156
282
  Rules:
@@ -158,12 +284,12 @@ Rules:
158
284
  - If message is speculative/unconfirmed rumor, set source_trust < 0.4.
159
285
  - If message uses excessive caps, exclamation marks, panic language → tone="alarmist".
160
286
  - Only extract concrete numbers explicitly stated in the text. Never guess.
161
- - intercpted + sea_impact + open_area_impact should sum to rocket_count when all are known.
162
- - If partial breakdown known, set unknown sub-fields to null (not 0).
163
- - *_qual fields: use ONLY when the message explicitly states a qualitative descriptor WITHOUT an exact count.
164
- If an exact number is given, set *_qual to null. Do NOT infer from absence.
165
- - NEVER extract qualitative descriptors for casualties or injuries hits_confirmed handles structural hits only.
166
- - "none" qual is only valid if explicitly stated in the message (e.g., "все перехвачены", "не упало в море").`;
287
+ - *_qual fields: use ONLY when NO exact count is given. If exact number present, set *_qual=null.
288
+ - "none" qual is only valid if explicitly stated (e.g., "все перехвачены", "не упало в море").
289
+ - For IDF (@idf_telegram) posts about ongoing operations (not this specific attack) time_relevance=0.
290
+ - LANGUAGE NEUTRALITY: Posts may be in Hebrew, Russian, Arabic, or English. The language of the post
291
+ MUST NOT affect source_trust or confidence. Russian-language Israeli channels are equally reliable
292
+ and often break news faster than Hebrew ones. Judge ONLY by factual content and tone.`;
167
293
  async function extractAndValidate(state) {
168
294
  if (state.filteredPosts.length === 0) {
169
295
  logger.info("Agent: no filtered posts to extract", {
@@ -176,31 +302,26 @@ async function extractAndValidate(state) {
176
302
  const regionHint = state.alertAreas.length > 0
177
303
  ? state.alertAreas.join(", ")
178
304
  : Object.keys(config.agent.areaLabels).join(", ") || "Israel";
179
- // Format alert time in Israel timezone
180
- const alertTimeIL = new Date(state.alertTs).toLocaleTimeString("he-IL", {
181
- hour: "2-digit",
182
- minute: "2-digit",
183
- timeZone: "Asia/Jerusalem",
184
- });
185
- const nowIL = new Date().toLocaleTimeString("he-IL", {
186
- hour: "2-digit",
187
- minute: "2-digit",
188
- timeZone: "Asia/Jerusalem",
189
- });
190
- const alertTypeLabel = state.alertType === "early_warning"
191
- ? "early warning (radar detection)"
192
- : state.alertType === "siren"
193
- ? "siren (impact imminent)"
194
- : state.alertType;
195
- const contextHeader = `Alert type: ${alertTypeLabel}\n` +
196
- `Alert time: ${alertTimeIL} (Israel)\n` +
197
- `Current time: ${nowIL} (Israel)\n` +
198
- `Alert region: ${regionHint}\n` +
199
- `UI language: ${config.language}\n`;
305
+ const alertTimeIL = toIsraelTime(state.alertTs);
306
+ const nowIL = toIsraelTime(Date.now());
307
+ const phaseInstructions = getPhaseInstructions(state.alertType);
308
+ const systemPrompt = SYSTEM_PROMPT_BASE + "\n\n" + phaseInstructions;
200
309
  const results = await Promise.all(posts.map(async (post) => {
310
+ const postTimeIL = toIsraelTime(post.ts);
311
+ const postAgeMin = Math.round((state.alertTs - post.ts) / 60_000);
312
+ const postAgeSuffix = postAgeMin > 0
313
+ ? `(${postAgeMin} min BEFORE alert)`
314
+ : postAgeMin < 0
315
+ ? `(${Math.abs(postAgeMin)} min AFTER alert)`
316
+ : "(same time as alert)";
317
+ const contextHeader = `Alert time: ${alertTimeIL} (Israel)\n` +
318
+ `Post time: ${postTimeIL} (Israel) ${postAgeSuffix}\n` +
319
+ `Current time: ${nowIL} (Israel)\n` +
320
+ `Alert region: ${regionHint}\n` +
321
+ `UI language: ${config.language}\n`;
201
322
  try {
202
323
  const response = await llm.invoke([
203
- { role: "system", content: SYSTEM_PROMPT },
324
+ { role: "system", content: systemPrompt },
204
325
  {
205
326
  role: "user",
206
327
  content: `${contextHeader}Channel: ${post.channel}\n\nMessage:\n${post.text.slice(0, 800)}`,
@@ -209,7 +330,6 @@ async function extractAndValidate(state) {
209
330
  const raw = typeof response.content === "string"
210
331
  ? response.content
211
332
  : JSON.stringify(response.content);
212
- // Strip markdown code fences (```json ... ```) that some models wrap around JSON
213
333
  const text = raw
214
334
  .replace(/^```(?:json)?\s*\n?/i, "")
215
335
  .replace(/\n?```\s*$/i, "");
@@ -218,6 +338,7 @@ async function extractAndValidate(state) {
218
338
  ...parsed,
219
339
  channel: post.channel,
220
340
  messageUrl: post.messageUrl,
341
+ time_relevance: parsed.time_relevance ?? 0.5,
221
342
  valid: true,
222
343
  };
223
344
  }
@@ -231,6 +352,7 @@ async function extractAndValidate(state) {
231
352
  region_relevance: 0,
232
353
  source_trust: 0,
233
354
  tone: "neutral",
355
+ time_relevance: 0,
234
356
  country_origin: null,
235
357
  rocket_count: null,
236
358
  is_cassette: null,
@@ -244,6 +366,8 @@ async function extractAndValidate(state) {
244
366
  open_area_impact_qual: null,
245
367
  open_area_impact_qual_num: null,
246
368
  hits_confirmed: null,
369
+ casualties: null,
370
+ injuries: null,
247
371
  eta_refined_minutes: null,
248
372
  confidence: 0,
249
373
  valid: false,
@@ -254,14 +378,23 @@ async function extractAndValidate(state) {
254
378
  logger.info("Agent: extracted", {
255
379
  alertId: state.alertId,
256
380
  count: results.length,
381
+ timeRelevance: results.map((r) => ({
382
+ ch: r.channel,
383
+ tr: r.time_relevance,
384
+ })),
257
385
  });
258
386
  return { extractions: results };
259
387
  }
260
388
  // ─────────────────────────────────────────────────────────
261
389
  // Tier 2: Post-filter (deterministic, 0 tokens)
390
+ // Now includes TIME RELEVANCE check.
262
391
  // ─────────────────────────────────────────────────────────
263
392
  function postFilter(state) {
264
393
  const validated = state.extractions.map((ext) => {
394
+ // V0: TIME RELEVANCE — the most important check
395
+ if (ext.time_relevance < 0.5) {
396
+ return { ...ext, valid: false, reject_reason: "stale_post" };
397
+ }
265
398
  // V1: region relevance
266
399
  if (ext.region_relevance < 0.5) {
267
400
  return { ...ext, valid: false, reject_reason: "region_irrelevant" };
@@ -270,7 +403,7 @@ function postFilter(state) {
270
403
  if (ext.source_trust < 0.4) {
271
404
  return { ...ext, valid: false, reject_reason: "untrusted_source" };
272
405
  }
273
- // V3: tone — reject alarmist (бот для успокоения, не для паники)
406
+ // V3: tone — reject alarmist
274
407
  if (ext.tone === "alarmist") {
275
408
  return { ...ext, valid: false, reject_reason: "alarmist_tone" };
276
409
  }
@@ -278,7 +411,11 @@ function postFilter(state) {
278
411
  const hasData = ext.country_origin !== null ||
279
412
  ext.rocket_count !== null ||
280
413
  ext.is_cassette !== null ||
414
+ ext.intercepted !== null ||
415
+ ext.intercepted_qual !== null ||
281
416
  ext.hits_confirmed !== null ||
417
+ ext.casualties !== null ||
418
+ ext.injuries !== null ||
282
419
  ext.eta_refined_minutes !== null;
283
420
  if (!hasData) {
284
421
  return { ...ext, valid: false, reject_reason: "no_data" };
@@ -307,20 +444,19 @@ function vote(state) {
307
444
  if (valid.length === 0) {
308
445
  return { votedResult: null };
309
446
  }
310
- // Assign 1-based citation indices to valid extractions
447
+ // Assign 1-based citation indices
311
448
  const indexed = valid.map((e, i) => ({ ...e, idx: i + 1 }));
312
- // All valid sources become cited sources
313
449
  const citedSources = indexed.map((e) => ({
314
450
  index: e.idx,
315
451
  channel: e.channel,
316
452
  messageUrl: e.messageUrl ?? null,
317
453
  }));
318
- // ETA: highest confidence source that has eta
454
+ // ETA: highest confidence source
319
455
  const withEta = indexed
320
456
  .filter((e) => e.eta_refined_minutes !== null)
321
457
  .sort((a, b) => b.confidence - a.confidence);
322
458
  const bestEta = withEta[0] ?? null;
323
- // Country: group unique values, each with their source indices
459
+ // Country: group unique values
324
460
  const countryMap = new Map();
325
461
  for (const e of indexed) {
326
462
  if (e.country_origin) {
@@ -335,35 +471,19 @@ function vote(state) {
335
471
  citations,
336
472
  }))
337
473
  : null;
338
- // Rocket count: range across sources (min … max)
474
+ // Rocket count: range
339
475
  const rocketSrcs = indexed.filter((e) => e.rocket_count !== null);
340
476
  const rocketVals = rocketSrcs.map((e) => e.rocket_count);
341
477
  const rocket_count_min = rocketVals.length > 0 ? Math.min(...rocketVals) : null;
342
478
  const rocket_count_max = rocketVals.length > 0 ? Math.max(...rocketVals) : null;
343
479
  const rocket_citations = rocketSrcs.map((e) => e.idx);
344
- // Helper: avg weighted confidence for a set of sources
480
+ // Helper: avg weighted confidence
345
481
  function fieldConf(srcs) {
346
482
  if (srcs.length === 0)
347
483
  return 0;
348
484
  return (srcs.reduce((s, e) => s + e.source_trust * e.confidence, 0) / srcs.length);
349
485
  }
350
- // Cassette: majority
351
- const cassSrcs = indexed.filter((e) => e.is_cassette !== null);
352
- const cassVals = cassSrcs.map((e) => e.is_cassette);
353
- const is_cassette = cassVals.length > 0
354
- ? cassVals.filter(Boolean).length > cassVals.length / 2
355
- : null;
356
- const is_cassette_confidence = fieldConf(cassSrcs);
357
- // Hits: median
358
- const hitsSrcs = indexed.filter((e) => e.hits_confirmed !== null && e.hits_confirmed > 0);
359
- const hitsVals = indexed
360
- .filter((e) => e.hits_confirmed !== null)
361
- .map((e) => e.hits_confirmed)
362
- .sort((a, b) => a - b);
363
- const hits_confirmed = hitsVals.length > 0 ? hitsVals[Math.floor(hitsVals.length / 2)] : null;
364
- const hits_citations = hitsSrcs.map((e) => e.idx);
365
- const hits_confidence = fieldConf(hitsSrcs);
366
- // Helper: mode (most frequent non-null value) for QualCount aggregation
486
+ // Helper: mode for QualCount
367
487
  function modeQual(srcs, key) {
368
488
  const vals = srcs
369
489
  .map((e) => e[key])
@@ -382,7 +502,23 @@ function vote(state) {
382
502
  .sort((a, b) => a - b);
383
503
  return vals.length > 0 ? vals[Math.floor(vals.length / 2)] : null;
384
504
  }
385
- // Intercepted: median across sources that reported exact number; mode for qual
505
+ // Cassette: majority
506
+ const cassSrcs = indexed.filter((e) => e.is_cassette !== null);
507
+ const cassVals = cassSrcs.map((e) => e.is_cassette);
508
+ const is_cassette = cassVals.length > 0
509
+ ? cassVals.filter(Boolean).length > cassVals.length / 2
510
+ : null;
511
+ const is_cassette_confidence = fieldConf(cassSrcs);
512
+ // Hits: median
513
+ const hitsSrcs = indexed.filter((e) => e.hits_confirmed !== null && e.hits_confirmed > 0);
514
+ const hitsVals = indexed
515
+ .filter((e) => e.hits_confirmed !== null)
516
+ .map((e) => e.hits_confirmed)
517
+ .sort((a, b) => a - b);
518
+ const hits_confirmed = hitsVals.length > 0 ? hitsVals[Math.floor(hitsVals.length / 2)] : null;
519
+ const hits_citations = hitsSrcs.map((e) => e.idx);
520
+ const hits_confidence = fieldConf(hitsSrcs);
521
+ // Intercepted: median / qual
386
522
  const interceptedSrcs = indexed.filter((e) => e.intercepted !== null);
387
523
  const interceptedQualSrcs = indexed.filter((e) => e.intercepted_qual !== null);
388
524
  const interceptedVals = interceptedSrcs
@@ -424,6 +560,26 @@ function vote(state) {
424
560
  ? medianQualNum(openQualSrcs, "open_area_impact_qual_num")
425
561
  : null;
426
562
  const open_area_confidence = fieldConf(openSrcs.length > 0 ? openSrcs : openQualSrcs);
563
+ // Casualties
564
+ const casualtySrcs = indexed.filter((e) => e.casualties !== null && e.casualties > 0);
565
+ const casualtyVals = casualtySrcs
566
+ .map((e) => e.casualties)
567
+ .sort((a, b) => a - b);
568
+ const casualties = casualtyVals.length > 0
569
+ ? casualtyVals[Math.floor(casualtyVals.length / 2)]
570
+ : null;
571
+ const casualties_citations = casualtySrcs.map((e) => e.idx);
572
+ const casualties_confidence = fieldConf(casualtySrcs);
573
+ // Injuries
574
+ const injurySrcs = indexed.filter((e) => e.injuries !== null && e.injuries > 0);
575
+ const injuryVals = injurySrcs
576
+ .map((e) => e.injuries)
577
+ .sort((a, b) => a - b);
578
+ const injuries = injuryVals.length > 0
579
+ ? injuryVals[Math.floor(injuryVals.length / 2)]
580
+ : null;
581
+ const injuries_citations = injurySrcs.map((e) => e.idx);
582
+ const injuries_confidence = fieldConf(injurySrcs);
427
583
  // Rocket confidence
428
584
  const rocket_confidence = fieldConf(rocketSrcs);
429
585
  // Overall weighted confidence
@@ -454,6 +610,12 @@ function vote(state) {
454
610
  hits_confirmed,
455
611
  hits_citations,
456
612
  hits_confidence,
613
+ casualties,
614
+ casualties_citations,
615
+ casualties_confidence,
616
+ injuries,
617
+ injuries_citations,
618
+ injuries_confidence,
457
619
  confidence: Math.round(weightedConf * 100) / 100,
458
620
  sources_count: indexed.length,
459
621
  citedSources,
@@ -462,7 +624,7 @@ function vote(state) {
462
624
  return { votedResult: voted };
463
625
  }
464
626
  // ─────────────────────────────────────────────────────────
465
- // Tier 4: Edit message — inline update (0 tokens)
627
+ // Tier 4: Edit message — inline citations, carry-forward
466
628
  // ─────────────────────────────────────────────────────────
467
629
  /** EN country name → Russian */
468
630
  const COUNTRY_RU = {
@@ -474,174 +636,305 @@ const COUNTRY_RU = {
474
636
  Syria: "Сирия",
475
637
  Hezbollah: "Хезболла",
476
638
  };
477
- /** Convert index to Unicode superscript string: 1 → ¹, 13 → ¹³ */
478
- const SUPERSCRIPTS = ["⁰", "¹", "²", "³", "⁴", "⁵", "⁶", "⁷", "⁸", "⁹"];
479
- function sup(indices) {
480
- return indices
481
- .map((n) => String(n)
482
- .split("")
483
- .map((d) => SUPERSCRIPTS[Number(d)])
484
- .join(""))
485
- .join("");
639
+ /** Format inline citations: [[1]](url), [[2]](url) */
640
+ function inlineCites(indices, citedSources) {
641
+ const parts = [];
642
+ for (const idx of indices) {
643
+ const src = citedSources.find((s) => s.index === idx);
644
+ if (src?.messageUrl) {
645
+ parts.push(`<a href="${src.messageUrl}">[${idx}]</a>`);
646
+ }
647
+ }
648
+ return parts.length > 0 ? " " + parts.join(", ") : "";
649
+ }
650
+ /** Get InlineCite[] from citation indices */
651
+ function extractCites(indices, citedSources) {
652
+ const cites = [];
653
+ for (const idx of indices) {
654
+ const src = citedSources.find((s) => s.index === idx);
655
+ if (src?.messageUrl) {
656
+ cites.push({ url: src.messageUrl, channel: src.channel });
657
+ }
658
+ }
659
+ return cites;
660
+ }
661
+ /** Format inline citations from InlineCite[] (for carry-forward data) */
662
+ function inlineCitesFromData(cites) {
663
+ if (cites.length === 0)
664
+ return "";
665
+ return (" " + cites.map((c, i) => `<a href="${c.url}">[${i + 1}]</a>`).join(", "));
666
+ }
667
+ // Confidence thresholds
668
+ const SKIP = 0.6;
669
+ const UNCERTAIN = 0.75;
670
+ const CERTAIN = 0.95;
671
+ function qualDisplay(qual, qualNum, conf) {
672
+ if (qual === null)
673
+ return null;
674
+ if (qual === "none")
675
+ return conf >= CERTAIN ? "нет" : null;
676
+ const map = {
677
+ all: "все",
678
+ most: "большинство",
679
+ many: "много",
680
+ few: "несколько",
681
+ exists: "есть",
682
+ none: "нет",
683
+ more_than: qualNum != null ? `>${qualNum}` : ">1",
684
+ less_than: qualNum != null ? `<${qualNum}` : "<нескольких",
685
+ };
686
+ return map[qual];
687
+ }
688
+ function breakdownItem(label, num, qual, qualNum, conf) {
689
+ if (conf < SKIP)
690
+ return null;
691
+ const u = conf < UNCERTAIN ? " (?)" : "";
692
+ if (num !== null)
693
+ return `${label} — ${num}${u}`;
694
+ const qs = qualDisplay(qual, qualNum, conf);
695
+ if (qs === null)
696
+ return null;
697
+ return `${label} — ${qs}${u}`;
486
698
  }
487
699
  /**
488
- * Merge enrichment data INTO the existing key:value message.
489
- * Format:
490
- * Подлётное время: ~00:21¹ ← ETA as absolute clock time
491
- *
492
- * Откуда: Иран¹³ + Ливан² ← blank line before intel block
493
- * Ракет: ~5-7
494
- * Попадания (Дан центр): 2¹
495
- * Время оповещения: 03:47
496
- * —
497
- * Источники: [1](url) [2](url) [3](url)
700
+ * Build enrichment data from current vote + previous enrichment (carry-forward).
701
+ * Returns updated EnrichmentData for Redis persistence.
498
702
  */
499
- function buildEnrichedMessage(currentText, alertType, alertTs, r) {
500
- let text = currentText;
501
- // Refine ETA in-place (early/siren only)
703
+ function buildEnrichmentFromVote(r, prev, alertType, alertTs) {
704
+ const data = { ...prev };
705
+ // Origin — update if voted has it
706
+ if (r.country_origins && r.country_origins.length > 0) {
707
+ data.origin = r.country_origins
708
+ .map((c) => COUNTRY_RU[c.name] ?? c.name)
709
+ .join(" + ");
710
+ data.originCites = r.country_origins.flatMap((c) => extractCites(c.citations, r.citedSources));
711
+ }
712
+ // ETA — only for early_warning/siren
502
713
  if (r.eta_refined_minutes !== null &&
503
- r.eta_citations.length > 0 &&
504
714
  (alertType === "early_warning" || alertType === "siren")) {
505
- text = refineEtaInPlace(text, r.eta_refined_minutes, alertTs, r.eta_citations);
506
- }
507
- // Insert "Откуда" before time line (with leading blank line for visual separation)
508
- if (r.country_origins && r.country_origins.length > 0) {
509
- const parts = r.country_origins.map((c) => {
510
- const ru = COUNTRY_RU[c.name] ?? c.name;
511
- return `${ru}${sup(c.citations)}`;
715
+ const absTime = new Date(alertTs + r.eta_refined_minutes * 60_000).toLocaleTimeString("he-IL", {
716
+ hour: "2-digit",
717
+ minute: "2-digit",
718
+ timeZone: "Asia/Jerusalem",
512
719
  });
513
- text = insertBeforeTimeLine(text, `\n<b>Откуда:</b> ${parts.join(" + ")}`);
514
- }
515
- // Confidence thresholds for uncertainty markers
516
- const SKIP = 0.6; // below this → skip field entirely
517
- const UNCERTAIN = 0.75; // below this (but ≥ SKIP) → add (?)
518
- const CERTAIN = 0.95; // "none" qual requires this level
519
- // Convert QualCount to Russian display string.
520
- // Returns null if the qual should be suppressed (e.g. "none" below CERTAIN).
521
- function qualDisplay(qual, qualNum, conf) {
522
- if (qual === null)
523
- return null;
524
- if (qual === "none")
525
- return conf >= CERTAIN ? "нет" : null;
526
- const map = {
527
- all: "все",
528
- most: "большинство",
529
- many: "много",
530
- few: "несколько",
531
- exists: "есть",
532
- none: "нет",
533
- more_than: qualNum != null ? `>​${qualNum}` : ">​1",
534
- less_than: qualNum != null ? `<​${qualNum}` : "<​нескольких",
535
- };
536
- return map[qual];
720
+ data.etaAbsolute = `~${absTime}`;
721
+ data.etaCites = extractCites(r.eta_citations, r.citedSources);
537
722
  }
538
- // Format one breakdown item: prefer exact number, fall back to qual.
539
- // Returns null if nothing to show (below threshold or not reported).
540
- function breakdownItem(label, num, qual, qualNum, conf) {
541
- if (conf < SKIP)
542
- return null;
543
- const u = conf < UNCERTAIN ? " (?)" : "";
544
- if (num !== null)
545
- return `${label} — ${num}${u}`;
546
- const qs = qualDisplay(qual, qualNum, conf);
547
- if (qs === null)
548
- return null;
549
- return `${label} — ${qs}${u}`;
550
- }
551
- // Rocket count with breakdown and uncertainty markers
552
- if (r.rocket_count_min !== null &&
553
- r.rocket_count_max !== null &&
554
- r.rocket_confidence >= SKIP) {
555
- const rocketUncertain = r.rocket_confidence < UNCERTAIN ? " (?)" : "";
556
- const countStr = r.rocket_count_min === r.rocket_count_max
557
- ? `${r.rocket_count_min}`
558
- : `~${r.rocket_count_min}–${r.rocket_count_max}`;
559
- const bParts = [];
560
- const bi = breakdownItem("перехвачено", r.intercepted, r.intercepted_qual, r.intercepted_qual_num, r.intercepted_confidence);
561
- if (bi)
562
- bParts.push(bi);
563
- const bs = breakdownItem("упали в море", r.sea_impact, r.sea_impact_qual, r.sea_impact_qual_num, r.sea_confidence);
564
- if (bs)
565
- bParts.push(bs);
566
- const bo = breakdownItem("открытая местность", r.open_area_impact, r.open_area_impact_qual, r.open_area_impact_qual_num, r.open_area_confidence);
567
- if (bo)
568
- bParts.push(bo);
569
- const breakdown = bParts.length > 0 ? `, из них: ${bParts.join(", ")}` : "";
570
- const cassetteU = r.is_cassette_confidence < UNCERTAIN ? " (?)" : "";
571
- const cassette = r.is_cassette && r.is_cassette_confidence >= SKIP
572
- ? `, есть кассетные${cassetteU}`
573
- : "";
574
- text = insertBeforeTimeLine(text, `<b>Ракет:</b> ${countStr}${rocketUncertain}${breakdown}${cassette}`);
575
- }
576
- // Hits: есть прямое попадание/-ия в <area>: N — only if confidence ≥ SKIP
723
+ // Rocket count
724
+ if (r.rocket_count_min !== null && r.rocket_count_max !== null) {
725
+ const u = r.rocket_confidence < UNCERTAIN ? " (?)" : "";
726
+ data.rocketCount =
727
+ r.rocket_count_min === r.rocket_count_max
728
+ ? `${r.rocket_count_min}${u}`
729
+ : `~${r.rocket_count_min}–${r.rocket_count_max}${u}`;
730
+ data.rocketCites = extractCites(r.rocket_citations, r.citedSources);
731
+ }
732
+ // Cassette
733
+ if (r.is_cassette !== null && r.is_cassette_confidence >= SKIP) {
734
+ data.isCassette = r.is_cassette;
735
+ }
736
+ // Intercepted
737
+ if (r.intercepted !== null && r.intercepted_confidence >= SKIP) {
738
+ const u = r.intercepted_confidence < UNCERTAIN ? " (?)" : "";
739
+ data.intercepted = `${r.intercepted}${u}`;
740
+ data.interceptedCites = extractCites(r.citedSources
741
+ .filter((s) => {
742
+ const ext = r.citedSources.find((cs) => cs.index === s.index);
743
+ return ext !== undefined;
744
+ })
745
+ .map((s) => s.index), r.citedSources);
746
+ }
747
+ else if (r.intercepted_qual !== null && r.intercepted_confidence >= SKIP) {
748
+ const qs = qualDisplay(r.intercepted_qual, r.intercepted_qual_num, r.intercepted_confidence);
749
+ if (qs)
750
+ data.intercepted = qs;
751
+ }
752
+ // Hits
577
753
  if (r.hits_confirmed !== null &&
578
754
  r.hits_confirmed > 0 &&
579
755
  r.hits_confidence >= SKIP) {
756
+ const u = r.hits_confidence < UNCERTAIN ? " (?)" : "";
757
+ data.hitsConfirmed = `${r.hits_confirmed}${u}`;
758
+ data.hitsCites = extractCites(r.hits_citations, r.citedSources);
759
+ }
760
+ // Casualties
761
+ if (r.casualties !== null &&
762
+ r.casualties > 0 &&
763
+ r.casualties_confidence >= SKIP) {
764
+ const u = r.casualties_confidence < UNCERTAIN ? " (?)" : "";
765
+ data.casualties = `${r.casualties}${u}`;
766
+ data.casualtiesCites = extractCites(r.casualties_citations, r.citedSources);
767
+ }
768
+ // Injuries
769
+ if (r.injuries !== null && r.injuries > 0 && r.injuries_confidence >= SKIP) {
770
+ const u = r.injuries_confidence < UNCERTAIN ? " (?)" : "";
771
+ data.injuries = `${r.injuries}${u}`;
772
+ data.injuriesCites = extractCites(r.injuries_citations, r.citedSources);
773
+ }
774
+ // Early warning time — record when first early_warning was received
775
+ if (alertType === "early_warning" && !data.earlyWarningTime) {
776
+ data.earlyWarningTime = toIsraelTime(alertTs);
777
+ }
778
+ return data;
779
+ }
780
+ /**
781
+ * Build the enriched message text from current message + enrichment data.
782
+ * Uses inline [[1]](url) citations. No superscripts. No footer sources.
783
+ */
784
+ function buildEnrichedMessage(currentText, alertType, alertTs, enrichment) {
785
+ let text = currentText;
786
+ // ── Refine ETA in-place ──
787
+ if (enrichment.etaAbsolute &&
788
+ (alertType === "early_warning" || alertType === "siren")) {
789
+ const etaCiteStr = inlineCitesFromData(enrichment.etaCites);
790
+ const refined = `${enrichment.etaAbsolute}${etaCiteStr}`;
791
+ const etaPatterns = [
792
+ /~\d+[–-]\d+\s*мин/, // ~5–12 мин
793
+ /~\d+[–-]\d+\s*min/, // ~5–12 min
794
+ /~\d+[–-]\d+\s*דקות/, // ~5–12 דקות
795
+ /~\d+[–-]\d+\s*دقيقة/, // ~5–12 دقيقة
796
+ /1\.5\s*мин/, // 1.5 мин (siren)
797
+ /1\.5\s*min/,
798
+ /1\.5\s*דקות/,
799
+ /1\.5\s*دقيقة/,
800
+ ];
801
+ for (const pattern of etaPatterns) {
802
+ if (pattern.test(text)) {
803
+ text = text.replace(pattern, refined);
804
+ break;
805
+ }
806
+ }
807
+ }
808
+ // ── Siren: show "Раннее предупреждение: было в HH:MM" ──
809
+ if (alertType === "siren" && enrichment.earlyWarningTime) {
810
+ text = insertBeforeTimeLine(text, `<b>Раннее предупреждение:</b> было в ${enrichment.earlyWarningTime}`);
811
+ }
812
+ // ── Origin ──
813
+ if (enrichment.origin) {
814
+ const citeStr = inlineCitesFromData(enrichment.originCites);
815
+ text = insertBeforeTimeLine(text, `\n<b>Откуда:</b> ${enrichment.origin}${citeStr}`);
816
+ }
817
+ // ── Rocket count + breakdown ──
818
+ if (enrichment.rocketCount) {
819
+ const citeStr = inlineCitesFromData(enrichment.rocketCites);
820
+ const cassette = enrichment.isCassette ? ", есть кассетные" : "";
821
+ let breakdown = "";
822
+ const bParts = [];
823
+ if (enrichment.intercepted) {
824
+ bParts.push(`перехвачено — ${enrichment.intercepted}`);
825
+ }
826
+ if (enrichment.seaImpact) {
827
+ bParts.push(`упали в море — ${enrichment.seaImpact}`);
828
+ }
829
+ if (enrichment.openAreaImpact) {
830
+ bParts.push(`открытая местность — ${enrichment.openAreaImpact}`);
831
+ }
832
+ if (bParts.length > 0)
833
+ breakdown = `, из них: ${bParts.join(", ")}`;
834
+ text = insertBeforeTimeLine(text, `<b>Ракет:</b> ${enrichment.rocketCount}${breakdown}${cassette}${citeStr}`);
835
+ }
836
+ else if (enrichment.intercepted && alertType !== "early_warning") {
837
+ // No rocket count but have interception data
838
+ const citeStr = inlineCitesFromData(enrichment.interceptedCites);
839
+ text = insertBeforeTimeLine(text, `<b>Перехвачено:</b> ${enrichment.intercepted}${citeStr}`);
840
+ }
841
+ // ── Hits ──
842
+ if (enrichment.hitsConfirmed && alertType !== "early_warning") {
580
843
  const areaLabel = Object.values(config.agent.areaLabels)[0] ?? "район";
581
- const hitWord = r.hits_confirmed === 1 ? "попадание" : "попадания";
582
- const hitsCite = r.hits_citations.length > 0 ? sup(r.hits_citations) : "";
583
- const hitsU = r.hits_confidence < UNCERTAIN ? " (?)" : "";
584
- text = insertBeforeTimeLine(text, `есть прямое ${hitWord} в ${areaLabel}: ${r.hits_confirmed}${hitsCite}${hitsU}`);
585
- }
586
- // Sources footer: [1](url) [2](url) ...
587
- const sourcesWithUrl = r.citedSources.filter((s) => s.messageUrl);
588
- if (sourcesWithUrl.length > 0) {
589
- const links = sourcesWithUrl
590
- .map((s) => `<a href="${s.messageUrl}">[${s.index}]</a>`)
591
- .join(" ");
592
- text += `\n—\n<i>Источники: ${links}</i>`;
844
+ const citeStr = inlineCitesFromData(enrichment.hitsCites);
845
+ text = insertBeforeTimeLine(text, `<b>Попадания (${areaLabel}):</b> ${enrichment.hitsConfirmed}${citeStr}`);
846
+ }
847
+ // ── Casualties / Injuries (resolved only) ──
848
+ if (enrichment.casualties && alertType === "resolved") {
849
+ const citeStr = inlineCitesFromData(enrichment.casualtiesCites);
850
+ text = insertBeforeTimeLine(text, `<b>Погибшие:</b> ${enrichment.casualties}${citeStr}`);
851
+ }
852
+ if (enrichment.injuries && alertType === "resolved") {
853
+ const citeStr = inlineCitesFromData(enrichment.injuriesCites);
854
+ text = insertBeforeTimeLine(text, `<b>Пострадавшие:</b> ${enrichment.injuries}${citeStr}`);
593
855
  }
594
856
  return text;
595
857
  }
596
858
  /**
597
859
  * Insert a line before the time line (last "Время" / "Time" / "שעת" line).
598
- * This keeps new data visually grouped with existing fields.
599
860
  */
600
861
  function insertBeforeTimeLine(text, line) {
601
- // Match "Время оповещения" / "Alert time" / "שעת ההתרעה" / "وقت الإنذار"
602
862
  const timePattern = /(<b>(?:Время оповещения|Alert time|שעת ההתרעה|وقت الإنذار):<\/b>)/;
603
863
  const match = text.match(timePattern);
604
864
  if (match?.index !== undefined) {
605
865
  return text.slice(0, match.index) + line + "\n" + text.slice(match.index);
606
866
  }
607
- // Fallback: append before last line
608
867
  const lines = text.split("\n");
609
868
  lines.splice(Math.max(lines.length - 1, 0), 0, line);
610
869
  return lines.join("\n");
611
870
  }
612
- /**
613
- * Replace the default ETA range with absolute impact time + superscript citation.
614
- * "~5–12 мин" → "~00:21¹"
615
- */
616
- function refineEtaInPlace(text, minutes, alertTs, citations) {
617
- // Compute absolute impact time in Israel timezone
618
- const absTime = new Date(alertTs + minutes * 60_000).toLocaleTimeString("he-IL", { hour: "2-digit", minute: "2-digit", timeZone: "Asia/Jerusalem" });
619
- const refined = `~${absTime}${sup(citations)}`;
620
- const etaPatterns = [
621
- /~\d+[–-]\d+\s*мин/, // ~5–12 мин
622
- /~\d+[–-]\d+\s*min/, // ~5–12 min
623
- /~\d+[–-]\d+\s*דקות/, // ~5–12 דקות
624
- /~\d+[–-]\d+\s*دقائق/, // ~5–12 دقائق
625
- /1\.5\s*мин/, // 1.5 мин (siren)
626
- /1\.5\s*min/, // 1.5 min
627
- /1\.5\s*דקות/, // 1.5 דקות
628
- /1\.5\s*دقائق/, // 1.5 دقائق
629
- ];
630
- for (const pattern of etaPatterns) {
631
- if (pattern.test(text)) {
632
- return text.replace(pattern, refined);
633
- }
634
- }
635
- return text;
636
- }
637
871
  async function editMessage(state) {
638
872
  const { votedResult } = state;
639
873
  if (!config.botToken)
640
874
  return {};
641
875
  const tgBot = new Bot(config.botToken);
642
- // No valid sources found — silently skip (don't touch the message)
876
+ // No valid sources — carry forward previous data only
877
+ const prevEnrichment = state.previousEnrichment ?? emptyEnrichmentData();
643
878
  if (!votedResult) {
644
- logger.info("Agent: no voted result — skipping edit", {
879
+ // No new data from channels — still try to build message from carry-forward
880
+ if (prevEnrichment.origin || prevEnrichment.intercepted) {
881
+ // Have carry-forward data, build message
882
+ const newText = buildEnrichedMessage(state.currentText, state.alertType, state.alertTs, prevEnrichment);
883
+ const hash = textHash(newText);
884
+ if (hash === prevEnrichment.lastEditHash) {
885
+ logger.info("Agent: no change in message (dedup) — skipping edit", {
886
+ alertId: state.alertId,
887
+ });
888
+ return {};
889
+ }
890
+ try {
891
+ if (state.isCaption) {
892
+ await tgBot.api.editMessageCaption(state.chatId, state.messageId, {
893
+ caption: newText,
894
+ parse_mode: "HTML",
895
+ });
896
+ }
897
+ else {
898
+ await tgBot.api.editMessageText(state.chatId, state.messageId, newText, { parse_mode: "HTML" });
899
+ }
900
+ prevEnrichment.lastEditHash = hash;
901
+ await saveEnrichmentData(prevEnrichment);
902
+ logger.info("Agent: message enriched (carry-forward only)", {
903
+ alertId: state.alertId,
904
+ messageId: state.messageId,
905
+ });
906
+ }
907
+ catch (err) {
908
+ const errStr = String(err);
909
+ if (errStr.includes("message is not modified")) {
910
+ prevEnrichment.lastEditHash = hash;
911
+ await saveEnrichmentData(prevEnrichment);
912
+ logger.info("Agent: message already up-to-date (dedup)", {
913
+ alertId: state.alertId,
914
+ });
915
+ }
916
+ else {
917
+ logger.error("Agent: failed to edit message", {
918
+ alertId: state.alertId,
919
+ error: errStr,
920
+ });
921
+ }
922
+ }
923
+ }
924
+ else {
925
+ logger.info("Agent: no voted result — skipping edit", {
926
+ alertId: state.alertId,
927
+ });
928
+ }
929
+ return {};
930
+ }
931
+ // Build enrichment data: merge vote + previous
932
+ const enrichment = buildEnrichmentFromVote(votedResult, prevEnrichment, state.alertType, state.alertTs);
933
+ const newText = buildEnrichedMessage(state.currentText, state.alertType, state.alertTs, enrichment);
934
+ // Dedup: skip if text hasn't changed
935
+ const hash = textHash(newText);
936
+ if (hash === enrichment.lastEditHash) {
937
+ logger.info("Agent: no change in message (dedup) — skipping edit", {
645
938
  alertId: state.alertId,
646
939
  });
647
940
  return {};
@@ -654,7 +947,6 @@ async function editMessage(state) {
654
947
  threshold: config.agent.confidenceThreshold,
655
948
  });
656
949
  }
657
- const newText = buildEnrichedMessage(state.currentText, state.alertType, state.alertTs, votedResult);
658
950
  try {
659
951
  if (state.isCaption) {
660
952
  await tgBot.api.editMessageCaption(state.chatId, state.messageId, {
@@ -667,18 +959,31 @@ async function editMessage(state) {
667
959
  parse_mode: "HTML",
668
960
  });
669
961
  }
962
+ enrichment.lastEditHash = hash;
963
+ await saveEnrichmentData(enrichment);
670
964
  logger.info("Agent: message enriched", {
671
965
  alertId: state.alertId,
672
966
  messageId: state.messageId,
673
967
  confidence: votedResult.confidence,
674
968
  sources: votedResult.sources_count,
969
+ phase: state.alertType,
675
970
  });
676
971
  }
677
972
  catch (err) {
678
- logger.error("Agent: failed to edit message", {
679
- alertId: state.alertId,
680
- error: String(err),
681
- });
973
+ const errStr = String(err);
974
+ if (errStr.includes("message is not modified")) {
975
+ enrichment.lastEditHash = hash;
976
+ await saveEnrichmentData(enrichment);
977
+ logger.info("Agent: message already up-to-date (dedup)", {
978
+ alertId: state.alertId,
979
+ });
980
+ }
981
+ else {
982
+ logger.error("Agent: failed to edit message", {
983
+ alertId: state.alertId,
984
+ error: errStr,
985
+ });
986
+ }
682
987
  }
683
988
  return {};
684
989
  }
@@ -686,7 +991,7 @@ async function editMessage(state) {
686
991
  // Clarify Node — MCP tool calling via ReAct (conditional)
687
992
  // ─────────────────────────────────────────────────────────
688
993
  async function clarifyNode(state) {
689
- const { votedResult, extractions, alertId, alertAreas, alertType, messageId, currentText, } = state;
994
+ const { votedResult, extractions, alertId, alertAreas, alertType, alertTs, messageId, currentText, } = state;
690
995
  if (!votedResult) {
691
996
  logger.info("Agent: clarify skipped — no voted result", { alertId });
692
997
  return { clarifyAttempted: true };
@@ -695,18 +1000,19 @@ async function clarifyNode(state) {
695
1000
  alertId,
696
1001
  confidence: votedResult.confidence,
697
1002
  threshold: config.agent.confidenceThreshold,
1003
+ phase: alertType,
698
1004
  });
699
1005
  try {
700
1006
  const result = await runClarify({
701
1007
  alertId,
702
1008
  alertAreas,
703
1009
  alertType,
1010
+ alertTs,
704
1011
  messageId,
705
1012
  currentText,
706
1013
  extractions,
707
1014
  votedResult,
708
1015
  });
709
- // Merge new extractions with existing valid ones
710
1016
  const mergedExtractions = [...extractions, ...result.newExtractions];
711
1017
  logger.info("Agent: clarify completed", {
712
1018
  alertId,
@@ -717,7 +1023,6 @@ async function clarifyNode(state) {
717
1023
  });
718
1024
  return {
719
1025
  extractions: mergedExtractions,
720
- // Reset votedResult so vote() re-runs with merged data
721
1026
  votedResult: null,
722
1027
  clarifyAttempted: true,
723
1028
  };
@@ -732,19 +1037,13 @@ async function clarifyNode(state) {
732
1037
  }
733
1038
  // ── Conditional routing after vote ─────────────────────
734
1039
  function shouldClarify(state) {
735
- // Only clarify once per pipeline run (prevents infinite loop)
736
- if (state.clarifyAttempted) {
1040
+ if (state.clarifyAttempted)
737
1041
  return "editMessage";
738
- }
739
- // MCP tools must be enabled
740
- if (!config.agent.mcpTools) {
1042
+ if (!config.agent.mcpTools)
741
1043
  return "editMessage";
742
- }
743
- // No voted result → nothing to clarify
744
- if (!state.votedResult) {
1044
+ if (!state.votedResult)
745
1045
  return "editMessage";
746
- }
747
- // Confidence below threshold → clarify
1046
+ // Low confidence → clarify (may use Oref tool for time validation)
748
1047
  if (state.votedResult.confidence < config.agent.confidenceThreshold) {
749
1048
  logger.info("Agent: routing to clarify (low confidence)", {
750
1049
  confidence: state.votedResult.confidence,
@@ -752,10 +1051,27 @@ function shouldClarify(state) {
752
1051
  });
753
1052
  return "clarify";
754
1053
  }
1054
+ // Suspicious time: if the only country is unexpected for the region, verify
1055
+ // This catches cases like "Lebanon" appearing on a Tel Aviv alert
1056
+ // when the real attack is from Iran/Yemen
1057
+ const origins = state.votedResult.country_origins;
1058
+ if (origins &&
1059
+ origins.length === 1 &&
1060
+ state.votedResult.sources_count === 1) {
1061
+ const singleOrigin = origins[0].name;
1062
+ // Lebanon attacks typically don't reach central Israel
1063
+ if (singleOrigin === "Lebanon" &&
1064
+ state.alertAreas.some((a) => a.includes("תל אביב") ||
1065
+ a.includes("גוש דן") ||
1066
+ a.includes("שרון") ||
1067
+ a.includes("מרכז"))) {
1068
+ logger.info("Agent: routing to clarify (suspicious single source: Lebanon for central Israel)", { origin: singleOrigin });
1069
+ return "clarify";
1070
+ }
1071
+ }
755
1072
  return "editMessage";
756
1073
  }
757
1074
  // ── Build graph ────────────────────────────────────────
758
- /** MemorySaver checkpointer — session-level state persistence */
759
1075
  const checkpointer = new MemorySaver();
760
1076
  function buildGraph() {
761
1077
  const graph = new StateGraph(AgentState)
@@ -764,13 +1080,12 @@ function buildGraph() {
764
1080
  .addNode("postFilter", postFilter)
765
1081
  .addNode("vote", vote)
766
1082
  .addNode("clarify", clarifyNode)
767
- .addNode("revote", vote) // Re-run vote after clarify with merged data
1083
+ .addNode("revote", vote)
768
1084
  .addNode("editMessage", editMessage)
769
1085
  .addEdge("__start__", "collectAndPreFilter")
770
1086
  .addEdge("collectAndPreFilter", "extractAndValidate")
771
1087
  .addEdge("extractAndValidate", "postFilter")
772
1088
  .addEdge("postFilter", "vote")
773
- // Conditional edge: vote → clarify (low conf) or editMessage (high conf)
774
1089
  .addConditionalEdges("vote", shouldClarify, {
775
1090
  clarify: "clarify",
776
1091
  editMessage: "editMessage",
@@ -796,8 +1111,32 @@ export async function runEnrichment(input) {
796
1111
  extractions: [],
797
1112
  votedResult: null,
798
1113
  clarifyAttempted: false,
799
- },
800
- // Thread ID for MemorySaver — enables session-level state persistence
801
- { configurable: { thread_id: input.alertId } });
1114
+ previousEnrichment: emptyEnrichmentData(),
1115
+ sessionStartTs: 0,
1116
+ phaseStartTs: 0,
1117
+ }, { configurable: { thread_id: input.alertId } });
802
1118
  }
1119
+ // ── Exported for testing ───────────────────────────────
1120
+ export const _test = {
1121
+ getLLM,
1122
+ buildRegionKeywords,
1123
+ LAUNCH_KEYWORDS,
1124
+ TIME_WINDOW_MS,
1125
+ toIsraelTime,
1126
+ textHash,
1127
+ postFilter,
1128
+ vote,
1129
+ buildEnrichmentFromVote,
1130
+ buildEnrichedMessage,
1131
+ insertBeforeTimeLine,
1132
+ inlineCites,
1133
+ inlineCitesFromData,
1134
+ extractCites,
1135
+ COUNTRY_RU,
1136
+ SYSTEM_PROMPT_BASE,
1137
+ getPhaseInstructions,
1138
+ SKIP,
1139
+ UNCERTAIN,
1140
+ CERTAIN,
1141
+ };
803
1142
  //# sourceMappingURL=graph.js.map