easyoref 1.11.7 → 1.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/dist/agent/auth.d.ts +0 -11
- package/dist/agent/auth.d.ts.map +0 -1
- package/dist/agent/auth.js +0 -54
- package/dist/agent/auth.js.map +0 -1
- package/dist/agent/dry-run.d.ts +0 -12
- package/dist/agent/dry-run.d.ts.map +0 -1
- package/dist/agent/dry-run.js +0 -229
- package/dist/agent/dry-run.js.map +0 -1
- package/dist/agent/gramjs-monitor.d.ts +0 -13
- package/dist/agent/gramjs-monitor.d.ts.map +0 -1
- package/dist/agent/gramjs-monitor.js +0 -193
- package/dist/agent/gramjs-monitor.js.map +0 -1
- package/dist/agent/graph.d.ts +0 -44
- package/dist/agent/graph.d.ts.map +0 -1
- package/dist/agent/graph.js +0 -711
- package/dist/agent/graph.js.map +0 -1
- package/dist/agent/queue.d.ts +0 -15
- package/dist/agent/queue.d.ts.map +0 -1
- package/dist/agent/queue.js +0 -40
- package/dist/agent/queue.js.map +0 -1
- package/dist/agent/redis.d.ts +0 -8
- package/dist/agent/redis.d.ts.map +0 -1
- package/dist/agent/redis.js +0 -33
- package/dist/agent/redis.js.map +0 -1
- package/dist/agent/store.d.ts +0 -39
- package/dist/agent/store.d.ts.map +0 -1
- package/dist/agent/store.js +0 -52
- package/dist/agent/store.js.map +0 -1
- package/dist/agent/types.d.ts +0 -102
- package/dist/agent/types.d.ts.map +0 -1
- package/dist/agent/types.js +0 -3
- package/dist/agent/types.js.map +0 -1
- package/dist/agent/worker.d.ts +0 -10
- package/dist/agent/worker.d.ts.map +0 -1
- package/dist/agent/worker.js +0 -69
- package/dist/agent/worker.js.map +0 -1
- package/dist/bin.d.ts +0 -17
- package/dist/bin.d.ts.map +0 -1
- package/dist/bin.js +0 -82
- package/dist/bin.js.map +0 -1
- package/dist/bot.d.ts +0 -16
- package/dist/bot.d.ts.map +0 -1
- package/dist/bot.js +0 -548
- package/dist/bot.js.map +0 -1
- package/dist/config.d.ts +0 -114
- package/dist/config.d.ts.map +0 -1
- package/dist/config.js +0 -139
- package/dist/config.js.map +0 -1
- package/dist/gif-state.d.ts +0 -17
- package/dist/gif-state.d.ts.map +0 -1
- package/dist/gif-state.js +0 -67
- package/dist/gif-state.js.map +0 -1
- package/dist/i18n.d.ts +0 -49
- package/dist/i18n.d.ts.map +0 -1
- package/dist/i18n.js +0 -229
- package/dist/i18n.js.map +0 -1
- package/dist/init.d.ts +0 -7
- package/dist/init.d.ts.map +0 -1
- package/dist/init.js +0 -163
- package/dist/init.js.map +0 -1
- package/dist/logger.d.ts +0 -14
- package/dist/logger.d.ts.map +0 -1
- package/dist/logger.js +0 -45
- package/dist/logger.js.map +0 -1
- package/dist/service.d.ts +0 -19
- package/dist/service.d.ts.map +0 -1
- package/dist/service.js +0 -165
- package/dist/service.js.map +0 -1
package/dist/agent/graph.js
DELETED
|
@@ -1,711 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* LangGraph.js enrichment pipeline — tiered validation.
|
|
3
|
-
*
|
|
4
|
-
* Design: minimize tokens, maximize confidence.
|
|
5
|
-
*
|
|
6
|
-
* ┌──────────────────────────────────────────────────────────────┐
|
|
7
|
-
* │ Tier 0: preFilter (deterministic, 0 tokens) │
|
|
8
|
-
* │ → keyword + region check on raw post text │
|
|
9
|
-
* │ │
|
|
10
|
-
* │ Tier 1: extractAndValidate (1 LLM call per post) │
|
|
11
|
-
* │ → combined extraction + 3 validators in single JSON: │
|
|
12
|
-
* │ V1: region_relevance (is post about our area?) │
|
|
13
|
-
* │ V2: source_trust (factual vs rumor/panic?) │
|
|
14
|
-
* │ V3: tone (calm/neutral/alarmist?) │
|
|
15
|
-
* │ → structured output, all validation in one prompt │
|
|
16
|
-
* │ │
|
|
17
|
-
* │ Tier 2: postFilter (deterministic, 0 tokens) │
|
|
18
|
-
* │ → reject: region_relevance < 0.5 │
|
|
19
|
-
* │ → reject: source_trust < 0.4 │
|
|
20
|
-
* │ → reject: tone === "alarmist" │
|
|
21
|
-
* │ → reject: all data fields null │
|
|
22
|
-
* │ │
|
|
23
|
-
* │ Tier 3: vote (deterministic, 0 tokens) │
|
|
24
|
-
* │ → majority consensus across validated sources │
|
|
25
|
-
* │ │
|
|
26
|
-
* │ Tier 4: editMessage (deterministic, 0 tokens) │
|
|
27
|
-
* │ → inline update of existing key:value pairs │
|
|
28
|
-
* └──────────────────────────────────────────────────────────────┘
|
|
29
|
-
*
|
|
30
|
-
* Total LLM cost: 1 call × N posts (max 8). GPT-4o-mini ≈ $0.0001/post.
|
|
31
|
-
*/
|
|
32
|
-
import { Annotation, StateGraph } from "@langchain/langgraph";
|
|
33
|
-
import { ChatOpenAI } from "@langchain/openai";
|
|
34
|
-
import { Bot } from "grammy";
|
|
35
|
-
import { config } from "../config.js";
|
|
36
|
-
import * as logger from "../logger.js";
|
|
37
|
-
import { getChannelPosts } from "./store.js";
|
|
38
|
-
// ── State ──────────────────────────────────────────────
|
|
39
|
-
// Graph state shared by every pipeline node. All channels use the same
// "last write wins" reducer: whatever a node returns for a key replaces the
// value stored so far.
const AgentState = Annotation.Root(
  Object.fromEntries(
    [
      "alertId",
      "alertTs",
      "alertType",
      "alertAreas",
      "chatId",
      "messageId",
      "isCaption",
      "currentText",
      "channelPosts",
      "filteredPosts",
      "extractions",
      "votedResult",
    ].map((channel) => [
      channel,
      Annotation({ reducer: (_previous, next) => next }),
    ]),
  ),
);
|
|
53
|
-
// ── LLM ───────────────────────────────────────────────
|
|
54
|
-
/**
 * Create the chat-model client used by the extraction tier.
 *
 * The model name and API key are read from `config.agent`; requests are sent
 * to the OpenRouter base URL with the project's attribution headers.
 * Temperature is 0 and replies are capped at 400 tokens.
 *
 * @returns {ChatOpenAI} a freshly constructed client (nothing is cached).
 */
function getLLM() {
  const { model, apiKey } = config.agent;
  const openRouter = {
    baseURL: "https://openrouter.ai/api/v1",
    defaultHeaders: {
      "HTTP-Referer": "https://github.com/mikhailkogan17/EasyOref",
      "X-Title": "EasyOref",
    },
  };
  return new ChatOpenAI({
    model,
    configuration: openRouter,
    apiKey,
    temperature: 0,
    maxTokens: 400,
  });
}
|
|
69
|
-
// ── Region keywords (Hebrew + transliterations) ────────
|
|
70
|
-
/**
 * Build the keyword list used by the Tier 0 pre-filter.
 *
 * Sources, in order:
 *  1. every entry of `config.areas`, plus its first word when that word has
 *     at least 2 characters (e.g. "תל אביב" → "תל");
 *  2. every key of `config.agent.areaLabels`, plus each word of its label
 *     that has at least 3 characters (e.g. "Дан центр" → "дан", "центр");
 *  3. a fixed list of attack-related terms that are always relevant.
 *
 * Keywords are trimmed and lowercased. Empty keywords are dropped: the
 * pre-filter matches with `String.prototype.includes`, and `includes("")` is
 * true for every text, so a single empty keyword would let every post through.
 *
 * @returns {string[]} unique keywords in first-seen order.
 */
function buildRegionKeywords() {
  const keywords = new Set();
  const add = (raw) => {
    const keyword = raw.trim().toLowerCase();
    if (keyword.length > 0) {
      keywords.add(keyword);
    }
  };

  for (const area of config.areas) {
    add(area);
    // The first word is often enough to recognise the area.
    const [firstWord] = area.trim().split(/\s+/);
    if (firstWord.length >= 2) {
      add(firstWord);
    }
  }

  for (const [he, label] of Object.entries(config.agent.areaLabels)) {
    add(he);
    // Add the individual words of the display label as well.
    for (const word of label.split(/\s+/)) {
      if (word.length >= 3) {
        add(word);
      }
    }
  }

  // Common attack-related keywords (always relevant).
  // NOTE(review): "жд" is a very short substring and will match many
  // unrelated Russian words — confirm it is intentional.
  const attackTerms = ["ישראל", "israel", "израиль", "ракет", "rocket", "missile", "iron dome", "כיפת ברזל", "жд", "перехват", "intercept", "siren", "азака", "צבע אדום", "red alert"];
  for (const term of attackTerms) {
    add(term);
  }

  return [...keywords];
}
|
|
95
|
-
// ─────────────────────────────────────────────────────────
|
|
96
|
-
// Tier 0: Pre-filter (deterministic, 0 tokens)
|
|
97
|
-
// ─────────────────────────────────────────────────────────
|
|
98
|
-
/**
 * Tier 0: load the channel posts stored for this alert and narrow them down
 * without spending any LLM tokens.
 *
 * A post is kept in `channelPosts` when its timestamp lies within
 * `config.agent.windowMinutes` of the alert timestamp (before or after).
 * From those, `filteredPosts` keeps the posts whose lowercased text contains
 * at least one keyword from `buildRegionKeywords()`.
 *
 * @param {object} state - graph state; reads `alertId` and `alertTs`.
 * @returns {Promise<{channelPosts: object[], filteredPosts: object[]}>}
 */
async function collectAndPreFilter(state) {
  const { alertId, alertTs } = state;
  const allPosts = await getChannelPosts(alertId);
  const maxDistanceMs = config.agent.windowMinutes * 60 * 1000;
  const channelPosts = allPosts.filter(
    ({ ts }) => Math.abs(ts - alertTs) <= maxDistanceMs,
  );

  if (channelPosts.length === 0) {
    logger.info("Agent: no posts in window", { alertId });
    return { channelPosts, filteredPosts: [] };
  }

  const keywords = buildRegionKeywords();
  // A post must mention at least one region/attack keyword to continue.
  const filteredPosts = channelPosts.filter(({ text }) => {
    const haystack = text.toLowerCase();
    return keywords.some((keyword) => haystack.includes(keyword));
  });

  logger.info("Agent: pre-filter", {
    alertId,
    total: allPosts.length,
    in_window: channelPosts.length,
    after_keyword_filter: filteredPosts.length,
  });
  return { channelPosts, filteredPosts };
}
|
|
120
|
-
// ─────────────────────────────────────────────────────────
|
|
121
|
-
// Tier 1: Extract + validate (1 LLM call per post)
|
|
122
|
-
// ─────────────────────────────────────────────────────────
|
|
123
|
-
// Allowed values of every *_qual field, rendered as a union for the schema
// shown to the model inside SYSTEM_PROMPT.
const QUAL_VALUES = '"all"|"most"|"many"|"few"|"exists"|"none"|"more_than"|"less_than"';
// System prompt for the single per-post LLM call: it asks for the extracted
// data fields and the three validator scores (region_relevance, source_trust,
// tone) in one JSON object. The field names listed here are the ones the
// later tiers read, so the rules must spell them exactly as the schema does.
const SYSTEM_PROMPT = `You analyze Telegram channel messages about a missile/rocket attack on Israel.
Your job: extract factual data AND assess message quality. Be concise.

Return ONLY valid JSON (no markdown, no explanation):
{
"region_relevance": float, // 0–1: does this message discuss the specified alert region?
"source_trust": float, // 0–1: factual reporting (1.0) vs unverified rumors/panic (0.0)
"tone": "calm"|"neutral"|"alarmist", // message tone — reject alarmist content
"country_origin": string|null, // "Iran","Yemen","Lebanon","Gaza","Iraq","Syria" or null
"rocket_count": int|null, // total rockets/missiles launched if mentioned
"is_cassette": bool|null, // cluster/cassette munitions confirmed?
"intercepted": int|null, // exact number intercepted by Iron Dome/air defense
"intercepted_qual": ${QUAL_VALUES}|null, // qualitative if no exact number; null if exact number given
"intercepted_qual_num": int|null, // reference number for more_than/less_than (e.g. 5 if "more than 5")
"sea_impact": int|null, // exact number fell in sea/unpopulated area
"sea_impact_qual": ${QUAL_VALUES}|null,
"sea_impact_qual_num": int|null,
"open_area_impact": int|null, // exact number hit open/populated ground
"open_area_impact_qual": ${QUAL_VALUES}|null,
"open_area_impact_qual_num": int|null,
"hits_confirmed": int|null, // confirmed hits on structures/buildings
"eta_refined_minutes": int|null, // refined time-to-impact if mentioned
"confidence": float // 0–1: overall confidence in this extraction
}

Rules:
- If unrelated to the alert region, set region_relevance=0 and all data fields to null.
- If message is speculative/unconfirmed rumor, set source_trust < 0.4.
- If message uses excessive caps, exclamation marks, panic language → tone="alarmist".
- Only extract concrete numbers explicitly stated in the text. Never guess.
- intercepted + sea_impact + open_area_impact should sum to rocket_count when all are known.
- If partial breakdown known, set unknown sub-fields to null (not 0).
- *_qual fields: use ONLY when the message explicitly states a qualitative descriptor WITHOUT an exact count.
If an exact number is given, set *_qual to null. Do NOT infer from absence.
- NEVER extract qualitative descriptors for casualties or injuries — hits_confirmed handles structural hits only.
- "none" qual is only valid if explicitly stated in the message (e.g., "все перехвачены", "не упало в море").`;
|
|
160
|
-
/**
 * Neutral extraction record: zero scores, "neutral" tone and every data field
 * null. Used as the base for parsed replies (so fields the model omitted are
 * null/0 rather than undefined) and as the payload of a failed extraction.
 */
function blankExtraction() {
  return {
    region_relevance: 0,
    source_trust: 0,
    tone: "neutral",
    country_origin: null,
    rocket_count: null,
    is_cassette: null,
    intercepted: null,
    intercepted_qual: null,
    intercepted_qual_num: null,
    sea_impact: null,
    sea_impact_qual: null,
    sea_impact_qual_num: null,
    open_area_impact: null,
    open_area_impact_qual: null,
    open_area_impact_qual_num: null,
    hits_confirmed: null,
    eta_refined_minutes: null,
    confidence: 0,
  };
}

/**
 * Turn the raw model reply into an extraction record.
 *
 * @param {unknown} content - `response.content` of the chat call.
 * @returns {object} the parsed JSON object merged over `blankExtraction()`.
 * @throws {SyntaxError|TypeError} when the reply is not a JSON object.
 */
function parseExtractionReply(content) {
  const raw = typeof content === "string" ? content : JSON.stringify(content);
  // Trim BEFORE stripping markdown fences (```json ... ```): the fence
  // patterns are anchored, so leading whitespace would otherwise keep the
  // opening fence in place and break JSON.parse.
  const text = raw
    .trim()
    .replace(/^```(?:json)?\s*\n?/i, "")
    .replace(/\n?```\s*$/i, "")
    .trim();
  const parsed = JSON.parse(text);
  if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
    throw new TypeError("LLM reply is not a JSON object");
  }
  return { ...blankExtraction(), ...parsed };
}

/**
 * Tier 1: run one LLM call per pre-filtered post (at most 8 posts, all in
 * parallel) and collect one extraction record per post.
 *
 * A post whose call or parsing fails does not abort the batch: it yields a
 * blank record with `valid: false` and `reject_reason: "extraction_error"`.
 *
 * @param {object} state - graph state; reads `filteredPosts`, `alertAreas`,
 *   `alertTs`, `alertType` and `alertId`.
 * @returns {Promise<{extractions: object[]}>}
 */
async function extractAndValidate(state) {
  if (state.filteredPosts.length === 0) {
    logger.info("Agent: no filtered posts to extract", {
      alertId: state.alertId,
    });
    return { extractions: [] };
  }
  const llm = getLLM();
  const posts = state.filteredPosts.slice(0, 8); // max 8 posts
  const regionHint = state.alertAreas.length > 0
    ? state.alertAreas.join(", ")
    : Object.keys(config.agent.areaLabels).join(", ") || "Israel";
  // Both clock values are rendered in Israel local time.
  const clockOptions = {
    hour: "2-digit",
    minute: "2-digit",
    timeZone: "Asia/Jerusalem",
  };
  const alertTimeIL = new Date(state.alertTs).toLocaleTimeString("he-IL", clockOptions);
  const nowIL = new Date().toLocaleTimeString("he-IL", clockOptions);
  let alertTypeLabel = state.alertType;
  if (state.alertType === "early_warning") {
    alertTypeLabel = "early warning (radar detection)";
  } else if (state.alertType === "siren") {
    alertTypeLabel = "siren (impact imminent)";
  }
  const contextHeader = `Alert type: ${alertTypeLabel}\n` +
    `Alert time: ${alertTimeIL} (Israel)\n` +
    `Current time: ${nowIL} (Israel)\n` +
    `Alert region: ${regionHint}\n` +
    `UI language: ${config.language}\n`;
  const results = await Promise.all(posts.map(async (post) => {
    try {
      const response = await llm.invoke([
        { role: "system", content: SYSTEM_PROMPT },
        {
          role: "user",
          // Only the first 800 characters of the post are sent.
          content: `${contextHeader}Channel: ${post.channel}\n\nMessage:\n${post.text.slice(0, 800)}`,
        },
      ]);
      return {
        ...parseExtractionReply(response.content),
        channel: post.channel,
        messageUrl: post.messageUrl,
        valid: true,
      };
    }
    catch (err) {
      logger.warn("Agent: extraction failed", {
        channel: post.channel,
        error: String(err),
      });
      return {
        channel: post.channel,
        ...blankExtraction(),
        valid: false,
        reject_reason: "extraction_error",
      };
    }
  }));
  logger.info("Agent: extracted", {
    alertId: state.alertId,
    count: results.length,
  });
  return { extractions: results };
}
|
|
253
|
-
// ─────────────────────────────────────────────────────────
|
|
254
|
-
// Tier 2: Post-filter (deterministic, 0 tokens)
|
|
255
|
-
// ─────────────────────────────────────────────────────────
|
|
256
|
-
/**
 * Tier 2: deterministic validation of every extraction (0 tokens).
 *
 * An extraction is rejected, with the first matching reason, when:
 *  - it was already rejected upstream (its existing reason is kept);
 *  - `region_relevance` is not at least 0.5  → "region_irrelevant";
 *  - `source_trust` is not at least 0.4      → "untrusted_source";
 *  - `tone` is "alarmist"                    → "alarmist_tone";
 *  - every data field is null/undefined      → "no_data";
 *  - `confidence` is not at least 0.3        → "low_confidence".
 *
 * Scores are tested as "not at least X" so that a missing or non-numeric
 * score is rejected instead of slipping through a `<` comparison.
 *
 * @param {object} state - graph state; reads `extractions` and `alertId`.
 * @returns {{extractions: object[]}} all extractions, each with `valid` set
 *   and, when rejected, `reject_reason`.
 */
function postFilter(state) {
  // Every field that carries reportable data, including the interception and
  // impact breakdown (exact numbers and qualitative descriptors).
  const dataFields = [
    "country_origin",
    "rocket_count",
    "is_cassette",
    "intercepted",
    "intercepted_qual",
    "sea_impact",
    "sea_impact_qual",
    "open_area_impact",
    "open_area_impact_qual",
    "hits_confirmed",
    "eta_refined_minutes",
  ];
  const rejectionFor = (ext) => {
    // Keep the more specific upstream reason (e.g. "extraction_error").
    if (ext.valid === false && typeof ext.reject_reason === "string") {
      return ext.reject_reason;
    }
    // V1: region relevance
    if (!(ext.region_relevance >= 0.5)) {
      return "region_irrelevant";
    }
    // V2: source trust
    if (!(ext.source_trust >= 0.4)) {
      return "untrusted_source";
    }
    // V3: tone — reject alarmist (the bot exists to reassure, not to spread panic)
    if (ext.tone === "alarmist") {
      return "alarmist_tone";
    }
    // V4: at least one data field must be present
    if (!dataFields.some((field) => ext[field] != null)) {
      return "no_data";
    }
    // V5: overall confidence floor
    if (!(ext.confidence >= 0.3)) {
      return "low_confidence";
    }
    return null;
  };
  const validated = state.extractions.map((ext) => {
    const reason = rejectionFor(ext);
    return reason === null
      ? { ...ext, valid: true }
      : { ...ext, valid: false, reject_reason: reason };
  });
  const rejected = validated.filter((e) => !e.valid);
  logger.info("Agent: post-filter", {
    alertId: state.alertId,
    passed: validated.length - rejected.length,
    rejected: rejected.length,
    reasons: rejected.map((r) => r.reject_reason),
  });
  return { extractions: validated };
}
|
|
295
|
-
// ─────────────────────────────────────────────────────────
|
|
296
|
-
// Tier 3: Vote (deterministic, 0 tokens)
|
|
297
|
-
// ─────────────────────────────────────────────────────────
|
|
298
|
-
/**
 * Tier 3: merge every extraction marked `valid` into one consensus record
 * (0 tokens).
 *
 * Aggregation rules:
 *  - sources get 1-based citation numbers in their existing order;
 *  - ETA comes from the most confident source that reported one;
 *  - countries of origin are grouped, each with the citing source numbers;
 *  - rocket count is reported as the min…max range across sources;
 *  - cluster munitions use a strict majority of the sources that answered;
 *  - hits and each breakdown figure (intercepted / sea / open area) use the
 *    upper median of the exact numbers; when no exact number exists, the most
 *    frequent qualitative descriptor is used together with the upper median
 *    of its reference numbers;
 *  - every per-field confidence is the mean of `source_trust * confidence`
 *    over the sources that contributed to the field (0 when none did).
 *
 * @param {object} state - graph state; reads `extractions` and `alertId`.
 * @returns {{votedResult: object|null}} `null` when no extraction is valid.
 */
function vote(state) {
  const accepted = state.extractions.filter((ext) => ext.valid);
  if (accepted.length === 0) {
    return { votedResult: null };
  }

  // Citation numbers start at 1; every accepted source is cited.
  const indexed = accepted.map((ext, position) => ({ ...ext, idx: position + 1 }));
  const citedSources = indexed.map(({ idx, channel, messageUrl }) => ({
    index: idx,
    channel,
    messageUrl: messageUrl ?? null,
  }));

  // ── Local helpers ──────────────────────────────────────
  const reporting = (key) => indexed.filter((src) => src[key] !== null);
  const citationsOf = (srcs) => srcs.map((src) => src.idx);
  // Element at floor(n / 2) of the ascending list, or null when empty.
  const upperMedian = (values) => {
    if (values.length === 0) {
      return null;
    }
    const ascending = [...values].sort((a, b) => a - b);
    return ascending[Math.floor(ascending.length / 2)];
  };
  // Mean of source_trust * confidence; 0 for an empty set.
  const meanWeight = (srcs) => {
    if (srcs.length === 0) {
      return 0;
    }
    let total = 0;
    for (const src of srcs) {
      total += src.source_trust * src.confidence;
    }
    return total / srcs.length;
  };
  // Most frequent value; ties go to the value seen first.
  const mostFrequent = (values) => {
    const counts = new Map();
    for (const value of values) {
      counts.set(value, (counts.get(value) ?? 0) + 1);
    }
    let winner = null;
    let winnerCount = 0;
    for (const [value, count] of counts) {
      if (count > winnerCount) {
        winner = value;
        winnerCount = count;
      }
    }
    return winner;
  };
  // Exact number if any source gave one, otherwise the qualitative fallback.
  const tally = (field) => {
    const qualKey = `${field}_qual`;
    const qualNumKey = `${field}_qual_num`;
    const exactSrcs = reporting(field);
    const qualSrcs = reporting(qualKey);
    const exact = upperMedian(exactSrcs.map((src) => src[field]));
    const qual = exact === null
      ? mostFrequent(qualSrcs.map((src) => src[qualKey]))
      : null;
    const qualNum = qual !== null
      ? upperMedian(qualSrcs.map((src) => src[qualNumKey]).filter((v) => v !== null))
      : null;
    const confidence = meanWeight(exactSrcs.length > 0 ? exactSrcs : qualSrcs);
    return { exact, qual, qualNum, confidence };
  };

  // ── ETA: highest-confidence source that has one ────────
  const [bestEta = null] = reporting("eta_refined_minutes")
    .sort((a, b) => b.confidence - a.confidence);

  // ── Country of origin: unique names with their citations
  const citationsByCountry = new Map();
  for (const src of indexed) {
    if (!src.country_origin) {
      continue;
    }
    if (!citationsByCountry.has(src.country_origin)) {
      citationsByCountry.set(src.country_origin, []);
    }
    citationsByCountry.get(src.country_origin).push(src.idx);
  }
  const country_origins = citationsByCountry.size > 0
    ? [...citationsByCountry].map(([name, citations]) => ({ name, citations }))
    : null;

  // ── Rocket count: range across sources ─────────────────
  const rocketSrcs = reporting("rocket_count");
  const rocketVals = rocketSrcs.map((src) => src.rocket_count);
  const hasRocketVals = rocketVals.length > 0;

  // ── Cluster munitions: strict majority ─────────────────
  const cassSrcs = reporting("is_cassette");
  const cassYes = cassSrcs.filter((src) => Boolean(src.is_cassette)).length;
  const is_cassette = cassSrcs.length > 0 ? cassYes > cassSrcs.length / 2 : null;

  // ── Hits: median over all reports, citations only for positive ones
  const hitsSrcs = indexed.filter(
    (src) => src.hits_confirmed !== null && src.hits_confirmed > 0,
  );
  const hits_confirmed = upperMedian(
    reporting("hits_confirmed").map((src) => src.hits_confirmed),
  );

  // ── Breakdown figures ──────────────────────────────────
  const interceptedTally = tally("intercepted");
  const seaTally = tally("sea_impact");
  const openTally = tally("open_area_impact");

  const voted = {
    eta_refined_minutes: bestEta?.eta_refined_minutes ?? null,
    eta_citations: bestEta ? [bestEta.idx] : [],
    country_origins,
    rocket_count_min: hasRocketVals ? Math.min(...rocketVals) : null,
    rocket_count_max: hasRocketVals ? Math.max(...rocketVals) : null,
    rocket_citations: citationsOf(rocketSrcs),
    rocket_confidence: meanWeight(rocketSrcs),
    is_cassette,
    is_cassette_confidence: meanWeight(cassSrcs),
    intercepted: interceptedTally.exact,
    intercepted_qual: interceptedTally.qual,
    intercepted_qual_num: interceptedTally.qualNum,
    intercepted_confidence: interceptedTally.confidence,
    sea_impact: seaTally.exact,
    sea_impact_qual: seaTally.qual,
    sea_impact_qual_num: seaTally.qualNum,
    sea_confidence: seaTally.confidence,
    open_area_impact: openTally.exact,
    open_area_impact_qual: openTally.qual,
    open_area_impact_qual_num: openTally.qualNum,
    open_area_confidence: openTally.confidence,
    hits_confirmed,
    hits_citations: citationsOf(hitsSrcs),
    hits_confidence: meanWeight(hitsSrcs),
    // Overall confidence, rounded to two decimals.
    confidence: Math.round(meanWeight(indexed) * 100) / 100,
    sources_count: indexed.length,
    citedSources,
  };
  logger.info("Agent: voted", { alertId: state.alertId, voted });
  return { votedResult: voted };
}
|
|
457
|
-
// ─────────────────────────────────────────────────────────
|
|
458
|
-
// Tier 4: Edit message — inline update (0 tokens)
|
|
459
|
-
// ─────────────────────────────────────────────────────────
|
|
460
|
-
/**
 * English country/actor name → Russian display name.
 *
 * The lookup key comes from LLM output, so the table has no prototype:
 * a key such as "constructor" or "toString" must miss and yield `undefined`
 * instead of an inherited `Object.prototype` member. The table is frozen
 * because it is a shared constant.
 */
const COUNTRY_RU = Object.freeze(Object.assign(Object.create(null), {
  Iran: "Иран",
  Yemen: "Йемен",
  Lebanon: "Ливан",
  Gaza: "Газа",
  Iraq: "Ирак",
  Syria: "Сирия",
  Hezbollah: "Хезболла",
}));
|
|
470
|
-
/** Superscript glyph for each decimal digit, indexed by the digit's value. */
const SUPERSCRIPTS = ["⁰", "¹", "²", "³", "⁴", "⁵", "⁶", "⁷", "⁸", "⁹"];
/**
 * Render citation numbers as one run of Unicode superscript digits.
 *
 * Each number is converted digit by digit and the results are concatenated
 * without a separator, so `[1, 3]` and `[13]` both render as "¹³".
 * Characters that are not decimal digits contribute nothing.
 *
 * @param {number[]} indices - citation numbers.
 * @returns {string} the superscript string ("" for an empty list).
 */
function sup(indices) {
  const rendered = [];
  for (const n of indices) {
    const glyphs = Array.from(String(n), (digit) => SUPERSCRIPTS[Number(digit)]);
    rendered.push(glyphs.join(""));
  }
  return rendered.join("");
}
|
|
480
|
-
/**
 * Merge the voted enrichment data INTO the existing key:value alert message.
 *
 * Steps, in order:
 *  1. for "early_warning" / "siren" alerts with a cited ETA, replace the ETA
 *     in place via `refineEtaInPlace`;
 *  2. insert an origin line ("Откуда") before the alert-time line, preceded
 *     by a blank line;
 *  3. insert a rocket-count line ("Ракет") with optional breakdown and
 *     cluster-munition note, when rocket confidence is at least SKIP;
 *  4. insert a confirmed-hits line when hits > 0 and hits confidence is at
 *     least SKIP;
 *  5. append a sources footer with one link per cited source that has a URL.
 *
 * The produced text contains HTML tags (<b>, <i>, <a>). Every interpolated
 * value is therefore HTML-escaped, and the "more than" / "less than" markers
 * are emitted as `&gt;` / `&lt;` so a value like "<3" cannot be read as the
 * start of a tag.
 *
 * @param {string} currentText - message text as currently shown.
 * @param {string} alertType - alert type of the message being enriched.
 * @param {number} alertTs - alert timestamp in epoch milliseconds.
 * @param {object} r - voted result as produced by `vote`.
 * @returns {string} the enriched message text.
 */
function buildEnrichedMessage(currentText, alertType, alertTs, r) {
  // Confidence thresholds for uncertainty markers
  const SKIP = 0.6; // below this → skip field entirely
  const UNCERTAIN = 0.75; // below this (but ≥ SKIP) → add (?)
  const CERTAIN = 0.95; // "none" qual requires this level

  // Escape a value for use in HTML text or a double-quoted attribute.
  const escapeHtml = (value) => String(value)
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;");
  const uncertaintyMark = (conf) => (conf < UNCERTAIN ? " (?)" : "");

  // Convert a qualitative count to its display string (already HTML-safe).
  // Returns null if the qual should be suppressed (e.g. "none" below CERTAIN).
  const qualDisplay = (qual, qualNum, conf) => {
    switch (qual) {
      case null:
        return null;
      case "none":
        return conf >= CERTAIN ? "нет" : null;
      case "all":
        return "все";
      case "most":
        return "большинство";
      case "many":
        return "много";
      case "few":
        return "несколько";
      case "exists":
        return "есть";
      case "more_than":
        return qualNum != null ? `&gt;${escapeHtml(qualNum)}` : "&gt;1";
      case "less_than":
        return qualNum != null ? `&lt;${escapeHtml(qualNum)}` : "&lt;нескольких";
      default:
        // Unknown descriptor: nothing to display (matches a missing map entry).
        return undefined;
    }
  };

  // Format one breakdown item: prefer exact number, fall back to qual.
  // Returns null if nothing to show (below threshold or not reported).
  const breakdownItem = (label, num, qual, qualNum, conf) => {
    if (conf < SKIP) {
      return null;
    }
    if (num !== null) {
      return `${label} — ${escapeHtml(num)}${uncertaintyMark(conf)}`;
    }
    const shown = qualDisplay(qual, qualNum, conf);
    if (shown === null) {
      return null;
    }
    return `${label} — ${shown}${uncertaintyMark(conf)}`;
  };

  let text = currentText;

  // 1. Refine ETA in-place (early/siren only)
  if (r.eta_refined_minutes !== null &&
    r.eta_citations.length > 0 &&
    (alertType === "early_warning" || alertType === "siren")) {
    text = refineEtaInPlace(text, r.eta_refined_minutes, alertTs, r.eta_citations);
  }

  // 2. Origin line, with a leading blank line for visual separation
  if (r.country_origins && r.country_origins.length > 0) {
    const parts = r.country_origins.map((c) => {
      // Own-property check: the name comes from LLM output and must not
      // resolve to an inherited member of the lookup table.
      const known = Object.prototype.hasOwnProperty.call(COUNTRY_RU, c.name);
      const shownName = known ? COUNTRY_RU[c.name] : c.name;
      return `${escapeHtml(shownName)}${sup(c.citations)}`;
    });
    text = insertBeforeTimeLine(text, `\n<b>Откуда:</b> ${parts.join(" + ")}`);
  }

  // 3. Rocket count with breakdown and uncertainty markers
  if (r.rocket_count_min !== null &&
    r.rocket_count_max !== null &&
    r.rocket_confidence >= SKIP) {
    const countStr = r.rocket_count_min === r.rocket_count_max
      ? `${escapeHtml(r.rocket_count_min)}`
      : `~${escapeHtml(r.rocket_count_min)}–${escapeHtml(r.rocket_count_max)}`;
    const bParts = [
      breakdownItem("перехвачено", r.intercepted, r.intercepted_qual, r.intercepted_qual_num, r.intercepted_confidence),
      breakdownItem("упали в море", r.sea_impact, r.sea_impact_qual, r.sea_impact_qual_num, r.sea_confidence),
      breakdownItem("открытая местность", r.open_area_impact, r.open_area_impact_qual, r.open_area_impact_qual_num, r.open_area_confidence),
    ].filter(Boolean);
    const breakdown = bParts.length > 0 ? `, из них: ${bParts.join(", ")}` : "";
    const cassette = r.is_cassette && r.is_cassette_confidence >= SKIP
      ? `, есть кассетные${uncertaintyMark(r.is_cassette_confidence)}`
      : "";
    text = insertBeforeTimeLine(text, `<b>Ракет:</b> ${countStr}${uncertaintyMark(r.rocket_confidence)}${breakdown}${cassette}`);
  }

  // 4. Confirmed hits — only if confidence ≥ SKIP
  if (r.hits_confirmed !== null &&
    r.hits_confirmed > 0 &&
    r.hits_confidence >= SKIP) {
    // Uses the first configured area label, or a generic word when none exists.
    const areaLabel = Object.values(config.agent.areaLabels)[0] ?? "район";
    const hitWord = r.hits_confirmed === 1 ? "попадание" : "попадания";
    const hitsCite = r.hits_citations.length > 0 ? sup(r.hits_citations) : "";
    text = insertBeforeTimeLine(text, `есть прямое ${hitWord} в ${escapeHtml(areaLabel)}: ${escapeHtml(r.hits_confirmed)}${hitsCite}${uncertaintyMark(r.hits_confidence)}`);
  }

  // 5. Sources footer: one numbered link per source that has a URL
  const sourcesWithUrl = r.citedSources.filter((s) => s.messageUrl);
  if (sourcesWithUrl.length > 0) {
    const links = sourcesWithUrl
      .map((s) => `<a href="${escapeHtml(s.messageUrl)}">[${s.index}]</a>`)
      .join(" ");
    text += `\n—\n<i>Источники: ${links}</i>`;
  }
  return text;
}
|
|
589
|
-
/**
 * Insert `line` (followed by a newline) immediately before the bold
 * alert-time label, so new data stays grouped with the existing fields.
 *
 * The label is recognised in Russian, English, Hebrew and Arabic. When no
 * label is present, `line` is inserted as a new row just before the last row
 * of the text.
 *
 * @param {string} text - current message text.
 * @param {string} line - row to insert.
 * @returns {string} the text with `line` inserted.
 */
function insertBeforeTimeLine(text, line) {
  // "Время оповещения" / "Alert time" / "שעת ההתרעה" / "وقت الإنذار"
  const timeLabel = /(<b>(?:Время оповещения|Alert time|שעת ההתרעה|وقت الإنذار):<\/b>)/;
  const labelAt = text.search(timeLabel);
  if (labelAt !== -1) {
    const head = text.slice(0, labelAt);
    const tail = text.slice(labelAt);
    return `${head}${line}\n${tail}`;
  }
  // Fallback: place the new row before the last row.
  const rows = text.split("\n");
  const lastRowAt = Math.max(rows.length - 1, 0);
  return [...rows.slice(0, lastRowAt), line, ...rows.slice(lastRowAt)].join("\n");
}
|
|
605
|
-
/**
 * Replace the default ETA range with absolute impact time + superscript citation.
 * "~5–12 мин" → "~00:21¹"
 *
 * The impact time is `alertTs + minutes` rendered as HH:MM in the
 * Asia/Jerusalem time zone. Only the first ETA pattern (in list order) that
 * occurs in `text` is rewritten, and only its first occurrence.
 *
 * @param {string} text - Message body containing a default ETA.
 * @param {number} minutes - Minutes from the alert until expected impact.
 * @param {number} alertTs - Alert timestamp in epoch milliseconds.
 * @param {Array} citations - Citation indices passed through to `sup`.
 * @returns {string} The text with the ETA refined, or the original text when
 *   no ETA pattern is present or the impact time cannot be computed.
 */
function refineEtaInPlace(text, minutes, alertTs, citations) {
    const impact = new Date(alertTs + minutes * 60_000);
    // A missing or non-numeric input would otherwise render as "~Invalid Date"
    // in the user-facing message; leave the default ETA untouched instead.
    if (Number.isNaN(impact.getTime())) {
        return text;
    }
    // Compute absolute impact time in Israel timezone
    const absTime = impact.toLocaleTimeString("he-IL", {
        hour: "2-digit",
        minute: "2-digit",
        timeZone: "Asia/Jerusalem",
    });
    const refined = `~${absTime}${sup(citations)}`;
    const etaPatterns = [
        /~\d+[–-]\d+\s*мин/, // ~5–12 мин
        /~\d+[–-]\d+\s*min/, // ~5–12 min
        /~\d+[–-]\d+\s*דקות/, // ~5–12 דקות
        /~\d+[–-]\d+\s*دقائق/, // ~5–12 دقائق
        /1\.5\s*мин/, // 1.5 мин (siren)
        /1\.5\s*min/, // 1.5 min
        /1\.5\s*דקות/, // 1.5 דקות
        /1\.5\s*دقائق/, // 1.5 دقائق
    ];
    const matched = etaPatterns.find((pattern) => pattern.test(text));
    if (!matched) {
        return text;
    }
    // Replacer function: the refined text is inserted verbatim, so "$"
    // sequences in it are never interpreted as replacement patterns.
    return text.replace(matched, () => refined);
}
|
|
630
|
-
/**
 * Graph node: write the voted enrichment into the already-sent message.
 *
 * Does nothing when no bot token is configured, when there is no voted
 * result, or when enrichment leaves the text unchanged. Otherwise edits the
 * message caption (`state.isCaption`) or text with HTML parse mode. Edit
 * failures are logged and not rethrown.
 *
 * @param {object} state - Agent state; reads `votedResult`, `alertId`,
 *   `alertType`, `alertTs`, `currentText`, `chatId`, `messageId`, `isCaption`.
 * @returns {Promise<object>} Always an empty object (no state changes).
 */
async function editMessage(state) {
    const { votedResult } = state;
    if (!config.botToken) {
        return {};
    }
    // No valid sources found — silently skip (don't touch the message)
    if (!votedResult) {
        logger.info("Agent: no voted result — skipping edit", {
            alertId: state.alertId,
        });
        return {};
    }
    // Low confidence: log but still show data with (?) markers
    if (votedResult.confidence < config.agent.confidenceThreshold) {
        logger.info("Agent: confidence below threshold — editing with (?) markers", {
            alertId: state.alertId,
            confidence: votedResult.confidence,
            threshold: config.agent.confidenceThreshold,
        });
    }
    const newText = buildEnrichedMessage(state.currentText, state.alertType, state.alertTs, votedResult);
    // An edit that changes nothing is rejected by the Telegram Bot API
    // ("message is not modified") and would only produce a misleading error log.
    if (newText === state.currentText) {
        logger.info("Agent: enrichment produced no changes — skipping edit", {
            alertId: state.alertId,
        });
        return {};
    }
    // Create the client only once an edit is actually going to be sent.
    const tgBot = new Bot(config.botToken);
    try {
        if (state.isCaption) {
            await tgBot.api.editMessageCaption(state.chatId, state.messageId, {
                caption: newText,
                parse_mode: "HTML",
            });
        }
        else {
            await tgBot.api.editMessageText(state.chatId, state.messageId, newText, {
                parse_mode: "HTML",
            });
        }
        logger.info("Agent: message enriched", {
            alertId: state.alertId,
            messageId: state.messageId,
            confidence: votedResult.confidence,
            sources: votedResult.sources_count,
        });
    }
    catch (err) {
        // Best-effort: a failed edit must not fail the whole graph run.
        logger.error("Agent: failed to edit message", {
            alertId: state.alertId,
            error: String(err),
        });
    }
    return {};
}
|
|
678
|
-
// ── Build graph ────────────────────────────────────────
|
|
679
|
-
/**
 * Assemble and compile the enrichment state graph.
 *
 * The graph is a straight line: `__start__` → collectAndPreFilter →
 * extractAndValidate → postFilter → vote → editMessage → `__end__`.
 *
 * @returns The result of `compile()` on the assembled `StateGraph`.
 */
function buildGraph() {
    // Stages in execution order; each entry is [node id, node handler].
    const stages = [
        ["collectAndPreFilter", collectAndPreFilter],
        ["extractAndValidate", extractAndValidate],
        ["postFilter", postFilter],
        ["vote", vote],
        ["editMessage", editMessage],
    ];
    // Register every node first, threading the builder returned by each call.
    const withNodes = stages.reduce(
        (builder, [id, handler]) => builder.addNode(id, handler),
        new StateGraph(AgentState),
    );
    // Then link each step of the route to the one that follows it.
    const route = ["__start__", ...stages.map(([id]) => id), "__end__"];
    const wired = route
        .slice(1)
        .reduce((builder, target, i) => builder.addEdge(route[i], target), withNodes);
    return wired.compile();
}
|
|
694
|
-
/**
 * Run one enrichment pass for a single alert message.
 *
 * Builds a fresh graph and invokes it with the alert/message fields copied
 * from `input` plus empty working collections and no voted result.
 *
 * @param {object} input - Provides `alertId`, `alertTs`, `alertType`,
 *   `alertAreas`, `chatId`, `messageId`, `isCaption` and `currentText`.
 * @returns {Promise<void>} Resolves once the graph invocation has finished.
 */
export async function runEnrichment(input) {
    const pipeline = buildGraph();
    const { alertId, alertTs, alertType, alertAreas, chatId, messageId, isCaption, currentText } = input;
    const initialState = {
        alertId,
        alertTs,
        alertType,
        alertAreas,
        chatId,
        messageId,
        isCaption,
        currentText,
        // Working fields start empty.
        channelPosts: [],
        filteredPosts: [],
        extractions: [],
        votedResult: null,
    };
    await pipeline.invoke(initialState);
}
|
|
711
|
-
//# sourceMappingURL=graph.js.map
|