scientify 1.13.6 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.en.md +371 -0
- package/README.md +167 -356
- package/dist/index.d.ts +8 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +131 -122
- package/dist/index.js.map +1 -1
- package/dist/src/cli/research.d.ts +1 -6
- package/dist/src/cli/research.d.ts.map +1 -1
- package/dist/src/cli/research.js +227 -123
- package/dist/src/cli/research.js.map +1 -1
- package/dist/src/commands/metabolism-status.d.ts +3 -3
- package/dist/src/commands/metabolism-status.d.ts.map +1 -1
- package/dist/src/commands/metabolism-status.js +72 -75
- package/dist/src/commands/metabolism-status.js.map +1 -1
- package/dist/src/commands.d.ts +1 -1
- package/dist/src/commands.d.ts.map +1 -1
- package/dist/src/commands.js +0 -55
- package/dist/src/commands.js.map +1 -1
- package/dist/src/hooks/cron-skill-inject.d.ts +6 -7
- package/dist/src/hooks/cron-skill-inject.d.ts.map +1 -1
- package/dist/src/hooks/cron-skill-inject.js +6 -15
- package/dist/src/hooks/cron-skill-inject.js.map +1 -1
- package/dist/src/hooks/research-mode.d.ts +1 -1
- package/dist/src/hooks/research-mode.d.ts.map +1 -1
- package/dist/src/hooks/research-mode.js +24 -101
- package/dist/src/hooks/research-mode.js.map +1 -1
- package/dist/src/hooks/scientify-signature.d.ts +1 -1
- package/dist/src/hooks/scientify-signature.d.ts.map +1 -1
- package/dist/src/hooks/scientify-signature.js +2 -5
- package/dist/src/hooks/scientify-signature.js.map +1 -1
- package/dist/src/knowledge-state/render.d.ts +1 -9
- package/dist/src/knowledge-state/render.d.ts.map +1 -1
- package/dist/src/knowledge-state/render.js +33 -187
- package/dist/src/knowledge-state/render.js.map +1 -1
- package/dist/src/knowledge-state/store.d.ts.map +1 -1
- package/dist/src/knowledge-state/store.js +65 -1100
- package/dist/src/knowledge-state/store.js.map +1 -1
- package/dist/src/knowledge-state/types.d.ts +0 -76
- package/dist/src/knowledge-state/types.d.ts.map +1 -1
- package/dist/src/literature/subscription-state.d.ts +0 -2
- package/dist/src/literature/subscription-state.d.ts.map +1 -1
- package/dist/src/literature/subscription-state.js +7 -1375
- package/dist/src/literature/subscription-state.js.map +1 -1
- package/dist/src/research-subscriptions/constants.d.ts +1 -1
- package/dist/src/research-subscriptions/constants.js +1 -1
- package/dist/src/research-subscriptions/cron-client.d.ts +1 -1
- package/dist/src/research-subscriptions/cron-client.d.ts.map +1 -1
- package/dist/src/research-subscriptions/delivery.d.ts +1 -1
- package/dist/src/research-subscriptions/delivery.d.ts.map +1 -1
- package/dist/src/research-subscriptions/handlers.d.ts +1 -1
- package/dist/src/research-subscriptions/handlers.d.ts.map +1 -1
- package/dist/src/research-subscriptions/handlers.js +10 -20
- package/dist/src/research-subscriptions/handlers.js.map +1 -1
- package/dist/src/research-subscriptions/parse.d.ts.map +1 -1
- package/dist/src/research-subscriptions/parse.js +0 -25
- package/dist/src/research-subscriptions/parse.js.map +1 -1
- package/dist/src/research-subscriptions/prompt.d.ts +1 -1
- package/dist/src/research-subscriptions/prompt.d.ts.map +1 -1
- package/dist/src/research-subscriptions/prompt.js +195 -244
- package/dist/src/research-subscriptions/prompt.js.map +1 -1
- package/dist/src/research-subscriptions/types.d.ts +1 -3
- package/dist/src/research-subscriptions/types.d.ts.map +1 -1
- package/dist/src/templates/bootstrap.d.ts.map +1 -1
- package/dist/src/templates/bootstrap.js +32 -19
- package/dist/src/templates/bootstrap.js.map +1 -1
- package/dist/src/tools/arxiv-download.d.ts +1 -2
- package/dist/src/tools/arxiv-download.d.ts.map +1 -1
- package/dist/src/tools/arxiv-search.d.ts +1 -2
- package/dist/src/tools/arxiv-search.d.ts.map +1 -1
- package/dist/src/tools/github-search-tool.d.ts +1 -2
- package/dist/src/tools/github-search-tool.d.ts.map +1 -1
- package/dist/src/tools/openalex-search.d.ts +1 -2
- package/dist/src/tools/openalex-search.d.ts.map +1 -1
- package/dist/src/tools/openreview-lookup.d.ts +1 -2
- package/dist/src/tools/openreview-lookup.d.ts.map +1 -1
- package/dist/src/tools/paper-browser.d.ts +1 -2
- package/dist/src/tools/paper-browser.d.ts.map +1 -1
- package/dist/src/tools/result.d.ts +3 -5
- package/dist/src/tools/result.d.ts.map +1 -1
- package/dist/src/tools/result.js +5 -7
- package/dist/src/tools/result.js.map +1 -1
- package/dist/src/tools/scientify-cron.d.ts +4 -11
- package/dist/src/tools/scientify-cron.d.ts.map +1 -1
- package/dist/src/tools/scientify-cron.js +19 -524
- package/dist/src/tools/scientify-cron.js.map +1 -1
- package/dist/src/tools/scientify-literature-state.d.ts +1 -76
- package/dist/src/tools/scientify-literature-state.d.ts.map +1 -1
- package/dist/src/tools/scientify-literature-state.js +46 -363
- package/dist/src/tools/scientify-literature-state.js.map +1 -1
- package/dist/src/tools/unpaywall-download.d.ts +1 -2
- package/dist/src/tools/unpaywall-download.d.ts.map +1 -1
- package/dist/src/types.d.ts +16 -0
- package/dist/src/types.d.ts.map +1 -0
- package/dist/src/types.js +2 -0
- package/dist/src/types.js.map +1 -0
- package/openclaw.plugin.json +4 -2
- package/package.json +1 -1
- package/skills/metabolism/SKILL.md +2 -0
- package/skills/research-subscription/SKILL.md +1 -29
- package/README.zh.md +0 -494
|
@@ -9,19 +9,6 @@ const DEFAULT_SOURCES = ["openalex", "arxiv"];
|
|
|
9
9
|
const MAX_MEMORY_NOTES = 30;
|
|
10
10
|
const MAX_MEMORY_KEYS = 60;
|
|
11
11
|
const TOP_HINT_LIMIT = 8;
|
|
12
|
-
// Timeout (ms) passed to the first full-text fetch attempt in backfillStrictCoreFullText.
const DEFAULT_FULLTEXT_FETCH_TIMEOUT_MS = 20_000;
|
|
13
|
-
// Longer timeout (ms) used when the first full-text fetch attempt did not succeed.
const RETRY_FULLTEXT_FETCH_TIMEOUT_MS = 35_000;
|
|
14
|
-
// Fetched HTML whose plain-text form is shorter than this is rejected as "content_too_short".
const MIN_FULLTEXT_TEXT_CHARS = 2_000;
|
|
15
|
-
// NOTE(review): not referenced in the visible code; presumably the default for args.maxAttempts — confirm.
const MAX_STRICT_FULLTEXT_ATTEMPTS = 5;
|
|
16
|
-
// Endpoint queried by fetchArxivFallbackByQuery.
const ARXIV_API_URL = "https://export.arxiv.org/api/query";
|
|
17
|
-
// Sent as max_results on each fallback arXiv query.
const STRICT_EMPTY_FALLBACK_MAX_RESULTS = 12;
|
|
18
|
-
// Upper bound on the number of deduplicated queries kept per fallback tier.
const STRICT_EMPTY_FALLBACK_MAX_QUERIES = 4;
|
|
19
|
-
// NOTE(review): usage not visible in this chunk — confirm meaning against the caller.
const DEFAULT_STRICT_CANDIDATE_POOL = 24;
|
|
20
|
-
// NOTE(review): usage not visible in this chunk — confirm meaning against the caller.
const DEFAULT_STRICT_MIN_CORE_FLOOR = 3;
|
|
21
|
-
// Default tier A/B/C recall ratios; clampRecallTierRatios falls back to these when its inputs sum to <= 0.
const TIER_A_RATIO = 0.5;
|
|
22
|
-
const TIER_B_RATIO = 0.35;
|
|
23
|
-
const TIER_C_RATIO = 0.15;
|
|
24
|
-
// NOTE(review): usage not visible in this chunk — confirm meaning against the caller.
const REFLECTION_MAX_ADDED_PAPERS = 2;
|
|
25
12
|
const FEEDBACK_SIGNAL_DELTA = {
|
|
26
13
|
read: 1,
|
|
27
14
|
skip: -1,
|
|
@@ -184,201 +171,6 @@ function derivePaperId(paper) {
|
|
|
184
171
|
const digest = createHash("sha1").update(fallback || JSON.stringify(paper)).digest("hex");
|
|
185
172
|
return `hash:${digest.slice(0, 20)}`;
|
|
186
173
|
}
|
|
187
|
-
/**
 * Canonicalise one arXiv identifier token.
 *
 * The token is passed through normalizeText, an optional leading "arxiv:"
 * prefix is dropped, and the remainder must match, in full, either the
 * modern form (e.g. "2106.09685" / "2106.09685v2") or the legacy
 * "archive/NNNNNNN" form (optionally versioned).
 *
 * @param {string} token - Raw identifier text.
 * @returns {string|undefined} Lower-cased identifier, or undefined when the
 *   token is empty or matches neither form.
 */
function normalizeArxivToken(token) {
    const bare = normalizeText(token).replace(/^arxiv:/i, "");
    if (!bare) {
        return undefined;
    }
    // Tried in order: modern identifiers first, then legacy ones.
    const acceptedShapes = [
        /^(\d{4}\.\d{4,5}(?:v\d+)?)$/i,
        /^([a-z\-]+(?:\.[a-z\-]+)?\/\d{7}(?:v\d+)?)$/i,
    ];
    for (const shape of acceptedShapes) {
        const hit = bare.match(shape);
        if (hit?.[1]) {
            return hit[1].toLowerCase();
        }
    }
    return undefined;
}
|
|
199
|
-
/**
 * Remove a trailing version marker ("v" followed by digits, any case) from an
 * arXiv identifier.
 *
 * @param {string} id - Identifier such as "2106.09685v2".
 * @returns {string} The identifier without its version suffix; unchanged when
 *   there is none.
 */
function stripArxivVersion(id) {
    const trailingVersion = /v\d+$/i;
    return id.replace(trailingVersion, "");
}
|
|
202
|
-
/**
 * Collect every arXiv identifier that can be recognised on a paper record.
 *
 * `paper.id` is tried as a whole token; `paper.url` and `paper.title` are
 * scanned for embedded modern and legacy identifiers. Each identifier found
 * is followed by its version-less form, and duplicates are dropped while
 * keeping first-seen order.
 *
 * @param {{id?: string, url?: string, title?: string}} paper
 * @returns {string[]} Ordered, de-duplicated identifier candidates.
 */
function parseArxivIdCandidatesFromPaper(paper) {
    const found = [];
    const collect = (raw) => {
        const id = raw ? normalizeArxivToken(raw) : undefined;
        if (id) {
            found.push(id);
        }
    };
    collect(paper.id);
    const haystack = [paper.url, paper.title].filter(Boolean).join(" ");
    const scanners = [
        /\b(\d{4}\.\d{4,5}(?:v\d+)?)\b/gi,
        /\b([a-z\-]+(?:\.[a-z\-]+)?\/\d{7}(?:v\d+)?)\b/gi,
    ];
    for (const scanner of scanners) {
        for (const match of haystack.matchAll(scanner)) {
            collect(match[1]);
        }
    }
    // A Set keeps insertion order, so each id is followed by its base form
    // the first time either of them is seen.
    const ordered = new Set();
    for (const id of found) {
        ordered.add(id);
        ordered.add(stripArxivVersion(id));
    }
    return [...ordered];
}
|
|
234
|
-
/**
 * Reduce an HTML document to readable plain text.
 *
 * Steps, in order:
 *  1. drop <script>, <style>, <noscript>, <svg> and <math> elements entirely;
 *  2. turn block-level tags into newlines and every other tag into a space;
 *  3. decode the common character entities;
 *  4. normalise line endings and collapse redundant whitespace.
 *
 * @param {string} html - Raw HTML markup.
 * @returns {string} Trimmed plain text.
 */
function htmlToPlainText(html) {
    return html
        .replace(/<script[\s\S]*?<\/script>/gi, " ")
        .replace(/<style[\s\S]*?<\/style>/gi, " ")
        .replace(/<noscript[\s\S]*?<\/noscript>/gi, " ")
        .replace(/<svg[\s\S]*?<\/svg>/gi, " ")
        .replace(/<math[\s\S]*?<\/math>/gi, " ")
        .replace(/<\/?(?:p|div|section|article|h\d|li|ul|ol|br|tr|td|th|table|blockquote)[^>]*>/gi, "\n")
        .replace(/<[^>]+>/g, " ")
        // Entities are decoded only after tags are stripped, so decoded "<" / ">"
        // are never mistaken for markup.
        .replace(/&nbsp;/gi, " ")
        .replace(/&lt;/gi, "<")
        .replace(/&gt;/gi, ">")
        .replace(/&quot;/gi, "\"")
        .replace(/&#39;/gi, "'")
        // "&amp;" must go last: decoding it earlier would turn "&amp;lt;" into "<".
        .replace(/&amp;/gi, "&")
        .replace(/\r/g, "")
        .replace(/[ \t]+\n/g, "\n")
        .replace(/\n{3,}/g, "\n\n")
        .replace(/[ \t]{2,}/g, " ")
        .trim();
}
|
|
255
|
-
/**
 * Try to obtain the full text of an arXiv paper from its HTML renderings.
 *
 * For every identifier accepted by normalizeArxivToken, the arxiv.org and
 * ar5iv.org HTML URLs are queued (duplicates skipped) and fetched one at a
 * time, each with its own abort timer. The first response that is OK and
 * whose plain text has at least MIN_FULLTEXT_TEXT_CHARS characters wins.
 *
 * @param {string[]} arxivIds - Identifier candidates, tried in order.
 * @param {number} timeoutMs - Per-request abort timeout in milliseconds.
 * @returns {Promise<{ok: true, sourceUrl: string, sourceTag: string, plainText: string}
 *   | {ok: false, reason: string}>} On failure `reason` is the ";"-joined
 *   per-URL errors, or "html_fulltext_unavailable" if nothing was attempted.
 */
async function fetchArxivFullTextByHtmlCandidates(arxivIds, timeoutMs) {
    const mirrors = [
        { base: "https://arxiv.org/html", tag: "arxiv_html" },
        { base: "https://ar5iv.org/html", tag: "ar5iv_html" },
    ];
    // url -> tag, in first-seen order.
    const targets = new Map();
    for (const rawId of arxivIds) {
        const id = normalizeArxivToken(rawId);
        if (!id) {
            continue;
        }
        for (const { base, tag } of mirrors) {
            const url = `${base}/${id}`;
            if (!targets.has(url)) {
                targets.set(url, tag);
            }
        }
    }
    const problems = [];
    for (const [url, tag] of targets) {
        const aborter = new AbortController();
        const timeoutHandle = setTimeout(() => aborter.abort(), timeoutMs);
        try {
            const response = await fetch(url, {
                signal: aborter.signal,
                headers: {
                    "User-Agent": "scientify-fulltext-bootstrap/1.0",
                },
            });
            if (!response.ok) {
                problems.push(`${tag}:http_${response.status}`);
                continue;
            }
            const text = htmlToPlainText(await response.text());
            if (text.length < MIN_FULLTEXT_TEXT_CHARS) {
                problems.push(`${tag}:content_too_short(${text.length})`);
                continue;
            }
            return {
                ok: true,
                sourceUrl: url,
                sourceTag: tag,
                plainText: text,
            };
        }
        catch (error) {
            // Record the failure and move on to the next mirror.
            const label = error instanceof Error ? error.name || error.message : "fetch_failed";
            problems.push(`${tag}:${label}`);
        }
        finally {
            // Runs on success, on `continue` and on error alike.
            clearTimeout(timeoutHandle);
        }
    }
    return {
        ok: false,
        reason: problems.length > 0 ? problems.join(";") : "html_fulltext_unavailable",
    };
}
|
|
310
|
-
/**
 * Attempt to upgrade core papers from metadata-only to full-text status.
 *
 * Papers already marked as full-text are passed through untouched. For the
 * rest, arXiv identifiers are derived from id/url/title; papers without one,
 * or encountered after `args.maxAttempts` fetch attempts were spent, are
 * marked unread with an explanatory reason. Each attempt fetches with the
 * default timeout and, if that fails, once more with the retry timeout.
 *
 * @param {{corePapers: object[], maxAttempts: number}} args
 * @returns {Promise<{corePapers: object[], attempted: number, completed: number, failures: string[]}>}
 *   Updated papers in input order plus attempt statistics.
 */
async function backfillStrictCoreFullText(args) {
    const results = [];
    const failures = [];
    let attempted = 0;
    let completed = 0;
    // Existing readStatus / unreadReason on the paper take precedence over the defaults.
    const asUnread = (paper, fallbackReason) => ({
        ...paper,
        fullTextRead: false,
        readStatus: paper.readStatus ?? "metadata",
        unreadReason: paper.unreadReason ?? fallbackReason,
    });
    for (const paper of args.corePapers) {
        const alreadyRead = paper.fullTextRead === true || paper.readStatus === "fulltext";
        if (alreadyRead) {
            results.push(paper);
            continue;
        }
        const { id, url, title } = paper;
        const arxivIds = parseArxivIdCandidatesFromPaper({ id, url, title });
        if (arxivIds.length === 0) {
            results.push(asUnread(paper, "Automatic full-text bootstrap currently supports arXiv papers with parseable IDs only."));
            continue;
        }
        if (attempted >= args.maxAttempts) {
            results.push(asUnread(paper, "Full-text bootstrap attempt budget reached in this run."));
            continue;
        }
        attempted += 1;
        let outcome = await fetchArxivFullTextByHtmlCandidates(arxivIds, DEFAULT_FULLTEXT_FETCH_TIMEOUT_MS);
        if (!outcome.ok) {
            // One retry with the longer timeout before giving up on this paper.
            outcome = await fetchArxivFullTextByHtmlCandidates(arxivIds, RETRY_FULLTEXT_FETCH_TIMEOUT_MS);
        }
        if (!outcome.ok) {
            failures.push(`${arxivIds[0]}:${outcome.reason}`);
            results.push(asUnread(paper, `Automatic full-text fetch failed: ${outcome.reason}`));
            continue;
        }
        completed += 1;
        const excerpt = outcome.plainText.slice(0, 360).replace(/\s+/g, " ").trim();
        const enriched = {
            ...paper,
            fullTextRead: true,
            readStatus: "fulltext",
            fullTextSource: outcome.sourceTag,
            fullTextRef: outcome.sourceUrl,
            unreadReason: undefined,
        };
        // Seed an evidence span from the text only when the paper has none yet.
        const hasSpans = paper.keyEvidenceSpans && paper.keyEvidenceSpans.length > 0;
        if (!hasSpans && excerpt.length > 0) {
            enriched.keyEvidenceSpans = [excerpt];
        }
        results.push(enriched);
    }
    return {
        corePapers: results,
        attempted,
        completed,
        failures,
    };
}
|
|
382
174
|
function sanitizeKeyword(raw) {
|
|
383
175
|
const normalized = normalizeText(raw).toLowerCase();
|
|
384
176
|
if (normalized.length < 2 || normalized.length > 48)
|
|
@@ -396,840 +188,6 @@ function tokenizeKeywords(raw) {
|
|
|
396
188
|
}
|
|
397
189
|
return [...seen];
|
|
398
190
|
}
|
|
399
|
-
/**
 * Guess short acronym-style aliases for a topic from its keyword tokens.
 *
 * Only lower-cased tokens that start with a letter and contain letters,
 * digits, "_" or "-" are considered, and only the first six of those. With
 * fewer than three such tokens no alias is produced. Otherwise the aliases
 * are built from the first three tokens (two letters of the first plus the
 * initials of the next two, and the plain three initials), plus "lora" when
 * the tokens include "low", "rank" and a word starting with "adapt".
 *
 * @param {string[]} tokens - Keyword tokens.
 * @returns {string[]} Unique aliases between 3 and 8 characters long.
 */
function inferTopicAliases(tokens) {
    const wordShape = /^[a-z][a-z0-9_-]*$/;
    const words = [];
    for (const token of tokens) {
        const lowered = token.toLowerCase();
        if (wordShape.test(lowered)) {
            words.push(lowered);
        }
    }
    const leading = words.slice(0, 6);
    if (leading.length < 3) {
        return [];
    }
    const [first, second, third] = leading;
    const found = new Set();
    if (first.length >= 2 && second.length >= 1 && third.length >= 1) {
        found.add(first.slice(0, 2) + second[0] + third[0]);
    }
    found.add(first[0] + second[0] + third[0]);
    const mentionsLoraTerms = leading.includes("low")
        && leading.includes("rank")
        && leading.some((word) => word.startsWith("adapt"));
    if (mentionsLoraTerms) {
        found.add("lora");
    }
    const result = [];
    for (const alias of found) {
        if (alias.length >= 3 && alias.length <= 8) {
            result.push(alias);
        }
    }
    return result;
}
|
|
419
|
-
/**
 * Derive the token list used to score candidate papers against a topic.
 *
 * Keyword tokens of at least four characters that are not directive/stop
 * words are preferred; if none survive, all keyword tokens are used instead.
 * Inferred topic aliases are appended, duplicates removed, and the list is
 * capped at ten entries.
 *
 * @param {string} topic - Free-text topic.
 * @returns {string[]} Up to ten scoring tokens.
 */
function buildScoringTokens(topic) {
    const ignored = new Set([
        "from", "with", "without", "first", "basics", "basic",
        "foundational", "foundation", "seminal", "classic", "avoid",
        "benchmark", "only", "prefer", "authoritative", "latest",
        "recent", "paper", "papers", "study", "works",
    ]);
    const keywords = tokenizeKeywords(topic);
    const aliases = inferTopicAliases(keywords);
    const meaningful = keywords.filter((word) => word.length >= 4 && !ignored.has(word));
    const pool = meaningful.length > 0 ? meaningful : keywords;
    const unique = new Set([...pool, ...aliases]);
    return [...unique].slice(0, 10);
}
|
|
450
|
-
/**
 * Derive the seed tokens used to build retrieval queries for a topic.
 *
 * Keyword tokens are lower-cased; tokens shorter than three characters and
 * directive words (instructions such as "latest" or "strict" rather than
 * subject matter) are dropped. Inferred aliases are appended, duplicates
 * removed, and the list is capped at ten entries.
 *
 * @param {string} topic - Free-text topic.
 * @returns {string[]} Up to ten seed tokens.
 */
function buildRetrievalSeedTokens(topic) {
    const directives = new Set([
        "from", "with", "without", "first", "basics", "basic",
        "foundational", "foundation", "seminal", "classic", "avoid",
        "benchmark", "only", "prefer", "authoritative", "latest",
        "recent", "paper", "papers", "study", "works", "strict", "fast",
    ]);
    const keywords = tokenizeKeywords(topic);
    const aliases = inferTopicAliases(keywords);
    const subjectWords = [];
    for (const keyword of keywords) {
        const lowered = keyword.toLowerCase();
        if (lowered.length >= 3 && !directives.has(lowered)) {
            subjectWords.push(lowered);
        }
    }
    const unique = new Set([...subjectWords, ...aliases]);
    return [...unique].slice(0, 10);
}
|
|
483
|
-
// Requirement-hint patterns, tested against the user's free-text request by
// inferRequirementProfile. They carry the `i` flag because that text is not
// lower-cased before matching, so "Survey" or "Seminal" must match as well.
// Each pattern has English alternatives plus Chinese equivalents.
// "\u5960\u57fa" (奠基) is accepted alongside the previously listed
// "\u5950\u57fa", which looks like a code-point typo for it.
const FOUNDATIONAL_HINT_RE = /\b(foundational|foundation|seminal|classic|groundwork|original paper|from basics|start from basics|first principles)\b|\u57fa\u7840|[\u5950\u5960]\u57fa|\u7ecf\u5178|\u539f\u59cb/iu;
const AVOID_BENCHMARK_HINT_RE = /\b(avoid benchmark|benchmark-only|no benchmark|less benchmark|not benchmark only)\b|\u5c11\u63a8.*benchmark|\u4e0d\u8981.*benchmark/iu;
const SURVEY_HINT_RE = /\b(survey|review|taxonomy|overview|tutorial)\b|\u7efc\u8ff0|\u8bc4\u8ff0/iu;
const AUTHORITY_HINT_RE = /\b(authoritative|high impact|top-tier|highly cited|landmark|canonical)\b|\u6743\u5a01|\u9ad8\u5f15\u7528/iu;
const RECENT_HINT_RE = /\b(latest|recent|state[- ]of[- ]the[- ]art|newest)\b|\u6700\u65b0|\u8fd1\u671f/iu;
// Paper-classification patterns, tested against "title + summary" text.
const BENCHMARK_WORD_RE = /\b(benchmark|leaderboard|dataset|evaluation)\b/i;
const METHOD_WORD_RE = /\b(method|approach|adaptation|training|fine[- ]?tuning|optimization|algorithm|framework|model)\b/i;
const SURVEY_WORD_RE = /\b(survey|review|taxonomy|overview|tutorial)\b/i;
|
|
491
|
-
/**
 * Decode the five predefined XML character entities.
 *
 * "&amp;" is decoded last so that text which was escaped twice (for example
 * "&amp;lt;") is unescaped exactly one level, to "&lt;", rather than
 * collapsing all the way to "<".
 *
 * @param {string} raw - XML text content.
 * @returns {string} Text with &lt; &gt; &quot; &#39; and &amp; decoded.
 */
function decodeXmlEntities(raw) {
    return raw
        .replace(/&lt;/g, "<")
        .replace(/&gt;/g, ">")
        .replace(/&quot;/g, "\"")
        .replace(/&#39;/g, "'")
        .replace(/&amp;/g, "&");
}
|
|
499
|
-
/**
 * Extract the text content of the first `<tag>…</tag>` element in an XML
 * fragment (tag name matched case-insensitively, attributes allowed).
 *
 * Nested markup is replaced by spaces, entities are decoded and the result
 * is passed through normalizeText.
 *
 * @param {string} raw - XML fragment to search.
 * @param {string} tag - Element name; interpolated into a RegExp as-is, so
 *   it must not contain regex metacharacters.
 * @returns {string} Normalised text, or "" when the element is missing or empty.
 */
function stripXmlTag(raw, tag) {
    const elementPattern = new RegExp(`<${tag}[^>]*>([\\s\\S]*?)<\\/${tag}>`, "i");
    const inner = raw.match(elementPattern)?.[1];
    if (!inner) {
        return "";
    }
    const withoutMarkup = inner.replace(/<[^>]+>/g, " ").trim();
    return normalizeText(decodeXmlEntities(withoutMarkup));
}
|
|
505
|
-
/**
 * Parse an arXiv Atom feed into lightweight paper candidates.
 *
 * Each `<entry>` contributes one candidate when it has a title and an arXiv
 * identifier can be recovered from its `<id>` (or title). Identifiers are
 * stored without their version suffix.
 *
 * @param {string} xml - Atom feed body.
 * @returns {Array<{id: string, title: string, summary: string, url: string, published?: string}>}
 */
function parseArxivAtomCandidates(xml) {
    const entryBlocks = xml.match(/<entry>([\s\S]*?)<\/entry>/gi) ?? [];
    const candidates = [];
    for (const block of entryBlocks) {
        const title = stripXmlTag(block, "title");
        const summary = stripXmlTag(block, "summary");
        const idUrl = stripXmlTag(block, "id");
        const published = stripXmlTag(block, "published");
        const [arxivId] = parseArxivIdCandidatesFromPaper({ id: idUrl, url: idUrl, title });
        if (!title || !arxivId) {
            continue;
        }
        const baseId = stripArxivVersion(arxivId);
        const candidate = {
            id: `arxiv:${baseId}`,
            title,
            summary,
            url: `https://arxiv.org/abs/${baseId}`,
        };
        // `published` is present only when the feed supplied a non-empty value.
        if (published) {
            candidate.published = published;
        }
        candidates.push(candidate);
    }
    return candidates;
}
|
|
527
|
-
/**
 * Remove duplicate and empty queries, keeping at most `limit` of them.
 *
 * Queries are compared by their normalised, lower-cased text, but the
 * original spelling of the first occurrence is what gets returned. The limit
 * is checked after each insertion, so a first unique query is always kept.
 *
 * @param {Iterable<string>} queries - Candidate queries in priority order.
 * @param {number} limit - Maximum number of queries to keep.
 * @returns {string[]} De-duplicated queries in first-seen order.
 */
function dedupeQueries(queries, limit) {
    // normalised key -> original query, in insertion order
    const kept = new Map();
    for (const query of queries) {
        const key = normalizeText(query).toLowerCase();
        if (key && !kept.has(key)) {
            kept.set(key, query);
            if (kept.size >= limit) {
                break;
            }
        }
    }
    return [...kept.values()];
}
|
|
541
|
-
/**
 * Build the closest-match ("strict") fallback queries for a topic.
 *
 * The first query is the seed tokens joined by spaces (or the normalised
 * topic when there are no seed tokens). Shorter variants made of the first
 * four and the first three tokens are added when enough tokens exist. The
 * result is de-duplicated and capped at STRICT_EMPTY_FALLBACK_MAX_QUERIES.
 *
 * @param {string} topic - Free-text topic.
 * @returns {string[]} Ordered fallback queries.
 */
function buildStrictFallbackQueries(topic) {
    const seeds = buildRetrievalSeedTokens(topic);
    const hasSeeds = seeds.length > 0;
    const phrase = hasSeeds ? seeds.join(" ") : normalizeText(topic);
    const tokens = hasSeeds
        ? seeds
        : tokenizeKeywords(phrase).filter((token) => token.length >= 3).slice(0, 10);
    const queries = [phrase];
    // [minimum token count required, number of leading tokens to join]
    const variants = [[2, 4], [3, 3]];
    for (const [minTokens, width] of variants) {
        if (tokens.length >= minTokens) {
            queries.push(tokens.slice(0, width).join(" "));
        }
    }
    return dedupeQueries(queries, STRICT_EMPTY_FALLBACK_MAX_QUERIES);
}
|
|
552
|
-
/**
 * Build three tiers of fallback queries for a topic.
 *
 * - tierA: the strict queries (falls back to the topic phrase if empty);
 * - tierB: leading tokens combined with "adaptation", "method" and
 *   "framework", plus the first four tokens joined;
 * - tierC: leading tokens combined with transfer-learning, survey, criticism
 *   and failure-analysis suffixes, plus a "cross domain" variant of the phrase.
 *
 * tierB and tierC are de-duplicated and capped at
 * STRICT_EMPTY_FALLBACK_MAX_QUERIES.
 *
 * @param {string} topic - Free-text topic.
 * @returns {{tierA: string[], tierB: string[], tierC: string[]}}
 */
function buildTieredFallbackQueries(topic) {
    const seeds = buildRetrievalSeedTokens(topic);
    const hasSeeds = seeds.length > 0;
    const phrase = hasSeeds ? seeds.join(" ") : normalizeText(topic);
    const tokens = hasSeeds
        ? seeds
        : tokenizeKeywords(phrase).filter((token) => token.length >= 3).slice(0, 10);
    // First `count` tokens, each followed by a fixed suffix.
    const withSuffix = (count, suffix) => tokens.slice(0, count).map((token) => `${token} ${suffix}`);
    const strictQueries = buildStrictFallbackQueries(topic);
    const methodQueries = [
        ...withSuffix(6, "adaptation"),
        ...withSuffix(6, "method"),
        ...withSuffix(4, "framework"),
        tokens.slice(0, 4).join(" "),
    ];
    const exploratoryQueries = [
        ...withSuffix(5, "transfer learning"),
        ...withSuffix(5, "survey review"),
        ...withSuffix(5, "criticism limitations"),
        ...withSuffix(5, "failure analysis"),
        `${phrase} cross domain`,
    ];
    return {
        tierA: strictQueries.length > 0 ? strictQueries : [phrase],
        tierB: dedupeQueries(methodQueries, STRICT_EMPTY_FALLBACK_MAX_QUERIES),
        tierC: dedupeQueries(exploratoryQueries, STRICT_EMPTY_FALLBACK_MAX_QUERIES),
    };
}
|
|
576
|
-
/**
 * Detect which selection preferences a free-text request expresses.
 *
 * The normalised text is tested against one hint pattern per preference.
 *
 * @param {string} raw - The user's request text.
 * @returns {{foundationalFirst: boolean, avoidBenchmarkOnly: boolean,
 *   preferSurvey: boolean, preferAuthority: boolean, preferRecent: boolean}}
 */
function inferRequirementProfile(raw) {
    const request = normalizeText(raw);
    const hintByPreference = [
        ["foundationalFirst", FOUNDATIONAL_HINT_RE],
        ["avoidBenchmarkOnly", AVOID_BENCHMARK_HINT_RE],
        ["preferSurvey", SURVEY_HINT_RE],
        ["preferAuthority", AUTHORITY_HINT_RE],
        ["preferRecent", RECENT_HINT_RE],
    ];
    const profile = {};
    for (const [preference, hint] of hintByPreference) {
        profile[preference] = hint.test(request);
    }
    return profile;
}
|
|
586
|
-
/**
 * Normalise three tier weights so that they sum to 1.
 *
 * Non-finite weights count as 0 and negative weights are raised to 0. When
 * nothing positive remains, the module defaults (TIER_A_RATIO, TIER_B_RATIO,
 * TIER_C_RATIO) are returned instead.
 *
 * @param {{tierA: number, tierB: number, tierC: number}} input - Raw weights.
 * @returns {{tierA: number, tierB: number, tierC: number}} Weights summing to 1.
 */
function clampRecallTierRatios(input) {
    const sanitize = (weight) => (Number.isFinite(weight) ? Math.max(0, weight) : 0);
    const weightA = sanitize(input.tierA);
    const weightB = sanitize(input.tierB);
    const weightC = sanitize(input.tierC);
    const total = weightA + weightB + weightC;
    if (total > 0) {
        return {
            tierA: weightA / total,
            tierB: weightB / total,
            tierC: weightC / total,
        };
    }
    return { tierA: TIER_A_RATIO, tierB: TIER_B_RATIO, tierC: TIER_C_RATIO };
}
|
|
600
|
-
/**
 * Choose the research stage for the next run.
 *
 * Checked in priority order:
 *  - "pivot" after two or more consecutive empty runs;
 *  - "hypothesis_validation" once a hypothesis has been submitted;
 *  - "bootstrap" during the first two runs or while fewer than six papers are known;
 *  - "expansion" otherwise.
 *
 * @param {{emptyRunStreak: number, hasSubmittedHypothesis: boolean,
 *   totalRuns: number, knownPaperCount: number}} args
 * @returns {"pivot"|"hypothesis_validation"|"bootstrap"|"expansion"}
 */
function pickAdaptiveResearchStage(args) {
    const stalled = args.emptyRunStreak >= 2;
    if (stalled) {
        return "pivot";
    }
    if (args.hasSubmittedHypothesis) {
        return "hypothesis_validation";
    }
    const stillEarly = args.totalRuns <= 2 || args.knownPaperCount < 6;
    return stillEarly ? "bootstrap" : "expansion";
}
|
|
609
|
-
/**
 * Overlay stage-specific preferences on top of a base requirement profile.
 *
 * - "bootstrap" forces foundational, survey and authority preferences on;
 * - "hypothesis_validation" avoids benchmark-only papers and prefers
 *   authoritative and recent ones;
 * - "pivot" does the same and additionally switches the survey and
 *   foundational preferences off;
 * - any other stage returns `args.base` itself, unmodified.
 *
 * @param {{stage: string, base: object}} args
 * @returns {object} A new profile object, or `args.base` for other stages.
 */
function deriveAdaptiveRequirementProfile(args) {
    const { stage, base } = args;
    switch (stage) {
        case "bootstrap":
            return {
                ...base,
                foundationalFirst: true,
                preferSurvey: true,
                preferAuthority: true,
            };
        case "hypothesis_validation":
            return {
                ...base,
                avoidBenchmarkOnly: true,
                preferAuthority: true,
                preferRecent: true,
            };
        case "pivot":
            return {
                ...base,
                avoidBenchmarkOnly: true,
                preferAuthority: true,
                preferSurvey: false,
                foundationalFirst: false,
                preferRecent: true,
            };
        default:
            return base;
    }
}
|
|
638
|
-
/**
 * Pick the tier A/B/C recall mix for a research stage.
 *
 * Later stages shift weight from tier A towards tier C; unknown stages use
 * the module defaults. The chosen mix is passed through
 * clampRecallTierRatios before being returned.
 *
 * @param {string} stage - Research stage name.
 * @returns {{tierA: number, tierB: number, tierC: number}}
 */
function deriveAdaptiveRecallTierRatios(stage) {
    let mix;
    switch (stage) {
        case "bootstrap":
            mix = { tierA: 0.45, tierB: 0.4, tierC: 0.15 };
            break;
        case "hypothesis_validation":
            mix = { tierA: 0.4, tierB: 0.3, tierC: 0.3 };
            break;
        case "pivot":
            mix = { tierA: 0.3, tierB: 0.3, tierC: 0.4 };
            break;
        default:
            mix = { tierA: TIER_A_RATIO, tierB: TIER_B_RATIO, tierC: TIER_C_RATIO };
    }
    return clampRecallTierRatios(mix);
}
|
|
650
|
-
/**
 * Produce a diagnosis and follow-up suggestions for a run that found nothing.
 *
 * @param {{topic: string, stage: string, emptyRunStreak: number,
 *   knownPaperCount: number, totalRuns: number}} args
 * @returns {{diagnosis: string, nextQueries: string[], pivotHint: string}}
 *   `diagnosis` is a machine-readable label, `nextQueries` holds three
 *   broadened queries derived from the topic, and `pivotHint` is a
 *   human-readable recommendation.
 */
function buildEmptyCycleAutoDiagnosis(args) {
    const topic = normalizeText(args.topic);
    const repeatedlyEmpty = args.emptyRunStreak >= 2;
    // Late stages look for critical evidence; early ones widen to adjacent areas.
    const wantsCritique = args.stage === "hypothesis_validation" || args.stage === "pivot";
    const thirdQuery = wantsCritique
        ? `${topic} criticism failure limitations related work`
        : `${topic} adjacent transfer domain adaptation`;
    const nextQueries = [
        `${topic} survey review foundational`,
        `${topic} method theory variant`,
        thirdQuery,
    ].map((item) => normalizeText(item));
    let diagnosis;
    if (repeatedlyEmpty) {
        diagnosis = "repeated_empty_cycle_likely_scope_saturation_or_query_mismatch";
    }
    else if (args.knownPaperCount >= 8 && args.totalRuns >= 3) {
        diagnosis = "likely_scope_saturation_check_adjacent_subdomains";
    }
    else {
        diagnosis = "likely_query_or_recency_mismatch_expand_terms_and_time_window";
    }
    let pivotHint;
    if (args.stage === "pivot" || repeatedlyEmpty) {
        pivotHint = "consider a nearby sub-direction that serves current idea validation (supporting and critical evidence).";
    }
    else {
        pivotHint = "continue current direction with broadened query and include foundational plus adjacent variants.";
    }
    return { diagnosis, nextQueries, pivotHint };
}
|
|
674
|
-
/**
 * Work out the publication year of a candidate paper.
 *
 * A parseable `published` timestamp wins (UTC year). Otherwise the year is
 * read from a modern arXiv id of the form "<prefix>:YYMM.<number>" and
 * interpreted as 2000 + YY.
 *
 * @param {{id: string, published?: string}} paper
 * @returns {number|undefined} Four-digit year, or undefined when unknown.
 */
function inferCandidateYear(paper) {
    if (paper.published) {
        const timestamp = Date.parse(paper.published);
        if (Number.isFinite(timestamp)) {
            return new Date(timestamp).getUTCFullYear();
        }
    }
    // Fall back to the two-digit year at the start of a YYMM arXiv number.
    const twoDigitYear = paper.id.match(/:(\d{2})(\d{2})\./)?.[1];
    if (!twoDigitYear) {
        return undefined;
    }
    const yy = Number.parseInt(twoDigitYear, 10);
    return Number.isFinite(yy) ? 2000 + yy : undefined;
}
|
|
688
|
-
/**
 * Tell whether a paper looks like a pure benchmark/dataset paper: its title
 * or summary mentions a benchmark-type word but no method-type word.
 *
 * @param {{title: string, summary?: string}} paper
 * @returns {boolean}
 */
function isBenchmarkOnlyPaper(paper) {
    const blurb = `${paper.title} ${paper.summary ?? ""}`;
    if (!BENCHMARK_WORD_RE.test(blurb)) {
        return false;
    }
    return !METHOD_WORD_RE.test(blurb);
}
|
|
692
|
-
/**
 * Tell whether a paper's title or summary contains a survey-type word
 * (as defined by SURVEY_WORD_RE).
 *
 * @param {{title: string, summary?: string}} paper
 * @returns {boolean}
 */
function isSurveyPaper(paper) {
    const summary = paper.summary ?? "";
    return SURVEY_WORD_RE.test(`${paper.title} ${summary}`);
}
|
|
696
|
-
/**
 * Heuristically classify a paper as foundational for a topic.
 *
 * A paper qualifies when it is at least two calendar years old (UTC), or
 * when its lower-cased title contains any topic token of four or more
 * characters.
 *
 * @param {{paper: {title: string}, year: number|undefined, topicTokens: string[]}} args
 * @returns {boolean}
 */
function isFoundationalPaper(args) {
    const { paper, year, topicTokens } = args;
    const currentYear = new Date().getUTCFullYear();
    const agedEnough = typeof year === "number" && year <= currentYear - 2;
    const loweredTitle = normalizeText(paper.title).toLowerCase();
    const titleMentionsTopic = topicTokens.some((token) => token.length >= 4 && loweredTitle.includes(token));
    return agedEnough || titleMentionsTopic;
}
|
|
704
|
-
/**
 * Count how many tokens occur in a text as whole words or phrases.
 *
 * Both sides are lower-cased, with runs of "_" / "-" and of other
 * non-letter, non-digit, non-space characters turned into spaces. The text
 * additionally has its whitespace collapsed and is padded with a space on
 * each side, so a token only matches on word boundaries. Tokens shorter than
 * two characters, or empty after cleaning, are ignored.
 *
 * @param {Iterable<string>} tokens - Tokens to look for.
 * @param {string} text - Text to search.
 * @returns {number} Number of tokens found (each token counted at most once).
 */
function countTokenOverlap(tokens, text) {
    // Shared cleaning applied to both the haystack and each token.
    const fold = (value) => value
        .toLowerCase()
        .replace(/[_-]+/g, " ")
        .replace(/[^\p{L}\p{N}\s]+/gu, " ");
    const haystack = ` ${fold(normalizeText(text)).replace(/\s+/g, " ")} `;
    let hits = 0;
    for (const token of tokens) {
        if (token.length < 2) {
            continue;
        }
        const needle = fold(token).trim();
        if (needle && haystack.includes(` ${needle} `)) {
            hits += 1;
        }
    }
    return hits;
}
|
|
726
|
-
/**
 * Score a fallback candidate paper on a 50–99 scale.
 *
 * The base score combines a constant, a tier bonus, token overlap with the
 * title (x8) and the summary (x3), a recency boost and a current-year
 * penalty. Requirement flags then add or subtract fixed amounts.
 *
 * NOTE(review): the recency boost divides the paper's age by a NEGATIVE
 * number of days, so it clamps to 0 for every paper dated in the past and is
 * positive only for future-dated entries. This looks unintended, but the
 * behaviour is preserved here; confirm the intended formula before changing it.
 *
 * @param {string[]} topicTokens - Tokens to match against title and summary.
 * @param {{id: string, title: string, summary?: string, published?: string}} paper
 * @param {"tierA"|"tierB"|"tierC"} tier - Query tier that produced the paper.
 * @param {{foundationalFirst: boolean, preferSurvey: boolean, preferAuthority: boolean,
 *   preferRecent: boolean, avoidBenchmarkOnly: boolean}} requirements
 * @returns {number} Integer score clamped to [50, 99].
 */
function scoreFallbackCandidate(topicTokens, paper, tier, requirements) {
    const titleHits = countTokenOverlap(topicTokens, paper.title);
    const summaryHits = countTokenOverlap(topicTokens, paper.summary ?? "");
    const publishedAt = paper.published ? Date.parse(paper.published) : NaN;
    let recencyBoost = 0;
    if (Number.isFinite(publishedAt)) {
        const scaledAge = (Date.now() - publishedAt) / (1000 * 60 * 60 * 24 * -180);
        recencyBoost = Math.max(0, Math.min(8, scaledAge));
    }
    let tierBoost = 1;
    if (tier === "tierA") {
        tierBoost = 8;
    }
    else if (tier === "tierB") {
        tierBoost = 4;
    }
    const year = inferCandidateYear(paper);
    const benchmarkOnly = isBenchmarkOnlyPaper(paper);
    const survey = isSurveyPaper(paper);
    const foundational = isFoundationalPaper({ paper, year, topicTokens });
    const currentYear = new Date().getUTCFullYear();
    const hasYear = typeof year === "number";
    // Papers dated in the current (or a later) year lose four points.
    const recencyPenalty = hasYear && year >= currentYear ? 4 : 0;
    let score = 60 + tierBoost + titleHits * 8 + summaryHits * 3 + recencyBoost - recencyPenalty;
    if (requirements.foundationalFirst) {
        score += foundational ? 10 : -4;
    }
    if (requirements.preferSurvey && survey) {
        score += 8;
    }
    if (requirements.preferAuthority) {
        if (survey) {
            score += 3;
        }
        if (foundational) {
            score += 2;
        }
    }
    if (requirements.preferRecent && hasYear && year >= currentYear - 1) {
        score += 4;
    }
    if (requirements.avoidBenchmarkOnly && benchmarkOnly) {
        score -= 15;
    }
    return Math.max(50, Math.min(99, Math.round(score)));
}
|
|
760
|
-
/**
 * Run one relevance-sorted query against the arXiv API and parse the result.
 *
 * This is a best-effort lookup: a non-OK response, a network error or the
 * 15-second timeout all yield an empty list rather than an exception.
 *
 * @param {string} query - Value sent as `search_query`.
 * @returns {Promise<object[]>} Candidates parsed from the Atom feed, or [].
 */
async function fetchArxivFallbackByQuery(query) {
    const search = new URLSearchParams({
        search_query: query,
        start: "0",
        max_results: String(STRICT_EMPTY_FALLBACK_MAX_RESULTS),
        sortBy: "relevance",
        sortOrder: "descending",
    });
    const requestUrl = `${ARXIV_API_URL}?${search.toString()}`;
    const aborter = new AbortController();
    const timeoutHandle = setTimeout(() => aborter.abort(), 15_000);
    try {
        const response = await fetch(requestUrl, {
            signal: aborter.signal,
            headers: {
                "User-Agent": "scientify-empty-fallback/1.0",
            },
        });
        if (response.ok) {
            const feed = await response.text();
            return parseArxivAtomCandidates(feed);
        }
        return [];
    }
    catch {
        // Deliberately swallowed: callers treat "no rows" and "lookup failed" alike.
        return [];
    }
    finally {
        clearTimeout(timeoutHandle);
    }
}
|
|
789
|
-
/**
 * Seeds a strict run with fallback core papers fetched from arXiv.
 *
 * Steps, in order:
 *  1. Run every tiered fallback query (tierA, then tierB, then tierC), one
 *     request at a time, recording one trace entry per query and keeping the
 *     first occurrence of each row id together with the tier that found it.
 *  2. Annotate and score every unique row, then sort by score, highest first.
 *  3. Prefer rows whose id is not in `args.knownPaperIds`; if none are unseen,
 *     fall back to the whole ranked list.
 *  4. Narrow by token-overlap relevance, relaxing the filter when too few rows
 *     survive.
 *  5. Pick per-tier quotas, top up from the remaining pool, then swap entries
 *     to honour the foundational / survey / avoid-benchmark requirements.
 *
 * @param {object} args
 * @param {string} args.topic - Topic used to build queries and scoring tokens.
 * @param {number} args.maxPapers - Upper bound used when sizing the target count.
 * @param {number} [args.candidatePool] - Pool size; clamped to the range 1..40.
 * @param {number} [args.minCoreFloor] - Minimum core count; clamped to 1..maxPapers.
 * @param {{has(id: *): boolean}} args.knownPaperIds - Ids that count as already seen.
 * @param {object} args.requirements - Flags read here: `foundationalFirst`,
 *   `preferSurvey`, `avoidBenchmarkOnly` (the whole object is also passed to the scorer).
 * @param {object} [args.tierRatios] - Per-tier ratios; defaults to the TIER_*_RATIO constants.
 * @returns {Promise<{papers: Array, corePapers: Array, explorationTrace: Array, notes: string, recallTierStats: object}>}
 */
async function strictCoreFallbackSeed(args) {
    const tierOrder = ["tierA", "tierB", "tierC"];
    const overlapText = (row) => `${row.title} ${row.summary ?? ""}`;

    // --- 1. Fetch every tier's queries sequentially and de-duplicate by row id.
    const queriesByTier = buildTieredFallbackQueries(args.topic);
    const candidatesById = new Map();
    const explorationTrace = [];
    const recallTierStats = {
        tierA: { candidates: 0, selected: 0 },
        tierB: { candidates: 0, selected: 0 },
        tierC: { candidates: 0, selected: 0 },
    };
    for (const tier of tierOrder) {
        for (const query of queriesByTier[tier]) {
            const rows = await fetchArxivFallbackByQuery(query);
            const rowCount = rows.length;
            recallTierStats[tier].candidates += rowCount;
            explorationTrace.push({
                query,
                reason: `strict_core_backfill_seed_${tier}`,
                source: "arxiv",
                candidates: rowCount,
                filteredTo: rowCount,
                resultCount: rowCount,
            });
            for (const row of rows) {
                // First tier to surface a paper owns it.
                if (!candidatesById.has(row.id)) {
                    candidatesById.set(row.id, { row, tier });
                }
            }
        }
    }

    // --- 2. Annotate, score and rank the unique candidates.
    const topicTokens = tokenizeKeywords(args.topic);
    const scoringTokens = buildScoringTokens(args.topic);
    const ranked = [];
    for (const { row, tier } of candidatesById.values()) {
        const year = inferCandidateYear(row);
        const isSurvey = isSurveyPaper(row);
        const isBenchmarkOnly = isBenchmarkOnlyPaper(row);
        const isFoundational = isFoundationalPaper({ paper: row, year, topicTokens });
        const relevance = countTokenOverlap(scoringTokens, overlapText(row));
        // Score with the scoring tokens unless that list is empty.
        const tokensForScore = scoringTokens.length > 0 ? scoringTokens : topicTokens;
        const score = scoreFallbackCandidate(tokensForScore, row, tier, args.requirements);
        ranked.push({ row, tier, year, isSurvey, isBenchmarkOnly, isFoundational, relevance, score });
    }
    ranked.sort((left, right) => right.score - left.score);

    // --- 3. Prefer unseen papers; reuse the full ranking only if nothing is new.
    const unseen = ranked.filter((item) => !args.knownPaperIds.has(item.row.id));
    const unfilteredPool = unseen.length > 0 ? unseen : ranked;

    // --- 4. Sizing parameters and relevance filtering.
    const minRelevance = scoringTokens.length >= 2 ? 2 : 1;
    const requestedPool = args.candidatePool ?? Math.max(DEFAULT_STRICT_CANDIDATE_POOL, args.maxPapers * 4);
    const candidatePool = Math.max(1, Math.min(40, Math.floor(requestedPool)));
    const minCoreFloor = Math.max(1, Math.min(args.maxPapers, args.minCoreFloor ?? DEFAULT_STRICT_MIN_CORE_FLOOR));
    const tierRatios = clampRecallTierRatios(args.tierRatios ?? { tierA: TIER_A_RATIO, tierB: TIER_B_RATIO, tierC: TIER_C_RATIO });

    const strongPool = unfilteredPool.filter((item) => item.relevance >= minRelevance);
    const focusTokens = scoringTokens.filter((token) => token.length >= 5);
    // Weakly relevant rows must still hit one of the longer ("focus") tokens, when any exist.
    const hitsFocusToken = (item) => focusTokens.length === 0 || countTokenOverlap(focusTokens, overlapText(item.row)) >= 1;
    const focusedWeakPool = unfilteredPool.filter((item) => !(item.relevance < 1) && hitsFocusToken(item));
    const weakPool = focusedWeakPool.length > 0
        ? focusedWeakPool
        : unfilteredPool.filter((item) => item.relevance >= 1);

    let effectivePool = unfilteredPool;
    if (strongPool.length >= minCoreFloor) {
        effectivePool = strongPool;
    }
    else if (weakPool.length > 0) {
        effectivePool = weakPool;
    }

    // --- 5a. Per-tier quotas. tierA and tierB always get at least one slot.
    const targetCount = Math.max(minCoreFloor, Math.min(args.maxPapers, candidatePool));
    const roundedA = Math.round(targetCount * tierRatios.tierA);
    const roundedB = Math.round(targetCount * tierRatios.tierB);
    const tierTargets = {
        tierA: Math.max(1, roundedA),
        tierB: Math.max(1, roundedB),
        tierC: Math.max(0, targetCount - roundedA - roundedB),
    };
    const allotted = tierTargets.tierA + tierTargets.tierB + tierTargets.tierC;
    if (allotted < targetCount) {
        // Any rounding shortfall goes to tierA.
        tierTargets.tierA += targetCount - allotted;
    }

    // --- 5b. Pick by tier, then top up from whatever is left in rank order.
    const selected = [];
    const selectedIds = new Set();
    const take = (item) => {
        selected.push(item);
        selectedIds.add(item.row.id);
    };
    for (const tier of tierOrder) {
        effectivePool
            .filter((item) => item.tier === tier && !selectedIds.has(item.row.id))
            .slice(0, tierTargets[tier])
            .forEach(take);
    }
    if (selected.length < targetCount) {
        effectivePool
            .filter((item) => !selectedIds.has(item.row.id))
            .slice(0, targetCount - selected.length)
            .forEach(take);
    }

    // --- 5c. Requirement-driven swaps.
    // Replaces the first selected entry that fails `predicate` with the best
    // unselected entry that passes it, until `need` entries pass or no swap is possible.
    const ensureAtLeast = (predicate, need) => {
        while (selected.filter(predicate).length < need) {
            const incoming = effectivePool.find((item) => !selectedIds.has(item.row.id) && predicate(item));
            if (!incoming) {
                break;
            }
            const victimIndex = selected.findIndex((item) => !predicate(item));
            if (victimIndex < 0) {
                break;
            }
            selectedIds.delete(selected[victimIndex].row.id);
            selected[victimIndex] = incoming;
            selectedIds.add(incoming.row.id);
        }
    };
    if (args.requirements.foundationalFirst) {
        ensureAtLeast((item) => item.isFoundational, Math.min(2, targetCount));
    }
    if (args.requirements.preferSurvey) {
        ensureAtLeast((item) => item.isSurvey, 1);
    }
    if (args.requirements.avoidBenchmarkOnly) {
        for (let index = 0; index < selected.length; index += 1) {
            if (!selected[index].isBenchmarkOnly) {
                continue;
            }
            const substitute = effectivePool.find((item) => !selectedIds.has(item.row.id) && !item.isBenchmarkOnly);
            if (!substitute) {
                // No non-benchmark candidates left; later entries cannot be replaced either.
                break;
            }
            selectedIds.delete(selected[index].row.id);
            selected[index] = substitute;
            selectedIds.add(substitute.row.id);
        }
    }

    // Selected counts are derived from the final selection, after all swaps.
    for (const tier of tierOrder) {
        recallTierStats[tier].selected = selected.filter((item) => item.tier === tier).length;
    }

    // --- 6. Shape the outputs.
    const seedReason = "auto_seeded_fallback_after_sparse_core_strict_run";
    const papers = selected.map(({ row, score }) => ({
        id: row.id,
        title: row.title,
        url: row.url,
        score,
        reason: seedReason,
    }));
    const corePapers = selected.map(({ row, score }) => ({
        id: row.id,
        title: row.title,
        url: row.url,
        source: "arxiv",
        ...(row.published ? { publishedAt: row.published } : {}),
        score,
        reason: seedReason,
        ...(row.summary ? { summary: row.summary } : {}),
        fullTextRead: false,
        readStatus: "metadata",
        unreadReason: "Auto-seeded fallback candidate; full-text bootstrap pending.",
    }));
    return {
        papers,
        corePapers,
        explorationTrace,
        notes: `strict_core_backfill_seed selected=${selected.length} pool=${candidatePool} floor=${minCoreFloor} relevance_floor=${minRelevance} req_foundational=${args.requirements.foundationalFirst} req_avoid_benchmark=${args.requirements.avoidBenchmarkOnly} req_survey=${args.requirements.preferSurvey}`,
        recallTierStats,
    };
}
|
|
952
|
-
/**
 * Tells whether a paper record is marked as read in full.
 *
 * @param {{fullTextRead?: *, readStatus?: *}} paper - Paper record to inspect.
 * @returns {boolean} `true` when `fullTextRead` is exactly `true` or
 *   `readStatus` is exactly `"fulltext"`.
 */
function isPaperFullTextRead(paper) {
    if (paper.fullTextRead === true) {
        return true;
    }
    return paper.readStatus === "fulltext";
}
|
|
955
|
-
/**
 * Tells whether a paper carries at least one complete evidence anchor.
 *
 * An anchor is complete when its `section`, `locator` and `quote` are all
 * strings that are non-empty after trimming.
 *
 * Malformed data is treated as "no strict anchor" instead of raising: a
 * non-array `evidenceAnchors`, a null entry, or a non-string field (for
 * example a numeric locator) would otherwise throw a TypeError from
 * `.some` / `.trim` and abort the caller.
 *
 * @param {{evidenceAnchors?: Array<{section?: *, locator?: *, quote?: *}>}} paper
 * @returns {boolean} `true` when at least one anchor is complete.
 */
function hasStrictEvidenceAnchor(paper) {
    const anchors = Array.isArray(paper.evidenceAnchors) ? paper.evidenceAnchors : [];
    const isFilledText = (value) => typeof value === "string" && value.trim().length > 0;
    return anchors.some((anchor) => isFilledText(anchor?.section) &&
        isFilledText(anchor?.locator) &&
        isFilledText(anchor?.quote));
}
|
|
961
|
-
/**
 * Returns the first string in `values` that is still non-empty after
 * `normalizeText`, in its normalized form.
 *
 * Non-string entries are skipped without being normalized.
 *
 * @param {Iterable<*>} values - Candidate values, in priority order.
 * @returns {string|undefined} The normalized text, or `undefined` if no entry qualifies.
 */
function firstNonEmptyText(values) {
    for (const candidate of values) {
        if (typeof candidate === "string") {
            const cleaned = normalizeText(candidate);
            if (cleaned.length > 0) {
                return cleaned;
            }
        }
    }
    return undefined;
}
|
|
971
|
-
/**
 * Derives the evidence id for a paper by passing only its `id`, `title` and
 * `url` to `derivePaperId`.
 *
 * @param {{id?: *, title?: *, url?: *}} paper - Paper record.
 * @returns {*} Whatever `derivePaperId` returns for those three fields.
 */
function toEvidencePaperId(paper) {
    const { id, title, url } = paper;
    return derivePaperId({ id, title, url });
}
|
|
974
|
-
/**
 * Normalizes a list of evidence ids and removes duplicates.
 *
 * Each id goes through `normalizeText`; entries that normalize to a falsy
 * value are dropped. Duplicates are detected case-insensitively, and the
 * first occurrence (in its normalized, original-case form) is kept.
 *
 * @param {Iterable<*>} ids - Raw evidence ids.
 * @returns {string[]} Normalized, de-duplicated ids in first-seen order.
 */
function dedupeEvidenceIds(ids) {
    const seenKeys = new Set();
    const unique = [];
    for (const rawId of ids) {
        const cleaned = normalizeText(rawId);
        if (cleaned) {
            const key = cleaned.toLowerCase();
            if (!seenKeys.has(key)) {
                seenKeys.add(key);
                unique.push(cleaned);
            }
        }
    }
    return unique;
}
|
|
989
|
-
/**
 * Fills in missing evidence links on a knowledge state without re-reading any paper.
 *
 * Two repairs are attempted:
 *  - Every core paper that is marked full-text-read but has no strict
 *    evidence anchor gets one "AutoExtract" anchor, quoting (at most 260
 *    characters of) the first non-empty text among its first key evidence
 *    span, summary, reason and title.
 *  - Knowledge changes, knowledge updates and hypotheses whose `evidenceIds`
 *    are empty receive the ids of up to two full-text-read core papers.
 *    BRIDGE changes are never auto-filled.
 *
 * If neither repair changes anything, the original `knowledgeState` object is
 * returned untouched. Otherwise a new state object is returned, and the run
 * log (when one exists or `args.runProfile` is given) gains an
 * `auto_evidence_binding ...` note.
 *
 * @param {object} args
 * @param {object} [args.knowledgeState] - State to patch.
 * @param {*} [args.runProfile] - Used as `runLog.runProfile` when the existing
 *   run log has none.
 * @returns {{knowledgeState: (object|undefined), anchorsAdded: number, evidenceIdsFilled: number}}
 */
function applyLightweightEvidenceBinding(args) {
    const state = args.knowledgeState;
    const unchanged = () => ({ knowledgeState: args.knowledgeState, anchorsAdded: 0, evidenceIdsFilled: 0 });
    if (!state) {
        return unchanged();
    }
    const corePapers = state.corePapers ?? [];
    if (corePapers.length === 0) {
        return unchanged();
    }

    // --- Repair 1: synthesize an anchor for read papers that lack a strict one.
    let anchorsAdded = 0;
    const patchedCore = corePapers.map((paper) => {
        const needsAnchor = isPaperFullTextRead(paper) && !hasStrictEvidenceAnchor(paper);
        if (!needsAnchor) {
            return paper;
        }
        const quoteSource = firstNonEmptyText([
            paper.keyEvidenceSpans?.[0],
            paper.summary,
            paper.reason,
            paper.title,
        ]);
        if (!quoteSource) {
            return paper;
        }
        anchorsAdded += 1;
        const autoAnchor = {
            section: "AutoExtract",
            locator: paper.fullTextRef?.trim() || "excerpt:1",
            claim: firstNonEmptyText([paper.researchGoal, paper.reason, paper.title, "auto-bound claim"]) ?? "auto-bound claim",
            quote: quoteSource.slice(0, 260),
        };
        return {
            ...paper,
            evidenceAnchors: [...(paper.evidenceAnchors ?? []), autoAnchor],
        };
    });

    // --- Repair 2: back-fill empty evidenceIds from the first two read core papers.
    const readCoreIds = patchedCore
        .filter((paper) => isPaperFullTextRead(paper))
        .map((paper) => toEvidencePaperId(paper))
        .slice(0, 2);
    const fallbackEvidenceIds = dedupeEvidenceIds(readCoreIds);
    let evidenceIdsFilled = 0;
    // Returns the cleaned existing ids, or the fallback ids when allowed, or undefined.
    const patchEvidenceIds = (raw, allowAuto = true) => {
        const cleaned = dedupeEvidenceIds(raw ?? []);
        if (cleaned.length > 0) {
            return cleaned;
        }
        if (!allowAuto || fallbackEvidenceIds.length === 0) {
            return undefined;
        }
        evidenceIdsFilled += 1;
        return [...fallbackEvidenceIds];
    };
    const patchedChanges = (state.knowledgeChanges ?? []).map((change) => ({
        ...change,
        // BRIDGE changes keep only the evidence they were submitted with.
        evidenceIds: patchEvidenceIds(change.evidenceIds, change.type !== "BRIDGE"),
    }));
    const patchedUpdates = (state.knowledgeUpdates ?? []).map((update) => ({
        ...update,
        evidenceIds: patchEvidenceIds(update.evidenceIds, true),
    }));
    const patchedHypotheses = (state.hypotheses ?? []).map((hypothesis) => ({
        ...hypothesis,
        evidenceIds: patchEvidenceIds(hypothesis.evidenceIds, true),
    }));

    if (anchorsAdded === 0 && evidenceIdsFilled === 0) {
        // Nothing was repaired: hand back the caller's state object as-is.
        return unchanged();
    }

    // --- Record what happened in the run log, if there is (or can be) one.
    const priorRunLog = state.runLog;
    let runLog;
    if (priorRunLog || args.runProfile) {
        runLog = { ...(priorRunLog ?? {}) };
        if (!priorRunLog?.runProfile && args.runProfile) {
            runLog.runProfile = args.runProfile;
        }
        runLog.notes = [priorRunLog?.notes, `auto_evidence_binding anchors_added=${anchorsAdded} ids_filled=${evidenceIdsFilled}`]
            .filter((item) => Boolean(item && item.trim().length > 0))
            .join(" || ");
    }

    const nextState = { ...state, corePapers: patchedCore };
    if (patchedChanges.length > 0) {
        nextState.knowledgeChanges = patchedChanges;
    }
    if (patchedUpdates.length > 0) {
        nextState.knowledgeUpdates = patchedUpdates;
    }
    if (patchedHypotheses.length > 0) {
        nextState.hypotheses = patchedHypotheses;
    }
    if (runLog) {
        nextState.runLog = runLog;
    }
    return {
        knowledgeState: nextState,
        anchorsAdded,
        evidenceIdsFilled,
    };
}
|
|
1077
|
-
/**
 * Builds the search query for a reflection follow-up.
 *
 * The topic and hint are tokenized together and the first eight keywords are
 * joined with spaces. When tokenization yields nothing, the normalized topic
 * is used instead.
 *
 * @param {string} topic - Research topic.
 * @param {string} hint - Extra text steering the follow-up (e.g. a hypothesis statement).
 * @returns {string} The query string.
 */
function buildReflectionFollowupQuery(topic, hint) {
    const keywords = tokenizeKeywords(`${topic} ${hint}`).slice(0, 8);
    return keywords.length === 0 ? normalizeText(topic) : keywords.join(" ");
}
|
|
1083
|
-
/**
 * Chooses what, if anything, the single reflection step should search for.
 *
 * The first matching rule wins:
 *  1. Any hypothesis        -> HYPOTHESIS_VALIDATE, seeded by the first hypothesis statement.
 *  2. Any BRIDGE change     -> BRIDGE, seeded by the first bridge statement.
 *  3. >=2 NEW and >=1 REVISE changes -> CONFLICT, seeded by the first NEW and REVISE statements.
 *  4. Any core paper not read in full -> UNREAD_CORE, seeded by that paper's id or title.
 *
 * Missing seed text falls back to the topic.
 *
 * @param {object} args
 * @param {string} args.topic - Research topic.
 * @param {object} [args.knowledgeState] - State providing `hypotheses`,
 *   `knowledgeChanges` and `corePapers`.
 * @returns {{trigger: string, reason: string, query: string}|undefined}
 *   The follow-up seed, or `undefined` when no rule applies.
 */
function resolveSingleStepReflectionSeed(args) {
    const { topic } = args;
    const state = args.knowledgeState;
    const hypotheses = state?.hypotheses ?? [];
    const changes = state?.knowledgeChanges ?? [];
    const changesOfType = (type) => changes.filter((item) => item.type === type);
    // All groups are computed up front, before any rule is evaluated.
    const bridges = changesOfType("BRIDGE");
    const additions = changesOfType("NEW");
    const revisions = changesOfType("REVISE");
    const unreadCore = (state?.corePapers ?? []).filter((paper) => !isPaperFullTextRead(paper));

    const makeSeed = (trigger, reason, hint) => ({
        trigger,
        reason,
        query: buildReflectionFollowupQuery(topic, hint),
    });

    if (hypotheses.length > 0) {
        const statement = hypotheses[0]?.statement ?? topic;
        return makeSeed("HYPOTHESIS_VALIDATE", "hypothesis_validation_followup", `${statement} supporting evidence critique related work limitations`);
    }
    if (bridges.length > 0) {
        return makeSeed("BRIDGE", "bridge_followup", bridges[0]?.statement ?? topic);
    }
    if (additions.length >= 2 && revisions.length >= 1) {
        const combined = `${additions[0]?.statement ?? ""} ${revisions[0]?.statement ?? ""}`.trim();
        return makeSeed("CONFLICT", "new_revise_followup", combined || topic);
    }
    if (unreadCore.length > 0) {
        const [firstUnread] = unreadCore;
        return makeSeed("UNREAD_CORE", "unread_core_followup", firstUnread?.id ?? firstUnread?.title ?? topic);
    }
    return undefined;
}
|
|
1124
|
-
/**
 * Runs at most one reflection follow-up search and reports the new papers it found.
 *
 * When no follow-up seed can be resolved, nothing is fetched and the result
 * has `executed: false`. Otherwise one arXiv query is issued, rows whose id is
 * already known are dropped, and at most `REFLECTION_MAX_ADDED_PAPERS` rows
 * are kept. Kept rows are returned as metadata-only papers, plus a single
 * "NEW" change that cites up to three of them.
 *
 * "Known" ids are `args.knownPaperIds` plus the derived ids of
 * `args.effectivePapers` and of the state's core and exploration papers.
 *
 * @param {object} args
 * @param {string} args.topic - Research topic.
 * @param {Iterable<*>} args.knownPaperIds - Ids already recorded for the topic.
 * @param {Array<object>} args.effectivePapers - Papers gathered so far in this run.
 * @param {object} [args.knowledgeState] - Current knowledge state.
 * @returns {Promise<{executed: boolean, resultCount: number, trace?: object, papers: Array, changes: Array}>}
 */
async function executeSingleStepReflection(args) {
    const { topic, knowledgeState } = args;
    const seed = resolveSingleStepReflectionSeed({ topic, knowledgeState });
    if (!seed) {
        return { executed: false, resultCount: 0, papers: [], changes: [] };
    }
    const rows = await fetchArxivFallbackByQuery(seed.query);

    // Collect every id that should not be re-added.
    const knownIds = new Set(args.knownPaperIds);
    for (const paper of args.effectivePapers) {
        knownIds.add(derivePaperId(paper));
    }
    const rememberStatePapers = (statePapers) => {
        for (const { id, title, url } of statePapers) {
            knownIds.add(derivePaperId({ id, title, url }));
        }
    };
    rememberStatePapers(knowledgeState?.corePapers ?? []);
    rememberStatePapers(knowledgeState?.explorationPapers ?? []);

    const freshRows = rows
        .filter((row) => !knownIds.has(row.id))
        .slice(0, REFLECTION_MAX_ADDED_PAPERS);
    const addedCount = freshRows.length;

    const papers = freshRows.map((row) => {
        const paper = { id: row.id, title: row.title, url: row.url, source: "arxiv" };
        if (row.published) {
            paper.publishedAt = row.published;
        }
        if (row.summary) {
            paper.summary = row.summary;
        }
        paper.fullTextRead = false;
        paper.readStatus = "metadata";
        paper.unreadReason = "single_step_reflection_added_without_fulltext";
        return paper;
    });

    const changes = [];
    if (addedCount > 0) {
        changes.push({
            type: "NEW",
            statement: `Reflection follow-up added ${addedCount} adjacent paper(s) for ${topic}.`,
            evidenceIds: freshRows.map((row) => row.id).slice(0, 3),
            topic,
        });
    }

    const trace = {
        query: seed.query,
        reason: seed.reason,
        source: "arxiv",
        candidates: rows.length,
        filteredTo: addedCount,
    };
    if (addedCount === 0) {
        trace.filteredOutReasons = ["no_unseen_reflection_candidates"];
    }
    trace.resultCount = addedCount;

    return {
        executed: true,
        resultCount: addedCount,
        trace,
        papers,
        changes,
    };
}
|
|
1186
|
-
/**
 * Collapses paper records that share the same derived paper id.
 *
 * The first record for an id is kept as-is (gaining an `id` field when it had
 * none). When a later record has the same derived id, the entry is replaced
 * by a five-field record (`id`, `title`, `url`, `score`, `reason`) in which
 * the earlier value wins unless it is null/undefined. For `score`, the higher
 * of two finite scores is used; if only the earlier score is finite it is
 * kept, otherwise the later record's score is taken as-is.
 *
 * @param {Iterable<object>} records - Paper records, in priority order.
 * @returns {object[]} One record per derived id, in first-seen order.
 */
function dedupePaperRecords(records) {
    const isFiniteNumber = (value) => typeof value === "number" && Number.isFinite(value);
    const merged = new Map();
    for (const incoming of records) {
        const key = derivePaperId(incoming);
        const kept = merged.get(key);
        if (kept) {
            let score = incoming.score;
            if (isFiniteNumber(kept.score)) {
                score = isFiniteNumber(incoming.score)
                    ? Math.max(kept.score, incoming.score)
                    : kept.score;
            }
            // Note: fields other than these five are not carried over on merge.
            merged.set(key, {
                id: kept.id ?? incoming.id ?? key,
                title: kept.title ?? incoming.title,
                url: kept.url ?? incoming.url,
                score,
                reason: kept.reason ?? incoming.reason,
            });
        }
        else {
            const firstSeen = { ...incoming };
            if (!incoming.id) {
                firstSeen.id = key;
            }
            merged.set(key, firstSeen);
        }
    }
    return Array.from(merged.values());
}
|
|
1209
|
-
/**
 * Collapses knowledge-state paper records that share the same derived paper id.
 *
 * The id is derived from each record's `id`, `title` and `url`. The first
 * record for an id is kept as-is (gaining an `id` field when it had none).
 * A later record with the same derived id is merged over the earlier one, so
 * its fields generally win, except for `id`, `title`, `url`, `summary` and
 * `unreadReason`, where the earlier value is kept unless it is null/undefined.
 *
 * @param {Iterable<object>} records - Knowledge paper records.
 * @returns {object[]} One record per derived id, in first-seen order.
 */
function dedupeKnowledgePapers(records) {
    const merged = new Map();
    for (const incoming of records) {
        const { id, title, url } = incoming;
        const key = derivePaperId({ id, title, url });
        const kept = merged.get(key);
        if (kept) {
            merged.set(key, {
                ...kept,
                ...incoming,
                // Identity and descriptive fields: the earliest non-nullish value wins.
                id: kept.id ?? incoming.id ?? key,
                title: kept.title ?? incoming.title,
                url: kept.url ?? incoming.url,
                summary: kept.summary ?? incoming.summary,
                unreadReason: kept.unreadReason ?? incoming.unreadReason,
            });
        }
        else {
            const firstSeen = { ...incoming };
            if (!incoming.id) {
                firstSeen.id = key;
            }
            merged.set(key, firstSeen);
        }
    }
    return Array.from(merged.values());
}
|
|
1233
191
|
function normalizeSource(raw) {
|
|
1234
192
|
if (!raw)
|
|
1235
193
|
return undefined;
|
|
@@ -1388,9 +346,6 @@ function getOrCreateTopicState(root, scope, topic, incomingPrefs) {
|
|
|
1388
346
|
if (!Number.isFinite(existing.totalRuns)) {
|
|
1389
347
|
existing.totalRuns = 0;
|
|
1390
348
|
}
|
|
1391
|
-
if (!Number.isFinite(existing.emptyRunStreak)) {
|
|
1392
|
-
existing.emptyRunStreak = 0;
|
|
1393
|
-
}
|
|
1394
349
|
existing.preferences = mergePreferences(existing.preferences, incomingPrefs);
|
|
1395
350
|
// Merge duplicate legacy buckets produced by old scope normalization rules.
|
|
1396
351
|
for (const [otherKey, other] of Object.entries(root.topics)) {
|
|
@@ -1467,15 +422,6 @@ function getOrCreateTopicState(root, scope, topic, incomingPrefs) {
|
|
|
1467
422
|
const existingRuns = Number.isFinite(existing.totalRuns) ? Math.max(0, Math.floor(existing.totalRuns)) : 0;
|
|
1468
423
|
const otherRuns = Number.isFinite(other.totalRuns) ? Math.max(0, Math.floor(other.totalRuns)) : 0;
|
|
1469
424
|
existing.totalRuns = existingRuns + otherRuns;
|
|
1470
|
-
const existingEmptyRaw = existing.emptyRunStreak;
|
|
1471
|
-
const otherEmptyRaw = other.emptyRunStreak;
|
|
1472
|
-
const existingEmptyRuns = typeof existingEmptyRaw === "number" && Number.isFinite(existingEmptyRaw)
|
|
1473
|
-
? Math.max(0, Math.floor(existingEmptyRaw))
|
|
1474
|
-
: 0;
|
|
1475
|
-
const otherEmptyRuns = typeof otherEmptyRaw === "number" && Number.isFinite(otherEmptyRaw)
|
|
1476
|
-
? Math.max(0, Math.floor(otherEmptyRaw))
|
|
1477
|
-
: 0;
|
|
1478
|
-
existing.emptyRunStreak = Math.max(existingEmptyRuns, otherEmptyRuns);
|
|
1479
425
|
const existingLastRun = existing.lastRunAtMs ?? 0;
|
|
1480
426
|
const otherLastRun = other.lastRunAtMs ?? 0;
|
|
1481
427
|
if (otherLastRun > existingLastRun) {
|
|
@@ -1500,7 +446,6 @@ function getOrCreateTopicState(root, scope, topic, incomingPrefs) {
|
|
|
1500
446
|
memory: defaultTopicMemoryState(),
|
|
1501
447
|
pushedPapers: {},
|
|
1502
448
|
totalRuns: 0,
|
|
1503
|
-
emptyRunStreak: 0,
|
|
1504
449
|
};
|
|
1505
450
|
root.topics[key] = created;
|
|
1506
451
|
return created;
|
|
@@ -1557,301 +502,8 @@ export async function recordIncrementalPush(args) {
|
|
|
1557
502
|
const topicState = getOrCreateTopicState(root, args.scope, args.topic, args.preferences);
|
|
1558
503
|
const memory = ensureTopicMemoryState(topicState);
|
|
1559
504
|
const now = Date.now();
|
|
1560
|
-
const normalizedPapersFromKnowledgeState = (args.knowledgeState?.corePapers ?? [])
|
|
1561
|
-
.filter((paper) => paper && typeof paper === "object")
|
|
1562
|
-
.map((paper) => ({
|
|
1563
|
-
...(paper.id ? { id: paper.id } : {}),
|
|
1564
|
-
...(paper.title ? { title: paper.title } : {}),
|
|
1565
|
-
...(paper.url ? { url: paper.url } : {}),
|
|
1566
|
-
...(typeof paper.score === "number" && Number.isFinite(paper.score) ? { score: paper.score } : {}),
|
|
1567
|
-
...(paper.reason ? { reason: paper.reason } : {}),
|
|
1568
|
-
}));
|
|
1569
|
-
let effectivePapers = args.papers.length > 0
|
|
1570
|
-
? args.papers
|
|
1571
|
-
: normalizedPapersFromKnowledgeState.length > 0
|
|
1572
|
-
? normalizedPapersFromKnowledgeState
|
|
1573
|
-
: [];
|
|
1574
|
-
const incomingRunLog = args.knowledgeState?.runLog
|
|
1575
|
-
? { ...args.knowledgeState.runLog }
|
|
1576
|
-
: undefined;
|
|
1577
|
-
const incomingRunProfile = incomingRunLog?.runProfile === "fast" || incomingRunLog?.runProfile === "strict"
|
|
1578
|
-
? incomingRunLog.runProfile
|
|
1579
|
-
: undefined;
|
|
1580
|
-
let effectiveRunLog = incomingRunLog ? { ...incomingRunLog } : undefined;
|
|
1581
|
-
if (incomingRunProfile === "strict" && effectiveRunLog) {
|
|
1582
|
-
const requiredCoreRaw = typeof effectiveRunLog.requiredCorePapers === "number" && Number.isFinite(effectiveRunLog.requiredCorePapers)
|
|
1583
|
-
? Math.floor(effectiveRunLog.requiredCorePapers)
|
|
1584
|
-
: 0;
|
|
1585
|
-
if (requiredCoreRaw > 0) {
|
|
1586
|
-
effectiveRunLog.requiredCorePapers = Math.max(1, requiredCoreRaw);
|
|
1587
|
-
}
|
|
1588
|
-
else {
|
|
1589
|
-
effectiveRunLog.requiredCorePapers = Math.max(1, Math.min(topicState.preferences.maxPapers, DEFAULT_STRICT_MIN_CORE_FLOOR));
|
|
1590
|
-
}
|
|
1591
|
-
if (typeof effectiveRunLog.requiredFullTextCoveragePct !== "number" ||
|
|
1592
|
-
!Number.isFinite(effectiveRunLog.requiredFullTextCoveragePct) ||
|
|
1593
|
-
effectiveRunLog.requiredFullTextCoveragePct < 80) {
|
|
1594
|
-
effectiveRunLog.requiredFullTextCoveragePct = 80;
|
|
1595
|
-
}
|
|
1596
|
-
}
|
|
1597
|
-
let effectiveKnowledgeState = args.knowledgeState || effectiveRunLog
|
|
1598
|
-
? {
|
|
1599
|
-
...(args.knowledgeState ?? {}),
|
|
1600
|
-
...(effectiveRunLog ? { runLog: effectiveRunLog } : {}),
|
|
1601
|
-
}
|
|
1602
|
-
: undefined;
|
|
1603
|
-
const knownPaperCount = Object.keys(topicState.pushedPapers).length;
|
|
1604
|
-
const baseRequirementProfile = inferRequirementProfile([
|
|
1605
|
-
topicState.topic,
|
|
1606
|
-
args.note,
|
|
1607
|
-
effectiveRunLog?.notes,
|
|
1608
|
-
effectiveKnowledgeState?.runLog?.notes,
|
|
1609
|
-
]
|
|
1610
|
-
.filter((item) => Boolean(item && item.trim().length > 0))
|
|
1611
|
-
.join(" "));
|
|
1612
|
-
const adaptiveStage = pickAdaptiveResearchStage({
|
|
1613
|
-
totalRuns: topicState.totalRuns,
|
|
1614
|
-
knownPaperCount,
|
|
1615
|
-
hasSubmittedHypothesis: (effectiveKnowledgeState?.hypotheses?.length ?? 0) > 0,
|
|
1616
|
-
emptyRunStreak: Math.max(0, Math.floor(topicState.emptyRunStreak ?? 0)),
|
|
1617
|
-
});
|
|
1618
|
-
const requirementProfile = deriveAdaptiveRequirementProfile({
|
|
1619
|
-
base: baseRequirementProfile,
|
|
1620
|
-
stage: adaptiveStage,
|
|
1621
|
-
});
|
|
1622
|
-
const adaptiveTierRatios = deriveAdaptiveRecallTierRatios(adaptiveStage);
|
|
1623
|
-
if (effectiveRunLog || effectiveKnowledgeState?.runLog) {
|
|
1624
|
-
const mergedRunLog = {
|
|
1625
|
-
...(effectiveRunLog ?? effectiveKnowledgeState?.runLog ?? {}),
|
|
1626
|
-
notes: [
|
|
1627
|
-
effectiveRunLog?.notes,
|
|
1628
|
-
effectiveKnowledgeState?.runLog?.notes,
|
|
1629
|
-
`adaptive_stage=${adaptiveStage} known_papers=${knownPaperCount} total_runs=${topicState.totalRuns}`,
|
|
1630
|
-
]
|
|
1631
|
-
.filter((item) => Boolean(item && item.trim().length > 0))
|
|
1632
|
-
.join(" || "),
|
|
1633
|
-
};
|
|
1634
|
-
effectiveRunLog = mergedRunLog;
|
|
1635
|
-
effectiveKnowledgeState = {
|
|
1636
|
-
...(effectiveKnowledgeState ?? {}),
|
|
1637
|
-
runLog: mergedRunLog,
|
|
1638
|
-
};
|
|
1639
|
-
}
|
|
1640
|
-
if (incomingRunProfile === "strict") {
|
|
1641
|
-
const strictMinCoreFloor = Math.max(1, Math.min(topicState.preferences.maxPapers, DEFAULT_STRICT_MIN_CORE_FLOOR));
|
|
1642
|
-
const requiredCoreFloor = Math.max(1, Math.min(topicState.preferences.maxPapers, effectiveRunLog?.requiredCorePapers ?? strictMinCoreFloor));
|
|
1643
|
-
const strictCandidatePool = Math.max(DEFAULT_STRICT_CANDIDATE_POOL, topicState.preferences.maxPapers * 4, adaptiveStage === "bootstrap" ? 30 : 0, adaptiveStage === "hypothesis_validation" ? 28 : 0);
|
|
1644
|
-
const existingCorePapers = effectiveKnowledgeState?.corePapers ?? [];
|
|
1645
|
-
const strictSignalCount = Math.max(existingCorePapers.length, effectivePapers.length);
|
|
1646
|
-
if (strictSignalCount < requiredCoreFloor) {
|
|
1647
|
-
const knownIds = new Set(Object.keys(topicState.pushedPapers));
|
|
1648
|
-
for (const paper of effectivePapers)
|
|
1649
|
-
knownIds.add(derivePaperId(paper));
|
|
1650
|
-
for (const paper of existingCorePapers) {
|
|
1651
|
-
knownIds.add(derivePaperId({ id: paper.id, title: paper.title, url: paper.url }));
|
|
1652
|
-
}
|
|
1653
|
-
const fallback = await strictCoreFallbackSeed({
|
|
1654
|
-
topic: topicState.topic,
|
|
1655
|
-
maxPapers: topicState.preferences.maxPapers,
|
|
1656
|
-
candidatePool: strictCandidatePool,
|
|
1657
|
-
minCoreFloor: requiredCoreFloor,
|
|
1658
|
-
knownPaperIds: knownIds,
|
|
1659
|
-
requirements: requirementProfile,
|
|
1660
|
-
tierRatios: adaptiveTierRatios,
|
|
1661
|
-
});
|
|
1662
|
-
if (fallback.papers.length > 0) {
|
|
1663
|
-
const existingIds = new Set(effectivePapers.map((paper) => derivePaperId(paper)));
|
|
1664
|
-
let fallbackPapers = fallback.papers.filter((paper) => !existingIds.has(derivePaperId(paper)));
|
|
1665
|
-
const needed = Math.max(0, requiredCoreFloor - strictSignalCount);
|
|
1666
|
-
if (needed > 0) {
|
|
1667
|
-
if (fallbackPapers.length === 0)
|
|
1668
|
-
fallbackPapers = fallback.papers;
|
|
1669
|
-
fallbackPapers = fallbackPapers.slice(0, needed);
|
|
1670
|
-
}
|
|
1671
|
-
const fallbackIds = new Set(fallbackPapers.map((paper) => derivePaperId(paper)));
|
|
1672
|
-
const fallbackCore = fallback.corePapers.filter((paper) => fallbackIds.has(derivePaperId({ id: paper.id, title: paper.title, url: paper.url })));
|
|
1673
|
-
effectivePapers = dedupePaperRecords([...effectivePapers, ...fallbackPapers]);
|
|
1674
|
-
const mergedRunLog = {
|
|
1675
|
-
...(effectiveRunLog ?? { runProfile: "strict" }),
|
|
1676
|
-
recallTierStats: fallback.recallTierStats,
|
|
1677
|
-
notes: [
|
|
1678
|
-
effectiveRunLog?.notes,
|
|
1679
|
-
fallback.notes,
|
|
1680
|
-
`strict_core_topup required=${requiredCoreFloor} before=${strictSignalCount} added=${fallbackPapers.length} stage=${adaptiveStage}`,
|
|
1681
|
-
]
|
|
1682
|
-
.filter((item) => Boolean(item && item.trim().length > 0))
|
|
1683
|
-
.join(" || "),
|
|
1684
|
-
};
|
|
1685
|
-
effectiveRunLog = mergedRunLog;
|
|
1686
|
-
effectiveKnowledgeState = {
|
|
1687
|
-
...(effectiveKnowledgeState ?? {}),
|
|
1688
|
-
corePapers: dedupeKnowledgePapers([...(effectiveKnowledgeState?.corePapers ?? []), ...fallbackCore]),
|
|
1689
|
-
explorationTrace: [
|
|
1690
|
-
...(effectiveKnowledgeState?.explorationTrace ?? []),
|
|
1691
|
-
...fallback.explorationTrace,
|
|
1692
|
-
],
|
|
1693
|
-
runLog: mergedRunLog,
|
|
1694
|
-
};
|
|
1695
|
-
}
|
|
1696
|
-
}
|
|
1697
|
-
}
|
|
1698
|
-
if (incomingRunProfile === "strict") {
|
|
1699
|
-
const strictCoreFromState = effectiveKnowledgeState?.corePapers ?? [];
|
|
1700
|
-
const strictCoreSeed = strictCoreFromState.length > 0
|
|
1701
|
-
? strictCoreFromState
|
|
1702
|
-
: effectivePapers.map((paper) => ({
|
|
1703
|
-
...(paper.id ? { id: paper.id } : {}),
|
|
1704
|
-
...(paper.title ? { title: paper.title } : {}),
|
|
1705
|
-
...(paper.url ? { url: paper.url } : {}),
|
|
1706
|
-
...(typeof paper.score === "number" && Number.isFinite(paper.score) ? { score: paper.score } : {}),
|
|
1707
|
-
...(paper.reason ? { reason: paper.reason } : {}),
|
|
1708
|
-
fullTextRead: false,
|
|
1709
|
-
readStatus: "metadata",
|
|
1710
|
-
unreadReason: "Full text not fetched yet; pending strict full-text bootstrap.",
|
|
1711
|
-
}));
|
|
1712
|
-
if (strictCoreSeed.length > 0) {
|
|
1713
|
-
const strictAttemptLimit = Math.max(1, Math.min(MAX_STRICT_FULLTEXT_ATTEMPTS, effectiveRunLog?.requiredCorePapers ?? strictCoreSeed.length));
|
|
1714
|
-
const backfilled = await backfillStrictCoreFullText({
|
|
1715
|
-
corePapers: strictCoreSeed,
|
|
1716
|
-
maxAttempts: strictAttemptLimit,
|
|
1717
|
-
});
|
|
1718
|
-
const strictRunLog = {
|
|
1719
|
-
...(effectiveRunLog ?? { runProfile: "strict" }),
|
|
1720
|
-
fullTextAttempted: backfilled.attempted,
|
|
1721
|
-
fullTextCompleted: backfilled.completed,
|
|
1722
|
-
notes: [
|
|
1723
|
-
effectiveRunLog?.notes,
|
|
1724
|
-
`strict_fulltext_bootstrap attempted=${backfilled.attempted} completed=${backfilled.completed}`,
|
|
1725
|
-
...(backfilled.failures.length > 0
|
|
1726
|
-
? [`strict_fulltext_failures=${backfilled.failures.slice(0, 8).join(" | ")}`]
|
|
1727
|
-
: []),
|
|
1728
|
-
]
|
|
1729
|
-
.filter((item) => Boolean(item && item.trim().length > 0))
|
|
1730
|
-
.join(" || "),
|
|
1731
|
-
};
|
|
1732
|
-
effectiveRunLog = strictRunLog;
|
|
1733
|
-
effectiveKnowledgeState = {
|
|
1734
|
-
...(effectiveKnowledgeState ?? {}),
|
|
1735
|
-
corePapers: backfilled.corePapers,
|
|
1736
|
-
runLog: strictRunLog,
|
|
1737
|
-
};
|
|
1738
|
-
}
|
|
1739
|
-
}
|
|
1740
|
-
const reflection = await executeSingleStepReflection({
|
|
1741
|
-
topic: topicState.topic,
|
|
1742
|
-
knownPaperIds: new Set(Object.keys(topicState.pushedPapers)),
|
|
1743
|
-
effectivePapers,
|
|
1744
|
-
knowledgeState: effectiveKnowledgeState,
|
|
1745
|
-
});
|
|
1746
|
-
const reflectionRunLogBase = effectiveRunLog ??
|
|
1747
|
-
(incomingRunProfile ? { runProfile: incomingRunProfile } : undefined);
|
|
1748
|
-
if (reflection.executed) {
|
|
1749
|
-
const reflectionPaperRecords = reflection.papers.map((paper) => ({
|
|
1750
|
-
...(paper.id ? { id: paper.id } : {}),
|
|
1751
|
-
...(paper.title ? { title: paper.title } : {}),
|
|
1752
|
-
...(paper.url ? { url: paper.url } : {}),
|
|
1753
|
-
...(typeof paper.score === "number" && Number.isFinite(paper.score) ? { score: paper.score } : {}),
|
|
1754
|
-
reason: "single_step_reflection_followup",
|
|
1755
|
-
}));
|
|
1756
|
-
effectivePapers = dedupePaperRecords([...effectivePapers, ...reflectionPaperRecords]);
|
|
1757
|
-
const mergedRunLog = {
|
|
1758
|
-
...(reflectionRunLogBase ?? {}),
|
|
1759
|
-
reflectionStepExecuted: true,
|
|
1760
|
-
reflectionStepResultCount: reflection.resultCount,
|
|
1761
|
-
notes: [
|
|
1762
|
-
reflectionRunLogBase?.notes,
|
|
1763
|
-
`single_step_reflection result_count=${reflection.resultCount}`,
|
|
1764
|
-
]
|
|
1765
|
-
.filter((item) => Boolean(item && item.trim().length > 0))
|
|
1766
|
-
.join(" || "),
|
|
1767
|
-
};
|
|
1768
|
-
effectiveRunLog = mergedRunLog;
|
|
1769
|
-
effectiveKnowledgeState = {
|
|
1770
|
-
...(effectiveKnowledgeState ?? {}),
|
|
1771
|
-
explorationTrace: [
|
|
1772
|
-
...(effectiveKnowledgeState?.explorationTrace ?? []),
|
|
1773
|
-
...(reflection.trace ? [reflection.trace] : []),
|
|
1774
|
-
],
|
|
1775
|
-
explorationPapers: dedupeKnowledgePapers([
|
|
1776
|
-
...(effectiveKnowledgeState?.explorationPapers ?? []),
|
|
1777
|
-
...reflection.papers,
|
|
1778
|
-
]),
|
|
1779
|
-
knowledgeChanges: [
|
|
1780
|
-
...(effectiveKnowledgeState?.knowledgeChanges ?? []),
|
|
1781
|
-
...(reflection.changes ?? []),
|
|
1782
|
-
],
|
|
1783
|
-
runLog: mergedRunLog,
|
|
1784
|
-
};
|
|
1785
|
-
}
|
|
1786
|
-
else if (reflectionRunLogBase) {
|
|
1787
|
-
const mergedRunLog = {
|
|
1788
|
-
...reflectionRunLogBase,
|
|
1789
|
-
reflectionStepExecuted: false,
|
|
1790
|
-
reflectionStepResultCount: 0,
|
|
1791
|
-
};
|
|
1792
|
-
effectiveRunLog = mergedRunLog;
|
|
1793
|
-
effectiveKnowledgeState = {
|
|
1794
|
-
...(effectiveKnowledgeState ?? {}),
|
|
1795
|
-
runLog: mergedRunLog,
|
|
1796
|
-
};
|
|
1797
|
-
}
|
|
1798
|
-
const autoEvidence = applyLightweightEvidenceBinding({
|
|
1799
|
-
knowledgeState: effectiveKnowledgeState,
|
|
1800
|
-
runProfile: incomingRunProfile,
|
|
1801
|
-
});
|
|
1802
|
-
effectiveKnowledgeState = autoEvidence.knowledgeState;
|
|
1803
|
-
if (autoEvidence.anchorsAdded > 0 || autoEvidence.evidenceIdsFilled > 0) {
|
|
1804
|
-
effectiveRunLog = effectiveKnowledgeState?.runLog
|
|
1805
|
-
? { ...effectiveKnowledgeState.runLog }
|
|
1806
|
-
: effectiveRunLog;
|
|
1807
|
-
}
|
|
1808
|
-
const statusRaw = normalizeText(args.status ?? "").toLowerCase();
|
|
1809
|
-
const researchArtifactsCount = effectivePapers.length +
|
|
1810
|
-
(effectiveKnowledgeState?.explorationPapers?.length ?? 0) +
|
|
1811
|
-
(effectiveKnowledgeState?.knowledgeChanges?.length ?? 0) +
|
|
1812
|
-
(effectiveKnowledgeState?.knowledgeUpdates?.length ?? 0) +
|
|
1813
|
-
(effectiveKnowledgeState?.hypotheses?.length ?? 0) +
|
|
1814
|
-
(effectiveKnowledgeState?.explorationTrace?.length ?? 0);
|
|
1815
|
-
let normalizedStatus = statusRaw.length > 0 ? statusRaw : undefined;
|
|
1816
|
-
if ((normalizedStatus ?? "") === "empty" && researchArtifactsCount === 0) {
|
|
1817
|
-
const emptyDiagnosis = buildEmptyCycleAutoDiagnosis({
|
|
1818
|
-
topic: topicState.topic,
|
|
1819
|
-
stage: adaptiveStage,
|
|
1820
|
-
knownPaperCount,
|
|
1821
|
-
totalRuns: topicState.totalRuns,
|
|
1822
|
-
emptyRunStreak: Math.max(0, Math.floor(topicState.emptyRunStreak ?? 0)),
|
|
1823
|
-
});
|
|
1824
|
-
const mergedRunLog = {
|
|
1825
|
-
...(effectiveRunLog ?? effectiveKnowledgeState?.runLog ?? { runProfile: incomingRunProfile ?? "strict" }),
|
|
1826
|
-
notes: [
|
|
1827
|
-
effectiveRunLog?.notes,
|
|
1828
|
-
effectiveKnowledgeState?.runLog?.notes,
|
|
1829
|
-
`empty_cycle_diagnosis=${emptyDiagnosis.diagnosis}`,
|
|
1830
|
-
`empty_cycle_next_queries=${emptyDiagnosis.nextQueries.join(" || ")}`,
|
|
1831
|
-
`empty_cycle_pivot_hint=${emptyDiagnosis.pivotHint}`,
|
|
1832
|
-
]
|
|
1833
|
-
.filter((item) => Boolean(item && item.trim().length > 0))
|
|
1834
|
-
.join(" || "),
|
|
1835
|
-
reflectionStepExecuted: false,
|
|
1836
|
-
reflectionStepResultCount: 0,
|
|
1837
|
-
};
|
|
1838
|
-
effectiveRunLog = mergedRunLog;
|
|
1839
|
-
effectiveKnowledgeState = {
|
|
1840
|
-
...(effectiveKnowledgeState ?? {}),
|
|
1841
|
-
runLog: mergedRunLog,
|
|
1842
|
-
};
|
|
1843
|
-
}
|
|
1844
|
-
const coercedFromEmptyWithArtifacts = normalizedStatus === "empty" && researchArtifactsCount > 0;
|
|
1845
|
-
if (coercedFromEmptyWithArtifacts) {
|
|
1846
|
-
normalizedStatus = "degraded_quality";
|
|
1847
|
-
}
|
|
1848
|
-
const hasRunError = Boolean(effectiveKnowledgeState?.runLog?.error && normalizeText(effectiveKnowledgeState.runLog.error).length > 0);
|
|
1849
|
-
const requiresArtifacts = normalizedStatus === "ok" || normalizedStatus === "fallback_representative" || normalizedStatus === "degraded_quality";
|
|
1850
|
-
if (requiresArtifacts && researchArtifactsCount === 0 && !hasRunError) {
|
|
1851
|
-
throw new Error("record payload has no research artifacts. Use status=empty for no-result runs, or include run_log.error for failed runs.");
|
|
1852
|
-
}
|
|
1853
505
|
let recordedPapers = 0;
|
|
1854
|
-
for (const rawPaper of
|
|
506
|
+
for (const rawPaper of args.papers) {
|
|
1855
507
|
const id = derivePaperId(rawPaper);
|
|
1856
508
|
const existing = topicState.pushedPapers[id];
|
|
1857
509
|
if (existing) {
|
|
@@ -1886,12 +538,7 @@ export async function recordIncrementalPush(args) {
|
|
|
1886
538
|
}
|
|
1887
539
|
topicState.totalRuns += 1;
|
|
1888
540
|
topicState.lastRunAtMs = now;
|
|
1889
|
-
topicState.lastStatus =
|
|
1890
|
-
const effectiveNote = coercedFromEmptyWithArtifacts
|
|
1891
|
-
? [args.note?.trim(), "status coerced: empty -> degraded_quality because research artifacts were present"]
|
|
1892
|
-
.filter((item) => Boolean(item && item.length > 0))
|
|
1893
|
-
.join(" | ")
|
|
1894
|
-
: args.note;
|
|
541
|
+
topicState.lastStatus = args.status?.trim() || (recordedPapers > 0 ? "ok" : "empty");
|
|
1895
542
|
const knowledgeCommitted = await commitKnowledgeRun({
|
|
1896
543
|
projectId: args.projectId ?? topicState.lastProjectId,
|
|
1897
544
|
scope: topicState.scope,
|
|
@@ -1899,17 +546,11 @@ export async function recordIncrementalPush(args) {
|
|
|
1899
546
|
topicKey: topicState.topicKey,
|
|
1900
547
|
status: topicState.lastStatus,
|
|
1901
548
|
runId: args.runId,
|
|
1902
|
-
note:
|
|
1903
|
-
papers:
|
|
1904
|
-
knowledgeState:
|
|
549
|
+
note: args.note,
|
|
550
|
+
papers: args.papers,
|
|
551
|
+
knowledgeState: args.knowledgeState,
|
|
1905
552
|
});
|
|
1906
553
|
topicState.lastStatus = knowledgeCommitted.summary.lastStatus ?? topicState.lastStatus;
|
|
1907
|
-
if (topicState.lastStatus === "empty") {
|
|
1908
|
-
topicState.emptyRunStreak = Math.max(0, Math.floor(topicState.emptyRunStreak ?? 0)) + 1;
|
|
1909
|
-
}
|
|
1910
|
-
else {
|
|
1911
|
-
topicState.emptyRunStreak = 0;
|
|
1912
|
-
}
|
|
1913
554
|
topicState.lastProjectId = knowledgeCommitted.projectId;
|
|
1914
555
|
await saveState(root);
|
|
1915
556
|
await appendPushLog({
|
|
@@ -1919,20 +560,18 @@ export async function recordIncrementalPush(args) {
|
|
|
1919
560
|
topicKey: topicState.topicKey,
|
|
1920
561
|
status: topicState.lastStatus,
|
|
1921
562
|
runId: knowledgeCommitted.runId,
|
|
1922
|
-
run_id: knowledgeCommitted.runId,
|
|
1923
|
-
run_profile: effectiveKnowledgeState?.runLog?.runProfile ?? null,
|
|
1924
563
|
projectId: knowledgeCommitted.projectId,
|
|
1925
564
|
streamKey: knowledgeCommitted.streamKey,
|
|
1926
565
|
preferences: topicState.preferences,
|
|
1927
566
|
recordedPapers,
|
|
1928
|
-
papers:
|
|
567
|
+
papers: args.papers.map((paper) => ({
|
|
1929
568
|
id: derivePaperId(paper),
|
|
1930
569
|
title: paper.title?.trim(),
|
|
1931
570
|
url: paper.url?.trim(),
|
|
1932
571
|
...(typeof paper.score === "number" && Number.isFinite(paper.score) ? { score: paper.score } : {}),
|
|
1933
572
|
...(paper.reason ? { reason: paper.reason.trim() } : {}),
|
|
1934
573
|
})),
|
|
1935
|
-
note:
|
|
574
|
+
note: args.note,
|
|
1936
575
|
knowledgeStateSummary: knowledgeCommitted.summary,
|
|
1937
576
|
});
|
|
1938
577
|
return {
|
|
@@ -1941,7 +580,6 @@ export async function recordIncrementalPush(args) {
|
|
|
1941
580
|
topicKey: topicState.topicKey,
|
|
1942
581
|
preferences: topicState.preferences,
|
|
1943
582
|
memoryHints: buildMemoryHints(memory),
|
|
1944
|
-
runId: knowledgeCommitted.runId,
|
|
1945
583
|
recordedPapers,
|
|
1946
584
|
totalKnownPapers: Object.keys(topicState.pushedPapers).length,
|
|
1947
585
|
pushedAtMs: now,
|
|
@@ -2037,11 +675,6 @@ export async function getIncrementalStateStatus(args) {
|
|
|
2037
675
|
const lastPushedAtMs = excludePaperIds.length
|
|
2038
676
|
? topicState.pushedPapers[excludePaperIds[0]]?.lastPushedAtMs
|
|
2039
677
|
: undefined;
|
|
2040
|
-
const knowledgeStateMissingReason = knowledgeSummaryResult === undefined
|
|
2041
|
-
? args.projectId || topicState.lastProjectId
|
|
2042
|
-
? "project_or_stream_not_found"
|
|
2043
|
-
: "project_unbound"
|
|
2044
|
-
: undefined;
|
|
2045
678
|
return {
|
|
2046
679
|
scope: topicState.scope,
|
|
2047
680
|
topic: topicState.topic,
|
|
@@ -2055,7 +688,6 @@ export async function getIncrementalStateStatus(args) {
|
|
|
2055
688
|
...(topicState.lastStatus ? { lastStatus: topicState.lastStatus } : {}),
|
|
2056
689
|
recentPapers: recentPapersByRecency(topicState.pushedPapers, 10),
|
|
2057
690
|
...(knowledgeSummaryResult ? { knowledgeStateSummary: knowledgeSummaryResult.summary } : {}),
|
|
2058
|
-
...(knowledgeStateMissingReason ? { knowledgeStateMissingReason } : {}),
|
|
2059
691
|
recentHypotheses: knowledgeSummaryResult?.summary.recentHypotheses ?? [],
|
|
2060
692
|
recentChangeStats: knowledgeSummaryResult?.summary.recentChangeStats ?? [],
|
|
2061
693
|
lastExplorationTrace: knowledgeSummaryResult?.summary.lastExplorationTrace ?? [],
|