agenr 0.9.60 → 0.9.61

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,17 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.9.61] - 2026-03-03
4
+
5
+ ### Added
6
+
7
+ - **Synthetic cold-start signals** (#417 Phase 3): `agenr ingest --bulk --synthetic` now runs a post-ingest cross-session mention analysis pass. Entries that appear across multiple session transcripts receive synthetic recall events so they start with meaningful recall profiles instead of flat zero-history signals.
8
+ - `--synthetic` and `--synthetic-dry-run` flags for `agenr ingest`.
9
+ - Quality score seeding from synthetic recall signals with four tiers (`0.6`, `0.65`, `0.7`, `0.8`) based on distinct session count and temporal spread. Seeding is guarded so only entries still at `quality_score = 0.5` are updated.
10
+
11
+ ### Improvements
12
+
13
+ - Synthetic event generation writes flat `signal_value = 0.4` per event (frequency is carried by event count), caps ANN fan-out at top-5 neighbors per entry, and uses chunked transaction batches with `INSERT OR IGNORE` idempotency on the existing synthetic dedup index.
14
+
3
15
  ## 0.9.60 (2026-03-03)
4
16
 
5
17
  ### Features
package/dist/cli-main.js CHANGED
@@ -81,6 +81,7 @@ import {
81
81
  clearBulkIngestMeta,
82
82
  closeDb,
83
83
  composeEmbeddingText,
84
+ cosineSimilarity,
84
85
  createLlmClient,
85
86
  createLogger,
86
87
  describeAuth,
@@ -93,6 +94,7 @@ import {
93
94
  initSchema,
94
95
  isCompleteConfig,
95
96
  isRecord,
97
+ mapBufferToVector,
96
98
  maskSecret,
97
99
  mergeConfigPatch,
98
100
  normalizeLabel,
@@ -122,6 +124,7 @@ import {
122
124
  getTopCoRecallEdges,
123
125
  parseDaysBetween,
124
126
  toNumber,
127
+ toRowsAffected,
125
128
  toStringValue
126
129
  } from "./chunk-D7W3PO7U.js";
127
130
 
@@ -11355,6 +11358,415 @@ import fs24 from "fs/promises";
11355
11358
  import path22 from "path";
11356
11359
  import * as clack9 from "@clack/prompts";
11357
11360
 
11361
+ // src/db/recall/synthetic.ts
11362
+ import { randomUUID } from "crypto";
11363
+ var DEFAULT_SIMILARITY_THRESHOLD = 0.75; // fallback similarity cutoff used by normalizeThreshold when the option is not a finite number
11364
+ var DEFAULT_MIN_SESSION_COUNT = 2; // fallback for options.minSessionCount: distinct foreign sessions an entry needs to qualify
11365
+ var VECTOR_FAN_OUT = 5; // k passed to the vector search for every analyzed entry
11366
+ var DEFAULT_MAX_EVENTS_PER_ENTRY = VECTOR_FAN_OUT; // fallback for options.maxEventsPerEntry; tied to the search fan-out
11367
+ var DEFAULT_SIGNAL_VALUE = 0.4; // signal_value written on every synthetic recall event
11368
+ var INSERT_BATCH_SIZE = 500; // number of events inserted per BEGIN IMMEDIATE ... COMMIT chunk
11369
/**
 * Build a de-duplicated list of trimmed, non-empty source file names.
 *
 * @param {string[] | null | undefined} sourceFiles - Raw names; may be missing or empty.
 * @returns {string[]} Trimmed names in first-seen order with blanks and duplicates removed.
 */
function normalizeSourceFiles(sourceFiles) {
  if (!sourceFiles || sourceFiles.length === 0) {
    return [];
  }
  // A Set keeps first-seen order while dropping repeats.
  const unique = new Set();
  sourceFiles.forEach((raw) => {
    const name = raw.trim();
    if (name.length > 0) {
      unique.add(name);
    }
  });
  return [...unique];
}
11379
/**
 * Clamp a similarity threshold into the closed range [0, 1].
 *
 * @param {unknown} value - Caller-supplied threshold.
 * @returns {number} DEFAULT_SIMILARITY_THRESHOLD when `value` is not a finite
 *   number; otherwise `value` limited to the range 0..1.
 */
function normalizeThreshold(value) {
  if (!Number.isFinite(value)) {
    return DEFAULT_SIMILARITY_THRESHOLD;
  }
  // Number.isFinite only passes real numbers, so the clamp operates on a number.
  return Math.min(1, Math.max(0, Number(value)));
}
11392
/**
 * Coerce an option to a positive integer, or use the fallback.
 *
 * @param {unknown} value - Caller-supplied number.
 * @param {number} fallback - Returned when `value` is not a finite number or floors below 1.
 * @returns {number} `Math.floor(value)` when that is at least 1, else `fallback`.
 */
function normalizePositiveInt2(value, fallback) {
  // NaN fails the `>= 1` comparison, so non-finite input falls through to the fallback.
  const whole = Number.isFinite(value) ? Math.floor(Number(value)) : Number.NaN;
  return whole >= 1 ? whole : fallback;
}
11402
/**
 * Convert a column value to trimmed text, mapping blank results to null.
 *
 * @param {unknown} value - Raw value; stringified by the imported `toStringValue` helper.
 * @returns {string | null} The trimmed text, or null when nothing remains after trimming.
 */
function normalizeNullableText(value) {
  const trimmed = toStringValue(value).trim();
  if (trimmed.length === 0) {
    return null;
  }
  return trimmed;
}
11406
/**
 * Normalize a timestamp-like value to an ISO-8601 string.
 *
 * @param {unknown} value - Anything accepted by the `Date` constructor.
 * @param {string} fallbackIso - Returned when `value` is missing or unparseable.
 * @returns {string} `toISOString()` of the parsed date, or `fallbackIso`.
 */
function normalizeTimestamp(value, fallbackIso) {
  // `new Date(null)` is the Unix epoch rather than an invalid date, so a
  // missing value must be rejected explicitly or it would be stamped 1970-01-01.
  if (value === null || value === undefined) {
    return fallbackIso;
  }
  const parsed = new Date(value);
  return Number.isNaN(parsed.getTime()) ? fallbackIso : parsed.toISOString();
}
11413
/**
 * Load the active entries that the synthetic pass should analyze.
 *
 * Only rows with `retired = 0` and `superseded_by IS NULL` are selected. When
 * `sourceFiles` is non-empty the query is further limited to those files via a
 * parameterized `IN (...)` list.
 *
 * @param {{ execute: Function }} db - Database client exposing `execute({ sql, args })`.
 * @param {string[]} sourceFiles - Already-normalized file names; an empty array means no file scoping.
 * @returns {Promise<object[]>} Candidate records (id, sourceFile, createdAt,
 *   subject, fingerprint, platform, project, embedding); rows with an empty id are dropped.
 */
async function loadEntryCandidates(db, sourceFiles) {
  let scopedClause = "";
  if (sourceFiles.length > 0) {
    // One bound parameter per file name; the names themselves go in `args`.
    const scopedPlaceholders = sourceFiles.map(() => "?").join(", ");
    scopedClause = `AND source_file IN (${scopedPlaceholders})`;
  }
  const { rows } = await db.execute({
    sql: `
      SELECT
        id,
        source_file,
        created_at,
        subject,
        norm_content_hash,
        platform,
        project,
        embedding
      FROM entries
      WHERE retired = 0
        AND superseded_by IS NULL
        ${scopedClause}
    `,
    args: sourceFiles
  });
  const candidates = [];
  for (const row of rows) {
    const sourceFile = toStringValue(row.source_file).trim();
    const candidate = {
      id: toStringValue(row.id),
      sourceFile,
      createdAt: toStringValue(row.created_at),
      subject: normalizeNullableText(row.subject),
      fingerprint: normalizeNullableText(row.norm_content_hash),
      platform: normalizeNullableText(row.platform),
      project: normalizeNullableText(row.project),
      // A missing embedding stays null; callers skip such entries.
      embedding: row.embedding ? mapBufferToVector(row.embedding) : null
    };
    if (candidate.id.length > 0) {
      candidates.push(candidate);
    }
  }
  return candidates;
}
11448
/**
 * Load session metadata for every active entry, keyed by entry id.
 *
 * The query is not scoped to particular source files; it selects all rows with
 * `retired = 0` and `superseded_by IS NULL`.
 *
 * @param {{ execute: Function }} db - Database client exposing `execute({ sql })`.
 * @returns {Promise<Map<string, { sourceFile: string, createdAt: string, platform: (string|null), project: (string|null) }>>}
 *   Metadata by entry id; rows whose id stringifies to "" are skipped.
 */
async function loadEntryMetadata(db) {
  const { rows } = await db.execute({
    sql: `
      SELECT id, source_file, created_at, platform, project
      FROM entries
      WHERE retired = 0
        AND superseded_by IS NULL
    `
  });
  const metadata = new Map();
  rows.forEach((row) => {
    const id = toStringValue(row.id);
    if (id.length > 0) {
      metadata.set(id, {
        sourceFile: toStringValue(row.source_file).trim(),
        createdAt: toStringValue(row.created_at),
        platform: normalizeNullableText(row.platform),
        project: normalizeNullableText(row.project)
      });
    }
  });
  return metadata;
}
11472
/**
 * Build the default nearest-neighbor search used by the synthetic pass.
 *
 * @param {{ execute: Function }} db - Database client exposing `execute({ sql, args })`.
 * @param {Map<string, number[]>} embeddingsByEntryId - Query embeddings keyed by entry id.
 * @returns {(entryId: string, k: number, threshold: number) => Promise<Array<{ id: string, score: number }>>}
 *   Search function returning at most `k` neighbors whose cosine similarity is
 *   at least `threshold`, best first. Resolves to [] when the entry has no
 *   stored embedding.
 */
function createDefaultVectorSearch(db, embeddingsByEntryId) {
  const neighborSql = `
      SELECT e.id, e.embedding
      FROM vector_top_k('idx_entries_embedding', vector32(?), ?) AS v
      CROSS JOIN entries AS e ON e.rowid = v.id
      WHERE e.embedding IS NOT NULL
        AND e.retired = 0
        AND e.superseded_by IS NULL
    `;
  return async function searchNeighbors(entryId, k, threshold) {
    const queryEmbedding = embeddingsByEntryId.get(entryId);
    if (!queryEmbedding || queryEmbedding.length === 0) {
      return [];
    }
    // Request k + 1 rows because the entry itself is filtered out below.
    const { rows } = await db.execute({
      sql: neighborSql,
      args: [JSON.stringify(queryEmbedding), k + 1]
    });
    const scored = [];
    for (const row of rows) {
      const id = toStringValue(row.id);
      const isUsableId = id.length > 0 && id !== entryId;
      if (!isUsableId) {
        continue;
      }
      const embedding = mapBufferToVector(row.embedding);
      if (embedding.length === 0) {
        continue;
      }
      // Similarity is recomputed locally from the stored vectors before thresholding.
      const score = cosineSimilarity(queryEmbedding, embedding);
      if (score >= threshold) {
        scored.push({ id, score });
      }
    }
    scored.sort((a, b) => b.score - a.score);
    return scored.slice(0, k);
  };
}
11508
/**
 * Collapse an entry's neighbors into at most one mention per foreign session.
 *
 * A neighbor counts only when its score is not below `similarityThreshold`,
 * metadata exists for it, and its trimmed source file is non-empty and
 * differs from the entry's own source file. When several neighbors share a
 * session, the highest-scoring one is kept.
 *
 * @param {{ sourceFile: string }} entry - Entry being analyzed.
 * @param {Array<{ id: string, score: number }>} neighbors - Vector search results.
 * @param {Map<string, object>} metadataById - Neighbor metadata keyed by entry id.
 * @param {number} similarityThreshold - Minimum score for a neighbor to count.
 * @param {string} fallbackIso - Timestamp used when a neighbor's createdAt cannot be parsed.
 * @returns {Array<{ sessionId: string, recalledAt: string, score: number, platform: (string|null), project: (string|null) }>}
 *   One mention per session, in first-seen session order.
 */
function toSyntheticQualityMentions(entry, neighbors, metadataById, similarityThreshold, fallbackIso) {
  const bestBySession = new Map();
  for (const neighbor of neighbors) {
    if (neighbor.score < similarityThreshold) {
      continue;
    }
    const metadata = metadataById.get(neighbor.id);
    if (!metadata) {
      continue;
    }
    const sessionId = metadata.sourceFile.trim();
    const isForeignSession = sessionId.length > 0 && sessionId !== entry.sourceFile;
    if (!isForeignSession) {
      continue;
    }
    // Keep the strongest mention per session; ties keep the earlier one.
    const previous = bestBySession.get(sessionId);
    if (previous && !(neighbor.score > previous.score)) {
      continue;
    }
    bestBySession.set(sessionId, {
      sessionId,
      recalledAt: normalizeTimestamp(metadata.createdAt, fallbackIso),
      score: neighbor.score,
      platform: metadata.platform,
      project: metadata.project
    });
  }
  return Array.from(bestBySession.values());
}
11536
/**
 * Persist synthetic recall events in chunked transactions.
 *
 * Events are written INSERT_BATCH_SIZE at a time, each chunk inside its own
 * `BEGIN IMMEDIATE` ... `COMMIT`. Rows use `INSERT OR IGNORE`, so the returned
 * count reflects only rows the database reports as affected. After a non-empty
 * input, `ANALYZE recall_events` and `PRAGMA optimize` are executed.
 *
 * @param {{ execute: Function }} db - Database client exposing `execute`.
 * @param {object[]} events - Events with entryId, fingerprint, subject,
 *   sessionId, platform, project and recalledAt.
 * @returns {Promise<number>} Total rows reported as inserted.
 * @throws Re-throws any failure after attempting to roll back the current chunk.
 */
async function insertSyntheticEvents(db, events) {
  const insertSql = `
            INSERT OR IGNORE INTO recall_events (
              id,
              entry_id,
              content_fingerprint,
              subject_key,
              session_id,
              platform,
              project,
              source,
              signal_type,
              signal_value,
              recalled_at,
              created_at
            )
            VALUES (?, ?, ?, ?, ?, ?, ?, 'synthetic', 'cross_session_mention', ?, ?, ?)
          `;
  let inserted = 0;
  let offset = 0;
  while (offset < events.length) {
    const batch = events.slice(offset, offset + INSERT_BATCH_SIZE);
    offset += INSERT_BATCH_SIZE;
    await db.execute("BEGIN IMMEDIATE");
    try {
      for (const event of batch) {
        const outcome = await db.execute({
          sql: insertSql,
          args: [
            randomUUID(),
            event.entryId,
            event.fingerprint,
            event.subject,
            event.sessionId,
            event.platform,
            event.project,
            DEFAULT_SIGNAL_VALUE,
            // recalled_at and created_at share the same timestamp.
            event.recalledAt,
            event.recalledAt
          ]
        });
        inserted += Math.max(0, toRowsAffected(outcome.rowsAffected));
      }
      await db.execute("COMMIT");
    } catch (error) {
      try {
        await db.execute("ROLLBACK");
      } catch {
        // Best-effort rollback: the original failure is the one worth reporting.
      }
      throw error;
    }
  }
  if (events.length === 0) {
    return inserted;
  }
  await db.execute("ANALYZE recall_events");
  await db.execute("PRAGMA optimize");
  return inserted;
}
11591
/**
 * Run the cross-session mention analysis and emit synthetic recall events.
 *
 * For each candidate entry that has an embedding, a fingerprint and a source
 * file, the vector search is queried for VECTOR_FAN_OUT neighbors. Neighbors
 * are grouped into one mention per foreign session; entries with at least
 * `minSessionCount` mentions qualify and contribute up to `maxEventsPerEntry`
 * events, strongest first.
 *
 * @param {object} options
 * @param {{ execute: Function }} options.db - Database client.
 * @param {string[]} [options.sourceFiles] - Limits which entries are analyzed.
 * @param {number} [options.similarityThreshold] - Clamped to 0..1; defaults when not finite.
 * @param {number} [options.minSessionCount] - Minimum distinct sessions to qualify.
 * @param {number} [options.maxEventsPerEntry] - Cap on events per qualified entry.
 * @param {boolean} [options.dryRun] - When exactly `true`, nothing is written and
 *   `eventsEmitted` is the number of events that would be inserted.
 * @param {boolean} [options.verbose] - When exactly `true`, a summary line goes to stderr.
 * @param {Function} [options.vectorSearch] - Overrides the default database-backed search.
 * @returns {Promise<{ entriesAnalyzed: number, entriesQualified: number, eventsEmitted: number, durationMs: number }>}
 */
async function generateSyntheticEvents(options) {
  const startedAt = Date.now();
  const elapsedMs = () => Math.max(0, Date.now() - startedAt);
  const { db } = options;

  const sourceFiles = normalizeSourceFiles(options.sourceFiles);
  const similarityThreshold = normalizeThreshold(options.similarityThreshold);
  const minSessionCount = normalizePositiveInt2(options.minSessionCount, DEFAULT_MIN_SESSION_COUNT);
  const maxEventsPerEntry = normalizePositiveInt2(options.maxEventsPerEntry, DEFAULT_MAX_EVENTS_PER_ENTRY);
  const dryRun = options.dryRun === true;

  const entries = await loadEntryCandidates(db, sourceFiles);
  const entriesAnalyzed = entries.length;
  if (entriesAnalyzed === 0) {
    return {
      entriesAnalyzed,
      entriesQualified: 0,
      eventsEmitted: 0,
      durationMs: elapsedMs()
    };
  }

  const metadataById = await loadEntryMetadata(db);
  const embeddingsByEntryId = new Map(
    entries
      .filter((entry) => entry.embedding && entry.embedding.length > 0)
      .map((entry) => [entry.id, entry.embedding])
  );
  const vectorSearch = options.vectorSearch ?? createDefaultVectorSearch(db, embeddingsByEntryId);
  const fallbackIso = new Date().toISOString();

  const eventsToInsert = [];
  let entriesQualified = 0;
  for (const entry of entries) {
    const hasEmbedding = Boolean(entry.embedding) && entry.embedding.length > 0;
    const hasFingerprint = Boolean(entry.fingerprint) && entry.fingerprint.length > 0;
    if (!hasEmbedding || !hasFingerprint || entry.sourceFile.length === 0) {
      continue;
    }
    const neighbors = await vectorSearch(entry.id, VECTOR_FAN_OUT, similarityThreshold);
    const mentions = toSyntheticQualityMentions(entry, neighbors, metadataById, similarityThreshold, fallbackIso);
    if (mentions.length < minSessionCount) {
      continue;
    }
    entriesQualified += 1;
    // Highest score first; equal scores are ordered by earlier recalledAt.
    const ranked = mentions
      .sort((a, b) => b.score - a.score || a.recalledAt.localeCompare(b.recalledAt))
      .slice(0, maxEventsPerEntry);
    for (const mention of ranked) {
      eventsToInsert.push({
        entryId: entry.id,
        fingerprint: entry.fingerprint,
        subject: entry.subject,
        sessionId: mention.sessionId,
        platform: mention.platform ?? entry.platform,
        project: mention.project ?? entry.project,
        recalledAt: mention.recalledAt
      });
    }
  }

  let eventsEmitted;
  if (dryRun) {
    eventsEmitted = eventsToInsert.length;
  } else {
    eventsEmitted = await insertSyntheticEvents(db, eventsToInsert);
  }
  if (options.verbose === true) {
    const verb = dryRun ? "would emit" : "emitted";
    process.stderr.write(
      `[synthetic] analyzed ${entriesAnalyzed} entries, qualified ${entriesQualified}, ${verb} ${eventsEmitted} events\n`
    );
  }
  return {
    entriesAnalyzed,
    entriesQualified,
    eventsEmitted,
    durationMs: elapsedMs()
  };
}
11662
+
11663
+ // src/quality/seed.ts
11664
/**
 * Trim, drop blanks from, and de-duplicate a list of source file names.
 *
 * @param {string[] | null | undefined} sourceFiles - Raw names; may be missing or empty.
 * @returns {string[]} Unique trimmed names in first-seen order.
 */
function normalizeSourceFiles2(sourceFiles) {
  if (!sourceFiles || sourceFiles.length === 0) {
    return [];
  }
  const trimmed = sourceFiles.map((value) => value.trim());
  const nonEmpty = trimmed.filter((value) => value.length > 0);
  return [...new Set(nonEmpty)];
}
11674
/**
 * Read an aggregate count column as a non-negative integer.
 *
 * @param {unknown} value - Raw column value; converted by the imported `toNumber` helper.
 * @returns {number} The floored value when it is finite and positive, otherwise 0.
 */
function parseCount(value) {
  const parsed = toNumber(value);
  const isPositive = Number.isFinite(parsed) && parsed > 0;
  return isPositive ? Math.floor(parsed) : 0;
}
11681
/**
 * Read a temporal-spread column as a non-negative number of days.
 *
 * @param {unknown} value - Raw column value; converted by the imported `toNumber` helper.
 * @returns {number} The value unchanged (fractions kept) when finite and positive, otherwise 0.
 */
function parseSpreadDays(value) {
  const parsed = toNumber(value);
  const isPositive = Number.isFinite(parsed) && parsed > 0;
  return isPositive ? parsed : 0;
}
11688
/**
 * Map a candidate's synthetic recall profile to a seed quality score.
 *
 * Tiers are checked from strongest to weakest:
 *   - 0.8  : at least 10 sessions spread over at least 30 days
 *   - 0.7  : at least 5 sessions spread over at least 14 days
 *   - 0.65 : at least 3 sessions spread over at least 7 days
 *   - 0.6  : at least 2 sessions, any spread
 *
 * @param {{ sessionCount: number, temporalSpreadDays: number }} candidate
 * @returns {number | null} The seed score, or null when no tier applies.
 */
function resolveSeedQuality(candidate) {
  const { sessionCount, temporalSpreadDays } = candidate;
  const spreadTiers = [
    { minSessions: 10, minSpreadDays: 30, score: 0.8 },
    { minSessions: 5, minSpreadDays: 14, score: 0.7 },
    { minSessions: 3, minSpreadDays: 7, score: 0.65 }
  ];
  const matched = spreadTiers.find(
    (tier) => sessionCount >= tier.minSessions && temporalSpreadDays >= tier.minSpreadDays
  );
  if (matched) {
    return matched.score;
  }
  // The lowest tier has no spread requirement.
  return sessionCount >= 2 ? 0.6 : null;
}
11703
/**
 * Aggregate synthetic recall events per active entry for quality seeding.
 *
 * For each entry with `retired = 0` and `superseded_by IS NULL` that has
 * synthetic recall events with a non-null session id, the query returns the
 * number of distinct sessions and the day span between the earliest and
 * latest `recalled_at`.
 *
 * @param {{ execute: Function }} db - Database client exposing `execute({ sql, args })`.
 * @param {string[]} sourceFiles - Already-normalized file names; an empty array means no file scoping.
 * @returns {Promise<Array<{ entryId: string, sessionCount: number, temporalSpreadDays: number }>>}
 *   One candidate per entry; rows with an empty entry id are skipped.
 */
async function loadSeedCandidates(db, sourceFiles) {
  let sourceScopeClause = "";
  if (sourceFiles.length > 0) {
    const placeholders = sourceFiles.map(() => "?").join(", ");
    sourceScopeClause = `AND e.source_file IN (${placeholders})`;
  }
  const { rows } = await db.execute({
    sql: `
      SELECT
        e.id AS entry_id,
        COUNT(DISTINCT re.session_id) AS session_count,
        COALESCE(julianday(MAX(re.recalled_at)) - julianday(MIN(re.recalled_at)), 0) AS temporal_spread_days
      FROM entries AS e
      INNER JOIN recall_events AS re ON re.entry_id = e.id
      WHERE e.retired = 0
        AND e.superseded_by IS NULL
        AND re.source = 'synthetic'
        AND re.session_id IS NOT NULL
        ${sourceScopeClause}
      GROUP BY e.id
    `,
    args: sourceFiles
  });
  return rows.flatMap((row) => {
    const entryId = toStringValue(row.entry_id);
    if (entryId.length === 0) {
      return [];
    }
    return [
      {
        entryId,
        sessionCount: parseCount(row.session_count),
        temporalSpreadDays: parseSpreadDays(row.temporal_spread_days)
      }
    ];
  });
}
11737
/**
 * Seed `quality_score` for entries based on their synthetic recall profile.
 *
 * Each candidate is mapped to a tier by `resolveSeedQuality`. The UPDATE is
 * guarded by `quality_score = 0.5`, so any entry whose score has already moved
 * off that value is left untouched.
 *
 * @param {{ execute: Function }} db - Database client exposing `execute`.
 * @param {string[] | null | undefined} sourceFiles - Optional file scoping; normalized before use.
 * @returns {Promise<{ updated: number }>} Number of rows the database reports as updated.
 * @throws Re-throws any failure after attempting to roll back the transaction.
 */
async function seedQualityScores(db, sourceFiles) {
  const normalizedSourceFiles = normalizeSourceFiles2(sourceFiles);
  const candidates = await loadSeedCandidates(db, normalizedSourceFiles);

  // Resolve tiers up front so the write lock is taken only when there is work.
  const pending = [];
  for (const candidate of candidates) {
    const qualityScore = resolveSeedQuality(candidate);
    if (qualityScore !== null) {
      pending.push({ entryId: candidate.entryId, qualityScore });
    }
  }
  if (pending.length === 0) {
    // Nothing to write: skip BEGIN IMMEDIATE, which would otherwise take the
    // database write lock (and could fail as busy) for an empty transaction.
    return { updated: 0 };
  }

  let updated = 0;
  await db.execute("BEGIN IMMEDIATE");
  try {
    for (const { entryId, qualityScore } of pending) {
      const result = await db.execute({
        sql: `
          UPDATE entries
          SET quality_score = ?
          WHERE id = ?
            AND quality_score = 0.5
        `,
        args: [qualityScore, entryId]
      });
      updated += Math.max(0, toRowsAffected(result.rowsAffected));
    }
    await db.execute("COMMIT");
  } catch (error) {
    try {
      await db.execute("ROLLBACK");
    } catch {
      // Best-effort rollback: the original failure is the one worth reporting.
    }
    throw error;
  }
  return { updated };
}
11769
+
11358
11770
  // src/ingest/write-queue.ts
11359
11771
  var CancelledError = class extends Error {
11360
11772
  constructor(message = "Write queue item was cancelled.") {
@@ -11997,7 +12409,7 @@ async function resolveInputFiles(inputPaths, globPattern, expandInputFilesFn) {
11997
12409
 
11998
12410
  // src/commands/ingest/helpers.ts
11999
12411
  import path19 from "path";
12000
- import { randomUUID } from "crypto";
12412
+ import { randomUUID as randomUUID2 } from "crypto";
12001
12413
  function retryBackoffMs(attempt) {
12002
12414
  if (attempt <= 1) return 1e4;
12003
12415
  if (attempt === 2) return 3e4;
@@ -12149,7 +12561,7 @@ async function insertIngestLogForFile(db, params) {
12149
12561
  VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
12150
12562
  `,
12151
12563
  args: [
12152
- randomUUID(),
12564
+ randomUUID2(),
12153
12565
  params.filePath,
12154
12566
  params.contentHash,
12155
12567
  (/* @__PURE__ */ new Date()).toISOString(),
@@ -12818,7 +13230,9 @@ async function runIngestCommand(inputPaths, options, deps) {
12818
13230
  sleepFn: deps?.sleepFn ?? sleep,
12819
13231
  shouldShutdownFn: deps?.shouldShutdownFn ?? isShutdownRequested,
12820
13232
  createWriteQueueFn: deps?.createWriteQueueFn ?? ((queueOptions) => new WriteQueue(queueOptions)),
12821
- embedFn: deps?.embedFn ?? embed
13233
+ embedFn: deps?.embedFn ?? embed,
13234
+ generateSyntheticEventsFn: deps?.generateSyntheticEventsFn ?? generateSyntheticEvents,
13235
+ seedQualityScoresFn: deps?.seedQualityScoresFn ?? seedQualityScores
12822
13236
  };
12823
13237
  const clackOutput = { output: process.stderr };
12824
13238
  clack9.intro(banner(), clackOutput);
@@ -12858,6 +13272,8 @@ async function runIngestCommand(inputPaths, options, deps) {
12858
13272
  const dryRun = options.dryRun === true;
12859
13273
  const bulkRequested = options.bulk === true;
12860
13274
  const bulkMode = bulkRequested && !dryRun;
13275
+ const syntheticRequested = options.synthetic === true;
13276
+ const syntheticDryRun = options.syntheticDryRun === true;
12861
13277
  const json = options.json === true;
12862
13278
  const force = options.force === true;
12863
13279
  const skipIngested = force ? false : options.skipIngested !== false;
@@ -12895,6 +13311,15 @@ async function runIngestCommand(inputPaths, options, deps) {
12895
13311
  if (bulkRequested && dryRun) {
12896
13312
  clack9.log.warn(formatWarn("[bulk] --bulk is ignored when --dry-run is enabled."), clackOutput);
12897
13313
  }
13314
+ if (syntheticDryRun && !syntheticRequested) {
13315
+ clack9.log.warn(
13316
+ formatWarn("[synthetic] --synthetic-dry-run requires --synthetic; synthetic pass will be skipped."),
13317
+ clackOutput
13318
+ );
13319
+ }
13320
+ if (syntheticRequested && !bulkRequested) {
13321
+ clack9.log.warn(formatWarn("[synthetic] --synthetic requires --bulk; synthetic pass will be skipped."), clackOutput);
13322
+ }
12898
13323
  const files = await resolveInputFiles(inputPaths, globPattern, resolvedDeps.expandInputFilesFn);
12899
13324
  const targetsWithSizes = await Promise.all(
12900
13325
  files.map(async (filePath) => {
@@ -13064,6 +13489,9 @@ async function runIngestCommand(inputPaths, options, deps) {
13064
13489
  let firstPassFailedIndexSet = /* @__PURE__ */ new Set();
13065
13490
  let bulkTeardownComplete = false;
13066
13491
  let bulkVectorRebuildDurationSeconds = null;
13492
+ let syntheticSummaryLine = null;
13493
+ let qualitySeedLine = null;
13494
+ let syntheticResult = null;
13067
13495
  let cleanupFailure = null;
13068
13496
  let pipelineError = null;
13069
13497
  const updateProgressFn = (completedCount, totalCount, verb) => {
@@ -13165,6 +13593,40 @@ async function runIngestCommand(inputPaths, options, deps) {
13165
13593
  cleanupFailure = asError(error);
13166
13594
  }
13167
13595
  }
13596
+ const shouldRunSyntheticPass = bulkMode && syntheticRequested && !syntheticDryRun && state.totalEntriesStored > 0 && !stoppedForShutdown && !pipelineError && !cleanupFailure;
13597
+ const shouldRunSyntheticDryRun = bulkMode && syntheticRequested && syntheticDryRun && state.totalEntriesStored > 0 && !stoppedForShutdown && !pipelineError && !cleanupFailure;
13598
+ if (shouldRunSyntheticPass || shouldRunSyntheticDryRun) {
13599
+ const syntheticSourceFiles = Array.from(
13600
+ new Set(
13601
+ results.filter(
13602
+ (result) => Boolean(result) && !result.error && !result.skipped && result.entriesStored > 0
13603
+ ).map((result) => result.file)
13604
+ )
13605
+ );
13606
+ try {
13607
+ let qualitySeeded = 0;
13608
+ if (!syntheticDryRun) {
13609
+ const seededBefore = await resolvedDeps.seedQualityScoresFn(db, syntheticSourceFiles);
13610
+ qualitySeeded += seededBefore.updated;
13611
+ }
13612
+ syntheticResult = await resolvedDeps.generateSyntheticEventsFn({
13613
+ db,
13614
+ sourceFiles: syntheticSourceFiles,
13615
+ dryRun: syntheticDryRun,
13616
+ verbose
13617
+ });
13618
+ if (!syntheticDryRun) {
13619
+ const seededAfter = await resolvedDeps.seedQualityScoresFn(db, syntheticSourceFiles);
13620
+ qualitySeeded += seededAfter.updated;
13621
+ }
13622
+ const syntheticVerb = syntheticDryRun ? "would emit" : "emitted";
13623
+ syntheticSummaryLine = `Synthetic recall events: analyzed ${syntheticResult.entriesAnalyzed} entries, ${syntheticVerb} ${syntheticResult.eventsEmitted} events for ${syntheticResult.entriesQualified} entries`;
13624
+ qualitySeedLine = qualitySeeded > 0 ? `Quality seeding: updated ${qualitySeeded} entries.` : null;
13625
+ } catch (error) {
13626
+ clack9.log.error(formatError(`[synthetic] post-ingest synthesis failed: ${toErrorMessage(error)}`), clackOutput);
13627
+ cleanupFailure = asError(error);
13628
+ }
13629
+ }
13168
13630
  if (!dryRun) {
13169
13631
  try {
13170
13632
  await walCheckpoint(db);
@@ -13243,7 +13705,16 @@ async function runIngestCommand(inputPaths, options, deps) {
13243
13705
  const bulkRebuildLine = bulkMode && bulkVectorRebuildDurationSeconds !== null ? `Bulk mode: FTS rebuild + vector index rebuilt in ${bulkVectorRebuildDurationSeconds.toFixed(1)}s.` : null;
13244
13706
  const bulkDedupLine = bulkMode ? `Bulk dedup: ${bulkDedupSkippedHashMinhash} entries skipped (hash/MinHash).` : null;
13245
13707
  clack9.note(
13246
- [doneLine, chunkFailureLine, bulkRebuildLine, bulkDedupLine, ...retryLines, ...failedFileLines].filter((line) => Boolean(line)).join("\n"),
13708
+ [
13709
+ doneLine,
13710
+ chunkFailureLine,
13711
+ bulkRebuildLine,
13712
+ bulkDedupLine,
13713
+ syntheticSummaryLine,
13714
+ qualitySeedLine,
13715
+ ...retryLines,
13716
+ ...failedFileLines
13717
+ ].filter((line) => Boolean(line)).join("\n"),
13247
13718
  "Ingest Complete",
13248
13719
  clackOutput
13249
13720
  );
@@ -13282,6 +13753,14 @@ function registerIngestCommand(program) {
13282
13753
  "--bulk",
13283
13754
  "Optimize large imports by disabling FTS/vector indexes during writes and rebuilding afterward",
13284
13755
  false
13756
+ ).option(
13757
+ "--synthetic",
13758
+ "Run synthetic recall event generation after bulk ingest (requires --bulk)",
13759
+ false
13760
+ ).option(
13761
+ "--synthetic-dry-run",
13762
+ "Show what synthetic recall events would be generated without writing (requires --synthetic)",
13763
+ false
13285
13764
  ).option("--no-retry", "Disable auto-retry for failed files").option("--no-pre-fetch", "Disable elaborative encoding pre-fetch").option(
13286
13765
  "--whole-file",
13287
13766
  "Force whole-file extraction mode. Sends each file as a single LLM call. Auto-detected for large-context models; use this flag to force it for any model. Ignored in watch mode.",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agenr",
3
- "version": "0.9.60",
3
+ "version": "0.9.61",
4
4
  "openclaw": {
5
5
  "extensions": [
6
6
  "dist/openclaw-plugin/index.js"
@@ -11,6 +11,13 @@
11
11
  "bin": {
12
12
  "agenr": "dist/cli.js"
13
13
  },
14
+ "scripts": {
15
+ "build": "tsup src/cli.ts src/cli-main.ts src/openclaw-plugin/index.ts --format esm --dts",
16
+ "dev": "tsup src/cli.ts src/cli-main.ts --format esm --watch",
17
+ "test": "vitest run",
18
+ "test:watch": "vitest",
19
+ "typecheck": "tsc --noEmit"
20
+ },
14
21
  "dependencies": {
15
22
  "@clack/prompts": "^1.0.1",
16
23
  "@libsql/client": "^0.17.0",
@@ -54,11 +61,9 @@
54
61
  "README.md"
55
62
  ],
56
63
  "author": "agenr-ai",
57
- "scripts": {
58
- "build": "tsup src/cli.ts src/cli-main.ts src/openclaw-plugin/index.ts --format esm --dts",
59
- "dev": "tsup src/cli.ts src/cli-main.ts --format esm --watch",
60
- "test": "vitest run",
61
- "test:watch": "vitest",
62
- "typecheck": "tsc --noEmit"
64
+ "pnpm": {
65
+ "overrides": {
66
+ "fast-xml-parser": "^5.3.6"
67
+ }
63
68
  }
64
- }
69
+ }