@bodhi-ventures/aiocs 0.5.3 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,8 +7,10 @@ var AIOCS_ERROR_CODES = {
7
7
  noPagesFetched: "NO_PAGES_FETCHED",
8
8
  noProjectScope: "NO_PROJECT_SCOPE",
9
9
  chunkNotFound: "CHUNK_NOT_FOUND",
10
+ pageNotFound: "PAGE_NOT_FOUND",
10
11
  referenceFileNotFound: "REFERENCE_FILE_NOT_FOUND",
11
12
  invalidReferenceFile: "INVALID_REFERENCE_FILE",
13
+ sourceContextInvalid: "SOURCE_CONTEXT_INVALID",
12
14
  authEnvMissing: "AUTH_ENV_MISSING",
13
15
  canaryFailed: "CANARY_FAILED",
14
16
  backupConflict: "BACKUP_CONFLICT",
@@ -818,6 +820,35 @@ function initSchema(db) {
818
820
  PRIMARY KEY(project_path, source_id)
819
821
  );
820
822
 
823
+ CREATE TABLE IF NOT EXISTS source_context (
824
+ source_id TEXT PRIMARY KEY REFERENCES sources(id) ON DELETE CASCADE,
825
+ context_json TEXT NOT NULL,
826
+ created_at TEXT NOT NULL,
827
+ updated_at TEXT NOT NULL
828
+ );
829
+
830
+ CREATE TABLE IF NOT EXISTS routing_learnings (
831
+ id TEXT PRIMARY KEY,
832
+ route_key TEXT NOT NULL UNIQUE,
833
+ source_id TEXT NOT NULL REFERENCES sources(id) ON DELETE CASCADE,
834
+ snapshot_id TEXT REFERENCES snapshots(id) ON DELETE SET NULL,
835
+ learning_type TEXT NOT NULL CHECK(learning_type IN ('discovery', 'negative')),
836
+ intent TEXT NOT NULL,
837
+ page_url TEXT,
838
+ file_path TEXT,
839
+ title TEXT,
840
+ note TEXT,
841
+ search_terms_json TEXT NOT NULL,
842
+ created_at TEXT NOT NULL,
843
+ updated_at TEXT NOT NULL
844
+ );
845
+
846
+ CREATE INDEX IF NOT EXISTS idx_routing_learnings_source_type
847
+ ON routing_learnings(source_id, learning_type, updated_at);
848
+
849
+ CREATE INDEX IF NOT EXISTS idx_routing_learnings_intent
850
+ ON routing_learnings(intent, updated_at);
851
+
821
852
  CREATE TABLE IF NOT EXISTS daemon_state (
822
853
  singleton_id INTEGER PRIMARY KEY CHECK(singleton_id = 1),
823
854
  last_started_at TEXT,
@@ -926,6 +957,15 @@ function normalizeQuery(query) {
926
957
  const words = query.replace(/[^\p{L}\p{N}]+/gu, " ").split(/\s+/).map((part) => part.trim()).filter(Boolean);
927
958
  return words.join(" ");
928
959
  }
960
/**
 * Builds the key stored in `routing_learnings.route_key` for a learning.
 *
 * The key is the sha256 of the stable-stringified identity of the learning:
 * source, learning type, normalized lower-cased intent, and the optional
 * page URL / file path (absent values are recorded as null).
 *
 * @param {{sourceId: string, learningType: string, intent: string, pageUrl?: string, filePath?: string}} input
 * @returns {string} Hash identifying the learning.
 */
function buildRouteKey(input) {
  const identity = {
    sourceId: input.sourceId,
    learningType: input.learningType,
    // Normalizing the intent makes differently punctuated/cased wordings share a key.
    intent: normalizeQuery(input.intent).toLowerCase(),
    pageUrl: input.pageUrl ?? null,
    filePath: input.filePath ?? null
  };
  const serialized = stableStringify(identity);
  return sha256(serialized);
}
929
969
  function normalizePatternFilters(patterns) {
930
970
  if (!patterns || patterns.length === 0) {
931
971
  return null;
@@ -1297,6 +1337,62 @@ function openCatalog(options) {
1297
1337
  }
1298
1338
  return JSON.parse(row.spec_json);
1299
1339
  },
1340
+ getSourceById(sourceId) {
1341
+ return this.listSources().find((source) => source.id === sourceId) ?? null;
1342
+ },
1343
+ upsertSourceContext(sourceId, context) {
1344
+ const source = this.getSourceById(sourceId);
1345
+ if (!source) {
1346
+ throw new AiocsError(
1347
+ AIOCS_ERROR_CODES.sourceNotFound,
1348
+ `Unknown source '${sourceId}'`
1349
+ );
1350
+ }
1351
+ const timestamp = nowIso();
1352
+ const existing = db.prepare(`
1353
+ SELECT created_at
1354
+ FROM source_context
1355
+ WHERE source_id = ?
1356
+ `).get(sourceId);
1357
+ db.prepare(`
1358
+ INSERT INTO source_context (source_id, context_json, created_at, updated_at)
1359
+ VALUES (?, ?, ?, ?)
1360
+ ON CONFLICT(source_id) DO UPDATE SET
1361
+ context_json = excluded.context_json,
1362
+ updated_at = excluded.updated_at
1363
+ `).run(
1364
+ sourceId,
1365
+ JSON.stringify(context),
1366
+ existing?.created_at ?? timestamp,
1367
+ timestamp
1368
+ );
1369
+ return {
1370
+ sourceId,
1371
+ context,
1372
+ createdAt: existing?.created_at ?? timestamp,
1373
+ updatedAt: timestamp
1374
+ };
1375
+ },
1376
+ getSourceContext(sourceId) {
1377
+ const source = this.getSourceById(sourceId);
1378
+ if (!source) {
1379
+ throw new AiocsError(
1380
+ AIOCS_ERROR_CODES.sourceNotFound,
1381
+ `Unknown source '${sourceId}'`
1382
+ );
1383
+ }
1384
+ const row = db.prepare(`
1385
+ SELECT context_json, created_at, updated_at
1386
+ FROM source_context
1387
+ WHERE source_id = ?
1388
+ `).get(sourceId);
1389
+ return {
1390
+ sourceId,
1391
+ context: row ? JSON.parse(row.context_json) : null,
1392
+ createdAt: row?.created_at ?? null,
1393
+ updatedAt: row?.updated_at ?? null
1394
+ };
1395
+ },
1300
1396
  listSources() {
1301
1397
  const rows = db.prepare(`
1302
1398
  SELECT
@@ -1335,6 +1431,280 @@ function openCatalog(options) {
1335
1431
  };
1336
1432
  });
1337
1433
  },
1434
+ listPages(input) {
1435
+ const source = this.getSourceById(input.sourceId);
1436
+ if (!source) {
1437
+ throw new AiocsError(
1438
+ AIOCS_ERROR_CODES.sourceNotFound,
1439
+ `Unknown source '${input.sourceId}'`
1440
+ );
1441
+ }
1442
+ const snapshotId = input.snapshotId ?? source.lastSuccessfulSnapshotId;
1443
+ if (!snapshotId) {
1444
+ throw new AiocsError(
1445
+ AIOCS_ERROR_CODES.snapshotNotFound,
1446
+ `No successful snapshot found for source '${input.sourceId}'`
1447
+ );
1448
+ }
1449
+ const snapshotRow = db.prepare(`
1450
+ SELECT id
1451
+ FROM snapshots
1452
+ WHERE id = ?
1453
+ AND source_id = ?
1454
+ `).get(snapshotId, input.sourceId);
1455
+ if (!snapshotRow) {
1456
+ throw new AiocsError(
1457
+ AIOCS_ERROR_CODES.snapshotNotFound,
1458
+ `Snapshot '${snapshotId}' not found for source '${input.sourceId}'`
1459
+ );
1460
+ }
1461
+ const normalizedQuery = input.query ? normalizeQuery(input.query).toLowerCase() : "";
1462
+ const pathPatterns = normalizePatternFilters(input.pathPatterns);
1463
+ const limit = assertPaginationValue(input.limit, "limit", 50);
1464
+ const offset = assertPaginationValue(input.offset, "offset", 0);
1465
+ const whereSql = [
1466
+ "snapshot_id = ?"
1467
+ ];
1468
+ const args = [snapshotId];
1469
+ if (normalizedQuery) {
1470
+ whereSql.push("(LOWER(title) LIKE ? OR LOWER(url) LIKE ? OR LOWER(COALESCE(file_path, '')) LIKE ?)");
1471
+ const queryLike = `%${normalizedQuery}%`;
1472
+ args.push(queryLike, queryLike, queryLike);
1473
+ }
1474
+ if (pathPatterns && pathPatterns.length > 0) {
1475
+ whereSql.push(`file_path IS NOT NULL AND (${pathPatterns.map(() => "file_path GLOB ?").join(" OR ")})`);
1476
+ args.push(...pathPatterns.map((pattern) => toSqliteGlob(pattern)));
1477
+ }
1478
+ const whereClause = whereSql.join(" AND ");
1479
+ const totalRow = db.prepare(`
1480
+ SELECT COUNT(*) AS total
1481
+ FROM pages
1482
+ WHERE ${whereClause}
1483
+ `).get(...args);
1484
+ const rows = db.prepare(`
1485
+ SELECT url, title, markdown, page_kind, file_path, language
1486
+ FROM pages
1487
+ WHERE ${whereClause}
1488
+ ORDER BY title, url
1489
+ LIMIT ?
1490
+ OFFSET ?
1491
+ `).all(...args, limit, offset);
1492
+ return {
1493
+ sourceId: input.sourceId,
1494
+ snapshotId,
1495
+ total: totalRow.total,
1496
+ limit,
1497
+ offset,
1498
+ hasMore: offset + rows.length < totalRow.total,
1499
+ pages: rows.map((row) => ({
1500
+ url: row.url,
1501
+ title: row.title,
1502
+ pageKind: row.page_kind,
1503
+ filePath: row.file_path,
1504
+ language: row.language,
1505
+ markdownLength: row.markdown.length
1506
+ }))
1507
+ };
1508
+ },
1509
+ getPage(input) {
1510
+ const source = this.getSourceById(input.sourceId);
1511
+ if (!source) {
1512
+ throw new AiocsError(
1513
+ AIOCS_ERROR_CODES.sourceNotFound,
1514
+ `Unknown source '${input.sourceId}'`
1515
+ );
1516
+ }
1517
+ if (!input.url && !input.filePath || input.url && input.filePath) {
1518
+ throw new AiocsError(
1519
+ AIOCS_ERROR_CODES.invalidArgument,
1520
+ "Provide exactly one of url or filePath"
1521
+ );
1522
+ }
1523
+ const snapshotId = input.snapshotId ?? source.lastSuccessfulSnapshotId;
1524
+ if (!snapshotId) {
1525
+ throw new AiocsError(
1526
+ AIOCS_ERROR_CODES.snapshotNotFound,
1527
+ `No successful snapshot found for source '${input.sourceId}'`
1528
+ );
1529
+ }
1530
+ const row = db.prepare(`
1531
+ SELECT p.snapshot_id, p.url, p.title, p.markdown, p.page_kind, p.file_path, p.language
1532
+ FROM pages p
1533
+ INNER JOIN snapshots s
1534
+ ON s.id = p.snapshot_id
1535
+ WHERE p.snapshot_id = ?
1536
+ AND s.source_id = ?
1537
+ AND ${input.url ? "p.url = ?" : "p.file_path = ?"}
1538
+ LIMIT 1
1539
+ `).get(snapshotId, input.sourceId, input.url ?? input.filePath);
1540
+ if (!row) {
1541
+ throw new AiocsError(
1542
+ AIOCS_ERROR_CODES.pageNotFound,
1543
+ input.url ? `Page '${input.url}' not found for source '${input.sourceId}'` : `Page path '${input.filePath}' not found for source '${input.sourceId}'`
1544
+ );
1545
+ }
1546
+ return {
1547
+ sourceId: input.sourceId,
1548
+ snapshotId,
1549
+ page: {
1550
+ url: row.url,
1551
+ title: row.title,
1552
+ markdown: row.markdown,
1553
+ pageKind: row.page_kind,
1554
+ filePath: row.file_path,
1555
+ language: row.language
1556
+ }
1557
+ };
1558
+ },
1559
+ upsertRoutingLearning(input) {
1560
+ const source = this.getSourceById(input.sourceId);
1561
+ if (!source) {
1562
+ throw new AiocsError(
1563
+ AIOCS_ERROR_CODES.sourceNotFound,
1564
+ `Unknown source '${input.sourceId}'`
1565
+ );
1566
+ }
1567
+ const timestamp = nowIso();
1568
+ const routeKey = buildRouteKey(input);
1569
+ const searchTerms = [...new Set((input.searchTerms ?? []).map((term) => term.trim()).filter(Boolean))];
1570
+ const validationSnapshotId = input.snapshotId ?? source.lastSuccessfulSnapshotId ?? null;
1571
+ const storedSnapshotId = input.snapshotId ?? null;
1572
+ const hasTargetPage = Boolean(input.pageUrl || input.filePath);
1573
+ if (input.snapshotId) {
1574
+ const snapshotRow = db.prepare(`
1575
+ SELECT id
1576
+ FROM snapshots
1577
+ WHERE id = ?
1578
+ AND source_id = ?
1579
+ `).get(input.snapshotId, input.sourceId);
1580
+ if (!snapshotRow) {
1581
+ throw new AiocsError(
1582
+ AIOCS_ERROR_CODES.snapshotNotFound,
1583
+ `Snapshot '${input.snapshotId}' not found for source '${input.sourceId}'`
1584
+ );
1585
+ }
1586
+ }
1587
+ if (hasTargetPage) {
1588
+ if (!validationSnapshotId) {
1589
+ throw new AiocsError(
1590
+ AIOCS_ERROR_CODES.snapshotNotFound,
1591
+ `No successful snapshot found for source '${input.sourceId}'`
1592
+ );
1593
+ }
1594
+ this.getPage({
1595
+ sourceId: input.sourceId,
1596
+ snapshotId: validationSnapshotId,
1597
+ ...input.pageUrl ? { url: input.pageUrl } : input.filePath ? { filePath: input.filePath } : {}
1598
+ });
1599
+ }
1600
+ const existing = db.prepare(`
1601
+ SELECT id, created_at
1602
+ FROM routing_learnings
1603
+ WHERE route_key = ?
1604
+ `).get(routeKey);
1605
+ const learningId = existing?.id ?? randomUUID();
1606
+ db.prepare(`
1607
+ INSERT INTO routing_learnings (
1608
+ id,
1609
+ route_key,
1610
+ source_id,
1611
+ snapshot_id,
1612
+ learning_type,
1613
+ intent,
1614
+ page_url,
1615
+ file_path,
1616
+ title,
1617
+ note,
1618
+ search_terms_json,
1619
+ created_at,
1620
+ updated_at
1621
+ ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
1622
+ ON CONFLICT(route_key) DO UPDATE SET
1623
+ snapshot_id = excluded.snapshot_id,
1624
+ title = excluded.title,
1625
+ note = excluded.note,
1626
+ search_terms_json = excluded.search_terms_json,
1627
+ updated_at = excluded.updated_at
1628
+ `).run(
1629
+ learningId,
1630
+ routeKey,
1631
+ input.sourceId,
1632
+ storedSnapshotId,
1633
+ input.learningType,
1634
+ input.intent.trim(),
1635
+ input.pageUrl ?? null,
1636
+ input.filePath ?? null,
1637
+ input.title ?? null,
1638
+ input.note ?? null,
1639
+ JSON.stringify(searchTerms),
1640
+ existing?.created_at ?? timestamp,
1641
+ timestamp
1642
+ );
1643
+ return {
1644
+ learningId,
1645
+ sourceId: input.sourceId,
1646
+ snapshotId: storedSnapshotId,
1647
+ learningType: input.learningType,
1648
+ intent: input.intent.trim(),
1649
+ pageUrl: input.pageUrl ?? null,
1650
+ filePath: input.filePath ?? null,
1651
+ title: input.title ?? null,
1652
+ note: input.note ?? null,
1653
+ searchTerms,
1654
+ createdAt: existing?.created_at ?? timestamp,
1655
+ updatedAt: timestamp
1656
+ };
1657
+ },
1658
+ listRoutingLearnings(input) {
1659
+ const whereSql = [];
1660
+ const args = [];
1661
+ if (input?.sourceId) {
1662
+ whereSql.push("source_id = ?");
1663
+ args.push(input.sourceId);
1664
+ }
1665
+ if (input?.learningType) {
1666
+ whereSql.push("learning_type = ?");
1667
+ args.push(input.learningType);
1668
+ }
1669
+ if (input?.intentQuery) {
1670
+ whereSql.push("LOWER(intent) LIKE ?");
1671
+ args.push(`%${normalizeQuery(input.intentQuery).toLowerCase()}%`);
1672
+ }
1673
+ const limit = assertPaginationValue(input?.limit, "limit", 50);
1674
+ const rows = db.prepare(`
1675
+ SELECT
1676
+ id,
1677
+ source_id,
1678
+ snapshot_id,
1679
+ learning_type,
1680
+ intent,
1681
+ page_url,
1682
+ file_path,
1683
+ title,
1684
+ note,
1685
+ search_terms_json,
1686
+ created_at,
1687
+ updated_at
1688
+ FROM routing_learnings
1689
+ ${whereSql.length > 0 ? `WHERE ${whereSql.join(" AND ")}` : ""}
1690
+ ORDER BY updated_at DESC, source_id, intent
1691
+ LIMIT ?
1692
+ `).all(...args, limit);
1693
+ return rows.map((row) => ({
1694
+ learningId: row.id,
1695
+ sourceId: row.source_id,
1696
+ snapshotId: row.snapshot_id,
1697
+ learningType: row.learning_type,
1698
+ intent: row.intent,
1699
+ pageUrl: row.page_url,
1700
+ filePath: row.file_path,
1701
+ title: row.title,
1702
+ note: row.note,
1703
+ searchTerms: JSON.parse(row.search_terms_json),
1704
+ createdAt: row.created_at,
1705
+ updatedAt: row.updated_at
1706
+ }));
1707
+ },
1338
1708
  listDueSourceIds(referenceTime = nowIso()) {
1339
1709
  const rows = db.prepare(`
1340
1710
  SELECT id
@@ -3997,7 +4367,7 @@ async function startDaemon(input) {
3997
4367
  // package.json
3998
4368
  var package_default = {
3999
4369
  name: "@bodhi-ventures/aiocs",
4000
- version: "0.5.3",
4370
+ version: "0.6.1",
4001
4371
  license: "MIT",
4002
4372
  type: "module",
4003
4373
  description: "Local-only documentation store, fetcher, and search CLI for AI agents.",
@@ -5052,7 +5422,181 @@ async function searchHybridCatalog(input) {
5052
5422
  };
5053
5423
  }
5054
5424
 
5425
+ // src/retrieval.ts
5426
/**
 * Splits text into lower-cased word tokens.
 *
 * Every run of characters that are neither Unicode letters nor numbers acts as
 * a separator; empty tokens are dropped.
 *
 * @param {string} value Text to tokenize.
 * @returns {string[]} Tokens in order of appearance (duplicates preserved).
 */
function tokenize(value) {
  return value
    .toLowerCase()
    .replace(/[^\p{L}\p{N}]+/gu, " ")
    .split(/\s+/)
    .map((part) => part.trim())
    .filter(Boolean);
}

/**
 * Counts how many distinct query tokens occur in the candidate text.
 *
 * Query tokens are de-duplicated first, so repeating a word in the query cannot
 * inflate the score; callers that already pass unique tokens are unaffected.
 *
 * @param {Iterable<string>} queryTokens Already-tokenized (lower-cased) query words.
 * @param {string} candidate Text to compare against; tokenized with `tokenize`.
 * @returns {number} Number of distinct query tokens present in the candidate.
 */
function overlapScore(queryTokens, candidate) {
  const candidateTokens = new Set(tokenize(candidate));
  if (candidateTokens.size === 0) {
    return 0;
  }
  let matches = 0;
  for (const token of new Set(queryTokens)) {
    if (candidateTokens.has(token)) {
      matches += 1;
    }
  }
  return matches;
}
5442
/**
 * Removes duplicate tokens while keeping the first occurrence of each in order.
 *
 * @param {Iterable<string>} tokens Tokens, possibly with repeats.
 * @returns {string[]} New array of distinct tokens.
 */
function uniqueTokens(tokens) {
  const distinct = new Set(tokens);
  return Array.from(distinct);
}
5445
/**
 * Tells whether the candidate text contains every required token.
 *
 * @param {string[]} requiredTokens Tokens that must all be present.
 * @param {string} candidate Text to inspect; tokenized with `tokenize`.
 * @returns {boolean} False for an empty requirement list, otherwise true only
 *   when each required token appears among the candidate's tokens.
 */
function hasTokenCoverage(requiredTokens, candidate) {
  // An empty requirement is treated as "no coverage" rather than vacuously true.
  if (requiredTokens.length === 0) {
    return false;
  }
  const available = new Set(tokenize(candidate));
  for (const required of requiredTokens) {
    if (!available.has(required)) {
      return false;
    }
  }
  return true;
}
5452
// Query words that mark a query as "navigational" in classifyRetrievalQuery.
// Membership is tested against lower-cased tokens.
var NAVIGATIONAL_TERMS = /* @__PURE__ */ new Set([
  "api", "apis",
  "auth", "authentication",
  "docs", "documentation",
  "endpoint", "endpoints",
  "overview",
  "reference", "references",
  "rest",
  "sdk",
  "transport",
  "websocket", "ws"
]);
5470
/**
 * Tokenizes a query and flags it as navigational when it contains any term
 * from NAVIGATIONAL_TERMS.
 *
 * @param {string} query Raw user query.
 * @returns {{isNavigational: boolean, queryTokens: string[], routingTokens: string[], matchedNavigationalTerms: string[]}}
 *   `routingTokens` is the same array as `queryTokens`.
 */
function classifyRetrievalQuery(query) {
  const queryTokens = uniqueTokens(tokenize(query));
  // queryTokens is already distinct, so the matches collected here are too.
  const matchedNavigationalTerms = [];
  for (const token of queryTokens) {
    if (NAVIGATIONAL_TERMS.has(token)) {
      matchedNavigationalTerms.push(token);
    }
  }
  const isNavigational = matchedNavigationalTerms.length > 0;
  return {
    isNavigational,
    queryTokens,
    routingTokens: queryTokens,
    matchedNavigationalTerms
  };
}
5480
/**
 * Computes the ranking score of a page for a query.
 *
 * The score is a weighted sum of search scores, token overlaps with the page
 * title / reference / section titles, source-hint, common-location and learning
 * boosts, plus fixed bonuses when the title (12) or reference (10) contains
 * every query token. Navigational queries use a different weight set and
 * subtract 8 from pages that have a vector signal only.
 *
 * @param {string} query Raw user query.
 * @param {object} input
 * @param {string} input.pageTitle
 * @param {string} input.pageReference File path or URL of the page.
 * @param {string[]} input.sectionTitles
 * @param {number} input.bestLexicalScore
 * @param {number} input.bestVectorScore
 * @param {number} input.learningScore
 * @param {number} input.sourceHintScore
 * @param {number} input.commonLocationScore
 * @returns {number} Score; higher ranks first.
 */
function scorePageCandidate(query, input) {
  const { isNavigational, routingTokens } = classifyRetrievalQuery(query);
  const weights = isNavigational
    ? { lexical: 6, vector: 1, title: 8, reference: 6, section: 4, sourceHint: 3, commonLocation: 7, learning: 5 }
    : { lexical: 4, vector: 3, title: 4, reference: 3, section: 2, sourceHint: 2, commonLocation: 4, learning: 4 };

  const titleOverlap = overlapScore(routingTokens, input.pageTitle);
  const referenceOverlap = overlapScore(routingTokens, input.pageReference);
  let sectionOverlap = 0;
  for (const sectionTitle of input.sectionTitles) {
    sectionOverlap = Math.max(sectionOverlap, overlapScore(routingTokens, sectionTitle));
  }
  const titleBonus = hasTokenCoverage(routingTokens, input.pageTitle) ? 12 : 0;
  const referenceBonus = hasTokenCoverage(routingTokens, input.pageReference) ? 10 : 0;

  // A navigational query matched purely by vector similarity is penalized.
  const isPureVectorHit = isNavigational
    && input.bestLexicalScore === 0
    && titleOverlap === 0
    && referenceOverlap === 0
    && input.commonLocationScore === 0;
  const pureVectorPenalty = isPureVectorHit ? 8 : 0;

  // Terms are accumulated in a fixed order so floating-point results stay stable.
  let total = input.bestLexicalScore * weights.lexical;
  total += input.bestVectorScore * weights.vector;
  total += titleOverlap * weights.title;
  total += referenceOverlap * weights.reference;
  total += sectionOverlap * weights.section;
  total += input.sourceHintScore * weights.sourceHint;
  total += input.commonLocationScore * weights.commonLocation;
  total += input.learningScore * weights.learning;
  total += titleBonus;
  total += referenceBonus;
  return total - pureVectorPenalty;
}
5499
/**
 * Scores how relevant a stored routing learning is to a query.
 *
 * The score is the best token overlap between the query and the learning's
 * intent or any of its search terms, plus 10 when the trimmed, lower-cased
 * intent equals the trimmed, lower-cased query, plus 1 for "discovery" learnings.
 *
 * @param {string} query Raw user query.
 * @param {{intent: string, searchTerms: string[], learningType: string}} learning
 * @returns {number} Relevance score.
 */
function scoreLearning(query, learning) {
  const queryTokens = tokenize(query);
  let bestOverlap = 0;
  for (const candidate of [learning.intent, ...learning.searchTerms]) {
    bestOverlap = Math.max(bestOverlap, overlapScore(queryTokens, candidate));
  }
  const isExactIntent = learning.intent.trim().toLowerCase() === query.trim().toLowerCase();
  const exactIntentBoost = isExactIntent ? 10 : 0;
  // Despite the original local's name ("recency"), this boost depends only on the type.
  const discoveryBoost = learning.learningType === "discovery" ? 1 : 0;
  return bestOverlap + exactIntentBoost + discoveryBoost;
}
5507
/**
 * Scores how well a source's context document matches a query.
 *
 * Every text field of the context (purpose, summary, topic hints, gotchas,
 * auth notes, and each common location's label, note, url and file path) is
 * compared with the query; the best token overlap is returned.
 *
 * @param {string} query Raw user query.
 * @param {object|null|undefined} context Source context document, if any.
 * @returns {number} Best overlap across all fields; 0 when there is no context.
 */
function scoreSourceContext(query, context) {
  if (!context) {
    return 0;
  }
  const queryTokens = tokenize(query);
  const locationTexts = context.commonLocations.flatMap((location) => [
    location.label,
    location.note ?? "",
    location.url ?? "",
    location.filePath ?? ""
  ]);
  const texts = [
    context.purpose ?? "",
    context.summary ?? "",
    ...context.topicHints,
    ...context.gotchas,
    ...context.authNotes,
    ...locationTexts
  ];
  let best = 0;
  for (const text of texts) {
    best = Math.max(best, overlapScore(queryTokens, text));
  }
  return best;
}
5522
+
5523
+ // src/source-context.ts
5524
+ import { readFile as readFile4 } from "fs/promises";
5525
+ import { extname as extname3 } from "path";
5526
+ import YAML2 from "yaml";
5527
+ import { z as z2 } from "zod";
5528
// Schema of one "common location" entry in a source context file.
// `label` is required; at least one of `url` / `filePath` must be present,
// and a missing locator is reported against the `url` path.
var commonLocationSchema = z2.object({
  label: z2.string().min(1),
  url: z2.string().url().optional(),
  filePath: z2.string().min(1).optional(),
  note: z2.string().min(1).optional()
}).superRefine((value, context) => {
  const hasLocator = Boolean(value.url) || Boolean(value.filePath);
  if (hasLocator) {
    return;
  }
  context.addIssue({
    code: z2.ZodIssueCode.custom,
    message: "common location entries must include url or filePath",
    path: ["url"]
  });
});
5542
// Schema of a source context file. `purpose` and `summary` are optional
// non-empty strings; every list field defaults to an empty array.
var sourceContextSchema = (() => {
  const optionalText = () => z2.string().min(1).optional();
  const textList = () => z2.array(z2.string().min(1)).default([]);
  return z2.object({
    purpose: optionalText(),
    summary: optionalText(),
    topicHints: textList(),
    commonLocations: z2.array(commonLocationSchema).default([]),
    gotchas: textList(),
    authNotes: textList()
  });
})();
5550
/**
 * Parses the raw text of a source context file.
 *
 * @param {string} raw File contents.
 * @param {string} extension File extension including the dot; callers pass it lower-cased.
 * @returns {unknown} Parsed document: JSON for ".json", YAML for anything else.
 */
function parseSourceContext(raw, extension) {
  const isJson = extension === ".json";
  return isJson ? JSON.parse(raw) : YAML2.parse(raw);
}
5556
/**
 * Reads, parses and validates a source context file.
 *
 * @param {string} path Path of the JSON or YAML file.
 * @returns {Promise<object>} The document as validated by `sourceContextSchema`.
 * @throws {AiocsError} `sourceContextInvalid` when validation fails (with an
 *   `issues` list of `{path, message}`) or when reading/parsing raises an Error.
 *   Thrown values that are not Error instances are rethrown unchanged.
 */
async function loadSourceContextFile(path) {
  // ZodError is checked first because it is also an Error.
  const toReportedError = (error) => {
    if (error instanceof z2.ZodError) {
      const issues = error.issues.map((issue) => ({
        path: issue.path.join("."),
        message: issue.message
      }));
      return new AiocsError(
        AIOCS_ERROR_CODES.sourceContextInvalid,
        `Invalid source context file '${path}'`,
        { issues }
      );
    }
    if (error instanceof Error) {
      return new AiocsError(
        AIOCS_ERROR_CODES.sourceContextInvalid,
        `Failed to load source context file '${path}': ${error.message}`
      );
    }
    return error;
  };
  try {
    const raw = await readFile4(path, "utf8");
    const extension = extname3(path).toLowerCase();
    return sourceContextSchema.parse(parseSourceContext(raw, extension));
  } catch (error) {
    throw toReportedError(error);
  }
}
5582
+
5055
5583
  // src/services.ts
5584
/**
 * Builds the identity key of a page within a source snapshot.
 *
 * The file path is preferred; the page URL is used only when the file path is
 * null or undefined.
 *
 * @param {{sourceId: string, snapshotId: string, pageUrl?: string, filePath?: string|null}} input
 * @returns {string} Key of the form `source::snapshot::identity`.
 */
function pageIdentityKey(input) {
  return `${input.sourceId}::${input.snapshotId}::${input.filePath ?? input.pageUrl}`;
}

/**
 * Drops rows that refer to a page already seen, keeping the first occurrence.
 *
 * Identity is defined by `pageIdentityKey`, so this function and the callers
 * that build lookup maps with that helper always agree on the key format.
 *
 * @param {Array<{sourceId: string, snapshotId: string, pageUrl?: string, filePath?: string|null}>} rows
 * @returns {Array} New array with one row per page identity, in input order.
 */
function dedupePagesByIdentity(rows) {
  const seen = /* @__PURE__ */ new Set();
  const deduped = [];
  for (const row of rows) {
    const key = pageIdentityKey(row);
    if (seen.has(key)) {
      continue;
    }
    seen.add(key);
    deduped.push(row);
  }
  return deduped;
}
5056
5600
  function createCatalog() {
5057
5601
  const dataDir = getAiocsDataDir();
5058
5602
  getAiocsConfigDir();
@@ -5079,6 +5623,70 @@ async function listSources() {
5079
5623
  const sources = await withCatalog(({ catalog }) => catalog.listSources());
5080
5624
  return { sources };
5081
5625
  }
5626
/**
 * Gathers an overview of one source: the source record, its stored context,
 * the first snapshot returned by `listSnapshots`, and up to 10 recent routing learnings.
 *
 * @param {string} sourceId Source to describe.
 * @returns {Promise<{source: object, context: object, latestSnapshot: object|null, recentLearnings: object[]}>}
 * @throws {AiocsError} `sourceNotFound` when the source id is unknown.
 */
async function describeSource(sourceId) {
  return withCatalog(({ catalog }) => {
    const source = catalog.getSourceById(sourceId);
    if (!source) {
      throw new AiocsError(
        AIOCS_ERROR_CODES.sourceNotFound,
        `Unknown source '${sourceId}'`
      );
    }
    const [firstSnapshot] = catalog.listSnapshots(sourceId);
    const latestSnapshot = firstSnapshot ?? null;
    const context = catalog.getSourceContext(sourceId);
    const recentLearnings = catalog.listRoutingLearnings({
      sourceId,
      limit: 10
    });
    return {
      source,
      context,
      latestSnapshot,
      recentLearnings
    };
  });
}
5647
/**
 * Loads a source context file and stores it as the context of a source.
 *
 * @param {string} sourceId Source to attach the context to.
 * @param {string} contextFile Path of the context file; resolved before loading.
 * @returns {Promise<object>} The catalog's upsert result plus `contextFile` (the resolved path).
 */
async function upsertSourceContextFromFile(sourceId, contextFile) {
  const resolvedContextFile = resolve8(contextFile);
  // The file is loaded and validated before the catalog is opened.
  const context = await loadSourceContextFile(resolvedContextFile);
  const stored = await withCatalog((handle) => handle.catalog.upsertSourceContext(sourceId, context));
  return { ...stored, contextFile: resolvedContextFile };
}
5656
/**
 * Returns the stored context record of a source.
 *
 * @param {string} sourceId Source whose context is requested.
 * @returns {Promise<object>} Result of the catalog's `getSourceContext`.
 */
async function getSourceContextForSource(sourceId) {
  return withCatalog((handle) => handle.catalog.getSourceContext(sourceId));
}
5659
/**
 * Lists pages of a source, translating CLI-style options into the catalog's
 * `listPages` input. Options that are unset (or not numbers, for
 * limit/offset) are left out of the request entirely.
 *
 * @param {string} sourceId Source to list pages for.
 * @param {{snapshot?: string, query?: string, path?: string[], limit?: number, offset?: number}} options
 * @returns {Promise<object>} Result of the catalog's `listPages`.
 */
async function listSourcePages(sourceId, options) {
  return withCatalog(({ catalog }) => {
    const request = { sourceId };
    if (options.snapshot) {
      request.snapshotId = options.snapshot;
    }
    if (options.query) {
      request.query = options.query;
    }
    if (options.path && options.path.length > 0) {
      request.pathPatterns = options.path;
    }
    if (typeof options.limit === "number") {
      request.limit = options.limit;
    }
    if (typeof options.offset === "number") {
      request.offset = options.offset;
    }
    return catalog.listPages(request);
  });
}
5669
/**
 * Fetches a single page through the catalog.
 *
 * @param {object} input Passed unchanged to the catalog's `getPage`.
 * @returns {Promise<object>} Result of the catalog's `getPage`.
 */
async function showPage(input) {
  return withCatalog((handle) => handle.catalog.getPage(input));
}
5672
/**
 * Saves a routing learning, forwarding only the optional fields that are set.
 *
 * @param {object} input
 * @param {string} input.sourceId
 * @param {string} [input.snapshotId]
 * @param {string} input.learningType
 * @param {string} input.intent
 * @param {string} [input.pageUrl]
 * @param {string} [input.filePath]
 * @param {string} [input.title]
 * @param {string} [input.note]
 * @param {string[]} [input.searchTerms]
 * @returns {Promise<{learning: object}>} The learning as written by the catalog.
 */
async function saveRoutingLearning(input) {
  const learning = await withCatalog(({ catalog }) => {
    const request = { sourceId: input.sourceId };
    if (input.snapshotId) {
      request.snapshotId = input.snapshotId;
    }
    request.learningType = input.learningType;
    request.intent = input.intent;
    // Falsy optional values (including empty strings) are omitted, not forwarded.
    for (const field of ["pageUrl", "filePath", "title", "note", "searchTerms"]) {
      if (input[field]) {
        request[field] = input[field];
      }
    }
    return catalog.upsertRoutingLearning(request);
  });
  return { learning };
}
5686
/**
 * Lists routing learnings matching the given filters.
 *
 * @param {object} [input] Passed unchanged to the catalog's `listRoutingLearnings`.
 * @returns {Promise<{learnings: object[]}>}
 */
async function listRoutingLearningsForQuery(input) {
  const learnings = await withCatalog((handle) => handle.catalog.listRoutingLearnings(input));
  return { learnings };
}
5082
5690
  async function fetchSources(sourceIdOrAll) {
5083
5691
  const results = await withCatalog(async ({ catalog, dataDir }) => {
5084
5692
  const sourceIds = sourceIdOrAll === "all" ? catalog.listSources().map((item) => item.id) : [sourceIdOrAll];
@@ -5222,6 +5830,233 @@ async function searchCatalog(query, options) {
5222
5830
  results: results.results
5223
5831
  };
5224
5832
  }
5833
/**
 * Retrieves the most relevant full pages for a query.
 *
 * Page candidates come from three places: common locations in matching source
 * contexts, "discovery" routing learnings, and hybrid search results. Pages
 * named by matching "negative" learnings are excluded. Candidates are resolved
 * to real pages, scored with `scorePageCandidate`, and the top `pageLimit`
 * (at least one) are returned.
 *
 * @param {string} query Raw user query.
 * @param {object} options
 * @param {string} [options.project] Project directory used to resolve the linked scope (defaults to cwd).
 * @param {string[]} options.source Explicit source ids; takes precedence over `all` and the project scope.
 * @param {boolean} [options.all] Search every source.
 * @param {string} [options.snapshot] Snapshot id for search and common locations.
 * @param {string} [options.mode] Search mode; defaults to the configured default.
 * @param {string[]} [options.path] Path patterns forwarded to search.
 * @param {string[]} [options.language] Languages forwarded to search.
 * @param {number} [options.limit] Search result limit.
 * @param {number} [options.offset] Search result offset.
 * @param {number} [options.pageLimit] Number of pages to return (default 3).
 * @returns {Promise<object>} Query, modes, scope, hints, learnings, raw search results and ranked pages.
 * @throws {AiocsError} `noProjectScope` when no scope can be determined.
 */
async function retrieveContext(query, options) {
  const cwd = options.project ? resolve8(options.project) : process.cwd();
  const explicitSources = options.source.length > 0;
  const pageLimit = typeof options.pageLimit === "number" ? options.pageLimit : 3;
  return withCatalog(async ({ catalog }) => {
    const hybridConfig = getHybridRuntimeConfig();
    const scope = resolveProjectScope(cwd, catalog.listProjectLinks());
    if (!explicitSources && !options.all && !scope) {
      throw new AiocsError(
        AIOCS_ERROR_CODES.noProjectScope,
        "No linked project scope found. Use --source or --all."
      );
    }
    const sourceScope = explicitSources ? options.source : options.all ? catalog.listSources().map((source) => source.id) : scope?.sourceIds ?? [];

    // Each getSourceById call lists every source, so remember the answer per source.
    const latestSnapshotIds = /* @__PURE__ */ new Map();
    const latestSnapshotIdFor = (sourceId) => {
      if (!latestSnapshotIds.has(sourceId)) {
        latestSnapshotIds.set(sourceId, catalog.getSourceById(sourceId)?.lastSuccessfulSnapshotId ?? null);
      }
      return latestSnapshotIds.get(sourceId);
    };
    const avoidKeyFor = (sourceId, identity) => `${sourceId}::${identity}`;

    // Query learnings per in-scope source. Applying one global limit before the
    // scope filter let out-of-scope sources push relevant learnings out of the window.
    const learnings = sourceScope.length === 0
      ? catalog.listRoutingLearnings({ limit: 100 })
      : [...new Set(sourceScope)].flatMap((sourceId) => catalog.listRoutingLearnings({ sourceId, limit: 100 }));
    const scoredLearnings = learnings.map((learning) => ({
      ...learning,
      score: scoreLearning(query, learning)
    })).filter((learning) => learning.score > 0).sort((left, right) => right.score - left.score || right.updatedAt.localeCompare(left.updatedAt));
    const matchedLearnings = scoredLearnings.filter((learning) => learning.learningType === "discovery");
    const avoidedLearnings = scoredLearnings.filter((learning) => learning.learningType === "negative");
    const avoidedPageKeys = new Set(
      avoidedLearnings.map((learning) => avoidKeyFor(learning.sourceId, learning.filePath ?? learning.pageUrl ?? ""))
    );

    const sourceHintScores = /* @__PURE__ */ new Map();
    const commonLocationBoosts = /* @__PURE__ */ new Map();
    const sourceHints = sourceScope.map((sourceId) => {
      const contextRecord = catalog.getSourceContext(sourceId);
      const score = scoreSourceContext(query, contextRecord.context);
      if (!contextRecord.context || score <= 0) {
        return null;
      }
      // A location matches when it alone, wrapped in an otherwise empty context, scores above zero.
      const matchedCommonLocations = contextRecord.context.commonLocations.filter(
        (location) => scoreSourceContext(query, {
          purpose: "",
          summary: "",
          topicHints: [],
          commonLocations: [location],
          gotchas: [],
          authNotes: []
        }) > 0
      );
      return {
        sourceId,
        score,
        context: contextRecord.context,
        matchedCommonLocations
      };
    }).filter((entry) => Boolean(entry)).sort((left, right) => right.score - left.score || left.sourceId.localeCompare(right.sourceId));

    for (const sourceHint of sourceHints) {
      sourceHintScores.set(
        sourceHint.sourceId,
        Math.max(sourceHint.score, sourceHintScores.get(sourceHint.sourceId) ?? 0)
      );
      const latestSnapshotId = options.snapshot ?? latestSnapshotIdFor(sourceHint.sourceId);
      if (!latestSnapshotId) {
        continue;
      }
      for (const location of sourceHint.matchedCommonLocations) {
        const key = pageIdentityKey({
          sourceId: sourceHint.sourceId,
          snapshotId: latestSnapshotId,
          pageUrl: location.url ?? "",
          filePath: location.filePath ?? null
        });
        commonLocationBoosts.set(
          key,
          Math.max(sourceHint.score, commonLocationBoosts.get(key) ?? 0)
        );
      }
    }

    const search = await searchHybridCatalog({
      catalog,
      config: hybridConfig,
      query,
      mode: options.mode ?? hybridConfig.defaultSearchMode,
      searchInput: {
        cwd,
        ...explicitSources ? { sourceIds: options.source } : {},
        ...options.snapshot ? { snapshotId: options.snapshot } : {},
        ...options.all ? { all: true } : {},
        ...options.path && options.path.length > 0 ? { pathPatterns: options.path } : {},
        ...options.language && options.language.length > 0 ? { languages: options.language } : {},
        ...typeof options.limit === "number" ? { limit: options.limit } : {},
        ...typeof options.offset === "number" ? { offset: options.offset } : {}
      }
    });

    const learnedPages = matchedLearnings.filter((learning) => learning.pageUrl || learning.filePath).map((learning) => ({
      sourceId: learning.sourceId,
      snapshotId: learning.snapshotId ?? latestSnapshotIdFor(learning.sourceId) ?? "",
      pageUrl: learning.pageUrl ?? "",
      filePath: learning.filePath ?? null
    })).filter((entry) => entry.snapshotId);
    const searchedPages = search.results.filter((result) => !avoidedPageKeys.has(avoidKeyFor(result.sourceId, result.filePath ?? result.pageUrl))).map((result) => ({
      sourceId: result.sourceId,
      snapshotId: result.snapshotId,
      pageUrl: result.pageUrl,
      filePath: result.filePath
    }));
    const commonLocationPages = sourceHints.flatMap((sourceHint) => {
      const snapshotId = options.snapshot ?? latestSnapshotIdFor(sourceHint.sourceId) ?? "";
      if (!snapshotId) {
        return [];
      }
      return sourceHint.matchedCommonLocations.map((location) => ({
        sourceId: sourceHint.sourceId,
        snapshotId,
        pageUrl: location.url ?? "",
        filePath: location.filePath ?? null
      }));
    });
    const pageCandidates = dedupePagesByIdentity([
      ...commonLocationPages,
      ...learnedPages,
      ...searchedPages
    ]).filter((entry) => !avoidedPageKeys.has(avoidKeyFor(entry.sourceId, entry.filePath ?? entry.pageUrl)));

    const searchPageSignals = /* @__PURE__ */ new Map();
    for (const result of search.results) {
      if (avoidedPageKeys.has(avoidKeyFor(result.sourceId, result.filePath ?? result.pageUrl))) {
        continue;
      }
      const key = pageIdentityKey({
        sourceId: result.sourceId,
        snapshotId: result.snapshotId,
        pageUrl: result.pageUrl,
        filePath: result.filePath
      });
      const record = searchPageSignals.get(key) ?? {
        bestLexicalScore: 0,
        bestVectorScore: 0,
        sectionTitles: /* @__PURE__ */ new Set()
      };
      // A result's score is split evenly across the signals that produced it.
      const signalCount = Math.max(1, result.signals.length);
      const perSignalScore = result.score / signalCount;
      if (result.signals.includes("lexical")) {
        record.bestLexicalScore = Math.max(record.bestLexicalScore, perSignalScore);
      }
      if (result.signals.includes("vector")) {
        record.bestVectorScore = Math.max(record.bestVectorScore, perSignalScore);
      }
      record.sectionTitles.add(result.sectionTitle);
      searchPageSignals.set(key, record);
    }

    const learningBoosts = /* @__PURE__ */ new Map();
    for (const learning of matchedLearnings) {
      if (!learning.pageUrl && !learning.filePath) {
        continue;
      }
      const snapshotId = learning.snapshotId ?? latestSnapshotIdFor(learning.sourceId) ?? "";
      if (!snapshotId) {
        continue;
      }
      const key = pageIdentityKey({
        sourceId: learning.sourceId,
        snapshotId,
        pageUrl: learning.pageUrl ?? "",
        filePath: learning.filePath ?? null
      });
      learningBoosts.set(key, Math.max(learning.score, learningBoosts.get(key) ?? 0));
    }

    // Resolve candidates to stored pages. A candidate named by URL and another
    // named by file path can be the same page, so pages are grouped by their
    // resolved identity and every key they were referenced by is remembered.
    const resolvedPages = /* @__PURE__ */ new Map();
    for (const entry of pageCandidates) {
      let page;
      try {
        page = catalog.getPage({
          sourceId: entry.sourceId,
          snapshotId: entry.snapshotId,
          ...entry.filePath ? { filePath: entry.filePath } : { url: entry.pageUrl }
        });
      } catch (error) {
        // Stale learnings and locations may point at pages that no longer exist.
        if (error instanceof AiocsError && (error.code === AIOCS_ERROR_CODES.pageNotFound || error.code === AIOCS_ERROR_CODES.snapshotNotFound)) {
          continue;
        }
        throw error;
      }
      // A negative learning excludes the page whichever locator it was recorded with.
      const avoidedByUrl = avoidedPageKeys.has(avoidKeyFor(page.sourceId, page.page.url));
      const avoidedByPath = Boolean(page.page.filePath) && avoidedPageKeys.has(avoidKeyFor(page.sourceId, page.page.filePath));
      if (avoidedByUrl || avoidedByPath) {
        continue;
      }
      const resolvedKey = pageIdentityKey({
        sourceId: page.sourceId,
        snapshotId: page.snapshotId,
        pageUrl: page.page.url,
        filePath: page.page.filePath
      });
      const record = resolvedPages.get(resolvedKey) ?? {
        page,
        aliasKeys: /* @__PURE__ */ new Set([resolvedKey])
      };
      record.aliasKeys.add(pageIdentityKey(entry));
      resolvedPages.set(resolvedKey, record);
    }

    const bestBoost = (boosts, aliasKeys) => {
      let best = 0;
      for (const key of aliasKeys) {
        best = Math.max(best, boosts.get(key) ?? 0);
      }
      return best;
    };

    const rankedPages = [...resolvedPages.values()].map(({ page, aliasKeys }) => {
      const searchSignals = {
        bestLexicalScore: 0,
        bestVectorScore: 0,
        sectionTitles: /* @__PURE__ */ new Set()
      };
      for (const key of aliasKeys) {
        const signals = searchPageSignals.get(key);
        if (!signals) {
          continue;
        }
        searchSignals.bestLexicalScore = Math.max(searchSignals.bestLexicalScore, signals.bestLexicalScore);
        searchSignals.bestVectorScore = Math.max(searchSignals.bestVectorScore, signals.bestVectorScore);
        for (const sectionTitle of signals.sectionTitles) {
          searchSignals.sectionTitles.add(sectionTitle);
        }
      }
      const score = scorePageCandidate(query, {
        pageTitle: page.page.title,
        pageReference: page.page.filePath ?? page.page.url,
        sectionTitles: [...searchSignals.sectionTitles],
        bestLexicalScore: searchSignals.bestLexicalScore,
        bestVectorScore: searchSignals.bestVectorScore,
        learningScore: bestBoost(learningBoosts, aliasKeys),
        sourceHintScore: sourceHintScores.get(page.sourceId) ?? 0,
        commonLocationScore: bestBoost(commonLocationBoosts, aliasKeys)
      });
      return {
        sourceId: page.sourceId,
        snapshotId: page.snapshotId,
        score,
        page: page.page
      };
    }).sort((left, right) => right.score - left.score || left.page.title.localeCompare(right.page.title) || left.page.url.localeCompare(right.page.url)).slice(0, Math.max(1, pageLimit));

    const pages = rankedPages.map((entry) => ({
      sourceId: entry.sourceId,
      snapshotId: entry.snapshotId,
      ...entry.page
    }));
    return {
      query,
      modeRequested: search.modeRequested,
      modeUsed: search.modeUsed,
      sourceScope,
      sourceHints,
      matchedLearnings,
      avoidedLearnings,
      search: {
        total: search.total,
        limit: search.limit,
        offset: search.offset,
        hasMore: search.hasMore,
        results: search.results
      },
      pages
    };
  });
}
5225
6060
  async function showChunk(chunkId) {
5226
6061
  const chunk = await withCatalog(({ catalog }) => catalog.getChunkById(chunkId));
5227
6062
  if (!chunk) {
@@ -5361,6 +6196,13 @@ export {
5361
6196
  packageDescription,
5362
6197
  upsertSourceFromSpecFile,
5363
6198
  listSources,
6199
+ describeSource,
6200
+ upsertSourceContextFromFile,
6201
+ getSourceContextForSource,
6202
+ listSourcePages,
6203
+ showPage,
6204
+ saveRoutingLearning,
6205
+ listRoutingLearningsForQuery,
5364
6206
  fetchSources,
5365
6207
  refreshDueSources,
5366
6208
  runSourceCanaries,
@@ -5369,6 +6211,7 @@ export {
5369
6211
  linkProjectSources,
5370
6212
  unlinkProjectSources,
5371
6213
  searchCatalog,
6214
+ retrieveContext,
5372
6215
  showChunk,
5373
6216
  verifyCoverage,
5374
6217
  initManagedSources,