@skill-map/cli 0.45.1 → 0.47.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (44)
  1. package/dist/cli/tutorial/sm-master/SKILL.md +29 -29
  2. package/dist/cli/tutorial/sm-master/references/fixture-templates.md +18 -13
  3. package/dist/cli/tutorial/sm-master/references/tour-authoring.md +35 -40
  4. package/dist/cli/tutorial/sm-master/references/tour-plugins.md +32 -32
  5. package/dist/cli/tutorial/sm-master/references/tour-settings.md +156 -75
  6. package/dist/cli/tutorial/sm-tutorial/SKILL.md +3 -3
  7. package/dist/cli.js +998 -458
  8. package/dist/conformance/index.js +4 -1
  9. package/dist/index.js +67 -22
  10. package/dist/kernel/index.d.ts +93 -13
  11. package/dist/kernel/index.js +67 -22
  12. package/dist/migrations/001_initial.sql +23 -0
  13. package/dist/ui/chunk-22CKFAEU.js +1 -0
  14. package/dist/ui/chunk-3AKR33GE.js +1 -0
  15. package/dist/ui/{chunk-QNTAOR2L.js → chunk-3HLMBEDX.js} +1 -1
  16. package/dist/ui/{chunk-T3IVIRRJ.js → chunk-7K36273M.js} +1 -1
  17. package/dist/ui/{chunk-MFLFIA7C.js → chunk-CO2ZOUSD.js} +1 -1
  18. package/dist/ui/{chunk-2RAE3FAN.js → chunk-CRWK2NFZ.js} +1 -1
  19. package/dist/ui/{chunk-VGPYYAVI.js → chunk-EPBUSS3I.js} +1 -1
  20. package/dist/ui/chunk-HAWX5WNM.js +4 -0
  21. package/dist/ui/{chunk-QDUSFOBE.js → chunk-K365TVPA.js} +1 -1
  22. package/dist/ui/chunk-PO2VZMOB.js +123 -0
  23. package/dist/ui/{chunk-X227ITGS.js → chunk-RT7E4S5B.js} +1 -1
  24. package/dist/ui/{chunk-5AD5ZV4I.js → chunk-UIUGLD7F.js} +1 -1
  25. package/dist/ui/{chunk-IYM26L3O.js → chunk-UV3QRBRR.js} +1 -1
  26. package/dist/ui/chunk-VNA3TMIO.js +1 -0
  27. package/dist/ui/{chunk-F7I6KMHX.js → chunk-VW2A6WZ3.js} +1 -1
  28. package/dist/ui/{chunk-A7PRWMQD.js → chunk-WPUUCIS3.js} +11 -11
  29. package/dist/ui/{chunk-MS6B7344.js → chunk-XWU3YFSM.js} +7 -7
  30. package/dist/ui/{chunk-I5AX4U2N.js → chunk-YOF6HQCQ.js} +1 -1
  31. package/dist/ui/chunk-ZZJ7XWDX.js +1 -0
  32. package/dist/ui/index.html +1 -1
  33. package/dist/ui/main-55GYZX6C.js +4 -0
  34. package/migrations/001_initial.sql +23 -0
  35. package/package.json +2 -2
  36. package/dist/cli.js.map +0 -1
  37. package/dist/conformance/index.js.map +0 -1
  38. package/dist/index.js.map +0 -1
  39. package/dist/kernel/index.js.map +0 -1
  40. package/dist/ui/chunk-27WQPOXP.js +0 -1
  41. package/dist/ui/chunk-555ST76V.js +0 -1
  42. package/dist/ui/chunk-PZQHB7GS.js +0 -4
  43. package/dist/ui/chunk-ZIGUUDUX.js +0 -123
  44. package/dist/ui/main-KMSUFJ6Y.js +0 -3
@@ -1,4 +1,6 @@
1
1
  // conformance/index.ts
2
+
3
+ !function(){try{var e="undefined"!=typeof window?window:"undefined"!=typeof global?global:"undefined"!=typeof globalThis?globalThis:"undefined"!=typeof self?self:{},n=(new e.Error).stack;n&&(e._sentryDebugIds=e._sentryDebugIds||{},e._sentryDebugIds[n]="e3097683-993f-599d-92ef-d1e724637586")}catch(e){}}();
2
4
  import { spawnSync } from "child_process";
3
5
  import { cpSync, existsSync, mkdtempSync, readdirSync, readFileSync, rmSync, statSync } from "fs";
4
6
  import { tmpdir } from "os";
@@ -422,4 +424,5 @@ export {
422
424
  assertSpecRoot,
423
425
  runConformanceCase
424
426
  };
425
- //# sourceMappingURL=index.js.map
427
+ //# sourceMappingURL=index.js.map
428
+ //# debugId=e3097683-993f-599d-92ef-d1e724637586
package/dist/index.js CHANGED
@@ -1,4 +1,6 @@
1
1
  // kernel/i18n/registry.texts.ts
2
+
3
+ !function(){try{var e="undefined"!=typeof window?window:"undefined"!=typeof global?global:"undefined"!=typeof globalThis?globalThis:"undefined"!=typeof self?self:{},n=(new e.Error).stack;n&&(e._sentryDebugIds=e._sentryDebugIds||{},e._sentryDebugIds[n]="e5a349ec-19a4-5568-bcab-36d7ea370839")}catch(e){}}();
2
4
  var REGISTRY_TEXTS = {
3
5
  duplicateExtension: "Extension already registered: {{kind}}:{{qualifiedId}}",
4
6
  unknownKind: "Unknown extension kind: {{kind}}",
@@ -96,12 +98,11 @@ var Registry = class {
96
98
  import { existsSync as existsSync11, statSync as statSync4 } from "fs";
97
99
  import { isAbsolute as isAbsolute4, resolve as resolve11 } from "path";
98
100
  import { Tiktoken as Tiktoken2 } from "js-tiktoken/lite";
99
- import cl100k_base from "js-tiktoken/ranks/cl100k_base";
100
101
 
101
102
  // package.json
102
103
  var package_default = {
103
104
  name: "@skill-map/cli",
104
- version: "0.45.1",
105
+ version: "0.47.0",
105
106
  description: "skill-map reference implementation \u2014 kernel + CLI + adapters.",
106
107
  license: "MIT",
107
108
  type: "module",
@@ -765,16 +766,13 @@ function strip(value) {
765
766
  // config/defaults.json
766
767
  var defaults_default = {
767
768
  schemaVersion: 1,
768
- autoMigrate: true,
769
769
  allowEditSmFiles: false,
770
770
  tokenizer: "cl100k_base",
771
- providers: [],
772
771
  roots: [],
773
772
  ignore: [],
774
773
  scan: {
775
774
  tokenize: true,
776
775
  strict: false,
777
- followSymlinks: false,
778
776
  maxFileSizeBytes: 1048576,
779
777
  maxNodes: 256,
780
778
  watch: {
@@ -783,9 +781,6 @@ var defaults_default = {
783
781
  referencePaths: []
784
782
  },
785
783
  plugins: {},
786
- history: {
787
- share: false
788
- },
789
784
  jobs: {
790
785
  ttlSeconds: 3600,
791
786
  graceMultiplier: 3,
@@ -796,9 +791,6 @@ var defaults_default = {
796
791
  completed: 2592e3,
797
792
  failed: null
798
793
  }
799
- },
800
- i18n: {
801
- locale: "en"
802
794
  }
803
795
  };
804
796
 
@@ -2363,8 +2355,9 @@ async function* walkContent(roots, options) {
2363
2355
  if (!parser) throw new UnknownParserError(options.parser);
2364
2356
  const filter = options.ignoreFilter ?? buildIgnoreFilter();
2365
2357
  const extensions = options.extensions;
2358
+ const sizeLimit = buildSizeLimit(options);
2366
2359
  for (const root of roots) {
2367
- for await (const file of walkRoot(root, root, filter, extensions)) {
2360
+ for await (const file of walkRoot(root, root, filter, extensions, sizeLimit)) {
2368
2361
  const relPath = relative2(root, file).split(sep2).join("/");
2369
2362
  let raw;
2370
2363
  try {
@@ -2387,7 +2380,15 @@ async function* walkContent(roots, options) {
2387
2380
  }
2388
2381
  }
2389
2382
  }
2390
- async function* walkRoot(root, current, filter, extensions) {
2383
+ function buildSizeLimit(options) {
2384
+ const sizeLimit = {};
2385
+ if (options.maxFileSizeBytes !== void 0) {
2386
+ sizeLimit.maxFileSizeBytes = options.maxFileSizeBytes;
2387
+ }
2388
+ if (options.onOversizedFile) sizeLimit.onOversizedFile = options.onOversizedFile;
2389
+ return sizeLimit;
2390
+ }
2391
+ async function* walkRoot(root, current, filter, extensions, sizeLimit) {
2391
2392
  let entries;
2392
2393
  try {
2393
2394
  entries = await readdir(current, { withFileTypes: true, encoding: "utf8" });
@@ -2401,11 +2402,16 @@ async function* walkRoot(root, current, filter, extensions) {
2401
2402
  if (filter.ignores(rel)) continue;
2402
2403
  if (entry.isSymbolicLink()) continue;
2403
2404
  if (entry.isDirectory()) {
2404
- yield* walkRoot(root, full, filter, extensions);
2405
+ yield* walkRoot(root, full, filter, extensions, sizeLimit);
2405
2406
  } else if (entry.isFile() && hasMatchingExtension(name, extensions)) {
2406
2407
  try {
2407
2408
  const s = await lstat(full);
2408
- if (s.isFile()) yield full;
2409
+ if (!s.isFile()) continue;
2410
+ if (sizeLimit.maxFileSizeBytes !== void 0 && s.size > sizeLimit.maxFileSizeBytes) {
2411
+ sizeLimit.onOversizedFile?.({ path: rel, bytes: s.size });
2412
+ continue;
2413
+ }
2414
+ yield full;
2409
2415
  } catch {
2410
2416
  }
2411
2417
  }
@@ -2435,6 +2441,10 @@ function resolveProviderWalk(provider) {
2435
2441
  parser: read.parser
2436
2442
  };
2437
2443
  if (options?.ignoreFilter) walkOptions.ignoreFilter = options.ignoreFilter;
2444
+ if (options?.maxFileSizeBytes !== void 0) {
2445
+ walkOptions.maxFileSizeBytes = options.maxFileSizeBytes;
2446
+ }
2447
+ if (options?.onOversizedFile) walkOptions.onOversizedFile = options.onOversizedFile;
2438
2448
  return walkContent(roots, walkOptions);
2439
2449
  };
2440
2450
  }
@@ -2843,7 +2853,18 @@ async function walkAndExtract(opts) {
2843
2853
  const accum = createWalkAccumulators();
2844
2854
  const wctx = buildWalkContext(opts);
2845
2855
  const claimedPaths = /* @__PURE__ */ new Set();
2846
- const walkOptions = opts.ignoreFilter ? { ignoreFilter: opts.ignoreFilter } : {};
2856
+ const oversizedFiles = [];
2857
+ const oversizedSeen = /* @__PURE__ */ new Set();
2858
+ const onOversizedFile = (info) => {
2859
+ if (oversizedSeen.has(info.path)) return;
2860
+ oversizedSeen.add(info.path);
2861
+ oversizedFiles.push(info);
2862
+ };
2863
+ const walkOptions = {
2864
+ ...opts.ignoreFilter ? { ignoreFilter: opts.ignoreFilter } : {},
2865
+ onOversizedFile,
2866
+ ...opts.maxFileSizeBytes !== void 0 ? { maxFileSizeBytes: opts.maxFileSizeBytes } : {}
2867
+ };
2847
2868
  let filesWalked = 0;
2848
2869
  let index = 0;
2849
2870
  const effectiveMaxNodes = opts.overrideMaxNodes ?? opts.recommendedNodeLimit;
@@ -2876,6 +2897,7 @@ async function walkAndExtract(opts) {
2876
2897
  cachedPaths: accum.cachedPaths,
2877
2898
  frontmatterIssues: accum.frontmatterIssues,
2878
2899
  filesWalked,
2900
+ oversizedFiles,
2879
2901
  recommendedNodeLimit: opts.recommendedNodeLimit,
2880
2902
  overrideMaxNodes: opts.overrideMaxNodes,
2881
2903
  capReached,
@@ -2926,7 +2948,11 @@ async function processRawNode(raw, provider, wctx, accum, claimedPaths, nextInde
2926
2948
  }
2927
2949
  claimedPaths.add(raw.path);
2928
2950
  const priorNode = wctx.priorNodesByPath.get(raw.path);
2929
- const nodeHashCacheEligible = wctx.opts.enableCache && wctx.opts.prior !== null && priorNode !== void 0 && priorNode.bodyHash === bodyHash && priorNode.frontmatterHash === frontmatterHash;
2951
+ const nodeHashCacheEligible = wctx.opts.enableCache && // Tokenizer-change invalidation: when the resolved encoder differs
2952
+ // from the one that produced the prior snapshot's counts, no node is
2953
+ // cache-eligible, every node rebuilds so `buildNode` re-tokenizes
2954
+ // with the current encoder. See `tokenizerChanged` on the options.
2955
+ !wctx.opts.tokenizerChanged && wctx.opts.prior !== null && priorNode !== void 0 && priorNode.bodyHash === bodyHash && priorNode.frontmatterHash === frontmatterHash;
2930
2956
  const sidecarResolution = resolveSidecarOverlay(
2931
2957
  raw.path,
2932
2958
  raw.path,
@@ -3128,6 +3154,16 @@ function resolveSpecVersionSafe() {
3128
3154
  return "unknown";
3129
3155
  }
3130
3156
  }
3157
+ var DEFAULT_TOKENIZER = "cl100k_base";
3158
+ function resolveTokenizerName(name) {
3159
+ return name === "o200k_base" ? "o200k_base" : DEFAULT_TOKENIZER;
3160
+ }
3161
+ async function loadTokenizerRanks(name) {
3162
+ if (name === "o200k_base") {
3163
+ return (await import("js-tiktoken/ranks/o200k_base")).default;
3164
+ }
3165
+ return (await import("js-tiktoken/ranks/cl100k_base")).default;
3166
+ }
3131
3167
  async function runScanWithRenames(_kernel, options) {
3132
3168
  return runScanInternal(_kernel, options);
3133
3169
  }
@@ -3137,7 +3173,7 @@ async function runScan(_kernel, options) {
3137
3173
  }
3138
3174
  async function runScanInternal(_kernel, options) {
3139
3175
  validateRoots(options.roots);
3140
- const setup = buildScanSetup(options);
3176
+ const setup = await buildScanSetup(options);
3141
3177
  const { emitter, exts, hookDispatcher, encoder, prior, start } = setup;
3142
3178
  const scanStartedEvent = makeEvent("scan.started", { roots: options.roots });
3143
3179
  emitter.emit(scanStartedEvent);
@@ -3147,6 +3183,7 @@ async function runScanInternal(_kernel, options) {
3147
3183
  options.roots,
3148
3184
  exts.providers
3149
3185
  );
3186
+ const tokenizerChanged = encoder !== null && prior !== null && prior.tokenizer !== setup.tokenizer;
3150
3187
  const walked = await walkAndExtract({
3151
3188
  providers: exts.providers,
3152
3189
  extractors: exts.extractors,
@@ -3156,6 +3193,7 @@ async function runScanInternal(_kernel, options) {
3156
3193
  encoder,
3157
3194
  strict: setup.strict,
3158
3195
  enableCache: setup.enableCache,
3196
+ tokenizerChanged,
3159
3197
  prior,
3160
3198
  priorIndex: setup.priorIndex,
3161
3199
  priorExtractorRuns: setup.priorExtractorRuns,
@@ -3163,7 +3201,8 @@ async function runScanInternal(_kernel, options) {
3163
3201
  pluginStores: options.pluginStores,
3164
3202
  activeProvider: activeProviderId,
3165
3203
  recommendedNodeLimit: options.recommendedNodeLimit ?? 256,
3166
- overrideMaxNodes: options.overrideMaxNodes ?? null
3204
+ overrideMaxNodes: options.overrideMaxNodes ?? null,
3205
+ ...options.maxFileSizeBytes !== void 0 ? { maxFileSizeBytes: options.maxFileSizeBytes } : {}
3167
3206
  });
3168
3207
  const activeProvider = activeProviderId ? exts.providers.find((p) => p.id === activeProviderId) ?? null : null;
3169
3208
  const resolved = resolveSignals({
@@ -3270,13 +3309,14 @@ function buildReservedNodePaths(nodes, kindRegistry, reservedNamesByProviderKind
3270
3309
  function hasEntries(set) {
3271
3310
  return set !== void 0 && set.size > 0;
3272
3311
  }
3273
- function buildScanSetup(options) {
3312
+ async function buildScanSetup(options) {
3274
3313
  const start = Date.now();
3275
3314
  const emitter = options.emitter ?? new InMemoryProgressEmitter();
3276
3315
  const exts = options.extensions ?? { providers: [], extractors: [], analyzers: [] };
3277
3316
  const hookDispatcher = makeHookDispatcher(exts.hooks ?? [], emitter);
3278
3317
  const tokenize = options.tokenize !== false;
3279
- const encoder = tokenize ? new Tiktoken2(cl100k_base) : null;
3318
+ const tokenizer = resolveTokenizerName(options.tokenizer);
3319
+ const encoder = tokenize ? new Tiktoken2(await loadTokenizerRanks(tokenizer)) : null;
3280
3320
  const prior = options.priorSnapshot ?? null;
3281
3321
  const priorIndex = indexPriorSnapshot(prior);
3282
3322
  const providerFrontmatter = buildProviderFrontmatterValidator(exts.providers);
@@ -3287,6 +3327,7 @@ function buildScanSetup(options) {
3287
3327
  exts,
3288
3328
  hookDispatcher,
3289
3329
  encoder,
3330
+ tokenizer,
3290
3331
  prior,
3291
3332
  priorIndex,
3292
3333
  priorExtractorRuns: options.priorExtractorRuns,
@@ -3322,6 +3363,7 @@ function buildScanStats(walked, issues, start) {
3322
3363
  // Providers compete.
3323
3364
  filesWalked: walked.filesWalked,
3324
3365
  filesSkipped: 0,
3366
+ filesOversized: walked.oversizedFiles.length,
3325
3367
  nodesCount: walked.nodes.length,
3326
3368
  linksCount: walked.internalLinks.length,
3327
3369
  issuesCount: issues.length,
@@ -3336,8 +3378,10 @@ function buildScanReturn(walked, issues, renameOps, stats, options, setup) {
3336
3378
  roots: options.roots,
3337
3379
  providers: setup.exts.providers.map((a) => a.id),
3338
3380
  scannedBy: SCANNED_BY,
3381
+ tokenizer: setup.tokenizer,
3339
3382
  recommendedNodeLimit: walked.recommendedNodeLimit,
3340
3383
  overrideMaxNodes: walked.overrideMaxNodes,
3384
+ oversizedFiles: walked.oversizedFiles,
3341
3385
  nodes: walked.nodes,
3342
3386
  links: walked.internalLinks,
3343
3387
  issues,
@@ -3775,4 +3819,5 @@ export {
3775
3819
  runScan,
3776
3820
  runScanWithRenames
3777
3821
  };
3778
- //# sourceMappingURL=index.js.map
3822
+ //# sourceMappingURL=index.js.map
3823
+ //# debugId=e5a349ec-19a4-5568-bcab-36d7ea370839
@@ -442,7 +442,7 @@ interface IExtensionBase {
442
442
  * 4. **Internal interfaces**, option bags, result records, config
443
443
  * slices, anything declared as `interface` and passed across
444
444
  * function boundaries inside the kernel / CLI but not part of the
445
- * spec: `IPluginRuntimeBundle`, `IPruneResult`, `IMigrationFile`,
445
+ * spec: `IPluginRuntime`, `IPruneResult`, `IMigrationFile`,
446
446
  * `IDbLocationOptions`. **`I` prefix.** The prefix matches
447
447
  * category 3 because both are "shapes that live in TypeScript
448
448
  * only, never in JSON".
@@ -912,6 +912,13 @@ interface ScanStats {
912
912
  * multiple Providers can claim the same file.
913
913
  */
914
914
  filesSkipped: number;
915
+ /**
916
+ * Files skipped by the walker BEFORE reading because their on-disk
917
+ * size exceeded `scan.maxFileSizeBytes`. Equals
918
+ * `ScanResult.oversizedFiles.length`. Absent on synthetic fixtures /
919
+ * loaders that predate the field; defaults to 0 when omitted.
920
+ */
921
+ filesOversized?: number;
915
922
  nodesCount: number;
916
923
  linksCount: number;
917
924
  issuesCount: number;
@@ -1021,6 +1028,17 @@ interface ScanResult {
1021
1028
  providers: string[];
1022
1029
  /** Implementation metadata. Populated by `runScan` for self-describing output. */
1023
1030
  scannedBy?: ScanScannedBy;
1031
+ /**
1032
+ * Resolved offline tokenizer (encoder) that produced the per-node token
1033
+ * counts in this scan. One of the closed allow-list in
1034
+ * `project-config.schema.json#/properties/tokenizer` (`cl100k_base`
1035
+ * default, `o200k_base`). Mirrors `scan_meta.tokenizer`. Populated by
1036
+ * `runScan` from the resolved `RunScanOptions.tokenizer`; the
1037
+ * incremental path compares the persisted value against the resolved
1038
+ * one and force-recomputes counts when they differ. Absent on synthetic
1039
+ * fixtures / loaders that predate the field.
1040
+ */
1041
+ tokenizer?: string;
1024
1042
  /**
1025
1043
  * Effective recommended cap on the number of files the walker accepted
1026
1044
  * during this scan (`scan.maxNodes` from settings, default 256). The UI
@@ -1035,11 +1053,30 @@ interface ScanResult {
1035
1053
  * setting). Bidirectional: can raise OR lower the recommended limit.
1036
1054
  */
1037
1055
  overrideMaxNodes?: number | null;
1056
+ /**
1057
+ * Files the walker skipped because their on-disk size exceeded
1058
+ * `scan.maxFileSizeBytes` (default 1 MiB). Each entry is the
1059
+ * root-relative, forward-slash path (same form as `node.path`) plus
1060
+ * the byte size. Drives the CLI / serve terminal WARN and the UI
1061
+ * banner. Defaults to `[]`; absent on synthetic fixtures that bypass
1062
+ * the walker.
1063
+ */
1064
+ oversizedFiles?: OversizedFile[];
1038
1065
  nodes: Node[];
1039
1066
  links: Link[];
1040
1067
  issues: Issue[];
1041
1068
  stats: ScanStats;
1042
1069
  }
1070
+ /**
1071
+ * One file the walker skipped for exceeding `scan.maxFileSizeBytes`.
1072
+ * Mirrors `scan-result.schema.json#/properties/oversizedFiles/items`.
1073
+ */
1074
+ interface OversizedFile {
1075
+ /** Root-relative, forward-slash path (same form as `node.path`). */
1076
+ path: string;
1077
+ /** On-disk size of the skipped file, in bytes. */
1078
+ bytes: number;
1079
+ }
1043
1080
 
1044
1081
  /**
1045
1082
  * Plugin-surface types, hand-written to mirror
@@ -1396,7 +1433,7 @@ interface IPersistOptions {
1396
1433
  * `<pluginId>/<extensionId>/<contributionId>`. Passed to the
1397
1434
  * `scan_contributions` upsert so the catalog sweep can drop rows
1398
1435
  * belonging to plugins / extensions that are no longer in the
1399
- * catalog (uninstalled plugins, disabled bundles, removed
1436
+ * catalog (uninstalled plugins, disabled plugins, removed
1400
1437
  * contributions). Empty / absent set = no catalog sweep (legacy
1401
1438
  * behaviour, leaves disabled-plugin rows stale per design F24
1402
1439
  * pre-fix).
@@ -1712,9 +1749,10 @@ interface IContributionRecord {
1712
1749
  * new / modified nodes from a fresh extractor pass.
1713
1750
  *
1714
1751
  * Meta envelope: the `scan_meta` table persists `roots` /
1715
- * `scannedAt` / `scannedBy` / `providers` / `stats.filesWalked` /
1716
- * `stats.filesSkipped` / `stats.durationMs`. When the row exists,
1717
- * those fields come back authoritatively. When it does not (DB
1752
+ * `scannedAt` / `scannedBy` / `tokenizer` / `providers` /
1753
+ * `stats.filesWalked` / `stats.filesSkipped` / `stats.filesOversized` /
1754
+ * `stats.durationMs` / `oversizedFiles`. When the row exists, those fields come back
1755
+ * authoritatively. When it does not (DB
1718
1756
  * freshly migrated but never scanned, or a legacy DB never
1719
1757
  * re-persisted), the loader degrades to a synthetic envelope:
1720
1758
  *
@@ -2163,9 +2201,7 @@ interface IProvider extends IExtensionBase {
2163
2201
  * directly, it goes through `resolveProviderWalk(provider)` which
2164
2202
  * picks `walk` over `read`.
2165
2203
  */
2166
- walk?(roots: string[], options?: {
2167
- ignoreFilter?: IIgnoreFilter;
2168
- }): AsyncIterable<IRawNode>;
2204
+ walk?(roots: string[], options?: IProviderWalkOptions): AsyncIterable<IRawNode>;
2169
2205
  /**
2170
2206
  * Given a path and its parsed frontmatter, decide the node kind, or
2171
2207
  * `null` to disclaim the file. The classifier is called after walk()
@@ -2186,7 +2222,7 @@ interface IProvider extends IExtensionBase {
2186
2222
  /**
2187
2223
  * Strict resolution matrix consumed by the post-walk confidence-lift
2188
2224
  * transform: maps a `link.kind` (emitted by an Extractor in this
2189
- * Provider's bundle, e.g. `'mentions'`, `'invokes'`) to the set of
2225
+ * Provider's plugin, e.g. `'mentions'`, `'invokes'`) to the set of
2190
2226
  * target `node.kind` values that count as a valid resolution.
2191
2227
  *
2192
2228
  * Used to decide whether to bump a link's confidence to 1.0 when its
@@ -2317,6 +2353,28 @@ interface IResolverRules {
2317
2353
  */
2318
2354
  kindPriority?: readonly LinkKind[];
2319
2355
  }
2356
+ /**
2357
+ * Per-invocation options the orchestrator threads into a Provider walk
2358
+ * (and through `resolveProviderWalk` into the kernel walker). All
2359
+ * optional, so a bare `provider.walk(roots)` keeps working.
2360
+ *
2361
+ * - `ignoreFilter`, the composed `.skillmapignore` + config.ignore +
2362
+ * bundled-defaults filter.
2363
+ * - `maxFileSizeBytes` / `onOversizedFile`, mirror of
2364
+ * `scan.maxFileSizeBytes` and the collector that records skipped
2365
+ * files into `ScanResult.oversizedFiles`. A Provider that ships its
2366
+ * own `walk()` SHOULD forward both into `walkContent` (or apply the
2367
+ * same size guard) so oversized files stay skipped + reported
2368
+ * regardless of which discovery path runs.
2369
+ */
2370
+ interface IProviderWalkOptions {
2371
+ ignoreFilter?: IIgnoreFilter;
2372
+ maxFileSizeBytes?: number;
2373
+ onOversizedFile?: (info: {
2374
+ path: string;
2375
+ bytes: number;
2376
+ }) => void;
2377
+ }
2320
2378
  /**
2321
2379
  * Declarative read config a Provider declares via `IProvider.read`.
2322
2380
  * Mirrors `extensions/provider.schema.json#/properties/read` at the
@@ -3313,11 +3371,25 @@ interface RunScanOptions {
3313
3371
  viewContributions?: readonly IRegisteredViewContribution[];
3314
3372
  /**
3315
3373
  * Compute per-node token counts (frontmatter / body / total) using the
3316
- * cl100k_base BPE (the modern OpenAI tokenizer used by GPT-4 / GPT-3.5).
3317
- * Defaults to true. Set false to skip tokenization; `node.tokens` is
3318
- * left undefined (spec-valid: the field is optional).
3374
+ * encoder named by `tokenizer` (default `cl100k_base`). Defaults to
3375
+ * true. Set false to skip tokenization; `node.tokens` is left undefined
3376
+ * (spec-valid: the field is optional).
3319
3377
  */
3320
3378
  tokenize?: boolean;
3379
+ /**
3380
+ * Offline tokenizer (encoder) used to build the per-node token counts.
3381
+ * Closed allow-list mirroring `project-config.schema.json#/properties/tokenizer`:
3382
+ * `cl100k_base` (default) or `o200k_base`. Threaded from `cfg.tokenizer`
3383
+ * by the driving adapters (scan-runner, watcher). Absent → `cl100k_base`.
3384
+ * The orchestrator guards the override layer: any value that is neither
3385
+ * allow-list member falls back to `cl100k_base` (the AJV enum on the
3386
+ * config schema already guarantees this for the config layers, the
3387
+ * guard covers out-of-band callers and the `override` layer). The
3388
+ * resolved value is carried onto `ScanResult.tokenizer` so the
3389
+ * persistence layer can record which encoder produced the counts and
3390
+ * the incremental path can detect an encoder switch.
3391
+ */
3392
+ tokenizer?: string;
3321
3393
  /**
3322
3394
  * Prior snapshot for two purposes (decoupled by design):
3323
3395
  *
@@ -3468,6 +3540,14 @@ interface RunScanOptions {
3468
3540
  * replaces the recommended limit for the duration of this scan.
3469
3541
  */
3470
3542
  overrideMaxNodes?: number | null;
3543
+ /**
3544
+ * Mirror of `scan.maxFileSizeBytes` (default 1 MiB). Threaded into
3545
+ * `walkAndExtract` so the walker skips any file larger than this
3546
+ * BEFORE reading it; skipped files surface in
3547
+ * `ScanResult.oversizedFiles` and `stats.filesOversized`. Absent → no
3548
+ * size limit (out-of-band callers and synthetic fixtures stay safe).
3549
+ */
3550
+ maxFileSizeBytes?: number;
3471
3551
  }
3472
3552
  /**
3473
3553
  * Same as `runScan` but also returns the rename heuristic's `RenameOp[]`
@@ -4015,7 +4095,7 @@ interface StoragePort {
4015
4095
  lookup(pluginId: string, contributionId: string, nodePath: string, extensionId?: string): Promise<IPersistedContribution[]>;
4016
4096
  /**
4017
4097
  * Drop rows for a plugin (optionally narrowed to a single
4018
- * extension within the bundle). Returns the number of deleted
4098
+ * extension within the plugin). Returns the number of deleted
4019
4099
  * rows. Called by `sm plugins disable` so the UI stops rendering
4020
4100
  * the disabled plugin's chips before the next scan.
4021
4101
  */