@skill-map/cli 0.45.1 → 0.47.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/tutorial/sm-master/SKILL.md +29 -29
- package/dist/cli/tutorial/sm-master/references/fixture-templates.md +18 -13
- package/dist/cli/tutorial/sm-master/references/tour-authoring.md +35 -40
- package/dist/cli/tutorial/sm-master/references/tour-plugins.md +32 -32
- package/dist/cli/tutorial/sm-master/references/tour-settings.md +156 -75
- package/dist/cli/tutorial/sm-tutorial/SKILL.md +3 -3
- package/dist/cli.js +998 -458
- package/dist/conformance/index.js +4 -1
- package/dist/index.js +67 -22
- package/dist/kernel/index.d.ts +93 -13
- package/dist/kernel/index.js +67 -22
- package/dist/migrations/001_initial.sql +23 -0
- package/dist/ui/chunk-22CKFAEU.js +1 -0
- package/dist/ui/chunk-3AKR33GE.js +1 -0
- package/dist/ui/{chunk-QNTAOR2L.js → chunk-3HLMBEDX.js} +1 -1
- package/dist/ui/{chunk-T3IVIRRJ.js → chunk-7K36273M.js} +1 -1
- package/dist/ui/{chunk-MFLFIA7C.js → chunk-CO2ZOUSD.js} +1 -1
- package/dist/ui/{chunk-2RAE3FAN.js → chunk-CRWK2NFZ.js} +1 -1
- package/dist/ui/{chunk-VGPYYAVI.js → chunk-EPBUSS3I.js} +1 -1
- package/dist/ui/chunk-HAWX5WNM.js +4 -0
- package/dist/ui/{chunk-QDUSFOBE.js → chunk-K365TVPA.js} +1 -1
- package/dist/ui/chunk-PO2VZMOB.js +123 -0
- package/dist/ui/{chunk-X227ITGS.js → chunk-RT7E4S5B.js} +1 -1
- package/dist/ui/{chunk-5AD5ZV4I.js → chunk-UIUGLD7F.js} +1 -1
- package/dist/ui/{chunk-IYM26L3O.js → chunk-UV3QRBRR.js} +1 -1
- package/dist/ui/chunk-VNA3TMIO.js +1 -0
- package/dist/ui/{chunk-F7I6KMHX.js → chunk-VW2A6WZ3.js} +1 -1
- package/dist/ui/{chunk-A7PRWMQD.js → chunk-WPUUCIS3.js} +11 -11
- package/dist/ui/{chunk-MS6B7344.js → chunk-XWU3YFSM.js} +7 -7
- package/dist/ui/{chunk-I5AX4U2N.js → chunk-YOF6HQCQ.js} +1 -1
- package/dist/ui/chunk-ZZJ7XWDX.js +1 -0
- package/dist/ui/index.html +1 -1
- package/dist/ui/main-55GYZX6C.js +4 -0
- package/migrations/001_initial.sql +23 -0
- package/package.json +2 -2
- package/dist/cli.js.map +0 -1
- package/dist/conformance/index.js.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/kernel/index.js.map +0 -1
- package/dist/ui/chunk-27WQPOXP.js +0 -1
- package/dist/ui/chunk-555ST76V.js +0 -1
- package/dist/ui/chunk-PZQHB7GS.js +0 -4
- package/dist/ui/chunk-ZIGUUDUX.js +0 -123
- package/dist/ui/main-KMSUFJ6Y.js +0 -3
|
@@ -1,4 +1,6 @@
|
|
|
1
1
|
// conformance/index.ts
|
|
2
|
+
|
|
3
|
+
!function(){try{var e="undefined"!=typeof window?window:"undefined"!=typeof global?global:"undefined"!=typeof globalThis?globalThis:"undefined"!=typeof self?self:{},n=(new e.Error).stack;n&&(e._sentryDebugIds=e._sentryDebugIds||{},e._sentryDebugIds[n]="e3097683-993f-599d-92ef-d1e724637586")}catch(e){}}();
|
|
2
4
|
import { spawnSync } from "child_process";
|
|
3
5
|
import { cpSync, existsSync, mkdtempSync, readdirSync, readFileSync, rmSync, statSync } from "fs";
|
|
4
6
|
import { tmpdir } from "os";
|
|
@@ -422,4 +424,5 @@ export {
|
|
|
422
424
|
assertSpecRoot,
|
|
423
425
|
runConformanceCase
|
|
424
426
|
};
|
|
425
|
-
//# sourceMappingURL=index.js.map
|
|
427
|
+
//# sourceMappingURL=index.js.map
|
|
428
|
+
//# debugId=e3097683-993f-599d-92ef-d1e724637586
|
package/dist/index.js
CHANGED
|
@@ -1,4 +1,6 @@
|
|
|
1
1
|
// kernel/i18n/registry.texts.ts
|
|
2
|
+
|
|
3
|
+
!function(){try{var e="undefined"!=typeof window?window:"undefined"!=typeof global?global:"undefined"!=typeof globalThis?globalThis:"undefined"!=typeof self?self:{},n=(new e.Error).stack;n&&(e._sentryDebugIds=e._sentryDebugIds||{},e._sentryDebugIds[n]="e5a349ec-19a4-5568-bcab-36d7ea370839")}catch(e){}}();
|
|
2
4
|
var REGISTRY_TEXTS = {
|
|
3
5
|
duplicateExtension: "Extension already registered: {{kind}}:{{qualifiedId}}",
|
|
4
6
|
unknownKind: "Unknown extension kind: {{kind}}",
|
|
@@ -96,12 +98,11 @@ var Registry = class {
|
|
|
96
98
|
import { existsSync as existsSync11, statSync as statSync4 } from "fs";
|
|
97
99
|
import { isAbsolute as isAbsolute4, resolve as resolve11 } from "path";
|
|
98
100
|
import { Tiktoken as Tiktoken2 } from "js-tiktoken/lite";
|
|
99
|
-
import cl100k_base from "js-tiktoken/ranks/cl100k_base";
|
|
100
101
|
|
|
101
102
|
// package.json
|
|
102
103
|
var package_default = {
|
|
103
104
|
name: "@skill-map/cli",
|
|
104
|
-
version: "0.45.1",
|
|
105
|
+
version: "0.47.0",
|
|
105
106
|
description: "skill-map reference implementation \u2014 kernel + CLI + adapters.",
|
|
106
107
|
license: "MIT",
|
|
107
108
|
type: "module",
|
|
@@ -765,16 +766,13 @@ function strip(value) {
|
|
|
765
766
|
// config/defaults.json
|
|
766
767
|
var defaults_default = {
|
|
767
768
|
schemaVersion: 1,
|
|
768
|
-
autoMigrate: true,
|
|
769
769
|
allowEditSmFiles: false,
|
|
770
770
|
tokenizer: "cl100k_base",
|
|
771
|
-
providers: [],
|
|
772
771
|
roots: [],
|
|
773
772
|
ignore: [],
|
|
774
773
|
scan: {
|
|
775
774
|
tokenize: true,
|
|
776
775
|
strict: false,
|
|
777
|
-
followSymlinks: false,
|
|
778
776
|
maxFileSizeBytes: 1048576,
|
|
779
777
|
maxNodes: 256,
|
|
780
778
|
watch: {
|
|
@@ -783,9 +781,6 @@ var defaults_default = {
|
|
|
783
781
|
referencePaths: []
|
|
784
782
|
},
|
|
785
783
|
plugins: {},
|
|
786
|
-
history: {
|
|
787
|
-
share: false
|
|
788
|
-
},
|
|
789
784
|
jobs: {
|
|
790
785
|
ttlSeconds: 3600,
|
|
791
786
|
graceMultiplier: 3,
|
|
@@ -796,9 +791,6 @@ var defaults_default = {
|
|
|
796
791
|
completed: 2592e3,
|
|
797
792
|
failed: null
|
|
798
793
|
}
|
|
799
|
-
},
|
|
800
|
-
i18n: {
|
|
801
|
-
locale: "en"
|
|
802
794
|
}
|
|
803
795
|
};
|
|
804
796
|
|
|
@@ -2363,8 +2355,9 @@ async function* walkContent(roots, options) {
|
|
|
2363
2355
|
if (!parser) throw new UnknownParserError(options.parser);
|
|
2364
2356
|
const filter = options.ignoreFilter ?? buildIgnoreFilter();
|
|
2365
2357
|
const extensions = options.extensions;
|
|
2358
|
+
const sizeLimit = buildSizeLimit(options);
|
|
2366
2359
|
for (const root of roots) {
|
|
2367
|
-
for await (const file of walkRoot(root, root, filter, extensions)) {
|
|
2360
|
+
for await (const file of walkRoot(root, root, filter, extensions, sizeLimit)) {
|
|
2368
2361
|
const relPath = relative2(root, file).split(sep2).join("/");
|
|
2369
2362
|
let raw;
|
|
2370
2363
|
try {
|
|
@@ -2387,7 +2380,15 @@ async function* walkContent(roots, options) {
|
|
|
2387
2380
|
}
|
|
2388
2381
|
}
|
|
2389
2382
|
}
|
|
2390
|
-
|
|
2383
|
+
function buildSizeLimit(options) {
|
|
2384
|
+
const sizeLimit = {};
|
|
2385
|
+
if (options.maxFileSizeBytes !== void 0) {
|
|
2386
|
+
sizeLimit.maxFileSizeBytes = options.maxFileSizeBytes;
|
|
2387
|
+
}
|
|
2388
|
+
if (options.onOversizedFile) sizeLimit.onOversizedFile = options.onOversizedFile;
|
|
2389
|
+
return sizeLimit;
|
|
2390
|
+
}
|
|
2391
|
+
async function* walkRoot(root, current, filter, extensions, sizeLimit) {
|
|
2391
2392
|
let entries;
|
|
2392
2393
|
try {
|
|
2393
2394
|
entries = await readdir(current, { withFileTypes: true, encoding: "utf8" });
|
|
@@ -2401,11 +2402,16 @@ async function* walkRoot(root, current, filter, extensions) {
|
|
|
2401
2402
|
if (filter.ignores(rel)) continue;
|
|
2402
2403
|
if (entry.isSymbolicLink()) continue;
|
|
2403
2404
|
if (entry.isDirectory()) {
|
|
2404
|
-
yield* walkRoot(root, full, filter, extensions);
|
|
2405
|
+
yield* walkRoot(root, full, filter, extensions, sizeLimit);
|
|
2405
2406
|
} else if (entry.isFile() && hasMatchingExtension(name, extensions)) {
|
|
2406
2407
|
try {
|
|
2407
2408
|
const s = await lstat(full);
|
|
2408
|
-
if (s.isFile())
|
|
2409
|
+
if (!s.isFile()) continue;
|
|
2410
|
+
if (sizeLimit.maxFileSizeBytes !== void 0 && s.size > sizeLimit.maxFileSizeBytes) {
|
|
2411
|
+
sizeLimit.onOversizedFile?.({ path: rel, bytes: s.size });
|
|
2412
|
+
continue;
|
|
2413
|
+
}
|
|
2414
|
+
yield full;
|
|
2409
2415
|
} catch {
|
|
2410
2416
|
}
|
|
2411
2417
|
}
|
|
@@ -2435,6 +2441,10 @@ function resolveProviderWalk(provider) {
|
|
|
2435
2441
|
parser: read.parser
|
|
2436
2442
|
};
|
|
2437
2443
|
if (options?.ignoreFilter) walkOptions.ignoreFilter = options.ignoreFilter;
|
|
2444
|
+
if (options?.maxFileSizeBytes !== void 0) {
|
|
2445
|
+
walkOptions.maxFileSizeBytes = options.maxFileSizeBytes;
|
|
2446
|
+
}
|
|
2447
|
+
if (options?.onOversizedFile) walkOptions.onOversizedFile = options.onOversizedFile;
|
|
2438
2448
|
return walkContent(roots, walkOptions);
|
|
2439
2449
|
};
|
|
2440
2450
|
}
|
|
@@ -2843,7 +2853,18 @@ async function walkAndExtract(opts) {
|
|
|
2843
2853
|
const accum = createWalkAccumulators();
|
|
2844
2854
|
const wctx = buildWalkContext(opts);
|
|
2845
2855
|
const claimedPaths = /* @__PURE__ */ new Set();
|
|
2846
|
-
const
|
|
2856
|
+
const oversizedFiles = [];
|
|
2857
|
+
const oversizedSeen = /* @__PURE__ */ new Set();
|
|
2858
|
+
const onOversizedFile = (info) => {
|
|
2859
|
+
if (oversizedSeen.has(info.path)) return;
|
|
2860
|
+
oversizedSeen.add(info.path);
|
|
2861
|
+
oversizedFiles.push(info);
|
|
2862
|
+
};
|
|
2863
|
+
const walkOptions = {
|
|
2864
|
+
...opts.ignoreFilter ? { ignoreFilter: opts.ignoreFilter } : {},
|
|
2865
|
+
onOversizedFile,
|
|
2866
|
+
...opts.maxFileSizeBytes !== void 0 ? { maxFileSizeBytes: opts.maxFileSizeBytes } : {}
|
|
2867
|
+
};
|
|
2847
2868
|
let filesWalked = 0;
|
|
2848
2869
|
let index = 0;
|
|
2849
2870
|
const effectiveMaxNodes = opts.overrideMaxNodes ?? opts.recommendedNodeLimit;
|
|
@@ -2876,6 +2897,7 @@ async function walkAndExtract(opts) {
|
|
|
2876
2897
|
cachedPaths: accum.cachedPaths,
|
|
2877
2898
|
frontmatterIssues: accum.frontmatterIssues,
|
|
2878
2899
|
filesWalked,
|
|
2900
|
+
oversizedFiles,
|
|
2879
2901
|
recommendedNodeLimit: opts.recommendedNodeLimit,
|
|
2880
2902
|
overrideMaxNodes: opts.overrideMaxNodes,
|
|
2881
2903
|
capReached,
|
|
@@ -2926,7 +2948,11 @@ async function processRawNode(raw, provider, wctx, accum, claimedPaths, nextInde
|
|
|
2926
2948
|
}
|
|
2927
2949
|
claimedPaths.add(raw.path);
|
|
2928
2950
|
const priorNode = wctx.priorNodesByPath.get(raw.path);
|
|
2929
|
-
const nodeHashCacheEligible = wctx.opts.enableCache &&
|
|
2951
|
+
const nodeHashCacheEligible = wctx.opts.enableCache && // Tokenizer-change invalidation: when the resolved encoder differs
|
|
2952
|
+
// from the one that produced the prior snapshot's counts, no node is
|
|
2953
|
+
// cache-eligible, every node rebuilds so `buildNode` re-tokenizes
|
|
2954
|
+
// with the current encoder. See `tokenizerChanged` on the options.
|
|
2955
|
+
!wctx.opts.tokenizerChanged && wctx.opts.prior !== null && priorNode !== void 0 && priorNode.bodyHash === bodyHash && priorNode.frontmatterHash === frontmatterHash;
|
|
2930
2956
|
const sidecarResolution = resolveSidecarOverlay(
|
|
2931
2957
|
raw.path,
|
|
2932
2958
|
raw.path,
|
|
@@ -3128,6 +3154,16 @@ function resolveSpecVersionSafe() {
|
|
|
3128
3154
|
return "unknown";
|
|
3129
3155
|
}
|
|
3130
3156
|
}
|
|
3157
|
+
var DEFAULT_TOKENIZER = "cl100k_base";
|
|
3158
|
+
function resolveTokenizerName(name) {
|
|
3159
|
+
return name === "o200k_base" ? "o200k_base" : DEFAULT_TOKENIZER;
|
|
3160
|
+
}
|
|
3161
|
+
async function loadTokenizerRanks(name) {
|
|
3162
|
+
if (name === "o200k_base") {
|
|
3163
|
+
return (await import("js-tiktoken/ranks/o200k_base")).default;
|
|
3164
|
+
}
|
|
3165
|
+
return (await import("js-tiktoken/ranks/cl100k_base")).default;
|
|
3166
|
+
}
|
|
3131
3167
|
async function runScanWithRenames(_kernel, options) {
|
|
3132
3168
|
return runScanInternal(_kernel, options);
|
|
3133
3169
|
}
|
|
@@ -3137,7 +3173,7 @@ async function runScan(_kernel, options) {
|
|
|
3137
3173
|
}
|
|
3138
3174
|
async function runScanInternal(_kernel, options) {
|
|
3139
3175
|
validateRoots(options.roots);
|
|
3140
|
-
const setup = buildScanSetup(options);
|
|
3176
|
+
const setup = await buildScanSetup(options);
|
|
3141
3177
|
const { emitter, exts, hookDispatcher, encoder, prior, start } = setup;
|
|
3142
3178
|
const scanStartedEvent = makeEvent("scan.started", { roots: options.roots });
|
|
3143
3179
|
emitter.emit(scanStartedEvent);
|
|
@@ -3147,6 +3183,7 @@ async function runScanInternal(_kernel, options) {
|
|
|
3147
3183
|
options.roots,
|
|
3148
3184
|
exts.providers
|
|
3149
3185
|
);
|
|
3186
|
+
const tokenizerChanged = encoder !== null && prior !== null && prior.tokenizer !== setup.tokenizer;
|
|
3150
3187
|
const walked = await walkAndExtract({
|
|
3151
3188
|
providers: exts.providers,
|
|
3152
3189
|
extractors: exts.extractors,
|
|
@@ -3156,6 +3193,7 @@ async function runScanInternal(_kernel, options) {
|
|
|
3156
3193
|
encoder,
|
|
3157
3194
|
strict: setup.strict,
|
|
3158
3195
|
enableCache: setup.enableCache,
|
|
3196
|
+
tokenizerChanged,
|
|
3159
3197
|
prior,
|
|
3160
3198
|
priorIndex: setup.priorIndex,
|
|
3161
3199
|
priorExtractorRuns: setup.priorExtractorRuns,
|
|
@@ -3163,7 +3201,8 @@ async function runScanInternal(_kernel, options) {
|
|
|
3163
3201
|
pluginStores: options.pluginStores,
|
|
3164
3202
|
activeProvider: activeProviderId,
|
|
3165
3203
|
recommendedNodeLimit: options.recommendedNodeLimit ?? 256,
|
|
3166
|
-
overrideMaxNodes: options.overrideMaxNodes ?? null
|
|
3204
|
+
overrideMaxNodes: options.overrideMaxNodes ?? null,
|
|
3205
|
+
...options.maxFileSizeBytes !== void 0 ? { maxFileSizeBytes: options.maxFileSizeBytes } : {}
|
|
3167
3206
|
});
|
|
3168
3207
|
const activeProvider = activeProviderId ? exts.providers.find((p) => p.id === activeProviderId) ?? null : null;
|
|
3169
3208
|
const resolved = resolveSignals({
|
|
@@ -3270,13 +3309,14 @@ function buildReservedNodePaths(nodes, kindRegistry, reservedNamesByProviderKind
|
|
|
3270
3309
|
function hasEntries(set) {
|
|
3271
3310
|
return set !== void 0 && set.size > 0;
|
|
3272
3311
|
}
|
|
3273
|
-
function buildScanSetup(options) {
|
|
3312
|
+
async function buildScanSetup(options) {
|
|
3274
3313
|
const start = Date.now();
|
|
3275
3314
|
const emitter = options.emitter ?? new InMemoryProgressEmitter();
|
|
3276
3315
|
const exts = options.extensions ?? { providers: [], extractors: [], analyzers: [] };
|
|
3277
3316
|
const hookDispatcher = makeHookDispatcher(exts.hooks ?? [], emitter);
|
|
3278
3317
|
const tokenize = options.tokenize !== false;
|
|
3279
|
-
const
|
|
3318
|
+
const tokenizer = resolveTokenizerName(options.tokenizer);
|
|
3319
|
+
const encoder = tokenize ? new Tiktoken2(await loadTokenizerRanks(tokenizer)) : null;
|
|
3280
3320
|
const prior = options.priorSnapshot ?? null;
|
|
3281
3321
|
const priorIndex = indexPriorSnapshot(prior);
|
|
3282
3322
|
const providerFrontmatter = buildProviderFrontmatterValidator(exts.providers);
|
|
@@ -3287,6 +3327,7 @@ function buildScanSetup(options) {
|
|
|
3287
3327
|
exts,
|
|
3288
3328
|
hookDispatcher,
|
|
3289
3329
|
encoder,
|
|
3330
|
+
tokenizer,
|
|
3290
3331
|
prior,
|
|
3291
3332
|
priorIndex,
|
|
3292
3333
|
priorExtractorRuns: options.priorExtractorRuns,
|
|
@@ -3322,6 +3363,7 @@ function buildScanStats(walked, issues, start) {
|
|
|
3322
3363
|
// Providers compete.
|
|
3323
3364
|
filesWalked: walked.filesWalked,
|
|
3324
3365
|
filesSkipped: 0,
|
|
3366
|
+
filesOversized: walked.oversizedFiles.length,
|
|
3325
3367
|
nodesCount: walked.nodes.length,
|
|
3326
3368
|
linksCount: walked.internalLinks.length,
|
|
3327
3369
|
issuesCount: issues.length,
|
|
@@ -3336,8 +3378,10 @@ function buildScanReturn(walked, issues, renameOps, stats, options, setup) {
|
|
|
3336
3378
|
roots: options.roots,
|
|
3337
3379
|
providers: setup.exts.providers.map((a) => a.id),
|
|
3338
3380
|
scannedBy: SCANNED_BY,
|
|
3381
|
+
tokenizer: setup.tokenizer,
|
|
3339
3382
|
recommendedNodeLimit: walked.recommendedNodeLimit,
|
|
3340
3383
|
overrideMaxNodes: walked.overrideMaxNodes,
|
|
3384
|
+
oversizedFiles: walked.oversizedFiles,
|
|
3341
3385
|
nodes: walked.nodes,
|
|
3342
3386
|
links: walked.internalLinks,
|
|
3343
3387
|
issues,
|
|
@@ -3775,4 +3819,5 @@ export {
|
|
|
3775
3819
|
runScan,
|
|
3776
3820
|
runScanWithRenames
|
|
3777
3821
|
};
|
|
3778
|
-
//# sourceMappingURL=index.js.map
|
|
3822
|
+
//# sourceMappingURL=index.js.map
|
|
3823
|
+
//# debugId=e5a349ec-19a4-5568-bcab-36d7ea370839
|
package/dist/kernel/index.d.ts
CHANGED
|
@@ -442,7 +442,7 @@ interface IExtensionBase {
|
|
|
442
442
|
* 4. **Internal interfaces**, option bags, result records, config
|
|
443
443
|
* slices, anything declared as `interface` and passed across
|
|
444
444
|
* function boundaries inside the kernel / CLI but not part of the
|
|
445
|
-
* spec: `
|
|
445
|
+
* spec: `IPluginRuntime`, `IPruneResult`, `IMigrationFile`,
|
|
446
446
|
* `IDbLocationOptions`. **`I` prefix.** The prefix matches
|
|
447
447
|
* category 3 because both are "shapes that live in TypeScript
|
|
448
448
|
* only, never in JSON".
|
|
@@ -912,6 +912,13 @@ interface ScanStats {
|
|
|
912
912
|
* multiple Providers can claim the same file.
|
|
913
913
|
*/
|
|
914
914
|
filesSkipped: number;
|
|
915
|
+
/**
|
|
916
|
+
* Files skipped by the walker BEFORE reading because their on-disk
|
|
917
|
+
* size exceeded `scan.maxFileSizeBytes`. Equals
|
|
918
|
+
* `ScanResult.oversizedFiles.length`. Absent on synthetic fixtures /
|
|
919
|
+
* loaders that predate the field; defaults to 0 when omitted.
|
|
920
|
+
*/
|
|
921
|
+
filesOversized?: number;
|
|
915
922
|
nodesCount: number;
|
|
916
923
|
linksCount: number;
|
|
917
924
|
issuesCount: number;
|
|
@@ -1021,6 +1028,17 @@ interface ScanResult {
|
|
|
1021
1028
|
providers: string[];
|
|
1022
1029
|
/** Implementation metadata. Populated by `runScan` for self-describing output. */
|
|
1023
1030
|
scannedBy?: ScanScannedBy;
|
|
1031
|
+
/**
|
|
1032
|
+
* Resolved offline tokenizer (encoder) that produced the per-node token
|
|
1033
|
+
* counts in this scan. One of the closed allow-list in
|
|
1034
|
+
* `project-config.schema.json#/properties/tokenizer` (`cl100k_base`
|
|
1035
|
+
* default, `o200k_base`). Mirrors `scan_meta.tokenizer`. Populated by
|
|
1036
|
+
* `runScan` from the resolved `RunScanOptions.tokenizer`; the
|
|
1037
|
+
* incremental path compares the persisted value against the resolved
|
|
1038
|
+
* one and force-recomputes counts when they differ. Absent on synthetic
|
|
1039
|
+
* fixtures / loaders that predate the field.
|
|
1040
|
+
*/
|
|
1041
|
+
tokenizer?: string;
|
|
1024
1042
|
/**
|
|
1025
1043
|
* Effective recommended cap on the number of files the walker accepted
|
|
1026
1044
|
* during this scan (`scan.maxNodes` from settings, default 256). The UI
|
|
@@ -1035,11 +1053,30 @@ interface ScanResult {
|
|
|
1035
1053
|
* setting). Bidirectional: can raise OR lower the recommended limit.
|
|
1036
1054
|
*/
|
|
1037
1055
|
overrideMaxNodes?: number | null;
|
|
1056
|
+
/**
|
|
1057
|
+
* Files the walker skipped because their on-disk size exceeded
|
|
1058
|
+
* `scan.maxFileSizeBytes` (default 1 MiB). Each entry is the
|
|
1059
|
+
* root-relative, forward-slash path (same form as `node.path`) plus
|
|
1060
|
+
* the byte size. Drives the CLI / serve terminal WARN and the UI
|
|
1061
|
+
* banner. Defaults to `[]`; absent on synthetic fixtures that bypass
|
|
1062
|
+
* the walker.
|
|
1063
|
+
*/
|
|
1064
|
+
oversizedFiles?: OversizedFile[];
|
|
1038
1065
|
nodes: Node[];
|
|
1039
1066
|
links: Link[];
|
|
1040
1067
|
issues: Issue[];
|
|
1041
1068
|
stats: ScanStats;
|
|
1042
1069
|
}
|
|
1070
|
+
/**
|
|
1071
|
+
* One file the walker skipped for exceeding `scan.maxFileSizeBytes`.
|
|
1072
|
+
* Mirrors `scan-result.schema.json#/properties/oversizedFiles/items`.
|
|
1073
|
+
*/
|
|
1074
|
+
interface OversizedFile {
|
|
1075
|
+
/** Root-relative, forward-slash path (same form as `node.path`). */
|
|
1076
|
+
path: string;
|
|
1077
|
+
/** On-disk size of the skipped file, in bytes. */
|
|
1078
|
+
bytes: number;
|
|
1079
|
+
}
|
|
1043
1080
|
|
|
1044
1081
|
/**
|
|
1045
1082
|
* Plugin-surface types, hand-written to mirror
|
|
@@ -1396,7 +1433,7 @@ interface IPersistOptions {
|
|
|
1396
1433
|
* `<pluginId>/<extensionId>/<contributionId>`. Passed to the
|
|
1397
1434
|
* `scan_contributions` upsert so the catalog sweep can drop rows
|
|
1398
1435
|
* belonging to plugins / extensions that are no longer in the
|
|
1399
|
-
* catalog (uninstalled plugins, disabled
|
|
1436
|
+
* catalog (uninstalled plugins, disabled plugins, removed
|
|
1400
1437
|
* contributions). Empty / absent set = no catalog sweep (legacy
|
|
1401
1438
|
* behaviour, leaves disabled-plugin rows stale per design F24
|
|
1402
1439
|
* pre-fix).
|
|
@@ -1712,9 +1749,10 @@ interface IContributionRecord {
|
|
|
1712
1749
|
* new / modified nodes from a fresh extractor pass.
|
|
1713
1750
|
*
|
|
1714
1751
|
* Meta envelope: the `scan_meta` table persists `roots` /
|
|
1715
|
-
* `scannedAt` / `scannedBy` / `
|
|
1716
|
-
* `stats.
|
|
1717
|
-
*
|
|
1752
|
+
* `scannedAt` / `scannedBy` / `tokenizer` / `providers` /
|
|
1753
|
+
* `stats.filesWalked` / `stats.filesSkipped` / `stats.filesOversized` /
|
|
1754
|
+
* `stats.durationMs` / `oversizedFiles`. When the row exists, those fields come back
|
|
1755
|
+
* authoritatively. When it does not (DB
|
|
1718
1756
|
* freshly migrated but never scanned, or a legacy DB never
|
|
1719
1757
|
* re-persisted), the loader degrades to a synthetic envelope:
|
|
1720
1758
|
*
|
|
@@ -2163,9 +2201,7 @@ interface IProvider extends IExtensionBase {
|
|
|
2163
2201
|
* directly, it goes through `resolveProviderWalk(provider)` which
|
|
2164
2202
|
* picks `walk` over `read`.
|
|
2165
2203
|
*/
|
|
2166
|
-
walk?(roots: string[], options?:
|
|
2167
|
-
ignoreFilter?: IIgnoreFilter;
|
|
2168
|
-
}): AsyncIterable<IRawNode>;
|
|
2204
|
+
walk?(roots: string[], options?: IProviderWalkOptions): AsyncIterable<IRawNode>;
|
|
2169
2205
|
/**
|
|
2170
2206
|
* Given a path and its parsed frontmatter, decide the node kind, or
|
|
2171
2207
|
* `null` to disclaim the file. The classifier is called after walk()
|
|
@@ -2186,7 +2222,7 @@ interface IProvider extends IExtensionBase {
|
|
|
2186
2222
|
/**
|
|
2187
2223
|
* Strict resolution matrix consumed by the post-walk confidence-lift
|
|
2188
2224
|
* transform: maps a `link.kind` (emitted by an Extractor in this
|
|
2189
|
-
* Provider's
|
|
2225
|
+
* Provider's plugin, e.g. `'mentions'`, `'invokes'`) to the set of
|
|
2190
2226
|
* target `node.kind` values that count as a valid resolution.
|
|
2191
2227
|
*
|
|
2192
2228
|
* Used to decide whether to bump a link's confidence to 1.0 when its
|
|
@@ -2317,6 +2353,28 @@ interface IResolverRules {
|
|
|
2317
2353
|
*/
|
|
2318
2354
|
kindPriority?: readonly LinkKind[];
|
|
2319
2355
|
}
|
|
2356
|
+
/**
|
|
2357
|
+
* Per-invocation options the orchestrator threads into a Provider walk
|
|
2358
|
+
* (and through `resolveProviderWalk` into the kernel walker). All
|
|
2359
|
+
* optional, so a bare `provider.walk(roots)` keeps working.
|
|
2360
|
+
*
|
|
2361
|
+
* - `ignoreFilter`, the composed `.skillmapignore` + config.ignore +
|
|
2362
|
+
* bundled-defaults filter.
|
|
2363
|
+
* - `maxFileSizeBytes` / `onOversizedFile`, mirror of
|
|
2364
|
+
* `scan.maxFileSizeBytes` and the collector that records skipped
|
|
2365
|
+
* files into `ScanResult.oversizedFiles`. A Provider that ships its
|
|
2366
|
+
* own `walk()` SHOULD forward both into `walkContent` (or apply the
|
|
2367
|
+
* same size guard) so oversized files stay skipped + reported
|
|
2368
|
+
* regardless of which discovery path runs.
|
|
2369
|
+
*/
|
|
2370
|
+
interface IProviderWalkOptions {
|
|
2371
|
+
ignoreFilter?: IIgnoreFilter;
|
|
2372
|
+
maxFileSizeBytes?: number;
|
|
2373
|
+
onOversizedFile?: (info: {
|
|
2374
|
+
path: string;
|
|
2375
|
+
bytes: number;
|
|
2376
|
+
}) => void;
|
|
2377
|
+
}
|
|
2320
2378
|
/**
|
|
2321
2379
|
* Declarative read config a Provider declares via `IProvider.read`.
|
|
2322
2380
|
* Mirrors `extensions/provider.schema.json#/properties/read` at the
|
|
@@ -3313,11 +3371,25 @@ interface RunScanOptions {
|
|
|
3313
3371
|
viewContributions?: readonly IRegisteredViewContribution[];
|
|
3314
3372
|
/**
|
|
3315
3373
|
* Compute per-node token counts (frontmatter / body / total) using the
|
|
3316
|
-
*
|
|
3317
|
-
*
|
|
3318
|
-
*
|
|
3374
|
+
* encoder named by `tokenizer` (default `cl100k_base`). Defaults to
|
|
3375
|
+
* true. Set false to skip tokenization; `node.tokens` is left undefined
|
|
3376
|
+
* (spec-valid: the field is optional).
|
|
3319
3377
|
*/
|
|
3320
3378
|
tokenize?: boolean;
|
|
3379
|
+
/**
|
|
3380
|
+
* Offline tokenizer (encoder) used to build the per-node token counts.
|
|
3381
|
+
* Closed allow-list mirroring `project-config.schema.json#/properties/tokenizer`:
|
|
3382
|
+
* `cl100k_base` (default) or `o200k_base`. Threaded from `cfg.tokenizer`
|
|
3383
|
+
* by the driving adapters (scan-runner, watcher). Absent → `cl100k_base`.
|
|
3384
|
+
* The orchestrator guards the override layer: any value that is neither
|
|
3385
|
+
* allow-list member falls back to `cl100k_base` (the AJV enum on the
|
|
3386
|
+
* config schema already guarantees this for the config layers, the
|
|
3387
|
+
* guard covers out-of-band callers and the `override` layer). The
|
|
3388
|
+
* resolved value is carried onto `ScanResult.tokenizer` so the
|
|
3389
|
+
* persistence layer can record which encoder produced the counts and
|
|
3390
|
+
* the incremental path can detect an encoder switch.
|
|
3391
|
+
*/
|
|
3392
|
+
tokenizer?: string;
|
|
3321
3393
|
/**
|
|
3322
3394
|
* Prior snapshot for two purposes (decoupled by design):
|
|
3323
3395
|
*
|
|
@@ -3468,6 +3540,14 @@ interface RunScanOptions {
|
|
|
3468
3540
|
* replaces the recommended limit for the duration of this scan.
|
|
3469
3541
|
*/
|
|
3470
3542
|
overrideMaxNodes?: number | null;
|
|
3543
|
+
/**
|
|
3544
|
+
* Mirror of `scan.maxFileSizeBytes` (default 1 MiB). Threaded into
|
|
3545
|
+
* `walkAndExtract` so the walker skips any file larger than this
|
|
3546
|
+
* BEFORE reading it; skipped files surface in
|
|
3547
|
+
* `ScanResult.oversizedFiles` and `stats.filesOversized`. Absent → no
|
|
3548
|
+
* size limit (out-of-band callers and synthetic fixtures stay safe).
|
|
3549
|
+
*/
|
|
3550
|
+
maxFileSizeBytes?: number;
|
|
3471
3551
|
}
|
|
3472
3552
|
/**
|
|
3473
3553
|
* Same as `runScan` but also returns the rename heuristic's `RenameOp[]`
|
|
@@ -4015,7 +4095,7 @@ interface StoragePort {
|
|
|
4015
4095
|
lookup(pluginId: string, contributionId: string, nodePath: string, extensionId?: string): Promise<IPersistedContribution[]>;
|
|
4016
4096
|
/**
|
|
4017
4097
|
* Drop rows for a plugin (optionally narrowed to a single
|
|
4018
|
-
* extension within the
|
|
4098
|
+
* extension within the plugin). Returns the number of deleted
|
|
4019
4099
|
* rows. Called by `sm plugins disable` so the UI stops rendering
|
|
4020
4100
|
* the disabled plugin's chips before the next scan.
|
|
4021
4101
|
*/
|