skillwiki 0.8.2 → 0.8.3-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +254 -3
- package/package.json +1 -1
- package/skills/.claude-plugin/plugin.json +1 -1
- package/skills/.codex-plugin/plugin.json +1 -1
- package/skills/package.json +1 -1
- package/skills/skills/wiki-ingest/SKILL.md +8 -6
- package/skills/skills/wiki-reingest/SKILL.md +2 -0
- package/skills/wiki-ingest/SKILL.md +8 -6
- package/skills/wiki-reingest/SKILL.md +2 -0
package/dist/cli.js
CHANGED
|
@@ -3158,6 +3158,79 @@ function validateCliRefs(text, page, surface) {
|
|
|
3158
3158
|
return violations;
|
|
3159
3159
|
}
|
|
3160
3160
|
|
|
3161
|
+
// src/utils/source-identity.ts
|
|
3162
|
+
// Per-project regexes used to decide which known project a path, URL, or body
// refers to. collectSignals() tests them against text that normalize() has
// already lower-cased, split at camelCase boundaries ("SkillWiki" -> "skill wiki")
// and stripped of "-" / "_" (runs of either become a single space).
// Consequently a pattern must never require a literal hyphen/underscore, and it
// must tolerate a space wherever a camelCase boundary can occur in the name.
// Key order is preserved because it determines the order of reported signals.
var PROJECT_PATTERNS = {
  hermes: [/\bhermes\b/i, /nous\s*research\s*hermes/i, /nous\s*research\/hermes[-_ ]?agent/i, /hermes agent/i],
  skillwiki: [/\bskill[-_ ]?wiki\b/i, /\bllm[-_ ]?wiki\b/i, /karpathy'?s llm wiki/i],
  superpowers: [/\bsuperpowers\b/i, /obra\/superpowers/i, /complete software development methodology/i],
  playwright: [/\bplaywright\b/i, /microsoft\s*playwright/i, /microsoft\/playwright/i],
  convex: [/\bconvex\b/i],
  newapi: [/\bnew[-_ ]?api\b/i, /quantum\s*nous\/new[-_ ]?api/i],
  coolify: [/\bcoolify\b/i, /coollabsio\/coolify/i],
  seaweedfs: [/\bseaweed\s*fs\b/i],
  proxmox: [/\bproxmox\b/i, /proxmoxve/i],
  codestable: [/\bcodestable\b/i]
};
|
|
3174
|
+
// Directional "left|right" keys for project pairs that compatible() accepts
// even though the two names differ. Each unordered pair is expanded into both
// orderings so the lookup does not depend on argument order.
var COMPATIBLE = new Set(
  [
    ["hermes", "skillwiki"],
    ["proxmox", "seaweedfs"],
    ["coolify", "seaweedfs"]
  ].flatMap(([first, second]) => [`${first}|${second}`, `${second}|${first}`])
);
|
|
3182
|
+
/**
 * Canonicalise text before project-pattern matching.
 *
 * Inserts a space at every lower-to-upper camelCase boundary, lower-cases the
 * result, then collapses each run of "_" / "-" into a single space.
 *
 * @param {string} text - Raw path, URL, or body text.
 * @returns {string} The normalised text.
 */
function normalize(text) {
  const camelSplit = text.replace(/([a-z])([A-Z])/g, (_match, lower, upper) => `${lower} ${upper}`);
  const lowered = camelSplit.toLowerCase();
  return lowered.replace(/[_-]+/g, " ");
}
|
|
3185
|
+
/**
 * Return the opening slice of a document body used for identity detection.
 *
 * A leading `---` ... `---` frontmatter block (LF or CRLF) is skipped when it
 * starts at the very beginning of the text; at most the next 2000 characters
 * are returned.
 *
 * @param {string | null | undefined} body - Document text, possibly with frontmatter.
 * @returns {string} Up to 2000 characters of body text, or "" for a falsy body.
 */
function firstBodyWindow(body) {
  if (!body) return "";
  const frontmatter = body.match(/^---\r?\n[\s\S]*?\r?\n---\r?\n?/);
  // The pattern is anchored at the start, so any match begins at index 0.
  const start = frontmatter ? frontmatter[0].length : 0;
  return body.slice(start, start + 2e3);
}
|
|
3190
|
+
/**
 * List the known projects whose patterns match the given text.
 *
 * The text is normalised first; a project is reported once if any of its
 * patterns matches. Names come back in PROJECT_PATTERNS key order.
 *
 * @param {string} text - Path, URL, or body excerpt to inspect.
 * @returns {string[]} Matching project names.
 */
function collectSignals(text) {
  const haystack = normalize(text);
  return Object.keys(PROJECT_PATTERNS).filter(
    (project) => PROJECT_PATTERNS[project].some((re) => re.test(haystack))
  );
}
|
|
3198
|
+
/**
 * Decide whether two project signals may refer to the same source.
 *
 * @param {string} left - Project name from one side.
 * @param {string} right - Project name from the other side.
 * @returns {boolean} True when the names are identical or the ordered pair is
 *   listed in COMPATIBLE.
 */
function compatible(left, right) {
  if (left === right) return true;
  const pairKey = `${left}|${right}`;
  return COMPATIBLE.has(pairKey);
}
|
|
3201
|
+
/**
 * Report whether at least one (left, right) pairing is incompatible.
 *
 * @param {string[]} leftSignals - Project names detected on one side.
 * @param {string[]} rightSignals - Project names detected on the other side.
 * @returns {boolean} False when either list is empty; otherwise true as soon
 *   as any pairing fails compatible().
 */
function hasAnyIncompatibleSignals(leftSignals, rightSignals) {
  if (leftSignals.length === 0 || rightSignals.length === 0) return false;
  for (const left of leftSignals) {
    for (const right of rightSignals) {
      if (!compatible(left, right)) return true;
    }
  }
  return false;
}
|
|
3205
|
+
/**
 * Report whether at least one (left, right) pairing is compatible.
 *
 * @param {string[]} leftSignals - Project names detected on one side.
 * @param {string[]} rightSignals - Project names detected on the other side.
 * @returns {boolean} True when some pairing passes compatible(); false
 *   otherwise (including when either list is empty).
 */
function hasAnyCompatibleSignals(leftSignals, rightSignals) {
  for (const left of leftSignals) {
    for (const right of rightSignals) {
      if (compatible(left, right)) return true;
    }
  }
  return false;
}
|
|
3208
|
+
/**
 * Cross-check the project signals found in a raw file's path, its source URL,
 * and the opening window of its body.
 *
 * - "conflict": the path and source URL contain any incompatible pairing, or
 *   the path and body both carry signals but share no compatible pairing.
 * - "suspicious": the path carries no signal while source URL and body both do
 *   and share no compatible pairing.
 * - "ok": anything else.
 *
 * @param {{ rawPath: string, sourceUrl?: string | null, body?: string | null }} input
 * @returns {{ status: string, pathSignals: string[], sourceSignals: string[], bodySignals: string[], reasons: string[] }}
 */
function assessSourceIdentity(input) {
  const pathSignals = collectSignals(input.rawPath);
  const sourceSignals = collectSignals(input.sourceUrl ?? "");
  const bodySignals = collectSignals(firstBodyWindow(input.body));

  const listed = (signals) => `[${signals.join(", ")}]`;
  // Every outcome shares the same payload shape; only status and reasons vary.
  const verdict = (status, reasons) => ({ status, pathSignals, sourceSignals, bodySignals, reasons });

  const pathSeen = pathSignals.length > 0;
  const sourceSeen = sourceSignals.length > 0;
  const bodySeen = bodySignals.length > 0;

  const reasons = [];
  if (hasAnyIncompatibleSignals(pathSignals, sourceSignals)) {
    reasons.push(`filename/path signals ${listed(pathSignals)} but source_url signals ${listed(sourceSignals)}`);
  }
  if (pathSeen && bodySeen && !hasAnyCompatibleSignals(pathSignals, bodySignals)) {
    reasons.push(`filename/path signals ${listed(pathSignals)} but body signals ${listed(bodySignals)}`);
  }
  if (reasons.length > 0) {
    return verdict("conflict", reasons);
  }

  // Without a path signal, a source/body mismatch is only flagged, not failed.
  const sourceBodyMismatch = !pathSeen && sourceSeen && bodySeen && !hasAnyCompatibleSignals(sourceSignals, bodySignals);
  if (sourceBodyMismatch) {
    return verdict("suspicious", [`source_url signals ${listed(sourceSignals)} but body signals ${listed(bodySignals)}`]);
  }
  return verdict("ok", reasons);
}
|
|
3233
|
+
|
|
3161
3234
|
// src/commands/lint.ts
|
|
3162
3235
|
var STRUCT_MIN_BODY_LINES = 60;
|
|
3163
3236
|
var STRUCT_MIN_SECTIONS = 3;
|
|
@@ -3189,7 +3262,7 @@ function extractSourceEntries(rawFm) {
|
|
|
3189
3262
|
}
|
|
3190
3263
|
return entries;
|
|
3191
3264
|
}
|
|
3192
|
-
var ERROR_ORDER = ["broken_wikilinks", "invalid_frontmatter", "raw_dedup", "broken_sources", "tag_not_in_taxonomy", "path_too_long"];
|
|
3265
|
+
// Lint bucket names in the error group. `raw_source_identity_conflict` is the
// bucket runLint fills when assessSourceIdentity() reports status "conflict"
// for a raw file.
// NOTE(review): the array order presumably drives report ordering — confirm against the reporter.
var ERROR_ORDER = ["broken_wikilinks", "invalid_frontmatter", "raw_source_identity_conflict", "raw_dedup", "broken_sources", "tag_not_in_taxonomy", "path_too_long"];
|
|
3193
3266
|
var WARNING_ORDER = ["raw_body_duplicate", "raw_subdirectory_duplicate", "file_source_url", "index_incomplete", "index_link_format", "stale_page", "page_too_large", "log_rotate_needed", "orphans", "compound_refs", "legacy_citation_style", "orphaned_citations", "duplicate_frontmatter", "work_item_health", "orphaned_project_pages", "missing_overview", "missing_diagram"];
|
|
3194
3267
|
var INFO_ORDER = ["bridges", "sparse_community", "page_structure", "topic_map_recommended", "frontmatter_wikilink", "wikilink_citation", "missing_tldr", "stale_sections", "cli_refs"];
|
|
3195
3268
|
var KNOWN_BUCKETS = [...ERROR_ORDER, ...WARNING_ORDER, ...INFO_ORDER];
|
|
@@ -3290,6 +3363,7 @@ async function runLint(input) {
|
|
|
3290
3363
|
buckets.raw_subdirectory_duplicate = subDirDupes;
|
|
3291
3364
|
}
|
|
3292
3365
|
const fileSourceUrlFlags = [];
|
|
3366
|
+
const rawIdentityConflicts = [];
|
|
3293
3367
|
for (const raw of scan.data.raw) {
|
|
3294
3368
|
const text = await readPage(raw);
|
|
3295
3369
|
const split = splitFrontmatter(text);
|
|
@@ -3297,8 +3371,25 @@ async function runLint(input) {
|
|
|
3297
3371
|
if (/^source_url:\s*file:\/\//m.test(split.data.rawFrontmatter)) {
|
|
3298
3372
|
fileSourceUrlFlags.push(raw.relPath);
|
|
3299
3373
|
}
|
|
3374
|
+
const sourceUrl = split.data.rawFrontmatter.match(/^source_url:\s*(.+)$/m)?.[1]?.trim().replace(/^["']|["']$/g, "") ?? "";
|
|
3375
|
+
const assessment = assessSourceIdentity({
|
|
3376
|
+
rawPath: raw.relPath,
|
|
3377
|
+
sourceUrl,
|
|
3378
|
+
body: split.data.body
|
|
3379
|
+
});
|
|
3380
|
+
if (assessment.status === "conflict") {
|
|
3381
|
+
rawIdentityConflicts.push({
|
|
3382
|
+
file: raw.relPath,
|
|
3383
|
+
status: assessment.status,
|
|
3384
|
+
reasons: assessment.reasons,
|
|
3385
|
+
pathSignals: assessment.pathSignals,
|
|
3386
|
+
sourceSignals: assessment.sourceSignals,
|
|
3387
|
+
bodySignals: assessment.bodySignals
|
|
3388
|
+
});
|
|
3389
|
+
}
|
|
3300
3390
|
}
|
|
3301
3391
|
if (fileSourceUrlFlags.length > 0) buckets.file_source_url = fileSourceUrlFlags;
|
|
3392
|
+
if (rawIdentityConflicts.length > 0) buckets.raw_source_identity_conflict = rawIdentityConflicts;
|
|
3302
3393
|
const legacyPages = [];
|
|
3303
3394
|
const orphanedPages = [];
|
|
3304
3395
|
const structFlags = [];
|
|
@@ -4576,6 +4667,120 @@ function checkSyncLastPush(resolvedPath) {
|
|
|
4576
4667
|
}
|
|
4577
4668
|
return check("pass", "sync_last_push", "Vault sync recency", `Last push: ${dateStr} (${daysSince2} day(s) ago)`);
|
|
4578
4669
|
}
|
|
4670
|
+
/**
 * Check whether the ref `origin/main` resolves in the repository at the given path.
 *
 * @param {string} resolvedPath - Directory used as the git working directory.
 * @returns {boolean} True when `git rev-parse --verify --quiet origin/main`
 *   succeeds; false when the command throws for any reason.
 */
function hasOriginMain(resolvedPath) {
  const gitOptions = {
    cwd: resolvedPath,
    encoding: "utf8",
    stdio: ["pipe", "pipe", "pipe"]
  };
  let resolved = true;
  try {
    execSync2("git rev-parse --verify --quiet origin/main", gitOptions);
  } catch {
    resolved = false;
  }
  return resolved;
}
|
|
4682
|
+
/**
 * Doctor check: warn when the vault's git worktree has uncommitted changes.
 *
 * Skipped (reported as "pass") when there is no vault path or no `.git` entry.
 * A failure to run `git status` is reported as a warning.
 *
 * @param {string | undefined} resolvedPath - Resolved vault directory, if any.
 * @returns {*} The result object produced by check().
 */
function checkVaultGitDirty(resolvedPath) {
  const id = "vault_git_dirty";
  const label = "Vault git dirty state";
  if (resolvedPath === void 0) {
    return check("pass", id, label, "No vault path \u2014 check skipped");
  }
  const gitEntry = join27(resolvedPath, ".git");
  if (!existsSync9(gitEntry)) {
    return check("pass", id, label, "No git repo \u2014 check skipped");
  }
  try {
    const porcelain = execSync2("git status --porcelain", {
      cwd: resolvedPath,
      encoding: "utf8",
      stdio: ["pipe", "pipe", "pipe"]
    });
    // One non-empty porcelain line per changed or untracked path.
    const dirtyCount = porcelain.trim().split("\n").filter(Boolean).length;
    return dirtyCount > 0
      ? check("warn", id, label, `${dirtyCount} dirty file(s) in vault worktree`)
      : check("pass", id, label, "Clean worktree");
  } catch {
    return check("warn", id, label, "Could not read git status");
  }
}
|
|
4703
|
+
/**
 * Doctor check: warn when local HEAD has commits that origin/main lacks.
 *
 * @param {string | undefined} resolvedPath - Resolved vault directory, if any.
 * @returns {*} The result of checkVaultGitComparison() for `origin/main..HEAD`.
 */
function checkVaultGitAhead(resolvedPath) {
  const id = "vault_git_ahead";
  const label = "Vault commits ahead";
  const range = "origin/main..HEAD";
  const nonZeroSuffix = "ahead of origin/main";
  const zeroDetail = "0 commits ahead of origin/main";
  return checkVaultGitComparison(resolvedPath, id, label, range, nonZeroSuffix, zeroDetail);
}
|
|
4713
|
+
/**
 * Doctor check: warn when origin/main has commits that local HEAD lacks.
 *
 * @param {string | undefined} resolvedPath - Resolved vault directory, if any.
 * @returns {*} The result of checkVaultGitComparison() for `HEAD..origin/main`.
 */
function checkVaultGitBehind(resolvedPath) {
  const id = "vault_git_behind";
  const label = "Vault commits behind";
  const range = "HEAD..origin/main";
  const nonZeroSuffix = "behind origin/main";
  const zeroDetail = "0 commits behind origin/main";
  return checkVaultGitComparison(resolvedPath, id, label, range, nonZeroSuffix, zeroDetail);
}
|
|
4723
|
+
/**
 * Shared implementation for the "commits ahead" / "commits behind" doctor checks.
 *
 * Counts the commits in `range` with `git rev-list --count` and warns when the
 * count is positive. The check is skipped (reported as "pass") when there is
 * no vault path, no `.git` entry, or `origin/main` does not resolve.
 *
 * @param {string | undefined} resolvedPath - Resolved vault directory, if any.
 * @param {string} id - Check identifier passed through to check().
 * @param {string} label - Human-readable check label.
 * @param {string} range - Revision range, e.g. "origin/main..HEAD". Callers in
 *   this file pass fixed literals; it is interpolated into a shell command, so
 *   it must never come from untrusted input.
 * @param {string} nonZeroSuffix - Text appended after "<n> commit(s) " when n > 0.
 * @param {string} zeroDetail - Detail text used when the count is zero.
 * @returns {*} The result object produced by check().
 */
function checkVaultGitComparison(resolvedPath, id, label, range, nonZeroSuffix, zeroDetail) {
  if (resolvedPath === void 0) {
    return check("pass", id, label, "No vault path \u2014 check skipped");
  }
  if (!existsSync9(join27(resolvedPath, ".git"))) {
    return check("pass", id, label, "No git repo \u2014 check skipped");
  }
  if (!hasOriginMain(resolvedPath)) {
    return check("pass", id, label, "origin/main unavailable \u2014 check skipped");
  }
  const compareFailed = "Could not compare HEAD with origin/main";
  try {
    const output = execSync2(`git rev-list --count ${range}`, {
      cwd: resolvedPath,
      encoding: "utf8",
      stdio: ["pipe", "pipe", "pipe"]
    }).trim();
    const count = Number.parseInt(output, 10);
    // Unparseable output yields NaN, which would fail `count > 0` and be
    // misreported as a clean "0 commits" pass; surface it as a failure instead.
    if (!Number.isInteger(count)) {
      return check("warn", id, label, compareFailed);
    }
    if (count > 0) {
      return check("warn", id, label, `${count} commit(s) ${nonZeroSuffix}`);
    }
    return check("pass", id, label, zeroDetail);
  } catch {
    return check("warn", id, label, compareFailed);
  }
}
|
|
4747
|
+
/**
 * Candidate locations of `wiki-pull.log`, most likely first.
 *
 * On macOS ("darwin") the `Library/Logs` location is tried before the
 * `.local/state/vault-sync/log` location; on every other platform the order is
 * reversed. Duplicates are removed while keeping first-seen order.
 *
 * @param {string} home - Home directory the paths are built under.
 * @returns {string[]} Ordered list of candidate log paths.
 */
function pullLogPaths(home) {
  const libraryLog = join27(home, "Library", "Logs", "wiki-pull.log");
  const stateLog = join27(home, ".local", "state", "vault-sync", "log", "wiki-pull.log");
  const ordered = platform2() === "darwin" ? [libraryLog, stateLog] : [stateLog, libraryLog];
  return Array.from(new Set(ordered));
}
|
|
4757
|
+
/**
 * Decide whether a log line counts as recent (within the last 24 hours).
 *
 * Only a leading `YYYY-MM-DDTHH:MM:SSZ` timestamp is recognised. Lines without
 * one, or whose timestamp cannot be parsed, are treated as recent so they are
 * never silently dropped.
 *
 * @param {string} line - A single log line.
 * @param {number} nowMs - Current time in milliseconds since the epoch.
 * @returns {boolean} True when the line is recent or undated.
 */
function isRecentLogLine(line, nowMs) {
  const DAY_MS = 24 * 60 * 60 * 1e3;
  const stamp = line.match(/^(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z)/)?.[1];
  if (stamp === undefined) return true;
  const loggedAt = Date.parse(stamp);
  return Number.isFinite(loggedAt) ? nowMs - loggedAt <= DAY_MS : true;
}
|
|
4764
|
+
/**
 * Doctor check: warn when `wiki-pull.log` records recent pull failures.
 *
 * Uses the first existing candidate from pullLogPaths(). A line counts as a
 * failure when isRecentLogLine() accepts it and it matches one of the known
 * failure phrases. The warning quotes the last two failures, each truncated to
 * 100 characters.
 *
 * @param {string} home - Home directory used to locate the log.
 * @returns {*} The result object produced by check().
 */
function checkVaultGitPullFailures(home) {
  const id = "vault_git_pull_failures";
  const label = "Vault pull failures";
  const failurePattern = /(pre-push pull failed|FAIL .*pull|FAIL .*rebase|cannot pull with rebase|unstaged changes)/i;

  let path;
  for (const candidate of pullLogPaths(home)) {
    if (existsSync9(candidate)) {
      path = candidate;
      break;
    }
  }
  if (!path) {
    return check("pass", id, label, "No wiki-pull.log found \u2014 check skipped");
  }

  try {
    const lines = readFileSync7(path, "utf8").split(/\r?\n/);
    const now = Date.now();
    const failures = [];
    for (const line of lines) {
      // Blank lines are ignored, as are dated lines older than the recency window.
      if (line && isRecentLogLine(line, now) && failurePattern.test(line)) {
        failures.push(line);
      }
    }
    if (failures.length === 0) {
      return check("pass", id, label, "No recent pull failures logged");
    }
    const sample = failures.slice(-2).map((line) => line.slice(0, 100)).join(" | ");
    return check("warn", id, label, `${failures.length} recent pull failure(s): ${sample}`);
  } catch {
    return check("warn", id, label, `Could not read ${path}`);
  }
}
|
|
4579
4784
|
function checkS3MountPerf(resolvedPath) {
|
|
4580
4785
|
if (resolvedPath === void 0) {
|
|
4581
4786
|
return check("pass", "s3_mount_perf", "S3 mount performance", "No vault path \u2014 check skipped");
|
|
@@ -5253,6 +5458,10 @@ async function runDoctor(input) {
|
|
|
5253
5458
|
checks.push(checkObsidianTemplates(resolvedPath));
|
|
5254
5459
|
checks.push(checkVaultGitRemote(resolvedPath));
|
|
5255
5460
|
checks.push(checkSyncLastPush(resolvedPath));
|
|
5461
|
+
checks.push(checkVaultGitDirty(resolvedPath));
|
|
5462
|
+
checks.push(checkVaultGitAhead(resolvedPath));
|
|
5463
|
+
checks.push(checkVaultGitBehind(resolvedPath));
|
|
5464
|
+
checks.push(checkVaultGitPullFailures(input.home));
|
|
5256
5465
|
checks.push(checkDotStoreClean(resolvedPath));
|
|
5257
5466
|
checks.push(checkS3MountPerf(resolvedPath));
|
|
5258
5467
|
checks.push(checkS3MountFreshness(resolvedPath));
|
|
@@ -5509,6 +5718,22 @@ async function runDrift(input) {
|
|
|
5509
5718
|
continue;
|
|
5510
5719
|
}
|
|
5511
5720
|
const currentHash = createHash3("sha256").update(Buffer.from(resp.data.body, "utf8")).digest("hex");
|
|
5721
|
+
const identity = assessSourceIdentity({
|
|
5722
|
+
rawPath: raw.relPath,
|
|
5723
|
+
sourceUrl,
|
|
5724
|
+
body: resp.data.body
|
|
5725
|
+
});
|
|
5726
|
+
if (identity.status === "conflict") {
|
|
5727
|
+
results.push({
|
|
5728
|
+
raw_path: raw.relPath,
|
|
5729
|
+
source_url: sourceUrl,
|
|
5730
|
+
stored_sha256: storedHash,
|
|
5731
|
+
current_sha256: currentHash,
|
|
5732
|
+
status: "identity_conflict",
|
|
5733
|
+
identity
|
|
5734
|
+
});
|
|
5735
|
+
continue;
|
|
5736
|
+
}
|
|
5512
5737
|
const drifted2 = currentHash !== storedHash;
|
|
5513
5738
|
if (drifted2 && input.apply) {
|
|
5514
5739
|
const newFm = rawFrontmatter.replace(/^sha256:\s*[a-f0-9]+$/m, `sha256: ${currentHash}`);
|
|
@@ -5536,12 +5761,19 @@ ${body}`;
|
|
|
5536
5761
|
}
|
|
5537
5762
|
const drifted = results.filter((r) => r.status === "drifted");
|
|
5538
5763
|
const fetchFailed = results.filter((r) => r.status === "fetch_failed");
|
|
5764
|
+
const identityConflicts = results.filter((r) => r.status === "identity_conflict");
|
|
5539
5765
|
const updated = results.filter((r) => r.status === "updated");
|
|
5540
5766
|
const unchanged = results.filter((r) => r.status === "unchanged").length;
|
|
5541
|
-
const exitCode = drifted.length > 0 ? ExitCode.DRIFT_DETECTED : ExitCode.OK;
|
|
5767
|
+
const exitCode = drifted.length > 0 || identityConflicts.length > 0 ? ExitCode.DRIFT_DETECTED : ExitCode.OK;
|
|
5542
5768
|
const hintLines = [`scanned: ${results.length}, unchanged: ${unchanged}`];
|
|
5543
5769
|
if (newResults.length > 0) hintLines.push(`new: ${newResults.length}`, ...newResults.map((n) => ` ${n.raw_path} (ingested: ${n.ingested})`));
|
|
5544
5770
|
if (drifted.length > 0) hintLines.push(`drifted: ${drifted.length}`, ...drifted.map((d) => ` ${d.raw_path}`));
|
|
5771
|
+
if (identityConflicts.length > 0) {
|
|
5772
|
+
hintLines.push(
|
|
5773
|
+
`identity_conflicts: ${identityConflicts.length}`,
|
|
5774
|
+
...identityConflicts.map((c) => ` ${c.raw_path}: ${c.identity?.reasons.join("; ") ?? "source identity conflict"}`)
|
|
5775
|
+
);
|
|
5776
|
+
}
|
|
5545
5777
|
if (fetchFailed.length > 0) hintLines.push(`fetch_failed: ${fetchFailed.length}`, ...fetchFailed.map((f) => ` ${f.raw_path}: ${f.fetch_error}`));
|
|
5546
5778
|
if (updated.length > 0) hintLines.push(`updated: ${updated.length}`, ...updated.map((u) => ` ${u.raw_path}`));
|
|
5547
5779
|
if (input.apply && updated.length > 0) {
|
|
@@ -5554,7 +5786,7 @@ ${body}`;
|
|
|
5554
5786
|
}
|
|
5555
5787
|
return {
|
|
5556
5788
|
exitCode,
|
|
5557
|
-
result: ok({ scanned: results.length, drifted, fetch_failed: fetchFailed, updated, newFiles: newResults, unchanged, humanHint: hintLines.join("\n") })
|
|
5789
|
+
result: ok({ scanned: results.length, drifted, fetch_failed: fetchFailed, identity_conflicts: identityConflicts, updated, newFiles: newResults, unchanged, humanHint: hintLines.join("\n") })
|
|
5558
5790
|
};
|
|
5559
5791
|
}
|
|
5560
5792
|
|
|
@@ -6756,6 +6988,25 @@ async function runIngest(input) {
|
|
|
6756
6988
|
const typedRelPath = `${typedDir}/${slug}.md`;
|
|
6757
6989
|
const rawAbsPath = join35(input.vault, rawRelPath);
|
|
6758
6990
|
const typedAbsPath = join35(input.vault, typedRelPath);
|
|
6991
|
+
const identity = assessSourceIdentity({
|
|
6992
|
+
rawPath: rawRelPath,
|
|
6993
|
+
sourceUrl: sourceUrl ?? void 0,
|
|
6994
|
+
body: sourceContent
|
|
6995
|
+
});
|
|
6996
|
+
if (identity.status === "conflict") {
|
|
6997
|
+
return {
|
|
6998
|
+
exitCode: ExitCode.INGEST_VALIDATION_FAILED,
|
|
6999
|
+
result: err("INGEST_VALIDATION_FAILED", {
|
|
7000
|
+
message: "source identity conflict",
|
|
7001
|
+
raw_path: rawRelPath,
|
|
7002
|
+
source_url: sourceUrl,
|
|
7003
|
+
reasons: identity.reasons,
|
|
7004
|
+
pathSignals: identity.pathSignals,
|
|
7005
|
+
sourceSignals: identity.sourceSignals,
|
|
7006
|
+
bodySignals: identity.bodySignals
|
|
7007
|
+
})
|
|
7008
|
+
};
|
|
7009
|
+
}
|
|
6759
7010
|
const rawContent = buildRawContent(sourceUrl, today, sha256, sourceContent);
|
|
6760
7011
|
const typedContent = buildTypedContent(
|
|
6761
7012
|
input.title,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "skillwiki",
|
|
3
|
-
"version": "0.8.2",
|
|
3
|
+
"version": "0.8.3-beta.2",
|
|
4
4
|
"skills": "./",
|
|
5
5
|
"description": "Project-aware Karpathy-style knowledge base for Claude Code: 18 prompt-only skills (wiki-*, proj-*, using-skillwiki) backed by the deterministic `skillwiki` CLI.",
|
|
6
6
|
"author": {
|
package/skills/package.json
CHANGED
|
@@ -18,13 +18,14 @@ Run `skillwiki lang` at the start. Generate page-body prose, narrative sections,
|
|
|
18
18
|
0. **Resolve vault and language.** Run `skillwiki path` (fail if NO_VAULT_CONFIGURED) and `skillwiki lang`. Use the resolved vault path for all writes; use the canonical language for all generated prose.
|
|
19
19
|
1. **Guard.** For each URL: run `skillwiki fetch-guard <url>`. If exit ≠ 0, STOP and surface the error. Do not retry.
|
|
20
20
|
2. **Fetch.** Use `web_fetch` (or read local file) under Layer 2 controls (the CLI Layer 2 fetcher applies in tests; in skill runtime use `web_fetch` directly and treat any error as STOP).
|
|
21
|
-
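3. **Hash.** Write the raw file (frontmatter + body). Run `skillwiki hash <raw-file>` and embed the result in raw frontmatter `sha256:`.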
|
|
22
|
-
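4. **Generate page(s).** Compose typed-knowledge page(s) with citations pre-attached (`^[raw/...]` markers). Every page MUST include: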
|
|
21
|
+
3. **Identity guard.** Before writing raw files, ensure the target raw filename/title, `source_url`, fetched H1/title, and early body subject agree. If `skillwiki ingest` reports `INGEST_VALIDATION_FAILED` with `source identity conflict`, STOP. Do not fix by renaming after the fact; choose the correct title/source pair or ask the user.
|
|
22
|
+
4. **Hash.** Write the raw file (frontmatter + body). Run `skillwiki hash <raw-file>` and embed the result in raw frontmatter `sha256:`.
|
|
23
|
+
5. **Generate page(s).** Compose typed-knowledge page(s) with citations pre-attached (`^[raw/...]` markers). Every page MUST include:
|
|
23
24
|
- `> **TL;DR:**` blockquote as the first content after the title heading — a one-sentence summary of the page's key takeaway (under 200 chars). See SCHEMA.md `## TL;DR Convention`.
|
|
24
25
|
- For pages tagged `architecture` or explaining workflows/systems: include a Mermaid diagram (`graph TB` or `sequenceDiagram`) in the body. Follow Obsidian-compatible Mermaid rules (see SCHEMA.md `## Mermaid Diagrams`).
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
26
|
+
6. **Validate.** For each generated page: run `skillwiki validate <page>`. If exit ≠ 0, STOP — do not write index/log.
|
|
27
|
+
7. **Apply writes in order.** raw → page(s) → `index.md` → `log.md`.
|
|
28
|
+
8. **Confidence flag.** If only one source is cited, set `confidence: low`.
|
|
28
29
|
## Provenance defaults
|
|
29
30
|
- Default `provenance: research`.
|
|
30
31
|
- If cwd is inside `projects/{slug}/`, set `provenance: project` and add `provenance_projects: ["[[slug]]"]`.
|
|
@@ -36,6 +37,7 @@ Raw ephemeral data (market feeds, logs, transient JSON) must be written to the *
|
|
|
36
37
|
## Stop conditions
|
|
37
38
|
- `fetch-guard` non-zero.
|
|
38
39
|
- Fetch timeout / size limit exceeded.
|
|
40
|
+
- `INGEST_VALIDATION_FAILED` with `source identity conflict`.
|
|
39
41
|
- `validate` non-zero on any page.
|
|
40
42
|
- sha256 already exists in vault for the same source.
|
|
41
43
|
## Forbidden
|
|
@@ -46,7 +48,7 @@ Raw ephemeral data (market feeds, logs, transient JSON) must be written to the *
|
|
|
46
48
|
- Writing `[[wikilinks]]` to pages that don't exist in the vault. Before linking, verify the target exists: check `index.md` or `ls` the target directory. If the target doesn't exist yet, use plain text instead of a wikilink.
|
|
47
49
|
## Batch Mode
|
|
48
50
|
When the user provides multiple sources (a directory of files, a list of URLs, or a multi-document input):
|
|
49
|
-
1. **Loop per source.** Execute steps 1–5 for each source individually (guard → fetch → hash → generate → validate).
|
|
51
|
+
1. **Loop per source.** Execute steps 1–6 for each source individually (guard → fetch → identity guard → hash → generate → validate).
|
|
50
52
|
2. **Accumulate, don't write yet.** Collect all raw files and pages in memory. Do not write `index.md` or `log.md` until every source has validated.
|
|
51
53
|
3. **Fail fast.** If any page fails validation, STOP. Report all failures. Do not write index/log for any source.
|
|
52
54
|
4. **Deduplication.** Before writing each raw file, check `sha256` against existing vault raw sources. Skip sources whose content is already present.
|
|
@@ -25,6 +25,7 @@ Standard four reads (SCHEMA, index, log, project context if applicable).
|
|
|
25
25
|
1. Run `skillwiki drift [vault]`. Read the JSON output.
|
|
26
26
|
2. Present findings grouped by status:
|
|
27
27
|
- **drifted:** Source content has changed. Show stored vs current sha256.
|
|
28
|
+
- **identity_conflicts:** The fetched source no longer matches the raw filename/source identity. STOP and surface the conflict. Do not archive or re-ingest until a human chooses the correct source/filename pair.
|
|
28
29
|
- **fetch_failed:** Could not re-fetch. Show error details.
|
|
29
30
|
- **unchanged:** No action needed.
|
|
30
31
|
3. For each drifted source, ask the user: archive old + ingest new, or skip?
|
|
@@ -51,4 +52,5 @@ Raw files are immutable (N9). Re-ingest never modifies an existing raw file. Ins
|
|
|
51
52
|
|
|
52
53
|
- Modifying files in `raw/` directly (N9).
|
|
53
54
|
- Re-ingesting without user approval for each drifted source.
|
|
55
|
+
- Re-ingesting a source listed under `identity_conflicts` without explicit user approval and a corrected target filename/source URL.
|
|
54
56
|
- Skipping the drift check and assuming sources have changed.
|
|
@@ -18,13 +18,14 @@ Run `skillwiki lang` at the start. Generate page-body prose, narrative sections,
|
|
|
18
18
|
0. **Resolve vault and language.** Run `skillwiki path` (fail if NO_VAULT_CONFIGURED) and `skillwiki lang`. Use the resolved vault path for all writes; use the canonical language for all generated prose.
|
|
19
19
|
1. **Guard.** For each URL: run `skillwiki fetch-guard <url>`. If exit ≠ 0, STOP and surface the error. Do not retry.
|
|
20
20
|
2. **Fetch.** Use `web_fetch` (or read local file) under Layer 2 controls (the CLI Layer 2 fetcher applies in tests; in skill runtime use `web_fetch` directly and treat any error as STOP).
|
|
21
|
-
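3. **Hash.** Write the raw file (frontmatter + body). Run `skillwiki hash <raw-file>` and embed the result in raw frontmatter `sha256:`.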
|
|
22
|
-
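4. **Generate page(s).** Compose typed-knowledge page(s) with citations pre-attached (`^[raw/...]` markers). Every page MUST include: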
|
|
21
|
+
3. **Identity guard.** Before writing raw files, ensure the target raw filename/title, `source_url`, fetched H1/title, and early body subject agree. If `skillwiki ingest` reports `INGEST_VALIDATION_FAILED` with `source identity conflict`, STOP. Do not fix by renaming after the fact; choose the correct title/source pair or ask the user.
|
|
22
|
+
4. **Hash.** Write the raw file (frontmatter + body). Run `skillwiki hash <raw-file>` and embed the result in raw frontmatter `sha256:`.
|
|
23
|
+
5. **Generate page(s).** Compose typed-knowledge page(s) with citations pre-attached (`^[raw/...]` markers). Every page MUST include:
|
|
23
24
|
- `> **TL;DR:**` blockquote as the first content after the title heading — a one-sentence summary of the page's key takeaway (under 200 chars). See SCHEMA.md `## TL;DR Convention`.
|
|
24
25
|
- For pages tagged `architecture` or explaining workflows/systems: include a Mermaid diagram (`graph TB` or `sequenceDiagram`) in the body. Follow Obsidian-compatible Mermaid rules (see SCHEMA.md `## Mermaid Diagrams`).
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
26
|
+
6. **Validate.** For each generated page: run `skillwiki validate <page>`. If exit ≠ 0, STOP — do not write index/log.
|
|
27
|
+
7. **Apply writes in order.** raw → page(s) → `index.md` → `log.md`.
|
|
28
|
+
8. **Confidence flag.** If only one source is cited, set `confidence: low`.
|
|
28
29
|
## Provenance defaults
|
|
29
30
|
- Default `provenance: research`.
|
|
30
31
|
- If cwd is inside `projects/{slug}/`, set `provenance: project` and add `provenance_projects: ["[[slug]]"]`.
|
|
@@ -36,6 +37,7 @@ Raw ephemeral data (market feeds, logs, transient JSON) must be written to the *
|
|
|
36
37
|
## Stop conditions
|
|
37
38
|
- `fetch-guard` non-zero.
|
|
38
39
|
- Fetch timeout / size limit exceeded.
|
|
40
|
+
- `INGEST_VALIDATION_FAILED` with `source identity conflict`.
|
|
39
41
|
- `validate` non-zero on any page.
|
|
40
42
|
- sha256 already exists in vault for the same source.
|
|
41
43
|
## Forbidden
|
|
@@ -46,7 +48,7 @@ Raw ephemeral data (market feeds, logs, transient JSON) must be written to the *
|
|
|
46
48
|
- Writing `[[wikilinks]]` to pages that don't exist in the vault. Before linking, verify the target exists: check `index.md` or `ls` the target directory. If the target doesn't exist yet, use plain text instead of a wikilink.
|
|
47
49
|
## Batch Mode
|
|
48
50
|
When the user provides multiple sources (a directory of files, a list of URLs, or a multi-document input):
|
|
49
|
-
1. **Loop per source.** Execute steps 1–5 for each source individually (guard → fetch → hash → generate → validate).
|
|
51
|
+
1. **Loop per source.** Execute steps 1–6 for each source individually (guard → fetch → identity guard → hash → generate → validate).
|
|
50
52
|
2. **Accumulate, don't write yet.** Collect all raw files and pages in memory. Do not write `index.md` or `log.md` until every source has validated.
|
|
51
53
|
3. **Fail fast.** If any page fails validation, STOP. Report all failures. Do not write index/log for any source.
|
|
52
54
|
4. **Deduplication.** Before writing each raw file, check `sha256` against existing vault raw sources. Skip sources whose content is already present.
|
|
@@ -25,6 +25,7 @@ Standard four reads (SCHEMA, index, log, project context if applicable).
|
|
|
25
25
|
1. Run `skillwiki drift [vault]`. Read the JSON output.
|
|
26
26
|
2. Present findings grouped by status:
|
|
27
27
|
- **drifted:** Source content has changed. Show stored vs current sha256.
|
|
28
|
+
- **identity_conflicts:** The fetched source no longer matches the raw filename/source identity. STOP and surface the conflict. Do not archive or re-ingest until a human chooses the correct source/filename pair.
|
|
28
29
|
- **fetch_failed:** Could not re-fetch. Show error details.
|
|
29
30
|
- **unchanged:** No action needed.
|
|
30
31
|
3. For each drifted source, ask the user: archive old + ingest new, or skip?
|
|
@@ -51,4 +52,5 @@ Raw files are immutable (N9). Re-ingest never modifies an existing raw file. Ins
|
|
|
51
52
|
|
|
52
53
|
- Modifying files in `raw/` directly (N9).
|
|
53
54
|
- Re-ingesting without user approval for each drifted source.
|
|
55
|
+
- Re-ingesting a source listed under `identity_conflicts` without explicit user approval and a corrected target filename/source URL.
|
|
54
56
|
- Skipping the drift check and assuming sources have changed.
|