skillwiki 0.8.2 → 0.8.3-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +136 -3
- package/package.json +1 -1
- package/skills/.claude-plugin/plugin.json +1 -1
- package/skills/.codex-plugin/plugin.json +1 -1
- package/skills/package.json +1 -1
- package/skills/skills/wiki-ingest/SKILL.md +8 -6
- package/skills/skills/wiki-reingest/SKILL.md +2 -0
- package/skills/wiki-ingest/SKILL.md +8 -6
- package/skills/wiki-reingest/SKILL.md +2 -0
package/dist/cli.js
CHANGED
|
@@ -3158,6 +3158,79 @@ function validateCliRefs(text, page, surface) {
|
|
|
3158
3158
|
return violations;
|
|
3159
3159
|
}
|
|
3160
3160
|
|
|
3161
|
+
// src/utils/source-identity.ts
|
|
3162
|
+
/**
 * Regular expressions that identify a known project in a path, URL, or body
 * excerpt. Key order is significant: collectSignals() reports matches in
 * this order. All patterns are case-insensitive and non-global, so
 * RegExp#test() carries no state between calls.
 */
var PROJECT_PATTERNS = {
  hermes: [/\bhermes\b/i, /nousresearch\s*hermes/i, /nousresearch\/hermes-agent/i, /hermes agent/i],
  skillwiki: [/\bskillwiki\b/i, /\bllm[-_ ]?wiki\b/i, /karpathy'?s llm wiki/i],
  superpowers: [/\bsuperpowers\b/i, /obra\/superpowers/i, /complete software development methodology/i],
  playwright: [/\bplaywright\b/i, /microsoft\s*playwright/i, /microsoft\/playwright/i],
  convex: [/\bconvex\b/i],
  newapi: [/\bnew[-_ ]?api\b/i, /quantumnous\/new-api/i],
  coolify: [/\bcoolify\b/i, /coollabsio\/coolify/i],
  seaweedfs: [/\bseaweed\s*fs\b/i],
  proxmox: [/\bproxmox\b/i, /proxmoxve/i],
  codestable: [/\bcodestable\b/i]
};

/**
 * Ordered "left|right" pairs of distinct projects that may legitimately
 * appear together. Both directions are listed because compatible() performs
 * a direction-sensitive lookup.
 */
var COMPATIBLE = /* @__PURE__ */ new Set([
  "hermes|skillwiki",
  "skillwiki|hermes",
  "proxmox|seaweedfs",
  "seaweedfs|proxmox",
  "coolify|seaweedfs",
  "seaweedfs|coolify"
]);

/**
 * Produce a separator-insensitive form of `text`: camelCase boundaries are
 * split with a space, everything is lower-cased, and runs of `_` / `-`
 * become a single space.
 *
 * @param {string} text
 * @returns {string}
 */
function normalize(text) {
  return text.replace(/([a-z])([A-Z])/g, "$1 $2").toLowerCase().replace(/[_-]+/g, " ");
}

/**
 * Return the leading excerpt of a document body used for signal detection:
 * a leading `---` frontmatter block is removed, then the first 2000
 * characters are kept.
 *
 * @param {string | undefined | null} body
 * @returns {string} The excerpt, or "" when `body` is empty/missing.
 */
function firstBodyWindow(body) {
  if (!body) return "";
  const withoutFrontmatter = body.replace(/^---\r?\n[\s\S]*?\r?\n---\r?\n?/, "");
  return withoutFrontmatter.slice(0, 2e3);
}

/**
 * List the project names whose patterns occur in `text`.
 *
 * Every pattern is tried against two forms of the text:
 *  - the normalized form, so `hermes_agent` / `llm-wiki` style separators
 *    still hit the word-boundary patterns, and
 *  - the plain lower-cased form, because normalize() splits camelCase brand
 *    spellings ("SkillWiki" -> "skill wiki", "CodeStable" -> "code stable")
 *    and rewrites hyphens, which would otherwise hide them from patterns
 *    such as /\bskillwiki\b/ or /quantumnous\/new-api/.
 *
 * @param {string} text
 * @returns {string[]} Matching keys of PROJECT_PATTERNS, in declaration order.
 *   Empty when `text` is not a non-empty string.
 */
function collectSignals(text) {
  if (typeof text !== "string" || text.length === 0) return [];
  const candidates = [normalize(text), text.toLowerCase()];
  const found = [];
  for (const [name, patterns] of Object.entries(PROJECT_PATTERNS)) {
    const hit = patterns.some((pattern) => candidates.some((candidate) => pattern.test(candidate)));
    if (hit) found.push(name);
  }
  return found;
}

/**
 * Two signals agree when they are the same project or an allow-listed pair.
 *
 * @param {string} left
 * @param {string} right
 * @returns {boolean}
 */
function compatible(left, right) {
  return left === right || COMPATIBLE.has(`${left}|${right}`);
}

/**
 * True when both lists are non-empty and at least one (left, right) pair is
 * not compatible.
 *
 * @param {string[]} leftSignals
 * @param {string[]} rightSignals
 * @returns {boolean}
 */
function hasAnyIncompatibleSignals(leftSignals, rightSignals) {
  if (leftSignals.length === 0 || rightSignals.length === 0) return false;
  return leftSignals.some((left) => rightSignals.some((right) => !compatible(left, right)));
}

/**
 * True when at least one (left, right) pair is compatible.
 *
 * @param {string[]} leftSignals
 * @param {string[]} rightSignals
 * @returns {boolean}
 */
function hasAnyCompatibleSignals(leftSignals, rightSignals) {
  return leftSignals.some((left) => rightSignals.some((right) => compatible(left, right)));
}

/**
 * Check whether a raw file's path, its source URL, and the start of its body
 * appear to describe the same project.
 *
 * Result `status`:
 *  - "conflict":   the path disagrees with the source URL (any incompatible
 *                  pair) and/or with the body (no compatible pair).
 *  - "suspicious": the path carries no signal, but source URL and body both
 *                  do and share no compatible pair.
 *  - "ok":         anything else.
 *
 * @param {{ rawPath: string, sourceUrl?: string | null, body?: string | null }} input
 * @returns {{ status: "ok" | "suspicious" | "conflict", pathSignals: string[], sourceSignals: string[], bodySignals: string[], reasons: string[] }}
 */
function assessSourceIdentity(input) {
  const pathSignals = collectSignals(input.rawPath);
  const sourceSignals = collectSignals(input.sourceUrl ?? "");
  const bodySignals = collectSignals(firstBodyWindow(input.body));
  const reasons = [];
  if (hasAnyIncompatibleSignals(pathSignals, sourceSignals)) {
    reasons.push(`filename/path signals [${pathSignals.join(", ")}] but source_url signals [${sourceSignals.join(", ")}]`);
  }
  if (pathSignals.length > 0 && bodySignals.length > 0 && !hasAnyCompatibleSignals(pathSignals, bodySignals)) {
    reasons.push(`filename/path signals [${pathSignals.join(", ")}] but body signals [${bodySignals.join(", ")}]`);
  }
  if (reasons.length > 0) {
    return { status: "conflict", pathSignals, sourceSignals, bodySignals, reasons };
  }
  // Without a path signal the mismatch is only between URL and body, which
  // is reported at the weaker "suspicious" level.
  if (pathSignals.length === 0 && sourceSignals.length > 0 && bodySignals.length > 0 && !hasAnyCompatibleSignals(sourceSignals, bodySignals)) {
    return {
      status: "suspicious",
      pathSignals,
      sourceSignals,
      bodySignals,
      reasons: [`source_url signals [${sourceSignals.join(", ")}] but body signals [${bodySignals.join(", ")}]`]
    };
  }
  return { status: "ok", pathSignals, sourceSignals, bodySignals, reasons };
}
|
|
3233
|
+
|
|
3161
3234
|
// src/commands/lint.ts
|
|
3162
3235
|
var STRUCT_MIN_BODY_LINES = 60;
|
|
3163
3236
|
var STRUCT_MIN_SECTIONS = 3;
|
|
@@ -3189,7 +3262,7 @@ function extractSourceEntries(rawFm) {
|
|
|
3189
3262
|
}
|
|
3190
3263
|
return entries;
|
|
3191
3264
|
}
|
|
3192
|
-
var ERROR_ORDER = ["broken_wikilinks", "invalid_frontmatter", "raw_dedup", "broken_sources", "tag_not_in_taxonomy", "path_too_long"];
|
|
3265
|
+
var ERROR_ORDER = ["broken_wikilinks", "invalid_frontmatter", "raw_source_identity_conflict", "raw_dedup", "broken_sources", "tag_not_in_taxonomy", "path_too_long"];
|
|
3193
3266
|
var WARNING_ORDER = ["raw_body_duplicate", "raw_subdirectory_duplicate", "file_source_url", "index_incomplete", "index_link_format", "stale_page", "page_too_large", "log_rotate_needed", "orphans", "compound_refs", "legacy_citation_style", "orphaned_citations", "duplicate_frontmatter", "work_item_health", "orphaned_project_pages", "missing_overview", "missing_diagram"];
|
|
3194
3267
|
var INFO_ORDER = ["bridges", "sparse_community", "page_structure", "topic_map_recommended", "frontmatter_wikilink", "wikilink_citation", "missing_tldr", "stale_sections", "cli_refs"];
|
|
3195
3268
|
var KNOWN_BUCKETS = [...ERROR_ORDER, ...WARNING_ORDER, ...INFO_ORDER];
|
|
@@ -3290,6 +3363,7 @@ async function runLint(input) {
|
|
|
3290
3363
|
buckets.raw_subdirectory_duplicate = subDirDupes;
|
|
3291
3364
|
}
|
|
3292
3365
|
const fileSourceUrlFlags = [];
|
|
3366
|
+
const rawIdentityConflicts = [];
|
|
3293
3367
|
for (const raw of scan.data.raw) {
|
|
3294
3368
|
const text = await readPage(raw);
|
|
3295
3369
|
const split = splitFrontmatter(text);
|
|
@@ -3297,8 +3371,25 @@ async function runLint(input) {
|
|
|
3297
3371
|
if (/^source_url:\s*file:\/\//m.test(split.data.rawFrontmatter)) {
|
|
3298
3372
|
fileSourceUrlFlags.push(raw.relPath);
|
|
3299
3373
|
}
|
|
3374
|
+
const sourceUrl = split.data.rawFrontmatter.match(/^source_url:\s*(.+)$/m)?.[1]?.trim().replace(/^["']|["']$/g, "") ?? "";
|
|
3375
|
+
const assessment = assessSourceIdentity({
|
|
3376
|
+
rawPath: raw.relPath,
|
|
3377
|
+
sourceUrl,
|
|
3378
|
+
body: split.data.body
|
|
3379
|
+
});
|
|
3380
|
+
if (assessment.status === "conflict") {
|
|
3381
|
+
rawIdentityConflicts.push({
|
|
3382
|
+
file: raw.relPath,
|
|
3383
|
+
status: assessment.status,
|
|
3384
|
+
reasons: assessment.reasons,
|
|
3385
|
+
pathSignals: assessment.pathSignals,
|
|
3386
|
+
sourceSignals: assessment.sourceSignals,
|
|
3387
|
+
bodySignals: assessment.bodySignals
|
|
3388
|
+
});
|
|
3389
|
+
}
|
|
3300
3390
|
}
|
|
3301
3391
|
if (fileSourceUrlFlags.length > 0) buckets.file_source_url = fileSourceUrlFlags;
|
|
3392
|
+
if (rawIdentityConflicts.length > 0) buckets.raw_source_identity_conflict = rawIdentityConflicts;
|
|
3302
3393
|
const legacyPages = [];
|
|
3303
3394
|
const orphanedPages = [];
|
|
3304
3395
|
const structFlags = [];
|
|
@@ -5509,6 +5600,22 @@ async function runDrift(input) {
|
|
|
5509
5600
|
continue;
|
|
5510
5601
|
}
|
|
5511
5602
|
const currentHash = createHash3("sha256").update(Buffer.from(resp.data.body, "utf8")).digest("hex");
|
|
5603
|
+
const identity = assessSourceIdentity({
|
|
5604
|
+
rawPath: raw.relPath,
|
|
5605
|
+
sourceUrl,
|
|
5606
|
+
body: resp.data.body
|
|
5607
|
+
});
|
|
5608
|
+
if (identity.status === "conflict") {
|
|
5609
|
+
results.push({
|
|
5610
|
+
raw_path: raw.relPath,
|
|
5611
|
+
source_url: sourceUrl,
|
|
5612
|
+
stored_sha256: storedHash,
|
|
5613
|
+
current_sha256: currentHash,
|
|
5614
|
+
status: "identity_conflict",
|
|
5615
|
+
identity
|
|
5616
|
+
});
|
|
5617
|
+
continue;
|
|
5618
|
+
}
|
|
5512
5619
|
const drifted2 = currentHash !== storedHash;
|
|
5513
5620
|
if (drifted2 && input.apply) {
|
|
5514
5621
|
const newFm = rawFrontmatter.replace(/^sha256:\s*[a-f0-9]+$/m, `sha256: ${currentHash}`);
|
|
@@ -5536,12 +5643,19 @@ ${body}`;
|
|
|
5536
5643
|
}
|
|
5537
5644
|
const drifted = results.filter((r) => r.status === "drifted");
|
|
5538
5645
|
const fetchFailed = results.filter((r) => r.status === "fetch_failed");
|
|
5646
|
+
const identityConflicts = results.filter((r) => r.status === "identity_conflict");
|
|
5539
5647
|
const updated = results.filter((r) => r.status === "updated");
|
|
5540
5648
|
const unchanged = results.filter((r) => r.status === "unchanged").length;
|
|
5541
|
-
const exitCode = drifted.length > 0 ? ExitCode.DRIFT_DETECTED : ExitCode.OK;
|
|
5649
|
+
const exitCode = drifted.length > 0 || identityConflicts.length > 0 ? ExitCode.DRIFT_DETECTED : ExitCode.OK;
|
|
5542
5650
|
const hintLines = [`scanned: ${results.length}, unchanged: ${unchanged}`];
|
|
5543
5651
|
if (newResults.length > 0) hintLines.push(`new: ${newResults.length}`, ...newResults.map((n) => ` ${n.raw_path} (ingested: ${n.ingested})`));
|
|
5544
5652
|
if (drifted.length > 0) hintLines.push(`drifted: ${drifted.length}`, ...drifted.map((d) => ` ${d.raw_path}`));
|
|
5653
|
+
if (identityConflicts.length > 0) {
|
|
5654
|
+
hintLines.push(
|
|
5655
|
+
`identity_conflicts: ${identityConflicts.length}`,
|
|
5656
|
+
...identityConflicts.map((c) => ` ${c.raw_path}: ${c.identity?.reasons.join("; ") ?? "source identity conflict"}`)
|
|
5657
|
+
);
|
|
5658
|
+
}
|
|
5545
5659
|
if (fetchFailed.length > 0) hintLines.push(`fetch_failed: ${fetchFailed.length}`, ...fetchFailed.map((f) => ` ${f.raw_path}: ${f.fetch_error}`));
|
|
5546
5660
|
if (updated.length > 0) hintLines.push(`updated: ${updated.length}`, ...updated.map((u) => ` ${u.raw_path}`));
|
|
5547
5661
|
if (input.apply && updated.length > 0) {
|
|
@@ -5554,7 +5668,7 @@ ${body}`;
|
|
|
5554
5668
|
}
|
|
5555
5669
|
return {
|
|
5556
5670
|
exitCode,
|
|
5557
|
-
result: ok({ scanned: results.length, drifted, fetch_failed: fetchFailed, updated, newFiles: newResults, unchanged, humanHint: hintLines.join("\n") })
|
|
5671
|
+
result: ok({ scanned: results.length, drifted, fetch_failed: fetchFailed, identity_conflicts: identityConflicts, updated, newFiles: newResults, unchanged, humanHint: hintLines.join("\n") })
|
|
5558
5672
|
};
|
|
5559
5673
|
}
|
|
5560
5674
|
|
|
@@ -6756,6 +6870,25 @@ async function runIngest(input) {
|
|
|
6756
6870
|
const typedRelPath = `${typedDir}/${slug}.md`;
|
|
6757
6871
|
const rawAbsPath = join35(input.vault, rawRelPath);
|
|
6758
6872
|
const typedAbsPath = join35(input.vault, typedRelPath);
|
|
6873
|
+
const identity = assessSourceIdentity({
|
|
6874
|
+
rawPath: rawRelPath,
|
|
6875
|
+
sourceUrl: sourceUrl ?? void 0,
|
|
6876
|
+
body: sourceContent
|
|
6877
|
+
});
|
|
6878
|
+
if (identity.status === "conflict") {
|
|
6879
|
+
return {
|
|
6880
|
+
exitCode: ExitCode.INGEST_VALIDATION_FAILED,
|
|
6881
|
+
result: err("INGEST_VALIDATION_FAILED", {
|
|
6882
|
+
message: "source identity conflict",
|
|
6883
|
+
raw_path: rawRelPath,
|
|
6884
|
+
source_url: sourceUrl,
|
|
6885
|
+
reasons: identity.reasons,
|
|
6886
|
+
pathSignals: identity.pathSignals,
|
|
6887
|
+
sourceSignals: identity.sourceSignals,
|
|
6888
|
+
bodySignals: identity.bodySignals
|
|
6889
|
+
})
|
|
6890
|
+
};
|
|
6891
|
+
}
|
|
6759
6892
|
const rawContent = buildRawContent(sourceUrl, today, sha256, sourceContent);
|
|
6760
6893
|
const typedContent = buildTypedContent(
|
|
6761
6894
|
input.title,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "skillwiki",
|
|
3
|
-
"version": "0.8.2",
|
|
3
|
+
"version": "0.8.3-beta.1",
|
|
4
4
|
"skills": "./",
|
|
5
5
|
"description": "Project-aware Karpathy-style knowledge base for Claude Code: 18 prompt-only skills (wiki-*, proj-*, using-skillwiki) backed by the deterministic `skillwiki` CLI.",
|
|
6
6
|
"author": {
|
package/skills/package.json
CHANGED
|
@@ -18,13 +18,14 @@ Run `skillwiki lang` at the start. Generate page-body prose, narrative sections,
|
|
|
18
18
|
0. **Resolve vault and language.** Run `skillwiki path` (fail if NO_VAULT_CONFIGURED) and `skillwiki lang`. Use the resolved vault path for all writes; use the canonical language for all generated prose.
|
|
19
19
|
1. **Guard.** For each URL: run `skillwiki fetch-guard <url>`. If exit ≠ 0, STOP and surface the error. Do not retry.
|
|
20
20
|
2. **Fetch.** Use `web_fetch` (or read local file) under Layer 2 controls (the CLI Layer 2 fetcher applies in tests; in skill runtime use `web_fetch` directly and treat any error as STOP).
|
|
21
|
-
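3. **Hash.** Write the raw file (frontmatter + body). Run `skillwiki hash <raw-file>` and embed the result in raw frontmatter `sha256:`.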
|
|
22
|
-
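4. **Generate page(s).** Compose typed-knowledge page(s) with citations pre-attached (`^[raw/...]` markers). Every page MUST include: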
|
|
21
|
+
3. **Identity guard.** Before writing raw files, ensure the target raw filename/title, `source_url`, fetched H1/title, and early body subject agree. If `skillwiki ingest` reports `INGEST_VALIDATION_FAILED` with `source identity conflict`, STOP. Do not fix by renaming after the fact; choose the correct title/source pair or ask the user.
|
|
22
|
+
4. **Hash.** Write the raw file (frontmatter + body). Run `skillwiki hash <raw-file>` and embed the result in raw frontmatter `sha256:`.
|
|
23
|
+
5. **Generate page(s).** Compose typed-knowledge page(s) with citations pre-attached (`^[raw/...]` markers). Every page MUST include:
|
|
23
24
|
- `> **TL;DR:**` blockquote as the first content after the title heading — a one-sentence summary of the page's key takeaway (under 200 chars). See SCHEMA.md `## TL;DR Convention`.
|
|
24
25
|
- For pages tagged `architecture` or explaining workflows/systems: include a Mermaid diagram (`graph TB` or `sequenceDiagram`) in the body. Follow Obsidian-compatible Mermaid rules (see SCHEMA.md `## Mermaid Diagrams`).
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
26
|
+
6. **Validate.** For each generated page: run `skillwiki validate <page>`. If exit ≠ 0, STOP — do not write index/log.
|
|
27
|
+
7. **Apply writes in order.** raw → page(s) → `index.md` → `log.md`.
|
|
28
|
+
8. **Confidence flag.** If only one source is cited, set `confidence: low`.
|
|
28
29
|
## Provenance defaults
|
|
29
30
|
- Default `provenance: research`.
|
|
30
31
|
- If cwd is inside `projects/{slug}/`, set `provenance: project` and add `provenance_projects: ["[[slug]]"]`.
|
|
@@ -36,6 +37,7 @@ Raw ephemeral data (market feeds, logs, transient JSON) must be written to the *
|
|
|
36
37
|
## Stop conditions
|
|
37
38
|
- `fetch-guard` non-zero.
|
|
38
39
|
- Fetch timeout / size limit exceeded.
|
|
40
|
+
- `INGEST_VALIDATION_FAILED` with `source identity conflict`.
|
|
39
41
|
- `validate` non-zero on any page.
|
|
40
42
|
- sha256 already exists in vault for the same source.
|
|
41
43
|
## Forbidden
|
|
@@ -46,7 +48,7 @@ Raw ephemeral data (market feeds, logs, transient JSON) must be written to the *
|
|
|
46
48
|
- Writing `[[wikilinks]]` to pages that don't exist in the vault. Before linking, verify the target exists: check `index.md` or `ls` the target directory. If the target doesn't exist yet, use plain text instead of a wikilink.
|
|
47
49
|
## Batch Mode
|
|
48
50
|
When the user provides multiple sources (a directory of files, a list of URLs, or a multi-document input):
|
|
49
|
-
1. **Loop per source.** Execute steps 1–5 for each source individually (guard → fetch → hash → generate → validate).
|
|
51
|
+
1. **Loop per source.** Execute steps 1–6 for each source individually (guard → fetch → identity guard → hash → generate → validate).
|
|
50
52
|
2. **Accumulate, don't write yet.** Collect all raw files and pages in memory. Do not write `index.md` or `log.md` until every source has validated.
|
|
51
53
|
3. **Fail fast.** If any page fails validation, STOP. Report all failures. Do not write index/log for any source.
|
|
52
54
|
4. **Deduplication.** Before writing each raw file, check `sha256` against existing vault raw sources. Skip sources whose content is already present.
|
|
@@ -25,6 +25,7 @@ Standard four reads (SCHEMA, index, log, project context if applicable).
|
|
|
25
25
|
1. Run `skillwiki drift [vault]`. Read the JSON output.
|
|
26
26
|
2. Present findings grouped by status:
|
|
27
27
|
- **drifted:** Source content has changed. Show stored vs current sha256.
|
|
28
|
+
- **identity_conflicts:** The fetched source no longer matches the raw filename/source identity. STOP and surface the conflict. Do not archive or reingest until a human chooses the correct source/filename pair.
|
|
28
29
|
- **fetch_failed:** Could not re-fetch. Show error details.
|
|
29
30
|
- **unchanged:** No action needed.
|
|
30
31
|
3. For each drifted source, ask the user: archive old + ingest new, or skip?
|
|
@@ -51,4 +52,5 @@ Raw files are immutable (N9). Re-ingest never modifies an existing raw file. Ins
|
|
|
51
52
|
|
|
52
53
|
- Modifying files in `raw/` directly (N9).
|
|
53
54
|
- Re-ingesting without user approval for each drifted source.
|
|
55
|
+
- Re-ingesting a source listed under `identity_conflicts` without explicit user approval and a corrected target filename/source URL.
|
|
54
56
|
- Skipping the drift check and assuming sources have changed.
|
|
@@ -18,13 +18,14 @@ Run `skillwiki lang` at the start. Generate page-body prose, narrative sections,
|
|
|
18
18
|
0. **Resolve vault and language.** Run `skillwiki path` (fail if NO_VAULT_CONFIGURED) and `skillwiki lang`. Use the resolved vault path for all writes; use the canonical language for all generated prose.
|
|
19
19
|
1. **Guard.** For each URL: run `skillwiki fetch-guard <url>`. If exit ≠ 0, STOP and surface the error. Do not retry.
|
|
20
20
|
2. **Fetch.** Use `web_fetch` (or read local file) under Layer 2 controls (the CLI Layer 2 fetcher applies in tests; in skill runtime use `web_fetch` directly and treat any error as STOP).
|
|
21
|
-
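3. **Hash.** Write the raw file (frontmatter + body). Run `skillwiki hash <raw-file>` and embed the result in raw frontmatter `sha256:`.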
|
|
22
|
-
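4. **Generate page(s).** Compose typed-knowledge page(s) with citations pre-attached (`^[raw/...]` markers). Every page MUST include: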
|
|
21
|
+
3. **Identity guard.** Before writing raw files, ensure the target raw filename/title, `source_url`, fetched H1/title, and early body subject agree. If `skillwiki ingest` reports `INGEST_VALIDATION_FAILED` with `source identity conflict`, STOP. Do not fix by renaming after the fact; choose the correct title/source pair or ask the user.
|
|
22
|
+
4. **Hash.** Write the raw file (frontmatter + body). Run `skillwiki hash <raw-file>` and embed the result in raw frontmatter `sha256:`.
|
|
23
|
+
5. **Generate page(s).** Compose typed-knowledge page(s) with citations pre-attached (`^[raw/...]` markers). Every page MUST include:
|
|
23
24
|
- `> **TL;DR:**` blockquote as the first content after the title heading — a one-sentence summary of the page's key takeaway (under 200 chars). See SCHEMA.md `## TL;DR Convention`.
|
|
24
25
|
- For pages tagged `architecture` or explaining workflows/systems: include a Mermaid diagram (`graph TB` or `sequenceDiagram`) in the body. Follow Obsidian-compatible Mermaid rules (see SCHEMA.md `## Mermaid Diagrams`).
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
26
|
+
6. **Validate.** For each generated page: run `skillwiki validate <page>`. If exit ≠ 0, STOP — do not write index/log.
|
|
27
|
+
7. **Apply writes in order.** raw → page(s) → `index.md` → `log.md`.
|
|
28
|
+
8. **Confidence flag.** If only one source is cited, set `confidence: low`.
|
|
28
29
|
## Provenance defaults
|
|
29
30
|
- Default `provenance: research`.
|
|
30
31
|
- If cwd is inside `projects/{slug}/`, set `provenance: project` and add `provenance_projects: ["[[slug]]"]`.
|
|
@@ -36,6 +37,7 @@ Raw ephemeral data (market feeds, logs, transient JSON) must be written to the *
|
|
|
36
37
|
## Stop conditions
|
|
37
38
|
- `fetch-guard` non-zero.
|
|
38
39
|
- Fetch timeout / size limit exceeded.
|
|
40
|
+
- `INGEST_VALIDATION_FAILED` with `source identity conflict`.
|
|
39
41
|
- `validate` non-zero on any page.
|
|
40
42
|
- sha256 already exists in vault for the same source.
|
|
41
43
|
## Forbidden
|
|
@@ -46,7 +48,7 @@ Raw ephemeral data (market feeds, logs, transient JSON) must be written to the *
|
|
|
46
48
|
- Writing `[[wikilinks]]` to pages that don't exist in the vault. Before linking, verify the target exists: check `index.md` or `ls` the target directory. If the target doesn't exist yet, use plain text instead of a wikilink.
|
|
47
49
|
## Batch Mode
|
|
48
50
|
When the user provides multiple sources (a directory of files, a list of URLs, or a multi-document input):
|
|
49
|
-
1. **Loop per source.** Execute steps 1–5 for each source individually (guard → fetch → hash → generate → validate).
|
|
51
|
+
1. **Loop per source.** Execute steps 1–6 for each source individually (guard → fetch → identity guard → hash → generate → validate).
|
|
50
52
|
2. **Accumulate, don't write yet.** Collect all raw files and pages in memory. Do not write `index.md` or `log.md` until every source has validated.
|
|
51
53
|
3. **Fail fast.** If any page fails validation, STOP. Report all failures. Do not write index/log for any source.
|
|
52
54
|
4. **Deduplication.** Before writing each raw file, check `sha256` against existing vault raw sources. Skip sources whose content is already present.
|
|
@@ -25,6 +25,7 @@ Standard four reads (SCHEMA, index, log, project context if applicable).
|
|
|
25
25
|
1. Run `skillwiki drift [vault]`. Read the JSON output.
|
|
26
26
|
2. Present findings grouped by status:
|
|
27
27
|
- **drifted:** Source content has changed. Show stored vs current sha256.
|
|
28
|
+
- **identity_conflicts:** The fetched source no longer matches the raw filename/source identity. STOP and surface the conflict. Do not archive or reingest until a human chooses the correct source/filename pair.
|
|
28
29
|
- **fetch_failed:** Could not re-fetch. Show error details.
|
|
29
30
|
- **unchanged:** No action needed.
|
|
30
31
|
3. For each drifted source, ask the user: archive old + ingest new, or skip?
|
|
@@ -51,4 +52,5 @@ Raw files are immutable (N9). Re-ingest never modifies an existing raw file. Ins
|
|
|
51
52
|
|
|
52
53
|
- Modifying files in `raw/` directly (N9).
|
|
53
54
|
- Re-ingesting without user approval for each drifted source.
|
|
55
|
+
- Re-ingesting a source listed under `identity_conflicts` without explicit user approval and a corrected target filename/source URL.
|
|
54
56
|
- Skipping the drift check and assuming sources have changed.
|