npm - skillwiki - Versions diffs - 0.8.2 → 0.8.3-beta.2 - Mend

skillwiki 0.8.2 → 0.8.3-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

package/dist/cli.js +254 -3
package/package.json +1 -1
package/skills/.claude-plugin/plugin.json +1 -1
package/skills/.codex-plugin/plugin.json +1 -1
package/skills/package.json +1 -1
package/skills/skills/wiki-ingest/SKILL.md +8 -6
package/skills/skills/wiki-reingest/SKILL.md +2 -0
package/skills/wiki-ingest/SKILL.md +8 -6
package/skills/wiki-reingest/SKILL.md +2 -0

package/dist/cli.js CHANGED Viewed

@@ -3158,6 +3158,79 @@ function validateCliRefs(text, page, surface) {
   return violations;
 }
+// src/utils/source-identity.ts
+/**
+ * Detection table: project name -> regexes that signal that project.
+ * `collectSignals` tests these against text that has already been through
+ * `normalize` (lower-cased, runs of `-`/`_` collapsed to a single space).
+ * NOTE(review): the `hermes-agent` and `new-api` literals contain a hyphen and
+ * so cannot match normalized text; other patterns in the same entry already
+ * match those spellings once the hyphen has become a space.
+ */
+var PROJECT_PATTERNS = {
+  hermes: [
+    /\bhermes\b/i,
+    /nousresearch\s*hermes/i,
+    /nousresearch\/hermes-agent/i,
+    /hermes agent/i
+  ],
+  skillwiki: [
+    /\bskillwiki\b/i,
+    /\bllm[-_ ]?wiki\b/i,
+    /karpathy'?s llm wiki/i
+  ],
+  superpowers: [
+    /\bsuperpowers\b/i,
+    /obra\/superpowers/i,
+    /complete software development methodology/i
+  ],
+  playwright: [
+    /\bplaywright\b/i,
+    /microsoft\s*playwright/i,
+    /microsoft\/playwright/i
+  ],
+  convex: [
+    /\bconvex\b/i
+  ],
+  newapi: [
+    /\bnew[-_ ]?api\b/i,
+    /quantumnous\/new-api/i
+  ],
+  coolify: [
+    /\bcoolify\b/i,
+    /coollabsio\/coolify/i
+  ],
+  seaweedfs: [
+    /\bseaweed\s*fs\b/i
+  ],
+  proxmox: [
+    /\bproxmox\b/i,
+    /proxmoxve/i
+  ],
+  codestable: [
+    /\bcodestable\b/i
+  ]
+};
+/**
+ * Project pairs that may legitimately appear together, stored as "left|right"
+ * keys in both orders so `compatible` can look up either direction directly.
+ */
+var COMPATIBLE = /* @__PURE__ */ new Set(
+  [
+    ["hermes", "skillwiki"],
+    ["proxmox", "seaweedfs"],
+    ["coolify", "seaweedfs"]
+  ].flatMap(([first, second]) => [`${first}|${second}`, `${second}|${first}`])
+);
+/**
+ * Canonicalize text before pattern matching.
+ * @param {string} text - Raw path, URL or body text.
+ * @returns {string} The text with a space inserted at each lower-to-upper
+ *   case boundary, lower-cased, and with every run of `_`/`-` replaced by a
+ *   single space.
+ */
+function normalize(text) {
+  // Split camelCase first; the case boundary is gone after lower-casing.
+  const camelSplit = text.replace(/([a-z])([A-Z])/g, "$1 $2");
+  const lowered = camelSplit.toLowerCase();
+  return lowered.replace(/[_-]+/g, " ");
+}
+/**
+ * Return the opening slice of a document for signal detection, with a leading
+ * `---` ... `---` frontmatter block removed.
+ * @param {string|null|undefined} body - Document text; falsy values give "".
+ * @param {number} [maxChars=2000] - Maximum number of characters to return.
+ * @returns {string} At most `maxChars` characters from the start of the body.
+ */
+function firstBodyWindow(body, maxChars = 2e3) {
+  if (!body) return "";
+  // A byte-order mark ahead of the opening `---` would defeat the `^` anchor
+  // below and leave the frontmatter inside the scanned window.
+  const text = body.charCodeAt(0) === 0xfeff ? body.slice(1) : body;
+  const withoutFrontmatter = text.replace(/^---\r?\n[\s\S]*?\r?\n---\r?\n?/, "");
+  return withoutFrontmatter.slice(0, maxChars);
+}
+/**
+ * List the projects whose patterns occur in the given text.
+ * @param {string} text - Text to scan; it is normalized before matching.
+ * @returns {string[]} Matching project names, in `PROJECT_PATTERNS` key order.
+ */
+function collectSignals(text) {
+  const haystack = normalize(text);
+  return Object.entries(PROJECT_PATTERNS)
+    .filter(([, regexes]) => regexes.some((regex) => regex.test(haystack)))
+    .map(([project]) => project);
+}
+/**
+ * Decide whether two project signals may refer to the same source.
+ * @param {string} left - First project name.
+ * @param {string} right - Second project name.
+ * @returns {boolean} True when the names are equal or the pair is listed in
+ *   `COMPATIBLE`.
+ */
+function compatible(left, right) {
+  if (left === right) return true;
+  const pairKey = `${left}|${right}`;
+  return COMPATIBLE.has(pairKey);
+}
+/**
+ * Report whether at least one (left, right) pair is NOT compatible.
+ * A single mismatching pair is enough, even if other pairs agree.
+ * @param {string[]} leftSignals - Signals from one side.
+ * @param {string[]} rightSignals - Signals from the other side.
+ * @returns {boolean} False when either list is empty; otherwise true as soon
+ *   as any cross pair fails `compatible`.
+ */
+function hasAnyIncompatibleSignals(leftSignals, rightSignals) {
+  if (leftSignals.length === 0 || rightSignals.length === 0) return false;
+  for (const left of leftSignals) {
+    for (const right of rightSignals) {
+      if (!compatible(left, right)) return true;
+    }
+  }
+  return false;
+}
+/**
+ * Report whether at least one (left, right) pair is compatible.
+ * @param {string[]} leftSignals - Signals from one side.
+ * @param {string[]} rightSignals - Signals from the other side.
+ * @returns {boolean} True when any cross pair passes `compatible`; false when
+ *   none does, including when either list is empty.
+ */
+function hasAnyCompatibleSignals(leftSignals, rightSignals) {
+  for (const left of leftSignals) {
+    for (const right of rightSignals) {
+      if (compatible(left, right)) return true;
+    }
+  }
+  return false;
+}
+/**
+ * Cross-check the project signals found in a raw file's path, its source URL
+ * and the start of its body.
+ * @param {{rawPath: string, sourceUrl?: string|null, body?: string|null}} input
+ * @returns {{status: "conflict"|"suspicious"|"ok", pathSignals: string[],
+ *   sourceSignals: string[], bodySignals: string[], reasons: string[]}}
+ *   - "conflict": the path disagrees with the URL (any incompatible pair) or
+ *     with the body (no compatible pair).
+ *   - "suspicious": the path carries no signal and the URL and body share no
+ *     compatible pair.
+ *   - "ok": otherwise, with an empty `reasons` list.
+ */
+function assessSourceIdentity(input) {
+  const pathSignals = collectSignals(input.rawPath);
+  const sourceSignals = collectSignals(input.sourceUrl ?? "");
+  const bodySignals = collectSignals(firstBodyWindow(input.body));
+  const list = (signals) => signals.join(", ");
+  const verdict = (status, reasons) => ({ status, pathSignals, sourceSignals, bodySignals, reasons });
+  const reasons = [];
+  if (hasAnyIncompatibleSignals(pathSignals, sourceSignals)) {
+    reasons.push(`filename/path signals [${list(pathSignals)}] but source_url signals [${list(sourceSignals)}]`);
+  }
+  const pathAndBodyPresent = pathSignals.length > 0 && bodySignals.length > 0;
+  if (pathAndBodyPresent && !hasAnyCompatibleSignals(pathSignals, bodySignals)) {
+    reasons.push(`filename/path signals [${list(pathSignals)}] but body signals [${list(bodySignals)}]`);
+  }
+  if (reasons.length > 0) {
+    return verdict("conflict", reasons);
+  }
+  // Without a path signal the URL/body mismatch is only flagged, not failed.
+  const urlAndBodyPresent = pathSignals.length === 0 && sourceSignals.length > 0 && bodySignals.length > 0;
+  if (urlAndBodyPresent && !hasAnyCompatibleSignals(sourceSignals, bodySignals)) {
+    return verdict("suspicious", [`source_url signals [${list(sourceSignals)}] but body signals [${list(bodySignals)}]`]);
+  }
+  return verdict("ok", reasons);
+}
 // src/commands/lint.ts
 var STRUCT_MIN_BODY_LINES = 60;
 var STRUCT_MIN_SECTIONS = 3;
@@ -3189,7 +3262,7 @@ function extractSourceEntries(rawFm) {
   }
   return entries;
 }
-var ERROR_ORDER = ["broken_wikilinks", "invalid_frontmatter", "raw_dedup", "broken_sources", "tag_not_in_taxonomy", "path_too_long"];
+var ERROR_ORDER = ["broken_wikilinks", "invalid_frontmatter", "raw_source_identity_conflict", "raw_dedup", "broken_sources", "tag_not_in_taxonomy", "path_too_long"];
 var WARNING_ORDER = ["raw_body_duplicate", "raw_subdirectory_duplicate", "file_source_url", "index_incomplete", "index_link_format", "stale_page", "page_too_large", "log_rotate_needed", "orphans", "compound_refs", "legacy_citation_style", "orphaned_citations", "duplicate_frontmatter", "work_item_health", "orphaned_project_pages", "missing_overview", "missing_diagram"];
 var INFO_ORDER = ["bridges", "sparse_community", "page_structure", "topic_map_recommended", "frontmatter_wikilink", "wikilink_citation", "missing_tldr", "stale_sections", "cli_refs"];
 var KNOWN_BUCKETS = [...ERROR_ORDER, ...WARNING_ORDER, ...INFO_ORDER];
@@ -3290,6 +3363,7 @@ async function runLint(input) {
       buckets.raw_subdirectory_duplicate = subDirDupes;
     }
     const fileSourceUrlFlags = [];
+    const rawIdentityConflicts = [];
     for (const raw of scan.data.raw) {
       const text = await readPage(raw);
       const split = splitFrontmatter(text);
@@ -3297,8 +3371,25 @@ async function runLint(input) {
       if (/^source_url:\s*file:\/\//m.test(split.data.rawFrontmatter)) {
         fileSourceUrlFlags.push(raw.relPath);
       }
+      const sourceUrl = split.data.rawFrontmatter.match(/^source_url:\s*(.+)$/m)?.[1]?.trim().replace(/^["']|["']$/g, "") ?? "";
+      const assessment = assessSourceIdentity({
+        rawPath: raw.relPath,
+        sourceUrl,
+        body: split.data.body
+      });
+      if (assessment.status === "conflict") {
+        rawIdentityConflicts.push({
+          file: raw.relPath,
+          status: assessment.status,
+          reasons: assessment.reasons,
+          pathSignals: assessment.pathSignals,
+          sourceSignals: assessment.sourceSignals,
+          bodySignals: assessment.bodySignals
+        });
+      }
     }
     if (fileSourceUrlFlags.length > 0) buckets.file_source_url = fileSourceUrlFlags;
+    if (rawIdentityConflicts.length > 0) buckets.raw_source_identity_conflict = rawIdentityConflicts;
     const legacyPages = [];
     const orphanedPages = [];
     const structFlags = [];
@@ -4576,6 +4667,120 @@ function checkSyncLastPush(resolvedPath) {
   }
   return check("pass", "sync_last_push", "Vault sync recency", `Last push: ${dateStr} (${daysSince2} day(s) ago)`);
 }
+/**
+ * Report whether `origin/main` resolves in the repository at `resolvedPath`.
+ * @param {string} resolvedPath - Working directory for the git command.
+ * @returns {boolean} True when `git rev-parse --verify --quiet origin/main`
+ *   completes without throwing, false otherwise.
+ */
+function hasOriginMain(resolvedPath) {
+  const options = {
+    cwd: resolvedPath,
+    encoding: "utf8",
+    stdio: ["pipe", "pipe", "pipe"]
+  };
+  try {
+    execSync2("git rev-parse --verify --quiet origin/main", options);
+  } catch {
+    return false;
+  }
+  return true;
+}
+/**
+ * Doctor check: warn when the vault worktree has uncommitted changes.
+ * @param {string|undefined} resolvedPath - Vault directory, if one is known.
+ * @returns The value built by `check`: "pass" when the path or `.git` entry is
+ *   missing or `git status --porcelain` prints nothing, "warn" when it prints
+ *   entries or the command fails.
+ */
+function checkVaultGitDirty(resolvedPath) {
+  const id = "vault_git_dirty";
+  const label = "Vault git dirty state";
+  if (resolvedPath === void 0) {
+    return check("pass", id, label, "No vault path \u2014 check skipped");
+  }
+  const gitDir = join27(resolvedPath, ".git");
+  if (!existsSync9(gitDir)) {
+    return check("pass", id, label, "No git repo \u2014 check skipped");
+  }
+  try {
+    const porcelain = execSync2("git status --porcelain", {
+      cwd: resolvedPath,
+      encoding: "utf8",
+      stdio: ["pipe", "pipe", "pipe"]
+    });
+    // One non-empty porcelain line per changed path.
+    const dirtyCount = porcelain.trim().split("\n").filter((entry) => entry.length > 0).length;
+    return dirtyCount > 0
+      ? check("warn", id, label, `${dirtyCount} dirty file(s) in vault worktree`)
+      : check("pass", id, label, "Clean worktree");
+  } catch {
+    return check("warn", id, label, "Could not read git status");
+  }
+}
+/**
+ * Doctor check: count commits reachable from HEAD but not from origin/main.
+ * @param {string|undefined} resolvedPath - Vault directory, if one is known.
+ * @returns The result of `checkVaultGitComparison` for `origin/main..HEAD`.
+ */
+function checkVaultGitAhead(resolvedPath) {
+  const id = "vault_git_ahead";
+  const label = "Vault commits ahead";
+  const range = "origin/main..HEAD";
+  const nonZeroSuffix = "ahead of origin/main";
+  const zeroDetail = "0 commits ahead of origin/main";
+  return checkVaultGitComparison(resolvedPath, id, label, range, nonZeroSuffix, zeroDetail);
+}
+/**
+ * Doctor check: count commits reachable from origin/main but not from HEAD.
+ * @param {string|undefined} resolvedPath - Vault directory, if one is known.
+ * @returns The result of `checkVaultGitComparison` for `HEAD..origin/main`.
+ */
+function checkVaultGitBehind(resolvedPath) {
+  const id = "vault_git_behind";
+  const label = "Vault commits behind";
+  const range = "HEAD..origin/main";
+  const nonZeroSuffix = "behind origin/main";
+  const zeroDetail = "0 commits behind origin/main";
+  return checkVaultGitComparison(resolvedPath, id, label, range, nonZeroSuffix, zeroDetail);
+}
+/**
+ * Shared implementation for the ahead/behind doctor checks.
+ * @param {string|undefined} resolvedPath - Vault directory, if one is known.
+ * @param {string} id - Check identifier passed through to `check`.
+ * @param {string} label - Check label passed through to `check`.
+ * @param {string} range - Revision range for `git rev-list --count`. It is
+ *   interpolated into a shell command, so it must stay a trusted literal (the
+ *   ahead/behind wrappers pass fixed strings).
+ * @param {string} nonZeroSuffix - Text appended after "<n> commit(s) ".
+ * @param {string} zeroDetail - Detail used when the count is zero.
+ * @returns The value built by `check`: "pass" when the check is skipped (no
+ *   path, no `.git`, no origin/main) or the count is zero; "warn" when the
+ *   count is positive, unreadable, or the git command fails.
+ */
+function checkVaultGitComparison(resolvedPath, id, label, range, nonZeroSuffix, zeroDetail) {
+  if (resolvedPath === void 0) {
+    return check("pass", id, label, "No vault path \u2014 check skipped");
+  }
+  if (!existsSync9(join27(resolvedPath, ".git"))) {
+    return check("pass", id, label, "No git repo \u2014 check skipped");
+  }
+  if (!hasOriginMain(resolvedPath)) {
+    return check("pass", id, label, "origin/main unavailable \u2014 check skipped");
+  }
+  try {
+    const output = execSync2(`git rev-list --count ${range}`, {
+      cwd: resolvedPath,
+      encoding: "utf8",
+      stdio: ["pipe", "pipe", "pipe"]
+    }).trim();
+    const count = Number.parseInt(output, 10);
+    // Unparseable output gives NaN, and `NaN > 0` is false; without this guard
+    // the check would report a passing "0 commits" result it never measured.
+    if (!Number.isInteger(count)) {
+      return check("warn", id, label, "Could not compare HEAD with origin/main");
+    }
+    if (count > 0) {
+      return check("warn", id, label, `${count} commit(s) ${nonZeroSuffix}`);
+    }
+    return check("pass", id, label, zeroDetail);
+  } catch {
+    return check("warn", id, label, "Could not compare HEAD with origin/main");
+  }
+}
+/**
+ * Candidate locations of `wiki-pull.log`, most likely first.
+ * @param {string} home - Home directory the candidates are built under.
+ * @returns {string[]} The `Library/Logs` path first when `platform2()` reports
+ *   "darwin", otherwise the `.local/state/vault-sync/log` path first;
+ *   duplicates removed.
+ */
+function pullLogPaths(home) {
+  const libraryLog = join27(home, "Library", "Logs", "wiki-pull.log");
+  const stateLog = join27(home, ".local", "state", "vault-sync", "log", "wiki-pull.log");
+  const ordered = platform2() === "darwin" ? [libraryLog, stateLog] : [stateLog, libraryLog];
+  return [...new Set(ordered)];
+}
+/**
+ * Decide whether a log line falls inside the last 24 hours.
+ * @param {string} line - One log line, expected to start with a
+ *   `YYYY-MM-DDTHH:MM:SSZ` timestamp.
+ * @param {number} nowMs - Reference time in epoch milliseconds.
+ * @returns {boolean} True when the line has no leading timestamp, when
+ *   `Date.parse` cannot read the timestamp, or when it is at most 24 hours
+ *   before `nowMs`.
+ */
+function isRecentLogLine(line, nowMs) {
+  const dayMs = 24 * 60 * 60 * 1e3;
+  const stamp = line.match(/^(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z)/)?.[1];
+  // Lines that cannot be dated are kept rather than silently dropped.
+  if (stamp === undefined) return true;
+  const ts = Date.parse(stamp);
+  return !Number.isFinite(ts) || nowMs - ts <= dayMs;
+}
+/**
+ * Doctor check: scan the first existing `wiki-pull.log` for recent pull or
+ * rebase failures.
+ * @param {string|undefined} home - Home directory used to locate the log.
+ * @returns The value built by `check`: "pass" when there is no home directory,
+ *   no log file, or no matching recent line; "warn" with the count and the
+ *   last two matching lines (each cut to 100 characters) when failures are
+ *   found, or when the log cannot be read.
+ */
+function checkVaultGitPullFailures(home) {
+  const id = "vault_git_pull_failures";
+  const label = "Vault pull failures";
+  // The path lookup below runs outside the try block; skip on a missing home
+  // directory, as the sibling checks do for a missing vault path, instead of
+  // throwing out of the whole doctor run.
+  if (typeof home !== "string") {
+    return check("pass", id, label, "No home directory \u2014 check skipped");
+  }
+  const path = pullLogPaths(home).find((p) => existsSync9(p));
+  if (!path) {
+    return check("pass", id, label, "No wiki-pull.log found \u2014 check skipped");
+  }
+  try {
+    const lines = readFileSync7(path, "utf8").split(/\r?\n/).filter(Boolean);
+    const now = Date.now();
+    const failurePattern = /(pre-push pull failed|FAIL .*pull|FAIL .*rebase|cannot pull with rebase|unstaged changes)/i;
+    const failures = lines.filter((line) => isRecentLogLine(line, now) && failurePattern.test(line));
+    if (failures.length > 0) {
+      const sample = failures.slice(-2).map((line) => line.slice(0, 100)).join(" | ");
+      return check("warn", id, label, `${failures.length} recent pull failure(s): ${sample}`);
+    }
+    return check("pass", id, label, "No recent pull failures logged");
+  } catch {
+    return check("warn", id, label, `Could not read ${path}`);
+  }
+}
 function checkS3MountPerf(resolvedPath) {
   if (resolvedPath === void 0) {
     return check("pass", "s3_mount_perf", "S3 mount performance", "No vault path \u2014 check skipped");
@@ -5253,6 +5458,10 @@ async function runDoctor(input) {
   checks.push(checkObsidianTemplates(resolvedPath));
   checks.push(checkVaultGitRemote(resolvedPath));
   checks.push(checkSyncLastPush(resolvedPath));
+  checks.push(checkVaultGitDirty(resolvedPath));
+  checks.push(checkVaultGitAhead(resolvedPath));
+  checks.push(checkVaultGitBehind(resolvedPath));
+  checks.push(checkVaultGitPullFailures(input.home));
   checks.push(checkDotStoreClean(resolvedPath));
   checks.push(checkS3MountPerf(resolvedPath));
   checks.push(checkS3MountFreshness(resolvedPath));
@@ -5509,6 +5718,22 @@ async function runDrift(input) {
       continue;
     }
     const currentHash = createHash3("sha256").update(Buffer.from(resp.data.body, "utf8")).digest("hex");
+    const identity = assessSourceIdentity({
+      rawPath: raw.relPath,
+      sourceUrl,
+      body: resp.data.body
+    });
+    if (identity.status === "conflict") {
+      results.push({
+        raw_path: raw.relPath,
+        source_url: sourceUrl,
+        stored_sha256: storedHash,
+        current_sha256: currentHash,
+        status: "identity_conflict",
+        identity
+      });
+      continue;
+    }
     const drifted2 = currentHash !== storedHash;
     if (drifted2 && input.apply) {
       const newFm = rawFrontmatter.replace(/^sha256:\s*[a-f0-9]+$/m, `sha256: ${currentHash}`);
@@ -5536,12 +5761,19 @@ ${body}`;
   }
   const drifted = results.filter((r) => r.status === "drifted");
   const fetchFailed = results.filter((r) => r.status === "fetch_failed");
+  const identityConflicts = results.filter((r) => r.status === "identity_conflict");
   const updated = results.filter((r) => r.status === "updated");
   const unchanged = results.filter((r) => r.status === "unchanged").length;
-  const exitCode = drifted.length > 0 ? ExitCode.DRIFT_DETECTED : ExitCode.OK;
+  const exitCode = drifted.length > 0 || identityConflicts.length > 0 ? ExitCode.DRIFT_DETECTED : ExitCode.OK;
   const hintLines = [`scanned: ${results.length}, unchanged: ${unchanged}`];
   if (newResults.length > 0) hintLines.push(`new: ${newResults.length}`, ...newResults.map((n) => `  ${n.raw_path} (ingested: ${n.ingested})`));
   if (drifted.length > 0) hintLines.push(`drifted: ${drifted.length}`, ...drifted.map((d) => `  ${d.raw_path}`));
+  if (identityConflicts.length > 0) {
+    hintLines.push(
+      `identity_conflicts: ${identityConflicts.length}`,
+      ...identityConflicts.map((c) => `  ${c.raw_path}: ${c.identity?.reasons.join("; ") ?? "source identity conflict"}`)
+    );
+  }
   if (fetchFailed.length > 0) hintLines.push(`fetch_failed: ${fetchFailed.length}`, ...fetchFailed.map((f) => `  ${f.raw_path}: ${f.fetch_error}`));
   if (updated.length > 0) hintLines.push(`updated: ${updated.length}`, ...updated.map((u) => `  ${u.raw_path}`));
   if (input.apply && updated.length > 0) {
@@ -5554,7 +5786,7 @@ ${body}`;
   }
   return {
     exitCode,
-    result: ok({ scanned: results.length, drifted, fetch_failed: fetchFailed, updated, newFiles: newResults, unchanged, humanHint: hintLines.join("\n") })
+    result: ok({ scanned: results.length, drifted, fetch_failed: fetchFailed, identity_conflicts: identityConflicts, updated, newFiles: newResults, unchanged, humanHint: hintLines.join("\n") })
   };
 }
@@ -6756,6 +6988,25 @@ async function runIngest(input) {
   const typedRelPath = `${typedDir}/${slug}.md`;
   const rawAbsPath = join35(input.vault, rawRelPath);
   const typedAbsPath = join35(input.vault, typedRelPath);
+  const identity = assessSourceIdentity({
+    rawPath: rawRelPath,
+    sourceUrl: sourceUrl ?? void 0,
+    body: sourceContent
+  });
+  if (identity.status === "conflict") {
+    return {
+      exitCode: ExitCode.INGEST_VALIDATION_FAILED,
+      result: err("INGEST_VALIDATION_FAILED", {
+        message: "source identity conflict",
+        raw_path: rawRelPath,
+        source_url: sourceUrl,
+        reasons: identity.reasons,
+        pathSignals: identity.pathSignals,
+        sourceSignals: identity.sourceSignals,
+        bodySignals: identity.bodySignals
+      })
+    };
+  }
   const rawContent = buildRawContent(sourceUrl, today, sha256, sourceContent);
   const typedContent = buildTypedContent(
     input.title,

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "skillwiki",
-  "version": "0.8.2",
+  "version": "0.8.3-beta.2",
   "type": "module",
   "bin": {
     "skillwiki": "dist/cli.js"

package/skills/.claude-plugin/plugin.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "skillwiki",
-  "version": "0.8.2",
+  "version": "0.8.3-beta.2",
   "skills": "./",
   "description": "Project-aware Karpathy-style knowledge base for Claude Code: 18 prompt-only skills (wiki-*, proj-*, using-skillwiki) backed by the deterministic `skillwiki` CLI.",
   "author": {

package/skills/.codex-plugin/plugin.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "skillwiki",
-  "version": "0.8.2",
+  "version": "0.8.3-beta.2",
   "description": "Project-aware Karpathy-style knowledge base for Codex with 18 prompt-only skills backed by the deterministic skillwiki CLI.",
   "author": {
     "name": "karlorz",

package/skills/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@skillwiki/skills",
-  "version": "0.8.2",
+  "version": "0.8.3-beta.2",
   "private": true,
   "files": [
     "wiki-*",

package/skills/skills/wiki-ingest/SKILL.md CHANGED Viewed

@@ -18,13 +18,14 @@ Run `skillwiki lang` at the start. Generate page-body prose, narrative sections,
 0. **Resolve vault and language.** Run `skillwiki path` (fail if NO_VAULT_CONFIGURED) and `skillwiki lang`. Use the resolved vault path for all writes; use the canonical language for all generated prose.
 1. **Guard.** For each URL: run `skillwiki fetch-guard <url>`. If exit ≠ 0, STOP and surface the error. Do not retry.
 2. **Fetch.** Use `web_fetch` (or read local file) under Layer 2 controls (the CLI Layer 2 fetcher applies in tests; in skill runtime use `web_fetch` directly and treat any error as STOP).
-3. **Hash.** Write the raw file (frontmatter + body). Run `skillwiki hash <raw-file>` and embed the result in raw frontmatter `sha256:`.
-4. **Generate page(s).** Compose typed-knowledge page(s) with citations pre-attached (`^[raw/...]` markers). Every page MUST include:
+3. **Identity guard.** Before writing raw files, ensure the target raw filename/title, `source_url`, fetched H1/title, and early body subject agree. If `skillwiki ingest` reports `INGEST_VALIDATION_FAILED` with `source identity conflict`, STOP. Do not fix by renaming after the fact; choose the correct title/source pair or ask the user.
+4. **Hash.** Write the raw file (frontmatter + body). Run `skillwiki hash <raw-file>` and embed the result in raw frontmatter `sha256:`.
+5. **Generate page(s).** Compose typed-knowledge page(s) with citations pre-attached (`^[raw/...]` markers). Every page MUST include:
 - `> **TL;DR:**` blockquote as the first content after the title heading — a one-sentence summary of the page's key takeaway (under 200 chars). See SCHEMA.md `## TL;DR Convention`.
 - For pages tagged `architecture` or explaining workflows/systems: include a Mermaid diagram (`graph TB` or `sequenceDiagram`) in the body. Follow Obsidian-compatible Mermaid rules (see SCHEMA.md `## Mermaid Diagrams`).
-5. **Validate.** For each generated page: run `skillwiki validate <page>`. If exit ≠ 0, STOP — do not write index/log.
-6. **Apply writes in order.** raw → page(s) → `index.md` → `log.md`.
-7. **Confidence flag.** If only one source is cited, set `confidence: low`.
+6. **Validate.** For each generated page: run `skillwiki validate <page>`. If exit ≠ 0, STOP — do not write index/log.
+7. **Apply writes in order.** raw → page(s) → `index.md` → `log.md`.
+8. **Confidence flag.** If only one source is cited, set `confidence: low`.
 ## Provenance defaults
 - Default `provenance: research`.
 - If cwd is inside `projects/{slug}/`, set `provenance: project` and add `provenance_projects: ["[[slug]]"]`.
@@ -36,6 +37,7 @@ Raw ephemeral data (market feeds, logs, transient JSON) must be written to the *
 ## Stop conditions
 - `fetch-guard` non-zero.
 - Fetch timeout / size limit exceeded.
+- `INGEST_VALIDATION_FAILED` with `source identity conflict`.
 - `validate` non-zero on any page.
 - sha256 already exists in vault for the same source.
 ## Forbidden
@@ -46,7 +48,7 @@ Raw ephemeral data (market feeds, logs, transient JSON) must be written to the *
 - Writing `[[wikilinks]]` to pages that don't exist in the vault. Before linking, verify the target exists: check `index.md` or `ls` the target directory. If the target doesn't exist yet, use plain text instead of a wikilink.
 ## Batch Mode
 When the user provides multiple sources (a directory of files, a list of URLs, or a multi-document input):
-1. **Loop per source.** Execute steps 1–5 for each source individually (guard → fetch → hash → generate → validate).
+1. **Loop per source.** Execute steps 1–6 for each source individually (guard → fetch → identity guard → hash → generate → validate).
 2. **Accumulate, don't write yet.** Collect all raw files and pages in memory. Do not write `index.md` or `log.md` until every source has validated.
 3. **Fail fast.** If any page fails validation, STOP. Report all failures. Do not write index/log for any source.
 4. **Deduplication.** Before writing each raw file, check `sha256` against existing vault raw sources. Skip sources whose content is already present.

package/skills/skills/wiki-reingest/SKILL.md CHANGED Viewed

@@ -25,6 +25,7 @@ Standard four reads (SCHEMA, index, log, project context if applicable).
 1. Run `skillwiki drift [vault]`. Read the JSON output.
 2. Present findings grouped by status:
    - **drifted:** Source content has changed. Show stored vs current sha256.
+   - **identity_conflicts:** The fetched source no longer matches the raw filename/source identity. STOP and surface the conflict. Do not archive or reingest until a human chooses the correct source/filename pair.
    - **fetch_failed:** Could not re-fetch. Show error details.
    - **unchanged:** No action needed.
 3. For each drifted source, ask the user: archive old + ingest new, or skip?
@@ -51,4 +52,5 @@ Raw files are immutable (N9). Re-ingest never modifies an existing raw file. Ins
 - Modifying files in `raw/` directly (N9).
 - Re-ingesting without user approval for each drifted source.
+- Re-ingesting a source listed under `identity_conflicts` without explicit user approval and a corrected target filename/source URL.
 - Skipping the drift check and assuming sources have changed.

package/skills/wiki-ingest/SKILL.md CHANGED Viewed

@@ -18,13 +18,14 @@ Run `skillwiki lang` at the start. Generate page-body prose, narrative sections,
 0. **Resolve vault and language.** Run `skillwiki path` (fail if NO_VAULT_CONFIGURED) and `skillwiki lang`. Use the resolved vault path for all writes; use the canonical language for all generated prose.
 1. **Guard.** For each URL: run `skillwiki fetch-guard <url>`. If exit ≠ 0, STOP and surface the error. Do not retry.
 2. **Fetch.** Use `web_fetch` (or read local file) under Layer 2 controls (the CLI Layer 2 fetcher applies in tests; in skill runtime use `web_fetch` directly and treat any error as STOP).
-3. **Hash.** Write the raw file (frontmatter + body). Run `skillwiki hash <raw-file>` and embed the result in raw frontmatter `sha256:`.
-4. **Generate page(s).** Compose typed-knowledge page(s) with citations pre-attached (`^[raw/...]` markers). Every page MUST include:
+3. **Identity guard.** Before writing raw files, ensure the target raw filename/title, `source_url`, fetched H1/title, and early body subject agree. If `skillwiki ingest` reports `INGEST_VALIDATION_FAILED` with `source identity conflict`, STOP. Do not fix by renaming after the fact; choose the correct title/source pair or ask the user.
+4. **Hash.** Write the raw file (frontmatter + body). Run `skillwiki hash <raw-file>` and embed the result in raw frontmatter `sha256:`.
+5. **Generate page(s).** Compose typed-knowledge page(s) with citations pre-attached (`^[raw/...]` markers). Every page MUST include:
 - `> **TL;DR:**` blockquote as the first content after the title heading — a one-sentence summary of the page's key takeaway (under 200 chars). See SCHEMA.md `## TL;DR Convention`.
 - For pages tagged `architecture` or explaining workflows/systems: include a Mermaid diagram (`graph TB` or `sequenceDiagram`) in the body. Follow Obsidian-compatible Mermaid rules (see SCHEMA.md `## Mermaid Diagrams`).
-5. **Validate.** For each generated page: run `skillwiki validate <page>`. If exit ≠ 0, STOP — do not write index/log.
-6. **Apply writes in order.** raw → page(s) → `index.md` → `log.md`.
-7. **Confidence flag.** If only one source is cited, set `confidence: low`.
+6. **Validate.** For each generated page: run `skillwiki validate <page>`. If exit ≠ 0, STOP — do not write index/log.
+7. **Apply writes in order.** raw → page(s) → `index.md` → `log.md`.
+8. **Confidence flag.** If only one source is cited, set `confidence: low`.
 ## Provenance defaults
 - Default `provenance: research`.
 - If cwd is inside `projects/{slug}/`, set `provenance: project` and add `provenance_projects: ["[[slug]]"]`.
@@ -36,6 +37,7 @@ Raw ephemeral data (market feeds, logs, transient JSON) must be written to the *
 ## Stop conditions
 - `fetch-guard` non-zero.
 - Fetch timeout / size limit exceeded.
+- `INGEST_VALIDATION_FAILED` with `source identity conflict`.
 - `validate` non-zero on any page.
 - sha256 already exists in vault for the same source.
 ## Forbidden
@@ -46,7 +48,7 @@ Raw ephemeral data (market feeds, logs, transient JSON) must be written to the *
 - Writing `[[wikilinks]]` to pages that don't exist in the vault. Before linking, verify the target exists: check `index.md` or `ls` the target directory. If the target doesn't exist yet, use plain text instead of a wikilink.
 ## Batch Mode
 When the user provides multiple sources (a directory of files, a list of URLs, or a multi-document input):
-1. **Loop per source.** Execute steps 1–5 for each source individually (guard → fetch → hash → generate → validate).
+1. **Loop per source.** Execute steps 1–6 for each source individually (guard → fetch → identity guard → hash → generate → validate).
 2. **Accumulate, don't write yet.** Collect all raw files and pages in memory. Do not write `index.md` or `log.md` until every source has validated.
 3. **Fail fast.** If any page fails validation, STOP. Report all failures. Do not write index/log for any source.
 4. **Deduplication.** Before writing each raw file, check `sha256` against existing vault raw sources. Skip sources whose content is already present.

package/skills/wiki-reingest/SKILL.md CHANGED Viewed

@@ -25,6 +25,7 @@ Standard four reads (SCHEMA, index, log, project context if applicable).
 1. Run `skillwiki drift [vault]`. Read the JSON output.
 2. Present findings grouped by status:
    - **drifted:** Source content has changed. Show stored vs current sha256.
+   - **identity_conflicts:** The fetched source no longer matches the raw filename/source identity. STOP and surface the conflict. Do not archive or reingest until a human chooses the correct source/filename pair.
    - **fetch_failed:** Could not re-fetch. Show error details.
    - **unchanged:** No action needed.
 3. For each drifted source, ask the user: archive old + ingest new, or skip?
@@ -51,4 +52,5 @@ Raw files are immutable (N9). Re-ingest never modifies an existing raw file. Ins
 - Modifying files in `raw/` directly (N9).
 - Re-ingesting without user approval for each drifted source.
+- Re-ingesting a source listed under `identity_conflicts` without explicit user approval and a corrected target filename/source URL.
 - Skipping the drift check and assuming sources have changed.