npm - sdtk-wiki-kit - Versions diffs - 0.1.0 → 0.1.1 - Mend

sdtk-wiki-kit 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

package/README.md +87 -11
package/assets/atlas/build_atlas.py +164 -79
package/package.json +1 -1
package/src/commands/help.js +10 -3
package/src/commands/lint.js +2 -1
package/src/commands/search.js +88 -0
package/src/commands/wiki.js +83 -9
package/src/index.js +4 -1
package/src/lib/wiki-compile.js +694 -6
package/src/lib/wiki-extract.js +637 -0
package/src/lib/wiki-flags.js +8 -0
package/src/lib/wiki-lint.js +179 -2
package/src/lib/wiki-search.js +175 -0

package/src/lib/wiki-lint.js CHANGED Viewed

@@ -8,6 +8,7 @@ const {
   getWikiGraphPath,
   getWikiPagesPath,
   getWikiProvenanceSourcesPath,
+  getWikiRawSourcesPath,
   getWikiReportsPath,
   getWikiWorkspacePath,
   isPathInsideOrEqual,
@@ -38,10 +39,11 @@ const CATEGORY_DEFS = [
   ["stale", "Stale pages"],
   ["markers", "TODO/Open Questions/Gaps"],
   ["contradictions", "Candidate contradictions"],
+  ["sourceQuality", "Source quality"],
 ];
 function toPosix(value) {
-  return value.split(path.sep).join("/");
+  return String(value || "").replace(/\\/g, "/");
 }
 function stripQuotes(value) {
@@ -171,19 +173,192 @@ function readLintInputs(projectPath, findings) {
   const pagesRoot = getWikiPagesPath(projectPath);
   const pageFiles = listMarkdownPages(pagesRoot);
   const provenance = readJsonIfPresent(getWikiProvenanceSourcesPath(projectPath), { sources: [] });
+  const raw = readJsonIfPresent(getWikiRawSourcesPath(projectPath), { sources: [] });
   const graph = readJsonIfPresent(path.join(getWikiGraphPath(projectPath), "SDTK_DOC_GRAPH.json"), {
     edges: [],
   });
+  const graphIndex = readJsonIfPresent(path.join(getWikiGraphPath(projectPath), "SDTK_DOC_INDEX.json"), {
+    documents: [],
+  });
   if (provenance && provenance.__lintError) {
     appendFinding(findings, "provenance", provenance.__lintError);
   }
+  if (raw && raw.__lintError) {
+    appendFinding(findings, "sourceQuality", raw.__lintError);
+  }
   if (graph && graph.__lintError) {
     appendFinding(findings, "downstream", graph.__lintError);
   }
+  if (graphIndex && graphIndex.__lintError) {
+    appendFinding(findings, "sourceQuality", graphIndex.__lintError);
+  }
   const sources = provenance && Array.isArray(provenance.sources) ? provenance.sources : [];
-  return { graph, pageFiles, pagesRoot, sources };
+  const rawSources = raw && Array.isArray(raw.sources) ? raw.sources : [];
+  const graphDocuments = graphIndex && Array.isArray(graphIndex.documents) ? graphIndex.documents : [];
+  return { graph, graphDocuments, pageFiles, pagesRoot, rawSources, sources };
+}
/**
 * Canonicalize a source path for comparisons between source registries.
 *
 * The value is first passed through `toPosix`, then a single leading "./"
 * segment is removed.
 *
 * @param {*} value - Path-like value.
 * @returns {string} The normalized path.
 */
function normalizeSourcePath(value) {
  const leadingDotSlash = /^\.\//;
  const posixPath = toPosix(value);
  return posixPath.replace(leadingDotSlash, "");
}
/**
 * Read the normalized source path out of a source record.
 *
 * The first truthy value among `sourcePath`, `path` and `id` is used.
 *
 * @param {*} record - Source record; anything that is not a non-null object
 *   yields an empty string.
 * @returns {string} Normalized path, or "" when none is available.
 */
function sourceRecordPath(record) {
  const isRecord = Boolean(record) && typeof record === "object";
  if (!isRecord) {
    return "";
  }
  const rawPath = record.sourcePath || record.path || record.id || "";
  return normalizeSourcePath(rawPath);
}
/**
 * Resolve a recorded source path to the absolute path of an existing regular file.
 *
 * Forward slashes are converted to the platform separator. Relative paths are
 * resolved against `projectPath`; absolute paths are used as given.
 *
 * @param {string} projectPath - Base directory for relative source paths.
 * @param {string} sourcePath - Recorded source path (forward-slash separated).
 * @returns {string|null} Absolute file path, or `null` when the path is empty,
 *   does not exist, is not a regular file, or cannot be inspected.
 */
function resolveSourceFilePath(projectPath, sourcePath) {
  if (!sourcePath) return null;
  const nativePath = String(sourcePath).replace(/\//g, path.sep);
  const candidate = path.isAbsolute(nativePath)
    ? path.resolve(nativePath)
    : path.resolve(projectPath, nativePath);
  try {
    // One stat call instead of exists-then-stat: no race window, and any
    // failure (ENOENT, EACCES, ENOTDIR, ELOOP, ...) simply means "not usable".
    return fs.statSync(candidate).isFile() ? candidate : null;
  } catch (error) {
    return null;
  }
}
/**
 * Collect the distinct GitHub repositories referenced in a piece of text.
 *
 * Matches `github.com/<owner>/<repo>` with an optional scheme and `www.`
 * prefix. Trailing dots and a `.git` suffix are removed from the repo name,
 * and results are de-duplicated case-insensitively by canonical URL, keeping
 * first-seen order.
 *
 * @param {*} text - Text to scan; falsy values are treated as empty.
 * @returns {Array<{owner: string, repo: string, url: string}>} Repositories found.
 */
function extractGithubRepos(text) {
  const repos = [];
  const seen = new Set();
  // The lookbehind keeps the host anchored: without it, hosts that merely end
  // in "github.com" (notgithub.com, gist.github.com, api.github.com) would be
  // reported as repositories.
  const matcher =
    /(?<![A-Za-z0-9.-])(?:https?:\/\/)?(?:www\.)?github\.com\/([A-Za-z0-9](?:[A-Za-z0-9-]{0,38}))\/([A-Za-z0-9._-]+)/gi;
  let match;
  while ((match = matcher.exec(String(text || ""))) !== null) {
    const owner = match[1];
    // Sentence punctuation glued to the URL ends up in the capture; drop it
    // before removing a clone-style ".git" suffix.
    const repo = match[2].replace(/[).,;:]+$/g, "").replace(/\.git$/i, "");
    // Skip empty names and elided placeholders such as "owner/foo...bar".
    if (!repo || repo.includes("...")) continue;
    const url = `https://github.com/${owner}/${repo}`;
    const key = url.toLowerCase();
    if (seen.has(key)) continue;
    seen.add(key);
    repos.push({ owner, repo, url });
  }
  return repos;
}
/**
 * Return up to three short excerpts around mojibake-looking character runs.
 *
 * Recognized markers:
 *  - U+FFFD (replacement character);
 *  - uppercase "Ã" or "Â" followed by a character that is not an ASCII letter
 *    (UTF-8 lead bytes 0xC3/0xC2 decoded as a single-byte encoding);
 *  - "â€" (mis-decoded curly quotes and dashes) and "ðŸ" (mis-decoded emoji).
 *
 * Matching is case-sensitive and requires a non-letter after "Ã"/"Â" so that
 * legitimate text such as "não", "âge" or "SÃO PAULO" is not reported.
 *
 * @param {*} text - Text to scan; falsy values are treated as empty.
 * @returns {string[]} At most three whitespace-collapsed excerpts, each taken
 *   from 20 characters before to 40 characters after the start of a match.
 */
function detectMojibakeExamples(text) {
  // Normalize once so matching and slicing operate on the same string.
  const content = String(text || "");
  const examples = [];
  const matcher = /\uFFFD|[\u00C3\u00C2][^A-Za-z\r\n\u2028\u2029]|\u00E2\u20AC|\u00F0\u0178/g;
  let match;
  while (examples.length < 3 && (match = matcher.exec(content)) !== null) {
    const start = Math.max(0, match.index - 20);
    const end = Math.min(content.length, match.index + 40);
    examples.push(content.slice(start, end).replace(/\s+/g, " ").trim());
  }
  return examples;
}
/**
 * Decide whether a source title carries too little information.
 *
 * A title is weak when, after trimming, it is shorter than six characters, is
 * exactly "untitled", "note" or "readme" (any letter case), or equals the
 * source file's stem with runs of "_" / "-" replaced by a space.
 *
 * @param {*} title - Title recorded for the source.
 * @param {string} sourcePath - Source path used to derive the file stem.
 * @returns {boolean} `true` when the title is weak or filename-derived.
 */
function weakTitle(title, sourcePath) {
  const text = String(title || "").trim();
  const stem = path.basename(sourcePath || "", path.extname(sourcePath || ""));
  return (
    text.length < 6 ||
    // The group is required: without it the anchors bind to single
    // alternatives and any title merely containing "note" would match.
    /^(?:untitled|note|readme)$/i.test(text) ||
    text === stem.replace(/[_-]+/g, " ")
  );
}
/**
 * Append "sourceQuality" findings describing problems with registered sources.
 *
 * Checks performed, in order:
 *  1. raw source paths that are missing from the provenance sources;
 *  2. provenance paths missing from the graph document index (only when the
 *     index lists at least one document);
 *  3. per provenance record: unreadable file, mojibake-like text, no detected
 *     GitHub repository, weak title;
 *  4. the same first-detected repository URL, or the same repository, being
 *     referenced by more than one source.
 *
 * @param {string} projectPath - Project root used to resolve source paths.
 * @param {{sources: Array, rawSources: Array, graphDocuments: Array}} inputs -
 *   Lint inputs; the three listed arrays are the only fields read here.
 * @param {*} findings - Findings accumulator, passed through to `appendFinding`.
 * @returns {void}
 */
function analyzeSourceQuality(projectPath, inputs, findings) {
  const provenancePaths = new Set(inputs.sources.map(sourceRecordPath).filter(Boolean));
  const rawPaths = new Set(inputs.rawSources.map(sourceRecordPath).filter(Boolean));
  const graphPaths = new Set(
    inputs.graphDocuments
      .map((record) => normalizeSourcePath(record && (record.id || record.path)))
      .filter(Boolean)
  );
  // Lower-cased URL -> source paths that reference it.
  const repoToSources = new Map();
  const urlToSources = new Map();
  for (const sourcePath of rawPaths) {
    if (!provenancePaths.has(sourcePath)) {
      appendFinding(
        findings,
        "sourceQuality",
        `Raw source \`${sourcePath}\` is registered but absent from graph/provenance source coverage.`
      );
    }
  }
  for (const sourcePath of provenancePaths) {
    // An empty index is treated as "no index available", not as "nothing covered".
    if (graphPaths.size > 0 && !graphPaths.has(sourcePath)) {
      appendFinding(
        findings,
        "sourceQuality",
        `Provenance source \`${sourcePath}\` is absent from graph document index.`
      );
    }
  }
  for (const record of inputs.sources) {
    const sourcePath = sourceRecordPath(record);
    const resolved = resolveSourceFilePath(projectPath, sourcePath);
    const title = String((record && record.title) || "");
    let text = null;
    if (resolved) {
      try {
        text = fs.readFileSync(resolved, "utf-8");
      } catch (error) {
        // A single unreadable file (permissions, removed mid-run) must not
        // abort the whole lint run; it is reported as a finding below.
        text = null;
      }
    }
    if (text === null) {
      appendFinding(
        findings,
        "sourceQuality",
        `Source \`${sourcePath || "(missing)"}\` could not be read for source-quality lint.`
      );
      continue;
    }
    const repos = extractGithubRepos(text);
    const mojibakeExamples = detectMojibakeExamples(text);
    if (mojibakeExamples.length > 0) {
      appendFinding(
        findings,
        "sourceQuality",
        `Source \`${sourcePath}\` has mojibake-like text examples: ${mojibakeExamples.map((item) => `\`${item}\``).join("; ")}.`
      );
    }
    if (repos.length === 0) {
      appendFinding(findings, "sourceQuality", `Source \`${sourcePath}\` has no detected GitHub/source URL.`);
    }
    if (weakTitle(title, sourcePath)) {
      appendFinding(findings, "sourceQuality", `Source \`${sourcePath}\` has a weak or filename-derived title \`${title || "(missing)"}\`.`);
    }
    if (repos.length === 0) {
      appendFinding(findings, "sourceQuality", `Source \`${sourcePath}\` has low-confidence extraction because no valid GitHub repo candidate was detected.`);
    }
    if (repos.length > 0) {
      // The first repository found in a source is used as its source URL.
      const sourceUrl = repos[0].url;
      const existing = urlToSources.get(sourceUrl.toLowerCase()) || [];
      existing.push(sourcePath);
      urlToSources.set(sourceUrl.toLowerCase(), existing);
    }
    for (const repo of repos) {
      const existing = repoToSources.get(repo.url.toLowerCase()) || [];
      existing.push(sourcePath);
      repoToSources.set(repo.url.toLowerCase(), existing);
    }
  }
  for (const [url, paths] of urlToSources.entries()) {
    if (paths.length > 1) {
      appendFinding(
        findings,
        "sourceQuality",
        `Duplicate source URL candidate \`${url}\` appears in ${paths.map((item) => `\`${item}\``).join(", ")}.`
      );
    }
  }
  for (const [repo, paths] of repoToSources.entries()) {
    if (paths.length > 1) {
      appendFinding(
        findings,
        "sourceQuality",
        `Duplicate GitHub repo candidate \`${repo}\` appears in ${paths.map((item) => `\`${item}\``).join(", ")}.`
      );
    }
  }
}
 function analyzePages(projectPath) {
@@ -373,6 +548,8 @@ function analyzePages(projectPath) {
     }
   }
+  analyzeSourceQuality(projectPath, inputs, findings);
   return { findings, pageCount: pages.length };
 }

package/src/lib/wiki-search.js ADDED Viewed

@@ -0,0 +1,175 @@
+"use strict";
+const fs = require("fs");
+const path = require("path");
+const { ValidationError } = require("./errors");
+const {
+  getWikiWorkspacePath,
+  isPathInsideOrEqual,
+  resolveProjectPath,
+} = require("./wiki-paths");
+const DEFAULT_LIMIT = 10;
+const PERSONAL_BRAIN_RELATIVE = path.join(".sdtk", "wiki", "personal-brain");
/**
 * Convert a path-like value to forward-slash form.
 *
 * @param {*} value - Value to convert; falsy values become "".
 * @returns {string} The value as a string with every backslash replaced by "/".
 */
function toPosix(value) {
  const text = value ? String(value) : "";
  return text.split("\\").join("/");
}
/**
 * Lower-case a value for case-insensitive matching.
 *
 * @param {*} value - Value to normalize; falsy values become "".
 * @returns {string} Lower-cased string form of the value.
 */
function normalizeText(value) {
  const text = value ? String(value) : "";
  return text.toLowerCase();
}
/**
 * Split a query into lower-case search tokens.
 *
 * The query is cut on every run of characters outside a-z, 0-9, "_" and the
 * range U+00C0..U+1EF9; pieces shorter than two characters are discarded.
 *
 * @param {*} query - Raw query text.
 * @returns {string[]} Tokens in query order (duplicates are kept).
 */
function tokenize(query) {
  const separators = /[^a-z0-9\u00c0-\u1ef9_]+/i;
  const tokens = [];
  for (const piece of normalizeText(query).split(separators)) {
    const token = piece.trim();
    if (token.length >= 2) {
      tokens.push(token);
    }
  }
  return tokens;
}
+function collectMarkdownFiles(rootPath) {
+  const files = [];
+  function visit(current) {
+    const stat = fs.statSync(current);
+    if (stat.isDirectory()) {
+      for (const child of fs.readdirSync(current).sort()) {
+        visit(path.join(current, child));
+      }
+      return;
+    }
+    if (stat.isFile() && /\.md(?:arkdown)?$/i.test(current)) {
+      files.push(current);
+    }
+  }
+  visit(rootPath);
+  return files.sort((a, b) => toPosix(a).localeCompare(toPosix(b)));
+}
/**
 * Pick a display title for a Markdown page.
 *
 * The first line of the form "# <text>" wins. Otherwise the title is the
 * file name without its extension, with runs of "-" / "_" turned into a space.
 *
 * @param {string} text - Page content.
 * @param {string} filePath - Path of the page file.
 * @returns {string} The trimmed title.
 */
function extractTitle(text, filePath) {
  const headingPattern = /^#\s+(.+?)\s*$/m;
  const heading = text.match(headingPattern);
  if (heading) {
    return heading[1].trim();
  }
  const extension = path.extname(filePath);
  const stem = path.basename(filePath, extension);
  return stem.replace(/[-_]+/g, " ").trim();
}
/**
 * Build a short, whitespace-collapsed excerpt of a page for a search hit.
 *
 * The excerpt is centred on the first occurrence of the whole query or,
 * failing that, on the first token (in token order) that occurs in the text;
 * matching is case-insensitive. It runs from 80 characters before the hit to
 * 180 characters after its start. With no hit at all, the first 180 characters
 * of the collapsed text are returned.
 *
 * @param {string} text - Page content.
 * @param {string} query - Full query string.
 * @param {string[]} tokens - Query tokens.
 * @returns {string} The excerpt.
 */
function snippetFor(text, query, tokens) {
  const collapse = (value) => value.replace(/\s+/g, " ").trim();
  const haystack = normalizeText(text);
  const needle = normalizeText(query);
  let hit = needle ? haystack.indexOf(needle) : -1;
  if (hit < 0) {
    const firstTokenHit = tokens
      .map((token) => haystack.indexOf(token))
      .find((position) => position >= 0);
    hit = firstTokenHit === undefined ? -1 : firstTokenHit;
  }
  if (hit < 0) {
    return collapse(text).slice(0, 180);
  }
  const from = Math.max(0, hit - 80);
  const to = Math.min(text.length, hit + 180);
  return collapse(text.slice(from, to));
}
/**
 * Score one page against a query and explain the score.
 *
 * Whole-query matches add 50 (content), 30 (title) and 20 (path). Each token
 * found anywhere adds 12 if it is in the title, otherwise 8 if it is in the
 * path, otherwise 5. All comparisons are case-insensitive.
 *
 * @param {object} page
 * @param {string} page.text - Page content.
 * @param {string} page.title - Page title.
 * @param {string} page.relativePath - Page path shown to the user.
 * @param {string} page.query - Full query string.
 * @param {string[]} page.tokens - Query tokens.
 * @returns {{score: number, reasons: string[]}} Total score and the reasons for it.
 */
function scoreFile({ text, title, relativePath, query, tokens }) {
  const haystackText = normalizeText(text);
  const haystackTitle = normalizeText(title);
  const haystackPath = normalizeText(relativePath);
  const phrase = normalizeText(query);
  const reasons = [];
  let score = 0;

  if (phrase) {
    const phraseRules = [
      [haystackText, 50, "exact phrase match in page content"],
      [haystackTitle, 30, "exact phrase match in title"],
      [haystackPath, 20, "exact phrase match in path"],
    ];
    for (const [haystack, points, reason] of phraseRules) {
      if (haystack.includes(phrase)) {
        score += points;
        reasons.push(reason);
      }
    }
  }

  let matchedTokens = 0;
  for (const token of tokens) {
    // Only the best-ranked location of a token contributes to the score.
    let points = 0;
    if (haystackTitle.includes(token)) {
      points = 12;
    } else if (haystackPath.includes(token)) {
      points = 8;
    } else if (haystackText.includes(token)) {
      points = 5;
    }
    if (points > 0) {
      matchedTokens += 1;
      score += points;
    }
  }
  if (matchedTokens > 0) {
    reasons.push(`matched ${matchedTokens}/${tokens.length} query token(s)`);
  }
  return { score, reasons };
}
/**
 * Run a local, read-only keyword search over the project's "personal-brain"
 * Markdown pages.
 *
 * @param {object} options
 * @param {string} [options.projectPath] - Project root; the current working
 *   directory is used when omitted.
 * @param {string} options.query - Search text; must not be blank.
 * @param {number|string} [options.limit=DEFAULT_LIMIT] - Maximum matches to
 *   return. Positive values are capped at 50; anything else falls back to
 *   `DEFAULT_LIMIT`.
 * @returns {object} Search report: the trimmed query, the resolved paths, the
 *   number of files scanned, the top matches (highest score first, ties broken
 *   by path), the total match count, the applied limit and fixed mode flags.
 * @throws {ValidationError} When the project path is not a directory, the
 *   query is blank, the personal-brain path is not inside the project root, or
 *   the personal-brain directory does not exist.
 */
function runWikiSearch({ projectPath, query, limit = DEFAULT_LIMIT }) {
  const isExistingDirectory = (target) => fs.existsSync(target) && fs.statSync(target).isDirectory();

  const resolvedProjectPath = resolveProjectPath(projectPath || process.cwd());
  if (!isExistingDirectory(resolvedProjectPath)) {
    throw new ValidationError(`--project-path is not a valid directory: ${resolvedProjectPath}`);
  }

  const normalizedQuery = String(query || "").trim();
  if (!normalizedQuery) {
    throw new ValidationError('sdtk-wiki search requires a query, for example: sdtk-wiki search --project-path <path> "multi-agent".');
  }

  const parsedLimit = Number.parseInt(limit, 10);
  let safeLimit = DEFAULT_LIMIT;
  if (Number.isFinite(parsedLimit) && parsedLimit > 0) {
    safeLimit = Math.min(parsedLimit, 50);
  }

  const personalBrainPath = path.join(getWikiWorkspacePath(resolvedProjectPath), "personal-brain");
  if (!isPathInsideOrEqual(personalBrainPath, resolvedProjectPath)) {
    throw new ValidationError("Refusing to search outside the project root.");
  }
  if (!isExistingDirectory(personalBrainPath)) {
    throw new ValidationError(
      `No SDTK-WIKI personal brain found at ${personalBrainPath}. Run extract, compile dry-run, and compile --apply --yes from the generated JSON sidecar first.`
    );
  }

  const tokens = tokenize(normalizedQuery);
  const files = collectMarkdownFiles(personalBrainPath);
  const matches = [];
  files.forEach((filePath) => {
    const text = fs.readFileSync(filePath, "utf-8");
    const relativePath = toPosix(path.relative(resolvedProjectPath, filePath));
    const title = extractTitle(text, filePath);
    const { score, reasons } = scoreFile({ text, title, relativePath, query: normalizedQuery, tokens });
    // Pages that match nothing are left out of the report entirely.
    if (score > 0) {
      matches.push({
        path: relativePath,
        title,
        score,
        why: reasons.join("; "),
        snippet: snippetFor(text, normalizedQuery, tokens),
      });
    }
  });
  matches.sort((left, right) => right.score - left.score || left.path.localeCompare(right.path));

  return {
    query: normalizedQuery,
    projectPath: resolvedProjectPath,
    personalBrainPath,
    scannedFiles: files.length,
    matches: matches.slice(0, safeLimit),
    totalMatches: matches.length,
    limit: safeLimit,
    searchMode: "local_deterministic_personal_brain_markdown",
    premiumRequired: false,
    mutated: false,
  };
}
+module.exports = {
+  PERSONAL_BRAIN_RELATIVE,
+  runWikiSearch,
+  tokenize,
+};