npm - @doquflow/server - Versions diffs - 1.5.2 → 1.7.0 - Mend

@doquflow/server 1.5.2 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/dist/category-dir.js +12 -0
package/dist/extractor-rules.js +166 -0
package/dist/extractor-stoplist.js +89 -0
package/dist/tools/ingest-source.js +37 -17
package/dist/tools/save-answer-as-page.js +7 -18
package/package.json +5 -3

package/dist/category-dir.js ADDED Viewed

@@ -0,0 +1,12 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.categoryDir = categoryDir;
+const CATEGORY_PLURAL = {
+    entity: "entities",
+    concept: "concepts",
+    timeline: "timelines",
+    synthesis: "syntheses",
+};
+function categoryDir(category) {
+    return CATEGORY_PLURAL[category];
+}

package/dist/extractor-rules.js ADDED Viewed

@@ -0,0 +1,166 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.passesEntityRules = passesEntityRules;
+const extractor_stoplist_1 = require("./extractor-stoplist");
+/**
+ * Rule 1 — Stop-list rejection (fast path)
+ * The name is normalized first: trimmed, lowercased, every character outside
+ * [a-z0-9 ] replaced by a space, and whitespace runs collapsed to one space.
+ * Candidate is rejected when:
+ *   (a) the whole normalized phrase is on the stop-list, or
+ *   (b) it is a single word on the stop-list, or
+ *   (c) it is a multi-word phrase whose words are *all* on the stop-list
+ *       (e.g. "key components" — both "key" and "components" are generic).
+ * A name that normalizes to the empty string passes this rule.
+ *
+ * @param c Candidate object; only `c.name` (string) is read here.
+ * @returns `{ ok: true }` on pass, or `{ ok: false, reason }` where `reason`
+ *          quotes the original, un-normalized name.
+ */
+function ruleStopList(c) {
+    const normalized = c.name
+        .trim()
+        .toLowerCase()
+        .replace(/[^a-z0-9 ]/g, " ") // punctuation becomes a word break, e.g. "CI/CD" -> "ci cd"
+        .replace(/\s+/g, " ")
+        .trim();
+    if (!normalized) // nothing left to look up; this rule does not reject it
+        return { ok: true };
+    if (extractor_stoplist_1.ENTITY_STOPLIST.has(normalized)) { // (a) whole-phrase match
+        return { ok: false, reason: `"${c.name}" is a generic stop-list term` };
+    }
+    const words = normalized.split(/\s+/).filter(Boolean);
+    if (words.length === 1 && extractor_stoplist_1.ENTITY_STOPLIST.has(words[0])) { // (b) for one word this equals the check above
+        return { ok: false, reason: `"${c.name}" is a generic stop-list term` };
+    }
+    if (words.length > 1 && words.every((w) => extractor_stoplist_1.ENTITY_STOPLIST.has(w))) { // (c) every word is generic
+        return { ok: false, reason: `"${c.name}" contains only generic stop-list terms` };
+    }
+    return { ok: true };
+}
+/**
+ * Rule 2 — No emoji-only or punctuation-only slugs (fast path)
+ * Strip emoji; reject if resulting slug is empty or only underscores/dashes.
+ */
+function ruleNoEmojiOrPunctSlug(c) {
+    const stripped = c.name
+        .replace(/[\p{Emoji}\p{Emoji_Modifier}\p{Emoji_Component}]/gu, "")
+        .replace(/[^a-z0-9]/gi, "_")
+        .toLowerCase();
+    if (!stripped || /^[_\-]+$/.test(stripped)) {
+        return { ok: false, reason: `"${c.name}" produces an empty or punctuation-only slug` };
+    }
+    return { ok: true };
+}
+/**
+ * Rule 3 — Structural anchor
+ * Bold-text candidates are only accepted when they sit inside a meaningful
+ * structural paragraph (not a bare bullet point with no prose).
+ * Heading candidates always pass this rule.
+ *
+ * "Meaningful" is measured as at least 4 whitespace-separated tokens in
+ * `c.context` once a single leading list marker has been removed.
+ *
+ * @param c Candidate object; reads `c.source` and `c.context` (string).
+ * @returns `{ ok: true }` on pass, or `{ ok: false, reason }`.
+ */
+function ruleStructuralAnchor(c) {
+    if (c.source === "heading")
+        return { ok: true };
+    const stripped = c.context.replace(/^\s*[-*+]\s+/, "").trim(); // drop one leading "-", "*" or "+" bullet marker
+    const wordCount = stripped.split(/\s+/).filter(Boolean).length; // counts raw tokens; markdown syntax is not removed
+    if (wordCount < 4) {
+        return { ok: false, reason: "bold text in bare bullet with no sentence context" };
+    }
+    return { ok: true };
+}
+/**
+ * Rule 4 — Minimum token signal
+ * Must be ≥2 words, OR ≥1 word with a non-sentence-start capital (camelCase/PascalCase),
+ * OR contain a code-like separator (_, -, (), ::, .).
+ * Heading candidates are exempt — structural position grants authority.
+ *
+ * An empty or whitespace-only name has no words and is rejected.
+ * NOTE(review): the separator check is a character class, so a single ":" or a
+ * trailing "." also counts as a code-like separator — confirm this is intended.
+ *
+ * @param c Candidate object; reads `c.source` and `c.name` (string).
+ * @returns `{ ok: true }` on pass, or `{ ok: false, reason }`.
+ */
+function ruleMinimumTokenSignal(c) {
+    if (c.source === "heading")
+        return { ok: true };
+    const name = c.name.trim();
+    const words = name.split(/\s+/).filter(Boolean);
+    if (words.length >= 2)
+        return { ok: true };
+    const word = words[0] ?? ""; // "" when the name had no words at all
+    const hasInternalCap = /(?<!^)[A-Z]/.test(word); // an ASCII capital anywhere except the first character
+    const hasCodeSeparator = /[_\-().::]/.test(word); // any one of _ - ( ) . : (":" is listed twice in the class)
+    if (hasInternalCap || hasCodeSeparator)
+        return { ok: true };
+    return { ok: false, reason: "single generic word with no distinguishing signal" };
+}
+/**
+ * Rule 5 — Context requirement
+ * Bold text must have at least 1 sentence of real context (≥6 words).
+ * Heading candidates are exempt.
+ *
+ * `c.context` is split on runs of ".", "!" and "?"; the rule passes when at
+ * least one resulting fragment has 6 or more whitespace-separated tokens.
+ * A period inside a token (a version number, a file name) also ends a fragment.
+ *
+ * @param c Candidate object; reads `c.source` and `c.context` (string).
+ * @returns `{ ok: true }` on pass, or `{ ok: false, reason }`.
+ */
+function ruleContextRequirement(c) {
+    if (c.source === "heading")
+        return { ok: true };
+    const sentences = c.context.split(/[.!?]+/).filter((s) => s.trim().split(/\s+/).length >= 6); // keep only fragments long enough to count as a sentence
+    if (sentences.length === 0) {
+        return { ok: false, reason: "no surrounding sentence context (≥6 words) found" };
+    }
+    return { ok: true };
+}
+/**
+ * Rule 6 — Section-heading noise patterns
+ * Reject candidates whose surface form matches well-known structural noise:
+ * numbered list items ("1. Foo"), file references ("foo.md"), question-form
+ * headings ("What is X"), preposition-led phrases ("For X"), layer/phase
+ * markers ("Layer 1"), and full sentences captured as entities ("X is a Y").
+ * Also rejected: names that start with an emoji, names that start with "the ",
+ * and names containing a YYYY-MM-DD style date.
+ *
+ * Unlike rules 3–5 there is no exemption for heading candidates: only `c.name`
+ * is inspected. The checks run in the order written and the first match
+ * supplies the rejection reason.
+ *
+ * @param c Candidate object; only `c.name` (string) is read here.
+ * @returns `{ ok: true }` on pass, or `{ ok: false, reason }` quoting the trimmed name.
+ */
+function ruleSectionHeadingNoise(c) {
+    const name = c.name.trim();
+    // Numbered list items: "1. Foo", "2) Bar" (digits, then "." or ")", then whitespace)
+    if (/^\d+[.)]\s/.test(name)) {
+        return { ok: false, reason: `"${name}" is a numbered list item, not an entity` };
+    }
+    // File references masquerading as entities (name ends in a known source/doc extension)
+    if (/\.(md|ts|tsx|js|jsx|json|ya?ml|css|scss|sh|py|go|rb)$/i.test(name)) {
+        return { ok: false, reason: `"${name}" looks like a file reference, not an entity` };
+    }
+    // Question-form headings (interrogative word followed by whitespace)
+    if (/^(what|how|why|where|when|who|which)\s/i.test(name)) {
+        return { ok: false, reason: `"${name}" is a question-form section heading` };
+    }
+    // Preposition-led phrases (common bullet-list openings)
+    if (/^(for|with|by|to|in|on|at|of|from|about)\s/i.test(name)) {
+        return { ok: false, reason: `"${name}" starts with a preposition (section-heading form)` };
+    }
+    // Layer / phase / step markers: the keyword must be followed by a digit ("Phase 2", not "Phase shift")
+    if (/^(layer|phase|step|chapter|part|section)\s+\d/i.test(name)) {
+        return { ok: false, reason: `"${name}" is a layer/phase marker, not an entity` };
+    }
+    // Sentence-form: "X is a Y", "X is not a Y" — full sentences captured as entities
+    if (/\bis\s+(not\s+)?(a|an|the)\s+/i.test(name)) {
+        return { ok: false, reason: `"${name}" looks like a sentence, not an entity name` };
+    }
+    // Starts with emoji-as-decoration (e.g. "❌ Anti-pattern", "🔧 Storage", "✅ Allowed").
+    // These are section dividers / status markers, not entity names.
+    if (/^[\p{Emoji_Presentation}\p{Extended_Pictographic}]/u.test(name)) {
+        return { ok: false, reason: `"${name}" starts with an emoji (heading decoration, not an entity)` };
+    }
+    // "The X" pattern — descriptive references, not entity names
+    if (/^the\s+/i.test(name)) {
+        return { ok: false, reason: `"${name}" starts with "the " (descriptive reference, not an entity)` };
+    }
+    // Date strings captured as entities ("Date: 2026-04-27", "2026-04-27"); "-", "/" and "_" are accepted as separators
+    if (/\b\d{4}[-/_]\d{1,2}[-/_]\d{1,2}\b/.test(name)) {
+        return { ok: false, reason: `"${name}" contains a date (metadata, not an entity)` };
+    }
+    return { ok: true };
+}
+const RULES = [ // evaluated top to bottom; the first rejection wins
+    ruleStopList,
+    ruleNoEmojiOrPunctSlug,
+    ruleSectionHeadingNoise, // documented as Rule 6, but runs third, ahead of the context-based rules
+    ruleStructuralAnchor,
+    ruleMinimumTokenSignal,
+    ruleContextRequirement,
+];
+/**
+ * Run all entity quality rules against a candidate.
+ * Returns { ok: true } if the candidate passes all rules,
+ * or { ok: false, reason } for the first rule that rejects it.
+ *
+ * The rules read three fields of the candidate: `name` (string), `source`
+ * (compared against "heading") and `context` (string, read only for
+ * non-heading candidates). No rule reads any other field.
+ *
+ * @param candidate Candidate object with `name`, `source` and `context`.
+ * @returns `{ ok: true }` or the rejecting rule's `{ ok: false, reason }` object.
+ */
+function passesEntityRules(candidate) {
+    for (const rule of RULES) {
+        const result = rule(candidate);
+        if (!result.ok) // short-circuit: later rules are not evaluated
+            return result;
+    }
+    return { ok: true };
+}

package/dist/extractor-stoplist.js ADDED Viewed

@@ -0,0 +1,89 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.ENTITY_STOPLIST = void 0;
+/**
+ * Stop-list for wiki entity extraction.
+ * Any candidate whose normalized form appears here is rejected as a generic term
+ * that adds noise without semantic value.
+ *
+ * Applied as single-word match AND whole-phrase match. Multi-word phrases
+ * where every word is on this list are also rejected (see ruleStopList).
+ *
+ * Entries are written in lowercase with words separated by single spaces and
+ * no punctuation (e.g. "ci cd"). New entries should follow the same form;
+ * presumably this mirrors the normalization ruleStopList applies before lookup.
+ */
+exports.ENTITY_STOPLIST = new Set([
+    // Generic quantifiers / determiners
+    "any", "all", "some", "each", "every", "one", "two", "three", "four", "five",
+    "none", "both", "either", "other", "another", "several", "many", "few",
+    // Conjunctions / prepositions / filler / connectors
+    "the", "and", "or", "for", "with", "from", "into", "onto", "over", "under",
+    "then", "when", "while", "where", "which", "what", "who", "why", "how",
+    "that", "this", "these", "those", "here", "there", "of", "in", "on", "at",
+    "to", "by", "as", "is", "are", "was", "were", "be", "been", "being",
+    // Modal / auxiliary verbs
+    "can", "will", "shall", "would", "could", "should", "must", "may", "might",
+    "also", "just", "only", "even", "already", "still", "yet", "more",
+    // Boolean / status tokens
+    "true", "false", "yes", "no", "ok", "done", "todo",
+    // Common vague action words
+    "add", "added", "remove", "removed", "update", "updated",
+    "change", "changed", "fix", "fixed", "use", "used", "get", "set",
+    "allow", "allowed", "block", "blocked", "enable", "enabled", "disable", "disabled",
+    "create", "created", "build", "built", "run", "ran", "make", "made",
+    "show", "shows", "display", "displayed", "include", "included",
+    // Generic noun noise
+    "active", "audience", "behavior", "behaviour", "benefits", "binary",
+    "append", "approval", "audit", "auditable",
+    "note", "notes", "type", "types", "item", "items", "list", "value", "values",
+    "option", "options", "example", "examples", "result", "results",
+    "output", "outputs", "input", "inputs", "data", "info", "information",
+    "step", "steps", "section", "sections", "part", "parts", "content",
+    // Section-heading boilerplate (single words)
+    "architecture", "overview", "purpose", "documentation", "installation",
+    "commands", "components", "configuration", "management", "registration",
+    "views", "view", "iteration", "iterations",
+    "summary", "introduction", "background", "context", "scope", "rationale",
+    "goals", "goal", "motivation", "definition", "definitions", "terminology",
+    "implementation", "design", "approach", "methodology", "method", "methods",
+    "usage", "setup", "tutorial", "guide", "guides", "reference",
+    "requirements", "dependencies", "prerequisites", "features", "feature",
+    "categories", "category", "kind", "kinds",
+    "workflow", "workflows", "pipeline", "pipelines",
+    "system", "systems", "module", "modules", "package", "packages",
+    "service", "services", "interface", "interfaces", "layer", "layers",
+    "support", "supports", "supported", "available", "current",
+    // Common section adjectives (combined with stop nouns → all-stop phrase rejected)
+    "key", "core", "main", "primary", "secondary", "general", "basic", "advanced",
+    "common", "typical", "standard", "default", "custom", "specific", "generic",
+    "new", "old", "next", "previous", "first", "last", "final", "initial",
+    // Doc-structure terms
+    "title", "abstract", "outline", "table", "figure", "diagram", "appendix",
+    "footnote", "footnotes", "see", "above", "below",
+    // Common project section phrases (multi-word, exact match)
+    "getting started", "next steps", "key components", "key commands",
+    "core modules", "core components", "core types", "tech stack",
+    "technology stack", "use case", "use cases", "see also", "edge cases",
+    "acceptance criteria", "test scenarios", "testing results",
+    "implementation status", "implementation steps", "files to touch",
+    "format options", "format purity", "data flow", "data storage",
+    "entry point", "output includes", "step by step",
+    "command pattern", "tool pattern", "document type", "release history",
+    "design principles", "security considerations",
+    "global flags", "environment variables", "flag normalization",
+    "filename validation", "commit deduplication", "commit message format",
+    "create update", "find contradictions", "find gaps", "find orphans", "find stale",
+    "build pipeline", "ci cd", "ci cd pipeline", "github actions workflow",
+    "github actions integration", "version controlled", "version injection",
+    "deployment to release repo", "readme integration",
+    "lessons learned applied to docuflow", "specific wiki pages for docuflow",
+    "wiki maintenance for docuflow", "wiki pipeline tools", "codebase scanner tools",
+    "health guidance tools", "tool categories summary", "package responsibilities",
+    "component architecture", "monorepo overview", "package architecture",
+    "phase 2 testing", "initial setup", "ingest workflow", "query workflow",
+    "lint workflow", "related entities", "agent integration",
+    "integration with docuflow", "mcp registration",
+    "project context", "project auto discovery", "provider detection",
+    "silent fallback warning", "reviewer verdict format", "task spec format",
+    "reusable skill", "supported languages",
+    "what it detects", "what it does", "what it shows", "what starts",
+    "concepts ideas", "entities things", "syntheses analyses", "timelines events",
+    "graph tool", "format options", // NOTE(review): "format options" is already listed above; the Set ignores the repeat
+]);

package/dist/tools/ingest-source.js CHANGED Viewed

@@ -6,6 +6,8 @@ Object.defineProperty(exports, "__esModule", { value: true });
 exports.ingestSource = ingestSource;
 const node_path_1 = __importDefault(require("node:path"));
 const filesystem_1 = require("../filesystem");
+const category_dir_1 = require("../category-dir");
+const extractor_rules_1 = require("../extractor-rules");
 /**
  * Find the first paragraph in the source that mentions the given name.
  * Returns cleaned text (stripped of markdown syntax), up to 400 chars.
@@ -48,15 +50,29 @@ function extractFromMarkdown(content) {
         .join(" ")
         .substring(0, 500);
     // Find headers (entities/concepts)
-    for (const line of lines) {
-        // ### Header → entity/concept
-        if (line.match(/^###\s+/)) {
-            const header = line.replace(/^###\s+/, "").trim();
+    for (let i = 0; i < lines.length; i++) {
+        const line = lines[i];
+        // ## / ### / #### Header → entity/concept candidate
+        const headingMatch = line.match(/^#{2,4}\s+(.+)/);
+        if (headingMatch) {
+            const header = headingMatch[1].trim();
             if (header && !header.startsWith("[") && !header.startsWith("{")) {
-                entities.push({ name: header, type: "concept" });
+                // Gather surrounding context: lines after this heading until next heading or blank+blank
+                const contextLines = [];
+                for (let j = i + 1; j < Math.min(i + 8, lines.length); j++) {
+                    const l = lines[j];
+                    if (/^#{1,4}\s/.test(l))
+                        break;
+                    contextLines.push(l);
+                }
+                const context = contextLines.join(" ").trim();
+                const candidate = { name: header, type: "concept", source: "heading", context };
+                if ((0, extractor_rules_1.passesEntityRules)(candidate).ok) {
+                    entities.push({ name: header, type: "concept" });
+                }
             }
         }
-        // **bold text** → potential entity (but not arrays or JSON)
+        // **bold text** → potential entity candidate (but not arrays or JSON)
         const boldMatches = line.matchAll(/\*\*([^*]+)\*\*/g);
         for (const match of boldMatches) {
             const text = match[1].trim();
@@ -67,7 +83,18 @@ function extractFromMarkdown(content) {
                 !text.includes("{") &&
                 !text.includes('"') &&
                 !text.includes("`")) {
-                entities.push({ name: text, type: "entity" });
+                // Gather surrounding context: the current paragraph
+                const paraLines = [line];
+                for (let j = i + 1; j < Math.min(i + 5, lines.length); j++) {
+                    if (lines[j].trim() === "")
+                        break;
+                    paraLines.push(lines[j]);
+                }
+                const context = paraLines.join(" ").trim();
+                const candidate = { name: text, type: "entity", source: "bold", context };
+                if ((0, extractor_rules_1.passesEntityRules)(candidate).ok) {
+                    entities.push({ name: text, type: "entity" });
+                }
             }
         }
     }
@@ -206,15 +233,8 @@ async function ingestSource(input) {
         // Write all pages
         const pagesCreated = [];
         for (const page of wikiPages) {
-            // Determine category subdirectory - use correct plural forms
-            const pluralMap = {
-                entity: "entities",
-                concept: "concepts",
-                timeline: "timelines",
-                synthesis: "syntheses",
-            };
-            const categoryDir = node_path_1.default.join(wikiDir, pluralMap[page.category] || page.category + "s");
-            await (0, filesystem_1.ensureDir)(categoryDir);
+            const catDirPath = node_path_1.default.join(wikiDir, (0, category_dir_1.categoryDir)(page.category));
+            await (0, filesystem_1.ensureDir)(catDirPath);
             // Create page file with frontmatter
             const frontmatterYaml = `---
 created_at: ${page.frontmatter.created_at}
@@ -226,7 +246,7 @@ outbound_links: ${JSON.stringify(page.frontmatter.outbound_links)}
 ---
 `;
             const pageContent = frontmatterYaml + "\n" + page.content;
-            const pageFile = node_path_1.default.join(categoryDir, `${page.id}.md`);
+            const pageFile = node_path_1.default.join(catDirPath, `${page.id}.md`);
             await (0, filesystem_1.writeFileAtomic)(pageFile, pageContent);
             pagesCreated.push(page.id);
         }

package/dist/tools/save-answer-as-page.js CHANGED Viewed

@@ -7,22 +7,20 @@ exports.saveAnswerAsPage = saveAnswerAsPage;
 const node_path_1 = __importDefault(require("node:path"));
 const promises_1 = __importDefault(require("node:fs/promises"));
 const filesystem_1 = require("../filesystem");
+const category_dir_1 = require("../category-dir");
 async function saveAnswerAsPage(input) {
     try {
         const projectPath = node_path_1.default.resolve(input.project_path);
         const docuDir = node_path_1.default.join(projectPath, ".docuflow");
         const wikiDir = node_path_1.default.join(docuDir, "wiki");
-        // Use provided category or default to synthesis
         const category = input.category ?? "synthesis";
-        const categoryDir = node_path_1.default.join(wikiDir, category + "s");
-        await (0, filesystem_1.ensureDir)(categoryDir);
-        // Generate page ID from title
+        const catDirPath = node_path_1.default.join(wikiDir, (0, category_dir_1.categoryDir)(category));
+        await (0, filesystem_1.ensureDir)(catDirPath);
         const pageId = `query_${input.page_title
             .toLowerCase()
             .replace(/[^a-z0-9]/g, "_")
             .replace(/_+/g, "_")
             .substring(0, 50)}`;
-        // Generate frontmatter
         const now = new Date().toISOString();
         const sources = input.source_page_ids ?? [];
         const frontmatterYaml = `---
@@ -34,13 +32,6 @@ inbound_links: ${JSON.stringify([])}
 outbound_links: ${JSON.stringify(sources)}
 ---
 `;
-        const CATEGORY_DIR = {
-            synthesis: "syntheses",
-            entity: "entities",
-            concept: "concepts",
-            timeline: "timelines",
-        };
-        // Build page content
         const pageContent = `${frontmatterYaml}
 # ${input.page_title}
@@ -55,7 +46,7 @@ ${input.answer}
 ## Related Pages
 ${sources.length > 0
-            ? sources.map((s) => `- [\`${s}\`](../${CATEGORY_DIR[category] ?? category + "s"}/${s}.md)`).join("\n")
+            ? sources.map((s) => `- [\`${s}\`](../${(0, category_dir_1.categoryDir)(category)}/${s}.md)`).join("\n")
             : "No source pages linked."}
 ---
@@ -63,10 +54,8 @@ ${sources.length > 0
 *This page was generated by synthesizing answers from multiple wiki pages.*
 *To refine further, add more source documents and re-ingest.*
 `;
-        // Write the page file
-        const pageFile = node_path_1.default.join(categoryDir, `${pageId}.md`);
-        const bytes = await (0, filesystem_1.writeFileAtomic)(pageFile, pageContent);
-        // Also update log.md to record this
+        const pageFile = node_path_1.default.join(catDirPath, `${pageId}.md`);
+        await (0, filesystem_1.writeFileAtomic)(pageFile, pageContent);
         const logFile = node_path_1.default.join(docuDir, "log.md");
         try {
             let logContent = "";
@@ -77,7 +66,7 @@ ${sources.length > 0
             catch (e) {
                 logContent = "# Operation Log\n\n";
             }
-            const timestamp = now.split("T")[0]; // YYYY-MM-DD
+            const timestamp = now.split("T")[0];
             const logEntry = `## [${timestamp}] query-result | Saved answer as ${pageId}\n\n`;
             logContent += logEntry;
             await (0, filesystem_1.writeFileAtomic)(logFile, logContent);

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@doquflow/server",
-  "version": "1.5.2",
+  "version": "1.7.0",
   "description": "Docuflow MCP server — lets AI agents read codebases and persist living specs",
   "author": "Docuflow <hello@doquflows.dev>",
   "license": "MIT",
@@ -28,13 +28,15 @@
     "README.md"
   ],
   "scripts": {
-    "build": "tsc"
+    "build": "tsc",
+    "test": "vitest run"
   },
   "dependencies": {
     "@modelcontextprotocol/sdk": "^1.0.4"
   },
   "devDependencies": {
     "@types/node": "^22.0.0",
-    "typescript": "^5.6.0"
+    "typescript": "^5.6.0",
+    "vitest": "^4.1.6"
   }
 }