@doquflow/server 1.5.2 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,12 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.categoryDir = categoryDir;
4
/**
 * Directory name used for each known wiki page category.
 * Needed because the plurals are irregular ("entity" → "entities",
 * "synthesis" → "syntheses").
 */
const CATEGORY_PLURAL = {
    entity: "entities",
    concept: "concepts",
    timeline: "timelines",
    synthesis: "syntheses",
};
/**
 * Resolve the wiki subdirectory name for a page category.
 *
 * Known categories use their entry in CATEGORY_PLURAL. Any other value falls
 * back to the naive "<category>s" plural, so the result is always a string
 * (callers feed it directly into path.join, which throws on undefined).
 *
 * @param {string} category - Page category, e.g. "entity" or "synthesis".
 * @returns {string} Directory name for that category.
 */
function categoryDir(category) {
    // Own-property check: keys such as "constructor" or "toString" must not
    // resolve to members inherited from Object.prototype.
    if (Object.prototype.hasOwnProperty.call(CATEGORY_PLURAL, category)) {
        return CATEGORY_PLURAL[category];
    }
    return `${category}s`;
}
@@ -0,0 +1,166 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.passesEntityRules = passesEntityRules;
4
+ const extractor_stoplist_1 = require("./extractor-stoplist");
5
/**
 * Rule 1 — Stop-list rejection (fast path)
 *
 * The candidate name is lower-cased, every character outside [a-z0-9 ] is
 * turned into a space, and whitespace is collapsed. The candidate is rejected
 * when that normalized phrase:
 *   (a) is itself on the stop-list (this also covers any single word), or
 *   (b) consists of several words that are *all* on the stop-list
 *       (e.g. "key components" — both "key" and "components" are generic).
 * A name that normalizes to nothing passes here.
 *
 * @param {{ name: string }} c - Candidate under test.
 * @returns {{ ok: true } | { ok: false, reason: string }}
 */
function ruleStopList(c) {
    const stopTerms = extractor_stoplist_1.ENTITY_STOPLIST;
    const lowered = c.name.trim().toLowerCase();
    const phrase = lowered.replace(/[^a-z0-9 ]/g, " ").replace(/\s+/g, " ").trim();
    if (phrase === "") {
        return { ok: true };
    }
    if (stopTerms.has(phrase)) {
        return { ok: false, reason: `"${c.name}" is a generic stop-list term` };
    }
    const tokens = phrase.split(/\s+/).filter(Boolean);
    const everyTokenGeneric = tokens.every((token) => stopTerms.has(token));
    if (!everyTokenGeneric) {
        return { ok: true };
    }
    if (tokens.length === 1) {
        return { ok: false, reason: `"${c.name}" is a generic stop-list term` };
    }
    if (tokens.length > 1) {
        return { ok: false, reason: `"${c.name}" contains only generic stop-list terms` };
    }
    return { ok: true };
}
34
/**
 * Rule 2 — No emoji-only or punctuation-only slugs (fast path)
 *
 * Strips emoji from the name, maps every remaining non-alphanumeric character
 * to "_", and rejects the candidate when nothing but underscores/dashes (or
 * nothing at all) is left.
 *
 * Only pictographic emoji and keycap sequences are stripped. The broader
 * \p{Emoji} / \p{Emoji_Component} properties also contain the ASCII digits,
 * "#" and "*", so using them would erase names such as "404" or "2024" and
 * reject them as empty slugs.
 *
 * @param {{ name: string }} c - Candidate under test.
 * @returns {{ ok: true } | { ok: false, reason: string }}
 */
function ruleNoEmojiOrPunctSlug(c) {
    const withoutEmoji = c.name
        // Keycap sequences such as "1️⃣": base character + optional VS16 + U+20E3.
        .replace(/[#*0-9]\uFE0F?\u20E3/gu, "")
        // Pictographs, skin-tone modifiers, and the joiner / variation selector glue.
        .replace(/[\p{Extended_Pictographic}\p{Emoji_Modifier}\u200D\uFE0F]/gu, "");
    const slug = withoutEmoji.replace(/[^a-z0-9]/gi, "_").toLowerCase();
    if (!slug || /^[_\-]+$/.test(slug)) {
        return { ok: false, reason: `"${c.name}" produces an empty or punctuation-only slug` };
    }
    return { ok: true };
}
48
/**
 * Rule 3 — Structural anchor
 *
 * Heading candidates always pass. For any other source, one leading bullet
 * marker ("-", "*" or "+") is removed from the context and the remaining
 * whitespace-separated words are counted; fewer than four words means the
 * bold text sits in a bare bullet with no prose around it, so it is rejected.
 *
 * @param {{ source: string, context: string }} c - Candidate under test.
 * @returns {{ ok: true } | { ok: false, reason: string }}
 */
function ruleStructuralAnchor(c) {
    if (c.source !== "heading") {
        const prose = c.context.replace(/^\s*[-*+]\s+/, "").trim();
        const tokens = prose.split(/\s+/).filter((token) => token.length > 0);
        if (tokens.length < 4) {
            return { ok: false, reason: "bold text in bare bullet with no sentence context" };
        }
    }
    return { ok: true };
}
64
/**
 * Rule 4 — Minimum token signal
 *
 * A non-heading candidate must carry some distinguishing signal:
 *   - two or more whitespace-separated words, OR
 *   - a single word with a capital letter after its first character
 *     (camelCase / PascalCase), OR
 *   - a single word containing a code-like separator: "_", "-", "(", ")",
 *     "." or the two-character sequence "::".
 * Heading candidates are exempt — structural position grants authority.
 *
 * "::" is matched as a sequence rather than inside the character class, so a
 * lone ":" (e.g. the label "Warning:") does not count as a code separator.
 *
 * @param {{ name: string, source: string }} c - Candidate under test.
 * @returns {{ ok: true } | { ok: false, reason: string }}
 */
function ruleMinimumTokenSignal(c) {
    if (c.source === "heading") {
        return { ok: true };
    }
    const words = c.name.trim().split(/\s+/).filter(Boolean);
    if (words.length >= 2) {
        return { ok: true };
    }
    const word = words[0] ?? "";
    // Any upper-case ASCII letter that is not the very first character.
    const hasInternalCap = /(?<!^)[A-Z]/.test(word);
    const hasCodeSeparator = /[_\-().]|::/.test(word);
    if (hasInternalCap || hasCodeSeparator) {
        return { ok: true };
    }
    return { ok: false, reason: "single generic word with no distinguishing signal" };
}
84
/**
 * Rule 5 — Context requirement
 *
 * Heading candidates are exempt. For any other source, the context is split
 * on runs of ".", "!" or "?", and at least one resulting fragment must hold
 * six or more whitespace-separated words; otherwise the candidate is rejected.
 *
 * @param {{ source: string, context: string }} c - Candidate under test.
 * @returns {{ ok: true } | { ok: false, reason: string }}
 */
function ruleContextRequirement(c) {
    if (c.source === "heading") {
        return { ok: true };
    }
    const fragments = c.context.split(/[.!?]+/);
    const hasRealSentence = fragments.some((fragment) => {
        const wordCount = fragment.trim().split(/\s+/).length;
        return wordCount >= 6;
    });
    return hasRealSentence
        ? { ok: true }
        : { ok: false, reason: "no surrounding sentence context (≥6 words) found" };
}
98
/**
 * Ordered noise checks used by ruleSectionHeadingNoise. Each entry pairs a
 * surface-form pattern with the text that follows the quoted name in the
 * rejection reason. Order matters: the first matching entry decides the reason.
 */
const SECTION_NOISE_CHECKS = [
    // Numbered list items: "1. Foo", "2) Bar"
    [/^\d+[.)]\s/, "is a numbered list item, not an entity"],
    // File references masquerading as entities
    [/\.(md|ts|tsx|js|jsx|json|ya?ml|css|scss|sh|py|go|rb)$/i, "looks like a file reference, not an entity"],
    // Question-form headings
    [/^(what|how|why|where|when|who|which)\s/i, "is a question-form section heading"],
    // Preposition-led phrases (common bullet-list openings)
    [/^(for|with|by|to|in|on|at|of|from|about)\s/i, "starts with a preposition (section-heading form)"],
    // Layer / phase / step markers
    [/^(layer|phase|step|chapter|part|section)\s+\d/i, "is a layer/phase marker, not an entity"],
    // Sentence-form: "X is a Y", "X is not a Y" — full sentences captured as entities
    [/\bis\s+(not\s+)?(a|an|the)\s+/i, "looks like a sentence, not an entity name"],
    // Leading emoji used as decoration (e.g. "❌ Anti-pattern", "✅ Allowed"):
    // section dividers / status markers, not entity names.
    [/^[\p{Emoji_Presentation}\p{Extended_Pictographic}]/u, "starts with an emoji (heading decoration, not an entity)"],
    // "The X" pattern — descriptive references, not entity names
    [/^the\s+/i, 'starts with "the " (descriptive reference, not an entity)'],
    // Date strings captured as entities ("Date: 2026-04-27", "2026-04-27")
    [/\b\d{4}[-/_]\d{1,2}[-/_]\d{1,2}\b/, "contains a date (metadata, not an entity)"],
];
/**
 * Rule 6 — Section-heading noise patterns
 *
 * Rejects candidates whose trimmed name matches well-known structural noise:
 * numbered list items, file references, question-form headings,
 * preposition-led phrases, layer/phase markers, sentence-form names,
 * emoji-decorated headings, "The X" references, and date strings.
 *
 * @param {{ name: string }} c - Candidate under test.
 * @returns {{ ok: true } | { ok: false, reason: string }}
 */
function ruleSectionHeadingNoise(c) {
    const name = c.name.trim();
    const matched = SECTION_NOISE_CHECKS.find(([pattern]) => pattern.test(name));
    if (matched === undefined) {
        return { ok: true };
    }
    const [, explanation] = matched;
    return { ok: false, reason: `"${name}" ${explanation}` };
}
146
// Evaluation order of the entity quality rules. The first rule that rejects
// a candidate supplies the reason reported to the caller.
const RULES = [
    ruleStopList,
    ruleNoEmojiOrPunctSlug,
    ruleSectionHeadingNoise,
    ruleStructuralAnchor,
    ruleMinimumTokenSignal,
    ruleContextRequirement,
];
/**
 * Run every entity quality rule against a candidate, in RULES order.
 *
 * Evaluation stops at the first rejection: that rule's own result object is
 * returned and later rules are not invoked.
 *
 * @param {object} candidate - Candidate passed unchanged to each rule.
 * @returns {{ ok: true } | { ok: false, reason: string }} `{ ok: true }` when
 *   all rules pass, otherwise the first rejecting rule's result.
 */
function passesEntityRules(candidate) {
    let index = 0;
    while (index < RULES.length) {
        const verdict = RULES[index](candidate);
        if (!verdict.ok) {
            return verdict;
        }
        index += 1;
    }
    return { ok: true };
}
@@ -0,0 +1,89 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.ENTITY_STOPLIST = void 0;
4
+ /**
5
+ * Stop-list for wiki entity extraction.
6
+ * Any candidate whose normalized form appears here is rejected as a generic term
7
+ * that adds noise without semantic value.
8
+ *
9
+ * Applied as single-word match AND whole-phrase match. Multi-word phrases
10
+ * where every word is on this list are also rejected (see ruleStopList).
11
+ */
12
+ exports.ENTITY_STOPLIST = new Set([
13
+ // Generic quantifiers / determiners
14
+ "any", "all", "some", "each", "every", "one", "two", "three", "four", "five",
15
+ "none", "both", "either", "other", "another", "several", "many", "few",
16
+ // Conjunctions / prepositions / filler / connectors
17
+ "the", "and", "or", "for", "with", "from", "into", "onto", "over", "under",
18
+ "then", "when", "while", "where", "which", "what", "who", "why", "how",
19
+ "that", "this", "these", "those", "here", "there", "of", "in", "on", "at",
20
+ "to", "by", "as", "is", "are", "was", "were", "be", "been", "being",
21
+ // Modal / auxiliary verbs
22
+ "can", "will", "shall", "would", "could", "should", "must", "may", "might",
23
+ "also", "just", "only", "even", "already", "still", "yet", "more",
24
+ // Boolean / status tokens
25
+ "true", "false", "yes", "no", "ok", "done", "todo",
26
+ // Common vague action words
27
+ "add", "added", "remove", "removed", "update", "updated",
28
+ "change", "changed", "fix", "fixed", "use", "used", "get", "set",
29
+ "allow", "allowed", "block", "blocked", "enable", "enabled", "disable", "disabled",
30
+ "create", "created", "build", "built", "run", "ran", "make", "made",
31
+ "show", "shows", "display", "displayed", "include", "included",
32
+ // Generic noun noise
33
+ "active", "audience", "behavior", "behaviour", "benefits", "binary",
34
+ "append", "approval", "audit", "auditable",
35
+ "note", "notes", "type", "types", "item", "items", "list", "value", "values",
36
+ "option", "options", "example", "examples", "result", "results",
37
+ "output", "outputs", "input", "inputs", "data", "info", "information",
38
+ "step", "steps", "section", "sections", "part", "parts", "content",
39
+ // Section-heading boilerplate (single words)
40
+ "architecture", "overview", "purpose", "documentation", "installation",
41
+ "commands", "components", "configuration", "management", "registration",
42
+ "views", "view", "iteration", "iterations",
43
+ "summary", "introduction", "background", "context", "scope", "rationale",
44
+ "goals", "goal", "motivation", "definition", "definitions", "terminology",
45
+ "implementation", "design", "approach", "methodology", "method", "methods",
46
+ "usage", "setup", "tutorial", "guide", "guides", "reference",
47
+ "requirements", "dependencies", "prerequisites", "features", "feature",
48
+ "categories", "category", "kind", "kinds",
49
+ "workflow", "workflows", "pipeline", "pipelines",
50
+ "system", "systems", "module", "modules", "package", "packages",
51
+ "service", "services", "interface", "interfaces", "layer", "layers",
52
+ "support", "supports", "supported", "available", "current",
53
+ // Common section adjectives (combined with stop nouns → all-stop phrase rejected)
54
+ "key", "core", "main", "primary", "secondary", "general", "basic", "advanced",
55
+ "common", "typical", "standard", "default", "custom", "specific", "generic",
56
+ "new", "old", "next", "previous", "first", "last", "final", "initial",
57
+ // Doc-structure terms
58
+ "title", "abstract", "outline", "table", "figure", "diagram", "appendix",
59
+ "footnote", "footnotes", "see", "above", "below",
60
+ // Common project section phrases (multi-word, exact match)
61
+ "getting started", "next steps", "key components", "key commands",
62
+ "core modules", "core components", "core types", "tech stack",
63
+ "technology stack", "use case", "use cases", "see also", "edge cases",
64
+ "acceptance criteria", "test scenarios", "testing results",
65
+ "implementation status", "implementation steps", "files to touch",
66
+ "format options", "format purity", "data flow", "data storage",
67
+ "entry point", "output includes", "step by step",
68
+ "command pattern", "tool pattern", "document type", "release history",
69
+ "design principles", "security considerations",
70
+ "global flags", "environment variables", "flag normalization",
71
+ "filename validation", "commit deduplication", "commit message format",
72
+ "create update", "find contradictions", "find gaps", "find orphans", "find stale",
73
+ "build pipeline", "ci cd", "ci cd pipeline", "github actions workflow",
74
+ "github actions integration", "version controlled", "version injection",
75
+ "deployment to release repo", "readme integration",
76
+ "lessons learned applied to docuflow", "specific wiki pages for docuflow",
77
+ "wiki maintenance for docuflow", "wiki pipeline tools", "codebase scanner tools",
78
+ "health guidance tools", "tool categories summary", "package responsibilities",
79
+ "component architecture", "monorepo overview", "package architecture",
80
+ "phase 2 testing", "initial setup", "ingest workflow", "query workflow",
81
+ "lint workflow", "related entities", "agent integration",
82
+ "integration with docuflow", "mcp registration",
83
+ "project context", "project auto discovery", "provider detection",
84
+ "silent fallback warning", "reviewer verdict format", "task spec format",
85
+ "reusable skill", "supported languages",
86
+ "what it detects", "what it does", "what it shows", "what starts",
87
+ "concepts ideas", "entities things", "syntheses analyses", "timelines events",
88
+ "graph tool", "format options",
89
+ ]);
@@ -6,6 +6,8 @@ Object.defineProperty(exports, "__esModule", { value: true });
6
6
  exports.ingestSource = ingestSource;
7
7
  const node_path_1 = __importDefault(require("node:path"));
8
8
  const filesystem_1 = require("../filesystem");
9
+ const category_dir_1 = require("../category-dir");
10
+ const extractor_rules_1 = require("../extractor-rules");
9
11
  /**
10
12
  * Find the first paragraph in the source that mentions the given name.
11
13
  * Returns cleaned text (stripped of markdown syntax), up to 400 chars.
@@ -48,15 +50,29 @@ function extractFromMarkdown(content) {
48
50
  .join(" ")
49
51
  .substring(0, 500);
50
52
  // Find headers (entities/concepts)
51
- for (const line of lines) {
52
- // ### Header → entity/concept
53
- if (line.match(/^###\s+/)) {
54
- const header = line.replace(/^###\s+/, "").trim();
53
+ for (let i = 0; i < lines.length; i++) {
54
+ const line = lines[i];
55
+ // ## / ### / #### Header → entity/concept candidate
56
+ const headingMatch = line.match(/^#{2,4}\s+(.+)/);
57
+ if (headingMatch) {
58
+ const header = headingMatch[1].trim();
55
59
  if (header && !header.startsWith("[") && !header.startsWith("{")) {
56
- entities.push({ name: header, type: "concept" });
60
+ // Gather surrounding context: lines after this heading until next heading or blank+blank
61
+ const contextLines = [];
62
+ for (let j = i + 1; j < Math.min(i + 8, lines.length); j++) {
63
+ const l = lines[j];
64
+ if (/^#{1,4}\s/.test(l))
65
+ break;
66
+ contextLines.push(l);
67
+ }
68
+ const context = contextLines.join(" ").trim();
69
+ const candidate = { name: header, type: "concept", source: "heading", context };
70
+ if ((0, extractor_rules_1.passesEntityRules)(candidate).ok) {
71
+ entities.push({ name: header, type: "concept" });
72
+ }
57
73
  }
58
74
  }
59
- // **bold text** → potential entity (but not arrays or JSON)
75
+ // **bold text** → potential entity candidate (but not arrays or JSON)
60
76
  const boldMatches = line.matchAll(/\*\*([^*]+)\*\*/g);
61
77
  for (const match of boldMatches) {
62
78
  const text = match[1].trim();
@@ -67,7 +83,18 @@ function extractFromMarkdown(content) {
67
83
  !text.includes("{") &&
68
84
  !text.includes('"') &&
69
85
  !text.includes("`")) {
70
- entities.push({ name: text, type: "entity" });
86
+ // Gather surrounding context: the current paragraph
87
+ const paraLines = [line];
88
+ for (let j = i + 1; j < Math.min(i + 5, lines.length); j++) {
89
+ if (lines[j].trim() === "")
90
+ break;
91
+ paraLines.push(lines[j]);
92
+ }
93
+ const context = paraLines.join(" ").trim();
94
+ const candidate = { name: text, type: "entity", source: "bold", context };
95
+ if ((0, extractor_rules_1.passesEntityRules)(candidate).ok) {
96
+ entities.push({ name: text, type: "entity" });
97
+ }
71
98
  }
72
99
  }
73
100
  }
@@ -206,15 +233,8 @@ async function ingestSource(input) {
206
233
  // Write all pages
207
234
  const pagesCreated = [];
208
235
  for (const page of wikiPages) {
209
- // Determine category subdirectory - use correct plural forms
210
- const pluralMap = {
211
- entity: "entities",
212
- concept: "concepts",
213
- timeline: "timelines",
214
- synthesis: "syntheses",
215
- };
216
- const categoryDir = node_path_1.default.join(wikiDir, pluralMap[page.category] || page.category + "s");
217
- await (0, filesystem_1.ensureDir)(categoryDir);
236
+ const catDirPath = node_path_1.default.join(wikiDir, (0, category_dir_1.categoryDir)(page.category));
237
+ await (0, filesystem_1.ensureDir)(catDirPath);
218
238
  // Create page file with frontmatter
219
239
  const frontmatterYaml = `---
220
240
  created_at: ${page.frontmatter.created_at}
@@ -226,7 +246,7 @@ outbound_links: ${JSON.stringify(page.frontmatter.outbound_links)}
226
246
  ---
227
247
  `;
228
248
  const pageContent = frontmatterYaml + "\n" + page.content;
229
- const pageFile = node_path_1.default.join(categoryDir, `${page.id}.md`);
249
+ const pageFile = node_path_1.default.join(catDirPath, `${page.id}.md`);
230
250
  await (0, filesystem_1.writeFileAtomic)(pageFile, pageContent);
231
251
  pagesCreated.push(page.id);
232
252
  }
@@ -7,22 +7,20 @@ exports.saveAnswerAsPage = saveAnswerAsPage;
7
7
  const node_path_1 = __importDefault(require("node:path"));
8
8
  const promises_1 = __importDefault(require("node:fs/promises"));
9
9
  const filesystem_1 = require("../filesystem");
10
+ const category_dir_1 = require("../category-dir");
10
11
  async function saveAnswerAsPage(input) {
11
12
  try {
12
13
  const projectPath = node_path_1.default.resolve(input.project_path);
13
14
  const docuDir = node_path_1.default.join(projectPath, ".docuflow");
14
15
  const wikiDir = node_path_1.default.join(docuDir, "wiki");
15
- // Use provided category or default to synthesis
16
16
  const category = input.category ?? "synthesis";
17
- const categoryDir = node_path_1.default.join(wikiDir, category + "s");
18
- await (0, filesystem_1.ensureDir)(categoryDir);
19
- // Generate page ID from title
17
+ const catDirPath = node_path_1.default.join(wikiDir, (0, category_dir_1.categoryDir)(category));
18
+ await (0, filesystem_1.ensureDir)(catDirPath);
20
19
  const pageId = `query_${input.page_title
21
20
  .toLowerCase()
22
21
  .replace(/[^a-z0-9]/g, "_")
23
22
  .replace(/_+/g, "_")
24
23
  .substring(0, 50)}`;
25
- // Generate frontmatter
26
24
  const now = new Date().toISOString();
27
25
  const sources = input.source_page_ids ?? [];
28
26
  const frontmatterYaml = `---
@@ -34,13 +32,6 @@ inbound_links: ${JSON.stringify([])}
34
32
  outbound_links: ${JSON.stringify(sources)}
35
33
  ---
36
34
  `;
37
- const CATEGORY_DIR = {
38
- synthesis: "syntheses",
39
- entity: "entities",
40
- concept: "concepts",
41
- timeline: "timelines",
42
- };
43
- // Build page content
44
35
  const pageContent = `${frontmatterYaml}
45
36
  # ${input.page_title}
46
37
 
@@ -55,7 +46,7 @@ ${input.answer}
55
46
  ## Related Pages
56
47
 
57
48
  ${sources.length > 0
58
- ? sources.map((s) => `- [\`${s}\`](../${CATEGORY_DIR[category] ?? category + "s"}/${s}.md)`).join("\n")
49
+ ? sources.map((s) => `- [\`${s}\`](../${(0, category_dir_1.categoryDir)(category)}/${s}.md)`).join("\n")
59
50
  : "No source pages linked."}
60
51
 
61
52
  ---
@@ -63,10 +54,8 @@ ${sources.length > 0
63
54
  *This page was generated by synthesizing answers from multiple wiki pages.*
64
55
  *To refine further, add more source documents and re-ingest.*
65
56
  `;
66
- // Write the page file
67
- const pageFile = node_path_1.default.join(categoryDir, `${pageId}.md`);
68
- const bytes = await (0, filesystem_1.writeFileAtomic)(pageFile, pageContent);
69
- // Also update log.md to record this
57
+ const pageFile = node_path_1.default.join(catDirPath, `${pageId}.md`);
58
+ await (0, filesystem_1.writeFileAtomic)(pageFile, pageContent);
70
59
  const logFile = node_path_1.default.join(docuDir, "log.md");
71
60
  try {
72
61
  let logContent = "";
@@ -77,7 +66,7 @@ ${sources.length > 0
77
66
  catch (e) {
78
67
  logContent = "# Operation Log\n\n";
79
68
  }
80
- const timestamp = now.split("T")[0]; // YYYY-MM-DD
69
+ const timestamp = now.split("T")[0];
81
70
  const logEntry = `## [${timestamp}] query-result | Saved answer as ${pageId}\n\n`;
82
71
  logContent += logEntry;
83
72
  await (0, filesystem_1.writeFileAtomic)(logFile, logContent);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@doquflow/server",
3
- "version": "1.5.2",
3
+ "version": "1.7.0",
4
4
  "description": "Docuflow MCP server — lets AI agents read codebases and persist living specs",
5
5
  "author": "Docuflow <hello@doquflows.dev>",
6
6
  "license": "MIT",
@@ -28,13 +28,15 @@
28
28
  "README.md"
29
29
  ],
30
30
  "scripts": {
31
- "build": "tsc"
31
+ "build": "tsc",
32
+ "test": "vitest run"
32
33
  },
33
34
  "dependencies": {
34
35
  "@modelcontextprotocol/sdk": "^1.0.4"
35
36
  },
36
37
  "devDependencies": {
37
38
  "@types/node": "^22.0.0",
38
- "typescript": "^5.6.0"
39
+ "typescript": "^5.6.0",
40
+ "vitest": "^4.1.6"
39
41
  }
40
42
  }