@aigne/doc-smith 0.2.4 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. package/CHANGELOG.md +14 -0
  2. package/README.md +1 -0
  3. package/agents/check-detail-result.mjs +15 -120
  4. package/agents/check-structure-plan.mjs +56 -7
  5. package/agents/detail-generator-and-translate.yaml +7 -1
  6. package/agents/detail-regenerator.yaml +6 -57
  7. package/agents/docs-generator.yaml +5 -61
  8. package/agents/find-item-by-path.mjs +63 -14
  9. package/agents/input-generator.mjs +31 -21
  10. package/agents/language-selector.mjs +101 -0
  11. package/agents/load-config.mjs +3 -3
  12. package/agents/load-sources.mjs +55 -40
  13. package/agents/publish-docs.mjs +44 -153
  14. package/agents/retranslate.yaml +74 -0
  15. package/agents/save-docs.mjs +12 -2
  16. package/agents/save-output.mjs +9 -3
  17. package/agents/save-single-doc.mjs +19 -0
  18. package/agents/structure-planning.yaml +6 -0
  19. package/agents/team-publish-docs.yaml +7 -7
  20. package/agents/translate.yaml +3 -0
  21. package/aigne.yaml +5 -1
  22. package/docs-mcp/docs-search.yaml +1 -1
  23. package/docs-mcp/get-docs-detail.mjs +1 -1
  24. package/docs-mcp/get-docs-structure.mjs +1 -1
  25. package/docs-mcp/read-doc-content.mjs +1 -1
  26. package/package.json +16 -7
  27. package/prompts/check-structure-planning-result.md +4 -7
  28. package/prompts/content-detail-generator.md +1 -2
  29. package/prompts/structure-planning.md +7 -2
  30. package/prompts/translator.md +4 -0
  31. package/tests/test-all-validation-cases.mjs +707 -0
  32. package/utils/constants.mjs +3 -2
  33. package/utils/markdown-checker.mjs +386 -0
  34. package/utils/mermaid-validator.mjs +158 -0
  35. package/utils/mermaid-worker-pool.mjs +254 -0
  36. package/utils/mermaid-worker.mjs +242 -0
  37. package/utils/utils.mjs +155 -44
@@ -25,6 +25,7 @@ export const DEFAULT_INCLUDE_PATTERNS = [
25
25
  export const DEFAULT_EXCLUDE_PATTERNS = [
26
26
  "aigne-docs/**",
27
27
  "doc-smith/**",
28
+ ".aigne/**",
28
29
  "assets/**",
29
30
  "data/**",
30
31
  "images/**",
@@ -62,14 +63,14 @@ export const DEFAULT_EXCLUDE_PATTERNS = [
62
63
  // Supported languages for documentation
63
64
  export const SUPPORTED_LANGUAGES = [
64
65
  { code: "en", label: "English (en)", sample: "Hello" },
65
- { code: "zh-CN", label: "简体中文 (zh-CN)", sample: "你好" },
66
+ { code: "zh", label: "简体中文 (zh)", sample: "你好" },
66
67
  { code: "zh-TW", label: "繁體中文 (zh-TW)", sample: "你好" },
67
68
  { code: "ja", label: "日本語 (ja)", sample: "こんにちは" },
68
69
  { code: "ko", label: "한국어 (ko)", sample: "안녕하세요" },
69
70
  { code: "es", label: "Español (es)", sample: "Hola" },
70
71
  { code: "fr", label: "Français (fr)", sample: "Bonjour" },
71
72
  { code: "de", label: "Deutsch (de)", sample: "Hallo" },
72
- { code: "pt-BR", label: "Português (pt-BR)", sample: "Olá" },
73
+ { code: "pt", label: "Português (pt)", sample: "Olá" },
73
74
  { code: "ru", label: "Русский (ru)", sample: "Привет" },
74
75
  { code: "it", label: "Italiano (it)", sample: "Ciao" },
75
76
  { code: "ar", label: "العربية (ar)", sample: "مرحبا" },
@@ -0,0 +1,386 @@
1
+ import { unified } from "unified";
2
+ import remarkParse from "remark-parse";
3
+ import remarkGfm from "remark-gfm";
4
+ import remarkLint from "remark-lint";
5
+ import { VFile } from "vfile";
6
+ import { visit } from "unist-util-visit";
7
+ import { validateMermaidSyntax } from "./mermaid-validator.mjs";
8
+
9
/**
 * Count the cells in a single markdown table row.
 *
 * A pipe separates two cells unless it is directly preceded by a backslash
 * or sits between backticks (this function treats code spans as protecting
 * their pipes). The leading and trailing pipes of a row are optional and are
 * removed independently, so `| a | b`, `a | b |` and `| a | b |` all count
 * as two cells.
 *
 * @param {string} line - The table row line to analyze
 * @returns {number} - Number of cells in the row; 0 when the row has no content
 */
function countTableColumns(line) {
  let content = line.trim();

  // Strip each outer pipe on its own: a row may carry only one of them.
  if (content.startsWith("|")) {
    content = content.slice(1);
  }
  // An escaped trailing pipe (`\|`) is cell content, not a row delimiter.
  if (content.endsWith("|") && !content.endsWith("\\|")) {
    content = content.slice(0, -1);
  }

  if (!content.trim()) {
    return 0;
  }

  let separatorCount = 0;
  let inCode = false;

  for (let i = 0; i < content.length; i++) {
    const char = content[i];

    if (char === "`") {
      // Toggle code span state
      inCode = !inCode;
    } else if (char === "|" && !inCode && content[i - 1] !== "\\") {
      separatorCount++;
    }
  }

  // N separators always delimit N + 1 cells (empty cells included).
  return separatorCount + 1;
}
59
+
60
/**
 * Report inline markdown links whose target is not in the allowed set.
 *
 * Image links (`![alt](src)`), `http(s)://` and `mailto:` targets, and pure
 * in-page anchors (`#section`) are ignored. A target is accepted when either
 * the full target or its path portion (everything before the first `#`) is
 * allowed, so `/page#section` is valid whenever `/page` is.
 *
 * @param {string} markdown - The markdown content
 * @param {string} source - Source description for error reporting
 * @param {Set<string>|Iterable<string>} allowedLinks - Allowed link targets
 * @param {Array<string>} errorMessages - Array to push error messages to
 */
function checkDeadLinks(markdown, source, allowedLinks, errorMessages) {
  // Accept any iterable (e.g. an Array), not only a Set.
  const allowed =
    typeof allowedLinks?.has === "function"
      ? allowedLinks
      : new Set(allowedLinks ?? []);

  const linkRegex = /(?<!\!)\[([^\]]+)\]\(([^)]+)\)/g;

  for (const [, text, target] of markdown.matchAll(linkRegex)) {
    const trimLink = target.trim();

    // Exclude external links and mailto
    if (/^(https?:\/\/|mailto:)/.test(trimLink)) continue;

    // Drop the anchor; an empty path means the link is a pure in-page anchor
    const [path] = trimLink.split("#");
    if (!path) continue;

    if (!allowed.has(trimLink) && !allowed.has(path)) {
      errorMessages.push(
        `Found a dead link in ${source}: [${text}](${trimLink}), ensure the link exists in the structure plan path`
      );
    }
  }
}
93
+
94
/**
 * Check markdown for signs of truncated or badly structured content.
 *
 * Three checks are performed, each pushing at most one message:
 *  1. a backtick-fenced code block that is opened but never closed;
 *  2. non-empty content that contains no line break at all;
 *  3. non-empty content whose last character is neither `.` nor `。`.
 *
 * Fence tracking: an opening fence is a line of three or more backticks
 * followed by an optional info string that contains no backtick (so
 * "```c++" and "```js title=a.js" are recognised). It is closed only by a
 * line holding nothing but at least as many backticks. Fence-like lines
 * inside an open block are treated as block content.
 *
 * @param {string} markdown - The markdown content
 * @param {string} source - Source description for error reporting
 * @param {Array<string>} errorMessages - Array to push error messages to
 */
function checkContentStructure(markdown, source, errorMessages) {
  const openingFenceRegex = /^\s*(`{3,})[^`]*$/;
  const closingFenceRegex = /^\s*(`{3,})\s*$/;

  // Split on CRLF as well so a trailing "\r" never hides a fence.
  const lines = markdown.split(/\r?\n/);

  // Backtick count and 1-based line of the currently open fence, if any.
  let openFence = null;

  lines.forEach((line, index) => {
    if (openFence === null) {
      const opening = openingFenceRegex.exec(line);
      if (opening) {
        openFence = { length: opening[1].length, line: index + 1 };
      }
      return;
    }

    const closing = closingFenceRegex.exec(line);
    if (closing && closing[1].length >= openFence.length) {
      openFence = null;
    }
  });

  // Check for incomplete code blocks (started but not closed)
  if (openFence !== null) {
    errorMessages.push(
      `Found incomplete code block in ${source} starting at line ${openFence.line}: code block opened with \`\`\` but never closed. Please return the complete content`
    );
  }

  const trimmedText = markdown.trim();

  // Check single line content
  const newlineCount = (markdown.match(/\n/g) || []).length;
  if (newlineCount === 0 && trimmedText.length > 0) {
    errorMessages.push(
      `Found single line content in ${source}: content appears to be on only one line, check for missing line breaks`
    );
  }

  // Check if content ends with proper punctuation (indicating completeness)
  if (
    trimmedText.length > 0 &&
    !trimmedText.endsWith(".") &&
    !trimmedText.endsWith("。")
  ) {
    errorMessages.push(
      `Found incomplete content in ${source}: content does not end with proper punctuation (. or 。). Please return the complete content`
    );
  }
}
155
+
156
/**
 * Push messages for Mermaid constructs that this checker flags as rendering
 * problems: backticks inside quoted node labels, numbered-list text in edge
 * descriptions, and unquoted node labels containing special characters.
 *
 * @param {string} mermaidContent - Body of the mermaid code block
 * @param {number|string} line - Line of the code block, or "unknown"
 * @param {string} source - Source description for error reporting
 * @param {Array<string>} errorMessages - Array to push error messages to
 */
function collectMermaidRenderingIssues(
  mermaidContent,
  line,
  source,
  errorMessages
) {
  // Check for backticks in node labels
  const nodeLabelRegex =
    /[A-Za-z0-9_]+\["([^"]*`[^"]*)"\]|[A-Za-z0-9_]+{"([^}]*`[^}]*)"}/g;
  for (const match of mermaidContent.matchAll(nodeLabelRegex)) {
    const label = match[1] || match[2];
    errorMessages.push(
      `Found backticks in Mermaid node label in ${source} at line ${line}: "${label}" - backticks in node labels cause rendering issues in Mermaid diagrams`
    );
  }

  // Check for numbered list format in edge descriptions
  const edgeDescriptionRegex = /--\s*"([^"]*)"\s*-->/g;
  for (const [, description] of mermaidContent.matchAll(edgeDescriptionRegex)) {
    if (/^\d+\.\s/.test(description)) {
      errorMessages.push(
        `Unsupported markdown: list - Found numbered list format in Mermaid edge description in ${source} at line ${line}: "${description}" - numbered lists in edge descriptions are not supported`
      );
    }
  }

  // Check for special characters in node labels that should be quoted
  const nodeWithSpecialCharsRegex =
    /([A-Za-z0-9_]+)\[([^\]]*[(){}:;,\-\s\.][^\]]*)\]/g;
  const specialChars = ["(", ")", "{", "}", ":", ";", ",", "-", "."];
  for (const [, nodeId, label] of mermaidContent.matchAll(
    nodeWithSpecialCharsRegex
  )) {
    // Already quoted labels are fine
    if (/^".*"$/.test(label)) continue;

    // The regex also matches on whitespace; only the listed characters count
    const foundSpecialChars = specialChars.filter((char) =>
      label.includes(char)
    );
    if (foundSpecialChars.length > 0) {
      errorMessages.push(
        `Found unquoted special characters in Mermaid node label in ${source} at line ${line}: "${label}" contains ${foundSpecialChars.join(
          ", "
        )} - node labels with special characters should be quoted like ${nodeId}["${label}"]`
      );
    }
  }
}

/**
 * Compare column counts around table separator lines in the raw text.
 * A line is treated as a separator when it is pipe-delimited and contains a
 * hyphen; it is only examined when the previous line is a pipe-delimited row.
 *
 * @param {string} markdown - The markdown content
 * @param {string} source - Source description for error reporting
 * @param {Array<string>} errorMessages - Array to push error messages to
 */
function checkTableColumnConsistency(markdown, source, errorMessages) {
  const tableRowRegex = /^\s*\|.*\|\s*$/;
  const separatorLikeRegex = /^\s*\|.*-.*\|\s*$/;
  const lines = markdown.split("\n");

  // Start at 1: a separator needs a header line above it.
  for (let i = 1; i < lines.length; i++) {
    if (!separatorLikeRegex.test(lines[i])) continue;
    if (!tableRowRegex.test(lines[i - 1])) continue;

    const separatorColumns = countTableColumns(lines[i]);
    const headerColumns = countTableColumns(lines[i - 1]);

    if (separatorColumns !== headerColumns) {
      errorMessages.push(
        `Found table separator with mismatched column count in ${source} at line ${
          i + 1
        }: separator has ${separatorColumns} columns but header has ${headerColumns} columns - this causes table rendering issues`
      );
    }

    // Also check the first row after the separator, when there is one
    const nextLine = lines[i + 1];
    if (nextLine !== undefined && tableRowRegex.test(nextLine)) {
      const dataColumns = countTableColumns(nextLine);
      if (separatorColumns !== dataColumns) {
        errorMessages.push(
          `Found table data row with mismatched column count in ${source} at line ${
            i + 2
          }: data row has ${dataColumns} columns but separator defines ${separatorColumns} columns - this causes table rendering issues`
        );
      }
    }
  }
}

/**
 * Turn one lint message from the VFile into a check-detail-result style string.
 *
 * @param {Object} message - Entry of `file.messages`
 * @param {string} source - Source description for error reporting
 * @returns {string} - Formatted error message
 */
function formatLintMessage(message, source) {
  const line = message.line || "unknown";
  const reason = message.reason || "Unknown markdown issue";
  const ruleId = message.ruleId || message.source || "markdown";

  // Categorize different types of issues
  let errorType = "markdown formatting";
  if (ruleId.includes("table")) {
    errorType = "table";
  } else if (ruleId.includes("code")) {
    errorType = "code block";
  } else if (ruleId.includes("link")) {
    errorType = "link";
  }

  return line !== "unknown"
    ? `Found ${errorType} issue in ${source} at line ${line}: ${reason} (${ruleId})`
    : `Found ${errorType} issue in ${source}: ${reason} (${ruleId})`;
}

/**
 * Check markdown content for formatting issues and mermaid syntax errors.
 *
 * Messages are collected in this order: dead links (only when
 * `options.allowedLinks` is given), content-structure checks, Mermaid
 * rendering checks, table column checks, Mermaid syntax errors, and finally
 * messages produced by running the unified processor. Any exception raised
 * while processing is converted into a single "markdown processing error"
 * message instead of being thrown.
 *
 * @param {string} markdown - The markdown content to check
 * @param {string} [source] - Source description for error reporting (e.g., "result")
 * @param {Object} [options] - Additional options for validation
 * @param {Set<string>|Array<string>} [options.allowedLinks] - Allowed link targets for link validation
 * @returns {Promise<Array<string>>} - Array of error messages in check-detail-result format
 */
export async function checkMarkdown(
  markdown,
  source = "content",
  options = {}
) {
  const file = new VFile({ value: markdown, path: source });
  const errorMessages = [];

  try {
    const { allowedLinks } = options;

    // NOTE(review): the rule names below are passed as options to remarkLint
    // itself; confirm that this actually enables the listed rules, since
    // remark-lint rules are usually registered as separate plugins.
    const processor = unified()
      .use(remarkParse)
      .use(remarkGfm)
      .use(remarkLint)
      // Add specific useful rules, avoiding overly strict formatting ones
      .use(remarkLint, [
        // Content quality rules (keep these)
        "no-duplicate-headings",
        "no-duplicate-definitions",
        "no-unused-definitions",
        "no-undefined-references",

        // Structural rules (keep these)
        "no-heading-content-indent",
        "no-heading-indent",
        "no-multiple-toplevel-headings",

        // Link rules (keep these)
        "no-reference-like-url",
        "no-unneeded-full-reference-image",
        "no-unneeded-full-reference-link",
        "code-block-style",

        // Skip overly strict formatting rules that don't affect rendering:
        // - final-newline (missing newline at end)
        // - list-item-indent (flexible list spacing)
        // - table-cell-padding (flexible table spacing)
        // - emphasis-marker (allow both * and _)
        // - strong-marker (allow both ** and __)
      ]);

    // Parse markdown content to AST
    const ast = processor.parse(file);

    // 1. Check dead links if allowedLinks is provided
    if (allowedLinks) {
      // The link check calls `.has`, so turn arrays/iterables into a Set first
      const linkSet =
        typeof allowedLinks.has === "function"
          ? allowedLinks
          : new Set(allowedLinks);
      checkDeadLinks(markdown, source, linkSet, errorMessages);
    }

    // 2. Check content structure and formatting issues
    checkContentStructure(markdown, source, errorMessages);

    // 3. Check mermaid code blocks
    const mermaidChecks = [];
    visit(ast, "code", (node) => {
      if (node.lang?.toLowerCase() !== "mermaid") return;

      const line = node.position?.start?.line || "unknown";

      // Syntax validation is asynchronous; failures are recorded once the
      // promise settles, i.e. after the synchronous checks below.
      mermaidChecks.push(
        validateMermaidSyntax(node.value).catch((error) => {
          const errorMessage =
            error?.message || String(error) || "Unknown mermaid syntax error";
          errorMessages.push(
            `Found Mermaid syntax error in ${source} at line ${line}: ${errorMessage}`
          );
        })
      );

      collectMermaidRenderingIssues(node.value, line, source, errorMessages);
    });

    // 4. Check table separators in original text (since AST normalizes them)
    checkTableColumnConsistency(markdown, source, errorMessages);

    // Wait for all mermaid checks to complete
    await Promise.all(mermaidChecks);

    // Run markdown linting rules
    await processor.run(ast, file);

    for (const message of file.messages) {
      errorMessages.push(formatLintMessage(message, source));
    }

    return errorMessages;
  } catch (error) {
    // Handle any unexpected errors during processing
    errorMessages.push(
      `Found markdown processing error in ${source}: ${
        error?.message || String(error)
      }`
    );
    return errorMessages;
  }
}
@@ -0,0 +1,158 @@
1
+ /**
2
+ * Simplified Mermaid validation using Worker Thread pool
3
+ * Provides concurrent-safe validation with isolated worker environments
4
+ */
5
+
6
+ import {
7
+ getMermaidWorkerPool,
8
+ shutdownMermaidWorkerPool,
9
+ } from "./mermaid-worker-pool.mjs";
10
+
11
/**
 * Legacy entry point for mermaid validation, kept so existing callers keep
 * working. It simply forwards to validateMermaidSyntax.
 *
 * @param {string} content - Mermaid diagram content
 * @returns {Promise<boolean>} - Resolves with the result of validateMermaidSyntax
 * @throws {Error} - Whatever validateMermaidSyntax rejects with
 * @deprecated Use validateMermaidSyntax instead which uses worker pool
 */
export async function validateMermaidWithOfficialParser(content) {
  const verdict = await validateMermaidSyntax(content);
  return verdict;
}
23
+
24
/**
 * Basic mermaid syntax validation fallback.
 *
 * Performs three coarse checks on the raw text:
 *  1. the diagram declares a known type, compared case-insensitively so
 *     that Mermaid's `gitGraph` keyword is recognised; the declaration is
 *     looked for on the first line and on the first line after any leading
 *     YAML front matter block (`---` ... `---`) and `%%` comment/directive
 *     lines;
 *  2. the total number of opening `[{(` equals the number of closing `]})`;
 *  3. single and double quotes each occur an even number of times.
 *
 * @param {string} content - Mermaid diagram content
 * @returns {boolean} - True if basic validation passes
 * @throws {Error} - If validation fails
 */
export function validateBasicMermaidSyntax(content) {
  const trimmedContent = content.trim();

  if (!trimmedContent) {
    throw new Error("Empty mermaid diagram");
  }

  // Check for valid diagram type
  const validDiagramTypes = [
    "flowchart",
    "graph",
    "sequenceDiagram",
    "classDiagram",
    "stateDiagram",
    "entityRelationshipDiagram",
    "erDiagram",
    "journey",
    "gantt",
    "pie",
    "requirement",
    "gitGraph",
    "mindmap",
    "timeline",
    "quadrantChart",
  ].map((type) => type.toLowerCase());

  const lines = trimmedContent.split("\n").map((line) => line.trim());

  // Skip a leading YAML front matter block, if present and terminated.
  let cursor = 0;
  if (lines[0] === "---") {
    const closingIndex = lines.indexOf("---", 1);
    if (closingIndex !== -1) {
      cursor = closingIndex + 1;
    }
  }

  // Then skip blank lines and `%%` comments/directives.
  while (
    cursor < lines.length &&
    (lines[cursor] === "" || lines[cursor].startsWith("%%"))
  ) {
    cursor++;
  }
  const declarationLine = lines[cursor] ?? "";

  // The raw first line is still accepted so nothing that passed before fails now.
  const hasValidType = [lines[0], declarationLine].some((candidate) => {
    const lowered = candidate.toLowerCase();
    return validDiagramTypes.some((type) => lowered.includes(type));
  });

  if (!hasValidType) {
    throw new Error("Invalid or missing diagram type");
  }

  // Basic bracket matching (counts only, nesting order is not verified)
  const openBrackets = (content.match(/[\[\{\(]/g) || []).length;
  const closeBrackets = (content.match(/[\]\}\)]/g) || []).length;

  if (openBrackets !== closeBrackets) {
    throw new Error("Unmatched brackets in diagram");
  }

  // Basic quote matching
  const singleQuotes = (content.match(/'/g) || []).length;
  const doubleQuotes = (content.match(/"/g) || []).length;

  if (singleQuotes % 2 !== 0) {
    throw new Error("Unmatched single quotes in diagram");
  }

  if (doubleQuotes % 2 !== 0) {
    throw new Error("Unmatched double quotes in diagram");
  }

  return true;
}
88
+
89
/**
 * Validate a mermaid diagram through the shared worker pool.
 *
 * Blank input is rejected immediately. When the pool fails with a message
 * that matches one of the known environment-related markers, a warning is
 * logged and the result of validateBasicMermaidSyntax is returned instead;
 * every other failure is re-thrown unchanged.
 *
 * @param {string} content - Mermaid diagram content
 * @returns {Promise<boolean>} - True if validation passes
 * @throws {Error} - If validation fails
 */
export async function validateMermaidSyntax(content) {
  const isBlank = !content || !content.trim();
  if (isBlank) {
    throw new Error("Empty mermaid diagram");
  }

  try {
    const pool = getMermaidWorkerPool({
      poolSize: 2, // Reduced pool size
      timeout: 10000, // Reduced timeout
    });

    return await pool.validate(content);
  } catch (error) {
    const errorMsg = error.message || String(error);

    // Substrings that mark a failure of the environment, not of the diagram
    const environmentMarkers = [
      "Worker error",
      "Worker exited",
      "Worker pool",
      "timeout",
      "Cannot resolve module",
      "window is not defined",
      "canvas",
      "Web APIs",
      "getComputedTextLength",
      "document is not defined",
    ];
    const isEnvironmentIssue = environmentMarkers.some((marker) =>
      errorMsg.includes(marker)
    );

    // A genuine syntax error goes back to the caller untouched
    if (!isEnvironmentIssue) {
      throw error;
    }

    console.warn(
      "Worker-based mermaid validation failed, falling back to basic validation:",
      errorMsg
    );
    return validateBasicMermaidSyntax(content);
  }
}
137
+
138
/**
 * Read the current statistics of the mermaid worker pool.
 * Never throws: if the pool cannot be obtained or queried, an object
 * carrying the error message is returned instead.
 *
 * @returns {Object} - Pool statistics, or `{ error: string }` on failure
 */
export function getValidationStats() {
  let stats;
  try {
    stats = getMermaidWorkerPool().getStats();
  } catch (error) {
    stats = { error: error.message };
  }
  return stats;
}
150
+
151
/**
 * Shut down the mermaid validation worker pool by awaiting
 * shutdownMermaidWorkerPool. Intended to be called when the
 * application is shutting down.
 *
 * @returns {Promise<void>} - Settles once the pool shutdown has settled
 */
export async function shutdownValidation() {
  // Await so the caller only continues after the pool has shut down;
  // the pool's own return value is intentionally not forwarded.
  await shutdownMermaidWorkerPool();
}