@aigne/doc-smith 0.9.7 → 0.9.8-beta

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. package/CHANGELOG.md +13 -0
  2. package/agents/create/analyze-diagram-type-llm.yaml +160 -0
  3. package/agents/create/analyze-diagram-type.mjs +297 -0
  4. package/agents/create/generate-diagram-image.yaml +60 -0
  5. package/agents/create/replace-d2-with-image.mjs +624 -0
  6. package/agents/create/utils/init-current-content.mjs +5 -9
  7. package/agents/evaluate/document.yaml +6 -0
  8. package/agents/evaluate/index.yaml +1 -0
  9. package/agents/init/index.mjs +16 -0
  10. package/agents/media/batch-generate-media-description.yaml +2 -0
  11. package/agents/media/generate-media-description.yaml +3 -0
  12. package/agents/media/load-media-description.mjs +44 -15
  13. package/agents/publish/publish-docs.mjs +1 -4
  14. package/agents/update/check-diagram-flag.mjs +116 -0
  15. package/agents/update/check-document.mjs +0 -1
  16. package/agents/update/check-generate-diagram.mjs +48 -30
  17. package/agents/update/check-sync-image-flag.mjs +55 -0
  18. package/agents/update/check-update-is-single.mjs +11 -0
  19. package/agents/update/generate-diagram.yaml +43 -9
  20. package/agents/update/generate-document.yaml +9 -0
  21. package/agents/update/handle-document-update.yaml +10 -8
  22. package/agents/update/index.yaml +16 -1
  23. package/agents/update/sync-images-and-exit.mjs +148 -0
  24. package/agents/update/update-single/update-single-document-detail.mjs +131 -17
  25. package/agents/utils/analyze-feedback-intent.mjs +136 -0
  26. package/agents/utils/choose-docs.mjs +183 -40
  27. package/agents/utils/generate-document-or-skip.mjs +41 -0
  28. package/agents/utils/handle-diagram-operations.mjs +263 -0
  29. package/agents/utils/load-all-document-content.mjs +30 -0
  30. package/agents/utils/load-sources.mjs +2 -2
  31. package/agents/utils/read-current-document-content.mjs +46 -0
  32. package/agents/utils/save-doc.mjs +42 -0
  33. package/agents/utils/skip-if-content-exists.mjs +27 -0
  34. package/aigne.yaml +6 -1
  35. package/assets/report-template/report.html +17 -17
  36. package/docs-mcp/read-doc-content.mjs +30 -1
  37. package/package.json +4 -4
  38. package/prompts/detail/diagram/generate-image-system.md +135 -0
  39. package/prompts/detail/diagram/generate-image-user.md +32 -0
  40. package/prompts/detail/generate/user-prompt.md +27 -13
  41. package/prompts/evaluate/document.md +23 -10
  42. package/prompts/media/media-description/system-prompt.md +10 -2
  43. package/prompts/media/media-description/user-prompt.md +9 -0
  44. package/utils/check-document-has-diagram.mjs +97 -0
  45. package/utils/constants/index.mjs +46 -0
  46. package/utils/d2-utils.mjs +114 -181
  47. package/utils/delete-diagram-images.mjs +103 -0
  48. package/utils/docs-finder-utils.mjs +34 -1
  49. package/utils/image-compress.mjs +75 -0
  50. package/utils/kroki-utils.mjs +2 -3
  51. package/utils/sync-diagram-to-translations.mjs +258 -0
  52. package/utils/utils.mjs +24 -0
  53. package/agents/create/check-diagram.mjs +0 -40
  54. package/agents/create/draw-diagram.yaml +0 -27
  55. package/agents/create/merge-diagram.yaml +0 -39
  56. package/agents/create/wrap-diagram-code.mjs +0 -35
@@ -1,190 +1,15 @@
1
1
  import path from "node:path";
2
2
 
3
- import { D2 } from "@terrastruct/d2";
4
3
  import fs from "fs-extra";
5
- import { glob } from "glob";
6
- import pMap from "p-map";
7
4
 
8
- import {
9
- D2_CONCURRENCY,
10
- D2_CONFIG,
11
- DOC_SMITH_DIR,
12
- FILE_CONCURRENCY,
13
- TMP_ASSETS_DIR,
14
- TMP_DIR,
15
- } from "./constants/index.mjs";
16
- import { debug } from "./debug.mjs";
17
- import { iconMap } from "./icon-map.mjs";
18
- import { getContentHash } from "./utils.mjs";
5
+ import { DOC_SMITH_DIR, TMP_DIR } from "./constants/index.mjs";
19
6
 
20
- const codeBlockRegex = /```d2.*\n([\s\S]*?)```/g;
7
+ // Note: .* matches title or other text after ```d2 (e.g., ```d2 Vault 驗證流程)
8
+ // Export regex for reuse across the codebase to avoid duplication
9
+ export const d2CodeBlockRegex = /```d2.*\n([\s\S]*?)```/g;
21
10
 
22
11
  export const DIAGRAM_PLACEHOLDER = "DIAGRAM_PLACEHOLDER";
23
12
 
24
- export async function getChart({ content, strict }) {
25
- const d2 = new D2();
26
- const iconUrlList = Object.keys(iconMap);
27
- const escapedUrls = iconUrlList.map((url) => url.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"));
28
- const regexPattern = escapedUrls.join("|");
29
- const regex = new RegExp(regexPattern, "g");
30
-
31
- const contentWithBase64Img = content.replace(regex, (match) => {
32
- return iconMap[match];
33
- });
34
- try {
35
- const { diagram, renderOptions, graph } = await d2.compile(contentWithBase64Img);
36
-
37
- // Do not apply a stroke-dash to sequence diagrams.
38
- if (
39
- graph?.root?.attributes?.shape &&
40
- graph.root.attributes.shape.value !== "sequence_diagram"
41
- ) {
42
- // Save the first-level container.
43
- const firstLevelContainer = new Set();
44
- diagram.shapes.forEach((x) => {
45
- const idList = x.id.split(".");
46
- if (idList.length > 1) {
47
- const targetShape = diagram.shapes.find((x) => x.id === idList[0]);
48
- if (targetShape && !["c4-person", "cylinder", "queue"].includes(targetShape.type)) {
49
- firstLevelContainer.add(targetShape);
50
- }
51
- }
52
- });
53
- firstLevelContainer.forEach((shape) => {
54
- if (!shape.strokeDash) {
55
- // Note: The data structure here is different from the d2 source code.
56
- shape.strokeDash = 3;
57
- }
58
- });
59
- }
60
-
61
- const svg = await d2.render(diagram, renderOptions);
62
-
63
- return svg;
64
- } catch (err) {
65
- if (strict) throw err;
66
-
67
- console.error("Failed to generate D2 diagram. Content:", content, "Error:", err);
68
- return null;
69
- } finally {
70
- d2.worker.terminate();
71
- }
72
- }
73
-
74
- export async function saveAssets({ markdown, docsDir }) {
75
- if (!markdown) {
76
- return markdown;
77
- }
78
-
79
- const { replaced } = await runIterator({
80
- input: markdown,
81
- regexp: codeBlockRegex,
82
- replace: true,
83
- fn: async ([_match, _code]) => {
84
- const assetDir = path.join(docsDir, "../", TMP_ASSETS_DIR, "d2");
85
- await fs.ensureDir(assetDir);
86
- const d2Content = [D2_CONFIG, _code].join("\n");
87
- const fileName = `${getContentHash(d2Content)}.svg`;
88
- const svgPath = path.join(assetDir, fileName);
89
-
90
- if (await fs.pathExists(svgPath)) {
91
- debug("Asset cache found, skipping generation", svgPath);
92
- } else {
93
- try {
94
- debug("Generating d2 diagram", svgPath);
95
- if (debug.enabled) {
96
- const d2FileName = `${getContentHash(d2Content)}.d2`;
97
- const d2Path = path.join(assetDir, d2FileName);
98
- await fs.writeFile(d2Path, d2Content, { encoding: "utf8" });
99
- }
100
-
101
- const svg = await getChart({ content: d2Content });
102
- if (svg) {
103
- await fs.writeFile(svgPath, svg, { encoding: "utf8" });
104
- }
105
- } catch (error) {
106
- debug("Failed to generate D2 diagram. Content:", d2Content, "Error:", error);
107
- return _code;
108
- }
109
- }
110
- return `![](${path.posix.join("..", TMP_ASSETS_DIR, "d2", fileName)})`;
111
- },
112
- options: { concurrency: D2_CONCURRENCY },
113
- });
114
-
115
- return replaced;
116
- }
117
-
118
- export async function beforePublishHook({ docsDir }) {
119
- // Process each markdown file to save d2 svg assets.
120
- const mdFilePaths = await glob("**/*.md", { cwd: docsDir });
121
- await pMap(
122
- mdFilePaths,
123
- async (filePath) => {
124
- let finalContent = await fs.readFile(path.join(docsDir, filePath), { encoding: "utf8" });
125
- finalContent = await saveAssets({ markdown: finalContent, docsDir });
126
-
127
- await fs.writeFile(path.join(docsDir, filePath), finalContent, { encoding: "utf8" });
128
- },
129
- { concurrency: FILE_CONCURRENCY },
130
- );
131
- }
132
-
133
- async function runIterator({ input, regexp, fn = () => {}, options, replace = false }) {
134
- if (!input) return input;
135
- const matches = [...input.matchAll(regexp)];
136
- const results = [];
137
- await pMap(
138
- matches,
139
- async (...args) => {
140
- const resultItem = await fn(...args);
141
- results.push(resultItem);
142
- },
143
- options,
144
- );
145
-
146
- let replaced = input;
147
- if (replace) {
148
- let index = 0;
149
- replaced = replaced.replace(regexp, () => {
150
- return results[index++];
151
- });
152
- }
153
-
154
- return {
155
- results,
156
- replaced,
157
- };
158
- }
159
-
160
- export async function checkContent({ content: _content }) {
161
- const matches = Array.from(_content.matchAll(codeBlockRegex));
162
- let content = _content;
163
- if (matches.length > 0) {
164
- content = matches[0][1];
165
- }
166
- await ensureTmpDir();
167
- const assetDir = path.join(DOC_SMITH_DIR, TMP_DIR, TMP_ASSETS_DIR, "d2");
168
- await fs.ensureDir(assetDir);
169
- const d2Content = [D2_CONFIG, content].join("\n");
170
- const fileName = `${getContentHash(d2Content)}.svg`;
171
- const svgPath = path.join(assetDir, fileName);
172
-
173
- if (debug.enabled) {
174
- const d2FileName = `${getContentHash(d2Content)}.d2`;
175
- const d2Path = path.join(assetDir, d2FileName);
176
- await fs.writeFile(d2Path, d2Content, { encoding: "utf8" });
177
- }
178
-
179
- if (await fs.pathExists(svgPath)) {
180
- debug("Asset cache found, skipping generation", svgPath);
181
- return;
182
- }
183
-
184
- const svg = await getChart({ content: d2Content, strict: true });
185
- await fs.writeFile(svgPath, svg, { encoding: "utf8" });
186
- }
187
-
188
13
  export async function ensureTmpDir() {
189
14
  const tmpDir = path.join(DOC_SMITH_DIR, TMP_DIR);
190
15
  if (!(await fs.pathExists(path.join(tmpDir, ".gitignore")))) {
@@ -198,7 +23,7 @@ export function isValidCode(lang) {
198
23
  }
199
24
 
200
25
  export function wrapCode({ content }) {
201
- const matches = Array.from(content.matchAll(codeBlockRegex));
26
+ const matches = Array.from(content.matchAll(d2CodeBlockRegex));
202
27
  if (matches.length > 0) {
203
28
  return content;
204
29
  }
@@ -212,7 +37,7 @@ export function wrapCode({ content }) {
212
37
  * @returns {Array} - [contentWithPlaceholder, originalCodeBlock]
213
38
  */
214
39
  export function replaceD2WithPlaceholder({ content }) {
215
- const [firstMatch] = Array.from(content.matchAll(codeBlockRegex));
40
+ const [firstMatch] = Array.from(content.matchAll(d2CodeBlockRegex));
216
41
  if (firstMatch) {
217
42
  const matchContent = firstMatch[0];
218
43
  const cleanContent = content.replace(matchContent, DIAGRAM_PLACEHOLDER);
@@ -255,3 +80,111 @@ export function replacePlaceholderWithD2({ content, diagramSourceCode }) {
255
80
 
256
81
  return content.replace(DIAGRAM_PLACEHOLDER, replacement);
257
82
  }
83
+
84
/**
 * Swap one diagram span for DIAGRAM_PLACEHOLDER, padding with newlines so the
 * placeholder always sits on a line of its own.
 * @param {string} text - Text to splice
 * @param {{start: number, end: number}} span - Character range to replace (end exclusive)
 * @returns {string} - Text with the span replaced
 */
function spliceDiagramPlaceholder(text, { start, end }) {
  const before = text.substring(0, start);
  const after = text.substring(end);
  let replacement = DIAGRAM_PLACEHOLDER;
  if (before && !before.endsWith("\n")) {
    replacement = `\n${replacement}`;
  }
  if (after && !after.startsWith("\n")) {
    replacement = `${replacement}\n`;
  }
  return before + replacement + after;
}

/**
 * Replace diagrams in a document with DIAGRAM_PLACEHOLDER.
 *
 * Four kinds of span count as a diagram: an existing DIAGRAM_PLACEHOLDER token,
 * a generated image wrapped in DIAGRAM_IMAGE_START / DIAGRAM_IMAGE_END comment
 * markers, a ```d2 code block and a ```mermaid code block. Spans are ordered by
 * their position in the document (top to bottom).
 *
 * @param {object} params
 * @param {string} params.content - Document content containing diagrams
 * @param {number} [params.diagramIndex] - 0-based index of the single diagram to
 *   replace. When omitted, null, out of range, or otherwise not addressing a
 *   diagram, every diagram is replaced.
 * @returns {string} - Content with diagrams replaced by DIAGRAM_PLACEHOLDER.
 *   Falsy content and content without diagrams are returned unchanged.
 */
export function replaceDiagramsWithPlaceholder({ content, diagramIndex }) {
  if (!content) {
    return content;
  }

  const patterns = [
    ["image", /<!-- DIAGRAM_IMAGE_START:[^>]+ -->[\s\S]*?<!-- DIAGRAM_IMAGE_END -->/g],
    // d2CodeBlockRegex is a shared, exported /g regex, so its lastIndex is
    // mutable state other callers may have advanced. Scan with a private copy
    // (new RegExp(re) starts at lastIndex 0) so no D2 block is ever skipped.
    ["d2", new RegExp(d2CodeBlockRegex)],
    ["mermaid", /```mermaid.*\n([\s\S]*?)```/g],
  ];

  const locations = [];

  // Existing placeholders are diagram slots too, so they take part in indexing.
  let placeholderIndex = content.indexOf(DIAGRAM_PLACEHOLDER);
  while (placeholderIndex !== -1) {
    locations.push({
      type: "placeholder",
      start: placeholderIndex,
      end: placeholderIndex + DIAGRAM_PLACEHOLDER.length,
    });
    placeholderIndex = content.indexOf(DIAGRAM_PLACEHOLDER, placeholderIndex + 1);
  }

  for (const [type, pattern] of patterns) {
    for (const match of content.matchAll(pattern)) {
      locations.push({ type, start: match.index, end: match.index + match[0].length });
    }
  }

  // Sort by position (top to bottom)
  locations.sort((a, b) => a.start - b.start);

  if (locations.length === 0) {
    return content;
  }

  // Single-diagram mode. A null or fractional index addresses no entry, so it
  // falls through to replace-all exactly like an out-of-range index does,
  // instead of throwing on an undefined location.
  const target = diagramIndex == null ? undefined : locations[diagramIndex];
  if (target && diagramIndex >= 0 && diagramIndex < locations.length) {
    return spliceDiagramPlaceholder(content, target);
  }

  // Replace-all mode. Drop any span that overlaps an earlier one (for example
  // a placeholder token inside a d2 block): splicing both would apply stale
  // offsets to the outer span and corrupt the text.
  const disjoint = [];
  for (const location of locations) {
    const previous = disjoint[disjoint.length - 1];
    if (!previous || location.start >= previous.end) {
      disjoint.push(location);
    }
  }

  // Splice from the end towards the start so earlier offsets stay valid.
  return disjoint.reduceRight((text, location) => spliceDiagramPlaceholder(text, location), content);
}
@@ -0,0 +1,103 @@
1
+ import { unlink } from "node:fs/promises";
2
+ import { join, dirname, normalize } from "node:path";
3
+ import fs from "fs-extra";
4
+ import { debug } from "./debug.mjs";
5
+
6
/**
 * Collect the on-disk paths of generated diagram images referenced by a document.
 *
 * Only images wrapped in DIAGRAM_IMAGE_START / DIAGRAM_IMAGE_END comment markers
 * are considered. Remote (http/https) images are ignored, and a path is returned
 * only if the file currently exists.
 *
 * @param {string} content - Markdown content
 * @param {string} path - Document path (e.g., "guides/getting-started.md")
 * @param {string} docsDir - Documentation directory, joined onto process.cwd()
 * @returns {Promise<Array<string>>} Normalized paths of the existing image files,
 *   in document order. Empty when any argument is falsy.
 */
export async function extractDiagramImagePaths(content, path, docsDir) {
  if (!content || !path || !docsDir) {
    return [];
  }

  // Matches: <!-- DIAGRAM_IMAGE_START:... -->![alt](path)<!-- DIAGRAM_IMAGE_END -->
  const diagramPattern =
    /<!--\s*DIAGRAM_IMAGE_START:[^>]+-->\s*!\[[^\]]*\]\(([^)]+)\)\s*<!--\s*DIAGRAM_IMAGE_END\s*-->/g;

  const docDir = dirname(path);
  const imagePaths = [];

  // Iterate with matchAll so that `continue` always moves on to the next match.
  // A manual exec() loop that skips its advance step spins forever on the first
  // remote URL it meets.
  for (const [, imagePath] of content.matchAll(diagramPattern)) {
    if (imagePath.startsWith("http://") || imagePath.startsWith("https://")) {
      continue; // Skip remote URLs
    }

    // Paths starting with "../" are joined onto docsDir as-is; every other
    // path is first resolved against the directory of the document.
    const imageRelativePath = imagePath.startsWith("../")
      ? imagePath
      : join(docDir, imagePath).replace(/\\/g, "/");

    // Normalize path (remove .. and .)
    const normalizedPath = normalize(join(process.cwd(), docsDir, imageRelativePath));

    if (await fs.pathExists(normalizedPath)) {
      imagePaths.push(normalizedPath);
    }
  }

  return imagePaths;
}
58
+
59
/**
 * Remove from disk every generated diagram image that a document references.
 *
 * The image list comes from extractDiagramImagePaths(). Files are unlinked one
 * at a time; a file that has already disappeared (ENOENT) is still counted as
 * deleted, while any other unlink error is logged and counted as failed.
 *
 * @param {string} content - Markdown content (before deletion)
 * @param {string} path - Document path
 * @param {string} docsDir - Documentation directory
 * @returns {Promise<{deleted: number, failed: number}>} Tally of outcomes; both
 *   counts are 0 when an argument is falsy or the image lookup itself throws.
 */
export async function deleteDiagramImages(content, path, docsDir) {
  const hasAllInputs = Boolean(content && path && docsDir);
  if (!hasAllInputs) {
    return { deleted: 0, failed: 0 };
  }

  try {
    const targets = await extractDiagramImagePaths(content, path, docsDir);
    const tally = { deleted: 0, failed: 0 };

    for (const target of targets) {
      try {
        await unlink(target);
        debug(`Deleted diagram image: ${target}`);
        tally.deleted += 1;
      } catch (error) {
        if (error.code === "ENOENT") {
          // Already gone: the desired end state is reached, so count it as deleted.
          tally.deleted += 1;
          continue;
        }
        console.warn(`Failed to delete diagram image ${target}: ${error.message}`);
        tally.failed += 1;
      }
    }

    return tally;
  } catch (error) {
    console.warn(`Error deleting diagram images: ${error.message}`);
    return { deleted: 0, failed: 0 };
  }
}
@@ -107,10 +107,43 @@ export async function findItemByPath(documentStructure, docPath, boardId, docsDi
107
107
  * @param {string} fileName - File name to read
108
108
  * @returns {Promise<string|null>} File content or null if failed
109
109
  */
110
/**
 * Strip inline base64 images from markdown.
 *
 * Every markdown image whose target is a `data:image/...` URL is deleted
 * outright (no placeholder is left behind), because the embedded payload
 * inflates token usage without giving an LLM anything useful. Images that
 * point at files or remote URLs are left untouched.
 *
 * When such an image is wrapped in a link, `[![alt](data:image/...)](link)`,
 * only the inner image is removed, so `[](link)` remains.
 *
 * @param {string} content - Markdown content that may contain base64 images
 * @returns {string} - Content without base64 images; empty or non-string input
 *   is returned as-is
 */
function removeBase64Images(content) {
  const isNonEmptyString = typeof content === "string" && content.length > 0;
  if (!isNonEmptyString) {
    return content;
  }

  // ![alt](data:image/<type>;base64,<payload>) — everything up to the closing ")".
  return content.replace(/!\[([^\]]*)\]\(data:image\/[^)]+\)/g, "");
}
138
+
110
139
  export async function readFileContent(docsDir, fileName) {
111
140
  try {
112
141
  const filePath = join(docsDir, fileName);
113
- return await readFile(filePath, "utf-8");
142
+ const content = await readFile(filePath, "utf-8");
143
+
144
+ // Remove base64 encoded images to reduce token usage
145
+ // Base64 image data is not useful for LLM processing and significantly increases token count
146
+ return removeBase64Images(content);
114
147
  } catch (readError) {
115
148
  console.warn(`⚠️ Could not read content from ${fileName}:`, readError.message);
116
149
  return null;
@@ -0,0 +1,75 @@
1
+ import sharp from "sharp";
2
+ import path from "node:path";
3
+ import { debug } from "./debug.mjs";
4
+
5
/**
 * Re-encode an image with sharp to reduce its size.
 *
 * The output format is `options.outputFormat` when given; otherwise it is
 * derived from the input file extension (.jpg/.jpeg, .png, .webp), falling
 * back to JPEG for anything else. Without `options.outputPath` the result is
 * written next to the input as `<name>.compressed<ext>`.
 *
 * @param {string} inputPath - Path to the input image file
 * @param {object} options - Compression options
 * @param {number} options.quality - Quality passed to the encoder (default: 80)
 * @param {string} options.outputFormat - 'jpeg', 'png' or 'webp' (default: derived from inputPath)
 * @param {string} options.outputPath - Where to write the compressed image
 * @returns {Promise<string>} - Path of the written file, or inputPath when
 *   anything fails (the failure is reported through debug only, never thrown)
 */
export async function compressImage(inputPath, options = {}) {
  const { quality = 80, outputFormat, outputPath } = options;

  try {
    const inputExt = path.extname(inputPath).toLowerCase();

    // Pick the encoder: explicit option first, then the input extension.
    let format = outputFormat;
    if (!format) {
      switch (inputExt) {
        case ".jpg":
        case ".jpeg":
          format = "jpeg";
          break;
        case ".png":
          format = "png";
          break;
        case ".webp":
          format = "webp";
          break;
        default:
          format = "jpeg";
          debug(`Unknown image format ${inputExt}, defaulting to JPEG`);
      }
    }

    // Pick the destination: explicit option first, then a sibling of the input.
    let finalOutputPath = outputPath;
    if (!finalOutputPath) {
      let outputExt = ".webp";
      if (format === "jpeg") {
        outputExt = ".jpg";
      } else if (format === "png") {
        outputExt = ".png";
      }
      const inputDir = path.dirname(inputPath);
      const inputBase = path.basename(inputPath, path.extname(inputPath));
      finalOutputPath = path.join(inputDir, `${inputBase}.compressed${outputExt}`);
    }

    // Configure the encoder. A format outside the three known ones gets no
    // explicit encoder call and is written with the pipeline as-is.
    const pipeline = sharp(inputPath);
    let encoder = pipeline;
    switch (format) {
      case "jpeg":
        encoder = pipeline.jpeg({ quality, mozjpeg: true });
        break;
      case "png":
        encoder = pipeline.png({ quality, compressionLevel: 9 });
        break;
      case "webp":
        encoder = pipeline.webp({ quality });
        break;
      default:
        break;
    }

    await encoder.toFile(finalOutputPath);

    debug(
      `✅ Image compressed: ${inputPath} -> ${finalOutputPath} (format: ${format}, quality: ${quality})`,
    );

    return finalOutputPath;
  } catch (error) {
    debug(`⚠️ Failed to compress image ${inputPath}: ${error.message}`);
    // Best effort: callers keep working with the uncompressed original.
    return inputPath;
  }
}
@@ -15,6 +15,7 @@ import {
15
15
  TMP_DIR,
16
16
  } from "./constants/index.mjs";
17
17
  import { getContentHash } from "./utils.mjs";
18
+ import { d2CodeBlockRegex } from "./d2-utils.mjs";
18
19
 
19
20
  const debug = Debug("doc-smith");
20
21
 
@@ -59,11 +60,9 @@ export async function saveD2Assets({ markdown, docsDir }) {
59
60
  return markdown;
60
61
  }
61
62
 
62
- const codeBlockRegex = /```d2.*\n([\s\S]*?)```/g;
63
-
64
63
  const { replaced } = await runIterator({
65
64
  input: markdown,
66
- regexp: codeBlockRegex,
65
+ regexp: d2CodeBlockRegex,
67
66
  replace: true,
68
67
  fn: async ([_match, _code]) => {
69
68
  const assetDir = path.join(docsDir, "../", TMP_ASSETS_DIR, "d2");