@aigne/doc-smith 0.8.15-beta.5 → 0.8.15-beta.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,12 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.8.15-beta.6](https://github.com/AIGNE-io/aigne-doc-smith/compare/v0.8.15-beta.5...v0.8.15-beta.6) (2025-10-30)
4
+
5
+
6
+ ### Bug Fixes
7
+
8
+ * ensure document embed image is accessible ([#226](https://github.com/AIGNE-io/aigne-doc-smith/issues/226)) ([47dfc5d](https://github.com/AIGNE-io/aigne-doc-smith/commit/47dfc5d48440f435258c7d4b5629712c7eb886e7))
9
+
3
10
  ## [0.8.15-beta.5](https://github.com/AIGNE-io/aigne-doc-smith/compare/v0.8.15-beta.4...v0.8.15-beta.5) (2025-10-29)
4
11
 
5
12
 
@@ -163,31 +163,19 @@ export default async function loadMediaDescription(input, options) {
163
163
  let enhancedAssetsContent = "# Available Media Assets for Documentation\n\n";
164
164
 
165
165
  if (mediaFiles.length > 0) {
166
- enhancedAssetsContent += "```yaml\n";
167
- enhancedAssetsContent += "assets:\n";
168
-
169
- for (const asset of mediaFiles) {
170
- enhancedAssetsContent += ` - name: "${asset.name}"\n`;
171
- enhancedAssetsContent += ` path: "${asset.path}"\n`;
172
- enhancedAssetsContent += ` type: "${asset.type}"\n`;
173
-
174
- // Add description for images and videos
175
- if (asset.type === "image" || asset.type === "video") {
176
- const mediaHash = mediaHashMap.get(asset.path);
177
- const cachedDesc = cache[mediaHash];
178
- if (cachedDesc?.description) {
179
- enhancedAssetsContent += ` description: "${cachedDesc.description}"\n`;
180
- }
181
- }
182
-
183
- // Add dimensions for images and videos
184
- if (asset.width && asset.height) {
185
- enhancedAssetsContent += ` width: ${asset.width}\n`;
186
- enhancedAssetsContent += ` height: ${asset.height}\n`;
166
+ const assets = mediaFiles.map((x) => {
167
+ const mediaHash = mediaHashMap.get(x.path);
168
+ const description = cache[mediaHash]?.description;
169
+ const result = {
170
+ name: x.name,
171
+ path: x.path,
172
+ };
173
+ if (description) {
174
+ result.description = description;
187
175
  }
188
- }
189
-
190
- enhancedAssetsContent += "```\n";
176
+ return result;
177
+ });
178
+ enhancedAssetsContent += stringify(assets);
191
179
  }
192
180
 
193
181
  return {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@aigne/doc-smith",
3
- "version": "0.8.15-beta.5",
3
+ "version": "0.8.15-beta.6",
4
4
  "description": "AI-driven documentation generation tool built on the AIGNE Framework",
5
5
  "publishConfig": {
6
6
  "access": "public"
@@ -0,0 +1,12 @@
1
+ <media_file_list_usage_rules>
2
+
3
+ **Usage Workflow**
4
+ 1. Read the `<media_file_list>` data and take note of each file's `path` and `description` as references.
5
+ 2. Combine those descriptions with the current document's content to decide which images should be used and where they should be inserted.
6
+ 3. Confirm that every inserted image path comes from `<media_file_list>`. If a path is missing from that list, replace it with one that is included.
7
+
8
+ **Usage Requirements**
9
+ - Insert images with Markdown syntax: `![Descriptive alt text](<path-from-media_file_list>)`.
10
+ - Never invent, reinterpret, fabricate, normalize, or rewrite any media file path under any circumstances.
11
+
12
+ </media_file_list_usage_rules>
@@ -45,6 +45,8 @@ Custom code block generation rules:
45
45
 
46
46
  {% include "../d2-diagram/guide.md" %}
47
47
 
48
+ {% include "../../common/document/media-file-list-usage-rules.md" %}
49
+
48
50
  Tool result usage rules:
49
51
  - Only use the `"role": "tool"` result as the datasource for document enhancement.
50
52
  - Do not include `"role": "agent"` content in the final output.
@@ -19,11 +19,11 @@
19
19
 
20
20
  {{ additionalInformation }}
21
21
 
22
- <media_list>
22
+ <media_file_list>
23
23
  {{ assetsContent }}
24
- </media_list>
24
+ </media_file_list>
25
+
25
26
 
26
- {% include "../../common/document/media-handling-rules.md" %}
27
27
 
28
28
  </datasources>
29
29
 
@@ -38,6 +38,8 @@ Custom component optimization rules:
38
38
  Custom code block optimization rules:
39
39
  {% include "../custom/custom-code-block.md" %}
40
40
 
41
+ {% include "../../common/document/media-file-list-usage-rules.md" %}
42
+
41
43
  Diagram generation rules:
42
44
  {% include "../d2-diagram/guide.md" %}
43
45
  <diagram_generation_rules>
@@ -21,11 +21,10 @@
21
21
 
22
22
  {{ additionalInformation }}
23
23
 
24
- <media_list>
24
+ <media_file_list>
25
25
  {{ assetsContent }}
26
- </media_list>
26
+ </media_file_list>
27
27
 
28
- {% include "../../common/document/media-handling-rules.md" %}
29
28
  </datasources>
30
29
 
31
30
  <user_feedback>
@@ -408,10 +408,31 @@ async function isTextFile(filePath) {
408
408
  export function isRemoteFile(fileUrl) {
409
409
  if (typeof fileUrl !== "string") return false;
410
410
 
411
- if (fileUrl.startsWith("http://") || fileUrl.startsWith("https://")) {
412
- return true;
411
+ try {
412
+ const url = new URL(fileUrl);
413
+ // Only accept http and https url
414
+ if (["http:", "https:"].includes(url.protocol)) {
415
+ return true;
416
+ }
417
+ // Any other protocol is treated as an invalid remote URL
418
+ return false;
419
+ } catch {
420
+ return false;
421
+ }
422
+ }
423
+
424
+ export async function isRemoteFileAvailable(fileUrl) {
425
+ if (!isRemoteFile(fileUrl)) return false;
426
+
427
+ try {
428
+ const res = await fetch(fileUrl, {
429
+ method: "HEAD",
430
+ });
431
+ return res.ok;
432
+ } catch (error) {
433
+ debug(`Failed to check HTTP file availability: ${fileUrl} - ${error.message}`);
434
+ return false;
413
435
  }
414
- return false;
415
436
  }
416
437
 
417
438
  export async function isRemoteTextFile(fileUrl) {
@@ -3,9 +3,12 @@ import path from "node:path";
3
3
  import remarkGfm from "remark-gfm";
4
4
  import remarkLint from "remark-lint";
5
5
  import remarkParse from "remark-parse";
6
+ import { isRelative } from "ufo";
6
7
  import { unified } from "unified";
7
8
  import { visit } from "unist-util-visit";
8
9
  import { VFile } from "vfile";
10
+
11
+ import { isRemoteFile, isRemoteFileAvailable } from "./file-utils.mjs";
9
12
  import { validateMermaidSyntax } from "./mermaid-validator.mjs";
10
13
 
11
14
  /**
@@ -232,6 +235,34 @@ function checkLocalImages(markdown, source, errorMessages, markdownFilePath, bas
232
235
  }
233
236
  }
234
237
 
238
+ async function checkRemoteImages(markdown, source, errorMessages) {
239
+ const imageRegex = /!\[([^\]]*)\]\(([^)]+)\)/g;
240
+ let match;
241
+
242
+ while (true) {
243
+ match = imageRegex.exec(markdown);
244
+ if (match === null) break;
245
+ const imagePath = match[2].trim();
246
+ const altText = match[1];
247
+
248
+ if (isRelative(imagePath)) continue;
249
+ if (imagePath.startsWith("/")) continue;
250
+
251
+ // Skip data URLs
252
+ if (/^data:/.test(imagePath)) continue;
253
+
254
+ if (isRemoteFile(imagePath)) {
255
+ const isAvailable = await isRemoteFileAvailable(imagePath);
256
+ if (isAvailable) continue;
257
+ else {
258
+ errorMessages.push(
259
+ `Found invalid remote image in ${source}: ![${altText}](${imagePath}) - only valid media resources can be used`,
260
+ );
261
+ }
262
+ }
263
+ }
264
+ }
265
+
235
266
  /**
236
267
  * Check content structure and formatting issues
237
268
  * @param {string} markdown - The markdown content
@@ -370,7 +401,10 @@ export async function checkMarkdown(markdown, source = "content", options = {})
370
401
  // 2. Check local images existence
371
402
  checkLocalImages(markdown, source, errorMessages, filePath, baseDir);
372
403
 
373
- // 3. Check content structure and formatting issues
404
+ // 3. Check that remote images are reachable
405
+ await checkRemoteImages(markdown, source, errorMessages);
406
+
407
+ // 4. Check content structure and formatting issues
374
408
  checkContentStructure(markdown, source, errorMessages);
375
409
 
376
410
  // Check mermaid code blocks and other custom validations
@@ -1,9 +0,0 @@
1
- <media_handling_rules>
2
- Media resource usage rules:
3
-
4
- - When DataSources contain media resource files, incorporate them appropriately in the generated content
5
- - Media resources are provided in markdown format, example: ![Resource description](https://xxxx)
6
- - Display images in markdown format within generated results
7
- - Based on resource descriptions, place images strategically in contextually relevant positions to enhance the presentation
8
- - To ensure correct media resource paths, **only use media resources provided in media_list or remote URL media resources**
9
- </media_handling_rules>