@aigne/doc-smith 0.8.15-beta.5 → 0.8.15-beta.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +7 -0
- package/agents/media/load-media-description.mjs +12 -24
- package/package.json +1 -1
- package/prompts/common/document/media-file-list-usage-rules.md +12 -0
- package/prompts/detail/generate/system-prompt.md +2 -0
- package/prompts/detail/generate/user-prompt.md +3 -3
- package/prompts/detail/update/system-prompt.md +2 -0
- package/prompts/detail/update/user-prompt.md +2 -3
- package/utils/file-utils.mjs +24 -3
- package/utils/markdown-checker.mjs +35 -1
- package/prompts/common/document/media-handling-rules.md +0 -9
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,12 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [0.8.15-beta.6](https://github.com/AIGNE-io/aigne-doc-smith/compare/v0.8.15-beta.5...v0.8.15-beta.6) (2025-10-30)
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
### Bug Fixes
|
|
7
|
+
|
|
8
|
+
* ensure document embed image is accessible ([#226](https://github.com/AIGNE-io/aigne-doc-smith/issues/226)) ([47dfc5d](https://github.com/AIGNE-io/aigne-doc-smith/commit/47dfc5d48440f435258c7d4b5629712c7eb886e7))
|
|
9
|
+
|
|
3
10
|
## [0.8.15-beta.5](https://github.com/AIGNE-io/aigne-doc-smith/compare/v0.8.15-beta.4...v0.8.15-beta.5) (2025-10-29)
|
|
4
11
|
|
|
5
12
|
|
|
@@ -163,31 +163,19 @@ export default async function loadMediaDescription(input, options) {
|
|
|
163
163
|
let enhancedAssetsContent = "# Available Media Assets for Documentation\n\n";
|
|
164
164
|
|
|
165
165
|
if (mediaFiles.length > 0) {
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
if (asset.type === "image" || asset.type === "video") {
|
|
176
|
-
const mediaHash = mediaHashMap.get(asset.path);
|
|
177
|
-
const cachedDesc = cache[mediaHash];
|
|
178
|
-
if (cachedDesc?.description) {
|
|
179
|
-
enhancedAssetsContent += ` description: "${cachedDesc.description}"\n`;
|
|
180
|
-
}
|
|
181
|
-
}
|
|
182
|
-
|
|
183
|
-
// Add dimensions for images and videos
|
|
184
|
-
if (asset.width && asset.height) {
|
|
185
|
-
enhancedAssetsContent += ` width: ${asset.width}\n`;
|
|
186
|
-
enhancedAssetsContent += ` height: ${asset.height}\n`;
|
|
166
|
+
const assets = mediaFiles.map((x) => {
|
|
167
|
+
const mediaHash = mediaHashMap.get(x.path);
|
|
168
|
+
const description = cache[mediaHash]?.description;
|
|
169
|
+
const result = {
|
|
170
|
+
name: x.name,
|
|
171
|
+
path: x.path,
|
|
172
|
+
};
|
|
173
|
+
if (description) {
|
|
174
|
+
result.description = description;
|
|
187
175
|
}
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
enhancedAssetsContent +=
|
|
176
|
+
return result;
|
|
177
|
+
});
|
|
178
|
+
enhancedAssetsContent += stringify(assets);
|
|
191
179
|
}
|
|
192
180
|
|
|
193
181
|
return {
|
package/package.json
CHANGED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
<media_file_list_usage_rules>
|
|
2
|
+
|
|
3
|
+
**Usage Workflow**
|
|
4
|
+
1. Read the `<media_file_list>` data and take note of each file's `path` and `description` as references.
|
|
5
|
+
2. Combine those descriptions with the current document's content to decide which images should be used and where they should be inserted.
|
|
6
|
+
3. Confirm that every inserted image path comes from `<media_file_list>`. If a path is missing from that list, replace it with one that is included.
|
|
7
|
+
|
|
8
|
+
**Usage Requirements**
|
|
9
|
+
- Insert images with Markdown syntax: `![Alt text](path)`, where `path` is copied verbatim from `<media_file_list>`.
|
|
10
|
+
- Never invent, reinterpret, fabricate, normalize, or rewrite any media file path under any circumstances.
|
|
11
|
+
|
|
12
|
+
</media_file_list_usage_rules>
|
|
@@ -45,6 +45,8 @@ Custom code block generation rules:
|
|
|
45
45
|
|
|
46
46
|
{% include "../d2-diagram/guide.md" %}
|
|
47
47
|
|
|
48
|
+
{% include "../../common/document/media-file-list-usage-rules.md" %}
|
|
49
|
+
|
|
48
50
|
Tool result usage rules:
|
|
49
51
|
- Only use the `"role": "tool"` result as the datasource for document enhancement.
|
|
50
52
|
- Do not include `"role": "agent"` content in the final output.
|
|
@@ -38,6 +38,8 @@ Custom component optimization rules:
|
|
|
38
38
|
Custom code block optimization rules:
|
|
39
39
|
{% include "../custom/custom-code-block.md" %}
|
|
40
40
|
|
|
41
|
+
{% include "../../common/document/media-file-list-usage-rules.md" %}
|
|
42
|
+
|
|
41
43
|
Diagram generation rules:
|
|
42
44
|
{% include "../d2-diagram/guide.md" %}
|
|
43
45
|
<diagram_generation_rules>
|
package/utils/file-utils.mjs
CHANGED
|
@@ -408,10 +408,31 @@ async function isTextFile(filePath) {
|
|
|
408
408
|
/**
 * Tell whether a value is an absolute http(s) URL.
 *
 * @param {unknown} fileUrl - Candidate URL; anything that is not a string is rejected.
 * @returns {boolean} `true` only when `fileUrl` is a string that parses as a URL
 *   whose protocol is `http:` or `https:`; `false` for every other protocol and
 *   for strings that `new URL()` cannot parse (relative paths, plain file names).
 */
export function isRemoteFile(fileUrl) {
  if (typeof fileUrl !== "string") return false;

  try {
    const url = new URL(fileUrl);
    // Only accept http and https URLs; any other protocol is treated as a bad URL
    return ["http:", "https:"].includes(url.protocol);
  } catch {
    // `new URL()` throws on input that is not an absolute URL
    return false;
  }
}
|
|
423
|
+
|
|
424
|
+
/**
 * Check whether a remote http(s) file responds successfully.
 *
 * A HEAD request is tried first. If the server answers 405 or 501 (HEAD not
 * supported), the check is retried with GET so that such hosts are not
 * misreported as unavailable. Every request is bounded by a timeout so an
 * unresponsive host cannot stall the caller indefinitely.
 *
 * @param {string} fileUrl - URL to probe; values rejected by `isRemoteFile` yield `false`.
 * @param {{ timeoutMs?: number }} [options] - Optional settings.
 * @param {number} [options.timeoutMs=10000] - Per-request timeout in milliseconds.
 * @returns {Promise<boolean>} `true` when the final response has an ok (2xx) status;
 *   `false` on a non-ok status, a network error, or a timeout. Never rejects.
 */
export async function isRemoteFileAvailable(fileUrl, { timeoutMs = 10000 } = {}) {
  if (!isRemoteFile(fileUrl)) return false;

  try {
    let res = await fetch(fileUrl, {
      method: "HEAD",
      signal: AbortSignal.timeout(timeoutMs),
    });

    // Some servers do not implement HEAD; fall back to GET before giving up
    if (res.status === 405 || res.status === 501) {
      res = await fetch(fileUrl, {
        method: "GET",
        signal: AbortSignal.timeout(timeoutMs),
      });
      // Only the status matters; best-effort release of the unread body
      void res.body?.cancel().catch(() => {});
    }

    return res.ok;
  } catch (error) {
    debug(`Failed to check HTTP file availability: ${fileUrl} - ${error.message}`);
    return false;
  }
}
|
|
416
437
|
|
|
417
438
|
export async function isRemoteTextFile(fileUrl) {
|
|
@@ -3,9 +3,12 @@ import path from "node:path";
|
|
|
3
3
|
import remarkGfm from "remark-gfm";
|
|
4
4
|
import remarkLint from "remark-lint";
|
|
5
5
|
import remarkParse from "remark-parse";
|
|
6
|
+
import { isRelative } from "ufo";
|
|
6
7
|
import { unified } from "unified";
|
|
7
8
|
import { visit } from "unist-util-visit";
|
|
8
9
|
import { VFile } from "vfile";
|
|
10
|
+
|
|
11
|
+
import { isRemoteFile, isRemoteFileAvailable } from "./file-utils.mjs";
|
|
9
12
|
import { validateMermaidSyntax } from "./mermaid-validator.mjs";
|
|
10
13
|
|
|
11
14
|
/**
|
|
@@ -232,6 +235,34 @@ function checkLocalImages(markdown, source, errorMessages, markdownFilePath, bas
|
|
|
232
235
|
}
|
|
233
236
|
}
|
|
234
237
|
|
|
238
|
+
/**
 * Verify that every remote (http/https) image embedded in the markdown is reachable.
 *
 * Relative paths, root-relative paths (`/...`) and `data:` URLs are skipped here.
 * Each distinct remote URL is probed once, all probes run concurrently, and one
 * error is appended per failing image occurrence, in document order.
 *
 * @param {string} markdown - The markdown content
 * @param {string} source - Label of the content being checked, used in error messages
 * @param {string[]} errorMessages - Array that collected error messages are pushed onto
 * @returns {Promise<void>}
 */
async function checkRemoteImages(markdown, source, errorMessages) {
  const imageRegex = /!\[([^\]]*)\]\(([^)]+)\)/g;

  const remoteImages = [];
  for (const [, altText, rawTarget] of markdown.matchAll(imageRegex)) {
    // Keep only the destination: drop an optional title (`![alt](url "title")`)
    // and the optional angle brackets around the URL (`![alt](<url>)`).
    const imagePath = rawTarget
      .trim()
      .split(/\s+/)[0]
      .replace(/^<(.*)>$/, "$1");

    if (isRelative(imagePath)) continue;
    if (imagePath.startsWith("/")) continue;

    // Skip data URLs
    if (/^data:/.test(imagePath)) continue;

    if (isRemoteFile(imagePath)) {
      remoteImages.push({ altText, imagePath });
    }
  }

  if (remoteImages.length === 0) return;

  // Probe each distinct URL once, concurrently, instead of one request per occurrence
  const uniqueUrls = [...new Set(remoteImages.map((image) => image.imagePath))];
  const availability = new Map(
    await Promise.all(uniqueUrls.map(async (url) => [url, await isRemoteFileAvailable(url)])),
  );

  for (const { altText, imagePath } of remoteImages) {
    if (availability.get(imagePath)) continue;

    errorMessages.push(
      `Found invalid remote image in ${source}: ![${altText}](${imagePath}) - only valid media resources can be used`,
    );
  }
}
|
|
265
|
+
|
|
235
266
|
/**
|
|
236
267
|
* Check content structure and formatting issues
|
|
237
268
|
* @param {string} markdown - The markdown content
|
|
@@ -370,7 +401,10 @@ export async function checkMarkdown(markdown, source = "content", options = {})
|
|
|
370
401
|
// 2. Check local images existence
|
|
371
402
|
checkLocalImages(markdown, source, errorMessages, filePath, baseDir);
|
|
372
403
|
|
|
373
|
-
// 3. Check content structure and formatting issues
|
|
404
|
+
// 3. Check remote images existence
|
|
405
|
+
await checkRemoteImages(markdown, source, errorMessages);
|
|
406
|
+
|
|
407
|
+
// 4. Check content structure and formatting issues
|
|
374
408
|
checkContentStructure(markdown, source, errorMessages);
|
|
375
409
|
|
|
376
410
|
// Check mermaid code blocks and other custom validations
|
|
@@ -1,9 +0,0 @@
|
|
|
1
|
-
<media_handling_rules>
|
|
2
|
-
Media resource usage rules:
|
|
3
|
-
|
|
4
|
-
- When DataSources contain media resource files, incorporate them appropriately in the generated content
|
|
5
|
-
- Media resources are provided in markdown format, example: `![Alt text](path/to/media)`
|
|
6
|
-
- Display images in markdown format within generated results
|
|
7
|
-
- Based on resource descriptions, place images strategically in contextually relevant positions to enhance the presentation
|
|
8
|
-
- To ensure correct media resource paths, **only use media resources provided in media_list or remote URL media resources**
|
|
9
|
-
</media_handling_rules>
|