npm - @aigne/doc-smith - Versions diffs - 0.9.8 → 0.9.9-beta.1 - Mend

@aigne/doc-smith 0.9.8 → 0.9.9-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37) hide show

package/CHANGELOG.md +19 -0
package/agents/create/aggregate-document-structure.mjs +21 -0
package/agents/create/analyze-diagram-type-llm.yaml +1 -2
package/agents/create/analyze-diagram-type.mjs +160 -2
package/agents/create/generate-diagram-image.yaml +31 -0
package/agents/create/generate-structure.yaml +1 -12
package/agents/create/replace-d2-with-image.mjs +12 -27
package/agents/create/utils/merge-document-structures.mjs +9 -3
package/agents/localize/index.yaml +4 -0
package/agents/localize/save-doc-translation-or-skip.mjs +18 -0
package/agents/localize/set-review-content.mjs +58 -0
package/agents/localize/translate-diagram.yaml +62 -0
package/agents/localize/translate-document-wrapper.mjs +34 -0
package/agents/localize/translate-multilingual.yaml +15 -9
package/agents/localize/translate-or-skip-diagram.mjs +52 -0
package/agents/publish/translate-meta.mjs +58 -6
package/agents/update/generate-diagram.yaml +25 -8
package/agents/update/index.yaml +1 -8
package/agents/update/save-and-translate-document.mjs +5 -1
package/agents/update/update-single/update-single-document-detail.mjs +52 -10
package/agents/utils/analyze-feedback-intent.mjs +197 -80
package/agents/utils/check-detail-result.mjs +14 -1
package/agents/utils/choose-docs.mjs +3 -43
package/agents/utils/save-doc-translation.mjs +2 -33
package/agents/utils/save-doc.mjs +3 -37
package/aigne.yaml +2 -2
package/package.json +1 -1
package/prompts/detail/diagram/generate-image-user.md +49 -0
package/utils/d2-utils.mjs +10 -3
package/utils/delete-diagram-images.mjs +3 -3
package/utils/diagram-version-utils.mjs +14 -0
package/utils/file-utils.mjs +40 -5
package/utils/image-compress.mjs +1 -1
package/utils/sync-diagram-to-translations.mjs +3 -3
package/utils/translate-diagram-images.mjs +790 -0
package/agents/update/check-sync-image-flag.mjs +0 -55
package/agents/update/sync-images-and-exit.mjs +0 -148

package/utils/diagram-version-utils.mjs ADDED Viewed

@@ -0,0 +1,14 @@
+import fs from "fs-extra";
+/**
+ * Calculate timestamp for diagram image
+ * Uses file modification time (mtime) as the version identifier
+ * @param {string} imagePath - Absolute path to the image file
+ * @returns {Promise<string>} Unix timestamp in seconds (as string)
+ */
+export async function calculateImageTimestamp(imagePath) {
+  const stats = await fs.stat(imagePath);
+  // Use modification time, convert to Unix timestamp (seconds)
+  const timestamp = Math.floor(stats.mtimeMs / 1000);
+  return timestamp.toString();
+}

package/utils/file-utils.mjs CHANGED Viewed

@@ -530,9 +530,38 @@ export async function readFileContents(files, baseDir = process.cwd(), options =
   return results.filter((result) => result !== null);
 }
+/**
+ * Sanitize text by removing or escaping disallowed LLM special tokens
+ * This prevents errors when encoding text that contains special tokens like <|endoftext|>
+ * @param {string} text - Text to sanitize
+ * @returns {string} Sanitized text safe for tokenization
+ */
+function sanitizeForTokenization(text) {
+  if (typeof text !== "string") return text;
+  // Replace <|endoftext|> with a safe alternative that won't trigger special token parsing
+  // We replace it with a space-separated version to prevent tokenizer from recognizing it as a special token
+  return text.replace(/<\|endoftext\|>/g, "<| endoftext |>");
+}
 export function calculateTokens(text) {
-  const tokens = encode(text);
-  return tokens.length;
+  try {
+    // Sanitize text before encoding to avoid errors with special tokens
+    const sanitizedText = sanitizeForTokenization(text);
+    const tokens = encode(sanitizedText);
+    return tokens.length;
+  } catch (error) {
+    // If encoding still fails, try with more aggressive sanitization
+    console.warn(`Token calculation warning: ${error.message}`);
+    const fallbackText = sanitizeForTokenization(text).replace(/<\|[^|]+\|>/g, "");
+    try {
+      const tokens = encode(fallbackText);
+      return tokens.length;
+    } catch {
+      // Last resort: estimate tokens based on character count (rough approximation)
+      console.warn(`Token calculation fallback: using character-based estimation`);
+      return Math.ceil(fallbackText.length / 4); // Rough estimate: ~4 chars per token
+    }
+  }
 }
 /**
@@ -547,9 +576,15 @@ export function calculateFileStats(sourceFiles) {
   for (const source of sourceFiles) {
     const { content } = source;
     if (content) {
-      // Count tokens using gpt-tokenizer
-      const tokens = encode(content);
-      totalTokens += tokens.length;
+      // Count tokens using gpt-tokenizer with sanitization
+      try {
+        const sanitizedContent = sanitizeForTokenization(content);
+        const tokens = encode(sanitizedContent);
+        totalTokens += tokens.length;
+      } catch {
+        // Fallback: use calculateTokens which has its own error handling
+        totalTokens += calculateTokens(content);
+      }
       // Count lines (excluding empty lines)
       totalLines += content.split("\n").filter((line) => line.trim() !== "").length;

package/utils/image-compress.mjs CHANGED Viewed

@@ -68,7 +68,7 @@ export async function compressImage(inputPath, options = {}) {
     return finalOutputPath;
   } catch (error) {
-    debug(`⚠️  Failed to compress image ${inputPath}: ${error.message}`);
+    debug(`❌ Failed to compress image ${inputPath}`, error);
     // Return original path if compression fails
     return inputPath;
   }

package/utils/sync-diagram-to-translations.mjs CHANGED Viewed

@@ -61,12 +61,12 @@ async function findTranslationFiles(docPath, docsDir, locale) {
  */
 function extractDiagramImagePaths(content) {
   const images = [];
-  const matches = Array.from(content.matchAll(diagramImageWithPathRegex));
+  const matches = Array.from((content || "").matchAll(diagramImageWithPathRegex));
   for (const match of matches) {
     images.push({
-      path: match[1],
-      fullMatch: match[0],
+      path: match[4] || "",
+      fullMatch: match[0] || "",
       index: match.index,
     });
   }