@aigne/doc-smith 0.9.8 → 0.9.9-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. package/CHANGELOG.md +19 -0
  2. package/agents/create/aggregate-document-structure.mjs +21 -0
  3. package/agents/create/analyze-diagram-type-llm.yaml +1 -2
  4. package/agents/create/analyze-diagram-type.mjs +160 -2
  5. package/agents/create/generate-diagram-image.yaml +31 -0
  6. package/agents/create/generate-structure.yaml +1 -12
  7. package/agents/create/replace-d2-with-image.mjs +12 -27
  8. package/agents/create/utils/merge-document-structures.mjs +9 -3
  9. package/agents/localize/index.yaml +4 -0
  10. package/agents/localize/save-doc-translation-or-skip.mjs +18 -0
  11. package/agents/localize/set-review-content.mjs +58 -0
  12. package/agents/localize/translate-diagram.yaml +62 -0
  13. package/agents/localize/translate-document-wrapper.mjs +34 -0
  14. package/agents/localize/translate-multilingual.yaml +15 -9
  15. package/agents/localize/translate-or-skip-diagram.mjs +52 -0
  16. package/agents/publish/translate-meta.mjs +58 -6
  17. package/agents/update/generate-diagram.yaml +25 -8
  18. package/agents/update/index.yaml +1 -8
  19. package/agents/update/save-and-translate-document.mjs +5 -1
  20. package/agents/update/update-single/update-single-document-detail.mjs +52 -10
  21. package/agents/utils/analyze-feedback-intent.mjs +197 -80
  22. package/agents/utils/check-detail-result.mjs +14 -1
  23. package/agents/utils/choose-docs.mjs +3 -43
  24. package/agents/utils/save-doc-translation.mjs +2 -33
  25. package/agents/utils/save-doc.mjs +3 -37
  26. package/aigne.yaml +2 -2
  27. package/package.json +1 -1
  28. package/prompts/detail/diagram/generate-image-user.md +49 -0
  29. package/utils/d2-utils.mjs +10 -3
  30. package/utils/delete-diagram-images.mjs +3 -3
  31. package/utils/diagram-version-utils.mjs +14 -0
  32. package/utils/file-utils.mjs +40 -5
  33. package/utils/image-compress.mjs +1 -1
  34. package/utils/sync-diagram-to-translations.mjs +3 -3
  35. package/utils/translate-diagram-images.mjs +790 -0
  36. package/agents/update/check-sync-image-flag.mjs +0 -55
  37. package/agents/update/sync-images-and-exit.mjs +0 -148
@@ -0,0 +1,14 @@
1
+ import fs from "fs-extra";
2
+
3
+ /**
4
+ * Calculate timestamp for diagram image
5
+ * Uses file modification time (mtime) as the version identifier
6
+ * @param {string} imagePath - Absolute path to the image file
7
+ * @returns {Promise<string>} Unix timestamp in seconds (as string)
8
+ */
9
+ export async function calculateImageTimestamp(imagePath) {
10
+ const stats = await fs.stat(imagePath);
11
+ // Use modification time, convert to Unix timestamp (seconds)
12
+ const timestamp = Math.floor(stats.mtimeMs / 1000);
13
+ return timestamp.toString();
14
+ }
@@ -530,9 +530,38 @@ export async function readFileContents(files, baseDir = process.cwd(), options =
530
530
  return results.filter((result) => result !== null);
531
531
  }
532
532
 
533
+ /**
534
+ * Sanitize text by removing or escaping disallowed LLM special tokens
535
+ * This prevents errors when encoding text that contains special tokens like <|endoftext|>
536
+ * @param {string} text - Text to sanitize
537
+ * @returns {string} Sanitized text safe for tokenization
538
+ */
539
+ function sanitizeForTokenization(text) {
540
+ if (typeof text !== "string") return text;
541
+ // Replace <|endoftext|> with a safe alternative that won't trigger special token parsing
542
+ // We replace it with a space-separated version to prevent tokenizer from recognizing it as a special token
543
+ return text.replace(/<\|endoftext\|>/g, "<| endoftext |>");
544
+ }
545
+
533
546
  export function calculateTokens(text) {
534
- const tokens = encode(text);
535
- return tokens.length;
547
+ try {
548
+ // Sanitize text before encoding to avoid errors with special tokens
549
+ const sanitizedText = sanitizeForTokenization(text);
550
+ const tokens = encode(sanitizedText);
551
+ return tokens.length;
552
+ } catch (error) {
553
+ // If encoding still fails, try with more aggressive sanitization
554
+ console.warn(`Token calculation warning: ${error.message}`);
555
+ const fallbackText = sanitizeForTokenization(text).replace(/<\|[^|]+\|>/g, "");
556
+ try {
557
+ const tokens = encode(fallbackText);
558
+ return tokens.length;
559
+ } catch {
560
+ // Last resort: estimate tokens based on character count (rough approximation)
561
+ console.warn(`Token calculation fallback: using character-based estimation`);
562
+ return Math.ceil(fallbackText.length / 4); // Rough estimate: ~4 chars per token
563
+ }
564
+ }
536
565
  }
537
566
 
538
567
  /**
@@ -547,9 +576,15 @@ export function calculateFileStats(sourceFiles) {
547
576
  for (const source of sourceFiles) {
548
577
  const { content } = source;
549
578
  if (content) {
550
- // Count tokens using gpt-tokenizer
551
- const tokens = encode(content);
552
- totalTokens += tokens.length;
579
+ // Count tokens using gpt-tokenizer with sanitization
580
+ try {
581
+ const sanitizedContent = sanitizeForTokenization(content);
582
+ const tokens = encode(sanitizedContent);
583
+ totalTokens += tokens.length;
584
+ } catch {
585
+ // Fallback: use calculateTokens which has its own error handling
586
+ totalTokens += calculateTokens(content);
587
+ }
553
588
 
554
589
  // Count lines (excluding empty lines)
555
590
  totalLines += content.split("\n").filter((line) => line.trim() !== "").length;
@@ -68,7 +68,7 @@ export async function compressImage(inputPath, options = {}) {
68
68
 
69
69
  return finalOutputPath;
70
70
  } catch (error) {
71
- debug(`⚠️ Failed to compress image ${inputPath}: ${error.message}`);
71
+ debug(`❌ Failed to compress image ${inputPath}`, error);
72
72
  // Return original path if compression fails
73
73
  return inputPath;
74
74
  }
@@ -61,12 +61,12 @@ async function findTranslationFiles(docPath, docsDir, locale) {
61
61
  */
62
62
  function extractDiagramImagePaths(content) {
63
63
  const images = [];
64
- const matches = Array.from(content.matchAll(diagramImageWithPathRegex));
64
+ const matches = Array.from((content || "").matchAll(diagramImageWithPathRegex));
65
65
 
66
66
  for (const match of matches) {
67
67
  images.push({
68
- path: match[1],
69
- fullMatch: match[0],
68
+ path: match[4] || "",
69
+ fullMatch: match[0] || "",
70
70
  index: match.index,
71
71
  });
72
72
  }