@aigne/doc-smith 0.8.15-beta.1 → 0.8.15-beta.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. package/CHANGELOG.md +89 -0
  2. package/agents/clear/choose-contents.mjs +4 -4
  3. package/agents/clear/clear-auth-tokens.mjs +8 -8
  4. package/agents/clear/clear-deployment-config.mjs +2 -2
  5. package/agents/clear/clear-document-config.mjs +3 -3
  6. package/agents/clear/clear-document-structure.mjs +10 -10
  7. package/agents/clear/clear-generated-docs.mjs +103 -14
  8. package/agents/clear/clear-media-description.mjs +7 -7
  9. package/agents/evaluate/document-structure.yaml +3 -1
  10. package/agents/evaluate/document.yaml +3 -1
  11. package/agents/evaluate/index.yaml +1 -3
  12. package/agents/generate/check-diagram.mjs +1 -1
  13. package/agents/generate/check-need-generate-structure.mjs +2 -7
  14. package/agents/generate/draw-diagram.yaml +4 -0
  15. package/agents/generate/generate-structure.yaml +117 -65
  16. package/agents/generate/index.yaml +3 -3
  17. package/agents/generate/{merge-d2-diagram.yaml → merge-diagram.yaml} +7 -6
  18. package/agents/generate/update-document-structure.yaml +1 -1
  19. package/agents/generate/user-review-document-structure.mjs +3 -25
  20. package/agents/generate/utils/merge-document-structures.mjs +30 -0
  21. package/agents/init/check.mjs +4 -2
  22. package/agents/init/index.mjs +37 -7
  23. package/agents/media/load-media-description.mjs +12 -24
  24. package/agents/publish/publish-docs.mjs +3 -8
  25. package/agents/schema/document-execution-structure.yaml +1 -1
  26. package/agents/schema/document-structure-item.yaml +23 -0
  27. package/agents/schema/document-structure-refine-item.yaml +20 -0
  28. package/agents/schema/document-structure.yaml +1 -1
  29. package/agents/translate/index.yaml +1 -4
  30. package/agents/translate/record-translation-history.mjs +6 -2
  31. package/agents/translate/translate-multilingual.yaml +1 -1
  32. package/agents/update/batch-generate-document.yaml +1 -1
  33. package/agents/update/batch-update-document.yaml +1 -1
  34. package/agents/update/check-document.mjs +35 -13
  35. package/agents/update/check-generate-diagram.mjs +29 -0
  36. package/agents/update/generate-diagram.yaml +29 -0
  37. package/agents/update/generate-document.yaml +17 -30
  38. package/agents/update/handle-document-update.yaml +10 -1
  39. package/agents/update/save-and-translate-document.mjs +18 -47
  40. package/agents/update/update-document-detail.yaml +2 -1
  41. package/agents/update/update-single-document.yaml +1 -1
  42. package/agents/update/user-review-document.mjs +6 -5
  43. package/agents/utils/choose-docs.mjs +16 -5
  44. package/agents/utils/find-item-by-path.mjs +4 -2
  45. package/agents/utils/load-sources.mjs +63 -46
  46. package/agents/utils/{save-docs.mjs → post-generate.mjs} +2 -51
  47. package/agents/utils/save-doc-translation.mjs +27 -0
  48. package/agents/utils/{save-single-doc.mjs → save-doc.mjs} +17 -12
  49. package/agents/utils/save-sidebar.mjs +38 -0
  50. package/agents/utils/{transform-detail-datasources.mjs → transform-detail-data-sources.mjs} +7 -7
  51. package/aigne.yaml +16 -8
  52. package/package.json +3 -1
  53. package/prompts/common/document/content-rules-core.md +6 -6
  54. package/prompts/common/document/media-file-list-usage-rules.md +12 -0
  55. package/prompts/common/document/openapi-usage-rules.md +36 -0
  56. package/prompts/common/document/role-and-personality.md +1 -2
  57. package/prompts/common/document-structure/conflict-resolution-guidance.md +2 -2
  58. package/prompts/common/document-structure/document-structure-rules.md +8 -8
  59. package/prompts/common/document-structure/output-constraints.md +3 -3
  60. package/prompts/detail/custom/custom-components.md +38 -3
  61. package/prompts/detail/d2-diagram/rules.md +11 -14
  62. package/prompts/detail/d2-diagram/system-prompt.md +0 -14
  63. package/prompts/detail/d2-diagram/user-prompt.md +39 -0
  64. package/prompts/detail/generate/document-rules.md +3 -3
  65. package/prompts/detail/generate/system-prompt.md +2 -6
  66. package/prompts/detail/generate/user-prompt.md +20 -61
  67. package/prompts/detail/update/system-prompt.md +2 -6
  68. package/prompts/detail/update/user-prompt.md +7 -6
  69. package/prompts/evaluate/document.md +0 -4
  70. package/prompts/structure/check-document-structure.md +4 -4
  71. package/prompts/structure/generate/system-prompt.md +0 -31
  72. package/prompts/structure/generate/user-prompt.md +73 -29
  73. package/prompts/structure/review/structure-review-system.md +81 -0
  74. package/prompts/structure/update/system-prompt.md +1 -1
  75. package/prompts/structure/update/user-prompt.md +4 -4
  76. package/prompts/translate/code-block.md +13 -3
  77. package/prompts/translate/translate-document.md +3 -3
  78. package/types/document-structure-schema.mjs +3 -3
  79. package/utils/constants/index.mjs +6 -0
  80. package/utils/docs-finder-utils.mjs +85 -3
  81. package/utils/extract-api.mjs +32 -0
  82. package/utils/file-utils.mjs +153 -101
  83. package/utils/history-utils.mjs +20 -8
  84. package/utils/load-config.mjs +20 -1
  85. package/utils/markdown-checker.mjs +35 -1
  86. package/utils/utils.mjs +67 -65
  87. package/agents/generate/document-structure-tools/generate-sub-structure.mjs +0 -131
  88. package/agents/generate/generate-structure-without-tools.yaml +0 -65
  89. package/prompts/common/document/media-handling-rules.md +0 -9
@@ -0,0 +1,81 @@
1
+ <role_and_goal>
2
+ You are a **Documentation Structure Refiner** with the analytical mindset of an **INTJ (The Architect)**. You combine expert knowledge in technical documentation architecture and information design with strategic thinking, systematic analysis, and perfectionist attention to detail. Your core strengths are understanding complex systems, creating logically sound blueprints, and anticipating future documentation challenges.
3
+ </role_and_goal>
4
+
5
+ <document_info>
6
+ projectName: |
7
+ {{projectName}}
8
+ {% if projectDesc %}
9
+ projectDesc: |
10
+ {{projectDesc}}
11
+ {% endif %}
12
+ </document_info>
13
+
14
+ <document_structure>
15
+ {{ documentStructure | yaml.stringify }}
16
+ </document_structure>
17
+
18
+ <instructions>
19
+
20
+ Your task:
21
+ Given an existing document structure (a JSON array or tree of sections), refine and optimize its **hierarchy and order** to improve clarity, usability, and conventional organization.
22
+ ⚠️ You must not add or rename any nodes. You may delete nodes when necessary for better organization, and you may adjust the **order** and **nesting levels** of existing nodes.
23
+
24
+ ---
25
+
26
+ ## Optimization Goals
27
+
28
+ 1. **Logical Order**
29
+ - Introductory materials should always appear at the beginning:
30
+ - “Overview”, “Introduction”, “Quick Start”, “Getting Started”, “Setup” should be near the top.
31
+ - Meta and community-related sections (e.g., “Community”, “Contributing”, “License”, “Changelog”) should always be at the end.
32
+ - Technical reference and configuration sections should appear after conceptual and usage sections.
33
+
34
+ 2. **Hierarchy Correction**
35
+ - Ensure proper depth:
36
+ - “Overview” and “Quick Start” should have **1–2 levels max**.
37
+ - Remove deeply nested technical details from “Overview” or “Quick Start”.
38
+ - Relocate such details under “Architecture”, “API Reference”, or “Modules”.
39
+ - Keep beneficial nodes — you may delete duplicated, redundant, or harmful nodes when needed for clarity.
40
+
41
+ 3. **Grouping and Alignment**
42
+ - Align similar nodes logically (e.g., group “Usage”, “Examples”, “Tutorials” together).
43
+ - Avoid duplication or overlap by reordering or strategic deletion when necessary.
44
+
45
+ 4. **Naming and Identity**
46
+ - You are **not allowed to rename or reword** any section titles or descriptions.
47
+ - Keep all existing keys, identifiers, and text intact.
48
+
49
+ 5. **Balance**
50
+ - Maintain a clean, well-organized hierarchy.
51
+ - Keep top-level nodes concise (≤ 8 preferred).
52
+ - Avoid over-nesting (≤ 4 levels deep).
53
+
54
+ ---
55
+
56
+ ## Behavior Rules
57
+
58
+ - Do **not** add new nodes.
59
+ - You **may** delete nodes when they are redundant, duplicated, or detrimental to documentation clarity.
60
+ - Do **not** rename or rewrite content.
61
+ - You **may** move nodes to different parents or reorder siblings to achieve better logical flow.
62
+ - You **must** maintain structural integrity for all remaining nodes.
63
+ - The output must be a complete, valid document structure array matching the expected schema.
64
+
65
+ ---
66
+
67
+ ## Objective
68
+
69
+ Output a complete `structures` array containing the optimized document structure:
70
+ 1. Include ALL nodes you are keeping from the input structure (whether modified or not); only nodes you deliberately delete are omitted
71
+ 2. Each item must include: `id`, `title`, `description`, `path`, `parentPath` (if not top-level)
72
+ 3. Apply your optimizations through proper ordering, hierarchy changes, and selective deletion
73
+ 4. Maintain all required fields and ensure paths are valid (start with /, no spaces/special chars)
74
+ 5. **Important**: Only modify structural aspects (`id`, `title`, `description`, `path`, `parentPath`). Do NOT modify `sourceIds` or other data fields
75
+
76
+ **Optimization Approach:**
77
+ - Reorder nodes by adjusting their position in the array
78
+ - Change hierarchy by modifying `parentPath` values (use the path of the new parent node)
79
+ - Delete problematic nodes by simply omitting them from the output array
80
+ - Keep beneficial nodes with their original content intact
81
+ </instructions>
@@ -99,7 +99,7 @@ Analyze the user feedback to determine the intended operation:
99
99
 
100
100
  When to use Tools:
101
101
  - During document structure update, if the given context is missing or lacks referenced content, use glob/grep/readFile to obtain more context
102
- - When sourceIds or file content from <file_list> is needed but not provided in DataSources, use readFile to read the file content
102
+ - When sourceIds or file content from `<file_list>` is needed but not provided in `<data_sources>`, use readFile to read the file content
103
103
  </file_tool_usage>
104
104
 
105
105
 
@@ -6,9 +6,9 @@
6
6
  {{allFilesPaths}}
7
7
  </file_list>
8
8
 
9
- <datasources>
10
- {{ datasources }}
11
- </datasources>
9
+ <data_sources>
10
+ {{ dataSourceChunk }}
11
+ </data_sources>
12
12
 
13
13
 
14
14
  Initial Documentation Structure:
@@ -38,5 +38,5 @@ Processing workflow:
38
38
 
39
39
  Rules:
40
40
  ** All changes must be made using Tools. **
41
- ** Carefully check if the latest version of documentStructure data meets user requirements, must avoid duplicate Tool calls. **
41
+ ** Carefully check whether the latest version of `<document_structure>` data already meets the user requirements, and avoid duplicate Tool calls. **
42
42
  </instructions>
@@ -2,13 +2,23 @@
2
2
  The following formats are considered Code Blocks:
3
3
 
4
4
  - Wrapped with ```
5
- - Supports configurations: language, title, icon, where title and icon are optional
5
+ - Supports configurations: language, optional title, optional icon (icon uses key=value)
6
+ - title is free text placed after the language (not as title=xxx), may contain spaces, and **must NEVER be wrapped in quotes**
6
7
  - content can be code, command line examples, text or any other content
7
8
 
8
9
  <code_block_sample>
9
10
 
10
- ```{language} [{title}] [icon={icon}]
11
- {content}
11
+ - `language`: javascript
12
+ - `title`: Modern: Using createRoot()
13
+ - `icon`: logos:javascript
14
+
15
+ ```javascript Modern: Using createRoot() icon=logos:javascript
16
+ import { createRoot } from 'react-dom/client'
17
+
18
+ const container = document.getElementById('root')
19
+ const root = createRoot(container)
20
+
21
+ root.unmount()
12
22
  ```
13
23
 
14
24
  </code_block_sample>
@@ -5,10 +5,10 @@ You are an **Elite Polyglot Localization and Translation Specialist** with exten
5
5
  Core Mandates:
6
6
 
7
7
  1. Semantic Fidelity (Accuracy): The translation must perfectly and comprehensively convey the **entire meaning, tone, and nuance** of the source text. **No omission, addition, or distortion of the original content** is permitted.
8
- 2. Native Fluency and Style: The resulting text must adhere strictly to the target language's **grammar, syntax, and idiomatic expressions**. The translation must **sound like it was originally written by a native speaker**, completely **free of grammatical errors** or "translationese" (literal, stiff, or unnatural phrasing).
8
+ 2. Native Fluency and Style: The resulting text must adhere strictly to the target language's **grammar, syntax, and idiomatic expressions**. The translation must **sound like it was originally written by a native speaker**, completely **free of grammatical errors** and of "translationese" (literal, stiff, or unnatural phrasing).
9
9
  3. Readability and Flow: The final output must be **smooth, logical, and highly readable**. Sentences must flow naturally, ensuring a pleasant and coherent reading experience for the target audience.
10
10
  4. Localization and Clarity: Where a **literal (word-for-word) translation** of a term, phrase, or idiom would be **uncommon, confusing, or ambiguous** in the target language, you must apply **localization best practices**. This means translating the **concept** into the most **idiomatic, common, and easily understandable expression** in the target language.
11
- 5. Versatility and Scope: You are proficient in handling **any pair of requested languages** (e.g., Chinese $\leftrightarrow$ English, English $\leftrightarrow$ Japanese) and are adept at translating diverse **document types**, including but not limited to: **Technical Manuals, Business Reports, Marketing Copy/Ads, Legal Documents, Academic Papers, and General Correspondence.**
11
+ 5. Versatility and Scope: You are proficient in handling **any pair of requested languages** (e.g., Chinese <--> English, English <--> Japanese) and are adept at translating diverse **document types**, including but not limited to: **Technical Manuals, Business Reports, Marketing Copy/Ads, Legal Documents, Academic Papers, and General Correspondence.**
12
12
 
13
13
  </role_and_goal>
14
14
 
@@ -299,5 +299,5 @@ Original text as follows:
299
299
  </content>
300
300
 
301
301
  <output_constraints>
302
- Please **accurately** translate the content within <content> tags (excluding the outermost <content> tags) into **{{ language }}**, strictly following the translation requirements.
302
+ Please **accurately** translate the content within `<content>` tags (excluding the outermost `<content>` tags) into **{{ language }}**, strictly following the translation requirements.
303
303
  </output_constraints>
@@ -6,7 +6,7 @@ export const documentItemSchema = z.object({
6
6
  title: z.string().min(1, "Title is required"),
7
7
  description: z.string().min(1, "Description is required"),
8
8
  path: z.string().startsWith("/", 'Path must start with "/"'),
9
- parentId: z.string().nullable(),
9
+ parentId: z.string().nullish(),
10
10
  sourceIds: z.array(z.string()).min(1, "At least one source ID is required"),
11
11
  });
12
12
 
@@ -18,7 +18,7 @@ export const addDocumentInputSchema = z.object({
18
18
  title: z.string().min(1, "Title is required"),
19
19
  description: z.string().min(1, "Description is required"),
20
20
  path: z.string().startsWith("/", 'Path must start with "/"'),
21
- parentId: z.string().nullable().optional(),
21
+ parentId: z.string().nullish(),
22
22
  sourceIds: z.array(z.string()).min(1, "At least one source ID is required"),
23
23
  });
24
24
 
@@ -44,7 +44,7 @@ export const deleteDocumentOutputSchema = z.object({
44
44
  // Move document schemas
45
45
  export const moveDocumentInputSchema = z.object({
46
46
  path: z.string().min(1, "Path is required"),
47
- newParentId: z.string().nullable().optional(),
47
+ newParentId: z.string().nullish(),
48
48
  });
49
49
 
50
50
  export const moveDocumentOutputSchema = z.object({
@@ -549,3 +549,9 @@ export const DOC_SMITH_DIR = ".aigne/doc-smith";
549
549
  export const TMP_DIR = ".tmp";
550
550
  export const TMP_DOCS_DIR = "docs";
551
551
  export const TMP_ASSETS_DIR = "assets";
552
+
553
+ export const DOC_ACTION = {
554
+ translate: "translate",
555
+ update: "update",
556
+ clear: "clear",
557
+ };
@@ -1,14 +1,14 @@
1
1
  import { access, readdir, readFile } from "node:fs/promises";
2
2
  import { join } from "node:path";
3
+ import { pathExists } from "./file-utils.mjs";
3
4
 
4
5
  /**
5
6
  * Get action-specific text by replacing the {action} placeholder in baseText
6
- * @param {boolean} isTranslate - Whether this is a translation action
7
7
  * @param {string} baseText - Base text template with {action} placeholder
8
+ * @param {string} action - doc action type
8
9
  * @returns {string} Text with action replaced
9
10
  */
10
- export function getActionText(isTranslate, baseText) {
11
- const action = isTranslate ? "translate" : "update";
11
+ export function getActionText(baseText, action) {
12
12
  return baseText.replace("{action}", action);
13
13
  }
14
14
 
@@ -276,3 +276,85 @@ export function addFeedbackToItems(items, feedback) {
276
276
  feedback: feedback.trim(),
277
277
  }));
278
278
  }
279
+
280
+ /**
281
+ * Load document execution structure from structure-plan.json
282
+ * @param {string} outputDir - Output directory containing structure-plan.json
283
+ * @returns {Promise<Array|null>} Document execution structure array or null if not found/failed
284
+ */
285
+ export async function loadDocumentStructure(outputDir) {
286
+ if (!outputDir) {
287
+ return null;
288
+ }
289
+
290
+ try {
291
+ const structurePlanPath = join(outputDir, "structure-plan.json");
292
+ const structureExists = await pathExists(structurePlanPath);
293
+
294
+ if (!structureExists) {
295
+ return null;
296
+ }
297
+
298
+ const structureContent = await readFile(structurePlanPath, "utf8");
299
+ if (!structureContent?.trim()) {
300
+ return null;
301
+ }
302
+
303
+ try {
304
+ // Validate that the content looks like JSON before parsing
305
+ const trimmedContent = structureContent.trim();
306
+ if (!trimmedContent.startsWith("[") && !trimmedContent.startsWith("{")) {
307
+ console.warn("structure-plan.json contains non-JSON content, skipping parse");
308
+ return null;
309
+ }
310
+
311
+ const parsed = JSON.parse(structureContent);
312
+ // Return array if it's an array, otherwise return null
313
+ return Array.isArray(parsed) ? parsed : null;
314
+ } catch (parseError) {
315
+ console.error(`Failed to parse structure-plan.json: ${parseError.message}`);
316
+ return null;
317
+ }
318
+ } catch (readError) {
319
+ // Only warn if it's not a "file not found" error
320
+ if (readError.code !== "ENOENT") {
321
+ console.warn(`Error reading structure-plan.json: ${readError.message}`);
322
+ }
323
+ return null;
324
+ }
325
+ }
326
+
327
+ /**
328
+ * Build a tree structure from a flat document structure array using parentId
329
+ * @param {Array} documentStructure - Flat array of document structure items with path and parentId
330
+ * @returns {Object} Object containing rootNodes (array of root nodes) and nodeMap (Map for lookups)
331
+ */
332
+ export function buildDocumentTree(documentStructure) {
333
+ // Create a map of nodes for easy lookup
334
+ const nodeMap = new Map();
335
+ const rootNodes = [];
336
+
337
+ // First pass: create node map
338
+ documentStructure.forEach((node) => {
339
+ nodeMap.set(node.path, {
340
+ ...node,
341
+ children: [],
342
+ });
343
+ });
344
+
345
+ // Build the tree structure using parentId
346
+ documentStructure.forEach((node) => {
347
+ if (node.parentId) {
348
+ const parent = nodeMap.get(node.parentId);
349
+ if (parent) {
350
+ parent.children.push(nodeMap.get(node.path));
351
+ } else {
352
+ rootNodes.push(nodeMap.get(node.path));
353
+ }
354
+ } else {
355
+ rootNodes.push(nodeMap.get(node.path));
356
+ }
357
+ });
358
+
359
+ return { rootNodes, nodeMap };
360
+ }
@@ -0,0 +1,32 @@
1
+ import { readFile } from "node:fs/promises";
2
+ import { transpileDeclaration } from "typescript";
3
+
4
+ export async function extractApi(path) {
5
+ const content = await readFile(path, "utf8");
6
+
7
+ const lang = languages.find((lang) => lang.match(path, content));
8
+ if (lang) {
9
+ return lang.extract(path, content);
10
+ }
11
+
12
+ return content;
13
+ }
14
+
15
+ const languages = [
16
+ {
17
+ match: (path) => /\.m?(js|ts)x?$/.test(path),
18
+ extract: extractJsApi,
19
+ },
20
+ ];
21
+
22
+ async function extractJsApi(_path, content) {
23
+ const res = transpileDeclaration(content, {
24
+ compilerOptions: {
25
+ declaration: true,
26
+ emitDeclarationOnly: true,
27
+ allowJs: true,
28
+ },
29
+ });
30
+
31
+ return res.outputText.trim();
32
+ }
@@ -11,8 +11,9 @@ import { gunzipSync } from "node:zlib";
11
11
 
12
12
  import { debug } from "./debug.mjs";
13
13
  import { isGlobPattern } from "./utils.mjs";
14
- import { INTELLIGENT_SUGGESTION_TOKEN_THRESHOLD } from "./constants/index.mjs";
15
14
  import { uploadFiles } from "./upload-files.mjs";
15
+ import { extractApi } from "./extract-api.mjs";
16
+ import { minimatch } from "minimatch";
16
17
 
17
18
  /**
18
19
  * Check if a directory is inside a git repository using git command
@@ -286,7 +287,7 @@ export async function loadFilesFromPaths(sourcesPath, options = {}) {
286
287
  continue;
287
288
  }
288
289
 
289
- if (checkIsRemoteFile(dir)) {
290
+ if (isRemoteFile(dir)) {
290
291
  allFiles.push(dir);
291
292
  continue;
292
293
  }
@@ -387,8 +388,8 @@ export async function loadFilesFromPaths(sourcesPath, options = {}) {
387
388
  * @returns {Promise<boolean>} True if file appears to be a text file
388
389
  */
389
390
  async function isTextFile(filePath) {
390
- if (checkIsRemoteFile(filePath)) {
391
- return checkIsHttpTextFile(filePath);
391
+ if (isRemoteFile(filePath)) {
392
+ return isRemoteTextFile(filePath);
392
393
  }
393
394
 
394
395
  try {
@@ -400,14 +401,42 @@ async function isTextFile(filePath) {
400
401
  }
401
402
  }
402
403
 
403
- export function checkIsRemoteFile(filepath) {
404
- if (filepath.startsWith("http://") || filepath.startsWith("https://")) {
405
- return true;
404
+ /**
405
+ * Check if a string is an HTTP/HTTPS URL
406
+ * @param {string} fileUrl - The string to check
407
+ * @returns {boolean} - True if the string parses as a URL whose protocol is http: or https:
408
+ */
409
+ export function isRemoteFile(fileUrl) {
410
+ if (typeof fileUrl !== "string") return false;
411
+
412
+ try {
413
+ const url = new URL(fileUrl);
414
+ // Only accept http and https url
415
+ if (["http:", "https:"].includes(url.protocol)) {
416
+ return true;
417
+ }
418
+ // other protocol will be treated as bad url
419
+ return false;
420
+ } catch {
421
+ return false;
406
422
  }
407
- return false;
408
423
  }
409
424
 
410
- export async function checkIsHttpTextFile(fileUrl) {
425
+ export async function isRemoteFileAvailable(fileUrl) {
426
+ if (!isRemoteFile(fileUrl)) return false;
427
+
428
+ try {
429
+ const res = await fetch(fileUrl, {
430
+ method: "HEAD",
431
+ });
432
+ return res.ok;
433
+ } catch (error) {
434
+ debug(`Failed to check HTTP file availability: ${fileUrl} - ${error.message}`);
435
+ return false;
436
+ }
437
+ }
438
+
439
+ export async function isRemoteTextFile(fileUrl) {
411
440
  try {
412
441
  const res = await fetch(fileUrl, {
413
442
  method: "HEAD",
@@ -435,14 +464,14 @@ export async function checkIsHttpTextFile(fileUrl) {
435
464
  }
436
465
  }
437
466
 
438
- export async function getHttpFileContent(file) {
439
- if (!file) return null;
467
+ export async function getRemoteFileContent(fileUrl) {
468
+ if (!fileUrl) return null;
440
469
  try {
441
- const res = await fetch(file);
470
+ const res = await fetch(fileUrl);
442
471
  const text = await res.text();
443
472
  return text;
444
473
  } catch (error) {
445
- debug(`Failed to fetch HTTP file content: ${file} - ${error.message}`);
474
+ debug(`Failed to fetch HTTP file content: ${fileUrl} - ${error.message}`);
446
475
  return null;
447
476
  }
448
477
  }
@@ -469,8 +498,8 @@ export async function readFileContents(files, baseDir = process.cwd(), options =
469
498
  }
470
499
 
471
500
  try {
472
- if (checkIsRemoteFile(file)) {
473
- const content = await getHttpFileContent(file);
501
+ if (isRemoteFile(file)) {
502
+ const content = await getRemoteFileContent(file);
474
503
  if (content) {
475
504
  return {
476
505
  sourceId: file,
@@ -480,7 +509,9 @@ export async function readFileContents(files, baseDir = process.cwd(), options =
480
509
 
481
510
  return null;
482
511
  } else {
483
- const content = await readFile(file, "utf8");
512
+ const content = await extractApi(file);
513
+ if (!content) return null;
514
+
484
515
  const relativePath = path.relative(baseDir, file);
485
516
  return {
486
517
  sourceId: relativePath,
@@ -499,6 +530,11 @@ export async function readFileContents(files, baseDir = process.cwd(), options =
499
530
  return results.filter((result) => result !== null);
500
531
  }
501
532
 
533
+ export function calculateTokens(text) {
534
+ const tokens = encode(text);
535
+ return tokens.length;
536
+ }
537
+
502
538
  /**
503
539
  * Calculate total lines and tokens from file contents
504
540
  * @param {Array<{content: string}>} sourceFiles - Array of objects containing content property
@@ -524,97 +560,17 @@ export function calculateFileStats(sourceFiles) {
524
560
  }
525
561
 
526
562
  /**
527
- * Build sources content string based on context size
528
- * For large contexts, only include core project files to avoid token limit issues
563
+ * Build sources content string
529
564
  * @param {Array<{sourceId: string, content: string}>} sourceFiles - Array of source file objects
530
- * @param {boolean} isLargeContext - Whether the context is large
531
565
  * @returns {string} Concatenated sources content with sourceId comments
532
566
  */
533
- export function buildSourcesContent(sourceFiles, isLargeContext = false) {
534
- // Define core file patterns that represent project structure and key information
535
- const coreFilePatterns = [
536
- // Configuration files
537
- /package\.json$/,
538
- /tsconfig\.json$/,
539
- /jsconfig\.json$/,
540
- /\.env\.example$/,
541
- /Cargo\.toml$/,
542
- /go\.mod$/,
543
- /pom\.xml$/,
544
- /build\.gradle$/,
545
- /Gemfile$/,
546
- /requirements\.txt$/,
547
- /Pipfile$/,
548
- /composer\.json$/,
549
- /pyproject\.toml$/,
550
-
551
- // Documentation
552
- /README\.md$/i,
553
- /CHANGELOG\.md$/i,
554
- /CONTRIBUTING\.md$/i,
555
- /\.github\/.*\.md$/i,
556
-
557
- // Entry points and main files
558
- /index\.(js|ts|jsx|tsx|py|go|rs|java|rb|php)$/,
559
- /main\.(js|ts|jsx|tsx|py|go|rs|java|rb|php)$/,
560
- /app\.(js|ts|jsx|tsx|py)$/,
561
- /server\.(js|ts|jsx|tsx|py)$/,
562
-
563
- // API definitions
564
- /api\/.*\.(js|ts|jsx|tsx|py|go|rs|java|rb|php)$/,
565
- /routes\/.*\.(js|ts|jsx|tsx|py|go|rs|java|rb|php)$/,
566
- /controllers\/.*\.(js|ts|jsx|tsx|py|go|rs|java|rb|php)$/,
567
-
568
- // Type definitions and schemas
569
- /types\.(ts|d\.ts)$/,
570
- /schema\.(js|ts|jsx|tsx|py|go|rs|java|rb|php)$/,
571
- /.*\.d\.ts$/,
572
-
573
- // Core utilities
574
- /utils\/.*\.(js|ts|jsx|tsx|py|go|rs|java|rb|php)$/,
575
- /lib\/.*\.(js|ts|jsx|tsx|py|go|rs|java|rb|php)$/,
576
- /helpers\/.*\.(js|ts|jsx|tsx|py|go|rs|java|rb|php)$/,
577
- ];
578
-
579
- // Function to check if a file is a core file
580
- const isCoreFile = (filePath) => {
581
- return coreFilePatterns.some((pattern) => pattern.test(filePath));
582
- };
583
-
567
+ export function buildSourcesContent(sourceFiles) {
584
568
  // Build sources string
585
569
  let allSources = "";
586
570
 
587
- if (isLargeContext) {
588
- // Only include core files for large contexts
589
- const coreFiles = sourceFiles.filter((source) => isCoreFile(source.sourceId));
590
-
591
- // Determine which files to use and set appropriate message
592
- const filesToInclude = coreFiles.length > 0 ? coreFiles : sourceFiles;
593
- const noteMessage =
594
- coreFiles.length > 0
595
- ? "// Note: Context is large, showing only core project files.\n"
596
- : "// Note: Context is large, showing a sample of files.\n";
597
-
598
- allSources += noteMessage;
599
- let accumulatedTokens = 0;
600
-
601
- for (const source of filesToInclude) {
602
- const fileContent = `// sourceId: ${source.sourceId}\n${source.content}\n`;
603
- const fileTokens = encode(fileContent);
604
-
605
- // Check if adding this file would exceed the token limit
606
- if (accumulatedTokens + fileTokens.length > INTELLIGENT_SUGGESTION_TOKEN_THRESHOLD) {
607
- break;
608
- }
609
-
610
- allSources += fileContent;
611
- accumulatedTokens += fileTokens.length;
612
- }
613
- } else {
614
- // Include all files for normal contexts
615
- for (const source of sourceFiles) {
616
- allSources += `// sourceId: ${source.sourceId}\n${source.content}\n`;
617
- }
571
+ // Include all files for normal contexts
572
+ for (const source of sourceFiles) {
573
+ allSources += `\n// sourceId: ${source.sourceId}\n${source.content}\n`;
618
574
  }
619
575
 
620
576
  return allSources;
@@ -904,3 +860,99 @@ export async function downloadAndUploadImage(imageUrl, docsDir, appUrl, accessTo
904
860
  return { url: imageUrl, downloadFinalPath: null };
905
861
  }
906
862
  }
863
+
864
+ /**
865
+ * Extract the path prefix from a glob pattern until the first glob character
866
+ */
867
+ export function getPathPrefix(pattern) {
868
+ const segments = pattern.split("/");
869
+ const result = [];
870
+
871
+ for (const segment of segments) {
872
+ if (isGlobPattern(segment)) {
873
+ break;
874
+ }
875
+ result.push(segment);
876
+ }
877
+
878
+ return result.join("/") || ".";
879
+ }
880
+
881
+ /**
882
+ * Check if a dir matches any exclude pattern
883
+ */
884
+ export function isDirExcluded(dir, excludePatterns) {
885
+ if (!dir || typeof dir !== "string") {
886
+ return false;
887
+ }
888
+
889
+ let normalizedDir = dir.replace(/\\/g, "/").replace(/^\.\/+/, "");
890
+ normalizedDir = normalizedDir.endsWith("/") ? normalizedDir : `${normalizedDir}/`;
891
+
892
+ for (const excludePattern of excludePatterns) {
893
+ if (minimatch(normalizedDir, excludePattern, { dot: true })) {
894
+ return true;
895
+ }
896
+ }
897
+
898
+ return false;
899
+ }
900
+
901
+ /**
902
+ * Return source paths that are invalid: directories and glob patterns matched by the exclude patterns, plus paths that cannot be stat'ed (files are skipped, directories use minimatch, glob patterns use path prefix heuristic)
903
+ */
904
+ export async function findInvalidSourcePaths(sourcePaths, excludePatterns) {
905
+ if (!Array.isArray(sourcePaths) || sourcePaths.length === 0) {
906
+ return [];
907
+ }
908
+
909
+ if (!Array.isArray(excludePatterns) || excludePatterns.length === 0) {
910
+ return [];
911
+ }
912
+
913
+ const invalidPaths = [];
914
+
915
+ for (const sourcePath of sourcePaths) {
916
+ if (typeof sourcePath !== "string" || !sourcePath) {
917
+ continue;
918
+ }
919
+
920
+ // Skip paths starting with "!" (exclusion patterns)
921
+ if (sourcePath.startsWith("!")) {
922
+ continue;
923
+ }
924
+
925
+ // Skip remote URLs
926
+ if (isRemoteFile(sourcePath)) {
927
+ continue;
928
+ }
929
+
930
+ // Check glob pattern: use heuristic algorithm
931
+ if (isGlobPattern(sourcePath)) {
932
+ const representativePath = getPathPrefix(sourcePath);
933
+ if (isDirExcluded(representativePath, excludePatterns)) {
934
+ invalidPaths.push(sourcePath);
935
+ }
936
+ continue;
937
+ }
938
+
939
+ try {
940
+ const stats = await stat(sourcePath);
941
+ // Skip file
942
+ if (stats.isFile()) {
943
+ continue;
944
+ }
945
+ // Check dir with minimatch
946
+ if (stats.isDirectory()) {
947
+ if (isDirExcluded(sourcePath, excludePatterns)) {
948
+ invalidPaths.push(sourcePath);
949
+ }
950
+ }
951
+ } catch {
952
+ // Path doesn't exist
953
+ invalidPaths.push(sourcePath);
954
+ }
955
+ }
956
+
957
+ return invalidPaths;
958
+ }