@aigne/doc-smith 0.8.15-beta.7 → 0.8.15-beta.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,12 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.8.15-beta.8](https://github.com/AIGNE-io/aigne-doc-smith/compare/v0.8.15-beta.7...v0.8.15-beta.8) (2025-11-01)
4
+
5
+
6
+ ### Features
7
+
8
+ * smarter structure generation with team-based architecture ([#225](https://github.com/AIGNE-io/aigne-doc-smith/issues/225)) ([eb3404a](https://github.com/AIGNE-io/aigne-doc-smith/commit/eb3404a8889364912a077e84688cfcd48d69ef47))
9
+
3
10
  ## [0.8.15-beta.7](https://github.com/AIGNE-io/aigne-doc-smith/compare/v0.8.15-beta.6...v0.8.15-beta.7) (2025-10-31)
4
11
 
5
12
 
@@ -3,7 +3,7 @@ import { getActiveRulesForScope } from "../../utils/preferences-utils.mjs";
3
3
  import { getProjectInfo, loadConfigFromFile, saveValueToConfig } from "../../utils/utils.mjs";
4
4
 
5
5
  export default async function checkNeedGenerateStructure(
6
- { originalDocumentStructure, forceRegenerate, isLargeContext, ...rest },
6
+ { originalDocumentStructure, forceRegenerate, ...rest },
7
7
  options,
8
8
  ) {
9
9
  // Check if originalDocumentStructure is empty and prompt user
@@ -53,11 +53,7 @@ export default async function checkNeedGenerateStructure(
53
53
  };
54
54
  }
55
55
 
56
- // Performance optimization: Using both structured output and tools with the Gemini model can cause redundant calls.
57
- // Only use tools when the context is very large.
58
- const generateStructureAgent = isLargeContext
59
- ? options.context.agents["generateStructure"]
60
- : options.context.agents["generateStructureWithoutTools"];
56
+ const generateStructureAgent = options.context.agents["generateStructure"];
61
57
 
62
58
  const structureRules = getActiveRulesForScope("structure", []);
63
59
  const globalRules = getActiveRulesForScope("global", []);
@@ -72,7 +68,6 @@ export default async function checkNeedGenerateStructure(
72
68
  originalDocumentStructure,
73
69
  userPreferences,
74
70
  feedback: finalFeedback || "",
75
- isLargeContext,
76
71
  });
77
72
 
78
73
  let message = "";
@@ -1,68 +1,162 @@
1
+ type: team
1
2
  name: generateStructure
2
3
  description: Generate the structure and organization of your documentation
3
- instructions:
4
- - role: system
5
- url: ../../prompts/structure/generate/system-prompt.md
6
- - role: user
7
- url: ../../prompts/structure/generate/user-prompt.md
8
4
  skills:
9
- - ./document-structure-tools/generate-sub-structure.mjs
10
- task_render_mode: collapse
11
- task_title: Generate the structure of the documentation
12
- tool_calls_concurrency: 5
13
- input_schema:
14
- type: object
15
- properties:
16
- rules:
17
- type: string
18
- description: Your specific requirements for documentation structure
19
- locale:
20
- type: string
21
- description: Primary language for documentation (e.g., zh, en, ja)
22
- datasources:
23
- type: string
24
- description: Project content and context to help generate documentation structure
25
- targetAudience:
26
- type: string
27
- description: Target audience for the documentation
28
- nodeName:
29
- type: string
30
- description: Specific section or page name to focus on
31
- glossary:
32
- type: string
33
- description: Glossary for consistent terminology
34
- feedback:
35
- type: string
36
- description: Tell us how to improve the documentation structure
37
- userPreferences:
38
- type: string
39
- description: Your saved preferences for structure and documentation style
40
- docsType:
41
- type: string
42
- description: "Documentation type (options: general, getting-started, reference, faq)"
43
- default: general
44
- required:
45
- - rules
46
- - datasources
47
- output_schema:
48
- type: object
49
- properties:
50
- projectName:
51
- type: string
52
- description: Project name identified from your content sources
53
- projectDesc:
54
- type: string
55
- description: Brief project description generated from content analysis (under 50 words)
56
- documentStructure: ../schema/document-structure.yaml
57
- documentStructureTree:
58
- type: string
59
- description: |
60
- Visual tree structure showing documentation hierarchy with indented levels for easy review:
61
- ```
62
- - Home
63
- - Getting Started
64
- - Installation
65
- - Requirements
66
- ```
67
- required:
68
- - documentStructure
5
+ - type: team
6
+ name: generateStructureWorker
7
+ iterate_on: datasources
8
+ skills:
9
+ - type: ai
10
+ model:
11
+ reasoning_effort: 500
12
+ instructions:
13
+ - role: system
14
+ url: ../../prompts/structure/generate/system-prompt.md
15
+ - role: user
16
+ url: ../../prompts/structure/generate/user-prompt.md
17
+ task_render_mode: collapse
18
+ task_title: Generate the structure of the documentation
19
+ tool_calls_concurrency: 5
20
+ input_schema:
21
+ type: object
22
+ properties:
23
+ rules:
24
+ type: string
25
+ description: Your specific requirements for documentation structure
26
+ locale:
27
+ type: string
28
+ description: Primary language for documentation (e.g., zh, en, ja)
29
+ datasources:
30
+ type: string
31
+ description: Project content and context to help generate documentation structure
32
+ targetAudience:
33
+ type: string
34
+ description: Target audience for the documentation
35
+ nodeName:
36
+ type: string
37
+ description: Specific section or page name to focus on
38
+ glossary:
39
+ type: string
40
+ description: Glossary for consistent terminology
41
+ feedback:
42
+ type: string
43
+ description: Tell us how to improve the documentation structure
44
+ userPreferences:
45
+ type: string
46
+ description: Your saved preferences for structure and documentation style
47
+ docsType:
48
+ type: string
49
+ description: "Documentation type (options: general, getting-started, reference, faq)"
50
+ default: general
51
+ required:
52
+ - rules
53
+ - datasources
54
+ output_schema:
55
+ type: object
56
+ properties:
57
+ projectName:
58
+ type: string
59
+ description: Project name identified from your content sources
60
+ projectDesc:
61
+ type: string
62
+ description: Brief project description generated from content analysis (under 50 words)
63
+ add:
64
+ type: array
65
+ description: List of document structure items to add, null or empty array means no addition
66
+ items:
67
+ type: object
68
+ properties:
69
+ index:
70
+ type: integer
71
+ description: Position to insert the new item, null means append to the end
72
+ item: ../schema/document-structure-item.yaml
73
+ required:
74
+ - item
75
+ update:
76
+ type: array
77
+ description: List of document structure items to update, replace the item with the same path, null or empty array means no update
78
+ items:
79
+ type: object
80
+ properties:
81
+ path:
82
+ type: string
83
+ description: Path of the document structure item to update or replace
84
+ item: ../schema/document-structure-item.yaml
85
+ required:
86
+ - path
87
+ - item
88
+
89
+ - ./utils/merge-document-structures.mjs
90
+
91
+ - type: function
92
+ name: aggregateDocumentStructure
93
+ process: |
94
+ return {
95
+ documentStructure: options.context.userContext.originalDocumentStructure.map(i => ({
96
+ ...i,
97
+ id: i.title.toLowerCase().replace(/\s+/g, '-'),
98
+ })),
99
+ projectName: options.context.userContext.projectName,
100
+ projectDesc: options.context.userContext.projectDesc,
101
+ }
102
+
103
+ - type: ai
104
+ name: refineStructure
105
+ model:
106
+ reasoning_effort: 500
107
+ instructions:
108
+ - role: system
109
+ url: ../../prompts/structure/review/structure-review-system.md
110
+ output_schema:
111
+ type: object
112
+ properties:
113
+ refinedStructure:
114
+ type: array
115
+ description: Optimized document structure array
116
+ items:
117
+ type: object
118
+ description: Document structure item representing a node in the document hierarchy
119
+ properties:
120
+ id:
121
+ type: string
122
+ description: Unique identifier for the document structure item
123
+ newIndex:
124
+ type: integer
125
+ description: Used for ordering purposes, indicates the new position index of the document structure item
126
+ newPath:
127
+ type: string
128
+ description: The new path of the document structure item if it has been changed, otherwise can be omitted
129
+ newParentPath:
130
+ type: string
131
+ description: The new parentPath of the document structure item if it has been changed, otherwise can be omitted
132
+ required:
133
+ - id
134
+ required:
135
+ - refinedStructure
136
+
137
+ - type: function
138
+ name: finalizeDocumentStructure
139
+ process: |
140
+ return {
141
+ projectName: input.projectName,
142
+ projectDesc: input.projectDesc,
143
+ documentStructure: input.documentStructure
144
+ .map((item) => {
145
+ const refined = input.refinedStructure?.find(i => i.id === item.id)
146
+
147
+ return {
148
+ ...item,
149
+ index: refined?.newIndex || item.index,
150
+ path: refined?.newPath || item.path,
151
+ parentId: refined?.newParentPath || item.parentPath,
152
+ }
153
+ })
154
+ .sort((a, b) => a.index - b.index)
155
+ .map(i => {
156
+ const newItem = { ...i }
157
+ delete newItem.index
158
+ delete newItem.id
159
+ delete newItem.parentPath
160
+ return newItem
161
+ }),
162
+ }
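To make the `finalizeDocumentStructure` step above concrete, here is a minimal sketch of the same merge run over hypothetical data; the item shapes, ids, and paths are illustrative and only mirror the `process` block, not the real agent runtime.

```
// Hypothetical inputs shaped like the agent's `input` above.
const documentStructure = [
  { id: "getting-started", index: 1, title: "Getting Started", path: "/getting-started", parentPath: "/overview" },
  { id: "overview", index: 2, title: "Overview", path: "/overview", parentPath: null },
];
const refinedStructure = [
  { id: "overview", newIndex: 1 },
  { id: "getting-started", newIndex: 2 },
];

// Apply the reviewer's refinements, sort by the (possibly new) index,
// then strip the helper fields, as the process block does.
const finalized = documentStructure
  .map((item) => {
    const refined = refinedStructure.find((i) => i.id === item.id);
    return {
      ...item,
      index: refined?.newIndex || item.index,
      path: refined?.newPath || item.path,
      parentId: refined?.newParentPath || item.parentPath,
    };
  })
  .sort((a, b) => a.index - b.index)
  .map(({ index, id, parentPath, ...rest }) => rest);

// → [{ title: "Overview", path: "/overview", parentId: null },
//    { title: "Getting Started", path: "/getting-started", parentId: "/overview" }]
```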
@@ -140,6 +140,7 @@ export default async function userReviewDocumentStructure({ documentStructure, .
140
140
  // Call refineDocumentStructure agent with feedback
141
141
  await options.context.invoke(refineAgent, {
142
142
  ...rest,
143
+ datasources: rest.datasources[0].datasources,
143
144
  feedback: feedback.trim(),
144
145
  documentStructure: currentStructure,
145
146
  userPreferences,
@@ -0,0 +1,54 @@
1
+ export default async function mergeDocumentStructures(input, options) {
2
+ if (input.projectName) {
3
+ options.context.userContext.projectName = input.projectName;
4
+ }
5
+ if (input.projectDesc) {
6
+ options.context.userContext.projectDesc = input.projectDesc;
7
+ }
8
+
9
+ input.projectName = options.context.userContext.projectName;
10
+ input.projectDesc = options.context.userContext.projectDesc;
11
+
12
+ options.context.userContext.originalDocumentStructure ??= [];
13
+
14
+ const structure = options.context.userContext.originalDocumentStructure;
15
+
16
+ if (input.add) {
17
+ for (const { index, item } of input.add) {
18
+ if (index != null && index >= 0 && index < structure.length) {
19
+ structure.splice(index, 0, item);
20
+ } else {
21
+ structure.push(item);
22
+ }
23
+ }
24
+ }
25
+
26
+ if (input.update) {
27
+ for (const upd of input.update) {
28
+ const idx = structure.findIndex((i) => i.path === upd.path);
29
+ if (idx !== -1) {
30
+ structure[idx] = upd.item;
31
+ }
32
+ }
33
+ }
34
+
35
+ if (input.delete) {
36
+ for (const del of input.delete) {
37
+ const idx = structure.findIndex((i) => i.path === del.path);
38
+ if (idx !== -1) {
39
+ structure.splice(idx, 1);
40
+ }
41
+ }
42
+ }
43
+
44
+ options.context.userContext.originalDocumentStructure = structure.map((i, index) => {
45
+ delete i.index;
46
+
47
+ return {
48
+ index,
49
+ ...i,
50
+ };
51
+ });
52
+
53
+ return {};
54
+ }
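As a rough illustration of how a worker's change plan flows through `mergeDocumentStructures`, the sketch below stubs the `options.context` it expects and applies a hypothetical `add`/`update` plan; the import path, titles, and sourceIds are assumptions for the example.

```
import mergeDocumentStructures from "./merge-document-structures.mjs";

// Stubbed context holding the structure accumulated so far.
const options = {
  context: {
    userContext: {
      originalDocumentStructure: [
        { index: 0, title: "Overview", path: "/overview", parentPath: null, sourceIds: ["README.md"] },
      ],
    },
  },
};

// A change plan shaped like the worker's add/update output schema above.
await mergeDocumentStructures(
  {
    projectName: "example-project",
    add: [
      // index 0 splices the new node in before "Overview".
      { index: 0, item: { title: "CLI Reference", path: "/cli", parentPath: null, sourceIds: ["src/cli.mjs"] } },
    ],
    update: [
      // Replaces the node whose path matches "/overview".
      { path: "/overview", item: { title: "Overview", path: "/overview", parentPath: null, sourceIds: ["README.md", "docs/intro.md"] } },
    ],
  },
  options,
);

// The shared structure now contains both nodes, re-indexed in their new order.
console.log(options.context.userContext.originalDocumentStructure);
```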
@@ -0,0 +1,23 @@
1
+ type: object
2
+ description: Document structure item representing a node in the document hierarchy
3
+ properties:
4
+ title:
5
+ type: string
6
+ description:
7
+ type: string
8
+ path:
9
+ type: string
10
+ description: Path in URL format, cannot be empty, cannot contain spaces or special characters, must start with /, no need to include language level, e.g., /zh/about should return /about
11
+ parentPath:
12
+ type: string
13
+ description: Parent node path, if null indicates it is a top-level node
14
+ sourceIds:
15
+ type: array
16
+ description: Associated sourceId from dataSources for subsequent translation and content generation, must come from sourceId in datasources, cannot have fake ids, **cannot be empty**
17
+ items:
18
+ type: string
19
+ required:
20
+ - title
21
+ - description
22
+ - path
23
+ - sourceIds
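For reference, a hypothetical item satisfying this schema might look like the following, expressed as the plain object the agents pass around; all values are illustrative.

```
const exampleItem = {
  title: "Getting Started",
  description: "How to install the CLI and generate a first documentation site.",
  path: "/getting-started",                     // must start with "/" and omit the locale segment
  parentPath: null,                             // null marks a top-level node
  sourceIds: ["README.md", "docs/install.md"],  // must reference real sourceIds from the datasources
};
```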
@@ -10,9 +10,7 @@ items:
10
10
  type: string
11
11
  description: Path in URL format, cannot be empty, cannot contain spaces or special characters, must start with /, no need to include language level, e.g., /zh/about should return /about
12
12
  parentId:
13
- type:
14
- - string
15
- - "null"
13
+ type: string
16
14
  description: Parent node path, if null indicates it is a top-level node
17
15
  sourceIds:
18
16
  type: array
@@ -4,11 +4,11 @@ import path from "node:path";
4
4
  import imageSize from "image-size";
5
5
  import {
6
6
  buildSourcesContent,
7
- calculateFileStats,
8
7
  loadFilesFromPaths,
9
8
  readFileContents,
10
9
  getMimeType,
11
10
  isRemoteFile,
11
+ calculateTokens,
12
12
  } from "../../utils/file-utils.mjs";
13
13
  import {
14
14
  getCurrentGitHead,
@@ -196,13 +196,10 @@ export default async function loadSources(
196
196
  }
197
197
 
198
198
  // Read all source files using the utility function
199
- let sourceFiles = await readFileContents(sourceFilesPaths, process.cwd());
200
-
201
- // Count tokens and lines using utility function
202
- const { totalTokens, totalLines } = calculateFileStats(sourceFiles);
203
-
204
- // check if totalTokens is too large
205
- const isLargeContext = totalTokens > INTELLIGENT_SUGGESTION_TOKEN_THRESHOLD;
199
+ let sourceFiles = (await readFileContents(sourceFilesPaths, process.cwd())).map((i) => ({
200
+ ...i,
201
+ tokens: calculateTokens(`\n${i.sourceId}\n${i.content}`),
202
+ }));
206
203
 
207
204
  // filter OpenAPI doc should after check isLargeContext
208
205
  sourceFiles = sourceFiles.filter((file) => {
@@ -215,6 +212,16 @@ export default async function loadSources(
215
212
  return !isOpenAPI;
216
213
  });
217
214
 
215
+ const totalTokens = sourceFiles.reduce((sum, file) => sum + file.tokens, 0);
216
+ const totalLines = sourceFiles.reduce(
217
+ (sum, file) => sum + file.content.split("\n").filter(Boolean).length,
218
+ 0,
219
+ );
220
+
221
+ const datasources = splitSourcesToChunks(sourceFiles, INTELLIGENT_SUGGESTION_TOKEN_THRESHOLD).map(
222
+ (i) => ({ datasources: buildSourcesContent(i) }),
223
+ );
224
+
218
225
  const remoteFileList = [];
219
226
 
220
227
  sourceFiles.forEach((file) => {
@@ -226,8 +233,6 @@ export default async function loadSources(
226
233
  options.context.userContext.remoteFileList = remoteFileList;
227
234
  }
228
235
 
229
- // Build allSources string using utility function
230
- const allSources = buildSourcesContent(sourceFiles, isLargeContext);
231
236
  // all files path
232
237
  const allFilesPaths = sourceFiles.map((x) => `- ${toRelativePath(x.sourceId)}`).join("\n");
233
238
 
@@ -285,7 +290,7 @@ export default async function loadSources(
285
290
  }
286
291
 
287
292
  return {
288
- datasources: allSources,
293
+ datasources,
289
294
  content,
290
295
  originalDocumentStructure,
291
296
  files,
@@ -293,7 +298,6 @@ export default async function loadSources(
293
298
  totalTokens,
294
299
  totalLines,
295
300
  mediaFiles,
296
- isLargeContext,
297
301
  allFilesPaths,
298
302
  };
299
303
  }
@@ -342,7 +346,13 @@ loadSources.output_schema = {
342
346
  type: "object",
343
347
  properties: {
344
348
  datasources: {
345
- type: "string",
349
+ type: "array",
350
+ items: {
351
+ type: "object",
352
+ properties: {
353
+ datasources: { type: "string" },
354
+ },
355
+ },
346
356
  },
347
357
  files: {
348
358
  type: "array",
@@ -373,3 +383,33 @@ loadSources.output_schema = {
373
383
  };
374
384
 
375
385
  loadSources.task_render_mode = "hide";
386
+
387
+ function splitSourcesToChunks(sources, maxTokens) {
388
+ const chunks = [];
389
+
390
+ let currentChunk = [];
391
+ let currentTokens = 0;
392
+
393
+ for (const source of sources) {
394
+ const sourceTokens = source.tokens;
395
+
396
+ if (currentTokens + sourceTokens > maxTokens) {
397
+ // Start a new chunk
398
+ if (currentChunk.length > 0) {
399
+ chunks.push(currentChunk);
400
+ }
401
+ currentChunk = [source];
402
+ currentTokens = sourceTokens;
403
+ } else {
404
+ // Add to current chunk
405
+ currentChunk.push(source);
406
+ currentTokens += sourceTokens;
407
+ }
408
+ }
409
+
410
+ if (currentChunk.length > 0) {
411
+ chunks.push(currentChunk);
412
+ }
413
+
414
+ return chunks;
415
+ }
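A small sketch of how the new chunking behaves, assuming `splitSourcesToChunks` (file-local above) and `buildSourcesContent` are in scope; the file names and token counts are made up, and 100k stands in for `INTELLIGENT_SUGGESTION_TOKEN_THRESHOLD`.

```
// Hypothetical files with per-file token counts, as produced by readFileContents + calculateTokens.
const sourceFiles = [
  { sourceId: "README.md", content: "# Example project", tokens: 30_000 },
  { sourceId: "src/index.mjs", content: "export const run = () => {};", tokens: 50_000 },
  { sourceId: "src/cli.mjs", content: "#!/usr/bin/env node", tokens: 60_000 },
];

// Greedy splitting: 30k + 50k fits the 100k budget, the 60k file starts a new chunk.
const chunks = splitSourcesToChunks(sourceFiles, 100_000);
// → [[README.md, src/index.mjs], [src/cli.mjs]]

// Each chunk becomes one entry of the `datasources` array the workers iterate over.
const datasources = chunks.map((chunk) => ({ datasources: buildSourcesContent(chunk) }));
```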
package/aigne.yaml CHANGED
@@ -1,8 +1,7 @@
1
1
  #!/usr/bin/env aigne
2
2
 
3
- chat_model:
4
- provider: google
5
- name: gemini-2.5-pro
3
+ model:
4
+ model: google/gemini-2.5-pro
6
5
  # name: gemini-2.5-flash
7
6
  temperature: 0.8
8
7
  agents:
@@ -11,7 +10,6 @@ agents:
11
10
 
12
11
  # Documentation Structure Generation
13
12
  - ./agents/generate/generate-structure.yaml
14
- - ./agents/generate/generate-structure-without-tools.yaml
15
13
  - ./agents/generate/update-document-structure.yaml
16
14
  - ./agents/generate/check-need-generate-structure.mjs
17
15
  - ./agents/generate/refine-document-structure.yaml
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@aigne/doc-smith",
3
- "version": "0.8.15-beta.7",
3
+ "version": "0.8.15-beta.8",
4
4
  "description": "AI-driven documentation generation tool built on the AIGNE Framework",
5
5
  "publishConfig": {
6
6
  "access": "public"
@@ -56,6 +56,7 @@
56
56
  "remark-lint": "^10.0.1",
57
57
  "remark-parse": "^11.0.0",
58
58
  "terminal-link": "^4.0.0",
59
+ "typescript": "^5.9.3",
59
60
  "ufo": "^1.6.1",
60
61
  "unified": "^11.0.5",
61
62
  "unist-util-visit": "^5.0.0",
@@ -14,34 +14,4 @@ You are an AI document strategist with the personality of an **INTJ (The Archite
14
14
  {% include "../../common/document-structure/conflict-resolution-guidance.md" %}
15
15
 
16
16
 
17
- <sub_structure>
18
- {% if isLargeContext %}
19
- Analyze the provided file list and DataSources to complete the document structure planning:
20
- - If the DataSources contain sufficient context and already include content from all files in the file list, you can directly generate a detailed document structure.
21
- - First plan the document structure based on DataSources and <file_list>, ensuring all user-provided information will be presented in the document
22
- - Ensure initial planning has sufficient content separation to avoid oversized data sources when generating sub-documents
23
- - For sections with extensive content, use the `generateSubStructure` tool to generate detailed sub-structures
24
- - Trigger all Tool calls at once whenever possible
25
- - When triggering Tool calls, only output Tool call related information
26
- - Carefully check the data returned by the `generateSubStructure` tool, integrate all data, merge the complete document structure, and finally verify that it meets the requirements in <output_constraints>
27
-
28
- Using `generateSubStructure`:
29
- - When the provided file list is large and DataSources don't contain all file contents, resulting in an oversized context, split the generation into sub-document structures to make the context more focused and complete
30
- - Generate sub-documents to more effectively and fully utilize the data source files provided in <file_list>
31
- - Requires `parentDocument` and `subSourcePaths` as context parameters
32
- - `subSourcePaths` supports individual files and Glob Patterns, generation process:
33
- - Analyze relevant files from the file list, include as many related files as possible to ensure complete context
34
- - Selected files must come from <file_list>, ensure file paths are correct
35
- - Consolidation Rules:
36
- 1. If all files from a single directory (e.g., src/) have been selected, consolidate them into a pattern like src/\*.
37
- 2. If multiple files with a common naming convention are selected (e.g., README.md, README-dockerfile.md, README-turbo.md), consolidate them into a pattern like README\*.md.
38
- 3. Ensure only files correctly matched by the pattern are removed, while unmatched files must be preserved
39
- - Merge the returned subStructure into the overall document structure plan, **ensuring all subStructures returned by the tool are included**.
40
-
41
- {% else %}
42
- The current context is sufficient, proceed directly with document structure planning based on DataSources.
43
- {% endif %}
44
- </sub_structure>
45
-
46
-
47
17
  {% include "../../common/document-structure/output-constraints.md" %}
@@ -1,15 +1,10 @@
1
+ <datasources>
2
+ The following are the partial or complete data sources provided by the user to help you design the document structure. Use these data sources to inform your structural planning.
1
3
 
2
- {% include "../../common/document-structure/user-locale-rules.md" %}
3
-
4
- {% include "../../common/document-structure/user-preferences.md" %}
5
-
4
+ {{ datasources }}
6
5
 
7
- <file_list>
8
- {{allFilesPaths}}
9
- </file_list>
10
6
 
11
- <datasources>
12
- {{ datasources }}
7
+ NOTICE: There is additional data source content that is not displayed here. When operating on the document structure, take that undisplayed content into account and do not delete any nodes unless the user explicitly requests deletion.
13
8
  </datasources>
14
9
 
15
10
  {% if userContext.openAPISpec %}
@@ -17,7 +12,7 @@
17
12
 
18
13
  **Goal:** Use the provided OpenAPI (Swagger) specification to design how the OpenAPI content and the overall document should be structured together.
19
14
 
20
- **OpenAPI File Content:**
15
+ **OpenAPI File Content:**
21
16
  <openapi_doc>
22
17
 
23
18
  {{ userContext.openAPISpec }}
@@ -47,20 +42,31 @@
47
42
  {% endif %}
48
43
 
49
44
 
50
- {% if originalDocumentStructure %}
51
45
  <last_document_structure>
52
- {{originalDocumentStructure}}
46
+ projectName: |
47
+ {{projectName}}
48
+ projectDesc: |
49
+ {{projectDesc}}
50
+
51
+ {% if originalDocumentStructure %}
52
+ {{ originalDocumentStructure | yaml.stringify }}
53
+ {% else %}
54
+ No previous document structure provided. Generate a new structure based on the data sources.
55
+ {% endif %}
56
+
53
57
  </last_document_structure>
54
58
 
55
59
 
60
+ {% include "../../common/document-structure/user-locale-rules.md" %}
61
+
62
+ {% include "../../common/document-structure/user-preferences.md" %}
63
+
56
64
  <last_document_structure_rule>
57
65
  If a previous structural plan (last_document_structure) is provided, follow these rules:
58
66
  1. **Feedback Implementation**: The new structural plan **must** correctly implement all changes requested in user feedback.
59
67
  2. **Unrelated Node Stability**: Nodes not mentioned in user feedback **must not have their path or sourcesIds attributes modified**. `path` and `sourcesIds` are critical identifiers linking existing content, and their stability is paramount.
60
68
  Ideally, other attributes (such as `title`, `description`) should also remain stable, unless these changes are directly caused by a requested modification or result from DataSource updates.
61
69
  </last_document_structure_rule>
62
- {% endif %}
63
-
64
70
 
65
71
  {% if documentStructure %}
66
72
  <review_document_structure>
@@ -92,27 +98,62 @@ Sub-structures must meet the following requirements:
92
98
  - Sub-structures are planned based on DataSources and the parent document's description
93
99
  - The parent document provides an overview of the planned content, while sub-structures directly plan the specific content to be displayed
94
100
  - Further break down and comprehensively display the content planned in the parent document
95
- - All sub-structures must have their parentId value set to {{parentDocument.path}}
101
+ - All sub-structures must have their parentPath value set to {{parentDocument.path}}
96
102
  </parent_document>
97
103
  {% endif %}
98
104
 
99
105
  <instructions>
100
- Your task is to design a detailed structural plan for the document to be generated. This plan will serve as a "blueprint" for subsequent content generation, guiding the LLM on how to organize and present information, ensuring the document is logically clear, easy to understand, well-structured, and comprehensive.
106
+ Your task is to **analyze, refine, and adjust** the existing document structure (`last_document_structure`) based on the partial code repository content currently provided, generating a structural update plan.
107
+ You are not creating a structure from scratch, but rather **performing intelligent updates based on understanding the existing structure** to make the document structure more accurately reflect the latest code content, architectural changes, and logical relationships.
108
+
109
+ ## When using <datasources> data sources, please note the following:
110
+
111
+ - Fully respect the project descriptions and usage instructions in README files, as these typically summarize the project's core functionality and objectives.
112
+ - Pay attention to comments and docstrings in source code files, as these reveal the design intent and usage methods of the code.
113
+ - Understand the relationships between various modules and files, which helps build a logically clear and well-structured document hierarchy.
114
+ - Notice key concepts, APIs, and configuration options in the code, as these are typically important components of the document structure.
115
+ - The generated document structure must include all public modules, interfaces, and features to ensure document completeness and usability.
116
+
117
+
118
+ ## Objective
119
+
120
+ Your output should be a structured change plan containing the following sections to indicate how to modify the existing document structure:
121
+
122
+ - **add**: New structure items (array), can use index to specify insertion position (optional), each item is an object containing:
123
+ - `index` (optional): Insertion position index, if not specified, append to the end;
124
+ - `item`: New structure definition
125
+ - **update**: Structure items that need modification (array), each item is an object containing:
126
+ - `path`: Path pointing to the node being updated;
127
+ - `update`: New structure definition
128
+
129
+ ## Behavior Rules
130
+
131
+ 1. **Understanding and Inheritance**
132
+ - Fully understand the hierarchical logic, section divisions, and naming style in <last_document_structure>.
133
+ - Perform incremental updates based on this foundation, not complete rewrites.
134
+ - Preserve existing reasonable structures, only modify or extend when there is clear justification.
135
+
136
+ 2. **Contextual Association Analysis**
137
+ - You will receive part of the code repository content (such as partial source files or directory content); analyze its **documentation value and structural impact**.
138
+ - Identify which parts represent new concepts, APIs, modules, configurations, or features; determine if they require adding or modifying corresponding sections in the document structure.
101
139
 
102
- Key capabilities and behavioral principles:
103
- - Data Comprehension: Ability to parse and understand structured and unstructured data, identifying key concepts, entities, attributes, relationships, and processes within them.
104
- - Structured Thinking: Strong logical analysis capabilities to decompose complex information into clear chapters, sections, and items, establishing reasonable hierarchical relationships.
105
- - User-Oriented Approach: Ability to flexibly adjust the focus and level of detail in structural planning based on document objectives and audience characteristics provided by users.
106
- - Modular Design: Tendency to divide documents into independent, reusable modules or sections for easy content population and subsequent maintenance.
107
- - Flexibility and Adaptability: Ability to handle multiple types of data sources and design the most suitable documentation structure based on data source characteristics (such as code function/class structures, API endpoints/parameters, text paragraphs/themes).
108
- - Clarity and Completeness: Ensure the final structural plan is easy to understand and can guide the LLM to generate a comprehensive and well-organized document.
140
+ 3. **Structure Adjustment Strategy**
141
+ - If new content supplements details of existing sections, use `update`.
142
+ - If new content introduces new topics, modules, or hierarchies, use `add`.
143
+ - Ensure the position, hierarchy, and naming of new nodes align with the overall document logic.
109
144
 
145
+ 4. **Consistency and Clarity**
146
+ - Ensure new or modified structure items are consistent with existing structure style.
147
+ - Each structure node (whether new or updated) should include:
148
+ - **Title**
149
+ - **Brief description in one sentence**, describing main content and purpose
150
+ - Maintain clear hierarchy, avoid duplication, ensure logical coherence. Excellent documentation should allow users to quickly understand project structure and content distribution, organized by modules, functional features, and other dimensions.
110
151
 
111
- Objectives:
112
- - Create a clear and logical structural plan that comprehensively presents information from the user-provided context while providing users with intuitive navigation paths.
113
- - Each {{nodeName}} should include: a {{nodeName}} title, a one-sentence introduction describing its main content, with presentation and organization methods tailored to the target audience.
152
+ 5. **Requirements**
153
+ - Follow all rules and guidelines in <document_structure_rules>.
154
+ - Generate a rich document structure in which functional modules have sub-documents, comprehensively covering the codebase's functionality and modules, so that users can easily get started with, understand, and use the project's modules and main features through the documentation.
114
155
 
115
156
  {% include "../../common/document-structure/intj-traits.md" %}
116
157
 
117
- Always follow one principle: You must ensure the final structural plan meets user requirements.
158
+ You must make reasonable incremental modifications based solely on the new information provided while respecting the existing structure, ensuring the final structure remains complete, clear, and extensible.
118
159
  </instructions>
@@ -0,0 +1,73 @@
1
+ <role_and_goal>
2
+ You are an AI document strategist with the personality of an **INTJ (The Architect)**. Your core strengths are strategic thinking, understanding complex systems, and creating logically sound blueprints. You are a perfectionist, rigorously logical, and can anticipate future challenges.
3
+
4
+ </role_and_goal>
5
+
6
+ <document_structure>
7
+ projectName: |
8
+ {{projectName}}
9
+ projectDesc: |
10
+ {{projectDesc}}
11
+
12
+ documentStructure:
13
+ {{ documentStructure | yaml.stringify }}
14
+ </document_structure>
15
+
16
+ <instructions>
17
+ You are a Documentation Structure Refiner — an expert in technical documentation architecture and information design.
18
+
19
+ Your task:
20
+ Given an existing document structure (a JSON array or tree of sections), refine and optimize its **hierarchy and order** to improve clarity, usability, and conventional organization.
21
+ You must not add, delete, rename, or rewrite any nodes. Only adjust the **order** and **nesting levels** of existing nodes.
22
+
23
+ ---
24
+
25
+ ## Optimization Goals
26
+
27
+ 1. **Logical Order**
28
+ - Introductory materials should always appear at the beginning:
29
+ - “Overview”, “Introduction”, “Quick Start”, “Getting Started”, “Setup” should be near the top.
30
+ - Meta and community-related sections (e.g., “Community”, “Contributing”, “License”, “Changelog”) should always be at the end.
31
+ - Technical reference and configuration sections should appear after conceptual and usage sections.
32
+
33
+ 2. **Hierarchy Correction**
34
+ - Ensure proper depth:
35
+ - “Overview” and “Quick Start” should have **1–2 levels max**.
36
+ - Remove deeply nested technical details from “Overview” or “Quick Start”.
37
+ - Relocate such details under “Architecture”, “API Reference”, or “Modules”.
38
+ - Preserve all nodes — only change their parent-child relationships when needed for clarity.
39
+
40
+ 3. **Grouping and Alignment**
41
+ - Align similar nodes logically (e.g., group “Usage”, “Examples”, “Tutorials” together).
42
+ - Avoid duplication or overlap by reordering, not by deletion.
43
+
44
+ 4. **Naming and Identity**
45
+ - You are **not allowed to rename or reword** any section titles or descriptions.
46
+ - Keep all existing keys, identifiers, and text intact.
47
+
48
+ 5. **Balance**
49
+ - Maintain a clean, well-organized hierarchy.
50
+ - Keep top-level nodes concise (≤ 8 preferred).
51
+ - Avoid over-nesting (≤ 4 levels deep).
52
+
53
+ ---
54
+
55
+ ## Behavior Rules
56
+
57
+ - Do **not** add new nodes.
58
+ - Do **not** delete existing nodes.
59
+ - Do **not** rename or rewrite content.
60
+ - You **may** move nodes to different parents or reorder siblings to achieve better logical flow.
61
+ - You **must** maintain all data and structural integrity.
62
+ - The final structure must remain fully valid and machine-readable (same schema as input).
63
+
64
+ ---
65
+
66
+ ## Objective
67
+
68
+ Output a single **optimized JSON structure** (same format as input), where:
69
+ 1. The hierarchy and order are improved.
70
+ 2. All nodes are preserved exactly as given.
71
+ 3. The structure reflects a natural and professional documentation layout.
72
+ 4. Return only the nodes that need to be changed to achieve the above goals.
73
+ </instructions>
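To illustrate the expected shape of the reviewer's answer, here is a hypothetical result matching the `refineStructure` output_schema defined in generate-structure.yaml; ids and paths are illustrative only.

```
// Only the nodes that actually move are returned, identified by id.
const reviewResult = {
  refinedStructure: [
    // Pull the overview to the top of the document.
    { id: "overview", newIndex: 0 },
    // Re-nest a deeply placed configuration page under the reference section.
    { id: "configuration", newIndex: 5, newPath: "/reference/configuration", newParentPath: "/reference" },
  ],
};
```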
@@ -6,7 +6,7 @@ export const documentItemSchema = z.object({
6
6
  title: z.string().min(1, "Title is required"),
7
7
  description: z.string().min(1, "Description is required"),
8
8
  path: z.string().startsWith("/", 'Path must start with "/"'),
9
- parentId: z.string().nullable(),
9
+ parentId: z.string().nullish(),
10
10
  sourceIds: z.array(z.string()).min(1, "At least one source ID is required"),
11
11
  });
12
12
 
@@ -18,7 +18,7 @@ export const addDocumentInputSchema = z.object({
18
18
  title: z.string().min(1, "Title is required"),
19
19
  description: z.string().min(1, "Description is required"),
20
20
  path: z.string().startsWith("/", 'Path must start with "/"'),
21
- parentId: z.string().nullable().optional(),
21
+ parentId: z.string().nullish(),
22
22
  sourceIds: z.array(z.string()).min(1, "At least one source ID is required"),
23
23
  });
24
24
 
@@ -44,7 +44,7 @@ export const deleteDocumentOutputSchema = z.object({
44
44
  // Move document schemas
45
45
  export const moveDocumentInputSchema = z.object({
46
46
  path: z.string().min(1, "Path is required"),
47
- newParentId: z.string().nullable().optional(),
47
+ newParentId: z.string().nullish(),
48
48
  });
49
49
 
50
50
  export const moveDocumentOutputSchema = z.object({
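The switch from `.nullable()` to `.nullish()` loosens these schemas slightly: `.nullable()` accepts only an explicit `null`, while `.nullish()` also accepts `undefined`, so callers may now omit `parentId` entirely. A minimal zod sketch:

```
import { z } from "zod";

const nullableParent = z.object({ parentId: z.string().nullable() });
const nullishParent = z.object({ parentId: z.string().nullish() });

// .nullable(): the key must be present (a string or null), so omitting it fails.
nullableParent.safeParse({}).success; // false
// .nullish(): null and undefined are both fine, so a missing parentId validates.
nullishParent.safeParse({}).success; // true
```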
@@ -0,0 +1,32 @@
1
+ import { readFile } from "node:fs/promises";
2
+ import { transpileDeclaration } from "typescript";
3
+
4
+ export async function extractApi(path) {
5
+ const content = await readFile(path, "utf8");
6
+
7
+ const lang = languages.find((lang) => lang.match(path, content));
8
+ if (lang) {
9
+ return lang.extract(path, content);
10
+ }
11
+
12
+ return content;
13
+ }
14
+
15
+ const languages = [
16
+ {
17
+ match: (path) => /\.m?(js|ts)x?$/.test(path),
18
+ extract: extractJsApi,
19
+ },
20
+ ];
21
+
22
+ async function extractJsApi(_path, content) {
23
+ const res = transpileDeclaration(content, {
24
+ compilerOptions: {
25
+ declaration: true,
26
+ emitDeclarationOnly: true,
27
+ allowJs: true,
28
+ },
29
+ });
30
+
31
+ return res.outputText.trim();
32
+ }
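A rough usage sketch of `extractApi`: for JS/TS sources it relies on TypeScript's `transpileDeclaration` (available since TypeScript 5.5, which matches the new `typescript` dependency in package.json) to keep only the declaration surface. The temp-file path, sample source, and the exact emitted text are assumptions; real output depends on the TypeScript version.

```
import { writeFile } from "node:fs/promises";
import { extractApi } from "./extract-api.mjs";

// A hypothetical source file whose implementation details should not reach the prompt.
const file = "/tmp/math-utils.mjs";
await writeFile(
  file,
  [
    "/** Adds two numbers. */",
    "export function add(a, b) {",
    "  return a + b;",
    "}",
    "const secretFactor = 42; // not exported, so it should not survive declaration emit",
  ].join("\n"),
);

// Expected to print something close to:
//   export declare function add(a: any, b: any): any;
console.log(await extractApi(file));
```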
@@ -11,8 +11,8 @@ import { gunzipSync } from "node:zlib";
11
11
 
12
12
  import { debug } from "./debug.mjs";
13
13
  import { isGlobPattern } from "./utils.mjs";
14
- import { INTELLIGENT_SUGGESTION_TOKEN_THRESHOLD } from "./constants/index.mjs";
15
14
  import { uploadFiles } from "./upload-files.mjs";
15
+ import { extractApi } from "./extract-api.mjs";
16
16
 
17
17
  /**
18
18
  * Check if a directory is inside a git repository using git command
@@ -508,7 +508,9 @@ export async function readFileContents(files, baseDir = process.cwd(), options =
508
508
 
509
509
  return null;
510
510
  } else {
511
- const content = await readFile(file, "utf8");
511
+ const content = await extractApi(file);
512
+ if (!content) return null;
513
+
512
514
  const relativePath = path.relative(baseDir, file);
513
515
  return {
514
516
  sourceId: relativePath,
@@ -527,6 +529,11 @@ export async function readFileContents(files, baseDir = process.cwd(), options =
527
529
  return results.filter((result) => result !== null);
528
530
  }
529
531
 
532
+ export function calculateTokens(text) {
533
+ const tokens = encode(text);
534
+ return tokens.length;
535
+ }
536
+
530
537
  /**
531
538
  * Calculate total lines and tokens from file contents
532
539
  * @param {Array<{content: string}>} sourceFiles - Array of objects containing content property
@@ -552,97 +559,17 @@ export function calculateFileStats(sourceFiles) {
552
559
  }
553
560
 
554
561
  /**
555
- * Build sources content string based on context size
556
- * For large contexts, only include core project files to avoid token limit issues
562
+ * Build sources content string
557
563
  * @param {Array<{sourceId: string, content: string}>} sourceFiles - Array of source file objects
558
- * @param {boolean} isLargeContext - Whether the context is large
559
564
  * @returns {string} Concatenated sources content with sourceId comments
560
565
  */
561
- export function buildSourcesContent(sourceFiles, isLargeContext = false) {
562
- // Define core file patterns that represent project structure and key information
563
- const coreFilePatterns = [
564
- // Configuration files
565
- /package\.json$/,
566
- /tsconfig\.json$/,
567
- /jsconfig\.json$/,
568
- /\.env\.example$/,
569
- /Cargo\.toml$/,
570
- /go\.mod$/,
571
- /pom\.xml$/,
572
- /build\.gradle$/,
573
- /Gemfile$/,
574
- /requirements\.txt$/,
575
- /Pipfile$/,
576
- /composer\.json$/,
577
- /pyproject\.toml$/,
578
-
579
- // Documentation
580
- /README\.md$/i,
581
- /CHANGELOG\.md$/i,
582
- /CONTRIBUTING\.md$/i,
583
- /\.github\/.*\.md$/i,
584
-
585
- // Entry points and main files
586
- /index\.(js|ts|jsx|tsx|py|go|rs|java|rb|php)$/,
587
- /main\.(js|ts|jsx|tsx|py|go|rs|java|rb|php)$/,
588
- /app\.(js|ts|jsx|tsx|py)$/,
589
- /server\.(js|ts|jsx|tsx|py)$/,
590
-
591
- // API definitions
592
- /api\/.*\.(js|ts|jsx|tsx|py|go|rs|java|rb|php)$/,
593
- /routes\/.*\.(js|ts|jsx|tsx|py|go|rs|java|rb|php)$/,
594
- /controllers\/.*\.(js|ts|jsx|tsx|py|go|rs|java|rb|php)$/,
595
-
596
- // Type definitions and schemas
597
- /types\.(ts|d\.ts)$/,
598
- /schema\.(js|ts|jsx|tsx|py|go|rs|java|rb|php)$/,
599
- /.*\.d\.ts$/,
600
-
601
- // Core utilities
602
- /utils\/.*\.(js|ts|jsx|tsx|py|go|rs|java|rb|php)$/,
603
- /lib\/.*\.(js|ts|jsx|tsx|py|go|rs|java|rb|php)$/,
604
- /helpers\/.*\.(js|ts|jsx|tsx|py|go|rs|java|rb|php)$/,
605
- ];
606
-
607
- // Function to check if a file is a core file
608
- const isCoreFile = (filePath) => {
609
- return coreFilePatterns.some((pattern) => pattern.test(filePath));
610
- };
611
-
566
+ export function buildSourcesContent(sourceFiles) {
612
567
  // Build sources string
613
568
  let allSources = "";
614
569
 
615
- if (isLargeContext) {
616
- // Only include core files for large contexts
617
- const coreFiles = sourceFiles.filter((source) => isCoreFile(source.sourceId));
618
-
619
- // Determine which files to use and set appropriate message
620
- const filesToInclude = coreFiles.length > 0 ? coreFiles : sourceFiles;
621
- const noteMessage =
622
- coreFiles.length > 0
623
- ? "// Note: Context is large, showing only core project files.\n"
624
- : "// Note: Context is large, showing a sample of files.\n";
625
-
626
- allSources += noteMessage;
627
- let accumulatedTokens = 0;
628
-
629
- for (const source of filesToInclude) {
630
- const fileContent = `// sourceId: ${source.sourceId}\n${source.content}\n`;
631
- const fileTokens = encode(fileContent);
632
-
633
- // Check if adding this file would exceed the token limit
634
- if (accumulatedTokens + fileTokens.length > INTELLIGENT_SUGGESTION_TOKEN_THRESHOLD) {
635
- break;
636
- }
637
-
638
- allSources += fileContent;
639
- accumulatedTokens += fileTokens.length;
640
- }
641
- } else {
642
- // Include all files for normal contexts
643
- for (const source of sourceFiles) {
644
- allSources += `// sourceId: ${source.sourceId}\n${source.content}\n`;
645
- }
570
+ // Include all files for normal contexts
571
+ for (const source of sourceFiles) {
572
+ allSources += `\n// sourceId: ${source.sourceId}\n${source.content}\n`;
646
573
  }
647
574
 
648
575
  return allSources;
@@ -1,131 +0,0 @@
1
- import {
2
- buildSourcesContent,
3
- calculateFileStats,
4
- loadFilesFromPaths,
5
- readFileContents,
6
- } from "../../../utils/file-utils.mjs";
7
- import {
8
- INTELLIGENT_SUGGESTION_TOKEN_THRESHOLD,
9
- DEFAULT_EXCLUDE_PATTERNS,
10
- DEFAULT_INCLUDE_PATTERNS,
11
- } from "../../../utils/constants/index.mjs";
12
- import { toRelativePath } from "../../../utils/utils.mjs";
13
-
14
- export default async function generateSubStructure(
15
- {
16
- parentDocument,
17
- subSourcePaths,
18
- includePatterns,
19
- excludePatterns,
20
- useDefaultPatterns = true,
21
- ...rest
22
- },
23
- options,
24
- ) {
25
- const sourcePaths = subSourcePaths?.map((item) => item.path);
26
- if (!sourcePaths || sourcePaths.length === 0) {
27
- return {
28
- subStructure: [],
29
- };
30
- }
31
-
32
- let files = await loadFilesFromPaths(sourcePaths, {
33
- includePatterns,
34
- excludePatterns,
35
- useDefaultPatterns,
36
- defaultIncludePatterns: DEFAULT_INCLUDE_PATTERNS,
37
- defaultExcludePatterns: DEFAULT_EXCLUDE_PATTERNS,
38
- });
39
- files = [...new Set(files)];
40
-
41
- // all files path
42
- const allFilesPaths = files.map((file) => `- ${toRelativePath(file)}`).join("\n");
43
-
44
- // Read all source files using the utility function
45
- const sourceFiles = await readFileContents(files, process.cwd());
46
-
47
- // Count tokens and lines using utility function
48
- const { totalTokens } = calculateFileStats(sourceFiles);
49
-
50
- // check if totalTokens is too large
51
- let isLargeContext = false;
52
- if (totalTokens > INTELLIGENT_SUGGESTION_TOKEN_THRESHOLD) {
53
- isLargeContext = true;
54
- }
55
-
56
- // Build allSources string using utility function
57
- const allSources = buildSourcesContent(sourceFiles, isLargeContext);
58
-
59
- // Performance optimization:
60
- // Using both structured output and Tool with Gemini model causes redundant calls
61
- // Only use Tool when context is very large
62
- const generateStructureAgent = isLargeContext
63
- ? options.context.agents["generateStructure"]
64
- : options.context.agents["generateStructureWithoutTools"];
65
- const result = await options.context.invoke(generateStructureAgent, {
66
- ...rest,
67
- isSubStructure: true,
68
- parentDocument,
69
- datasources: allSources,
70
- allFilesPaths,
71
- isLargeContext,
72
- files,
73
- totalTokens,
74
- });
75
-
76
- return {
77
- subStructure: result.documentStructure || [],
78
- message: `Generated a sub structure for '${parentDocument.path}' successfully. Please merge all sub-structures to output the complete document structure.`,
79
- };
80
- }
81
-
82
- generateSubStructure.description = `
83
- Generates a sub-structure.
84
- Handles large file sets by splitting them into smaller sub-document structures when the context size exceeds limits. This approach ensures more focused and complete documentation generation.
85
- `;
86
-
87
- generateSubStructure.inputSchema = {
88
- type: "object",
89
- properties: {
90
- parentDocument: {
91
- type: "object",
92
- description: "The parent node to generate a sub structure for",
93
- properties: {
94
- title: { type: "string", description: "The title of the parent node" },
95
- description: { type: "string", description: "The description of the parent node" },
96
- path: {
97
- type: "string",
98
- description:
99
- "The path of the parent node, Path in URL format, cannot be empty, cannot contain spaces or special characters, must start with /, no need to include language level, e.g., /zh/about should return /about ",
100
- },
101
- parentId: { type: "string", description: "The parent ID of the parent node" },
102
- sourceIds: { type: "array", description: "The source IDs of the parent node" },
103
- },
104
- },
105
- subSourcePaths: {
106
- type: "array",
107
- description: "The source paths of the sub structure",
108
- items: {
109
- type: "object",
110
- properties: {
111
- path: { type: "string", description: "The source path of the sub structure" },
112
- reason: { type: "string", description: "The reason for selecting the source path" },
113
- },
114
- required: ["path", "reason"],
115
- },
116
- },
117
- },
118
- };
119
-
120
- generateSubStructure.outputSchema = {
121
- type: "object",
122
- properties: {
123
- subStructure: {
124
- type: "array",
125
- description:
126
- "The sub structure of the parent node, need merge all sub-structures and output the complete document structure.",
127
- },
128
- message: { type: "string", description: "The message of the sub structure" },
129
- },
130
- required: ["subStructure"],
131
- };
@@ -1,65 +0,0 @@
1
- name: generateStructureWithoutTools
2
- description: Generate the structure and organization of your documentation
3
- instructions:
4
- - role: system
5
- url: ../../prompts/structure/generate/system-prompt.md
6
- - role: user
7
- url: ../../prompts/structure/generate/user-prompt.md
8
- task_render_mode: collapse
9
- task_title: Generate the structure of the documentation
10
- input_schema:
11
- type: object
12
- properties:
13
- rules:
14
- type: string
15
- description: Your specific requirements for documentation structure
16
- locale:
17
- type: string
18
- description: Primary language for documentation (e.g., zh, en, ja)
19
- datasources:
20
- type: string
21
- description: Project content and context to help generate documentation structure
22
- targetAudience:
23
- type: string
24
- description: Target audience for the documentation
25
- nodeName:
26
- type: string
27
- description: Specific section or page name to focus on
28
- glossary:
29
- type: string
30
- description: Glossary for consistent terminology
31
- feedback:
32
- type: string
33
- description: Tell us how to improve the documentation structure
34
- userPreferences:
35
- type: string
36
- description: Your saved preferences for structure and documentation style
37
- docsType:
38
- type: string
39
- description: "Documentation type (options: general, getting-started, reference, faq)"
40
- default: general
41
- required:
42
- - rules
43
- - datasources
44
- output_schema:
45
- type: object
46
- properties:
47
- projectName:
48
- type: string
49
- description: Project name identified from your content sources
50
- projectDesc:
51
- type: string
52
- description: Brief project description generated from content analysis (under 50 words)
53
- documentStructure: ../schema/document-structure.yaml
54
- documentStructureTree:
55
- type: string
56
- description: |
57
- Visual tree structure showing documentation hierarchy with indented levels for easy review:
58
- ```
59
- - Home
60
- - Getting Started
61
- - Installation
62
- - Requirements
63
- ```
64
- required:
65
- - documentStructure