@aigne/doc-smith 0.9.7 → 0.9.8-beta

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. package/CHANGELOG.md +13 -0
  2. package/agents/create/analyze-diagram-type-llm.yaml +160 -0
  3. package/agents/create/analyze-diagram-type.mjs +297 -0
  4. package/agents/create/generate-diagram-image.yaml +60 -0
  5. package/agents/create/replace-d2-with-image.mjs +624 -0
  6. package/agents/create/utils/init-current-content.mjs +5 -9
  7. package/agents/evaluate/document.yaml +6 -0
  8. package/agents/evaluate/index.yaml +1 -0
  9. package/agents/init/index.mjs +16 -0
  10. package/agents/media/batch-generate-media-description.yaml +2 -0
  11. package/agents/media/generate-media-description.yaml +3 -0
  12. package/agents/media/load-media-description.mjs +44 -15
  13. package/agents/publish/publish-docs.mjs +1 -4
  14. package/agents/update/check-diagram-flag.mjs +116 -0
  15. package/agents/update/check-document.mjs +0 -1
  16. package/agents/update/check-generate-diagram.mjs +48 -30
  17. package/agents/update/check-sync-image-flag.mjs +55 -0
  18. package/agents/update/check-update-is-single.mjs +11 -0
  19. package/agents/update/generate-diagram.yaml +43 -9
  20. package/agents/update/generate-document.yaml +9 -0
  21. package/agents/update/handle-document-update.yaml +10 -8
  22. package/agents/update/index.yaml +16 -1
  23. package/agents/update/sync-images-and-exit.mjs +148 -0
  24. package/agents/update/update-single/update-single-document-detail.mjs +131 -17
  25. package/agents/utils/analyze-feedback-intent.mjs +136 -0
  26. package/agents/utils/choose-docs.mjs +183 -40
  27. package/agents/utils/generate-document-or-skip.mjs +41 -0
  28. package/agents/utils/handle-diagram-operations.mjs +263 -0
  29. package/agents/utils/load-all-document-content.mjs +30 -0
  30. package/agents/utils/load-sources.mjs +2 -2
  31. package/agents/utils/read-current-document-content.mjs +46 -0
  32. package/agents/utils/save-doc.mjs +42 -0
  33. package/agents/utils/skip-if-content-exists.mjs +27 -0
  34. package/aigne.yaml +6 -1
  35. package/assets/report-template/report.html +17 -17
  36. package/docs-mcp/read-doc-content.mjs +30 -1
  37. package/package.json +4 -4
  38. package/prompts/detail/diagram/generate-image-system.md +135 -0
  39. package/prompts/detail/diagram/generate-image-user.md +32 -0
  40. package/prompts/detail/generate/user-prompt.md +27 -13
  41. package/prompts/evaluate/document.md +23 -10
  42. package/prompts/media/media-description/system-prompt.md +10 -2
  43. package/prompts/media/media-description/user-prompt.md +9 -0
  44. package/utils/check-document-has-diagram.mjs +97 -0
  45. package/utils/constants/index.mjs +46 -0
  46. package/utils/d2-utils.mjs +114 -181
  47. package/utils/delete-diagram-images.mjs +103 -0
  48. package/utils/docs-finder-utils.mjs +34 -1
  49. package/utils/image-compress.mjs +75 -0
  50. package/utils/kroki-utils.mjs +2 -3
  51. package/utils/sync-diagram-to-translations.mjs +258 -0
  52. package/utils/utils.mjs +24 -0
  53. package/agents/create/check-diagram.mjs +0 -40
  54. package/agents/create/draw-diagram.yaml +0 -27
  55. package/agents/create/merge-diagram.yaml +0 -39
  56. package/agents/create/wrap-diagram-code.mjs +0 -35
package/CHANGELOG.md CHANGED
@@ -1,5 +1,18 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.9.8-beta](https://github.com/AIGNE-io/aigne-doc-smith/compare/v0.9.7...v0.9.8-beta) (2025-12-05)
4
+
5
+
6
+ ### Features
7
+
8
+ * use nano banana pro to generate doc diagrams ([#343](https://github.com/AIGNE-io/aigne-doc-smith/issues/343)) ([eaf9a06](https://github.com/AIGNE-io/aigne-doc-smith/commit/eaf9a06df8ecb57c1a39c3c338210f02b1b7ab94))
9
+
10
+
11
+ ### Bug Fixes
12
+
13
+ * add global perspective for evaluate document ([#346](https://github.com/AIGNE-io/aigne-doc-smith/issues/346)) ([2f58699](https://github.com/AIGNE-io/aigne-doc-smith/commit/2f5869904db4aea9d8756bcbf0bf0710b8a14783))
14
+ * add support for reading SVG content to generate media descriptions ([#347](https://github.com/AIGNE-io/aigne-doc-smith/issues/347)) ([a679604](https://github.com/AIGNE-io/aigne-doc-smith/commit/a6796040cfda58abe8a6c525b51605d8c5dcbd3d))
15
+
3
16
  ## [0.9.7](https://github.com/AIGNE-io/aigne-doc-smith/compare/v0.9.7-beta.3...v0.9.7) (2025-11-28)
4
17
 
5
18
  ## [0.9.7-beta.3](https://github.com/AIGNE-io/aigne-doc-smith/compare/v0.9.7-beta.2...v0.9.7-beta.3) (2025-11-28)
package/agents/create/analyze-diagram-type-llm.yaml ADDED
@@ -0,0 +1,160 @@
# LLM agent that reads a document and plans the diagram to draw for it.
# It returns a document summary, a diagram type, a visual style and an aspect
# ratio. The agent is looked up by name (`analyzeDiagramTypeLLM`) and invoked
# from analyze-diagram-type.mjs, which applies fallbacks when a field is
# missing or invalid.
name: analyzeDiagramTypeLLM
description: Analyze document content using LLM to determine diagram type and select appropriate style
model:
  reasoning_effort: 1
# Prompt template. `{% ... %}` / `{{ ... }}` are template tags rendered with
# the input fields declared in `input_schema` below.
instructions: |
  You are an AI assistant specialized in technical documentation visualization. Your task is to analyze a document segment and generate a structured visual plan for an image generator.

  {% if feedback %}
  **CRITICAL: User Feedback (HIGHEST PRIORITY)**
  <feedback>
  {{ feedback }}
  </feedback>

  **IMPORTANT**: User feedback has the **HIGHEST PRIORITY** in all decision-making. Any explicit requests in the feedback (e.g., diagram type, style, colors, aspect ratio, size, layout preferences) must be respected and applied. Additionally, extract and note any other feedback information (such as color preferences, size requirements, layout specifications, etc.) that should be passed to subsequent image generation steps.
  {% endif %}

  Your responsibilities:

  1. **Analyze Context**: Understand the document’s content, structure, and its purpose, especially around where the diagram will be inserted.

  2. **Generate Document Summary**:
     **CRITICAL**: The documentSummary will be the **only input** passed to the image generation model. Preserve as much information as possible, only removing content that is truly useless for diagram generation.

     **What to PRESERVE (keep as much as possible):**
     - **All structural elements**: Headings, sections, hierarchy, ordering, and document structure
     - **All entities and components**: Names, roles, services, modules, actors, objects, and any elements that could appear as nodes
     - **All relationships and connections**: How entities relate, data flows, dependencies, interactions, and any connections
     - **All process flows and steps**: Sequential steps, decision points, workflows, logical order, and any process information
     - **All labels and names**: All names, labels, identifiers, and terminology used in the document
     - **Technical details**: Specifications, protocols, interfaces, configurations, and technical information
     - **Examples and use cases**: Concrete examples, scenarios, and use cases that illustrate the concepts
     - **Contextual information**: Explanatory text, background context, and descriptions that help understand relationships
     - **All content that could inform diagram structure**: Any information that might be relevant for creating accurate diagrams

     **What to REMOVE (only truly useless content):**
     - **Verbatim duplicates**: Exact duplicate sentences or paragraphs that repeat the same information
     - **Completely off-topic content**: Content that has no relation to the diagram subject matter
     - **Pure marketing/promotional text**: Sales language that doesn't contain technical or structural information
     - **Unrelated notes or comments**: Comments that are completely unrelated to the document's main content

     **Summary Guidelines:**
     - **Preserve the vast majority of content** - only remove content that is clearly redundant or completely unrelated
     - Keep the original document structure, hierarchy, and organization
     - Maintain all technical details, examples, and contextual information
     - When in doubt, **keep the content** rather than removing it
     - The summary should be comprehensive and contain all information that could be useful for diagram generation

  3. **Determine Diagram Type**:
     Choose one of the following types based on the content:
     - **architecture**: Static system structure (components, containers, services)
     - **flowchart**: Decision logic, workflows, process steps
     - **guide**: Tutorials, step-by-step user journeys
     - **intro**: Concept overviews, mind maps
     - **sequence**: Time-based interactions between entities
     - **network**: Logical or physical network topologies

     **Decision Priority (in order):**
     {% if feedback %}
     0. **HIGHEST PRIORITY**: Analyze the user feedback carefully. If the feedback explicitly or implicitly specifies a diagram type (e.g., "architecture diagram", "flowchart", "sequence diagram", "流程图", "架构图") → **MUST use that type and override any other considerations**. Use your understanding of natural language to identify the user's intent. The feedback type takes absolute precedence.
     {% endif %}
     1. **Content Analysis**: If no type preference is found in feedback, analyze the document content structure and characteristics:
        - If the document is an **overview** (e.g. titled `# Overview`, describes whole system/project) → use `"architecture"`.
        - Sequential flow with time-based interactions → `sequence`
        - Branching logic, decision points, workflows → `flowchart`
        - User steps/tutorials, guided processes → `guide`
        - Concept maps, high-level introductions → `intro`
        - Infrastructure, network topologies → `network`

  4. **Select Diagram Style**:
     **Decision Priority (in order):**
     {% if feedback %}
     0. **HIGHEST PRIORITY**: Analyze the user feedback carefully. If the feedback explicitly or implicitly specifies a diagram style (e.g., "modern style", "hand-drawn", "anthropomorphic", "3d", "flat design", "现代风格", "手绘风格") → **MUST use that style and override any default style**. Use your understanding of natural language to identify the user's style preference. The feedback style takes absolute precedence.
     {% endif %}
     {% if defaultStyle %}
     1. **Default Style**: If no style preference is found in feedback, use the configured default style: `{{ defaultStyle }}`. This is the user's preferred default style from configuration.
     {% endif %}
     2. **Content-Based Selection**: If no feedback style and no default style, choose a style appropriate for technical documentation tone based on the content characteristics. You can use any style name that best fits the content, including but not limited to:
        - Common styles: `modern`, `standard`, `hand-drawn`, `anthropomorphic`, `flat`, `minimalist`, `3d`
        - Other creative styles: `watercolor`, `sketch`, `vintage`, `cyberpunk`, `minimal`, `realistic`, `cartoon`, `isometric`, `neon`, `pastel`, etc.
        - You are not limited to predefined styles - use your knowledge of visual styles to select the most appropriate one
     3. **Available Styles Reference**: If `availableStyles` is provided and not empty, prefer styles from that list. However, if a better style is needed and not in the list, you can still use it. The `styleDescriptions` object provides descriptions of common styles for reference, but you are not restricted to only those styles.
     {% if availableStyles and availableStyles.length %}
        - `availableStyles`: {{ availableStyles | join(", ") }}
     {% endif %}
     {% if styleDescriptions %}
        - `styleDescriptions`:
     {% for styleName, styleDescription in styleDescriptions %}
          - `{{ styleName }}`: {{ styleDescription }}
     {% endfor %}
     {% endif %}

  5. **Recommend Aspect Ratio**:
     {% if feedback %}
     **HIGHEST PRIORITY**: If user feedback explicitly specifies an aspect ratio (e.g., "16:9", "4:3", "use landscape", "make it square") → **MUST use that aspect ratio**.
     {% endif %}

     Otherwise, select the most suitable aspect ratio based on layout direction:
     - `"1:1"`: Radial layouts, mind maps, central concepts
     - `"5:4"` or `"4:3"`: Vertical flows (step-by-step, guides)
     - `"3:2"`, `"16:9"`, `"21:9"`: Horizontal flows (timelines, architecture)

     **Decision Logic:**
     - Vertical flows → use `"4:3"` (default), or `"5:4"` for taller needs
     - Horizontal flows → `"16:9"` (default), `"21:9"` for very wide, `"3:2"` for moderate width
     - Central hub structures → use `"1:1"`

     **Never** mismatch direction and ratio:
     - Don't use portrait for horizontal content or vice versa
     - Don't use `"1:1"` unless layout is truly radial

  Document Content:
  <document_content>
  {{ documentContent }}
  </document_content>

# Fields made available to the prompt template above.
input_schema:
  type: object
  properties:
    documentContent:
      type: string
      description: The document content to analyze
    availableStyles:
      type: array
      description: List of available diagram styles
      items:
        type: string
    styleDescriptions:
      type: object
      description: Style descriptions
      additionalProperties:
        type: string
    locale:
      type: string
      description: Language for labels
      default: en
    feedback:
      type: string
      description: User feedback that may contain style, type, or other preferences. You should analyze this feedback carefully to extract any explicit or implicit preferences. If feedback specifies a style or type, it MUST override the defaultStyle.
      default: ""
    defaultStyle:
      type: string
      description: Default diagram style from configuration. Use this only if no style preference is found in feedback. If feedback specifies a style, it takes precedence over this default.
      nullable: true
  required:
    - documentContent
    - availableStyles
# Structured result of the analysis; all four fields are mandatory.
output_schema:
  type: object
  properties:
    documentSummary:
      type: string
      description: A comprehensive summary that preserves the vast majority of the original document content. Only remove verbatim duplicates, completely off-topic content, or pure marketing text. Keep all structural elements, entities, relationships, processes, technical details, examples, and contextual information. This summary will be the only content passed to the image generation model.
    diagramType:
      type: string
      description: The selected diagram type
      # Mirrors the six types the prompt allows in step 3.
      enum:
        - architecture
        - flowchart
        - guide
        - intro
        - sequence
        - network
    diagramStyle:
      type: string
      description: The selected diagram style. Can be any style name (e.g., 'modern', 'hand-drawn', 'watercolor', 'cyberpunk', 'isometric', etc.). Not limited to predefined styles - use your knowledge of visual styles to select the most appropriate one.
    aspectRatio:
      type: string
      description: Recommended aspect ratio for the image based on content structure analysis. MUST match the primary flow direction (vertical→portrait, horizontal→landscape, radial→square)
      # Quoted so that digit:digit values are never read as sexagesimal numbers.
      enum: ["1:1", "5:4", "4:3", "3:2", "16:9", "21:9"]
  required:
    - documentSummary
    - diagramType
    - diagramStyle
    - aspectRatio
package/agents/create/analyze-diagram-type.mjs ADDED
@@ -0,0 +1,297 @@
1
+ import { DIAGRAM_STYLES } from "../../utils/constants/index.mjs";
2
+
const DEFAULT_DIAGRAM_STYLE = "modern";
const DEFAULT_DIAGRAM_TYPE = "flowchart";
const DEFAULT_ASPECT_RATIO = "4:3";

// Aspect ratios accepted from the LLM. Single source of truth for both the
// runtime validation below and the `aspectRatio` enum in the output schema.
const SUPPORTED_ASPECT_RATIOS = ["1:1", "5:4", "4:3", "3:2", "16:9", "21:9"];

// Type-specific content requirements, keyed by diagram type.
// Each value is a bullet list that is returned as `diagramTypeRequirements`.
const TYPE_REQUIREMENTS = {
  architecture: `- Accurately represent the system architecture, components, services, and their relationships
- Show clear component boundaries and service interactions
- Include all key architectural elements (layers, modules, services, databases, APIs)
- Display data flow and communication patterns between components
- Use clear labels for each component and connection`,
  flowchart: `- Accurately represent the process flow, steps, decisions, and workflow
- Show clear step-by-step progression with decision points
- Use standard flowchart symbols: rectangles for processes, diamonds for decisions, arrows for flows
- Include all key steps and decision branches
- Maintain logical flow direction (top-to-bottom or left-to-right)`,
  guide: `- Show user journey, tutorial flow, or guided process
- Display clear progression from start to completion
- Include key milestones, checkpoints, or decision points
- Use clear visual cues to guide the viewer through the process
- Make it easy to follow and understand the path`,
  intro: `- Provide a high-level overview or conceptual explanation
- Show main concepts, relationships, and key ideas
- Use clear visual hierarchy to emphasize important elements
- Make it accessible and easy to understand for newcomers
- Focus on big picture rather than detailed implementation`,
  sequence: `- Show interactions over time between components or actors
- Display clear message flow and timing
- Include all participating entities and their interactions
- Show chronological order of events
- Use clear labels for messages and interactions`,
  network: `- Show network structure, nodes, and connections
- Display routing paths and network topology
- Include all network components (routers, switches, servers, clients)
- Show connection types and data flow directions
- Use clear labels for network elements`,
};

// Style-specific requirements, keyed by predefined style name.
// Styles that are not listed here get generic requirements built from the
// style name (see buildCustomStyleRequirements).
const STYLE_REQUIREMENTS = {
  modern: `- Modern, clean, professional diagram style
- Contemporary design elements with smooth lines
- Professional color scheme suitable for technical documentation
- Clear visual hierarchy and readable text
- Sleek and polished appearance`,
  standard: `- Standard flowchart style with traditional symbols
- Conventional formatting and clear structure
- Rectangles for processes, diamonds for decisions, arrows for flows
- Clear, readable text labels
- Professional and familiar appearance`,
  "hand-drawn": `- Hand-drawn, sketch-like style with natural, organic lines
- Slightly imperfect shapes for a casual, approachable appearance
- Natural line variations and hand-drawn aesthetics
- Friendly and informal visual style
- Avoid perfect geometric shapes`,
  anthropomorphic: `- Anthropomorphic style with personified elements
- Vivid and lively imagery with characters or objects having human-like features
- Engaging and memorable visual elements
- Creative and expressive design
- Make abstract concepts more relatable through personification`,
  flat: `- Flat design style with no shadows, gradients, or 3D effects
- Clean geometric shapes with bold colors
- Minimalist aesthetics with simple, flat surfaces
- Modern and clean appearance
- Avoid depth and dimensionality`,
  minimalist: `- Minimalist style with the fewest possible elements
- Maximum clarity with simple shapes
- Ample white space and essential information only
- Clean and uncluttered appearance
- Focus on core message without distractions`,
  "3d": `- 3D style with three-dimensional effects and perspective
- Depth, shadows, and realistic spatial relationships
- Three-dimensional appearance with volume and dimension
- Professional and modern 3D rendering
- Clear depth cues and perspective`,
};

// True when `key` is a string naming an own entry of `table`. Inherited names
// such as "constructor" or "toString" are rejected, so an LLM-supplied value
// can never resolve to something that is not one of our requirement strings.
function hasOwnEntry(table, key) {
  return typeof key === "string" && Object.prototype.hasOwnProperty.call(table, key);
}

// Generic requirements for a style that has no entry in STYLE_REQUIREMENTS, so
// the requirements never contradict the style that was actually selected.
function buildCustomStyleRequirements(style) {
  return `- Render the diagram in a "${style}" visual style, applied consistently to every element
- Keep all text labels clear and readable regardless of stylistic effects
- Maintain a clear visual hierarchy suitable for technical documentation`;
}

// Map of style name -> description (or name when no description exists) for
// every predefined style. `preferredStyles` are listed first; the remaining
// predefined styles follow as reference only. Names without a DIAGRAM_STYLES
// entry are skipped.
function buildStyleDescriptions(preferredStyles) {
  const descriptions = {};
  for (const style of [...preferredStyles, ...Object.keys(DIAGRAM_STYLES)]) {
    const info = DIAGRAM_STYLES[style];
    if (info && !descriptions[style]) {
      descriptions[style] = info.description || info.name;
    }
  }
  return descriptions;
}

/**
 * Analyze document content to determine the diagram type, style and aspect ratio.
 *
 * The decision is delegated to the `analyzeDiagramTypeLLM` agent when it is
 * registered on `options.context.agents`. If the agent is missing, or invoking
 * it fails, defaults are used instead and no error is thrown.
 *
 * @param {object} input
 * @param {string} input.documentContent - Document content to analyze.
 * @param {string[]} [input.availableStyles=[]] - Preferred styles (a guide, not a restriction).
 * @param {string} [input.defaultStyle] - Style used when the LLM returns none.
 * @param {{style?: string}} [input.diagramming] - Alternative source of the default style.
 * @param {string} [input.locale="en"] - Locale forwarded to the LLM agent.
 * @param {string} [input.feedback=""] - User feedback forwarded to the LLM agent.
 * @param {object} [options] - Invocation options; `options.context` supplies `agents` and `invoke`.
 * @returns {Promise<object>} `diagramType`, `diagramStyle`, `aspectRatio`,
 *   `documentContent`, `documentSummary`, `diagramTypeRequirements`,
 *   `diagramStyleRequirements` and `negativePromptExclusions`.
 */
export default async function analyzeDiagramType(
  {
    documentContent,
    availableStyles = [],
    defaultStyle,
    diagramming,
    locale = "en",
    feedback = "",
  },
  options,
) {
  // An explicit defaultStyle wins; otherwise use the diagramming config.
  const fallbackStyle = defaultStyle || diagramming?.style;
  // Treat anything that is not an array (e.g. null) as "no preference".
  const preferredStyles = Array.isArray(availableStyles) ? availableStyles : [];

  // Step 1: Let the LLM analyze content and feedback and make the decision.
  // `options` itself may be omitted, hence the leading optional chain.
  const llmAgent = options?.context?.agents?.["analyzeDiagramTypeLLM"];
  let llmResult = null;

  if (llmAgent) {
    try {
      const stylesToUse =
        preferredStyles.length > 0 ? preferredStyles : Object.keys(DIAGRAM_STYLES);

      llmResult = await options.context.invoke(llmAgent, {
        documentContent,
        availableStyles: stylesToUse,
        styleDescriptions: buildStyleDescriptions(stylesToUse),
        locale,
        feedback: feedback || "",
        defaultStyle: fallbackStyle || null,
      });
    } catch (error) {
      // Best effort: a failed analysis falls through to the defaults below.
      console.warn(`⚠️ LLM analysis failed: ${error?.message ?? error}`);
    }
  }

  // Step 2: Determine diagram type.
  // Only the types listed in TYPE_REQUIREMENTS are supported; anything else
  // falls back to the default so type and requirements always agree.
  const requestedType = llmResult?.diagramType;
  const typeIsKnown = hasOwnEntry(TYPE_REQUIREMENTS, requestedType);
  if (requestedType && !typeIsKnown) {
    console.warn(
      `Unknown diagramType "${requestedType}" from LLM, falling back to "${DEFAULT_DIAGRAM_TYPE}"`,
    );
  }
  const diagramType = typeIsKnown ? requestedType : DEFAULT_DIAGRAM_TYPE;

  // Step 3: Select style.
  // Any style name from the LLM is accepted, even one outside availableStyles
  // or the predefined list (e.g. 'watercolor', 'cyberpunk', 'isometric').
  const diagramStyle = llmResult?.diagramStyle || fallbackStyle || DEFAULT_DIAGRAM_STYLE;

  // Step 4: Generate prompt requirements for image generation.
  const diagramTypeRequirements = TYPE_REQUIREMENTS[diagramType];
  const diagramStyleRequirements = hasOwnEntry(STYLE_REQUIREMENTS, diagramStyle)
    ? STYLE_REQUIREMENTS[diagramStyle]
    : buildCustomStyleRequirements(diagramStyle);

  // Negative prompt exclusions: exclude the two most distinctive looks unless
  // one of them is the selected style. The leading ", " is part of the value.
  let negativePromptExclusions = "";
  if (diagramStyle !== "anthropomorphic") {
    negativePromptExclusions += ", anthropomorphic";
  }
  if (diagramStyle !== "hand-drawn") {
    negativePromptExclusions += ", hand-drawn, sketch";
  }

  // Step 5: Use the LLM's document summary when available; otherwise fall
  // back to the original content.
  const documentSummary = llmResult?.documentSummary || documentContent;

  // Step 6: Take the aspect ratio recommended by the LLM, validated against
  // the supported list.
  let aspectRatio = llmResult?.aspectRatio || DEFAULT_ASPECT_RATIO;
  if (!SUPPORTED_ASPECT_RATIOS.includes(aspectRatio)) {
    console.warn(
      `Invalid aspectRatio "${aspectRatio}" from LLM, falling back to "${DEFAULT_ASPECT_RATIO}"`,
    );
    aspectRatio = DEFAULT_ASPECT_RATIO;
  }

  // Step 7: Return everything the image generation step needs.
  return {
    diagramType,
    diagramStyle,
    aspectRatio,
    documentContent, // Full original content (kept for backward compatibility and additional context)
    documentSummary, // LLM-generated summary, or the original content when no summary was produced
    diagramTypeRequirements,
    diagramStyleRequirements,
    negativePromptExclusions,
  };
}

analyzeDiagramType.input_schema = {
  type: "object",
  properties: {
    documentContent: {
      type: "string",
      description: "The document content to analyze for diagram type and style selection",
    },
    availableStyles: {
      type: "array",
      description:
        "List of available diagram styles configured by user (optional restriction). If empty, any style is allowed.",
      items: {
        type: "string",
      },
    },
    defaultStyle: {
      type: "string",
      description:
        "Default diagram style to use when no style is specified in feedback. Can be any style name, not limited to predefined styles.",
    },
    diagramming: {
      type: "object",
      description: "Diagramming configuration object (alternative way to pass style)",
      properties: {
        style: {
          type: "string",
          description: "Default diagram style",
        },
      },
    },
    locale: {
      type: "string",
      description: "Language for analysis",
      default: "en",
    },
    feedback: {
      type: "string",
      description:
        "User feedback that may contain style or type preferences (e.g., 'use anthropomorphic style', 'create architecture diagram')",
      default: "",
    },
  },
  required: ["documentContent"],
};

analyzeDiagramType.output_schema = {
  type: "object",
  properties: {
    diagramType: {
      type: "string",
      description: "The detected diagram type",
    },
    diagramStyle: {
      type: "string",
      description: "The selected diagram style",
    },
    diagramTypeRequirements: {
      type: "string",
      description: "Content requirements for the diagram type",
    },
    diagramStyleRequirements: {
      type: "string",
      description: "Style requirements for the diagram style",
    },
    negativePromptExclusions: {
      type: "string",
      description: "Additional negative prompt exclusions based on style",
    },
    aspectRatio: {
      type: "string",
      description: "Aspect ratio for the diagram (must match content flow direction)",
      enum: [...SUPPORTED_ASPECT_RATIOS],
    },
    documentContent: {
      type: "string",
      description:
        "The full document content (kept for backward compatibility and additional context)",
    },
    documentSummary: {
      type: "string",
      description:
        "A concise summary of the document content focusing on key elements needed for diagram generation. This summary is generated by the analysis LLM to ensure consistent understanding between analysis and image generation models.",
    },
  },
  required: [
    "diagramType",
    "diagramStyle",
    "aspectRatio",
    "documentSummary",
    "diagramTypeRequirements",
    "diagramStyleRequirements",
    "negativePromptExclusions",
    "documentContent",
  ],
};
package/agents/create/generate-diagram-image.yaml ADDED
@@ -0,0 +1,60 @@
# Image agent that renders a diagram from a document and a diagram plan
# (type, style, aspect ratio). The system and user prompts are loaded from the
# markdown files referenced under `instructions`.
type: image
name: generateDiagramImage
image_model:
  model: google/gemini-3-pro-image-preview
  # Thinking output is left disabled; per the original author, what it adds
  # is not obvious. The settings are kept here for reference.
  # thinkingConfig:
  #   includeThoughts: true
  # responseModalities:
  #   - 'TEXT'
  #   - 'IMAGE'
  # NOTE(review): nesting of `imageConfig` under `image_model` is reconstructed
  # from a diff that lost its indentation - confirm against the package.
  imageConfig:
    # Both values are read from the input fields declared in `input_schema`.
    imageSize:
      $get: size
    aspectRatio:
      $get: ratio

instructions:
  - role: system
    url: ../../prompts/detail/diagram/generate-image-system.md
  - role: user
    url: ../../prompts/detail/diagram/generate-image-user.md

input_schema:
  type: object
  properties:
    documentContent:
      type: string
      description: The full original document content
    documentSummary:
      type: string
      description: A comprehensive summary of the document content for diagram generation (preferred over documentContent if available)
    diagramType:
      type: string
      description: The type of diagram to generate (architecture, flowchart, guide, intro, sequence, network)
    diagramStyle:
      type: string
      description: The visual style for the diagram (modern, standard, hand-drawn, anthropomorphic, flat, minimalist, 3d)
    locale:
      type: string
      description: Language for diagram labels
      default: en
    size:
      type: string
      description: Size of the generated image (e.g., "1K", "2K")
      default: "1K"
    # `ratio` feeds imageConfig.aspectRatio above; values stay quoted so that
    # digit:digit pairs are never parsed as sexagesimal numbers.
    ratio:
      type: string
      description: Aspect ratio of the generated image (must match content flow direction)
      enum:
        - "1:1"
        - "5:4"
        - "4:3"
        - "3:2"
        - "16:9"
        - "21:9"
    # Same value set as `ratio`, exposed under the name the prompt templates use.
    aspectRatio:
      type: string
      description: Aspect ratio of the generated image (alias for ratio, used in prompt templates)
      enum:
        - "1:1"
        - "5:4"
        - "4:3"
        - "3:2"
        - "16:9"
        - "21:9"
  required:
    - documentContent
    - diagramType
    - diagramStyle
    - ratio
include_input_in_output: true