alita-sdk 0.3.562__py3-none-any.whl → 0.3.584__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This release of alita-sdk has been flagged as potentially problematic.

Files changed (74)
  1. alita_sdk/cli/agents.py +358 -165
  2. alita_sdk/configurations/openapi.py +227 -15
  3. alita_sdk/runtime/langchain/langraph_agent.py +93 -20
  4. alita_sdk/runtime/langchain/utils.py +30 -14
  5. alita_sdk/runtime/toolkits/artifact.py +2 -1
  6. alita_sdk/runtime/toolkits/mcp.py +4 -2
  7. alita_sdk/runtime/toolkits/skill_router.py +1 -1
  8. alita_sdk/runtime/toolkits/vectorstore.py +1 -1
  9. alita_sdk/runtime/tools/data_analysis.py +1 -1
  10. alita_sdk/runtime/tools/llm.py +30 -11
  11. alita_sdk/runtime/utils/constants.py +5 -1
  12. alita_sdk/tools/ado/repos/__init__.py +2 -1
  13. alita_sdk/tools/ado/test_plan/__init__.py +2 -1
  14. alita_sdk/tools/ado/wiki/__init__.py +2 -1
  15. alita_sdk/tools/ado/work_item/__init__.py +2 -1
  16. alita_sdk/tools/advanced_jira_mining/__init__.py +2 -1
  17. alita_sdk/tools/aws/delta_lake/__init__.py +2 -1
  18. alita_sdk/tools/azure_ai/search/__init__.py +2 -1
  19. alita_sdk/tools/bitbucket/__init__.py +2 -1
  20. alita_sdk/tools/browser/__init__.py +1 -1
  21. alita_sdk/tools/carrier/__init__.py +1 -1
  22. alita_sdk/tools/cloud/aws/__init__.py +2 -1
  23. alita_sdk/tools/cloud/azure/__init__.py +2 -1
  24. alita_sdk/tools/cloud/gcp/__init__.py +2 -1
  25. alita_sdk/tools/cloud/k8s/__init__.py +2 -1
  26. alita_sdk/tools/code/linter/__init__.py +2 -1
  27. alita_sdk/tools/code/sonar/__init__.py +2 -1
  28. alita_sdk/tools/confluence/__init__.py +2 -1
  29. alita_sdk/tools/custom_open_api/__init__.py +2 -1
  30. alita_sdk/tools/elastic/__init__.py +2 -1
  31. alita_sdk/tools/figma/__init__.py +51 -5
  32. alita_sdk/tools/figma/api_wrapper.py +1157 -123
  33. alita_sdk/tools/figma/figma_client.py +73 -0
  34. alita_sdk/tools/figma/toon_tools.py +2748 -0
  35. alita_sdk/tools/github/__init__.py +2 -1
  36. alita_sdk/tools/gitlab/__init__.py +2 -1
  37. alita_sdk/tools/gitlab/api_wrapper.py +32 -0
  38. alita_sdk/tools/gitlab_org/__init__.py +2 -1
  39. alita_sdk/tools/google/bigquery/__init__.py +2 -1
  40. alita_sdk/tools/google_places/__init__.py +2 -1
  41. alita_sdk/tools/jira/__init__.py +2 -1
  42. alita_sdk/tools/keycloak/__init__.py +2 -1
  43. alita_sdk/tools/localgit/__init__.py +2 -1
  44. alita_sdk/tools/memory/__init__.py +1 -1
  45. alita_sdk/tools/ocr/__init__.py +2 -1
  46. alita_sdk/tools/openapi/__init__.py +227 -15
  47. alita_sdk/tools/openapi/api_wrapper.py +1276 -802
  48. alita_sdk/tools/pandas/__init__.py +3 -2
  49. alita_sdk/tools/postman/__init__.py +2 -1
  50. alita_sdk/tools/pptx/__init__.py +2 -1
  51. alita_sdk/tools/qtest/__init__.py +2 -1
  52. alita_sdk/tools/rally/__init__.py +2 -1
  53. alita_sdk/tools/report_portal/__init__.py +2 -1
  54. alita_sdk/tools/salesforce/__init__.py +2 -1
  55. alita_sdk/tools/servicenow/__init__.py +2 -1
  56. alita_sdk/tools/sharepoint/__init__.py +2 -1
  57. alita_sdk/tools/slack/__init__.py +3 -2
  58. alita_sdk/tools/sql/__init__.py +2 -1
  59. alita_sdk/tools/testio/__init__.py +2 -1
  60. alita_sdk/tools/testrail/__init__.py +2 -1
  61. alita_sdk/tools/utils/content_parser.py +68 -2
  62. alita_sdk/tools/xray/__init__.py +2 -1
  63. alita_sdk/tools/yagmail/__init__.py +2 -1
  64. alita_sdk/tools/zephyr/__init__.py +2 -1
  65. alita_sdk/tools/zephyr_enterprise/__init__.py +2 -1
  66. alita_sdk/tools/zephyr_essential/__init__.py +2 -1
  67. alita_sdk/tools/zephyr_scale/__init__.py +2 -1
  68. alita_sdk/tools/zephyr_squad/__init__.py +2 -1
  69. {alita_sdk-0.3.562.dist-info → alita_sdk-0.3.584.dist-info}/METADATA +1 -1
  70. {alita_sdk-0.3.562.dist-info → alita_sdk-0.3.584.dist-info}/RECORD +74 -72
  71. {alita_sdk-0.3.562.dist-info → alita_sdk-0.3.584.dist-info}/WHEEL +0 -0
  72. {alita_sdk-0.3.562.dist-info → alita_sdk-0.3.584.dist-info}/entry_points.txt +0 -0
  73. {alita_sdk-0.3.562.dist-info → alita_sdk-0.3.584.dist-info}/licenses/LICENSE +0 -0
  74. {alita_sdk-0.3.562.dist-info → alita_sdk-0.3.584.dist-info}/top_level.txt +0 -0
alita_sdk/tools/figma/toon_tools.py (new file)
@@ -0,0 +1,2748 @@
"""
TOON (Token-Optimized Object Notation) tools for Figma.

These tools output a compact, human-readable format optimized for LLM token consumption.
They are designed for A/B testing against JSON-based tools.

TOON Format Example:
```
FILE: Mobile App [key:abc123]
  PAGE: Authentication
    FRAME: 01_Login [0,0 375x812] form/default #1:100
      Headings: Welcome Back
      Labels: Email | Password
      Buttons: Sign In > auth | Forgot Password? > reset
      Components: Input/Email, Button/Primary
    FRAME: 01_Login_Error [400,0] form/error ~01_Login #1:101
      Errors: Invalid email or password
    FLOWS:
      sequence: 01_Login > 02_Verify > 03_Dashboard
      variants: Login ~ [Login#1:100, Login_Error#1:101, Login_Loading#1:102]
      cta: "Sign In" > authenticated | "Create Account" > registration
```

Legend:
  Headings: Large text, titles
  Labels: Form labels, navigation items, small text
  Buttons: CTAs with inferred destinations
  Components: Component instances used
  Errors: Error messages (red text)
  Text: Body text content
  Image: Image description (if processed)

Flow markers:
  sequence: Sequence inferred from naming (01_, Step 1, etc.)
  variants: True variants of same screen with frame IDs (Login#1:100, Login_Error#1:101)
  cta: CTA text with likely destination
  >: Navigation/flow direction
  ~: Variant of (similar to)
  #: Frame ID (use with get_frame_detail_toon or get_file_nodes for drill-down)
"""

import re
import logging
from typing import Callable, Dict, List, Optional, Tuple, Any
from pydantic import BaseModel, Field, create_model


# -----------------------------------------------------------------------------
# LLM Structured Output Schemas
# -----------------------------------------------------------------------------

class ExtractedInput(BaseModel):
    """A form input/control extracted from the screen."""
    label: str = Field(description="Label for this input (e.g., 'Email', 'Creativity', 'Model')")
    input_type: str = Field(description="Type: text, email, password, number, slider, radio, checkbox, toggle, select, textarea, display")
    current_value: Optional[str] = Field(default=None, description="Current value shown (if any)")
    options: Optional[List[str]] = Field(default=None, description="Options for select/radio/slider (e.g., ['Low', 'Medium', 'High'])")
    required: bool = Field(default=False, description="Whether this field appears required")


class ScreenExplanation(BaseModel):
    """Structured explanation of a single screen/frame based on visual analysis."""
    frame_id: str = Field(description="Frame ID from TOON data (e.g., '1:100')")
    frame_name: str = Field(description="Frame name from TOON data")
    purpose: str = Field(description="1-2 sentence explanation of screen's purpose")
    user_goal: str = Field(description="What the user is trying to accomplish here")
    primary_action: Optional[str] = Field(default=None, description="The main CTA/action on this screen")
    visual_focus: str = Field(default="", description="What draws the eye first - the visual hierarchy focal point")
    layout_pattern: str = Field(default="", description="Layout pattern used (e.g., card grid, form stack, split view)")
    visual_state: str = Field(default="default", description="Visual state: default, error, success, loading, empty")
    inputs: List[ExtractedInput] = Field(default_factory=list, description="Form inputs/controls visible on screen")


class FlowExplanation(BaseModel):
    """Structured explanation of a user flow/journey."""
    flow_name: str = Field(description="Name of the flow (e.g., 'authentication', 'checkout')")
    description: str = Field(description="1-2 sentence description of the flow")
    entry_point: str = Field(description="Starting screen of the flow")
    exit_point: str = Field(description="End screen of the flow")
    steps: List[str] = Field(description="Ordered list of screen names in the flow")
    happy_path: str = Field(description="Description of the ideal user path")
    error_states: List[str] = Field(default_factory=list, description="Screens showing error states")


class DesignAnalysis(BaseModel):
    """Complete structured analysis of a Figma design."""
    file_name: str = Field(description="Name of the Figma file")
    overall_purpose: str = Field(description="2-3 sentence summary of what this design is for")
    target_user: str = Field(description="Who this design is intended for")
    design_type: str = Field(description="Type: mobile app, web app, landing page, dashboard, etc.")
    screens: List[ScreenExplanation] = Field(description="Explanation of each screen")
    flows: List[FlowExplanation] = Field(default_factory=list, description="Identified user flows")
    design_patterns: List[str] = Field(default_factory=list, description="UI patterns used (cards, forms, modals, etc.)")
    accessibility_notes: List[str] = Field(default_factory=list, description="Accessibility considerations observed")
    gaps_or_concerns: List[str] = Field(default_factory=list, description="Missing screens or UX concerns")
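
The schemas above are plain Pydantic models, so their behavior can be sanity-checked directly. A minimal sketch (illustrative, not part of the diff; assumes Pydantic v2, where serialization is `model_dump()` — on v1 it would be `.dict()`):

```python
inp = ExtractedInput(label="Email", input_type="email", required=True)
screen = ScreenExplanation(
    frame_id="1:100",
    frame_name="01_Login",
    purpose="Lets an existing user authenticate.",
    user_goal="Sign in to the app.",
    inputs=[inp],
)
print(screen.model_dump())  # nested ExtractedInput serializes as a dict
```
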
# -----------------------------------------------------------------------------
# LLM Analysis Prompt Templates
# -----------------------------------------------------------------------------

# Vision-based prompt (when image is provided)
SCREEN_VISION_PROMPT = """Analyze this Figma screen image and provide visual insights.

Screen: "{frame_name}" (ID: {frame_id})

Look at the image and identify:
1. purpose: What is this screen for? (1 sentence)
2. user_goal: What is the user trying to accomplish? (1 sentence)
3. primary_action: Which button/CTA is the main action? (just the name you see)
4. visual_focus: What draws the eye first? What's the visual hierarchy focal point?
5. layout_pattern: What layout is used? (card grid, form stack, list, split view, etc.)
6. visual_state: Is this default, error, success, loading, or empty state?
7. inputs: List ALL form inputs/controls you see:
   - For each input: label, type (text/email/password/number/slider/radio/checkbox/toggle/select/textarea/display)
   - Include current value if visible
   - For sliders/radio/select: list the options (e.g., Low/Medium/High)
   - Note if field appears required (has * or "required")
   - Include dropdowns, model selectors, token counters, settings fields

IMPORTANT: Extract ALL inputs visible - settings screens often have many controls.
Be concise - 1 sentence per field except inputs which should be complete."""


# Text-only prompt (fallback when no image)
SCREEN_TEXT_PROMPT = """Analyze this Figma screen based on extracted data.

Screen: "{frame_name}" (ID: {frame_id})

Extracted Data:
{toon_data}

Based on this data, identify:
1. purpose: What is this screen for? (1 sentence)
2. user_goal: What is the user trying to accomplish? (1 sentence)
3. primary_action: Which button/CTA is the main action? (just the name)
4. visual_focus: Based on element hierarchy, what's likely the focal point?
5. layout_pattern: What layout pattern is suggested? (form, list, cards, etc.)
6. visual_state: What state is this? (default, error, success, loading, empty)
7. inputs: Extract form inputs from the data:
   - Look for input fields, sliders, dropdowns, toggles, checkboxes
   - For each: label, type, current value (if shown), options (if applicable)
   - Include display-only fields showing values (like "Remaining Tokens: 10000")

Be concise. DO NOT repeat the element lists - they're already shown separately."""


FILE_ANALYSIS_PROMPT = """Analyze this Figma file design and provide high-level insights.

Extracted TOON Data (reference this, don't repeat it):
{toon_data}

Provide ONLY insights not already visible in the data:
1. design_type: What type of product is this? (mobile app, web app, dashboard, etc.)
2. target_user: Who is this designed for? (1 sentence)
3. overall_purpose: What problem does this solve? (1-2 sentences)
4. flows: List user journeys identified (use frame names from data)
5. design_patterns: UI patterns used (cards, forms, modals - be specific)
6. gaps_or_concerns: What's missing or could be improved? (be specific)

IMPORTANT:
- The TOON data already lists screens, buttons, components - DO NOT repeat
- Focus on SYNTHESIS and INSIGHTS the data doesn't show
- Reference frame names/IDs when discussing flows"""


# Flow analysis prompt for LLM
FLOW_ANALYSIS_PROMPT = """Analyze the user flows in this Figma design.

Frame Names and Key Actions:
{frame_summary}

Identify the main user journeys. For each journey:
1. Name it clearly (e.g., "User Registration", "Checkout Process")
2. List the screens in order (use actual frame names)
3. Describe what the user accomplishes

Focus on:
- Main happy paths (typical successful flows)
- Entry points (where users start)
- Exit points (where users complete their goal)
- Any error/recovery flows

Keep it concise. Use actual frame names from the data."""
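
Each template is an ordinary Python format string; callers fill the named placeholders. An illustrative sketch with invented values:

```python
prompt = SCREEN_TEXT_PROMPT.format(
    frame_name="01_Login",
    frame_id="1:100",
    toon_data="FRAME: 01_Login [0,0 375x812] form/default #1:100",
)
```
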
class UserFlow(BaseModel):
    """A single user flow/journey through the design."""
    name: str = Field(description="Clear name for this flow (e.g., 'User Registration')")
    description: str = Field(description="1 sentence describing what user accomplishes")
    screens: List[str] = Field(description="Ordered list of frame names in this flow")
    entry_screen: str = Field(description="First screen of the flow")
    exit_screen: str = Field(description="Final screen of the flow")
    flow_type: str = Field(default="happy_path", description="Type: happy_path, error_recovery, or alternate")


class FlowAnalysis(BaseModel):
    """LLM analysis of user flows in a design."""
    main_flows: List[UserFlow] = Field(description="Primary user journeys (2-4 flows)")
    entry_points: List[str] = Field(description="Screens where users typically enter the app")
    completion_points: List[str] = Field(description="Screens indicating task completion")
    navigation_pattern: str = Field(description="Overall navigation style: linear, hub-spoke, hierarchical, etc.")


# -----------------------------------------------------------------------------
# TOON Format Constants
# -----------------------------------------------------------------------------

TOON_INDENT = "  "  # 2 spaces for indentation


# -----------------------------------------------------------------------------
# Flow Inference Helpers
# -----------------------------------------------------------------------------

def extract_sequence_number(name: str) -> Optional[Tuple[int, str]]:
    """
    Extract sequence number from frame name.

    Patterns detected:
    - "01_Login" -> (1, "Login")
    - "Step 1 - Login" -> (1, "Login")
    - "1. Login" -> (1, "Login")
    - "Login (1)" -> (1, "Login")
    - "Screen_001" -> (1, "Screen")

    Returns (sequence_number, base_name) or None if no pattern found.
    """
    patterns = [
        # "01_Login", "001_Login"
        (r'^(\d{1,3})[-_\s]+(.+)$', lambda m: (int(m.group(1)), m.group(2).strip())),
        # "Step 1 - Login", "Step1: Login"
        (r'^step\s*(\d+)\s*[-:_]?\s*(.*)$', lambda m: (int(m.group(1)), m.group(2).strip() or f"Step {m.group(1)}")),
        # "1. Login", "1) Login"
        (r'^(\d+)\s*[.\)]\s*(.+)$', lambda m: (int(m.group(1)), m.group(2).strip())),
        # "Login (1)", "Login [1]"
        (r'^(.+?)\s*[\(\[](\d+)[\)\]]$', lambda m: (int(m.group(2)), m.group(1).strip())),
        # "Screen_001", "Page001"
        (r'^([a-zA-Z]+)[-_]?(\d{2,3})$', lambda m: (int(m.group(2)), m.group(1).strip())),
    ]

    for pattern, extractor in patterns:
        match = re.match(pattern, name, re.IGNORECASE)
        if match:
            return extractor(match)
    return None
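
As a quick illustration of the patterns above (invented frame names):

```python
extract_sequence_number("01_Login")         # -> (1, "Login")
extract_sequence_number("Step 2 - Verify")  # -> (2, "Verify")
extract_sequence_number("Login [3]")        # -> (3, "Login")
extract_sequence_number("Dashboard")        # -> None (no sequence pattern)
```
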
def extract_base_name(name: str) -> str:
    """
    Extract base name for variant grouping.

    "Login_Error" -> "Login"
    "Login - Error State" -> "Login"
    "01_Login_Loading" -> "Login"
    """
    # Remove sequence prefix first
    seq_result = extract_sequence_number(name)
    if seq_result:
        name = seq_result[1]

    # Common variant suffixes to strip
    variant_patterns = [
        r'[-_\s]+(error|success|loading|empty|disabled|active|hover|pressed|selected|default|filled|focused)(\s+state)?$',
        r'[-_\s]+(v\d+|variant\s*\d*)$',
        r'[-_\s]+\d+$',  # Trailing numbers
    ]

    base = name
    for pattern in variant_patterns:
        base = re.sub(pattern, '', base, flags=re.IGNORECASE)

    return base.strip()


def infer_state_from_name(name: str) -> str:
    """
    Infer screen state from name.

    Returns: default, error, success, loading, empty, or the detected state
    """
    name_lower = name.lower()

    state_keywords = {
        'error': ['error', 'fail', 'invalid', 'wrong'],
        'success': ['success', 'complete', 'done', 'confirmed'],
        'loading': ['loading', 'progress', 'spinner', 'wait'],
        'empty': ['empty', 'no data', 'no results', 'blank'],
        'disabled': ['disabled', 'inactive', 'locked'],
    }

    for state, keywords in state_keywords.items():
        if any(kw in name_lower for kw in keywords):
            return state

    return 'default'


def infer_screen_type(frame_data: Dict) -> str:
    """
    Infer screen type from content.

    Returns: form, list, detail, dashboard, modal, menu, settings, chat, etc.
    """
    name_lower = frame_data.get('name', '').lower()
    components = frame_data.get('components', [])
    buttons = frame_data.get('buttons', [])
    labels = frame_data.get('labels', [])

    components_lower = [c.lower() for c in components]
    buttons_lower = [b.lower() for b in buttons]

    # Check name hints
    type_hints = {
        'form': ['login', 'signup', 'register', 'checkout', 'payment', 'form', 'input'],
        'list': ['list', 'feed', 'timeline', 'results', 'search results'],
        'detail': ['detail', 'profile', 'item', 'product', 'article'],
        'dashboard': ['dashboard', 'home', 'overview', 'summary'],
        'modal': ['modal', 'dialog', 'popup', 'overlay', 'alert'],
        'menu': ['menu', 'navigation', 'sidebar', 'drawer'],
        'settings': ['settings', 'preferences', 'config', 'options'],
        'chat': ['chat', 'message', 'conversation', 'inbox'],
        'onboarding': ['onboarding', 'welcome', 'intro', 'tutorial'],
    }

    for screen_type, keywords in type_hints.items():
        if any(kw in name_lower for kw in keywords):
            return screen_type

    # Check components
    if any('input' in c or 'textfield' in c or 'form' in c for c in components_lower):
        return 'form'
    if any('card' in c or 'listitem' in c for c in components_lower):
        return 'list'
    if any('modal' in c or 'dialog' in c for c in components_lower):
        return 'modal'

    # Check buttons for form indicators
    form_buttons = ['submit', 'save', 'sign in', 'log in', 'register', 'next', 'continue']
    if any(any(fb in b for fb in form_buttons) for b in buttons_lower):
        return 'form'

    return 'screen'  # Generic fallback
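
For example (invented frame dicts, using only the keys the function reads):

```python
infer_screen_type({"name": "Checkout", "buttons": ["Pay Now"]})   # -> 'form' (name hint)
infer_screen_type({"name": "Activity Feed"})                      # -> 'list'
infer_screen_type({"name": "Untitled", "buttons": ["Continue"]})  # -> 'form' (button hint)
```
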
def infer_cta_destination(cta_text: str, frame_context: str = '') -> str:
    """
    Infer likely destination/action from CTA/button text with context awareness.

    Args:
        cta_text: The button/CTA text
        frame_context: Optional frame name for additional context

    Returns semantic action category or descriptive action based on text.
    """
    cta_lower = cta_text.lower().strip()
    context_lower = frame_context.lower() if frame_context else ''

    # Skip very short or icon-only buttons
    if len(cta_lower) < 2 or cta_lower in ['x', '+', '-', '...', '→', '←']:
        return None  # Will be filtered out

    # Semantic destination mapping with expanded keywords
    destinations = {
        'authenticate': ['sign in', 'log in', 'login', 'authenticate', 'sign-in', 'log-in'],
        'register': ['sign up', 'register', 'create account', 'join', 'get started', 'sign-up'],
        'navigate_next': ['next', 'continue', 'proceed', 'forward', 'go', 'start'],
        'navigate_back': ['back', 'previous', 'return', 'go back'],
        'cancel': ['cancel', 'nevermind', 'not now', 'maybe later', 'skip'],
        'submit_form': ['submit', 'send', 'apply', 'confirm', 'done', 'complete', 'finish', 'ok', 'okay'],
        'save': ['save', 'save changes', 'update', 'keep'],
        'view_detail': ['view', 'details', 'more', 'see more', 'read more', 'open', 'expand', 'show'],
        'search': ['search', 'find', 'look up', 'filter'],
        'settings': ['settings', 'preferences', 'options', 'configure', 'customize'],
        'help': ['help', 'support', 'faq', 'contact', 'learn more', 'how to', 'guide'],
        'share': ['share', 'invite', 'send to', 'export', 'copy link'],
        'delete': ['delete', 'remove', 'clear', 'trash', 'discard'],
        'edit': ['edit', 'modify', 'change', 'rename'],
        'create': ['add', 'create', 'new', 'plus', 'insert'],
        'close': ['close', 'dismiss', 'exit', 'hide'],
        'reset': ['reset', 'forgot password', 'recover', 'restore'],
        'download': ['download', 'export', 'get', 'install'],
        'upload': ['upload', 'import', 'attach', 'choose file'],
        'refresh': ['refresh', 'reload', 'retry', 'try again'],
        'select': ['select', 'choose', 'pick'],
        'toggle': ['enable', 'disable', 'turn on', 'turn off', 'switch'],
        'connect': ['connect', 'link', 'integrate', 'sync'],
        'pay': ['pay', 'checkout', 'purchase', 'buy', 'order', 'subscribe'],
        'upgrade': ['upgrade', 'premium', 'pro', 'unlock'],
    }

    # Try to match known categories
    for dest, keywords in destinations.items():
        if any(kw in cta_lower for kw in keywords):
            return dest

    # Context-aware inference from frame name
    if context_lower:
        if any(ctx in context_lower for ctx in ['login', 'auth', 'sign']):
            if any(w in cta_lower for w in ['submit', 'go', 'enter']):
                return 'authenticate'
        if any(ctx in context_lower for ctx in ['modal', 'dialog', 'popup']):
            return 'dismiss_modal'
        if any(ctx in context_lower for ctx in ['form', 'input', 'settings']):
            return 'submit_form'
        if any(ctx in context_lower for ctx in ['checkout', 'payment', 'cart']):
            return 'pay'

    # Return cleaned button text as action (more informative than generic 'action')
    # Clean up the text for display
    clean_text = cta_text.strip()
    if len(clean_text) <= 20:
        return f"do:{clean_text}"  # Short enough to show as-is
    return f"do:{clean_text[:17]}..."  # Truncate long text
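
Illustrative behavior (invented button labels):

```python
infer_cta_destination("Sign In")      # -> 'authenticate'
infer_cta_destination("Get Started")  # -> 'register'
infer_cta_destination("+")            # -> None (icon-only, filtered out)
infer_cta_destination("Remind Me")    # -> 'do:Remind Me' (fallback keeps the text)
```
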
def _frames_have_content_differences(frames: List[Dict]) -> bool:
    """Check if frames have different content (not just duplicates)."""
    if len(frames) < 2:
        return False

    # Compare content fields
    content_fields = ['headings', 'labels', 'buttons', 'body', 'errors', 'placeholders']

    first_frame = frames[0].get('frame', frames[0])
    first_content = {
        field: frozenset(first_frame.get(field, []))
        for field in content_fields
    }

    for frame_data in frames[1:]:
        frame = frame_data.get('frame', frame_data)
        for field in content_fields:
            current_values = frozenset(frame.get(field, []))
            if current_values != first_content[field]:
                return True  # Found a difference

    return False  # All frames have identical content


def _infer_variant_state(frame: Dict, all_frames: List[Dict]) -> str:
    """
    Infer variant state from content differences.

    Looks at errors, buttons, labels to infer: default, error, hover, active, focus, etc.
    """
    # First check name-based state
    name_state = infer_state_from_name(frame.get('name', ''))
    if name_state != 'default':
        return name_state

    # Check content for state indicators
    errors = frame.get('errors', [])
    if errors:
        return 'error'

    # Check for loading/progress indicators in components
    components = [c.lower() for c in frame.get('components', [])]
    if any('spinner' in c or 'loader' in c or 'progress' in c for c in components):
        return 'loading'

    # Check buttons for state hints
    buttons = [b.lower() for b in frame.get('buttons', [])]
    if any('retry' in b or 'try again' in b for b in buttons):
        return 'error'

    # Check if this frame has fewer elements (could be empty state)
    total_content = sum(len(frame.get(f, [])) for f in ['headings', 'labels', 'buttons', 'body'])
    if total_content == 0:
        return 'empty'

    # Check position relative to others for hover/active/focus detection
    # Frames in the same row are often state variants
    pos = frame.get('position', {})
    frame_x = pos.get('x', 0)

    # If this is not the leftmost frame, infer state from position order
    frame_positions = [(f.get('frame', f).get('position', {}).get('x', 0), i)
                       for i, f in enumerate(all_frames)]
    frame_positions.sort()

    # Position order might indicate: default, hover, active, focus, disabled
    position_states = ['default', 'hover', 'active', 'focus', 'disabled']
    for i, (x, idx) in enumerate(frame_positions):
        if abs(x - frame_x) < 10:  # Same x position
            if i < len(position_states):
                return position_states[i]

    return 'variant'


def group_variants(frames: List[Dict]) -> Dict[str, List[Dict]]:
    """
    Group frames that are variants of the same screen.

    Detects variants by:
    1. Same base name (with variant suffixes like _error, _hover removed)
    2. Identical names but different content (true state variants)

    Returns dict with frame data including position and inferred state:
    {"Login": [{"name": "Login", "pos": "0,105", "id": "1:100", "state": "default"}, ...]}
    """
    groups = {}

    for frame in frames:
        name = frame.get('name', '')
        base = extract_base_name(name)
        pos = frame.get('position', {})
        pos_str = f"{int(pos.get('x', 0))},{int(pos.get('y', 0))}"

        if base not in groups:
            groups[base] = []
        groups[base].append({
            'name': name,
            'pos': pos_str,
            'id': frame.get('id', ''),
            'frame': frame,  # Keep full frame data for delta computation
        })

    # Return groups that are true variants (different names OR different content)
    result = {}
    for base, variant_list in groups.items():
        if len(variant_list) > 1:
            unique_names = set(v['name'] for v in variant_list)

            # Variants if: different names OR same names but different content
            is_variant_group = (
                len(unique_names) > 1 or
                _frames_have_content_differences(variant_list)
            )

            if is_variant_group:
                # Infer state for each variant
                for v in variant_list:
                    v['state'] = _infer_variant_state(v.get('frame', v), variant_list)
                result[base] = variant_list

    return result


def compute_frame_delta(base_frame: Dict, variant_frame: Dict) -> Dict[str, Any]:
    """
    Compute the differences between a base frame and a variant.

    Returns dict with only the changed/added content:
    {
        'name': 'Login_Error',
        'state': 'error',
        'added': {'errors': ['Invalid email'], 'buttons': ['Retry']},
        'removed': {'buttons': ['Sign In']},
        'changed': {'headings': 'Welcome → Try Again'}
    }
    """
    delta = {
        'name': variant_frame.get('name', ''),
        'id': variant_frame.get('id', ''),
        'state': variant_frame.get('state', 'default'),
        'added': {},
        'removed': {},
        'changed': {},
    }

    # Fields to compare
    content_fields = ['headings', 'labels', 'buttons', 'body', 'errors', 'placeholders']

    for field in content_fields:
        base_values = set(base_frame.get(field, []))
        variant_values = set(variant_frame.get(field, []))

        added = variant_values - base_values
        removed = base_values - variant_values

        if added:
            delta['added'][field] = list(added)[:5]  # Limit to 5
        if removed:
            delta['removed'][field] = list(removed)[:5]

    # Check for changed components
    base_components = set(base_frame.get('components', []))
    variant_components = set(variant_frame.get('components', []))
    added_components = variant_components - base_components
    removed_components = base_components - variant_components

    if added_components:
        delta['added']['components'] = list(added_components)[:5]
    if removed_components:
        delta['removed']['components'] = list(removed_components)[:5]

    # Clean up empty dicts
    if not delta['added']:
        del delta['added']
    if not delta['removed']:
        del delta['removed']
    if not delta['changed']:
        del delta['changed']

    return delta


def get_variant_groups_with_deltas(frames: List[Dict]) -> List[Dict]:
    """
    Get variant groups with computed deltas for efficient output.

    Returns list of variant groups:
    [
        {
            'base_name': 'Login',
            'base_frame': {...full frame data...},
            'variants': [
                {'name': 'Login_Error', 'state': 'error', 'added': {...}, ...},
                ...
            ]
        }
    ]
    """
    groups = group_variants(frames)
    result = []

    for base_name, variant_list in groups.items():
        if len(variant_list) < 2:
            continue

        # Sort variants to find the "default" state as base
        sorted_variants = sorted(variant_list, key=lambda v: (
            0 if infer_state_from_name(v['name']) == 'default' else 1,
            v['name']
        ))

        base_variant = sorted_variants[0]
        base_frame = base_variant.get('frame', {})

        group_data = {
            'base_name': base_name,
            'base_frame': base_frame,
            'variants': [],
        }

        # Compute deltas for other variants
        for variant in sorted_variants[1:]:
            variant_frame = variant.get('frame', {})
            delta = compute_frame_delta(base_frame, variant_frame)
            group_data['variants'].append(delta)

        result.append(group_data)

    return result
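
Putting the variant helpers together, a sketch with two invented frames (only the keys these helpers read are shown):

```python
frames = [
    {"name": "Login", "id": "1:100", "position": {"x": 0, "y": 0},
     "buttons": ["Sign In"]},
    {"name": "Login_Error", "id": "1:101", "position": {"x": 400, "y": 0},
     "buttons": ["Sign In"], "errors": ["Invalid email"]},
]
groups = get_variant_groups_with_deltas(frames)
# -> one group: base_name 'Login', base_frame is the default Login frame,
#    and the Login_Error variant's delta reports added errors: ['Invalid email']
```
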
def detect_sequences(frames: List[Dict]) -> List[Tuple[str, ...]]:
    """
    Detect sequential flows from frame naming.

    Returns list of sequences: [("01_Login", "02_Verify", "03_Dashboard"), ...]
    """
    # Extract frames with sequence numbers
    sequenced = []
    for frame in frames:
        name = frame.get('name', '')
        seq_info = extract_sequence_number(name)
        if seq_info:
            sequenced.append((seq_info[0], name, frame))

    if not sequenced:
        return []

    # Sort by sequence number
    sequenced.sort(key=lambda x: x[0])

    # Build sequence
    sequence = tuple(item[1] for item in sequenced)
    return [sequence] if len(sequence) > 1 else []


# User journey categories for flow grouping
JOURNEY_CATEGORIES = {
    'authentication': ['login', 'signin', 'sign-in', 'auth', 'password', 'forgot', 'reset'],
    'registration': ['signup', 'sign-up', 'register', 'create account', 'join', 'onboard'],
    'onboarding': ['welcome', 'intro', 'tutorial', 'getting started', 'setup', 'first'],
    'checkout': ['checkout', 'payment', 'cart', 'order', 'purchase', 'billing'],
    'profile': ['profile', 'account', 'settings', 'preferences', 'edit profile'],
    'search': ['search', 'results', 'filter', 'browse', 'explore'],
    'content': ['detail', 'view', 'article', 'post', 'item', 'product'],
}


def infer_journey_category(frame_name: str, buttons: List[str]) -> Optional[str]:
    """
    Infer which user journey a frame belongs to.
    """
    name_lower = frame_name.lower()
    buttons_lower = [b.lower() for b in buttons]
    all_text = name_lower + ' ' + ' '.join(buttons_lower)

    for category, keywords in JOURNEY_CATEGORIES.items():
        if any(kw in all_text for kw in keywords):
            return category
    return None


def detect_user_journeys(frames: List[Dict]) -> Dict[str, List[Dict]]:
    """
    Detect user journeys by analyzing frame names and button actions.

    Returns dict of journeys with frames in order:
    {
        'authentication': [
            {'name': 'Login', 'id': '1:100', 'actions': ['authenticate', 'reset']},
            {'name': 'Forgot Password', 'id': '1:101', 'actions': ['submit_form']},
        ],
        ...
    }
    """
    journeys = {}

    for frame in frames:
        name = frame.get('name', '')
        buttons = frame.get('buttons', [])
        frame_id = frame.get('id', '')

        # Get button destinations
        actions = []
        for btn in buttons:
            btn_text = btn if isinstance(btn, str) else btn.get('text', '')
            if btn_text:
                dest = infer_cta_destination(btn_text, frame_context=name)
                if dest and not dest.startswith('do:'):
                    actions.append(dest)

        # Determine journey category
        category = infer_journey_category(name, buttons)

        if category:
            if category not in journeys:
                journeys[category] = []
            journeys[category].append({
                'name': name,
                'id': frame_id,
                'actions': list(set(actions)),  # Dedupe actions
                'state': infer_state_from_name(name),
            })

    return journeys
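
A sketch of journey detection on one invented frame:

```python
frames = [{"name": "Login", "id": "1:100", "buttons": ["Sign In"]}]
detect_user_journeys(frames)
# -> {'authentication': [{'name': 'Login', 'id': '1:100',
#                         'actions': ['authenticate'], 'state': 'default'}]}
```
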
def _dedupe_frame_sequence(frame_names: List[str]) -> str:
    """
    Dedupe consecutive identical frame names and show counts.

    "Model Settings > Model Settings > Model Settings" -> "Model Settings (×3)"
    "Login > Login > Dashboard > Dashboard" -> "Login (×2) > Dashboard (×2)"
    "Login > Dashboard > Settings" -> "Login > Dashboard > Settings" (no change)
    """
    if not frame_names:
        return ''

    deduped = []
    count = 1
    prev = frame_names[0]

    for name in frame_names[1:]:
        if name == prev:
            count += 1
        else:
            if count > 1:
                deduped.append(f"{prev} (×{count})")
            else:
                deduped.append(prev)
            prev = name
            count = 1

    # Don't forget the last one
    if count > 1:
        deduped.append(f"{prev} (×{count})")
    else:
        deduped.append(prev)

    return ' > '.join(deduped)
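
For example:

```python
_dedupe_frame_sequence(["Login", "Login", "Dashboard"])
# -> 'Login (×2) > Dashboard'
```
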
def infer_entry_exit_points(frames: List[Dict]) -> Dict[str, List[str]]:
    """
    Identify entry and exit points in the design.

    Entry points: Frames that are likely starting screens (home, landing, welcome)
    Exit points: Frames with completion/success states or external actions
    """
    entry_keywords = ['home', 'landing', 'welcome', 'splash', 'start', 'main', 'dashboard']
    exit_keywords = ['success', 'complete', 'done', 'confirmed', 'thank', 'finish']

    entry_points = []
    exit_points = []

    for frame in frames:
        name = frame.get('name', '').lower()
        state = frame.get('state', '')

        # Check for entry points
        if any(kw in name for kw in entry_keywords):
            entry_points.append(frame.get('name', ''))

        # Check for exit points
        if any(kw in name for kw in exit_keywords) or state == 'success':
            exit_points.append(frame.get('name', ''))

    return {
        'entry': entry_points[:3],  # Limit to 3
        'exit': exit_points[:3],
    }


# -----------------------------------------------------------------------------
# Text Extraction Helpers
# -----------------------------------------------------------------------------

def extract_text_by_role(
    node: Dict,
    depth: int = 0,
    max_depth: int = 10,
    parent_context: str = ''
) -> Dict[str, List[str]]:
    """
    Recursively extract text content categorized by likely role.

    Improved categorization:
    - Uses font size, weight, and style for heading detection
    - Considers parent container names for context
    - Deduplicates and cleans text
    - Filters out placeholder/system text

    Returns:
    {
        'headings': [...],      # Large/bold text, titles
        'labels': [...],        # Form labels, field names, small descriptive text
        'buttons': [...],       # Text inside interactive elements
        'body': [...],          # Regular body/paragraph text
        'errors': [...],        # Error messages (red text, error containers)
        'placeholders': [...],  # Placeholder/hint text (gray, placeholder in name)
    }
    """
    if depth > max_depth:
        return {'headings': [], 'labels': [], 'buttons': [], 'body': [], 'errors': [], 'placeholders': []}

    result = {'headings': [], 'labels': [], 'buttons': [], 'body': [], 'errors': [], 'placeholders': []}

    node_type = node.get('type', '').upper()
    node_name = node.get('name', '').lower()

    # Build context path for children
    current_context = f"{parent_context}/{node_name}" if parent_context else node_name

    if node_type == 'TEXT':
        text = node.get('characters', '').strip()
        if not text:
            return result

        # Skip very short text that's likely icons or separators
        if len(text) <= 1 and text not in ['?', '!']:
            return result

        # Skip obvious system/template text
        skip_patterns = ['lorem ipsum', 'placeholder', '{{', '}}', 'xxx', '000']
        if any(p in text.lower() for p in skip_patterns):
            return result

        # Get text style info
        style = node.get('style', {})
        font_size = style.get('fontSize', 14)
        font_weight = style.get('fontWeight', 400)
        text_decoration = style.get('textDecoration', '')

        # Check fills for color-based categorization
        fills = node.get('fills', [])
        is_red = any(
            f.get('type') == 'SOLID' and
            f.get('color', {}).get('r', 0) > 0.7 and
            f.get('color', {}).get('g', 0) < 0.3 and
            f.get('color', {}).get('b', 0) < 0.3
            for f in fills if isinstance(f, dict)
        )
        is_gray = any(
            f.get('type') == 'SOLID' and
            abs(f.get('color', {}).get('r', 0) - f.get('color', {}).get('g', 0)) < 0.1 and
            f.get('color', {}).get('r', 0) > 0.4 and
            f.get('color', {}).get('r', 0) < 0.7
            for f in fills if isinstance(f, dict)
        )

        # Categorize based on multiple signals
        # Priority: error > placeholder > heading > label > body

        # Error detection
        if is_red or 'error' in node_name or 'error' in current_context:
            result['errors'].append(text)
        # Placeholder detection
        elif is_gray or 'placeholder' in node_name or 'hint' in node_name:
            result['placeholders'].append(text)
        # Heading detection (large, bold, or semantically marked)
        elif (font_size >= 18 or
              font_weight >= 600 or
              any(h in node_name for h in ['heading', 'title', 'header', 'h1', 'h2', 'h3'])):
            result['headings'].append(text)
        # Label detection (small text, form context, specific naming)
        elif (font_size <= 12 or
              'label' in node_name or
              'caption' in node_name or
              any(ctx in current_context for ctx in ['form', 'input', 'field'])):
            result['labels'].append(text)
        # Everything else is body text
        else:
            result['body'].append(text)

    # Check if this is an interactive element
    is_button = (
        node_type in ['INSTANCE', 'COMPONENT', 'FRAME'] and
        any(kw in node_name for kw in ['button', 'btn', 'cta', 'action', 'link', 'tab', 'chip'])
    )
    is_input = (
        node_type in ['INSTANCE', 'COMPONENT', 'FRAME'] and
        any(kw in node_name for kw in ['input', 'textfield', 'textarea', 'dropdown', 'select'])
    )

    # Recurse into children
    for child in node.get('children', []):
        child_result = extract_text_by_role(child, depth + 1, max_depth, current_context)

        if is_button:
            # All text inside buttons goes to buttons category
            for texts in child_result.values():
                result['buttons'].extend(texts)
        elif is_input:
            # Input text goes to placeholders if gray, otherwise labels
            result['placeholders'].extend(child_result.get('placeholders', []))
            # Other text becomes labels (field values shown)
            for key in ['headings', 'labels', 'body']:
                result['labels'].extend(child_result.get(key, []))
            result['errors'].extend(child_result.get('errors', []))
        else:
            for key in result:
                result[key].extend(child_result.get(key, []))

    return result
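
A sketch on a hand-built node tree (a minimal subset of Figma's node schema; only the keys this function reads are shown):

```python
node = {
    "type": "FRAME",
    "name": "Login",
    "children": [
        {"type": "TEXT", "name": "Title", "characters": "Welcome Back",
         "style": {"fontSize": 24, "fontWeight": 700}},
        {"type": "INSTANCE", "name": "Button/Primary",
         "children": [{"type": "TEXT", "name": "Label", "characters": "Sign In",
                       "style": {"fontSize": 14}}]},
    ],
}
extract_text_by_role(node)
# -> {'headings': ['Welcome Back'], 'buttons': ['Sign In'], ...empty lists...}
```
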
def dedupe_and_clean_text(text_list: List[str], max_items: int = 10) -> List[str]:
    """
    Deduplicate and clean a list of text items.

    - Removes duplicates (case-insensitive)
    - Strips whitespace
    - Limits to max_items
    - Preserves order (first occurrence)
    """
    seen = set()
    result = []
    for text in text_list:
        clean = text.strip()
        if clean and clean.lower() not in seen:
            seen.add(clean.lower())
            result.append(clean)
            if len(result) >= max_items:
                break
    return result


def extract_components(node: Dict, depth: int = 0, max_depth: int = 10) -> List[str]:
    """
    Recursively extract component/instance names.
    """
    if depth > max_depth:
        return []

    components = []
    node_type = node.get('type', '').upper()

    if node_type in ['COMPONENT', 'INSTANCE', 'COMPONENT_SET']:
        name = node.get('name', '')
        if name:
            components.append(name)

    for child in node.get('children', []):
        components.extend(extract_components(child, depth + 1, max_depth))

    return components


# Input field detection - keywords and type mappings
INPUT_TYPE_KEYWORDS = {
    'password': ['password', 'pwd', 'secret'],
    'email': ['email', 'e-mail', 'mail'],
    'phone': ['phone', 'tel', 'mobile', 'cell'],
    'number': ['number', 'numeric', 'amount', 'quantity', 'token', 'tokens', 'max_tokens', 'count', 'limit'],
    'date': ['date', 'calendar'],
    'time': ['time', 'clock'],
    'search': ['search', 'find', 'query'],
    'select': ['select', 'dropdown', 'picker', 'combo', 'menu', 'chooser', 'selector', 'model', 'agent'],
    'textarea': ['textarea', 'multiline', 'description', 'comment', 'message', 'notes', 'prompt'],
    'slider': ['slider', 'range', 'creativity', 'temperature', 'progress', 'reasoning', 'level'],
    'radio': ['radio', 'option', 'choice', 'mode', 'token_mode', 'segment', 'segmented'],
    'toggle': ['toggle', 'switch', 'on/off', 'enable', 'disable', 'active'],
    'checkbox': ['checkbox', 'check', 'agree', 'accept', 'remember'],
    'display': ['display', 'remaining', 'available', 'total', 'used', 'balance', 'info', 'readonly', 'read-only'],
}

# Generic input keywords (fallback to 'text' type)
INPUT_GENERIC_KEYWORDS = ['input', 'textfield', 'field', 'text field', 'text-field',
                          'edittext', 'textbox', 'text box', 'entry', 'form-field', 'form field']

# Required field indicators
REQUIRED_INDICATORS = ['*', 'required', 'mandatory', '(required)', '* required']

# Modal/settings form indicators - deeper extraction needed
MODAL_FORM_KEYWORDS = ['modal', 'dialog', 'settings', 'configuration', 'config', 'preferences', 'options', 'form']


def _extract_all_text_from_node(node: Dict, max_depth: int = 8, depth: int = 0) -> List[Dict]:
    """Extract all text nodes with their context from a node tree."""
    if depth > max_depth:
        return []

    texts = []
    node_type = node.get('type', '').upper()
    node_name = node.get('name', '').lower()

    if node_type == 'TEXT':
        text = node.get('characters', '').strip()
        if text and len(text) > 0:
            # Check if gray (placeholder-like)
            fills = node.get('fills', [])
            is_gray = any(
                f.get('type') == 'SOLID' and
                f.get('color', {}).get('r', 0) > 0.4 and
                f.get('color', {}).get('r', 0) < 0.75 and
                abs(f.get('color', {}).get('r', 0) - f.get('color', {}).get('g', 0)) < 0.15
                for f in fills if isinstance(f, dict)
            )
            # Get font info for better classification
            style = node.get('style', {})
            font_size = style.get('fontSize', 14)
            font_weight = style.get('fontWeight', 400)

            texts.append({
                'text': text,
                'name': node_name,
                'is_gray': is_gray,
                'is_label': 'label' in node_name or font_size <= 12,
                'is_placeholder': 'placeholder' in node_name or 'hint' in node_name,
                'is_value': 'value' in node_name or 'selected' in node_name,
                'is_option': 'option' in node_name or 'item' in node_name,
                'font_size': font_size,
                'is_bold': font_weight >= 600,
            })

    for child in node.get('children', []):
        texts.extend(_extract_all_text_from_node(child, max_depth, depth + 1))

    return texts


def _infer_input_type(name: str, child_count: int = 0) -> Tuple[bool, str]:
    """Infer if node is an input and what type based on name and structure."""
    name_lower = name.lower()

    # Check specific types first
    for input_type, keywords in INPUT_TYPE_KEYWORDS.items():
        for kw in keywords:
            if kw in name_lower:
                return True, input_type

    # Check generic input keywords
    for kw in INPUT_GENERIC_KEYWORDS:
        if kw in name_lower:
            return True, 'text'

    return False, 'text'
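
Illustrative behavior (invented node names):

```python
_infer_input_type("Input/Email")        # -> (True, 'email')
_infer_input_type("Creativity Slider")  # -> (True, 'slider')
_infer_input_type("Card")               # -> (False, 'text')
```
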
def _clean_input_label(name: str) -> str:
    """Clean up a node name to use as input label."""
    # Get last part if path-like (e.g., "Form/Input/Email" -> "Email")
    label = name.split('/')[-1]

    # Replace separators with spaces
    label = label.replace('_', ' ').replace('-', ' ')

    # Remove common prefixes/suffixes
    remove_patterns = ['input', 'field', 'textfield', 'text field', 'form',
                       'component', 'instance', 'wrapper', 'container', 'group']
    label_lower = label.lower()
    for pattern in remove_patterns:
        label_lower = label_lower.replace(pattern, '')

    # Clean up and title case
    label = ' '.join(label_lower.split()).strip().title()

    return label if label else name


def _detect_form_field_in_subtree(node: Dict, depth: int = 0, max_depth: int = 4) -> Optional[Dict]:
    """
    Detect if a subtree contains a form field with label+input pattern.

    Returns dict with detected field info or None.
    """
    if depth > max_depth:
        return None

    node_type = node.get('type', '').upper()
    node_name = node.get('name', '').lower()
    children = node.get('children', [])

    # Look for label+control patterns
    has_label_node = False
    has_control_node = False
    control_type = 'text'
    label_text = ''
    value_text = ''
    options = []

    for child in children:
        child_name = child.get('name', '').lower()
        child_type = child.get('type', '').upper()

        # Check for label
        if 'label' in child_name or 'title' in child_name:
            has_label_node = True
            texts = _extract_all_text_from_node(child, max_depth=2)
            if texts:
                label_text = texts[0]['text']

        # Check for various control types
        for ctrl_type, keywords in INPUT_TYPE_KEYWORDS.items():
            if any(kw in child_name for kw in keywords):
                has_control_node = True
                control_type = ctrl_type
                # Extract value/options from control
                ctrl_texts = _extract_all_text_from_node(child, max_depth=3)
                for t in ctrl_texts:
                    if t.get('is_value') or t.get('is_option'):
                        if control_type in ['select', 'radio', 'slider']:
                            options.append(t['text'])
                        else:
                            value_text = t['text']
                    elif not value_text and not t.get('is_label'):
                        value_text = t['text']
                break

    # If we found both label and control, return the field info
    if has_label_node and has_control_node and label_text:
        return {
            'name': label_text,
            'type': control_type,
            'value': value_text,
            'options': options[:5],
        }

    return None


def extract_inputs(node: Dict, depth: int = 0, max_depth: int = 12) -> List[Dict[str, Any]]:
    """
    Recursively extract input fields with their labels, types, and options.

    Enhanced to detect:
    - Modal form fields with label+control patterns
    - Sliders with labels (Creativity, Reasoning, etc.)
    - Selectors with current values
    - Display fields (read-only info)
    - Radio/segment controls with options

    Returns list of dicts:
    [{'name': 'Email', 'type': 'email', 'required': True, 'placeholder': '...', 'options': [], 'value': '...'}, ...]
    """
    if depth > max_depth:
        return []

    inputs = []
    node_type = node.get('type', '').upper()
    node_name = node.get('name', '')
    node_name_lower = node_name.lower()
    children = node.get('children', [])

    # Check if this node is a form container (modal, settings panel, etc.)
    is_form_container = any(kw in node_name_lower for kw in MODAL_FORM_KEYWORDS)

    # Strategy 1: Check if this is a direct input component/instance
    is_input = False
    input_type = 'text'

    if node_type in ['COMPONENT', 'INSTANCE', 'COMPONENT_SET', 'FRAME']:
        is_input, input_type = _infer_input_type(node_name, len(children))

    if is_input:
        # Extract all text from this input node
        all_texts = _extract_all_text_from_node(node, max_depth=6)

        label = ''
        placeholder = ''
        value = ''
        options = []
        required = False

        # Check node name for required indicator
        for indicator in REQUIRED_INDICATORS:
            if indicator in node_name_lower:
                required = True
                break

        # Process extracted texts with improved categorization
        for t in all_texts:
            text = t['text']

            # Check for required indicators
            if '*' in text:
                required = True
                text = text.replace('*', '').strip()

            # Skip very short text (but allow numbers)
            if len(text) < 2:
                continue

            # Categorize text more carefully
            if t.get('is_placeholder') or (t.get('is_gray') and not t.get('is_value')):
                if not placeholder:
                    placeholder = text
            elif t.get('is_label') or (t.get('font_size', 14) <= 12 and not t.get('is_value')):
                if not label:
                    label = text
            elif t.get('is_value') or t.get('is_option'):
                # Value or option
                if input_type in ['select', 'radio', 'slider']:
                    options.append(text)
                else:
                    if not value:
                        value = text
            else:
                # Guess based on context and length
                if not label and len(text) < 40:
                    label = text
                elif input_type in ['select', 'radio', 'slider'] and len(text) < 25:
                    options.append(text)
                elif not value:
                    value = text

        # Try to get label from node name if not found
        if not label:
            label = _clean_input_label(node_name)

        # Skip if label is empty or generic
        if not label or label.lower() in ['input', 'field', '', 'text', 'frame']:
            label = _clean_input_label(node_name) or 'Input'

        # Only add if we have a meaningful label
        if label and label.lower() not in ['', 'input', 'field', 'frame', 'instance', 'component']:
            input_data = {
                'name': label,
                'type': input_type,
                'required': required,
                'placeholder': placeholder,
            }
            # Add value if present
            if value:
                input_data['value'] = value
            # Add options for select/radio/slider types
            if options and input_type in ['select', 'radio', 'slider']:
                input_data['options'] = list(dict.fromkeys(options))[:6]  # Dedupe, limit to 6

            inputs.append(input_data)
        # Don't recurse into detected input nodes
        return inputs

    # Strategy 2: For form containers, try to detect label+control patterns
    if is_form_container or node_type == 'FRAME':
        for child in children:
            field = _detect_form_field_in_subtree(child, depth=0, max_depth=4)
            if field:
                input_data = {
                    'name': field['name'],
                    'type': field['type'],
                    'required': False,
                    'placeholder': '',
                }
                if field.get('value'):
                    input_data['value'] = field['value']
                if field.get('options'):
                    input_data['options'] = field['options']
                inputs.append(input_data)

    # Recurse into children
    for child in children:
        inputs.extend(extract_inputs(child, depth + 1, max_depth))

    # Deduplicate by name (keep first occurrence)
    seen_names = set()
    deduped = []
    for inp in inputs:
        name_key = inp['name'].lower()
        if name_key not in seen_names:
            seen_names.add(name_key)
            deduped.append(inp)

    return deduped
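
A sketch of input extraction on a hand-built subtree (invented names; only the keys the helpers read are shown):

```python
node = {
    "type": "FRAME",
    "name": "Form",
    "children": [
        {"type": "INSTANCE", "name": "Input/Email",
         "children": [{"type": "TEXT", "name": "Placeholder",
                       "characters": "you@example.com"}]},
    ],
}
extract_inputs(node)
# -> [{'name': 'Email', 'type': 'email', 'required': False,
#      'placeholder': 'you@example.com'}]
```
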
1302
+
1303
+
1304
+ # Component category keywords for semantic grouping
1305
+ COMPONENT_CATEGORIES = {
1306
+ 'buttons': ['button', 'btn', 'cta', 'action', 'submit'],
1307
+ 'inputs': ['input', 'textfield', 'textarea', 'field', 'form'],
1308
+ 'selects': ['dropdown', 'select', 'picker', 'combo', 'menu'],
1309
+ 'toggles': ['toggle', 'switch', 'checkbox', 'radio'],
1310
+ 'cards': ['card', 'tile', 'item', 'cell'],
1311
+ 'navigation': ['nav', 'tab', 'menu', 'breadcrumb', 'link', 'header', 'footer', 'sidebar'],
1312
+ 'icons': ['icon', 'ico', 'glyph', 'symbol'],
1313
+ 'images': ['image', 'img', 'photo', 'avatar', 'thumbnail', 'picture'],
1314
+ 'modals': ['modal', 'dialog', 'popup', 'overlay', 'sheet', 'drawer'],
1315
+ 'lists': ['list', 'table', 'grid', 'row', 'column'],
1316
+ 'badges': ['badge', 'tag', 'chip', 'label', 'pill'],
1317
+ 'progress': ['progress', 'spinner', 'loader', 'loading', 'skeleton'],
1318
+ 'alerts': ['alert', 'toast', 'notification', 'banner', 'snackbar'],
1319
+ 'dividers': ['divider', 'separator', 'line', 'hr'],
1320
+ }
1321
+
1322
+
1323
+ def categorize_components(components: List[str]) -> Dict[str, List[str]]:
1324
+ """
1325
+ Group components into semantic categories.
1326
+
1327
+ Returns:
1328
+ {
1329
+ 'buttons': ['Button/Primary', 'Button/Secondary'],
1330
+ 'inputs': ['Input/Text', 'Input/Email'],
1331
+ 'other': ['CustomComponent'],
1332
+ ...
1333
+ }
1334
+ """
1335
+ categorized = {cat: [] for cat in COMPONENT_CATEGORIES}
1336
+ categorized['other'] = []
1337
+
1338
+ for component in components:
1339
+ comp_lower = component.lower()
1340
+ found_category = False
1341
+
1342
+ for category, keywords in COMPONENT_CATEGORIES.items():
1343
+ if any(kw in comp_lower for kw in keywords):
1344
+ categorized[category].append(component)
1345
+ found_category = True
1346
+ break
1347
+
1348
+ if not found_category:
1349
+ categorized['other'].append(component)
1350
+
1351
+ # Remove empty categories
1352
+ return {k: v for k, v in categorized.items() if v}
1353
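+
+ # Illustrative usage (editor's sketch): matching is substring-based and
+ # case-insensitive, with unmatched names falling into 'other'.
+ #   >>> categorize_components(['Button/Primary', 'Icon/Search', 'Hero'])
+ #   {'buttons': ['Button/Primary'], 'icons': ['Icon/Search'], 'other': ['Hero']}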
+
1354
+
1355
+ def format_component_summary(components: List[str]) -> str:
1356
+ """
1357
+ Format components into a semantic summary.
1358
+
1359
+ Instead of: "Button/Primary, Button/Secondary, Input/Text, Icon/Search"
1360
+ Returns: "2 buttons, 1 input, 1 icon"
1361
+ """
1362
+ if not components:
1363
+ return ''
1364
+
1365
+ categorized = categorize_components(components)
1366
+
1367
+ # Build summary parts with counts
1368
+ parts = []
1369
+ # Priority order for display
1370
+ priority = ['buttons', 'inputs', 'selects', 'toggles', 'cards', 'navigation',
1371
+ 'modals', 'lists', 'alerts', 'badges', 'images', 'icons', 'progress', 'dividers', 'other']
1372
+
1373
+ for cat in priority:
1374
+ if cat in categorized:
1375
+ count = len(categorized[cat])
1376
+ # Use singular/plural
1377
+ if cat == 'other':
1378
+ label = 'other'
1379
+ else:
1380
+ label = cat[:-1] if count == 1 and cat.endswith('s') and cat != 'progress' else cat  # rstrip('s') would over-strip ('progress' -> 'progre')
1381
+ parts.append(f"{count} {label}")
1382
+
1383
+ return ', '.join(parts[:6]) # Limit to 6 categories
1384
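+
+ # Illustrative usage (editor's sketch), matching the docstring example:
+ #   >>> format_component_summary(['Button/Primary', 'Button/Secondary', 'Input/Text', 'Icon/Search'])
+ #   '2 buttons, 1 input, 1 icon'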
+
1385
+
1386
+ def get_key_components(components: List[str], max_items: int = 5) -> List[str]:
1387
+ """
1388
+ Get the most important/unique component names for display.
1389
+
1390
+ Prioritizes:
1391
+ - Interactive elements (buttons, inputs)
1392
+ - Unique component names
1393
+ - Avoids generic names (icons, dividers)
1394
+ """
1395
+ if not components:
1396
+ return []
1397
+
1398
+ categorized = categorize_components(components)
1399
+
1400
+ result = []
1401
+ # Priority order: interactive first
1402
+ priority = ['buttons', 'inputs', 'selects', 'modals', 'cards', 'navigation', 'other']
1403
+
1404
+ for cat in priority:
1405
+ if cat in categorized:
1406
+ # Take first few unique components from this category
1407
+ for comp in categorized[cat]:
1408
+ if comp not in result:
1409
+ result.append(comp)
1410
+ if len(result) >= max_items:
1411
+ return result
1412
+
1413
+ return result
1414
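+
+ # Illustrative usage (editor's sketch): interactive categories come first,
+ # and icons are skipped because they are not in the priority list.
+ #   >>> get_key_components(['Icon/Search', 'Button/Primary', 'Card/Item'], max_items=2)
+ #   ['Button/Primary', 'Card/Item']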
+
1415
+
1416
+ # -----------------------------------------------------------------------------
1417
+ # Input Format Standardization
1418
+ # -----------------------------------------------------------------------------
1419
+
1420
+ # Standard limits for input formatting
1421
+ INPUT_FORMAT_MAX_INPUTS = 10
1422
+ INPUT_FORMAT_MAX_OPTIONS = 5
1423
+ INPUT_FORMAT_MAX_VALUE_LEN = 35
1424
+
1425
+
1426
+ def format_single_input(
1427
+ label: str,
1428
+ input_type: str,
1429
+ required: bool = False,
1430
+ value: Optional[str] = None,
1431
+ options: Optional[List[str]] = None,
1432
+ placeholder: Optional[str] = None,
1433
+ ) -> str:
1434
+ """
1435
+ Format a single input field in standardized TOON format.
1436
+
1437
+ Standard format: Label* (type): "value" or Label* (type): [Opt1/Opt2/...]
1438
+
1439
+ Args:
1440
+ label: Input label/name
1441
+ input_type: Type (text, email, slider, select, etc.)
1442
+ required: Whether field is required (adds * marker)
1443
+ value: Current value shown
1444
+ options: Options for select/radio/slider types
1445
+ placeholder: Placeholder text (fallback if no value/options)
1446
+
1447
+ Returns:
1448
+ Formatted input string
1449
+ """
1450
+ req_marker = '*' if required else ''
1451
+ base_str = f"{label}{req_marker} ({input_type})"
1452
+
1453
+ # Priority: value > options > placeholder
1454
+ if value and len(str(value)) < INPUT_FORMAT_MAX_VALUE_LEN:
1455
+ base_str += f': "{value}"'
1456
+ elif options:
1457
+ opts_str = '/'.join(options[:INPUT_FORMAT_MAX_OPTIONS])
1458
+ base_str += f": [{opts_str}]"
1459
+ elif placeholder and len(str(placeholder)) < INPUT_FORMAT_MAX_VALUE_LEN:
1460
+ base_str += f': "{placeholder}"'
1461
+
1462
+ return base_str
1463
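+
+ # Illustrative usage (editor's sketch):
+ #   >>> format_single_input('Email', 'email', required=True, placeholder='you@example.com')
+ #   'Email* (email): "you@example.com"'
+ #   >>> format_single_input('Country', 'select', options=['US', 'UK', 'DE'])
+ #   'Country (select): [US/UK/DE]'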
+
1464
+
1465
+ def format_inputs_list(inputs: List[Dict], max_inputs: int = INPUT_FORMAT_MAX_INPUTS) -> str:
1466
+ """
1467
+ Format a list of input fields in standardized TOON format.
1468
+
1469
+ Args:
1470
+ inputs: List of input dicts with keys: name, type, required, value, options, placeholder
1471
+ max_inputs: Maximum inputs to include
1472
+
1473
+ Returns:
1474
+ Formatted string: "Input1 (type) | Input2* (type): value | ..."
1475
+ """
1476
+ if not inputs:
1477
+ return ''
1478
+
1479
+ input_strs = []
1480
+ for inp in inputs[:max_inputs]:
1481
+ formatted = format_single_input(
1482
+ label=inp.get('name', inp.get('label', 'Input')),
1483
+ input_type=inp.get('type', inp.get('input_type', 'text')),
1484
+ required=inp.get('required', False),
1485
+ value=inp.get('value', inp.get('current_value')),
1486
+ options=inp.get('options'),
1487
+ placeholder=inp.get('placeholder'),
1488
+ )
1489
+ input_strs.append(formatted)
1490
+
1491
+ return ' | '.join(input_strs)
1492
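+
+ # Illustrative usage (editor's sketch), using the dict shape produced by
+ # extract_inputs above:
+ #   >>> format_inputs_list([
+ #   ...     {'name': 'Email', 'type': 'email', 'required': True},
+ #   ...     {'name': 'Password', 'type': 'password'},
+ #   ... ])
+ #   'Email* (email) | Password (password)'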
+
1493
+
1494
+ # -----------------------------------------------------------------------------
1495
+ # TOON Serializer
1496
+ # -----------------------------------------------------------------------------
1497
+
1498
+ class TOONSerializer:
1499
+ """Serialize Figma data to TOON format."""
1500
+
1501
+ def __init__(self, indent: str = TOON_INDENT):
1502
+ self.indent = indent
1503
+
1504
+ def _i(self, level: int) -> str:
1505
+ """Get indentation for level."""
1506
+ return self.indent * level
1507
+
1508
+ def serialize_file(self, file_data: Dict) -> str:
1509
+ """
1510
+ Serialize entire file structure to TOON format.
1511
+
1512
+ Expected input:
1513
+ {
1514
+ 'name': 'File Name',
1515
+ 'key': 'abc123',
1516
+ 'pages': [...]
1517
+ }
1518
+ """
1519
+ lines = []
1520
+
1521
+ # File header
1522
+ name = file_data.get('name', 'Untitled')
1523
+ key = file_data.get('key', '')
1524
+ lines.append(f"FILE: {name} [key:{key}]")
1525
+
1526
+ # Pages
1527
+ for page in file_data.get('pages', []):
1528
+ lines.extend(self.serialize_page(page, level=1))
1529
+
1530
+ # Global flow analysis
1531
+ all_frames = []
1532
+ for page in file_data.get('pages', []):
1533
+ all_frames.extend(page.get('frames', []))
1534
+
1535
+ flow_lines = self.serialize_flows(all_frames, level=1)
1536
+ if flow_lines:
1537
+ lines.append(f"{self._i(1)}FLOWS:")
1538
+ lines.extend(flow_lines)
1539
+
1540
+ return '\n'.join(lines)
1541
+
1542
+ def serialize_page(self, page_data: Dict, level: int = 0, dedupe_variants: bool = True) -> List[str]:
1543
+ """
1544
+ Serialize a single page.
1545
+
1546
+ Args:
1547
+ page_data: Page data dict
1548
+ level: Indentation level
1549
+ dedupe_variants: If True, group variants and show only deltas (default True)
1550
+ """
1551
+ lines = []
1552
+
1553
+ name = page_data.get('name', 'Untitled Page')
1554
+ page_id = page_data.get('id', '')
1555
+ id_str = f" #{page_id}" if page_id else ''
1556
+ lines.append(f"{self._i(level)}PAGE: {name}{id_str}")
1557
+
1558
+ frames = page_data.get('frames', [])
1559
+
1560
+ if dedupe_variants and len(frames) > 1:
1561
+ # Get variant groups with deltas
1562
+ variant_groups = get_variant_groups_with_deltas(frames)
1563
+
1564
+ # Track which frames are in variant groups (to avoid double-output)
1565
+ variant_frame_ids = set()
1566
+ for group in variant_groups:
1567
+ variant_frame_ids.add(group['base_frame'].get('id', ''))
1568
+ for v in group['variants']:
1569
+ variant_frame_ids.add(v.get('id', ''))
1570
+
1571
+ # Output non-variant frames normally
1572
+ for frame in frames:
1573
+ if frame.get('id', '') not in variant_frame_ids:
1574
+ lines.extend(self.serialize_frame(frame, level + 1))
1575
+
1576
+ # Output variant groups with deduplication
1577
+ for group in variant_groups:
1578
+ lines.extend(self.serialize_variant_group(group, level + 1))
1579
+ else:
1580
+ # No deduplication - output all frames
1581
+ for frame in frames:
1582
+ lines.extend(self.serialize_frame(frame, level + 1))
1583
+
1584
+ return lines
1585
+
1586
+ def serialize_variant_group(self, group: Dict, level: int = 0) -> List[str]:
1587
+ """
1588
+ Serialize a variant group with base frame + deltas.
1589
+
1590
+ Format:
1591
+ VARIANT_GROUP: Login (3 states)
1592
+ [base] FRAME: Login [0,0 375x812] form/default #1:100
1593
+ Headings: Welcome Back
1594
+ ...
1595
+ [error] Login_Error #1:101: +Errors: Invalid email
1596
+ [loading] Login_Loading #1:102: +Components: Spinner
1597
+ """
1598
+ lines = []
1599
+
1600
+ base_name = group['base_name']
1601
+ base_frame = group['base_frame']
1602
+ variants = group['variants']
1603
+ total_count = 1 + len(variants)
1604
+
1605
+ # Group header
1606
+ lines.append(f"{self._i(level)}VARIANT_GROUP: {base_name} ({total_count} states)")
1607
+
1608
+ # Base frame (full output)
1609
+ base_lines = self.serialize_frame(base_frame, level + 1)
1610
+ if base_lines:
1611
+ # Mark first line as [base]
1612
+ base_lines[0] = base_lines[0].replace('FRAME:', '[base] FRAME:', 1)
1613
+ lines.extend(base_lines)
1614
+
1615
+ # Variant deltas (compact)
1616
+ for delta in variants:
1617
+ delta_line = self._serialize_delta(delta, level + 1)
1618
+ lines.append(delta_line)
1619
+
1620
+ return lines
1621
+
1622
+ def _serialize_delta(self, delta: Dict, level: int) -> str:
1623
+ """Serialize a variant delta in compact form."""
1624
+ name = delta.get('name', '')
1625
+ state = delta.get('state', 'default')
1626
+ frame_id = delta.get('id', '')
1627
+
1628
+ parts = [f"{self._i(level)}[{state}] {name}"]
1629
+ if frame_id:
1630
+ parts[0] += f" #{frame_id}"
1631
+ parts[0] += ":"
1632
+
1633
+ # Added content
1634
+ added = delta.get('added', {})
1635
+ for field, values in added.items():
1636
+ if values:
1637
+ parts.append(f"+{field.capitalize()}: {', '.join(str(v) for v in values[:3])}")
1638
+
1639
+ # Removed content
1640
+ removed = delta.get('removed', {})
1641
+ for field, values in removed.items():
1642
+ if values:
1643
+ parts.append(f"-{field.capitalize()}: {', '.join(str(v) for v in values[:3])}")
1644
+
1645
+ # If no changes, note that
1646
+ if not added and not removed:
1647
+ parts.append("(no content change)")
1648
+
1649
+ return ' '.join(parts)
1650
+
1651
+ def serialize_frame(self, frame_data: Dict, level: int = 0) -> List[str]:
1652
+ """Serialize a single frame with all its content."""
1653
+ lines = []
1654
+
1655
+ # Frame header: name [position size] type/state ~variant_of #id
1656
+ name = frame_data.get('name', 'Untitled')
1657
+ frame_id = frame_data.get('id', '')
1658
+ pos = frame_data.get('position', {})
1659
+ size = frame_data.get('size', {})
1660
+ screen_type = frame_data.get('type', 'screen')
1661
+ state = frame_data.get('state', 'default')
1662
+ variant_of = frame_data.get('variant_of', '')
1663
+
1664
+ # Build position/size string
1665
+ pos_str = ''
1666
+ if pos:
1667
+ x, y = int(pos.get('x', 0)), int(pos.get('y', 0))
1668
+ pos_str = f"[{x},{y}"
1669
+ if size:
1670
+ w, h = int(size.get('w', 0)), int(size.get('h', 0))
1671
+ pos_str += f" {w}x{h}"
1672
+ pos_str += "]"
1673
+
1674
+ # Build variant marker
1675
+ variant_str = f" ~{variant_of}" if variant_of else ''
1676
+
1677
+ # Build frame ID marker
1678
+ id_str = f" #{frame_id}" if frame_id else ''
1679
+
1680
+ header = f"{self._i(level)}FRAME: {name} {pos_str} {screen_type}/{state}{variant_str}{id_str}".strip()
1681
+ lines.append(header)
1682
+
1683
+ # Content sections
1684
+ content_level = level + 1
1685
+
1686
+ # Headings
1687
+ headings = frame_data.get('headings', [])
1688
+ if headings:
1689
+ lines.append(f"{self._i(content_level)}Headings: {' | '.join(headings[:5])}")
1690
+
1691
+ # Labels
1692
+ labels = frame_data.get('labels', [])
1693
+ if labels:
1694
+ lines.append(f"{self._i(content_level)}Labels: {' | '.join(labels[:10])}")
1695
+
1696
+ # Buttons with destinations
1697
+ buttons = frame_data.get('buttons', [])
1698
+ if buttons:
1699
+ btn_strs = []
1700
+ for btn in buttons[:8]:
1701
+ if isinstance(btn, dict):
1702
+ text = btn.get('text', '')
1703
+ dest = btn.get('destination', '')
1704
+ btn_strs.append(f"{text} > {dest}" if dest else text)
1705
+ else:
1706
+ dest = infer_cta_destination(btn)
1707
+ btn_strs.append(f"{btn} > {dest}")
1708
+ lines.append(f"{self._i(content_level)}Buttons: {' | '.join(btn_strs)}")
1709
+
1710
+ # Input fields (using standardized format)
1711
+ inputs = frame_data.get('inputs', [])
1712
+ if inputs:
1713
+ inputs_str = format_inputs_list(inputs)
1714
+ if inputs_str:
1715
+ lines.append(f"{self._i(content_level)}Inputs: {inputs_str}")
1716
+
1717
+ # Errors
1718
+ errors = frame_data.get('errors', [])
1719
+ if errors:
1720
+ lines.append(f"{self._i(content_level)}Errors: {' | '.join(errors[:3])}")
1721
+
1722
+ # Placeholders (for form fields) - skip if we have inputs
1723
+ placeholders = frame_data.get('placeholders', [])
1724
+ if placeholders and not inputs:
1725
+ lines.append(f"{self._i(content_level)}Placeholders: {' | '.join(placeholders[:5])}")
1726
+
1727
+ # Body text (truncated, only show if meaningful)
1728
+ body = frame_data.get('body', [])
1729
+ if body:
1730
+ # Join and truncate intelligently
1731
+ body_text = ' '.join(body)
1732
+ if len(body_text) > 150:
1733
+ body_text = body_text[:147] + '...'
1734
+ lines.append(f"{self._i(content_level)}Text: {body_text}")
1735
+
1736
+ # Components - show semantic summary plus key component names
1737
+ components = frame_data.get('components', [])
1738
+ if components:
1739
+ summary = format_component_summary(components)
1740
+ key_comps = get_key_components(components, max_items=5)
1741
+ if summary:
1742
+ comp_line = f"{self._i(content_level)}Components: {summary}"
1743
+ # Add key component names if they add value
1744
+ if key_comps and len(key_comps) <= 5:
1745
+ comp_line += f" ({', '.join(key_comps)})"
1746
+ lines.append(comp_line)
1747
+
1748
+ # Image description (if any)
1749
+ image_desc = frame_data.get('image_description', '')
1750
+ if image_desc:
1751
+ desc_truncated = image_desc[:300]
1752
+ if len(image_desc) > 300:
1753
+ desc_truncated += '...'
1754
+ lines.append(f"{self._i(content_level)}Image: {desc_truncated}")
1755
+
1756
+ return lines
1757
+
1758
+ def serialize_flows(self, frames: List[Dict], level: int = 0) -> List[str]:
1759
+ """Generate flow analysis section with semantic insights."""
1760
+ lines = []
1761
+
1762
+ # Entry/exit points
1763
+ points = infer_entry_exit_points(frames)
1764
+ if points['entry']:
1765
+ lines.append(f"{self._i(level + 1)}entry_points: {', '.join(points['entry'])}")
1766
+ if points['exit']:
1767
+ lines.append(f"{self._i(level + 1)}exit_points: {', '.join(points['exit'])}")
1768
+
1769
+ # Detect user journeys (grouped by purpose)
1770
+ journeys = detect_user_journeys(frames)
1771
+ for journey_name, journey_frames in journeys.items():
1772
+ if journey_frames:
1773
+ # Format: journey_name: Frame1 > Frame2 or Frame1 (×3) if repeated
1774
+ frame_names = [f['name'] for f in journey_frames[:10]] # Limit to 10
1775
+ journey_str = _dedupe_frame_sequence(frame_names)
1776
+ lines.append(f"{self._i(level + 1)}{journey_name}: {journey_str}")
1777
+
1778
+ # Detect sequences (numbered frames suggesting flow order)
1779
+ sequences = detect_sequences(frames)
1780
+ for seq in sequences:
1781
+ # Use dedupe to handle repeated names
1782
+ seq_str = _dedupe_frame_sequence(list(seq))
1783
+ # Skip if already covered by a journey (check deduped version)
1784
+ if not any(seq_str in line for line in lines):
1785
+ lines.append(f"{self._i(level + 1)}sequence: {seq_str}")
1786
+
1787
+ # Detect variants (frames that are states of the same screen)
1788
+ variants = group_variants(frames)
1789
+ for base, variant_list in variants.items():
1790
+ if len(variant_list) > 1:
1791
+ # Format: base (N variants: state1, state2, state3)
1792
+ states = [v.get('state', 'default') for v in variant_list]
1793
+ unique_states = list(dict.fromkeys(states)) # Preserve order, dedupe
1794
+ count = len(variant_list)
1795
+ states_str = ', '.join(unique_states[:5]) # Limit to 5 states
1796
+ lines.append(f"{self._i(level + 1)}variants: {base} ({count} variants: {states_str})")
1797
+
1798
+ # Extract CTA destinations with context awareness
1799
+ cta_by_category = {} # Group CTAs by destination category
1800
+ for frame in frames:
1801
+ frame_name = frame.get('name', '')
1802
+ for btn in frame.get('buttons', []):
1803
+ btn_text = btn if isinstance(btn, str) else btn.get('text', '')
1804
+ if btn_text:
1805
+ dest = infer_cta_destination(btn_text, frame_context=frame_name)
1806
+ if dest: # None means filtered out (icons, etc.)
1807
+ # Group by destination category for cleaner output
1808
+ if dest not in cta_by_category:
1809
+ cta_by_category[dest] = set()
1810
+ cta_by_category[dest].add(btn_text)
1811
+
1812
+ # Format CTAs grouped by action type
1813
+ if cta_by_category:
1814
+ # Primary actions first
1815
+ priority_order = ['authenticate', 'register', 'submit_form', 'save', 'pay',
1816
+ 'navigate_next', 'create', 'view_detail']
1817
+ sorted_categories = sorted(cta_by_category.keys(),
1818
+ key=lambda x: priority_order.index(x) if x in priority_order else 100)
1819
+
1820
+ cta_strs = []
1821
+ for cat in sorted_categories[:8]: # Limit to 8 categories
1822
+ buttons = list(cta_by_category[cat])[:3] # Max 3 buttons per category
1823
+ if cat.startswith('do:'):
1824
+ # Custom action - show as-is
1825
+ cta_strs.append(f'"{buttons[0]}"')
1826
+ else:
1827
+ # Known category
1828
+ cta_strs.append(f'{", ".join(buttons)} > {cat}')
1829
+ lines.append(f"{self._i(level + 1)}actions: {' | '.join(cta_strs)}")
1830
+
1831
+ return lines
1832
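+
+ # Illustrative usage (editor's sketch; `page_data` is assumed to come from
+ # process_page_to_toon_data below, and the flow/variant helpers are the
+ # module-level heuristics defined earlier in this file):
+ #   serializer = TOONSerializer()
+ #   toon_text = serializer.serialize_file(
+ #       {'name': 'Demo', 'key': 'abc123', 'pages': [page_data]}
+ #   )
+ #   # -> "FILE: Demo [key:abc123]\n  PAGE: ...\n    FRAME: ..."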
+
1833
+
1834
+ # -----------------------------------------------------------------------------
1835
+ # Frame Processing
1836
+ # -----------------------------------------------------------------------------
1837
+
1838
+ def process_frame_to_toon_data(frame_node: Dict) -> Dict:
1839
+ """
1840
+ Process a Figma frame node into TOON-ready data structure.
1841
+
1842
+ Returns structured data that can be serialized to TOON format.
1843
+ """
1844
+ name = frame_node.get('name', 'Untitled')
1845
+
1846
+ # Extract position and size
1847
+ bounds = frame_node.get('absoluteBoundingBox', {})
1848
+ position = {'x': bounds.get('x', 0), 'y': bounds.get('y', 0)}
1849
+ size = {'w': bounds.get('width', 0), 'h': bounds.get('height', 0)}
1850
+
1851
+ # Extract text by role
1852
+ text_data = extract_text_by_role(frame_node)
1853
+
1854
+ # Extract components
1855
+ components = extract_components(frame_node)
1856
+
1857
+ # Extract input fields
1858
+ inputs = extract_inputs(frame_node)
1859
+ # Dedupe inputs by name
1860
+ seen_inputs = set()
1861
+ unique_inputs = []
1862
+ for inp in inputs:
1863
+ if inp['name'] not in seen_inputs:
1864
+ seen_inputs.add(inp['name'])
1865
+ unique_inputs.append(inp)
1866
+
1867
+ # Build frame data with deduplication
1868
+ frame_data = {
1869
+ 'id': frame_node.get('id', ''),
1870
+ 'name': name,
1871
+ 'position': position,
1872
+ 'size': size,
1873
+ # Deduplicate and limit text fields
1874
+ 'headings': dedupe_and_clean_text(text_data['headings'], max_items=5),
1875
+ 'labels': dedupe_and_clean_text(text_data['labels'], max_items=15),
1876
+ 'buttons': dedupe_and_clean_text(text_data['buttons'], max_items=10),
1877
+ 'inputs': unique_inputs[:10], # Limit to 10 inputs
1878
+ 'body': dedupe_and_clean_text(text_data['body'], max_items=10),
1879
+ 'errors': dedupe_and_clean_text(text_data['errors'], max_items=5),
1880
+ 'placeholders': dedupe_and_clean_text(text_data.get('placeholders', []), max_items=5),
1881
+ 'components': list(dict.fromkeys(components))[:15], # Dedupe components too
1882
+ }
1883
+
1884
+ # Infer type and state
1885
+ frame_data['type'] = infer_screen_type(frame_data)
1886
+ frame_data['state'] = infer_state_from_name(name)
1887
+
1888
+ # Check if variant
1889
+ base_name = extract_base_name(name)
1890
+ if base_name != name:
1891
+ frame_data['variant_of'] = base_name
1892
+
1893
+ return frame_data
1894
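+
+ # Illustrative usage (editor's sketch; `node` is a raw Figma REST API frame
+ # node with name, id, absoluteBoundingBox, and children):
+ #   frame_data = process_frame_to_toon_data(node)
+ #   frame_data['type'], frame_data['state']  # e.g. ('form', 'default')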
+
1895
+
1896
+ def process_page_to_toon_data(page_node: Dict, max_frames: int = 50) -> Dict:
1897
+ """
1898
+ Process a Figma page node into TOON-ready data structure.
1899
+
1900
+ Args:
1901
+ page_node: Figma page node dict
1902
+ max_frames: Maximum number of frames to process (default 50)
1903
+ """
1904
+ frames = []
1905
+
1906
+ # Process each child frame (limited by max_frames)
1907
+ children = page_node.get('children', [])[:max_frames]
1908
+ for child in children:
1909
+ child_type = child.get('type', '').upper()
1910
+
1911
+ # Process frames, components, and component sets
1912
+ if child_type in ['FRAME', 'COMPONENT', 'COMPONENT_SET', 'SECTION']:
1913
+ frame_data = process_frame_to_toon_data(child)
1914
+ frames.append(frame_data)
1915
+
1916
+ # Sort frames by position (top-to-bottom, then left-to-right)
1917
+ frames.sort(key=lambda f: (f['position']['y'], f['position']['x']))
1918
+
1919
+ return {
1920
+ 'id': page_node.get('id', ''),
1921
+ 'name': page_node.get('name', 'Untitled Page'),
1922
+ 'frames': frames,
1923
+ }
1924
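+
+ # Illustrative pipeline (editor's sketch; `figma_document` is a hypothetical
+ # parsed document from the Figma /files endpoint, whose children are pages):
+ #   page_data = process_page_to_toon_data(figma_document['children'][0])
+ #   file_data = {'name': figma_document.get('name', ''), 'key': 'abc123',
+ #                'pages': [page_data]}
+ #   print(TOONSerializer().serialize_file(file_data))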
+
1925
+
1926
+ # -----------------------------------------------------------------------------
1927
+ # LLM Analysis Functions
1928
+ # -----------------------------------------------------------------------------
1929
+
1930
+ def analyze_frame_with_llm(
1931
+ frame_data: Dict,
1932
+ llm: Any,
1933
+ toon_serializer: Optional['TOONSerializer'] = None,
1934
+ image_url: Optional[str] = None,
1935
+ ) -> Optional[ScreenExplanation]:
1936
+ """
1937
+ Analyze a single frame using LLM with structured output.
1938
+
1939
+ Supports vision-based analysis when image_url is provided (for multimodal LLMs).
1940
+ Falls back to text-based analysis using TOON data otherwise.
1941
+
1942
+ Args:
1943
+ frame_data: Processed frame data dict
1944
+ llm: LangChain LLM instance with structured output support
1945
+ toon_serializer: Optional serializer for formatting
1946
+ image_url: Optional URL to frame image for vision-based analysis
1947
+
1948
+ Returns:
1949
+ ScreenExplanation model or None if analysis fails
1950
+ """
1951
+ if not llm:
1952
+ return None
1953
+
1954
+ frame_name = frame_data.get('name', 'Unknown')
1955
+ frame_id = frame_data.get('id', '')
1956
+
1957
+ # Try vision-based analysis first if image available
1958
+ if image_url:
1959
+ try:
1960
+ from langchain_core.messages import HumanMessage
1961
+
1962
+ structured_llm = llm.with_structured_output(ScreenExplanation)
1963
+ prompt_text = SCREEN_VISION_PROMPT.format(
1964
+ frame_name=frame_name,
1965
+ frame_id=frame_id,
1966
+ )
1967
+
1968
+ # Create message with image content
1969
+ message = HumanMessage(
1970
+ content=[
1971
+ {"type": "text", "text": prompt_text},
1972
+ {"type": "image_url", "image_url": {"url": image_url}},
1973
+ ]
1974
+ )
1975
+ result = structured_llm.invoke([message])
1976
+ if result:
1977
+ return result
1978
+ except Exception as e:
1979
+ # Vision analysis failed - fall back to text-based
1980
+ logging.warning(f"Vision analysis failed for {frame_name}, falling back to text: {type(e).__name__}: {e}")
1981
+
1982
+ # Text-based analysis (fallback or primary if no image)
1983
+ try:
1984
+ if toon_serializer is None:
1985
+ toon_serializer = TOONSerializer()
1986
+
1987
+ toon_lines = toon_serializer.serialize_frame(frame_data, level=0)
1988
+ toon_data = '\n'.join(toon_lines)
1989
+
1990
+ prompt = SCREEN_TEXT_PROMPT.format(
1991
+ frame_name=frame_name,
1992
+ frame_id=frame_id,
1993
+ toon_data=toon_data,
1994
+ )
1995
+
1996
+ structured_llm = llm.with_structured_output(ScreenExplanation)
1997
+ result = structured_llm.invoke(prompt)
1998
+ return result
1999
+
2000
+ except Exception as e:
2001
+ logging.warning(f"LLM frame analysis failed for {frame_name}: {type(e).__name__}: {e}")
2002
+ return None
2003
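+
+ # Illustrative call (editor's sketch; `llm` is any LangChain chat model that
+ # supports .with_structured_output, e.g. ChatOpenAI):
+ #   explanation = analyze_frame_with_llm(frame_data, llm, image_url=None)
+ #   if explanation:
+ #       print(explanation.purpose, explanation.primary_action)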
+
2004
+
2005
+ def analyze_file_with_llm(
2006
+ file_data: Dict,
2007
+ llm: Any,
2008
+ max_screens: int = 10,
2009
+ ) -> Optional[DesignAnalysis]:
2010
+ """
2011
+ Analyze entire Figma file using LLM with structured output.
2012
+
2013
+ Args:
2014
+ file_data: Processed file data dict with pages and frames
2015
+ llm: LangChain LLM instance with structured output support
2016
+ max_screens: Maximum screens to include in analysis (for token limits)
2017
+
2018
+ Returns:
2019
+ DesignAnalysis model or None if analysis fails
2020
+ """
2021
+ if not llm:
2022
+ return None
2023
+
2024
+ try:
2025
+ # Serialize file to TOON format
2026
+ serializer = TOONSerializer()
2027
+ toon_output = serializer.serialize_file(file_data)
2028
+
2029
+ # Truncate if too long (rough token estimate: 4 chars per token)
2030
+ max_chars = 8000 # ~2000 tokens for input
2031
+ if len(toon_output) > max_chars:
2032
+ toon_output = toon_output[:max_chars] + "\n... (truncated)"
2033
+
2034
+ # Build prompt
2035
+ prompt = FILE_ANALYSIS_PROMPT.format(toon_data=toon_output)
2036
+
2037
+ # Call LLM with structured output
2038
+ structured_llm = llm.with_structured_output(DesignAnalysis)
2039
+ result = structured_llm.invoke(prompt)
2040
+
2041
+ return result
2042
+
2043
+ except Exception as e:
2044
+ logging.warning(f"LLM file analysis failed: {e}")
2045
+ return None
2046
+
2047
+
2048
+ def analyze_flows_with_llm(
2049
+ frames: List[Dict],
2050
+ llm: Any,
2051
+ ) -> Optional[FlowAnalysis]:
2052
+ """
2053
+ Analyze user flows using LLM with structured output.
2054
+
2055
+ Args:
2056
+ frames: List of processed frame data dicts
2057
+ llm: LangChain LLM instance with structured output support
2058
+
2059
+ Returns:
2060
+ FlowAnalysis model or None if analysis fails
2061
+ """
2062
+ if not llm or not frames:
2063
+ return None
2064
+
2065
+ try:
2066
+ # Build compact frame summary for LLM
2067
+ frame_lines = []
2068
+ for f in frames[:20]: # Limit to 20 frames
2069
+ name = f.get('name', 'Unknown')
2070
+ buttons = f.get('buttons', [])[:3] # Top 3 buttons
2071
+ btn_str = ', '.join(buttons) if buttons else '-'
2072
+ frame_lines.append(f"- {name}: [{btn_str}]")
2073
+
2074
+ frame_summary = '\n'.join(frame_lines)
2075
+
2076
+ # Build prompt
2077
+ prompt = FLOW_ANALYSIS_PROMPT.format(frame_summary=frame_summary)
2078
+
2079
+ # Call LLM with structured output
2080
+ structured_llm = llm.with_structured_output(FlowAnalysis)
2081
+ result = structured_llm.invoke(prompt)
2082
+
2083
+ return result
2084
+
2085
+ except Exception as e:
2086
+ logging.warning(f"LLM flow analysis failed: {e}")
2087
+ return None
2088
+
2089
+
2090
+ def serialize_flow_analysis(analysis: FlowAnalysis, level: int = 0) -> List[str]:
2091
+ """Serialize LLM flow analysis to TOON format."""
2092
+ lines = []
2093
+ indent = TOON_INDENT * (level + 1)
2094
+
2095
+ # Navigation pattern
2096
+ if analysis.navigation_pattern:
2097
+ lines.append(f"{indent}pattern: {analysis.navigation_pattern}")
2098
+
2099
+ # Entry/exit points
2100
+ if analysis.entry_points:
2101
+ lines.append(f"{indent}entry: {', '.join(analysis.entry_points[:3])}")
2102
+ if analysis.completion_points:
2103
+ lines.append(f"{indent}completion: {', '.join(analysis.completion_points[:3])}")
2104
+
2105
+ # Main flows
2106
+ for flow in analysis.main_flows[:4]: # Limit to 4 flows
2107
+ # Format: flow_name: Screen1 > Screen2 or Screen1 (×3) if repeated (description)
2108
+ screens_str = _dedupe_frame_sequence(flow.screens[:10]) # Max 10 screens
2109
+ flow_line = f"{indent}{flow.name}: {screens_str}"
2110
+ if flow.description:
2111
+ flow_line += f" ({flow.description})"
2112
+ lines.append(flow_line)
2113
+
2114
+ return lines
2115
+
2116
+
2117
+ def serialize_screen_explanation(
2118
+ explanation: ScreenExplanation,
2119
+ indent: str = TOON_INDENT,
2120
+ level: int = 0,
2121
+ ) -> List[str]:
2122
+ """
2123
+ Serialize a ScreenExplanation to TOON-compatible format.
2124
+
2125
+ Returns list of formatted lines.
2126
+ """
2127
+ lines = []
2128
+ i = indent * level
2129
+
2130
+ lines.append(f"{i}EXPLANATION: {explanation.frame_name} #{explanation.frame_id}")
2131
+ lines.append(f"{i}{indent}Purpose: {explanation.purpose}")
2132
+ lines.append(f"{i}{indent}User Goal: {explanation.user_goal}")
2133
+
2134
+ if explanation.primary_action:
2135
+ lines.append(f"{i}{indent}Primary Action: {explanation.primary_action}")
2136
+
2137
+ if explanation.secondary_actions:
2138
+ lines.append(f"{i}{indent}Secondary Actions: {', '.join(explanation.secondary_actions)}")
2139
+
2140
+ if explanation.input_fields:
2141
+ lines.append(f"{i}{indent}Inputs Required: {', '.join(explanation.input_fields)}")
2142
+
2143
+ if explanation.state_indicators:
2144
+ lines.append(f"{i}{indent}State Indicators: {', '.join(explanation.state_indicators)}")
2145
+
2146
+ # Element mappings (compact format)
2147
+ if explanation.element_mappings:
2148
+ lines.append(f"{i}{indent}Element Map:")
2149
+ for mapping in explanation.element_mappings[:5]: # Limit to 5
2150
+ action_str = f" -> {mapping.user_action}" if mapping.user_action else ""
2151
+ lines.append(f"{i}{indent}{indent}{mapping.element_type}: \"{mapping.extracted_value}\" = {mapping.semantic_meaning}{action_str}")
2152
+
2153
+ return lines
2154
+
2155
+
2156
+ def serialize_design_analysis(
2157
+ analysis: DesignAnalysis,
2158
+ indent: str = TOON_INDENT,
2159
+ ) -> str:
2160
+ """
2161
+ Serialize a complete DesignAnalysis to readable format.
2162
+
2163
+ Returns formatted string.
2164
+ """
2165
+ lines = []
2166
+
2167
+ lines.append(f"DESIGN ANALYSIS: {analysis.file_name}")
2168
+ lines.append(f"{indent}Type: {analysis.design_type}")
2169
+ lines.append(f"{indent}Target User: {analysis.target_user}")
2170
+ lines.append(f"{indent}Purpose: {analysis.overall_purpose}")
2171
+
2172
+ if analysis.design_patterns:
2173
+ lines.append(f"{indent}Patterns: {', '.join(analysis.design_patterns)}")
2174
+
2175
+ # Flows
2176
+ if analysis.flows:
2177
+ lines.append(f"{indent}USER FLOWS:")
2178
+ for flow in analysis.flows:
2179
+ lines.append(f"{indent}{indent}{flow.flow_name}: {flow.entry_point} > ... > {flow.exit_point}")
2180
+ lines.append(f"{indent}{indent}{indent}{flow.description}")
2181
+ if flow.error_states:
2182
+ lines.append(f"{indent}{indent}{indent}Error states: {', '.join(flow.error_states)}")
2183
+
2184
+ # Screens
2185
+ if analysis.screens:
2186
+ lines.append(f"{indent}SCREENS ({len(analysis.screens)}):")
2187
+ for screen in analysis.screens:
2188
+ lines.append(f"{indent}{indent}{screen.frame_name}: {screen.purpose}")
2189
+ if screen.primary_action:
2190
+ lines.append(f"{indent}{indent}{indent}Primary: {screen.primary_action}")
2191
+
2192
+ # Concerns
2193
+ if analysis.gaps_or_concerns:
2194
+ lines.append(f"{indent}GAPS/CONCERNS:")
2195
+ for concern in analysis.gaps_or_concerns:
2196
+ lines.append(f"{indent}{indent}- {concern}")
2197
+
2198
+ # Accessibility
2199
+ if analysis.accessibility_notes:
2200
+ lines.append(f"{indent}ACCESSIBILITY:")
2201
+ for note in analysis.accessibility_notes:
2202
+ lines.append(f"{indent}{indent}- {note}")
2203
+
2204
+ return '\n'.join(lines)
2205
+
2206
+
2207
+ def enrich_toon_with_llm_analysis(
2208
+ toon_output: str,
2209
+ file_data: Dict,
2210
+ llm: Any,
2211
+ analysis_level: str = 'summary',
2212
+ frame_images: Optional[Dict[str, str]] = None,
2213
+ status_callback: Optional[Callable[[str], None]] = None,
2214
+ include_design_insights: bool = True,
2215
+ parallel_workers: int = 5,
2216
+ max_frames_to_analyze: int = 50,
2217
+ ) -> str:
2218
+ """
2219
+ Enrich TOON output with LLM-generated explanations.
2220
+
2221
+ For 'detailed' mode, explanations are merged inline with each FRAME.
2222
+ Supports vision-based analysis when frame_images are provided.
2223
+ For 'summary' mode, only file-level analysis is appended.
2224
+
2225
+ Args:
2226
+ toon_output: Base TOON formatted string
2227
+ file_data: Processed file data
2228
+ llm: LangChain LLM instance
2229
+ analysis_level: 'summary' (file-level only) or 'detailed' (per-screen inline)
2230
+ frame_images: Optional dict mapping frame_id -> image_url for vision analysis
2231
+ status_callback: Optional callback for progress updates
2232
+ include_design_insights: Whether to include DESIGN INSIGHTS section (default True)
2233
+ parallel_workers: Number of parallel LLM workers (default 5)
2234
+ max_frames_to_analyze: Maximum frames to analyze with LLM (default 50)
2235
+
2236
+ Returns:
2237
+ Enriched TOON output with LLM analysis
2238
+ """
2239
+ if not llm:
2240
+ return toon_output
2241
+
2242
+ if analysis_level == 'detailed':
2243
+ # Re-serialize with inline LLM explanations (with optional vision)
2244
+ return serialize_file_with_llm_explanations(
2245
+ file_data, llm, frame_images=frame_images, status_callback=status_callback,
2246
+ include_design_insights=include_design_insights,
2247
+ parallel_workers=parallel_workers,
2248
+ max_frames_to_analyze=max_frames_to_analyze,
2249
+ )
2250
+
2251
+ # For summary mode, just append file-level analysis
2252
+ lines = [toon_output]
2253
+ analysis = analyze_file_with_llm(file_data, llm)
2254
+ if analysis:
2255
+ lines.append("")
2256
+ lines.append("=" * 60)
2257
+ lines.append("DESIGN ANALYSIS")
2258
+ lines.append("=" * 60)
2259
+ lines.append(serialize_design_analysis(analysis))
2260
+
2261
+ return '\n'.join(lines)
2262
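+
+ # Illustrative call (editor's sketch): 'summary' mode appends one file-level
+ # DESIGN ANALYSIS block; 'detailed' re-serializes with per-frame explanations.
+ #   enriched = enrich_toon_with_llm_analysis(
+ #       toon_text, file_data, llm, analysis_level='summary'
+ #   )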
+
2263
+
2264
+ def serialize_file_with_llm_explanations(
2265
+ file_data: Dict,
2266
+ llm: Any,
2267
+ frame_images: Optional[Dict[str, str]] = None,
2268
+ max_frames_to_analyze: int = 50,
2269
+ status_callback: Optional[Callable[[str], None]] = None,
2270
+ parallel_workers: int = 5,
2271
+ include_design_insights: bool = True,
2272
+ ) -> str:
2273
+ """
2274
+ Serialize file with LLM explanations merged inline with each FRAME.
2275
+
2276
+ Supports vision-based analysis when frame_images dict is provided.
2277
+ Uses parallel LLM processing for faster analysis.
2278
+ Output uses visual insights instead of element mappings.
2279
+
2280
+ Args:
2281
+ file_data: Processed file data dict
2282
+ llm: LangChain LLM instance
2283
+ frame_images: Optional dict mapping frame_id -> image_url for vision analysis
2284
+ max_frames_to_analyze: Maximum frames to analyze with LLM (default 50)
2285
+ status_callback: Optional callback function for progress updates
2286
+ parallel_workers: Number of parallel LLM workers (default 5)
2287
+ include_design_insights: Whether to include DESIGN INSIGHTS section (default True)
2288
+
2289
+ Output format:
2290
+ FILE: Name [key:xxx]
2291
+ PAGE: Page Name #id
2292
+ FRAME: Frame Name [pos] type/state #id
2293
+ Purpose: Authentication screen for returning users
2294
+ Goal: Sign into account | Action: "Sign In"
2295
+ Visual: [focus] title and form | [layout] form-stack | [state] default
2296
+ """
2297
+ from concurrent.futures import ThreadPoolExecutor, as_completed
2298
+
2299
+ def _log_status(msg: str):
2300
+ """Log status via callback if provided."""
2301
+ logging.info(msg)
2302
+ if status_callback:
2303
+ try:
2304
+ status_callback(msg)
2305
+ except Exception:
2306
+ pass # Don't let callback failures break processing
2307
+
2308
+ lines = []
2309
+ serializer = TOONSerializer()
2310
+ frame_images = frame_images or {}
2311
+
2312
+ # File header
2313
+ name = file_data.get('name', 'Untitled')
2314
+ key = file_data.get('key', '')
2315
+ lines.append(f"FILE: {name} [key:{key}]")
2316
+
2317
+ # Collect all frames first for parallel processing
2318
+ all_frames = []
2319
+ frame_to_page = {} # Map frame_id to page info for later
2320
+
2321
+ for page in file_data.get('pages', []):
2322
+ for frame in page.get('frames', []):
2323
+ all_frames.append(frame)
2324
+ frame_to_page[frame.get('id', '')] = {
2325
+ 'page_name': page.get('name', 'Untitled Page'),
2326
+ 'page_id': page.get('id', ''),
2327
+ }
2328
+
2329
+ total_frames = len(all_frames)
2330
+ frames_to_analyze = all_frames[:max_frames_to_analyze]
2331
+ _log_status(f"Starting LLM analysis for {len(frames_to_analyze)} of {total_frames} frames...")
2332
+
2333
+ # Parallel LLM analysis
2334
+ frame_explanations = {} # frame_id -> explanation
2335
+
2336
+ def _analyze_single_frame(frame: Dict) -> Tuple[str, Optional[Any]]:
2337
+ """Worker function to analyze a single frame."""
2338
+ frame_id = frame.get('id', '')
2339
+ image_url = frame_images.get(frame_id)
2340
+ try:
2341
+ explanation = analyze_frame_with_llm(
2342
+ frame, llm, serializer, image_url=image_url
2343
+ )
2344
+ return frame_id, explanation
2345
+ except Exception as e:
2346
+ logging.warning(f"LLM analysis failed for frame {frame.get('name')}: {e}")
2347
+ return frame_id, None
2348
+
2349
+ # Use parallel processing for LLM calls
2350
+ if frames_to_analyze:
2351
+ workers = min(parallel_workers, len(frames_to_analyze))
2352
+ _log_status(f"Analyzing frames with {workers} parallel LLM workers...")
2353
+ completed = 0
2354
+
2355
+ with ThreadPoolExecutor(max_workers=workers) as executor:
2356
+ future_to_frame = {
2357
+ executor.submit(_analyze_single_frame, frame): frame
2358
+ for frame in frames_to_analyze
2359
+ }
2360
+
2361
+ for future in as_completed(future_to_frame):
2362
+ frame = future_to_frame[future]
2363
+ frame_name = frame.get('name', 'Untitled')
2364
+ try:
2365
+ frame_id, explanation = future.result()
2366
+ if explanation:
2367
+ frame_explanations[frame_id] = explanation
2368
+ _log_status(f"Analyzed {completed + 1}/{len(frames_to_analyze)}: {frame_name} ✓")
2369
+ else:
2370
+ _log_status(f"Analyzed {completed + 1}/{len(frames_to_analyze)}: {frame_name} (no result)")
2371
+ completed += 1
2372
+ except Exception as e:
2373
+ completed += 1
2374
+ logging.warning(f"Frame analysis failed for {frame_name}: {e}")
2375
+ _log_status(f"Analyzed {completed}/{len(frames_to_analyze)}: {frame_name} (error: {type(e).__name__})")
2376
+
2377
+ # Summary of LLM analysis results
2378
+ success_count = len(frame_explanations)
2379
+ _log_status(f"Frame analysis complete: {success_count}/{len(frames_to_analyze)} frames analyzed successfully")
2380
+
2381
+ # Now generate output with pre-computed explanations
2382
+ for page in file_data.get('pages', []):
2383
+ page_name = page.get('name', 'Untitled Page')
2384
+ page_id = page.get('id', '')
2385
+ lines.append(f" PAGE: {page_name} #{page_id}")
2386
+
2387
+ for frame in page.get('frames', []):
2388
+ frame_name = frame.get('name', 'Untitled')
2389
+ frame_id = frame.get('id', '')
2390
+ frame_type = frame.get('type', 'screen')
2391
+ frame_state = frame.get('state', 'default')
2392
+
2393
+ # Compact frame header
2394
+ pos = frame.get('position', {})
2395
+ size = frame.get('size', {})
2396
+ pos_str = f"[{int(pos.get('x', 0))},{int(pos.get('y', 0))} {int(size.get('w', 0))}x{int(size.get('h', 0))}]"
2397
+ lines.append(f" FRAME: {frame_name} {pos_str} {frame_type}/{frame_state} #{frame_id}")
2398
+
2399
+ # Get frame content
2400
+ headings = frame.get('headings', [])
2401
+ buttons = frame.get('buttons', []).copy() # Copy to allow modification
2402
+ inputs = frame.get('inputs', [])
2403
+ primary_action = None # Track LLM-identified action
2404
+
2405
+ # LLM analysis from pre-computed results
2406
+ explanation = frame_explanations.get(frame_id)
2407
+ if explanation:
2408
+ lines.append(f" Purpose: {explanation.purpose}")
2409
+ goal_action = f"Goal: {explanation.user_goal}"
2410
+ if explanation.primary_action:
2411
+ primary_action = explanation.primary_action
2412
+ goal_action += f" | Action: \"{primary_action}\""
2413
+ lines.append(f" {goal_action}")
2414
+
2415
+ # Visual insights
2416
+ visual_parts = []
2417
+ if explanation.visual_focus:
2418
+ visual_parts.append(f"[focus] {explanation.visual_focus}")
2419
+ if explanation.layout_pattern:
2420
+ visual_parts.append(f"[layout] {explanation.layout_pattern}")
2421
+ if explanation.visual_state and explanation.visual_state != 'default':
2422
+ visual_parts.append(f"[state] {explanation.visual_state}")
2423
+ if visual_parts:
2424
+ lines.append(f" Visual: {' | '.join(visual_parts)}")
2425
+
2426
+ # Ensure primary_action from LLM appears in buttons (button completeness)
2427
+ if primary_action:
2428
+ # Check if action is already in buttons (case-insensitive)
2429
+ buttons_lower = [b.lower() for b in buttons]
2430
+ action_lower = primary_action.lower()
2431
+ if not any(action_lower in b or b in action_lower for b in buttons_lower):
2432
+ # Add the LLM-identified action to buttons list
2433
+ buttons.insert(0, f"[LLM] {primary_action}")
2434
+
2435
+ # Use LLM-extracted inputs if available (preferred), else fall back to heuristic
2436
+ llm_inputs = []
2437
+ if explanation and hasattr(explanation, 'inputs') and explanation.inputs:
2438
+ llm_inputs = explanation.inputs
2439
+
2440
+ # Extracted content (Headings, Buttons, Inputs)
2441
+ if headings:
2442
+ lines.append(f" Headings: {' | '.join(headings[:3])}")
2443
+ if buttons:
2444
+ # Show buttons with inferred actions
2445
+ btn_strs = []
2446
+ for btn in buttons[:6]: # Increased limit to accommodate added LLM action
2447
+ dest = infer_cta_destination(btn, frame_context=frame_name)
2448
+ btn_strs.append(f"{btn} > {dest}" if dest else btn)
2449
+ lines.append(f" Buttons: {' | '.join(btn_strs)}")
2450
+
2451
+ # Prefer LLM-extracted inputs over heuristic extraction (standardized format)
2452
+ if llm_inputs:
2453
+ # Convert LLM ExtractedInput objects to dicts for standardized formatting
2454
+ llm_inputs_dicts = [
2455
+ {
2456
+ 'name': inp.label,
2457
+ 'type': inp.input_type,
2458
+ 'required': inp.required,
2459
+ 'value': inp.current_value,
2460
+ 'options': inp.options,
2461
+ }
2462
+ for inp in llm_inputs
2463
+ ]
2464
+ inputs_str = format_inputs_list(llm_inputs_dicts)
2465
+ if inputs_str:
2466
+ lines.append(f" Inputs: {inputs_str}")
2467
+ elif inputs:
2468
+ # Fallback to heuristic-extracted inputs (standardized format)
2469
+ inputs_str = format_inputs_list(inputs)
2470
+ if inputs_str:
2471
+ lines.append(f" Inputs: {inputs_str}")
2472
+
2473
+ lines.append("") # Blank line after page
2474
+
2475
+ _log_status("Analyzing flows and variants (parallel)...")
2476
+
2477
+ # Run LLM flow analysis and design insights in parallel
2478
+ flow_analysis_result = None
2479
+ design_analysis_result = None
2480
+ flow_error = None
2481
+ design_error = None
2482
+
2483
+ def _analyze_flows():
2484
+ """Worker for flow analysis."""
2485
+ nonlocal flow_analysis_result, flow_error
2486
+ try:
2487
+ flow_analysis_result = analyze_flows_with_llm(all_frames, llm)
2488
+ except Exception as e:
2489
+ flow_error = e
2490
+
2491
+ def _analyze_design():
2492
+ """Worker for design insights."""
2493
+ nonlocal design_analysis_result, design_error
2494
+ try:
2495
+ design_analysis_result = analyze_file_with_llm(file_data, llm)
2496
+ except Exception as e:
2497
+ design_error = e
2498
+
2499
+ # Run flow analysis and optionally design insights in parallel
2500
+ if all_frames:
2501
+ parallel_tasks = [_analyze_flows]
2502
+ if include_design_insights:
2503
+ parallel_tasks.append(_analyze_design)
2504
+
2505
+ with ThreadPoolExecutor(max_workers=2) as executor:
2506
+ futures = [executor.submit(task) for task in parallel_tasks]
2507
+ for future in as_completed(futures):
2508
+ try:
2509
+ future.result()
2510
+ except Exception as e:
2511
+ logging.warning(f"Parallel analysis task failed: {e}")
2512
+
2513
+ # Add FLOWS section
2514
+ if all_frames:
2515
+ lines.append("FLOWS:")
2516
+ if flow_analysis_result:
2517
+ flow_lines = serialize_flow_analysis(flow_analysis_result, level=0)
2518
+ lines.extend(flow_lines)
2519
+ elif flow_error:
2520
+ logging.warning(f"LLM flow analysis failed, using heuristics: {flow_error}")
2521
+ flow_lines = serializer.serialize_flows(all_frames, level=0)
2522
+ lines.extend(flow_lines)
2523
+ else:
2524
+ # Fallback to heuristic flows if LLM returned nothing
2525
+ flow_lines = serializer.serialize_flows(all_frames, level=0)
2526
+ lines.extend(flow_lines)
2527
+
2528
+ # Explicit VARIANTS section with frame differences (fast heuristic, no parallelization needed)
2529
+ if all_frames:
2530
+ variants = group_variants(all_frames)
2531
+ if variants:
2532
+ lines.append("")
2533
+ lines.append("VARIANTS:")
2534
+ for base, variant_list in variants.items():
2535
+ if len(variant_list) > 1:
2536
+ # Group by states
2537
+ states_by_id = {}
2538
+ for v in variant_list:
2539
+ state = v.get('state', 'default')
2540
+ frame_id = v.get('id', '')[:6] # Short ID
2541
+ states_by_id[f"#{frame_id}"] = state
2542
+
2543
+ # Detect potential duplicates (same state, similar content)
2544
+ state_groups = {}
2545
+ for v in variant_list:
2546
+ state = v.get('state', 'default')
2547
+ if state not in state_groups:
2548
+ state_groups[state] = []
2549
+ state_groups[state].append(v)
2550
+
2551
+ # Build variant line
2552
+ lines.append(f" {base} ({len(variant_list)} frames):")
2553
+
2554
+ # List variants by state
2555
+ for state, variants_in_state in state_groups.items():
2556
+ ids = [f"#{v.get('id', '')[:6]}" for v in variants_in_state]
2557
+ if len(variants_in_state) > 1:
2558
+ # Potential duplicates or responsive variants
2559
+ lines.append(f" {state}: {', '.join(ids)} - potential duplicates or responsive")
2560
+ else:
2561
+ # Unique state
2562
+ frame = variants_in_state[0].get('frame', {})
2563
+ # Try to identify distinguishing feature
2564
+ distinguisher = ""
2565
+ if frame.get('headings'):
2566
+ distinguisher = f" - \"{frame['headings'][0][:30]}...\""
2567
+ elif frame.get('inputs'):
2568
+ inp = frame['inputs'][0]
2569
+ distinguisher = f" - {inp.get('type', 'text')} labeled \"{inp.get('name', '')[:20]}\""
2570
+ lines.append(f" {state}: {ids[0]}{distinguisher}")
2571
+
2572
+ # Add DESIGN INSIGHTS section (if enabled and analysis succeeded)
2573
+ if include_design_insights:
2574
+ if design_analysis_result:
2575
+ _log_status("Adding design insights...")
2576
+ lines.append("")
2577
+ lines.append("DESIGN INSIGHTS:")
2578
+ lines.append(f" Type: {design_analysis_result.design_type} | Target: {design_analysis_result.target_user}")
2579
+ lines.append(f" Purpose: {design_analysis_result.overall_purpose}")
2580
+ if design_analysis_result.design_patterns:
2581
+ lines.append(f" Patterns: {', '.join(design_analysis_result.design_patterns[:5])}")
2582
+ if design_analysis_result.gaps_or_concerns:
2583
+ lines.append(f" Gaps: {'; '.join(design_analysis_result.gaps_or_concerns[:3])}")
2584
+ elif design_error:
2585
+ logging.warning(f"File-level LLM analysis failed: {design_error}")
2586
+
2587
+ return '\n'.join(lines)
2588
+
2589
+
2590
+ # -----------------------------------------------------------------------------
2591
+ # Args Schemas for New Tools
2592
+ # -----------------------------------------------------------------------------
2593
+
2594
+ FileStructureTOONSchema = create_model(
2595
+ "FileStructureTOON",
2596
+ url=(
2597
+ Optional[str],
2598
+ Field(
2599
+ description=(
2600
+ "Full Figma URL with file key and optional node-id. "
2601
+ "Example: 'https://www.figma.com/file/<FILE_KEY>/...?node-id=<NODE_ID>'. "
2602
+ "If provided, overrides file_key parameter."
2603
+ ),
2604
+ default=None,
2605
+ ),
2606
+ ),
2607
+ file_key=(
2608
+ Optional[str],
2609
+ Field(
2610
+ description="Figma file key (used only if URL not provided).",
2611
+ default=None,
2612
+ examples=["Fp24FuzPwH0L74ODSrCnQo"],
2613
+ ),
2614
+ ),
2615
+ include_pages=(
2616
+ Optional[str],
2617
+ Field(
2618
+ description="Comma-separated page IDs to include. Example: '1:2,1:3'",
2619
+ default=None,
2620
+ ),
2621
+ ),
2622
+ exclude_pages=(
2623
+ Optional[str],
2624
+ Field(
2625
+ description="Comma-separated page IDs to exclude (only if include_pages not set).",
2626
+ default=None,
2627
+ ),
2628
+ ),
2629
+ max_frames=(
2630
+ Optional[int],
2631
+ Field(
2632
+ description="Maximum frames per page to process. Default: 50",
2633
+ default=50,
2634
+ ge=1,
2635
+ le=200,
2636
+ ),
2637
+ ),
2638
+ )
2639
+
2640
+
2641
+ PageFlowsTOONSchema = create_model(
2642
+ "PageFlowsTOON",
2643
+ url=(
2644
+ Optional[str],
2645
+ Field(
2646
+ description="Full Figma URL pointing to a specific page.",
2647
+ default=None,
2648
+ ),
2649
+ ),
2650
+ file_key=(
2651
+ Optional[str],
2652
+ Field(
2653
+ description="Figma file key.",
2654
+ default=None,
2655
+ ),
2656
+ ),
2657
+ page_id=(
2658
+ Optional[str],
2659
+ Field(
2660
+ description="Page ID to analyze. Required if URL doesn't include node-id.",
2661
+ default=None,
2662
+ ),
2663
+ ),
2664
+ )
2665
+
2666
+
2667
+ FrameDetailTOONSchema = create_model(
2668
+ "FrameDetailTOON",
2669
+ file_key=(
2670
+ str,
2671
+ Field(
2672
+ description="Figma file key.",
2673
+ examples=["Fp24FuzPwH0L74ODSrCnQo"],
2674
+ ),
2675
+ ),
2676
+ frame_ids=(
2677
+ str,
2678
+ Field(
2679
+ description="Comma-separated frame IDs to get details for.",
2680
+ examples=["1:100,1:200,1:300"],
2681
+ ),
2682
+ ),
2683
+ )
2684
+
2685
+
2686
+ # Unified TOON tool with detail levels
2687
+ AnalyzeFileSchema = create_model(
2688
+ "AnalyzeFile",
2689
+ url=(
2690
+ Optional[str],
2691
+ Field(
2692
+ description=(
2693
+ "Full Figma URL with file key and optional node-id. "
2694
+ "Example: 'https://www.figma.com/file/<FILE_KEY>/...?node-id=<NODE_ID>'. "
2695
+ "If provided, overrides file_key parameter."
2696
+ ),
2697
+ default=None,
2698
+ ),
2699
+ ),
2700
+ file_key=(
2701
+ Optional[str],
2702
+ Field(
2703
+ description="Figma file key (used only if URL not provided).",
2704
+ default=None,
2705
+ examples=["Fp24FuzPwH0L74ODSrCnQo"],
2706
+ ),
2707
+ ),
2708
+ node_id=(
2709
+ Optional[str],
2710
+ Field(
2711
+ description=(
2712
+ "Optional node ID to focus on. Can be a page ID or frame ID. "
2713
+ "If a frame ID is provided, returns detailed frame analysis."
2714
+ ),
2715
+ default=None,
2716
+ ),
2717
+ ),
2718
+ include_pages=(
2719
+ Optional[str],
2720
+ Field(
2721
+ description="Comma-separated page IDs to include. Example: '1:2,1:3'",
2722
+ default=None,
2723
+ ),
2724
+ ),
2725
+ exclude_pages=(
2726
+ Optional[str],
2727
+ Field(
2728
+ description="Comma-separated page IDs to exclude (only if include_pages not set).",
2729
+ default=None,
2730
+ ),
2731
+ ),
2732
+ max_frames=(
2733
+ int,
2734
+ Field(
2735
+ description="Maximum frames per page to process. Default: 50",
2736
+ default=50,
2737
+ ge=1,
2738
+ le=200,
2739
+ ),
2740
+ ),
2741
+ include_design_insights=(
2742
+ bool,
2743
+ Field(
2744
+ description="Include DESIGN INSIGHTS section with file-level LLM analysis. Set to False to skip and speed up processing. Default: True",
2745
+ default=True,
2746
+ ),
2747
+ ),
2748
+ )
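+
+ # Illustrative validation (editor's sketch): create_model produces Pydantic
+ # models, so tool arguments can be validated directly.
+ #   args = AnalyzeFileSchema(
+ #       url='https://www.figma.com/file/Fp24FuzPwH0L74ODSrCnQo/Demo'
+ #   )
+ #   args.max_frames  # -> 50 (default)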