amd-gaia 0.14.3__py3-none-any.whl → 0.15.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (181) hide show
  1. {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/METADATA +223 -223
  2. amd_gaia-0.15.1.dist-info/RECORD +178 -0
  3. {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/entry_points.txt +1 -0
  4. {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/licenses/LICENSE.md +20 -20
  5. gaia/__init__.py +29 -29
  6. gaia/agents/__init__.py +19 -19
  7. gaia/agents/base/__init__.py +9 -9
  8. gaia/agents/base/agent.py +2177 -2177
  9. gaia/agents/base/api_agent.py +120 -120
  10. gaia/agents/base/console.py +1841 -1841
  11. gaia/agents/base/errors.py +237 -237
  12. gaia/agents/base/mcp_agent.py +86 -86
  13. gaia/agents/base/tools.py +83 -83
  14. gaia/agents/blender/agent.py +556 -556
  15. gaia/agents/blender/agent_simple.py +133 -135
  16. gaia/agents/blender/app.py +211 -211
  17. gaia/agents/blender/app_simple.py +41 -41
  18. gaia/agents/blender/core/__init__.py +16 -16
  19. gaia/agents/blender/core/materials.py +506 -506
  20. gaia/agents/blender/core/objects.py +316 -316
  21. gaia/agents/blender/core/rendering.py +225 -225
  22. gaia/agents/blender/core/scene.py +220 -220
  23. gaia/agents/blender/core/view.py +146 -146
  24. gaia/agents/chat/__init__.py +9 -9
  25. gaia/agents/chat/agent.py +835 -835
  26. gaia/agents/chat/app.py +1058 -1058
  27. gaia/agents/chat/session.py +508 -508
  28. gaia/agents/chat/tools/__init__.py +15 -15
  29. gaia/agents/chat/tools/file_tools.py +96 -96
  30. gaia/agents/chat/tools/rag_tools.py +1729 -1729
  31. gaia/agents/chat/tools/shell_tools.py +436 -436
  32. gaia/agents/code/__init__.py +7 -7
  33. gaia/agents/code/agent.py +549 -549
  34. gaia/agents/code/cli.py +377 -0
  35. gaia/agents/code/models.py +135 -135
  36. gaia/agents/code/orchestration/__init__.py +24 -24
  37. gaia/agents/code/orchestration/checklist_executor.py +1763 -1763
  38. gaia/agents/code/orchestration/checklist_generator.py +713 -713
  39. gaia/agents/code/orchestration/factories/__init__.py +9 -9
  40. gaia/agents/code/orchestration/factories/base.py +63 -63
  41. gaia/agents/code/orchestration/factories/nextjs_factory.py +118 -118
  42. gaia/agents/code/orchestration/factories/python_factory.py +106 -106
  43. gaia/agents/code/orchestration/orchestrator.py +841 -841
  44. gaia/agents/code/orchestration/project_analyzer.py +391 -391
  45. gaia/agents/code/orchestration/steps/__init__.py +67 -67
  46. gaia/agents/code/orchestration/steps/base.py +188 -188
  47. gaia/agents/code/orchestration/steps/error_handler.py +314 -314
  48. gaia/agents/code/orchestration/steps/nextjs.py +828 -828
  49. gaia/agents/code/orchestration/steps/python.py +307 -307
  50. gaia/agents/code/orchestration/template_catalog.py +469 -469
  51. gaia/agents/code/orchestration/workflows/__init__.py +14 -14
  52. gaia/agents/code/orchestration/workflows/base.py +80 -80
  53. gaia/agents/code/orchestration/workflows/nextjs.py +186 -186
  54. gaia/agents/code/orchestration/workflows/python.py +94 -94
  55. gaia/agents/code/prompts/__init__.py +11 -11
  56. gaia/agents/code/prompts/base_prompt.py +77 -77
  57. gaia/agents/code/prompts/code_patterns.py +2036 -2036
  58. gaia/agents/code/prompts/nextjs_prompt.py +40 -40
  59. gaia/agents/code/prompts/python_prompt.py +109 -109
  60. gaia/agents/code/schema_inference.py +365 -365
  61. gaia/agents/code/system_prompt.py +41 -41
  62. gaia/agents/code/tools/__init__.py +42 -42
  63. gaia/agents/code/tools/cli_tools.py +1138 -1138
  64. gaia/agents/code/tools/code_formatting.py +319 -319
  65. gaia/agents/code/tools/code_tools.py +769 -769
  66. gaia/agents/code/tools/error_fixing.py +1347 -1347
  67. gaia/agents/code/tools/external_tools.py +180 -180
  68. gaia/agents/code/tools/file_io.py +845 -845
  69. gaia/agents/code/tools/prisma_tools.py +190 -190
  70. gaia/agents/code/tools/project_management.py +1016 -1016
  71. gaia/agents/code/tools/testing.py +321 -321
  72. gaia/agents/code/tools/typescript_tools.py +122 -122
  73. gaia/agents/code/tools/validation_parsing.py +461 -461
  74. gaia/agents/code/tools/validation_tools.py +806 -806
  75. gaia/agents/code/tools/web_dev_tools.py +1758 -1758
  76. gaia/agents/code/validators/__init__.py +16 -16
  77. gaia/agents/code/validators/antipattern_checker.py +241 -241
  78. gaia/agents/code/validators/ast_analyzer.py +197 -197
  79. gaia/agents/code/validators/requirements_validator.py +145 -145
  80. gaia/agents/code/validators/syntax_validator.py +171 -171
  81. gaia/agents/docker/__init__.py +7 -7
  82. gaia/agents/docker/agent.py +642 -642
  83. gaia/agents/emr/__init__.py +8 -8
  84. gaia/agents/emr/agent.py +1506 -1506
  85. gaia/agents/emr/cli.py +1322 -1322
  86. gaia/agents/emr/constants.py +475 -475
  87. gaia/agents/emr/dashboard/__init__.py +4 -4
  88. gaia/agents/emr/dashboard/server.py +1974 -1974
  89. gaia/agents/jira/__init__.py +11 -11
  90. gaia/agents/jira/agent.py +894 -894
  91. gaia/agents/jira/jql_templates.py +299 -299
  92. gaia/agents/routing/__init__.py +7 -7
  93. gaia/agents/routing/agent.py +567 -570
  94. gaia/agents/routing/system_prompt.py +75 -75
  95. gaia/agents/summarize/__init__.py +11 -0
  96. gaia/agents/summarize/agent.py +885 -0
  97. gaia/agents/summarize/prompts.py +129 -0
  98. gaia/api/__init__.py +23 -23
  99. gaia/api/agent_registry.py +238 -238
  100. gaia/api/app.py +305 -305
  101. gaia/api/openai_server.py +575 -575
  102. gaia/api/schemas.py +186 -186
  103. gaia/api/sse_handler.py +373 -373
  104. gaia/apps/__init__.py +4 -4
  105. gaia/apps/llm/__init__.py +6 -6
  106. gaia/apps/llm/app.py +173 -169
  107. gaia/apps/summarize/app.py +116 -633
  108. gaia/apps/summarize/html_viewer.py +133 -133
  109. gaia/apps/summarize/pdf_formatter.py +284 -284
  110. gaia/audio/__init__.py +2 -2
  111. gaia/audio/audio_client.py +439 -439
  112. gaia/audio/audio_recorder.py +269 -269
  113. gaia/audio/kokoro_tts.py +599 -599
  114. gaia/audio/whisper_asr.py +432 -432
  115. gaia/chat/__init__.py +16 -16
  116. gaia/chat/app.py +430 -430
  117. gaia/chat/prompts.py +522 -522
  118. gaia/chat/sdk.py +1228 -1225
  119. gaia/cli.py +5481 -5621
  120. gaia/database/__init__.py +10 -10
  121. gaia/database/agent.py +176 -176
  122. gaia/database/mixin.py +290 -290
  123. gaia/database/testing.py +64 -64
  124. gaia/eval/batch_experiment.py +2332 -2332
  125. gaia/eval/claude.py +542 -542
  126. gaia/eval/config.py +37 -37
  127. gaia/eval/email_generator.py +512 -512
  128. gaia/eval/eval.py +3179 -3179
  129. gaia/eval/groundtruth.py +1130 -1130
  130. gaia/eval/transcript_generator.py +582 -582
  131. gaia/eval/webapp/README.md +167 -167
  132. gaia/eval/webapp/package-lock.json +875 -875
  133. gaia/eval/webapp/package.json +20 -20
  134. gaia/eval/webapp/public/app.js +3402 -3402
  135. gaia/eval/webapp/public/index.html +87 -87
  136. gaia/eval/webapp/public/styles.css +3661 -3661
  137. gaia/eval/webapp/server.js +415 -415
  138. gaia/eval/webapp/test-setup.js +72 -72
  139. gaia/llm/__init__.py +9 -2
  140. gaia/llm/base_client.py +60 -0
  141. gaia/llm/exceptions.py +12 -0
  142. gaia/llm/factory.py +70 -0
  143. gaia/llm/lemonade_client.py +3236 -3221
  144. gaia/llm/lemonade_manager.py +294 -294
  145. gaia/llm/providers/__init__.py +9 -0
  146. gaia/llm/providers/claude.py +108 -0
  147. gaia/llm/providers/lemonade.py +120 -0
  148. gaia/llm/providers/openai_provider.py +79 -0
  149. gaia/llm/vlm_client.py +382 -382
  150. gaia/logger.py +189 -189
  151. gaia/mcp/agent_mcp_server.py +245 -245
  152. gaia/mcp/blender_mcp_client.py +138 -138
  153. gaia/mcp/blender_mcp_server.py +648 -648
  154. gaia/mcp/context7_cache.py +332 -332
  155. gaia/mcp/external_services.py +518 -518
  156. gaia/mcp/mcp_bridge.py +811 -550
  157. gaia/mcp/servers/__init__.py +6 -6
  158. gaia/mcp/servers/docker_mcp.py +83 -83
  159. gaia/perf_analysis.py +361 -0
  160. gaia/rag/__init__.py +10 -10
  161. gaia/rag/app.py +293 -293
  162. gaia/rag/demo.py +304 -304
  163. gaia/rag/pdf_utils.py +235 -235
  164. gaia/rag/sdk.py +2194 -2194
  165. gaia/security.py +163 -163
  166. gaia/talk/app.py +289 -289
  167. gaia/talk/sdk.py +538 -538
  168. gaia/testing/__init__.py +87 -87
  169. gaia/testing/assertions.py +330 -330
  170. gaia/testing/fixtures.py +333 -333
  171. gaia/testing/mocks.py +493 -493
  172. gaia/util.py +46 -46
  173. gaia/utils/__init__.py +33 -33
  174. gaia/utils/file_watcher.py +675 -675
  175. gaia/utils/parsing.py +223 -223
  176. gaia/version.py +100 -100
  177. amd_gaia-0.14.3.dist-info/RECORD +0 -168
  178. gaia/agents/code/app.py +0 -266
  179. gaia/llm/llm_client.py +0 -729
  180. {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/WHEEL +0 -0
  181. {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/top_level.txt +0 -0
@@ -1,365 +1,365 @@
1
- # Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved.
2
- # SPDX-License-Identifier: MIT
3
- """AI-powered schema inference for Code Agent.
4
-
5
- This module provides dynamic schema inference using AI (Perplexity or local LLM)
6
- to understand what fields an application should have based on natural language
7
- descriptions. NO hardcoded app types or patterns - all inference is AI-driven.
8
-
9
- Example:
10
- User: "Build me a task tracker"
11
- AI Response: {"entity": "Task", "fields": [
12
- {"name": "title", "type": "string", "required": true},
13
- {"name": "completed", "type": "boolean", "required": true},
14
- {"name": "dueDate", "type": "datetime", "required": false}
15
- ]}
16
- """
17
-
18
- import json
19
- import logging
20
- import os
21
- import re
22
- from typing import Any, Dict, Optional
23
-
24
- logger = logging.getLogger(__name__)
25
-
26
- # Schema inference prompt - instructs AI to return minimal, appropriate fields
27
- # NOTE: This prompt is optimized for both Perplexity and local LLMs
28
- SCHEMA_INFERENCE_PROMPT = """You are a database schema designer. Analyze the app description and return the schema for the MAIN entity only.
29
-
30
- CRITICAL RULES:
31
- 1. Return ONLY ONE entity - the main data entity (NOT User, NOT Auth)
32
- 2. Keep fields MINIMAL - only what's absolutely necessary
33
- 3. DO NOT include: id, createdAt, updatedAt, userId (auto-generated)
34
- 4. Think about INTUITIVE UX (e.g., Address Book apps NEED a "first name" string field)
35
-
36
- EXACT OUTPUT FORMAT (single JSON object, no array):
37
- {{"entity": "EntityName", "fields": [{{"name": "fieldName", "type": "type", "required": true}}]}}
38
-
39
- Valid types: string, text, number, boolean, datetime, email, url
40
-
41
- EXAMPLES:
42
- - "todo app" -> {{"entity": "Todo", "fields": [{{"name": "title", "type": "string", "required": true}}, {{"name": "completed", "type": "boolean", "required": true}}]}}
43
- - "contact manager" -> {{"entity": "Contact", "fields": [{{"name": "firstName", "type": "string", "required": true}}, {{"name": "lastName", "type": "string", "required": true}}, {{"name": "email", "type": "email", "required": false}}]}}
44
-
45
- App description: "{query}"
46
-
47
- Keep the schema dead simple, and focus on the most basic fields needed. For example, if the app is a contact manager, include "firstName" and "lastName" fields, but do NOT add address fields unless absolutely necessary.
48
-
49
- Return ONLY the JSON object for the MAIN entity:"""
50
-
51
-
52
- def infer_schema(
53
- user_query: str,
54
- chat_sdk: Optional[Any] = None,
55
- ) -> Dict[str, Any]:
56
- """Infer schema fields from user's natural language query using AI.
57
-
58
- Uses cascading fallback: Perplexity API -> Local LLM -> Generic fallback.
59
-
60
- Args:
61
- user_query: The user's app description (e.g., "build me a todo app")
62
- chat_sdk: Optional ChatSDK instance for local LLM fallback
63
-
64
- Returns:
65
- Dictionary with:
66
- - entity: Suggested entity name (e.g., "Todo", "Task", "Contact")
67
- - fields: List of field definitions with name, type, required
68
- - source: Which method was used ("perplexity", "local_llm", "fallback")
69
- """
70
- # Check if this looks like an app creation request
71
- if not _is_app_creation_request(user_query):
72
- logger.debug(f"Query doesn't appear to be app creation: {user_query[:50]}...")
73
- return {"entity": None, "fields": [], "source": "skipped"}
74
-
75
- # Try Perplexity first (if API key is set)
76
- perplexity_key = os.getenv("PERPLEXITY_API_KEY")
77
- if perplexity_key:
78
- logger.info("Attempting schema inference via Perplexity")
79
- result = _infer_via_perplexity(user_query)
80
- if result.get("entity"):
81
- result["source"] = "perplexity"
82
- logger.info(
83
- f"Perplexity inferred schema: {result['entity']} with {len(result['fields'])} fields"
84
- )
85
- return result
86
-
87
- # Fall back to local LLM
88
- if chat_sdk:
89
- logger.debug("Attempting schema inference via local LLM")
90
- result = _infer_via_local_llm(user_query, chat_sdk)
91
- if result.get("entity"):
92
- result["source"] = "local_llm"
93
- logger.debug(
94
- f"Local LLM inferred schema: {result['entity']} with {len(result['fields'])} fields"
95
- )
96
- return result
97
-
98
- # Final fallback - no schema inference available
99
- logger.warning("No schema inference available - returning empty schema")
100
- return {"entity": None, "fields": [], "source": "fallback"}
101
-
102
-
103
- def _is_app_creation_request(query: str) -> bool:
104
- """Check if the query appears to be an app creation request.
105
-
106
- Uses semantic patterns to detect app creation intent without hardcoding
107
- specific app types.
108
- """
109
- query_lower = query.lower()
110
-
111
- # App creation indicators (verbs + objects)
112
- creation_verbs = [
113
- "build",
114
- "create",
115
- "make",
116
- "develop",
117
- "generate",
118
- "design",
119
- "implement",
120
- ]
121
- app_objects = [
122
- "app",
123
- "application",
124
- "crud",
125
- "website",
126
- "site",
127
- "system",
128
- "tracker",
129
- "manager",
130
- "dashboard",
131
- ]
132
-
133
- # Check for creation verb + app object pattern
134
- has_creation_verb = any(verb in query_lower for verb in creation_verbs)
135
- has_app_object = any(obj in query_lower for obj in app_objects)
136
-
137
- # Also check for "for managing X" or "to track X" patterns
138
- management_patterns = [
139
- "for managing",
140
- "to manage",
141
- "to track",
142
- "for tracking",
143
- "to organize",
144
- "for organizing",
145
- ]
146
- has_management_pattern = any(
147
- pattern in query_lower for pattern in management_patterns
148
- )
149
-
150
- return (has_creation_verb and has_app_object) or has_management_pattern
151
-
152
-
153
- def _infer_via_perplexity(query: str) -> Dict[str, Any]:
154
- """Infer schema using Perplexity API.
155
-
156
- Args:
157
- query: User's app description
158
-
159
- Returns:
160
- Schema result or empty dict on failure
161
- """
162
- try:
163
- from gaia.mcp.external_services import get_perplexity_service
164
-
165
- service = get_perplexity_service()
166
- prompt = SCHEMA_INFERENCE_PROMPT.format(query=query)
167
- result = service.search_web(prompt)
168
-
169
- if result.get("success") and result.get("answer"):
170
- return _parse_schema_response(result["answer"])
171
-
172
- logger.warning(
173
- f"Perplexity inference failed: {result.get('error', 'No answer')}"
174
- )
175
- return {"entity": None, "fields": []}
176
-
177
- except Exception as e:
178
- logger.warning(f"Perplexity inference error: {e}")
179
- return {"entity": None, "fields": []}
180
-
181
-
182
- def _infer_via_local_llm(query: str, chat_sdk: Any) -> Dict[str, Any]:
183
- """Infer schema using local LLM via ChatSDK.
184
-
185
- Args:
186
- query: User's app description
187
- chat_sdk: ChatSDK instance for LLM calls
188
-
189
- Returns:
190
- Schema result or empty dict on failure
191
- """
192
- try:
193
- prompt = SCHEMA_INFERENCE_PROMPT.format(query=query)
194
- response = chat_sdk.send(prompt, max_tokens=500)
195
-
196
- if response and response.text:
197
- return _parse_schema_response(response.text)
198
-
199
- logger.warning("Local LLM returned empty response")
200
- return {"entity": None, "fields": []}
201
-
202
- except Exception as e:
203
- logger.warning(f"Local LLM inference error: {e}")
204
- return {"entity": None, "fields": []}
205
-
206
-
207
- def _parse_schema_response(response: str) -> Dict[str, Any]:
208
- """Parse schema JSON from AI response.
209
-
210
- Handles various response formats including:
211
- - Clean JSON object
212
- - JSON array (takes first non-User entity)
213
- - JSON in markdown code blocks
214
- - JSON with surrounding text
215
-
216
- Args:
217
- response: Raw AI response text
218
-
219
- Returns:
220
- Parsed schema or empty dict on failure
221
- """
222
- try:
223
- # Try to extract JSON from the response
224
- json_str = _extract_json(response)
225
- if not json_str:
226
- logger.warning(f"Could not extract JSON from response: {response[:100]}...")
227
- return {"entity": None, "fields": []}
228
-
229
- logger.debug(f"Extracted JSON: {json_str[:200]}...")
230
- data = json.loads(json_str)
231
-
232
- # Handle array response - take first non-User/Auth entity
233
- if isinstance(data, list):
234
- logger.debug(
235
- f"Response is array with {len(data)} items, selecting main entity"
236
- )
237
- skip_names = {"user", "auth", "session", "account"}
238
- for item in data:
239
- if isinstance(item, dict):
240
- name = item.get("entity", "").lower()
241
- if name and name not in skip_names:
242
- data = item
243
- break
244
- else:
245
- # No suitable entity found, take first if available
246
- data = data[0] if data else {}
247
-
248
- # Validate it's a dict
249
- if not isinstance(data, dict):
250
- logger.warning(f"Expected dict but got {type(data).__name__}")
251
- return {"entity": None, "fields": []}
252
-
253
- # Validate required fields
254
- entity = data.get("entity")
255
- fields = data.get("fields", [])
256
-
257
- if not entity or not isinstance(fields, list):
258
- logger.warning(f"Invalid schema format: {data}")
259
- return {"entity": None, "fields": []}
260
-
261
- # Normalize fields, filter out auto-generated ones
262
- normalized_fields = []
263
- skip_fields = {"id", "createdat", "updatedat", "userid"}
264
- for field in fields:
265
- if isinstance(field, dict) and "name" in field:
266
- if field["name"].lower() in skip_fields:
267
- continue
268
- normalized_fields.append(
269
- {
270
- "name": field["name"],
271
- "type": field.get("type", "string"),
272
- "required": field.get("required", False),
273
- }
274
- )
275
-
276
- logger.debug(f"Parsed schema: {entity} with {len(normalized_fields)} fields")
277
- return {"entity": entity, "fields": normalized_fields}
278
-
279
- except json.JSONDecodeError as e:
280
- logger.warning(f"JSON parse error: {e}")
281
- return {"entity": None, "fields": []}
282
- except Exception as e:
283
- logger.warning(f"Schema parse error: {e}")
284
- return {"entity": None, "fields": []}
285
-
286
-
287
- def _extract_json(text: str) -> Optional[str]:
288
- """Extract JSON from text, handling various formats.
289
-
290
- Args:
291
- text: Raw text possibly containing JSON
292
-
293
- Returns:
294
- Extracted JSON string or None
295
- """
296
- # Try to find JSON in code blocks first
297
- code_block_patterns = [
298
- r"```json\s*([\s\S]*?)\s*```",
299
- r"```\s*([\s\S]*?)\s*```",
300
- ]
301
-
302
- for pattern in code_block_patterns:
303
- match = re.search(pattern, text)
304
- if match:
305
- return match.group(1).strip()
306
-
307
- # Check if text starts with array or object to pick correct pattern
308
- stripped = text.strip()
309
- if stripped.startswith("["):
310
- # JSON array - extract it
311
- bracket_match = re.search(r"\[[\s\S]*\]", text)
312
- if bracket_match:
313
- return bracket_match.group(0)
314
-
315
- # Try to find JSON object directly
316
- brace_match = re.search(r"\{[\s\S]*\}", text)
317
- if brace_match:
318
- return brace_match.group(0)
319
-
320
- # Return stripped text as last resort
321
- return text.strip()
322
-
323
-
324
- def format_schema_context(schema_result: Dict[str, Any]) -> str:
325
- """Format inferred schema for injection into system prompt.
326
-
327
- Args:
328
- schema_result: Result from infer_schema()
329
-
330
- Returns:
331
- Formatted string for system prompt, or empty string if no schema
332
- """
333
- entity = schema_result.get("entity")
334
- fields = schema_result.get("fields", [])
335
- source = schema_result.get("source", "unknown")
336
-
337
- if not entity or not fields:
338
- return ""
339
-
340
- # Format fields for prompt
341
- field_lines = []
342
- for field in fields:
343
- name = field["name"]
344
- field_type = field["type"]
345
- required = "required" if field.get("required") else "optional"
346
- field_lines.append(f" - {name}: {field_type} ({required})")
347
-
348
- fields_str = "\n".join(field_lines)
349
-
350
- context = f"""
351
- ## AI-Inferred Schema (source: {source})
352
-
353
- Based on the user's request, the following schema has been determined:
354
-
355
- **Entity:** {entity}
356
- **Fields:**
357
- {fields_str}
358
-
359
- IMPORTANT: Use these fields when creating the data model and components.
360
- - Use `manage_data_model` with these field names and types
361
- - Use the same fields consistently across all tools (API, components, forms)
362
- - Boolean fields (like 'completed') should render as checkboxes in forms and lists
363
- """
364
-
365
- return context
1
+ # Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved.
2
+ # SPDX-License-Identifier: MIT
3
+ """AI-powered schema inference for Code Agent.
4
+
5
+ This module provides dynamic schema inference using AI (Perplexity or local LLM)
6
+ to understand what fields an application should have based on natural language
7
+ descriptions. NO hardcoded app types or patterns - all inference is AI-driven.
8
+
9
+ Example:
10
+ User: "Build me a task tracker"
11
+ AI Response: {"entity": "Task", "fields": [
12
+ {"name": "title", "type": "string", "required": true},
13
+ {"name": "completed", "type": "boolean", "required": true},
14
+ {"name": "dueDate", "type": "datetime", "required": false}
15
+ ]}
16
+ """
17
+
18
+ import json
19
+ import logging
20
+ import os
21
+ import re
22
+ from typing import Any, Dict, Optional
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
+ # Schema inference prompt - instructs AI to return minimal, appropriate fields
27
+ # NOTE: This prompt is optimized for both Perplexity and local LLMs
28
+ SCHEMA_INFERENCE_PROMPT = """You are a database schema designer. Analyze the app description and return the schema for the MAIN entity only.
29
+
30
+ CRITICAL RULES:
31
+ 1. Return ONLY ONE entity - the main data entity (NOT User, NOT Auth)
32
+ 2. Keep fields MINIMAL - only what's absolutely necessary
33
+ 3. DO NOT include: id, createdAt, updatedAt, userId (auto-generated)
34
+ 4. Think about INTUITIVE UX (e.g., Address Book apps NEED a "first name" string field)
35
+
36
+ EXACT OUTPUT FORMAT (single JSON object, no array):
37
+ {{"entity": "EntityName", "fields": [{{"name": "fieldName", "type": "type", "required": true}}]}}
38
+
39
+ Valid types: string, text, number, boolean, datetime, email, url
40
+
41
+ EXAMPLES:
42
+ - "todo app" -> {{"entity": "Todo", "fields": [{{"name": "title", "type": "string", "required": true}}, {{"name": "completed", "type": "boolean", "required": true}}]}}
43
+ - "contact manager" -> {{"entity": "Contact", "fields": [{{"name": "firstName", "type": "string", "required": true}}, {{"name": "lastName", "type": "string", "required": true}}, {{"name": "email", "type": "email", "required": false}}]}}
44
+
45
+ App description: "{query}"
46
+
47
+ Keep the schema dead simple, and focus on the most basic fields needed. For example, if the app is a contact manager, include "firstName" and "lastName" fields, but do NOT add address fields unless absolutely necessary.
48
+
49
+ Return ONLY the JSON object for the MAIN entity:"""
50
+
51
+
52
+ def infer_schema(
53
+ user_query: str,
54
+ chat_sdk: Optional[Any] = None,
55
+ ) -> Dict[str, Any]:
56
+ """Infer schema fields from user's natural language query using AI.
57
+
58
+ Uses cascading fallback: Perplexity API -> Local LLM -> Generic fallback.
59
+
60
+ Args:
61
+ user_query: The user's app description (e.g., "build me a todo app")
62
+ chat_sdk: Optional ChatSDK instance for local LLM fallback
63
+
64
+ Returns:
65
+ Dictionary with:
66
+ - entity: Suggested entity name (e.g., "Todo", "Task", "Contact")
67
+ - fields: List of field definitions with name, type, required
68
+ - source: Which method was used ("perplexity", "local_llm", "fallback")
69
+ """
70
+ # Check if this looks like an app creation request
71
+ if not _is_app_creation_request(user_query):
72
+ logger.debug(f"Query doesn't appear to be app creation: {user_query[:50]}...")
73
+ return {"entity": None, "fields": [], "source": "skipped"}
74
+
75
+ # Try Perplexity first (if API key is set)
76
+ perplexity_key = os.getenv("PERPLEXITY_API_KEY")
77
+ if perplexity_key:
78
+ logger.info("Attempting schema inference via Perplexity")
79
+ result = _infer_via_perplexity(user_query)
80
+ if result.get("entity"):
81
+ result["source"] = "perplexity"
82
+ logger.info(
83
+ f"Perplexity inferred schema: {result['entity']} with {len(result['fields'])} fields"
84
+ )
85
+ return result
86
+
87
+ # Fall back to local LLM
88
+ if chat_sdk:
89
+ logger.debug("Attempting schema inference via local LLM")
90
+ result = _infer_via_local_llm(user_query, chat_sdk)
91
+ if result.get("entity"):
92
+ result["source"] = "local_llm"
93
+ logger.debug(
94
+ f"Local LLM inferred schema: {result['entity']} with {len(result['fields'])} fields"
95
+ )
96
+ return result
97
+
98
+ # Final fallback - no schema inference available
99
+ logger.warning("No schema inference available - returning empty schema")
100
+ return {"entity": None, "fields": [], "source": "fallback"}
101
+
102
+
103
+ def _is_app_creation_request(query: str) -> bool:
104
+ """Check if the query appears to be an app creation request.
105
+
106
+ Uses semantic patterns to detect app creation intent without hardcoding
107
+ specific app types.
108
+ """
109
+ query_lower = query.lower()
110
+
111
+ # App creation indicators (verbs + objects)
112
+ creation_verbs = [
113
+ "build",
114
+ "create",
115
+ "make",
116
+ "develop",
117
+ "generate",
118
+ "design",
119
+ "implement",
120
+ ]
121
+ app_objects = [
122
+ "app",
123
+ "application",
124
+ "crud",
125
+ "website",
126
+ "site",
127
+ "system",
128
+ "tracker",
129
+ "manager",
130
+ "dashboard",
131
+ ]
132
+
133
+ # Check for creation verb + app object pattern
134
+ has_creation_verb = any(verb in query_lower for verb in creation_verbs)
135
+ has_app_object = any(obj in query_lower for obj in app_objects)
136
+
137
+ # Also check for "for managing X" or "to track X" patterns
138
+ management_patterns = [
139
+ "for managing",
140
+ "to manage",
141
+ "to track",
142
+ "for tracking",
143
+ "to organize",
144
+ "for organizing",
145
+ ]
146
+ has_management_pattern = any(
147
+ pattern in query_lower for pattern in management_patterns
148
+ )
149
+
150
+ return (has_creation_verb and has_app_object) or has_management_pattern
151
+
152
+
153
+ def _infer_via_perplexity(query: str) -> Dict[str, Any]:
154
+ """Infer schema using Perplexity API.
155
+
156
+ Args:
157
+ query: User's app description
158
+
159
+ Returns:
160
+ Schema result or empty dict on failure
161
+ """
162
+ try:
163
+ from gaia.mcp.external_services import get_perplexity_service
164
+
165
+ service = get_perplexity_service()
166
+ prompt = SCHEMA_INFERENCE_PROMPT.format(query=query)
167
+ result = service.search_web(prompt)
168
+
169
+ if result.get("success") and result.get("answer"):
170
+ return _parse_schema_response(result["answer"])
171
+
172
+ logger.warning(
173
+ f"Perplexity inference failed: {result.get('error', 'No answer')}"
174
+ )
175
+ return {"entity": None, "fields": []}
176
+
177
+ except Exception as e:
178
+ logger.warning(f"Perplexity inference error: {e}")
179
+ return {"entity": None, "fields": []}
180
+
181
+
182
+ def _infer_via_local_llm(query: str, chat_sdk: Any) -> Dict[str, Any]:
183
+ """Infer schema using local LLM via ChatSDK.
184
+
185
+ Args:
186
+ query: User's app description
187
+ chat_sdk: ChatSDK instance for LLM calls
188
+
189
+ Returns:
190
+ Schema result or empty dict on failure
191
+ """
192
+ try:
193
+ prompt = SCHEMA_INFERENCE_PROMPT.format(query=query)
194
+ response = chat_sdk.send(prompt, max_tokens=500)
195
+
196
+ if response and response.text:
197
+ return _parse_schema_response(response.text)
198
+
199
+ logger.warning("Local LLM returned empty response")
200
+ return {"entity": None, "fields": []}
201
+
202
+ except Exception as e:
203
+ logger.warning(f"Local LLM inference error: {e}")
204
+ return {"entity": None, "fields": []}
205
+
206
+
207
+ def _parse_schema_response(response: str) -> Dict[str, Any]:
208
+ """Parse schema JSON from AI response.
209
+
210
+ Handles various response formats including:
211
+ - Clean JSON object
212
+ - JSON array (takes first non-User entity)
213
+ - JSON in markdown code blocks
214
+ - JSON with surrounding text
215
+
216
+ Args:
217
+ response: Raw AI response text
218
+
219
+ Returns:
220
+ Parsed schema or empty dict on failure
221
+ """
222
+ try:
223
+ # Try to extract JSON from the response
224
+ json_str = _extract_json(response)
225
+ if not json_str:
226
+ logger.warning(f"Could not extract JSON from response: {response[:100]}...")
227
+ return {"entity": None, "fields": []}
228
+
229
+ logger.debug(f"Extracted JSON: {json_str[:200]}...")
230
+ data = json.loads(json_str)
231
+
232
+ # Handle array response - take first non-User/Auth entity
233
+ if isinstance(data, list):
234
+ logger.debug(
235
+ f"Response is array with {len(data)} items, selecting main entity"
236
+ )
237
+ skip_names = {"user", "auth", "session", "account"}
238
+ for item in data:
239
+ if isinstance(item, dict):
240
+ name = item.get("entity", "").lower()
241
+ if name and name not in skip_names:
242
+ data = item
243
+ break
244
+ else:
245
+ # No suitable entity found, take first if available
246
+ data = data[0] if data else {}
247
+
248
+ # Validate it's a dict
249
+ if not isinstance(data, dict):
250
+ logger.warning(f"Expected dict but got {type(data).__name__}")
251
+ return {"entity": None, "fields": []}
252
+
253
+ # Validate required fields
254
+ entity = data.get("entity")
255
+ fields = data.get("fields", [])
256
+
257
+ if not entity or not isinstance(fields, list):
258
+ logger.warning(f"Invalid schema format: {data}")
259
+ return {"entity": None, "fields": []}
260
+
261
+ # Normalize fields, filter out auto-generated ones
262
+ normalized_fields = []
263
+ skip_fields = {"id", "createdat", "updatedat", "userid"}
264
+ for field in fields:
265
+ if isinstance(field, dict) and "name" in field:
266
+ if field["name"].lower() in skip_fields:
267
+ continue
268
+ normalized_fields.append(
269
+ {
270
+ "name": field["name"],
271
+ "type": field.get("type", "string"),
272
+ "required": field.get("required", False),
273
+ }
274
+ )
275
+
276
+ logger.debug(f"Parsed schema: {entity} with {len(normalized_fields)} fields")
277
+ return {"entity": entity, "fields": normalized_fields}
278
+
279
+ except json.JSONDecodeError as e:
280
+ logger.warning(f"JSON parse error: {e}")
281
+ return {"entity": None, "fields": []}
282
+ except Exception as e:
283
+ logger.warning(f"Schema parse error: {e}")
284
+ return {"entity": None, "fields": []}
285
+
286
+
287
+ def _extract_json(text: str) -> Optional[str]:
288
+ """Extract JSON from text, handling various formats.
289
+
290
+ Args:
291
+ text: Raw text possibly containing JSON
292
+
293
+ Returns:
294
+ Extracted JSON string or None
295
+ """
296
+ # Try to find JSON in code blocks first
297
+ code_block_patterns = [
298
+ r"```json\s*([\s\S]*?)\s*```",
299
+ r"```\s*([\s\S]*?)\s*```",
300
+ ]
301
+
302
+ for pattern in code_block_patterns:
303
+ match = re.search(pattern, text)
304
+ if match:
305
+ return match.group(1).strip()
306
+
307
+ # Check if text starts with array or object to pick correct pattern
308
+ stripped = text.strip()
309
+ if stripped.startswith("["):
310
+ # JSON array - extract it
311
+ bracket_match = re.search(r"\[[\s\S]*\]", text)
312
+ if bracket_match:
313
+ return bracket_match.group(0)
314
+
315
+ # Try to find JSON object directly
316
+ brace_match = re.search(r"\{[\s\S]*\}", text)
317
+ if brace_match:
318
+ return brace_match.group(0)
319
+
320
+ # Return stripped text as last resort
321
+ return text.strip()
322
+
323
+
324
+ def format_schema_context(schema_result: Dict[str, Any]) -> str:
325
+ """Format inferred schema for injection into system prompt.
326
+
327
+ Args:
328
+ schema_result: Result from infer_schema()
329
+
330
+ Returns:
331
+ Formatted string for system prompt, or empty string if no schema
332
+ """
333
+ entity = schema_result.get("entity")
334
+ fields = schema_result.get("fields", [])
335
+ source = schema_result.get("source", "unknown")
336
+
337
+ if not entity or not fields:
338
+ return ""
339
+
340
+ # Format fields for prompt
341
+ field_lines = []
342
+ for field in fields:
343
+ name = field["name"]
344
+ field_type = field["type"]
345
+ required = "required" if field.get("required") else "optional"
346
+ field_lines.append(f" - {name}: {field_type} ({required})")
347
+
348
+ fields_str = "\n".join(field_lines)
349
+
350
+ context = f"""
351
+ ## AI-Inferred Schema (source: {source})
352
+
353
+ Based on the user's request, the following schema has been determined:
354
+
355
+ **Entity:** {entity}
356
+ **Fields:**
357
+ {fields_str}
358
+
359
+ IMPORTANT: Use these fields when creating the data model and components.
360
+ - Use `manage_data_model` with these field names and types
361
+ - Use the same fields consistently across all tools (API, components, forms)
362
+ - Boolean fields (like 'completed') should render as checkboxes in forms and lists
363
+ """
364
+
365
+ return context