kairo-code 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kairo/backend/api/agents.py +337 -16
- kairo/backend/app.py +84 -4
- kairo/backend/config.py +4 -2
- kairo/backend/models/agent.py +216 -2
- kairo/backend/models/api_key.py +4 -1
- kairo/backend/models/task.py +31 -0
- kairo/backend/models/user_provider_key.py +26 -0
- kairo/backend/schemas/agent.py +249 -2
- kairo/backend/schemas/api_key.py +3 -0
- kairo/backend/services/agent/__init__.py +52 -0
- kairo/backend/services/agent/agent_alerts_evaluation_service.py +224 -0
- kairo/backend/services/agent/agent_alerts_service.py +201 -0
- kairo/backend/services/agent/agent_commands_service.py +142 -0
- kairo/backend/services/agent/agent_crud_service.py +150 -0
- kairo/backend/services/agent/agent_events_service.py +103 -0
- kairo/backend/services/agent/agent_heartbeat_service.py +207 -0
- kairo/backend/services/agent/agent_metrics_rollup_service.py +248 -0
- kairo/backend/services/agent/agent_metrics_service.py +259 -0
- kairo/backend/services/agent/agent_service.py +315 -0
- kairo/backend/services/agent/agent_setup_service.py +180 -0
- kairo/backend/services/agent/constants.py +28 -0
- kairo/backend/services/agent_service.py +18 -102
- kairo/backend/services/api_key_service.py +23 -3
- kairo/backend/services/byok_service.py +204 -0
- kairo/backend/services/chat_service.py +398 -63
- kairo/backend/services/deep_search_service.py +159 -0
- kairo/backend/services/email_service.py +418 -19
- kairo/backend/services/few_shot_service.py +223 -0
- kairo/backend/services/post_processor.py +261 -0
- kairo/backend/services/rag_service.py +150 -0
- kairo/backend/services/task_service.py +119 -0
- kairo/backend/tests/__init__.py +1 -0
- kairo/backend/tests/e2e/__init__.py +1 -0
- kairo/backend/tests/e2e/agents/__init__.py +1 -0
- kairo/backend/tests/e2e/agents/conftest.py +389 -0
- kairo/backend/tests/e2e/agents/test_agent_alerts.py +802 -0
- kairo/backend/tests/e2e/agents/test_agent_commands.py +456 -0
- kairo/backend/tests/e2e/agents/test_agent_crud.py +455 -0
- kairo/backend/tests/e2e/agents/test_agent_events.py +415 -0
- kairo/backend/tests/e2e/agents/test_agent_heartbeat.py +520 -0
- kairo/backend/tests/e2e/agents/test_agent_metrics.py +587 -0
- kairo/backend/tests/e2e/agents/test_agent_setup.py +349 -0
- kairo/migrations/versions/010_agent_dashboard.py +246 -0
- {kairo_code-0.1.0.dist-info → kairo_code-0.2.0.dist-info}/METADATA +1 -1
- {kairo_code-0.1.0.dist-info → kairo_code-0.2.0.dist-info}/RECORD +50 -16
- {kairo_code-0.1.0.dist-info → kairo_code-0.2.0.dist-info}/top_level.txt +1 -0
- kairo_migrations/env.py +92 -0
- kairo_migrations/versions/001_add_agent_dashboard_extensions.py +450 -0
- {kairo_code-0.1.0.dist-info → kairo_code-0.2.0.dist-info}/WHEEL +0 -0
- {kairo_code-0.1.0.dist-info → kairo_code-0.2.0.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
"""Dynamic few-shot example injection for small models.
|
|
2
|
+
|
|
3
|
+
Provides relevant examples based on query type to improve response
|
|
4
|
+
quality for 14B parameter models like Nyx Lite.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import logging
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
# Code examples for different languages/contexts
|
|
12
|
+
_PYTHON_CODE_EXAMPLE = """
|
|
13
|
+
<example>
|
|
14
|
+
User: Write a function to validate email addresses
|
|
15
|
+
Assistant: Here's a function to validate email addresses:
|
|
16
|
+
|
|
17
|
+
```python
|
|
18
|
+
import re
|
|
19
|
+
|
|
20
|
+
def validate_email(email: str) -> bool:
|
|
21
|
+
\"\"\"Validate email address format.\"\"\"
|
|
22
|
+
pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
|
|
23
|
+
return bool(re.match(pattern, email))
|
|
24
|
+
|
|
25
|
+
# Usage
|
|
26
|
+
print(validate_email("test@example.com")) # True
|
|
27
|
+
print(validate_email("invalid")) # False
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
The regex checks for: local part + @ + domain + TLD (2+ chars).
|
|
31
|
+
</example>
|
|
32
|
+
"""
|
|
33
|
+
|
|
34
|
+
_JAVASCRIPT_CODE_EXAMPLE = """
|
|
35
|
+
<example>
|
|
36
|
+
User: Create a debounce function in JavaScript
|
|
37
|
+
Assistant: Here's a debounce function:
|
|
38
|
+
|
|
39
|
+
```javascript
|
|
40
|
+
function debounce(func, delay) {
|
|
41
|
+
let timeoutId;
|
|
42
|
+
return function(...args) {
|
|
43
|
+
clearTimeout(timeoutId);
|
|
44
|
+
timeoutId = setTimeout(() => func.apply(this, args), delay);
|
|
45
|
+
};
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
// Usage
|
|
49
|
+
const debouncedSearch = debounce((query) => {
|
|
50
|
+
console.log('Searching:', query);
|
|
51
|
+
}, 300);
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
This delays execution until `delay` ms after the last call.
|
|
55
|
+
</example>
|
|
56
|
+
"""
|
|
57
|
+
|
|
58
|
+
_API_USAGE_EXAMPLE = """
|
|
59
|
+
<example>
|
|
60
|
+
User: How do I fetch data from an API in Python?
|
|
61
|
+
Assistant: [Uses deep_search to verify API documentation]
|
|
62
|
+
|
|
63
|
+
Here's how to fetch data from an API:
|
|
64
|
+
|
|
65
|
+
```python
|
|
66
|
+
import requests
|
|
67
|
+
|
|
68
|
+
response = requests.get(
|
|
69
|
+
"https://api.example.com/data",
|
|
70
|
+
headers={"Authorization": "Bearer YOUR_TOKEN"},
|
|
71
|
+
params={"limit": 10}
|
|
72
|
+
)
|
|
73
|
+
|
|
74
|
+
if response.status_code == 200:
|
|
75
|
+
data = response.json()
|
|
76
|
+
print(data)
|
|
77
|
+
else:
|
|
78
|
+
print(f"Error: {response.status_code}")
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
Always check the status code before parsing the response.
|
|
82
|
+
</example>
|
|
83
|
+
"""
|
|
84
|
+
|
|
85
|
+
_DEBUGGING_EXAMPLE = """
|
|
86
|
+
<example>
|
|
87
|
+
User: I get TypeError: 'NoneType' object is not subscriptable
|
|
88
|
+
Assistant: This error occurs when you try to access an index or key on a `None` value.
|
|
89
|
+
|
|
90
|
+
**Common causes:**
|
|
91
|
+
1. Function returning `None` instead of expected value
|
|
92
|
+
2. Dictionary key doesn't exist (`.get()` returns `None`)
|
|
93
|
+
3. API returning empty response
|
|
94
|
+
|
|
95
|
+
**Fix:** Add None checks:
|
|
96
|
+
|
|
97
|
+
```python
|
|
98
|
+
result = get_data()
|
|
99
|
+
if result is not None:
|
|
100
|
+
value = result['key']
|
|
101
|
+
else:
|
|
102
|
+
value = default_value
|
|
103
|
+
|
|
104
|
+
# Or use .get() with default:
|
|
105
|
+
value = data.get('key', default_value)
|
|
106
|
+
```
|
|
107
|
+
</example>
|
|
108
|
+
"""
|
|
109
|
+
|
|
110
|
+
_FACTUAL_EXAMPLE = """
|
|
111
|
+
<example>
|
|
112
|
+
User: What is the difference between REST and GraphQL?
|
|
113
|
+
Assistant: **REST vs GraphQL:**
|
|
114
|
+
|
|
115
|
+
| Aspect | REST | GraphQL |
|
|
116
|
+
|--------|------|---------|
|
|
117
|
+
| Endpoints | Multiple (one per resource) | Single endpoint |
|
|
118
|
+
| Data fetching | Fixed response structure | Client specifies exact fields |
|
|
119
|
+
| Over-fetching | Common problem | Avoided by design |
|
|
120
|
+
| Versioning | URL versioning (v1, v2) | Schema evolution |
|
|
121
|
+
|
|
122
|
+
**Use REST when:** Simple CRUD, caching is critical, team familiarity.
|
|
123
|
+
**Use GraphQL when:** Complex data relationships, mobile apps need minimal data.
|
|
124
|
+
</example>
|
|
125
|
+
"""
|
|
126
|
+
|
|
127
|
+
# Trigger words for each example type
|
|
128
|
+
_EXAMPLE_TRIGGERS = {
|
|
129
|
+
"python": {
|
|
130
|
+
"triggers": ["python", "py", "django", "flask", "fastapi", "pip"],
|
|
131
|
+
"example": _PYTHON_CODE_EXAMPLE,
|
|
132
|
+
},
|
|
133
|
+
"javascript": {
|
|
134
|
+
"triggers": ["javascript", "js", "node", "react", "vue", "typescript", "npm"],
|
|
135
|
+
"example": _JAVASCRIPT_CODE_EXAMPLE,
|
|
136
|
+
},
|
|
137
|
+
"api": {
|
|
138
|
+
"triggers": ["api", "endpoint", "rest", "http", "fetch", "request", "curl"],
|
|
139
|
+
"example": _API_USAGE_EXAMPLE,
|
|
140
|
+
},
|
|
141
|
+
"debug": {
|
|
142
|
+
"triggers": ["error", "bug", "fix", "debug", "not working", "issue", "traceback", "exception"],
|
|
143
|
+
"example": _DEBUGGING_EXAMPLE,
|
|
144
|
+
},
|
|
145
|
+
"factual": {
|
|
146
|
+
"triggers": ["what is", "difference between", "compare", "vs", "versus", "explain"],
|
|
147
|
+
"example": _FACTUAL_EXAMPLE,
|
|
148
|
+
},
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def get_few_shot_examples(message: str, model: str) -> str:
    """Pick few-shot examples whose trigger words occur in the message.

    Args:
        message: The user's message. It is lower-cased before matching.
        model: The model identifier. Examples are only produced for
            "nyx-lite"; any other value yields an empty string.

    Returns:
        The selected example strings joined with newlines, or "" when the
        model is not eligible or no trigger matches. At most two examples
        are returned, taken in the iteration order of ``_EXAMPLE_TRIGGERS``.
    """
    if model not in ("nyx-lite",):
        return ""

    lowered = message.lower()
    selected = []

    for entry in _EXAMPLE_TRIGGERS.values():
        # Cap at two examples to keep the prompt small.
        if len(selected) >= 2:
            break
        # Triggers are matched as plain substrings of the lower-cased message.
        if any(word in lowered for word in entry["triggers"]):
            selected.append(entry["example"])

    if selected:
        combined = "\n".join(selected)
        logger.debug("Injecting %d few-shot examples for query type", len(selected))
        return combined

    return ""
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def get_output_format_instructions(message: str, model: str) -> str:
    """Return an "[Output Format]" instruction block for the query, if any.

    Args:
        message: The user's message. It is lower-cased before matching.
        model: The model identifier. Instructions are only produced for
            "nyx-lite"; any other value yields an empty string.

    Returns:
        The instruction text of the first rule with a keyword contained in
        the message (code, then debugging, then comparison), or "" when the
        model is not eligible or no rule matches.
    """
    if model not in ("nyx-lite",):
        return ""

    lowered = message.lower()

    # Rules are tried top to bottom; the first one with a matching keyword wins.
    rules = (
        (
            ("code", "function", "implement", "write", "create a"),
            "\n[Output Format]\n"
            "1. Brief explanation (1-2 sentences)\n"
            "2. Code in fenced block with language tag\n"
            "3. Usage example\n"
            "Do NOT repeat code blocks.",
        ),
        (
            ("error", "bug", "fix", "not working"),
            "\n[Output Format]\n"
            "1. Identify the problem\n"
            "2. Explain the cause\n"
            "3. Provide the solution with code\n"
            "4. Explain why it works",
        ),
        (
            ("compare", "difference", "vs", "versus"),
            "\n[Output Format]\n"
            "Use a comparison table when appropriate.\n"
            "Be concise and direct.",
        ),
    )

    for keywords, instructions in rules:
        for keyword in keywords:
            # Keywords are matched as plain substrings.
            if keyword in lowered:
                return instructions

    return ""
|
|
@@ -0,0 +1,261 @@
|
|
|
1
|
+
"""Post-processor for LLM responses.
|
|
2
|
+
|
|
3
|
+
Validates generated code against tool results to catch hallucinated
|
|
4
|
+
endpoints, parameters, and URLs. Runs after the model generates a
|
|
5
|
+
response but before it's sent to the user.
|
|
6
|
+
|
|
7
|
+
Enhanced with:
|
|
8
|
+
- Code syntax validation (unclosed brackets, incomplete imports)
|
|
9
|
+
- Duplicate code detection
|
|
10
|
+
- Placeholder detection
|
|
11
|
+
- Claims verification against sources
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import logging
|
|
15
|
+
import re
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _extract_urls_from_text(text: str) -> set[str]:
|
|
21
|
+
"""Extract all URLs from text."""
|
|
22
|
+
return set(re.findall(r'https?://[^\s\'"<>\)]+', text))
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _extract_code_blocks(text: str) -> list[str]:
|
|
26
|
+
"""Extract code from fenced code blocks."""
|
|
27
|
+
blocks = re.findall(r'```[\w]*\n(.*?)```', text, re.DOTALL)
|
|
28
|
+
return blocks
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _extract_urls_from_code(code_blocks: list[str]) -> set[str]:
|
|
32
|
+
"""Extract URLs used in code blocks."""
|
|
33
|
+
urls = set()
|
|
34
|
+
for block in code_blocks:
|
|
35
|
+
# Match string literals containing URLs
|
|
36
|
+
urls.update(re.findall(r'["\']+(https?://[^\s\'"<>\)]+)["\']', block))
|
|
37
|
+
# Match f-string URLs
|
|
38
|
+
urls.update(re.findall(r'f["\']+(https?://[^"\'{}]+)', block))
|
|
39
|
+
return urls
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _extract_param_names(code_blocks: list[str]) -> set[str]:
|
|
43
|
+
"""Extract parameter names from request params/dicts in code."""
|
|
44
|
+
params = set()
|
|
45
|
+
for block in code_blocks:
|
|
46
|
+
# Match dict keys in params-like structures: {'key': value} or {"key": value}
|
|
47
|
+
params.update(re.findall(r'["\'](\w+)["\']\s*:', block))
|
|
48
|
+
return params
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _validate_code_syntax(code_blocks: list[str]) -> list[str]:
|
|
52
|
+
"""Validate code block syntax and return issues.
|
|
53
|
+
|
|
54
|
+
Checks for common problems small models make:
|
|
55
|
+
- Unclosed brackets/parentheses/braces
|
|
56
|
+
- Incomplete import statements
|
|
57
|
+
- Placeholder values
|
|
58
|
+
- Unterminated strings
|
|
59
|
+
"""
|
|
60
|
+
issues = []
|
|
61
|
+
|
|
62
|
+
for i, block in enumerate(code_blocks):
|
|
63
|
+
block_issues = []
|
|
64
|
+
|
|
65
|
+
# Check for unclosed brackets/parentheses/braces
|
|
66
|
+
open_parens = block.count('(') - block.count(')')
|
|
67
|
+
open_brackets = block.count('[') - block.count(']')
|
|
68
|
+
open_braces = block.count('{') - block.count('}')
|
|
69
|
+
|
|
70
|
+
if open_parens > 0:
|
|
71
|
+
block_issues.append(f"{open_parens} unclosed parenthesis")
|
|
72
|
+
elif open_parens < 0:
|
|
73
|
+
block_issues.append(f"{-open_parens} extra closing parenthesis")
|
|
74
|
+
|
|
75
|
+
if open_brackets > 0:
|
|
76
|
+
block_issues.append(f"{open_brackets} unclosed bracket")
|
|
77
|
+
elif open_brackets < 0:
|
|
78
|
+
block_issues.append(f"{-open_brackets} extra closing bracket")
|
|
79
|
+
|
|
80
|
+
if open_braces > 0:
|
|
81
|
+
block_issues.append(f"{open_braces} unclosed brace")
|
|
82
|
+
elif open_braces < 0:
|
|
83
|
+
block_issues.append(f"{-open_braces} extra closing brace")
|
|
84
|
+
|
|
85
|
+
# Check for incomplete imports (ending with comma or nothing after 'import')
|
|
86
|
+
lines = block.split('\n')
|
|
87
|
+
for line in lines:
|
|
88
|
+
stripped = line.strip()
|
|
89
|
+
if stripped.startswith('import ') and stripped.endswith(','):
|
|
90
|
+
block_issues.append("incomplete import statement")
|
|
91
|
+
break
|
|
92
|
+
if stripped == 'import' or stripped == 'from':
|
|
93
|
+
block_issues.append("incomplete import statement")
|
|
94
|
+
break
|
|
95
|
+
|
|
96
|
+
# Check for placeholder patterns that shouldn't be in final code
|
|
97
|
+
placeholders = re.findall(
|
|
98
|
+
r'YOUR_[A-Z_]+|<[A-Z_]+>|REPLACE_THIS|TODO:|FIXME:|XXX:',
|
|
99
|
+
block
|
|
100
|
+
)
|
|
101
|
+
if placeholders:
|
|
102
|
+
unique_placeholders = list(set(placeholders))[:3] # Limit display
|
|
103
|
+
block_issues.append(f"placeholder values: {', '.join(unique_placeholders)}")
|
|
104
|
+
|
|
105
|
+
# Check for ellipsis used as placeholder (common in small model output)
|
|
106
|
+
if '...' in block and 'range(' not in block and 'slice' not in block:
|
|
107
|
+
# Check if ... is used as code placeholder vs legitimate use
|
|
108
|
+
ellipsis_lines = [l for l in lines if '...' in l and not l.strip().startswith('#')]
|
|
109
|
+
if ellipsis_lines:
|
|
110
|
+
# Check context - if it looks like placeholder code
|
|
111
|
+
for el in ellipsis_lines:
|
|
112
|
+
if el.strip() == '...' or el.strip().endswith('...'):
|
|
113
|
+
block_issues.append("incomplete code (ellipsis placeholder)")
|
|
114
|
+
break
|
|
115
|
+
|
|
116
|
+
if block_issues:
|
|
117
|
+
issues.extend(block_issues)
|
|
118
|
+
|
|
119
|
+
return issues[:5] # Limit to 5 issues
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def _detect_duplicate_code(code_blocks: list[str]) -> bool:
|
|
123
|
+
"""Detect if there are duplicate code blocks."""
|
|
124
|
+
if len(code_blocks) < 2:
|
|
125
|
+
return False
|
|
126
|
+
|
|
127
|
+
seen_normalized = set()
|
|
128
|
+
for block in code_blocks:
|
|
129
|
+
# Normalize: collapse whitespace, lowercase
|
|
130
|
+
normalized = ' '.join(block.lower().split())
|
|
131
|
+
if len(normalized) < 50:
|
|
132
|
+
continue # Skip very short blocks
|
|
133
|
+
|
|
134
|
+
if normalized in seen_normalized:
|
|
135
|
+
return True
|
|
136
|
+
|
|
137
|
+
# Also check for substantial overlap (80%+)
|
|
138
|
+
for seen in seen_normalized:
|
|
139
|
+
shorter = min(len(normalized), len(seen))
|
|
140
|
+
if shorter > 100:
|
|
141
|
+
# Compare first N chars
|
|
142
|
+
if normalized[:shorter] == seen[:shorter]:
|
|
143
|
+
return True
|
|
144
|
+
|
|
145
|
+
seen_normalized.add(normalized)
|
|
146
|
+
|
|
147
|
+
return False
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def _verify_claims_against_sources(response: str, tool_results: str) -> list[str]:
|
|
151
|
+
"""Identify claims in response that aren't supported by sources."""
|
|
152
|
+
warnings = []
|
|
153
|
+
response_lower = response.lower()
|
|
154
|
+
tool_lower = tool_results.lower()
|
|
155
|
+
|
|
156
|
+
# Check for statistics/numbers that should come from sources
|
|
157
|
+
stat_patterns = [
|
|
158
|
+
(r'(\d{1,3}(?:,\d{3})*)\s*(?:users?|customers?|downloads?|stars?)', "user/download count"),
|
|
159
|
+
(r'(?:costs?|pricing?|price)\s*(?:is\s*)?\$(\d+)', "pricing"),
|
|
160
|
+
(r'(\d+)%\s*(?:faster|slower|better|improvement)', "performance claim"),
|
|
161
|
+
]
|
|
162
|
+
|
|
163
|
+
for pattern, claim_type in stat_patterns:
|
|
164
|
+
matches = re.findall(pattern, response_lower)
|
|
165
|
+
for match in matches:
|
|
166
|
+
# Check if this number appears in the sources
|
|
167
|
+
if match not in tool_lower:
|
|
168
|
+
warnings.append(f"{claim_type} ({match}) not found in sources")
|
|
169
|
+
|
|
170
|
+
# Check for requirement claims
|
|
171
|
+
if 'require' in response_lower or 'must have' in response_lower or 'need to' in response_lower:
|
|
172
|
+
# Check if sources mention it's free/no signup
|
|
173
|
+
if 'free' in tool_lower and 'no' in tool_lower and ('signup' in tool_lower or 'registration' in tool_lower):
|
|
174
|
+
if 'sign up' in response_lower or 'register' in response_lower:
|
|
175
|
+
warnings.append("response mentions signup but sources indicate it may be free/no signup required")
|
|
176
|
+
|
|
177
|
+
return warnings[:3] # Limit warnings
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def validate_response(response: str, tool_results: str | None) -> str:
    """Append review notes to a model response when heuristic checks flag it.

    Checks that need only the response (run whenever it contains fenced
    code): syntax heuristics and duplicate code blocks. Checks that also
    need tool results: URLs in code whose host does not appear in the tool
    output, numeric/signup claims not backed by the tool output, and a
    ``%Y%m%d`` date format used while the tool output shows ``YYYY-MM-DD``.

    Args:
        response: The model's generated response.
        tool_results: The raw tool result text (from deep_search/web_search), or None.

    Returns:
        The response unchanged when nothing was flagged (or when it is
        empty); otherwise the response followed by a "Review Notes" list of
        the distinct notes, in detection order.
    """
    if not response:
        return response

    code_blocks = _extract_code_blocks(response)
    corrections: list[str] = []

    # CODE SYNTAX VALIDATION (always run, even without tool results)
    if code_blocks:
        corrections.extend(
            f"**Code issue:** {issue}" for issue in _validate_code_syntax(code_blocks)
        )
        if _detect_duplicate_code(code_blocks):
            corrections.append(
                "**Note:** Duplicate code block detected - please use only the final version"
            )

    # TOOL RESULT VALIDATION (only if we have tool results)
    if tool_results and code_blocks:
        tool_urls = _extract_urls_from_text(tool_results)
        code_urls = _extract_urls_from_code(code_blocks)
        if code_urls and tool_urls:
            corrections.extend(_unverified_url_notes(code_urls, tool_urls))

        corrections.extend(
            f"**Verify:** {warning}"
            for warning in _verify_claims_against_sources(response, tool_results)
        )

    # Wrong date format: %Y%m%d in code while the docs show YYYY-MM-DD.
    if tool_results and 'YYYY-MM-DD' in tool_results:
        if any('%Y%m%d' in block for block in code_blocks):
            corrections.append(
                "**Correction:** The date format should be `YYYY-MM-DD` (use `strftime('%Y-%m-%d')`), "
                "not `YYYYMMDD`, according to the API documentation."
            )

    if not corrections:
        return response

    # dict.fromkeys de-duplicates while keeping detection order.
    unique_corrections = list(dict.fromkeys(corrections))
    correction_text = "\n\n---\n**Review Notes:**\n" + "\n".join(f"- {c}" for c in unique_corrections)
    logger.info("Post-processor added %d corrections", len(unique_corrections))
    return response + correction_text


def _url_host(url: str) -> str:
    """Return the lower-cased host of an http(s) URL, without userinfo or port."""
    match = re.match(r'https?://([^/?#]+)', url)
    if not match:
        return url
    authority = match.group(1).rsplit('@', 1)[-1]
    # Drop a trailing ":port" so "localhost:8000" compares equal to "localhost".
    return re.sub(r':\d*$', '', authority).lower()


def _unverified_url_notes(code_urls: set[str], tool_urls: set[str]) -> list[str]:
    """Build a "Verify" note for each code URL whose host is absent from the tool URLs."""
    # Hosts commonly used in examples; never flagged.
    skip_hosts = (
        'api.openweathermap.org', 'api.github.com', 'jsonplaceholder.typicode.com',
        'api.example.com', 'example.com', 'localhost',
    )
    tool_hosts = {_url_host(u) for u in tool_urls}

    notes = []
    # Sorted so the notes come out in the same order on every run.
    for url in sorted(code_urls):
        host = _url_host(url)
        if host in skip_hosts:
            continue
        # Accept sub-/super-domain relationships via substring containment.
        if any(host in known or known in host for known in tool_hosts):
            continue
        # Last chance: the URL (without query string) appears inside a tool URL.
        url_base = url.split('?')[0]
        if any(url_base in tool_url for tool_url in tool_urls):
            continue
        notes.append(
            f"**Verify:** The URL `{url}` was not found in the documentation. "
            f"Please confirm this endpoint exists."
        )
    return notes
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
"""RAG service for looking up internal documentation.
|
|
2
|
+
|
|
3
|
+
Loads curated JSON knowledge bases and returns relevant docs
|
|
4
|
+
when the model calls the lookup_kairo_docs tool.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
import logging
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
_DATA_DIR = Path(__file__).resolve().parent.parent.parent / "data"
|
|
14
|
+
_DOCS_CACHE: dict | None = None
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _load_docs() -> dict:
    """Load the docs JSON from ``_DATA_DIR`` and cache it in ``_DOCS_CACHE``.

    The file is read as UTF-8 (the encoding JSON is exchanged in) rather
    than the platform default. Only a successful load is cached; every
    failure path returns an empty dict without caching, so a later call
    tries again.

    Returns:
        The parsed JSON object, or ``{}`` when the file is missing, cannot
        be read or decoded, or does not contain a JSON object at top level.
    """
    global _DOCS_CACHE
    if _DOCS_CACHE is not None:
        return _DOCS_CACHE

    docs_path = _DATA_DIR / "kairo_docs.json"
    if not docs_path.exists():
        logger.warning("kairo_docs.json not found at %s", docs_path)
        return {}

    try:
        with open(docs_path, encoding="utf-8") as f:
            loaded = json.load(f)
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        logger.exception("Failed to load RAG docs from %s", docs_path)
        return {}

    if not isinstance(loaded, dict):
        # Callers use .get() on the result, so anything but an object is unusable.
        logger.warning("RAG docs at %s are not a JSON object", docs_path)
        return {}

    _DOCS_CACHE = loaded
    logger.info("Loaded RAG docs from %s", docs_path)
    return _DOCS_CACHE
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def lookup_kairo_docs(topic: str) -> str:
    """Render the cached Kairo API documentation as prompt text for a topic.

    The output always contains the instruction header, authentication,
    models, the ``/v1/chat/completions`` endpoint with its request
    parameters, all other endpoints and the notes. The topic only controls
    which code examples of the chat-completions endpoint are included
    (matched as substrings of the lower-cased topic):

    - streaming example: "stream", "sse", "realtime"
    - JavaScript example: "javascript", "js", "node", "typescript"
    - cURL example: "curl", "bash", "shell", "http"
    - Python example: included unless a cURL keyword matched and no
      JavaScript keyword did.

    Fields missing from the docs are rendered as "N/A" (or skipped, for
    examples) instead of raising.

    Args:
        topic: What the user is asking about (e.g. "chat completions",
            "authentication", "models", "streaming", "python sdk").

    Returns:
        The formatted documentation string, or
        "No internal documentation available." when no docs are loaded.
    """
    docs = _load_docs()
    if not docs:
        return "No internal documentation available."

    kairo = docs.get("kairo", {})
    auth = kairo.get("auth", {})
    endpoints = kairo.get("endpoints", [])
    models = kairo.get("models", [])
    notes = kairo.get("notes", [])
    rl = kairo.get("rate_limits", {})

    topic_lower = topic.lower()

    # Determine if user wants streaming or specific language examples
    want_streaming = any(w in topic_lower for w in ("stream", "sse", "realtime"))
    want_js = any(w in topic_lower for w in ("javascript", "js", "node", "typescript"))
    want_curl = any(w in topic_lower for w in ("curl", "bash", "shell", "http"))

    parts = [
        # Strong instruction header so the model uses this data
        "IMPORTANT: The following is OFFICIAL Kairo API documentation. "
        "You MUST use ONLY the information below when answering about the Kairo API. "
        "Do NOT use your training data for Kairo API details. The code examples below "
        "are verified and correct — use them as-is or adapt them to the user's request.",
        "",
        "=== KAIRO API DOCUMENTATION ===",
        f"Query: {topic}",
        "",
        # Auth
        "## Authentication",
        f"- Method: {auth.get('method', 'N/A')}",
        f"- Header: {auth.get('header', 'N/A')}",
        f"- Key format: {auth.get('key_format', 'N/A')}",
        f"- How to get a key: {auth.get('how_to_get_key', 'N/A')}",
        f"- Rate limit: {rl.get('requests_per_minute', 'N/A')} requests/min per key",
        "",
    ]

    # Models
    parts.append("## Available Models")
    for m in models:
        parts.append(
            f"- {m.get('id', 'N/A')} ({m.get('name', 'N/A')}): {m.get('description', 'N/A')}"
            f" | Context: {m.get('context_window', 'N/A')} tokens"
        )
    parts.append("")

    def add_example(ep: dict, key: str, title: str, fence_lang: str) -> None:
        """Append one fenced example section if the endpoint provides it."""
        if key in ep:
            parts.extend([title, f"```{fence_lang}", ep[key], "```", ""])

    chat_path = "/v1/chat/completions"

    # Chat completions endpoint (always included: it is the main endpoint)
    for ep in endpoints:
        if ep.get("path") != chat_path:
            continue
        parts.append(f"## {ep.get('method', 'N/A')} {chat_path}")
        parts.append(ep.get("description", "N/A"))
        parts.append("")
        parts.append("Request body parameters:")
        for param, desc in ep.get("request_body", {}).items():
            parts.append(f"  - {param}: {desc}")
        parts.append("")

        # Python example: shown unless the topic asked for cURL only.
        # NOTE(review): the previous comment here said "always include", but the
        # condition skips Python for cURL-only topics; confirm which is intended.
        if not want_curl or want_js:
            add_example(ep, "example_python", "### Python Example (CORRECT — use this pattern)", "python")
        if want_streaming:
            add_example(ep, "example_streaming", "### Python Streaming Example", "python")
        if want_js:
            add_example(ep, "example_javascript", "### JavaScript/Node.js Example", "javascript")
        if want_curl:
            add_example(ep, "example_curl", "### cURL Example", "bash")

    # Other endpoints
    for ep in endpoints:
        if ep.get("path") == chat_path:
            continue
        parts.append(f"## {ep.get('method', 'N/A')} {ep.get('path', 'N/A')}")
        parts.append(ep.get("description", "N/A"))
        if "example_curl" in ep:
            parts.append(f"Example: {ep['example_curl']}")
        parts.append("")

    # Notes
    parts.append("## Important Notes")
    parts.extend(f"- {note}" for note in notes)

    parts.append("")
    parts.append("=== END KAIRO API DOCUMENTATION ===")

    result = "\n".join(parts)
    logger.info("lookup_kairo_docs(%s) returned %d chars", topic, len(result))
    return result
|