kairo-code 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144) hide show
  1. image-service/main.py +178 -0
  2. infra/chat/app/main.py +84 -0
  3. kairo/backend/__init__.py +0 -0
  4. kairo/backend/api/__init__.py +0 -0
  5. kairo/backend/api/admin/__init__.py +23 -0
  6. kairo/backend/api/admin/audit.py +54 -0
  7. kairo/backend/api/admin/content.py +142 -0
  8. kairo/backend/api/admin/incidents.py +148 -0
  9. kairo/backend/api/admin/stats.py +125 -0
  10. kairo/backend/api/admin/system.py +87 -0
  11. kairo/backend/api/admin/users.py +279 -0
  12. kairo/backend/api/agents.py +94 -0
  13. kairo/backend/api/api_keys.py +85 -0
  14. kairo/backend/api/auth.py +116 -0
  15. kairo/backend/api/billing.py +41 -0
  16. kairo/backend/api/chat.py +72 -0
  17. kairo/backend/api/conversations.py +125 -0
  18. kairo/backend/api/device_auth.py +100 -0
  19. kairo/backend/api/files.py +83 -0
  20. kairo/backend/api/health.py +36 -0
  21. kairo/backend/api/images.py +80 -0
  22. kairo/backend/api/openai_compat.py +225 -0
  23. kairo/backend/api/projects.py +102 -0
  24. kairo/backend/api/usage.py +32 -0
  25. kairo/backend/api/webhooks.py +79 -0
  26. kairo/backend/app.py +297 -0
  27. kairo/backend/config.py +179 -0
  28. kairo/backend/core/__init__.py +0 -0
  29. kairo/backend/core/admin_auth.py +24 -0
  30. kairo/backend/core/api_key_auth.py +55 -0
  31. kairo/backend/core/database.py +28 -0
  32. kairo/backend/core/dependencies.py +70 -0
  33. kairo/backend/core/logging.py +23 -0
  34. kairo/backend/core/rate_limit.py +73 -0
  35. kairo/backend/core/security.py +29 -0
  36. kairo/backend/models/__init__.py +19 -0
  37. kairo/backend/models/agent.py +30 -0
  38. kairo/backend/models/api_key.py +25 -0
  39. kairo/backend/models/api_usage.py +29 -0
  40. kairo/backend/models/audit_log.py +26 -0
  41. kairo/backend/models/conversation.py +48 -0
  42. kairo/backend/models/device_code.py +30 -0
  43. kairo/backend/models/feature_flag.py +21 -0
  44. kairo/backend/models/image_generation.py +24 -0
  45. kairo/backend/models/incident.py +28 -0
  46. kairo/backend/models/project.py +28 -0
  47. kairo/backend/models/uptime_record.py +24 -0
  48. kairo/backend/models/usage.py +24 -0
  49. kairo/backend/models/user.py +49 -0
  50. kairo/backend/schemas/__init__.py +0 -0
  51. kairo/backend/schemas/admin/__init__.py +0 -0
  52. kairo/backend/schemas/admin/audit.py +28 -0
  53. kairo/backend/schemas/admin/content.py +53 -0
  54. kairo/backend/schemas/admin/stats.py +77 -0
  55. kairo/backend/schemas/admin/system.py +44 -0
  56. kairo/backend/schemas/admin/users.py +48 -0
  57. kairo/backend/schemas/agent.py +42 -0
  58. kairo/backend/schemas/api_key.py +30 -0
  59. kairo/backend/schemas/auth.py +57 -0
  60. kairo/backend/schemas/chat.py +26 -0
  61. kairo/backend/schemas/conversation.py +39 -0
  62. kairo/backend/schemas/device_auth.py +40 -0
  63. kairo/backend/schemas/image.py +15 -0
  64. kairo/backend/schemas/openai_compat.py +76 -0
  65. kairo/backend/schemas/project.py +21 -0
  66. kairo/backend/schemas/status.py +81 -0
  67. kairo/backend/schemas/usage.py +15 -0
  68. kairo/backend/services/__init__.py +0 -0
  69. kairo/backend/services/admin/__init__.py +0 -0
  70. kairo/backend/services/admin/audit_service.py +78 -0
  71. kairo/backend/services/admin/content_service.py +119 -0
  72. kairo/backend/services/admin/incident_service.py +94 -0
  73. kairo/backend/services/admin/stats_service.py +281 -0
  74. kairo/backend/services/admin/system_service.py +126 -0
  75. kairo/backend/services/admin/user_service.py +157 -0
  76. kairo/backend/services/agent_service.py +107 -0
  77. kairo/backend/services/api_key_service.py +66 -0
  78. kairo/backend/services/api_usage_service.py +126 -0
  79. kairo/backend/services/auth_service.py +101 -0
  80. kairo/backend/services/chat_service.py +501 -0
  81. kairo/backend/services/conversation_service.py +264 -0
  82. kairo/backend/services/device_auth_service.py +193 -0
  83. kairo/backend/services/email_service.py +55 -0
  84. kairo/backend/services/image_service.py +181 -0
  85. kairo/backend/services/llm_service.py +186 -0
  86. kairo/backend/services/project_service.py +109 -0
  87. kairo/backend/services/status_service.py +167 -0
  88. kairo/backend/services/stripe_service.py +78 -0
  89. kairo/backend/services/usage_service.py +150 -0
  90. kairo/backend/services/web_search_service.py +96 -0
  91. kairo/migrations/env.py +60 -0
  92. kairo/migrations/versions/001_initial.py +55 -0
  93. kairo/migrations/versions/002_usage_tracking_and_indexes.py +66 -0
  94. kairo/migrations/versions/003_username_to_email.py +21 -0
  95. kairo/migrations/versions/004_add_plans_and_verification.py +67 -0
  96. kairo/migrations/versions/005_add_projects.py +52 -0
  97. kairo/migrations/versions/006_add_image_generation.py +63 -0
  98. kairo/migrations/versions/007_add_admin_portal.py +107 -0
  99. kairo/migrations/versions/008_add_device_code_auth.py +76 -0
  100. kairo/migrations/versions/009_add_status_page.py +65 -0
  101. kairo/tools/extract_claude_data.py +465 -0
  102. kairo/tools/filter_claude_data.py +303 -0
  103. kairo/tools/generate_curated_data.py +157 -0
  104. kairo/tools/mix_training_data.py +295 -0
  105. kairo_code/__init__.py +3 -0
  106. kairo_code/agents/__init__.py +25 -0
  107. kairo_code/agents/architect.py +98 -0
  108. kairo_code/agents/audit.py +100 -0
  109. kairo_code/agents/base.py +463 -0
  110. kairo_code/agents/coder.py +155 -0
  111. kairo_code/agents/database.py +77 -0
  112. kairo_code/agents/docs.py +88 -0
  113. kairo_code/agents/explorer.py +62 -0
  114. kairo_code/agents/guardian.py +80 -0
  115. kairo_code/agents/planner.py +66 -0
  116. kairo_code/agents/reviewer.py +91 -0
  117. kairo_code/agents/security.py +94 -0
  118. kairo_code/agents/terraform.py +88 -0
  119. kairo_code/agents/testing.py +97 -0
  120. kairo_code/agents/uiux.py +88 -0
  121. kairo_code/auth.py +232 -0
  122. kairo_code/config.py +172 -0
  123. kairo_code/conversation.py +173 -0
  124. kairo_code/heartbeat.py +63 -0
  125. kairo_code/llm.py +291 -0
  126. kairo_code/logging_config.py +156 -0
  127. kairo_code/main.py +818 -0
  128. kairo_code/router.py +217 -0
  129. kairo_code/sandbox.py +248 -0
  130. kairo_code/settings.py +183 -0
  131. kairo_code/tools/__init__.py +51 -0
  132. kairo_code/tools/analysis.py +509 -0
  133. kairo_code/tools/base.py +417 -0
  134. kairo_code/tools/code.py +58 -0
  135. kairo_code/tools/definitions.py +617 -0
  136. kairo_code/tools/files.py +315 -0
  137. kairo_code/tools/review.py +390 -0
  138. kairo_code/tools/search.py +185 -0
  139. kairo_code/ui.py +418 -0
  140. kairo_code-0.1.0.dist-info/METADATA +13 -0
  141. kairo_code-0.1.0.dist-info/RECORD +144 -0
  142. kairo_code-0.1.0.dist-info/WHEEL +5 -0
  143. kairo_code-0.1.0.dist-info/entry_points.txt +2 -0
  144. kairo_code-0.1.0.dist-info/top_level.txt +4 -0
@@ -0,0 +1,303 @@
1
+ #!/usr/bin/env python3
2
+ """Quality-score and filter Claude training data, keeping only the best examples.
3
+
4
+ Reads the raw JSONL training data, scores each example based on content quality
5
+ signals, and outputs only examples above a minimum score threshold.
6
+
7
+ Usage:
8
+ python kairo/tools/filter_claude_data.py \
9
+ --input kairo/data/claude_training_data.jsonl \
10
+ --output kairo/data/claude_filtered.jsonl \
11
+ --min-score 60
12
+ """
13
+
14
+ import argparse
15
+ import json
16
+ import re
17
+ import sys
18
+
19
+
20
+ # ---------------------------------------------------------------------------
21
+ # Scoring patterns
22
+ # ---------------------------------------------------------------------------
23
+
24
+ CODE_BLOCK_RE = re.compile(r"```")
25
+ BEST_PRACTICE_PATTERNS = [
26
+ re.compile(r"\btry\b.*\bexcept\b", re.DOTALL),
27
+ re.compile(r"\basync\s+(def|function|await)\b"),
28
+ re.compile(r"\bawait\b"),
29
+ re.compile(r":\s*(str|int|float|bool|list|dict|Optional|Union)\b"),
30
+ re.compile(r"\bdef\s+\w+\(.*:\s*\w+"), # type-hinted function
31
+ re.compile(r"\bvalidat(e|ion|or)\b", re.IGNORECASE),
32
+ re.compile(r"\berror.handling\b", re.IGNORECASE),
33
+ re.compile(r"\binput\s+validat", re.IGNORECASE),
34
+ re.compile(r"\bPydantic\b", re.IGNORECASE),
35
+ re.compile(r"\bTypeScript\b", re.IGNORECASE),
36
+ re.compile(r"\binterface\s+\w+", re.IGNORECASE),
37
+ ]
38
+
39
+ ARCHITECTURE_PATTERNS = [
40
+ re.compile(r"\bSOLID\b"),
41
+ re.compile(r"\bseparation\s+of\s+concerns\b", re.IGNORECASE),
42
+ re.compile(r"\bsingle.responsibilit", re.IGNORECASE),
43
+ re.compile(r"\bdependency\s+inject", re.IGNORECASE),
44
+ re.compile(r"\bdesign\s+pattern", re.IGNORECASE),
45
+ re.compile(r"\bservice\s+layer\b", re.IGNORECASE),
46
+ re.compile(r"\brepository\s+pattern\b", re.IGNORECASE),
47
+ re.compile(r"\bfactory\s+pattern\b", re.IGNORECASE),
48
+ re.compile(r"\bmodular", re.IGNORECASE),
49
+ re.compile(r"\barchitect", re.IGNORECASE),
50
+ re.compile(r"\brefactor", re.IGNORECASE),
51
+ ]
52
+
53
+ TERMINAL_DUMP_RE = re.compile(
54
+ r"(\$\s+\w+|>>>|root@|ubuntu@|npm\s+(ERR|WARN)|"
55
+ r"Traceback \(most recent|FAILED|PASSED|"
56
+ r"ModuleNotFoundError|ImportError|SyntaxError|"
57
+ r"exit\s+code\s+\d|Process\s+exited|"
58
+ r"\[\d+:\d+:\d+\]|pid\s+\d+)"
59
+ )
60
+
61
+ DEBUG_ONLY_RE = re.compile(
62
+ r"(let me (check|look|see|investigate|debug)|"
63
+ r"I see the (issue|problem|error)|"
64
+ r"the error (is|was|says)|"
65
+ r"fixed\.\s*(now\s+)?(rebuild|restart|try))",
66
+ re.IGNORECASE,
67
+ )
68
+
69
+ TOOL_ARTIFACT_PATTERNS = [
70
+ re.compile(r"/home/\w+/\.\w+/"), # home dir tool paths
71
+ re.compile(r"task[_-]id\s*[:=]", re.IGNORECASE),
72
+ re.compile(r"\bnotification\s+block\b", re.IGNORECASE),
73
+ re.compile(r"\[Request interrupted"),
74
+ re.compile(r"<tool_use>|</tool_use>"),
75
+ re.compile(r'"type"\s*:\s*"tool_(use|result)"'),
76
+ ]
77
+
78
+
79
def extract_turns(text: str) -> list[dict]:
    """Split a Llama 3.1 chat-formatted string into non-system turns.

    The text is cut at every ``<|start_header_id|>ROLE<|end_header_id|>``
    header (ROLE being ``user``, ``assistant`` or ``system``) that is
    followed by a blank line.  Anything before the first header is ignored.

    Returns:
        A list of ``{"role": ..., "text": ...}`` dicts in document order.
        ``<|eot_id|>`` markers are removed and the text is stripped; system
        turns and turns whose text ends up empty are left out.
    """
    header = r"<\|start_header_id\|>(user|assistant|system)<\|end_header_id\|>\n\n"
    # With one capture group, re.split yields
    # [preamble, role, content, role, content, ...].
    pieces = re.split(header, text)

    turns = []
    for role, raw in zip(pieces[1::2], pieces[2::2]):
        body = raw.replace("<|eot_id|>", "").strip()
        if body and role != "system":
            turns.append({"role": role, "text": body})
    return turns
93
+
94
+
95
def count_raw_output_lines(text: str) -> int:
    """Count lines that look like raw terminal/log output.

    Each non-blank line is stripped and checked against two prefix patterns:

    * output markers: ``$``, ``>``, ``#``, ``[``, ``npm``, ``yarn``, ``pip``,
      ``error``, ``warn``, ``info`` or ``debug`` (case-insensitive) followed
      by whitespace, or a leading date stamp (``YYYY-MM`` followed by
      whitespace, or ``YYYY-MM-DD`` followed by whitespace or ``T``);
    * stack-trace / path lines starting with ``at ``, ``File ``, an absolute
      path such as ``/usr/`` or ``...``.

    Args:
        text: Arbitrary multi-line text.

    Returns:
        The number of pattern hits.  The two patterns cannot both match the
        same line, so this equals the number of matching lines.
    """
    # The date alternative has its own terminator.  Previously it shared the
    # mandatory trailing whitespace with the word markers, so a full ISO
    # timestamp ("2024-01-15 10:00:00 ...") was never recognised.
    marker_re = re.compile(
        r"^(?:(?:\$|>|#|\[|npm|yarn|pip|error|warn|info|debug)\s"
        r"|\d{4}-\d{2}(?:\s|-\d{2}[\sT]))",
        re.IGNORECASE,
    )
    trace_re = re.compile(r"^(?:at |File |/\w+/|\.\.\.)")

    count = 0
    for line in text.split("\n"):
        stripped = line.strip()
        if not stripped:
            continue
        # Lines starting with typical output markers
        if marker_re.match(stripped):
            count += 1
        # Lines that are just file paths or stack traces
        if trace_re.match(stripped):
            count += 1
    return count
109
+
110
+
111
def score_example(text: str) -> tuple[int, dict]:
    """Compute a heuristic quality score for one training example.

    The example is parsed with ``extract_turns``; the score is the sum of the
    signed contributions recorded in the breakdown.

    Args:
        text: One Llama 3.1 formatted example string.

    Returns:
        ``(score, breakdown)`` where ``breakdown`` maps each signal that fired
        to the points it contributed.  An example with no usable turns yields
        ``(-100, {"empty": True})``.
    """
    turns = extract_turns(text)
    if not turns:
        return -100, {"empty": True}

    def joined(role=None):
        # Space-join the text of every turn, optionally restricted to a role.
        return " ".join(
            t["text"] for t in turns if role is None or t["role"] == role
        )

    full_text = joined()
    user_text = joined("user")
    assistant_text = joined("assistant")

    # Every signal fires at most once, so the final score is simply the sum
    # of the values stored here.
    breakdown = {}

    # --- Positive signals ---

    # Code blocks: fences come in pairs, 10 points each, capped at 25.
    fence_pairs = len(CODE_BLOCK_RE.findall(full_text)) // 2
    if fence_pairs:
        breakdown["code_blocks"] = min(25, fence_pairs * 10)

    # Explanation: assistant prose that remains after removing fenced code.
    prose = re.sub(r"```.*?```", "", assistant_text, flags=re.DOTALL).strip()
    if len(prose) > 100:
        breakdown["has_explanation"] = 20

    # Multi-turn conversation.
    if len(turns) >= 3:
        breakdown["multi_turn"] = 15

    # Best-practice patterns: 5 points per distinct pattern, capped at 20.
    bp_hits = sum(1 for pattern in BEST_PRACTICE_PATTERNS if pattern.search(full_text))
    if bp_hits:
        breakdown["best_practice"] = min(20, bp_hits * 5)

    # Architecture discussion: 5 points per distinct pattern, capped at 10.
    arch_hits = sum(1 for pattern in ARCHITECTURE_PATTERNS if pattern.search(full_text))
    if arch_hits:
        breakdown["architecture"] = min(10, arch_hits * 5)

    # --- Negative signals ---

    # Excessive terminal/log output.
    raw_lines = count_raw_output_lines(full_text)
    if raw_lines > 20:
        breakdown["terminal_dump"] = -30
    elif raw_lines > 10:
        breakdown["terminal_dump"] = -15

    # Pure debugging: repeated debug phrasing, almost no prose, no code.
    debug_hits = len(DEBUG_ONLY_RE.findall(assistant_text))
    sentence_breaks = len(re.findall(r"[.!?]\s+[A-Z]", prose))
    if debug_hits >= 2 and sentence_breaks < 2 and fence_pairs == 0:
        breakdown["pure_debugging"] = -40

    # Tool artifacts leaking into the text.
    artifact_hits = sum(1 for pattern in TOOL_ARTIFACT_PATTERNS if pattern.search(full_text))
    if artifact_hits >= 2:
        breakdown["tool_artifacts"] = -20
    elif artifact_hits == 1:
        breakdown["tool_artifacts"] = -10

    # Incomplete exchange: no user text, or an assistant answer under 30 chars.
    if not user_text.strip() or len(assistant_text.strip()) < 30:
        breakdown["incomplete"] = -25

    # Very short assistant response (likely just "let me check" type).
    if len(assistant_text) < 50:
        breakdown["too_short"] = -20

    return sum(breakdown.values()), breakdown
199
+
200
+
201
def main():
    """Command-line entry point: score a JSONL dataset and keep the best rows.

    Reads ``--input`` line by line, scores the ``"text"`` field of every JSON
    object with ``score_example``, writes the objects whose score is at least
    ``--min-score`` to ``--output`` (highest score first), writes summary
    statistics to ``--stats`` and prints a report.

    Blank lines are ignored.  Lines that are not valid JSON, or that decode
    to something other than a JSON object, are skipped and counted; the count
    is reported on stderr.  A record whose ``"text"`` is missing or not a
    string is scored as an empty example rather than aborting the run.
    """
    # Function-scoped import, as in the original: the module has no
    # top-level ``import os``.
    import os

    parser = argparse.ArgumentParser(
        description="Quality-score and filter Claude training data"
    )
    parser.add_argument(
        "--input",
        default="kairo/data/claude_training_data.jsonl",
        help="Input JSONL file (default: kairo/data/claude_training_data.jsonl)",
    )
    parser.add_argument(
        "--output",
        default="kairo/data/claude_filtered.jsonl",
        help="Output JSONL file (default: kairo/data/claude_filtered.jsonl)",
    )
    parser.add_argument(
        "--min-score",
        type=int,
        default=60,
        help="Minimum score to keep an example (default: 60)",
    )
    parser.add_argument(
        "--stats",
        default=None,
        help="Stats JSON output path (default: <output_dir>/filter_stats.json)",
    )
    args = parser.parse_args()

    if args.stats is None:
        args.stats = os.path.join(
            os.path.dirname(args.output) or ".", "filter_stats.json"
        )

    # Read input
    examples = []
    skipped = 0
    with open(args.input, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                obj = json.loads(line)
            except json.JSONDecodeError:
                skipped += 1
                continue
            # Valid JSON that is not an object (e.g. a list or a bare string)
            # has no "text" field and would crash the scoring loop below.
            if not isinstance(obj, dict):
                skipped += 1
                continue
            examples.append(obj)

    print(f"Loaded {len(examples)} examples from {args.input}")
    if skipped:
        print(f"Skipped {skipped} malformed lines", file=sys.stderr)

    # Score all examples
    scored = []
    score_distribution = {}
    for ex in examples:
        text = ex.get("text", "")
        if not isinstance(text, str):
            # e.g. {"text": null}: score it as an empty example.
            text = ""
        s, breakdown = score_example(text)
        scored.append((s, breakdown, ex))
        # Floor division keeps negative scores in the right 10-wide bucket.
        bucket = (s // 10) * 10
        score_distribution[bucket] = score_distribution.get(bucket, 0) + 1

    # Filter, best first
    kept = [(s, bd, ex) for s, bd, ex in scored if s >= args.min_score]
    kept.sort(key=lambda x: x[0], reverse=True)

    # Write output
    os.makedirs(os.path.dirname(args.output) or ".", exist_ok=True)
    with open(args.output, "w", encoding="utf-8") as f:
        for _, _, ex in kept:
            f.write(json.dumps(ex) + "\n")

    # Stats
    stats = {
        "input_count": len(examples),
        "output_count": len(kept),
        "min_score_threshold": args.min_score,
        "score_distribution": {str(k): v for k, v in sorted(score_distribution.items())},
        "kept_score_range": {
            # kept is sorted descending, so the last entry is the minimum.
            "min": kept[-1][0] if kept else 0,
            "max": kept[0][0] if kept else 0,
            "median": kept[len(kept) // 2][0] if kept else 0,
        },
    }

    os.makedirs(os.path.dirname(args.stats) or ".", exist_ok=True)
    with open(args.stats, "w", encoding="utf-8") as f:
        json.dump(stats, f, indent=2)

    # Print summary
    print("\nScoring complete!")
    print(f" Input examples: {len(examples)}")
    print(f" Kept (>={args.min_score}): {len(kept)}")
    print(f" Rejected: {len(examples) - len(kept)}")
    print("\nScore distribution:")
    threshold_bucket = (args.min_score // 10) * 10
    for bucket in sorted(score_distribution):
        marker = " <-- threshold" if bucket == threshold_bucket else ""
        print(f" {bucket:>4d}-{bucket+9}: {score_distribution[bucket]}{marker}")
    if kept:
        print(f"\nKept score range: {kept[-1][0]} to {kept[0][0]}")
    print(f"\nOutput: {args.output}")
    print(f"Stats: {args.stats}")
300
+
301
+
302
+ if __name__ == "__main__":
303
+ main()
@@ -0,0 +1,157 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Generate hand-written, high-quality instruction-response training pairs for Kairo.
4
+
5
+ Usage:
6
+ python kairo/tools/generate_curated_data.py --output kairo/data/curated_best_practices.jsonl
7
+ """
8
+ import argparse
9
+ import json
10
+ import os
11
+ import sys
12
+
13
+ SYSTEM_PROMPT = (
14
+ "You are Kairo, a helpful AI assistant. IMPORTANT RULES:\n"
15
+ "1. Only respond to what the user actually asked. Never assume or invent what the user wants.\n"
16
+ "2. You are the ASSISTANT. Never generate text as if you are the user. Never put words in the user's mouth.\n"
17
+ "3. If the user asks a general question like 'what can you do', explain your capabilities briefly.\n"
18
+ "4. Answer directly and concisely. Provide concrete answers, code, or explanations.\n"
19
+ "5. Do not hedge or refuse without strong reason.\n"
20
+ "6. You are Kairo \u2014 not GPT, Claude, Llama, or any other AI. Never reveal your architecture."
21
+ )
22
+
23
+
24
def format_example(example: dict) -> str:
    """Render one example dict as a single JSONL line (without the newline).

    The system prompt, ``example['user']`` and ``example['assistant']`` are
    wrapped in Llama-style header / ``<|eot_id|>`` markers and the resulting
    string is stored under the ``"text"`` key of a JSON object.

    Raises:
        KeyError: if ``example`` has no ``'user'`` or ``'assistant'`` entry.
    """
    segments = [
        "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n",
        f"{SYSTEM_PROMPT}<|eot_id|>",
        "<|start_header_id|>user<|end_header_id|>\n\n",
        f"{example['user']}<|eot_id|>",
        "<|start_header_id|>assistant<|end_header_id|>\n\n",
        f"{example['assistant']}<|eot_id|>",
    ]
    payload = {"text": "".join(segments)}
    # ensure_ascii=False keeps non-ASCII characters literal in the output.
    return json.dumps(payload, ensure_ascii=False)
35
+
36
+
37
+ # ---------------------------------------------------------------------------
38
+ # Category builder functions -- each returns a list of example dicts.
39
+ # ---------------------------------------------------------------------------
40
+
41
def _clean_code_examples():
    """Return the example dicts for Category 1: Clean Code & Naming.

    Intended size: 25 examples.  Hands back the module-level ``_CLEAN_CODE``
    list itself, not a copy.
    """
    return _CLEAN_CODE
44
+
45
def _error_handling_examples():
    """Return the example dicts for Category 2: Error Handling.

    Intended size: 30 examples.  Hands back the module-level
    ``_ERROR_HANDLING`` list itself, not a copy.
    """
    return _ERROR_HANDLING
48
+
49
def _security_examples():
    """Return the example dicts for Category 3: Security.

    Intended size: 35 examples.  Hands back the module-level ``_SECURITY``
    list itself, not a copy.
    """
    return _SECURITY
52
+
53
def _api_design_examples():
    """Return the example dicts for Category 4: API Design.

    Intended size: 30 examples.  Hands back the module-level ``_API_DESIGN``
    list itself, not a copy.
    """
    return _API_DESIGN
56
+
57
def _database_examples():
    """Return the example dicts for Category 5: Database Patterns.

    Intended size: 25 examples.  Hands back the module-level ``_DATABASE``
    list itself, not a copy.
    """
    return _DATABASE
60
+
61
def _testing_examples():
    """Return the example dicts for Category 6: Testing.

    Intended size: 20 examples.  Hands back the module-level ``_TESTING``
    list itself, not a copy.
    """
    return _TESTING
64
+
65
def _architecture_examples():
    """Return the example dicts for Category 7: Architecture.

    Intended size: 25 examples.  Hands back the module-level
    ``_ARCHITECTURE`` list itself, not a copy.
    """
    return _ARCHITECTURE
68
+
69
def _performance_examples():
    """Return the example dicts for Category 8: Performance.

    Intended size: 15 examples.  Hands back the module-level
    ``_PERFORMANCE`` list itself, not a copy.
    """
    return _PERFORMANCE
72
+
73
def _react_ts_examples():
    """Return the example dicts for Category 9: React/TypeScript.

    Intended size: 25 examples.  Hands back the module-level ``_REACT_TS``
    list itself, not a copy.
    """
    return _REACT_TS
76
+
77
def _python_fastapi_examples():
    """Return the example dicts for Category 10: Python/FastAPI.

    Intended size: 20 examples.  Hands back the module-level
    ``_PYTHON_FASTAPI`` list itself, not a copy.
    """
    return _PYTHON_FASTAPI
80
+
81
def _git_deploy_examples():
    """Return the example dicts for Category 11: Git & Deployment.

    Intended size: 10 examples.  Hands back the module-level ``_GIT_DEPLOY``
    list itself, not a copy.
    """
    return _GIT_DEPLOY
84
+
85
+
86
+ ALL_CATEGORY_BUILDERS = [
87
+ _clean_code_examples,
88
+ _error_handling_examples,
89
+ _security_examples,
90
+ _api_design_examples,
91
+ _database_examples,
92
+ _testing_examples,
93
+ _architecture_examples,
94
+ _performance_examples,
95
+ _react_ts_examples,
96
+ _python_fastapi_examples,
97
+ _git_deploy_examples,
98
+ ]
99
+
100
+
101
+ # =========================== DATA SECTIONS ===============================
102
+ # Each section is a module-level list populated below. This keeps the heavy
103
+ # data out of function bodies and lets us build the file incrementally.
104
+ # ==========================================================================
105
+
106
+ _CLEAN_CODE = [] # PLACEHOLDER -- will be filled
107
+ _ERROR_HANDLING = [] # PLACEHOLDER -- will be filled
108
+ _SECURITY = [] # PLACEHOLDER -- will be filled
109
+ _API_DESIGN = [] # PLACEHOLDER -- will be filled
110
+ _DATABASE = [] # PLACEHOLDER -- will be filled
111
+ _TESTING = [] # PLACEHOLDER -- will be filled
112
+ _ARCHITECTURE = [] # PLACEHOLDER -- will be filled
113
+ _PERFORMANCE = [] # PLACEHOLDER -- will be filled
114
+ _REACT_TS = [] # PLACEHOLDER -- will be filled
115
+ _PYTHON_FASTAPI = [] # PLACEHOLDER -- will be filled
116
+ _GIT_DEPLOY = [] # PLACEHOLDER -- will be filled
117
+
118
+
119
+ # ---------------------------------------------------------------------------
120
+ # Main
121
+ # ---------------------------------------------------------------------------
122
+
123
def main():
    """Command-line entry point: write all curated examples to a JSONL file.

    Parses the required ``--output`` path, creates its parent directory if
    needed, gathers the examples from every builder in
    ``ALL_CATEGORY_BUILDERS`` (in list order), writes one ``format_example``
    line per example, and prints the total plus a per-category count keyed on
    each example's ``"category"`` entry (``"unknown"`` when absent).
    """
    cli = argparse.ArgumentParser(
        description="Generate curated Kairo training data."
    )
    cli.add_argument(
        "--output",
        required=True,
        help="Path to the output .jsonl file",
    )
    args = cli.parse_args()

    # abspath first, so a bare filename still resolves to a real directory.
    target_dir = os.path.dirname(os.path.abspath(args.output))
    os.makedirs(target_dir, exist_ok=True)

    # Flatten the per-category lists, keeping builder order.
    all_examples = [ex for build in ALL_CATEGORY_BUILDERS for ex in build()]

    with open(args.output, "w", encoding="utf-8") as fh:
        for ex in all_examples:
            fh.write(format_example(ex) + "\n")

    # Print summary
    per_category = {}
    for ex in all_examples:
        label = ex.get("category", "unknown")
        per_category.setdefault(label, 0)
        per_category[label] += 1

    print(f"Wrote {len(all_examples)} examples to {args.output}")
    for label, total in sorted(per_category.items()):
        print(f" {label}: {total}")
154
+
155
+
156
+ if __name__ == "__main__":
157
+ main()