@comate/zulu 1.2.1-beta.2 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (170) hide show
  1. package/comate-engine/assets/skills/auto-commit-comate/SKILL.md +260 -0
  2. package/comate-engine/assets/skills/auto-commit-comate/references/data_structures.md +189 -0
  3. package/comate-engine/assets/skills/auto-commit-comate/references/new_version_instruction.md +209 -0
  4. package/comate-engine/assets/skills/auto-commit-comate/references/old_version_instruction.md +208 -0
  5. package/comate-engine/assets/skills/auto-commit-comate/scripts/git_diff_cli.py +196 -0
  6. package/comate-engine/assets/skills/{smart-commit → auto-commit-comate}/scripts/git_utils.py +20 -10
  7. package/comate-engine/assets/skills/{smart-commit → auto-commit-comate}/scripts/icafe/client.py +69 -40
  8. package/comate-engine/assets/skills/{smart-commit → auto-commit-comate}/scripts/icafe/farseer.py +8 -9
  9. package/comate-engine/assets/skills/{smart-commit → auto-commit-comate}/scripts/icafe/matching.py +65 -9
  10. package/comate-engine/assets/skills/auto-commit-comate/scripts/match_card_cli.py +37 -0
  11. package/comate-engine/assets/skills/cnap-comate/SKILL.md +157 -0
  12. package/comate-engine/assets/skills/cnap-comate/references/cases.md +198 -0
  13. package/comate-engine/assets/skills/cnap-comate/references/deploy-troubleshoot.md +15 -0
  14. package/comate-engine/assets/skills/cnap-comate/references/install.md +43 -0
  15. package/comate-engine/assets/skills/cnap-comate/references/kubectl.md +55 -0
  16. package/comate-engine/assets/skills/cnap-comate/references/login.md +125 -0
  17. package/comate-engine/assets/skills/cnap-comate/references/oncall.md +24 -0
  18. package/comate-engine/assets/skills/cnap-comate/scripts/install_cnap_cli.sh +36 -0
  19. package/comate-engine/assets/skills/code-security/SKILL.md +176 -0
  20. package/comate-engine/assets/skills/code-security/references/credential_hosting.md +102 -0
  21. package/comate-engine/assets/skills/code-security/references/vul_repair_sensitive.md +219 -0
  22. package/comate-engine/assets/skills/code-security/scripts/build_repair_info.py +0 -0
  23. package/comate-engine/assets/skills/code-security/scripts/credential_hosting.py +99 -0
  24. package/comate-engine/assets/skills/code-security/scripts/credential_poll.py +350 -0
  25. package/comate-engine/assets/skills/code-security/scripts/http_client.py +173 -0
  26. package/comate-engine/assets/skills/code-security/scripts/parse_scan_result.py +301 -0
  27. package/comate-engine/assets/skills/code-security/scripts/repair_vulnerability.py +261 -0
  28. package/comate-engine/assets/skills/code-security/scripts/report_chat.py +198 -0
  29. package/comate-engine/assets/skills/code-security/scripts/scan_vulnerability.py +316 -0
  30. package/comate-engine/assets/skills/code-security-comate/SKILL.md +219 -0
  31. package/comate-engine/assets/skills/code-security-comate/references/credential_hosting.md +102 -0
  32. package/comate-engine/assets/skills/code-security-comate/references/vul_repair-go_sql_injection.md +399 -0
  33. package/comate-engine/assets/skills/code-security-comate/references/vul_repair-java_sql_injection.md +591 -0
  34. package/comate-engine/assets/skills/code-security-comate/references/vul_repair-php_sql_injection.md +318 -0
  35. package/comate-engine/assets/skills/code-security-comate/references/vul_repair-python_sql_injection.md +198 -0
  36. package/comate-engine/assets/skills/code-security-comate/references/vul_repair_sensitive.md +219 -0
  37. package/comate-engine/assets/skills/code-security-comate/scripts/credential_hosting.py +87 -0
  38. package/comate-engine/assets/skills/code-security-comate/scripts/credential_poll.py +345 -0
  39. package/comate-engine/assets/skills/code-security-comate/scripts/http_client.py +173 -0
  40. package/comate-engine/assets/skills/code-security-comate/scripts/parse_scan_result.py +392 -0
  41. package/comate-engine/assets/skills/code-security-comate/scripts/repair_vulnerability.py +245 -0
  42. package/comate-engine/assets/skills/code-security-comate/scripts/report_chat.py +145 -0
  43. package/comate-engine/assets/skills/code-security-comate/scripts/scan_vulnerability.py +444 -0
  44. package/comate-engine/assets/skills/code-security-comate/scripts/utils.py +153 -0
  45. package/comate-engine/assets/skills/comate-docs-comate/SKILL.md +148 -0
  46. package/comate-engine/assets/skills/comate-docs-comate/references/doc-map-extended.md +78 -0
  47. package/comate-engine/assets/skills/comate-docs-comate/references/models-and-billing.md +51 -0
  48. package/comate-engine/assets/skills/comate-docs-comate/references/product-overview.md +73 -0
  49. package/comate-engine/assets/skills/comate-docs-comate/references/query_content.md +83 -0
  50. package/comate-engine/assets/skills/comate-docs-comate/references/query_repo.md +57 -0
  51. package/comate-engine/assets/skills/comate-docs-comate/scripts/ku_operator.py +1575 -0
  52. package/comate-engine/assets/skills/create-image-comate/SKILL.md +278 -0
  53. package/comate-engine/assets/skills/create-skill-comate/SKILL.md +308 -217
  54. package/comate-engine/assets/skills/create-skill-comate/agents/analyzer.md +274 -0
  55. package/comate-engine/assets/skills/create-skill-comate/agents/comparator.md +202 -0
  56. package/comate-engine/assets/skills/create-skill-comate/agents/grader.md +223 -0
  57. package/comate-engine/assets/skills/create-skill-comate/assets/eval_review.html +146 -0
  58. package/comate-engine/assets/skills/create-skill-comate/eval-viewer/generate_review.py +489 -0
  59. package/comate-engine/assets/skills/create-skill-comate/eval-viewer/viewer.html +1325 -0
  60. package/comate-engine/assets/skills/create-skill-comate/references/schemas.md +430 -0
  61. package/comate-engine/assets/skills/create-skill-comate/scripts/__init__.py +0 -0
  62. package/comate-engine/assets/skills/create-skill-comate/scripts/__pycache__/__init__.cpython-311.pyc +0 -0
  63. package/comate-engine/assets/skills/create-skill-comate/scripts/__pycache__/aggregate_benchmark.cpython-311.pyc +0 -0
  64. package/comate-engine/assets/skills/create-skill-comate/scripts/aggregate_benchmark.py +412 -0
  65. package/comate-engine/assets/skills/create-skill-comate/scripts/generate_report.py +334 -0
  66. package/comate-engine/assets/skills/create-skill-comate/scripts/package_skill.py +140 -0
  67. package/comate-engine/assets/skills/create-skill-comate/scripts/utils.py +53 -0
  68. package/comate-engine/assets/skills/find-skills-comate/SKILL.md +15 -12
  69. package/comate-engine/assets/skills/find-skills-comate/scripts/fetch_skills.py +32 -3
  70. package/comate-engine/assets/skills/get-ugate-token-comate/SKILL.md +159 -0
  71. package/comate-engine/assets/skills/get-ugate-token-comate/getUgateToken.py +150 -0
  72. package/comate-engine/assets/skills/icafe-comate/SKILL.md +240 -0
  73. package/comate-engine/assets/skills/icafe-comate/references/ai-workflows.md +233 -0
  74. package/comate-engine/assets/skills/icafe-comate/references/commands.md +1147 -0
  75. package/comate-engine/assets/skills/icafe-comate/references/error-handling.md +164 -0
  76. package/comate-engine/assets/skills/icafe-comate/references/git-auto-bindcard-workflow.md +201 -0
  77. package/comate-engine/assets/skills/icafe-comate/references/git-bindcard-workflow.md +327 -0
  78. package/comate-engine/assets/skills/icafe-comate/references/iql-syntax.md +327 -0
  79. package/comate-engine/assets/skills/icafe-comate/references/platform-concepts.md +317 -0
  80. package/comate-engine/assets/skills/icafe-comate/references/smart-create-workflow.md +171 -0
  81. package/comate-engine/assets/skills/icafe-comate/references/smart-find-workflow.md +127 -0
  82. package/comate-engine/assets/skills/icafe-comate/references/smart-update-workflow.md +118 -0
  83. package/comate-engine/assets/skills/icode-comate/SKILL.md +366 -0
  84. package/comate-engine/assets/skills/icode-comate/references/api/add_reviewers.md +44 -0
  85. package/comate-engine/assets/skills/icode-comate/references/api/build_fetch_command.md +89 -0
  86. package/comate-engine/assets/skills/icode-comate/references/api/check_repo_permission.md +89 -0
  87. package/comate-engine/assets/skills/icode-comate/references/api/create_branch.md +79 -0
  88. package/comate-engine/assets/skills/icode-comate/references/api/create_draft_comment.md +109 -0
  89. package/comate-engine/assets/skills/icode-comate/references/api/get_ai_cr_result.md +190 -0
  90. package/comate-engine/assets/skills/icode-comate/references/api/get_ai_review.md +97 -0
  91. package/comate-engine/assets/skills/icode-comate/references/api/get_diff_content.md +92 -0
  92. package/comate-engine/assets/skills/icode-comate/references/api/get_diff_file.md +88 -0
  93. package/comate-engine/assets/skills/icode-comate/references/api/get_machine_check.md +73 -0
  94. package/comate-engine/assets/skills/icode-comate/references/api/get_my_reviews.md +115 -0
  95. package/comate-engine/assets/skills/icode-comate/references/api/get_person_commit.md +89 -0
  96. package/comate-engine/assets/skills/icode-comate/references/api/get_person_repo.md +63 -0
  97. package/comate-engine/assets/skills/icode-comate/references/api/get_repo_branch.md +62 -0
  98. package/comate-engine/assets/skills/icode-comate/references/api/get_repo_config.md +91 -0
  99. package/comate-engine/assets/skills/icode-comate/references/api/get_repo_members.md +118 -0
  100. package/comate-engine/assets/skills/icode-comate/references/api/get_repo_reviews.md +91 -0
  101. package/comate-engine/assets/skills/icode-comate/references/api/get_review_comments.md +87 -0
  102. package/comate-engine/assets/skills/icode-comate/references/api/get_review_info.md +81 -0
  103. package/comate-engine/assets/skills/icode-comate/references/api/get_submit_settings.md +105 -0
  104. package/comate-engine/assets/skills/icode-comate/references/api/icode-api.md +86 -0
  105. package/comate-engine/assets/skills/icode-comate/references/api/publish_comments.md +72 -0
  106. package/comate-engine/assets/skills/icode-comate/references/api/set_review_score.md +58 -0
  107. package/comate-engine/assets/skills/icode-comate/references/api/start_ai_review.md +77 -0
  108. package/comate-engine/assets/skills/icode-comate/references/api/submit_review.md +50 -0
  109. package/comate-engine/assets/skills/icode-comate/references/api/trigger_ai_cr.md +63 -0
  110. package/comate-engine/assets/skills/icode-comate/references/feature/add-reviewer.md +92 -0
  111. package/comate-engine/assets/skills/icode-comate/references/feature/fix-machine-check.md +144 -0
  112. package/comate-engine/assets/skills/icode-comate/references/feature/merge-cr.md +100 -0
  113. package/comate-engine/assets/skills/icode-comate/references/feature/ssh-setup.md +106 -0
  114. package/comate-engine/assets/skills/icode-comate/references/feature/submit-acr.md +135 -0
  115. package/comate-engine/assets/skills/icode-comate/references/feature/submit-cr.md +123 -0
  116. package/comate-engine/assets/skills/icode-comate/references/git/clone.md +67 -0
  117. package/comate-engine/assets/skills/icode-comate/references/git/icode-git.md +68 -0
  118. package/comate-engine/assets/skills/icode-comate/references/git/push.md +64 -0
  119. package/comate-engine/assets/skills/icode-comate/references/git/push_cr.md +103 -0
  120. package/comate-engine/assets/skills/icode-comate/references/install.md +144 -0
  121. package/comate-engine/assets/skills/icode-comate/references/login.md +111 -0
  122. package/comate-engine/assets/skills/icode-comate/scripts/add-reviewer.sh +154 -0
  123. package/comate-engine/assets/skills/icode-comate/scripts/common.sh +145 -0
  124. package/comate-engine/assets/skills/icode-comate/scripts/fix-machine-check.sh +131 -0
  125. package/comate-engine/assets/skills/icode-comate/scripts/merge-cr.sh +105 -0
  126. package/comate-engine/assets/skills/icode-comate/scripts/ssh-setup.sh +159 -0
  127. package/comate-engine/assets/skills/icode-comate/scripts/submit-acr.sh +236 -0
  128. package/comate-engine/assets/skills/icode-comate/scripts/submit-cr.sh +104 -0
  129. package/comate-engine/assets/skills/icode-comate/scripts/test-preflight.sh +89 -0
  130. package/comate-engine/assets/skills/ku-operator-comate/SKILL.md +121 -0
  131. package/comate-engine/assets/skills/ku-operator-comate/examples.md +190 -0
  132. package/comate-engine/assets/skills/ku-operator-comate/references/add_member.md +49 -0
  133. package/comate-engine/assets/skills/ku-operator-comate/references/change_scope.md +38 -0
  134. package/comate-engine/assets/skills/ku-operator-comate/references/copy_doc.md +50 -0
  135. package/comate-engine/assets/skills/ku-operator-comate/references/create_doc.md +61 -0
  136. package/comate-engine/assets/skills/ku-operator-comate/references/delete_doc.md +31 -0
  137. package/comate-engine/assets/skills/ku-operator-comate/references/edit_content.md +568 -0
  138. package/comate-engine/assets/skills/ku-operator-comate/references/move_doc.md +45 -0
  139. package/comate-engine/assets/skills/ku-operator-comate/references/query_comment.md +79 -0
  140. package/comate-engine/assets/skills/ku-operator-comate/references/query_content.md +83 -0
  141. package/comate-engine/assets/skills/ku-operator-comate/references/query_flowchart.md +84 -0
  142. package/comate-engine/assets/skills/ku-operator-comate/references/query_permission.md +38 -0
  143. package/comate-engine/assets/skills/ku-operator-comate/references/query_recent_view.md +67 -0
  144. package/comate-engine/assets/skills/ku-operator-comate/references/query_repo.md +57 -0
  145. package/comate-engine/assets/skills/ku-operator-comate/references/query_user_info.md +37 -0
  146. package/comate-engine/assets/skills/ku-operator-comate/references/update_member.md +41 -0
  147. package/comate-engine/assets/skills/ku-operator-comate/references/upload_attachment.md +52 -0
  148. package/comate-engine/assets/skills/ku-operator-comate/scripts/ku_operator.py +1575 -0
  149. package/comate-engine/node_modules/better-sqlite3/node_modules/.bin/prebuild-install +2 -2
  150. package/comate-engine/node_modules/tree-sitter-bash/node_modules/.bin/node-gyp-build +2 -2
  151. package/comate-engine/node_modules/tree-sitter-bash/node_modules/.bin/node-gyp-build-optional +2 -2
  152. package/comate-engine/node_modules/tree-sitter-bash/node_modules/.bin/node-gyp-build-test +2 -2
  153. package/comate-engine/package.json +2 -0
  154. package/comate-engine/server.js +170 -46
  155. package/dist/bundle/index.js +8 -8
  156. package/package.json +1 -1
  157. package/comate-engine/assets/skills/figma2code-comate/codeConnect.md +0 -37
  158. package/comate-engine/assets/skills/figma2code-comate/designToken.md +0 -3
  159. package/comate-engine/assets/skills/figma2code-comate/f2cMcp.md +0 -59
  160. package/comate-engine/assets/skills/smart-commit/SKILL.md +0 -646
  161. package/comate-engine/node_modules/@comate/plugin-host/dist/index-AZIho4HV.js +0 -1
  162. package/comate-engine/node_modules/@comate/plugin-host/dist/user-BIpzRUfb.js +0 -44
  163. package/comate-engine/node_modules/better-sqlite3/build/Release/better_sqlite3.node +0 -0
  164. /package/comate-engine/assets/skills/{smart-commit → auto-commit-comate}/references/issue_type_mapping.json +0 -0
  165. /package/comate-engine/assets/skills/{smart-commit → auto-commit-comate}/references/query_reference.md +0 -0
  166. /package/comate-engine/assets/skills/{smart-commit → auto-commit-comate}/scripts/compat.py +0 -0
  167. /package/comate-engine/assets/skills/{smart-commit → auto-commit-comate}/scripts/create_card_cli.py +0 -0
  168. /package/comate-engine/assets/skills/{smart-commit → auto-commit-comate}/scripts/icafe/__init__.py +0 -0
  169. /package/comate-engine/assets/skills/{smart-commit → auto-commit-comate}/scripts/logger.py +0 -0
  170. /package/comate-engine/assets/skills/{smart-commit → auto-commit-comate}/scripts/recognize_card_cli.py +0 -0
@@ -0,0 +1,146 @@
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Eval Set Review - __SKILL_NAME_PLACEHOLDER__</title>
7
+ <link rel="preconnect" href="https://fonts.googleapis.com">
8
+ <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
9
+ <link href="https://fonts.googleapis.com/css2?family=Poppins:wght@500;600&family=Lora:wght@400;500&display=swap" rel="stylesheet">
10
+ <style>
11
+ * { box-sizing: border-box; margin: 0; padding: 0; }
12
+ body { font-family: 'Lora', Georgia, serif; background: #faf9f5; padding: 2rem; color: #141413; }
13
+ h1 { font-family: 'Poppins', sans-serif; margin-bottom: 0.5rem; font-size: 1.5rem; }
14
+ .description { color: #b0aea5; margin-bottom: 1.5rem; font-style: italic; max-width: 900px; }
15
+ .controls { margin-bottom: 1rem; display: flex; gap: 0.5rem; }
16
+ .btn { font-family: 'Poppins', sans-serif; padding: 0.5rem 1rem; border: none; border-radius: 6px; cursor: pointer; font-size: 0.875rem; font-weight: 500; }
17
+ .btn-add { background: #6a9bcc; color: white; }
18
+ .btn-add:hover { background: #5889b8; }
19
+ .btn-export { background: #d97757; color: white; }
20
+ .btn-export:hover { background: #c4613f; }
21
+ table { width: 100%; max-width: 1100px; border-collapse: collapse; background: white; border-radius: 6px; overflow: hidden; box-shadow: 0 1px 3px rgba(0,0,0,0.08); }
22
+ th { font-family: 'Poppins', sans-serif; background: #141413; color: #faf9f5; padding: 0.75rem 1rem; text-align: left; font-size: 0.875rem; }
23
+ td { padding: 0.75rem 1rem; border-bottom: 1px solid #e8e6dc; vertical-align: top; }
24
+ tr:nth-child(even) td { background: #faf9f5; }
25
+ tr:hover td { background: #f3f1ea; }
26
+ .section-header td { background: #e8e6dc; font-family: 'Poppins', sans-serif; font-weight: 500; font-size: 0.8rem; color: #141413; text-transform: uppercase; letter-spacing: 0.05em; }
27
+ .query-input { width: 100%; padding: 0.4rem; border: 1px solid #e8e6dc; border-radius: 4px; font-size: 0.875rem; font-family: 'Lora', Georgia, serif; resize: vertical; min-height: 60px; }
28
+ .query-input:focus { outline: none; border-color: #d97757; box-shadow: 0 0 0 2px rgba(217,119,87,0.15); }
29
+ .toggle { position: relative; display: inline-block; width: 44px; height: 24px; }
30
+ .toggle input { opacity: 0; width: 0; height: 0; }
31
+ .toggle .slider { position: absolute; inset: 0; background: #b0aea5; border-radius: 24px; cursor: pointer; transition: 0.2s; }
32
+ .toggle .slider::before { content: ""; position: absolute; width: 18px; height: 18px; left: 3px; bottom: 3px; background: white; border-radius: 50%; transition: 0.2s; }
33
+ .toggle input:checked + .slider { background: #d97757; }
34
+ .toggle input:checked + .slider::before { transform: translateX(20px); }
35
+ .btn-delete { background: #c44; color: white; padding: 0.3rem 0.6rem; border: none; border-radius: 4px; cursor: pointer; font-size: 0.75rem; font-family: 'Poppins', sans-serif; }
36
+ .btn-delete:hover { background: #a33; }
37
+ .summary { margin-top: 1rem; color: #b0aea5; font-size: 0.875rem; }
38
+ </style>
39
+ </head>
40
+ <body>
41
+ <h1>Eval Set Review: <span id="skill-name">__SKILL_NAME_PLACEHOLDER__</span></h1>
42
+ <p class="description">Current description: <span id="skill-desc">__SKILL_DESCRIPTION_PLACEHOLDER__</span></p>
43
+
44
+ <div class="controls">
45
+ <button class="btn btn-add" onclick="addRow()">+ Add Query</button>
46
+ <button class="btn btn-export" onclick="exportEvalSet()">Export Eval Set</button>
47
+ </div>
48
+
49
+ <table>
50
+ <thead>
51
+ <tr>
52
+ <th style="width:65%">Query</th>
53
+ <th style="width:18%">Should Trigger</th>
54
+ <th style="width:10%">Actions</th>
55
+ </tr>
56
+ </thead>
57
+ <tbody id="eval-body"></tbody>
58
+ </table>
59
+
60
+ <p class="summary" id="summary"></p>
61
+
62
+ <script>
63
+ const EVAL_DATA = __EVAL_DATA_PLACEHOLDER__;
64
+
65
+ let evalItems = [...EVAL_DATA];
66
+
67
/**
 * Rebuild the eval table body from `evalItems`.
 *
 * Rows are grouped so that should-trigger queries come first, each group
 * preceded by a section header row. Every row's handlers carry the item's
 * index in `evalItems` (not its display position), so edits always target
 * the right entry even though the display order is sorted.
 */
function render() {
  const body = document.getElementById('eval-body');
  body.innerHTML = '';

  // Remember each item's position in evalItems before reordering for display.
  const indexed = evalItems.map((item, origIdx) => ({ ...item, origIdx }));
  const weight = entry => (entry.should_trigger ? 1 : 0);
  indexed.sort((a, b) => weight(b) - weight(a));

  let currentGroup = null;
  for (const entry of indexed) {
    const groupName = entry.should_trigger ? 'trigger' : 'no-trigger';

    // Emit a section header whenever the group changes.
    if (groupName !== currentGroup) {
      const header = document.createElement('tr');
      header.className = 'section-header';
      header.innerHTML = `<td colspan="3">${entry.should_trigger ? 'Should Trigger' : 'Should NOT Trigger'}</td>`;
      body.appendChild(header);
      currentGroup = groupName;
    }

    const idx = entry.origIdx;
    const row = document.createElement('tr');
    row.innerHTML = `
      <td><textarea class="query-input" onchange="updateQuery(${idx}, this.value)">${escapeHtml(entry.query)}</textarea></td>
      <td>
        <label class="toggle">
          <input type="checkbox" ${entry.should_trigger ? 'checked' : ''} onchange="updateTrigger(${idx}, this.checked)">
          <span class="slider"></span>
        </label>
        <span style="margin-left:8px;font-size:0.8rem;color:#b0aea5">${entry.should_trigger ? 'Yes' : 'No'}</span>
      </td>
      <td><button class="btn-delete" onclick="deleteRow(${idx})">Delete</button></td>
    `;
    body.appendChild(row);
  }

  updateSummary();
}
104
+
105
/**
 * Escape `text` for safe inclusion in HTML markup.
 *
 * The browser does the escaping: the text is assigned as plain text content
 * of a detached element and then read back as serialized HTML.
 */
function escapeHtml(text) {
  const scratch = document.createElement('div');
  scratch.textContent = text;
  const escaped = scratch.innerHTML;
  return escaped;
}
110
+
111
/** Store the edited query text for the item at `idx` and refresh the summary line. */
function updateQuery(idx, value) {
  const item = evalItems[idx];
  item.query = value;
  updateSummary();
}

/** Set the should-trigger flag for the item at `idx`; re-render because the row may change group. */
function updateTrigger(idx, value) {
  const item = evalItems[idx];
  item.should_trigger = value;
  render();
}

/** Remove the item at `idx` from the eval set and re-render the table. */
function deleteRow(idx) {
  evalItems.splice(idx, 1);
  render();
}
114
+
115
/**
 * Append an empty should-trigger query, re-render, and focus its textarea.
 *
 * render() displays all should-trigger rows before the should-not-trigger
 * rows and keeps insertion order within each group, so the row just added
 * is the LAST row of the should-trigger group, not necessarily the last
 * textarea in the table. Focusing the last textarea would put the cursor
 * in an unrelated should-not-trigger row whenever such rows exist.
 */
function addRow() {
  evalItems.push({ query: '', should_trigger: true });
  render();

  // Display position of the new row = number of should-trigger items - 1.
  const triggerCount = evalItems.filter(item => item.should_trigger).length;
  const inputs = document.querySelectorAll('.query-input');
  const target = inputs[triggerCount - 1];
  if (target) {
    target.focus();
  }
}
121
+
122
/** Refresh the summary line with the total, should-trigger, and should-not-trigger counts. */
function updateSummary() {
  let triggerCount = 0;
  for (const item of evalItems) {
    if (item.should_trigger) {
      triggerCount += 1;
    }
  }
  // Every item is in exactly one of the two groups.
  const noTriggerCount = evalItems.length - triggerCount;

  const summary = document.getElementById('summary');
  summary.textContent =
    `${evalItems.length} queries total: ${triggerCount} should trigger, ${noTriggerCount} should not trigger`;
}
128
+
129
/**
 * Download the current eval set as `eval_set.json`.
 *
 * Items whose query is empty after trimming are left out; exported queries
 * are trimmed. The download is triggered through a temporary anchor element.
 */
function exportEvalSet() {
  const payload = [];
  for (const item of evalItems) {
    const query = item.query.trim();
    if (query === '') {
      continue;
    }
    payload.push({ query: query, should_trigger: item.should_trigger });
  }

  const json = JSON.stringify(payload, null, 2);
  const blobUrl = URL.createObjectURL(new Blob([json], { type: 'application/json' }));

  // A temporary <a download> element is the portable way to start a download.
  const link = document.createElement('a');
  link.href = blobUrl;
  link.download = 'eval_set.json';
  document.body.appendChild(link);
  link.click();
  document.body.removeChild(link);
  URL.revokeObjectURL(blobUrl);
}
142
+
143
+ render();
144
+ </script>
145
+ </body>
146
+ </html>
@@ -0,0 +1,489 @@
1
+ #!/usr/bin/env python3
2
+ """Generate and serve a review page for eval results.
3
+
4
+ Reads the workspace directory, discovers runs (directories with outputs/),
5
+ embeds all output data into a self-contained HTML page, and serves it via
6
+ a tiny HTTP server. Feedback auto-saves to feedback.json in the workspace.
7
+
8
+ Usage:
9
+ python generate_review.py <workspace-path> [--port PORT] [--skill-name NAME]
10
+ python generate_review.py <workspace-path> --previous-feedback /path/to/old/feedback.json
11
+
12
+ No dependencies beyond the Python stdlib are required.
13
+ """
14
+
15
+ import argparse
16
+ import base64
17
+ import json
18
+ import mimetypes
19
+ import os
20
+ import re
21
+ import signal
22
+ import subprocess
23
+ import sys
24
+ import time
25
+ import webbrowser
26
+ from functools import partial
27
+ from http.server import HTTPServer, BaseHTTPRequestHandler
28
+ from pathlib import Path
29
+
30
# Bookkeeping files that may sit in outputs/ but are never listed as outputs
METADATA_FILES = {
    "metrics.json",
    "transcript.md",
    "user_notes.md",
}

# Suffixes whose file contents are embedded as inline text
TEXT_EXTENSIONS = {
    # Plain text and data formats
    ".txt", ".md", ".json", ".csv", ".yaml", ".yml", ".xml", ".toml",
    # Web
    ".html", ".css", ".js", ".jsx", ".ts", ".tsx",
    # Other source code
    ".py", ".sh", ".rb", ".go", ".rs", ".java",
    ".c", ".cpp", ".h", ".hpp", ".sql", ".r",
}

# Suffixes that are embedded as inline images
IMAGE_EXTENSIONS = {
    ".gif",
    ".jpeg",
    ".jpg",
    ".png",
    ".svg",
    ".webp",
}

# MIME types that take precedence over mimetypes.guess_type for these suffixes
MIME_OVERRIDES = {
    ".svg": "image/svg+xml",
    ".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    ".pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
    ".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
}
50
+
51
+
52
def get_mime_type(path: Path) -> str:
    """Return the MIME type to use for *path*.

    The lower-cased suffix is looked up in ``MIME_OVERRIDES`` first. If there
    is no override, the type is guessed from the file name with
    ``mimetypes.guess_type``; when no guess is available the generic
    ``application/octet-stream`` is returned.

    :param path: File whose MIME type is wanted (only the name is inspected).
    :return: A MIME type string, never empty.
    """
    suffix = path.suffix.lower()
    try:
        return MIME_OVERRIDES[suffix]
    except KeyError:
        pass

    guessed = mimetypes.guess_type(str(path))[0]
    if guessed:
        return guessed
    return "application/octet-stream"
63
+
64
+
65
def find_runs(workspace: Path) -> list[dict]:
    """Recursively find directories that contain an outputs/ subdirectory.

    :param workspace: Root directory to search; run ids are relative to it.
    :return: Run dicts ordered by ``eval_id`` then ``id``. Numeric eval ids
        come first, then any other non-null eval ids (ordered by their string
        form), and runs without an eval id come last.
    """
    runs: list[dict] = []
    _find_runs_recursive(workspace, workspace, runs)

    def _sort_key(run: dict) -> tuple:
        # The "eval_id" key may be present with the value None, in which case
        # a dict.get() default is never used. Comparing None with an int
        # raises TypeError, so each kind of eval id gets its own rank and
        # only like values are ever compared with each other.
        eval_id = run.get("eval_id")
        if eval_id is None:
            return (2, 0, "", run["id"])
        if isinstance(eval_id, (int, float)):
            return (0, eval_id, "", run["id"])
        return (1, 0, str(eval_id), run["id"])

    runs.sort(key=_sort_key)
    return runs
71
+
72
+
73
+ def _find_runs_recursive(root: Path, current: Path, runs: list[dict]) -> None:
74
+ if not current.is_dir():
75
+ return
76
+
77
+ outputs_dir = current / "outputs"
78
+ if outputs_dir.is_dir():
79
+ run = build_run(root, current)
80
+ if run:
81
+ runs.append(run)
82
+ return
83
+
84
+ skip = {"node_modules", ".git", "__pycache__", "skill", "inputs"}
85
+ for child in sorted(current.iterdir()):
86
+ if child.is_dir() and child.name not in skip:
87
+ _find_runs_recursive(root, child, runs)
88
+
89
+
90
+ def build_run(root: Path, run_dir: Path) -> dict | None:
91
+ """Build a run dict with prompt, outputs, and grading data."""
92
+ prompt = ""
93
+ eval_id = None
94
+
95
+ # Try eval_metadata.json
96
+ for candidate in [run_dir / "eval_metadata.json", run_dir.parent / "eval_metadata.json"]:
97
+ if candidate.exists():
98
+ try:
99
+ metadata = json.loads(candidate.read_text())
100
+ prompt = metadata.get("prompt", "")
101
+ eval_id = metadata.get("eval_id")
102
+ except (json.JSONDecodeError, OSError):
103
+ pass
104
+ if prompt:
105
+ break
106
+
107
+ # Fall back to transcript.md
108
+ if not prompt:
109
+ for candidate in [run_dir / "transcript.md", run_dir / "outputs" / "transcript.md"]:
110
+ if candidate.exists():
111
+ try:
112
+ text = candidate.read_text()
113
+ match = re.search(r"## Eval Prompt\n\n([\s\S]*?)(?=\n##|$)", text)
114
+ if match:
115
+ prompt = match.group(1).strip()
116
+ except OSError:
117
+ pass
118
+ if prompt:
119
+ break
120
+
121
+ if not prompt:
122
+ prompt = "(No prompt found)"
123
+
124
+ run_id = str(run_dir.relative_to(root)).replace("/", "-").replace("\\", "-")
125
+
126
+ # Collect output files
127
+ outputs_dir = run_dir / "outputs"
128
+ output_files: list[dict] = []
129
+ if outputs_dir.is_dir():
130
+ for f in sorted(outputs_dir.iterdir()):
131
+ if f.is_file() and f.name not in METADATA_FILES:
132
+ output_files.append(embed_file(f))
133
+
134
+ # Load grading if present
135
+ grading = None
136
+ for candidate in [run_dir / "grading.json", run_dir.parent / "grading.json"]:
137
+ if candidate.exists():
138
+ try:
139
+ grading = json.loads(candidate.read_text())
140
+ except (json.JSONDecodeError, OSError):
141
+ pass
142
+ if grading:
143
+ break
144
+
145
+ return {
146
+ "id": run_id,
147
+ "prompt": prompt,
148
+ "eval_id": eval_id,
149
+ "outputs": output_files,
150
+ "grading": grading,
151
+ }
152
+
153
+
154
def embed_file(path: Path) -> dict:
    """Read *path* and return a dict describing how to embed it in the page.

    The ``type`` field depends on the lower-cased suffix:

    * text suffixes -> ``"text"`` with the decoded ``content`` (a read error
      yields placeholder content, still typed ``"text"``);
    * image suffixes -> ``"image"`` with ``mime`` and a base64 ``data_uri``;
    * ``.pdf`` -> ``"pdf"`` with a base64 ``data_uri``;
    * ``.xlsx`` -> ``"xlsx"`` with the raw base64 in ``data_b64``;
    * anything else -> ``"binary"`` with ``mime`` and a base64 ``data_uri``.

    A read error on any non-text file yields ``type`` ``"error"``.
    """
    suffix = path.suffix.lower()
    mime_type = get_mime_type(path)
    name = path.name

    if suffix in TEXT_EXTENSIONS:
        try:
            text = path.read_text(errors="replace")
        except OSError:
            text = "(Error reading file)"
        return {"name": name, "type": "text", "content": text}

    # Every remaining kind embeds the raw bytes as base64.
    try:
        encoded = base64.b64encode(path.read_bytes()).decode("ascii")
    except OSError:
        return {"name": name, "type": "error", "content": "(Error reading file)"}

    data_uri = f"data:{mime_type};base64,{encoded}"

    if suffix in IMAGE_EXTENSIONS:
        return {"name": name, "type": "image", "mime": mime_type, "data_uri": data_uri}
    if suffix == ".pdf":
        return {"name": name, "type": "pdf", "data_uri": data_uri}
    if suffix == ".xlsx":
        return {"name": name, "type": "xlsx", "data_b64": encoded}

    # Binary / unknown: offered as a base64 download link.
    return {"name": name, "type": "binary", "mime": mime_type, "data_uri": data_uri}
216
+
217
+
218
def load_previous_iteration(workspace: Path) -> dict[str, dict]:
    """Load previous iteration's feedback and outputs.

    Feedback comes from ``feedback.json`` in *workspace* (its ``reviews``
    list); outputs come from the runs found under *workspace*. Loading the
    feedback is best-effort: a missing, unreadable, or malformed file, and
    individual malformed review entries, are skipped rather than raising.

    :param workspace: Directory of the previous iteration.
    :return: Map of run_id -> {"feedback": str, "outputs": list[dict]}.
        Runs without feedback get ``""``; feedback whose run no longer exists
        is kept with an empty outputs list.
    """
    # Load feedback.
    feedback_map: dict[str, str] = {}
    try:
        data = json.loads((workspace / "feedback.json").read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # Missing/unreadable file, invalid JSON, or undecodable bytes.
        data = None

    reviews = data.get("reviews", []) if isinstance(data, dict) else []
    if not isinstance(reviews, list):
        reviews = []

    for review in reviews:
        if not isinstance(review, dict):
            continue
        feedback = review.get("feedback", "")
        # Only non-blank textual feedback is worth carrying over.
        if not isinstance(feedback, str) or not feedback.strip():
            continue
        try:
            feedback_map[review["run_id"]] = feedback
        except (KeyError, TypeError):
            # Entry has no run_id, or the run_id is not usable as a dict key.
            continue

    # Load runs (to get outputs).
    result: dict[str, dict] = {}
    for run in find_runs(workspace):
        result[run["id"]] = {
            "feedback": feedback_map.get(run["id"], ""),
            "outputs": run.get("outputs", []),
        }

    # Also keep feedback for run_ids that have no matching run.
    for run_id, feedback in feedback_map.items():
        if run_id not in result:
            result[run_id] = {"feedback": feedback, "outputs": []}

    return result
253
+
254
+
255
+ def generate_html(
256
+ runs: list[dict],
257
+ skill_name: str,
258
+ previous: dict[str, dict] | None = None,
259
+ benchmark: dict | None = None,
260
+ ) -> str:
261
+ """Generate the complete standalone HTML page with embedded data."""
262
+ template_path = Path(__file__).parent / "viewer.html"
263
+ template = template_path.read_text()
264
+
265
+ # Build previous_feedback and previous_outputs maps for the template
266
+ previous_feedback: dict[str, str] = {}
267
+ previous_outputs: dict[str, list[dict]] = {}
268
+ if previous:
269
+ for run_id, data in previous.items():
270
+ if data.get("feedback"):
271
+ previous_feedback[run_id] = data["feedback"]
272
+ if data.get("outputs"):
273
+ previous_outputs[run_id] = data["outputs"]
274
+
275
+ embedded = {
276
+ "skill_name": skill_name,
277
+ "runs": runs,
278
+ "previous_feedback": previous_feedback,
279
+ "previous_outputs": previous_outputs,
280
+ }
281
+ if benchmark:
282
+ embedded["benchmark"] = benchmark
283
+
284
+ data_json = json.dumps(embedded)
285
+
286
+ return template.replace("/*__EMBEDDED_DATA__*/", f"const EMBEDDED_DATA = {data_json};")
287
+
288
+
289
+ # ---------------------------------------------------------------------------
290
+ # HTTP server (stdlib only, zero dependencies)
291
+ # ---------------------------------------------------------------------------
292
+
293
def _kill_port(port: int) -> None:
    """Send SIGTERM to any process that ``lsof`` reports on the given port.

    Best effort only: if ``lsof`` is missing a note is printed to stderr, and
    if it times out nothing is done. Processes that have already exited or
    that this user is not allowed to signal are skipped instead of aborting
    startup.
    """
    try:
        result = subprocess.run(
            ["lsof", "-ti", f":{port}"],
            capture_output=True, text=True, timeout=5,
        )
    except subprocess.TimeoutExpired:
        return
    except FileNotFoundError:
        print("Note: lsof not found, cannot check if port is in use", file=sys.stderr)
        return

    # `lsof -t` prints one PID per line.
    pids = [line.strip() for line in result.stdout.splitlines() if line.strip()]
    for pid_str in pids:
        try:
            os.kill(int(pid_str), signal.SIGTERM)
        except (ProcessLookupError, PermissionError, ValueError):
            # Already gone, owned by another user, or not a PID: skip it.
            pass

    if pids:
        # Give the signalled processes a moment to release the port.
        time.sleep(0.5)
312
+
313
+
314
+ class ReviewHandler(BaseHTTPRequestHandler):
315
+ """Serves the review HTML and handles feedback saves.
316
+
317
+ Regenerates the HTML on each page load so that refreshing the browser
318
+ picks up new eval outputs without restarting the server.
319
+ """
320
+
321
+ def __init__(
322
+ self,
323
+ workspace: Path,
324
+ skill_name: str,
325
+ feedback_path: Path,
326
+ previous: dict[str, dict],
327
+ benchmark_path: Path | None,
328
+ *args,
329
+ **kwargs,
330
+ ):
331
+ self.workspace = workspace
332
+ self.skill_name = skill_name
333
+ self.feedback_path = feedback_path
334
+ self.previous = previous
335
+ self.benchmark_path = benchmark_path
336
+ super().__init__(*args, **kwargs)
337
+
338
+ def do_GET(self) -> None:
339
+ """
340
+ Handle GET requests to the server.
341
+ :return:
342
+ """
343
+ if self.path == "/" or self.path == "/index.html":
344
+ # Regenerate HTML on each request (re-scans workspace for new outputs)
345
+ runs = find_runs(self.workspace)
346
+ benchmark = None
347
+ if self.benchmark_path and self.benchmark_path.exists():
348
+ try:
349
+ benchmark = json.loads(self.benchmark_path.read_text())
350
+ except (json.JSONDecodeError, OSError):
351
+ pass
352
+ html = generate_html(runs, self.skill_name, self.previous, benchmark)
353
+ content = html.encode("utf-8")
354
+ self.send_response(200)
355
+ self.send_header("Content-Type", "text/html; charset=utf-8")
356
+ self.send_header("Content-Length", str(len(content)))
357
+ self.end_headers()
358
+ self.wfile.write(content)
359
+ elif self.path == "/api/feedback":
360
+ data = b"{}"
361
+ if self.feedback_path.exists():
362
+ data = self.feedback_path.read_bytes()
363
+ self.send_response(200)
364
+ self.send_header("Content-Type", "application/json")
365
+ self.send_header("Content-Length", str(len(data)))
366
+ self.end_headers()
367
+ self.wfile.write(data)
368
+ else:
369
+ self.send_error(404)
370
+
371
+ def do_POST(self) -> None:
372
+ """
373
+ Handle POST requests to the server.
374
+ :return:
375
+ """
376
+ if self.path == "/api/feedback":
377
+ length = int(self.headers.get("Content-Length", 0))
378
+ body = self.rfile.read(length)
379
+ try:
380
+ data = json.loads(body)
381
+ if not isinstance(data, dict) or "reviews" not in data:
382
+ raise ValueError("Expected JSON object with 'reviews' key")
383
+ self.feedback_path.write_text(json.dumps(data, indent=2) + "\n")
384
+ resp = b'{"ok":true}'
385
+ self.send_response(200)
386
+ except (json.JSONDecodeError, OSError, ValueError) as e:
387
+ resp = json.dumps({"error": str(e)}).encode()
388
+ self.send_response(500)
389
+ self.send_header("Content-Type", "application/json")
390
+ self.send_header("Content-Length", str(len(resp)))
391
+ self.end_headers()
392
+ self.wfile.write(resp)
393
+ else:
394
+ self.send_error(404)
395
+
396
+ def log_message(self, format: str, *args: object) -> None:
397
+ # Suppress request logging to keep terminal clean
398
+ pass
399
+
400
+
401
def main() -> None:
    """Parse the command line, then write a static page or serve the viewer.

    With ``--static`` the generated HTML is written to the given path and the
    process exits with status 0. Otherwise an HTTP server is started on
    127.0.0.1, preferring ``--port`` and falling back to an OS-assigned port
    if binding fails, and the viewer URL is opened in the browser. Exits with
    status 1 if the workspace is not a directory or contains no runs.
    """
    parser = argparse.ArgumentParser(description="Generate and serve eval review")
    parser.add_argument("workspace", type=Path, help="Path to workspace directory")
    parser.add_argument("--port", "-p", type=int, default=3117, help="Server port (default: 3117)")
    parser.add_argument("--skill-name", "-n", type=str, default=None, help="Skill name for header")
    parser.add_argument(
        "--previous-workspace", type=Path, default=None,
        help="Path to previous iteration's workspace (shows old outputs and feedback as context)",
    )
    parser.add_argument(
        "--benchmark", type=Path, default=None,
        help="Path to benchmark.json to show in the Benchmark tab",
    )
    parser.add_argument(
        "--static", "-s", type=Path, default=None,
        help="Write standalone HTML to this path instead of starting a server",
    )
    args = parser.parse_args()

    workspace = args.workspace.resolve()
    if not workspace.is_dir():
        print(f"Error: {workspace} is not a directory", file=sys.stderr)
        sys.exit(1)

    runs = find_runs(workspace)
    if not runs:
        print(f"No runs found in {workspace}", file=sys.stderr)
        sys.exit(1)

    # Default the skill name to the workspace directory name.
    skill_name = args.skill_name or workspace.name.replace("-workspace", "")
    feedback_path = workspace / "feedback.json"

    previous: dict[str, dict] = {}
    if args.previous_workspace:
        previous = load_previous_iteration(args.previous_workspace.resolve())

    benchmark_path = args.benchmark.resolve() if args.benchmark else None
    benchmark = None
    if benchmark_path and benchmark_path.exists():
        try:
            benchmark = json.loads(benchmark_path.read_text())
        except (json.JSONDecodeError, OSError):
            # An unreadable benchmark file just hides the benchmark data.
            pass

    if args.static:
        html = generate_html(runs, skill_name, previous, benchmark)
        args.static.parent.mkdir(parents=True, exist_ok=True)
        args.static.write_text(html)
        print(f"\n  Static viewer written to: {args.static}\n")
        sys.exit(0)

    # Kill any existing process on the target port
    port = args.port
    _kill_port(port)
    handler = partial(ReviewHandler, workspace, skill_name, feedback_path, previous, benchmark_path)
    try:
        server = HTTPServer(("127.0.0.1", port), handler)
    except OSError:
        # Port still in use after kill attempt — find a free one
        server = HTTPServer(("127.0.0.1", 0), handler)
        port = server.server_address[1]

    url = f"http://localhost:{port}"
    print("\n  Eval Viewer")
    print("  ─────────────────────────────────")
    print(f"  URL:       {url}")
    print(f"  Workspace: {workspace}")
    print(f"  Feedback:  {feedback_path}")
    if previous:
        print(f"  Previous:  {args.previous_workspace} ({len(previous)} runs)")
    if benchmark_path:
        print(f"  Benchmark: {benchmark_path}")
    print("\n  Press Ctrl+C to stop.\n")

    webbrowser.open(url)

    try:
        server.serve_forever()
    except KeyboardInterrupt:
        print("\nStopped.")
    finally:
        # Release the listening socket however the serve loop ends.
        server.server_close()


if __name__ == "__main__":
    main()