@devrev-computer/skills 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (179) hide show
  1. package/README.md +37 -0
  2. package/bin/install.mjs +158 -0
  3. package/package.json +33 -0
  4. package/skills/account-evaluation/account-evaluation.md +64 -0
  5. package/skills/account-research/account-research.md +323 -0
  6. package/skills/account-research/references/signals-guide.md +52 -0
  7. package/skills/create-workflow-template/create-workflow-template.md +1091 -0
  8. package/skills/create-workflow-template/examples/3592-Generate rca from pia-template.json +1 -0
  9. package/skills/create-workflow-template/examples/4392-Async opportunity review agent-template.json +1 -0
  10. package/skills/create-workflow-template/examples/4441-Ticket escalator from customer message-template.json +1 -0
  11. package/skills/create-workflow-template/examples/4505-Auto-update issue tcd as end of sprint date-template.json +1 -0
  12. package/skills/create-workflow-template/examples/5040-Devrevu - enablement journey - poc emails-template.json +1 -0
  13. package/skills/create-workflow-template/examples/5158-Devrevu - enablement journey - mailing for non enablement journey users-template.json +1 -0
  14. package/skills/create-workflow-template/examples/5216-Account segment missing notification-template.json +1 -0
  15. package/skills/create-workflow-template/examples/working-csat-score-on-ticket-resolved.json +1 -0
  16. package/skills/create-workflow-template/examples/working-enhancement-replace-agent.json +1 -0
  17. package/skills/create-workflow-template/examples/working-invoke-code-sample.json +1 -0
  18. package/skills/create-workflow-template/examples/working-loop-variable-sample.json +1 -0
  19. package/skills/create-workflow-template/operations/actions.md +2919 -0
  20. package/skills/create-workflow-template/operations/blockings.md +38 -0
  21. package/skills/create-workflow-template/operations/controls.md +108 -0
  22. package/skills/create-workflow-template/operations/schema-index.md +166 -0
  23. package/skills/create-workflow-template/operations/schemas/account_created.md +58 -0
  24. package/skills/create-workflow-template/operations/schemas/account_updated.md +73 -0
  25. package/skills/create-workflow-template/operations/schemas/add_comment.md +29 -0
  26. package/skills/create-workflow-template/operations/schemas/airdrop_sync_run_started.md +33 -0
  27. package/skills/create-workflow-template/operations/schemas/airdrop_sync_run_status_updated.md +35 -0
  28. package/skills/create-workflow-template/operations/schemas/article_created.md +96 -0
  29. package/skills/create-workflow-template/operations/schemas/article_updated.md +135 -0
  30. package/skills/create-workflow-template/operations/schemas/ask_ai.md +11 -0
  31. package/skills/create-workflow-template/operations/schemas/classify_object.md +22 -0
  32. package/skills/create-workflow-template/operations/schemas/contact_created.md +43 -0
  33. package/skills/create-workflow-template/operations/schemas/contact_updated.md +65 -0
  34. package/skills/create-workflow-template/operations/schemas/conversation_created.md +108 -0
  35. package/skills/create-workflow-template/operations/schemas/conversation_sla_tracker_updated.md +46 -0
  36. package/skills/create-workflow-template/operations/schemas/conversation_updated.md +130 -0
  37. package/skills/create-workflow-template/operations/schemas/convert_conversation_to_ticket.md +13 -0
  38. package/skills/create-workflow-template/operations/schemas/create_account.md +62 -0
  39. package/skills/create-workflow-template/operations/schemas/create_article.md +79 -0
  40. package/skills/create-workflow-template/operations/schemas/create_brand.md +42 -0
  41. package/skills/create-workflow-template/operations/schemas/create_contact.md +65 -0
  42. package/skills/create-workflow-template/operations/schemas/create_dm.md +53 -0
  43. package/skills/create-workflow-template/operations/schemas/create_enhancement.md +63 -0
  44. package/skills/create-workflow-template/operations/schemas/create_incident.md +136 -0
  45. package/skills/create-workflow-template/operations/schemas/create_issue.md +150 -0
  46. package/skills/create-workflow-template/operations/schemas/create_meeting.md +105 -0
  47. package/skills/create-workflow-template/operations/schemas/create_opportunity.md +123 -0
  48. package/skills/create-workflow-template/operations/schemas/create_ticket.md +184 -0
  49. package/skills/create-workflow-template/operations/schemas/csat_response_received.md +73 -0
  50. package/skills/create-workflow-template/operations/schemas/dev_user_created.md +54 -0
  51. package/skills/create-workflow-template/operations/schemas/dev_user_updated.md +99 -0
  52. package/skills/create-workflow-template/operations/schemas/enhancement_created.md +46 -0
  53. package/skills/create-workflow-template/operations/schemas/enhancement_updated.md +89 -0
  54. package/skills/create-workflow-template/operations/schemas/evaluate_sentiment.md +14 -0
  55. package/skills/create-workflow-template/operations/schemas/execute_metric_action.md +11 -0
  56. package/skills/create-workflow-template/operations/schemas/feature_created.md +40 -0
  57. package/skills/create-workflow-template/operations/schemas/for_each.md +45 -0
  58. package/skills/create-workflow-template/operations/schemas/get_account.md +59 -0
  59. package/skills/create-workflow-template/operations/schemas/get_airdrop_sync_unit.md +32 -0
  60. package/skills/create-workflow-template/operations/schemas/get_brand.md +40 -0
  61. package/skills/create-workflow-template/operations/schemas/get_complete_enhancement_details.md +13 -0
  62. package/skills/create-workflow-template/operations/schemas/get_conversation.md +120 -0
  63. package/skills/create-workflow-template/operations/schemas/get_customer.md +60 -0
  64. package/skills/create-workflow-template/operations/schemas/get_enhancement.md +66 -0
  65. package/skills/create-workflow-template/operations/schemas/get_feature.md +56 -0
  66. package/skills/create-workflow-template/operations/schemas/get_incident.md +85 -0
  67. package/skills/create-workflow-template/operations/schemas/get_issue.md +117 -0
  68. package/skills/create-workflow-template/operations/schemas/get_kg_schema.md +23 -0
  69. package/skills/create-workflow-template/operations/schemas/get_meeting.md +87 -0
  70. package/skills/create-workflow-template/operations/schemas/get_metric_trackers.md +20 -0
  71. package/skills/create-workflow-template/operations/schemas/get_node_schema.md +29 -0
  72. package/skills/create-workflow-template/operations/schemas/get_opportunity.md +93 -0
  73. package/skills/create-workflow-template/operations/schemas/get_org_user.md +57 -0
  74. package/skills/create-workflow-template/operations/schemas/get_org_user_preference.md +40 -0
  75. package/skills/create-workflow-template/operations/schemas/get_part.md +55 -0
  76. package/skills/create-workflow-template/operations/schemas/get_self.md +54 -0
  77. package/skills/create-workflow-template/operations/schemas/get_session_details.md +45 -0
  78. package/skills/create-workflow-template/operations/schemas/get_sprint_board.md +103 -0
  79. package/skills/create-workflow-template/operations/schemas/get_ticket.md +136 -0
  80. package/skills/create-workflow-template/operations/schemas/get_workspace.md +21 -0
  81. package/skills/create-workflow-template/operations/schemas/go_back.md +13 -0
  82. package/skills/create-workflow-template/operations/schemas/http.md +38 -0
  83. package/skills/create-workflow-template/operations/schemas/hybrid_search.md +144 -0
  84. package/skills/create-workflow-template/operations/schemas/if_else.md +16 -0
  85. package/skills/create-workflow-template/operations/schemas/incident_created.md +88 -0
  86. package/skills/create-workflow-template/operations/schemas/incident_updated.md +126 -0
  87. package/skills/create-workflow-template/operations/schemas/init_variable.md +67 -0
  88. package/skills/create-workflow-template/operations/schemas/invoice_created.md +21 -0
  89. package/skills/create-workflow-template/operations/schemas/invoice_updated.md +41 -0
  90. package/skills/create-workflow-template/operations/schemas/invoke_code.md +132 -0
  91. package/skills/create-workflow-template/operations/schemas/issue_created.md +105 -0
  92. package/skills/create-workflow-template/operations/schemas/issue_sla_tracker_updated.md +46 -0
  93. package/skills/create-workflow-template/operations/schemas/issue_updated.md +172 -0
  94. package/skills/create-workflow-template/operations/schemas/link_incident_with_issue.md +14 -0
  95. package/skills/create-workflow-template/operations/schemas/link_ticket_with_issue.md +14 -0
  96. package/skills/create-workflow-template/operations/schemas/list_enhancements.md +74 -0
  97. package/skills/create-workflow-template/operations/schemas/list_issues.md +108 -0
  98. package/skills/create-workflow-template/operations/schemas/list_sessions.md +79 -0
  99. package/skills/create-workflow-template/operations/schemas/list_sprint.md +29 -0
  100. package/skills/create-workflow-template/operations/schemas/list_web_sessions.md +87 -0
  101. package/skills/create-workflow-template/operations/schemas/loop_over_accounts.md +106 -0
  102. package/skills/create-workflow-template/operations/schemas/loop_over_articles.md +126 -0
  103. package/skills/create-workflow-template/operations/schemas/loop_over_customers.md +88 -0
  104. package/skills/create-workflow-template/operations/schemas/loop_over_dev_users.md +75 -0
  105. package/skills/create-workflow-template/operations/schemas/loop_over_enhancements.md +112 -0
  106. package/skills/create-workflow-template/operations/schemas/loop_over_incidents.md +113 -0
  107. package/skills/create-workflow-template/operations/schemas/loop_over_issues.md +217 -0
  108. package/skills/create-workflow-template/operations/schemas/loop_over_meetings.md +150 -0
  109. package/skills/create-workflow-template/operations/schemas/loop_over_opportunity.md +161 -0
  110. package/skills/create-workflow-template/operations/schemas/loop_over_sprints.md +50 -0
  111. package/skills/create-workflow-template/operations/schemas/loop_over_tickets.md +203 -0
  112. package/skills/create-workflow-template/operations/schemas/manual_trigger.md +11 -0
  113. package/skills/create-workflow-template/operations/schemas/meeting_created.md +116 -0
  114. package/skills/create-workflow-template/operations/schemas/meeting_updated.md +152 -0
  115. package/skills/create-workflow-template/operations/schemas/oasis_sql_execute.md +11 -0
  116. package/skills/create-workflow-template/operations/schemas/opportunity_created.md +92 -0
  117. package/skills/create-workflow-template/operations/schemas/opportunity_updated.md +124 -0
  118. package/skills/create-workflow-template/operations/schemas/pick_user.md +16 -0
  119. package/skills/create-workflow-template/operations/schemas/question_answer_created.md +44 -0
  120. package/skills/create-workflow-template/operations/schemas/question_answer_updated.md +75 -0
  121. package/skills/create-workflow-template/operations/schemas/recall_chats.md +13 -0
  122. package/skills/create-workflow-template/operations/schemas/router.md +15 -0
  123. package/skills/create-workflow-template/operations/schemas/send_notification.md +19 -0
  124. package/skills/create-workflow-template/operations/schemas/set_variable.md +67 -0
  125. package/skills/create-workflow-template/operations/schemas/sleep_for.md +12 -0
  126. package/skills/create-workflow-template/operations/schemas/sleep_until.md +17 -0
  127. package/skills/create-workflow-template/operations/schemas/sprint_updated.md +37 -0
  128. package/skills/create-workflow-template/operations/schemas/suggest_part.md +14 -0
  129. package/skills/create-workflow-template/operations/schemas/task_updated.md +79 -0
  130. package/skills/create-workflow-template/operations/schemas/test_example.md +16 -0
  131. package/skills/create-workflow-template/operations/schemas/ticket_created.md +136 -0
  132. package/skills/create-workflow-template/operations/schemas/ticket_sla_tracker_updated.md +46 -0
  133. package/skills/create-workflow-template/operations/schemas/ticket_updated.md +198 -0
  134. package/skills/create-workflow-template/operations/schemas/timeline_comment_created.md +70 -0
  135. package/skills/create-workflow-template/operations/schemas/update_account.md +68 -0
  136. package/skills/create-workflow-template/operations/schemas/update_article.md +95 -0
  137. package/skills/create-workflow-template/operations/schemas/update_brand.md +44 -0
  138. package/skills/create-workflow-template/operations/schemas/update_contact.md +53 -0
  139. package/skills/create-workflow-template/operations/schemas/update_conversation.md +149 -0
  140. package/skills/create-workflow-template/operations/schemas/update_enhancement.md +64 -0
  141. package/skills/create-workflow-template/operations/schemas/update_incident.md +156 -0
  142. package/skills/create-workflow-template/operations/schemas/update_issue.md +173 -0
  143. package/skills/create-workflow-template/operations/schemas/update_meeting.md +114 -0
  144. package/skills/create-workflow-template/operations/schemas/update_opportunity.md +137 -0
  145. package/skills/create-workflow-template/operations/schemas/update_question_answer.md +60 -0
  146. package/skills/create-workflow-template/operations/schemas/update_ticket.md +188 -0
  147. package/skills/create-workflow-template/operations/schemas/watch_ticket_for_updates.md +225 -0
  148. package/skills/create-workflow-template/operations/schemas/web_search.md +17 -0
  149. package/skills/create-workflow-template/operations/schemas/while.md +24 -0
  150. package/skills/create-workflow-template/operations/schemas/widget_created.md +75 -0
  151. package/skills/create-workflow-template/operations/schemas/widget_updated.md +98 -0
  152. package/skills/create-workflow-template/operations/schemas/workspace_created.md +20 -0
  153. package/skills/create-workflow-template/operations/triggers.md +1583 -0
  154. package/skills/customer-brief/customer-brief.md +66 -0
  155. package/skills/deal-review-meddpicc/deal-review-meddpicc.md +58 -0
  156. package/skills/next-step-for-opportunity/next-step-for-opportunity.md +55 -0
  157. package/skills/opportunity-feature-prioritizer/SKILL.md +183 -0
  158. package/skills/sales-call-plan-coach/sales-call-plan-coach.md +73 -0
  159. package/skills/sales-context/sales-context.md +44 -0
  160. package/skills/sales-search-and-lookup/sales-search-and-lookup.md +58 -0
  161. package/skills/skill-creator/SKILL.md +570 -0
  162. package/skills/skill-creator/agents/analyzer.md +274 -0
  163. package/skills/skill-creator/agents/comparator.md +202 -0
  164. package/skills/skill-creator/agents/grader.md +223 -0
  165. package/skills/skill-creator/assets/eval_review.html +146 -0
  166. package/skills/skill-creator/eval-viewer/generate_review.py +471 -0
  167. package/skills/skill-creator/eval-viewer/viewer.html +1325 -0
  168. package/skills/skill-creator/references/schemas.md +430 -0
  169. package/skills/skill-creator/references/tool-patterns.md +290 -0
  170. package/skills/skill-creator/scripts/__init__.py +0 -0
  171. package/skills/skill-creator/scripts/aggregate_benchmark.py +401 -0
  172. package/skills/skill-creator/scripts/generate_report.py +326 -0
  173. package/skills/skill-creator/scripts/improve_description.py +247 -0
  174. package/skills/skill-creator/scripts/package_skill.py +136 -0
  175. package/skills/skill-creator/scripts/quick_validate.py +103 -0
  176. package/skills/skill-creator/scripts/run_eval.py +310 -0
  177. package/skills/skill-creator/scripts/run_loop.py +328 -0
  178. package/skills/skill-creator/scripts/utils.py +47 -0
  179. package/skills/trace-diagnosis/trace-diagnosis.md +186 -0
@@ -0,0 +1,290 @@
1
+ # Tool Patterns for Platform-Integrated Skills
2
+
3
+ This reference covers patterns for writing skills that leverage platform tools effectively. The key principle: **describe capabilities, not tool names**. Tools change; capabilities persist.
4
+
5
+ ## Table of Contents
6
+
7
+ 1. [Tool Discovery Pattern](#tool-discovery-pattern)
8
+ 2. [Data Querying Skills](#data-querying-skills)
9
+ 3. [Workflow Management Skills](#workflow-management-skills)
10
+ 4. [Communication Skills](#communication-skills)
11
+ 5. [Observability Skills](#observability-skills)
12
+ 6. [Multi-Tool Orchestration](#multi-tool-orchestration)
13
+ 7. [Common Skill Archetypes](#common-skill-archetypes)
+ 8. [Anti-Patterns to Avoid](#anti-patterns-to-avoid)
14
+
15
+ ---
16
+
17
+ ## Tool Discovery Pattern
18
+
19
+ Every skill that depends on external tools should include a discovery step. Here's the pattern:
20
+
21
+ ```markdown
22
+ ## Prerequisites
23
+
24
+ Before starting, list the tools available in your current session:
25
+
26
+ 1. Scan the tool list and identify tools relevant to [the domain this skill needs]
27
+ 2. Group them by capability (data querying, work management, messaging, etc.)
28
+ 3. If the required capabilities aren't available, inform the user what's needed and stop
29
+ 4. Note the exact tool names for use in subsequent steps
30
+ ```
31
+
32
+ Why this matters: A skill installed in one environment might be used in another where different tools are configured. Tool discovery makes skills portable. The discovery step should work in any client — just list the tools you have.
33
+
34
+ ---
35
+
36
+ ## Data Querying Skills
37
+
38
+ Skills that query organizational data (reports, dashboards, analytics) typically follow this pattern:
39
+
40
+ ### The Schema-First Pattern
41
+
42
+ ```markdown
43
+ ## Workflow
44
+
45
+ ### Step 1: Understand the data model
46
+ Before writing any queries, discover what data is available:
47
+ - Look for schema or knowledge graph tools in your tool list
48
+ - Use them to get an overview of all objects and relationships
49
+ - For specific objects you'll query, get their detailed schema (fields, types, relationships)
50
+ - Note which objects are global vs organization-specific
51
+
52
+ ### Step 2: Construct queries
53
+ Based on the schema and the user's request:
54
+ - Build SQL queries that use the correct field names from the schema
55
+ - If natural-language-to-SQL tools are available, use them — they have context on query syntax
56
+ - Always validate field names against the schema before executing
57
+
58
+ ### Step 3: Execute and validate
59
+ - Execute queries using the available SQL execution tools
60
+ - Check that results make sense given the schema
61
+ - Handle empty results gracefully — explain what was queried and why it might be empty
62
+
63
+ ### Step 4: Present results
64
+ - Format results according to the user's needs (table, summary, chart data)
65
+ - Include the query used so the user can modify it
66
+ ```
67
+
68
+ ### Key principles for data skills
69
+
70
+ - **Schema before query**: The model should always understand the data model before writing SQL. This prevents hallucinated column names and wrong joins.
71
+ - **Iterative refinement**: SQL queries often need adjustment. The skill should encourage trying a query, examining results, and refining.
72
+ - **Explain the data**: Don't just dump results. Summarize what the data shows and highlight interesting patterns.
73
+
74
+ ### Common data skill types
75
+
76
+ | Skill Type | Typical Queries | Key Considerations |
77
+ |------------|----------------|-------------------|
78
+ | Dashboard | Aggregations, GROUP BY, time series | Handle date ranges, support filtering |
79
+ | Report | JOINs across objects, computed fields | May need multiple queries stitched together |
80
+ | Analytics | Statistical queries, trend analysis | Consider data freshness and completeness |
81
+ | Search | WHERE with multiple conditions, LIKE | Handle fuzzy matching, suggest alternatives |
82
+
83
+ ---
84
+
85
+ ## Workflow Management Skills
86
+
87
+ Skills that interact with work management systems (issues, tickets, enhancements, sprints).
88
+
89
+ ### Object Lifecycle Awareness
90
+
91
+ Work objects typically have specific lifecycles with valid stage transitions. A skill that creates or updates work items should include guidance along these lines:
92
+
93
+ ```markdown
94
+ ## Working with work items
95
+
96
+ ### Before creating or updating
97
+ 1. List the available work management tools in your session
98
+ 2. Check available subtypes for the work type you're creating
99
+ 3. For updates, verify the stage transition is valid
100
+ 4. When linking objects, understand the relationship types available
101
+
102
+ ### Creating work items
103
+ - Set the appropriate subtype based on the nature of the work
104
+ - Assign to the right product area if known
105
+ - Add to the current sprint if the work is immediate
106
+ - Include a clear title and description with enough context for anyone to pick it up
107
+
108
+ ### Updating work items
109
+ - Always check current state before updating
110
+ - Use valid stage transitions — don't skip stages
111
+ - Add timeline entries to explain why changes were made
112
+ ```
113
+
114
+ ### Common workflow skill patterns
115
+
116
+ - **Triage skills**: Read incoming items, categorize, assign, set priority
117
+ - **Sprint management**: Plan sprints, track progress, identify blockers
118
+ - **Reporting skills**: Aggregate work item data for status reports
119
+ - **Escalation skills**: Detect stale items, notify owners, update status
120
+
121
+ ---
122
+
123
+ ## Communication Skills
124
+
125
+ Skills that post messages, create issues/PRs, or otherwise communicate externally.
126
+
127
+ ### The Confirm-Before-Send Pattern
128
+
129
+ ```markdown
130
+ ## Posting messages
131
+
132
+ External communication is hard to undo. Before posting:
133
+ 1. Draft the message and show it to the user
134
+ 2. Wait for explicit confirmation before sending
135
+ 3. After sending, confirm what was sent and where
136
+
137
+ Exception: If the user has explicitly said "just post it" or the skill is designed for automated posting, skip the confirmation. But default to confirming.
138
+ ```
139
+
140
+ ### Messaging patterns
141
+
142
+ - **Channel/recipient discovery**: Use list/search tools to find the right destination
143
+ - **Thread awareness**: When responding to a discussion, post in the thread, not the channel
144
+ - **Formatting**: Different platforms have different markdown flavors — the skill should note this
145
+ - **User mentions**: Search for users by name to get their IDs for mentions
146
+
147
+ ### Code collaboration patterns
148
+
149
+ - **PR creation**: Include a clear title, description with context, and link to relevant issues
150
+ - **Code review**: Use inline comments on specific files/lines, not just top-level comments
151
+ - **Issue management**: Check for duplicates before creating, link related issues
152
+
153
+ ---
154
+
155
+ ## Observability Skills
156
+
157
+ Skills that query monitoring platforms for logs, metrics, traces, and incidents.
158
+
159
+ ### The Context-First Pattern
160
+
161
+ ```markdown
162
+ ## Investigating issues
163
+
164
+ ### Step 1: Establish context
165
+ - What service or component is involved?
166
+ - What time range are we looking at?
167
+ - Are there known incidents or deployments in that window?
168
+
169
+ ### Step 2: Gather signals
170
+ - List the observability tools available in your session
171
+ - Search logs with relevant filters (service, severity, time range)
172
+ - Check metrics for the affected service
173
+ - Look for related traces if available
174
+ - Check for active monitors or incidents
175
+
176
+ ### Step 3: Correlate
177
+ - Cross-reference logs, metrics, and traces
178
+ - Look for patterns (error spikes, latency increases, deployment markers)
179
+ - Check upstream/downstream dependencies
180
+
181
+ ### Step 4: Summarize findings
182
+ - Present a timeline of events
183
+ - Highlight the most likely root cause
184
+ - Suggest next steps
185
+ ```
186
+
187
+ ---
188
+
189
+ ## Multi-Tool Orchestration
190
+
191
+ Many valuable skills combine multiple tool categories. Here's how to structure them:
192
+
193
+ ### The Pipeline Pattern
194
+
195
+ ```markdown
196
+ ## Workflow
197
+
198
+ This skill uses multiple platform capabilities in sequence:
199
+
200
+ ### Phase 1: Gather (data tools)
201
+ - Query the relevant data using available querying tools
202
+ - Pull context from work items
203
+
204
+ ### Phase 2: Analyze (compute)
205
+ - Process the gathered data (scripts, inline analysis)
206
+ - Identify patterns, anomalies, or action items
207
+
208
+ ### Phase 3: Act (workflow/communication tools)
209
+ - Create work items for action items found
210
+ - Post summaries to relevant channels (if messaging tools are available)
211
+ - Update dashboards or reports
212
+
213
+ ### Phase 4: Verify
214
+ - Confirm all actions were taken
215
+ - Summarize what was done for the user
216
+ ```
217
+
218
+ ### Tool dependency chains
219
+
220
+ When skills chain tools, make the dependencies explicit:
221
+
222
+ ```markdown
223
+ ## Tool chain
224
+ This skill needs tools from these categories (list your tools and check availability before starting):
225
+ 1. **Data querying** — to pull organizational data
226
+ 2. **Work management** — to create/update work items based on findings
227
+ 3. **Messaging** — to notify stakeholders (optional — gracefully degrade if unavailable)
228
+ ```
229
+
230
+ ---
231
+
232
+ ## Common Skill Archetypes
233
+
234
+ These are the most common types of skills people build. Use these as starting points:
235
+
236
+ ### 1. Report Generator
237
+ **Purpose**: Pull data, analyze it, produce a formatted report
238
+ **Tools needed**: Data querying (required), filesystem (for output)
239
+ **Key pattern**: Schema-first, iterative query refinement, structured output template
240
+
241
+ ### 2. Workflow Automator
242
+ **Purpose**: Automate a multi-step process (triage, sprint planning, release tracking)
243
+ **Tools needed**: Work management (required), messaging (optional for notifications)
244
+ **Key pattern**: Object lifecycle awareness, confirm-before-act, audit trail via timeline entries
245
+
246
+ ### 3. Dashboard Builder
247
+ **Purpose**: Create visual or data-driven dashboards from organizational data
248
+ **Tools needed**: Data querying (required), filesystem (for HTML/charts)
249
+ **Key pattern**: Schema-first, aggregation queries, HTML/chart generation scripts
250
+
251
+ ### 4. Investigation Assistant
252
+ **Purpose**: Help debug production issues by correlating signals
253
+ **Tools needed**: Observability (required), work management (optional for linked incidents)
254
+ **Key pattern**: Context-first, multi-signal correlation, timeline reconstruction
255
+
256
+ ### 5. Communication Drafter
257
+ **Purpose**: Draft and send messages (status updates, incident comms, release notes)
258
+ **Tools needed**: Messaging (required), work management (for data)
259
+ **Key pattern**: Confirm-before-send, audience-aware tone, structured templates
260
+
261
+ ### 6. Data Explorer
262
+ **Purpose**: Help users explore and understand their organizational data
263
+ **Tools needed**: Data querying (required)
264
+ **Key pattern**: Interactive schema exploration, progressive query building, explain-as-you-go
265
+
266
+ ### 7. Sprint/Project Tracker
267
+ **Purpose**: Track sprint progress, identify risks, generate status reports
268
+ **Tools needed**: Work management (required), messaging (optional)
269
+ **Key pattern**: Aggregate work item states, compare against goals, highlight blockers
270
+
271
+ ### 8. Deck/Presentation Builder
272
+ **Purpose**: Create slide decks or presentations from organizational data
273
+ **Tools needed**: Data querying (for data), filesystem (for output), templates (in assets/)
274
+ **Key pattern**: Data gathering -> narrative construction -> template population -> output generation
275
+
276
+ ---
277
+
278
+ ## Anti-Patterns to Avoid
279
+
280
+ 1. **Hardcoding tool names**: Don't write `Use mcp_devrev__create_work`. Write "Create a work item using the available work management tools."
281
+
282
+ 2. **Assuming tool availability**: Don't write a skill that silently fails if a tool is missing. Include discovery and graceful degradation.
283
+
284
+ 3. **Ignoring schema discovery**: Don't guess field names or object structures. Always discover the schema first for data-querying skills.
285
+
286
+ 4. **Skipping confirmation for external actions**: Creating issues, posting messages, and updating work items should be confirmed with the user unless the skill is explicitly designed for automation.
287
+
288
+ 5. **Monolithic tool chains**: Don't write a skill that requires 10 different tool categories. Keep tool dependencies minimal and make optional tools truly optional.
289
+
290
+ 6. **Tool-specific error messages**: Don't say "ExecuteSQL returned error 422". Say "The query failed — the error suggests [interpretation]. Try [alternative approach]."
File without changes
@@ -0,0 +1,401 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Aggregate individual run results into benchmark summary statistics.
4
+
5
+ Reads grading.json files from run directories and produces:
6
+ - run_summary with mean, stddev, min, max for each metric
7
+ - delta between with_skill and without_skill configurations
8
+
9
+ Usage:
10
+ python aggregate_benchmark.py <benchmark_dir>
11
+
12
+ Example:
13
+ python aggregate_benchmark.py benchmarks/2026-01-15T10-30-00/
14
+
15
+ The script supports two directory layouts:
16
+
17
+ Workspace layout (from skill-creator iterations):
18
+ <benchmark_dir>/
19
+ └── eval-N/
20
+ ├── with_skill/
21
+ │ ├── run-1/grading.json
22
+ │ └── run-2/grading.json
23
+ └── without_skill/
24
+ ├── run-1/grading.json
25
+ └── run-2/grading.json
26
+
27
+ Legacy layout (with runs/ subdirectory):
28
+ <benchmark_dir>/
29
+ └── runs/
30
+ └── eval-N/
31
+ ├── with_skill/
32
+ │ └── run-1/grading.json
33
+ └── without_skill/
34
+ └── run-1/grading.json
35
+ """
36
+
37
+ import argparse
38
+ import json
39
+ import math
40
+ import sys
41
+ from datetime import datetime, timezone
42
+ from pathlib import Path
43
+
44
+
45
def calculate_stats(values: list[float]) -> dict:
    """Summarize a list of numbers as mean, stddev, min and max.

    Every figure is rounded to four decimal places. The standard deviation
    uses the sample (n - 1) denominator and is 0.0 when only one value is
    given. An empty input yields 0.0 for all four figures.
    """
    if not values:
        return {"mean": 0.0, "stddev": 0.0, "min": 0.0, "max": 0.0}

    count = len(values)
    average = sum(values) / count

    spread = 0.0
    if count > 1:
        squared_error = sum((value - average) ** 2 for value in values)
        spread = math.sqrt(squared_error / (count - 1))

    figures = {
        "mean": average,
        "stddev": spread,
        "min": min(values),
        "max": max(values),
    }
    return {label: round(figure, 4) for label, figure in figures.items()}
65
+
66
+
67
def _read_json(path: Path):
    """Parse the file at *path* as UTF-8 encoded JSON and return the result."""
    with open(path, encoding="utf-8") as handle:
        return json.load(handle)


def _as_dict(value) -> dict:
    """Return *value* when it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _numeric_suffix(name: str):
    """Return the integer after the first "-" in *name*, or None if there is none."""
    try:
        return int(name.split("-")[1])
    except (IndexError, ValueError):
        return None


def _resolve_eval_id(eval_dir: Path, fallback: int):
    """Pick the eval id from eval_metadata.json, else the directory name, else *fallback*."""
    metadata_path = eval_dir / "eval_metadata.json"
    if metadata_path.exists():
        try:
            metadata = _read_json(metadata_path)
        except (OSError, ValueError):
            # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors.
            return fallback
        return _as_dict(metadata).get("eval_id", fallback)

    parsed = _numeric_suffix(eval_dir.name)
    return fallback if parsed is None else parsed


def _build_run_result(run_dir: Path, grading_file: Path, grading: dict, eval_id, run_number: int) -> dict:
    """Flatten one parsed grading.json (plus an optional timing.json) into a result record."""
    summary = _as_dict(grading.get("summary"))
    result = {
        "eval_id": eval_id,
        "run_number": run_number,
        "pass_rate": summary.get("pass_rate", 0.0),
        "passed": summary.get("passed", 0),
        "failed": summary.get("failed", 0),
        "total": summary.get("total", 0),
    }

    # Timing: grading.json wins for the duration; the sibling timing.json
    # fills in a missing duration and always supplies the token count, so
    # tokens are not lost when grading.json already carries a duration.
    timing = _as_dict(grading.get("timing"))
    result["time_seconds"] = timing.get("total_duration_seconds", 0.0)
    timing_file = run_dir / "timing.json"
    if timing_file.exists():
        try:
            timing_data = _as_dict(_read_json(timing_file))
        except (OSError, ValueError) as e:
            print(f"Warning: Could not read {timing_file}: {e}")
            timing_data = {}
        if not result["time_seconds"]:
            result["time_seconds"] = timing_data.get("total_duration_seconds", 0.0)
        result["tokens"] = timing_data.get("total_tokens", 0)

    # Execution metrics; output_chars stands in for tokens when no token
    # count was found in timing.json.
    metrics = _as_dict(grading.get("execution_metrics"))
    result["tool_calls"] = metrics.get("total_tool_calls", 0)
    if not result.get("tokens"):
        result["tokens"] = metrics.get("output_chars", 0)
    result["errors"] = metrics.get("errors_encountered", 0)

    # Expectations — viewer requires fields: text, passed, evidence
    raw_expectations = grading.get("expectations") or []
    for exp in raw_expectations:
        if not isinstance(exp, dict) or any(
            field not in exp for field in ("text", "passed", "evidence")
        ):
            print(f"Warning: expectation in {grading_file} missing required fields (text, passed, evidence): {exp}")
    result["expectations"] = raw_expectations

    # Notes are the concatenation of three optional lists in user_notes_summary.
    notes_summary = _as_dict(grading.get("user_notes_summary"))
    notes = []
    for key in ("uncertainties", "needs_review", "workarounds"):
        notes.extend(notes_summary.get(key) or [])
    result["notes"] = notes

    return result


def load_run_results(benchmark_dir: Path) -> dict:
    """
    Load all run results from a benchmark directory.

    Eval directories (``eval-*``) are looked up under ``<benchmark_dir>/runs``
    when that directory exists, otherwise directly under ``benchmark_dir``.
    Inside each eval directory, every sub-directory that contains ``run-*``
    directories is treated as a configuration.

    Entries that cannot be used are skipped instead of aborting the whole
    load: non-directory ``eval-*``/``run-*`` entries, ``run-*`` names without
    a numeric suffix, and missing, unreadable or malformed grading.json files.
    A warning is printed for each skipped run.

    Returns dict keyed by config name (e.g. "with_skill"/"without_skill",
    or "new_skill"/"old_skill"), each containing a list of run results.
    Returns an empty dict when no eval directories are found.
    """
    # Support both layouts: eval dirs directly under benchmark_dir, or under runs/
    runs_dir = benchmark_dir / "runs"
    if runs_dir.is_dir():
        search_dir = runs_dir
    elif any(benchmark_dir.glob("eval-*")):
        search_dir = benchmark_dir
    else:
        print(f"No eval directories found in {benchmark_dir} or {benchmark_dir / 'runs'}")
        return {}

    results: dict[str, list] = {}

    # Stray files such as "eval-notes.txt" also match the glob; only
    # directories can hold configurations.
    eval_dirs = sorted(path for path in search_dir.glob("eval-*") if path.is_dir())

    for eval_idx, eval_dir in enumerate(eval_dirs):
        eval_id = _resolve_eval_id(eval_dir, eval_idx)

        # Discover config directories dynamically rather than hardcoding names
        for config_dir in sorted(eval_dir.iterdir()):
            if not config_dir.is_dir():
                continue
            run_dirs = sorted(path for path in config_dir.glob("run-*") if path.is_dir())
            # Skip non-config directories (inputs, outputs, etc.)
            if not run_dirs:
                continue
            config_runs = results.setdefault(config_dir.name, [])

            for run_dir in run_dirs:
                run_number = _numeric_suffix(run_dir.name)
                if run_number is None:
                    print(f"Warning: skipping {run_dir}: run directory name has no numeric suffix")
                    continue

                grading_file = run_dir / "grading.json"
                if not grading_file.exists():
                    print(f"Warning: grading.json not found in {run_dir}")
                    continue

                try:
                    grading = _read_json(grading_file)
                except json.JSONDecodeError as e:
                    print(f"Warning: Invalid JSON in {grading_file}: {e}")
                    continue
                except (OSError, UnicodeDecodeError) as e:
                    print(f"Warning: Could not read {grading_file}: {e}")
                    continue

                if not isinstance(grading, dict):
                    print(f"Warning: {grading_file} does not contain a JSON object")
                    continue

                config_runs.append(
                    _build_run_result(run_dir, grading_file, grading, eval_id, run_number)
                )

    return results
174
+
175
+
176
def aggregate_results(results: dict) -> dict:
    """
    Condense per-run results into per-configuration statistics.

    For every configuration in ``results`` the returned dict holds
    ``pass_rate``, ``time_seconds`` and ``tokens`` statistics (mean, stddev,
    min, max). A ``"delta"`` entry is added holding the signed, formatted
    difference of the means between the first and the second configuration
    (in the iteration order of ``results``). When fewer than two
    configurations exist, the missing side counts as 0.
    """
    config_names = list(results)
    run_summary = {}

    for name in config_names:
        config_runs = results.get(name, [])

        if config_runs:
            pass_rates = [run["pass_rate"] for run in config_runs]
            durations = [run["time_seconds"] for run in config_runs]
            token_counts = [run.get("tokens", 0) for run in config_runs]
            run_summary[name] = {
                "pass_rate": calculate_stats(pass_rates),
                "time_seconds": calculate_stats(durations),
                "tokens": calculate_stats(token_counts)
            }
        else:
            # No runs were loaded for this configuration: report all zeros.
            run_summary[name] = {
                "pass_rate": {"mean": 0.0, "stddev": 0.0, "min": 0.0, "max": 0.0},
                "time_seconds": {"mean": 0.0, "stddev": 0.0, "min": 0.0, "max": 0.0},
                "tokens": {"mean": 0, "stddev": 0, "min": 0, "max": 0}
            }

    # The delta compares the first configuration against the second one.
    primary = run_summary.get(config_names[0], {}) if config_names else {}
    baseline = run_summary.get(config_names[1], {}) if len(config_names) >= 2 else {}

    def mean_gap(metric: str):
        """Difference of the two configurations' means for *metric*."""
        return primary.get(metric, {}).get("mean", 0) - baseline.get(metric, {}).get("mean", 0)

    pass_rate_gap = mean_gap("pass_rate")
    time_gap = mean_gap("time_seconds")
    token_gap = mean_gap("tokens")

    run_summary["delta"] = {
        "pass_rate": f"{pass_rate_gap:+.2f}",
        "time_seconds": f"{time_gap:+.1f}",
        "tokens": f"{token_gap:+.0f}"
    }

    return run_summary
225
+
226
+
227
def _eval_id_sort_key(eval_id):
    """Sort key that orders numeric eval ids first, then all others by their string form."""
    if isinstance(eval_id, (int, float)):
        return (0, eval_id, "")
    return (1, 0, str(eval_id))


def generate_benchmark(benchmark_dir: Path, skill_name: str = "", skill_path: str = "") -> dict:
    """
    Generate complete benchmark.json content from run results.

    Loads every run under ``benchmark_dir``, aggregates the statistics and
    returns a dict with four keys:

    - ``metadata``: skill name/path (placeholders when not given), model
      placeholders, a UTC timestamp, the eval ids that were found and the
      number of runs per configuration.
    - ``runs``: one flat record per loaded run.
    - ``run_summary``: the aggregated statistics and delta.
    - ``notes``: an empty list, to be filled by the analyzer.

    ``runs_per_configuration`` is derived from the loaded data: it is the
    largest number of runs found for any single (configuration, eval) pair,
    or 0 when nothing was loaded.
    """
    results = load_run_results(benchmark_dir)
    run_summary = aggregate_results(results)

    # Build runs array for benchmark.json, counting runs per (config, eval)
    # on the way so the metadata reflects what was actually loaded.
    runs = []
    run_counts: dict = {}
    for config, config_runs in results.items():
        for result in config_runs:
            count_key = (config, result["eval_id"])
            run_counts[count_key] = run_counts.get(count_key, 0) + 1
            runs.append({
                "eval_id": result["eval_id"],
                "configuration": config,
                "run_number": result["run_number"],
                "result": {
                    "pass_rate": result["pass_rate"],
                    "passed": result["passed"],
                    "failed": result["failed"],
                    "total": result["total"],
                    "time_seconds": result["time_seconds"],
                    "tokens": result.get("tokens", 0),
                    "tool_calls": result.get("tool_calls", 0),
                    "errors": result.get("errors", 0)
                },
                "expectations": result["expectations"],
                "notes": result["notes"]
            })

    # Eval ids can mix integers (directory names / fallback index) with
    # strings (eval_metadata.json), which plain sorted() cannot compare.
    eval_ids = sorted({run["eval_id"] for run in runs}, key=_eval_id_sort_key)

    benchmark = {
        "metadata": {
            "skill_name": skill_name or "<skill-name>",
            "skill_path": skill_path or "<path/to/skill>",
            "executor_model": "<model-name>",
            "analyzer_model": "<model-name>",
            "timestamp": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
            "evals_run": eval_ids,
            "runs_per_configuration": max(run_counts.values(), default=0)
        },
        "runs": runs,
        "run_summary": run_summary,
        "notes": []  # To be filled by analyzer
    }

    return benchmark
279
+
280
+
281
def generate_markdown(benchmark: dict) -> str:
    """Render benchmark data as the human-readable benchmark.md text.

    The output has a title and metadata header, a summary table comparing
    the first two configurations found in ``run_summary`` (placeholders
    "config_a"/"config_b" stand in for missing ones), and a bulleted
    "Notes" section when ``benchmark["notes"]`` is non-empty. Lines are
    joined with "\\n" and there is no trailing newline.
    """
    meta = benchmark["metadata"]
    summary = benchmark["run_summary"]

    # Every key except "delta" names a configuration.
    names = [key for key in summary if key != "delta"]
    first = names[0] if len(names) >= 1 else "config_a"
    second = names[1] if len(names) >= 2 else "config_b"
    first_label = first.replace("_", " ").title()
    second_label = second.replace("_", " ").title()

    first_stats = summary.get(first, {})
    second_stats = summary.get(second, {})
    delta = summary.get("delta", {})

    def cell(mean, stddev, digits: int, unit: str) -> str:
        """Format one "mean ± stddev" table cell."""
        return f"{mean:.{digits}f}{unit} ± {stddev:.{digits}f}{unit}"

    evals_text = ", ".join(map(str, meta["evals_run"]))

    out = []
    out.append(f"# Skill Benchmark: {meta['skill_name']}")
    out.append("")
    out.append(f"**Model**: {meta['executor_model']}")
    out.append(f"**Date**: {meta['timestamp']}")
    out.append(f"**Evals**: {evals_text} ({meta['runs_per_configuration']} runs each per configuration)")
    out.append("")
    out.append("## Summary")
    out.append("")
    out.append(f"| Metric | {first_label} | {second_label} | Delta |")
    out.append("|--------|------------|---------------|-------|")

    # Pass rate row: fractions are shown as whole percentages.
    rate_a = first_stats.get("pass_rate", {})
    rate_b = second_stats.get("pass_rate", {})
    rate_cell_a = cell(rate_a.get("mean", 0) * 100, rate_a.get("stddev", 0) * 100, 0, "%")
    rate_cell_b = cell(rate_b.get("mean", 0) * 100, rate_b.get("stddev", 0) * 100, 0, "%")
    out.append(f"| Pass Rate | {rate_cell_a} | {rate_cell_b} | {delta.get('pass_rate', '—')} |")

    # Time row: seconds with one decimal place.
    time_a = first_stats.get("time_seconds", {})
    time_b = second_stats.get("time_seconds", {})
    time_cell_a = cell(time_a.get("mean", 0), time_a.get("stddev", 0), 1, "s")
    time_cell_b = cell(time_b.get("mean", 0), time_b.get("stddev", 0), 1, "s")
    out.append(f"| Time | {time_cell_a} | {time_cell_b} | {delta.get('time_seconds', '—')}s |")

    # Tokens row: whole numbers, no unit.
    tokens_a = first_stats.get("tokens", {})
    tokens_b = second_stats.get("tokens", {})
    tokens_cell_a = cell(tokens_a.get("mean", 0), tokens_a.get("stddev", 0), 0, "")
    tokens_cell_b = cell(tokens_b.get("mean", 0), tokens_b.get("stddev", 0), 0, "")
    out.append(f"| Tokens | {tokens_cell_a} | {tokens_cell_b} | {delta.get('tokens', '—')} |")

    # Optional notes section.
    if benchmark.get("notes"):
        out += ["", "## Notes", ""]
        out += [f"- {note}" for note in benchmark["notes"]]

    return "\n".join(out)
336
+
337
+
338
def main():
    """Command-line entry point.

    Parses the arguments, aggregates the runs found under ``benchmark_dir``
    and writes two files: the benchmark JSON (``--output`` or
    ``<benchmark_dir>/benchmark.json``) and a markdown report next to it
    with the same name and a ``.md`` suffix. A short pass-rate summary is
    printed afterwards.

    Exits with status 1 when ``benchmark_dir`` is not an existing directory.
    """
    parser = argparse.ArgumentParser(
        description="Aggregate benchmark run results into summary statistics"
    )
    parser.add_argument(
        "benchmark_dir",
        type=Path,
        help="Path to the benchmark directory"
    )
    parser.add_argument(
        "--skill-name",
        default="",
        help="Name of the skill being benchmarked"
    )
    parser.add_argument(
        "--skill-path",
        default="",
        help="Path to the skill being benchmarked"
    )
    parser.add_argument(
        "--output", "-o",
        type=Path,
        help="Output path for benchmark.json (default: <benchmark_dir>/benchmark.json)"
    )

    args = parser.parse_args()

    # A regular file at this path cannot hold eval directories either.
    if not args.benchmark_dir.is_dir():
        print(f"Directory not found: {args.benchmark_dir}", file=sys.stderr)
        sys.exit(1)

    # Generate benchmark
    benchmark = generate_benchmark(args.benchmark_dir, args.skill_name, args.skill_path)

    # Determine output paths
    output_json = args.output or (args.benchmark_dir / "benchmark.json")
    output_md = output_json.with_suffix(".md")
    # --output may point into a directory that does not exist yet.
    output_json.parent.mkdir(parents=True, exist_ok=True)

    # Write benchmark.json
    with open(output_json, "w", encoding="utf-8") as f:
        json.dump(benchmark, f, indent=2)
    print(f"Generated: {output_json}")

    # Write benchmark.md; the report contains "±" and "—", so the encoding
    # is fixed to UTF-8 rather than left to the platform's locale default.
    markdown = generate_markdown(benchmark)
    with open(output_md, "w", encoding="utf-8") as f:
        f.write(markdown)
    print(f"Generated: {output_md}")

    # Print summary
    run_summary = benchmark["run_summary"]
    configs = [k for k in run_summary if k != "delta"]
    delta = run_summary.get("delta", {})

    print("\nSummary:")
    for config in configs:
        pr = run_summary[config]["pass_rate"]["mean"]
        label = config.replace("_", " ").title()
        print(f" {label}: {pr*100:.1f}% pass rate")
    print(f" Delta: {delta.get('pass_rate', '—')}")


if __name__ == "__main__":
    main()