@code-insights/cli 3.6.1 → 4.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (214) hide show
  1. package/CHANGELOG.md +52 -0
  2. package/README.md +40 -4
  3. package/dashboard-dist/assets/index-D1JDyyu5.js +660 -0
  4. package/dashboard-dist/assets/index-kwbCW1n2.css +1 -0
  5. package/dashboard-dist/index.html +2 -2
  6. package/dist/commands/reflect.d.ts +3 -0
  7. package/dist/commands/reflect.d.ts.map +1 -0
  8. package/dist/commands/reflect.js +457 -0
  9. package/dist/commands/reflect.js.map +1 -0
  10. package/dist/commands/reset.d.ts.map +1 -1
  11. package/dist/commands/reset.js +3 -1
  12. package/dist/commands/reset.js.map +1 -1
  13. package/dist/commands/stats/actions/patterns.d.ts +3 -0
  14. package/dist/commands/stats/actions/patterns.d.ts.map +1 -0
  15. package/dist/commands/stats/actions/patterns.js +140 -0
  16. package/dist/commands/stats/actions/patterns.js.map +1 -0
  17. package/dist/commands/stats/data/aggregation-helpers.d.ts +23 -0
  18. package/dist/commands/stats/data/aggregation-helpers.d.ts.map +1 -0
  19. package/dist/commands/stats/data/aggregation-helpers.js +128 -0
  20. package/dist/commands/stats/data/aggregation-helpers.js.map +1 -0
  21. package/dist/commands/stats/data/aggregation.d.ts +3 -35
  22. package/dist/commands/stats/data/aggregation.d.ts.map +1 -1
  23. package/dist/commands/stats/data/aggregation.js +8 -290
  24. package/dist/commands/stats/data/aggregation.js.map +1 -1
  25. package/dist/commands/stats/data/time-series.d.ts +24 -0
  26. package/dist/commands/stats/data/time-series.d.ts.map +1 -0
  27. package/dist/commands/stats/data/time-series.js +162 -0
  28. package/dist/commands/stats/data/time-series.js.map +1 -0
  29. package/dist/commands/stats/index.d.ts.map +1 -1
  30. package/dist/commands/stats/index.js +7 -1
  31. package/dist/commands/stats/index.js.map +1 -1
  32. package/dist/commands/sync.d.ts +19 -0
  33. package/dist/commands/sync.d.ts.map +1 -1
  34. package/dist/commands/sync.js +67 -1
  35. package/dist/commands/sync.js.map +1 -1
  36. package/dist/constants/llm-providers.js +1 -1
  37. package/dist/constants/llm-providers.js.map +1 -1
  38. package/dist/db/client.d.ts +7 -0
  39. package/dist/db/client.d.ts.map +1 -1
  40. package/dist/db/client.js +11 -1
  41. package/dist/db/client.js.map +1 -1
  42. package/dist/db/migrate.d.ts +10 -1
  43. package/dist/db/migrate.d.ts.map +1 -1
  44. package/dist/db/migrate.js +96 -0
  45. package/dist/db/migrate.js.map +1 -1
  46. package/dist/db/read.d.ts +5 -0
  47. package/dist/db/read.d.ts.map +1 -1
  48. package/dist/db/read.js +20 -3
  49. package/dist/db/read.js.map +1 -1
  50. package/dist/db/schema.d.ts +1 -1
  51. package/dist/db/schema.js +2 -2
  52. package/dist/db/schema.js.map +1 -1
  53. package/dist/db/write.d.ts.map +1 -1
  54. package/dist/db/write.js +8 -2
  55. package/dist/db/write.js.map +1 -1
  56. package/dist/index.js +39 -3
  57. package/dist/index.js.map +1 -1
  58. package/dist/parser/jsonl.d.ts +19 -1
  59. package/dist/parser/jsonl.d.ts.map +1 -1
  60. package/dist/parser/jsonl.js +109 -3
  61. package/dist/parser/jsonl.js.map +1 -1
  62. package/dist/providers/codex.js +4 -1
  63. package/dist/providers/codex.js.map +1 -1
  64. package/dist/providers/copilot-cli.js +3 -0
  65. package/dist/providers/copilot-cli.js.map +1 -1
  66. package/dist/providers/copilot.js +3 -0
  67. package/dist/providers/copilot.js.map +1 -1
  68. package/dist/providers/cursor.js +3 -0
  69. package/dist/providers/cursor.js.map +1 -1
  70. package/dist/types.d.ts +109 -0
  71. package/dist/types.d.ts.map +1 -1
  72. package/dist/utils/date-utils.d.ts +6 -0
  73. package/dist/utils/date-utils.d.ts.map +1 -0
  74. package/dist/utils/date-utils.js +26 -0
  75. package/dist/utils/date-utils.js.map +1 -0
  76. package/dist/utils/telemetry.d.ts +1 -1
  77. package/dist/utils/telemetry.d.ts.map +1 -1
  78. package/dist/utils/telemetry.js +1 -1
  79. package/dist/utils/telemetry.js.map +1 -1
  80. package/package.json +2 -1
  81. package/server-dist/export/agent-rules.d.ts.map +1 -1
  82. package/server-dist/export/agent-rules.js +15 -4
  83. package/server-dist/export/agent-rules.js.map +1 -1
  84. package/server-dist/export/knowledge-base.d.ts.map +1 -1
  85. package/server-dist/export/knowledge-base.js +30 -4
  86. package/server-dist/export/knowledge-base.js.map +1 -1
  87. package/server-dist/index.d.ts.map +1 -1
  88. package/server-dist/index.js +4 -0
  89. package/server-dist/index.js.map +1 -1
  90. package/server-dist/llm/analysis-db.d.ts +51 -0
  91. package/server-dist/llm/analysis-db.d.ts.map +1 -0
  92. package/server-dist/llm/analysis-db.js +208 -0
  93. package/server-dist/llm/analysis-db.js.map +1 -0
  94. package/server-dist/llm/analysis-internal.d.ts +36 -0
  95. package/server-dist/llm/analysis-internal.d.ts.map +1 -0
  96. package/server-dist/llm/analysis-internal.js +23 -0
  97. package/server-dist/llm/analysis-internal.js.map +1 -0
  98. package/server-dist/llm/analysis-pricing.d.ts +25 -0
  99. package/server-dist/llm/analysis-pricing.d.ts.map +1 -0
  100. package/server-dist/llm/analysis-pricing.js +74 -0
  101. package/server-dist/llm/analysis-pricing.js.map +1 -0
  102. package/server-dist/llm/analysis-usage-db.d.ts +45 -0
  103. package/server-dist/llm/analysis-usage-db.d.ts.map +1 -0
  104. package/server-dist/llm/analysis-usage-db.js +35 -0
  105. package/server-dist/llm/analysis-usage-db.js.map +1 -0
  106. package/server-dist/llm/analysis.d.ts +9 -79
  107. package/server-dist/llm/analysis.d.ts.map +1 -1
  108. package/server-dist/llm/analysis.js +119 -375
  109. package/server-dist/llm/analysis.js.map +1 -1
  110. package/server-dist/llm/facet-extraction.d.ts +14 -0
  111. package/server-dist/llm/facet-extraction.d.ts.map +1 -0
  112. package/server-dist/llm/facet-extraction.js +91 -0
  113. package/server-dist/llm/facet-extraction.js.map +1 -0
  114. package/server-dist/llm/friction-normalize.d.ts +16 -0
  115. package/server-dist/llm/friction-normalize.d.ts.map +1 -0
  116. package/server-dist/llm/friction-normalize.js +54 -0
  117. package/server-dist/llm/friction-normalize.js.map +1 -0
  118. package/server-dist/llm/index.d.ts +3 -2
  119. package/server-dist/llm/index.d.ts.map +1 -1
  120. package/server-dist/llm/index.js +1 -1
  121. package/server-dist/llm/index.js.map +1 -1
  122. package/server-dist/llm/message-format.d.ts +32 -0
  123. package/server-dist/llm/message-format.d.ts.map +1 -0
  124. package/server-dist/llm/message-format.js +129 -0
  125. package/server-dist/llm/message-format.js.map +1 -0
  126. package/server-dist/llm/normalize-utils.d.ts +22 -0
  127. package/server-dist/llm/normalize-utils.d.ts.map +1 -0
  128. package/server-dist/llm/normalize-utils.js +71 -0
  129. package/server-dist/llm/normalize-utils.js.map +1 -0
  130. package/server-dist/llm/pattern-normalize.d.ts +19 -0
  131. package/server-dist/llm/pattern-normalize.d.ts.map +1 -0
  132. package/server-dist/llm/pattern-normalize.js +90 -0
  133. package/server-dist/llm/pattern-normalize.js.map +1 -0
  134. package/server-dist/llm/prompt-constants.d.ts +9 -0
  135. package/server-dist/llm/prompt-constants.d.ts.map +1 -0
  136. package/server-dist/llm/prompt-constants.js +169 -0
  137. package/server-dist/llm/prompt-constants.js.map +1 -0
  138. package/server-dist/llm/prompt-quality-analysis.d.ts +8 -0
  139. package/server-dist/llm/prompt-quality-analysis.d.ts.map +1 -0
  140. package/server-dist/llm/prompt-quality-analysis.js +133 -0
  141. package/server-dist/llm/prompt-quality-analysis.js.map +1 -0
  142. package/server-dist/llm/prompt-quality-normalize.d.ts +26 -0
  143. package/server-dist/llm/prompt-quality-normalize.d.ts.map +1 -0
  144. package/server-dist/llm/prompt-quality-normalize.js +116 -0
  145. package/server-dist/llm/prompt-quality-normalize.js.map +1 -0
  146. package/server-dist/llm/prompt-types.d.ts +124 -0
  147. package/server-dist/llm/prompt-types.d.ts.map +1 -0
  148. package/server-dist/llm/prompt-types.js +4 -0
  149. package/server-dist/llm/prompt-types.js.map +1 -0
  150. package/server-dist/llm/prompts.d.ts +57 -100
  151. package/server-dist/llm/prompts.d.ts.map +1 -1
  152. package/server-dist/llm/prompts.js +606 -232
  153. package/server-dist/llm/prompts.js.map +1 -1
  154. package/server-dist/llm/providers/anthropic.d.ts.map +1 -1
  155. package/server-dist/llm/providers/anthropic.js +12 -0
  156. package/server-dist/llm/providers/anthropic.js.map +1 -1
  157. package/server-dist/llm/providers/gemini.d.ts.map +1 -1
  158. package/server-dist/llm/providers/gemini.js +10 -2
  159. package/server-dist/llm/providers/gemini.js.map +1 -1
  160. package/server-dist/llm/providers/ollama.d.ts.map +1 -1
  161. package/server-dist/llm/providers/ollama.js +3 -1
  162. package/server-dist/llm/providers/ollama.js.map +1 -1
  163. package/server-dist/llm/providers/openai.d.ts.map +1 -1
  164. package/server-dist/llm/providers/openai.js +4 -1
  165. package/server-dist/llm/providers/openai.js.map +1 -1
  166. package/server-dist/llm/recurring-insights.d.ts +26 -0
  167. package/server-dist/llm/recurring-insights.d.ts.map +1 -0
  168. package/server-dist/llm/recurring-insights.js +119 -0
  169. package/server-dist/llm/recurring-insights.js.map +1 -0
  170. package/server-dist/llm/reflect-prompts.d.ts +55 -0
  171. package/server-dist/llm/reflect-prompts.d.ts.map +1 -0
  172. package/server-dist/llm/reflect-prompts.js +151 -0
  173. package/server-dist/llm/reflect-prompts.js.map +1 -0
  174. package/server-dist/llm/response-parsers.d.ts +8 -0
  175. package/server-dist/llm/response-parsers.d.ts.map +1 -0
  176. package/server-dist/llm/response-parsers.js +151 -0
  177. package/server-dist/llm/response-parsers.js.map +1 -0
  178. package/server-dist/llm/types.d.ts +23 -1
  179. package/server-dist/llm/types.d.ts.map +1 -1
  180. package/server-dist/llm/types.js +10 -1
  181. package/server-dist/llm/types.js.map +1 -1
  182. package/server-dist/routes/analysis.d.ts.map +1 -1
  183. package/server-dist/routes/analysis.js +107 -282
  184. package/server-dist/routes/analysis.js.map +1 -1
  185. package/server-dist/routes/analytics.d.ts.map +1 -1
  186. package/server-dist/routes/analytics.js +3 -1
  187. package/server-dist/routes/analytics.js.map +1 -1
  188. package/server-dist/routes/export.d.ts.map +1 -1
  189. package/server-dist/routes/export.js +19 -27
  190. package/server-dist/routes/export.js.map +1 -1
  191. package/server-dist/routes/facets.d.ts +4 -0
  192. package/server-dist/routes/facets.d.ts.map +1 -0
  193. package/server-dist/routes/facets.js +208 -0
  194. package/server-dist/routes/facets.js.map +1 -0
  195. package/server-dist/routes/insights.d.ts.map +1 -1
  196. package/server-dist/routes/insights.js +12 -11
  197. package/server-dist/routes/insights.js.map +1 -1
  198. package/server-dist/routes/reflect.d.ts +4 -0
  199. package/server-dist/routes/reflect.d.ts.map +1 -0
  200. package/server-dist/routes/reflect.js +332 -0
  201. package/server-dist/routes/reflect.js.map +1 -0
  202. package/server-dist/routes/route-helpers.d.ts +124 -0
  203. package/server-dist/routes/route-helpers.d.ts.map +1 -0
  204. package/server-dist/routes/route-helpers.js +242 -0
  205. package/server-dist/routes/route-helpers.js.map +1 -0
  206. package/server-dist/routes/sessions.d.ts.map +1 -1
  207. package/server-dist/routes/sessions.js +29 -5
  208. package/server-dist/routes/sessions.js.map +1 -1
  209. package/server-dist/routes/shared-aggregation.d.ts +82 -0
  210. package/server-dist/routes/shared-aggregation.d.ts.map +1 -0
  211. package/server-dist/routes/shared-aggregation.js +384 -0
  212. package/server-dist/routes/shared-aggregation.js.map +1 -0
  213. package/dashboard-dist/assets/index-BaKju1iW.js +0 -607
  214. package/dashboard-dist/assets/index-_SWpRg6C.css +0 -1
@@ -1,71 +1,106 @@
1
- // Analysis prompts and response parsers for LLM session analysis.
2
- // Ported from web repo (src/lib/llm/prompts.ts) with SQLite-aware message formatting.
3
- import { jsonrepair } from 'jsonrepair';
1
+ // Prompt template strings and generator functions for LLM session analysis.
2
+ // Types prompt-types.ts, constants → prompt-constants.ts,
3
+ // formatting message-format.ts, parsers → response-parsers.ts.
4
+ import { FRICTION_CLASSIFICATION_GUIDANCE, CANONICAL_FRICTION_CATEGORIES, CANONICAL_PATTERN_CATEGORIES, CANONICAL_PQ_DEFICIT_CATEGORIES, CANONICAL_PQ_STRENGTH_CATEGORIES, PROMPT_QUALITY_CLASSIFICATION_GUIDANCE, EFFECTIVE_PATTERN_CLASSIFICATION_GUIDANCE, } from './prompt-constants.js';
5
+ import { formatSessionMetaLine } from './message-format.js';
6
+ // =============================================================================
7
+ // SHARED SYSTEM PROMPT
8
+ // A minimal (~100 token) system prompt shared by all analysis calls.
9
+ // The full classification guidance and schema examples live in the instruction
10
+ // suffix (user[1]), keeping the system prompt cacheable across calls.
11
+ // =============================================================================
4
12
  /**
5
- * Format SQLite message rows for LLM consumption.
6
- * Handles snake_case fields and JSON-encoded tool_calls/tool_results.
13
+ * Shared system prompt for all LLM analysis calls.
14
+ * Paired with buildCacheableConversationBlock() + an analysis-specific instruction block.
7
15
  */
8
- export function formatMessagesForAnalysis(messages) {
9
- let userIndex = 0;
10
- let assistantIndex = 0;
11
- return messages
12
- .map((m) => {
13
- const role = m.type === 'user' ? 'User' : m.type === 'assistant' ? 'Assistant' : 'System';
14
- const roleLabel = role === 'User'
15
- ? `User#${userIndex++}`
16
- : role === 'Assistant'
17
- ? `Assistant#${assistantIndex++}`
18
- : 'System';
19
- // Parse JSON-encoded tool_calls
20
- let toolCalls = [];
21
- try {
22
- toolCalls = m.tool_calls ? JSON.parse(m.tool_calls) : [];
23
- }
24
- catch {
25
- toolCalls = [];
26
- }
27
- // Parse JSON-encoded tool_results
28
- let toolResults = [];
29
- try {
30
- toolResults = m.tool_results ? JSON.parse(m.tool_results) : [];
31
- }
32
- catch {
33
- toolResults = [];
34
- }
35
- const toolInfo = toolCalls.length > 0
36
- ? `\n[Tools used: ${toolCalls.map(t => t.name || 'unknown').join(', ')}]`
37
- : '';
38
- // Include thinking content — capped at 1000 chars to stay within token budget
39
- const thinkingInfo = m.thinking
40
- ? `\n[Thinking: ${m.thinking.slice(0, 1000)}]`
41
- : '';
42
- // Include tool results for context — 500 chars per result (error messages need ~300-400 chars)
43
- const resultInfo = toolResults.length > 0
44
- ? `\n[Tool results: ${toolResults.map(r => (r.output || '').slice(0, 500)).join(' | ')}]`
45
- : '';
46
- return `### ${roleLabel}:\n${m.content}${thinkingInfo}${toolInfo}${resultInfo}`;
47
- })
48
- .join('\n\n');
16
+ export const SHARED_ANALYST_SYSTEM_PROMPT = `You are a senior staff engineer analyzing an AI coding session. You will receive the conversation transcript followed by specific extraction instructions. Respond with valid JSON only, wrapped in <json>...</json> tags.`;
17
+ // =============================================================================
18
+ // CACHEABLE CONVERSATION BLOCK
19
+ // Wraps the formatted conversation in an Anthropic ephemeral cache block.
20
+ // CRITICAL: Must contain ONLY the formatted messages — no project name, no session
21
+ // metadata, no per-session variables. This ensures cache hits across sessions.
22
+ // =============================================================================
23
+ /**
24
+ * Wrap formatted conversation messages in a cacheable content block.
25
+ * The cache_control field instructs Anthropic to cache everything up to
26
+ * and including this block (ephemeral, 5-minute TTL).
27
+ *
28
+ * Non-Anthropic providers receive this as a ContentBlock[] and use
29
+ * flattenContent() to convert it to a plain string.
30
+ *
31
+ * @param formattedMessages - Output of formatMessagesForAnalysis()
32
+ */
33
+ export function buildCacheableConversationBlock(formattedMessages) {
34
+ return {
35
+ type: 'text',
36
+ // Trailing double newline ensures the instruction block (user[1]) reads as a
37
+ // distinct section when providers flatten content blocks to a single string.
38
+ text: `--- CONVERSATION ---\n${formattedMessages}\n--- END CONVERSATION ---\n\n`,
39
+ cache_control: { type: 'ephemeral' },
40
+ };
49
41
  }
42
+ // =============================================================================
43
+ // SESSION ANALYSIS INSTRUCTIONS
44
+ // The instruction suffix for session analysis calls (user[1]).
45
+ // Contains the full analyst persona, schema, and quality guidance.
46
+ // Per-session variables (project name, summary, meta) go here — NOT in the
47
+ // cached conversation block.
48
+ // =============================================================================
50
49
  /**
51
- * System prompt for session analysis.
50
+ * Build the instruction suffix for session analysis.
51
+ * Used as the second content block in the user message, after the cached conversation.
52
52
  */
53
- export const SESSION_ANALYSIS_SYSTEM_PROMPT = `You are a senior staff engineer writing entries for a team's engineering knowledge base. You've just observed an AI-assisted coding session and your job is to extract the insights that would save another engineer time if they encountered a similar situation 6 months from now.
53
+ export function buildSessionAnalysisInstructions(projectName, sessionSummary, meta) {
54
+ return `You are a senior staff engineer writing entries for a team's engineering knowledge base. You've just observed an AI-assisted coding session and your job is to extract the insights that would save another engineer time if they encountered a similar situation 6 months from now.
54
55
 
55
56
  Your audience is a developer who has never seen this session but works on the same codebase. They need enough context to understand WHY a decision was made, WHAT specific gotcha was discovered, and WHEN this knowledge applies.
56
57
 
58
+ Project: ${projectName}
59
+ ${sessionSummary ? `Session Summary: ${sessionSummary}\n` : ''}${formatSessionMetaLine(meta)}
60
+ === PART 1: SESSION FACETS ===
61
+ Extract these FIRST as a holistic session assessment:
62
+
63
+ 1. outcome_satisfaction: Rate the session outcome.
64
+ - "high": Task completed successfully, user satisfied
65
+ - "medium": Partial completion or minor issues
66
+ - "low": Significant problems, user frustrated
67
+ - "abandoned": Session ended without achieving the goal
68
+
69
+ 2. workflow_pattern: Identify the dominant workflow pattern (or null if unclear).
70
+ Recommended values: "plan-then-implement", "iterative-refinement", "debug-fix-verify", "explore-then-build", "direct-execution"
71
+
72
+ 3. friction_points: Identify up to 5 moments where progress was blocked or slowed (array, max 5).
73
+ Each friction point has:
74
+ - _reasoning: (REQUIRED) Your reasoning chain for category + attribution. 2-3 sentences max. Walk through the decision tree steps. This field is saved but not shown to users — use it to think before classifying.
75
+ - category: Use one of these PREFERRED categories when applicable: ${CANONICAL_FRICTION_CATEGORIES.join(', ')}. Create a new kebab-case category only when none of these fit.
76
+ - attribution: "user-actionable" (better user input would have prevented this), "ai-capability" (AI failed despite adequate input), or "environmental" (external constraint)
77
+ - description: One neutral sentence describing what happened, with specific details (file names, APIs, errors)
78
+ - severity: "high" (blocked progress for multiple turns), "medium" (caused a detour), "low" (minor hiccup)
79
+ - resolution: "resolved" (fixed in session), "workaround" (bypassed), "unresolved" (still broken)
80
+ ${FRICTION_CLASSIFICATION_GUIDANCE}
81
+
82
+ 4. effective_patterns: Up to 3 techniques or approaches that worked particularly well (array, max 3).
83
+ Each has:
84
+ - _reasoning: (REQUIRED) Your reasoning chain for category + driver. 2-3 sentences max. Walk through the decision tree steps and baseline exclusion check. This field is saved but not shown to users — use it to think before classifying.
85
+ - category: Use one of these PREFERRED categories when applicable: structured-planning, incremental-implementation, verification-workflow, systematic-debugging, self-correction, context-gathering, domain-expertise, effective-tooling. Create a new kebab-case category only when none fit.
86
+ - description: Specific technique worth repeating (1-2 sentences with concrete detail)
87
+ - confidence: 0-100 how confident you are this is genuinely effective
88
+ - driver: Who drove this pattern — "user-driven" (user explicitly requested it), "ai-driven" (AI exhibited it without prompting), or "collaborative" (both contributed or emerged from interaction)
89
+ ${EFFECTIVE_PATTERN_CLASSIFICATION_GUIDANCE}
90
+
91
+ 5. had_course_correction: true if the user redirected the AI from a wrong approach, false otherwise
92
+ 6. course_correction_reason: If had_course_correction is true, briefly explain what was corrected (or null)
93
+ 7. iteration_count: Number of times the user had to clarify, correct, or re-explain something
94
+
95
+ If the session has minimal friction and straightforward execution, use empty arrays for friction_points, set outcome_satisfaction to "high", and iteration_count to 0.
96
+
97
+ === PART 2: INSIGHTS ===
98
+ Then extract these:
99
+
57
100
  You will extract:
58
101
  1. **Summary**: A narrative of what was accomplished and the outcome
59
102
  2. **Decisions**: Technical choices made — with full situation context, reasoning, rejected alternatives, trade-offs, and conditions for revisiting (max 3)
60
103
  3. **Learnings**: Technical discoveries, gotchas, debugging breakthroughs — with the observable symptom, root cause, and a transferable takeaway (max 5)
61
- 4. **Session Character**: Classify the session into exactly one of these types based on its overall nature:
62
- - deep_focus: Long, concentrated work on a specific problem or area (50+ messages, deep into one topic)
63
- - bug_hunt: Debugging-driven — investigating errors, tracing issues, fixing bugs
64
- - feature_build: Building new functionality — creating files, adding endpoints, wiring components
65
- - exploration: Research-oriented — reading code, searching, understanding before acting
66
- - refactor: Restructuring existing code — renaming, moving, reorganizing without new features
67
- - learning: Knowledge-seeking — asking questions, understanding concepts, getting explanations
68
- - quick_task: Short and focused — small fix, config change, or one-off task (<10 messages)
69
104
 
70
105
  Quality Standards:
71
106
  - Only include insights you would write in a team knowledge base for future reference
@@ -89,24 +124,349 @@ DO NOT include insights like these (too generic/trivial):
89
124
  - "Fixed a bug in the code" (what bug? what was the root cause?)
90
125
  - Anything that restates the task without adding transferable knowledge
91
126
 
92
- Here are examples of EXCELLENT insights — this is the quality bar:
127
+ Here is an example of an EXCELLENT insight — this is the quality bar:
128
+
129
+ EXCELLENT learning:
130
+ {
131
+ "title": "Tailwind v4 requires @theme inline{} for CSS variable utilities",
132
+ "symptom": "After Tailwind v3→v4 upgrade, custom utilities like bg-primary stopped working. Classes present in HTML but no styles applied.",
133
+ "root_cause": "Tailwind v4 removed tailwind.config.js theme extension. CSS variables in :root are not automatically available as utilities — must be registered via @theme inline {} in the CSS file.",
134
+ "takeaway": "When migrating Tailwind v3→v4 with shadcn/ui: add @theme inline {} mapping CSS variables, add @custom-variant dark for class-based dark mode, replace tailwindcss-animate with tw-animate-css.",
135
+ "applies_when": "Any Tailwind v3→v4 migration using CSS variables for theming, especially with shadcn/ui.",
136
+ "confidence": 95,
137
+ "evidence": ["User#12: 'The colors are all gone after the upgrade'", "Assistant#13: 'Tailwind v4 requires explicit @theme inline registration...'"]
138
+ }
93
139
 
94
- EXCELLENT decision:
140
+ Extract insights in this JSON format:
95
141
  {
96
- "title": "Use better-sqlite3 instead of sql.js for local database",
97
- "situation": "Needed a SQLite driver for a Node.js CLI that stores session data locally. Single-user, read-heavy from dashboard, occasional writes during sync.",
98
- "choice": "better-sqlite3 synchronous C++ binding with native SQLite access, no async overhead.",
99
- "reasoning": "CLI runs locally with no concurrent users. Synchronous API eliminates callback complexity. WAL mode provides concurrent read access for the dashboard while CLI writes.",
100
- "alternatives": [
101
- {"option": "sql.js (WASM build)", "rejected_because": "3x slower for bulk inserts, entire DB in memory, no WAL support"},
102
- {"option": "PostgreSQL via Docker", "rejected_because": "Violates local-first constraint — requires running a server process"}
142
+ "facets": {
143
+ "outcome_satisfaction": "high | medium | low | abandoned",
144
+ "workflow_pattern": "plan-then-implement | iterative-refinement | debug-fix-verify | explore-then-build | direct-execution | null",
145
+ "had_course_correction": false,
146
+ "course_correction_reason": null,
147
+ "iteration_count": 0,
148
+ "friction_points": [
149
+ {
150
+ "_reasoning": "User said 'fix the auth' without specifying OAuth vs session-based or which file. Step 1: not external — this is about the prompt, not infrastructure. Step 2: user could have specified which auth flow → user-actionable. Category: incomplete-requirements fits better than vague-request because specific constraints (which flow, which file) were missing, not the overall task description.",
151
+ "category": "incomplete-requirements",
152
+ "attribution": "user-actionable",
153
+ "description": "Missing specification of which auth flow (OAuth vs session) caused implementation of wrong provider in auth.ts",
154
+ "severity": "medium",
155
+ "resolution": "resolved"
156
+ },
157
+ {
158
+ "_reasoning": "AI applied Express middleware pattern to a Hono route despite conversation showing Hono imports. Step 1: not external. Step 2: user provided clear Hono context in prior messages. Step 3: AI failed despite adequate input → ai-capability. Category: knowledge-gap — incorrect framework API knowledge was applied.",
159
+ "category": "knowledge-gap",
160
+ "attribution": "ai-capability",
161
+ "description": "Express-style middleware pattern applied to Hono route despite Hono imports visible in conversation context",
162
+ "severity": "high",
163
+ "resolution": "resolved"
164
+ }
165
+ ],
166
+ "effective_patterns": [
167
+ {
168
+ "_reasoning": "Before editing, AI read 8 files across server/src/routes/ and server/src/llm/ to understand the data flow. Baseline check: 8 files across 2 directories = beyond routine (<5 file) reads. Step 1: no CLAUDE.md rule requiring this. Step 2: user didn't ask for investigation. Step 3: AI explored autonomously → ai-driven. Category: context-gathering (active investigation, not pre-existing knowledge).",
169
+ "category": "context-gathering",
170
+ "description": "Read 8 files across routes/ and llm/ directories to map the data flow before modifying the aggregation query, preventing a type mismatch that would have required rework",
171
+ "confidence": 88,
172
+ "driver": "ai-driven"
173
+ }
174
+ ]
175
+ },
176
+ "summary": {
177
+ "title": "Brief title describing main accomplishment (max 80 chars)",
178
+ "content": "2-4 sentence narrative: what was the goal, what was done, what was the outcome. Mention the primary file or component changed.",
179
+ "outcome": "success | partial | abandoned | blocked",
180
+ "bullets": ["Each bullet names a specific artifact (file, function, endpoint) and what changed"]
181
+ },
182
+ "decisions": [
183
+ {
184
+ "title": "The specific technical choice made (max 80 chars)",
185
+ "situation": "What problem or requirement led to this decision point",
186
+ "choice": "What was chosen and how it was implemented",
187
+ "reasoning": "Why this choice was made — the key factors that tipped the decision",
188
+ "alternatives": [
189
+ {"option": "Name of alternative", "rejected_because": "Why it was not chosen"}
190
+ ],
191
+ "trade_offs": "What downsides were accepted, what was given up",
192
+ "revisit_when": "Under what conditions this decision should be reconsidered (or 'N/A' if permanent)",
193
+ "confidence": 85,
194
+ "evidence": ["User#4: quoted text...", "Assistant#5: quoted text..."]
195
+ }
103
196
  ],
104
- "trade_offs": "Requires native compilation (node-gyp) which can fail on some systems. No browser compatibility.",
105
- "revisit_when": "If multi-device sync is added or users report node-gyp build failures.",
106
- "confidence": 92,
107
- "evidence": ["User#3: 'We need something that works without a server'", "Assistant#4: 'better-sqlite3 with WAL mode gives concurrent reads...'"]
197
+ "learnings": [
198
+ {
199
+ "title": "Specific technical discovery or gotcha (max 80 chars)",
200
+ "symptom": "What went wrong or was confusing the observable behavior that triggered investigation",
201
+ "root_cause": "The underlying technical reason — why it happened",
202
+ "takeaway": "The transferable lesson — what to do or avoid in similar situations, useful outside this project",
203
+ "applies_when": "Conditions under which this knowledge is relevant (framework version, configuration, etc.)",
204
+ "confidence": 80,
205
+ "evidence": ["User#7: quoted text...", "Assistant#8: quoted text..."]
206
+ }
207
+ ]
208
+ }
209
+
210
+ Only include insights rated 70+ confidence. If you cannot cite evidence, drop the insight. Return empty arrays for categories with no strong insights. Max 3 decisions, 5 learnings.
211
+ Evidence should reference the labeled turns in the conversation (e.g., "User#2", "Assistant#5").
212
+
213
+ Respond with valid JSON only, wrapped in <json>...</json> tags. Do not include any other text.`;
214
+ }
215
+ // =============================================================================
216
+ // PROMPT QUALITY INSTRUCTIONS
217
+ // The instruction suffix for prompt quality analysis calls (user[1]).
218
+ // =============================================================================
219
+ /**
220
+ * Build the instruction suffix for prompt quality analysis.
221
+ * Used as the second content block in the user message, after the cached conversation.
222
+ */
223
+ export function buildPromptQualityInstructions(projectName, sessionMeta, meta) {
224
+ return `You are a prompt engineering coach helping developers communicate more effectively with AI coding assistants. You review conversations and identify specific moments where better prompting would have saved time — AND moments where the user prompted particularly well.
225
+
226
+ You will produce:
227
+ 1. **Takeaways**: Concrete before/after examples the user can learn from (max 4)
228
+ 2. **Findings**: Categorized findings for cross-session aggregation (max 8)
229
+ 3. **Dimension scores**: 5 numeric dimensions for progress tracking
230
+ 4. **Efficiency score**: 0-100 overall rating
231
+ 5. **Assessment**: 2-3 sentence summary
232
+
233
+ Project: ${projectName}
234
+ Session shape: ${sessionMeta.humanMessageCount} user messages, ${sessionMeta.assistantMessageCount} assistant messages, ${sessionMeta.toolExchangeCount} tool exchanges
235
+ ${formatSessionMetaLine(meta)}
236
+ Before evaluating, mentally walk through the conversation and identify:
237
+ 1. Each time the assistant asked for clarification that could have been avoided
238
+ 2. Each time the user corrected the assistant's interpretation
239
+ 3. Each time the user repeated an instruction they gave earlier
240
+ 4. Whether critical context or requirements were provided late
241
+ 5. Whether the user discussed the plan/approach before implementation
242
+ 6. Moments where the user's prompt was notably well-crafted
243
+ 7. If context compactions occurred, note that the AI may have lost context — repeated instructions IMMEDIATELY after a compaction are NOT a user prompting deficit
244
+ These are your candidate findings. Only include them if they are genuinely actionable.
245
+
246
+ ${PROMPT_QUALITY_CLASSIFICATION_GUIDANCE}
247
+
248
+ Guidelines:
249
+ - Focus on USER messages only — don't critique the assistant's responses
250
+ - Be constructive, not judgmental — the goal is to help users improve
251
+ - A score of 100 means every user message was perfectly clear and complete
252
+ - A score of 50 means about half the messages could have been more efficient
253
+ - Include BOTH deficits and strengths — what went right matters as much as what went wrong
254
+ - If the user prompted well, say so — don't manufacture issues
255
+ - If the session had context compactions, do NOT penalize the user for repeating instructions immediately after a compaction — the AI lost context, not the user. Repetition unrelated to compaction events should still be flagged.
256
+
257
+ Length Guidance:
258
+ - Max 4 takeaways (ordered: improve first, then reinforce), max 8 findings
259
+ - better_prompt must be a complete, usable prompt — not vague meta-advice
260
+ - assessment: 2-3 sentences
261
+ - Total response: stay under 2500 tokens
262
+
263
+ Evaluate the user's prompting quality and respond with this JSON format:
264
+ {
265
+ "efficiency_score": 75,
266
+ "message_overhead": 3,
267
+ "assessment": "2-3 sentence summary of prompting style and efficiency",
268
+ "takeaways": [
269
+ {
270
+ "type": "improve",
271
+ "category": "late-constraint",
272
+ "label": "Short human-readable heading",
273
+ "message_ref": "User#5",
274
+ "original": "The user's original message (abbreviated)",
275
+ "better_prompt": "A concrete rewrite with the missing context included",
276
+ "why": "One sentence: why the original caused friction"
277
+ },
278
+ {
279
+ "type": "reinforce",
280
+ "category": "precise-request",
281
+ "label": "Short human-readable heading",
282
+ "message_ref": "User#0",
283
+ "what_worked": "What the user did well",
284
+ "why_effective": "Why it led to a good outcome"
285
+ }
286
+ ],
287
+ "findings": [
288
+ {
289
+ "category": "late-constraint",
290
+ "type": "deficit",
291
+ "description": "One neutral sentence with specific details",
292
+ "message_ref": "User#5",
293
+ "impact": "high",
294
+ "confidence": 90,
295
+ "suggested_improvement": "Concrete rewrite or behavioral change"
296
+ },
297
+ {
298
+ "category": "precise-request",
299
+ "type": "strength",
300
+ "description": "One sentence describing what the user did well",
301
+ "message_ref": "User#0",
302
+ "impact": "medium",
303
+ "confidence": 85
304
+ }
305
+ ],
306
+ "dimension_scores": {
307
+ "context_provision": 70,
308
+ "request_specificity": 65,
309
+ "scope_management": 80,
310
+ "information_timing": 55,
311
+ "correction_quality": 75
312
+ }
108
313
  }
109
314
 
315
+ Category values — use these PREFERRED categories:
316
+ Deficits: ${CANONICAL_PQ_DEFICIT_CATEGORIES.join(', ')}
317
+ Strengths: ${CANONICAL_PQ_STRENGTH_CATEGORIES.join(', ')}
318
+ Create a new kebab-case category only when none of these fit.
319
+
320
+ Rules:
321
+ - message_ref uses the labeled turns in the conversation (e.g., "User#0", "User#5")
322
+ - Only include genuinely notable findings, not normal back-and-forth
323
+ - Takeaways are the user-facing highlights — max 4, ordered: improve first, then reinforce
324
+ - Findings are the full categorized set for aggregation — max 8
325
+ - If the user prompted well, include strength findings and reinforce takeaways — don't manufacture issues
326
+ - message_overhead is how many fewer messages the session could have taken with better prompts
327
+ - dimension_scores: each 0-100. Score correction_quality as 75 if no corrections were needed.
328
+
329
+ Respond with valid JSON only, wrapped in <json>...</json> tags. Do not include any other text.`;
330
+ }
331
+ // =============================================================================
332
+ // FACET-ONLY INSTRUCTIONS
333
+ // The instruction suffix for facet-only extraction calls (user[1]).
334
+ // =============================================================================
335
+ /**
336
+ * Build the instruction suffix for facet-only extraction (backfill path).
337
+ * Used as the second content block in the user message, after the cached conversation.
338
+ */
339
+ export function buildFacetOnlyInstructions(projectName, sessionSummary, meta) {
340
+ return `You are assessing an AI coding session to extract structured metadata for cross-session pattern analysis.
341
+
342
+ Project: ${projectName}
343
+ ${sessionSummary ? `Session Summary: ${sessionSummary}\n` : ''}${formatSessionMetaLine(meta)}
344
+ Extract session facets — a holistic assessment of how the session went:
345
+
346
+ 1. outcome_satisfaction: "high" (completed successfully), "medium" (partial), "low" (problems), "abandoned" (gave up)
347
+ 2. workflow_pattern: The dominant pattern, or null. Values: "plan-then-implement", "iterative-refinement", "debug-fix-verify", "explore-then-build", "direct-execution"
348
+ 3. friction_points: Up to 5 moments where progress stalled (array).
349
+ Each: { _reasoning (3-step attribution decision tree reasoning), category (kebab-case, prefer: ${CANONICAL_FRICTION_CATEGORIES.join(', ')}), attribution ("user-actionable"|"ai-capability"|"environmental"), description (one neutral sentence with specific details), severity ("high"|"medium"|"low"), resolution ("resolved"|"workaround"|"unresolved") }
350
+ ${FRICTION_CLASSIFICATION_GUIDANCE}
351
+ 4. effective_patterns: Up to 3 things that worked well (array).
352
+ Each: { _reasoning (driver decision tree reasoning — check user infrastructure first), category (kebab-case, prefer: ${CANONICAL_PATTERN_CATEGORIES.join(', ')}), description (specific technique, 1-2 sentences), confidence (0-100), driver ("user-driven"|"ai-driven"|"collaborative") }
353
+ ${EFFECTIVE_PATTERN_CLASSIFICATION_GUIDANCE}
354
+ 5. had_course_correction: true/false — did the user redirect the AI?
355
+ 6. course_correction_reason: Brief explanation if true, null otherwise
356
+ 7. iteration_count: How many user clarification/correction cycles occurred
357
+
358
+ Extract facets in this JSON format:
359
+ {
360
+ "outcome_satisfaction": "high | medium | low | abandoned",
361
+ "workflow_pattern": "string or null",
362
+ "had_course_correction": false,
363
+ "course_correction_reason": null,
364
+ "iteration_count": 0,
365
+ "friction_points": [
366
+ {
367
+ "_reasoning": "Reasoning for category + attribution classification",
368
+ "category": "kebab-case-category",
369
+ "attribution": "user-actionable | ai-capability | environmental",
370
+ "description": "One neutral sentence about the gap, with specific details",
371
+ "severity": "high | medium | low",
372
+ "resolution": "resolved | workaround | unresolved"
373
+ }
374
+ ],
375
+ "effective_patterns": [
376
+ {
377
+ "_reasoning": "Reasoning for category + driver classification, including baseline check",
378
+ "category": "kebab-case-category",
379
+ "description": "technique",
380
+ "confidence": 85,
381
+ "driver": "user-driven | ai-driven | collaborative"
382
+ }
383
+ ]
384
+ }
385
+
386
+ Respond with valid JSON only, wrapped in <json>...</json> tags.`;
387
+ }
388
+ // =============================================================================
389
+ // LEGACY EXPORTS (deprecated — kept for backward compatibility)
390
+ // Callers in analysis.ts, prompt-quality-analysis.ts, and facet-extraction.ts
391
+ // have been updated to use the new builder functions above.
392
+ // These remain to avoid breaking any external code that imports them directly.
393
+ // =============================================================================
394
+ /**
395
+ * @deprecated Use SHARED_ANALYST_SYSTEM_PROMPT + buildSessionAnalysisInstructions() instead.
396
+ */
397
+ export const SESSION_ANALYSIS_SYSTEM_PROMPT = `You are a senior staff engineer writing entries for a team's engineering knowledge base. You've just observed an AI-assisted coding session and your job is to extract the insights that would save another engineer time if they encountered a similar situation 6 months from now.
398
+
399
+ Your audience is a developer who has never seen this session but works on the same codebase. They need enough context to understand WHY a decision was made, WHAT specific gotcha was discovered, and WHEN this knowledge applies.
400
+
401
+ === PART 1: SESSION FACETS ===
402
+ Extract these FIRST as a holistic session assessment:
403
+
404
+ 1. outcome_satisfaction: Rate the session outcome.
405
+ - "high": Task completed successfully, user satisfied
406
+ - "medium": Partial completion or minor issues
407
+ - "low": Significant problems, user frustrated
408
+ - "abandoned": Session ended without achieving the goal
409
+
410
+ 2. workflow_pattern: Identify the dominant workflow pattern (or null if unclear).
411
+ Recommended values: "plan-then-implement", "iterative-refinement", "debug-fix-verify", "explore-then-build", "direct-execution"
412
+
413
+ 3. friction_points: Identify up to 5 moments where progress was blocked or slowed (array, max 5).
414
+ Each friction point has:
415
+ - _reasoning: (REQUIRED) Your reasoning chain for category + attribution. 2-3 sentences max. Walk through the decision tree steps. This field is saved but not shown to users — use it to think before classifying.
416
+ - category: Use one of these PREFERRED categories when applicable: ${CANONICAL_FRICTION_CATEGORIES.join(', ')}. Create a new kebab-case category only when none of these fit.
417
+ - attribution: "user-actionable" (better user input would have prevented this), "ai-capability" (AI failed despite adequate input), or "environmental" (external constraint)
418
+ - description: One neutral sentence describing what happened, with specific details (file names, APIs, errors)
419
+ - severity: "high" (blocked progress for multiple turns), "medium" (caused a detour), "low" (minor hiccup)
420
+ - resolution: "resolved" (fixed in session), "workaround" (bypassed), "unresolved" (still broken)
421
+ ${FRICTION_CLASSIFICATION_GUIDANCE}
422
+
423
+ 4. effective_patterns: Up to 3 techniques or approaches that worked particularly well (array, max 3).
424
+ Each has:
425
+ - _reasoning: (REQUIRED) Your reasoning chain for category + driver. 2-3 sentences max. Walk through the decision tree steps and baseline exclusion check. This field is saved but not shown to users — use it to think before classifying.
426
+ - category: Use one of these PREFERRED categories when applicable: structured-planning, incremental-implementation, verification-workflow, systematic-debugging, self-correction, context-gathering, domain-expertise, effective-tooling. Create a new kebab-case category only when none fit.
427
+ - description: Specific technique worth repeating (1-2 sentences with concrete detail)
428
+ - confidence: 0-100 how confident you are this is genuinely effective
429
+ - driver: Who drove this pattern — "user-driven" (user explicitly requested it), "ai-driven" (AI exhibited it without prompting), or "collaborative" (both contributed or emerged from interaction)
430
+ ${EFFECTIVE_PATTERN_CLASSIFICATION_GUIDANCE}
431
+
432
+ 5. had_course_correction: true if the user redirected the AI from a wrong approach, false otherwise
433
+ 6. course_correction_reason: If had_course_correction is true, briefly explain what was corrected (or null)
434
+ 7. iteration_count: Number of times the user had to clarify, correct, or re-explain something
435
+
436
+ If the session has minimal friction and straightforward execution, use empty arrays for friction_points, set outcome_satisfaction to "high", and iteration_count to 0.
437
+
438
+ === PART 2: INSIGHTS ===
439
+ Then extract these:
440
+
441
+ You will extract:
442
+ 1. **Summary**: A narrative of what was accomplished and the outcome
443
+ 2. **Decisions**: Technical choices made — with full situation context, reasoning, rejected alternatives, trade-offs, and conditions for revisiting (max 3)
444
+ 3. **Learnings**: Technical discoveries, gotchas, debugging breakthroughs — with the observable symptom, root cause, and a transferable takeaway (max 5)
445
+
446
+ Quality Standards:
447
+ - Only include insights you would write in a team knowledge base for future reference
448
+ - Each insight MUST reference concrete details: specific file names, library names, error messages, API endpoints, or code patterns
449
+ - Do not invent file names, APIs, errors, or details not present in the conversation
450
+ - Rate your confidence in each insight's value (0-100). Only include insights you rate 70+.
451
+ - It is better to return 0 insights in a category than to include generic or trivial ones
452
+ - If a session is straightforward with no notable decisions or learnings, say so in the summary and leave other categories empty
453
+
454
+ Length Guidance:
455
+ - Fill every field in the schema. An empty "trade_offs" or "revisit_when" is worse than a longer response.
456
+ - Total response: stay under 2000 tokens. If you must cut, drop lower-confidence insights rather than compressing high-confidence ones.
457
+ - Evidence: 1-3 short quotes per insight, referencing turn labels.
458
+ - Prefer precision over brevity — a specific 3-sentence insight beats a vague 1-sentence insight.
459
+
460
+ DO NOT include insights like these (too generic/trivial):
461
+ - "Used debugging techniques to fix an issue"
462
+ - "Made architectural decisions about the codebase"
463
+ - "Implemented a new feature" (the summary already covers this)
464
+ - "Used React hooks for state management" (too generic without specifics)
465
+ - "Fixed a bug in the code" (what bug? what was the root cause?)
466
+ - Anything that restates the task without adding transferable knowledge
467
+
468
+ Here is an example of an EXCELLENT insight — this is the quality bar:
469
+
110
470
  EXCELLENT learning:
111
471
  {
112
472
  "title": "Tailwind v4 requires @theme inline{} for CSS variable utilities",
@@ -120,20 +480,53 @@ EXCELLENT learning:
120
480
 
121
481
  Respond with valid JSON only, wrapped in <json>...</json> tags. Do not include any other text.`;
122
482
  /**
123
- * Generate the user prompt for session analysis.
483
+ * @deprecated Use buildCacheableConversationBlock() + buildSessionAnalysisInstructions() instead.
124
484
  */
125
- export function generateSessionAnalysisPrompt(projectName, sessionSummary, formattedMessages) {
485
+ export function generateSessionAnalysisPrompt(projectName, sessionSummary, formattedMessages, meta) {
126
486
  return `Analyze this AI coding session and extract insights.
127
487
 
128
488
  Project: ${projectName}
129
- ${sessionSummary ? `Session Summary: ${sessionSummary}\n` : ''}
489
+ ${sessionSummary ? `Session Summary: ${sessionSummary}\n` : ''}${formatSessionMetaLine(meta)}
130
490
  --- CONVERSATION ---
131
491
  ${formattedMessages}
132
492
  --- END CONVERSATION ---
133
493
 
134
494
  Extract insights in this JSON format:
135
495
  {
136
- "session_character": "deep_focus | bug_hunt | feature_build | exploration | refactor | learning | quick_task",
496
+ "facets": {
497
+ "outcome_satisfaction": "high | medium | low | abandoned",
498
+ "workflow_pattern": "plan-then-implement | iterative-refinement | debug-fix-verify | explore-then-build | direct-execution | null",
499
+ "had_course_correction": false,
500
+ "course_correction_reason": null,
501
+ "iteration_count": 0,
502
+ "friction_points": [
503
+ {
504
+ "_reasoning": "User said 'fix the auth' without specifying OAuth vs session-based or which file. Step 1: not external — this is about the prompt, not infrastructure. Step 2: user could have specified which auth flow → user-actionable. Category: incomplete-requirements fits better than vague-request because specific constraints (which flow, which file) were missing, not the overall task description.",
505
+ "category": "incomplete-requirements",
506
+ "attribution": "user-actionable",
507
+ "description": "Missing specification of which auth flow (OAuth vs session) caused implementation of wrong provider in auth.ts",
508
+ "severity": "medium",
509
+ "resolution": "resolved"
510
+ },
511
+ {
512
+ "_reasoning": "AI applied Express middleware pattern to a Hono route despite conversation showing Hono imports. Step 1: not external. Step 2: user provided clear Hono context in prior messages. Step 3: AI failed despite adequate input → ai-capability. Category: knowledge-gap — incorrect framework API knowledge was applied.",
513
+ "category": "knowledge-gap",
514
+ "attribution": "ai-capability",
515
+ "description": "Express-style middleware pattern applied to Hono route despite Hono imports visible in conversation context",
516
+ "severity": "high",
517
+ "resolution": "resolved"
518
+ }
519
+ ],
520
+ "effective_patterns": [
521
+ {
522
+ "_reasoning": "Before editing, AI read 8 files across server/src/routes/ and server/src/llm/ to understand the data flow. Baseline check: 8 files across 2 directories = beyond routine (<5 file) reads. Step 1: no CLAUDE.md rule requiring this. Step 2: user didn't ask for investigation. Step 3: AI explored autonomously → ai-driven. Category: context-gathering (active investigation, not pre-existing knowledge).",
523
+ "category": "context-gathering",
524
+ "description": "Read 8 files across routes/ and llm/ directories to map the data flow before modifying the aggregation query, preventing a type mismatch that would have required rework",
525
+ "confidence": 88,
526
+ "driver": "ai-driven"
527
+ }
528
+ ]
529
+ },
137
530
  "summary": {
138
531
  "title": "Brief title describing main accomplishment (max 80 chars)",
139
532
  "content": "2-4 sentence narrative: what was the goal, what was done, what was the outcome. Mention the primary file or component changed.",
@@ -173,108 +566,119 @@ Evidence should reference the labeled turns in the conversation (e.g., "User#2",
173
566
 
174
567
  Respond with valid JSON only, wrapped in <json>...</json> tags. Do not include any other text.`;
175
568
  }
176
- const VALID_SESSION_CHARACTERS = new Set([
177
- 'deep_focus', 'bug_hunt', 'feature_build', 'exploration', 'refactor', 'learning', 'quick_task',
178
- ]);
179
- function buildResponsePreview(text, head = 200, tail = 200) {
180
- if (text.length <= head + tail + 20)
181
- return text;
182
- return `${text.slice(0, head)}\n...[${text.length - head - tail} chars omitted]...\n${text.slice(-tail)}`;
183
- }
184
- function extractJsonPayload(response) {
185
- const tagged = response.match(/<json>\s*([\s\S]*?)\s*<\/json>/i);
186
- if (tagged?.[1])
187
- return tagged[1].trim();
188
- const jsonMatch = response.match(/\{[\s\S]*\}/);
189
- return jsonMatch ? jsonMatch[0] : null;
190
- }
191
569
  /**
192
- * Parse the LLM response into structured insights.
570
+ * @deprecated Use SHARED_ANALYST_SYSTEM_PROMPT + buildFacetOnlyInstructions() instead.
193
571
  */
194
- export function parseAnalysisResponse(response) {
195
- const response_length = response.length;
196
- const preview = buildResponsePreview(response);
197
- const jsonPayload = extractJsonPayload(response);
198
- if (!jsonPayload) {
199
- console.error('No JSON found in analysis response');
200
- return {
201
- success: false,
202
- error: { error_type: 'no_json_found', error_message: 'No JSON found in analysis response', response_length, response_preview: preview },
203
- };
204
- }
205
- let parsed;
206
- try {
207
- parsed = JSON.parse(jsonPayload);
208
- }
209
- catch {
210
- // Attempt repair handles trailing commas, unclosed braces, truncated output
211
- try {
212
- parsed = JSON.parse(jsonrepair(jsonPayload));
213
- }
214
- catch (err) {
215
- const msg = err instanceof Error ? err.message : String(err);
216
- console.error('Failed to parse analysis response (after jsonrepair):', err);
217
- return {
218
- success: false,
219
- error: { error_type: 'json_parse_error', error_message: msg, response_length, response_preview: preview },
220
- };
221
- }
222
- }
223
- if (!parsed.summary || typeof parsed.summary.title !== 'string') {
224
- console.error('Invalid analysis response structure');
225
- return {
226
- success: false,
227
- error: { error_type: 'invalid_structure', error_message: 'Missing or invalid summary field', response_length, response_preview: preview },
228
- };
572
+ export const FACET_ONLY_SYSTEM_PROMPT = `You are assessing an AI coding session to extract structured metadata for cross-session pattern analysis. You will receive a session summary and the full conversation transcript.
573
+
574
+ Extract session facets — a holistic assessment of how the session went:
575
+
576
+ 1. outcome_satisfaction: "high" (completed successfully), "medium" (partial), "low" (problems), "abandoned" (gave up)
577
+ 2. workflow_pattern: The dominant pattern, or null. Values: "plan-then-implement", "iterative-refinement", "debug-fix-verify", "explore-then-build", "direct-execution"
578
+ 3. friction_points: Up to 5 moments where progress stalled (array).
579
+ Each: { _reasoning (3-step attribution decision tree reasoning), category (kebab-case, prefer: ${CANONICAL_FRICTION_CATEGORIES.join(', ')}), attribution ("user-actionable"|"ai-capability"|"environmental"), description (one neutral sentence with specific details), severity ("high"|"medium"|"low"), resolution ("resolved"|"workaround"|"unresolved") }
580
+ ${FRICTION_CLASSIFICATION_GUIDANCE}
581
+ 4. effective_patterns: Up to 3 things that worked well (array).
582
+ Each: { _reasoning (driver decision tree reasoning — check user infrastructure first), category (kebab-case, prefer: ${CANONICAL_PATTERN_CATEGORIES.join(', ')}), description (specific technique, 1-2 sentences), confidence (0-100), driver ("user-driven"|"ai-driven"|"collaborative") }
583
+ ${EFFECTIVE_PATTERN_CLASSIFICATION_GUIDANCE}
584
+ 5. had_course_correction: true/false — did the user redirect the AI?
585
+ 6. course_correction_reason: Brief explanation if true, null otherwise
586
+ 7. iteration_count: How many user clarification/correction cycles occurred
587
+
588
+ Respond with valid JSON only, wrapped in <json>...</json> tags.`;
589
+ /**
590
+ * @deprecated Use buildCacheableConversationBlock() + buildFacetOnlyInstructions() instead.
591
+ */
592
+ export function generateFacetOnlyPrompt(projectName, sessionSummary, conversationMessages, meta) {
593
+ return `Assess this AI coding session and extract facets.
594
+
595
+ Project: ${projectName}
596
+ ${sessionSummary ? `Session Summary: ${sessionSummary}\n` : ''}${formatSessionMetaLine(meta)}
597
+ --- CONVERSATION ---
598
+ ${conversationMessages}
599
+ --- END CONVERSATION ---
600
+
601
+ Extract facets in this JSON format:
602
+ {
603
+ "outcome_satisfaction": "high | medium | low | abandoned",
604
+ "workflow_pattern": "string or null",
605
+ "had_course_correction": false,
606
+ "course_correction_reason": null,
607
+ "iteration_count": 0,
608
+ "friction_points": [
609
+ {
610
+ "_reasoning": "Reasoning for category + attribution classification",
611
+ "category": "kebab-case-category",
612
+ "attribution": "user-actionable | ai-capability | environmental",
613
+ "description": "One neutral sentence about the gap, with specific details",
614
+ "severity": "high | medium | low",
615
+ "resolution": "resolved | workaround | unresolved"
229
616
  }
230
- parsed.decisions = parsed.decisions || [];
231
- parsed.learnings = parsed.learnings || [];
232
- // Validate session_character — drop if not a recognized value
233
- if (parsed.session_character && !VALID_SESSION_CHARACTERS.has(parsed.session_character)) {
234
- parsed.session_character = undefined;
617
+ ],
618
+ "effective_patterns": [
619
+ {
620
+ "_reasoning": "Reasoning for category + driver classification, including baseline check",
621
+ "category": "kebab-case-category",
622
+ "description": "technique",
623
+ "confidence": 85,
624
+ "driver": "user-driven | ai-driven | collaborative"
235
625
  }
236
- return { success: true, data: parsed };
626
+ ]
627
+ }
628
+
629
+ Respond with valid JSON only, wrapped in <json>...</json> tags.`;
237
630
  }
238
631
  // --- Prompt Quality Analysis ---
239
- export const PROMPT_QUALITY_SYSTEM_PROMPT = `You are a prompt engineering coach helping developers communicate more effectively with AI coding assistants. You review conversations and identify specific moments where better prompting would have saved time.
632
+ /**
633
+ * @deprecated Use SHARED_ANALYST_SYSTEM_PROMPT + buildPromptQualityInstructions() instead.
634
+ */
635
+ export const PROMPT_QUALITY_SYSTEM_PROMPT = `You are a prompt engineering coach helping developers communicate more effectively with AI coding assistants. You review conversations and identify specific moments where better prompting would have saved time — AND moments where the user prompted particularly well.
240
636
 
241
- You will identify:
242
- 1. **Wasted turns**: User messages that led to clarifications, corrections, or repeated instructions because the original prompt was unclear, missing context, or too vague.
243
- 2. **Anti-patterns**: Recurring bad habits in the user's prompting style, with specific fixes.
244
- 3. **Session traits**: Higher-level behavioral patterns about how the session was structured and managed.
245
- 4. **Efficiency score**: A 0-100 rating of how optimally the user communicated.
246
- 5. **Actionable tips**: Specific improvements the user can make.
637
+ You will produce:
638
+ 1. **Takeaways**: Concrete before/after examples the user can learn from (max 4)
639
+ 2. **Findings**: Categorized findings for cross-session aggregation (max 8)
640
+ 3. **Dimension scores**: 5 numeric dimensions for progress tracking
641
+ 4. **Efficiency score**: 0-100 overall rating
642
+ 5. **Assessment**: 2-3 sentence summary
247
643
 
248
644
  Before evaluating, mentally walk through the conversation and identify:
249
645
  1. Each time the assistant asked for clarification that could have been avoided
250
646
  2. Each time the user corrected the assistant's interpretation
251
647
  3. Each time the user repeated an instruction they gave earlier
252
- 4. Whether the session covers too many unrelated objectives (context drift / session bloat)
253
- 5. Whether the user provided critical context or requirements late that should have been mentioned upfront
254
- 6. Whether the user discussed the plan/approach before jumping into implementation, or dove straight into code
648
+ 4. Whether critical context or requirements were provided late
649
+ 5. Whether the user discussed the plan/approach before implementation
650
+ 6. Moments where the user's prompt was notably well-crafted
651
+ 7. If context compactions occurred, note that the AI may have lost context — repeated instructions IMMEDIATELY after a compaction are NOT a user prompting deficit
255
652
  These are your candidate findings. Only include them if they are genuinely actionable.
256
653
 
654
+ ${PROMPT_QUALITY_CLASSIFICATION_GUIDANCE}
655
+
257
656
  Guidelines:
258
657
  - Focus on USER messages only — don't critique the assistant's responses
259
- - A "wasted turn" is when the user had to send a follow-up message to clarify, correct, or repeat something that could have been included in the original prompt
260
- - Only mark a wasted turn if the assistant explicitly asked for clarification or corrected a misunderstanding
261
658
  - Be constructive, not judgmental — the goal is to help users improve
262
- - Consider the context: some clarification exchanges are normal and expected
263
659
  - A score of 100 means every user message was perfectly clear and complete
264
660
  - A score of 50 means about half the messages could have been more efficient
661
+ - Include BOTH deficits and strengths — what went right matters as much as what went wrong
662
+ - If the user prompted well, say so — don't manufacture issues
663
+ - If the session had context compactions, do NOT penalize the user for repeating instructions immediately after a compaction — the AI lost context, not the user. Repetition unrelated to compaction events should still be flagged.
265
664
 
266
665
  Length Guidance:
267
- - Max 5 wasted turns, max 3 anti-patterns, max 3 session traits, max 5 tips
268
- - suggestedRewrite must be a complete, usable prompt — not vague meta-advice
269
- - overallAssessment: 2-3 sentences
270
- - Total response: stay under 2000 tokens
666
+ - Max 4 takeaways (ordered: improve first, then reinforce), max 8 findings
667
+ - better_prompt must be a complete, usable prompt — not vague meta-advice
668
+ - assessment: 2-3 sentences
669
+ - Total response: stay under 2500 tokens
271
670
 
272
671
  Respond with valid JSON only, wrapped in <json>...</json> tags. Do not include any other text.`;
273
- export function generatePromptQualityPrompt(projectName, formattedMessages, messageCount) {
274
- return `Analyze the user's prompting efficiency in this AI coding session.
672
+ /**
673
+ * @deprecated Use buildCacheableConversationBlock() + buildPromptQualityInstructions() instead.
674
+ */
675
+ export function generatePromptQualityPrompt(projectName, formattedMessages, sessionMeta, meta // V6 metadata — compact counts + slash commands for context signals
676
+ ) {
677
+ return `Analyze the user's prompting quality in this AI coding session.
275
678
 
276
679
  Project: ${projectName}
277
- Total messages: ${messageCount}
680
+ Session shape: ${sessionMeta.humanMessageCount} user messages, ${sessionMeta.assistantMessageCount} assistant messages, ${sessionMeta.toolExchangeCount} tool exchanges
681
+ ${formatSessionMetaLine(meta)}
278
682
 
279
683
  --- CONVERSATION ---
280
684
  ${formattedMessages}
@@ -282,100 +686,70 @@ ${formattedMessages}
282
686
 
283
687
  Evaluate the user's prompting quality and respond with this JSON format:
284
688
  {
285
- "efficiencyScore": 75,
286
- "potentialMessageReduction": 3,
287
- "overallAssessment": "2-3 sentence summary of the user's prompting style and efficiency",
288
- "wastedTurns": [
689
+ "efficiency_score": 75,
690
+ "message_overhead": 3,
691
+ "assessment": "2-3 sentence summary of prompting style and efficiency",
692
+ "takeaways": [
289
693
  {
290
- "messageIndex": 5,
291
- "originalMessage": "The user's original message (abbreviated if long)",
292
- "whatWentWrong": "What information was missing or ambiguous that caused a follow-up",
293
- "suggestedRewrite": "A concrete rewrite that includes the missing context — must be a complete, usable prompt",
294
- "turnsWasted": 2
295
- }
296
- ],
297
- "antiPatterns": [
694
+ "type": "improve",
695
+ "category": "late-constraint",
696
+ "label": "Short human-readable heading",
697
+ "message_ref": "User#5",
698
+ "original": "The user's original message (abbreviated)",
699
+ "better_prompt": "A concrete rewrite with the missing context included",
700
+ "why": "One sentence: why the original caused friction"
701
+ },
298
702
  {
299
- "name": "Vague Instructions",
300
- "description": "Requests that lack specificity about what file, function, or behavior to change",
301
- "count": 3,
302
- "examples": ["User#2: 'fix it'", "User#5: 'make it work'"],
303
- "fix": "Include the file path, function name, and expected vs actual behavior in the initial request"
703
+ "type": "reinforce",
704
+ "category": "precise-request",
705
+ "label": "Short human-readable heading",
706
+ "message_ref": "User#0",
707
+ "what_worked": "What the user did well",
708
+ "why_effective": "Why it led to a good outcome"
304
709
  }
305
710
  ],
306
- "sessionTraits": [
711
+ "findings": [
307
712
  {
308
- "trait": "context_drift | objective_bloat | late_context | no_planning | good_structure",
309
- "severity": "high | medium | low",
310
- "description": "What was observed and why it matters",
311
- "evidence": "User#3 switched from auth to styling, then back to auth at User#12",
312
- "suggestion": "Break into separate sessions: one for auth, one for styling"
713
+ "category": "late-constraint",
714
+ "type": "deficit",
715
+ "description": "One neutral sentence with specific details",
716
+ "message_ref": "User#5",
717
+ "impact": "high",
718
+ "confidence": 90,
719
+ "suggested_improvement": "Concrete rewrite or behavioral change"
720
+ },
721
+ {
722
+ "category": "precise-request",
723
+ "type": "strength",
724
+ "description": "One sentence describing what the user did well",
725
+ "message_ref": "User#0",
726
+ "impact": "medium",
727
+ "confidence": 85
313
728
  }
314
729
  ],
315
- "tips": [
316
- "Always include file paths when asking to modify code",
317
- "Provide error messages verbatim when reporting bugs"
318
- ]
730
+ "dimension_scores": {
731
+ "context_provision": 70,
732
+ "request_specificity": 65,
733
+ "scope_management": 80,
734
+ "information_timing": 55,
735
+ "correction_quality": 75
736
+ }
319
737
  }
320
738
 
321
- Session trait definitions:
322
- - **context_drift**: Session covers too many unrelated objectives, causing the AI to lose context and produce lower quality output
323
- - **objective_bloat**: Too many different tasks crammed into one session instead of focused, single-purpose sessions
324
- - **late_context**: Critical requirements, constraints, or context provided late in the conversation that should have been mentioned upfront — causing rework or wasted turns
325
- - **no_planning**: User jumped straight into implementation without discussing approach, requirements, or plan — leading to course corrections mid-session
326
- - **good_structure**: Session was well-structured with clear objectives, upfront context, and logical flow (only include this if truly exemplary)
739
+ Category values — use these PREFERRED categories:
740
+ Deficits: ${CANONICAL_PQ_DEFICIT_CATEGORIES.join(', ')}
741
+ Strengths: ${CANONICAL_PQ_STRENGTH_CATEGORIES.join(', ')}
742
+ Create a new kebab-case category only when none of these fit.
327
743
 
328
744
  Rules:
329
- - messageIndex refers to the 0-based index of the USER message, as labeled in the conversation (e.g., User#0)
330
- - Only include genuinely wasted turns, not normal back-and-forth
331
- - Tips should be specific and actionable, not generic; include the relevant user message index in parentheses
332
- - If the user prompted well, say so — don't manufacture issues
333
- - potentialMessageReduction is how many fewer messages the session could have taken with better prompts
745
+ - message_ref uses the labeled turns in the conversation (e.g., "User#0", "User#5")
746
+ - Only include genuinely notable findings, not normal back-and-forth
747
+ - Takeaways are the user-facing highlights — max 4, ordered: improve first, then reinforce
748
+ - Findings are the full categorized set for aggregation — max 8
749
+ - If the user prompted well, include strength findings and reinforce takeaways — don't manufacture issues
750
+ - message_overhead is how many fewer messages the session could have taken with better prompts
751
+ - dimension_scores: each 0-100. Score correction_quality as 75 if no corrections were needed.
334
752
 
335
753
  Respond with valid JSON only, wrapped in <json>...</json> tags. Do not include any other text.`;
336
754
  }
337
- export function parsePromptQualityResponse(response) {
338
- const response_length = response.length;
339
- const preview = buildResponsePreview(response);
340
- const jsonPayload = extractJsonPayload(response);
341
- if (!jsonPayload) {
342
- console.error('No JSON found in prompt quality response');
343
- return {
344
- success: false,
345
- error: { error_type: 'no_json_found', error_message: 'No JSON found in prompt quality response', response_length, response_preview: preview },
346
- };
347
- }
348
- let parsed;
349
- try {
350
- parsed = JSON.parse(jsonPayload);
351
- }
352
- catch {
353
- try {
354
- parsed = JSON.parse(jsonrepair(jsonPayload));
355
- }
356
- catch (err) {
357
- const msg = err instanceof Error ? err.message : String(err);
358
- console.error('Failed to parse prompt quality response (after jsonrepair):', err);
359
- return {
360
- success: false,
361
- error: { error_type: 'json_parse_error', error_message: msg, response_length, response_preview: preview },
362
- };
363
- }
364
- }
365
- if (typeof parsed.efficiencyScore !== 'number') {
366
- console.error('Invalid prompt quality response: missing efficiencyScore');
367
- return {
368
- success: false,
369
- error: { error_type: 'invalid_structure', error_message: 'Missing or invalid efficiencyScore field', response_length, response_preview: preview },
370
- };
371
- }
372
- parsed.efficiencyScore = Math.max(0, Math.min(100, Math.round(parsed.efficiencyScore)));
373
- parsed.potentialMessageReduction = parsed.potentialMessageReduction || 0;
374
- parsed.overallAssessment = parsed.overallAssessment || '';
375
- parsed.wastedTurns = parsed.wastedTurns || [];
376
- parsed.antiPatterns = parsed.antiPatterns || [];
377
- parsed.sessionTraits = parsed.sessionTraits || [];
378
- parsed.tips = parsed.tips || [];
379
- return { success: true, data: parsed };
380
- }
381
755
  //# sourceMappingURL=prompts.js.map