promptup-plugin 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +78 -0
- package/bin/install.cjs +306 -0
- package/bin/promptup-plugin +8 -0
- package/dist/config.d.ts +40 -0
- package/dist/config.js +123 -0
- package/dist/db.d.ts +35 -0
- package/dist/db.js +327 -0
- package/dist/decision-detector.d.ts +11 -0
- package/dist/decision-detector.js +47 -0
- package/dist/evaluator.d.ts +10 -0
- package/dist/evaluator.js +844 -0
- package/dist/git-activity-extractor.d.ts +35 -0
- package/dist/git-activity-extractor.js +167 -0
- package/dist/index.d.ts +12 -0
- package/dist/index.js +54 -0
- package/dist/pr-report-generator.d.ts +20 -0
- package/dist/pr-report-generator.js +421 -0
- package/dist/shared/decision-classifier.d.ts +60 -0
- package/dist/shared/decision-classifier.js +385 -0
- package/dist/shared/decision-score.d.ts +7 -0
- package/dist/shared/decision-score.js +31 -0
- package/dist/shared/dimensions.d.ts +43 -0
- package/dist/shared/dimensions.js +361 -0
- package/dist/shared/scoring.d.ts +89 -0
- package/dist/shared/scoring.js +161 -0
- package/dist/shared/types.d.ts +108 -0
- package/dist/shared/types.js +9 -0
- package/dist/tools.d.ts +30 -0
- package/dist/tools.js +456 -0
- package/dist/transcript-parser.d.ts +36 -0
- package/dist/transcript-parser.js +201 -0
- package/hooks/auto-eval.sh +44 -0
- package/hooks/check-update.sh +26 -0
- package/hooks/debug-hook.sh +3 -0
- package/hooks/hooks.json +36 -0
- package/hooks/render-eval.sh +137 -0
- package/package.json +60 -0
- package/skills/eval/SKILL.md +12 -0
- package/skills/pr-report/SKILL.md +37 -0
- package/skills/status/SKILL.md +28 -0
- package/statusline.sh +46 -0
|
@@ -0,0 +1,844 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Evaluation engine for the standalone PromptUp plugin.
|
|
3
|
+
*
|
|
4
|
+
* Primary: spawns `claude -p` to get real LLM analysis of the session.
|
|
5
|
+
* Fallback: heuristic pattern matching if Claude Code is unavailable.
|
|
6
|
+
*
|
|
7
|
+
* STANDALONE copy — no imports from @promptup/shared or session-watcher.
|
|
8
|
+
*/
|
|
9
|
+
import { spawn } from 'node:child_process';
|
|
10
|
+
import { ulid } from 'ulid';
|
|
11
|
+
import { BASE_DIMENSIONS, BASE_DIMENSION_KEYS, DOMAIN_DIMENSIONS, DOMAIN_DIMENSION_KEYS, WEIGHT_PROFILES, } from './shared/dimensions.js';
|
|
12
|
+
import { computeCompositeScore, computeDomainComposite, computeTechComposite, computeOverallComposite, computeGrandComposite, computeRiskFlagsWithHistory, } from './shared/scoring.js';
|
|
13
|
+
import { getLatestEvaluation, insertEvaluation, insertDecision, } from './db.js';
|
|
14
|
+
/**
 * Combined role + skill roadmaps catalog for tech detection.
 * Mirrors the full list from @promptup/shared/roadmaps without importing it.
 *
 * Shape: `{ [roadmapKey]: { name: string, competencies: string[] } }`.
 *
 * How this table is consumed in this file:
 * - buildEvalPrompt lists every key with its display name and the first four
 *   competencies, so the model knows which roadmap keys it may return.
 * - evaluateSession discards any `tech_expertise` entry returned by the model
 *   whose `roadmap` is not a key of this object, and emits one competency
 *   slot per name listed here (score `null` / `demonstrated: false` when the
 *   model did not score it).
 *
 * Keys and competency names are therefore part of the contract with the
 * model output; renaming one changes what gets matched.
 */
const ALL_ROADMAPS = {
    // Role roadmaps
    frontend: { name: 'Frontend Developer', competencies: ['component_architecture', 'css_layout', 'js_fundamentals', 'frameworks', 'state_management', 'build_tools', 'testing', 'performance', 'accessibility', 'responsive_design'] },
    backend: { name: 'Backend Developer', competencies: ['api_design', 'databases', 'authentication', 'caching', 'message_queues', 'testing', 'security', 'scaling', 'containerization', 'monitoring'] },
    devops: { name: 'DevOps Engineer', competencies: ['ci_cd', 'containerization', 'orchestration', 'iac', 'monitoring', 'cloud_platforms', 'networking', 'scripting', 'security', 'logging'] },
    fullstack: { name: 'Full Stack Developer', competencies: ['frontend_frameworks', 'backend_frameworks', 'databases', 'api_design', 'deployment', 'testing', 'authentication', 'state_management'] },
    android: { name: 'Android Developer', competencies: ['kotlin_java', 'android_sdk', 'jetpack_compose', 'architecture_patterns', 'networking', 'storage', 'testing', 'publishing'] },
    ios: { name: 'iOS Developer', competencies: ['swift', 'swiftui_uikit', 'architecture_patterns', 'networking', 'core_data', 'concurrency', 'testing', 'publishing'] },
    postgresql_dba: { name: 'PostgreSQL DBA', competencies: ['sql_fundamentals', 'schema_design', 'indexing', 'query_optimization', 'replication', 'backup_recovery', 'security', 'monitoring'] },
    blockchain: { name: 'Blockchain Developer', competencies: ['smart_contracts', 'cryptography', 'consensus', 'defi_protocols', 'token_standards', 'security_auditing', 'testing', 'web3_integration'] },
    qa: { name: 'QA Engineer', competencies: ['test_planning', 'manual_testing', 'automation', 'api_testing', 'performance_testing', 'ci_integration', 'bug_tracking', 'test_frameworks'] },
    software_architect: { name: 'Software Architect', competencies: ['system_design', 'design_patterns', 'microservices', 'event_driven', 'data_modeling', 'scalability', 'security_architecture', 'documentation'] },
    cyber_security: { name: 'Cyber Security Expert', competencies: ['network_security', 'web_security', 'cryptography', 'penetration_testing', 'incident_response', 'compliance', 'threat_modeling', 'forensics'] },
    ux_design: { name: 'UX Designer', competencies: ['user_research', 'wireframing', 'prototyping', 'usability_testing', 'information_architecture', 'interaction_design', 'design_systems', 'accessibility'] },
    game_developer: { name: 'Game Developer', competencies: ['game_engines', 'graphics_programming', 'physics', 'ai_pathfinding', 'networking_multiplayer', 'audio', 'optimization', 'platform_deployment'] },
    ai_data_scientist: { name: 'AI & Data Scientist', competencies: ['statistics', 'machine_learning', 'deep_learning', 'nlp', 'computer_vision', 'data_wrangling', 'model_evaluation', 'deployment'] },
    data_analyst: { name: 'Data Analyst', competencies: ['sql', 'data_visualization', 'statistics', 'spreadsheets', 'etl', 'reporting', 'python_r', 'business_intelligence'] },
    data_engineer: { name: 'Data Engineer', competencies: ['data_pipelines', 'etl', 'data_warehousing', 'streaming', 'sql', 'cloud_data_services', 'orchestration', 'data_quality'] },
    ai_engineer: { name: 'AI Engineer', competencies: ['ml_fundamentals', 'llm_integration', 'prompt_engineering', 'fine_tuning', 'rag', 'model_serving', 'evaluation', 'vector_databases'] },
    mlops: { name: 'MLOps Engineer', competencies: ['ml_pipelines', 'model_versioning', 'experiment_tracking', 'model_serving', 'monitoring', 'ci_cd_ml', 'feature_stores', 'infrastructure'] },
    product_manager: { name: 'Product Manager', competencies: ['product_strategy', 'user_research', 'roadmapping', 'stakeholder_management', 'metrics', 'prioritization', 'agile', 'technical_literacy'] },
    engineering_manager: { name: 'Engineering Manager', competencies: ['team_leadership', 'project_management', 'technical_strategy', 'hiring', 'mentoring', 'process_improvement', 'stakeholder_communication', 'architecture_oversight'] },
    developer_relations: { name: 'Developer Relations', competencies: ['technical_writing', 'public_speaking', 'community_building', 'sdk_documentation', 'developer_experience', 'content_creation', 'advocacy', 'feedback_loops'] },
    technical_writer: { name: 'Technical Writer', competencies: ['documentation_structure', 'api_documentation', 'tutorials', 'style_guides', 'diagrams', 'versioning', 'tooling', 'audience_analysis'] },
    platform_engineer: { name: 'Platform Engineer', competencies: ['infrastructure_automation', 'developer_tooling', 'ci_cd', 'observability', 'service_mesh', 'cloud_native', 'security', 'self_service_platforms'] },
    sre: { name: 'Site Reliability Engineer', competencies: ['reliability_engineering', 'incident_management', 'monitoring_alerting', 'capacity_planning', 'automation', 'slo_sli_sla', 'chaos_engineering', 'postmortems'] },
    api_design: { name: 'API Designer', competencies: ['rest_design', 'graphql_design', 'openapi_spec', 'versioning', 'authentication', 'rate_limiting', 'documentation', 'error_handling'] },
    flutter_developer: { name: 'Flutter Developer', competencies: ['dart', 'widgets', 'state_management', 'navigation', 'platform_channels', 'animations', 'testing', 'publishing'] },
    react_native_developer: { name: 'React Native Developer', competencies: ['react_fundamentals', 'native_modules', 'navigation', 'state_management', 'animations', 'platform_specific', 'testing', 'publishing'] },
    server_side_game: { name: 'Server-side Game Developer', competencies: ['networking_protocols', 'game_state_sync', 'matchmaking', 'persistence', 'scalability', 'anti_cheat', 'real_time_processing', 'load_balancing'] },
    // Skill roadmaps
    react: { name: 'React', competencies: ['components', 'hooks', 'state_management', 'routing', 'context_api', 'performance', 'testing', 'ssr_next'] },
    vue: { name: 'Vue.js', competencies: ['components', 'composition_api', 'reactivity', 'routing', 'state_management', 'directives', 'testing', 'ssr_nuxt'] },
    angular: { name: 'Angular', competencies: ['components', 'modules', 'dependency_injection', 'routing', 'rxjs', 'forms', 'testing', 'change_detection'] },
    javascript: { name: 'JavaScript', competencies: ['fundamentals', 'async_programming', 'closures_scope', 'dom_manipulation', 'es_modules', 'error_handling', 'prototypes', 'event_loop'] },
    typescript: { name: 'TypeScript', competencies: ['type_system', 'generics', 'interfaces', 'enums_unions', 'utility_types', 'type_guards', 'declaration_files', 'strict_mode'] },
    nodejs: { name: 'Node.js', competencies: ['core_modules', 'event_loop', 'streams', 'http_server', 'npm_ecosystem', 'error_handling', 'performance', 'security'] },
    python: { name: 'Python', competencies: ['fundamentals', 'oop', 'decorators_generators', 'async_await', 'packages', 'type_hints', 'testing', 'data_structures'] },
    java: { name: 'Java', competencies: ['oop', 'collections', 'generics', 'concurrency', 'streams_api', 'jvm', 'testing', 'build_tools'] },
    golang: { name: 'Go', competencies: ['fundamentals', 'goroutines_channels', 'interfaces', 'error_handling', 'packages', 'testing', 'concurrency_patterns', 'standard_library'] },
    rust: { name: 'Rust', competencies: ['ownership_borrowing', 'lifetimes', 'traits', 'error_handling', 'concurrency', 'macros', 'unsafe_code', 'cargo'] },
    cpp: { name: 'C++', competencies: ['memory_management', 'oop', 'templates', 'stl', 'smart_pointers', 'concurrency', 'move_semantics', 'build_systems'] },
    csharp: { name: 'C#', competencies: ['oop', 'linq', 'async_await', 'generics', 'delegates_events', 'dependency_injection', 'entity_framework', 'testing'] },
    swift: { name: 'Swift', competencies: ['fundamentals', 'optionals', 'protocols', 'closures', 'concurrency', 'generics', 'memory_management', 'error_handling'] },
    kotlin: { name: 'Kotlin', competencies: ['fundamentals', 'coroutines', 'null_safety', 'extensions', 'dsl', 'generics', 'collections', 'interop'] },
    php: { name: 'PHP', competencies: ['fundamentals', 'oop', 'composer', 'pdo_databases', 'frameworks', 'testing', 'security', 'performance'] },
    ruby: { name: 'Ruby', competencies: ['fundamentals', 'oop', 'blocks_procs', 'metaprogramming', 'gems', 'testing', 'rails', 'concurrency'] },
    sql: { name: 'SQL', competencies: ['queries', 'joins', 'subqueries', 'indexing', 'transactions', 'window_functions', 'stored_procedures', 'optimization'] },
    mongodb: { name: 'MongoDB', competencies: ['crud', 'aggregation', 'indexing', 'schema_design', 'replication', 'sharding', 'transactions', 'performance'] },
    redis: { name: 'Redis', competencies: ['data_structures', 'caching_patterns', 'pub_sub', 'persistence', 'clustering', 'lua_scripting', 'streams', 'security'] },
    graphql: { name: 'GraphQL', competencies: ['schema_design', 'queries_mutations', 'resolvers', 'subscriptions', 'authentication', 'pagination', 'error_handling', 'performance'] },
    docker: { name: 'Docker', competencies: ['images', 'containers', 'dockerfile', 'compose', 'networking', 'volumes', 'registry', 'security'] },
    kubernetes: { name: 'Kubernetes', competencies: ['pods_deployments', 'services', 'ingress', 'configmaps_secrets', 'storage', 'rbac', 'helm', 'monitoring'] },
    aws: { name: 'AWS', competencies: ['compute', 'storage', 'networking', 'databases', 'iam', 'serverless', 'containers', 'monitoring'] },
    terraform: { name: 'Terraform', competencies: ['hcl', 'providers', 'state_management', 'modules', 'workspaces', 'variables', 'lifecycle', 'ci_cd_integration'] },
    git: { name: 'Git', competencies: ['branching', 'merging', 'rebasing', 'cherry_pick', 'hooks', 'workflows', 'conflict_resolution', 'advanced_log'] },
    linux: { name: 'Linux', competencies: ['filesystem', 'permissions', 'processes', 'networking', 'shell_scripting', 'package_management', 'systemd', 'security'] },
    nginx: { name: 'Nginx', competencies: ['static_serving', 'reverse_proxy', 'load_balancing', 'ssl_tls', 'caching', 'rate_limiting', 'logging', 'security'] },
    prometheus: { name: 'Prometheus', competencies: ['metrics', 'promql', 'alerting', 'service_discovery', 'exporters', 'grafana_integration', 'recording_rules', 'storage'] },
    design_system: { name: 'Design System', competencies: ['component_library', 'tokens', 'documentation', 'accessibility', 'theming', 'versioning', 'testing', 'governance'] },
    tailwindcss: { name: 'Tailwind CSS', competencies: ['utility_classes', 'responsive_design', 'customization', 'components', 'plugins', 'dark_mode', 'animations', 'performance'] },
    sass: { name: 'Sass', competencies: ['variables', 'nesting', 'mixins', 'functions', 'partials', 'extends', 'operators', 'architecture'] },
    webpack: { name: 'Webpack', competencies: ['entry_output', 'loaders', 'plugins', 'code_splitting', 'dev_server', 'optimization', 'module_federation', 'configuration'] },
    vite: { name: 'Vite', competencies: ['dev_server', 'build', 'plugins', 'ssr', 'library_mode', 'env_variables', 'optimization', 'configuration'] },
    nextjs: { name: 'Next.js', competencies: ['routing', 'rendering_strategies', 'data_fetching', 'api_routes', 'middleware', 'optimization', 'deployment', 'authentication'] },
    nuxt: { name: 'Nuxt', competencies: ['routing', 'data_fetching', 'server_engine', 'modules', 'middleware', 'state_management', 'deployment', 'seo'] },
    svelte: { name: 'Svelte', competencies: ['reactivity', 'components', 'stores', 'transitions', 'actions', 'slots', 'ssr_sveltekit', 'testing'] },
    expressjs: { name: 'Express.js', competencies: ['routing', 'middleware', 'error_handling', 'template_engines', 'authentication', 'validation', 'testing', 'security'] },
    fastify: { name: 'Fastify', competencies: ['routing', 'plugins', 'hooks', 'validation', 'serialization', 'decorators', 'testing', 'performance'] },
    django: { name: 'Django', competencies: ['models', 'views', 'templates', 'orm', 'admin', 'authentication', 'rest_framework', 'testing'] },
    flask: { name: 'Flask', competencies: ['routing', 'templates', 'blueprints', 'extensions', 'database_integration', 'authentication', 'testing', 'deployment'] },
    spring_boot: { name: 'Spring Boot', competencies: ['dependency_injection', 'rest_controllers', 'data_jpa', 'security', 'actuator', 'testing', 'configuration', 'microservices'] },
    nestjs: { name: 'NestJS', competencies: ['modules', 'controllers', 'providers', 'middleware', 'guards', 'pipes', 'interceptors', 'testing'] },
    prisma: { name: 'Prisma', competencies: ['schema_modeling', 'migrations', 'queries', 'relations', 'transactions', 'raw_queries', 'seeding', 'client_generation'] },
    elasticsearch: { name: 'Elasticsearch', competencies: ['indexing', 'queries', 'aggregations', 'mappings', 'analyzers', 'cluster_management', 'performance', 'security'] },
    rabbitmq: { name: 'RabbitMQ', competencies: ['exchanges', 'queues', 'bindings', 'routing', 'dead_letter', 'clustering', 'monitoring', 'patterns'] },
    kafka: { name: 'Apache Kafka', competencies: ['topics_partitions', 'producers', 'consumers', 'consumer_groups', 'streams', 'connect', 'schema_registry', 'monitoring'] },
    solana: { name: 'Solana Development', competencies: ['accounts_model', 'programs', 'transactions', 'pda', 'tokens', 'anchor_framework', 'testing', 'security'] },
};
|
|
97
|
+
// ─── Claude Code Evaluator ──────────────────────────────────────────────────
|
|
98
|
+
/**
 * Build the single prompt string that is piped to `claude -p` for evaluation.
 *
 * The prompt contains, in order: the base and domain dimension rubrics, the
 * roadmap catalog (key, name, first few competencies), the tail of the
 * conversation, the scoring instructions, and the exact JSON shape to return.
 *
 * Only the most recent messages are included and each message body is
 * truncated, to keep the prompt within the model's context. When messages
 * were dropped, the conversation header says so; otherwise the header is
 * unchanged.
 *
 * @param {Array<{role?: string, content?: string, tool_uses?: string|Array}>} messages
 *   Session messages in chronological order. `tool_uses` may be a JSON string
 *   or an already-parsed array of `{ name }` objects.
 * @returns {string} The complete evaluation prompt.
 */
function buildEvalPrompt(messages) {
    const MAX_MESSAGES = 80;
    const MAX_CONTENT_CHARS = 600;
    const ROADMAP_PREVIEW_COUNT = 4;
    // Renders one markdown rubric section per dimension key (shared by base + domain).
    const renderDimensions = (keys, definitions) => keys.map(key => {
        const d = definitions[key];
        return `### ${d.label} (key: "${key}")
${d.description}
Signals: ${d.signals.join(' | ')}
Ranges:
${d.ranges.map(r => ` ${r.min}-${r.max}: ${r.description}`).join('\n')}`;
    }).join('\n\n');
    // Renders the ` [tools: a, b]` suffix; returns '' when there is nothing usable.
    const renderToolNames = (toolUses) => {
        if (!toolUses) {
            return '';
        }
        try {
            const parsed = typeof toolUses === 'string' ? JSON.parse(toolUses) : toolUses;
            if (Array.isArray(parsed)) {
                return ` [tools: ${parsed.map((t) => t?.name || '?').join(', ')}]`;
            }
        }
        catch {
            // Tool metadata is decorative; a malformed blob must not block the evaluation.
        }
        return '';
    };
    const baseDimRef = renderDimensions(BASE_DIMENSION_KEYS, BASE_DIMENSIONS);
    const domainDimRef = renderDimensions(DOMAIN_DIMENSION_KEYS, DOMAIN_DIMENSIONS);
    // Format conversation (cap at ~80 messages to stay within context)
    const capped = messages.slice(-MAX_MESSAGES);
    const convo = capped.map(m => {
        // A message without a role must not throw and force the heuristic fallback.
        const role = String(m.role ?? 'unknown').toUpperCase();
        const content = (m.content || '').slice(0, MAX_CONTENT_CHARS);
        return `[${role}]${renderToolNames(m.tool_uses)} ${content}`;
    }).join('\n\n');
    // Tell the model when it is only seeing the tail of the session.
    const truncationNote = capped.length < messages.length
        ? `, showing the last ${capped.length}`
        : '';
    // Build roadmap catalog excerpt for tech detection
    const roadmapList = Object.entries(ALL_ROADMAPS)
        .map(([key, r]) => `${key}: ${r.name} (${r.competencies.slice(0, ROADMAP_PREVIEW_COUNT).join(', ')}...)`)
        .join('\n');
    return `You are a developer productivity evaluator for PromptUp. Analyze the following conversation between a developer (USER) and an AI coding assistant (ASSISTANT).

Score the DEVELOPER across 11 dimensions (6 base + 5 domain) AND detect which technologies/roadmaps are demonstrated.

## Base Dimensions (interaction quality)

${baseDimRef}

## Domain Dimensions (depth of understanding)

${domainDimRef}

## Tech Expertise Detection

From the conversation, identify which technology roadmaps the developer is working with. For each detected roadmap, score the developer's demonstrated competency level 0-100. Use these roadmap keys:
${roadmapList}

## Conversation (${messages.length} messages${truncationNote})

${convo}

## Instructions

1. Score each of the 11 dimensions 0-100 based on the developer's (USER's) behavior, not the assistant's quality.
2. Provide specific, concrete reasoning referencing actual messages.
3. Give 1-3 feedback items for the developer's weakest areas. First, categorize their prompts:
- COMMANDS ("eval now", "build it") — clear directives, fine terse. Do NOT suggest improvements for these.
- DECISIONS ("yep", "3", "go with that") — the developer picked an option. Don't ask them to explain why. Instead, suggest how they could BUILD ON IT — combine approaches, add constraints, refine the solution. The goal is shaping the output, not justifying the choice.
- STEERING ("not like that", "use X instead") — the developer redirected. Only flag if the assistant needed follow-up clarification.
- AMBIGUOUS ("is it correct?", "check the thing") — needs referent clarity. Only flag if the referent was actually unclear.

Only generate suggestions for DECISIONS, STEERING, and AMBIGUOUS prompts — NEVER for clear COMMANDS. Each recommendation MUST have BOTH fields:
- recommendation: Short coaching tip (max 60 chars). Frame as opportunity to shape better output, not as criticism. This shows in the developer's status bar.
- suggestions: REQUIRED array of 2-3 before→after examples from THIS session. The "after" version should show how adding one idea, constraint, or combination would produce a BETTER SOLUTION — not just explain the choice. Format: "Instead of '<actual prompt>', try '<improved version that shapes the output>'"
4. Detect technologies used and score the developer's demonstrated expertise per roadmap.
5. Produce a concise activity log: a chronological list of what was accomplished in this session.
6. Extract the developer's KEY DECISIONS — moments where they steered, rejected, validated, modified, or scoped the AI's work. For each decision:
- type: "steer" (redirected approach), "reject" (refused output), "validate" (tested/verified), "modify" (accepted with changes), "scope" (added/removed work), "accept" (approved output)
- summary: One sentence describing WHAT was decided and WHY (max 100 chars). Write as "Chose X over Y because Z" or "Rejected X, asked for Y instead" — be specific, not generic.
- signal: "high" (architectural/strategic decision), "medium" (tactical choice), "low" (routine approval)
- Only include decisions where the developer actively influenced direction. Skip routine "ok" / "looks good" unless they approved something significant.

Return ONLY valid JSON with no markdown formatting, no code fences, no extra text:
{"dimensions":[{"key":"task_decomposition","score":0,"reasoning":"..."},{"key":"prompt_specificity","score":0,"reasoning":"..."},{"key":"output_validation","score":0,"reasoning":"..."},{"key":"iteration_quality","score":0,"reasoning":"..."},{"key":"strategic_tool_usage","score":0,"reasoning":"..."},{"key":"context_management","score":0,"reasoning":"..."}],"domain_dimensions":[{"key":"architectural_awareness","score":0,"reasoning":"..."},{"key":"error_anticipation","score":0,"reasoning":"..."},{"key":"technical_vocabulary","score":0,"reasoning":"..."},{"key":"dependency_reasoning","score":0,"reasoning":"..."},{"key":"tradeoff_articulation","score":0,"reasoning":"..."}],"tech_expertise":[{"roadmap":"typescript","score":75,"competencies":{"type_system":80,"generics":70}}],"recommendations":[{"dimension_key":"...","priority":"high","recommendation":"Add context to prompts","suggestions":["Instead of 'no', try 'no — terminal shows nothing after response'","Instead of 'yep', try 'yes, use the Stop hook approach'"]}],"activity_log":["Did X","Did Y","Fixed Z"],"decisions":[{"type":"steer","summary":"Chose bcrypt over argon2 — simpler dependency","signal":"high"},{"type":"validate","summary":"Ran integration tests after auth implementation","signal":"medium"}]}`;
}
|
|
183
|
+
/**
 * Run the Claude Code CLI non-interactively (`claude -p`) and return its
 * text output.
 *
 * The prompt is delivered on the child's stdin. The `CLAUDECODE` and
 * `CLAUDE_CODE` variables are removed from the child's environment so the CLI
 * can be spawned from inside an existing Claude Code session.
 *
 * @param {string} prompt - Full prompt text written to the child's stdin.
 * @param {number} [timeoutMs=120000] - Milliseconds before the child is sent
 *   SIGTERM and the promise rejects.
 * @returns {Promise<string>} Trimmed stdout when the child exits with code 0.
 *   Rejects with an Error when the child cannot be spawned, exits non-zero
 *   (message includes up to 500 chars of stderr), or times out.
 */
function runClaudeCode(prompt, timeoutMs = 120_000) {
    return new Promise((resolve, reject) => {
        // Strip CLAUDECODE env var to allow spawning from within a Claude Code session
        const env = { ...process.env };
        delete env.CLAUDECODE;
        delete env.CLAUDE_CODE;
        const proc = spawn('claude', ['-p', '--output-format', 'text', '--no-session-persistence'], {
            stdio: ['pipe', 'pipe', 'pipe'],
            env,
        });
        let stdout = '';
        let stderr = '';
        proc.stdout.on('data', (chunk) => { stdout += chunk.toString(); });
        proc.stderr.on('data', (chunk) => { stderr += chunk.toString(); });
        const timer = setTimeout(() => {
            proc.kill('SIGTERM');
            reject(new Error(`Claude Code timed out after ${timeoutMs}ms`));
        }, timeoutMs);
        proc.on('close', (code) => {
            clearTimeout(timer);
            if (code === 0) {
                resolve(stdout.trim());
            }
            else {
                reject(new Error(`Claude Code exited with code ${code}: ${stderr.slice(0, 500)}`));
            }
        });
        proc.on('error', (err) => {
            clearTimeout(timer);
            reject(err);
        });
        // If the CLI is missing or exits before reading the prompt, the write
        // fails (EPIPE / closed socket) and the error is emitted on the stdin
        // stream. Without a listener that is an uncaught exception, which
        // would prevent the caller from falling back. The failure itself is
        // reported by the process 'error'/'close' handlers above (or by the
        // timeout), which carry more context, so nothing is rejected here.
        proc.stdin.on('error', () => { });
        // Write prompt to stdin and close
        proc.stdin.end(prompt);
    });
}
|
|
219
|
+
/**
 * Extract and validate the evaluation JSON object from raw CLI output.
 *
 * Tolerates markdown code fences and surrounding prose. The candidate span
 * always ends at the last `}`; the first attempt starts at the first `{`
 * and, if that does not parse (for example because a prose preamble contains
 * braces), later `{` positions are tried in order.
 *
 * @param {string} raw - Text returned by the Claude Code CLI.
 * @returns {object} Parsed object; guaranteed to have a `dimensions` array.
 * @throws {Error} 'No JSON object found in Claude response' when `raw` is not
 *   a string or contains no `{ ... }` span.
 * @throws {SyntaxError} When no candidate span is valid JSON (the error from
 *   the first, widest attempt is rethrown).
 * @throws {Error} 'Missing dimensions array in response' when the parsed
 *   object has no `dimensions` array.
 */
function parseClaudeResponse(raw) {
    if (typeof raw !== 'string') {
        throw new Error('No JSON object found in Claude response');
    }
    // Claude might wrap in markdown code fences despite instructions
    const cleaned = raw.trim()
        .replace(/^```(?:json)?\s*\n?/i, '')
        .replace(/\n?```\s*$/i, '');
    // Find the JSON object
    const jsonEnd = cleaned.lastIndexOf('}');
    let jsonStart = cleaned.indexOf('{');
    if (jsonStart === -1 || jsonEnd < jsonStart) {
        throw new Error('No JSON object found in Claude response');
    }
    let parsed;
    let found = false;
    let firstError = null;
    while (jsonStart !== -1 && jsonStart < jsonEnd) {
        try {
            parsed = JSON.parse(cleaned.slice(jsonStart, jsonEnd + 1));
            found = true;
            break;
        }
        catch (err) {
            // Keep the error from the widest span; it is the most informative one.
            if (firstError === null) {
                firstError = err;
            }
            jsonStart = cleaned.indexOf('{', jsonStart + 1);
        }
    }
    if (!found) {
        throw firstError;
    }
    if (!Array.isArray(parsed.dimensions)) {
        throw new Error('Missing dimensions array in response');
    }
    return parsed;
}
|
|
237
|
+
// ─── Main Evaluator ─────────────────────────────────────────────────────────
|
|
238
|
+
export async function evaluateSession(sessionId, messages, triggerType, weightProfile = 'balanced') {
|
|
239
|
+
if (messages.length < 3)
|
|
240
|
+
return null;
|
|
241
|
+
const userMessages = messages.filter(m => m.role === 'user');
|
|
242
|
+
if (userMessages.length === 0)
|
|
243
|
+
return null;
|
|
244
|
+
const profile = WEIGHT_PROFILES[weightProfile] ?? WEIGHT_PROFILES.balanced;
|
|
245
|
+
// Try Claude Code first, fall back to heuristic
|
|
246
|
+
let dimensionScores;
|
|
247
|
+
let domainDimensionScores = [];
|
|
248
|
+
let techExpertise = [];
|
|
249
|
+
let rawEvaluation = null;
|
|
250
|
+
let recommendations = [];
|
|
251
|
+
let usedClaude = false;
|
|
252
|
+
try {
|
|
253
|
+
console.log(`[eval] Running Claude Code evaluation for session ${sessionId.slice(0, 8)}...`);
|
|
254
|
+
const prompt = buildEvalPrompt(messages);
|
|
255
|
+
const rawOutput = await runClaudeCode(prompt);
|
|
256
|
+
const result = parseClaudeResponse(rawOutput);
|
|
257
|
+
usedClaude = true;
|
|
258
|
+
// Store structured data in raw_evaluation (activity log + decisions + raw text)
|
|
259
|
+
rawEvaluation = JSON.stringify({
|
|
260
|
+
activity_log: result.activity_log || [],
|
|
261
|
+
decisions: result.decisions || [],
|
|
262
|
+
domain_dimensions: result.domain_dimensions || [],
|
|
263
|
+
tech_expertise: result.tech_expertise || [],
|
|
264
|
+
raw_text: rawOutput,
|
|
265
|
+
});
|
|
266
|
+
// Persist Claude-extracted decisions to the decisions table
|
|
267
|
+
if (result.decisions && result.decisions.length > 0) {
|
|
268
|
+
for (const d of result.decisions) {
|
|
269
|
+
const validTypes = ['steer', 'accept', 'reject', 'modify', 'validate', 'scope'];
|
|
270
|
+
const type = validTypes.includes(d.type) ? d.type : 'accept';
|
|
271
|
+
const validSignals = ['high', 'medium', 'low'];
|
|
272
|
+
const signal = validSignals.includes(d.signal) ? d.signal : 'medium';
|
|
273
|
+
insertDecision({
|
|
274
|
+
id: ulid(),
|
|
275
|
+
session_id: sessionId,
|
|
276
|
+
type: type,
|
|
277
|
+
message_index: 0,
|
|
278
|
+
context: d.summary.slice(0, 200),
|
|
279
|
+
files_affected: '[]',
|
|
280
|
+
source: 'plugin',
|
|
281
|
+
matched_rule: null,
|
|
282
|
+
depth: signal === 'high' ? 'architectural' : signal === 'medium' ? 'tactical' : 'surface',
|
|
283
|
+
opinionation: signal === 'high' ? 'high' : signal === 'medium' ? 'medium' : 'low',
|
|
284
|
+
ai_action: null,
|
|
285
|
+
signal: signal,
|
|
286
|
+
created_at: new Date().toISOString(),
|
|
287
|
+
});
|
|
288
|
+
}
|
|
289
|
+
}
|
|
290
|
+
// Map Claude's base dimension scores to our format with weights
|
|
291
|
+
dimensionScores = BASE_DIMENSION_KEYS.map(key => {
|
|
292
|
+
const claudeDim = result.dimensions.find(d => d.key === key);
|
|
293
|
+
return {
|
|
294
|
+
key,
|
|
295
|
+
score: Math.max(0, Math.min(100, Math.round(claudeDim?.score ?? 50))),
|
|
296
|
+
weight: profile.weights[key],
|
|
297
|
+
reasoning: claudeDim?.reasoning ?? 'No reasoning provided',
|
|
298
|
+
};
|
|
299
|
+
});
|
|
300
|
+
// Map Claude's domain dimension scores
|
|
301
|
+
if (result.domain_dimensions && result.domain_dimensions.length > 0) {
|
|
302
|
+
const domainWeight = 1 / DOMAIN_DIMENSION_KEYS.length;
|
|
303
|
+
domainDimensionScores = DOMAIN_DIMENSION_KEYS.map(key => {
|
|
304
|
+
const claudeDim = result.domain_dimensions.find(d => d.key === key);
|
|
305
|
+
return {
|
|
306
|
+
key,
|
|
307
|
+
score: Math.max(0, Math.min(100, Math.round(claudeDim?.score ?? 50))),
|
|
308
|
+
weight: domainWeight,
|
|
309
|
+
reasoning: claudeDim?.reasoning ?? 'No reasoning provided',
|
|
310
|
+
};
|
|
311
|
+
});
|
|
312
|
+
}
|
|
313
|
+
// Map Claude's tech expertise
|
|
314
|
+
if (result.tech_expertise && result.tech_expertise.length > 0) {
|
|
315
|
+
techExpertise = result.tech_expertise
|
|
316
|
+
.filter(te => te.roadmap && ALL_ROADMAPS[te.roadmap])
|
|
317
|
+
.map(te => {
|
|
318
|
+
const roadmapDef = ALL_ROADMAPS[te.roadmap];
|
|
319
|
+
const competencies = {};
|
|
320
|
+
for (const comp of roadmapDef.competencies) {
|
|
321
|
+
competencies[comp] = {
|
|
322
|
+
score: te.competencies?.[comp] ?? null,
|
|
323
|
+
demonstrated: te.competencies?.[comp] != null,
|
|
324
|
+
};
|
|
325
|
+
}
|
|
326
|
+
return {
|
|
327
|
+
roadmap: te.roadmap,
|
|
328
|
+
score: Math.max(0, Math.min(100, Math.round(te.score))),
|
|
329
|
+
competencies,
|
|
330
|
+
};
|
|
331
|
+
});
|
|
332
|
+
}
|
|
333
|
+
// Use Claude's recommendations (with suggestions if provided)
|
|
334
|
+
recommendations = (result.recommendations || []).slice(0, 3).map(r => ({
|
|
335
|
+
dimension_key: r.dimension_key,
|
|
336
|
+
priority: r.priority || 'medium',
|
|
337
|
+
recommendation: r.recommendation,
|
|
338
|
+
suggestions: r.suggestions,
|
|
339
|
+
}));
|
|
340
|
+
console.log(`[eval] Claude Code evaluation complete for ${sessionId.slice(0, 8)}`);
|
|
341
|
+
}
|
|
342
|
+
catch (err) {
|
|
343
|
+
console.warn(`[eval] Claude Code unavailable, using heuristic fallback:`, err.message);
|
|
344
|
+
// Fall back to heuristic — generate basic activity log from messages
|
|
345
|
+
const heuristic = heuristicEvaluate(messages, profile);
|
|
346
|
+
dimensionScores = heuristic.dimensionScores;
|
|
347
|
+
domainDimensionScores = heuristic.domainDimensionScores;
|
|
348
|
+
techExpertise = heuristicTechDetect(messages);
|
|
349
|
+
recommendations = heuristic.recommendations;
|
|
350
|
+
rawEvaluation = JSON.stringify({
|
|
351
|
+
activity_log: heuristicActivityLog(messages),
|
|
352
|
+
domain_dimensions: domainDimensionScores,
|
|
353
|
+
tech_expertise: techExpertise,
|
|
354
|
+
raw_text: null,
|
|
355
|
+
});
|
|
356
|
+
}
|
|
357
|
+
// Compute base composite score
|
|
358
|
+
const composite = computeCompositeScore(dimensionScores.map(d => ({ score: d.score, weight: d.weight })));
|
|
359
|
+
// Compute domain composite
|
|
360
|
+
const domainComposite = domainDimensionScores.length > 0
|
|
361
|
+
? computeDomainComposite(Object.fromEntries(domainDimensionScores.map(d => [d.key, { score: d.score, weight: d.weight }])))
|
|
362
|
+
: null;
|
|
363
|
+
// Compute tech composite
|
|
364
|
+
const techComposite = techExpertise.length > 0
|
|
365
|
+
? computeTechComposite(techExpertise)
|
|
366
|
+
: null;
|
|
367
|
+
// Compute overall and grand composites
|
|
368
|
+
const overallComposite = computeOverallComposite(composite, domainComposite);
|
|
369
|
+
const grandComposite = computeGrandComposite(overallComposite, techComposite);
|
|
370
|
+
// Compute trends from previous evaluation
|
|
371
|
+
const prevEval = getLatestEvaluation(sessionId);
|
|
372
|
+
let trends = null;
|
|
373
|
+
if (prevEval) {
|
|
374
|
+
const prevScores = JSON.parse(prevEval.dimension_scores);
|
|
375
|
+
const prevMap = new Map(prevScores.map(d => [d.key, d.score]));
|
|
376
|
+
trends = dimensionScores.map(d => {
|
|
377
|
+
const prev = prevMap.get(d.key) ?? d.score;
|
|
378
|
+
const delta = d.score - prev;
|
|
379
|
+
return {
|
|
380
|
+
dimension_key: d.key,
|
|
381
|
+
direction: delta > 3 ? 'improving' : delta < -3 ? 'declining' : 'stable',
|
|
382
|
+
delta,
|
|
383
|
+
previous_score: prev,
|
|
384
|
+
current_score: d.score,
|
|
385
|
+
};
|
|
386
|
+
});
|
|
387
|
+
}
|
|
388
|
+
// Compute risk flags
|
|
389
|
+
const riskFlags = computeRiskFlagsWithHistory(dimensionScores.map(d => ({ dimension: d.key, score: d.score })), prevEval ? JSON.parse(prevEval.dimension_scores).map((d) => ({ dimension: d.key, score: d.score })) : null, composite);
|
|
390
|
+
// Build evaluation row
|
|
391
|
+
const seqNumbers = messages.map(m => m.sequence_number);
|
|
392
|
+
const triggerReason = `${triggerType}${usedClaude ? '' : ' [heuristic]'}`;
|
|
393
|
+
const evalRow = {
|
|
394
|
+
id: ulid(),
|
|
395
|
+
session_id: sessionId,
|
|
396
|
+
trigger_type: triggerType,
|
|
397
|
+
report_type: 'checkpoint',
|
|
398
|
+
composite_score: composite,
|
|
399
|
+
dimension_scores: JSON.stringify([...dimensionScores, ...domainDimensionScores]),
|
|
400
|
+
recommendations: JSON.stringify(recommendations),
|
|
401
|
+
trends: trends ? JSON.stringify(trends) : null,
|
|
402
|
+
risk_flags: JSON.stringify(riskFlags),
|
|
403
|
+
message_range_from: Math.min(...seqNumbers),
|
|
404
|
+
message_range_to: Math.max(...seqNumbers),
|
|
405
|
+
message_count: messages.length,
|
|
406
|
+
weight_profile: weightProfile,
|
|
407
|
+
raw_evaluation: JSON.stringify({
|
|
408
|
+
...(rawEvaluation ? JSON.parse(rawEvaluation) : {}),
|
|
409
|
+
trigger_reason: triggerReason,
|
|
410
|
+
domain_composite_score: domainComposite,
|
|
411
|
+
tech_composite_score: techComposite,
|
|
412
|
+
overall_composite_score: overallComposite,
|
|
413
|
+
grand_composite_score: grandComposite,
|
|
414
|
+
tech_expertise: techExpertise,
|
|
415
|
+
}),
|
|
416
|
+
created_at: new Date().toISOString(),
|
|
417
|
+
};
|
|
418
|
+
insertEvaluation(evalRow);
|
|
419
|
+
return evalRow;
|
|
420
|
+
}
|
|
421
|
+
// ─── Heuristic Fallback ─────────────────────────────────────────────────────
|
|
422
|
+
/**
 * Score a conversation without Claude, using the keyword heuristics.
 *
 * Runs every base scorer and every domain scorer over the user / assistant
 * messages, then derives up to three recommendations from the lowest-scoring
 * dimensions.
 *
 * @param {Array<object>} messages - Conversation messages; only `role` is read here.
 * @param {object} profile - Weight profile; `profile.weights[key]` supplies base weights.
 * @returns {{dimensionScores: Array<object>, domainDimensionScores: Array<object>, recommendations: Array<object>}}
 */
function heuristicEvaluate(messages, profile) {
    const withRole = (role) => messages.filter((msg) => msg.role === role);
    const userMessages = withRole('user');
    const assistantMessages = withRole('assistant');

    // Run one scorer table over the conversation and attach a weight per key.
    const scoreAll = (keys, scorers, weightOf) => keys.map((key) => {
        const run = scorers[key];
        const outcome = run(userMessages, assistantMessages);
        return {
            key,
            score: outcome.score,
            weight: weightOf(key),
            reasoning: outcome.reasoning,
        };
    });

    const dimensionScores = scoreAll(BASE_DIMENSION_KEYS, HEURISTIC_SCORERS, (key) => profile.weights[key]);
    // Domain dimensions are weighted equally.
    const domainWeight = 1 / DOMAIN_DIMENSION_KEYS.length;
    const domainDimensionScores = scoreAll(DOMAIN_DIMENSION_KEYS, DOMAIN_HEURISTIC_SCORERS, () => domainWeight);

    // Weakest dimensions first, base and domain pooled together.
    const weakestFirst = [...dimensionScores, ...domainDimensionScores]
        .sort((left, right) => left.score - right.score);

    const recommendations = [];
    for (const dim of weakestFirst.slice(0, 3)) {
        // Sorted ascending, so once one candidate is strong the rest are too.
        if (dim.score >= 75) {
            break;
        }
        const def = BASE_DIMENSIONS[dim.key] ?? DOMAIN_DIMENSIONS[dim.key];
        if (def) {
            // First range that starts above the current score is the next target.
            const next = def.ranges.find((range) => range.min > dim.score);
            let priority = 'low';
            if (dim.score < 35) {
                priority = 'high';
            }
            else if (dim.score < 55) {
                priority = 'medium';
            }
            recommendations.push({
                dimension_key: dim.key,
                priority,
                recommendation: next ? `Aim for: ${next.description}` : 'Continue current approach',
                suggestions: def.signals.slice(0, 2),
            });
        }
    }
    return { dimensionScores, domainDimensionScores, recommendations };
}
|
|
457
|
+
// ─── Heuristic Scoring Functions ────────────────────────────────────────────
|
|
458
|
+
/**
 * Count how many of the given phrases occur at least once in `text`.
 *
 * Each phrase contributes at most 1 no matter how often it appears. The text
 * is lower-cased before the substring check but the phrases are used as-is,
 * so a phrase containing uppercase letters never matches.
 *
 * @param {string} text - Text to search.
 * @param {string[]} phrases - Substrings to look for (expected in lowercase).
 * @returns {number} Number of distinct phrases found.
 */
function countPhrases(text, phrases) {
    const haystack = text.toLowerCase();
    const present = phrases.filter((phrase) => haystack.includes(phrase));
    return present.length;
}
|
|
462
|
+
/**
 * Mean `content.length` across the given messages.
 *
 * Messages whose content is null/undefined count as length 0 but still count
 * toward the divisor. An empty list yields 0 rather than NaN.
 *
 * @param {Array<{content?: string|null}>} msgs
 * @returns {number}
 */
function avgLen(msgs) {
    const count = msgs.length;
    if (count === 0) {
        return 0;
    }
    let totalChars = 0;
    msgs.forEach((msg) => {
        totalChars += msg.content?.length ?? 0;
    });
    return totalChars / count;
}
|
|
467
|
+
/**
 * Round `v` to the nearest integer and bound it to the 0–100 range.
 *
 * @param {number} v
 * @returns {number}
 */
function clamp(v) {
    const rounded = Math.round(v);
    const capped = Math.min(100, rounded);
    return Math.max(0, capped);
}
|
|
470
|
+
/**
 * Collect the distinct tool names used across a list of messages.
 *
 * Each message's `tool_uses` is expected to be a JSON string encoding an array
 * of `{ name, ... }` objects. Parsing is best-effort: messages whose
 * `tool_uses` is missing, is not valid JSON, or does not decode to an array
 * are skipped. Individual entries that are null, non-objects, or lack a
 * truthy `name` are skipped on their own, so one malformed entry no longer
 * discards the valid entries that follow it in the same message.
 *
 * @param {Array<{tool_uses?: string|null}>} msgs
 * @returns {Set<string>} Tool names in first-seen order.
 */
function getToolNames(msgs) {
    const tools = new Set();
    for (const m of msgs) {
        if (!m.tool_uses) {
            continue;
        }
        let uses;
        try {
            uses = JSON.parse(m.tool_uses);
        }
        catch {
            // Best-effort: an unparseable payload contributes no tools.
            continue;
        }
        if (!Array.isArray(uses)) {
            continue;
        }
        for (const use of uses) {
            // `use` may be null or a primitive in a malformed payload.
            if (use?.name) {
                tools.add(use.name);
            }
        }
    }
    return tools;
}
|
|
485
|
+
// Heuristic scorers for the six base dimensions, keyed by dimension key.
// Each scorer takes (userMessages, assistantMessages) and returns
// { score, reasoning }: `score` is a starting value adjusted by signal bonuses
// and penalties, then passed through clamp(); `reasoning` is the '; '-joined
// list of signals that fired, or a fixed fallback label when none did.
const HEURISTIC_SCORERS = (() => {
    // Sum countPhrases() over every message's content (missing content counts as '').
    const tally = (msgs, phrases) => {
        let total = 0;
        msgs.forEach((msg) => {
            total += countPhrases(msg.content ?? '', phrases);
        });
        return total;
    };
    // Package running points and collected notes into the scorer result shape.
    const verdict = (points, notes, fallback) => ({
        score: clamp(points),
        reasoning: notes.join('; ') || fallback,
    });
    // Lower-cased, whitespace-separated token set of one message.
    const wordSet = (msg) => new Set((msg.content ?? '').toLowerCase().split(/\s+/));

    return {
        // Rewards sequencing words per user message; penalises long unstructured prompts.
        task_decomposition(user) {
            const notes = [];
            let points = 45;
            const markerHits = tally(user, ['first', 'then', 'next', 'step 1', 'step 2', '1.', '2.', '3.', 'now that', 'finally']);
            const ratio = markerHits / (user.length || 1);
            if (ratio > 0.5) {
                points += 20;
                notes.push('frequent step-by-step');
            }
            else if (ratio > 0.2) {
                points += 10;
                notes.push('some structure');
            }
            const longAndFlat = avgLen(user) > 800 && ratio < 0.2;
            if (longAndFlat) {
                points -= 15;
                notes.push('long prompts, no decomposition');
            }
            return verdict(points, notes, 'baseline');
        },
        // Rewards message length, fenced code, and constraint wording.
        prompt_specificity(user) {
            const notes = [];
            let points = 40;
            const meanLength = avgLen(user);
            if (meanLength > 500) {
                points += 15;
                notes.push('detailed');
            }
            else if (meanLength < 50) {
                points -= 10;
                notes.push('very short');
            }
            let fenced = 0;
            user.forEach((msg) => {
                if (/```/.test(msg.content ?? '')) {
                    fenced += 1;
                }
            });
            if (fenced > 0) {
                points += Math.min(fenced * 4, 15);
                notes.push(`code examples (${fenced}x)`);
            }
            const constraintHits = tally(user, ["don't", 'must', 'should not', 'only', 'ensure']);
            if (constraintHits > 3) {
                points += 12;
                notes.push('constraints');
            }
            else if (constraintHits > 0) {
                // Small bonus, intentionally without a reasoning note.
                points += 5;
            }
            return verdict(points, notes, 'baseline');
        },
        // Looks at follow-up messages (everything after the first) for push-back and questions.
        output_validation(user) {
            const notes = [];
            let points = 35;
            const followUps = user.slice(1);
            const challengeHits = tally(followUps, ["that's wrong", "doesn't work", 'fix', 'bug', 'error', 'actually', 'wrong', 'broken']);
            if (challengeHits > 3) {
                points += 25;
                notes.push(`challenges (${challengeHits}x)`);
            }
            else if (challengeHits > 0) {
                points += 10;
                notes.push('some validation');
            }
            const explainHits = tally(followUps, ['why', 'explain', 'how does']);
            if (explainHits > 0) {
                points += Math.min(explainHits * 4, 12);
                notes.push('asks explanations');
            }
            return verdict(points, notes, 'limited validation');
        },
        // Rewards pivots; penalises consecutive near-duplicate messages.
        iteration_quality(user) {
            if (user.length < 2) {
                return { score: 50, reasoning: 'too few messages' };
            }
            const notes = [];
            let points = 45;
            const pivots = tally(user, ['different approach', 'instead', "let's try", 'scratch that']);
            if (pivots > 0) {
                points += Math.min(pivots * 7, 15);
                notes.push(`pivots (${pivots}x)`);
            }
            // Count adjacent message pairs sharing more than 60% of their words.
            let nearDuplicates = 0;
            let previous = wordSet(user[0]);
            for (const msg of user.slice(1)) {
                const current = wordSet(msg);
                if (previous.size > 5 && current.size > 5) {
                    let shared = 0;
                    for (const word of current) {
                        if (previous.has(word)) {
                            shared += 1;
                        }
                    }
                    if (shared / Math.max(previous.size, current.size) > 0.6) {
                        nearDuplicates += 1;
                    }
                }
                previous = current;
            }
            if (nearDuplicates > 2) {
                points -= 15;
                notes.push('repetitive');
            }
            else {
                points += 10;
                notes.push('distinct iterations');
            }
            return verdict(points, notes, 'baseline');
        },
        // Rewards variety of tools plus specific exploration / edit / shell tools.
        strategic_tool_usage(_user, asst) {
            const notes = [];
            let points = 40;
            const tools = getToolNames(asst);
            const kinds = tools.size;
            if (kinds >= 6) {
                points += 25;
                notes.push(`${kinds} tool types`);
            }
            else if (kinds >= 3) {
                points += 15;
                notes.push(`${kinds} tool types`);
            }
            else if (kinds >= 1) {
                points += 5;
                notes.push(`${kinds} tool type`);
            }
            const explored = tools.has('Read') || tools.has('Grep');
            if (explored) {
                points += 8;
                notes.push('exploration');
            }
            const modified = tools.has('Edit') || tools.has('Write');
            if (modified) {
                points += 5;
                notes.push('modification');
            }
            if (tools.has('Bash')) {
                points += 5;
                notes.push('shell');
            }
            return verdict(points, notes, 'no tools');
        },
        // Rewards a long opening message and explicit recap/context phrases.
        context_management(user) {
            const notes = [];
            let points = 40;
            const openingLength = (user[0]?.content ?? '').length;
            if (openingLength > 500) {
                points += 15;
                notes.push('strong initial context');
            }
            else if (openingLength < 50) {
                points -= 5;
                notes.push('minimal context');
            }
            const contextHits = tally(user, ['for context', 'background', 'so far', 'to recap', 'previously']);
            if (contextHits > 3) {
                points += 15;
                notes.push('active context mgmt');
            }
            else if (contextHits > 0) {
                // Small bonus, intentionally without a reasoning note.
                points += 7;
            }
            return verdict(points, notes, 'baseline');
        },
    };
})();
|
|
634
|
+
// ─── Domain Dimension Heuristic Scorers ────────────────────────────────────
|
|
635
|
+
// Heuristic scorers for the five domain dimensions, keyed by dimension key.
// Each scorer takes the user messages and returns { score, reasoning }.
// All of them follow one shape: a starting value, a tiered bonus based on how
// many primary phrases were seen, and a flat bonus from a secondary signal.
// The result is passed through clamp().
const DOMAIN_HEURISTIC_SCORERS = (() => {
    // Sum countPhrases() over every message's content (missing content counts as '').
    const tally = (msgs, phrases) => {
        let total = 0;
        msgs.forEach((msg) => {
            total += countPhrases(msg.content ?? '', phrases);
        });
        return total;
    };
    // Tiers are [floor, bonus, note], listed strongest first; the first tier
    // whose floor is strictly exceeded wins. Returns the points to add.
    const applyTier = (count, tiers, notes) => {
        const tier = tiers.find(([floor]) => count > floor);
        if (!tier) {
            return 0;
        }
        notes.push(tier[2]);
        return tier[1];
    };
    // Package running points and collected notes into the scorer result shape.
    const verdict = (points, notes, fallback) => ({
        score: clamp(points),
        reasoning: notes.join('; ') || fallback,
    });

    return {
        architectural_awareness(user) {
            const notes = [];
            let points = 40;
            const archHits = tally(user, ['architecture', 'design pattern', 'component', 'module', 'layer', 'service', 'microservice', 'monolith', 'separation of concerns', 'coupling', 'cohesion', 'boundary', 'interface']);
            points += applyTier(archHits, [
                [5, 25, `strong architecture awareness (${archHits}x)`],
                [2, 15, 'some architecture discussion'],
                [0, 5, 'minimal architecture mentions'],
            ], notes);
            const systemHits = tally(user, ['scaling', 'performance', 'latency', 'throughput', 'bottleneck', 'trade-off', 'migration']);
            if (systemHits > 2) {
                points += 10;
                notes.push('system-level thinking');
            }
            return verdict(points, notes, 'limited architecture awareness');
        },
        error_anticipation(user) {
            const notes = [];
            let points = 35;
            const errorHits = tally(user, ['edge case', 'error handling', 'what if', 'failure', 'fallback', 'timeout', 'retry', 'catch', 'throw', 'validate', 'null check', 'undefined', 'boundary']);
            points += applyTier(errorHits, [
                [5, 30, `proactive error thinking (${errorHits}x)`],
                [2, 15, 'some error consideration'],
                [0, 5, 'minimal error awareness'],
            ], notes);
            const testHits = tally(user, ['test', 'spec', 'assert', 'expect', 'unhappy path', 'negative test']);
            if (testHits > 2) {
                points += 10;
                notes.push('tests for error cases');
            }
            return verdict(points, notes, 'limited error anticipation');
        },
        technical_vocabulary(user) {
            const notes = [];
            let points = 45;
            // NOTE: countPhrases lower-cases the text but not the phrases, so the
            // uppercase 'ORM' entry can never match.
            const termHits = tally(user, ['idempotent', 'polymorphism', 'encapsulation', 'immutable', 'pure function', 'side effect', 'closure', 'decorator', 'middleware', 'serialization', 'deserialization', 'abstraction', 'dependency injection', 'generic', 'type guard', 'discriminated union', 'enum', 'interface', 'schema', 'migration', 'ORM', 'query builder']);
            points += applyTier(termHits, [
                [6, 25, `precise vocabulary (${termHits} terms)`],
                [3, 15, 'good technical language'],
                [0, 5, 'basic technical terms'],
            ], notes);
            // Longer messages on average earn a small extra bonus.
            const meanLength = avgLen(user);
            if (meanLength > 300) {
                points += 5;
                notes.push('detailed communication');
            }
            return verdict(points, notes, 'baseline vocabulary');
        },
        dependency_reasoning(user) {
            const notes = [];
            let points = 40;
            const depHits = tally(user, ['import', 'require', 'dependency', 'depends on', 'peer dep', 'circular', 'side effect', 'breaking change', 'downstream', 'upstream', 'coupling', 'version', 'compatible']);
            points += applyTier(depHits, [
                [5, 25, `strong dependency awareness (${depHits}x)`],
                [2, 12, 'some dependency discussion'],
                [0, 5, 'minimal dependency mentions'],
            ], notes);
            const flowHits = tally(user, ['data flow', 'call chain', 'event', 'propagate', 'cascade', 'trigger']);
            if (flowHits > 1) {
                points += 10;
                notes.push('traces data flow');
            }
            return verdict(points, notes, 'limited dependency reasoning');
        },
        tradeoff_articulation(user) {
            const notes = [];
            let points = 40;
            const tradeoffHits = tally(user, ['trade-off', 'tradeoff', 'pros and cons', 'alternative', 'versus', 'vs', 'instead of', 'compared to', 'option a', 'option b', 'approach', 'downside', 'benefit']);
            points += applyTier(tradeoffHits, [
                [4, 25, `explicit tradeoff analysis (${tradeoffHits}x)`],
                [1, 12, 'some alternatives considered'],
                [0, 5, 'occasional comparison'],
            ], notes);
            const reasoningHits = tally(user, ['because', 'since', 'the reason', 'rationale', 'chose this because', 'better because', 'prefer', 'maintainable', 'readable']);
            if (reasoningHits > 3) {
                points += 10;
                notes.push('explains reasoning');
            }
            return verdict(points, notes, 'limited tradeoff discussion');
        },
    };
})();
|
|
762
|
+
// ─── Heuristic Tech Detection ──────────────────────────────────────────────
|
|
763
|
+
/**
 * Simple tech detection from conversation content for heuristic fallback.
 *
 * Every pattern found in the combined, lower-cased message text counts as one
 * hit; every file extension found in the combined, lower-cased `tool_uses`
 * payloads counts as two. A technology is reported when it has at least two
 * hits and a matching entry in ALL_ROADMAPS. Its score is 35 plus 8 per hit
 * (bonus capped at 45), and every competency of the roadmap is listed as not
 * demonstrated.
 *
 * Patterns and extensions are lower-cased before comparison because the
 * haystacks are lower-cased; otherwise a mixed-case signal such as
 * 'className' could never match.
 *
 * @param {Array<{content?: string|null, tool_uses?: string|null}>} messages
 * @returns {Array<{roadmap: string, score: number, competencies: Object}>}
 */
function heuristicTechDetect(messages) {
    const allText = messages.map(m => m.content ?? '').join(' ').toLowerCase();
    const toolText = messages
        .filter(m => m.tool_uses)
        .map(m => m.tool_uses)
        .join(' ')
        .toLowerCase();
    const TECH_SIGNALS = {
        typescript: { patterns: ['typescript', '.ts', 'tsconfig', 'type guard', 'interface', 'generic'], fileExts: ['.ts', '.tsx'] },
        react: { patterns: ['react', 'usestate', 'useeffect', 'jsx', 'component', 'props'], fileExts: ['.tsx', '.jsx'] },
        nextjs: { patterns: ['next.js', 'nextjs', 'next.config', 'app router', 'server component', 'getserversideprops'] },
        nodejs: { patterns: ['node.js', 'nodejs', 'express', 'npm', 'pnpm', 'package.json'], fileExts: ['.mjs', '.cjs'] },
        python: { patterns: ['python', 'pip', 'django', 'flask', 'pytest', 'requirements.txt'], fileExts: ['.py'] },
        docker: { patterns: ['docker', 'dockerfile', 'container', 'docker-compose', 'image'] },
        sql: { patterns: ['select', 'insert', 'join', 'postgresql', 'mysql', 'migration', 'drizzle', 'prisma'] },
        git: { patterns: ['git commit', 'git push', 'branch', 'merge', 'rebase', 'pull request'] },
        tailwindcss: { patterns: ['tailwind', 'className', 'utility class', 'tw-'] },
        golang: { patterns: ['golang', 'go.mod', 'goroutine', 'chan ', 'func main'], fileExts: ['.go'] },
        rust: { patterns: ['cargo', 'rustc', '.rs', 'fn main', 'impl ', 'trait '], fileExts: ['.rs'] },
    };
    const detected = [];
    for (const [key, { patterns, fileExts = [] }] of Object.entries(TECH_SIGNALS)) {
        let hits = 0;
        for (const p of patterns) {
            // Haystack is lower-cased, so the needle must be too.
            if (allText.includes(p.toLowerCase())) {
                hits++;
            }
        }
        for (const ext of fileExts) {
            // A touched file of this type is weighted as two hits.
            if (toolText.includes(ext.toLowerCase())) {
                hits += 2;
            }
        }
        if (hits < 2) {
            continue;
        }
        const roadmapDef = ALL_ROADMAPS[key];
        if (!roadmapDef) {
            continue;
        }
        const competencies = {};
        for (const comp of roadmapDef.competencies) {
            competencies[comp] = { score: null, demonstrated: false };
        }
        // Score based on signal density — more mentions = higher demonstrated expertise
        const score = clamp(35 + Math.min(hits * 8, 45));
        detected.push({ roadmap: key, score, competencies });
    }
    return detected;
}
|
|
810
|
+
// ─── Heuristic Activity Log ─────────────────────────────────────────────────
|
|
811
|
+
/**
 * Build a short, human-readable activity log from raw messages for the
 * heuristic fallback.
 *
 * Emits, in message order:
 *  - `User: <first 100 chars, newlines flattened>` for each non-empty user message,
 *  - `Created <file>` / `Modified <file>` for Write / Edit tool uses, once per
 *    file basename,
 *  - `Ran: <first 60 chars>` for Bash tool uses.
 *
 * Tool-use parsing is best-effort: a `tool_uses` value that is not valid JSON
 * or not an array is skipped. Entries are validated one by one (null entries,
 * non-string `file_path` / `command`), so a single malformed entry no longer
 * discards the remaining entries of the same message.
 *
 * @param {Array<{role?: string, content?: string|null, tool_uses?: string|null}>} messages
 * @returns {string[]} At most 30 log lines.
 */
function heuristicActivityLog(messages) {
    const log = [];
    // Basenames already reported, so each file is logged only once.
    const toolFiles = new Set();
    for (const m of messages) {
        if (m.role === 'user') {
            const text = (m.content ?? '').slice(0, 100).replace(/\n/g, ' ').trim();
            if (text) {
                log.push(`User: ${text}`);
            }
        }
        if (!m.tool_uses) {
            continue;
        }
        let uses;
        try {
            uses = JSON.parse(m.tool_uses);
        }
        catch {
            // Best-effort: an unparseable payload contributes no entries.
            continue;
        }
        if (!Array.isArray(uses)) {
            continue;
        }
        for (const u of uses) {
            const name = u?.name || 'unknown';
            const filePath = u?.input?.file_path;
            const command = u?.input?.command;
            if ((name === 'Edit' || name === 'Write') && filePath && typeof filePath === 'string') {
                const file = filePath.split('/').pop();
                if (!toolFiles.has(file)) {
                    toolFiles.add(file);
                    log.push(`${name === 'Write' ? 'Created' : 'Modified'} ${file}`);
                }
            }
            else if (name === 'Bash' && command && typeof command === 'string') {
                log.push(`Ran: ${command.slice(0, 60)}`);
            }
        }
    }
    return log.slice(0, 30); // Cap at 30 entries
}
|