groove-dev 0.27.102 → 0.27.103

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. package/CLAUDE.md +0 -7
  2. package/moe-training/client/domain-tagger.js +205 -0
  3. package/moe-training/client/edit-normalizer.js +188 -0
  4. package/moe-training/client/envelope-builder.js +1 -1
  5. package/moe-training/client/parsers/claude-code.js +56 -9
  6. package/moe-training/client/parsers/codex.js +25 -5
  7. package/moe-training/client/parsers/gemini.js +21 -2
  8. package/moe-training/client/parsers/grok.js +18 -0
  9. package/moe-training/client/trajectory-capture.js +95 -3
  10. package/moe-training/server/routes/ingest.js +26 -0
  11. package/moe-training/server/verifier.js +34 -0
  12. package/moe-training/shared/constants.js +9 -0
  13. package/moe-training/shared/envelope-schema.js +128 -2
  14. package/moe-training/test/client/domain-tagger.test.js +203 -0
  15. package/moe-training/test/client/edit-normalizer.test.js +376 -0
  16. package/moe-training/test/client/envelope-builder.test.js +28 -0
  17. package/moe-training/test/client/parsers/claude-code.test.js +248 -38
  18. package/moe-training/test/client/parsers/codex.test.js +2 -0
  19. package/moe-training/test/client/parsers/gemini.test.js +2 -0
  20. package/moe-training/test/client/trajectory-capture.test.js +345 -0
  21. package/moe-training/test/server/verifier.test.js +94 -0
  22. package/moe-training/test/shared/envelope-schema.test.js +291 -0
  23. package/node_modules/@groove-dev/cli/package.json +1 -1
  24. package/node_modules/@groove-dev/daemon/package.json +1 -1
  25. package/node_modules/@groove-dev/gui/package.json +1 -1
  26. package/node_modules/moe-training/client/domain-tagger.js +205 -0
  27. package/node_modules/moe-training/client/edit-normalizer.js +188 -0
  28. package/node_modules/moe-training/client/envelope-builder.js +1 -1
  29. package/node_modules/moe-training/client/parsers/claude-code.js +56 -9
  30. package/node_modules/moe-training/client/parsers/codex.js +25 -5
  31. package/node_modules/moe-training/client/parsers/gemini.js +21 -2
  32. package/node_modules/moe-training/client/parsers/grok.js +18 -0
  33. package/node_modules/moe-training/client/trajectory-capture.js +95 -3
  34. package/node_modules/moe-training/server/routes/ingest.js +26 -0
  35. package/node_modules/moe-training/server/verifier.js +34 -0
  36. package/node_modules/moe-training/shared/constants.js +9 -0
  37. package/node_modules/moe-training/shared/envelope-schema.js +128 -2
  38. package/node_modules/moe-training/test/client/domain-tagger.test.js +203 -0
  39. package/node_modules/moe-training/test/client/edit-normalizer.test.js +376 -0
  40. package/node_modules/moe-training/test/client/envelope-builder.test.js +28 -0
  41. package/node_modules/moe-training/test/client/parsers/claude-code.test.js +248 -38
  42. package/node_modules/moe-training/test/client/parsers/codex.test.js +2 -0
  43. package/node_modules/moe-training/test/client/parsers/gemini.test.js +2 -0
  44. package/node_modules/moe-training/test/client/trajectory-capture.test.js +345 -0
  45. package/node_modules/moe-training/test/server/verifier.test.js +94 -0
  46. package/node_modules/moe-training/test/shared/envelope-schema.test.js +291 -0
  47. package/package.json +1 -1
  48. package/packages/cli/package.json +1 -1
  49. package/packages/daemon/package.json +1 -1
  50. package/packages/gui/package.json +1 -1
@@ -225,6 +225,100 @@ describe('EnvelopeVerifier', () => {
225
225
  assert.equal(result.valid, false);
226
226
  assert.ok(result.reason.includes('HMAC'));
227
227
  });
228
+
229
+ // --- verifyFeedback ---
230
+
231
+ it('verifyFeedback accepts valid USER_FEEDBACK envelope', () => {
232
+ const envelope = {
233
+ envelope_id: 'env_fb_1',
234
+ session_id: sessionId,
235
+ type: 'USER_FEEDBACK',
236
+ feedback: {
237
+ signal: 'accepted',
238
+ timestamp: Date.now() / 1000,
239
+ context: 'completed with no interventions',
240
+ target_step: 10,
241
+ revision_rounds: 0,
242
+ delta_summary: null,
243
+ },
244
+ };
245
+
246
+ const forHmac = { ...envelope };
247
+ const envelopeBytes = JSON.stringify(forHmac);
248
+ const hmac = signEnvelope(sharedSecret, envelopeBytes, 0);
249
+ envelope.attestation = { session_hmac: hmac, sequence: 0, app_version_hash: VALID_APP_HASH };
250
+
251
+ const result = verifier.verifyFeedback(envelope);
252
+ assert.equal(result.valid, true);
253
+ });
254
+
255
+ it('verifyFeedback rejects unknown session_id', () => {
256
+ const envelope = {
257
+ envelope_id: 'env_fb_2',
258
+ session_id: 'sess_nonexistent',
259
+ type: 'USER_FEEDBACK',
260
+ feedback: { signal: 'accepted', timestamp: Date.now() / 1000 },
261
+ attestation: { session_hmac: 'a'.repeat(64), sequence: 0, app_version_hash: VALID_APP_HASH },
262
+ };
263
+ const result = verifier.verifyFeedback(envelope);
264
+ assert.equal(result.valid, false);
265
+ assert.ok(result.reason.includes('unknown session_id'));
266
+ });
267
+
268
+ it('verifyFeedback rejects missing attestation', () => {
269
+ const envelope = {
270
+ envelope_id: 'env_fb_3',
271
+ session_id: sessionId,
272
+ type: 'USER_FEEDBACK',
273
+ feedback: { signal: 'accepted', timestamp: Date.now() / 1000 },
274
+ };
275
+ const result = verifier.verifyFeedback(envelope);
276
+ assert.equal(result.valid, false);
277
+ assert.ok(result.reason.includes('attestation'));
278
+ });
279
+
280
+ it('verifyFeedback rejects tampered HMAC', () => {
281
+ const envelope = {
282
+ envelope_id: 'env_fb_4',
283
+ session_id: sessionId,
284
+ type: 'USER_FEEDBACK',
285
+ feedback: { signal: 'accepted', timestamp: Date.now() / 1000 },
286
+ attestation: { session_hmac: 'f'.repeat(64), sequence: 0, app_version_hash: VALID_APP_HASH },
287
+ };
288
+ const result = verifier.verifyFeedback(envelope);
289
+ assert.equal(result.valid, false);
290
+ assert.ok(result.reason.includes('HMAC'));
291
+ });
292
+
293
+ it('verifyFeedback rejects invalid signal via schema', () => {
294
+ const envelope = {
295
+ envelope_id: 'env_fb_5',
296
+ session_id: sessionId,
297
+ type: 'USER_FEEDBACK',
298
+ feedback: { signal: 'thumbs_up', timestamp: Date.now() / 1000 },
299
+ };
300
+
301
+ const forHmac = { ...envelope };
302
+ const envelopeBytes = JSON.stringify(forHmac);
303
+ const hmac = signEnvelope(sharedSecret, envelopeBytes, 0);
304
+ envelope.attestation = { session_hmac: hmac, sequence: 0, app_version_hash: VALID_APP_HASH };
305
+
306
+ const result = verifier.verifyFeedback(envelope);
307
+ assert.equal(result.valid, false);
308
+ assert.ok(result.reason.includes('schema'));
309
+ });
310
+
311
+ it('verifyFeedback rejects missing session_id', () => {
312
+ const envelope = {
313
+ envelope_id: 'env_fb_6',
314
+ type: 'USER_FEEDBACK',
315
+ feedback: { signal: 'accepted', timestamp: Date.now() / 1000 },
316
+ attestation: { session_hmac: 'a'.repeat(64), sequence: 0, app_version_hash: VALID_APP_HASH },
317
+ };
318
+ const result = verifier.verifyFeedback(envelope);
319
+ assert.equal(result.valid, false);
320
+ assert.ok(result.reason.includes('session_id'));
321
+ });
228
322
  });
229
323
 
230
324
  function verifyClose(verifier, envelope) {
@@ -3,6 +3,7 @@
3
3
  import { describe, it } from 'node:test';
4
4
  import assert from 'node:assert/strict';
5
5
  import { validateEnvelope, STEP_TYPES } from '../../shared/envelope-schema.js';
6
+ import { TRAINING_EXCLUSION_REASONS } from '../../shared/constants.js';
6
7
 
7
8
  const VALID_HMAC = 'a'.repeat(64);
8
9
  const VALID_APP_HASH = 'b'.repeat(64);
@@ -89,6 +90,17 @@ describe('envelope-schema', () => {
89
90
  }
90
91
  });
91
92
 
93
+ it('edit step type is valid', () => {
94
+ const env = validEnvelope();
95
+ env.trajectory_log = [{
96
+ step: 1, type: 'edit', timestamp: Date.now() / 1000,
97
+ file_path: 'index.html', edit_type: 'create', content: '<html></html>',
98
+ token_count: 5,
99
+ }];
100
+ const result = validateEnvelope(env);
101
+ assert.equal(result.valid, true);
102
+ });
103
+
92
104
  // --- New security tests ---
93
105
 
94
106
  it('rejects trajectory_log with > 500 steps', () => {
@@ -348,4 +360,283 @@ describe('envelope-schema', () => {
348
360
  assert.equal(result.valid, false);
349
361
  assert.ok(result.errors.some(e => e.includes('session_hmac')));
350
362
  });
363
+
364
+ // --- Observation truncation fields ---
365
+
366
+ it('accepts observation step with truncated and original_token_count', () => {
367
+ const env = validEnvelope();
368
+ env.trajectory_log.push({
369
+ step: 3, type: 'observation', timestamp: Date.now() / 1000,
370
+ content: 'output', token_count: 100, truncated: false, original_token_count: 100,
371
+ });
372
+ const result = validateEnvelope(env);
373
+ assert.equal(result.valid, true);
374
+ });
375
+
376
+ it('accepts observation step with truncated=true', () => {
377
+ const env = validEnvelope();
378
+ env.trajectory_log.push({
379
+ step: 3, type: 'observation', timestamp: Date.now() / 1000,
380
+ content: 'output...', token_count: 4096, truncated: true, original_token_count: 9000,
381
+ });
382
+ const result = validateEnvelope(env);
383
+ assert.equal(result.valid, true);
384
+ });
385
+
386
+ it('rejects non-boolean truncated field', () => {
387
+ const env = validEnvelope();
388
+ env.trajectory_log[0].truncated = 'yes';
389
+ const result = validateEnvelope(env);
390
+ assert.equal(result.valid, false);
391
+ assert.ok(result.errors.some(e => e.includes('truncated')));
392
+ });
393
+
394
+ it('rejects negative original_token_count', () => {
395
+ const env = validEnvelope();
396
+ env.trajectory_log[0].original_token_count = -5;
397
+ const result = validateEnvelope(env);
398
+ assert.equal(result.valid, false);
399
+ assert.ok(result.errors.some(e => e.includes('original_token_count')));
400
+ });
401
+
402
+ it('steps without truncation fields still validate (backward compat)', () => {
403
+ const env = validEnvelope();
404
+ assert.equal(env.trajectory_log[0].truncated, undefined);
405
+ const result = validateEnvelope(env);
406
+ assert.equal(result.valid, true);
407
+ });
408
+
409
+ // --- domain_tags ---
410
+
411
+ it('accepts null domain_tags in metadata', () => {
412
+ const env = validEnvelope();
413
+ env.metadata.domain_tags = null;
414
+ const result = validateEnvelope(env);
415
+ assert.equal(result.valid, true);
416
+ });
417
+
418
+ it('accepts absent domain_tags in metadata (backward compat)', () => {
419
+ const env = validEnvelope();
420
+ assert.equal(env.metadata.domain_tags, undefined);
421
+ const result = validateEnvelope(env);
422
+ assert.equal(result.valid, true);
423
+ });
424
+
425
+ it('accepts valid domain_tags object', () => {
426
+ const env = validEnvelope();
427
+ env.metadata.domain_tags = {
428
+ primary: { domain: 'python', confidence: 0.42 },
429
+ secondary: { domain: 'data_science_ml', confidence: 0.23 },
430
+ tertiary: { domain: 'devops_docker', confidence: 0.11 },
431
+ };
432
+ const result = validateEnvelope(env);
433
+ assert.equal(result.valid, true);
434
+ });
435
+
436
+ it('rejects domain_tags with invalid confidence', () => {
437
+ const env = validEnvelope();
438
+ env.metadata.domain_tags = {
439
+ primary: { domain: 'python', confidence: 1.5 },
440
+ secondary: { domain: 'rust', confidence: 0.2 },
441
+ tertiary: { domain: 'react_frontend', confidence: 0.1 },
442
+ };
443
+ const result = validateEnvelope(env);
444
+ assert.equal(result.valid, false);
445
+ assert.ok(result.errors.some(e => e.includes('confidence')));
446
+ });
447
+
448
+ it('rejects domain_tags missing tertiary', () => {
449
+ const env = validEnvelope();
450
+ env.metadata.domain_tags = {
451
+ primary: { domain: 'python', confidence: 0.4 },
452
+ secondary: { domain: 'rust', confidence: 0.2 },
453
+ };
454
+ const result = validateEnvelope(env);
455
+ assert.equal(result.valid, false);
456
+ assert.ok(result.errors.some(e => e.includes('tertiary')));
457
+ });
458
+
459
+ // --- leaf_context ---
460
+
461
+ it('accepts null leaf_context in metadata', () => {
462
+ const env = validEnvelope();
463
+ env.metadata.leaf_context = null;
464
+ const result = validateEnvelope(env);
465
+ assert.equal(result.valid, true);
466
+ });
467
+
468
+ it('accepts absent leaf_context in metadata (backward compat)', () => {
469
+ const env = validEnvelope();
470
+ assert.equal(env.metadata.leaf_context, undefined);
471
+ const result = validateEnvelope(env);
472
+ assert.equal(result.valid, true);
473
+ });
474
+
475
+ it('accepts valid leaf_context object', () => {
476
+ const env = validEnvelope();
477
+ env.metadata.leaf_context = {
478
+ leaf_id: 'python_expert_v3', leaf_version: '1.2.0',
479
+ confidence_at_route: 0.42, chassis_model: 'Qwen/Qwen3-0.6B',
480
+ };
481
+ const result = validateEnvelope(env);
482
+ assert.equal(result.valid, true);
483
+ });
484
+
485
+ it('rejects leaf_context with invalid confidence_at_route', () => {
486
+ const env = validEnvelope();
487
+ env.metadata.leaf_context = {
488
+ leaf_id: 'test', leaf_version: '1.0', confidence_at_route: 1.5, chassis_model: 'test',
489
+ };
490
+ const result = validateEnvelope(env);
491
+ assert.equal(result.valid, false);
492
+ assert.ok(result.errors.some(e => e.includes('confidence_at_route')));
493
+ });
494
+
495
+ // --- Quality tier in SESSION_CLOSE ---
496
+
497
+ it('SESSION_CLOSE accepts quality_tier and training fields', () => {
498
+ const close = {
499
+ envelope_id: 'env_close-qt',
500
+ session_id: 'sess_test-qt',
501
+ type: 'SESSION_CLOSE',
502
+ attestation: { session_hmac: VALID_HMAC, sequence: 0, app_version_hash: VALID_APP_HASH },
503
+ outcome: {
504
+ status: 'SUCCESS', total_steps: 10, total_chunks: 1,
505
+ quality_tier: 'TIER_A', quality_tier_reason: 'high_quality_no_errors',
506
+ training_eligible: true, training_exclusion_reason: null,
507
+ },
508
+ };
509
+ const result = validateEnvelope(close);
510
+ assert.equal(result.valid, true);
511
+ });
512
+
513
+ it('SESSION_CLOSE rejects invalid quality_tier', () => {
514
+ const close = {
515
+ envelope_id: 'env_close-qt2',
516
+ session_id: 'sess_test-qt2',
517
+ type: 'SESSION_CLOSE',
518
+ attestation: { session_hmac: VALID_HMAC, sequence: 0, app_version_hash: VALID_APP_HASH },
519
+ outcome: {
520
+ status: 'SUCCESS', total_steps: 10, total_chunks: 1,
521
+ quality_tier: 'TIER_Z',
522
+ },
523
+ };
524
+ const result = validateEnvelope(close);
525
+ assert.equal(result.valid, false);
526
+ assert.ok(result.errors.some(e => e.includes('quality_tier')));
527
+ });
528
+
529
+ it('SESSION_CLOSE rejects invalid training_exclusion_reason', () => {
530
+ const close = {
531
+ envelope_id: 'env_close-te',
532
+ session_id: 'sess_test-te',
533
+ type: 'SESSION_CLOSE',
534
+ attestation: { session_hmac: VALID_HMAC, sequence: 0, app_version_hash: VALID_APP_HASH },
535
+ outcome: {
536
+ status: 'SUCCESS', total_steps: 10, total_chunks: 1,
537
+ training_eligible: false, training_exclusion_reason: 'bad_vibes',
538
+ },
539
+ };
540
+ const result = validateEnvelope(close);
541
+ assert.equal(result.valid, false);
542
+ assert.ok(result.errors.some(e => e.includes('training_exclusion_reason')));
543
+ });
544
+
545
+ it('SESSION_CLOSE rejects non-boolean training_eligible', () => {
546
+ const close = {
547
+ envelope_id: 'env_close-te2',
548
+ session_id: 'sess_test-te2',
549
+ type: 'SESSION_CLOSE',
550
+ attestation: { session_hmac: VALID_HMAC, sequence: 0, app_version_hash: VALID_APP_HASH },
551
+ outcome: {
552
+ status: 'SUCCESS', total_steps: 10, total_chunks: 1,
553
+ training_eligible: 'yes',
554
+ },
555
+ };
556
+ const result = validateEnvelope(close);
557
+ assert.equal(result.valid, false);
558
+ assert.ok(result.errors.some(e => e.includes('training_eligible')));
559
+ });
560
+
561
+ it('SESSION_CLOSE without new fields still validates (backward compat)', () => {
562
+ const close = {
563
+ envelope_id: 'env_close-bc',
564
+ session_id: 'sess_test-bc',
565
+ type: 'SESSION_CLOSE',
566
+ attestation: { session_hmac: VALID_HMAC, sequence: 0, app_version_hash: VALID_APP_HASH },
567
+ outcome: { status: 'SUCCESS', total_steps: 10, total_chunks: 1 },
568
+ };
569
+ const result = validateEnvelope(close);
570
+ assert.equal(result.valid, true);
571
+ });
572
+
573
+ // --- USER_FEEDBACK validation ---
574
+
575
+ it('valid USER_FEEDBACK passes', () => {
576
+ const feedback = {
577
+ envelope_id: 'env_fb_1',
578
+ session_id: 'sess_fb_1',
579
+ type: 'USER_FEEDBACK',
580
+ attestation: { session_hmac: VALID_HMAC, sequence: 0, app_version_hash: VALID_APP_HASH },
581
+ feedback: {
582
+ signal: 'accepted', timestamp: Date.now() / 1000,
583
+ context: 'user ran code without modifications',
584
+ target_step: 10, revision_rounds: 0, delta_summary: null,
585
+ },
586
+ };
587
+ const result = validateEnvelope(feedback);
588
+ assert.equal(result.valid, true);
589
+ });
590
+
591
+ it('USER_FEEDBACK rejects invalid signal', () => {
592
+ const feedback = {
593
+ envelope_id: 'env_fb_2',
594
+ session_id: 'sess_fb_2',
595
+ type: 'USER_FEEDBACK',
596
+ attestation: { session_hmac: VALID_HMAC, sequence: 0, app_version_hash: VALID_APP_HASH },
597
+ feedback: { signal: 'thumbs_up', timestamp: Date.now() / 1000 },
598
+ };
599
+ const result = validateEnvelope(feedback);
600
+ assert.equal(result.valid, false);
601
+ assert.ok(result.errors.some(e => e.includes('signal')));
602
+ });
603
+
604
+ it('USER_FEEDBACK rejects missing feedback object', () => {
605
+ const feedback = {
606
+ envelope_id: 'env_fb_3',
607
+ session_id: 'sess_fb_3',
608
+ type: 'USER_FEEDBACK',
609
+ attestation: { session_hmac: VALID_HMAC, sequence: 0, app_version_hash: VALID_APP_HASH },
610
+ };
611
+ const result = validateEnvelope(feedback);
612
+ assert.equal(result.valid, false);
613
+ assert.ok(result.errors.some(e => e.includes('feedback')));
614
+ });
615
+
616
+ it('USER_FEEDBACK accepts all valid signal types', () => {
617
+ for (const signal of ['accepted', 'modified', 'rejected', 'iterated']) {
618
+ const feedback = {
619
+ envelope_id: `env_fb_${signal}`,
620
+ session_id: `sess_fb_${signal}`,
621
+ type: 'USER_FEEDBACK',
622
+ attestation: { session_hmac: VALID_HMAC, sequence: 0, app_version_hash: VALID_APP_HASH },
623
+ feedback: { signal, timestamp: Date.now() / 1000 },
624
+ };
625
+ const result = validateEnvelope(feedback);
626
+ assert.equal(result.valid, true, `Signal "${signal}" should be valid`);
627
+ }
628
+ });
629
+
630
+ it('USER_FEEDBACK rejects negative revision_rounds', () => {
631
+ const feedback = {
632
+ envelope_id: 'env_fb_neg',
633
+ session_id: 'sess_fb_neg',
634
+ type: 'USER_FEEDBACK',
635
+ attestation: { session_hmac: VALID_HMAC, sequence: 0, app_version_hash: VALID_APP_HASH },
636
+ feedback: { signal: 'iterated', timestamp: Date.now() / 1000, revision_rounds: -1 },
637
+ };
638
+ const result = validateEnvelope(feedback);
639
+ assert.equal(result.valid, false);
640
+ assert.ok(result.errors.some(e => e.includes('revision_rounds')));
641
+ });
351
642
  });
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@groove-dev/cli",
3
- "version": "0.27.102",
3
+ "version": "0.27.103",
4
4
  "description": "GROOVE CLI — manage AI coding agents from your terminal",
5
5
  "license": "FSL-1.1-Apache-2.0",
6
6
  "type": "module",
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@groove-dev/daemon",
3
- "version": "0.27.102",
3
+ "version": "0.27.103",
4
4
  "description": "GROOVE daemon — agent orchestration engine",
5
5
  "license": "FSL-1.1-Apache-2.0",
6
6
  "type": "module",
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@groove-dev/gui",
3
- "version": "0.27.102",
3
+ "version": "0.27.103",
4
4
  "description": "GROOVE GUI — visual agent control plane",
5
5
  "license": "FSL-1.1-Apache-2.0",
6
6
  "type": "module",
@@ -0,0 +1,205 @@
1
+ // FSL-1.1-Apache-2.0 — see LICENSE
2
+
3
// Domain labels scored when the caller does not supply `options.domains`.
const DEFAULT_DOMAINS = [
  'python',
  'typescript_node',
  'react_frontend',
  'postgresql_database',
  'devops_docker',
  'rust',
  'data_science_ml',
  'security_pentest',
  'mobile_swift',
  'system_design',
];

// Model name sent to the embedding service in every request body.
const DEFAULT_MODEL = 'sentence-transformers/all-MiniLM-L6-v2';

// Number of top-ranked domains kept after sorting.
const DEFAULT_TOP_K = 3;

// Per-domain keyword lists. They drive keyword-mode scoring (fraction of
// keywords found in the text) and are also joined into the description string
// that is embedded as each domain's centroid in HTTP mode.
// Trailing spaces inside entries such as 'def ' and 'fn ' are significant.
const DOMAIN_KEYWORDS = {
  python: [
    'python', 'pip', 'pytest', 'django', 'flask', 'fastapi', '.py', 'pandas',
    'numpy', 'venv', 'poetry', 'pyproject', '__init__', 'def ', 'import ',
  ],
  typescript_node: [
    'typescript', 'node', 'npm', 'express', '.ts', 'tsconfig', 'package.json',
    'nestjs', 'prisma', 'tsc', 'deno', 'bun',
  ],
  react_frontend: [
    'react', 'jsx', 'tsx', 'component', 'hook', 'usestate', 'useeffect',
    'tailwind', 'css', 'vite', 'nextjs', 'styled', 'frontend', 'dom', 'html',
  ],
  postgresql_database: [
    'postgresql', 'postgres', 'sql', 'database', 'query', 'schema', 'migration',
    'table', 'index', 'select', 'insert', 'join', 'foreign key', 'sequelize',
    'knex',
  ],
  devops_docker: [
    'docker', 'kubernetes', 'k8s', 'ci/cd', 'github actions', 'deployment',
    'terraform', 'ansible', 'nginx', 'dockerfile', 'compose', 'helm', 'aws',
    'gcp', 'pipeline',
  ],
  rust: [
    'rust', 'cargo', 'ownership', 'lifetime', 'borrow', '.rs', 'impl ', 'fn ',
    'struct ', 'enum ', 'trait ', 'crate', 'tokio',
  ],
  data_science_ml: [
    'machine learning', 'pytorch', 'tensorflow', 'ml', 'training', 'dataset',
    'neural', 'deep learning', 'transformer', 'huggingface', 'sklearn',
    'prediction', 'epoch', 'loss',
  ],
  security_pentest: [
    'security', 'vulnerability', 'cve', 'authentication', 'authorization',
    'encryption', 'xss', 'sql injection', 'pentest', 'exploit', 'firewall',
    'oauth', 'csrf',
  ],
  mobile_swift: [
    'swift', 'ios', 'swiftui', 'xcode', 'cocoapod', 'uikit', 'storyboard',
    'watchos', 'macos', 'apple', 'carthage', 'spm',
  ],
  system_design: [
    'architecture', 'system design', 'scalability', 'microservice',
    'distributed', 'load balancer', 'cache', 'message queue', 'api gateway',
    'monorepo', 'design pattern', 'event driven',
  ],
};
24
+
25
/**
 * Ranks a routing text against a fixed list of domains and returns the top
 * three as `{ primary, secondary, tertiary }`, each `{ domain, confidence }`.
 *
 * Two modes exist, chosen by `init()`:
 *  - `'http'`: the text is embedded through an HTTP embedding service and
 *    compared (cosine similarity) with one centroid embedding per domain.
 *  - `'keyword'`: the fraction of a domain's keywords found in the text is
 *    used as the confidence. Matching is plain substring containment on the
 *    lowercased text, so short keywords also match inside longer words.
 */
export class DomainTagger {
  /**
   * @param {object} [options]
   * @param {string} [options.serviceUrl] Embedding endpoint; falls back to the
   *   `EMBEDDING_SERVICE_URL` environment variable, then to none.
   * @param {string} [options.model] Model name sent with every request.
   * @param {number} [options.topK] How many ranked domains to keep. A tag
   *   result always needs three, so values below 3 make tagging return null.
   * @param {string[]} [options.domains] Domain labels to score.
   */
  constructor(options = {}) {
    this._serviceUrl = options.serviceUrl || process.env.EMBEDDING_SERVICE_URL || null;
    this._model = options.model || DEFAULT_MODEL;
    this._topK = options.topK || DEFAULT_TOP_K;
    this._domains = options.domains || DEFAULT_DOMAINS;
    this._ready = false;
    this._mode = null;
    this._centroids = null;
    this._lastError = null;
  }

  /**
   * Selects the tagging mode and marks the tagger ready.
   *
   * When a service URL is configured, a health-check request is sent
   * (5 second timeout). If it succeeds and at least three domain centroids
   * can be embedded, HTTP mode is used. In every other case the tagger falls
   * back to keyword mode, so `init()` never rejects.
   *
   * @returns {Promise<void>}
   */
  async init() {
    this._lastError = null;

    if (this._serviceUrl) {
      try {
        const res = await fetch(this._serviceUrl, {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify({ input: 'health check', model: this._model }),
          signal: AbortSignal.timeout(5_000),
        });
        if (res.ok) {
          await this._buildCentroids();
          // A tag result needs three ranked domains. With fewer centroids HTTP
          // mode could only ever return null, so prefer the keyword fallback.
          // `_lastError` (set by `_embed`) is left in place to explain why.
          if (Object.keys(this._centroids).length >= 3) {
            this._mode = 'http';
            this._ready = true;
            return;
          }
        }
      } catch {
        // Deliberate best-effort: an unreachable service is not an error for
        // the caller, it only means keyword mode is used instead.
      }
    }

    this._mode = 'keyword';
    this._ready = true;
  }

  /**
   * Tags a routing text with its three best-matching domains.
   *
   * @param {string} routingText Text to classify.
   * @returns {Promise<{primary: object, secondary: object, tertiary: object} | null>}
   *   The ranked tags, or null when the tagger is not ready, the text is empty
   *   or not a string, nothing matched, or tagging failed (see `lastError`).
   */
  async tag(routingText) {
    if (!this._ready || !routingText || typeof routingText !== 'string') return null;

    this._lastError = null;
    try {
      if (this._mode === 'http') {
        return await this._tagWithEmbeddings(routingText);
      }
      return this._tagWithKeywords(routingText);
    } catch (err) {
      this._lastError = err.message || String(err);
      return null;
    }
  }

  /** Message of the most recent failure, or null if the last operation succeeded. */
  get lastError() {
    return this._lastError;
  }

  /** True once `init()` has completed. */
  get ready() {
    return this._ready;
  }

  /** `'http'`, `'keyword'`, or null before `init()` has run. */
  get mode() {
    return this._mode;
  }

  /**
   * Builds the text that is fed to `tag()`: the task title, the first prompt
   * and the content of the first two thought steps, joined by newlines.
   * Missing or empty parts are skipped.
   *
   * @param {string} [taskTitle]
   * @param {string} [firstPrompt]
   * @param {Array<{content?: string}>} [thoughtSteps] Non-array values
   *   (including null) are treated as "no steps".
   * @returns {string}
   */
  static buildRoutingText(taskTitle, firstPrompt, thoughtSteps = []) {
    const parts = [];
    if (taskTitle) parts.push(taskTitle);
    if (firstPrompt) parts.push(firstPrompt);
    // The default parameter only covers `undefined`; guard against null too.
    const steps = Array.isArray(thoughtSteps) ? thoughtSteps : [];
    for (const step of steps.slice(0, 2)) {
      if (step?.content) parts.push(step.content);
    }
    return parts.join('\n');
  }

  /**
   * HTTP mode: embeds the text and ranks domains by cosine similarity to
   * their centroid.
   *
   * NOTE(review): cosine similarity can be negative; confirm the envelope
   * schema accepts negative confidences or clamp here.
   *
   * @param {string} routingText
   * @returns {Promise<object | null>} Ranked tags, or null when the embedding
   *   failed or fewer than three domains could be ranked.
   */
  async _tagWithEmbeddings(routingText) {
    const embedding = await this._embed(routingText);
    if (!embedding) return null;

    const scores = [];
    for (const [domain, centroid] of Object.entries(this._centroids)) {
      scores.push({ domain, confidence: cosineSimilarity(embedding, centroid) });
    }

    scores.sort((a, b) => b.confidence - a.confidence);
    const top = scores.slice(0, this._topK);

    if (top.length < 3) return null;

    return {
      primary: { domain: top[0].domain, confidence: round4(top[0].confidence) },
      secondary: { domain: top[1].domain, confidence: round4(top[1].confidence) },
      tertiary: { domain: top[2].domain, confidence: round4(top[2].confidence) },
    };
  }

  /**
   * Keyword mode: confidence is hits / number of keywords for each domain.
   * Domains without a keyword list score 0.
   *
   * @param {string} routingText
   * @returns {object | null} Ranked tags, or null when no keyword matched or
   *   fewer than three domains could be ranked.
   */
  _tagWithKeywords(routingText) {
    const text = routingText.toLowerCase();
    const scores = [];

    for (const domain of this._domains) {
      const keywords = DOMAIN_KEYWORDS[domain];
      if (!keywords) {
        scores.push({ domain, confidence: 0 });
        continue;
      }

      let hits = 0;
      for (const kw of keywords) {
        if (text.includes(kw.toLowerCase())) hits++;
      }
      scores.push({ domain, confidence: keywords.length > 0 ? hits / keywords.length : 0 });
    }

    scores.sort((a, b) => b.confidence - a.confidence);
    const top = scores.slice(0, this._topK);

    // Same contract as the embedding path: three ranks are required. Without
    // this guard a short domain list or topK < 3 dereferences undefined.
    if (top.length < 3) return null;
    if (top[0].confidence === 0) return null;

    return {
      primary: { domain: top[0].domain, confidence: round4(top[0].confidence) },
      secondary: { domain: top[1].domain, confidence: round4(top[1].confidence) },
      tertiary: { domain: top[2].domain, confidence: round4(top[2].confidence) },
    };
  }

  /**
   * Embeds one description per domain ("<domain>: kw1, kw2, ..." or just the
   * domain name when it has no keyword list) and stores the vectors in
   * `_centroids`. Domains whose embedding fails are left out. Requests are
   * issued one at a time.
   *
   * @returns {Promise<void>}
   */
  async _buildCentroids() {
    this._centroids = {};
    for (const domain of this._domains) {
      const kws = DOMAIN_KEYWORDS[domain];
      const description = kws ? `${domain}: ${kws.join(', ')}` : domain;
      const embedding = await this._embed(description);
      if (embedding) {
        this._centroids[domain] = embedding;
      }
    }
  }

  /**
   * Requests an embedding for `text` from the service (10 second timeout).
   * The vector is read from `data[0].embedding` of the JSON response.
   *
   * @param {string} text
   * @returns {Promise<number[] | null>} The embedding, or null on any failure
   *   (the reason is stored in `_lastError`); never rejects.
   */
  async _embed(text) {
    try {
      const res = await fetch(this._serviceUrl, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ input: text, model: this._model }),
        signal: AbortSignal.timeout(10_000),
      });

      if (!res.ok) {
        this._lastError = `Embedding service returned ${res.status}`;
        return null;
      }

      const data = await res.json();
      const embedding = data?.data?.[0]?.embedding;
      if (!Array.isArray(embedding)) {
        this._lastError = 'Invalid embedding response format';
        return null;
      }
      return embedding;
    } catch (err) {
      this._lastError = err.message || String(err);
      return null;
    }
  }
}
190
+
191
/**
 * Cosine similarity of two equal-length numeric vectors.
 *
 * @param {ArrayLike<number>} a
 * @param {ArrayLike<number>} b
 * @returns {number} A value in [-1, 1]. Returns 0 when either vector is
 *   missing, the lengths differ, either vector has zero magnitude, or the
 *   computation is not finite (for example NaN components).
 */
export function cosineSimilarity(a, b) {
  if (!a || !b || a.length !== b.length) return 0;

  let dot = 0;
  let magA = 0;
  let magB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    magA += a[i] * a[i];
    magB += b[i] * b[i];
  }

  const denom = Math.sqrt(magA) * Math.sqrt(magB);
  if (denom === 0) return 0;

  const similarity = dot / denom;
  // NaN would poison sort comparators that rank by this score.
  if (!Number.isFinite(similarity)) return 0;

  // Floating-point drift can land just outside the mathematical range
  // (e.g. [1,1,1] against itself yields 1.0000000000000002).
  return Math.min(1, Math.max(-1, similarity));
}
202
+
203
/**
 * Rounds a number to four decimal places.
 *
 * @param {number} n
 * @returns {number}
 */
function round4(n) {
  const scale = 10000;
  const scaled = Math.round(n * scale);
  return scaled / scale;
}