groove-dev 0.27.77 → 0.27.78
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +0 -7
- package/MOE_TRAINING_PIPELINE.md +216 -12
- package/moe-training/DEPLOY_CENTRAL_COMMAND.md +413 -0
- package/moe-training/client/consent.js +96 -0
- package/moe-training/client/envelope-builder.js +56 -0
- package/moe-training/client/index.js +10 -0
- package/moe-training/client/parsers/claude-code.js +110 -0
- package/moe-training/client/parsers/codex.js +80 -0
- package/moe-training/client/parsers/gemini.js +80 -0
- package/moe-training/client/parsers/grok.js +16 -0
- package/moe-training/client/parsers/index.js +20 -0
- package/moe-training/client/scrubber.js +126 -0
- package/moe-training/client/session-attestation.js +114 -0
- package/moe-training/client/step-classifier.js +51 -0
- package/moe-training/client/trajectory-capture.js +227 -0
- package/moe-training/client/transmission-queue.js +93 -0
- package/moe-training/package-lock.json +1266 -0
- package/moe-training/package.json +20 -0
- package/moe-training/server/enrichment.js +24 -0
- package/moe-training/server/index.js +119 -0
- package/moe-training/server/ledger.js +110 -0
- package/moe-training/server/routes/ingest.js +96 -0
- package/moe-training/server/routes/sessions.js +43 -0
- package/moe-training/server/routes/stats.js +31 -0
- package/moe-training/server/scoring.js +63 -0
- package/moe-training/server/session-registry.js +156 -0
- package/moe-training/server/stats.js +129 -0
- package/moe-training/server/stitcher.js +69 -0
- package/moe-training/server/storage.js +147 -0
- package/moe-training/server/verifier.js +102 -0
- package/moe-training/shared/constants.js +30 -0
- package/moe-training/shared/crypto.js +45 -0
- package/moe-training/shared/envelope-schema.js +220 -0
- package/moe-training/test/client/consent.test.js +121 -0
- package/moe-training/test/client/envelope-builder.test.js +107 -0
- package/moe-training/test/client/parsers/claude-code.test.js +119 -0
- package/moe-training/test/client/parsers/codex.test.js +83 -0
- package/moe-training/test/client/parsers/gemini.test.js +99 -0
- package/moe-training/test/client/scrubber.test.js +133 -0
- package/moe-training/test/client/session-attestation-security.test.js +95 -0
- package/moe-training/test/client/step-classifier.test.js +88 -0
- package/moe-training/test/integration/handshake.test.js +260 -0
- package/moe-training/test/server/ingest-security.test.js +166 -0
- package/moe-training/test/server/ledger.test.js +131 -0
- package/moe-training/test/server/scoring.test.js +242 -0
- package/moe-training/test/server/session-registry.test.js +125 -0
- package/moe-training/test/server/stitcher.test.js +157 -0
- package/moe-training/test/server/verifier.test.js +232 -0
- package/moe-training/test/shared/crypto.test.js +87 -0
- package/moe-training/test/shared/envelope-schema.test.js +351 -0
- package/node_modules/@groove-dev/cli/package.json +1 -1
- package/node_modules/@groove-dev/daemon/package.json +1 -1
- package/node_modules/@groove-dev/daemon/src/agent-loop.js +48 -5
- package/node_modules/@groove-dev/daemon/src/api.js +77 -0
- package/node_modules/@groove-dev/daemon/src/index.js +61 -0
- package/node_modules/@groove-dev/daemon/src/journalist.js +64 -21
- package/node_modules/@groove-dev/daemon/src/process.js +199 -0
- package/node_modules/@groove-dev/daemon/src/providers/grok.js +15 -0
- package/node_modules/@groove-dev/daemon/src/state.js +20 -1
- package/node_modules/@groove-dev/gui/dist/assets/{index-BbmPDhuW.js → index-BJgEJ9lZ.js} +1677 -1677
- package/node_modules/@groove-dev/gui/dist/index.html +1 -1
- package/node_modules/@groove-dev/gui/package.json +1 -1
- package/node_modules/@groove-dev/gui/src/stores/groove.js +32 -0
- package/node_modules/@groove-dev/gui/src/views/settings.jsx +167 -1
- package/package.json +1 -1
- package/packages/cli/package.json +1 -1
- package/packages/daemon/package.json +1 -1
- package/packages/daemon/src/agent-loop.js +48 -5
- package/packages/daemon/src/api.js +77 -0
- package/packages/daemon/src/index.js +61 -0
- package/packages/daemon/src/journalist.js +64 -21
- package/packages/daemon/src/process.js +199 -0
- package/packages/daemon/src/providers/grok.js +15 -0
- package/packages/daemon/src/state.js +20 -1
- package/packages/gui/dist/assets/{index-BbmPDhuW.js → index-BJgEJ9lZ.js} +1677 -1677
- package/packages/gui/dist/index.html +1 -1
- package/packages/gui/package.json +1 -1
- package/packages/gui/src/stores/groove.js +32 -0
- package/packages/gui/src/views/settings.jsx +167 -1
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
// FSL-1.1-Apache-2.0 — see LICENSE
|
|
2
|
+
|
|
3
|
+
import { describe, it, beforeEach, afterEach } from 'node:test';
|
|
4
|
+
import assert from 'node:assert/strict';
|
|
5
|
+
import { mkdtempSync, rmSync } from 'node:fs';
|
|
6
|
+
import { join } from 'node:path';
|
|
7
|
+
import { tmpdir } from 'node:os';
|
|
8
|
+
import { generateECDHKeypair, signEnvelope } from '../../shared/crypto.js';
|
|
9
|
+
import { SessionRegistry } from '../../server/session-registry.js';
|
|
10
|
+
import { EnvelopeVerifier } from '../../server/verifier.js';
|
|
11
|
+
import { EnvelopeStorage } from '../../server/storage.js';
|
|
12
|
+
|
|
13
|
+
// Contributor id fixture: a 32-character string of 'c', used as the default
// contributor_id of every fixture envelope.
// NOTE(review): presumably shaped to satisfy the schema's contributor_id format — confirm.
const VALID_CONTRIBUTOR = 'c'.repeat(32);
|
|
14
|
+
// App-version hash fixture: a 64-character string of 'b', sent as
// attestation.app_version_hash on every fixture envelope.
const VALID_APP_HASH = 'b'.repeat(64);
|
|
15
|
+
|
|
16
|
+
/**
 * Builds a fixture envelope and attaches an attestation to it.
 *
 * The HMAC is computed over the JSON serialization of the envelope as it looks
 * BEFORE the `attestation` field is added, so the signature never covers itself.
 *
 * @param {string} sessionId - Value stored in `session_id`.
 * @param {number} sequence - Used for the `envelope_id` suffix, `chunk_sequence`,
 *   the `signEnvelope` sequence argument and `attestation.sequence`.
 * @param {*} sharedSecret - Passed through to `signEnvelope` as the signing key.
 * @param {object} [overrides] - Fields spread over the defaults; they are applied
 *   before signing, so overridden values are part of the signed bytes.
 * @returns {object} The envelope, with `attestation` attached.
 */
function makeSignedEnvelope(sessionId, sequence, sharedSecret, overrides = {}) {
  const defaults = {
    envelope_id: `env_test_${sequence}`,
    session_id: sessionId,
    chunk_sequence: sequence,
    contributor_id: VALID_CONTRIBUTOR,
    metadata: {
      model_engine: 'claude-opus-4-6',
      provider: 'claude-code',
      agent_role: 'backend',
      agent_id: 'backend-1',
    },
    trajectory_log: [
      { step: 1, type: 'thought', timestamp: Date.now() / 1000, content: 'test', token_count: 10 },
    ],
  };
  const signedEnvelope = { ...defaults, ...overrides };

  // Serialize first: the attestation must not be part of the signed payload.
  const payload = JSON.stringify(signedEnvelope);

  return Object.assign(signedEnvelope, {
    attestation: {
      session_hmac: signEnvelope(sharedSecret, payload, sequence),
      sequence,
      app_version_hash: VALID_APP_HASH,
    },
  });
}
|
|
39
|
+
|
|
40
|
+
/**
 * Ingest hardening checks.
 *
 * Every test runs against a fresh temp directory holding an on-disk
 * SessionRegistry and EnvelopeStorage, plus an EnvelopeVerifier bound to that
 * registry. One session is opened per test and its shared secret is read back
 * from the registry so fixture envelopes can be signed.
 */
describe('Ingest Security', () => {
  let registry;
  let verifier;
  let storage;
  let tmpDir;
  let sharedSecret;
  const sessionId = 'sess_ingest_sec_001';

  beforeEach(() => {
    tmpDir = mkdtempSync(join(tmpdir(), 'ingest-sec-'));
    registry = new SessionRegistry(join(tmpDir, 'sessions.db'));
    storage = new EnvelopeStorage(join(tmpDir, 'envelopes'));
    verifier = new EnvelopeVerifier(registry);

    const clientKeypair = generateECDHKeypair();
    registry.openSession(
      sessionId, clientKeypair.publicKey, 'claude-code', 'claude-opus-4-6',
      'fp_ingest', 'hash_ingest', '0.27.77'
    );

    // The signing key for fixture envelopes is whatever the registry stored for this session.
    const session = registry.getSession(sessionId);
    sharedSecret = session.shared_secret;
  });

  afterEach(() => {
    // Always remove the temp directory, even if closing the registry throws;
    // otherwise a failing close() would leak one directory per test.
    try {
      registry.close();
    } finally {
      rmSync(tmpDir, { recursive: true, force: true });
    }
  });

  it('rejects envelope with > 500 steps', () => {
    // NOTE(review): these steps omit the `content` / `token_count` fields that the
    // default fixture step carries. If the schema requires them, the rejection may
    // not be caused by the step count — confirm against the envelope schema.
    const steps = Array.from({ length: 501 }, (_, i) => ({
      step: i, type: 'thought', timestamp: Date.now() / 1000,
    }));
    const envelope = makeSignedEnvelope(sessionId, 0, sharedSecret, { trajectory_log: steps });
    const result = verifier.verify(envelope);
    assert.equal(result.valid, false);
    assert.ok(result.reason.includes('schema'));
  });

  it('rejects envelope when session has > 200 envelopes', () => {
    // Simulate 200 envelopes already received.
    // NOTE(review): this only exercises registry.checkEnvelopeCount; no envelope
    // is actually submitted or rejected here.
    for (let i = 0; i < 200; i++) {
      registry.incrementEnvelopeCount(sessionId);
    }

    const withinLimit = registry.checkEnvelopeCount(sessionId, 200);
    assert.equal(withinLimit, false);
  });

  it('server generates envelope_id (client value ignored)', () => {
    const envelope = makeSignedEnvelope(sessionId, 0, sharedSecret);
    const originalId = envelope.envelope_id;

    // Verify passes
    const result = verifier.verify(envelope);
    assert.equal(result.valid, true);

    // In the real ingest flow, server overwrites envelope_id.
    // Here we only verify the dedup bookkeeping: the server-side id is recorded,
    // the client-supplied id is not.
    const generatedId = 'env_server_generated';
    registry.recordProcessedEnvelope(generatedId, sessionId);
    assert.equal(registry.isEnvelopeProcessed(generatedId), true);
    assert.equal(registry.isEnvelopeProcessed(originalId), false);
  });

  it('rejects invalid model_engine via schema validation', () => {
    const envelope = makeSignedEnvelope(sessionId, 0, sharedSecret, {
      metadata: { model_engine: 'gpt-5-turbo', provider: 'claude-code', agent_role: 'backend', agent_id: 'backend-1' },
    });
    const result = verifier.verify(envelope);
    assert.equal(result.valid, false);
    assert.ok(result.reason.includes('schema'));
  });

  it('rejects invalid contributor_id format', () => {
    const envelope = makeSignedEnvelope(sessionId, 0, sharedSecret, {
      contributor_id: 'not-a-valid-hex-id',
    });
    const result = verifier.verify(envelope);
    assert.equal(result.valid, false);
    assert.ok(result.reason.includes('schema'));
  });

  it('envelope dedup prevents double-processing', () => {
    const envelopeId = 'env_dedup_test';
    assert.equal(registry.isEnvelopeProcessed(envelopeId), false);

    registry.recordProcessedEnvelope(envelopeId, sessionId);
    assert.equal(registry.isEnvelopeProcessed(envelopeId), true);

    // Recording again should not throw (INSERT OR IGNORE)
    registry.recordProcessedEnvelope(envelopeId, sessionId);
    assert.equal(registry.isEnvelopeProcessed(envelopeId), true);
  });

  it('per-session envelope count tracks correctly', () => {
    assert.equal(registry.checkEnvelopeCount(sessionId, 200), true);

    registry.incrementEnvelopeCount(sessionId);
    const session = registry.getSession(sessionId);
    assert.equal(session.envelope_count, 1);

    registry.incrementEnvelopeCount(sessionId);
    const session2 = registry.getSession(sessionId);
    assert.equal(session2.envelope_count, 2);
  });

  it('atomic sequence check prevents race condition', () => {
    // First call should succeed
    const r1 = registry.checkAndIncrementSequence(sessionId, 0);
    assert.equal(r1.valid, true);

    // Same sequence again should fail
    const r2 = registry.checkAndIncrementSequence(sessionId, 0);
    assert.equal(r2.valid, false);
    assert.ok(r2.reason.includes('sequence'));

    // Next sequence should succeed
    const r3 = registry.checkAndIncrementSequence(sessionId, 1);
    assert.equal(r3.valid, true);
  });

  it('storage quota check works', () => {
    // Only the empty-storage case is covered: a fresh store reports quota available.
    const ok = storage.checkQuota();
    assert.equal(ok, true);
  });
});
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
// FSL-1.1-Apache-2.0 — see LICENSE
|
|
2
|
+
|
|
3
|
+
import { describe, it, beforeEach, afterEach } from 'node:test';
|
|
4
|
+
import assert from 'node:assert/strict';
|
|
5
|
+
import { mkdtempSync, rmSync } from 'node:fs';
|
|
6
|
+
import { join } from 'node:path';
|
|
7
|
+
import { tmpdir } from 'node:os';
|
|
8
|
+
import { ContributorLedger } from '../../server/ledger.js';
|
|
9
|
+
|
|
10
|
+
/**
 * ContributorLedger tests.
 *
 * Each test gets its own temp directory and a ledger backed by `ledger.db`
 * inside it, so balances never carry over between tests.
 */
describe('ContributorLedger', () => {
  let ledger;
  let tmpDir;

  /**
   * Builds a score result with every bonus component set to 0, so a credit's
   * value is determined by `totalPoints` alone.
   */
  const makeScore = ({ basePoints, totalPoints, modelMultiplier }) => ({
    basePoints,
    totalPoints,
    modelMultiplier,
    correctionBonus: 0,
    coordinationBonus: 0,
    errorRecoveryBonus: 0,
    complexityBonus: 0,
    qualityBonus: 0,
  });

  beforeEach(() => {
    tmpDir = mkdtempSync(join(tmpdir(), 'ledger-test-'));
    ledger = new ContributorLedger(join(tmpDir, 'ledger.db'));
  });

  afterEach(() => {
    // Always remove the temp directory, even if closing the ledger throws.
    try {
      ledger.close();
    } finally {
      rmSync(tmpDir, { recursive: true, force: true });
    }
  });

  it('credits a contributor and updates balance', () => {
    ledger.credit(
      'contrib_001',
      'sess_001',
      makeScore({ basePoints: 10, totalPoints: 50, modelMultiplier: 5 })
    );

    const balance = ledger.getBalance('contrib_001');
    assert.ok(balance);
    assert.equal(balance.total_points, 50);
    assert.equal(balance.total_sessions, 1);
    assert.equal(balance.trust_score, 1.0);
  });

  it('accumulates multiple credits', () => {
    const scoreResult = makeScore({ basePoints: 5, totalPoints: 25, modelMultiplier: 5 });

    ledger.credit('contrib_002', 'sess_a', scoreResult);
    ledger.credit('contrib_002', 'sess_b', scoreResult);
    ledger.credit('contrib_002', 'sess_c', scoreResult);

    const balance = ledger.getBalance('contrib_002');
    assert.equal(balance.total_points, 75);
    assert.equal(balance.total_sessions, 3);
  });

  it('returns null for unknown contributor', () => {
    assert.equal(ledger.getBalance('nonexistent'), null);
  });

  it('returns leaderboard sorted by points', () => {
    const score = (pts) => makeScore({ basePoints: 1, totalPoints: pts, modelMultiplier: 1 });

    // Credited out of order on purpose: expected ranking is b (300), c (200), a (100).
    ledger.credit('user_a', 'sess_1', score(100));
    ledger.credit('user_b', 'sess_2', score(300));
    ledger.credit('user_c', 'sess_3', score(200));

    const board = ledger.getLeaderboard(10);
    assert.equal(board.length, 3);
    assert.equal(board[0].contributor_id, 'user_b');
    assert.equal(board[0].total_points, 300);
    assert.equal(board[1].contributor_id, 'user_c');
    assert.equal(board[2].contributor_id, 'user_a');
  });

  it('gets credit history for a contributor', () => {
    const scoreResult = makeScore({ basePoints: 5, totalPoints: 25, modelMultiplier: 5 });

    ledger.credit('contrib_hist', 'sess_x', scoreResult);
    ledger.credit('contrib_hist', 'sess_y', scoreResult);

    const credits = ledger.getCreditsForContributor('contrib_hist');
    assert.equal(credits.length, 2);
    assert.ok(credits[0].multiplier_breakdown);
  });

  it('daily credits aggregation works', () => {
    const scoreResult = makeScore({ basePoints: 10, totalPoints: 50, modelMultiplier: 5 });

    ledger.credit('contrib_daily', 'sess_d1', scoreResult);
    ledger.credit('contrib_daily', 'sess_d2', scoreResult);

    const daily = ledger.getDailyCredits(7);
    assert.ok(daily.length >= 1);

    // Compute the date key once instead of once per row inside the callback.
    // NOTE(review): this is the UTC calendar date; assumes the ledger buckets
    // credits by UTC day as well — confirm against the ledger implementation.
    const todayKey = new Date().toISOString().slice(0, 10);
    const today = daily.find((d) => d.date === todayKey);
    assert.ok(today);
    assert.equal(today.totalPoints, 100);
    assert.equal(today.totalSessions, 2);
  });

  it('adjusts trust score within bounds', () => {
    ledger.credit(
      'trust_user',
      'sess_t',
      makeScore({ basePoints: 1, totalPoints: 1, modelMultiplier: 1 })
    );

    // A new contributor starts at 1.0 (asserted in the first test), so +2.5 gives 3.5.
    ledger.adjustTrustScore('trust_user', 2.5);
    let balance = ledger.getBalance('trust_user');
    assert.equal(balance.trust_score, 3.5);

    // A large negative delta must clamp at 0 rather than go negative.
    ledger.adjustTrustScore('trust_user', -10);
    balance = ledger.getBalance('trust_user');
    assert.equal(balance.trust_score, 0);
  });
});
|
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
// FSL-1.1-Apache-2.0 — see LICENSE
|
|
2
|
+
|
|
3
|
+
import { describe, it } from 'node:test';
|
|
4
|
+
import assert from 'node:assert/strict';
|
|
5
|
+
import { TrajectoryScorer } from '../../server/scoring.js';
|
|
6
|
+
import { MODEL_TIERS, QUALITY_MULTIPLIERS } from '../../shared/constants.js';
|
|
7
|
+
|
|
8
|
+
// Single scorer instance shared by every test in this file, constructed with the
// MODEL_TIERS and QUALITY_MULTIPLIERS tables imported from shared/constants.js.
const scorer = new TrajectoryScorer({ MODEL_TIERS, QUALITY_MULTIPLIERS });
|
|
9
|
+
|
|
10
|
+
/**
 * Builds the minimal trajectory object handed to the scorer.
 *
 * @param {object} [overrides]
 * @param {Array<object>} [overrides.steps] - Used as `trajectory_log` when truthy;
 *   otherwise a default three-step log (thought, action, observation) is used.
 * @param {object} [overrides.metadata] - Used as `metadata` when truthy; otherwise `{}`.
 * @returns {{trajectory_log: Array<object>, metadata: object}}
 */
function makeTrajectory(overrides = {}) {
  const { steps, metadata } = overrides;

  let trajectoryLog = steps;
  if (!trajectoryLog) {
    // Fresh array on every call so tests can never share or mutate a common default.
    trajectoryLog = [
      { step: 1, type: 'thought', token_count: 10 },
      { step: 2, type: 'action', token_count: 5 },
      { step: 3, type: 'observation', token_count: 8 },
    ];
  }

  return {
    trajectory_log: trajectoryLog,
    metadata: metadata ? metadata : {},
  };
}
|
|
20
|
+
|
|
21
|
+
/**
 * TrajectoryScorer tests.
 *
 * The suite pins each score component (base points, model multiplier,
 * correction / coordination / error-recovery / complexity / quality bonuses)
 * and checks that client-supplied `outcome` and `session_quality` values do
 * not influence the result.
 */
describe('TrajectoryScorer', () => {
  // Steps numbered 1..n from a list of types, without token counts.
  const numbered = (...types) => types.map((type, idx) => ({ step: idx + 1, type }));

  // Steps numbered 1..n from [type, token_count] pairs.
  const numberedWithTokens = (...pairs) =>
    pairs.map(([type, token_count], idx) => ({ step: idx + 1, type, token_count }));

  // `count` thought steps numbered from 0.
  const thoughts = (count) =>
    Array.from({ length: count }, (_, idx) => ({ step: idx, type: 'thought' }));

  // Scores a trajectory built by makeTrajectory from the given overrides.
  const scoreOf = (overrides) => scorer.score(makeTrajectory(overrides));

  it('base scoring: 1 point per step', () => {
    const { basePoints } = scoreOf();
    assert.equal(basePoints, 3);
  });

  it('caps base points at 5000', () => {
    const scored = scoreOf({ steps: thoughts(6000) });
    assert.equal(scored.basePoints, 5000);
  });

  it('applies model multiplier correctly (5x for opus)', () => {
    const scored = scoreOf({ metadata: { model_engine: 'claude-opus-4-6' } });
    assert.equal(scored.basePoints, 3);
    assert.equal(scored.modelMultiplier, 5);
    assert.equal(scored.totalPoints, 3 * 5);
  });

  it('derives correction bonus from actual trajectory steps (not outcome)', () => {
    const steps = numberedWithTokens(
      ['thought', 10],
      ['correction', 5],
      ['action', 8],
      ['action', 3],
    );
    const scored = scoreOf({ steps });
    // 4 steps → 30% cap allows 1; the single correction step is worth 10.
    assert.equal(scored.correctionBonus, 10);
  });

  it('caps correction steps at 30% of trajectory', () => {
    // 5 of 10 steps are corrections, but only 3 (30% of 10) may count.
    const steps = numbered(
      'thought',
      'correction', 'correction', 'correction', 'correction', 'correction',
      'action', 'action', 'action', 'action',
    );
    const scored = scoreOf({ steps });
    assert.equal(scored.correctionBonus, 3 * 10); // 3 capped corrections x 10
  });

  it('derives coordination bonus from actual trajectory steps (not outcome)', () => {
    const steps = numberedWithTokens(
      ['thought', 10],
      ['coordination', 5],
      ['coordination', 3],
      ['action', 8],
      ['action', 4],
    );
    const scored = scoreOf({ steps });
    // 5 steps → 20% cap allows 1, so the 2 coordination steps count as 1 * 5.
    assert.equal(scored.coordinationBonus, 5);
  });

  it('caps coordination steps at 20% of trajectory', () => {
    // 4 of 10 steps are coordination, but only 2 (20% of 10) may count.
    const steps = numbered(
      'thought',
      'coordination', 'coordination', 'coordination', 'coordination',
      'action', 'action', 'action', 'action', 'action',
    );
    const scored = scoreOf({ steps });
    assert.equal(scored.coordinationBonus, 2 * 5); // 2 capped coordination x 5
  });

  it('derives error recovery from actual error and resolution steps', () => {
    const steps = numberedWithTokens(['error', 5], ['resolution', 10]);
    const scored = scoreOf({ steps });
    assert.equal(scored.errorRecoveryBonus, 3);
  });

  it('error recovery: can not recover more than encountered', () => {
    const steps = numbered('error', 'resolution', 'resolution', 'resolution');
    const scored = scoreOf({ steps });
    // 1 error vs 3 resolutions → only 1 recovery is credited.
    assert.equal(scored.errorRecoveryBonus, 1 * 3);
  });

  it('no error recovery bonus when no resolution steps', () => {
    const scored = scoreOf({ steps: numberedWithTokens(['error', 5]) });
    assert.equal(scored.errorRecoveryBonus, 0);
  });

  it('applies complexity bonus for heavy tasks', () => {
    const scored = scoreOf({ metadata: { task_complexity: 'heavy' } });
    assert.equal(scored.basePoints, 3);
    assert.equal(scored.complexityBonus, 3);
  });

  it('no complexity bonus for medium tasks', () => {
    const scored = scoreOf({ metadata: { task_complexity: 'medium' } });
    assert.equal(scored.complexityBonus, 0);
  });

  it('client-provided session_quality is ignored (quality is server-derived)', () => {
    // The default trajectory has no resolution step, so the bonus must stay 0
    // no matter what the client claims in metadata.
    const scored = scoreOf({ metadata: { session_quality: 100 } });
    assert.equal(scored.qualityBonus, 0);
  });

  it('quality bonus applies when trajectory has resolution and reasonable length', () => {
    const steps = [...thoughts(10), { step: 10, type: 'resolution' }];
    const scored = scoreOf({ steps });
    assert.ok(scored.qualityBonus > 0);
  });

  it('quality bonus is 0 when trajectory has no resolution', () => {
    const scored = scoreOf({ steps: thoughts(10) });
    assert.equal(scored.qualityBonus, 0);
  });

  it('quality bonus is 0 when trajectory is too short (< 5 steps)', () => {
    const scored = scoreOf({ steps: numbered('thought', 'resolution') });
    assert.equal(scored.qualityBonus, 0);
  });

  it('stacks all multipliers correctly (server-derived)', () => {
    const steps = numbered(
      'thought',
      'correction',
      'coordination',
      'error',
      'resolution',
      'action', 'action', 'action', 'action', 'action',
    );
    const scored = scoreOf({
      steps,
      metadata: { model_engine: 'claude-opus-4-6', task_complexity: 'heavy' },
    });

    assert.equal(scored.basePoints, 10);
    assert.equal(scored.modelMultiplier, 5);
    // 1 correction of 10 steps (cap 3) → 1 * 10
    assert.equal(scored.correctionBonus, 10);
    // 1 coordination of 10 steps (cap 2) → 1 * 5
    assert.equal(scored.coordinationBonus, 5);
    // 1 error matched by 1 resolution → 1 * 3
    assert.equal(scored.errorRecoveryBonus, 3);
    // heavy task → bonus equal to basePoints
    assert.equal(scored.complexityBonus, 10);

    const subtotal = (10 * 5) + 10 + 5 + 3 + 10; // 78
    // resolution present and length >= 5 → quality = floor(78 * 0.1) = 7
    const expectedQuality = Math.floor(subtotal * 0.1);
    assert.equal(scored.qualityBonus, expectedQuality);
    assert.equal(scored.totalPoints, subtotal + scored.qualityBonus);
  });

  it('ignores outcome.user_interventions — score derives from step count only', () => {
    const trajectory = {
      trajectory_log: [
        { step: 1, type: 'thought', token_count: 10 },
      ],
      metadata: { model_engine: 'claude-opus-4-6' },
      outcome: { user_interventions: 1_000_000 },
    };
    const scored = scorer.score(trajectory);
    assert.equal(scored.basePoints, 1);
    assert.equal(scored.modelMultiplier, 5);
    assert.equal(scored.totalPoints, 5);
    assert.ok(scored.totalPoints < 100, 'score should be small, NOT derived from user_interventions');
  });

  it('ignores outcome entirely for multiplier calculations', () => {
    const inflatedOutcome = {
      errors_encountered: 999,
      errors_recovered: 999,
      coordination_events: 999,
    };
    const withOutcome = scorer.score({
      trajectory_log: [{ step: 1, type: 'thought' }],
      metadata: {},
      outcome: inflatedOutcome,
    });
    const withoutOutcome = scorer.score({
      trajectory_log: [{ step: 1, type: 'thought' }],
      metadata: {},
    });
    assert.equal(withOutcome.totalPoints, withoutOutcome.totalPoints);
  });
});
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
// FSL-1.1-Apache-2.0 — see LICENSE
|
|
2
|
+
|
|
3
|
+
import { describe, it, beforeEach, afterEach } from 'node:test';
|
|
4
|
+
import assert from 'node:assert/strict';
|
|
5
|
+
import { mkdtempSync, rmSync } from 'node:fs';
|
|
6
|
+
import { join } from 'node:path';
|
|
7
|
+
import { tmpdir } from 'node:os';
|
|
8
|
+
import { generateECDHKeypair } from '../../shared/crypto.js';
|
|
9
|
+
import { SessionRegistry } from '../../server/session-registry.js';
|
|
10
|
+
|
|
11
|
+
/**
 * SessionRegistry tests.
 *
 * Each test gets its own temp directory and a registry backed by
 * `sessions.db` inside it, so sessions and rate-limit counters never carry
 * over between tests.
 */
describe('SessionRegistry', () => {
  let registry;
  let tmpDir;

  beforeEach(() => {
    tmpDir = mkdtempSync(join(tmpdir(), 'sess-test-'));
    registry = new SessionRegistry(join(tmpDir, 'sessions.db'));
  });

  afterEach(() => {
    // Always remove the temp directory, even if closing the registry throws.
    try {
      registry.close();
    } finally {
      rmSync(tmpDir, { recursive: true, force: true });
    }
  });

  it('opens a session and returns server public key', () => {
    const clientKeypair = generateECDHKeypair();
    const result = registry.openSession(
      'sess_001', clientKeypair.publicKey, 'claude-code', 'claude-opus-4-6',
      'fp_abc123', 'hash_xyz', '0.27.77'
    );
    assert.ok(result.serverPublicKey);
    assert.equal(typeof result.serverPublicKey, 'string');
    assert.ok(result.serverPublicKey.length > 10);
  });

  it('gets a session with all fields stored', () => {
    const clientKeypair = generateECDHKeypair();
    registry.openSession(
      'sess_002', clientKeypair.publicKey, 'codex', 'o3',
      'fp_def456', 'hash_abc', '0.27.77'
    );

    const session = registry.getSession('sess_002');
    assert.ok(session);
    // Values passed to openSession are stored verbatim.
    assert.equal(session.session_id, 'sess_002');
    assert.equal(session.provider, 'codex');
    assert.equal(session.model, 'o3');
    assert.equal(session.machine_fingerprint, 'fp_def456');
    assert.equal(session.app_version_hash, 'hash_abc');
    assert.equal(session.groove_version, '0.27.77');
    // Values the registry fills in itself.
    assert.equal(session.status, 'active');
    assert.equal(session.expected_sequence, 0);
    assert.ok(session.server_public_key);
    assert.ok(session.server_private_key);
    assert.ok(session.shared_secret);
    assert.ok(session.created_at);
  });

  it('returns null for unknown session', () => {
    assert.equal(registry.getSession('nonexistent'), null);
  });

  it('increments sequence atomically and monotonically', () => {
    const clientKeypair = generateECDHKeypair();
    registry.openSession(
      'sess_003', clientKeypair.publicKey, 'claude-code', 'claude-opus-4-6',
      'fp_seq', 'hash_seq', '0.27.77'
    );

    // A new session starts at expected_sequence 0, so increments return 1, 2, 3.
    const seq1 = registry.incrementSequence('sess_003');
    assert.equal(seq1, 1);

    const seq2 = registry.incrementSequence('sess_003');
    assert.equal(seq2, 2);

    const seq3 = registry.incrementSequence('sess_003');
    assert.equal(seq3, 3);

    const session = registry.getSession('sess_003');
    assert.equal(session.expected_sequence, 3);
  });

  it('closes a session with status and timestamp', () => {
    const clientKeypair = generateECDHKeypair();
    registry.openSession(
      'sess_004', clientKeypair.publicKey, 'gemini', 'gemini-2.5-pro',
      'fp_close', 'hash_close', '0.27.77'
    );

    registry.closeSession('sess_004');
    const session = registry.getSession('sess_004');
    assert.equal(session.status, 'closed');
    assert.ok(session.closed_at);
  });

  it('rate limits at 20 sessions per fingerprint per hour', () => {
    const clientKeypair = generateECDHKeypair();
    const fp = 'fp_ratelimit';

    // The first 20 sessions for one fingerprint must all be accepted.
    for (let i = 0; i < 20; i++) {
      const result = registry.openSession(
        `sess_rl_${i}`, clientKeypair.publicKey, 'claude-code', 'claude-opus-4-6',
        fp, 'hash_rl', '0.27.77'
      );
      assert.ok(result.serverPublicKey, `session ${i} should succeed`);
    }

    // The 21st session for the same fingerprint is refused.
    const result = registry.openSession(
      'sess_rl_21', clientKeypair.publicKey, 'claude-code', 'claude-opus-4-6',
      fp, 'hash_rl', '0.27.77'
    );
    assert.equal(result.rateLimited, true);
  });

  it('returns active sessions', () => {
    const clientKeypair = generateECDHKeypair();
    registry.openSession('sess_a1', clientKeypair.publicKey, 'claude-code', 'claude-opus-4-6', 'fp1', 'h1', '0.27.77');
    registry.openSession('sess_a2', clientKeypair.publicKey, 'codex', 'o3', 'fp2', 'h2', '0.27.77');
    registry.closeSession('sess_a1');

    // Only the session that was not closed should be reported.
    const active = registry.getActiveSessions();
    assert.equal(active.length, 1);
    assert.equal(active[0].session_id, 'sess_a2');
  });
});
|