incremnt 0.4.0 → 0.5.0

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/src/remote.js CHANGED
@@ -40,7 +40,8 @@ const remoteCommandHandlers = {
40
40
  'training-load': executeRemoteRead,
41
41
  'ask-history': executeRemoteRead,
42
42
  'ask-show': executeRemoteRead,
43
- 'program-share-fetch': executeRemoteRead
43
+ 'program-share-fetch': executeRemoteRead,
44
+ 'increment-score-history': executeRemoteRead
44
45
  };
45
46
 
46
47
  async function executeRemoteRead(options, sessionState, normalizedCommand) {
@@ -161,6 +162,13 @@ function endpointForCommand(baseUrl, normalizedCommand, options) {
161
162
  return resolveServiceUrl(baseUrl, `/cli/ask/history/${options.id}`);
162
163
  case 'program-share-fetch':
163
164
  return resolveServiceUrl(baseUrl, `/program-share/${options.token}`);
165
+ case 'increment-score-history': {
166
+ const url = resolveServiceUrl(baseUrl, '/mobile/score-snapshots');
167
+ if (options.from) url.searchParams.set('from', options.from);
168
+ if (options.to) url.searchParams.set('to', options.to);
169
+ if (options.limit) url.searchParams.set('limit', options.limit);
170
+ return url;
171
+ }
164
172
  default:
165
173
  return resolveServiceUrl(baseUrl, '/');
166
174
  }
@@ -346,16 +354,54 @@ const remoteWriteCommandHandlers = {
346
354
  return response.json();
347
355
  },
348
356
 
357
+ 'increment-score-upload': async (options, sessionState) => {
358
+ const baseUrl = sessionState.session?.transport?.baseUrl;
359
+ if (!baseUrl) throw notImplementedError();
360
+ if (!options.file) {
361
+ const error = new Error('--file is required for increment-score upload.');
362
+ error.code = 'MISSING_OPTION';
363
+ throw error;
364
+ }
365
+
366
+ const raw = await fs.readFile(options.file, 'utf8');
367
+ const body = JSON.parse(raw);
368
+ if (!body || !Array.isArray(body.snapshots)) {
369
+ const error = new Error('Invalid file: expected an object with a snapshots array.');
370
+ error.code = 'INVALID_PAYLOAD';
371
+ throw error;
372
+ }
373
+
374
+ const endpoint = resolveServiceUrl(baseUrl, '/mobile/score-snapshots');
375
+ const response = await fetch(endpoint, {
376
+ method: 'POST',
377
+ headers: {
378
+ 'Content-Type': 'application/json',
379
+ Authorization: `Bearer ${sessionState.session?.auth?.accessToken ?? ''}`
380
+ },
381
+ body: JSON.stringify(body)
382
+ });
383
+
384
+ if (response.status === 401 || response.status === 403) throw authenticationFailedError();
385
+ if (!response.ok) {
386
+ const payload = await response.json().catch(() => null);
387
+ const error = new Error(payload?.error ?? `Unexpected error (HTTP ${response.status}).`);
388
+ error.code = 'REMOTE_HTTP_ERROR';
389
+ throw error;
390
+ }
391
+
392
+ return response.json();
393
+ },
394
+
349
395
  'program-share-revoke': async (options, sessionState) => {
350
396
  const baseUrl = sessionState.session?.transport?.baseUrl;
351
397
  if (!baseUrl) throw notImplementedError();
352
- if (!options.token) {
353
- const error = new Error('--token is required for programs share revoke.');
398
+ if (!options['share-id']) {
399
+ const error = new Error('--share-id is required for programs share revoke.');
354
400
  error.code = 'MISSING_OPTION';
355
401
  throw error;
356
402
  }
357
403
 
358
- const endpoint = resolveServiceUrl(baseUrl, `/cli/program-share/${options.token}/revoke`);
404
+ const endpoint = resolveServiceUrl(baseUrl, `/cli/program-share/${options['share-id']}/revoke`);
359
405
  const response = await fetch(endpoint, {
360
406
  method: 'POST',
361
407
  headers: {
@@ -365,7 +411,7 @@ const remoteWriteCommandHandlers = {
365
411
 
366
412
  if (response.status === 401 || response.status === 403) throw authenticationFailedError();
367
413
  if (response.status === 404) {
368
- const error = new Error(`Program share not found: ${options.token}`);
414
+ const error = new Error(`Program share not found: ${options['share-id']}`);
369
415
  error.code = 'REMOTE_NOT_FOUND';
370
416
  throw error;
371
417
  }
package/src/state.js CHANGED
@@ -4,12 +4,19 @@ import path from 'node:path';
4
4
 
5
5
  export const sessionSchemaVersion = 1;
6
6
 
7
+ // Prefer HOME env over os.homedir() so test fixtures and explicit overrides
8
+ // work consistently across platforms. On Linux, os.homedir() reads from
9
+ // /etc/passwd via getpwuid and ignores HOME, breaking tests that override HOME.
10
+ export function userHomeDir() {
11
+ return process.env.HOME || os.homedir();
12
+ }
13
+
7
14
  function fallbackConfigRoot() {
8
15
  if (process.platform === 'darwin') {
9
- return path.join(os.homedir(), 'Library', 'Application Support');
16
+ return path.join(userHomeDir(), 'Library', 'Application Support');
10
17
  }
11
18
 
12
- return path.join(os.homedir(), '.config');
19
+ return path.join(userHomeDir(), '.config');
13
20
  }
14
21
 
15
22
  export function resolveConfigDir() {
@@ -1,3 +1,10 @@
1
+ function withPassRate(entry) {
2
+ return {
3
+ ...entry,
4
+ passRate: entry.total > 0 ? entry.passed / entry.total : 0
5
+ };
6
+ }
7
+
1
8
  export function summarizeResults(results) {
2
9
  const counts = {
3
10
  total: results.length,
@@ -8,75 +15,85 @@ export function summarizeResults(results) {
8
15
  return counts;
9
16
  }
10
17
 
11
- export function summarizeBySurface(results) {
18
+ function summarizeByKey(results, keyFn) {
12
19
  const grouped = new Map();
13
20
  for (const result of results) {
14
- const entry = grouped.get(result.surface) ?? { total: 0, passed: 0, failed: 0 };
21
+ const key = keyFn(result);
22
+ const entry = grouped.get(key) ?? { total: 0, passed: 0, failed: 0 };
15
23
  entry.total += 1;
16
24
  if (result.passed) entry.passed += 1;
17
25
  else entry.failed += 1;
18
- grouped.set(result.surface, entry);
26
+ grouped.set(key, entry);
19
27
  }
20
28
 
21
29
  return Object.fromEntries(
22
- [...grouped.entries()].map(([surface, entry]) => [
23
- surface,
24
- {
25
- ...entry,
26
- passRate: entry.total > 0 ? entry.passed / entry.total : 0
27
- }
28
- ])
30
+ [...grouped.entries()]
31
+ .sort(([left], [right]) => left.localeCompare(right))
32
+ .map(([key, entry]) => [key, withPassRate(entry)])
29
33
  );
30
34
  }
31
35
 
32
- export function buildStoredSummaryReport(snapshotPath, results) {
36
+ export function summarizeBySurface(results) {
37
+ return summarizeByKey(results, (result) => result.surface);
38
+ }
39
+
40
+ function normalizeGeneratedDate(generatedAt) {
41
+ if (typeof generatedAt !== 'string' || generatedAt.trim().length === 0) {
42
+ return 'legacy';
43
+ }
44
+ return generatedAt.slice(0, 10);
45
+ }
46
+
47
+ function metadataValue(value) {
48
+ return typeof value === 'string' && value.trim().length > 0 ? value : 'legacy';
49
+ }
50
+
51
+ function summarizeMetadata(results) {
52
+ return {
53
+ byPromptVersion: summarizeByKey(results, (result) => metadataValue(result.metadata?.promptVersion)),
54
+ byModel: summarizeByKey(results, (result) => metadataValue(result.metadata?.model)),
55
+ byGeneratedDate: summarizeByKey(results, (result) => normalizeGeneratedDate(result.metadata?.generatedAt)),
56
+ byGitSha: summarizeByKey(results, (result) => metadataValue(result.metadata?.gitSha)),
57
+ byCohort: summarizeByKey(results, (result) => {
58
+ const promptVersion = metadataValue(result.metadata?.promptVersion);
59
+ const model = metadataValue(result.metadata?.model);
60
+ const generatedDate = normalizeGeneratedDate(result.metadata?.generatedAt);
61
+ return `${result.surface} / ${promptVersion} / ${model} / ${generatedDate}`;
62
+ })
63
+ };
64
+ }
65
+
66
+ function summarizeStoredResults(results) {
33
67
  return {
34
- snapshotPath,
35
68
  summary: summarizeResults(results),
36
69
  bySurface: summarizeBySurface(results),
70
+ metadata: summarizeMetadata(results)
71
+ };
72
+ }
73
+
74
+ export function buildStoredSummaryReport(snapshotPath, results) {
75
+ const summary = summarizeStoredResults(results);
76
+ return {
77
+ snapshotPath,
78
+ summary: summary.summary,
79
+ bySurface: summary.bySurface,
80
+ metadata: summary.metadata,
37
81
  results: results.map((result) => ({
38
82
  id: result.id,
39
83
  surface: result.surface,
40
84
  passed: result.passed,
41
85
  output: result.output,
86
+ metadata: result.metadata ?? null,
42
87
  failedChecks: result.checks.filter((check) => !check.passed)
43
88
  }))
44
89
  };
45
90
  }
46
91
 
47
92
  export function summarizeBatchReports(reports) {
48
- const bySurface = new Map();
49
- let total = 0;
50
- let passed = 0;
51
-
52
- for (const report of reports) {
53
- total += report.summary.total;
54
- passed += report.summary.passed;
55
-
56
- for (const [surface, entry] of Object.entries(report.bySurface ?? {})) {
57
- const current = bySurface.get(surface) ?? { total: 0, passed: 0, failed: 0 };
58
- current.total += entry.total;
59
- current.passed += entry.passed;
60
- current.failed += entry.failed;
61
- bySurface.set(surface, current);
62
- }
63
- }
64
-
93
+ const results = reports.flatMap((report) => report.results ?? []);
65
94
  return {
66
95
  snapshotCount: reports.length,
67
- total,
68
- passed,
69
- failed: total - passed,
70
- passRate: total > 0 ? passed / total : 0,
71
- bySurface: Object.fromEntries(
72
- [...bySurface.entries()].map(([surface, entry]) => [
73
- surface,
74
- {
75
- ...entry,
76
- passRate: entry.total > 0 ? entry.passed / entry.total : 0
77
- }
78
- ])
79
- )
96
+ ...summarizeStoredResults(results)
80
97
  };
81
98
  }
82
99
 
@@ -90,8 +107,8 @@ export function evaluateBatchThresholds(summary, {
90
107
  } = {}) {
91
108
  const failures = [];
92
109
 
93
- if (typeof minPassRate === 'number' && summary.passRate < minPassRate) {
94
- failures.push(`Overall pass rate ${percentage(summary.passRate)} is below required ${percentage(minPassRate)}.`);
110
+ if (typeof minPassRate === 'number' && summary.summary.passRate < minPassRate) {
111
+ failures.push(`Overall pass rate ${percentage(summary.summary.passRate)} is below required ${percentage(minPassRate)}.`);
95
112
  }
96
113
 
97
114
  for (const [surface, minimum] of Object.entries(minSurfacePassRates)) {
@@ -105,22 +122,38 @@ export function evaluateBatchThresholds(summary, {
105
122
  return failures;
106
123
  }
107
124
 
125
+ function formatSummaryLines(entries, { limit = null } = {}) {
126
+ const lines = Object.entries(entries).map(
127
+ ([label, entry]) => `- ${label}: ${entry.passed}/${entry.total} passed (${(entry.passRate * 100).toFixed(1)}%)`
128
+ );
129
+ return limit == null ? lines : lines.slice(0, limit);
130
+ }
131
+
108
132
  export function formatBatchSummaryMarkdown(summary, reports, failures = []) {
109
133
  const lines = [
110
134
  '# Stored Summary Eval Report',
111
135
  '',
112
136
  `- Snapshots: ${summary.snapshotCount}`,
113
- `- Total summaries: ${summary.total}`,
114
- `- Passed: ${summary.passed}`,
115
- `- Failed: ${summary.failed}`,
116
- `- Pass rate: ${(summary.passRate * 100).toFixed(1)}%`,
137
+ `- Total summaries: ${summary.summary.total}`,
138
+ `- Passed: ${summary.summary.passed}`,
139
+ `- Failed: ${summary.summary.failed}`,
140
+ `- Pass rate: ${(summary.summary.passRate * 100).toFixed(1)}%`,
117
141
  '',
118
- '## By Surface'
142
+ '## By Surface',
143
+ ...formatSummaryLines(summary.bySurface)
119
144
  ];
120
145
 
121
- for (const [surface, entry] of Object.entries(summary.bySurface)) {
122
- lines.push(`- ${surface}: ${entry.passed}/${entry.total} passed (${(entry.passRate * 100).toFixed(1)}%)`);
123
- }
146
+ lines.push('', '## By Prompt Version');
147
+ lines.push(...formatSummaryLines(summary.metadata.byPromptVersion));
148
+
149
+ lines.push('', '## By Model');
150
+ lines.push(...formatSummaryLines(summary.metadata.byModel));
151
+
152
+ lines.push('', '## By Generated Date');
153
+ lines.push(...formatSummaryLines(summary.metadata.byGeneratedDate));
154
+
155
+ lines.push('', '## Versioned Cohorts');
156
+ lines.push(...formatSummaryLines(summary.metadata.byCohort, { limit: 20 }));
124
157
 
125
158
  lines.push('', '## Snapshots');
126
159
  for (const report of reports) {