codeflash 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +104 -0
- package/bin/codeflash-setup.js +13 -0
- package/bin/codeflash.js +131 -0
- package/package.json +71 -6
- package/runtime/capture.js +707 -0
- package/runtime/comparator.js +406 -0
- package/runtime/compare-results.js +329 -0
- package/runtime/index.js +79 -0
- package/runtime/serializer.js +851 -0
- package/scripts/postinstall.js +265 -0
- package/index.js +0 -7
|
@@ -0,0 +1,329 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Codeflash Result Comparator
|
|
4
|
+
*
|
|
5
|
+
* This script compares test results between original and optimized code runs.
|
|
6
|
+
* It reads serialized behavior data from SQLite databases and compares them
|
|
7
|
+
* using the codeflash-comparator in JavaScript land.
|
|
8
|
+
*
|
|
9
|
+
* Usage:
|
|
10
|
+
* node codeflash-compare-results.js <original_db> <candidate_db>
|
|
11
|
+
* node codeflash-compare-results.js --stdin (reads JSON with base64 "original" and "candidate" fields from stdin)
|
|
12
|
+
*
|
|
13
|
+
* Output (JSON):
|
|
14
|
+
* {
|
|
15
|
+
* "equivalent": true/false,
|
|
16
|
+
* "diffs": [
|
|
17
|
+
* {
|
|
18
|
+
* "invocation_id": "...",
|
|
19
|
+
* "scope": "return_value|missing",
|
|
20
|
+
* "original": "...",
|
|
21
|
+
* "candidate": "..."
|
|
22
|
+
* }
|
|
23
|
+
* ],
|
|
24
|
+
* "error": null | "error message"
|
|
25
|
+
* }
|
|
26
|
+
*/
|
|
27
|
+
|
|
28
|
+
const fs = require('fs');
|
|
29
|
+
const path = require('path');
|
|
30
|
+
|
|
31
|
+
// Import our modules
|
|
32
|
+
const { deserialize } = require('./serializer');
|
|
33
|
+
const { comparator } = require('./comparator');
|
|
34
|
+
|
|
35
|
+
// Lazy-load better-sqlite3 to avoid exit on require()
|
|
36
|
+
let Database = null;
|
|
37
|
+
let databaseLoadError = null;
|
|
38
|
+
|
|
39
|
+
function getDatabase() {
|
|
40
|
+
if (Database === null && databaseLoadError === null) {
|
|
41
|
+
try {
|
|
42
|
+
Database = require('better-sqlite3');
|
|
43
|
+
} catch (e) {
|
|
44
|
+
databaseLoadError = 'better-sqlite3 not installed. Run: npm install better-sqlite3';
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
return { Database, error: databaseLoadError };
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
/**
 * Decode one stored return value, logging (not throwing) on failure.
 *
 * @param {*} rawValue - Raw `return_value` column; falsy values are not decoded
 * @param {string} invocationId - Used only in the failure log message
 * @returns {*} The deserialized value, or null when absent or undecodable
 */
function decodeReturnValue(rawValue, invocationId) {
  if (!rawValue) {
    return null;
  }
  try {
    return deserialize(rawValue);
  } catch (e) {
    console.error(`Failed to deserialize result for ${invocationId}: ${e.message}`);
    return null;
  }
}

/**
 * Load the first-loop test results stored in a SQLite database.
 *
 * Every `test_results` row with `loop_index = 1` becomes one Map entry. The key
 * is built from loop index, module path, class name (empty when missing),
 * test function name, function under test and iteration id, joined by ':'.
 * The database is opened read-only and always closed before returning.
 *
 * @param {string} dbPath - Path to SQLite database
 * @returns {Map<string, object>} Map of invocation_id -> result object
 * @throws {Error} When the file does not exist or better-sqlite3 failed to load
 */
function readTestResults(dbPath) {
  const results = new Map();

  if (!fs.existsSync(dbPath)) {
    throw new Error(`Database not found: ${dbPath}`);
  }

  const loaded = getDatabase();
  if (loaded.error) {
    throw new Error(loaded.error);
  }

  const db = new loaded.Database(dbPath, { readonly: true });

  try {
    const query = db.prepare(`
      SELECT
        test_module_path,
        test_class_name,
        test_function_name,
        function_getting_tested,
        loop_index,
        iteration_id,
        runtime,
        return_value,
        verification_type
      FROM test_results
      WHERE loop_index = 1
    `);

    for (const row of query.iterate()) {
      const {
        test_module_path: testModulePath,
        test_class_name: testClassName,
        test_function_name: testFunctionName,
        function_getting_tested: functionGettingTested,
        loop_index: loopIndex,
        iteration_id: iterationId,
        runtime,
        return_value: rawReturnValue,
        verification_type: verificationType,
      } = row;

      // Build unique invocation ID (matches Python's format)
      const invocationId = `${loopIndex}:${testModulePath}:${testClassName || ''}:${testFunctionName}:${functionGettingTested}:${iterationId}`;

      results.set(invocationId, {
        testModulePath,
        testClassName,
        testFunctionName,
        functionGettingTested,
        loopIndex,
        iterationId,
        runtime,
        returnValue: decodeReturnValue(rawReturnValue, invocationId),
        verificationType,
      });
    }
  } finally {
    db.close();
  }

  return results;
}
|
|
118
|
+
|
|
119
|
+
/**
 * Diff the results of the original run against those of the candidate run.
 *
 * Invocations present only in the candidate are ignored. Invocations present
 * only in the original are reported with scope 'missing'. Invocations present
 * in both are checked with the comparator and reported with scope
 * 'return_value' when they differ.
 *
 * @param {Map<string, object>} originalResults - Results from original code
 * @param {Map<string, object>} candidateResults - Results from optimized code
 * @returns {object} `{ equivalent, diffs, total_invocations, original_count,
 *   candidate_count }`; `equivalent` is true exactly when `diffs` is empty
 */
function compareResults(originalResults, candidateResults) {
  // Identifies the test an invocation belongs to in each reported diff
  const describeTest = (result) => ({
    test_module_path: result.testModulePath,
    test_function_name: result.testFunctionName,
    function_getting_tested: result.functionGettingTested,
  });

  // Union of invocation IDs from both runs
  const allIds = new Set([...originalResults.keys(), ...candidateResults.keys()]);
  const diffs = [];

  allIds.forEach((invocationId) => {
    const original = originalResults.get(invocationId);
    const candidate = candidateResults.get(invocationId);

    // Extra candidate-only results are tolerated
    if (!original && candidate) {
      return;
    }

    // A result the candidate never produced counts as a difference
    if (original && !candidate) {
      diffs.push({
        invocation_id: invocationId,
        scope: 'missing',
        original: summarizeValue(original.returnValue),
        candidate: null,
        test_info: describeTest(original),
      });
      return;
    }

    // Both runs recorded this invocation: compare the stored return values.
    // The return value format is [args, kwargs, returnValue] (behavior tuple)
    const originalValue = original.returnValue;
    const candidateValue = candidate.returnValue;
    if (comparator(originalValue, candidateValue)) {
      return;
    }

    diffs.push({
      invocation_id: invocationId,
      scope: 'return_value',
      original: summarizeValue(originalValue),
      candidate: summarizeValue(candidateValue),
      test_info: describeTest(original),
    });
  });

  return {
    equivalent: diffs.length === 0,
    diffs,
    total_invocations: allIds.size,
    original_count: originalResults.size,
    candidate_count: candidateResults.size,
  };
}
|
|
190
|
+
|
|
191
|
+
/**
 * JSON.stringify replacer that keeps values JSON cannot express visible.
 *
 * Without it, undefined/NaN/Infinity inside arrays all print as `null`, Maps
 * and Sets print as `{}`, and a BigInt makes JSON.stringify throw, so two
 * different values could produce the same (or no) summary.
 *
 * @param {string} _key - Property key (unused)
 * @param {*} nested - Value about to be serialized
 * @returns {*} A JSON-representable stand-in, or the value unchanged
 */
function summaryReplacer(_key, nested) {
  if (nested === undefined) {
    return 'undefined';
  }
  if (typeof nested === 'bigint') {
    return `${nested}n`;
  }
  if (typeof nested === 'number' && !Number.isFinite(nested)) {
    return String(nested);
  }
  if (nested instanceof Map || nested instanceof Set) {
    return summarizeValue(nested, 50);
  }
  return nested;
}

/**
 * Create a summary of a value for diff reporting.
 * Truncates long values to avoid huge output.
 *
 * Output for JSON-representable values is plain `JSON.stringify` output.
 * Maps, Sets and arrays longer than five items show only their first three
 * entries. Never throws: any failure is reported inside the returned string.
 *
 * @param {any} value - Value to summarize
 * @param {number} [maxLength=200] - Maximum length of the returned string;
 *   longer summaries are cut and end with '...'
 * @returns {string} String representation
 */
function summarizeValue(value, maxLength = 200) {
  try {
    let str;
    if (value === undefined) {
      str = 'undefined';
    } else if (value === null) {
      str = 'null';
    } else if (typeof value === 'function') {
      str = `[Function: ${value.name || 'anonymous'}]`;
    } else if (value instanceof Map) {
      str = `Map(${value.size}) { ${[...value.entries()].slice(0, 3).map(([k, v]) => `${summarizeValue(k, 50)} => ${summarizeValue(v, 50)}`).join(', ')}${value.size > 3 ? ', ...' : ''} }`;
    } else if (value instanceof Set) {
      str = `Set(${value.size}) { ${[...value].slice(0, 3).map(v => summarizeValue(v, 50)).join(', ')}${value.size > 3 ? ', ...' : ''} }`;
    } else if (value instanceof Date) {
      // toISOString() throws a RangeError on an invalid date
      str = Number.isNaN(value.getTime()) ? 'Invalid Date' : value.toISOString();
    } else if (Array.isArray(value)) {
      if (value.length <= 5) {
        str = JSON.stringify(value, summaryReplacer);
      } else {
        str = `[${value.slice(0, 3).map(v => summarizeValue(v, 50)).join(', ')}, ... (${value.length} items)]`;
      }
    } else if (typeof value === 'object') {
      str = JSON.stringify(value, summaryReplacer);
    } else {
      str = String(value);
    }

    if (str.length > maxLength) {
      // Clamp so a tiny maxLength cannot turn into a negative slice end,
      // which would keep nearly the whole string.
      return str.slice(0, Math.max(0, maxLength - 3)) + '...';
    }
    return str;
  } catch (e) {
    return `[Unable to stringify: ${e.message}]`;
  }
}
|
|
233
|
+
|
|
234
|
+
/**
 * Decide whether two serialized results are equivalent (used by --stdin mode).
 *
 * Both buffers are deserialized and handed to the comparator. Any error
 * raised along the way is logged to stderr and treated as "not equivalent".
 *
 * @param {Buffer} originalBuffer - Serialized original result
 * @param {Buffer} candidateBuffer - Serialized candidate result
 * @returns {boolean} True if equivalent
 */
function compareBuffers(originalBuffer, candidateBuffer) {
  let verdict = false;
  try {
    const [original, candidate] = [originalBuffer, candidateBuffer].map((buf) => deserialize(buf));
    verdict = comparator(original, candidate);
  } catch (e) {
    console.error(`Comparison error: ${e.message}`);
  }
  return verdict;
}
|
|
251
|
+
|
|
252
|
+
/**
 * Main entry point.
 *
 * Modes, selected from the command-line arguments:
 *   - `--stdin`: read a JSON document from stdin whose `original` and
 *     `candidate` fields are base64-encoded serialized values, then print
 *     `{ equivalent, error }` as JSON. The exit status is not changed here.
 *   - `<original_db> <candidate_db>`: compare two SQLite result databases and
 *     print the comparison as compact JSON with `error: null`. The exit status
 *     is 0 when equivalent and 1 otherwise.
 *
 * Bad usage and any error in database mode yield exit status 1.
 *
 * The exit status is set through process.exitCode instead of process.exit():
 * process.exit() ends the process immediately and can cut off stdout writes
 * that are still pending, which would corrupt the JSON a caller is parsing.
 */
function main() {
  const args = process.argv.slice(2);

  if (args.length === 0) {
    console.error('Usage: node codeflash-compare-results.js <original_db> <candidate_db>');
    console.error(' node codeflash-compare-results.js --stdin (reads JSON from stdin)');
    process.exitCode = 1;
    return;
  }

  // Handle stdin mode for programmatic use
  if (args[0] === '--stdin') {
    let input = '';
    process.stdin.setEncoding('utf8');
    process.stdin.on('data', chunk => input += chunk);
    process.stdin.on('end', () => {
      try {
        const data = JSON.parse(input);
        const originalBuffer = Buffer.from(data.original, 'base64');
        const candidateBuffer = Buffer.from(data.candidate, 'base64');
        const isEqual = compareBuffers(originalBuffer, candidateBuffer);
        console.log(JSON.stringify({ equivalent: isEqual, error: null }));
      } catch (e) {
        console.log(JSON.stringify({ equivalent: false, error: e.message }));
      }
    });
    return;
  }

  // Standard mode: compare two SQLite databases
  if (args.length < 2) {
    console.error('Usage: node codeflash-compare-results.js <original_db> <candidate_db>');
    process.exitCode = 1;
    return;
  }

  const [originalDb, candidateDb] = args;

  try {
    const originalResults = readTestResults(originalDb);
    const candidateResults = readTestResults(candidateDb);

    const comparison = compareResults(originalResults, candidateResults);

    // `error: null` matches the output format documented in the file header
    const output = { ...comparison, error: null };

    // Limit the number of diffs to avoid huge output
    const MAX_DIFFS = 50;
    if (comparison.diffs.length > MAX_DIFFS) {
      output.diffs = comparison.diffs.slice(0, MAX_DIFFS);
      output.diffs_truncated = comparison.diffs.length - MAX_DIFFS;
    }

    // Use compact JSON (no pretty-printing) to reduce output size
    console.log(JSON.stringify(output));
    process.exitCode = comparison.equivalent ? 0 : 1;
  } catch (e) {
    console.log(JSON.stringify({
      equivalent: false,
      diffs: [],
      error: e.message
    }));
    process.exitCode = 1;
  }
}
|
|
317
|
+
|
|
318
|
+
// Public API: lets other modules reuse the comparison helpers without the CLI
module.exports = { readTestResults, compareResults, compareBuffers, summarizeValue };

// Start the CLI only when this file is the script Node was launched with
if (module === require.main) {
  main();
}
|
package/runtime/index.js
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* codeflash
|
|
3
|
+
*
|
|
4
|
+
* Codeflash CLI runtime helpers for test instrumentation and behavior verification.
|
|
5
|
+
*
|
|
6
|
+
* Main exports:
|
|
7
|
+
* - capture: Capture function return values for behavior verification
|
|
8
|
+
* - capturePerf: Capture performance metrics (timing only)
|
|
9
|
+
* - serialize/deserialize: Value serialization for storage
|
|
10
|
+
* - comparator: Deep equality comparison
|
|
11
|
+
*
|
|
12
|
+
* Usage (CommonJS):
|
|
13
|
+
* const { capture, capturePerf } = require('codeflash');
|
|
14
|
+
*
|
|
15
|
+
* Usage (ES Modules):
|
|
16
|
+
* import { capture, capturePerf } from 'codeflash';
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
'use strict';
|
|
20
|
+
|
|
21
|
+
// Main capture functions (instrumentation)
|
|
22
|
+
const capture = require('./capture');
|
|
23
|
+
|
|
24
|
+
// Serialization utilities
|
|
25
|
+
const serializer = require('./serializer');
|
|
26
|
+
|
|
27
|
+
// Comparison utilities
|
|
28
|
+
const comparator = require('./comparator');
|
|
29
|
+
|
|
30
|
+
// Result comparison (used by CLI)
|
|
31
|
+
const compareResults = require('./compare-results');
|
|
32
|
+
|
|
33
|
+
// Re-export all public APIs
|
|
34
|
+
module.exports = {
|
|
35
|
+
// === Main Instrumentation API ===
|
|
36
|
+
capture: capture.capture,
|
|
37
|
+
capturePerf: capture.capturePerf,
|
|
38
|
+
captureMultiple: capture.captureMultiple,
|
|
39
|
+
|
|
40
|
+
// === Test Lifecycle ===
|
|
41
|
+
writeResults: capture.writeResults,
|
|
42
|
+
clearResults: capture.clearResults,
|
|
43
|
+
getResults: capture.getResults,
|
|
44
|
+
setTestName: capture.setTestName,
|
|
45
|
+
initDatabase: capture.initDatabase,
|
|
46
|
+
resetInvocationCounters: capture.resetInvocationCounters,
|
|
47
|
+
|
|
48
|
+
// === Serialization ===
|
|
49
|
+
serialize: serializer.serialize,
|
|
50
|
+
deserialize: serializer.deserialize,
|
|
51
|
+
getSerializerType: serializer.getSerializerType,
|
|
52
|
+
safeSerialize: capture.safeSerialize,
|
|
53
|
+
safeDeserialize: capture.safeDeserialize,
|
|
54
|
+
|
|
55
|
+
// === Comparison ===
|
|
56
|
+
comparator: comparator.comparator,
|
|
57
|
+
createComparator: comparator.createComparator,
|
|
58
|
+
strictComparator: comparator.strictComparator,
|
|
59
|
+
looseComparator: comparator.looseComparator,
|
|
60
|
+
isClose: comparator.isClose,
|
|
61
|
+
|
|
62
|
+
// === Result Comparison (CLI helpers) ===
|
|
63
|
+
readTestResults: compareResults.readTestResults,
|
|
64
|
+
compareResults: compareResults.compareResults,
|
|
65
|
+
compareBuffers: compareResults.compareBuffers,
|
|
66
|
+
|
|
67
|
+
// === Utilities ===
|
|
68
|
+
getInvocationIndex: capture.getInvocationIndex,
|
|
69
|
+
sanitizeTestId: capture.sanitizeTestId,
|
|
70
|
+
|
|
71
|
+
// === Constants ===
|
|
72
|
+
LOOP_INDEX: capture.LOOP_INDEX,
|
|
73
|
+
OUTPUT_FILE: capture.OUTPUT_FILE,
|
|
74
|
+
TEST_ITERATION: capture.TEST_ITERATION,
|
|
75
|
+
|
|
76
|
+
// === Feature Detection ===
|
|
77
|
+
hasV8: serializer.hasV8,
|
|
78
|
+
hasMsgpack: serializer.hasMsgpack,
|
|
79
|
+
};
|