codeflash 0.0.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,331 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Codeflash Result Comparator
4
+ *
5
+ * This script compares test results between original and optimized code runs.
6
+ * It reads serialized behavior data from SQLite databases and compares them
7
+ * using the codeflash-comparator in JavaScript land.
8
+ *
9
+ * Usage:
10
+ * node codeflash-compare-results.js <original_db> <candidate_db>
11
+ * node codeflash-compare-results.js --stdin (reads JSON with base64 "original" and "candidate" fields from stdin)
12
+ *
13
+ * Output (JSON):
14
+ * {
15
+ * "equivalent": true/false,
16
+ * "diffs": [
17
+ * {
18
+ * "invocation_id": "...",
19
+ * "scope": "return_value|missing",
20
+ * "original": "...",
21
+ * "candidate": "..."
22
+ * }
23
+ * ],
24
+ * "error": null | "error message"
25
+ * }
26
+ */
27
+
28
+ const fs = require('fs');
29
+ const path = require('path');
30
+
31
+ // Import our modules
32
+ const { deserialize } = require('./serializer');
33
+ const { comparator } = require('./comparator');
34
+
35
+ // Lazy-load better-sqlite3 to avoid process.exit during module require
36
+ // This prevents crashes when this module is imported by test files that don't use it
37
+ let Database = null;
38
+ let databaseLoadError = null;
39
+
40
+ function getDatabase() {
41
+ if (Database === null && databaseLoadError === null) {
42
+ try {
43
+ Database = require('better-sqlite3');
44
+ } catch (e) {
45
+ databaseLoadError = 'better-sqlite3 not installed. Run: npm install better-sqlite3';
46
+ }
47
+ }
48
+ return { Database, error: databaseLoadError };
49
+ }
50
+
51
/**
 * Load the loop-index-1 rows of the `test_results` table into memory.
 *
 * Each row is keyed by an invocation ID assembled from the loop index, test
 * module path, test class name (empty when absent), test function name,
 * function under test and iteration ID, joined with ':'.
 * A row whose return value cannot be deserialized is kept with a null
 * `returnValue`; the failure is reported on stderr.
 *
 * @param {string} dbPath - Path to SQLite database
 * @returns {Map<string, object>} Map of invocation_id -> result object
 * @throws {Error} If the file does not exist or better-sqlite3 cannot be loaded
 */
function readTestResults(dbPath) {
  if (!fs.existsSync(dbPath)) {
    throw new Error(`Database not found: ${dbPath}`);
  }

  // The driver is resolved lazily; a load failure comes back as a message.
  const { Database: DB, error } = getDatabase();
  if (error) {
    throw new Error(error);
  }

  // Turn the stored blob back into a value; null when absent or unreadable.
  const decodeReturnValue = (serialized, invocationId) => {
    if (!serialized) {
      return null;
    }
    try {
      return deserialize(serialized);
    } catch (e) {
      console.error(`Failed to deserialize result for ${invocationId}: ${e.message}`);
      return null;
    }
  };

  const connection = new DB(dbPath, { readonly: true });
  const resultsById = new Map();

  try {
    const query = connection.prepare(`
      SELECT
        test_module_path,
        test_class_name,
        test_function_name,
        function_getting_tested,
        loop_index,
        iteration_id,
        runtime,
        return_value,
        verification_type
      FROM test_results
      WHERE loop_index = 1
    `);

    for (const row of query.iterate()) {
      const {
        test_module_path: testModulePath,
        test_class_name: testClassName,
        test_function_name: testFunctionName,
        function_getting_tested: functionGettingTested,
        loop_index: loopIndex,
        iteration_id: iterationId,
        runtime,
        return_value: serialized,
        verification_type: verificationType,
      } = row;

      // Unique invocation ID (matches Python's format)
      const invocationId = `${loopIndex}:${testModulePath}:${testClassName || ''}:${testFunctionName}:${functionGettingTested}:${iterationId}`;

      resultsById.set(invocationId, {
        testModulePath,
        testClassName,
        testFunctionName,
        functionGettingTested,
        loopIndex,
        iterationId,
        runtime,
        returnValue: decodeReturnValue(serialized, invocationId),
        verificationType,
      });
    }
  } finally {
    // Always release the handle, even if iteration throws.
    connection.close();
  }

  return resultsById;
}
120
+
121
/**
 * Diff the recorded behavior of the original code against the candidate.
 *
 * Invocations present only in the candidate are ignored. Invocations present
 * only in the original are reported with scope 'missing'. Invocations present
 * in both are compared with the JavaScript comparator and reported with scope
 * 'return_value' when they differ.
 *
 * @param {Map<string, object>} originalResults - Results from original code
 * @param {Map<string, object>} candidateResults - Results from optimized code
 * @returns {object} Comparison result: `equivalent`, `diffs`, and the counts
 *   `total_invocations`, `original_count`, `candidate_count`
 */
function compareResults(originalResults, candidateResults) {
  // Identifying details attached to every reported diff.
  const describeTest = (entry) => ({
    test_module_path: entry.testModulePath,
    test_function_name: entry.testFunctionName,
    function_getting_tested: entry.functionGettingTested,
  });

  // Union of invocation IDs: original order first, then candidate-only IDs.
  const allIds = new Set(originalResults.keys());
  for (const candidateId of candidateResults.keys()) {
    allIds.add(candidateId);
  }

  const diffs = [];

  for (const invocationId of allIds) {
    const original = originalResults.get(invocationId);
    const candidate = candidateResults.get(invocationId);

    if (!original && candidate) {
      // Extra candidate results are acceptable.
      continue;
    }

    if (original && !candidate) {
      diffs.push({
        invocation_id: invocationId,
        scope: 'missing',
        original: summarizeValue(original.returnValue),
        candidate: null,
        test_info: describeTest(original),
      });
      continue;
    }

    // The return value format is [args, kwargs, returnValue] (behavior tuple)
    const originalValue = original.returnValue;
    const candidateValue = candidate.returnValue;
    if (comparator(originalValue, candidateValue)) {
      continue;
    }

    diffs.push({
      invocation_id: invocationId,
      scope: 'return_value',
      original: summarizeValue(originalValue),
      candidate: summarizeValue(candidateValue),
      test_info: describeTest(original),
    });
  }

  return {
    // Equivalent exactly when nothing was reported.
    equivalent: diffs.length === 0,
    diffs,
    total_invocations: allIds.size,
    original_count: originalResults.size,
    candidate_count: candidateResults.size,
  };
}
192
+
193
/**
 * Render a value in JSON.stringify's compact layout without JSON's lossy
 * coercions, so that values which differ also print differently.
 *
 * JSON-safe input produces the same text as JSON.stringify. In addition,
 * `undefined`, `NaN`, `Infinity`, BigInt, symbols, functions, Map, Set and
 * circular references are rendered explicitly instead of being dropped,
 * turned into `null`/`{}`, or throwing.
 *
 * @param {any} value - Value to render
 * @param {Set<object>} [ancestors] - Objects currently being rendered (cycle guard)
 * @returns {string} Compact textual form
 */
function stringifyLossless(value, ancestors = new Set()) {
  if (value === null) {
    return 'null';
  }
  switch (typeof value) {
    case 'undefined':
      return 'undefined';
    case 'string':
      return JSON.stringify(value);
    case 'bigint':
      return `${value}n`;
    case 'function':
      return `[Function: ${value.name || 'anonymous'}]`;
    case 'object':
      break;
    default:
      // number (including NaN / Infinity), boolean, symbol
      return String(value);
  }

  // JSON.stringify unwraps boxed primitives; do the same.
  if (value instanceof Number || value instanceof String || value instanceof Boolean) {
    return stringifyLossless(value.valueOf(), ancestors);
  }
  if (value instanceof Date) {
    return Number.isNaN(value.getTime()) ? 'Invalid Date' : JSON.stringify(value.toISOString());
  }
  if (ancestors.has(value)) {
    return '[Circular]';
  }

  ancestors.add(value);
  try {
    if (value instanceof Map) {
      const entries = Array.from(
        value,
        ([k, v]) => `${stringifyLossless(k, ancestors)} => ${stringifyLossless(v, ancestors)}`,
      );
      return `Map(${value.size}) {${entries.join(', ')}}`;
    }
    if (value instanceof Set) {
      const members = Array.from(value, (v) => stringifyLossless(v, ancestors));
      return `Set(${value.size}) {${members.join(', ')}}`;
    }
    if (typeof value.toJSON === 'function') {
      // Honor custom serialization (e.g. Buffer) the way JSON.stringify does.
      return stringifyLossless(value.toJSON(), ancestors);
    }
    if (Array.isArray(value)) {
      // Array.from visits holes too, so sparse arrays keep their length.
      const items = Array.from(value, (item) => stringifyLossless(item, ancestors));
      return `[${items.join(',')}]`;
    }
    const fields = Object.keys(value).map(
      (key) => `${JSON.stringify(key)}:${stringifyLossless(value[key], ancestors)}`,
    );
    return `{${fields.join(',')}}`;
  } finally {
    // Only true ancestors count as cycles; shared siblings must render normally.
    ancestors.delete(value);
  }
}

/**
 * Create a summary of a value for diff reporting.
 * Truncates long values to avoid huge output.
 *
 * Top-level Maps and Sets show their size and first three entries; arrays
 * longer than five items show the first three items and the total count.
 * Short arrays and plain objects are rendered in compact JSON layout, with
 * non-JSON values (undefined, NaN, BigInt, cycles, ...) shown explicitly so
 * that two different values do not collapse to the same summary.
 * Never throws: any failure is reported inside the returned string.
 *
 * @param {any} value - Value to summarize
 * @param {number} [maxLength=200] - Maximum length of the returned string
 * @returns {string} String representation
 */
function summarizeValue(value, maxLength = 200) {
  try {
    let str;
    if (value === undefined) {
      str = 'undefined';
    } else if (value === null) {
      str = 'null';
    } else if (typeof value === 'function') {
      str = `[Function: ${value.name || 'anonymous'}]`;
    } else if (value instanceof Map) {
      const shown = [...value.entries()]
        .slice(0, 3)
        .map(([k, v]) => `${summarizeValue(k, 50)} => ${summarizeValue(v, 50)}`)
        .join(', ');
      str = `Map(${value.size}) { ${shown}${value.size > 3 ? ', ...' : ''} }`;
    } else if (value instanceof Set) {
      const shown = [...value]
        .slice(0, 3)
        .map((v) => summarizeValue(v, 50))
        .join(', ');
      str = `Set(${value.size}) { ${shown}${value.size > 3 ? ', ...' : ''} }`;
    } else if (value instanceof Date) {
      // toISOString() throws a RangeError on an invalid date.
      str = Number.isNaN(value.getTime()) ? 'Invalid Date' : value.toISOString();
    } else if (Array.isArray(value)) {
      if (value.length <= 5) {
        str = stringifyLossless(value);
      } else {
        const head = value
          .slice(0, 3)
          .map((v) => summarizeValue(v, 50))
          .join(', ');
        str = `[${head}, ... (${value.length} items)]`;
      }
    } else if (typeof value === 'object') {
      str = stringifyLossless(value);
    } else {
      str = String(value);
    }

    if (str.length > maxLength) {
      return `${str.slice(0, maxLength - 3)}...`;
    }
    return str;
  } catch (e) {
    return `[Unable to stringify: ${e.message}]`;
  }
}
235
+
236
/**
 * Decide whether two serialized results decode to equivalent values
 * (used for stdin input).
 *
 * Both buffers are deserialized (original first) and handed to the
 * comparator. Any error raised along the way is logged to stderr and the
 * buffers are treated as not equivalent.
 *
 * @param {Buffer} originalBuffer - Serialized original result
 * @param {Buffer} candidateBuffer - Serialized candidate result
 * @returns {boolean} True if equivalent
 */
function compareBuffers(originalBuffer, candidateBuffer) {
  let verdict = false;
  try {
    const [originalValue, candidateValue] = [originalBuffer, candidateBuffer].map(
      (buffer) => deserialize(buffer),
    );
    verdict = comparator(originalValue, candidateValue);
  } catch (e) {
    console.error(`Comparison error: ${e.message}`);
  }
  return verdict;
}
253
+
254
/**
 * Main entry point.
 *
 * Modes, selected from the command-line arguments:
 *  - no arguments: print usage to stderr, exit code 1.
 *  - `--stdin`: read a JSON document with base64 `original` and `candidate`
 *    fields from stdin and print `{ equivalent, error }` as JSON.
 *  - `<original_db> <candidate_db>`: compare two SQLite result databases and
 *    print the comparison as compact JSON; exit code 0 when equivalent,
 *    1 otherwise (including on error or when only one path is given).
 *
 * The exit status is set through `process.exitCode` rather than
 * `process.exit()`: exiting immediately after console.log can drop output
 * that has not been flushed yet, which would hand the caller truncated JSON.
 */
function main() {
  const args = process.argv.slice(2);

  if (args.length === 0) {
    console.error('Usage: node codeflash-compare-results.js <original_db> <candidate_db>');
    console.error(' node codeflash-compare-results.js --stdin (reads JSON from stdin)');
    process.exitCode = 1;
    return;
  }

  // Handle stdin mode for programmatic use
  if (args[0] === '--stdin') {
    let input = '';
    process.stdin.setEncoding('utf8');
    process.stdin.on('data', (chunk) => {
      input += chunk;
    });
    process.stdin.on('end', () => {
      try {
        const data = JSON.parse(input);
        const originalBuffer = Buffer.from(data.original, 'base64');
        const candidateBuffer = Buffer.from(data.candidate, 'base64');
        const isEqual = compareBuffers(originalBuffer, candidateBuffer);
        console.log(JSON.stringify({ equivalent: isEqual, error: null }));
      } catch (e) {
        console.log(JSON.stringify({ equivalent: false, error: e.message }));
      }
    });
    return;
  }

  // Standard mode: compare two SQLite databases
  if (args.length < 2) {
    console.error('Usage: node codeflash-compare-results.js <original_db> <candidate_db>');
    process.exitCode = 1;
    return;
  }

  const [originalDb, candidateDb] = args;

  let report;
  try {
    const originalResults = readTestResults(originalDb);
    const candidateResults = readTestResults(candidateDb);

    report = compareResults(originalResults, candidateResults);

    // Limit the number of diffs to avoid huge output
    const MAX_DIFFS = 50;
    if (report.diffs.length > MAX_DIFFS) {
      const truncatedCount = report.diffs.length - MAX_DIFFS;
      report.diffs = report.diffs.slice(0, MAX_DIFFS);
      report.diffs_truncated = truncatedCount;
    }
  } catch (e) {
    report = {
      equivalent: false,
      diffs: [],
      error: e.message,
    };
  }

  // Use compact JSON (no pretty-printing) to reduce output size
  console.log(JSON.stringify(report));
  process.exitCode = report.equivalent ? 0 : 1;
}
319
+
320
// Export for programmatic use: the database reader, the two comparison entry
// points and the value summarizer. `main` and `getDatabase` are not exported.
module.exports = {
  readTestResults,
  compareResults,
  compareBuffers,
  summarizeValue,
};

// Run the CLI only when this file is the script Node was started with;
// requiring it from another module just exposes the exports above.
if (require.main === module) {
  main();
}
@@ -0,0 +1,146 @@
1
+ /**
2
+ * Codeflash TypeScript Declarations
3
+ */
4
+
5
/**
 * Capture a function call for behavior verification.
 * Records inputs, outputs, timing to SQLite database.
 *
 * `T` is inferred from `fn`, so `args` is type-checked against `fn`'s
 * parameter list and the result is typed as `fn`'s return type (for an async
 * `fn` that is its promise type).
 *
 * @typeParam T - Type of the function being called
 * @param funcName - Name of the function being tested
 * @param lineId - Line number identifier in test file (passed as a string)
 * @param fn - The function to call
 * @param args - Arguments to pass to the function
 * @returns The function's return value
 */
export function capture<T extends (...args: any[]) => any>(
  funcName: string,
  lineId: string,
  fn: T,
  ...args: Parameters<T>
): ReturnType<T>;
21
+
22
/**
 * Capture a function call for performance benchmarking.
 * Only measures timing, prints to stdout.
 *
 * Declared with the same signature as `capture`: `T` is inferred from `fn`,
 * `args` must match `fn`'s parameters, and the result has `fn`'s return type.
 *
 * @typeParam T - Type of the function being called
 * @param funcName - Name of the function being tested
 * @param lineId - Line number identifier in test file (passed as a string)
 * @param fn - The function to call
 * @param args - Arguments to pass to the function
 * @returns The function's return value
 */
export function capturePerf<T extends (...args: any[]) => any>(
  funcName: string,
  lineId: string,
  fn: T,
  ...args: Parameters<T>
): ReturnType<T>;
38
+
39
/**
 * Capture multiple invocations for benchmarking.
 *
 * Unlike `capture`, the arguments are not spread: `argsList` holds one
 * argument tuple per invocation, each matching `fn`'s parameter list.
 *
 * @typeParam T - Type of the function being called
 * @param funcName - Name of the function being tested
 * @param lineId - Line number identifier (passed as a string)
 * @param fn - The function to call
 * @param argsList - List of argument arrays to test
 * @returns Array of return values, typed as `fn`'s return type
 */
export function captureMultiple<T extends (...args: any[]) => any>(
  funcName: string,
  lineId: string,
  fn: T,
  argsList: Parameters<T>[]
): ReturnType<T>[];
54
+
55
/**
 * Write remaining results to file.
 */
export function writeResults(): void;

/**
 * Clear all recorded results.
 */
export function clearResults(): void;

/**
 * Get the current results buffer.
 *
 * @returns The buffered results; element shape is not described by this
 *   declaration (typed as `any`)
 */
export function getResults(): any[];

/**
 * Set the current test name.
 *
 * @param name - Test name to set
 */
export function setTestName(name: string): void;

/**
 * Serialize a value for storage.
 *
 * @param value - Value to serialize
 * @returns Buffer holding the serialized value
 */
export function safeSerialize(value: any): Buffer;

/**
 * Deserialize a buffer back to a value.
 *
 * @param buffer - Serialized data, as a Buffer or Uint8Array
 * @returns The deserialized value (untyped)
 */
export function safeDeserialize(buffer: Buffer | Uint8Array): any;

/**
 * Initialize the SQLite database.
 */
export function initDatabase(): void;

/**
 * Reset invocation counters.
 */
export function resetInvocationCounters(): void;

/**
 * Get invocation index for a testId.
 *
 * @param testId - Test identifier to look up
 * @returns The invocation index as a number
 */
export function getInvocationIndex(testId: string): number;

/**
 * Sanitize a string for use in test IDs.
 *
 * @param str - String to sanitize
 * @returns The sanitized string
 */
export function sanitizeTestId(str: string): string;

/**
 * Get the serializer type being used.
 *
 * @returns Either 'v8' or 'msgpack'
 */
export function getSerializerType(): 'v8' | 'msgpack';

/**
 * Current loop index from environment.
 */
export const LOOP_INDEX: number;

/**
 * Output file path from environment.
 */
export const OUTPUT_FILE: string;

/**
 * Test iteration from environment (declared as a string, unlike the numeric
 * LOOP_INDEX).
 */
export const TEST_ITERATION: string;
124
+
125
// Default export for CommonJS compatibility.
// Bundles every function and constant declared above into one object type,
// so each member's type stays in sync with its named export via `typeof`.
declare const codeflash: {
  capture: typeof capture;
  capturePerf: typeof capturePerf;
  captureMultiple: typeof captureMultiple;
  writeResults: typeof writeResults;
  clearResults: typeof clearResults;
  getResults: typeof getResults;
  setTestName: typeof setTestName;
  safeSerialize: typeof safeSerialize;
  safeDeserialize: typeof safeDeserialize;
  initDatabase: typeof initDatabase;
  resetInvocationCounters: typeof resetInvocationCounters;
  getInvocationIndex: typeof getInvocationIndex;
  sanitizeTestId: typeof sanitizeTestId;
  getSerializerType: typeof getSerializerType;
  LOOP_INDEX: typeof LOOP_INDEX;
  OUTPUT_FILE: typeof OUTPUT_FILE;
  TEST_ITERATION: typeof TEST_ITERATION;
};

export default codeflash;
@@ -0,0 +1,86 @@
1
+ /**
2
+ * codeflash
3
+ *
4
+ * Codeflash CLI runtime helpers for test instrumentation and behavior verification.
5
+ *
6
+ * Main exports:
7
+ * - capture: Capture function return values for behavior verification
8
+ * - capturePerf: Capture performance metrics (timing only)
9
+ * - serialize/deserialize: Value serialization for storage
10
+ * - comparator: Deep equality comparison
11
+ *
12
+ * Usage (CommonJS):
13
+ * const { capture, capturePerf } = require('codeflash');
14
+ *
15
+ * Usage (ES Modules):
16
+ * import { capture, capturePerf } from 'codeflash';
17
+ */
18
+
19
+ 'use strict';
20
+
21
+ // Main capture functions (instrumentation)
22
+ const capture = require('./capture');
23
+
24
+ // Serialization utilities
25
+ const serializer = require('./serializer');
26
+
27
+ // Comparison utilities
28
+ const comparator = require('./comparator');
29
+
30
+ // Result comparison (used by CLI)
31
+ const compareResults = require('./compare-results');
32
+
33
+ // Re-export all public APIs
34
+ module.exports = {
35
+ // === Main Instrumentation API ===
36
+ capture: capture.capture,
37
+ capturePerf: capture.capturePerf,
38
+ captureMultiple: capture.captureMultiple,
39
+
40
+ // === Test Lifecycle ===
41
+ writeResults: capture.writeResults,
42
+ clearResults: capture.clearResults,
43
+ getResults: capture.getResults,
44
+ setTestName: capture.setTestName,
45
+ initDatabase: capture.initDatabase,
46
+ resetInvocationCounters: capture.resetInvocationCounters,
47
+
48
+ // === Serialization ===
49
+ serialize: serializer.serialize,
50
+ deserialize: serializer.deserialize,
51
+ getSerializerType: serializer.getSerializerType,
52
+ safeSerialize: capture.safeSerialize,
53
+ safeDeserialize: capture.safeDeserialize,
54
+
55
+ // === Comparison ===
56
+ comparator: comparator.comparator,
57
+ createComparator: comparator.createComparator,
58
+ strictComparator: comparator.strictComparator,
59
+ looseComparator: comparator.looseComparator,
60
+ isClose: comparator.isClose,
61
+
62
+ // === Result Comparison (CLI helpers) ===
63
+ readTestResults: compareResults.readTestResults,
64
+ compareResults: compareResults.compareResults,
65
+ compareBuffers: compareResults.compareBuffers,
66
+
67
+ // === Utilities ===
68
+ getInvocationIndex: capture.getInvocationIndex,
69
+ sanitizeTestId: capture.sanitizeTestId,
70
+
71
+ // === Constants ===
72
+ LOOP_INDEX: capture.LOOP_INDEX,
73
+ OUTPUT_FILE: capture.OUTPUT_FILE,
74
+ TEST_ITERATION: capture.TEST_ITERATION,
75
+
76
+ // === Batch Looping Control (used by loop-runner) ===
77
+ incrementBatch: capture.incrementBatch,
78
+ getCurrentBatch: capture.getCurrentBatch,
79
+ checkSharedTimeLimit: capture.checkSharedTimeLimit,
80
+ PERF_BATCH_SIZE: capture.PERF_BATCH_SIZE,
81
+ PERF_LOOP_COUNT: capture.PERF_LOOP_COUNT,
82
+
83
+ // === Feature Detection ===
84
+ hasV8: serializer.hasV8,
85
+ hasMsgpack: serializer.hasMsgpack,
86
+ };