@oss-autopilot/core 0.51.1 → 0.52.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,510 @@
1
+ /**
2
+ * State persistence layer for the OSS Contribution Agent.
3
+ * Handles file I/O, locking, backup/restore, and v1-to-v2 migration.
4
+ * No module-level mutable state — functions accept/return AgentState objects.
5
+ */
6
+ import * as fs from 'fs';
7
+ import * as path from 'path';
8
+ import { INITIAL_STATE } from './types.js';
9
+ import { getStatePath, getBackupDir, getDataDir } from './utils.js';
10
+ import { errorMessage } from './errors.js';
11
+ import { debug, warn } from './logger.js';
12
// Tag used for all debug/warn log lines emitted by this module
const MODULE = 'state';
// Current state version (v2 = fresh GitHub fetching; v1 states are migrated on load)
const CURRENT_STATE_VERSION = 2;
// Lock file timeout: if a lock is older than this, it is considered stale
const LOCK_TIMEOUT_MS = 30_000; // 30 seconds
// Legacy path for migration.
// NOTE(review): these resolve against process.cwd() at import time, so legacy
// data is only discovered when the process is started from the original
// project root — confirm that is the intended migration trigger.
const LEGACY_STATE_FILE = path.join(process.cwd(), 'data', 'state.json');
const LEGACY_BACKUP_DIR = path.join(process.cwd(), 'data', 'backups');
20
/**
 * Determine whether the lock file at `lockPath` is stale.
 * A lock counts as stale when its recorded timestamp is older than
 * LOCK_TIMEOUT_MS, or when the file cannot be read or parsed at all.
 * @param {string} lockPath - Path of the lock file to inspect.
 * @returns {boolean} True when the lock can safely be removed.
 */
function isLockStale(lockPath) {
    let lockInfo;
    try {
        lockInfo = JSON.parse(fs.readFileSync(lockPath, 'utf-8'));
    }
    catch (err) {
        // Lock file is unreadable or contains invalid JSON — treat as stale
        debug(MODULE, 'Lock file unreadable or invalid JSON, treating as stale', err);
        return true;
    }
    const ageMs = Date.now() - lockInfo.timestamp;
    return ageMs > LOCK_TIMEOUT_MS;
}
35
/**
 * Acquire an advisory file lock using exclusive-create (`wx` flag).
 * If the lock file already exists but is stale (older than LOCK_TIMEOUT_MS or corrupt),
 * it is removed and re-acquired.
 * @param {string} lockPath - Path of the lock file to create.
 * @throws {Error} If the lock is held by another active process, or if the
 *   lock file cannot be written for a reason other than already existing
 *   (e.g. permission denied, missing directory).
 */
export function acquireLock(lockPath) {
    const lockData = JSON.stringify({ pid: process.pid, timestamp: Date.now() });
    try {
        fs.writeFileSync(lockPath, lockData, { flag: 'wx' }); // Fails if file exists
        return;
    }
    catch (err) {
        // Only EEXIST means "someone holds the lock". Any other failure
        // (EACCES, ENOENT, EROFS, ...) is a real I/O error and must not be
        // misreported as "locked by another process" — propagate it.
        if (err?.code !== 'EEXIST') {
            throw err;
        }
        debug(MODULE, 'Lock file already exists, checking staleness', err);
    }
    if (!isLockStale(lockPath)) {
        throw new Error('State file is locked by another process');
    }
    // Stale lock detected — remove it and try to re-acquire
    try {
        fs.unlinkSync(lockPath);
    }
    catch (err) {
        // Another process may have removed the stale lock first — proceed to re-acquire regardless
        debug(MODULE, 'Stale lock already removed by another process', err);
    }
    try {
        fs.writeFileSync(lockPath, lockData, { flag: 'wx' });
    }
    catch (err) {
        // Another process grabbed the lock between unlink and write
        debug(MODULE, 'Lock re-acquire failed (race condition)', err);
        throw new Error('State file is locked by another process', { cause: err });
    }
}
71
/**
 * Release an advisory file lock, but only if this process owns it.
 * Silently ignores missing lock files or locks owned by other processes.
 * @param {string} lockPath - Path of the lock file to remove.
 */
export function releaseLock(lockPath) {
    let owner;
    try {
        owner = JSON.parse(fs.readFileSync(lockPath, 'utf-8'));
        // Never delete a lock held by a different process
        if (owner.pid === process.pid) {
            fs.unlinkSync(lockPath);
        }
    }
    catch (err) {
        // Lock already removed or unreadable — nothing to do
        debug(MODULE, 'Lock file already removed or unreadable during release', err);
    }
}
87
/**
 * Write data to `filePath` atomically by first writing to a temporary file
 * in the same directory and then renaming. Rename is atomic on POSIX filesystems,
 * preventing partial/corrupt state files if the process crashes mid-write.
 * @param {string} filePath - Final destination path.
 * @param {string|Buffer} data - Content to write.
 * @param {number} [mode] - Octal permission bits; defaults to 0o600.
 * @throws Propagates any fs error after best-effort removal of the temp file.
 */
export function atomicWriteFileSync(filePath, data, mode) {
    const tmpPath = filePath + '.tmp';
    const effectiveMode = mode ?? 0o600;
    try {
        fs.writeFileSync(tmpPath, data, { mode: effectiveMode });
        // writeFileSync's mode is masked by the process umask, so enforce the
        // requested permissions explicitly. Doing it on the tmp file BEFORE the
        // rename means the final path never briefly exists with wrong perms.
        fs.chmodSync(tmpPath, effectiveMode);
        fs.renameSync(tmpPath, filePath);
    }
    catch (err) {
        // Don't leak an orphaned .tmp file on failure (best-effort cleanup)
        try {
            fs.unlinkSync(tmpPath);
        }
        catch {
            // tmp file may never have been created — ignore
        }
        throw err;
    }
}
102
/**
 * Migrate state from v1 (local PR tracking) to v2 (fresh GitHub fetching).
 * Preserves repoScores and config; drops the legacy PR arrays.
 * @param {object} rawState - A validated v1 state object.
 * @returns {object} The equivalent v2 state.
 */
function migrateV1ToV2(rawState) {
    debug(MODULE, 'Migrating state from v1 to v2 (fresh GitHub fetching)...');
    // Every repo referenced by a historical (merged or closed) PR gets a
    // default score record. Counts are NOT incremented here — the existing
    // score may already reflect these PRs.
    const historicalPRs = [...(rawState.mergedPRs || []), ...(rawState.closedPRs || [])];
    const repoScores = { ...(rawState.repoScores || {}) };
    for (const pr of historicalPRs) {
        if (repoScores[pr.repo]) {
            continue;
        }
        repoScores[pr.repo] = {
            repo: pr.repo,
            score: 5,
            mergedPRCount: 0,
            closedWithoutMergeCount: 0,
            avgResponseDays: null,
            lastEvaluatedAt: new Date().toISOString(),
            signals: {
                hasActiveMaintainers: true,
                isResponsive: false,
                hasHostileComments: false,
            },
        };
    }
    const migratedState = {
        version: 2,
        activeIssues: rawState.activeIssues || [],
        repoScores,
        config: rawState.config,
        events: rawState.events || [],
        lastRunAt: new Date().toISOString(),
    };
    debug(MODULE, `Migration complete. Preserved ${Object.keys(repoScores).length} repo scores.`);
    return migratedState;
}
142
/**
 * Validate that a loaded state has the required structure.
 * Handles both v1 (with PR arrays) and v2 (without).
 * NOTE: mutates the input — missing optional collections (repoScores,
 * events, mergedPRs) are backfilled with empty defaults.
 * @param {unknown} state - Parsed JSON to validate.
 * @returns {boolean} True when the object is a usable v1/v2 state.
 */
function isValidState(state) {
    if (!state || typeof state !== 'object') {
        return false;
    }
    const s = state;
    // Backfill collections absent from older states (only when strictly
    // undefined — an explicit null must still fail validation below).
    if (s.repoScores === undefined) {
        s.repoScores = {};
    }
    if (s.events === undefined) {
        s.events = [];
    }
    if (s.mergedPRs === undefined) {
        s.mergedPRs = [];
    }
    // Base requirements shared by all versions
    const repoScoresOk = typeof s.repoScores === 'object' && s.repoScores !== null;
    const configOk = typeof s.config === 'object' && s.config !== null;
    if (typeof s.version !== 'number' || !repoScoresOk || !Array.isArray(s.events) || !configOk) {
        return false;
    }
    // v2+ has no further requirements
    if (s.version !== 1) {
        return true;
    }
    // v1 requires its PR arrays to be present (dropped later during migration)
    return [s.activePRs, s.dormantPRs, s.mergedPRs, s.closedPRs].every((arr) => Array.isArray(arr));
}
181
/**
 * Create a fresh state (v2: fresh GitHub fetching).
 * @returns {object} A brand-new state seeded from INITIAL_STATE's config,
 *   with all user-specific collections reset to empty.
 */
export function createFreshState() {
    // Copy the template config, deep-copying its arrays so the fresh state
    // never shares mutable collections with INITIAL_STATE.
    const config = {
        ...INITIAL_STATE.config,
        setupComplete: false,
        languages: INITIAL_STATE.config.languages.slice(),
        labels: INITIAL_STATE.config.labels.slice(),
        excludeRepos: [],
        trustedProjects: [],
        shelvedPRUrls: [],
        dismissedIssues: {},
    };
    return {
        version: CURRENT_STATE_VERSION,
        activeIssues: [],
        repoScores: {},
        config,
        events: [],
        lastRunAt: new Date().toISOString(),
    };
}
203
/**
 * Migrate state from legacy ./data/ location to ~/.oss-autopilot/.
 * Copies the state file and any state-*.json backups, then deletes the
 * legacy files (and the legacy directories when empty). On failure, rolls
 * back a partially-copied new state file and prints remediation steps.
 * @returns {boolean} True if migration was performed, false if skipped or failed.
 */
function migrateFromLegacyLocation() {
    const newStatePath = getStatePath();
    // If new state already exists, no migration needed
    if (fs.existsSync(newStatePath)) {
        return false;
    }
    // Check for legacy state file
    if (!fs.existsSync(LEGACY_STATE_FILE)) {
        return false;
    }
    debug(MODULE, 'Migrating state from ./data/ to ~/.oss-autopilot/...');
    try {
        // Ensure the new data directory exists (getDataDir creates it as a side effect)
        getDataDir();
        // Copy state file
        fs.copyFileSync(LEGACY_STATE_FILE, newStatePath);
        debug(MODULE, `Migrated state file to ${newStatePath}`);
        // Copy backups if they exist (only files matching the state-*.json pattern)
        if (fs.existsSync(LEGACY_BACKUP_DIR)) {
            const newBackupDir = getBackupDir();
            const backupFiles = fs
                .readdirSync(LEGACY_BACKUP_DIR)
                .filter((f) => f.startsWith('state-') && f.endsWith('.json'));
            for (const backupFile of backupFiles) {
                const srcPath = path.join(LEGACY_BACKUP_DIR, backupFile);
                const destPath = path.join(newBackupDir, backupFile);
                fs.copyFileSync(srcPath, destPath);
            }
            debug(MODULE, `Migrated ${backupFiles.length} backup files`);
        }
        // Remove legacy files only after all copies succeeded
        fs.unlinkSync(LEGACY_STATE_FILE);
        debug(MODULE, 'Removed legacy state file');
        // Remove legacy backup files.
        // NOTE(review): this second readdir removes EVERY file in the legacy
        // backup dir, including ones that didn't match the copy filter above —
        // confirm that non-matching files are safe to discard.
        if (fs.existsSync(LEGACY_BACKUP_DIR)) {
            const backupFiles = fs.readdirSync(LEGACY_BACKUP_DIR);
            for (const file of backupFiles) {
                fs.unlinkSync(path.join(LEGACY_BACKUP_DIR, file));
            }
            fs.rmdirSync(LEGACY_BACKUP_DIR);
        }
        // Try to remove legacy data directory if empty
        const legacyDataDir = path.dirname(LEGACY_STATE_FILE);
        if (fs.existsSync(legacyDataDir)) {
            const remaining = fs.readdirSync(legacyDataDir);
            if (remaining.length === 0) {
                fs.rmdirSync(legacyDataDir);
                debug(MODULE, 'Removed empty legacy data directory');
            }
        }
        debug(MODULE, 'Migration complete!');
        return true;
    }
    catch (error) {
        warn(MODULE, `Failed to migrate state: ${errorMessage(error)}`);
        // Clean up partial migration to avoid inconsistent state: if BOTH
        // files exist, the copy succeeded but a later step failed — remove the
        // new copy so the legacy file remains the single source of truth.
        if (fs.existsSync(newStatePath) && fs.existsSync(LEGACY_STATE_FILE)) {
            // If both files exist, the migration was partial - remove the new file
            try {
                fs.unlinkSync(newStatePath);
                debug(MODULE, 'Cleaned up partial migration - removed incomplete new state file');
            }
            catch (cleanupErr) {
                warn(MODULE, 'Could not clean up partial migration file');
                debug(MODULE, 'Partial migration cleanup failed', cleanupErr);
            }
        }
        warn(MODULE, 'To resolve this issue:');
        warn(MODULE, ' 1. Ensure you have write permissions to ~/.oss-autopilot/');
        warn(MODULE, ' 2. Check available disk space');
        warn(MODULE, ' 3. Manually copy ./data/state.json to ~/.oss-autopilot/state.json');
        warn(MODULE, ' 4. Or delete ./data/state.json to start fresh');
        return false;
    }
}
282
/**
 * Attempt to restore state from the most recent valid backup.
 * Walks backups newest-first, skipping corrupt ones; on the first valid
 * backup it migrates v1→v2 if needed and overwrites the main state file.
 * @returns {object|null} The restored state, or null if no valid backup is found.
 */
function tryRestoreFromBackup() {
    const backupDir = getBackupDir();
    if (!fs.existsSync(backupDir)) {
        return null;
    }
    // Get backup files sorted by name (most recent first, since names include timestamps)
    const backupFiles = fs
        .readdirSync(backupDir)
        .filter((f) => f.startsWith('state-') && f.endsWith('.json'))
        .sort()
        .reverse();
    for (const backupFile of backupFiles) {
        const backupPath = path.join(backupDir, backupFile);
        try {
            const data = fs.readFileSync(backupPath, 'utf-8');
            let state = JSON.parse(data);
            // isValidState also backfills missing optional collections in place
            if (isValidState(state)) {
                debug(MODULE, `Successfully restored state from backup: ${backupFile}`);
                // Migrate from v1 to v2 if needed
                if (state.version === 1) {
                    state = migrateV1ToV2(state);
                }
                const repoCount = Object.keys(state.repoScores).length;
                debug(MODULE, `Restored state v${state.version}: ${repoCount} repo scores`);
                // Overwrite the corrupted main state file with the restored backup (atomic write)
                const statePath = getStatePath();
                atomicWriteFileSync(statePath, JSON.stringify(state, null, 2), 0o600);
                debug(MODULE, 'Restored backup written to main state file');
                return state;
            }
        }
        catch (backupErr) {
            // This backup is also corrupted (read or parse failed) — try the next one
            warn(MODULE, `Backup ${backupFile} is corrupted, trying next...`);
            debug(MODULE, `Backup ${backupFile} parse failed`, backupErr);
        }
    }
    // Every backup was missing, invalid, or corrupt
    return null;
}
325
/**
 * Load state from file, or create initial state if none exists.
 * If the main state file is corrupted, attempts to restore from the most recent backup.
 * Performs migration from legacy ./data/ location if needed, v1→v2 schema
 * migration, and cleanup of fields belonging to removed features.
 * @returns Object with the loaded state and the file's mtime (for change detection;
 *   0 when no file exists or the mtime cannot be read).
 */
export function loadState() {
    // Try to migrate from legacy location first
    migrateFromLegacyLocation();
    const statePath = getStatePath();
    try {
        if (fs.existsSync(statePath)) {
            const data = fs.readFileSync(statePath, 'utf-8');
            let state = JSON.parse(data);
            // Validate required fields exist (isValidState also backfills
            // missing optional collections in place)
            if (!isValidState(state)) {
                warn(MODULE, 'Invalid state file structure, attempting to restore from backup...');
                const restoredState = tryRestoreFromBackup();
                if (restoredState) {
                    // tryRestoreFromBackup rewrote the state file, so re-stat for the new mtime
                    const mtimeMs = safeGetMtimeMs(statePath);
                    return { state: restoredState, mtimeMs };
                }
                warn(MODULE, 'No valid backup found, starting fresh');
                return { state: createFreshState(), mtimeMs: 0 };
            }
            // Migrate from v1 to v2 if needed
            if (state.version === 1) {
                state = migrateV1ToV2(state);
                // Save the migrated state immediately (atomic write)
                atomicWriteFileSync(statePath, JSON.stringify(state, null, 2), 0o600);
                debug(MODULE, 'Migrated state saved');
            }
            // Strip legacy fields from persisted state (snoozedPRs and PR dismiss
            // entries were removed in the three-state PR model simplification)
            try {
                let needsCleanupSave = false;
                const rawConfig = state.config;
                if (rawConfig.snoozedPRs) {
                    delete rawConfig.snoozedPRs;
                    needsCleanupSave = true;
                }
                // Strip PR URLs from dismissedIssues (PR dismiss removed)
                if (state.config.dismissedIssues) {
                    const PR_URL_RE = /\/pull\/\d+$/;
                    for (const url of Object.keys(state.config.dismissedIssues)) {
                        if (PR_URL_RE.test(url)) {
                            delete state.config.dismissedIssues[url];
                            needsCleanupSave = true;
                        }
                    }
                }
                // Persist only when something was actually removed
                if (needsCleanupSave) {
                    atomicWriteFileSync(statePath, JSON.stringify(state, null, 2), 0o600);
                    warn(MODULE, 'Cleaned up removed features (snoozedPRs, dismissed PR URLs) from persisted state');
                }
            }
            catch (cleanupError) {
                warn(MODULE, `Failed to clean up removed features from state: ${errorMessage(cleanupError)}`);
                // Continue with loaded state — cleanup will be retried on next load
            }
            // Record file mtime so reloadIfChanged() can detect external writes
            const mtimeMs = safeGetMtimeMs(statePath);
            // Log appropriate message based on version
            const repoCount = Object.keys(state.repoScores).length;
            debug(MODULE, `Loaded state v${state.version}: ${repoCount} repo scores tracked`);
            return { state, mtimeMs };
        }
    }
    catch (error) {
        // Read or JSON.parse failed — fall back to the backup chain
        warn(MODULE, 'Error loading state:', error);
        warn(MODULE, 'Attempting to restore from backup...');
        const restoredState = tryRestoreFromBackup();
        if (restoredState) {
            const mtimeMs = safeGetMtimeMs(statePath);
            return { state: restoredState, mtimeMs };
        }
        warn(MODULE, 'No valid backup found, starting fresh');
    }
    // Reached when no state file exists, or every recovery path failed
    debug(MODULE, 'No existing state found, initializing...');
    return { state: createFreshState(), mtimeMs: 0 };
}
406
/**
 * Safely read a file's mtime in milliseconds.
 * @param {string} filePath - File to stat.
 * @returns {number} The mtime, or 0 if the stat call fails (which makes
 *   reload detection always trigger).
 */
function safeGetMtimeMs(filePath) {
    try {
        const { mtimeMs } = fs.statSync(filePath);
        return mtimeMs;
    }
    catch (error) {
        debug(MODULE, `Could not read state file mtime (reload detection will always trigger): ${errorMessage(error)}`);
        return 0;
    }
}
418
/**
 * Clean up old backup files, keeping only the 10 most recent.
 * Failures (per-file or directory-wide) are logged and never thrown.
 */
function cleanupBackups() {
    const backupDir = getBackupDir();
    try {
        // Names embed timestamps, so a reverse lexicographic sort puts the
        // newest backups first.
        const backups = fs
            .readdirSync(backupDir)
            .filter((name) => name.startsWith('state-'))
            .sort()
            .reverse();
        // Everything beyond the 10 newest is expendable
        for (const file of backups.slice(10)) {
            try {
                fs.unlinkSync(path.join(backupDir, file));
            }
            catch (error) {
                warn(MODULE, `Could not delete old backup ${file}:`, errorMessage(error));
            }
        }
    }
    catch (error) {
        warn(MODULE, 'Could not clean up backups:', errorMessage(error));
    }
}
443
/**
 * Persist state to disk, creating a timestamped backup of the previous
 * state file first. Retains at most 10 backup files. Serializes writers
 * via an advisory lock file next to the state file.
 * @param state - The state object to serialize (pretty-printed JSON).
 * @returns The file's mtime after writing (for change detection; 0 if the
 *   post-write stat fails).
 * @throws If the lock is held by another active process (from acquireLock)
 *   or the atomic write itself fails; backup failures are non-fatal.
 */
export function saveState(state) {
    const statePath = getStatePath();
    const lockPath = statePath + '.lock';
    const backupDir = getBackupDir();
    // Acquire advisory lock to prevent concurrent writes
    acquireLock(lockPath);
    try {
        // Create backup of existing state (best-effort, non-fatal)
        try {
            if (fs.existsSync(statePath)) {
                // Colons/dots are invalid in filenames on some platforms
                const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
                // Random suffix disambiguates multiple saves within one second
                const randomSuffix = Math.random().toString(36).slice(2, 8).padEnd(6, '0');
                const backupFile = path.join(backupDir, `state-${timestamp}-${randomSuffix}.json`);
                fs.copyFileSync(statePath, backupFile);
                fs.chmodSync(backupFile, 0o600);
                // Keep only last 10 backups
                cleanupBackups();
            }
        }
        catch (backupErr) {
            warn(MODULE, `Could not create backup before save: ${errorMessage(backupErr)}`);
            // Continue with save — losing a backup is acceptable, losing the save is not
        }
        // Atomic write: write to temp file then rename to prevent corruption on crash
        atomicWriteFileSync(statePath, JSON.stringify(state, null, 2), 0o600);
        const mtimeMs = safeGetMtimeMs(statePath);
        debug(MODULE, 'State saved successfully');
        return mtimeMs;
    }
    finally {
        // Always release the advisory lock, even if the write threw
        releaseLock(lockPath);
    }
}
481
/**
 * Re-read state from disk if the file has been modified since the last load/save.
 * Uses mtime comparison (single statSync call) to avoid unnecessary JSON parsing.
 * @param lastLoadedMtimeMs - The mtime recorded at the previous load/save.
 * @returns The new state and mtime if reloaded, or null if no change detected
 *   (or the state file cannot be stat'ed, e.g. it was deleted).
 */
export function reloadStateIfChanged(lastLoadedMtimeMs) {
    try {
        const statePath = getStatePath();
        const currentMtimeMs = fs.statSync(statePath).mtimeMs;
        // Unchanged since last load/save — keep the in-memory state
        if (currentMtimeMs === lastLoadedMtimeMs)
            return null;
        const result = loadState();
        // Ensure mtime is always current after reload (covers backup-restore and fresh-state paths,
        // where loadState may return mtimeMs of 0 or a stale value)
        // to prevent repeated unnecessary reloads on every request.
        try {
            result.mtimeMs = fs.statSync(statePath).mtimeMs;
        }
        catch (err) {
            // If file was just loaded, stat should not fail. If it does,
            // next reloadIfChanged() will simply trigger another reload.
            debug(MODULE, 'Could not re-read mtime after reload (will retry next cycle)', err);
        }
        return result;
    }
    catch (error) {
        // statSync failure (file deleted) is benign — keep current in-memory state.
        warn(MODULE, `Failed to reload state from disk: ${errorMessage(error)}`);
        return null;
    }
}