@hardlydifficult/repo-processor 1.0.75 → 1.0.76
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +215 -222
- package/package.json +9 -9
package/README.md
CHANGED
|
@@ -11,276 +11,277 @@ npm install @hardlydifficult/repo-processor
|
|
|
11
11
|
## Quick Start
|
|
12
12
|
|
|
13
13
|
```typescript
|
|
14
|
-
import { RepoProcessor, GitYamlStore } from "@hardlydifficult/repo-processor";
|
|
15
|
-
import {
|
|
14
|
+
import { RepoProcessor, GitYamlStore, type ProcessingProgress } from "@hardlydifficult/repo-processor";
|
|
15
|
+
import { GitHubClient } from "@hardlydifficult/github";
|
|
16
16
|
|
|
17
|
-
//
|
|
17
|
+
// Set up the git-backed YAML store for state persistence
|
|
18
18
|
const store = new GitYamlStore({
|
|
19
|
-
cloneUrl: "https://github.com/owner/repo.git",
|
|
20
|
-
localPath: "
|
|
19
|
+
cloneUrl: "https://github.com/owner/repo-data.git",
|
|
20
|
+
localPath: "./repo-data",
|
|
21
21
|
resultDir: (owner, repo) => `repos/${owner}/${repo}`,
|
|
22
|
-
gitUser: { name: "
|
|
22
|
+
gitUser: { name: "Processor Bot", email: "bot@example.com" },
|
|
23
23
|
});
|
|
24
24
|
|
|
25
|
-
//
|
|
25
|
+
// Build a GitHub client with token authentication
|
|
26
|
+
const github = new GitHubClient({ token: process.env.GITHUB_TOKEN });
|
|
27
|
+
|
|
28
|
+
// Configure callbacks to define how files and directories are processed
|
|
26
29
|
const callbacks = {
|
|
27
|
-
shouldProcess: (entry) => entry.
|
|
30
|
+
shouldProcess: (entry) => entry.path.endsWith(".ts"),
|
|
28
31
|
processFile: async ({ entry, content }) => ({
|
|
29
32
|
path: entry.path,
|
|
33
|
+
sha: entry.sha,
|
|
30
34
|
length: content.length,
|
|
31
35
|
}),
|
|
32
|
-
processDirectory: async (
|
|
33
|
-
path
|
|
34
|
-
fileCount:
|
|
36
|
+
processDirectory: async ({ path, subtreeFilePaths }) => ({
|
|
37
|
+
path,
|
|
38
|
+
fileCount: subtreeFilePaths.length,
|
|
35
39
|
}),
|
|
36
40
|
};
|
|
37
41
|
|
|
38
|
-
//
|
|
42
|
+
// Create and run the processor
|
|
39
43
|
const processor = new RepoProcessor({
|
|
40
|
-
githubClient:
|
|
44
|
+
githubClient: github,
|
|
41
45
|
store,
|
|
42
46
|
callbacks,
|
|
47
|
+
concurrency: 5, // Optional: default 5
|
|
48
|
+
branch: "main", // Optional: default "main"
|
|
43
49
|
});
|
|
44
50
|
|
|
45
|
-
const result = await processor.run("
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
## RepoProcessor: Incremental Repository Processing
|
|
50
|
-
|
|
51
|
-
The `RepoProcessor` class implements an incremental pipeline for processing GitHub repository file trees. It detects changes by comparing current and previous file SHAs, processes only changed files in parallel batches, and processes affected directories bottom-up.
|
|
51
|
+
const result = await processor.run("hardlydifficult", "typescript", (progress) => {
|
|
52
|
+
console.log(`${progress.phase}: ${progress.message}`);
|
|
53
|
+
});
|
|
52
54
|
|
|
53
|
-
|
|
55
|
+
// Example result:
|
|
56
|
+
// {
|
|
57
|
+
// filesProcessed: 12,
|
|
58
|
+
// filesRemoved: 0,
|
|
59
|
+
// dirsProcessed: 5
|
|
60
|
+
// }
|
|
61
|
+
```
|
|
54
62
|
|
|
55
|
-
|
|
56
|
-
|-------|------|:-------:|-------------|
|
|
57
|
-
| `githubClient` | `GitHubClient` | — | GitHub API client from `@hardlydifficult/github` |
|
|
58
|
-
| `store` | `ProcessorStore` | — | Persistence layer for file/dir results and manifests |
|
|
59
|
-
| `callbacks` | `ProcessorCallbacks` | — | Domain logic for filtering, file processing, and directory processing |
|
|
60
|
-
| `concurrency` | `number` | `5` | Max concurrent file/dir processing per batch |
|
|
61
|
-
| `branch` | `string` | `"main"` | Git branch to fetch tree from |
|
|
63
|
+
## RepoProcessor
|
|
62
64
|
|
|
63
|
-
|
|
65
|
+
Processes GitHub repositories incrementally by fetching file trees, detecting changes via SHA diffing, and persisting results.
|
|
64
66
|
|
|
65
|
-
|
|
67
|
+
### Constructor
|
|
66
68
|
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
69
|
+
| Parameter | Description |
|
|
70
|
+
|-----------|-------------|
|
|
71
|
+
| `githubClient` | GitHub client for fetching trees and file contents |
|
|
72
|
+
| `store` | Persistence layer implementing `ProcessorStore` |
|
|
73
|
+
| `callbacks` | Domain logic callbacks (`shouldProcess`, `processFile`, `processDirectory`) |
|
|
74
|
+
| `concurrency?` | Parallel batch size (default: `5`) |
|
|
75
|
+
| `branch?` | Git branch to fetch from (default: `"main"`) |
|
|
74
76
|
|
|
75
|
-
###
|
|
77
|
+
### `run(owner, repo, onProgress?)`
|
|
76
78
|
|
|
77
|
-
|
|
79
|
+
Processes the repository and returns a summary.
|
|
78
80
|
|
|
81
|
+
**Returns:**
|
|
79
82
|
```typescript
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
interface DirectoryContext {
|
|
86
|
-
path: string;
|
|
87
|
-
sha: string;
|
|
88
|
-
subtreeFilePaths: readonly string[];
|
|
89
|
-
children: readonly DirectoryChild[];
|
|
90
|
-
tree: readonly TreeEntry[];
|
|
91
|
-
}
|
|
92
|
-
|
|
93
|
-
interface DirectoryChild {
|
|
94
|
-
readonly name: string;
|
|
95
|
-
readonly isDir: boolean;
|
|
96
|
-
readonly fullPath: string;
|
|
83
|
+
{
|
|
84
|
+
filesProcessed: number;
|
|
85
|
+
filesRemoved: number;
|
|
86
|
+
dirsProcessed: number;
|
|
97
87
|
}
|
|
98
88
|
```
|
|
99
89
|
|
|
100
|
-
|
|
101
|
-
|
|
90
|
+
**Example:**
|
|
102
91
|
```typescript
|
|
103
|
-
import { RepoProcessor } from "@hardlydifficult/repo-processor";
|
|
104
|
-
import { createGitHubClient } from "@hardlydifficult/github";
|
|
105
|
-
|
|
106
|
-
const processor = new RepoProcessor({
|
|
107
|
-
githubClient: createGitHubClient({ token: process.env.GITHUB_TOKEN! }),
|
|
108
|
-
store: new GitYamlStore({
|
|
109
|
-
cloneUrl: "https://github.com/hardlydifficult/results.git",
|
|
110
|
-
localPath: ".results",
|
|
111
|
-
resultDir: () => "repos",
|
|
112
|
-
gitUser: { name: "CI", email: "ci@example.com" },
|
|
113
|
-
}),
|
|
114
|
-
callbacks: {
|
|
115
|
-
shouldProcess: (entry) => entry.path.endsWith(".ts"),
|
|
116
|
-
processFile: async ({ entry, content }) => ({
|
|
117
|
-
lines: content.split("\n").length,
|
|
118
|
-
checksum: crypto.createHash("sha256").update(content).digest("hex"),
|
|
119
|
-
}),
|
|
120
|
-
processDirectory: async (ctx) => ({
|
|
121
|
-
path: ctx.path,
|
|
122
|
-
fileCount: ctx.subtreeFilePaths.length,
|
|
123
|
-
hasSubdirs: ctx.children.some((c) => c.isDir),
|
|
124
|
-
}),
|
|
125
|
-
},
|
|
126
|
-
concurrency: 10,
|
|
127
|
-
});
|
|
128
|
-
|
|
129
92
|
const result = await processor.run("hardlydifficult", "typescript", (progress) => {
|
|
130
|
-
console.log(`Phase: ${progress.phase}
|
|
93
|
+
console.log(`Phase: ${progress.phase}, Files: ${progress.filesCompleted}/${progress.filesTotal}`);
|
|
131
94
|
});
|
|
132
|
-
// => { filesProcessed: 12, filesRemoved: 0, dirsProcessed: 4 }
|
|
133
95
|
```
|
|
134
96
|
|
|
135
|
-
|
|
97
|
+
### Directory Processing
|
|
136
98
|
|
|
137
|
-
|
|
99
|
+
Directories are processed bottom-up (deepest first) after all files. Each directory context includes:
|
|
100
|
+
- `path`: Directory path (empty string for root)
|
|
101
|
+
- `sha`: Git tree SHA
|
|
102
|
+
- `subtreeFilePaths`: All file paths beneath the directory
|
|
103
|
+
- `children`: Immediate children (files and subdirectories)
|
|
104
|
+
- `tree`: Full repo tree entries
|
|
138
105
|
|
|
139
|
-
|
|
106
|
+
## RepoWatcher
|
|
140
107
|
|
|
141
|
-
|
|
142
|
-
|-------|------|:-------:|-------------|
|
|
143
|
-
| `stateKey` | `string` | — | Key used for persisting state (e.g., `"repo-processor"`). |
|
|
144
|
-
| `stateDirectory` | `string` | — | Directory where state is persisted. |
|
|
145
|
-
| `autoSaveMs` | `number` | `5000` | Auto-save interval in milliseconds. |
|
|
146
|
-
| `run` | `(owner: string, name: string) => Promise<T>` | — | Function to execute when processing is triggered. |
|
|
147
|
-
| `onComplete` | `(owner, name, result, sha) => void` | — | Called after a successful run (optional). |
|
|
148
|
-
| `onError` | `(owner, name, error) => void` | — | Called when a run fails (optional). |
|
|
149
|
-
| `onEvent` | `(event) => void` | — | Logger/event callback (optional). |
|
|
150
|
-
| `maxAttempts` | `number` | `1` | Max attempts for each run (includes initial + retries). |
|
|
108
|
+
Watches repositories for SHA changes and triggers processing with retry logic and deduplication.
|
|
151
109
|
|
|
152
|
-
###
|
|
110
|
+
### Constructor
|
|
153
111
|
|
|
154
112
|
```typescript
|
|
155
|
-
|
|
156
|
-
|
|
113
|
+
interface RepoWatcherConfig<TResult> {
|
|
114
|
+
stateKey: string; // Key for persisting state
|
|
115
|
+
stateDirectory: string; // Directory for state files
|
|
116
|
+
autoSaveMs?: number; // Auto-save interval (default: 5000)
|
|
117
|
+
run: (owner: string, name: string) => Promise<TResult>;
|
|
118
|
+
onComplete?: (owner: string, name: string, result: TResult, sha: string) => void;
|
|
119
|
+
onError?: (owner: string, name: string, error: unknown) => void;
|
|
120
|
+
onEvent?: (event: StateTrackerEvent) => void;
|
|
121
|
+
maxAttempts?: number; // Retry attempts (default: 1)
|
|
122
|
+
}
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
### Methods
|
|
157
126
|
|
|
127
|
+
| Method | Description |
|
|
128
|
+
|--------|-------------|
|
|
129
|
+
| `init()` | Load persisted state from disk |
|
|
130
|
+
| `handlePush(owner, name, sha)` | Handle GitHub push event. Queues processing if SHA changed |
|
|
131
|
+
| `trigger(owner, name)` | Manually trigger processing (skips SHA check) |
|
|
132
|
+
| `triggerManual(owner, name)` | Trigger processing and await the outcome. Resolves to `{ success: true, result }` or `{ success: false, reason }` |
|
|
133
|
+
| `isRunning(owner, name)` | Check if repo is currently processing |
|
|
134
|
+
| `getLastSha(key)` | Get last processed SHA for a repo key |
|
|
135
|
+
| `setLastSha(key, sha)` | Manually set last processed SHA |
|
|
136
|
+
|
|
137
|
+
**Example:**
|
|
138
|
+
```typescript
|
|
158
139
|
const watcher = new RepoWatcher({
|
|
159
140
|
stateKey: "repo-processor",
|
|
160
|
-
stateDirectory: "
|
|
141
|
+
stateDirectory: "./state",
|
|
161
142
|
run: async (owner, name) => {
|
|
162
|
-
const processor = new RepoProcessor({
|
|
163
|
-
|
|
164
|
-
store: new GitYamlStore({
|
|
165
|
-
cloneUrl: "https://github.com/hardlydifficult/results.git",
|
|
166
|
-
localPath: ".results",
|
|
167
|
-
resultDir: () => "repos",
|
|
168
|
-
gitUser: { name: "CI", email: "ci@example.com" },
|
|
169
|
-
}),
|
|
170
|
-
callbacks: {
|
|
171
|
-
shouldProcess: (entry) => entry.path.endsWith(".ts"),
|
|
172
|
-
processFile: async ({ entry, content }) => ({
|
|
173
|
-
path: entry.path,
|
|
174
|
-
lines: content.split("\n").length,
|
|
175
|
-
}),
|
|
176
|
-
processDirectory: async (ctx) => ({
|
|
177
|
-
path: ctx.path,
|
|
178
|
-
fileCount: ctx.subtreeFilePaths.length,
|
|
179
|
-
}),
|
|
180
|
-
},
|
|
181
|
-
});
|
|
182
|
-
return processor.run(owner, name);
|
|
143
|
+
const processor = new RepoProcessor({ /* ... */ });
|
|
144
|
+
return await processor.run(owner, name);
|
|
183
145
|
},
|
|
146
|
+
onComplete: (owner, name, result, sha) => {
|
|
147
|
+
console.log(`Processed ${owner}/${name}, SHA: ${sha}`);
|
|
148
|
+
},
|
|
149
|
+
maxAttempts: 3,
|
|
184
150
|
});
|
|
185
151
|
|
|
186
|
-
await watcher.init();
|
|
152
|
+
await watcher.init();
|
|
187
153
|
watcher.handlePush("hardlydifficult", "typescript", "abc123");
|
|
188
|
-
// Triggers processing if SHA differs from last tracked SHA
|
|
189
154
|
```
|
|
190
155
|
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
```typescript
|
|
194
|
-
// Handle push events (SHA comparison performed automatically)
|
|
195
|
-
watcher.handlePush("hardlydifficult", "typescript", "abc123...");
|
|
196
|
-
|
|
197
|
-
// Manual trigger (no SHA comparison)
|
|
198
|
-
watcher.trigger("hardlydifficult", "typescript");
|
|
156
|
+
## GitYamlStore
|
|
199
157
|
|
|
200
|
-
|
|
201
|
-
const response = await watcher.triggerManual("hardlydifficult", "typescript");
|
|
202
|
-
// => { success: true, result: ProcessingResult } | { success: false, reason: string }
|
|
203
|
-
```
|
|
158
|
+
Persists processing results as YAML files in a git repository.
|
|
204
159
|
|
|
205
|
-
|
|
160
|
+
### Constructor
|
|
206
161
|
|
|
207
|
-
|
|
162
|
+
| Parameter | Description |
|
|
163
|
+
|-----------|-------------|
|
|
164
|
+
| `cloneUrl` | URL of the git repository to clone/pull |
|
|
165
|
+
| `localPath` | Local directory to clone the repo into |
|
|
166
|
+
| `resultDir` | Function mapping `(owner, repo)` to result subdirectory |
|
|
167
|
+
| `authToken?` | GitHub token for authenticated operations (fallback: `GITHUB_TOKEN` env) |
|
|
168
|
+
| `gitUser` | Git committer identity: `{ name: string, email: string }` |
|
|
208
169
|
|
|
209
|
-
###
|
|
170
|
+
### Result Storage
|
|
210
171
|
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
| `cloneUrl` | `string` | — | URL of the git repository to clone (e.g., `"https://github.com/user/results.git"`). |
|
|
214
|
-
| `localPath` | `string` | — | Local directory to clone the repo into. |
|
|
215
|
-
| `resultDir` | `(owner, repo) => string` | — | Function mapping owner/repo to result subdirectory. |
|
|
216
|
-
| `authToken` | `string` | `process.env.GITHUB_TOKEN` | GitHub token for authenticated clone/push. |
|
|
217
|
-
| `gitUser` | `{ name: string; email: string }` | — | Git user identity used when committing. |
|
|
172
|
+
- File results: `<resultDir>/<filePath>.yml`
|
|
173
|
+
- Directory results: `<resultDir>/<dirPath>/dir.yml`
|
|
218
174
|
|
|
219
|
-
|
|
175
|
+
Each YAML file includes a `sha` field for change detection.
|
|
220
176
|
|
|
177
|
+
**Example:**
|
|
221
178
|
```typescript
|
|
222
|
-
import { GitYamlStore } from "@hardlydifficult/repo-processor";
|
|
223
|
-
import { z } from "zod";
|
|
224
|
-
|
|
225
179
|
const store = new GitYamlStore({
|
|
226
|
-
cloneUrl: "https://github.com/
|
|
227
|
-
localPath: "
|
|
228
|
-
resultDir: (owner, repo) => `
|
|
229
|
-
gitUser: { name: "
|
|
180
|
+
cloneUrl: "https://github.com/owner/repo-data.git",
|
|
181
|
+
localPath: "./data",
|
|
182
|
+
resultDir: (owner, repo) => `results/${owner}/${repo}`,
|
|
183
|
+
gitUser: { name: "Processor Bot", email: "bot@example.com" },
|
|
230
184
|
});
|
|
185
|
+
```
|
|
231
186
|
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
"
|
|
239
|
-
"typescript",
|
|
187
|
+
### Typed Load Helpers
|
|
188
|
+
|
|
189
|
+
```typescript
|
|
190
|
+
// Load file result with Zod validation
|
|
191
|
+
const result = await store.loadFileResult(
|
|
192
|
+
"owner",
|
|
193
|
+
"repo",
|
|
240
194
|
"src/index.ts",
|
|
241
|
-
|
|
195
|
+
z.object({ path: z.string(), sha: z.string(), length: z.number() })
|
|
196
|
+
);
|
|
197
|
+
|
|
198
|
+
// Load directory result with Zod validation
|
|
199
|
+
const dirResult = await store.loadDirResult(
|
|
200
|
+
"owner",
|
|
201
|
+
"repo",
|
|
202
|
+
"src/utils",
|
|
203
|
+
z.object({ path: z.string(), fileCount: z.number() })
|
|
242
204
|
);
|
|
243
|
-
// { path: "src/index.ts", lines: 12, sha: "abc..." }
|
|
244
205
|
```
|
|
245
206
|
|
|
246
|
-
## resolveStaleDirectories
|
|
207
|
+
## resolveStaleDirectories
|
|
208
|
+
|
|
209
|
+
Identifies directories requiring reprocessing by combining SHA-based stale detection with diff-derived stale directories.
|
|
247
210
|
|
|
248
|
-
|
|
211
|
+
### Signature
|
|
249
212
|
|
|
250
213
|
```typescript
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
214
|
+
async function resolveStaleDirectories(
|
|
215
|
+
owner: string,
|
|
216
|
+
repo: string,
|
|
217
|
+
staleDirsFromDiff: readonly string[],
|
|
218
|
+
allFilePaths: readonly string[],
|
|
219
|
+
tree: readonly TreeEntry[],
|
|
220
|
+
store: ProcessorStore
|
|
221
|
+
): Promise<string[]>
|
|
222
|
+
```
|
|
256
223
|
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
224
|
+
**Logic:**
|
|
225
|
+
1. Start with stale directories from file diff
|
|
226
|
+
2. Add any directory whose stored SHA is missing or differs from current tree SHA
|
|
227
|
+
3. Always include root directory if missing
|
|
260
228
|
|
|
229
|
+
**Example:**
|
|
230
|
+
```typescript
|
|
261
231
|
const staleDirs = await resolveStaleDirectories(
|
|
262
232
|
"owner",
|
|
263
233
|
"repo",
|
|
264
|
-
[], //
|
|
265
|
-
|
|
266
|
-
|
|
234
|
+
["src"], // directories with changed children
|
|
235
|
+
["src/index.ts", "src/utils/helper.ts"],
|
|
236
|
+
treeEntries,
|
|
267
237
|
store
|
|
268
238
|
);
|
|
269
|
-
// ["src/utils", "src", ""] (bottom-up order inferred later)
|
|
270
|
-
```
|
|
271
239
|
|
|
272
|
-
|
|
240
|
+
// Returns ["src", "src/utils"] if their stored SHAs differ or are missing
|
|
241
|
+
```
|
|
273
242
|
|
|
274
|
-
|
|
275
|
-
2. **SHA mismatch** — any directory whose stored SHA differs from the current tree SHA
|
|
243
|
+
## Types
|
|
276
244
|
|
|
277
|
-
|
|
245
|
+
### ProcessorStore Interface
|
|
278
246
|
|
|
279
|
-
|
|
247
|
+
| Method | Description |
|
|
248
|
+
|--------|-------------|
|
|
249
|
+
| `ensureReady?(owner, repo)` | One-time init (e.g. clone repo). Optional |
|
|
250
|
+
| `getFileManifest(owner, repo)` | Get manifest of previous file SHAs |
|
|
251
|
+
| `getDirSha(owner, repo, dirPath)` | Get stored directory SHA |
|
|
252
|
+
| `writeFileResult(owner, repo, path, sha, result)` | Persist file result |
|
|
253
|
+
| `writeDirResult(owner, repo, path, sha, result)` | Persist directory result |
|
|
254
|
+
| `deleteFileResult(owner, repo, path)` | Delete result for removed file |
|
|
255
|
+
| `commitBatch(owner, repo, count)` | Commit changes |
|
|
280
256
|
|
|
281
|
-
|
|
257
|
+
### Callback Interfaces
|
|
282
258
|
|
|
283
259
|
```typescript
|
|
260
|
+
interface FileContext {
|
|
261
|
+
entry: TreeEntry;
|
|
262
|
+
content: string;
|
|
263
|
+
}
|
|
264
|
+
|
|
265
|
+
interface DirectoryContext {
|
|
266
|
+
path: string;
|
|
267
|
+
sha: string;
|
|
268
|
+
subtreeFilePaths: readonly string[];
|
|
269
|
+
children: readonly DirectoryChild[];
|
|
270
|
+
tree: readonly TreeEntry[];
|
|
271
|
+
}
|
|
272
|
+
|
|
273
|
+
interface DirectoryChild {
|
|
274
|
+
name: string;
|
|
275
|
+
isDir: boolean;
|
|
276
|
+
fullPath: string;
|
|
277
|
+
}
|
|
278
|
+
|
|
279
|
+
interface ProcessorCallbacks {
|
|
280
|
+
shouldProcess(entry: TreeEntry): boolean;
|
|
281
|
+
processFile(ctx: FileContext): Promise<unknown>;
|
|
282
|
+
processDirectory(ctx: DirectoryContext): Promise<unknown>;
|
|
283
|
+
}
|
|
284
|
+
|
|
284
285
|
interface ProcessingProgress {
|
|
285
286
|
phase: "loading" | "files" | "directories" | "committing";
|
|
286
287
|
message: string;
|
|
@@ -289,45 +290,37 @@ interface ProcessingProgress {
|
|
|
289
290
|
dirsTotal: number;
|
|
290
291
|
dirsCompleted: number;
|
|
291
292
|
}
|
|
292
|
-
```
|
|
293
|
-
|
|
294
|
-
| Phase | Description |
|
|
295
|
-
|-------|-------------|
|
|
296
|
-
| `"loading"` | Initial fetching of file tree. |
|
|
297
|
-
| `"files"` | Processing of files. |
|
|
298
|
-
| `"directories"` | Processing of directories (bottom-up). |
|
|
299
|
-
| `"committing"` | Final commit to persistence. |
|
|
300
293
|
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
| `ensureReady?(owner, repo)` | One-time initialization (optional). |
|
|
308
|
-
| `getFileManifest(owner, repo)` | Return manifest of previously processed file SHAs (path → blob SHA). |
|
|
309
|
-
| `getDirSha(owner, repo, dirPath)` | Return stored SHA for a directory. Null if not stored. |
|
|
310
|
-
| `writeFileResult(owner, repo, path, sha, result)` | Persist result for a processed file. |
|
|
311
|
-
| `writeDirResult(owner, repo, path, sha, result)` | Persist result for a processed directory. |
|
|
312
|
-
| `deleteFileResult(owner, repo, path)` | Delete stored result for a removed file. |
|
|
313
|
-
| `commitBatch(owner, repo, count)` | Commit current batch of changes. |
|
|
314
|
-
|
|
315
|
-
## ProcessorCallbacks Interface
|
|
294
|
+
interface ProcessingResult {
|
|
295
|
+
filesProcessed: number;
|
|
296
|
+
filesRemoved: number;
|
|
297
|
+
dirsProcessed: number;
|
|
298
|
+
}
|
|
299
|
+
```
|
|
316
300
|
|
|
317
|
-
|
|
301
|
+
## Error Handling
|
|
318
302
|
|
|
319
|
-
|
|
320
|
-
|--------|-------------|
|
|
321
|
-
| `shouldProcess(entry)` | Filter: which tree entries should be processed? |
|
|
322
|
-
| `processFile(ctx)` | Process a single changed file. Return value passed to `store.writeFileResult`. |
|
|
323
|
-
| `processDirectory(ctx)` | Process a directory after all children. Return value passed to `store.writeDirResult`. |
|
|
303
|
+
File and directory processing failures throw descriptive errors:
|
|
324
304
|
|
|
325
|
-
|
|
305
|
+
```typescript
|
|
306
|
+
try {
|
|
307
|
+
await processor.run("owner", "repo");
|
|
308
|
+
} catch (error) {
|
|
309
|
+
// Error includes all failed paths and messages
|
|
310
|
+
// e.g., "2 file(s) failed to process:\nfile1.ts: Connection timeout\nfile2.ts: Invalid format"
|
|
311
|
+
}
|
|
312
|
+
```
|
|
326
313
|
|
|
327
|
-
|
|
314
|
+
### Retries
|
|
328
315
|
|
|
329
|
-
|
|
316
|
+
`RepoWatcher` includes automatic retries for transient failures:
|
|
330
317
|
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
318
|
+
```typescript
|
|
319
|
+
const watcher = new RepoWatcher({
|
|
320
|
+
// ...
|
|
321
|
+
maxAttempts: 3, // Initial + 2 retries
|
|
322
|
+
onError: (owner, name, error) => {
|
|
323
|
+
console.error(`Failed for ${owner}/${name}: ${error}`);
|
|
324
|
+
},
|
|
325
|
+
});
|
|
326
|
+
```
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@hardlydifficult/repo-processor",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.76",
|
|
4
4
|
"main": "./dist/index.js",
|
|
5
5
|
"types": "./dist/index.d.ts",
|
|
6
6
|
"files": [
|
|
@@ -15,19 +15,19 @@
|
|
|
15
15
|
"clean": "rm -rf dist"
|
|
16
16
|
},
|
|
17
17
|
"dependencies": {
|
|
18
|
-
"@hardlydifficult/collections": "1.0.
|
|
19
|
-
"@hardlydifficult/github": "1.0.
|
|
20
|
-
"@hardlydifficult/state-tracker": "2.0.
|
|
21
|
-
"@hardlydifficult/text": "1.0.
|
|
18
|
+
"@hardlydifficult/collections": "1.0.11",
|
|
19
|
+
"@hardlydifficult/github": "1.0.34",
|
|
20
|
+
"@hardlydifficult/state-tracker": "2.0.22",
|
|
21
|
+
"@hardlydifficult/text": "1.0.31",
|
|
22
22
|
"simple-git": "3.32.2",
|
|
23
23
|
"yaml": "2.8.2",
|
|
24
24
|
"zod": "4.3.6"
|
|
25
25
|
},
|
|
26
26
|
"peerDependencies": {
|
|
27
|
-
"@hardlydifficult/collections": "1.0.
|
|
28
|
-
"@hardlydifficult/github": "1.0.
|
|
29
|
-
"@hardlydifficult/state-tracker": "2.0.
|
|
30
|
-
"@hardlydifficult/text": "1.0.
|
|
27
|
+
"@hardlydifficult/collections": "1.0.11",
|
|
28
|
+
"@hardlydifficult/github": "1.0.34",
|
|
29
|
+
"@hardlydifficult/state-tracker": "2.0.22",
|
|
30
|
+
"@hardlydifficult/text": "1.0.31"
|
|
31
31
|
},
|
|
32
32
|
"devDependencies": {
|
|
33
33
|
"@types/node": "25.3.0",
|