@hardlydifficult/repo-processor 1.0.17 → 1.0.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +172 -0
- package/package.json +4 -4
package/README.md
ADDED
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
# @hardlydifficult/repo-processor
|
|
2
|
+
|
|
3
|
+
Incremental GitHub repository processor that fetches file trees, diffs against a manifest, processes files in parallel, and updates directories bottom-up with SHA-based stale detection.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
npm install @hardlydifficult/repo-processor
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Quick Start
|
|
12
|
+
|
|
13
|
+
Process all `.ts` files in a repository and generate summaries:
|
|
14
|
+
|
|
15
|
+
```typescript
|
|
16
|
+
import { RepoProcessor, GitYamlStore } from "@hardlydifficult/repo-processor";
|
|
17
|
+
import { GitHubClient } from "@hardlydifficult/github";
|
|
18
|
+
|
|
19
|
+
// Create store that persists results to a git repository
|
|
20
|
+
const store = new GitYamlStore({
|
|
21
|
+
cloneUrl: "https://github.com/owner/repo.git",
|
|
22
|
+
localPath: "./results",
|
|
23
|
+
resultDir: (owner, repo) => `${owner}/${repo}`,
|
|
24
|
+
});
|
|
25
|
+
|
|
26
|
+
// Create GitHub client (requires GITHUB_TOKEN env var or token in constructor)
|
|
27
|
+
const github = new GitHubClient({ authToken: process.env.GITHUB_TOKEN });
|
|
28
|
+
|
|
29
|
+
// Create processor with custom callbacks
|
|
30
|
+
const processor = new RepoProcessor({
|
|
31
|
+
githubClient: github,
|
|
32
|
+
store,
|
|
33
|
+
callbacks: {
|
|
34
|
+
shouldProcess: (entry) => entry.path.endsWith(".ts"),
|
|
35
|
+
async processFile({ entry, content }) {
|
|
36
|
+
return { summary: `Processed ${entry.path}` };
|
|
37
|
+
},
|
|
38
|
+
async processDirectory(ctx) {
|
|
39
|
+
return { fileCount: ctx.subtreeFilePaths.length };
|
|
40
|
+
},
|
|
41
|
+
},
|
|
42
|
+
});
|
|
43
|
+
|
|
44
|
+
// Run the processor
|
|
45
|
+
const result = await processor.run("owner", "repo", (progress) => {
|
|
46
|
+
console.log(`${progress.phase}: ${progress.filesCompleted}/${progress.filesTotal} files`);
|
|
47
|
+
});
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## Core Concepts
|
|
51
|
+
|
|
52
|
+
### RepoProcessor
|
|
53
|
+
|
|
54
|
+
The main processor class that orchestrates incremental GitHub repository updates using the following pipeline:
|
|
55
|
+
1. Initializes the store and fetches the file tree from GitHub
|
|
56
|
+
2. Filters entries and computes diffs against the previous manifest
|
|
57
|
+
3. Processes changed files in parallel with configurable concurrency
|
|
58
|
+
4. Removes deleted files
|
|
59
|
+
5. Resolves stale directories (by SHA mismatch or diff)
|
|
60
|
+
6. Processes directories bottom-up by depth
|
|
61
|
+
|
|
62
|
+
### RepoProcessorConfig
|
|
63
|
+
|
|
64
|
+
| Field | Description |
|
|
65
|
+
|-------|-------------|
|
|
66
|
+
| `githubClient` | GitHub client for fetching trees and file contents |
|
|
67
|
+
| `store` | Persistent store implementing `ProcessorStore` interface |
|
|
68
|
+
| `callbacks` | Consumer-provided domain logic for filtering and processing |
|
|
69
|
+
| `concurrency?` | Max concurrent file/directory operations (default: `5`) |
|
|
70
|
+
| `branch?` | Repository branch to process (default: `"main"`) |
|
|
71
|
+
|
|
72
|
+
### ProcessorCallbacks
|
|
73
|
+
|
|
74
|
+
Consumer-implemented domain logic for filtering and processing:
|
|
75
|
+
|
|
76
|
+
| Callback | Description |
|
|
77
|
+
|----------|-------------|
|
|
78
|
+
| `shouldProcess(entry)` | Returns `true` if the tree entry should be processed |
|
|
79
|
+
| `processFile(ctx)` | Processes a file's content; result saved to store |
|
|
80
|
+
| `processDirectory(ctx)` | Processes a directory after all children are processed |
|
|
81
|
+
|
|
82
|
+
### FileContext
|
|
83
|
+
|
|
84
|
+
Passed to `processFile`:
|
|
85
|
+
|
|
86
|
+
| Field | Type | Description |
|
|
87
|
+
|-------|------|-------------|
|
|
88
|
+
| `entry` | `TreeEntry` | Tree entry metadata (path, sha, type) |
|
|
89
|
+
| `content` | `string` | Raw file contents from GitHub |
|
|
90
|
+
|
|
91
|
+
### DirectoryContext
|
|
92
|
+
|
|
93
|
+
Passed to `processDirectory`:
|
|
94
|
+
|
|
95
|
+
| Field | Type | Description |
|
|
96
|
+
|-------|------|-------------|
|
|
97
|
+
| `path` | `string` | Directory path (empty string for root) |
|
|
98
|
+
| `sha` | `string` | Current tree SHA for the directory |
|
|
99
|
+
| `subtreeFilePaths` | `readonly string[]` | All file paths under this directory |
|
|
100
|
+
| `children` | `readonly DirectoryChild[]` | Immediate children (files and subdirs) |
|
|
101
|
+
| `tree` | `readonly TreeEntry[]` | Full tree for the repository |
|
|
102
|
+
|
|
103
|
+
### DirectoryChild
|
|
104
|
+
|
|
105
|
+
Immediate child of a directory:
|
|
106
|
+
|
|
107
|
+
| Field | Type | Description |
|
|
108
|
+
|-------|------|-------------|
|
|
109
|
+
| `name` | `string` | Child name (file/dir name) |
|
|
110
|
+
| `isDir` | `boolean` | `true` if child is a directory |
|
|
111
|
+
| `fullPath` | `string` | Full path to the child |
|
|
112
|
+
|
|
113
|
+
### GitYamlStore
|
|
114
|
+
|
|
115
|
+
Persistent store implementation using a local git repository with YAML files:
|
|
116
|
+
|
|
117
|
+
```typescript
|
|
118
|
+
const store = new GitYamlStore({
|
|
119
|
+
cloneUrl: "https://github.com/owner/repo.git",
|
|
120
|
+
localPath: "./results",
|
|
121
|
+
resultDir: (owner, repo) => `${owner}/${repo}`,
|
|
122
|
+
authToken: "<token>", // optional; falls back to GITHUB_TOKEN
|
|
123
|
+
});
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
File results are stored at `<resultDir>/<filePath>.yml`.
|
|
127
|
+
Directory results are stored at `<resultDir>/<dirPath>/dir.yml`.
|
|
128
|
+
Each YAML file includes a `sha` field for change detection.
|
|
129
|
+
|
|
130
|
+
### resolveStaleDirectories
|
|
131
|
+
|
|
132
|
+
Determines which directories need reprocessing by combining two sources:
|
|
133
|
+
1. Directories identified as stale via `diffTree` (changed/removed children)
|
|
134
|
+
2. Directories whose stored SHA differs from the current tree SHA
|
|
135
|
+
|
|
136
|
+
```typescript
|
|
137
|
+
const staleDirs = await resolveStaleDirectories(
|
|
138
|
+
owner,
|
|
139
|
+
repo,
|
|
140
|
+
staleDirsFromDiff,
|
|
141
|
+
allFilePaths,
|
|
142
|
+
tree,
|
|
143
|
+
store
|
|
144
|
+
);
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
## Progress Reporting
|
|
148
|
+
|
|
149
|
+
During `RepoProcessor.run`, the progress callback is invoked with a `ProcessingProgress` object:
|
|
150
|
+
|
|
151
|
+
```typescript
|
|
152
|
+
interface ProcessingProgress {
|
|
153
|
+
phase: "loading" | "files" | "directories" | "committing";
|
|
154
|
+
message: string;
|
|
155
|
+
filesTotal: number;
|
|
156
|
+
filesCompleted: number;
|
|
157
|
+
dirsTotal: number;
|
|
158
|
+
dirsCompleted: number;
|
|
159
|
+
}
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
## Result
|
|
163
|
+
|
|
164
|
+
`RepoProcessor.run` returns:
|
|
165
|
+
|
|
166
|
+
```typescript
|
|
167
|
+
interface ProcessingResult {
|
|
168
|
+
filesProcessed: number;
|
|
169
|
+
filesRemoved: number;
|
|
170
|
+
dirsProcessed: number;
|
|
171
|
+
}
|
|
172
|
+
```
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@hardlydifficult/repo-processor",
|
|
3
|
-
"version": "1.0.17",
|
|
3
|
+
"version": "1.0.19",
|
|
4
4
|
"main": "./dist/index.js",
|
|
5
5
|
"types": "./dist/index.d.ts",
|
|
6
6
|
"files": [
|
|
@@ -15,9 +15,9 @@
|
|
|
15
15
|
"clean": "rm -rf dist"
|
|
16
16
|
},
|
|
17
17
|
"dependencies": {
|
|
18
|
-
"@hardlydifficult/collections": "1.0.
|
|
19
|
-
"@hardlydifficult/github": "1.0.
|
|
20
|
-
"@hardlydifficult/text": "1.0.
|
|
18
|
+
"@hardlydifficult/collections": "1.0.5",
|
|
19
|
+
"@hardlydifficult/github": "1.0.27",
|
|
20
|
+
"@hardlydifficult/text": "1.0.21",
|
|
21
21
|
"simple-git": "3.31.1",
|
|
22
22
|
"yaml": "2.8.2",
|
|
23
23
|
"zod": "4.3.6"
|