@checkstack/gitops-backend 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +67 -0
- package/drizzle/0000_tense_stryfe.sql +46 -0
- package/drizzle/meta/0000_snapshot.json +310 -0
- package/drizzle/meta/_journal.json +13 -0
- package/drizzle.config.ts +7 -0
- package/package.json +37 -0
- package/src/index.ts +136 -0
- package/src/kind-registry.test.ts +262 -0
- package/src/kind-registry.ts +191 -0
- package/src/router.ts +355 -0
- package/src/schema.ts +77 -0
- package/src/scrapers/github-scraper.test.ts +355 -0
- package/src/scrapers/github-scraper.ts +263 -0
- package/src/scrapers/gitlab-scraper.test.ts +296 -0
- package/src/scrapers/gitlab-scraper.ts +242 -0
- package/src/scrapers/types.ts +52 -0
- package/src/secret-resolver.test.ts +86 -0
- package/src/secret-resolver.ts +54 -0
- package/src/sync/document-parser.test.ts +116 -0
- package/src/sync/document-parser.ts +124 -0
- package/src/sync/reconciler-delete.test.ts +123 -0
- package/src/sync/reconciler.ts +476 -0
- package/src/sync/sort-entities.test.ts +481 -0
- package/src/sync/sort-entities.ts +100 -0
- package/src/sync/sync-worker.ts +158 -0
- package/tsconfig.json +4 -0
|
@@ -0,0 +1,355 @@
|
|
|
1
|
+
import { describe, it, expect } from "bun:test";
|
|
2
|
+
import { githubScraper } from "./github-scraper";
|
|
3
|
+
import type { ScraperOptions, FetchFn } from "./types";
|
|
4
|
+
import type { Logger } from "@checkstack/backend-api";
|
|
5
|
+
|
|
6
|
+
/**
 * Silent logger for the tests below: every level is a no-op so scraper
 * diagnostics do not clutter the test output.
 */
const mockLogger: Logger = {
  debug() {},
  info() {},
  warn() {},
  error() {},
};
|
|
12
|
+
|
|
13
|
+
/**
 * Creates a mock fetch that returns pre-configured responses based on URL patterns.
 *
 * Handlers are tried in array order and the first match wins, so list the most
 * specific patterns first. A string pattern matches when it is a substring of
 * the requested URL; a RegExp pattern matches via `RegExp.prototype.test`.
 *
 * @param handlers - Routes to serve. `response` is JSON-serialised into the
 *   body, `status` defaults to 200, and `headers` are merged over a default
 *   `Content-Type: application/json`.
 * @returns A fetch-compatible function that answers unmatched URLs with a
 *   plain-text 404 response.
 */
function createMockFetch(
  handlers: Array<{
    pattern: string | RegExp;
    response: unknown;
    status?: number;
    headers?: Record<string, string>;
  }>,
): FetchFn {
  return async (input: RequestInfo | URL) => {
    // A Request stringifies to "[object Request]", so read its `url` property
    // instead of calling toString() on it.
    let url: string;
    if (typeof input === "string") {
      url = input;
    } else if (input instanceof URL) {
      url = input.href;
    } else {
      url = input.url;
    }

    for (const handler of handlers) {
      const matches =
        typeof handler.pattern === "string"
          ? url.includes(handler.pattern)
          : handler.pattern.test(url);

      if (matches) {
        return new Response(JSON.stringify(handler.response), {
          status: handler.status ?? 200,
          headers: {
            "Content-Type": "application/json",
            ...handler.headers,
          },
        });
      }
    }

    return new Response("Not Found", { status: 404 });
  };
}
|
|
47
|
+
|
|
48
|
+
/**
 * Scraper options shared by every test case. `fetch` is left out on purpose:
 * each test supplies its own mock, and most also override `target`.
 */
const BASE_OPTIONS: Omit<ScraperOptions, "fetch"> = {
  logger: mockLogger,
  authToken: "ghp_test_token",
  pathPattern: ".checkstack/**/*.yaml",
  target: "my-org",
};
|
|
54
|
+
|
|
55
|
+
describe("githubScraper", () => {
|
|
56
|
+
it("discovers files from a single repo target", async () => {
  // Recursive tree listing: one matching YAML file, one non-matching file
  // and a directory entry.
  const treeRoute = {
    pattern: /repos\/my-org\/my-repo\/git\/trees/,
    response: {
      sha: "abc",
      tree: [
        { path: ".checkstack/systems.yaml", type: "blob" },
        { path: "README.md", type: "blob" },
        { path: "src", type: "tree" },
      ],
      truncated: false,
    },
  };

  // File contents are served base64-encoded.
  const contentsRoute = {
    pattern: /repos\/my-org\/my-repo\/contents\//,
    response: {
      content: btoa("apiVersion: checkstack.io/v1alpha1"),
      encoding: "base64",
    },
  };

  // Repo metadata; anchored with `$` so it only matches the bare repo URL.
  const repoRoute = {
    pattern: /repos\/my-org\/my-repo$/,
    response: { full_name: "my-org/my-repo", default_branch: "main" },
  };

  const mockFetch = createMockFetch([treeRoute, contentsRoute, repoRoute]);

  const files = await githubScraper.discoverFiles({
    ...BASE_OPTIONS,
    target: "my-org/my-repo",
    fetch: mockFetch,
  });

  expect(files).toHaveLength(1);
  expect(files[0].repository).toBe("my-org/my-repo");
  expect(files[0].filePath).toBe(".checkstack/systems.yaml");
  expect(files[0].content).toBe("apiVersion: checkstack.io/v1alpha1");
  expect(files[0].branch).toBe("main");
});
|
|
95
|
+
|
|
96
|
+
it("enumerates repos from an org target", async () => {
  // The org owns two repos; only repo-a contains a file matching the pattern.
  const orgRepos = [
    { full_name: "my-org/repo-a", default_branch: "main" },
    { full_name: "my-org/repo-b", default_branch: "develop" },
  ];

  const mockFetch = createMockFetch([
    { pattern: "orgs/my-org/repos", response: orgRepos },
    {
      pattern: "my-org/repo-a/git/trees",
      response: {
        sha: "abc",
        tree: [{ path: ".checkstack/sys.yaml", type: "blob" }],
        truncated: false,
      },
    },
    {
      pattern: "my-org/repo-b/git/trees",
      response: { sha: "def", tree: [], truncated: false },
    },
    {
      pattern: "repo-a/contents",
      response: { content: btoa("yaml-content"), encoding: "base64" },
    },
  ]);

  // BASE_OPTIONS already targets the bare org name "my-org".
  const files = await githubScraper.discoverFiles({
    ...BASE_OPTIONS,
    fetch: mockFetch,
  });

  expect(files).toHaveLength(1);
  expect(files[0].repository).toBe("my-org/repo-a");
  expect(files[0].branch).toBe("main");
});
|
|
132
|
+
|
|
133
|
+
it("falls back to user endpoint when org returns 404", async () => {
  const routedFetch = createMockFetch([
    // The org lookup fails, which should trigger the user fallback.
    {
      pattern: "orgs/my-user/repos",
      response: { message: "Not Found" },
      status: 404,
    },
    {
      pattern: "users/my-user/repos",
      response: [
        { full_name: "my-user/personal-repo", default_branch: "main" },
      ],
    },
    {
      pattern: "personal-repo/git/trees",
      response: { sha: "abc", tree: [], truncated: false },
    },
  ]);

  // Spy wrapper: records whether any request hit the user endpoint, then
  // delegates to the routed mock.
  let sawUserRequest = false;
  const spyingFetch: FetchFn = async (input, init) => {
    if (String(input).includes("users/my-user")) {
      sawUserRequest = true;
    }
    return routedFetch(input, init);
  };

  await githubScraper.discoverFiles({
    ...BASE_OPTIONS,
    target: "my-user",
    fetch: spyingFetch,
  });

  expect(sawUserRequest).toBe(true);
});
|
|
169
|
+
|
|
170
|
+
it("filters files using minimatch pattern", async () => {
  // Two paths satisfy ".checkstack/**/*.yaml"; the other two fail on the
  // directory and on the extension respectively.
  const treeEntries = [
    { path: ".checkstack/systems.yaml", type: "blob" },
    { path: ".checkstack/deep/nested.yaml", type: "blob" },
    { path: "other/file.yaml", type: "blob" },
    { path: ".checkstack/readme.md", type: "blob" },
  ];

  const mockFetch = createMockFetch([
    {
      pattern: /repos\/my-org\/repo\/git\/trees/,
      response: { sha: "abc", tree: treeEntries, truncated: false },
    },
    {
      pattern: /repos\/my-org\/repo\/contents\//,
      response: { content: btoa("content"), encoding: "base64" },
    },
    {
      pattern: /repos\/my-org\/repo$/,
      response: { full_name: "my-org/repo", default_branch: "main" },
    },
  ]);

  const files = await githubScraper.discoverFiles({
    ...BASE_OPTIONS,
    target: "my-org/repo",
    fetch: mockFetch,
  });

  // Only the .checkstack/**/*.yaml entries survive, in tree order.
  expect(files).toHaveLength(2);
  expect(files[0].filePath).toBe(".checkstack/systems.yaml");
  expect(files[1].filePath).toBe(".checkstack/deep/nested.yaml");
});
|
|
206
|
+
|
|
207
|
+
it("handles pagination via Link header", async () => {
  // Every requested URL is recorded so the test can prove that the second
  // page was actually followed; checking only the (empty) file list would
  // also pass if pagination were broken.
  const requestedUrls: string[] = [];

  const mockFetch: FetchFn = async (input) => {
    const url = typeof input === "string" ? input : input.toString();
    requestedUrls.push(url);

    // Page 1 of the org listing advertises page 2 through the Link header.
    if (url.includes("orgs/big-org/repos") && !url.includes("page=2")) {
      return new Response(
        JSON.stringify([
          { full_name: "big-org/repo-1", default_branch: "main" },
        ]),
        {
          headers: {
            "Content-Type": "application/json",
            Link: '<https://api.github.com/orgs/big-org/repos?per_page=100&page=2>; rel="next"',
          },
        },
      );
    }

    // Page 2 is the last page: no Link header.
    if (url.includes("page=2")) {
      return new Response(
        JSON.stringify([
          { full_name: "big-org/repo-2", default_branch: "main" },
        ]),
        { headers: { "Content-Type": "application/json" } },
      );
    }

    // Both repos have an empty tree.
    if (url.includes("git/trees")) {
      return new Response(
        JSON.stringify({ sha: "abc", tree: [], truncated: false }),
        { headers: { "Content-Type": "application/json" } },
      );
    }

    return new Response("Not Found", { status: 404 });
  };

  const files = await githubScraper.discoverFiles({
    ...BASE_OPTIONS,
    target: "big-org",
    fetch: mockFetch,
  });

  // Both repos processed but no matching files
  expect(files).toHaveLength(0);

  // The "next" link from page 1 was followed.
  expect(
    requestedUrls.some(
      (url) => url.includes("orgs/big-org/repos") && url.includes("page=2"),
    ),
  ).toBe(true);

  // Repos from both pages had their trees listed.
  expect(
    requestedUrls.some((url) => url.includes("repos/big-org/repo-1/git/trees")),
  ).toBe(true);
  expect(
    requestedUrls.some((url) => url.includes("repos/big-org/repo-2/git/trees")),
  ).toBe(true);
});
|
|
253
|
+
|
|
254
|
+
it("continues on individual file fetch errors", async () => {
  // Small helper for a 200 JSON response.
  const json = (body: unknown): Response =>
    new Response(JSON.stringify(body), {
      headers: { "Content-Type": "application/json" },
    });

  const mockFetch: FetchFn = async (input) => {
    const url = String(input);
    const isContentRequest = url.includes("contents/");

    // Content requests: bad.yaml fails with a server error, good.yaml succeeds.
    if (isContentRequest && url.includes("bad.yaml")) {
      return new Response("Internal Server Error", { status: 500 });
    }
    if (isContentRequest && url.includes("good.yaml")) {
      return json({ content: btoa("good"), encoding: "base64" });
    }

    // Tree request
    if (url.includes("git/trees")) {
      return json({
        sha: "abc",
        tree: [
          { path: ".checkstack/good.yaml", type: "blob" },
          { path: ".checkstack/bad.yaml", type: "blob" },
        ],
        truncated: false,
      });
    }

    // Repo metadata: checked last because this substring also occurs in the
    // tree and contents URLs handled above.
    if (url.includes("repos/my-org/repo")) {
      return json({ full_name: "my-org/repo", default_branch: "main" });
    }

    return new Response("Not Found", { status: 404 });
  };

  const files = await githubScraper.discoverFiles({
    ...BASE_OPTIONS,
    target: "my-org/repo",
    fetch: mockFetch,
  });

  // The failing file is skipped; the good one is still returned.
  expect(files).toHaveLength(1);
  expect(files[0].filePath).toBe(".checkstack/good.yaml");
});
|
|
305
|
+
|
|
306
|
+
it("uses custom baseUrl for enterprise installations", async () => {
  const enterpriseUrl = "https://github.acme.corp/api/v3";
  const requestedUrls: string[] = [];

  // Small helper for a 200 JSON response.
  const json = (body: unknown): Response =>
    new Response(JSON.stringify(body), {
      headers: { "Content-Type": "application/json" },
    });

  // Records every URL it is asked for, then serves canned GitHub payloads.
  const mockFetch: FetchFn = async (input) => {
    const url = String(input);
    requestedUrls.push(url);

    if (url.includes("git/trees")) {
      return json({
        sha: "abc",
        tree: [{ path: ".checkstack/sys.yaml", type: "blob" }],
        truncated: false,
      });
    }

    if (url.includes("contents/")) {
      return json({ content: btoa("yaml"), encoding: "base64" });
    }

    if (url.includes("repos/acme/infra")) {
      return json({ full_name: "acme/infra", default_branch: "main" });
    }

    return new Response("Not Found", { status: 404 });
  };

  const files = await githubScraper.discoverFiles({
    ...BASE_OPTIONS,
    target: "acme/infra",
    baseUrl: enterpriseUrl,
    fetch: mockFetch,
  });

  expect(files).toHaveLength(1);

  // No request may go to api.github.com; all must use the enterprise URL.
  for (const requested of requestedUrls) {
    expect(requested).toStartWith(enterpriseUrl);
  }
});
|
|
355
|
+
});
|
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
import { minimatch } from "minimatch";
|
|
2
|
+
import type { DiscoveredFile, ScraperOptions, Scraper, FetchFn } from "./types";
|
|
3
|
+
|
|
4
|
+
/** API base URL used when the scraper options do not supply a `baseUrl`. */
const DEFAULT_GITHUB_API_URL = "https://api.github.com";
|
|
5
|
+
|
|
6
|
+
// ─── GitHub API Types ──────────────────────────────────────────────────────
|
|
7
|
+
|
|
8
|
+
/** Subset of a GitHub repository payload that the scraper reads. */
interface GitHubRepo {
  /** Repository identifier; interpolated into `/repos/{full_name}/...` URLs. */
  full_name: string;
  /** Branch whose tree is listed and whose file contents are fetched. */
  default_branch: string;
}

/** One entry of a Git tree listing. */
interface GitHubTreeItem {
  /** Path of the entry; this is the value matched against the path pattern. */
  path: string;
  /** Only `"blob"` entries are considered when collecting matching paths. */
  type: "blob" | "tree";
}

/** Response body of the Git Trees request. */
interface GitHubTreeResponse {
  sha: string;
  tree: Array<GitHubTreeItem>;
  /** Present in the payload; not inspected by the code in this file. */
  truncated: boolean;
}

/** Response body of the repository contents request. */
interface GitHubContentResponse {
  /** File body, encoded as described by `encoding`. */
  content: string;
  /** `"base64"` triggers decoding; any other value leaves `content` as-is. */
  encoding: string;
}
|
|
28
|
+
|
|
29
|
+
// ─── Helpers ───────────────────────────────────────────────────────────────
|
|
30
|
+
|
|
31
|
+
/**
 * Extracts the `rel="next"` target from a `Link` pagination header.
 *
 * @param linkHeader - Raw header value, or `null` when the header is absent.
 * @returns The URL inside the angle brackets of the `rel="next"` entry, or
 *   `undefined` when the header is missing, empty, or has no such entry.
 */
function parseNextPageUrl(linkHeader: string | null): string | undefined {
  if (linkHeader == null || linkHeader.length === 0) {
    return undefined;
  }

  // Capture group 1 is the URL between `<` and `>` directly before rel="next".
  const nextLink = linkHeader.match(/<([^>]+)>;\s*rel="next"/);
  return nextLink === null ? undefined : nextLink[1];
}
|
|
40
|
+
|
|
41
|
+
/**
 * Sends a request to the GitHub API with the standard authentication and
 * versioning headers attached.
 *
 * @param params.url - Absolute URL to request.
 * @param params.authToken - Token sent as `Authorization: Bearer <token>`.
 * @param params.fetchFn - Fetch implementation that performs the request.
 * @returns The response exactly as returned by `fetchFn`; the status is not
 *   inspected here.
 */
async function githubFetch(params: {
  url: string;
  authToken: string;
  fetchFn: FetchFn;
}): Promise<Response> {
  // Destructure first so fetchFn is called as a plain function, not as a
  // method of `params`.
  const { url, authToken, fetchFn } = params;

  const headers = {
    Authorization: `Bearer ${authToken}`,
    Accept: "application/vnd.github+json",
    "X-GitHub-Api-Version": "2022-11-28",
  };

  return fetchFn(url, { headers });
}
|
|
58
|
+
|
|
59
|
+
// ─── Core Logic ────────────────────────────────────────────────────────────
|
|
60
|
+
|
|
61
|
+
/**
 * Lists the repositories of `target`, treating it first as an organisation
 * and, if that lookup returns 404, as a user.
 *
 * Further pages are followed through the `Link` response header until no
 * `rel="next"` entry remains.
 *
 * @param params.target - Org or user name; URI-encoded before use.
 * @param params.authToken - Token forwarded to every request.
 * @param params.fetchFn - Fetch implementation used for the requests.
 * @param params.apiUrl - API base URL that the endpoint paths are appended to.
 * @returns All repositories collected across the pages, in response order.
 * @throws Error when the org request fails with a status other than 404, or
 *   when any later page request is not OK.
 */
async function enumerateRepos(params: {
  target: string;
  authToken: string;
  fetchFn: FetchFn;
  apiUrl: string;
}): Promise<GitHubRepo[]> {
  const { target, authToken, fetchFn, apiUrl } = params;
  const owner = encodeURIComponent(target);
  const collected: GitHubRepo[] = [];

  // URL still to be fetched by the loop below, if any.
  let nextUrl: string | undefined;

  const firstPage = await githubFetch({
    url: `${apiUrl}/orgs/${owner}/repos?per_page=100`,
    authToken,
    fetchFn,
  });

  if (firstPage.ok) {
    const page = (await firstPage.json()) as GitHubRepo[];
    collected.push(...page);
    nextUrl = parseNextPageUrl(firstPage.headers.get("Link"));
  } else if (firstPage.status === 404) {
    // Not an organisation: start over at the user endpoint.
    nextUrl = `${apiUrl}/users/${owner}/repos?per_page=100`;
  } else {
    throw new Error(
      `GitHub API error listing org repos: ${firstPage.status} ${firstPage.statusText}`,
    );
  }

  // Remaining org pages, or the whole user listing after a 404.
  while (nextUrl) {
    const pageResponse = await githubFetch({ url: nextUrl, authToken, fetchFn });
    if (!pageResponse.ok) {
      throw new Error(
        `GitHub API error listing repos: ${pageResponse.status} ${pageResponse.statusText}`,
      );
    }
    const page = (await pageResponse.json()) as GitHubRepo[];
    collected.push(...page);
    nextUrl = parseNextPageUrl(pageResponse.headers.get("Link"));
  }

  return collected;
}
|
|
108
|
+
|
|
109
|
+
/**
 * Lists the default-branch tree of a repository recursively and returns the
 * file paths that satisfy the glob pattern.
 *
 * @param params.repo - Repository whose `default_branch` tree is listed.
 * @param params.pathPattern - minimatch pattern applied to each blob path.
 * @param params.authToken - Token forwarded to the request.
 * @param params.fetchFn - Fetch implementation used for the request.
 * @param params.apiUrl - API base URL that the endpoint path is appended to.
 * @returns Paths of matching blob entries, in tree order.
 * @throws Error when the tree request is not OK.
 */
async function getMatchingFiles(params: {
  repo: GitHubRepo;
  pathPattern: string;
  authToken: string;
  fetchFn: FetchFn;
  apiUrl: string;
}): Promise<string[]> {
  const { repo, pathPattern, authToken, fetchFn, apiUrl } = params;

  const branchRef = encodeURIComponent(repo.default_branch);
  const treeUrl = `${apiUrl}/repos/${repo.full_name}/git/trees/${branchRef}?recursive=1`;
  const response = await githubFetch({ url: treeUrl, authToken, fetchFn });

  if (!response.ok) {
    throw new Error(
      `GitHub API error getting tree for ${repo.full_name}: ${response.status} ${response.statusText}`,
    );
  }

  // NOTE(review): the `truncated` flag of the listing is not inspected, so a
  // partial listing would be processed like a complete one.
  const listing = (await response.json()) as GitHubTreeResponse;

  const matchingPaths: string[] = [];
  for (const entry of listing.tree) {
    // Directory ("tree") entries are skipped; only file paths are matched.
    if (entry.type === "blob" && minimatch(entry.path, pathPattern)) {
      matchingPaths.push(entry.path);
    }
  }
  return matchingPaths;
}
|
|
138
|
+
|
|
139
|
+
/**
 * Fetches the content of a single file from a repository.
 *
 * Base64 payloads are decoded as UTF-8 text. `atob` alone yields a "binary
 * string" with one character per byte, which corrupts any non-ASCII
 * character, so the bytes are run through `TextDecoder` afterwards.
 *
 * @param params.repoFullName - Repository identifier used in the URL path.
 * @param params.filePath - File path inside the repository; URI-encoded as a
 *   whole, so path separators are percent-encoded too.
 * @param params.branch - Ref to read from, sent as the `ref` query parameter.
 * @param params.authToken - Token forwarded to the request.
 * @param params.fetchFn - Fetch implementation used for the request.
 * @param params.apiUrl - API base URL that the endpoint path is appended to.
 * @returns The file text: decoded when `encoding` is `"base64"`, otherwise
 *   the `content` field unchanged.
 * @throws Error when the contents request is not OK.
 */
async function fetchFileContent(params: {
  repoFullName: string;
  filePath: string;
  branch: string;
  authToken: string;
  fetchFn: FetchFn;
  apiUrl: string;
}): Promise<string> {
  const { repoFullName, filePath, branch, authToken, fetchFn, apiUrl } = params;

  const url = `${apiUrl}/repos/${repoFullName}/contents/${encodeURIComponent(filePath)}?ref=${encodeURIComponent(branch)}`;
  const response = await githubFetch({ url, authToken, fetchFn });

  if (!response.ok) {
    throw new Error(
      `GitHub API error fetching ${filePath} from ${repoFullName}: ${response.status} ${response.statusText}`,
    );
  }

  const data = (await response.json()) as GitHubContentResponse;

  if (data.encoding === "base64") {
    // The payload may be wrapped with newlines; strip them before decoding.
    const binary = atob(data.content.replaceAll("\n", ""));
    // Turn the one-char-per-byte string back into bytes, then decode as UTF-8.
    const bytes = Uint8Array.from(binary, (char) => char.charCodeAt(0));
    return new TextDecoder().decode(bytes);
  }

  return data.content;
}
|
|
169
|
+
|
|
170
|
+
// ─── Scraper ───────────────────────────────────────────────────────────────
|
|
171
|
+
|
|
172
|
+
/**
 * GitHub scraper implementation.
 *
 * Supports:
 * - Org/user target: enumerates all repos with pagination
 * - Single repo target: `owner/repo` format
 * - Default branch resolution per-repo
 * - Recursive tree walking with minimatch filtering
 * - Custom base URL for GitHub Enterprise
 *
 * Error policy of `discoverFiles`: a failure while resolving the target
 * (repo metadata or repo enumeration) is thrown to the caller. A failure
 * while processing one repo or fetching one file is logged via `logger.error`
 * and skipped, so the remaining repos and files are still returned.
 */
export const githubScraper: Scraper = {
  async discoverFiles(options: ScraperOptions): Promise<DiscoveredFile[]> {
    const {
      target,
      pathPattern,
      authToken,
      baseUrl,
      logger,
      fetch: fetchFn = globalThis.fetch,
    } = options;

    // Normalise the base URL once: a blank value falls back to the public
    // API, and trailing slashes are dropped so the "/repos/..." and
    // "/orgs/..." suffixes never produce "//" in the request URL.
    const configuredUrl = baseUrl?.trim();
    const apiUrl = (configuredUrl ? configuredUrl : DEFAULT_GITHUB_API_URL).replace(
      /\/+$/,
      "",
    );

    // A slash in the target means "owner/repo"; otherwise it is an org/user.
    const isSingleRepo = target.includes("/");
    const files: DiscoveredFile[] = [];

    let repos: GitHubRepo[];

    if (isSingleRepo) {
      // Single repo mode: fetch repo metadata directly
      const url = `${apiUrl}/repos/${target}`;
      const response = await githubFetch({ url, authToken, fetchFn });
      if (!response.ok) {
        throw new Error(
          `GitHub API error fetching repo ${target}: ${response.status} ${response.statusText}`,
        );
      }
      repos = [(await response.json()) as GitHubRepo];
    } else {
      repos = await enumerateRepos({ target, authToken, fetchFn, apiUrl });
    }

    logger.debug(
      `GitHub scraper: found ${repos.length} repo(s) for target "${target}"`,
    );

    for (const repo of repos) {
      try {
        const matchingPaths = await getMatchingFiles({
          repo,
          pathPattern,
          authToken,
          fetchFn,
          apiUrl,
        });

        logger.debug(
          `GitHub scraper: ${matchingPaths.length} matching file(s) in ${repo.full_name}`,
        );

        for (const filePath of matchingPaths) {
          try {
            const content = await fetchFileContent({
              repoFullName: repo.full_name,
              filePath,
              branch: repo.default_branch,
              authToken,
              fetchFn,
              apiUrl,
            });

            files.push({
              repository: repo.full_name,
              filePath,
              content,
              branch: repo.default_branch,
            });
          } catch (error) {
            // Best effort: one unreadable file must not abort the repo.
            logger.error(
              `GitHub scraper: failed to fetch ${filePath} from ${repo.full_name}: ${error}`,
            );
          }
        }
      } catch (error) {
        // Best effort: one failing repo must not abort the whole scrape.
        logger.error(
          `GitHub scraper: failed to process repo ${repo.full_name}: ${error}`,
        );
      }
    }

    return files;
  },
};
|