@checkstack/gitops-backend 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,355 @@
1
+ import { describe, it, expect } from "bun:test";
2
+ import { githubScraper } from "./github-scraper";
3
+ import type { ScraperOptions, FetchFn } from "./types";
4
+ import type { Logger } from "@checkstack/backend-api";
5
+
6
/** Silent logger stub: every level is a no-op so the tests produce no log output. */
const mockLogger: Logger = {
  info() {},
  error() {},
  warn() {},
  debug() {},
};
12
+
13
/**
 * Builds a fetch double that answers from a list of canned handlers.
 *
 * Handlers are tried in order and the first whose `pattern` matches the
 * request URL wins: a string pattern matches by substring, a RegExp by
 * `test()`. The handler's `response` is JSON-serialised into the body; `status`
 * defaults to 200 and `headers` are merged over a JSON `Content-Type`.
 * Requests that match no handler receive a plain-text 404.
 *
 * @param handlers - Ordered list of URL patterns and the responses they produce.
 * @returns A fetch-compatible function backed solely by `handlers`.
 */
function createMockFetch(
  handlers: Array<{
    pattern: string | RegExp;
    response: unknown;
    status?: number;
    headers?: Record<string, string>;
  }>,
): FetchFn {
  return async (input: RequestInfo | URL) => {
    // A Request stringifies to "[object Request]", so read its `url` instead.
    const url =
      typeof input === "string"
        ? input
        : input instanceof URL
          ? input.href
          : input.url;

    for (const handler of handlers) {
      let matches: boolean;
      if (typeof handler.pattern === "string") {
        matches = url.includes(handler.pattern);
      } else {
        // Global/sticky regexes remember `lastIndex` between calls; reset it so
        // a reused handler matches the same URL every time.
        handler.pattern.lastIndex = 0;
        matches = handler.pattern.test(url);
      }

      if (matches) {
        return new Response(JSON.stringify(handler.response), {
          status: handler.status ?? 200,
          headers: {
            "Content-Type": "application/json",
            ...handler.headers,
          },
        });
      }
    }

    return new Response("Not Found", { status: 404 });
  };
}
47
+
48
/**
 * Scraper options shared by every test case. Each test spreads this object and
 * supplies its own `fetch` double (plus any per-test overrides such as `target`).
 */
const BASE_OPTIONS: Omit<ScraperOptions, "fetch"> = {
  logger: mockLogger,
  authToken: "ghp_test_token",
  pathPattern: ".checkstack/**/*.yaml",
  target: "my-org",
};
54
+
55
/**
 * Behavioural tests for `githubScraper.discoverFiles`. Every case injects a
 * fetch double through the `fetch` option, so no network access takes place.
 */
describe("githubScraper", () => {
  /** Normalises a fetch input (string, URL or Request) to its URL string. */
  const toUrl = (input: RequestInfo | URL): string => {
    if (typeof input === "string") return input;
    return input instanceof URL ? input.href : input.url;
  };

  /** Wraps a JSON payload in a 200 response, optionally with extra headers. */
  const jsonResponse = (
    body: unknown,
    extraHeaders: Record<string, string> = {},
  ): Response =>
    new Response(JSON.stringify(body), {
      headers: { "Content-Type": "application/json", ...extraHeaders },
    });

  it("discovers files from a single repo target", async () => {
    const mockFetch = createMockFetch([
      {
        pattern: /repos\/my-org\/my-repo\/git\/trees/,
        response: {
          sha: "abc",
          tree: [
            { path: ".checkstack/systems.yaml", type: "blob" },
            { path: "README.md", type: "blob" },
            { path: "src", type: "tree" },
          ],
          truncated: false,
        },
      },
      {
        pattern: /repos\/my-org\/my-repo\/contents\//,
        response: {
          content: btoa("apiVersion: checkstack.io/v1alpha1"),
          encoding: "base64",
        },
      },
      {
        pattern: /repos\/my-org\/my-repo$/,
        response: { full_name: "my-org/my-repo", default_branch: "main" },
      },
    ]);

    const files = await githubScraper.discoverFiles({
      ...BASE_OPTIONS,
      target: "my-org/my-repo",
      fetch: mockFetch,
    });

    expect(files).toHaveLength(1);
    expect(files[0].repository).toBe("my-org/my-repo");
    expect(files[0].filePath).toBe(".checkstack/systems.yaml");
    expect(files[0].content).toBe("apiVersion: checkstack.io/v1alpha1");
    expect(files[0].branch).toBe("main");
  });

  it("enumerates repos from an org target", async () => {
    const mockFetch = createMockFetch([
      {
        pattern: "orgs/my-org/repos",
        response: [
          { full_name: "my-org/repo-a", default_branch: "main" },
          { full_name: "my-org/repo-b", default_branch: "develop" },
        ],
      },
      {
        pattern: "my-org/repo-a/git/trees",
        response: {
          sha: "abc",
          tree: [{ path: ".checkstack/sys.yaml", type: "blob" }],
          truncated: false,
        },
      },
      {
        pattern: "my-org/repo-b/git/trees",
        response: { sha: "def", tree: [], truncated: false },
      },
      {
        pattern: "repo-a/contents",
        response: { content: btoa("yaml-content"), encoding: "base64" },
      },
    ]);

    const files = await githubScraper.discoverFiles({
      ...BASE_OPTIONS,
      fetch: mockFetch,
    });

    expect(files).toHaveLength(1);
    expect(files[0].repository).toBe("my-org/repo-a");
    expect(files[0].branch).toBe("main");
  });

  it("falls back to user endpoint when org returns 404", async () => {
    let userEndpointCalled = false;

    const mockFetch = createMockFetch([
      {
        pattern: "orgs/my-user/repos",
        response: { message: "Not Found" },
        status: 404,
      },
      {
        pattern: "users/my-user/repos",
        response: [
          { full_name: "my-user/personal-repo", default_branch: "main" },
        ],
      },
      {
        pattern: "personal-repo/git/trees",
        response: { sha: "abc", tree: [], truncated: false },
      },
    ]);

    // Wrap the double so the test can observe that the user endpoint was hit.
    const wrappedFetch: FetchFn = async (input, init) => {
      if (toUrl(input).includes("users/my-user")) userEndpointCalled = true;
      return mockFetch(input, init);
    };

    await githubScraper.discoverFiles({
      ...BASE_OPTIONS,
      target: "my-user",
      fetch: wrappedFetch,
    });

    expect(userEndpointCalled).toBe(true);
  });

  it("filters files using minimatch pattern", async () => {
    const mockFetch = createMockFetch([
      {
        pattern: /repos\/my-org\/repo\/git\/trees/,
        response: {
          sha: "abc",
          tree: [
            { path: ".checkstack/systems.yaml", type: "blob" },
            { path: ".checkstack/deep/nested.yaml", type: "blob" },
            { path: "other/file.yaml", type: "blob" },
            { path: ".checkstack/readme.md", type: "blob" },
          ],
          truncated: false,
        },
      },
      {
        pattern: /repos\/my-org\/repo\/contents\//,
        response: { content: btoa("content"), encoding: "base64" },
      },
      {
        pattern: /repos\/my-org\/repo$/,
        response: { full_name: "my-org/repo", default_branch: "main" },
      },
    ]);

    const files = await githubScraper.discoverFiles({
      ...BASE_OPTIONS,
      target: "my-org/repo",
      fetch: mockFetch,
    });

    // Should match .checkstack/**/*.yaml only
    expect(files).toHaveLength(2);
    expect(files[0].filePath).toBe(".checkstack/systems.yaml");
    expect(files[1].filePath).toBe(".checkstack/deep/nested.yaml");
  });

  it("handles pagination via Link header", async () => {
    // Recorded so the test can prove the second page was actually followed.
    const requestedUrls: string[] = [];

    const mockFetch: FetchFn = async (input) => {
      const url = toUrl(input);
      requestedUrls.push(url);

      if (url.includes("orgs/big-org/repos") && !url.includes("page=2")) {
        return jsonResponse(
          [{ full_name: "big-org/repo-1", default_branch: "main" }],
          {
            Link: '<https://api.github.com/orgs/big-org/repos?per_page=100&page=2>; rel="next"',
          },
        );
      }

      if (url.includes("page=2")) {
        return jsonResponse([
          { full_name: "big-org/repo-2", default_branch: "main" },
        ]);
      }

      if (url.includes("git/trees")) {
        return jsonResponse({ sha: "abc", tree: [], truncated: false });
      }

      return new Response("Not Found", { status: 404 });
    };

    const files = await githubScraper.discoverFiles({
      ...BASE_OPTIONS,
      target: "big-org",
      fetch: mockFetch,
    });

    // Both repos processed but no matching files
    expect(files).toHaveLength(0);
    // The Link header must have been followed to the second page...
    expect(requestedUrls.some((url) => url.includes("page=2"))).toBe(true);
    // ...and the repo listed on each page must have had its tree fetched.
    expect(
      requestedUrls.some((url) => url.includes("big-org/repo-1/git/trees")),
    ).toBe(true);
    expect(
      requestedUrls.some((url) => url.includes("big-org/repo-2/git/trees")),
    ).toBe(true);
  });

  it("continues on individual file fetch errors", async () => {
    const mockFetch: FetchFn = async (input) => {
      const url = toUrl(input);

      // Content requests
      if (url.includes("contents/") && url.includes("bad.yaml")) {
        return new Response("Internal Server Error", { status: 500 });
      }
      if (url.includes("contents/") && url.includes("good.yaml")) {
        return jsonResponse({ content: btoa("good"), encoding: "base64" });
      }

      // Tree request
      if (url.includes("git/trees")) {
        return jsonResponse({
          sha: "abc",
          tree: [
            { path: ".checkstack/good.yaml", type: "blob" },
            { path: ".checkstack/bad.yaml", type: "blob" },
          ],
          truncated: false,
        });
      }

      // Repo metadata (must be last — matches broadly)
      if (url.includes("repos/my-org/repo")) {
        return jsonResponse({ full_name: "my-org/repo", default_branch: "main" });
      }

      return new Response("Not Found", { status: 404 });
    };

    const files = await githubScraper.discoverFiles({
      ...BASE_OPTIONS,
      target: "my-org/repo",
      fetch: mockFetch,
    });

    // Only the good file should be returned
    expect(files).toHaveLength(1);
    expect(files[0].filePath).toBe(".checkstack/good.yaml");
  });

  it("uses custom baseUrl for enterprise installations", async () => {
    const enterpriseUrl = "https://github.acme.corp/api/v3";
    const requestedUrls: string[] = [];

    const mockFetch: FetchFn = async (input) => {
      const url = toUrl(input);
      requestedUrls.push(url);

      if (url.includes("git/trees")) {
        return jsonResponse({
          sha: "abc",
          tree: [{ path: ".checkstack/sys.yaml", type: "blob" }],
          truncated: false,
        });
      }

      if (url.includes("contents/")) {
        return jsonResponse({ content: btoa("yaml"), encoding: "base64" });
      }

      if (url.includes("repos/acme/infra")) {
        return jsonResponse({ full_name: "acme/infra", default_branch: "main" });
      }

      return new Response("Not Found", { status: 404 });
    };

    const files = await githubScraper.discoverFiles({
      ...BASE_OPTIONS,
      target: "acme/infra",
      baseUrl: enterpriseUrl,
      fetch: mockFetch,
    });

    expect(files).toHaveLength(1);
    // Guard against the loop below passing vacuously.
    expect(requestedUrls.length).toBeGreaterThan(0);
    // All requests should use the enterprise URL, not api.github.com
    for (const url of requestedUrls) {
      expect(url).toStartWith(enterpriseUrl);
    }
  });
});
@@ -0,0 +1,263 @@
1
+ import { minimatch } from "minimatch";
2
+ import type { DiscoveredFile, ScraperOptions, Scraper, FetchFn } from "./types";
3
+
4
/** Root of the public GitHub REST API, used when the caller supplies no `baseUrl`. */
const DEFAULT_GITHUB_API_URL = "https://api.github.com";
5
+
6
+ // ─── GitHub API Types ──────────────────────────────────────────────────────
7
+
8
/** The fields of a GitHub repository object that the scraper reads. */
interface GitHubRepo {
  /** Repository identifier in `owner/name` form; interpolated into API paths. */
  full_name: string;
  /** Branch whose tree is listed and whose file contents are fetched. */
  default_branch: string;
}

/** One entry of a Git Trees API listing. */
interface GitHubTreeItem {
  /** Path of the entry as reported by the tree listing. */
  path: string;
  /** `"blob"` entries are treated as files; `"tree"` entries are skipped. */
  type: "blob" | "tree";
}

/** Body of a Git Trees API response. */
interface GitHubTreeResponse {
  sha: string;
  /** Entries of the tree; the scraper requests it with `recursive=1`. */
  tree: GitHubTreeItem[];
  /** Reported by the API alongside the tree; the scraper does not inspect it. */
  truncated: boolean;
}

/** Body of a repository contents API response for a single file. */
interface GitHubContentResponse {
  /** File payload, encoded as described by `encoding`. */
  content: string;
  /** `"base64"` payloads are decoded; any other value is returned verbatim. */
  encoding: string;
}
28
+
29
+ // ─── Helpers ───────────────────────────────────────────────────────────────
30
+
31
/**
 * Extracts the `rel="next"` target from a GitHub `Link` response header.
 *
 * @param linkHeader - Raw header value, or `null` when the response has none.
 * @returns The URL enclosed in angle brackets directly before `rel="next"`, or
 *   `undefined` when the header is missing, empty, or has no such entry.
 */
function parseNextPageUrl(linkHeader: string | null): string | undefined {
  if (linkHeader === null || linkHeader.length === 0) {
    return undefined;
  }

  const nextLink = linkHeader.match(/<([^>]+)>;\s*rel="next"/);
  return nextLink === null ? undefined : nextLink[1];
}
40
+
41
/**
 * Sends a request to a GitHub API URL through the injected fetch function.
 *
 * Every request carries the bearer token, the GitHub JSON media type and a
 * pinned `X-GitHub-Api-Version` header. No status handling happens here; the
 * caller inspects the returned response.
 *
 * @param params.url - Absolute URL to request.
 * @param params.authToken - Token sent as `Authorization: Bearer <token>`.
 * @param params.fetchFn - Fetch implementation that performs the request.
 * @returns Whatever response `fetchFn` resolves to.
 */
async function githubFetch(params: {
  url: string;
  authToken: string;
  fetchFn: FetchFn;
}): Promise<Response> {
  // Destructure so the fetch function is invoked unbound, not as a method of `params`.
  const { url: endpoint, authToken: token, fetchFn: send } = params;

  const requestInit = {
    headers: {
      Authorization: `Bearer ${token}`,
      Accept: "application/vnd.github+json",
      "X-GitHub-Api-Version": "2022-11-28",
    },
  };

  return send(endpoint, requestInit);
}
58
+
59
+ // ─── Core Logic ────────────────────────────────────────────────────────────
60
+
61
/**
 * Lists every repository belonging to an organisation or user.
 *
 * The organisation endpoint is queried first. A 404 there means the target is
 * not an organisation, so the listing restarts on the user endpoint. Any other
 * failure aborts. Further pages are followed through the `Link` header until
 * no `rel="next"` entry remains.
 *
 * @param params.target - Organisation or user login (URL-encoded before use).
 * @param params.authToken - Token forwarded to every request.
 * @param params.fetchFn - Fetch implementation forwarded to every request.
 * @param params.apiUrl - API root the endpoint paths are appended to.
 * @returns All repositories collected across the pages, in response order.
 * @throws Error when the organisation request fails with a status other than
 *   404, or when any subsequent page request is not OK.
 */
async function enumerateRepos(params: {
  target: string;
  authToken: string;
  fetchFn: FetchFn;
  apiUrl: string;
}): Promise<GitHubRepo[]> {
  const { target, authToken, fetchFn, apiUrl } = params;
  const owner = encodeURIComponent(target);
  const collected: GitHubRepo[] = [];

  const firstResponse = await githubFetch({
    url: `${apiUrl}/orgs/${owner}/repos?per_page=100`,
    authToken,
    fetchFn,
  });

  let nextUrl: string | undefined;
  if (firstResponse.status === 404) {
    // Not an organisation: start over on the user listing.
    nextUrl = `${apiUrl}/users/${owner}/repos?per_page=100`;
  } else if (!firstResponse.ok) {
    throw new Error(
      `GitHub API error listing org repos: ${firstResponse.status} ${firstResponse.statusText}`,
    );
  } else {
    const firstPage = (await firstResponse.json()) as GitHubRepo[];
    collected.push(...firstPage);
    nextUrl = parseNextPageUrl(firstResponse.headers.get("Link"));
  }

  // Remaining organisation pages, or the whole user listing after a 404.
  while (nextUrl) {
    const pageResponse = await githubFetch({ url: nextUrl, authToken, fetchFn });
    if (!pageResponse.ok) {
      throw new Error(
        `GitHub API error listing repos: ${pageResponse.status} ${pageResponse.statusText}`,
      );
    }
    const page = (await pageResponse.json()) as GitHubRepo[];
    collected.push(...page);
    nextUrl = parseNextPageUrl(pageResponse.headers.get("Link"));
  }

  return collected;
}
108
+
109
/**
 * Lists the files on a repository's default branch whose path matches a glob.
 *
 * A single recursive Git Trees request is made for the default branch. Only
 * `blob` entries are considered, and a path is kept when `minimatch` accepts it
 * for `pathPattern`. Result order follows the order of the tree response.
 *
 * NOTE(review): the response's `truncated` flag is not checked here, so a
 * listing the API cut short would be filtered as if it were complete — confirm
 * that is acceptable for large repositories.
 *
 * @param params.repo - Repository whose default branch is listed.
 * @param params.pathPattern - Glob the file paths are tested against.
 * @param params.authToken - Token forwarded to the request.
 * @param params.fetchFn - Fetch implementation forwarded to the request.
 * @param params.apiUrl - API root the endpoint path is appended to.
 * @returns Matching file paths.
 * @throws Error when the tree request does not return an OK status.
 */
async function getMatchingFiles(params: {
  repo: GitHubRepo;
  pathPattern: string;
  authToken: string;
  fetchFn: FetchFn;
  apiUrl: string;
}): Promise<string[]> {
  const { repo, pathPattern, authToken, fetchFn, apiUrl } = params;

  const branchRef = encodeURIComponent(repo.default_branch);
  const response = await githubFetch({
    url: `${apiUrl}/repos/${repo.full_name}/git/trees/${branchRef}?recursive=1`,
    authToken,
    fetchFn,
  });

  if (!response.ok) {
    throw new Error(
      `GitHub API error getting tree for ${repo.full_name}: ${response.status} ${response.statusText}`,
    );
  }

  const listing = (await response.json()) as GitHubTreeResponse;

  const matchedPaths: string[] = [];
  for (const entry of listing.tree) {
    // Directories ("tree" entries) are never candidates.
    if (entry.type !== "blob") continue;
    if (minimatch(entry.path, pathPattern)) {
      matchedPaths.push(entry.path);
    }
  }
  return matchedPaths;
}
138
+
139
/**
 * Fetches the text content of a single file from a repository.
 *
 * The file is requested from the repository contents endpoint at the given
 * branch. When the API reports `base64` encoding, the payload is decoded to
 * bytes and those bytes are interpreted as UTF-8, so non-ASCII characters are
 * preserved. Any other encoding is returned exactly as received.
 *
 * @param params.repoFullName - Repository in `owner/name` form.
 * @param params.filePath - Path of the file inside the repository.
 * @param params.branch - Branch (ref) to read the file from.
 * @param params.authToken - Token forwarded to the request.
 * @param params.fetchFn - Fetch implementation forwarded to the request.
 * @param params.apiUrl - API root the endpoint path is appended to.
 * @returns The decoded file content.
 * @throws Error when the contents request does not return an OK status.
 */
async function fetchFileContent(params: {
  repoFullName: string;
  filePath: string;
  branch: string;
  authToken: string;
  fetchFn: FetchFn;
  apiUrl: string;
}): Promise<string> {
  const { repoFullName, filePath, branch, authToken, fetchFn, apiUrl } = params;

  const url = `${apiUrl}/repos/${repoFullName}/contents/${encodeURIComponent(filePath)}?ref=${encodeURIComponent(branch)}`;
  const response = await githubFetch({ url, authToken, fetchFn });

  if (!response.ok) {
    throw new Error(
      `GitHub API error fetching ${filePath} from ${repoFullName}: ${response.status} ${response.statusText}`,
    );
  }

  const data = (await response.json()) as GitHubContentResponse;

  if (data.encoding !== "base64") {
    return data.content;
  }

  // The payload arrives line-wrapped; drop all whitespace before decoding.
  const binary = atob(data.content.replace(/\s+/g, ""));
  // `atob` yields one char per *byte*. Re-interpret those bytes as UTF-8,
  // otherwise multi-byte characters come back as mojibake. TextDecoder
  // substitutes U+FFFD for invalid sequences and drops a leading BOM.
  const bytes = Uint8Array.from(binary, (char) => char.charCodeAt(0));
  return new TextDecoder().decode(bytes);
}
169
+
170
+ // ─── Scraper ───────────────────────────────────────────────────────────────
171
+
172
/**
 * GitHub scraper implementation.
 *
 * Supports:
 * - Org/user target: enumerates all repos with pagination
 * - Single repo target: `owner/repo` format (any target containing "/")
 * - Default branch resolution per-repo
 * - Recursive tree walking with minimatch filtering
 * - Custom base URL for GitHub Enterprise
 *
 * Error policy of `discoverFiles`: a failure to resolve the repository list
 * (single-repo metadata or org/user enumeration) rejects the call. A failure
 * while processing one repository, or while fetching one file, is logged via
 * `logger.error` and skipped so the remaining repositories/files still get
 * scanned.
 */
export const githubScraper: Scraper = {
  /**
   * Discovers and downloads every file matching `options.pathPattern`.
   *
   * @param options - Scrape target, glob, credentials, logger and optional
   *   `baseUrl` / `fetch` overrides (`fetch` defaults to `globalThis.fetch`).
   * @returns One entry per successfully fetched file, in discovery order.
   * @throws Error when the repository list cannot be resolved.
   */
  async discoverFiles(options: ScraperOptions): Promise<DiscoveredFile[]> {
    const {
      target,
      pathPattern,
      authToken,
      baseUrl,
      logger,
      fetch: fetchFn = globalThis.fetch,
    } = options;

    // An empty base URL counts as "not configured". Trailing slashes are
    // dropped so that joining with "/repos/..." never produces "//".
    const apiUrl = (baseUrl || DEFAULT_GITHUB_API_URL).replace(/\/+$/, "");
    const isSingleRepo = target.includes("/");
    const files: DiscoveredFile[] = [];

    let repos: GitHubRepo[];

    if (isSingleRepo) {
      // Single repo mode: fetch repo metadata directly
      const url = `${apiUrl}/repos/${target}`;
      const response = await githubFetch({ url, authToken, fetchFn });
      if (!response.ok) {
        throw new Error(
          `GitHub API error fetching repo ${target}: ${response.status} ${response.statusText}`,
        );
      }
      repos = [(await response.json()) as GitHubRepo];
    } else {
      repos = await enumerateRepos({ target, authToken, fetchFn, apiUrl });
    }

    logger.debug(
      `GitHub scraper: found ${repos.length} repo(s) for target "${target}"`,
    );

    for (const repo of repos) {
      try {
        const matchingPaths = await getMatchingFiles({
          repo,
          pathPattern,
          authToken,
          fetchFn,
          apiUrl,
        });

        logger.debug(
          `GitHub scraper: ${matchingPaths.length} matching file(s) in ${repo.full_name}`,
        );

        for (const filePath of matchingPaths) {
          try {
            const content = await fetchFileContent({
              repoFullName: repo.full_name,
              filePath,
              branch: repo.default_branch,
              authToken,
              fetchFn,
              apiUrl,
            });

            files.push({
              repository: repo.full_name,
              filePath,
              content,
              branch: repo.default_branch,
            });
          } catch (error) {
            // Best effort: one unreadable file must not abort the repository.
            logger.error(
              `GitHub scraper: failed to fetch ${filePath} from ${repo.full_name}: ${error}`,
            );
          }
        }
      } catch (error) {
        // Best effort: one failing repository must not abort the whole scan.
        logger.error(
          `GitHub scraper: failed to process repo ${repo.full_name}: ${error}`,
        );
      }
    }

    return files;
  },
};