@ghcrawl/api-core 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/package.json +54 -0
- package/src/api/server.test.ts +296 -0
- package/src/api/server.ts +171 -0
- package/src/cluster/build.test.ts +18 -0
- package/src/cluster/build.ts +74 -0
- package/src/config.test.ts +247 -0
- package/src/config.ts +421 -0
- package/src/db/migrate.test.ts +30 -0
- package/src/db/migrate.ts +235 -0
- package/src/db/sqlite.ts +14 -0
- package/src/documents/normalize.test.ts +25 -0
- package/src/documents/normalize.ts +52 -0
- package/src/github/client.ts +241 -0
- package/src/index.ts +6 -0
- package/src/openai/provider.ts +141 -0
- package/src/search/exact.test.ts +22 -0
- package/src/search/exact.ts +28 -0
- package/src/service.test.ts +2036 -0
- package/src/service.ts +2497 -0
- package/src/types/better-sqlite3.d.ts +1 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 PwrDrvr LLC
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/package.json
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@ghcrawl/api-core",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"type": "module",
|
|
5
|
+
"description": "ghcrawl core library for GitHub sync, SQLite storage, embeddings, search, and clustering",
|
|
6
|
+
"author": "PwrDrvr LLC <harold@pwrdrvr.com>",
|
|
7
|
+
"license": "MIT",
|
|
8
|
+
"homepage": "https://github.com/pwrdrvr/ghcrawl",
|
|
9
|
+
"bugs": {
|
|
10
|
+
"url": "https://github.com/pwrdrvr/ghcrawl/issues"
|
|
11
|
+
},
|
|
12
|
+
"repository": {
|
|
13
|
+
"type": "git",
|
|
14
|
+
"url": "git+https://github.com/pwrdrvr/ghcrawl.git"
|
|
15
|
+
},
|
|
16
|
+
"keywords": [
|
|
17
|
+
"ghcrawl",
|
|
18
|
+
"github",
|
|
19
|
+
"sqlite",
|
|
20
|
+
"openai",
|
|
21
|
+
"embeddings",
|
|
22
|
+
"clustering"
|
|
23
|
+
],
|
|
24
|
+
"engines": {
|
|
25
|
+
"node": ">=22"
|
|
26
|
+
},
|
|
27
|
+
"files": [
|
|
28
|
+
"src"
|
|
29
|
+
],
|
|
30
|
+
"publishConfig": {
|
|
31
|
+
"access": "public"
|
|
32
|
+
},
|
|
33
|
+
"exports": {
|
|
34
|
+
".": {
|
|
35
|
+
"types": "./src/index.ts",
|
|
36
|
+
"default": "./src/index.ts"
|
|
37
|
+
}
|
|
38
|
+
},
|
|
39
|
+
"dependencies": {
|
|
40
|
+
"@shutterstock/p-map-iterable": "^1.1.2",
|
|
41
|
+
"@octokit/plugin-retry": "^8.0.3",
|
|
42
|
+
"@octokit/plugin-throttling": "^11.0.1",
|
|
43
|
+
"octokit": "^5.0.3",
|
|
44
|
+
"better-sqlite3": "^12.2.0",
|
|
45
|
+
"dotenv": "^17.2.2",
|
|
46
|
+
"openai": "^5.20.3",
|
|
47
|
+
"zod": "^3.25.76",
|
|
48
|
+
"@ghcrawl/api-contract": "0.1.0"
|
|
49
|
+
},
|
|
50
|
+
"scripts": {
|
|
51
|
+
"typecheck": "tsc -p tsconfig.json --noEmit",
|
|
52
|
+
"test": "tsx --test src/*.test.ts src/**/*.test.ts"
|
|
53
|
+
}
|
|
54
|
+
}
|
|
@@ -0,0 +1,296 @@
|
|
|
1
|
+
import test from 'node:test';
|
|
2
|
+
import assert from 'node:assert/strict';
|
|
3
|
+
|
|
4
|
+
import { clusterDetailResponseSchema, clusterSummariesResponseSchema, healthResponseSchema, neighborsResponseSchema } from '@ghcrawl/api-contract';
|
|
5
|
+
|
|
6
|
+
import { createApiServer } from './server.js';
|
|
7
|
+
import { GHCrawlService } from '../service.js';
|
|
8
|
+
|
|
9
|
+
// Boots the API server on an ephemeral loopback port and checks that GET /health
// answers 200 with a body accepted by healthResponseSchema.
test('health endpoint returns contract payload', async () => {
  const service = new GHCrawlService({
    config: {
      workspaceRoot: process.cwd(),
      configDir: '/tmp/ghcrawl-test',
      configPath: '/tmp/ghcrawl-test/config.json',
      configFileExists: true,
      dbPath: ':memory:',
      dbPathSource: 'config',
      apiPort: 5179,
      secretProvider: 'plaintext',
      githubTokenSource: 'none',
      openaiApiKeySource: 'none',
      summaryModel: 'gpt-5-mini',
      embedModel: 'text-embedding-3-large',
      embedBatchSize: 8,
      embedConcurrency: 10,
      embedMaxUnread: 20,
      openSearchIndex: 'ghcrawl-threads',
      tuiPreferences: {},
    },
    // Stub GitHub client: every call resolves to an empty value.
    github: {
      checkAuth: async () => undefined,
      getRepo: async () => ({}),
      listRepositoryIssues: async () => [],
      getIssue: async () => ({}),
      getPull: async () => ({}),
      listIssueComments: async () => [],
      listPullReviews: async () => [],
      listPullReviewComments: async () => [],
    },
  });

  const server = createApiServer(service);
  try {
    await new Promise<void>((resolve) => server.listen(0, '127.0.0.1', resolve));
    const address = server.address();
    assert(address && typeof address === 'object');

    const response = await fetch(`http://127.0.0.1:${address.port}/health`);
    assert.equal(response.status, 200);
    const payload = healthResponseSchema.parse((await response.json()) as unknown);

    assert.equal(payload.ok, true);
  } finally {
    try {
      await new Promise<void>((resolve, reject) => server.close((error) => (error ? reject(error) : resolve())));
    } finally {
      // Close the service even when server.close() reports an error, so a failed
      // shutdown cannot leave the service open.
      service.close();
    }
  }
});
|
|
58
|
+
|
|
59
|
+
// Seeds one repository, two threads and one embedding per thread, then checks that
// GET /neighbors for thread #42 returns thread #43 as its only neighbor.
test('neighbors endpoint returns contract payload', async () => {
  const service = new GHCrawlService({
    config: {
      workspaceRoot: process.cwd(),
      configDir: '/tmp/ghcrawl-test',
      configPath: '/tmp/ghcrawl-test/config.json',
      configFileExists: true,
      dbPath: ':memory:',
      dbPathSource: 'config',
      apiPort: 5179,
      secretProvider: 'plaintext',
      githubTokenSource: 'none',
      openaiApiKeySource: 'none',
      summaryModel: 'gpt-5-mini',
      embedModel: 'text-embedding-3-large',
      embedBatchSize: 8,
      embedConcurrency: 10,
      embedMaxUnread: 20,
      openSearchIndex: 'ghcrawl-threads',
      tuiPreferences: {},
    },
    // Stub GitHub client: every call resolves to an empty value.
    github: {
      checkAuth: async () => undefined,
      getRepo: async () => ({}),
      listRepositoryIssues: async () => [],
      getIssue: async () => ({}),
      getPull: async () => ({}),
      listIssueComments: async () => [],
      listPullReviews: async () => [],
      listPullReviewComments: async () => [],
    },
  });

  const now = '2026-03-09T00:00:00Z';
  service.db
    .prepare(
      `insert into repositories (id, owner, name, full_name, github_repo_id, raw_json, updated_at)
       values (?, ?, ?, ?, ?, ?, ?)`,
    )
    .run(1, 'openclaw', 'openclaw', 'openclaw/openclaw', '1', '{}', now);
  service.db
    .prepare(
      `insert into threads (
        id, repo_id, github_id, number, kind, state, title, body, author_login, author_type, html_url,
        labels_json, assignees_json, raw_json, content_hash, is_draft, created_at_gh, updated_at_gh, closed_at_gh,
        merged_at_gh, first_pulled_at, last_pulled_at, updated_at
      ) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
    )
    .run(10, 1, '100', 42, 'issue', 'open', 'Downloader hangs', 'The transfer never finishes.', 'alice', 'User', 'https://github.com/openclaw/openclaw/issues/42', '[]', '[]', '{}', 'hash-42', 0, now, now, null, null, now, now, now);
  service.db
    .prepare(
      `insert into threads (
        id, repo_id, github_id, number, kind, state, title, body, author_login, author_type, html_url,
        labels_json, assignees_json, raw_json, content_hash, is_draft, created_at_gh, updated_at_gh, closed_at_gh,
        merged_at_gh, first_pulled_at, last_pulled_at, updated_at
      ) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
    )
    .run(11, 1, '101', 43, 'pull_request', 'open', 'Fix downloader hang', 'Implements a fix.', 'bob', 'User', 'https://github.com/openclaw/openclaw/pull/43', '[]', '[]', '{}', 'hash-43', 0, now, now, null, null, now, now, now);
  service.db
    .prepare(
      `insert into document_embeddings (thread_id, source_kind, model, dimensions, content_hash, embedding_json, created_at, updated_at)
       values (?, ?, ?, ?, ?, ?, ?, ?)`,
    )
    .run(10, 'dedupe_summary', 'text-embedding-3-large', 2, 'hash-42', '[1,0]', now, now);
  service.db
    .prepare(
      `insert into document_embeddings (thread_id, source_kind, model, dimensions, content_hash, embedding_json, created_at, updated_at)
       values (?, ?, ?, ?, ?, ?, ?, ?)`,
    )
    .run(11, 'dedupe_summary', 'text-embedding-3-large', 2, 'hash-43', '[0.99,0.01]', now, now);

  const server = createApiServer(service);
  try {
    await new Promise<void>((resolve) => server.listen(0, '127.0.0.1', resolve));
    const address = server.address();
    assert(address && typeof address === 'object');

    const response = await fetch(
      `http://127.0.0.1:${address.port}/neighbors?owner=openclaw&repo=openclaw&number=42&limit=5&minScore=0.1`,
    );
    assert.equal(response.status, 200);
    const payload = neighborsResponseSchema.parse((await response.json()) as unknown);

    assert.equal(payload.thread.number, 42);
    assert.equal(payload.neighbors.length, 1);
    assert.equal(payload.neighbors[0].number, 43);
  } finally {
    try {
      await new Promise<void>((resolve, reject) => server.close((error) => (error ? reject(error) : resolve())));
    } finally {
      // Close the service even when server.close() reports an error, so a failed
      // shutdown cannot leave the service open.
      service.close();
    }
  }
});
|
|
150
|
+
|
|
151
|
+
// Checks that a missing required query parameter and a truncated JSON body are both
// answered with HTTP 400.
test('server returns 400 for malformed request inputs', async () => {
  const service = new GHCrawlService({
    config: {
      workspaceRoot: process.cwd(),
      configDir: '/tmp/ghcrawl-test',
      configPath: '/tmp/ghcrawl-test/config.json',
      configFileExists: true,
      dbPath: ':memory:',
      dbPathSource: 'config',
      apiPort: 5179,
      secretProvider: 'plaintext',
      githubTokenSource: 'none',
      openaiApiKeySource: 'none',
      summaryModel: 'gpt-5-mini',
      embedModel: 'text-embedding-3-large',
      embedBatchSize: 8,
      embedConcurrency: 10,
      embedMaxUnread: 20,
      openSearchIndex: 'ghcrawl-threads',
      tuiPreferences: {},
    },
    // Stub GitHub client: every call resolves to an empty value.
    github: {
      checkAuth: async () => undefined,
      getRepo: async () => ({}),
      listRepositoryIssues: async () => [],
      getIssue: async () => ({}),
      getPull: async () => ({}),
      listIssueComments: async () => [],
      listPullReviews: async () => [],
      listPullReviewComments: async () => [],
    },
  });

  const server = createApiServer(service);
  try {
    await new Promise<void>((resolve) => server.listen(0, '127.0.0.1', resolve));
    const address = server.address();
    assert(address && typeof address === 'object');

    // `repo` is omitted on purpose.
    const missingRepo = await fetch(`http://127.0.0.1:${address.port}/threads?owner=openclaw`);
    assert.equal(missingRepo.status, 400);

    // The body is deliberately truncated JSON.
    const badJson = await fetch(`http://127.0.0.1:${address.port}/actions/rerun`, {
      method: 'POST',
      headers: { 'content-type': 'application/json' },
      body: '{"owner":"openclaw"',
    });
    assert.equal(badJson.status, 400);
  } finally {
    try {
      await new Promise<void>((resolve, reject) => server.close((error) => (error ? reject(error) : resolve())));
    } finally {
      // Close the service even when server.close() reports an error, so a failed
      // shutdown cannot leave the service open.
      service.close();
    }
  }
});
|
|
204
|
+
|
|
205
|
+
// Seeds one repository, one thread and a single-member cluster, then checks that
// /cluster-summaries and /cluster-detail both return schema-valid payloads for it.
test('cluster summary and detail endpoints return contract payloads', async () => {
  const service = new GHCrawlService({
    config: {
      workspaceRoot: process.cwd(),
      configDir: '/tmp/ghcrawl-test',
      configPath: '/tmp/ghcrawl-test/config.json',
      configFileExists: true,
      dbPath: ':memory:',
      dbPathSource: 'config',
      apiPort: 5179,
      secretProvider: 'plaintext',
      githubTokenSource: 'none',
      openaiApiKeySource: 'none',
      summaryModel: 'gpt-5-mini',
      embedModel: 'text-embedding-3-large',
      embedBatchSize: 8,
      embedConcurrency: 10,
      embedMaxUnread: 20,
      openSearchIndex: 'ghcrawl-threads',
      tuiPreferences: {},
    },
    // Stub GitHub client: every call resolves to an empty value.
    github: {
      checkAuth: async () => undefined,
      getRepo: async () => ({}),
      listRepositoryIssues: async () => [],
      getIssue: async () => ({}),
      getPull: async () => ({}),
      listIssueComments: async () => [],
      listPullReviews: async () => [],
      listPullReviewComments: async () => [],
    },
  });

  const now = '2026-03-09T00:00:00Z';
  service.db
    .prepare(
      `insert into repositories (id, owner, name, full_name, github_repo_id, raw_json, updated_at)
       values (?, ?, ?, ?, ?, ?, ?)`,
    )
    .run(1, 'openclaw', 'openclaw', 'openclaw/openclaw', '1', '{}', now);
  service.db
    .prepare(
      `insert into threads (
        id, repo_id, github_id, number, kind, state, title, body, author_login, author_type, html_url,
        labels_json, assignees_json, raw_json, content_hash, is_draft, created_at_gh, updated_at_gh, closed_at_gh,
        merged_at_gh, first_pulled_at, last_pulled_at, updated_at
      ) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
    )
    .run(10, 1, '100', 42, 'issue', 'open', 'Downloader hangs', 'The transfer never finishes.', 'alice', 'User', 'https://github.com/openclaw/openclaw/issues/42', '[]', '[]', '{}', 'hash-42', 0, now, now, null, null, now, now, now);
  service.db
    .prepare(
      `insert into cluster_runs (id, repo_id, scope, status, started_at, finished_at) values (?, ?, ?, ?, ?, ?)`,
    )
    .run(1, 1, 'openclaw/openclaw', 'completed', now, now);
  service.db
    .prepare(
      `insert into clusters (id, repo_id, cluster_run_id, representative_thread_id, member_count, created_at)
       values (?, ?, ?, ?, ?, ?)`,
    )
    .run(100, 1, 1, 10, 1, now);
  service.db
    .prepare(
      `insert into cluster_members (cluster_id, thread_id, score_to_representative, created_at)
       values (?, ?, ?, ?)`,
    )
    .run(100, 10, null, now);

  const server = createApiServer(service);
  try {
    await new Promise<void>((resolve) => server.listen(0, '127.0.0.1', resolve));
    const address = server.address();
    assert(address && typeof address === 'object');

    const summariesResponse = await fetch(
      `http://127.0.0.1:${address.port}/cluster-summaries?owner=openclaw&repo=openclaw&minSize=0`,
    );
    assert.equal(summariesResponse.status, 200);
    const summaries = clusterSummariesResponseSchema.parse((await summariesResponse.json()) as unknown);
    assert.equal(summaries.clusters[0]?.clusterId, 100);

    const detailResponse = await fetch(
      `http://127.0.0.1:${address.port}/cluster-detail?owner=openclaw&repo=openclaw&clusterId=100&bodyChars=20`,
    );
    assert.equal(detailResponse.status, 200);
    const detail = clusterDetailResponseSchema.parse((await detailResponse.json()) as unknown);
    assert.equal(detail.cluster.clusterId, 100);
    assert.equal(detail.members[0]?.thread.number, 42);
  } finally {
    try {
      await new Promise<void>((resolve, reject) => server.close((error) => (error ? reject(error) : resolve())));
    } finally {
      // Close the service even when server.close() reports an error, so a failed
      // shutdown cannot leave the service open.
      service.close();
    }
  }
});
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
import http from 'node:http';
|
|
2
|
+
|
|
3
|
+
import { actionRequestSchema, refreshRequestSchema } from '@ghcrawl/api-contract';
|
|
4
|
+
import { ZodError } from 'zod';
|
|
5
|
+
|
|
6
|
+
import { GHCrawlService, parseRepoParams } from '../service.js';
|
|
7
|
+
|
|
8
|
+
/**
 * Writes `payload` to `res` as a JSON response with the given HTTP status.
 *
 * The payload is serialized before any header is written, so a value that
 * `JSON.stringify` rejects (for example a circular structure) throws while the
 * response is still untouched and the caller can send an error response instead.
 *
 * @param res - Response to write to.
 * @param status - HTTP status code for the response.
 * @param payload - Value to serialize as the response body.
 * @throws TypeError when `payload` cannot be serialized by `JSON.stringify`.
 */
function sendJson(res: http.ServerResponse, status: number, payload: unknown): void {
  const body = JSON.stringify(payload);
  res.writeHead(status, { 'content-type': 'application/json; charset=utf-8' });
  res.end(body);
}
|
|
12
|
+
|
|
13
|
+
/**
 * Collects the whole request body and decodes it as UTF-8 JSON.
 *
 * @param req - Incoming request whose body is consumed.
 * @returns The parsed JSON value, or `null` when the request produced no chunks.
 * @throws SyntaxError when the collected text is not valid JSON.
 */
async function readBody(req: http.IncomingMessage): Promise<unknown> {
  const parts: Buffer[] = [];
  for await (const piece of req) {
    // Streams may yield strings (e.g. after setEncoding); normalize to Buffer.
    const asBuffer = Buffer.isBuffer(piece) ? piece : Buffer.from(String(piece));
    parts.push(asBuffer);
  }

  if (parts.length > 0) {
    const text = Buffer.concat(parts).toString('utf8');
    return JSON.parse(text) as unknown;
  }
  return null;
}
|
|
21
|
+
|
|
22
|
+
/**
 * Reads an optional numeric query parameter from `url`.
 *
 * @param url - Parsed request URL.
 * @param name - Query parameter name.
 * @returns `undefined` when the parameter is absent or empty, otherwise its numeric value.
 * @throws Error with the message `Invalid <name> parameter` when the value is not a
 *   finite number, so garbage such as `limit=abc` is rejected instead of reaching the
 *   service as `NaN`.
 */
function parseOptionalNumberParam(url: URL, name: string): number | undefined {
  const raw = url.searchParams.get(name);
  if (!raw) return undefined;
  const value = Number(raw);
  if (!Number.isFinite(value)) {
    throw new Error(`Invalid ${name} parameter`);
  }
  return value;
}

/**
 * Builds the HTTP server that exposes `service` as a JSON API.
 *
 * Routes:
 * - `GET /health`, `GET /repositories`
 * - `GET /threads`, `GET /search`, `GET /neighbors`
 * - `GET /clusters`, `GET /cluster-summaries`, `GET /cluster-detail`
 * - `POST /actions/rerun`, `POST /actions/refresh`
 *
 * Anything else is answered with 404. Errors thrown while handling a request are
 * answered with 400 when `isBadRequestError` classifies them as a bad request and
 * with 500 otherwise.
 *
 * @param service - Service instance every route delegates to.
 * @returns An `http.Server` that has not been started; the caller calls `listen`.
 */
export function createApiServer(service: GHCrawlService): http.Server {
  return http.createServer(async (req, res) => {
    try {
      if (!req.url || !req.method) {
        sendJson(res, 400, { error: 'Missing request metadata' });
        return;
      }

      // req.url is only a path plus query; the base exists solely to make it parseable.
      const url = new URL(req.url, 'http://127.0.0.1');

      if (req.method === 'GET' && url.pathname === '/health') {
        sendJson(res, 200, service.init());
        return;
      }

      if (req.method === 'GET' && url.pathname === '/repositories') {
        sendJson(res, 200, service.listRepositories());
        return;
      }

      if (req.method === 'GET' && url.pathname === '/threads') {
        const params = parseRepoParams(url);
        const kindParam = url.searchParams.get('kind');
        // Unrecognized kinds are ignored rather than rejected.
        const kind = kindParam === 'issue' || kindParam === 'pull_request' ? kindParam : undefined;
        sendJson(res, 200, service.listThreads({ ...params, kind }));
        return;
      }

      if (req.method === 'GET' && url.pathname === '/search') {
        const params = parseRepoParams(url);
        const query = url.searchParams.get('query');
        if (!query) {
          sendJson(res, 400, { error: 'Missing query parameter' });
          return;
        }
        const modeParam = url.searchParams.get('mode');
        // Unrecognized modes are ignored rather than rejected.
        const mode = modeParam === 'keyword' || modeParam === 'semantic' || modeParam === 'hybrid' ? modeParam : undefined;
        sendJson(res, 200, await service.searchRepository({ ...params, query, mode }));
        return;
      }

      if (req.method === 'GET' && url.pathname === '/neighbors') {
        const params = parseRepoParams(url);
        const numberValue = url.searchParams.get('number');
        if (!numberValue) {
          sendJson(res, 400, { error: 'Missing number parameter' });
          return;
        }
        const threadNumber = Number(numberValue);
        if (!Number.isInteger(threadNumber) || threadNumber <= 0) {
          sendJson(res, 400, { error: 'Invalid number parameter' });
          return;
        }
        const limit = parseOptionalNumberParam(url, 'limit');
        const minScore = parseOptionalNumberParam(url, 'minScore');
        sendJson(res, 200, service.listNeighbors({ ...params, threadNumber, limit, minScore }));
        return;
      }

      if (req.method === 'GET' && url.pathname === '/clusters') {
        const params = parseRepoParams(url);
        sendJson(res, 200, service.listClusters(params));
        return;
      }

      if (req.method === 'GET' && url.pathname === '/cluster-summaries') {
        const params = parseRepoParams(url);
        const sortParam = url.searchParams.get('sort');
        // Unrecognized sort orders are ignored rather than rejected.
        const sort = sortParam === 'recent' || sortParam === 'size' ? sortParam : undefined;
        const minSize = parseOptionalNumberParam(url, 'minSize');
        const limit = parseOptionalNumberParam(url, 'limit');
        const search = url.searchParams.get('search') ?? undefined;
        sendJson(res, 200, service.listClusterSummaries({ ...params, minSize, limit, sort, search }));
        return;
      }

      if (req.method === 'GET' && url.pathname === '/cluster-detail') {
        const params = parseRepoParams(url);
        const clusterIdValue = url.searchParams.get('clusterId');
        if (!clusterIdValue) {
          sendJson(res, 400, { error: 'Missing clusterId parameter' });
          return;
        }
        const clusterId = Number(clusterIdValue);
        if (!Number.isInteger(clusterId) || clusterId <= 0) {
          sendJson(res, 400, { error: 'Invalid clusterId parameter' });
          return;
        }
        const memberLimit = parseOptionalNumberParam(url, 'memberLimit');
        const bodyChars = parseOptionalNumberParam(url, 'bodyChars');
        sendJson(res, 200, service.getClusterDetailDump({ ...params, clusterId, memberLimit, bodyChars }));
        return;
      }

      if (req.method === 'POST' && url.pathname === '/actions/rerun') {
        const body = actionRequestSchema.parse(await readBody(req));
        sendJson(res, 200, await service.rerunAction(body));
        return;
      }

      if (req.method === 'POST' && url.pathname === '/actions/refresh') {
        const body = refreshRequestSchema.parse(await readBody(req));
        sendJson(res, 200, await service.refreshRepository(body));
        return;
      }

      sendJson(res, 404, { error: 'Not found' });
    } catch (error) {
      if (res.headersSent) {
        // Headers are already committed, so a JSON error can no longer be delivered.
        // Writing them again would throw out of this async listener; drop the
        // connection instead.
        res.destroy();
        return;
      }
      const message = error instanceof Error ? error.message : String(error);
      sendJson(res, isBadRequestError(error, message) ? 400 : 500, { error: message });
    }
  });
}
|
|
163
|
+
|
|
164
|
+
/**
 * Decides whether a caught error should be reported as HTTP 400.
 *
 * An error counts as a bad request when it is a `SyntaxError`, a `ZodError`,
 * or when its message starts with `Missing ` or `Invalid `.
 *
 * @param error - The caught value.
 * @param message - The message already extracted from `error`.
 * @returns `true` for client errors, `false` for everything else.
 */
function isBadRequestError(error: unknown, message: string): boolean {
  if (error instanceof SyntaxError) return true;
  if (error instanceof ZodError) return true;
  const clientErrorPrefixes = ['Missing ', 'Invalid '];
  return clientErrorPrefixes.some((prefix) => message.startsWith(prefix));
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import test from 'node:test';
|
|
2
|
+
import assert from 'node:assert/strict';
|
|
3
|
+
|
|
4
|
+
import { buildClusters } from './build.js';
|
|
5
|
+
|
|
6
|
+
// Three threads with a single edge between threads 1 and 2: expect two clusters,
// with the two-member cluster listed first.
test('buildClusters groups connected components', () => {
  const threads = [
    { threadId: 1, number: 10, title: 'a' },
    { threadId: 2, number: 11, title: 'b' },
    { threadId: 3, number: 12, title: 'c' },
  ];
  const similarities = [{ leftThreadId: 1, rightThreadId: 2, score: 0.9 }];

  const result = buildClusters(threads, similarities);

  assert.equal(result.length, 2);
  const [largest] = result;
  assert.deepEqual(largest?.members, [1, 2]);
});
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
/** A similarity link between two threads, as consumed by `buildClusters`. */
export type SimilarityEdge = {
  /** Thread id at one end of the link. */
  leftThreadId: number;
  /** Thread id at the other end of the link. */
  rightThreadId: number;
  /** Score attached to the pair; the clustering code in this file does not read it. */
  score: number;
};
|
|
6
|
+
|
|
7
|
+
/** A thread as seen by `buildClusters`: one vertex of the similarity graph. */
type Node = {
  /** Identifier that edges refer to and that is reported in cluster membership. */
  threadId: number;
  /** Tie-breaker for the representative: among equally connected threads the lowest number wins. */
  number: number;
  /** Thread title; the clustering code in this file does not read it. */
  title: string;
};
|
|
12
|
+
|
|
13
|
+
/**
 * Disjoint-set structure over numeric ids, used to group threads into connected
 * components.
 */
class UnionFind {
  /** Maps each known id to its parent; a root maps to itself. */
  private readonly parent = new Map<number, number>();

  /** Registers `value` as its own singleton set unless it is already known. */
  add(value: number): void {
    if (!this.parent.has(value)) this.parent.set(value, value);
  }

  /**
   * Returns the root id of the set containing `value`, registering `value` as a
   * new singleton set when it has not been seen before.
   *
   * The walk is iterative: `union` can build parent chains as long as the number
   * of ids, and a recursive walk over such a chain overflows the call stack.
   */
  find(value: number): number {
    // Pass 1: follow parent links up to the root.
    let root = value;
    for (;;) {
      const parent = this.parent.get(root);
      if (parent === undefined) {
        this.parent.set(root, root);
        break;
      }
      if (parent === root) break;
      root = parent;
    }

    // Pass 2: point every id on the walked path straight at the root.
    let current = value;
    while (current !== root) {
      const next = this.parent.get(current) ?? root;
      this.parent.set(current, root);
      current = next;
    }
    return root;
  }

  /** Merges the sets containing `left` and `right`; the root of `left` becomes the merged root. */
  union(left: number, right: number): void {
    const leftRoot = this.find(left);
    const rightRoot = this.find(right);
    if (leftRoot !== rightRoot) {
      this.parent.set(rightRoot, leftRoot);
    }
  }
}
|
|
40
|
+
|
|
41
|
+
/**
 * Groups threads into clusters, one per connected component of the similarity graph.
 *
 * @param nodes - Threads to cluster; only these ids appear in the result.
 * @param edges - Similarity links; both endpoints of each edge end up in the same cluster.
 * @returns Clusters ordered by member count, largest first. `members` is sorted by
 *   ascending thread id. The representative is the member touching the most edges,
 *   with ties going to the lowest thread `number`.
 */
export function buildClusters(nodes: Node[], edges: SimilarityEdge[]): Array<{ representativeThreadId: number; members: number[] }> {
  const components = new UnionFind();
  nodes.forEach((node) => components.add(node.threadId));
  edges.forEach((edge) => components.union(edge.leftThreadId, edge.rightThreadId));

  // Bucket thread ids by component root, keeping first-seen order of the roots.
  const membersByRoot = new Map<number, number[]>();
  for (const { threadId } of nodes) {
    const rootId = components.find(threadId);
    const bucket = membersByRoot.get(rootId);
    if (bucket === undefined) {
      membersByRoot.set(rootId, [threadId]);
    } else {
      bucket.push(threadId);
    }
  }

  // Number of edges touching each thread id.
  const degree = new Map<number, number>();
  const bump = (threadId: number): void => {
    degree.set(threadId, (degree.get(threadId) ?? 0) + 1);
  };
  for (const { leftThreadId, rightThreadId } of edges) {
    bump(leftThreadId);
    bump(rightThreadId);
  }

  const nodeLookup = new Map(nodes.map((node) => [node.threadId, node]));

  // Orders candidates so the preferred representative sorts first:
  // more edges first, then lower thread number (falling back to lower id).
  const byPreference = (leftId: number, rightId: number): number => {
    const edgeDelta = (degree.get(rightId) ?? 0) - (degree.get(leftId) ?? 0);
    if (edgeDelta !== 0) return edgeDelta;
    const left = nodeLookup.get(leftId);
    const right = nodeLookup.get(rightId);
    if (!left || !right) return leftId - rightId;
    return left.number - right.number;
  };

  const clusters: Array<{ representativeThreadId: number; members: number[] }> = [];
  for (const members of membersByRoot.values()) {
    const [representative] = [...members].sort(byPreference);
    members.sort((a, b) => a - b);
    clusters.push({ representativeThreadId: representative, members });
  }

  clusters.sort((a, b) => b.members.length - a.members.length);
  return clusters;
}
|