@ghcrawl/api-core 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. package/README.md +25 -0
  2. package/dist/api/server.d.ts +4 -0
  3. package/dist/api/server.d.ts.map +1 -0
  4. package/dist/api/server.js +142 -0
  5. package/dist/api/server.js.map +1 -0
  6. package/dist/cluster/build.d.ts +16 -0
  7. package/dist/cluster/build.d.ts.map +1 -0
  8. package/dist/cluster/build.js +62 -0
  9. package/dist/cluster/build.js.map +1 -0
  10. package/dist/config.d.ts +83 -0
  11. package/dist/config.d.ts.map +1 -0
  12. package/dist/config.js +257 -0
  13. package/dist/config.js.map +1 -0
  14. package/dist/db/migrate.d.ts +3 -0
  15. package/dist/db/migrate.d.ts.map +1 -0
  16. package/{src/db/migrate.ts → dist/db/migrate.js} +30 -36
  17. package/dist/db/migrate.js.map +1 -0
  18. package/dist/db/sqlite.d.ts +4 -0
  19. package/dist/db/sqlite.d.ts.map +1 -0
  20. package/dist/db/sqlite.js +11 -0
  21. package/dist/db/sqlite.js.map +1 -0
  22. package/dist/documents/normalize.d.ts +23 -0
  23. package/dist/documents/normalize.d.ts.map +1 -0
  24. package/dist/documents/normalize.js +36 -0
  25. package/dist/documents/normalize.js.map +1 -0
  26. package/dist/github/client.d.ts +24 -0
  27. package/dist/github/client.d.ts.map +1 -0
  28. package/dist/github/client.js +170 -0
  29. package/dist/github/client.js.map +1 -0
  30. package/dist/index.d.ts +7 -0
  31. package/dist/index.d.ts.map +1 -0
  32. package/{src/index.ts → dist/index.js} +1 -0
  33. package/dist/index.js.map +1 -0
  34. package/dist/openai/provider.d.ts +44 -0
  35. package/dist/openai/provider.d.ts.map +1 -0
  36. package/dist/openai/provider.js +107 -0
  37. package/dist/openai/provider.js.map +1 -0
  38. package/dist/search/exact.d.ts +14 -0
  39. package/dist/search/exact.d.ts.map +1 -0
  40. package/dist/search/exact.js +26 -0
  41. package/dist/search/exact.js.map +1 -0
  42. package/dist/service.d.ts +249 -0
  43. package/dist/service.d.ts.map +1 -0
  44. package/dist/service.js +1801 -0
  45. package/dist/service.js.map +1 -0
  46. package/package.json +8 -6
  47. package/src/api/server.test.ts +0 -296
  48. package/src/api/server.ts +0 -171
  49. package/src/cluster/build.test.ts +0 -18
  50. package/src/cluster/build.ts +0 -74
  51. package/src/config.test.ts +0 -247
  52. package/src/config.ts +0 -421
  53. package/src/db/migrate.test.ts +0 -30
  54. package/src/db/sqlite.ts +0 -14
  55. package/src/documents/normalize.test.ts +0 -25
  56. package/src/documents/normalize.ts +0 -52
  57. package/src/github/client.ts +0 -241
  58. package/src/openai/provider.ts +0 -141
  59. package/src/search/exact.test.ts +0 -22
  60. package/src/search/exact.ts +0 -28
  61. package/src/service.test.ts +0 -2036
  62. package/src/service.ts +0 -2497
  63. package/src/types/better-sqlite3.d.ts +0 -1
package/src/api/server.ts DELETED
@@ -1,171 +0,0 @@
1
- import http from 'node:http';
2
-
3
- import { actionRequestSchema, refreshRequestSchema } from '@ghcrawl/api-contract';
4
- import { ZodError } from 'zod';
5
-
6
- import { GHCrawlService, parseRepoParams } from '../service.js';
7
-
8
/**
 * Writes `payload` to `res` as a JSON response with the given HTTP status.
 *
 * The payload is serialized before any header is written, so a value that
 * `JSON.stringify` rejects (for example a `bigint` or a circular structure)
 * throws while the response is still untouched and the caller can still send
 * an error response.
 *
 * @param res - Response to write to.
 * @param status - HTTP status code.
 * @param payload - Value to serialize as the response body.
 * @throws TypeError if `payload` cannot be serialized by `JSON.stringify`.
 */
function sendJson(res: http.ServerResponse, status: number, payload: unknown): void {
  const body = JSON.stringify(payload);
  res.writeHead(status, { 'content-type': 'application/json; charset=utf-8' });
  res.end(body);
}
12
-
13
/**
 * Collects the whole request body and parses it as JSON.
 *
 * @param req - Incoming request whose body is consumed.
 * @returns The parsed JSON value, or `null` when the request carried no body chunks.
 * @throws SyntaxError if the body is not valid JSON.
 */
async function readBody(req: http.IncomingMessage): Promise<unknown> {
  const parts: Buffer[] = [];
  for await (const piece of req) {
    // Streams may yield strings when an encoding is set; normalize to Buffer.
    const asBuffer = Buffer.isBuffer(piece) ? piece : Buffer.from(String(piece));
    parts.push(asBuffer);
  }

  if (parts.length > 0) {
    const text = Buffer.concat(parts).toString('utf8');
    return JSON.parse(text) as unknown;
  }
  return null;
}
21
-
22
/**
 * Reads an optional numeric query parameter.
 *
 * @param url - Parsed request URL.
 * @param name - Query parameter name.
 * @returns `undefined` when the parameter is absent or empty, otherwise its numeric value.
 * @throws Error with the message `Invalid <name> parameter` when the value is
 *   not a finite number. The message shape matches the other validation
 *   errors in the request handler, which it reports as a 400 response.
 */
function readOptionalNumberParam(url: URL, name: string): number | undefined {
  const raw = url.searchParams.get(name);
  if (!raw) return undefined;
  const value = Number(raw);
  if (!Number.isFinite(value)) {
    throw new Error(`Invalid ${name} parameter`);
  }
  return value;
}

/**
 * Creates the HTTP server that exposes `service` as a JSON API.
 *
 * Routes:
 * - `GET /health`, `GET /repositories`
 * - `GET /threads`, `GET /search`, `GET /neighbors`
 * - `GET /clusters`, `GET /cluster-summaries`, `GET /cluster-detail`
 * - `POST /actions/rerun`, `POST /actions/refresh`
 *
 * Anything else answers 404. Errors raised while handling a request are
 * reported as JSON `{ error }` with status 400 when `isBadRequestError`
 * classifies them as caller mistakes, and 500 otherwise.
 *
 * @param service - Service instance every route delegates to.
 * @returns An unstarted `http.Server`; the caller is responsible for `listen`.
 */
export function createApiServer(service: GHCrawlService): http.Server {
  return http.createServer(async (req, res) => {
    try {
      if (!req.url || !req.method) {
        sendJson(res, 400, { error: 'Missing request metadata' });
        return;
      }

      // req.url is relative; the base only exists to make it parseable.
      const url = new URL(req.url, 'http://127.0.0.1');

      if (req.method === 'GET' && url.pathname === '/health') {
        sendJson(res, 200, service.init());
        return;
      }

      if (req.method === 'GET' && url.pathname === '/repositories') {
        sendJson(res, 200, service.listRepositories());
        return;
      }

      if (req.method === 'GET' && url.pathname === '/threads') {
        const params = parseRepoParams(url);
        const kindParam = url.searchParams.get('kind');
        // Unknown kinds are ignored rather than rejected.
        const kind = kindParam === 'issue' || kindParam === 'pull_request' ? kindParam : undefined;
        sendJson(res, 200, service.listThreads({ ...params, kind }));
        return;
      }

      if (req.method === 'GET' && url.pathname === '/search') {
        const params = parseRepoParams(url);
        const query = url.searchParams.get('query');
        if (!query) {
          sendJson(res, 400, { error: 'Missing query parameter' });
          return;
        }
        const modeParam = url.searchParams.get('mode');
        // Unknown modes are ignored rather than rejected.
        const mode = modeParam === 'keyword' || modeParam === 'semantic' || modeParam === 'hybrid' ? modeParam : undefined;
        sendJson(res, 200, await service.searchRepository({ ...params, query, mode }));
        return;
      }

      if (req.method === 'GET' && url.pathname === '/neighbors') {
        const params = parseRepoParams(url);
        const numberValue = url.searchParams.get('number');
        if (!numberValue) {
          sendJson(res, 400, { error: 'Missing number parameter' });
          return;
        }
        const threadNumber = Number(numberValue);
        if (!Number.isInteger(threadNumber) || threadNumber <= 0) {
          sendJson(res, 400, { error: 'Invalid number parameter' });
          return;
        }
        const limit = readOptionalNumberParam(url, 'limit');
        const minScore = readOptionalNumberParam(url, 'minScore');
        sendJson(res, 200, service.listNeighbors({ ...params, threadNumber, limit, minScore }));
        return;
      }

      if (req.method === 'GET' && url.pathname === '/clusters') {
        const params = parseRepoParams(url);
        sendJson(res, 200, service.listClusters(params));
        return;
      }

      if (req.method === 'GET' && url.pathname === '/cluster-summaries') {
        const params = parseRepoParams(url);
        const sortParam = url.searchParams.get('sort');
        const sort = sortParam === 'recent' || sortParam === 'size' ? sortParam : undefined;
        const minSize = readOptionalNumberParam(url, 'minSize');
        const limit = readOptionalNumberParam(url, 'limit');
        const search = url.searchParams.get('search') ?? undefined;
        sendJson(res, 200, service.listClusterSummaries({ ...params, minSize, limit, sort, search }));
        return;
      }

      if (req.method === 'GET' && url.pathname === '/cluster-detail') {
        const params = parseRepoParams(url);
        const clusterIdValue = url.searchParams.get('clusterId');
        if (!clusterIdValue) {
          sendJson(res, 400, { error: 'Missing clusterId parameter' });
          return;
        }
        const clusterId = Number(clusterIdValue);
        if (!Number.isInteger(clusterId) || clusterId <= 0) {
          sendJson(res, 400, { error: 'Invalid clusterId parameter' });
          return;
        }
        const memberLimit = readOptionalNumberParam(url, 'memberLimit');
        const bodyChars = readOptionalNumberParam(url, 'bodyChars');
        sendJson(res, 200, service.getClusterDetailDump({ ...params, clusterId, memberLimit, bodyChars }));
        return;
      }

      if (req.method === 'POST' && url.pathname === '/actions/rerun') {
        const body = actionRequestSchema.parse(await readBody(req));
        sendJson(res, 200, await service.rerunAction(body));
        return;
      }

      if (req.method === 'POST' && url.pathname === '/actions/refresh') {
        const body = refreshRequestSchema.parse(await readBody(req));
        sendJson(res, 200, await service.refreshRepository(body));
        return;
      }

      sendJson(res, 404, { error: 'Not found' });
    } catch (error) {
      if (res.headersSent) {
        // The status line is already committed, so a second writeHead would
        // throw and surface as an unhandled rejection from this async handler.
        // All that is left to do is close the response.
        if (!res.writableEnded) res.end();
        return;
      }
      const message = error instanceof Error ? error.message : String(error);
      sendJson(res, isBadRequestError(error, message) ? 400 : 500, { error: message });
    }
  });
}
163
-
164
/**
 * Decides whether a failure should be reported as a client error.
 *
 * @param error - The value that was thrown.
 * @param message - The message extracted from `error`.
 * @returns `true` for `SyntaxError` and `ZodError` instances, and for any
 *   message beginning with `Missing ` or `Invalid `; `false` otherwise.
 */
function isBadRequestError(error: unknown, message: string): boolean {
  if (error instanceof SyntaxError) return true;
  if (error instanceof ZodError) return true;
  return ['Missing ', 'Invalid '].some((prefix) => message.startsWith(prefix));
}
package/src/cluster/build.test.ts DELETED
@@ -1,18 +0,0 @@
1
- import test from 'node:test';
2
- import assert from 'node:assert/strict';
3
-
4
- import { buildClusters } from './build.js';
5
-
6
// Three threads with a single edge between the first two: the linked pair
// forms one cluster and the remaining thread forms its own.
test('buildClusters groups connected components', () => {
  const threads = [
    { threadId: 1, number: 10, title: 'a' },
    { threadId: 2, number: 11, title: 'b' },
    { threadId: 3, number: 12, title: 'c' },
  ];
  const links = [{ leftThreadId: 1, rightThreadId: 2, score: 0.9 }];

  const clusters = buildClusters(threads, links);

  assert.equal(clusters.length, 2);
  // The larger cluster is expected first.
  assert.deepEqual(clusters[0]?.members, [1, 2]);
});
package/src/cluster/build.ts DELETED
@@ -1,74 +0,0 @@
1
/** A similarity link between two threads, identified by their thread ids. */
export type SimilarityEdge = {
  /** Thread id at one end of the link. */
  leftThreadId: number;
  /** Thread id at the other end of the link. */
  rightThreadId: number;
  /** Score attached to the link. */
  score: number;
};

/** A thread taking part in clustering. */
type Node = {
  /** Id used to match the thread against edges. */
  threadId: number;
  /** Thread number; used by `buildClusters` as a tie-breaker when choosing a representative. */
  number: number;
  /** Thread title. */
  title: string;
};
12
-
13
/**
 * Disjoint-set structure over numeric ids.
 *
 * Each id maps to a parent id; an id that maps to itself is the root of its
 * set. Ids are registered lazily, so `find` and `union` accept ids that were
 * never passed to `add`.
 */
class UnionFind {
  private readonly parent = new Map<number, number>();

  /** Registers `value` as its own single-member set unless it is already known. */
  add(value: number): void {
    if (!this.parent.has(value)) this.parent.set(value, value);
  }

  /**
   * Returns the root of the set containing `value`, registering `value` first
   * if it is unknown.
   *
   * Walks the parent chain with a loop rather than recursion: `union` can
   * build chains as long as the number of ids, and a recursive walk over such
   * a chain exhausts the call stack.
   */
  find(value: number): number {
    // Pass 1: climb to the root.
    let root = value;
    for (;;) {
      const parent = this.parent.get(root);
      if (parent === undefined) {
        this.parent.set(root, root);
        break;
      }
      if (parent === root) break;
      root = parent;
    }

    // Pass 2: point every id on the walked path straight at the root so later
    // lookups are short.
    let current = value;
    while (current !== root) {
      const next = this.parent.get(current) ?? root;
      this.parent.set(current, root);
      current = next;
    }
    return root;
  }

  /** Merges the sets containing `left` and `right`; the root of `left`'s set becomes the merged root. */
  union(left: number, right: number): void {
    const leftRoot = this.find(left);
    const rightRoot = this.find(right);
    if (leftRoot !== rightRoot) {
      this.parent.set(rightRoot, leftRoot);
    }
  }
}
40
-
41
/**
 * Groups threads into clusters, one per connected component of the similarity graph.
 *
 * @param nodes - Threads to cluster; every node ends up in exactly one cluster.
 * @param edges - Similarity links; linked threads share a cluster.
 * @returns Clusters ordered by descending member count. `members` holds the
 *   thread ids in ascending order. `representativeThreadId` is the member
 *   touching the most edges, ties broken by the lowest thread `number`.
 */
export function buildClusters(nodes: Node[], edges: SimilarityEdge[]): Array<{ representativeThreadId: number; members: number[] }> {
  const components = new UnionFind();
  nodes.forEach((node) => components.add(node.threadId));
  edges.forEach((edge) => components.union(edge.leftThreadId, edge.rightThreadId));

  // Bucket thread ids by the root of their component.
  const membersByRoot = new Map<number, number[]>();
  for (const { threadId } of nodes) {
    const root = components.find(threadId);
    const bucket = membersByRoot.get(root);
    if (bucket) {
      bucket.push(threadId);
    } else {
      membersByRoot.set(root, [threadId]);
    }
  }

  // Number of edge endpoints touching each thread id.
  const degree = new Map<number, number>();
  const bump = (id: number): void => {
    degree.set(id, (degree.get(id) ?? 0) + 1);
  };
  for (const { leftThreadId, rightThreadId } of edges) {
    bump(leftThreadId);
    bump(rightThreadId);
  }

  const nodeLookup = new Map<number, Node>();
  for (const node of nodes) nodeLookup.set(node.threadId, node);

  // Orders candidates best-first: more edges wins, then the lower thread number.
  const byPreference = (firstId: number, secondId: number): number => {
    const degreeGap = (degree.get(secondId) ?? 0) - (degree.get(firstId) ?? 0);
    if (degreeGap !== 0) return degreeGap;
    const first = nodeLookup.get(firstId);
    const second = nodeLookup.get(secondId);
    return first && second ? first.number - second.number : firstId - secondId;
  };

  const clusters: Array<{ representativeThreadId: number; members: number[] }> = [];
  for (const members of membersByRoot.values()) {
    // Rank a copy so the representative is chosen before members are re-sorted by id.
    const [representativeThreadId] = [...members].sort(byPreference);
    members.sort((a, b) => a - b);
    clusters.push({ representativeThreadId, members });
  }

  clusters.sort((a, b) => b.members.length - a.members.length);
  return clusters;
}
package/src/config.test.ts DELETED
@@ -1,247 +0,0 @@
1
- import test from 'node:test';
2
- import assert from 'node:assert/strict';
3
- import fs from 'node:fs';
4
- import os from 'node:os';
5
- import path from 'node:path';
6
-
7
- import {
8
- getConfigPath,
9
- getTuiRepositoryPreference,
10
- isLikelyGitHubToken,
11
- isLikelyOpenAiApiKey,
12
- loadConfig,
13
- readPersistedConfig,
14
- writeTuiRepositoryPreference,
15
- writePersistedConfig,
16
- } from './config.js';
17
-
18
/** Creates a fresh directory under the OS temp dir and returns its path. */
function makeTempHome(): string {
  const prefix = path.join(os.tmpdir(), 'ghcrawl-config-test-');
  return fs.mkdtempSync(prefix);
}
21
-
22
/**
 * Builds an environment for config tests: a copy of `process.env` with
 * `XDG_CONFIG_HOME` and `APPDATA` blanked out, then `overrides` applied on top.
 *
 * @param overrides - Variables to set or replace; these win over everything else.
 * @returns A new object; `process.env` itself is not modified.
 */
function makeTestEnv(overrides: NodeJS.ProcessEnv = {}): NodeJS.ProcessEnv {
  const env: NodeJS.ProcessEnv = { ...process.env };
  env.XDG_CONFIG_HOME = undefined;
  env.APPDATA = undefined;
  return Object.assign(env, overrides);
}
30
-
31
/** Creates a temp directory containing a `pnpm-workspace.yaml` marker file and returns its path. */
function makeWorkspace(): string {
  const root = fs.mkdtempSync(path.join(os.tmpdir(), 'ghcrawl-workspace-'));
  fs.writeFileSync(path.join(root, 'pnpm-workspace.yaml'), 'packages:\n - "packages/*"\n');
  return root;
}

// Persisted config values are used, and config + database live under ~/.config/ghcrawl.
test('loadConfig prefers persisted config and stores defaults under the user config directory', () => {
  const home = makeTempHome();
  const workspace = makeWorkspace();
  const env = makeTestEnv({ HOME: home });
  const configDir = path.join(home, '.config', 'ghcrawl');

  const persisted = {
    githubToken: 'ghp_testtoken1234567890',
    openaiApiKey: 'sk-proj-testkey1234567890',
    apiPort: 6123,
    embedConcurrency: 12,
  };
  writePersistedConfig(persisted, { env });

  const config = loadConfig({ cwd: workspace, env });
  assert.equal(config.configPath, path.join(configDir, 'config.json'));
  assert.equal(config.configFileExists, true);
  assert.equal(config.apiPort, 6123);
  assert.equal(config.embedConcurrency, 12);
  assert.equal(config.githubTokenSource, 'config');
  assert.equal(config.openaiApiKeySource, 'config');
  assert.equal(config.dbPath, path.join(configDir, 'ghcrawl.db'));
});

// Values present in the environment win over the persisted file.
test('loadConfig lets environment variables override persisted config', () => {
  const home = makeTempHome();
  const env = makeTestEnv({
    HOME: home,
    GITHUB_TOKEN: 'ghp_override1234567890',
    GHCRAWL_API_PORT: '7001',
  });

  const persisted = {
    githubToken: 'ghp_stored1234567890',
    openaiApiKey: 'sk-proj-stored1234567890',
    apiPort: 6123,
  };
  writePersistedConfig(persisted, { env });

  const config = loadConfig({ cwd: process.cwd(), env });
  assert.equal(config.githubToken, 'ghp_override1234567890');
  assert.equal(config.githubTokenSource, 'env');
  assert.equal(config.apiPort, 7001);
});

// With no persisted config, the workspace's .env.local supplies the values.
test('loadConfig falls back to repo .env.local when no persisted config exists', () => {
  const workspace = makeWorkspace();
  const dotenvLines = ['GITHUB_TOKEN=ghp_dotenv1234567890', 'OPENAI_API_KEY=sk-proj-dotenv1234567890', 'GHCRAWL_API_PORT=6111'];
  fs.writeFileSync(path.join(workspace, '.env.local'), dotenvLines.join('\n'));

  const env = makeTestEnv({ HOME: makeTempHome() });
  const config = loadConfig({ cwd: workspace, env });

  assert.equal(config.githubTokenSource, 'dotenv');
  assert.equal(config.openaiApiKeySource, 'dotenv');
  assert.equal(config.apiPort, 6111);
});

// An existing <workspace>/data/ghcrawl.db is picked up as the database path.
test('loadConfig reuses an existing workspace database when no explicit db path is configured', () => {
  const workspace = makeWorkspace();
  const dataDir = path.join(workspace, 'data');
  const workspaceDb = path.join(dataDir, 'ghcrawl.db');
  fs.mkdirSync(dataDir, { recursive: true });
  fs.writeFileSync(workspaceDb, '');

  const env = makeTestEnv({ HOME: makeTempHome() });
  const config = loadConfig({ cwd: workspace, env });

  assert.equal(config.dbPath, workspaceDb);
});

// Writing then reading the persisted config round-trips the stored secrets.
test('writePersistedConfig creates a readable config file', () => {
  const home = makeTempHome();
  const env = makeTestEnv({ HOME: home });

  const written = writePersistedConfig(
    {
      githubToken: 'ghp_testtoken1234567890',
      openaiApiKey: 'sk-proj-testkey1234567890',
    },
    { env },
  );
  const { configPath } = written;

  assert.equal(configPath, getConfigPath({ env }));
  assert.equal(fs.existsSync(configPath), true);

  const { data } = readPersistedConfig({ env });
  assert.equal(data.githubToken, 'ghp_testtoken1234567890');
  assert.equal(data.openaiApiKey, 'sk-proj-testkey1234567890');
});

// Secret-provider metadata and per-repository TUI preferences survive a reload.
test('loadConfig restores op metadata and repository tui preferences', () => {
  const home = makeTempHome();
  const workspace = makeWorkspace();
  const env = makeTestEnv({ HOME: home });

  writePersistedConfig(
    {
      secretProvider: 'op',
      opVaultName: 'PwrDrvr LLC',
      opItemName: 'ghcrawl',
      tuiPreferences: {
        'openclaw/openclaw': { minClusterSize: 1, sortMode: 'size' },
      },
    },
    { env },
  );

  const config = loadConfig({ cwd: workspace, env });
  assert.equal(config.secretProvider, 'op');
  assert.equal(config.opVaultName, 'PwrDrvr LLC');
  assert.equal(config.opItemName, 'ghcrawl');

  const preference = getTuiRepositoryPreference(config, 'openclaw', 'openclaw');
  assert.deepEqual(preference, { minClusterSize: 1, sortMode: 'size' });
});

// A written preference applies only to its repository; other repositories get the defaults.
test('writeTuiRepositoryPreference persists sort and min cluster size by repository', () => {
  const home = makeTempHome();
  const workspace = makeWorkspace();
  const env = makeTestEnv({ HOME: home });

  const config = loadConfig({ cwd: workspace, env });
  writeTuiRepositoryPreference(config, {
    owner: 'openclaw',
    repo: 'openclaw',
    minClusterSize: 1,
    sortMode: 'size',
  });

  const reloaded = loadConfig({ cwd: workspace, env });
  const saved = getTuiRepositoryPreference(reloaded, 'openclaw', 'openclaw');
  assert.deepEqual(saved, { minClusterSize: 1, sortMode: 'size' });

  const untouched = getTuiRepositoryPreference(reloaded, 'other', 'repo');
  assert.deepEqual(untouched, { minClusterSize: 10, sortMode: 'recent' });
});

// On win32 the config file is resolved under %APPDATA%\ghcrawl.
test('getConfigPath uses APPDATA on Windows', () => {
  const appData = 'C:\\Users\\example\\AppData\\Roaming';
  const env = makeTestEnv({ APPDATA: appData });

  const configPath = getConfigPath({ env, platform: 'win32' });

  assert.equal(configPath, path.win32.resolve(appData, 'ghcrawl', 'config.json'));
});

// A non-numeric port makes loadConfig throw.
test('loadConfig rejects invalid port', () => {
  const home = makeTempHome();
  const env = makeTestEnv({ HOME: home, GHCRAWL_API_PORT: 'abc' });

  assert.throws(() => loadConfig({ cwd: process.cwd(), env }));
});

// An embed concurrency of zero makes loadConfig throw.
test('loadConfig rejects invalid embed queue settings', () => {
  const home = makeTempHome();
  const env = makeTestEnv({ HOME: home, GHCRAWL_EMBED_CONCURRENCY: '0' });

  assert.throws(() => loadConfig({ cwd: process.cwd(), env }));
});

// Table-driven check of the token shape helpers.
test('token format helpers match expected API key shapes', () => {
  const githubCases: Array<[string, boolean]> = [
    ['ghp_testtoken1234567890', true],
    ['github_pat_1234567890abcdefghijklmnopqrstuvwxyz', true],
    ['not-a-token', false],
  ];
  for (const [candidate, expected] of githubCases) {
    assert.equal(isLikelyGitHubToken(candidate), expected);
  }

  const openAiCases: Array<[string, boolean]> = [
    ['sk-proj-testkey1234567890', true],
    ['sk-testkey1234567890', true],
    ['openai-key', false],
  ];
  for (const [candidate, expected] of openAiCases) {
    assert.equal(isLikelyOpenAiApiKey(candidate), expected);
  }
});