@ghcrawl/api-core 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/package.json +54 -0
- package/src/api/server.test.ts +296 -0
- package/src/api/server.ts +171 -0
- package/src/cluster/build.test.ts +18 -0
- package/src/cluster/build.ts +74 -0
- package/src/config.test.ts +247 -0
- package/src/config.ts +421 -0
- package/src/db/migrate.test.ts +30 -0
- package/src/db/migrate.ts +235 -0
- package/src/db/sqlite.ts +14 -0
- package/src/documents/normalize.test.ts +25 -0
- package/src/documents/normalize.ts +52 -0
- package/src/github/client.ts +241 -0
- package/src/index.ts +6 -0
- package/src/openai/provider.ts +141 -0
- package/src/search/exact.test.ts +22 -0
- package/src/search/exact.ts +28 -0
- package/src/service.test.ts +2036 -0
- package/src/service.ts +2497 -0
- package/src/types/better-sqlite3.d.ts +1 -0
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
import test from 'node:test';
|
|
2
|
+
import assert from 'node:assert/strict';
|
|
3
|
+
import fs from 'node:fs';
|
|
4
|
+
import os from 'node:os';
|
|
5
|
+
import path from 'node:path';
|
|
6
|
+
|
|
7
|
+
import {
|
|
8
|
+
getConfigPath,
|
|
9
|
+
getTuiRepositoryPreference,
|
|
10
|
+
isLikelyGitHubToken,
|
|
11
|
+
isLikelyOpenAiApiKey,
|
|
12
|
+
loadConfig,
|
|
13
|
+
readPersistedConfig,
|
|
14
|
+
writeTuiRepositoryPreference,
|
|
15
|
+
writePersistedConfig,
|
|
16
|
+
} from './config.js';
|
|
17
|
+
|
|
18
|
+
/** Creates a uniquely named directory under the OS temp dir to serve as an isolated HOME for one test. */
function makeTempHome(): string {
  const prefix = path.join(os.tmpdir(), 'ghcrawl-config-test-');
  return fs.mkdtempSync(prefix);
}
|
|
21
|
+
|
|
22
|
+
/**
 * Builds an environment for config tests that is isolated from the host machine.
 *
 * Starts from `process.env` but drops every variable that config resolution reads
 * as a setting (`GITHUB_TOKEN`, `OPENAI_API_KEY`, and anything prefixed `GHCRAWL_`).
 * Without this, a token exported in the developer's shell or CI job would outrank the
 * persisted config / `.env.local` values the tests set up and flip the asserted
 * `*Source` fields to `'env'`.
 *
 * `XDG_CONFIG_HOME` and `APPDATA` are explicitly cleared so the config directory is
 * derived from the `HOME` each test supplies.
 *
 * @param overrides Variables to set (or re-introduce) on top of the scrubbed base.
 * @returns A new environment object; `process.env` itself is not modified.
 */
function makeTestEnv(overrides: NodeJS.ProcessEnv = {}): NodeJS.ProcessEnv {
  const inherited: NodeJS.ProcessEnv = {};
  for (const [name, value] of Object.entries(process.env)) {
    const isCredential = name === 'GITHUB_TOKEN' || name === 'OPENAI_API_KEY';
    if (isCredential || name.startsWith('GHCRAWL_')) {
      continue;
    }
    inherited[name] = value;
  }

  return {
    ...inherited,
    XDG_CONFIG_HOME: undefined,
    APPDATA: undefined,
    ...overrides,
  };
}
|
|
30
|
+
|
|
31
|
+
test('loadConfig prefers persisted config and stores defaults under the user config directory', () => {
  // Fresh HOME and workspace so only the values written below can be picked up.
  const home = makeTempHome();
  const workspace = fs.mkdtempSync(path.join(os.tmpdir(), 'ghcrawl-workspace-'));
  fs.writeFileSync(path.join(workspace, 'pnpm-workspace.yaml'), 'packages:\n - "packages/*"\n');
  const env = makeTestEnv({ HOME: home });

  const persisted = {
    githubToken: 'ghp_testtoken1234567890',
    openaiApiKey: 'sk-proj-testkey1234567890',
    apiPort: 6123,
    embedConcurrency: 12,
  };
  writePersistedConfig(persisted, { env });

  const config = loadConfig({ cwd: workspace, env });
  const expectedConfigDir = path.join(home, '.config', 'ghcrawl');

  assert.equal(config.configPath, path.join(expectedConfigDir, 'config.json'));
  assert.equal(config.configFileExists, true);
  assert.equal(config.apiPort, 6123);
  assert.equal(config.embedConcurrency, 12);
  assert.equal(config.githubTokenSource, 'config');
  assert.equal(config.openaiApiKeySource, 'config');
  // No db path configured and no workspace database: the default lives beside config.json.
  assert.equal(config.dbPath, path.join(expectedConfigDir, 'ghcrawl.db'));
});
|
|
59
|
+
|
|
60
|
+
test('loadConfig lets environment variables override persisted config', () => {
  const env = makeTestEnv({
    HOME: makeTempHome(),
    GITHUB_TOKEN: 'ghp_override1234567890',
    GHCRAWL_API_PORT: '7001',
  });

  // Persist competing values; the environment is expected to win for token and port.
  const persisted = {
    githubToken: 'ghp_stored1234567890',
    openaiApiKey: 'sk-proj-stored1234567890',
    apiPort: 6123,
  };
  writePersistedConfig(persisted, { env });

  const config = loadConfig({ cwd: process.cwd(), env });

  assert.equal(config.githubToken, 'ghp_override1234567890');
  assert.equal(config.githubTokenSource, 'env');
  assert.equal(config.apiPort, 7001);
});
|
|
83
|
+
|
|
84
|
+
test('loadConfig falls back to repo .env.local when no persisted config exists', () => {
  const workspace = fs.mkdtempSync(path.join(os.tmpdir(), 'ghcrawl-workspace-'));
  fs.writeFileSync(path.join(workspace, 'pnpm-workspace.yaml'), 'packages:\n - "packages/*"\n');

  const dotenvLines = ['GITHUB_TOKEN=ghp_dotenv1234567890', 'OPENAI_API_KEY=sk-proj-dotenv1234567890', 'GHCRAWL_API_PORT=6111'];
  fs.writeFileSync(path.join(workspace, '.env.local'), dotenvLines.join('\n'));

  // The HOME is brand new, so no persisted config file exists for this run.
  const env = makeTestEnv({ HOME: makeTempHome() });
  const config = loadConfig({ cwd: workspace, env });

  assert.equal(config.githubTokenSource, 'dotenv');
  assert.equal(config.openaiApiKeySource, 'dotenv');
  assert.equal(config.apiPort, 6111);
});
|
|
104
|
+
|
|
105
|
+
test('loadConfig reuses an existing workspace database when no explicit db path is configured', () => {
  const workspace = fs.mkdtempSync(path.join(os.tmpdir(), 'ghcrawl-workspace-'));
  fs.writeFileSync(path.join(workspace, 'pnpm-workspace.yaml'), 'packages:\n - "packages/*"\n');

  // An (empty) database file at <workspace>/data/ghcrawl.db is what loadConfig should discover.
  const dataDir = path.join(workspace, 'data');
  const workspaceDb = path.join(dataDir, 'ghcrawl.db');
  fs.mkdirSync(dataDir, { recursive: true });
  fs.writeFileSync(workspaceDb, '');

  const env = makeTestEnv({ HOME: makeTempHome() });
  const config = loadConfig({ cwd: workspace, env });

  assert.equal(config.dbPath, workspaceDb);
});
|
|
121
|
+
|
|
122
|
+
test('writePersistedConfig creates a readable config file', () => {
  const env = makeTestEnv({ HOME: makeTempHome() });
  const secrets = {
    githubToken: 'ghp_testtoken1234567890',
    openaiApiKey: 'sk-proj-testkey1234567890',
  };

  const written = writePersistedConfig(secrets, { env });

  // The reported path must be the canonical config path and must exist on disk.
  assert.equal(written.configPath, getConfigPath({ env }));
  assert.equal(fs.existsSync(written.configPath), true);

  // Round-trip: what was written is what gets read back.
  const { data } = readPersistedConfig({ env });
  assert.equal(data.githubToken, 'ghp_testtoken1234567890');
  assert.equal(data.openaiApiKey, 'sk-proj-testkey1234567890');
});
|
|
144
|
+
|
|
145
|
+
test('loadConfig restores op metadata and repository tui preferences', () => {
  const workspace = fs.mkdtempSync(path.join(os.tmpdir(), 'ghcrawl-workspace-'));
  fs.writeFileSync(path.join(workspace, 'pnpm-workspace.yaml'), 'packages:\n - "packages/*"\n');
  const env = makeTestEnv({ HOME: makeTempHome() });

  writePersistedConfig(
    {
      secretProvider: 'op',
      opVaultName: 'PwrDrvr LLC',
      opItemName: 'ghcrawl',
      tuiPreferences: {
        'openclaw/openclaw': { minClusterSize: 1, sortMode: 'size' },
      },
    },
    { env },
  );

  const config = loadConfig({ cwd: workspace, env });

  assert.equal(config.secretProvider, 'op');
  assert.equal(config.opVaultName, 'PwrDrvr LLC');
  assert.equal(config.opItemName, 'ghcrawl');

  // The preference stored under "owner/repo" is looked up by its two parts.
  const restored = getTuiRepositoryPreference(config, 'openclaw', 'openclaw');
  assert.deepEqual(restored, { minClusterSize: 1, sortMode: 'size' });
});
|
|
178
|
+
|
|
179
|
+
test('writeTuiRepositoryPreference persists sort and min cluster size by repository', () => {
  const workspace = fs.mkdtempSync(path.join(os.tmpdir(), 'ghcrawl-workspace-'));
  fs.writeFileSync(path.join(workspace, 'pnpm-workspace.yaml'), 'packages:\n - "packages/*"\n');
  const env = makeTestEnv({ HOME: makeTempHome() });
  const loadOptions = { cwd: workspace, env };

  const initial = loadConfig(loadOptions);
  writeTuiRepositoryPreference(initial, {
    owner: 'openclaw',
    repo: 'openclaw',
    minClusterSize: 1,
    sortMode: 'size',
  });

  // Reload from disk so the assertions prove persistence, not just in-memory mutation.
  const reloaded = loadConfig(loadOptions);
  const saved = getTuiRepositoryPreference(reloaded, 'openclaw', 'openclaw');
  assert.deepEqual(saved, { minClusterSize: 1, sortMode: 'size' });

  // A repository that never had a preference written still gets the built-in default.
  const untouched = getTuiRepositoryPreference(reloaded, 'other', 'repo');
  assert.deepEqual(untouched, { minClusterSize: 10, sortMode: 'recent' });
});
|
|
206
|
+
|
|
207
|
+
test('getConfigPath uses APPDATA on Windows', () => {
  const appData = 'C:\\Users\\example\\AppData\\Roaming';
  const env = makeTestEnv({ APPDATA: appData });

  // `platform` is passed explicitly so the Windows branch is exercised on any host OS.
  const configPath = getConfigPath({ env, platform: 'win32' });

  const expected = path.win32.resolve('C:\\Users\\example\\AppData\\Roaming', 'ghcrawl', 'config.json');
  assert.equal(configPath, expected);
});
|
|
218
|
+
|
|
219
|
+
test('loadConfig rejects invalid port', () => {
  const home = makeTempHome();
  // Pin the error message: a bare `assert.throws` would also pass if loadConfig failed
  // for an unrelated reason (unreadable config, broken .env.local in the cwd, ...).
  assert.throws(
    () =>
      loadConfig({
        cwd: process.cwd(),
        env: { ...makeTestEnv(), HOME: home, GHCRAWL_API_PORT: 'abc' },
      }),
    /Invalid GHCRAWL_API_PORT: abc/,
  );
});
|
|
228
|
+
|
|
229
|
+
test('loadConfig rejects invalid embed queue settings', () => {
  const home = makeTempHome();
  // Pin the error message so the test only passes when the concurrency value itself
  // is what gets rejected, not when loadConfig throws for some other reason.
  assert.throws(
    () =>
      loadConfig({
        cwd: process.cwd(),
        env: { ...makeTestEnv(), HOME: home, GHCRAWL_EMBED_CONCURRENCY: '0' },
      }),
    /Invalid GHCRAWL_EMBED_CONCURRENCY: 0/,
  );
});
|
|
238
|
+
|
|
239
|
+
test('token format helpers match expected API key shapes', () => {
  // [candidate, expected verdict] pairs, checked in the order listed.
  const githubCases: Array<[string, boolean]> = [
    ['ghp_testtoken1234567890', true],
    ['github_pat_1234567890abcdefghijklmnopqrstuvwxyz', true],
    ['not-a-token', false],
  ];
  for (const [candidate, expected] of githubCases) {
    assert.equal(isLikelyGitHubToken(candidate), expected);
  }

  const openAiCases: Array<[string, boolean]> = [
    ['sk-proj-testkey1234567890', true],
    ['sk-testkey1234567890', true],
    ['openai-key', false],
  ];
  for (const [candidate, expected] of openAiCases) {
    assert.equal(isLikelyOpenAiApiKey(candidate), expected);
  }
});
|
package/src/config.ts
ADDED
|
@@ -0,0 +1,421 @@
|
|
|
1
|
+
import fs from 'node:fs';
|
|
2
|
+
import os from 'node:os';
|
|
3
|
+
import path from 'node:path';
|
|
4
|
+
|
|
5
|
+
import dotenv from 'dotenv';
|
|
6
|
+
|
|
7
|
+
/**
 * Which layer supplied a resolved setting: the process environment, the persisted
 * config file, the workspace `.env.local`, a built-in default, or `none` when no
 * layer provided a value.
 */
export type ConfigValueSource = 'env' | 'config' | 'dotenv' | 'default' | 'none';

/** How secrets are provided. `op` is reported to users as the 1Password CLI; `plaintext` is the fallback. */
export type SecretProvider = 'plaintext' | 'op';

/** Sort order the TUI remembers per repository. */
export type TuiSortPreference = 'recent' | 'size';

/** The only minimum-cluster-size values accepted for a stored TUI preference. */
export type TuiMinClusterSize = 0 | 1 | 10 | 20 | 50;

/** TUI display settings remembered for a single repository. */
export type TuiRepositoryPreference = {
  minClusterSize: TuiMinClusterSize;
  sortMode: TuiSortPreference;
};
|
|
16
|
+
|
|
17
|
+
/**
 * Shape of the JSON document stored in the user's `config.json`.
 *
 * Every field is optional: a missing field means "not configured here", and
 * `loadConfig` then falls through to the other layers or a default.
 */
export type PersistedGitcrawlConfig = {
  // Credentials and secret-provider metadata.
  githubToken?: string;
  openaiApiKey?: string;
  secretProvider?: SecretProvider;
  opVaultName?: string;
  opItemName?: string;

  // Storage and API settings.
  dbPath?: string;
  apiPort?: number;

  // Model selection and embedding queue tuning.
  summaryModel?: string;
  embedModel?: string;
  embedBatchSize?: number;
  embedConcurrency?: number;
  embedMaxUnread?: number;

  // OpenSearch settings.
  openSearchUrl?: string;
  openSearchIndex?: string;

  /** TUI preferences keyed by `owner/repo`. */
  tuiPreferences?: Record<string, TuiRepositoryPreference>;
};
|
|
34
|
+
|
|
35
|
+
/**
 * Fully resolved runtime configuration returned by `loadConfig`.
 *
 * Settings that have a built-in default are always present; credentials and the
 * OpenSearch URL remain optional. The `*Source` fields record which layer a value
 * came from.
 */
export type GitcrawlConfig = {
  /** Directory containing `pnpm-workspace.yaml`, or the resolved start directory when none is found. */
  workspaceRoot: string;
  /** Directory that holds `config.json`. */
  configDir: string;
  /** Full path of `config.json`. */
  configPath: string;
  /** Whether `config.json` existed when the configuration was loaded. */
  configFileExists: boolean;

  /** Absolute database path; relative configured values are resolved against `configDir`. */
  dbPath: string;
  dbPathSource: ConfigValueSource;
  apiPort: number;

  githubToken?: string;
  githubTokenSource: ConfigValueSource;
  openaiApiKey?: string;
  openaiApiKeySource: ConfigValueSource;
  secretProvider: SecretProvider;
  opVaultName?: string;
  opItemName?: string;

  summaryModel: string;
  embedModel: string;
  embedBatchSize: number;
  embedConcurrency: number;
  embedMaxUnread: number;

  openSearchUrl?: string;
  openSearchIndex: string;

  /** TUI preferences keyed by `owner/repo`; empty when none are stored. */
  tuiPreferences: Record<string, TuiRepositoryPreference>;
};
|
|
59
|
+
|
|
60
|
+
/** Result of reading the persisted config file, including where it lives and whether it existed. */
type LoadedStoredConfig = {
  configDir: string;
  configPath: string;
  /** False when no config file was found; `data` is then an empty object. */
  exists: boolean;
  data: PersistedGitcrawlConfig;
};

/** Injection points for config resolution; each falls back to the live process value when omitted. */
type LoadConfigOptions = {
  /** Directory from which the workspace root search starts. */
  cwd?: string;
  env?: NodeJS.ProcessEnv;
  /** Selects Windows vs. host path handling and enables the `APPDATA` lookup. */
  platform?: NodeJS.Platform;
};

/** One candidate value for a setting, tagged with the layer that produced it. */
type LayeredValue<T> = {
  source: ConfigValueSource;
  value: T | undefined;
};
|
|
77
|
+
|
|
78
|
+
/**
 * Picks the path implementation for `platform`: `path.win32` for Windows,
 * otherwise the host's default `path` module.
 */
function pathModuleForPlatform(platform: NodeJS.Platform) {
  if (platform === 'win32') {
    return path.win32;
  }
  return path;
}
|
|
81
|
+
|
|
82
|
+
/**
 * Walks upward from `start` looking for a directory that contains `pnpm-workspace.yaml`.
 *
 * @param start Directory to begin the search from (resolved to an absolute path first).
 * @returns The first ancestor (or `start` itself) holding the marker file; when the
 *          filesystem root is reached without a match, the resolved `start`.
 */
function findWorkspaceRoot(start: string): string {
  const origin = path.resolve(start);
  let candidate = origin;
  for (;;) {
    const marker = path.join(candidate, 'pnpm-workspace.yaml');
    if (fs.existsSync(marker)) {
      return candidate;
    }
    const parent = path.dirname(candidate);
    // dirname() of the filesystem root is the root itself: nothing further to climb.
    if (parent === candidate) {
      return origin;
    }
    candidate = parent;
  }
}
|
|
93
|
+
|
|
94
|
+
/**
 * Determines the user's home directory for config placement.
 *
 * Prefers `HOME`, then `USERPROFILE`, then `os.homedir()`. Blank values are treated
 * as unset: `path.resolve('')` yields the current working directory, so an empty
 * `HOME` would otherwise silently put the config under whatever directory the
 * process was started in.
 *
 * @param env Environment to read `HOME` / `USERPROFILE` from.
 * @returns An absolute path to the home directory.
 */
function resolveHomeDirectory(env: NodeJS.ProcessEnv): string {
  const candidates = [env.HOME, env.USERPROFILE];
  const fromEnv = candidates.find((value) => value !== undefined && value.trim().length > 0);
  return path.resolve(fromEnv ?? os.homedir());
}
|
|
98
|
+
|
|
99
|
+
/**
 * Computes the directory that holds ghcrawl's persisted configuration.
 *
 * Resolution order:
 *  1. `XDG_CONFIG_HOME/ghcrawl` when `XDG_CONFIG_HOME` is set and non-empty;
 *  2. `APPDATA/ghcrawl` when the platform is `win32` and `APPDATA` is set and non-empty;
 *  3. `<home>/.config/ghcrawl` otherwise.
 *
 * @param options Optional `env` / `platform` overrides; the live process values are used when omitted.
 */
export function getConfigDir(options: LoadConfigOptions = {}): string {
  const env = options.env ?? process.env;
  const platform = options.platform ?? process.platform;
  const paths = pathModuleForPlatform(platform);

  // `||` is deliberate: an empty-string variable counts as unset and falls through.
  const explicitBase = env.XDG_CONFIG_HOME || (platform === 'win32' ? env.APPDATA : undefined);
  if (explicitBase) {
    return paths.resolve(explicitBase, 'ghcrawl');
  }

  const home = resolveHomeDirectory(env);
  return paths.join(home, '.config', 'ghcrawl');
}
|
|
111
|
+
|
|
112
|
+
/**
 * Returns the full path of `config.json` inside the directory chosen by `getConfigDir`,
 * joined with the path flavour that matches the requested platform.
 */
export function getConfigPath(options: LoadConfigOptions = {}): string {
  const configDir = getConfigDir(options);
  const paths = pathModuleForPlatform(options.platform ?? process.platform);
  return paths.join(configDir, 'config.json');
}
|
|
117
|
+
|
|
118
|
+
/**
 * Parses `<workspaceRoot>/.env.local` into key/value pairs.
 *
 * @returns The parsed variables, or an empty object when the file does not exist.
 */
function readDotenvFile(workspaceRoot: string): Record<string, string> {
  const dotenvPath = path.join(workspaceRoot, '.env.local');
  return fs.existsSync(dotenvPath) ? dotenv.parse(fs.readFileSync(dotenvPath, 'utf8')) : {};
}
|
|
125
|
+
|
|
126
|
+
/**
 * Returns the first candidate whose value is neither `undefined` nor `null`.
 *
 * Candidates are examined in argument order, so callers list them from highest to
 * lowest precedence. Falsy-but-defined values such as `0` or `''` still count.
 *
 * @returns The winning candidate object itself, or `{ source: 'none', value: undefined }`
 *          when every candidate is empty.
 */
function pickDefined<T>(...values: Array<LayeredValue<T>>): LayeredValue<T> {
  const winner = values.find((candidate) => candidate.value != null);
  return winner ?? { source: 'none', value: undefined };
}
|
|
134
|
+
|
|
135
|
+
/**
 * Accepts `value` only when it is a string with at least one non-whitespace character.
 *
 * @returns The string exactly as given (it is not trimmed), otherwise `undefined`.
 */
function getString(value: unknown): string | undefined {
  if (typeof value !== 'string') {
    return undefined;
  }
  return value.trim().length === 0 ? undefined : value;
}
|
|
138
|
+
|
|
139
|
+
/**
 * Reads a non-blank string from `env`, trying `primary` first and then the optional
 * `legacy` variable name.
 */
function getEnvString(env: NodeJS.ProcessEnv, primary: string, legacy?: string): string | undefined {
  const preferred = getString(env[primary]);
  if (preferred !== undefined) {
    return preferred;
  }
  return legacy ? getString(env[legacy]) : undefined;
}
|
|
142
|
+
|
|
143
|
+
/**
 * Reads a non-blank string from parsed dotenv values, trying `primary` first and
 * then the optional `legacy` key.
 */
function getDotenvString(values: Record<string, string>, primary: string, legacy?: string): string | undefined {
  const preferred = getString(values[primary]);
  if (preferred !== undefined) {
    return preferred;
  }
  return legacy ? getString(values[legacy]) : undefined;
}
|
|
146
|
+
|
|
147
|
+
/** Accepts `value` only when it is a finite number (rejects `NaN`, infinities and non-numbers). */
function getNumber(value: unknown): number | undefined {
  if (typeof value !== 'number' || !Number.isFinite(value)) {
    return undefined;
  }
  return value;
}
|
|
150
|
+
|
|
151
|
+
/** Narrows an untrusted value to a known `SecretProvider`, or `undefined` when unrecognised. */
function getSecretProvider(value: unknown): SecretProvider | undefined {
  if (value === 'plaintext' || value === 'op') {
    return value;
  }
  return undefined;
}
|
|
154
|
+
|
|
155
|
+
/** Narrows an untrusted value to a known `TuiSortPreference`, or `undefined` when unrecognised. */
function getTuiSortPreference(value: unknown): TuiSortPreference | undefined {
  if (value === 'recent' || value === 'size') {
    return value;
  }
  return undefined;
}
|
|
158
|
+
|
|
159
|
+
/** Narrows an untrusted value to one of the allowed minimum cluster sizes, or `undefined`. */
function getTuiMinClusterSize(value: unknown): TuiMinClusterSize | undefined {
  if (value === 0 || value === 1 || value === 10 || value === 20 || value === 50) {
    return value;
  }
  return undefined;
}
|
|
162
|
+
|
|
163
|
+
/**
 * Validates the untrusted `tuiPreferences` section of a parsed config file.
 *
 * Entries that are not objects, or whose `minClusterSize` / `sortMode` is not one of
 * the allowed values, are dropped rather than reported.
 *
 * @returns A map of the valid entries (possibly empty), or `undefined` when `value`
 *          is not an object at all.
 */
function getTuiPreferences(value: unknown): Record<string, TuiRepositoryPreference> | undefined {
  if (!value || typeof value !== 'object') {
    return undefined;
  }

  const preferences: Record<string, TuiRepositoryPreference> = {};
  Object.entries(value as Record<string, unknown>).forEach(([fullName, candidate]) => {
    if (!candidate || typeof candidate !== 'object') {
      return;
    }
    const { minClusterSize: rawSize, sortMode: rawSort } = candidate as Record<string, unknown>;
    const minClusterSize = getTuiMinClusterSize(rawSize);
    const sortMode = getTuiSortPreference(rawSort);
    // Keep an entry only when both fields validated.
    if (minClusterSize !== undefined && sortMode !== undefined) {
      preferences[fullName] = { minClusterSize, sortMode };
    }
  });

  return preferences;
}
|
|
184
|
+
|
|
185
|
+
/**
 * Reads and validates the persisted `config.json`.
 *
 * Each known field is individually validated; fields with the wrong type, and unknown
 * fields, are dropped. A JSON document whose root is not an object (including the
 * literal `null`, which previously crashed with a TypeError) is treated as containing
 * no settings.
 *
 * @param options Optional `env` / `platform` overrides used to locate the config file.
 * @returns The config location, whether the file existed, and the validated data
 *          (an empty object when the file does not exist).
 * @throws {SyntaxError} When the file exists but is not valid JSON; the message names the file.
 */
export function readPersistedConfig(options: LoadConfigOptions = {}): LoadedStoredConfig {
  const configDir = getConfigDir(options);
  const configPath = getConfigPath(options);
  if (!fs.existsSync(configPath)) {
    return { configDir, configPath, exists: false, data: {} };
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(fs.readFileSync(configPath, 'utf8'));
  } catch (error: unknown) {
    if (error instanceof SyntaxError) {
      // Same error type as before, but now the user can tell which file is broken.
      throw new SyntaxError(`Invalid JSON in ${configPath}: ${error.message}`);
    }
    throw error;
  }

  const raw: Record<string, unknown> =
    parsed !== null && typeof parsed === 'object' ? (parsed as Record<string, unknown>) : {};

  return {
    configDir,
    configPath,
    exists: true,
    data: {
      githubToken: getString(raw.githubToken),
      openaiApiKey: getString(raw.openaiApiKey),
      secretProvider: getSecretProvider(raw.secretProvider),
      opVaultName: getString(raw.opVaultName),
      opItemName: getString(raw.opItemName),
      dbPath: getString(raw.dbPath),
      apiPort: getNumber(raw.apiPort),
      summaryModel: getString(raw.summaryModel),
      embedModel: getString(raw.embedModel),
      embedBatchSize: getNumber(raw.embedBatchSize),
      embedConcurrency: getNumber(raw.embedConcurrency),
      embedMaxUnread: getNumber(raw.embedMaxUnread),
      openSearchUrl: getString(raw.openSearchUrl),
      openSearchIndex: getString(raw.openSearchIndex),
      tuiPreferences: getTuiPreferences(raw.tuiPreferences),
    },
  };
}
|
|
216
|
+
|
|
217
|
+
/**
 * Merges `values` into the persisted config and writes it back as pretty-printed JSON.
 *
 * The existing file is read through `readPersistedConfig`, so only validated, known
 * fields are carried over. Keys in `values` overwrite stored keys, including keys
 * explicitly set to `undefined`, which are then omitted from the JSON.
 *
 * The file may contain API tokens, so it is restricted to owner read/write. The `mode`
 * option of `writeFileSync` only applies when the file is created, so permissions are
 * also tightened explicitly for a pre-existing file.
 *
 * @param values Settings to store.
 * @param options Optional `env` / `platform` overrides used to locate the config file.
 * @returns The path of the file that was written.
 */
export function writePersistedConfig(values: PersistedGitcrawlConfig, options: LoadConfigOptions = {}): { configPath: string } {
  const current = readPersistedConfig(options);
  fs.mkdirSync(current.configDir, { recursive: true });

  const next = { ...current.data, ...values };
  fs.writeFileSync(current.configPath, `${JSON.stringify(next, null, 2)}\n`, { mode: 0o600 });

  try {
    fs.chmodSync(current.configPath, 0o600);
  } catch {
    // Best effort: the write itself succeeded, and some filesystems / ownership
    // setups do not allow changing the mode. Do not turn that into a failed save.
  }

  return { configPath: current.configPath };
}
|
|
227
|
+
|
|
228
|
+
/** Returns `value` unchanged when it is absolute; otherwise resolves it relative to `configDir`. */
function resolveConfiguredPath(configDir: string, value: string): string {
  if (path.isAbsolute(value)) {
    return value;
  }
  return path.resolve(configDir, value);
}
|
|
231
|
+
|
|
232
|
+
/**
 * Looks for an existing database at `<workspaceRoot>/data/ghcrawl.db`.
 *
 * @returns The database path when the file exists, otherwise `null`.
 */
function getWorkspaceDbPath(workspaceRoot: string): string | null {
  const candidate = path.join(workspaceRoot, 'data', 'ghcrawl.db');
  if (!fs.existsSync(candidate)) {
    return null;
  }
  return candidate;
}
|
|
236
|
+
|
|
237
|
+
/**
 * Converts a raw setting to a strictly positive safe integer.
 *
 * Conversion uses `Number(raw)`, so surrounding whitespace is tolerated.
 *
 * @param name Setting name, used only in the error message.
 * @param raw Textual value to convert.
 * @throws {Error} `Invalid <name>: <raw>` when the value is not an integer greater than zero.
 */
function parseIntegerSetting(name: string, raw: string): number {
  const parsed = Number(raw);
  const isPositiveInteger = Number.isSafeInteger(parsed) && parsed > 0;
  if (isPositiveInteger) {
    return parsed;
  }
  throw new Error(`Invalid ${name}: ${raw}`);
}
|
|
244
|
+
|
|
245
|
+
/**
 * Shape check for GitHub tokens: after trimming, the value must be `ghp_`, `gho_`,
 * `ghu_`, `ghs_` or `ghr_` followed by letters/digits/underscores, or start with
 * `github_pat_` followed by the same character set. Does not verify the token is valid.
 */
export function isLikelyGitHubToken(value: string): boolean {
  const tokenShape = /^(gh[pousr]_[A-Za-z0-9_]+|github_pat_[A-Za-z0-9_]+)$/;
  const candidate = value.trim();
  return tokenShape.test(candidate);
}
|
|
248
|
+
|
|
249
|
+
/**
 * Shape check for OpenAI API keys: after trimming, the value must start with `sk-`
 * followed by one or more letters, digits, dots, underscores or hyphens. Does not
 * verify the key is valid.
 */
export function isLikelyOpenAiApiKey(value: string): boolean {
  const keyShape = /^sk-[A-Za-z0-9._-]+$/;
  const candidate = value.trim();
  return keyShape.test(candidate);
}
|
|
252
|
+
|
|
253
|
+
/**
 * Resolves the complete runtime configuration.
 *
 * Each setting is looked up through the same precedence chain:
 * process environment, then the persisted `config.json`, then the workspace
 * `.env.local`, then a built-in default where one exists.
 *
 * Database path rules: when no layer configures a path and
 * `<workspaceRoot>/data/ghcrawl.db` already exists, that file is used (reported with
 * source `default`); otherwise the configured path, or `ghcrawl.db`, is resolved
 * relative to the config directory unless it is absolute.
 *
 * @param options Optional `cwd` / `env` / `platform` overrides; the live process values are used when omitted.
 * @returns The resolved configuration, including which layer supplied the credentials and db path.
 * @throws {Error} `Invalid <NAME>: <value>` when a numeric setting is not a positive
 *         integer, or when the API port exceeds 65535 (the largest TCP port).
 */
export function loadConfig(options: LoadConfigOptions = {}): GitcrawlConfig {
  const cwd = options.cwd ?? process.cwd();
  const env = options.env ?? process.env;
  const platform = options.platform ?? process.platform;
  const workspaceRoot = findWorkspaceRoot(cwd);
  const stored = readPersistedConfig({ cwd, env, platform });
  const dotenvValues = readDotenvFile(workspaceRoot);

  // Resolves a string setting through env > config file > .env.local > optional default.
  // With no default and no value in any layer, the result has source 'none'.
  const layeredString = (name: string, storedValue: string | undefined, fallback?: string): LayeredValue<string> =>
    pickDefined<string>(
      { source: 'env', value: getEnvString(env, name) },
      { source: 'config', value: storedValue },
      { source: 'dotenv', value: getDotenvString(dotenvValues, name) },
      { source: 'default', value: fallback },
    );

  // Same chain for numeric settings; the winner is validated as a positive integer.
  const layeredInteger = (name: string, storedValue: number | undefined, fallback: number): number => {
    const picked = pickDefined<string | number>(
      { source: 'env', value: getEnvString(env, name) },
      { source: 'config', value: storedValue },
      { source: 'dotenv', value: getDotenvString(dotenvValues, name) },
      { source: 'default', value: fallback },
    );
    return parseIntegerSetting(name, String(picked.value ?? fallback));
  };

  const githubToken = layeredString('GITHUB_TOKEN', stored.data.githubToken);
  const openaiApiKey = layeredString('OPENAI_API_KEY', stored.data.openaiApiKey);

  // An existing workspace database is only considered when no db path is configured anywhere.
  const configuredDbPath = layeredString('GHCRAWL_DB_PATH', stored.data.dbPath);
  const workspaceDbPath = configuredDbPath.value === undefined ? getWorkspaceDbPath(workspaceRoot) : null;
  const dbPathValue: LayeredValue<string> =
    workspaceDbPath !== null
      ? { source: 'default', value: workspaceDbPath }
      : pickDefined<string>(configuredDbPath, { source: 'default', value: 'ghcrawl.db' });
  const dbPath = resolveConfiguredPath(stored.configDir, dbPathValue.value ?? 'ghcrawl.db');

  const maxTcpPort = 65535;
  const apiPort = layeredInteger('GHCRAWL_API_PORT', stored.data.apiPort, 5179);
  if (apiPort > maxTcpPort) {
    // Fail here with the setting name instead of later when the server tries to listen.
    throw new Error(`Invalid GHCRAWL_API_PORT: ${apiPort}`);
  }
  const embedBatchSize = layeredInteger('GHCRAWL_EMBED_BATCH_SIZE', stored.data.embedBatchSize, 8);
  const embedConcurrency = layeredInteger('GHCRAWL_EMBED_CONCURRENCY', stored.data.embedConcurrency, 10);
  const embedMaxUnread = layeredInteger('GHCRAWL_EMBED_MAX_UNREAD', stored.data.embedMaxUnread, 20);

  const summaryModel = layeredString('GHCRAWL_SUMMARY_MODEL', stored.data.summaryModel, 'gpt-5-mini');
  const embedModel = layeredString('GHCRAWL_EMBED_MODEL', stored.data.embedModel, 'text-embedding-3-large');
  const openSearchUrl = layeredString('GHCRAWL_OPENSEARCH_URL', stored.data.openSearchUrl);
  const openSearchIndex = layeredString('GHCRAWL_OPENSEARCH_INDEX', stored.data.openSearchIndex, 'ghcrawl-threads');

  return {
    workspaceRoot,
    configDir: stored.configDir,
    configPath: stored.configPath,
    configFileExists: stored.exists,
    dbPath,
    dbPathSource: dbPathValue.source,
    apiPort,
    githubToken: githubToken.value,
    githubTokenSource: githubToken.source,
    openaiApiKey: openaiApiKey.value,
    openaiApiKeySource: openaiApiKey.source,
    secretProvider: stored.data.secretProvider ?? 'plaintext',
    opVaultName: stored.data.opVaultName,
    opItemName: stored.data.opItemName,
    summaryModel: summaryModel.value ?? 'gpt-5-mini',
    embedModel: embedModel.value ?? 'text-embedding-3-large',
    embedBatchSize,
    embedConcurrency,
    embedMaxUnread,
    openSearchUrl: openSearchUrl.value,
    openSearchIndex: openSearchIndex.value ?? 'ghcrawl-threads',
    tuiPreferences: stored.data.tuiPreferences ?? {},
  };
}
|
|
363
|
+
|
|
364
|
+
/**
 * Creates the config directory and the directory that will contain the database file,
 * including any missing parents. Directories that already exist are left alone.
 */
export function ensureRuntimeDirs(config: GitcrawlConfig): void {
  const requiredDirs = [config.configDir, path.dirname(config.dbPath)];
  for (const dir of requiredDirs) {
    fs.mkdirSync(dir, { recursive: true });
  }
}
|
|
368
|
+
|
|
369
|
+
/**
 * Looks up the stored TUI preference for `owner/repo`.
 *
 * @returns The stored preference, or a new `{ minClusterSize: 10, sortMode: 'recent' }`
 *          object when the repository has none.
 */
export function getTuiRepositoryPreference(config: GitcrawlConfig, owner: string, repo: string): TuiRepositoryPreference {
  const fullName = `${owner}/${repo}`;
  const saved = config.tuiPreferences[fullName];
  return saved ?? { minClusterSize: 10, sortMode: 'recent' };
}
|
|
372
|
+
|
|
373
|
+
/**
 * Stores the TUI preference for one repository, both in memory and on disk.
 *
 * `config.tuiPreferences` is replaced with an updated map (the caller's config object
 * is mutated so it stays in sync with the file). On disk, every other top-level key of
 * the existing `config.json` is preserved as-is and only `tuiPreferences` is replaced.
 *
 * An existing file whose JSON root is not a plain object is ignored rather than spread
 * into the output (spreading a string or array root would write its indices as keys).
 * The file may hold API tokens, so it is kept at owner read/write; the `mode` option
 * of `writeFileSync` only applies on creation, hence the explicit chmod.
 *
 * @param config Loaded configuration; supplies the file location and current preferences.
 * @param params Repository (`owner`, `repo`) and the preference values to store.
 * @returns The path of the file that was written.
 * @throws {SyntaxError} When the existing config file is not valid JSON.
 */
export function writeTuiRepositoryPreference(
  config: GitcrawlConfig,
  params: { owner: string; repo: string; minClusterSize: TuiMinClusterSize; sortMode: TuiSortPreference },
): { configPath: string } {
  const fullName = `${params.owner}/${params.repo}`;
  const nextPreferences = {
    ...config.tuiPreferences,
    [fullName]: {
      minClusterSize: params.minClusterSize,
      sortMode: params.sortMode,
    },
  };
  config.tuiPreferences = nextPreferences;

  let existing: Record<string, unknown> = {};
  if (fs.existsSync(config.configPath)) {
    const parsed: unknown = JSON.parse(fs.readFileSync(config.configPath, 'utf8'));
    if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
      existing = parsed as Record<string, unknown>;
    }
  }
  const next = { ...existing, tuiPreferences: nextPreferences };

  fs.mkdirSync(config.configDir, { recursive: true });
  fs.writeFileSync(config.configPath, `${JSON.stringify(next, null, 2)}\n`, { mode: 0o600 });
  try {
    fs.chmodSync(config.configPath, 0o600);
  } catch {
    // Best effort: the preference was saved; not every filesystem / ownership setup
    // allows changing the mode, and that should not fail the save.
  }
  return { configPath: config.configPath };
}
|
|
398
|
+
|
|
399
|
+
/**
 * Returns the configured GitHub token or throws with setup guidance.
 *
 * @throws {Error} When no token is configured. If the config selects the `op` secret
 *         provider and names both a vault and an item, the message points at the
 *         1Password wrapper; otherwise it suggests `ghcrawl init`.
 */
export function requireGithubToken(config: GitcrawlConfig): string {
  const token = config.githubToken;
  if (token) {
    return token;
  }

  const usesOnePassword = config.secretProvider === 'op' && Boolean(config.opVaultName) && Boolean(config.opItemName);
  const message = usesOnePassword
    ? `Missing GitHub token in the environment. This config is set to use 1Password CLI via ${config.opVaultName}/${config.opItemName}; run ghcrawl through your op wrapper or set GITHUB_TOKEN. Expected config at ${config.configPath}`
    : `Missing GitHub token. Run ghcrawl init or set GITHUB_TOKEN. Expected config at ${config.configPath}`;
  throw new Error(message);
}
|
|
410
|
+
|
|
411
|
+
/**
 * Returns the configured OpenAI API key or throws with setup guidance.
 *
 * @throws {Error} When no key is configured. If the config selects the `op` secret
 *         provider and names both a vault and an item, the message points at the
 *         1Password wrapper; otherwise it suggests `ghcrawl init`.
 */
export function requireOpenAiKey(config: GitcrawlConfig): string {
  const apiKey = config.openaiApiKey;
  if (apiKey) {
    return apiKey;
  }

  const usesOnePassword = config.secretProvider === 'op' && Boolean(config.opVaultName) && Boolean(config.opItemName);
  const message = usesOnePassword
    ? `Missing OpenAI API key in the environment. This config is set to use 1Password CLI via ${config.opVaultName}/${config.opItemName}; run ghcrawl through your op wrapper or set OPENAI_API_KEY. Expected config at ${config.configPath}`
    : `Missing OpenAI API key. Run ghcrawl init or set OPENAI_API_KEY. Expected config at ${config.configPath}`;
  throw new Error(message);
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import test from 'node:test';
|
|
2
|
+
import assert from 'node:assert/strict';
|
|
3
|
+
|
|
4
|
+
import { migrate } from './migrate.js';
|
|
5
|
+
import { openDb } from './sqlite.js';
|
|
6
|
+
|
|
7
|
+
test('migrate creates core tables', () => {
  // In-memory database: nothing touches disk and every run starts empty.
  const db = openDb(':memory:');
  try {
    migrate(db);

    const schemaRows = db
      .prepare("select name from sqlite_master where type in ('table', 'view') order by name asc")
      .all() as Array<{ name: string }>;
    const names = schemaRows.map(({ name }) => name);
    const expectedTables = ['repositories', 'threads', 'documents', 'document_embeddings', 'cluster_runs', 'repo_sync_state'];
    for (const table of expectedTables) {
      assert.ok(names.includes(table));
    }

    const columnRows = db.prepare('pragma table_info(threads)').all() as Array<{ name: string }>;
    const threadColumnNames = columnRows.map(({ name }) => name);
    for (const column of ['first_pulled_at', 'last_pulled_at']) {
      assert.ok(threadColumnNames.includes(column));
    }
  } finally {
    // Always release the handle, even when an assertion fails.
    db.close();
  }
});
|