@ghcrawl/api-core 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +25 -0
- package/dist/api/server.d.ts +4 -0
- package/dist/api/server.d.ts.map +1 -0
- package/dist/api/server.js +142 -0
- package/dist/api/server.js.map +1 -0
- package/dist/cluster/build.d.ts +16 -0
- package/dist/cluster/build.d.ts.map +1 -0
- package/dist/cluster/build.js +62 -0
- package/dist/cluster/build.js.map +1 -0
- package/dist/config.d.ts +83 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +257 -0
- package/dist/config.js.map +1 -0
- package/dist/db/migrate.d.ts +3 -0
- package/dist/db/migrate.d.ts.map +1 -0
- package/{src/db/migrate.ts → dist/db/migrate.js} +30 -36
- package/dist/db/migrate.js.map +1 -0
- package/dist/db/sqlite.d.ts +4 -0
- package/dist/db/sqlite.d.ts.map +1 -0
- package/dist/db/sqlite.js +11 -0
- package/dist/db/sqlite.js.map +1 -0
- package/dist/documents/normalize.d.ts +23 -0
- package/dist/documents/normalize.d.ts.map +1 -0
- package/dist/documents/normalize.js +36 -0
- package/dist/documents/normalize.js.map +1 -0
- package/dist/github/client.d.ts +24 -0
- package/dist/github/client.d.ts.map +1 -0
- package/dist/github/client.js +170 -0
- package/dist/github/client.js.map +1 -0
- package/dist/index.d.ts +7 -0
- package/dist/index.d.ts.map +1 -0
- package/{src/index.ts → dist/index.js} +1 -0
- package/dist/index.js.map +1 -0
- package/dist/openai/provider.d.ts +44 -0
- package/dist/openai/provider.d.ts.map +1 -0
- package/dist/openai/provider.js +107 -0
- package/dist/openai/provider.js.map +1 -0
- package/dist/search/exact.d.ts +14 -0
- package/dist/search/exact.d.ts.map +1 -0
- package/dist/search/exact.js +26 -0
- package/dist/search/exact.js.map +1 -0
- package/dist/service.d.ts +249 -0
- package/dist/service.d.ts.map +1 -0
- package/dist/service.js +1801 -0
- package/dist/service.js.map +1 -0
- package/package.json +8 -6
- package/src/api/server.test.ts +0 -296
- package/src/api/server.ts +0 -171
- package/src/cluster/build.test.ts +0 -18
- package/src/cluster/build.ts +0 -74
- package/src/config.test.ts +0 -247
- package/src/config.ts +0 -421
- package/src/db/migrate.test.ts +0 -30
- package/src/db/sqlite.ts +0 -14
- package/src/documents/normalize.test.ts +0 -25
- package/src/documents/normalize.ts +0 -52
- package/src/github/client.ts +0 -241
- package/src/openai/provider.ts +0 -141
- package/src/search/exact.test.ts +0 -22
- package/src/search/exact.ts +0 -28
- package/src/service.test.ts +0 -2036
- package/src/service.ts +0 -2497
- package/src/types/better-sqlite3.d.ts +0 -1
package/src/github/client.ts
DELETED
|
@@ -1,241 +0,0 @@
|
|
|
1
|
-
import { retry } from '@octokit/plugin-retry';
|
|
2
|
-
import { throttling } from '@octokit/plugin-throttling';
|
|
3
|
-
import { Octokit } from 'octokit';
|
|
4
|
-
|
|
5
|
-
export type GitHubClient = {
|
|
6
|
-
checkAuth: (reporter?: GitHubReporter) => Promise<void>;
|
|
7
|
-
getRepo: (owner: string, repo: string, reporter?: GitHubReporter) => Promise<Record<string, unknown>>;
|
|
8
|
-
listRepositoryIssues: (
|
|
9
|
-
owner: string,
|
|
10
|
-
repo: string,
|
|
11
|
-
since?: string,
|
|
12
|
-
limit?: number,
|
|
13
|
-
reporter?: GitHubReporter,
|
|
14
|
-
) => Promise<Array<Record<string, unknown>>>;
|
|
15
|
-
getIssue: (owner: string, repo: string, number: number, reporter?: GitHubReporter) => Promise<Record<string, unknown>>;
|
|
16
|
-
getPull: (owner: string, repo: string, number: number, reporter?: GitHubReporter) => Promise<Record<string, unknown>>;
|
|
17
|
-
listIssueComments: (owner: string, repo: string, number: number, reporter?: GitHubReporter) => Promise<Array<Record<string, unknown>>>;
|
|
18
|
-
listPullReviews: (owner: string, repo: string, number: number, reporter?: GitHubReporter) => Promise<Array<Record<string, unknown>>>;
|
|
19
|
-
listPullReviewComments: (
|
|
20
|
-
owner: string,
|
|
21
|
-
repo: string,
|
|
22
|
-
number: number,
|
|
23
|
-
reporter?: GitHubReporter,
|
|
24
|
-
) => Promise<Array<Record<string, unknown>>>;
|
|
25
|
-
};
|
|
26
|
-
|
|
27
|
-
export type GitHubReporter = (message: string) => void;
|
|
28
|
-
|
|
29
|
-
export class GitHubRequestError extends Error {
|
|
30
|
-
readonly status?: number;
|
|
31
|
-
|
|
32
|
-
constructor(message: string, status?: number) {
|
|
33
|
-
super(message);
|
|
34
|
-
this.name = 'GitHubRequestError';
|
|
35
|
-
this.status = status;
|
|
36
|
-
}
|
|
37
|
-
}
|
|
38
|
-
|
|
39
|
-
type RequestOptions = {
|
|
40
|
-
token: string;
|
|
41
|
-
userAgent?: string;
|
|
42
|
-
timeoutMs?: number;
|
|
43
|
-
pageDelayMs?: number;
|
|
44
|
-
};
|
|
45
|
-
|
|
46
|
-
type OctokitPage<T> = {
|
|
47
|
-
data: T[];
|
|
48
|
-
};
|
|
49
|
-
|
|
50
|
-
function delay(ms: number): Promise<void> {
|
|
51
|
-
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
52
|
-
}
|
|
53
|
-
|
|
54
|
-
function formatDuration(ms: number): string {
|
|
55
|
-
if (ms < 1000) return `${ms}ms`;
|
|
56
|
-
const seconds = Math.ceil(ms / 1000);
|
|
57
|
-
if (seconds < 60) return `${seconds}s`;
|
|
58
|
-
const minutes = Math.floor(seconds / 60);
|
|
59
|
-
const remainingSeconds = seconds % 60;
|
|
60
|
-
if (minutes < 60) return remainingSeconds === 0 ? `${minutes}m` : `${minutes}m ${remainingSeconds}s`;
|
|
61
|
-
const hours = Math.floor(minutes / 60);
|
|
62
|
-
const remainingMinutes = minutes % 60;
|
|
63
|
-
return remainingMinutes === 0 ? `${hours}h` : `${hours}h ${remainingMinutes}m`;
|
|
64
|
-
}
|
|
65
|
-
|
|
66
|
-
function formatResetTime(resetSeconds: string | null | undefined): string | null {
|
|
67
|
-
if (!resetSeconds) return null;
|
|
68
|
-
const value = Number(resetSeconds);
|
|
69
|
-
if (!Number.isFinite(value) || value <= 0) return null;
|
|
70
|
-
return new Date(value * 1000).toISOString();
|
|
71
|
-
}
|
|
72
|
-
|
|
73
|
-
export function makeGitHubClient(options: RequestOptions): GitHubClient {
|
|
74
|
-
const userAgent = options.userAgent ?? 'ghcrawl';
|
|
75
|
-
const timeoutMs = options.timeoutMs ?? 30_000;
|
|
76
|
-
const pageDelayMs = options.pageDelayMs ?? 5000;
|
|
77
|
-
const BaseOctokit = Octokit.plugin(retry, throttling);
|
|
78
|
-
|
|
79
|
-
function createOctokit(reporter?: GitHubReporter) {
|
|
80
|
-
return new BaseOctokit({
|
|
81
|
-
auth: options.token,
|
|
82
|
-
request: {
|
|
83
|
-
timeout: timeoutMs,
|
|
84
|
-
},
|
|
85
|
-
userAgent,
|
|
86
|
-
retry: {
|
|
87
|
-
doNotRetry: [400, 401, 403, 404, 422],
|
|
88
|
-
retries: 4,
|
|
89
|
-
},
|
|
90
|
-
throttle: {
|
|
91
|
-
fallbackSecondaryRateRetryAfter: Math.ceil(pageDelayMs / 1000),
|
|
92
|
-
onRateLimit: (retryAfter, requestOptions) => {
|
|
93
|
-
const responseHeaders = (requestOptions.response as { headers?: Record<string, string> } | undefined)?.headers;
|
|
94
|
-
const resetAt = formatResetTime(responseHeaders?.['x-ratelimit-reset']);
|
|
95
|
-
const remaining = responseHeaders?.['x-ratelimit-remaining'];
|
|
96
|
-
const method = requestOptions.method ?? 'GET';
|
|
97
|
-
const url = requestOptions.url ?? 'unknown';
|
|
98
|
-
reporter?.(
|
|
99
|
-
`[github] backoff rate-limited wait=${formatDuration(retryAfter * 1000)}${remaining ? ` remaining=${remaining}` : ''}${resetAt ? ` reset_at=${resetAt}` : ''} method=${method} url=${url}`,
|
|
100
|
-
);
|
|
101
|
-
return true;
|
|
102
|
-
},
|
|
103
|
-
onSecondaryRateLimit: (retryAfter, requestOptions) => {
|
|
104
|
-
const method = requestOptions.method ?? 'GET';
|
|
105
|
-
const url = requestOptions.url ?? 'unknown';
|
|
106
|
-
reporter?.(
|
|
107
|
-
`[github] backoff secondary-rate-limit wait=${formatDuration(retryAfter * 1000)} method=${method} url=${url}`,
|
|
108
|
-
);
|
|
109
|
-
return true;
|
|
110
|
-
},
|
|
111
|
-
},
|
|
112
|
-
});
|
|
113
|
-
}
|
|
114
|
-
|
|
115
|
-
async function request<T>(label: string, reporter: GitHubReporter | undefined, fn: (octokit: InstanceType<typeof BaseOctokit>) => Promise<T>): Promise<T> {
|
|
116
|
-
reporter?.(`[github] request ${label}`);
|
|
117
|
-
const octokit = createOctokit(reporter);
|
|
118
|
-
try {
|
|
119
|
-
return await fn(octokit);
|
|
120
|
-
} catch (error) {
|
|
121
|
-
const message = error instanceof Error ? error.message : String(error);
|
|
122
|
-
const status = typeof (error as { status?: unknown })?.status === 'number' ? Number((error as { status?: unknown }).status) : undefined;
|
|
123
|
-
throw new GitHubRequestError(`GitHub request failed for ${label}: ${message}`, status);
|
|
124
|
-
}
|
|
125
|
-
}
|
|
126
|
-
|
|
127
|
-
async function paginate<T>(
|
|
128
|
-
label: string,
|
|
129
|
-
limit: number | undefined,
|
|
130
|
-
reporter: GitHubReporter | undefined,
|
|
131
|
-
iteratorFactory: (octokit: InstanceType<typeof BaseOctokit>) => AsyncIterable<OctokitPage<T>>,
|
|
132
|
-
): Promise<T[]> {
|
|
133
|
-
reporter?.(`[github] request ${label}`);
|
|
134
|
-
const octokit = createOctokit(reporter);
|
|
135
|
-
const out: T[] = [];
|
|
136
|
-
|
|
137
|
-
try {
|
|
138
|
-
let pageIndex = 0;
|
|
139
|
-
for await (const page of iteratorFactory(octokit)) {
|
|
140
|
-
pageIndex += 1;
|
|
141
|
-
const remaining = typeof limit === 'number' ? Math.max(limit - out.length, 0) : page.data.length;
|
|
142
|
-
out.push(...page.data.slice(0, remaining));
|
|
143
|
-
reporter?.(`[github] page ${pageIndex} fetched count=${page.data.length} accumulated=${out.length}`);
|
|
144
|
-
if (typeof limit === 'number' && out.length >= limit) {
|
|
145
|
-
break;
|
|
146
|
-
}
|
|
147
|
-
await delay(pageDelayMs);
|
|
148
|
-
}
|
|
149
|
-
return out;
|
|
150
|
-
} catch (error) {
|
|
151
|
-
const message = error instanceof Error ? error.message : String(error);
|
|
152
|
-
const status = typeof (error as { status?: unknown })?.status === 'number' ? Number((error as { status?: unknown }).status) : undefined;
|
|
153
|
-
throw new GitHubRequestError(`GitHub pagination failed for ${label}: ${message}`, status);
|
|
154
|
-
}
|
|
155
|
-
}
|
|
156
|
-
|
|
157
|
-
return {
|
|
158
|
-
async checkAuth(reporter) {
|
|
159
|
-
await request('GET /rate_limit', reporter, async (octokit) => {
|
|
160
|
-
await octokit.request('GET /rate_limit');
|
|
161
|
-
});
|
|
162
|
-
},
|
|
163
|
-
async getRepo(owner, repo, reporter) {
|
|
164
|
-
return request(`GET /repos/${owner}/${repo}`, reporter, async (octokit) => {
|
|
165
|
-
const response = await octokit.rest.repos.get({ owner, repo });
|
|
166
|
-
return response.data as Record<string, unknown>;
|
|
167
|
-
});
|
|
168
|
-
},
|
|
169
|
-
async listRepositoryIssues(owner, repo, since, limit, reporter) {
|
|
170
|
-
return paginate(
|
|
171
|
-
`GET /repos/${owner}/${repo}/issues state=open per_page=100`,
|
|
172
|
-
limit,
|
|
173
|
-
reporter,
|
|
174
|
-
(octokit) =>
|
|
175
|
-
octokit.paginate.iterator(octokit.rest.issues.listForRepo, {
|
|
176
|
-
owner,
|
|
177
|
-
repo,
|
|
178
|
-
state: 'open',
|
|
179
|
-
sort: 'updated',
|
|
180
|
-
direction: 'desc',
|
|
181
|
-
per_page: 100,
|
|
182
|
-
since,
|
|
183
|
-
}) as AsyncIterable<OctokitPage<Record<string, unknown>>>,
|
|
184
|
-
);
|
|
185
|
-
},
|
|
186
|
-
async getIssue(owner, repo, number, reporter) {
|
|
187
|
-
return request(`GET /repos/${owner}/${repo}/issues/${number}`, reporter, async (octokit) => {
|
|
188
|
-
const response = await octokit.rest.issues.get({ owner, repo, issue_number: number });
|
|
189
|
-
return response.data as Record<string, unknown>;
|
|
190
|
-
});
|
|
191
|
-
},
|
|
192
|
-
async getPull(owner, repo, number, reporter) {
|
|
193
|
-
return request(`GET /repos/${owner}/${repo}/pulls/${number}`, reporter, async (octokit) => {
|
|
194
|
-
const response = await octokit.rest.pulls.get({ owner, repo, pull_number: number });
|
|
195
|
-
return response.data as Record<string, unknown>;
|
|
196
|
-
});
|
|
197
|
-
},
|
|
198
|
-
async listIssueComments(owner, repo, number, reporter) {
|
|
199
|
-
return paginate(
|
|
200
|
-
`GET /repos/${owner}/${repo}/issues/${number}/comments per_page=100`,
|
|
201
|
-
undefined,
|
|
202
|
-
reporter,
|
|
203
|
-
(octokit) =>
|
|
204
|
-
octokit.paginate.iterator(octokit.rest.issues.listComments, {
|
|
205
|
-
owner,
|
|
206
|
-
repo,
|
|
207
|
-
issue_number: number,
|
|
208
|
-
per_page: 100,
|
|
209
|
-
}) as AsyncIterable<OctokitPage<Record<string, unknown>>>,
|
|
210
|
-
);
|
|
211
|
-
},
|
|
212
|
-
async listPullReviews(owner, repo, number, reporter) {
|
|
213
|
-
return paginate(
|
|
214
|
-
`GET /repos/${owner}/${repo}/pulls/${number}/reviews per_page=100`,
|
|
215
|
-
undefined,
|
|
216
|
-
reporter,
|
|
217
|
-
(octokit) =>
|
|
218
|
-
octokit.paginate.iterator(octokit.rest.pulls.listReviews, {
|
|
219
|
-
owner,
|
|
220
|
-
repo,
|
|
221
|
-
pull_number: number,
|
|
222
|
-
per_page: 100,
|
|
223
|
-
}) as AsyncIterable<OctokitPage<Record<string, unknown>>>,
|
|
224
|
-
);
|
|
225
|
-
},
|
|
226
|
-
async listPullReviewComments(owner, repo, number, reporter) {
|
|
227
|
-
return paginate(
|
|
228
|
-
`GET /repos/${owner}/${repo}/pulls/${number}/comments per_page=100`,
|
|
229
|
-
undefined,
|
|
230
|
-
reporter,
|
|
231
|
-
(octokit) =>
|
|
232
|
-
octokit.paginate.iterator(octokit.rest.pulls.listReviewComments, {
|
|
233
|
-
owner,
|
|
234
|
-
repo,
|
|
235
|
-
pull_number: number,
|
|
236
|
-
per_page: 100,
|
|
237
|
-
}) as AsyncIterable<OctokitPage<Record<string, unknown>>>,
|
|
238
|
-
);
|
|
239
|
-
},
|
|
240
|
-
};
|
|
241
|
-
}
|
package/src/openai/provider.ts
DELETED
|
@@ -1,141 +0,0 @@
|
|
|
1
|
-
import OpenAI from 'openai';
|
|
2
|
-
import { APIConnectionError, APIConnectionTimeoutError, APIError, RateLimitError } from 'openai/error';
|
|
3
|
-
import { zodTextFormat } from 'openai/helpers/zod';
|
|
4
|
-
import { z } from 'zod';
|
|
5
|
-
|
|
6
|
-
export type SummaryResult = {
|
|
7
|
-
problemSummary: string;
|
|
8
|
-
solutionSummary: string;
|
|
9
|
-
maintainerSignalSummary: string;
|
|
10
|
-
dedupeSummary: string;
|
|
11
|
-
};
|
|
12
|
-
|
|
13
|
-
export type SummaryUsage = {
|
|
14
|
-
inputTokens: number;
|
|
15
|
-
outputTokens: number;
|
|
16
|
-
totalTokens: number;
|
|
17
|
-
cachedInputTokens: number;
|
|
18
|
-
reasoningTokens: number;
|
|
19
|
-
};
|
|
20
|
-
|
|
21
|
-
export type AiProvider = {
|
|
22
|
-
checkAuth: () => Promise<void>;
|
|
23
|
-
summarizeThread: (params: { model: string; text: string }) => Promise<{ summary: SummaryResult; usage?: SummaryUsage }>;
|
|
24
|
-
embedTexts: (params: { model: string; texts: string[] }) => Promise<number[][]>;
|
|
25
|
-
};
|
|
26
|
-
|
|
27
|
-
const summarySchema = z.object({
|
|
28
|
-
problem_summary: z.string(),
|
|
29
|
-
solution_summary: z.string(),
|
|
30
|
-
maintainer_signal_summary: z.string(),
|
|
31
|
-
dedupe_summary: z.string(),
|
|
32
|
-
});
|
|
33
|
-
|
|
34
|
-
export class OpenAiProvider implements AiProvider {
|
|
35
|
-
private readonly client: OpenAI;
|
|
36
|
-
|
|
37
|
-
constructor(apiKey: string) {
|
|
38
|
-
this.client = new OpenAI({ apiKey });
|
|
39
|
-
}
|
|
40
|
-
|
|
41
|
-
async checkAuth(): Promise<void> {
|
|
42
|
-
await this.client.models.list();
|
|
43
|
-
}
|
|
44
|
-
|
|
45
|
-
async summarizeThread(params: { model: string; text: string }): Promise<{ summary: SummaryResult; usage?: SummaryUsage }> {
|
|
46
|
-
const format = zodTextFormat(summarySchema, 'ghcrawl_thread_summary');
|
|
47
|
-
let lastError: Error | null = null;
|
|
48
|
-
|
|
49
|
-
for (const [attemptIndex, maxOutputTokens] of [500, 900, 1400].entries()) {
|
|
50
|
-
try {
|
|
51
|
-
const response = await this.client.responses.create({
|
|
52
|
-
model: params.model,
|
|
53
|
-
input: [
|
|
54
|
-
{
|
|
55
|
-
role: 'system',
|
|
56
|
-
content: [
|
|
57
|
-
{
|
|
58
|
-
type: 'input_text',
|
|
59
|
-
text:
|
|
60
|
-
'Summarize this GitHub issue or pull request thread. Return concise JSON only with keys problem_summary, solution_summary, maintainer_signal_summary, dedupe_summary. Each field should be plain text, no markdown, and usually 1-3 sentences.',
|
|
61
|
-
},
|
|
62
|
-
],
|
|
63
|
-
},
|
|
64
|
-
{
|
|
65
|
-
role: 'user',
|
|
66
|
-
content: [{ type: 'input_text', text: params.text }],
|
|
67
|
-
},
|
|
68
|
-
],
|
|
69
|
-
text: {
|
|
70
|
-
format,
|
|
71
|
-
verbosity: 'low',
|
|
72
|
-
},
|
|
73
|
-
max_output_tokens: maxOutputTokens,
|
|
74
|
-
});
|
|
75
|
-
|
|
76
|
-
const raw = response.output_text ?? '';
|
|
77
|
-
const parsed = summarySchema.parse(JSON.parse(raw));
|
|
78
|
-
|
|
79
|
-
return {
|
|
80
|
-
summary: {
|
|
81
|
-
problemSummary: parsed.problem_summary,
|
|
82
|
-
solutionSummary: parsed.solution_summary,
|
|
83
|
-
maintainerSignalSummary: parsed.maintainer_signal_summary,
|
|
84
|
-
dedupeSummary: parsed.dedupe_summary,
|
|
85
|
-
},
|
|
86
|
-
usage: response.usage
|
|
87
|
-
? {
|
|
88
|
-
inputTokens: response.usage.input_tokens,
|
|
89
|
-
outputTokens: response.usage.output_tokens,
|
|
90
|
-
totalTokens: response.usage.total_tokens,
|
|
91
|
-
cachedInputTokens: response.usage.input_tokens_details?.cached_tokens ?? 0,
|
|
92
|
-
reasoningTokens: response.usage.output_tokens_details?.reasoning_tokens ?? 0,
|
|
93
|
-
}
|
|
94
|
-
: undefined,
|
|
95
|
-
};
|
|
96
|
-
} catch (error) {
|
|
97
|
-
lastError = error instanceof Error ? error : new Error(String(error));
|
|
98
|
-
if (attemptIndex === 2) {
|
|
99
|
-
break;
|
|
100
|
-
}
|
|
101
|
-
}
|
|
102
|
-
}
|
|
103
|
-
|
|
104
|
-
throw new Error(`OpenAI summarization failed after 3 attempts: ${lastError?.message ?? 'unknown error'}`);
|
|
105
|
-
}
|
|
106
|
-
|
|
107
|
-
async embedTexts(params: { model: string; texts: string[] }): Promise<number[][]> {
|
|
108
|
-
if (params.texts.length === 0) {
|
|
109
|
-
return [];
|
|
110
|
-
}
|
|
111
|
-
|
|
112
|
-
let lastError: Error | null = null;
|
|
113
|
-
for (const attempt of [1, 2, 3, 4, 5]) {
|
|
114
|
-
try {
|
|
115
|
-
const response = await this.client.embeddings.create({
|
|
116
|
-
model: params.model,
|
|
117
|
-
input: params.texts,
|
|
118
|
-
});
|
|
119
|
-
|
|
120
|
-
return response.data.map((item) => item.embedding);
|
|
121
|
-
} catch (error) {
|
|
122
|
-
const shouldRetry =
|
|
123
|
-
error instanceof RateLimitError ||
|
|
124
|
-
error instanceof APIConnectionError ||
|
|
125
|
-
error instanceof APIConnectionTimeoutError ||
|
|
126
|
-
(error instanceof APIError && typeof error.status === 'number' && error.status >= 500);
|
|
127
|
-
lastError = error instanceof Error ? error : new Error(String(error));
|
|
128
|
-
if (!shouldRetry || attempt === 5) {
|
|
129
|
-
break;
|
|
130
|
-
}
|
|
131
|
-
await sleep(1000 * 2 ** (attempt - 1));
|
|
132
|
-
}
|
|
133
|
-
}
|
|
134
|
-
|
|
135
|
-
throw new Error(`OpenAI embeddings failed after 5 attempts: ${lastError?.message ?? 'unknown error'}`);
|
|
136
|
-
}
|
|
137
|
-
}
|
|
138
|
-
|
|
139
|
-
function sleep(ms: number): Promise<void> {
|
|
140
|
-
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
141
|
-
}
|
package/src/search/exact.test.ts
DELETED
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
import test from 'node:test';
|
|
2
|
-
import assert from 'node:assert/strict';
|
|
3
|
-
|
|
4
|
-
import { cosineSimilarity, rankNearestNeighbors } from './exact.js';
|
|
5
|
-
|
|
6
|
-
test('cosine similarity is 1 for identical embeddings', () => {
|
|
7
|
-
assert.equal(cosineSimilarity([1, 0], [1, 0]), 1);
|
|
8
|
-
});
|
|
9
|
-
|
|
10
|
-
test('nearest neighbors sorts by similarity descending', () => {
|
|
11
|
-
const ranked = rankNearestNeighbors(
|
|
12
|
-
[
|
|
13
|
-
{ id: 1, embedding: [1, 0] },
|
|
14
|
-
{ id: 2, embedding: [0.9, 0.1] },
|
|
15
|
-
{ id: 3, embedding: [0, 1] },
|
|
16
|
-
],
|
|
17
|
-
{ targetEmbedding: [1, 0], limit: 2, skipId: 1 },
|
|
18
|
-
);
|
|
19
|
-
|
|
20
|
-
assert.equal(ranked[0]?.item.id, 2);
|
|
21
|
-
assert.equal(ranked[1]?.item.id, 3);
|
|
22
|
-
});
|
package/src/search/exact.ts
DELETED
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
export function cosineSimilarity(left: number[], right: number[]): number {
|
|
2
|
-
if (left.length !== right.length) {
|
|
3
|
-
throw new Error('Embedding dimensions do not match');
|
|
4
|
-
}
|
|
5
|
-
let dot = 0;
|
|
6
|
-
let leftNorm = 0;
|
|
7
|
-
let rightNorm = 0;
|
|
8
|
-
for (let index = 0; index < left.length; index += 1) {
|
|
9
|
-
dot += left[index] * right[index];
|
|
10
|
-
leftNorm += left[index] * left[index];
|
|
11
|
-
rightNorm += right[index] * right[index];
|
|
12
|
-
}
|
|
13
|
-
if (leftNorm === 0 || rightNorm === 0) return 0;
|
|
14
|
-
return dot / (Math.sqrt(leftNorm) * Math.sqrt(rightNorm));
|
|
15
|
-
}
|
|
16
|
-
|
|
17
|
-
export function rankNearestNeighbors<T extends { id: number; embedding: number[] }>(
|
|
18
|
-
items: T[],
|
|
19
|
-
params: { targetEmbedding: number[]; limit: number; minScore?: number; skipId?: number },
|
|
20
|
-
): Array<{ item: T; score: number }> {
|
|
21
|
-
const minScore = params.minScore ?? -1;
|
|
22
|
-
return items
|
|
23
|
-
.filter((item) => item.id !== params.skipId)
|
|
24
|
-
.map((item) => ({ item, score: cosineSimilarity(params.targetEmbedding, item.embedding) }))
|
|
25
|
-
.filter((entry) => entry.score >= minScore)
|
|
26
|
-
.sort((left, right) => right.score - left.score)
|
|
27
|
-
.slice(0, params.limit);
|
|
28
|
-
}
|