@ghcrawl/api-core 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,141 @@
1
+ import OpenAI from 'openai';
2
+ import { APIConnectionError, APIConnectionTimeoutError, APIError, RateLimitError } from 'openai/error';
3
+ import { zodTextFormat } from 'openai/helpers/zod';
4
+ import { z } from 'zod';
5
+
6
/**
 * Structured summary of a single GitHub issue or pull request thread.
 *
 * Each field is the camelCase counterpart of one snake_case key in the
 * model's JSON reply (`problem_summary`, `solution_summary`,
 * `maintainer_signal_summary`, `dedupe_summary`). The summarization prompt
 * asks for plain text of roughly 1-3 sentences per field.
 */
export type SummaryResult = {
  /** Text returned under `problem_summary`. */
  problemSummary: string;
  /** Text returned under `solution_summary`. */
  solutionSummary: string;
  /** Text returned under `maintainer_signal_summary`. */
  maintainerSignalSummary: string;
  /** Text returned under `dedupe_summary` (presumably used for duplicate detection; confirm with callers). */
  dedupeSummary: string;
};
12
+
13
/**
 * Token accounting for one summarization request.
 *
 * Values are copied from the `usage` object of the provider response; the two
 * detail counters fall back to `0` when the response omits them.
 */
export type SummaryUsage = {
  /** Tokens consumed by the request input (`usage.input_tokens`). */
  inputTokens: number;
  /** Tokens produced by the model (`usage.output_tokens`). */
  outputTokens: number;
  /** Total reported by the provider (`usage.total_tokens`). */
  totalTokens: number;
  /** `usage.input_tokens_details.cached_tokens`, or `0` when absent. */
  cachedInputTokens: number;
  /** `usage.output_tokens_details.reasoning_tokens`, or `0` when absent. */
  reasoningTokens: number;
};
20
+
21
/**
 * Contract for an AI backend that can summarize threads and embed text.
 *
 * All members are asynchronous; failures are reported by rejecting the
 * returned promise.
 */
export type AiProvider = {
  /** Resolves when the provider accepts the configured credentials. */
  checkAuth: () => Promise<void>;

  /**
   * Summarizes the thread contained in `text` using `model`.
   * `usage` is optional because a provider may not report token counts.
   */
  summarizeThread: (params: {
    model: string;
    text: string;
  }) => Promise<{ summary: SummaryResult; usage?: SummaryUsage }>;

  /** Produces embedding vectors for `texts` using `model`. */
  embedTexts: (params: { model: string; texts: string[] }) => Promise<number[][]>;
};
26
+
27
/**
 * Zod schema for the JSON object the model must return when summarizing a
 * thread. It is used both to build the structured-output format sent with the
 * request and to validate the parsed reply. Keys are snake_case to match the
 * names requested in the system prompt.
 */
const summarySchema = z.object({
  problem_summary: z.string(),
  solution_summary: z.string(),
  maintainer_signal_summary: z.string(),
  dedupe_summary: z.string(),
});
33
+
34
/**
 * `AiProvider` implementation backed by the `openai` SDK.
 *
 * - `summarizeThread` uses the Responses API with a Zod-derived structured
 *   output format and retries with a growing output-token budget.
 * - `embedTexts` uses the Embeddings API and retries transient failures with
 *   exponential backoff.
 *
 * Errors thrown by both methods are plain `Error`s whose message names the
 * number of attempts actually made; the last underlying error is attached as
 * `cause` so callers can still inspect it (for example its HTTP `status`).
 */
export class OpenAiProvider implements AiProvider {
  /** Values passed as `max_output_tokens`, one summarization request per entry, in order. */
  private static readonly SUMMARY_TOKEN_BUDGETS: readonly number[] = [500, 900, 1400];

  /** Upper bound on embedding requests issued by one `embedTexts` call. */
  private static readonly EMBEDDING_MAX_ATTEMPTS = 5;

  private readonly client: OpenAI;

  /**
   * @param apiKey - API key handed to the OpenAI client.
   */
  constructor(apiKey: string) {
    this.client = new OpenAI({ apiKey });
  }

  /**
   * Verifies the credentials by listing models.
   *
   * @throws Whatever the SDK throws when the `models.list()` request fails.
   */
  async checkAuth(): Promise<void> {
    await this.client.models.list();
  }

  /**
   * Summarizes a GitHub issue or pull request thread into four short fields.
   *
   * Up to three requests are made, with `max_output_tokens` of 500, 900 and
   * 1400, so a reply that was cut off or failed validation gets another try
   * with more room. A definitive client error (HTTP 4xx other than
   * 408/409/429) aborts immediately because a larger budget cannot fix it.
   *
   * @param params - `model` to use and the thread `text` to summarize.
   * @returns The parsed summary plus token usage when the response reports it.
   * @throws Error when no attempt produced a valid summary.
   */
  async summarizeThread(params: { model: string; text: string }): Promise<{ summary: SummaryResult; usage?: SummaryUsage }> {
    const format = zodTextFormat(summarySchema, 'ghcrawl_thread_summary');
    let lastError: Error | null = null;
    let attempts = 0;

    for (const maxOutputTokens of OpenAiProvider.SUMMARY_TOKEN_BUDGETS) {
      attempts += 1;
      try {
        const response = await this.client.responses.create({
          model: params.model,
          input: [
            {
              role: 'system',
              content: [
                {
                  type: 'input_text',
                  text:
                    'Summarize this GitHub issue or pull request thread. Return concise JSON only with keys problem_summary, solution_summary, maintainer_signal_summary, dedupe_summary. Each field should be plain text, no markdown, and usually 1-3 sentences.',
                },
              ],
            },
            {
              role: 'user',
              content: [{ type: 'input_text', text: params.text }],
            },
          ],
          text: {
            format,
            verbosity: 'low',
          },
          max_output_tokens: maxOutputTokens,
        });

        // An empty or truncated reply makes JSON.parse throw, which routes
        // into the retry path below with the next (larger) budget.
        const raw = response.output_text ?? '';
        const payload: unknown = JSON.parse(raw);
        const parsed = summarySchema.parse(payload);

        return {
          summary: {
            problemSummary: parsed.problem_summary,
            solutionSummary: parsed.solution_summary,
            maintainerSignalSummary: parsed.maintainer_signal_summary,
            dedupeSummary: parsed.dedupe_summary,
          },
          usage: response.usage
            ? {
                inputTokens: response.usage.input_tokens,
                outputTokens: response.usage.output_tokens,
                totalTokens: response.usage.total_tokens,
                cachedInputTokens: response.usage.input_tokens_details?.cached_tokens ?? 0,
                reasoningTokens: response.usage.output_tokens_details?.reasoning_tokens ?? 0,
              }
            : undefined,
        };
      } catch (error) {
        lastError = error instanceof Error ? error : new Error(String(error));
        if (OpenAiProvider.isPermanentClientError(error)) {
          break;
        }
      }
    }

    throw OpenAiProvider.failure('summarization', attempts, lastError);
  }

  /**
   * Embeds `texts` with `model`, returning one vector per input in input order.
   *
   * Rate-limit, connection, timeout and 5xx errors are retried up to five
   * attempts in total, sleeping 1s, 2s, 4s and 8s between them. Any other
   * error aborts after the first attempt.
   *
   * @param params - `model` to use and the `texts` to embed.
   * @returns An empty array for empty input (no request is made); otherwise the embeddings.
   * @throws Error when the request could not be completed.
   */
  async embedTexts(params: { model: string; texts: string[] }): Promise<number[][]> {
    if (params.texts.length === 0) {
      return [];
    }

    const maxAttempts = OpenAiProvider.EMBEDDING_MAX_ATTEMPTS;
    let lastError: Error | null = null;
    let attempts = 0;

    while (attempts < maxAttempts) {
      attempts += 1;
      try {
        const response = await this.client.embeddings.create({
          model: params.model,
          input: params.texts,
        });

        // Each embedding carries the index of the input it belongs to; order
        // by it rather than relying on the array order of the response.
        return [...response.data]
          .sort((left, right) => left.index - right.index)
          .map((item) => item.embedding);
      } catch (error) {
        lastError = error instanceof Error ? error : new Error(String(error));
        if (!OpenAiProvider.isTransientError(error) || attempts === maxAttempts) {
          break;
        }
        await sleep(1000 * 2 ** (attempts - 1));
      }
    }

    throw OpenAiProvider.failure('embeddings', attempts, lastError);
  }

  /** True for errors worth retrying unchanged: rate limits, connection/timeout failures and HTTP 5xx. */
  private static isTransientError(error: unknown): boolean {
    return (
      error instanceof RateLimitError ||
      error instanceof APIConnectionError ||
      error instanceof APIConnectionTimeoutError ||
      (error instanceof APIError && typeof error.status === 'number' && error.status >= 500)
    );
  }

  /** True for HTTP 4xx API errors that a retry cannot fix (everything except 408, 409 and 429). */
  private static isPermanentClientError(error: unknown): boolean {
    if (!(error instanceof APIError) || typeof error.status !== 'number') {
      return false;
    }
    const { status } = error;
    return status >= 400 && status < 500 && status !== 408 && status !== 409 && status !== 429;
  }

  /** Builds the final error, reporting the real attempt count and keeping the last error as `cause`. */
  private static failure(operation: string, attempts: number, cause: Error | null): Error {
    const noun = attempts === 1 ? 'attempt' : 'attempts';
    const message = `OpenAI ${operation} failed after ${attempts} ${noun}: ${cause?.message ?? 'unknown error'}`;
    // Object.assign instead of the `{ cause }` constructor option so this
    // also type-checks against pre-ES2022 lib definitions.
    return cause ? Object.assign(new Error(message), { cause }) : new Error(message);
  }
}
138
+
139
/**
 * Returns a promise that resolves (with no value) once a `setTimeout` of
 * `ms` milliseconds fires.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
@@ -0,0 +1,22 @@
1
+ import test from 'node:test';
2
+ import assert from 'node:assert/strict';
3
+
4
+ import { cosineSimilarity, rankNearestNeighbors } from './exact.js';
5
+
6
// Two equal unit vectors must score exactly 1.
test('cosine similarity is 1 for identical embeddings', () => {
  const embedding = [1, 0];
  const similarity = cosineSimilarity(embedding, [...embedding]);

  assert.equal(similarity, 1);
});
9
+
10
// With the exact match (id 1) skipped and a limit of 2, the closer vector
// (id 2) must rank ahead of the orthogonal one (id 3).
test('nearest neighbors sorts by similarity descending', () => {
  const candidates = [
    { id: 1, embedding: [1, 0] },
    { id: 2, embedding: [0.9, 0.1] },
    { id: 3, embedding: [0, 1] },
  ];
  const query = { targetEmbedding: [1, 0], limit: 2, skipId: 1 };

  const [first, second] = rankNearestNeighbors(candidates, query);

  assert.equal(first?.item.id, 2);
  assert.equal(second?.item.id, 3);
});
@@ -0,0 +1,28 @@
1
/**
 * Computes the cosine similarity of two equal-length vectors.
 *
 * @param left - First vector.
 * @param right - Second vector; must have the same length as `left`.
 * @returns A value in [-1, 1]. Returns 0 when either vector has zero
 *   magnitude (including two empty vectors), where the similarity is undefined.
 * @throws Error when the vectors have different lengths.
 */
export function cosineSimilarity(left: number[], right: number[]): number {
  if (left.length !== right.length) {
    throw new Error('Embedding dimensions do not match');
  }

  let dot = 0;
  let leftNorm = 0;
  let rightNorm = 0;
  for (let index = 0; index < left.length; index += 1) {
    dot += left[index] * right[index];
    leftNorm += left[index] * left[index];
    rightNorm += right[index] * right[index];
  }

  if (leftNorm === 0 || rightNorm === 0) return 0;

  const similarity = dot / (Math.sqrt(leftNorm) * Math.sqrt(rightNorm));

  // sqrt(a) * sqrt(b) is rounded, so the quotient can land a hair outside
  // [-1, 1] (e.g. identical [1, 1, 1] vectors yield 1.0000000000000002).
  // Clamp so callers can rely on the mathematical range; NaN passes through.
  return Math.min(1, Math.max(-1, similarity));
}
16
+
17
/**
 * Scores every item against `params.targetEmbedding` with cosine similarity
 * and returns the best matches, highest score first.
 *
 * @param items - Candidates; each needs a numeric `id` and an `embedding`.
 * @param params - Query options:
 *   - `targetEmbedding`: vector to compare against.
 *   - `limit`: maximum number of results; zero or a negative value yields none.
 *   - `minScore`: optional inclusive lower bound on the score. When omitted,
 *     no threshold is applied.
 *   - `skipId`: optional id to exclude (e.g. the query item itself).
 * @returns Up to `limit` `{ item, score }` pairs sorted by descending score.
 * @throws Error when an item's embedding length differs from the target's.
 */
export function rankNearestNeighbors<T extends { id: number; embedding: number[] }>(
  items: T[],
  params: { targetEmbedding: number[]; limit: number; minScore?: number; skipId?: number },
): Array<{ item: T; score: number }> {
  const { targetEmbedding, skipId } = params;

  // -Infinity rather than -1: a score rounded just below -1 must not be
  // dropped when the caller asked for no threshold. NaN scores still fail
  // the comparison below and are excluded.
  const minScore = params.minScore ?? Number.NEGATIVE_INFINITY;

  // slice(0, -k) would mean "all but the last k"; treat a negative limit as 0.
  const limit = Math.max(0, params.limit);

  const scored: Array<{ item: T; score: number }> = [];
  for (const item of items) {
    if (item.id === skipId) continue;
    const score = cosineSimilarity(targetEmbedding, item.embedding);
    if (score >= minScore) {
      scored.push({ item, score });
    }
  }

  scored.sort((left, right) => right.score - left.score);
  return scored.slice(0, limit);
}