@disco_trooper/apple-notes-mcp 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,191 @@
1
+ /**
2
+ * Local embedding generation using HuggingFace Transformers.js
3
+ *
4
+ * Lazy-loads the model on first use to minimize startup time.
5
+ * Supports model override via EMBEDDING_MODEL env var.
6
+ */
7
+
8
+ import { DEFAULT_LOCAL_EMBEDDING_DIMS } from "../config/constants.js";
9
+ import { createDebugLogger } from "../utils/debug.js";
10
+
// Model used when the EMBEDDING_MODEL environment variable is not set
const DEFAULT_MODEL = "Xenova/multilingual-e5-small";

// Vector size shared by the small models listed below
const SMALL_MODEL_DIMS = 384;

// Known output dimensions, keyed by model identifier
const MODEL_DIMENSIONS: Record<string, number> = {
  "Xenova/multilingual-e5-small": SMALL_MODEL_DIMS,
  "Xenova/all-MiniLM-L6-v2": SMALL_MODEL_DIMS,
  "Xenova/bge-m3": 1024,
  "Xenova/bge-small-en-v1.5": SMALL_MODEL_DIMS,
  "Xenova/gte-small": SMALL_MODEL_DIMS,
};
22
+
// Debug logger tagged for the local embedding backend
const debug = createDebugLogger("LOCAL");

// Options this module passes to the pipeline on each call
interface PipelineCallOptions {
  pooling?: string;
  normalize?: boolean;
}

// Tensor-like result: tolist() yields one number[] per input
interface PipelineOutput {
  tolist: () => number[][];
}

// Minimal call signature of the feature-extraction pipeline used here
type FeatureExtractionPipeline = (
  text: string,
  options?: PipelineCallOptions
) => Promise<PipelineOutput>;

// Lazy-loading state:
// - pipelineInstance: the ready pipeline once loading has succeeded
// - pipelinePromise: the in-flight (or completed) load, shared by concurrent callers
// - resolvedModel: name of the model that was actually loaded
let pipelineInstance: FeatureExtractionPipeline | null = null;
let pipelinePromise: Promise<FeatureExtractionPipeline> | null = null;
let resolvedModel: string | null = null;
35
+
/**
 * Resolve which embedding model to use.
 *
 * A non-empty EMBEDDING_MODEL environment variable takes precedence;
 * otherwise DEFAULT_MODEL is returned.
 */
function getModelName(): string {
  const override = process.env.EMBEDDING_MODEL;
  if (override) {
    return override;
  }
  return DEFAULT_MODEL;
}
43
+
/**
 * Return the feature-extraction pipeline, loading it on first use.
 *
 * - If the pipeline is already loaded, it is returned directly.
 * - If a load is in flight, its promise is returned so callers share one load.
 * - Otherwise a load is started for the currently configured model.
 *
 * A failed load clears the shared promise so a later call can try again.
 *
 * @returns Promise resolving to the loaded pipeline
 * @throws Error naming the model when the import or pipeline creation fails
 */
async function getPipeline(): Promise<FeatureExtractionPipeline> {
  if (pipelineInstance !== null) {
    return pipelineInstance;
  }
  if (pipelinePromise !== null) {
    return pipelinePromise;
  }

  const modelName = getModelName();
  debug(`Loading embedding model: ${modelName}`);

  const load = async (): Promise<FeatureExtractionPipeline> => {
    try {
      // Imported dynamically so the library is only pulled in when needed
      const transformers = await import("@huggingface/transformers");

      const loadStartedAt = Date.now();

      // The model is requested with dtype "fp32"
      // @ts-expect-error - pipeline returns a union type, we know it's FeatureExtractionPipeline for "feature-extraction"
      const extractor: FeatureExtractionPipeline = await transformers.pipeline(
        "feature-extraction",
        modelName,
        { dtype: "fp32" }
      );

      const loadTime = Date.now() - loadStartedAt;
      debug(`Model loaded in ${loadTime}ms`);

      // Publish the result for subsequent callers
      pipelineInstance = extractor;
      resolvedModel = modelName;

      return extractor;
    } catch (error) {
      // Forget the failed load so the next call starts a new one
      pipelinePromise = null;

      let message: string;
      if (error instanceof Error) {
        message = error.message;
      } else {
        message = String(error);
      }
      debug(`Failed to load model: ${message}`);

      throw new Error(`Failed to load embedding model "${modelName}": ${message}`);
    }
  };

  pipelinePromise = load();
  return pipelinePromise;
}
101
+
/**
 * Generate an embedding vector for a text string with the local model.
 *
 * The pipeline is called with mean pooling and normalization enabled.
 * When the loaded model's name contains "e5", the input is prefixed with
 * "passage: " (the document-side prefix; "query: " is not used here).
 *
 * @param text - The text to embed; must be a non-empty string
 * @returns Promise resolving to the embedding vector (number array)
 * @throws Error if the text is invalid, the model cannot be loaded,
 *         inference fails, or the model returns no vector
 */
export async function getLocalEmbedding(text: string): Promise<number[]> {
  if (!text || typeof text !== "string") {
    throw new Error("Text must be a non-empty string");
  }

  const pipe = await getPipeline();

  debug(`Generating embedding for ${text.length} chars`);
  const startTime = Date.now();

  try {
    // Decide on the prefix from the model that was actually loaded, so the
    // input format cannot drift from the pipeline if EMBEDDING_MODEL changes
    // after loading. Fall back to the configured name if none is recorded.
    const modelName = resolvedModel ?? getModelName();
    const isE5Model = modelName.toLowerCase().includes("e5");
    const inputText = isE5Model ? `passage: ${text}` : text;

    // Run inference with mean pooling and normalization
    const output = await pipe(inputText, {
      pooling: "mean",
      normalize: true,
    });

    // A single input yields a single row; fail clearly if it is missing
    const embedding: number[] | undefined = output.tolist()[0];
    if (!Array.isArray(embedding) || embedding.length === 0) {
      throw new Error("Model returned no embedding vector");
    }

    const inferenceTime = Date.now() - startTime;
    debug(`Embedding generated in ${inferenceTime}ms (${embedding.length} dims)`);

    return embedding;
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    debug(`Embedding generation failed: ${message}`);

    throw new Error(`Failed to generate embedding: ${message}`);
  }
}
148
+
/**
 * Get the embedding vector size for the configured model.
 *
 * Looks the model up in MODEL_DIMENSIONS and falls back to
 * DEFAULT_LOCAL_EMBEDDING_DIMS for models that are not listed.
 * Synchronous: it never loads the model.
 *
 * @returns Number of dimensions expected from the configured model
 */
export function getLocalDimensions(): number {
  const modelName = getModelName();

  // Own-property check: a plain-object lookup would otherwise resolve
  // inherited keys such as "constructor" or "toString" to functions.
  if (Object.prototype.hasOwnProperty.call(MODEL_DIMENSIONS, modelName)) {
    const known = MODEL_DIMENSIONS[modelName];
    if (typeof known === "number" && known > 0) {
      return known;
    }
  }

  // The actual size of an unlisted model is not measured here
  debug(`Unknown model "${modelName}", using default dimensions: ${DEFAULT_LOCAL_EMBEDDING_DIMS}`);
  return DEFAULT_LOCAL_EMBEDDING_DIMS;
}
168
+
/**
 * Report the model name that is currently configured
 * (the same value the loader would use). Intended for logging and diagnostics.
 */
export function getLocalModelName(): string {
  const configured = getModelName();
  return configured;
}
176
+
/**
 * Tell whether the pipeline has finished loading.
 * Only inspects module state; it never starts a load.
 */
export function isModelLoaded(): boolean {
  if (pipelineInstance === null) {
    return false;
  }
  return true;
}
184
+
/**
 * Report the name of the model that was actually loaded.
 *
 * @returns The loaded model's name, or null when nothing has been loaded yet
 */
export function getLoadedModelName(): string | null {
  const loaded = resolvedModel;
  return loaded;
}
@@ -0,0 +1,21 @@
1
+ import { describe, it, expect } from "vitest";
2
+ import { getCacheKey } from "./openrouter.js";
3
+
// Behavioral checks for the SHA-256 based cache key helper
describe("getCacheKey", () => {
  it("generates different keys for texts with same prefix", () => {
    // Two inputs that only differ after a long shared prefix
    const sharedPrefix = "a".repeat(100);
    const firstKey = getCacheKey(`${sharedPrefix} first document content`);
    const secondKey = getCacheKey(`${sharedPrefix} second document content`);
    expect(firstKey).not.toBe(secondKey);
  });

  it("generates same key for identical texts", () => {
    const sample = "This is a test document for embedding";
    const first = getCacheKey(sample);
    const second = getCacheKey(sample);
    expect(first).toBe(second);
  });

  it("generates consistent hash format", () => {
    // 64 lowercase hex characters
    const hexDigest = /^[a-f0-9]{64}$/;
    expect(getCacheKey("test")).toMatch(hexDigest);
  });
});
@@ -0,0 +1,285 @@
1
+ /**
2
+ * OpenRouter Embeddings Client
3
+ *
4
+ * Provides embedding generation via OpenRouter API with:
5
+ * - Configurable model and dimensions via environment variables
6
+ * - Retry logic with exponential backoff
7
+ * - Rate limiting handling (429 status)
8
+ * - Caching for repeated queries
9
+ * - Input truncation
10
+ * - Debug logging to stderr
11
+ */
12
+
13
+ import { createHash } from "node:crypto";
14
+ import { DEFAULT_OPENROUTER_EMBEDDING_DIMS, DEFAULT_OPENROUTER_MODEL, EMBEDDING_CACHE_MAX_SIZE, MAX_RETRIES, OPENROUTER_TIMEOUT_MS, RATE_LIMIT_BACKOFF_BASE_MS } from "../config/constants.js";
15
+ import { createDebugLogger } from "../utils/debug.js";
16
+ import { truncateForEmbedding } from "../utils/text.js";
17
+
// Configuration from environment (read once at module load)
const OPENROUTER_API_KEY = process.env.OPENROUTER_API_KEY;
const EMBEDDING_MODEL = process.env.EMBEDDING_MODEL || DEFAULT_OPENROUTER_MODEL;

// EMBEDDING_DIMS must be a positive integer. Anything else (unset, empty,
// non-numeric, zero, negative) falls back to the default instead of letting
// NaN or a nonsensical size reach the API request and dimension reporting.
const parsedEmbeddingDims = Number.parseInt(process.env.EMBEDDING_DIMS ?? "", 10);
const EMBEDDING_DIMS =
  Number.isInteger(parsedEmbeddingDims) && parsedEmbeddingDims > 0
    ? parsedEmbeddingDims
    : DEFAULT_OPENROUTER_EMBEDDING_DIMS;

// OpenRouter embeddings endpoint
const API_URL = "https://openrouter.ai/api/v1/embeddings";

// Debug logging
const debug = createDebugLogger("OPENROUTER");
28
+
/**
 * Simple least-recently-used cache.
 *
 * Backed by a Map, whose iteration order is insertion order: the first key
 * is the least recently used one, and entries are re-inserted on access to
 * mark them as most recently used. When the cache is full, the oldest entry
 * is evicted before a new one is stored.
 *
 * A maxSize that is not greater than zero disables storage entirely.
 */
class LRUCache<K, V> {
  private cache = new Map<K, V>();

  /**
   * @param maxSize - Maximum number of entries to retain
   */
  constructor(private maxSize: number) {}

  /**
   * Look up a key and mark it as most recently used.
   *
   * @param key - Key to look up
   * @returns The stored value, or undefined when the key is absent
   */
  get(key: K): V | undefined {
    // Use has() rather than comparing against undefined so that entries
    // whose value is undefined still have their recency refreshed.
    if (!this.cache.has(key)) {
      return undefined;
    }
    const value = this.cache.get(key) as V;
    // Move to end (most recently used)
    this.cache.delete(key);
    this.cache.set(key, value);
    return value;
  }

  /**
   * Store a value, evicting the least recently used entry if the cache is full.
   *
   * @param key - Key to store under
   * @param value - Value to store
   */
  set(key: K, value: V): void {
    // Remove any existing entry first so the key moves to the newest position
    this.cache.delete(key);

    // A non-positive capacity means nothing may be retained
    if (!(this.maxSize > 0)) {
      return;
    }

    if (this.cache.size >= this.maxSize) {
      // Check the iterator's done flag instead of the key value so that an
      // undefined key can be evicted like any other.
      const oldest = this.cache.keys().next();
      if (!oldest.done) {
        this.cache.delete(oldest.value);
      }
    }
    this.cache.set(key, value);
  }

  /** Number of entries currently stored. */
  get size(): number {
    return this.cache.size;
  }

  /** Remove every entry. */
  clear(): void {
    this.cache.clear();
  }
}
67
+
// Module-wide embedding cache, used to avoid repeated API calls.
// Keys are strings, values are embedding vectors;
// capacity is EMBEDDING_CACHE_MAX_SIZE entries.
const embeddingCache: LRUCache<string, number[]> = new LRUCache(EMBEDDING_CACHE_MAX_SIZE);
72
+
/**
 * Pause for the given number of milliseconds.
 *
 * @param ms - Delay in milliseconds
 * @returns Promise that resolves (with no value) once the timer fires
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
79
+
/**
 * Compute the exponential backoff delay for a retry: baseMs doubled once per attempt.
 *
 * @param attempt - Current attempt number (0-indexed)
 * @param baseMs - Delay used for attempt 0, in milliseconds (defaults to 1000)
 * @returns Delay in milliseconds
 */
function getBackoffDelay(attempt: number, baseMs: number = 1000): number {
  const multiplier = 2 ** attempt;
  return multiplier * baseMs;
}
89
+
/**
 * Derive the cache key for a piece of text.
 *
 * @param text - Text to fingerprint
 * @returns Hex-encoded SHA-256 digest of the text
 */
export function getCacheKey(text: string): string {
  const hasher = createHash("sha256");
  hasher.update(text);
  return hasher.digest("hex");
}
96
+
/**
 * Error raised by the OpenRouter client.
 *
 * Carries the HTTP status code and raw response body when they are known,
 * so callers can inspect why a request failed.
 */
class OpenRouterError extends Error {
  public readonly statusCode?: number;
  public readonly responseBody?: string;

  /**
   * @param message - Human-readable description of the failure
   * @param statusCode - HTTP status associated with the failure, if any
   * @param responseBody - Raw response body associated with the failure, if any
   */
  constructor(message: string, statusCode?: number, responseBody?: string) {
    super(message);
    this.statusCode = statusCode;
    this.responseBody = responseBody;
    this.name = "OpenRouterError";
  }
}
110
+
/**
 * Get the embedding vector for a text via the OpenRouter API.
 *
 * Behavior:
 * - The input is truncated with truncateForEmbedding before anything else.
 * - Results are cached under the SHA-256 hash of the truncated text.
 * - Up to MAX_RETRIES attempts are made, each bounded by OPENROUTER_TIMEOUT_MS.
 * - A 429 response waits with backoff based on RATE_LIMIT_BACKOFF_BASE_MS;
 *   other retryable failures (including timeouts) use the default backoff.
 * - Responses with status 400, 401, 403 or 404 fail immediately.
 *
 * @param text - Input text to embed
 * @returns Promise resolving to the embedding vector
 * @throws OpenRouterError if the API key is missing, the request fails with
 *         a non-retryable status, or all attempts are exhausted
 */
export async function getOpenRouterEmbedding(text: string): Promise<number[]> {
  // Validate API key
  if (!OPENROUTER_API_KEY) {
    throw new OpenRouterError(
      "OPENROUTER_API_KEY environment variable is not set"
    );
  }

  // Truncate input first - cache key must match actual embedded text
  const truncatedText = truncateForEmbedding(text);

  // Check cache using truncated text hash
  const cacheKey = getCacheKey(truncatedText);
  const cached = embeddingCache.get(cacheKey);
  if (cached) {
    debug(`Cache hit for key: "${cacheKey.substring(0, 16)}..."`);
    return cached;
  }

  debug(`Cache miss, fetching embedding for: "${cacheKey.substring(0, 16)}..."`);

  // Most recent failure, reported if every attempt fails
  let lastError: Error | null = null;

  for (let attempt = 0; attempt < MAX_RETRIES; attempt++) {
    const isLastAttempt = attempt >= MAX_RETRIES - 1;

    // Abort the request if it runs longer than the configured timeout
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), OPENROUTER_TIMEOUT_MS);

    try {
      debug(`Attempt ${attempt + 1}/${MAX_RETRIES}`);

      const response = await fetch(API_URL, {
        method: "POST",
        headers: {
          Authorization: `Bearer ${OPENROUTER_API_KEY}`,
          "Content-Type": "application/json",
          "HTTP-Referer": "https://github.com/apple-notes-mcp",
          "X-Title": "Apple Notes MCP",
        },
        body: JSON.stringify({
          model: EMBEDDING_MODEL,
          input: truncatedText,
          dimensions: EMBEDDING_DIMS,
        }),
        signal: controller.signal,
      });

      // Handle rate limiting
      if (response.status === 429) {
        clearTimeout(timeoutId); // The timeout must not fire while sleeping

        // Record the failure so the final error names the real cause
        lastError = new OpenRouterError(
          "OpenRouter API rate limit exceeded (429)",
          429
        );

        // Sleeping after the final attempt would only delay the error
        if (!isLastAttempt) {
          const waitTime = getBackoffDelay(attempt, RATE_LIMIT_BACKOFF_BASE_MS);
          debug(`Rate limited (429), waiting ${waitTime}ms before retry`);
          await sleep(waitTime);
        }
        continue;
      }

      // Handle other errors
      if (!response.ok) {
        const errorBody = await response.text();
        throw new OpenRouterError(
          `OpenRouter API error: ${response.status} - ${errorBody}`,
          response.status,
          errorBody
        );
      }

      // Parse response
      const data = await response.json() as {
        data?: Array<{ embedding?: number[] }>;
      };

      // Validate response structure: the vector must be present and an array
      const embedding = data?.data?.[0]?.embedding;
      if (!embedding || !Array.isArray(embedding)) {
        throw new OpenRouterError(
          "Invalid API response: missing embedding data",
          response.status,
          JSON.stringify(data)
        );
      }

      // A size mismatch is logged but not treated as fatal
      if (embedding.length !== EMBEDDING_DIMS) {
        debug(
          `Warning: Expected ${EMBEDDING_DIMS} dimensions, got ${embedding.length}`
        );
      }

      // Cache the result
      embeddingCache.set(cacheKey, embedding);
      debug(`Successfully got embedding with ${embedding.length} dimensions`);

      return embedding;
    } catch (error) {
      if (error instanceof Error && error.name === "AbortError") {
        // Timeouts are retryable: record and fall through to the backoff below
        debug(`Request timed out after ${OPENROUTER_TIMEOUT_MS}ms (attempt ${attempt + 1}/${MAX_RETRIES})`);
        lastError = new OpenRouterError(
          `Request timed out after ${OPENROUTER_TIMEOUT_MS}ms`,
          408
        );
      } else {
        lastError = error instanceof Error ? error : new Error(String(error));

        // Don't retry on non-retryable errors
        if (error instanceof OpenRouterError && error.statusCode) {
          const nonRetryable = [400, 401, 403, 404];
          if (nonRetryable.includes(error.statusCode)) {
            debug(`Non-retryable error (${error.statusCode}), failing immediately`);
            throw error;
          }
        }
      }

      // If not the last attempt, wait before retrying
      if (!isLastAttempt) {
        const waitTime = getBackoffDelay(attempt);
        debug(`Error: ${lastError.message}, retrying in ${waitTime}ms`);
        await sleep(waitTime);
      }
    } finally {
      // Always release the timer, whatever path left the try block
      clearTimeout(timeoutId);
    }
  }

  // All retries exhausted
  throw new OpenRouterError(
    `Failed to get embedding after ${MAX_RETRIES} attempts: ${lastError?.message ?? "unknown error"}`,
    undefined,
    undefined
  );
}
259
+
/**
 * Report the embedding size this client is configured to request.
 *
 * @returns The EMBEDDING_DIMS value resolved at module load
 */
export function getOpenRouterDimensions(): number {
  const configuredDims = EMBEDDING_DIMS;
  return configuredDims;
}
268
+
/**
 * Drop every cached embedding and log how many entries were removed.
 * Handy in tests or when memory needs to be reclaimed.
 */
export function clearEmbeddingCache(): void {
  // Capture the count before clearing so the log reflects what was removed
  const entriesBefore = embeddingCache.size;
  embeddingCache.clear();
  debug(`Cleared embedding cache (${entriesBefore} entries)`);
}
278
+
/**
 * Report how many embeddings are currently cached.
 *
 * @returns Number of entries in the embedding cache
 */
export function getEmbeddingCacheSize(): number {
  const { size } = embeddingCache;
  return size;
}