@qearlyao/familiar 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. package/.env.example +31 -0
  2. package/HEARTBEAT.md +23 -0
  3. package/LICENSE +21 -0
  4. package/MEMORY.md +1 -0
  5. package/README.md +245 -0
  6. package/SOUL.md +13 -0
  7. package/USER.md +13 -0
  8. package/config.example.toml +221 -0
  9. package/dist/agent-events.js +167 -0
  10. package/dist/agent.js +590 -0
  11. package/dist/browser-tools.js +638 -0
  12. package/dist/chat-log.js +130 -0
  13. package/dist/cli.js +168 -0
  14. package/dist/config.js +804 -0
  15. package/dist/data-retention.js +54 -0
  16. package/dist/discord.js +1203 -0
  17. package/dist/generated-media.js +86 -0
  18. package/dist/image-derivatives.js +102 -0
  19. package/dist/image-gen.js +440 -0
  20. package/dist/inbound-attachments.js +266 -0
  21. package/dist/index.js +10 -0
  22. package/dist/media-understanding.js +120 -0
  23. package/dist/memory/diary/ambient-injector.js +180 -0
  24. package/dist/memory/diary/ambient.js +124 -0
  25. package/dist/memory/diary/chunks.js +231 -0
  26. package/dist/memory/diary/index.js +3 -0
  27. package/dist/memory/diary/indexer.js +93 -0
  28. package/dist/memory/doctor.js +250 -0
  29. package/dist/memory/index/chunk-indexer.js +151 -0
  30. package/dist/memory/index/embedding-provider.js +119 -0
  31. package/dist/memory/index/fts-query.js +18 -0
  32. package/dist/memory/index/retrieval.js +246 -0
  33. package/dist/memory/index/schema.js +157 -0
  34. package/dist/memory/index/store.js +513 -0
  35. package/dist/memory/index/vec.js +72 -0
  36. package/dist/memory/index/vector-codec.js +27 -0
  37. package/dist/memory/lcm/backfill.js +247 -0
  38. package/dist/memory/lcm/condense.js +146 -0
  39. package/dist/memory/lcm/context-transformer.js +662 -0
  40. package/dist/memory/lcm/context.js +421 -0
  41. package/dist/memory/lcm/eviction-score.js +38 -0
  42. package/dist/memory/lcm/index.js +6 -0
  43. package/dist/memory/lcm/indexer.js +200 -0
  44. package/dist/memory/lcm/normalize.js +235 -0
  45. package/dist/memory/lcm/schema.js +188 -0
  46. package/dist/memory/lcm/segment-manager.js +136 -0
  47. package/dist/memory/lcm/store.js +722 -0
  48. package/dist/memory/lcm/summarizer.js +258 -0
  49. package/dist/memory/lcm/types.js +1 -0
  50. package/dist/memory/operator.js +477 -0
  51. package/dist/memory/service.js +202 -0
  52. package/dist/memory/tools.js +205 -0
  53. package/dist/models.js +165 -0
  54. package/dist/persona.js +54 -0
  55. package/dist/runtime.js +493 -0
  56. package/dist/scheduler.js +200 -0
  57. package/dist/settings.js +116 -0
  58. package/dist/skills.js +38 -0
  59. package/dist/tts.js +143 -0
  60. package/dist/web-auth.js +105 -0
  61. package/dist/web-events.js +114 -0
  62. package/dist/web-http.js +29 -0
  63. package/dist/web-static.js +106 -0
  64. package/dist/web-tools.js +940 -0
  65. package/dist/web-types.js +2 -0
  66. package/dist/web.js +844 -0
  67. package/package.json +60 -0
  68. package/web/dist/assets/index-ClgkMgaq.css +2 -0
  69. package/web/dist/assets/index-Cu2QquuR.js +59 -0
  70. package/web/dist/favicon.svg +1 -0
  71. package/web/dist/icons.svg +24 -0
  72. package/web/dist/index.html +20 -0
@@ -0,0 +1,250 @@
1
/**
 * Runs every memory diagnostic against the given stores and gathers the results.
 *
 * @param {object} stores - Store bundle handed unchanged to each check.
 * @param {object} [opts] - Currently unused; accepted so callers can pass options.
 * @returns {{ findings: object[], clean: boolean }} All findings in check order;
 *   `clean` is true only when no check reported anything.
 */
export function runDoctor(stores, opts = {}) {
    void opts;
    // Checks run in this fixed order, each appending to the shared findings list.
    const checks = [
        findDanglingIndexSources,
        findOrphanEmptySegments,
        findStaleLcmIndexRows,
        findBrokenContextOrdering,
        findSummaryFkViolations,
        findMissingPrunedSnapshots,
        findRequiresReindex,
        findEmbeddingMismatches,
    ];
    const findings = [];
    for (const check of checks) {
        check(stores, findings);
    }
    const clean = findings.length === 0;
    return { findings, clean };
}
14
/**
 * Applies the automatic repairs to the index and LCM databases.
 *
 * The repairs always run in full; `report` is only consulted to decide which
 * "not modified" warnings are appended to the summary.
 *
 * Repairs performed:
 *  - delete index source rows whose chunk no longer exists;
 *  - delete index source rows for LCM records/summaries that no longer exist;
 *  - delete non-active segments that have neither records nor summaries;
 *  - renumber each session's context items to ordinals 0..n-1.
 *
 * @param {object} stores - Provides `index` and `lcm` stores, each exposing `db`.
 * @param {{ findings?: Array<{ kind: string }> }} report - Result of `runDoctor`.
 * @returns {{ fixed: number, summary: string }} Count of rows changed and a
 *   human-readable summary including any warnings.
 */
export function applyDoctorFixes(stores, report) {
    let fixed = 0;
    const indexDb = stores.index.db;
    const lcmDb = stores.lcm.db;

    // Join an already-open transaction, otherwise open an immediate one.
    // NOTE(review): `.transaction(fn).immediate()` looks like the better-sqlite3 API — confirm.
    const runAtomically = (db, work) => {
        if (db.inTransaction) {
            work();
        }
        else {
            db.transaction(work).immediate();
        }
    };

    const runIndexFixes = () => {
        fixed += indexDb
            .prepare(`DELETE FROM memory_index_sources
                WHERE chunk_id NOT IN (SELECT id FROM memory_chunks)`)
            .run().changes;
        // DISTINCT: a source with several chunks only needs one existence check.
        const lcmSources = indexDb
            .prepare(`SELECT DISTINCT corpus, source_id
                FROM memory_index_sources
                WHERE corpus IN ('lcm_record', 'lcm_summary')`)
            .all();
        for (const source of lcmSources) {
            if (lcmSourceExists(stores, source.corpus, source.source_id)) {
                continue;
            }
            // Count before deleting so the tally reflects the rows actually removed.
            const before = countIndexSourceRows(stores.index, source.corpus, source.source_id);
            stores.index.deleteBySourceUnsafe(source.corpus, source.source_id);
            fixed += before;
        }
    };
    runAtomically(indexDb, runIndexFixes);

    const runLcmFixes = () => {
        fixed += lcmDb
            .prepare(`DELETE FROM lcm_segments
                WHERE status != 'active'
                AND id NOT IN (SELECT DISTINCT segment_id FROM lcm_records)
                AND id NOT IN (SELECT DISTINCT segment_id FROM lcm_summaries)`)
            .run().changes;
        const sessions = lcmDb
            .prepare("SELECT DISTINCT session_key FROM lcm_context_items ORDER BY session_key")
            .all();
        // Prepared once and reused for every session and row.
        const selectItems = lcmDb.prepare(`SELECT rowid AS rowid, ordinal
                FROM lcm_context_items
                WHERE session_key = ?
                ORDER BY ordinal, rowid`);
        const setOrdinal = lcmDb.prepare("UPDATE lcm_context_items SET ordinal = ?, updated_at = unixepoch() WHERE rowid = ?");
        for (const session of sessions) {
            const rows = selectItems.all(session.session_key);
            for (const [index, row] of rows.entries()) {
                if (row.ordinal === index) {
                    continue;
                }
                setOrdinal.run(index, row.rowid);
                fixed += 1;
            }
        }
    };
    runAtomically(lcmDb, runLcmFixes);

    // Finding kinds that are deliberately left untouched, in reporting order.
    const manualFollowUps = [
        ["summary_fk_violation", "summary FK violations were not modified; inspect LCM summary lineage manually"],
        ["missing_pruned_summary_snapshot", "missing pruned summary snapshots were not modified; inspect retained summaries manually"],
        ["embedding_mismatch", "embedding mismatches were not rebuilt; run 'familiar memory reindex'"],
        ["requires_reindex", "reindex requirement was not cleared; run 'familiar memory reindex --force'"],
    ];
    const reportedKinds = new Set((report?.findings ?? []).map((finding) => finding.kind));
    const warnings = manualFollowUps
        .filter(([kind]) => reportedKinds.has(kind))
        .map(([, message]) => message);
    const summary = [`fixed ${fixed} item(s)`, ...warnings].join("; ");
    return { fixed, summary };
}
85
/**
 * Reports index source rows whose `chunk_id` has no matching row in `memory_chunks`.
 *
 * @param {object} stores - Uses `stores.index.db`.
 * @param {object[]} findings - Receives one fixable `dangling_index_source` entry per row.
 */
function findDanglingIndexSources(stores, findings) {
    const statement = stores.index.db.prepare(`SELECT chunk_id, corpus, source_id, chunk_index
        FROM memory_index_sources
        WHERE chunk_id NOT IN (SELECT id FROM memory_chunks)
        ORDER BY corpus, source_id, chunk_index`);
    for (const { chunk_id, corpus, source_id, chunk_index } of statement.all()) {
        const detail = `${corpus}:${source_id}#${chunk_index} references missing chunk ${chunk_id}`;
        findings.push({ kind: "dangling_index_source", detail, fixable: true });
    }
}
100
/**
 * Reports non-active segments that are referenced by neither records nor summaries.
 *
 * @param {object} stores - Uses `stores.lcm.db`.
 * @param {object[]} findings - Receives one fixable `orphan_empty_segment` entry per segment.
 */
function findOrphanEmptySegments(stores, findings) {
    const statement = stores.lcm.db.prepare(`SELECT id
        FROM lcm_segments
        WHERE status != 'active'
        AND id NOT IN (SELECT DISTINCT segment_id FROM lcm_records)
        AND id NOT IN (SELECT DISTINCT segment_id FROM lcm_summaries)
        ORDER BY started_at, id`);
    for (const { id } of statement.all()) {
        findings.push({
            kind: "orphan_empty_segment",
            detail: `closed segment ${id} has no records`,
            fixable: true,
        });
    }
}
117
/**
 * Reports index source rows in the LCM corpora whose underlying LCM record or
 * summary cannot be found (as decided by `lcmSourceExists`).
 *
 * @param {object} stores - Uses `stores.index.db`; passed on to `lcmSourceExists`.
 * @param {object[]} findings - Receives one fixable `stale_lcm_index_source` entry per stale row.
 */
function findStaleLcmIndexRows(stores, findings) {
    const lcmRows = stores.index.db
        .prepare(`SELECT corpus, source_id, chunk_index
            FROM memory_index_sources
            WHERE corpus IN ('lcm_record', 'lcm_summary')
            ORDER BY corpus, source_id, chunk_index`)
        .all();
    for (const { corpus, source_id, chunk_index } of lcmRows) {
        const stillExists = lcmSourceExists(stores, corpus, source_id);
        if (!stillExists) {
            findings.push({
                kind: "stale_lcm_index_source",
                detail: `${corpus}:${source_id}#${chunk_index} points at missing LCM source`,
                fixable: true,
            });
        }
    }
}
134
/**
 * Reports sessions whose context-item ordinals are not exactly 0, 1, ..., n-1.
 *
 * @param {object} stores - Uses `stores.lcm.db`.
 * @param {object[]} findings - Receives one fixable `broken_context_ordering`
 *   entry per affected session, listing the ordinals that were found.
 */
function findBrokenContextOrdering(stores, findings) {
    const db = stores.lcm.db;
    const sessions = db
        .prepare("SELECT DISTINCT session_key FROM lcm_context_items ORDER BY session_key")
        .all();
    // Prepared once and reused for every session.
    const selectOrdinals = db.prepare(`SELECT ordinal
        FROM lcm_context_items
        WHERE session_key = ?
        ORDER BY ordinal`);
    for (const session of sessions) {
        const ordinals = selectOrdinals.all(session.session_key).map((row) => row.ordinal);
        // Sorted ordinals equal to their positions are necessarily unique and
        // gap-free, so no separate duplicate check is needed.
        const isContiguous = ordinals.every((ordinal, index) => ordinal === index);
        if (isContiguous) {
            continue;
        }
        findings.push({
            kind: "broken_context_ordering",
            detail: `session ${session.session_key} ordinals are ${ordinals.join(",")}`,
            fixable: true,
        });
    }
}
155
/**
 * Reports summary lineage rows that point at missing rows: summary sources
 * whose `record_id` is absent from `lcm_records`, and summary parent links
 * whose `parent_summary_id` is absent from `lcm_summaries`.
 *
 * @param {object} stores - Uses `stores.lcm.db`.
 * @param {object[]} findings - Receives non-fixable `summary_fk_violation` entries,
 *   source violations first, then parent violations.
 */
function findSummaryFkViolations(stores, findings) {
    const db = stores.lcm.db;
    const addViolation = (detail) => {
        findings.push({ kind: "summary_fk_violation", detail, fixable: false });
    };

    const missingRecords = db.prepare(`SELECT summary_id, ord, record_id
        FROM lcm_summary_sources
        WHERE record_id IS NOT NULL
        AND record_id NOT IN (SELECT id FROM lcm_records)
        ORDER BY summary_id, ord`);
    for (const { summary_id, ord, record_id } of missingRecords.all()) {
        addViolation(`summary ${summary_id} source ${ord} references missing record ${record_id}`);
    }

    const missingParents = db.prepare(`SELECT summary_id, parent_summary_id
        FROM lcm_summary_parents
        WHERE parent_summary_id NOT IN (SELECT id FROM lcm_summaries)
        ORDER BY summary_id, parent_summary_id`);
    for (const { summary_id, parent_summary_id } of missingParents.all()) {
        addViolation(`summary ${summary_id} references missing parent summary ${parent_summary_id}`);
    }
}
184
/**
 * Reports summaries where both `covers_from_record_id` and `snapshot_json` are NULL.
 *
 * @param {object} stores - Uses `stores.lcm.db`.
 * @param {object[]} findings - Receives one non-fixable
 *   `missing_pruned_summary_snapshot` entry per summary.
 */
function findMissingPrunedSnapshots(stores, findings) {
    const statement = stores.lcm.db.prepare(`SELECT id
        FROM lcm_summaries
        WHERE covers_from_record_id IS NULL
        AND snapshot_json IS NULL
        ORDER BY id`);
    for (const { id } of statement.all()) {
        findings.push({
            kind: "missing_pruned_summary_snapshot",
            detail: `summary ${id} has pruned raw coverage without snapshot_json`,
            fixable: false,
        });
    }
}
200
/**
 * Reports a pending reindex when the `requires_reindex` key in `memory_meta`
 * holds the string "1".
 *
 * @param {object} stores - Uses `stores.index.db`.
 * @param {object[]} findings - Receives at most one non-fixable `requires_reindex` entry.
 */
function findRequiresReindex(stores, findings) {
    const flag = stores.index.db
        .prepare("SELECT v FROM memory_meta WHERE k = 'requires_reindex'")
        .get();
    const reindexPending = flag?.v === "1";
    if (reindexPending) {
        findings.push({
            kind: "requires_reindex",
            detail: "memory index was cleared after embedding config changed; run 'familiar memory reindex --force'",
            fixable: false,
        });
    }
}
210
/**
 * Reports chunks whose stored embedding model or dimensions differ from the
 * index store's current embedding configuration.
 *
 * @param {object} stores - Uses `stores.index.embeddingConfig()` and `stores.index.db`.
 * @param {object[]} findings - Receives one non-fixable `embedding_mismatch` entry per chunk.
 */
function findEmbeddingMismatches(stores, findings) {
    const { model, dimensions } = stores.index.embeddingConfig();
    const mismatched = stores.index.db
        .prepare(`SELECT id, corpus, embedding_model, embedding_dimensions
            FROM memory_chunks
            WHERE embedding_model != ? OR embedding_dimensions != ?
            ORDER BY id`)
        .all(model, dimensions);
    for (const chunk of mismatched) {
        const stored = `${chunk.embedding_model}/${chunk.embedding_dimensions}`;
        findings.push({
            kind: "embedding_mismatch",
            detail: `chunk ${chunk.id} (${chunk.corpus}) has ${stored}; current is ${model}/${dimensions}`,
            fixable: false,
        });
    }
}
227
/**
 * Tells whether the LCM row behind an index source id still exists.
 *
 * Corpora other than `lcm_record` / `lcm_summary` are not checked here and are
 * always reported as existing. A source id that cannot be parsed counts as missing.
 *
 * @param {object} stores - Uses `stores.lcm.getRecord` / `stores.lcm.getSummary`.
 * @param {string} corpus - Corpus name of the index source row.
 * @param {string} sourceId - Source id, expected in the form `<corpus>:<id>`.
 * @returns {boolean} False only when an LCM source is malformed or not found.
 */
function lcmSourceExists(stores, corpus, sourceId) {
    if (corpus !== "lcm_record" && corpus !== "lcm_summary") {
        return true;
    }
    const id = parseIndexSourceId(sourceId, corpus);
    if (id === null) {
        return false;
    }
    const row = corpus === "lcm_record" ? stores.lcm.getRecord(id) : stores.lcm.getSummary(id);
    // `!= null` on purpose: a lookup that reports a miss as `undefined` rather
    // than `null` must still count as missing.
    return row != null;
}
238
/**
 * Counts the `memory_index_sources` rows for one corpus/source pair.
 *
 * @param {object} store - Index store; uses `store.db`.
 * @param {string} corpus - Corpus name to match.
 * @param {string} sourceId - Source id to match.
 * @returns {number} The `COUNT(*)` value returned by the query.
 */
function countIndexSourceRows(store, corpus, sourceId) {
    const statement = store.db.prepare("SELECT COUNT(*) AS n FROM memory_index_sources WHERE corpus = ? AND source_id = ?");
    const { n } = statement.get(corpus, sourceId);
    return n;
}
244
/**
 * Extracts the positive integer id from a source id of the form `<prefix>:<digits>`.
 *
 * Only plain decimal digits are accepted after the prefix. Forms that
 * `Number()` would also coerce to an integer (`1e3`, `0x10`, `1.0`, padded
 * with whitespace) are rejected so a malformed id is never mistaken for a real one.
 *
 * @param {unknown} value - Candidate source id; non-strings yield null.
 * @param {string} prefix - Expected prefix, without the trailing colon.
 * @returns {number | null} The id, or null when the value is malformed or not positive.
 */
function parseIndexSourceId(value, prefix) {
    const expectedPrefix = `${prefix}:`;
    if (typeof value !== "string" || !value.startsWith(expectedPrefix)) {
        return null;
    }
    const digits = value.slice(expectedPrefix.length);
    if (!/^\d+$/.test(digits)) {
        return null;
    }
    const id = Number(digits);
    // Safe-integer check rejects digit strings too long to represent exactly.
    return Number.isSafeInteger(id) && id > 0 ? id : null;
}
@@ -0,0 +1,151 @@
1
+ import { createMemoryContentHash } from "./store.js";
2
/**
 * Writes text chunks into the memory index, embedding only the content that
 * the store does not already hold and that arrived without an embedding.
 */
export class ChunkIndexer {
    store;
    embeddingProvider;
    /**
     * @param {{ store: object, embeddingProvider: object }} options - The index
     *   store and the provider whose `embed(texts, signal)` produces vectors.
     */
    constructor(options) {
        this.store = options.store;
        this.embeddingProvider = options.embeddingProvider;
    }
    /**
     * Indexes the given chunks. Inputs whose text is empty after trimming are skipped.
     *
     * @param {object[]} inputs - Chunk inputs (`corpus`, `text`, and optional fields).
     * @param {AbortSignal} [signal] - Forwarded to the embedding provider.
     * @returns {Promise<{ ids: Array, embedded: number, reused: number, skipped: number }>}
     */
    async indexChunks(inputs, signal) {
        const prepared = this.prepare(inputs);
        if (prepared.length === 0) {
            return { ids: [], embedded: 0, reused: 0, skipped: inputs.length };
        }
        return this.insertPrepared(prepared, inputs.length - prepared.length, signal);
    }
    /**
     * Replaces everything indexed for one source: mappings not present in the
     * new input set are deleted first, then the new chunks are indexed.
     *
     * @param {string} corpus - Corpus applied to every input.
     * @param {string} sourceId - Source id applied to every input.
     * @param {object[]} inputs - Chunk inputs for this source.
     * @param {AbortSignal} [signal] - Forwarded to the embedding provider.
     * @returns {Promise<{ ids: Array, embedded: number, reused: number, skipped: number }>}
     */
    async replaceSource(corpus, sourceId, inputs, signal) {
        const prepared = this.prepare(inputs.map((input) => ({ ...input, corpus, sourceId })));
        const keepMappings = prepared.map(({ contentHash, chunkIndex }) => ({ contentHash, chunkIndex }));
        this.store.deleteBySourceExceptMappings(corpus, sourceId, keepMappings);
        return this.insertPrepared(prepared, inputs.length - prepared.length, signal);
    }
    /**
     * Normalises inputs into work items: trims text, drops empty chunks,
     * applies defaults, and computes each chunk's content hash from the
     * corpus, text and current embedding configuration.
     *
     * @param {object[]} inputs - Raw chunk inputs.
     * @returns {object[]} Work items in input order, minus the empty ones.
     */
    prepare(inputs) {
        const embeddingConfig = this.store.embeddingConfig();
        const prepared = [];
        for (const input of inputs) {
            const text = input.text.trim();
            if (!text) {
                continue;
            }
            prepared.push({
                input,
                text,
                chunkIndex: input.chunkIndex ?? 0,
                sourceId: input.sourceId ?? null,
                contentHash: createMemoryContentHash({
                    corpus: input.corpus,
                    text,
                    embeddingModel: embeddingConfig.model,
                    embeddingDimensions: embeddingConfig.dimensions,
                }),
                existingId: null,
                embedding: input.embedding,
            });
        }
        return prepared;
    }
    /**
     * Embeds what is needed and persists the prepared items.
     *
     * Items whose hash is already stored only get their source mapping
     * recorded; all others are inserted as new chunks.
     *
     * @param {object[]} prepared - Work items produced by `prepare`.
     * @param {number} skipped - Inputs dropped by `prepare`, echoed in the result.
     * @param {AbortSignal} [signal] - Forwarded to the embedding provider.
     * @returns {Promise<{ ids: Array, embedded: number, reused: number, skipped: number }>}
     *   `ids` is aligned with `prepared`.
     * @throws {Error} When the provider returns the wrong number of embeddings
     *   or a new chunk ends up without one.
     */
    async insertPrepared(prepared, skipped, signal) {
        const startedAt = Date.now();
        if (prepared.length === 0) {
            return { ids: [], embedded: 0, reused: 0, skipped };
        }
        const present = this.store.whichHashesPresent(prepared.map((item) => item.contentHash));
        for (const item of prepared) {
            item.existingId = present.get(item.contentHash) ?? null;
        }
        // Decide which hashes need a provider call: not stored yet and no
        // caller-supplied embedding for the same content.
        const pendingEmbeddings = new Map();
        const suppliedByHash = new Map();
        for (const item of prepared) {
            if (item.embedding) {
                suppliedByHash.set(item.contentHash, item.embedding);
                pendingEmbeddings.delete(item.contentHash);
                continue;
            }
            const needsEmbedding = item.existingId === null &&
                !pendingEmbeddings.has(item.contentHash) &&
                !suppliedByHash.has(item.contentHash);
            if (needsEmbedding) {
                pendingEmbeddings.set(item.contentHash, item);
            }
        }
        const itemsToEmbed = [...pendingEmbeddings.values()];
        // Character count of the text sent to the provider, for the debug log.
        const embeddingCost = itemsToEmbed.reduce((total, item) => total + item.text.length, 0);
        const embeddings = itemsToEmbed.length === 0
            ? []
            : await this.embeddingProvider.embed(itemsToEmbed.map((item) => item.text), signal);
        if (embeddings.length !== itemsToEmbed.length) {
            throw new Error(`Embedding count mismatch: expected ${itemsToEmbed.length}, got ${embeddings.length}`);
        }
        const embeddedByHash = new Map(suppliedByHash);
        for (const [index, item] of itemsToEmbed.entries()) {
            const embedding = embeddings[index];
            if (!embedding) {
                throw new Error(`Embedding provider returned no result for chunk ${index}`);
            }
            item.embedding = embedding;
            embeddedByHash.set(item.contentHash, embedding);
        }
        // Read once; only used for placeholders on reused chunks.
        const { dimensions } = this.store.embeddingConfig();
        const ids = new Array(prepared.length);
        const toInsert = [];
        const insertPositions = [];
        const existingMappings = [];
        for (const [resultIndex, item] of prepared.entries()) {
            if (item.existingId !== null) {
                ids[resultIndex] = item.existingId;
                // Reused chunks without a supplied embedding get a zero-filled placeholder.
                existingMappings.push(this.#toStoreRow(item, item.embedding ?? new Float32Array(dimensions)));
                continue;
            }
            const embedding = item.embedding ?? embeddedByHash.get(item.contentHash);
            if (!embedding) {
                throw new Error("Missing embedding for memory chunk");
            }
            insertPositions.push(resultIndex);
            toInsert.push(this.#toStoreRow(item, embedding));
        }
        this.store.recordSourceMappings(existingMappings);
        const insertedIds = this.store.insertChunks(toInsert);
        for (const [index, position] of insertPositions.entries()) {
            ids[position] = insertedIds[index];
        }
        const result = {
            ids,
            embedded: itemsToEmbed.length,
            reused: prepared.length - toInsert.length,
            skipped,
        };
        logMemoryIndexBatch({
            chunks: prepared.length,
            durationMs: Date.now() - startedAt,
            embeddingCost,
        });
        return result;
    }
    /** Builds the row shape passed to the store for one work item. */
    #toStoreRow(item, embedding) {
        return {
            corpus: item.input.corpus,
            sourceId: item.sourceId,
            sourceRef: item.input.sourceRef ?? null,
            chunkIndex: item.chunkIndex,
            text: item.text,
            snippet: item.input.snippet,
            tokenCount: item.input.tokenCount ?? null,
            metadata: item.input.metadata ?? null,
            embedding,
        };
    }
}
147
/**
 * Writes one JSON line describing an index batch to stderr, but only when the
 * `DEBUG` environment variable is exactly "memory-index".
 *
 * @param {object} payload - Fields merged into the logged event.
 */
function logMemoryIndexBatch(payload) {
    const debugEnabled = process.env.DEBUG === "memory-index";
    if (debugEnabled) {
        const event = { event: "memory_index_batch", ...payload };
        console.error(JSON.stringify(event));
    }
}
@@ -0,0 +1,119 @@
1
/**
 * Builds the embedding provider selected by `config.memory.embedding.format`
 * (falling back to `.api` when `format` is nullish).
 *
 * @param {object} config - Application config; reads `config.memory.embedding`.
 * @param {{ fetchFn?: Function }} [options] - Optional fetch replacement; the
 *   global `fetch` is used when omitted.
 * @returns {object} A Gemini embedding provider.
 * @throws {Error} For any format other than "gemini".
 */
export function createEmbeddingProvider(config, options = {}) {
    const { embedding } = config.memory;
    const format = embedding.format ?? embedding.api;
    if (format !== "gemini") {
        throw new Error(`NotImplementedError: memory.embedding.format=${format} is recognized but only gemini is implemented in v0`);
    }
    const fetchFn = options.fetchFn ?? fetch;
    return new GeminiEmbeddingProvider(config, fetchFn);
}
7
/**
 * Embedding provider that posts batches to a `:batchEmbedContents` endpoint
 * and returns one `Float32Array` per input.
 */
class GeminiEmbeddingProvider {
    api = "gemini";
    provider;
    model;
    dimensions;
    baseUrl;
    apiKeyEnv;
    batchSize;
    fetchFn;
    /**
     * @param {object} config - Reads `config.memory.embedding` (provider, model,
     *   dimensions, baseUrl, apiKeyEnv, batchSize).
     * @param {Function} fetchFn - fetch-compatible function used for requests.
     * @throws {Error} When the batch size is not a number >= 1.
     */
    constructor(config, fetchFn) {
        const settings = config.memory.embedding;
        this.provider = settings.provider;
        this.model = settings.model;
        this.dimensions = settings.dimensions;
        // Trailing slashes are dropped so the URL join never doubles them.
        this.baseUrl = settings.baseUrl.replace(/\/+$/, "");
        this.apiKeyEnv = settings.apiKeyEnv;
        this.batchSize = settings.batchSize;
        // Written as a negated `>=` so NaN and undefined are rejected too;
        // a plain `< 1` lets them through and the batching loop then sends
        // an empty request.
        if (!(this.batchSize >= 1)) {
            throw new Error(`Embedding batch size must be >= 1, got ${this.batchSize}`);
        }
        this.fetchFn = fetchFn;
    }
    /**
     * Embeds a single input.
     *
     * @param {string | object} input - Text, or an object with `parts`.
     * @param {AbortSignal} [signal] - Passed through to fetch.
     * @returns {Promise<Float32Array>}
     */
    async embedOne(input, signal) {
        const [embedding] = await this.embed([input], signal);
        if (!embedding) {
            throw new Error("Embedding provider returned no result");
        }
        return embedding;
    }
    /**
     * Embeds all inputs, `batchSize` at a time, preserving input order.
     *
     * @param {Array<string | object>} inputs - Inputs to embed.
     * @param {AbortSignal} [signal] - Passed through to fetch.
     * @returns {Promise<Float32Array[]>}
     */
    async embed(inputs, signal) {
        if (inputs.length === 0) {
            return [];
        }
        const embeddings = [];
        for (let index = 0; index < inputs.length; index += this.batchSize) {
            const chunk = inputs.slice(index, index + this.batchSize);
            // Sequential batches are gentle on hosted rate limits; add bounded
            // concurrency later if indexing throughput becomes a bottleneck.
            embeddings.push(...(await this.embedBatch(chunk, signal)));
        }
        return embeddings;
    }
    /**
     * Sends one batch request and validates the response.
     *
     * @param {Array<string | object>} inputs - Inputs for this request.
     * @param {AbortSignal} [signal] - Passed through to fetch.
     * @returns {Promise<Float32Array[]>}
     * @throws {Error} On a non-OK HTTP status, a result count that differs
     *   from the input count, or malformed embedding values.
     */
    async embedBatch(inputs, signal) {
        const apiKey = this.apiKey();
        const response = await this.fetchFn(this.buildUrl(), {
            method: "POST",
            headers: this.buildHeaders(apiKey),
            body: JSON.stringify({
                requests: inputs.map((input) => ({
                    model: this.modelResourceName(),
                    content: { parts: embeddingInputParts(input) },
                    outputDimensionality: this.dimensions,
                })),
            }),
            signal,
        });
        const { body, rawText } = await parseJsonResponse(response);
        // A JSON `null` body would otherwise make every property read throw.
        const payload = body ?? {};
        if (!response.ok) {
            const message = typeof payload.error?.message === "string"
                ? payload.error.message
                : truncate(rawText.trim() || response.statusText);
            throw new Error(`Embedding request failed: HTTP ${response.status} ${message}`.trim());
        }
        // Accept either an `embeddings` array or a single `embedding` object.
        let rawEmbeddings = [];
        if (Array.isArray(payload.embeddings)) {
            rawEmbeddings = payload.embeddings;
        }
        else if (payload.embedding) {
            rawEmbeddings = [payload.embedding];
        }
        if (rawEmbeddings.length !== inputs.length) {
            throw new Error(`Embedding response count mismatch: expected ${inputs.length}, got ${rawEmbeddings.length}`);
        }
        // `?.` lets a null entry reach the validator and fail with its message.
        return rawEmbeddings.map((embedding, index) => this.parseEmbeddingValues(embedding?.values, index));
    }
    /** @returns {string} Endpoint URL for the configured model. */
    buildUrl() {
        return `${this.baseUrl}/${this.modelResourceName()}:batchEmbedContents`;
    }
    /**
     * @param {string | undefined} apiKey - Key to send, if any.
     * @returns {object} Request headers; the API key header is added only when a key is present.
     */
    buildHeaders(apiKey) {
        const headers = { "content-type": "application/json" };
        if (apiKey) {
            headers["x-goog-api-key"] = apiKey;
        }
        return headers;
    }
    /** @returns {string | undefined} Value of the configured environment variable, if one is named. */
    apiKey() {
        return this.apiKeyEnv ? process.env[this.apiKeyEnv] : undefined;
    }
    /** @returns {string} The model name, prefixed with `models/` unless it already is. */
    modelResourceName() {
        return this.model.startsWith("models/") ? this.model : `models/${this.model}`;
    }
    /**
     * Validates one embedding's values and converts them to a typed array.
     *
     * @param {unknown} values - Expected to be an array of finite numbers.
     * @param {number} index - Position within the batch, used in error messages.
     * @returns {Float32Array}
     * @throws {Error} When values are not all finite numbers or the length
     *   differs from the configured dimensions.
     */
    parseEmbeddingValues(values, index) {
        const isNumericArray = Array.isArray(values) &&
            values.every((value) => typeof value === "number" && Number.isFinite(value));
        if (!isNumericArray) {
            throw new Error(`Embedding response ${index} did not contain numeric values`);
        }
        if (values.length !== this.dimensions) {
            throw new Error(`Embedding dimension mismatch for result ${index}: expected ${this.dimensions}, got ${values.length}`);
        }
        return new Float32Array(values);
    }
}
97
/**
 * Converts an embedding input into the request's `parts` array.
 *
 * @param {string | { parts: object[] }} input - Plain text, or an object whose
 *   `parts` are either `{ type: "text", text }` or carry `mimeType` and `data`.
 * @returns {object[]} `{ text }` for text parts, `{ inlineData }` for all others.
 */
function embeddingInputParts(input) {
    if (typeof input === "string") {
        return [{ text: input }];
    }
    const toRequestPart = (part) => part.type === "text"
        ? { text: part.text }
        : { inlineData: { mimeType: part.mimeType, data: part.data } };
    return input.parts.map(toRequestPart);
}
106
/**
 * Reads a response body and parses it as JSON on a best-effort basis.
 *
 * `body` is always an object so callers can read properties without guarding:
 * empty text, unparseable text, and valid JSON that is not an object (`null`,
 * a string, a number, a boolean) all yield `{}`. The raw text is returned
 * alongside so callers can still show it in error messages.
 *
 * @param {{ text(): Promise<string> }} response - fetch-style response.
 * @returns {Promise<{ body: object, rawText: string }>}
 */
async function parseJsonResponse(response) {
    const rawText = await response.text();
    if (!rawText.trim()) {
        return { body: {}, rawText };
    }
    let parsed;
    try {
        parsed = JSON.parse(rawText);
    }
    catch {
        // Deliberate: a non-JSON body (e.g. an HTML error page) is reported via rawText.
        return { body: {}, rawText };
    }
    const isObject = parsed !== null && typeof parsed === "object";
    return { body: isObject ? parsed : {}, rawText };
}
117
/**
 * Shortens text to at most `maxLength` characters, appending "..." when it was cut.
 *
 * @param {string} text - Text to shorten.
 * @param {number} [maxLength=300] - Longest length returned unchanged.
 * @returns {string}
 */
function truncate(text, maxLength = 300) {
    if (text.length <= maxLength) {
        return text;
    }
    const head = text.slice(0, maxLength);
    return `${head}...`;
}
@@ -0,0 +1,18 @@
1
/**
 * Rewrites free-form text into a space-separated list of double-quoted terms.
 *
 * The query is NFKC-normalised and split on whitespace. Each token is reduced
 * to its runs of letters, digits and underscores, and each run becomes one
 * quoted term. A token ending in `*` keeps the `*` after its last term only.
 *
 * @param {string} query - Raw user query.
 * @returns {string | null} The rewritten query, or null when no terms remain.
 */
export function normalizeFtsMatchQuery(query) {
    const terms = query
        .normalize("NFKC")
        .split(/\s+/u)
        .flatMap((rawToken) => {
            if (!rawToken) {
                return [];
            }
            const isPrefixToken = rawToken.endsWith("*");
            const body = isPrefixToken ? rawToken.slice(0, -1) : rawToken;
            const words = body.match(/[\p{L}\p{N}_]+/gu) ?? [];
            const lastIndex = words.length - 1;
            return words.map((word, index) => {
                const suffix = isPrefixToken && index === lastIndex ? "*" : "";
                return `"${word.replaceAll('"', '""')}"${suffix}`;
            });
        });
    return terms.length === 0 ? null : terms.join(" ");
}