@gmickel/gno 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131)
  1. package/README.md +256 -0
  2. package/assets/skill/SKILL.md +112 -0
  3. package/assets/skill/cli-reference.md +327 -0
  4. package/assets/skill/examples.md +234 -0
  5. package/assets/skill/mcp-reference.md +159 -0
  6. package/package.json +90 -0
  7. package/src/app/constants.ts +313 -0
  8. package/src/cli/colors.ts +65 -0
  9. package/src/cli/commands/ask.ts +545 -0
  10. package/src/cli/commands/cleanup.ts +105 -0
  11. package/src/cli/commands/collection/add.ts +120 -0
  12. package/src/cli/commands/collection/index.ts +10 -0
  13. package/src/cli/commands/collection/list.ts +108 -0
  14. package/src/cli/commands/collection/remove.ts +64 -0
  15. package/src/cli/commands/collection/rename.ts +95 -0
  16. package/src/cli/commands/context/add.ts +67 -0
  17. package/src/cli/commands/context/check.ts +153 -0
  18. package/src/cli/commands/context/index.ts +10 -0
  19. package/src/cli/commands/context/list.ts +109 -0
  20. package/src/cli/commands/context/rm.ts +52 -0
  21. package/src/cli/commands/doctor.ts +393 -0
  22. package/src/cli/commands/embed.ts +462 -0
  23. package/src/cli/commands/get.ts +356 -0
  24. package/src/cli/commands/index-cmd.ts +119 -0
  25. package/src/cli/commands/index.ts +102 -0
  26. package/src/cli/commands/init.ts +328 -0
  27. package/src/cli/commands/ls.ts +217 -0
  28. package/src/cli/commands/mcp/config.ts +300 -0
  29. package/src/cli/commands/mcp/index.ts +24 -0
  30. package/src/cli/commands/mcp/install.ts +203 -0
  31. package/src/cli/commands/mcp/paths.ts +470 -0
  32. package/src/cli/commands/mcp/status.ts +222 -0
  33. package/src/cli/commands/mcp/uninstall.ts +158 -0
  34. package/src/cli/commands/mcp.ts +20 -0
  35. package/src/cli/commands/models/clear.ts +103 -0
  36. package/src/cli/commands/models/index.ts +32 -0
  37. package/src/cli/commands/models/list.ts +214 -0
  38. package/src/cli/commands/models/path.ts +51 -0
  39. package/src/cli/commands/models/pull.ts +199 -0
  40. package/src/cli/commands/models/use.ts +85 -0
  41. package/src/cli/commands/multi-get.ts +400 -0
  42. package/src/cli/commands/query.ts +220 -0
  43. package/src/cli/commands/ref-parser.ts +108 -0
  44. package/src/cli/commands/reset.ts +191 -0
  45. package/src/cli/commands/search.ts +136 -0
  46. package/src/cli/commands/shared.ts +156 -0
  47. package/src/cli/commands/skill/index.ts +19 -0
  48. package/src/cli/commands/skill/install.ts +197 -0
  49. package/src/cli/commands/skill/paths-cmd.ts +81 -0
  50. package/src/cli/commands/skill/paths.ts +191 -0
  51. package/src/cli/commands/skill/show.ts +73 -0
  52. package/src/cli/commands/skill/uninstall.ts +141 -0
  53. package/src/cli/commands/status.ts +205 -0
  54. package/src/cli/commands/update.ts +68 -0
  55. package/src/cli/commands/vsearch.ts +188 -0
  56. package/src/cli/context.ts +64 -0
  57. package/src/cli/errors.ts +64 -0
  58. package/src/cli/format/search-results.ts +211 -0
  59. package/src/cli/options.ts +183 -0
  60. package/src/cli/program.ts +1330 -0
  61. package/src/cli/run.ts +213 -0
  62. package/src/cli/ui.ts +92 -0
  63. package/src/config/defaults.ts +20 -0
  64. package/src/config/index.ts +55 -0
  65. package/src/config/loader.ts +161 -0
  66. package/src/config/paths.ts +87 -0
  67. package/src/config/saver.ts +153 -0
  68. package/src/config/types.ts +280 -0
  69. package/src/converters/adapters/markitdownTs/adapter.ts +140 -0
  70. package/src/converters/adapters/officeparser/adapter.ts +126 -0
  71. package/src/converters/canonicalize.ts +89 -0
  72. package/src/converters/errors.ts +218 -0
  73. package/src/converters/index.ts +51 -0
  74. package/src/converters/mime.ts +163 -0
  75. package/src/converters/native/markdown.ts +115 -0
  76. package/src/converters/native/plaintext.ts +56 -0
  77. package/src/converters/path.ts +48 -0
  78. package/src/converters/pipeline.ts +159 -0
  79. package/src/converters/registry.ts +74 -0
  80. package/src/converters/types.ts +123 -0
  81. package/src/converters/versions.ts +24 -0
  82. package/src/index.ts +27 -0
  83. package/src/ingestion/chunker.ts +238 -0
  84. package/src/ingestion/index.ts +32 -0
  85. package/src/ingestion/language.ts +276 -0
  86. package/src/ingestion/sync.ts +671 -0
  87. package/src/ingestion/types.ts +219 -0
  88. package/src/ingestion/walker.ts +235 -0
  89. package/src/llm/cache.ts +467 -0
  90. package/src/llm/errors.ts +191 -0
  91. package/src/llm/index.ts +58 -0
  92. package/src/llm/nodeLlamaCpp/adapter.ts +133 -0
  93. package/src/llm/nodeLlamaCpp/embedding.ts +165 -0
  94. package/src/llm/nodeLlamaCpp/generation.ts +88 -0
  95. package/src/llm/nodeLlamaCpp/lifecycle.ts +317 -0
  96. package/src/llm/nodeLlamaCpp/rerank.ts +94 -0
  97. package/src/llm/registry.ts +86 -0
  98. package/src/llm/types.ts +129 -0
  99. package/src/mcp/resources/index.ts +151 -0
  100. package/src/mcp/server.ts +229 -0
  101. package/src/mcp/tools/get.ts +220 -0
  102. package/src/mcp/tools/index.ts +160 -0
  103. package/src/mcp/tools/multi-get.ts +263 -0
  104. package/src/mcp/tools/query.ts +226 -0
  105. package/src/mcp/tools/search.ts +119 -0
  106. package/src/mcp/tools/status.ts +81 -0
  107. package/src/mcp/tools/vsearch.ts +198 -0
  108. package/src/pipeline/chunk-lookup.ts +44 -0
  109. package/src/pipeline/expansion.ts +256 -0
  110. package/src/pipeline/explain.ts +115 -0
  111. package/src/pipeline/fusion.ts +185 -0
  112. package/src/pipeline/hybrid.ts +535 -0
  113. package/src/pipeline/index.ts +64 -0
  114. package/src/pipeline/query-language.ts +118 -0
  115. package/src/pipeline/rerank.ts +223 -0
  116. package/src/pipeline/search.ts +261 -0
  117. package/src/pipeline/types.ts +328 -0
  118. package/src/pipeline/vsearch.ts +348 -0
  119. package/src/store/index.ts +41 -0
  120. package/src/store/migrations/001-initial.ts +196 -0
  121. package/src/store/migrations/index.ts +20 -0
  122. package/src/store/migrations/runner.ts +187 -0
  123. package/src/store/sqlite/adapter.ts +1242 -0
  124. package/src/store/sqlite/index.ts +7 -0
  125. package/src/store/sqlite/setup.ts +129 -0
  126. package/src/store/sqlite/types.ts +28 -0
  127. package/src/store/types.ts +506 -0
  128. package/src/store/vector/index.ts +13 -0
  129. package/src/store/vector/sqlite-vec.ts +373 -0
  130. package/src/store/vector/stats.ts +152 -0
  131. package/src/store/vector/types.ts +115 -0
@@ -0,0 +1,671 @@
1
+ /**
2
+ * Sync service - orchestrates file ingestion.
3
+ * Walks collections, converts files, chunks content, updates store.
4
+ *
5
+ * @module src/ingestion/sync
6
+ */
7
+
8
+ import type { Collection } from '../config/types';
9
+ import { getDefaultMimeDetector, type MimeDetector } from '../converters/mime';
10
+ import {
11
+ type ConversionPipeline,
12
+ getDefaultPipeline,
13
+ } from '../converters/pipeline';
14
+ import { DEFAULT_LIMITS } from '../converters/types';
15
+ import type {
16
+ ChunkInput,
17
+ DocumentRow,
18
+ IngestErrorInput,
19
+ StorePort,
20
+ StoreResult,
21
+ } from '../store/types';
22
+ import { defaultChunker } from './chunker';
23
+ import type {
24
+ ChunkerPort,
25
+ CollectionSyncResult,
26
+ FileSyncResult,
27
+ ProcessDecision,
28
+ SyncOptions,
29
+ SyncResult,
30
+ WalkEntry,
31
+ WalkerPort,
32
+ } from './types';
33
+ import { collectionToWalkConfig, DEFAULT_CHUNK_PARAMS } from './types';
34
+ import { defaultWalker } from './walker';
35
+
36
+ /** Default concurrency for file processing */
37
+ const DEFAULT_CONCURRENCY = 1;
38
+
39
+ /** Batch size for grouping writes into single transaction (Windows perf) */
40
+ const TX_BATCH_SIZE = 50;
41
+
42
+ /** Max concurrency to prevent resource exhaustion */
43
+ const MAX_CONCURRENCY = 16;
44
+
45
+ /**
46
+ * Decide whether to process a file or skip it.
47
+ * Handles repair cases where sourceHash matches but content is incomplete.
48
+ */
49
+ function decideAction(
50
+ existing: DocumentRow | null,
51
+ sourceHash: string
52
+ ): ProcessDecision {
53
+ // No existing doc - must process
54
+ if (!existing) {
55
+ return { kind: 'process', reason: 'new file' };
56
+ }
57
+
58
+ // Source hash changed - must process
59
+ if (existing.sourceHash !== sourceHash) {
60
+ return { kind: 'process', reason: 'content changed' };
61
+ }
62
+
63
+ // Source unchanged, but check for repair cases:
64
+
65
+ // 1. Previous conversion failed (mirrorHash is null)
66
+ if (!existing.mirrorHash) {
67
+ return { kind: 'repair', reason: 'previous conversion failed' };
68
+ }
69
+
70
+ // 2. Document has error recorded
71
+ if (existing.lastErrorCode) {
72
+ return { kind: 'repair', reason: 'previous error recorded' };
73
+ }
74
+
75
+ // All good - skip
76
+ return { kind: 'skip', reason: 'unchanged' };
77
+ }
78
+
79
+ /**
80
+ * Check if path is a git repository (supports worktrees and submodules).
81
+ * Uses git rev-parse which handles all git directory layouts.
82
+ */
83
+ async function isGitRepo(path: string): Promise<boolean> {
84
+ try {
85
+ const result = await Bun.$`git -C ${path} rev-parse --is-inside-work-tree`
86
+ .quiet()
87
+ .nothrow();
88
+ return result.exitCode === 0;
89
+ } catch {
90
+ return false;
91
+ }
92
+ }
93
+
94
/**
 * Run `git pull` in `path` (best effort).
 *
 * Refresh step before syncing a git-backed collection. `nothrow()` keeps a
 * non-zero exit from rejecting and `quiet()` suppresses output; the outer
 * catch additionally swallows spawn-level failures (e.g. git not installed),
 * so a failed pull never blocks the sync run — it simply indexes the
 * checkout as-is.
 *
 * @param path - Directory in which to pull.
 */
async function gitPull(path: string): Promise<void> {
  try {
    await Bun.$`git -C ${path} pull`.quiet().nothrow();
  } catch {
    // Ignore git pull failures - pulling is purely opportunistic.
  }
}
104
+
105
/**
 * Run a collection's configured update command (best effort).
 *
 * The command string is executed via `sh -c` with the collection path as the
 * working directory, so `cmd` may be any shell snippet. As with {@link gitPull},
 * failures are swallowed: `nothrow()` absorbs non-zero exits and the catch
 * absorbs spawn errors, so a broken update command never aborts the sync.
 *
 * NOTE(review): `cmd` comes from the user's own collection config, not from
 * untrusted input — presumably arbitrary shell execution is intentional here.
 *
 * @param path - Working directory for the command.
 * @param cmd - Shell command line to execute.
 */
async function runUpdateCmd(path: string, cmd: string): Promise<void> {
  try {
    await Bun.$`sh -c ${cmd}`.cwd(path).quiet().nothrow();
  } catch {
    // Ignore update command failures - preflight is best-effort.
  }
}
115
+
116
+ /**
117
+ * Helper to unwrap Result and throw on error.
118
+ * Provides consistent error handling for store operations.
119
+ */
120
+ function mustOk<T>(
121
+ result: StoreResult<T>,
122
+ operation: string,
123
+ context: Record<string, unknown>
124
+ ): T {
125
+ if (!result.ok) {
126
+ const error = new Error(
127
+ `Store operation failed: ${operation} - ${result.error.message}`
128
+ );
129
+ (error as Error & { context: unknown }).context = context;
130
+ throw error;
131
+ }
132
+ return result.value;
133
+ }
134
+
135
+ /**
136
+ * Simple semaphore for bounded concurrency.
137
+ */
138
+ class Semaphore {
139
+ private permits: number;
140
+ private readonly waiting: Array<() => void> = [];
141
+
142
+ constructor(permits: number) {
143
+ this.permits = permits;
144
+ }
145
+
146
+ async acquire(): Promise<void> {
147
+ if (this.permits > 0) {
148
+ this.permits -= 1;
149
+ return;
150
+ }
151
+ await new Promise<void>((resolve) => {
152
+ this.waiting.push(resolve);
153
+ });
154
+ }
155
+
156
+ release(): void {
157
+ const next = this.waiting.shift();
158
+ if (next) {
159
+ next();
160
+ } else {
161
+ this.permits += 1;
162
+ }
163
+ }
164
+ }
165
+
166
/**
 * Sync service implementation.
 *
 * Orchestrates ingestion for one or more collections: optional preflight
 * (update command / git pull), directory walk, per-file convert → chunk →
 * store pipeline, and finally marking vanished files inactive. All four
 * collaborators are injectable for testing; defaults are the module-level
 * implementations.
 */
export class SyncService {
  private readonly walker: WalkerPort;
  private readonly chunker: ChunkerPort;
  private readonly mimeDetector: MimeDetector;
  private readonly pipeline: ConversionPipeline;

  constructor(
    walker?: WalkerPort,
    chunker?: ChunkerPort,
    mimeDetector?: MimeDetector,
    pipeline?: ConversionPipeline
  ) {
    // Fall back to module defaults so `new SyncService()` is fully wired.
    this.walker = walker ?? defaultWalker;
    this.chunker = chunker ?? defaultChunker;
    this.mimeDetector = mimeDetector ?? getDefaultMimeDetector();
    this.pipeline = pipeline ?? getDefaultPipeline();
  }

  /**
   * Process a single file through the ingestion pipeline:
   * read → hash → skip/repair decision → MIME detect → convert → upsert
   * document/content/chunks → rebuild FTS.
   *
   * All store operations on the success path are checked via `mustOk` and
   * propagate as exceptions caught at the bottom of this method; conversion
   * failures are recorded on the document and returned as an 'error' result
   * rather than thrown.
   *
   * @param collection - Collection the file belongs to.
   * @param entry - Walk entry (absolute/relative paths, size, mtime).
   * @param store - Store port receiving all writes.
   * @param options - Sync options; only `limits` is consulted here.
   * @returns Per-file outcome: 'added' | 'updated' | 'unchanged' | 'error'.
   */
  // biome-ignore lint/complexity/noExcessiveCognitiveComplexity: file processing with multiple extraction and embedding paths
  private async processFile(
    collection: Collection,
    entry: WalkEntry,
    store: StorePort,
    options: SyncOptions
  ): Promise<FileSyncResult> {
    // Per-call limits with global defaults filled in.
    const limits = {
      maxBytes: options.limits?.maxBytes ?? DEFAULT_LIMITS.maxBytes,
      timeoutMs: options.limits?.timeoutMs ?? DEFAULT_LIMITS.timeoutMs,
      maxOutputChars:
        options.limits?.maxOutputChars ?? DEFAULT_LIMITS.maxOutputChars,
    };

    try {
      // 1. Read file bytes
      const bytes = await Bun.file(entry.absPath).bytes();

      // 2. Compute sourceHash (SHA-256 hex of raw bytes)
      const hasher = new Bun.CryptoHasher('sha256');
      hasher.update(bytes);
      const sourceHash = hasher.digest('hex');

      // 3. Check existing doc for skip/repair decision.
      // A failed lookup is treated the same as "no existing doc".
      const existingResult = await store.getDocument(
        collection.name,
        entry.relPath
      );
      const existing = existingResult.ok ? existingResult.value : null;
      const decision = decideAction(existing, sourceHash);

      if (decision.kind === 'skip') {
        return { relPath: entry.relPath, status: 'unchanged' };
      }

      // 4. Detect MIME (bytes is already Uint8Array from Bun.file().bytes())
      const mime = this.mimeDetector.detect(entry.absPath, bytes);

      // 5. Convert via pipeline
      const convertResult = await this.pipeline.convert({
        sourcePath: entry.absPath,
        relativePath: entry.relPath,
        collection: collection.name,
        bytes,
        mime: mime.mime,
        ext: mime.ext,
        limits,
      });

      if (!convertResult.ok) {
        // Conversion failed: record the error, persist error state on the
        // document, and report an 'error' result (no throw).
        const errorInput: IngestErrorInput = {
          collection: collection.name,
          relPath: entry.relPath,
          code: convertResult.error.code,
          message: convertResult.error.message,
          details: convertResult.error.details,
        };
        const recordResult = await store.recordError(errorInput);
        if (!recordResult.ok) {
          // Log but continue - error recording is best-effort
        }

        // Upsert document with error info, explicitly clear mirrorHash so a
        // later unchanged-source pass triggers the 'repair' path.
        const upsertResult = await store.upsertDocument({
          collection: collection.name,
          relPath: entry.relPath,
          sourceHash,
          sourceMime: mime.mime,
          sourceExt: mime.ext,
          sourceSize: entry.size,
          sourceMtime: entry.mtime,
          lastErrorCode: convertResult.error.code,
          lastErrorMessage: convertResult.error.message,
          // mirrorHash intentionally omitted (will be null)
        });

        if (!upsertResult.ok) {
          // The store write itself failed; surface that instead.
          return {
            relPath: entry.relPath,
            status: 'error',
            errorCode: 'STORE_ERROR',
            errorMessage: upsertResult.error.message,
          };
        }

        return {
          relPath: entry.relPath,
          status: 'error',
          errorCode: convertResult.error.code,
          errorMessage: convertResult.error.message,
        };
      }

      const artifact = convertResult.value;

      // 6. Upsert document - EXPLICITLY clear error fields on success
      const docidResult = await store.upsertDocument({
        collection: collection.name,
        relPath: entry.relPath,
        sourceHash,
        sourceMime: mime.mime,
        sourceExt: mime.ext,
        sourceSize: entry.size,
        sourceMtime: entry.mtime,
        title: artifact.title,
        mirrorHash: artifact.mirrorHash,
        converterId: artifact.meta.converterId,
        converterVersion: artifact.meta.converterVersion,
        languageHint: artifact.languageHint ?? collection.languageHint,
        // Clear error fields on success (requires store to handle undefined → null)
        lastErrorCode: undefined,
        lastErrorMessage: undefined,
      });

      const docid = mustOk(docidResult, 'upsertDocument', {
        collection: collection.name,
        relPath: entry.relPath,
      });

      // 7. Upsert content (content-addressed dedupe) - CHECKED
      const contentResult = await store.upsertContent(
        artifact.mirrorHash,
        artifact.markdown
      );
      mustOk(contentResult, 'upsertContent', {
        mirrorHash: artifact.mirrorHash,
      });

      // 8. Chunk content (converter's language hint wins over collection's)
      const chunks = this.chunker.chunk(
        artifact.markdown,
        DEFAULT_CHUNK_PARAMS,
        artifact.languageHint ?? collection.languageHint
      );

      // 9. Convert to ChunkInput for store (null → undefined for optionals)
      const chunkInputs: ChunkInput[] = chunks.map((c) => ({
        seq: c.seq,
        pos: c.pos,
        text: c.text,
        startLine: c.startLine,
        endLine: c.endLine,
        language: c.language ?? undefined,
        tokenCount: c.tokenCount ?? undefined,
      }));

      // 10. Upsert chunks - CHECKED
      const chunksResult = await store.upsertChunks(
        artifact.mirrorHash,
        chunkInputs
      );
      mustOk(chunksResult, 'upsertChunks', {
        mirrorHash: artifact.mirrorHash,
        chunkCount: chunkInputs.length,
      });

      // 11. Rebuild FTS for this hash - CHECKED
      const ftsResult = await store.rebuildFtsForHash(artifact.mirrorHash);
      mustOk(ftsResult, 'rebuildFtsForHash', {
        mirrorHash: artifact.mirrorHash,
      });

      // 'updated' vs 'added' is based on whether a doc row pre-existed.
      const status = existing ? 'updated' : 'added';
      return {
        relPath: entry.relPath,
        status,
        docid,
        mirrorHash: artifact.mirrorHash,
      };
    } catch (error) {
      const message = error instanceof Error ? error.message : 'Unknown error';
      // Distinguish store errors from other internal errors: mustOk failures
      // carry the message prefix and/or an attached `context` property.
      const isStoreError =
        message.startsWith('Store operation failed:') ||
        (error instanceof Error &&
          (error as Error & { context?: unknown }).context !== undefined);
      const code = isStoreError ? 'STORE_ERROR' : 'INTERNAL';

      // Record internal error to store (best-effort)
      try {
        await store.recordError({
          collection: collection.name,
          relPath: entry.relPath,
          code,
          message,
          details: {
            stack: error instanceof Error ? error.stack : undefined,
          },
        });

        // Also update document with error info if it exists (re-fetch to
        // preserve its current source metadata in the upsert).
        const existingResult = await store.getDocument(
          collection.name,
          entry.relPath
        );
        if (existingResult.ok && existingResult.value) {
          await store.upsertDocument({
            collection: collection.name,
            relPath: entry.relPath,
            sourceHash: existingResult.value.sourceHash,
            sourceMime: existingResult.value.sourceMime,
            sourceExt: existingResult.value.sourceExt,
            sourceSize: existingResult.value.sourceSize,
            sourceMtime: existingResult.value.sourceMtime,
            lastErrorCode: code,
            lastErrorMessage: message,
          });
        }
      } catch {
        // Best-effort error recording
      }

      return {
        relPath: entry.relPath,
        status: 'error',
        errorCode: code,
        errorMessage: message,
      };
    }
  }

  /**
   * Sync a single collection.
   *
   * Steps: preflight (update command, optional git pull) → walk → record
   * TOO_LARGE skips → process entries (sequentially in transaction batches,
   * or concurrently via semaphore) → mark unseen active docs inactive.
   *
   * @param collection - Collection configuration to sync.
   * @param store - Store port receiving all writes.
   * @param options - Concurrency, limits, and preflight toggles.
   * @returns Aggregated counters, duration, and per-file errors.
   */
  // biome-ignore lint/complexity/noExcessiveCognitiveComplexity: sync orchestration with git integration and progress tracking
  async syncCollection(
    collection: Collection,
    store: StorePort,
    options: SyncOptions = {}
  ): Promise<CollectionSyncResult> {
    const startTime = Date.now();
    const errors: Array<{ relPath: string; code: string; message: string }> =
      [];

    // 1. Run preflight commands (updateCmd runs unless explicitly disabled)
    if (options.runUpdateCmd !== false && collection.updateCmd) {
      await runUpdateCmd(collection.path, collection.updateCmd);
    }

    // git pull is opt-in and only attempted for actual git work trees.
    if (options.gitPull && (await isGitRepo(collection.path))) {
      await gitPull(collection.path);
    }

    // 2. Walk collection
    const maxBytes = options.limits?.maxBytes ?? DEFAULT_LIMITS.maxBytes;
    const walkConfig = collectionToWalkConfig(collection, maxBytes);
    const { entries, skipped } = await this.walker.walk(walkConfig);

    // Track seen paths for marking inactive
    // Only include TOO_LARGE files (they exist but are unprocessable)
    // EXCLUDED files should NOT be in seenPaths - if config changes to exclude
    // a previously-included file, that doc SHOULD be marked inactive
    const seenPaths = new Set<string>();
    for (const skip of skipped) {
      if (skip.reason === 'TOO_LARGE') {
        seenPaths.add(skip.relPath);
      }
    }

    // 3. Record TOO_LARGE errors and track in seenPaths
    for (const skip of skipped) {
      if (skip.reason === 'TOO_LARGE') {
        const recordResult = await store.recordError({
          collection: collection.name,
          relPath: skip.relPath,
          code: 'TOO_LARGE',
          message: `File size ${skip.size} exceeds limit ${maxBytes}`,
        });
        // Log failure but continue
        if (!recordResult.ok) {
          errors.push({
            relPath: skip.relPath,
            code: 'STORE_ERROR',
            message: `Failed to record error: ${recordResult.error.message}`,
          });
        }
        // Always reported in the result errors, independent of store success.
        errors.push({
          relPath: skip.relPath,
          code: 'TOO_LARGE',
          message: `File size ${skip.size} exceeds limit ${maxBytes}`,
        });
      }
    }

    // 4. Process files with bounded concurrency (clamped to [1, MAX_CONCURRENCY])
    const concurrency = Math.max(
      1,
      Math.min(MAX_CONCURRENCY, options.concurrency ?? DEFAULT_CONCURRENCY)
    );

    let added = 0;
    let updated = 0;
    let unchanged = 0;
    let errored = 0;

    if (concurrency === 1) {
      // Sequential processing with batched transactions (Windows perf)
      for (let i = 0; i < entries.length; i += TX_BATCH_SIZE) {
        const batch = entries.slice(i, i + TX_BATCH_SIZE);

        const runBatch = async (): Promise<void> => {
          for (const entry of batch) {
            seenPaths.add(entry.relPath);
            const result = await this.processFile(
              collection,
              entry,
              store,
              options
            );
            switch (result.status) {
              case 'added':
                added += 1;
                break;
              case 'updated':
                updated += 1;
                break;
              case 'unchanged':
                unchanged += 1;
                break;
              case 'error':
                errored += 1;
                if (result.errorCode && result.errorMessage) {
                  errors.push({
                    relPath: result.relPath,
                    code: result.errorCode,
                    message: result.errorMessage,
                  });
                }
                break;
              default:
                // 'skipped' status - already counted in filesSkipped
                break;
            }
          }
        };

        // Wrap batch in single transaction when supported (reduces commits)
        if (store.withTransaction) {
          const txResult = await store.withTransaction(runBatch);
          if (!txResult.ok) {
            errors.push({
              relPath: '(transaction batch)',
              code: txResult.error.code,
              message: txResult.error.message,
            });
            break; // Abort on transaction failure
          }
        } else {
          await runBatch();
        }
      }
    } else {
      // Concurrent processing with semaphore (no transaction batching here)
      const semaphore = new Semaphore(concurrency);
      const results: FileSyncResult[] = [];

      await Promise.all(
        entries.map(async (entry) => {
          seenPaths.add(entry.relPath);
          await semaphore.acquire();
          try {
            const result = await this.processFile(
              collection,
              entry,
              store,
              options
            );
            results.push(result);
          } finally {
            semaphore.release();
          }
        })
      );

      // Aggregate results
      for (const result of results) {
        switch (result.status) {
          case 'added':
            added += 1;
            break;
          case 'updated':
            updated += 1;
            break;
          case 'unchanged':
            unchanged += 1;
            break;
          case 'error':
            errored += 1;
            if (result.errorCode && result.errorMessage) {
              errors.push({
                relPath: result.relPath,
                code: result.errorCode,
                message: result.errorMessage,
              });
            }
            break;
          default:
            // 'skipped' status - already counted in filesSkipped
            break;
        }
      }
    }

    // 5. Mark missing files as inactive (active docs not seen in this walk).
    // Failures here are silently ignored; markedInactive simply stays 0.
    let markedInactive = 0;
    const existingDocsResult = await store.listDocuments(collection.name);
    if (existingDocsResult.ok) {
      const missingPaths = existingDocsResult.value
        .filter((d) => d.active && !seenPaths.has(d.relPath))
        .map((d) => d.relPath);

      if (missingPaths.length > 0) {
        const markResult = await store.markInactive(
          collection.name,
          missingPaths
        );
        if (markResult.ok) {
          markedInactive = markResult.value;
        }
      }
    }

    return {
      collection: collection.name,
      filesProcessed: entries.length,
      filesAdded: added,
      filesUpdated: updated,
      filesUnchanged: unchanged,
      filesErrored: errored,
      filesSkipped: skipped.length,
      filesMarkedInactive: markedInactive,
      durationMs: Date.now() - startTime,
      errors,
    };
  }

  /**
   * Sync all collections, sequentially, and aggregate per-collection totals.
   *
   * @param collections - Collections to sync, in order.
   * @param store - Store port shared by all collections.
   * @param options - Options forwarded unchanged to each syncCollection call.
   * @returns Per-collection results plus summed totals and wall-clock time.
   */
  async syncAll(
    collections: Collection[],
    store: StorePort,
    options: SyncOptions = {}
  ): Promise<SyncResult> {
    const startTime = Date.now();
    const results: CollectionSyncResult[] = [];

    // Collections are processed one at a time; concurrency applies only
    // within a collection (see syncCollection).
    for (const collection of collections) {
      const result = await this.syncCollection(collection, store, options);
      results.push(result);
    }

    // Aggregate totals
    const totals = results.reduce(
      (acc, r) => ({
        processed: acc.processed + r.filesProcessed,
        added: acc.added + r.filesAdded,
        updated: acc.updated + r.filesUpdated,
        errored: acc.errored + r.filesErrored,
        skipped: acc.skipped + r.filesSkipped,
      }),
      { processed: 0, added: 0, updated: 0, errored: 0, skipped: 0 }
    );

    return {
      collections: results,
      totalDurationMs: Date.now() - startTime,
      totalFilesProcessed: totals.processed,
      totalFilesAdded: totals.added,
      totalFilesUpdated: totals.updated,
      totalFilesErrored: totals.errored,
      totalFilesSkipped: totals.skipped,
    };
  }
}
667
+
668
/**
 * Default sync service instance, wired with the module-default walker,
 * chunker, MIME detector, and conversion pipeline.
 */
export const defaultSyncService = new SyncService();