@soundbi/sound-connect 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42):
  1. package/README.md +111 -0
  2. package/dist/__tests__/ingest.test.d.ts +18 -0
  3. package/dist/__tests__/ingest.test.d.ts.map +1 -0
  4. package/dist/__tests__/ingest.test.js +639 -0
  5. package/dist/__tests__/ingest.test.js.map +1 -0
  6. package/dist/__tests__/isolation.test.d.ts +12 -0
  7. package/dist/__tests__/isolation.test.d.ts.map +1 -0
  8. package/dist/__tests__/isolation.test.js +149 -0
  9. package/dist/__tests__/isolation.test.js.map +1 -0
  10. package/dist/__tests__/retry-queue.test.d.ts +11 -0
  11. package/dist/__tests__/retry-queue.test.d.ts.map +1 -0
  12. package/dist/__tests__/retry-queue.test.js +458 -0
  13. package/dist/__tests__/retry-queue.test.js.map +1 -0
  14. package/dist/auth.d.ts +80 -0
  15. package/dist/auth.d.ts.map +1 -0
  16. package/dist/auth.js +211 -0
  17. package/dist/auth.js.map +1 -0
  18. package/dist/config.d.ts +35 -0
  19. package/dist/config.d.ts.map +1 -0
  20. package/dist/config.js +66 -0
  21. package/dist/config.js.map +1 -0
  22. package/dist/index.d.ts +23 -0
  23. package/dist/index.d.ts.map +1 -0
  24. package/dist/index.js +100 -0
  25. package/dist/index.js.map +1 -0
  26. package/dist/ingest.d.ts +253 -0
  27. package/dist/ingest.d.ts.map +1 -0
  28. package/dist/ingest.js +573 -0
  29. package/dist/ingest.js.map +1 -0
  30. package/dist/proxy.d.ts +79 -0
  31. package/dist/proxy.d.ts.map +1 -0
  32. package/dist/proxy.js +217 -0
  33. package/dist/proxy.js.map +1 -0
  34. package/dist/retry-queue.d.ts +236 -0
  35. package/dist/retry-queue.d.ts.map +1 -0
  36. package/dist/retry-queue.js +461 -0
  37. package/dist/retry-queue.js.map +1 -0
  38. package/dist/tools.d.ts +75 -0
  39. package/dist/tools.d.ts.map +1 -0
  40. package/dist/tools.js +368 -0
  41. package/dist/tools.js.map +1 -0
  42. package/package.json +36 -0
@@ -0,0 +1,639 @@
1
+ /**
2
+ * STORY-011 / STORY-012 — ingest_file / ingest_folder unit tests.
3
+ *
4
+ * Tests the pure logic functions in src/ingest.ts:
5
+ * - validateAndResolvePath: path confinement and extension check (AC3)
6
+ * - normalizeMarkdown: normalization (AC1)
7
+ * - chunkContent: chunking (AC1)
8
+ * - sha256: hash computation (AC2)
9
+ * - sourceTypeForExt: source_type derivation (STORY-012 AC2)
10
+ * - parseGlobToExtensions: glob filter parsing (STORY-012 AC1)
11
+ * - enumerateFolder: folder file enumeration (STORY-012 AC1)
12
+ * - ingestFolder: full folder ingest pipeline with mocked backend (STORY-012)
13
+ *
14
+ * Network-touching code (postChunk / ingestMarkdownFile / ingestFolder) is tested
15
+ * via fetch mocks rather than live backend calls.
16
+ */
17
+ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
18
+ import { writeFile, rm, mkdir } from 'node:fs/promises';
19
+ import { join } from 'node:path';
20
+ import { tmpdir } from 'node:os';
21
+ import { validateAndResolvePath, normalizeMarkdown, chunkContent, sha256, ingestMarkdownFile, sourceTypeForExt, parseGlobToExtensions, enumerateFolder, ingestFolder, SUPPORTED_EXTENSIONS, } from '../ingest.js';
22
+ // ── normalizeMarkdown ─────────────────────────────────────────────────────────
describe('normalizeMarkdown()', () => {
    // Table of simple input → output expectations: [test title, raw input, expected output].
    // Note on the titles: "blank lines" is counted in newline characters here —
    // 'a\n\nb' (two newlines, i.e. one empty line) is the preserved paragraph break.
    const cases = [
        ['converts CRLF to LF', 'line1\r\nline2', 'line1\nline2'],
        ['converts bare CR to LF', 'line1\rline2', 'line1\nline2'],
        ['strips trailing whitespace from each line', 'hello \nworld ', 'hello\nworld'],
        ['collapses 3+ blank lines to 2', 'a\n\n\n\nb', 'a\n\nb'],
        ['preserves exactly 2 blank lines (paragraph spacing)', 'a\n\nb', 'a\n\nb'],
        ['trims leading and trailing blank lines', '\n\nhello\n\n', 'hello'],
        ['returns empty string for whitespace-only content', ' \n\n ', ''],
    ];
    for (const [title, raw, expected] of cases) {
        it(title, () => {
            expect(normalizeMarkdown(raw)).toBe(expected);
        });
    }
    it('is idempotent — normalizing twice gives the same result', () => {
        // A second pass over already-normalized text must be a no-op.
        const firstPass = normalizeMarkdown('Hello\r\n\nWorld \n\n\nFoo');
        const secondPass = normalizeMarkdown(firstPass);
        expect(firstPass).toBe(secondPass);
    });
});
54
+ // ── chunkContent ──────────────────────────────────────────────────────────────
describe('chunkContent()', () => {
    // Joins `count` paragraphs (produced by `makePara(index)`) with blank-line separators.
    const buildDocument = (count, makePara) => Array.from({ length: count }, (_, idx) => makePara(idx)).join('\n\n');
    it('returns single chunk for short content', () => {
        const text = 'Hello world';
        const pieces = chunkContent(text);
        expect(pieces).toHaveLength(1);
        expect(pieces[0]).toBe(text);
    });
    it('returns single chunk when content is exactly at limit', () => {
        // 6000 characters — the size this test treats as the chunk limit.
        const pieces = chunkContent('a'.repeat(6_000));
        expect(pieces).toHaveLength(1);
    });
    it('splits long content into multiple chunks', () => {
        // 20 paragraphs of 500 chars each: comfortably over 6000 chars,
        // with clear paragraph breaks to split on.
        const filler = 'x'.repeat(500);
        const pieces = chunkContent(buildDocument(20, () => filler));
        expect(pieces.length).toBeGreaterThan(1);
    });
    it('every chunk is non-empty', () => {
        const filler = 'word '.repeat(200);
        const pieces = chunkContent(buildDocument(15, () => filler));
        for (const piece of pieces) {
            expect(piece.length).toBeGreaterThan(0);
        }
    });
    it('chunks together cover all paragraphs (no content lost)', () => {
        // Every paragraph starts with a unique "PARA<n>:" marker so we can
        // check that each one landed in at least one chunk.
        const tail = 'y'.repeat(400);
        const paragraphs = Array.from({ length: 20 }, (_, idx) => `PARA${idx}: ${tail}`);
        const pieces = chunkContent(paragraphs.join('\n\n'));
        for (let idx = 0; idx < paragraphs.length; idx++) {
            const marker = `PARA${idx}:`;
            const present = pieces.some((piece) => piece.includes(marker));
            expect(present, `${marker} not found in any chunk`).toBe(true);
        }
    });
});
95
+ // ── sha256 ────────────────────────────────────────────────────────────────────
describe('sha256()', () => {
    // Shape of a lowercase hex-encoded 256-bit digest.
    const HEX_DIGEST = /^[0-9a-f]{64}$/;
    // Well-known SHA-256 digest of the empty string.
    const EMPTY_STRING_DIGEST = 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855';
    it('returns a 64-char hex string', () => {
        expect(sha256('hello')).toMatch(HEX_DIGEST);
    });
    it('is deterministic', () => {
        const first = sha256('test content');
        const second = sha256('test content');
        expect(first).toBe(second);
    });
    it('different content produces different hashes', () => {
        const digestA = sha256('content A');
        const digestB = sha256('content B');
        expect(digestA).not.toBe(digestB);
    });
    it('matches known SHA-256 value for empty string', () => {
        expect(sha256('')).toBe(EMPTY_STRING_DIGEST);
    });
});
112
+ // ── validateAndResolvePath ────────────────────────────────────────────────────
describe('validateAndResolvePath() — AC3', () => {
    let tmpDir;
    let testFile;
    // Writes `contents` to `name` inside the per-test temp directory and
    // returns the absolute path of the new file.
    const writeFixture = async (name, contents) => {
        const target = join(tmpDir, name);
        await writeFile(target, contents);
        return target;
    };
    beforeEach(async () => {
        // Fresh scratch directory per test, seeded with one valid markdown file.
        tmpDir = join(tmpdir(), `sc-test-${Date.now()}`);
        await mkdir(tmpDir, { recursive: true });
        testFile = await writeFixture('note.md', '# Test\nHello world\n');
    });
    afterEach(async () => {
        await rm(tmpDir, { recursive: true, force: true });
    });
    it('resolves and returns the absolute path for a valid .md file', async () => {
        const actual = await validateAndResolvePath(testFile);
        expect(actual).toBe(testFile);
    });
    it('AC3: throws on empty path', async () => {
        await expect(validateAndResolvePath('')).rejects.toThrow(/empty/i);
    });
    it('AC3: throws on whitespace-only path', async () => {
        await expect(validateAndResolvePath(' ')).rejects.toThrow(/empty/i);
    });
    it('AC3: throws for unsupported extension (.pdf, v1.1 deferred)', async () => {
        const pdfPath = await writeFixture('report.pdf', '%PDF-fake');
        await expect(validateAndResolvePath(pdfPath)).rejects.toThrow(/supported/i);
    });
    // Transcript formats accepted alongside markdown: [extension, fixture name, fixture body].
    const transcriptFixtures = [
        ['.txt', 'transcript.txt', 'Hello transcript'],
        ['.vtt', 'captions.vtt', 'WEBVTT\n\n00:00:00.000 --> 00:00:01.000\nHello'],
        ['.srt', 'captions.srt', '1\n00:00:00,000 --> 00:00:01,000\nHello\n'],
    ];
    for (const [ext, fileName, body] of transcriptFixtures) {
        it(`AC3 STORY-012: accepts ${ext} transcript files`, async () => {
            const fixturePath = await writeFixture(fileName, body);
            const actual = await validateAndResolvePath(fixturePath);
            expect(actual).toBe(fixturePath);
        });
    }
    it('AC3: throws with "File not found" for missing file', async () => {
        const absent = join(tmpDir, 'does-not-exist.md');
        await expect(validateAndResolvePath(absent)).rejects.toThrow(/not found/i);
    });
    it('AC3: resolves ../ sequences in the path (no traversal surprise)', async () => {
        // Leave the temp directory via ".." and immediately re-enter it.
        // The path is rebuilt from parent dir + last segment (instead of
        // joining two absolute paths) to sidestep Windows absolute-path-join
        // quirks.
        const nodePath = await import('node:path');
        const parent = nodePath.dirname(tmpDir);
        const leaf = nodePath.basename(tmpDir);
        const roundabout = join(parent, leaf, '..', leaf, 'note.md');
        const actual = await validateAndResolvePath(roundabout);
        expect(actual).toBe(testFile);
    });
});
175
+ // ── ingestMarkdownFile (mocked fetch) ─────────────────────────────────────────
describe('ingestMarkdownFile() — AC1–AC4 (mocked backend)', () => {
    let tmpDir;
    let testFile;
    beforeEach(async () => {
        // Fresh scratch directory per test, seeded with one small markdown note.
        tmpDir = join(tmpdir(), `sc-ingest-${Date.now()}`);
        await mkdir(tmpDir, { recursive: true });
        testFile = join(tmpDir, 'note.md');
        await writeFile(testFile, '# Test Note\n\nHello from Sound Connect.\n');
    });
    afterEach(async () => {
        // vi.restoreAllMocks() does not undo vi.stubGlobal(); without
        // vi.unstubAllGlobals() the fake `fetch` would leak into later tests.
        // Mock cleanup runs first so it still happens if directory removal fails.
        vi.unstubAllGlobals();
        vi.restoreAllMocks();
        await rm(tmpDir, { recursive: true, force: true });
    });
    /** Builds the minimal Response-like object these tests hand back from `fetch`. */
    function fakeResponse({ ok, status, body }) {
        return {
            ok,
            status,
            headers: { get: () => null },
            text: async () => JSON.stringify(body),
        };
    }
    /** Replaces global `fetch` with a mock that always resolves to `response`. */
    function mockFetch(response) {
        vi.stubGlobal('fetch', vi.fn().mockResolvedValue(fakeResponse(response)));
    }
    /**
     * Replaces global `fetch` with a mock that reports every call to `onCall(url, opts)`
     * and then answers with a successful, non-deduped ingest response.
     */
    function mockFetchCapturing(onCall) {
        vi.stubGlobal('fetch', vi.fn().mockImplementation(async (url, opts) => {
            onCall(url, opts);
            return fakeResponse({ ok: true, status: 200, body: { ok: true, deduped: false } });
        }));
    }
    /** Default ingest arguments pointing at the seeded note; `overrides` win. */
    function ingestArgs(overrides = {}) {
        return {
            filePath: testFile,
            backendUrl: 'https://example.com',
            clientSlug: 'beacon',
            token: 'tok',
            ...overrides,
        };
    }
    it('AC4: returns summary with chunks_ingested=1, chunks_deduped=0 on fresh ingest', async () => {
        mockFetch({ ok: true, status: 200, body: { ok: true, deduped: false } });
        const summary = await ingestMarkdownFile(ingestArgs({ token: 'test-token' }));
        expect(summary.chunks_sent).toBe(1);
        expect(summary.chunks_ingested).toBe(1);
        expect(summary.chunks_deduped).toBe(0);
        expect(summary.file).toBe(testFile);
        expect(summary.content_hash).toMatch(/^[0-9a-f]{64}$/);
    });
    it('AC4: returns summary with chunks_deduped=1 when backend reports dedup', async () => {
        mockFetch({ ok: true, status: 200, body: { ok: true, deduped: true } });
        const summary = await ingestMarkdownFile(ingestArgs({ token: 'test-token' }));
        expect(summary.chunks_sent).toBe(1);
        expect(summary.chunks_ingested).toBe(0);
        expect(summary.chunks_deduped).toBe(1);
    });
    it('AC2: sends correct provenance fields in POST body', async () => {
        let capturedBody = null;
        mockFetchCapturing((_url, opts) => {
            capturedBody = JSON.parse(opts.body);
        });
        await ingestMarkdownFile(ingestArgs({ token: 'test-token', authorEmail: 'test@example.com' }));
        expect(capturedBody).not.toBeNull();
        expect(capturedBody['source_type']).toBe('markdown');
        expect(capturedBody['filename']).toBe('note.md');
        expect(capturedBody['author_email']).toBe('test@example.com');
        expect(typeof capturedBody['sha256']).toBe('string');
        expect(typeof capturedBody['timestamp']).toBe('string');
        expect(typeof capturedBody['content']).toBe('string');
    });
    it('AC2: sends Authorization: Bearer header', async () => {
        let capturedHeaders = null;
        mockFetchCapturing((_url, opts) => {
            capturedHeaders = opts.headers;
        });
        await ingestMarkdownFile(ingestArgs({ token: 'my-bearer-token' }));
        expect(capturedHeaders['Authorization']).toBe('Bearer my-bearer-token');
    });
    it('AC2: POSTs to /ingest/:slug URL', async () => {
        let capturedUrl = null;
        mockFetchCapturing((url) => {
            capturedUrl = url;
        });
        await ingestMarkdownFile(ingestArgs());
        expect(capturedUrl).toBe('https://example.com/ingest/beacon');
    });
    it('AC3: throws on missing file with clear message', async () => {
        // No fetch stub here: the call is expected to fail on the missing file.
        const missing = join(tmpDir, 'missing.md');
        await expect(ingestMarkdownFile(ingestArgs({ filePath: missing }))).rejects.toThrow(/not found/i);
    });
    it('throws clear error on 401 from backend', async () => {
        mockFetch({ ok: false, status: 401, body: {} });
        await expect(ingestMarkdownFile(ingestArgs({ token: 'bad-token' }))).rejects.toThrow(/401/);
    });
    it('throws clear error on 403 from backend', async () => {
        mockFetch({ ok: false, status: 403, body: { hint: 'Not a member' } });
        await expect(ingestMarkdownFile(ingestArgs())).rejects.toThrow(/403/);
    });
    it('AC4: multi-chunk file returns correct totals', async () => {
        // 30 sections of ~1000 chars each — large enough to produce multiple chunks.
        const bigContent = Array.from({ length: 30 }, (_, i) => `## Section ${i}\n\n${'word '.repeat(200).trim()}`).join('\n\n');
        const bigFile = join(tmpDir, 'big.md');
        await writeFile(bigFile, bigContent);
        // Every chunk is accepted as new (not deduped).
        mockFetch({ ok: true, status: 200, body: { ok: true, deduped: false } });
        const summary = await ingestMarkdownFile(ingestArgs({ filePath: bigFile }));
        expect(summary.chunks_sent).toBeGreaterThan(1);
        expect(summary.chunks_ingested).toBe(summary.chunks_sent);
        expect(summary.chunks_deduped).toBe(0);
        expect(summary.chunks_ingested + summary.chunks_deduped).toBe(summary.chunks_sent);
    });
});
342
+ // ── STORY-012: sourceTypeForExt ───────────────────────────────────────────────
describe('sourceTypeForExt()', () => {
    // [file extension, source_type expected for it]
    const expectations = [
        ['.md', 'markdown'],
        ['.txt', 'transcript'],
        ['.vtt', 'transcript'],
        ['.srt', 'transcript'],
    ];
    for (const [ext, sourceType] of expectations) {
        it(`returns "${sourceType}" for ${ext}`, () => {
            expect(sourceTypeForExt(ext)).toBe(sourceType);
        });
    }
});
357
+ // ── STORY-012: SUPPORTED_EXTENSIONS ──────────────────────────────────────────
describe('SUPPORTED_EXTENSIONS', () => {
    it('contains .md, .txt, .vtt, .srt', () => {
        // Text formats that must be ingestable.
        for (const ext of ['.md', '.txt', '.vtt', '.srt']) {
            expect(SUPPORTED_EXTENSIONS.has(ext)).toBe(true);
        }
    });
    it('does not contain binary formats deferred to v1.1', () => {
        // Binary document formats must be rejected for now.
        for (const ext of ['.pdf', '.docx', '.xlsx']) {
            expect(SUPPORTED_EXTENSIONS.has(ext)).toBe(false);
        }
    });
});
371
+ // ── STORY-012: parseGlobToExtensions ─────────────────────────────────────────
describe('parseGlobToExtensions()', () => {
    it('returns all supported extensions when glob is undefined', () => {
        const allowed = parseGlobToExtensions(undefined);
        for (const ext of ['.md', '.txt', '.vtt', '.srt']) {
            expect(allowed.has(ext)).toBe(true);
        }
    });
    it('returns all supported extensions when glob is empty string', () => {
        const allowed = parseGlobToExtensions('');
        expect(allowed.size).toBe(SUPPORTED_EXTENSIONS.size);
    });
    it('filters to .md only for "*.md"', () => {
        const allowed = parseGlobToExtensions('*.md');
        expect(allowed.has('.md')).toBe(true);
        expect(allowed.has('.txt')).toBe(false);
        expect(allowed.has('.vtt')).toBe(false);
    });
    it('filters to .vtt only for "*.vtt"', () => {
        const allowed = parseGlobToExtensions('*.vtt');
        expect(allowed.has('.vtt')).toBe(true);
        expect(allowed.has('.md')).toBe(false);
    });
    it('filters to .txt only for "*.txt"', () => {
        const allowed = parseGlobToExtensions('*.txt');
        expect(allowed.has('.txt')).toBe(true);
        expect(allowed.has('.srt')).toBe(false);
    });
    it('throws for unsupported extension in glob', () => {
        const attempt = () => parseGlobToExtensions('*.pdf');
        expect(attempt).toThrow(/supported/i);
    });
    it('throws for .docx (v1.1 deferred)', () => {
        const attempt = () => parseGlobToExtensions('*.docx');
        expect(attempt).toThrow(/supported/i);
    });
    it('returns all extensions for a pattern without wildcard extension', () => {
        // "docs/" carries no "*.ext" component, so the full supported set is expected.
        const allowed = parseGlobToExtensions('docs/');
        expect(allowed.size).toBe(SUPPORTED_EXTENSIONS.size);
    });
});
412
+ // ── STORY-012: enumerateFolder ────────────────────────────────────────────────
describe('enumerateFolder()', () => {
    let tmpDir;
    // Creates `name` (relative to the temp directory) with the given contents.
    const seed = (name, contents) => writeFile(join(tmpDir, name), contents);
    // Strips the "<tmpDir>/" prefix from each returned path so assertions can
    // compare against names relative to the temp directory.
    const relativeNames = (paths) => paths.map((p) => p.slice(tmpDir.length + 1));
    beforeEach(async () => {
        tmpDir = join(tmpdir(), `sc-folder-${Date.now()}`);
        await mkdir(tmpDir, { recursive: true });
    });
    afterEach(async () => {
        await rm(tmpDir, { recursive: true, force: true });
    });
    it('AC1: returns .md and transcript files matching the allowed set', async () => {
        await seed('note.md', '# Note');
        await seed('meeting.txt', 'transcript');
        await seed('captions.vtt', 'WEBVTT');
        await seed('captions.srt', '1\n');
        await seed('data.xlsx', 'binary');
        const found = relativeNames(await enumerateFolder(tmpDir, SUPPORTED_EXTENSIONS));
        expect(found).toContain('note.md');
        expect(found).toContain('meeting.txt');
        expect(found).toContain('captions.vtt');
        expect(found).toContain('captions.srt');
        expect(found).not.toContain('data.xlsx');
    });
    it('AC1: filters to only .md when allowedExts = {".md"}', async () => {
        await seed('note.md', '# Note');
        await seed('meeting.txt', 'transcript');
        const markdownOnly = new Set(['.md']);
        const found = relativeNames(await enumerateFolder(tmpDir, markdownOnly));
        expect(found).toContain('note.md');
        expect(found).not.toContain('meeting.txt');
    });
    it('returns empty array for empty folder', async () => {
        const found = await enumerateFolder(tmpDir, SUPPORTED_EXTENSIONS);
        expect(found).toHaveLength(0);
    });
    it('returns sorted file paths', async () => {
        // Created out of alphabetical order on purpose.
        await seed('z-note.md', 'Z');
        await seed('a-note.md', 'A');
        await seed('m-note.md', 'M');
        const found = relativeNames(await enumerateFolder(tmpDir, SUPPORTED_EXTENSIONS));
        const expectedOrder = [...found].sort();
        expect(found).toEqual(expectedOrder);
    });
    it('throws with "Folder not found" for missing directory', async () => {
        const absent = join(tmpDir, 'does-not-exist');
        await expect(enumerateFolder(absent, SUPPORTED_EXTENSIONS)).rejects.toThrow(/not found/i);
    });
    it('skips subdirectories (flat enumeration only)', async () => {
        const nestedDir = join(tmpDir, 'subdir');
        await mkdir(nestedDir);
        await writeFile(join(nestedDir, 'nested.md'), '# Nested');
        await seed('top.md', '# Top');
        const found = relativeNames(await enumerateFolder(tmpDir, SUPPORTED_EXTENSIONS));
        expect(found).toContain('top.md');
        expect(found).not.toContain(join('subdir', 'nested.md'));
    });
});
470
+ // ── STORY-012: ingestFolder (mocked backend) ──────────────────────────────────
describe('ingestFolder() — AC1–AC4 (mocked backend)', () => {
    let tmpDir;
    beforeEach(async () => {
        tmpDir = join(tmpdir(), `sc-folder-ingest-${Date.now()}`);
        await mkdir(tmpDir, { recursive: true });
    });
    afterEach(async () => {
        // vi.restoreAllMocks() does not undo vi.stubGlobal(); without
        // vi.unstubAllGlobals() the fake `fetch` would leak into later tests.
        // Mock cleanup runs first so it still happens if directory removal fails.
        vi.unstubAllGlobals();
        vi.restoreAllMocks();
        await rm(tmpDir, { recursive: true, force: true });
    });
    /** Builds the minimal Response-like object these tests hand back from `fetch`. */
    function fakeResponse(body, ok = true, status = 200) {
        return {
            ok,
            status,
            headers: { get: () => null },
            text: async () => JSON.stringify(body),
        };
    }
    /** Replaces global `fetch` with a mock that always resolves to the same response. */
    function mockFetchAlways(body, ok = true, status = 200) {
        vi.stubGlobal('fetch', vi.fn().mockResolvedValue(fakeResponse(body, ok, status)));
    }
    /**
     * Replaces global `fetch` with a mock whose response is chosen per request by
     * `respond(parsedBody)`, where `parsedBody` is the JSON-decoded POST body.
     * Keying on the body (rather than on call order) keeps tests independent of
     * the order in which files are posted.
     */
    function mockFetchByBody(respond) {
        vi.stubGlobal('fetch', vi.fn().mockImplementation(async (_url, opts) => respond(JSON.parse(opts.body))));
    }
    /** True when the posted `filename` field ends with `name`. */
    const postedFileIs = (body, name) => String(body['filename']).endsWith(name);
    /** Default folder-ingest arguments for the temp directory; `overrides` win. */
    function folderArgs(overrides = {}) {
        return {
            folderPath: tmpDir,
            backendUrl: 'https://example.com',
            clientSlug: 'beacon',
            token: 'tok',
            ...overrides,
        };
    }
    it('AC4: returns per-file result table with correct structure', async () => {
        await writeFile(join(tmpDir, 'note.md'), '# Note\n\nContent');
        await writeFile(join(tmpDir, 'meeting.txt'), 'Hello transcript');
        mockFetchAlways({ ok: true, deduped: false });
        const result = await ingestFolder(folderArgs());
        expect(result.files_found).toBe(2);
        expect(result.results).toHaveLength(2);
        expect(result.results.every(r => r.status === 'ingested')).toBe(true);
    });
    it('AC1: enumerates .md and transcript files', async () => {
        await writeFile(join(tmpDir, 'note.md'), '# Note');
        await writeFile(join(tmpDir, 'meeting.vtt'), 'WEBVTT\n');
        await writeFile(join(tmpDir, 'report.pdf'), '%PDF'); // should be skipped
        mockFetchAlways({ ok: true, deduped: false });
        const result = await ingestFolder(folderArgs());
        expect(result.files_found).toBe(2);
    });
    it('AC2: source_type is "markdown" for .md and "transcript" for .txt', async () => {
        await writeFile(join(tmpDir, 'note.md'), '# Markdown file');
        await writeFile(join(tmpDir, 'transcript.txt'), 'Transcript content');
        const capturedBodies = [];
        mockFetchByBody((body) => {
            capturedBodies.push(body);
            return fakeResponse({ ok: true, deduped: false });
        });
        await ingestFolder(folderArgs());
        const mdBody = capturedBodies.find(b => b['filename'].endsWith('.md'));
        const txtBody = capturedBodies.find(b => b['filename'].endsWith('.txt'));
        expect(mdBody['source_type']).toBe('markdown');
        expect(txtBody['source_type']).toBe('transcript');
    });
    it('AC3: idempotency — re-run marks files as deduped', async () => {
        await writeFile(join(tmpDir, 'note.md'), '# Note\n\nContent');
        mockFetchAlways({ ok: true, deduped: true });
        const result = await ingestFolder(folderArgs());
        expect(result.files_ingested).toBe(0);
        expect(result.files_deduped).toBe(1);
        expect(result.results[0].status).toBe('deduped');
    });
    it('AC4: failed files are captured in results, do not abort the run (ADR-011)', async () => {
        await writeFile(join(tmpDir, 'good.md'), '# Good file');
        await writeFile(join(tmpDir, 'bad.md'), '# Bad file');
        // Reject exactly the request for bad.md with a 401; accept everything else.
        // Selecting by filename (not call order) guarantees the file named "bad"
        // is the one that fails, regardless of processing order.
        mockFetchByBody((body) => (postedFileIs(body, 'bad.md')
            ? fakeResponse({}, false, 401)
            : fakeResponse({ ok: true, deduped: false })));
        const result = await ingestFolder(folderArgs());
        expect(result.files_found).toBe(2);
        expect(result.files_ingested + result.files_failed).toBe(2);
        const failedFile = result.results.find(r => r.status === 'failed');
        expect(failedFile).toBeDefined();
        expect(failedFile.error).toBeTruthy();
        expect(failedFile.file).toMatch(/bad\.md$/);
        // The other file must still have been processed despite the failure.
        const ingestedFile = result.results.find(r => r.status === 'ingested');
        expect(ingestedFile).toBeDefined();
    });
    it('AC1: glob filter "*.md" restricts to markdown files only', async () => {
        await writeFile(join(tmpDir, 'note.md'), '# Note');
        await writeFile(join(tmpDir, 'meeting.txt'), 'transcript');
        mockFetchAlways({ ok: true, deduped: false });
        const result = await ingestFolder(folderArgs({ glob: '*.md' }));
        expect(result.files_found).toBe(1);
        expect(result.results[0].file).toMatch(/\.md$/);
    });
    it('AC4: folder-level totals are consistent with per-file results', async () => {
        await writeFile(join(tmpDir, 'a.md'), '# A');
        await writeFile(join(tmpDir, 'b.md'), '# B');
        await writeFile(join(tmpDir, 'c.txt'), 'C');
        // a.md → ingested, b.md → deduped, c.txt → ingested
        mockFetchByBody((body) => fakeResponse({ ok: true, deduped: postedFileIs(body, 'b.md') }));
        const result = await ingestFolder(folderArgs());
        expect(result.files_found).toBe(3);
        expect(result.files_ingested + result.files_deduped + result.files_failed).toBe(3);
        expect(result.results).toHaveLength(3);
    });
    it('throws when the folder does not exist', async () => {
        // No fetch stub here: the call is expected to fail on the missing folder.
        const absent = join(tmpDir, 'no-such-folder');
        await expect(ingestFolder(folderArgs({ folderPath: absent }))).rejects.toThrow(/not found/i);
    });
});
639
+ //# sourceMappingURL=ingest.test.js.map