@disco_trooper/apple-notes-mcp 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,13 +7,13 @@
7
7
  * - Single note reindexing
8
8
  */
9
9
 
10
- import { getEmbedding } from "../embeddings/index.js";
10
+ import { getEmbedding, getEmbeddingBatch } from "../embeddings/index.js";
11
11
  import { getVectorStore, type NoteRecord } from "../db/lancedb.js";
12
- import { getAllNotes, getNoteByFolderAndTitle, getNoteByTitle, type NoteInfo } from "../notes/read.js";
12
+ import { getAllNotes, getAllNotesWithContent, getNoteByFolderAndTitle, getNoteByTitle, type NoteInfo } from "../notes/read.js";
13
13
  import { createDebugLogger } from "../utils/debug.js";
14
14
  import { truncateForEmbedding } from "../utils/text.js";
15
- import { EMBEDDING_DELAY_MS } from "../config/constants.js";
16
15
  import { NoteNotFoundError } from "../errors/index.js";
16
+ import { extractMetadata } from "../graph/extract.js";
17
17
 
18
18
  /**
19
19
  * Extract note title from folder/title key.
@@ -50,90 +50,127 @@ export interface IndexResult {
50
50
  }
51
51
 
52
52
  /**
53
- * Sleep for a specified duration.
53
+ * Note data prepared for embedding.
54
54
  */
55
- function sleep(ms: number): Promise<void> {
56
- return new Promise((resolve) => setTimeout(resolve, ms));
55
+ interface PreparedNote {
56
+ id: string;
57
+ title: string;
58
+ content: string;
59
+ truncatedContent: string;
60
+ folder: string;
61
+ created: string;
62
+ modified: string;
63
+ tags: string[];
64
+ outlinks: string[];
65
+ }
66
+
67
+ /**
68
+ * Prepare a note for embedding by extracting metadata and truncating content.
69
+ * Returns null if the note content is empty.
70
+ */
71
+ function prepareNoteForEmbedding(note: {
72
+ id: string;
73
+ title: string;
74
+ content: string;
75
+ folder: string;
76
+ created: string;
77
+ modified: string;
78
+ }): PreparedNote | null {
79
+ if (!note.content.trim()) {
80
+ return null;
81
+ }
82
+
83
+ const metadata = extractMetadata(note.content);
84
+
85
+ return {
86
+ id: note.id,
87
+ title: note.title,
88
+ content: note.content,
89
+ truncatedContent: truncateForEmbedding(note.content),
90
+ folder: note.folder,
91
+ created: note.created,
92
+ modified: note.modified,
93
+ tags: metadata.tags,
94
+ outlinks: metadata.outlinks,
95
+ };
96
+ }
97
+
98
+ /**
99
+ * Build a NoteRecord from a PreparedNote and its embedding vector.
100
+ */
101
+ function buildNoteRecord(
102
+ note: PreparedNote,
103
+ vector: number[],
104
+ indexedAt: string
105
+ ): NoteRecord {
106
+ return {
107
+ id: note.id,
108
+ title: note.title,
109
+ content: note.content,
110
+ vector,
111
+ folder: note.folder,
112
+ created: note.created,
113
+ modified: note.modified,
114
+ indexed_at: indexedAt,
115
+ tags: note.tags,
116
+ outlinks: note.outlinks,
117
+ };
57
118
  }
58
119
 
59
120
  /**
60
121
  * Perform full reindexing of all notes.
61
122
  * Drops existing index and rebuilds from scratch.
123
+ * Uses single JXA call + batch embedding for maximum speed.
62
124
  */
63
125
  export async function fullIndex(): Promise<IndexResult> {
64
126
  const startTime = Date.now();
65
127
  debug("Starting full index...");
66
128
 
67
- // Get all notes from Apple Notes
68
- const notes = await getAllNotes();
69
- debug(`Found ${notes.length} notes in Apple Notes`);
129
+ // Phase 1: Fetch all notes with content in single JXA call
130
+ debug("Phase 1: Fetching all notes with content (single JXA call)...");
131
+ const allNotes = await getAllNotesWithContent();
132
+ debug(`Fetched ${allNotes.length} notes from Apple Notes`);
70
133
 
71
- const records: NoteRecord[] = [];
72
- let errors = 0;
73
- const failedNotes: string[] = [];
134
+ // Filter empty notes and prepare for embedding
135
+ const preparedNotes = allNotes
136
+ .map(prepareNoteForEmbedding)
137
+ .filter((note): note is PreparedNote => note !== null);
74
138
 
75
- for (let i = 0; i < notes.length; i++) {
76
- const noteInfo = notes[i];
77
- debug(`Processing ${i + 1}/${notes.length}: ${noteInfo.title}`);
139
+ debug(`Prepared ${preparedNotes.length} notes for embedding`);
78
140
 
79
- try {
80
- // Get full note content using folder and title separately
81
- // to handle notes with "/" in their titles
82
- const noteDetails = await getNoteByFolderAndTitle(noteInfo.folder, noteInfo.title);
83
- if (!noteDetails) {
84
- debug(`Could not fetch note: ${noteInfo.title}`);
85
- failedNotes.push(`${noteInfo.folder}/${noteInfo.title}`);
86
- errors++;
87
- continue;
88
- }
141
+ // Phase 2: Generate embeddings in batch (with concurrent API calls)
142
+ debug("Phase 2: Generating embeddings in batch...");
143
+ const textsToEmbed = preparedNotes.map(n => n.truncatedContent);
89
144
 
90
- // Skip empty notes
91
- if (!noteDetails.content.trim()) {
92
- debug(`Skipping empty note: ${noteInfo.title}`);
93
- continue;
94
- }
95
-
96
- // Generate embedding
97
- const content = truncateForEmbedding(noteDetails.content);
98
- const vector = await getEmbedding(content);
99
-
100
- const record: NoteRecord = {
101
- id: noteDetails.id,
102
- title: noteDetails.title,
103
- content: noteDetails.content,
104
- vector,
105
- folder: noteDetails.folder,
106
- created: noteDetails.created,
107
- modified: noteDetails.modified,
108
- indexed_at: new Date().toISOString(),
109
- };
110
-
111
- records.push(record);
112
-
113
- // Delay to avoid rate limiting
114
- if (i < notes.length - 1) {
115
- await sleep(EMBEDDING_DELAY_MS);
116
- }
117
- } catch (error) {
118
- debug(`Error processing ${noteInfo.title}:`, error);
119
- failedNotes.push(`${noteInfo.folder}/${noteInfo.title}`);
120
- errors++;
121
- }
145
+ let vectors: number[][];
146
+ try {
147
+ vectors = await getEmbeddingBatch(textsToEmbed);
148
+ } catch (error) {
149
+ debug("Batch embedding failed:", error);
150
+ throw error;
122
151
  }
123
152
 
124
- // Store all records in vector database
153
+ debug(`Generated ${vectors.length} embeddings`);
154
+
155
+ // Phase 3: Build records and store
156
+ debug("Phase 3: Storing in database...");
157
+ const indexedAt = new Date().toISOString();
158
+ const records = preparedNotes.map((note, i) =>
159
+ buildNoteRecord(note, vectors[i], indexedAt)
160
+ );
161
+
125
162
  const store = getVectorStore();
126
163
  await store.index(records);
127
164
 
128
165
  const timeMs = Date.now() - startTime;
129
- debug(`Full index complete: ${records.length} indexed, ${errors} errors, ${timeMs}ms`);
166
+ const skipped = allNotes.length - preparedNotes.length;
167
+ debug(`Full index complete: ${records.length} indexed, ${skipped} empty/skipped, ${timeMs}ms`);
130
168
 
131
169
  return {
132
- total: notes.length,
170
+ total: allNotes.length,
133
171
  indexed: records.length,
134
- errors,
172
+ errors: 0,
135
173
  timeMs,
136
- failedNotes: failedNotes.length > 0 ? failedNotes : undefined,
137
174
  };
138
175
  }
139
176
 
@@ -212,48 +249,63 @@ export async function incrementalIndex(): Promise<IndexResult> {
212
249
  let errors = 0;
213
250
  const failedNotes: string[] = [];
214
251
 
215
- // Process additions and updates
252
+ // Process additions and updates in batch
216
253
  const toProcess = [...toAdd, ...toUpdate];
217
- for (let i = 0; i < toProcess.length; i++) {
218
- const noteInfo = toProcess[i];
219
- debug(`Processing ${i + 1}/${toProcess.length}: ${noteInfo.title}`);
220
254
 
221
- try {
222
- // Use folder and title separately to handle "/" in titles
223
- const noteDetails = await getNoteByFolderAndTitle(noteInfo.folder, noteInfo.title);
224
- if (!noteDetails) {
255
+ if (toProcess.length > 0) {
256
+ // Phase 1: Fetch all note content
257
+ debug(`Phase 1: Fetching ${toProcess.length} notes content...`);
258
+ const preparedNotes: PreparedNote[] = [];
259
+
260
+ for (const noteInfo of toProcess) {
261
+ try {
262
+ const noteDetails = await getNoteByFolderAndTitle(noteInfo.folder, noteInfo.title);
263
+ if (!noteDetails) {
264
+ failedNotes.push(`${noteInfo.folder}/${noteInfo.title}`);
265
+ errors++;
266
+ continue;
267
+ }
268
+
269
+ const prepared = prepareNoteForEmbedding(noteDetails);
270
+ if (prepared) {
271
+ preparedNotes.push(prepared);
272
+ }
273
+ } catch (error) {
274
+ debug(`Error fetching ${noteInfo.title}:`, error);
225
275
  failedNotes.push(`${noteInfo.folder}/${noteInfo.title}`);
226
276
  errors++;
227
- continue;
228
277
  }
278
+ }
229
279
 
230
- if (!noteDetails.content.trim()) {
231
- continue;
280
+ if (preparedNotes.length > 0) {
281
+ // Phase 2: Generate embeddings in batch
282
+ debug(`Phase 2: Generating ${preparedNotes.length} embeddings in batch...`);
283
+ const textsToEmbed = preparedNotes.map(n => n.truncatedContent);
284
+
285
+ let vectors: number[][];
286
+ try {
287
+ vectors = await getEmbeddingBatch(textsToEmbed);
288
+ } catch (error) {
289
+ debug("Batch embedding failed:", error);
290
+ throw error;
232
291
  }
233
292
 
234
- const content = truncateForEmbedding(noteDetails.content);
235
- const vector = await getEmbedding(content);
236
-
237
- const record: NoteRecord = {
238
- id: noteDetails.id,
239
- title: noteDetails.title,
240
- content: noteDetails.content,
241
- vector,
242
- folder: noteDetails.folder,
243
- created: noteDetails.created,
244
- modified: noteDetails.modified,
245
- indexed_at: new Date().toISOString(),
246
- };
247
-
248
- await store.update(record);
249
-
250
- if (i < toProcess.length - 1) {
251
- await sleep(EMBEDDING_DELAY_MS);
293
+ // Phase 3: Update database
294
+ debug("Phase 3: Updating database...");
295
+ const indexedAt = new Date().toISOString();
296
+
297
+ for (let i = 0; i < preparedNotes.length; i++) {
298
+ const note = preparedNotes[i];
299
+ const record = buildNoteRecord(note, vectors[i], indexedAt);
300
+
301
+ try {
302
+ await store.update(record);
303
+ } catch (error) {
304
+ debug(`Error updating ${note.title}:`, error);
305
+ failedNotes.push(`${note.folder}/${note.title}`);
306
+ errors++;
307
+ }
252
308
  }
253
- } catch (error) {
254
- debug(`Error processing ${noteInfo.title}:`, error);
255
- failedNotes.push(`${noteInfo.folder}/${noteInfo.title}`);
256
- errors++;
257
309
  }
258
310
  }
259
311
 
@@ -307,23 +359,13 @@ export async function reindexNote(title: string): Promise<void> {
307
359
  throw new NoteNotFoundError(title);
308
360
  }
309
361
 
310
- if (!noteDetails.content.trim()) {
362
+ const prepared = prepareNoteForEmbedding(noteDetails);
363
+ if (!prepared) {
311
364
  throw new Error(`Note is empty: "${title}"`);
312
365
  }
313
366
 
314
- const content = truncateForEmbedding(noteDetails.content);
315
- const vector = await getEmbedding(content);
316
-
317
- const record: NoteRecord = {
318
- id: noteDetails.id,
319
- title: noteDetails.title,
320
- content: noteDetails.content,
321
- vector,
322
- folder: noteDetails.folder,
323
- created: noteDetails.created,
324
- modified: noteDetails.modified,
325
- indexed_at: new Date().toISOString(),
326
- };
367
+ const vector = await getEmbedding(prepared.truncatedContent);
368
+ const record = buildNoteRecord(prepared, vector, new Date().toISOString());
327
369
 
328
370
  const store = getVectorStore();
329
371
  await store.update(record);
package/src/setup.ts CHANGED
@@ -12,14 +12,23 @@
12
12
 
13
13
  import * as p from "@clack/prompts";
14
14
  import * as fs from "node:fs";
15
- import * as path from "node:path";
15
+ import {
16
+ getEnvPath,
17
+ ensureConfigDir,
18
+ hasLegacyConfig,
19
+ getLegacyEnvPath,
20
+ hasConfig,
21
+ isNpmInstall,
22
+ } from "./config/paths.js";
23
+ import {
24
+ getClaudeConfigEntry,
25
+ writeClaudeConfig,
26
+ getExistingInstallMethod,
27
+ } from "./config/claude.js";
28
+ import { checkBunRuntime } from "./utils/runtime.js";
29
+
16
30
  // Paths
17
- const PROJECT_DIR = path.dirname(new URL(import.meta.url).pathname);
18
- const ENV_FILE = path.join(PROJECT_DIR, "..", ".env");
19
- const CLAUDE_CONFIG_PATH = path.join(
20
- process.env.HOME || "~",
21
- ".claude.json"
22
- );
31
+ const ENV_FILE = getEnvPath();
23
32
 
24
33
  interface Config {
25
34
  provider: "local" | "openrouter";
@@ -69,6 +78,7 @@ function readExistingEnv(): Record<string, string> {
69
78
  * Write configuration to .env file
70
79
  */
71
80
  function writeEnvFile(config: Config): void {
81
+ ensureConfigDir();
72
82
  const lines: string[] = [
73
83
  "# apple-notes-mcp configuration",
74
84
  "# Generated by setup wizard",
@@ -114,80 +124,29 @@ function writeEnvFile(config: Config): void {
114
124
  fs.writeFileSync(ENV_FILE, lines.join("\n") + "\n");
115
125
  }
116
126
 
117
- /**
118
- * Read Claude Code config if it exists
119
- */
120
- function readClaudeConfig(): Record<string, unknown> | null {
121
- if (!fs.existsSync(CLAUDE_CONFIG_PATH)) {
122
- return null;
123
- }
124
-
125
- try {
126
- const content = fs.readFileSync(CLAUDE_CONFIG_PATH, "utf-8");
127
- return JSON.parse(content);
128
- } catch (error) {
129
- // Config doesn't exist or is invalid JSON
130
- if (process.env.DEBUG === "true") {
131
- console.error("[SETUP] Could not read Claude config:", error);
132
- }
133
- return null;
134
- }
135
- }
136
-
137
127
  /**
138
128
  * Add MCP server to Claude Code config
139
129
  */
140
130
  function addToClaudeConfig(): boolean {
141
- const projectPath = path.resolve(PROJECT_DIR, "..");
142
- const serverEntry = {
143
- command: "bun",
144
- args: ["run", path.join(projectPath, "src", "index.ts")],
145
- env: {},
146
- };
131
+ const entry = getClaudeConfigEntry();
147
132
 
148
- let config = readClaudeConfig();
133
+ // Check for install method change
134
+ const existingMethod = getExistingInstallMethod();
135
+ const currentMethod = isNpmInstall() ? "npm" : "source";
149
136
 
150
- if (!config) {
151
- // Create new config
152
- config = {
153
- mcpServers: {
154
- "apple-notes": serverEntry,
155
- },
156
- };
157
- } else {
158
- // Add to existing config
159
- const mcpServers = (config.mcpServers || {}) as Record<string, unknown>;
160
- mcpServers["apple-notes"] = serverEntry;
161
- config.mcpServers = mcpServers;
137
+ if (existingMethod && existingMethod !== currentMethod) {
138
+ p.log.info(`Updating Claude config from ${existingMethod} to ${currentMethod} installation`);
162
139
  }
163
140
 
164
- try {
165
- fs.writeFileSync(CLAUDE_CONFIG_PATH, JSON.stringify(config, null, 2) + "\n");
166
- return true;
167
- } catch (error) {
168
- if (process.env.DEBUG === "true") {
169
- console.error("[SETUP] Failed to write Claude config:", error);
170
- }
171
- return false;
172
- }
141
+ return writeClaudeConfig(entry);
173
142
  }
174
143
 
175
144
  /**
176
145
  * Generate config snippet for manual setup
177
146
  */
178
147
  function getConfigSnippet(): string {
179
- const projectPath = path.resolve(PROJECT_DIR, "..");
180
- return JSON.stringify(
181
- {
182
- "apple-notes": {
183
- command: "bun",
184
- args: ["run", path.join(projectPath, "src", "index.ts")],
185
- env: {},
186
- },
187
- },
188
- null,
189
- 2
190
- );
148
+ const entry = getClaudeConfigEntry();
149
+ return JSON.stringify({ "apple-notes": entry }, null, 2);
191
150
  }
192
151
 
193
152
  /**
@@ -217,6 +176,7 @@ async function downloadLocalModel(): Promise<void> {
217
176
  * Main setup wizard
218
177
  */
219
178
  async function main(): Promise<void> {
179
+ checkBunRuntime();
220
180
  console.clear();
221
181
 
222
182
  p.intro("apple-notes-mcp Setup Wizard");
@@ -233,6 +193,25 @@ async function main(): Promise<void> {
233
193
  );
234
194
  }
235
195
 
196
+ // Check for legacy config migration
197
+ if (hasLegacyConfig() && !hasConfig()) {
198
+ const migrate = await p.confirm({
199
+ message: "Found config in project directory. Migrate to ~/.apple-notes-mcp/?",
200
+ initialValue: true,
201
+ });
202
+
203
+ if (p.isCancel(migrate)) {
204
+ p.cancel("Setup cancelled.");
205
+ process.exit(0);
206
+ }
207
+
208
+ if (migrate) {
209
+ ensureConfigDir();
210
+ fs.copyFileSync(getLegacyEnvPath(), getEnvPath());
211
+ p.log.success("Config migrated to ~/.apple-notes-mcp/.env");
212
+ }
213
+ }
214
+
236
215
  // Provider selection
237
216
  const provider = await p.select({
238
217
  message: "Which embedding provider would you like to use?",
@@ -0,0 +1,182 @@
1
+ import { describe, expect, it } from "vitest";
2
+ import {
3
+ chunkText,
4
+ type ChunkOptions,
5
+ DEFAULT_CHUNK_OPTIONS,
6
+ SEPARATORS,
7
+ findSplitPoint,
8
+ } from "./chunker.js";
9
+
10
+ describe("chunker", () => {
11
+ describe("exports", () => {
12
+ it("exports SEPARATORS array with correct order", () => {
13
+ expect(SEPARATORS).toEqual([
14
+ "\n\n",
15
+ "\n",
16
+ ". ",
17
+ "! ",
18
+ "? ",
19
+ "; ",
20
+ ", ",
21
+ " ",
22
+ "",
23
+ ]);
24
+ });
25
+
26
+ it("exports DEFAULT_CHUNK_OPTIONS with correct values", () => {
27
+ expect(DEFAULT_CHUNK_OPTIONS).toEqual({
28
+ chunkSize: 500,
29
+ overlap: 100,
30
+ });
31
+ });
32
+ });
33
+
34
+ describe("findSplitPoint", () => {
35
+ it("finds paragraph boundary near target", () => {
36
+ const text = "First paragraph.\n\nSecond paragraph.";
37
+ const target = 20;
38
+ const result = findSplitPoint(text, target);
39
+ // Should find the \n\n at position 16
40
+ expect(result).toBe(18); // After \n\n
41
+ });
42
+
43
+ it("falls back to sentence boundary", () => {
44
+ const text = "First sentence. Second sentence.";
45
+ const target = 18;
46
+ const result = findSplitPoint(text, target);
47
+ // Should find ". " at position 14-16
48
+ expect(result).toBe(16); // After ". "
49
+ });
50
+
51
+ it("falls back to word boundary", () => {
52
+ const text = "oneword anotherword";
53
+ const target = 10;
54
+ const result = findSplitPoint(text, target);
55
+ // Should find space at position 7
56
+ expect(result).toBe(8); // After " "
57
+ });
58
+
59
+ it("returns target when no separator found", () => {
60
+ const text = "noseparatorshere";
61
+ const target = 8;
62
+ const result = findSplitPoint(text, target);
63
+ expect(result).toBe(8);
64
+ });
65
+ });
66
+
67
+ describe("chunkText", () => {
68
+ it("returns single chunk for short text", () => {
69
+ const text = "Short text";
70
+ const options: ChunkOptions = { chunkSize: 100, overlap: 20 };
71
+
72
+ const result = chunkText(text, options);
73
+
74
+ expect(result).toHaveLength(1);
75
+ expect(result[0]).toEqual({
76
+ content: "Short text",
77
+ index: 0,
78
+ totalChunks: 1,
79
+ startPos: 0,
80
+ endPos: 10,
81
+ });
82
+ });
83
+
84
+ it("creates multiple chunks for long text", () => {
85
+ const text = "Word ".repeat(50).trim(); // 249 chars
86
+ const options: ChunkOptions = { chunkSize: 50, overlap: 10 };
87
+
88
+ const result = chunkText(text, options);
89
+
90
+ expect(result.length).toBeGreaterThan(1);
91
+ // Each chunk should have content
92
+ result.forEach((chunk) => {
93
+ expect(chunk.content.length).toBeGreaterThan(0);
94
+ expect(chunk.content.length).toBeLessThanOrEqual(options.chunkSize);
95
+ });
96
+ });
97
+
98
+ it("includes overlap between chunks", () => {
99
+ const text = "First part. Second part. Third part. Fourth part.";
100
+ const options: ChunkOptions = { chunkSize: 25, overlap: 10 };
101
+
102
+ const result = chunkText(text, options);
103
+
104
+ // Check that chunks overlap - endPos of chunk N should be > startPos of chunk N+1
105
+ for (let i = 0; i < result.length - 1; i++) {
106
+ const currentChunk = result[i];
107
+ const nextChunk = result[i + 1];
108
+ // Overlap means next chunk starts before current chunk ends
109
+ expect(nextChunk.startPos).toBeLessThan(currentChunk.endPos);
110
+ }
111
+ });
112
+
113
+ it("respects paragraph boundaries when splitting", () => {
114
+ const text = "First paragraph here.\n\nSecond paragraph here.\n\nThird paragraph.";
115
+ const options: ChunkOptions = { chunkSize: 30, overlap: 5 };
116
+
117
+ const result = chunkText(text, options);
118
+
119
+ // At least one chunk should end at a paragraph boundary
120
+ const hasParaBoundary = result.some((chunk) => {
121
+ const endContent = text.slice(chunk.startPos, chunk.endPos);
122
+ return endContent.endsWith("\n\n") || chunk.endPos === text.length;
123
+ });
124
+ expect(hasParaBoundary).toBe(true);
125
+ });
126
+
127
+ it("sets correct totalChunks on all chunks", () => {
128
+ const text = "A ".repeat(100).trim(); // Create text that will be chunked
129
+ const options: ChunkOptions = { chunkSize: 20, overlap: 5 };
130
+
131
+ const result = chunkText(text, options);
132
+
133
+ const expectedTotal = result.length;
134
+ result.forEach((chunk, idx) => {
135
+ expect(chunk.totalChunks).toBe(expectedTotal);
136
+ expect(chunk.index).toBe(idx);
137
+ });
138
+ });
139
+
140
+ it("handles empty text", () => {
141
+ const result = chunkText("", { chunkSize: 100, overlap: 20 });
142
+
143
+ expect(result).toHaveLength(0);
144
+ });
145
+
146
+ it("handles whitespace-only text", () => {
147
+ const result = chunkText(" \n\n ", { chunkSize: 100, overlap: 20 });
148
+
149
+ expect(result).toHaveLength(0);
150
+ });
151
+
152
+ it("uses default options when not provided", () => {
153
+ const text = "Test";
154
+ const result = chunkText(text);
155
+
156
+ expect(result).toHaveLength(1);
157
+ expect(result[0].content).toBe("Test");
158
+ });
159
+
160
+ it("covers all original text with chunks", () => {
161
+ const text = "The quick brown fox jumps over the lazy dog. Pack my box with five dozen liquor jugs.";
162
+ const options: ChunkOptions = { chunkSize: 30, overlap: 10 };
163
+
164
+ const result = chunkText(text, options);
165
+
166
+ // Verify chunks cover the entire text
167
+ expect(result[0].startPos).toBe(0);
168
+ expect(result[result.length - 1].endPos).toBe(text.length);
169
+
170
+ // Verify each chunk's content matches its position in original text
171
+ for (const chunk of result) {
172
+ expect(chunk.content).toBe(text.slice(chunk.startPos, chunk.endPos));
173
+ }
174
+
175
+ // Verify chunks are contiguous (no gaps)
176
+ for (let i = 0; i < result.length - 1; i++) {
177
+ // Next chunk should start before or at current chunk's end (overlap)
178
+ expect(result[i + 1].startPos).toBeLessThanOrEqual(result[i].endPos);
179
+ }
180
+ });
181
+ });
182
+ });