@disco_trooper/apple-notes-mcp 1.1.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49):
  1. package/README.md +104 -24
  2. package/package.json +11 -12
  3. package/src/config/claude.test.ts +47 -0
  4. package/src/config/claude.ts +106 -0
  5. package/src/config/constants.ts +11 -2
  6. package/src/config/paths.test.ts +40 -0
  7. package/src/config/paths.ts +86 -0
  8. package/src/db/arrow-fix.test.ts +101 -0
  9. package/src/db/lancedb.test.ts +254 -2
  10. package/src/db/lancedb.ts +385 -38
  11. package/src/embeddings/cache.test.ts +150 -0
  12. package/src/embeddings/cache.ts +204 -0
  13. package/src/embeddings/index.ts +22 -4
  14. package/src/embeddings/local.ts +57 -17
  15. package/src/embeddings/openrouter.ts +233 -11
  16. package/src/errors/index.test.ts +64 -0
  17. package/src/errors/index.ts +62 -0
  18. package/src/graph/export.test.ts +81 -0
  19. package/src/graph/export.ts +163 -0
  20. package/src/graph/extract.test.ts +90 -0
  21. package/src/graph/extract.ts +52 -0
  22. package/src/graph/queries.test.ts +156 -0
  23. package/src/graph/queries.ts +224 -0
  24. package/src/index.ts +309 -23
  25. package/src/notes/conversion.ts +62 -0
  26. package/src/notes/crud.test.ts +41 -8
  27. package/src/notes/crud.ts +75 -64
  28. package/src/notes/read.test.ts +58 -3
  29. package/src/notes/read.ts +142 -210
  30. package/src/notes/resolve.ts +174 -0
  31. package/src/notes/tables.ts +69 -40
  32. package/src/search/chunk-indexer.test.ts +353 -0
  33. package/src/search/chunk-indexer.ts +207 -0
  34. package/src/search/chunk-search.test.ts +327 -0
  35. package/src/search/chunk-search.ts +298 -0
  36. package/src/search/index.ts +4 -6
  37. package/src/search/indexer.ts +164 -109
  38. package/src/setup.ts +46 -67
  39. package/src/types/index.ts +4 -0
  40. package/src/utils/chunker.test.ts +182 -0
  41. package/src/utils/chunker.ts +170 -0
  42. package/src/utils/content-filter.test.ts +225 -0
  43. package/src/utils/content-filter.ts +275 -0
  44. package/src/utils/debug.ts +0 -2
  45. package/src/utils/runtime.test.ts +70 -0
  46. package/src/utils/runtime.ts +40 -0
  47. package/src/utils/text.test.ts +32 -0
  48. package/CLAUDE.md +0 -56
  49. package/src/server.ts +0 -427
@@ -0,0 +1,182 @@
import { describe, expect, it } from "vitest";
import {
  chunkText,
  type ChunkOptions,
  DEFAULT_CHUNK_OPTIONS,
  SEPARATORS,
  findSplitPoint,
} from "./chunker.js";

/**
 * Unit tests for the text chunker: exported constants, split-point selection,
 * and the chunking loop (sizes, overlap, boundaries, coverage).
 */
describe("chunker", () => {
  describe("exports", () => {
    it("exports SEPARATORS array with correct order", () => {
      expect(SEPARATORS).toEqual([
        "\n\n",
        "\n",
        ". ",
        "! ",
        "? ",
        "; ",
        ", ",
        " ",
        "",
      ]);
    });

    it("exports DEFAULT_CHUNK_OPTIONS with correct values", () => {
      expect(DEFAULT_CHUNK_OPTIONS).toEqual({
        chunkSize: 500,
        overlap: 100,
      });
    });
  });

  describe("findSplitPoint", () => {
    it("finds paragraph boundary near target", () => {
      const text = "First paragraph.\n\nSecond paragraph.";
      const target = 20;
      const result = findSplitPoint(text, target);
      // The "\n\n" starts at index 16, so the split lands right after it.
      expect(result).toBe(18);
    });

    it("falls back to sentence boundary", () => {
      const text = "First sentence. Second sentence.";
      const target = 18;
      const result = findSplitPoint(text, target);
      // The ". " occupies indices 14-15, so the split lands at 16.
      expect(result).toBe(16);
    });

    it("falls back to word boundary", () => {
      const text = "oneword anotherword";
      const target = 10;
      const result = findSplitPoint(text, target);
      // The space is at index 7, so the split lands at 8.
      expect(result).toBe(8);
    });

    it("returns target when no separator found", () => {
      const text = "noseparatorshere";
      const target = 8;
      const result = findSplitPoint(text, target);
      expect(result).toBe(8);
    });
  });

  describe("chunkText", () => {
    it("returns single chunk for short text", () => {
      const text = "Short text";
      const options: ChunkOptions = { chunkSize: 100, overlap: 20 };

      const result = chunkText(text, options);

      expect(result).toHaveLength(1);
      expect(result[0]).toEqual({
        content: "Short text",
        index: 0,
        totalChunks: 1,
        startPos: 0,
        endPos: 10,
      });
    });

    it("creates multiple chunks for long text", () => {
      const text = "Word ".repeat(50).trim(); // 249 chars
      const options: ChunkOptions = { chunkSize: 50, overlap: 10 };

      const result = chunkText(text, options);

      expect(result.length).toBeGreaterThan(1);
      // Each chunk should have content and stay within the size limit
      result.forEach((chunk) => {
        expect(chunk.content.length).toBeGreaterThan(0);
        expect(chunk.content.length).toBeLessThanOrEqual(options.chunkSize);
      });
    });

    it("includes overlap between chunks", () => {
      const text = "First part. Second part. Third part. Fourth part.";
      const options: ChunkOptions = { chunkSize: 25, overlap: 10 };

      const result = chunkText(text, options);

      // Guard against the loop below passing vacuously on a single chunk.
      expect(result.length).toBeGreaterThan(1);

      // Overlap means the next chunk starts before the current chunk ends.
      for (let i = 0; i < result.length - 1; i++) {
        const currentChunk = result[i];
        const nextChunk = result[i + 1];
        expect(nextChunk.startPos).toBeLessThan(currentChunk.endPos);
      }
    });

    it("respects paragraph boundaries when splitting", () => {
      const text = "First paragraph here.\n\nSecond paragraph here.\n\nThird paragraph.";
      const options: ChunkOptions = { chunkSize: 30, overlap: 5 };

      const result = chunkText(text, options);

      expect(result.length).toBeGreaterThan(1);

      // The final chunk always ends at text.length, so accepting it would make
      // this assertion pass unconditionally. Only non-final chunks can prove
      // that a split actually landed on a paragraph break.
      const nonFinalChunks = result.slice(0, -1);
      const hasParaBoundary = nonFinalChunks.some((chunk) =>
        text.slice(chunk.startPos, chunk.endPos).endsWith("\n\n")
      );
      expect(hasParaBoundary).toBe(true);
    });

    it("sets correct totalChunks on all chunks", () => {
      const text = "A ".repeat(100).trim(); // Create text that will be chunked
      const options: ChunkOptions = { chunkSize: 20, overlap: 5 };

      const result = chunkText(text, options);

      const expectedTotal = result.length;
      expect(expectedTotal).toBeGreaterThan(1);
      result.forEach((chunk, idx) => {
        expect(chunk.totalChunks).toBe(expectedTotal);
        expect(chunk.index).toBe(idx);
      });
    });

    it("handles empty text", () => {
      const result = chunkText("", { chunkSize: 100, overlap: 20 });

      expect(result).toHaveLength(0);
    });

    it("handles whitespace-only text", () => {
      const result = chunkText(" \n\n ", { chunkSize: 100, overlap: 20 });

      expect(result).toHaveLength(0);
    });

    it("uses default options when not provided", () => {
      const text = "Test";
      const result = chunkText(text);

      expect(result).toHaveLength(1);
      expect(result[0].content).toBe("Test");
    });

    it("covers all original text with chunks", () => {
      const text = "The quick brown fox jumps over the lazy dog. Pack my box with five dozen liquor jugs.";
      const options: ChunkOptions = { chunkSize: 30, overlap: 10 };

      const result = chunkText(text, options);

      // Verify chunks cover the entire text
      expect(result[0].startPos).toBe(0);
      expect(result[result.length - 1].endPos).toBe(text.length);

      // Verify each chunk's content matches its position in original text
      for (const chunk of result) {
        expect(chunk.content).toBe(text.slice(chunk.startPos, chunk.endPos));
      }

      // Verify chunks are contiguous (no gaps): the next chunk must start
      // before or at the current chunk's end.
      for (let i = 0; i < result.length - 1; i++) {
        expect(result[i + 1].startPos).toBeLessThanOrEqual(result[i].endPos);
      }
    });
  });
});
@@ -0,0 +1,170 @@
/**
 * Text chunker with character splitting that respects natural boundaries.
 * Prioritizes splitting at: paragraphs > sentences > words > characters
 */

/**
 * Separators in priority order - prefer splitting at larger boundaries first.
 * The trailing empty string marks the character-level fallback.
 */
export const SEPARATORS = [
  "\n\n", // Paragraph
  "\n", // Line
  ". ", // Sentence (period)
  "! ", // Sentence (exclamation)
  "? ", // Sentence (question)
  "; ", // Clause
  ", ", // Phrase
  " ", // Word
  "", // Character (fallback)
] as const;

export interface ChunkOptions {
  /** Maximum size of each chunk in characters */
  chunkSize: number;
  /** Number of characters to overlap between chunks */
  overlap: number;
}

export interface ChunkResult {
  /** The text content of this chunk */
  content: string;
  /** Zero-based index of this chunk */
  index: number;
  /** Total number of chunks */
  totalChunks: number;
  /** Start position in original text */
  startPos: number;
  /** End position in original text (exclusive) */
  endPos: number;
}

export const DEFAULT_CHUNK_OPTIONS: ChunkOptions = {
  chunkSize: 500,
  overlap: 100,
};

/**
 * Find the best split point near the target position.
 *
 * The search window extends `min(50, floor(target / 2))` characters to each
 * side of `target`. Separators are tried in {@link SEPARATORS} priority order;
 * for the first separator that occurs in the window, the occurrence whose end
 * is closest to `target` wins (the earliest one on ties). A separator that is
 * only partially inside the window is not detected.
 *
 * @param text - The full text to search in
 * @param target - The target position to split near
 * @returns The position just after the chosen separator, or `target` itself
 *   when no separator occurs inside the window
 */
export function findSplitPoint(text: string, target: number): number {
  // Search window: look backwards and forwards from target
  const searchWindow = Math.min(50, Math.floor(target / 2));
  const searchStart = Math.max(0, target - searchWindow);
  const searchEnd = Math.min(text.length, target + searchWindow);
  const searchText = text.slice(searchStart, searchEnd);

  for (const sep of SEPARATORS) {
    // The empty-string fallback matches everywhere; it is represented by the
    // final `return target` instead.
    if (sep === "") continue;

    let bestPos = -1;
    let bestDistance = Infinity;

    // Visit every occurrence (including overlapping ones) inside the window.
    let idx = searchText.indexOf(sep);
    while (idx !== -1) {
      const absolutePos = searchStart + idx + sep.length;
      const distance = Math.abs(absolutePos - target);

      if (distance < bestDistance) {
        bestDistance = distance;
        bestPos = absolutePos;
      }
      idx = searchText.indexOf(sep, idx + 1);
    }

    // The highest-priority separator present wins, even if a lower-priority
    // one lies closer to the target.
    if (bestPos !== -1) {
      return bestPos;
    }
  }

  // No separator found, return target as-is
  return target;
}

/** True for real numbers; rejects NaN and non-number values from untyped callers. */
function isUsableNumber(value: unknown): value is number {
  return typeof value === "number" && !Number.isNaN(value);
}

/**
 * Split text into overlapping chunks that respect natural boundaries.
 *
 * Every character of `text` is covered by at least one chunk: the start of
 * each chunk is derived from where the previous chunk actually ended, so a
 * chunk that was shortened to land on a separator never leaves a gap behind.
 *
 * Option handling: `chunkSize` is floored and clamped to at least 1, `overlap`
 * is floored and clamped to `[0, chunkSize - 1]`; missing or NaN values fall
 * back to {@link DEFAULT_CHUNK_OPTIONS}.
 *
 * @param text - The text to chunk
 * @param options - Chunk size and overlap options
 * @returns Array of chunk results; empty when `text` is empty or whitespace-only
 */
export function chunkText(
  text: string,
  options: ChunkOptions = DEFAULT_CHUNK_OPTIONS
): ChunkResult[] {
  const chunkSize = isUsableNumber(options.chunkSize)
    ? Math.max(1, Math.floor(options.chunkSize))
    : DEFAULT_CHUNK_OPTIONS.chunkSize;
  const requestedOverlap = isUsableNumber(options.overlap)
    ? Math.floor(options.overlap)
    : DEFAULT_CHUNK_OPTIONS.overlap;
  const overlap = Math.min(Math.max(0, requestedOverlap), chunkSize - 1);

  // Handle empty or whitespace-only text
  if (text.trim().length === 0) {
    return [];
  }

  // If text fits in a single chunk, return it unchanged
  if (text.length <= chunkSize) {
    return [
      {
        content: text,
        index: 0,
        totalChunks: 1,
        startPos: 0,
        endPos: text.length,
      },
    ];
  }

  const chunks: ChunkResult[] = [];
  // Regular stride between chunk starts
  const minStep = Math.max(1, chunkSize - overlap);
  // Smallest advance allowed after a shortened chunk. An accepted split point
  // always lies strictly beyond startPos + minStep / 2, so advancing by this
  // much can never skip past the end of the chunk just emitted.
  const minAdvance = Math.ceil(minStep / 2);
  let startPos = 0;

  while (startPos < text.length) {
    // Calculate target end position
    let endPos = Math.min(startPos + chunkSize, text.length);

    // If not at the end, find a good split point
    if (endPos < text.length) {
      const splitPoint = findSplitPoint(text, endPos);
      // Only use the split point if it creates a reasonably sized chunk:
      // not tiny, and at most 20% over the requested size.
      if (
        splitPoint > startPos + minStep / 2 &&
        splitPoint - startPos <= chunkSize * 1.2
      ) {
        endPos = splitPoint;
      }
    }

    chunks.push({
      content: text.slice(startPos, endPos),
      index: chunks.length,
      totalChunks: 0, // Will be set after all chunks are created
      startPos,
      endPos,
    });

    // If we've reached the end, stop
    if (endPos >= text.length) {
      break;
    }

    // Advance by the regular stride, but when the chunk was shortened step
    // back `overlap` characters from its real end instead, so no text falls
    // between two chunks. `minAdvance` keeps the loop moving forward.
    startPos = Math.min(
      startPos + minStep,
      Math.max(endPos - overlap, startPos + minAdvance)
    );
  }

  // Set totalChunks on all chunks
  const totalChunks = chunks.length;
  for (const chunk of chunks) {
    chunk.totalChunks = totalChunks;
  }

  return chunks;
}
@@ -0,0 +1,225 @@
import { describe, it, expect } from "vitest";
import {
  calculateEntropy,
  isLikelyBase64,
  getBase64Ratio,
  hasBinaryContent,
  removeBase64Blocks,
  redactSecrets,
  filterContent,
  shouldIndexContent,
} from "./content-filter.js";

// Shared high-entropy Base64 fixtures reused across the suites below.
const SHORT_BASE64 = "ZXlKMGVYQWlPaUpLVjFRaUxDSmhiR2NpT2lKU1V6STFOaUo5";
const LONG_BASE64 = "ZXlKMGVYQWlPaUpLVjFRaUxDSmhiR2NpT2lKU1V6STFOaUo5eyJpc3M";

/**
 * Unit tests for the content filter: entropy and Base64 heuristics, binary
 * detection, Base64 removal, secret redaction, and the indexing decision.
 */
describe("content-filter", () => {
  describe("calculateEntropy", () => {
    it("returns 0 for empty string", () => {
      expect(calculateEntropy("")).toBe(0);
    });

    it("returns low entropy for repetitive text", () => {
      // A single repeated symbol carries no information at all.
      expect(calculateEntropy("aaaaaaaaaa")).toBe(0);
    });

    it("returns moderate entropy for normal text", () => {
      const bits = calculateEntropy("Hello, this is normal text.");
      expect(bits).toBeGreaterThan(2);
      expect(bits).toBeLessThan(5);
    });

    it("returns high entropy for Base64 content", () => {
      expect(calculateEntropy(SHORT_BASE64)).toBeGreaterThan(4.5);
    });
  });

  describe("isLikelyBase64", () => {
    it("returns false for short strings", () => {
      const verdict = isLikelyBase64("abc123");
      expect(verdict).toBe(false);
    });

    it("returns false for normal text", () => {
      const verdict = isLikelyBase64("This is normal text with spaces and punctuation!");
      expect(verdict).toBe(false);
    });

    it("returns true for Base64 encoded content", () => {
      const encoded = "ZXlKMGVYQWlPaUpLVjFRaUxDSmhiR2NpT2lKU1V6STFOaUo5eyJpc3MiOiJodHRwczovL2V4YW1wbGUu";
      expect(isLikelyBase64(encoded)).toBe(true);
    });

    it("returns true for URL-safe Base64", () => {
      const encoded = "ZXlKMGVYQWlPaUpLVjFRaUxDSmhiR2NpT2lKU1V6STFOaUo5_abc-def123456";
      expect(isLikelyBase64(encoded)).toBe(true);
    });
  });

  describe("getBase64Ratio", () => {
    it("returns 0 for normal text", () => {
      expect(getBase64Ratio("This is completely normal text.")).toBe(0);
    });

    it("returns high ratio for mostly Base64 content", () => {
      // Real high-entropy Base64 rather than a run of repeated characters.
      const share = getBase64Ratio(`Token: ${LONG_BASE64.repeat(3)}`);
      expect(share).toBeGreaterThan(0.5);
    });

    it("returns partial ratio for mixed content", () => {
      const mixed = `Normal text here. ${SHORT_BASE64} More normal text.`;
      const share = getBase64Ratio(mixed);
      expect(share).toBeGreaterThan(0);
      expect(share).toBeLessThan(0.7);
    });
  });

  describe("hasBinaryContent", () => {
    it("returns false for normal text", () => {
      const flagged = hasBinaryContent("Normal text");
      expect(flagged).toBe(false);
    });

    it("returns false for text with newlines and tabs", () => {
      const flagged = hasBinaryContent("Line 1\nLine 2\tTabbed");
      expect(flagged).toBe(false);
    });

    it("returns true for null bytes", () => {
      const flagged = hasBinaryContent("Text\x00with null");
      expect(flagged).toBe(true);
    });

    it("returns true for control characters", () => {
      const flagged = hasBinaryContent("Text\x03with control");
      expect(flagged).toBe(true);
    });
  });

  describe("removeBase64Blocks", () => {
    it("removes Base64 blocks from content", () => {
      const cleaned = removeBase64Blocks(`API Token: ${LONG_BASE64}\n\nNext section...`);

      // The encoded run is replaced by a marker; surrounding prose survives.
      expect(cleaned).not.toContain(LONG_BASE64);
      for (const fragment of ["[ENCODED]", "API Token:", "Next section..."]) {
        expect(cleaned).toContain(fragment);
      }
    });

    it("preserves normal text", () => {
      const plain = "This is completely normal text without any encoding.";
      expect(removeBase64Blocks(plain)).toBe(plain);
    });
  });

  describe("redactSecrets", () => {
    it("redacts JWT tokens", () => {
      const token = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U";
      const redaction = redactSecrets(`Bearer ${token}`);

      expect(redaction.content).toContain("[JWT_REDACTED]");
      expect(redaction.secretsFound).toContain("jwt");
    });

    it("redacts AWS access keys", () => {
      const redaction = redactSecrets("AWS Key: AKIAIOSFODNN7EXAMPLE");

      expect(redaction.content).toContain("[AWSACCESSKEY_REDACTED]");
      expect(redaction.secretsFound).toContain("awsAccessKey");
    });

    it("redacts GitHub tokens", () => {
      const redaction = redactSecrets("Token: ghp_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx");

      expect(redaction.content).toContain("[GITHUBTOKEN_REDACTED]");
      expect(redaction.secretsFound).toContain("githubToken");
    });

    it("preserves normal text", () => {
      const plain = "This is normal text without secrets.";
      const redaction = redactSecrets(plain);

      expect(redaction.content).toBe(plain);
      expect(redaction.secretsFound).toHaveLength(0);
    });
  });

  describe("filterContent", () => {
    it("returns 'index' for clean content", () => {
      // Must be at least 50 chars long (the default minContentLength).
      const clean = "This is clean, normal content for indexing. It contains enough text to pass the minimum length requirement.";
      const outcome = filterContent(clean);

      expect(outcome.action).toBe("index");
      expect(outcome.cleanedContent).toBe(clean);
      expect(outcome.reasons).toHaveLength(0);
    });

    it("returns 'skip' for binary content", () => {
      const outcome = filterContent("Text\x00with null bytes");

      expect(outcome.action).toBe("skip");
      expect(outcome.reasons).toContain("Contains binary content");
    });

    it("returns 'skip' for mostly Base64 content", () => {
      const outcome = filterContent(SHORT_BASE64.repeat(10));

      expect(outcome.action).toBe("skip");
      expect(outcome.reasons[0]).toContain("Base64 encoded");
    });

    it("returns 'filter' for mixed content with Base64", () => {
      const mixed = `This is important text. Token: ${SHORT_BASE64}. More important content here that we want to index.`;
      const outcome = filterContent(mixed);

      expect(outcome.action).toBe("filter");
      expect(outcome.cleanedContent).toContain("[ENCODED]");
      expect(outcome.cleanedContent).toContain("This is important text");
      const mentionsBase64 = outcome.reasons.some((reason) => reason.includes("Base64"));
      expect(mentionsBase64).toBe(true);
    });

    it("returns 'skip' if content too short after filtering", () => {
      // Once the Base64 run is stripped only "Hi [ENCODED]" is left, which
      // falls below the 50-character minimum.
      const outcome = filterContent(`Hi ${LONG_BASE64}`);

      expect(outcome.action).toBe("skip");
    });

    it("respects custom configuration", () => {
      // Plenty of prose on both sides so length is never the deciding factor.
      const mixed = `This is some text before the token. Token: ${LONG_BASE64}. And this is some text after the token that should remain.`;

      // With removeBase64 switched off the encoded run must be left in place.
      const outcome = filterContent(mixed, { removeBase64: false });

      expect(outcome.action).not.toBe("skip");
      expect(outcome.cleanedContent).toContain(LONG_BASE64);
    });
  });

  describe("shouldIndexContent", () => {
    it("returns true for normal content", () => {
      const indexable = shouldIndexContent("Normal text content");
      expect(indexable).toBe(true);
    });

    it("returns false for binary content", () => {
      const indexable = shouldIndexContent("Binary\x00content");
      expect(indexable).toBe(false);
    });

    it("returns false for mostly Base64", () => {
      const indexable = shouldIndexContent(SHORT_BASE64.repeat(10));
      expect(indexable).toBe(false);
    });
  });
});