@voidwire/lore 1.0.3 → 1.0.4

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (2)
  1. package/lib/indexer.ts +8 -6
  2. package/package.json +1 -1
package/lib/indexer.ts CHANGED
@@ -129,6 +129,10 @@ export function createIndexerContext(
129
129
  "INSERT INTO search (source, title, content, metadata, topic, type, timestamp) VALUES (?, ?, ?, ?, ?, ?, ?)",
130
130
  );
131
131
 
132
+ // Separate sets: entry-level avoids re-chunking identical docs,
133
+ // chunk-level catches duplicate chunks across different documents
134
+ const seenEntryHashes = new Set<string>();
135
+
132
136
  return {
133
137
  db,
134
138
  config,
@@ -136,21 +140,19 @@ export function createIndexerContext(
136
140
  insert: (entry: IndexEntry) => {
137
141
  validateEntry(entry);
138
142
 
139
- // Generate content hash for dedup
143
+ // Entry-level dedup: skip re-chunking identical documents
140
144
  const contentHash = createHash("sha256")
141
145
  .update(entry.content)
142
146
  .digest("hex");
143
-
144
- // Skip if already indexed
145
- if (seenHashes.has(contentHash)) {
147
+ if (seenEntryHashes.has(contentHash)) {
146
148
  return;
147
149
  }
148
- seenHashes.add(contentHash);
150
+ seenEntryHashes.add(contentHash);
149
151
 
150
152
  // Chunk content if needed
151
153
  const chunks = chunkContent(entry.content);
152
154
 
153
- // Insert each chunk (dedup at chunk level)
155
+ // Chunk-level dedup: skip duplicate chunks across documents
154
156
  for (const chunk of chunks) {
155
157
  const chunkHash = createHash("sha256").update(chunk).digest("hex");
156
158
  if (seenHashes.has(chunkHash)) continue;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@voidwire/lore",
3
- "version": "1.0.3",
3
+ "version": "1.0.4",
4
4
  "description": "Unified knowledge CLI - Search, list, and capture your indexed knowledge",
5
5
  "type": "module",
6
6
  "main": "./index.ts",