@voidwire/lore 1.0.2 → 1.0.4

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/lib/indexer.ts CHANGED
@@ -129,6 +129,10 @@ export function createIndexerContext(
129
129
  "INSERT INTO search (source, title, content, metadata, topic, type, timestamp) VALUES (?, ?, ?, ?, ?, ?, ?)",
130
130
  );
131
131
 
132
+ // Separate sets: entry-level avoids re-chunking identical docs,
133
+ // chunk-level catches duplicate chunks across different documents
134
+ const seenEntryHashes = new Set<string>();
135
+
132
136
  return {
133
137
  db,
134
138
  config,
@@ -136,22 +140,23 @@ export function createIndexerContext(
136
140
  insert: (entry: IndexEntry) => {
137
141
  validateEntry(entry);
138
142
 
139
- // Generate content hash for dedup
143
+ // Entry-level dedup: skip re-chunking identical documents
140
144
  const contentHash = createHash("sha256")
141
145
  .update(entry.content)
142
146
  .digest("hex");
143
-
144
- // Skip if already indexed
145
- if (seenHashes.has(contentHash)) {
147
+ if (seenEntryHashes.has(contentHash)) {
146
148
  return;
147
149
  }
148
- seenHashes.add(contentHash);
150
+ seenEntryHashes.add(contentHash);
149
151
 
150
152
  // Chunk content if needed
151
153
  const chunks = chunkContent(entry.content);
152
154
 
153
- // Insert each chunk
155
+ // Chunk-level dedup: skip duplicate chunks across documents
154
156
  for (const chunk of chunks) {
157
+ const chunkHash = createHash("sha256").update(chunk).digest("hex");
158
+ if (seenHashes.has(chunkHash)) continue;
159
+ seenHashes.add(chunkHash);
155
160
  insertStmt.run(
156
161
  entry.source,
157
162
  entry.title,
@@ -12,7 +12,7 @@
12
12
  * Timestamp: captured date if present, otherwise empty
13
13
  */
14
14
 
15
- import { readdirSync, readFileSync, existsSync } from "fs";
15
+ import { readdirSync, readFileSync, existsSync, statSync } from "fs";
16
16
  import { join, basename } from "path";
17
17
  import type { IndexerContext } from "../indexer";
18
18
 
@@ -82,6 +82,7 @@ function parseFluxFile(
82
82
  status: string,
83
83
  ): void {
84
84
  const raw = readFileSync(filePath, "utf-8");
85
+ const mtime = statSync(filePath).mtime;
85
86
  const lines = raw.split("\n");
86
87
 
87
88
  for (const line of lines) {
@@ -112,6 +113,11 @@ function parseFluxFile(
112
113
  );
113
114
  }
114
115
 
116
+ // Fall back to file mtime if no captured date
117
+ if (!timestamp) {
118
+ timestamp = mtime.toISOString();
119
+ }
120
+
115
121
  // Extract archived date if present (strip from description)
116
122
  rest = rest.replace(/\s*archived::\s*\S+/, "");
117
123
 
@@ -135,6 +135,8 @@ export async function indexObsidian(ctx: IndexerContext): Promise<void> {
135
135
 
136
136
  const title = basename(filePath, ".md");
137
137
 
138
+ if (!content.trim()) continue;
139
+
138
140
  ctx.insert({
139
141
  source: "obsidian",
140
142
  title,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@voidwire/lore",
3
- "version": "1.0.2",
3
+ "version": "1.0.4",
4
4
  "description": "Unified knowledge CLI - Search, list, and capture your indexed knowledge",
5
5
  "type": "module",
6
6
  "main": "./index.ts",