npm - @voidwire/lore - Versions diffs - 1.0.2 → 1.0.4 - Mend

@voidwire/lore 1.0.2 → 1.0.4

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.

Files changed (4)

package/lib/indexer.ts CHANGED Viewed

@@ -129,6 +129,10 @@ export function createIndexerContext(
     "INSERT INTO search (source, title, content, metadata, topic, type, timestamp) VALUES (?, ?, ?, ?, ?, ?, ?)",
   );
+  // Separate sets: entry-level avoids re-chunking identical docs,
+  // chunk-level catches duplicate chunks across different documents
+  const seenEntryHashes = new Set<string>();
   return {
     db,
     config,
@@ -136,22 +140,23 @@ export function createIndexerContext(
     insert: (entry: IndexEntry) => {
       validateEntry(entry);
-      // Generate content hash for dedup
+      // Entry-level dedup: skip re-chunking identical documents
       const contentHash = createHash("sha256")
         .update(entry.content)
         .digest("hex");
-      // Skip if already indexed
-      if (seenHashes.has(contentHash)) {
+      if (seenEntryHashes.has(contentHash)) {
         return;
       }
-      seenHashes.add(contentHash);
+      seenEntryHashes.add(contentHash);
       // Chunk content if needed
       const chunks = chunkContent(entry.content);
-      // Insert each chunk
+      // Chunk-level dedup: skip duplicate chunks across documents
       for (const chunk of chunks) {
+        const chunkHash = createHash("sha256").update(chunk).digest("hex");
+        if (seenHashes.has(chunkHash)) continue;
+        seenHashes.add(chunkHash);
         insertStmt.run(
           entry.source,
           entry.title,

package/lib/indexers/flux.ts CHANGED Viewed

@@ -12,7 +12,7 @@
  * Timestamp: captured date if present, otherwise empty
  */
-import { readdirSync, readFileSync, existsSync } from "fs";
+import { readdirSync, readFileSync, existsSync, statSync } from "fs";
 import { join, basename } from "path";
 import type { IndexerContext } from "../indexer";
@@ -82,6 +82,7 @@ function parseFluxFile(
   status: string,
 ): void {
   const raw = readFileSync(filePath, "utf-8");
+  const mtime = statSync(filePath).mtime;
   const lines = raw.split("\n");
   for (const line of lines) {
@@ -112,6 +113,11 @@ function parseFluxFile(
       );
     }
+    // Fall back to file mtime if no captured date
+    if (!timestamp) {
+      timestamp = mtime.toISOString();
+    }
     // Extract archived date if present (strip from description)
     rest = rest.replace(/\s*archived::\s*\S+/, "");

package/lib/indexers/obsidian.ts CHANGED Viewed

@@ -135,6 +135,8 @@ export async function indexObsidian(ctx: IndexerContext): Promise<void> {
       const title = basename(filePath, ".md");
+      if (!content.trim()) continue;
       ctx.insert({
         source: "obsidian",
         title,

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@voidwire/lore",
-  "version": "1.0.2",
+  "version": "1.0.4",
   "description": "Unified knowledge CLI - Search, list, and capture your indexed knowledge",
   "type": "module",
   "main": "./index.ts",