npm - @voidwire/lore - Versions diffs - 0.9.1 → 1.0.1 - Mend

@voidwire/lore 0.9.1 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

package/cli.ts +66 -1
package/lib/config.ts +134 -0
package/lib/db.ts +2 -2
package/lib/indexer.ts +213 -0
package/lib/indexers/blogs.ts +146 -0
package/lib/indexers/captures.ts +105 -0
package/lib/indexers/commits.ts +90 -0
package/lib/indexers/development.ts +68 -0
package/lib/indexers/events.ts +61 -0
package/lib/indexers/explorations.ts +89 -0
package/lib/indexers/flux.ts +142 -0
package/lib/indexers/index.ts +41 -0
package/lib/indexers/insights.ts +53 -0
package/lib/indexers/learnings.ts +53 -0
package/lib/indexers/observations.ts +53 -0
package/lib/indexers/obsidian.ts +151 -0
package/lib/indexers/personal.ts +262 -0
package/lib/indexers/readmes.ts +49 -0
package/lib/indexers/sessions.ts +127 -0
package/lib/indexers/teachings.ts +52 -0
package/lib/info.ts +4 -8
package/lib/list.ts +25 -39
package/lib/projects.ts +28 -37
package/lib/realtime.ts +16 -23
package/lib/search.ts +6 -12
package/lib/semantic.ts +6 -31
package/package.json +3 -2

package/cli.ts CHANGED Viewed

@@ -57,6 +57,8 @@ import {
   type ObservationConfidence,
 } from "./index";
 import { isValidLoreType, LORE_TYPES } from "./lib/types";
+import { runIndexer } from "./lib/indexer";
+import { indexers } from "./lib/indexers/index";
 // ============================================================================
 // Argument Parsing
@@ -99,6 +101,8 @@ const BOOLEAN_FLAGS = new Set([
   "exact",
   "semantic",
   "brief",
+  "list",
+  "rebuild",
 ]);
 function getPositionalArgs(args: string[]): string[] {
@@ -830,6 +834,63 @@ async function handleCapture(args: string[]): Promise<void> {
   }
 }
+// ============================================================================
+// Index Command
+// ============================================================================
+/**
+ * Handle `lore index`: print help, list the registered indexers, or run them.
+ *
+ * The first positional argument selects the source; with none, "all" is used.
+ * Every path through this function ends the process: help and --list exit 0,
+ * a successful run exits 0, and a failed run is reported through fail() with
+ * code 2.
+ */
+async function handleIndex(args: string[]): Promise<void> {
+  // showIndexHelp() terminates the process itself, so nothing follows it.
+  if (hasFlag(args, "help")) showIndexHelp();
+  if (hasFlag(args, "list")) {
+    const registered = Object.keys(indexers);
+    console.log("Registered indexers:");
+    if (registered.length > 0) {
+      for (const indexerName of registered) {
+        console.log(`  - ${indexerName}`);
+      }
+    } else {
+      console.log("  (none)");
+    }
+    process.exit(0);
+  }
+  const positional = getPositionalArgs(args);
+  const target = positional.length === 0 ? "all" : positional[0];
+  const shouldRebuild = hasFlag(args, "rebuild");
+  try {
+    await runIndexer(target, shouldRebuild, indexers);
+    process.exit(0);
+  } catch (error) {
+    const reason = error instanceof Error ? error.message : "Unknown error";
+    fail(`Index failed: ${reason}`, 2);
+  }
+}
+/** Print the usage text for `lore index`, then exit the process with code 0. */
+function showIndexHelp(): void {
+  const usage = `
+lore index - Run indexers to populate the search database
+Usage:
+  lore index                            Run all registered indexers
+  lore index <source>                   Run a specific indexer
+  lore index --rebuild                  Clear and rebuild all sources
+  lore index --list                     List registered indexers
+Options:
+  --rebuild         Clear existing entries before indexing
+  --list            Show registered indexers and exit
+  --help            Show this help
+Examples:
+  lore index --list
+  lore index obsidian
+  lore index --rebuild
+  lore index commits --rebuild
+`;
+  console.log(usage);
+  process.exit(0);
+}
 // ============================================================================
 // Help & Main
 // ============================================================================
@@ -853,6 +914,7 @@ Usage:
   lore about <project>                  Aggregate view of project knowledge
   lore about <project> --brief          Compact project summary
   lore capture task|knowledge|note|teaching  Capture knowledge
+  lore index [source] [--rebuild] [--list]  Run indexers
 Search Options:
   --exact           Use FTS5 text search (bypasses semantic search)
@@ -1234,9 +1296,12 @@ async function main(): Promise<void> {
     case "capture":
       await handleCapture(commandArgs);
       break;
+    case "index":
+      await handleIndex(commandArgs);
+      break;
     default:
       fail(
-        `Unknown command: ${command}. Use: search, list, sources, info, projects, about, or capture`,
+        `Unknown command: ${command}. Use: search, list, sources, info, projects, about, capture, or index`,
       );
   }
 }

package/lib/config.ts ADDED Viewed

@@ -0,0 +1,134 @@
+/**
+ * lib/config.ts - TOML configuration reader
+ *
+ * Reads ~/.config/lore/config.toml, validates required fields,
+ * resolves ~ to absolute paths, and caches the result.
+ *
+ * Usage:
+ *   import { getConfig } from "./config";
+ *   const config = getConfig();
+ *   console.log(config.paths.data);      // /Users/rudy/.local/share/lore
+ *   console.log(config.database.sqlite);  // /Users/rudy/.local/share/lore/lore.db
+ */
+import { readFileSync } from "fs";
+import { homedir } from "os";
+import { parse as parseToml } from "@iarna/toml";
+/**
+ * Shape of the configuration returned by getConfig().
+ * Every string below has passed through resolvePath(), so a leading "~"
+ * has been replaced with the user's home directory.
+ */
+export interface LoreConfig {
+  // [paths] section. getConfig() throws unless the six non-optional keys
+  // are present as strings.
+  paths: {
+    data: string;
+    obsidian: string;
+    explorations: string;
+    blogs: string;
+    projects: string;
+    personal: string;
+    // Optional keys: undefined when absent from the file or not a string.
+    session_events?: string;
+    flux?: string;
+    flux_projects?: string;
+  };
+  // [database] section.
+  database: {
+    // Path handed to `new Database(...)` by the indexer and returned by getDatabasePath().
+    sqlite: string;
+  };
+}
+// Module-level cache: set by the first successful getConfig() call and returned by every later call.
+let cachedConfig: LoreConfig | null = null;
+/**
+ * Expand a leading "~" to the current user's home directory.
+ *
+ * Only a bare "~" or a "~/" prefix is expanded. Anything else that merely
+ * starts with a tilde ("~other/dir", "~backup") is returned unchanged rather
+ * than being glued onto the home directory. The result is built by
+ * concatenation, so "$" sequences in the home directory path are never
+ * interpreted as String.replace substitution patterns.
+ *
+ * @param path - Path as written in config.toml.
+ * @returns The path with a leading "~" or "~/" expanded; otherwise `path` as given.
+ */
+function resolvePath(path: string): string {
+  if (path === "~") return homedir();
+  if (path.startsWith("~/")) return homedir() + path.slice(1);
+  return path;
+}
+/**
+ * Load, validate, and cache the lore configuration.
+ *
+ * Reads ~/.config/lore/config.toml, requires [paths] and [database]
+ * sections with string values for every mandatory key, expands "~" through
+ * resolvePath(), and memoizes the result in cachedConfig.
+ *
+ * @returns The resolved configuration; repeat calls return the cached object.
+ * @throws Error when the file cannot be read, is not valid TOML, or lacks a
+ *   required section or field.
+ */
+export function getConfig(): LoreConfig {
+  if (cachedConfig) {
+    return cachedConfig;
+  }
+  const configPath = `${homedir()}/.config/lore/config.toml`;
+  let tomlText: string;
+  try {
+    tomlText = readFileSync(configPath, "utf-8");
+  } catch {
+    // Any read failure is reported as a missing file.
+    throw new Error(
+      `Config file not found: ${configPath}\n` +
+        `Create it with [paths] and [database] sections.\n` +
+        `See: https://github.com/nickpending/llmcli-tools/tree/main/packages/lore#configuration`,
+    );
+  }
+  let doc: Record<string, unknown>;
+  try {
+    doc = parseToml(tomlText) as Record<string, unknown>;
+  } catch (err) {
+    const reason = err instanceof Error ? err.message : String(err);
+    throw new Error(`Failed to parse config.toml: ${reason}`);
+  }
+  // Sections are checked in file order: [paths] first, then [database].
+  const requireSection = (
+    name: "paths" | "database",
+  ): Record<string, unknown> => {
+    const section = doc[name];
+    if (!section || typeof section !== "object") {
+      throw new Error(
+        `Invalid config: missing [${name}] section in config.toml`,
+      );
+    }
+    return section as Record<string, unknown>;
+  };
+  const paths = requireSection("paths");
+  const database = requireSection("database");
+  // Report the first mandatory path key that is absent or not a string.
+  const mandatory = [
+    "data",
+    "obsidian",
+    "explorations",
+    "blogs",
+    "projects",
+    "personal",
+  ];
+  const badField = mandatory.find((field) => typeof paths[field] !== "string");
+  if (badField !== undefined) {
+    throw new Error(
+      `Invalid config: paths.${badField} is missing or not a string`,
+    );
+  }
+  if (typeof database.sqlite !== "string") {
+    throw new Error(
+      "Invalid config: database.sqlite is missing or not a string",
+    );
+  }
+  // Optional keys resolve to undefined unless they are strings.
+  const optional = (value: unknown): string | undefined =>
+    typeof value === "string" ? resolvePath(value) : undefined;
+  cachedConfig = {
+    paths: {
+      data: resolvePath(paths.data as string),
+      obsidian: resolvePath(paths.obsidian as string),
+      explorations: resolvePath(paths.explorations as string),
+      blogs: resolvePath(paths.blogs as string),
+      projects: resolvePath(paths.projects as string),
+      personal: resolvePath(paths.personal as string),
+      session_events: optional(paths.session_events),
+      flux: optional(paths.flux),
+      flux_projects: optional(paths.flux_projects),
+    },
+    database: {
+      sqlite: resolvePath(database.sqlite as string),
+    },
+  };
+  return cachedConfig;
+}

package/lib/db.ts CHANGED Viewed

@@ -7,7 +7,7 @@
 import { Database } from "bun:sqlite";
 import { existsSync } from "fs";
-import { homedir } from "os";
+import { getConfig } from "./config";
 // Use Homebrew SQLite on macOS to enable extension loading
 // Must be called before any Database instances are created
@@ -20,7 +20,7 @@ if (existsSync(HOMEBREW_SQLITE)) {
  * Get the path to the lore database
  */
 export function getDatabasePath(): string {
-  return `${homedir()}/.local/share/lore/lore.db`;
+  return getConfig().database.sqlite;
 }
 /**

package/lib/indexer.ts ADDED Viewed

@@ -0,0 +1,213 @@
+/**
+ * lib/indexer.ts - Indexer framework core
+ *
+ * Shared framework for all indexers. Handles:
+ * - IndexEntry/IndexerContext interfaces
+ * - Content chunking (2500 chars, 200 overlap, sentence boundaries)
+ * - Content hash dedup (SHA-256)
+ * - Entry validation (no topic/content in metadata, no internals)
+ * - FTS5 parameterized INSERT
+ * - Orchestration (runIndexer)
+ *
+ * Usage:
+ *   import { runIndexer, type IndexerFunction } from "./indexer";
+ *   const myIndexer: IndexerFunction = async (ctx) => {
+ *     ctx.insert({ source: "mySource", title: "...", content: "...", topic: "..." });
+ *   };
+ */
+import { Database } from "bun:sqlite";
+import { createHash } from "crypto";
+import { getConfig, type LoreConfig } from "./config";
+/** One document passed to IndexerContext.insert for storage in the `search` table. */
+export interface IndexEntry {
+  source: string; // stored in the `source` column
+  title: string; // stored in the `title` column; repeated on every chunk of the entry
+  content: string; // SHA-256 hashed for dedup, then split into chunks before insert
+  topic: string; // stored in the `topic` column; validateEntry warns if metadata also has it
+  type?: string; // stored as "" when omitted
+  timestamp?: string; // stored as "" when omitted
+  // JSON-serialized into the `metadata` column ({} when omitted). Must not
+  // contain content_hash, chunk_idx or total_chunks: validateEntry throws.
+  metadata?: Record<string, unknown>;
+}
+/** Shared state handed to every indexer by runIndexer. */
+export interface IndexerContext {
+  db: Database; // the connection runIndexer opened; closed when runIndexer returns
+  config: LoreConfig; // result of getConfig()
+  insert: (entry: IndexEntry) => void; // validate, dedup, chunk, and insert one entry
+  rebuild: boolean; // the rebuild flag runIndexer was called with
+}
+/** An indexer: receives the shared context and calls ctx.insert for each entry it finds. */
+export type IndexerFunction = (ctx: IndexerContext) => Promise<void>;
+/**
+ * Split content into overlapping chunks.
+ *
+ * Content of at most 2500 characters is returned as a single chunk. Longer
+ * content is cut into windows of up to 2500 characters; each following
+ * window starts 200 characters before the previous cut. A window that does
+ * not reach the end of the content is shortened to end just after its last
+ * "\n\n" when that lies in the final 500 characters. Otherwise, if the
+ * window contains sentence punctuation followed by whitespace and a capital
+ * letter, it is shortened to end just after its last ". " under the same
+ * final-500-characters rule.
+ */
+function chunkContent(content: string): string[] {
+  const CHUNK_SIZE = 2500;
+  const OVERLAP = 200;
+  const MIN_BREAK = CHUNK_SIZE - 500;
+  const total = content.length;
+  if (total <= CHUNK_SIZE) return [content];
+  const pieces: string[] = [];
+  for (let offset = 0; offset < total; ) {
+    let cut = Math.min(offset + CHUNK_SIZE, total);
+    // Only a window that stops short of the end is eligible for a softer cut.
+    if (cut < total) {
+      const span = content.slice(offset, cut);
+      const paragraphAt = span.lastIndexOf("\n\n");
+      if (paragraphAt > MIN_BREAK) {
+        cut = offset + paragraphAt + 2;
+      } else if (/[.!?]\s+(?=[A-Z])/.test(span)) {
+        // The regex only detects that a sentence boundary exists; the cut
+        // position itself is the last literal ". " in the window.
+        const periodAt = span.lastIndexOf(". ");
+        if (periodAt > MIN_BREAK) {
+          cut = offset + periodAt + 2;
+        }
+      }
+    }
+    pieces.push(content.slice(offset, cut));
+    if (cut >= total) break;
+    offset = cut - OVERLAP;
+  }
+  return pieces;
+}
+/**
+ * Check an entry's metadata before it is inserted.
+ *
+ * Logs a warning when metadata carries "topic" or "content", and throws when
+ * it carries one of the framework-reserved keys (content_hash, chunk_idx,
+ * total_chunks). A missing metadata object is treated as empty.
+ *
+ * @throws Error naming the first reserved key found in metadata.
+ */
+function validateEntry(entry: IndexEntry): void {
+  const metadata = entry.metadata || {};
+  const label = `${entry.source}:${entry.title}`;
+  for (const promoted of ["topic", "content"]) {
+    if (promoted in metadata) {
+      console.warn(
+        `WARNING: ${promoted} should not be in metadata for ${label}`,
+      );
+    }
+  }
+  const reserved = ["content_hash", "chunk_idx", "total_chunks"];
+  const offending = reserved.find((key) => key in metadata);
+  if (offending !== undefined) {
+    throw new Error(
+      `Framework internal '${offending}' found in metadata for ${label}`,
+    );
+  }
+}
+/**
+ * Build the IndexerContext shared by the indexers in one run.
+ *
+ * The returned `insert` validates the entry, skips it when the SHA-256 of
+ * its content is already in `seenHashes`, records the hash, splits the
+ * content into chunks, and writes one `search` row per chunk through a
+ * single prepared, parameterized INSERT.
+ *
+ * @param db - Open database connection; the statement is prepared once here.
+ * @param config - Resolved configuration, exposed unchanged on the context.
+ * @param rebuild - Rebuild flag, exposed unchanged on the context.
+ * @param seenHashes - Dedup set, mutated by every accepted insert.
+ */
+export function createIndexerContext(
+  db: Database,
+  config: LoreConfig,
+  rebuild: boolean,
+  seenHashes: Set<string>,
+): IndexerContext {
+  const insertStmt = db.prepare(
+    "INSERT INTO search (source, title, content, metadata, topic, type, timestamp) VALUES (?, ?, ?, ?, ?, ?, ?)",
+  );
+  const insert = (entry: IndexEntry): void => {
+    validateEntry(entry);
+    // Dedup key is the content alone: source and title are not part of it.
+    const digest = createHash("sha256").update(entry.content).digest("hex");
+    if (seenHashes.has(digest)) return;
+    seenHashes.add(digest);
+    const pieces = chunkContent(entry.content);
+    // Per-entry values are identical for every chunk, so compute them once.
+    const metadataJson = JSON.stringify(entry.metadata || {});
+    const type = entry.type || "";
+    const timestamp = entry.timestamp || "";
+    pieces.forEach((piece) => {
+      insertStmt.run(
+        entry.source,
+        entry.title,
+        piece,
+        metadataJson,
+        entry.topic,
+        type,
+        timestamp,
+      );
+    });
+  };
+  return { db, config, rebuild, insert };
+}
+/**
+ * Main indexing orchestrator.
+ * Runs the registered indexer for the given source, or every registered
+ * indexer when source is "all".
+ *
+ * The requested source is validated before the database is opened. An
+ * unregistered name is an error rather than a logged no-op that still
+ * reports success. The lookup uses an own-property check, so names such as
+ * "toString" or "constructor" cannot resolve to members inherited from
+ * Object.prototype.
+ *
+ * With rebuild set, each source's rows are deleted immediately before its
+ * indexer runs. The dedup set starts empty on every call, so it only
+ * suppresses duplicate content seen within this run.
+ *
+ * @param source - A key of `registry`, or "all".
+ * @param rebuild - Delete each source's existing rows before re-indexing it.
+ * @param registry - Indexer functions keyed by source name.
+ * @throws Error when `source` is not "all" and is not a key of `registry`;
+ *   also propagates errors from getConfig(), the database, and the indexers.
+ */
+export async function runIndexer(
+  source: string | "all",
+  rebuild: boolean,
+  registry: Record<string, IndexerFunction>,
+): Promise<void> {
+  const config = getConfig();
+  // Determine which indexers to run, and reject unknown names up front.
+  const toRun = source === "all" ? Object.keys(registry) : [source];
+  const unknown = toRun.filter(
+    (src) => !Object.prototype.hasOwnProperty.call(registry, src),
+  );
+  if (unknown.length > 0) {
+    const available = Object.keys(registry).join(", ") || "(none)";
+    throw new Error(
+      `Unknown source: ${unknown.join(", ")}. Registered indexers: ${available}`,
+    );
+  }
+  const db = new Database(config.database.sqlite);
+  try {
+    db.run("PRAGMA busy_timeout = 5000");
+    const seenHashes = new Set<string>();
+    const ctx = createIndexerContext(db, config, rebuild, seenHashes);
+    for (const src of toRun) {
+      const indexer = registry[src];
+      // Unreachable after the validation above; kept so the type stays narrow.
+      if (!indexer) continue;
+      console.log(`Indexing ${src}...`);
+      // Clear source if rebuilding
+      if (rebuild) {
+        db.run("DELETE FROM search WHERE source = ?", [src]);
+      }
+      await indexer(ctx);
+    }
+    console.log("Indexing complete");
+  } finally {
+    // Always release the connection, including when an indexer throws.
+    db.close();
+  }
+}

package/lib/indexers/blogs.ts ADDED Viewed

@@ -0,0 +1,146 @@
+/**
+ * lib/indexers/blogs.ts - Hugo blog posts indexer
+ *
+ * Scans blog content/posts directory for markdown files.
+ * Extracts title, date, categories, tags from frontmatter.
+ * Derives URL from filename when slug not available.
+ *
+ * Source: blogs
+ * Topic: frontmatter categories joined (empty if none)
+ * Type: (empty)
+ * Timestamp: frontmatter date or file mtime as ISO 8601
+ */
+import { readdirSync, readFileSync, statSync, existsSync } from "fs";
+import { join, basename } from "path";
+import type { IndexerContext } from "../indexer";
+/**
+ * Recursively collect the paths of all ".md" files under a directory.
+ *
+ * @param dir - Directory to scan; a path that does not exist contributes nothing.
+ * @param files - Accumulator that matches are appended to (shared across the recursion).
+ * @returns The same `files` array that was passed in.
+ */
+function walkMarkdownFiles(dir: string, files: string[] = []): string[] {
+  if (existsSync(dir)) {
+    for (const dirent of readdirSync(dir, { withFileTypes: true })) {
+      const target = join(dir, dirent.name);
+      if (dirent.isDirectory()) {
+        walkMarkdownFiles(target, files);
+        continue;
+      }
+      if (dirent.isFile() && dirent.name.endsWith(".md")) {
+        files.push(target);
+      }
+    }
+  }
+  return files;
+}
+/**
+ * Index every markdown file under <paths.blogs>/content/posts.
+ *
+ * For each file: parse the leading "---" frontmatter block (title, date,
+ * slug, categories, tags), strip it from the body, append a "Tags:" line,
+ * and insert one entry with source "blogs". A file that throws is logged
+ * and skipped so the remaining posts are still processed.
+ *
+ * @param ctx - Indexer context; supplies the blogs path and the insert helper.
+ */
+export async function indexBlogs(ctx: IndexerContext): Promise<void> {
+  const blogsDir = ctx.config.paths.blogs;
+  const postsDir = join(blogsDir, "content", "posts");
+  if (!existsSync(postsDir)) {
+    console.log(`Blog posts directory not found: ${postsDir}`);
+    return;
+  }
+  const files = walkMarkdownFiles(postsDir);
+  for (const filePath of files) {
+    try {
+      const raw = readFileSync(filePath, "utf-8");
+      // Defaults used when there is no frontmatter or a key is absent.
+      let content = raw;
+      let title = basename(filePath, ".md");
+      let date: string | undefined;
+      let categories: string[] = [];
+      let tags: string[] = [];
+      let slug: string | undefined;
+      // Extract frontmatter
+      // NOTE(review): the pattern only matches "\n" line endings; a file with
+      // "\r\n" endings is treated as having no frontmatter.
+      const frontmatterMatch = raw.match(/^---\n([\s\S]*?)\n---\n/);
+      if (frontmatterMatch) {
+        const frontmatter = frontmatterMatch[1];
+        // title and slug tolerate optional surrounding double quotes.
+        const titleMatch = frontmatter.match(/^title:\s*"?(.+?)"?$/m);
+        const dateMatch = frontmatter.match(/^date:\s*(.+)$/m);
+        const slugMatch = frontmatter.match(/^slug:\s*"?(.+?)"?$/m);
+        if (titleMatch) title = titleMatch[1].trim();
+        if (dateMatch) date = dateMatch[1].trim();
+        if (slugMatch) slug = slugMatch[1].trim();
+        // Try inline: categories: [foo, bar]
+        const categoriesMatch = frontmatter.match(/^categories:\s*\[(.+)\]$/m);
+        if (categoriesMatch) {
+          // Inline form strips double quotes only; single quotes are kept.
+          categories = categoriesMatch[1]
+            .split(",")
+            .map((c) => c.trim().replace(/"/g, ""));
+        } else {
+          // Try multi-line: categories:\n  - foo\n  - bar
+          const multiMatch = frontmatter.match(
+            /^categories:\s*\n((?:\s+-\s+.+\n?)+)/m,
+          );
+          if (multiMatch) {
+            // NOTE(review): unlike the inline form, quotes are not stripped here.
+            categories = multiMatch[1]
+              .split("\n")
+              .map((l) => l.replace(/^\s+-\s+/, "").trim())
+              .filter(Boolean);
+          }
+        }
+        // Try inline: tags: [foo, bar]
+        const tagsInlineMatch = frontmatter.match(/^tags:\s*\[(.+)\]$/m);
+        if (tagsInlineMatch) {
+          tags = tagsInlineMatch[1]
+            .split(",")
+            .map((t) => t.trim().replace(/"/g, ""));
+        } else {
+          // Try multi-line: tags:\n  - foo\n  - bar
+          const tagsMultiMatch = frontmatter.match(
+            /^tags:\s*\n((?:\s+-\s+.+\n?)+)/m,
+          );
+          if (tagsMultiMatch) {
+            tags = tagsMultiMatch[1]
+              .split("\n")
+              .map((l) => l.replace(/^\s+-\s+/, "").trim())
+              .filter(Boolean);
+          }
+        }
+        // Body is everything after the closing "---" line.
+        content = raw.slice(frontmatterMatch[0].length);
+      }
+      // Append tags to content for search visibility
+      if (tags.length > 0) {
+        content += `\nTags: ${tags.join(", ")}`;
+      }
+      // Topic from categories
+      const topic = categories.length > 0 ? categories.join(" ") : "";
+      // URL from slug or filename
+      const urlSlug = slug || basename(filePath, ".md");
+      const url = `https://labs.voidwire.info/posts/${urlSlug}/`;
+      // Word count
+      // Counted after the "Tags:" line is appended, so tag words are included.
+      const wordCount = content.split(/\s+/).filter(Boolean).length;
+      // The frontmatter date is passed through exactly as written; only the
+      // mtime fallback is formatted with toISOString().
+      const timestamp = date || statSync(filePath).mtime.toISOString();
+      const metadata: Record<string, unknown> = {};
+      // url is always a non-empty string, so this branch always runs.
+      if (url) metadata.url = url;
+      // A word count of 0 is omitted from metadata.
+      if (wordCount) metadata.word_count = wordCount;
+      ctx.insert({
+        source: "blogs",
+        title: `[blog] ${title}`,
+        content,
+        topic,
+        timestamp,
+        metadata: Object.keys(metadata).length > 0 ? metadata : undefined,
+      });
+    } catch (e) {
+      // NOTE(review): ctx.insert sits inside this try block, so an insert or
+      // validation error is also reported with the "Failed to read" wording.
+      console.warn(`Failed to read ${filePath}: ${e}`);
+      continue;
+    }
+  }
+}