@context-vault/core 2.8.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/package.json +52 -0
- package/src/capture/file-ops.js +93 -0
- package/src/capture/formatters.js +29 -0
- package/src/capture/import-pipeline.js +46 -0
- package/src/capture/importers.js +387 -0
- package/src/capture/index.js +199 -0
- package/src/capture/ingest-url.js +252 -0
- package/src/constants.js +8 -0
- package/src/core/categories.js +51 -0
- package/src/core/config.js +127 -0
- package/src/core/files.js +108 -0
- package/src/core/frontmatter.js +120 -0
- package/src/core/status.js +146 -0
- package/src/index/db.js +268 -0
- package/src/index/embed.js +101 -0
- package/src/index/index.js +451 -0
- package/src/index.js +62 -0
- package/src/retrieve/index.js +219 -0
- package/src/server/helpers.js +31 -0
- package/src/server/tools/context-status.js +104 -0
- package/src/server/tools/delete-context.js +53 -0
- package/src/server/tools/get-context.js +235 -0
- package/src/server/tools/ingest-url.js +99 -0
- package/src/server/tools/list-context.js +134 -0
- package/src/server/tools/save-context.js +297 -0
- package/src/server/tools/submit-feedback.js +55 -0
- package/src/server/tools.js +111 -0
- package/src/sync/sync.js +235 -0
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Capture Layer — Public API
|
|
3
|
+
*
|
|
4
|
+
* Writes knowledge entries to vault as .md files and indexes them.
|
|
5
|
+
* captureAndIndex() is the write-through entry point (capture + index + rollback on failure).
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { existsSync, readFileSync, unlinkSync, writeFileSync } from "node:fs";
|
|
9
|
+
import { resolve } from "node:path";
|
|
10
|
+
import { ulid, slugify, kindToPath } from "../core/files.js";
|
|
11
|
+
import { categoryFor } from "../core/categories.js";
|
|
12
|
+
import { parseFrontmatter, formatFrontmatter } from "../core/frontmatter.js";
|
|
13
|
+
import { formatBody } from "./formatters.js";
|
|
14
|
+
import { writeEntryFile } from "./file-ops.js";
|
|
15
|
+
import { indexEntry } from "../index/index.js";
|
|
16
|
+
|
|
17
|
+
/**
 * Validate entry data and write it to the vault as a markdown file.
 *
 * Entity kinds carrying an identity_key are upserted: the deterministic slug
 * path is probed first so an existing file's id and created timestamp survive
 * the rewrite. Does NOT touch the index — see captureAndIndex for the
 * write-through path.
 *
 * @param {{ config }} ctx
 * @returns {object} Entry descriptor (id, filePath, …) suitable for indexEntry
 * @throws {Error} on missing/malformed kind, body, tags, or meta
 */
export function writeEntry(
  ctx,
  {
    kind,
    title,
    body,
    meta,
    tags,
    source,
    folder,
    identity_key,
    expires_at,
    userId,
  },
) {
  // Guard clauses — reject malformed input before touching the filesystem.
  if (!kind || typeof kind !== "string") {
    throw new Error("writeEntry: kind is required (non-empty string)");
  }
  if (!body || typeof body !== "string" || !body.trim()) {
    throw new Error("writeEntry: body is required (non-empty string)");
  }
  if (tags != null && !Array.isArray(tags)) {
    throw new Error("writeEntry: tags must be an array if provided");
  }
  if (meta != null && typeof meta !== "object") {
    throw new Error("writeEntry: meta must be an object if provided");
  }

  const category = categoryFor(kind);

  // Entities upsert onto a deterministic slug path; everything else is new.
  const upsertPath =
    category === "entity" && identity_key
      ? resolve(
          ctx.config.vaultDir,
          kindToPath(kind),
          `${slugify(identity_key)}.md`,
        )
      : null;

  let id;
  let createdAt;
  if (upsertPath && existsSync(upsertPath)) {
    // Re-writing an existing entity: keep its original id and created stamp.
    const { meta: fmMeta } = parseFrontmatter(
      readFileSync(upsertPath, "utf-8"),
    );
    id = fmMeta.id || ulid();
    createdAt = fmMeta.created || new Date().toISOString();
  } else {
    id = ulid();
    createdAt = new Date().toISOString();
  }

  const filePath = writeEntryFile(ctx.config.vaultDir, kind, {
    id,
    title,
    body,
    meta,
    tags,
    source,
    createdAt,
    folder,
    category,
    identity_key,
    expires_at,
  });

  return {
    id,
    filePath,
    kind,
    category,
    title,
    body,
    meta,
    tags,
    source,
    createdAt,
    identity_key,
    expires_at,
    userId: userId || null,
  };
}
|
|
100
|
+
|
|
101
|
+
/**
 * Update an existing entry's file on disk (merge provided fields with existing).
 * Does NOT re-index — caller must call indexEntry after.
 *
 * @param {{ config, stmts }} ctx
 * @param {object} existing — Row from vault table (from getEntryById)
 * @param {{ title?, body?, tags?, meta?, source?, expires_at? }} updates
 * @returns {object} Entry object suitable for indexEntry
 */
export function updateEntryFile(ctx, existing, updates) {
  // The on-disk frontmatter is read back only for its `created` timestamp.
  const { meta: fmMeta } = parseFrontmatter(
    readFileSync(existing.file_path, "utf-8"),
  );

  const existingMeta = existing.meta ? JSON.parse(existing.meta) : {};
  const existingTags = existing.tags ? JSON.parse(existing.tags) : [];

  // A field is only replaced when the caller explicitly supplied it:
  // undefined means "leave alone"; null is treated as a deliberate value.
  const pick = (next, prev) => (next !== undefined ? next : prev);
  const title = pick(updates.title, existing.title);
  const body = pick(updates.body, existing.body);
  const tags = pick(updates.tags, existingTags);
  const source = pick(updates.source, existing.source);
  const expires_at = pick(updates.expires_at, existing.expires_at);

  // Meta is shallow-merged rather than replaced wholesale.
  const mergedMeta = { ...existingMeta, ...(updates.meta || {}) };

  // Rebuild frontmatter: id first, then merged meta (minus the "folder"
  // routing hint and null/undefined values), then the well-known fields.
  const fmFields = { id: existing.id };
  for (const [key, value] of Object.entries(mergedMeta)) {
    if (key === "folder") continue;
    if (value != null) fmFields[key] = value;
  }
  if (existing.identity_key) fmFields.identity_key = existing.identity_key;
  if (expires_at) fmFields.expires_at = expires_at;
  fmFields.tags = tags;
  fmFields.source = source || "claude-code";
  fmFields.created = fmMeta.created || existing.created_at;

  const markdown =
    formatFrontmatter(fmFields) +
    formatBody(existing.kind, { title, body, meta: mergedMeta });
  writeFileSync(existing.file_path, markdown);

  return {
    id: existing.id,
    filePath: existing.file_path,
    kind: existing.kind,
    category: existing.category,
    title,
    body,
    // Empty meta collapses to undefined so it round-trips like a fresh entry.
    meta: Object.keys(mergedMeta).length ? mergedMeta : undefined,
    tags,
    source,
    createdAt: fmMeta.created || existing.created_at,
    identity_key: existing.identity_key,
    expires_at,
    userId: existing.user_id || null,
  };
}
|
|
167
|
+
|
|
168
|
+
/**
 * Write-through entry point: write an entry to the vault, then index it.
 * If indexing fails the file write is rolled back (restored for entity
 * upserts, deleted for brand-new entries) so vault and index stay in sync.
 *
 * @param {{ config, stmts }} ctx
 * @param {object} data — Entry fields accepted by writeEntry
 * @returns {Promise<object>} The written-and-indexed entry descriptor
 * @throws {Error} when indexing fails (with the original error as `cause`)
 */
export async function captureAndIndex(ctx, data) {
  // For entity upserts, snapshot the previous file content so a failed index
  // can restore it instead of deleting a pre-existing file.
  let previousContent = null;
  if (categoryFor(data.kind) === "entity" && data.identity_key) {
    const existingPath = resolve(
      ctx.config.vaultDir,
      kindToPath(data.kind),
      `${slugify(data.identity_key)}.md`,
    );
    if (existsSync(existingPath)) {
      previousContent = readFileSync(existingPath, "utf-8");
    }
  }

  const entry = writeEntry(ctx, data);
  try {
    await indexEntry(ctx, entry);
    return entry;
  } catch (err) {
    // Rollback: restore previous content for entity upserts, delete new files.
    // Rollback itself is best-effort — the indexing failure is what we report.
    try {
      if (previousContent) {
        writeFileSync(entry.filePath, previousContent);
      } else {
        unlinkSync(entry.filePath);
      }
    } catch {
      // Ignore rollback failure; the original indexing error is more useful.
    }
    // Preserve the underlying error (stack, type) via `cause` instead of
    // flattening it into the message string only.
    throw new Error(
      `Capture succeeded but indexing failed — file rolled back. ${err.message}`,
      { cause: err },
    );
  }
}
|
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
/**
 * Convert an HTML fragment to plain Markdown using a fixed pipeline of
 * regex rewrites. Best-effort: nested same-name tags and attribute edge
 * cases are out of scope; leftover tags are stripped at the end.
 *
 * @param {string} html
 * @returns {string} Markdown text with blank runs collapsed and trimmed
 */
export function htmlToMarkdown(html) {
  let md = html;

  // Drop non-content containers entirely (scripts, styles, page chrome).
  for (const tag of ["script", "style", "nav", "header", "footer", "aside"]) {
    md = md.replace(new RegExp(`<${tag}[\\s\\S]*?</${tag}>`, "gi"), "");
  }

  // Headings: <h1>…<h6> → "# …" through "###### …" lines.
  for (let level = 1; level <= 6; level++) {
    md = md.replace(
      new RegExp(`<h${level}[^>]*>([\\s\\S]*?)</h${level}>`, "gi"),
      (_, inner) => `\n${"#".repeat(level)} ${stripTags(inner).trim()}\n`,
    );
  }

  // Anchors: keep only links with visible text.
  md = md.replace(
    /<a[^>]*href="([^"]*)"[^>]*>([\s\S]*?)<\/a>/gi,
    (_, href, text) => {
      const label = stripTags(text).trim();
      return label ? `[${label}](${href})` : "";
    },
  );

  // Fenced code blocks — pre+code first so the bare <pre> rule can't eat them.
  md = md.replace(
    /<pre[^>]*><code[^>]*>([\s\S]*?)<\/code><\/pre>/gi,
    (_, inner) => `\n\`\`\`\n${decodeEntities(inner).trim()}\n\`\`\`\n`,
  );
  md = md.replace(
    /<pre[^>]*>([\s\S]*?)<\/pre>/gi,
    (_, inner) =>
      `\n\`\`\`\n${decodeEntities(stripTags(inner)).trim()}\n\`\`\`\n`,
  );

  // Inline code.
  md = md.replace(
    /<code[^>]*>([\s\S]*?)<\/code>/gi,
    (_, inner) => `\`${decodeEntities(inner).trim()}\``,
  );

  // Bold and italic (backreference keeps open/close tags paired).
  md = md.replace(
    /<(strong|b)[^>]*>([\s\S]*?)<\/\1>/gi,
    (_, __, inner) => `**${stripTags(inner).trim()}**`,
  );
  md = md.replace(
    /<(em|i)[^>]*>([\s\S]*?)<\/\1>/gi,
    (_, __, inner) => `*${stripTags(inner).trim()}*`,
  );

  // List items become dashes; the ul/ol wrappers fall to the strip below.
  md = md.replace(
    /<li[^>]*>([\s\S]*?)<\/li>/gi,
    (_, inner) => `- ${stripTags(inner).trim()}\n`,
  );

  // Paragraph-level structure.
  md = md.replace(/<br\s*\/?>/gi, "\n");
  md = md.replace(
    /<p[^>]*>([\s\S]*?)<\/p>/gi,
    (_, inner) => `\n${stripTags(inner).trim()}\n`,
  );
  md = md.replace(/<blockquote[^>]*>([\s\S]*?)<\/blockquote>/gi, (_, inner) => {
    const quoted = stripTags(inner)
      .trim()
      .split("\n")
      .map((line) => `> ${line}`)
      .join("\n");
    return `\n${quoted}\n`;
  });

  // Strip whatever tags remain, decode entities, collapse blank runs.
  md = stripTags(md);
  md = decodeEntities(md);
  return md.replace(/\n{3,}/g, "\n\n").trim();
}
|
|
108
|
+
|
|
109
|
+
/** Remove every HTML tag, leaving only the text between them. */
function stripTags(html) {
  return html.replaceAll(/<[^>]+>/g, "");
}
|
|
112
|
+
|
|
113
|
+
/**
 * Decode the common HTML entities (named + numeric) into literal characters.
 *
 * Order matters: `&amp;` is decoded LAST so that a double-escaped sequence
 * like "&amp;lt;" yields the literal text "&lt;" rather than "<".
 * Numeric entities use String.fromCodePoint so astral-plane characters
 * (emoji, etc.) decode correctly instead of producing broken surrogates.
 *
 * @param {string} text
 * @returns {string}
 */
function decodeEntities(text) {
  return text
    .replace(/&lt;/g, "<")
    .replace(/&gt;/g, ">")
    .replace(/&quot;/g, '"')
    .replace(/&apos;/g, "'")
    .replace(/&nbsp;/g, " ")
    .replace(/&#(\d+);/g, (_, n) => String.fromCodePoint(parseInt(n, 10)))
    .replace(/&#x([0-9a-f]+);/gi, (_, n) =>
      String.fromCodePoint(parseInt(n, 16)),
    )
    .replace(/&amp;/g, "&");
}
|
|
126
|
+
|
|
127
|
+
/**
 * Extract the main readable content from an HTML page.
 * Prefers <article> or <main>, falls back to <body>.
 *
 * @param {string} html
 * @param {string} url — currently unused; kept for interface stability
 * @returns {{ title: string, body: string }}
 */
export function extractHtmlContent(html, url) {
  // Page title from <title>, entity-decoded and tag-stripped.
  const titleMatch = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
  const title = titleMatch
    ? stripTags(decodeEntities(titleMatch[1])).trim()
    : "";

  // Main content region, by descending preference: article → main → body.
  const region =
    html.match(/<article[^>]*>([\s\S]*?)<\/article>/i) ??
    html.match(/<main[^>]*>([\s\S]*?)<\/main>/i) ??
    html.match(/<body[^>]*>([\s\S]*?)<\/body>/i);

  // When no recognizable wrapper exists, convert the whole document.
  const contentHtml = region ? region[1] : html;

  return { title, body: htmlToMarkdown(contentHtml) };
}
|
|
162
|
+
|
|
163
|
+
/**
 * Fetch a URL, extract readable content, and return an EntryData object.
 *
 * @param {string} url
 * @param {{ kind?: string, tags?: string[], source?: string, maxBodyLength?: number, timeoutMs?: number }} [opts]
 * @returns {Promise<{ kind: string, title: string, body: string, tags: string[], meta: object, source: string }>}
 * @throws {Error} on invalid URL, timeout, network failure, non-2xx status,
 *   or when no readable content could be extracted
 */
export async function ingestUrl(url, opts = {}) {
  const {
    kind = "reference",
    tags = [],
    source,
    maxBodyLength = 50000,
    timeoutMs = 15000,
  } = opts;

  // Validate up front so we fail before any network activity.
  let domain;
  try {
    domain = new URL(url).hostname;
  } catch {
    throw new Error(`Invalid URL: ${url}`);
  }

  // Abort the request if it runs past the timeout budget.
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);

  let response;
  try {
    response = await fetch(url, {
      signal: controller.signal,
      headers: {
        "User-Agent":
          "ContextVault/1.0 (+https://github.com/fellanH/context-vault)",
        Accept: "text/html,application/xhtml+xml,text/plain,*/*",
      },
    });
  } catch (err) {
    throw err.name === "AbortError"
      ? new Error(`Request timed out after ${timeoutMs}ms`)
      : new Error(`Fetch failed: ${err.message}`);
  } finally {
    clearTimeout(timer);
  }

  if (!response.ok) {
    throw new Error(`HTTP ${response.status}: ${response.statusText}`);
  }

  const contentType = response.headers.get("content-type") || "";
  const payload = await response.text();

  const isHtml =
    contentType.includes("text/html") ||
    contentType.includes("application/xhtml");

  // HTML gets readability extraction; anything else is used verbatim.
  let { title, body } = isHtml
    ? extractHtmlContent(payload, url)
    : { title: domain, body: payload };

  // Truncate oversized bodies, marking the cut.
  if (body.length > maxBodyLength) {
    body = body.slice(0, maxBodyLength) + "\n\n[Content truncated]";
  }

  if (!body.trim()) {
    throw new Error("No readable content extracted from URL");
  }

  return {
    kind,
    title: title || domain,
    body,
    tags: [...tags, "web-import"],
    meta: {
      url,
      domain,
      fetched_at: new Date().toISOString(),
      content_type: contentType.split(";")[0].trim() || "text/html",
    },
    source: source || domain,
  };
}
|
package/src/constants.js
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
// Size caps for incoming entry fields.
// NOTE(review): presumably enforced by the capture/save validation paths —
// confirm at call sites before changing any value.
export const MAX_BODY_LENGTH = 100 * 1024; // 100KB
export const MAX_TITLE_LENGTH = 500;
export const MAX_KIND_LENGTH = 64;
export const MAX_TAG_LENGTH = 100;
export const MAX_TAGS_COUNT = 20; // max number of tags per entry
export const MAX_META_LENGTH = 10 * 1024; // 10KB
export const MAX_SOURCE_LENGTH = 200;
export const MAX_IDENTITY_KEY_LENGTH = 200;
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* categories.js — Static kind→category mapping
|
|
3
|
+
*
|
|
4
|
+
* Three categories with distinct write semantics:
|
|
5
|
+
* knowledge — append-only, enduring (default)
|
|
6
|
+
* entity — upsert by identity_key, enduring
|
|
7
|
+
* event — append-only, decaying relevance
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
// Lookup table: entry kind → category. Kinds absent from this table fall
// back to "knowledge" (see categoryFor below).
const KIND_CATEGORY = {
  // Knowledge — append-only, enduring
  insight: "knowledge",
  decision: "knowledge",
  pattern: "knowledge",
  prompt: "knowledge",
  note: "knowledge",
  document: "knowledge",
  reference: "knowledge",
  // Entity — upsert, enduring
  contact: "entity",
  project: "entity",
  tool: "entity",
  source: "entity",
  // Event — append-only, decaying
  conversation: "event",
  message: "event",
  session: "event",
  task: "event",
  log: "event",
  feedback: "event",
};

/** Map category name → directory name on disk */
const CATEGORY_DIR_NAMES = {
  knowledge: "knowledge",
  entity: "entities",
  event: "events",
};

/** Set of valid category directory names (for reindex discovery) */
export const CATEGORY_DIRS = new Set(Object.values(CATEGORY_DIR_NAMES));
|
|
42
|
+
|
|
43
|
+
/** Category for a kind; unknown kinds default to append-only "knowledge". */
export function categoryFor(kind) {
  const mapped = KIND_CATEGORY[kind];
  return mapped || "knowledge";
}
|
|
46
|
+
|
|
47
|
+
/** Returns the category directory name for a given kind (e.g. "insight" → "knowledge") */
export function categoryDirFor(kind) {
  return CATEGORY_DIR_NAMES[categoryFor(kind)] || "knowledge";
}
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
import { existsSync, readFileSync } from "node:fs";
|
|
2
|
+
import { join, resolve } from "node:path";
|
|
3
|
+
import { homedir } from "node:os";
|
|
4
|
+
|
|
5
|
+
/**
 * Parse the supported CLI flags out of a process.argv-style array
 * (the first two entries — node binary and script path — are skipped).
 * A flag is only honored when it is immediately followed by a value.
 *
 * @param {string[]} argv
 * @returns {{ vaultDir?, dataDir?, dbPath?, devDir?, eventDecayDays? }}
 */
export function parseArgs(argv) {
  // Map avoids accidental prototype-key hits a plain object lookup could have.
  const flagKeys = new Map([
    ["--vault-dir", "vaultDir"],
    ["--data-dir", "dataDir"],
    ["--db-path", "dbPath"],
    ["--dev-dir", "devDir"],
    ["--event-decay-days", "eventDecayDays"],
  ]);

  const args = {};
  for (let i = 2; i < argv.length; i++) {
    const key = flagKeys.get(argv[i]);
    if (!key || !argv[i + 1]) continue;
    const value = argv[++i];
    // Decay days is the only numeric flag; everything else stays a string.
    args[key] = key === "eventDecayDays" ? Number(value) : value;
  }
  return args;
}
|
|
17
|
+
|
|
18
|
+
/**
 * Resolve the effective configuration from, in ascending precedence:
 * built-in defaults → config.json in the data dir → environment variables →
 * CLI flags. `resolvedFrom` records the last layer that overrode anything.
 *
 * CONTEXT_VAULT_* env vars take precedence over the legacy CONTEXT_MCP_*
 * names wherever both are consulted.
 *
 * @returns {object} config with absolute vaultDir/dataDir/dbPath/devDir,
 *   eventDecayDays, configPath, vaultDirExists, and optional hosted-account
 *   fields (hostedUrl, apiKey, userId, email, linkedAt)
 * @throws {Error} when config.json exists but cannot be parsed
 */
export function resolveConfig() {
  const HOME = homedir();
  const cliArgs = parseArgs(process.argv);

  // dataDir must resolve first: it决 determines where config.json is looked up.
  const dataDir = resolve(
    cliArgs.dataDir ||
      process.env.CONTEXT_VAULT_DATA_DIR ||
      process.env.CONTEXT_MCP_DATA_DIR ||
      join(HOME, ".context-mcp"),
  );
  // Layer 1: defaults.
  const config = {
    vaultDir: join(HOME, "vault"),
    dataDir,
    dbPath: join(dataDir, "vault.db"),
    devDir: join(HOME, "dev"),
    eventDecayDays: 30,
    resolvedFrom: "defaults",
  };

  // Layer 2: config.json (optional). A malformed file is a hard error —
  // silently ignoring it could route writes to the wrong vault.
  const configPath = join(dataDir, "config.json");
  if (existsSync(configPath)) {
    try {
      const fc = JSON.parse(readFileSync(configPath, "utf-8"));
      if (fc.vaultDir) config.vaultDir = fc.vaultDir;
      if (fc.dataDir) {
        config.dataDir = fc.dataDir;
        // Recompute dbPath so it follows the relocated data dir …
        config.dbPath = join(resolve(fc.dataDir), "vault.db");
      }
      // … unless an explicit dbPath overrides it.
      if (fc.dbPath) config.dbPath = fc.dbPath;
      if (fc.devDir) config.devDir = fc.devDir;
      if (fc.eventDecayDays != null) config.eventDecayDays = fc.eventDecayDays;
      // Hosted account linking (Phase 4)
      if (fc.hostedUrl) config.hostedUrl = fc.hostedUrl;
      if (fc.apiKey) config.apiKey = fc.apiKey;
      if (fc.userId) config.userId = fc.userId;
      if (fc.email) config.email = fc.email;
      if (fc.linkedAt) config.linkedAt = fc.linkedAt;
      config.resolvedFrom = "config file";
    } catch (e) {
      throw new Error(
        `[context-vault] Invalid config at ${configPath}: ${e.message}`,
      );
    }
  }
  config.configPath = configPath;

  // Layer 3: environment variables (new CONTEXT_VAULT_* preferred over
  // legacy CONTEXT_MCP_*).
  if (
    process.env.CONTEXT_VAULT_VAULT_DIR ||
    process.env.CONTEXT_MCP_VAULT_DIR
  ) {
    config.vaultDir =
      process.env.CONTEXT_VAULT_VAULT_DIR || process.env.CONTEXT_MCP_VAULT_DIR;
    config.resolvedFrom = "env";
  }
  if (process.env.CONTEXT_VAULT_DB_PATH || process.env.CONTEXT_MCP_DB_PATH) {
    config.dbPath =
      process.env.CONTEXT_VAULT_DB_PATH || process.env.CONTEXT_MCP_DB_PATH;
    config.resolvedFrom = "env";
  }
  if (process.env.CONTEXT_VAULT_DEV_DIR || process.env.CONTEXT_MCP_DEV_DIR) {
    config.devDir =
      process.env.CONTEXT_VAULT_DEV_DIR || process.env.CONTEXT_MCP_DEV_DIR;
    config.resolvedFrom = "env";
  }
  if (
    process.env.CONTEXT_VAULT_EVENT_DECAY_DAYS != null ||
    process.env.CONTEXT_MCP_EVENT_DECAY_DAYS != null
  ) {
    // ?? (not ||) so an explicit "0" in the new var is not skipped.
    config.eventDecayDays = Number(
      process.env.CONTEXT_VAULT_EVENT_DECAY_DAYS ??
        process.env.CONTEXT_MCP_EVENT_DECAY_DAYS,
    );
    config.resolvedFrom = "env";
  }

  // Hosted credentials from env do not bump resolvedFrom — they are
  // additions, not overrides of the path layout.
  if (process.env.CONTEXT_VAULT_API_KEY) {
    config.apiKey = process.env.CONTEXT_VAULT_API_KEY;
  }
  if (process.env.CONTEXT_VAULT_HOSTED_URL) {
    config.hostedUrl = process.env.CONTEXT_VAULT_HOSTED_URL;
  }

  // Layer 4 (highest precedence): CLI flags.
  if (cliArgs.vaultDir) {
    config.vaultDir = cliArgs.vaultDir;
    config.resolvedFrom = "CLI args";
  }
  if (cliArgs.dbPath) {
    config.dbPath = cliArgs.dbPath;
    config.resolvedFrom = "CLI args";
  }
  if (cliArgs.devDir) {
    config.devDir = cliArgs.devDir;
    config.resolvedFrom = "CLI args";
  }
  if (cliArgs.eventDecayDays != null) {
    config.eventDecayDays = cliArgs.eventDecayDays;
    config.resolvedFrom = "CLI args";
  }

  // Resolve all paths to absolute
  config.vaultDir = resolve(config.vaultDir);
  config.dataDir = resolve(config.dataDir);
  config.dbPath = resolve(config.dbPath);
  config.devDir = resolve(config.devDir);

  // Check existence
  config.vaultDirExists = existsSync(config.vaultDir);

  return config;
}
|