npm - @openparachute/vault - Versions diffs - 0.4.7-rc.2 → 0.4.8-rc.4 - Mend

@openparachute/vault 0.4.7-rc.2 → 0.4.8-rc.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (39) hide show

package/README.md +44 -10
package/core/src/connection-pragmas.test.ts +232 -0
package/core/src/core.test.ts +257 -0
package/core/src/cursor.test.ts +160 -0
package/core/src/cursor.ts +272 -0
package/core/src/mcp.ts +51 -7
package/core/src/notes.ts +164 -2
package/core/src/schema.ts +98 -2
package/core/src/store.ts +11 -1
package/core/src/types.ts +32 -0
package/package.json +1 -1
package/src/auth-status.ts +4 -0
package/src/auto-transcribe.test.ts +116 -0
package/src/auto-transcribe.ts +48 -0
package/src/cli.ts +57 -48
package/src/config.test.ts +26 -0
package/src/config.ts +53 -1
package/src/db.ts +15 -2
package/src/mcp-install-interactive.test.ts +23 -2
package/src/mcp-install-interactive.ts +21 -2
package/src/mcp-install.test.ts +40 -0
package/src/mcp-tools.ts +17 -1
package/src/module-config.ts +70 -14
package/src/module-manifest.test.ts +93 -0
package/src/module-manifest.ts +94 -0
package/src/routes.ts +267 -50
package/src/scribe-discovery.test.ts +77 -0
package/src/scribe-discovery.ts +91 -0
package/src/scribe-env.test.ts +66 -1
package/src/scribe-env.ts +42 -1
package/src/self-register.test.ts +380 -0
package/src/self-register.ts +234 -0
package/src/server.ts +46 -11
package/src/transcript-note.test.ts +171 -0
package/src/transcript-note.ts +189 -0
package/src/transcription-registry.ts +22 -0
package/src/transcription-worker.test.ts +250 -0
package/src/transcription-worker.ts +186 -27
package/src/vault.test.ts +347 -0

package/core/src/cursor.test.ts ADDED Viewed

@@ -0,0 +1,160 @@
+/**
+ * Unit tests for the opaque-cursor primitives (vault#313).
+ *
+ * Integration tests against `queryNotesPaged` live in core.test.ts under
+ * `describe("cursor pagination")`. This file pins down the
+ * encode/decode/hash invariants directly so a regression in the codec
+ * surfaces here before it reaches the wider query pipeline.
+ */
+import { describe, it, expect } from "bun:test";
+import {
+  CURSOR_VERSION,
+  CursorError,
+  computeQueryHash,
+  decodeCursor,
+  encodeCursor,
+  isoToMillis,
+  millisToIso,
+} from "./cursor.js";
+describe("cursor codec", () => {
+  // Runs `fn` and returns whatever it threw. When `fn` completes without
+  // throwing, the "expected throw" sentinel error is returned instead, so
+  // the assertions that follow run (and fail) against the sentinel.
+  const thrownBy = (fn: () => unknown): any => {
+    try {
+      fn();
+    } catch (err) {
+      return err;
+    }
+    return new Error("expected throw");
+  };
+  it("encodes + decodes round-trips a payload", () => {
+    const original = {
+      v: CURSOR_VERSION,
+      last_updated_at: 1714000000000,
+      last_id: "note-xyz",
+      query_hash: "a".repeat(64),
+    };
+    const encoded = encodeCursor(original);
+    expect(typeof encoded).toBe("string");
+    expect(encoded.length).toBeGreaterThan(0);
+    // base64url output must not contain `+`, `/`, or `=` padding.
+    expect(encoded).not.toMatch(/[+/=]/);
+    expect(decodeCursor(encoded)).toEqual(original);
+  });
+  it("decodeCursor rejects an empty string", () => {
+    const err = thrownBy(() => decodeCursor(""));
+    expect(err).toBeInstanceOf(CursorError);
+    expect(err.code).toBe("cursor_invalid");
+  });
+  it("decodeCursor rejects non-JSON inside a valid base64url", () => {
+    const notJson = Buffer.from("not-json", "utf8").toString("base64url");
+    const err = thrownBy(() => decodeCursor(notJson));
+    expect(err).toBeInstanceOf(CursorError);
+    expect(err.code).toBe("cursor_invalid");
+  });
+  it("decodeCursor rejects a wrong version number", () => {
+    const futureVersion = encodeCursor({
+      v: 999,
+      last_updated_at: 0,
+      last_id: "",
+      query_hash: "abc",
+    } as any);
+    const err = thrownBy(() => decodeCursor(futureVersion));
+    expect(err.code).toBe("cursor_invalid");
+    expect(err.message).toContain("schema version");
+  });
+  it("decodeCursor rejects missing or wrong-type fields", () => {
+    // Payload without `last_id`, encoded by hand to bypass encodeCursor's typing.
+    const withoutLastId = Buffer.from(
+      JSON.stringify({ v: CURSOR_VERSION, last_updated_at: 0, query_hash: "x" }),
+    ).toString("base64url");
+    expect(() => decodeCursor(withoutLastId)).toThrow();
+    // `last_updated_at` set to NaN.
+    const withNaNTimestamp = encodeCursor({
+      v: CURSOR_VERSION,
+      last_updated_at: NaN,
+      last_id: "",
+      query_hash: "x",
+    });
+    expect(() => decodeCursor(withNaNTimestamp)).toThrow();
+  });
+});
+describe("computeQueryHash", () => {
+  it("is stable across key-order permutations", () => {
+    const forward = computeQueryHash({
+      tags: ["alpha"],
+      path: "p",
+      metadata: { status: "open" },
+    });
+    const reversed = computeQueryHash({
+      metadata: { status: "open" },
+      path: "p",
+      tags: ["alpha"],
+    });
+    expect(forward).toBe(reversed);
+  });
+  it("is stable across tag-array order permutations", () => {
+    const ascending = computeQueryHash({ tags: ["a", "b", "c"] });
+    const descending = computeQueryHash({ tags: ["c", "b", "a"] });
+    expect(ascending).toBe(descending);
+  });
+  it("treats `tags: []` and missing `tags` as equivalent (both mean 'no tag filter')", () => {
+    const unset = computeQueryHash({});
+    const emptyList = computeQueryHash({ tags: [] });
+    expect(unset).toBe(emptyList);
+  });
+  it("changes when the query filters change", () => {
+    const tagA = computeQueryHash({ tags: ["a"] });
+    const tagB = computeQueryHash({ tags: ["b"] });
+    expect(tagA).not.toBe(tagB);
+    const pathOnly = computeQueryHash({ path: "p" });
+    expect(pathOnly).not.toBe(tagA);
+  });
+  it("returns a 64-char hex string (sha256)", () => {
+    const digest = computeQueryHash({ tags: ["x"] });
+    expect(digest).toMatch(/^[0-9a-f]{64}$/);
+  });
+  it("nested metadata operator clauses hash stably under key reorder", () => {
+    // The same pair of operator clauses written in either key order must
+    // produce the same hash.
+    const gteFirst = computeQueryHash({ metadata: { priority: { gte: 5, lt: 10 } } });
+    const ltFirst = computeQueryHash({ metadata: { priority: { lt: 10, gte: 5 } } });
+    expect(gteFirst).toBe(ltFirst);
+  });
+  it("dateFilter contributes to the hash", () => {
+    const january = computeQueryHash({ dateFilter: { field: "created_at", from: "2026-01-01" } });
+    const february = computeQueryHash({ dateFilter: { field: "created_at", from: "2026-02-01" } });
+    expect(january).not.toBe(february);
+  });
+});
+describe("isoToMillis / millisToIso", () => {
+  it("round-trips", () => {
+    // Converting to epoch millis and back must reproduce the input exactly.
+    const stamp = "2026-04-15T12:34:56.789Z";
+    expect(millisToIso(isoToMillis(stamp))).toBe(stamp);
+  });
+  it("rejects malformed ISO", () => {
+    const parseGarbage = () => isoToMillis("not-a-date");
+    expect(parseGarbage).toThrow();
+  });
+});

package/core/src/cursor.ts ADDED Viewed

@@ -0,0 +1,272 @@
+/**
+ * Opaque cursors for `query-notes` (vault#313).
+ *
+ * Agent loops want "give me notes I haven't seen since last call." Today's
+ * pattern — pass `dateFilter: { field: "updated_at", from: <iso> }` and
+ * track the timestamp client-side — is brittle: the client has to remember
+ * the watermark, two notes at the same millisecond may collide, and a
+ * second call that lands mid-millisecond can miss or double-count rows.
+ *
+ * The opaque-cursor pattern (Stripe, GitHub, et al.) fixes this. The server
+ * returns a `next_cursor: string` on each query response; the client passes
+ * it back on the next call and the server resumes from exactly where it
+ * left off. The cursor is base64url-encoded JSON the client must not
+ * inspect — internal layout can evolve without breaking callers.
+ *
+ * # Cursor payload
+ *
+ * ```ts
+ * {
+ *   v: 1,                      // schema version
+ *   last_updated_at: number,   // millisecond epoch of the last seen note
+ *   last_id: string,           // ID of the last seen note — tiebreaker
+ *   query_hash: string,        // sha256 of normalized query params (hex)
+ * }
+ * ```
+ *
+ * - `last_updated_at` is millisecond epoch (not ISO) so cursor bytes stay
+ *   compact and the tiebreaker math is integer.
+ * - `last_id` is the tiebreaker — when two notes share `updated_at`, the
+ *   keyset query advances `id > last_id` at that timestamp so neither is
+ *   skipped nor returned twice.
+ * - `query_hash` binds the cursor to the exact query it was minted for.
+ *   Passing a cursor minted on `tag: "foo"` into a call for `tag: "bar"`
+ *   would silently return the wrong page; mismatch raises a structured
+ *   400 (`cursor_query_mismatch`) instead.
+ *
+ * # Why JSON inside base64url
+ *
+ * A flat-string format (`<ts>:<id>:<hash>`) is two characters shorter but
+ * forecloses on optional fields. JSON gives us a schema-versioned envelope
+ * — if v2 needs additional state (e.g. a search-relevance secondary key),
+ * old clients keep working and new clients can read both.
+ *
+ * # Race safety
+ *
+ * The cursor stores the maximum-`updated_at`+`id` of the LAST returned
+ * page. The next call's keyset predicate is:
+ *
+ *   (updated_at > last_updated_at)
+ *   OR (updated_at = last_updated_at AND id > last_id)
+ *
+ * A note written between calls A and B at a brand-new `updated_at` is
+ * picked up by the first half of the predicate. A note written at the
+ * exact same `updated_at` as the cursor's watermark (uncommon — wall-clock
+ * collisions are rare at millisecond resolution but not impossible) is
+ * picked up by the tiebreaker half, provided its `id` sorts after
+ * `last_id`; the SQL `ORDER BY updated_at ASC, id ASC` keeps the
+ * interleaving with the prior page stable. Without the
+ * tiebreaker, two notes sharing an `updated_at` would be at the mercy of
+ * SQLite's row order, which is "stable in practice" but not a contract.
+ */
+import { createHash } from "node:crypto";
+/** Version stamped into every cursor as `v`; `decodeCursor` rejects any other value. */
+export const CURSOR_VERSION = 1;
+/**
+ * Decoded form of the opaque cursor string. `encodeCursor` serializes this
+ * object as JSON and base64url-encodes it; `decodeCursor` reverses that and
+ * validates each field.
+ */
+export interface CursorPayload {
+  /** Schema version. Bumped if the cursor layout changes incompatibly. */
+  v: number;
+  /** Millisecond epoch of the last note returned. */
+  last_updated_at: number;
+  /** ID of the last note returned — tiebreaker for same-ms collisions. */
+  last_id: string;
+  /** sha256(hex) of normalized query params. Mismatch → cursor_query_mismatch. */
+  query_hash: string;
+}
+/**
+ * Error raised for a cursor that is malformed or stale.
+ *
+ * `code` carries the structured reason: `"cursor_invalid"` or
+ * `"cursor_query_mismatch"`. The wrapping layer (MCP / REST) is expected
+ * to catch this and surface a 400 carrying that code; callers should then
+ * drop the cursor and restart the iteration.
+ */
+export class CursorError extends Error {
+  override name = "CursorError";
+  /** Structured reason for the rejection. */
+  code: "cursor_invalid" | "cursor_query_mismatch";
+  /**
+   * @param message Human-readable description of the problem.
+   * @param code Structured reason stored on the instance as `code`.
+   */
+  constructor(message: string, code: CursorError["code"]) {
+    super(message);
+    this.code = code;
+  }
+}
+/**
+ * Serialize a cursor payload into the opaque string handed to clients.
+ *
+ * The payload is JSON-stringified, and the UTF-8 bytes of that JSON are
+ * base64url-encoded.
+ *
+ * @param payload Cursor fields to serialize.
+ * @returns The base64url-encoded cursor string.
+ */
+export function encodeCursor(payload: CursorPayload): string {
+  return Buffer.from(JSON.stringify(payload), "utf8").toString("base64url");
+}
+/**
+ * Decode an opaque cursor string back into a validated `CursorPayload`.
+ *
+ * Validation steps, in order: non-empty string → base64url alphabet →
+ * JSON → plain object (not an array) → `v === CURSOR_VERSION` →
+ * `last_updated_at` is a finite number that `Date` can represent →
+ * `last_id` is a string → `query_hash` is a non-empty string.
+ *
+ * @param cursor The string previously produced by `encodeCursor`.
+ * @returns A fresh object holding only the four known payload fields.
+ * @throws CursorError with code `"cursor_invalid"` on any structural problem.
+ */
+export function decodeCursor(cursor: string): CursorPayload {
+  if (typeof cursor !== "string" || cursor.length === 0) {
+    throw new CursorError("cursor must be a non-empty string", "cursor_invalid");
+  }
+  // `Buffer.from(str, "base64url")` does not throw on malformed input, so
+  // the alphabet is checked explicitly. `encodeCursor` only ever emits
+  // unpadded base64url, so anything outside `[A-Za-z0-9_-]` was not minted
+  // by this module.
+  if (!/^[A-Za-z0-9_-]+$/.test(cursor)) {
+    throw new CursorError("cursor is not valid base64url", "cursor_invalid");
+  }
+  const json = Buffer.from(cursor, "base64url").toString("utf8");
+  let parsed: unknown;
+  try {
+    parsed = JSON.parse(json);
+  } catch {
+    throw new CursorError("cursor payload is not valid JSON", "cursor_invalid");
+  }
+  // Arrays are `typeof "object"` too; reject them here so the caller sees
+  // the accurate message rather than a version mismatch.
+  if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) {
+    throw new CursorError("cursor payload must be an object", "cursor_invalid");
+  }
+  const p = parsed as Record<string, unknown>;
+  if (typeof p.v !== "number" || p.v !== CURSOR_VERSION) {
+    throw new CursorError(
+      `cursor schema version mismatch (expected ${CURSOR_VERSION}, got ${String(p.v)})`,
+      "cursor_invalid",
+    );
+  }
+  if (typeof p.last_updated_at !== "number" || !Number.isFinite(p.last_updated_at)) {
+    throw new CursorError("cursor.last_updated_at must be a finite number", "cursor_invalid");
+  }
+  // A finite number can still lie outside the range `Date` can represent
+  // (e.g. 1e300). Converting such a value with `toISOString()` throws a
+  // RangeError, so it is rejected here as an invalid cursor instead.
+  if (Number.isNaN(new Date(p.last_updated_at).getTime())) {
+    throw new CursorError(
+      "cursor.last_updated_at is outside the representable date range",
+      "cursor_invalid",
+    );
+  }
+  if (typeof p.last_id !== "string") {
+    throw new CursorError("cursor.last_id must be a string", "cursor_invalid");
+  }
+  if (typeof p.query_hash !== "string" || p.query_hash.length === 0) {
+    throw new CursorError("cursor.query_hash must be a non-empty string", "cursor_invalid");
+  }
+  // Copy the known fields only, so unrecognized keys in the payload are
+  // not passed on to callers.
+  return {
+    v: p.v,
+    last_updated_at: p.last_updated_at,
+    last_id: p.last_id,
+    query_hash: p.query_hash,
+  };
+}
+/**
+ * Shape of query parameters that participate in the query-hash.
+ *
+ * Pagination / cursor parameters themselves are excluded — bumping `limit`
+ * or advancing the cursor must NOT invalidate the cursor. Output-shape
+ * parameters (`include_content`, etc.) are also excluded — they don't
+ * affect *which* rows are returned, just how each row is rendered.
+ *
+ * The fields here are the *result-set-affecting* inputs. Any future filter
+ * added to `QueryOpts` should also be added here.
+ *
+ * Every field is optional. `undefined` values and empty arrays on these
+ * fields are dropped before hashing, so they hash the same as an omitted
+ * field.
+ */
+export interface QueryHashInputs {
+  /** Order-insensitive: sorted before hashing when every element is a string. */
+  tags?: string[];
+  tagMatch?: "all" | "any";
+  /** Order-insensitive: sorted before hashing when every element is a string. */
+  excludeTags?: string[];
+  hasTags?: boolean;
+  hasLinks?: boolean;
+  path?: string;
+  pathPrefix?: string;
+  /** When array-shaped, sorted before hashing if every element is a string. */
+  extension?: string | string[];
+  /** Order-insensitive: sorted before hashing when every element is a string. */
+  ids?: string[];
+  /** Object keys (including nested operator clauses) are sorted before hashing. */
+  metadata?: Record<string, unknown>;
+  dateFrom?: string;
+  dateTo?: string;
+  dateFilter?: { field?: string; from?: string; to?: string };
+  sort?: "asc" | "desc";
+  orderBy?: string;
+}
+/**
+ * Fingerprint a query's result-set-affecting parameters.
+ *
+ * The inputs are first run through `canonicalize` so that two calls
+ * describing the same query hash identically regardless of object-key
+ * order (e.g. `{tag: "x", path_prefix: "p"}` vs
+ * `{path_prefix: "p", tag: "x"}`) or the order of order-irrelevant array
+ * fields. `undefined` fields are dropped, and an empty `tags: []` hashes
+ * the same as an unset `tags` (both mean "no tag filter"), so a caller
+ * that sets a filter conditionally does not invalidate its own cursor.
+ *
+ * @param inputs The query parameters to fingerprint.
+ * @returns The sha256 digest of the canonical JSON, as 64 hex characters.
+ */
+export function computeQueryHash(inputs: QueryHashInputs): string {
+  const hasher = createHash("sha256");
+  hasher.update(JSON.stringify(canonicalize(inputs)), "utf8");
+  return hasher.digest("hex");
+}
+/**
+ * Top-level query fields whose string-array value is order-irrelevant and
+ * is therefore sorted before hashing. Hoisted so the set is built once
+ * rather than on every (recursive) `canonicalize` call.
+ */
+const ORDER_IRRELEVANT_STRING_ARRAYS: ReadonlySet<string> = new Set([
+  "tags",
+  "excludeTags",
+  "ids",
+  "extension",
+]);
+/**
+ * Canonicalize a value for stable hashing.
+ *
+ * - `null` and `undefined` values canonicalize to `null`; other
+ *   non-object primitives pass through unchanged.
+ * - Object properties whose value is `undefined` are dropped at every
+ *   depth (matching what `JSON.stringify` would do anyway).
+ * - Plain-object keys are sorted recursively so `JSON.stringify` output is
+ *   order-stable. This covers operator-clause shapes inside `metadata`:
+ *   `{gte: 5, lt: 10}` and `{lt: 10, gte: 5}` canonicalize identically.
+ * - At the TOP LEVEL ONLY, empty arrays are dropped (treated as unset) and
+ *   the known order-irrelevant string-array fields (`tags`, `excludeTags`,
+ *   `ids`, `extension` when array-shaped) are sorted.
+ * - Nested arrays keep their order and are kept even when empty, so a
+ *   metadata clause such as `{in: []}` stays distinct from `{}` and a
+ *   metadata field that happens to be named `tags` is not reordered.
+ *
+ * @param value The value to canonicalize.
+ * @param topLevel Whether `value` is the root query object. Callers
+ *   outside this function should leave it at the default; recursion
+ *   passes `false`.
+ * @returns A JSON-serializable value with deterministic key order.
+ */
+function canonicalize(value: unknown, topLevel = true): unknown {
+  if (value === null || value === undefined) return null;
+  if (typeof value !== "object") return value;
+  if (Array.isArray(value)) {
+    // Element order is preserved; each element is canonicalized as a
+    // nested value.
+    return (value as unknown[]).map((v) => canonicalize(v, false));
+  }
+  const source = value as Record<string, unknown>;
+  const out: Record<string, unknown> = {};
+  for (const k of Object.keys(source).sort()) {
+    const v = source[k];
+    if (v === undefined) continue;
+    if (topLevel && Array.isArray(v)) {
+      // Top-level filter arrays: empty means "no filter", same as unset.
+      if (v.length === 0) continue;
+      if (ORDER_IRRELEVANT_STRING_ARRAYS.has(k) && v.every((x) => typeof x === "string")) {
+        // Copy before sorting so the caller's array is never mutated.
+        out[k] = [...(v as string[])].sort();
+        continue;
+      }
+    }
+    out[k] = canonicalize(v, false);
+  }
+  return out;
+}
+/**
+ * Convert an ISO-8601 timestamp string into millisecond epoch.
+ *
+ * Per the store layer's convention, `updated_at` is kept in SQLite as an
+ * ISO string, while the cursor carries it as a millisecond integer for
+ * compact serialization. Routing both the mint-cursor path and the
+ * decode-cursor-into-SQL-predicate path through this helper keeps the
+ * conversion in one place, with unparseable input guarded.
+ *
+ * @param iso Timestamp string handed to `Date.parse`.
+ * @returns Milliseconds since the Unix epoch.
+ * @throws CursorError (`cursor_invalid`) when `Date.parse` yields a
+ *   non-finite result.
+ */
+export function isoToMillis(iso: string): number {
+  const parsed = Date.parse(iso);
+  if (Number.isFinite(parsed)) return parsed;
+  throw new CursorError(`invalid ISO timestamp for cursor: ${iso}`, "cursor_invalid");
+}
+/**
+ * Convert millisecond epoch back to an ISO-8601 timestamp string.
+ *
+ * Used to translate the cursor's `last_updated_at` into the form SQLite
+ * compares (`n.updated_at` is a TEXT column carrying ISO strings). ISO
+ * timestamps sort correctly lexicographically when they're all in the same
+ * canonical form (Z-suffixed, fixed millisecond precision) — every
+ * timestamp vault mints goes through `new Date(...).toISOString()` so the
+ * lex-order matches the millis-order.
+ *
+ * @param ms Milliseconds since the Unix epoch.
+ * @returns The `Date.prototype.toISOString()` rendering of `ms`.
+ * @throws CursorError (`cursor_invalid`) when `ms` is NaN, infinite, or
+ *   outside the range `Date` can represent. `toISOString()` would
+ *   otherwise throw a bare RangeError for such values; raising
+ *   `CursorError` mirrors `isoToMillis`.
+ */
+export function millisToIso(ms: number): string {
+  const date = new Date(ms);
+  // An unrepresentable time value leaves the Date "invalid" (getTime() is NaN).
+  if (Number.isNaN(date.getTime())) {
+    throw new CursorError(
+      `invalid millisecond timestamp for cursor: ${String(ms)}`,
+      "cursor_invalid",
+    );
+  }
+  return date.toISOString();
+}

package/core/src/mcp.ts CHANGED Viewed

@@ -2,6 +2,7 @@ import { Database } from "bun:sqlite";
 import type { Store, Note } from "./types.js";
 import * as noteOps from "./notes.js";
 import { filterMetadata, MAX_BATCH_SIZE, validateExtension, ExtensionValidationError } from "./notes.js";
+import { QueryError } from "./query-operators.js";
 import * as linkOps from "./links.js";
 import * as tagSchemaOps from "./tag-schemas.js";
 import type { TagFieldSchema } from "./tag-schemas.js";
@@ -189,6 +190,11 @@ Link expansion: pass \`expand_links: true\` to inline [[wikilinks]] from returne
           sort: { type: "string", enum: ["asc", "desc"], description: "Sort by created_at" },
           limit: { type: "number", description: "Max results (default 50)" },
           offset: { type: "number", description: "Pagination offset (default 0)" },
+          cursor: {
+            type: "string",
+            description:
+              "Opaque cursor for 'since last checked' agent loops (vault#313). First call: omit. The response will include `next_cursor` — pass it on the subsequent call to receive only notes created or updated since the prior page. The cursor binds to the query's filters (tag, path, metadata, etc.); changing them between calls returns a structured `cursor_query_mismatch` error. Pagination via cursor orders results by `updated_at ASC` and is mutually exclusive with `order_by` and `sort: \"desc\"`. The response shape switches to `{notes, next_cursor}` when this parameter is present.",
+          },
           include_content: { type: "boolean", description: "Include note content (default: true for single, false for list)" },
           include_metadata: {
             oneOf: [
@@ -254,8 +260,32 @@ Link expansion: pass \`expand_links: true\` to inline [[wikilinks]] from returne
           nearScope = new Set([anchor.id, ...traversed.map((t) => t.noteId)]);
         }
+        // --- Cursor mode (vault#313) ---
+        // When the caller passes `cursor`, the response shape switches to
+        // `{notes, next_cursor}` and `queryNotesPaged` handles the keyset
+        // pagination. Cursor mode is incompatible with full-text search
+        // (FTS owns its own ordering — relevance, not updated_at) and
+        // graph-neighborhood scoping (`near` would have to rebuild the
+        // neighborhood every call to be cursor-stable; we punt for now).
+        // Both surface as INVALID_QUERY rather than silently returning
+        // wrong rows.
+        const cursorMode = typeof params.cursor === "string" && params.cursor.length > 0;
+        if (cursorMode && params.search) {
+          throw new QueryError(
+            `cursor is incompatible with full-text search — FTS has its own ordering. Use date_filter on updated_at for since-last-checked search.`,
+            "INVALID_QUERY",
+          );
+        }
+        if (cursorMode && params.near) {
+          throw new QueryError(
+            `cursor is incompatible with near (graph neighborhood). Resolve the neighborhood first, then iterate with cursor + ids.`,
+            "INVALID_QUERY",
+          );
+        }
         // --- Full-text search ---
         let results: Note[];
+        let nextCursor: string | null = null;
         if (params.search) {
           // Normalize tag param
           const tags = normalizeTags(params.tag);
@@ -277,12 +307,13 @@ Link expansion: pass \`expand_links: true\` to inline [[wikilinks]] from returne
           // unknown keys silently; aliasing here closes the silent-no-op gap.
           const excludeTagsRaw = params.exclude_tags ?? params.excludeTags ?? params.exclude_tag;
           const excludeTags = normalizeTags(excludeTagsRaw);
-          // Route through `store.queryNotes` (not `noteOps.queryNotes`) so
-          // tag-hierarchy expansion fires for MCP callers the same as for
-          // HTTP REST callers — `tag: "manual"` matches descendants declared
-          // via `_tags/*` config notes. The previous direct-noteOps call
-          // bypassed the wrapper and silently dropped hierarchy expansion.
-          results = await store.queryNotes({
+          // Route through `store.queryNotes`/`queryNotesPaged` (not the raw
+          // `noteOps` exports) so tag-hierarchy expansion fires for MCP
+          // callers the same as for HTTP REST callers — `tag: "manual"`
+          // matches descendants declared via `_tags/*` config notes. The
+          // previous direct-noteOps call bypassed the wrapper and silently
+          // dropped hierarchy expansion.
+          const queryOpts = {
             tags,
             tagMatch: (params.tag_match as "all" | "any") ?? (tags && tags.length > 1 ? "any" : undefined),
             excludeTags,
@@ -307,7 +338,15 @@ Link expansion: pass \`expand_links: true\` to inline [[wikilinks]] from returne
             orderBy: params.order_by as string | undefined,
             limit: (params.limit as number) ?? 50,
             offset: params.offset as number | undefined,
-          });
+            cursor: cursorMode ? (params.cursor as string) : undefined,
+          };
+          if (cursorMode) {
+            const page = await store.queryNotesPaged(queryOpts);
+            results = page.notes;
+            nextCursor = page.next_cursor;
+          } else {
+            results = await store.queryNotes(queryOpts);
+          }
         }
         // For full-text search the post-filter is still the right shape — FTS
@@ -347,9 +386,14 @@ Link expansion: pass \`expand_links: true\` to inline [[wikilinks]] from returne
             if (params.include_attachments) enriched.attachments = await store.getAttachments(n.id);
             enrichedOut.push(enriched);
           }
+          // Cursor mode wraps the list in `{notes, next_cursor}` so callers can
+          // chain calls without tracking a watermark client-side. Legacy
+          // callers (no `cursor` param) still get the flat array.
+          if (cursorMode) return { notes: enrichedOut, next_cursor: nextCursor };
           return enrichedOut;
         }
+        if (cursorMode) return { notes: output, next_cursor: nextCursor };
         return output;
       },
     },