npm - @mammothb/pi-hashline - Versions diffs - 0.2.0 - Mend

@mammothb/pi-hashline 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

package/src/read.ts ADDED Viewed

@@ -0,0 +1,239 @@
+/**
+ * Hashline read tool override.
+ *
+ * Overrides the built-in `read` tool to emit `¶PATH#TAG` headers and
+ * record content snapshots. Text files get tagged; images delegate to the
+ * native read implementation.
+ */
+import { constants } from "node:fs";
+import { access, readFile } from "node:fs/promises";
+import { extname, isAbsolute, relative, resolve } from "node:path";
+import {
+  createReadToolDefinition,
+  type ToolDefinition,
+} from "@earendil-works/pi-coding-agent";
+import { Type } from "typebox";
+import {
+  computeFileHash,
+  formatHashlineHeader,
+  formatNumberedLines,
+} from "./format";
+import type { SnapshotStore } from "./snapshots";
+/** Byte budget (50 KiB) for the text body of a single read result. */
+const DEFAULT_MAX_BYTES = 50 * 1024;
+/**
+ * Parameter schema for the `read` tool: a required `path` plus an optional
+ * `offset`/`limit` pair selecting a window of lines.
+ */
+const ReadSchema = Type.Object({
+  path: Type.String({
+    description: "Path to the file to read (relative or absolute)",
+  }),
+  offset: Type.Optional(
+    Type.Number({
+      description: "Line number to start reading from (1-indexed)",
+    }),
+  ),
+  limit: Type.Optional(
+    Type.Number({
+      description: "Maximum number of lines to read",
+    }),
+  ),
+});
+/**
+ * File extensions (lowercase, with leading dot) whose reads are forwarded
+ * unchanged to the native read tool instead of being tagged as text.
+ *
+ * NOTE(review): the tool description only advertises jpg, png, gif and webp,
+ * while this set also delegates bmp, ico, tiff and svg — confirm the native
+ * tool handles each of these the way callers expect.
+ */
+const IMAGE_EXTENSIONS = new Set([
+  ".png",
+  ".jpg",
+  ".jpeg",
+  ".gif",
+  ".webp",
+  ".bmp",
+  ".ico",
+  ".tiff",
+  ".tif",
+  ".svg",
+]);
+/**
+ * Turn a user-supplied path into the path shown in hashline headers.
+ *
+ * The path is resolved against `cwd`. When the result lies inside `cwd` the
+ * cwd-relative form is returned (`"."` for `cwd` itself); otherwise the
+ * absolute path is returned.
+ *
+ * Only a leading `..` *segment* means "outside cwd". An entry whose name
+ * merely starts with two dots (for example `..foo`) is still inside cwd and
+ * keeps its relative form.
+ *
+ * @param rawPath - Path as provided by the caller (relative or absolute).
+ * @param cwd - Directory that relative paths are resolved against.
+ * @returns A cwd-relative display path, or the absolute path when outside cwd.
+ */
+function resolvePath(rawPath: string, cwd: string): string {
+  const resolved = resolve(cwd, rawPath);
+  const rel = relative(cwd, resolved);
+  // ".." alone, or ".." followed by a path separator, climbs out of cwd.
+  // An absolute `rel` happens on Windows when the two paths are on
+  // different drives.
+  const leavesCwd = /^\.\.(?:[\\/]|$)/.test(rel) || isAbsolute(rel);
+  if (leavesCwd) {
+    return resolved;
+  }
+  return rel || ".";
+}
+/** Structured metadata returned next to the text content of a `read` result. */
+export interface ReadToolDetails {
+  /**
+   * Total lines in the file (before offset/limit). A trailing newline is a
+   * terminator and does not count as an extra line.
+   */
+  totalLines: number;
+  /** Total bytes in the file: UTF-8 length of the text as read, before CRLF normalization. */
+  totalBytes: number;
+  /** Whether the displayed output was truncated at the byte limit. */
+  truncated: boolean;
+  /** Content hash of the full file, computed over the CRLF-to-LF normalized text. */
+  fileHash: string;
+  /** Hashline header for this file snapshot; empty on error results. */
+  header: string;
+}
+/**
+ * Build the tool result used for failed reads: the message as the only text
+ * item, paired with zeroed/empty details.
+ *
+ * @param message - Text shown to the caller describing the failure.
+ * @returns A result whose details report no lines, no bytes and no hash.
+ */
+function errorResult(message: string): {
+  content: { type: "text"; text: string }[];
+  details: ReadToolDetails;
+} {
+  const emptyDetails: ReadToolDetails = {
+    totalLines: 0,
+    totalBytes: 0,
+    truncated: false,
+    fileHash: "",
+    header: "",
+  };
+  const textItem: { type: "text"; text: string } = {
+    type: "text",
+    text: message,
+  };
+  return { content: [textItem], details: emptyDetails };
+}
+/**
+ * Select the longest prefix of whole lines whose LF-joined UTF-8 encoding
+ * fits in `maxBytes`.
+ *
+ * Works in a single pass and never cuts inside a line, so it cannot split a
+ * multi-byte character or a surrogate pair.
+ *
+ * @param lines - Lines to emit, without line terminators.
+ * @param maxBytes - Byte budget for the joined text.
+ * @returns The leading lines that fit; empty when the first line alone is too large.
+ */
+function takeLinesWithinByteLimit(
+  lines: readonly string[],
+  maxBytes: number,
+): string[] {
+  const kept: string[] = [];
+  let usedBytes = 0;
+  for (const line of lines) {
+    // Every line after the first also pays one byte for its "\n" separator.
+    const cost = Buffer.byteLength(line, "utf-8") + (kept.length > 0 ? 1 : 0);
+    if (usedBytes + cost > maxBytes) {
+      break;
+    }
+    kept.push(line);
+    usedBytes += cost;
+  }
+  return kept;
+}
+/**
+ * Create the hashline `read` tool.
+ *
+ * Text files are read in full, normalized from CRLF to LF, hashed, recorded
+ * in `snapshots` under their absolute path, and returned as a hashline
+ * header followed by numbered lines. Files with an extension listed in
+ * `IMAGE_EXTENSIONS` are forwarded unchanged to the native read tool.
+ *
+ * @param snapshots - Store that receives the normalized text of every text file read.
+ * @returns The tool definition registered under the name `read`.
+ */
+export function createReadTool(
+  snapshots: SnapshotStore,
+): ToolDefinition<typeof ReadSchema, ReadToolDetails> {
+  // Lazily create the native read tool so image reads can be delegated to
+  // it. It is built from the cwd of the first delegating call and reused for
+  // every later call.
+  let cachedNativeRead: ToolDefinition<typeof ReadSchema, unknown> | undefined;
+  function nativeRead(ctx: {
+    cwd: string;
+  }): ToolDefinition<typeof ReadSchema, unknown> {
+    if (!cachedNativeRead) {
+      cachedNativeRead = createReadToolDefinition(ctx.cwd) as ToolDefinition<
+        typeof ReadSchema,
+        unknown
+      >;
+    }
+    return cachedNativeRead;
+  }
+  return {
+    name: "read",
+    label: "Read",
+    description:
+      "Read the contents of a file. Supports text files and images (jpg, png, gif, webp). " +
+      "Every text file view includes a ¶PATH#TAG header — copy this tag when editing " +
+      "the file so the edit tool can validate you're working against the current version.",
+    promptSnippet:
+      "Read file contents — every output includes a ¶PATH#TAG header required by the edit tool",
+    promptGuidelines: [
+      "Use read to inspect file content instead of cat or tail. Every text output starts with a ¶PATH#TAG header — copy the entire header (including the tag) into edit tool calls to validate you're editing the current file version.",
+      "Use offset/limit to read large files in sections. Tags are per-file, not per-section — any section of a file carries the same tag.",
+    ],
+    parameters: ReadSchema,
+    async execute(toolCallId, params, signal, onUpdate, ctx) {
+      const { path: rawPath } = params;
+      const absolutePath = resolve(ctx.cwd, rawPath);
+      // Delegate to native read for images and SVGs.
+      const ext = extname(absolutePath).toLowerCase();
+      if (IMAGE_EXTENSIONS.has(ext)) {
+        // The native result is returned as-is: its details do not follow
+        // ReadToolDetails and no snapshot is recorded for these files.
+        const result = nativeRead(ctx).execute(
+          toolCallId,
+          params,
+          signal,
+          onUpdate,
+          ctx,
+        );
+        return result;
+      }
+      // Check readability up front so permission/missing-file problems get
+      // their own message prefix.
+      try {
+        await access(absolutePath, constants.R_OK);
+      } catch (err: unknown) {
+        const message = err instanceof Error ? err.message : "unknown error";
+        return errorResult(`Cannot read file: ${message}`);
+      }
+      // Text file — read fully, compute hash, record snapshot, format output.
+      try {
+        const rawContent = await readFile(absolutePath, "utf-8");
+        const totalBytes = Buffer.byteLength(rawContent, "utf-8");
+        // Normalize to LF for line counting and hash computation.
+        const normalized = rawContent.replace(/\r\n/g, "\n");
+        // A trailing newline produces an empty string at the end of the
+        // split — drop it for display (it is a line terminator, not a blank
+        // line).
+        const allLines = normalized.split("\n");
+        if (allLines.length > 0 && allLines[allLines.length - 1] === "") {
+          allLines.pop();
+        }
+        const totalLines = allLines.length;
+        // Compute hash over full normalized content.
+        const fileHash = computeFileHash(normalized);
+        // Record snapshot (keyed by absolute path for consistency).
+        snapshots.record(absolutePath, normalized);
+        const displayPath = resolvePath(rawPath, ctx.cwd);
+        const header = formatHashlineHeader(displayPath, fileHash);
+        // Apply offset and limit. The schema accepts any number, so the
+        // offset is truncated to an integer to keep the slice start and the
+        // displayed line numbers in agreement.
+        const startLine = params.offset
+          ? Math.max(0, Math.trunc(params.offset) - 1)
+          : 0;
+        const endLine = params.limit
+          ? startLine + params.limit
+          : allLines.length;
+        const selectedLines = allLines.slice(startLine, endLine);
+        let text = selectedLines.join("\n");
+        let truncated = false;
+        // Truncate at 50KB, keeping only whole lines.
+        if (Buffer.byteLength(text, "utf-8") > DEFAULT_MAX_BYTES) {
+          const displayLines = takeLinesWithinByteLimit(
+            selectedLines,
+            DEFAULT_MAX_BYTES,
+          );
+          text = `${displayLines.join("\n")}\n\n[Output truncated at 50KB. Use offset/limit to read specific sections.]`;
+          truncated = true;
+        }
+        // Format with hashline header + numbered lines.
+        // NOTE(review): when truncated, the notice is part of `text` here;
+        // if formatNumberedLines numbers every line it receives, the notice
+        // is numbered like file content — confirm this is intended.
+        const output = `${header}\n${formatNumberedLines(text, startLine + 1)}`;
+        return {
+          content: [{ type: "text", text: output }],
+          details: {
+            totalLines,
+            totalBytes,
+            truncated,
+            fileHash,
+            header,
+          } satisfies ReadToolDetails,
+        };
+      } catch (err: unknown) {
+        const message = err instanceof Error ? err.message : "unknown error";
+        return errorResult(`Error reading file: ${message}`);
+      }
+    },
+  };
+}

package/src/recovery.ts ADDED Viewed

@@ -0,0 +1,141 @@
+/**
+ * Stale-tag recovery: attempts to salvage edits when the file's content
+ * hash no longer matches the tag the edit was authored against.
+ *
+ * Two strategies, tried in order:
+ *
+ * 1. **Structured-patch 3-way merge**: apply edits to cached snapshot,
+ *    create a structured patch, then apply that patch to the live content.
+ *    Handles most drift (unrelated lines changed, formatters, etc.).
+ *
+ * 2. **Session-chain replay**: when the structured merge refuses but the
+ *    anchors still point at identical content, apply the edits directly to
+ *    the live file. Less certain — the model must verify the result.
+ */
+import { applyPatch, structuredPatch } from "diff";
+import { applyEdits } from "./apply";
+import {
+  RECOVERY_EXTERNAL_WARNING,
+  RECOVERY_SESSION_CHAIN_WARNING,
+  RECOVERY_SESSION_REPLAY_WARNING,
+} from "./messages";
+import type { SnapshotStore } from "./snapshots";
+import type { Edit } from "./types";
+// ─── Recovery result ─────────────────────────────────────────────────
+/** Outcome of a successful stale-tag recovery. */
+export interface RecoveryResult {
+  /** Recovered text (normalized LF). */
+  text: string;
+  /** Human-readable warning describing what recovery did. */
+  warning: string;
+}
+// ─── Helpers ─────────────────────────────────────────────────────────
+/**
+ * Decide whether edits can be replayed directly onto the live text.
+ *
+ * Both texts are split on "\n" and, for every anchor, the snapshot line is
+ * compared with the live line at the same position. An anchor positioned at
+ * or past the end of either text fails the check, as does any anchor whose
+ * line content differs. An empty anchor list passes.
+ *
+ * @param snapshotText - Text the edit was authored against.
+ * @param liveText - Current text of the file.
+ * @param anchorLines - 1-indexed line numbers targeted by the edits.
+ * @returns `true` when every anchor line is unchanged and in the same place.
+ */
+function anchorsStillMatch(
+  snapshotText: string,
+  liveText: string,
+  anchorLines: readonly number[],
+): boolean {
+  const before = snapshotText.split("\n");
+  const after = liveText.split("\n");
+  return anchorLines.every((anchor) => {
+    const position = anchor - 1;
+    const pastEnd = position >= before.length || position >= after.length;
+    return !pastEnd && before[position] === after[position];
+  });
+}
+// ─── Recovery ────────────────────────────────────────────────────────
+/**
+ * Try to salvage an edit whose tag no longer matches the live file.
+ *
+ * The snapshot named by the stale tag is looked up first. The edits are
+ * applied to that snapshot, the difference is expressed as a structured
+ * patch, and the patch is applied to the live text. If that fails or changes
+ * nothing, and every anchor line is unchanged between snapshot and live
+ * text, the edits are applied to the live text directly.
+ *
+ * The returned warning is the "external" one when the stale snapshot is
+ * still the most recently recorded version for the path; otherwise it is the
+ * warning specific to the strategy that succeeded.
+ *
+ * @param snapshots — the session's snapshot store
+ * @param path — absolute file path
+ * @param currentText — normalized LF content of the live file
+ * @param fileHash — the stale tag from the edit's header
+ * @param edits — the parsed edits to apply
+ * @param anchorLines — anchor lines targeted by the edits (1-indexed, deduplicated)
+ * @returns RecoveryResult on success, null if recovery is impossible
+ */
+export function tryRecover(
+  snapshots: SnapshotStore,
+  path: string,
+  currentText: string,
+  fileHash: string,
+  edits: readonly Edit[],
+  anchorLines: readonly number[],
+): RecoveryResult | null {
+  const snapshot = snapshots.byHash(path, fileHash);
+  if (snapshot === null) {
+    // Hash never recorded — agent forged it or it is from another session.
+    return null;
+  }
+  const staleHash = snapshot.hash;
+  // Looked up lazily so the store is only consulted when a strategy succeeds.
+  const warningFor = (inSessionWarning: string): string => {
+    const latest = snapshots.head(path);
+    const changedExternally = latest !== null && latest.hash === staleHash;
+    return changedExternally ? RECOVERY_EXTERNAL_WARNING : inSessionWarning;
+  };
+
+  // Strategy 1: express the edit as a patch against the snapshot and apply
+  // that patch to the live text.
+  const intended = applyEdits(snapshot.text, edits).text;
+  const patch = structuredPatch(
+    path,
+    path,
+    snapshot.text,
+    intended,
+    undefined, // oldHeader
+    undefined, // newHeader
+    { context: 3 },
+  );
+  const merged = applyPatch(currentText, patch);
+  if (merged !== false && merged !== currentText) {
+    return {
+      text: merged,
+      warning: warningFor(RECOVERY_SESSION_CHAIN_WARNING),
+    };
+  }
+
+  // Strategy 2: the patch was refused (or changed nothing). Replay the edits
+  // on the live text, but only when every anchor line is untouched.
+  if (anchorLines.length === 0) {
+    return null;
+  }
+  if (!anchorsStillMatch(snapshot.text, currentText, anchorLines)) {
+    return null;
+  }
+  const replayed = applyEdits(currentText, edits).text;
+  if (replayed === currentText) {
+    return null;
+  }
+  return {
+    text: replayed,
+    warning: warningFor(RECOVERY_SESSION_REPLAY_WARNING),
+  };
+}

package/src/snapshots.ts ADDED Viewed

@@ -0,0 +1,166 @@
+/**
+ * Per-session snapshot store used by recovery and the patcher to bind
+ * hashline section tags to the exact file content that minted them.
+ *
+ * A section tag is a content-derived hash of the *whole file* (see
+ * {@link computeFileHash}). Any read of byte-identical content mints the
+ * same tag, so reads of one file state fuse onto one anchor and a follow-up
+ * edit anchored at any line validates whenever the live file still hashes
+ * to it.
+ *
+ * Producers (typically `read` / `grep` / `write` tools) call
+ * {@link SnapshotStore.record} with the full normalized text they observed.
+ * The store hashes it, dedups against the per-path history, and returns the
+ * tag. Consumers (the patcher) resolve a stale tag back to the recorded
+ * full text via {@link SnapshotStore.byHash} and 3-way-merge the would-be
+ * edit onto the live content.
+ */
+import { computeFileHash } from "./format";
+/**
+ * One full-file version observed at a point in time. The tag the model sees
+ * is {@link Snapshot.hash}; recovery replays edits against
+ * {@link Snapshot.text}.
+ */
+export interface Snapshot {
+  /** Canonical path this version belongs to. */
+  readonly path: string;
+  /** Full normalized (LF, no BOM) file text as observed. */
+  readonly text: string;
+  /** Content-derived tag for {@link Snapshot.text} (see {@link computeFileHash}). */
+  readonly hash: string;
+  /**
+   * Timestamp (ms since epoch) the version was recorded. Deliberately not
+   * readonly: the in-memory store refreshes it when the same content is
+   * recorded again.
+   */
+  recordedAt: number;
+}
+/**
+ * Storage seam for full-file version snapshots. The patcher calls
+ * {@link head} for the latest version of a path and {@link byHash} when it
+ * needs the specific historical version a section's stale tag names.
+ *
+ * Lookups signal "not found" with `null`.
+ */
+export abstract class SnapshotStore {
+  /** Most-recently recorded version for `path`, or `null` if none. */
+  abstract head(path: string): Snapshot | null;
+  /** Recorded version for `path` whose tag equals `hash`, or `null`. */
+  abstract byHash(path: string, hash: string): Snapshot | null;
+  /** Record the full normalized text of `path` and return its content tag. */
+  abstract record(path: string, fullText: string): string;
+  /** Drop the version history for a single path. */
+  abstract invalidate(path: string): void;
+  /** Drop every version history. */
+  abstract clear(): void;
+}
+/** Path cap used when `maxPaths` is not supplied. */
+const DEFAULT_MAX_PATHS = 30;
+/** Per-path version cap used when `maxVersionsPerPath` is not supplied. */
+const DEFAULT_MAX_VERSIONS_PER_PATH = 4;
+/** Capacity limits accepted by the in-memory snapshot store. */
+export interface InMemorySnapshotStoreOptions {
+  /** Maximum number of distinct paths tracked at once (default 30). */
+  maxPaths?: number;
+  /** Maximum full-file versions retained per path (default 4). Oldest dropped first. */
+  maxVersionsPerPath?: number;
+}
+/**
+ * In-memory {@link SnapshotStore} with a simple LRU eviction policy.
+ * Per-path history is a short list of full-file versions, newest first
+ * (oldest dropped first); path tracking is LRU-bounded so cold paths age out
+ * automatically.
+ *
+ * Recording content whose hash is already in a path's history refreshes its
+ * timestamp, promotes it to head and reuses the existing tag (read fusion);
+ * recording new content puts a fresh version at the front of the history.
+ */
+export class InMemorySnapshotStore extends SnapshotStore {
+  // Map preserves insertion order — we use that for LRU eviction.
+  // Each value is a list of snapshots, newest first.
+  readonly #versions = new Map<string, Snapshot[]>();
+  readonly #maxPaths: number;
+  readonly #maxVersionsPerPath: number;
+  /**
+   * @param options - Optional capacity limits; omitted fields use the defaults.
+   */
+  constructor(options: InMemorySnapshotStoreOptions = {}) {
+    super();
+    this.#maxPaths = options.maxPaths ?? DEFAULT_MAX_PATHS;
+    this.#maxVersionsPerPath =
+      options.maxVersionsPerPath ?? DEFAULT_MAX_VERSIONS_PER_PATH;
+  }
+  /** Newest recorded version for `path`, or `null` when the path is unknown. */
+  head(path: string): Snapshot | null {
+    return this.#versions.get(path)?.[0] ?? null;
+  }
+  /** Version of `path` whose tag equals `hash`, or `null` when none is retained. */
+  byHash(path: string, hash: string): Snapshot | null {
+    const history = this.#versions.get(path);
+    return history?.find((version) => version.hash === hash) ?? null;
+  }
+  /**
+   * Record `fullText` as the current content of `path`.
+   *
+   * The path becomes the most recently used one, and the recorded version
+   * becomes the head of its history.
+   *
+   * @returns The content tag of `fullText`.
+   */
+  record(path: string, fullText: string): string {
+    const hash = computeFileHash(fullText);
+    const history = this.#versions.get(path);
+    const existing = history?.find((version) => version.hash === hash);
+    let next: Snapshot[];
+    if (history !== undefined && existing !== undefined) {
+      // Same content state observed again: refresh its timestamp and promote
+      // it to head (it is the current file content).
+      existing.recordedAt = Date.now();
+      next = [existing, ...history.filter((version) => version !== existing)];
+    } else {
+      const snapshot: Snapshot = {
+        path,
+        text: fullText,
+        hash,
+        recordedAt: Date.now(),
+      };
+      // New content goes to the front; an existing history is capped.
+      next =
+        history === undefined
+          ? [snapshot]
+          : [snapshot, ...history].slice(0, this.#maxVersionsPerPath);
+    }
+    // Refresh LRU recency: delete-then-set moves path to end of insertion order.
+    this.#versions.delete(path);
+    this.#versions.set(path, next);
+    this.#evictPathsIfNeeded();
+    return hash;
+  }
+  /** Forget every version recorded for `path`. */
+  invalidate(path: string): void {
+    this.#versions.delete(path);
+  }
+  /** Forget every version of every path. */
+  clear(): void {
+    this.#versions.clear();
+  }
+  /**
+   * Evict least-recently-used paths while the path cap is exceeded.
+   *
+   * Iterates the keys instead of looping on the size alone, so it always
+   * terminates — including when the cap is negative and the map is already
+   * empty.
+   */
+  #evictPathsIfNeeded(): void {
+    // Map keys iterate in insertion order — the first keys are the LRU paths.
+    for (const lruPath of this.#versions.keys()) {
+      if (!(this.#versions.size > this.#maxPaths)) {
+        return;
+      }
+      this.#versions.delete(lruPath);
+    }
+  }
+}