npm - @semiont/jobs - Versions diffs - 0.5.5 → 0.5.7 - Mend

@semiont/jobs 0.5.5 → 0.5.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/dist/index.js CHANGED

@@ -1,23 +1,30 @@
-import { promises, watch } from 'fs';
+import { promises } from 'fs';
 import * as path from 'path';
-import { validateAndCorrectOffsets, getLocaleEnglishName, didToAgent } from '@semiont/core';
+import { jobId, reconcileSelector, getLocaleEnglishName, didToAgent } from '@semiont/core';
 import { generateAnnotationId } from '@semiont/event-sourcing';
 // src/fs-job-queue.ts
+var REANNOUNCE_INTERVAL_MS = 3e4;
+var STALE_RUNNING_MS = 30 * 6e4;
+var PROGRESS_WRITE_MIN_INTERVAL_MS = 5e3;
+var RETENTION_HOURS = 24;
+var CLEANUP_INTERVAL_MS = 36e5;
 var FsJobQueue = class {
   constructor(project, logger, eventBus) {
     this.eventBus = eventBus;
     this.jobsDir = project.jobsDir;
     this.logger = logger;
   }
+  eventBus;
   jobsDir;
   logger;
-  // In-memory pending queue: avoids fs.readdir() on every poll (6×/sec with 6 workers)
-  pendingQueue = [];
-  watcher = null;
-  loadDebounceTimer = null;
+  reannounceTimer = null;
+  cleanupTimer = null;
+  /** Per-job timestamp of the last progress write, for throttling. */
+  lastProgressWrite = /* @__PURE__ */ new Map();
   /**
-   * Initialize job queue directories, load pending jobs, and start fs.watch
+   * Initialize job queue directories, announce any pending backlog,
+   * and start the re-announce interval. Idempotent.
    */
   async initialize() {
     const statuses = ["pending", "running", "complete", "failed", "cancelled"];
@@ -25,62 +32,83 @@ var FsJobQueue = class {
       const dir = path.join(this.jobsDir, status);
       await promises.mkdir(dir, { recursive: true });
     }
-    await this.loadPendingJobs();
-    const pendingDir = path.join(this.jobsDir, "pending");
-    try {
-      this.watcher = watch(pendingDir, () => {
-        this.debouncedLoadPendingJobs();
-      });
-    } catch (error) {
-      this.logger.warn("Failed to watch pending directory", {
-        error: error instanceof Error ? error.message : String(error)
-      });
+    if (this.eventBus && !this.reannounceTimer) {
+      await this.announcePendingJobs();
+      this.reannounceTimer = setInterval(() => {
+        this.announcePendingJobs().catch((error) => {
+          this.logger.warn("Pending-job re-announce failed", {
+            error: error instanceof Error ? error.message : String(error)
+          });
+        });
+        this.recoverStaleRunningJobs().catch((error) => {
+          this.logger.warn("Stale-running recovery failed", {
+            error: error instanceof Error ? error.message : String(error)
+          });
+        });
+      }, REANNOUNCE_INTERVAL_MS);
+      this.reannounceTimer.unref?.();
+    }
+    if (!this.cleanupTimer) {
+      this.cleanupTimer = setInterval(() => {
+        this.cleanupOldJobs(RETENTION_HOURS).catch((error) => {
+          this.logger.warn("Job retention cleanup failed", {
+            error: error instanceof Error ? error.message : String(error)
+          });
+        });
+      }, CLEANUP_INTERVAL_MS);
+      this.cleanupTimer.unref?.();
     }
     this.logger.info("Job queue initialized");
   }
   /**
-   * Clean up watcher
+   * Stop the re-announce and retention intervals
    */
   destroy() {
-    if (this.watcher) {
-      this.watcher.close();
-      this.watcher = null;
+    if (this.reannounceTimer) {
+      clearInterval(this.reannounceTimer);
+      this.reannounceTimer = null;
     }
-    if (this.loadDebounceTimer) {
-      clearTimeout(this.loadDebounceTimer);
-      this.loadDebounceTimer = null;
+    if (this.cleanupTimer) {
+      clearInterval(this.cleanupTimer);
+      this.cleanupTimer = null;
     }
   }
   /**
-   * Load pending jobs from disk into in-memory queue
+   * Emit `job:queued` for a pending job, if an EventBus is wired and
+   * the job carries a `resourceId` (every current job type does).
    */
-  async loadPendingJobs() {
-    const pendingDir = path.join(this.jobsDir, "pending");
-    try {
-      const files = await promises.readdir(pendingDir);
-      files.sort();
-      const jobs = [];
-      for (const file of files) {
-        try {
-          const content = await promises.readFile(path.join(pendingDir, file), "utf-8");
-          jobs.push(JSON.parse(content));
-        } catch {
-        }
-      }
-      this.pendingQueue = jobs;
-    } catch {
-      this.pendingQueue = [];
+  announce(job) {
+    if (this.eventBus && "params" in job && "resourceId" in job.params) {
+      this.eventBus.get("job:queued").next({
+        jobId: job.metadata.id,
+        jobType: job.metadata.type,
+        resourceId: job.params.resourceId,
+        userId: job.metadata.userId
+      });
     }
   }
   /**
-   * Debounced version of loadPendingJobs — fs.watch can fire rapidly
+   * Announce every job currently in `pending/`. Files that vanish or
+   * fail to parse mid-scan (claimed, cancelled, partially written)
+   * are skipped — they're either gone for a good reason or picked up
+   * on the next tick.
    */
-  debouncedLoadPendingJobs() {
-    if (this.loadDebounceTimer) return;
-    this.loadDebounceTimer = setTimeout(async () => {
-      this.loadDebounceTimer = null;
-      await this.loadPendingJobs();
-    }, 100);
+  async announcePendingJobs() {
+    const pendingDir = path.join(this.jobsDir, "pending");
+    let files;
+    try {
+      files = await promises.readdir(pendingDir);
+    } catch {
+      return;
+    }
+    files.sort();
+    for (const file of files) {
+      try {
+        const content = await promises.readFile(path.join(pendingDir, file), "utf-8");
+        this.announce(JSON.parse(content));
+      } catch {
+      }
+    }
   }
   /**
    * Create a new job
@@ -90,16 +118,7 @@ var FsJobQueue = class {
     await promises.writeFile(jobPath, JSON.stringify(job, null, 2), "utf-8");
     this.logger.info("Job created", { jobId: job.metadata.id, status: job.status });
     if (job.status === "pending") {
-      this.pendingQueue.push(job);
-      this.pendingQueue.sort((a, b) => a.metadata.id.localeCompare(b.metadata.id));
-    }
-    if (this.eventBus && "params" in job && "resourceId" in job.params) {
-      this.eventBus.get("job:queued").next({
-        jobId: job.metadata.id,
-        jobType: job.metadata.type,
-        resourceId: job.params.resourceId,
-        userId: job.metadata.userId
-      });
+      this.announce(job);
     }
   }
   /**
@@ -128,34 +147,92 @@ var FsJobQueue = class {
         await promises.unlink(oldPath);
       } catch (error) {
       }
-      if (oldStatus === "pending") {
-        const idx = this.pendingQueue.findIndex((j) => j.metadata.id === job.metadata.id);
-        if (idx !== -1) this.pendingQueue.splice(idx, 1);
-      }
-      if (job.status === "pending") {
-        this.pendingQueue.push(job);
-        this.pendingQueue.sort((a, b) => a.metadata.id.localeCompare(b.metadata.id));
-      }
     }
     const newPath = this.getJobPath(job.metadata.id, job.status);
     await promises.writeFile(newPath, JSON.stringify(job, null, 2), "utf-8");
     if (oldStatus && oldStatus !== job.status) {
       this.logger.info("Job moved", { jobId: job.metadata.id, oldStatus, newStatus: job.status });
+      if (job.status === "pending") {
+        this.announce(job);
+      }
     } else {
       this.logger.info("Job updated", { jobId: job.metadata.id, status: job.status });
     }
   }
   /**
-   * Poll for next pending job (FIFO) from in-memory queue.
-   * If a predicate is provided, returns the first matching job (skipping non-matching ones).
+   * Move a running job to `complete`. Returns false (and changes
+   * nothing) if the job is missing or not running — which also makes
+   * duplicate `job:complete` events harmless.
+   */
+  async completeJob(jobId, result) {
+    const job = await this.getJob(jobId);
+    if (!job || job.status !== "running") {
+      return false;
+    }
+    this.lastProgressWrite.delete(jobId);
+    const completed = {
+      status: "complete",
+      metadata: job.metadata,
+      params: job.params,
+      startedAt: job.startedAt,
+      completedAt: (/* @__PURE__ */ new Date()).toISOString(),
+      result
+    };
+    await this.updateJob(completed, "running");
+    return true;
+  }
+  /**
+   * Retry-or-fail a running job. While `retryCount < maxRetries` the
+   * job goes back to `pending` with the count bumped (and is
+   * re-announced); after that it lands in `failed` with the error.
+   * Returns null (and changes nothing) if the job isn't running.
+   */
+  async failJob(jobId, error) {
+    const job = await this.getJob(jobId);
+    if (!job || job.status !== "running") {
+      return null;
+    }
+    this.lastProgressWrite.delete(jobId);
+    if (job.metadata.retryCount < job.metadata.maxRetries) {
+      const retried = {
+        status: "pending",
+        metadata: { ...job.metadata, retryCount: job.metadata.retryCount + 1 },
+        params: job.params
+      };
+      await this.updateJob(retried, "running");
+      return "retried";
+    }
+    const failed = {
+      status: "failed",
+      metadata: job.metadata,
+      params: job.params,
+      startedAt: job.startedAt,
+      completedAt: (/* @__PURE__ */ new Date()).toISOString(),
+      error
+    };
+    await this.updateJob(failed, "running");
+    return "failed";
+  }
+  /**
+   * Write progress into a running job's file. Throttled per job, and
+   * a no-op for jobs that aren't running. Beyond surfacing live
+   * progress to `job:status-requested`, each write refreshes the
+   * file's mtime — the heartbeat `recoverStaleRunningJobs` watches.
    */
-  async pollNextPendingJob(predicate) {
-    if (!predicate) {
-      return this.pendingQueue.shift() ?? null;
+  async recordProgress(jobId, progress) {
+    const now = Date.now();
+    const lastWrite = this.lastProgressWrite.get(jobId) ?? 0;
+    if (now - lastWrite < PROGRESS_WRITE_MIN_INTERVAL_MS) {
+      return;
     }
-    const index = this.pendingQueue.findIndex(predicate);
-    if (index === -1) return null;
-    return this.pendingQueue.splice(index, 1)[0] ?? null;
+    this.lastProgressWrite.set(jobId, now);
+    const job = await this.getJob(jobId);
+    if (!job || job.status !== "running") {
+      this.lastProgressWrite.delete(jobId);
+      return;
+    }
+    const updated = { ...job, progress };
+    await promises.writeFile(this.getJobPath(jobId, "running"), JSON.stringify(updated, null, 2), "utf-8");
   }
   /**
    * List jobs with filters
@@ -206,6 +283,63 @@ var FsJobQueue = class {
     await this.updateJob(cancelledJob, oldStatus);
     return true;
   }
+  /**
+   * Cancel all pending jobs in a category — the granularity of the
+   * `job:cancel-requested` UI signal. Running jobs are left to finish:
+   * interrupting a worker mid-inference would need a worker-side kill
+   * channel that doesn't exist.
+   */
+  async cancelPendingJobs(category) {
+    const matches = category === "generation" ? (type) => type === "generation" : (type) => type.endsWith("-annotation");
+    const pending = await this.listJobs({ status: "pending", limit: Number.MAX_SAFE_INTEGER });
+    let cancelled = 0;
+    for (const job of pending) {
+      if (!matches(job.metadata.type)) continue;
+      if (await this.cancelJob(job.metadata.id)) {
+        cancelled++;
+      }
+    }
+    if (cancelled > 0) {
+      this.logger.info("Cancelled pending jobs", { category, cancelled });
+    }
+    return cancelled;
+  }
+  /**
+   * Recover running jobs orphaned by a dead worker: any `running/`
+   * file whose mtime is older than the stale window is fed through
+   * the same retry-or-fail path as `job:fail`. Progress writes
+   * refresh the mtime, so a live worker is never recovered out from
+   * under itself as long as it reports within the window.
+   */
+  async recoverStaleRunningJobs() {
+    const runningDir = path.join(this.jobsDir, "running");
+    let files;
+    try {
+      files = await promises.readdir(runningDir);
+    } catch {
+      return 0;
+    }
+    const now = Date.now();
+    let recovered = 0;
+    for (const file of files) {
+      if (!file.endsWith(".json")) continue;
+      try {
+        const stat = await promises.stat(path.join(runningDir, file));
+        if (now - stat.mtimeMs < STALE_RUNNING_MS) continue;
+        const staleId = jobId(file.slice(0, -".json".length));
+        const outcome = await this.failJob(
+          staleId,
+          `worker presumed dead \u2014 no progress within ${STALE_RUNNING_MS / 6e4} minutes`
+        );
+        if (outcome) {
+          this.logger.warn("Recovered stale running job", { jobId: staleId, outcome });
+          recovered++;
+        }
+      } catch {
+      }
+    }
+    return recovered;
+  }
   /**
    * Clean up old completed/failed jobs (older than retention period)
    */
@@ -269,156 +403,6 @@ var FsJobQueue = class {
   }
 };
-// src/job-worker.ts
-var JobWorker = class {
-  running = false;
-  currentJob = null;
-  pollIntervalMs;
-  errorBackoffMs;
-  jobQueue;
-  logger;
-  constructor(jobQueue, pollIntervalMs = 1e3, errorBackoffMs = 5e3, logger) {
-    this.jobQueue = jobQueue;
-    this.pollIntervalMs = pollIntervalMs;
-    this.errorBackoffMs = errorBackoffMs;
-    this.logger = logger;
-  }
-  /**
-   * Start the worker (polls queue in loop)
-   */
-  async start() {
-    this.running = true;
-    this.logger.info("Worker started", { worker: this.getWorkerName() });
-    while (this.running) {
-      try {
-        const job = await this.pollNextJob();
-        if (job) {
-          await this.processJob(job);
-        } else {
-          await this.sleep(this.pollIntervalMs);
-        }
-      } catch (error) {
-        this.logger.error("Error in worker main loop", { worker: this.getWorkerName(), error: error instanceof Error ? error.message : String(error) });
-        await this.sleep(this.errorBackoffMs);
-      }
-    }
-    this.logger.info("Worker stopped", { worker: this.getWorkerName() });
-  }
-  /**
-   * Stop the worker (graceful shutdown)
-   */
-  async stop() {
-    this.logger.info("Stopping worker", { worker: this.getWorkerName() });
-    this.running = false;
-    const timeout = 6e4;
-    const startTime = Date.now();
-    while (this.currentJob && Date.now() - startTime < timeout) {
-      await this.sleep(100);
-    }
-    if (this.currentJob) {
-      this.logger.warn("Forced worker shutdown", { worker: this.getWorkerName(), jobId: this.currentJob.metadata.id });
-    }
-  }
-  /**
-   * Poll for next job to process
-   */
-  async pollNextJob() {
-    return this.jobQueue.pollNextPendingJob((job) => this.canProcessJob(job));
-  }
-  /**
-   * Process a job (handles state transitions and error handling)
-   */
-  async processJob(job) {
-    this.currentJob = job;
-    try {
-      if (job.status !== "pending") {
-        this.logger.warn("Skipping non-pending job", { worker: this.getWorkerName(), jobId: job.metadata.id, status: job.status });
-        return;
-      }
-      const runningJob = {
-        status: "running",
-        metadata: job.metadata,
-        params: job.params,
-        startedAt: (/* @__PURE__ */ new Date()).toISOString(),
-        progress: {}
-        // Initialize with empty progress
-      };
-      await this.jobQueue.updateJob(runningJob, "pending");
-      this.logger.info("Processing job", { worker: this.getWorkerName(), jobId: job.metadata.id, jobType: job.metadata.type });
-      const result = await this.executeJob(runningJob);
-      await this.emitCompletionEvent(runningJob, result);
-      const completeJob = {
-        status: "complete",
-        metadata: runningJob.metadata,
-        params: runningJob.params,
-        startedAt: runningJob.startedAt,
-        completedAt: (/* @__PURE__ */ new Date()).toISOString(),
-        result: result ?? {}
-        // Use returned result or empty object
-      };
-      await this.jobQueue.updateJob(completeJob, "running");
-      this.logger.info("Job completed successfully", { worker: this.getWorkerName(), jobId: job.metadata.id });
-    } catch (error) {
-      await this.handleJobFailure(job, error);
-    } finally {
-      this.currentJob = null;
-    }
-  }
-  /**
-   * Handle job failure (retry or move to failed)
-   */
-  async handleJobFailure(job, error) {
-    const updatedMetadata = {
-      ...job.metadata,
-      retryCount: job.metadata.retryCount + 1
-    };
-    if (updatedMetadata.retryCount < updatedMetadata.maxRetries) {
-      this.logger.info("Job failed, will retry", { worker: this.getWorkerName(), jobId: job.metadata.id, retryCount: updatedMetadata.retryCount, maxRetries: updatedMetadata.maxRetries });
-      this.logger.debug("Job error details", { error: error instanceof Error ? error.message : String(error), stack: error instanceof Error ? error.stack : void 0 });
-      const retryJob = {
-        status: "pending",
-        metadata: updatedMetadata,
-        params: job.status === "pending" ? job.params : job.params
-      };
-      await this.jobQueue.updateJob(retryJob, job.status);
-    } else {
-      this.logger.error("Job failed permanently", { worker: this.getWorkerName(), jobId: job.metadata.id, retryCount: updatedMetadata.retryCount });
-      this.logger.error("Job error details", { error: error instanceof Error ? error.message : String(error), stack: error instanceof Error ? error.stack : void 0 });
-      const failedJob = {
-        status: "failed",
-        metadata: updatedMetadata,
-        params: job.status === "pending" ? job.params : job.params,
-        startedAt: job.status === "running" ? job.startedAt : void 0,
-        completedAt: (/* @__PURE__ */ new Date()).toISOString(),
-        error: error instanceof Error ? error.message : String(error)
-      };
-      await this.jobQueue.updateJob(failedJob, job.status);
-    }
-  }
-  /**
-   * Update job progress (best-effort, doesn't throw)
-   */
-  async updateJobProgress(job) {
-    try {
-      await this.jobQueue.updateJob(job);
-    } catch (error) {
-      this.logger.warn("Failed to update job progress", { worker: this.getWorkerName(), error: error instanceof Error ? error.message : String(error) });
-    }
-  }
-  /**
-   * Sleep utility
-   */
-  sleep(ms) {
-    return new Promise((resolve) => setTimeout(resolve, ms));
-  }
-  /**
-   * Emit completion event (optional hook for subclasses)
-   * Override this to emit job-specific completion events (e.g., job.completed)
-   */
-  async emitCompletionEvent(_job, _result) {
-  }
-};
 // src/types.ts
 function isPendingJob(job) {
   return job.status === "pending";
@@ -480,17 +464,15 @@ ${content.substring(0, 8e3)}
 Return a JSON array of comments. Each comment must have:
 - "exact": the exact text passage being commented on (quoted verbatim from source)
-- "start": character offset where the passage starts
-- "end": character offset where the passage ends
-- "prefix": up to 32 characters of text immediately before the passage
-- "suffix": up to 32 characters of text immediately after the passage
+- "prefix": up to 64 characters of text immediately before the passage
+- "suffix": up to 64 characters of text immediately after the passage
 - "comment": your comment following the instructions above
 Respond with a valid JSON array.
 Example:
 [
-  {"exact": "the quarterly review meeting", "start": 142, "end": 169, "prefix": "We need to schedule ", "suffix": " for next month.", "comment": "Who will lead this? Should we invite the external auditors?"}
+  {"exact": "the quarterly review meeting", "prefix": "We need to schedule ", "suffix": " for next month.", "comment": "Who will lead this? Should we invite the external auditors?"}
 ]`;
     } else {
       const toneGuidance = tone ? `
@@ -516,17 +498,15 @@ ${content.substring(0, 8e3)}
 Return a JSON array of comments. Each comment should have:
 - "exact": the exact text passage being commented on (quoted verbatim from source)
-- "start": character offset where the passage starts
-- "end": character offset where the passage ends
-- "prefix": up to 32 characters of text immediately before the passage
-- "suffix": up to 32 characters of text immediately after the passage
+- "prefix": up to 64 characters of text immediately before the passage
+- "suffix": up to 64 characters of text immediately after the passage
 - "comment": your explanatory comment (1-3 sentences, provide context/background/clarification)
 Respond with a valid JSON array.
 Example format:
 [
-  {"exact": "Ouranos", "start": 52, "end": 59, "prefix": "In the beginning, ", "suffix": " ruled the universe", "comment": "Ouranos (also spelled Uranus) is the primordial Greek deity personifying the sky. In Hesiod's Theogony, he is the son and husband of Gaia (Earth) and father of the Titans."}
+  {"exact": "Ouranos", "prefix": "In the beginning, ", "suffix": " ruled the universe", "comment": "Ouranos (also spelled Uranus) is the primordial Greek deity personifying the sky. In Hesiod's Theogony, he is the son and husband of Gaia (Earth) and father of the Titans."}
 ]`;
     }
     return prompt;
@@ -557,16 +537,14 @@ ${content.substring(0, 8e3)}
 Return a JSON array of highlights. Each highlight must have:
 - "exact": the exact text passage to highlight (quoted verbatim from source)
-- "start": character offset where the passage starts
-- "end": character offset where the passage ends
-- "prefix": up to 32 characters of text immediately before the passage
-- "suffix": up to 32 characters of text immediately after the passage
+- "prefix": up to 64 characters of text immediately before the passage
+- "suffix": up to 64 characters of text immediately after the passage
 Respond with a valid JSON array.
 Example:
 [
-  {"exact": "revenue grew 45% year-over-year", "start": 142, "end": 174, "prefix": "In Q3 2024, ", "suffix": ", exceeding all forecasts."}
+  {"exact": "revenue grew 45% year-over-year", "prefix": "In Q3 2024, ", "suffix": ", exceeding all forecasts."}
 ]`;
     } else {
       const densityGuidance = density ? `
@@ -590,16 +568,14 @@ ${content.substring(0, 8e3)}
 Return a JSON array of highlights. Each highlight should have:
 - "exact": the exact text passage to highlight (quoted verbatim from source)
-- "start": character offset where the passage starts
-- "end": character offset where the passage ends
-- "prefix": up to 32 characters of text immediately before the passage
-- "suffix": up to 32 characters of text immediately after the passage
+- "prefix": up to 64 characters of text immediately before the passage
+- "suffix": up to 64 characters of text immediately after the passage
 Respond with a valid JSON array.
 Example format:
 [
-  {"exact": "we will discontinue support for legacy systems by March 2025", "start": 52, "end": 113, "prefix": "After careful consideration, ", "suffix": ". This decision affects"}
+  {"exact": "we will discontinue support for legacy systems by March 2025", "prefix": "After careful consideration, ", "suffix": ". This decision affects"}
 ]`;
     }
     return prompt;
@@ -633,17 +609,15 @@ ${content.substring(0, 8e3)}
 Return a JSON array of assessments. Each assessment must have:
 - "exact": the exact text passage being assessed (quoted verbatim from source)
-- "start": character offset where the passage starts
-- "end": character offset where the passage ends
-- "prefix": up to 32 characters of text immediately before the passage
-- "suffix": up to 32 characters of text immediately after the passage
+- "prefix": up to 64 characters of text immediately before the passage
+- "suffix": up to 64 characters of text immediately after the passage
 - "assessment": your assessment following the instructions above
 Respond with a valid JSON array.
 Example:
 [
-  {"exact": "the quarterly revenue target", "start": 142, "end": 169, "prefix": "We established ", "suffix": " for Q4 2024.", "assessment": "This target seems ambitious given market conditions. Consider revising based on recent trends."}
+  {"exact": "the quarterly revenue target", "prefix": "We established ", "suffix": " for Q4 2024.", "assessment": "This target seems ambitious given market conditions. Consider revising based on recent trends."}
 ]`;
     } else {
       const toneGuidance = tone ? `
@@ -669,17 +643,15 @@ ${content.substring(0, 8e3)}
 Return a JSON array of assessments. Each assessment should have:
 - "exact": the exact text passage being assessed (quoted verbatim from source)
-- "start": character offset where the passage starts
-- "end": character offset where the passage ends
-- "prefix": up to 32 characters of text immediately before the passage
-- "suffix": up to 32 characters of text immediately after the passage
+- "prefix": up to 64 characters of text immediately before the passage
+- "suffix": up to 64 characters of text immediately after the passage
 - "assessment": your analytical assessment (1-3 sentences, evaluate validity/strength/implications)
 Respond with a valid JSON array.
 Example format:
 [
-  {"exact": "AI will replace most jobs by 2030", "start": 52, "end": 89, "prefix": "Many experts predict that ", "suffix": ", fundamentally reshaping", "assessment": "This claim lacks nuance and supporting evidence. Employment patterns historically show job transformation rather than wholesale replacement. The timeline appears speculative without specific sector analysis."}
+  {"exact": "AI will replace most jobs by 2030", "prefix": "Many experts predict that ", "suffix": ", fundamentally reshaping", "assessment": "This claim lacks nuance and supporting evidence. Employment patterns historically show job transformation rather than wholesale replacement. The timeline appears speculative without specific sector analysis."}
 ]`;
     }
     return prompt;
@@ -725,17 +697,15 @@ ${content}
 Return a JSON array of tags. Each tag should have:
 - "exact": the exact text passage (quoted verbatim from source)
-- "start": character offset where the passage starts
-- "end": character offset where the passage ends
-- "prefix": up to 32 characters of text immediately before the passage
-- "suffix": up to 32 characters of text immediately after the passage
+- "prefix": up to 64 characters of text immediately before the passage
+- "suffix": up to 64 characters of text immediately after the passage
 Respond with a valid JSON array.
 Example format:
 [
-  {"exact": "What duty did the defendant owe?", "start": 142, "end": 175, "prefix": "The central question is: ", "suffix": " This question must be"},
-  {"exact": "In tort law, a duty of care is established when...", "start": 412, "end": 520, "prefix": "Legal framework:\\n", "suffix": "\\n\\nApplying this standard"}
+  {"exact": "What duty did the defendant owe?", "prefix": "The central question is: ", "suffix": " This question must be"},
+  {"exact": "In tort law, a duty of care is established when...", "prefix": "Legal framework:\\n", "suffix": "\\n\\nApplying this standard"}
 ]`;
     return prompt;
   }
@@ -803,23 +773,29 @@ var MotivationParsers = class {
     try {
       const parsed = extractObjectsFromArray(response);
       const valid = parsed.filter(
-        (c) => !!c && typeof c === "object" && typeof c.exact === "string" && typeof c.start === "number" && typeof c.end === "number" && typeof c.comment === "string" && c.comment.trim().length > 0
+        (c) => !!c && typeof c === "object" && typeof c.exact === "string" && typeof c.comment === "string" && c.comment.trim().length > 0
       );
       console.log(`[MotivationParsers] Parsed ${valid.length} valid comments from ${parsed.length} total`);
       const validatedComments = [];
       for (const comment of valid) {
-        try {
-          const validated = validateAndCorrectOffsets(content, comment.start, comment.end, comment.exact);
-          validatedComments.push({
-            ...comment,
-            start: validated.start,
-            end: validated.end,
-            prefix: validated.prefix,
-            suffix: validated.suffix
-          });
-        } catch (error) {
-          console.warn(`[MotivationParsers] Skipping invalid comment "${comment.exact}":`, error);
+        const reconciled = reconcileSelector(content, {
+          exact: comment.exact,
+          ...typeof comment.prefix === "string" ? { prefix: comment.prefix } : {},
+          ...typeof comment.suffix === "string" ? { suffix: comment.suffix } : {}
+        });
+        if (!reconciled) {
+          console.warn(`[MotivationParsers] Dropped hallucinated comment "${comment.exact}"`);
+          continue;
         }
+        logAnchorMethod("comment", comment.exact, reconciled.anchorMethod);
+        validatedComments.push({
+          comment: comment.comment,
+          exact: reconciled.exact,
+          start: reconciled.start,
+          end: reconciled.end,
+          ...reconciled.prefix !== void 0 ? { prefix: reconciled.prefix } : {},
+          ...reconciled.suffix !== void 0 ? { suffix: reconciled.suffix } : {}
+        });
       }
       return validatedComments;
     } catch (error) {
@@ -838,22 +814,27 @@ var MotivationParsers = class {
     try {
       const parsed = extractObjectsFromArray(response);
       const highlights = parsed.filter(
-        (h) => !!h && typeof h === "object" && typeof h.exact === "string" && typeof h.start === "number" && typeof h.end === "number"
+        (h) => !!h && typeof h === "object" && typeof h.exact === "string"
       );
       const validatedHighlights = [];
       for (const highlight of highlights) {
-        try {
-          const validated = validateAndCorrectOffsets(content, highlight.start, highlight.end, highlight.exact);
-          validatedHighlights.push({
-            ...highlight,
-            start: validated.start,
-            end: validated.end,
-            prefix: validated.prefix,
-            suffix: validated.suffix
-          });
-        } catch (error) {
-          console.warn(`[MotivationParsers] Skipping invalid highlight "${highlight.exact}":`, error);
+        const reconciled = reconcileSelector(content, {
+          exact: highlight.exact,
+          ...typeof highlight.prefix === "string" ? { prefix: highlight.prefix } : {},
+          ...typeof highlight.suffix === "string" ? { suffix: highlight.suffix } : {}
+        });
+        if (!reconciled) {
+          console.warn(`[MotivationParsers] Dropped hallucinated highlight "${highlight.exact}"`);
+          continue;
         }
+        logAnchorMethod("highlight", highlight.exact, reconciled.anchorMethod);
+        validatedHighlights.push({
+          exact: reconciled.exact,
+          start: reconciled.start,
+          end: reconciled.end,
+          ...reconciled.prefix !== void 0 ? { prefix: reconciled.prefix } : {},
+          ...reconciled.suffix !== void 0 ? { suffix: reconciled.suffix } : {}
+        });
       }
       return validatedHighlights;
     } catch (error) {
@@ -873,22 +854,28 @@ var MotivationParsers = class {
     try {
       const parsed = extractObjectsFromArray(response);
       const assessments = parsed.filter(
-        (a) => !!a && typeof a === "object" && typeof a.exact === "string" && typeof a.start === "number" && typeof a.end === "number" && typeof a.assessment === "string"
+        (a) => !!a && typeof a === "object" && typeof a.exact === "string" && typeof a.assessment === "string"
       );
       const validatedAssessments = [];
       for (const assessment of assessments) {
-        try {
-          const validated = validateAndCorrectOffsets(content, assessment.start, assessment.end, assessment.exact);
-          validatedAssessments.push({
-            ...assessment,
-            start: validated.start,
-            end: validated.end,
-            prefix: validated.prefix,
-            suffix: validated.suffix
-          });
-        } catch (error) {
-          console.warn(`[MotivationParsers] Skipping invalid assessment "${assessment.exact}":`, error);
+        const reconciled = reconcileSelector(content, {
+          exact: assessment.exact,
+          ...typeof assessment.prefix === "string" ? { prefix: assessment.prefix } : {},
+          ...typeof assessment.suffix === "string" ? { suffix: assessment.suffix } : {}
+        });
+        if (!reconciled) {
+          console.warn(`[MotivationParsers] Dropped hallucinated assessment "${assessment.exact}"`);
+          continue;
         }
+        logAnchorMethod("assessment", assessment.exact, reconciled.anchorMethod);
+        validatedAssessments.push({
+          assessment: assessment.assessment,
+          exact: reconciled.exact,
+          start: reconciled.start,
+          end: reconciled.end,
+          ...reconciled.prefix !== void 0 ? { prefix: reconciled.prefix } : {},
+          ...reconciled.suffix !== void 0 ? { suffix: reconciled.suffix } : {}
+        });
       }
       return validatedAssessments;
     } catch (error) {
@@ -898,17 +885,15 @@ var MotivationParsers = class {
     }
   }
   /**
-   * Parse and validate AI response for tag detection
-   * Note: Does NOT validate offsets - caller must do that with content
-   *
-   * @param response - Raw AI response string (may include markdown code fences)
-   * @returns Array of tag matches (offsets not yet validated)
+   * Parse the LLM's tag response into raw, pre-reconciliation tag inputs.
+   * Reconciliation happens in `validateTagOffsets`, which adds `start`/`end`
+   * by anchoring `exact` against the source content.
    */
   static parseTags(response) {
     try {
       const parsed = extractObjectsFromArray(response);
       const valid = parsed.filter(
-        (t) => !!t && typeof t === "object" && typeof t.exact === "string" && typeof t.start === "number" && typeof t.end === "number" && t.exact.trim().length > 0
+        (t) => !!t && typeof t === "object" && typeof t.exact === "string" && t.exact.trim().length > 0
       );
       console.log(`[MotivationParsers] Parsed ${valid.length} valid tags from ${parsed.length} total`);
       return valid;
@@ -918,52 +903,41 @@ var MotivationParsers = class {
     }
   }
   /**
-   * Validate tag offsets against content and add category
-   * Helper for tag detection after initial parsing
-   *
-   * @param tags - Parsed tags without validated offsets
-   * @param content - Original content to validate against
-   * @param category - Category to assign to validated tags
-   * @returns Array of validated tag matches
+   * Anchor raw tag inputs against source content and add category.
    */
   static validateTagOffsets(tags, content, category) {
     const validatedTags = [];
     for (const tag of tags) {
-      try {
-        const validated = validateAndCorrectOffsets(content, tag.start, tag.end, tag.exact);
-        validatedTags.push({
-          ...tag,
-          category,
-          start: validated.start,
-          end: validated.end,
-          prefix: validated.prefix,
-          suffix: validated.suffix
-        });
-      } catch (error) {
-        console.warn(`[MotivationParsers] Skipping invalid tag for category "${category}":`, error);
+      const reconciled = reconcileSelector(content, {
+        exact: tag.exact,
+        ...typeof tag.prefix === "string" ? { prefix: tag.prefix } : {},
+        ...typeof tag.suffix === "string" ? { suffix: tag.suffix } : {}
+      });
+      if (!reconciled) {
+        console.warn(`[MotivationParsers] Dropped hallucinated tag "${tag.exact}" for category "${category}"`);
+        continue;
       }
+      logAnchorMethod("tag", tag.exact, reconciled.anchorMethod);
+      validatedTags.push({
+        category,
+        exact: reconciled.exact,
+        start: reconciled.start,
+        end: reconciled.end,
+        ...reconciled.prefix !== void 0 ? { prefix: reconciled.prefix } : {},
+        ...reconciled.suffix !== void 0 ? { suffix: reconciled.suffix } : {}
+      });
     }
     return validatedTags;
   }
 };
+function logAnchorMethod(motivation, exact, anchorMethod) {
+  if (anchorMethod === "first-of-many" || anchorMethod === "fuzzy-match") {
+    console.warn(`[MotivationParsers] ${motivation} anchored via ${anchorMethod}: "${exact}"`);
+  }
+}
 // src/workers/annotation-detection.ts
 var AnnotationDetection = class {
-  /**
-   * Fetch content from a ContentFetcher and read the stream to a string.
-   * Shared helper for all workers.
-   */
-  static async fetchContent(contentFetcher, resourceId) {
-    const stream = await contentFetcher(resourceId);
-    if (!stream) {
-      throw new Error(`Could not load content for resource ${resourceId}`);
-    }
-    const chunks = [];
-    for await (const chunk of stream) {
-      chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
-    }
-    return Buffer.concat(chunks).toString("utf-8");
-  }
   /**
    * Detect comments in content.
    *
@@ -1073,17 +1047,15 @@ ${exact}
 """
 Respond with a JSON array of entities found. Each entity should have:
-- exact: the exact text span from the input
+- exact: the exact text span from the input (quoted verbatim \u2014 character-for-character)
 - entityType: one of the provided entity types
-- startOffset: character position where the entity starts (0-indexed)
-- endOffset: character position where the entity ends
-- prefix: up to 32 characters of text immediately before the entity (helps identify correct occurrence)
-- suffix: up to 32 characters of text immediately after the entity (helps identify correct occurrence)
+- prefix: up to 64 characters of text immediately before the entity (used to disambiguate when the same text appears more than once)
+- suffix: up to 64 characters of text immediately after the entity (same purpose)
 If no entities are found, respond with an empty array [].
 Example output:
-[{"exact":"Alice","entityType":"Person","startOffset":0,"endOffset":5,"prefix":"","suffix":" went to"},{"exact":"Paris","entityType":"Location","startOffset":20,"endOffset":25,"prefix":"went to ","suffix":" yesterday"}]`;
+[{"exact":"Alice","entityType":"Person","prefix":"","suffix":" went to"},{"exact":"Paris","entityType":"Location","prefix":"went to ","suffix":" yesterday"}]`;
   logger.debug("Sending entity extraction request", { entityTypes: entityTypesDescription });
   const response = await client.generateTextWithMetadata(
     prompt,
@@ -1112,151 +1084,18 @@ Example output:
       logger.error(errorMsg);
       throw new Error(errorMsg);
     }
-    return entities.map((entity, idx) => {
-      let start = entity.startOffset;
-      let end = entity.endOffset;
-      logger.debug("Processing entity", {
-        index: idx + 1,
-        total: entities.length,
-        type: entity.entityType,
-        text: entity.exact,
-        offsetsFromAI: `[${start}:${end}]`
-      });
-      const extractedText = exact.substring(start, end);
-      let anchorMethod;
-      if (extractedText === entity.exact) {
-        anchorMethod = "llm-exact";
-        logger.debug("Entity anchored", {
-          text: entity.exact,
-          entityType: entity.entityType,
-          anchorMethod
-        });
-      } else {
-        logger.debug("LLM offsets mismatch \u2014 attempting re-anchor", {
-          expected: entity.exact,
-          llmOffsets: `[${start}:${end}]`,
-          foundAtLlmOffsets: extractedText
-        });
-        let occurrenceCount = 0;
-        let firstOccurrence = -1;
-        let searchPos = 0;
-        while ((searchPos = exact.indexOf(entity.exact, searchPos)) !== -1) {
-          if (firstOccurrence === -1) firstOccurrence = searchPos;
-          occurrenceCount++;
-          searchPos++;
-        }
-        if (occurrenceCount === 0) {
-          anchorMethod = "dropped";
-          logger.error("Entity text not found in resource \u2014 dropping", {
-            text: entity.exact,
-            entityType: entity.entityType,
-            llmOffsets: `[${start}:${end}]`,
-            anchorMethod,
-            resourceStart: exact.substring(0, 200)
-          });
-          return null;
-        }
-        let recoveredOffset = -1;
-        if (entity.prefix || entity.suffix) {
-          let p = 0;
-          while ((p = exact.indexOf(entity.exact, p)) !== -1) {
-            const candidatePrefix = exact.substring(Math.max(0, p - 32), p);
-            const candidateSuffix = exact.substring(
-              p + entity.exact.length,
-              Math.min(exact.length, p + entity.exact.length + 32)
-            );
-            const prefixMatch = !entity.prefix || candidatePrefix.endsWith(entity.prefix);
-            const suffixMatch = !entity.suffix || candidateSuffix.startsWith(entity.suffix);
-            if (prefixMatch && suffixMatch) {
-              recoveredOffset = p;
-              break;
-            }
-            p++;
-          }
-        }
-        if (recoveredOffset !== -1) {
-          anchorMethod = "context-recovered";
-          start = recoveredOffset;
-          end = recoveredOffset + entity.exact.length;
-          logger.debug("Entity anchored", {
-            text: entity.exact,
-            entityType: entity.entityType,
-            anchorMethod,
-            offsetDiff: recoveredOffset - entity.startOffset
-          });
-        } else if (occurrenceCount === 1) {
-          anchorMethod = "unique-match";
-          start = firstOccurrence;
-          end = firstOccurrence + entity.exact.length;
-          logger.debug("Entity anchored", {
-            text: entity.exact,
-            entityType: entity.entityType,
-            anchorMethod,
-            offsetDiff: firstOccurrence - entity.startOffset
-          });
-        } else {
-          anchorMethod = "first-of-many";
-          start = firstOccurrence;
-          end = firstOccurrence + entity.exact.length;
-          logger.warn("Entity anchored at first of multiple occurrences \u2014 may be wrong", {
-            text: entity.exact,
-            entityType: entity.entityType,
-            anchorMethod,
-            occurrenceCount,
-            chosenOffset: firstOccurrence,
-            llmOffsets: `[${entity.startOffset}:${entity.endOffset}]`,
-            hasPrefix: !!entity.prefix,
-            hasSuffix: !!entity.suffix
-          });
-        }
+    return entities.filter((e) => {
+      const ok = e && typeof e === "object" && typeof e.exact === "string" && typeof e.entityType === "string";
+      if (!ok) {
+        logger.debug("Dropped malformed LLM entity", { entity: e });
       }
-      return {
-        exact: entity.exact,
-        entityType: entity.entityType,
-        start,
-        end,
-        prefix: entity.prefix,
-        suffix: entity.suffix
-      };
-    }).filter((entity) => {
-      if (entity === null) {
-        logger.debug("Filtered entity: null");
-        return false;
-      }
-      if (entity.start === void 0 || entity.end === void 0) {
-        logger.warn("Filtered entity: missing offsets", { text: entity.exact });
-        return false;
-      }
-      if (entity.start < 0) {
-        logger.warn("Filtered entity: negative start", {
-          text: entity.exact,
-          start: entity.start
-        });
-        return false;
-      }
-      if (entity.end > exact.length) {
-        logger.warn("Filtered entity: end exceeds text length", {
-          text: entity.exact,
-          end: entity.end,
-          textLength: exact.length
-        });
-        return false;
-      }
-      const extractedText = exact.substring(entity.start, entity.end);
-      if (extractedText !== entity.exact) {
-        logger.warn("Filtered entity: offset mismatch", {
-          expected: entity.exact,
-          got: extractedText,
-          offsets: `[${entity.start}:${entity.end}]`
-        });
-        return false;
-      }
-      logger.debug("Accepted entity", {
-        text: entity.exact,
-        offsets: `[${entity.start}:${entity.end}]`
-      });
-      return true;
-    });
+      return ok;
+    }).map((entity) => ({
+      exact: entity.exact,
+      entityType: entity.entityType,
+      ...typeof entity.prefix === "string" ? { prefix: entity.prefix } : {},
+      ...typeof entity.suffix === "string" ? { suffix: entity.suffix } : {}
+    }));
   } catch (error) {
     logger.error("Failed to parse entity extraction response", {
       error: error instanceof Error ? error.message : String(error)
@@ -1392,7 +1231,59 @@ Requirements:
   });
   return result;
 }
-function buildTextAnnotation(resourceId, userId, generator, motivation, match, body) {
+function toMatch(r) {
+  return {
+    exact: r.exact,
+    start: r.start,
+    end: r.end,
+    ...r.prefix !== void 0 ? { prefix: r.prefix } : {},
+    ...r.suffix !== void 0 ? { suffix: r.suffix } : {}
+  };
+}
+function annotationDedupeKey(ann) {
+  const target = ann.target;
+  const selectors = Array.isArray(target?.selector) ? target.selector : [];
+  const pos = selectors.find((s) => s.type === "TextPositionSelector");
+  return [
+    ann.motivation,
+    pos?.start ?? "?",
+    pos?.end ?? "?",
+    JSON.stringify(ann.body ?? null)
+  ].join("|");
+}
+function dedupeAnnotations(annotations) {
+  const seen = /* @__PURE__ */ new Set();
+  const out = [];
+  for (const ann of annotations) {
+    const key = annotationDedupeKey(ann);
+    if (seen.has(key)) continue;
+    seen.add(key);
+    out.push(ann);
+  }
+  return out;
+}
+function buildTextAnnotation(content, resourceId, userId, generator, motivation, match, body) {
+  if (content.substring(match.start, match.end) !== match.exact) {
+    throw new Error(
+      `buildTextAnnotation invariant: content.substring(${match.start}, ${match.end}) !== exact for resource ${resourceId}, motivation ${motivation}`
+    );
+  }
+  if (match.prefix !== void 0) {
+    const actualPrefix = content.substring(Math.max(0, match.start - match.prefix.length), match.start);
+    if (actualPrefix !== match.prefix) {
+      throw new Error(
+        `buildTextAnnotation invariant: content prefix-slice !== prefix for resource ${resourceId}, motivation ${motivation}`
+      );
+    }
+  }
+  if (match.suffix !== void 0) {
+    const actualSuffix = content.substring(match.end, Math.min(content.length, match.end + match.suffix.length));
+    if (actualSuffix !== match.suffix) {
+      throw new Error(
+        `buildTextAnnotation invariant: content suffix-slice !== suffix for resource ${resourceId}, motivation ${motivation}`
+      );
+    }
+  }
   const creator = didToAgent(userId);
   const wasAttributedTo = creator["@id"] === generator["@id"] ? [generator] : [creator, generator];
   return {
@@ -1431,9 +1322,9 @@ async function processHighlightJob(content, inferenceClient, params, userId, gen
     params.sourceLanguage
   );
   onProgress(60, `Creating ${highlights.length} annotations...`, "creating");
-  const annotations = highlights.map(
-    (h) => buildTextAnnotation(params.resourceId, userId, generator, "highlighting", h)
-  );
+  const annotations = dedupeAnnotations(highlights.map(
+    (h) => buildTextAnnotation(content, params.resourceId, userId, generator, "highlighting", h)
+  ));
   onProgress(100, `Complete! Created ${annotations.length} highlights`, "creating");
   return {
     annotations,
@@ -1454,16 +1345,16 @@ async function processCommentJob(content, inferenceClient, params, userId, gener
   );
   onProgress(60, `Creating ${comments.length} annotations...`, "creating");
   const bodyLanguage = params.language ?? "en";
-  const annotations = comments.map(
+  const annotations = dedupeAnnotations(comments.map(
     (c) => (
       // Match the pre-#651 CommentAnnotationWorker: include format and
       // language on the body TextualBody. Optional in the schema, but
       // consumers that do language-aware rendering rely on them.
-      buildTextAnnotation(params.resourceId, userId, generator, "commenting", c, [
+      buildTextAnnotation(content, params.resourceId, userId, generator, "commenting", c, [
         { type: "TextualBody", value: c.comment, purpose: "commenting", format: "text/plain", language: bodyLanguage }
       ])
     )
-  );
+  ));
   onProgress(100, `Complete! Created ${annotations.length} comments`, "creating");
   return {
     annotations,
@@ -1484,7 +1375,7 @@ async function processAssessmentJob(content, inferenceClient, params, userId, ge
   );
   onProgress(60, `Creating ${assessments.length} annotations...`, "creating");
   const bodyLanguage = params.language ?? "en";
-  const annotations = assessments.map(
+  const annotations = dedupeAnnotations(assessments.map(
     (a) => (
       // Single-object body with purpose aligned to motivation, matching the
       // pre-#651 AssessmentAnnotationWorker's shape and the majority of
@@ -1492,7 +1383,7 @@ async function processAssessmentJob(content, inferenceClient, params, userId, ge
       // purpose='describing' — that loses the "this is an assessment, not
       // a description" signal and breaks existing readers that access
       // `body.value` directly on the object.
-      buildTextAnnotation(params.resourceId, userId, generator, "assessing", a, {
+      buildTextAnnotation(content, params.resourceId, userId, generator, "assessing", a, {
         type: "TextualBody",
         value: a.assessment,
         purpose: "assessing",
@@ -1500,7 +1391,7 @@ async function processAssessmentJob(content, inferenceClient, params, userId, ge
         language: bodyLanguage
       })
     )
-  );
+  ));
   onProgress(100, `Complete! Created ${annotations.length} assessments`, "creating");
   return {
     annotations,
@@ -1544,27 +1435,44 @@ async function processReferenceJob(content, inferenceClient, params, userId, gen
       { type: "TextualBody", value: entityTypeName, purpose: "tagging", format: "text/plain", language: bodyLanguage }
     ];
     for (const entity of extractedEntities) {
-      try {
-        const validated = validateAndCorrectOffsets(content, entity.start, entity.end, entity.exact);
-        const ann = buildTextAnnotation(
-          params.resourceId,
-          userId,
-          generator,
-          "linking",
-          validated,
-          unresolvedBody
-        );
-        allAnnotations.push(ann);
-        totalEmitted++;
-      } catch {
+      const reconciled = reconcileSelector(content, {
+        exact: entity.exact,
+        ...entity.prefix !== void 0 ? { prefix: entity.prefix } : {},
+        ...entity.suffix !== void 0 ? { suffix: entity.suffix } : {}
+      });
+      if (!reconciled) {
+        logger.error("Entity dropped \u2014 text not found in source", {
+          text: entity.exact,
+          entityType: entity.entityType
+        });
         errors++;
+        continue;
       }
+      if (reconciled.anchorMethod === "first-of-many" || reconciled.anchorMethod === "fuzzy-match") {
+        logger.warn("Entity anchored via degraded method", {
+          text: entity.exact,
+          entityType: entity.entityType,
+          anchorMethod: reconciled.anchorMethod
+        });
+      }
+      const ann = buildTextAnnotation(
+        content,
+        params.resourceId,
+        userId,
+        generator,
+        "linking",
+        toMatch(reconciled),
+        unresolvedBody
+      );
+      allAnnotations.push(ann);
+      totalEmitted++;
     }
   }
-  onProgress(100, `Complete! Created ${totalEmitted} references`, "creating");
+  const annotations = dedupeAnnotations(allAnnotations);
+  onProgress(100, `Complete! Created ${annotations.length} references`, "creating");
   return {
-    annotations: allAnnotations,
-    result: { totalFound, totalEmitted, errors }
+    annotations,
+    result: { totalFound, totalEmitted: annotations.length, errors }
   };
 }
 async function processTagJob(content, inferenceClient, params, userId, generator, onProgress) {
@@ -1584,15 +1492,19 @@ async function processTagJob(content, inferenceClient, params, userId, generator
   const tags = allTags;
   onProgress(60, `Creating ${tags.length} tag annotations...`, "creating");
   const bodyLanguage = params.language ?? "en";
-  const byCategory = {};
-  const annotations = tags.map((t) => {
+  const annotations = dedupeAnnotations(tags.map((t) => {
     const category = t.category ?? "unknown";
-    byCategory[category] = (byCategory[category] ?? 0) + 1;
-    return buildTextAnnotation(params.resourceId, userId, generator, "tagging", t, [
+    return buildTextAnnotation(content, params.resourceId, userId, generator, "tagging", t, [
       { type: "TextualBody", value: category, purpose: "tagging", format: "text/plain", language: bodyLanguage },
       { type: "TextualBody", value: params.schema.id, purpose: "classifying", format: "text/plain" }
     ]);
-  });
+  }));
+  const byCategory = {};
+  for (const ann of annotations) {
+    const body = ann.body;
+    const category = Array.isArray(body) && typeof body[0]?.value === "string" ? body[0].value : "unknown";
+    byCategory[category] = (byCategory[category] ?? 0) + 1;
+  }
   onProgress(100, `Complete! Created ${annotations.length} tags`, "creating");
   return {
     annotations,
@@ -1628,6 +1540,6 @@ async function processGenerationJob(inferenceClient, params, onProgress, logger)
   };
 }
-export { AnnotationDetection, FsJobQueue, JobWorker, generateResourceFromTopic, isCancelledJob, isCompleteJob, isFailedJob, isPendingJob, isRunningJob, processAssessmentJob, processCommentJob, processGenerationJob, processHighlightJob, processReferenceJob, processTagJob };
+export { AnnotationDetection, FsJobQueue, generateResourceFromTopic, isCancelledJob, isCompleteJob, isFailedJob, isPendingJob, isRunningJob, processAssessmentJob, processCommentJob, processGenerationJob, processHighlightJob, processReferenceJob, processTagJob };
 //# sourceMappingURL=index.js.map
 //# sourceMappingURL=index.js.map