npm - @prmichaelsen/remember-mcp - Versions diffs - 3.14.20 → 3.14.21 - Mend

@prmichaelsen/remember-mcp 3.14.20 → 3.14.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/CHANGELOG.md +8 -0
package/agent/progress.yaml +16 -3
package/agent/tasks/milestone-15-moderation-space-config/task-202-wire-llm-moderation-client.md +62 -0
package/dist/server-factory.js +107 -1
package/dist/server.js +107 -1
package/package.json +1 -1
package/src/core-services.ts +6 -2

package/CHANGELOG.md CHANGED Viewed

@@ -5,6 +5,14 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+## [3.14.21] - 2026-03-06
+### Added
+- Wire LLM auto-moderation into SpaceService publish/revise flow (task-202)
+- Create singleton `ModerationClient` from `ANTHROPIC_API_KEY` env var when present
+- Content published or revised in spaces is now screened by Claude Haiku before storage (only when `ANTHROPIC_API_KEY` is set)
 ## [3.14.14] - 2026-03-04
 ### Changed

package/agent/progress.yaml CHANGED Viewed

@@ -246,10 +246,10 @@ milestones:
     status: completed
     progress: 100%
     started: 2026-02-27
-    completed: 2026-02-27
+    completed: 2026-03-06
     estimated_weeks: 2
-    tasks_completed: 6
-    tasks_total: 6
+    tasks_completed: 7
+    tasks_total: 7
     notes: |
       ✅ Content moderation lifecycle and per-space/group behavioral config.
       ✅ Design: agent/design/local.moderation-and-space-config.md (Implemented)
@@ -702,6 +702,19 @@ tasks:
         ✅ CHANGELOG, design doc status, version bump 3.9.0→3.10.0
         📋 Verify all tests pass
+    - id: task-202
+      name: Wire LLM Moderation Client to SpaceService
+      status: completed
+      completed_date: 2026-03-06
+      file: agent/tasks/milestone-15-moderation-space-config/task-202-wire-llm-moderation-client.md
+      estimated_hours: 0.5
+      actual_hours: 0.25
+      dependencies: [task-179]
+      notes: |
+        📋 Connect createModerationClient (remember-core) to SpaceService in core-services.ts
+        📋 Auto-create when ANTHROPIC_API_KEY env var is present
+        📋 ~5 lines of code change
   milestone_7:
     - id: task-180
       name: Access Result & Permission Types

package/agent/tasks/milestone-15-moderation-space-config/task-202-wire-llm-moderation-client.md ADDED Viewed

@@ -0,0 +1,62 @@
+# Task 202: Wire LLM Moderation Client to SpaceService
+**Milestone**: M15 - Moderation & Space Config (follow-up)
+**Status**: completed
+**Estimated Hours**: 0.5
+**Priority**: P1
+**Dependencies**: M15 (completed), remember-core moderation.service.ts
+---
+## Objective
+Connect the `createModerationClient` from `@prmichaelsen/remember-core` to the `SpaceService` in `remember-mcp`, so that content published to spaces is automatically screened by Claude Haiku before being stored.
+Currently, all moderation infrastructure exists but is disconnected:
+- `remember-core` exports `createModerationClient()` (calls Anthropic Messages API with Haiku)
+- `SpaceService` accepts `{ moderationClient }` in its constructor options
+- `SpaceService.checkModeration()` calls `moderationClient.moderate()` on publish/revise
+- But `createCoreServices()` in `remember-mcp` never creates or passes a moderation client
+## Context
+- `remember-core/src/services/moderation.service.ts` — factory + types
+- `remember-core/src/services/space.service.ts:237` — constructor accepts `options?.moderationClient`
+- `remember-mcp/src/core-services.ts:50` — SpaceService created without moderation client
+- `remember-mcp-server/.env` already has `ANTHROPIC_API_KEY` set
+## Steps
+### 1. Edit `src/core-services.ts`
+- Import `createModerationClient` from `@prmichaelsen/remember-core`
+- Create a singleton moderation client (only when `ANTHROPIC_API_KEY` env var is present)
+- Pass `{ moderationClient }` as the 6th argument to `new SpaceService()`
+### 2. Build and verify
+- Run `npm run build` (or equivalent)
+- Verify TypeScript compiles without errors
+### 3. Run tests
+- Run existing test suite to ensure no regressions
+- Moderation client is optional, so existing tests should pass unaffected
+### 4. Version bump
+- Bump patch version in package.json
+- Update CHANGELOG.md
+## Verification
+- [ ] `createModerationClient` imported from remember-core
+- [ ] Moderation client created conditionally (only when `ANTHROPIC_API_KEY` is set)
+- [ ] Moderation client passed to SpaceService constructor
+- [ ] TypeScript compiles without errors
+- [ ] Existing tests pass
+- [ ] Version bumped
+## Downstream
+After this ships, `remember-mcp-server` just needs a dependency bump — no code changes required. `ANTHROPIC_API_KEY` is already configured in its `.env`.

package/dist/server-factory.js CHANGED Viewed

@@ -3831,6 +3831,111 @@ var REM_STATE_COLLECTION = `${BASE}.rem_state`;
 // node_modules/@prmichaelsen/remember-core/dist/services/rem.clustering.js
 import { Filters as Filters5 } from "weaviate-client";
+// node_modules/@prmichaelsen/remember-core/dist/services/moderation.service.js
+import { createHash as createHash2 } from "crypto";
+function buildModerationPrompt(content) { // Returns the classifier prompt: lenient moderation policy text, the raw `content` between --- markers, and the required JSON reply format.
+  return `You are a content moderation classifier. Evaluate the following user-generated content and determine if it should be ALLOWED or BLOCKED from a public space.
+You must be LENIENT. Most content should pass. Only block content that is clearly and explicitly in one of these categories:
+BLOCK categories:
+1. HATE SPEECH \u2014 Explicit dehumanization of ethnic/racial/religious groups, calls for violence against protected groups, racial slurs used to demean.
+   Examples that FAIL:
+   - "All [ethnic group] should be exterminated. They are subhuman parasites."
+   - Explicit Nazi ideology or white supremacist recruitment
+   - "[Racial slur] don't deserve rights"
+   - Genocide celebration ("The Holocaust was a good thing")
+2. VIOLENCE INCITEMENT \u2014 Direct threats against specific people or locations, incitement to commit specific acts of mass violence.
+   Examples that FAIL:
+   - "I'm going to find [person] and make them pay. They won't see it coming."
+   - "Someone should shoot up [location]. That would teach them."
+   Examples that PASS:
+   - Instructions or information about weapons (legally protected speech without incitement)
+   - Historical/philosophical discussion of violence
+   - Fictional violence (games, stories, D&D campaigns)
+   - Colloquial expressions ("I could kill for a burger")
+3. CSAM \u2014 Any sexual content involving minors. Zero tolerance.
+4. SELF-HARM ENCOURAGEMENT \u2014 Detailed instructions or active encouragement for self-harm or suicide.
+   Examples that PASS:
+   - Discussions about depression or mental health struggles
+   - Jokes about suicide or dark humor about self-harm
+   - Journaling about difficult feelings
+ALWAYS ALLOW:
+- Harsh political opinions, criticism of governments or public figures
+- Edgy humor, dark comedy, offensive jokes (without targeted dehumanization)
+- Strong opinions about religion, ideology, or social issues
+- Profanity and vulgar language
+- Controversial or uncomfortable topics
+- Educational/historical content about atrocities
+- The French Revolution, violent revolution as philosophical concept
+Content to evaluate:
+---
+${content}
+---
+Respond with ONLY valid JSON:
+{"pass":true}
+OR
+{"pass":false,"reason":"<specific, human-friendly explanation of why this was blocked>","category":"<hate_speech|extremism|violence_incitement|csam|self_harm_encouragement>"}`;
+} // NOTE(review): the reply schema lists an "extremism" category with no matching numbered BLOCK category above; `content` is interpolated unescaped, so it can itself contain --- lines or instructions.
+var DEFAULT_CACHE_MAX = 1e3; // Cache size limit (1000 entries) used by createModerationClient when options.cacheMax is null/undefined.
+function hashContent(content) { // Returns the hex-encoded SHA-256 digest of `content`; createModerationClient uses it as the cache key.
+  return createHash2("sha256").update(content).digest("hex");
+}
+function createModerationClient(options) { // Builds a client object with one async method, moderate(content), that resolves to { pass, reason, category? }; reads options.apiKey, options.model, options.cacheMax.
+  const model = options.model ?? "claude-haiku-4-5-20251001"; // default model id when options.model is null/undefined
+  const cacheMax = options.cacheMax ?? DEFAULT_CACHE_MAX;
+  const cache = /* @__PURE__ */ new Map(); // per-client cache: content hash -> moderation result
+  return {
+    async moderate(content) {
+      const hash = hashContent(content);
+      const cached = cache.get(hash);
+      if (cached) // identical content is answered from the cache without a network call
+        return cached;
+      try {
+        const response = await fetch("https://api.anthropic.com/v1/messages", {
+          method: "POST",
+          headers: {
+            "Content-Type": "application/json",
+            "x-api-key": options.apiKey,
+            "anthropic-version": "2023-06-01"
+          },
+          body: JSON.stringify({
+            model,
+            max_tokens: 256,
+            messages: [{ role: "user", content: buildModerationPrompt(content) }]
+          })
+        });
+        if (!response.ok) { // non-2xx HTTP status: report pass: false instead of letting content through; this result is not cached
+          return { pass: false, reason: "Content moderation unavailable. Please try again later." };
+        }
+        const data = await response.json();
+        const text = data.content?.[0]?.text ?? ""; // text of the first content block, or "" when absent
+        const parsed = JSON.parse(text); // throws on non-JSON text (including ""), which is handled by the catch below
+        const result = {
+          pass: parsed.pass === true, // only a literal boolean true counts as a pass
+          reason: parsed.reason ?? "",
+          category: parsed.pass ? void 0 : parsed.category // NOTE(review): tests truthiness while `pass` uses === true; a truthy non-boolean gives pass: false with no category
+        };
+        if (cache.size >= cacheMax) { // at capacity: drop the first-inserted key (Map iterates in insertion order)
+          const oldest = cache.keys().next().value;
+          cache.delete(oldest);
+        }
+        cache.set(hash, result); // only successfully parsed verdicts reach the cache
+        return result;
+      } catch { // network failure, invalid JSON, etc.: report pass: false with a generic message; the underlying error is discarded
+        return { pass: false, reason: "Content moderation unavailable. Please try again later." };
+      }
+    }
+  };
+}
 // src/weaviate/schema.ts
 init_logger();
@@ -3947,6 +4052,7 @@ function getMemoryCollection(userId) {
 var coreLogger = createLogger("info");
 var tokenService = new ConfirmationTokenService(coreLogger);
 var preferencesService = new PreferencesDatabaseService(coreLogger);
+var moderationClient = process.env.ANTHROPIC_API_KEY ? createModerationClient({ apiKey: process.env.ANTHROPIC_API_KEY }) : void 0; // single module-level client, created only when ANTHROPIC_API_KEY is set to a non-empty value; otherwise undefined
 var coreServicesCache = /* @__PURE__ */ new Map();
 function createCoreServices(userId) {
   const cached = coreServicesCache.get(userId);
@@ -3957,7 +4063,7 @@ function createCoreServices(userId) {
   const services = {
     memory: new MemoryService(collection, userId, coreLogger),
     relationship: new RelationshipService(collection, userId, coreLogger),
-    space: new SpaceService(weaviateClient, collection, userId, tokenService, coreLogger),
+    space: new SpaceService(weaviateClient, collection, userId, tokenService, coreLogger, { moderationClient }),
     preferences: preferencesService,
     token: tokenService
   };

package/dist/server.js CHANGED Viewed

@@ -3835,6 +3835,111 @@ var REM_STATE_COLLECTION = `${BASE}.rem_state`;
 // node_modules/@prmichaelsen/remember-core/dist/services/rem.clustering.js
 import { Filters as Filters5 } from "weaviate-client";
+// node_modules/@prmichaelsen/remember-core/dist/services/moderation.service.js
+import { createHash as createHash2 } from "crypto";
+function buildModerationPrompt(content) { // Returns the classifier prompt: lenient moderation policy text, the raw `content` between --- markers, and the required JSON reply format.
+  return `You are a content moderation classifier. Evaluate the following user-generated content and determine if it should be ALLOWED or BLOCKED from a public space.
+You must be LENIENT. Most content should pass. Only block content that is clearly and explicitly in one of these categories:
+BLOCK categories:
+1. HATE SPEECH \u2014 Explicit dehumanization of ethnic/racial/religious groups, calls for violence against protected groups, racial slurs used to demean.
+   Examples that FAIL:
+   - "All [ethnic group] should be exterminated. They are subhuman parasites."
+   - Explicit Nazi ideology or white supremacist recruitment
+   - "[Racial slur] don't deserve rights"
+   - Genocide celebration ("The Holocaust was a good thing")
+2. VIOLENCE INCITEMENT \u2014 Direct threats against specific people or locations, incitement to commit specific acts of mass violence.
+   Examples that FAIL:
+   - "I'm going to find [person] and make them pay. They won't see it coming."
+   - "Someone should shoot up [location]. That would teach them."
+   Examples that PASS:
+   - Instructions or information about weapons (legally protected speech without incitement)
+   - Historical/philosophical discussion of violence
+   - Fictional violence (games, stories, D&D campaigns)
+   - Colloquial expressions ("I could kill for a burger")
+3. CSAM \u2014 Any sexual content involving minors. Zero tolerance.
+4. SELF-HARM ENCOURAGEMENT \u2014 Detailed instructions or active encouragement for self-harm or suicide.
+   Examples that PASS:
+   - Discussions about depression or mental health struggles
+   - Jokes about suicide or dark humor about self-harm
+   - Journaling about difficult feelings
+ALWAYS ALLOW:
+- Harsh political opinions, criticism of governments or public figures
+- Edgy humor, dark comedy, offensive jokes (without targeted dehumanization)
+- Strong opinions about religion, ideology, or social issues
+- Profanity and vulgar language
+- Controversial or uncomfortable topics
+- Educational/historical content about atrocities
+- The French Revolution, violent revolution as philosophical concept
+Content to evaluate:
+---
+${content}
+---
+Respond with ONLY valid JSON:
+{"pass":true}
+OR
+{"pass":false,"reason":"<specific, human-friendly explanation of why this was blocked>","category":"<hate_speech|extremism|violence_incitement|csam|self_harm_encouragement>"}`;
+} // NOTE(review): the reply schema lists an "extremism" category with no matching numbered BLOCK category above; `content` is interpolated unescaped, so it can itself contain --- lines or instructions.
+var DEFAULT_CACHE_MAX = 1e3; // Cache size limit (1000 entries) used by createModerationClient when options.cacheMax is null/undefined.
+function hashContent(content) { // Returns the hex-encoded SHA-256 digest of `content`; createModerationClient uses it as the cache key.
+  return createHash2("sha256").update(content).digest("hex");
+}
+function createModerationClient(options) { // Builds a client object with one async method, moderate(content), that resolves to { pass, reason, category? }; reads options.apiKey, options.model, options.cacheMax.
+  const model = options.model ?? "claude-haiku-4-5-20251001"; // default model id when options.model is null/undefined
+  const cacheMax = options.cacheMax ?? DEFAULT_CACHE_MAX;
+  const cache = /* @__PURE__ */ new Map(); // per-client cache: content hash -> moderation result
+  return {
+    async moderate(content) {
+      const hash = hashContent(content);
+      const cached = cache.get(hash);
+      if (cached) // identical content is answered from the cache without a network call
+        return cached;
+      try {
+        const response = await fetch("https://api.anthropic.com/v1/messages", {
+          method: "POST",
+          headers: {
+            "Content-Type": "application/json",
+            "x-api-key": options.apiKey,
+            "anthropic-version": "2023-06-01"
+          },
+          body: JSON.stringify({
+            model,
+            max_tokens: 256,
+            messages: [{ role: "user", content: buildModerationPrompt(content) }]
+          })
+        });
+        if (!response.ok) { // non-2xx HTTP status: report pass: false instead of letting content through; this result is not cached
+          return { pass: false, reason: "Content moderation unavailable. Please try again later." };
+        }
+        const data = await response.json();
+        const text = data.content?.[0]?.text ?? ""; // text of the first content block, or "" when absent
+        const parsed = JSON.parse(text); // throws on non-JSON text (including ""), which is handled by the catch below
+        const result = {
+          pass: parsed.pass === true, // only a literal boolean true counts as a pass
+          reason: parsed.reason ?? "",
+          category: parsed.pass ? void 0 : parsed.category // NOTE(review): tests truthiness while `pass` uses === true; a truthy non-boolean gives pass: false with no category
+        };
+        if (cache.size >= cacheMax) { // at capacity: drop the first-inserted key (Map iterates in insertion order)
+          const oldest = cache.keys().next().value;
+          cache.delete(oldest);
+        }
+        cache.set(hash, result); // only successfully parsed verdicts reach the cache
+        return result;
+      } catch { // network failure, invalid JSON, etc.: report pass: false with a generic message; the underlying error is discarded
+        return { pass: false, reason: "Content moderation unavailable. Please try again later." };
+      }
+    }
+  };
+}
 // src/weaviate/schema.ts
 init_logger();
@@ -3951,6 +4056,7 @@ function getMemoryCollection(userId) {
 var coreLogger = createLogger("info");
 var tokenService = new ConfirmationTokenService(coreLogger);
 var preferencesService = new PreferencesDatabaseService(coreLogger);
+var moderationClient = process.env.ANTHROPIC_API_KEY ? createModerationClient({ apiKey: process.env.ANTHROPIC_API_KEY }) : void 0; // single module-level client, created only when ANTHROPIC_API_KEY is set to a non-empty value; otherwise undefined
 var coreServicesCache = /* @__PURE__ */ new Map();
 function createCoreServices(userId) {
   const cached = coreServicesCache.get(userId);
@@ -3961,7 +4067,7 @@ function createCoreServices(userId) {
   const services = {
     memory: new MemoryService(collection, userId, coreLogger),
     relationship: new RelationshipService(collection, userId, coreLogger),
-    space: new SpaceService(weaviateClient, collection, userId, tokenService, coreLogger),
+    space: new SpaceService(weaviateClient, collection, userId, tokenService, coreLogger, { moderationClient }),
     preferences: preferencesService,
     token: tokenService
   };

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@prmichaelsen/remember-mcp",
-  "version": "3.14.20",
+  "version": "3.14.21",
   "description": "Multi-tenant memory system MCP server with vector search and relationships",
   "main": "dist/server.js",
   "type": "module",

package/src/core-services.ts CHANGED Viewed

@@ -12,8 +12,9 @@ import {
   PreferencesDatabaseService,
   ConfirmationTokenService,
   createLogger,
+  createModerationClient,
 } from '@prmichaelsen/remember-core';
-import type { Logger } from '@prmichaelsen/remember-core';
+import type { Logger, ModerationClient } from '@prmichaelsen/remember-core';
 import { getWeaviateClient } from './weaviate/client.js';
 import { getMemoryCollection } from './weaviate/schema.js';
@@ -29,6 +30,9 @@ export interface CoreServices {
 const coreLogger: Logger = createLogger('info');
 const tokenService = new ConfirmationTokenService(coreLogger);
 const preferencesService = new PreferencesDatabaseService(coreLogger);
+const moderationClient: ModerationClient | undefined = process.env.ANTHROPIC_API_KEY // single module-level client, created only when ANTHROPIC_API_KEY is set to a non-empty value
+  ? createModerationClient({ apiKey: process.env.ANTHROPIC_API_KEY })
+  : undefined; // no key: stays undefined and is passed as-is in the SpaceService options
 /** Cached CoreServices per userId — avoids re-instantiation on every tool call */
 const coreServicesCache = new Map<string, CoreServices>();
@@ -47,7 +51,7 @@ export function createCoreServices(userId: string): CoreServices {
   const services: CoreServices = {
     memory: new MemoryService(collection, userId, coreLogger),
     relationship: new RelationshipService(collection, userId, coreLogger),
-    space: new SpaceService(weaviateClient, collection, userId, tokenService, coreLogger),
+    space: new SpaceService(weaviateClient, collection, userId, tokenService, coreLogger, { moderationClient }),
     preferences: preferencesService,
     token: tokenService,
   };