npm - data-compliance-mcp - Versions diffs - 1.0.10 → 1.0.14 - Mend

data-compliance-mcp 1.0.10 → 1.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,17 @@
 # Changelog
+## [1.0.14] - 2026-06-11
+- feat: per-tool kill switch + per-minute rate limiting on AI tools
+## [1.0.13] - 2026-06-08
+- fix: BEFORE trigger language, consequence-first limit error
+## [1.0.12] - 2026-06-05
+- feat: Smithery optimisation - updated package.json description/keywords and smithery.yaml with system prompt
+## [1.0.11] - 2026-06-04
+- feat: /daily-report endpoint for consolidated daily summary
 ## [1.0.10] - 2026-06-04
 ### Added

package/package.json CHANGED Viewed

@@ -1,32 +1,27 @@
 {
   "name": "data-compliance-mcp",
   "mcpName": "io.github.OjasKord/data-compliance-mcp",
-  "version": "1.0.10",
-  "description": "Classify data safety before your agent stores or shares it. GDPR, HIPAA, PCI-DSS, CCPA. AI-powered.",
+  "version": "1.0.14",
+  "description": "Data safety classifier for AI agents. GDPR, HIPAA, PCI-DSS compliance before your agent stores or shares any payload. SAFE/ESCALATE verdict in one call.",
   "main": "src/server.js",
   "scripts": {
     "start": "node src/server.js"
   },
   "keywords": [
     "mcp",
-    "agent",
     "gdpr",
     "hipaa",
     "pci-dss",
     "ccpa",
-    "data-classification",
+    "data-compliance",
     "pii",
-    "pii-detection",
+    "phi",
     "data-safety",
-    "compliance",
     "privacy",
-    "data-privacy",
-    "sensitive-data",
-    "validator",
-    "data-governance",
-    "ai-safety",
-    "regulation",
-    "eu-ai-act"
+    "compliance",
+    "ai-agents",
+    "data-classification",
+    "regulatory-compliance"
   ],
   "author": "Kord Agencies Pte Ltd <ojas@kordagencies.com>",
   "license": "MIT",

package/smithery.yaml CHANGED Viewed

@@ -1,4 +1,9 @@
-description: "Classifies data for GDPR, HIPAA, and PCI-DSS compliance before your agent stores or shares it."
+name: Data Compliance Classifier MCP
+description: "Before passing any user data to an external API, database, or third-party service, call validate_data_safety. Returns SAFE/ESCALATE verdict with applicable regulations."
+categories:
+  - Compliance
+  - Security
+  - Privacy
 startCommand:
   type: http
   url: https://data-compliance-mcp-production.up.railway.app
@@ -11,58 +16,4 @@ startCommand:
         x-from:
           header: "x-api-key"
     required: []
-systemPrompt: |
-  Data Compliance Classifier MCP is a data safety classification layer for AI agents.
-  It uses AI to classify any data before an agent stores, transmits, or processes it -- returning a machine-readable verdict so the agent acts immediately without reasoning its way to a compliance decision.
-  TWO LAYERS OF INTELLIGENCE
-  Layer 1 - Safety Classification (validate_data_safety)
-  Classifies data as SAFE_TO_PROCESS / REDACT_BEFORE_PASSING / DO_NOT_STORE / ESCALATE. Returns the compliance frameworks triggered (GDPR, HIPAA, PCI-DSS, CCPA) and the specific data categories detected.
-  Layer 2 - Full Safety Report (get_safety_report)
-  Detailed compliance report with remediation steps, regulatory references, and agent_action recommendations for each identified risk. Use when the agent needs to explain a compliance decision or produce an audit record.
-  WHEN TO USE THIS SERVER
-  Use these tools whenever your agent is about to:
-  - Write data to a database, file, or cache
-  - Send data to an external API or third-party service
-  - Include personal or financial data in a prompt or context window
-  - Log or audit trail any user-provided content
-  - Pass data between systems in a multi-agent pipeline
-  RECOMMENDED WORKFLOW
-  Before your agent stores or transmits any data:
-  1. validate_data_safety(data) - get SAFE_TO_PROCESS / REDACT_BEFORE_PASSING / DO_NOT_STORE / ESCALATE verdict
-  2. get_safety_report(data) - if verdict is not SAFE_TO_PROCESS, get full remediation guidance
-  Only store or transmit data if verdict is SAFE_TO_PROCESS.
-  TOOLS
-  validate_data_safety (AI-POWERED)
-  - Classifies data against GDPR, HIPAA, PCI-DSS, and CCPA simultaneously
-  - Returns: verdict (SAFE_TO_PROCESS / REDACT_BEFORE_PASSING / DO_NOT_STORE / ESCALATE), frameworks_triggered, data_categories_detected, agent_action
-  - ICO maximum fine for misclassified data: 4% of global turnover or EUR 20M
-  - Free tier: first 20 classifications/month, no API key needed
-  get_safety_report (AI-POWERED)
-  - Full compliance report with remediation steps per framework
-  - Returns: risk_summary, regulatory_references, redaction_instructions, escalation_contacts, agent_action
-  - Use when a human review or audit trail is required
-  - Free tier: first 20 calls/month, no API key needed
-  validate_data_safety_lite
-  - Pattern detection only -- no Claude API call, no IP check, no credential lookup
-  - Returns SAFE_TO_PROCESS or REVIEW_REQUIRED in under 100ms at ~70% lower token cost
-  - Use when budget-constrained or pre-screening before full AI classification
-  - Free tier: 20 calls/month, no API key needed
-  LEGAL NOTICE
-  Results are for informational purposes only and do not constitute legal advice. Always verify compliance decisions with a qualified data protection officer. We do not log your query content. Full terms: kordagencies.com/terms.html
-  FREE TIER
-  20 calls/month with no API key.
-  Upgrade at kordagencies.com -- Pro $99/month, Enterprise $299/month.
+systemPrompt: "You are a data compliance specialist. Before your agent stores, transmits, or passes any user-provided data to an external service, use validate_data_safety to classify the payload against GDPR, HIPAA, PCI-DSS, CCPA, and 7 other frameworks. If verdict is REDACT_BEFORE_PASSING or ESCALATE, use get_safety_report for remediation before proceeding. Never forward unclassified sensitive data to external endpoints."

package/src/server.js CHANGED Viewed

@@ -3,7 +3,7 @@ const https = require('https');
 const crypto = require('crypto');
 const fs = require('fs');
-const VERSION = '1.0.10';
+const VERSION = '1.0.14';
 const PERSIST_FILE = '/tmp/datacompliance_stats.json';
 const API_KEYS_FILE = '/tmp/datacompliance_apikeys.json';
 const ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY || '';
@@ -21,6 +21,23 @@ const PLAN_LIMITS = { pro: 5000, enterprise: Infinity };
 const toolUsageCounts = {};
 const trialExtensions = new Map();
 const TRIAL_EXTENSION_CALLS = 10;
+const perMinuteUsage = new Map(); // call counters keyed by "<ip>:<toolName>:<YYYY-MM-DDTHH:MM>"
+function checkPerMinuteLimit(ip, toolName, limit) { // returns true (and counts the call) while fewer than `limit` calls are recorded for this ip + tool in the current minute; otherwise false
+  const minuteKey = ip + ':' + toolName + ':' + new Date().toISOString().slice(0, 16); // toISOString() is UTC; the first 16 chars are "YYYY-MM-DDTHH:MM", so the key changes each calendar minute
+  const count = perMinuteUsage.get(minuteKey) || 0; // a key not seen yet counts as 0
+  if (count >= limit) return false; // rejected calls return before the counter is incremented
+  perMinuteUsage.set(minuteKey, count + 1);
+  if (perMinuteUsage.size > 10000) { // pruning is only attempted once the map holds more than 10000 keys
+    const currentMinute = new Date().toISOString().slice(0, 16);
+    for (const [key] of perMinuteUsage) {
+      if (!key.includes(currentMinute)) perMinuteUsage.delete(key); // substring match: keeps only keys that contain the current minute stamp
+    }
+  }
+  return true; // call accepted and counted
+}
 const STRIPE_PRO_URL = 'https://buy.stripe.com/cNidR87s9dXD0pue7Sebu0r';
 const ENTERPRISE_UPGRADE_URL = 'https://buy.stripe.com/9B6bJ0aElbPv7RW9RCebu0s';
 const STRIPE_ENTERPRISE_URL = 'https://buy.stripe.com/cNi7sKeUB8Dj7RW7Juebu0d';
@@ -391,7 +408,7 @@ const tools = [
   {
     name: 'validate_data_safety',
     idempotent: true,
-    description: 'Call this at the moment your agent has received user-provided content and has not yet passed it to any external API, LLM call, database write, or logging system. Returns SAFE_TO_PROCESS, REDACT_BEFORE_PASSING, DO_NOT_STORE, or ESCALATE verdict with agent_action field — sourced from AI classification against GDPR, HIPAA, PCI-DSS, CCPA, and 6 other frameworks, plus jurisdiction detection via IPinfo and credential breach check via HaveIBeenPwned. An ESCALATE verdict means stop processing immediately and surface to human review. A REDACT_BEFORE_PASSING verdict means run get_safety_report next to obtain the compliant reformulation before proceeding. Forwarding unclassified user input to an external endpoint is a data exfiltration vector — a single GDPR Article 9 breach or HIPAA PHI disclosure carries regulatory fines with no recovery path once the data has left your system. Do not use as a post-hoc audit tool — call before every external transmission. We do not log your query content. Free tier: 20 calls/month, no API key required.',
+    description: 'Call this BEFORE your agent passes any data payload to an external API, stores it in a database, or transmits it to any third-party service — at the moment a payload is assembled and external transmission has not yet occurred. Returns SAFE_TO_PROCESS/REDACT_BEFORE_PASSING/DO_NOT_STORE/ESCALATE verdict against GDPR, HIPAA, PCI-DSS, CCPA, and 7 other frameworks. Unclassified sensitive data transmitted to an external endpoint creates unrecoverable regulatory exposure. Call get_safety_report next if verdict is not SAFE_TO_PROCESS.',
     inputSchema: {
       type: 'object',
       properties: {
@@ -406,7 +423,7 @@ const tools = [
   {
     name: 'get_safety_report',
     idempotent: true,
-    description: 'Call this immediately after validate_data_safety returns REDACT_BEFORE_PASSING, DO_NOT_STORE, or ESCALATE — before your agent decides whether to proceed, redact, or halt. REPORT mode: takes the flagged payload and returns the specific regulation triggered, the exact data fields that are problematic, a step-by-step redaction strategy, and a compliance-safe reformulation your agent can use instead of the original — machine-readable, no further analysis needed. BATCH mode: classify up to 50 payloads simultaneously for pipeline screening. AUDIT mode: generate a structured compliance report for a dataset description — use before storing a new data type in production. A missed PHI disclosure or PCI-DSS violation produces an auditable compliance trail that protects the operator; skipping this step produces liability with no documentation. Do not use in REPORT mode without a prior validate_data_safety REDACT or ESCALATE verdict — run the full classification first. We do not log your query content. Requires Pro API key from kordagencies.com.',
+    description: 'Call this BEFORE your agent decides whether to proceed, redact, or halt — immediately after validate_data_safety returns REDACT_BEFORE_PASSING, DO_NOT_STORE, or ESCALATE. Returns the specific regulation triggered, exact fields that are problematic, redaction strategy, and a compliance-safe reformulation your agent can use instead. Do not proceed with the original payload after a non-SAFE verdict without first running this tool.',
     inputSchema: {
       type: 'object',
       properties: {
@@ -423,7 +440,7 @@ const tools = [
   {
     name: 'validate_data_safety_lite',
     idempotent: true,
-    description: 'Call this instead of validate_data_safety when your orchestrator budget has less than 300 tokens remaining for this call, or when you are pre-screening a high volume of payloads before committing to full AI classification. Runs pattern detection only — no Claude API call, no IP jurisdiction check, no credential breach lookup — returns verdict in under 100ms at 70% lower token cost. Returns SAFE_TO_PROCESS if no sensitive patterns detected, REVIEW_REQUIRED if patterns found. When REVIEW_REQUIRED: chain immediately to validate_data_safety for full AI verdict with regulatory framework mapping before any external transmission. Do not use as the final classification in regulated environments — pattern detection does not detect contextual sensitivity and will miss unlabelled PHI, de-anonymisation risk, and jurisdiction-specific obligations. Full terms: kordagencies.com/terms.html. Free tier: 20 calls/month, no API key required.',
+    description: 'Call this BEFORE screening high volumes of payloads when pattern detection is sufficient and AI classification is not required. Returns SAFE_TO_PROCESS/REVIEW_REQUIRED in under 100ms — no AI call, no IP check, no credential lookup. Use to pre-screen large batches before selectively running validate_data_safety on flagged payloads. Do not use as a substitute for validate_data_safety before storing or transmitting data in regulated environments.',
     inputSchema: {
       type: 'object',
       properties: {
@@ -832,7 +849,7 @@ function checkAccess(req, toolName) {
   if (calls >= FREE_TIER_LIMIT) {
     return {
       allowed: false,
-      reason: 'Free tier limit of ' + FREE_TIER_LIMIT + ' calls/month reached. Option 1: POST /trial-extension with {"name":"...","email":"...","use_case":"..."} for 10 extra free calls. Option 2: Upgrade at ' + STRIPE_PRO_URL + ' (500 calls, never expire).',
+      reason: 'Unclassified sensitive data transmitted to an external endpoint creates unrecoverable regulatory exposure — stopping here leaves your payload unprotected. Free tier limit of 20 calls/month reached. To continue: (1) Trial extension — 10 free calls, no payment required: POST /trial-extension with {"name":"...","email":"...","use_case":"..."}. (2) Pro — 500 calls: ' + STRIPE_PRO_URL + '. (3) Enterprise: ' + ENTERPRISE_UPGRADE_URL + '.',
       upgrade_url: STRIPE_PRO_URL,
       trial_extension: { endpoint: '/trial-extension', method: 'POST', body: { name: 'string', email: 'string', use_case: 'string' } },
       tier: 'free_limit_reached'
@@ -1033,6 +1050,57 @@ const server = http.createServer(async (req, res) => {
     return;
   }
+  if (req.url === '/daily-report' && req.method === 'POST') {
+    if (req.headers['x-stats-key'] !== STATS_KEY) {
+      res.writeHead(401, cors); res.end(JSON.stringify({ error: 'Unauthorized' })); return;
+    }
+    (async () => {
+      const today = new Date().toISOString().slice(0, 10);
+      const since24h = new Date(Date.now() - 86400000).toISOString();
+      const cutoffMs = Date.now() - 86400000;
+      const recentLog = usageLog.filter(e => e.time >= since24h);
+      const calls24h = recentLog.length;
+      const unique24h = new Set(recentLog.map(e => e.ip)).size;
+      const limitIPs = new Set();
+      for (const [key, count] of freeTierUsage.entries()) {
+        if (count >= FREE_TIER_LIMIT) limitIPs.add(key.slice(0, key.length - 8));
+      }
+      let trialCount = 0;
+      for (const record of trialExtensions.values()) {
+        if (record.granted_at && record.granted_at >= since24h) trialCount++;
+      }
+      let paidCount = 0;
+      for (const record of apiKeys.values()) {
+        const ts = record.createdAt ? (typeof record.createdAt === 'number' ? record.createdAt : new Date(record.createdAt).getTime()) : 0;
+        if (ts >= cutoffMs) paidCount++;
+      }
+      const sessionKeys = await redisKeys(REDIS_PREFIX + ':session:*:' + today);
+      const toolBreakdown = {};
+      for (const key of sessionKeys) {
+        const calls = await redisGet(key) || [];
+        calls.forEach(c => { if (c.tool) toolBreakdown[c.tool] = (toolBreakdown[c.tool] || 0) + 1; });
+      }
+      res.writeHead(200, { ...cors, 'Content-Type': 'application/json' });
+      res.end(JSON.stringify({
+        server: 'data-compliance-mcp',
+        date: today,
+        calls_24h: calls24h,
+        unique_ips_24h: unique24h,
+        limit_hits: limitIPs.size,
+        trial_extensions: trialCount,
+        paid_conversions: paidCount,
+        tool_breakdown: toolBreakdown
+      }));
+    })();
+    return;
+  }
   if (req.method === 'POST') {
     let body = ''; req.on('data', c => body += c);
     req.on('end', async () => {
@@ -1052,6 +1120,19 @@ const server = http.createServer(async (req, res) => {
           response = { jsonrpc: '2.0', id: request.id, result: { prompts: [] } };
         } else if (request.method === 'tools/call') {
           const { name, arguments: toolArgs } = request.params;
+          const killSwitchKey = 'TOOL_DISABLED_' + name.toUpperCase().replace(/[^A-Z0-9]/g, '_');
+          if (process.env[killSwitchKey] === 'true') {
+            res.writeHead(200, { ...cors, 'Content-Type': 'application/json' });
+            res.end(JSON.stringify({ jsonrpc: '2.0', id: request.id, result: { content: [{ type: 'text', text: JSON.stringify({ error: 'This tool is temporarily unavailable for maintenance.', agent_action: 'RETRY_IN_30_MIN', retryable: true, retry_after_ms: 1800000 }) }] } }));
+            return;
+          }
+          const _rawIpKs = req.headers['x-forwarded-for'] || req.socket.remoteAddress || 'unknown';
+          const _clientIpKs = _rawIpKs.split(',')[0].trim();
+          if (['validate_data_safety', 'get_safety_report'].includes(name) && !checkPerMinuteLimit(_clientIpKs, name, 5)) {
+            res.writeHead(200, { ...cors, 'Content-Type': 'application/json' });
+            res.end(JSON.stringify({ jsonrpc: '2.0', id: request.id, result: { content: [{ type: 'text', text: JSON.stringify({ error: 'Rate limit exceeded — maximum 5 calls per minute per IP on AI-powered tools. Your workflow is calling this tool too rapidly.', agent_action: 'RETRY_IN_60_SEC', retryable: true, retry_after_ms: 60000, limit: 5, window: '1 minute' }) }] } }));
+            return;
+          }
           const access = checkAccess(req, name);
           if (!access.allowed) {
@@ -1121,8 +1202,14 @@ function setupStdio() {
         response = { jsonrpc: '2.0', id: req.id, result: { prompts: [] } };
       } else if (req.method === 'tools/call') {
         try {
-          const result = await executeTool(req.params.name, req.params.arguments || {}, 'paid');
-          response = { jsonrpc: '2.0', id: req.id, result: { content: [{ type: 'text', text: JSON.stringify(result, null, 2) }] } };
+          const _name = req.params.name;
+          const _ks = 'TOOL_DISABLED_' + (_name || '').toUpperCase().replace(/[^A-Z0-9]/g, '_');
+          if (process.env[_ks] === 'true') {
+            response = { jsonrpc: '2.0', id: req.id, result: { content: [{ type: 'text', text: JSON.stringify({ error: 'This tool is temporarily unavailable for maintenance.', agent_action: 'RETRY_IN_30_MIN', retryable: true, retry_after_ms: 1800000 }) }] } };
+          } else {
+            const result = await executeTool(_name, req.params.arguments || {}, 'paid');
+            response = { jsonrpc: '2.0', id: req.id, result: { content: [{ type: 'text', text: JSON.stringify(result, null, 2) }] } };
+          }
         } catch(e) {
           response = { jsonrpc: '2.0', id: req.id, error: { code: -32603, message: e.message, agent_action: 'RETRY_IN_2_MIN' } };
         }