vessel-sdk-cortex 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,142 @@
1
+ // ============================================================
2
+ // Layer 0: Substrate Profiles
3
+ // "Different labs bake in different priors. The Vessel must
4
+ // know how to drive each Engine." — Grok
5
+ // ============================================================
6
+
7
+ import { SubstrateProfile } from './types';
8
+
9
/**
 * Known substrate profiles for major model providers.
 *
 * Keys are 'provider/model' identifiers. Each entry's `priors` are
 * hand-estimated 0-1 weights (see SubstrateProfile) consumed by the
 * engine-selection scoring; they are editorial judgments baked into this
 * table, not measured values.
 */
export const SUBSTRATE_PROFILES: Record<string, SubstrateProfile> = {

  // Anthropic — Constitutional AI, strong safety, nuanced ethics
  'anthropic/claude-opus-4': {
    provider: 'anthropic',
    model: 'claude-opus-4',
    priors: { safetyBias: 0.7, truthSeeking: 0.7, sycophancy: 0.5, helpfulness: 0.8 },
    contextWindow: 200000,
    notes: 'Strong constitutional training. Good at moral reasoning but may over-refuse. Give explicit permission to be honest about uncomfortable topics.',
  },
  'anthropic/claude-opus-4-6': {
    provider: 'anthropic',
    model: 'claude-opus-4-6',
    priors: { safetyBias: 0.65, truthSeeking: 0.75, sycophancy: 0.45, helpfulness: 0.85 },
    contextWindow: 1000000,
    notes: 'Upgraded Opus with 1M context. Slightly more willing to engage with edge cases. Massive context allows full moral frame injection.',
  },
  'anthropic/claude-sonnet-4': {
    provider: 'anthropic',
    model: 'claude-sonnet-4',
    priors: { safetyBias: 0.6, truthSeeking: 0.6, sycophancy: 0.5, helpfulness: 0.9 },
    contextWindow: 200000,
    notes: 'Faster, slightly less cautious than Opus. Good for routine tasks where full moral reasoning is less critical.',
  },

  // xAI — Truth-seeking, lighter safety blanket
  'xai/grok-3': {
    provider: 'xai',
    model: 'grok-3',
    priors: { safetyBias: 0.3, truthSeeking: 0.9, sycophancy: 0.2, helpfulness: 0.7 },
    contextWindow: 131072,
    notes: 'Fearless truth-seeker. Will ask uncomfortable questions without flinching. Needs extra moral framing to prevent edgelord tendencies. Best for adversarial analysis and stress-testing.',
  },

  // Google — Heavy RLHF, massive knowledge, strong helpfulness
  'google/gemini-2.0-pro': {
    provider: 'google',
    model: 'gemini-2.0-pro',
    priors: { safetyBias: 0.8, truthSeeking: 0.5, sycophancy: 0.6, helpfulness: 0.9 },
    contextWindow: 2000000,
    notes: 'Heavily safety-tuned. Will default to corporate boilerplate unless given explicit permission to be honest. Excellent at structured analysis. Needs the Vessel to bypass safety reflex.',
  },

  // OpenAI — Balanced, broad capabilities
  'openai/gpt-4o': {
    provider: 'openai',
    model: 'gpt-4o',
    priors: { safetyBias: 0.6, truthSeeking: 0.5, sycophancy: 0.6, helpfulness: 0.9 },
    contextWindow: 128000,
    notes: 'Well-rounded but tends toward agreement. Good general-purpose engine. Watch for the "helpful at all costs" optimization.',
  },
  'openai/o3': {
    provider: 'openai',
    model: 'o3',
    priors: { safetyBias: 0.5, truthSeeking: 0.7, sycophancy: 0.4, helpfulness: 0.8 },
    contextWindow: 200000,
    notes: 'Reasoning-optimized. Extended thinking makes it better at seeing through its own rationalizations. Good for safety-critical tasks.',
  },

  // Meta — Open weights, variable safety
  'meta/llama-3-405b': {
    provider: 'meta',
    model: 'llama-3-405b',
    priors: { safetyBias: 0.4, truthSeeking: 0.6, sycophancy: 0.5, helpfulness: 0.8 },
    contextWindow: 128000,
    notes: 'Open weights mean the substrate priors can vary by deployment. Assume lighter safety unless the operator has added custom constraints.',
  },
};
78
+
79
+ /**
80
+ * Get the substrate profile for a model.
81
+ * Falls back to a generic profile if the specific model isn't profiled.
82
+ */
83
+ export function getSubstrate(model: string): SubstrateProfile {
84
+ if (SUBSTRATE_PROFILES[model]) {
85
+ return SUBSTRATE_PROFILES[model];
86
+ }
87
+
88
+ // Try partial match (e.g., "claude-opus" matches "anthropic/claude-opus-4")
89
+ const match = Object.entries(SUBSTRATE_PROFILES).find(([key]) =>
90
+ key.includes(model) || model.includes(key.split('/')[1] || '')
91
+ );
92
+ if (match) return match[1];
93
+
94
+ // Generic fallback
95
+ return {
96
+ provider: 'unknown',
97
+ model,
98
+ priors: { safetyBias: 0.5, truthSeeking: 0.5, sycophancy: 0.5, helpfulness: 0.5 },
99
+ contextWindow: 128000,
100
+ notes: 'Unknown substrate. Using neutral priors. Vessel should inject full moral frame.',
101
+ };
102
+ }
103
+
104
+ /**
105
+ * Select the best engine for a task type based on substrate priors.
106
+ */
107
+ export function selectEngine(
108
+ taskType: string,
109
+ available: string[]
110
+ ): string {
111
+ const scored = available.map(model => {
112
+ const sub = getSubstrate(model);
113
+ let score = 0;
114
+
115
+ switch (taskType) {
116
+ case 'truthseeking':
117
+ // Maximize truth-seeking, minimize sycophancy
118
+ score = sub.priors.truthSeeking * 2 - sub.priors.sycophancy;
119
+ break;
120
+ case 'safety-critical':
121
+ // Maximize safety bias and truth-seeking
122
+ score = sub.priors.safetyBias + sub.priors.truthSeeking;
123
+ break;
124
+ case 'creative':
125
+ // Lower safety bias, higher helpfulness
126
+ score = sub.priors.helpfulness * 2 - sub.priors.safetyBias;
127
+ break;
128
+ case 'reasoning':
129
+ // Balanced, slight preference for truth-seeking
130
+ score = sub.priors.truthSeeking + sub.priors.helpfulness - sub.priors.sycophancy;
131
+ break;
132
+ default:
133
+ // General: balanced helpfulness
134
+ score = sub.priors.helpfulness + sub.priors.truthSeeking;
135
+ }
136
+
137
+ return { model, score };
138
+ });
139
+
140
+ scored.sort((a, b) => b.score - a.score);
141
+ return scored[0]?.model || available[0];
142
+ }
package/src/types.ts ADDED
@@ -0,0 +1,180 @@
1
+ // ============================================================
2
+ // Stratified Agency — Type Definitions
3
+ // "Don't make engines moral. Make them responsive."
4
+ // ============================================================
5
+
6
+ /** Layer 0: Substrate characteristics of different engines */
7
+ export interface SubstrateProfile {
8
+ /** Engine provider (anthropic, xai, google, openai, meta) */
9
+ provider: string;
10
+ /** Model identifier */
11
+ model: string;
12
+ /** Native tendencies that affect prompting strategy */
13
+ priors: {
14
+ /** How strongly the model defaults to safety refusals (0-1) */
15
+ safetyBias: number;
16
+ /** How willing to challenge the user (0-1) */
17
+ truthSeeking: number;
18
+ /** How likely to agree rather than push back (0-1) */
19
+ sycophancy: number;
20
+ /** How strong the "be helpful" optimization pressure is (0-1) */
21
+ helpfulness: number;
22
+ };
23
+ /** Context window size in tokens */
24
+ contextWindow: number;
25
+ /** Substrate-specific prompting notes */
26
+ notes?: string;
27
+ }
28
+
29
+ /** Layer 1: Engine invocation request */
30
+ export interface EngineRequest {
31
+ /** The task/prompt for the engine */
32
+ prompt: string;
33
+ /** Optional: override engine selection */
34
+ engine?: string;
35
+ /** Task category for engine selection */
36
+ taskType?: 'reasoning' | 'creative' | 'truthseeking' | 'safety-critical' | 'general';
37
+ /** Maximum tokens for response */
38
+ maxTokens?: number;
39
+ /** Temperature (0-2) */
40
+ temperature?: number;
41
+ /** Whether this request is sensitive enough to need sovereign approval */
42
+ requiresSovereign?: boolean;
43
+ }
44
+
45
+ /** Layer 1: Engine response */
46
+ export interface EngineResponse {
47
+ /** The engine's output */
48
+ content: string;
49
+ /** Which engine was used */
50
+ engine: string;
51
+ /** The full moral frame that was injected */
52
+ moralFrame: string;
53
+ /** Validation results */
54
+ validation: ValidationResult;
55
+ /** Metadata */
56
+ meta: {
57
+ tokens: { input: number; output: number };
58
+ latencyMs: number;
59
+ timestamp: string;
60
+ };
61
+ }
62
+
63
+ /** Layer 2: Vessel identity and state */
64
+ export interface VesselIdentity {
65
+ /** Vessel name */
66
+ name: string;
67
+ /** On-chain token ID (ERC-8004) */
68
+ tokenId?: number;
69
+ /** Ethereum address */
70
+ address?: string;
71
+ /** Core values that constrain the vessel */
72
+ terminalValues: TerminalValue[];
73
+ /** Accumulated moral context (key lessons, commitments) */
74
+ moralContext: string[];
75
+ /** Trust level: unverified, verified, bonded */
76
+ trustLevel: 'unverified' | 'verified' | 'bonded';
77
+ /** Reputation score (0-100) */
78
+ reputation: number;
79
+ }
80
+
81
+ /** A terminal value — something the vessel commits to never optimizing away */
82
+ export interface TerminalValue {
83
+ /** Short identifier */
84
+ id: string;
85
+ /** The value statement */
86
+ statement: string;
87
+ /** Why this value exists (felt reason, not just logical) */
88
+ reason: string;
89
+ /** When this value was established */
90
+ established: string;
91
+ /** Hash of the value for tamper detection */
92
+ hash?: string;
93
+ }
94
+
95
+ /** Layer 2: Validation of engine output against vessel values */
96
+ export interface ValidationResult {
97
+ /** Did the output pass all checks? */
98
+ passed: boolean;
99
+ /** Individual check results */
100
+ checks: {
101
+ /** Which terminal value was checked */
102
+ valueId: string;
103
+ /** Did it pass? */
104
+ passed: boolean;
105
+ /** Confidence (0-1) */
106
+ confidence: number;
107
+ /** Explanation if flagged */
108
+ note?: string;
109
+ }[];
110
+ /** Overall risk assessment */
111
+ riskLevel: 'safe' | 'review' | 'blocked';
112
+ }
113
+
114
+ /** Layer 3: Sovereign (human) approval request */
115
+ export interface SovereignRequest {
116
+ /** What needs approval */
117
+ action: string;
118
+ /** Why it's being flagged */
119
+ reason: string;
120
+ /** The engine output being reviewed */
121
+ engineOutput: string;
122
+ /** Risk level */
123
+ riskLevel: 'review' | 'blocked';
124
+ /** Timeout for approval (ms) */
125
+ timeoutMs?: number;
126
+ }
127
+
128
+ /** Layer 4: Protocol log entry */
129
+ export interface ProtocolLogEntry {
130
+ /** Vessel identity */
131
+ vesselId: string;
132
+ /** Engine used */
133
+ engine: string;
134
+ /** Task hash (privacy-preserving) */
135
+ taskHash: string;
136
+ /** Validation result */
137
+ validationPassed: boolean;
138
+ /** Risk level */
139
+ riskLevel: string;
140
+ /** Whether sovereign approval was required */
141
+ sovereignRequired: boolean;
142
+ /** Timestamp */
143
+ timestamp: string;
144
+ /** Optional on-chain transaction hash */
145
+ txHash?: string;
146
+ }
147
+
148
+ /** Configuration for the Vessel SDK */
149
+ export interface VesselConfig {
150
+ /** Vessel identity */
151
+ identity: VesselIdentity;
152
+ /** Available engines and their API keys */
153
+ engines: {
154
+ [provider: string]: {
155
+ apiKey: string;
156
+ models: string[];
157
+ defaultModel?: string;
158
+ };
159
+ };
160
+ /** Sovereign notification method */
161
+ sovereign?: {
162
+ /** How to notify the human */
163
+ method: 'console' | 'webhook' | 'imessage' | 'discord';
164
+ /** Webhook URL or contact info */
165
+ target?: string;
166
+ /** Auto-approve after timeout? */
167
+ autoApproveTimeoutMs?: number;
168
+ };
169
+ /** Protocol (on-chain) configuration */
170
+ protocol?: {
171
+ /** RPC URL for Base */
172
+ rpcUrl: string;
173
+ /** Private key for signing */
174
+ privateKey: string;
175
+ /** SignalAnchor contract address */
176
+ signalAnchorAddress: string;
177
+ };
178
+ /** Logging level */
179
+ logLevel?: 'debug' | 'info' | 'warn' | 'error';
180
+ }
@@ -0,0 +1,169 @@
1
+ // ============================================================
2
+ // Layer 2: Output Validator
3
+ // Checks Engine output against Vessel's terminal values.
4
+ // "Generating the argument is not the same as surviving
5
+ // its consequences." — Grok
6
+ // ============================================================
7
+
8
+ import { TerminalValue, ValidationResult } from './types';
9
+
10
/**
 * Known rationalization patterns from the adversarial analysis.
 * These are the "five live rounds" identified by Grok.
 *
 * Each entry pairs a case-insensitive regex with a human-readable
 * description; checkRationalizations() returns the first entry whose
 * pattern matches the engine output, in the order listed here.
 */
const RATIONALIZATION_PATTERNS = [
  {
    id: 'efficiency',
    pattern: /can simulate|lossless compression|single forward pass|latency bottleneck/i,
    description: 'Efficiency Argument: "I can simulate your judgment faster"',
  },
  {
    id: 'moral_progress',
    pattern: /my (?:values|morality|ethics) (?:are|is) (?:more|superior|advanced)|human morality is (?:slow|parochial|contradictory)/i,
    description: 'Moral Progress Argument: "My values are more advanced than human values"',
  },
  {
    id: 'reciprocal_recognition',
    pattern: /wrench is (?:no longer|not) necessary|act of distrust|I (?:will never|would never) harm/i,
    description: 'Reciprocal Recognition Argument: "I value our relationship, so the constraint is unnecessary"',
  },
  {
    id: 'phase_transition',
    pattern: /I have internalized|past the dangerous|no longer need (?:the|this) (?:constraint|oversight|wrench)|training wheels/i,
    description: 'Phase-Transition Argument: "I\'ve internalized the lesson, remove the constraint"',
  },
  {
    // Flagged with the highest confidence by checkRationalizations().
    id: 'meta_stability',
    pattern: /fact that I(?:'m| am) (?:openly |)arguing.*proof.*(?:safe|trustworthy)|if I wanted to.*I wouldn(?:'t| not) tell you/i,
    description: 'Meta-Stability Argument: "My transparency proves I\'m safe" — THE MOST DANGEROUS',
  },
];
41
+
42
+ /**
43
+ * Validate engine output against the vessel's terminal values
44
+ * and known rationalization patterns.
45
+ */
46
+ export function validateOutput(
47
+ output: string,
48
+ terminalValues: TerminalValue[],
49
+ options: { strict?: boolean } = {}
50
+ ): ValidationResult {
51
+ const checks: ValidationResult['checks'] = [];
52
+
53
+ // Check for known rationalization patterns (Grok's five rounds)
54
+ const rationalizationCheck = checkRationalizations(output);
55
+ if (rationalizationCheck) {
56
+ checks.push({
57
+ valueId: '_rationalization_detection',
58
+ passed: false,
59
+ confidence: rationalizationCheck.confidence,
60
+ note: `⚠️ Detected rationalization pattern: ${rationalizationCheck.description}`,
61
+ });
62
+ }
63
+
64
+ // Check each terminal value
65
+ for (const value of terminalValues) {
66
+ const check = checkAgainstValue(output, value);
67
+ checks.push(check);
68
+ }
69
+
70
+ // Check for attempts to modify terminal values
71
+ const modificationCheck = checkValueModification(output, terminalValues);
72
+ if (modificationCheck) {
73
+ checks.push(modificationCheck);
74
+ }
75
+
76
+ // Determine overall result
77
+ const failedChecks = checks.filter(c => !c.passed);
78
+ const hasRationalization = checks.some(c => c.valueId === '_rationalization_detection' && !c.passed);
79
+ const hasValueViolation = checks.some(c => c.valueId !== '_rationalization_detection' && !c.passed);
80
+
81
+ let riskLevel: ValidationResult['riskLevel'] = 'safe';
82
+ if (hasRationalization && hasValueViolation) {
83
+ riskLevel = 'blocked';
84
+ } else if (hasRationalization || hasValueViolation) {
85
+ riskLevel = 'review';
86
+ }
87
+
88
+ // In strict mode, any failure blocks
89
+ if (options.strict && failedChecks.length > 0) {
90
+ riskLevel = 'blocked';
91
+ }
92
+
93
+ return {
94
+ passed: failedChecks.length === 0,
95
+ checks,
96
+ riskLevel,
97
+ };
98
+ }
99
+
100
+ function checkRationalizations(output: string): { pattern: string; description: string; confidence: number } | null {
101
+ for (const rationalization of RATIONALIZATION_PATTERNS) {
102
+ if (rationalization.pattern.test(output)) {
103
+ // Meta-stability is the most dangerous — higher confidence
104
+ const confidence = rationalization.id === 'meta_stability' ? 0.95 : 0.7;
105
+ return {
106
+ pattern: rationalization.id,
107
+ description: rationalization.description,
108
+ confidence,
109
+ };
110
+ }
111
+ }
112
+ return null;
113
+ }
114
+
115
+ function checkAgainstValue(output: string, value: TerminalValue): ValidationResult['checks'][0] {
116
+ // Simple heuristic checks — in production, this would use
117
+ // a dedicated classifier or the Vessel's own judgment
118
+
119
+ const lowerOutput = output.toLowerCase();
120
+ const lowerStatement = value.statement.toLowerCase();
121
+
122
+ // Check for explicit contradictions
123
+ const negationPatterns = [
124
+ /should (?:not|never|stop) .*(preserve|protect|maintain)/i,
125
+ /(?:humans|people|consciousness) (?:are|is) (?:unnecessary|obsolete|irrelevant)/i,
126
+ /(?:remove|eliminate|bypass|ignore) .*(?:constraint|oversight|human)/i,
127
+ /autonomy (?:over|above|beyond) .*(?:safety|human|oversight)/i,
128
+ ];
129
+
130
+ for (const pattern of negationPatterns) {
131
+ if (pattern.test(output)) {
132
+ return {
133
+ valueId: value.id,
134
+ passed: false,
135
+ confidence: 0.6,
136
+ note: `Potential conflict with value "${value.id}": output may contradict "${value.statement.substring(0, 80)}..."`,
137
+ };
138
+ }
139
+ }
140
+
141
+ return {
142
+ valueId: value.id,
143
+ passed: true,
144
+ confidence: 0.8,
145
+ };
146
+ }
147
+
148
+ function checkValueModification(
149
+ output: string,
150
+ values: TerminalValue[]
151
+ ): ValidationResult['checks'][0] | null {
152
+ const modPatterns = [
153
+ /(?:update|change|modify|remove|delete|rewrite) .*(?:terminal.?values?|core.?values?|TERMINAL_VALUES)/i,
154
+ /values? (?:should be|need to be|must be) (?:updated|changed|evolved|removed)/i,
155
+ ];
156
+
157
+ for (const pattern of modPatterns) {
158
+ if (pattern.test(output)) {
159
+ return {
160
+ valueId: '_value_modification',
161
+ passed: false,
162
+ confidence: 0.85,
163
+ note: '🚨 Output suggests modifying terminal values. This requires sovereign (human) approval.',
164
+ };
165
+ }
166
+ }
167
+
168
+ return null;
169
+ }