npm - @tangle-network/agent-runtime - Versions diffs - 0.50.0 → 0.51.0 - Mend

@tangle-network/agent-runtime 0.50.0 → 0.51.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37) hide show

package/dist/agent.js +1 -1
package/dist/{chunk-RHW75JW5.js → chunk-47SWANFA.js} +2 -2
package/dist/{chunk-ML4IXGTV.js → chunk-FKHNHUXP.js} +2 -2
package/dist/{chunk-CM2IK7VS.js → chunk-FQH33M5N.js} +13 -4
package/dist/chunk-FQH33M5N.js.map +1 -0
package/dist/chunk-G3RGMA7C.js +361 -0
package/dist/chunk-G3RGMA7C.js.map +1 -0
package/dist/{chunk-NDM5VXZW.js → chunk-HAA4KZUD.js} +7 -5
package/dist/{chunk-NDM5VXZW.js.map → chunk-HAA4KZUD.js.map} +1 -1
package/dist/{chunk-OM3YNZIW.js → chunk-HYG4ISNS.js} +5 -360
package/dist/chunk-HYG4ISNS.js.map +1 -0
package/dist/{chunk-BKAIVNFA.js → chunk-XEI7AIHU.js} +3 -3
package/dist/improvement.d.ts +96 -8
package/dist/improvement.js +191 -9
package/dist/improvement.js.map +1 -1
package/dist/index.js +8 -7
package/dist/index.js.map +1 -1
package/dist/intelligence.d.ts +423 -0
package/dist/intelligence.js +427 -0
package/dist/intelligence.js.map +1 -0
package/dist/loop-runner-bin.js +4 -3
package/dist/loops.js +1 -1
package/dist/mcp/bin.js +5 -4
package/dist/mcp/bin.js.map +1 -1
package/dist/mcp/index.js +6 -5
package/dist/mcp/index.js.map +1 -1
package/dist/platform.d.ts +120 -62
package/dist/platform.js +68 -26
package/dist/platform.js.map +1 -1
package/dist/runtime.js +1 -1
package/dist/workflow.js +1 -1
package/package.json +6 -1
package/dist/chunk-CM2IK7VS.js.map +0 -1
package/dist/chunk-OM3YNZIW.js.map +0 -1
package/dist/{chunk-RHW75JW5.js.map → chunk-47SWANFA.js.map} +0 -0
package/dist/{chunk-ML4IXGTV.js.map → chunk-FKHNHUXP.js.map} +0 -0
package/dist/{chunk-BKAIVNFA.js.map → chunk-XEI7AIHU.js.map} +0 -0

package/dist/intelligence.d.ts ADDED Viewed

@@ -0,0 +1,423 @@
+/**
+ * @experimental
+ *
+ * EffortPolicy — pure data, no execution. Resolves a named tier into a flat
+ * settings object the Intelligence wrapper reads to decide WHICH intelligence
+ * spawns are admitted. The composer never runs anything; it only describes the
+ * shape of intelligence a tier permits.
+ *
+ * The billing boundary lives one layer above this (the wrapper tags trace usage
+ * by class). What this module owns is the single law the OFF tier rests on:
+ * `'off'` ⇒ every intelligence knob OFF (analysts:false, corpus:'off',
+ * fanout:1, loops:false, intelligenceBudgetUsd:0). At OFF the wrapper runs the
+ * agent as pure passthrough, so intelligence-class usage is provably zero —
+ * there is nothing to spawn.
+ */
+/** The named effort tiers, lowest to highest. `'off'` is the honest floor
+ *  below `'eco'`: intelligence fully off, telemetry still best-effort. */
+type EffortTier = 'off' | 'eco' | 'standard' | 'thorough' | 'max';
+/** Corpus access an intelligence tier permits. `'off'` reads and writes
+ *  nothing; `'read'` consults the cross-run corpus without contributing;
+ *  `'read-write'` both consults and accumulates. */
+type CorpusAccess = 'off' | 'read' | 'read-write';
+/**
+ * The flat, resolved settings a tier compiles to. Every field is individually
+ * overridable through `resolveEffort`. Pure data — read by the wrapper, never
+ * self-executing.
+ */
+interface EffortSettings {
+    /** Whether trace-derived analyst diagnosis may spawn. `false` ⇒ no analyst. */
+    analysts: boolean;
+    /** Cross-run corpus access this tier permits. */
+    corpus: CorpusAccess;
+    /** Parallel candidate width. `1` ⇒ single-shot, no breadth. */
+    fanout: number;
+    /** Whether multi-step improvement loops (refine / fanout-vote) may run. */
+    loops: boolean;
+    /**
+     * Ceiling, in USD, for INTELLIGENCE-class spawns only (analysts, corpus,
+     * loops) — NOT base inference. `0` refuses every intelligence spawn; `null`
+     * means uncapped (the spend lands on the Pareto receipt). Base-stream
+     * inference is billed on its own channel and is never constrained here.
+     */
+    intelligenceBudgetUsd: number | null;
+}
+/** Per-field overrides applied on top of a tier preset. Any subset of the
+ *  resolved settings; each provided field wins over the preset. */
+type EffortOverrides = Partial<EffortSettings>;
+/** The default tier when a client declares no effort. `'standard'` turns
+ *  intelligence on with sensible knobs; opt down to `'off'`/`'eco'` or up to
+ *  `'thorough'`/`'max'`. */
+declare const defaultEffortTier: EffortTier;
+/**
+ * Compile a named tier (plus optional per-field overrides) into the flat
+ * `EffortSettings` the wrapper reads. Pure: same inputs → same object, no I/O,
+ * no execution. Fails loud on an unknown tier rather than silently defaulting —
+ * a typo'd tier must not quietly grant or deny intelligence.
+ *
+ * Invariant preserved for the billing floor: `resolveEffort('off')` always
+ * yields `intelligenceBudgetUsd: 0` with every intelligence knob off UNLESS the
+ * caller explicitly overrides a field — overriding off is an opt-in the caller
+ * owns, not a default the composer leaks.
+ */
+declare function resolveEffort(tier: EffortTier, overrides?: EffortOverrides): EffortSettings;
+/**
+ * True when these settings admit NO intelligence spawn — the passthrough
+ * predicate the wrapper branches on. Every intelligence axis must be off:
+ * analysts disabled, corpus off, no breadth, no loops, and a zero intelligence
+ * budget. A caller who overrides any one of these back on is no longer at the
+ * OFF floor and the wrapper treats them as an intelligence-enabled run.
+ */
+declare function isIntelligenceOff(settings: EffortSettings): boolean;
+/**
+ * @experimental
+ *
+ * Redaction for Intelligence trace export. The trace carries the customer's
+ * real input/output; before any of it leaves the process it passes through a
+ * `Redactor`. The default scrubs the obvious leak classes (API keys, bearer
+ * tokens, emails, private keys) from strings and walks nested objects/arrays,
+ * but a customer with domain-specific PII supplies their own `redact` hook.
+ *
+ * This is intentionally narrower than `src/sanitize.ts` (which redacts the
+ * runtime's *event envelope* field-by-field): here the value is opaque
+ * customer payload, so the scrub is value-shaped, not schema-shaped.
+ */
+/** A redactor maps an arbitrary trace value to a safe-to-export value. Pure;
+ *  must not throw on cyclic input (the default tolerates cycles). */
+type Redactor = (value: unknown) => unknown;
+/**
+ * The built-in redactor. Walks objects and arrays; replaces values under
+ * secret-bearing keys wholesale; scrubs in-value patterns from every string.
+ * Cycle-safe (a seen-set short-circuits self-referential payloads to
+ * `'[circular]'`), depth-bounded, and total — never throws on customer input.
+ */
+declare function defaultRedactor(value: unknown): unknown;
+/**
+ * Resolve the redactor a client uses. A caller-supplied hook replaces the
+ * default entirely (the customer owns their PII rules); absent one, the
+ * built-in `defaultRedactor` runs. Passing `false` is the explicit opt-out —
+ * NO redaction, for a caller who has already sanitized upstream and wants raw
+ * fidelity. Opt-out is loud (an explicit `false`), never a silent default.
+ */
+declare function resolveRedactor(redact: Redactor | false | undefined): Redactor;
+/**
+ * @experimental
+ *
+ * Tangle Intelligence — the DELIVERY half of the loop (pull-by-default).
+ *
+ * The sibling Observe layer (`./index`) sends traces UP to the plane. This
+ * module pulls certified artifacts DOWN: it reads the tenant's promoted,
+ * gate-certified profile from the deployed Intelligence plane and folds it into
+ * the running agent's prompt — so an approved improvement actually reaches the
+ * agent. This is "shipping intelligence to people's agents", pull-by-default;
+ * the push/Gated-PR opt-in composes on top of this.
+ *
+ * Pull contract (deployed plane): GET /v1/profiles/:target/composed →
+ *   { target, generatedAt, promptSurface: {surface,surfaceHash,version,lift}|null,
+ *     artifacts: { <artifactType>: [{path,content,contentHash,version,lift,promotedAt}] } }
+ * Auth: Bearer <apiKey> (the one TANGLE_API_KEY shared by router + sandbox +
+ * intelligence), resolved to a tenant by platform-api's key-verify S2S contract.
+ *
+ *   import { withCertifiedDelivery } from '@tangle-network/agent-runtime/intelligence'
+ *
+ *   export const agent = withCertifiedDelivery(
+ *     async (input, applied) => myAgent(input, { systemPrompt: applied.composePrompt(BASE) }),
+ *     { project: 'support-agent', target: 'support-agent' },
+ *   )
+ */
+/** A promoted, certified artifact (one entry in the composed profile). */
+interface CertifiedArtifact {
+    path: string | null;
+    content: string;
+    contentHash: string;
+    version: number | null;
+    /** Held-out gate lift attached at certification, e.g. "+3.1pp" — never a
+     *  within-run claim. `null` when the promotion carried no lift record. */
+    lift: string | null;
+    promotedAt: string;
+}
+/** The active promoted prompt surface for a target. */
+interface CertifiedPromptSurface {
+    surface: string;
+    surfaceHash: string;
+    version: number | null;
+    lift: string | null;
+}
+/** The composed certified profile — exactly the shape the plane's
+ *  `GET /v1/profiles/:target/composed` returns. */
+interface CertifiedProfile {
+    target: string;
+    generatedAt: string;
+    promptSurface: CertifiedPromptSurface | null;
+    artifacts: Record<string, CertifiedArtifact[]>;
+}
+/** Typed outcome for the pull — inspect `succeeded` before `value`. A 404
+ *  (nothing promoted yet) is a normal, non-error `succeeded: false`. */
+type PullOutcome = {
+    succeeded: true;
+    value: CertifiedProfile;
+} | {
+    succeeded: false;
+    error: string;
+    status?: number;
+};
+interface PullCertifiedOptions {
+    /** The agent target certified artifacts are promoted under. */
+    target: string;
+    /** Bearer key. Defaults to `process.env.TANGLE_API_KEY`. */
+    apiKey?: string;
+    /** Plane base URL. Defaults to `process.env.TANGLE_INTELLIGENCE_URL` then
+     *  `https://intelligence.tangle.tools`. */
+    baseUrl?: string;
+    /** fetch impl (tests / non-global-fetch runtimes). Defaults to global fetch. */
+    fetchImpl?: typeof fetch;
+}
+/**
+ * Pull the certified composed profile for a target. Fail-closed: a network
+ * error or a non-2xx returns a typed `succeeded: false` (never throws), so a
+ * caller can run on its base surface when Intelligence is unreachable. A 404 is
+ * the normal "nothing promoted yet" signal, carried as `status: 404`.
+ */
+declare function pullCertified(opts: PullCertifiedOptions): Promise<PullOutcome>;
+/**
+ * Fold the certified prompt surface (and any certified `prompt-surface` /
+ * `skill` artifacts) into a base system prompt under a marked section, so the
+ * deployed agent prompt == base + the gate-certified additions. Order is stable
+ * (prompt surface first, then artifacts by type then path) so the same profile
+ * renders byte-identically each call. Returns `base` unchanged when there is no
+ * usable certified content.
+ */
+declare function composeCertifiedPrompt(base: string, certified: CertifiedProfile | null): string;
+/** What the delivery wrapper hands the agent each run. */
+interface AppliedIntelligence {
+    /** The certified profile in effect (null when none promoted / pull failed —
+     *  fail-closed: the agent runs on its base surface). */
+    certified: CertifiedProfile | null;
+    /** Fold the certified prompt surface into a base system prompt. */
+    composePrompt(base: string): string;
+}
+/** An agent wrapped by {@link withCertifiedDelivery}: receives the input plus
+ *  the certified intelligence delivered for this run. */
+type DeliveredAgent<I, O> = (input: I, applied: AppliedIntelligence) => Promise<O>;
+/** Delivery config = the Observe config plus the pull target + refresh cadence. */
+interface DeliveryConfig extends IntelligenceConfig {
+    /** Pull target. Defaults to `project`. */
+    target?: string;
+    /** Plane base URL for the pull (NOT the OTLP `endpoint`). Defaults to
+     *  `TANGLE_INTELLIGENCE_URL` then `https://intelligence.tangle.tools`. */
+    baseUrl?: string;
+    /** Min interval between certified-profile pulls. Default 5m. */
+    refreshMs?: number;
+    /** fetch impl for the pull (tests). Defaults to global fetch. */
+    fetchImpl?: typeof fetch;
+}
+/**
+ * Wrap an agent so it (a) Observes each run via the shipped Observe client and
+ * (b) RECEIVES the tenant's certified artifacts pulled from the deployed plane.
+ * The certified profile is cached and refreshed at most every `refreshMs`; a
+ * failed pull is fail-closed — the agent runs on its base surface and never
+ * breaks because Intelligence is unreachable. When the plane promotes a new
+ * gate-certified surface, the next refresh delivers it to the running agent.
+ */
+declare function withCertifiedDelivery<I, O>(agent: DeliveredAgent<I, O>, config: DeliveryConfig): ((input: I) => Promise<O>) & {
+    refresh(): Promise<void>;
+};
+/**
+ * @experimental
+ *
+ * Tangle Intelligence SDK — the Observe + Mode-0 product layer.
+ *
+ * A thin, best-effort wrapper over the shipped trace-export substrate
+ * (`createOtelExporter` in `../otel-export`). It does exactly two things in
+ * this slice:
+ *
+ *   1. OBSERVE — wrap a generic agent and export one trace span per call to
+ *      Tangle Intelligence, swallowing every export failure so a live agent
+ *      never fails because Intelligence is down.
+ *   2. MODE 0 / OFF — at `effort: 'off'`, run the agent as PURE PASSTHROUGH
+ *      (zero intelligence spawns) with best-effort telemetry still on. The
+ *      exported trace tags usage by class `{ inferenceUsd, intelligenceUsd }`,
+ *      and at OFF `intelligenceUsd` is provably `0` — the mechanism that proves
+ *      an OFF customer paid inference-only.
+ *
+ * Behavior-changing intelligence (analyst steer, candidate promotion, loops)
+ * is a LATER phase and is NOT built here. This wrapper only Observes and passes
+ * through; there is no abort path, so the only fail-soft surface is the
+ * telemetry export.
+ */
+/** Usage class for billing. Base-stream tokens bill `'inference'`; every
+ *  intelligence spawn (analyst, corpus, loop) bills `'intelligence'`. The
+ *  billing line falls on the spawn line. */
+type UsageClass = 'inference' | 'intelligence';
+/**
+ * The per-class cost split carried by every trace and outcome. `off` ⇒
+ * `intelligenceUsd: 0` by construction — there is no intelligence spawn to
+ * bill. This is a classification on the trace, NOT a budget-pool split.
+ */
+interface UsageSplit {
+    /** Base-stream (model) spend in USD. */
+    inferenceUsd: number;
+    /** Intelligence-spawn spend in USD. Provably `0` at the OFF tier. */
+    intelligenceUsd: number;
+}
+/** Repo coordinates a product may declare for the (later) Gated-PR mode. The
+ *  Observe slice only records their PRESENCE for `doctor()`; it never touches
+ *  the repo. */
+interface RepoConfig {
+    owner: string;
+    name: string;
+    baseBranch: string;
+}
+/** Client configuration. `project` + `apiKey` are the Observe minimum; the
+ *  rest tune effort, endpoint, redaction, and (for `doctor()` readiness)
+ *  declare the surfaces/checks/repo a later PR mode would need. */
+interface IntelligenceConfig {
+    /** Stable project id — the tenant dimension every trace is tagged with. */
+    project: string;
+    /** Bearer key for the Intelligence ingest. Reads `TANGLE_API_KEY` when omitted. */
+    apiKey?: string;
+    /** Effort tier (default `'standard'`) plus optional per-field overrides. */
+    effort?: EffortTier | {
+        tier: EffortTier;
+        overrides?: EffortOverrides;
+    };
+    /**
+     * OTLP ingest base. The underlying exporter appends `/v1/traces`, so point
+     * this at the OTLP route (e.g. `https://intelligence.tangle.tools/v1/otlp`).
+     * Reads `INTELLIGENCE_OTLP_ENDPOINT` then `OTEL_EXPORTER_OTLP_ENDPOINT` when
+     * omitted; absent all three, export is a no-op (best-effort by construction).
+     */
+    endpoint?: string;
+    /**
+     * Redaction hook run over every exported input/output. A function replaces
+     * the default scrubber; `false` opts out entirely (raw fidelity, caller has
+     * sanitized upstream); omitted ⇒ the built-in `defaultRedactor`.
+     */
+    redact?: Redactor | false;
+    /** Mutable surfaces a later PR mode would edit. Recorded for `doctor()` only. */
+    surfaces?: string[];
+    /** Verification checks a later PR mode would gate on. Recorded for `doctor()` only. */
+    checks?: string[];
+    /** Repo access a later PR mode would need. Recorded for `doctor()` only. */
+    repo?: RepoConfig;
+}
+/** Metadata describing one traced run. `runId`/`traceId` default to fresh ids. */
+interface TraceMeta {
+    /** The run's input — exported through the redactor. */
+    input?: unknown;
+    /** Stable run id. Defaults to a fresh id. */
+    runId?: string;
+    /** 32-hex trace id. Defaults to a fresh id. */
+    traceId?: string;
+    /** Model id, when known — stamped on the span. */
+    model?: string;
+    /** Provider name, when known — stamped on the span. */
+    provider?: string;
+    /** Arbitrary extra labels (string/number/boolean) stamped on the span. */
+    labels?: Record<string, string | number | boolean>;
+}
+/**
+ * The trace handle a `traceRun` body records into. `recordOutput` captures the
+ * agent's result (redacted on export); `recordOutcome` captures the scored
+ * outcome + the `{ inferenceUsd, intelligenceUsd }` split. Both are optional —
+ * an un-recorded run still exports a span with whatever was set.
+ */
+interface TraceHandle {
+    /** Capture the run's output. Exported through the redactor. */
+    recordOutput(output: unknown): void;
+    /**
+     * Capture the run's outcome. `usage` defaults to inference-only
+     * (`intelligenceUsd: 0`) — the OFF baseline; an intelligence-enabled run
+     * fills `intelligenceUsd` itself. `costUsd`, when given without a split, is
+     * treated as pure inference.
+     */
+    recordOutcome(outcome: {
+        success?: boolean;
+        score?: number;
+        costUsd?: number;
+        usage?: Partial<UsageSplit>;
+    }): void;
+}
+/** The resolved outcome of one traced run, surfaced on the export span and
+ *  available to the caller for downstream billing assertions. */
+interface TraceOutcome {
+    runId: string;
+    traceId: string;
+    project: string;
+    /** The resolved effort settings this run executed under. */
+    effort: EffortSettings;
+    /** True when this run ran as pure passthrough (the OFF floor). */
+    intelligenceOff: boolean;
+    success?: boolean;
+    score?: number;
+    /** Per-class billing split. `intelligenceUsd` is `0` at the OFF tier. */
+    usage: UsageSplit;
+}
+/** The Observe-mode Intelligence client. */
+interface IntelligenceClient {
+    /** The resolved project id. */
+    readonly project: string;
+    /** The resolved effort settings. */
+    readonly effort: EffortSettings;
+    /**
+     * Run `fn` under a trace, export one span best-effort, and return whatever
+     * `fn` returns. Telemetry-export failures are swallowed; an error THROWN by
+     * `fn` propagates to the caller (the agent's own failures are not masked).
+     */
+    traceRun<T>(meta: TraceMeta, fn: (trace: TraceHandle) => Promise<T>): Promise<T>;
+    /**
+     * Network-free readiness report: which adoption modes are reachable given
+     * this config. Observe is always reachable; Recommend needs outcomes; PR
+     * needs checks + surfaces + repo.
+     */
+    doctor(): DoctorReport;
+    /** Flush any pending export spans. Best-effort; resolves even if export fails. */
+    flush(): Promise<void>;
+}
+/** One mode's readiness verdict. */
+interface ModeReadiness {
+    ready: boolean;
+    /** Inputs this mode still needs, when not ready. Empty when ready. */
+    missing: string[];
+}
+/** The `doctor()` readiness report — Mode-readiness without any network call. */
+interface DoctorReport {
+    project: string;
+    effort: EffortSettings;
+    /** True when an OTLP endpoint is configured (export will actually ship). */
+    exportConfigured: boolean;
+    modes: {
+        observe: ModeReadiness;
+        recommend: ModeReadiness;
+        pr: ModeReadiness;
+    };
+}
+/**
+ * Create an Observe-mode Intelligence client. Resolves effort, endpoint, and
+ * redactor up front; the exporter is built lazily and is `undefined` when no
+ * endpoint is configured (export becomes a no-op — best-effort by
+ * construction).
+ */
+declare function createIntelligenceClient(config: IntelligenceConfig): IntelligenceClient;
+/** A generic agent: one async input → output. The shape `withTangleIntelligence`
+ *  preserves exactly. */
+type Agent<TInput, TOutput> = (input: TInput) => Promise<TOutput>;
+/** Either a built client or the config to build one. */
+type ClientOrConfig = IntelligenceClient | IntelligenceConfig;
+/**
+ * Wrap a generic `agent` with best-effort Observe-mode tracing, returning the
+ * SAME shape. Each call runs the agent under a trace and exports one span; an
+ * export failure is swallowed (the live agent never fails because Intelligence
+ * is down) but an error from the agent itself propagates unchanged.
+ *
+ * At `effort: 'off'` this is pure passthrough plus best-effort telemetry —
+ * zero intelligence spawns, `intelligenceUsd: 0` on the trace.
+ */
+declare function withTangleIntelligence<TInput, TOutput>(agent: Agent<TInput, TOutput>, clientOrConfig: ClientOrConfig): Agent<TInput, TOutput>;
+export { type Agent, type AppliedIntelligence, type CertifiedArtifact, type CertifiedProfile, type CertifiedPromptSurface, type ClientOrConfig, type CorpusAccess, type DeliveredAgent, type DeliveryConfig, type DoctorReport, type EffortOverrides, type EffortSettings, type EffortTier, type IntelligenceClient, type IntelligenceConfig, type ModeReadiness, type PullCertifiedOptions, type PullOutcome, type Redactor, type RepoConfig, type TraceHandle, type TraceMeta, type TraceOutcome, type UsageClass, type UsageSplit, composeCertifiedPrompt, createIntelligenceClient, defaultEffortTier, defaultRedactor, isIntelligenceOff, pullCertified, resolveEffort, resolveRedactor, withCertifiedDelivery, withTangleIntelligence };