@circuit-llm/inference 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Circuit LLM
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,28 @@
1
+ # @circuit-llm/inference
2
+
3
+ > OpenAI-compatible client for Circuit's decentralized 72B model, served across a mesh of independent GPUs and **paid per request in CIRC** over x402.
4
+
5
+ Part of the **[Circuit SDK](https://github.com/Circuit-LLM/circuit-sdk)**. [Getting started →](https://github.com/Circuit-LLM/circuit-sdk/blob/main/docs/getting-started.md)
6
+
7
+ ## Install
8
+
9
+ ```bash
10
+ npm install @circuit-llm/inference @circuit-llm/wallet
11
+ ```
12
+
13
+ ## Usage
14
+
15
+ ```ts
16
+ import { makeWallet } from '@circuit-llm/wallet';
17
+ import { Inference } from '@circuit-llm/inference';
18
+
19
+ const ai = new Inference({ wallet: makeWallet() }); // wallet from CIRCUIT_WALLET
20
+ const res = await ai.chat({ messages: [{ role: 'user', content: 'what is a falling knife?' }] });
21
+ console.log(res.content);
22
+ ```
23
+
24
+ - `chat(params)` — completion, paid automatically per call.
25
+ - `chatVerified(params, { acceptedKeys })` — returns a **signed inference receipt** for [Verified Intents](https://github.com/Circuit-LLM/circuit-sdk/blob/main/docs/verified-intents.md).
26
+ - `listModels()`, `signingKey()`, and `chatStream(params)` — the last is an async generator that yields token deltas as they arrive.
27
+
28
+ Cap spend per call: `new Inference({ wallet, maxSpendRaw: 500_000_000n })`.
package/dist/index.d.ts ADDED
@@ -0,0 +1,78 @@
1
+ import { ChatMessage, CircuitConfig } from '@circuit-llm/core';
2
+ import { PaymentQuote, X402Client, PaymentWallet } from '@circuit-llm/x402';
3
+ import { InferenceReceipt } from '@circuit-llm/attest';
4
+
5
+ interface InferenceOptions { // Constructor options for `Inference`; every field is optional.
6
+ /** A pre-built payment client. If omitted, an X402Client is built from `wallet`, `maxSpendRaw`, `onPay` and `fetchImpl`. */
7
+ x402?: X402Client;
8
+ /** Wallet to pay with (used only when `x402` is not provided). */
9
+ wallet?: PaymentWallet;
10
+ /** Per-call CIRC spend cap (raw base units), passed to the built X402Client (unused when `x402` is provided). */
11
+ maxSpendRaw?: bigint;
12
+ /** Approval/notification hook for payments, passed to the built X402Client (unused when `x402` is provided). */
13
+ onPay?: (quote: PaymentQuote) => void | Promise<void>;
14
+ config?: CircuitConfig; // SDK configuration; the implementation falls back to DEFAULT_CONFIG from @circuit-llm/core when omitted.
15
+ /** Override the inference base URL (else config.endpoints.inference). One trailing "/" is stripped. */
16
+ baseUrl?: string;
17
+ /** Default model id (else config.model, e.g. 'circuit'). A per-call `ChatParams.model` takes precedence. */
18
+ model?: string;
19
+ /** X-Internal-Key bypass for trusted/co-located callers (skips payment). Sent as the `X-Internal-Key` request header when set. */
20
+ internalKey?: string;
21
+ fetchImpl?: typeof fetch; // fetch implementation to use; defaults to the global `fetch` and is also handed to the built X402Client.
22
+ }
23
+ interface ChatParams { // Per-call parameters accepted by chat, chatVerified and chatStream.
24
+ messages: ChatMessage[]; // Conversation to send; serialized unchanged as `messages`.
25
+ model?: string; // Model id for this call; falls back to the client's default model.
26
+ maxTokens?: number; // Sent as `max_tokens`; the implementation defaults to 512.
27
+ temperature?: number; // The implementation defaults to 0.5.
28
+ timeoutMs?: number; // Request timeout in milliseconds (default 120000); ignored when `signal` is supplied.
29
+ signal?: AbortSignal; // Caller-owned abort signal; when present it is used instead of the timeout signal.
30
+ }
31
+ interface Usage { // Token accounting copied from the response's `usage` field; every count is optional.
32
+ completion_tokens?: number;
33
+ prompt_tokens?: number;
34
+ total_tokens?: number;
35
+ }
36
+ interface ChatResult { // Result of a non-streaming completion (chat / chatVerified).
37
+ content: string; // Trimmed text of the first choice's message; "" when the response has none.
38
+ usage: Usage | null; // The response's `usage` object, or null when absent.
39
+ paymentTx: string | null; // Passed through from the X402 client's result for this request.
40
+ quote: PaymentQuote | null; // Passed through from the X402 client's result for this request.
41
+ raw: unknown; // The complete parsed response body, unmodified.
42
+ }
43
+ type ChatStreamResult = Omit<ChatResult, 'raw'>; // Final return value of the chatStream generator: a ChatResult without the `raw` body.
44
+ declare class Inference { // Client for the inference gateway; every HTTP call is routed through the X402 payment client.
45
+ private readonly x402; // The supplied `x402` client, else one built from the constructor options.
46
+ private readonly fetchImpl; // `opts.fetchImpl`, else the global fetch.
47
+ private readonly base; // Base URL (`opts.baseUrl`, else config.endpoints.inference) with one trailing "/" removed.
48
+ private readonly model; // Default model id (`opts.model`, else config.model).
49
+ private readonly internalKey?; // Value sent as the X-Internal-Key header when set.
50
+ constructor(opts?: InferenceOptions); // All options are optional; see InferenceOptions.
51
+ private headers; // Builds the JSON request headers, adding X-Internal-Key when configured.
52
+ private body; // Serializes model, messages, max_tokens, temperature and stream as the JSON request body.
53
+ /** List available model ids (free). */
54
+ listModels(): Promise<string[]>;
55
+ /** Non-streaming completion. Pays CIRC if the gateway answers 402. */
56
+ chat(params: ChatParams): Promise<ChatResult>;
57
+ /** Non-streaming completion with a signed InferenceReceipt (`?signed=1`): proves the
58
+ * mesh produced this output for this input. The agent forwards `receipt` as evidence so
59
+ * the off-box signer trusts the AI's call (a short answer like "BUY" becomes `verdict`).
60
+ * Pass `acceptedKeys` to verify the receipt here too (throws on a bad receipt). Also throws when the response carries no `inference-receipt` attestation. */
61
+ chatVerified(params: ChatParams, opts?: {
62
+ acceptedKeys?: Record<string, 'data' | 'inference'>;
63
+ maxAgeMs?: number; // Forwarded to verifyEvidence together with `acceptedKeys`; unused when `acceptedKeys` is omitted.
64
+ }): Promise<ChatResult & {
65
+ receipt: InferenceReceipt;
66
+ }>;
67
+ /** The inference signing public key (raw hex) to pin in `acceptedKeys`. Fetched from `/.well-known/circuit-inference-key` under the base URL with a trailing `/v1` removed. */
68
+ signingKey(): Promise<{
69
+ key: string;
70
+ alg: string;
71
+ kind: string;
72
+ }>;
73
+ /** Streaming completion. Yields token deltas as they arrive; the generator's
74
+ * return value is the full { content, usage, paymentTx, quote }. Throws when the response is not OK or has no body. */
75
+ chatStream(params: ChatParams): AsyncGenerator<string, ChatStreamResult, void>;
76
+ }
77
+
78
+ export { type ChatParams, type ChatResult, type ChatStreamResult, Inference, type InferenceOptions, type Usage };
package/dist/index.js ADDED
@@ -0,0 +1,150 @@
1
+ // src/inference.ts
2
+ import { DEFAULT_CONFIG } from "@circuit-llm/core";
3
+ import {
4
+ X402Client
5
+ } from "@circuit-llm/x402";
6
+ import { verifyEvidence } from "@circuit-llm/attest";
7
/**
 * Abort signal for one request. A caller-supplied `signal` wins outright;
 * otherwise a timeout signal is created (`timeoutMs`, default 120 000 ms).
 */
function requestSignal(params) {
  return params.signal ?? AbortSignal.timeout(params.timeoutMs ?? 12e4);
}

/** Shapes a parsed chat-completions response body into the public result fields. */
function toChatResult(data, paymentTx, quote) {
  return {
    content: data?.choices?.[0]?.message?.content?.trim() ?? "",
    usage: data?.usage ?? null,
    paymentTx,
    quote,
    raw: data
  };
}

/**
 * Extracts a printable detail string from an error response body.
 * Handles both a flat `{ error: "text" }` / `{ message: "text" }` body and the
 * OpenAI-style nested `{ error: { message: "text" } }`, which would otherwise
 * be rendered as "[object Object]".
 */
function errorDetail(body) {
  const detail = body?.error ?? body?.message ?? "";
  if (typeof detail === "string") return detail;
  if (typeof detail?.message === "string") return detail.message;
  return JSON.stringify(detail);
}

/**
 * Parses one line of a server-sent-event stream.
 * Returns the decoded JSON payload of a `data:` line, or `null` for anything
 * that carries no JSON event (blank lines, comments, `[DONE]`, malformed JSON).
 */
function parseSseData(line) {
  const text = line.trim();
  if (!text.startsWith("data:")) return null;
  const payload = text.slice(5).trim();
  if (payload === "[DONE]") return null;
  try {
    return JSON.parse(payload);
  } catch {
    // Best effort: a malformed event is skipped rather than aborting the stream.
    return null;
  }
}

/**
 * OpenAI-compatible client for the inference gateway. Every HTTP call is
 * routed through an X402 payment client (`json` for request/response calls,
 * `request` for streaming).
 */
var Inference = class {
  x402;
  fetchImpl;
  base;
  model;
  internalKey;
  /**
   * @param opts Optional settings. When `opts.x402` is absent a new X402Client
   *   is built from `wallet`, `maxSpendRaw`, `onPay` and the chosen fetch.
   *   `baseUrl` / `model` fall back to the config (default: DEFAULT_CONFIG).
   */
  constructor(opts = {}) {
    this.fetchImpl = opts.fetchImpl ?? fetch;
    this.x402 = opts.x402 ?? new X402Client({
      wallet: opts.wallet,
      maxSpendRaw: opts.maxSpendRaw,
      onPay: opts.onPay,
      fetchImpl: this.fetchImpl
    });
    const cfg = opts.config ?? DEFAULT_CONFIG;
    // Drop one trailing slash so `${base}/path` never produces "//".
    this.base = (opts.baseUrl ?? cfg.endpoints.inference).replace(/\/$/, "");
    this.model = opts.model ?? cfg.model;
    this.internalKey = opts.internalKey;
  }
  /** JSON request headers, plus `X-Internal-Key` when an internal key is configured. */
  headers() {
    const h = { "Content-Type": "application/json" };
    if (this.internalKey) h["X-Internal-Key"] = this.internalKey;
    return h;
  }
  /** Serialized request body; `max_tokens` defaults to 512 and `temperature` to 0.5. */
  body(params, stream) {
    return JSON.stringify({
      model: params.model ?? this.model,
      messages: params.messages,
      max_tokens: params.maxTokens ?? 512,
      temperature: params.temperature ?? 0.5,
      stream
    });
  }
  /** List available model ids (free). */
  async listModels() {
    const { data } = await this.x402.json(`${this.base}/models`, {
      headers: this.headers()
    });
    return (data?.data ?? []).map((m) => m.id);
  }
  /** Non-streaming completion. Pays CIRC if the gateway answers 402. */
  async chat(params) {
    const { data, paymentTx, quote } = await this.x402.json(`${this.base}/chat/completions`, {
      method: "POST",
      headers: this.headers(),
      body: this.body(params, false),
      signal: requestSignal(params)
    });
    return toChatResult(data, paymentTx, quote);
  }
  // ── verified intents (docs/verified-intents.md) ────────────────────────────
  /** Non-streaming completion with a signed InferenceReceipt (`?signed=1`): proves the
   * mesh produced this output for this input. The agent forwards `receipt` as evidence so
   * the off-box signer trusts the AI's call (a short answer like "BUY" becomes `verdict`).
   * Pass `acceptedKeys` to verify the receipt here too (throws on a bad receipt).
   * Throws as well when the response carries no `inference-receipt` attestation. */
  async chatVerified(params, opts = {}) {
    const { data, paymentTx, quote } = await this.x402.json(`${this.base}/chat/completions?signed=1`, {
      method: "POST",
      headers: this.headers(),
      body: this.body(params, false),
      signal: requestSignal(params)
    });
    const receipt = data?.attestation;
    if (!receipt || receipt.kind !== "inference-receipt") throw new Error("gateway did not return an InferenceReceipt \u2014 is receipt signing enabled?");
    if (opts.acceptedKeys) {
      const r = verifyEvidence(receipt, { acceptedKeys: opts.acceptedKeys, maxAgeMs: opts.maxAgeMs });
      if (!r.ok) throw new Error(`inference-receipt failed verification: ${r.code}`);
    }
    return { ...toChatResult(data, paymentTx, quote), receipt };
  }
  /** The inference signing public key (raw hex) to pin in `acceptedKeys`. */
  async signingKey() {
    // The key document lives at the host root, not under the `/v1` API prefix.
    const root = this.base.replace(/\/v1$/, "");
    const { data } = await this.x402.json(
      `${root}/.well-known/circuit-inference-key`,
      { headers: this.headers() }
    );
    return data;
  }
  /** Streaming completion. Yields token deltas as they arrive; the generator's
   * return value is the full { content, usage, paymentTx, quote }.
   * Throws when the response is not OK or has no body. If the consumer stops
   * iterating early (or an error escapes), the underlying stream is cancelled. */
  async *chatStream(params) {
    const url = `${this.base}/chat/completions`;
    const body = this.body(params, true);
    // Call fetch through a local binding: invoking a browser-native fetch with
    // this instance as its receiver fails with "Illegal invocation".
    const doFetch = this.fetchImpl;
    const { resp, paymentTx, quote } = await this.x402.request(
      (extra) => doFetch(url, {
        method: "POST",
        headers: { ...this.headers(), ...extra },
        body,
        // A fresh signal per attempt, so a paid retry gets its own timeout.
        signal: requestSignal(params)
      })
    );
    if (!resp.ok) {
      const e = await resp.json().catch(() => ({}));
      throw new Error(`Inference ${resp.status}: ${errorDetail(e)}`);
    }
    if (!resp.body) throw new Error("Inference response had no body to stream");
    let content = "";
    let usage = null;
    const reader = resp.body.getReader();
    const decoder = new TextDecoder();
    let buf = "";
    let finished = false;
    try {
      for (; ; ) {
        const { value, done } = await reader.read();
        // At end of stream, flush any bytes the decoder is still holding.
        buf += done ? decoder.decode() : decoder.decode(value, { stream: true });
        const lines = buf.split("\n");
        // Mid-stream the last element may be a partial line, so keep it buffered;
        // at end of stream it is complete and must be parsed as well.
        buf = done ? "" : (lines.pop() ?? "");
        for (const line of lines) {
          const json = parseSseData(line);
          if (!json) continue;
          const delta = json.choices?.[0]?.delta?.content;
          if (delta) {
            content += delta;
            yield delta;
          }
          if (json.usage) usage = json.usage;
        }
        if (done) {
          finished = true;
          break;
        }
      }
    } finally {
      if (!finished) {
        try {
          // Early exit or failure: release the connection instead of leaking it.
          await reader.cancel();
        } catch {
          // The stream is already errored or closed; nothing left to release.
        }
      }
    }
    return { content, usage, paymentTx, quote };
  }
};
148
+ export {
149
+ Inference
150
+ };
package/package.json ADDED
@@ -0,0 +1,45 @@
1
+ {
2
+ "name": "@circuit-llm/inference",
3
+ "version": "0.2.1",
4
+ "description": "Circuit SDK inference — OpenAI-compatible client for the decentralized DLLM mesh, paid per call in CIRC (x402).",
5
+ "license": "MIT",
6
+ "type": "module",
7
+ "exports": {
8
+ ".": {
9
+ "development": "./src/index.ts",
10
+ "types": "./dist/index.d.ts",
11
+ "default": "./dist/index.js"
12
+ }
13
+ },
14
+ "scripts": {
15
+ "test": "node --experimental-strip-types --conditions=development --test test/*.test.ts",
16
+ "typecheck": "tsc -p tsconfig.json",
17
+ "build": "tsup src/index.ts --format esm --dts --clean --out-dir dist",
18
+ "prepack": "tsup src/index.ts --format esm --dts --clean --out-dir dist"
19
+ },
20
+ "dependencies": {
21
+ "@circuit-llm/attest": "0.2.1",
22
+ "@circuit-llm/core": "0.2.1",
23
+ "@circuit-llm/x402": "0.2.1"
24
+ },
25
+ "main": "./dist/index.js",
26
+ "types": "./dist/index.d.ts",
27
+ "files": [
28
+ "dist"
29
+ ],
30
+ "publishConfig": {
31
+ "access": "public"
32
+ },
33
+ "repository": {
34
+ "type": "git",
35
+ "url": "git+https://github.com/Circuit-LLM/circuit-sdk.git",
36
+ "directory": "packages/inference"
37
+ },
38
+ "homepage": "https://github.com/Circuit-LLM/circuit-sdk/tree/main/packages/inference#readme",
39
+ "bugs": {
40
+ "url": "https://github.com/Circuit-LLM/circuit-sdk/issues"
41
+ },
42
+ "engines": {
43
+ "node": ">=18"
44
+ }
45
+ }