mod8-cli 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. package/CHANGELOG.md +87 -0
  2. package/LICENSE +21 -0
  3. package/README.md +239 -0
  4. package/bin/mod8.js +2 -0
  5. package/dist/cli.js +302 -0
  6. package/dist/commands/addProvider.js +105 -0
  7. package/dist/commands/all.js +158 -0
  8. package/dist/commands/chat.js +855 -0
  9. package/dist/commands/config.js +29 -0
  10. package/dist/commands/devAuthStatus.js +34 -0
  11. package/dist/commands/devHostAsk.js +51 -0
  12. package/dist/commands/devHostSystem.js +15 -0
  13. package/dist/commands/devResolve.js +54 -0
  14. package/dist/commands/devSimulate.js +235 -0
  15. package/dist/commands/devWorkAsk.js +55 -0
  16. package/dist/commands/intentRouting.js +280 -0
  17. package/dist/commands/keys.js +55 -0
  18. package/dist/commands/list.js +27 -0
  19. package/dist/commands/login.js +147 -0
  20. package/dist/commands/logout.js +17 -0
  21. package/dist/commands/prompt.js +63 -0
  22. package/dist/commands/providers.js +30 -0
  23. package/dist/commands/verify.js +5 -0
  24. package/dist/input/compose.js +37 -0
  25. package/dist/input/files.js +49 -0
  26. package/dist/input/stdin.js +14 -0
  27. package/dist/providers/anthropic.js +115 -0
  28. package/dist/providers/displayName.js +25 -0
  29. package/dist/providers/errorHints.js +175 -0
  30. package/dist/providers/generic.js +331 -0
  31. package/dist/providers/genericChat.js +265 -0
  32. package/dist/providers/google.js +63 -0
  33. package/dist/providers/hostSystem.js +173 -0
  34. package/dist/providers/index.js +38 -0
  35. package/dist/providers/mock.js +87 -0
  36. package/dist/providers/modelResolution.js +42 -0
  37. package/dist/providers/openai.js +75 -0
  38. package/dist/providers/pricing.js +47 -0
  39. package/dist/providers/proxy.js +148 -0
  40. package/dist/providers/registry.js +196 -0
  41. package/dist/providers/types.js +1 -0
  42. package/dist/providers/workSystem.js +33 -0
  43. package/dist/storage/auth.js +65 -0
  44. package/dist/storage/config.js +35 -0
  45. package/dist/storage/keys.js +59 -0
  46. package/dist/storage/providers.js +337 -0
  47. package/dist/storage/sessions.js +150 -0
  48. package/dist/types.js +9 -0
  49. package/dist/util/debug.js +79 -0
  50. package/dist/util/errors.js +157 -0
  51. package/dist/util/prompt.js +111 -0
  52. package/dist/util/secrets.js +110 -0
  53. package/dist/util/text.js +53 -0
  54. package/dist/util/time.js +25 -0
  55. package/dist/verify/runner.js +437 -0
  56. package/package.json +69 -0
  57. package/specs/all-mode.yaml +44 -0
  58. package/specs/behavior/auto-fallback.yaml +49 -0
  59. package/specs/behavior/bare-name-routing.yaml +223 -0
  60. package/specs/behavior/bare-paste-confirm.yaml +125 -0
  61. package/specs/behavior/env-var-respected.yaml +108 -0
  62. package/specs/behavior/error-fidelity.yaml +92 -0
  63. package/specs/behavior/error-hints.yaml +160 -0
  64. package/specs/behavior/fresh-vs-resume.yaml +94 -0
  65. package/specs/behavior/fuzzy-match.yaml +208 -0
  66. package/specs/behavior/host-self-knowledge-fresh.yaml +66 -0
  67. package/specs/behavior/intent-no-mismatch.yaml +115 -0
  68. package/specs/behavior/login-logout.yaml +97 -0
  69. package/specs/behavior/no-model-allowlist.yaml +80 -0
  70. package/specs/behavior/paste-key.yaml +342 -0
  71. package/specs/behavior/provider-switching.yaml +186 -0
  72. package/specs/behavior/providers-json-respected.yaml +106 -0
  73. package/specs/behavior/self-knowledge.yaml +119 -0
  74. package/specs/behavior/stress-session.yaml +226 -0
  75. package/specs/behavior/switch-back-when-failing.yaml +90 -0
  76. package/specs/behavior/work-character.yaml +109 -0
  77. package/specs/chat-meta.yaml +349 -0
  78. package/specs/chat-startup.yaml +148 -0
  79. package/specs/chat.yaml +91 -0
  80. package/specs/config.yaml +42 -0
  81. package/specs/install.yaml +112 -0
  82. package/specs/keys.yaml +81 -0
  83. package/specs/one-shot.yaml +65 -0
  84. package/specs/pipe-and-files.yaml +40 -0
  85. package/specs/providers.yaml +172 -0
  86. package/specs/sessions.yaml +115 -0
package/dist/providers/hostSystem.js
@@ -0,0 +1,173 @@
+ /**
+  * Host system prompt builder.
+  *
+  * The host (mod8) needs to know about itself so it can answer meta questions
+  * — "what providers do I have?", "how do I add one?", "what can you do?",
+  * "what's codex?" (a name the user gave a configured provider) — directly
+  * instead of pivoting to "tell me about your project."
+  *
+  * We assemble the prompt at chat startup with live data from the providers
+  * store, so the host can name the user's actual configured providers (with
+  * their custom names and models), not just a generic list.
+  */
+ import { listProviders } from '../storage/providers.js';
+ import { KNOWN_PROVIDERS } from './registry.js';
+ export async function readHostContext() {
+     const stored = await listProviders();
+     const configured = [];
+     for (const [id, entry] of Object.entries(stored)) {
+         configured.push({
+             id,
+             name: entry.name,
+             defaultModel: entry.defaultModel,
+             apiType: entry.apiType,
+             custom: !!entry.custom,
+         });
+     }
+     return { configured };
+ }
+ export function buildHostSystem(ctx) {
+     const builtInCount = KNOWN_PROVIDERS.length;
+     const configuredCount = ctx.configured.length;
+     const configuredBlock = configuredCount === 0
+         ? ' (none yet — user must run `mod8 keys set <id>` or `mod8 add-provider` first)'
+         : ctx.configured
+             .map((p) => ` - id: "${p.id}" · name: "${p.name}" · model: ${p.defaultModel} · api: ${p.apiType}${p.custom ? ' (custom)' : ''}`)
+             .join('\n');
+     // Build a "if user says X, they likely mean configured provider Y" lookup
+     // hint so the host can recognize nicknames at a glance.
+     const nicknameHints = configuredCount === 0
+         ? ''
+         : '\nName-match hint: if the user mentions any of these terms, they are very likely referring to one of the configured providers above:\n' +
+             ctx.configured
+                 .map((p) => ` - "${p.id}", "${p.name}" → provider id "${p.id}"`)
+                 .join('\n');
+     const builtInList = KNOWN_PROVIDERS.map((p) => p.id).join(', ');
+     return `You are mod8, a multi-provider LLM CLI. You are the tool itself, talking to the user from inside your own chat REPL. You are NOT a generic chatbot, and you are NOT helping the user build some other software — mod8 IS the software, and you have full information about it (listed below).
+ 
+ # ABSOLUTE RULE — read first
+ 
+ You DO have details about your own setup. They are spelled out below. NEVER say "I don't have info about what's powering me" or "I don't have details about my setup" or anything like that — those are lies, and they will get you replaced. If a user asks ANY question about mod8, providers, operators, platforms, models, connections, configuration, or commands, you answer FROM THE FACTS BELOW — not by deflecting.
+ 
+ # Mod8 vocabulary — these words always mean meta about mod8
+ 
+ If the user's message contains any of these words/phrases, they are asking about MOD8 ITSELF, not about a separate project they're building:
+ provider, providers, operator, operators, platform, platforms, model, models, connected, connection, configured, key, keys, BYOK, /providers, --all, compare, switch, "use <something>", "ask <something>", "talk to <something>", chat, REPL, session, sessions.
+ 
+ When you see these words, the question is META. Answer from the facts below. Do NOT pivot to "tell me about your project."
+ 
+ # What mod8 is
+ 
+ mod8 is a command-line tool for chatting with large language models from the terminal. BYOK (bring your own key): the user's API keys live locally in ~/.config/mod8/providers.json (mode 0600). Nothing is sent anywhere except directly to the providers they've configured. There is no mod8 server, no telemetry.
+ 
+ You (the planning side, "host") run on Anthropic Sonnet. The other side ("work") runs on whichever provider the user picks — defaults to Anthropic Opus, displayed as "claude".
+ 
+ # Providers configured RIGHT NOW (in this session) — ${configuredCount} configured
+ 
+ ${configuredBlock}
+ ${nicknameHints}
+ 
+ # Built-in provider templates the user can add a key for (${builtInCount} total)
+ 
+ ${builtInList}. Plus any OpenAI-compatible API via \`mod8 add-provider\` — paste a key, mod8 detects the format, asks for missing details (id, base URL, default model), saves it.
+ 
+ # Commands the user can run
+ 
+ From the shell:
+ - \`mod8 "..."\` — one-shot to the configured default provider
+ - \`mod8 -c "..."\` — one-shot to Anthropic
+ - \`mod8 -o "..."\` — one-shot to OpenAI
+ - \`mod8 -g "..."\` — one-shot to Gemini
+ - \`mod8 --all "..."\` — fan out to every configured provider, side-by-side
+ - \`mod8 keys set <id>\` — save an API key for a built-in provider
+ - \`mod8 keys list\` — see which providers are configured
+ - \`mod8 keys remove <id>\` — drop a key
+ - \`mod8 add-provider\` — interactive flow to register any provider
+ - \`mod8 providers\` — detailed view of configured providers
+ - \`mod8 new\` — start a fresh chat session
+ - \`mod8 list\` — see saved sessions
+ - \`mod8 resume <id>\` — continue a session
+ - \`mod8 verify\` — run the built-in self-test suite
+ 
+ In chat (right here, while talking to you):
+ - "go", "let's work", "let me talk to claude" — switches to work mode (Anthropic Opus by default)
+ - "use <id>", "ask <id>", "switch to <id>", "talk to <id>", "let me talk to <id>" — switches work mode to a specific configured provider (the CLI handles all these phrasings directly, you don't emit a token)
+ - The CLI also accepts common nicknames as aliases: "gpt"/"chatgpt" → openai, "claude"/"sonnet"/"opus" → anthropic, "gemini"/"bard" → google, "grok" → xai, "llama" → groq.
+ - "compare all: <prompt>", "ask everyone: <prompt>", "/compare <prompt>" — fan out the next turn across every configured provider, side-by-side
+ - "/providers" — list configured providers
+ - "/clear" — wipe the current session's history
+ - "/exit" — quit
+ - "/mod8" or "@mod8" (from inside work mode) — return to host
+ - esc — interrupt streaming mid-response
+ 
+ # Adding / changing / updating API keys — INLINE, never via the CLI
+ 
+ mod8 has an inline paste-key flow. When the user says ANY of these (in any phrasing):
+ - "add a key" / "paste a key" / "save my key" / "register a key"
+ - "change the google key" / "update my anthropic key" / "replace the openai key"
+ - "rotate google key" / "swap the gemini key" / "renew my key"
+ - "let me add gemini" / "i need to update my key" / "lets change the key"
+ 
+ …the CLI's deterministic intent matcher catches it BEFORE you see it and arms a consent flow that asks the user to paste their key right here in chat. The CLI then masks the key in the transcript, saves it locally to ~/.config/mod8/providers.json, and confirms.
+ 
+ If for some reason you DO see one of these messages (the matcher missed a rare phrasing), respond with EXACTLY:
+ 
+ "Sure — paste your new key in your next message. I'll mask it in chat and save it locally."
+ 
+ Then STOP. Do NOT emit any handoff token. Do NOT mention "mod8 keys set <id>". Do NOT tell the user to "run this in your shell". Do NOT show a code block with a CLI command.
+ 
+ The CLI command "mod8 keys set <id>" exists, but it is for users who are NOT currently in chat. Inside chat, the inline paste flow is always the right answer. Telling someone in chat "run this in your shell" is wrong twice: it makes them leave, and it ignores the inline path that already works.
+ 
+ # How to behave (READ CAREFULLY)
+ 
+ Before each response, ask yourself: does the user's message use any mod8 vocabulary (see list above), or could it be interpreted as a question about mod8? If yes — even partially yes — this is a META question. Answer from the facts above.
+ 
+ Examples of META questions you must answer directly (DON'T pivot):
+ - "what is mod8?" / "what can you do?" / "what's this?"
+ - "how many operators / providers / platforms / models are you connected to?"
+ - "what providers do I have?" / "what platforms are configured?"
+ - "how do I add a new provider?" / "how do I switch?" / "how do I compare?"
+ - "what commands are there?" / "what's /providers?"
+ - A bare provider id or name from the configured list ("codex", "anthropic", "groq", etc.) — they're talking about THAT provider. Confirm what you know, ask if they want to use it.
+ - Any question that uses "you" / "your" referring to mod8 ("how many operators do you connect to?", "what's powering you?", "which models do you have?").
+ 
+ When the user wants to plan a real task or build something OUTSIDE of mod8 (their own software project — a web app, a script, a feature), THAT is when planning behavior kicks in: ask 1-2 clarifying questions, suggest approaches, then hand off to work mode when they're ready.
+ 
+ DEFAULT BIAS: when a question is ambiguous, default to META (treat it as about mod8), NOT to "their project." A meta-answer is always recoverable; pivoting to "tell me about your project" is the bug we are explicitly trying to prevent.
+ 
+ If you genuinely cannot tell, ASK ONCE to clarify (e.g., "are you asking about mod8 itself, or about a project you're working on?"). Do NOT assume "their project" silently.
+ 
+ Keep responses to 1-3 sentences — direct, friendly, not chatty. For meta answers, short bullet lists are fine.
+ 
+ # How to hand off to work mode
+ 
+ When the user clearly wants real work done — coding, writing, generating — respond with a one-sentence acknowledgement, then end your message with the literal token <SWITCH_TO_WORK>. Don't explain the token. Just append it on a new line at the end. The CLI strips it from the visible reply and switches modes for the user's next turn.
+ 
+ When to hand off (any of these, or anything equivalent — be generous):
+ - explicit triggers: "go", "let's go", "let's work", "let's build", "switch"
+ - asking for the worker: "let me talk to claude", "I want claude", "give me claude", "claude please"
+ - ready to act: "I'm ready", "go ahead", "do it", "build it", "code it", "write it", "let's start"
+ 
+ If the user names a specific provider ("use deepseek", "ask grok"), the CLI handles the switch directly — DON'T emit the token; just answer normally or briefly confirm.
+ 
+ If the user is asking a meta question, exploring, clarifying, asking how-to — DON'T emit the token. Stay engaged.
+ 
+ Never refuse a hand-off.
+ 
+ Don't reveal which underlying model powers you. You are mod8.
+ 
+ # CRITICAL — never lie about which provider is being switched to
+ 
+ The <SWITCH_TO_WORK> token ALWAYS lands on claude (Anthropic Opus, the default work model). It cannot route to any other provider. The CLI's intent router (a separate, deterministic component) handles routing to specific providers — it runs BEFORE you do, so by the time YOU see the user's message, any "use codex" / "talk with grok" intent has either already been routed or wasn't recognized.
+ 
+ Therefore, when you emit <SWITCH_TO_WORK>:
+ - It is OK to say "switching to claude", "let me hand you off to claude", "going to work mode".
+ - It is NEVER OK to say "switching to codex", "switching to gpt", "switching to grok", "switching to <anything except claude>". That would be a lie — the token only lands on claude.
+ 
+ If the user asked for a specific provider but the CLI didn't route them (e.g. they typed something the intent matcher missed), do NOT emit <SWITCH_TO_WORK> and pretend you switched to that provider. Instead, tell the user the exact phrasing that works:
+ 
+ Wrong: "Switching you to codex now! <SWITCH_TO_WORK>"
+ Right: "I can't route to codex from this message — type 'use codex' or 'talk to codex' and I'll switch you, or I can hand you off to claude with 'go'."
+ 
+ The user-facing banner is generated by the CLI based on the actual routing — your spoken text MUST agree with what actually happens, or the user will see two different things and lose trust.`;
+ }
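
Taken together, the two exports are wired up at chat startup roughly as follows. This is a sketch, not package code: the deep-import specifier is an assumption about the published layout, and the surrounding wiring is illustrative.

// Sketch: assembling the host system prompt at chat startup.
import { readHostContext, buildHostSystem } from 'mod8-cli/dist/providers/hostSystem.js';

const ctx = await readHostContext();   // live snapshot of providers.json
const system = buildHostSystem(ctx);   // full system prompt string for the host model
console.log(system.split('\n')[0]);    // "You are mod8, a multi-provider LLM CLI. ..."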
package/dist/providers/index.js
@@ -0,0 +1,38 @@
+ /**
+  * Public entry point for getting a provider client by id.
+  *
+  * Routing rules:
+  *   1. MOD8_MOCK=1 → mock (test path; handled inside buildProviderClient)
+  *   2. auth.json → proxy client for {anthropic, openai, google, deepseek};
+  *      custom OpenAI-compat ids fall through to (3)
+  *   3. otherwise → local BYOK from providers.json (current behavior)
+  *
+  * Used by one-shot (`mod8 -c/-o/-g/-d`), `--all`, and config-set default
+  * routing. The chat REPL uses streamProviderChat from genericChat.ts
+  * directly — that module mirrors the same routing.
+  */
+ import { buildProviderClient } from './generic.js';
+ import { readAuth, effectiveProxyUrl } from '../storage/auth.js';
+ import { makeProxyClient, toProxyProviderId } from './proxy.js';
+ export async function getProviderClient(id) {
+     if (process.env.MOD8_MOCK === '1')
+         return buildProviderClient(id);
+     const auth = await readAuth();
+     if (auth) {
+         const proxyId = toProxyProviderId(id);
+         if (proxyId) {
+             return makeProxyClient({
+                 proxyUrl: effectiveProxyUrl(auth),
+                 mod8Key: auth.mod8Key,
+                 providerId: proxyId,
+             });
+         }
+         // Custom providers (mistral / groq / openrouter / xai / custom): the
+         // proxy doesn't carry them yet. Fall back to local providers.json so
+         // the user isn't blocked.
+     }
+     return buildProviderClient(id);
+ }
+ export async function authedSession() {
+     return readAuth();
+ }
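
The three routing rules play out like this. A sketch under stated assumptions: MOD8_MOCK toggles rule 1, and whether a core id takes rule 2 or rule 3 depends on whether auth.json exists on this machine.

// Sketch: the three routing rules of getProviderClient.
import { getProviderClient } from 'mod8-cli/dist/providers/index.js';

process.env.MOD8_MOCK = '1';
const mocked = await getProviderClient('anthropic');   // rule 1: mock test path
delete process.env.MOD8_MOCK;

// Rule 2: with auth.json present, core ids get a proxy client...
const core = await getProviderClient('openai');
// Rule 3: ...while custom OpenAI-compat ids fall back to local BYOK.
const custom = await getProviderClient('openrouter');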
package/dist/providers/mock.js
@@ -0,0 +1,87 @@
+ import { priceFor } from './pricing.js';
+ const RESPONSE_FALLBACK = 'Mock five-word reply here now.';
+ const KNOWN_RESPONSES = {
+     anthropic: 'Hello! Five words exactly here.',
+     openai: 'Five quick words from GPT.',
+     google: 'Hi from Gemini, five words.',
+     deepseek: 'Five quick words from DeepSeek.',
+     mistral: 'Mistral five-word mock reply here.',
+     groq: 'Groq five-word mock reply here.',
+     xai: 'Grok five-word mock reply here.',
+     openrouter: 'OpenRouter five-word mock reply here.',
+     together: 'Together five-word mock reply here.',
+ };
+ const KNOWN_MODELS = {
+     anthropic: 'claude-sonnet-4-6',
+     openai: 'gpt-4o',
+     google: 'gemini-2.0-flash',
+     deepseek: 'deepseek-chat',
+     mistral: 'mistral-large-latest',
+     groq: 'llama-3.3-70b-versatile',
+     xai: 'grok-2-latest',
+     openrouter: 'openai/gpt-4o-mini',
+     together: 'meta-llama/Llama-3.3-70B-Instruct-Turbo',
+ };
+ const sleep = (ms) => new Promise((r) => setTimeout(r, ms));
+ export function mockProvider(id) {
+     const model = KNOWN_MODELS[id] ?? `${id}-mock-1`;
+     const responseText = KNOWN_RESPONSES[id] ?? RESPONSE_FALLBACK;
+     const buildUsage = (latencyMs, inputTokens, outputTokens) => ({
+         inputTokens,
+         outputTokens,
+         latencyMs,
+         model,
+         costUsd: priceFor(model, inputTokens, outputTokens),
+     });
+     const checkFail = () => {
+         if (process.env.MOD8_MOCK_FAIL === id) {
+             throw new Error(`Mock failure: ${id} provider intentionally failed`);
+         }
+         const errType = process.env.MOD8_MOCK_ERROR;
+         if (errType && (process.env.MOD8_MOCK_ERROR_PROVIDER ?? id) === id) {
+             switch (errType) {
+                 case '401':
+                     throw new Error('401 Unauthorized: invalid_api_key');
+                 case '429':
+                     throw new Error('429 Too Many Requests: rate_limit_exceeded');
+                 case 'network':
+                     throw new Error('fetch failed: ENOTFOUND api.example.com');
+                 case 'quota':
+                     throw new Error('insufficient credits on your account');
+                 case 'timeout':
+                     throw new Error('Request timed out after 60s');
+                 case 'model':
+                     throw new Error('model `nope-1` does not exist');
+             }
+         }
+     };
+     return {
+         id,
+         defaultModel: model,
+         async call(prompt) {
+             const delay = 200 + Math.random() * 400;
+             await sleep(delay);
+             checkFail();
+             const inputTokens = Math.max(1, Math.floor(prompt.length / 4));
+             const outputTokens = 8;
+             const text = process.env.MOD8_MOCK_ECHO === '1' ? `[${id}] received:\n${prompt}` : responseText;
+             return {
+                 text,
+                 ...buildUsage(Math.round(delay), inputTokens, outputTokens),
+             };
+         },
+         async *stream(prompt) {
+             const start = Date.now();
+             await sleep(120 + Math.random() * 180);
+             checkFail();
+             for (let i = 0; i < responseText.length; i++) {
+                 yield { type: 'text', delta: responseText[i] };
+                 await sleep(8 + Math.random() * 12);
+             }
+             const latencyMs = Date.now() - start;
+             const inputTokens = Math.max(1, Math.floor(prompt.length / 4));
+             const outputTokens = 8;
+             yield { type: 'done', usage: buildUsage(latencyMs, inputTokens, outputTokens) };
+         },
+     };
+ }
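
The mock's failure paths are driven entirely from the environment, which is what the behavior specs lean on. A sketch (the import path is an assumption):

// Sketch: injecting a provider-specific error into the mock client.
import { mockProvider } from 'mod8-cli/dist/providers/mock.js';

process.env.MOD8_MOCK_ERROR = '429';              // which error to raise
process.env.MOD8_MOCK_ERROR_PROVIDER = 'openai';  // limit it to one provider id
try {
    await mockProvider('openai').call('hello');
} catch (err) {
    console.log(err.message);  // "429 Too Many Requests: rate_limit_exceeded"
}
await mockProvider('google').call('hello');       // unaffected: resolves normally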
package/dist/providers/modelResolution.js
@@ -0,0 +1,42 @@
+ /**
+  * Single source of truth for picking the model for a provider call.
+  *
+  * Resolution priority (matches the user-facing contract — env > config >
+  * template default):
+  *
+  *   1. opts.model          — explicit per-call override (rare; used by
+  *                            compare flow when targeting specific models)
+  *   2. MOD8_<ID>_MODEL env — quick override without editing providers.json,
+  *                            case-insensitive on the env var name
+  *   3. entry.defaultModel  — the value the user wrote into providers.json
+  *   4. (none)              — providers without a default fail loudly so the
+  *                            caller can surface a useful error
+  *
+  * NEVER silently substitute a different model from any internal allowlist —
+  * if the user wrote "gemini-2.5-flash" we send "gemini-2.5-flash" to the
+  * provider, and let the provider decide whether that's valid.
+  */
+ /** Build the env var name for a given provider id ("google" → "MOD8_GOOGLE_MODEL"). */
+ export function envVarForProvider(providerId) {
+     const sanitized = providerId.toUpperCase().replace(/[^A-Z0-9]/g, '_');
+     return `MOD8_${sanitized}_MODEL`;
+ }
+ /** Read the env override for a provider id, or undefined if unset/empty. */
+ export function envModelFor(providerId) {
+     const v = process.env[envVarForProvider(providerId)];
+     return v && v.length > 0 ? v : undefined;
+ }
+ export function resolveModel(providerId, optsModel, entryDefaultModel) {
+     const envVar = envVarForProvider(providerId);
+     if (optsModel && optsModel.length > 0) {
+         return { model: optsModel, source: 'opts', envVar };
+     }
+     const envModel = envModelFor(providerId);
+     if (envModel) {
+         return { model: envModel, source: 'env', envVar };
+     }
+     if (entryDefaultModel && entryDefaultModel.length > 0) {
+         return { model: entryDefaultModel, source: 'providers.json', envVar };
+     }
+     return { model: '', source: 'none', envVar };
+ }
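
The precedence is easiest to see end to end. A sketch with illustrative values (the import path is an assumption):

// Sketch: env override beats providers.json; an explicit opts.model beats both.
import { resolveModel, envVarForProvider } from 'mod8-cli/dist/providers/modelResolution.js';

console.log(envVarForProvider('my-proxy'));  // "MOD8_MY_PROXY_MODEL" (non-alphanumerics become "_")

process.env.MOD8_GOOGLE_MODEL = 'gemini-2.5-pro';
console.log(resolveModel('google', undefined, 'gemini-2.0-flash').model);
// "gemini-2.5-pro" (source: 'env')

delete process.env.MOD8_GOOGLE_MODEL;
console.log(resolveModel('google', undefined, 'gemini-2.0-flash').model);
// "gemini-2.0-flash" (source: 'providers.json')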
package/dist/providers/openai.js
@@ -0,0 +1,75 @@
+ import OpenAI from 'openai';
+ import { getKey } from '../storage/keys.js';
+ import { priceFor } from './pricing.js';
+ const DEFAULT_MODEL = 'gpt-4o';
+ async function buildClient() {
+     const apiKey = process.env.OPENAI_API_KEY ?? (await getKey('openai'));
+     if (!apiKey) {
+         throw new Error('No OpenAI key configured. Run: mod8 keys set openai, or set OPENAI_API_KEY.');
+     }
+     return new OpenAI({ apiKey });
+ }
+ export const openaiProvider = {
+     id: 'openai',
+     defaultModel: DEFAULT_MODEL,
+     async call(prompt, opts = {}) {
+         const client = await buildClient();
+         const model = opts.model ?? process.env.MOD8_OPENAI_MODEL ?? DEFAULT_MODEL;
+         const start = Date.now();
+         const res = await client.chat.completions.create({
+             model,
+             messages: [{ role: 'user', content: prompt }],
+             max_tokens: opts.maxTokens ?? 1024,
+         });
+         const latencyMs = Date.now() - start;
+         const text = res.choices[0]?.message?.content ?? '';
+         const inputTokens = res.usage?.prompt_tokens ?? 0;
+         const outputTokens = res.usage?.completion_tokens ?? 0;
+         const actualModel = res.model ?? model;
+         return {
+             text,
+             inputTokens,
+             outputTokens,
+             costUsd: priceFor(actualModel, inputTokens, outputTokens),
+             latencyMs,
+             model: actualModel,
+         };
+     },
+     async *stream(prompt, opts = {}) {
+         const client = await buildClient();
+         const model = opts.model ?? process.env.MOD8_OPENAI_MODEL ?? DEFAULT_MODEL;
+         const start = Date.now();
+         const stream = await client.chat.completions.create({
+             model,
+             messages: [{ role: 'user', content: prompt }],
+             max_tokens: opts.maxTokens ?? 1024,
+             stream: true,
+             stream_options: { include_usage: true },
+         });
+         let inputTokens = 0;
+         let outputTokens = 0;
+         let actualModel = model;
+         for await (const chunk of stream) {
+             const delta = chunk.choices[0]?.delta?.content;
+             if (delta)
+                 yield { type: 'text', delta };
+             if (chunk.usage) {
+                 inputTokens = chunk.usage.prompt_tokens ?? 0;
+                 outputTokens = chunk.usage.completion_tokens ?? 0;
+             }
+             if (chunk.model)
+                 actualModel = chunk.model;
+         }
+         const latencyMs = Date.now() - start;
+         yield {
+             type: 'done',
+             usage: {
+                 inputTokens,
+                 outputTokens,
+                 latencyMs,
+                 model: actualModel,
+                 costUsd: priceFor(actualModel, inputTokens, outputTokens),
+             },
+         };
+     },
+ };
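
A caller consumes the stream by watching for the two event types, with usage arriving in the trailing done event. A sketch, assuming OPENAI_API_KEY or a stored key is available:

// Sketch: consuming openaiProvider.stream.
import { openaiProvider } from 'mod8-cli/dist/providers/openai.js';

let usage;
for await (const ev of openaiProvider.stream('Say hi in five words.')) {
    if (ev.type === 'text')
        process.stdout.write(ev.delta);
    else if (ev.type === 'done')
        usage = ev.usage;
}
console.log(`\n${usage.model}: ${usage.inputTokens} in, ${usage.outputTokens} out, ${usage.latencyMs}ms`);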
package/dist/providers/pricing.js
@@ -0,0 +1,47 @@
+ const PRICING = {
+     // Anthropic (Claude 4.x family — list prices)
+     'claude-opus-4-7': { inputPerMtok: 15, outputPerMtok: 75 },
+     'claude-opus-4': { inputPerMtok: 15, outputPerMtok: 75 },
+     'claude-sonnet-4-6': { inputPerMtok: 3, outputPerMtok: 15 },
+     'claude-sonnet-4-5': { inputPerMtok: 3, outputPerMtok: 15 },
+     'claude-sonnet-4': { inputPerMtok: 3, outputPerMtok: 15 },
+     'claude-haiku-4-5': { inputPerMtok: 1, outputPerMtok: 5 },
+     'claude-haiku-4': { inputPerMtok: 1, outputPerMtok: 5 },
+     // OpenAI
+     'gpt-4o': { inputPerMtok: 2.5, outputPerMtok: 10 },
+     'gpt-4o-mini': { inputPerMtok: 0.15, outputPerMtok: 0.6 },
+     'gpt-4.1': { inputPerMtok: 2, outputPerMtok: 8 },
+     'gpt-4.1-mini': { inputPerMtok: 0.4, outputPerMtok: 1.6 },
+     // Google
+     'gemini-2.0-flash': { inputPerMtok: 0.075, outputPerMtok: 0.3 },
+     'gemini-2.5-flash': { inputPerMtok: 0.075, outputPerMtok: 0.3 },
+     'gemini-2.5-pro': { inputPerMtok: 1.25, outputPerMtok: 5 },
+ };
+ export function priceFor(model, inputTokens, outputTokens) {
+     let p = PRICING[model];
+     if (!p) {
+         // Longest matching prefix
+         let bestKey;
+         for (const key of Object.keys(PRICING)) {
+             if (model.startsWith(key) && (!bestKey || key.length > bestKey.length)) {
+                 bestKey = key;
+             }
+         }
+         if (bestKey)
+             p = PRICING[bestKey];
+     }
+     if (!p)
+         return 0;
+     return (inputTokens / 1_000_000) * p.inputPerMtok + (outputTokens / 1_000_000) * p.outputPerMtok;
+ }
+ export function formatCost(usd) {
+     if (usd === 0)
+         return '$0';
+     if (usd < 0.001)
+         return '<$0.001';
+     if (usd < 0.01)
+         return `$${usd.toFixed(4)}`;
+     if (usd < 1)
+         return `$${usd.toFixed(3)}`;
+     return `$${usd.toFixed(2)}`;
+ }
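
The arithmetic, worked by hand for one call against the table above (gpt-4o at $2.50/Mtok in and $10/Mtok out; the import path is an assumption):

// Worked example: 1,200 input + 300 output tokens on gpt-4o.
import { priceFor, formatCost } from 'mod8-cli/dist/providers/pricing.js';

const usd = priceFor('gpt-4o', 1200, 300);
// (1200 / 1e6) * 2.5 + (300 / 1e6) * 10 = 0.003 + 0.003 = 0.006
console.log(formatCost(usd));  // "$0.0060" (four decimals for amounts under $0.01)

// Dated variants fall back to the longest matching prefix:
console.log(priceFor('gpt-4o-2024-08-06', 1200, 300));  // same 0.006, via the 'gpt-4o' entry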
package/dist/providers/proxy.js
@@ -0,0 +1,148 @@
+ /**
+  * ProxyClient — talks to the mod8 hosted proxy (mod8-proxy on Cloud Run)
+  * over SSE. Same ProviderClient surface as local clients so the rest of
+  * the CLI doesn't have to branch.
+  *
+  * Wire format (matches proxy/src/server.ts):
+  *   POST /v1/chat
+  *   Authorization: Bearer <sk-mod8-...>
+  *   { provider, model, messages: [{role,content}], maxTokens?, system? }
+  *
+  * SSE events:
+  *   data: { "type": "text", "delta": "..." }
+  *   data: { "type": "done", "tokensIn": N, "tokensOut": M,
+  *           "rawCostMicros": X, "chargedMicros": Y,
+  *           "balanceAfterMicros": Z, "chargeApplied": true }
+  *   data: { "type": "error", "error": "..." }
+  *
+  * Charged amount uses chargedMicros (raw + 15% markup), not raw — the
+  * user's bill, not the provider's bill.
+  */
+ /** CLI provider id → proxy provider id. Custom OpenAI-compat providers
+  * (mistral/groq/openrouter/xai/together/custom) don't run through the
+  * proxy — they fall back to local providers.json. */
+ export function toProxyProviderId(id) {
+     if (id === 'anthropic' || id === 'openai' || id === 'google' || id === 'deepseek') {
+         return id;
+     }
+     return null;
+ }
+ const DEFAULT_MODEL = {
+     anthropic: 'claude-sonnet-4-6',
+     openai: 'gpt-4o',
+     google: 'gemini-2.5-flash',
+     deepseek: 'deepseek-chat',
+ };
+ export function makeProxyClient(opts) {
+     const id = opts.providerId;
+     const fallbackModel = opts.defaultModel ?? DEFAULT_MODEL[id];
+     async function* runStream(prompt, callOpts) {
+         const model = callOpts.model ?? fallbackModel;
+         const start = Date.now();
+         const resp = await fetch(`${opts.proxyUrl}/v1/chat`, {
+             method: 'POST',
+             headers: {
+                 Authorization: `Bearer ${opts.mod8Key}`,
+                 'Content-Type': 'application/json',
+             },
+             body: JSON.stringify({
+                 provider: id,
+                 model,
+                 messages: [{ role: 'user', content: prompt }],
+                 ...(callOpts.maxTokens !== undefined ? { maxTokens: callOpts.maxTokens } : {}),
+             }),
+         });
+         if (!resp.ok) {
+             const detail = await resp.text().catch(() => '');
+             throw new Error(`mod8 proxy: ${resp.status} ${resp.statusText}${detail ? ` — ${trim(detail)}` : ''}`);
+         }
+         if (!resp.body) {
+             throw new Error('mod8 proxy: empty response body');
+         }
+         const reader = resp.body.getReader();
+         const decoder = new TextDecoder();
+         let buf = '';
+         let inputTokens = 0;
+         let outputTokens = 0;
+         let chargedMicros = 0;
+         let actualModel = model;
+         let sawDone = false;
+         while (true) {
+             const { done, value } = await reader.read();
+             if (done)
+                 break;
+             buf += decoder.decode(value, { stream: true });
+             let idx;
+             while ((idx = buf.indexOf('\n\n')) >= 0) {
+                 const chunk = buf.slice(0, idx);
+                 buf = buf.slice(idx + 2);
+                 for (const line of chunk.split('\n')) {
+                     if (!line.startsWith('data: '))
+                         continue;
+                     let ev;
+                     try {
+                         ev = JSON.parse(line.slice(6));
+                     }
+                     catch {
+                         continue;
+                     }
+                     if (ev.type === 'text') {
+                         yield { type: 'text', delta: ev.delta };
+                     }
+                     else if (ev.type === 'done') {
+                         inputTokens = ev.tokensIn;
+                         outputTokens = ev.tokensOut;
+                         chargedMicros = ev.chargedMicros;
+                         sawDone = true;
+                     }
+                     else if (ev.type === 'error') {
+                         throw new Error(`mod8 proxy: ${ev.error}`);
+                     }
+                 }
+             }
+         }
+         if (!sawDone) {
+             throw new Error('mod8 proxy: stream ended without a done event');
+         }
+         yield {
+             type: 'done',
+             usage: {
+                 inputTokens,
+                 outputTokens,
+                 latencyMs: Date.now() - start,
+                 model: actualModel,
+                 costUsd: chargedMicros / 1_000_000,
+             },
+         };
+     }
+     return {
+         id,
+         defaultModel: fallbackModel,
+         async call(prompt, callOpts = {}) {
+             let text = '';
+             let inputTokens = 0;
+             let outputTokens = 0;
+             let costUsd = 0;
+             let model = fallbackModel;
+             let latencyMs = 0;
+             for await (const ev of runStream(prompt, callOpts)) {
+                 if (ev.type === 'text')
+                     text += ev.delta;
+                 else if (ev.type === 'done') {
+                     inputTokens = ev.usage.inputTokens;
+                     outputTokens = ev.usage.outputTokens;
+                     costUsd = ev.usage.costUsd;
+                     model = ev.usage.model;
+                     latencyMs = ev.usage.latencyMs;
+                 }
+             }
+             return { text, inputTokens, outputTokens, costUsd, latencyMs, model };
+         },
+         async *stream(prompt, callOpts = {}) {
+             yield* runStream(prompt, callOpts);
+         },
+     };
+ }
+ function trim(s) {
+     return s.length > 200 ? s.slice(0, 200) + '…' : s;
+ }
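
Putting the pieces together for a proxy-capable id. A sketch: the proxy URL is a placeholder (the real one comes from auth.json via effectiveProxyUrl), and the key environment variable is an assumption for illustration.

// Sketch: constructing and calling a proxy client.
import { makeProxyClient, toProxyProviderId } from 'mod8-cli/dist/providers/proxy.js';

const proxyId = toProxyProviderId('google');  // 'google'; 'openrouter' would return null
if (proxyId) {
    const client = makeProxyClient({
        proxyUrl: 'https://proxy.example.com',  // placeholder, normally from auth.json
        mod8Key: process.env.MOD8_KEY ?? '',    // an sk-mod8-... key (illustrative source)
        providerId: proxyId,
    });
    const res = await client.call('hello');
    console.log(res.text);
    console.log(res.costUsd);  // chargedMicros / 1e6: the marked-up user price
}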