npm - @qatonic_innovations/qaios - Versions diffs - 0.1.2 → 0.2.0 - Mend

@qatonic_innovations/qaios 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/README.md CHANGED Viewed

@@ -159,8 +159,7 @@ mode: LITE # LITE | FULL | TRUST
 app:
   baseUrl: https://staging.myapp.com
 llm:
-  provider: anthropic
-  apiKeyEnv: ANTHROPIC_API_KEY # key is read from env, never stored
+  provider: anthropic # anthropic | openai
   maxLlmCallsPerWorkflow: 15
   costAlertThresholdUsdCents: 50
 testing:
@@ -176,6 +175,35 @@ defects:
 `qaios config show` prints the resolved config; `qaios config set <key> <value>` validates against the schema before writing. **API keys are never written to config** — they come from environment variables.
+### Choose your LLM provider
+QAIOS works with **Anthropic** (default) or **OpenAI**. Set the provider in
+`.qaios/config.yaml` and export that provider's key:
+```yaml
+# Anthropic (default)
+llm:
+  provider: anthropic # reads ANTHROPIC_API_KEY
+```
+```yaml
+# OpenAI
+llm:
+  provider: openai # reads OPENAI_API_KEY
+  model: gpt-4o # optional; default gpt-4o (try gpt-4o-mini for lower cost)
+```
+```bash
+export OPENAI_API_KEY=sk-...
+qaios doctor   # confirms the configured provider's key is reachable
+```
+The rest of QAIOS is provider-blind — every command (`test`, `run`, `fix`,
+`explore`, `a11y`, …) works identically on either. Structured output uses each
+provider's native guaranteed-schema mode (Anthropic forced tool-use / OpenAI
+strict function calling), so generated artifacts stay schema-valid. You can
+override the key's env-var name with `llm.apiKeyEnv` if needed.
 ### Operating modes
 - **LITE** (default) — HIGH/CRITICAL risk pauses for review; routine work flows through.
@@ -190,8 +218,8 @@ qaios config set mode TRUST
 ## Cost & privacy
-- All v0.1 skills use Claude Sonnet. A typical `qaios test` costs ~**$0.04–0.10**. Each workflow is capped (default `min(15 calls, $0.50)`, configurable) and aborts if exceeded.
-- **Local-first.** No telemetry, no phone-home. The only outbound traffic is (a) LLM calls to Anthropic with the prompts QAIOS builds, and (b) any MCP servers you configure. You bring your own API key; QAIOS does not proxy your requests. See [SECURITY.md](https://github.com/qatonic/qaios/blob/main/SECURITY.md) for exactly what's sent.
+- Skills run on Claude Sonnet (Anthropic) or gpt-4o (OpenAI) per your `llm.provider`. A typical `qaios test` costs ~**$0.04–0.10**. Each workflow is capped (default `min(15 calls, $0.50)`, configurable) and aborts if exceeded.
+- **Local-first.** No telemetry, no phone-home. The only outbound traffic is (a) LLM calls to your configured provider (Anthropic or OpenAI) with the prompts QAIOS builds, and (b) any MCP servers you configure. You bring your own API key; QAIOS does not proxy your requests. See [SECURITY.md](https://github.com/qatonic/qaios/blob/main/SECURITY.md) for exactly what's sent.
 ---

package/dist/index.js CHANGED Viewed

@@ -11,6 +11,7 @@ import { createHash } from 'crypto';
 import { z, ZodError } from 'zod';
 import { monotonicFactory } from 'ulid';
 import Anthropic from '@anthropic-ai/sdk';
+import OpenAI from 'openai';
 import { zodToJsonSchema } from 'zod-to-json-schema';
 import { spawn, spawnSync } from 'child_process';
 import { tmpdir } from 'os';
@@ -843,14 +844,23 @@ var QaiosConfig = z.object({
   version: z.literal(1),
   mode: Mode.default("LITE"),
   llm: z.object({
-    provider: z.literal("anthropic").default("anthropic"),
-    apiKeyEnv: z.string().default("ANTHROPIC_API_KEY"),
-    // v0.1: every tier maps to claude-sonnet-4-6 (the current Sonnet
-    // when v0.1 went GA — see PRICING table in runtime/llm/cost.ts).
-    // There is no real tier routing yet. The skill-declared tier
-    // (1/2/3) is recorded in the audit log so v0.5 can light up real
-    // per-tier routing without re-tagging skills. Do not branch on
-    // tier in skill code; treat the resolved model as opaque.
+    // Which LLM provider backs every skill. Default stays anthropic so
+    // existing projects are unchanged. Set `openai` to use OpenAI instead;
+    // the provider's own key env var (ANTHROPIC_API_KEY / OPENAI_API_KEY)
+    // is read automatically — see runtime/config/env.ts + llm/factory.ts.
+    provider: z.enum(["anthropic", "openai"]).default("anthropic"),
+    // Optional override of the env var that holds the API key. When unset,
+    // the factory picks the provider's conventional var.
+    apiKeyEnv: z.string().optional(),
+    // Optional explicit model id. When set, it overrides the per-tier
+    // models below for ALL tiers (single-model v0.1 behavior). When unset,
+    // the factory resolves the provider's default (Sonnet for anthropic,
+    // gpt-4o for openai).
+    model: z.string().optional(),
+    // v0.1: every tier maps to one model (claude-sonnet-4-6 for anthropic).
+    // There is no real tier routing yet. The skill-declared tier (1/2/3) is
+    // recorded in the audit log so v0.5 can light up real per-tier routing
+    // without re-tagging skills. Do not branch on tier in skill code.
     models: z.object({
       tier1: z.string().default("claude-sonnet-4-6"),
       // v0.5 → opus
@@ -2033,18 +2043,29 @@ function computeEntryHash(entry) {
   const { hash: _ignored, ...rest } = entry;
   return sha256Hex2(canonicalize(rest));
 }
-function readAnthropicApiKey() {
-  const v = process.env["ANTHROPIC_API_KEY"];
+function readKey(name) {
+  const v = process.env[name];
   if (typeof v !== "string") return void 0;
   return v.trim().length > 0 ? v : void 0;
 }
/**
 * Read the Anthropic API key from the process environment.
 *
 * @returns {string | undefined} The result of `readKey("ANTHROPIC_API_KEY")`.
 */
function readAnthropicApiKey() {
  const envVarName = "ANTHROPIC_API_KEY";
  return readKey(envVarName);
}
/**
 * Read the OpenAI API key from the process environment.
 *
 * @returns {string | undefined} The result of `readKey("OPENAI_API_KEY")`.
 */
function readOpenAiApiKey() {
  const envVarName = "OPENAI_API_KEY";
  return readKey(envVarName);
}
/**
 * Resolve the API key for a provider, honouring an optional env-var override.
 *
 * The override is used only when it is a string that is non-blank after
 * trimming; the trimmed name is what gets looked up, so a padded value such as
 * " MY_KEY " resolves MY_KEY instead of silently missing. Any other override
 * (undefined, null, non-string, blank) falls back to the provider's
 * conventional variable rather than throwing.
 *
 * @param {string} provider - "openai" selects OPENAI_API_KEY; anything else
 *   selects ANTHROPIC_API_KEY.
 * @param {string} [apiKeyEnvOverride] - Name of the env var to read instead.
 * @returns {string | undefined} The key, or undefined when it is not available.
 */
function readProviderApiKey(provider, apiKeyEnvOverride) {
  const overrideName = typeof apiKeyEnvOverride === "string" ? apiKeyEnvOverride.trim() : "";
  if (overrideName.length > 0) {
    return readKey(overrideName);
  }
  return provider === "openai" ? readOpenAiApiKey() : readAnthropicApiKey();
}
 function snapshotEnv() {
-  const k = readAnthropicApiKey();
+  const a = readAnthropicApiKey();
+  const o = readOpenAiApiKey();
   return {
-    anthropicApiKey: {
-      present: k !== void 0,
-      length: k?.length ?? 0
-    }
+    anthropicApiKey: { present: a !== void 0, length: a?.length ?? 0 },
+    openaiApiKey: { present: o !== void 0, length: o?.length ?? 0 }
   };
 }
 var PRICING = {
@@ -2079,16 +2100,52 @@ var PRICING = {
     outputPerMTok: 4,
     cacheReadPerMTok: 0.08,
     cacheWritePerMTok: 1
+  },
+  // ── OpenAI (provider: openai) ──────────────────────────────────────────
+  // USD per 1M tokens. OpenAI bills cached input at a discount (0.5× the input
+  // rate for the gpt-4o models, 0.25× for the gpt-4.1 models below); there is no
+  // separate cache-write charge, so cacheWritePerMTok is 0.
+  "gpt-4o": {
+    inputPerMTok: 2.5,
+    outputPerMTok: 10,
+    cacheReadPerMTok: 1.25,
+    cacheWritePerMTok: 0
+  },
+  "gpt-4o-mini": {
+    inputPerMTok: 0.15,
+    outputPerMTok: 0.6,
+    cacheReadPerMTok: 0.075,
+    cacheWritePerMTok: 0
+  },
+  "gpt-4.1": {
+    inputPerMTok: 2,
+    outputPerMTok: 8,
+    cacheReadPerMTok: 0.5,
+    cacheWritePerMTok: 0
+  },
+  "gpt-4.1-mini": {
+    inputPerMTok: 0.4,
+    outputPerMTok: 1.6,
+    cacheReadPerMTok: 0.1,
+    cacheWritePerMTok: 0
   }
 };
 var DEFAULT_PRICING = PRICING["claude-sonnet-4-6"];
+var _unknownModelWarned = /* @__PURE__ */ new Set();
 function computeCostUsdCents(model, usage) {
-  const p = PRICING[model] ?? DEFAULT_PRICING;
+  const p = PRICING[model];
+  if (p === void 0 && !_unknownModelWarned.has(model)) {
+    _unknownModelWarned.add(model);
+    process.stderr.write(
+      `(qaios) warning: no pricing for model "${model}" \u2014 billing at the default rate. Costs in the audit log are approximate for this model.
+`
+    );
+  }
+  const pricing = p ?? DEFAULT_PRICING;
   const tok = (n) => typeof n === "number" && Number.isFinite(n) && n > 0 ? n : 0;
-  const inputUsd = tok(usage.inputTokens) / 1e6 * p.inputPerMTok;
-  const outputUsd = tok(usage.outputTokens) / 1e6 * p.outputPerMTok;
-  const cacheReadUsd = tok(usage.cacheReadTokens) / 1e6 * p.cacheReadPerMTok;
-  const cacheWriteUsd = tok(usage.cacheWriteTokens) / 1e6 * p.cacheWritePerMTok;
+  const inputUsd = tok(usage.inputTokens) / 1e6 * pricing.inputPerMTok;
+  const outputUsd = tok(usage.outputTokens) / 1e6 * pricing.outputPerMTok;
+  const cacheReadUsd = tok(usage.cacheReadTokens) / 1e6 * pricing.cacheReadPerMTok;
+  const cacheWriteUsd = tok(usage.cacheWriteTokens) / 1e6 * pricing.cacheWritePerMTok;
   const totalCents = (inputUsd + outputUsd + cacheReadUsd + cacheWriteUsd) * 100;
   return Math.ceil(totalCents);
 }
@@ -2177,6 +2234,260 @@ function mapResponse(response, latencyMs) {
     stopReason: response.stop_reason
   };
 }
// JSON Schema keywords that strictifyNode skips when it copies a schema node.
// Grouped by the kind of constraint they express; membership is all that matters.
var UNSUPPORTED_KEYWORDS = new Set(
  [
    // string constraints
    ["minLength", "maxLength", "pattern", "format"],
    // numeric constraints
    ["minimum", "maximum", "exclusiveMinimum", "exclusiveMaximum", "multipleOf"],
    // array constraints
    ["minItems", "maxItems", "uniqueItems"],
    // object-size constraints
    ["minProperties", "maxProperties"],
    // annotations and pattern-keyed properties
    ["default", "$schema", "patternProperties"]
  ].flat()
);
/**
 * Tell whether a value is a non-null, non-array object.
 *
 * @param {unknown} v - Value to inspect.
 * @returns {boolean} True for objects other than null and arrays.
 */
function isPlainObject(v) {
  if (v === null || Array.isArray(v)) return false;
  return typeof v === "object";
}
/**
 * Return a schema node that also admits `null`.
 *
 * - `type` is an array: "null" is appended unless already present.
 * - `type` is a string: it becomes `[type, "null"]` ("null" itself is left alone).
 * - no usable `type`: the node is wrapped as `anyOf: [node, { type: "null" }]`.
 *
 * The input node is never mutated; it is returned as-is when already nullable.
 *
 * @param {object} node - Schema node to widen.
 * @returns {object} The nullable schema node.
 */
function makeNullable(node) {
  const declared = node["type"];
  if (Array.isArray(declared)) {
    if (declared.includes("null")) return node;
    return { ...node, type: [...declared, "null"] };
  }
  if (typeof declared !== "string") {
    return { anyOf: [node, { type: "null" }] };
  }
  return declared === "null" ? node : { ...node, type: [declared, "null"] };
}
/**
 * Resolve a local JSON Pointer reference ("#/a/b") against a root schema.
 *
 * Segments are unescaped ("~1" -> "/", then "~0" -> "~") and followed through
 * objects and arrays. Only OWN properties are followed, so a pointer such as
 * "#/__proto__" cannot resolve to an inherited object like Object.prototype.
 *
 * @param {string} ref - Reference string; anything not starting with "#/" is
 *   treated as unresolvable.
 * @param {unknown} root - Document the pointer is evaluated against.
 * @returns {object | undefined} The referenced non-array object, or undefined
 *   when the path is missing or does not end on such an object.
 */
function resolveRef(ref, root) {
  if (!ref.startsWith("#/")) return undefined;
  const hasOwn = Object.prototype.hasOwnProperty;
  let cur = root;
  for (const rawSegment of ref.slice(2).split("/")) {
    // Only objects and arrays can be descended into.
    if (cur === null || typeof cur !== "object") return undefined;
    const key = rawSegment.replace(/~1/g, "/").replace(/~0/g, "~");
    if (!hasOwn.call(cur, key)) return undefined;
    cur = cur[key];
  }
  const isObjectNode = typeof cur === "object" && cur !== null && !Array.isArray(cur);
  return isObjectNode ? cur : undefined;
}
/**
 * Recursively replace resolvable local `$ref`s with a copy of their target.
 *
 * Arrays are mapped element-wise and non-object values are returned untouched.
 * For an object carrying a string `$ref`:
 *   - if the ref is already in `seen`, a shallow copy is returned with `$ref`
 *     left in place;
 *   - if the ref resolves, the target is inlined (with the ref added to a copy
 *     of `seen`) and the node's other keys are laid over it;
 *   - otherwise the node is copied key by key like any other object.
 *
 * @param {unknown} node - Schema fragment to process.
 * @param {unknown} root - Document that refs are resolved against.
 * @param {Set<string>} seen - Refs already being expanded on this path.
 * @returns {unknown} A new structure with resolvable refs inlined.
 */
function inlineRefs(node, root, seen) {
  if (Array.isArray(node)) {
    return node.map((child) => inlineRefs(child, root, seen));
  }
  if (!isPlainObject(node)) return node;

  const { $ref: pointer, ...siblings } = node;
  if (typeof pointer === "string") {
    if (seen.has(pointer)) return { ...node };
    const target = resolveRef(pointer, root);
    if (target !== undefined) {
      const visited = new Set(seen);
      visited.add(pointer);
      const expanded = inlineRefs(target, root, visited);
      return { ...expanded, ...siblings };
    }
  }

  const copy = {};
  Object.keys(node).forEach((key) => {
    copy[key] = inlineRefs(node[key], root, seen);
  });
  return copy;
}
/**
 * Prepare a tool input schema for OpenAI strict function calling: inline local
 * `$ref`s against the schema itself, then run the result through strictifyNode.
 *
 * @param {unknown} schema - JSON Schema for a tool's input.
 * @returns {unknown} The transformed schema; non-object input is returned as-is.
 */
function openaiStrictify(schema) {
  if (isPlainObject(schema)) {
    const dereferenced = inlineRefs(schema, schema, new Set());
    return strictifyNode(dereferenced);
  }
  return schema;
}
/**
 * Rewrite one schema node (recursively) into the shape used for OpenAI strict
 * function calling.
 *
 * - Keys listed in UNSUPPORTED_KEYWORDS are dropped.
 * - Child schemas under `properties`, `$defs`, `definitions`, `items`,
 *   `anyOf`, `oneOf` and `allOf` are processed recursively. `$defs` and
 *   `definitions` are included so that definitions still referenced by a
 *   remaining `$ref` get the same treatment as inlined ones.
 * - For object nodes with `properties` (type "object", or a type array that
 *   contains "object"), every property becomes required, properties that were
 *   not originally required are made nullable, and `additionalProperties` is
 *   forced to false.
 *
 * @param {unknown} schema - Schema node; non-object values are returned as-is.
 * @returns {unknown} A new node; the input is not mutated.
 */
function strictifyNode(schema) {
  if (!isPlainObject(schema)) return schema;
  const out = {};
  for (const [key, value] of Object.entries(schema)) {
    if (UNSUPPORTED_KEYWORDS.has(key)) continue;
    // These keywords hold a name -> schema map; the names are not keywords,
    // so only the values are recursed into.
    const isSchemaMap = key === "properties" || key === "$defs" || key === "definitions";
    if (isSchemaMap && isPlainObject(value)) {
      const mapped = {};
      for (const [name, subSchema] of Object.entries(value)) {
        mapped[name] = strictifyNode(subSchema);
      }
      out[key] = mapped;
      continue;
    }
    if (key === "items") {
      out["items"] = Array.isArray(value) ? value.map((v) => strictifyNode(v)) : strictifyNode(value);
      continue;
    }
    if ((key === "anyOf" || key === "oneOf" || key === "allOf") && Array.isArray(value)) {
      out[key] = value.map((v) => strictifyNode(v));
      continue;
    }
    out[key] = value;
  }
  const declaredType = out["type"];
  // A nullable object may be declared as ["object", "null"]; it needs the same
  // required/additionalProperties treatment as a plain "object".
  const describesObject = declaredType === "object" || Array.isArray(declaredType) && declaredType.includes("object");
  if (describesObject && isPlainObject(out["properties"])) {
    const props = out["properties"];
    const originalRequired = new Set(
      Array.isArray(schema["required"]) ? schema["required"] : []
    );
    const allKeys = Object.keys(props);
    for (const k of allKeys) {
      // Formerly-optional properties stay expressible: the model sends null.
      if (!originalRequired.has(k)) {
        props[k] = makeNullable(props[k]);
      }
    }
    out["required"] = allKeys;
    out["additionalProperties"] = false;
  }
  return out;
}
// Defaults applied by OpenAiClient when the caller supplies no model / token limit.
var DEFAULT_OPENAI_MODEL = "gpt-4o";
var DEFAULT_MAX_TOKENS2 = 4096;
/**
 * LLM client backed by the OpenAI Chat Completions endpoint.
 *
 * Constructor options (all optional):
 *   - client: a pre-built SDK-like object exposing chat.completions.create
 *   - apiKey: explicit key; otherwise readOpenAiApiKey() is consulted
 *   - defaultModel / defaultMaxTokens: fallbacks used by call()
 */
var OpenAiClient = class {
  client;
  explicitApiKey;
  defaultModel;
  defaultMaxTokens;
  constructor(opts = {}) {
    this.client = opts.client ?? null;
    this.explicitApiKey = opts.apiKey;
    this.defaultModel = opts.defaultModel ?? DEFAULT_OPENAI_MODEL;
    this.defaultMaxTokens = opts.defaultMaxTokens ?? DEFAULT_MAX_TOKENS2;
  }
  /**
   * Return the SDK client, creating and caching it on first use.
   *
   * @returns {object} The injected or lazily constructed client.
   * @throws {LlmError} code "qaios.llm.api_key_missing" when no usable key is found.
   */
  resolveClient() {
    if (this.client !== null) return this.client;
    const apiKey = this.explicitApiKey ?? readOpenAiApiKey();
    if (apiKey === void 0 || apiKey.trim().length === 0) {
      throw new LlmError({
        code: "qaios.llm.api_key_missing",
        message: 'OPENAI_API_KEY is not set (llm.provider is "openai").\nGet one at https://platform.openai.com/api-keys, then:\n  export OPENAI_API_KEY=sk-\u2026\n`qaios doctor` will confirm the key is reachable.'
      });
    }
    const sdk = new OpenAI({ apiKey });
    this.client = sdk;
    return this.client;
  }
  /**
   * Send one chat completion request and map the response with mapResponse2.
   *
   * @param {object} opts - Reads userPrompt, and optionally systemPrompt,
   *   model, maxTokens, temperature, tools, toolChoice and signal.
   * @returns {Promise<object>} The mapped response.
   */
  async call(opts) {
    const client = this.resolveClient();
    const model = opts.model ?? this.defaultModel;
    // OpenAI carries the system prompt as a leading system-role message. When
    // the caller supplies none, the message is omitted entirely: a system
    // message without content is not a valid request.
    const messages = [];
    if (opts.systemPrompt !== void 0 && opts.systemPrompt !== null) {
      messages.push({ role: "system", content: opts.systemPrompt });
    }
    messages.push({ role: "user", content: opts.userPrompt });
    const params = {
      model,
      max_tokens: opts.maxTokens ?? this.defaultMaxTokens,
      messages
    };
    if (opts.temperature !== void 0) params["temperature"] = opts.temperature;
    if (opts.tools && opts.tools.length > 0) {
      params["tools"] = opts.tools.map((t) => ({
        type: "function",
        function: {
          name: t.name,
          description: t.description,
          parameters: openaiStrictify(t.input_schema),
          strict: true
        }
      }));
    }
    if (opts.toolChoice !== void 0) {
      params["tool_choice"] = mapToolChoice(opts.toolChoice);
    }
    const reqOpts = {};
    if (opts.signal !== void 0) reqOpts.signal = opts.signal;
    // Latency covers only the network round trip, not response mapping.
    const start = Date.now();
    const response = await client.chat.completions.create(params, reqOpts);
    const latencyMs = Date.now() - start;
    return mapResponse2(response, latencyMs);
  }
};
/**
 * Translate an Anthropic-style tool choice into OpenAI's `tool_choice` value.
 *
 *   { type: "auto" }        -> "auto"
 *   { type: "any" }         -> "required"
 *   { type: "none" }        -> "none"
 *   { type: "tool", name }  -> { type: "function", function: { name } }
 *
 * "none" is mapped explicitly so a caller that forbids tool use is not
 * silently handed OpenAI's default behaviour.
 *
 * @param {{ type: string, name?: string }} choice - Tool choice to translate.
 * @returns {string | object | undefined} The OpenAI value, or undefined for an
 *   unrecognised type.
 */
function mapToolChoice(choice) {
  switch (choice.type) {
    case "auto":
      return "auto";
    case "any":
      return "required";
    case "none":
      return "none";
    case "tool":
      return { type: "function", function: { name: choice.name } };
    default:
      return undefined;
  }
}
/**
 * Map an OpenAI `finish_reason` onto the stop-reason names used elsewhere:
 * "stop" -> "end_turn", "length" -> "max_tokens", and both "tool_calls" and
 * "function_call" -> "tool_use". Any other value is passed through unchanged.
 *
 * @param {string | null} reason - The finish reason reported for a choice.
 * @returns {string | null} The mapped stop reason.
 */
function mapFinishReason(reason) {
  if (reason === "stop") return "end_turn";
  if (reason === "length") return "max_tokens";
  if (reason === "tool_calls" || reason === "function_call") return "tool_use";
  return reason;
}
/**
 * Convert an OpenAI chat completion into the provider-neutral response shape.
 *
 * Only the first choice is read. Tool-call arguments are parsed from their JSON
 * string form, token usage is split into uncached input / cached input /
 * output, and the cost is computed from that usage.
 *
 * @param {object} response - Chat completion returned by the SDK.
 * @param {number} latencyMs - Measured request latency in milliseconds.
 * @returns {{ output: string, toolCalls: object[], usage: object, model: string,
 *   latencyMs: number, costUsdCents: number, stopReason: string | null }}
 * @throws {LlmError} When a tool call's arguments are not valid JSON.
 */
function mapResponse2(response, latencyMs) {
  const choice = response.choices[0];
  const message = choice?.message;
  const output = message?.content ?? "";
  const toolCalls = (message?.tool_calls ?? []).map((tc) => ({
    id: tc.id,
    name: tc.function.name,
    // OpenAI returns function arguments as a JSON STRING — parse to match the
    // parsed-object shape Anthropic's tool_use blocks give us. A malformed
    // payload surfaces as an LlmError rather than a silent {}.
    input: parseArguments(tc.function.arguments, tc.function.name)
  }));
  const promptTokens = response.usage?.prompt_tokens ?? 0;
  const cachedTokens = response.usage?.prompt_tokens_details?.cached_tokens ?? 0;
  const usage = {
    // OpenAI's prompt_tokens INCLUDES cached tokens; bill the uncached portion
    // at the input rate and the cached portion at the cache-read rate.
    inputTokens: Math.max(0, promptTokens - cachedTokens),
    outputTokens: response.usage?.completion_tokens ?? 0
  };
  if (cachedTokens > 0) usage.cacheReadTokens = cachedTokens;
  // OpenAI reports the resolved snapshot id (e.g. "gpt-4o-2024-08-06") while
  // the pricing table is keyed by the undated alias. Without normalising, every
  // call misses the table and is billed at the default rate. An exact match is
  // preferred; otherwise the trailing -YYYY-MM-DD suffix is dropped for the
  // pricing lookup only. The reported `model` below stays the snapshot id.
  const reportedModel = response.model;
  const hasExactPricing = typeof reportedModel === "string" && Object.prototype.hasOwnProperty.call(PRICING, reportedModel);
  const pricingModel = typeof reportedModel === "string" && !hasExactPricing ? reportedModel.replace(/-\d{4}-\d{2}-\d{2}$/, "") : reportedModel;
  return {
    output,
    toolCalls,
    usage,
    model: reportedModel,
    latencyMs,
    costUsdCents: computeCostUsdCents(pricingModel, usage),
    stopReason: mapFinishReason(choice?.finish_reason ?? null)
  };
}
/**
 * Decode the JSON string OpenAI supplies as a tool call's arguments and remove
 * null-valued keys from the result via stripNulls.
 *
 * @param {string} raw - The `function.arguments` string from a tool call.
 * @param {string} toolName - Tool name, used only in the error message.
 * @returns {unknown} The decoded arguments with nulls stripped.
 * @throws {LlmError} code "qaios.llm.malformed_tool_arguments" when `raw` is
 *   not valid JSON; the original parse error is attached as `cause`.
 */
function parseArguments(raw, toolName) {
  let decoded;
  try {
    decoded = JSON.parse(raw);
  } catch (parseFailure) {
    const details = {
      code: "qaios.llm.malformed_tool_arguments",
      message: `OpenAI returned non-JSON arguments for tool "${toolName}".`,
      cause: parseFailure
    };
    throw new LlmError(details);
  }
  return stripNulls(decoded);
}
/**
 * Recursively drop object keys whose value is null.
 *
 * Arrays are mapped element-wise, so a null array ELEMENT is kept; only null
 * object properties are removed. Non-object values are returned unchanged.
 *
 * The result is built with Object.fromEntries so every key — including a
 * "__proto__" key arriving in model-supplied JSON — becomes an ordinary own
 * property instead of replacing the result's prototype.
 *
 * @param {unknown} value - Decoded JSON value.
 * @returns {unknown} A copy without null-valued object properties.
 */
function stripNulls(value) {
  if (Array.isArray(value)) return value.map(stripNulls);
  if (value === null || typeof value !== "object") return value;
  const kept = Object.entries(value)
    .filter(([, v]) => v !== null)
    .map(([k, v]) => [k, stripNulls(v)]);
  return Object.fromEntries(kept);
}
/**
 * Pick the default model id for a provider.
 *
 * @param {string} provider - "openai" selects DEFAULT_OPENAI_MODEL; any other
 *   value selects DEFAULT_LLM_MODEL.
 * @returns {string} The default model id.
 */
function defaultModelFor(provider) {
  if (provider === "openai") {
    return DEFAULT_OPENAI_MODEL;
  }
  return DEFAULT_LLM_MODEL;
}
/**
 * Build the LLM client for the requested provider.
 *
 * An injected `opts.client` is returned untouched. Otherwise the provider
 * defaults to "anthropic", the model defaults to defaultModelFor(provider),
 * and `apiKey` is forwarded only when the caller supplied one.
 *
 * @param {{ client?: object, provider?: string, model?: string, apiKey?: string }} [opts]
 * @returns {object} `opts.client`, a new OpenAiClient (provider "openai"), or a
 *   new LlmClient (any other provider).
 */
function createLlmClient(opts = {}) {
  if (opts.client !== undefined) {
    return opts.client;
  }
  const provider = opts.provider ?? "anthropic";
  const clientOpts = {
    ...(opts.apiKey !== undefined ? { apiKey: opts.apiKey } : {}),
    defaultModel: opts.model ?? defaultModelFor(provider)
  };
  if (provider === "openai") {
    return new OpenAiClient(clientOpts);
  }
  return new LlmClient(clientOpts);
}
 var SkillError = class extends Error {
   code;
   skillId;
@@ -6021,7 +6332,7 @@ function parseOpenApi(specPath) {
 function isRecord(v) {
   return typeof v === "object" && v !== null && !Array.isArray(v);
 }
-function resolveRef(refValue, doc, depth = 0) {
+function resolveRef2(refValue, doc, depth = 0) {
   if (depth > 4) return { $ref: refValue };
   if (!refValue.startsWith("#/")) return { $ref: refValue };
   const segments = refValue.slice(2).split("/").map((seg) => seg.replace(/~1/g, "/").replace(/~0/g, "~"));
@@ -6032,7 +6343,7 @@ function resolveRef(refValue, doc, depth = 0) {
     if (cursor === void 0) return { $ref: refValue };
   }
   if (isRecord(cursor) && typeof cursor["$ref"] === "string") {
-    return resolveRef(cursor["$ref"], doc, depth + 1);
+    return resolveRef2(cursor["$ref"], doc, depth + 1);
   }
   return cursor;
 }
@@ -6041,7 +6352,7 @@ function resolveRefsDeep(value, doc, depth = 0) {
   if (Array.isArray(value)) return value.map((v) => resolveRefsDeep(v, doc, depth + 1));
   if (!isRecord(value)) return value;
   if (typeof value["$ref"] === "string") {
-    const resolved = resolveRef(value["$ref"], doc);
+    const resolved = resolveRef2(value["$ref"], doc);
     return resolveRefsDeep(resolved, doc, depth + 1);
   }
   const out = {};
@@ -6499,7 +6810,7 @@ function runDoctor(opts = {}) {
   const cwd = path12.resolve(opts.cwd ?? process.cwd());
   const checks = [];
   checks.push(checkNode());
-  const apiKeyCheck = checkAnthropicApiKey();
+  const apiKeyCheck = checkProviderApiKey(cwd);
   checks.push(apiKeyCheck);
   const qaiosDir = path12.join(cwd, ".qaios");
   const qaiosExists = existsSync(qaiosDir) && safeIsDir(qaiosDir);
@@ -6568,23 +6879,42 @@ function checkNode() {
     detail: `Node ${version} is below the required v${MIN_NODE_MAJOR}.`
   };
 }
-function checkAnthropicApiKey() {
-  const key = readAnthropicApiKey();
/**
 * Best-effort read of the LLM provider (and optional key env-var override)
 * from `<cwd>/.qaios/config.yaml`.
 *
 * Falls back to `{ provider: "anthropic" }` when the file is missing, cannot
 * be read or parsed, or does not validate against QaiosConfig. This function
 * never throws for those cases.
 *
 * @param {string} cwd - Project directory containing `.qaios/`.
 * @returns {{ provider: string, apiKeyEnv?: string }} `apiKeyEnv` is present
 *   only when the validated config defines it.
 */
function resolveProviderFromConfig(cwd) {
  const configPath = path12.join(cwd, ".qaios", "config.yaml");
  if (existsSync(configPath)) {
    try {
      const document = parse(readFileSync(configPath, "utf-8"));
      const result = QaiosConfig.safeParse(document ?? { version: 1 });
      if (result.success) {
        const { provider, apiKeyEnv } = result.data.llm;
        return apiKeyEnv !== undefined ? { provider, apiKeyEnv } : { provider };
      }
    } catch {
      // Deliberately ignored: an unreadable or unparsable config falls through
      // to the default provider below.
    }
  }
  return { provider: "anthropic" };
}
+function checkProviderApiKey(cwd) {
+  const { provider, apiKeyEnv } = resolveProviderFromConfig(cwd);
+  const envVar = apiKeyEnv ?? (provider === "openai" ? "OPENAI_API_KEY" : "ANTHROPIC_API_KEY");
+  const key = readProviderApiKey(provider, apiKeyEnv);
   if (key !== void 0) {
-    return { name: "ANTHROPIC_API_KEY", status: "ok", detail: "set in environment" };
+    return { name: envVar, status: "ok", detail: `set in environment (provider: ${provider})` };
   }
-  const raw = process.env["ANTHROPIC_API_KEY"];
+  const raw = process.env[envVar];
   if (typeof raw === "string" && raw.length > 0) {
     return {
-      name: "ANTHROPIC_API_KEY",
+      name: envVar,
       status: "warn",
       detail: "set but blank/whitespace-only \u2014 LLM commands will fail; export a real key"
     };
   }
   return {
-    name: "ANTHROPIC_API_KEY",
+    name: envVar,
     status: "warn",
-    detail: "not set; export it before running `qaios test` or other LLM-backed commands"
+    detail: `not set; export it before running LLM-backed commands (provider: ${provider})`
   };
 }
 function checkDb(dbPath) {
@@ -6874,12 +7204,20 @@ Use formal techniques (same enumeration as design.web). Prefer:
 Rules:
 - steps describe HTTP interactions: "POST /api/v1/users with body {email, password}",
   not implementation details.
-- oracles describe response shape + status code + observable side effects. The oracle
+- oracles describe response shape + status code + observable side effects. EVERY oracle
   MUST reference an HTTP status code (e.g. "200", "201", "401", "404") AND/OR a
   response.<property> path so the writer skill can produce a deterministic assertion.
+  An oracle that names neither is invalid \u2014 do not emit it.
 - dataNeeds describe request body categories: "valid signup payload", "payload missing
   email", "payload with email > 254 chars".
 - For each endpoint, generate at minimum 3 scenarios; more if the endpoint is risk-tagged.
+- COVERAGE FLOOR (non-negotiable): across the suite you MUST include at least one
+  scenario with testType="negative" (e.g. invalid auth / 4xx) AND at least one with
+  testType="boundary" (e.g. a min/max field length or numeric edge). Suites missing
+  either are incomplete.
+- Every requirement id provided in the input MUST be referenced by at least one
+  scenario's requirementIds \u2014 do not leave a stated requirement uncovered, and do not
+  cite a requirement id that wasn't given.
 - Cross-endpoint dependency tests (e.g., create then read) get their own scenario with
   testType=integration.
@@ -6954,19 +7292,26 @@ function checkAuthScenarios(output, endpoints) {
   const authNeeded = endpoints.filter((e) => e.authRequired);
   if (authNeeded.length === 0) return 1;
   const scenarios = output.designSpec.scenarios;
-  const covered = authNeeded.filter((ep) => {
+  const isAuthNegative = (s) => {
+    if (s.testType !== "negative") return false;
+    const oracleMentionsAuthCode = /\b401\b|\b403\b|unauthor|forbidden/i.test(s.oracle);
+    const dataNeedsMentionsAuth = s.dataNeeds.some(
+      (d) => /\b(missing|invalid|no|expired|wrong)\b.*(token|auth|key|credential|role)/i.test(d)
+    );
+    return oracleMentionsAuthCode || dataNeedsMentionsAuth;
+  };
+  const anyAuthTest = authNeeded.some((ep) => {
     const re = endpointStepPattern(ep.path, ep.method);
-    return scenarios.some((s) => {
-      const stepBlob = s.steps.join("\n");
-      const matchesEndpoint = re.test(stepBlob);
-      const oracleMentions401 = /\b401\b|unauthor/i.test(s.oracle);
-      const dataNeedsMentionsAuth = s.dataNeeds.some(
-        (d) => /\b(missing|invalid|no|expired)\b.*(token|auth|key|credential)/i.test(d)
-      );
-      return s.testType === "negative" && matchesEndpoint && (oracleMentions401 || dataNeedsMentionsAuth);
-    });
+    return scenarios.some((s) => isAuthNegative(s) && re.test(s.steps.join("\n")));
+  });
+  const hasGenericAuthTest = scenarios.some(isAuthNegative);
+  if (!anyAuthTest && !hasGenericAuthTest) return 0.5;
+  const explicitlyCovered = authNeeded.filter((ep) => {
+    const re = endpointStepPattern(ep.path, ep.method);
+    return scenarios.some((s) => isAuthNegative(s) && re.test(s.steps.join("\n")));
   }).length;
-  return covered / authNeeded.length;
+  const fraction = explicitlyCovered / authNeeded.length;
+  return Math.max(0.85, fraction);
 }
 var designApiSkill = {
   id: "design.api",
@@ -8203,6 +8548,15 @@ var skills = {
   "audit.a11y": auditA11ySkill
 };
+// src/llm.ts
/**
 * Choose the LLM client for a command: the injected one when given, otherwise
 * one built from the `llm` section of the project config.
 *
 * `llm.apiKeyEnv` is honoured here: when it names an environment variable that
 * holds a usable key, that key is passed to the client explicitly. Without
 * this, the override affected only `qaios doctor` while real calls kept
 * reading the provider's conventional variable. If the named variable is
 * unset or blank, no key is passed and the client's own lookup applies.
 *
 * @param {object | undefined} injected - Pre-built client (e.g. from tests).
 * @param {{ provider?: string, model?: string, apiKeyEnv?: string } | undefined} llmConfig
 *   The config's `llm` section; may be raw, unvalidated YAML.
 * @returns {object} The client to use.
 */
function resolveLlmClient(injected, llmConfig) {
  if (injected !== undefined) return injected;
  const opts = {};
  if (llmConfig?.provider !== undefined) opts.provider = llmConfig.provider;
  if (llmConfig?.model !== undefined) opts.model = llmConfig.model;
  // The config may be unvalidated, so check the override's type before use.
  const keyEnv = llmConfig?.apiKeyEnv;
  if (typeof keyEnv === "string" && keyEnv.trim().length > 0) {
    const overriddenKey = readProviderApiKey(opts.provider ?? "anthropic", keyEnv);
    if (overriddenKey !== undefined) opts.apiKey = overriddenKey;
  }
  return createLlmClient(opts);
}
 // src/commands/a11y.ts
 function loadConfig(cwd) {
   const candidate = path12.join(cwd, ".qaios", "config.yaml");
@@ -8243,7 +8597,7 @@ async function runA11y(opts) {
   const storage = opts.storage ?? Storage.open(path12.join(qaiosDir, "workflows.db"), { skipMigrations: false });
   const auditLogger = new AuditLogger(storage.db);
   const workflowsRepo = new WorkflowsRepository(storage.db);
-  const llm = opts.llm ?? new LlmClient();
+  const llm = resolveLlmClient(opts.llm, config?.llm);
   const writeOut = (line) => {
     if (opts.quiet === true) return;
     if (opts.json === true) process.stdout.write(JSON.stringify({ kind: "log", line }) + "\n");
@@ -8567,7 +8921,7 @@ async function runExplore(opts) {
   const storage = opts.storage ?? Storage.open(path12.join(qaiosDir, "workflows.db"), { skipMigrations: false });
   const auditLogger = new AuditLogger(storage.db);
   const workflowsRepo = new WorkflowsRepository(storage.db);
-  const llm = opts.llm ?? new LlmClient();
+  const llm = resolveLlmClient(opts.llm, config?.llm);
   const writeOut = (line) => {
     if (opts.quiet === true) return;
     if (opts.json === true) {
@@ -8978,7 +9332,7 @@ async function runFix(opts) {
   const testResultsRepo = new TestResultsRepository(storage.db);
   const workflowsRepo = new WorkflowsRepository(storage.db);
   const config = loadConfig3(cwd);
-  const llm = opts.llm ?? new LlmClient();
+  const llm = resolveLlmClient(opts.llm, config?.llm);
   const mode = opts.mode ?? config?.mode ?? "LITE";
   const writeOut = (line) => {
     if (opts.quiet === true) return;
@@ -9758,7 +10112,7 @@ function readRawConfig(configPath) {
   }
   return parsed;
 }
-function readKey(obj, key) {
+function readKey2(obj, key) {
   const segments = key.split(".");
   let cursor = obj;
   for (const seg of segments) {
@@ -9849,7 +10203,7 @@ function getValue(configPath, opts, writeOut) {
     }
     return { exitCode: ExitCode.SUCCESS, value: raw };
   }
-  const value = readKey(raw, opts.key);
+  const value = readKey2(raw, opts.key);
   if (value === void 0) {
     return {
       exitCode: ExitCode.USER_ERROR,
@@ -10240,6 +10594,16 @@ async function testServer(repo, opts, writeOut) {
     if (ownsClient) await client.close();
   }
 }
/**
 * Read the raw `llm` section from `<cwd>/.qaios/config.yaml`.
 *
 * The value is returned exactly as parsed — it is NOT validated against the
 * config schema. A missing, unreadable or unparsable file yields undefined.
 *
 * @param {string} cwd - Project directory containing `.qaios/`.
 * @returns {unknown} The parsed `llm` value, or undefined.
 */
function loadLlmConfig(cwd) {
  const configPath = path12.join(cwd, ".qaios", "config.yaml");
  if (!existsSync(configPath)) {
    return undefined;
  }
  let document;
  try {
    document = parse(readFileSync(configPath, "utf-8"));
  } catch {
    // Best-effort: an unreadable or invalid config means "no llm settings".
    return undefined;
  }
  return document?.llm;
}
 async function applyDecision(args) {
   const { gate, action, gatesRepo, auditLogger, orchestrator, skipResume } = args;
   const now = (/* @__PURE__ */ new Date()).toISOString();
@@ -10300,7 +10664,7 @@ async function runReview(opts) {
   const storage = opts.storage ?? Storage.open(path12.join(qaiosDir, "workflows.db"), { skipMigrations: false });
   const auditLogger = new AuditLogger(storage.db);
   const gatesRepo = new GatesRepository(storage.db);
-  const llm = opts.llm ?? new LlmClient();
+  const llm = resolveLlmClient(opts.llm, loadLlmConfig(cwd));
   try {
     const pending = gatesRepo.listPending(opts.workflowId);
     if (pending.length === 0) {
@@ -10525,8 +10889,8 @@ async function runRun(opts) {
   const ownsStorage = opts.storage === void 0;
   const storage = opts.storage ?? Storage.open(path12.join(qaiosDir, "workflows.db"), { skipMigrations: false });
   const auditLogger = new AuditLogger(storage.db);
-  const llm = opts.llm ?? new LlmClient();
   const config = loadRunConfig(cwd);
+  const llm = resolveLlmClient(opts.llm, config?.llm);
   const args = {
     cwd,
     noClassify: opts.noClassify === true,
@@ -10825,7 +11189,7 @@ async function runSnapshotCheck(opts) {
   const storage = opts.storage ?? Storage.open(path12.join(qaiosDir, "workflows.db"), { skipMigrations: false });
   const auditLogger = new AuditLogger(storage.db);
   const baselineRepo = new VisualBaselinesRepository(storage.db);
-  const llm = opts.llm ?? new LlmClient();
+  const llm = resolveLlmClient(opts.llm, config?.llm);
   let baselines = baselineRepo.list();
   if (opts.feature !== void 0) {
     const feat = opts.feature;
@@ -11401,7 +11765,7 @@ ${epSummary}`;
   const ownsStorage = opts.storage === void 0;
   const storage = opts.storage ?? Storage.open(path12.join(qaiosDir, "workflows.db"), { skipMigrations: false });
   const auditLogger = new AuditLogger(storage.db);
-  const llm = opts.llm ?? new LlmClient();
+  const llm = resolveLlmClient(opts.llm, config?.llm);
   const gateConfig = {};
   if (opts.nonInteractive === true) gateConfig.nonInteractive = true;
   if (config?.gates?.autoExpireOnTimeout !== void 0) {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@qatonic_innovations/qaios",
-  "version": "0.1.2",
+  "version": "0.2.0",
   "type": "module",
   "description": "AI QA engineer in your terminal — designs, writes, runs, heals, and explores tests for web UI and APIs with audit-grade traceability.",
   "license": "MIT",
@@ -48,6 +48,7 @@
     "@modelcontextprotocol/sdk": "^1.29.0",
     "better-sqlite3": "^11.7.0",
     "commander": "^12.1.0",
+    "openai": "^4.77.0",
     "ink": "^5.2.1",
     "pino": "^9.5.0",
     "pino-pretty": "^11.3.0",