@ljoukov/llm 0.1.2 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -25,8 +25,10 @@ npm i @ljoukov/llm
25
25
 
26
26
  ## Environment variables
27
27
 
28
- This package reads a `.env.local` file in `process.cwd()` (Node.js) using the same rules as Spark, and falls back to
29
- plain environment variables.
28
+ This package optionally loads a `.env.local` file from `process.cwd()` (Node.js) on first use (dotenv-style `KEY=value`
29
+ syntax) and does not override already-set `process.env` values. It always falls back to plain environment variables.
30
+
31
+ See Node.js docs on environment variables and dotenv files: https://nodejs.org/api/environment_variables.html#dotenv
30
32
 
31
33
  ### OpenAI
32
34
 
@@ -36,20 +38,52 @@ plain environment variables.
36
38
 
37
39
  - `GOOGLE_SERVICE_ACCOUNT_JSON` (the contents of a service account JSON key file, not a file path)
38
40
 
39
- For local dev it is usually easiest to store the JSON on one line:
41
+ #### Get a service account key JSON
42
+
43
+ You need a **Google service account key JSON** for your Firebase / GCP project (this is what you put into
44
+ `GOOGLE_SERVICE_ACCOUNT_JSON`).
45
+
46
+ - **Firebase Console:** your project -> Project settings -> **Service accounts** -> **Generate new private key**
47
+ - **Google Cloud Console:** IAM & Admin -> **Service Accounts** -> select/create an account -> **Keys** -> **Add key** ->
48
+ **Create new key** -> JSON
49
+
50
+ Either path is enough. Both produce the same kind of service account key `.json` file.
51
+
52
+ Official docs: https://docs.cloud.google.com/iam/docs/keys-create-delete
53
+
54
+ Store the JSON on one line (recommended):
40
55
 
41
56
  ```bash
42
57
  jq -c . < path/to/service-account.json
43
58
  ```
44
59
 
60
+ Set it for local dev:
61
+
62
+ ```bash
63
+ export GOOGLE_SERVICE_ACCOUNT_JSON="$(jq -c . < path/to/service-account.json)"
64
+ ```
65
+
66
+ If deploying to Cloudflare Workers/Pages:
67
+
68
+ ```bash
69
+ jq -c . < path/to/service-account.json | wrangler secret put GOOGLE_SERVICE_ACCOUNT_JSON
70
+ ```
71
+
45
72
  ### ChatGPT subscription models
46
73
 
47
74
  - `CHATGPT_AUTH_JSON_B64`
48
75
 
49
- This is a base64url-encoded JSON blob containing the ChatGPT OAuth tokens + account id (Spark-compatible).
76
 + This is a base64url-encoded (RFC 4648) JSON blob containing the ChatGPT OAuth tokens + account id:
77
+ https://www.rfc-editor.org/rfc/rfc4648
50
78
 
51
79
  ## Usage
52
80
 
81
+ `v2` uses OpenAI-style request fields:
82
+
83
+ - `input`: string or message array
84
+ - `instructions`: optional top-level system instructions
85
+ - message roles: `developer`, `system`, `user`, `assistant`
86
+
53
87
  ### Basic (non-streaming)
54
88
 
55
89
  ```ts
@@ -57,7 +91,7 @@ import { generateText } from "@ljoukov/llm";
57
91
 
58
92
  const result = await generateText({
59
93
  model: "gpt-5.2",
60
- prompt: "Write one sentence about TypeScript.",
94
+ input: "Write one sentence about TypeScript.",
61
95
  });
62
96
 
63
97
  console.log(result.text);
@@ -71,7 +105,7 @@ import { streamText } from "@ljoukov/llm";
71
105
 
72
106
  const call = streamText({
73
107
  model: "gpt-5.2",
74
- prompt: "Explain what a hash function is in one paragraph.",
108
+ input: "Explain what a hash function is in one paragraph.",
75
109
  });
76
110
 
77
111
  for await (const event of call.events) {
@@ -90,6 +124,114 @@ const result = await call.result;
90
124
  console.log("\nmodelVersion:", result.modelVersion);
91
125
  ```
92
126
 
127
+ ### Full conversation (multi-turn)
128
+
129
+ Pass a full message array via `input`.
130
+
131
+ ```ts
132
+ import { generateText, type LlmInputMessage } from "@ljoukov/llm";
133
+
134
+ const input: LlmInputMessage[] = [
135
+ {
136
+ role: "system",
137
+ content: "You are a concise assistant.",
138
+ },
139
+ {
140
+ role: "user",
141
+ content: "Summarize: Rust is a systems programming language.",
142
+ },
143
+ {
144
+ role: "assistant",
145
+ content: "Rust is a fast, memory-safe systems language.",
146
+ },
147
+ {
148
+ role: "user",
149
+ content: "Now rewrite it in 1 sentence.",
150
+ },
151
+ ];
152
+
153
+ const result = await generateText({ model: "gpt-5.2", input });
154
+ console.log(result.text);
155
+ ```
156
+
157
+ ### Attachments (files / images)
158
+
159
+ Use `inlineData` parts to attach base64-encoded bytes (intermixed with text). `inlineData.data` is base64 (not a data
160
+ URL).
161
+
162
+ Note: `inlineData` is mapped based on `mimeType`.
163
+
164
+ - `image/*` -> image input (`input_image`)
165
+ - otherwise -> file input (`input_file`, e.g. `application/pdf`)
166
+
167
+ ```ts
168
+ import fs from "node:fs";
169
+ import { generateText, type LlmInputMessage } from "@ljoukov/llm";
170
+
171
+ const imageB64 = fs.readFileSync("image.png").toString("base64");
172
+
173
+ const input: LlmInputMessage[] = [
174
+ {
175
+ role: "user",
176
+ content: [
177
+ { type: "text", text: "Describe this image in 1 paragraph." },
178
+ { type: "inlineData", mimeType: "image/png", data: imageB64 },
179
+ ],
180
+ },
181
+ ];
182
+
183
+ const result = await generateText({ model: "gpt-5.2", input });
184
+ console.log(result.text);
185
+ ```
186
+
187
+ PDF attachment example:
188
+
189
+ ```ts
190
+ import fs from "node:fs";
191
+ import { generateText, type LlmInputMessage } from "@ljoukov/llm";
192
+
193
+ const pdfB64 = fs.readFileSync("doc.pdf").toString("base64");
194
+
195
+ const input: LlmInputMessage[] = [
196
+ {
197
+ role: "user",
198
+ content: [
199
+ { type: "text", text: "Summarize this PDF in 5 bullet points." },
200
+ { type: "inlineData", mimeType: "application/pdf", data: pdfB64 },
201
+ ],
202
+ },
203
+ ];
204
+
205
+ const result = await generateText({ model: "gpt-5.2", input });
206
+ console.log(result.text);
207
+ ```
208
+
209
+ Intermixed text + multiple images (e.g. compare two images):
210
+
211
+ ```ts
212
+ import fs from "node:fs";
213
+ import { generateText, type LlmInputMessage } from "@ljoukov/llm";
214
+
215
+ const a = fs.readFileSync("a.png").toString("base64");
216
+ const b = fs.readFileSync("b.png").toString("base64");
217
+
218
+ const input: LlmInputMessage[] = [
219
+ {
220
+ role: "user",
221
+ content: [
222
+ { type: "text", text: "Compare the two images. List the important differences." },
223
+ { type: "text", text: "Image A:" },
224
+ { type: "inlineData", mimeType: "image/png", data: a },
225
+ { type: "text", text: "Image B:" },
226
+ { type: "inlineData", mimeType: "image/png", data: b },
227
+ ],
228
+ },
229
+ ];
230
+
231
+ const result = await generateText({ model: "gpt-5.2", input });
232
+ console.log(result.text);
233
+ ```
234
+
93
235
  ### Gemini
94
236
 
95
237
  ```ts
@@ -97,7 +239,7 @@ import { generateText } from "@ljoukov/llm";
97
239
 
98
240
  const result = await generateText({
99
241
  model: "gemini-2.5-pro",
100
- prompt: "Return exactly: OK",
242
+ input: "Return exactly: OK",
101
243
  });
102
244
 
103
245
  console.log(result.text);
@@ -112,7 +254,7 @@ import { generateText } from "@ljoukov/llm";
112
254
 
113
255
  const result = await generateText({
114
256
  model: "chatgpt-gpt-5.1-codex-mini",
115
- prompt: "Return exactly: OK",
257
+ input: "Return exactly: OK",
116
258
  });
117
259
 
118
260
  console.log(result.text);
@@ -124,7 +266,8 @@ console.log(result.text);
124
266
 
125
267
  - OpenAI API models use structured outputs (`json_schema`) when possible.
126
268
  - Gemini uses `responseJsonSchema`.
127
- - `chatgpt-*` models fall back to best-effort JSON parsing (no strict schema mode).
269
 + - `chatgpt-*` models try to use structured outputs too; if rejected by the endpoint/model, they fall back to best-effort
270
+ JSON parsing.
128
271
 
129
272
  ```ts
130
273
  import { generateJson } from "@ljoukov/llm";
@@ -137,13 +280,72 @@ const schema = z.object({
137
280
 
138
281
  const { value } = await generateJson({
139
282
  model: "gpt-5.2",
140
- prompt: "Return a JSON object with ok=true and message='hello'.",
283
+ input: "Return a JSON object with ok=true and message='hello'.",
141
284
  schema,
142
285
  });
143
286
 
144
287
  console.log(value.ok, value.message);
145
288
  ```
146
289
 
290
+ ### Streaming JSON outputs
291
+
292
+ Use `streamJson()` to stream thought deltas and get best-effort partial JSON snapshots while the model is still
293
+ generating.
294
+
295
+ ```ts
296
+ import { streamJson } from "@ljoukov/llm";
297
+ import { z } from "zod";
298
+
299
+ const schema = z.object({
300
+ ok: z.boolean(),
301
+ message: z.string(),
302
+ });
303
+
304
+ const call = streamJson({
305
+ model: "gpt-5.2",
306
+ input: "Return a JSON object with ok=true and message='hello'.",
307
+ schema,
308
+ });
309
+
310
+ for await (const event of call.events) {
311
+ if (event.type === "delta" && event.channel === "thought") {
312
+ process.stdout.write(event.text);
313
+ }
314
+ if (event.type === "json" && event.stage === "partial") {
315
+ console.log("partial:", event.value);
316
+ }
317
+ }
318
+
319
+ const { value } = await call.result;
320
+ console.log("final:", value);
321
+ ```
322
+
323
+ If you only want thought deltas (no partial JSON), set `streamMode: "final"`.
324
+
325
+ ```ts
326
+ const call = streamJson({
327
+ model: "gpt-5.2",
328
+ input: "Return a JSON object with ok=true and message='hello'.",
329
+ schema,
330
+ streamMode: "final",
331
+ });
332
+ ```
333
+
334
+ If you want to keep `generateJson()` but still stream thoughts, pass an `onEvent` callback.
335
+
336
+ ```ts
337
+ const { value } = await generateJson({
338
+ model: "gpt-5.2",
339
+ input: "Return a JSON object with ok=true and message='hello'.",
340
+ schema,
341
+ onEvent: (event) => {
342
+ if (event.type === "delta" && event.channel === "thought") {
343
+ process.stdout.write(event.text);
344
+ }
345
+ },
346
+ });
347
+ ```
348
+
147
349
  ## Tools
148
350
 
149
351
  This library supports two kinds of tools:
@@ -160,7 +362,7 @@ import { generateText } from "@ljoukov/llm";
160
362
 
161
363
  const result = await generateText({
162
364
  model: "gpt-5.2",
163
- prompt: "Find 3 relevant sources about X and summarize them.",
365
+ input: "Find 3 relevant sources about X and summarize them.",
164
366
  tools: [{ type: "web-search", mode: "live" }, { type: "code-execution" }],
165
367
  });
166
368
 
@@ -177,7 +379,7 @@ import { z } from "zod";
177
379
 
178
380
  const result = await runToolLoop({
179
381
  model: "gpt-5.2",
180
- prompt: "What is 12 * 9? Use the tool.",
382
+ input: "What is 12 * 9? Use the tool.",
181
383
  tools: {
182
384
  multiply: tool({
183
385
  description: "Multiply two integers.",