primellm 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,31 +1,47 @@
  /**
- * PrimeLLM JavaScript SDK - Main Client
+ * PrimeLLM JavaScript SDK v0.2.0
  *
- * This is the main SDK file. Developers import this to talk to PrimeLLM
- * from JavaScript or TypeScript.
+ * Production-grade SDK with streaming, retries, and full API parity.
  *
- * Example usage:
+ * @example
+ * import PrimeLLM from "primellm";
  *
- * import { PrimeLLMClient } from "primellm";
- *
- * const client = new PrimeLLMClient({ apiKey: "primellm_live_XXX" });
- *
- * const response = await client.chat({
- * model: "gpt-5.1",
- * messages: [{ role: "user", content: "Hello!" }],
- * });
- *
- * console.log(response.choices[0].message.content);
+ * const client = new PrimeLLM({ apiKey: "primellm_XXX" });
+ * const response = await client.chat({
+ * model: "gpt-5.1",
+ * messages: [{ role: "user", content: "Hello!" }],
+ * });
+ * console.log(response.choices[0].message.content);
  */
- // Re-export types for convenience
+ import { PrimeLLMError, createErrorFromStatus, } from "./errors.js";
+ import { countTokens, setTokenizerAdapter } from "./tokenizer.js";
+ import { streamReader } from "./streaming.js";
+ // Re-export types and utilities
  export * from "./types.js";
+ export * from "./errors.js";
+ export { countTokens, setTokenizerAdapter } from "./tokenizer.js";
+ const DEFAULT_RETRY = {
+ maxAttempts: 3,
+ baseDelayMs: 300,
+ maxDelayMs: 10000,
+ };
+ /**
+ * Retryable status codes
+ */
+ const RETRYABLE_STATUSES = [429, 502, 503, 504];
+ /**
+ * Sleep with exponential backoff and jitter
+ */
+ async function sleep(attempt, config) {
+ const delay = Math.min(config.maxDelayMs, config.baseDelayMs * Math.pow(2, attempt) + Math.random() * 300);
+ await new Promise(resolve => setTimeout(resolve, delay));
+ }
  /**
  * PrimeLLM API Client
  *
- * This class handles all communication with the PrimeLLM API.
- * It provides methods for chat, completions, and the legacy generate endpoint.
+ * Production-grade client with streaming, retries, and full API access.
  */
- export class PrimeLLMClient {
+ export class PrimeLLM {
  /**
  * Create a new PrimeLLM client.
  *
@@ -33,144 +49,231 @@ export class PrimeLLMClient {
  * @param options.apiKey - Your PrimeLLM API key (required)
  * @param options.baseURL - API base URL (default: "https://api.primellm.in")
  * @param options.timeoutMs - Request timeout in ms (default: 60000)
- *
- * @example
- * const client = new PrimeLLMClient({
- * apiKey: "primellm_live_XXX",
- * });
+ * @param options.maxRetries - Max retry attempts (default: 3)
  */
  constructor(options) {
  if (!options.apiKey) {
- throw new Error("PrimeLLMClient: apiKey is required");
+ throw new PrimeLLMError("PrimeLLM: apiKey is required");
  }
  this.apiKey = options.apiKey;
  this.baseURL = (options.baseURL ?? "https://api.primellm.in").replace(/\/$/, "");
  this.timeoutMs = options.timeoutMs ?? 60000;
+ this.retry = {
+ ...DEFAULT_RETRY,
+ maxAttempts: options.maxRetries ?? 3,
+ };
+ // Initialize sub-clients
+ this.embeddings = new EmbeddingsClient(this);
+ this.models = new ModelsClient(this);
+ this.keys = new KeysClient(this);
+ this.credits = new CreditsClient(this);
+ this.tokens = new TokensClient();
+ this.chat = new ChatClient(this);
  }
  /**
- * Internal helper to make API requests.
- * Handles authentication, JSON parsing, and error handling.
+ * Internal HTTP request with retries and error handling
  */
  async request(path, body, options) {
+ const method = options?.method ?? "POST";
+ let lastError = null;
+ for (let attempt = 0; attempt < this.retry.maxAttempts; attempt++) {
+ const controller = new AbortController();
+ const timeout = setTimeout(() => controller.abort(), this.timeoutMs);
+ try {
+ const fetchOptions = {
+ method,
+ headers: {
+ "Authorization": `Bearer ${this.apiKey}`,
+ "Content-Type": "application/json",
+ },
+ signal: controller.signal,
+ };
+ if (body && method !== "GET") {
+ fetchOptions.body = JSON.stringify(body);
+ }
+ const res = await fetch(`${this.baseURL}${path}`, fetchOptions);
+ if (!res.ok) {
+ const text = await res.text().catch(() => "");
+ let detail = text;
+ try {
+ const json = JSON.parse(text);
+ detail = json.detail || text;
+ }
+ catch { }
+ // Check if retryable
+ if (RETRYABLE_STATUSES.includes(res.status) && attempt < this.retry.maxAttempts - 1) {
+ lastError = createErrorFromStatus(res.status, `Request failed: ${res.status}`, detail);
+ await sleep(attempt, this.retry);
+ continue;
+ }
+ throw createErrorFromStatus(res.status, `PrimeLLM API error: ${res.status}`, detail);
+ }
+ return await res.json();
+ }
+ catch (error) {
+ clearTimeout(timeout);
+ if (error instanceof PrimeLLMError) {
+ throw error;
+ }
+ if (error instanceof Error && error.name === "AbortError") {
+ throw new PrimeLLMError(`Request timed out after ${this.timeoutMs}ms`);
+ }
+ // Network error - retry
+ if (attempt < this.retry.maxAttempts - 1) {
+ lastError = error;
+ await sleep(attempt, this.retry);
+ continue;
+ }
+ throw new PrimeLLMError(error.message);
+ }
+ finally {
+ clearTimeout(timeout);
+ }
+ }
+ throw lastError || new PrimeLLMError("Request failed after retries");
+ }
+ /**
+ * Internal streaming request
+ */
+ async *streamRequest(path, body) {
  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), this.timeoutMs);
  try {
  const res = await fetch(`${this.baseURL}${path}`, {
- method: options?.method ?? "POST",
+ method: "POST",
  headers: {
  "Authorization": `Bearer ${this.apiKey}`,
  "Content-Type": "application/json",
  },
- body: JSON.stringify(body),
+ body: JSON.stringify({ ...body, stream: true }),
  signal: controller.signal,
  });
  if (!res.ok) {
  const text = await res.text().catch(() => "");
- throw new Error(`PrimeLLM API error: ${res.status} ${res.statusText} - ${text}`);
+ throw createErrorFromStatus(res.status, `Streaming failed: ${res.status}`, text);
  }
- const json = await res.json();
- return json;
- }
- catch (error) {
- if (error instanceof Error && error.name === "AbortError") {
- throw new Error(`PrimeLLM API request timed out after ${this.timeoutMs}ms`);
+ if (!res.body) {
+ throw new PrimeLLMError("Response body is null");
  }
- throw error;
+ const reader = res.body.getReader();
+ yield* streamReader(reader);
  }
  finally {
  clearTimeout(timeout);
  }
  }
+ }
+ /**
+ * Chat sub-client
+ */
+ class ChatClient {
+ constructor(client) {
+ this.client = client;
+ }
  /**
- * Send a chat completion request using /v1/chat endpoint.
- *
- * This is the recommended method for most use cases.
- * Returns an OpenAI-compatible response format.
- *
- * @param request - The chat request with model and messages
- * @returns The chat response with choices, usage, and credits
+ * Send a chat completion request
+ */
+ async create(request) {
+ return this.client.request("/v1/chat", request);
+ }
+ /**
+ * Stream chat completion (async iterator)
  *
  * @example
- * const response = await client.chat({
- * model: "gpt-5.1",
- * messages: [
- * { role: "system", content: "You are a helpful assistant." },
- * { role: "user", content: "What is TypeScript?" },
- * ],
- * });
- * console.log(response.choices[0].message.content);
+ * for await (const chunk of client.chat.stream({...})) {
+ * console.log(chunk.delta?.content);
+ * }
  */
- async chat(request) {
- return this.request("/v1/chat", request);
+ async *stream(request) {
+ yield* this.client.streamRequest("/v1/chat", request);
+ }
+ }
+ /**
+ * Embeddings sub-client
+ */
+ class EmbeddingsClient {
+ constructor(client) {
+ this.client = client;
  }
  /**
- * Send a chat completion request using /v1/chat/completions endpoint.
- *
- * This is an alternative endpoint that also returns OpenAI-compatible format.
- * Use this if you need compatibility with OpenAI's exact endpoint path.
- *
- * @param request - The chat request with model and messages
- * @returns The chat response with choices, usage, and credits
+ * Create embeddings for input text
  */
- async completions(request) {
- return this.request("/v1/chat/completions", request);
+ async create(request) {
+ return this.client.request("/v1/embeddings", request);
+ }
+ }
+ /**
+ * Models sub-client
+ */
+ class ModelsClient {
+ constructor(client) {
+ this.client = client;
  }
  /**
- * Send a request to the legacy /generate endpoint.
- *
- * This endpoint returns a different response format than chat().
- * Use chat() for new projects; this is for backwards compatibility.
- *
- * @param request - The generate request with model and messages
- * @returns The generate response with reply, tokens_used, cost
- *
- * @example
- * const response = await client.generate({
- * model: "gpt-5.1",
- * messages: [{ role: "user", content: "Hello!" }],
- * });
- * console.log(response.reply);
+ * List available models
  */
- async generate(request) {
- return this.request("/generate", request);
+ async list() {
+ return this.client.request("/v1/models", undefined, { method: "GET" });
+ }
+ }
+ /**
+ * Keys sub-client
+ */
+ class KeysClient {
+ constructor(client) {
+ this.client = client;
  }
- // ============================================================
- // STREAMING METHODS (Not implemented yet)
- // ============================================================
  /**
- * Stream a chat completion response.
- *
- * ⚠️ NOT IMPLEMENTED YET - Backend streaming support coming soon.
- *
- * @throws Error always - streaming not supported in this version
+ * List API keys
  */
- async *streamChat(_request) {
- throw new Error("streamChat is not implemented yet: backend streaming not supported in this SDK version.");
- // This yield is never reached but satisfies TypeScript
- yield undefined;
+ async list() {
+ return this.client.request("/v1/keys", undefined, { method: "GET" });
  }
  /**
- * Stream a completions response.
- *
- * ⚠️ NOT IMPLEMENTED YET - Backend streaming support coming soon.
- *
- * @throws Error always - streaming not supported in this version
+ * Create a new API key
  */
- async *streamCompletions(_request) {
- throw new Error("streamCompletions is not implemented yet: backend streaming not supported in this SDK version.");
- yield undefined;
+ async create(label) {
+ return this.client.request("/v1/keys", { label });
  }
  /**
- * Stream a generate response.
- *
- * ⚠️ NOT IMPLEMENTED YET - Backend streaming support coming soon.
- *
- * @throws Error always - streaming not supported in this version
+ * Revoke an API key
+ */
+ async revoke(keyId) {
+ return this.client.request("/v1/keys/revoke", { key_id: keyId });
+ }
+ }
+ /**
+ * Credits sub-client
+ */
+ class CreditsClient {
+ constructor(client) {
+ this.client = client;
+ }
+ /**
+ * Get current credit balance
+ */
+ async get() {
+ return this.client.request("/v1/credits", undefined, { method: "GET" });
+ }
+ }
+ /**
+ * Tokens sub-client (utility)
+ */
+ class TokensClient {
+ /**
+ * Count tokens in text or messages
+ */
+ count(input) {
+ return countTokens(input);
+ }
+ /**
+ * Set custom tokenizer adapter
  */
- async *streamGenerate(_request) {
- throw new Error("streamGenerate is not implemented yet: backend streaming not supported in this SDK version.");
- yield undefined;
+ setAdapter(adapter) {
+ setTokenizerAdapter(adapter);
  }
  }
- // Default export for convenience
- export default PrimeLLMClient;
+ // Backwards compatibility alias
+ export { PrimeLLM as PrimeLLMClient };
+ // Default export
+ export default PrimeLLM;
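
Taken together, the index.js changes swap the flat 0.1.0 client for a namespaced 0.2.0 surface (chat, embeddings, models, keys, credits, tokens) with built-in retries and SSE streaming. A minimal usage sketch based only on the API surface visible in this diff; the model name and API key are placeholders and the output handling is illustrative:

    import PrimeLLM from "primellm";

    const client = new PrimeLLM({ apiKey: "primellm_XXX", maxRetries: 5 });

    // Non-streaming chat now goes through the chat sub-client
    const res = await client.chat.create({
      model: "gpt-5.1",
      messages: [{ role: "user", content: "Hello!" }],
    });
    console.log(res.choices[0].message.content);

    // Streaming chat (new in 0.2.0): async iteration over parsed SSE chunks
    for await (const chunk of client.chat.stream({
      model: "gpt-5.1",
      messages: [{ role: "user", content: "Write a haiku" }],
    })) {
      process.stdout.write(chunk.delta?.content ?? "");
    }

    // Other sub-clients introduced in this release
    const models = await client.models.list();
    const balance = await client.credits.get();

Note that although PrimeLLMClient is kept as an alias of PrimeLLM, the 0.1.0 methods client.chat(...), client.completions(...) and client.generate(...) no longer exist (chat is now a sub-client object), so existing call sites still need updating.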
package/dist/streaming.d.ts ADDED
@@ -0,0 +1,29 @@
+ /**
+ * PrimeLLM Streaming Utilities
+ *
+ * Async iterator for streaming chat completions.
+ */
+ /**
+ * Chat stream chunk from SSE
+ */
+ export interface StreamChunk {
+ id: string;
+ object: string;
+ delta?: {
+ role?: string;
+ content?: string;
+ };
+ done?: boolean;
+ finish_reason?: string;
+ }
+ /**
+ * Parse SSE data line to chunk object
+ */
+ export declare function parseSSELine(line: string): StreamChunk | null;
+ /**
+ * Create async iterator from SSE response stream
+ *
+ * @param reader - ReadableStreamDefaultReader from fetch response
+ */
+ export declare function streamReader(reader: ReadableStreamDefaultReader<Uint8Array>): AsyncGenerator<StreamChunk, void, unknown>;
+ //# sourceMappingURL=streaming.d.ts.map
package/dist/streaming.d.ts.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"file":"streaming.d.ts","sourceRoot":"","sources":["../src/streaming.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH;;GAEG;AACH,MAAM,WAAW,WAAW;IACxB,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE;QACJ,IAAI,CAAC,EAAE,MAAM,CAAC;QACd,OAAO,CAAC,EAAE,MAAM,CAAC;KACpB,CAAC;IACF,IAAI,CAAC,EAAE,OAAO,CAAC;IACf,aAAa,CAAC,EAAE,MAAM,CAAC;CAC1B;AAED;;GAEG;AACH,wBAAgB,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,WAAW,GAAG,IAAI,CAe7D;AAED;;;;GAIG;AACH,wBAAuB,YAAY,CAC/B,MAAM,EAAE,2BAA2B,CAAC,UAAU,CAAC,GAChD,cAAc,CAAC,WAAW,EAAE,IAAI,EAAE,OAAO,CAAC,CAyC5C"}
package/dist/streaming.js ADDED
@@ -0,0 +1,64 @@
+ /**
+ * PrimeLLM Streaming Utilities
+ *
+ * Async iterator for streaming chat completions.
+ */
+ /**
+ * Parse SSE data line to chunk object
+ */
+ export function parseSSELine(line) {
+ if (!line.startsWith('data:')) {
+ return null;
+ }
+ const data = line.slice(5).trim();
+ if (!data || data === '[DONE]') {
+ return null;
+ }
+ try {
+ return JSON.parse(data);
+ }
+ catch {
+ return null;
+ }
+ }
+ /**
+ * Create async iterator from SSE response stream
+ *
+ * @param reader - ReadableStreamDefaultReader from fetch response
+ */
+ export async function* streamReader(reader) {
+ const decoder = new TextDecoder();
+ let buffer = '';
+ try {
+ while (true) {
+ const { done, value } = await reader.read();
+ if (done) {
+ break;
+ }
+ buffer += decoder.decode(value, { stream: true });
+ // Process complete lines
+ const lines = buffer.split('\n');
+ buffer = lines.pop() || ''; // Keep incomplete line in buffer
+ for (const line of lines) {
+ const chunk = parseSSELine(line);
+ if (chunk) {
+ yield chunk;
+ // Check for done
+ if (chunk.object === 'chat.completion.done' || chunk.done) {
+ return;
+ }
+ }
+ }
+ }
+ // Process remaining buffer
+ if (buffer) {
+ const chunk = parseSSELine(buffer);
+ if (chunk) {
+ yield chunk;
+ }
+ }
+ }
+ finally {
+ reader.releaseLock();
+ }
+ }
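
streamReader decodes the body incrementally, splits it on newlines, and yields whatever parseSSELine returns, stopping at a chunk whose object is "chat.completion.done" or whose done flag is set. A small sketch of what the parser accepts; the payload fields beyond the StreamChunk shape declared above are assumptions, as is the subpath import:

    // Hypothetical SSE lines as the /v1/chat stream might emit them:
    //   data: {"id":"c1","object":"chat.completion.chunk","delta":{"content":"Hi"}}
    //   data: {"id":"c1","object":"chat.completion.done","done":true}
    //   data: [DONE]
    import { parseSSELine } from "primellm/dist/streaming.js"; // assumed reachable as a subpath import

    const chunk = parseSSELine('data: {"id":"c1","object":"chat.completion.chunk","delta":{"content":"Hi"}}');
    console.log(chunk?.delta?.content); // "Hi"
    console.log(parseSSELine("data: [DONE]")); // null (the [DONE] sentinel is swallowed, not yielded)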
package/dist/tokenizer.d.ts ADDED
@@ -0,0 +1,42 @@
+ /**
+ * PrimeLLM Token Counter
+ *
+ * Simple token estimation using chars/4 approximation.
+ * Provides adapter point for future tiktoken integration.
+ */
+ import { Message } from './types.js';
+ /**
+ * Token counter adapter function type
+ */
+ export type TokenizerAdapter = (text: string) => number;
+ /**
+ * Count tokens in text or messages array
+ *
+ * @param input - Text string or array of messages
+ * @returns Estimated token count
+ *
+ * @example
+ * countTokens("Hello world") // ~3
+ * countTokens([{role:"user", content:"Hello"}]) // ~2
+ */
+ export declare function countTokens(input: string | Message[]): number;
+ /**
+ * Set custom tokenizer adapter (for tiktoken or other)
+ *
+ * @param adapter - Function that takes text and returns token count
+ *
+ * @example
+ * import { encoding_for_model } from 'tiktoken';
+ * const enc = encoding_for_model('gpt-4');
+ * setTokenizerAdapter((text) => enc.encode(text).length);
+ */
+ export declare function setTokenizerAdapter(adapter: TokenizerAdapter | null): void;
+ /**
+ * Get current tokenizer adapter
+ */
+ export declare function getTokenizerAdapter(): TokenizerAdapter | null;
+ /**
+ * Reset tokenizer to default (chars/4)
+ */
+ export declare function resetTokenizer(): void;
+ //# sourceMappingURL=tokenizer.d.ts.map
package/dist/tokenizer.d.ts.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"file":"tokenizer.d.ts","sourceRoot":"","sources":["../src/tokenizer.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,OAAO,EAAE,MAAM,YAAY,CAAC;AAErC;;GAEG;AACH,MAAM,MAAM,gBAAgB,GAAG,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,CAAC;AAOxD;;;;;;;;;GASG;AACH,wBAAgB,WAAW,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,EAAE,GAAG,MAAM,CAiB7D;AAED;;;;;;;;;GASG;AACH,wBAAgB,mBAAmB,CAAC,OAAO,EAAE,gBAAgB,GAAG,IAAI,GAAG,IAAI,CAE1E;AAED;;GAEG;AACH,wBAAgB,mBAAmB,IAAI,gBAAgB,GAAG,IAAI,CAE7D;AAED;;GAEG;AACH,wBAAgB,cAAc,IAAI,IAAI,CAErC"}
package/dist/tokenizer.js ADDED
@@ -0,0 +1,61 @@
+ /**
+ * PrimeLLM Token Counter
+ *
+ * Simple token estimation using chars/4 approximation.
+ * Provides adapter point for future tiktoken integration.
+ */
+ /**
+ * Current tokenizer adapter (defaults to simple chars/4)
+ */
+ let tokenizerAdapter = null;
+ /**
+ * Count tokens in text or messages array
+ *
+ * @param input - Text string or array of messages
+ * @returns Estimated token count
+ *
+ * @example
+ * countTokens("Hello world") // ~3
+ * countTokens([{role:"user", content:"Hello"}]) // ~2
+ */
+ export function countTokens(input) {
+ let text;
+ if (Array.isArray(input)) {
+ text = input.map(m => m.content).join(' ');
+ }
+ else {
+ text = input;
+ }
+ // Use custom adapter if set
+ if (tokenizerAdapter) {
+ return tokenizerAdapter(text);
+ }
+ // Default: chars / 4 (simple approximation)
+ const chars = text.length;
+ return Math.max(1, Math.ceil(chars / 4));
+ }
+ /**
+ * Set custom tokenizer adapter (for tiktoken or other)
+ *
+ * @param adapter - Function that takes text and returns token count
+ *
+ * @example
+ * import { encoding_for_model } from 'tiktoken';
+ * const enc = encoding_for_model('gpt-4');
+ * setTokenizerAdapter((text) => enc.encode(text).length);
+ */
+ export function setTokenizerAdapter(adapter) {
+ tokenizerAdapter = adapter;
+ }
+ /**
+ * Get current tokenizer adapter
+ */
+ export function getTokenizerAdapter() {
+ return tokenizerAdapter;
+ }
+ /**
+ * Reset tokenizer to default (chars/4)
+ */
+ export function resetTokenizer() {
+ tokenizerAdapter = null;
+ }
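
Without an adapter installed, countTokens is a rough ceil(chars / 4) estimate, so treat the result as an approximation rather than a billing-grade count. A short sketch of the same utilities reached through the client's tokens sub-client; the tiktoken wiring mirrors the @example above and assumes the tiktoken package is installed:

    import PrimeLLM from "primellm";
    import { encoding_for_model } from "tiktoken";

    const client = new PrimeLLM({ apiKey: "primellm_XXX" });

    // Default estimator: Math.max(1, Math.ceil(chars / 4))
    console.log(client.tokens.count("Hello world")); // ~3

    // Swap in a real tokenizer; countTokens() then delegates to it
    const enc = encoding_for_model("gpt-4");
    client.tokens.setAdapter((text) => enc.encode(text).length);
    console.log(client.tokens.count([{ role: "user", content: "Hello" }]));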