compress-lightreach 1.0.8 → 1.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Compress Light Reach
2
2
 
3
- **AI cost management SDK with intelligent model routing, prompt compression, and real-time token tracking**
3
+ **OpenAI-compatible LLM routing + compression SDK (superset responses with LightReach metadata)**
4
4
 
5
5
  [![npm version](https://badge.fury.io/js/compress-lightreach.svg)](https://badge.fury.io/js/compress-lightreach)
6
6
  [![Node.js 14+](https://img.shields.io/badge/node-14+-blue.svg)](https://nodejs.org/)
@@ -12,8 +12,7 @@ Compress Light Reach is a Node.js/TypeScript SDK that provides intelligent model
12
12
 
13
13
  - **Intelligent Model Routing**: Automatically selects the optimal model based on admin-configured quality settings and available provider keys
14
14
  - **Token-aware Compression**: Replaces repeated substrings with shorter placeholders using a fast greedy algorithm
15
- - **Lossless**: Perfect decompression guaranteed
16
- - **Output Compression**: Optional model output compression support
15
+ - **Lossless Input Compression**: Prompt reconstruction is deterministic
17
16
  - **Cloud API**: Uses Light Reach's cloud service for compression and routing
18
17
  - **Multi-provider Support**: OpenAI, Anthropic, Google, DeepSeek, Moonshot
19
18
  - **TypeScript**: Full TypeScript support with type definitions
@@ -52,7 +51,7 @@ const result = await client.complete({
52
51
  tags: { team: 'backend', environment: 'production' },
53
52
  });
54
53
 
55
- console.log(result.decompressed_response);
54
+ console.log(result.choices[0].message.content);
56
55
  console.log(`Selected: ${result.routing_info?.selected_model}`);
57
56
  console.log(`Token savings: ${result.compression_stats.token_savings}`);
58
57
  ```
@@ -154,17 +153,6 @@ const result = await client.complete({
154
153
  });
155
154
  ```
156
155
 
157
- ### With Output Compression
158
-
159
- ```typescript
160
- const result = await client.complete({
161
- messages: [{ role: 'user', content: 'Generate a long report...' }],
162
- compress_output: true,
163
- });
164
-
165
- console.log(result.decompressed_response);
166
- ```
167
-
168
156
  ### With Compression Config
169
157
 
170
158
  Control which message roles get compressed:
@@ -189,39 +177,10 @@ const result = await client.complete({
189
177
  tags: { team: 'backend', environment: 'production' },
190
178
  });
191
179
 
192
- console.log(result.decompressed_response);
180
+ console.log(result.choices[0].message.content);
193
181
  console.log(`Model used: ${result.routing_info?.selected_model}`);
194
182
  ```
195
183
 
196
- ### Compression Only (No LLM Call)
197
-
198
- ```typescript
199
- import { PcompresslrAPIClient } from 'compress-lightreach';
200
-
201
- const client = new PcompresslrAPIClient("your-lightreach-api-key");
202
-
203
- const compressed = await client.compress(
204
- "Your text with repeated content here...",
205
- "gpt-4",
206
- { team: 'backend' },
207
- );
208
-
209
- console.log(compressed.llm_format);
210
- console.log(`Compression ratio: ${compressed.compression_ratio}`);
211
-
212
- // Decompress later
213
- const decompressed = await client.decompress(compressed.llm_format);
214
- console.log(decompressed.decompressed);
215
- ```
216
-
217
- ### Command Line Interface
218
-
219
- ```bash
220
- export PCOMPRESLR_API_KEY=your-api-key
221
-
222
- npx pcompresslr "Your prompt with repeated text here..."
223
- ```
224
-
225
184
  ## API Reference
226
185
 
227
186
  ### `PcompresslrAPIClient`
@@ -254,7 +213,7 @@ For direct synchronous calls, use `completeSync()` instead.
254
213
  | `messages` | `Message[]` | required | Conversation history with `role` and `content` |
255
214
  | `llm_provider` | `'openai' \| 'anthropic' \| 'google' \| 'deepseek' \| 'moonshot'` | — | Optional provider constraint. Omit for cross-provider optimization |
256
215
  | `compress` | `boolean` | `true` | Whether to compress messages |
257
- | `compress_output` | `boolean` | `false` | Whether to request compressed output from LLM |
216
+ | `compress_output` | `boolean` | `false` | Advanced server hint. `complete()` still returns normal OpenAI-style text in `choices[0].message.content` |
258
217
  | `compression_config` | `object` | — | Per-role compression settings (see below) |
259
218
  | `temperature` | `number` | — | LLM temperature parameter |
260
219
  | `max_tokens` | `number` | — | Maximum tokens to generate |
@@ -276,8 +235,21 @@ For direct synchronous calls, use `completeSync()` instead.
276
235
 
277
236
  ```typescript
278
237
  {
279
- content: string; // Final response content
280
- decompressed_response: string; // Final decompressed LLM response
238
+ id: string; // OpenAI-style completion id
239
+ object: "chat.completion";
240
+ created: number; // Unix timestamp
241
+ model: string;
242
+ choices: Array<{
243
+ index: number;
244
+ message: { role: "assistant"; content: string | null; tool_calls?: any[] };
245
+ finish_reason: string | null;
246
+ }>;
247
+ usage: {
248
+ prompt_tokens: number;
249
+ completion_tokens: number;
250
+ total_tokens: number;
251
+ };
252
+ content: string; // Alias of choices[0].message.content
281
253
  compression_stats: {
282
254
  compression_enabled: boolean;
283
255
  original_tokens: number;
@@ -307,6 +279,12 @@ For direct synchronous calls, use `completeSync()` instead.
307
279
  hle_source: 'tag' | 'global' | 'none';
308
280
  };
309
281
  warnings?: string[];
282
+ lightreach?: { // Namespaced LightReach metadata extension
283
+ compression_stats?: object;
284
+ llm_stats?: object;
285
+ routing_info?: object;
286
+ latency_ms?: number | null;
287
+ };
310
288
 
311
289
  // Convenience aliases
312
290
  tokens_saved?: number;
@@ -330,46 +308,6 @@ Explicit async job flow with configurable polling. Called internally by `complet
330
308
  - `maxWaitMs` (number, default: timeout): Maximum wait time
331
309
  - `idempotencyKey` (string, optional): Idempotency key for job creation
332
310
 
333
- ##### `compress(prompt, model?, tags?): Promise<CompressResponse>`
334
-
335
- Compression-only (POST `/api/v1/compress`).
336
-
337
- **Parameters:**
338
- - `prompt` (string, required): Text to compress
339
- - `model` (string, optional): Model for tokenization. Default: `'gpt-4'`
340
- - `tags` (`Record<string, string>`, optional): Tags for attribution
341
-
342
- **Response (`CompressResponse`):**
343
-
344
- ```typescript
345
- {
346
- compressed: string;
347
- dictionary: Record<string, string>;
348
- llm_format: string;
349
- compression_ratio: number;
350
- original_size: number;
351
- compressed_size: number;
352
- processing_time_ms: number;
353
- algorithm: string;
354
- }
355
- ```
356
-
357
- ##### `decompress(llmFormat): Promise<DecompressResponse>`
358
-
359
- Decompress an LLM-formatted compressed prompt (POST `/api/v1/decompress`).
360
-
361
- **Parameters:**
362
- - `llmFormat` (string, required): The `llm_format` string from a compress response
363
-
364
- **Response (`DecompressResponse`):**
365
-
366
- ```typescript
367
- {
368
- decompressed: string;
369
- processing_time_ms: number;
370
- }
371
- ```
372
-
373
311
  ##### `healthCheck(): Promise<HealthCheckResponse>`
374
312
 
375
313
  Check API health status (GET `/health`).
@@ -432,7 +370,7 @@ try {
432
370
  1. **Compression**: Identifies repeated substrings using efficient algorithms and replaces them with shorter placeholders, reducing token count
433
371
  2. **Routing**: Selects the cheapest model that meets the admin-configured quality ceiling (global, tag-level, or integration-level)
434
372
  3. **LLM Call**: Sends the compressed prompt to the selected model via your BYOK provider keys
435
- 4. **Decompression**: Losslessly restores the model's response if output compression was enabled
373
+ 4. **Response Shaping**: Returns standard OpenAI-style completion fields plus LightReach metadata extensions
436
374
 
437
375
  ## Examples
438
376
 
@@ -454,13 +392,13 @@ const result = await client.complete({
454
392
  tags: { team: 'content', environment: 'production' },
455
393
  });
456
394
 
457
- console.log(result.decompressed_response);
395
+ console.log(result.choices[0].message.content);
458
396
  console.log(`Model used: ${result.routing_info?.selected_model}`);
459
397
  console.log(`Token savings: ${result.compression_stats.token_savings} tokens`);
460
398
  console.log(`Compression ratio: ${(result.compression_stats.compression_ratio * 100).toFixed(2)}%`);
461
399
  ```
462
400
 
463
- ### Example 2: Output Compression
401
+ ### Example 2: Compression Config
464
402
 
465
403
  ```typescript
466
404
  import { PcompresslrAPIClient } from 'compress-lightreach';
@@ -469,10 +407,15 @@ const client = new PcompresslrAPIClient("your-lightreach-api-key");
469
407
 
470
408
  const result = await client.complete({
471
409
  messages: [{ role: "user", content: "Generate a long report with repeated sections..." }],
472
- compress_output: true,
410
+ compression_config: {
411
+ compress_system: false,
412
+ compress_user: true,
413
+ compress_assistant: false,
414
+ compress_only_last_n_user: 1,
415
+ },
473
416
  });
474
417
 
475
- console.log(result.decompressed_response);
418
+ console.log(result.choices[0].message.content);
476
419
  ```
477
420
 
478
421
  ### Example 3: Multi-turn Conversation
@@ -28,7 +28,27 @@ export interface DecompressResponse {
28
28
  processing_time_ms: number;
29
29
  }
30
30
  export interface CompleteResponse {
31
+ id: string;
32
+ object: 'chat.completion';
33
+ created: number;
34
+ model: string;
35
+ choices: Array<{
36
+ index: number;
37
+ message: {
38
+ role: 'assistant';
39
+ content: string | null;
40
+ tool_calls?: Array<Record<string, any>>;
41
+ };
42
+ finish_reason: string | null;
43
+ }>;
44
+ usage: {
45
+ prompt_tokens: number;
46
+ completion_tokens: number;
47
+ total_tokens: number;
48
+ };
31
49
  content: string;
50
+ decompressed_response?: string;
51
+ text?: string;
32
52
  compression_stats: {
33
53
  compression_enabled: boolean;
34
54
  original_tokens: number;
@@ -75,6 +95,7 @@ export interface CompleteResponse {
75
95
  model_hle?: number | null;
76
96
  input_price_per_million?: number | null;
77
97
  output_price_per_million?: number | null;
98
+ lightreach?: Record<string, any>;
78
99
  }
79
100
  export type MessageRole = 'system' | 'developer' | 'user' | 'assistant';
80
101
  export interface Message {
@@ -152,6 +173,7 @@ export declare class PcompresslrAPIClient {
152
173
  private session;
153
174
  constructor(apiKey?: string, apiUrl?: string, timeout?: number);
154
175
  private makeRequest;
176
+ private toOpenAISupersetResponse;
155
177
  /**
156
178
  * Create async /complete job (POST /api/v1/complete/jobs).
157
179
  */
@@ -141,6 +141,65 @@ class PcompresslrAPIClient {
141
141
  throw new APIRequestError(`Request failed: ${errorMessage}`);
142
142
  }
143
143
  }
144
+ toOpenAISupersetResponse(raw) {
145
+ const response = (raw && typeof raw === 'object') ? { ...raw } : {};
146
+ const llmStats = (response.llm_stats && typeof response.llm_stats === 'object') ? response.llm_stats : {};
147
+ const routingInfo = (response.routing_info && typeof response.routing_info === 'object') ? response.routing_info : {};
148
+ const content = (typeof response.content === 'string' && response.content) ||
149
+ (typeof response.decompressed_response === 'string' && response.decompressed_response) ||
150
+ (typeof response.text === 'string' && response.text) ||
151
+ '';
152
+ const model = response.model ||
153
+ response.model_used ||
154
+ routingInfo.selected_model ||
155
+ llmStats.model ||
156
+ 'lightreach';
157
+ const promptTokens = Number(llmStats.input_tokens ?? 0) || 0;
158
+ const completionTokens = Number(llmStats.output_tokens ?? 0) || 0;
159
+ const totalTokens = Number(llmStats.total_tokens ?? (promptTokens + completionTokens)) || (promptTokens + completionTokens);
160
+ const finishReason = llmStats.finish_reason ?? 'stop';
161
+ const message = { role: 'assistant', content };
162
+ if (Array.isArray(response.tool_calls) && response.tool_calls.length > 0) {
163
+ message.tool_calls = response.tool_calls;
164
+ if (!content)
165
+ message.content = null;
166
+ }
167
+ response.id = String(response.id || `chatcmpl-${Math.random().toString(16).slice(2)}${Date.now().toString(16)}`);
168
+ response.object = 'chat.completion';
169
+ response.created = Number(response.created || Math.floor(Date.now() / 1000));
170
+ response.model = String(model);
171
+ response.choices = Array.isArray(response.choices)
172
+ ? response.choices
173
+ : [{ index: 0, message, finish_reason: finishReason }];
174
+ response.usage = (response.usage && typeof response.usage === 'object')
175
+ ? response.usage
176
+ : {
177
+ prompt_tokens: promptTokens,
178
+ completion_tokens: completionTokens,
179
+ total_tokens: totalTokens,
180
+ };
181
+ response.content = content;
182
+ if (response.decompressed_response === undefined)
183
+ response.decompressed_response = content;
184
+ if (response.text === undefined)
185
+ response.text = content;
186
+ response.lightreach = {
187
+ content,
188
+ compression_stats: response.compression_stats,
189
+ llm_stats: response.llm_stats,
190
+ routing_info: response.routing_info,
191
+ warnings: response.warnings,
192
+ tokens_saved: response.tokens_saved,
193
+ tokens_used: response.tokens_used,
194
+ compression_ratio: response.compression_ratio,
195
+ cost_estimate: response.cost_estimate,
196
+ savings_estimate: response.savings_estimate,
197
+ provider_used: response.provider_used,
198
+ model_used: response.model_used,
199
+ latency_ms: llmStats.latency_ms ?? null,
200
+ };
201
+ return response;
202
+ }
144
203
  /**
145
204
  * Create async /complete job (POST /api/v1/complete/jobs).
146
205
  */
@@ -193,7 +252,7 @@ class PcompresslrAPIClient {
193
252
  const st = await this.getCompleteJob(jobId);
194
253
  if (st.status === 'succeeded') {
195
254
  if (st.result)
196
- return st.result;
255
+ return this.toOpenAISupersetResponse(st.result);
197
256
  throw new APIRequestError('Async job succeeded but result was missing.');
198
257
  }
199
258
  if (st.status === 'failed' || st.status === 'canceled') {
@@ -285,7 +344,8 @@ class PcompresslrAPIClient {
285
344
  data.auto_select_by_hle = request.auto_select_by_hle;
286
345
  if (request.same_provider_only !== undefined)
287
346
  data.same_provider_only = request.same_provider_only;
288
- return this.makeRequest('/api/v2/complete', data, 'POST');
347
+ const raw = await this.makeRequest('/api/v2/complete', data, 'POST');
348
+ return this.toOpenAISupersetResponse(raw);
289
349
  }
290
350
  /**
291
351
  * Messages-first complete with intelligent model selection.
package/dist/index.d.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  /**
2
- * Compress Light Reach - Intelligent compression algorithms for LLM prompts.
2
+ * Compress Light Reach - OpenAI-compatible routing + compression SDK.
3
3
  */
4
4
  export { __version__ } from './version';
5
5
  export { LightReach, Pcompresslr } from './core';
package/dist/index.js CHANGED
@@ -1,6 +1,6 @@
1
1
  "use strict";
2
2
  /**
3
- * Compress Light Reach - Intelligent compression algorithms for LLM prompts.
3
+ * Compress Light Reach - OpenAI-compatible routing + compression SDK.
4
4
  */
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
6
  exports.PcompresslrAPIError = exports.APIRequestError = exports.RateLimitError = exports.APIKeyError = exports.PcompresslrAPIClient = exports.Pcompresslr = exports.LightReach = exports.__version__ = void 0;
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "compress-lightreach",
3
- "version": "1.0.8",
4
- "description": "AI cost management SDK with intelligent model routing, prompt compression, and real-time token tracking",
3
+ "version": "1.0.10",
4
+ "description": "OpenAI-compatible LLM routing and compression SDK with LightReach metadata extensions",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
7
7
  "bin": {