compress-lightreach 1.0.5 → 1.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +123 -96
- package/dist/api-client.d.ts +61 -28
- package/dist/api-client.js +22 -45
- package/dist/cli.js +30 -120
- package/dist/core.d.ts +14 -4
- package/dist/core.js +1 -5
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -10,11 +10,8 @@ Compress Light Reach is a Node.js/TypeScript SDK that provides intelligent model
|
|
|
10
10
|
|
|
11
11
|
## Features
|
|
12
12
|
|
|
13
|
-
- **Intelligent Model Routing**: Automatically selects optimal model based on quality
|
|
14
|
-
- **Token-aware Compression**: Replaces repeated substrings with shorter placeholders
|
|
15
|
-
- **Dual Algorithms**:
|
|
16
|
-
- Fast greedy (~99% optimal) for daily use
|
|
17
|
-
- Optimal DP (O(n²)) for critical prompts
|
|
13
|
+
- **Intelligent Model Routing**: Automatically selects the optimal model based on admin-configured quality settings and available provider keys
|
|
14
|
+
- **Token-aware Compression**: Replaces repeated substrings with shorter placeholders using a fast greedy algorithm
|
|
18
15
|
- **Lossless**: Perfect decompression guaranteed
|
|
19
16
|
- **Output Compression**: Optional model output compression support
|
|
20
17
|
- **Cloud API**: Uses Light Reach's cloud service for compression and routing
|
|
@@ -40,7 +37,7 @@ The SDK uses **intelligent model routing** and targets `POST /api/v2/complete`.
|
|
|
40
37
|
|
|
41
38
|
- Authenticate with your **LightReach API key** (env var `PCOMPRESLR_API_KEY` or `LIGHTREACH_API_KEY`)
|
|
42
39
|
- Manage **provider keys** (OpenAI/Anthropic/Google/etc.) in the dashboard (BYOK)
|
|
43
|
-
- System automatically selects optimal model based on
|
|
40
|
+
- System automatically selects the optimal model based on admin-configured quality settings
|
|
44
41
|
|
|
45
42
|
```typescript
|
|
46
43
|
import { PcompresslrAPIClient } from 'compress-lightreach';
|
|
@@ -52,7 +49,7 @@ const result = await client.complete({
|
|
|
52
49
|
{ role: 'system', content: 'You are a helpful assistant.' },
|
|
53
50
|
{ role: 'user', content: 'Explain quantum computing in simple terms.' },
|
|
54
51
|
],
|
|
55
|
-
|
|
52
|
+
tags: { team: 'backend', environment: 'production' },
|
|
56
53
|
});
|
|
57
54
|
|
|
58
55
|
console.log(result.decompressed_response);
|
|
@@ -60,84 +57,117 @@ console.log(`Selected: ${result.routing_info?.selected_model}`);
|
|
|
60
57
|
console.log(`Token savings: ${result.compression_stats.token_savings}`);
|
|
61
58
|
```
|
|
62
59
|
|
|
63
|
-
|
|
60
|
+
## OpenAI-compatible API (Cursor / OpenAI SDKs)
|
|
61
|
+
|
|
62
|
+
LightReach also exposes a **strict OpenAI-compatible** surface (including streaming SSE) so you can use standard OpenAI tooling without changing your app.
|
|
63
|
+
|
|
64
|
+
- **Cursor base URL**: `https://api.compress.lightreach.io/v1/cursor`
|
|
65
|
+
- **Generic OpenAI-compatible base URL**: `https://api.compress.lightreach.io/v1`
|
|
66
|
+
- **Endpoints**: `GET /models`, `POST /chat/completions`
|
|
67
|
+
- **Model id**: `lightreach`
|
|
68
|
+
|
|
69
|
+
Example (cURL):
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
curl -sS https://api.compress.lightreach.io/v1/chat/completions \
|
|
73
|
+
-H "Authorization: Bearer lr_your_lightreach_key" \
|
|
74
|
+
-H "Content-Type: application/json" \
|
|
75
|
+
-d '{
|
|
76
|
+
"model": "lightreach",
|
|
77
|
+
"messages": [{"role":"user","content":"Say hello"}],
|
|
78
|
+
"stream": true
|
|
79
|
+
}'
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
## Tags
|
|
83
|
+
|
|
84
|
+
Tags provide **cost attribution** and enable **admin-controlled quality ceilings** per tag. The system supports three tag categories that you can set on requests:
|
|
85
|
+
|
|
86
|
+
| Tag Key | Description | Example Values |
|
|
87
|
+
|---------|-------------|----------------|
|
|
88
|
+
| `team` | Your team or group | `"backend"`, `"ml-platform"`, `"marketing"` |
|
|
89
|
+
| `environment` | Deployment environment | `"development"`, `"staging"`, `"production"` |
|
|
90
|
+
| `feature` | Feature or use case | `"search"`, `"chat"`, `"summarization"` |
|
|
91
|
+
|
|
92
|
+
Tags are validated server-side. Your workspace admin can configure the allowed values for each tag category via the dashboard. If a tag value is not in the allowed list, the request either succeeds with a warning or is rejected, depending on your workspace's enforcement mode.
|
|
64
93
|
|
|
65
94
|
```typescript
|
|
66
95
|
const result = await client.complete({
|
|
67
|
-
messages: [{ role: 'user', content: '
|
|
68
|
-
|
|
69
|
-
|
|
96
|
+
messages: [{ role: 'user', content: 'Summarize this document...' }],
|
|
97
|
+
tags: {
|
|
98
|
+
team: 'backend',
|
|
99
|
+
environment: 'production',
|
|
100
|
+
feature: 'summarization',
|
|
101
|
+
},
|
|
70
102
|
});
|
|
71
|
-
|
|
72
|
-
console.log(result.decompressed_response);
|
|
73
103
|
```
|
|
74
104
|
|
|
75
|
-
|
|
105
|
+
> **Note:** The `integration` tag is reserved for system use (e.g., Cursor, Claude Code) and should not be set manually. The `project` tag is also available for workspace-level project attribution — see your dashboard for configuration.
|
|
106
|
+
|
|
107
|
+
## Intelligent Model Routing
|
|
108
|
+
|
|
109
|
+
Model routing is fully managed by your workspace admin via the dashboard. The system uses **HLE (Humanity's Last Exam)** scores — a standardized benchmark — to determine model quality. Admins configure quality ceilings at three levels:
|
|
76
110
|
|
|
77
|
-
|
|
111
|
+
- **Global ceiling**: Set via the HLE slider in the dashboard. Applies to all requests.
|
|
112
|
+
- **Tag-level ceilings**: Set per tag (e.g., `environment=development` gets a lower ceiling to save costs).
|
|
113
|
+
- **Integration-level ceilings**: Set per integration (e.g., Cursor, Claude Code).
|
|
114
|
+
|
|
115
|
+
The routing engine picks the **cheapest model** whose HLE score meets the effective ceiling. HLE scores are maintained server-side and cannot be overridden by SDK callers.
|
|
78
116
|
|
|
79
117
|
```typescript
|
|
80
118
|
import { PcompresslrAPIClient } from 'compress-lightreach';
|
|
81
119
|
|
|
82
120
|
const client = new PcompresslrAPIClient("your-lightreach-api-key");
|
|
83
121
|
|
|
84
|
-
// Cross-provider optimization: system picks cheapest model meeting your quality bar
|
|
85
122
|
const result = await client.complete({
|
|
86
123
|
messages: [{ role: 'user', content: 'Explain quantum computing' }],
|
|
87
|
-
|
|
124
|
+
tags: { team: 'backend', environment: 'production' },
|
|
88
125
|
});
|
|
89
126
|
|
|
90
|
-
// Check what was selected
|
|
91
127
|
console.log(result.routing_info?.selected_model); // e.g., "gpt-4o-mini"
|
|
92
128
|
console.log(result.routing_info?.selected_provider); // e.g., "openai"
|
|
93
129
|
console.log(result.routing_info?.model_hle); // e.g., 32.5
|
|
94
130
|
console.log(result.routing_info?.model_price_per_million); // e.g., 0.15
|
|
95
131
|
```
|
|
96
132
|
|
|
133
|
+
### Routing Response
|
|
134
|
+
|
|
135
|
+
Every `complete()` response includes `routing_info` with full transparency into the routing decision:
|
|
136
|
+
|
|
137
|
+
```typescript
|
|
138
|
+
const info = result.routing_info;
|
|
139
|
+
console.log(`Model: ${info?.selected_model}`);
|
|
140
|
+
console.log(`Provider: ${info?.selected_provider}`);
|
|
141
|
+
console.log(`Model HLE: ${info?.model_hle}`);
|
|
142
|
+
console.log(`Effective HLE ceiling: ${info?.effective_hle}`);
|
|
143
|
+
console.log(`Ceiling source: ${info?.hle_source}`); // "tag", "global", or "none"
|
|
144
|
+
```
|
|
145
|
+
|
|
97
146
|
### Provider-Constrained Routing
|
|
98
147
|
|
|
99
148
|
Optionally constrain to a specific provider:
|
|
100
149
|
|
|
101
150
|
```typescript
|
|
102
|
-
// Only use OpenAI models, but pick the cheapest one meeting HLE 35
|
|
103
151
|
const result = await client.complete({
|
|
104
152
|
messages: [{ role: 'user', content: 'Write a poem' }],
|
|
105
|
-
llm_provider: '
|
|
106
|
-
desired_hle: 35,
|
|
153
|
+
llm_provider: 'anthropic',
|
|
107
154
|
});
|
|
108
155
|
```
|
|
109
156
|
|
|
110
|
-
###
|
|
111
|
-
|
|
112
|
-
Admins can set quality **ceilings** via the dashboard (global or per-tag) to control costs. Your `desired_hle` is a preference; if it exceeds an admin-set ceiling, the request will **silently clamp** to the ceiling and proceed.
|
|
157
|
+
### With Output Compression
|
|
113
158
|
|
|
114
159
|
```typescript
|
|
115
|
-
// Admin set global HLE ceiling to 30%
|
|
116
|
-
// Requesting above the ceiling will be clamped to 30 (no error)
|
|
117
|
-
const result = await client.complete({
|
|
118
|
-
messages: [{ role: 'user', content: 'Process payment' }],
|
|
119
|
-
desired_hle: 35, // Will be clamped down to 30
|
|
120
|
-
tags: { env: 'production' },
|
|
121
|
-
});
|
|
122
|
-
|
|
123
|
-
// Correct usage: request within ceiling
|
|
124
160
|
const result = await client.complete({
|
|
125
|
-
messages: [{ role: 'user', content: '
|
|
126
|
-
|
|
127
|
-
tags: { env: 'production' },
|
|
161
|
+
messages: [{ role: 'user', content: 'Generate a long report...' }],
|
|
162
|
+
compress_output: true,
|
|
128
163
|
});
|
|
129
164
|
|
|
130
|
-
|
|
131
|
-
if (result.routing_info?.hle_clamped) {
|
|
132
|
-
console.log(`HLE lowered from ${result.routing_info.requested_hle} ` +
|
|
133
|
-
`to ${result.routing_info.effective_hle} ` +
|
|
134
|
-
`by ${result.routing_info.hle_source}-level ceiling`);
|
|
135
|
-
}
|
|
165
|
+
console.log(result.decompressed_response);
|
|
136
166
|
```
|
|
137
167
|
|
|
138
168
|
### With Compression Config
|
|
139
169
|
|
|
140
|
-
|
|
170
|
+
Control which message roles get compressed:
|
|
141
171
|
|
|
142
172
|
```typescript
|
|
143
173
|
import { PcompresslrAPIClient } from 'compress-lightreach';
|
|
@@ -146,7 +176,6 @@ const client = new PcompresslrAPIClient("your-lightreach-api-key");
|
|
|
146
176
|
|
|
147
177
|
const result = await client.complete({
|
|
148
178
|
messages: [{ role: 'user', content: 'Hello!' }],
|
|
149
|
-
desired_hle: 30,
|
|
150
179
|
compress: true,
|
|
151
180
|
compress_output: false,
|
|
152
181
|
compression_config: {
|
|
@@ -157,14 +186,13 @@ const result = await client.complete({
|
|
|
157
186
|
},
|
|
158
187
|
temperature: 0.7,
|
|
159
188
|
max_tokens: 1000,
|
|
160
|
-
tags: {
|
|
189
|
+
tags: { team: 'backend', environment: 'production' },
|
|
161
190
|
});
|
|
162
191
|
|
|
163
192
|
console.log(result.decompressed_response);
|
|
164
193
|
console.log(`Model used: ${result.routing_info?.selected_model}`);
|
|
165
194
|
```
|
|
166
195
|
|
|
167
|
-
|
|
168
196
|
### Compression Only (No LLM Call)
|
|
169
197
|
|
|
170
198
|
```typescript
|
|
@@ -172,12 +200,10 @@ import { PcompresslrAPIClient } from 'compress-lightreach';
|
|
|
172
200
|
|
|
173
201
|
const client = new PcompresslrAPIClient("your-lightreach-api-key");
|
|
174
202
|
|
|
175
|
-
// Compress text without making an LLM call
|
|
176
203
|
const compressed = await client.compress(
|
|
177
204
|
"Your text with repeated content here...",
|
|
178
|
-
"gpt-4",
|
|
179
|
-
|
|
180
|
-
{ env: 'dev' } // Optional tags
|
|
205
|
+
"gpt-4",
|
|
206
|
+
{ team: 'backend' },
|
|
181
207
|
);
|
|
182
208
|
|
|
183
209
|
console.log(compressed.llm_format);
|
|
@@ -191,17 +217,9 @@ console.log(decompressed.decompressed);
|
|
|
191
217
|
### Command Line Interface
|
|
192
218
|
|
|
193
219
|
```bash
|
|
194
|
-
# Set your API key
|
|
195
220
|
export PCOMPRESLR_API_KEY=your-api-key
|
|
196
221
|
|
|
197
|
-
# Compress a prompt
|
|
198
222
|
npx pcompresslr "Your prompt with repeated text here..."
|
|
199
|
-
|
|
200
|
-
# Use optimal algorithm only
|
|
201
|
-
npx pcompresslr "Your prompt here" --optimal-only
|
|
202
|
-
|
|
203
|
-
# Use greedy algorithm only
|
|
204
|
-
npx pcompresslr "Your prompt here" --greedy-only
|
|
205
223
|
```
|
|
206
224
|
|
|
207
225
|
## API Reference
|
|
@@ -219,13 +237,15 @@ new PcompresslrAPIClient(apiKey?: string, apiUrl?: string, timeout?: number)
|
|
|
219
237
|
**Parameters:**
|
|
220
238
|
- `apiKey` (string, optional): LightReach API key. Falls back to `LIGHTREACH_API_KEY` or `PCOMPRESLR_API_KEY` env vars.
|
|
221
239
|
- `apiUrl` (string, optional): Override base API URL. Falls back to `PCOMPRESLR_API_URL` env var. Default: `https://api.compress.lightreach.io`
|
|
222
|
-
- `timeout` (number, optional): Request timeout in milliseconds. Default: `
|
|
240
|
+
- `timeout` (number, optional): Request timeout in milliseconds. Default: `900000` (15 minutes)
|
|
223
241
|
|
|
224
242
|
#### Methods
|
|
225
243
|
|
|
226
244
|
##### `complete(request: CompleteV2Request): Promise<CompleteResponse>`
|
|
227
245
|
|
|
228
|
-
Messages-first completion with intelligent routing (
|
|
246
|
+
Messages-first completion with intelligent routing. Uses async job processing (enqueue + poll) for production reliability.
|
|
247
|
+
|
|
248
|
+
For direct synchronous calls, use `completeSync()` instead.
|
|
229
249
|
|
|
230
250
|
**Request Parameters (`CompleteV2Request`):**
|
|
231
251
|
|
|
@@ -233,14 +253,12 @@ Messages-first completion with intelligent routing (POST `/api/v2/complete`).
|
|
|
233
253
|
|-----------|------|---------|-------------|
|
|
234
254
|
| `messages` | `Message[]` | required | Conversation history with `role` and `content` |
|
|
235
255
|
| `llm_provider` | `'openai' \| 'anthropic' \| 'google' \| 'deepseek' \| 'moonshot'` | — | Optional provider constraint. Omit for cross-provider optimization |
|
|
236
|
-
| `desired_hle` | `number` | — | Quality preference (0-40, where 40 is SOTA). If above an admin ceiling, it is clamped down |
|
|
237
256
|
| `compress` | `boolean` | `true` | Whether to compress messages |
|
|
238
257
|
| `compress_output` | `boolean` | `false` | Whether to request compressed output from LLM |
|
|
239
|
-
| `algorithm` | `'greedy' \| 'optimal'` | `'greedy'` | Compression algorithm |
|
|
240
258
|
| `compression_config` | `object` | — | Per-role compression settings (see below) |
|
|
241
259
|
| `temperature` | `number` | — | LLM temperature parameter |
|
|
242
260
|
| `max_tokens` | `number` | — | Maximum tokens to generate |
|
|
243
|
-
| `tags` | `Record<string, string>` | — | Tags for cost attribution and
|
|
261
|
+
| `tags` | `Record<string, string>` | — | Tags for cost attribution and quality ceilings. Use `team`, `environment`, and/or `feature` keys |
|
|
244
262
|
| `max_history_messages` | `number` | — | Limit conversation history length |
|
|
245
263
|
|
|
246
264
|
**`compression_config` options:**
|
|
@@ -258,51 +276,67 @@ Messages-first completion with intelligent routing (POST `/api/v2/complete`).
|
|
|
258
276
|
|
|
259
277
|
```typescript
|
|
260
278
|
{
|
|
279
|
+
content: string; // Final response content
|
|
261
280
|
decompressed_response: string; // Final decompressed LLM response
|
|
262
281
|
compression_stats: {
|
|
263
|
-
|
|
264
|
-
compressed_size_chars: number;
|
|
282
|
+
compression_enabled: boolean;
|
|
265
283
|
original_tokens: number;
|
|
266
284
|
compressed_tokens: number;
|
|
267
|
-
compression_ratio: number;
|
|
268
285
|
token_savings: number;
|
|
269
|
-
|
|
286
|
+
compression_ratio: number;
|
|
287
|
+
token_count_exact?: boolean;
|
|
288
|
+
token_count_source?: string;
|
|
289
|
+
token_accounting_note?: string;
|
|
270
290
|
processing_time_ms?: number;
|
|
271
291
|
};
|
|
272
292
|
llm_stats: {
|
|
273
|
-
|
|
274
|
-
|
|
293
|
+
provider?: string;
|
|
294
|
+
model?: string;
|
|
295
|
+
input_tokens: number;
|
|
296
|
+
output_tokens: number;
|
|
275
297
|
total_tokens: number;
|
|
298
|
+
finish_reason?: string | null;
|
|
276
299
|
};
|
|
277
300
|
routing_info?: {
|
|
278
301
|
selected_model: string; // Model chosen by system
|
|
279
302
|
selected_provider: string; // Provider chosen by system
|
|
280
303
|
selected_model_id: string;
|
|
281
|
-
model_hle: number; // HLE score of selected model
|
|
304
|
+
model_hle: number; // HLE score of selected model (server-computed)
|
|
282
305
|
model_price_per_million: number;
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
hle_source: 'request' | 'tag' | 'global' | 'none';
|
|
286
|
-
hle_clamped: boolean; // true if admin ceiling lowered your desired_hle
|
|
306
|
+
effective_hle: number | null; // The quality ceiling that was applied
|
|
307
|
+
hle_source: 'tag' | 'global' | 'none';
|
|
287
308
|
};
|
|
288
309
|
warnings?: string[];
|
|
289
|
-
|
|
310
|
+
|
|
290
311
|
// Convenience aliases
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
312
|
+
tokens_saved?: number;
|
|
313
|
+
tokens_used?: number;
|
|
314
|
+
compression_ratio?: number;
|
|
315
|
+
cost_estimate?: number | null;
|
|
316
|
+
savings_estimate?: number | null;
|
|
295
317
|
}
|
|
296
318
|
```
|
|
297
319
|
|
|
298
|
-
##### `
|
|
320
|
+
##### `completeSync(request: CompleteV2Request): Promise<CompleteResponse>`
|
|
321
|
+
|
|
322
|
+
Direct synchronous call to POST `/api/v2/complete`. Best for small/interactive usage. For production reliability, prefer `complete()` (async job + polling).
|
|
323
|
+
|
|
324
|
+
##### `completeAsync(request, opts?): Promise<CompleteResponse>`
|
|
325
|
+
|
|
326
|
+
Explicit async job flow with configurable polling. Called internally by `complete()`.
|
|
327
|
+
|
|
328
|
+
**Options:**
|
|
329
|
+
- `pollIntervalMs` (number, default: 1000): Polling interval in milliseconds
|
|
330
|
+
- `maxWaitMs` (number, default: the client's `timeout`): Maximum time to wait for the job to finish, in milliseconds
|
|
331
|
+
- `idempotencyKey` (string, optional): Idempotency key for job creation
|
|
332
|
+
|
|
333
|
+
##### `compress(prompt, model?, tags?): Promise<CompressResponse>`
|
|
299
334
|
|
|
300
335
|
Compression-only (POST `/api/v1/compress`).
|
|
301
336
|
|
|
302
337
|
**Parameters:**
|
|
303
338
|
- `prompt` (string, required): Text to compress
|
|
304
339
|
- `model` (string, optional): Model for tokenization. Default: `'gpt-4'`
|
|
305
|
-
- `algorithm` (`'greedy' | 'optimal'`, optional): Compression algorithm. Default: `'greedy'`
|
|
306
340
|
- `tags` (`Record<string, string>`, optional): Tags for attribution
|
|
307
341
|
|
|
308
342
|
**Response (`CompressResponse`):**
|
|
@@ -349,7 +383,6 @@ Check API health status (GET `/health`).
|
|
|
349
383
|
}
|
|
350
384
|
```
|
|
351
385
|
|
|
352
|
-
|
|
353
386
|
### Message Types
|
|
354
387
|
|
|
355
388
|
```typescript
|
|
@@ -376,7 +409,7 @@ interface Message {
|
|
|
376
409
|
| `PcompresslrAPIError` | Base exception class |
|
|
377
410
|
| `APIKeyError` | Invalid or missing API key |
|
|
378
411
|
| `RateLimitError` | Rate limit exceeded |
|
|
379
|
-
| `APIRequestError` | General API errors (including routing failures) |
|
|
412
|
+
| `APIRequestError` | General API errors (including routing failures, tag validation errors) |
|
|
380
413
|
|
|
381
414
|
```typescript
|
|
382
415
|
import { APIKeyError, RateLimitError, APIRequestError } from 'compress-lightreach';
|
|
@@ -396,15 +429,10 @@ try {
|
|
|
396
429
|
|
|
397
430
|
## How It Works
|
|
398
431
|
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
2. Calculates token savings for each potential replacement
|
|
404
|
-
3. Selects optimal replacements that reduce total token count
|
|
405
|
-
4. Intelligently routes to the best model based on your quality requirements
|
|
406
|
-
5. Formats the result for easy LLM consumption
|
|
407
|
-
6. Provides perfect decompression
|
|
432
|
+
1. **Compression**: Identifies repeated substrings using a fast greedy algorithm and replaces them with shorter placeholders, reducing token count
|
|
433
|
+
2. **Routing**: Selects the cheapest model that meets the admin-configured quality ceiling (global, tag-level, or integration-level)
|
|
434
|
+
3. **LLM Call**: Sends the compressed prompt to the selected model via your BYOK provider keys
|
|
435
|
+
4. **Decompression**: Losslessly restores the model's response if output compression was enabled
|
|
408
436
|
|
|
409
437
|
## Examples
|
|
410
438
|
|
|
@@ -423,7 +451,7 @@ Write a story about a bird. The bird is very friendly.
|
|
|
423
451
|
|
|
424
452
|
const result = await client.complete({
|
|
425
453
|
messages: [{ role: "user", content: prompt }],
|
|
426
|
-
|
|
454
|
+
tags: { team: 'content', environment: 'production' },
|
|
427
455
|
});
|
|
428
456
|
|
|
429
457
|
console.log(result.decompressed_response);
|
|
@@ -441,7 +469,6 @@ const client = new PcompresslrAPIClient("your-lightreach-api-key");
|
|
|
441
469
|
|
|
442
470
|
const result = await client.complete({
|
|
443
471
|
messages: [{ role: "user", content: "Generate a long report with repeated sections..." }],
|
|
444
|
-
desired_hle: 35,
|
|
445
472
|
compress_output: true,
|
|
446
473
|
});
|
|
447
474
|
|
|
@@ -462,13 +489,13 @@ const result = await client.complete({
|
|
|
462
489
|
{ role: "assistant", content: "You can use open() with a context manager..." },
|
|
463
490
|
{ role: "user", content: "How about writing to a file?" },
|
|
464
491
|
],
|
|
465
|
-
desired_hle: 30,
|
|
466
492
|
compression_config: {
|
|
467
493
|
compress_system: false,
|
|
468
494
|
compress_user: true,
|
|
469
495
|
compress_assistant: false,
|
|
470
|
-
compress_only_last_n_user: 2,
|
|
496
|
+
compress_only_last_n_user: 2,
|
|
471
497
|
},
|
|
498
|
+
tags: { team: 'engineering', feature: 'code-assistant' },
|
|
472
499
|
});
|
|
473
500
|
```
|
|
474
501
|
|
package/dist/api-client.d.ts
CHANGED
|
@@ -28,40 +28,53 @@ export interface DecompressResponse {
|
|
|
28
28
|
processing_time_ms: number;
|
|
29
29
|
}
|
|
30
30
|
export interface CompleteResponse {
|
|
31
|
-
|
|
31
|
+
content: string;
|
|
32
32
|
compression_stats: {
|
|
33
|
-
|
|
34
|
-
compressed_size_chars: number;
|
|
33
|
+
compression_enabled: boolean;
|
|
35
34
|
original_tokens: number;
|
|
36
35
|
compressed_tokens: number;
|
|
37
|
-
compression_ratio: number;
|
|
38
36
|
token_savings: number;
|
|
39
|
-
|
|
37
|
+
compression_ratio: number;
|
|
38
|
+
token_count_exact?: boolean;
|
|
39
|
+
token_count_source?: string;
|
|
40
|
+
token_accounting_note?: string;
|
|
40
41
|
processing_time_ms?: number;
|
|
41
42
|
};
|
|
42
43
|
llm_stats: {
|
|
43
|
-
|
|
44
|
-
|
|
44
|
+
provider?: string;
|
|
45
|
+
model?: string;
|
|
46
|
+
input_tokens: number;
|
|
47
|
+
output_tokens: number;
|
|
45
48
|
total_tokens: number;
|
|
49
|
+
finish_reason?: string | null;
|
|
46
50
|
};
|
|
47
51
|
warnings?: string[];
|
|
48
52
|
routing_info?: {
|
|
49
53
|
selected_model: string;
|
|
50
54
|
selected_provider: string;
|
|
51
55
|
selected_model_id: string;
|
|
56
|
+
/** HLE score of the selected model (server-computed). */
|
|
52
57
|
model_hle: number;
|
|
53
58
|
model_price_per_million: number;
|
|
54
|
-
|
|
59
|
+
input_price_per_million?: number | null;
|
|
60
|
+
output_price_per_million?: number | null;
|
|
61
|
+
/** @deprecated Present for backward compatibility. */
|
|
62
|
+
requested_hle?: number | null;
|
|
63
|
+
/** The quality ceiling that was applied (from global, tag, or integration settings). */
|
|
55
64
|
effective_hle: number | null;
|
|
65
|
+
/** Where the effective HLE ceiling came from. */
|
|
56
66
|
hle_source: 'request' | 'tag' | 'global' | 'none';
|
|
57
|
-
|
|
67
|
+
/** @deprecated Present for backward compatibility. */
|
|
68
|
+
hle_clamped?: boolean;
|
|
58
69
|
};
|
|
59
|
-
text?: string;
|
|
60
70
|
tokens_saved?: number;
|
|
61
71
|
tokens_used?: number;
|
|
62
72
|
compression_ratio?: number;
|
|
63
73
|
cost_estimate?: number | null;
|
|
64
74
|
savings_estimate?: number | null;
|
|
75
|
+
model_hle?: number | null;
|
|
76
|
+
input_price_per_million?: number | null;
|
|
77
|
+
output_price_per_million?: number | null;
|
|
65
78
|
}
|
|
66
79
|
export type MessageRole = 'system' | 'developer' | 'user' | 'assistant';
|
|
67
80
|
export interface Message {
|
|
@@ -70,7 +83,13 @@ export interface Message {
|
|
|
70
83
|
}
|
|
71
84
|
export interface CompleteV2Request {
|
|
72
85
|
messages: Message[];
|
|
86
|
+
/** Optional provider constraint. Omit for cross-provider cost optimization. */
|
|
73
87
|
llm_provider?: 'openai' | 'anthropic' | 'google' | 'deepseek' | 'moonshot';
|
|
88
|
+
/**
|
|
89
|
+
* @deprecated Quality routing is now fully managed by admin-configured ceilings
|
|
90
|
+
* (global, tag-level, integration-level) in the dashboard. This parameter is
|
|
91
|
+
* accepted for backward compatibility but should not be used in new code.
|
|
92
|
+
*/
|
|
74
93
|
desired_hle?: number;
|
|
75
94
|
compress?: boolean;
|
|
76
95
|
compression_config?: {
|
|
@@ -80,15 +99,28 @@ export interface CompleteV2Request {
|
|
|
80
99
|
compress_only_last_n_user?: number | null;
|
|
81
100
|
};
|
|
82
101
|
compress_output?: boolean;
|
|
83
|
-
algorithm?: 'greedy'
|
|
102
|
+
algorithm?: 'greedy';
|
|
84
103
|
temperature?: number;
|
|
85
104
|
max_tokens?: number;
|
|
105
|
+
/**
|
|
106
|
+
* Tags for cost attribution and tag-level quality ceilings.
|
|
107
|
+
* Supported keys: 'team', 'environment', 'feature'.
|
|
108
|
+
* Values are validated server-side against your workspace's allowed list.
|
|
109
|
+
* The 'integration' tag is reserved for system use and should not be set manually.
|
|
110
|
+
*
|
|
111
|
+
* @example { team: 'backend', environment: 'production', feature: 'search' }
|
|
112
|
+
*/
|
|
86
113
|
tags?: Record<string, string>;
|
|
87
114
|
max_history_messages?: number;
|
|
115
|
+
/** @deprecated System selects model automatically. */
|
|
88
116
|
model?: string;
|
|
117
|
+
/** @deprecated Superseded by desired_hle, which is itself deprecated; quality is now set by admin-configured ceilings in the dashboard. */
|
|
89
118
|
hle_target_percent?: number;
|
|
119
|
+
/** @deprecated Superseded by desired_hle, which is itself deprecated; quality is now set by admin-configured ceilings in the dashboard. */
|
|
90
120
|
min_hle_score?: number;
|
|
121
|
+
/** @deprecated Always auto-selects now. */
|
|
91
122
|
auto_select_by_hle?: boolean;
|
|
123
|
+
/** @deprecated Use llm_provider instead. */
|
|
92
124
|
same_provider_only?: boolean;
|
|
93
125
|
}
|
|
94
126
|
export interface HealthCheckResponse {
|
|
@@ -138,7 +170,17 @@ export declare class PcompresslrAPIClient {
|
|
|
138
170
|
maxWaitMs?: number;
|
|
139
171
|
idempotencyKey?: string;
|
|
140
172
|
}): Promise<CompleteResponse>;
|
|
141
|
-
|
|
173
|
+
/**
|
|
174
|
+
* Compress text without making an LLM call (POST /api/v1/compress).
|
|
175
|
+
*
|
|
176
|
+
* @param prompt - Text to compress
|
|
177
|
+
* @param model - Model for tokenization (default: 'gpt-4')
|
|
178
|
+
* @param tags - Tags for attribution. Supported keys: 'team', 'environment', 'feature'.
|
|
179
|
+
*
|
|
180
|
+
* Also supports a legacy call shape: compress(prompt, model, 'greedy', tags?)
|
|
181
|
+
*/
|
|
182
|
+
compress(prompt: string, model?: string, tags?: Record<string, string>): Promise<CompressResponse>;
|
|
183
|
+
compress(prompt: string, model: string, algorithm: 'greedy', tags?: Record<string, string>): Promise<CompressResponse>;
|
|
142
184
|
decompress(llmFormat: string): Promise<DecompressResponse>;
|
|
143
185
|
healthCheck(): Promise<HealthCheckResponse>;
|
|
144
186
|
/**
|
|
@@ -149,32 +191,23 @@ export declare class PcompresslrAPIClient {
|
|
|
149
191
|
*/
|
|
150
192
|
completeSync(request: CompleteV2Request): Promise<CompleteResponse>;
|
|
151
193
|
/**
|
|
152
|
-
* Messages-first complete with intelligent model selection
|
|
194
|
+
* Messages-first complete with intelligent model selection.
|
|
153
195
|
*
|
|
154
|
-
*
|
|
155
|
-
*
|
|
196
|
+
* Uses async job processing (enqueue + poll) for production reliability.
|
|
197
|
+
* Model routing is managed by admin-configured quality ceilings (global,
|
|
198
|
+
* tag-level, integration-level) in the dashboard. The system selects the
|
|
199
|
+
* cheapest model that meets the effective ceiling.
|
|
156
200
|
*
|
|
157
201
|
* Provider API keys must be stored in your account (BYOK via dashboard).
|
|
158
202
|
*
|
|
159
203
|
* @example
|
|
160
|
-
* // Basic usage (cross-provider optimization)
|
|
161
|
-
* const response = await client.complete({
|
|
162
|
-
* messages: [{role: 'user', content: 'Hello'}],
|
|
163
|
-
* desired_hle: 30,
|
|
164
|
-
* });
|
|
165
|
-
*
|
|
166
|
-
* // Constrained to specific provider
|
|
167
204
|
* const response = await client.complete({
|
|
168
205
|
* messages: [{role: 'user', content: 'Hello'}],
|
|
169
|
-
*
|
|
170
|
-
* desired_hle: 35,
|
|
206
|
+
* tags: { team: 'backend', environment: 'production' },
|
|
171
207
|
* });
|
|
172
208
|
*
|
|
173
|
-
* // Access routing info
|
|
174
209
|
* console.log(response.routing_info?.selected_model);
|
|
175
|
-
*
|
|
176
|
-
* console.log('Admin ceiling lowered your desired HLE');
|
|
177
|
-
* }
|
|
210
|
+
* console.log(response.routing_info?.effective_hle);
|
|
178
211
|
*/
|
|
179
212
|
complete(request: CompleteV2Request): Promise<CompleteResponse>;
|
|
180
213
|
}
|
package/dist/api-client.js
CHANGED
|
@@ -206,15 +206,22 @@ class PcompresslrAPIClient {
|
|
|
206
206
|
interval = Math.min(Math.floor(interval * 1.2), 2000);
|
|
207
207
|
}
|
|
208
208
|
}
|
|
209
|
-
async compress(prompt, model = "gpt-4",
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
209
|
+
async compress(prompt, model = "gpt-4", algorithmOrTags, maybeTags) {
|
|
210
|
+
let algorithm = 'greedy';
|
|
211
|
+
let tags;
|
|
212
|
+
if (typeof algorithmOrTags === 'string') {
|
|
213
|
+
if (algorithmOrTags !== 'greedy') {
|
|
214
|
+
throw new APIRequestError(`Invalid algorithm "${algorithmOrTags}". Only "greedy" is supported.`);
|
|
215
|
+
}
|
|
216
|
+
algorithm = 'greedy';
|
|
217
|
+
tags = maybeTags;
|
|
217
218
|
}
|
|
219
|
+
else if (algorithmOrTags && typeof algorithmOrTags === 'object') {
|
|
220
|
+
tags = algorithmOrTags;
|
|
221
|
+
}
|
|
222
|
+
const data = { prompt, model, algorithm };
|
|
223
|
+
if (tags)
|
|
224
|
+
data.tags = tags;
|
|
218
225
|
return this.makeRequest("/api/v1/compress", data);
|
|
219
226
|
}
|
|
220
227
|
async decompress(llmFormat) {
|
|
@@ -281,32 +288,23 @@ class PcompresslrAPIClient {
|
|
|
281
288
|
return this.makeRequest('/api/v2/complete', data, 'POST');
|
|
282
289
|
}
|
|
283
290
|
/**
|
|
284
|
-
* Messages-first complete with intelligent model selection
|
|
291
|
+
* Messages-first complete with intelligent model selection.
|
|
285
292
|
*
|
|
286
|
-
*
|
|
287
|
-
*
|
|
293
|
+
* Uses async job processing (enqueue + poll) for production reliability.
|
|
294
|
+
* Model routing is managed by admin-configured quality ceilings (global,
|
|
295
|
+
* tag-level, integration-level) in the dashboard. The system selects the
|
|
296
|
+
* cheapest model that meets the effective ceiling.
|
|
288
297
|
*
|
|
289
298
|
* Provider API keys must be stored in your account (BYOK via dashboard).
|
|
290
299
|
*
|
|
291
300
|
* @example
|
|
292
|
-
* // Basic usage (cross-provider optimization)
|
|
293
|
-
* const response = await client.complete({
|
|
294
|
-
* messages: [{role: 'user', content: 'Hello'}],
|
|
295
|
-
* desired_hle: 30,
|
|
296
|
-
* });
|
|
297
|
-
*
|
|
298
|
-
* // Constrained to specific provider
|
|
299
301
|
* const response = await client.complete({
|
|
300
302
|
* messages: [{role: 'user', content: 'Hello'}],
|
|
301
|
-
*
|
|
302
|
-
* desired_hle: 35,
|
|
303
|
+
* tags: { team: 'backend', environment: 'production' },
|
|
303
304
|
* });
|
|
304
305
|
*
|
|
305
|
-
* // Access routing info
|
|
306
306
|
* console.log(response.routing_info?.selected_model);
|
|
307
|
-
*
|
|
308
|
-
* console.log('Admin ceiling lowered your desired HLE');
|
|
309
|
-
* }
|
|
307
|
+
* console.log(response.routing_info?.effective_hle);
|
|
310
308
|
*/
|
|
311
309
|
async complete(request) {
|
|
312
310
|
// Warn about deprecated parameters
|
|
@@ -321,27 +319,6 @@ class PcompresslrAPIClient {
|
|
|
321
319
|
console.warn('[compress-lightreach v1.0.0] HLE parameters have changed. ' +
|
|
322
320
|
'Use "desired_hle" and optional "llm_provider" instead.');
|
|
323
321
|
}
|
|
324
|
-
const data = {
|
|
325
|
-
messages: request.messages,
|
|
326
|
-
compress: request.compress ?? true,
|
|
327
|
-
compress_output: request.compress_output ?? false,
|
|
328
|
-
algorithm: request.algorithm ?? 'greedy',
|
|
329
|
-
};
|
|
330
|
-
// v1.0.0 parameters
|
|
331
|
-
if (request.llm_provider !== undefined)
|
|
332
|
-
data.llm_provider = request.llm_provider;
|
|
333
|
-
if (request.desired_hle !== undefined)
|
|
334
|
-
data.desired_hle = request.desired_hle;
|
|
335
|
-
if (request.compression_config)
|
|
336
|
-
data.compression_config = request.compression_config;
|
|
337
|
-
if (request.temperature !== undefined)
|
|
338
|
-
data.temperature = request.temperature;
|
|
339
|
-
if (request.max_tokens !== undefined)
|
|
340
|
-
data.max_tokens = request.max_tokens;
|
|
341
|
-
if (request.tags !== undefined)
|
|
342
|
-
data.tags = request.tags;
|
|
343
|
-
if (request.max_history_messages !== undefined)
|
|
344
|
-
data.max_history_messages = request.max_history_messages;
|
|
345
322
|
// Prefer async jobs for production reliability; sync remains available via /api/v2/complete
|
|
346
323
|
// by calling makeRequest directly if needed.
|
|
347
324
|
return this.completeAsync(request);
|
package/dist/cli.js
CHANGED
|
@@ -8,29 +8,17 @@ const api_client_1 = require("./api-client");
|
|
|
8
8
|
async function main() {
|
|
9
9
|
const args = process.argv.slice(2);
|
|
10
10
|
if (args.length === 0) {
|
|
11
|
-
console.log("Usage: pcompresslr <prompt>
|
|
11
|
+
console.log("Usage: pcompresslr <prompt>");
|
|
12
12
|
console.log("\nExample:");
|
|
13
13
|
console.log(' pcompresslr "hello world hello world hello world"');
|
|
14
|
-
console.log(' pcompresslr "your prompt here" --greedy-only # Only greedy');
|
|
15
|
-
console.log(' pcompresslr "your prompt here" --optimal-only # Only optimal');
|
|
16
14
|
console.log("\nNote: Requires PCOMPRESLR_API_KEY environment variable");
|
|
17
15
|
process.exit(0);
|
|
18
16
|
}
|
|
19
|
-
|
|
20
|
-
let showGreedy = true;
|
|
21
|
-
let showOptimal = true;
|
|
22
|
-
if (prompt.endsWith("--greedy-only")) {
|
|
23
|
-
prompt = args.slice(0, -1).join(" ");
|
|
24
|
-
showOptimal = false;
|
|
25
|
-
}
|
|
26
|
-
else if (prompt.endsWith("--optimal-only")) {
|
|
27
|
-
prompt = args.slice(0, -1).join(" ");
|
|
28
|
-
showGreedy = false;
|
|
29
|
-
}
|
|
17
|
+
const prompt = args.join(" ");
|
|
30
18
|
// Get API key from environment
|
|
31
19
|
const apiKey = process.env.PCOMPRESLR_API_KEY;
|
|
32
20
|
if (!apiKey) {
|
|
33
|
-
console.error("
|
|
21
|
+
console.error("Error: PCOMPRESLR_API_KEY environment variable is required.");
|
|
34
22
|
console.error("\nTo get an API key, visit https://compress.lightreach.io");
|
|
35
23
|
console.error("Then set it with: export PCOMPRESLR_API_KEY=your-key-here");
|
|
36
24
|
process.exit(1);
|
|
@@ -42,7 +30,7 @@ async function main() {
|
|
|
42
30
|
}
|
|
43
31
|
catch (error) {
|
|
44
32
|
if (error instanceof api_client_1.APIKeyError) {
|
|
45
|
-
console.error(
|
|
33
|
+
console.error(`Error: ${error.message}`);
|
|
46
34
|
process.exit(1);
|
|
47
35
|
}
|
|
48
36
|
throw error;
|
|
@@ -50,116 +38,38 @@ async function main() {
|
|
|
50
38
|
console.log(`Original prompt: ${JSON.stringify(prompt)}`);
|
|
51
39
|
console.log(`Length: ${prompt.length} characters\n`);
|
|
52
40
|
console.log("=".repeat(80));
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
llm_format: llmFormatGreedy,
|
|
72
|
-
decompressed: decompressedGreedy
|
|
73
|
-
};
|
|
74
|
-
console.log(`Compressed: ${JSON.stringify(compressedGreedy)}`);
|
|
75
|
-
console.log(`Dictionary: ${JSON.stringify(dictGreedy)}`);
|
|
76
|
-
console.log(`Compression ratio: ${(ratioGreedy * 100).toFixed(2)}%`);
|
|
77
|
-
console.log(`LLM-ready format length: ${llmFormatGreedy.length} chars`);
|
|
78
|
-
console.log(`Processing time: ${resultGreedy.processing_time_ms.toFixed(2)}ms`);
|
|
79
|
-
if (decompressedGreedy === prompt) {
|
|
80
|
-
console.log("✅ Decompression verified");
|
|
81
|
-
}
|
|
82
|
-
else {
|
|
83
|
-
console.log("❌ Decompression failed");
|
|
84
|
-
}
|
|
85
|
-
}
|
|
86
|
-
catch (error) {
|
|
87
|
-
if (error instanceof api_client_1.RateLimitError) {
|
|
88
|
-
console.error(`❌ Rate limit exceeded: ${error.message}`);
|
|
89
|
-
}
|
|
90
|
-
else if (error instanceof api_client_1.APIRequestError) {
|
|
91
|
-
console.error(`❌ API error: ${error.message}`);
|
|
92
|
-
}
|
|
93
|
-
else {
|
|
94
|
-
throw error;
|
|
95
|
-
}
|
|
96
|
-
}
|
|
97
|
-
}
|
|
98
|
-
if (showOptimal) {
|
|
99
|
-
console.log("\n🔸 OPTIMAL COMPRESSOR (DP, O(n²), globally optimal)");
|
|
100
|
-
console.log("-".repeat(80));
|
|
101
|
-
try {
|
|
102
|
-
const resultOptimal = await client.compress(prompt, "gpt-4", "optimal");
|
|
103
|
-
const compressedOptimal = resultOptimal.compressed;
|
|
104
|
-
const dictOptimal = resultOptimal.dictionary;
|
|
105
|
-
const ratioOptimal = resultOptimal.compression_ratio;
|
|
106
|
-
const llmFormatOptimal = resultOptimal.llm_format;
|
|
107
|
-
// Verify decompression
|
|
108
|
-
const decompressResult = await client.decompress(llmFormatOptimal);
|
|
109
|
-
const decompressedOptimal = decompressResult.decompressed;
|
|
110
|
-
results['optimal'] = {
|
|
111
|
-
compressed: compressedOptimal,
|
|
112
|
-
dict: dictOptimal,
|
|
113
|
-
ratio: ratioOptimal,
|
|
114
|
-
llm_format: llmFormatOptimal,
|
|
115
|
-
decompressed: decompressedOptimal
|
|
116
|
-
};
|
|
117
|
-
console.log(`Compressed: ${JSON.stringify(compressedOptimal)}`);
|
|
118
|
-
console.log(`Dictionary: ${JSON.stringify(dictOptimal)}`);
|
|
119
|
-
console.log(`Compression ratio: ${(ratioOptimal * 100).toFixed(2)}%`);
|
|
120
|
-
console.log(`LLM-ready format length: ${llmFormatOptimal.length} chars`);
|
|
121
|
-
console.log(`Processing time: ${resultOptimal.processing_time_ms.toFixed(2)}ms`);
|
|
122
|
-
if (decompressedOptimal === prompt) {
|
|
123
|
-
console.log("✅ Decompression verified");
|
|
124
|
-
}
|
|
125
|
-
else {
|
|
126
|
-
console.log("❌ Decompression failed");
|
|
127
|
-
}
|
|
41
|
+
console.log("\nGREEDY COMPRESSOR");
|
|
42
|
+
console.log("-".repeat(80));
|
|
43
|
+
try {
|
|
44
|
+
const result = await client.compress(prompt, "gpt-4");
|
|
45
|
+
const compressed = result.compressed;
|
|
46
|
+
const dictionary = result.dictionary;
|
|
47
|
+
const ratio = result.compression_ratio;
|
|
48
|
+
const llmFormat = result.llm_format;
|
|
49
|
+
// Verify decompression
|
|
50
|
+
const decompressResult = await client.decompress(llmFormat);
|
|
51
|
+
const decompressed = decompressResult.decompressed;
|
|
52
|
+
console.log(`Compressed: ${JSON.stringify(compressed)}`);
|
|
53
|
+
console.log(`Dictionary: ${JSON.stringify(dictionary)}`);
|
|
54
|
+
console.log(`Compression ratio: ${(ratio * 100).toFixed(2)}%`);
|
|
55
|
+
console.log(`LLM-ready format length: ${llmFormat.length} chars`);
|
|
56
|
+
console.log(`Processing time: ${result.processing_time_ms.toFixed(2)}ms`);
|
|
57
|
+
if (decompressed === prompt) {
|
|
58
|
+
console.log("Decompression verified");
|
|
128
59
|
}
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
console.error(`❌ Rate limit exceeded: ${error.message}`);
|
|
132
|
-
}
|
|
133
|
-
else if (error instanceof api_client_1.APIRequestError) {
|
|
134
|
-
console.error(`❌ API error: ${error.message}`);
|
|
135
|
-
}
|
|
136
|
-
else {
|
|
137
|
-
throw error;
|
|
138
|
-
}
|
|
60
|
+
else {
|
|
61
|
+
console.log("Decompression failed");
|
|
139
62
|
}
|
|
140
63
|
}
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
console.log("📊 COMPARISON");
|
|
145
|
-
console.log("-".repeat(80));
|
|
146
|
-
const ratioDiff = results['optimal'].ratio - results['greedy'].ratio;
|
|
147
|
-
if (ratioDiff < 0) {
|
|
148
|
-
console.log(`✅ Optimal is ${Math.abs(ratioDiff * 100).toFixed(2)}% better (smaller ratio)`);
|
|
64
|
+
catch (error) {
|
|
65
|
+
if (error instanceof api_client_1.RateLimitError) {
|
|
66
|
+
console.error(`Rate limit exceeded: ${error.message}`);
|
|
149
67
|
}
|
|
150
|
-
else if (
|
|
151
|
-
console.
|
|
68
|
+
else if (error instanceof api_client_1.APIRequestError) {
|
|
69
|
+
console.error(`API error: ${error.message}`);
|
|
152
70
|
}
|
|
153
71
|
else {
|
|
154
|
-
|
|
155
|
-
}
|
|
156
|
-
console.log(`\nGreedy ratio: ${(results['greedy'].ratio * 100).toFixed(2)}%`);
|
|
157
|
-
console.log(`Optimal ratio: ${(results['optimal'].ratio * 100).toFixed(2)}%`);
|
|
158
|
-
console.log(`Difference: ${(ratioDiff * 100).toFixed(2)}%`);
|
|
159
|
-
const greedyDictSize = Object.keys(results['greedy'].dict).length;
|
|
160
|
-
const optimalDictSize = Object.keys(results['optimal'].dict).length;
|
|
161
|
-
if (greedyDictSize !== optimalDictSize) {
|
|
162
|
-
console.log(`\nDictionary size: Greedy=${greedyDictSize}, Optimal=${optimalDictSize}`);
|
|
72
|
+
throw error;
|
|
163
73
|
}
|
|
164
74
|
}
|
|
165
75
|
}
|
package/dist/core.d.ts
CHANGED
|
@@ -17,20 +17,33 @@ export interface CompressionConfig {
|
|
|
17
17
|
}
|
|
18
18
|
export interface CompleteOptions {
|
|
19
19
|
messages: Message[];
|
|
20
|
+
/** @deprecated System selects model automatically. */
|
|
20
21
|
model?: string;
|
|
21
22
|
provider?: 'openai' | 'anthropic' | 'google';
|
|
23
|
+
/**
|
|
24
|
+
* @deprecated Quality routing is now fully managed by admin-configured ceilings
|
|
25
|
+
* in the dashboard. Accepted for backward compatibility.
|
|
26
|
+
*/
|
|
22
27
|
desiredHle?: number;
|
|
23
28
|
compress?: boolean;
|
|
24
29
|
compressionConfig?: CompressionConfig;
|
|
25
30
|
compressOutput?: boolean;
|
|
26
|
-
useOptimal?: boolean;
|
|
27
31
|
mode?: 'async' | 'sync';
|
|
32
|
+
/** @deprecated Use desiredHle instead. */
|
|
28
33
|
hleTargetPercent?: number;
|
|
34
|
+
/** @deprecated Use desiredHle instead. */
|
|
29
35
|
minHleScore?: number;
|
|
36
|
+
/** @deprecated Always auto-selects now. */
|
|
30
37
|
autoSelectByHle?: boolean;
|
|
38
|
+
/** @deprecated Use provider instead. */
|
|
31
39
|
sameProviderOnly?: boolean;
|
|
32
40
|
temperature?: number;
|
|
33
41
|
maxTokens?: number;
|
|
42
|
+
/**
|
|
43
|
+
* Tags for cost attribution and quality ceilings.
|
|
44
|
+
* Supported keys: 'team', 'environment', 'feature'.
|
|
45
|
+
* The 'integration' tag is reserved for system use.
|
|
46
|
+
*/
|
|
34
47
|
tags?: Record<string, string>;
|
|
35
48
|
maxHistoryMessages?: number;
|
|
36
49
|
}
|
|
@@ -38,13 +51,11 @@ export declare class LightReach {
|
|
|
38
51
|
private apiClient;
|
|
39
52
|
private defaultModel;
|
|
40
53
|
private defaultProvider;
|
|
41
|
-
private useOptimal;
|
|
42
54
|
constructor(options?: {
|
|
43
55
|
apiKey?: string;
|
|
44
56
|
apiUrl?: string;
|
|
45
57
|
defaultModel?: string;
|
|
46
58
|
defaultProvider?: 'openai' | 'anthropic' | 'google';
|
|
47
|
-
useOptimal?: boolean;
|
|
48
59
|
});
|
|
49
60
|
complete(options: CompleteOptions): Promise<CompleteResponse>;
|
|
50
61
|
/**
|
|
@@ -52,7 +63,6 @@ export declare class LightReach {
|
|
|
52
63
|
*/
|
|
53
64
|
compress(text: string, options?: {
|
|
54
65
|
model?: string;
|
|
55
|
-
algorithm?: 'greedy' | 'optimal';
|
|
56
66
|
tags?: Record<string, string>;
|
|
57
67
|
}): Promise<CompressResponse>;
|
|
58
68
|
}
|
package/dist/core.js
CHANGED
|
@@ -13,11 +13,9 @@ class LightReach {
|
|
|
13
13
|
constructor(options = {}) {
|
|
14
14
|
this.defaultModel = options.defaultModel ?? 'gpt-4';
|
|
15
15
|
this.defaultProvider = options.defaultProvider ?? 'openai';
|
|
16
|
-
this.useOptimal = options.useOptimal ?? false;
|
|
17
16
|
this.apiClient = new api_client_1.PcompresslrAPIClient(options.apiKey, options.apiUrl);
|
|
18
17
|
}
|
|
19
18
|
async complete(options) {
|
|
20
|
-
const algorithm = (options.useOptimal ?? this.useOptimal) ? 'optimal' : 'greedy';
|
|
21
19
|
const cfg = options.compressionConfig
|
|
22
20
|
? {
|
|
23
21
|
compress_system: options.compressionConfig.compressSystem ?? false,
|
|
@@ -34,7 +32,6 @@ class LightReach {
|
|
|
34
32
|
compress: options.compress ?? true,
|
|
35
33
|
compression_config: cfg,
|
|
36
34
|
compress_output: options.compressOutput ?? false,
|
|
37
|
-
algorithm,
|
|
38
35
|
hle_target_percent: options.hleTargetPercent,
|
|
39
36
|
min_hle_score: options.minHleScore,
|
|
40
37
|
auto_select_by_hle: options.autoSelectByHle,
|
|
@@ -54,7 +51,6 @@ class LightReach {
|
|
|
54
51
|
// We do NOT fabricate cost estimates here since the API response does not include pricing data.
|
|
55
52
|
return {
|
|
56
53
|
...resp,
|
|
57
|
-
text: resp.text ?? resp.decompressed_response,
|
|
58
54
|
tokens_saved: resp.tokens_saved ?? resp.compression_stats?.token_savings,
|
|
59
55
|
tokens_used: resp.tokens_used ?? resp.llm_stats?.total_tokens,
|
|
60
56
|
compression_ratio: resp.compression_ratio ?? resp.compression_stats?.compression_ratio,
|
|
@@ -85,7 +81,7 @@ class LightReach {
|
|
|
85
81
|
* Compress text without making an LLM call (POST /api/v1/compress).
|
|
86
82
|
*/
|
|
87
83
|
async compress(text, options) {
|
|
88
|
-
return await this.apiClient.compress(text, options?.model ?? this.defaultModel, options?.
|
|
84
|
+
return await this.apiClient.compress(text, options?.model ?? this.defaultModel, options?.tags);
|
|
89
85
|
}
|
|
90
86
|
}
|
|
91
87
|
exports.LightReach = LightReach;
|
package/package.json
CHANGED