compress-lightreach 1.0.6 → 1.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +91 -81
- package/dist/api-client.d.ts +44 -23
- package/dist/api-client.js +7 -16
- package/dist/cli.js +0 -0
- package/dist/core.d.ts +14 -0
- package/dist/core.js +0 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -10,7 +10,7 @@ Compress Light Reach is a Node.js/TypeScript SDK that provides intelligent model
|
|
|
10
10
|
|
|
11
11
|
## Features
|
|
12
12
|
|
|
13
|
-
- **Intelligent Model Routing**: Automatically selects optimal model based on quality
|
|
13
|
+
- **Intelligent Model Routing**: Automatically selects the optimal model based on admin-configured quality settings and available provider keys
|
|
14
14
|
- **Token-aware Compression**: Replaces repeated substrings with shorter placeholders using a fast greedy algorithm
|
|
15
15
|
- **Lossless**: Perfect decompression guaranteed
|
|
16
16
|
- **Output Compression**: Optional model output compression support
|
|
@@ -37,7 +37,7 @@ The SDK uses **intelligent model routing** and targets `POST /api/v2/complete`.
|
|
|
37
37
|
|
|
38
38
|
- Authenticate with your **LightReach API key** (env var `PCOMPRESLR_API_KEY` or `LIGHTREACH_API_KEY`)
|
|
39
39
|
- Manage **provider keys** (OpenAI/Anthropic/Google/etc.) in the dashboard (BYOK)
|
|
40
|
-
- System automatically selects optimal model based on
|
|
40
|
+
- System automatically selects the optimal model based on admin-configured quality settings
|
|
41
41
|
|
|
42
42
|
```typescript
|
|
43
43
|
import { PcompresslrAPIClient } from 'compress-lightreach';
|
|
@@ -49,7 +49,7 @@ const result = await client.complete({
|
|
|
49
49
|
{ role: 'system', content: 'You are a helpful assistant.' },
|
|
50
50
|
{ role: 'user', content: 'Explain quantum computing in simple terms.' },
|
|
51
51
|
],
|
|
52
|
-
|
|
52
|
+
tags: { team: 'backend', environment: 'production' },
|
|
53
53
|
});
|
|
54
54
|
|
|
55
55
|
console.log(result.decompressed_response);
|
|
@@ -61,15 +61,15 @@ console.log(`Token savings: ${result.compression_stats.token_savings}`);
|
|
|
61
61
|
|
|
62
62
|
LightReach also exposes a **strict OpenAI-compatible** surface (including streaming SSE) so you can use standard OpenAI tooling without changing your app.
|
|
63
63
|
|
|
64
|
-
- **Cursor base URL**: `https://compress.lightreach.io/v1/cursor`
|
|
65
|
-
- **Generic OpenAI-compatible base URL**: `https://compress.lightreach.io/v1`
|
|
64
|
+
- **Cursor base URL**: `https://api.compress.lightreach.io/v1/cursor`
|
|
65
|
+
- **Generic OpenAI-compatible base URL**: `https://api.compress.lightreach.io/v1`
|
|
66
66
|
- **Endpoints**: `GET /models`, `POST /chat/completions`
|
|
67
67
|
- **Model id**: `lightreach`
|
|
68
68
|
|
|
69
69
|
Example (cURL):
|
|
70
70
|
|
|
71
71
|
```bash
|
|
72
|
-
curl -sS https://compress.lightreach.io/v1/chat/completions \
|
|
72
|
+
curl -sS https://api.compress.lightreach.io/v1/chat/completions \
|
|
73
73
|
-H "Authorization: Bearer lr_your_lightreach_key" \
|
|
74
74
|
-H "Content-Type: application/json" \
|
|
75
75
|
-d '{
|
|
@@ -79,84 +79,95 @@ curl -sS https://compress.lightreach.io/v1/chat/completions \
|
|
|
79
79
|
}'
|
|
80
80
|
```
|
|
81
81
|
|
|
82
|
-
|
|
82
|
+
## Tags
|
|
83
|
+
|
|
84
|
+
Tags provide **cost attribution** and enable **admin-controlled quality ceilings** per tag. The system supports three tag categories that you can set on requests:
|
|
85
|
+
|
|
86
|
+
| Tag Key | Description | Example Values |
|
|
87
|
+
|---------|-------------|----------------|
|
|
88
|
+
| `team` | Your team or group | `"backend"`, `"ml-platform"`, `"marketing"` |
|
|
89
|
+
| `environment` | Deployment environment | `"development"`, `"staging"`, `"production"` |
|
|
90
|
+
| `feature` | Feature or use case | `"search"`, `"chat"`, `"summarization"` |
|
|
91
|
+
|
|
92
|
+
Tags are validated server-side. Your workspace admin can configure allowed values for each tag category via the dashboard. If a tag value is not in the allowed list, the request may trigger a warning or be rejected, depending on your workspace's enforcement mode.
|
|
83
93
|
|
|
84
94
|
```typescript
|
|
85
95
|
const result = await client.complete({
|
|
86
|
-
messages: [{ role: 'user', content: '
|
|
87
|
-
|
|
88
|
-
|
|
96
|
+
messages: [{ role: 'user', content: 'Summarize this document...' }],
|
|
97
|
+
tags: {
|
|
98
|
+
team: 'backend',
|
|
99
|
+
environment: 'production',
|
|
100
|
+
feature: 'summarization',
|
|
101
|
+
},
|
|
89
102
|
});
|
|
90
|
-
|
|
91
|
-
console.log(result.decompressed_response);
|
|
92
103
|
```
|
|
93
104
|
|
|
94
|
-
|
|
105
|
+
> **Note:** The `integration` tag is reserved for system use (e.g., Cursor, Claude Code) and should not be set manually. The `project` tag is also available for workspace-level project attribution — see your dashboard for configuration.
|
|
106
|
+
|
|
107
|
+
## Intelligent Model Routing
|
|
95
108
|
|
|
96
|
-
The system
|
|
109
|
+
Model routing is fully managed by your workspace admin via the dashboard. The system uses **HLE (Humanity's Last Exam)** scores — a standardized benchmark — to determine model quality. Admins configure quality ceilings at three levels:
|
|
110
|
+
|
|
111
|
+
- **Global ceiling**: Set via the HLE slider in the dashboard. Applies to all requests.
|
|
112
|
+
- **Tag-level ceilings**: Set per tag (e.g., `environment=development` gets a lower ceiling to save costs).
|
|
113
|
+
- **Integration-level ceilings**: Set per integration (e.g., Cursor, Claude Code).
|
|
114
|
+
|
|
115
|
+
The routing engine picks the **cheapest model** whose HLE score meets the effective ceiling. HLE scores are maintained server-side and cannot be overridden by SDK callers.
|
|
97
116
|
|
|
98
117
|
```typescript
|
|
99
118
|
import { PcompresslrAPIClient } from 'compress-lightreach';
|
|
100
119
|
|
|
101
120
|
const client = new PcompresslrAPIClient("your-lightreach-api-key");
|
|
102
121
|
|
|
103
|
-
// Cross-provider optimization: system picks cheapest model meeting your quality bar
|
|
104
122
|
const result = await client.complete({
|
|
105
123
|
messages: [{ role: 'user', content: 'Explain quantum computing' }],
|
|
106
|
-
|
|
124
|
+
tags: { team: 'backend', environment: 'production' },
|
|
107
125
|
});
|
|
108
126
|
|
|
109
|
-
// Check what was selected
|
|
110
127
|
console.log(result.routing_info?.selected_model); // e.g., "gpt-4o-mini"
|
|
111
128
|
console.log(result.routing_info?.selected_provider); // e.g., "openai"
|
|
112
129
|
console.log(result.routing_info?.model_hle); // e.g., 32.5
|
|
113
130
|
console.log(result.routing_info?.model_price_per_million); // e.g., 0.15
|
|
114
131
|
```
|
|
115
132
|
|
|
133
|
+
### Routing Response
|
|
134
|
+
|
|
135
|
+
Every `complete()` response includes `routing_info` with full transparency into the routing decision:
|
|
136
|
+
|
|
137
|
+
```typescript
|
|
138
|
+
const info = result.routing_info;
|
|
139
|
+
console.log(`Model: ${info?.selected_model}`);
|
|
140
|
+
console.log(`Provider: ${info?.selected_provider}`);
|
|
141
|
+
console.log(`Model HLE: ${info?.model_hle}`);
|
|
142
|
+
console.log(`Effective HLE ceiling: ${info?.effective_hle}`);
|
|
143
|
+
console.log(`Ceiling source: ${info?.hle_source}`); // "tag", "global", or "none"
|
|
144
|
+
```
|
|
145
|
+
|
|
116
146
|
### Provider-Constrained Routing
|
|
117
147
|
|
|
118
148
|
Optionally constrain to a specific provider:
|
|
119
149
|
|
|
120
150
|
```typescript
|
|
121
|
-
// Only use OpenAI models, but pick the cheapest one meeting HLE 35
|
|
122
151
|
const result = await client.complete({
|
|
123
152
|
messages: [{ role: 'user', content: 'Write a poem' }],
|
|
124
|
-
llm_provider: '
|
|
125
|
-
desired_hle: 35,
|
|
153
|
+
llm_provider: 'anthropic',
|
|
126
154
|
});
|
|
127
155
|
```
|
|
128
156
|
|
|
129
|
-
###
|
|
130
|
-
|
|
131
|
-
Admins can set quality **ceilings** via the dashboard (global or per-tag) to control costs. Your `desired_hle` is a preference; if it exceeds an admin-set ceiling, the request will **silently clamp** to the ceiling and proceed.
|
|
157
|
+
### With Output Compression
|
|
132
158
|
|
|
133
159
|
```typescript
|
|
134
|
-
// Admin set global HLE ceiling to 30%
|
|
135
|
-
// Requesting above the ceiling will be clamped to 30 (no error)
|
|
136
160
|
const result = await client.complete({
|
|
137
|
-
messages: [{ role: 'user', content: '
|
|
138
|
-
|
|
139
|
-
tags: { env: 'production' },
|
|
140
|
-
});
|
|
141
|
-
|
|
142
|
-
// Correct usage: request within ceiling
|
|
143
|
-
const result = await client.complete({
|
|
144
|
-
messages: [{ role: 'user', content: 'Process payment' }],
|
|
145
|
-
desired_hle: 25, // OK: below ceiling of 30
|
|
146
|
-
tags: { env: 'production' },
|
|
161
|
+
messages: [{ role: 'user', content: 'Generate a long report...' }],
|
|
162
|
+
compress_output: true,
|
|
147
163
|
});
|
|
148
164
|
|
|
149
|
-
|
|
150
|
-
if (result.routing_info?.hle_clamped) {
|
|
151
|
-
console.log(`HLE lowered from ${result.routing_info.requested_hle} ` +
|
|
152
|
-
`to ${result.routing_info.effective_hle} ` +
|
|
153
|
-
`by ${result.routing_info.hle_source}-level ceiling`);
|
|
154
|
-
}
|
|
165
|
+
console.log(result.decompressed_response);
|
|
155
166
|
```
|
|
156
167
|
|
|
157
168
|
### With Compression Config
|
|
158
169
|
|
|
159
|
-
|
|
170
|
+
Control which message roles get compressed:
|
|
160
171
|
|
|
161
172
|
```typescript
|
|
162
173
|
import { PcompresslrAPIClient } from 'compress-lightreach';
|
|
@@ -165,7 +176,6 @@ const client = new PcompresslrAPIClient("your-lightreach-api-key");
|
|
|
165
176
|
|
|
166
177
|
const result = await client.complete({
|
|
167
178
|
messages: [{ role: 'user', content: 'Hello!' }],
|
|
168
|
-
desired_hle: 30,
|
|
169
179
|
compress: true,
|
|
170
180
|
compress_output: false,
|
|
171
181
|
compression_config: {
|
|
@@ -176,14 +186,13 @@ const result = await client.complete({
|
|
|
176
186
|
},
|
|
177
187
|
temperature: 0.7,
|
|
178
188
|
max_tokens: 1000,
|
|
179
|
-
tags: {
|
|
189
|
+
tags: { team: 'backend', environment: 'production' },
|
|
180
190
|
});
|
|
181
191
|
|
|
182
192
|
console.log(result.decompressed_response);
|
|
183
193
|
console.log(`Model used: ${result.routing_info?.selected_model}`);
|
|
184
194
|
```
|
|
185
195
|
|
|
186
|
-
|
|
187
196
|
### Compression Only (No LLM Call)
|
|
188
197
|
|
|
189
198
|
```typescript
|
|
@@ -191,11 +200,10 @@ import { PcompresslrAPIClient } from 'compress-lightreach';
|
|
|
191
200
|
|
|
192
201
|
const client = new PcompresslrAPIClient("your-lightreach-api-key");
|
|
193
202
|
|
|
194
|
-
// Compress text without making an LLM call
|
|
195
203
|
const compressed = await client.compress(
|
|
196
204
|
"Your text with repeated content here...",
|
|
197
|
-
"gpt-4",
|
|
198
|
-
{
|
|
205
|
+
"gpt-4",
|
|
206
|
+
{ team: 'backend' },
|
|
199
207
|
);
|
|
200
208
|
|
|
201
209
|
console.log(compressed.llm_format);
|
|
@@ -209,10 +217,8 @@ console.log(decompressed.decompressed);
|
|
|
209
217
|
### Command Line Interface
|
|
210
218
|
|
|
211
219
|
```bash
|
|
212
|
-
# Set your API key
|
|
213
220
|
export PCOMPRESLR_API_KEY=your-api-key
|
|
214
221
|
|
|
215
|
-
# Compress a prompt
|
|
216
222
|
npx pcompresslr "Your prompt with repeated text here..."
|
|
217
223
|
```
|
|
218
224
|
|
|
@@ -237,7 +243,9 @@ new PcompresslrAPIClient(apiKey?: string, apiUrl?: string, timeout?: number)
|
|
|
237
243
|
|
|
238
244
|
##### `complete(request: CompleteV2Request): Promise<CompleteResponse>`
|
|
239
245
|
|
|
240
|
-
Messages-first completion with intelligent routing (
|
|
246
|
+
Messages-first completion with intelligent routing. Uses async job processing (enqueue + poll) for production reliability.
|
|
247
|
+
|
|
248
|
+
For direct synchronous calls, use `completeSync()` instead.
|
|
241
249
|
|
|
242
250
|
**Request Parameters (`CompleteV2Request`):**
|
|
243
251
|
|
|
@@ -245,13 +253,12 @@ Messages-first completion with intelligent routing (POST `/api/v2/complete`).
|
|
|
245
253
|
|-----------|------|---------|-------------|
|
|
246
254
|
| `messages` | `Message[]` | required | Conversation history with `role` and `content` |
|
|
247
255
|
| `llm_provider` | `'openai' \| 'anthropic' \| 'google' \| 'deepseek' \| 'moonshot'` | — | Optional provider constraint. Omit for cross-provider optimization |
|
|
248
|
-
| `desired_hle` | `number` | — | Quality ceiling (0-100). If above an admin ceiling, it is clamped down |
|
|
249
256
|
| `compress` | `boolean` | `true` | Whether to compress messages |
|
|
250
257
|
| `compress_output` | `boolean` | `false` | Whether to request compressed output from LLM |
|
|
251
258
|
| `compression_config` | `object` | — | Per-role compression settings (see below) |
|
|
252
259
|
| `temperature` | `number` | — | LLM temperature parameter |
|
|
253
260
|
| `max_tokens` | `number` | — | Maximum tokens to generate |
|
|
254
|
-
| `tags` | `Record<string, string>` | — | Tags for cost attribution and
|
|
261
|
+
| `tags` | `Record<string, string>` | — | Tags for cost attribution and quality ceilings. Use `team`, `environment`, and/or `feature` keys |
|
|
255
262
|
| `max_history_messages` | `number` | — | Limit conversation history length |
|
|
256
263
|
|
|
257
264
|
**`compression_config` options:**
|
|
@@ -269,6 +276,7 @@ Messages-first completion with intelligent routing (POST `/api/v2/complete`).
|
|
|
269
276
|
|
|
270
277
|
```typescript
|
|
271
278
|
{
|
|
279
|
+
content: string; // Final response content
|
|
272
280
|
decompressed_response: string; // Final decompressed LLM response
|
|
273
281
|
compression_stats: {
|
|
274
282
|
compression_enabled: boolean;
|
|
@@ -293,33 +301,42 @@ Messages-first completion with intelligent routing (POST `/api/v2/complete`).
|
|
|
293
301
|
selected_model: string; // Model chosen by system
|
|
294
302
|
selected_provider: string; // Provider chosen by system
|
|
295
303
|
selected_model_id: string;
|
|
296
|
-
model_hle: number; // HLE score of selected model
|
|
304
|
+
model_hle: number; // HLE score of selected model (server-computed)
|
|
297
305
|
model_price_per_million: number;
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
hle_source: 'request' | 'tag' | 'global' | 'none';
|
|
301
|
-
hle_clamped: boolean; // true if admin ceiling lowered your desired_hle
|
|
306
|
+
effective_hle: number | null; // The quality ceiling that was applied
|
|
307
|
+
hle_source: 'tag' | 'global' | 'none';
|
|
302
308
|
};
|
|
303
309
|
warnings?: string[];
|
|
304
|
-
|
|
310
|
+
|
|
305
311
|
// Convenience aliases
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
312
|
+
tokens_saved?: number;
|
|
313
|
+
tokens_used?: number;
|
|
314
|
+
compression_ratio?: number;
|
|
315
|
+
cost_estimate?: number | null;
|
|
316
|
+
savings_estimate?: number | null;
|
|
310
317
|
}
|
|
311
318
|
```
|
|
312
319
|
|
|
313
|
-
##### `
|
|
320
|
+
##### `completeSync(request: CompleteV2Request): Promise<CompleteResponse>`
|
|
321
|
+
|
|
322
|
+
Direct synchronous call to POST `/api/v2/complete`. Best for small/interactive usage. For production reliability, prefer `complete()` (async job + polling).
|
|
323
|
+
|
|
324
|
+
##### `completeAsync(request, opts?): Promise<CompleteResponse>`
|
|
325
|
+
|
|
326
|
+
Explicit async job flow with configurable polling. Called internally by `complete()`.
|
|
314
327
|
|
|
315
|
-
|
|
328
|
+
**Options:**
|
|
329
|
+
- `pollIntervalMs` (number, default: 1000): Polling interval in milliseconds
|
|
330
|
+
- `maxWaitMs` (number, default: the client's `timeout`): Maximum wait time in milliseconds
|
|
331
|
+
- `idempotencyKey` (string, optional): Idempotency key for job creation
|
|
332
|
+
|
|
333
|
+
##### `compress(prompt, model?, tags?): Promise<CompressResponse>`
|
|
316
334
|
|
|
317
335
|
Compression-only (POST `/api/v1/compress`).
|
|
318
336
|
|
|
319
337
|
**Parameters:**
|
|
320
338
|
- `prompt` (string, required): Text to compress
|
|
321
339
|
- `model` (string, optional): Model for tokenization. Default: `'gpt-4'`
|
|
322
|
-
- `algorithm` (`"greedy"`, optional): Legacy-only parameter. Only `"greedy"` is supported.
|
|
323
340
|
- `tags` (`Record<string, string>`, optional): Tags for attribution
|
|
324
341
|
|
|
325
342
|
**Response (`CompressResponse`):**
|
|
@@ -366,7 +383,6 @@ Check API health status (GET `/health`).
|
|
|
366
383
|
}
|
|
367
384
|
```
|
|
368
385
|
|
|
369
|
-
|
|
370
386
|
### Message Types
|
|
371
387
|
|
|
372
388
|
```typescript
|
|
@@ -393,7 +409,7 @@ interface Message {
|
|
|
393
409
|
| `PcompresslrAPIError` | Base exception class |
|
|
394
410
|
| `APIKeyError` | Invalid or missing API key |
|
|
395
411
|
| `RateLimitError` | Rate limit exceeded |
|
|
396
|
-
| `APIRequestError` | General API errors (including routing failures) |
|
|
412
|
+
| `APIRequestError` | General API errors (including routing failures, tag validation errors) |
|
|
397
413
|
|
|
398
414
|
```typescript
|
|
399
415
|
import { APIKeyError, RateLimitError, APIRequestError } from 'compress-lightreach';
|
|
@@ -413,15 +429,10 @@ try {
|
|
|
413
429
|
|
|
414
430
|
## How It Works
|
|
415
431
|
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
2. Calculates token savings for each potential replacement
|
|
421
|
-
3. Selects optimal replacements that reduce total token count
|
|
422
|
-
4. Intelligently routes to the best model based on your quality requirements
|
|
423
|
-
5. Formats the result for easy LLM consumption
|
|
424
|
-
6. Provides perfect decompression
|
|
432
|
+
1. **Compression**: Identifies repeated substrings using efficient algorithms and replaces them with shorter placeholders, reducing token count
|
|
433
|
+
2. **Routing**: Selects the cheapest model that meets the admin-configured quality ceiling (global, tag-level, or integration-level)
|
|
434
|
+
3. **LLM Call**: Sends the compressed prompt to the selected model via your BYOK provider keys
|
|
435
|
+
4. **Decompression**: Losslessly restores the model's response if output compression was enabled
|
|
425
436
|
|
|
426
437
|
## Examples
|
|
427
438
|
|
|
@@ -440,7 +451,7 @@ Write a story about a bird. The bird is very friendly.
|
|
|
440
451
|
|
|
441
452
|
const result = await client.complete({
|
|
442
453
|
messages: [{ role: "user", content: prompt }],
|
|
443
|
-
|
|
454
|
+
tags: { team: 'content', environment: 'production' },
|
|
444
455
|
});
|
|
445
456
|
|
|
446
457
|
console.log(result.decompressed_response);
|
|
@@ -458,7 +469,6 @@ const client = new PcompresslrAPIClient("your-lightreach-api-key");
|
|
|
458
469
|
|
|
459
470
|
const result = await client.complete({
|
|
460
471
|
messages: [{ role: "user", content: "Generate a long report with repeated sections..." }],
|
|
461
|
-
desired_hle: 35,
|
|
462
472
|
compress_output: true,
|
|
463
473
|
});
|
|
464
474
|
|
|
@@ -479,13 +489,13 @@ const result = await client.complete({
|
|
|
479
489
|
{ role: "assistant", content: "You can use open() with a context manager..." },
|
|
480
490
|
{ role: "user", content: "How about writing to a file?" },
|
|
481
491
|
],
|
|
482
|
-
desired_hle: 30,
|
|
483
492
|
compression_config: {
|
|
484
493
|
compress_system: false,
|
|
485
494
|
compress_user: true,
|
|
486
495
|
compress_assistant: false,
|
|
487
|
-
compress_only_last_n_user: 2,
|
|
496
|
+
compress_only_last_n_user: 2,
|
|
488
497
|
},
|
|
498
|
+
tags: { team: 'engineering', feature: 'code-assistant' },
|
|
489
499
|
});
|
|
490
500
|
```
|
|
491
501
|
|
package/dist/api-client.d.ts
CHANGED
|
@@ -28,7 +28,7 @@ export interface DecompressResponse {
|
|
|
28
28
|
processing_time_ms: number;
|
|
29
29
|
}
|
|
30
30
|
export interface CompleteResponse {
|
|
31
|
-
|
|
31
|
+
content: string;
|
|
32
32
|
compression_stats: {
|
|
33
33
|
compression_enabled: boolean;
|
|
34
34
|
original_tokens: number;
|
|
@@ -53,19 +53,28 @@ export interface CompleteResponse {
|
|
|
53
53
|
selected_model: string;
|
|
54
54
|
selected_provider: string;
|
|
55
55
|
selected_model_id: string;
|
|
56
|
+
/** HLE score of the selected model (server-computed). */
|
|
56
57
|
model_hle: number;
|
|
57
58
|
model_price_per_million: number;
|
|
58
|
-
|
|
59
|
+
input_price_per_million?: number | null;
|
|
60
|
+
output_price_per_million?: number | null;
|
|
61
|
+
/** @deprecated Present for backward compatibility. */
|
|
62
|
+
requested_hle?: number | null;
|
|
63
|
+
/** The quality ceiling that was applied (from global, tag, or integration settings). */
|
|
59
64
|
effective_hle: number | null;
|
|
65
|
+
/** Where the effective HLE ceiling came from. */
|
|
60
66
|
hle_source: 'request' | 'tag' | 'global' | 'none';
|
|
61
|
-
|
|
67
|
+
/** @deprecated Present for backward compatibility. */
|
|
68
|
+
hle_clamped?: boolean;
|
|
62
69
|
};
|
|
63
|
-
text?: string;
|
|
64
70
|
tokens_saved?: number;
|
|
65
71
|
tokens_used?: number;
|
|
66
72
|
compression_ratio?: number;
|
|
67
73
|
cost_estimate?: number | null;
|
|
68
74
|
savings_estimate?: number | null;
|
|
75
|
+
model_hle?: number | null;
|
|
76
|
+
input_price_per_million?: number | null;
|
|
77
|
+
output_price_per_million?: number | null;
|
|
69
78
|
}
|
|
70
79
|
export type MessageRole = 'system' | 'developer' | 'user' | 'assistant';
|
|
71
80
|
export interface Message {
|
|
@@ -74,7 +83,13 @@ export interface Message {
|
|
|
74
83
|
}
|
|
75
84
|
export interface CompleteV2Request {
|
|
76
85
|
messages: Message[];
|
|
86
|
+
/** Optional provider constraint. Omit for cross-provider cost optimization. */
|
|
77
87
|
llm_provider?: 'openai' | 'anthropic' | 'google' | 'deepseek' | 'moonshot';
|
|
88
|
+
/**
|
|
89
|
+
* @deprecated Quality routing is now fully managed by admin-configured ceilings
|
|
90
|
+
* (global, tag-level, integration-level) in the dashboard. This parameter is
|
|
91
|
+
* accepted for backward compatibility but should not be used in new code.
|
|
92
|
+
*/
|
|
78
93
|
desired_hle?: number;
|
|
79
94
|
compress?: boolean;
|
|
80
95
|
compression_config?: {
|
|
@@ -87,12 +102,25 @@ export interface CompleteV2Request {
|
|
|
87
102
|
algorithm?: 'greedy';
|
|
88
103
|
temperature?: number;
|
|
89
104
|
max_tokens?: number;
|
|
105
|
+
/**
|
|
106
|
+
* Tags for cost attribution and tag-level quality ceilings.
|
|
107
|
+
* Supported keys: 'team', 'environment', 'feature'.
|
|
108
|
+
* Values are validated server-side against your workspace's allowed list.
|
|
109
|
+
* The 'integration' tag is reserved for system use and should not be set manually.
|
|
110
|
+
*
|
|
111
|
+
* @example { team: 'backend', environment: 'production', feature: 'search' }
|
|
112
|
+
*/
|
|
90
113
|
tags?: Record<string, string>;
|
|
91
114
|
max_history_messages?: number;
|
|
115
|
+
/** @deprecated System selects model automatically. */
|
|
92
116
|
model?: string;
|
|
117
|
+
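/** @deprecated Superseded by desired_hle, which is itself deprecated; quality routing is now managed by admin-configured ceilings in the dashboard. */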
|
|
93
118
|
hle_target_percent?: number;
|
|
119
|
+
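/** @deprecated Superseded by desired_hle, which is itself deprecated; quality routing is now managed by admin-configured ceilings in the dashboard. */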
|
|
94
120
|
min_hle_score?: number;
|
|
121
|
+
/** @deprecated Always auto-selects now. */
|
|
95
122
|
auto_select_by_hle?: boolean;
|
|
123
|
+
/** @deprecated Use llm_provider instead. */
|
|
96
124
|
same_provider_only?: boolean;
|
|
97
125
|
}
|
|
98
126
|
export interface HealthCheckResponse {
|
|
@@ -145,9 +173,11 @@ export declare class PcompresslrAPIClient {
|
|
|
145
173
|
/**
|
|
146
174
|
* Compress text without making an LLM call (POST /api/v1/compress).
|
|
147
175
|
*
|
|
148
|
-
*
|
|
149
|
-
* -
|
|
150
|
-
* -
|
|
176
|
+
* @param prompt - Text to compress
|
|
177
|
+
* @param model - Model for tokenization (default: 'gpt-4')
|
|
178
|
+
* @param tags - Tags for attribution. Supported keys: 'team', 'environment', 'feature'.
|
|
179
|
+
*
|
|
180
|
+
* Also supports a legacy call shape: compress(prompt, model, 'greedy', tags?)
|
|
151
181
|
*/
|
|
152
182
|
compress(prompt: string, model?: string, tags?: Record<string, string>): Promise<CompressResponse>;
|
|
153
183
|
compress(prompt: string, model: string, algorithm: 'greedy', tags?: Record<string, string>): Promise<CompressResponse>;
|
|
@@ -161,32 +191,23 @@ export declare class PcompresslrAPIClient {
|
|
|
161
191
|
*/
|
|
162
192
|
completeSync(request: CompleteV2Request): Promise<CompleteResponse>;
|
|
163
193
|
/**
|
|
164
|
-
* Messages-first complete with intelligent model selection
|
|
194
|
+
* Messages-first complete with intelligent model selection.
|
|
165
195
|
*
|
|
166
|
-
*
|
|
167
|
-
*
|
|
196
|
+
* Uses async job processing (enqueue + poll) for production reliability.
|
|
197
|
+
* Model routing is managed by admin-configured quality ceilings (global,
|
|
198
|
+
* tag-level, integration-level) in the dashboard. The system selects the
|
|
199
|
+
* cheapest model that meets the effective ceiling.
|
|
168
200
|
*
|
|
169
201
|
* Provider API keys must be stored in your account (BYOK via dashboard).
|
|
170
202
|
*
|
|
171
203
|
* @example
|
|
172
|
-
* // Basic usage (cross-provider optimization)
|
|
173
|
-
* const response = await client.complete({
|
|
174
|
-
* messages: [{role: 'user', content: 'Hello'}],
|
|
175
|
-
* desired_hle: 30,
|
|
176
|
-
* });
|
|
177
|
-
*
|
|
178
|
-
* // Constrained to specific provider
|
|
179
204
|
* const response = await client.complete({
|
|
180
205
|
* messages: [{role: 'user', content: 'Hello'}],
|
|
181
|
-
*
|
|
182
|
-
* desired_hle: 35,
|
|
206
|
+
* tags: { team: 'backend', environment: 'production' },
|
|
183
207
|
* });
|
|
184
208
|
*
|
|
185
|
-
* // Access routing info
|
|
186
209
|
* console.log(response.routing_info?.selected_model);
|
|
187
|
-
*
|
|
188
|
-
* console.log('Admin ceiling lowered your desired HLE');
|
|
189
|
-
* }
|
|
210
|
+
* console.log(response.routing_info?.effective_hle);
|
|
190
211
|
*/
|
|
191
212
|
complete(request: CompleteV2Request): Promise<CompleteResponse>;
|
|
192
213
|
}
|
package/dist/api-client.js
CHANGED
|
@@ -288,32 +288,23 @@ class PcompresslrAPIClient {
|
|
|
288
288
|
return this.makeRequest('/api/v2/complete', data, 'POST');
|
|
289
289
|
}
|
|
290
290
|
/**
|
|
291
|
-
* Messages-first complete with intelligent model selection
|
|
291
|
+
* Messages-first complete with intelligent model selection.
|
|
292
292
|
*
|
|
293
|
-
*
|
|
294
|
-
*
|
|
293
|
+
* Uses async job processing (enqueue + poll) for production reliability.
|
|
294
|
+
* Model routing is managed by admin-configured quality ceilings (global,
|
|
295
|
+
* tag-level, integration-level) in the dashboard. The system selects the
|
|
296
|
+
* cheapest model that meets the effective ceiling.
|
|
295
297
|
*
|
|
296
298
|
* Provider API keys must be stored in your account (BYOK via dashboard).
|
|
297
299
|
*
|
|
298
300
|
* @example
|
|
299
|
-
* // Basic usage (cross-provider optimization)
|
|
300
301
|
* const response = await client.complete({
|
|
301
302
|
* messages: [{role: 'user', content: 'Hello'}],
|
|
302
|
-
*
|
|
303
|
+
* tags: { team: 'backend', environment: 'production' },
|
|
303
304
|
* });
|
|
304
305
|
*
|
|
305
|
-
* // Constrained to specific provider
|
|
306
|
-
* const response = await client.complete({
|
|
307
|
-
* messages: [{role: 'user', content: 'Hello'}],
|
|
308
|
-
* llm_provider: 'openai',
|
|
309
|
-
* desired_hle: 35,
|
|
310
|
-
* });
|
|
311
|
-
*
|
|
312
|
-
* // Access routing info
|
|
313
306
|
* console.log(response.routing_info?.selected_model);
|
|
314
|
-
*
|
|
315
|
-
* console.log('Admin ceiling lowered your desired HLE');
|
|
316
|
-
* }
|
|
307
|
+
* console.log(response.routing_info?.effective_hle);
|
|
317
308
|
*/
|
|
318
309
|
async complete(request) {
|
|
319
310
|
// Warn about deprecated parameters
|
package/dist/cli.js
CHANGED
|
File without changes
|
package/dist/core.d.ts
CHANGED
|
@@ -17,19 +17,33 @@ export interface CompressionConfig {
|
|
|
17
17
|
}
|
|
18
18
|
export interface CompleteOptions {
|
|
19
19
|
messages: Message[];
|
|
20
|
+
/** @deprecated System selects model automatically. */
|
|
20
21
|
model?: string;
|
|
21
22
|
provider?: 'openai' | 'anthropic' | 'google';
|
|
23
|
+
/**
|
|
24
|
+
* @deprecated Quality routing is now fully managed by admin-configured ceilings
|
|
25
|
+
* in the dashboard. Accepted for backward compatibility.
|
|
26
|
+
*/
|
|
22
27
|
desiredHle?: number;
|
|
23
28
|
compress?: boolean;
|
|
24
29
|
compressionConfig?: CompressionConfig;
|
|
25
30
|
compressOutput?: boolean;
|
|
26
31
|
mode?: 'async' | 'sync';
|
|
32
|
+
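/** @deprecated Superseded by desiredHle, which is itself deprecated; quality routing is now managed by admin-configured ceilings in the dashboard. */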
|
|
27
33
|
hleTargetPercent?: number;
|
|
34
|
+
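/** @deprecated Superseded by desiredHle, which is itself deprecated; quality routing is now managed by admin-configured ceilings in the dashboard. */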
|
|
28
35
|
minHleScore?: number;
|
|
36
|
+
/** @deprecated Always auto-selects now. */
|
|
29
37
|
autoSelectByHle?: boolean;
|
|
38
|
+
/** @deprecated Use provider instead. */
|
|
30
39
|
sameProviderOnly?: boolean;
|
|
31
40
|
temperature?: number;
|
|
32
41
|
maxTokens?: number;
|
|
42
|
+
/**
|
|
43
|
+
* Tags for cost attribution and quality ceilings.
|
|
44
|
+
* Supported keys: 'team', 'environment', 'feature'.
|
|
45
|
+
* The 'integration' tag is reserved for system use.
|
|
46
|
+
*/
|
|
33
47
|
tags?: Record<string, string>;
|
|
34
48
|
maxHistoryMessages?: number;
|
|
35
49
|
}
|
package/dist/core.js
CHANGED
|
@@ -51,7 +51,6 @@ class LightReach {
|
|
|
51
51
|
// We do NOT fabricate cost estimates here since the API response does not include pricing data.
|
|
52
52
|
return {
|
|
53
53
|
...resp,
|
|
54
|
-
text: resp.text ?? resp.decompressed_response,
|
|
55
54
|
tokens_saved: resp.tokens_saved ?? resp.compression_stats?.token_savings,
|
|
56
55
|
tokens_used: resp.tokens_used ?? resp.llm_stats?.total_tokens,
|
|
57
56
|
compression_ratio: resp.compression_ratio ?? resp.compression_stats?.compression_ratio,
|
package/package.json
CHANGED