compress-lightreach 1.0.3 → 1.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +49 -36
- package/dist/api-client.d.ts +62 -8
- package/dist/api-client.js +126 -30
- package/dist/cli.js +30 -120
- package/dist/core.d.ts +2 -4
- package/dist/core.js +10 -7
- package/dist/version.d.ts +1 -1
- package/dist/version.js +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -11,10 +11,7 @@ Compress Light Reach is a Node.js/TypeScript SDK that provides intelligent model
|
|
|
11
11
|
## Features
|
|
12
12
|
|
|
13
13
|
- **Intelligent Model Routing**: Automatically selects optimal model based on quality requirements (HLE) and available provider keys
|
|
14
|
-
- **Token-aware Compression**: Replaces repeated substrings with shorter placeholders
|
|
15
|
-
- **Dual Algorithms**:
|
|
16
|
-
- Fast greedy (~99% optimal) for daily use
|
|
17
|
-
- Optimal DP (O(n²)) for critical prompts
|
|
14
|
+
- **Token-aware Compression**: Replaces repeated substrings with shorter placeholders using a fast greedy algorithm
|
|
18
15
|
- **Lossless**: Perfect decompression guaranteed
|
|
19
16
|
- **Output Compression**: Optional model output compression support
|
|
20
17
|
- **Cloud API**: Uses Light Reach's cloud service for compression and routing
|
|
@@ -52,7 +49,7 @@ const result = await client.complete({
|
|
|
52
49
|
{ role: 'system', content: 'You are a helpful assistant.' },
|
|
53
50
|
{ role: 'user', content: 'Explain quantum computing in simple terms.' },
|
|
54
51
|
],
|
|
55
|
-
desired_hle: 30, // Quality
|
|
52
|
+
desired_hle: 30, // Quality ceiling (0-100). Current SOTA is ~40%.
|
|
56
53
|
});
|
|
57
54
|
|
|
58
55
|
console.log(result.decompressed_response);
|
|
@@ -60,6 +57,28 @@ console.log(`Selected: ${result.routing_info?.selected_model}`);
|
|
|
60
57
|
console.log(`Token savings: ${result.compression_stats.token_savings}`);
|
|
61
58
|
```
|
|
62
59
|
|
|
60
|
+
## OpenAI-compatible API (Cursor / OpenAI SDKs)
|
|
61
|
+
|
|
62
|
+
LightReach also exposes a **strict OpenAI-compatible** surface (including streaming SSE) so you can use standard OpenAI tooling without changing your app.
|
|
63
|
+
|
|
64
|
+
- **Cursor base URL**: `https://compress.lightreach.io/v1/cursor`
|
|
65
|
+
- **Generic OpenAI-compatible base URL**: `https://compress.lightreach.io/v1`
|
|
66
|
+
- **Endpoints**: `GET /models`, `POST /chat/completions`
|
|
67
|
+
- **Model id**: `lightreach`
|
|
68
|
+
|
|
69
|
+
Example (cURL):
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
curl -sS https://compress.lightreach.io/v1/chat/completions \
|
|
73
|
+
-H "Authorization: Bearer lr_your_lightreach_key" \
|
|
74
|
+
-H "Content-Type: application/json" \
|
|
75
|
+
-d '{
|
|
76
|
+
"model": "lightreach",
|
|
77
|
+
"messages": [{"role":"user","content":"Say hello"}],
|
|
78
|
+
"stream": true
|
|
79
|
+
}'
|
|
80
|
+
```
|
|
81
|
+
|
|
63
82
|
### With Output Compression
|
|
64
83
|
|
|
65
84
|
```typescript
|
|
@@ -84,7 +103,7 @@ const client = new PcompresslrAPIClient("your-lightreach-api-key");
|
|
|
84
103
|
// Cross-provider optimization: system picks cheapest model meeting your quality bar
|
|
85
104
|
const result = await client.complete({
|
|
86
105
|
messages: [{ role: 'user', content: 'Explain quantum computing' }],
|
|
87
|
-
desired_hle: 30, // Quality
|
|
106
|
+
desired_hle: 30, // Quality ceiling (0-100). Current SOTA is ~40%.
|
|
88
107
|
});
|
|
89
108
|
|
|
90
109
|
// Check what was selected
|
|
@@ -109,20 +128,16 @@ const result = await client.complete({
|
|
|
109
128
|
|
|
110
129
|
### HLE Cascading with Admin Controls
|
|
111
130
|
|
|
112
|
-
Admins can set quality **ceilings** via the dashboard (global or per-tag) to control costs. Your `desired_hle` is a preference,
|
|
131
|
+
Admins can set quality **ceilings** via the dashboard (global or per-tag) to control costs. Your `desired_hle` is a preference; if it exceeds an admin-set ceiling, the request will **silently clamp** to the ceiling and proceed.
|
|
113
132
|
|
|
114
133
|
```typescript
|
|
115
134
|
// Admin set global HLE ceiling to 30%
|
|
116
|
-
// Requesting above the ceiling will error
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
});
|
|
123
|
-
} catch (e) {
|
|
124
|
-
console.error(e.message); // "Requested HLE 35% exceeds workspace maximum of 30%"
|
|
125
|
-
}
|
|
135
|
+
// Requesting above the ceiling will be clamped to 30 (no error)
|
|
136
|
+
const result = await client.complete({
|
|
137
|
+
messages: [{ role: 'user', content: 'Process payment' }],
|
|
138
|
+
desired_hle: 35, // Will be clamped down to 30
|
|
139
|
+
tags: { env: 'production' },
|
|
140
|
+
});
|
|
126
141
|
|
|
127
142
|
// Correct usage: request within ceiling
|
|
128
143
|
const result = await client.complete({
|
|
@@ -131,7 +146,7 @@ const result = await client.complete({
|
|
|
131
146
|
tags: { env: 'production' },
|
|
132
147
|
});
|
|
133
148
|
|
|
134
|
-
// Check if your HLE was lowered by admin ceiling
|
|
149
|
+
// Check if your HLE was lowered by an admin ceiling
|
|
135
150
|
if (result.routing_info?.hle_clamped) {
|
|
136
151
|
console.log(`HLE lowered from ${result.routing_info.requested_hle} ` +
|
|
137
152
|
`to ${result.routing_info.effective_hle} ` +
|
|
@@ -180,7 +195,6 @@ const client = new PcompresslrAPIClient("your-lightreach-api-key");
|
|
|
180
195
|
const compressed = await client.compress(
|
|
181
196
|
"Your text with repeated content here...",
|
|
182
197
|
"gpt-4", // Model for tokenization
|
|
183
|
-
"greedy", // Algorithm: 'greedy' or 'optimal'
|
|
184
198
|
{ env: 'dev' } // Optional tags
|
|
185
199
|
);
|
|
186
200
|
|
|
@@ -200,12 +214,6 @@ export PCOMPRESLR_API_KEY=your-api-key
|
|
|
200
214
|
|
|
201
215
|
# Compress a prompt
|
|
202
216
|
npx pcompresslr "Your prompt with repeated text here..."
|
|
203
|
-
|
|
204
|
-
# Use optimal algorithm only
|
|
205
|
-
npx pcompresslr "Your prompt here" --optimal-only
|
|
206
|
-
|
|
207
|
-
# Use greedy algorithm only
|
|
208
|
-
npx pcompresslr "Your prompt here" --greedy-only
|
|
209
217
|
```
|
|
210
218
|
|
|
211
219
|
## API Reference
|
|
@@ -223,7 +231,7 @@ new PcompresslrAPIClient(apiKey?: string, apiUrl?: string, timeout?: number)
|
|
|
223
231
|
**Parameters:**
|
|
224
232
|
- `apiKey` (string, optional): LightReach API key. Falls back to `LIGHTREACH_API_KEY` or `PCOMPRESLR_API_KEY` env vars.
|
|
225
233
|
- `apiUrl` (string, optional): Override base API URL. Falls back to `PCOMPRESLR_API_URL` env var. Default: `https://api.compress.lightreach.io`
|
|
226
|
-
- `timeout` (number, optional): Request timeout in milliseconds. Default: `
|
|
234
|
+
- `timeout` (number, optional): Request timeout in milliseconds. Default: `900000` (15 minutes)
|
|
227
235
|
|
|
228
236
|
#### Methods
|
|
229
237
|
|
|
@@ -237,10 +245,9 @@ Messages-first completion with intelligent routing (POST `/api/v2/complete`).
|
|
|
237
245
|
|-----------|------|---------|-------------|
|
|
238
246
|
| `messages` | `Message[]` | required | Conversation history with `role` and `content` |
|
|
239
247
|
| `llm_provider` | `'openai' \| 'anthropic' \| 'google' \| 'deepseek' \| 'moonshot'` | — | Optional provider constraint. Omit for cross-provider optimization |
|
|
240
|
-
| `desired_hle` | `number` | — | Quality
|
|
248
|
+
| `desired_hle` | `number` | — | Quality ceiling (0-100). If above an admin ceiling, it is clamped down |
|
|
241
249
|
| `compress` | `boolean` | `true` | Whether to compress messages |
|
|
242
250
|
| `compress_output` | `boolean` | `false` | Whether to request compressed output from LLM |
|
|
243
|
-
| `algorithm` | `'greedy' \| 'optimal'` | `'greedy'` | Compression algorithm |
|
|
244
251
|
| `compression_config` | `object` | — | Per-role compression settings (see below) |
|
|
245
252
|
| `temperature` | `number` | — | LLM temperature parameter |
|
|
246
253
|
| `max_tokens` | `number` | — | Maximum tokens to generate |
|
|
@@ -264,19 +271,23 @@ Messages-first completion with intelligent routing (POST `/api/v2/complete`).
|
|
|
264
271
|
{
|
|
265
272
|
decompressed_response: string; // Final decompressed LLM response
|
|
266
273
|
compression_stats: {
|
|
267
|
-
|
|
268
|
-
compressed_size_chars: number;
|
|
274
|
+
compression_enabled: boolean;
|
|
269
275
|
original_tokens: number;
|
|
270
276
|
compressed_tokens: number;
|
|
271
|
-
compression_ratio: number;
|
|
272
277
|
token_savings: number;
|
|
273
|
-
|
|
278
|
+
compression_ratio: number;
|
|
279
|
+
token_count_exact?: boolean;
|
|
280
|
+
token_count_source?: string;
|
|
281
|
+
token_accounting_note?: string;
|
|
274
282
|
processing_time_ms?: number;
|
|
275
283
|
};
|
|
276
284
|
llm_stats: {
|
|
277
|
-
|
|
278
|
-
|
|
285
|
+
provider?: string;
|
|
286
|
+
model?: string;
|
|
287
|
+
input_tokens: number;
|
|
288
|
+
output_tokens: number;
|
|
279
289
|
total_tokens: number;
|
|
290
|
+
finish_reason?: string | null;
|
|
280
291
|
};
|
|
281
292
|
routing_info?: {
|
|
282
293
|
selected_model: string; // Model chosen by system
|
|
@@ -299,14 +310,16 @@ Messages-first completion with intelligent routing (POST `/api/v2/complete`).
|
|
|
299
310
|
}
|
|
300
311
|
```
|
|
301
312
|
|
|
302
|
-
##### `compress(prompt, model?,
|
|
313
|
+
##### `compress(prompt, model?, tags?): Promise<CompressResponse>`
|
|
314
|
+
|
|
315
|
+
Also supports a legacy call shape: `compress(prompt, model, algorithm, tags?)` (only `"greedy"` is supported).
|
|
303
316
|
|
|
304
317
|
Compression-only (POST `/api/v1/compress`).
|
|
305
318
|
|
|
306
319
|
**Parameters:**
|
|
307
320
|
- `prompt` (string, required): Text to compress
|
|
308
321
|
- `model` (string, optional): Model for tokenization. Default: `'gpt-4'`
|
|
309
|
-
- `algorithm` (`
|
|
322
|
+
- `algorithm` (`"greedy"`, optional): Legacy-only parameter. Only `"greedy"` is supported.
|
|
310
323
|
- `tags` (`Record<string, string>`, optional): Tags for attribution
|
|
311
324
|
|
|
312
325
|
**Response (`CompressResponse`):**
|
package/dist/api-client.d.ts
CHANGED
|
@@ -30,19 +30,23 @@ export interface DecompressResponse {
|
|
|
30
30
|
export interface CompleteResponse {
|
|
31
31
|
decompressed_response: string;
|
|
32
32
|
compression_stats: {
|
|
33
|
-
|
|
34
|
-
compressed_size_chars: number;
|
|
33
|
+
compression_enabled: boolean;
|
|
35
34
|
original_tokens: number;
|
|
36
35
|
compressed_tokens: number;
|
|
37
|
-
compression_ratio: number;
|
|
38
36
|
token_savings: number;
|
|
39
|
-
|
|
37
|
+
compression_ratio: number;
|
|
38
|
+
token_count_exact?: boolean;
|
|
39
|
+
token_count_source?: string;
|
|
40
|
+
token_accounting_note?: string;
|
|
40
41
|
processing_time_ms?: number;
|
|
41
42
|
};
|
|
42
43
|
llm_stats: {
|
|
43
|
-
|
|
44
|
-
|
|
44
|
+
provider?: string;
|
|
45
|
+
model?: string;
|
|
46
|
+
input_tokens: number;
|
|
47
|
+
output_tokens: number;
|
|
45
48
|
total_tokens: number;
|
|
49
|
+
finish_reason?: string | null;
|
|
46
50
|
};
|
|
47
51
|
warnings?: string[];
|
|
48
52
|
routing_info?: {
|
|
@@ -80,7 +84,7 @@ export interface CompleteV2Request {
|
|
|
80
84
|
compress_only_last_n_user?: number | null;
|
|
81
85
|
};
|
|
82
86
|
compress_output?: boolean;
|
|
83
|
-
algorithm?: 'greedy'
|
|
87
|
+
algorithm?: 'greedy';
|
|
84
88
|
temperature?: number;
|
|
85
89
|
max_tokens?: number;
|
|
86
90
|
tags?: Record<string, string>;
|
|
@@ -95,6 +99,23 @@ export interface HealthCheckResponse {
|
|
|
95
99
|
status: string;
|
|
96
100
|
version?: string;
|
|
97
101
|
}
|
|
102
|
+
export type CompleteJobStatus = 'queued' | 'running' | 'succeeded' | 'failed' | 'canceled';
|
|
103
|
+
export interface CompleteJobCreateResponse {
|
|
104
|
+
job_id: string;
|
|
105
|
+
status: CompleteJobStatus;
|
|
106
|
+
status_url: string;
|
|
107
|
+
}
|
|
108
|
+
export interface CompleteJobStatusResponse {
|
|
109
|
+
job_id: string;
|
|
110
|
+
status: CompleteJobStatus;
|
|
111
|
+
phase?: string | null;
|
|
112
|
+
progress?: number | null;
|
|
113
|
+
created_at?: string | null;
|
|
114
|
+
started_at?: string | null;
|
|
115
|
+
finished_at?: string | null;
|
|
116
|
+
result?: any;
|
|
117
|
+
error?: any;
|
|
118
|
+
}
|
|
98
119
|
export declare class PcompresslrAPIClient {
|
|
99
120
|
private readonly DEFAULT_API_URL;
|
|
100
121
|
private apiKey;
|
|
@@ -103,9 +124,42 @@ export declare class PcompresslrAPIClient {
|
|
|
103
124
|
private session;
|
|
104
125
|
constructor(apiKey?: string, apiUrl?: string, timeout?: number);
|
|
105
126
|
private makeRequest;
|
|
106
|
-
|
|
127
|
+
/**
|
|
128
|
+
* Create async /complete job (POST /api/v1/complete/jobs).
|
|
129
|
+
*/
|
|
130
|
+
createCompleteJob(request: CompleteV2Request, opts?: {
|
|
131
|
+
idempotencyKey?: string;
|
|
132
|
+
}): Promise<CompleteJobCreateResponse>;
|
|
133
|
+
/**
|
|
134
|
+
* Poll async /complete job (GET /api/v1/complete/jobs/{job_id}).
|
|
135
|
+
*/
|
|
136
|
+
getCompleteJob(jobId: string): Promise<CompleteJobStatusResponse>;
|
|
137
|
+
/**
|
|
138
|
+
* Production-safe complete: enqueue + poll async job endpoints.
|
|
139
|
+
*/
|
|
140
|
+
completeAsync(request: CompleteV2Request, opts?: {
|
|
141
|
+
pollIntervalMs?: number;
|
|
142
|
+
maxWaitMs?: number;
|
|
143
|
+
idempotencyKey?: string;
|
|
144
|
+
}): Promise<CompleteResponse>;
|
|
145
|
+
/**
|
|
146
|
+
* Compress text without making an LLM call (POST /api/v1/compress).
|
|
147
|
+
*
|
|
148
|
+
* Supported call shapes:
|
|
149
|
+
* - compress(prompt, model?, tags?)
|
|
150
|
+
* - compress(prompt, model, algorithm, tags?) (back-compat; only "greedy" is supported)
|
|
151
|
+
*/
|
|
152
|
+
compress(prompt: string, model?: string, tags?: Record<string, string>): Promise<CompressResponse>;
|
|
153
|
+
compress(prompt: string, model: string, algorithm: 'greedy', tags?: Record<string, string>): Promise<CompressResponse>;
|
|
107
154
|
decompress(llmFormat: string): Promise<DecompressResponse>;
|
|
108
155
|
healthCheck(): Promise<HealthCheckResponse>;
|
|
156
|
+
/**
|
|
157
|
+
* Direct (non-job) complete call (POST /api/v2/complete).
|
|
158
|
+
*
|
|
159
|
+
* This hits the synchronous endpoint and is best-effort for small/interactive usage.
|
|
160
|
+
* For production reliability, prefer `complete()` (async job + polling).
|
|
161
|
+
*/
|
|
162
|
+
completeSync(request: CompleteV2Request): Promise<CompleteResponse>;
|
|
109
163
|
/**
|
|
110
164
|
* Messages-first complete with intelligent model selection (POST /api/v2/complete).
|
|
111
165
|
*
|
package/dist/api-client.js
CHANGED
|
@@ -42,7 +42,7 @@ class APIRequestError extends PcompresslrAPIError {
|
|
|
42
42
|
}
|
|
43
43
|
exports.APIRequestError = APIRequestError;
|
|
44
44
|
class PcompresslrAPIClient {
|
|
45
|
-
constructor(apiKey, apiUrl, timeout =
|
|
45
|
+
constructor(apiKey, apiUrl, timeout = 900000 // 15 minutes - complete() can include long upstream LLM calls
|
|
46
46
|
) {
|
|
47
47
|
this.DEFAULT_API_URL = "https://api.compress.lightreach.io";
|
|
48
48
|
// Get API key from parameter or environment
|
|
@@ -89,7 +89,7 @@ class PcompresslrAPIClient {
|
|
|
89
89
|
return Promise.reject(error);
|
|
90
90
|
});
|
|
91
91
|
}
|
|
92
|
-
async makeRequest(endpoint, data, method = 'POST') {
|
|
92
|
+
async makeRequest(endpoint, data, method = 'POST', opts) {
|
|
93
93
|
const url = `${this.apiUrl}${endpoint}`;
|
|
94
94
|
try {
|
|
95
95
|
const response = await this.session.request({
|
|
@@ -97,6 +97,8 @@ class PcompresslrAPIClient {
|
|
|
97
97
|
url: endpoint,
|
|
98
98
|
data: method === 'POST' ? data : undefined,
|
|
99
99
|
params: method === 'GET' ? data : undefined,
|
|
100
|
+
headers: opts?.headers,
|
|
101
|
+
timeout: opts?.timeoutMs,
|
|
100
102
|
});
|
|
101
103
|
return response.data;
|
|
102
104
|
}
|
|
@@ -139,15 +141,87 @@ class PcompresslrAPIClient {
|
|
|
139
141
|
throw new APIRequestError(`Request failed: ${errorMessage}`);
|
|
140
142
|
}
|
|
141
143
|
}
|
|
142
|
-
|
|
144
|
+
/**
|
|
145
|
+
* Create async /complete job (POST /api/v1/complete/jobs).
|
|
146
|
+
*/
|
|
147
|
+
async createCompleteJob(request, opts) {
|
|
143
148
|
const data = {
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
149
|
+
messages: request.messages,
|
|
150
|
+
compress: request.compress ?? true,
|
|
151
|
+
compress_output: request.compress_output ?? false,
|
|
152
|
+
algorithm: request.algorithm ?? 'greedy',
|
|
147
153
|
};
|
|
148
|
-
if (
|
|
149
|
-
data.
|
|
154
|
+
if (request.llm_provider !== undefined)
|
|
155
|
+
data.llm_provider = request.llm_provider;
|
|
156
|
+
if (request.desired_hle !== undefined)
|
|
157
|
+
data.desired_hle = request.desired_hle;
|
|
158
|
+
if (request.compression_config)
|
|
159
|
+
data.compression_config = request.compression_config;
|
|
160
|
+
if (request.temperature !== undefined)
|
|
161
|
+
data.temperature = request.temperature;
|
|
162
|
+
if (request.max_tokens !== undefined)
|
|
163
|
+
data.max_tokens = request.max_tokens;
|
|
164
|
+
if (request.tags !== undefined)
|
|
165
|
+
data.tags = request.tags;
|
|
166
|
+
if (request.max_history_messages !== undefined)
|
|
167
|
+
data.max_history_messages = request.max_history_messages;
|
|
168
|
+
const headers = {};
|
|
169
|
+
if (opts?.idempotencyKey)
|
|
170
|
+
headers['Idempotency-Key'] = opts.idempotencyKey;
|
|
171
|
+
return this.makeRequest('/api/v1/complete/jobs', data, 'POST', { headers });
|
|
172
|
+
}
|
|
173
|
+
/**
|
|
174
|
+
* Poll async /complete job (GET /api/v1/complete/jobs/{job_id}).
|
|
175
|
+
*/
|
|
176
|
+
async getCompleteJob(jobId) {
|
|
177
|
+
if (!jobId)
|
|
178
|
+
throw new APIRequestError('jobId is required');
|
|
179
|
+
// Keep polls short even if overall client timeout is high.
|
|
180
|
+
return this.makeRequest(`/api/v1/complete/jobs/${jobId}`, {}, 'GET', { timeoutMs: Math.min(this.timeout, 30000) });
|
|
181
|
+
}
|
|
182
|
+
/**
|
|
183
|
+
* Production-safe complete: enqueue + poll async job endpoints.
|
|
184
|
+
*/
|
|
185
|
+
async completeAsync(request, opts) {
|
|
186
|
+
const job = await this.createCompleteJob(request, { idempotencyKey: opts?.idempotencyKey });
|
|
187
|
+
const jobId = job.job_id;
|
|
188
|
+
const pollIntervalMs = Math.max(200, opts?.pollIntervalMs ?? 1000);
|
|
189
|
+
const maxWaitMs = opts?.maxWaitMs ?? this.timeout;
|
|
190
|
+
const deadline = Date.now() + maxWaitMs;
|
|
191
|
+
let interval = pollIntervalMs;
|
|
192
|
+
while (true) {
|
|
193
|
+
const st = await this.getCompleteJob(jobId);
|
|
194
|
+
if (st.status === 'succeeded') {
|
|
195
|
+
if (st.result)
|
|
196
|
+
return st.result;
|
|
197
|
+
throw new APIRequestError('Async job succeeded but result was missing.');
|
|
198
|
+
}
|
|
199
|
+
if (st.status === 'failed' || st.status === 'canceled') {
|
|
200
|
+
throw new APIRequestError(`Async complete job ${st.status}: ${JSON.stringify(st.error ?? {})}`);
|
|
201
|
+
}
|
|
202
|
+
if (Date.now() > deadline) {
|
|
203
|
+
throw new APIRequestError(`Async complete job timed out after ${maxWaitMs}ms (status=${st.status}).`);
|
|
204
|
+
}
|
|
205
|
+
await new Promise((r) => setTimeout(r, interval));
|
|
206
|
+
interval = Math.min(Math.floor(interval * 1.2), 2000);
|
|
207
|
+
}
|
|
208
|
+
}
|
|
209
|
+
async compress(prompt, model = "gpt-4", algorithmOrTags, maybeTags) {
|
|
210
|
+
let algorithm = 'greedy';
|
|
211
|
+
let tags;
|
|
212
|
+
if (typeof algorithmOrTags === 'string') {
|
|
213
|
+
if (algorithmOrTags !== 'greedy') {
|
|
214
|
+
throw new APIRequestError(`Invalid algorithm "${algorithmOrTags}". Only "greedy" is supported.`);
|
|
215
|
+
}
|
|
216
|
+
algorithm = 'greedy';
|
|
217
|
+
tags = maybeTags;
|
|
218
|
+
}
|
|
219
|
+
else if (algorithmOrTags && typeof algorithmOrTags === 'object') {
|
|
220
|
+
tags = algorithmOrTags;
|
|
150
221
|
}
|
|
222
|
+
const data = { prompt, model, algorithm };
|
|
223
|
+
if (tags)
|
|
224
|
+
data.tags = tags;
|
|
151
225
|
return this.makeRequest("/api/v1/compress", data);
|
|
152
226
|
}
|
|
153
227
|
async decompress(llmFormat) {
|
|
@@ -172,6 +246,47 @@ class PcompresslrAPIClient {
|
|
|
172
246
|
throw new APIRequestError(`Health check failed: ${errorMessage}`);
|
|
173
247
|
}
|
|
174
248
|
}
|
|
249
|
+
/**
|
|
250
|
+
* Direct (non-job) complete call (POST /api/v2/complete).
|
|
251
|
+
*
|
|
252
|
+
* This hits the synchronous endpoint and is best-effort for small/interactive usage.
|
|
253
|
+
* For production reliability, prefer `complete()` (async job + polling).
|
|
254
|
+
*/
|
|
255
|
+
async completeSync(request) {
|
|
256
|
+
const data = {
|
|
257
|
+
messages: request.messages,
|
|
258
|
+
compress: request.compress ?? true,
|
|
259
|
+
compress_output: request.compress_output ?? false,
|
|
260
|
+
algorithm: request.algorithm ?? 'greedy',
|
|
261
|
+
};
|
|
262
|
+
// v1.0.0 parameters
|
|
263
|
+
if (request.llm_provider !== undefined)
|
|
264
|
+
data.llm_provider = request.llm_provider;
|
|
265
|
+
if (request.desired_hle !== undefined)
|
|
266
|
+
data.desired_hle = request.desired_hle;
|
|
267
|
+
if (request.compression_config)
|
|
268
|
+
data.compression_config = request.compression_config;
|
|
269
|
+
if (request.temperature !== undefined)
|
|
270
|
+
data.temperature = request.temperature;
|
|
271
|
+
if (request.max_tokens !== undefined)
|
|
272
|
+
data.max_tokens = request.max_tokens;
|
|
273
|
+
if (request.tags !== undefined)
|
|
274
|
+
data.tags = request.tags;
|
|
275
|
+
if (request.max_history_messages !== undefined)
|
|
276
|
+
data.max_history_messages = request.max_history_messages;
|
|
277
|
+
// Deprecated / backward compatible parameters (still accepted by some deployments)
|
|
278
|
+
if (request.model !== undefined)
|
|
279
|
+
data.model = request.model;
|
|
280
|
+
if (request.hle_target_percent !== undefined)
|
|
281
|
+
data.hle_target_percent = request.hle_target_percent;
|
|
282
|
+
if (request.min_hle_score !== undefined)
|
|
283
|
+
data.min_hle_score = request.min_hle_score;
|
|
284
|
+
if (request.auto_select_by_hle !== undefined)
|
|
285
|
+
data.auto_select_by_hle = request.auto_select_by_hle;
|
|
286
|
+
if (request.same_provider_only !== undefined)
|
|
287
|
+
data.same_provider_only = request.same_provider_only;
|
|
288
|
+
return this.makeRequest('/api/v2/complete', data, 'POST');
|
|
289
|
+
}
|
|
175
290
|
/**
|
|
176
291
|
* Messages-first complete with intelligent model selection (POST /api/v2/complete).
|
|
177
292
|
*
|
|
@@ -213,28 +328,9 @@ class PcompresslrAPIClient {
|
|
|
213
328
|
console.warn('[compress-lightreach v1.0.0] HLE parameters have changed. ' +
|
|
214
329
|
'Use "desired_hle" and optional "llm_provider" instead.');
|
|
215
330
|
}
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
compress_output: request.compress_output ?? false,
|
|
220
|
-
algorithm: request.algorithm ?? 'greedy',
|
|
221
|
-
};
|
|
222
|
-
// v1.0.0 parameters
|
|
223
|
-
if (request.llm_provider !== undefined)
|
|
224
|
-
data.llm_provider = request.llm_provider;
|
|
225
|
-
if (request.desired_hle !== undefined)
|
|
226
|
-
data.desired_hle = request.desired_hle;
|
|
227
|
-
if (request.compression_config)
|
|
228
|
-
data.compression_config = request.compression_config;
|
|
229
|
-
if (request.temperature !== undefined)
|
|
230
|
-
data.temperature = request.temperature;
|
|
231
|
-
if (request.max_tokens !== undefined)
|
|
232
|
-
data.max_tokens = request.max_tokens;
|
|
233
|
-
if (request.tags !== undefined)
|
|
234
|
-
data.tags = request.tags;
|
|
235
|
-
if (request.max_history_messages !== undefined)
|
|
236
|
-
data.max_history_messages = request.max_history_messages;
|
|
237
|
-
return this.makeRequest("/api/v2/complete", data);
|
|
331
|
+
// Prefer async jobs for production reliability; sync remains available via /api/v2/complete
|
|
332
|
+
// by calling makeRequest directly if needed.
|
|
333
|
+
return this.completeAsync(request);
|
|
238
334
|
}
|
|
239
335
|
}
|
|
240
336
|
exports.PcompresslrAPIClient = PcompresslrAPIClient;
|
package/dist/cli.js
CHANGED
|
@@ -8,29 +8,17 @@ const api_client_1 = require("./api-client");
|
|
|
8
8
|
async function main() {
|
|
9
9
|
const args = process.argv.slice(2);
|
|
10
10
|
if (args.length === 0) {
|
|
11
|
-
console.log("Usage: pcompresslr <prompt>
|
|
11
|
+
console.log("Usage: pcompresslr <prompt>");
|
|
12
12
|
console.log("\nExample:");
|
|
13
13
|
console.log(' pcompresslr "hello world hello world hello world"');
|
|
14
|
-
console.log(' pcompresslr "your prompt here" --greedy-only # Only greedy');
|
|
15
|
-
console.log(' pcompresslr "your prompt here" --optimal-only # Only optimal');
|
|
16
14
|
console.log("\nNote: Requires PCOMPRESLR_API_KEY environment variable");
|
|
17
15
|
process.exit(0);
|
|
18
16
|
}
|
|
19
|
-
|
|
20
|
-
let showGreedy = true;
|
|
21
|
-
let showOptimal = true;
|
|
22
|
-
if (prompt.endsWith("--greedy-only")) {
|
|
23
|
-
prompt = args.slice(0, -1).join(" ");
|
|
24
|
-
showOptimal = false;
|
|
25
|
-
}
|
|
26
|
-
else if (prompt.endsWith("--optimal-only")) {
|
|
27
|
-
prompt = args.slice(0, -1).join(" ");
|
|
28
|
-
showGreedy = false;
|
|
29
|
-
}
|
|
17
|
+
const prompt = args.join(" ");
|
|
30
18
|
// Get API key from environment
|
|
31
19
|
const apiKey = process.env.PCOMPRESLR_API_KEY;
|
|
32
20
|
if (!apiKey) {
|
|
33
|
-
console.error("
|
|
21
|
+
console.error("Error: PCOMPRESLR_API_KEY environment variable is required.");
|
|
34
22
|
console.error("\nTo get an API key, visit https://compress.lightreach.io");
|
|
35
23
|
console.error("Then set it with: export PCOMPRESLR_API_KEY=your-key-here");
|
|
36
24
|
process.exit(1);
|
|
@@ -42,7 +30,7 @@ async function main() {
|
|
|
42
30
|
}
|
|
43
31
|
catch (error) {
|
|
44
32
|
if (error instanceof api_client_1.APIKeyError) {
|
|
45
|
-
console.error(
|
|
33
|
+
console.error(`Error: ${error.message}`);
|
|
46
34
|
process.exit(1);
|
|
47
35
|
}
|
|
48
36
|
throw error;
|
|
@@ -50,116 +38,38 @@ async function main() {
|
|
|
50
38
|
console.log(`Original prompt: ${JSON.stringify(prompt)}`);
|
|
51
39
|
console.log(`Length: ${prompt.length} characters\n`);
|
|
52
40
|
console.log("=".repeat(80));
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
llm_format: llmFormatGreedy,
|
|
72
|
-
decompressed: decompressedGreedy
|
|
73
|
-
};
|
|
74
|
-
console.log(`Compressed: ${JSON.stringify(compressedGreedy)}`);
|
|
75
|
-
console.log(`Dictionary: ${JSON.stringify(dictGreedy)}`);
|
|
76
|
-
console.log(`Compression ratio: ${(ratioGreedy * 100).toFixed(2)}%`);
|
|
77
|
-
console.log(`LLM-ready format length: ${llmFormatGreedy.length} chars`);
|
|
78
|
-
console.log(`Processing time: ${resultGreedy.processing_time_ms.toFixed(2)}ms`);
|
|
79
|
-
if (decompressedGreedy === prompt) {
|
|
80
|
-
console.log("✅ Decompression verified");
|
|
81
|
-
}
|
|
82
|
-
else {
|
|
83
|
-
console.log("❌ Decompression failed");
|
|
84
|
-
}
|
|
85
|
-
}
|
|
86
|
-
catch (error) {
|
|
87
|
-
if (error instanceof api_client_1.RateLimitError) {
|
|
88
|
-
console.error(`❌ Rate limit exceeded: ${error.message}`);
|
|
89
|
-
}
|
|
90
|
-
else if (error instanceof api_client_1.APIRequestError) {
|
|
91
|
-
console.error(`❌ API error: ${error.message}`);
|
|
92
|
-
}
|
|
93
|
-
else {
|
|
94
|
-
throw error;
|
|
95
|
-
}
|
|
96
|
-
}
|
|
97
|
-
}
|
|
98
|
-
if (showOptimal) {
|
|
99
|
-
console.log("\n🔸 OPTIMAL COMPRESSOR (DP, O(n²), globally optimal)");
|
|
100
|
-
console.log("-".repeat(80));
|
|
101
|
-
try {
|
|
102
|
-
const resultOptimal = await client.compress(prompt, "gpt-4", "optimal");
|
|
103
|
-
const compressedOptimal = resultOptimal.compressed;
|
|
104
|
-
const dictOptimal = resultOptimal.dictionary;
|
|
105
|
-
const ratioOptimal = resultOptimal.compression_ratio;
|
|
106
|
-
const llmFormatOptimal = resultOptimal.llm_format;
|
|
107
|
-
// Verify decompression
|
|
108
|
-
const decompressResult = await client.decompress(llmFormatOptimal);
|
|
109
|
-
const decompressedOptimal = decompressResult.decompressed;
|
|
110
|
-
results['optimal'] = {
|
|
111
|
-
compressed: compressedOptimal,
|
|
112
|
-
dict: dictOptimal,
|
|
113
|
-
ratio: ratioOptimal,
|
|
114
|
-
llm_format: llmFormatOptimal,
|
|
115
|
-
decompressed: decompressedOptimal
|
|
116
|
-
};
|
|
117
|
-
console.log(`Compressed: ${JSON.stringify(compressedOptimal)}`);
|
|
118
|
-
console.log(`Dictionary: ${JSON.stringify(dictOptimal)}`);
|
|
119
|
-
console.log(`Compression ratio: ${(ratioOptimal * 100).toFixed(2)}%`);
|
|
120
|
-
console.log(`LLM-ready format length: ${llmFormatOptimal.length} chars`);
|
|
121
|
-
console.log(`Processing time: ${resultOptimal.processing_time_ms.toFixed(2)}ms`);
|
|
122
|
-
if (decompressedOptimal === prompt) {
|
|
123
|
-
console.log("✅ Decompression verified");
|
|
124
|
-
}
|
|
125
|
-
else {
|
|
126
|
-
console.log("❌ Decompression failed");
|
|
127
|
-
}
|
|
41
|
+
console.log("\nGREEDY COMPRESSOR");
|
|
42
|
+
console.log("-".repeat(80));
|
|
43
|
+
try {
|
|
44
|
+
const result = await client.compress(prompt, "gpt-4");
|
|
45
|
+
const compressed = result.compressed;
|
|
46
|
+
const dictionary = result.dictionary;
|
|
47
|
+
const ratio = result.compression_ratio;
|
|
48
|
+
const llmFormat = result.llm_format;
|
|
49
|
+
// Verify decompression
|
|
50
|
+
const decompressResult = await client.decompress(llmFormat);
|
|
51
|
+
const decompressed = decompressResult.decompressed;
|
|
52
|
+
console.log(`Compressed: ${JSON.stringify(compressed)}`);
|
|
53
|
+
console.log(`Dictionary: ${JSON.stringify(dictionary)}`);
|
|
54
|
+
console.log(`Compression ratio: ${(ratio * 100).toFixed(2)}%`);
|
|
55
|
+
console.log(`LLM-ready format length: ${llmFormat.length} chars`);
|
|
56
|
+
console.log(`Processing time: ${result.processing_time_ms.toFixed(2)}ms`);
|
|
57
|
+
if (decompressed === prompt) {
|
|
58
|
+
console.log("Decompression verified");
|
|
128
59
|
}
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
console.error(`❌ Rate limit exceeded: ${error.message}`);
|
|
132
|
-
}
|
|
133
|
-
else if (error instanceof api_client_1.APIRequestError) {
|
|
134
|
-
console.error(`❌ API error: ${error.message}`);
|
|
135
|
-
}
|
|
136
|
-
else {
|
|
137
|
-
throw error;
|
|
138
|
-
}
|
|
60
|
+
else {
|
|
61
|
+
console.log("Decompression failed");
|
|
139
62
|
}
|
|
140
63
|
}
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
console.log("📊 COMPARISON");
|
|
145
|
-
console.log("-".repeat(80));
|
|
146
|
-
const ratioDiff = results['optimal'].ratio - results['greedy'].ratio;
|
|
147
|
-
if (ratioDiff < 0) {
|
|
148
|
-
console.log(`✅ Optimal is ${Math.abs(ratioDiff * 100).toFixed(2)}% better (smaller ratio)`);
|
|
64
|
+
catch (error) {
|
|
65
|
+
if (error instanceof api_client_1.RateLimitError) {
|
|
66
|
+
console.error(`Rate limit exceeded: ${error.message}`);
|
|
149
67
|
}
|
|
150
|
-
else if (
|
|
151
|
-
console.
|
|
68
|
+
else if (error instanceof api_client_1.APIRequestError) {
|
|
69
|
+
console.error(`API error: ${error.message}`);
|
|
152
70
|
}
|
|
153
71
|
else {
|
|
154
|
-
|
|
155
|
-
}
|
|
156
|
-
console.log(`\nGreedy ratio: ${(results['greedy'].ratio * 100).toFixed(2)}%`);
|
|
157
|
-
console.log(`Optimal ratio: ${(results['optimal'].ratio * 100).toFixed(2)}%`);
|
|
158
|
-
console.log(`Difference: ${(ratioDiff * 100).toFixed(2)}%`);
|
|
159
|
-
const greedyDictSize = Object.keys(results['greedy'].dict).length;
|
|
160
|
-
const optimalDictSize = Object.keys(results['optimal'].dict).length;
|
|
161
|
-
if (greedyDictSize !== optimalDictSize) {
|
|
162
|
-
console.log(`\nDictionary size: Greedy=${greedyDictSize}, Optimal=${optimalDictSize}`);
|
|
72
|
+
throw error;
|
|
163
73
|
}
|
|
164
74
|
}
|
|
165
75
|
}
|
package/dist/core.d.ts
CHANGED
|
@@ -19,10 +19,11 @@ export interface CompleteOptions {
|
|
|
19
19
|
messages: Message[];
|
|
20
20
|
model?: string;
|
|
21
21
|
provider?: 'openai' | 'anthropic' | 'google';
|
|
22
|
+
desiredHle?: number;
|
|
22
23
|
compress?: boolean;
|
|
23
24
|
compressionConfig?: CompressionConfig;
|
|
24
25
|
compressOutput?: boolean;
|
|
25
|
-
|
|
26
|
+
mode?: 'async' | 'sync';
|
|
26
27
|
hleTargetPercent?: number;
|
|
27
28
|
minHleScore?: number;
|
|
28
29
|
autoSelectByHle?: boolean;
|
|
@@ -36,13 +37,11 @@ export declare class LightReach {
|
|
|
36
37
|
private apiClient;
|
|
37
38
|
private defaultModel;
|
|
38
39
|
private defaultProvider;
|
|
39
|
-
private useOptimal;
|
|
40
40
|
constructor(options?: {
|
|
41
41
|
apiKey?: string;
|
|
42
42
|
apiUrl?: string;
|
|
43
43
|
defaultModel?: string;
|
|
44
44
|
defaultProvider?: 'openai' | 'anthropic' | 'google';
|
|
45
|
-
useOptimal?: boolean;
|
|
46
45
|
});
|
|
47
46
|
complete(options: CompleteOptions): Promise<CompleteResponse>;
|
|
48
47
|
/**
|
|
@@ -50,7 +49,6 @@ export declare class LightReach {
|
|
|
50
49
|
*/
|
|
51
50
|
compress(text: string, options?: {
|
|
52
51
|
model?: string;
|
|
53
|
-
algorithm?: 'greedy' | 'optimal';
|
|
54
52
|
tags?: Record<string, string>;
|
|
55
53
|
}): Promise<CompressResponse>;
|
|
56
54
|
}
|
package/dist/core.js
CHANGED
|
@@ -13,11 +13,9 @@ class LightReach {
|
|
|
13
13
|
constructor(options = {}) {
|
|
14
14
|
this.defaultModel = options.defaultModel ?? 'gpt-4';
|
|
15
15
|
this.defaultProvider = options.defaultProvider ?? 'openai';
|
|
16
|
-
this.useOptimal = options.useOptimal ?? false;
|
|
17
16
|
this.apiClient = new api_client_1.PcompresslrAPIClient(options.apiKey, options.apiUrl);
|
|
18
17
|
}
|
|
19
18
|
async complete(options) {
|
|
20
|
-
const algorithm = (options.useOptimal ?? this.useOptimal) ? 'optimal' : 'greedy';
|
|
21
19
|
const cfg = options.compressionConfig
|
|
22
20
|
? {
|
|
23
21
|
compress_system: options.compressionConfig.compressSystem ?? false,
|
|
@@ -27,14 +25,13 @@ class LightReach {
|
|
|
27
25
|
}
|
|
28
26
|
: undefined;
|
|
29
27
|
try {
|
|
30
|
-
const
|
|
28
|
+
const req = {
|
|
31
29
|
messages: options.messages,
|
|
32
|
-
model: options.model ?? this.defaultModel,
|
|
33
30
|
llm_provider: options.provider ?? this.defaultProvider,
|
|
31
|
+
desired_hle: options.desiredHle,
|
|
34
32
|
compress: options.compress ?? true,
|
|
35
33
|
compression_config: cfg,
|
|
36
34
|
compress_output: options.compressOutput ?? false,
|
|
37
|
-
algorithm,
|
|
38
35
|
hle_target_percent: options.hleTargetPercent,
|
|
39
36
|
min_hle_score: options.minHleScore,
|
|
40
37
|
auto_select_by_hle: options.autoSelectByHle,
|
|
@@ -43,7 +40,13 @@ class LightReach {
|
|
|
43
40
|
max_tokens: options.maxTokens,
|
|
44
41
|
tags: options.tags,
|
|
45
42
|
max_history_messages: options.maxHistoryMessages,
|
|
46
|
-
}
|
|
43
|
+
};
|
|
44
|
+
// Only include deprecated `model` if explicitly provided to avoid noisy warnings.
|
|
45
|
+
if (options.model !== undefined)
|
|
46
|
+
req.model = options.model;
|
|
47
|
+
const resp = (options.mode ?? 'async') === 'sync'
|
|
48
|
+
? await this.apiClient.completeSync(req)
|
|
49
|
+
: await this.apiClient.complete(req);
|
|
47
50
|
// Add helpful aliases to better match the Feature 0.6 spec without changing backend response.
|
|
48
51
|
// We do NOT fabricate cost estimates here since the API response does not include pricing data.
|
|
49
52
|
return {
|
|
@@ -79,7 +82,7 @@ class LightReach {
|
|
|
79
82
|
* Compress text without making an LLM call (POST /api/v1/compress).
|
|
80
83
|
*/
|
|
81
84
|
async compress(text, options) {
|
|
82
|
-
return await this.apiClient.compress(text, options?.model ?? this.defaultModel, options?.
|
|
85
|
+
return await this.apiClient.compress(text, options?.model ?? this.defaultModel, options?.tags);
|
|
83
86
|
}
|
|
84
87
|
}
|
|
85
88
|
exports.LightReach = LightReach;
|
package/dist/version.d.ts
CHANGED
package/dist/version.js
CHANGED
package/package.json
CHANGED