compress-lightreach 1.0.2 → 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +23 -79
- package/dist/api-client.d.ts +42 -0
- package/dist/api-client.js +113 -3
- package/dist/core.d.ts +2 -0
- package/dist/core.js +9 -3
- package/dist/version.d.ts +1 -1
- package/dist/version.js +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
````diff
@@ -109,20 +109,16 @@ const result = await client.complete({
 
 ### HLE Cascading with Admin Controls
 
-Admins can set quality **ceilings** via the dashboard (global or per-tag) to control costs. Your `desired_hle` is a preference,
+Admins can set quality **ceilings** via the dashboard (global or per-tag) to control costs. Your `desired_hle` is a preference; if it exceeds an admin-set ceiling, the request will **silently clamp** to the ceiling and proceed.
 
 ```typescript
 // Admin set global HLE ceiling to 30%
-// Requesting above the ceiling will error
-try {
-  const result = await client.complete({
-    messages: [{ role: 'user', content: 'Process payment' }],
-    desired_hle: 35,
-    tags: { env: 'production' },
-  });
-} catch (e) {
-  console.error(e.message); // "Requested HLE 35% exceeds workspace maximum of 30%"
-}
+// Requesting above the ceiling will be clamped to 30 (no error)
+const result = await client.complete({
+  messages: [{ role: 'user', content: 'Process payment' }],
+  desired_hle: 35, // Will be clamped down to 30
+  tags: { env: 'production' },
+});
 
 // Correct usage: request within ceiling
 const result = await client.complete({
@@ -131,7 +127,7 @@ const result = await client.complete({
   tags: { env: 'production' },
 });
 
-// Check if your HLE was lowered by admin ceiling
+// Check if your HLE was lowered by an admin ceiling
 if (result.routing_info?.hle_clamped) {
   console.log(`HLE lowered from ${result.routing_info.requested_hle} ` +
               `to ${result.routing_info.effective_hle} ` +
@@ -139,38 +135,36 @@ if (result.routing_info?.hle_clamped) {
 }
 ```
 
-###
+### With Compression Config
 
-
+Configure per-role compression settings:
 
 ```typescript
-import { LightReach } from 'compress-lightreach';
+import { PcompresslrAPIClient } from 'compress-lightreach';
 
-const client = new LightReach({
-  apiKey: 'your-lightreach-api-key',
-  defaultModel: 'gpt-4',
-  defaultProvider: 'openai',
-  useOptimal: false, // Use greedy algorithm by default
-});
+const client = new PcompresslrAPIClient("your-lightreach-api-key");
 
 const result = await client.complete({
   messages: [{ role: 'user', content: 'Hello!' }],
+  desired_hle: 30,
   compress: true,
-  compressOutput: false,
-  compressionConfig: {
-    compressSystem: false,
-    compressUser: true,
-    compressAssistant: false,
-    compressOnlyLastNUser: 1,
+  compress_output: false,
+  compression_config: {
+    compress_system: false,
+    compress_user: true,
+    compress_assistant: false,
+    compress_only_last_n_user: 1,
   },
   temperature: 0.7,
-  maxTokens: 1000,
+  max_tokens: 1000,
   tags: { env: 'production' },
 });
 
 console.log(result.decompressed_response);
+console.log(`Model used: ${result.routing_info?.selected_model}`);
 ```
 
+
 ### Compression Only (No LLM Call)
 
 ```typescript
@@ -239,7 +233,7 @@ Messages-first completion with intelligent routing (POST `/api/v2/complete`).
 |-----------|------|---------|-------------|
 | `messages` | `Message[]` | required | Conversation history with `role` and `content` |
 | `llm_provider` | `'openai' \| 'anthropic' \| 'google' \| 'deepseek' \| 'moonshot'` | — | Optional provider constraint. Omit for cross-provider optimization |
-| `desired_hle` | `number` | — | Quality preference (0-40, where 40 is SOTA). |
+| `desired_hle` | `number` | — | Quality preference (0-40, where 40 is SOTA). If above an admin ceiling, it is clamped down |
 | `compress` | `boolean` | `true` | Whether to compress messages |
 | `compress_output` | `boolean` | `false` | Whether to request compressed output from LLM |
 | `algorithm` | `'greedy' \| 'optimal'` | `'greedy'` | Compression algorithm |
@@ -355,56 +349,6 @@ Check API health status (GET `/health`).
 }
 ```
 
-### `LightReach` Class
-
-Convenience wrapper with camelCase parameters.
-
-#### Constructor
-
-```typescript
-new LightReach(options?: {
-  apiKey?: string;
-  apiUrl?: string;
-  defaultModel?: string; // Default: 'gpt-4'
-  defaultProvider?: 'openai' | 'anthropic' | 'google'; // Default: 'openai'
-  useOptimal?: boolean; // Default: false (use greedy)
-})
-```
-
-#### Methods
-
-##### `complete(options: CompleteOptions): Promise<CompleteResponse>`
-
-```typescript
-interface CompleteOptions {
-  messages: Message[];
-  model?: string;
-  provider?: 'openai' | 'anthropic' | 'google';
-  compress?: boolean;
-  compressionConfig?: {
-    compressSystem?: boolean;
-    compressUser?: boolean;
-    compressAssistant?: boolean;
-    compressOnlyLastNUser?: number | null;
-  };
-  compressOutput?: boolean;
-  useOptimal?: boolean;
-  temperature?: number;
-  maxTokens?: number;
-  tags?: Record<string, string>;
-  maxHistoryMessages?: number;
-}
-```
-
-##### `compress(text, options?): Promise<CompressResponse>`
-
-```typescript
-await client.compress(text, {
-  model?: string;
-  algorithm?: 'greedy' | 'optimal';
-  tags?: Record<string, string>;
-});
-```
 
 ### Message Types
 
````
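The README changes above make ceiling violations silent rather than throwing. A caller that needs a hard quality floor must therefore enforce it client-side. This is a minimal sketch of such a guard, assuming only the `routing_info` fields shown in the diff (`hle_clamped`, `requested_hle`, `effective_hle`); the `MIN_ACCEPTABLE_HLE` threshold and the thrown error are illustrative, not part of the package:

```typescript
import { PcompresslrAPIClient } from 'compress-lightreach';

const client = new PcompresslrAPIClient('your-lightreach-api-key');

// Hypothetical client-side floor, since desired_hle no longer errors on ceiling violations.
const MIN_ACCEPTABLE_HLE = 25;

const result = await client.complete({
  messages: [{ role: 'user', content: 'Process payment' }],
  desired_hle: 35, // may be silently clamped by an admin ceiling
  tags: { env: 'production' },
});

const effective = result.routing_info?.effective_hle;
if (result.routing_info?.hle_clamped && effective !== undefined && effective < MIN_ACCEPTABLE_HLE) {
  // The API proceeded anyway; reject the result if the clamped quality is too low for this call site.
  throw new Error(`HLE clamped to ${effective}%, below required ${MIN_ACCEPTABLE_HLE}%`);
}
```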
package/dist/api-client.d.ts
CHANGED
```diff
@@ -95,6 +95,23 @@ export interface HealthCheckResponse {
     status: string;
     version?: string;
 }
+export type CompleteJobStatus = 'queued' | 'running' | 'succeeded' | 'failed' | 'canceled';
+export interface CompleteJobCreateResponse {
+    job_id: string;
+    status: CompleteJobStatus;
+    status_url: string;
+}
+export interface CompleteJobStatusResponse {
+    job_id: string;
+    status: CompleteJobStatus;
+    phase?: string | null;
+    progress?: number | null;
+    created_at?: string | null;
+    started_at?: string | null;
+    finished_at?: string | null;
+    result?: any;
+    error?: any;
+}
 export declare class PcompresslrAPIClient {
     private readonly DEFAULT_API_URL;
     private apiKey;
@@ -103,9 +120,34 @@ export declare class PcompresslrAPIClient {
     private session;
     constructor(apiKey?: string, apiUrl?: string, timeout?: number);
     private makeRequest;
+    /**
+     * Create async /complete job (POST /api/v1/complete/jobs).
+     */
+    createCompleteJob(request: CompleteV2Request, opts?: {
+        idempotencyKey?: string;
+    }): Promise<CompleteJobCreateResponse>;
+    /**
+     * Poll async /complete job (GET /api/v1/complete/jobs/{job_id}).
+     */
+    getCompleteJob(jobId: string): Promise<CompleteJobStatusResponse>;
+    /**
+     * Production-safe complete: enqueue + poll async job endpoints.
+     */
+    completeAsync(request: CompleteV2Request, opts?: {
+        pollIntervalMs?: number;
+        maxWaitMs?: number;
+        idempotencyKey?: string;
+    }): Promise<CompleteResponse>;
     compress(prompt: string, model?: string, algorithm?: "greedy" | "optimal", tags?: Record<string, string>): Promise<CompressResponse>;
     decompress(llmFormat: string): Promise<DecompressResponse>;
     healthCheck(): Promise<HealthCheckResponse>;
+    /**
+     * Direct (non-job) complete call (POST /api/v2/complete).
+     *
+     * This hits the synchronous endpoint and is best-effort for small/interactive usage.
+     * For production reliability, prefer `complete()` (async job + polling).
+     */
+    completeSync(request: CompleteV2Request): Promise<CompleteResponse>;
     /**
      * Messages-first complete with intelligent model selection (POST /api/v2/complete).
      *
```
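The new declarations support manual job control in addition to the one-shot `completeAsync` helper. Below is a sketch of manual polling against this declared surface, useful when you want to surface `phase`/`progress` in a UI; the message content, idempotency key value, and 1-second sleep are illustrative:

```typescript
import { PcompresslrAPIClient } from 'compress-lightreach';

const client = new PcompresslrAPIClient('your-lightreach-api-key');

// Enqueue the job; an Idempotency-Key makes retrying this call safe.
const job = await client.createCompleteJob(
  { messages: [{ role: 'user', content: 'Summarize our Q3 report' }] },
  { idempotencyKey: 'q3-summary-001' },
);

// Poll by hand instead of using completeAsync, e.g. to report progress.
let status = await client.getCompleteJob(job.job_id);
while (status.status === 'queued' || status.status === 'running') {
  console.log(`phase=${status.phase ?? 'n/a'} progress=${status.progress ?? 0}`);
  await new Promise((r) => setTimeout(r, 1000));
  status = await client.getCompleteJob(job.job_id);
}

if (status.status === 'succeeded') {
  console.log(status.result);
} else {
  console.error(status.status, status.error); // 'failed' or 'canceled'
}
```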
package/dist/api-client.js
CHANGED
```diff
@@ -42,7 +42,7 @@ class APIRequestError extends PcompresslrAPIError {
 }
 exports.APIRequestError = APIRequestError;
 class PcompresslrAPIClient {
-    constructor(apiKey, apiUrl, timeout =
+    constructor(apiKey, apiUrl, timeout = 900000 // 15 minutes - complete() can include long upstream LLM calls
     ) {
         this.DEFAULT_API_URL = "https://api.compress.lightreach.io";
         // Get API key from parameter or environment
@@ -89,7 +89,7 @@ class PcompresslrAPIClient {
             return Promise.reject(error);
         });
     }
-    async makeRequest(endpoint, data, method = 'POST') {
+    async makeRequest(endpoint, data, method = 'POST', opts) {
         const url = `${this.apiUrl}${endpoint}`;
         try {
             const response = await this.session.request({
@@ -97,6 +97,8 @@
                 url: endpoint,
                 data: method === 'POST' ? data : undefined,
                 params: method === 'GET' ? data : undefined,
+                headers: opts?.headers,
+                timeout: opts?.timeoutMs,
             });
             return response.data;
         }
@@ -139,6 +141,71 @@
             throw new APIRequestError(`Request failed: ${errorMessage}`);
         }
     }
+    /**
+     * Create async /complete job (POST /api/v1/complete/jobs).
+     */
+    async createCompleteJob(request, opts) {
+        const data = {
+            messages: request.messages,
+            compress: request.compress ?? true,
+            compress_output: request.compress_output ?? false,
+            algorithm: request.algorithm ?? 'greedy',
+        };
+        if (request.llm_provider !== undefined)
+            data.llm_provider = request.llm_provider;
+        if (request.desired_hle !== undefined)
+            data.desired_hle = request.desired_hle;
+        if (request.compression_config)
+            data.compression_config = request.compression_config;
+        if (request.temperature !== undefined)
+            data.temperature = request.temperature;
+        if (request.max_tokens !== undefined)
+            data.max_tokens = request.max_tokens;
+        if (request.tags !== undefined)
+            data.tags = request.tags;
+        if (request.max_history_messages !== undefined)
+            data.max_history_messages = request.max_history_messages;
+        const headers = {};
+        if (opts?.idempotencyKey)
+            headers['Idempotency-Key'] = opts.idempotencyKey;
+        return this.makeRequest('/api/v1/complete/jobs', data, 'POST', { headers });
+    }
+    /**
+     * Poll async /complete job (GET /api/v1/complete/jobs/{job_id}).
+     */
+    async getCompleteJob(jobId) {
+        if (!jobId)
+            throw new APIRequestError('jobId is required');
+        // Keep polls short even if overall client timeout is high.
+        return this.makeRequest(`/api/v1/complete/jobs/${jobId}`, {}, 'GET', { timeoutMs: Math.min(this.timeout, 30000) });
+    }
+    /**
+     * Production-safe complete: enqueue + poll async job endpoints.
+     */
+    async completeAsync(request, opts) {
+        const job = await this.createCompleteJob(request, { idempotencyKey: opts?.idempotencyKey });
+        const jobId = job.job_id;
+        const pollIntervalMs = Math.max(200, opts?.pollIntervalMs ?? 1000);
+        const maxWaitMs = opts?.maxWaitMs ?? this.timeout;
+        const deadline = Date.now() + maxWaitMs;
+        let interval = pollIntervalMs;
+        while (true) {
+            const st = await this.getCompleteJob(jobId);
+            if (st.status === 'succeeded') {
+                if (st.result)
+                    return st.result;
+                throw new APIRequestError('Async job succeeded but result was missing.');
+            }
+            if (st.status === 'failed' || st.status === 'canceled') {
+                throw new APIRequestError(`Async complete job ${st.status}: ${JSON.stringify(st.error ?? {})}`);
+            }
+            if (Date.now() > deadline) {
+                throw new APIRequestError(`Async complete job timed out after ${maxWaitMs}ms (status=${st.status}).`);
+            }
+            await new Promise((r) => setTimeout(r, interval));
+            interval = Math.min(Math.floor(interval * 1.2), 2000);
+        }
+    }
     async compress(prompt, model = "gpt-4", algorithm = "greedy", tags) {
         const data = {
             prompt,
@@ -172,6 +239,47 @@
             throw new APIRequestError(`Health check failed: ${errorMessage}`);
         }
     }
+    /**
+     * Direct (non-job) complete call (POST /api/v2/complete).
+     *
+     * This hits the synchronous endpoint and is best-effort for small/interactive usage.
+     * For production reliability, prefer `complete()` (async job + polling).
+     */
+    async completeSync(request) {
+        const data = {
+            messages: request.messages,
+            compress: request.compress ?? true,
+            compress_output: request.compress_output ?? false,
+            algorithm: request.algorithm ?? 'greedy',
+        };
+        // v1.0.0 parameters
+        if (request.llm_provider !== undefined)
+            data.llm_provider = request.llm_provider;
+        if (request.desired_hle !== undefined)
+            data.desired_hle = request.desired_hle;
+        if (request.compression_config)
+            data.compression_config = request.compression_config;
+        if (request.temperature !== undefined)
+            data.temperature = request.temperature;
+        if (request.max_tokens !== undefined)
+            data.max_tokens = request.max_tokens;
+        if (request.tags !== undefined)
+            data.tags = request.tags;
+        if (request.max_history_messages !== undefined)
+            data.max_history_messages = request.max_history_messages;
+        // Deprecated / backward compatible parameters (still accepted by some deployments)
+        if (request.model !== undefined)
+            data.model = request.model;
+        if (request.hle_target_percent !== undefined)
+            data.hle_target_percent = request.hle_target_percent;
+        if (request.min_hle_score !== undefined)
+            data.min_hle_score = request.min_hle_score;
+        if (request.auto_select_by_hle !== undefined)
+            data.auto_select_by_hle = request.auto_select_by_hle;
+        if (request.same_provider_only !== undefined)
+            data.same_provider_only = request.same_provider_only;
+        return this.makeRequest('/api/v2/complete', data, 'POST');
+    }
     /**
      * Messages-first complete with intelligent model selection (POST /api/v2/complete).
      *
@@ -234,7 +342,9 @@
             data.tags = request.tags;
         if (request.max_history_messages !== undefined)
             data.max_history_messages = request.max_history_messages;
-        return this.makeRequest('/api/v2/complete', data, 'POST');
+        // Prefer async jobs for production reliability; sync remains available via /api/v2/complete
+        // by calling makeRequest directly if needed.
+        return this.completeAsync(request);
     }
 }
 exports.PcompresslrAPIClient = PcompresslrAPIClient;
```
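With `complete()` now delegating to `completeAsync()`, its options become the main reliability knobs. A sketch of the knobs the implementation above exposes (the message content, interval, deadline, and key values are illustrative). Note that polling starts at `pollIntervalMs` (floored at 200 ms), backs off by 1.2× per poll up to a 2 s cap, each individual poll request is capped at 30 s, and failed, canceled, and timed-out jobs all surface as `APIRequestError`:

```typescript
import { PcompresslrAPIClient } from 'compress-lightreach';

const client = new PcompresslrAPIClient('your-lightreach-api-key');

try {
  const result = await client.completeAsync(
    { messages: [{ role: 'user', content: 'Draft a release announcement' }], desired_hle: 30 },
    {
      pollIntervalMs: 500,                // initial poll interval; backs off 1.2x up to 2s
      maxWaitMs: 5 * 60 * 1000,           // give up after 5 minutes instead of the 15-minute default
      idempotencyKey: 'release-draft-42', // dedupes the enqueue if this call is retried
    },
  );
  console.log(result.decompressed_response);
} catch (e) {
  console.error(e); // APIRequestError: job failed, was canceled, or exceeded maxWaitMs
}
```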
package/dist/core.d.ts
CHANGED
```diff
@@ -19,10 +19,12 @@ export interface CompleteOptions {
     messages: Message[];
     model?: string;
     provider?: 'openai' | 'anthropic' | 'google';
+    desiredHle?: number;
     compress?: boolean;
     compressionConfig?: CompressionConfig;
     compressOutput?: boolean;
     useOptimal?: boolean;
+    mode?: 'async' | 'sync';
     hleTargetPercent?: number;
     minHleScore?: number;
     autoSelectByHle?: boolean;
```
package/dist/core.js
CHANGED
```diff
@@ -27,10 +27,10 @@ class LightReach {
             }
             : undefined;
         try {
-            const resp = await this.apiClient.complete({
+            const req = {
                 messages: options.messages,
-                model: options.model ?? this.defaultModel,
                 llm_provider: options.provider ?? this.defaultProvider,
+                desired_hle: options.desiredHle,
                 compress: options.compress ?? true,
                 compression_config: cfg,
                 compress_output: options.compressOutput ?? false,
@@ -43,7 +43,13 @@
                 max_tokens: options.maxTokens,
                 tags: options.tags,
                 max_history_messages: options.maxHistoryMessages,
-            });
+            };
+            // Only include deprecated `model` if explicitly provided to avoid noisy warnings.
+            if (options.model !== undefined)
+                req.model = options.model;
+            const resp = (options.mode ?? 'async') === 'sync'
+                ? await this.apiClient.completeSync(req)
+                : await this.apiClient.complete(req);
             // Add helpful aliases to better match the Feature 0.6 spec without changing backend response.
             // We do NOT fabricate cost estimates here since the API response does not include pricing data.
             return {
```
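On the wrapper side, `desiredHle` is forwarded as `desired_hle`, the deprecated `model` is only sent when explicitly set, and `mode: 'sync'` routes through `completeSync` while the default `'async'` uses the job-based `complete`. A hedged usage sketch, assuming `LightReach` is still importable from the package root (its README reference section was removed in this release, but `dist/core.js` still ships the class) and that its constructor still accepts the previously documented `apiKey` option:

```typescript
// Assumption: LightReach remains exported despite the README removal.
import { LightReach } from 'compress-lightreach';

const lr = new LightReach({ apiKey: 'your-lightreach-api-key' });

const result = await lr.complete({
  messages: [{ role: 'user', content: 'Hello!' }],
  desiredHle: 30,   // forwarded to the API as desired_hle
  mode: 'sync',     // opt back into the direct POST /api/v2/complete path
  // omit `model`: deprecated, only forwarded when explicitly provided
});

console.log(result.decompressed_response);
```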
package/dist/version.d.ts
CHANGED
package/dist/version.js
CHANGED
package/package.json
CHANGED