compress-lightreach 1.0.2 → 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -109,20 +109,16 @@ const result = await client.complete({
109
109
 
110
110
  ### HLE Cascading with Admin Controls
111
111
 
112
- Admins can set quality **ceilings** via the dashboard (global or per-tag) to control costs. Your `desired_hle` is a preference, but requests will error if they exceed the admin-set ceiling:
112
+ Admins can set quality **ceilings** via the dashboard (global or per-tag) to control costs. Your `desired_hle` is a preference; if it exceeds an admin-set ceiling, the request is **clamped** to the ceiling and proceeds without error (the clamp is reported via `routing_info.hle_clamped`).
113
113
 
114
114
  ```typescript
115
115
  // Admin set global HLE ceiling to 30%
116
- // Requesting above the ceiling will error
117
- try {
118
- const result = await client.complete({
119
- messages: [{ role: 'user', content: 'Process payment' }],
120
- desired_hle: 35, // ERROR: exceeds ceiling of 30
121
- tags: { env: 'production' },
122
- });
123
- } catch (e) {
124
- console.error(e.message); // "Requested HLE 35% exceeds workspace maximum of 30%"
125
- }
116
+ // Requests above the ceiling are clamped to 30 (no error)
117
+ const clampedResult = await client.complete({
118
+ messages: [{ role: 'user', content: 'Process payment' }],
119
+ desired_hle: 35, // Will be clamped down to 30
120
+ tags: { env: 'production' },
121
+ });
126
122
 
127
123
  // Correct usage: request within ceiling
128
124
  const result = await client.complete({
@@ -131,7 +127,7 @@ const result = await client.complete({
131
127
  tags: { env: 'production' },
132
128
  });
133
129
 
134
- // Check if your HLE was lowered by admin ceiling
130
+ // Check if your HLE was lowered by an admin ceiling
135
131
  if (result.routing_info?.hle_clamped) {
136
132
  console.log(`HLE lowered from ${result.routing_info.requested_hle} ` +
137
133
  `to ${result.routing_info.effective_hle} ` +
@@ -139,38 +135,36 @@ if (result.routing_info?.hle_clamped) {
139
135
  }
140
136
  ```
141
137
 
142
- ### Using the LightReach Wrapper Class
138
+ ### With Compression Config
143
139
 
144
- For a more ergonomic API with camelCase parameters, use the `LightReach` class:
140
+ Configure per-role compression settings:
145
141
 
146
142
  ```typescript
147
- import { LightReach } from 'compress-lightreach';
143
+ import { PcompresslrAPIClient } from 'compress-lightreach';
148
144
 
149
- const client = new LightReach({
150
- apiKey: 'your-lightreach-api-key',
151
- defaultModel: 'gpt-4',
152
- defaultProvider: 'openai',
153
- useOptimal: false, // Use greedy algorithm by default
154
- });
145
+ const client = new PcompresslrAPIClient("your-lightreach-api-key");
155
146
 
156
147
  const result = await client.complete({
157
148
  messages: [{ role: 'user', content: 'Hello!' }],
149
+ desired_hle: 30,
158
150
  compress: true,
159
- compressOutput: false,
160
- compressionConfig: {
161
- compressSystem: false,
162
- compressUser: true,
163
- compressAssistant: false,
164
- compressOnlyLastNUser: 1,
151
+ compress_output: false,
152
+ compression_config: {
153
+ compress_system: false,
154
+ compress_user: true,
155
+ compress_assistant: false,
156
+ compress_only_last_n_user: 1,
165
157
  },
166
158
  temperature: 0.7,
167
- maxTokens: 1000,
159
+ max_tokens: 1000,
168
160
  tags: { env: 'production' },
169
161
  });
170
162
 
171
163
  console.log(result.decompressed_response);
164
+ console.log(`Model used: ${result.routing_info?.selected_model}`);
172
165
  ```
173
166
 
167
+
174
168
  ### Compression Only (No LLM Call)
175
169
 
176
170
  ```typescript
@@ -239,7 +233,7 @@ Messages-first completion with intelligent routing (POST `/api/v2/complete`).
239
233
  |-----------|------|---------|-------------|
240
234
  | `messages` | `Message[]` | required | Conversation history with `role` and `content` |
241
235
  | `llm_provider` | `'openai' \| 'anthropic' \| 'google' \| 'deepseek' \| 'moonshot'` | — | Optional provider constraint. Omit for cross-provider optimization |
242
- | `desired_hle` | `number` | — | Quality preference (0-40, where 40 is SOTA). Must not exceed admin ceilings |
236
+ | `desired_hle` | `number` | — | Quality preference (0-40, where 40 is SOTA). If above an admin ceiling, it is clamped down |
243
237
  | `compress` | `boolean` | `true` | Whether to compress messages |
244
238
  | `compress_output` | `boolean` | `false` | Whether to request compressed output from LLM |
245
239
  | `algorithm` | `'greedy' \| 'optimal'` | `'greedy'` | Compression algorithm |
@@ -355,56 +349,6 @@ Check API health status (GET `/health`).
355
349
  }
356
350
  ```
357
351
 
358
- ### `LightReach` Class
359
-
360
- Convenience wrapper with camelCase parameters.
361
-
362
- #### Constructor
363
-
364
- ```typescript
365
- new LightReach(options?: {
366
- apiKey?: string;
367
- apiUrl?: string;
368
- defaultModel?: string; // Default: 'gpt-4'
369
- defaultProvider?: 'openai' | 'anthropic' | 'google'; // Default: 'openai'
370
- useOptimal?: boolean; // Default: false (use greedy)
371
- })
372
- ```
373
-
374
- #### Methods
375
-
376
- ##### `complete(options: CompleteOptions): Promise<CompleteResponse>`
377
-
378
- ```typescript
379
- interface CompleteOptions {
380
- messages: Message[];
381
- model?: string;
382
- provider?: 'openai' | 'anthropic' | 'google';
383
- compress?: boolean;
384
- compressionConfig?: {
385
- compressSystem?: boolean;
386
- compressUser?: boolean;
387
- compressAssistant?: boolean;
388
- compressOnlyLastNUser?: number | null;
389
- };
390
- compressOutput?: boolean;
391
- useOptimal?: boolean;
392
- temperature?: number;
393
- maxTokens?: number;
394
- tags?: Record<string, string>;
395
- maxHistoryMessages?: number;
396
- }
397
- ```
398
-
399
- ##### `compress(text, options?): Promise<CompressResponse>`
400
-
401
- ```typescript
402
- await client.compress(text, {
403
- model?: string;
404
- algorithm?: 'greedy' | 'optimal';
405
- tags?: Record<string, string>;
406
- });
407
- ```
408
352
 
409
353
  ### Message Types
410
354
 
@@ -95,6 +95,23 @@ export interface HealthCheckResponse {
95
95
  status: string;
96
96
  version?: string;
97
97
  }
98
+ export type CompleteJobStatus = 'queued' | 'running' | 'succeeded' | 'failed' | 'canceled';
99
+ export interface CompleteJobCreateResponse {
100
+ job_id: string;
101
+ status: CompleteJobStatus;
102
+ status_url: string;
103
+ }
104
+ export interface CompleteJobStatusResponse {
105
+ job_id: string;
106
+ status: CompleteJobStatus;
107
+ phase?: string | null;
108
+ progress?: number | null;
109
+ created_at?: string | null;
110
+ started_at?: string | null;
111
+ finished_at?: string | null;
112
+ result?: any;
113
+ error?: any;
114
+ }
98
115
  export declare class PcompresslrAPIClient {
99
116
  private readonly DEFAULT_API_URL;
100
117
  private apiKey;
@@ -103,9 +120,34 @@ export declare class PcompresslrAPIClient {
103
120
  private session;
104
121
  constructor(apiKey?: string, apiUrl?: string, timeout?: number);
105
122
  private makeRequest;
123
+ /**
124
+ * Create async /complete job (POST /api/v1/complete/jobs).
125
+ */
126
+ createCompleteJob(request: CompleteV2Request, opts?: {
127
+ idempotencyKey?: string;
128
+ }): Promise<CompleteJobCreateResponse>;
129
+ /**
130
+ * Poll async /complete job (GET /api/v1/complete/jobs/{job_id}).
131
+ */
132
+ getCompleteJob(jobId: string): Promise<CompleteJobStatusResponse>;
133
+ /**
134
+ * Production-safe complete: enqueue + poll async job endpoints.
135
+ */
136
+ completeAsync(request: CompleteV2Request, opts?: {
137
+ pollIntervalMs?: number;
138
+ maxWaitMs?: number;
139
+ idempotencyKey?: string;
140
+ }): Promise<CompleteResponse>;
106
141
  compress(prompt: string, model?: string, algorithm?: "greedy" | "optimal", tags?: Record<string, string>): Promise<CompressResponse>;
107
142
  decompress(llmFormat: string): Promise<DecompressResponse>;
108
143
  healthCheck(): Promise<HealthCheckResponse>;
144
+ /**
145
+ * Direct (non-job) complete call (POST /api/v2/complete).
146
+ *
147
+ * This hits the synchronous endpoint and is best-effort for small/interactive usage.
148
+ * For production reliability, prefer `complete()` (async job + polling).
149
+ */
150
+ completeSync(request: CompleteV2Request): Promise<CompleteResponse>;
109
151
  /**
110
152
  * Messages-first complete with intelligent model selection (POST /api/v2/complete).
111
153
  *
@@ -42,7 +42,7 @@ class APIRequestError extends PcompresslrAPIError {
42
42
  }
43
43
  exports.APIRequestError = APIRequestError;
44
44
  class PcompresslrAPIClient {
45
- constructor(apiKey, apiUrl, timeout = 120000 // 2 minutes - complete() calls LLM which can take 30+ seconds
45
+ constructor(apiKey, apiUrl, timeout = 900000 // 15 minutes - complete() can include long upstream LLM calls
46
46
  ) {
47
47
  this.DEFAULT_API_URL = "https://api.compress.lightreach.io";
48
48
  // Get API key from parameter or environment
@@ -89,7 +89,7 @@ class PcompresslrAPIClient {
89
89
  return Promise.reject(error);
90
90
  });
91
91
  }
92
- async makeRequest(endpoint, data, method = 'POST') {
92
+ async makeRequest(endpoint, data, method = 'POST', opts) {
93
93
  const url = `${this.apiUrl}${endpoint}`;
94
94
  try {
95
95
  const response = await this.session.request({
@@ -97,6 +97,8 @@ class PcompresslrAPIClient {
97
97
  url: endpoint,
98
98
  data: method === 'POST' ? data : undefined,
99
99
  params: method === 'GET' ? data : undefined,
100
+ headers: opts?.headers,
101
+ timeout: opts?.timeoutMs,
100
102
  });
101
103
  return response.data;
102
104
  }
@@ -139,6 +141,71 @@ class PcompresslrAPIClient {
139
141
  throw new APIRequestError(`Request failed: ${errorMessage}`);
140
142
  }
141
143
  }
144
+ /**
145
+ * Create async /complete job (POST /api/v1/complete/jobs).
146
+ */
147
+ async createCompleteJob(request, opts) {
148
+ const data = {
149
+ messages: request.messages,
150
+ compress: request.compress ?? true,
151
+ compress_output: request.compress_output ?? false,
152
+ algorithm: request.algorithm ?? 'greedy',
153
+ };
154
+ if (request.llm_provider !== undefined)
155
+ data.llm_provider = request.llm_provider;
156
+ if (request.desired_hle !== undefined)
157
+ data.desired_hle = request.desired_hle;
158
+ if (request.compression_config)
159
+ data.compression_config = request.compression_config;
160
+ if (request.temperature !== undefined)
161
+ data.temperature = request.temperature;
162
+ if (request.max_tokens !== undefined)
163
+ data.max_tokens = request.max_tokens;
164
+ if (request.tags !== undefined)
165
+ data.tags = request.tags;
166
+ if (request.max_history_messages !== undefined)
167
+ data.max_history_messages = request.max_history_messages;
168
+ const headers = {};
169
+ if (opts?.idempotencyKey)
170
+ headers['Idempotency-Key'] = opts.idempotencyKey;
171
+ return this.makeRequest('/api/v1/complete/jobs', data, 'POST', { headers });
172
+ }
173
+ /**
174
+ * Poll async /complete job (GET /api/v1/complete/jobs/{job_id}).
175
+ */
176
+ async getCompleteJob(jobId) {
177
+ if (!jobId)
178
+ throw new APIRequestError('jobId is required');
179
+ // Keep polls short even if overall client timeout is high.
180
+ return this.makeRequest(`/api/v1/complete/jobs/${jobId}`, {}, 'GET', { timeoutMs: Math.min(this.timeout, 30000) });
181
+ }
182
+ /**
183
+ * Production-safe complete: enqueue + poll async job endpoints.
184
+ */
185
+ async completeAsync(request, opts) {
186
+ const job = await this.createCompleteJob(request, { idempotencyKey: opts?.idempotencyKey });
187
+ const jobId = job.job_id;
188
+ const pollIntervalMs = Math.max(200, opts?.pollIntervalMs ?? 1000);
189
+ const maxWaitMs = opts?.maxWaitMs ?? this.timeout;
190
+ const deadline = Date.now() + maxWaitMs;
191
+ let interval = pollIntervalMs;
192
+ while (true) {
193
+ const st = await this.getCompleteJob(jobId);
194
+ if (st.status === 'succeeded') {
195
+ if (st.result)
196
+ return st.result;
197
+ throw new APIRequestError('Async job succeeded but result was missing.');
198
+ }
199
+ if (st.status === 'failed' || st.status === 'canceled') {
200
+ throw new APIRequestError(`Async complete job ${st.status}: ${JSON.stringify(st.error ?? {})}`);
201
+ }
202
+ if (Date.now() > deadline) {
203
+ throw new APIRequestError(`Async complete job timed out after ${maxWaitMs}ms (status=${st.status}).`);
204
+ }
205
+ await new Promise((r) => setTimeout(r, interval));
206
+ interval = Math.min(Math.floor(interval * 1.2), 2000);
207
+ }
208
+ }
142
209
  async compress(prompt, model = "gpt-4", algorithm = "greedy", tags) {
143
210
  const data = {
144
211
  prompt,
@@ -172,6 +239,47 @@ class PcompresslrAPIClient {
172
239
  throw new APIRequestError(`Health check failed: ${errorMessage}`);
173
240
  }
174
241
  }
242
+ /**
243
+ * Direct (non-job) complete call (POST /api/v2/complete).
244
+ *
245
+ * This hits the synchronous endpoint and is best-effort for small/interactive usage.
246
+ * For production reliability, prefer `complete()` (async job + polling).
247
+ */
248
+ async completeSync(request) {
249
+ const data = {
250
+ messages: request.messages,
251
+ compress: request.compress ?? true,
252
+ compress_output: request.compress_output ?? false,
253
+ algorithm: request.algorithm ?? 'greedy',
254
+ };
255
+ // v1.0.0 parameters
256
+ if (request.llm_provider !== undefined)
257
+ data.llm_provider = request.llm_provider;
258
+ if (request.desired_hle !== undefined)
259
+ data.desired_hle = request.desired_hle;
260
+ if (request.compression_config)
261
+ data.compression_config = request.compression_config;
262
+ if (request.temperature !== undefined)
263
+ data.temperature = request.temperature;
264
+ if (request.max_tokens !== undefined)
265
+ data.max_tokens = request.max_tokens;
266
+ if (request.tags !== undefined)
267
+ data.tags = request.tags;
268
+ if (request.max_history_messages !== undefined)
269
+ data.max_history_messages = request.max_history_messages;
270
+ // Deprecated / backward compatible parameters (still accepted by some deployments)
271
+ if (request.model !== undefined)
272
+ data.model = request.model;
273
+ if (request.hle_target_percent !== undefined)
274
+ data.hle_target_percent = request.hle_target_percent;
275
+ if (request.min_hle_score !== undefined)
276
+ data.min_hle_score = request.min_hle_score;
277
+ if (request.auto_select_by_hle !== undefined)
278
+ data.auto_select_by_hle = request.auto_select_by_hle;
279
+ if (request.same_provider_only !== undefined)
280
+ data.same_provider_only = request.same_provider_only;
281
+ return this.makeRequest('/api/v2/complete', data, 'POST');
282
+ }
175
283
  /**
176
284
  * Messages-first complete with intelligent model selection (POST /api/v2/complete).
177
285
  *
@@ -234,7 +342,9 @@ class PcompresslrAPIClient {
234
342
  data.tags = request.tags;
235
343
  if (request.max_history_messages !== undefined)
236
344
  data.max_history_messages = request.max_history_messages;
237
- return this.makeRequest("/api/v2/complete", data);
345
+ // Prefer async jobs for production reliability; the synchronous
346
+ // /api/v2/complete endpoint remains available via `completeSync()`.
347
+ return this.completeAsync(request);
238
348
  }
239
349
  }
240
350
  exports.PcompresslrAPIClient = PcompresslrAPIClient;
package/dist/core.d.ts CHANGED
@@ -19,10 +19,12 @@ export interface CompleteOptions {
19
19
  messages: Message[];
20
20
  model?: string;
21
21
  provider?: 'openai' | 'anthropic' | 'google';
22
+ desiredHle?: number;
22
23
  compress?: boolean;
23
24
  compressionConfig?: CompressionConfig;
24
25
  compressOutput?: boolean;
25
26
  useOptimal?: boolean;
27
+ mode?: 'async' | 'sync';
26
28
  hleTargetPercent?: number;
27
29
  minHleScore?: number;
28
30
  autoSelectByHle?: boolean;
package/dist/core.js CHANGED
@@ -27,10 +27,10 @@ class LightReach {
27
27
  }
28
28
  : undefined;
29
29
  try {
30
- const resp = await this.apiClient.complete({
30
+ const req = {
31
31
  messages: options.messages,
32
- model: options.model ?? this.defaultModel,
33
32
  llm_provider: options.provider ?? this.defaultProvider,
33
+ desired_hle: options.desiredHle,
34
34
  compress: options.compress ?? true,
35
35
  compression_config: cfg,
36
36
  compress_output: options.compressOutput ?? false,
@@ -43,7 +43,13 @@ class LightReach {
43
43
  max_tokens: options.maxTokens,
44
44
  tags: options.tags,
45
45
  max_history_messages: options.maxHistoryMessages,
46
- });
46
+ };
47
+ // Only include deprecated `model` if explicitly provided to avoid noisy warnings.
48
+ if (options.model !== undefined)
49
+ req.model = options.model;
50
+ const resp = (options.mode ?? 'async') === 'sync'
51
+ ? await this.apiClient.completeSync(req)
52
+ : await this.apiClient.complete(req);
47
53
  // Add helpful aliases to better match the Feature 0.6 spec without changing backend response.
48
54
  // We do NOT fabricate cost estimates here since the API response does not include pricing data.
49
55
  return {
package/dist/version.d.ts CHANGED
@@ -1,4 +1,4 @@
1
1
  /**
2
2
  * Version information for compress-lightreach package.
3
3
  */
4
- export declare const __version__ = "1.0.0";
4
+ export declare const __version__ = "1.0.1";
package/dist/version.js CHANGED
@@ -4,4 +4,4 @@
4
4
  */
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
6
  exports.__version__ = void 0;
7
- exports.__version__ = "1.0.0";
7
+ exports.__version__ = "1.0.1";
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "compress-lightreach",
3
- "version": "1.0.2",
3
+ "version": "1.0.5",
4
4
  "description": "AI cost management SDK with intelligent model routing, prompt compression, and real-time token tracking",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",