compress-lightreach 1.0.3 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -11,10 +11,7 @@ Compress Light Reach is a Node.js/TypeScript SDK that provides intelligent model
11
11
  ## Features
12
12
 
13
13
  - **Intelligent Model Routing**: Automatically selects optimal model based on quality requirements (HLE) and available provider keys
14
- - **Token-aware Compression**: Replaces repeated substrings with shorter placeholders
15
- - **Dual Algorithms**:
16
- - Fast greedy (~99% optimal) for daily use
17
- - Optimal DP (O(n²)) for critical prompts
14
+ - **Token-aware Compression**: Replaces repeated substrings with shorter placeholders using a fast greedy algorithm
18
15
  - **Lossless**: Perfect decompression guaranteed
19
16
  - **Output Compression**: Optional model output compression support
20
17
  - **Cloud API**: Uses Light Reach's cloud service for compression and routing
@@ -52,7 +49,7 @@ const result = await client.complete({
52
49
  { role: 'system', content: 'You are a helpful assistant.' },
53
50
  { role: 'user', content: 'Explain quantum computing in simple terms.' },
54
51
  ],
55
- desired_hle: 30, // Quality preference (0-40, where 40 is SOTA)
52
+ desired_hle: 30, // Quality ceiling (0-100). Current SOTA is ~40%.
56
53
  });
57
54
 
58
55
  console.log(result.decompressed_response);
@@ -60,6 +57,28 @@ console.log(`Selected: ${result.routing_info?.selected_model}`);
60
57
  console.log(`Token savings: ${result.compression_stats.token_savings}`);
61
58
  ```
62
59
 
60
+ ## OpenAI-compatible API (Cursor / OpenAI SDKs)
61
+
62
+ LightReach also exposes a **strict OpenAI-compatible** surface (including streaming SSE) so you can use standard OpenAI tooling without changing your app.
63
+
64
+ - **Cursor base URL**: `https://compress.lightreach.io/v1/cursor`
65
+ - **Generic OpenAI-compatible base URL**: `https://compress.lightreach.io/v1`
66
+ - **Endpoints**: `GET /models`, `POST /chat/completions`
67
+ - **Model id**: `lightreach`
68
+
69
+ Example (cURL):
70
+
71
+ ```bash
72
+ curl -sS https://compress.lightreach.io/v1/chat/completions \
73
+ -H "Authorization: Bearer lr_your_lightreach_key" \
74
+ -H "Content-Type: application/json" \
75
+ -d '{
76
+ "model": "lightreach",
77
+ "messages": [{"role":"user","content":"Say hello"}],
78
+ "stream": true
79
+ }'
80
+ ```
81
+
63
82
  ### With Output Compression
64
83
 
65
84
  ```typescript
@@ -84,7 +103,7 @@ const client = new PcompresslrAPIClient("your-lightreach-api-key");
84
103
  // Cross-provider optimization: system picks cheapest model meeting your quality bar
85
104
  const result = await client.complete({
86
105
  messages: [{ role: 'user', content: 'Explain quantum computing' }],
87
- desired_hle: 30, // Quality preference (0-40, where 40 is SOTA)
106
+ desired_hle: 30, // Quality ceiling (0-100). Current SOTA is ~40%.
88
107
  });
89
108
 
90
109
  // Check what was selected
@@ -109,20 +128,16 @@ const result = await client.complete({
109
128
 
110
129
  ### HLE Cascading with Admin Controls
111
130
 
112
- Admins can set quality **ceilings** via the dashboard (global or per-tag) to control costs. Your `desired_hle` is a preference, but requests will error if they exceed the admin-set ceiling:
131
+ Admins can set quality **ceilings** via the dashboard (global or per-tag) to control costs. Your `desired_hle` is a preference; if it exceeds an admin-set ceiling, it is **silently clamped** to that ceiling and the request proceeds without error.
113
132
 
114
133
  ```typescript
115
134
  // Admin set global HLE ceiling to 30%
116
- // Requesting above the ceiling will error
117
- try {
118
- const result = await client.complete({
119
- messages: [{ role: 'user', content: 'Process payment' }],
120
- desired_hle: 35, // ERROR: exceeds ceiling of 30
121
- tags: { env: 'production' },
122
- });
123
- } catch (e) {
124
- console.error(e.message); // "Requested HLE 35% exceeds workspace maximum of 30%"
125
- }
135
+ // A desired_hle above the ceiling is clamped to 30 (no error)
136
+ const clampedResult = await client.complete({
137
+ messages: [{ role: 'user', content: 'Process payment' }],
138
+ desired_hle: 35, // Will be clamped down to 30
139
+ tags: { env: 'production' },
140
+ });
126
141
 
127
142
  // Correct usage: request within ceiling
128
143
  const result = await client.complete({
@@ -131,7 +146,7 @@ const result = await client.complete({
131
146
  tags: { env: 'production' },
132
147
  });
133
148
 
134
- // Check if your HLE was lowered by admin ceiling
149
+ // Check if your HLE was lowered by an admin ceiling
135
150
  if (result.routing_info?.hle_clamped) {
136
151
  console.log(`HLE lowered from ${result.routing_info.requested_hle} ` +
137
152
  `to ${result.routing_info.effective_hle} ` +
@@ -180,7 +195,6 @@ const client = new PcompresslrAPIClient("your-lightreach-api-key");
180
195
  const compressed = await client.compress(
181
196
  "Your text with repeated content here...",
182
197
  "gpt-4", // Model for tokenization
183
- "greedy", // Algorithm: 'greedy' or 'optimal'
184
198
  { env: 'dev' } // Optional tags
185
199
  );
186
200
 
@@ -200,12 +214,6 @@ export PCOMPRESLR_API_KEY=your-api-key
200
214
 
201
215
  # Compress a prompt
202
216
  npx pcompresslr "Your prompt with repeated text here..."
203
-
204
- # Use optimal algorithm only
205
- npx pcompresslr "Your prompt here" --optimal-only
206
-
207
- # Use greedy algorithm only
208
- npx pcompresslr "Your prompt here" --greedy-only
209
217
  ```
210
218
 
211
219
  ## API Reference
@@ -223,7 +231,7 @@ new PcompresslrAPIClient(apiKey?: string, apiUrl?: string, timeout?: number)
223
231
  **Parameters:**
224
232
  - `apiKey` (string, optional): LightReach API key. Falls back to `LIGHTREACH_API_KEY` or `PCOMPRESLR_API_KEY` env vars.
225
233
  - `apiUrl` (string, optional): Override base API URL. Falls back to `PCOMPRESLR_API_URL` env var. Default: `https://api.compress.lightreach.io`
226
- - `timeout` (number, optional): Request timeout in milliseconds. Default: `120000` (2 minutes)
234
+ - `timeout` (number, optional): Request timeout in milliseconds. Default: `900000` (15 minutes)
227
235
 
228
236
  #### Methods
229
237
 
@@ -237,10 +245,9 @@ Messages-first completion with intelligent routing (POST `/api/v2/complete`).
237
245
  |-----------|------|---------|-------------|
238
246
  | `messages` | `Message[]` | required | Conversation history with `role` and `content` |
239
247
  | `llm_provider` | `'openai' \| 'anthropic' \| 'google' \| 'deepseek' \| 'moonshot'` | — | Optional provider constraint. Omit for cross-provider optimization |
240
- | `desired_hle` | `number` | — | Quality preference (0-40, where 40 is SOTA). Must not exceed admin ceilings |
248
+ | `desired_hle` | `number` | — | Quality ceiling (0-100). If above an admin ceiling, it is clamped down |
241
249
  | `compress` | `boolean` | `true` | Whether to compress messages |
242
250
  | `compress_output` | `boolean` | `false` | Whether to request compressed output from LLM |
243
- | `algorithm` | `'greedy' \| 'optimal'` | `'greedy'` | Compression algorithm |
244
251
  | `compression_config` | `object` | — | Per-role compression settings (see below) |
245
252
  | `temperature` | `number` | — | LLM temperature parameter |
246
253
  | `max_tokens` | `number` | — | Maximum tokens to generate |
@@ -264,19 +271,23 @@ Messages-first completion with intelligent routing (POST `/api/v2/complete`).
264
271
  {
265
272
  decompressed_response: string; // Final decompressed LLM response
266
273
  compression_stats: {
267
- original_size_chars: number;
268
- compressed_size_chars: number;
274
+ compression_enabled: boolean;
269
275
  original_tokens: number;
270
276
  compressed_tokens: number;
271
- compression_ratio: number;
272
277
  token_savings: number;
273
- token_savings_percent: number;
278
+ compression_ratio: number;
279
+ token_count_exact?: boolean;
280
+ token_count_source?: string;
281
+ token_accounting_note?: string;
274
282
  processing_time_ms?: number;
275
283
  };
276
284
  llm_stats: {
277
- prompt_tokens: number;
278
- completion_tokens: number;
285
+ provider?: string;
286
+ model?: string;
287
+ input_tokens: number;
288
+ output_tokens: number;
279
289
  total_tokens: number;
290
+ finish_reason?: string | null;
280
291
  };
281
292
  routing_info?: {
282
293
  selected_model: string; // Model chosen by system
@@ -299,14 +310,16 @@ Messages-first completion with intelligent routing (POST `/api/v2/complete`).
299
310
  }
300
311
  ```
301
312
 
302
- ##### `compress(prompt, model?, algorithm?, tags?): Promise<CompressResponse>`
313
+ ##### `compress(prompt, model?, tags?): Promise<CompressResponse>`
314
+
315
+ Also supports a legacy call shape: `compress(prompt, model, algorithm, tags?)` (only `"greedy"` is supported).
303
316
 
304
317
  Compression-only (POST `/api/v1/compress`).
305
318
 
306
319
  **Parameters:**
307
320
  - `prompt` (string, required): Text to compress
308
321
  - `model` (string, optional): Model for tokenization. Default: `'gpt-4'`
309
- - `algorithm` (`'greedy' | 'optimal'`, optional): Compression algorithm. Default: `'greedy'`
322
+ - `algorithm` (`"greedy"`, optional): Legacy-only parameter. Only `"greedy"` is supported.
310
323
  - `tags` (`Record<string, string>`, optional): Tags for attribution
311
324
 
312
325
  **Response (`CompressResponse`):**
@@ -30,19 +30,23 @@ export interface DecompressResponse {
30
30
  export interface CompleteResponse {
31
31
  decompressed_response: string;
32
32
  compression_stats: {
33
- original_size_chars: number;
34
- compressed_size_chars: number;
33
+ compression_enabled: boolean;
35
34
  original_tokens: number;
36
35
  compressed_tokens: number;
37
- compression_ratio: number;
38
36
  token_savings: number;
39
- token_savings_percent: number;
37
+ compression_ratio: number;
38
+ token_count_exact?: boolean;
39
+ token_count_source?: string;
40
+ token_accounting_note?: string;
40
41
  processing_time_ms?: number;
41
42
  };
42
43
  llm_stats: {
43
- prompt_tokens: number;
44
- completion_tokens: number;
44
+ provider?: string;
45
+ model?: string;
46
+ input_tokens: number;
47
+ output_tokens: number;
45
48
  total_tokens: number;
49
+ finish_reason?: string | null;
46
50
  };
47
51
  warnings?: string[];
48
52
  routing_info?: {
@@ -80,7 +84,7 @@ export interface CompleteV2Request {
80
84
  compress_only_last_n_user?: number | null;
81
85
  };
82
86
  compress_output?: boolean;
83
- algorithm?: 'greedy' | 'optimal';
87
+ algorithm?: 'greedy';
84
88
  temperature?: number;
85
89
  max_tokens?: number;
86
90
  tags?: Record<string, string>;
@@ -95,6 +99,23 @@ export interface HealthCheckResponse {
95
99
  status: string;
96
100
  version?: string;
97
101
  }
102
+ export type CompleteJobStatus = 'queued' | 'running' | 'succeeded' | 'failed' | 'canceled';
103
+ export interface CompleteJobCreateResponse {
104
+ job_id: string;
105
+ status: CompleteJobStatus;
106
+ status_url: string;
107
+ }
108
+ export interface CompleteJobStatusResponse {
109
+ job_id: string;
110
+ status: CompleteJobStatus;
111
+ phase?: string | null;
112
+ progress?: number | null;
113
+ created_at?: string | null;
114
+ started_at?: string | null;
115
+ finished_at?: string | null;
116
+ result?: any;
117
+ error?: any;
118
+ }
98
119
  export declare class PcompresslrAPIClient {
99
120
  private readonly DEFAULT_API_URL;
100
121
  private apiKey;
@@ -103,9 +124,42 @@ export declare class PcompresslrAPIClient {
103
124
  private session;
104
125
  constructor(apiKey?: string, apiUrl?: string, timeout?: number);
105
126
  private makeRequest;
106
- compress(prompt: string, model?: string, algorithm?: "greedy" | "optimal", tags?: Record<string, string>): Promise<CompressResponse>;
127
+ /**
128
+ * Create async /complete job (POST /api/v1/complete/jobs).
129
+ */
130
+ createCompleteJob(request: CompleteV2Request, opts?: {
131
+ idempotencyKey?: string;
132
+ }): Promise<CompleteJobCreateResponse>;
133
+ /**
134
+ * Poll async /complete job (GET /api/v1/complete/jobs/{job_id}).
135
+ */
136
+ getCompleteJob(jobId: string): Promise<CompleteJobStatusResponse>;
137
+ /**
138
+ * Production-safe complete: enqueue + poll async job endpoints.
139
+ */
140
+ completeAsync(request: CompleteV2Request, opts?: {
141
+ pollIntervalMs?: number;
142
+ maxWaitMs?: number;
143
+ idempotencyKey?: string;
144
+ }): Promise<CompleteResponse>;
145
+ /**
146
+ * Compress text without making an LLM call (POST /api/v1/compress).
147
+ *
148
+ * Supported call shapes:
149
+ * - compress(prompt, model?, tags?)
150
+ * - compress(prompt, model, algorithm, tags?) (back-compat; only "greedy" is supported)
151
+ */
152
+ compress(prompt: string, model?: string, tags?: Record<string, string>): Promise<CompressResponse>;
153
+ compress(prompt: string, model: string, algorithm: 'greedy', tags?: Record<string, string>): Promise<CompressResponse>;
107
154
  decompress(llmFormat: string): Promise<DecompressResponse>;
108
155
  healthCheck(): Promise<HealthCheckResponse>;
156
+ /**
157
+ * Direct (non-job) complete call (POST /api/v2/complete).
158
+ *
159
+ * This hits the synchronous endpoint and is best-effort for small/interactive usage.
160
+ * For production reliability, prefer `complete()` (async job + polling).
161
+ */
162
+ completeSync(request: CompleteV2Request): Promise<CompleteResponse>;
109
163
  /**
110
164
  * Messages-first complete with intelligent model selection (POST /api/v2/complete).
111
165
  *
@@ -42,7 +42,7 @@ class APIRequestError extends PcompresslrAPIError {
42
42
  }
43
43
  exports.APIRequestError = APIRequestError;
44
44
  class PcompresslrAPIClient {
45
- constructor(apiKey, apiUrl, timeout = 120000 // 2 minutes - complete() calls LLM which can take 30+ seconds
45
+ constructor(apiKey, apiUrl, timeout = 900000 // 15 minutes - complete() can include long upstream LLM calls
46
46
  ) {
47
47
  this.DEFAULT_API_URL = "https://api.compress.lightreach.io";
48
48
  // Get API key from parameter or environment
@@ -89,7 +89,7 @@ class PcompresslrAPIClient {
89
89
  return Promise.reject(error);
90
90
  });
91
91
  }
92
- async makeRequest(endpoint, data, method = 'POST') {
92
+ async makeRequest(endpoint, data, method = 'POST', opts) {
93
93
  const url = `${this.apiUrl}${endpoint}`;
94
94
  try {
95
95
  const response = await this.session.request({
@@ -97,6 +97,8 @@ class PcompresslrAPIClient {
97
97
  url: endpoint,
98
98
  data: method === 'POST' ? data : undefined,
99
99
  params: method === 'GET' ? data : undefined,
100
+ headers: opts?.headers,
101
+ timeout: opts?.timeoutMs,
100
102
  });
101
103
  return response.data;
102
104
  }
@@ -139,15 +141,87 @@ class PcompresslrAPIClient {
139
141
  throw new APIRequestError(`Request failed: ${errorMessage}`);
140
142
  }
141
143
  }
142
- async compress(prompt, model = "gpt-4", algorithm = "greedy", tags) {
144
+ /**
145
+ * Create async /complete job (POST /api/v1/complete/jobs).
146
+ */
147
+ async createCompleteJob(request, opts) {
143
148
  const data = {
144
- prompt,
145
- model,
146
- algorithm
149
+ messages: request.messages,
150
+ compress: request.compress ?? true,
151
+ compress_output: request.compress_output ?? false,
152
+ algorithm: request.algorithm ?? 'greedy',
147
153
  };
148
- if (tags) {
149
- data.tags = tags;
154
+ if (request.llm_provider !== undefined)
155
+ data.llm_provider = request.llm_provider;
156
+ if (request.desired_hle !== undefined)
157
+ data.desired_hle = request.desired_hle;
158
+ if (request.compression_config)
159
+ data.compression_config = request.compression_config;
160
+ if (request.temperature !== undefined)
161
+ data.temperature = request.temperature;
162
+ if (request.max_tokens !== undefined)
163
+ data.max_tokens = request.max_tokens;
164
+ if (request.tags !== undefined)
165
+ data.tags = request.tags;
166
+ if (request.max_history_messages !== undefined)
167
+ data.max_history_messages = request.max_history_messages;
168
+ const headers = {};
169
+ if (opts?.idempotencyKey)
170
+ headers['Idempotency-Key'] = opts.idempotencyKey;
171
+ return this.makeRequest('/api/v1/complete/jobs', data, 'POST', { headers });
172
+ }
173
+ /**
174
+ * Poll async /complete job (GET /api/v1/complete/jobs/{job_id}).
175
+ */
176
+ async getCompleteJob(jobId) {
177
+ if (!jobId)
178
+ throw new APIRequestError('jobId is required');
179
+ // Keep polls short even if overall client timeout is high.
180
+ return this.makeRequest(`/api/v1/complete/jobs/${jobId}`, {}, 'GET', { timeoutMs: Math.min(this.timeout, 30000) });
181
+ }
182
+ /**
183
+ * Production-safe complete: enqueue + poll async job endpoints.
184
+ */
185
+ async completeAsync(request, opts) {
186
+ const job = await this.createCompleteJob(request, { idempotencyKey: opts?.idempotencyKey });
187
+ const jobId = job.job_id;
188
+ const pollIntervalMs = Math.max(200, opts?.pollIntervalMs ?? 1000);
189
+ const maxWaitMs = opts?.maxWaitMs ?? this.timeout;
190
+ const deadline = Date.now() + maxWaitMs;
191
+ let interval = pollIntervalMs;
192
+ while (true) {
193
+ const st = await this.getCompleteJob(jobId);
194
+ if (st.status === 'succeeded') {
195
+ if (st.result)
196
+ return st.result;
197
+ throw new APIRequestError('Async job succeeded but result was missing.');
198
+ }
199
+ if (st.status === 'failed' || st.status === 'canceled') {
200
+ throw new APIRequestError(`Async complete job ${st.status}: ${JSON.stringify(st.error ?? {})}`);
201
+ }
202
+ if (Date.now() > deadline) {
203
+ throw new APIRequestError(`Async complete job timed out after ${maxWaitMs}ms (status=${st.status}).`);
204
+ }
205
+ await new Promise((r) => setTimeout(r, interval));
206
+ interval = Math.min(Math.floor(interval * 1.2), 2000);
207
+ }
208
+ }
209
+ async compress(prompt, model = "gpt-4", algorithmOrTags, maybeTags) {
210
+ let algorithm = 'greedy';
211
+ let tags;
212
+ if (typeof algorithmOrTags === 'string') {
213
+ if (algorithmOrTags !== 'greedy') {
214
+ throw new APIRequestError(`Invalid algorithm "${algorithmOrTags}". Only "greedy" is supported.`);
215
+ }
216
+ algorithm = 'greedy';
217
+ tags = maybeTags;
218
+ }
219
+ else if (algorithmOrTags && typeof algorithmOrTags === 'object') {
220
+ tags = algorithmOrTags;
150
221
  }
222
+ const data = { prompt, model, algorithm };
223
+ if (tags)
224
+ data.tags = tags;
151
225
  return this.makeRequest("/api/v1/compress", data);
152
226
  }
153
227
  async decompress(llmFormat) {
@@ -172,6 +246,47 @@ class PcompresslrAPIClient {
172
246
  throw new APIRequestError(`Health check failed: ${errorMessage}`);
173
247
  }
174
248
  }
249
+ /**
250
+ * Direct (non-job) complete call (POST /api/v2/complete).
251
+ *
252
+ * This hits the synchronous endpoint and is best-effort for small/interactive usage.
253
+ * For production reliability, prefer `complete()` (async job + polling).
254
+ */
255
+ async completeSync(request) {
256
+ const data = {
257
+ messages: request.messages,
258
+ compress: request.compress ?? true,
259
+ compress_output: request.compress_output ?? false,
260
+ algorithm: request.algorithm ?? 'greedy',
261
+ };
262
+ // v1.0.0 parameters
263
+ if (request.llm_provider !== undefined)
264
+ data.llm_provider = request.llm_provider;
265
+ if (request.desired_hle !== undefined)
266
+ data.desired_hle = request.desired_hle;
267
+ if (request.compression_config)
268
+ data.compression_config = request.compression_config;
269
+ if (request.temperature !== undefined)
270
+ data.temperature = request.temperature;
271
+ if (request.max_tokens !== undefined)
272
+ data.max_tokens = request.max_tokens;
273
+ if (request.tags !== undefined)
274
+ data.tags = request.tags;
275
+ if (request.max_history_messages !== undefined)
276
+ data.max_history_messages = request.max_history_messages;
277
+ // Deprecated / backward compatible parameters (still accepted by some deployments)
278
+ if (request.model !== undefined)
279
+ data.model = request.model;
280
+ if (request.hle_target_percent !== undefined)
281
+ data.hle_target_percent = request.hle_target_percent;
282
+ if (request.min_hle_score !== undefined)
283
+ data.min_hle_score = request.min_hle_score;
284
+ if (request.auto_select_by_hle !== undefined)
285
+ data.auto_select_by_hle = request.auto_select_by_hle;
286
+ if (request.same_provider_only !== undefined)
287
+ data.same_provider_only = request.same_provider_only;
288
+ return this.makeRequest('/api/v2/complete', data, 'POST');
289
+ }
175
290
  /**
176
291
  * Messages-first complete with intelligent model selection (POST /api/v2/complete).
177
292
  *
@@ -213,28 +328,9 @@ class PcompresslrAPIClient {
213
328
  console.warn('[compress-lightreach v1.0.0] HLE parameters have changed. ' +
214
329
  'Use "desired_hle" and optional "llm_provider" instead.');
215
330
  }
216
- const data = {
217
- messages: request.messages,
218
- compress: request.compress ?? true,
219
- compress_output: request.compress_output ?? false,
220
- algorithm: request.algorithm ?? 'greedy',
221
- };
222
- // v1.0.0 parameters
223
- if (request.llm_provider !== undefined)
224
- data.llm_provider = request.llm_provider;
225
- if (request.desired_hle !== undefined)
226
- data.desired_hle = request.desired_hle;
227
- if (request.compression_config)
228
- data.compression_config = request.compression_config;
229
- if (request.temperature !== undefined)
230
- data.temperature = request.temperature;
231
- if (request.max_tokens !== undefined)
232
- data.max_tokens = request.max_tokens;
233
- if (request.tags !== undefined)
234
- data.tags = request.tags;
235
- if (request.max_history_messages !== undefined)
236
- data.max_history_messages = request.max_history_messages;
237
- return this.makeRequest("/api/v2/complete", data);
331
+ // Prefer async jobs for production reliability; sync remains available via /api/v2/complete
332
+ // by calling completeSync() if needed.
333
+ return this.completeAsync(request);
238
334
  }
239
335
  }
240
336
  exports.PcompresslrAPIClient = PcompresslrAPIClient;
package/dist/cli.js CHANGED
@@ -8,29 +8,17 @@ const api_client_1 = require("./api-client");
8
8
  async function main() {
9
9
  const args = process.argv.slice(2);
10
10
  if (args.length === 0) {
11
- console.log("Usage: pcompresslr <prompt> [--greedy-only|--optimal-only]");
11
+ console.log("Usage: pcompresslr <prompt>");
12
12
  console.log("\nExample:");
13
13
  console.log(' pcompresslr "hello world hello world hello world"');
14
- console.log(' pcompresslr "your prompt here" --greedy-only # Only greedy');
15
- console.log(' pcompresslr "your prompt here" --optimal-only # Only optimal');
16
14
  console.log("\nNote: Requires PCOMPRESLR_API_KEY environment variable");
17
15
  process.exit(0);
18
16
  }
19
- let prompt = args.join(" ");
20
- let showGreedy = true;
21
- let showOptimal = true;
22
- if (prompt.endsWith("--greedy-only")) {
23
- prompt = args.slice(0, -1).join(" ");
24
- showOptimal = false;
25
- }
26
- else if (prompt.endsWith("--optimal-only")) {
27
- prompt = args.slice(0, -1).join(" ");
28
- showGreedy = false;
29
- }
17
+ const prompt = args.join(" ");
30
18
  // Get API key from environment
31
19
  const apiKey = process.env.PCOMPRESLR_API_KEY;
32
20
  if (!apiKey) {
33
- console.error("❌ Error: PCOMPRESLR_API_KEY environment variable is required.");
21
+ console.error("Error: PCOMPRESLR_API_KEY environment variable is required.");
34
22
  console.error("\nTo get an API key, visit https://compress.lightreach.io");
35
23
  console.error("Then set it with: export PCOMPRESLR_API_KEY=your-key-here");
36
24
  process.exit(1);
@@ -42,7 +30,7 @@ async function main() {
42
30
  }
43
31
  catch (error) {
44
32
  if (error instanceof api_client_1.APIKeyError) {
45
- console.error(`❌ Error: ${error.message}`);
33
+ console.error(`Error: ${error.message}`);
46
34
  process.exit(1);
47
35
  }
48
36
  throw error;
@@ -50,116 +38,38 @@ async function main() {
50
38
  console.log(`Original prompt: ${JSON.stringify(prompt)}`);
51
39
  console.log(`Length: ${prompt.length} characters\n`);
52
40
  console.log("=".repeat(80));
53
- // Run both compressors and compare
54
- const results = {};
55
- if (showGreedy) {
56
- console.log("\n🔹 GREEDY COMPRESSOR (Fast, ~99% optimal)");
57
- console.log("-".repeat(80));
58
- try {
59
- const resultGreedy = await client.compress(prompt, "gpt-4", "greedy");
60
- const compressedGreedy = resultGreedy.compressed;
61
- const dictGreedy = resultGreedy.dictionary;
62
- const ratioGreedy = resultGreedy.compression_ratio;
63
- const llmFormatGreedy = resultGreedy.llm_format;
64
- // Verify decompression
65
- const decompressResult = await client.decompress(llmFormatGreedy);
66
- const decompressedGreedy = decompressResult.decompressed;
67
- results['greedy'] = {
68
- compressed: compressedGreedy,
69
- dict: dictGreedy,
70
- ratio: ratioGreedy,
71
- llm_format: llmFormatGreedy,
72
- decompressed: decompressedGreedy
73
- };
74
- console.log(`Compressed: ${JSON.stringify(compressedGreedy)}`);
75
- console.log(`Dictionary: ${JSON.stringify(dictGreedy)}`);
76
- console.log(`Compression ratio: ${(ratioGreedy * 100).toFixed(2)}%`);
77
- console.log(`LLM-ready format length: ${llmFormatGreedy.length} chars`);
78
- console.log(`Processing time: ${resultGreedy.processing_time_ms.toFixed(2)}ms`);
79
- if (decompressedGreedy === prompt) {
80
- console.log("✅ Decompression verified");
81
- }
82
- else {
83
- console.log("❌ Decompression failed");
84
- }
85
- }
86
- catch (error) {
87
- if (error instanceof api_client_1.RateLimitError) {
88
- console.error(`❌ Rate limit exceeded: ${error.message}`);
89
- }
90
- else if (error instanceof api_client_1.APIRequestError) {
91
- console.error(`❌ API error: ${error.message}`);
92
- }
93
- else {
94
- throw error;
95
- }
96
- }
97
- }
98
- if (showOptimal) {
99
- console.log("\n🔸 OPTIMAL COMPRESSOR (DP, O(n²), globally optimal)");
100
- console.log("-".repeat(80));
101
- try {
102
- const resultOptimal = await client.compress(prompt, "gpt-4", "optimal");
103
- const compressedOptimal = resultOptimal.compressed;
104
- const dictOptimal = resultOptimal.dictionary;
105
- const ratioOptimal = resultOptimal.compression_ratio;
106
- const llmFormatOptimal = resultOptimal.llm_format;
107
- // Verify decompression
108
- const decompressResult = await client.decompress(llmFormatOptimal);
109
- const decompressedOptimal = decompressResult.decompressed;
110
- results['optimal'] = {
111
- compressed: compressedOptimal,
112
- dict: dictOptimal,
113
- ratio: ratioOptimal,
114
- llm_format: llmFormatOptimal,
115
- decompressed: decompressedOptimal
116
- };
117
- console.log(`Compressed: ${JSON.stringify(compressedOptimal)}`);
118
- console.log(`Dictionary: ${JSON.stringify(dictOptimal)}`);
119
- console.log(`Compression ratio: ${(ratioOptimal * 100).toFixed(2)}%`);
120
- console.log(`LLM-ready format length: ${llmFormatOptimal.length} chars`);
121
- console.log(`Processing time: ${resultOptimal.processing_time_ms.toFixed(2)}ms`);
122
- if (decompressedOptimal === prompt) {
123
- console.log("✅ Decompression verified");
124
- }
125
- else {
126
- console.log("❌ Decompression failed");
127
- }
41
+ console.log("\nGREEDY COMPRESSOR");
42
+ console.log("-".repeat(80));
43
+ try {
44
+ const result = await client.compress(prompt, "gpt-4");
45
+ const compressed = result.compressed;
46
+ const dictionary = result.dictionary;
47
+ const ratio = result.compression_ratio;
48
+ const llmFormat = result.llm_format;
49
+ // Verify decompression
50
+ const decompressResult = await client.decompress(llmFormat);
51
+ const decompressed = decompressResult.decompressed;
52
+ console.log(`Compressed: ${JSON.stringify(compressed)}`);
53
+ console.log(`Dictionary: ${JSON.stringify(dictionary)}`);
54
+ console.log(`Compression ratio: ${(ratio * 100).toFixed(2)}%`);
55
+ console.log(`LLM-ready format length: ${llmFormat.length} chars`);
56
+ console.log(`Processing time: ${result.processing_time_ms.toFixed(2)}ms`);
57
+ if (decompressed === prompt) {
58
+ console.log("Decompression verified");
128
59
  }
129
- catch (error) {
130
- if (error instanceof api_client_1.RateLimitError) {
131
- console.error(`❌ Rate limit exceeded: ${error.message}`);
132
- }
133
- else if (error instanceof api_client_1.APIRequestError) {
134
- console.error(`❌ API error: ${error.message}`);
135
- }
136
- else {
137
- throw error;
138
- }
60
+ else {
61
+ console.log("Decompression failed");
139
62
  }
140
63
  }
141
- // Comparison if both were run
142
- if (showGreedy && showOptimal && results['greedy'] && results['optimal']) {
143
- console.log("\n" + "=".repeat(80));
144
- console.log("📊 COMPARISON");
145
- console.log("-".repeat(80));
146
- const ratioDiff = results['optimal'].ratio - results['greedy'].ratio;
147
- if (ratioDiff < 0) {
148
- console.log(`✅ Optimal is ${Math.abs(ratioDiff * 100).toFixed(2)}% better (smaller ratio)`);
64
+ catch (error) {
65
+ if (error instanceof api_client_1.RateLimitError) {
66
+ console.error(`Rate limit exceeded: ${error.message}`);
149
67
  }
150
- else if (ratioDiff > 0) {
151
- console.log(`✅ Greedy is ${(ratioDiff * 100).toFixed(2)}% better (smaller ratio)`);
68
+ else if (error instanceof api_client_1.APIRequestError) {
69
+ console.error(`API error: ${error.message}`);
152
70
  }
153
71
  else {
154
- console.log("✅ Both produce identical compression ratios");
155
- }
156
- console.log(`\nGreedy ratio: ${(results['greedy'].ratio * 100).toFixed(2)}%`);
157
- console.log(`Optimal ratio: ${(results['optimal'].ratio * 100).toFixed(2)}%`);
158
- console.log(`Difference: ${(ratioDiff * 100).toFixed(2)}%`);
159
- const greedyDictSize = Object.keys(results['greedy'].dict).length;
160
- const optimalDictSize = Object.keys(results['optimal'].dict).length;
161
- if (greedyDictSize !== optimalDictSize) {
162
- console.log(`\nDictionary size: Greedy=${greedyDictSize}, Optimal=${optimalDictSize}`);
72
+ throw error;
163
73
  }
164
74
  }
165
75
  }
package/dist/core.d.ts CHANGED
@@ -19,10 +19,11 @@ export interface CompleteOptions {
19
19
  messages: Message[];
20
20
  model?: string;
21
21
  provider?: 'openai' | 'anthropic' | 'google';
22
+ desiredHle?: number;
22
23
  compress?: boolean;
23
24
  compressionConfig?: CompressionConfig;
24
25
  compressOutput?: boolean;
25
- useOptimal?: boolean;
26
+ mode?: 'async' | 'sync';
26
27
  hleTargetPercent?: number;
27
28
  minHleScore?: number;
28
29
  autoSelectByHle?: boolean;
@@ -36,13 +37,11 @@ export declare class LightReach {
36
37
  private apiClient;
37
38
  private defaultModel;
38
39
  private defaultProvider;
39
- private useOptimal;
40
40
  constructor(options?: {
41
41
  apiKey?: string;
42
42
  apiUrl?: string;
43
43
  defaultModel?: string;
44
44
  defaultProvider?: 'openai' | 'anthropic' | 'google';
45
- useOptimal?: boolean;
46
45
  });
47
46
  complete(options: CompleteOptions): Promise<CompleteResponse>;
48
47
  /**
@@ -50,7 +49,6 @@ export declare class LightReach {
50
49
  */
51
50
  compress(text: string, options?: {
52
51
  model?: string;
53
- algorithm?: 'greedy' | 'optimal';
54
52
  tags?: Record<string, string>;
55
53
  }): Promise<CompressResponse>;
56
54
  }
package/dist/core.js CHANGED
@@ -13,11 +13,9 @@ class LightReach {
13
13
  constructor(options = {}) {
14
14
  this.defaultModel = options.defaultModel ?? 'gpt-4';
15
15
  this.defaultProvider = options.defaultProvider ?? 'openai';
16
- this.useOptimal = options.useOptimal ?? false;
17
16
  this.apiClient = new api_client_1.PcompresslrAPIClient(options.apiKey, options.apiUrl);
18
17
  }
19
18
  async complete(options) {
20
- const algorithm = (options.useOptimal ?? this.useOptimal) ? 'optimal' : 'greedy';
21
19
  const cfg = options.compressionConfig
22
20
  ? {
23
21
  compress_system: options.compressionConfig.compressSystem ?? false,
@@ -27,14 +25,13 @@ class LightReach {
27
25
  }
28
26
  : undefined;
29
27
  try {
30
- const resp = await this.apiClient.complete({
28
+ const req = {
31
29
  messages: options.messages,
32
- model: options.model ?? this.defaultModel,
33
30
  llm_provider: options.provider ?? this.defaultProvider,
31
+ desired_hle: options.desiredHle,
34
32
  compress: options.compress ?? true,
35
33
  compression_config: cfg,
36
34
  compress_output: options.compressOutput ?? false,
37
- algorithm,
38
35
  hle_target_percent: options.hleTargetPercent,
39
36
  min_hle_score: options.minHleScore,
40
37
  auto_select_by_hle: options.autoSelectByHle,
@@ -43,7 +40,13 @@ class LightReach {
43
40
  max_tokens: options.maxTokens,
44
41
  tags: options.tags,
45
42
  max_history_messages: options.maxHistoryMessages,
46
- });
43
+ };
44
+ // Only include deprecated `model` if explicitly provided to avoid noisy warnings.
45
+ if (options.model !== undefined)
46
+ req.model = options.model;
47
+ const resp = (options.mode ?? 'async') === 'sync'
48
+ ? await this.apiClient.completeSync(req)
49
+ : await this.apiClient.complete(req);
47
50
  // Add helpful aliases to better match the Feature 0.6 spec without changing backend response.
48
51
  // We do NOT fabricate cost estimates here since the API response does not include pricing data.
49
52
  return {
@@ -79,7 +82,7 @@ class LightReach {
79
82
  * Compress text without making an LLM call (POST /api/v1/compress).
80
83
  */
81
84
  async compress(text, options) {
82
- return await this.apiClient.compress(text, options?.model ?? this.defaultModel, options?.algorithm ?? 'greedy', options?.tags);
85
+ return await this.apiClient.compress(text, options?.model ?? this.defaultModel, options?.tags);
83
86
  }
84
87
  }
85
88
  exports.LightReach = LightReach;
package/dist/version.d.ts CHANGED
@@ -1,4 +1,4 @@
1
1
  /**
2
2
  * Version information for compress-lightreach package.
3
3
  */
4
- export declare const __version__ = "1.0.0";
4
+ export declare const __version__ = "1.0.1";
package/dist/version.js CHANGED
@@ -4,4 +4,4 @@
4
4
  */
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
6
  exports.__version__ = void 0;
7
- exports.__version__ = "1.0.0";
7
+ exports.__version__ = "1.0.1";
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "compress-lightreach",
3
- "version": "1.0.3",
3
+ "version": "1.0.6",
4
4
  "description": "AI cost management SDK with intelligent model routing, prompt compression, and real-time token tracking",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",