brevit 0.1.4 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +37 -87
- package/TYPESCRIPT.md +16 -9
- package/example.ts +5 -1
- package/package.json +5 -2
- package/src/brevit.d.ts +28 -1
- package/src/brevit.js +60 -33
- package/src/semanticCompressor.js +157 -0
- package/test/test.js +66 -11
package/README.md
CHANGED
|
@@ -1,24 +1,24 @@
|
|
|
1
|
-
#
|
|
1
|
+
# brevit
|
|
2
2
|
|
|
3
3
|
A high-performance JavaScript library for semantically compressing and optimizing data before sending it to a Large Language Model (LLM). Dramatically reduce token costs while maintaining data integrity and readability.
|
|
4
4
|
|
|
5
5
|
## Table of Contents
|
|
6
6
|
|
|
7
|
-
- [Why
|
|
7
|
+
- [Why brevit?](#why-brevit)
|
|
8
8
|
- [Key Features](#key-features)
|
|
9
|
-
- [When Not to Use
|
|
9
|
+
- [When Not to Use brevit](#when-not-to-use-brevit)
|
|
10
10
|
- [Benchmarks](#benchmarks)
|
|
11
11
|
- [Installation & Quick Start](#installation--quick-start)
|
|
12
12
|
- [Playgrounds](#playgrounds)
|
|
13
13
|
- [CLI](#cli)
|
|
14
14
|
- [Format Overview](#format-overview)
|
|
15
15
|
- [API](#api)
|
|
16
|
-
- [Using
|
|
16
|
+
- [Using brevit in LLM Prompts](#using-brevit-in-llm-prompts)
|
|
17
17
|
- [Syntax Cheatsheet](#syntax-cheatsheet)
|
|
18
18
|
- [Other Implementations](#other-implementations)
|
|
19
19
|
- [Full Specification](#full-specification)
|
|
20
20
|
|
|
21
|
-
## Why
|
|
21
|
+
## Why brevit?
|
|
22
22
|
|
|
23
23
|
### JavaScript-Specific Advantages
|
|
24
24
|
|
|
@@ -53,7 +53,7 @@ const explicit = await brevit.optimize(complexOrder);
|
|
|
53
53
|
|
|
54
54
|
### Automatic Strategy Selection
|
|
55
55
|
|
|
56
|
-
|
|
56
|
+
brevit now includes the `.brevity()` method that automatically analyzes your data and selects the optimal optimization strategy:
|
|
57
57
|
|
|
58
58
|
```javascript
|
|
59
59
|
const data = {
|
|
@@ -72,7 +72,7 @@ const optimized = await brevit.brevity(data);
|
|
|
72
72
|
## Key Features
|
|
73
73
|
|
|
74
74
|
- **JSON Optimization**: Flatten nested JSON structures into token-efficient key-value pairs
|
|
75
|
-
- **Text Optimization**:
|
|
75
|
+
- **Text Optimization**: Deterministic TextRank-based compression for plain text (no LLM required)
|
|
76
76
|
- **Image Optimization**: Extract text from images via OCR
|
|
77
77
|
- **Lightweight**: One small runtime dependency (`compromise`, used for text compression); YAML support is optional
|
|
78
78
|
- **Universal**: Works in Node.js, browsers, and modern JavaScript environments
|
|
@@ -110,7 +110,7 @@ pnpm add brevit
|
|
|
110
110
|
|
|
111
111
|
### TypeScript Support
|
|
112
112
|
|
|
113
|
-
|
|
113
|
+
brevit includes full TypeScript definitions. Simply import and use with full type safety:
|
|
114
114
|
|
|
115
115
|
```typescript
|
|
116
116
|
import {
|
|
@@ -130,7 +130,7 @@ const client = new BrevitClient(new BrevitConfig(config));
|
|
|
130
130
|
|
|
131
131
|
## Complete Usage Examples
|
|
132
132
|
|
|
133
|
-
|
|
133
|
+
brevit supports three main data types: **JSON objects/strings**, **text files/strings**, and **images**. Here's how to use each:
|
|
134
134
|
|
|
135
135
|
### 1. JSON Optimization Examples
|
|
136
136
|
|
|
@@ -219,20 +219,6 @@ const optimized = await brevit.brevity(jsonString);
|
|
|
219
219
|
// @o.status:SHIPPED
|
|
220
220
|
```
|
|
221
221
|
|
|
222
|
-
#### Example 1.2a: Abbreviations Disabled
|
|
223
|
-
|
|
224
|
-
```javascript
|
|
225
|
-
const brevitNoAbbr = new BrevitClient(new BrevitConfig({
|
|
226
|
-
jsonMode: JsonOptimizationMode.Flatten,
|
|
227
|
-
enableAbbreviations: false // Disable abbreviations
|
|
228
|
-
}));
|
|
229
|
-
|
|
230
|
-
const jsonString = '{"order": {"id": "o-456", "status": "SHIPPED"}}';
|
|
231
|
-
const optimized = await brevitNoAbbr.brevity(jsonString);
|
|
232
|
-
// Output (without abbreviations):
|
|
233
|
-
// order.id:o-456
|
|
234
|
-
// order.status:SHIPPED
|
|
235
|
-
```
|
|
236
222
|
|
|
237
223
|
#### Example 1.3: Complex Nested JSON with Arrays
|
|
238
224
|
|
|
@@ -276,51 +262,6 @@ const optimized = await brevit.brevity(complexData);
|
|
|
276
262
|
// luis,9.2,540,2,Ridge Overlook,false
|
|
277
263
|
```
|
|
278
264
|
|
|
279
|
-
#### Example 1.3a: Complex Data with Abbreviations Disabled
|
|
280
|
-
|
|
281
|
-
```javascript
|
|
282
|
-
const brevitNoAbbr = new BrevitClient(new BrevitConfig({
|
|
283
|
-
jsonMode: JsonOptimizationMode.Flatten,
|
|
284
|
-
enableAbbreviations: false // Disable abbreviations
|
|
285
|
-
}));
|
|
286
|
-
|
|
287
|
-
const complexData = {
|
|
288
|
-
context: {
|
|
289
|
-
task: "Our favorite hikes together",
|
|
290
|
-
location: "Boulder",
|
|
291
|
-
season: "spring_2025"
|
|
292
|
-
},
|
|
293
|
-
friends: ["ana", "luis", "sam"],
|
|
294
|
-
hikes: [
|
|
295
|
-
{
|
|
296
|
-
id: 1,
|
|
297
|
-
name: "Blue Lake Trail",
|
|
298
|
-
distanceKm: 7.5,
|
|
299
|
-
elevationGain: 320,
|
|
300
|
-
companion: "ana",
|
|
301
|
-
wasSunny: true
|
|
302
|
-
},
|
|
303
|
-
{
|
|
304
|
-
id: 2,
|
|
305
|
-
name: "Ridge Overlook",
|
|
306
|
-
distanceKm: 9.2,
|
|
307
|
-
elevationGain: 540,
|
|
308
|
-
companion: "luis",
|
|
309
|
-
wasSunny: false
|
|
310
|
-
}
|
|
311
|
-
]
|
|
312
|
-
};
|
|
313
|
-
|
|
314
|
-
const optimized = await brevitNoAbbr.brevity(complexData);
|
|
315
|
-
// Output (without abbreviations):
|
|
316
|
-
// context.task:Our favorite hikes together
|
|
317
|
-
// context.location:Boulder
|
|
318
|
-
// context.season:spring_2025
|
|
319
|
-
// friends[3]:ana,luis,sam
|
|
320
|
-
// hikes[2]{companion,distanceKm,elevationGain,id,name,wasSunny}:
|
|
321
|
-
// ana,7.5,320,1,Blue Lake Trail,true
|
|
322
|
-
// luis,9.2,540,2,Ridge Overlook,false
|
|
323
|
-
```
|
|
324
265
|
|
|
325
266
|
#### Example 1.4: Different JSON Optimization Modes
|
|
326
267
|
|
|
@@ -357,16 +298,23 @@ The text goes on for many lines...
|
|
|
357
298
|
[Repeated content many times]
|
|
358
299
|
`.repeat(50);
|
|
359
300
|
|
|
360
|
-
// Automatic detection:
|
|
301
|
+
// Automatic detection: plain text is compressed by default
|
|
361
302
|
const optimized = await brevit.brevity(longText);
|
|
362
303
|
|
|
363
|
-
// Explicit text
|
|
304
|
+
// Explicit text compression via the main pipeline (ratio optional; defaults to 0.0 = auto)
|
|
364
305
|
const config = new BrevitConfig({
|
|
365
306
|
textMode: TextOptimizationMode.Clean,
|
|
366
|
-
longTextThreshold: 500 //
|
|
307
|
+
longTextThreshold: 500 // (JSON heuristics only; plain text is compressed regardless)
|
|
367
308
|
});
|
|
368
309
|
const brevitWithText = new BrevitClient(config);
|
|
369
|
-
const
|
|
310
|
+
const cleanedAuto = await brevitWithText.optimize(longText); // auto
|
|
311
|
+
const cleaned60 = await brevitWithText.optimize(longText, 0.6); // ratio
|
|
312
|
+
const cleanedIntent = await brevitWithText.optimize(longText, 0.6, "keep key details"); // ratio + intent (3rd arg)
|
|
313
|
+
|
|
314
|
+
// Explicit TextRank compression APIs (recommended when you want direct control)
|
|
315
|
+
const compressedAuto = await brevit.compressText(longText); // AUTO mode
|
|
316
|
+
const compressed60 = await brevit.optimizeText(longText, 0.6); // Keep ~60% of sentences
|
|
317
|
+
const compressedDefault = await brevit.optimizeText(longText, 0.0); // Same as compressText()
|
|
370
318
|
```
|
|
371
319
|
|
|
372
320
|
#### Example 2.2: Reading Text from File (Node.js)
|
|
@@ -388,19 +336,19 @@ const optimized = await brevit.brevity(textContent);
|
|
|
388
336
|
const cleanConfig = new BrevitConfig({
|
|
389
337
|
textMode: TextOptimizationMode.Clean
|
|
390
338
|
});
|
|
391
|
-
//
|
|
339
|
+
// Built-in deterministic TextRank extractive compression (no LLM required)
|
|
392
340
|
|
|
393
341
|
// Summarize Fast
|
|
394
342
|
const fastConfig = new BrevitConfig({
|
|
395
343
|
textMode: TextOptimizationMode.SummarizeFast
|
|
396
344
|
});
|
|
397
|
-
//
|
|
345
|
+
// Reserved for custom LLM summarization (or use built-in TextRank via compressText/optimizeText)
|
|
398
346
|
|
|
399
347
|
// Summarize High Quality
|
|
400
348
|
const qualityConfig = new BrevitConfig({
|
|
401
349
|
textMode: TextOptimizationMode.SummarizeHighQuality
|
|
402
350
|
});
|
|
403
|
-
//
|
|
351
|
+
// Reserved for custom LLM summarization (or use built-in TextRank via compressText/optimizeText)
|
|
404
352
|
```
|
|
405
353
|
|
|
406
354
|
### 3. Image Optimization Examples
|
|
@@ -643,7 +591,7 @@ processOrder(order).then(console.log);
|
|
|
643
591
|
<!DOCTYPE html>
|
|
644
592
|
<html>
|
|
645
593
|
<head>
|
|
646
|
-
<title>
|
|
594
|
+
<title>brevit Example</title>
|
|
647
595
|
</head>
|
|
648
596
|
<body>
|
|
649
597
|
<script type="module">
|
|
@@ -880,14 +828,16 @@ const user = {
|
|
|
880
828
|
};
|
|
881
829
|
|
|
882
830
|
const optimized = await brevit.optimize(user);
|
|
883
|
-
// Output:
|
|
884
|
-
//
|
|
885
|
-
//
|
|
886
|
-
//
|
|
887
|
-
//
|
|
888
|
-
//
|
|
889
|
-
//
|
|
890
|
-
//
|
|
831
|
+
// Output (with abbreviations enabled by default):
|
|
832
|
+
// @c=contact
|
|
833
|
+
// @o=orders
|
|
834
|
+
// id:u-123
|
|
835
|
+
// name:Javian
|
|
836
|
+
// isActive:true
|
|
837
|
+
// @c.email:support@javianpicardo.com
|
|
838
|
+
// @c.phone:null
|
|
839
|
+
// @o[0].orderId:o-456
|
|
840
|
+
// @o[0].status:SHIPPED
|
|
891
841
|
```
|
|
892
842
|
|
|
893
843
|
### Example 2: Optimize JSON String
|
|
@@ -911,7 +861,7 @@ const optimized = await brevit.optimize(json);
|
|
|
911
861
|
```javascript
|
|
912
862
|
const longDocument = '...very long text...';
|
|
913
863
|
const optimized = await brevit.optimize(longDocument);
|
|
914
|
-
//
|
|
864
|
+
// Plain text is compressed by default; use optimize(longDocument, 0.6) for ratio compression
|
|
915
865
|
```
|
|
916
866
|
|
|
917
867
|
### Example 4: Process Image (ArrayBuffer)
|
|
@@ -925,7 +875,7 @@ const optimized = await brevit.optimize(imageData);
|
|
|
925
875
|
// Will trigger image optimization
|
|
926
876
|
```
|
|
927
877
|
|
|
928
|
-
## When Not to Use
|
|
878
|
+
## When Not to Use brevit
|
|
929
879
|
|
|
930
880
|
Consider alternatives when:
|
|
931
881
|
|
|
@@ -1199,7 +1149,7 @@ class BrevitConfig {
|
|
|
1199
1149
|
- `Ocr` - Extract text via OCR
|
|
1200
1150
|
- `Metadata` - Extract metadata only
|
|
1201
1151
|
|
|
1202
|
-
## Using
|
|
1152
|
+
## Using brevit in LLM Prompts
|
|
1203
1153
|
|
|
1204
1154
|
### Best Practices
|
|
1205
1155
|
|
package/TYPESCRIPT.md
CHANGED
|
@@ -7,7 +7,7 @@ Brevit.js includes comprehensive TypeScript definitions for full type safety and
|
|
|
7
7
|
No additional installation required! TypeScript definitions are included in the package.
|
|
8
8
|
|
|
9
9
|
```bash
|
|
10
|
-
npm install brevit
|
|
10
|
+
npm install brevit
|
|
11
11
|
```
|
|
12
12
|
|
|
13
13
|
## Basic Usage
|
|
@@ -17,7 +17,7 @@ import {
|
|
|
17
17
|
BrevitClient,
|
|
18
18
|
BrevitConfig,
|
|
19
19
|
JsonOptimizationMode,
|
|
20
|
-
} from 'brevit
|
|
20
|
+
} from 'brevit';
|
|
21
21
|
|
|
22
22
|
const config = new BrevitConfig({
|
|
23
23
|
jsonMode: JsonOptimizationMode.Flatten,
|
|
@@ -38,7 +38,7 @@ import {
|
|
|
38
38
|
JsonOptimizationMode,
|
|
39
39
|
TextOptimizationMode,
|
|
40
40
|
ImageOptimizationMode,
|
|
41
|
-
} from 'brevit
|
|
41
|
+
} from 'brevit';
|
|
42
42
|
|
|
43
43
|
// Usage
|
|
44
44
|
const mode: typeof JsonOptimizationMode.Flatten = JsonOptimizationMode.Flatten;
|
|
@@ -51,7 +51,7 @@ import type {
|
|
|
51
51
|
JsonOptimizationModeType,
|
|
52
52
|
TextOptimizationModeType,
|
|
53
53
|
ImageOptimizationModeType,
|
|
54
|
-
} from 'brevit
|
|
54
|
+
} from 'brevit';
|
|
55
55
|
|
|
56
56
|
function setMode(mode: JsonOptimizationModeType) {
|
|
57
57
|
// Type-safe mode setting
|
|
@@ -66,7 +66,7 @@ import type {
|
|
|
66
66
|
BrevitClientOptions,
|
|
67
67
|
TextOptimizerFunction,
|
|
68
68
|
ImageOptimizerFunction,
|
|
69
|
-
} from 'brevit
|
|
69
|
+
} from 'brevit';
|
|
70
70
|
|
|
71
71
|
// Configuration options
|
|
72
72
|
const config: BrevitConfigOptions = {
|
|
@@ -91,14 +91,14 @@ import {
|
|
|
91
91
|
BrevitConfig,
|
|
92
92
|
JsonOptimizationMode,
|
|
93
93
|
type BrevitConfigOptions,
|
|
94
|
-
} from 'brevit
|
|
94
|
+
} from 'brevit';
|
|
95
95
|
|
|
96
96
|
const configOptions: BrevitConfigOptions = {
|
|
97
97
|
jsonMode: JsonOptimizationMode.Flatten,
|
|
98
98
|
textMode: 'Clean',
|
|
99
99
|
imageMode: 'Ocr',
|
|
100
100
|
jsonPathsToKeep: ['user.name', 'order.orderId'],
|
|
101
|
-
longTextThreshold: 1000,
|
|
101
|
+
longTextThreshold: 1000, // (plain text is compressed regardless; this is mostly for JSON heuristics)
|
|
102
102
|
};
|
|
103
103
|
|
|
104
104
|
const config = new BrevitConfig(configOptions);
|
|
@@ -113,7 +113,7 @@ import {
|
|
|
113
113
|
BrevitConfig,
|
|
114
114
|
type TextOptimizerFunction,
|
|
115
115
|
type ImageOptimizerFunction,
|
|
116
|
-
} from 'brevit
|
|
116
|
+
} from 'brevit';
|
|
117
117
|
|
|
118
118
|
const customTextOptimizer: TextOptimizerFunction = async (longText, intent) => {
|
|
119
119
|
const response = await fetch('/api/summarize', {
|
|
@@ -140,6 +140,13 @@ const client = new BrevitClient(new BrevitConfig(), {
|
|
|
140
140
|
textOptimizer: customTextOptimizer,
|
|
141
141
|
imageOptimizer: customImageOptimizer,
|
|
142
142
|
});
|
|
143
|
+
|
|
144
|
+
// Text compression (TextRank) is built-in:
|
|
145
|
+
const text = 'Alpha sentence. Beta sentence. Gamma sentence.';
|
|
146
|
+
const compressedAuto = await client.brevity(text); // auto compression
|
|
147
|
+
const compressedAuto2 = await client.optimize(text); // auto compression (ratio defaults to 0.0)
|
|
148
|
+
const compressed60 = await client.optimize(text, 0.6); // ratio compression
|
|
149
|
+
const compressed60WithIntent = await client.optimize(text, 0.6, 'keep key details'); // ratio + intent
|
|
143
150
|
```
|
|
144
151
|
|
|
145
152
|
### Example 3: Type-Safe Data Structures
|
|
@@ -184,7 +191,7 @@ const optimizedOrder = await client.optimize(order);
|
|
|
184
191
|
### Example 4: Generic Helper Function
|
|
185
192
|
|
|
186
193
|
```typescript
|
|
187
|
-
import { BrevitClient, BrevitConfig } from 'brevit
|
|
194
|
+
import { BrevitClient, BrevitConfig } from 'brevit';
|
|
188
195
|
|
|
189
196
|
async function optimizeData<T>(data: T): Promise<string> {
|
|
190
197
|
const client = new BrevitClient();
|
package/example.ts
CHANGED
|
@@ -37,7 +37,7 @@ async function example2() {
|
|
|
37
37
|
jsonMode: JsonOptimizationMode.Flatten,
|
|
38
38
|
textMode: TextOptimizationMode.Clean,
|
|
39
39
|
imageMode: ImageOptimizationMode.Ocr,
|
|
40
|
-
longTextThreshold: 1000,
|
|
40
|
+
longTextThreshold: 1000, // (plain text is compressed regardless; this is mostly for JSON heuristics)
|
|
41
41
|
};
|
|
42
42
|
|
|
43
43
|
const client = new BrevitClient(new BrevitConfig(config));
|
|
@@ -70,7 +70,11 @@ async function example3() {
|
|
|
70
70
|
});
|
|
71
71
|
|
|
72
72
|
const longText = '...very long text...';
|
|
73
|
+
// For text, optimize() defaults to deterministic TextRank compression unless you provide a custom text optimizer.
|
|
74
|
+
// Ratio compression is supported via optimize(longText, ratio, intent?).
|
|
73
75
|
const optimized = await client.optimize(longText);
|
|
76
|
+
const optimized60 = await client.optimize(longText, 0.6);
|
|
77
|
+
const optimized60WithIntent = await client.optimize(longText, 0.6, 'keep key details');
|
|
74
78
|
console.log(optimized);
|
|
75
79
|
}
|
|
76
80
|
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "brevit",
|
|
3
|
-
"version": "0.
|
|
4
|
-
"description": "A high-performance JavaScript library for
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "A high-performance JavaScript library for optimizing LLM prompt inputs: token-efficient JSON flattening + deterministic TextRank-based text compression.",
|
|
5
5
|
"main": "src/brevit.js",
|
|
6
6
|
"types": "src/brevit.d.ts",
|
|
7
7
|
"type": "module",
|
|
@@ -25,6 +25,9 @@
|
|
|
25
25
|
"type": "git",
|
|
26
26
|
"url": "https://github.com/JavianDev/Brevit.js.git"
|
|
27
27
|
},
|
|
28
|
+
"dependencies": {
|
|
29
|
+
"compromise": "^14.14.4"
|
|
30
|
+
},
|
|
28
31
|
"optionalDependencies": {
|
|
29
32
|
"js-yaml": "^4.1.0"
|
|
30
33
|
}
|
package/src/brevit.d.ts
CHANGED
|
@@ -221,7 +221,34 @@ export class BrevitClient {
|
|
|
221
221
|
* // Returns OCR text or metadata
|
|
222
222
|
* ```
|
|
223
223
|
*/
|
|
224
|
-
|
|
224
|
+
/**
|
|
225
|
+
* Optimizes any supported input type.
|
|
226
|
+
* For plain text inputs, this performs deterministic TextRank compression by default.
|
|
227
|
+
*
|
|
228
|
+
* - optimize(text) => auto compression (ratio defaults to 0.0)
|
|
229
|
+
* - optimize(text, 0.6) => ratio compression
|
|
230
|
+
* - optimize(text, 0.6, intent) => ratio compression with intent hint
|
|
231
|
+
* - optimize(obj, intent) => JSON/object pipeline with intent hint
|
|
232
|
+
*/
|
|
233
|
+
optimize(rawData: unknown, ratioOrIntent?: number | string | null, intent?: string | null): Promise<string>;
|
|
234
|
+
|
|
235
|
+
/**
|
|
236
|
+
* Intelligently optimizes data by automatically selecting the best strategy.
|
|
237
|
+
* For plain text inputs, this performs deterministic TextRank compression by default.
|
|
238
|
+
*/
|
|
239
|
+
brevity(rawData: unknown, intent?: string | null): Promise<string>;
|
|
240
|
+
|
|
241
|
+
/**
|
|
242
|
+
* Explicit text compression (AUTO mode).
|
|
243
|
+
* Always attempts to compress, even for short / single-sentence inputs.
|
|
244
|
+
*/
|
|
245
|
+
compressText(text: string): Promise<string>;
|
|
246
|
+
|
|
247
|
+
/**
|
|
248
|
+
* Explicit text compression (RATIO mode).
|
|
249
|
+
* If ratio <= 0, behaves like compressText().
|
|
250
|
+
*/
|
|
251
|
+
optimizeText(text: string, ratio?: number): Promise<string>;
|
|
225
252
|
}
|
|
226
253
|
|
|
227
254
|
// Re-export types for convenience
|
package/src/brevit.js
CHANGED
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
*
|
|
8
8
|
* Project: Brevit
|
|
9
9
|
* Author: Javian
|
|
10
|
-
* Version:
|
|
10
|
+
* Version: 1.0.0
|
|
11
11
|
* =================================================================================
|
|
12
12
|
*/
|
|
13
13
|
|
|
@@ -80,6 +80,36 @@ export class BrevitClient {
|
|
|
80
80
|
this._config = config;
|
|
81
81
|
this._textOptimizer = options.textOptimizer || this._defaultTextOptimizer.bind(this);
|
|
82
82
|
this._imageOptimizer = options.imageOptimizer || this._defaultImageOptimizer.bind(this);
|
|
83
|
+
this._semanticCompressor = null;
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
/**
|
|
87
|
+
* Explicit text compression (AUTO mode).
|
|
88
|
+
* Always attempts to compress, even for short / single-sentence inputs.
|
|
89
|
+
* @param {string} text
|
|
90
|
+
* @returns {Promise<string>}
|
|
91
|
+
*/
|
|
92
|
+
async compressText(text) {
|
|
93
|
+
if (!this._semanticCompressor) {
|
|
94
|
+
const { SemanticCompressor } = await import('./semanticCompressor.js');
|
|
95
|
+
this._semanticCompressor = new SemanticCompressor();
|
|
96
|
+
}
|
|
97
|
+
return this._semanticCompressor.compress(String(text ?? ''));
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
/**
|
|
101
|
+
* Explicit text compression (RATIO mode).
|
|
102
|
+
* If ratio <= 0, behaves like compressText().
|
|
103
|
+
* @param {string} text
|
|
104
|
+
* @param {number} ratio
|
|
105
|
+
* @returns {Promise<string>}
|
|
106
|
+
*/
|
|
107
|
+
async optimizeText(text, ratio = 0.0) {
|
|
108
|
+
if (!this._semanticCompressor) {
|
|
109
|
+
const { SemanticCompressor } = await import('./semanticCompressor.js');
|
|
110
|
+
this._semanticCompressor = new SemanticCompressor();
|
|
111
|
+
}
|
|
112
|
+
return this._semanticCompressor.optimize(String(text ?? ''), Number(ratio ?? 0));
|
|
83
113
|
}
|
|
84
114
|
|
|
85
115
|
/**
|
|
@@ -592,24 +622,12 @@ export class BrevitClient {
|
|
|
592
622
|
(trimmed.startsWith('[') && trimmed.endsWith(']'))) {
|
|
593
623
|
inputObject = JSON.parse(rawData);
|
|
594
624
|
} else {
|
|
595
|
-
//
|
|
596
|
-
|
|
597
|
-
const strategy = this._selectOptimalStrategy(analysis);
|
|
598
|
-
|
|
599
|
-
if (strategy.name === 'TextOptimization') {
|
|
600
|
-
return await this._textOptimizer(rawData, intent);
|
|
601
|
-
}
|
|
602
|
-
return rawData;
|
|
625
|
+
// Plain text: always compress via TextRank (auto).
|
|
626
|
+
return await this.compressText(rawData);
|
|
603
627
|
}
|
|
604
628
|
} catch (e) {
|
|
605
|
-
// Not JSON - treat as text
|
|
606
|
-
|
|
607
|
-
const strategy = this._selectOptimalStrategy(analysis);
|
|
608
|
-
|
|
609
|
-
if (strategy.name === 'TextOptimization') {
|
|
610
|
-
return await this._textOptimizer(rawData, intent);
|
|
611
|
-
}
|
|
612
|
-
return rawData;
|
|
629
|
+
// Not valid JSON - treat as plain text and compress.
|
|
630
|
+
return await this.compressText(rawData);
|
|
613
631
|
}
|
|
614
632
|
} else if (inputType === 'object' && rawData !== null) {
|
|
615
633
|
// Check if it's image data
|
|
@@ -671,12 +689,25 @@ export class BrevitClient {
|
|
|
671
689
|
* or text into a token-efficient string.
|
|
672
690
|
*
|
|
673
691
|
* @param {any} rawData - The data to optimize (object, JSON string, text, ArrayBuffer).
|
|
674
|
-
* @param {string} [
|
|
692
|
+
* @param {number|string|null} [ratioOrIntent] - If number: sentence ratio for TextRank compression (0..1). If string: intent.
|
|
693
|
+
* @param {string|null} [intent] - (Optional) A hint about the user's goal (use this as 3rd arg when passing ratio).
|
|
675
694
|
* @returns {Promise<string>} A promise that resolves to the optimized string.
|
|
676
695
|
*/
|
|
677
|
-
async optimize(rawData, intent = null) {
|
|
696
|
+
async optimize(rawData, ratioOrIntent = null, intent = null) {
|
|
678
697
|
let inputObject = null;
|
|
679
698
|
let inputType = typeof rawData;
|
|
699
|
+
let ratio = 0.0;
|
|
700
|
+
let resolvedIntent = intent;
|
|
701
|
+
|
|
702
|
+
// Backwards-compatible argument parsing:
|
|
703
|
+
// - optimize(text, 0.6, intent?) => ratio-based text compression
|
|
704
|
+
// - optimize(text, intent?) => auto text compression (ratio defaults to 0.0)
|
|
705
|
+
// - optimize(obj, intent?) => JSON/object pipeline
|
|
706
|
+
if (typeof ratioOrIntent === 'number' && Number.isFinite(ratioOrIntent)) {
|
|
707
|
+
ratio = ratioOrIntent;
|
|
708
|
+
} else if (resolvedIntent == null && typeof ratioOrIntent === 'string') {
|
|
709
|
+
resolvedIntent = ratioOrIntent;
|
|
710
|
+
}
|
|
680
711
|
|
|
681
712
|
if (inputType === 'string') {
|
|
682
713
|
// Could be JSON string or just text
|
|
@@ -691,20 +722,15 @@ export class BrevitClient {
|
|
|
691
722
|
}
|
|
692
723
|
|
|
693
724
|
if (!inputObject) {
|
|
694
|
-
//
|
|
695
|
-
|
|
696
|
-
// It's long text, apply text optimization
|
|
697
|
-
return await this._textOptimizer(rawData, intent);
|
|
698
|
-
}
|
|
699
|
-
// It's short text, return as-is
|
|
700
|
-
return rawData;
|
|
725
|
+
// Plain text: always compress via TextRank.
|
|
726
|
+
return await this.optimizeText(rawData, ratio);
|
|
701
727
|
}
|
|
702
728
|
} else if (inputType === 'object' && rawData !== null) {
|
|
703
729
|
// Check if it's an ArrayBuffer or TypedArray (image data)
|
|
704
730
|
if (rawData instanceof ArrayBuffer ||
|
|
705
731
|
rawData instanceof Uint8Array ||
|
|
706
732
|
(rawData.constructor && rawData.constructor.name === 'Buffer')) {
|
|
707
|
-
return await this._imageOptimizer(rawData,
|
|
733
|
+
return await this._imageOptimizer(rawData, resolvedIntent);
|
|
708
734
|
}
|
|
709
735
|
// It's a plain JS object
|
|
710
736
|
inputObject = rawData;
|
|
@@ -745,12 +771,13 @@ export class BrevitClient {
|
|
|
745
771
|
* @private
|
|
746
772
|
*/
|
|
747
773
|
async _defaultTextOptimizer(longText, intent) {
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
774
|
+
if (this._config.textMode === TextOptimizationMode.None) {
|
|
775
|
+
return String(longText ?? '');
|
|
776
|
+
}
|
|
777
|
+
|
|
778
|
+
// Built-in deterministic extractive compression (TextRank).
|
|
779
|
+
// If callers want LLM summarization, they can pass a custom textOptimizer.
|
|
780
|
+
return await this.compressText(String(longText ?? ''));
|
|
754
781
|
}
|
|
755
782
|
|
|
756
783
|
/**
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
import nlp from 'compromise';
|
|
2
|
+
|
|
3
|
+
/**
 * Deterministic extractive semantic compressor using a TextRank-style graph over sentences.
 *
 * Each sentence is a node. Two sentences are linked when they share at least one
 * extracted noun term. Scores are propagated PageRank-style for a fixed number of
 * iterations, and the selected sentences are returned in their original order.
 */
export class SemanticCompressor {
  /**
   * @param {object} [options]
   * @param {Iterable<string>} [options.stopWords] - Terms to ignore; replaces the built-in list.
   * @param {number} [options.damping=0.85] - Damping factor of the score update.
   * @param {number} [options.iterations=20] - Number of score-propagation passes.
   * @param {number} [options.autoThresholdMultiplier=0.9] - AUTO mode keeps sentences
   *   whose score is at least (mean score * multiplier).
   */
  constructor(options = {}) {
    const {
      stopWords,
      damping = 0.85,
      iterations = 20,
      autoThresholdMultiplier = 0.9,
    } = options;

    this._stopWords = new Set(
      stopWords || [
        'the', 'is', 'in', 'at', 'of', 'on', 'and', 'a', 'to', 'it', 'for',
        'with', 'as', 'by', 'this', 'that', 'are', 'was', 'be', 'or', 'an',
        'if', 'not', 'but', 'from', 'they', 'we', 'he', 'she', 'which',
      ],
    );

    this._damping = damping;
    this._iterations = iterations;
    this._autoThresholdMultiplier = autoThresholdMultiplier;
  }

  /**
   * AUTO MODE: Keep sentences with above-average importance (threshold = mean * multiplier).
   * @param {string} text
   * @returns {string}
   */
  compress(text) {
    return this._runTextRank(text, 'auto');
  }

  /**
   * MANUAL MODE: Keep top-ranked sentences by ratio.
   * If ratio <= 0 (or is not a number), behaves like `compress`.
   * @param {string} text
   * @param {number} [ratio=0.0] - Share of sentences to keep; >= 1 keeps all of them.
   * @returns {string}
   */
  optimize(text, ratio = 0.0) {
    return this._runTextRank(text, 'ratio', ratio);
  }

  /**
   * Runs the full pipeline: split, extract terms, link, rank, select, rejoin.
   * @param {string} text
   * @param {'auto'|'ratio'} mode
   * @param {number} [ratioValue=0.0]
   * @returns {string} '' for null/undefined input; the input itself when no sentence is found.
   */
  _runTextRank(text, mode, ratioValue = 0.0) {
    if (text == null) return '';
    const str = String(text);

    const rawSentences = this._splitSentences(str);
    if (rawSentences.length === 0) return str;

    // NaN is neither <= 0 nor >= 1; left unchecked it would select zero
    // sentences and return ''. Treat it like "no ratio given" instead.
    const ratio = Number(ratioValue);
    const useAutoMode = mode === 'auto' || Number.isNaN(ratio) || ratio <= 0;

    const nodes = rawSentences.map((sent, index) => ({
      id: index,
      text: sent,
      terms: this._extractTerms(sent),
      score: 1.0,
    }));

    const edges = this._buildEdges(nodes);
    this._rankNodes(nodes, edges);

    const keptIndices = useAutoMode
      ? this._selectAboveThreshold(nodes)
      : this._selectTopByRatio(nodes, ratio);

    // Reconstruct in original order.
    return nodes
      .filter((n) => keptIndices.has(n.id))
      .map((n) => n.text)
      .join(' ');
  }

  /** Returns the set of lower-cased noun terms of a sentence, minus short terms and stop words. */
  _extractTerms(sentence) {
    const terms = nlp(sentence)
      .nouns()
      .out('array')
      .map((t) => String(t).toLowerCase().trim())
      .filter((t) => t.length > 2 && !this._stopWords.has(t));
    return new Set(terms);
  }

  /** Builds an undirected adjacency list; an edge exists when similarity > 0. */
  _buildEdges(nodes) {
    const edges = Array.from({ length: nodes.length }, () => []);
    for (let i = 0; i < nodes.length; i++) {
      for (let j = i + 1; j < nodes.length; j++) {
        const sim = this._calculateSimilarity(nodes[i].terms, nodes[j].terms);
        if (sim > 0) {
          edges[i].push(j);
          edges[j].push(i);
        }
      }
    }
    return edges;
  }

  /** Updates every node's score in place with PageRank-style iterations. */
  _rankNodes(nodes, edges) {
    const base = 1 - this._damping;
    for (let iter = 0; iter < this._iterations; iter++) {
      // Compute the whole pass from the previous scores before writing any back.
      const newScores = nodes.map((n) => n.score);
      for (let i = 0; i < nodes.length; i++) {
        let sum = 0;
        for (const neighborIdx of edges[i]) {
          sum += nodes[neighborIdx].score / (edges[neighborIdx].length || 1);
        }
        newScores[i] = base + this._damping * sum;
      }
      nodes.forEach((n, i) => {
        n.score = newScores[i];
      });
    }
  }

  /** AUTO selection: ids scoring >= mean * multiplier; never returns an empty set for non-empty input. */
  _selectAboveThreshold(nodes) {
    const keptIndices = new Set();
    const totalScore = nodes.reduce((sum, n) => sum + n.score, 0);
    const avgScore = totalScore / (nodes.length || 1);
    const threshold = avgScore * this._autoThresholdMultiplier;

    nodes.forEach((n) => {
      if (n.score >= threshold) keptIndices.add(n.id);
    });

    if (keptIndices.size === 0 && nodes.length > 0) {
      const topNode = nodes.reduce((prev, current) =>
        prev.score > current.score ? prev : current,
      );
      keptIndices.add(topNode.id);
    }
    return keptIndices;
  }

  /** RATIO selection: all ids when ratio >= 1, otherwise the top floor(n * ratio) (at least one). */
  _selectTopByRatio(nodes, ratio) {
    const keptIndices = new Set();
    if (ratio >= 1) {
      nodes.forEach((n) => keptIndices.add(n.id));
      return keptIndices;
    }
    const sorted = [...nodes].sort((a, b) => b.score - a.score);
    const count = Math.max(1, Math.floor(nodes.length * ratio));
    sorted.slice(0, count).forEach((n) => keptIndices.add(n.id));
    return keptIndices;
  }

  /**
   * Splits text into trimmed, non-empty sentences.
   * Uses the NLP library first; if it throws or yields nothing, falls back to a
   * split on whitespace that follows '.', '!' or '?'.
   */
  _splitSentences(text) {
    try {
      const doc = nlp(text);
      const arr = doc.sentences().out('array');
      if (Array.isArray(arr) && arr.length > 0) {
        return arr.map((s) => String(s).trim()).filter(Boolean);
      }
    } catch {
      // Deliberate best-effort: fall through to the regex split below.
    }

    // Conservative fallback split
    return String(text)
      .split(/(?<=[.!?])\s+/)
      .map((s) => s.trim())
      .filter(Boolean);
  }

  /**
   * Term overlap between two sentences: shared terms / (ln|A| + ln|B|).
   * Returns 0 when either set is empty or nothing is shared. When both sets
   * have a single term the denominator is 0, so 1 is used instead.
   */
  _calculateSimilarity(setA, setB) {
    if (!setA || !setB || setA.size === 0 || setB.size === 0) return 0;
    let intersection = 0;
    for (const elem of setA) if (setB.has(elem)) intersection++;
    if (intersection === 0) return 0;
    const denom = Math.log(setA.size) + Math.log(setB.size);
    return intersection / (denom || 1);
  }
}
|
|
156
|
+
|
|
157
|
+
|
package/test/test.js
CHANGED
|
@@ -6,9 +6,9 @@ async function runTests() {
|
|
|
6
6
|
let passed = 0;
|
|
7
7
|
let failed = 0;
|
|
8
8
|
|
|
9
|
-
function test(name, fn) {
|
|
9
|
+
async function test(name, fn) {
|
|
10
10
|
try {
|
|
11
|
-
fn();
|
|
11
|
+
await fn();
|
|
12
12
|
console.log(`✓ ${name}`);
|
|
13
13
|
passed++;
|
|
14
14
|
} catch (error) {
|
|
@@ -18,7 +18,7 @@ async function runTests() {
|
|
|
18
18
|
}
|
|
19
19
|
|
|
20
20
|
// Test 1: Flatten JSON object
|
|
21
|
-
test('Flatten JSON object', async () => {
|
|
21
|
+
await test('Flatten JSON object', async () => {
|
|
22
22
|
const config = new BrevitConfig({ jsonMode: JsonOptimizationMode.Flatten });
|
|
23
23
|
const brevit = new BrevitClient(config);
|
|
24
24
|
|
|
@@ -30,26 +30,26 @@ async function runTests() {
|
|
|
30
30
|
};
|
|
31
31
|
|
|
32
32
|
const result = await brevit.optimize(testObject);
|
|
33
|
-
if (!result.includes('user.name:
|
|
33
|
+
if (!result.includes('user.name:Javian') || !result.includes('user.email:support@javianpicardo.com')) {
|
|
34
34
|
throw new Error('Flattened output does not contain expected values');
|
|
35
35
|
}
|
|
36
36
|
});
|
|
37
37
|
|
|
38
38
|
// Test 2: Flatten JSON string
|
|
39
|
-
test('Flatten JSON string', async () => {
|
|
39
|
+
await test('Flatten JSON string', async () => {
|
|
40
40
|
const config = new BrevitConfig({ jsonMode: JsonOptimizationMode.Flatten });
|
|
41
41
|
const brevit = new BrevitClient(config);
|
|
42
42
|
|
|
43
43
|
const jsonString = '{"order": {"orderId": "o-456", "status": "SHIPPED"}}';
|
|
44
44
|
const result = await brevit.optimize(jsonString);
|
|
45
45
|
|
|
46
|
-
if (!result.includes('order.orderId:
|
|
46
|
+
if (!result.includes('order.orderId:o-456') || !result.includes('order.status:SHIPPED')) {
|
|
47
47
|
throw new Error('Flattened output does not contain expected values');
|
|
48
48
|
}
|
|
49
49
|
});
|
|
50
50
|
|
|
51
51
|
// Test 3: Short text returns as-is
|
|
52
|
-
test('Short text returns as-is', async () => {
|
|
52
|
+
await test('Short text returns as-is', async () => {
|
|
53
53
|
const config = new BrevitConfig({ longTextThreshold: 500 });
|
|
54
54
|
const brevit = new BrevitClient(config);
|
|
55
55
|
|
|
@@ -61,8 +61,59 @@ async function runTests() {
|
|
|
61
61
|
}
|
|
62
62
|
});
|
|
63
63
|
|
|
64
|
-
// Test 4:
|
|
65
|
-
test('
|
|
64
|
+
// Test 4: compressText always attempts compression
|
|
65
|
+
await test('compressText returns a string and is deterministic', async () => {
|
|
66
|
+
const brevit = new BrevitClient();
|
|
67
|
+
const text = 'Alpha sentence about cats. Beta sentence about cats. Gamma unrelated sentence.';
|
|
68
|
+
|
|
69
|
+
const r1 = await brevit.compressText(text);
|
|
70
|
+
const r2 = await brevit.compressText(text);
|
|
71
|
+
if (typeof r1 !== 'string' || r1.length === 0) {
|
|
72
|
+
throw new Error('compressText did not return a non-empty string');
|
|
73
|
+
}
|
|
74
|
+
if (r1 !== r2) {
|
|
75
|
+
throw new Error('compressText should be deterministic');
|
|
76
|
+
}
|
|
77
|
+
});
|
|
78
|
+
|
|
79
|
+
// Test 5: optimizeText ratio=0 behaves like compressText
|
|
80
|
+
await test('optimizeText ratio=0 behaves like compressText', async () => {
|
|
81
|
+
const brevit = new BrevitClient();
|
|
82
|
+
const text = 'One sentence about hiking. Another sentence about hiking. A third sentence about coffee.';
|
|
83
|
+
|
|
84
|
+
const auto = await brevit.compressText(text);
|
|
85
|
+
const zero = await brevit.optimizeText(text, 0.0);
|
|
86
|
+
if (auto !== zero) {
|
|
87
|
+
throw new Error('optimizeText(text, 0.0) should equal compressText(text)');
|
|
88
|
+
}
|
|
89
|
+
});
|
|
90
|
+
|
|
91
|
+
// Test 6: optimize(text) defaults to auto compression
|
|
92
|
+
await test('optimize(text) defaults to auto compression', async () => {
|
|
93
|
+
const brevit = new BrevitClient();
|
|
94
|
+
const text = 'One sentence about hiking. Another sentence about hiking. A third sentence about coffee.';
|
|
95
|
+
|
|
96
|
+
const auto = await brevit.compressText(text);
|
|
97
|
+
const result = await brevit.optimize(text);
|
|
98
|
+
if (result !== auto) {
|
|
99
|
+
throw new Error('optimize(text) should behave like compressText(text)');
|
|
100
|
+
}
|
|
101
|
+
});
|
|
102
|
+
|
|
103
|
+
// Test 7: optimize(text, ratio) routes to optimizeText
|
|
104
|
+
await test('optimize(text, ratio) routes to optimizeText', async () => {
|
|
105
|
+
const brevit = new BrevitClient();
|
|
106
|
+
const text = 'Alpha cats. Beta cats. Gamma coffee. Delta cats.';
|
|
107
|
+
|
|
108
|
+
const direct = await brevit.optimizeText(text, 0.6);
|
|
109
|
+
const routed = await brevit.optimize(text, 0.6);
|
|
110
|
+
if (direct !== routed) {
|
|
111
|
+
throw new Error('optimize(text, ratio) should equal optimizeText(text, ratio)');
|
|
112
|
+
}
|
|
113
|
+
});
|
|
114
|
+
|
|
115
|
+
// Test 8: Array handling
|
|
116
|
+
await test('Array handling', async () => {
|
|
66
117
|
const config = new BrevitConfig({ jsonMode: JsonOptimizationMode.Flatten });
|
|
67
118
|
const brevit = new BrevitClient(config);
|
|
68
119
|
|
|
@@ -74,8 +125,12 @@ async function runTests() {
|
|
|
74
125
|
};
|
|
75
126
|
|
|
76
127
|
const result = await brevit.optimize(testObject);
|
|
77
|
-
|
|
78
|
-
|
|
128
|
+
// Expect tabular optimization for uniform object arrays
|
|
129
|
+
if (!result.includes('items[2]{sku,name}:')) {
|
|
130
|
+
throw new Error('Tabular array header missing');
|
|
131
|
+
}
|
|
132
|
+
if (!result.includes('A-88,Brevit Pro') || !result.includes('T-22,Toon Handbook')) {
|
|
133
|
+
throw new Error('Tabular array rows missing');
|
|
79
134
|
}
|
|
80
135
|
});
|
|
81
136
|
|