brevit 0.1.5 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -9
- package/TYPESCRIPT.md +16 -9
- package/example.ts +5 -1
- package/package.json +5 -2
- package/src/brevit.d.ts +28 -1
- package/src/brevit.js +60 -33
- package/src/semanticCompressor.js +157 -0
- package/test/test.js +66 -11
package/README.md
CHANGED
|
@@ -72,7 +72,7 @@ const optimized = await brevit.brevity(data);
|
|
|
72
72
|
## Key Features
|
|
73
73
|
|
|
74
74
|
- **JSON Optimization**: Flatten nested JSON structures into token-efficient key-value pairs
|
|
75
|
-
- **Text Optimization**:
|
|
75
|
+
- **Text Optimization**: Deterministic TextRank-based compression for plain text (no LLM required)
|
|
76
76
|
- **Image Optimization**: Extract text from images via OCR
|
|
77
77
|
- **Lightweight**: Minimal footprint — one runtime dependency (`compromise`, used by the built-in text compressor) plus optional YAML support
|
|
78
78
|
- **Universal**: Works in Node.js, browsers, and modern JavaScript environments
|
|
@@ -298,16 +298,23 @@ The text goes on for many lines...
|
|
|
298
298
|
[Repeated content many times]
|
|
299
299
|
`.repeat(50);
|
|
300
300
|
|
|
301
|
-
// Automatic detection:
|
|
301
|
+
// Automatic detection: plain text is compressed by default
|
|
302
302
|
const optimized = await brevit.brevity(longText);
|
|
303
303
|
|
|
304
|
-
// Explicit text
|
|
304
|
+
// Explicit text compression via the main pipeline (ratio optional; defaults to 0.0 = auto)
|
|
305
305
|
const config = new BrevitConfig({
|
|
306
306
|
textMode: TextOptimizationMode.Clean,
|
|
307
|
-
longTextThreshold: 500 //
|
|
307
|
+
longTextThreshold: 500 // (JSON heuristics only; plain text is compressed regardless)
|
|
308
308
|
});
|
|
309
309
|
const brevitWithText = new BrevitClient(config);
|
|
310
|
-
const
|
|
310
|
+
const cleanedAuto = await brevitWithText.optimize(longText); // auto
|
|
311
|
+
const cleaned60 = await brevitWithText.optimize(longText, 0.6); // ratio
|
|
312
|
+
const cleanedIntent = await brevitWithText.optimize(longText, 0.6, "keep key details"); // ratio + intent (3rd arg)
|
|
313
|
+
|
|
314
|
+
// Explicit TextRank compression APIs (recommended when you want direct control)
|
|
315
|
+
const compressedAuto = await brevit.compressText(longText); // AUTO mode
|
|
316
|
+
const compressed60 = await brevit.optimizeText(longText, 0.6); // Keep ~60% of sentences
|
|
317
|
+
const compressedDefault = await brevit.optimizeText(longText, 0.0); // Same as compressText()
|
|
311
318
|
```
|
|
312
319
|
|
|
313
320
|
#### Example 2.2: Reading Text from File (Node.js)
|
|
@@ -329,19 +336,19 @@ const optimized = await brevit.brevity(textContent);
|
|
|
329
336
|
const cleanConfig = new BrevitConfig({
|
|
330
337
|
textMode: TextOptimizationMode.Clean
|
|
331
338
|
});
|
|
332
|
-
//
|
|
339
|
+
// Built-in deterministic TextRank extractive compression (no LLM required)
|
|
333
340
|
|
|
334
341
|
// Summarize Fast
|
|
335
342
|
const fastConfig = new BrevitConfig({
|
|
336
343
|
textMode: TextOptimizationMode.SummarizeFast
|
|
337
344
|
});
|
|
338
|
-
//
|
|
345
|
+
// Reserved for custom LLM summarization (or use built-in TextRank via compressText/optimizeText)
|
|
339
346
|
|
|
340
347
|
// Summarize High Quality
|
|
341
348
|
const qualityConfig = new BrevitConfig({
|
|
342
349
|
textMode: TextOptimizationMode.SummarizeHighQuality
|
|
343
350
|
});
|
|
344
|
-
//
|
|
351
|
+
// Reserved for custom LLM summarization (or use built-in TextRank via compressText/optimizeText)
|
|
345
352
|
```
|
|
346
353
|
|
|
347
354
|
### 3. Image Optimization Examples
|
|
@@ -854,7 +861,7 @@ const optimized = await brevit.optimize(json);
|
|
|
854
861
|
```javascript
|
|
855
862
|
const longDocument = '...very long text...';
|
|
856
863
|
const optimized = await brevit.optimize(longDocument);
|
|
857
|
-
//
|
|
864
|
+
// Plain text is compressed by default; use optimize(longDocument, 0.6) for ratio compression
|
|
858
865
|
```
|
|
859
866
|
|
|
860
867
|
### Example 4: Process Image (ArrayBuffer)
|
package/TYPESCRIPT.md
CHANGED
|
@@ -7,7 +7,7 @@ Brevit.js includes comprehensive TypeScript definitions for full type safety and
|
|
|
7
7
|
No additional installation required! TypeScript definitions are included in the package.
|
|
8
8
|
|
|
9
9
|
```bash
|
|
10
|
-
npm install brevit
|
|
10
|
+
npm install brevit
|
|
11
11
|
```
|
|
12
12
|
|
|
13
13
|
## Basic Usage
|
|
@@ -17,7 +17,7 @@ import {
|
|
|
17
17
|
BrevitClient,
|
|
18
18
|
BrevitConfig,
|
|
19
19
|
JsonOptimizationMode,
|
|
20
|
-
} from 'brevit
|
|
20
|
+
} from 'brevit';
|
|
21
21
|
|
|
22
22
|
const config = new BrevitConfig({
|
|
23
23
|
jsonMode: JsonOptimizationMode.Flatten,
|
|
@@ -38,7 +38,7 @@ import {
|
|
|
38
38
|
JsonOptimizationMode,
|
|
39
39
|
TextOptimizationMode,
|
|
40
40
|
ImageOptimizationMode,
|
|
41
|
-
} from 'brevit
|
|
41
|
+
} from 'brevit';
|
|
42
42
|
|
|
43
43
|
// Usage
|
|
44
44
|
const mode: typeof JsonOptimizationMode.Flatten = JsonOptimizationMode.Flatten;
|
|
@@ -51,7 +51,7 @@ import type {
|
|
|
51
51
|
JsonOptimizationModeType,
|
|
52
52
|
TextOptimizationModeType,
|
|
53
53
|
ImageOptimizationModeType,
|
|
54
|
-
} from 'brevit
|
|
54
|
+
} from 'brevit';
|
|
55
55
|
|
|
56
56
|
function setMode(mode: JsonOptimizationModeType) {
|
|
57
57
|
// Type-safe mode setting
|
|
@@ -66,7 +66,7 @@ import type {
|
|
|
66
66
|
BrevitClientOptions,
|
|
67
67
|
TextOptimizerFunction,
|
|
68
68
|
ImageOptimizerFunction,
|
|
69
|
-
} from 'brevit
|
|
69
|
+
} from 'brevit';
|
|
70
70
|
|
|
71
71
|
// Configuration options
|
|
72
72
|
const config: BrevitConfigOptions = {
|
|
@@ -91,14 +91,14 @@ import {
|
|
|
91
91
|
BrevitConfig,
|
|
92
92
|
JsonOptimizationMode,
|
|
93
93
|
type BrevitConfigOptions,
|
|
94
|
-
} from 'brevit
|
|
94
|
+
} from 'brevit';
|
|
95
95
|
|
|
96
96
|
const configOptions: BrevitConfigOptions = {
|
|
97
97
|
jsonMode: JsonOptimizationMode.Flatten,
|
|
98
98
|
textMode: 'Clean',
|
|
99
99
|
imageMode: 'Ocr',
|
|
100
100
|
jsonPathsToKeep: ['user.name', 'order.orderId'],
|
|
101
|
-
longTextThreshold: 1000,
|
|
101
|
+
longTextThreshold: 1000, // (plain text is compressed regardless; this is mostly for JSON heuristics)
|
|
102
102
|
};
|
|
103
103
|
|
|
104
104
|
const config = new BrevitConfig(configOptions);
|
|
@@ -113,7 +113,7 @@ import {
|
|
|
113
113
|
BrevitConfig,
|
|
114
114
|
type TextOptimizerFunction,
|
|
115
115
|
type ImageOptimizerFunction,
|
|
116
|
-
} from 'brevit
|
|
116
|
+
} from 'brevit';
|
|
117
117
|
|
|
118
118
|
const customTextOptimizer: TextOptimizerFunction = async (longText, intent) => {
|
|
119
119
|
const response = await fetch('/api/summarize', {
|
|
@@ -140,6 +140,13 @@ const client = new BrevitClient(new BrevitConfig(), {
|
|
|
140
140
|
textOptimizer: customTextOptimizer,
|
|
141
141
|
imageOptimizer: customImageOptimizer,
|
|
142
142
|
});
|
|
143
|
+
|
|
144
|
+
// Text compression (TextRank) is built-in:
|
|
145
|
+
const text = 'Alpha sentence. Beta sentence. Gamma sentence.';
|
|
146
|
+
const compressedAuto = await client.brevity(text); // auto compression
|
|
147
|
+
const compressedAuto2 = await client.optimize(text); // auto compression (ratio defaults to 0.0)
|
|
148
|
+
const compressed60 = await client.optimize(text, 0.6); // ratio compression
|
|
149
|
+
const compressed60WithIntent = await client.optimize(text, 0.6, 'keep key details'); // ratio + intent
|
|
143
150
|
```
|
|
144
151
|
|
|
145
152
|
### Example 3: Type-Safe Data Structures
|
|
@@ -184,7 +191,7 @@ const optimizedOrder = await client.optimize(order);
|
|
|
184
191
|
### Example 4: Generic Helper Function
|
|
185
192
|
|
|
186
193
|
```typescript
|
|
187
|
-
import { BrevitClient, BrevitConfig } from 'brevit
|
|
194
|
+
import { BrevitClient, BrevitConfig } from 'brevit';
|
|
188
195
|
|
|
189
196
|
async function optimizeData<T>(data: T): Promise<string> {
|
|
190
197
|
const client = new BrevitClient();
|
package/example.ts
CHANGED
|
@@ -37,7 +37,7 @@ async function example2() {
|
|
|
37
37
|
jsonMode: JsonOptimizationMode.Flatten,
|
|
38
38
|
textMode: TextOptimizationMode.Clean,
|
|
39
39
|
imageMode: ImageOptimizationMode.Ocr,
|
|
40
|
-
longTextThreshold: 1000,
|
|
40
|
+
longTextThreshold: 1000, // (plain text is compressed regardless; this is mostly for JSON heuristics)
|
|
41
41
|
};
|
|
42
42
|
|
|
43
43
|
const client = new BrevitClient(new BrevitConfig(config));
|
|
@@ -70,7 +70,11 @@ async function example3() {
|
|
|
70
70
|
});
|
|
71
71
|
|
|
72
72
|
const longText = '...very long text...';
|
|
73
|
+
// For plain text, optimize() runs the built-in deterministic TextRank compression (it calls optimizeText() directly, so a custom textOptimizer is not consulted on this path).
|
|
74
|
+
// Ratio compression is supported via optimize(longText, ratio, intent?).
|
|
73
75
|
const optimized = await client.optimize(longText);
|
|
76
|
+
const optimized60 = await client.optimize(longText, 0.6);
|
|
77
|
+
const optimized60WithIntent = await client.optimize(longText, 0.6, 'keep key details');
|
|
74
78
|
console.log(optimized);
|
|
75
79
|
}
|
|
76
80
|
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "brevit",
|
|
3
|
-
"version": "0.
|
|
4
|
-
"description": "A high-performance JavaScript library for
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "A high-performance JavaScript library for optimizing LLM prompt inputs: token-efficient JSON flattening + deterministic TextRank-based text compression.",
|
|
5
5
|
"main": "src/brevit.js",
|
|
6
6
|
"types": "src/brevit.d.ts",
|
|
7
7
|
"type": "module",
|
|
@@ -25,6 +25,9 @@
|
|
|
25
25
|
"type": "git",
|
|
26
26
|
"url": "https://github.com/JavianDev/Brevit.js.git"
|
|
27
27
|
},
|
|
28
|
+
"dependencies": {
|
|
29
|
+
"compromise": "^14.14.4"
|
|
30
|
+
},
|
|
28
31
|
"optionalDependencies": {
|
|
29
32
|
"js-yaml": "^4.1.0"
|
|
30
33
|
}
|
package/src/brevit.d.ts
CHANGED
|
@@ -221,7 +221,34 @@ export class BrevitClient {
|
|
|
221
221
|
* // Returns OCR text or metadata
|
|
222
222
|
* ```
|
|
223
223
|
*/
|
|
224
|
-
|
|
224
|
+
/**
|
|
225
|
+
* Optimizes any supported input type.
|
|
226
|
+
* For plain text inputs, this performs deterministic TextRank compression by default.
|
|
227
|
+
*
|
|
228
|
+
* - optimize(text) => auto compression (ratio defaults to 0.0)
|
|
229
|
+
* - optimize(text, 0.6) => ratio compression
|
|
230
|
+
* - optimize(text, 0.6, intent) => ratio compression with intent hint
|
|
231
|
+
* - optimize(obj, intent) => JSON/object pipeline with intent hint
|
|
232
|
+
*/
|
|
233
|
+
optimize(rawData: unknown, ratioOrIntent?: number | string | null, intent?: string | null): Promise<string>;
|
|
234
|
+
|
|
235
|
+
/**
|
|
236
|
+
* Intelligently optimizes data by automatically selecting the best strategy.
|
|
237
|
+
* For plain text inputs, this performs deterministic TextRank compression by default.
|
|
238
|
+
*/
|
|
239
|
+
brevity(rawData: unknown, intent?: string | null): Promise<string>;
|
|
240
|
+
|
|
241
|
+
/**
|
|
242
|
+
* Explicit text compression (AUTO mode).
|
|
243
|
+
* Always attempts to compress, even for short / single-sentence inputs.
|
|
244
|
+
*/
|
|
245
|
+
compressText(text: string): Promise<string>;
|
|
246
|
+
|
|
247
|
+
/**
|
|
248
|
+
* Explicit text compression (RATIO mode).
|
|
249
|
+
* If ratio <= 0, behaves like compressText().
|
|
250
|
+
*/
|
|
251
|
+
optimizeText(text: string, ratio?: number): Promise<string>;
|
|
225
252
|
}
|
|
226
253
|
|
|
227
254
|
// Re-export types for convenience
|
package/src/brevit.js
CHANGED
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
*
|
|
8
8
|
* Project: Brevit
|
|
9
9
|
* Author: Javian
|
|
10
|
-
* Version:
|
|
10
|
+
* Version: 1.0.0
|
|
11
11
|
* =================================================================================
|
|
12
12
|
*/
|
|
13
13
|
|
|
@@ -80,6 +80,36 @@ export class BrevitClient {
|
|
|
80
80
|
this._config = config;
|
|
81
81
|
this._textOptimizer = options.textOptimizer || this._defaultTextOptimizer.bind(this);
|
|
82
82
|
this._imageOptimizer = options.imageOptimizer || this._defaultImageOptimizer.bind(this);
|
|
83
|
+
this._semanticCompressor = null;
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
/**
|
|
87
|
+
* Explicit text compression (AUTO mode).
|
|
88
|
+
* Always attempts to compress, even for short / single-sentence inputs.
|
|
89
|
+
* @param {string} text
|
|
90
|
+
* @returns {Promise<string>}
|
|
91
|
+
*/
|
|
92
|
+
async compressText(text) {
|
|
93
|
+
if (!this._semanticCompressor) {
|
|
94
|
+
const { SemanticCompressor } = await import('./semanticCompressor.js');
|
|
95
|
+
this._semanticCompressor = new SemanticCompressor();
|
|
96
|
+
}
|
|
97
|
+
return this._semanticCompressor.compress(String(text ?? ''));
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
/**
|
|
101
|
+
* Explicit text compression (RATIO mode).
|
|
102
|
+
* If ratio <= 0, behaves like compressText().
|
|
103
|
+
* @param {string} text
|
|
104
|
+
* @param {number} ratio
|
|
105
|
+
* @returns {Promise<string>}
|
|
106
|
+
*/
|
|
107
|
+
async optimizeText(text, ratio = 0.0) {
|
|
108
|
+
if (!this._semanticCompressor) {
|
|
109
|
+
const { SemanticCompressor } = await import('./semanticCompressor.js');
|
|
110
|
+
this._semanticCompressor = new SemanticCompressor();
|
|
111
|
+
}
|
|
112
|
+
return this._semanticCompressor.optimize(String(text ?? ''), Number(ratio ?? 0));
|
|
83
113
|
}
|
|
84
114
|
|
|
85
115
|
/**
|
|
@@ -592,24 +622,12 @@ export class BrevitClient {
|
|
|
592
622
|
(trimmed.startsWith('[') && trimmed.endsWith(']'))) {
|
|
593
623
|
inputObject = JSON.parse(rawData);
|
|
594
624
|
} else {
|
|
595
|
-
//
|
|
596
|
-
|
|
597
|
-
const strategy = this._selectOptimalStrategy(analysis);
|
|
598
|
-
|
|
599
|
-
if (strategy.name === 'TextOptimization') {
|
|
600
|
-
return await this._textOptimizer(rawData, intent);
|
|
601
|
-
}
|
|
602
|
-
return rawData;
|
|
625
|
+
// Plain text: always compress via TextRank (auto).
|
|
626
|
+
return await this.compressText(rawData);
|
|
603
627
|
}
|
|
604
628
|
} catch (e) {
|
|
605
|
-
// Not JSON - treat as text
|
|
606
|
-
|
|
607
|
-
const strategy = this._selectOptimalStrategy(analysis);
|
|
608
|
-
|
|
609
|
-
if (strategy.name === 'TextOptimization') {
|
|
610
|
-
return await this._textOptimizer(rawData, intent);
|
|
611
|
-
}
|
|
612
|
-
return rawData;
|
|
629
|
+
// Not valid JSON - treat as plain text and compress.
|
|
630
|
+
return await this.compressText(rawData);
|
|
613
631
|
}
|
|
614
632
|
} else if (inputType === 'object' && rawData !== null) {
|
|
615
633
|
// Check if it's image data
|
|
@@ -671,12 +689,25 @@ export class BrevitClient {
|
|
|
671
689
|
* or text into a token-efficient string.
|
|
672
690
|
*
|
|
673
691
|
* @param {any} rawData - The data to optimize (object, JSON string, text, ArrayBuffer).
|
|
674
|
-
* @param {string} [
|
|
692
|
+
* @param {number|string|null} [ratioOrIntent] - If number: sentence ratio for TextRank compression (0..1). If string: intent.
|
|
693
|
+
* @param {string|null} [intent] - (Optional) A hint about the user's goal (use this as 3rd arg when passing ratio).
|
|
675
694
|
* @returns {Promise<string>} A promise that resolves to the optimized string.
|
|
676
695
|
*/
|
|
677
|
-
async optimize(rawData, intent = null) {
|
|
696
|
+
async optimize(rawData, ratioOrIntent = null, intent = null) {
|
|
678
697
|
let inputObject = null;
|
|
679
698
|
let inputType = typeof rawData;
|
|
699
|
+
let ratio = 0.0;
|
|
700
|
+
let resolvedIntent = intent;
|
|
701
|
+
|
|
702
|
+
// Backwards-compatible argument parsing:
|
|
703
|
+
// - optimize(text, 0.6, intent?) => ratio-based text compression
|
|
704
|
+
// - optimize(text, intent?) => auto text compression (ratio defaults to 0.0)
|
|
705
|
+
// - optimize(obj, intent?) => JSON/object pipeline
|
|
706
|
+
if (typeof ratioOrIntent === 'number' && Number.isFinite(ratioOrIntent)) {
|
|
707
|
+
ratio = ratioOrIntent;
|
|
708
|
+
} else if (resolvedIntent == null && typeof ratioOrIntent === 'string') {
|
|
709
|
+
resolvedIntent = ratioOrIntent;
|
|
710
|
+
}
|
|
680
711
|
|
|
681
712
|
if (inputType === 'string') {
|
|
682
713
|
// Could be JSON string or just text
|
|
@@ -691,20 +722,15 @@ export class BrevitClient {
|
|
|
691
722
|
}
|
|
692
723
|
|
|
693
724
|
if (!inputObject) {
|
|
694
|
-
//
|
|
695
|
-
|
|
696
|
-
// It's long text, apply text optimization
|
|
697
|
-
return await this._textOptimizer(rawData, intent);
|
|
698
|
-
}
|
|
699
|
-
// It's short text, return as-is
|
|
700
|
-
return rawData;
|
|
725
|
+
// Plain text: always compress via TextRank.
|
|
726
|
+
return await this.optimizeText(rawData, ratio);
|
|
701
727
|
}
|
|
702
728
|
} else if (inputType === 'object' && rawData !== null) {
|
|
703
729
|
// Check if it's an ArrayBuffer or TypedArray (image data)
|
|
704
730
|
if (rawData instanceof ArrayBuffer ||
|
|
705
731
|
rawData instanceof Uint8Array ||
|
|
706
732
|
(rawData.constructor && rawData.constructor.name === 'Buffer')) {
|
|
707
|
-
return await this._imageOptimizer(rawData,
|
|
733
|
+
return await this._imageOptimizer(rawData, resolvedIntent);
|
|
708
734
|
}
|
|
709
735
|
// It's a plain JS object
|
|
710
736
|
inputObject = rawData;
|
|
@@ -745,12 +771,13 @@ export class BrevitClient {
|
|
|
745
771
|
* @private
|
|
746
772
|
*/
|
|
747
773
|
async _defaultTextOptimizer(longText, intent) {
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
774
|
+
if (this._config.textMode === TextOptimizationMode.None) {
|
|
775
|
+
return String(longText ?? '');
|
|
776
|
+
}
|
|
777
|
+
|
|
778
|
+
// Built-in deterministic extractive compression (TextRank).
|
|
779
|
+
// If callers want LLM summarization, they can pass a custom textOptimizer.
|
|
780
|
+
return await this.compressText(String(longText ?? ''));
|
|
754
781
|
}
|
|
755
782
|
|
|
756
783
|
/**
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
import nlp from 'compromise';
|
|
2
|
+
|
|
3
|
+
/**
 * Deterministic extractive semantic compressor using a TextRank-style graph over sentences.
 *
 * Each sentence becomes a node. Two sentences are linked when their extracted
 * noun-term sets share at least one term. Node scores are refined with a fixed
 * number of PageRank-style iterations, and the selected sentences are returned
 * in their original order, joined by single spaces.
 */
export class SemanticCompressor {
  /**
   * @param {object} [options]
   * @param {Iterable<string>} [options.stopWords] - Terms ignored when comparing sentences;
   *   when provided (truthy) it replaces the built-in list.
   * @param {number} [options.damping=0.85] - Damping factor of the score iteration.
   * @param {number} [options.iterations=20] - Number of score-update passes.
   * @param {number} [options.autoThresholdMultiplier=0.9] - AUTO mode keeps sentences whose
   *   score is at least (mean score * this multiplier).
   */
  constructor(options = {}) {
    const {
      stopWords,
      damping = 0.85,
      iterations = 20,
      autoThresholdMultiplier = 0.9,
    } = options;

    this._stopWords = new Set(
      stopWords || [
        'the', 'is', 'in', 'at', 'of', 'on', 'and', 'a', 'to', 'it', 'for',
        'with', 'as', 'by', 'this', 'that', 'are', 'was', 'be', 'or', 'an',
        'if', 'not', 'but', 'from', 'they', 'we', 'he', 'she', 'which',
      ],
    );

    this._damping = damping;
    this._iterations = iterations;
    this._autoThresholdMultiplier = autoThresholdMultiplier;
  }

  /**
   * AUTO MODE: Keep sentences with above-average importance (threshold = mean * multiplier).
   *
   * @param {string} text
   * @returns {string}
   */
  compress(text) {
    return this._runTextRank(text, 'auto');
  }

  /**
   * MANUAL MODE: Keep top-ranked sentences by ratio.
   * If ratio <= 0 (or is not a usable number), behaves like `compress`.
   * A ratio >= 1 keeps every sentence.
   *
   * @param {string} text
   * @param {number} [ratio=0.0] - Fraction of sentences to keep.
   * @returns {string}
   */
  optimize(text, ratio = 0.0) {
    return this._runTextRank(text, 'ratio', ratio);
  }

  /**
   * Shared ranking pipeline behind `compress` and `optimize`.
   *
   * @param {string} text - Input text; null/undefined yields ''.
   * @param {'auto'|'ratio'} mode - Selection strategy.
   * @param {number} [ratioValue=0.0] - Fraction of sentences to keep in 'ratio' mode.
   * @returns {string} Selected sentences in original order, joined by a single space.
   *   If no sentences are detected, the stringified input is returned unchanged.
   */
  _runTextRank(text, mode, ratioValue = 0.0) {
    if (text == null) return '';
    const str = String(text);

    const rawSentences = this._splitSentences(str);
    if (rawSentences.length === 0) return str;

    // 1) Extract features: the lower-cased noun terms reported by compromise for each
    //    sentence, minus very short terms (<= 2 chars) and stop words.
    const nodes = rawSentences.map((sent, index) => {
      const terms = nlp(sent)
        .nouns()
        .out('array')
        .map((t) => String(t).toLowerCase().trim())
        .filter((t) => t.length > 2 && !this._stopWords.has(t));

      return {
        id: index,
        text: sent,
        terms: new Set(terms),
        score: 1.0,
      };
    });

    // 2) Build graph (adjacency list) using similarity > 0 as an edge.
    //    Edges are unweighted: the similarity value only decides whether a link exists.
    const edges = Array.from({ length: nodes.length }, () => []);
    for (let i = 0; i < nodes.length; i++) {
      for (let j = i + 1; j < nodes.length; j++) {
        const sim = this._calculateSimilarity(nodes[i].terms, nodes[j].terms);
        if (sim > 0) {
          edges[i].push(j);
          edges[j].push(i);
        }
      }
    }

    // 3) Iterate (PageRank-style). New scores are computed from the previous pass's
    //    scores and only written back once the pass is complete.
    const base = 1 - this._damping;
    for (let iter = 0; iter < this._iterations; iter++) {
      const newScores = nodes.map((n) => n.score);
      for (let i = 0; i < nodes.length; i++) {
        let sum = 0;
        for (const neighborIdx of edges[i]) {
          sum += nodes[neighborIdx].score / (edges[neighborIdx].length || 1);
        }
        newScores[i] = base + this._damping * sum;
      }
      nodes.forEach((n, i) => {
        n.score = newScores[i];
      });
    }

    // 4) Selection strategy
    const keptIndices = new Set();

    // Coerce once. `!(ratio > 0)` sends zero, negative AND NaN ratios to AUTO mode.
    // With the previous `ratioValue <= 0` test a NaN ratio fell through to the ratio
    // branch, produced a NaN sentence count and returned an empty string.
    const ratio = mode === 'auto' ? 0 : Number(ratioValue);
    const useAuto = mode === 'auto' || !(ratio > 0);

    if (useAuto) {
      const totalScore = nodes.reduce((sum, n) => sum + n.score, 0);
      const avgScore = totalScore / (nodes.length || 1);
      const threshold = avgScore * this._autoThresholdMultiplier;

      nodes.forEach((n) => {
        if (n.score >= threshold) keptIndices.add(n.id);
      });

      // Safety net: never return nothing; keep the single best-scoring sentence.
      if (keptIndices.size === 0 && nodes.length > 0) {
        const topNode = nodes.reduce((prev, current) =>
          prev.score > current.score ? prev : current,
        );
        keptIndices.add(topNode.id);
      }
    } else if (ratio >= 1) {
      nodes.forEach((n) => keptIndices.add(n.id));
    } else {
      // Sort a copy so `nodes` keeps the original sentence order for reconstruction.
      const sorted = [...nodes].sort((a, b) => b.score - a.score);
      const count = Math.max(1, Math.floor(nodes.length * ratio));
      sorted.slice(0, count).forEach((n) => keptIndices.add(n.id));
    }

    // 5) Reconstruct in original order
    return nodes
      .filter((n) => keptIndices.has(n.id))
      .map((n) => n.text)
      .join(' ');
  }

  /**
   * Splits text into trimmed, non-empty sentences.
   * Uses compromise's sentence detection first; if that throws or yields nothing,
   * falls back to splitting on whitespace that follows '.', '!' or '?'.
   *
   * @param {string} text
   * @returns {string[]}
   */
  _splitSentences(text) {
    try {
      const doc = nlp(text);
      const arr = doc.sentences().out('array');
      if (Array.isArray(arr) && arr.length > 0) {
        return arr.map((s) => String(s).trim()).filter(Boolean);
      }
    } catch {
      // fall back
    }

    // Conservative fallback split
    return String(text)
      .split(/(?<=[.!?])\s+/)
      .map((s) => s.trim())
      .filter(Boolean);
  }

  /**
   * Overlap similarity between two term sets: the number of shared terms divided by
   * log(|A|) + log(|B|). Returns 0 when either set is empty or nothing is shared.
   *
   * @param {Set<string>} setA
   * @param {Set<string>} setB
   * @returns {number}
   */
  _calculateSimilarity(setA, setB) {
    if (!setA || !setB || setA.size === 0 || setB.size === 0) return 0;
    let intersection = 0;
    for (const elem of setA) if (setB.has(elem)) intersection++;
    if (intersection === 0) return 0;
    const denom = Math.log(setA.size) + Math.log(setB.size);
    // Two single-term sets give log(1) + log(1) = 0; use 1 to avoid dividing by zero.
    return intersection / (denom || 1);
  }
}
|
|
156
|
+
|
|
157
|
+
|
package/test/test.js
CHANGED
|
@@ -6,9 +6,9 @@ async function runTests() {
|
|
|
6
6
|
let passed = 0;
|
|
7
7
|
let failed = 0;
|
|
8
8
|
|
|
9
|
-
function test(name, fn) {
|
|
9
|
+
async function test(name, fn) {
|
|
10
10
|
try {
|
|
11
|
-
fn();
|
|
11
|
+
await fn();
|
|
12
12
|
console.log(`✓ ${name}`);
|
|
13
13
|
passed++;
|
|
14
14
|
} catch (error) {
|
|
@@ -18,7 +18,7 @@ async function runTests() {
|
|
|
18
18
|
}
|
|
19
19
|
|
|
20
20
|
// Test 1: Flatten JSON object
|
|
21
|
-
test('Flatten JSON object', async () => {
|
|
21
|
+
await test('Flatten JSON object', async () => {
|
|
22
22
|
const config = new BrevitConfig({ jsonMode: JsonOptimizationMode.Flatten });
|
|
23
23
|
const brevit = new BrevitClient(config);
|
|
24
24
|
|
|
@@ -30,26 +30,26 @@ async function runTests() {
|
|
|
30
30
|
};
|
|
31
31
|
|
|
32
32
|
const result = await brevit.optimize(testObject);
|
|
33
|
-
if (!result.includes('user.name:
|
|
33
|
+
if (!result.includes('user.name:Javian') || !result.includes('user.email:support@javianpicardo.com')) {
|
|
34
34
|
throw new Error('Flattened output does not contain expected values');
|
|
35
35
|
}
|
|
36
36
|
});
|
|
37
37
|
|
|
38
38
|
// Test 2: Flatten JSON string
|
|
39
|
-
test('Flatten JSON string', async () => {
|
|
39
|
+
await test('Flatten JSON string', async () => {
|
|
40
40
|
const config = new BrevitConfig({ jsonMode: JsonOptimizationMode.Flatten });
|
|
41
41
|
const brevit = new BrevitClient(config);
|
|
42
42
|
|
|
43
43
|
const jsonString = '{"order": {"orderId": "o-456", "status": "SHIPPED"}}';
|
|
44
44
|
const result = await brevit.optimize(jsonString);
|
|
45
45
|
|
|
46
|
-
if (!result.includes('order.orderId:
|
|
46
|
+
if (!result.includes('order.orderId:o-456') || !result.includes('order.status:SHIPPED')) {
|
|
47
47
|
throw new Error('Flattened output does not contain expected values');
|
|
48
48
|
}
|
|
49
49
|
});
|
|
50
50
|
|
|
51
51
|
// Test 3: Short text returns as-is
|
|
52
|
-
test('Short text returns as-is', async () => {
|
|
52
|
+
await test('Short text returns as-is', async () => {
|
|
53
53
|
const config = new BrevitConfig({ longTextThreshold: 500 });
|
|
54
54
|
const brevit = new BrevitClient(config);
|
|
55
55
|
|
|
@@ -61,8 +61,59 @@ async function runTests() {
|
|
|
61
61
|
}
|
|
62
62
|
});
|
|
63
63
|
|
|
64
|
-
// Test 4:
|
|
65
|
-
test('
|
|
64
|
+
// Test 4: compressText always attempts compression
|
|
65
|
+
await test('compressText returns a string and is deterministic', async () => {
|
|
66
|
+
const brevit = new BrevitClient();
|
|
67
|
+
const text = 'Alpha sentence about cats. Beta sentence about cats. Gamma unrelated sentence.';
|
|
68
|
+
|
|
69
|
+
const r1 = await brevit.compressText(text);
|
|
70
|
+
const r2 = await brevit.compressText(text);
|
|
71
|
+
if (typeof r1 !== 'string' || r1.length === 0) {
|
|
72
|
+
throw new Error('compressText did not return a non-empty string');
|
|
73
|
+
}
|
|
74
|
+
if (r1 !== r2) {
|
|
75
|
+
throw new Error('compressText should be deterministic');
|
|
76
|
+
}
|
|
77
|
+
});
|
|
78
|
+
|
|
79
|
+
// Test 5: optimizeText ratio=0 behaves like compressText
|
|
80
|
+
await test('optimizeText ratio=0 behaves like compressText', async () => {
|
|
81
|
+
const brevit = new BrevitClient();
|
|
82
|
+
const text = 'One sentence about hiking. Another sentence about hiking. A third sentence about coffee.';
|
|
83
|
+
|
|
84
|
+
const auto = await brevit.compressText(text);
|
|
85
|
+
const zero = await brevit.optimizeText(text, 0.0);
|
|
86
|
+
if (auto !== zero) {
|
|
87
|
+
throw new Error('optimizeText(text, 0.0) should equal compressText(text)');
|
|
88
|
+
}
|
|
89
|
+
});
|
|
90
|
+
|
|
91
|
+
// Test 6: optimize(text) defaults to auto compression
|
|
92
|
+
await test('optimize(text) defaults to auto compression', async () => {
|
|
93
|
+
const brevit = new BrevitClient();
|
|
94
|
+
const text = 'One sentence about hiking. Another sentence about hiking. A third sentence about coffee.';
|
|
95
|
+
|
|
96
|
+
const auto = await brevit.compressText(text);
|
|
97
|
+
const result = await brevit.optimize(text);
|
|
98
|
+
if (result !== auto) {
|
|
99
|
+
throw new Error('optimize(text) should behave like compressText(text)');
|
|
100
|
+
}
|
|
101
|
+
});
|
|
102
|
+
|
|
103
|
+
// Test 7: optimize(text, ratio) routes to optimizeText
|
|
104
|
+
await test('optimize(text, ratio) routes to optimizeText', async () => {
|
|
105
|
+
const brevit = new BrevitClient();
|
|
106
|
+
const text = 'Alpha cats. Beta cats. Gamma coffee. Delta cats.';
|
|
107
|
+
|
|
108
|
+
const direct = await brevit.optimizeText(text, 0.6);
|
|
109
|
+
const routed = await brevit.optimize(text, 0.6);
|
|
110
|
+
if (direct !== routed) {
|
|
111
|
+
throw new Error('optimize(text, ratio) should equal optimizeText(text, ratio)');
|
|
112
|
+
}
|
|
113
|
+
});
|
|
114
|
+
|
|
115
|
+
// Test 8: Array handling
|
|
116
|
+
await test('Array handling', async () => {
|
|
66
117
|
const config = new BrevitConfig({ jsonMode: JsonOptimizationMode.Flatten });
|
|
67
118
|
const brevit = new BrevitClient(config);
|
|
68
119
|
|
|
@@ -74,8 +125,12 @@ async function runTests() {
|
|
|
74
125
|
};
|
|
75
126
|
|
|
76
127
|
const result = await brevit.optimize(testObject);
|
|
77
|
-
|
|
78
|
-
|
|
128
|
+
// Expect tabular optimization for uniform object arrays
|
|
129
|
+
if (!result.includes('items[2]{sku,name}:')) {
|
|
130
|
+
throw new Error('Tabular array header missing');
|
|
131
|
+
}
|
|
132
|
+
if (!result.includes('A-88,Brevit Pro') || !result.includes('T-22,Toon Handbook')) {
|
|
133
|
+
throw new Error('Tabular array rows missing');
|
|
79
134
|
}
|
|
80
135
|
});
|
|
81
136
|
|