@small-ltsc/ml 0.1.0 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +50 -30
- package/package.json +15 -5
package/README.md
CHANGED
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
# @small-ltsc/ml
|
|
2
2
|
|
|
3
|
+
[npm package](https://www.npmjs.com/package/@small-ltsc/ml)
|
|
4
|
+
[License: MIT](../../LICENSE)
|
|
5
|
+
|
|
3
6
|
Optional ML features for **Small LTSC** - Pattern importance scoring, quality prediction, and adaptive region detection.
|
|
4
7
|
|
|
5
8
|
## Installation
|
|
@@ -8,13 +11,13 @@ Optional ML features for **Small LTSC** - Pattern importance scoring, quality pr
|
|
|
8
11
|
npm install @small-ltsc/ml @small-ltsc/sdk
|
|
9
12
|
```
|
|
10
13
|
|
|
11
|
-
Note: `@small-ltsc/sdk` is a peer dependency.
|
|
14
|
+
Note: `@small-ltsc/sdk` is a required peer dependency.
|
|
12
15
|
|
|
13
16
|
## Features
|
|
14
17
|
|
|
15
|
-
- **Pattern Importance Scoring** - Determine which patterns are semantically important
|
|
16
|
-
- **Quality Prediction** - Predict if compression will degrade model performance
|
|
17
|
-
- **Region Detection** - Identify system prompts, user input, and context for adaptive compression
|
|
18
|
+
- **Pattern Importance Scoring** - Determine which patterns are semantically important and should be preserved
|
|
19
|
+
- **Quality Prediction** - Predict if compression will degrade model performance before applying it
|
|
20
|
+
- **Region Detection** - Identify system prompts, user input, and context for adaptive compression strategies
|
|
18
21
|
|
|
19
22
|
## Pattern Importance
|
|
20
23
|
|
|
@@ -28,13 +31,13 @@ const scorer = new PositionalImportanceScorer({ decayRate: 2.0 });
|
|
|
28
31
|
const patterns = await discoverPatterns(tokens);
|
|
29
32
|
const scores = await scorer.scorePatterns(tokens, patterns);
|
|
30
33
|
|
|
31
|
-
// Filter out high-importance patterns (preserve them)
|
|
34
|
+
// Remove high-importance patterns from the candidate list so they are never compressed
|
|
32
35
|
const safeToCompress = filterByImportance(patterns, scores, 0.8);
|
|
33
36
|
```
|
|
34
37
|
|
|
35
38
|
### Embedding-Based Scoring
|
|
36
39
|
|
|
37
|
-
For more accurate importance scoring
|
|
40
|
+
For more accurate importance scoring using an embedding model:
|
|
38
41
|
|
|
39
42
|
```typescript
|
|
40
43
|
import { EmbeddingImportanceScorer } from '@small-ltsc/ml';
|
|
@@ -48,7 +51,7 @@ const scores = await scorer.scorePatterns(tokens, patterns);
|
|
|
48
51
|
|
|
49
52
|
## Quality Prediction
|
|
50
53
|
|
|
51
|
-
Predict if compressed output will maintain quality:
|
|
54
|
+
Predict if compressed output will maintain quality before committing:
|
|
52
55
|
|
|
53
56
|
```typescript
|
|
54
57
|
import { createQualityPredictor } from '@small-ltsc/ml';
|
|
@@ -79,17 +82,17 @@ console.log(prediction.features);
|
|
|
79
82
|
|
|
80
83
|
## Region Detection
|
|
81
84
|
|
|
82
|
-
Detect semantic regions for adaptive compression:
|
|
85
|
+
Detect semantic regions for adaptive compression strategies:
|
|
83
86
|
|
|
84
87
|
```typescript
|
|
85
88
|
import { detectRegions, RegionType, filterPatternsByRegion } from '@small-ltsc/ml';
|
|
86
89
|
|
|
87
90
|
const regions = detectRegions(tokens, {
|
|
88
|
-
systemMarkers: [[58, 71905, 60]], // [SYSTEM]
|
|
91
|
+
systemMarkers: [[58, 71905, 60]], // [SYSTEM] token sequence
|
|
89
92
|
retentionTargets: {
|
|
90
|
-
[RegionType.SYSTEM]: 0.98, // Minimal compression
|
|
91
|
-
[RegionType.USER]: 0.85, // Moderate
|
|
92
|
-
[RegionType.CONTEXT]: 0.6, // Aggressive
|
|
93
|
+
[RegionType.SYSTEM]: 0.98, // Minimal compression (preserve instructions)
|
|
94
|
+
[RegionType.USER]: 0.85, // Moderate compression
|
|
95
|
+
[RegionType.CONTEXT]: 0.6, // Aggressive compression (RAG content)
|
|
93
96
|
},
|
|
94
97
|
});
|
|
95
98
|
|
|
@@ -99,11 +102,13 @@ const filtered = filterPatternsByRegion(patterns, regions, tokens);
|
|
|
99
102
|
|
|
100
103
|
### Region Types
|
|
101
104
|
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
105
|
+
| Region | Description | Default Retention |
|
|
106
|
+
|--------|-------------|-------------------|
|
|
107
|
+
| `SYSTEM` | System instructions | 98% (minimal compression) |
|
|
108
|
+
| `USER` | User input | 85% (moderate) |
|
|
109
|
+
| `CONTEXT` | Injected context/documents | 60% (aggressive) |
|
|
110
|
+
| `CODE` | Code blocks | 80% (moderate) |
|
|
111
|
+
| `UNKNOWN` | Default region | 75% |
|
|
107
112
|
|
|
108
113
|
## Custom Embedding Provider
|
|
109
114
|
|
|
@@ -134,25 +139,40 @@ const scorer = new EmbeddingImportanceScorer(new OpenAIEmbeddings());
|
|
|
134
139
|
|
|
135
140
|
### Importance Scoring
|
|
136
141
|
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
+
| Export | Description |
|
|
143
|
+
|--------|-------------|
|
|
144
|
+
| `PositionalImportanceScorer` | Score patterns by position (earlier = more important) |
|
|
145
|
+
| `EmbeddingImportanceScorer` | Score patterns by contextual diversity |
|
|
146
|
+
| `CombinedImportanceScorer` | Combine positional and embedding scoring |
|
|
147
|
+
| `adjustPrioritiesByImportance()` | Adjust pattern priorities based on scores |
|
|
148
|
+
| `filterByImportance()` | Filter out high-importance patterns |
|
|
142
149
|
|
|
143
150
|
### Quality Prediction
|
|
144
151
|
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
152
|
+
| Export | Description |
|
|
153
|
+
|--------|-------------|
|
|
154
|
+
| `HeuristicQualityPredictor` | Rule-based quality prediction |
|
|
155
|
+
| `EmbeddingQualityPredictor` | Enhanced prediction with embedding similarity |
|
|
156
|
+
| `createQualityPredictor()` | Factory function for creating predictors |
|
|
148
157
|
|
|
149
158
|
### Region Detection
|
|
150
159
|
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
160
|
+
| Export | Description |
|
|
161
|
+
|--------|-------------|
|
|
162
|
+
| `detectRegions()` | Detect semantic regions in token sequence |
|
|
163
|
+
| `detectRegionsHeuristic()` | Simple heuristic-based detection |
|
|
164
|
+
| `filterPatternsByRegion()` | Filter patterns based on region constraints |
|
|
165
|
+
| `getRegionCompressionSettings()` | Get default settings for a region type |
|
|
166
|
+
| `RegionType` | Enum of available region types |
|
|
155
167
|
|
|
156
168
|
## License
|
|
157
169
|
|
|
158
|
-
MIT
|
|
170
|
+
MIT License - see [LICENSE](../../LICENSE) for details.
|
|
171
|
+
|
|
172
|
+
## Contributors
|
|
173
|
+
|
|
174
|
+
Built by [Triage Sec](https://triage-sec.com) - an applied team of researchers and engineers working towards building resiliency for AI systems.
|
|
175
|
+
|
|
176
|
+
- Nikhil Srivastava (University of California, Berkeley)
|
|
177
|
+
- Omansh Bainsla (Georgia Tech)
|
|
178
|
+
- Sahil Chatiwala (Georgia Tech)
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@small-ltsc/ml",
|
|
3
|
-
|
|
3
|
+
"version": "0.2.4",
|
|
4
4
|
"description": "ML features for Small LTSC - Pattern importance scoring and quality prediction",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/esm/index.js",
|
|
@@ -17,12 +17,13 @@
|
|
|
17
17
|
],
|
|
18
18
|
"sideEffects": false,
|
|
19
19
|
"scripts": {
|
|
20
|
+
"clean": "rm -rf dist",
|
|
20
21
|
"build": "npm run build:esm && npm run build:types",
|
|
21
22
|
"build:esm": "tsc -p tsconfig.esm.json",
|
|
22
23
|
"build:types": "tsc -p tsconfig.types.json",
|
|
23
24
|
"test": "vitest",
|
|
24
25
|
"lint": "eslint src --ext .ts",
|
|
25
|
-
"prepublishOnly": "npm run build"
|
|
26
|
+
"prepublishOnly": "npm run clean && npm run build"
|
|
26
27
|
},
|
|
27
28
|
"keywords": [
|
|
28
29
|
"compression",
|
|
@@ -32,14 +33,23 @@
|
|
|
32
33
|
"ml",
|
|
33
34
|
"embeddings"
|
|
34
35
|
],
|
|
35
|
-
"author": "",
|
|
36
|
+
"author": "Triage Sec <nicks@triage-sec.com>",
|
|
37
|
+
"contributors": [
|
|
38
|
+
"Nikhil Srivastava",
|
|
39
|
+
"Omansh Bainsla",
|
|
40
|
+
"Sahil Chatiwala"
|
|
41
|
+
],
|
|
36
42
|
"license": "MIT",
|
|
37
43
|
"repository": {
|
|
38
44
|
"type": "git",
|
|
39
|
-
"url": "https://github.com/triage-sec/small"
|
|
45
|
+
"url": "git+https://github.com/triage-sec/small.git"
|
|
46
|
+
},
|
|
47
|
+
"bugs": {
|
|
48
|
+
"url": "https://github.com/triage-sec/small/issues"
|
|
40
49
|
},
|
|
50
|
+
"homepage": "https://github.com/triage-sec/small#readme",
|
|
41
51
|
"peerDependencies": {
|
|
42
|
-
|
|
52
|
+
"@small-ltsc/sdk": "^0.2.4"
|
|
43
53
|
},
|
|
44
54
|
"devDependencies": {
|
|
45
55
|
"@types/node": "^20.10.0",
|