@small-ltsc/ml 0.1.0 → 0.2.3

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/README.md +50 -30
  2. package/package.json +15 -5
package/README.md CHANGED
@@ -1,5 +1,8 @@
1
1
  # @small-ltsc/ml
2
2
 
3
+ [![npm](https://img.shields.io/npm/v/@small-ltsc/ml)](https://www.npmjs.com/package/@small-ltsc/ml)
4
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](../../LICENSE)
5
+
3
6
  Optional ML features for **Small LTSC** - Pattern importance scoring, quality prediction, and adaptive region detection.
4
7
 
5
8
  ## Installation
@@ -8,13 +11,13 @@ Optional ML features for **Small LTSC** - Pattern importance scoring, quality pr
8
11
  npm install @small-ltsc/ml @small-ltsc/sdk
9
12
  ```
10
13
 
11
- Note: `@small-ltsc/sdk` is a peer dependency.
14
+ Note: `@small-ltsc/sdk` is a required peer dependency.
12
15
 
13
16
  ## Features
14
17
 
15
- - **Pattern Importance Scoring** - Determine which patterns are semantically important
16
- - **Quality Prediction** - Predict if compression will degrade model performance
17
- - **Region Detection** - Identify system prompts, user input, and context for adaptive compression
18
+ - **Pattern Importance Scoring** - Determine which patterns are semantically important and should be preserved
19
+ - **Quality Prediction** - Predict if compression will degrade model performance before applying it
20
+ - **Region Detection** - Identify system prompts, user input, and context for adaptive compression strategies
18
21
 
19
22
  ## Pattern Importance
20
23
 
@@ -28,13 +31,13 @@ const scorer = new PositionalImportanceScorer({ decayRate: 2.0 });
28
31
  const patterns = await discoverPatterns(tokens);
29
32
  const scores = await scorer.scorePatterns(tokens, patterns);
30
33
 
31
- // Filter out high-importance patterns (preserve them)
34
+ // Filter out high-importance patterns (preserve them from compression)
32
35
  const safeToCompress = filterByImportance(patterns, scores, 0.8);
33
36
  ```
34
37
 
35
38
  ### Embedding-Based Scoring
36
39
 
37
- For more accurate importance scoring with an embedding model:
40
+ For more accurate importance scoring using an embedding model:
38
41
 
39
42
  ```typescript
40
43
  import { EmbeddingImportanceScorer } from '@small-ltsc/ml';
@@ -48,7 +51,7 @@ const scores = await scorer.scorePatterns(tokens, patterns);
48
51
 
49
52
  ## Quality Prediction
50
53
 
51
- Predict if compressed output will maintain quality:
54
+ Predict if compressed output will maintain quality before committing:
52
55
 
53
56
  ```typescript
54
57
  import { createQualityPredictor } from '@small-ltsc/ml';
@@ -79,17 +82,17 @@ console.log(prediction.features);
79
82
 
80
83
  ## Region Detection
81
84
 
82
- Detect semantic regions for adaptive compression:
85
+ Detect semantic regions for adaptive compression strategies:
83
86
 
84
87
  ```typescript
85
88
  import { detectRegions, RegionType, filterPatternsByRegion } from '@small-ltsc/ml';
86
89
 
87
90
  const regions = detectRegions(tokens, {
88
- systemMarkers: [[58, 71905, 60]], // [SYSTEM] tokens
91
+ systemMarkers: [[58, 71905, 60]], // [SYSTEM] token sequence
89
92
  retentionTargets: {
90
- [RegionType.SYSTEM]: 0.98, // Minimal compression
91
- [RegionType.USER]: 0.85, // Moderate
92
- [RegionType.CONTEXT]: 0.6, // Aggressive
93
+ [RegionType.SYSTEM]: 0.98, // Minimal compression (preserve instructions)
94
+ [RegionType.USER]: 0.85, // Moderate compression
95
+ [RegionType.CONTEXT]: 0.6, // Aggressive compression (RAG content)
93
96
  },
94
97
  });
95
98
 
@@ -99,11 +102,13 @@ const filtered = filterPatternsByRegion(patterns, regions, tokens);
99
102
 
100
103
  ### Region Types
101
104
 
102
- - `SYSTEM` - System instructions (high retention)
103
- - `USER` - User input (moderate retention)
104
- - `CONTEXT` - Injected context/documents (low retention)
105
- - `CODE` - Code blocks (moderate retention)
106
- - `UNKNOWN` - Default region
105
+ | Region | Description | Default Retention |
106
+ |--------|-------------|-------------------|
107
+ | `SYSTEM` | System instructions | 98% (minimal compression) |
108
+ | `USER` | User input | 85% (moderate) |
109
+ | `CONTEXT` | Injected context/documents | 60% (aggressive) |
110
+ | `CODE` | Code blocks | 80% (moderate) |
111
+ | `UNKNOWN` | Default region | 75% |
107
112
 
108
113
  ## Custom Embedding Provider
109
114
 
@@ -134,25 +139,40 @@ const scorer = new EmbeddingImportanceScorer(new OpenAIEmbeddings());
134
139
 
135
140
  ### Importance Scoring
136
141
 
137
- - `PositionalImportanceScorer` - Score by position (early = important)
138
- - `EmbeddingImportanceScorer` - Score by context diversity
139
- - `CombinedImportanceScorer` - Combine positional and embedding scoring
140
- - `adjustPrioritiesByImportance(patterns, scores, threshold)` - Adjust pattern priorities
141
- - `filterByImportance(patterns, scores, threshold)` - Filter high-importance patterns
142
+ | Export | Description |
143
+ |--------|-------------|
144
+ | `PositionalImportanceScorer` | Score patterns by position (earlier = more important) |
145
+ | `EmbeddingImportanceScorer` | Score patterns by contextual diversity |
146
+ | `CombinedImportanceScorer` | Combine positional and embedding scoring |
147
+ | `adjustPrioritiesByImportance()` | Adjust pattern priorities based on scores |
148
+ | `filterByImportance()` | Filter out high-importance patterns |
142
149
 
143
150
  ### Quality Prediction
144
151
 
145
- - `HeuristicQualityPredictor` - Rule-based quality prediction
146
- - `EmbeddingQualityPredictor` - Enhanced with embedding similarity
147
- - `createQualityPredictor(provider?, config?)` - Factory function
152
+ | Export | Description |
153
+ |--------|-------------|
154
+ | `HeuristicQualityPredictor` | Rule-based quality prediction |
155
+ | `EmbeddingQualityPredictor` | Enhanced prediction with embedding similarity |
156
+ | `createQualityPredictor()` | Factory function for creating predictors |
148
157
 
149
158
  ### Region Detection
150
159
 
151
- - `detectRegions(tokens, config?)` - Detect semantic regions
152
- - `detectRegionsHeuristic(tokens)` - Simple heuristic detection
153
- - `filterPatternsByRegion(patterns, regions, tokens)` - Filter by region
154
- - `getRegionCompressionSettings(regionType)` - Get settings for region
160
+ | Export | Description |
161
+ |--------|-------------|
162
+ | `detectRegions()` | Detect semantic regions in token sequence |
163
+ | `detectRegionsHeuristic()` | Simple heuristic-based detection |
164
+ | `filterPatternsByRegion()` | Filter patterns based on region constraints |
165
+ | `getRegionCompressionSettings()` | Get default settings for a region type |
166
+ | `RegionType` | Enum of available region types |
155
167
 
156
168
  ## License
157
169
 
158
- MIT
170
+ MIT License - see [LICENSE](../../LICENSE) for details.
171
+
172
+ ## Contributors
173
+
174
+ Built by [Triage Sec](https://triage-sec.com) - an applied team of researchers and engineers working towards building resiliency for AI systems.
175
+
176
+ - Nikhil Srivastava (University of California, Berkeley)
177
+ - Omansh Bainsla (Georgia Tech)
178
+ - Sahil Chatiwala (Georgia Tech)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@small-ltsc/ml",
3
- "version": "0.1.0",
3
+ "version": "0.2.3",
4
4
  "description": "ML features for Small LTSC - Pattern importance scoring and quality prediction",
5
5
  "type": "module",
6
6
  "main": "./dist/esm/index.js",
@@ -17,12 +17,13 @@
17
17
  ],
18
18
  "sideEffects": false,
19
19
  "scripts": {
20
+ "clean": "rm -rf dist",
20
21
  "build": "npm run build:esm && npm run build:types",
21
22
  "build:esm": "tsc -p tsconfig.esm.json",
22
23
  "build:types": "tsc -p tsconfig.types.json",
23
24
  "test": "vitest",
24
25
  "lint": "eslint src --ext .ts",
25
- "prepublishOnly": "npm run build"
26
+ "prepublishOnly": "npm run clean && npm run build"
26
27
  },
27
28
  "keywords": [
28
29
  "compression",
@@ -32,14 +33,23 @@
32
33
  "ml",
33
34
  "embeddings"
34
35
  ],
35
- "author": "",
36
+ "author": "Triage Sec <nicks@triage-sec.com>",
37
+ "contributors": [
38
+ "Nikhil Srivastava",
39
+ "Omansh Bainsla",
40
+ "Sahil Chatiwala"
41
+ ],
36
42
  "license": "MIT",
37
43
  "repository": {
38
44
  "type": "git",
39
- "url": "https://github.com/triage-sec/small"
45
+ "url": "git+https://github.com/triage-sec/small.git"
46
+ },
47
+ "bugs": {
48
+ "url": "https://github.com/triage-sec/small/issues"
40
49
  },
50
+ "homepage": "https://github.com/triage-sec/small#readme",
41
51
  "peerDependencies": {
42
- "@small-ltsc/sdk": "^0.1.0"
52
+ "@small-ltsc/sdk": "^0.2.3"
43
53
  },
44
54
  "devDependencies": {
45
55
  "@types/node": "^20.10.0",