EasySmallEmbeddingModel 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- easysmallembeddingmodel-0.1.0/EasySmallEmbeddingModel.egg-info/PKG-INFO +161 -0
- easysmallembeddingmodel-0.1.0/EasySmallEmbeddingModel.egg-info/SOURCES.txt +22 -0
- easysmallembeddingmodel-0.1.0/EasySmallEmbeddingModel.egg-info/dependency_links.txt +1 -0
- easysmallembeddingmodel-0.1.0/EasySmallEmbeddingModel.egg-info/entry_points.txt +2 -0
- easysmallembeddingmodel-0.1.0/EasySmallEmbeddingModel.egg-info/requires.txt +25 -0
- easysmallembeddingmodel-0.1.0/EasySmallEmbeddingModel.egg-info/top_level.txt +2 -0
- easysmallembeddingmodel-0.1.0/MANIFEST.in +2 -0
- easysmallembeddingmodel-0.1.0/PKG-INFO +161 -0
- easysmallembeddingmodel-0.1.0/README.md +126 -0
- easysmallembeddingmodel-0.1.0/pyproject.toml +45 -0
- easysmallembeddingmodel-0.1.0/setup.cfg +4 -0
- easysmallembeddingmodel-0.1.0/smallmodel/__init__.py +7 -0
- easysmallembeddingmodel-0.1.0/smallmodel/arch.py +441 -0
- easysmallembeddingmodel-0.1.0/smallmodel/cli.py +152 -0
- easysmallembeddingmodel-0.1.0/smallmodel/core.py +447 -0
- easysmallembeddingmodel-0.1.0/smallmodel/data.py +131 -0
- easysmallembeddingmodel-0.1.0/smallmodel/distill.py +217 -0
- easysmallembeddingmodel-0.1.0/smallmodel/sizing.py +152 -0
- easysmallembeddingmodel-0.1.0/smallmodel/teachers.py +144 -0
- easysmallembeddingmodel-0.1.0/smallmodel/web/__init__.py +0 -0
- easysmallembeddingmodel-0.1.0/smallmodel/web/app.py +573 -0
- easysmallembeddingmodel-0.1.0/smallmodel/web/static/css/style.css +803 -0
- easysmallembeddingmodel-0.1.0/smallmodel/web/static/js/app.js +587 -0
- easysmallembeddingmodel-0.1.0/smallmodel/web/templates/index.html +285 -0
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: EasySmallEmbeddingModel
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Compress large embedding models into small, fast students via layer pruning, vocab pruning, hidden dim reduction, and knowledge distillation.
|
|
5
|
+
Author: gomyk
|
|
6
|
+
License: Apache-2.0
|
|
7
|
+
Keywords: embedding,model-compression,distillation,pruning,sentence-transformers
|
|
8
|
+
Classifier: Development Status :: 3 - Alpha
|
|
9
|
+
Classifier: Intended Audience :: Science/Research
|
|
10
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
13
|
+
Requires-Python: >=3.9
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
Requires-Dist: torch>=2.0.0
|
|
16
|
+
Requires-Dist: transformers>=4.40.0
|
|
17
|
+
Requires-Dist: sentence-transformers>=3.0.0
|
|
18
|
+
Requires-Dist: numpy>=1.24.0
|
|
19
|
+
Requires-Dist: tqdm
|
|
20
|
+
Requires-Dist: sentencepiece
|
|
21
|
+
Requires-Dist: protobuf
|
|
22
|
+
Provides-Extra: eval
|
|
23
|
+
Requires-Dist: mteb>=1.14.0; extra == "eval"
|
|
24
|
+
Requires-Dist: pandas>=2.0.0; extra == "eval"
|
|
25
|
+
Provides-Extra: export
|
|
26
|
+
Requires-Dist: onnxruntime>=1.16.0; extra == "export"
|
|
27
|
+
Requires-Dist: onnx>=1.15.0; extra == "export"
|
|
28
|
+
Requires-Dist: optimum[onnxruntime]>=1.16.0; extra == "export"
|
|
29
|
+
Provides-Extra: hub
|
|
30
|
+
Requires-Dist: huggingface-hub>=0.20.0; extra == "hub"
|
|
31
|
+
Provides-Extra: web
|
|
32
|
+
Requires-Dist: flask>=3.0.0; extra == "web"
|
|
33
|
+
Provides-Extra: all
|
|
34
|
+
Requires-Dist: EasySmallEmbeddingModel[eval,export,hub,web]; extra == "all"
|
|
35
|
+
|
|
36
|
+
# SmallModel
|
|
37
|
+
|
|
38
|
+
Compress large embedding models into small, fast students via layer pruning, vocab pruning, hidden dim reduction, and knowledge distillation.
|
|
39
|
+
|
|
40
|
+
## Features
|
|
41
|
+
|
|
42
|
+
- **Layer Pruning** - Select which transformer layers to keep
|
|
43
|
+
- **Vocab Pruning** - Remove unused tokens based on corpus frequency
|
|
44
|
+
- **Hidden Dim Reduction** - Shrink internal dimensions (slicing or PCA)
|
|
45
|
+
- **Knowledge Distillation** - MSE + Cosine loss alignment with teacher
|
|
46
|
+
- **Auto Compress** - Find optimal config within size constraints
|
|
47
|
+
- **2-Stage Distillation** - Progressive distillation for 10x+ compression
|
|
48
|
+
- **Interactive Web UI** - Visual layer editor with real-time size estimation
|
|
49
|
+
- **MTEB Evaluation** - Benchmark on Classification, Clustering, STS tasks
|
|
50
|
+
|
|
51
|
+
## Installation
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
pip install EasySmallEmbeddingModel[all]
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
Or install specific extras:
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
pip install EasySmallEmbeddingModel           # core only
|
|
61
|
+
pip install EasySmallEmbeddingModel[web]      # + Flask web UI
|
|
62
|
+
pip install EasySmallEmbeddingModel[eval]     # + MTEB evaluation
|
|
63
|
+
pip install EasySmallEmbeddingModel[export]   # + ONNX export
|
|
64
|
+
pip install EasySmallEmbeddingModel[hub]      # + HuggingFace Hub upload
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
For development:
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
git clone https://github.com/gomyk/smallmodel.git
|
|
71
|
+
cd smallmodel
|
|
72
|
+
pip install -e ".[all]"
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## Quick Start
|
|
76
|
+
|
|
77
|
+
### Python API
|
|
78
|
+
|
|
79
|
+
```python
|
|
80
|
+
from smallmodel import SmallModel
|
|
81
|
+
|
|
82
|
+
# Auto-compress within 50MB
|
|
83
|
+
sm = SmallModel.from_teacher("gte")
|
|
84
|
+
sm.compress(max_fp32_mb=50.0)
|
|
85
|
+
sm.distill(epochs=10)
|
|
86
|
+
|
|
87
|
+
# Manual layer selection
|
|
88
|
+
sm = SmallModel.from_teacher("gte", layer_indices=[0, 3, 6, 11])
|
|
89
|
+
sm.create()
|
|
90
|
+
|
|
91
|
+
# Register custom teacher
|
|
92
|
+
from smallmodel import register_teacher
|
|
93
|
+
register_teacher(
|
|
94
|
+
"my-bert",
|
|
95
|
+
model_id="my-org/my-bert-base",
|
|
96
|
+
short_name="MyBERT",
|
|
97
|
+
hidden_dim=768, num_layers=12,
|
|
98
|
+
intermediate_size=3072, vocab_size=30522,
|
|
99
|
+
)
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
### Web UI
|
|
103
|
+
|
|
104
|
+
```python
|
|
105
|
+
from smallmodel import SmallModel
|
|
106
|
+
|
|
107
|
+
sm = SmallModel.from_teacher("gte")
|
|
108
|
+
sm.serve() # http://127.0.0.1:7860
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
Or via CLI:
|
|
112
|
+
|
|
113
|
+
```bash
|
|
114
|
+
smallmodel serve --teacher gte --port 7860
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
The web UI lets you:
|
|
118
|
+
- Select teacher model from 7+ pre-registered models
|
|
119
|
+
- Toggle layers on/off with preset configurations
|
|
120
|
+
- Adjust hidden dim, FFN size, and vocab size
|
|
121
|
+
- See real-time size estimation and compression ratio
|
|
122
|
+
- Select distillation datasets and evaluation tasks
|
|
123
|
+
- Analyze vocab coverage at different vocab sizes
|
|
124
|
+
- Create compressed models with one click
|
|
125
|
+
|
|
126
|
+
### CLI
|
|
127
|
+
|
|
128
|
+
```bash
|
|
129
|
+
smallmodel list-teachers
|
|
130
|
+
smallmodel compress --teacher gte --max-mb 50
|
|
131
|
+
smallmodel create --teacher gte --layers 0,3,6,11
|
|
132
|
+
smallmodel distill --teacher gte --student output/students/gte/gte_compressed
|
|
133
|
+
smallmodel serve --teacher gte
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
## Pre-registered Teachers
|
|
137
|
+
|
|
138
|
+
| Key | Model | Layers | Hidden | Vocab | FP32 MB |
|
|
139
|
+
|---|---|---|---|---|---|
|
|
140
|
+
| minilm | paraphrase-multilingual-MiniLM-L12-v2 | 12 | 384 | 250K | 448 |
|
|
141
|
+
| modernbert | ModernBERT-base | 22 | 768 | 50K | 496 |
|
|
142
|
+
| gte | gte-multilingual-base | 12 | 768 | 250K | 1058 |
|
|
143
|
+
| me5 | multilingual-e5-base | 12 | 768 | 250K | 1058 |
|
|
144
|
+
| me5s | multilingual-e5-small | 12 | 384 | 250K | 448 |
|
|
145
|
+
| gemma_emb | embeddinggemma-300m | 24 | 768 | 262K | 1155 |
|
|
146
|
+
| qwen3 | Qwen3-0.6B | 28 | 1024 | 152K | 2274 |
|
|
147
|
+
|
|
148
|
+
## How It Works
|
|
149
|
+
|
|
150
|
+
1. **Layer Pruning** - Copy selected layers from teacher (uniform spacing recommended)
|
|
151
|
+
2. **Hidden Dim Reduction** - Shrink dimensions if needed to meet size target
|
|
152
|
+
3. **Vocab Pruning** - Remove tokens not seen in training corpus
|
|
153
|
+
4. **Knowledge Distillation** - Train student to reproduce teacher's embeddings
|
|
154
|
+
5. **Evaluation** - MTEB benchmark (Classification, Clustering, STS)
|
|
155
|
+
|
|
156
|
+
For compression ratios > 10x, a 2-stage distillation pipeline is used:
|
|
157
|
+
Teacher → Intermediate (~1/5 teacher) → Final Student
|
|
158
|
+
|
|
159
|
+
## License
|
|
160
|
+
|
|
161
|
+
Apache-2.0
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
MANIFEST.in
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
EasySmallEmbeddingModel.egg-info/PKG-INFO
|
|
5
|
+
EasySmallEmbeddingModel.egg-info/SOURCES.txt
|
|
6
|
+
EasySmallEmbeddingModel.egg-info/dependency_links.txt
|
|
7
|
+
EasySmallEmbeddingModel.egg-info/entry_points.txt
|
|
8
|
+
EasySmallEmbeddingModel.egg-info/requires.txt
|
|
9
|
+
EasySmallEmbeddingModel.egg-info/top_level.txt
|
|
10
|
+
smallmodel/__init__.py
|
|
11
|
+
smallmodel/arch.py
|
|
12
|
+
smallmodel/cli.py
|
|
13
|
+
smallmodel/core.py
|
|
14
|
+
smallmodel/data.py
|
|
15
|
+
smallmodel/distill.py
|
|
16
|
+
smallmodel/sizing.py
|
|
17
|
+
smallmodel/teachers.py
|
|
18
|
+
smallmodel/web/__init__.py
|
|
19
|
+
smallmodel/web/app.py
|
|
20
|
+
smallmodel/web/static/css/style.css
|
|
21
|
+
smallmodel/web/static/js/app.js
|
|
22
|
+
smallmodel/web/templates/index.html
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
torch>=2.0.0
|
|
2
|
+
transformers>=4.40.0
|
|
3
|
+
sentence-transformers>=3.0.0
|
|
4
|
+
numpy>=1.24.0
|
|
5
|
+
tqdm
|
|
6
|
+
sentencepiece
|
|
7
|
+
protobuf
|
|
8
|
+
|
|
9
|
+
[all]
|
|
10
|
+
EasySmallEmbeddingModel[eval,export,hub,web]
|
|
11
|
+
|
|
12
|
+
[eval]
|
|
13
|
+
mteb>=1.14.0
|
|
14
|
+
pandas>=2.0.0
|
|
15
|
+
|
|
16
|
+
[export]
|
|
17
|
+
onnxruntime>=1.16.0
|
|
18
|
+
onnx>=1.15.0
|
|
19
|
+
optimum[onnxruntime]>=1.16.0
|
|
20
|
+
|
|
21
|
+
[hub]
|
|
22
|
+
huggingface-hub>=0.20.0
|
|
23
|
+
|
|
24
|
+
[web]
|
|
25
|
+
flask>=3.0.0
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: EasySmallEmbeddingModel
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Compress large embedding models into small, fast students via layer pruning, vocab pruning, hidden dim reduction, and knowledge distillation.
|
|
5
|
+
Author: gomyk
|
|
6
|
+
License: Apache-2.0
|
|
7
|
+
Keywords: embedding,model-compression,distillation,pruning,sentence-transformers
|
|
8
|
+
Classifier: Development Status :: 3 - Alpha
|
|
9
|
+
Classifier: Intended Audience :: Science/Research
|
|
10
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
13
|
+
Requires-Python: >=3.9
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
Requires-Dist: torch>=2.0.0
|
|
16
|
+
Requires-Dist: transformers>=4.40.0
|
|
17
|
+
Requires-Dist: sentence-transformers>=3.0.0
|
|
18
|
+
Requires-Dist: numpy>=1.24.0
|
|
19
|
+
Requires-Dist: tqdm
|
|
20
|
+
Requires-Dist: sentencepiece
|
|
21
|
+
Requires-Dist: protobuf
|
|
22
|
+
Provides-Extra: eval
|
|
23
|
+
Requires-Dist: mteb>=1.14.0; extra == "eval"
|
|
24
|
+
Requires-Dist: pandas>=2.0.0; extra == "eval"
|
|
25
|
+
Provides-Extra: export
|
|
26
|
+
Requires-Dist: onnxruntime>=1.16.0; extra == "export"
|
|
27
|
+
Requires-Dist: onnx>=1.15.0; extra == "export"
|
|
28
|
+
Requires-Dist: optimum[onnxruntime]>=1.16.0; extra == "export"
|
|
29
|
+
Provides-Extra: hub
|
|
30
|
+
Requires-Dist: huggingface-hub>=0.20.0; extra == "hub"
|
|
31
|
+
Provides-Extra: web
|
|
32
|
+
Requires-Dist: flask>=3.0.0; extra == "web"
|
|
33
|
+
Provides-Extra: all
|
|
34
|
+
Requires-Dist: EasySmallEmbeddingModel[eval,export,hub,web]; extra == "all"
|
|
35
|
+
|
|
36
|
+
# SmallModel
|
|
37
|
+
|
|
38
|
+
Compress large embedding models into small, fast students via layer pruning, vocab pruning, hidden dim reduction, and knowledge distillation.
|
|
39
|
+
|
|
40
|
+
## Features
|
|
41
|
+
|
|
42
|
+
- **Layer Pruning** - Select which transformer layers to keep
|
|
43
|
+
- **Vocab Pruning** - Remove unused tokens based on corpus frequency
|
|
44
|
+
- **Hidden Dim Reduction** - Shrink internal dimensions (slicing or PCA)
|
|
45
|
+
- **Knowledge Distillation** - MSE + Cosine loss alignment with teacher
|
|
46
|
+
- **Auto Compress** - Find optimal config within size constraints
|
|
47
|
+
- **2-Stage Distillation** - Progressive distillation for 10x+ compression
|
|
48
|
+
- **Interactive Web UI** - Visual layer editor with real-time size estimation
|
|
49
|
+
- **MTEB Evaluation** - Benchmark on Classification, Clustering, STS tasks
|
|
50
|
+
|
|
51
|
+
## Installation
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
pip install EasySmallEmbeddingModel[all]
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
Or install specific extras:
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
pip install EasySmallEmbeddingModel           # core only
|
|
61
|
+
pip install EasySmallEmbeddingModel[web]      # + Flask web UI
|
|
62
|
+
pip install EasySmallEmbeddingModel[eval]     # + MTEB evaluation
|
|
63
|
+
pip install EasySmallEmbeddingModel[export]   # + ONNX export
|
|
64
|
+
pip install EasySmallEmbeddingModel[hub]      # + HuggingFace Hub upload
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
For development:
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
git clone https://github.com/gomyk/smallmodel.git
|
|
71
|
+
cd smallmodel
|
|
72
|
+
pip install -e ".[all]"
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## Quick Start
|
|
76
|
+
|
|
77
|
+
### Python API
|
|
78
|
+
|
|
79
|
+
```python
|
|
80
|
+
from smallmodel import SmallModel
|
|
81
|
+
|
|
82
|
+
# Auto-compress within 50MB
|
|
83
|
+
sm = SmallModel.from_teacher("gte")
|
|
84
|
+
sm.compress(max_fp32_mb=50.0)
|
|
85
|
+
sm.distill(epochs=10)
|
|
86
|
+
|
|
87
|
+
# Manual layer selection
|
|
88
|
+
sm = SmallModel.from_teacher("gte", layer_indices=[0, 3, 6, 11])
|
|
89
|
+
sm.create()
|
|
90
|
+
|
|
91
|
+
# Register custom teacher
|
|
92
|
+
from smallmodel import register_teacher
|
|
93
|
+
register_teacher(
|
|
94
|
+
"my-bert",
|
|
95
|
+
model_id="my-org/my-bert-base",
|
|
96
|
+
short_name="MyBERT",
|
|
97
|
+
hidden_dim=768, num_layers=12,
|
|
98
|
+
intermediate_size=3072, vocab_size=30522,
|
|
99
|
+
)
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
### Web UI
|
|
103
|
+
|
|
104
|
+
```python
|
|
105
|
+
from smallmodel import SmallModel
|
|
106
|
+
|
|
107
|
+
sm = SmallModel.from_teacher("gte")
|
|
108
|
+
sm.serve() # http://127.0.0.1:7860
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
Or via CLI:
|
|
112
|
+
|
|
113
|
+
```bash
|
|
114
|
+
smallmodel serve --teacher gte --port 7860
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
The web UI lets you:
|
|
118
|
+
- Select teacher model from 7+ pre-registered models
|
|
119
|
+
- Toggle layers on/off with preset configurations
|
|
120
|
+
- Adjust hidden dim, FFN size, and vocab size
|
|
121
|
+
- See real-time size estimation and compression ratio
|
|
122
|
+
- Select distillation datasets and evaluation tasks
|
|
123
|
+
- Analyze vocab coverage at different vocab sizes
|
|
124
|
+
- Create compressed models with one click
|
|
125
|
+
|
|
126
|
+
### CLI
|
|
127
|
+
|
|
128
|
+
```bash
|
|
129
|
+
smallmodel list-teachers
|
|
130
|
+
smallmodel compress --teacher gte --max-mb 50
|
|
131
|
+
smallmodel create --teacher gte --layers 0,3,6,11
|
|
132
|
+
smallmodel distill --teacher gte --student output/students/gte/gte_compressed
|
|
133
|
+
smallmodel serve --teacher gte
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
## Pre-registered Teachers
|
|
137
|
+
|
|
138
|
+
| Key | Model | Layers | Hidden | Vocab | FP32 MB |
|
|
139
|
+
|---|---|---|---|---|---|
|
|
140
|
+
| minilm | paraphrase-multilingual-MiniLM-L12-v2 | 12 | 384 | 250K | 448 |
|
|
141
|
+
| modernbert | ModernBERT-base | 22 | 768 | 50K | 496 |
|
|
142
|
+
| gte | gte-multilingual-base | 12 | 768 | 250K | 1058 |
|
|
143
|
+
| me5 | multilingual-e5-base | 12 | 768 | 250K | 1058 |
|
|
144
|
+
| me5s | multilingual-e5-small | 12 | 384 | 250K | 448 |
|
|
145
|
+
| gemma_emb | embeddinggemma-300m | 24 | 768 | 262K | 1155 |
|
|
146
|
+
| qwen3 | Qwen3-0.6B | 28 | 1024 | 152K | 2274 |
|
|
147
|
+
|
|
148
|
+
## How It Works
|
|
149
|
+
|
|
150
|
+
1. **Layer Pruning** - Copy selected layers from teacher (uniform spacing recommended)
|
|
151
|
+
2. **Hidden Dim Reduction** - Shrink dimensions if needed to meet size target
|
|
152
|
+
3. **Vocab Pruning** - Remove tokens not seen in training corpus
|
|
153
|
+
4. **Knowledge Distillation** - Train student to reproduce teacher's embeddings
|
|
154
|
+
5. **Evaluation** - MTEB benchmark (Classification, Clustering, STS)
|
|
155
|
+
|
|
156
|
+
For compression ratios > 10x, a 2-stage distillation pipeline is used:
|
|
157
|
+
Teacher → Intermediate (~1/5 teacher) → Final Student
|
|
158
|
+
|
|
159
|
+
## License
|
|
160
|
+
|
|
161
|
+
Apache-2.0
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
# SmallModel
|
|
2
|
+
|
|
3
|
+
Compress large embedding models into small, fast students via layer pruning, vocab pruning, hidden dim reduction, and knowledge distillation.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- **Layer Pruning** - Select which transformer layers to keep
|
|
8
|
+
- **Vocab Pruning** - Remove unused tokens based on corpus frequency
|
|
9
|
+
- **Hidden Dim Reduction** - Shrink internal dimensions (slicing or PCA)
|
|
10
|
+
- **Knowledge Distillation** - MSE + Cosine loss alignment with teacher
|
|
11
|
+
- **Auto Compress** - Find optimal config within size constraints
|
|
12
|
+
- **2-Stage Distillation** - Progressive distillation for 10x+ compression
|
|
13
|
+
- **Interactive Web UI** - Visual layer editor with real-time size estimation
|
|
14
|
+
- **MTEB Evaluation** - Benchmark on Classification, Clustering, STS tasks
|
|
15
|
+
|
|
16
|
+
## Installation
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
pip install EasySmallEmbeddingModel[all]
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
Or install specific extras:
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
pip install EasySmallEmbeddingModel           # core only
|
|
26
|
+
pip install EasySmallEmbeddingModel[web]      # + Flask web UI
|
|
27
|
+
pip install EasySmallEmbeddingModel[eval]     # + MTEB evaluation
|
|
28
|
+
pip install EasySmallEmbeddingModel[export]   # + ONNX export
|
|
29
|
+
pip install EasySmallEmbeddingModel[hub]      # + HuggingFace Hub upload
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
For development:
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
git clone https://github.com/gomyk/smallmodel.git
|
|
36
|
+
cd smallmodel
|
|
37
|
+
pip install -e ".[all]"
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
## Quick Start
|
|
41
|
+
|
|
42
|
+
### Python API
|
|
43
|
+
|
|
44
|
+
```python
|
|
45
|
+
from smallmodel import SmallModel
|
|
46
|
+
|
|
47
|
+
# Auto-compress within 50MB
|
|
48
|
+
sm = SmallModel.from_teacher("gte")
|
|
49
|
+
sm.compress(max_fp32_mb=50.0)
|
|
50
|
+
sm.distill(epochs=10)
|
|
51
|
+
|
|
52
|
+
# Manual layer selection
|
|
53
|
+
sm = SmallModel.from_teacher("gte", layer_indices=[0, 3, 6, 11])
|
|
54
|
+
sm.create()
|
|
55
|
+
|
|
56
|
+
# Register custom teacher
|
|
57
|
+
from smallmodel import register_teacher
|
|
58
|
+
register_teacher(
|
|
59
|
+
"my-bert",
|
|
60
|
+
model_id="my-org/my-bert-base",
|
|
61
|
+
short_name="MyBERT",
|
|
62
|
+
hidden_dim=768, num_layers=12,
|
|
63
|
+
intermediate_size=3072, vocab_size=30522,
|
|
64
|
+
)
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
### Web UI
|
|
68
|
+
|
|
69
|
+
```python
|
|
70
|
+
from smallmodel import SmallModel
|
|
71
|
+
|
|
72
|
+
sm = SmallModel.from_teacher("gte")
|
|
73
|
+
sm.serve() # http://127.0.0.1:7860
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
Or via CLI:
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
smallmodel serve --teacher gte --port 7860
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
The web UI lets you:
|
|
83
|
+
- Select teacher model from 7+ pre-registered models
|
|
84
|
+
- Toggle layers on/off with preset configurations
|
|
85
|
+
- Adjust hidden dim, FFN size, and vocab size
|
|
86
|
+
- See real-time size estimation and compression ratio
|
|
87
|
+
- Select distillation datasets and evaluation tasks
|
|
88
|
+
- Analyze vocab coverage at different vocab sizes
|
|
89
|
+
- Create compressed models with one click
|
|
90
|
+
|
|
91
|
+
### CLI
|
|
92
|
+
|
|
93
|
+
```bash
|
|
94
|
+
smallmodel list-teachers
|
|
95
|
+
smallmodel compress --teacher gte --max-mb 50
|
|
96
|
+
smallmodel create --teacher gte --layers 0,3,6,11
|
|
97
|
+
smallmodel distill --teacher gte --student output/students/gte/gte_compressed
|
|
98
|
+
smallmodel serve --teacher gte
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
## Pre-registered Teachers
|
|
102
|
+
|
|
103
|
+
| Key | Model | Layers | Hidden | Vocab | FP32 MB |
|
|
104
|
+
|---|---|---|---|---|---|
|
|
105
|
+
| minilm | paraphrase-multilingual-MiniLM-L12-v2 | 12 | 384 | 250K | 448 |
|
|
106
|
+
| modernbert | ModernBERT-base | 22 | 768 | 50K | 496 |
|
|
107
|
+
| gte | gte-multilingual-base | 12 | 768 | 250K | 1058 |
|
|
108
|
+
| me5 | multilingual-e5-base | 12 | 768 | 250K | 1058 |
|
|
109
|
+
| me5s | multilingual-e5-small | 12 | 384 | 250K | 448 |
|
|
110
|
+
| gemma_emb | embeddinggemma-300m | 24 | 768 | 262K | 1155 |
|
|
111
|
+
| qwen3 | Qwen3-0.6B | 28 | 1024 | 152K | 2274 |
|
|
112
|
+
|
|
113
|
+
## How It Works
|
|
114
|
+
|
|
115
|
+
1. **Layer Pruning** - Copy selected layers from teacher (uniform spacing recommended)
|
|
116
|
+
2. **Hidden Dim Reduction** - Shrink dimensions if needed to meet size target
|
|
117
|
+
3. **Vocab Pruning** - Remove tokens not seen in training corpus
|
|
118
|
+
4. **Knowledge Distillation** - Train student to reproduce teacher's embeddings
|
|
119
|
+
5. **Evaluation** - MTEB benchmark (Classification, Clustering, STS)
|
|
120
|
+
|
|
121
|
+
For compression ratios > 10x, a 2-stage distillation pipeline is used:
|
|
122
|
+
Teacher → Intermediate (~1/5 teacher) → Final Student
|
|
123
|
+
|
|
124
|
+
## License
|
|
125
|
+
|
|
126
|
+
Apache-2.0
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "EasySmallEmbeddingModel"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Compress large embedding models into small, fast students via layer pruning, vocab pruning, hidden dim reduction, and knowledge distillation."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = {text = "Apache-2.0"}
|
|
11
|
+
requires-python = ">=3.9"
|
|
12
|
+
authors = [{name = "gomyk"}]
|
|
13
|
+
keywords = ["embedding", "model-compression", "distillation", "pruning", "sentence-transformers"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 3 - Alpha",
|
|
16
|
+
"Intended Audience :: Science/Research",
|
|
17
|
+
"License :: OSI Approved :: Apache Software License",
|
|
18
|
+
"Programming Language :: Python :: 3",
|
|
19
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
20
|
+
]
|
|
21
|
+
dependencies = [
|
|
22
|
+
"torch>=2.0.0",
|
|
23
|
+
"transformers>=4.40.0",
|
|
24
|
+
"sentence-transformers>=3.0.0",
|
|
25
|
+
"numpy>=1.24.0",
|
|
26
|
+
"tqdm",
|
|
27
|
+
"sentencepiece",
|
|
28
|
+
"protobuf",
|
|
29
|
+
]
|
|
30
|
+
|
|
31
|
+
[project.optional-dependencies]
|
|
32
|
+
eval = ["mteb>=1.14.0", "pandas>=2.0.0"]
|
|
33
|
+
export = ["onnxruntime>=1.16.0", "onnx>=1.15.0", "optimum[onnxruntime]>=1.16.0"]
|
|
34
|
+
hub = ["huggingface-hub>=0.20.0"]
|
|
35
|
+
web = ["flask>=3.0.0"]
|
|
36
|
+
all = ["EasySmallEmbeddingModel[eval,export,hub,web]"]
|
|
37
|
+
|
|
38
|
+
[project.scripts]
|
|
39
|
+
smallmodel = "smallmodel.cli:main"
|
|
40
|
+
|
|
41
|
+
[tool.setuptools.packages.find]
|
|
42
|
+
where = ["."]
|
|
43
|
+
|
|
44
|
+
[tool.setuptools.package-data]
|
|
45
|
+
"smallmodel.web" = ["static/**/*", "templates/**/*"]
|