qmdr 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +29 -0
- package/.env.example +85 -0
- package/.gitattributes +3 -0
- package/.github/workflows/release.yml +77 -0
- package/AI-SETUP.md +466 -0
- package/LICENSE +22 -0
- package/README.md +78 -0
- package/bun.lock +637 -0
- package/docs/README-zh.md +78 -0
- package/docs/refactor-checklist.md +54 -0
- package/docs/setup-openclaw.md +139 -0
- package/example-index.yml +33 -0
- package/finetune/BALANCED_DISTRIBUTION.md +157 -0
- package/finetune/DATA_IMPROVEMENTS.md +218 -0
- package/finetune/Justfile +43 -0
- package/finetune/Modelfile +16 -0
- package/finetune/README.md +299 -0
- package/finetune/SCORING.md +286 -0
- package/finetune/configs/accelerate_multi_gpu.yaml +17 -0
- package/finetune/configs/grpo.yaml +49 -0
- package/finetune/configs/sft.yaml +42 -0
- package/finetune/configs/sft_local.yaml +40 -0
- package/finetune/convert_gguf.py +221 -0
- package/finetune/data/best_glm_prompt.txt +17 -0
- package/finetune/data/gepa_generated.prompts.json +32 -0
- package/finetune/data/qmd_expansion_balanced_deduped.jsonl +413 -0
- package/finetune/data/qmd_expansion_diverse_addon.jsonl +386 -0
- package/finetune/data/qmd_expansion_handcrafted.jsonl +65 -0
- package/finetune/data/qmd_expansion_handcrafted_only.jsonl +336 -0
- package/finetune/data/qmd_expansion_locations.jsonl +64 -0
- package/finetune/data/qmd_expansion_people.jsonl +46 -0
- package/finetune/data/qmd_expansion_short_nontech.jsonl +200 -0
- package/finetune/data/qmd_expansion_v2.jsonl +1498 -0
- package/finetune/data/qmd_only_sampled.jsonl +399 -0
- package/finetune/dataset/analyze_data.py +369 -0
- package/finetune/dataset/clean_data.py +906 -0
- package/finetune/dataset/generate_balanced.py +823 -0
- package/finetune/dataset/generate_data.py +714 -0
- package/finetune/dataset/generate_data_offline.py +206 -0
- package/finetune/dataset/generate_diverse.py +441 -0
- package/finetune/dataset/generate_ollama.py +326 -0
- package/finetune/dataset/prepare_data.py +197 -0
- package/finetune/dataset/schema.py +73 -0
- package/finetune/dataset/score_data.py +115 -0
- package/finetune/dataset/validate_schema.py +104 -0
- package/finetune/eval.py +196 -0
- package/finetune/evals/queries.txt +56 -0
- package/finetune/gepa/__init__.py +1 -0
- package/finetune/gepa/best_prompt.txt +31 -0
- package/finetune/gepa/best_prompt_glm.txt +1 -0
- package/finetune/gepa/dspy_gepa.py +204 -0
- package/finetune/gepa/example.py +117 -0
- package/finetune/gepa/generate.py +129 -0
- package/finetune/gepa/gepa_outputs.jsonl +10 -0
- package/finetune/gepa/gepa_outputs_glm.jsonl +20 -0
- package/finetune/gepa/model.json +19 -0
- package/finetune/gepa/optimizer.py +70 -0
- package/finetune/gepa/score.py +84 -0
- package/finetune/jobs/eval.py +490 -0
- package/finetune/jobs/eval_common.py +354 -0
- package/finetune/jobs/eval_verbose.py +113 -0
- package/finetune/jobs/grpo.py +141 -0
- package/finetune/jobs/quantize.py +244 -0
- package/finetune/jobs/sft.py +121 -0
- package/finetune/pyproject.toml +23 -0
- package/finetune/reward.py +610 -0
- package/finetune/train.py +611 -0
- package/finetune/uv.lock +4070 -0
- package/flake.lock +61 -0
- package/flake.nix +83 -0
- package/migrate-schema.ts +162 -0
- package/package.json +56 -0
- package/skills/qmdr/SKILL.md +172 -0
- package/skills/qmdr/references/mcp-setup.md +88 -0
- package/src/app/commands/collection.ts +55 -0
- package/src/app/commands/context.ts +82 -0
- package/src/app/commands/document.ts +46 -0
- package/src/app/commands/maintenance.ts +60 -0
- package/src/app/commands/search.ts +45 -0
- package/src/app/ports/llm.ts +13 -0
- package/src/app/services/llm-service.ts +145 -0
- package/src/cli.test.ts +963 -0
- package/src/collections.ts +390 -0
- package/src/eval.test.ts +412 -0
- package/src/formatter.ts +427 -0
- package/src/llm.test.ts +559 -0
- package/src/llm.ts +1990 -0
- package/src/mcp.test.ts +889 -0
- package/src/mcp.ts +626 -0
- package/src/qmd.ts +3330 -0
- package/src/store/collections.ts +7 -0
- package/src/store/context.ts +10 -0
- package/src/store/db.ts +5 -0
- package/src/store/documents.ts +26 -0
- package/src/store/maintenance.ts +15 -0
- package/src/store/path.ts +13 -0
- package/src/store/search.ts +10 -0
- package/src/store-paths.test.ts +395 -0
- package/src/store.test.ts +2483 -0
- package/src/store.ts +2813 -0
- package/test/eval-harness.ts +223 -0
- package/tsconfig.json +29 -0
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
set shell := ["bash", "-uc"]
|
|
2
|
+
|
|
3
|
+
validate:
|
|
4
|
+
uv run dataset/validate_schema.py
|
|
5
|
+
uv run dataset/score_data.py
|
|
6
|
+
for f in data/*.jsonl; do \
|
|
7
|
+
uv run dataset/analyze_data.py --input "$f" --show-examples 0; \
|
|
8
|
+
done
|
|
9
|
+
|
|
10
|
+
score:
|
|
11
|
+
uv run dataset/score_data.py
|
|
12
|
+
|
|
13
|
+
schema:
|
|
14
|
+
uv run dataset/validate_schema.py
|
|
15
|
+
|
|
16
|
+
analyze:
|
|
17
|
+
for f in data/*.jsonl; do \
|
|
18
|
+
uv run dataset/analyze_data.py --input "$f" --show-examples 0; \
|
|
19
|
+
done
|
|
20
|
+
|
|
21
|
+
prepare:
|
|
22
|
+
QMD_BASE_MODEL=Qwen/Qwen3-1.7B uv run dataset/prepare_data.py --seed 42
|
|
23
|
+
|
|
24
|
+
train-local:
|
|
25
|
+
just prepare
|
|
26
|
+
HF_TOKEN=${HF_TOKEN} uv run torchrun --standalone --nproc_per_node auto \
|
|
27
|
+
train.py sft --config configs/sft_local.yaml |& tee /tmp/qmd-sft-train.log
|
|
28
|
+
|
|
29
|
+
grpo-local:
|
|
30
|
+
CUDA_VISIBLE_DEVICES=1,2,3 HF_TOKEN=${HF_TOKEN} uv run torchrun --standalone --nproc_per_node 3 \
|
|
31
|
+
train.py grpo --config configs/grpo.yaml |& tee /tmp/qmd-grpo-train.log
|
|
32
|
+
|
|
33
|
+
gepa-local:
|
|
34
|
+
UV_CACHE_DIR=/tmp/uv-cache LITELLM_CACHE_DIR=/tmp/litellm-cache OLLAMA_API_BASE=http://localhost:11434 \
|
|
35
|
+
uv run python gepa/dspy_gepa.py \
|
|
36
|
+
--input data/qmd_expansion_v2.jsonl \
|
|
37
|
+
--model ollama/glm-4.7-flash:Q8_0 \
|
|
38
|
+
--reflection-model ollama/glm-4.7-flash:Q8_0 \
|
|
39
|
+
--max-metric-calls 100 --limit 20 \
|
|
40
|
+
--valset data/qmd_expansion_handcrafted.jsonl --val-limit 20 \
|
|
41
|
+
--max-tokens 512 --reflection-max-tokens 512 \
|
|
42
|
+
--emit gepa/gepa_outputs_glm.jsonl \
|
|
43
|
+
--save-prompt gepa/best_prompt_glm.txt
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
FROM /home/tobi/src/github.com/tobi/qmd/finetune/outputs/sft/gguf/sft-q4_k_m.gguf
|
|
2
|
+
|
|
3
|
+
PARAMETER temperature 0.0
|
|
4
|
+
PARAMETER top_p 1.0
|
|
5
|
+
PARAMETER top_k 0
|
|
6
|
+
PARAMETER repeat_penalty 1.1
|
|
7
|
+
PARAMETER num_ctx 4096
|
|
8
|
+
|
|
9
|
+
TEMPLATE """<|im_start|>system
|
|
10
|
+
You are a helpful assistant.
|
|
11
|
+
<|im_end|>
|
|
12
|
+
<|im_start|>user
|
|
13
|
+
/no_think Expand this search query: {{ .Prompt }}
|
|
14
|
+
<|im_end|>
|
|
15
|
+
<|im_start|>assistant
|
|
16
|
+
"""
|
|
@@ -0,0 +1,299 @@
|
|
|
1
|
+
---
|
|
2
|
+
license: mit
|
|
3
|
+
language:
|
|
4
|
+
- en
|
|
5
|
+
base_model: Qwen/Qwen3-1.7B
|
|
6
|
+
tags:
|
|
7
|
+
- query-expansion
|
|
8
|
+
- search
|
|
9
|
+
- gguf
|
|
10
|
+
- qwen3
|
|
11
|
+
pipeline_tag: text-generation
|
|
12
|
+
---
|
|
13
|
+
|
|
14
|
+
# QMD Query Expansion Fine-Tuning
|
|
15
|
+
|
|
16
|
+
Train small language models to expand search queries for [QMD](https://github.com/tobi/qmd)'s hybrid retrieval pipeline.
|
|
17
|
+
|
|
18
|
+
## What This Does
|
|
19
|
+
|
|
20
|
+
Given a raw search query like `"auth config"`, the trained model produces structured expansions:
|
|
21
|
+
|
|
22
|
+
```
|
|
23
|
+
hyde: Authentication can be configured by setting the AUTH_SECRET environment variable.
|
|
24
|
+
lex: authentication configuration
|
|
25
|
+
lex: auth settings setup
|
|
26
|
+
vec: how to configure authentication settings
|
|
27
|
+
vec: authentication configuration options
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
These feed into QMD's three search backends:
|
|
31
|
+
- **`lex:`** lines go to BM25 full-text search (short, keyword-focused)
|
|
32
|
+
- **`vec:`** lines go to vector similarity search (natural language phrases)
|
|
33
|
+
- **`hyde:`** is a hypothetical document passage for embedding-based retrieval ([HyDE](https://arxiv.org/abs/2212.10496) technique)
|
|
34
|
+
|
|
35
|
+
## Quick Start
|
|
36
|
+
|
|
37
|
+
### Cloud training via HuggingFace Jobs (no GPU needed)
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
# 1. SFT: teach the model the output format (~45 min on A10G, ~$1.50)
|
|
41
|
+
hf jobs uv run --flavor a10g-large --secrets HF_TOKEN --timeout 2h jobs/sft.py
|
|
42
|
+
|
|
43
|
+
# 2. GRPO: RL refinement on top of SFT (~20 min on A10G, ~$0.50)
|
|
44
|
+
hf jobs uv run --flavor a10g-large --secrets HF_TOKEN --timeout 4h jobs/grpo.py
|
|
45
|
+
|
|
46
|
+
# 3. Evaluate against test queries (needs local GPU or use eval job)
|
|
47
|
+
uv run eval.py --model tobil/qmd-query-expansion-1.7B-grpo \
|
|
48
|
+
--sft-model tobil/qmd-query-expansion-1.7B-sft
|
|
49
|
+
|
|
50
|
+
# 4. Convert to GGUF for local deployment (Ollama, llama.cpp)
|
|
51
|
+
uv run convert_gguf.py --size 1.7B
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
### Local training (if you have a GPU)
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
uv run train.py sft --config configs/sft.yaml
|
|
58
|
+
uv run train.py grpo --config configs/grpo.yaml
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
### Monitoring HF Jobs
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
hf jobs ps # list running jobs
|
|
65
|
+
hf jobs inspect <job-id> # check status
|
|
66
|
+
hf jobs logs <job-id> # stream logs
|
|
67
|
+
hf jobs cancel <job-id> # cancel a job
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## Prompt Format
|
|
71
|
+
|
|
72
|
+
All tools use the same prompt — **Qwen3 chat template with `/no_think`**:
|
|
73
|
+
|
|
74
|
+
```
|
|
75
|
+
<|im_start|>user
|
|
76
|
+
/no_think Expand this search query: {query}<|im_end|>
|
|
77
|
+
<|im_start|>assistant
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
The `/no_think` directive suppresses Qwen3's chain-of-thought mode, producing
|
|
81
|
+
direct `lex:/vec:/hyde:` output without `<think>` blocks.
|
|
82
|
+
|
|
83
|
+
## File Structure
|
|
84
|
+
|
|
85
|
+
```
|
|
86
|
+
finetune/
|
|
87
|
+
├── reward.py # Scoring/reward function (single source of truth)
|
|
88
|
+
├── train.py # Unified SFT + GRPO training (two subcommands)
|
|
89
|
+
├── eval.py # Generate expansions and score them
|
|
90
|
+
├── convert_gguf.py # GGUF conversion for Ollama/llama.cpp
|
|
91
|
+
├── jobs/
|
|
92
|
+
│ ├── sft.py # Self-contained SFT for HuggingFace Jobs
|
|
93
|
+
│ ├── grpo.py # Self-contained GRPO for HuggingFace Jobs
|
|
94
|
+
│ ├── eval.py # Self-contained eval for HuggingFace Jobs
|
|
95
|
+
│ ├── eval_common.py # Shared eval utilities
|
|
96
|
+
│ └── quantize.py # GGUF quantization for HuggingFace Jobs
|
|
97
|
+
├── configs/
|
|
98
|
+
│ ├── sft.yaml # SFT hyperparameters for Qwen3-1.7B
|
|
99
|
+
│ └── grpo.yaml # GRPO hyperparameters for Qwen3-1.7B
|
|
100
|
+
├── evals/
|
|
101
|
+
│ └── queries.txt # 31 test queries across 8 categories
|
|
102
|
+
├── data/
|
|
103
|
+
│ └── qmd_expansion_v2.jsonl # Source training data (1,000 high-quality examples)
|
|
104
|
+
├── dataset/
|
|
105
|
+
│ ├── generate_data.py # Generate data via Claude API
|
|
106
|
+
│ ├── generate_data_offline.py # Generate from existing HF dataset
|
|
107
|
+
│ ├── prepare_data.py # Format for Qwen3 chat template
|
|
108
|
+
│ └── clean_data.py # Detect technical term misinterpretations
|
|
109
|
+
├── SCORING.md # Detailed scoring rubric reference
|
|
110
|
+
└── README.md # This file
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
## Training Pipeline
|
|
114
|
+
|
|
115
|
+
### Stage 1: SFT (Supervised Fine-Tuning)
|
|
116
|
+
|
|
117
|
+
Teaches the model the `lex:/vec:/hyde:` output format from labeled examples.
|
|
118
|
+
|
|
119
|
+
| Parameter | Value |
|
|
120
|
+
|-----------|-------|
|
|
121
|
+
| Base model | `Qwen/Qwen3-1.7B` |
|
|
122
|
+
| Method | LoRA (rank 16, alpha 32) |
|
|
123
|
+
| Target modules | All projection layers (q/k/v/o/gate/up/down) |
|
|
124
|
+
| Dataset | ~2,290 examples (train split) |
|
|
125
|
+
| Effective batch size | 16 (4 × 4 gradient accumulation) |
|
|
126
|
+
| Epochs | 5 |
|
|
127
|
+
| Learning rate | 2e-4 (cosine schedule) |
|
|
128
|
+
|
|
129
|
+
```bash
|
|
130
|
+
uv run train.py sft --config configs/sft.yaml
|
|
131
|
+
uv run train.py sft --config configs/sft.yaml --dry-run # preview config
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
### Stage 2: GRPO (Group Relative Policy Optimization)
|
|
135
|
+
|
|
136
|
+
Reinforcement learning on top of the merged SFT weights. The model generates
|
|
137
|
+
multiple expansions per query, they are scored by the reward function, and the
|
|
138
|
+
model is updated to prefer higher-scoring outputs.
|
|
139
|
+
|
|
140
|
+
| Parameter | Value |
|
|
141
|
+
|-----------|-------|
|
|
142
|
+
| Base | Merged SFT checkpoint |
|
|
143
|
+
| Method | LoRA (rank 4, alpha 8) — smaller for RL stability |
|
|
144
|
+
| Target modules | q_proj, v_proj only |
|
|
145
|
+
| Reward | `reward.py` (rule-based, 5 dimensions) |
|
|
146
|
+
| KL beta | 0.04 — prevents drift from SFT checkpoint |
|
|
147
|
+
| Generations per prompt | 4 |
|
|
148
|
+
| Max steps | 200 |
|
|
149
|
+
| Learning rate | 5e-7 |
|
|
150
|
+
|
|
151
|
+
**Important:** `beta > 0` is critical. With `beta=0` the model experiences
|
|
152
|
+
catastrophic drift and scores drop to 0%.
|
|
153
|
+
|
|
154
|
+
```bash
|
|
155
|
+
uv run train.py grpo --config configs/grpo.yaml
|
|
156
|
+
uv run train.py grpo --config configs/grpo.yaml --dry-run # test reward function
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
## Evaluation
|
|
160
|
+
|
|
161
|
+
`eval.py` generates expansions from a model and scores them against test queries:
|
|
162
|
+
|
|
163
|
+
```bash
|
|
164
|
+
# Evaluate an SFT model
|
|
165
|
+
uv run eval.py --model tobil/qmd-query-expansion-1.7B-sft
|
|
166
|
+
|
|
167
|
+
# Evaluate a GRPO model (needs SFT adapter merged first)
|
|
168
|
+
uv run eval.py --model tobil/qmd-query-expansion-1.7B-grpo \
|
|
169
|
+
--sft-model tobil/qmd-query-expansion-1.7B-sft
|
|
170
|
+
|
|
171
|
+
# Verbose output with deduction details
|
|
172
|
+
uv run eval.py --model tobil/qmd-query-expansion-1.7B-sft -v
|
|
173
|
+
|
|
174
|
+
# Save detailed scores to JSON
|
|
175
|
+
uv run eval.py --model tobil/qmd-query-expansion-1.7B-sft -o scores.json
|
|
176
|
+
|
|
177
|
+
# Score an existing JSONL file (backwards compat with old run.py output)
|
|
178
|
+
uv run eval.py --score-only evals/results_old.jsonl
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
## Reward Function
|
|
182
|
+
|
|
183
|
+
`reward.py` is the single source of truth for scoring. It is used both as the
|
|
184
|
+
GRPO reward signal during training and for evaluation.
|
|
185
|
+
|
|
186
|
+
Five scoring dimensions (max 120 without hyde, 140 with):
|
|
187
|
+
|
|
188
|
+
| Dimension | Points | What It Measures |
|
|
189
|
+
|-----------|--------|------------------|
|
|
190
|
+
| **Format** | 0-30 | Has lex/vec lines, no invalid lines |
|
|
191
|
+
| **Diversity** | 0-30 | Multiple expansion types, diverse content, no query echoes |
|
|
192
|
+
| **HyDE** | 0-20 | Present, 50-200 chars, single line, not repetitive |
|
|
193
|
+
| **Quality** | 0-20 | Lex shorter than vec, natural language, preserves key terms |
|
|
194
|
+
| **Entity** | -45 to +20 | Named entities preserved in lex and vec lines |
|
|
195
|
+
| **Think bonus** | 0-20 | Reward for NOT using `<think>` mode |
|
|
196
|
+
|
|
197
|
+
**Hard failures** (instant 0.0):
|
|
198
|
+
- Chat template leakage (`<|im_start|>`, `<|im_end|>`, etc.)
|
|
199
|
+
- Any line without a valid `lex:`, `vec:`, or `hyde:` prefix
|
|
200
|
+
|
|
201
|
+
```bash
|
|
202
|
+
# Self-test the reward function
|
|
203
|
+
uv run reward.py
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
## GGUF Conversion
|
|
207
|
+
|
|
208
|
+
Merges base + SFT + GRPO adapters into a single model and produces
|
|
209
|
+
quantized GGUF files for deployment:
|
|
210
|
+
|
|
211
|
+
```bash
|
|
212
|
+
# Use preset for 1.7B
|
|
213
|
+
uv run convert_gguf.py --size 1.7B
|
|
214
|
+
|
|
215
|
+
# Use preset for 4B
|
|
216
|
+
uv run convert_gguf.py --size 4B
|
|
217
|
+
|
|
218
|
+
# Custom models
|
|
219
|
+
uv run convert_gguf.py --base Qwen/Qwen3-1.7B \
|
|
220
|
+
--sft tobil/qmd-query-expansion-1.7B-sft \
|
|
221
|
+
--grpo tobil/qmd-query-expansion-1.7B-grpo \
|
|
222
|
+
--output tobil/qmd-query-expansion-1.7B-gguf
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
### Using with Ollama
|
|
226
|
+
|
|
227
|
+
```bash
|
|
228
|
+
huggingface-cli download tobil/qmd-query-expansion-1.7B-gguf \
|
|
229
|
+
qmd-query-expansion-1.7B-q4_k_m.gguf --local-dir .
|
|
230
|
+
|
|
231
|
+
echo 'FROM ./qmd-query-expansion-1.7B-q4_k_m.gguf' > Modelfile
|
|
232
|
+
ollama create qmd-expand -f Modelfile
|
|
233
|
+
ollama run qmd-expand
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
## Data Pipeline
|
|
237
|
+
|
|
238
|
+
The training data (1,000 examples in `data/qmd_expansion_v2.jsonl`) was generated
|
|
239
|
+
from two sources and cleaned for quality. To regenerate:
|
|
240
|
+
|
|
241
|
+
```bash
|
|
242
|
+
# Generate from existing HuggingFace dataset (bulk, no API needed)
|
|
243
|
+
uv run dataset/generate_data_offline.py
|
|
244
|
+
|
|
245
|
+
# Generate via Claude API (higher quality, needs ANTHROPIC_API_KEY)
|
|
246
|
+
uv run dataset/generate_data.py --count 100
|
|
247
|
+
|
|
248
|
+
# Detect and fix technical term misinterpretations
|
|
249
|
+
uv run dataset/clean_data.py
|
|
250
|
+
|
|
251
|
+
# Format for Qwen3 chat template, add short-query augmentation, split train/val
|
|
252
|
+
uv run dataset/prepare_data.py
|
|
253
|
+
```
|
|
254
|
+
|
|
255
|
+
## Architecture Notes
|
|
256
|
+
|
|
257
|
+
The two-stage training approach (SFT → GRPO) is standard for structured-output models:
|
|
258
|
+
|
|
259
|
+
1. **SFT** establishes format compliance and basic query understanding. It uses
|
|
260
|
+
a large LoRA (rank 16, all projection layers) because it needs to learn a
|
|
261
|
+
new output format from scratch.
|
|
262
|
+
|
|
263
|
+
2. **GRPO** refines quality within the learned format. It uses a small LoRA
|
|
264
|
+
(rank 4, q/v only) and KL regularization to make incremental improvements
|
|
265
|
+
without losing what SFT taught.
|
|
266
|
+
|
|
267
|
+
The reward function is entirely rule-based (no LLM judge) which makes it fast,
|
|
268
|
+
deterministic, and suitable as an RL signal. See `SCORING.md` for the full rubric.
|
|
269
|
+
|
|
270
|
+
## Training Results (Qwen3-1.7B, v2)
|
|
271
|
+
|
|
272
|
+
### SFT
|
|
273
|
+
|
|
274
|
+
| Metric | Value |
|
|
275
|
+
|--------|-------|
|
|
276
|
+
| Final train loss | 0.472 |
|
|
277
|
+
| Final eval loss | 0.304 |
|
|
278
|
+
| Token accuracy (train) | 97.4% |
|
|
279
|
+
| Token accuracy (eval) | 93.8% |
|
|
280
|
+
| Epochs | 5 |
|
|
281
|
+
| Hardware | A10G (24 GB VRAM) |
|
|
282
|
+
|
|
283
|
+
### GRPO
|
|
284
|
+
|
|
285
|
+
| Metric | Value |
|
|
286
|
+
|--------|-------|
|
|
287
|
+
| Mean reward | 0.757 |
|
|
288
|
+
| Final loss | 0.0005 |
|
|
289
|
+
| KL divergence | 0.00048 |
|
|
290
|
+
| Mean completion length | ~58 tokens |
|
|
291
|
+
| Training time | ~19 min (200 steps) |
|
|
292
|
+
| Hardware | A10G (24 GB VRAM) |
|
|
293
|
+
|
|
294
|
+
### Evaluation Scores
|
|
295
|
+
|
|
296
|
+
| Model | Average Score | Excellent (30) |
|
|
297
|
+
|-------|--------------|-----------------|
|
|
298
|
+
| SFT | 92.0% | 30/30 |
|
|
299
|
+
| GRPO | 91.7% | 30/30 |
|
|
@@ -0,0 +1,286 @@
|
|
|
1
|
+
# QMD Query Expansion Scoring
|
|
2
|
+
|
|
3
|
+
## Goal
|
|
4
|
+
|
|
5
|
+
Transform a raw typed query into a great set of retrieval-optimized expansions.
|
|
6
|
+
|
|
7
|
+
**Input:** `"auth config"`
|
|
8
|
+
**Output:**
|
|
9
|
+
```
|
|
10
|
+
hyde: Authentication can be configured by setting the AUTH_SECRET environment variable and enabling the auth middleware in your application's config file.
|
|
11
|
+
lex: authentication configuration
|
|
12
|
+
lex: auth settings setup
|
|
13
|
+
vec: how to configure authentication settings
|
|
14
|
+
vec: authentication configuration options
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
## Output Format
|
|
18
|
+
|
|
19
|
+
| Prefix | Purpose | Required | Count |
|
|
20
|
+
|--------|---------|----------|-------|
|
|
21
|
+
| `lex:` | BM25 keyword variations (shorter, keyword-focused) | Yes | 1-3 |
|
|
22
|
+
| `vec:` | Semantic reformulations (natural language) | Yes | 1-3 |
|
|
23
|
+
| `hyde:` | Hypothetical document passage | Optional | 0-1 |
|
|
24
|
+
|
|
25
|
+
## Scoring Criteria
|
|
26
|
+
|
|
27
|
+
### 1. Format Compliance (0-30 points)
|
|
28
|
+
|
|
29
|
+
| Criterion | Points | Deduction |
|
|
30
|
+
|-----------|--------|-----------|
|
|
31
|
+
| Has at least one `lex:` line | +10 | -10 if missing |
|
|
32
|
+
| Has at least one `vec:` line | +10 | -10 if missing |
|
|
33
|
+
| All lines have valid prefix (`lex:`, `vec:`, `hyde:`) | +10 | -5 per invalid line |
|
|
34
|
+
| No garbage/prose outside of prefixed lines | - | -10 if present |
|
|
35
|
+
|
|
36
|
+
### 2. Diversity & Coverage (0-30 points)
|
|
37
|
+
|
|
38
|
+
| Criterion | Points | Deduction |
|
|
39
|
+
|-----------|--------|-----------|
|
|
40
|
+
| 2+ different types present (lex + vec) | +10 | -10 if only one type |
|
|
41
|
+
| 2+ total expansions | +5 | -5 if only one |
|
|
42
|
+
| Multiple lex: lines are diverse (edit distance > 3) | +5 | -2 per duplicate pair |
|
|
43
|
+
| Multiple vec: lines are diverse (edit distance > 5) | +5 | -2 per duplicate pair |
|
|
44
|
+
| lex/vec not identical to original query | +5 | -5 per line that equals query |
|
|
45
|
+
|
|
46
|
+
### 3. Hyde Quality (0-20 points, optional bonus)
|
|
47
|
+
|
|
48
|
+
| Criterion | Points | Deduction |
|
|
49
|
+
|-----------|--------|-----------|
|
|
50
|
+
| Hyde present and well-formed | +5 | - |
|
|
51
|
+
| Hyde is concise (50-200 chars) | +5 | -3 if too short, -5 if too long |
|
|
52
|
+
| Hyde has no newlines | +5 | -5 if contains newlines |
|
|
53
|
+
| Hyde has no excessive repetition | +5 | -3 if word repeats 3+ times |
|
|
54
|
+
|
|
55
|
+
### 4. Content Quality (0-20 points)
|
|
56
|
+
|
|
57
|
+
| Criterion | Points | Deduction |
|
|
58
|
+
|-----------|--------|-----------|
|
|
59
|
+
| Base relevance | +5 | Subjective |
|
|
60
|
+
| Lex lines preserve key terms from query | +5 | -5 if lex is generic |
|
|
61
|
+
| Lex lines are keyword-focused (shorter) | +5 | -2 if lex is longer than vec |
|
|
62
|
+
| Vec lines are natural language (complete phrases) | +5 | -2 if vec is just keywords |
|
|
63
|
+
|
|
64
|
+
### 5. Named Entity Preservation (0-20 points, CRITICAL)
|
|
65
|
+
|
|
66
|
+
Named entities are proper nouns, brand names, technical terms, and acronyms that MUST appear in lex queries. This prevents generic expansions that lose the specific topic.
|
|
67
|
+
|
|
68
|
+
| Criterion | Points | Deduction |
|
|
69
|
+
|-----------|--------|-----------|
|
|
70
|
+
| All lex lines contain at least one entity | +15 | - |
|
|
71
|
+
| Some lex lines contain entities | +5 | - |
|
|
72
|
+
| NO lex lines contain entities | - | **-30 HEAVY PENALTY** |
|
|
73
|
+
| Generic filler phrases in lex | - | -15 per phrase |
|
|
74
|
+
| Entities also in vec lines | +5 | - |
|
|
75
|
+
|
|
76
|
+
**Named Entity Detection:**
|
|
77
|
+
- All-caps acronyms: `TDS`, `API`, `GPU`, `AWS`
|
|
78
|
+
- Capitalized proper nouns: `React`, `Docker`, `Kubernetes`
|
|
79
|
+
- Technical terms: `node.js`, `C++`, `.NET`
|
|
80
|
+
- CamelCase: `JavaScript`, `TypeScript`
|
|
81
|
+
- Compound names: `TDS motorsports` → both words are entities
|
|
82
|
+
|
|
83
|
+
**Generic Filler Phrases (BANNED in lex):**
|
|
84
|
+
- "find information about"
|
|
85
|
+
- "search for", "look up"
|
|
86
|
+
- "get information", "learn about"
|
|
87
|
+
- "details about", "guide to"
|
|
88
|
+
|
|
89
|
+
**Examples:**
|
|
90
|
+
|
|
91
|
+
| Query | Bad Lex (Score: 0.30) | Good Lex (Score: 1.00) |
|
|
92
|
+
|-------|----------------------|------------------------|
|
|
93
|
+
| `who is TDS motorsports` | `lex: find information about` | `lex: TDS motorsports history` |
|
|
94
|
+
| | `lex: company details` | `lex: TDS motorsports founders` |
|
|
95
|
+
| `how to use React hooks` | `lex: programming tutorial` | `lex: React hooks tutorial` |
|
|
96
|
+
| | `lex: how to code` | `lex: useEffect useState hooks` |
|
|
97
|
+
|
|
98
|
+
**Key Rule**: If a query mentions a specific entity (brand, product, technology), EVERY lex line should include that entity or a direct variation of it.
|
|
99
|
+
|
|
100
|
+
## Score Calculation
|
|
101
|
+
|
|
102
|
+
```
|
|
103
|
+
Total Score = Format + Diversity + Hyde + Quality + Entity
|
|
104
|
+
Max Score = 120 (100 without hyde)
|
|
105
|
+
Normalized = Total / Max (0.0 - 1.0)
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
**Rating:**
|
|
109
|
+
- 0.80-1.00: Excellent
|
|
110
|
+
- 0.60-0.79: Good
|
|
111
|
+
- 0.40-0.59: Acceptable
|
|
112
|
+
- 0.20-0.39: Poor
|
|
113
|
+
- 0.00-0.19: Failed
|
|
114
|
+
|
|
115
|
+
**Note:** Entity score can go negative, heavily penalizing outputs that miss named entities.
|
|
116
|
+
|
|
117
|
+
## Examples
|
|
118
|
+
|
|
119
|
+
### Good Example (Score: 85/100)
|
|
120
|
+
|
|
121
|
+
**Query:** `"react hooks"`
|
|
122
|
+
|
|
123
|
+
**Output:**
|
|
124
|
+
```
|
|
125
|
+
hyde: React Hooks allow you to use state and lifecycle features in functional components without writing a class.
|
|
126
|
+
lex: react hooks tutorial
|
|
127
|
+
lex: usestate useeffect
|
|
128
|
+
vec: how to use react hooks in functional components
|
|
129
|
+
vec: react hooks best practices guide
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
**Scoring:**
|
|
133
|
+
- Format: 30/30 (all prefixes valid)
|
|
134
|
+
- Diversity: 28/30 (2 types, 4 expansions, good diversity)
|
|
135
|
+
- Hyde: 17/20 (present, good length, no repetition)
|
|
136
|
+
- Quality: 10/20 (relevant, good structure)
|
|
137
|
+
|
|
138
|
+
### Bad Example (Score: 15/100)
|
|
139
|
+
|
|
140
|
+
**Query:** `"auth"`
|
|
141
|
+
|
|
142
|
+
**Output:**
|
|
143
|
+
```
|
|
144
|
+
auth is an important concept that relates to authentication.
|
|
145
|
+
The answer should be in Chinese.
|
|
146
|
+
The answer should be in Chinese.
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
**Scoring:**
|
|
150
|
+
- Format: 0/30 (no valid prefixes)
|
|
151
|
+
- Diversity: 0/30 (no valid expansions)
|
|
152
|
+
- Hyde: 0/20 (N/A)
|
|
153
|
+
- Quality: 15/20 (some relevance but wrong format)
|
|
154
|
+
|
|
155
|
+
### Medium Example (Score: 55/100)
|
|
156
|
+
|
|
157
|
+
**Query:** `"docker networking"`
|
|
158
|
+
|
|
159
|
+
**Output:**
|
|
160
|
+
```
|
|
161
|
+
hyde: Docker networking is an important concept. Docker networking is used for container communication. Docker networking configuration is essential.
|
|
162
|
+
lex: docker networking
|
|
163
|
+
vec: docker networking
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
**Scoring:**
|
|
167
|
+
- Format: 30/30 (valid prefixes)
|
|
168
|
+
- Diversity: 10/30 (lex=vec=query, no diversity)
|
|
169
|
+
- Hyde: 5/20 (too repetitive - "docker networking" 3x)
|
|
170
|
+
- Quality: 10/20 (relevant but low effort)
|
|
171
|
+
|
|
172
|
+
## Heuristics
|
|
173
|
+
|
|
174
|
+
### Repetition Detection
|
|
175
|
+
|
|
176
|
+
```python
|
|
177
|
+
def word_repetition_score(text):
    """Score 0-5, deducting 2 points per non-stopword word used 3+ times."""
    stopwords = {'the', 'a', 'an', 'is', 'are', 'to', 'for', 'of', 'in', 'and', 'or'}
    tally = Counter(text.lower().split())
    # Count distinct overused terms; stopwords never count as repetition.
    overused = sum(
        1 for term, freq in tally.items()
        if freq >= 3 and term not in stopwords
    )
    return max(0, 5 - overused * 2)
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
### Diversity Check (Simple)
|
|
187
|
+
|
|
188
|
+
```python
|
|
189
|
+
def is_diverse(a, b, min_distance=3):
    """Check if two strings are sufficiently different."""
    left = a.lower().strip()
    right = b.lower().strip()
    # Identical strings, or one contained in the other, are not diverse.
    if left == right or left in right or right in left:
        return False
    # Approximate "edit distance" by how many words the two strings
    # do NOT share (symmetric difference of their word sets).
    unique_words = set(left.split()) ^ set(right.split())
    return len(unique_words) >= min_distance
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
### Query Echo Detection
|
|
202
|
+
|
|
203
|
+
```python
|
|
204
|
+
def echoes_query(expansion, query):
    """Check if expansion is just echoing the query."""
    e = expansion.lower().strip()
    q = query.lower().strip()
    if e == q:
        return True
    # Either string wholly containing the other also counts as an echo.
    return e in q or q in e
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
### Named Entity Extraction
|
|
212
|
+
|
|
213
|
+
```python
|
|
214
|
+
KEY_TERM_STOPWORDS = {'what', 'is', 'how', 'to', 'the', 'a', 'an', 'in', 'on', 'for', 'of',
                      'and', 'or', 'with', 'my', 'your', 'do', 'does', 'can', 'i', 'me', 'we',
                      'who', 'where', 'when', 'why', 'which', 'find', 'get', 'show', 'tell'}

def _is_entity_word(clean: str, position: int, follows_entity: bool) -> bool:
    """Return True when *clean* looks like a named entity; check order matters."""
    lowered = clean.lower()
    # All-caps acronyms: TDS, API, GPU
    if clean.isupper() and len(clean) >= 2:
        return True
    # Capitalized proper nouns (not first word)
    if position > 0 and clean[0].isupper() and lowered not in KEY_TERM_STOPWORDS:
        return True
    # Technical terms: node.js, C++
    if len(clean) >= 2 and any(ch in clean for ch in '.+-#@'):
        return True
    # CamelCase: JavaScript
    if len(clean) > 1 and clean[0].isupper() and any(ch.isupper() for ch in clean[1:]):
        return True
    # Word following an entity (compound names: TDS motorsports)
    return follows_entity and lowered not in KEY_TERM_STOPWORDS

def extract_named_entities(query: str) -> set:
    """Extract named entities using simple heuristics."""
    entities = set()
    follows_entity = False
    for position, raw in enumerate(query.split()):
        clean = raw.strip('.,!?:;()[]"\'')
        if not clean:
            # Pure punctuation breaks any compound-name chain.
            follows_entity = False
            continue
        follows_entity = _is_entity_word(clean, position, follows_entity)
        if follows_entity:
            entities.add(clean.lower())
    return entities
|
|
256
|
+
```
|
|
257
|
+
|
|
258
|
+
### Generic Phrase Detection
|
|
259
|
+
|
|
260
|
+
```python
|
|
261
|
+
GENERIC_LEX_PHRASES = {
    'find information about', 'search for', 'look up', 'get information',
    'learn about', 'information on', 'details about', 'find out about',
    'what is', 'how to', 'guide to', 'help with'
}

def lex_is_generic(lex_line: str) -> bool:
    """Check if lex line is a useless generic filler."""
    normalized = lex_line.lower().strip()
    for phrase in GENERIC_LEX_PHRASES:
        if phrase not in normalized:
            continue
        # Strip the phrase's words one at a time; if almost nothing is
        # left over, the line carried no specific content of its own.
        leftover = normalized
        for token in phrase.split():
            leftover = leftover.replace(token, '', 1).strip()
        if len(leftover) < 3:
            return True
    return False
|
|
279
|
+
```
|
|
280
|
+
|
|
281
|
+
## Training Data Requirements
|
|
282
|
+
|
|
283
|
+
1. **EOM tokens**: Ensure training examples end with proper end-of-message tokens
|
|
284
|
+
2. **Diverse examples**: Include varied query types (short, long, technical, casual)
|
|
285
|
+
3. **Quality hyde**: Hyde passages should be informative, not template-y
|
|
286
|
+
4. **No repetition**: Avoid "This is important. This is very important." patterns
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
compute_environment: LOCAL_MACHINE
|
|
2
|
+
debug: false
|
|
3
|
+
distributed_type: MULTI_GPU
|
|
4
|
+
downcast_bf16: 'no'
|
|
5
|
+
enable_cpu_affinity: false
|
|
6
|
+
gpu_ids: all
|
|
7
|
+
machine_rank: 0
|
|
8
|
+
main_training_function: main
|
|
9
|
+
mixed_precision: bf16
|
|
10
|
+
num_machines: 1
|
|
11
|
+
num_processes: 4
|
|
12
|
+
rdzv_backend: static
|
|
13
|
+
same_network: true
|
|
14
|
+
tpu_env: []
|
|
15
|
+
tpu_use_cluster: false
|
|
16
|
+
tpu_use_sudo: false
|
|
17
|
+
use_cpu: false
|