quickdistill 0.1.5__py3-none-any.whl → 0.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- quickdistill/__init__.py +1 -1
- quickdistill/__pycache__/__init__.cpython-310.pyc +0 -0
- quickdistill/__pycache__/server.cpython-310.pyc +0 -0
- quickdistill/server.py +330 -14
- quickdistill/static/judge_manager.html +183 -16
- quickdistill/static/trace_viewer.html +787 -13
- {quickdistill-0.1.5.dist-info → quickdistill-0.1.7.dist-info}/METADATA +1 -1
- quickdistill-0.1.7.dist-info/RECORD +17 -0
- quickdistill-0.1.5.dist-info/RECORD +0 -17
- {quickdistill-0.1.5.dist-info → quickdistill-0.1.7.dist-info}/WHEEL +0 -0
- {quickdistill-0.1.5.dist-info → quickdistill-0.1.7.dist-info}/entry_points.txt +0 -0
- {quickdistill-0.1.5.dist-info → quickdistill-0.1.7.dist-info}/top_level.txt +0 -0
|
@@ -162,6 +162,13 @@
|
|
|
162
162
|
<div class="container">
|
|
163
163
|
<h1>Judge Manager</h1>
|
|
164
164
|
|
|
165
|
+
<!-- Prompt Generator Button -->
|
|
166
|
+
<div style="margin-bottom: 20px; display: none;">
|
|
167
|
+
<button onclick="openPromptGenerator()" style="padding: 10px 20px; background: #6a4a7e; color: white; border: none; border-radius: 4px; cursor: pointer; font-size: 14px;">
|
|
168
|
+
✨ Generate Judge Prompt with AI
|
|
169
|
+
</button>
|
|
170
|
+
</div>
|
|
171
|
+
|
|
165
172
|
<!-- Create/Edit Judge Section -->
|
|
166
173
|
<div class="section">
|
|
167
174
|
<h2 id="form-title">Create New Judge</h2>
|
|
@@ -172,7 +179,6 @@
|
|
|
172
179
|
<label for="judge-type">Judge Type</label>
|
|
173
180
|
<select id="judge-type">
|
|
174
181
|
<option value="llm">LLM-as-a-Judge</option>
|
|
175
|
-
<option value="custom">Custom Function</option>
|
|
176
182
|
</select>
|
|
177
183
|
|
|
178
184
|
<div id="llm-options" style="display: block;">
|
|
@@ -204,13 +210,6 @@
|
|
|
204
210
|
<textarea id="judge-prompt"></textarea>
|
|
205
211
|
</div>
|
|
206
212
|
|
|
207
|
-
<div id="custom-options" style="display: none;">
|
|
208
|
-
<label for="custom-function">Custom Function (Python)</label>
|
|
209
|
-
<textarea id="custom-function" placeholder="def custom_judge(strong_output: str, weak_output: str) -> dict:
|
|
210
|
-
# Your custom logic here
|
|
211
|
-
return {'similarity': 1.0 if strong_output == weak_output else 0.0}"></textarea>
|
|
212
|
-
</div>
|
|
213
|
-
|
|
214
213
|
<button onclick="saveJudge()" id="save-btn">Save Judge</button>
|
|
215
214
|
<button onclick="cancelEdit()" id="cancel-btn" style="display: none; background: #5a2a2a; margin-left: 10px;">Cancel</button>
|
|
216
215
|
</div>
|
|
@@ -225,6 +224,73 @@
|
|
|
225
224
|
</div>
|
|
226
225
|
</div>
|
|
227
226
|
|
|
227
|
+
<!-- Prompt Generator Panel -->
|
|
228
|
+
<div id="prompt-generator-panel" style="display: none; position: fixed; top: 0; left: 0; width: 100%; height: 100%; background: rgba(0,0,0,0.9); z-index: 1000; padding: 40px; overflow-y: auto;">
|
|
229
|
+
<div style="max-width: 1200px; margin: 0 auto; background: #1a1a1a; border-radius: 8px; padding: 30px; border: 1px solid #3a2a4a;">
|
|
230
|
+
<h2 style="color: #fff; margin-bottom: 10px;">AI-Powered Judge Prompt Generator</h2>
|
|
231
|
+
<p style="color: #888; font-size: 13px; margin-bottom: 25px;">
|
|
232
|
+
Generate specialized judge prompts by showing sample data to an AI model
|
|
233
|
+
</p>
|
|
234
|
+
|
|
235
|
+
<!-- Configuration -->
|
|
236
|
+
<div style="display: grid; grid-template-columns: 1fr 1fr; gap: 20px; margin-bottom: 20px;">
|
|
237
|
+
<div>
|
|
238
|
+
<label style="display: block; color: #aaa; margin-bottom: 8px; font-size: 14px;">Weak Model Dataset:</label>
|
|
239
|
+
<select id="gen-weak-model-select" style="width: 100%; padding: 10px; background: #2a2a2a; color: #fff; border: 1px solid #3a3a3a; border-radius: 4px; font-size: 14px;">
|
|
240
|
+
<option value="">Loading weak model files...</option>
|
|
241
|
+
</select>
|
|
242
|
+
</div>
|
|
243
|
+
|
|
244
|
+
<div>
|
|
245
|
+
<label style="display: block; color: #aaa; margin-bottom: 8px; font-size: 14px;">Number of Samples:</label>
|
|
246
|
+
<input type="number" id="gen-num-samples" value="3" min="1" max="10"
|
|
247
|
+
style="width: 100%; padding: 10px; background: #2a2a2a; color: #fff; border: 1px solid #3a3a3a; border-radius: 4px; font-size: 14px;">
|
|
248
|
+
<div style="color: #666; font-size: 12px; margin-top: 5px;">Max: 10 (for context limits)</div>
|
|
249
|
+
</div>
|
|
250
|
+
</div>
|
|
251
|
+
|
|
252
|
+
<div style="margin-bottom: 20px;">
|
|
253
|
+
<label style="display: block; color: #aaa; margin-bottom: 8px; font-size: 14px;">Generation Model:</label>
|
|
254
|
+
<input type="text" id="gen-model" value="openai/gpt-5"
|
|
255
|
+
style="width: 100%; padding: 10px; background: #2a2a2a; color: #fff; border: 1px solid #3a3a3a; border-radius: 4px; font-size: 14px;"
|
|
256
|
+
placeholder="e.g., openai/gpt-5, anthropic/claude-3.5-sonnet">
|
|
257
|
+
<div style="color: #666; font-size: 12px; margin-top: 5px;">OpenRouter model to use for generating the prompt</div>
|
|
258
|
+
</div>
|
|
259
|
+
|
|
260
|
+
<!-- Meta-Prompt -->
|
|
261
|
+
<div style="margin-bottom: 25px;">
|
|
262
|
+
<label style="display: block; color: #aaa; margin-bottom: 8px; font-size: 14px;">Meta-Prompt (edit as needed):</label>
|
|
263
|
+
<textarea id="gen-meta-prompt"
|
|
264
|
+
style="width: 100%; min-height: 250px; padding: 10px; background: #2a2a2a; color: #fff; border: 1px solid #3a3a3a; border-radius: 4px; font-size: 13px; font-family: 'Courier New', monospace; resize: vertical;"></textarea>
|
|
265
|
+
<div style="color: #666; font-size: 12px; margin-top: 5px;">
|
|
266
|
+
This prompt will be sent to the generation model along with sample data
|
|
267
|
+
</div>
|
|
268
|
+
</div>
|
|
269
|
+
|
|
270
|
+
<!-- Actions -->
|
|
271
|
+
<div style="display: flex; gap: 10px; margin-bottom: 25px;">
|
|
272
|
+
<button onclick="generatePrompt()" style="padding: 10px 20px; background: #6a4a7e; color: white; border: none; border-radius: 4px; cursor: pointer; font-weight: 500;">
|
|
273
|
+
Generate Prompt
|
|
274
|
+
</button>
|
|
275
|
+
<button onclick="closePromptGenerator()" style="padding: 10px 20px; background: #5a2a2a; color: white; border: none; border-radius: 4px; cursor: pointer;">
|
|
276
|
+
Close
|
|
277
|
+
</button>
|
|
278
|
+
</div>
|
|
279
|
+
|
|
280
|
+
<!-- Generated Output -->
|
|
281
|
+
<div id="gen-output-section" style="display: none;">
|
|
282
|
+
<h3 style="color: #4a9eff; margin-bottom: 15px;">Generated Judge Prompt</h3>
|
|
283
|
+
<textarea id="gen-output" readonly
|
|
284
|
+
style="width: 100%; min-height: 300px; padding: 15px; background: #0f0f0f; color: #4a9eff; border: 1px solid #4a9eff; border-radius: 4px; font-size: 13px; font-family: 'Courier New', monospace; resize: vertical;"></textarea>
|
|
285
|
+
<div style="margin-top: 10px;">
|
|
286
|
+
<button onclick="copyGeneratedPrompt()" style="padding: 8px 16px; background: #2a7c4a; color: white; border: none; border-radius: 4px; cursor: pointer;">
|
|
287
|
+
Copy to Clipboard
|
|
288
|
+
</button>
|
|
289
|
+
</div>
|
|
290
|
+
</div>
|
|
291
|
+
</div>
|
|
292
|
+
</div>
|
|
293
|
+
|
|
228
294
|
<script>
|
|
229
295
|
let judges = [];
|
|
230
296
|
let editingIndex = null;
|
|
@@ -340,8 +406,6 @@ Respond in JSON format: {'correct': true} or {'correct': false}`
|
|
|
340
406
|
alert('Error: Judge prompt must include {weak_output} placeholder');
|
|
341
407
|
return;
|
|
342
408
|
}
|
|
343
|
-
} else {
|
|
344
|
-
judge.customFunction = document.getElementById('custom-function').value.trim();
|
|
345
409
|
}
|
|
346
410
|
|
|
347
411
|
const success = await saveJudgeToServer(judge);
|
|
@@ -358,7 +422,6 @@ Respond in JSON format: {'correct': true} or {'correct': false}`
|
|
|
358
422
|
document.getElementById('judge-type').value = 'llm';
|
|
359
423
|
document.getElementById('judge-model').value = 'gpt-5-2025-08-07';
|
|
360
424
|
document.getElementById('judge-prompt').value = '';
|
|
361
|
-
document.getElementById('custom-function').value = '';
|
|
362
425
|
document.getElementById('form-title').textContent = 'Create New Judge';
|
|
363
426
|
document.getElementById('save-btn').textContent = 'Save Judge';
|
|
364
427
|
document.getElementById('cancel-btn').style.display = 'none';
|
|
@@ -383,8 +446,6 @@ Respond in JSON format: {'correct': true} or {'correct': false}`
|
|
|
383
446
|
document.getElementById('judge-model').value = judge.model;
|
|
384
447
|
document.getElementById('judge-return-type').value = judge.returnType || 'scalar';
|
|
385
448
|
document.getElementById('judge-prompt').value = judge.prompt || '';
|
|
386
|
-
} else {
|
|
387
|
-
document.getElementById('custom-function').value = judge.customFunction || '';
|
|
388
449
|
}
|
|
389
450
|
|
|
390
451
|
document.getElementById('form-title').textContent = 'Edit Judge';
|
|
@@ -437,9 +498,8 @@ Respond in JSON format: {'correct': true} or {'correct': false}`
|
|
|
437
498
|
|
|
438
499
|
// Toggle judge type options
|
|
439
500
|
function toggleJudgeType() {
|
|
440
|
-
|
|
441
|
-
document.getElementById('llm-options').style.display =
|
|
442
|
-
document.getElementById('custom-options').style.display = type === 'custom' ? 'block' : 'none';
|
|
501
|
+
// Only LLM type is supported now
|
|
502
|
+
document.getElementById('llm-options').style.display = 'block';
|
|
443
503
|
}
|
|
444
504
|
|
|
445
505
|
document.getElementById('judge-type').addEventListener('change', toggleJudgeType);
|
|
@@ -503,6 +563,113 @@ Respond in JSON format: {'correct': true} or {'correct': false}`
|
|
|
503
563
|
console.log('Not changing prompt - user has edited it');
|
|
504
564
|
}
|
|
505
565
|
});
|
|
566
|
+
|
|
567
|
+
// === PROMPT GENERATOR ===
|
|
568
|
+
|
|
569
|
+
// Default meta-prompt for the prompt generator: openPromptGenerator() copies it
// into the #gen-meta-prompt textarea, and generatePrompt() sends the (possibly
// edited) textarea text to /generate_judge_prompt as `meta_prompt`.
// NOTE(review): {SAMPLES} looks like a placeholder that the server fills with the
// sampled rows - the substitution is not visible in this file; confirm in server.py.
const DEFAULT_META_PROMPT = `You are an expert at creating evaluation prompts for judging AI model outputs. I'm building a specialized judge prompt to evaluate the quality/similarity of weak model outputs compared to strong reference model outputs.
|
|
570
|
+
|
|
571
|
+
I will show you some sample data below. Each sample contains:
|
|
572
|
+
- A question/input
|
|
573
|
+
- The strong reference model's output (ground truth)
|
|
574
|
+
- The weak model's output (what we're evaluating)
|
|
575
|
+
|
|
576
|
+
Your task: Create a specialized, detailed judge prompt that can be used to systematically evaluate the delta/difference between these outputs. The prompt should:
|
|
577
|
+
1. Be specific to the patterns you see in this data
|
|
578
|
+
2. Include clear evaluation criteria
|
|
579
|
+
3. Be written in second-person ("You are...")
|
|
580
|
+
4. Include the placeholders {question}, {strong_output}, and {weak_output}
|
|
581
|
+
5. Specify the exact JSON format to return (either {'score': number} for scalar or {'correct': boolean} for boolean)
|
|
582
|
+
|
|
583
|
+
Sample Data:
|
|
584
|
+
{SAMPLES}
|
|
585
|
+
|
|
586
|
+
Based on these samples, create a specialized judge prompt that would effectively evaluate this type of data. Return ONLY the judge prompt text, nothing else.`;
|
|
587
|
+
|
|
588
|
+
/**
 * Open the prompt generator overlay.
 *
 * Fetches the weak model file list from `/list_weak_models` and fills the
 * `gen-weak-model-select` dropdown, resets the meta-prompt textarea to
 * DEFAULT_META_PROMPT, then shows the panel and hides any previous output.
 * A failed file-list request is logged and reported inside the dropdown;
 * the panel still opens.
 *
 * @returns {Promise<void>}
 */
async function openPromptGenerator() {
    const select = document.getElementById('gen-weak-model-select');

    // Build <option> nodes through the DOM so file and model names coming from
    // the server are treated as plain text and never parsed as markup.
    const makeOption = (value, label) => {
        const option = document.createElement('option');
        option.value = value;
        option.textContent = label;
        return option;
    };

    // Load weak model files
    try {
        const response = await fetch('/list_weak_models');
        if (!response.ok) {
            throw new Error(`Failed to list weak models (HTTP ${response.status})`);
        }
        const data = await response.json();

        if (data.files && data.files.length > 0) {
            select.replaceChildren(
                ...data.files.map((f) => makeOption(f.filename, f.weak_model || f.filename))
            );
        } else {
            select.replaceChildren(makeOption('', 'No weak model files available'));
        }
    } catch (error) {
        console.error('Error loading weak models:', error);
        // Replace the "Loading..." placeholder so the dropdown does not look
        // like it is still waiting. The empty value keeps the "select a
        // dataset" guard in generatePrompt() in effect.
        select.replaceChildren(makeOption('', 'Failed to load weak model files'));
    }

    // Set default meta-prompt
    document.getElementById('gen-meta-prompt').value = DEFAULT_META_PROMPT;

    // Show panel with the output section collapsed until a prompt is generated
    document.getElementById('prompt-generator-panel').style.display = 'block';
    document.getElementById('gen-output-section').style.display = 'none';
}
|
|
613
|
+
|
|
614
|
+
function closePromptGenerator() {
|
|
615
|
+
document.getElementById('prompt-generator-panel').style.display = 'none';
|
|
616
|
+
}
|
|
617
|
+
|
|
618
|
+
/**
 * Ask the server to generate a judge prompt and show the result.
 *
 * Reads the selected weak model file, sample count, generation model and
 * meta-prompt from the generator form, validates them, POSTs them as JSON to
 * `/generate_judge_prompt`, and writes `generated_prompt` from the response
 * into the `gen-output` textarea. Validation problems and request failures
 * are reported with `alert`; failures are also logged to the console.
 *
 * @returns {Promise<void>}
 */
async function generatePrompt() {
    // Bounds mirror the min/max attributes declared on the #gen-num-samples
    // input; those attributes are not enforced when the value is read from
    // script, so the range is applied here.
    const MIN_SAMPLES = 1;
    const MAX_SAMPLES = 10;
    const DEFAULT_SAMPLES = 3;

    const weakModelFile = document.getElementById('gen-weak-model-select').value;
    const parsedSamples = Number.parseInt(document.getElementById('gen-num-samples').value, 10);
    // NaN and 0 fall back to the default, everything else is clamped.
    const numSamples = Math.min(MAX_SAMPLES, Math.max(MIN_SAMPLES, parsedSamples || DEFAULT_SAMPLES));
    const model = document.getElementById('gen-model').value.trim();
    const metaPrompt = document.getElementById('gen-meta-prompt').value.trim();

    if (!weakModelFile) {
        alert('Please select a weak model dataset');
        return;
    }

    if (!model) {
        alert('Please enter a generation model');
        return;
    }

    if (!metaPrompt) {
        alert('Please enter a meta-prompt');
        return;
    }

    try {
        const response = await fetch('/generate_judge_prompt', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({
                weak_model_file: weakModelFile,
                num_samples: numSamples,
                model: model,
                meta_prompt: metaPrompt
            })
        });

        if (!response.ok) {
            // Keep the status code so the failure can be diagnosed from the alert.
            throw new Error(`Failed to generate prompt (HTTP ${response.status})`);
        }

        const result = await response.json();

        // Assigning undefined to a textarea would display the text "undefined".
        if (typeof result.generated_prompt !== 'string') {
            throw new Error('Server response did not include a generated prompt');
        }

        // Display generated prompt
        document.getElementById('gen-output').value = result.generated_prompt;
        document.getElementById('gen-output-section').style.display = 'block';
    } catch (error) {
        alert(`Error generating prompt: ${error.message}`);
        console.error('Generation error:', error);
    }
}
|
|
666
|
+
|
|
667
|
+
/**
 * Copy the generated judge prompt to the clipboard.
 *
 * Selects the text in the `gen-output` textarea and issues the `copy`
 * command. Success is announced only when the command reports success;
 * otherwise the user is told to copy manually (the text stays selected).
 */
function copyGeneratedPrompt() {
    const output = document.getElementById('gen-output');
    output.select();

    // execCommand returns false when the copy is unsupported or disabled, and
    // some browsers throw instead; treat both as "not copied".
    let copied = false;
    try {
        copied = document.execCommand('copy');
    } catch (error) {
        console.error('Copy failed:', error);
    }

    if (copied) {
        alert('Prompt copied to clipboard!');
    } else {
        alert('Could not copy automatically. The prompt is selected - press Ctrl+C (Cmd+C on Mac) to copy it.');
    }
}
|
|
506
673
|
</script>
|
|
507
674
|
</body>
|
|
508
675
|
</html>
|