quickdistill 0.1.7__tar.gz → 0.1.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {quickdistill-0.1.7/quickdistill.egg-info → quickdistill-0.1.8}/PKG-INFO +1 -1
- {quickdistill-0.1.7 → quickdistill-0.1.8}/pyproject.toml +1 -1
- {quickdistill-0.1.7 → quickdistill-0.1.8}/quickdistill/__init__.py +1 -1
- {quickdistill-0.1.7 → quickdistill-0.1.8}/quickdistill/__pycache__/__init__.cpython-310.pyc +0 -0
- {quickdistill-0.1.7 → quickdistill-0.1.8}/quickdistill/default_judges.json +2 -2
- {quickdistill-0.1.7 → quickdistill-0.1.8}/quickdistill/static/judge_manager.html +12 -8
- {quickdistill-0.1.7 → quickdistill-0.1.8}/quickdistill/static/trace_viewer.html +19 -11
- {quickdistill-0.1.7 → quickdistill-0.1.8/quickdistill.egg-info}/PKG-INFO +1 -1
- {quickdistill-0.1.7 → quickdistill-0.1.8}/.pycommands +0 -0
- {quickdistill-0.1.7 → quickdistill-0.1.8}/README.md +0 -0
- {quickdistill-0.1.7 → quickdistill-0.1.8}/dev/generate_test_traces.py +0 -0
- {quickdistill-0.1.7 → quickdistill-0.1.8}/dev/get_call.py +0 -0
- {quickdistill-0.1.7 → quickdistill-0.1.8}/dev/get_traces.py +0 -0
- {quickdistill-0.1.7 → quickdistill-0.1.8}/dev/inference_server.py +0 -0
- {quickdistill-0.1.7 → quickdistill-0.1.8}/dev/judge_manager.html +0 -0
- {quickdistill-0.1.7 → quickdistill-0.1.8}/dev/judges.json +0 -0
- {quickdistill-0.1.7 → quickdistill-0.1.8}/dev/old/TEST_TRACE_GENERATION.md +0 -0
- {quickdistill-0.1.7 → quickdistill-0.1.8}/dev/old/traces_data.json +0 -0
- {quickdistill-0.1.7 → quickdistill-0.1.8}/dev/projects/byyoung3_arena-detailed/traces_data.json +0 -0
- {quickdistill-0.1.7 → quickdistill-0.1.8}/dev/projects/byyoung3_claude-opus-4-1-tutorial/traces_data.json +0 -0
- {quickdistill-0.1.7 → quickdistill-0.1.8}/dev/projects/byyoung3_test-financial-qa/traces_data.json +0 -0
- {quickdistill-0.1.7 → quickdistill-0.1.8}/dev/pystatus +0 -0
- {quickdistill-0.1.7 → quickdistill-0.1.8}/dev/run_evaluation.py +0 -0
- {quickdistill-0.1.7 → quickdistill-0.1.8}/dev/run_weak_models.py +0 -0
- {quickdistill-0.1.7 → quickdistill-0.1.8}/dev/strong_exports/anthropic_claude-3.5-sonnet_10traces_v2.json +0 -0
- {quickdistill-0.1.7 → quickdistill-0.1.8}/dev/strong_exports/anthropic_claude-3.5-sonnet_20traces.json +0 -0
- {quickdistill-0.1.7 → quickdistill-0.1.8}/dev/strong_exports/claude-opus-4-1-20250805_1traces.json +0 -0
- {quickdistill-0.1.7 → quickdistill-0.1.8}/dev/strong_exports/gpt-5-2025-08-07_199traces.json +0 -0
- {quickdistill-0.1.7 → quickdistill-0.1.8}/dev/trace_viewer.html +0 -0
- {quickdistill-0.1.7 → quickdistill-0.1.8}/dev/traces_data.json +0 -0
- {quickdistill-0.1.7 → quickdistill-0.1.8}/dev/weak_model_google_gemini-2.5-flash.json +0 -0
- {quickdistill-0.1.7 → quickdistill-0.1.8}/dev/weak_model_meta-llama_Llama-3.1-8B-Instruct.json +0 -0
- {quickdistill-0.1.7 → quickdistill-0.1.8}/dev/weak_model_meta-llama_Llama-3.3-70B-Instruct.json +0 -0
- {quickdistill-0.1.7 → quickdistill-0.1.8}/dev/weak_model_openai_gpt-oss-20b.json +0 -0
- {quickdistill-0.1.7 → quickdistill-0.1.8}/quickdistill/__pycache__/cli.cpython-310.pyc +0 -0
- {quickdistill-0.1.7 → quickdistill-0.1.8}/quickdistill/__pycache__/get_traces.cpython-310.pyc +0 -0
- {quickdistill-0.1.7 → quickdistill-0.1.8}/quickdistill/__pycache__/server.cpython-310.pyc +0 -0
- {quickdistill-0.1.7 → quickdistill-0.1.8}/quickdistill/cli.py +0 -0
- {quickdistill-0.1.7 → quickdistill-0.1.8}/quickdistill/default_projects/byyoung3_arena-detailed/traces_data.json +0 -0
- {quickdistill-0.1.7 → quickdistill-0.1.8}/quickdistill/get_traces.py +0 -0
- {quickdistill-0.1.7 → quickdistill-0.1.8}/quickdistill/server.py +0 -0
- {quickdistill-0.1.7 → quickdistill-0.1.8}/quickdistill.egg-info/SOURCES.txt +0 -0
- {quickdistill-0.1.7 → quickdistill-0.1.8}/quickdistill.egg-info/dependency_links.txt +0 -0
- {quickdistill-0.1.7 → quickdistill-0.1.8}/quickdistill.egg-info/entry_points.txt +0 -0
- {quickdistill-0.1.7 → quickdistill-0.1.8}/quickdistill.egg-info/requires.txt +0 -0
- {quickdistill-0.1.7 → quickdistill-0.1.8}/quickdistill.egg-info/top_level.txt +0 -0
- {quickdistill-0.1.7 → quickdistill-0.1.8}/setup.cfg +0 -0
- {quickdistill-0.1.7 → quickdistill-0.1.8}/update.sh +0 -0
|
Binary file
|
|
@@ -2,14 +2,14 @@
|
|
|
2
2
|
{
|
|
3
3
|
"name": "boolean_scorer",
|
|
4
4
|
"type": "llm",
|
|
5
|
-
"model": "gpt-5",
|
|
5
|
+
"model": "openai/gpt-5",
|
|
6
6
|
"returnType": "boolean",
|
|
7
7
|
"prompt": "You are a strict evaluator comparing two AI responses (one from a strong reference model which is the ground truth, and one from a weaker model which we are testing to see how similar the responses it generates are to the strong model).\n\nStrong Model Response: {strong_output}\nWeak Model Response: {weak_output}\n\nDetermine if the weak model response is CORRECT compared to the strong model response.\nConsider a response CORRECT if it conveys the same key information and meaning, even if worded differently.\n\nRespond in JSON format: {'correct': true} or {'correct': false}"
|
|
8
8
|
},
|
|
9
9
|
{
|
|
10
10
|
"name": "scalar_scorer",
|
|
11
11
|
"type": "llm",
|
|
12
|
-
"model": "gpt-5",
|
|
12
|
+
"model": "openai/gpt-5",
|
|
13
13
|
"returnType": "scalar",
|
|
14
14
|
"prompt": "You are a strict evaluator comparing two AI responses (one from a strong reference model which is the ground truth, and one from a weaker model which we are testing to see how similar the responses it generates are to the strong model).\n\nStrong Model Response: {strong_output}\nWeak Model Response: {weak_output}\n\nEvaluate how similar the weak model response is to the strong model response.\nRate on a scale of 1-5 where 1=completely different and 5=nearly identical. RETURN ONLY ONE SCORE REPRESENTING THE AVERAGE SIMILARITY (EG 5-(avg_error))\n\nRespond in JSON format eg {'scores': the_score }"
|
|
15
15
|
}
|
|
@@ -183,12 +183,10 @@
|
|
|
183
183
|
|
|
184
184
|
<div id="llm-options" style="display: block;">
|
|
185
185
|
<label for="judge-model">Model</label>
|
|
186
|
-
<
|
|
187
|
-
|
|
188
|
-
<
|
|
189
|
-
|
|
190
|
-
<option value="claude-3-5-sonnet-20241022">claude-3-5-sonnet</option>
|
|
191
|
-
</select>
|
|
186
|
+
<input type="text" id="judge-model" placeholder="e.g., openai/gpt-5, anthropic/claude-3.5-sonnet" value="openai/gpt-5">
|
|
187
|
+
<p style="color: #888; font-size: 12px; margin-top: 5px; margin-bottom: 15px;">
|
|
188
|
+
<strong>Note:</strong> Uses LiteLLM format. Examples: <code>openai/gpt-5</code>, <code>anthropic/claude-3.5-sonnet</code>, <code>openai/gpt-4o</code>
|
|
189
|
+
</p>
|
|
192
190
|
|
|
193
191
|
<label for="judge-return-type">Return Type</label>
|
|
194
192
|
<select id="judge-return-type">
|
|
@@ -393,10 +391,16 @@ Respond in JSON format: {'correct': true} or {'correct': false}`
|
|
|
393
391
|
};
|
|
394
392
|
|
|
395
393
|
if (type === 'llm') {
|
|
396
|
-
judge.model = document.getElementById('judge-model').value;
|
|
394
|
+
judge.model = document.getElementById('judge-model').value.trim();
|
|
397
395
|
judge.returnType = document.getElementById('judge-return-type').value;
|
|
398
396
|
judge.prompt = document.getElementById('judge-prompt').value.trim();
|
|
399
397
|
|
|
398
|
+
// Validate model
|
|
399
|
+
if (!judge.model) {
|
|
400
|
+
alert('Error: Please enter a model (e.g., openai/gpt-5)');
|
|
401
|
+
return;
|
|
402
|
+
}
|
|
403
|
+
|
|
400
404
|
// Validate required placeholders
|
|
401
405
|
if (!judge.prompt.includes('{strong_output}')) {
|
|
402
406
|
alert('Error: Judge prompt must include {strong_output} placeholder');
|
|
@@ -420,7 +424,7 @@ Respond in JSON format: {'correct': true} or {'correct': false}`
|
|
|
420
424
|
function resetForm() {
|
|
421
425
|
document.getElementById('judge-name').value = '';
|
|
422
426
|
document.getElementById('judge-type').value = 'llm';
|
|
423
|
-
document.getElementById('judge-model').value = 'gpt-5
|
|
427
|
+
document.getElementById('judge-model').value = 'openai/gpt-5';
|
|
424
428
|
document.getElementById('judge-prompt').value = '';
|
|
425
429
|
document.getElementById('form-title').textContent = 'Create New Judge';
|
|
426
430
|
document.getElementById('save-btn').textContent = 'Save Judge';
|
|
@@ -300,18 +300,25 @@
|
|
|
300
300
|
Select All Filtered
|
|
301
301
|
</button>
|
|
302
302
|
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
303
|
+
<!-- Manual Workflow Section -->
|
|
304
|
+
<div style="margin: 20px 0; padding: 15px; background: #1a2a1a; border-radius: 8px; border: 3px solid #ffffff;">
|
|
305
|
+
<div style="color: #ffffff; font-size: 14px; font-weight: 500; margin-bottom: 12px;">📋 Manual Workflow (Step-by-Step):</div>
|
|
306
|
+
<div style="display: flex; flex-wrap: wrap; gap: 10px;">
|
|
307
|
+
<button id="export-btn" style="padding: 8px 16px; background: #4a9eff; color: white; border: none; border-radius: 4px; cursor: pointer;">
|
|
308
|
+
Export Selected to Test Set (<span id="selected-count">0</span>)
|
|
309
|
+
</button>
|
|
306
310
|
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
311
|
+
<button id="open-inference-btn" style="padding: 8px 16px; background: #7c4a9e; color: white; border: none; border-radius: 4px; cursor: pointer;">
|
|
312
|
+
Run Weak Models
|
|
313
|
+
</button>
|
|
310
314
|
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
315
|
+
<button id="open-eval-btn" style="padding: 8px 16px; background: #9e6a4a; color: white; border: none; border-radius: 4px; cursor: pointer;">
|
|
316
|
+
Run Evaluation
|
|
317
|
+
</button>
|
|
318
|
+
</div>
|
|
319
|
+
</div>
|
|
314
320
|
|
|
321
|
+
<!-- Utilities -->
|
|
315
322
|
<a href="/judge" target="_blank" style="padding: 8px 16px; background: #4a5a9e; color: white; border: none; border-radius: 4px; text-decoration: none; display: inline-block;">
|
|
316
323
|
Manage Judges
|
|
317
324
|
</a>
|
|
@@ -324,6 +331,7 @@
|
|
|
324
331
|
Settings
|
|
325
332
|
</button>
|
|
326
333
|
|
|
334
|
+
<!-- Automatic Workflow Section -->
|
|
327
335
|
<div style="margin: 20px 0; padding: 15px; background: #2a1a2a; border-radius: 8px; border: 1px solid #4a2a4a;">
|
|
328
336
|
<div style="color: #aaa; font-size: 13px; margin-bottom: 10px;">Automatic Workflow:</div>
|
|
329
337
|
<button id="open-e2e-btn" style="padding: 10px 20px; background: #7a4a9e; color: white; border: none; border-radius: 4px; cursor: pointer; font-weight: 500;">
|
|
@@ -511,9 +519,9 @@
|
|
|
511
519
|
<label style="display: block; color: #aaa; margin-bottom: 8px; font-size: 14px;">Judge Model:</label>
|
|
512
520
|
<input type="text" id="test-judge-model"
|
|
513
521
|
style="width: 100%; padding: 10px; background: #2a2a2a; color: #fff; border: 1px solid #3a3a3a; border-radius: 4px; font-size: 14px;"
|
|
514
|
-
placeholder="e.g., gpt-
|
|
522
|
+
placeholder="e.g., openai/gpt-5, anthropic/claude-3.5-sonnet">
|
|
515
523
|
<div style="color: #666; font-size: 12px; margin-top: 5px;">
|
|
516
|
-
Override the judge's model for this test
|
|
524
|
+
Override the judge's model for this test. Uses LiteLLM format (e.g., <code style="color: #aaa;">openai/gpt-5</code>, <code style="color: #aaa;">anthropic/claude-3.5-sonnet</code>)
|
|
517
525
|
</div>
|
|
518
526
|
</div>
|
|
519
527
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{quickdistill-0.1.7 → quickdistill-0.1.8}/dev/projects/byyoung3_arena-detailed/traces_data.json
RENAMED
|
File without changes
|
|
File without changes
|
{quickdistill-0.1.7 → quickdistill-0.1.8}/dev/projects/byyoung3_test-financial-qa/traces_data.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{quickdistill-0.1.7 → quickdistill-0.1.8}/dev/strong_exports/claude-opus-4-1-20250805_1traces.json
RENAMED
|
File without changes
|
{quickdistill-0.1.7 → quickdistill-0.1.8}/dev/strong_exports/gpt-5-2025-08-07_199traces.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{quickdistill-0.1.7 → quickdistill-0.1.8}/dev/weak_model_meta-llama_Llama-3.1-8B-Instruct.json
RENAMED
|
File without changes
|
{quickdistill-0.1.7 → quickdistill-0.1.8}/dev/weak_model_meta-llama_Llama-3.3-70B-Instruct.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{quickdistill-0.1.7 → quickdistill-0.1.8}/quickdistill/__pycache__/get_traces.cpython-310.pyc
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|