quickdistill 0.1.7__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
quickdistill/__init__.py CHANGED
@@ -8,7 +8,7 @@ This package provides tools to:
8
8
  - Export datasets for model evaluation
9
9
  """
10
10
 
11
- __version__ = "0.1.7"
11
+ __version__ = "0.1.8"
12
12
  __author__ = "Brett Young"
13
13
  __email__ = "bdytx5@umsystem.edu"
14
14
 
@@ -2,14 +2,14 @@
2
2
  {
3
3
  "name": "boolean_scorer",
4
4
  "type": "llm",
5
- "model": "gpt-5",
5
+ "model": "openai/gpt-5",
6
6
  "returnType": "boolean",
7
7
  "prompt": "You are a strict evaluator comparing two AI responses (one from a strong reference model which is the ground truth, and one from a weaker model which we are testing to see how similar the responses it generates are to the strong model).\n\nStrong Model Response: {strong_output}\nWeak Model Response: {weak_output}\n\nDetermine if the weak model response is CORRECT compared to the strong model response.\nConsider a response CORRECT if it conveys the same key information and meaning, even if worded differently.\n\nRespond in JSON format: {'correct': true} or {'correct': false}"
8
8
  },
9
9
  {
10
10
  "name": "scalar_scorer",
11
11
  "type": "llm",
12
- "model": "gpt-5",
12
+ "model": "openai/gpt-5",
13
13
  "returnType": "scalar",
14
14
  "prompt": "You are a strict evaluator comparing two AI responses (one from a strong reference model which is the ground truth, and one from a weaker model which we are testing to see how similar the responses it generates are to the strong model).\n\nStrong Model Response: {strong_output}\nWeak Model Response: {weak_output}\n\nEvaluate how similar the weak model response is to the strong model response.\nRate on a scale of 1-5 where 1=completely different and 5=nearly identical. RETURN ONLY ONE SCORE REPRESENTING THE AVERAGE SIMILARITY (EG 5-(avg_error))\n\nRespond in JSON format eg {'scores': the_score }"
15
15
  }
@@ -183,12 +183,10 @@
183
183
 
184
184
  <div id="llm-options" style="display: block;">
185
185
  <label for="judge-model">Model</label>
186
- <select id="judge-model">
187
- <option value="gpt-5">gpt-5</option>
188
- <option value="gpt-4o">gpt-4o</option>
189
- <option value="gpt-4o-mini">gpt-4o-mini</option>
190
- <option value="claude-3-5-sonnet-20241022">claude-3-5-sonnet</option>
191
- </select>
186
+ <input type="text" id="judge-model" placeholder="e.g., openai/gpt-5, anthropic/claude-3.5-sonnet" value="openai/gpt-5">
187
+ <p style="color: #888; font-size: 12px; margin-top: 5px; margin-bottom: 15px;">
188
+ <strong>Note:</strong> Uses LiteLLM format. Examples: <code>openai/gpt-5</code>, <code>anthropic/claude-3-5-sonnet-20241022</code>, <code>openai/gpt-4o</code>
189
+ </p>
192
190
 
193
191
  <label for="judge-return-type">Return Type</label>
194
192
  <select id="judge-return-type">
@@ -393,10 +391,16 @@ Respond in JSON format: {'correct': true} or {'correct': false}`
393
391
  };
394
392
 
395
393
  if (type === 'llm') {
396
- judge.model = document.getElementById('judge-model').value;
394
+ judge.model = document.getElementById('judge-model').value.trim();
397
395
  judge.returnType = document.getElementById('judge-return-type').value;
398
396
  judge.prompt = document.getElementById('judge-prompt').value.trim();
399
397
 
398
+ // Validate model
399
+ if (!judge.model) {
400
+ alert('Error: Please enter a model (e.g., openai/gpt-5)');
401
+ return;
402
+ }
403
+
400
404
  // Validate required placeholders
401
405
  if (!judge.prompt.includes('{strong_output}')) {
402
406
  alert('Error: Judge prompt must include {strong_output} placeholder');
@@ -420,7 +424,7 @@ Respond in JSON format: {'correct': true} or {'correct': false}`
420
424
  function resetForm() {
421
425
  document.getElementById('judge-name').value = '';
422
426
  document.getElementById('judge-type').value = 'llm';
423
- document.getElementById('judge-model').value = 'gpt-5-2025-08-07';
427
+ document.getElementById('judge-model').value = 'openai/gpt-5';
424
428
  document.getElementById('judge-prompt').value = '';
425
429
  document.getElementById('form-title').textContent = 'Create New Judge';
426
430
  document.getElementById('save-btn').textContent = 'Save Judge';
@@ -300,18 +300,25 @@
300
300
  Select All Filtered
301
301
  </button>
302
302
 
303
- <button id="export-btn" style="padding: 8px 16px; background: #4a9eff; color: white; border: none; border-radius: 4px; cursor: pointer;">
304
- Export Selected to Test Set (<span id="selected-count">0</span>)
305
- </button>
303
+ <!-- Manual Workflow Section -->
304
+ <div style="margin: 20px 0; padding: 15px; background: #1a2a1a; border-radius: 8px; border: 3px solid #ffffff;">
305
+ <div style="color: #ffffff; font-size: 14px; font-weight: 500; margin-bottom: 12px;">📋 Manual Workflow (Step-by-Step):</div>
306
+ <div style="display: flex; flex-wrap: wrap; gap: 10px;">
307
+ <button id="export-btn" style="padding: 8px 16px; background: #4a9eff; color: white; border: none; border-radius: 4px; cursor: pointer;">
308
+ Export Selected to Test Set (<span id="selected-count">0</span>)
309
+ </button>
306
310
 
307
- <button id="open-inference-btn" style="padding: 8px 16px; background: #7c4a9e; color: white; border: none; border-radius: 4px; cursor: pointer;">
308
- Run Weak Models
309
- </button>
311
+ <button id="open-inference-btn" style="padding: 8px 16px; background: #7c4a9e; color: white; border: none; border-radius: 4px; cursor: pointer;">
312
+ Run Weak Models
313
+ </button>
310
314
 
311
- <button id="open-eval-btn" style="padding: 8px 16px; background: #9e6a4a; color: white; border: none; border-radius: 4px; cursor: pointer;">
312
- Run Evaluation
313
- </button>
315
+ <button id="open-eval-btn" style="padding: 8px 16px; background: #9e6a4a; color: white; border: none; border-radius: 4px; cursor: pointer;">
316
+ Run Evaluation
317
+ </button>
318
+ </div>
319
+ </div>
314
320
 
321
+ <!-- Utilities -->
315
322
  <a href="/judge" target="_blank" style="padding: 8px 16px; background: #4a5a9e; color: white; border: none; border-radius: 4px; text-decoration: none; display: inline-block;">
316
323
  Manage Judges
317
324
  </a>
@@ -324,6 +331,7 @@
324
331
  Settings
325
332
  </button>
326
333
 
334
+ <!-- Automatic Workflow Section -->
327
335
  <div style="margin: 20px 0; padding: 15px; background: #2a1a2a; border-radius: 8px; border: 1px solid #4a2a4a;">
328
336
  <div style="color: #aaa; font-size: 13px; margin-bottom: 10px;">Automatic Workflow:</div>
329
337
  <button id="open-e2e-btn" style="padding: 10px 20px; background: #7a4a9e; color: white; border: none; border-radius: 4px; cursor: pointer; font-weight: 500;">
@@ -511,9 +519,9 @@
511
519
  <label style="display: block; color: #aaa; margin-bottom: 8px; font-size: 14px;">Judge Model:</label>
512
520
  <input type="text" id="test-judge-model"
513
521
  style="width: 100%; padding: 10px; background: #2a2a2a; color: #fff; border: 1px solid #3a3a3a; border-radius: 4px; font-size: 14px;"
514
- placeholder="e.g., gpt-4o, claude-3-5-sonnet-20241022">
522
+ placeholder="e.g., openai/gpt-5, anthropic/claude-3.5-sonnet">
515
523
  <div style="color: #666; font-size: 12px; margin-top: 5px;">
516
- Override the judge's model for this test
524
+ Override the judge's model for this test. Uses LiteLLM format (e.g., <code style="color: #aaa;">openai/gpt-5</code>, <code style="color: #aaa;">anthropic/claude-3-5-sonnet-20241022</code>)
517
525
  </div>
518
526
  </div>
519
527
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: quickdistill
3
- Version: 0.1.7
3
+ Version: 0.1.8
4
4
  Summary: Fast and easy toolkit for distilling AI models
5
5
  Author-email: Brett Young <bdytx5@umsystem.edu>
6
6
  License: MIT
@@ -0,0 +1,17 @@
1
+ quickdistill/__init__.py,sha256=4hLOUVOlPTaZaCLc7950TQGMb-EV_3J9t2qT7StwA7k,397
2
+ quickdistill/cli.py,sha256=A8d5GN9NdBS299WyAsJ6-p8ynW3DJnDRHZ-UGH7TXLM,2212
3
+ quickdistill/default_judges.json,sha256=9uDqsYc9CsJwZAWwOkWcqgmlGZNJ0zzyXpv4wZ8vtuE,1446
4
+ quickdistill/get_traces.py,sha256=mfy9fMiK-CZQN1noZ4DfOwdwP45ntthVDLgh4-u2iNk,4896
5
+ quickdistill/server.py,sha256=0Y0XG-8oYoNZgmo10LPZgtwlHuGqrq0urxE-KabyIvI,36789
6
+ quickdistill/__pycache__/__init__.cpython-310.pyc,sha256=kCGMGP5qGjIpf2QZcBVLVTVlQKd-HHy_l9tHr1LfysU,603
7
+ quickdistill/__pycache__/cli.cpython-310.pyc,sha256=xtVgJTayQLKS4gE_te7U1Wo8LmkDtPkaa2rnzu8h9fY,2443
8
+ quickdistill/__pycache__/get_traces.cpython-310.pyc,sha256=T7Suxp9vpqYDQJ_3uJvXWemqoLf5tnRC2I0BfHrSiNM,2956
9
+ quickdistill/__pycache__/server.cpython-310.pyc,sha256=_taKWofMtdgfMZzfVsd7PoC4jnuKxEOGzW82YBxqPPc,22051
10
+ quickdistill/default_projects/byyoung3_arena-detailed/traces_data.json,sha256=iz-cBmXBYj0bC3Vn754QTnGuDh6sRvlE_RzSyGXaxbY,15496950
11
+ quickdistill/static/judge_manager.html,sha256=t6dSPwo_d-GIu1FscuK1KDgxKCnmiOekQTMu80lZPPY,27166
12
+ quickdistill/static/trace_viewer.html,sha256=yt_zPP88px_51a9ilv8UhrssnVOT-2hjEPHEGoRlPrQ,95152
13
+ quickdistill-0.1.8.dist-info/METADATA,sha256=q4uGRUvQ3HSlHff0ZKs1tBzGos-iOiSxHq3HbKJHa-k,5084
14
+ quickdistill-0.1.8.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
15
+ quickdistill-0.1.8.dist-info/entry_points.txt,sha256=AUUTxnwdD9gRnsOEcTXQTAZIZ_F0aRU7JGstIJ3Xk_o,55
16
+ quickdistill-0.1.8.dist-info/top_level.txt,sha256=ysiMvurJYsE1IhkxmObe-0G8A-GIav40kTh2z6axjxg,13
17
+ quickdistill-0.1.8.dist-info/RECORD,,
@@ -1,17 +0,0 @@
1
- quickdistill/__init__.py,sha256=U8mvMbfYKLFegcEA4D-P6AFHvSiHQPXoFn0KKd-xh0A,397
2
- quickdistill/cli.py,sha256=A8d5GN9NdBS299WyAsJ6-p8ynW3DJnDRHZ-UGH7TXLM,2212
3
- quickdistill/default_judges.json,sha256=w0TkIniELPPG-Mi3hm7zPW06eq46W1BI_ufWXnkDDDM,1432
4
- quickdistill/get_traces.py,sha256=mfy9fMiK-CZQN1noZ4DfOwdwP45ntthVDLgh4-u2iNk,4896
5
- quickdistill/server.py,sha256=0Y0XG-8oYoNZgmo10LPZgtwlHuGqrq0urxE-KabyIvI,36789
6
- quickdistill/__pycache__/__init__.cpython-310.pyc,sha256=Tbov274p3OjaOuOsQwcW-meATEfkz0mHKmpytksuDJI,603
7
- quickdistill/__pycache__/cli.cpython-310.pyc,sha256=xtVgJTayQLKS4gE_te7U1Wo8LmkDtPkaa2rnzu8h9fY,2443
8
- quickdistill/__pycache__/get_traces.cpython-310.pyc,sha256=T7Suxp9vpqYDQJ_3uJvXWemqoLf5tnRC2I0BfHrSiNM,2956
9
- quickdistill/__pycache__/server.cpython-310.pyc,sha256=_taKWofMtdgfMZzfVsd7PoC4jnuKxEOGzW82YBxqPPc,22051
10
- quickdistill/default_projects/byyoung3_arena-detailed/traces_data.json,sha256=iz-cBmXBYj0bC3Vn754QTnGuDh6sRvlE_RzSyGXaxbY,15496950
11
- quickdistill/static/judge_manager.html,sha256=fXteyx_ry4gY166WypBkVGGCqieE88MigqLRLVCKnG8,26887
12
- quickdistill/static/trace_viewer.html,sha256=kPC4GnxeDPq7jxClRhZBOuS6xmA3RaY-loJDZmKDADE,94426
13
- quickdistill-0.1.7.dist-info/METADATA,sha256=1pE5fDep0l0kAxhHuT1C_H4CYHIiPLP4n9QraAqI9bM,5084
14
- quickdistill-0.1.7.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
15
- quickdistill-0.1.7.dist-info/entry_points.txt,sha256=AUUTxnwdD9gRnsOEcTXQTAZIZ_F0aRU7JGstIJ3Xk_o,55
16
- quickdistill-0.1.7.dist-info/top_level.txt,sha256=ysiMvurJYsE1IhkxmObe-0G8A-GIav40kTh2z6axjxg,13
17
- quickdistill-0.1.7.dist-info/RECORD,,