quickdistill 0.1.4__tar.gz → 0.1.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. {quickdistill-0.1.4/quickdistill.egg-info → quickdistill-0.1.6}/PKG-INFO +6 -7
  2. {quickdistill-0.1.4 → quickdistill-0.1.6}/pyproject.toml +6 -7
  3. {quickdistill-0.1.4 → quickdistill-0.1.6}/quickdistill/__init__.py +1 -1
  4. {quickdistill-0.1.4 → quickdistill-0.1.6}/quickdistill/__pycache__/__init__.cpython-310.pyc +0 -0
  5. quickdistill-0.1.6/quickdistill/__pycache__/server.cpython-310.pyc +0 -0
  6. {quickdistill-0.1.4 → quickdistill-0.1.6}/quickdistill/server.py +74 -10
  7. {quickdistill-0.1.4 → quickdistill-0.1.6}/quickdistill/static/judge_manager.html +2 -16
  8. {quickdistill-0.1.4 → quickdistill-0.1.6}/quickdistill/static/trace_viewer.html +73 -13
  9. {quickdistill-0.1.4 → quickdistill-0.1.6/quickdistill.egg-info}/PKG-INFO +6 -7
  10. quickdistill-0.1.6/quickdistill.egg-info/requires.txt +11 -0
  11. quickdistill-0.1.4/quickdistill/__pycache__/server.cpython-310.pyc +0 -0
  12. quickdistill-0.1.4/quickdistill.egg-info/requires.txt +0 -12
  13. {quickdistill-0.1.4 → quickdistill-0.1.6}/.pycommands +0 -0
  14. {quickdistill-0.1.4 → quickdistill-0.1.6}/README.md +0 -0
  15. {quickdistill-0.1.4 → quickdistill-0.1.6}/dev/generate_test_traces.py +0 -0
  16. {quickdistill-0.1.4 → quickdistill-0.1.6}/dev/get_call.py +0 -0
  17. {quickdistill-0.1.4 → quickdistill-0.1.6}/dev/get_traces.py +0 -0
  18. {quickdistill-0.1.4 → quickdistill-0.1.6}/dev/inference_server.py +0 -0
  19. {quickdistill-0.1.4 → quickdistill-0.1.6}/dev/judge_manager.html +0 -0
  20. {quickdistill-0.1.4 → quickdistill-0.1.6}/dev/judges.json +0 -0
  21. {quickdistill-0.1.4 → quickdistill-0.1.6}/dev/old/TEST_TRACE_GENERATION.md +0 -0
  22. {quickdistill-0.1.4 → quickdistill-0.1.6}/dev/old/traces_data.json +0 -0
  23. {quickdistill-0.1.4 → quickdistill-0.1.6}/dev/projects/byyoung3_arena-detailed/traces_data.json +0 -0
  24. {quickdistill-0.1.4 → quickdistill-0.1.6}/dev/projects/byyoung3_claude-opus-4-1-tutorial/traces_data.json +0 -0
  25. {quickdistill-0.1.4 → quickdistill-0.1.6}/dev/projects/byyoung3_test-financial-qa/traces_data.json +0 -0
  26. {quickdistill-0.1.4 → quickdistill-0.1.6}/dev/pystatus +0 -0
  27. {quickdistill-0.1.4 → quickdistill-0.1.6}/dev/run_evaluation.py +0 -0
  28. {quickdistill-0.1.4 → quickdistill-0.1.6}/dev/run_weak_models.py +0 -0
  29. {quickdistill-0.1.4 → quickdistill-0.1.6}/dev/strong_exports/anthropic_claude-3.5-sonnet_10traces_v2.json +0 -0
  30. {quickdistill-0.1.4 → quickdistill-0.1.6}/dev/strong_exports/anthropic_claude-3.5-sonnet_20traces.json +0 -0
  31. {quickdistill-0.1.4 → quickdistill-0.1.6}/dev/strong_exports/claude-opus-4-1-20250805_1traces.json +0 -0
  32. {quickdistill-0.1.4 → quickdistill-0.1.6}/dev/strong_exports/gpt-5-2025-08-07_199traces.json +0 -0
  33. {quickdistill-0.1.4 → quickdistill-0.1.6}/dev/trace_viewer.html +0 -0
  34. {quickdistill-0.1.4 → quickdistill-0.1.6}/dev/traces_data.json +0 -0
  35. {quickdistill-0.1.4 → quickdistill-0.1.6}/dev/weak_model_google_gemini-2.5-flash.json +0 -0
  36. {quickdistill-0.1.4 → quickdistill-0.1.6}/dev/weak_model_meta-llama_Llama-3.1-8B-Instruct.json +0 -0
  37. {quickdistill-0.1.4 → quickdistill-0.1.6}/dev/weak_model_meta-llama_Llama-3.3-70B-Instruct.json +0 -0
  38. {quickdistill-0.1.4 → quickdistill-0.1.6}/dev/weak_model_openai_gpt-oss-20b.json +0 -0
  39. {quickdistill-0.1.4 → quickdistill-0.1.6}/quickdistill/__pycache__/cli.cpython-310.pyc +0 -0
  40. {quickdistill-0.1.4 → quickdistill-0.1.6}/quickdistill/__pycache__/get_traces.cpython-310.pyc +0 -0
  41. {quickdistill-0.1.4 → quickdistill-0.1.6}/quickdistill/cli.py +0 -0
  42. {quickdistill-0.1.4 → quickdistill-0.1.6}/quickdistill/default_judges.json +0 -0
  43. {quickdistill-0.1.4 → quickdistill-0.1.6}/quickdistill/default_projects/byyoung3_arena-detailed/traces_data.json +0 -0
  44. {quickdistill-0.1.4 → quickdistill-0.1.6}/quickdistill/get_traces.py +0 -0
  45. {quickdistill-0.1.4 → quickdistill-0.1.6}/quickdistill.egg-info/SOURCES.txt +0 -0
  46. {quickdistill-0.1.4 → quickdistill-0.1.6}/quickdistill.egg-info/dependency_links.txt +0 -0
  47. {quickdistill-0.1.4 → quickdistill-0.1.6}/quickdistill.egg-info/entry_points.txt +0 -0
  48. {quickdistill-0.1.4 → quickdistill-0.1.6}/quickdistill.egg-info/top_level.txt +0 -0
  49. {quickdistill-0.1.4 → quickdistill-0.1.6}/setup.cfg +0 -0
{quickdistill-0.1.4/quickdistill.egg-info → quickdistill-0.1.6}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: quickdistill
-Version: 0.1.4
+Version: 0.1.6
 Summary: Fast and easy toolkit for distilling AI models
 Author-email: Brett Young <bdytx5@umsystem.edu>
 License: MIT
@@ -21,12 +21,11 @@ Classifier: Programming Language :: Python :: 3.11
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Python: >=3.8
 Description-Content-Type: text/markdown
-Requires-Dist: flask>=2.0.0
-Requires-Dist: flask-cors>=3.0.0
-Requires-Dist: openai>=1.0.0
-Requires-Dist: weave>=0.50.0
-Requires-Dist: llmasajudge>=0.1.0
-Requires-Dist: datasets>=2.0.0
+Requires-Dist: flask==2.3.2
+Requires-Dist: flask-cors==4.0.0
+Requires-Dist: openai==2.14.0
+Requires-Dist: weave==0.52.14
+Requires-Dist: llmasajudge==0.1.15
 Provides-Extra: dev
 Requires-Dist: pytest>=7.0.0; extra == "dev"
 Requires-Dist: black>=22.0.0; extra == "dev"
{quickdistill-0.1.4 → quickdistill-0.1.6}/pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "quickdistill"
-version = "0.1.4"
+version = "0.1.6"
 description = "Fast and easy toolkit for distilling AI models"
 readme = "README.md"
 authors = [
@@ -26,12 +26,11 @@ classifiers = [
 keywords = ["ai", "ml", "distillation", "evaluation", "weave"]
 requires-python = ">=3.8"
 dependencies = [
-    "flask>=2.0.0",
-    "flask-cors>=3.0.0",
-    "openai>=1.0.0",
-    "weave>=0.50.0",
-    "llmasajudge>=0.1.0",
-    "datasets>=2.0.0",
+    "flask==2.3.2",
+    "flask-cors==4.0.0",
+    "openai==2.14.0",
+    "weave==0.52.14",
+    "llmasajudge==0.1.15",
 ]
 
 [project.optional-dependencies]
{quickdistill-0.1.4 → quickdistill-0.1.6}/quickdistill/__init__.py

@@ -8,7 +8,7 @@ This package provides tools to:
 - Export datasets for model evaluation
 """
 
-__version__ = "0.1.4"
+__version__ = "0.1.6"
 __author__ = "Brett Young"
 __email__ = "bdytx5@umsystem.edu"
 
{quickdistill-0.1.4 → quickdistill-0.1.6}/quickdistill/server.py

@@ -3,6 +3,7 @@ import json
 import openai
 import weave
 import shutil
+import threading
 from flask import Flask, request, jsonify, send_from_directory
 from flask_cors import CORS
 from llmasajudge import LLMAsAJudge
@@ -30,6 +31,9 @@ if default_project_src.exists() and not default_project_dst.exists():
 app = Flask(__name__, static_folder=str(STATIC_DIR))
 CORS(app)
 
+# Progress tracking for long-running operations
+progress_state = {}
+
 # Configuration
 PROJECT = "wandb_inference"
 
@@ -152,6 +156,7 @@ def run_inference_endpoint():
     models = data.get('models', [])
     strong_export_file = data.get('strong_export_file')
     num_examples = data.get('num_examples')
+    task_id = data.get('task_id', f"inference_{id(models)}")
 
     if not models:
         return jsonify({'error': 'No models provided'}), 400
@@ -176,8 +181,17 @@
 
     output_files = []
 
+    # Initialize progress tracking
+    total_steps = len(models) * len(traces)
+    progress_state[task_id] = {
+        'current': 0,
+        'total': total_steps,
+        'message': 'Starting inference...',
+        'status': 'running'
+    }
+
     # Run inference for each model
-    for model in models:
+    for model_idx, model in enumerate(models):
         print(f"Running model: {model}")
         results = []
 
@@ -185,6 +199,13 @@
         client = get_client_for_model(model)
 
         for i, trace in enumerate(traces):
+            step = model_idx * len(traces) + i + 1
+            progress_state[task_id] = {
+                'current': step,
+                'total': total_steps,
+                'message': f'[{model_idx+1}/{len(models)}] {model} - Example {i+1}/{len(traces)}',
+                'status': 'running'
+            }
             print(f"  Processing example {i+1}/{len(traces)}...", end=' ')
 
             # Extract messages
@@ -239,13 +260,30 @@
     output_files.append(str(output_file))
     print(f"Saved {len(results)} results to {output_file}")
 
+    # Mark progress as complete
+    progress_state[task_id] = {
+        'current': total_steps,
+        'total': total_steps,
+        'message': 'Complete!',
+        'status': 'complete'
+    }
+
     return jsonify({
         'status': 'success',
        'files': output_files,
         'total_examples': len(traces),
-        'models_run': len(models)
+        'models_run': len(models),
+        'task_id': task_id
     })
 
+@app.route('/progress/<task_id>', methods=['GET'])
+def get_progress(task_id):
+    """Get progress for a running task"""
+    if task_id in progress_state:
+        return jsonify(progress_state[task_id])
+    return jsonify({'error': 'Task not found'}), 404
+
+
 @app.route('/list_weak_models', methods=['GET'])
 def list_weak_models():
     """List available weak model result files with metadata"""
@@ -469,27 +507,38 @@ def run_evaluation_endpoint():
     data = request.json
     model_file = data.get('model_file')
     judge = data.get('judge')
+    task_id = data.get('task_id', f"eval_{id(data)}")
 
     if not model_file or not judge:
         return jsonify({'error': 'Missing model_file or judge'}), 400
 
     # Load weak model results
-    with open(model_file, 'r') as f:
-        data = json.load(f)
+    model_path = DATA_DIR / model_file
+    with open(model_path, 'r') as f:
+        file_data = json.load(f)
 
     # Handle both old format (list) and new format (dict with metadata)
-    if isinstance(data, dict) and 'results' in data:
-        metadata = data.get('metadata', {})
-        results = data['results']
+    if isinstance(file_data, dict) and 'results' in file_data:
+        metadata = file_data.get('metadata', {})
+        results = file_data['results']
         strong_export = metadata.get('strong_export_file', 'unknown')
     else:
         # Old format - just a list
-        results = data
+        results = file_data
         strong_export = 'unknown'
 
     # Extract model name from filename
     model_name = model_file.replace('weak_model_', '').replace('.json', '')
 
+    # Initialize progress tracking
+    total_steps = len(results)
+    progress_state[task_id] = {
+        'current': 0,
+        'total': total_steps,
+        'message': f'Starting evaluation: {model_name} with {judge["name"]}...',
+        'status': 'running'
+    }
+
     # Create evaluation logger
     ev = weave.EvaluationLogger(
         name=f"eval-{model_name}-{judge['name']}",
@@ -497,7 +546,13 @@
     )
 
     # Run evaluation
-    for example in results:
+    for idx, example in enumerate(results):
+        progress_state[task_id] = {
+            'current': idx + 1,
+            'total': total_steps,
+            'message': f'{model_name} - Example {idx+1}/{total_steps}',
+            'status': 'running'
+        }
         # Skip examples with errors (null messages/output)
         if example.get('error') or not example.get('output'):
             continue
@@ -533,12 +588,21 @@
     # Finish evaluation
     ev.log_summary()
 
+    # Mark progress as complete
+    progress_state[task_id] = {
+        'current': total_steps,
+        'total': total_steps,
+        'message': 'Complete!',
+        'status': 'complete'
+    }
+
     return jsonify({
         'status': 'success',
         'evaluation_name': f"eval-{model_name}-{judge['name']}",
         'examples_evaluated': len(results),
         'weave_url': ev.ui_url,
-        'strong_export': strong_export
+        'strong_export': strong_export,
+        'task_id': task_id
     })
 
 
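Taken together, the server.py changes implement a simple polling scheme: the long-running /run_inference and /run_evaluation handlers write their status into the module-level progress_state dict under a caller-supplied task_id, and the new /progress/<task_id> route reads it back. The trace viewer drives this from the browser with fetch + setInterval (see the trace_viewer.html diff below); the same pattern from a Python script might look like the following sketch. The server address is an assumption (Flask defaults to port 5000, but the diff doesn't show how the app is launched), and requests is a stand-in for any HTTP client; the model and export filenames are illustrative values taken from the dev/ files in this package.

import threading
import time
import requests  # assumption: third-party HTTP client, not a quickdistill dependency

BASE = "http://127.0.0.1:5000"  # assumption: Flask's default local address/port
task_id = f"inference_{int(time.time())}"

def start_inference():
    # /run_inference blocks until all models finish, so run it in a thread
    requests.post(f"{BASE}/run_inference", json={
        "models": ["meta-llama/Llama-3.1-8B-Instruct"],           # illustrative
        "strong_export_file": "gpt-5-2025-08-07_199traces.json",  # illustrative
        "num_examples": 10,
        "task_id": task_id,
    })

worker = threading.Thread(target=start_inference, daemon=True)
worker.start()

while worker.is_alive():
    resp = requests.get(f"{BASE}/progress/{task_id}")
    if resp.ok:  # 404 until the handler has initialized progress_state
        p = resp.json()  # {'current': ..., 'total': ..., 'message': ..., 'status': ...}
        print(f"{p['message']}  ({p['current']}/{p['total']}, {p['status']})")
    time.sleep(0.3)  # the UI polls every 300 ms as well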
{quickdistill-0.1.4 → quickdistill-0.1.6}/quickdistill/static/judge_manager.html

@@ -172,7 +172,6 @@
         <label for="judge-type">Judge Type</label>
         <select id="judge-type">
             <option value="llm">LLM-as-a-Judge</option>
-            <option value="custom">Custom Function</option>
         </select>
 
         <div id="llm-options" style="display: block;">
@@ -204,13 +203,6 @@
             <textarea id="judge-prompt"></textarea>
         </div>
 
-        <div id="custom-options" style="display: none;">
-            <label for="custom-function">Custom Function (Python)</label>
-            <textarea id="custom-function" placeholder="def custom_judge(strong_output: str, weak_output: str) -> dict:
-    # Your custom logic here
-    return {'similarity': 1.0 if strong_output == weak_output else 0.0}"></textarea>
-        </div>
-
         <button onclick="saveJudge()" id="save-btn">Save Judge</button>
         <button onclick="cancelEdit()" id="cancel-btn" style="display: none; background: #5a2a2a; margin-left: 10px;">Cancel</button>
     </div>
@@ -340,8 +332,6 @@ Respond in JSON format: {'correct': true} or {'correct': false}`
                 alert('Error: Judge prompt must include {weak_output} placeholder');
                 return;
             }
-        } else {
-            judge.customFunction = document.getElementById('custom-function').value.trim();
         }
 
         const success = await saveJudgeToServer(judge);
@@ -358,7 +348,6 @@ Respond in JSON format: {'correct': true} or {'correct': false}`
         document.getElementById('judge-type').value = 'llm';
         document.getElementById('judge-model').value = 'gpt-5-2025-08-07';
         document.getElementById('judge-prompt').value = '';
-        document.getElementById('custom-function').value = '';
         document.getElementById('form-title').textContent = 'Create New Judge';
         document.getElementById('save-btn').textContent = 'Save Judge';
         document.getElementById('cancel-btn').style.display = 'none';
@@ -383,8 +372,6 @@ Respond in JSON format: {'correct': true} or {'correct': false}`
             document.getElementById('judge-model').value = judge.model;
             document.getElementById('judge-return-type').value = judge.returnType || 'scalar';
             document.getElementById('judge-prompt').value = judge.prompt || '';
-        } else {
-            document.getElementById('custom-function').value = judge.customFunction || '';
         }
 
         document.getElementById('form-title').textContent = 'Edit Judge';
@@ -437,9 +424,8 @@ Respond in JSON format: {'correct': true} or {'correct': false}`
 
     // Toggle judge type options
     function toggleJudgeType() {
-        const type = document.getElementById('judge-type').value;
-        document.getElementById('llm-options').style.display = type === 'llm' ? 'block' : 'none';
-        document.getElementById('custom-options').style.display = type === 'custom' ? 'block' : 'none';
+        // Only LLM type is supported now
+        document.getElementById('llm-options').style.display = 'block';
     }
 
     document.getElementById('judge-type').addEventListener('change', toggleJudgeType);
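With the custom-function path removed, every judge is now an LLM judge. Pieced together from the fields this form reads and the placeholder check above, a saved judge record presumably looks something like the sketch below; the field names are inferred from the JavaScript, not from a documented schema, so treat the exact shape as an assumption.

judge = {
    "name": "exact-match",         # the server uses this in f"eval-{model_name}-{judge['name']}"
    "type": "llm",                 # the only option left in the judge-type <select>
    "model": "gpt-5-2025-08-07",   # the form's default judge model
    "returnType": "scalar",        # the edit form falls back to 'scalar' when unset
    # saveJudge() rejects any prompt without the {weak_output} placeholder
    "prompt": "Compare the outputs. Respond in JSON format: "
              "{'correct': true} or {'correct': false}\n\nWeak output: {weak_output}",
}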
{quickdistill-0.1.4 → quickdistill-0.1.6}/quickdistill/static/trace_viewer.html

@@ -375,6 +375,9 @@
 
     <div id="inference-progress" style="display: none; margin-top: 20px; padding: 15px; background: #0f0f0f; border-radius: 4px;">
         <div style="color: #4a9eff; margin-bottom: 10px;">Running inference...</div>
+        <div id="inference-progress-bar" style="width: 100%; height: 6px; background: #2a2a2a; border-radius: 3px; margin-bottom: 15px; overflow: hidden;">
+            <div id="inference-progress-fill" style="height: 100%; background: #4a9eff; width: 0%; transition: width 0.3s;"></div>
+        </div>
         <div id="progress-text" style="color: #888; font-family: monospace; font-size: 12px; white-space: pre-wrap;"></div>
     </div>
 </div>
@@ -919,20 +922,46 @@
     // Show progress
     document.getElementById('inference-progress').style.display = 'block';
     const progressText = document.getElementById('progress-text');
+    const progressFill = document.getElementById('inference-progress-fill');
     progressText.textContent = `Starting inference...\n`;
-    progressText.textContent += `Strong Export: ${strongExportFile}\n`;
-    progressText.textContent += `Models: ${allModels.join(', ')}\n`;
-    progressText.textContent += `Max Examples: ${numExamples}\n\n`;
+    progressFill.style.width = '0%';
+
+    // Start inference and poll for progress
+    let taskId = null;
+    let pollInterval = null;
+
+    const pollProgress = async () => {
+        if (!taskId) return;
+        try {
+            const resp = await fetch(`/progress/${taskId}`);
+            if (resp.ok) {
+                const progress = await resp.json();
+                const percent = (progress.current / progress.total) * 100;
+                progressFill.style.width = `${percent}%`;
+                progressText.textContent = `${progress.message}\nProgress: ${progress.current}/${progress.total} (${percent.toFixed(1)}%)\n`;
+            }
+        } catch (e) {
+            console.error('Error polling progress:', e);
+        }
+    };
 
     // Call backend API
     try {
+        // Generate a task ID for polling
+        taskId = `inference_${Date.now()}`;
+
+        // Start polling immediately
+        pollInterval = setInterval(pollProgress, 300);
+
+        // Start the inference
        const response = await fetch('/run_inference', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({
                models: allModels,
                strong_export_file: strongExportFile,
-                num_examples: numExamples
+                num_examples: numExamples,
+                task_id: taskId
            })
        });
 
@@ -941,8 +970,12 @@
         }
 
         const result = await response.json();
-        progressText.textContent += `\n✓ Complete!\n`;
-        progressText.textContent += `Results saved to: ${result.files.join(', ')}\n`;
+
+        // Stop polling
+        if (pollInterval) clearInterval(pollInterval);
+
+        progressText.textContent = `\n✓ Complete!\nResults saved to: ${result.files.join(', ')}\n`;
+        progressFill.style.width = '100%';
 
         setTimeout(() => {
             document.getElementById('inference-panel').style.display = 'none';
@@ -951,8 +984,7 @@
 
     } catch (error) {
         progressText.textContent += `\n✗ Error: ${error.message}\n`;
-        progressText.textContent += `\nNote: You need to run the backend server for inference.\n`;
-        progressText.textContent += `Run: python inference_server.py\n`;
+        if (pollInterval) clearInterval(pollInterval);
     }
 });
 
@@ -1091,21 +1123,44 @@
     const modelFiles = Array.from(selectedEvalModels);
     const results = [];
 
-    // Run evaluations sequentially
+    // Run evaluations sequentially with granular progress
     for (let i = 0; i < modelFiles.length; i++) {
         const modelFile = modelFiles[i];
-        const progress = ((i) / modelFiles.length) * 100;
-        progressFill.style.width = `${progress}%`;
 
-        progressText.textContent += `[${i+1}/${modelFiles.length}] Evaluating ${modelFile}...\n`;
+        progressText.textContent += `[${i+1}/${modelFiles.length}] Starting ${modelFile}...\n`;
+
+        let pollInterval = null;
+        let taskId = null;
+
+        const pollProgress = async () => {
+            if (!taskId) return;
+            try {
+                const resp = await fetch(`/progress/${taskId}`);
+                if (resp.ok) {
+                    const progress = await resp.json();
+                    const percent = (progress.current / progress.total) * 100;
+                    progressFill.style.width = `${percent}%`;
+                    progressText.textContent = `[${i+1}/${modelFiles.length}] ${progress.message}\nProgress: ${progress.current}/${progress.total} (${percent.toFixed(1)}%)\n`;
+                }
+            } catch (e) {
+                console.error('Error polling eval progress:', e);
+            }
+        };
 
         try {
+            // Generate task ID for this evaluation
+            taskId = `eval_${Date.now()}_${i}`;
+
+            // Start polling
+            pollInterval = setInterval(pollProgress, 300);
+
            const response = await fetch('/run_evaluation', {
                method: 'POST',
                headers: { 'Content-Type': 'application/json' },
                body: JSON.stringify({
                    model_file: modelFile,
-                    judge: judge
+                    judge: judge,
+                    task_id: taskId
                })
            });
 
@@ -1114,6 +1169,10 @@
             }
 
             const result = await response.json();
+
+            // Clear polling when done
+            if (pollInterval) clearInterval(pollInterval);
+
             progressText.textContent += `  ✓ Complete: ${result.evaluation_name}\n`;
             progressText.textContent += `  Examples: ${result.examples_evaluated}\n\n`;
 
@@ -1125,6 +1184,7 @@
             });
 
         } catch (error) {
+            if (pollInterval) clearInterval(pollInterval);
             progressText.textContent += `  ✗ Error: ${error.message}\n\n`;
         }
     }
{quickdistill-0.1.4 → quickdistill-0.1.6/quickdistill.egg-info}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: quickdistill
-Version: 0.1.4
+Version: 0.1.6
 Summary: Fast and easy toolkit for distilling AI models
 Author-email: Brett Young <bdytx5@umsystem.edu>
 License: MIT
@@ -21,12 +21,11 @@ Classifier: Programming Language :: Python :: 3.11
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Python: >=3.8
 Description-Content-Type: text/markdown
-Requires-Dist: flask>=2.0.0
-Requires-Dist: flask-cors>=3.0.0
-Requires-Dist: openai>=1.0.0
-Requires-Dist: weave>=0.50.0
-Requires-Dist: llmasajudge>=0.1.0
-Requires-Dist: datasets>=2.0.0
+Requires-Dist: flask==2.3.2
+Requires-Dist: flask-cors==4.0.0
+Requires-Dist: openai==2.14.0
+Requires-Dist: weave==0.52.14
+Requires-Dist: llmasajudge==0.1.15
 Provides-Extra: dev
 Requires-Dist: pytest>=7.0.0; extra == "dev"
 Requires-Dist: black>=22.0.0; extra == "dev"
quickdistill-0.1.6/quickdistill.egg-info/requires.txt

@@ -0,0 +1,11 @@
+flask==2.3.2
+flask-cors==4.0.0
+openai==2.14.0
+weave==0.52.14
+llmasajudge==0.1.15
+
+[dev]
+pytest>=7.0.0
+black>=22.0.0
+isort>=5.0.0
+flake8>=4.0.0
quickdistill-0.1.4/quickdistill.egg-info/requires.txt

@@ -1,12 +0,0 @@
-flask>=2.0.0
-flask-cors>=3.0.0
-openai>=1.0.0
-weave>=0.50.0
-llmasajudge>=0.1.0
-datasets>=2.0.0
-
-[dev]
-pytest>=7.0.0
-black>=22.0.0
-isort>=5.0.0
-flake8>=4.0.0
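Two things changed in the dependency set, and the second is easy to miss: every ranged requirement became an exact pin, and datasets was dropped entirely. A quick standard-library way to confirm what an installed environment actually resolved to (a generic sketch, nothing quickdistill-specific; importlib.metadata is available on the Python >=3.8 this package requires):

from importlib.metadata import PackageNotFoundError, version

for pkg in ("flask", "flask-cors", "openai", "weave", "llmasajudge", "datasets"):
    try:
        print(f"{pkg}: {version(pkg)}")
    except PackageNotFoundError:
        print(f"{pkg}: not installed")  # expected for 'datasets' with 0.1.6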