quickdistill 0.1.7__tar.gz → 0.1.9__tar.gz

This diff compares two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only and reflects the published package contents exactly.
Files changed (53)
  1. {quickdistill-0.1.7/quickdistill.egg-info → quickdistill-0.1.9}/PKG-INFO +1 -1
  2. quickdistill-0.1.9/dev/run_inf_with_providers.py +89 -0
  3. quickdistill-0.1.9/dev/v2_run_inf_w_providers.py +174 -0
  4. {quickdistill-0.1.7 → quickdistill-0.1.9}/pyproject.toml +1 -1
  5. quickdistill-0.1.9/quickdistill/__init__.py +28 -0
  6. quickdistill-0.1.9/quickdistill/__pycache__/__init__.cpython-310.pyc +0 -0
  7. quickdistill-0.1.9/quickdistill/__pycache__/server.cpython-310.pyc +0 -0
  8. {quickdistill-0.1.7 → quickdistill-0.1.9}/quickdistill/default_judges.json +2 -2
  9. {quickdistill-0.1.7 → quickdistill-0.1.9}/quickdistill/server.py +170 -29
  10. {quickdistill-0.1.7 → quickdistill-0.1.9}/quickdistill/static/judge_manager.html +12 -8
  11. {quickdistill-0.1.7 → quickdistill-0.1.9}/quickdistill/static/trace_viewer.html +379 -112
  12. {quickdistill-0.1.7 → quickdistill-0.1.9/quickdistill.egg-info}/PKG-INFO +1 -1
  13. {quickdistill-0.1.7 → quickdistill-0.1.9}/quickdistill.egg-info/SOURCES.txt +2 -0
  14. quickdistill-0.1.7/quickdistill/__init__.py +0 -17
  15. quickdistill-0.1.7/quickdistill/__pycache__/__init__.cpython-310.pyc +0 -0
  16. quickdistill-0.1.7/quickdistill/__pycache__/server.cpython-310.pyc +0 -0
  17. {quickdistill-0.1.7 → quickdistill-0.1.9}/.pycommands +0 -0
  18. {quickdistill-0.1.7 → quickdistill-0.1.9}/README.md +0 -0
  19. {quickdistill-0.1.7 → quickdistill-0.1.9}/dev/generate_test_traces.py +0 -0
  20. {quickdistill-0.1.7 → quickdistill-0.1.9}/dev/get_call.py +0 -0
  21. {quickdistill-0.1.7 → quickdistill-0.1.9}/dev/get_traces.py +0 -0
  22. {quickdistill-0.1.7 → quickdistill-0.1.9}/dev/inference_server.py +0 -0
  23. {quickdistill-0.1.7 → quickdistill-0.1.9}/dev/judge_manager.html +0 -0
  24. {quickdistill-0.1.7 → quickdistill-0.1.9}/dev/judges.json +0 -0
  25. {quickdistill-0.1.7 → quickdistill-0.1.9}/dev/old/TEST_TRACE_GENERATION.md +0 -0
  26. {quickdistill-0.1.7 → quickdistill-0.1.9}/dev/old/traces_data.json +0 -0
  27. {quickdistill-0.1.7 → quickdistill-0.1.9}/dev/projects/byyoung3_arena-detailed/traces_data.json +0 -0
  28. {quickdistill-0.1.7 → quickdistill-0.1.9}/dev/projects/byyoung3_claude-opus-4-1-tutorial/traces_data.json +0 -0
  29. {quickdistill-0.1.7 → quickdistill-0.1.9}/dev/projects/byyoung3_test-financial-qa/traces_data.json +0 -0
  30. {quickdistill-0.1.7 → quickdistill-0.1.9}/dev/pystatus +0 -0
  31. {quickdistill-0.1.7 → quickdistill-0.1.9}/dev/run_evaluation.py +0 -0
  32. {quickdistill-0.1.7 → quickdistill-0.1.9}/dev/run_weak_models.py +0 -0
  33. {quickdistill-0.1.7 → quickdistill-0.1.9}/dev/strong_exports/anthropic_claude-3.5-sonnet_10traces_v2.json +0 -0
  34. {quickdistill-0.1.7 → quickdistill-0.1.9}/dev/strong_exports/anthropic_claude-3.5-sonnet_20traces.json +0 -0
  35. {quickdistill-0.1.7 → quickdistill-0.1.9}/dev/strong_exports/claude-opus-4-1-20250805_1traces.json +0 -0
  36. {quickdistill-0.1.7 → quickdistill-0.1.9}/dev/strong_exports/gpt-5-2025-08-07_199traces.json +0 -0
  37. {quickdistill-0.1.7 → quickdistill-0.1.9}/dev/trace_viewer.html +0 -0
  38. {quickdistill-0.1.7 → quickdistill-0.1.9}/dev/traces_data.json +0 -0
  39. {quickdistill-0.1.7 → quickdistill-0.1.9}/dev/weak_model_google_gemini-2.5-flash.json +0 -0
  40. {quickdistill-0.1.7 → quickdistill-0.1.9}/dev/weak_model_meta-llama_Llama-3.1-8B-Instruct.json +0 -0
  41. {quickdistill-0.1.7 → quickdistill-0.1.9}/dev/weak_model_meta-llama_Llama-3.3-70B-Instruct.json +0 -0
  42. {quickdistill-0.1.7 → quickdistill-0.1.9}/dev/weak_model_openai_gpt-oss-20b.json +0 -0
  43. {quickdistill-0.1.7 → quickdistill-0.1.9}/quickdistill/__pycache__/cli.cpython-310.pyc +0 -0
  44. {quickdistill-0.1.7 → quickdistill-0.1.9}/quickdistill/__pycache__/get_traces.cpython-310.pyc +0 -0
  45. {quickdistill-0.1.7 → quickdistill-0.1.9}/quickdistill/cli.py +0 -0
  46. {quickdistill-0.1.7 → quickdistill-0.1.9}/quickdistill/default_projects/byyoung3_arena-detailed/traces_data.json +0 -0
  47. {quickdistill-0.1.7 → quickdistill-0.1.9}/quickdistill/get_traces.py +0 -0
  48. {quickdistill-0.1.7 → quickdistill-0.1.9}/quickdistill.egg-info/dependency_links.txt +0 -0
  49. {quickdistill-0.1.7 → quickdistill-0.1.9}/quickdistill.egg-info/entry_points.txt +0 -0
  50. {quickdistill-0.1.7 → quickdistill-0.1.9}/quickdistill.egg-info/requires.txt +0 -0
  51. {quickdistill-0.1.7 → quickdistill-0.1.9}/quickdistill.egg-info/top_level.txt +0 -0
  52. {quickdistill-0.1.7 → quickdistill-0.1.9}/setup.cfg +0 -0
  53. {quickdistill-0.1.7 → quickdistill-0.1.9}/update.sh +0 -0
{quickdistill-0.1.7/quickdistill.egg-info → quickdistill-0.1.9}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: quickdistill
- Version: 0.1.7
+ Version: 0.1.9
  Summary: Fast and easy toolkit for distilling AI models
  Author-email: Brett Young <bdytx5@umsystem.edu>
  License: MIT
quickdistill-0.1.9/dev/run_inf_with_providers.py
@@ -0,0 +1,89 @@
+ import os
+ import anthropic
+ 
+ # ---------------- GEMINI ----------------
+ from google import genai
+ # ---------------- GROK ----------------
+ from xai_sdk import Client as XAIClient
+ from xai_sdk.chat import user, system
+ 
+ import weave; weave.init("providers-testing")
+ 
+ 
+ def run_gemini(prompt: str):
+     client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])
+ 
+     resp = client.models.generate_content(
+         model="gemini-2.5-flash",
+         contents=[{
+             "role": "user",
+             "parts": [{"text": prompt}]
+         }]
+     )
+ 
+     return resp.text
+ 
+ 
+ # ---------------- CLAUDE ----------------
+ 
+ def run_claude(prompt: str):
+     client = anthropic.Anthropic(
+         api_key=os.environ["ANTHROPIC_API_KEY"]
+     )
+ 
+     msg = client.messages.create(
+         model="claude-haiku-4-5",
+         max_tokens=512,
+         messages=[
+             {"role": "user", "content": prompt}
+         ]
+     )
+ 
+     return msg.content[0].text
+ 
+ 
+ 
+ def run_grok(prompt: str):
+     client = XAIClient(
+         api_key=os.environ["XAI_API_KEY"],
+         timeout=3600
+     )
+ 
+     chat = client.chat.create(
+         model="grok-4-1-fast-reasoning"
+     )
+ 
+     chat.append(system("You are Grok, a helpful AI assistant."))
+     chat.append(user(prompt))
+ 
+     resp = chat.sample()
+ 
+     return resp.content
+ 
+ 
+ # ---------------- UNIFIED ROUTER ----------------
+ def run_model(provider: str, prompt: str):
+     provider = provider.lower()
+ 
+     if provider == "gemini":
+         return run_gemini(prompt)
+ 
+     if provider == "claude":
+         return run_claude(prompt)
+ 
+     if provider == "grok":
+         return run_grok(prompt)
+ 
+     raise ValueError(provider)
+ 
+ 
+ # ---------------- TEST ----------------
+ if __name__ == "__main__":
+     prompt = "Explain transformers simply"
+ 
+     for provider in ["gemini", "claude", "grok"]:
+         try:
+             print(f"\n=== {provider.upper()} ===")
+             print(run_model(provider, prompt))
+         except Exception as e:
+             print(provider, "failed:", e)
quickdistill-0.1.9/dev/v2_run_inf_w_providers.py
@@ -0,0 +1,174 @@
+ # pip install openai anthropic google-genai
+ 
+ import os
+ 
+ # ================= OPENAI =================
+ from openai import OpenAI
+ from google import genai
+ import anthropic
+ 
+ import weave; weave.init("providers-testing")
+ 
+ openai_client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
+ 
+ 
+ 
+ 
+ 
+ 
+ 
+ 
+ def openai_responses(prompt: str):
+     resp = openai_client.responses.create(
+         model="gpt-5-mini",
+         input=prompt
+     )
+     return resp.output_text
+ 
+ 
+ def openai_chat(prompt: str):
+     resp = openai_client.chat.completions.create(
+         model="gpt-4.1-mini",
+         messages=[{"role": "user", "content": prompt}]
+     )
+     return resp.choices[0].message.content
+ 
+ 
+ def openai_stream(prompt: str):
+     print("\n[OpenAI Streaming]")
+     with openai_client.responses.stream(
+         model="gpt-5-mini",
+         input=prompt
+     ) as stream:
+         for event in stream:
+             if event.type == "response.output_text.delta":
+                 print(event.delta, end="", flush=True)
+     print()
+ 
+ 
+ # ================= ANTHROPIC =================
+ 
+ 
+ anthropic_client = anthropic.Anthropic(
+     api_key=os.environ["ANTHROPIC_API_KEY"]
+ )
+ 
+ 
+ def anthropic_messages(prompt: str):
+     resp = anthropic_client.messages.create(
+         model="claude-haiku-4-5-20251001",
+         max_tokens=512,
+         messages=[{"role": "user", "content": prompt}]
+     )
+     return resp.content[0].text
+ 
+ 
+ def anthropic_stream(prompt: str):
+     print("\n[Anthropic Streaming]")
+     with anthropic_client.messages.stream(
+         model="claude-haiku-4-5-20251001",
+         max_tokens=512,
+         messages=[{"role": "user", "content": prompt}]
+     ) as stream:
+         for text in stream.text_stream:
+             print(text, end="", flush=True)
+     print()
+ 
+ 
+ # ================= GEMINI =================
+ 
+ gemini_client = genai.Client(
+     api_key=os.environ["GEMINI_API_KEY"]
+ )
+ 
+ 
+ def gemini_generate(prompt: str):
+     resp = gemini_client.models.generate_content(
+         model="gemini-2.5-flash",
+         contents=[{
+             "role": "user",
+             "parts": [{"text": prompt}]
+         }]
+     )
+     return resp.text
+ 
+ 
+ def gemini_chat(prompt: str):
+     chat = gemini_client.chats.create(
+         model="gemini-2.5-flash"
+     )
+     resp = chat.send_message(prompt)
+     return resp.text
+ 
+ 
+ def gemini_stream(prompt: str):
+     print("\n[Gemini Streaming]")
+     chat = gemini_client.chats.create(
+         model="gemini-2.5-flash"
+     )
+     stream = chat.send_message_stream(prompt)
+ 
+     for chunk in stream:
+         if chunk.text:
+             print(chunk.text, end="", flush=True)
+     print()
+ 
+ 
+ # ================= TOOL CALL EXAMPLE =================
+ # Minimal cross-provider demonstration using OpenAI only
+ # (Anthropic/Gemini support tools but schemas differ heavily)
+ 
+ def openai_tool_example():
+ 
+     tools = [{
+         "type": "function",
+         "function": {
+             "name": "get_weather",
+             "parameters": {
+                 "type": "object",
+                 "properties": {
+                     "city": {"type": "string"}
+                 },
+                 "required": ["city"]
+             }
+         }
+     }]
+ 
+     resp = openai_client.chat.completions.create(
+         model="gpt-4.1-mini",
+         messages=[{"role": "user", "content": "What's weather in Tokyo?"}],
+         tools=tools
+     )
+ 
+     return resp.choices[0].message.tool_calls
+ 
+ 
+ # ================= RUN ALL =================
+ 
+ if __name__ == "__main__":
+ 
+     prompt = "Explain transformers simply."
+ 
+     # print("\n==== OPENAI RESPONSES ====")
+     # print(openai_responses(prompt))
+ 
+     # print("\n==== OPENAI CHAT ====")
+     # print(openai_chat(prompt))
+ 
+     # openai_stream(prompt)
+ 
+     print("\n==== ANTHROPIC ====")
+     print(anthropic_messages(prompt))
+ 
+     anthropic_stream(prompt)
+ 
+     print("\n==== GEMINI GENERATE ====")
+     print(gemini_generate(prompt))
+ 
+     print("\n==== GEMINI CHAT ====")
+     print(gemini_chat(prompt))
+ 
+     gemini_stream(prompt)
+ 
+     print("\n==== OPENAI TOOL CALL ====")
+     print(openai_tool_example())
{quickdistill-0.1.7 → quickdistill-0.1.9}/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
  
  [project]
  name = "quickdistill"
- version = "0.1.7"
+ version = "0.1.9"
  description = "Fast and easy toolkit for distilling AI models"
  readme = "README.md"
  authors = [
quickdistill-0.1.9/quickdistill/__init__.py
@@ -0,0 +1,28 @@
+ """
+ QuickDistill - A fast and easy toolkit for distilling AI models.
+ 
+ This package provides tools to:
+ - Capture and view Weave traces
+ - Run weak models on strong model outputs
+ - Evaluate similarity using LLM judges
+ - Export datasets for model evaluation
+ """
+ 
+ # Monkey patch for aiohttp/litellm compatibility
+ # litellm expects aiohttp.ConnectionTimeoutError but it doesn't exist in some versions
+ try:
+     import aiohttp
+     if not hasattr(aiohttp, 'ConnectionTimeoutError'):
+         aiohttp.ConnectionTimeoutError = aiohttp.ServerTimeoutError
+     if not hasattr(aiohttp, 'SocketTimeoutError'):
+         aiohttp.SocketTimeoutError = aiohttp.ServerTimeoutError
+ except Exception:
+     pass
+ 
+ __version__ = "0.1.9"
+ __author__ = "Brett Young"
+ __email__ = "bdytx5@umsystem.edu"
+ 
+ from quickdistill.cli import main
+ 
+ __all__ = ["main"]
{quickdistill-0.1.7 → quickdistill-0.1.9}/quickdistill/default_judges.json
@@ -2,14 +2,14 @@
    {
      "name": "boolean_scorer",
      "type": "llm",
-     "model": "gpt-5",
+     "model": "openai/gpt-5",
      "returnType": "boolean",
      "prompt": "You are a strict evaluator comparing two AI responses (one from a strong reference model which is the ground truth, and one from a weaker model which we are testing to see how similar the responses it generates are to the strong model).\n\nStrong Model Response: {strong_output}\nWeak Model Response: {weak_output}\n\nDetermine if the weak model response is CORRECT compared to the strong model response.\nConsider a response CORRECT if it conveys the same key information and meaning, even if worded differently.\n\nRespond in JSON format: {'correct': true} or {'correct': false}"
    },
    {
      "name": "scalar_scorer",
      "type": "llm",
-     "model": "gpt-5",
+     "model": "openai/gpt-5",
      "returnType": "scalar",
      "prompt": "You are a strict evaluator comparing two AI responses (one from a strong reference model which is the ground truth, and one from a weaker model which we are testing to see how similar the responses it generates are to the strong model).\n\nStrong Model Response: {strong_output}\nWeak Model Response: {weak_output}\n\nEvaluate how similar the weak model response is to the strong model response.\nRate on a scale of 1-5 where 1=completely different and 5=nearly identical. RETURN ONLY ONE SCORE REPRESENTING THE AVERAGE SIMILARITY (EG 5-(avg_error))\n\nRespond in JSON format eg {'scores': the_score }"
    }
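Both default judges now carry a provider prefix because the judge model field is interpreted in LiteLLM format (see the judge_manager.html change below). A minimal sketch of what that format buys, assuming litellm is installed:

    import litellm

    # "openai/gpt-5" routes explicitly to the OpenAI provider; a bare
    # "gpt-5" would force LiteLLM to infer the provider from the name.
    resp = litellm.completion(
        model="openai/gpt-5",
        messages=[{"role": "user", "content": "Say OK."}],
    )
    print(resp.choices[0].message.content)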
{quickdistill-0.1.7 → quickdistill-0.1.9}/quickdistill/server.py
@@ -100,40 +100,133 @@ def run_inference(client, model, messages, max_tokens=1000):
          return f"ERROR: {str(e)}"
  
  def extract_output_content(output_str):
-     """Extract actual content from WeaveObject string or regular output"""
+     """Extract actual content from WeaveObject string, JSON response, or regular output.
+ 
+     Handles outputs from:
+     - OpenAI chat.completions.create (plain text)
+     - OpenAI responses.create (JSON with nested structure)
+     - Anthropic Messages (WeaveObject with content[0].text)
+     - Google Gemini (WeaveObject with candidates[0].content.parts[0].text)
+     """
+     import re
+     import json
+ 
      if not output_str:
          return None
  
-     # If it's a WeaveObject string, try to extract the text content
-     if isinstance(output_str, str) and 'WeaveObject' in output_str:
-         import re
-         # Try to find the 'text' field in the WeaveObject
-         match = re.search(r"'text':\s*'([^']*(?:\\'[^']*)*)'", output_str)
+     if not isinstance(output_str, str):
+         return str(output_str)
+ 
+     # Handle empty/streaming responses
+     if output_str in ('', 'None', 'null'):
+         return '[Streaming output - not captured]'
+ 
+     # Handle OpenAI responses.create JSON format
+     if output_str.startswith('{') and '"output"' in output_str:
+         try:
+             resp_obj = json.loads(output_str)
+             if 'output' in resp_obj and isinstance(resp_obj['output'], list):
+                 # Extract text from output messages
+                 text_parts = []
+                 for item in resp_obj['output']:
+                     if item.get('type') == 'message' and 'content' in item:
+                         for content in item['content']:
+                             if content.get('type') == 'output_text' and 'text' in content:
+                                 text_parts.append(content['text'])
+                 if text_parts:
+                     return '\n\n'.join(text_parts)
+         except (json.JSONDecodeError, KeyError, TypeError):
+             pass  # Fall through to other handlers
+ 
+     # Handle WeaveObject strings (Anthropic, Gemini)
+     if 'WeaveObject' in output_str:
+         # Improved regex that handles escape sequences properly
+         match = re.search(r"'text':\s*'((?:[^'\\]|\\.)*)'", output_str, re.DOTALL)
          if match:
-             # Unescape the string
+             # Unescape the string properly (order matters!)
              text = match.group(1)
-             text = text.replace('\\n', '\n').replace("\\'", "'").replace('\\\\', '\\')
+             text = text.replace("\\'", "'")    # escaped single quotes
+             text = text.replace('\\"', '"')    # escaped double quotes
+             text = text.replace('\\n', '\n')   # newlines
+             text = text.replace('\\t', '\t')   # tabs
+             text = text.replace('\\r', '\r')   # carriage returns
+             text = text.replace('\\\\', '\\')  # escaped backslashes (do this last!)
              return text
  
-     # Otherwise return as-is
+         # If no text field found, return truncated version
+         return f"[Complex WeaveObject - could not extract text]\n{output_str[:500]}..."
+ 
+     # Plain text output (standard OpenAI chat format)
      return output_str
  
  
  def extract_messages_from_trace(trace):
-     """Extract messages from a trace in the format needed for inference"""
-     # Check if messages are at top level
+     """Extract messages from a trace in the format needed for inference.
+ 
+     Handles message extraction from:
+     - OpenAI chat.completions.create (messages at top level or in inputs.messages)
+     - OpenAI responses.create (inputs.input field)
+     - Anthropic Messages (inputs.messages)
+     - Google Gemini generate_content (inputs.contents array)
+     - Google Gemini Chat.send_message (inputs.message string)
+     """
+     import re
+ 
+     # Get op_display_name for provider detection
+     op_name = trace.get('op_display_name', '')
+ 
+     # Check if messages are at top level (already extracted/cached)
      if trace.get('messages') and isinstance(trace['messages'], list) and len(trace['messages']) > 0:
          return trace['messages']
  
      # Check if messages are in inputs
      if trace.get('inputs') and isinstance(trace['inputs'], dict):
-         messages = trace['inputs'].get('messages', [])
+         inputs = trace['inputs']
+ 
+         # Standard OpenAI/Anthropic: inputs.messages
+         messages = inputs.get('messages', [])
          if isinstance(messages, list) and len(messages) > 0:
              return messages
  
+         # OpenAI responses.create: inputs.input (simple string)
+         if 'openai.responses' in op_name and 'input' in inputs:
+             return [{"role": "user", "content": inputs['input']}]
+ 
+         # Gemini Chat.send_message: inputs.message (simple string)
+         if 'Chat.send_message' in op_name and 'message' in inputs:
+             return [{"role": "user", "content": inputs['message']}]
+ 
+         # Gemini generate_content: inputs.contents (array of content objects or WeaveObject strings)
+         if 'google.genai' in op_name and 'contents' in inputs:
+             contents = inputs['contents']
+             if isinstance(contents, list) and len(contents) > 0:
+                 messages = []
+                 for content in contents:
+                     # Handle WeaveObject string format
+                     if isinstance(content, str) and 'WeaveObject' in content:
+                         role_match = re.search(r"'role':\s*'(\w+)'", content)
+                         text_match = re.search(r"'text':\s*'((?:[^'\\]|\\.)*)'", content, re.DOTALL)
+                         text = '[Complex content]'
+                         if text_match:
+                             text = text_match.group(1)
+                             text = text.replace("\\'", "'").replace('\\n', '\n').replace('\\\\', '\\')
+                         messages.append({
+                             "role": role_match.group(1) if role_match else "user",
+                             "content": text
+                         })
+                     # Handle regular dict format
+                     elif isinstance(content, dict):
+                         role = content.get('role', 'user')
+                         parts = content.get('parts', [])
+                         if isinstance(parts, list):
+                             text = '\n'.join([p.get('text', '') for p in parts if isinstance(p, dict)])
+                             messages.append({"role": role, "content": text})
+                 if messages:
+                     return messages
+ 
          # Check if inputs has question/context format (from generate_test_traces.py wrapper traces)
-         question = trace['inputs'].get('question')
-         context = trace['inputs'].get('context')
+         question = inputs.get('question')
+         context = inputs.get('context')
          if question:
              if context:
                  prompt = f"""Based on the following context, answer the question concisely.
@@ -753,16 +846,26 @@ def delete_judge():
  
  @app.route('/run_evaluation', methods=['POST'])
  def run_evaluation_endpoint():
-     """Run evaluation using specified judge"""
- 
+     """Run evaluation using specified judge(s) - supports multiple judges"""
+ 
  
      data = request.json
      model_file = data.get('model_file')
-     judge = data.get('judge')
+     judges = data.get('judges')  # Can be a list or single judge dict
      task_id = data.get('task_id', f"eval_{id(data)}")
  
-     if not model_file or not judge:
-         return jsonify({'error': 'Missing model_file or judge'}), 400
+     # Handle both single judge (backwards compat) and multiple judges
+     if data.get('judge'):
+         judges = [data.get('judge')]
+     elif not judges:
+         return jsonify({'error': 'Missing judge or judges'}), 400
+ 
+     # Ensure judges is a list
+     if not isinstance(judges, list):
+         judges = [judges]
+ 
+     if not model_file:
+         return jsonify({'error': 'Missing model_file'}), 400
  
      # Load weak model results
      model_path = DATA_DIR / model_file
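For context, a hypothetical client call showing the payload shape the endpoint now accepts (server address and prompts are illustrative; the old single {"judge": {...}} payload still works):

    import requests

    resp = requests.post("http://localhost:5000/run_evaluation", json={
        "model_file": "weak_model_openai_gpt-oss-20b.json",
        "judges": [
            {"name": "boolean_scorer", "type": "llm",
             "model": "openai/gpt-5", "returnType": "boolean", "prompt": "..."},
            {"name": "scalar_scorer", "type": "llm",
             "model": "openai/gpt-5", "returnType": "scalar", "prompt": "..."},
        ],
    })
    print(resp.json())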
@@ -782,18 +885,22 @@ def run_evaluation_endpoint():
      # Extract model name from filename
      model_name = model_file.replace('weak_model_', '').replace('.json', '')
  
+     # Create evaluation name with all judges
+     judges_names = '_'.join([j['name'] for j in judges])
+     eval_name = f"eval-{model_name}-{judges_names}"
+ 
      # Initialize progress tracking
      total_steps = len(results)
      progress_state[task_id] = {
          'current': 0,
          'total': total_steps,
-         'message': f'Starting evaluation: {model_name} with {judge["name"]}...',
+         'message': f'Starting evaluation: {model_name} with {len(judges)} judge(s)...',
          'status': 'running'
      }
  
      # Create evaluation logger
      ev = weave.EvaluationLogger(
-         name=f"eval-{model_name}-{judge['name']}",
+         name=eval_name,
          model=model_name
      )
  
@@ -818,13 +925,20 @@
          if messages and len(messages) > 0:
              question = messages[0].get('content', '')
  
-         # Run judge
-         if judge['type'] == 'llm':
-             scores = run_llm_judge_eval(judge, strong_output, weak_output, question)
-         else:
-             scores = run_custom_judge_eval(judge, strong_output, weak_output)
+         # Run all judges and collect scores
+         all_scores = {}
+         for judge in judges:
+             # Run judge
+             if judge['type'] == 'llm':
+                 scores = run_llm_judge_eval(judge, strong_output, weak_output, question)
+             else:
+                 scores = run_custom_judge_eval(judge, strong_output, weak_output)
+ 
+             # Merge scores with judge name prefix to avoid conflicts
+             for score_key, score_value in scores.items():
+                 all_scores[f"{judge['name']}_{score_key}"] = score_value
  
-         # Log to weave
+         # Log to weave with all scores from all judges
          ev.log_example(
              inputs={
                  "question": question,
@@ -834,7 +948,7 @@
                  "weak_output": weak_output
  
              },
-             scores=scores
+             scores=all_scores
          )
  
      # Finish evaluation
@@ -850,10 +964,11 @@
  
      return jsonify({
          'status': 'success',
-         'evaluation_name': f"eval-{model_name}-{judge['name']}",
+         'evaluation_name': eval_name,
          'examples_evaluated': len(results),
          'weave_url': ev.ui_url,
          'strong_export': strong_export,
+         'judges': [j['name'] for j in judges],
          'task_id': task_id
      })
  
@@ -1032,6 +1147,32 @@ def list_projects():
      return jsonify({'projects': projects})
  
  
+ @app.route('/get_preferences', methods=['GET'])
+ def get_preferences():
+     """Get saved user preferences"""
+     prefs_file = DATA_DIR / 'preferences.json'
+     if prefs_file.exists():
+         try:
+             with open(prefs_file, 'r') as f:
+                 return jsonify(json.load(f))
+         except:
+             pass
+     return jsonify({})
+ 
+ 
+ @app.route('/save_preferences', methods=['POST'])
+ def save_preferences():
+     """Save user preferences"""
+     try:
+         data = request.json
+         prefs_file = DATA_DIR / 'preferences.json'
+         with open(prefs_file, 'w') as f:
+             json.dump(data, f, indent=2)
+         return jsonify({'status': 'success'})
+     except Exception as e:
+         return jsonify({'status': 'error', 'message': str(e)}), 500
+ 
+ 
  # Routes for serving HTML pages
  @app.route('/')
  def index():
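A hypothetical round trip against the two new endpoints (server address assumed; preference keys are arbitrary JSON):

    import requests

    BASE = "http://localhost:5000"  # assumed local server address

    # Persist UI preferences; the server writes them to DATA_DIR/preferences.json.
    requests.post(f"{BASE}/save_preferences",
                  json={"last_judge": "boolean_scorer", "theme": "dark"})

    # Read them back; returns {} if nothing has been saved yet.
    print(requests.get(f"{BASE}/get_preferences").json())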
{quickdistill-0.1.7 → quickdistill-0.1.9}/quickdistill/static/judge_manager.html
@@ -183,12 +183,10 @@
  
      <div id="llm-options" style="display: block;">
          <label for="judge-model">Model</label>
-         <select id="judge-model">
-             <option value="gpt-5">gpt-5</option>
-             <option value="gpt-4o">gpt-4o</option>
-             <option value="gpt-4o-mini">gpt-4o-mini</option>
-             <option value="claude-3-5-sonnet-20241022">claude-3-5-sonnet</option>
-         </select>
+         <input type="text" id="judge-model" placeholder="e.g., openai/gpt-5, anthropic/claude-3.5-sonnet" value="openai/gpt-5">
+         <p style="color: #888; font-size: 12px; margin-top: 5px; margin-bottom: 15px;">
+             <strong>Note:</strong> Uses LiteLLM format. Examples: <code>openai/gpt-5</code>, <code>anthropic/claude-3.5-sonnet</code>, <code>openai/gpt-4o</code>
+         </p>
  
          <label for="judge-return-type">Return Type</label>
          <select id="judge-return-type">
@@ -393,10 +391,16 @@ Respond in JSON format: {'correct': true} or {'correct': false}`
          };
  
          if (type === 'llm') {
-             judge.model = document.getElementById('judge-model').value;
+             judge.model = document.getElementById('judge-model').value.trim();
              judge.returnType = document.getElementById('judge-return-type').value;
              judge.prompt = document.getElementById('judge-prompt').value.trim();
  
+             // Validate model
+             if (!judge.model) {
+                 alert('Error: Please enter a model (e.g., openai/gpt-5)');
+                 return;
+             }
+ 
              // Validate required placeholders
              if (!judge.prompt.includes('{strong_output}')) {
                  alert('Error: Judge prompt must include {strong_output} placeholder');
@@ -420,7 +424,7 @@
      function resetForm() {
          document.getElementById('judge-name').value = '';
          document.getElementById('judge-type').value = 'llm';
-         document.getElementById('judge-model').value = 'gpt-5-2025-08-07';
+         document.getElementById('judge-model').value = 'openai/gpt-5';
          document.getElementById('judge-prompt').value = '';
          document.getElementById('form-title').textContent = 'Create New Judge';
          document.getElementById('save-btn').textContent = 'Save Judge';