PyPI - pixie-qa - Version diff - 0.2.0.tar.gz → 0.2.1.tar.gz - Mend

pixie-qa 0.2.0.tar.gz → 0.2.1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (128)

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pixie-qa
-Version: 0.2.0
+Version: 0.2.1
 Summary: Automated quality assurance for AI applications
 Project-URL: Homepage, https://github.com/yiouli/pixie-qa
 Project-URL: Repository, https://github.com/yiouli/pixie-qa

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/pixie/evals/eval_utils.py RENAMED Viewed

@@ -81,16 +81,27 @@ def _publish_to_scorecard(
                 "expected_output": (
                     None
                     if isinstance(ev.expected_output, _Unset)
-                    else (str(ev.expected_output) if ev.expected_output is not None else None)
+                    else (
+                        str(ev.expected_output)
+                        if ev.expected_output is not None
+                        else None
+                    )
+                ),
+                "actual_output": (
+                    str(ev.eval_output) if ev.eval_output is not None else None
                 ),
-                "actual_output": str(ev.eval_output) if ev.eval_output is not None else None,
                 "metadata": ev.eval_metadata,
             }
             for ev in evaluables
         )
     else:
         ev_dicts = tuple(
-            {"input": str(inp), "expected_output": None, "actual_output": None, "metadata": None}
+            {
+                "input": str(inp),
+                "expected_output": None,
+                "actual_output": None,
+                "metadata": None,
+            }
             for inp in eval_inputs
         )
@@ -200,17 +211,23 @@ async def assert_pass(
     If the pass criteria are not met, raises :class:`EvalAssertionError`
     carrying the tensor.
-    When ``evaluables`` is provided, each item is used directly as the
-    evaluable for the corresponding input (it already carries its own
-    ``expected_output``).  When ``evaluables`` is ``None``, the evaluable
-    is constructed from the captured trace as before.
+    When ``evaluables`` is provided, behaviour depends on whether each
+    item already has ``eval_output`` populated:
+    - **eval_output is None** — the ``runnable`` is called via
+      ``run_and_evaluate`` to produce an output from traces, and
+      ``expected_output`` from the evaluable is merged into the result.
+    - **eval_output is not None** — the evaluable is used directly
+      (the runnable is not called for that item).
     Args:
         runnable: The application function to test.
         eval_inputs: List of inputs, each passed to *runnable*.
         evaluators: List of evaluator callables.
         evaluables: Optional list of ``Evaluable`` items, one per input.
-            Must have the same length as *eval_inputs* when provided.
+            When provided, their ``expected_output`` is forwarded to
+            ``run_and_evaluate``.  Must have the same length as
+            *eval_inputs*.
         passes: How many times to run the entire test matrix.
         pass_criteria: Receives the results tensor, returns
             ``(passed, message)``.  Defaults to "every score >= 0.5".
@@ -234,11 +251,26 @@ async def assert_pass(
         pass_results: list[list[Evaluation]] = []
         for idx, inp in enumerate(eval_inputs):
             if evaluables is not None:
-                # Use provided evaluable directly — skip trace capture
                 ev_item = evaluables[idx]
-                eval_coros = [
-                    evaluate(evaluator=ev, evaluable=ev_item) for ev in evaluators
-                ]
+                if ev_item.eval_output is None:
+                    # eval_output not yet computed — run the runnable to
+                    # produce it via trace capture, and merge the dataset
+                    # item's expected_output into the result.
+                    eval_coros = [
+                        run_and_evaluate(
+                            evaluator=ev,
+                            runnable=runnable,
+                            eval_input=inp,
+                            expected_output=ev_item.expected_output,
+                            from_trace=from_trace,
+                        )
+                        for ev in evaluators
+                    ]
+                else:
+                    # eval_output already populated — evaluate directly.
+                    eval_coros = [
+                        evaluate(evaluator=ev, evaluable=ev_item) for ev in evaluators
+                    ]
             else:
                 eval_coros = [
                     run_and_evaluate(

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "pixie-qa"
-version = "0.2.0"
+version = "0.2.1"
 description = "Automated quality assurance for AI applications"
 readme = "README.md"
 requires-python = ">=3.11"

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/skills/eval-driven-dev/SKILL.md RENAMED Viewed

@@ -27,11 +27,28 @@ This skill is about doing the work, not describing it. Read code, edit files, ru
 ## Before you start
+Run the following to keep the skill and package up to date. If any command fails or is blocked by the environment, continue — do not let failures here block the rest of the workflow.
+**Update the skill:**
 ```bash
-python resources/check_version.py
+npx skills update
 ```
-If the script reports updates needed, run the upgrade commands it suggests. If it says "All up to date" or fails, continue.
+**Upgrade the `pixie-qa` package**
+Make sure the python virtual environment is active and use the project's package manager:
+```bash
+# uv project (uv.lock exists):
+uv add pixie-qa --upgrade
+# poetry project (poetry.lock exists):
+poetry add pixie-qa@latest
+# pip / no lock file:
+pip install --upgrade pixie-qa
+```
 ---
@@ -213,8 +230,8 @@ Each dataset item contains:
 - `eval_input`: the made-up input data (app input + external dependency data)
 - `expected_output`: case-specific expectation text (optional — only for test cases with expectations beyond the universal criteria). This is a reference for evaluation, not an exact expected answer.
-- `eval_output`: set to `"UNSET"` — produced at test time by the utility function from Step 3
+At test time, `eval_output` is produced by the utility function from Step 3 and is not stored in the dataset itself.
 Read `references/dataset-generation.md` for the dataset creation API, data shape matching, expected_output strategy, and validation checklist.
 #### 4c. Validate the dataset

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/skills/eval-driven-dev/references/eval-tests.md RENAMED Viewed

@@ -81,6 +81,7 @@ concise_voice_style = create_llm_evaluator(
 **How template variables work**: `{eval_input}`, `{eval_output}`, `{expected_output}` are the only placeholders. Each is replaced with a string representation of the corresponding `Evaluable` field — if the field is a dict or list, it becomes a JSON string. The LLM judge sees the full serialized value.
 **Rules**:
 - **Only `{eval_input}`, `{eval_output}`, `{expected_output}`** — no nested access like `{eval_input[key]}` (this will crash with a `TypeError`)
 - **Keep templates short and direct** — the system prompt already tells the LLM to return `Score: X.X`. Your template just needs to present the data and define the scoring criteria.
 - **Don't instruct the LLM to "parse" or "extract" data** — just present the values and state the criteria. The LLM can read JSON naturally.

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/skills/eval-driven-dev/references/run-harness-patterns.md RENAMED Viewed

@@ -8,11 +8,11 @@ For `enable_storage()` and `observe` API details, see `references/pixie-api.md`
 Look at how a real user or client invokes the app, and do the same thing in your utility function:
-| App type | Entry point example | How to invoke it |
-| --- | --- | --- |
-| **Web server** (FastAPI, Flask) | HTTP/WebSocket endpoint | `TestClient`, `httpx`, or subprocess + HTTP requests |
-| **CLI application** | Command-line invocation | `subprocess.run()` |
-| **Standalone function** (no server, no middleware) | Python function | Import and call directly |
+| App type                                           | Entry point example     | How to invoke it                                     |
+| -------------------------------------------------- | ----------------------- | ---------------------------------------------------- |
+| **Web server** (FastAPI, Flask)                    | HTTP/WebSocket endpoint | `TestClient`, `httpx`, or subprocess + HTTP requests |
+| **CLI application**                                | Command-line invocation | `subprocess.run()`                                   |
+| **Standalone function** (no server, no middleware) | Python function         | Import and call directly                             |
 **Do NOT call an inner function** like `agent.respond()` directly just because it's simpler. Between the entry point and that inner function, the app does request handling, state management, prompt assembly, routing — all of which is under test. When you call an inner function, you skip all of that and end up reimplementing it in your test. Now your test is testing test code, not app code.
@@ -137,11 +137,10 @@ from pixie_qa.scripts.mock_backends import (
     MockSynthesisBackend,
 )
-enable_storage()
 @observe
 def run_app(eval_input: dict) -> dict:
     """Run the voice agent through its real FastAPI app layer."""
+    enable_storage()
     # Patch external dependencies before importing the app
     with patch("myapp.app.transcription_backend", MockTranscriptionBackend()), \
          patch("myapp.app.synthesis_backend", MockSynthesisBackend()), \

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/skills/eval-driven-dev/references/understanding-app.md RENAMED Viewed

@@ -142,10 +142,10 @@ These are the primary testability seams. In Step 3, you'll write mock implementa
 <For each external dependency, how will you replace it in the utility function (Step 3)?>
-| Dependency | Mock approach | What mock provides (IN) | What mock captures (OUT) |
-| --- | --- | --- | --- |
-| <e.g., Redis> | <mock.patch / mock class / DI> | <conversation history from eval_input> | <saved messages> |
-| <e.g., STT service> | <MockTranscriptionBackend> | <text from eval_input> | <n/a> |
+| Dependency          | Mock approach                  | What mock provides (IN)                | What mock captures (OUT) |
+| ------------------- | ------------------------------ | -------------------------------------- | ------------------------ |
+| <e.g., Redis>       | <mock.patch / mock class / DI> | <conversation history from eval_input> | <saved messages>         |
+| <e.g., STT service> | <MockTranscriptionBackend>     | <text from eval_input>                 | <n/a>                    |
 ### Intermediate states to capture

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/tests/pixie/evals/test_eval_utils.py RENAMED Viewed

@@ -438,6 +438,108 @@ class TestAssertPassEvaluables:
         )
         assert received_outputs == ["echo:hello"]
+    @pytest.mark.asyncio
+    async def test_evaluables_with_runnable_calls_runnable(self) -> None:
+        """When evaluables AND runnable are provided, runnable is still called."""
+        call_count = 0
+        def counting_app(input: Any) -> None:  # noqa: A002
+            nonlocal call_count
+            call_count += 1
+            with px.start_observation(input=input, name="app") as obs:
+                obs.set_output(f"ran:{input}")
+        items = [
+            Evaluable(eval_input="q1", expected_output="e1"),
+            Evaluable(eval_input="q2", expected_output="e2"),
+        ]
+        await assert_pass(
+            runnable=counting_app,
+            eval_inputs=["q1", "q2"],
+            evaluators=[_always_pass],
+            evaluables=items,
+        )
+        assert call_count == 2, "runnable should be called for each input"
+    @pytest.mark.asyncio
+    async def test_evaluables_precomputed_output_used_directly(self) -> None:
+        """When eval_output is already set, the evaluable is used directly."""
+        received: list[Evaluable] = []
+        async def capture_eval(
+            evaluable: Evaluable,
+            *,
+            trace: list[ObservationNode] | None = None,
+        ) -> Evaluation:
+            received.append(evaluable)
+            return Evaluation(score=1.0, reasoning="ok")
+        items = [
+            Evaluable(
+                eval_input="hello",
+                eval_output="precomputed_output",
+                expected_output="ref",
+            ),
+        ]
+        await assert_pass(
+            runnable=_sync_app,
+            eval_inputs=["hello"],
+            evaluators=[capture_eval],
+            evaluables=items,
+        )
+        # eval_output should be the pre-computed value (runnable not called)
+        assert received[0].eval_output == "precomputed_output"
+        assert received[0].expected_output == "ref"
+    @pytest.mark.asyncio
+    async def test_evaluables_none_output_runs_runnable(self) -> None:
+        """When eval_output is None, the runnable is called to produce it."""
+        received: list[Evaluable] = []
+        async def capture_eval(
+            evaluable: Evaluable,
+            *,
+            trace: list[ObservationNode] | None = None,
+        ) -> Evaluation:
+            received.append(evaluable)
+            return Evaluation(score=1.0, reasoning="ok")
+        items = [
+            Evaluable(eval_input="hello", expected_output="ref"),
+        ]
+        await assert_pass(
+            runnable=_sync_app,
+            eval_inputs=["hello"],
+            evaluators=[capture_eval],
+            evaluables=items,
+        )
+        # eval_output should come from the trace (runnable execution)
+        assert received[0].eval_output == "echo:hello"
+        # expected_output should come from the evaluable
+        assert received[0].expected_output == "ref"
+    @pytest.mark.asyncio
+    async def test_evaluables_from_trace_respected(self) -> None:
+        """from_trace is honoured even when evaluables are provided."""
+        async def check_child(
+            evaluable: Evaluable,
+            *,
+            trace: list[ObservationNode] | None = None,
+        ) -> Evaluation:
+            assert evaluable.eval_output == "generated"
+            assert evaluable.expected_output == "ref"
+            return Evaluation(score=1.0, reasoning="ok")
+        items = [Evaluable(eval_input="q1", expected_output="ref")]
+        await assert_pass(
+            runnable=_nested_app,
+            eval_inputs=["q1"],
+            evaluators=[check_child],
+            evaluables=items,
+            from_trace=lambda tree: as_evaluable(tree[0].find("generator")[0].span),
+        )
 # ── assert_dataset_pass tests ─────────────────────────────────────────────
@@ -529,3 +631,36 @@ class TestAssertDatasetPass:
                 passes=3,
             )
         assert len(exc_info.value.results) == 3
+    @pytest.mark.asyncio
+    async def test_runnable_is_called_with_eval_output_from_trace(
+        self, tmp_path: Path
+    ) -> None:
+        """assert_dataset_pass calls the runnable; eval_output comes from trace."""
+        store = DatasetStore(dataset_dir=tmp_path)
+        store.create(
+            "run-ds",
+            items=[
+                Evaluable(eval_input="q1", expected_output="e1"),
+            ],
+        )
+        received: list[Evaluable] = []
+        async def capture_eval(
+            evaluable: Evaluable,
+            *,
+            trace: list[ObservationNode] | None = None,
+        ) -> Evaluation:
+            received.append(evaluable)
+            return Evaluation(score=1.0, reasoning="ok")
+        await assert_dataset_pass(
+            runnable=_sync_app,
+            dataset_name="run-ds",
+            evaluators=[capture_eval],
+            dataset_dir=str(tmp_path),
+        )
+        # eval_output should come from the runnable (trace), not the dataset
+        assert received[0].eval_output == "echo:q1"
+        # expected_output should still come from the dataset
+        assert received[0].expected_output == "e1"

pixie_qa-0.2.0/skills/eval-driven-dev/resources/check_version.py DELETED Viewed

@@ -1,126 +0,0 @@
-#!/usr/bin/env python3
-"""Check whether the eval-driven-dev skill and pixie-qa package need updating.
-Prints one of:
-  "SKILL upgrade available"
-  "Package upgrade available"
-  "SKILL and Package upgrade available"
-  "All up to date"
-Exit codes:
-  0 — everything is up to date (or status could not be determined)
-  1 — at least one component needs an upgrade
-"""
-from __future__ import annotations
-import importlib.metadata
-import json
-import re
-from pathlib import Path
-from urllib.error import URLError
-from urllib.request import urlopen
-# ── Constants ────────────────────────────────────────────────────────────────
-SKILL_URL = (
-    "https://raw.githubusercontent.com/yiouli/pixie-qa/"
-    "main/skills/eval-driven-dev/SKILL.md"
-)
-PYPI_URL = "https://pypi.org/pypi/pixie-qa/json"
-# ── Helpers ──────────────────────────────────────────────────────────────────
-_RE_FRONTMATTER = re.compile(r"^---\s*\n(.*?)\n---", re.DOTALL)
-_RE_VERSION = re.compile(r"^\s+version:\s*(\S+)$", re.MULTILINE)
-def _parse_version(text: str) -> str:
-    """Extract metadata.version from SKILL.md YAML frontmatter."""
-    match = _RE_FRONTMATTER.search(text)
-    frontmatter = match.group(1) if match else text
-    m = _RE_VERSION.search(frontmatter)
-    return m.group(1).strip() if m else "0.0.0"
-def _normalise_version(version: str) -> tuple[int, ...]:
-    parts: list[int] = []
-    for part in version.strip().split("."):
-        try:
-            parts.append(int(part))
-        except ValueError:
-            break
-    return tuple(parts)
-# ── Skill check ──────────────────────────────────────────────────────────────
-def _skill_needs_upgrade() -> bool:
-    """Return True if a newer version of the skill is available on GitHub."""
-    resource_dir = Path(__file__).resolve().parent
-    skill_path = resource_dir.parent / "SKILL.md"
-    if not skill_path.exists():
-        # SKILL.md is not on disk (e.g. prompt-based agents); skip check.
-        return False
-    local_text = skill_path.read_text(encoding="utf-8")
-    local_version = _parse_version(local_text)
-    try:
-        with urlopen(SKILL_URL, timeout=10) as resp:
-            remote_version = _parse_version(resp.read().decode("utf-8"))
-    except (OSError, URLError):
-        return False
-    return _normalise_version(remote_version) > _normalise_version(local_version)
-# ── Package check ─────────────────────────────────────────────────────────────
-def _is_local_install(dist: importlib.metadata.Distribution) -> bool:
-    """Return True if pixie-qa was installed from a local path rather than PyPI."""
-    try:
-        text = dist.read_text("direct_url.json")
-        if text:
-            url: str = json.loads(text).get("url", "")
-            return url.startswith("file://")
-    except Exception:
-        pass
-    return False
-def _package_needs_upgrade() -> bool:
-    """Return True if pixie-qa is missing or a newer version is on PyPI."""
-    try:
-        dist = importlib.metadata.distribution("pixie-qa")
-    except importlib.metadata.PackageNotFoundError:
-        return True
-    if _is_local_install(dist):
-        return False
-    installed: str = dist.metadata["Version"]
-    try:
-        with urlopen(PYPI_URL, timeout=10) as resp:
-            latest: str = json.loads(resp.read().decode("utf-8"))["info"]["version"]
-    except (OSError, URLError, KeyError, ValueError):
-        return False
-    return _normalise_version(latest) > _normalise_version(installed)
-# ── Entry point ───────────────────────────────────────────────────────────────
-def main() -> int:
-    skill = _skill_needs_upgrade()
-    package = _package_needs_upgrade()
-    if skill and package:
-        print("SKILL and Package upgrade available")
-    elif skill:
-        print("SKILL upgrade available")
-    elif package:
-        print("Package upgrade available")
-    else:
-        print("All up to date")
-    return 1 if (skill or package) else 0
-if __name__ == "__main__":
-    raise SystemExit(main())

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/.github/copilot-instructions.md RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/.github/workflows/publish.yml RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/.gitignore RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/LICENSE RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/README.md RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/changelogs/async-handler-processing.md RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/changelogs/autoevals-adapters.md RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/changelogs/cli-dataset-commands.md RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/changelogs/dataset-management.md RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/changelogs/deep-research-demo.md RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/changelogs/eval-harness.md RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/changelogs/expected-output-in-evals.md RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/changelogs/instrumentation-module-implementation.md RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/changelogs/loud-failure-mode.md RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/changelogs/manual-instrumentation-usability.md RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/changelogs/observation-store-implementation.md RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/changelogs/observe-sensitive-field-stripping.md RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/changelogs/pixie-directory-and-skill-improvements.md RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/changelogs/pixie-test-e2e-suite.md RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/changelogs/root-package-exports-and-trace-id.md RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/changelogs/scorecard-branding-and-skill-version-check.md RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/changelogs/scorecard-eval-detail-dialog.md RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/changelogs/skill-v2-and-rootdir-discovery.md RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/changelogs/test-scorecard.md RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/changelogs/usability-utils.md RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/docs/package.md RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/pixie/__init__.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/pixie/cli/__init__.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/pixie/cli/dataset_command.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/pixie/cli/main.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/pixie/cli/test_command.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/pixie/cli/trace_command.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/pixie/config.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/pixie/dataset/__init__.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/pixie/dataset/models.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/pixie/dataset/store.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/pixie/evals/__init__.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/pixie/evals/criteria.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/pixie/evals/evaluation.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/pixie/evals/llm_evaluator.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/pixie/evals/runner.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/pixie/evals/scorecard.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/pixie/evals/scorers.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/pixie/evals/trace_capture.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/pixie/evals/trace_helpers.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/pixie/favicon.png RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/pixie/instrumentation/__init__.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/pixie/instrumentation/context.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/pixie/instrumentation/handler.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/pixie/instrumentation/handlers.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/pixie/instrumentation/instrumentors.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/pixie/instrumentation/observation.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/pixie/instrumentation/processor.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/pixie/instrumentation/queue.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/pixie/instrumentation/spans.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/pixie/storage/__init__.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/pixie/storage/evaluable.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/pixie/storage/piccolo_conf.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/pixie/storage/piccolo_migrations/__init__.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/pixie/storage/serialization.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/pixie/storage/store.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/pixie/storage/tables.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/pixie/storage/tree.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/skills/eval-driven-dev/references/dataset-generation.md RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/skills/eval-driven-dev/references/instrumentation.md RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/skills/eval-driven-dev/references/investigation.md RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/skills/eval-driven-dev/references/pixie-api.md RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/specs/agent-skill-1.md RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/specs/agent-skill.md RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/specs/autoevals-adapters.md RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/specs/dataset-management.md RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/specs/evals-harness.md RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/specs/expected-output-in-evals.md RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/specs/instrumentation.md RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/specs/manual-instrumentation-usability.md RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/specs/storage.md RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/specs/usability-utils.md RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/tests/__init__.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/tests/pixie/__init__.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/tests/pixie/cli/__init__.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/tests/pixie/cli/e2e_cases.json RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/tests/pixie/cli/e2e_fixtures/conftest.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/tests/pixie/cli/e2e_fixtures/datasets/customer-faq.json RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/tests/pixie/cli/e2e_fixtures/mock_evaluators.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/tests/pixie/cli/e2e_fixtures/test_customer_faq.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/tests/pixie/cli/test_dataset_command.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/tests/pixie/cli/test_e2e_pixie_test.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/tests/pixie/cli/test_main.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/tests/pixie/cli/test_trace_command.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/tests/pixie/dataset/__init__.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/tests/pixie/dataset/test_models.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/tests/pixie/dataset/test_store.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/tests/pixie/evals/__init__.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/tests/pixie/evals/test_criteria.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/tests/pixie/evals/test_evaluation.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/tests/pixie/evals/test_llm_evaluator.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/tests/pixie/evals/test_runner.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/tests/pixie/evals/test_scorecard.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/tests/pixie/evals/test_scorers.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/tests/pixie/evals/test_trace_capture.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/tests/pixie/evals/test_trace_helpers.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/tests/pixie/instrumentation/__init__.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/tests/pixie/instrumentation/conftest.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/tests/pixie/instrumentation/test_context.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/tests/pixie/instrumentation/test_handler.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/tests/pixie/instrumentation/test_integration.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/tests/pixie/instrumentation/test_observation.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/tests/pixie/instrumentation/test_processor.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/tests/pixie/instrumentation/test_queue.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/tests/pixie/instrumentation/test_spans.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/tests/pixie/instrumentation/test_storage_handler.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/tests/pixie/observation_store/__init__.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/tests/pixie/observation_store/conftest.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/tests/pixie/observation_store/test_evaluable.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/tests/pixie/observation_store/test_serialization.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/tests/pixie/observation_store/test_store.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/tests/pixie/observation_store/test_tree.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/tests/pixie/test_config.py RENAMED Viewed

File without changes

{pixie_qa-0.2.0 → pixie_qa-0.2.1}/tests/pixie/test_init.py RENAMED Viewed

File without changes

pixie-qa 0.2.0.tar.gz → 0.2.1.tar.gz

pixie-qa 0.2.0.tar.gz → 0.2.1.tar.gz