PyPI - pixie-qa - Versions diffs - 0.4.0__tar.gz → 0.5.0__tar.gz - Mend

pixie-qa 0.4.0__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (73) hide show

{pixie_qa-0.4.0 → pixie_qa-0.5.0}/PKG-INFO +15 -11
{pixie_qa-0.4.0 → pixie_qa-0.5.0}/README.md +14 -9
pixie_qa-0.5.0/pixie/__init__.py +90 -0
pixie_qa-0.5.0/pixie/assets/webui.html +64 -0
pixie_qa-0.5.0/pixie/cli/__init__.py +11 -0
{pixie_qa-0.4.0 → pixie_qa-0.5.0}/pixie/cli/analyze_command.py +3 -3
pixie_qa-0.5.0/pixie/cli/format_command.py +223 -0
pixie_qa-0.5.0/pixie/cli/main.py +202 -0
pixie_qa-0.5.0/pixie/cli/test_command.py +178 -0
pixie_qa-0.5.0/pixie/cli/trace_command.py +128 -0
{pixie_qa-0.4.0 → pixie_qa-0.5.0}/pixie/config.py +8 -8
pixie_qa-0.5.0/pixie/eval/__init__.py +143 -0
pixie_qa-0.5.0/pixie/eval/evaluable.py +100 -0
{pixie_qa-0.4.0/pixie/evals → pixie_qa-0.5.0/pixie/eval}/evaluation.py +8 -14
{pixie_qa-0.4.0/pixie/evals → pixie_qa-0.5.0/pixie/eval}/llm_evaluator.py +35 -19
{pixie_qa-0.4.0/pixie/evals → pixie_qa-0.5.0/pixie/eval}/rate_limiter.py +1 -1
{pixie_qa-0.4.0/pixie/evals → pixie_qa-0.5.0/pixie/eval}/scorers.py +53 -50
pixie_qa-0.5.0/pixie/harness/__init__.py +8 -0
pixie_qa-0.5.0/pixie/harness/runnable.py +133 -0
pixie_qa-0.5.0/pixie/harness/runner.py +813 -0
pixie_qa-0.5.0/pixie/instrumentation/__init__.py +99 -0
pixie_qa-0.5.0/pixie/instrumentation/llm_tracing.py +818 -0
pixie_qa-0.5.0/pixie/instrumentation/wrap.py +323 -0
pixie_qa-0.5.0/pixie/web/__init__.py +7 -0
{pixie_qa-0.4.0 → pixie_qa-0.5.0}/pixie/web/app.py +15 -4
{pixie_qa-0.4.0 → pixie_qa-0.5.0}/pixie/web/watcher.py +6 -3
{pixie_qa-0.4.0 → pixie_qa-0.5.0}/pyproject.toml +3 -4
pixie_qa-0.4.0/pixie/__init__.py +0 -113
pixie_qa-0.4.0/pixie/assets/webui.html +0 -64
pixie_qa-0.4.0/pixie/cli/__init__.py +0 -6
pixie_qa-0.4.0/pixie/cli/dag_command.py +0 -75
pixie_qa-0.4.0/pixie/cli/dataset_command.py +0 -193
pixie_qa-0.4.0/pixie/cli/main.py +0 -456
pixie_qa-0.4.0/pixie/cli/test_command.py +0 -257
pixie_qa-0.4.0/pixie/cli/trace_command.py +0 -294
pixie_qa-0.4.0/pixie/dag/__init__.py +0 -400
pixie_qa-0.4.0/pixie/dag/trace_check.py +0 -183
pixie_qa-0.4.0/pixie/dataset/__init__.py +0 -11
pixie_qa-0.4.0/pixie/dataset/models.py +0 -21
pixie_qa-0.4.0/pixie/dataset/store.py +0 -212
pixie_qa-0.4.0/pixie/evals/__init__.py +0 -184
pixie_qa-0.4.0/pixie/evals/criteria.py +0 -61
pixie_qa-0.4.0/pixie/evals/dataset_runner.py +0 -495
pixie_qa-0.4.0/pixie/evals/eval_utils.py +0 -334
pixie_qa-0.4.0/pixie/evals/scorecard.py +0 -252
pixie_qa-0.4.0/pixie/evals/trace_capture.py +0 -70
pixie_qa-0.4.0/pixie/evals/trace_helpers.py +0 -57
pixie_qa-0.4.0/pixie/instrumentation/__init__.py +0 -80
pixie_qa-0.4.0/pixie/instrumentation/context.py +0 -86
pixie_qa-0.4.0/pixie/instrumentation/handler.py +0 -72
pixie_qa-0.4.0/pixie/instrumentation/handlers.py +0 -105
pixie_qa-0.4.0/pixie/instrumentation/instrumentors.py +0 -47
pixie_qa-0.4.0/pixie/instrumentation/observation.py +0 -217
pixie_qa-0.4.0/pixie/instrumentation/processor.py +0 -366
pixie_qa-0.4.0/pixie/instrumentation/queue.py +0 -88
pixie_qa-0.4.0/pixie/instrumentation/spans.py +0 -165
pixie_qa-0.4.0/pixie/storage/__init__.py +0 -27
pixie_qa-0.4.0/pixie/storage/evaluable.py +0 -140
pixie_qa-0.4.0/pixie/storage/piccolo_conf.py +0 -10
pixie_qa-0.4.0/pixie/storage/piccolo_migrations/__init__.py +0 -1
pixie_qa-0.4.0/pixie/storage/serialization.py +0 -227
pixie_qa-0.4.0/pixie/storage/store.py +0 -231
pixie_qa-0.4.0/pixie/storage/tables.py +0 -21
pixie_qa-0.4.0/pixie/storage/tree.py +0 -199
pixie_qa-0.4.0/pixie/web/__init__.py +0 -1
{pixie_qa-0.4.0 → pixie_qa-0.5.0}/.gitignore +0 -0
{pixie_qa-0.4.0 → pixie_qa-0.5.0}/LICENSE +0 -0
{pixie_qa-0.4.0 → pixie_qa-0.5.0}/pixie/assets/mock-data.json +0 -0
{pixie_qa-0.4.0 → pixie_qa-0.5.0}/pixie/cli/init_command.py +0 -0
{pixie_qa-0.4.0 → pixie_qa-0.5.0}/pixie/cli/start_command.py +0 -0
{pixie_qa-0.4.0 → pixie_qa-0.5.0}/pixie/favicon.png +0 -0
{pixie_qa-0.4.0/pixie/evals/test_result.py → pixie_qa-0.5.0/pixie/harness/run_result.py} +0 -0
{pixie_qa-0.4.0 → pixie_qa-0.5.0}/pixie/web/server.py +0 -0

{pixie_qa-0.4.0 → pixie_qa-0.5.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pixie-qa
-Version: 0.4.0
+Version: 0.5.0
 Summary: Automated quality assurance for AI applications
 Project-URL: Homepage, https://github.com/yiouli/pixie-qa
 Project-URL: Repository, https://github.com/yiouli/pixie-qa
@@ -44,7 +44,6 @@ Requires-Dist: openai>=2.29.0
 Requires-Dist: openinference-instrumentation>=0.1.44
 Requires-Dist: opentelemetry-api>=1.27.0
 Requires-Dist: opentelemetry-sdk>=1.27.0
-Requires-Dist: piccolo[sqlite]>=1.33.0
 Requires-Dist: pydantic>=2.0
 Requires-Dist: python-dotenv>=1.2.2
 Requires-Dist: starlette>=1.0.0
@@ -70,20 +69,19 @@ Description-Content-Type: text/markdown
 # pixie-qa
-An agent skill that make coding agent the QA engineer for LLM applications.
+An agent skill that makes coding agents the QA engineer for LLM applications.
 ## What the Skill Does
 The `qa-eval` skill guides your coding agent through the full eval-based QA loop for LLM applications:
 1. **Understand the code** — read the codebase, trace the data flow, learn what the code is supposed to do
-2. **Instrument it** — add `enable_storage()` and `@observe` so every run is captured to a local SQLite database
-3. **Build a dataset** — save representative traces as test cases with `pixie dataset save`
-4. **Write eval tests** — generate `test_*.py` files with `assert_dataset_pass` and appropriate evaluators
-5. **Validate datasets** — `pixie dataset validate [dir_or_dataset_path]` to catch schema/config errors early
-6. **Run the tests** — `pixie test` to run all evals and report per-case scores
-7. **Analyse results** — `pixie analyze <test_id>` to get LLM-generated analysis of test results
-8. **Investigate failures** — look up the stored trace for each failure, diagnose, fix, repeat
+2. **Instrument it** — use `wrap()` for data-object tracing and OpenInference auto-instrumentation for LLM span capture
+3. **Build a dataset** — create JSON datasets of representative inputs and expected outputs
+4. **Write eval tests** — generate `test_*.py` files with appropriate evaluators
+5. **Run the tests** — `pixie test` to run all evals and report per-case scores
+6. **Analyse results** — `pixie analyze <test_id>` to get LLM-generated analysis of test results
+7. **Investigate failures** — diagnose failures, fix, repeat
 ## Getting Started
@@ -105,7 +103,13 @@ Your coding agent will read your code, instrument it, build a dataset from a few
 ## Python Package
-The `pixie-qa` Python package (imported as `pixie`) is what Claude installs and uses inside your project. For the package API and CLI reference, see [docs/package.md](docs/package.md).
+The `pixie-qa` Python package (imported as `pixie`) is what Claude installs and uses inside your project. API docs are auto-generated by `pdoc3` into [docs/pixie/index.md](docs/pixie/index.md) via pre-commit. The markdown renderer uses [scripts/pdoc_templates/text.mako](scripts/pdoc_templates/text.mako) so async functions and methods are explicitly shown as `async def` in signatures.
+Install hooks once per clone:
+```bash
+uv run pre-commit install
+```
 ## Web UI

{pixie_qa-0.4.0 → pixie_qa-0.5.0}/README.md RENAMED Viewed

@@ -1,19 +1,18 @@
 # pixie-qa
-An agent skill that make coding agent the QA engineer for LLM applications.
+An agent skill that makes coding agents the QA engineer for LLM applications.
 ## What the Skill Does
 The `qa-eval` skill guides your coding agent through the full eval-based QA loop for LLM applications:
 1. **Understand the code** — read the codebase, trace the data flow, learn what the code is supposed to do
-2. **Instrument it** — add `enable_storage()` and `@observe` so every run is captured to a local SQLite database
-3. **Build a dataset** — save representative traces as test cases with `pixie dataset save`
-4. **Write eval tests** — generate `test_*.py` files with `assert_dataset_pass` and appropriate evaluators
-5. **Validate datasets** — `pixie dataset validate [dir_or_dataset_path]` to catch schema/config errors early
-6. **Run the tests** — `pixie test` to run all evals and report per-case scores
-7. **Analyse results** — `pixie analyze <test_id>` to get LLM-generated analysis of test results
-8. **Investigate failures** — look up the stored trace for each failure, diagnose, fix, repeat
+2. **Instrument it** — use `wrap()` for data-object tracing and OpenInference auto-instrumentation for LLM span capture
+3. **Build a dataset** — create JSON datasets of representative inputs and expected outputs
+4. **Write eval tests** — generate `test_*.py` files with appropriate evaluators
+5. **Run the tests** — `pixie test` to run all evals and report per-case scores
+6. **Analyse results** — `pixie analyze <test_id>` to get LLM-generated analysis of test results
+7. **Investigate failures** — diagnose failures, fix, repeat
 ## Getting Started
@@ -35,7 +34,13 @@ Your coding agent will read your code, instrument it, build a dataset from a few
 ## Python Package
-The `pixie-qa` Python package (imported as `pixie`) is what Claude installs and uses inside your project. For the package API and CLI reference, see [docs/package.md](docs/package.md).
+The `pixie-qa` Python package (imported as `pixie`) is what Claude installs and uses inside your project. API docs are auto-generated by `pdoc3` into [docs/pixie/index.md](docs/pixie/index.md) via pre-commit. The markdown renderer uses [scripts/pdoc_templates/text.mako](scripts/pdoc_templates/text.mako) so async functions and methods are explicitly shown as `async def` in signatures.
+Install hooks once per clone:
+```bash
+uv run pre-commit install
+```
 ## Web UI

pixie_qa-0.5.0/pixie/__init__.py ADDED Viewed

@@ -0,0 +1,90 @@
+"""pixie — automated quality assurance for AI applications.
+Re-exports the full public API so users can ``from pixie import ...``
+for every commonly used symbol without needing submodule paths.
+"""
+from pixie.eval.evaluable import Evaluable, TestCase
+from pixie.eval.evaluation import Evaluation, Evaluator, evaluate
+from pixie.eval.llm_evaluator import create_llm_evaluator
+from pixie.eval.scorers import (
+    AnswerCorrectness,
+    AnswerRelevancy,
+    AutoevalsAdapter,
+    Battle,
+    ClosedQA,
+    ContextRelevancy,
+    EmbeddingSimilarity,
+    ExactMatch,
+    Factuality,
+    Faithfulness,
+    Humor,
+    JSONDiff,
+    LevenshteinMatch,
+    ListContains,
+    Moderation,
+    NumericDiff,
+    Possible,
+    Security,
+    Sql,
+    Summary,
+    Translation,
+    ValidJSON,
+)
+# -- Harness ------------------------------------------------------------------
+from pixie.harness.runnable import Runnable
+# -- Instrumentation ----------------------------------------------------------
+from pixie.instrumentation.llm_tracing import (
+    add_handler,
+    enable_llm_tracing,
+    flush,
+    remove_handler,
+)
+from pixie.instrumentation.wrap import (
+    WrappedData,
+    wrap,
+)
+__all__ = [
+    # Instrumentation
+    "WrappedData",
+    "flush",
+    "enable_llm_tracing",
+    "add_handler",
+    "remove_handler",
+    "wrap",
+    # Harness
+    "Runnable",
+    # Eval data models
+    "Evaluable",
+    "TestCase",
+    "Evaluation",
+    "Evaluator",
+    "evaluate",
+    "create_llm_evaluator",
+    # Pre-made evaluators (autoevals adapters)
+    "AnswerCorrectness",
+    "AnswerRelevancy",
+    "AutoevalsAdapter",
+    "Battle",
+    "ClosedQA",
+    "ContextRelevancy",
+    "EmbeddingSimilarity",
+    "ExactMatch",
+    "Factuality",
+    "Faithfulness",
+    "Humor",
+    "JSONDiff",
+    "LevenshteinMatch",
+    "ListContains",
+    "Moderation",
+    "NumericDiff",
+    "Possible",
+    "Security",
+    "Sql",
+    "Summary",
+    "Translation",
+    "ValidJSON",
+]

pixie-qa 0.4.0__tar.gz → 0.5.0__tar.gz

pixie-qa 0.4.0__tar.gz → 0.5.0__tar.gz