pixie-qa 0.1.10__tar.gz → 0.1.11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/PKG-INFO +3 -3
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/README.md +2 -2
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/pyproject.toml +1 -1
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/skills/eval-driven-dev/SKILL.md +6 -6
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/skills/eval-driven-dev/resources/check_version.py +3 -5
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/.github/copilot-instructions.md +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/.github/workflows/publish.yml +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/.gitignore +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/LICENSE +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/changelogs/async-handler-processing.md +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/changelogs/autoevals-adapters.md +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/changelogs/cli-dataset-commands.md +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/changelogs/dataset-management.md +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/changelogs/deep-research-demo.md +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/changelogs/eval-harness.md +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/changelogs/expected-output-in-evals.md +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/changelogs/instrumentation-module-implementation.md +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/changelogs/loud-failure-mode.md +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/changelogs/manual-instrumentation-usability.md +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/changelogs/observation-store-implementation.md +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/changelogs/pixie-directory-and-skill-improvements.md +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/changelogs/pixie-test-e2e-suite.md +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/changelogs/root-package-exports-and-trace-id.md +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/changelogs/scorecard-branding-and-skill-version-check.md +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/changelogs/scorecard-eval-detail-dialog.md +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/changelogs/skill-v2-and-rootdir-discovery.md +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/changelogs/test-scorecard.md +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/changelogs/usability-utils.md +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/docs/package.md +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/pixie/__init__.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/pixie/cli/__init__.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/pixie/cli/dataset_command.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/pixie/cli/main.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/pixie/cli/test_command.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/pixie/config.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/pixie/dataset/__init__.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/pixie/dataset/models.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/pixie/dataset/store.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/pixie/evals/__init__.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/pixie/evals/criteria.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/pixie/evals/eval_utils.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/pixie/evals/evaluation.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/pixie/evals/runner.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/pixie/evals/scorecard.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/pixie/evals/scorers.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/pixie/evals/trace_capture.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/pixie/evals/trace_helpers.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/pixie/favicon.png +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/pixie/instrumentation/__init__.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/pixie/instrumentation/context.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/pixie/instrumentation/handler.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/pixie/instrumentation/handlers.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/pixie/instrumentation/instrumentors.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/pixie/instrumentation/observation.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/pixie/instrumentation/processor.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/pixie/instrumentation/queue.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/pixie/instrumentation/spans.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/pixie/storage/__init__.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/pixie/storage/evaluable.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/pixie/storage/piccolo_conf.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/pixie/storage/piccolo_migrations/__init__.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/pixie/storage/serialization.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/pixie/storage/store.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/pixie/storage/tables.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/pixie/storage/tree.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/skills/eval-driven-dev/references/pixie-api.md +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/specs/agent-skill-1.md +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/specs/agent-skill.md +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/specs/autoevals-adapters.md +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/specs/dataset-management.md +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/specs/evals-harness.md +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/specs/expected-output-in-evals.md +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/specs/instrumentation.md +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/specs/manual-instrumentation-usability.md +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/specs/storage.md +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/specs/usability-utils.md +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/tests/__init__.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/tests/pixie/__init__.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/tests/pixie/cli/__init__.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/tests/pixie/cli/e2e_cases.json +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/tests/pixie/cli/e2e_fixtures/conftest.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/tests/pixie/cli/e2e_fixtures/datasets/customer-faq.json +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/tests/pixie/cli/e2e_fixtures/mock_evaluators.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/tests/pixie/cli/e2e_fixtures/test_customer_faq.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/tests/pixie/cli/test_dataset_command.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/tests/pixie/cli/test_e2e_pixie_test.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/tests/pixie/cli/test_main.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/tests/pixie/dataset/__init__.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/tests/pixie/dataset/test_models.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/tests/pixie/dataset/test_store.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/tests/pixie/evals/__init__.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/tests/pixie/evals/test_criteria.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/tests/pixie/evals/test_eval_utils.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/tests/pixie/evals/test_evaluation.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/tests/pixie/evals/test_runner.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/tests/pixie/evals/test_scorecard.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/tests/pixie/evals/test_scorers.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/tests/pixie/evals/test_trace_capture.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/tests/pixie/evals/test_trace_helpers.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/tests/pixie/instrumentation/__init__.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/tests/pixie/instrumentation/conftest.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/tests/pixie/instrumentation/test_context.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/tests/pixie/instrumentation/test_handler.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/tests/pixie/instrumentation/test_integration.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/tests/pixie/instrumentation/test_observation.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/tests/pixie/instrumentation/test_processor.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/tests/pixie/instrumentation/test_queue.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/tests/pixie/instrumentation/test_spans.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/tests/pixie/instrumentation/test_storage_handler.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/tests/pixie/observation_store/__init__.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/tests/pixie/observation_store/conftest.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/tests/pixie/observation_store/test_evaluable.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/tests/pixie/observation_store/test_serialization.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/tests/pixie/observation_store/test_store.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/tests/pixie/observation_store/test_tree.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/tests/pixie/test_config.py +0 -0
- {pixie_qa-0.1.10 → pixie_qa-0.1.11}/tests/pixie/test_init.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pixie-qa
|
|
3
|
-
Version: 0.1.10
|
|
3
|
+
Version: 0.1.11
|
|
4
4
|
Summary: Automated quality assurance for AI applications
|
|
5
5
|
Project-URL: Homepage, https://github.com/yiouli/pixie-qa
|
|
6
6
|
Project-URL: Repository, https://github.com/yiouli/pixie-qa
|
|
@@ -66,11 +66,11 @@ Description-Content-Type: text/markdown
|
|
|
66
66
|
|
|
67
67
|
# pixie-qa
|
|
68
68
|
|
|
69
|
-
An agent skill
|
|
69
|
+
An agent skill that makes your coding agent the QA engineer for LLM applications.
|
|
70
70
|
|
|
71
71
|
## What the Skill Does
|
|
72
72
|
|
|
73
|
-
The `eval
|
|
73
|
+
The `qa-eval` skill guides your coding agent through the full eval-based QA loop for LLM applications:
|
|
74
74
|
|
|
75
75
|
1. **Understand the code** — read the codebase, trace the data flow, learn what the code is supposed to do
|
|
76
76
|
2. **Instrument it** — add `enable_storage()` and `@observe` so every run is captured to a local SQLite database
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
# pixie-qa
|
|
2
2
|
|
|
3
|
-
An agent skill
|
|
3
|
+
An agent skill that makes your coding agent the QA engineer for LLM applications.
|
|
4
4
|
|
|
5
5
|
## What the Skill Does
|
|
6
6
|
|
|
7
|
-
The `eval
|
|
7
|
+
The `qa-eval` skill guides your coding agent through the full eval-based QA loop for LLM applications:
|
|
8
8
|
|
|
9
9
|
1. **Understand the code** — read the codebase, trace the data flow, learn what the code is supposed to do
|
|
10
10
|
2. **Instrument it** — add `enable_storage()` and `@observe` so every run is captured to a local SQLite database
|
|
@@ -1,23 +1,23 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: eval-driven-dev
|
|
3
|
-
description:
|
|
3
|
+
description: Add instrumentation, build golden datasets, write eval-based tests, run them, root-cause failures, and iterate — Ensure your Python LLM application works correctly. Make sure to use this skill whenever a user is developing, testing, QA-ing, evaluating, or benchmarking a Python project that calls an LLM. Use for making sure an LLM application works correctly, catching regressions after prompt changes, fixing unexpected behavior, or validating output quality before shipping.
|
|
4
4
|
license: MIT
|
|
5
5
|
compatibility: Python 3.11+
|
|
6
6
|
metadata:
|
|
7
|
-
version: 0.1.10
|
|
7
|
+
version: 0.1.11
|
|
8
8
|
---
|
|
9
9
|
|
|
10
|
-
#
|
|
10
|
+
# Evaluation-Driven Development for Python LLM Applications
|
|
11
11
|
|
|
12
12
|
This skill is about doing the work, not describing it. When a user asks you to set up evals for their app, you should be reading their code, editing their files, running commands, and producing a working test pipeline — not writing a plan for them to follow later.
|
|
13
13
|
|
|
14
14
|
## Startup checks (always first)
|
|
15
15
|
|
|
16
|
-
Before doing anything else, perform these two steps
|
|
16
|
+
This skill uses the python `pixie-qa` package. Before doing anything else, perform these two steps:
|
|
17
17
|
|
|
18
18
|
### 1. Upgrade pixie-qa
|
|
19
19
|
|
|
20
|
-
|
|
20
|
+
Attempt to upgrade the `pixie-qa` package in the user's environment. Detect the package manager from the project (check for `uv.lock`, `poetry.lock`, `requirements.txt`, or a plain `pip` environment) and run the appropriate upgrade command:
|
|
21
21
|
|
|
22
22
|
- **uv**: `uv add pixie-qa --upgrade` (or `uv sync --upgrade-package pixie-qa`)
|
|
23
23
|
- **poetry**: `poetry add pixie-qa@latest`
|
|
@@ -27,7 +27,7 @@ If the upgrade fails (e.g., no network, version conflict), log the error and con
|
|
|
27
27
|
|
|
28
28
|
### 2. Check skill version
|
|
29
29
|
|
|
30
|
-
|
|
30
|
+
Check whether a newer version of this skill itself is available by running the script `check_version.py`.
|
|
31
31
|
|
|
32
32
|
If there is a newer version, reinstall the skill with `npx skills add` before proceeding.
|
|
33
33
|
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
|
-
"""Check whether the eval
|
|
2
|
+
"""Check whether the qa-eval skill is outdated and update it if needed."""
|
|
3
3
|
|
|
4
4
|
from __future__ import annotations
|
|
5
5
|
|
|
@@ -9,9 +9,7 @@ from urllib.error import URLError
|
|
|
9
9
|
from urllib.request import urlopen
|
|
10
10
|
|
|
11
11
|
SKILL_REPO = "/yiouli/pixie-qa/"
|
|
12
|
-
SKILL_URL = (
|
|
13
|
-
f"https://raw.githubusercontent.com{SKILL_REPO}main/skills/eval-driven-dev/SKILL.md"
|
|
14
|
-
)
|
|
12
|
+
SKILL_URL = f"https://raw.githubusercontent.com{SKILL_REPO}main/skills/qa-eval/SKILL.md"
|
|
15
13
|
|
|
16
14
|
_RE_FRONTMATTER = re.compile(r"^---\s*\n(.*?)\n---", re.DOTALL)
|
|
17
15
|
_RE_NAME = re.compile(r"^name:\s*(.+)$", re.MULTILINE)
|
|
@@ -57,7 +55,7 @@ def _normalise_version(version: str) -> tuple[int, ...]:
|
|
|
57
55
|
|
|
58
56
|
def main() -> int:
|
|
59
57
|
resource_dir = Path(__file__).resolve().parent
|
|
60
|
-
skill_dir = resource_dir.parent # skills/
|
|
58
|
+
skill_dir = resource_dir.parent # skills/ai-qa/
|
|
61
59
|
|
|
62
60
|
local_data = _load_local_version(skill_dir)
|
|
63
61
|
local_version = local_data.get("version", "0.0.0")
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{pixie_qa-0.1.10 → pixie_qa-0.1.11}/changelogs/scorecard-branding-and-skill-version-check.md
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|