pixie-qa 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pixie/__init__.py ADDED
@@ -0,0 +1,11 @@
1
+ """pixie — automated quality assurance for AI applications.
2
+
3
+ Re-exports commonly used public API for convenient top-level access.
4
+ """
5
+
6
+ from pixie.instrumentation.handlers import StorageHandler, enable_storage
7
+
8
+ __all__ = [
9
+ "StorageHandler",
10
+ "enable_storage",
11
+ ]
pixie/cli/__init__.py ADDED
@@ -0,0 +1,6 @@
1
+ """pixie.cli — command-line interface for pixie.
2
+
3
+ Provides:
4
+ - ``pixie`` — main entry point with dataset management subcommands.
5
+ - ``pixie-test`` — eval test discovery and execution.
6
+ """
@@ -0,0 +1,193 @@
1
+ """``pixie dataset`` CLI commands.
2
+
3
+ Provides operations for managing datasets and saving trace spans as evaluable
4
+ items:
5
+
6
+ - :func:`dataset_create` — create a new empty dataset.
7
+ - :func:`dataset_list` — list datasets with basic information.
8
+ - :func:`dataset_save` — select a span from the latest trace and save it
9
+ to a dataset.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from typing import Any
15
+
16
+ from pydantic import JsonValue
17
+
18
+ from pixie.dataset.models import Dataset
19
+ from pixie.dataset.store import DatasetStore
20
+ from pixie.storage.evaluable import UNSET, Evaluable, _Unset, as_evaluable
21
+ from pixie.storage.store import ObservationStore
22
+
23
+
24
def dataset_create(
    *,
    name: str,
    dataset_store: DatasetStore,
) -> Dataset:
    """Create and return a brand-new, empty dataset called *name*.

    Args:
        name: Unique name for the new dataset.
        dataset_store: Store the dataset is persisted to.

    Returns:
        The freshly created ``Dataset``.

    Raises:
        FileExistsError: If a dataset called *name* is already present.
    """
    # Delegates entirely to the store; uniqueness is enforced there.
    return dataset_store.create(name)
42
+
43
+
44
def dataset_list(
    *,
    dataset_store: DatasetStore,
) -> list[dict[str, Any]]:
    """Return one metadata dict per stored dataset.

    Keys present in each dict:
      - ``name``: dataset name
      - ``row_count``: number of evaluable items
      - ``created_at``: file creation timestamp (ISO 8601)
      - ``updated_at``: file last-modified timestamp (ISO 8601)
    """
    # The store computes all per-dataset details; nothing to post-process.
    return dataset_store.list_details()
57
+
58
+
59
def format_dataset_table(rows: list[dict[str, Any]]) -> str:
    """Render dataset metadata rows as an aligned, plain-text table.

    Args:
        rows: Metadata dicts as produced by :func:`dataset_list`.

    Returns:
        A printable multi-line table, or a placeholder message when
        *rows* is empty.
    """
    if not rows:
        return "No datasets found."

    headers = ["Name", "Rows", "Created", "Updated"]
    body = [
        [row["name"], str(row["row_count"]), row["created_at"], row["updated_at"]]
        for row in rows
    ]

    # Each column is as wide as its widest cell, header included.
    widths = [
        max(len(headers[col]), *(len(cells[col]) for cells in body))
        for col in range(len(headers))
    ]

    def render(cells: list[str]) -> str:
        return "  ".join(cell.ljust(width) for cell, width in zip(cells, widths))

    divider = ["-" * width for width in widths]
    return "\n".join([render(headers), render(divider), *map(render, body)])
89
+
90
+
91
async def dataset_save(
    *,
    name: str,
    observation_store: ObservationStore,
    dataset_store: DatasetStore,
    select: str = "root",
    span_name: str | None = None,
    expected_output: JsonValue | _Unset = UNSET,
    notes: str | None = None,
) -> Dataset:
    """Pick a span from the most recent trace and append it to a dataset.

    The newest trace is fetched from *observation_store*, one of its spans
    is chosen according to *select*, converted into an ``Evaluable``, and
    appended to the dataset called *name*.

    Args:
        name: Name of the dataset to save to (must exist).
        observation_store: Store to read spans from.
        dataset_store: Store to write the updated dataset to.
        select: Selection mode — ``"root"``, ``"last_llm_call"``, or
            ``"by_name"``. Defaults to ``"root"``.
        span_name: Span name to match when *select* is ``"by_name"``.
            Required when *select* is ``"by_name"``.
        expected_output: If provided, set on the evaluable. When
            ``UNSET`` (default), the evaluable's ``expected_output``
            is left as ``UNSET``.
        notes: Optional notes string stored under the ``"notes"`` key of
            the evaluable's ``eval_metadata``.

    Returns:
        The updated ``Dataset``.

    Raises:
        ValueError: If no traces exist, or no matching span found.
        FileNotFoundError: If no dataset with *name* exists.
    """
    recent = await observation_store.list_traces(limit=1)
    if not recent:
        raise ValueError("No traces found in the observation store.")
    trace_id: str = recent[0]["trace_id"]

    chosen = await _select_span(
        observation_store=observation_store,
        trace_id=trace_id,
        select=select,
        span_name=span_name,
    )

    item = as_evaluable(chosen)

    # Overlay the caller-supplied expected output, when one was given.
    if not isinstance(expected_output, _Unset):
        item = Evaluable(
            eval_input=item.eval_input,
            eval_output=item.eval_output,
            eval_metadata=item.eval_metadata,
            expected_output=expected_output,
        )

    # Merge notes into a copy of the metadata (never mutate the original).
    if notes is not None:
        merged_meta = dict(item.eval_metadata) if item.eval_metadata else {}
        merged_meta["notes"] = notes
        item = Evaluable(
            eval_input=item.eval_input,
            eval_output=item.eval_output,
            eval_metadata=merged_meta,
            expected_output=item.expected_output,
        )

    return dataset_store.append(name, item)
163
+
164
+
165
+ async def _select_span(
166
+ *,
167
+ observation_store: ObservationStore,
168
+ trace_id: str,
169
+ select: str,
170
+ span_name: str | None,
171
+ ) -> Any:
172
+ """Select a span from a trace according to the selection mode."""
173
+ if select == "root":
174
+ return await observation_store.get_root(trace_id)
175
+
176
+ if select == "last_llm_call":
177
+ span = await observation_store.get_last_llm(trace_id)
178
+ if span is None:
179
+ raise ValueError(f"No LLM span found in trace {trace_id}.")
180
+ return span
181
+
182
+ if select == "by_name":
183
+ if not span_name:
184
+ raise ValueError("--span-name is required when selection mode is 'by_name'.")
185
+ matches = await observation_store.get_by_name(span_name, trace_id=trace_id)
186
+ if not matches:
187
+ raise ValueError(
188
+ f"No span named {span_name!r} found in trace {trace_id}."
189
+ )
190
+ # Select the latest (last by started_at — get_by_name returns ASC order)
191
+ return matches[-1]
192
+
193
+ raise ValueError(f"Unknown selection mode: {select!r}")
pixie/cli/main.py ADDED
@@ -0,0 +1,192 @@
1
+ """``pixie`` CLI entry point — top-level command with subcommand routing.
2
+
3
+ Usage::
4
+
5
+ pixie dataset create <name>
6
+ pixie dataset list
7
+ pixie dataset save <name> [--select MODE] [--span-name NAME]
8
+ [--expected-output] [--notes TEXT]
9
+
10
+ Reads spans from the observation store (SQLite, configured via ``PIXIE_DB_PATH``)
11
+ and writes evaluable items to the dataset store (JSON files, configured via
12
+ ``PIXIE_DATASET_DIR``).
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import argparse
18
+ import asyncio
19
+ import json
20
+ import sys
21
+ from typing import TextIO
22
+
23
+ from piccolo.engine.sqlite import SQLiteEngine
24
+ from pydantic import JsonValue
25
+
26
+ from pixie.cli.dataset_command import (
27
+ dataset_create,
28
+ dataset_list,
29
+ dataset_save,
30
+ format_dataset_table,
31
+ )
32
+ from pixie.config import get_config
33
+ from pixie.dataset.store import DatasetStore
34
+ from pixie.storage.evaluable import UNSET, _Unset
35
+ from pixie.storage.store import ObservationStore
36
+
37
+
38
+ def _build_parser() -> argparse.ArgumentParser:
39
+ """Build the top-level argument parser with subcommands."""
40
+ parser = argparse.ArgumentParser(
41
+ prog="pixie",
42
+ description="Pixie — automated quality assurance for AI applications",
43
+ )
44
+ subparsers = parser.add_subparsers(dest="command", help="Available commands")
45
+
46
+ # -- pixie dataset -------------------------------------------------------
47
+ dataset_parser = subparsers.add_parser("dataset", help="Dataset management commands")
48
+ dataset_sub = dataset_parser.add_subparsers(
49
+ dest="dataset_action", help="Dataset actions"
50
+ )
51
+
52
+ # pixie dataset create <name>
53
+ create_parser = dataset_sub.add_parser(
54
+ "create", help="Create a new empty dataset"
55
+ )
56
+ create_parser.add_argument("name", help="Name for the new dataset")
57
+
58
+ # pixie dataset list
59
+ dataset_sub.add_parser("list", help="List all datasets")
60
+
61
+ # pixie dataset save <name> [options]
62
+ save_parser = dataset_sub.add_parser(
63
+ "save",
64
+ help="Save a span from the latest trace to a dataset",
65
+ )
66
+ save_parser.add_argument("name", help="Name of the dataset to save to")
67
+ save_parser.add_argument(
68
+ "--select",
69
+ choices=["root", "last_llm_call", "by_name"],
70
+ default="root",
71
+ help="How to select the span from the trace (default: root)",
72
+ )
73
+ save_parser.add_argument(
74
+ "--span-name",
75
+ default=None,
76
+ help="Span name to match (required when --select=by_name)",
77
+ )
78
+ save_parser.add_argument(
79
+ "--expected-output",
80
+ action="store_true",
81
+ default=False,
82
+ help="Read expected output JSON from stdin",
83
+ )
84
+ save_parser.add_argument(
85
+ "--notes",
86
+ default=None,
87
+ help="Optional notes to attach to the evaluable metadata",
88
+ )
89
+
90
+ return parser
91
+
92
+
93
def _run_dataset_create(name: str) -> None:
    """Create a dataset called *name* and report the result on stdout."""
    store = DatasetStore()
    created = dataset_create(name=name, dataset_store=store)
    print(f"Created dataset {created.name!r}.")  # noqa: T201
98
+
99
+
100
def _run_dataset_list() -> None:
    """List all datasets and print them as an aligned table."""
    store = DatasetStore()
    details = dataset_list(dataset_store=store)
    print(format_dataset_table(details))  # noqa: T201
105
+
106
+
107
def _run_dataset_save(
    name: str,
    select: str,
    span_name: str | None,
    expected_output_flag: bool,
    notes: str | None,
    stdin: TextIO | None = None,
) -> None:
    """Wire up the stores, gather stdin input, and invoke dataset_save.

    Args:
        name: Target dataset name.
        select: Span selection mode passed through to dataset_save.
        span_name: Span name for ``by_name`` selection, if any.
        expected_output_flag: When True, expected-output JSON is read
            from *stdin* (or ``sys.stdin`` when *stdin* is None).
        notes: Optional notes attached to the saved evaluable.
        stdin: Override for the input stream — injectable for tests.

    Raises:
        ValueError: If the flag is set but stdin carries no JSON.
    """
    config = get_config()
    obs_store = ObservationStore(engine=SQLiteEngine(path=config.db_path))
    ds_store = DatasetStore()

    expected: JsonValue | _Unset = UNSET
    if expected_output_flag:
        stream = sys.stdin if stdin is None else stdin
        payload = stream.read().strip()
        if not payload:
            raise ValueError("--expected-output flag set but no JSON provided on stdin.")
        expected = json.loads(payload)

    updated = asyncio.run(
        dataset_save(
            name=name,
            observation_store=obs_store,
            dataset_store=ds_store,
            select=select,
            span_name=span_name,
            expected_output=expected,
            notes=notes,
        )
    )
    print(  # noqa: T201
        f"Saved to dataset {updated.name!r} — now {len(updated.items)} item(s)."
    )
143
+
144
+
145
def main(argv: list[str] | None = None) -> int:
    """Entry point for the ``pixie`` command.

    Args:
        argv: Command-line arguments. Defaults to ``sys.argv[1:]``.

    Returns:
        Exit code: 0 on success, 1 on error.
    """
    parser = _build_parser()
    args = parser.parse_args(argv)

    if args.command is None:
        parser.print_help()
        return 1

    if args.command == "dataset":
        if args.dataset_action is None:
            # Fix: re-parsing ["dataset", "--help"] triggers argparse's
            # built-in help action, which raises SystemExit(0) — so the
            # `return 1` below was unreachable and `pixie dataset` with no
            # action exited 0. Swallow the SystemExit so the help text is
            # still printed while the documented error code (1) is returned.
            try:
                parser.parse_args(["dataset", "--help"])
            except SystemExit:
                pass
            return 1

        try:
            if args.dataset_action == "create":
                _run_dataset_create(args.name)
            elif args.dataset_action == "list":
                _run_dataset_list()
            elif args.dataset_action == "save":
                _run_dataset_save(
                    name=args.name,
                    select=args.select,
                    span_name=args.span_name,
                    expected_output_flag=args.expected_output,
                    notes=args.notes,
                )
        except (
            ValueError,
            FileExistsError,
            FileNotFoundError,
            json.JSONDecodeError,
        ) as exc:
            # All expected failures surface as a one-line stderr message.
            print(f"Error: {exc}", file=sys.stderr)  # noqa: T201
            return 1

    return 0
189
+
190
+
191
+ if __name__ == "__main__":
192
+ sys.exit(main())
@@ -0,0 +1,68 @@
1
+ """``pixie test`` CLI entry point.
2
+
3
+ Usage::
4
+
5
+ pixie test [path] [--filter PATTERN] [--verbose]
6
+
7
+ Discovers and runs eval test functions, reporting pass/fail results.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import argparse
13
+ import sys
14
+
15
+ import pixie.instrumentation as px
16
+ from pixie.evals.runner import discover_tests, format_results, run_tests
17
+
18
+
19
def main(argv: list[str] | None = None) -> int:
    """Entry point for ``pixie test`` command.

    Args:
        argv: Command-line arguments. Defaults to ``sys.argv[1:]``.

    Returns:
        Exit code: 0 if all tests pass, 1 otherwise.
    """
    parser = argparse.ArgumentParser(
        prog="pixie test",
        description="Run pixie eval tests",
    )
    parser.add_argument(
        "path",
        nargs="?",
        default=".",
        help="File or directory to search for tests (default: current directory)",
    )
    parser.add_argument(
        "-k",
        "--filter",
        dest="filter_pattern",
        default=None,
        help="Only run tests whose names contain this substring",
    )
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        default=False,
        help="Show detailed evaluation results",
    )

    opts = parser.parse_args(argv)

    # Instrumentation must be initialised before any test function executes.
    px.init()

    discovered = discover_tests(opts.path, filter_pattern=opts.filter_pattern)
    outcomes = run_tests(discovered)
    print(format_results(outcomes, verbose=opts.verbose))  # noqa: T201

    # Non-zero exit as soon as any case did not pass.
    return 0 if all(result.status == "passed" for result in outcomes) else 1
65
+
66
+
67
+ if __name__ == "__main__":
68
+ sys.exit(main())
pixie/config.py ADDED
@@ -0,0 +1,41 @@
1
+ """Centralized configuration with env var overrides and sensible defaults.
2
+
3
+ All environment variables are prefixed with ``PIXIE_``. Values are read at
4
+ call time (inside :func:`get_config`), not at import time, so tests can
5
+ manipulate ``os.environ`` before calling :func:`get_config`.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import os
11
+ from dataclasses import dataclass
12
+
13
+
14
@dataclass(frozen=True)
class PixieConfig:
    """Immutable configuration snapshot.

    Instances are frozen so a snapshot taken by :func:`get_config` cannot
    drift after later environment changes.

    Attributes:
        db_path: Path to the SQLite database file.
        db_engine: Database engine type (currently only ``"sqlite"``).
        dataset_dir: Directory for dataset JSON files.
    """

    # Defaults are relative paths, resolved against the current working
    # directory of whatever process reads the config.
    db_path: str = "pixie_observations.db"
    db_engine: str = "sqlite"
    dataset_dir: str = "pixie_datasets"
27
+
28
+
29
def get_config() -> PixieConfig:
    """Build a :class:`PixieConfig` from the environment, with defaults.

    Environment is consulted at call time (not import time) so tests can
    adjust ``os.environ`` before calling.

    Supported variables:
      - ``PIXIE_DB_PATH`` — overrides :attr:`PixieConfig.db_path`
      - ``PIXIE_DB_ENGINE`` — overrides :attr:`PixieConfig.db_engine`
      - ``PIXIE_DATASET_DIR`` — overrides :attr:`PixieConfig.dataset_dir`
    """
    env = os.environ
    return PixieConfig(
        db_path=env.get("PIXIE_DB_PATH", PixieConfig.db_path),
        db_engine=env.get("PIXIE_DB_ENGINE", PixieConfig.db_engine),
        dataset_dir=env.get("PIXIE_DATASET_DIR", PixieConfig.dataset_dir),
    )
@@ -0,0 +1,11 @@
1
+ """pixie.dataset — named collections of evaluable items.
2
+
3
+ Public API:
4
+ - ``Dataset`` — Pydantic model: name + items
5
+ - ``DatasetStore`` — JSON-file-backed CRUD
6
+ """
7
+
8
+ from pixie.dataset.models import Dataset
9
+ from pixie.dataset.store import DatasetStore
10
+
11
+ __all__ = ["Dataset", "DatasetStore"]
@@ -0,0 +1,21 @@
1
+ """Dataset model — a named collection of evaluable items."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pydantic import BaseModel, ConfigDict, Field
6
+
7
+ from pixie.storage.evaluable import Evaluable
8
+
9
+
10
class Dataset(BaseModel):
    """A named collection of evaluable items.

    Attributes:
        name: Unique human-readable name for the dataset.
        items: Ordered list of evaluable entries.
    """

    # Frozen: a Dataset is an immutable snapshot; stores return new copies
    # rather than mutating in place.
    model_config = ConfigDict(frozen=True)

    # Name must be non-empty; uniqueness is enforced by the store, not here.
    name: str = Field(..., min_length=1)
    # A tuple (not a list) keeps the items immutable alongside the frozen config.
    items: tuple[Evaluable, ...] = ()