PyPI - ldv-cli - Versions diffs - 0.9.0__tar.gz → 0.10.0__tar.gz - Mend

ldv-cli 0.9.0__tar.gz → 0.10.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

{ldv_cli-0.9.0 → ldv_cli-0.10.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ldv-cli
-Version: 0.9.0
+Version: 0.10.0
 Summary: ldv — CLI for the Liquid DataViewer platform (formerly lql)
 Project-URL: Homepage, https://github.com/Liquid4All/lql
 Author: Liquid AI

{ldv_cli-0.9.0 → ldv_cli-0.10.0}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "ldv-cli"
-version = "0.9.0"
+version = "0.10.0"
 description = "ldv — CLI for the Liquid DataViewer platform (formerly lql)"
 readme = "README.md"
 requires-python = ">=3.12"

{ldv_cli-0.9.0 → ldv_cli-0.10.0}/src/ldv/__init__.py RENAMED Viewed

@@ -3,4 +3,4 @@ from importlib.metadata import PackageNotFoundError, version
 try:
     __version__ = version("ldv-cli")
 except PackageNotFoundError:  # not installed (e.g. running from a source checkout)
-    __version__ = "0.9.0"
+    __version__ = "0.10.0"

{ldv_cli-0.9.0 → ldv_cli-0.10.0}/src/ldv/commands/evals.py RENAMED Viewed

@@ -5,6 +5,7 @@ import sys
 from typing import Annotated, List, Optional
 import typer
+from rich.console import Console
 from .._group import AliasGroup
@@ -178,6 +179,62 @@ def correctness(
     )
+def _bar(pct: float, width: int = 20) -> str:
+    filled = round(pct * width)
+    filled = max(0, min(width, filled))
+    return "█" * filled + "░" * (width - filled)
+@app.command("failures")
+def failures(
+    id: Annotated[str, typer.Argument(help="Dataset ID")],
+    json_out: JsonOpt = False,
+    profile: ProfileOpt = None,
+    api_url: ApiUrlOpt = None,
+) -> None:
+    """Quality analysis: clean vs. dirty rate + failure mode breakdown."""
+    client = ApiClient(profile=profile, api_url=api_url)
+    data = client.get(f"/v1/datasets/{q(id)}/eval-failure-analysis").json()
+    if json_out:
+        print_json(data)
+        return
+    skip = data.get("skip_reason")
+    if skip:
+        sys.stdout.write(f"No failure_analysis column found in this dataset.\n")
+        return
+    total = data.get("total") or 0
+    clean = data.get("clean") or 0
+    dirty = data.get("dirty") or 0
+    clean_rate = data.get("clean_rate") or 0.0
+    dirty_rate = 1.0 - clean_rate
+    console = Console()
+    console.print(f"\n[bold]Quality analysis: {total:,} samples[/bold]\n")
+    console.print(f"  [green]Quality rate[/green]   {_bar(clean_rate)}  {clean_rate * 100:.1f}%")
+    console.print(f"  [red]Issues[/red]         {_bar(dirty_rate)}  {dirty_rate * 100:.1f}%")
+    modes = data.get("mode_distribution") or []
+    if not modes:
+        if dirty == 0:
+            sys.stdout.write("\nNo issues detected.\n")
+        else:
+            sys.stdout.write(f"\n{dirty:,} samples with issues (no mode breakdown available).\n")
+        return
+    sys.stdout.write(f"\nFailure modes  ({dirty:,} samples with issues):\n")
+    name_width = max((len(str(m.get("mode") or "").replace("_", " ")) for m in modes), default=0)
+    name_width = max(name_width, 10)
+    count_width = max((len(str(m.get("count") or 0)) for m in modes), default=0)
+    count_width = max(count_width, 5)
+    for m in modes:
+        name = str(m.get("mode") or "").replace("_", " ")
+        count = m.get("count") or 0
+        rate = m.get("rate") or 0.0
+        bar = _bar(rate)
+        sys.stdout.write(f"  {name:<{name_width}}  {count:>{count_width}}  {bar}  {rate * 100:.1f}%\n")
 @app.command("samples")
 def samples(
     id: Annotated[str, typer.Argument(help="Dataset ID")],

{ldv_cli-0.9.0 → ldv_cli-0.10.0}/src/ldv/commands/instructions.py RENAMED Viewed

@@ -149,6 +149,17 @@ primitives for error analysis — YOU do the reasoning over what they return.
   ldv eval stats <id>                  # Accuracy + correctness counts + error-type
                                        # distribution + token stats (the distribution view)
   ldv eval correctness <id>            # Fast accuracy + correct/incorrect/missing counts
+  ldv eval failures <id>               # Quality analysis: clean-vs-dirty rate + failure mode
+                                       # breakdown from the failure_analysis column.
+                                       # Example output:
+                                       #   Quality analysis: 1,000 samples
+                                       #     Quality rate   ████████████████░░░░  80.0%
+                                       #     Issues         ████░░░░░░░░░░░░░░░░  20.0%
+                                       #   Failure modes  (200 samples with issues):
+                                       #     truncated response    100  ██████████░░░░░░░░░░  50.0%
+                                       #     missing think tags     80  ████████░░░░░░░░░░░░  40.0%
+                                       # If no failure_analysis column exists, prints a clear
+                                       # message and exits 0. Use --json for the raw API response.
   ldv eval samples <id> [--filter correct|incorrect|missing|all] [--search <text>]
                        [--error-type <value>] [--columns a,b] [--limit N] [--offset N]
                                        # Slice the dataset for error analysis. Filters AND
@@ -160,6 +171,8 @@ Notes:
   - --search matches a substring on the prompt OR response column (either one matching is a hit).
   - --error-type values come from the `error_field` / `error_distribution` in `eval stats`.
   - Use the 'index' from `eval samples` directly as `eval sample --row <index>`.
+  - `eval failures` reads the `failure_analysis` column; if the column is absent, the API
+    response sets `skip_reason` and the CLI prints a clear message instead of a breakdown.
+    Use --json to get the raw counts for programmatic consumption.
 ## Row Edits
@@ -278,6 +291,8 @@ never goes stale.
 ### Analyze an eval's failure modes
   ldv eval list --json                              # find the eval dataset
+  ldv eval failures <id> --json                     # clean rate + failure mode breakdown
+                                                    #   (mode_distribution: name/count/rate per mode)
   ldv eval stats <id> --json                        # accuracy + error_distribution_incorrect
                                                     #   = the common errors AMONG the misses
   ldv eval samples <id> --filter incorrect --json   # pull the misses