ldv-cli 0.9.0__tar.gz → 0.10.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. {ldv_cli-0.9.0 → ldv_cli-0.10.0}/PKG-INFO +1 -1
  2. {ldv_cli-0.9.0 → ldv_cli-0.10.0}/pyproject.toml +1 -1
  3. {ldv_cli-0.9.0 → ldv_cli-0.10.0}/src/ldv/__init__.py +1 -1
  4. {ldv_cli-0.9.0 → ldv_cli-0.10.0}/src/ldv/commands/evals.py +57 -0
  5. {ldv_cli-0.9.0 → ldv_cli-0.10.0}/src/ldv/commands/instructions.py +15 -0
  6. {ldv_cli-0.9.0 → ldv_cli-0.10.0}/.gitignore +0 -0
  7. {ldv_cli-0.9.0 → ldv_cli-0.10.0}/README.md +0 -0
  8. {ldv_cli-0.9.0 → ldv_cli-0.10.0}/examples/agent-traces.jsonl +0 -0
  9. {ldv_cli-0.9.0 → ldv_cli-0.10.0}/package-lock.json +0 -0
  10. {ldv_cli-0.9.0 → ldv_cli-0.10.0}/src/ldv/_group.py +0 -0
  11. {ldv_cli-0.9.0 → ldv_cli-0.10.0}/src/ldv/_opts.py +0 -0
  12. {ldv_cli-0.9.0 → ldv_cli-0.10.0}/src/ldv/api.py +0 -0
  13. {ldv_cli-0.9.0 → ldv_cli-0.10.0}/src/ldv/cli.py +0 -0
  14. {ldv_cli-0.9.0 → ldv_cli-0.10.0}/src/ldv/commands/__init__.py +0 -0
  15. {ldv_cli-0.9.0 → ldv_cli-0.10.0}/src/ldv/commands/annotations.py +0 -0
  16. {ldv_cli-0.9.0 → ldv_cli-0.10.0}/src/ldv/commands/auth.py +0 -0
  17. {ldv_cli-0.9.0 → ldv_cli-0.10.0}/src/ldv/commands/buckets.py +0 -0
  18. {ldv_cli-0.9.0 → ldv_cli-0.10.0}/src/ldv/commands/datasets.py +0 -0
  19. {ldv_cli-0.9.0 → ldv_cli-0.10.0}/src/ldv/commands/edits.py +0 -0
  20. {ldv_cli-0.9.0 → ldv_cli-0.10.0}/src/ldv/commands/highlights.py +0 -0
  21. {ldv_cli-0.9.0 → ldv_cli-0.10.0}/src/ldv/commands/issues.py +0 -0
  22. {ldv_cli-0.9.0 → ldv_cli-0.10.0}/src/ldv/commands/preview.py +0 -0
  23. {ldv_cli-0.9.0 → ldv_cli-0.10.0}/src/ldv/commands/reports.py +0 -0
  24. {ldv_cli-0.9.0 → ldv_cli-0.10.0}/src/ldv/commands/skills.py +0 -0
  25. {ldv_cli-0.9.0 → ldv_cli-0.10.0}/src/ldv/commands/spec.py +0 -0
  26. {ldv_cli-0.9.0 → ldv_cli-0.10.0}/src/ldv/commands/tui.py +0 -0
  27. {ldv_cli-0.9.0 → ldv_cli-0.10.0}/src/ldv/commands/update.py +0 -0
  28. {ldv_cli-0.9.0 → ldv_cli-0.10.0}/src/ldv/commands/workspaces.py +0 -0
  29. {ldv_cli-0.9.0 → ldv_cli-0.10.0}/src/ldv/config.py +0 -0
  30. {ldv_cli-0.9.0 → ldv_cli-0.10.0}/src/ldv/output.py +0 -0
  31. {ldv_cli-0.9.0 → ldv_cli-0.10.0}/src/ldv/sessions.py +0 -0
  32. {ldv_cli-0.9.0 → ldv_cli-0.10.0}/src/ldv/util.py +0 -0
  33. {ldv_cli-0.9.0 → ldv_cli-0.10.0}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ldv-cli
3
- Version: 0.9.0
3
+ Version: 0.10.0
4
4
  Summary: ldv — CLI for the Liquid DataViewer platform (formerly lql)
5
5
  Project-URL: Homepage, https://github.com/Liquid4All/lql
6
6
  Author: Liquid AI
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "ldv-cli"
7
- version = "0.9.0"
7
+ version = "0.10.0"
8
8
  description = "ldv — CLI for the Liquid DataViewer platform (formerly lql)"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.12"
@@ -3,4 +3,4 @@ from importlib.metadata import PackageNotFoundError, version
3
3
  try:
4
4
  __version__ = version("ldv-cli")
5
5
  except PackageNotFoundError: # not installed (e.g. running from a source checkout)
6
- __version__ = "0.9.0"
6
+ __version__ = "0.10.0"
@@ -5,6 +5,7 @@ import sys
5
5
  from typing import Annotated, List, Optional
6
6
 
7
7
  import typer
8
+ from rich.console import Console
8
9
 
9
10
  from .._group import AliasGroup
10
11
 
@@ -178,6 +179,62 @@ def correctness(
178
179
  )
179
180
 
180
181
 
182
+ def _bar(pct: float, width: int = 20) -> str:
183
+ filled = round(pct * width)
184
+ filled = max(0, min(width, filled))
185
+ return "█" * filled + "░" * (width - filled)
186
+
187
+
188
@app.command("failures")
def failures(
    id: Annotated[str, typer.Argument(help="Dataset ID")],
    json_out: JsonOpt = False,
    profile: ProfileOpt = None,
    api_url: ApiUrlOpt = None,
) -> None:
    """Quality analysis: clean vs. dirty rate + failure mode breakdown."""
    # NOTE: the docstring above doubles as the command's --help text, so the
    # longer explanation lives in comments instead.
    #
    # Fetches /v1/datasets/<id>/eval-failure-analysis and either dumps the raw
    # payload (--json) or prints a human-readable summary: overall quality and
    # issue bars, followed by one row per failure mode.
    client = ApiClient(profile=profile, api_url=api_url)
    data = client.get(f"/v1/datasets/{q(id)}/eval-failure-analysis").json()
    if json_out:
        # Raw payload, untouched, for programmatic consumers.
        print_json(data)
        return

    if data.get("skip_reason"):
        # The analysis was skipped server-side; there are no counts to show.
        sys.stdout.write("No failure_analysis column found in this dataset.\n")
        return

    total = data.get("total") or 0
    clean = data.get("clean") or 0
    dirty = data.get("dirty") or 0

    # Prefer the rate reported by the API; fall back to the raw counts so a
    # payload without `clean_rate` is not rendered as 0% clean.
    clean_rate = data.get("clean_rate")
    if clean_rate is None:
        clean_rate = clean / total if total else 0.0
    # With no samples and no clean share there is nothing to rate; without
    # this guard an empty dataset would be reported as 100% issues.
    dirty_rate = 1.0 - clean_rate if (total or clean_rate) else 0.0

    console = Console()

    console.print(f"\n[bold]Quality analysis: {total:,} samples[/bold]\n")
    console.print(f" [green]Quality rate[/green] {_bar(clean_rate)} {clean_rate * 100:.1f}%")
    console.print(f" [red]Issues[/red] {_bar(dirty_rate)} {dirty_rate * 100:.1f}%")

    modes = data.get("mode_distribution") or []
    if not modes:
        if dirty == 0:
            sys.stdout.write("\nNo issues detected.\n")
        else:
            sys.stdout.write(f"\n{dirty:,} samples with issues (no mode breakdown available).\n")
        return

    # Normalise each entry once: (display name, count, rate).
    rows = [
        (
            str(m.get("mode") or "").replace("_", " "),
            m.get("count") or 0,
            m.get("rate") or 0.0,
        )
        for m in modes
    ]
    # Column widths fit the longest value, with a floor so short tables still
    # line up.
    name_width = max(10, max(len(name) for name, _, _ in rows))
    count_width = max(5, max(len(str(count)) for _, count, _ in rows))

    sys.stdout.write(f"\nFailure modes ({dirty:,} samples with issues):\n")
    for name, count, rate in rows:
        bar = _bar(rate)
        sys.stdout.write(f" {name:<{name_width}} {count:>{count_width}} {bar} {rate * 100:.1f}%\n")
236
+
237
+
181
238
  @app.command("samples")
182
239
  def samples(
183
240
  id: Annotated[str, typer.Argument(help="Dataset ID")],
@@ -149,6 +149,17 @@ primitives for error analysis — YOU do the reasoning over what they return.
149
149
  ldv eval stats <id> # Accuracy + correctness counts + error-type
150
150
  # distribution + token stats (the distribution view)
151
151
  ldv eval correctness <id> # Fast accuracy + correct/incorrect/missing counts
152
+ ldv eval failures <id> # Quality analysis: clean-vs-dirty rate + failure mode
153
+ # breakdown from the failure_analysis column.
154
+ # Example output:
155
+ # Quality analysis: 1,000 samples
156
+ # Quality rate ████████████████░░░░ 80.0%
157
+ # Issues ████░░░░░░░░░░░░░░░░ 20.0%
158
+ # Failure modes (200 samples with issues):
159
+ # truncated response 100 ██████████░░░░░░░░░░ 50.0%
160
+ # missing think tags 80 ████████░░░░░░░░░░░░ 40.0%
161
+ # If no failure_analysis column exists, prints a clear
162
+ # message and exits 0. Use --json for the raw API response.
152
163
  ldv eval samples <id> [--filter correct|incorrect|missing|all] [--search <text>]
153
164
  [--error-type <value>] [--columns a,b] [--limit N] [--offset N]
154
165
  # Slice the dataset for error analysis. Filters AND
@@ -160,6 +171,8 @@ Notes:
160
171
  - --search matches a substring on the prompt OR response column (either one matching is a hit).
161
172
  - --error-type values come from the `error_field` / `error_distribution` in `eval stats`.
162
173
  - Use the 'index' from `eval samples` directly as `eval sample --row <index>`.
174
+ - `eval failures` reads the `failure_analysis` column; if absent, skip_reason is set and a
175
+ clear message is printed. Use --json to get the raw counts for programmatic consumption.
163
176
 
164
177
  ## Row Edits
165
178
 
@@ -278,6 +291,8 @@ never goes stale.
278
291
 
279
292
  ### Analyze an eval's failure modes
280
293
  ldv eval list --json # find the eval dataset
294
+ ldv eval failures <id> --json # clean rate + failure mode breakdown
295
+ # (mode_distribution: name/count/rate per mode)
281
296
  ldv eval stats <id> --json # accuracy + error_distribution_incorrect
282
297
  # = the common errors AMONG the misses
283
298
  ldv eval samples <id> --filter incorrect --json # pull the misses
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes