ldv-cli 0.9.0__tar.gz → 0.11.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ldv_cli-0.9.0 → ldv_cli-0.11.0}/PKG-INFO +13 -8
- {ldv_cli-0.9.0 → ldv_cli-0.11.0}/README.md +12 -7
- {ldv_cli-0.9.0 → ldv_cli-0.11.0}/pyproject.toml +1 -1
- {ldv_cli-0.9.0 → ldv_cli-0.11.0}/src/ldv/__init__.py +1 -1
- {ldv_cli-0.9.0 → ldv_cli-0.11.0}/src/ldv/commands/datasets.py +17 -3
- {ldv_cli-0.9.0 → ldv_cli-0.11.0}/src/ldv/commands/evals.py +72 -8
- {ldv_cli-0.9.0 → ldv_cli-0.11.0}/src/ldv/commands/instructions.py +34 -9
- {ldv_cli-0.9.0 → ldv_cli-0.11.0}/src/ldv/commands/preview.py +5 -68
- ldv_cli-0.11.0/src/ldv/filters.py +99 -0
- {ldv_cli-0.9.0 → ldv_cli-0.11.0}/uv.lock +1 -1
- {ldv_cli-0.9.0 → ldv_cli-0.11.0}/.gitignore +0 -0
- {ldv_cli-0.9.0 → ldv_cli-0.11.0}/examples/agent-traces.jsonl +0 -0
- {ldv_cli-0.9.0 → ldv_cli-0.11.0}/package-lock.json +0 -0
- {ldv_cli-0.9.0 → ldv_cli-0.11.0}/src/ldv/_group.py +0 -0
- {ldv_cli-0.9.0 → ldv_cli-0.11.0}/src/ldv/_opts.py +0 -0
- {ldv_cli-0.9.0 → ldv_cli-0.11.0}/src/ldv/api.py +0 -0
- {ldv_cli-0.9.0 → ldv_cli-0.11.0}/src/ldv/cli.py +0 -0
- {ldv_cli-0.9.0 → ldv_cli-0.11.0}/src/ldv/commands/__init__.py +0 -0
- {ldv_cli-0.9.0 → ldv_cli-0.11.0}/src/ldv/commands/annotations.py +0 -0
- {ldv_cli-0.9.0 → ldv_cli-0.11.0}/src/ldv/commands/auth.py +0 -0
- {ldv_cli-0.9.0 → ldv_cli-0.11.0}/src/ldv/commands/buckets.py +0 -0
- {ldv_cli-0.9.0 → ldv_cli-0.11.0}/src/ldv/commands/edits.py +0 -0
- {ldv_cli-0.9.0 → ldv_cli-0.11.0}/src/ldv/commands/highlights.py +0 -0
- {ldv_cli-0.9.0 → ldv_cli-0.11.0}/src/ldv/commands/issues.py +0 -0
- {ldv_cli-0.9.0 → ldv_cli-0.11.0}/src/ldv/commands/reports.py +0 -0
- {ldv_cli-0.9.0 → ldv_cli-0.11.0}/src/ldv/commands/skills.py +0 -0
- {ldv_cli-0.9.0 → ldv_cli-0.11.0}/src/ldv/commands/spec.py +0 -0
- {ldv_cli-0.9.0 → ldv_cli-0.11.0}/src/ldv/commands/tui.py +0 -0
- {ldv_cli-0.9.0 → ldv_cli-0.11.0}/src/ldv/commands/update.py +0 -0
- {ldv_cli-0.9.0 → ldv_cli-0.11.0}/src/ldv/commands/workspaces.py +0 -0
- {ldv_cli-0.9.0 → ldv_cli-0.11.0}/src/ldv/config.py +0 -0
- {ldv_cli-0.9.0 → ldv_cli-0.11.0}/src/ldv/output.py +0 -0
- {ldv_cli-0.9.0 → ldv_cli-0.11.0}/src/ldv/sessions.py +0 -0
- {ldv_cli-0.9.0 → ldv_cli-0.11.0}/src/ldv/util.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ldv-cli
|
|
3
|
-
Version: 0.9.0
|
|
3
|
+
Version: 0.11.0
|
|
4
4
|
Summary: ldv — CLI for the Liquid DataViewer platform (formerly lql)
|
|
5
5
|
Project-URL: Homepage, https://github.com/Liquid4All/lql
|
|
6
6
|
Author: Liquid AI
|
|
@@ -140,7 +140,8 @@ ldv datasets create --workspace <id> --hf-bucket <org/bucket> --key <path-or-glo
|
|
|
140
140
|
From an HF storage bucket (e.g. --key 'data/*.parquet')
|
|
141
141
|
ldv datasets sync <id> Trigger sync (HF repo, S3, or HF bucket)
|
|
142
142
|
ldv datasets schema <id> Show column schema
|
|
143
|
-
ldv datasets rows <id> [--limit N] [--offset N]
|
|
143
|
+
ldv datasets rows <id> [-f "col<op>value"] [--columns a,b] [--limit N] [--offset N]
|
|
144
|
+
Fetch rows (-f/--filter: same syntax everywhere)
|
|
144
145
|
ldv datasets delete <id> Delete dataset
|
|
145
146
|
ldv datasets push <id> Push to HuggingFace
|
|
146
147
|
ldv datasets push-status <id> [--job <id>] Check push job status
|
|
@@ -174,10 +175,11 @@ ldv preview <src> --offset N Start at row index N
|
|
|
174
175
|
ldv preview <src> --title "<title>" Title shown in the viewer header
|
|
175
176
|
```
|
|
176
177
|
|
|
177
|
-
**Filtering (`--filter`/`-f`)
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
178
|
+
**Filtering (`--filter`/`-f`) — one syntax everywhere.** The same flag and syntax
|
|
179
|
+
work on `preview`, `datasets rows`, and `eval samples`. Show only matching rows —
|
|
180
|
+
`preview` also filters local files (client-side); platform datasets filter
|
|
181
|
+
server-side. Repeatable; filters AND together; string match is case-insensitive.
|
|
182
|
+
Operators: `=`, `!=`, `~` (contains), `>`, `<`, `>=`, `<=`.
|
|
181
183
|
|
|
182
184
|
```
|
|
183
185
|
ldv preview <dataset-id> -f "domain=telecom"
|
|
@@ -228,7 +230,7 @@ ldv eval list [--workspace <id>] List eval datasets only
|
|
|
228
230
|
workspace, lists only evals you own.
|
|
229
231
|
ldv eval correctness <id> Fast accuracy + correct/incorrect/missing counts
|
|
230
232
|
ldv eval stats <id> Accuracy + error-type distribution + token stats
|
|
231
|
-
ldv eval samples <id> [--
|
|
233
|
+
ldv eval samples <id> [-f "col<op>value" ...] [--correct|--incorrect|--missing]
|
|
232
234
|
[--search <text>] [--error-type <value>]
|
|
233
235
|
[--columns a,b] [--limit N] [--offset N]
|
|
234
236
|
Slice the dataset for error analysis. Filters
|
|
@@ -239,6 +241,8 @@ ldv eval sample <id> --row <index> Read one full sample (the conve
|
|
|
239
241
|
|
|
240
242
|
Notes:
|
|
241
243
|
|
|
244
|
+
- `-f`/`--filter` is the unified column filter — same syntax as `preview` and `datasets rows` (see Filtering above).
|
|
245
|
+
- `--correct` / `--incorrect` / `--missing` are convenience flags for the canonical correctness filter (mutually exclusive). They AND with any `-f` filters, `--search`, and `--error-type`.
|
|
242
246
|
- `--search` matches a substring on the prompt **or** response column (either hit counts). Override the searched columns with `--search-columns a,b`.
|
|
243
247
|
- `--error-type` values come from the `error_field` / `error_distribution` reported by `eval stats`.
|
|
244
248
|
- Use the `index` from `eval samples` directly as `eval sample --row <index>`.
|
|
@@ -248,7 +252,8 @@ Typical analysis loop:
|
|
|
248
252
|
```bash
|
|
249
253
|
ldv eval list --workspace <id> # find the eval dataset
|
|
250
254
|
ldv eval stats <id> # accuracy + where the errors cluster
|
|
251
|
-
ldv eval samples <id> --
|
|
255
|
+
ldv eval samples <id> --incorrect --limit 20 # pull the misses
|
|
256
|
+
ldv eval samples <id> --incorrect -f "reasoning_tokens>30000" # misses that ran long
|
|
252
257
|
ldv eval sample <id> --row 42 # read one failure in full
|
|
253
258
|
```
|
|
254
259
|
|
|
@@ -124,7 +124,8 @@ ldv datasets create --workspace <id> --hf-bucket <org/bucket> --key <path-or-glo
|
|
|
124
124
|
From an HF storage bucket (e.g. --key 'data/*.parquet')
|
|
125
125
|
ldv datasets sync <id> Trigger sync (HF repo, S3, or HF bucket)
|
|
126
126
|
ldv datasets schema <id> Show column schema
|
|
127
|
-
ldv datasets rows <id> [--limit N] [--offset N]
|
|
127
|
+
ldv datasets rows <id> [-f "col<op>value"] [--columns a,b] [--limit N] [--offset N]
|
|
128
|
+
Fetch rows (-f/--filter: same syntax everywhere)
|
|
128
129
|
ldv datasets delete <id> Delete dataset
|
|
129
130
|
ldv datasets push <id> Push to HuggingFace
|
|
130
131
|
ldv datasets push-status <id> [--job <id>] Check push job status
|
|
@@ -158,10 +159,11 @@ ldv preview <src> --offset N Start at row index N
|
|
|
158
159
|
ldv preview <src> --title "<title>" Title shown in the viewer header
|
|
159
160
|
```
|
|
160
161
|
|
|
161
|
-
**Filtering (`--filter`/`-f`)
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
162
|
+
**Filtering (`--filter`/`-f`) — one syntax everywhere.** The same flag and syntax
|
|
163
|
+
work on `preview`, `datasets rows`, and `eval samples`. Show only matching rows —
|
|
164
|
+
`preview` also filters local files (client-side); platform datasets filter
|
|
165
|
+
server-side. Repeatable; filters AND together; string match is case-insensitive.
|
|
166
|
+
Operators: `=`, `!=`, `~` (contains), `>`, `<`, `>=`, `<=`.
|
|
165
167
|
|
|
166
168
|
```
|
|
167
169
|
ldv preview <dataset-id> -f "domain=telecom"
|
|
@@ -212,7 +214,7 @@ ldv eval list [--workspace <id>] List eval datasets only
|
|
|
212
214
|
workspace, lists only evals you own.
|
|
213
215
|
ldv eval correctness <id> Fast accuracy + correct/incorrect/missing counts
|
|
214
216
|
ldv eval stats <id> Accuracy + error-type distribution + token stats
|
|
215
|
-
ldv eval samples <id> [--
|
|
217
|
+
ldv eval samples <id> [-f "col<op>value" ...] [--correct|--incorrect|--missing]
|
|
216
218
|
[--search <text>] [--error-type <value>]
|
|
217
219
|
[--columns a,b] [--limit N] [--offset N]
|
|
218
220
|
Slice the dataset for error analysis. Filters
|
|
@@ -223,6 +225,8 @@ ldv eval sample <id> --row <index> Read one full sample (the conve
|
|
|
223
225
|
|
|
224
226
|
Notes:
|
|
225
227
|
|
|
228
|
+
- `-f`/`--filter` is the unified column filter — same syntax as `preview` and `datasets rows` (see Filtering above).
|
|
229
|
+
- `--correct` / `--incorrect` / `--missing` are convenience flags for the canonical correctness filter (mutually exclusive). They AND with any `-f` filters, `--search`, and `--error-type`.
|
|
226
230
|
- `--search` matches a substring on the prompt **or** response column (either hit counts). Override the searched columns with `--search-columns a,b`.
|
|
227
231
|
- `--error-type` values come from the `error_field` / `error_distribution` reported by `eval stats`.
|
|
228
232
|
- Use the `index` from `eval samples` directly as `eval sample --row <index>`.
|
|
@@ -232,7 +236,8 @@ Typical analysis loop:
|
|
|
232
236
|
```bash
|
|
233
237
|
ldv eval list --workspace <id> # find the eval dataset
|
|
234
238
|
ldv eval stats <id> # accuracy + where the errors cluster
|
|
235
|
-
ldv eval samples <id> --
|
|
239
|
+
ldv eval samples <id> --incorrect --limit 20 # pull the misses
|
|
240
|
+
ldv eval samples <id> --incorrect -f "reasoning_tokens>30000" # misses that ran long
|
|
236
241
|
ldv eval sample <id> --row 42 # read one failure in full
|
|
237
242
|
```
|
|
238
243
|
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import sys
|
|
3
3
|
from pathlib import Path
|
|
4
|
-
from typing import Annotated, Optional
|
|
4
|
+
from typing import Annotated, List, Optional
|
|
5
5
|
|
|
6
6
|
import typer
|
|
7
7
|
|
|
@@ -10,6 +10,7 @@ from .._group import AliasGroup
|
|
|
10
10
|
from .._opts import ApiUrlOpt, JsonOpt, ProfileOpt
|
|
11
11
|
from ..api import ApiClient
|
|
12
12
|
from ..config import _env
|
|
13
|
+
from ..filters import FILTER_HELP, parse_filters, to_api_filters
|
|
13
14
|
from ..output import print_error, print_grouped_tables, print_json, print_table
|
|
14
15
|
from ..util import q
|
|
15
16
|
|
|
@@ -339,15 +340,28 @@ def profile_cmd(
|
|
|
339
340
|
@app.command("rows")
|
|
340
341
|
def rows(
|
|
341
342
|
id: Annotated[str, typer.Argument(help="Dataset ID")],
|
|
343
|
+
filter_: Annotated[Optional[List[str]], typer.Option("--filter", "-f", help=FILTER_HELP)] = None,
|
|
344
|
+
columns: Annotated[
|
|
345
|
+
Optional[str], typer.Option("--columns", help="Comma-separated columns to project")
|
|
346
|
+
] = None,
|
|
342
347
|
limit: Annotated[str, typer.Option("--limit", help="Number of rows")] = "20",
|
|
343
348
|
offset: Annotated[str, typer.Option("--offset", help="Row offset")] = "0",
|
|
344
349
|
json_out: JsonOpt = False,
|
|
345
350
|
profile: ProfileOpt = None,
|
|
346
351
|
api_url: ApiUrlOpt = None,
|
|
347
352
|
) -> None:
|
|
348
|
-
"""Get dataset rows."""
|
|
353
|
+
"""Get dataset rows, optionally filtered (see --filter)."""
|
|
349
354
|
client = ApiClient(profile=profile, api_url=api_url)
|
|
350
|
-
|
|
355
|
+
params = {"limit": limit, "offset": offset}
|
|
356
|
+
if columns:
|
|
357
|
+
params["columns"] = str(columns)
|
|
358
|
+
api_filters = to_api_filters(parse_filters(filter_))
|
|
359
|
+
if api_filters:
|
|
360
|
+
data = client.post(
|
|
361
|
+
f"/v1/datasets/{q(id)}/rows/filter", json={"filters": api_filters}, params=params
|
|
362
|
+
).json()
|
|
363
|
+
else:
|
|
364
|
+
data = client.get(f"/v1/datasets/{q(id)}/rows", params=params).json()
|
|
351
365
|
if json_out:
|
|
352
366
|
print_json(data)
|
|
353
367
|
return
|
|
@@ -5,12 +5,14 @@ import sys
|
|
|
5
5
|
from typing import Annotated, List, Optional
|
|
6
6
|
|
|
7
7
|
import typer
|
|
8
|
+
from rich.console import Console
|
|
8
9
|
|
|
9
10
|
from .._group import AliasGroup
|
|
10
11
|
|
|
11
12
|
from .._opts import ApiUrlOpt, JsonOpt, ProfileOpt
|
|
12
13
|
from ..api import ApiClient
|
|
13
14
|
from ..config import _env
|
|
15
|
+
from ..filters import FILTER_HELP, parse_filters, to_api_filters
|
|
14
16
|
from ..output import print_error, print_json, print_table
|
|
15
17
|
from ..util import q
|
|
16
18
|
|
|
@@ -178,10 +180,69 @@ def correctness(
|
|
|
178
180
|
)
|
|
179
181
|
|
|
180
182
|
|
|
183
|
+
def _bar(pct: float, width: int = 20) -> str:
    """Render ``pct`` (a 0..1 fraction) as a fixed-width text bar.

    Args:
        pct: Fraction to draw. Values outside 0..1 are clamped; NaN draws an
            empty bar instead of raising.
        width: Total number of cells in the bar.

    Returns:
        ``width`` characters: filled cells ("█") followed by empty cells ("░").
    """
    if pct != pct:
        # NaN is the only value unequal to itself; round() would raise on it.
        filled = 0
    else:
        # Clamp before rounding so +/-inf cannot overflow round().
        clamped = min(1.0, max(0.0, pct))
        filled = max(0, min(width, round(clamped * width)))
    return "█" * filled + "░" * (width - filled)
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
@app.command("failures")
def failures(
    id: Annotated[str, typer.Argument(help="Dataset ID")],
    json_out: JsonOpt = False,
    profile: ProfileOpt = None,
    api_url: ApiUrlOpt = None,
) -> None:
    """Quality analysis: clean vs. dirty rate + failure mode breakdown."""
    # NOTE: the docstring above doubles as the command's --help text, so the
    # longer explanation lives in comments.
    #
    # Fetches /v1/datasets/<id>/eval-failure-analysis and either dumps the raw
    # payload (--json) or prints a quality-rate bar, an issues bar, and one
    # line per entry of `mode_distribution`.
    client = ApiClient(profile=profile, api_url=api_url)
    data = client.get(f"/v1/datasets/{q(id)}/eval-failure-analysis").json()
    if json_out:
        print_json(data)
        return
    if data.get("skip_reason"):
        # Any skip_reason is reported with this fixed message and a normal return.
        sys.stdout.write("No failure_analysis column found in this dataset.\n")
        return

    # `or` fallbacks cover both missing keys and explicit nulls in the payload.
    total = data.get("total") or 0
    dirty = data.get("dirty") or 0
    clean_rate = data.get("clean_rate") or 0.0
    # With no samples and no reported clean rate, nothing is dirty: avoid
    # showing "Issues 100.0%" next to "No issues detected." for an empty dataset.
    dirty_rate = max(0.0, 1.0 - clean_rate) if (total or clean_rate) else 0.0

    console = Console()

    console.print(f"\n[bold]Quality analysis: {total:,} samples[/bold]\n")
    console.print(f" [green]Quality rate[/green] {_bar(clean_rate)} {clean_rate * 100:.1f}%")
    console.print(f" [red]Issues[/red] {_bar(dirty_rate)} {dirty_rate * 100:.1f}%")

    modes = data.get("mode_distribution") or []
    if not modes:
        if dirty == 0:
            sys.stdout.write("\nNo issues detected.\n")
        else:
            sys.stdout.write(f"\n{dirty:,} samples with issues (no mode breakdown available).\n")
        return

    sys.stdout.write(f"\nFailure modes ({dirty:,} samples with issues):\n")
    # Column widths: widest displayed value, with a floor so short tables
    # still line up (10 for names, 5 for counts).
    name_width = max((len(str(m.get("mode") or "").replace("_", " ")) for m in modes), default=0)
    name_width = max(name_width, 10)
    count_width = max((len(str(m.get("count") or 0)) for m in modes), default=0)
    count_width = max(count_width, 5)
    for m in modes:
        # Mode names are shown with underscores replaced by spaces.
        name = str(m.get("mode") or "").replace("_", " ")
        count = m.get("count") or 0
        rate = m.get("rate") or 0.0
        bar = _bar(rate)
        sys.stdout.write(f" {name:<{name_width}} {count:>{count_width}} {bar} {rate * 100:.1f}%\n")
|
|
237
|
+
|
|
238
|
+
|
|
181
239
|
@app.command("samples")
|
|
182
240
|
def samples(
|
|
183
241
|
id: Annotated[str, typer.Argument(help="Dataset ID")],
|
|
184
|
-
filter_: Annotated[str, typer.Option("--filter", help=
|
|
242
|
+
filter_: Annotated[Optional[List[str]], typer.Option("--filter", "-f", help=FILTER_HELP)] = None,
|
|
243
|
+
correct: Annotated[bool, typer.Option("--correct", help="Only correct samples")] = False,
|
|
244
|
+
incorrect: Annotated[bool, typer.Option("--incorrect", help="Only incorrect samples")] = False,
|
|
245
|
+
missing: Annotated[bool, typer.Option("--missing", help="Only samples with no verdict")] = False,
|
|
185
246
|
search: Annotated[Optional[str], typer.Option("--search", help="Substring match on prompt OR response column")] = None,
|
|
186
247
|
search_columns: Annotated[Optional[str], typer.Option("--search-columns", help="Override which columns --search matches (comma-separated)")] = None,
|
|
187
248
|
error_type: Annotated[Optional[str], typer.Option("--error-type", help="Filter to samples whose error field equals <value>")] = None,
|
|
@@ -192,14 +253,17 @@ def samples(
|
|
|
192
253
|
profile: ProfileOpt = None,
|
|
193
254
|
api_url: ApiUrlOpt = None,
|
|
194
255
|
) -> None:
|
|
195
|
-
"""List samples filtered by
|
|
256
|
+
"""List eval samples filtered by --filter / --correct / --incorrect / --missing / --search / --error-type."""
|
|
196
257
|
client = ApiClient(profile=profile, api_url=api_url)
|
|
197
|
-
filters: List[dict] =
|
|
258
|
+
filters: List[dict] = to_api_filters(parse_filters(filter_))
|
|
198
259
|
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
260
|
+
# --correct / --incorrect / --missing are convenience flags for the canonical
|
|
261
|
+
# correctness filter (server-side reconciliation). Mutually exclusive.
|
|
262
|
+
chosen = [name for name, on in (("correct", correct), ("incorrect", incorrect), ("missing", missing)) if on]
|
|
263
|
+
if len(chosen) > 1:
|
|
264
|
+
print_error("--correct, --incorrect and --missing are mutually exclusive.", "bad_filter")
|
|
202
265
|
raise typer.Exit(1)
|
|
266
|
+
correctness = chosen[0] if chosen else None
|
|
203
267
|
|
|
204
268
|
if search:
|
|
205
269
|
if search_columns:
|
|
@@ -230,8 +294,8 @@ def samples(
|
|
|
230
294
|
params = {"limit": limit, "offset": offset}
|
|
231
295
|
if columns:
|
|
232
296
|
params["columns"] = str(columns)
|
|
233
|
-
if
|
|
234
|
-
params["correctness"] =
|
|
297
|
+
if correctness:
|
|
298
|
+
params["correctness"] = correctness
|
|
235
299
|
|
|
236
300
|
data = client.post(f"/v1/datasets/{q(id)}/rows/filter", json={"filters": filters}, params=params).json()
|
|
237
301
|
if json_out:
|
|
@@ -84,7 +84,8 @@ A workspace is the top-level container for datasets, spec docs, and members.
|
|
|
84
84
|
ldv datasets schema <id> # Column names + types
|
|
85
85
|
ldv datasets profile <id> # Per-column nulls/cardinality/numeric stats/top values + content token stats
|
|
86
86
|
# [--full-content] exact content scan (slow) [--skip-content] omit it
|
|
87
|
-
ldv datasets rows <id> [--limit N] [--offset N]
|
|
87
|
+
ldv datasets rows <id> [-f "col<op>value" ...] [--columns a,b] [--limit N] [--offset N]
|
|
88
|
+
# -f/--filter is the same syntax everywhere (see Filtering below)
|
|
88
89
|
ldv datasets delete <id>
|
|
89
90
|
ldv datasets push <id> # Push edits back to HuggingFace
|
|
90
91
|
ldv datasets push-status <id> [--job <job-id>]
|
|
@@ -116,13 +117,17 @@ repeatable), -f/--filter (filter rows; see below), -n/--limit (page size when
|
|
|
116
117
|
paging a platform dataset), --offset (start row index), --title, --hf, --split,
|
|
117
118
|
--workspace, --profile, --api-url.
|
|
118
119
|
|
|
119
|
-
Filtering: -f/--filter "col<op>value" shows only matching
|
|
120
|
-
|
|
121
|
-
together; string compare is
|
|
122
|
-
~ (contains), >, <, >=, <=.
|
|
120
|
+
Filtering (one syntax everywhere): -f/--filter "col<op>value" shows only matching
|
|
121
|
+
rows. The SAME flag and syntax work on `preview`, `datasets rows`, and
|
|
122
|
+
`eval samples`. Repeatable; filters AND together; string compare is
|
|
123
|
+
case-insensitive. Operators: = (eq), != (ne), ~ (contains), >, <, >=, <=.
|
|
124
|
+
For `preview` it also runs on local files (client-side); on platform datasets all
|
|
125
|
+
three filter server-side via POST /v1/datasets/{id}/rows/filter.
|
|
123
126
|
|
|
124
127
|
ldv preview <dataset-id> -f "domain=telecom" -f "reward>=0.8"
|
|
125
128
|
ldv preview data.jsonl -f "model~lfm"
|
|
129
|
+
ldv datasets rows <id> -f "lang=en" -f "score<0.5"
|
|
130
|
+
ldv eval samples <id> -f "reasoning_tokens>30000" --incorrect
|
|
126
131
|
|
|
127
132
|
Navigation: two modes toggled with m — pager (one sample at a time; ←/→ or
|
|
128
133
|
n/b switch samples, ↑/↓/j/k scroll) and scroll (all samples; n/b jump between
|
|
@@ -149,17 +154,34 @@ primitives for error analysis — YOU do the reasoning over what they return.
|
|
|
149
154
|
ldv eval stats <id> # Accuracy + correctness counts + error-type
|
|
150
155
|
# distribution + token stats (the distribution view)
|
|
151
156
|
ldv eval correctness <id> # Fast accuracy + correct/incorrect/missing counts
|
|
152
|
-
ldv eval
|
|
153
|
-
|
|
157
|
+
ldv eval failures <id> # Quality analysis: clean-vs-dirty rate + failure mode
|
|
158
|
+
# breakdown from the failure_analysis column.
|
|
159
|
+
# Example output:
|
|
160
|
+
# Quality analysis: 1,000 samples
|
|
161
|
+
# Quality rate ████████████████░░░░ 80.0%
|
|
162
|
+
# Issues ████░░░░░░░░░░░░░░░░ 20.0%
|
|
163
|
+
# Failure modes (200 samples with issues):
|
|
164
|
+
# truncated response 100 ██████████░░░░░░░░░░ 50.0%
|
|
165
|
+
# missing think tags 80 ████████░░░░░░░░░░░░ 40.0%
|
|
166
|
+
# If no failure_analysis column exists, prints a clear
|
|
167
|
+
# message and exits 0. Use --json for the raw API response.
|
|
168
|
+
ldv eval samples <id> [-f "col<op>value" ...] [--correct|--incorrect|--missing]
|
|
169
|
+
[--search <text>] [--error-type <value>] [--columns a,b]
|
|
170
|
+
[--limit N] [--offset N]
|
|
154
171
|
# Slice the dataset for error analysis. Filters AND
|
|
155
172
|
# together. Prints an 'index' column per row.
|
|
156
173
|
ldv eval sample <id> --row <index> # Read one full sample (the conversation) by the
|
|
157
174
|
# 'index' from `eval samples`
|
|
158
175
|
|
|
159
176
|
Notes:
|
|
177
|
+
- -f/--filter is the unified column filter (same syntax as preview / datasets rows; see Filtering).
|
|
178
|
+
- --correct / --incorrect / --missing are convenience flags for the canonical correctness filter
|
|
179
|
+
(mutually exclusive). They AND with any -f filters and --search / --error-type.
|
|
160
180
|
- --search matches a substring on the prompt OR response column (either one matching is a hit).
|
|
161
181
|
- --error-type values come from the `error_field` / `error_distribution` in `eval stats`.
|
|
162
182
|
- Use the 'index' from `eval samples` directly as `eval sample --row <index>`.
|
|
183
|
+
- `eval failures` reads the `failure_analysis` column; if absent, skip_reason is set and a
|
|
184
|
+
clear message is printed. Use --json to get the raw counts for programmatic consumption.
|
|
163
185
|
|
|
164
186
|
## Row Edits
|
|
165
187
|
|
|
@@ -278,10 +300,13 @@ never goes stale.
|
|
|
278
300
|
|
|
279
301
|
### Analyze an eval's failure modes
|
|
280
302
|
ldv eval list --json # find the eval dataset
|
|
303
|
+
ldv eval failures <id> --json # clean rate + failure mode breakdown
|
|
304
|
+
# (mode_distribution: name/count/rate per mode)
|
|
281
305
|
ldv eval stats <id> --json # accuracy + error_distribution_incorrect
|
|
282
306
|
# = the common errors AMONG the misses
|
|
283
|
-
ldv eval samples <id> --
|
|
284
|
-
ldv eval samples <id> --
|
|
307
|
+
ldv eval samples <id> --incorrect --json # pull the misses
|
|
308
|
+
ldv eval samples <id> --incorrect --error-type <value> --json # focus one failure mode
|
|
309
|
+
ldv eval samples <id> --incorrect -f "reasoning_tokens>30000" --json # misses that ran long
|
|
285
310
|
ldv eval sample <id> --row <index> --json # read the full conversation of a miss
|
|
286
311
|
# Then synthesize the common pattern across the misses yourself — the commands give you
|
|
287
312
|
# the data (counts, slices, conversations); the analysis is your job.
|
|
@@ -20,6 +20,7 @@ import typer
|
|
|
20
20
|
|
|
21
21
|
from .._opts import ApiUrlOpt, ProfileOpt
|
|
22
22
|
from ..api import ApiClient
|
|
23
|
+
from ..filters import FILTER_HELP, parse_filters, row_matches, to_api_filters
|
|
23
24
|
from ..output import print_error
|
|
24
25
|
from ..util import q
|
|
25
26
|
|
|
@@ -759,67 +760,6 @@ def _choose_workspace(client: ApiClient, tui_mod) -> Optional[str]:
|
|
|
759
760
|
return choice
|
|
760
761
|
|
|
761
762
|
|
|
762
|
-
# --------------------------------------------------------------------------
|
|
763
|
-
# Row filtering (--filter "col<op>value")
|
|
764
|
-
# --------------------------------------------------------------------------
|
|
765
|
-
|
|
766
|
-
# Maps each CLI symbol to the platform filter API's operator name (the same
|
|
767
|
-
# names work server-side and locally). _parse_filters picks the earliest operator
|
|
768
|
-
# (longest on a tie), so list order doesn't affect correctness.
|
|
769
|
-
_FILTER_OPS = [(">=", "gte"), ("<=", "lte"), ("!=", "ne"), ("~", "contains"), ("=", "eq"), (">", "gt"), ("<", "lt")]
|
|
770
|
-
_NUMERIC_OPS = {"gt": lambda c, v: c > v, "lt": lambda c, v: c < v, "gte": lambda c, v: c >= v, "lte": lambda c, v: c <= v}
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
def _parse_filters(specs: Optional[List[str]]) -> List[tuple]:
|
|
774
|
-
"""Parse ['col=value', 'reward>=0.5', 'name~kod'] → [(col, op, value), ...].
|
|
775
|
-
|
|
776
|
-
Splits on the EARLIEST operator (longest on a tie, so 'reward>=5' is gte not
|
|
777
|
-
gt), keeping operator chars in the value intact (e.g. 'q=a>b' → col 'q', value
|
|
778
|
-
'a>b'). Rejects an empty column or value."""
|
|
779
|
-
out: List[tuple] = []
|
|
780
|
-
for spec in specs or []:
|
|
781
|
-
chosen = None # (index, symbol, op_name)
|
|
782
|
-
for sym, op in _FILTER_OPS:
|
|
783
|
-
i = spec.find(sym)
|
|
784
|
-
if i > 0 and (chosen is None or i < chosen[0] or (i == chosen[0] and len(sym) > len(chosen[1]))):
|
|
785
|
-
chosen = (i, sym, op)
|
|
786
|
-
if chosen is None:
|
|
787
|
-
print_error(
|
|
788
|
-
f"Invalid --filter '{spec}'. Use col=value, col!=value, col~text, or col>/</>=/<= N.",
|
|
789
|
-
"bad_filter",
|
|
790
|
-
)
|
|
791
|
-
raise typer.Exit(1)
|
|
792
|
-
i, sym, op = chosen
|
|
793
|
-
col, val = spec[:i].strip(), spec[i + len(sym):].strip()
|
|
794
|
-
if not col or not val:
|
|
795
|
-
print_error(f"Invalid --filter '{spec}': both a column and a value are required.", "bad_filter")
|
|
796
|
-
raise typer.Exit(1)
|
|
797
|
-
out.append((col, op, val))
|
|
798
|
-
return out
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
def _cell_matches(cell: object, op: str, val: str) -> bool:
|
|
802
|
-
if op == "contains":
|
|
803
|
-
return cell is not None and val.lower() in str(cell).lower()
|
|
804
|
-
if op in ("eq", "ne"):
|
|
805
|
-
equal = cell is not None and str(cell).strip().lower() == val.strip().lower()
|
|
806
|
-
return equal if op == "eq" else not equal
|
|
807
|
-
try:
|
|
808
|
-
return _NUMERIC_OPS[op](float(cell), float(val)) # gt/lt/gte/lte
|
|
809
|
-
except (TypeError, ValueError):
|
|
810
|
-
return False
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
def _row_matches(row: object, filters: List[tuple]) -> bool:
|
|
814
|
-
"""Client-side predicate (local files). A non-dict row can't match a column
|
|
815
|
-
filter. All filters AND together."""
|
|
816
|
-
if not filters:
|
|
817
|
-
return True
|
|
818
|
-
if not isinstance(row, dict):
|
|
819
|
-
return False
|
|
820
|
-
return all(_cell_matches(row.get(col), op, val) for col, op, val in filters)
|
|
821
|
-
|
|
822
|
-
|
|
823
763
|
# --------------------------------------------------------------------------
|
|
824
764
|
# Command
|
|
825
765
|
# --------------------------------------------------------------------------
|
|
@@ -835,10 +775,7 @@ def preview(
|
|
|
835
775
|
offset: Annotated[int, typer.Option("--offset", help="Start at this row index")] = 0,
|
|
836
776
|
filter_: Annotated[
|
|
837
777
|
Optional[List[str]],
|
|
838
|
-
typer.Option(
|
|
839
|
-
"--filter", "-f",
|
|
840
|
-
help="Filter rows: 'col=value', 'col!=value', 'col~text' (contains), or 'col>/</>=/<= N'. Repeatable (AND).",
|
|
841
|
-
),
|
|
778
|
+
typer.Option("--filter", "-f", help=FILTER_HELP),
|
|
842
779
|
] = None,
|
|
843
780
|
title: Annotated[Optional[str], typer.Option("--title", help="Title shown in the viewer header")] = None,
|
|
844
781
|
hf: Annotated[
|
|
@@ -869,7 +806,7 @@ def preview(
|
|
|
869
806
|
print_error("The terminal viewer requires 'textual'. Install it: pip install textual", "missing_textual")
|
|
870
807
|
raise typer.Exit(1)
|
|
871
808
|
|
|
872
|
-
filters =
|
|
809
|
+
filters = parse_filters(filter_)
|
|
873
810
|
local_path = Path(source)
|
|
874
811
|
is_local = (not hf) and local_path.exists() and local_path.is_file()
|
|
875
812
|
|
|
@@ -877,7 +814,7 @@ def preview(
|
|
|
877
814
|
if is_local:
|
|
878
815
|
rows = _load_local(local_path)
|
|
879
816
|
if filters:
|
|
880
|
-
rows = [r for r in rows if
|
|
817
|
+
rows = [r for r in rows if row_matches(r, filters)]
|
|
881
818
|
if not rows:
|
|
882
819
|
print_error("No rows match the filter(s).", "no_match")
|
|
883
820
|
raise typer.Exit(3)
|
|
@@ -909,7 +846,7 @@ def preview(
|
|
|
909
846
|
view_title = title or f"dataset {source}"
|
|
910
847
|
|
|
911
848
|
page_size = limit if limit and limit > 0 else 25
|
|
912
|
-
api_filters =
|
|
849
|
+
api_filters = to_api_filters(filters)
|
|
913
850
|
|
|
914
851
|
def _fetch_page(off: int, lim: int) -> List[object]:
|
|
915
852
|
params = {"limit": str(lim), "offset": str(offset + off)}
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
"""Shared row-filter syntax for `preview`, `datasets rows`, and `eval samples`.
|
|
2
|
+
|
|
3
|
+
One filtering language across the CLI: `--filter "col<op>value"` (repeatable, AND).
|
|
4
|
+
The operator symbols map to the platform filter API's operator names, which work
|
|
5
|
+
both server-side (`POST /v1/datasets/{id}/rows/filter`) and locally (preview's
|
|
6
|
+
client-side matcher for local files).
|
|
7
|
+
"""
|
|
8
|
+
from typing import List, Optional
|
|
9
|
+
|
|
10
|
+
import typer
|
|
11
|
+
|
|
12
|
+
from .output import print_error
|
|
13
|
+
|
|
14
|
+
# Shown in each command's --filter help so the syntax is documented in one place.
FILTER_HELP = (
    "Filter rows: 'col=value', 'col!=value', 'col~text' (contains), "
    "or 'col>/</>=/<= N'. Repeatable (AND)."
)

# Maps each CLI symbol to the platform filter API's operator name. parse_filters
# picks the earliest operator (longest on a tie), so list order doesn't affect
# correctness.
# NOTE(review): the previous release mapped "!=" to "ne"; confirm the filter
# API accepts "neq".
_FILTER_OPS = [
    (">=", "gte"),
    ("<=", "lte"),
    ("!=", "neq"),
    ("~", "contains"),
    ("=", "eq"),
    (">", "gt"),
    ("<", "lt"),
]

# Comparison used by the client-side matcher for each numeric operator name;
# each callable receives (cell, value).
_NUMERIC_OPS = {
    "gt": lambda c, v: c > v,
    "lt": lambda c, v: c < v,
    "gte": lambda c, v: c >= v,
    "lte": lambda c, v: c <= v,
}
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def parse_filters(specs: Optional[List[str]]) -> List[tuple]:
    """Parse ['col=value', 'reward>=0.5', 'name~kod'] → [(col, op, value), ...].

    Splits on the EARLIEST operator (longest on a tie, so 'reward>=5' is gte not
    gt), keeping operator chars in the value intact (e.g. 'q=a>b' → col 'q', value
    'a>b'). Rejects an empty column or value.

    Args:
        specs: Raw ``--filter`` strings; ``None`` or an empty list yields ``[]``.

    Returns:
        One ``(column, operator_name, value)`` tuple per spec, in input order.
        Column and value are whitespace-stripped strings.

    Raises:
        typer.Exit: With code 1, after printing a ``bad_filter`` error, when a
            spec contains no operator or has an empty column or value.
    """
    out: List[tuple] = []
    for spec in specs or []:
        # (index, -len(symbol), symbol, op_name): min() then yields the earliest
        # operator and, on a tie, the longest symbol.
        hits = [(spec.find(sym), -len(sym), sym, op) for sym, op in _FILTER_OPS]
        # Keep operators found at ANY position, including index 0, so a spec
        # such as '>=5' is split on '>=' (empty column → rejected below) rather
        # than on its inner '=' with '>' mistaken for the column name.
        hits = [hit for hit in hits if hit[0] >= 0]
        if not hits:
            print_error(
                f"Invalid --filter '{spec}'. Use col=value, col!=value, col~text, or col>/</>=/<= N.",
                "bad_filter",
            )
            raise typer.Exit(1)
        i, _, sym, op = min(hits)
        col, val = spec[:i].strip(), spec[i + len(sym):].strip()
        if not col or not val:
            print_error(
                f"Invalid --filter '{spec}': both a column and a value are required.",
                "bad_filter",
            )
            raise typer.Exit(1)
        out.append((col, op, val))
    return out
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def to_api_filters(parsed: List[tuple]) -> List[dict]:
    """[(col, op, val), ...] → the `filters` payload for POST /rows/filter.

    Args:
        parsed: ``(column, operator_name, value)`` tuples, e.g. the output of
            ``parse_filters``.

    Returns:
        One ``{"column", "operator", "value"}`` dict per tuple, in input order;
        an empty list when ``parsed`` is empty.
    """
    return [{"column": col, "operator": op, "value": val} for col, op, val in parsed]
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def cell_matches(cell: object, op: str, val: str) -> bool:
    """Return whether one cell value satisfies ``<op> val``.

    Args:
        cell: The row's value for the filtered column (``None`` when absent).
        op: Operator name: "contains", "eq", "neq", or a key of ``_NUMERIC_OPS``.
        val: The filter's right-hand side, as typed on the command line.

    Returns:
        - "contains": case-insensitive substring test on ``str(cell)``; a
          ``None`` cell never matches.
        - "eq" / "neq": case-insensitive comparison of the stripped string
          forms; a ``None`` cell is never equal, so it always satisfies "neq".
        - numeric operators: both sides are converted with ``float``; if either
          conversion fails the cell does not match.
    """
    if op == "contains":
        return cell is not None and val.lower() in str(cell).lower()
    if op in ("eq", "neq"):
        equal = cell is not None and str(cell).strip().lower() == val.strip().lower()
        return equal if op == "eq" else not equal
    try:
        return _NUMERIC_OPS[op](float(cell), float(val))  # gt/lt/gte/lte
    except (TypeError, ValueError):
        # Non-numeric (or None) operands cannot satisfy a numeric comparison.
        return False
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def row_matches(row: object, filters: List[tuple]) -> bool:
    """Client-side predicate (local files). A non-dict row can't match a column
    filter. All filters AND together.

    Args:
        row: One loaded row; only dicts can be tested against column filters.
        filters: ``(column, operator_name, value)`` tuples.

    Returns:
        ``True`` when ``filters`` is empty, ``False`` for a non-dict row, and
        otherwise whether every filter matches the row's value for its column
        (a missing column is tested as ``None``).
    """
    if not filters:
        return True
    if not isinstance(row, dict):
        return False
    return all(cell_matches(row.get(col), op, val) for col, op, val in filters)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|