ldv-cli 0.10.0__tar.gz → 0.12.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ldv_cli-0.12.0/.env +1 -0
- {ldv_cli-0.10.0 → ldv_cli-0.12.0}/.gitignore +0 -2
- {ldv_cli-0.10.0 → ldv_cli-0.12.0}/PKG-INFO +17 -9
- {ldv_cli-0.10.0 → ldv_cli-0.12.0}/README.md +16 -8
- {ldv_cli-0.10.0 → ldv_cli-0.12.0}/pyproject.toml +1 -1
- {ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/commands/datasets.py +17 -3
- {ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/commands/evals.py +36 -8
- {ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/commands/instructions.py +24 -10
- {ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/commands/preview.py +5 -68
- ldv_cli-0.12.0/src/ldv/filters.py +99 -0
- {ldv_cli-0.10.0 → ldv_cli-0.12.0}/examples/agent-traces.jsonl +0 -0
- {ldv_cli-0.10.0 → ldv_cli-0.12.0}/package-lock.json +0 -0
- {ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/__init__.py +0 -0
- {ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/_group.py +0 -0
- {ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/_opts.py +0 -0
- {ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/api.py +0 -0
- {ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/cli.py +0 -0
- {ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/commands/__init__.py +0 -0
- {ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/commands/annotations.py +0 -0
- {ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/commands/auth.py +0 -0
- {ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/commands/buckets.py +0 -0
- {ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/commands/edits.py +0 -0
- {ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/commands/highlights.py +0 -0
- {ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/commands/issues.py +0 -0
- {ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/commands/reports.py +0 -0
- {ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/commands/skills.py +0 -0
- {ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/commands/spec.py +0 -0
- {ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/commands/tui.py +0 -0
- {ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/commands/update.py +0 -0
- {ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/commands/workspaces.py +0 -0
- {ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/config.py +0 -0
- {ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/output.py +0 -0
- {ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/sessions.py +0 -0
- {ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/util.py +0 -0
- {ldv_cli-0.10.0 → ldv_cli-0.12.0}/uv.lock +0 -0
ldv_cli-0.12.0/.env
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
PYPI_TOKEN=<REDACTED — a PyPI API token was published in this sdist's .env; revoke and rotate it, and exclude .env from the build>
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ldv-cli
|
|
3
|
-
Version: 0.10.0
|
|
3
|
+
Version: 0.12.0
|
|
4
4
|
Summary: ldv — CLI for the Liquid DataViewer platform (formerly lql)
|
|
5
5
|
Project-URL: Homepage, https://github.com/Liquid4All/lql
|
|
6
6
|
Author: Liquid AI
|
|
@@ -140,7 +140,8 @@ ldv datasets create --workspace <id> --hf-bucket <org/bucket> --key <path-or-glo
|
|
|
140
140
|
From an HF storage bucket (e.g. --key 'data/*.parquet')
|
|
141
141
|
ldv datasets sync <id> Trigger sync (HF repo, S3, or HF bucket)
|
|
142
142
|
ldv datasets schema <id> Show column schema
|
|
143
|
-
ldv datasets rows <id> [--limit N] [--offset N]
|
|
143
|
+
ldv datasets rows <id> [-f "col<op>value"] [--columns a,b] [--limit N] [--offset N]
|
|
144
|
+
Fetch rows (-f/--filter: same syntax everywhere)
|
|
144
145
|
ldv datasets delete <id> Delete dataset
|
|
145
146
|
ldv datasets push <id> Push to HuggingFace
|
|
146
147
|
ldv datasets push-status <id> [--job <id>] Check push job status
|
|
@@ -174,10 +175,11 @@ ldv preview <src> --offset N Start at row index N
|
|
|
174
175
|
ldv preview <src> --title "<title>" Title shown in the viewer header
|
|
175
176
|
```
|
|
176
177
|
|
|
177
|
-
**Filtering (`--filter`/`-f`)
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
178
|
+
**Filtering (`--filter`/`-f`) — one syntax everywhere.** The same flag and syntax
|
|
179
|
+
work on `preview`, `datasets rows`, and `eval samples`. Show only matching rows —
|
|
180
|
+
`preview` also filters local files (client-side); platform datasets filter
|
|
181
|
+
server-side. Repeatable; filters AND together; string match is case-insensitive.
|
|
182
|
+
Operators: `=`, `!=`, `~` (contains), `>`, `<`, `>=`, `<=`.
|
|
181
183
|
|
|
182
184
|
```
|
|
183
185
|
ldv preview <dataset-id> -f "domain=telecom"
|
|
@@ -223,12 +225,15 @@ commands are the data primitives for error analysis: they slice and summarize
|
|
|
223
225
|
the dataset, and you do the reasoning over what they return.
|
|
224
226
|
|
|
225
227
|
```
|
|
226
|
-
ldv eval list [--workspace <id>]
|
|
228
|
+
ldv eval list [--workspace <id>] [--runid <id>] [--taskid <id>]
|
|
229
|
+
List eval datasets only. --runid/--taskid filter by
|
|
230
|
+
run<id>/task<id> in the name or parquet storage path
|
|
231
|
+
(e.g. run11213_task72284.parquet); they AND together.
|
|
227
232
|
Defaults to LDV_EVAL_WORKSPACE; without a
|
|
228
233
|
workspace, lists only evals you own.
|
|
229
234
|
ldv eval correctness <id> Fast accuracy + correct/incorrect/missing counts
|
|
230
235
|
ldv eval stats <id> Accuracy + error-type distribution + token stats
|
|
231
|
-
ldv eval samples <id> [--
|
|
236
|
+
ldv eval samples <id> [-f "col<op>value" ...] [--correct|--incorrect|--missing]
|
|
232
237
|
[--search <text>] [--error-type <value>]
|
|
233
238
|
[--columns a,b] [--limit N] [--offset N]
|
|
234
239
|
Slice the dataset for error analysis. Filters
|
|
@@ -239,6 +244,8 @@ ldv eval sample <id> --row <index> Read one full sample (the conve
|
|
|
239
244
|
|
|
240
245
|
Notes:
|
|
241
246
|
|
|
247
|
+
- `-f`/`--filter` is the unified column filter — same syntax as `preview` and `datasets rows` (see Filtering above).
|
|
248
|
+
- `--correct` / `--incorrect` / `--missing` are convenience flags for the canonical correctness filter (mutually exclusive). They AND with any `-f` filters, `--search`, and `--error-type`.
|
|
242
249
|
- `--search` matches a substring on the prompt **or** response column (either hit counts). Override the searched columns with `--search-columns a,b`.
|
|
243
250
|
- `--error-type` values come from the `error_field` / `error_distribution` reported by `eval stats`.
|
|
244
251
|
- Use the `index` from `eval samples` directly as `eval sample --row <index>`.
|
|
@@ -248,7 +255,8 @@ Typical analysis loop:
|
|
|
248
255
|
```bash
|
|
249
256
|
ldv eval list --workspace <id> # find the eval dataset
|
|
250
257
|
ldv eval stats <id> # accuracy + where the errors cluster
|
|
251
|
-
ldv eval samples <id> --
|
|
258
|
+
ldv eval samples <id> --incorrect --limit 20 # pull the misses
|
|
259
|
+
ldv eval samples <id> --incorrect -f "reasoning_tokens>30000" # misses that ran long
|
|
252
260
|
ldv eval sample <id> --row 42 # read one failure in full
|
|
253
261
|
```
|
|
254
262
|
|
|
@@ -124,7 +124,8 @@ ldv datasets create --workspace <id> --hf-bucket <org/bucket> --key <path-or-glo
|
|
|
124
124
|
From an HF storage bucket (e.g. --key 'data/*.parquet')
|
|
125
125
|
ldv datasets sync <id> Trigger sync (HF repo, S3, or HF bucket)
|
|
126
126
|
ldv datasets schema <id> Show column schema
|
|
127
|
-
ldv datasets rows <id> [--limit N] [--offset N]
|
|
127
|
+
ldv datasets rows <id> [-f "col<op>value"] [--columns a,b] [--limit N] [--offset N]
|
|
128
|
+
Fetch rows (-f/--filter: same syntax everywhere)
|
|
128
129
|
ldv datasets delete <id> Delete dataset
|
|
129
130
|
ldv datasets push <id> Push to HuggingFace
|
|
130
131
|
ldv datasets push-status <id> [--job <id>] Check push job status
|
|
@@ -158,10 +159,11 @@ ldv preview <src> --offset N Start at row index N
|
|
|
158
159
|
ldv preview <src> --title "<title>" Title shown in the viewer header
|
|
159
160
|
```
|
|
160
161
|
|
|
161
|
-
**Filtering (`--filter`/`-f`)
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
162
|
+
**Filtering (`--filter`/`-f`) — one syntax everywhere.** The same flag and syntax
|
|
163
|
+
work on `preview`, `datasets rows`, and `eval samples`. Show only matching rows —
|
|
164
|
+
`preview` also filters local files (client-side); platform datasets filter
|
|
165
|
+
server-side. Repeatable; filters AND together; string match is case-insensitive.
|
|
166
|
+
Operators: `=`, `!=`, `~` (contains), `>`, `<`, `>=`, `<=`.
|
|
165
167
|
|
|
166
168
|
```
|
|
167
169
|
ldv preview <dataset-id> -f "domain=telecom"
|
|
@@ -207,12 +209,15 @@ commands are the data primitives for error analysis: they slice and summarize
|
|
|
207
209
|
the dataset, and you do the reasoning over what they return.
|
|
208
210
|
|
|
209
211
|
```
|
|
210
|
-
ldv eval list [--workspace <id>]
|
|
212
|
+
ldv eval list [--workspace <id>] [--runid <id>] [--taskid <id>]
|
|
213
|
+
List eval datasets only. --runid/--taskid filter by
|
|
214
|
+
run<id>/task<id> in the name or parquet storage path
|
|
215
|
+
(e.g. run11213_task72284.parquet); they AND together.
|
|
211
216
|
Defaults to LDV_EVAL_WORKSPACE; without a
|
|
212
217
|
workspace, lists only evals you own.
|
|
213
218
|
ldv eval correctness <id> Fast accuracy + correct/incorrect/missing counts
|
|
214
219
|
ldv eval stats <id> Accuracy + error-type distribution + token stats
|
|
215
|
-
ldv eval samples <id> [--
|
|
220
|
+
ldv eval samples <id> [-f "col<op>value" ...] [--correct|--incorrect|--missing]
|
|
216
221
|
[--search <text>] [--error-type <value>]
|
|
217
222
|
[--columns a,b] [--limit N] [--offset N]
|
|
218
223
|
Slice the dataset for error analysis. Filters
|
|
@@ -223,6 +228,8 @@ ldv eval sample <id> --row <index> Read one full sample (the conve
|
|
|
223
228
|
|
|
224
229
|
Notes:
|
|
225
230
|
|
|
231
|
+
- `-f`/`--filter` is the unified column filter — same syntax as `preview` and `datasets rows` (see Filtering above).
|
|
232
|
+
- `--correct` / `--incorrect` / `--missing` are convenience flags for the canonical correctness filter (mutually exclusive). They AND with any `-f` filters, `--search`, and `--error-type`.
|
|
226
233
|
- `--search` matches a substring on the prompt **or** response column (either hit counts). Override the searched columns with `--search-columns a,b`.
|
|
227
234
|
- `--error-type` values come from the `error_field` / `error_distribution` reported by `eval stats`.
|
|
228
235
|
- Use the `index` from `eval samples` directly as `eval sample --row <index>`.
|
|
@@ -232,7 +239,8 @@ Typical analysis loop:
|
|
|
232
239
|
```bash
|
|
233
240
|
ldv eval list --workspace <id> # find the eval dataset
|
|
234
241
|
ldv eval stats <id> # accuracy + where the errors cluster
|
|
235
|
-
ldv eval samples <id> --
|
|
242
|
+
ldv eval samples <id> --incorrect --limit 20 # pull the misses
|
|
243
|
+
ldv eval samples <id> --incorrect -f "reasoning_tokens>30000" # misses that ran long
|
|
236
244
|
ldv eval sample <id> --row 42 # read one failure in full
|
|
237
245
|
```
|
|
238
246
|
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import sys
|
|
3
3
|
from pathlib import Path
|
|
4
|
-
from typing import Annotated, Optional
|
|
4
|
+
from typing import Annotated, List, Optional
|
|
5
5
|
|
|
6
6
|
import typer
|
|
7
7
|
|
|
@@ -10,6 +10,7 @@ from .._group import AliasGroup
|
|
|
10
10
|
from .._opts import ApiUrlOpt, JsonOpt, ProfileOpt
|
|
11
11
|
from ..api import ApiClient
|
|
12
12
|
from ..config import _env
|
|
13
|
+
from ..filters import FILTER_HELP, parse_filters, to_api_filters
|
|
13
14
|
from ..output import print_error, print_grouped_tables, print_json, print_table
|
|
14
15
|
from ..util import q
|
|
15
16
|
|
|
@@ -339,15 +340,28 @@ def profile_cmd(
|
|
|
339
340
|
@app.command("rows")
|
|
340
341
|
def rows(
|
|
341
342
|
id: Annotated[str, typer.Argument(help="Dataset ID")],
|
|
343
|
+
filter_: Annotated[Optional[List[str]], typer.Option("--filter", "-f", help=FILTER_HELP)] = None,
|
|
344
|
+
columns: Annotated[
|
|
345
|
+
Optional[str], typer.Option("--columns", help="Comma-separated columns to project")
|
|
346
|
+
] = None,
|
|
342
347
|
limit: Annotated[str, typer.Option("--limit", help="Number of rows")] = "20",
|
|
343
348
|
offset: Annotated[str, typer.Option("--offset", help="Row offset")] = "0",
|
|
344
349
|
json_out: JsonOpt = False,
|
|
345
350
|
profile: ProfileOpt = None,
|
|
346
351
|
api_url: ApiUrlOpt = None,
|
|
347
352
|
) -> None:
|
|
348
|
-
"""Get dataset rows."""
|
|
353
|
+
"""Get dataset rows, optionally filtered (see --filter)."""
|
|
349
354
|
client = ApiClient(profile=profile, api_url=api_url)
|
|
350
|
-
|
|
355
|
+
params = {"limit": limit, "offset": offset}
|
|
356
|
+
if columns:
|
|
357
|
+
params["columns"] = str(columns)
|
|
358
|
+
api_filters = to_api_filters(parse_filters(filter_))
|
|
359
|
+
if api_filters:
|
|
360
|
+
data = client.post(
|
|
361
|
+
f"/v1/datasets/{q(id)}/rows/filter", json={"filters": api_filters}, params=params
|
|
362
|
+
).json()
|
|
363
|
+
else:
|
|
364
|
+
data = client.get(f"/v1/datasets/{q(id)}/rows", params=params).json()
|
|
351
365
|
if json_out:
|
|
352
366
|
print_json(data)
|
|
353
367
|
return
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import math
|
|
3
3
|
import os
|
|
4
|
+
import re
|
|
4
5
|
import sys
|
|
5
6
|
from typing import Annotated, List, Optional
|
|
6
7
|
|
|
@@ -12,6 +13,7 @@ from .._group import AliasGroup
|
|
|
12
13
|
from .._opts import ApiUrlOpt, JsonOpt, ProfileOpt
|
|
13
14
|
from ..api import ApiClient
|
|
14
15
|
from ..config import _env
|
|
16
|
+
from ..filters import FILTER_HELP, parse_filters, to_api_filters
|
|
15
17
|
from ..output import print_error, print_json, print_table
|
|
16
18
|
from ..util import q
|
|
17
19
|
|
|
@@ -45,9 +47,24 @@ def _fmt_accuracy(acc: object) -> str:
|
|
|
45
47
|
return f"{n * 100:.1f}%"
|
|
46
48
|
|
|
47
49
|
|
|
50
|
+
# Fields a run/task id may appear in: the human name and the storage path. The
|
|
51
|
+
# parquet name (e.g. run11213_task72284.parquet) is the reliable signal.
|
|
52
|
+
_ID_FIELDS = ("display_name", "name", "hf_bucket_key", "hf_bucket", "s3_object_key", "hf_repo_id")
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _filter_by_id(items: list, prefix: str, num: str) -> list:
|
|
56
|
+
"""Keep datasets whose name/storage path contains ``<prefix><num>`` — e.g.
|
|
57
|
+
prefix 'run' + '11213' matches 'run11213', 'run 11213', 'run-11213'. The
|
|
58
|
+
trailing-digit guard means 1121 doesn't match 11213."""
|
|
59
|
+
pat = re.compile(rf"(?i)(?<![A-Za-z]){prefix}[\s_-]?{re.escape(num)}(?!\d)")
|
|
60
|
+
return [d for d in items if any(pat.search(str(d.get(f) or "")) for f in _ID_FIELDS)]
|
|
61
|
+
|
|
62
|
+
|
|
48
63
|
@app.command("list")
|
|
49
64
|
def list_evals(
|
|
50
65
|
workspace: Annotated[Optional[str], typer.Option("--workspace", help="Workspace (defaults to LDV_EVAL_WORKSPACE)")] = None,
|
|
66
|
+
runid: Annotated[Optional[str], typer.Option("--runid", help="Only evals whose name/storage path contains this run id (e.g. 11213 -> run11213)")] = None,
|
|
67
|
+
taskid: Annotated[Optional[str], typer.Option("--taskid", help="Only evals whose name/storage path contains this task id (e.g. 72284 -> task72284)")] = None,
|
|
51
68
|
json_out: JsonOpt = False,
|
|
52
69
|
profile: ProfileOpt = None,
|
|
53
70
|
api_url: ApiUrlOpt = None,
|
|
@@ -64,6 +81,11 @@ def list_evals(
|
|
|
64
81
|
"to list the shared eval workspace.\n"
|
|
65
82
|
)
|
|
66
83
|
items = client.get("/v1/datasets", params=params).json()
|
|
84
|
+
# --runid / --taskid AND together (run11213_task72284 matches both).
|
|
85
|
+
if runid:
|
|
86
|
+
items = _filter_by_id(items, r"run", runid)
|
|
87
|
+
if taskid:
|
|
88
|
+
items = _filter_by_id(items, r"task(?:[\s_-]?id)?", taskid)
|
|
67
89
|
print_table(
|
|
68
90
|
["ID", "Name", "Rows", "Source"],
|
|
69
91
|
[
|
|
@@ -238,7 +260,10 @@ def failures(
|
|
|
238
260
|
@app.command("samples")
|
|
239
261
|
def samples(
|
|
240
262
|
id: Annotated[str, typer.Argument(help="Dataset ID")],
|
|
241
|
-
filter_: Annotated[str, typer.Option("--filter", help=
|
|
263
|
+
filter_: Annotated[Optional[List[str]], typer.Option("--filter", "-f", help=FILTER_HELP)] = None,
|
|
264
|
+
correct: Annotated[bool, typer.Option("--correct", help="Only correct samples")] = False,
|
|
265
|
+
incorrect: Annotated[bool, typer.Option("--incorrect", help="Only incorrect samples")] = False,
|
|
266
|
+
missing: Annotated[bool, typer.Option("--missing", help="Only samples with no verdict")] = False,
|
|
242
267
|
search: Annotated[Optional[str], typer.Option("--search", help="Substring match on prompt OR response column")] = None,
|
|
243
268
|
search_columns: Annotated[Optional[str], typer.Option("--search-columns", help="Override which columns --search matches (comma-separated)")] = None,
|
|
244
269
|
error_type: Annotated[Optional[str], typer.Option("--error-type", help="Filter to samples whose error field equals <value>")] = None,
|
|
@@ -249,14 +274,17 @@ def samples(
|
|
|
249
274
|
profile: ProfileOpt = None,
|
|
250
275
|
api_url: ApiUrlOpt = None,
|
|
251
276
|
) -> None:
|
|
252
|
-
"""List samples filtered by
|
|
277
|
+
"""List eval samples filtered by --filter / --correct / --incorrect / --missing / --search / --error-type."""
|
|
253
278
|
client = ApiClient(profile=profile, api_url=api_url)
|
|
254
|
-
filters: List[dict] =
|
|
279
|
+
filters: List[dict] = to_api_filters(parse_filters(filter_))
|
|
255
280
|
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
281
|
+
# --correct / --incorrect / --missing are convenience flags for the canonical
|
|
282
|
+
# correctness filter (server-side reconciliation). Mutually exclusive.
|
|
283
|
+
chosen = [name for name, on in (("correct", correct), ("incorrect", incorrect), ("missing", missing)) if on]
|
|
284
|
+
if len(chosen) > 1:
|
|
285
|
+
print_error("--correct, --incorrect and --missing are mutually exclusive.", "bad_filter")
|
|
259
286
|
raise typer.Exit(1)
|
|
287
|
+
correctness = chosen[0] if chosen else None
|
|
260
288
|
|
|
261
289
|
if search:
|
|
262
290
|
if search_columns:
|
|
@@ -287,8 +315,8 @@ def samples(
|
|
|
287
315
|
params = {"limit": limit, "offset": offset}
|
|
288
316
|
if columns:
|
|
289
317
|
params["columns"] = str(columns)
|
|
290
|
-
if
|
|
291
|
-
params["correctness"] =
|
|
318
|
+
if correctness:
|
|
319
|
+
params["correctness"] = correctness
|
|
292
320
|
|
|
293
321
|
data = client.post(f"/v1/datasets/{q(id)}/rows/filter", json={"filters": filters}, params=params).json()
|
|
294
322
|
if json_out:
|
|
@@ -84,7 +84,8 @@ A workspace is the top-level container for datasets, spec docs, and members.
|
|
|
84
84
|
ldv datasets schema <id> # Column names + types
|
|
85
85
|
ldv datasets profile <id> # Per-column nulls/cardinality/numeric stats/top values + content token stats
|
|
86
86
|
# [--full-content] exact content scan (slow) [--skip-content] omit it
|
|
87
|
-
ldv datasets rows <id> [--limit N] [--offset N]
|
|
87
|
+
ldv datasets rows <id> [-f "col<op>value" ...] [--columns a,b] [--limit N] [--offset N]
|
|
88
|
+
# -f/--filter is the same syntax everywhere (see Filtering below)
|
|
88
89
|
ldv datasets delete <id>
|
|
89
90
|
ldv datasets push <id> # Push edits back to HuggingFace
|
|
90
91
|
ldv datasets push-status <id> [--job <job-id>]
|
|
@@ -116,13 +117,17 @@ repeatable), -f/--filter (filter rows; see below), -n/--limit (page size when
|
|
|
116
117
|
paging a platform dataset), --offset (start row index), --title, --hf, --split,
|
|
117
118
|
--workspace, --profile, --api-url.
|
|
118
119
|
|
|
119
|
-
Filtering: -f/--filter "col<op>value" shows only matching
|
|
120
|
-
|
|
121
|
-
together; string compare is
|
|
122
|
-
~ (contains), >, <, >=, <=.
|
|
120
|
+
Filtering (one syntax everywhere): -f/--filter "col<op>value" shows only matching
|
|
121
|
+
rows. The SAME flag and syntax work on `preview`, `datasets rows`, and
|
|
122
|
+
`eval samples`. Repeatable; filters AND together; string compare is
|
|
123
|
+
case-insensitive. Operators: = (eq), != (ne), ~ (contains), >, <, >=, <=.
|
|
124
|
+
For `preview` it also runs on local files (client-side); on platform datasets all
|
|
125
|
+
three filter server-side via POST /v1/datasets/{id}/rows/filter.
|
|
123
126
|
|
|
124
127
|
ldv preview <dataset-id> -f "domain=telecom" -f "reward>=0.8"
|
|
125
128
|
ldv preview data.jsonl -f "model~lfm"
|
|
129
|
+
ldv datasets rows <id> -f "lang=en" -f "score<0.5"
|
|
130
|
+
ldv eval samples <id> -f "reasoning_tokens>30000" --incorrect
|
|
126
131
|
|
|
127
132
|
Navigation: two modes toggled with m — pager (one sample at a time; ←/→ or
|
|
128
133
|
n/b switch samples, ↑/↓/j/k scroll) and scroll (all samples; n/b jump between
|
|
@@ -144,8 +149,12 @@ Eval datasets (evaluation-run output: each row a sample with a model 'response'
|
|
|
144
149
|
+ a 'correct' verdict) are detected automatically. These commands are the data
|
|
145
150
|
primitives for error analysis — YOU do the reasoning over what they return.
|
|
146
151
|
|
|
147
|
-
ldv eval list [--workspace <id>]
|
|
152
|
+
ldv eval list [--workspace <id>] [--runid <id>] [--taskid <id>]
|
|
153
|
+
# Eval datasets only. Defaults to LDV_EVAL_WORKSPACE;
|
|
148
154
|
# without a workspace it lists only evals you own.
|
|
155
|
+
# --runid / --taskid filter to evals whose name OR storage
|
|
156
|
+
# path matches run<id> / task<id> (e.g.
|
|
157
|
+
# run11213_task72284.parquet). They AND together.
|
|
149
158
|
ldv eval stats <id> # Accuracy + correctness counts + error-type
|
|
150
159
|
# distribution + token stats (the distribution view)
|
|
151
160
|
ldv eval correctness <id> # Fast accuracy + correct/incorrect/missing counts
|
|
@@ -160,14 +169,18 @@ primitives for error analysis — YOU do the reasoning over what they return.
|
|
|
160
169
|
# missing think tags 80 ██████████████ 40.0%
|
|
161
170
|
# If no failure_analysis column exists, prints a clear
|
|
162
171
|
# message and exits 0. Use --json for the raw API response.
|
|
163
|
-
ldv eval samples <id> [
|
|
164
|
-
[--error-type <value>] [--columns a,b]
|
|
172
|
+
ldv eval samples <id> [-f "col<op>value" ...] [--correct|--incorrect|--missing]
|
|
173
|
+
[--search <text>] [--error-type <value>] [--columns a,b]
|
|
174
|
+
[--limit N] [--offset N]
|
|
165
175
|
# Slice the dataset for error analysis. Filters AND
|
|
166
176
|
# together. Prints an 'index' column per row.
|
|
167
177
|
ldv eval sample <id> --row <index> # Read one full sample (the conversation) by the
|
|
168
178
|
# 'index' from `eval samples`
|
|
169
179
|
|
|
170
180
|
Notes:
|
|
181
|
+
- -f/--filter is the unified column filter (same syntax as preview / datasets rows; see Filtering).
|
|
182
|
+
- --correct / --incorrect / --missing are convenience flags for the canonical correctness filter
|
|
183
|
+
(mutually exclusive). They AND with any -f filters and --search / --error-type.
|
|
171
184
|
- --search matches a substring on the prompt OR response column (either one matching is a hit).
|
|
172
185
|
- --error-type values come from the `error_field` / `error_distribution` in `eval stats`.
|
|
173
186
|
- Use the 'index' from `eval samples` directly as `eval sample --row <index>`.
|
|
@@ -295,8 +308,9 @@ never goes stale.
|
|
|
295
308
|
# (mode_distribution: name/count/rate per mode)
|
|
296
309
|
ldv eval stats <id> --json # accuracy + error_distribution_incorrect
|
|
297
310
|
# = the common errors AMONG the misses
|
|
298
|
-
ldv eval samples <id> --
|
|
299
|
-
ldv eval samples <id> --
|
|
311
|
+
ldv eval samples <id> --incorrect --json # pull the misses
|
|
312
|
+
ldv eval samples <id> --incorrect --error-type <value> --json # focus one failure mode
|
|
313
|
+
ldv eval samples <id> --incorrect -f "reasoning_tokens>30000" --json # misses that ran long
|
|
300
314
|
ldv eval sample <id> --row <index> --json # read the full conversation of a miss
|
|
301
315
|
# Then synthesize the common pattern across the misses yourself — the commands give you
|
|
302
316
|
# the data (counts, slices, conversations); the analysis is your job.
|
|
@@ -20,6 +20,7 @@ import typer
|
|
|
20
20
|
|
|
21
21
|
from .._opts import ApiUrlOpt, ProfileOpt
|
|
22
22
|
from ..api import ApiClient
|
|
23
|
+
from ..filters import FILTER_HELP, parse_filters, row_matches, to_api_filters
|
|
23
24
|
from ..output import print_error
|
|
24
25
|
from ..util import q
|
|
25
26
|
|
|
@@ -759,67 +760,6 @@ def _choose_workspace(client: ApiClient, tui_mod) -> Optional[str]:
|
|
|
759
760
|
return choice
|
|
760
761
|
|
|
761
762
|
|
|
762
|
-
# --------------------------------------------------------------------------
|
|
763
|
-
# Row filtering (--filter "col<op>value")
|
|
764
|
-
# --------------------------------------------------------------------------
|
|
765
|
-
|
|
766
|
-
# Maps each CLI symbol to the platform filter API's operator name (the same
|
|
767
|
-
# names work server-side and locally). _parse_filters picks the earliest operator
|
|
768
|
-
# (longest on a tie), so list order doesn't affect correctness.
|
|
769
|
-
_FILTER_OPS = [(">=", "gte"), ("<=", "lte"), ("!=", "ne"), ("~", "contains"), ("=", "eq"), (">", "gt"), ("<", "lt")]
|
|
770
|
-
_NUMERIC_OPS = {"gt": lambda c, v: c > v, "lt": lambda c, v: c < v, "gte": lambda c, v: c >= v, "lte": lambda c, v: c <= v}
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
def _parse_filters(specs: Optional[List[str]]) -> List[tuple]:
|
|
774
|
-
"""Parse ['col=value', 'reward>=0.5', 'name~kod'] → [(col, op, value), ...].
|
|
775
|
-
|
|
776
|
-
Splits on the EARLIEST operator (longest on a tie, so 'reward>=5' is gte not
|
|
777
|
-
gt), keeping operator chars in the value intact (e.g. 'q=a>b' → col 'q', value
|
|
778
|
-
'a>b'). Rejects an empty column or value."""
|
|
779
|
-
out: List[tuple] = []
|
|
780
|
-
for spec in specs or []:
|
|
781
|
-
chosen = None # (index, symbol, op_name)
|
|
782
|
-
for sym, op in _FILTER_OPS:
|
|
783
|
-
i = spec.find(sym)
|
|
784
|
-
if i > 0 and (chosen is None or i < chosen[0] or (i == chosen[0] and len(sym) > len(chosen[1]))):
|
|
785
|
-
chosen = (i, sym, op)
|
|
786
|
-
if chosen is None:
|
|
787
|
-
print_error(
|
|
788
|
-
f"Invalid --filter '{spec}'. Use col=value, col!=value, col~text, or col>/</>=/<= N.",
|
|
789
|
-
"bad_filter",
|
|
790
|
-
)
|
|
791
|
-
raise typer.Exit(1)
|
|
792
|
-
i, sym, op = chosen
|
|
793
|
-
col, val = spec[:i].strip(), spec[i + len(sym):].strip()
|
|
794
|
-
if not col or not val:
|
|
795
|
-
print_error(f"Invalid --filter '{spec}': both a column and a value are required.", "bad_filter")
|
|
796
|
-
raise typer.Exit(1)
|
|
797
|
-
out.append((col, op, val))
|
|
798
|
-
return out
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
def _cell_matches(cell: object, op: str, val: str) -> bool:
|
|
802
|
-
if op == "contains":
|
|
803
|
-
return cell is not None and val.lower() in str(cell).lower()
|
|
804
|
-
if op in ("eq", "ne"):
|
|
805
|
-
equal = cell is not None and str(cell).strip().lower() == val.strip().lower()
|
|
806
|
-
return equal if op == "eq" else not equal
|
|
807
|
-
try:
|
|
808
|
-
return _NUMERIC_OPS[op](float(cell), float(val)) # gt/lt/gte/lte
|
|
809
|
-
except (TypeError, ValueError):
|
|
810
|
-
return False
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
def _row_matches(row: object, filters: List[tuple]) -> bool:
|
|
814
|
-
"""Client-side predicate (local files). A non-dict row can't match a column
|
|
815
|
-
filter. All filters AND together."""
|
|
816
|
-
if not filters:
|
|
817
|
-
return True
|
|
818
|
-
if not isinstance(row, dict):
|
|
819
|
-
return False
|
|
820
|
-
return all(_cell_matches(row.get(col), op, val) for col, op, val in filters)
|
|
821
|
-
|
|
822
|
-
|
|
823
763
|
# --------------------------------------------------------------------------
|
|
824
764
|
# Command
|
|
825
765
|
# --------------------------------------------------------------------------
|
|
@@ -835,10 +775,7 @@ def preview(
|
|
|
835
775
|
offset: Annotated[int, typer.Option("--offset", help="Start at this row index")] = 0,
|
|
836
776
|
filter_: Annotated[
|
|
837
777
|
Optional[List[str]],
|
|
838
|
-
typer.Option(
|
|
839
|
-
"--filter", "-f",
|
|
840
|
-
help="Filter rows: 'col=value', 'col!=value', 'col~text' (contains), or 'col>/</>=/<= N'. Repeatable (AND).",
|
|
841
|
-
),
|
|
778
|
+
typer.Option("--filter", "-f", help=FILTER_HELP),
|
|
842
779
|
] = None,
|
|
843
780
|
title: Annotated[Optional[str], typer.Option("--title", help="Title shown in the viewer header")] = None,
|
|
844
781
|
hf: Annotated[
|
|
@@ -869,7 +806,7 @@ def preview(
|
|
|
869
806
|
print_error("The terminal viewer requires 'textual'. Install it: pip install textual", "missing_textual")
|
|
870
807
|
raise typer.Exit(1)
|
|
871
808
|
|
|
872
|
-
filters =
|
|
809
|
+
filters = parse_filters(filter_)
|
|
873
810
|
local_path = Path(source)
|
|
874
811
|
is_local = (not hf) and local_path.exists() and local_path.is_file()
|
|
875
812
|
|
|
@@ -877,7 +814,7 @@ def preview(
|
|
|
877
814
|
if is_local:
|
|
878
815
|
rows = _load_local(local_path)
|
|
879
816
|
if filters:
|
|
880
|
-
rows = [r for r in rows if
|
|
817
|
+
rows = [r for r in rows if row_matches(r, filters)]
|
|
881
818
|
if not rows:
|
|
882
819
|
print_error("No rows match the filter(s).", "no_match")
|
|
883
820
|
raise typer.Exit(3)
|
|
@@ -909,7 +846,7 @@ def preview(
|
|
|
909
846
|
view_title = title or f"dataset {source}"
|
|
910
847
|
|
|
911
848
|
page_size = limit if limit and limit > 0 else 25
|
|
912
|
-
api_filters =
|
|
849
|
+
api_filters = to_api_filters(filters)
|
|
913
850
|
|
|
914
851
|
def _fetch_page(off: int, lim: int) -> List[object]:
|
|
915
852
|
params = {"limit": str(lim), "offset": str(offset + off)}
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
"""Shared row-filter syntax for `preview`, `datasets rows`, and `eval samples`.
|
|
2
|
+
|
|
3
|
+
One filtering language across the CLI: `--filter "col<op>value"` (repeatable, AND).
|
|
4
|
+
The operator symbols map to the platform filter API's operator names, which work
|
|
5
|
+
both server-side (`POST /v1/datasets/{id}/rows/filter`) and locally (preview's
|
|
6
|
+
client-side matcher for local files).
|
|
7
|
+
"""
|
|
8
|
+
from typing import List, Optional
|
|
9
|
+
|
|
10
|
+
import typer
|
|
11
|
+
|
|
12
|
+
from .output import print_error
|
|
13
|
+
|
|
14
|
+
# Shown in each command's --filter help so the syntax is documented in one place.
|
|
15
|
+
FILTER_HELP = (
|
|
16
|
+
"Filter rows: 'col=value', 'col!=value', 'col~text' (contains), "
|
|
17
|
+
"or 'col>/</>=/<= N'. Repeatable (AND)."
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
# Maps each CLI symbol to the platform filter API's operator name. parse_filters
|
|
21
|
+
# picks the earliest operator (longest on a tie), so list order doesn't affect
|
|
22
|
+
# correctness.
|
|
23
|
+
_FILTER_OPS = [
|
|
24
|
+
(">=", "gte"),
|
|
25
|
+
("<=", "lte"),
|
|
26
|
+
("!=", "neq"),
|
|
27
|
+
("~", "contains"),
|
|
28
|
+
("=", "eq"),
|
|
29
|
+
(">", "gt"),
|
|
30
|
+
("<", "lt"),
|
|
31
|
+
]
|
|
32
|
+
_NUMERIC_OPS = {
|
|
33
|
+
"gt": lambda c, v: c > v,
|
|
34
|
+
"lt": lambda c, v: c < v,
|
|
35
|
+
"gte": lambda c, v: c >= v,
|
|
36
|
+
"lte": lambda c, v: c <= v,
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def parse_filters(specs: Optional[List[str]]) -> List[tuple]:
|
|
41
|
+
"""Parse ['col=value', 'reward>=0.5', 'name~kod'] → [(col, op, value), ...].
|
|
42
|
+
|
|
43
|
+
Splits on the EARLIEST operator (longest on a tie, so 'reward>=5' is gte not
|
|
44
|
+
gt), keeping operator chars in the value intact (e.g. 'q=a>b' → col 'q', value
|
|
45
|
+
'a>b'). Rejects an empty column or value."""
|
|
46
|
+
out: List[tuple] = []
|
|
47
|
+
for spec in specs or []:
|
|
48
|
+
chosen = None # (index, symbol, op_name)
|
|
49
|
+
for sym, op in _FILTER_OPS:
|
|
50
|
+
i = spec.find(sym)
|
|
51
|
+
if i > 0 and (
|
|
52
|
+
chosen is None
|
|
53
|
+
or i < chosen[0]
|
|
54
|
+
or (i == chosen[0] and len(sym) > len(chosen[1]))
|
|
55
|
+
):
|
|
56
|
+
chosen = (i, sym, op)
|
|
57
|
+
if chosen is None:
|
|
58
|
+
print_error(
|
|
59
|
+
f"Invalid --filter '{spec}'. Use col=value, col!=value, col~text, or col>/</>=/<= N.",
|
|
60
|
+
"bad_filter",
|
|
61
|
+
)
|
|
62
|
+
raise typer.Exit(1)
|
|
63
|
+
i, sym, op = chosen
|
|
64
|
+
col, val = spec[:i].strip(), spec[i + len(sym) :].strip()
|
|
65
|
+
if not col or not val:
|
|
66
|
+
print_error(
|
|
67
|
+
f"Invalid --filter '{spec}': both a column and a value are required.",
|
|
68
|
+
"bad_filter",
|
|
69
|
+
)
|
|
70
|
+
raise typer.Exit(1)
|
|
71
|
+
out.append((col, op, val))
|
|
72
|
+
return out
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def to_api_filters(parsed: List[tuple]) -> List[dict]:
|
|
76
|
+
"""[(col, op, val), ...] → the `filters` payload for POST /rows/filter."""
|
|
77
|
+
return [{"column": col, "operator": op, "value": val} for col, op, val in parsed]
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def cell_matches(cell: object, op: str, val: str) -> bool:
|
|
81
|
+
if op == "contains":
|
|
82
|
+
return cell is not None and val.lower() in str(cell).lower()
|
|
83
|
+
if op in ("eq", "neq"):
|
|
84
|
+
equal = cell is not None and str(cell).strip().lower() == val.strip().lower()
|
|
85
|
+
return equal if op == "eq" else not equal
|
|
86
|
+
try:
|
|
87
|
+
return _NUMERIC_OPS[op](float(cell), float(val)) # gt/lt/gte/lte
|
|
88
|
+
except (TypeError, ValueError):
|
|
89
|
+
return False
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def row_matches(row: object, filters: List[tuple]) -> bool:
|
|
93
|
+
"""Client-side predicate (local files). A non-dict row can't match a column
|
|
94
|
+
filter. All filters AND together."""
|
|
95
|
+
if not filters:
|
|
96
|
+
return True
|
|
97
|
+
if not isinstance(row, dict):
|
|
98
|
+
return False
|
|
99
|
+
return all(cell_matches(row.get(col), op, val) for col, op, val in filters)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|