ldv-cli 0.10.0__tar.gz → 0.12.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. ldv_cli-0.12.0/.env +1 -0
  2. {ldv_cli-0.10.0 → ldv_cli-0.12.0}/.gitignore +0 -2
  3. {ldv_cli-0.10.0 → ldv_cli-0.12.0}/PKG-INFO +17 -9
  4. {ldv_cli-0.10.0 → ldv_cli-0.12.0}/README.md +16 -8
  5. {ldv_cli-0.10.0 → ldv_cli-0.12.0}/pyproject.toml +1 -1
  6. {ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/commands/datasets.py +17 -3
  7. {ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/commands/evals.py +36 -8
  8. {ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/commands/instructions.py +24 -10
  9. {ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/commands/preview.py +5 -68
  10. ldv_cli-0.12.0/src/ldv/filters.py +99 -0
  11. {ldv_cli-0.10.0 → ldv_cli-0.12.0}/examples/agent-traces.jsonl +0 -0
  12. {ldv_cli-0.10.0 → ldv_cli-0.12.0}/package-lock.json +0 -0
  13. {ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/__init__.py +0 -0
  14. {ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/_group.py +0 -0
  15. {ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/_opts.py +0 -0
  16. {ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/api.py +0 -0
  17. {ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/cli.py +0 -0
  18. {ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/commands/__init__.py +0 -0
  19. {ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/commands/annotations.py +0 -0
  20. {ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/commands/auth.py +0 -0
  21. {ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/commands/buckets.py +0 -0
  22. {ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/commands/edits.py +0 -0
  23. {ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/commands/highlights.py +0 -0
  24. {ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/commands/issues.py +0 -0
  25. {ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/commands/reports.py +0 -0
  26. {ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/commands/skills.py +0 -0
  27. {ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/commands/spec.py +0 -0
  28. {ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/commands/tui.py +0 -0
  29. {ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/commands/update.py +0 -0
  30. {ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/commands/workspaces.py +0 -0
  31. {ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/config.py +0 -0
  32. {ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/output.py +0 -0
  33. {ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/sessions.py +0 -0
  34. {ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/util.py +0 -0
  35. {ldv_cli-0.10.0 → ldv_cli-0.12.0}/uv.lock +0 -0
ldv_cli-0.12.0/.env ADDED
@@ -0,0 +1 @@
1
+ PYPI_TOKEN=<redacted: a PyPI API token was published in this file — treat it as compromised; revoke and rotate it>
{ldv_cli-0.10.0 → ldv_cli-0.12.0}/.gitignore CHANGED
@@ -13,5 +13,3 @@ venv/
13
13
 
14
14
  # local config
15
15
  ~/.lql/
16
-
17
- .env
{ldv_cli-0.10.0 → ldv_cli-0.12.0}/PKG-INFO CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ldv-cli
3
- Version: 0.10.0
3
+ Version: 0.12.0
4
4
  Summary: ldv — CLI for the Liquid DataViewer platform (formerly lql)
5
5
  Project-URL: Homepage, https://github.com/Liquid4All/lql
6
6
  Author: Liquid AI
@@ -140,7 +140,8 @@ ldv datasets create --workspace <id> --hf-bucket <org/bucket> --key <path-or-glo
140
140
  From an HF storage bucket (e.g. --key 'data/*.parquet')
141
141
  ldv datasets sync <id> Trigger sync (HF repo, S3, or HF bucket)
142
142
  ldv datasets schema <id> Show column schema
143
- ldv datasets rows <id> [--limit N] [--offset N] Fetch rows
143
+ ldv datasets rows <id> [-f "col<op>value"] [--columns a,b] [--limit N] [--offset N]
144
+ Fetch rows (-f/--filter: same syntax everywhere)
144
145
  ldv datasets delete <id> Delete dataset
145
146
  ldv datasets push <id> Push to HuggingFace
146
147
  ldv datasets push-status <id> [--job <id>] Check push job status
@@ -174,10 +175,11 @@ ldv preview <src> --offset N Start at row index N
174
175
  ldv preview <src> --title "<title>" Title shown in the viewer header
175
176
  ```
176
177
 
177
- **Filtering (`--filter`/`-f`).** Show only matching rows works on local files and
178
- platform datasets (platform filtering runs server-side). Repeatable; filters AND
179
- together; string match is case-insensitive. Operators: `=`, `!=`, `~` (contains),
180
- `>`, `<`, `>=`, `<=`.
178
+ **Filtering (`--filter`/`-f`) one syntax everywhere.** The same flag and syntax
179
+ work on `preview`, `datasets rows`, and `eval samples`. Show only matching rows —
180
+ `preview` also filters local files (client-side); platform datasets filter
181
+ server-side. Repeatable; filters AND together; string match is case-insensitive.
182
+ Operators: `=`, `!=`, `~` (contains), `>`, `<`, `>=`, `<=`.
181
183
 
182
184
  ```
183
185
  ldv preview <dataset-id> -f "domain=telecom"
@@ -223,12 +225,15 @@ commands are the data primitives for error analysis: they slice and summarize
223
225
  the dataset, and you do the reasoning over what they return.
224
226
 
225
227
  ```
226
- ldv eval list [--workspace <id>] List eval datasets only
228
+ ldv eval list [--workspace <id>] [--runid <id>] [--taskid <id>]
229
+ List eval datasets only. --runid/--taskid filter by
230
+ run<id>/task<id> in the name or parquet storage path
231
+ (e.g. run11213_task72284.parquet); they AND together.
227
232
  Defaults to LDV_EVAL_WORKSPACE; without a
228
233
  workspace, lists only evals you own.
229
234
  ldv eval correctness <id> Fast accuracy + correct/incorrect/missing counts
230
235
  ldv eval stats <id> Accuracy + error-type distribution + token stats
231
- ldv eval samples <id> [--filter correct|incorrect|missing|all]
236
+ ldv eval samples <id> [-f "col<op>value" ...] [--correct|--incorrect|--missing]
232
237
  [--search <text>] [--error-type <value>]
233
238
  [--columns a,b] [--limit N] [--offset N]
234
239
  Slice the dataset for error analysis. Filters
@@ -239,6 +244,8 @@ ldv eval sample <id> --row <index> Read one full sample (the conve
239
244
 
240
245
  Notes:
241
246
 
247
+ - `-f`/`--filter` is the unified column filter — same syntax as `preview` and `datasets rows` (see Filtering above).
248
+ - `--correct` / `--incorrect` / `--missing` are convenience flags for the canonical correctness filter (mutually exclusive). They AND with any `-f` filters, `--search`, and `--error-type`.
242
249
  - `--search` matches a substring on the prompt **or** response column (either hit counts). Override the searched columns with `--search-columns a,b`.
243
250
  - `--error-type` values come from the `error_field` / `error_distribution` reported by `eval stats`.
244
251
  - Use the `index` from `eval samples` directly as `eval sample --row <index>`.
@@ -248,7 +255,8 @@ Typical analysis loop:
248
255
  ```bash
249
256
  ldv eval list --workspace <id> # find the eval dataset
250
257
  ldv eval stats <id> # accuracy + where the errors cluster
251
- ldv eval samples <id> --filter incorrect --limit 20 # pull the misses
258
+ ldv eval samples <id> --incorrect --limit 20 # pull the misses
259
+ ldv eval samples <id> --incorrect -f "reasoning_tokens>30000" # misses that ran long
252
260
  ldv eval sample <id> --row 42 # read one failure in full
253
261
  ```
254
262
 
{ldv_cli-0.10.0 → ldv_cli-0.12.0}/README.md CHANGED
@@ -124,7 +124,8 @@ ldv datasets create --workspace <id> --hf-bucket <org/bucket> --key <path-or-glo
124
124
  From an HF storage bucket (e.g. --key 'data/*.parquet')
125
125
  ldv datasets sync <id> Trigger sync (HF repo, S3, or HF bucket)
126
126
  ldv datasets schema <id> Show column schema
127
- ldv datasets rows <id> [--limit N] [--offset N] Fetch rows
127
+ ldv datasets rows <id> [-f "col<op>value"] [--columns a,b] [--limit N] [--offset N]
128
+ Fetch rows (-f/--filter: same syntax everywhere)
128
129
  ldv datasets delete <id> Delete dataset
129
130
  ldv datasets push <id> Push to HuggingFace
130
131
  ldv datasets push-status <id> [--job <id>] Check push job status
@@ -158,10 +159,11 @@ ldv preview <src> --offset N Start at row index N
158
159
  ldv preview <src> --title "<title>" Title shown in the viewer header
159
160
  ```
160
161
 
161
- **Filtering (`--filter`/`-f`).** Show only matching rows works on local files and
162
- platform datasets (platform filtering runs server-side). Repeatable; filters AND
163
- together; string match is case-insensitive. Operators: `=`, `!=`, `~` (contains),
164
- `>`, `<`, `>=`, `<=`.
162
+ **Filtering (`--filter`/`-f`) one syntax everywhere.** The same flag and syntax
163
+ work on `preview`, `datasets rows`, and `eval samples`. Show only matching rows —
164
+ `preview` also filters local files (client-side); platform datasets filter
165
+ server-side. Repeatable; filters AND together; string match is case-insensitive.
166
+ Operators: `=`, `!=`, `~` (contains), `>`, `<`, `>=`, `<=`.
165
167
 
166
168
  ```
167
169
  ldv preview <dataset-id> -f "domain=telecom"
@@ -207,12 +209,15 @@ commands are the data primitives for error analysis: they slice and summarize
207
209
  the dataset, and you do the reasoning over what they return.
208
210
 
209
211
  ```
210
- ldv eval list [--workspace <id>] List eval datasets only
212
+ ldv eval list [--workspace <id>] [--runid <id>] [--taskid <id>]
213
+ List eval datasets only. --runid/--taskid filter by
214
+ run<id>/task<id> in the name or parquet storage path
215
+ (e.g. run11213_task72284.parquet); they AND together.
211
216
  Defaults to LDV_EVAL_WORKSPACE; without a
212
217
  workspace, lists only evals you own.
213
218
  ldv eval correctness <id> Fast accuracy + correct/incorrect/missing counts
214
219
  ldv eval stats <id> Accuracy + error-type distribution + token stats
215
- ldv eval samples <id> [--filter correct|incorrect|missing|all]
220
+ ldv eval samples <id> [-f "col<op>value" ...] [--correct|--incorrect|--missing]
216
221
  [--search <text>] [--error-type <value>]
217
222
  [--columns a,b] [--limit N] [--offset N]
218
223
  Slice the dataset for error analysis. Filters
@@ -223,6 +228,8 @@ ldv eval sample <id> --row <index> Read one full sample (the conve
223
228
 
224
229
  Notes:
225
230
 
231
+ - `-f`/`--filter` is the unified column filter — same syntax as `preview` and `datasets rows` (see Filtering above).
232
+ - `--correct` / `--incorrect` / `--missing` are convenience flags for the canonical correctness filter (mutually exclusive). They AND with any `-f` filters, `--search`, and `--error-type`.
226
233
  - `--search` matches a substring on the prompt **or** response column (either hit counts). Override the searched columns with `--search-columns a,b`.
227
234
  - `--error-type` values come from the `error_field` / `error_distribution` reported by `eval stats`.
228
235
  - Use the `index` from `eval samples` directly as `eval sample --row <index>`.
@@ -232,7 +239,8 @@ Typical analysis loop:
232
239
  ```bash
233
240
  ldv eval list --workspace <id> # find the eval dataset
234
241
  ldv eval stats <id> # accuracy + where the errors cluster
235
- ldv eval samples <id> --filter incorrect --limit 20 # pull the misses
242
+ ldv eval samples <id> --incorrect --limit 20 # pull the misses
243
+ ldv eval samples <id> --incorrect -f "reasoning_tokens>30000" # misses that ran long
236
244
  ldv eval sample <id> --row 42 # read one failure in full
237
245
  ```
238
246
 
{ldv_cli-0.10.0 → ldv_cli-0.12.0}/pyproject.toml CHANGED
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "ldv-cli"
7
- version = "0.10.0"
7
+ version = "0.12.0"
8
8
  description = "ldv — CLI for the Liquid DataViewer platform (formerly lql)"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.12"
{ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/commands/datasets.py CHANGED
@@ -1,7 +1,7 @@
1
1
  import json
2
2
  import sys
3
3
  from pathlib import Path
4
- from typing import Annotated, Optional
4
+ from typing import Annotated, List, Optional
5
5
 
6
6
  import typer
7
7
 
@@ -10,6 +10,7 @@ from .._group import AliasGroup
10
10
  from .._opts import ApiUrlOpt, JsonOpt, ProfileOpt
11
11
  from ..api import ApiClient
12
12
  from ..config import _env
13
+ from ..filters import FILTER_HELP, parse_filters, to_api_filters
13
14
  from ..output import print_error, print_grouped_tables, print_json, print_table
14
15
  from ..util import q
15
16
 
@@ -339,15 +340,28 @@ def profile_cmd(
339
340
  @app.command("rows")
340
341
  def rows(
341
342
  id: Annotated[str, typer.Argument(help="Dataset ID")],
343
+ filter_: Annotated[Optional[List[str]], typer.Option("--filter", "-f", help=FILTER_HELP)] = None,
344
+ columns: Annotated[
345
+ Optional[str], typer.Option("--columns", help="Comma-separated columns to project")
346
+ ] = None,
342
347
  limit: Annotated[str, typer.Option("--limit", help="Number of rows")] = "20",
343
348
  offset: Annotated[str, typer.Option("--offset", help="Row offset")] = "0",
344
349
  json_out: JsonOpt = False,
345
350
  profile: ProfileOpt = None,
346
351
  api_url: ApiUrlOpt = None,
347
352
  ) -> None:
348
- """Get dataset rows."""
353
+ """Get dataset rows, optionally filtered (see --filter)."""
349
354
  client = ApiClient(profile=profile, api_url=api_url)
350
- data = client.get(f"/v1/datasets/{q(id)}/rows", params={"limit": limit, "offset": offset}).json()
355
+ params = {"limit": limit, "offset": offset}
356
+ if columns:
357
+ params["columns"] = str(columns)
358
+ api_filters = to_api_filters(parse_filters(filter_))
359
+ if api_filters:
360
+ data = client.post(
361
+ f"/v1/datasets/{q(id)}/rows/filter", json={"filters": api_filters}, params=params
362
+ ).json()
363
+ else:
364
+ data = client.get(f"/v1/datasets/{q(id)}/rows", params=params).json()
351
365
  if json_out:
352
366
  print_json(data)
353
367
  return
{ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/commands/evals.py CHANGED
@@ -1,6 +1,7 @@
1
1
  import json
2
2
  import math
3
3
  import os
4
+ import re
4
5
  import sys
5
6
  from typing import Annotated, List, Optional
6
7
 
@@ -12,6 +13,7 @@ from .._group import AliasGroup
12
13
  from .._opts import ApiUrlOpt, JsonOpt, ProfileOpt
13
14
  from ..api import ApiClient
14
15
  from ..config import _env
16
+ from ..filters import FILTER_HELP, parse_filters, to_api_filters
15
17
  from ..output import print_error, print_json, print_table
16
18
  from ..util import q
17
19
 
@@ -45,9 +47,24 @@ def _fmt_accuracy(acc: object) -> str:
45
47
  return f"{n * 100:.1f}%"
46
48
 
47
49
 
50
+ # Fields a run/task id may appear in: the human name and the storage path. The
51
+ # parquet name (e.g. run11213_task72284.parquet) is the reliable signal.
52
+ _ID_FIELDS = ("display_name", "name", "hf_bucket_key", "hf_bucket", "s3_object_key", "hf_repo_id")
53
+
54
+
55
+ def _filter_by_id(items: list, prefix: str, num: str) -> list:
56
+ """Keep datasets whose name/storage path contains ``<prefix><num>`` — e.g.
57
+ prefix 'run' + '11213' matches 'run11213', 'run 11213', 'run-11213'. The
58
+ trailing-digit guard means 1121 doesn't match 11213."""
59
+ pat = re.compile(rf"(?i)(?<![A-Za-z]){prefix}[\s_-]?{re.escape(num)}(?!\d)")
60
+ return [d for d in items if any(pat.search(str(d.get(f) or "")) for f in _ID_FIELDS)]
61
+
62
+
48
63
  @app.command("list")
49
64
  def list_evals(
50
65
  workspace: Annotated[Optional[str], typer.Option("--workspace", help="Workspace (defaults to LDV_EVAL_WORKSPACE)")] = None,
66
+ runid: Annotated[Optional[str], typer.Option("--runid", help="Only evals whose name/storage path contains this run id (e.g. 11213 -> run11213)")] = None,
67
+ taskid: Annotated[Optional[str], typer.Option("--taskid", help="Only evals whose name/storage path contains this task id (e.g. 72284 -> task72284)")] = None,
51
68
  json_out: JsonOpt = False,
52
69
  profile: ProfileOpt = None,
53
70
  api_url: ApiUrlOpt = None,
@@ -64,6 +81,11 @@ def list_evals(
64
81
  "to list the shared eval workspace.\n"
65
82
  )
66
83
  items = client.get("/v1/datasets", params=params).json()
84
+ # --runid / --taskid AND together (run11213_task72284 matches both).
85
+ if runid:
86
+ items = _filter_by_id(items, r"run", runid)
87
+ if taskid:
88
+ items = _filter_by_id(items, r"task(?:[\s_-]?id)?", taskid)
67
89
  print_table(
68
90
  ["ID", "Name", "Rows", "Source"],
69
91
  [
@@ -238,7 +260,10 @@ def failures(
238
260
  @app.command("samples")
239
261
  def samples(
240
262
  id: Annotated[str, typer.Argument(help="Dataset ID")],
241
- filter_: Annotated[str, typer.Option("--filter", help="correct | incorrect | missing | all")] = "all",
263
+ filter_: Annotated[Optional[List[str]], typer.Option("--filter", "-f", help=FILTER_HELP)] = None,
264
+ correct: Annotated[bool, typer.Option("--correct", help="Only correct samples")] = False,
265
+ incorrect: Annotated[bool, typer.Option("--incorrect", help="Only incorrect samples")] = False,
266
+ missing: Annotated[bool, typer.Option("--missing", help="Only samples with no verdict")] = False,
242
267
  search: Annotated[Optional[str], typer.Option("--search", help="Substring match on prompt OR response column")] = None,
243
268
  search_columns: Annotated[Optional[str], typer.Option("--search-columns", help="Override which columns --search matches (comma-separated)")] = None,
244
269
  error_type: Annotated[Optional[str], typer.Option("--error-type", help="Filter to samples whose error field equals <value>")] = None,
@@ -249,14 +274,17 @@ def samples(
249
274
  profile: ProfileOpt = None,
250
275
  api_url: ApiUrlOpt = None,
251
276
  ) -> None:
252
- """List samples filtered by correctness / search / error type (for error analysis)."""
277
+ """List eval samples filtered by --filter / --correct / --incorrect / --missing / --search / --error-type."""
253
278
  client = ApiClient(profile=profile, api_url=api_url)
254
- filters: List[dict] = []
279
+ filters: List[dict] = to_api_filters(parse_filters(filter_))
255
280
 
256
- kind = str(filter_ or "all").lower()
257
- if kind not in ("all", "correct", "incorrect", "missing"):
258
- print_error("--filter must be one of: correct, incorrect, missing, all", "bad_filter")
281
+ # --correct / --incorrect / --missing are convenience flags for the canonical
282
+ # correctness filter (server-side reconciliation). Mutually exclusive.
283
+ chosen = [name for name, on in (("correct", correct), ("incorrect", incorrect), ("missing", missing)) if on]
284
+ if len(chosen) > 1:
285
+ print_error("--correct, --incorrect and --missing are mutually exclusive.", "bad_filter")
259
286
  raise typer.Exit(1)
287
+ correctness = chosen[0] if chosen else None
260
288
 
261
289
  if search:
262
290
  if search_columns:
@@ -287,8 +315,8 @@ def samples(
287
315
  params = {"limit": limit, "offset": offset}
288
316
  if columns:
289
317
  params["columns"] = str(columns)
290
- if kind != "all":
291
- params["correctness"] = kind
318
+ if correctness:
319
+ params["correctness"] = correctness
292
320
 
293
321
  data = client.post(f"/v1/datasets/{q(id)}/rows/filter", json={"filters": filters}, params=params).json()
294
322
  if json_out:
{ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/commands/instructions.py CHANGED
@@ -84,7 +84,8 @@ A workspace is the top-level container for datasets, spec docs, and members.
84
84
  ldv datasets schema <id> # Column names + types
85
85
  ldv datasets profile <id> # Per-column nulls/cardinality/numeric stats/top values + content token stats
86
86
  # [--full-content] exact content scan (slow) [--skip-content] omit it
87
- ldv datasets rows <id> [--limit N] [--offset N]
87
+ ldv datasets rows <id> [-f "col<op>value" ...] [--columns a,b] [--limit N] [--offset N]
88
+ # -f/--filter is the same syntax everywhere (see Filtering below)
88
89
  ldv datasets delete <id>
89
90
  ldv datasets push <id> # Push edits back to HuggingFace
90
91
  ldv datasets push-status <id> [--job <job-id>]
@@ -116,13 +117,17 @@ repeatable), -f/--filter (filter rows; see below), -n/--limit (page size when
116
117
  paging a platform dataset), --offset (start row index), --title, --hf, --split,
117
118
  --workspace, --profile, --api-url.
118
119
 
119
- Filtering: -f/--filter "col<op>value" shows only matching rows — works on local
120
- files and platform datasets (server-side for platform). Repeatable; filters AND
121
- together; string compare is case-insensitive. Operators: = (eq), != (ne),
122
- ~ (contains), >, <, >=, <=.
120
+ Filtering (one syntax everywhere): -f/--filter "col<op>value" shows only matching
121
+ rows. The SAME flag and syntax work on `preview`, `datasets rows`, and
122
+ `eval samples`. Repeatable; filters AND together; string compare is
123
+ case-insensitive. Operators: = (eq), != (ne), ~ (contains), >, <, >=, <=.
124
+ For `preview` it also runs on local files (client-side); on platform datasets all
125
+ three filter server-side via POST /v1/datasets/{id}/rows/filter.
123
126
 
124
127
  ldv preview <dataset-id> -f "domain=telecom" -f "reward>=0.8"
125
128
  ldv preview data.jsonl -f "model~lfm"
129
+ ldv datasets rows <id> -f "lang=en" -f "score<0.5"
130
+ ldv eval samples <id> -f "reasoning_tokens>30000" --incorrect
126
131
 
127
132
  Navigation: two modes toggled with m — pager (one sample at a time; ←/→ or
128
133
  n/b switch samples, ↑/↓/j/k scroll) and scroll (all samples; n/b jump between
@@ -144,8 +149,12 @@ Eval datasets (evaluation-run output: each row a sample with a model 'response'
144
149
  + a 'correct' verdict) are detected automatically. These commands are the data
145
150
  primitives for error analysis — YOU do the reasoning over what they return.
146
151
 
147
- ldv eval list [--workspace <id>] # Eval datasets only. Defaults to LDV_EVAL_WORKSPACE;
152
+ ldv eval list [--workspace <id>] [--runid <id>] [--taskid <id>]
153
+ # Eval datasets only. Defaults to LDV_EVAL_WORKSPACE;
148
154
  # without a workspace it lists only evals you own.
155
+ # --runid / --taskid filter to evals whose name OR storage
156
+ # path matches run<id> / task<id> (e.g.
157
+ # run11213_task72284.parquet). They AND together.
149
158
  ldv eval stats <id> # Accuracy + correctness counts + error-type
150
159
  # distribution + token stats (the distribution view)
151
160
  ldv eval correctness <id> # Fast accuracy + correct/incorrect/missing counts
@@ -160,14 +169,18 @@ primitives for error analysis — YOU do the reasoning over what they return.
160
169
  # missing think tags 80 ██████████████ 40.0%
161
170
  # If no failure_analysis column exists, prints a clear
162
171
  # message and exits 0. Use --json for the raw API response.
163
- ldv eval samples <id> [--filter correct|incorrect|missing|all] [--search <text>]
164
- [--error-type <value>] [--columns a,b] [--limit N] [--offset N]
172
+ ldv eval samples <id> [-f "col<op>value" ...] [--correct|--incorrect|--missing]
173
+ [--search <text>] [--error-type <value>] [--columns a,b]
174
+ [--limit N] [--offset N]
165
175
  # Slice the dataset for error analysis. Filters AND
166
176
  # together. Prints an 'index' column per row.
167
177
  ldv eval sample <id> --row <index> # Read one full sample (the conversation) by the
168
178
  # 'index' from `eval samples`
169
179
 
170
180
  Notes:
181
+ - -f/--filter is the unified column filter (same syntax as preview / datasets rows; see Filtering).
182
+ - --correct / --incorrect / --missing are convenience flags for the canonical correctness filter
183
+ (mutually exclusive). They AND with any -f filters and --search / --error-type.
171
184
  - --search matches a substring on the prompt OR response column (either one matching is a hit).
172
185
  - --error-type values come from the `error_field` / `error_distribution` in `eval stats`.
173
186
  - Use the 'index' from `eval samples` directly as `eval sample --row <index>`.
@@ -295,8 +308,9 @@ never goes stale.
295
308
  # (mode_distribution: name/count/rate per mode)
296
309
  ldv eval stats <id> --json # accuracy + error_distribution_incorrect
297
310
  # = the common errors AMONG the misses
298
- ldv eval samples <id> --filter incorrect --json # pull the misses
299
- ldv eval samples <id> --filter incorrect --error-type <value> --json # focus one failure mode
311
+ ldv eval samples <id> --incorrect --json # pull the misses
312
+ ldv eval samples <id> --incorrect --error-type <value> --json # focus one failure mode
313
+ ldv eval samples <id> --incorrect -f "reasoning_tokens>30000" --json # misses that ran long
300
314
  ldv eval sample <id> --row <index> --json # read the full conversation of a miss
301
315
  # Then synthesize the common pattern across the misses yourself — the commands give you
302
316
  # the data (counts, slices, conversations); the analysis is your job.
{ldv_cli-0.10.0 → ldv_cli-0.12.0}/src/ldv/commands/preview.py CHANGED
@@ -20,6 +20,7 @@ import typer
20
20
 
21
21
  from .._opts import ApiUrlOpt, ProfileOpt
22
22
  from ..api import ApiClient
23
+ from ..filters import FILTER_HELP, parse_filters, row_matches, to_api_filters
23
24
  from ..output import print_error
24
25
  from ..util import q
25
26
 
@@ -759,67 +760,6 @@ def _choose_workspace(client: ApiClient, tui_mod) -> Optional[str]:
759
760
  return choice
760
761
 
761
762
 
762
- # --------------------------------------------------------------------------
763
- # Row filtering (--filter "col<op>value")
764
- # --------------------------------------------------------------------------
765
-
766
- # Maps each CLI symbol to the platform filter API's operator name (the same
767
- # names work server-side and locally). _parse_filters picks the earliest operator
768
- # (longest on a tie), so list order doesn't affect correctness.
769
- _FILTER_OPS = [(">=", "gte"), ("<=", "lte"), ("!=", "ne"), ("~", "contains"), ("=", "eq"), (">", "gt"), ("<", "lt")]
770
- _NUMERIC_OPS = {"gt": lambda c, v: c > v, "lt": lambda c, v: c < v, "gte": lambda c, v: c >= v, "lte": lambda c, v: c <= v}
771
-
772
-
773
- def _parse_filters(specs: Optional[List[str]]) -> List[tuple]:
774
- """Parse ['col=value', 'reward>=0.5', 'name~kod'] → [(col, op, value), ...].
775
-
776
- Splits on the EARLIEST operator (longest on a tie, so 'reward>=5' is gte not
777
- gt), keeping operator chars in the value intact (e.g. 'q=a>b' → col 'q', value
778
- 'a>b'). Rejects an empty column or value."""
779
- out: List[tuple] = []
780
- for spec in specs or []:
781
- chosen = None # (index, symbol, op_name)
782
- for sym, op in _FILTER_OPS:
783
- i = spec.find(sym)
784
- if i > 0 and (chosen is None or i < chosen[0] or (i == chosen[0] and len(sym) > len(chosen[1]))):
785
- chosen = (i, sym, op)
786
- if chosen is None:
787
- print_error(
788
- f"Invalid --filter '{spec}'. Use col=value, col!=value, col~text, or col>/</>=/<= N.",
789
- "bad_filter",
790
- )
791
- raise typer.Exit(1)
792
- i, sym, op = chosen
793
- col, val = spec[:i].strip(), spec[i + len(sym):].strip()
794
- if not col or not val:
795
- print_error(f"Invalid --filter '{spec}': both a column and a value are required.", "bad_filter")
796
- raise typer.Exit(1)
797
- out.append((col, op, val))
798
- return out
799
-
800
-
801
- def _cell_matches(cell: object, op: str, val: str) -> bool:
802
- if op == "contains":
803
- return cell is not None and val.lower() in str(cell).lower()
804
- if op in ("eq", "ne"):
805
- equal = cell is not None and str(cell).strip().lower() == val.strip().lower()
806
- return equal if op == "eq" else not equal
807
- try:
808
- return _NUMERIC_OPS[op](float(cell), float(val)) # gt/lt/gte/lte
809
- except (TypeError, ValueError):
810
- return False
811
-
812
-
813
- def _row_matches(row: object, filters: List[tuple]) -> bool:
814
- """Client-side predicate (local files). A non-dict row can't match a column
815
- filter. All filters AND together."""
816
- if not filters:
817
- return True
818
- if not isinstance(row, dict):
819
- return False
820
- return all(_cell_matches(row.get(col), op, val) for col, op, val in filters)
821
-
822
-
823
763
  # --------------------------------------------------------------------------
824
764
  # Command
825
765
  # --------------------------------------------------------------------------
@@ -835,10 +775,7 @@ def preview(
835
775
  offset: Annotated[int, typer.Option("--offset", help="Start at this row index")] = 0,
836
776
  filter_: Annotated[
837
777
  Optional[List[str]],
838
- typer.Option(
839
- "--filter", "-f",
840
- help="Filter rows: 'col=value', 'col!=value', 'col~text' (contains), or 'col>/</>=/<= N'. Repeatable (AND).",
841
- ),
778
+ typer.Option("--filter", "-f", help=FILTER_HELP),
842
779
  ] = None,
843
780
  title: Annotated[Optional[str], typer.Option("--title", help="Title shown in the viewer header")] = None,
844
781
  hf: Annotated[
@@ -869,7 +806,7 @@ def preview(
869
806
  print_error("The terminal viewer requires 'textual'. Install it: pip install textual", "missing_textual")
870
807
  raise typer.Exit(1)
871
808
 
872
- filters = _parse_filters(filter_)
809
+ filters = parse_filters(filter_)
873
810
  local_path = Path(source)
874
811
  is_local = (not hf) and local_path.exists() and local_path.is_file()
875
812
 
@@ -877,7 +814,7 @@ def preview(
877
814
  if is_local:
878
815
  rows = _load_local(local_path)
879
816
  if filters:
880
- rows = [r for r in rows if _row_matches(r, filters)]
817
+ rows = [r for r in rows if row_matches(r, filters)]
881
818
  if not rows:
882
819
  print_error("No rows match the filter(s).", "no_match")
883
820
  raise typer.Exit(3)
@@ -909,7 +846,7 @@ def preview(
909
846
  view_title = title or f"dataset {source}"
910
847
 
911
848
  page_size = limit if limit and limit > 0 else 25
912
- api_filters = [{"column": col, "operator": op, "value": val} for col, op, val in filters]
849
+ api_filters = to_api_filters(filters)
913
850
 
914
851
  def _fetch_page(off: int, lim: int) -> List[object]:
915
852
  params = {"limit": str(lim), "offset": str(offset + off)}
ldv_cli-0.12.0/src/ldv/filters.py ADDED
@@ -0,0 +1,99 @@
1
+ """Shared row-filter syntax for `preview`, `datasets rows`, and `eval samples`.
2
+
3
+ One filtering language across the CLI: `--filter "col<op>value"` (repeatable, AND).
4
+ The operator symbols map to the platform filter API's operator names, which work
5
+ both server-side (`POST /v1/datasets/{id}/rows/filter`) and locally (preview's
6
+ client-side matcher for local files).
7
+ """
8
+ from typing import List, Optional
9
+
10
+ import typer
11
+
12
+ from .output import print_error
13
+
14
+ # Shown in each command's --filter help so the syntax is documented in one place.
15
+ FILTER_HELP = (
16
+ "Filter rows: 'col=value', 'col!=value', 'col~text' (contains), "
17
+ "or 'col>/</>=/<= N'. Repeatable (AND)."
18
+ )
19
+
20
+ # Maps each CLI symbol to the platform filter API's operator name. parse_filters
21
+ # picks the earliest operator (longest on a tie), so list order doesn't affect
22
+ # correctness.
23
+ _FILTER_OPS = [
24
+ (">=", "gte"),
25
+ ("<=", "lte"),
26
+ ("!=", "neq"),
27
+ ("~", "contains"),
28
+ ("=", "eq"),
29
+ (">", "gt"),
30
+ ("<", "lt"),
31
+ ]
32
+ _NUMERIC_OPS = {
33
+ "gt": lambda c, v: c > v,
34
+ "lt": lambda c, v: c < v,
35
+ "gte": lambda c, v: c >= v,
36
+ "lte": lambda c, v: c <= v,
37
+ }
38
+
39
+
40
+ def parse_filters(specs: Optional[List[str]]) -> List[tuple]:
41
+ """Parse ['col=value', 'reward>=0.5', 'name~kod'] → [(col, op, value), ...].
42
+
43
+ Splits on the EARLIEST operator (longest on a tie, so 'reward>=5' is gte not
44
+ gt), keeping operator chars in the value intact (e.g. 'q=a>b' → col 'q', value
45
+ 'a>b'). Rejects an empty column or value."""
46
+ out: List[tuple] = []
47
+ for spec in specs or []:
48
+ chosen = None # (index, symbol, op_name)
49
+ for sym, op in _FILTER_OPS:
50
+ i = spec.find(sym)
51
+ if i > 0 and (
52
+ chosen is None
53
+ or i < chosen[0]
54
+ or (i == chosen[0] and len(sym) > len(chosen[1]))
55
+ ):
56
+ chosen = (i, sym, op)
57
+ if chosen is None:
58
+ print_error(
59
+ f"Invalid --filter '{spec}'. Use col=value, col!=value, col~text, or col>/</>=/<= N.",
60
+ "bad_filter",
61
+ )
62
+ raise typer.Exit(1)
63
+ i, sym, op = chosen
64
+ col, val = spec[:i].strip(), spec[i + len(sym) :].strip()
65
+ if not col or not val:
66
+ print_error(
67
+ f"Invalid --filter '{spec}': both a column and a value are required.",
68
+ "bad_filter",
69
+ )
70
+ raise typer.Exit(1)
71
+ out.append((col, op, val))
72
+ return out
73
+
74
+
75
+ def to_api_filters(parsed: List[tuple]) -> List[dict]:
76
+ """[(col, op, val), ...] → the `filters` payload for POST /rows/filter."""
77
+ return [{"column": col, "operator": op, "value": val} for col, op, val in parsed]
78
+
79
+
80
+ def cell_matches(cell: object, op: str, val: str) -> bool:
81
+ if op == "contains":
82
+ return cell is not None and val.lower() in str(cell).lower()
83
+ if op in ("eq", "neq"):
84
+ equal = cell is not None and str(cell).strip().lower() == val.strip().lower()
85
+ return equal if op == "eq" else not equal
86
+ try:
87
+ return _NUMERIC_OPS[op](float(cell), float(val)) # gt/lt/gte/lte
88
+ except (TypeError, ValueError):
89
+ return False
90
+
91
+
92
+ def row_matches(row: object, filters: List[tuple]) -> bool:
93
+ """Client-side predicate (local files). A non-dict row can't match a column
94
+ filter. All filters AND together."""
95
+ if not filters:
96
+ return True
97
+ if not isinstance(row, dict):
98
+ return False
99
+ return all(cell_matches(row.get(col), op, val) for col, op, val in filters)
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes