ldv-cli 0.10.0__tar.gz → 0.11.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. {ldv_cli-0.10.0 → ldv_cli-0.11.0}/PKG-INFO +13 -8
  2. {ldv_cli-0.10.0 → ldv_cli-0.11.0}/README.md +12 -7
  3. {ldv_cli-0.10.0 → ldv_cli-0.11.0}/pyproject.toml +1 -1
  4. {ldv_cli-0.10.0 → ldv_cli-0.11.0}/src/ldv/commands/datasets.py +17 -3
  5. {ldv_cli-0.10.0 → ldv_cli-0.11.0}/src/ldv/commands/evals.py +15 -8
  6. {ldv_cli-0.10.0 → ldv_cli-0.11.0}/src/ldv/commands/instructions.py +19 -9
  7. {ldv_cli-0.10.0 → ldv_cli-0.11.0}/src/ldv/commands/preview.py +5 -68
  8. ldv_cli-0.11.0/src/ldv/filters.py +99 -0
  9. {ldv_cli-0.10.0 → ldv_cli-0.11.0}/uv.lock +1 -1
  10. {ldv_cli-0.10.0 → ldv_cli-0.11.0}/.gitignore +0 -0
  11. {ldv_cli-0.10.0 → ldv_cli-0.11.0}/examples/agent-traces.jsonl +0 -0
  12. {ldv_cli-0.10.0 → ldv_cli-0.11.0}/package-lock.json +0 -0
  13. {ldv_cli-0.10.0 → ldv_cli-0.11.0}/src/ldv/__init__.py +0 -0
  14. {ldv_cli-0.10.0 → ldv_cli-0.11.0}/src/ldv/_group.py +0 -0
  15. {ldv_cli-0.10.0 → ldv_cli-0.11.0}/src/ldv/_opts.py +0 -0
  16. {ldv_cli-0.10.0 → ldv_cli-0.11.0}/src/ldv/api.py +0 -0
  17. {ldv_cli-0.10.0 → ldv_cli-0.11.0}/src/ldv/cli.py +0 -0
  18. {ldv_cli-0.10.0 → ldv_cli-0.11.0}/src/ldv/commands/__init__.py +0 -0
  19. {ldv_cli-0.10.0 → ldv_cli-0.11.0}/src/ldv/commands/annotations.py +0 -0
  20. {ldv_cli-0.10.0 → ldv_cli-0.11.0}/src/ldv/commands/auth.py +0 -0
  21. {ldv_cli-0.10.0 → ldv_cli-0.11.0}/src/ldv/commands/buckets.py +0 -0
  22. {ldv_cli-0.10.0 → ldv_cli-0.11.0}/src/ldv/commands/edits.py +0 -0
  23. {ldv_cli-0.10.0 → ldv_cli-0.11.0}/src/ldv/commands/highlights.py +0 -0
  24. {ldv_cli-0.10.0 → ldv_cli-0.11.0}/src/ldv/commands/issues.py +0 -0
  25. {ldv_cli-0.10.0 → ldv_cli-0.11.0}/src/ldv/commands/reports.py +0 -0
  26. {ldv_cli-0.10.0 → ldv_cli-0.11.0}/src/ldv/commands/skills.py +0 -0
  27. {ldv_cli-0.10.0 → ldv_cli-0.11.0}/src/ldv/commands/spec.py +0 -0
  28. {ldv_cli-0.10.0 → ldv_cli-0.11.0}/src/ldv/commands/tui.py +0 -0
  29. {ldv_cli-0.10.0 → ldv_cli-0.11.0}/src/ldv/commands/update.py +0 -0
  30. {ldv_cli-0.10.0 → ldv_cli-0.11.0}/src/ldv/commands/workspaces.py +0 -0
  31. {ldv_cli-0.10.0 → ldv_cli-0.11.0}/src/ldv/config.py +0 -0
  32. {ldv_cli-0.10.0 → ldv_cli-0.11.0}/src/ldv/output.py +0 -0
  33. {ldv_cli-0.10.0 → ldv_cli-0.11.0}/src/ldv/sessions.py +0 -0
  34. {ldv_cli-0.10.0 → ldv_cli-0.11.0}/src/ldv/util.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ldv-cli
3
- Version: 0.10.0
3
+ Version: 0.11.0
4
4
  Summary: ldv — CLI for the Liquid DataViewer platform (formerly lql)
5
5
  Project-URL: Homepage, https://github.com/Liquid4All/lql
6
6
  Author: Liquid AI
@@ -140,7 +140,8 @@ ldv datasets create --workspace <id> --hf-bucket <org/bucket> --key <path-or-glo
140
140
  From an HF storage bucket (e.g. --key 'data/*.parquet')
141
141
  ldv datasets sync <id> Trigger sync (HF repo, S3, or HF bucket)
142
142
  ldv datasets schema <id> Show column schema
143
- ldv datasets rows <id> [--limit N] [--offset N] Fetch rows
143
+ ldv datasets rows <id> [-f "col<op>value"] [--columns a,b] [--limit N] [--offset N]
144
+ Fetch rows (-f/--filter: same syntax everywhere)
144
145
  ldv datasets delete <id> Delete dataset
145
146
  ldv datasets push <id> Push to HuggingFace
146
147
  ldv datasets push-status <id> [--job <id>] Check push job status
@@ -174,10 +175,11 @@ ldv preview <src> --offset N Start at row index N
174
175
  ldv preview <src> --title "<title>" Title shown in the viewer header
175
176
  ```
176
177
 
177
- **Filtering (`--filter`/`-f`).** Show only matching rows works on local files and
178
- platform datasets (platform filtering runs server-side). Repeatable; filters AND
179
- together; string match is case-insensitive. Operators: `=`, `!=`, `~` (contains),
180
- `>`, `<`, `>=`, `<=`.
178
+ **Filtering (`--filter`/`-f`) — one syntax everywhere.** The same flag and syntax
179
+ work on `preview`, `datasets rows`, and `eval samples`. Show only matching rows —
180
+ `preview` also filters local files (client-side); platform datasets filter
181
+ server-side. Repeatable; filters AND together; string match is case-insensitive.
182
+ Operators: `=`, `!=`, `~` (contains), `>`, `<`, `>=`, `<=`.
181
183
 
182
184
  ```
183
185
  ldv preview <dataset-id> -f "domain=telecom"
@@ -228,7 +230,7 @@ ldv eval list [--workspace <id>] List eval datasets only
228
230
  workspace, lists only evals you own.
229
231
  ldv eval correctness <id> Fast accuracy + correct/incorrect/missing counts
230
232
  ldv eval stats <id> Accuracy + error-type distribution + token stats
231
- ldv eval samples <id> [--filter correct|incorrect|missing|all]
233
+ ldv eval samples <id> [-f "col<op>value" ...] [--correct|--incorrect|--missing]
232
234
  [--search <text>] [--error-type <value>]
233
235
  [--columns a,b] [--limit N] [--offset N]
234
236
  Slice the dataset for error analysis. Filters
@@ -239,6 +241,8 @@ ldv eval sample <id> --row <index> Read one full sample (the conve
239
241
 
240
242
  Notes:
241
243
 
244
+ - `-f`/`--filter` is the unified column filter — same syntax as `preview` and `datasets rows` (see Filtering above).
245
+ - `--correct` / `--incorrect` / `--missing` are convenience flags for the canonical correctness filter (mutually exclusive). They AND with any `-f` filters, `--search`, and `--error-type`.
242
246
  - `--search` matches a substring on the prompt **or** response column (either hit counts). Override the searched columns with `--search-columns a,b`.
243
247
  - `--error-type` values come from the `error_field` / `error_distribution` reported by `eval stats`.
244
248
  - Use the `index` from `eval samples` directly as `eval sample --row <index>`.
@@ -248,7 +252,8 @@ Typical analysis loop:
248
252
  ```bash
249
253
  ldv eval list --workspace <id> # find the eval dataset
250
254
  ldv eval stats <id> # accuracy + where the errors cluster
251
- ldv eval samples <id> --filter incorrect --limit 20 # pull the misses
255
+ ldv eval samples <id> --incorrect --limit 20 # pull the misses
256
+ ldv eval samples <id> --incorrect -f "reasoning_tokens>30000" # misses that ran long
252
257
  ldv eval sample <id> --row 42 # read one failure in full
253
258
  ```
254
259
 
@@ -124,7 +124,8 @@ ldv datasets create --workspace <id> --hf-bucket <org/bucket> --key <path-or-glo
124
124
  From an HF storage bucket (e.g. --key 'data/*.parquet')
125
125
  ldv datasets sync <id> Trigger sync (HF repo, S3, or HF bucket)
126
126
  ldv datasets schema <id> Show column schema
127
- ldv datasets rows <id> [--limit N] [--offset N] Fetch rows
127
+ ldv datasets rows <id> [-f "col<op>value"] [--columns a,b] [--limit N] [--offset N]
128
+ Fetch rows (-f/--filter: same syntax everywhere)
128
129
  ldv datasets delete <id> Delete dataset
129
130
  ldv datasets push <id> Push to HuggingFace
130
131
  ldv datasets push-status <id> [--job <id>] Check push job status
@@ -158,10 +159,11 @@ ldv preview <src> --offset N Start at row index N
158
159
  ldv preview <src> --title "<title>" Title shown in the viewer header
159
160
  ```
160
161
 
161
- **Filtering (`--filter`/`-f`).** Show only matching rows works on local files and
162
- platform datasets (platform filtering runs server-side). Repeatable; filters AND
163
- together; string match is case-insensitive. Operators: `=`, `!=`, `~` (contains),
164
- `>`, `<`, `>=`, `<=`.
162
+ **Filtering (`--filter`/`-f`) — one syntax everywhere.** The same flag and syntax
163
+ work on `preview`, `datasets rows`, and `eval samples`. Show only matching rows —
164
+ `preview` also filters local files (client-side); platform datasets filter
165
+ server-side. Repeatable; filters AND together; string match is case-insensitive.
166
+ Operators: `=`, `!=`, `~` (contains), `>`, `<`, `>=`, `<=`.
165
167
 
166
168
  ```
167
169
  ldv preview <dataset-id> -f "domain=telecom"
@@ -212,7 +214,7 @@ ldv eval list [--workspace <id>] List eval datasets only
212
214
  workspace, lists only evals you own.
213
215
  ldv eval correctness <id> Fast accuracy + correct/incorrect/missing counts
214
216
  ldv eval stats <id> Accuracy + error-type distribution + token stats
215
- ldv eval samples <id> [--filter correct|incorrect|missing|all]
217
+ ldv eval samples <id> [-f "col<op>value" ...] [--correct|--incorrect|--missing]
216
218
  [--search <text>] [--error-type <value>]
217
219
  [--columns a,b] [--limit N] [--offset N]
218
220
  Slice the dataset for error analysis. Filters
@@ -223,6 +225,8 @@ ldv eval sample <id> --row <index> Read one full sample (the conve
223
225
 
224
226
  Notes:
225
227
 
228
+ - `-f`/`--filter` is the unified column filter — same syntax as `preview` and `datasets rows` (see Filtering above).
229
+ - `--correct` / `--incorrect` / `--missing` are convenience flags for the canonical correctness filter (mutually exclusive). They AND with any `-f` filters, `--search`, and `--error-type`.
226
230
  - `--search` matches a substring on the prompt **or** response column (either hit counts). Override the searched columns with `--search-columns a,b`.
227
231
  - `--error-type` values come from the `error_field` / `error_distribution` reported by `eval stats`.
228
232
  - Use the `index` from `eval samples` directly as `eval sample --row <index>`.
@@ -232,7 +236,8 @@ Typical analysis loop:
232
236
  ```bash
233
237
  ldv eval list --workspace <id> # find the eval dataset
234
238
  ldv eval stats <id> # accuracy + where the errors cluster
235
- ldv eval samples <id> --filter incorrect --limit 20 # pull the misses
239
+ ldv eval samples <id> --incorrect --limit 20 # pull the misses
240
+ ldv eval samples <id> --incorrect -f "reasoning_tokens>30000" # misses that ran long
236
241
  ldv eval sample <id> --row 42 # read one failure in full
237
242
  ```
238
243
 
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "ldv-cli"
7
- version = "0.10.0"
7
+ version = "0.11.0"
8
8
  description = "ldv — CLI for the Liquid DataViewer platform (formerly lql)"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.12"
@@ -1,7 +1,7 @@
1
1
  import json
2
2
  import sys
3
3
  from pathlib import Path
4
- from typing import Annotated, Optional
4
+ from typing import Annotated, List, Optional
5
5
 
6
6
  import typer
7
7
 
@@ -10,6 +10,7 @@ from .._group import AliasGroup
10
10
  from .._opts import ApiUrlOpt, JsonOpt, ProfileOpt
11
11
  from ..api import ApiClient
12
12
  from ..config import _env
13
+ from ..filters import FILTER_HELP, parse_filters, to_api_filters
13
14
  from ..output import print_error, print_grouped_tables, print_json, print_table
14
15
  from ..util import q
15
16
 
@@ -339,15 +340,28 @@ def profile_cmd(
339
340
  @app.command("rows")
340
341
  def rows(
341
342
  id: Annotated[str, typer.Argument(help="Dataset ID")],
343
+ filter_: Annotated[Optional[List[str]], typer.Option("--filter", "-f", help=FILTER_HELP)] = None,
344
+ columns: Annotated[
345
+ Optional[str], typer.Option("--columns", help="Comma-separated columns to project")
346
+ ] = None,
342
347
  limit: Annotated[str, typer.Option("--limit", help="Number of rows")] = "20",
343
348
  offset: Annotated[str, typer.Option("--offset", help="Row offset")] = "0",
344
349
  json_out: JsonOpt = False,
345
350
  profile: ProfileOpt = None,
346
351
  api_url: ApiUrlOpt = None,
347
352
  ) -> None:
348
- """Get dataset rows."""
353
+ """Get dataset rows, optionally filtered (see --filter)."""
349
354
  client = ApiClient(profile=profile, api_url=api_url)
350
- data = client.get(f"/v1/datasets/{q(id)}/rows", params={"limit": limit, "offset": offset}).json()
355
+ params = {"limit": limit, "offset": offset}
356
+ if columns:
357
+ params["columns"] = str(columns)
358
+ api_filters = to_api_filters(parse_filters(filter_))
359
+ if api_filters:
360
+ data = client.post(
361
+ f"/v1/datasets/{q(id)}/rows/filter", json={"filters": api_filters}, params=params
362
+ ).json()
363
+ else:
364
+ data = client.get(f"/v1/datasets/{q(id)}/rows", params=params).json()
351
365
  if json_out:
352
366
  print_json(data)
353
367
  return
@@ -12,6 +12,7 @@ from .._group import AliasGroup
12
12
  from .._opts import ApiUrlOpt, JsonOpt, ProfileOpt
13
13
  from ..api import ApiClient
14
14
  from ..config import _env
15
+ from ..filters import FILTER_HELP, parse_filters, to_api_filters
15
16
  from ..output import print_error, print_json, print_table
16
17
  from ..util import q
17
18
 
@@ -238,7 +239,10 @@ def failures(
238
239
  @app.command("samples")
239
240
  def samples(
240
241
  id: Annotated[str, typer.Argument(help="Dataset ID")],
241
- filter_: Annotated[str, typer.Option("--filter", help="correct | incorrect | missing | all")] = "all",
242
+ filter_: Annotated[Optional[List[str]], typer.Option("--filter", "-f", help=FILTER_HELP)] = None,
243
+ correct: Annotated[bool, typer.Option("--correct", help="Only correct samples")] = False,
244
+ incorrect: Annotated[bool, typer.Option("--incorrect", help="Only incorrect samples")] = False,
245
+ missing: Annotated[bool, typer.Option("--missing", help="Only samples with no verdict")] = False,
242
246
  search: Annotated[Optional[str], typer.Option("--search", help="Substring match on prompt OR response column")] = None,
243
247
  search_columns: Annotated[Optional[str], typer.Option("--search-columns", help="Override which columns --search matches (comma-separated)")] = None,
244
248
  error_type: Annotated[Optional[str], typer.Option("--error-type", help="Filter to samples whose error field equals <value>")] = None,
@@ -249,14 +253,17 @@ def samples(
249
253
  profile: ProfileOpt = None,
250
254
  api_url: ApiUrlOpt = None,
251
255
  ) -> None:
252
- """List samples filtered by correctness / search / error type (for error analysis)."""
256
+ """List eval samples filtered by --filter / --correct / --incorrect / --missing / --search / --error-type."""
253
257
  client = ApiClient(profile=profile, api_url=api_url)
254
- filters: List[dict] = []
258
+ filters: List[dict] = to_api_filters(parse_filters(filter_))
255
259
 
256
- kind = str(filter_ or "all").lower()
257
- if kind not in ("all", "correct", "incorrect", "missing"):
258
- print_error("--filter must be one of: correct, incorrect, missing, all", "bad_filter")
260
+ # --correct / --incorrect / --missing are convenience flags for the canonical
261
+ # correctness filter (server-side reconciliation). Mutually exclusive.
262
+ chosen = [name for name, on in (("correct", correct), ("incorrect", incorrect), ("missing", missing)) if on]
263
+ if len(chosen) > 1:
264
+ print_error("--correct, --incorrect and --missing are mutually exclusive.", "bad_filter")
259
265
  raise typer.Exit(1)
266
+ correctness = chosen[0] if chosen else None
260
267
 
261
268
  if search:
262
269
  if search_columns:
@@ -287,8 +294,8 @@ def samples(
287
294
  params = {"limit": limit, "offset": offset}
288
295
  if columns:
289
296
  params["columns"] = str(columns)
290
- if kind != "all":
291
- params["correctness"] = kind
297
+ if correctness:
298
+ params["correctness"] = correctness
292
299
 
293
300
  data = client.post(f"/v1/datasets/{q(id)}/rows/filter", json={"filters": filters}, params=params).json()
294
301
  if json_out:
@@ -84,7 +84,8 @@ A workspace is the top-level container for datasets, spec docs, and members.
84
84
  ldv datasets schema <id> # Column names + types
85
85
  ldv datasets profile <id> # Per-column nulls/cardinality/numeric stats/top values + content token stats
86
86
  # [--full-content] exact content scan (slow) [--skip-content] omit it
87
- ldv datasets rows <id> [--limit N] [--offset N]
87
+ ldv datasets rows <id> [-f "col<op>value" ...] [--columns a,b] [--limit N] [--offset N]
88
+ # -f/--filter is the same syntax everywhere (see Filtering below)
88
89
  ldv datasets delete <id>
89
90
  ldv datasets push <id> # Push edits back to HuggingFace
90
91
  ldv datasets push-status <id> [--job <job-id>]
@@ -116,13 +117,17 @@ repeatable), -f/--filter (filter rows; see below), -n/--limit (page size when
116
117
  paging a platform dataset), --offset (start row index), --title, --hf, --split,
117
118
  --workspace, --profile, --api-url.
118
119
 
119
- Filtering: -f/--filter "col<op>value" shows only matching rows — works on local
120
- files and platform datasets (server-side for platform). Repeatable; filters AND
121
- together; string compare is case-insensitive. Operators: = (eq), != (ne),
122
- ~ (contains), >, <, >=, <=.
120
+ Filtering (one syntax everywhere): -f/--filter "col<op>value" shows only matching
121
+ rows. The SAME flag and syntax work on `preview`, `datasets rows`, and
122
+ `eval samples`. Repeatable; filters AND together; string compare is
123
+ case-insensitive. Operators: = (eq), != (neq), ~ (contains), >, <, >=, <=.
124
+ For `preview` it also runs on local files (client-side); on platform datasets all
125
+ three filter server-side via POST /v1/datasets/{id}/rows/filter.
123
126
 
124
127
  ldv preview <dataset-id> -f "domain=telecom" -f "reward>=0.8"
125
128
  ldv preview data.jsonl -f "model~lfm"
129
+ ldv datasets rows <id> -f "lang=en" -f "score<0.5"
130
+ ldv eval samples <id> -f "reasoning_tokens>30000" --incorrect
126
131
 
127
132
  Navigation: two modes toggled with m — pager (one sample at a time; ←/→ or
128
133
  n/b switch samples, ↑/↓/j/k scroll) and scroll (all samples; n/b jump between
@@ -160,14 +165,18 @@ primitives for error analysis — YOU do the reasoning over what they return.
160
165
  # missing think tags 80 ██████████████ 40.0%
161
166
  # If no failure_analysis column exists, prints a clear
162
167
  # message and exits 0. Use --json for the raw API response.
163
- ldv eval samples <id> [--filter correct|incorrect|missing|all] [--search <text>]
164
- [--error-type <value>] [--columns a,b] [--limit N] [--offset N]
168
+ ldv eval samples <id> [-f "col<op>value" ...] [--correct|--incorrect|--missing]
169
+ [--search <text>] [--error-type <value>] [--columns a,b]
170
+ [--limit N] [--offset N]
165
171
  # Slice the dataset for error analysis. Filters AND
166
172
  # together. Prints an 'index' column per row.
167
173
  ldv eval sample <id> --row <index> # Read one full sample (the conversation) by the
168
174
  # 'index' from `eval samples`
169
175
 
170
176
  Notes:
177
+ - -f/--filter is the unified column filter (same syntax as preview / datasets rows; see Filtering).
178
+ - --correct / --incorrect / --missing are convenience flags for the canonical correctness filter
179
+ (mutually exclusive). They AND with any -f filters and --search / --error-type.
171
180
  - --search matches a substring on the prompt OR response column (either one matching is a hit).
172
181
  - --error-type values come from the `error_field` / `error_distribution` in `eval stats`.
173
182
  - Use the 'index' from `eval samples` directly as `eval sample --row <index>`.
@@ -295,8 +304,9 @@ never goes stale.
295
304
  # (mode_distribution: name/count/rate per mode)
296
305
  ldv eval stats <id> --json # accuracy + error_distribution_incorrect
297
306
  # = the common errors AMONG the misses
298
- ldv eval samples <id> --filter incorrect --json # pull the misses
299
- ldv eval samples <id> --filter incorrect --error-type <value> --json # focus one failure mode
307
+ ldv eval samples <id> --incorrect --json # pull the misses
308
+ ldv eval samples <id> --incorrect --error-type <value> --json # focus one failure mode
309
+ ldv eval samples <id> --incorrect -f "reasoning_tokens>30000" --json # misses that ran long
300
310
  ldv eval sample <id> --row <index> --json # read the full conversation of a miss
301
311
  # Then synthesize the common pattern across the misses yourself — the commands give you
302
312
  # the data (counts, slices, conversations); the analysis is your job.
@@ -20,6 +20,7 @@ import typer
20
20
 
21
21
  from .._opts import ApiUrlOpt, ProfileOpt
22
22
  from ..api import ApiClient
23
+ from ..filters import FILTER_HELP, parse_filters, row_matches, to_api_filters
23
24
  from ..output import print_error
24
25
  from ..util import q
25
26
 
@@ -759,67 +760,6 @@ def _choose_workspace(client: ApiClient, tui_mod) -> Optional[str]:
759
760
  return choice
760
761
 
761
762
 
762
- # --------------------------------------------------------------------------
763
- # Row filtering (--filter "col<op>value")
764
- # --------------------------------------------------------------------------
765
-
766
- # Maps each CLI symbol to the platform filter API's operator name (the same
767
- # names work server-side and locally). _parse_filters picks the earliest operator
768
- # (longest on a tie), so list order doesn't affect correctness.
769
- _FILTER_OPS = [(">=", "gte"), ("<=", "lte"), ("!=", "ne"), ("~", "contains"), ("=", "eq"), (">", "gt"), ("<", "lt")]
770
- _NUMERIC_OPS = {"gt": lambda c, v: c > v, "lt": lambda c, v: c < v, "gte": lambda c, v: c >= v, "lte": lambda c, v: c <= v}
771
-
772
-
773
- def _parse_filters(specs: Optional[List[str]]) -> List[tuple]:
774
- """Parse ['col=value', 'reward>=0.5', 'name~kod'] → [(col, op, value), ...].
775
-
776
- Splits on the EARLIEST operator (longest on a tie, so 'reward>=5' is gte not
777
- gt), keeping operator chars in the value intact (e.g. 'q=a>b' → col 'q', value
778
- 'a>b'). Rejects an empty column or value."""
779
- out: List[tuple] = []
780
- for spec in specs or []:
781
- chosen = None # (index, symbol, op_name)
782
- for sym, op in _FILTER_OPS:
783
- i = spec.find(sym)
784
- if i > 0 and (chosen is None or i < chosen[0] or (i == chosen[0] and len(sym) > len(chosen[1]))):
785
- chosen = (i, sym, op)
786
- if chosen is None:
787
- print_error(
788
- f"Invalid --filter '{spec}'. Use col=value, col!=value, col~text, or col>/</>=/<= N.",
789
- "bad_filter",
790
- )
791
- raise typer.Exit(1)
792
- i, sym, op = chosen
793
- col, val = spec[:i].strip(), spec[i + len(sym):].strip()
794
- if not col or not val:
795
- print_error(f"Invalid --filter '{spec}': both a column and a value are required.", "bad_filter")
796
- raise typer.Exit(1)
797
- out.append((col, op, val))
798
- return out
799
-
800
-
801
- def _cell_matches(cell: object, op: str, val: str) -> bool:
802
- if op == "contains":
803
- return cell is not None and val.lower() in str(cell).lower()
804
- if op in ("eq", "ne"):
805
- equal = cell is not None and str(cell).strip().lower() == val.strip().lower()
806
- return equal if op == "eq" else not equal
807
- try:
808
- return _NUMERIC_OPS[op](float(cell), float(val)) # gt/lt/gte/lte
809
- except (TypeError, ValueError):
810
- return False
811
-
812
-
813
- def _row_matches(row: object, filters: List[tuple]) -> bool:
814
- """Client-side predicate (local files). A non-dict row can't match a column
815
- filter. All filters AND together."""
816
- if not filters:
817
- return True
818
- if not isinstance(row, dict):
819
- return False
820
- return all(_cell_matches(row.get(col), op, val) for col, op, val in filters)
821
-
822
-
823
763
  # --------------------------------------------------------------------------
824
764
  # Command
825
765
  # --------------------------------------------------------------------------
@@ -835,10 +775,7 @@ def preview(
835
775
  offset: Annotated[int, typer.Option("--offset", help="Start at this row index")] = 0,
836
776
  filter_: Annotated[
837
777
  Optional[List[str]],
838
- typer.Option(
839
- "--filter", "-f",
840
- help="Filter rows: 'col=value', 'col!=value', 'col~text' (contains), or 'col>/</>=/<= N'. Repeatable (AND).",
841
- ),
778
+ typer.Option("--filter", "-f", help=FILTER_HELP),
842
779
  ] = None,
843
780
  title: Annotated[Optional[str], typer.Option("--title", help="Title shown in the viewer header")] = None,
844
781
  hf: Annotated[
@@ -869,7 +806,7 @@ def preview(
869
806
  print_error("The terminal viewer requires 'textual'. Install it: pip install textual", "missing_textual")
870
807
  raise typer.Exit(1)
871
808
 
872
- filters = _parse_filters(filter_)
809
+ filters = parse_filters(filter_)
873
810
  local_path = Path(source)
874
811
  is_local = (not hf) and local_path.exists() and local_path.is_file()
875
812
 
@@ -877,7 +814,7 @@ def preview(
877
814
  if is_local:
878
815
  rows = _load_local(local_path)
879
816
  if filters:
880
- rows = [r for r in rows if _row_matches(r, filters)]
817
+ rows = [r for r in rows if row_matches(r, filters)]
881
818
  if not rows:
882
819
  print_error("No rows match the filter(s).", "no_match")
883
820
  raise typer.Exit(3)
@@ -909,7 +846,7 @@ def preview(
909
846
  view_title = title or f"dataset {source}"
910
847
 
911
848
  page_size = limit if limit and limit > 0 else 25
912
- api_filters = [{"column": col, "operator": op, "value": val} for col, op, val in filters]
849
+ api_filters = to_api_filters(filters)
913
850
 
914
851
  def _fetch_page(off: int, lim: int) -> List[object]:
915
852
  params = {"limit": str(lim), "offset": str(offset + off)}
@@ -0,0 +1,99 @@
1
+ """Shared row-filter syntax for `preview`, `datasets rows`, and `eval samples`.
2
+
3
+ One filtering language across the CLI: `--filter "col<op>value"` (repeatable, AND).
4
+ The operator symbols map to the platform filter API's operator names, which work
5
+ both server-side (`POST /v1/datasets/{id}/rows/filter`) and locally (preview's
6
+ client-side matcher for local files).
7
+ """
8
+ from typing import List, Optional
9
+
10
+ import typer
11
+
12
+ from .output import print_error
13
+
14
+ # Shown in each command's --filter help so the syntax is documented in one place.
15
+ FILTER_HELP = (
16
+ "Filter rows: 'col=value', 'col!=value', 'col~text' (contains), "
17
+ "or 'col>/</>=/<= N'. Repeatable (AND)."
18
+ )
19
+
20
+ # Maps each CLI symbol to the platform filter API's operator name. parse_filters
21
+ # picks the earliest operator (longest on a tie), so list order doesn't affect
22
+ # correctness.
23
+ _FILTER_OPS = [
24
+ (">=", "gte"),
25
+ ("<=", "lte"),
26
+ ("!=", "neq"),
27
+ ("~", "contains"),
28
+ ("=", "eq"),
29
+ (">", "gt"),
30
+ ("<", "lt"),
31
+ ]
32
+ _NUMERIC_OPS = {
33
+ "gt": lambda c, v: c > v,
34
+ "lt": lambda c, v: c < v,
35
+ "gte": lambda c, v: c >= v,
36
+ "lte": lambda c, v: c <= v,
37
+ }
38
+
39
+
40
+ def parse_filters(specs: Optional[List[str]]) -> List[tuple]:
41
+ """Parse ['col=value', 'reward>=0.5', 'name~kod'] → [(col, op, value), ...].
42
+
43
+ Splits on the EARLIEST operator (longest on a tie, so 'reward>=5' is gte not
44
+ gt), keeping operator chars in the value intact (e.g. 'q=a>b' → col 'q', value
45
+ 'a>b'). Rejects an empty column or value."""
46
+ out: List[tuple] = []
47
+ for spec in specs or []:
48
+ chosen = None # (index, symbol, op_name)
49
+ for sym, op in _FILTER_OPS:
50
+ i = spec.find(sym)
51
+ if i > 0 and (
52
+ chosen is None
53
+ or i < chosen[0]
54
+ or (i == chosen[0] and len(sym) > len(chosen[1]))
55
+ ):
56
+ chosen = (i, sym, op)
57
+ if chosen is None:
58
+ print_error(
59
+ f"Invalid --filter '{spec}'. Use col=value, col!=value, col~text, or col>/</>=/<= N.",
60
+ "bad_filter",
61
+ )
62
+ raise typer.Exit(1)
63
+ i, sym, op = chosen
64
+ col, val = spec[:i].strip(), spec[i + len(sym) :].strip()
65
+ if not col or not val:
66
+ print_error(
67
+ f"Invalid --filter '{spec}': both a column and a value are required.",
68
+ "bad_filter",
69
+ )
70
+ raise typer.Exit(1)
71
+ out.append((col, op, val))
72
+ return out
73
+
74
+
75
+ def to_api_filters(parsed: List[tuple]) -> List[dict]:
76
+ """[(col, op, val), ...] → the `filters` payload for POST /rows/filter."""
77
+ return [{"column": col, "operator": op, "value": val} for col, op, val in parsed]
78
+
79
+
80
+ def cell_matches(cell: object, op: str, val: str) -> bool:
81
+ if op == "contains":
82
+ return cell is not None and val.lower() in str(cell).lower()
83
+ if op in ("eq", "neq"):
84
+ equal = cell is not None and str(cell).strip().lower() == val.strip().lower()
85
+ return equal if op == "eq" else not equal
86
+ try:
87
+ return _NUMERIC_OPS[op](float(cell), float(val)) # gt/lt/gte/lte
88
+ except (TypeError, ValueError):
89
+ return False
90
+
91
+
92
+ def row_matches(row: object, filters: List[tuple]) -> bool:
93
+ """Client-side predicate (local files). A non-dict row can't match a column
94
+ filter. All filters AND together."""
95
+ if not filters:
96
+ return True
97
+ if not isinstance(row, dict):
98
+ return False
99
+ return all(cell_matches(row.get(col), op, val) for col, op, val in filters)
@@ -173,7 +173,7 @@ wheels = [
173
173
 
174
174
  [[package]]
175
175
  name = "ldv-cli"
176
- version = "0.9.0"
176
+ version = "0.10.0"
177
177
  source = { editable = "." }
178
178
  dependencies = [
179
179
  { name = "httpx" },
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes