ldv-cli 0.10.0__tar.gz → 0.11.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ldv_cli-0.10.0 → ldv_cli-0.11.0}/PKG-INFO +13 -8
- {ldv_cli-0.10.0 → ldv_cli-0.11.0}/README.md +12 -7
- {ldv_cli-0.10.0 → ldv_cli-0.11.0}/pyproject.toml +1 -1
- {ldv_cli-0.10.0 → ldv_cli-0.11.0}/src/ldv/commands/datasets.py +17 -3
- {ldv_cli-0.10.0 → ldv_cli-0.11.0}/src/ldv/commands/evals.py +15 -8
- {ldv_cli-0.10.0 → ldv_cli-0.11.0}/src/ldv/commands/instructions.py +19 -9
- {ldv_cli-0.10.0 → ldv_cli-0.11.0}/src/ldv/commands/preview.py +5 -68
- ldv_cli-0.11.0/src/ldv/filters.py +99 -0
- {ldv_cli-0.10.0 → ldv_cli-0.11.0}/uv.lock +1 -1
- {ldv_cli-0.10.0 → ldv_cli-0.11.0}/.gitignore +0 -0
- {ldv_cli-0.10.0 → ldv_cli-0.11.0}/examples/agent-traces.jsonl +0 -0
- {ldv_cli-0.10.0 → ldv_cli-0.11.0}/package-lock.json +0 -0
- {ldv_cli-0.10.0 → ldv_cli-0.11.0}/src/ldv/__init__.py +0 -0
- {ldv_cli-0.10.0 → ldv_cli-0.11.0}/src/ldv/_group.py +0 -0
- {ldv_cli-0.10.0 → ldv_cli-0.11.0}/src/ldv/_opts.py +0 -0
- {ldv_cli-0.10.0 → ldv_cli-0.11.0}/src/ldv/api.py +0 -0
- {ldv_cli-0.10.0 → ldv_cli-0.11.0}/src/ldv/cli.py +0 -0
- {ldv_cli-0.10.0 → ldv_cli-0.11.0}/src/ldv/commands/__init__.py +0 -0
- {ldv_cli-0.10.0 → ldv_cli-0.11.0}/src/ldv/commands/annotations.py +0 -0
- {ldv_cli-0.10.0 → ldv_cli-0.11.0}/src/ldv/commands/auth.py +0 -0
- {ldv_cli-0.10.0 → ldv_cli-0.11.0}/src/ldv/commands/buckets.py +0 -0
- {ldv_cli-0.10.0 → ldv_cli-0.11.0}/src/ldv/commands/edits.py +0 -0
- {ldv_cli-0.10.0 → ldv_cli-0.11.0}/src/ldv/commands/highlights.py +0 -0
- {ldv_cli-0.10.0 → ldv_cli-0.11.0}/src/ldv/commands/issues.py +0 -0
- {ldv_cli-0.10.0 → ldv_cli-0.11.0}/src/ldv/commands/reports.py +0 -0
- {ldv_cli-0.10.0 → ldv_cli-0.11.0}/src/ldv/commands/skills.py +0 -0
- {ldv_cli-0.10.0 → ldv_cli-0.11.0}/src/ldv/commands/spec.py +0 -0
- {ldv_cli-0.10.0 → ldv_cli-0.11.0}/src/ldv/commands/tui.py +0 -0
- {ldv_cli-0.10.0 → ldv_cli-0.11.0}/src/ldv/commands/update.py +0 -0
- {ldv_cli-0.10.0 → ldv_cli-0.11.0}/src/ldv/commands/workspaces.py +0 -0
- {ldv_cli-0.10.0 → ldv_cli-0.11.0}/src/ldv/config.py +0 -0
- {ldv_cli-0.10.0 → ldv_cli-0.11.0}/src/ldv/output.py +0 -0
- {ldv_cli-0.10.0 → ldv_cli-0.11.0}/src/ldv/sessions.py +0 -0
- {ldv_cli-0.10.0 → ldv_cli-0.11.0}/src/ldv/util.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ldv-cli
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.11.0
|
|
4
4
|
Summary: ldv — CLI for the Liquid DataViewer platform (formerly lql)
|
|
5
5
|
Project-URL: Homepage, https://github.com/Liquid4All/lql
|
|
6
6
|
Author: Liquid AI
|
|
@@ -140,7 +140,8 @@ ldv datasets create --workspace <id> --hf-bucket <org/bucket> --key <path-or-glo
|
|
|
140
140
|
From an HF storage bucket (e.g. --key 'data/*.parquet')
|
|
141
141
|
ldv datasets sync <id> Trigger sync (HF repo, S3, or HF bucket)
|
|
142
142
|
ldv datasets schema <id> Show column schema
|
|
143
|
-
ldv datasets rows <id> [--limit N] [--offset N]
|
|
143
|
+
ldv datasets rows <id> [-f "col<op>value"] [--columns a,b] [--limit N] [--offset N]
|
|
144
|
+
Fetch rows (-f/--filter: same syntax everywhere)
|
|
144
145
|
ldv datasets delete <id> Delete dataset
|
|
145
146
|
ldv datasets push <id> Push to HuggingFace
|
|
146
147
|
ldv datasets push-status <id> [--job <id>] Check push job status
|
|
@@ -174,10 +175,11 @@ ldv preview <src> --offset N Start at row index N
|
|
|
174
175
|
ldv preview <src> --title "<title>" Title shown in the viewer header
|
|
175
176
|
```
|
|
176
177
|
|
|
177
|
-
**Filtering (`--filter`/`-f`)
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
178
|
+
**Filtering (`--filter`/`-f`) — one syntax everywhere.** The same flag and syntax
|
|
179
|
+
work on `preview`, `datasets rows`, and `eval samples`. Show only matching rows —
|
|
180
|
+
`preview` also filters local files (client-side); platform datasets filter
|
|
181
|
+
server-side. Repeatable; filters AND together; string match is case-insensitive.
|
|
182
|
+
Operators: `=`, `!=`, `~` (contains), `>`, `<`, `>=`, `<=`.
|
|
181
183
|
|
|
182
184
|
```
|
|
183
185
|
ldv preview <dataset-id> -f "domain=telecom"
|
|
@@ -228,7 +230,7 @@ ldv eval list [--workspace <id>] List eval datasets only
|
|
|
228
230
|
workspace, lists only evals you own.
|
|
229
231
|
ldv eval correctness <id> Fast accuracy + correct/incorrect/missing counts
|
|
230
232
|
ldv eval stats <id> Accuracy + error-type distribution + token stats
|
|
231
|
-
ldv eval samples <id> [--
|
|
233
|
+
ldv eval samples <id> [-f "col<op>value" ...] [--correct|--incorrect|--missing]
|
|
232
234
|
[--search <text>] [--error-type <value>]
|
|
233
235
|
[--columns a,b] [--limit N] [--offset N]
|
|
234
236
|
Slice the dataset for error analysis. Filters
|
|
@@ -239,6 +241,8 @@ ldv eval sample <id> --row <index> Read one full sample (the conve
|
|
|
239
241
|
|
|
240
242
|
Notes:
|
|
241
243
|
|
|
244
|
+
- `-f`/`--filter` is the unified column filter — same syntax as `preview` and `datasets rows` (see Filtering above).
|
|
245
|
+
- `--correct` / `--incorrect` / `--missing` are convenience flags for the canonical correctness filter (mutually exclusive). They AND with any `-f` filters, `--search`, and `--error-type`.
|
|
242
246
|
- `--search` matches a substring on the prompt **or** response column (either hit counts). Override the searched columns with `--search-columns a,b`.
|
|
243
247
|
- `--error-type` values come from the `error_field` / `error_distribution` reported by `eval stats`.
|
|
244
248
|
- Use the `index` from `eval samples` directly as `eval sample --row <index>`.
|
|
@@ -248,7 +252,8 @@ Typical analysis loop:
|
|
|
248
252
|
```bash
|
|
249
253
|
ldv eval list --workspace <id> # find the eval dataset
|
|
250
254
|
ldv eval stats <id> # accuracy + where the errors cluster
|
|
251
|
-
ldv eval samples <id> --
|
|
255
|
+
ldv eval samples <id> --incorrect --limit 20 # pull the misses
|
|
256
|
+
ldv eval samples <id> --incorrect -f "reasoning_tokens>30000" # misses that ran long
|
|
252
257
|
ldv eval sample <id> --row 42 # read one failure in full
|
|
253
258
|
```
|
|
254
259
|
|
|
@@ -124,7 +124,8 @@ ldv datasets create --workspace <id> --hf-bucket <org/bucket> --key <path-or-glo
|
|
|
124
124
|
From an HF storage bucket (e.g. --key 'data/*.parquet')
|
|
125
125
|
ldv datasets sync <id> Trigger sync (HF repo, S3, or HF bucket)
|
|
126
126
|
ldv datasets schema <id> Show column schema
|
|
127
|
-
ldv datasets rows <id> [--limit N] [--offset N]
|
|
127
|
+
ldv datasets rows <id> [-f "col<op>value"] [--columns a,b] [--limit N] [--offset N]
|
|
128
|
+
Fetch rows (-f/--filter: same syntax everywhere)
|
|
128
129
|
ldv datasets delete <id> Delete dataset
|
|
129
130
|
ldv datasets push <id> Push to HuggingFace
|
|
130
131
|
ldv datasets push-status <id> [--job <id>] Check push job status
|
|
@@ -158,10 +159,11 @@ ldv preview <src> --offset N Start at row index N
|
|
|
158
159
|
ldv preview <src> --title "<title>" Title shown in the viewer header
|
|
159
160
|
```
|
|
160
161
|
|
|
161
|
-
**Filtering (`--filter`/`-f`)
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
162
|
+
**Filtering (`--filter`/`-f`) — one syntax everywhere.** The same flag and syntax
|
|
163
|
+
work on `preview`, `datasets rows`, and `eval samples`. Show only matching rows —
|
|
164
|
+
`preview` also filters local files (client-side); platform datasets filter
|
|
165
|
+
server-side. Repeatable; filters AND together; string match is case-insensitive.
|
|
166
|
+
Operators: `=`, `!=`, `~` (contains), `>`, `<`, `>=`, `<=`.
|
|
165
167
|
|
|
166
168
|
```
|
|
167
169
|
ldv preview <dataset-id> -f "domain=telecom"
|
|
@@ -212,7 +214,7 @@ ldv eval list [--workspace <id>] List eval datasets only
|
|
|
212
214
|
workspace, lists only evals you own.
|
|
213
215
|
ldv eval correctness <id> Fast accuracy + correct/incorrect/missing counts
|
|
214
216
|
ldv eval stats <id> Accuracy + error-type distribution + token stats
|
|
215
|
-
ldv eval samples <id> [--
|
|
217
|
+
ldv eval samples <id> [-f "col<op>value" ...] [--correct|--incorrect|--missing]
|
|
216
218
|
[--search <text>] [--error-type <value>]
|
|
217
219
|
[--columns a,b] [--limit N] [--offset N]
|
|
218
220
|
Slice the dataset for error analysis. Filters
|
|
@@ -223,6 +225,8 @@ ldv eval sample <id> --row <index> Read one full sample (the conve
|
|
|
223
225
|
|
|
224
226
|
Notes:
|
|
225
227
|
|
|
228
|
+
- `-f`/`--filter` is the unified column filter — same syntax as `preview` and `datasets rows` (see Filtering above).
|
|
229
|
+
- `--correct` / `--incorrect` / `--missing` are convenience flags for the canonical correctness filter (mutually exclusive). They AND with any `-f` filters, `--search`, and `--error-type`.
|
|
226
230
|
- `--search` matches a substring on the prompt **or** response column (either hit counts). Override the searched columns with `--search-columns a,b`.
|
|
227
231
|
- `--error-type` values come from the `error_field` / `error_distribution` reported by `eval stats`.
|
|
228
232
|
- Use the `index` from `eval samples` directly as `eval sample --row <index>`.
|
|
@@ -232,7 +236,8 @@ Typical analysis loop:
|
|
|
232
236
|
```bash
|
|
233
237
|
ldv eval list --workspace <id> # find the eval dataset
|
|
234
238
|
ldv eval stats <id> # accuracy + where the errors cluster
|
|
235
|
-
ldv eval samples <id> --
|
|
239
|
+
ldv eval samples <id> --incorrect --limit 20 # pull the misses
|
|
240
|
+
ldv eval samples <id> --incorrect -f "reasoning_tokens>30000" # misses that ran long
|
|
236
241
|
ldv eval sample <id> --row 42 # read one failure in full
|
|
237
242
|
```
|
|
238
243
|
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import sys
|
|
3
3
|
from pathlib import Path
|
|
4
|
-
from typing import Annotated, Optional
|
|
4
|
+
from typing import Annotated, List, Optional
|
|
5
5
|
|
|
6
6
|
import typer
|
|
7
7
|
|
|
@@ -10,6 +10,7 @@ from .._group import AliasGroup
|
|
|
10
10
|
from .._opts import ApiUrlOpt, JsonOpt, ProfileOpt
|
|
11
11
|
from ..api import ApiClient
|
|
12
12
|
from ..config import _env
|
|
13
|
+
from ..filters import FILTER_HELP, parse_filters, to_api_filters
|
|
13
14
|
from ..output import print_error, print_grouped_tables, print_json, print_table
|
|
14
15
|
from ..util import q
|
|
15
16
|
|
|
@@ -339,15 +340,28 @@ def profile_cmd(
|
|
|
339
340
|
@app.command("rows")
|
|
340
341
|
def rows(
|
|
341
342
|
id: Annotated[str, typer.Argument(help="Dataset ID")],
|
|
343
|
+
filter_: Annotated[Optional[List[str]], typer.Option("--filter", "-f", help=FILTER_HELP)] = None,
|
|
344
|
+
columns: Annotated[
|
|
345
|
+
Optional[str], typer.Option("--columns", help="Comma-separated columns to project")
|
|
346
|
+
] = None,
|
|
342
347
|
limit: Annotated[str, typer.Option("--limit", help="Number of rows")] = "20",
|
|
343
348
|
offset: Annotated[str, typer.Option("--offset", help="Row offset")] = "0",
|
|
344
349
|
json_out: JsonOpt = False,
|
|
345
350
|
profile: ProfileOpt = None,
|
|
346
351
|
api_url: ApiUrlOpt = None,
|
|
347
352
|
) -> None:
|
|
348
|
-
"""Get dataset rows."""
|
|
353
|
+
"""Get dataset rows, optionally filtered (see --filter)."""
|
|
349
354
|
client = ApiClient(profile=profile, api_url=api_url)
|
|
350
|
-
|
|
355
|
+
params = {"limit": limit, "offset": offset}
|
|
356
|
+
if columns:
|
|
357
|
+
params["columns"] = str(columns)
|
|
358
|
+
api_filters = to_api_filters(parse_filters(filter_))
|
|
359
|
+
if api_filters:
|
|
360
|
+
data = client.post(
|
|
361
|
+
f"/v1/datasets/{q(id)}/rows/filter", json={"filters": api_filters}, params=params
|
|
362
|
+
).json()
|
|
363
|
+
else:
|
|
364
|
+
data = client.get(f"/v1/datasets/{q(id)}/rows", params=params).json()
|
|
351
365
|
if json_out:
|
|
352
366
|
print_json(data)
|
|
353
367
|
return
|
|
@@ -12,6 +12,7 @@ from .._group import AliasGroup
|
|
|
12
12
|
from .._opts import ApiUrlOpt, JsonOpt, ProfileOpt
|
|
13
13
|
from ..api import ApiClient
|
|
14
14
|
from ..config import _env
|
|
15
|
+
from ..filters import FILTER_HELP, parse_filters, to_api_filters
|
|
15
16
|
from ..output import print_error, print_json, print_table
|
|
16
17
|
from ..util import q
|
|
17
18
|
|
|
@@ -238,7 +239,10 @@ def failures(
|
|
|
238
239
|
@app.command("samples")
|
|
239
240
|
def samples(
|
|
240
241
|
id: Annotated[str, typer.Argument(help="Dataset ID")],
|
|
241
|
-
filter_: Annotated[str, typer.Option("--filter", help=
|
|
242
|
+
filter_: Annotated[Optional[List[str]], typer.Option("--filter", "-f", help=FILTER_HELP)] = None,
|
|
243
|
+
correct: Annotated[bool, typer.Option("--correct", help="Only correct samples")] = False,
|
|
244
|
+
incorrect: Annotated[bool, typer.Option("--incorrect", help="Only incorrect samples")] = False,
|
|
245
|
+
missing: Annotated[bool, typer.Option("--missing", help="Only samples with no verdict")] = False,
|
|
242
246
|
search: Annotated[Optional[str], typer.Option("--search", help="Substring match on prompt OR response column")] = None,
|
|
243
247
|
search_columns: Annotated[Optional[str], typer.Option("--search-columns", help="Override which columns --search matches (comma-separated)")] = None,
|
|
244
248
|
error_type: Annotated[Optional[str], typer.Option("--error-type", help="Filter to samples whose error field equals <value>")] = None,
|
|
@@ -249,14 +253,17 @@ def samples(
|
|
|
249
253
|
profile: ProfileOpt = None,
|
|
250
254
|
api_url: ApiUrlOpt = None,
|
|
251
255
|
) -> None:
|
|
252
|
-
"""List samples filtered by
|
|
256
|
+
"""List eval samples filtered by --filter / --correct / --incorrect / --missing / --search / --error-type."""
|
|
253
257
|
client = ApiClient(profile=profile, api_url=api_url)
|
|
254
|
-
filters: List[dict] =
|
|
258
|
+
filters: List[dict] = to_api_filters(parse_filters(filter_))
|
|
255
259
|
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
260
|
+
# --correct / --incorrect / --missing are convenience flags for the canonical
|
|
261
|
+
# correctness filter (server-side reconciliation). Mutually exclusive.
|
|
262
|
+
chosen = [name for name, on in (("correct", correct), ("incorrect", incorrect), ("missing", missing)) if on]
|
|
263
|
+
if len(chosen) > 1:
|
|
264
|
+
print_error("--correct, --incorrect and --missing are mutually exclusive.", "bad_filter")
|
|
259
265
|
raise typer.Exit(1)
|
|
266
|
+
correctness = chosen[0] if chosen else None
|
|
260
267
|
|
|
261
268
|
if search:
|
|
262
269
|
if search_columns:
|
|
@@ -287,8 +294,8 @@ def samples(
|
|
|
287
294
|
params = {"limit": limit, "offset": offset}
|
|
288
295
|
if columns:
|
|
289
296
|
params["columns"] = str(columns)
|
|
290
|
-
if
|
|
291
|
-
params["correctness"] =
|
|
297
|
+
if correctness:
|
|
298
|
+
params["correctness"] = correctness
|
|
292
299
|
|
|
293
300
|
data = client.post(f"/v1/datasets/{q(id)}/rows/filter", json={"filters": filters}, params=params).json()
|
|
294
301
|
if json_out:
|
|
@@ -84,7 +84,8 @@ A workspace is the top-level container for datasets, spec docs, and members.
|
|
|
84
84
|
ldv datasets schema <id> # Column names + types
|
|
85
85
|
ldv datasets profile <id> # Per-column nulls/cardinality/numeric stats/top values + content token stats
|
|
86
86
|
# [--full-content] exact content scan (slow) [--skip-content] omit it
|
|
87
|
-
ldv datasets rows <id> [--limit N] [--offset N]
|
|
87
|
+
ldv datasets rows <id> [-f "col<op>value" ...] [--columns a,b] [--limit N] [--offset N]
|
|
88
|
+
# -f/--filter is the same syntax everywhere (see Filtering below)
|
|
88
89
|
ldv datasets delete <id>
|
|
89
90
|
ldv datasets push <id> # Push edits back to HuggingFace
|
|
90
91
|
ldv datasets push-status <id> [--job <job-id>]
|
|
@@ -116,13 +117,17 @@ repeatable), -f/--filter (filter rows; see below), -n/--limit (page size when
|
|
|
116
117
|
paging a platform dataset), --offset (start row index), --title, --hf, --split,
|
|
117
118
|
--workspace, --profile, --api-url.
|
|
118
119
|
|
|
119
|
-
Filtering: -f/--filter "col<op>value" shows only matching
|
|
120
|
-
|
|
121
|
-
together; string compare is
|
|
122
|
-
~ (contains), >, <, >=, <=.
|
|
120
|
+
Filtering (one syntax everywhere): -f/--filter "col<op>value" shows only matching
|
|
121
|
+
rows. The SAME flag and syntax work on `preview`, `datasets rows`, and
|
|
122
|
+
`eval samples`. Repeatable; filters AND together; string compare is
|
|
123
|
+
case-insensitive. Operators: = (eq), != (neq), ~ (contains), >, <, >=, <=.
|
|
124
|
+
For `preview` it also runs on local files (client-side); on platform datasets all
|
|
125
|
+
three filter server-side via POST /v1/datasets/{id}/rows/filter.
|
|
123
126
|
|
|
124
127
|
ldv preview <dataset-id> -f "domain=telecom" -f "reward>=0.8"
|
|
125
128
|
ldv preview data.jsonl -f "model~lfm"
|
|
129
|
+
ldv datasets rows <id> -f "lang=en" -f "score<0.5"
|
|
130
|
+
ldv eval samples <id> -f "reasoning_tokens>30000" --incorrect
|
|
126
131
|
|
|
127
132
|
Navigation: two modes toggled with m — pager (one sample at a time; ←/→ or
|
|
128
133
|
n/b switch samples, ↑/↓/j/k scroll) and scroll (all samples; n/b jump between
|
|
@@ -160,14 +165,18 @@ primitives for error analysis — YOU do the reasoning over what they return.
|
|
|
160
165
|
# missing think tags 80 ██████████████ 40.0%
|
|
161
166
|
# If no failure_analysis column exists, prints a clear
|
|
162
167
|
# message and exits 0. Use --json for the raw API response.
|
|
163
|
-
ldv eval samples <id> [
|
|
164
|
-
[--error-type <value>] [--columns a,b]
|
|
168
|
+
ldv eval samples <id> [-f "col<op>value" ...] [--correct|--incorrect|--missing]
|
|
169
|
+
[--search <text>] [--error-type <value>] [--columns a,b]
|
|
170
|
+
[--limit N] [--offset N]
|
|
165
171
|
# Slice the dataset for error analysis. Filters AND
|
|
166
172
|
# together. Prints an 'index' column per row.
|
|
167
173
|
ldv eval sample <id> --row <index> # Read one full sample (the conversation) by the
|
|
168
174
|
# 'index' from `eval samples`
|
|
169
175
|
|
|
170
176
|
Notes:
|
|
177
|
+
- -f/--filter is the unified column filter (same syntax as preview / datasets rows; see Filtering).
|
|
178
|
+
- --correct / --incorrect / --missing are convenience flags for the canonical correctness filter
|
|
179
|
+
(mutually exclusive). They AND with any -f filters and --search / --error-type.
|
|
171
180
|
- --search matches a substring on the prompt OR response column (either one matching is a hit).
|
|
172
181
|
- --error-type values come from the `error_field` / `error_distribution` in `eval stats`.
|
|
173
182
|
- Use the 'index' from `eval samples` directly as `eval sample --row <index>`.
|
|
@@ -295,8 +304,9 @@ never goes stale.
|
|
|
295
304
|
# (mode_distribution: name/count/rate per mode)
|
|
296
305
|
ldv eval stats <id> --json # accuracy + error_distribution_incorrect
|
|
297
306
|
# = the common errors AMONG the misses
|
|
298
|
-
ldv eval samples <id> --
|
|
299
|
-
ldv eval samples <id> --
|
|
307
|
+
ldv eval samples <id> --incorrect --json # pull the misses
|
|
308
|
+
ldv eval samples <id> --incorrect --error-type <value> --json # focus one failure mode
|
|
309
|
+
ldv eval samples <id> --incorrect -f "reasoning_tokens>30000" --json # misses that ran long
|
|
300
310
|
ldv eval sample <id> --row <index> --json # read the full conversation of a miss
|
|
301
311
|
# Then synthesize the common pattern across the misses yourself — the commands give you
|
|
302
312
|
# the data (counts, slices, conversations); the analysis is your job.
|
|
@@ -20,6 +20,7 @@ import typer
|
|
|
20
20
|
|
|
21
21
|
from .._opts import ApiUrlOpt, ProfileOpt
|
|
22
22
|
from ..api import ApiClient
|
|
23
|
+
from ..filters import FILTER_HELP, parse_filters, row_matches, to_api_filters
|
|
23
24
|
from ..output import print_error
|
|
24
25
|
from ..util import q
|
|
25
26
|
|
|
@@ -759,67 +760,6 @@ def _choose_workspace(client: ApiClient, tui_mod) -> Optional[str]:
|
|
|
759
760
|
return choice
|
|
760
761
|
|
|
761
762
|
|
|
762
|
-
# --------------------------------------------------------------------------
|
|
763
|
-
# Row filtering (--filter "col<op>value")
|
|
764
|
-
# --------------------------------------------------------------------------
|
|
765
|
-
|
|
766
|
-
# Maps each CLI symbol to the platform filter API's operator name (the same
|
|
767
|
-
# names work server-side and locally). _parse_filters picks the earliest operator
|
|
768
|
-
# (longest on a tie), so list order doesn't affect correctness.
|
|
769
|
-
_FILTER_OPS = [(">=", "gte"), ("<=", "lte"), ("!=", "ne"), ("~", "contains"), ("=", "eq"), (">", "gt"), ("<", "lt")]
|
|
770
|
-
_NUMERIC_OPS = {"gt": lambda c, v: c > v, "lt": lambda c, v: c < v, "gte": lambda c, v: c >= v, "lte": lambda c, v: c <= v}
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
def _parse_filters(specs: Optional[List[str]]) -> List[tuple]:
|
|
774
|
-
"""Parse ['col=value', 'reward>=0.5', 'name~kod'] → [(col, op, value), ...].
|
|
775
|
-
|
|
776
|
-
Splits on the EARLIEST operator (longest on a tie, so 'reward>=5' is gte not
|
|
777
|
-
gt), keeping operator chars in the value intact (e.g. 'q=a>b' → col 'q', value
|
|
778
|
-
'a>b'). Rejects an empty column or value."""
|
|
779
|
-
out: List[tuple] = []
|
|
780
|
-
for spec in specs or []:
|
|
781
|
-
chosen = None # (index, symbol, op_name)
|
|
782
|
-
for sym, op in _FILTER_OPS:
|
|
783
|
-
i = spec.find(sym)
|
|
784
|
-
if i > 0 and (chosen is None or i < chosen[0] or (i == chosen[0] and len(sym) > len(chosen[1]))):
|
|
785
|
-
chosen = (i, sym, op)
|
|
786
|
-
if chosen is None:
|
|
787
|
-
print_error(
|
|
788
|
-
f"Invalid --filter '{spec}'. Use col=value, col!=value, col~text, or col>/</>=/<= N.",
|
|
789
|
-
"bad_filter",
|
|
790
|
-
)
|
|
791
|
-
raise typer.Exit(1)
|
|
792
|
-
i, sym, op = chosen
|
|
793
|
-
col, val = spec[:i].strip(), spec[i + len(sym):].strip()
|
|
794
|
-
if not col or not val:
|
|
795
|
-
print_error(f"Invalid --filter '{spec}': both a column and a value are required.", "bad_filter")
|
|
796
|
-
raise typer.Exit(1)
|
|
797
|
-
out.append((col, op, val))
|
|
798
|
-
return out
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
def _cell_matches(cell: object, op: str, val: str) -> bool:
|
|
802
|
-
if op == "contains":
|
|
803
|
-
return cell is not None and val.lower() in str(cell).lower()
|
|
804
|
-
if op in ("eq", "ne"):
|
|
805
|
-
equal = cell is not None and str(cell).strip().lower() == val.strip().lower()
|
|
806
|
-
return equal if op == "eq" else not equal
|
|
807
|
-
try:
|
|
808
|
-
return _NUMERIC_OPS[op](float(cell), float(val)) # gt/lt/gte/lte
|
|
809
|
-
except (TypeError, ValueError):
|
|
810
|
-
return False
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
def _row_matches(row: object, filters: List[tuple]) -> bool:
|
|
814
|
-
"""Client-side predicate (local files). A non-dict row can't match a column
|
|
815
|
-
filter. All filters AND together."""
|
|
816
|
-
if not filters:
|
|
817
|
-
return True
|
|
818
|
-
if not isinstance(row, dict):
|
|
819
|
-
return False
|
|
820
|
-
return all(_cell_matches(row.get(col), op, val) for col, op, val in filters)
|
|
821
|
-
|
|
822
|
-
|
|
823
763
|
# --------------------------------------------------------------------------
|
|
824
764
|
# Command
|
|
825
765
|
# --------------------------------------------------------------------------
|
|
@@ -835,10 +775,7 @@ def preview(
|
|
|
835
775
|
offset: Annotated[int, typer.Option("--offset", help="Start at this row index")] = 0,
|
|
836
776
|
filter_: Annotated[
|
|
837
777
|
Optional[List[str]],
|
|
838
|
-
typer.Option(
|
|
839
|
-
"--filter", "-f",
|
|
840
|
-
help="Filter rows: 'col=value', 'col!=value', 'col~text' (contains), or 'col>/</>=/<= N'. Repeatable (AND).",
|
|
841
|
-
),
|
|
778
|
+
typer.Option("--filter", "-f", help=FILTER_HELP),
|
|
842
779
|
] = None,
|
|
843
780
|
title: Annotated[Optional[str], typer.Option("--title", help="Title shown in the viewer header")] = None,
|
|
844
781
|
hf: Annotated[
|
|
@@ -869,7 +806,7 @@ def preview(
|
|
|
869
806
|
print_error("The terminal viewer requires 'textual'. Install it: pip install textual", "missing_textual")
|
|
870
807
|
raise typer.Exit(1)
|
|
871
808
|
|
|
872
|
-
filters =
|
|
809
|
+
filters = parse_filters(filter_)
|
|
873
810
|
local_path = Path(source)
|
|
874
811
|
is_local = (not hf) and local_path.exists() and local_path.is_file()
|
|
875
812
|
|
|
@@ -877,7 +814,7 @@ def preview(
|
|
|
877
814
|
if is_local:
|
|
878
815
|
rows = _load_local(local_path)
|
|
879
816
|
if filters:
|
|
880
|
-
rows = [r for r in rows if
|
|
817
|
+
rows = [r for r in rows if row_matches(r, filters)]
|
|
881
818
|
if not rows:
|
|
882
819
|
print_error("No rows match the filter(s).", "no_match")
|
|
883
820
|
raise typer.Exit(3)
|
|
@@ -909,7 +846,7 @@ def preview(
|
|
|
909
846
|
view_title = title or f"dataset {source}"
|
|
910
847
|
|
|
911
848
|
page_size = limit if limit and limit > 0 else 25
|
|
912
|
-
api_filters =
|
|
849
|
+
api_filters = to_api_filters(filters)
|
|
913
850
|
|
|
914
851
|
def _fetch_page(off: int, lim: int) -> List[object]:
|
|
915
852
|
params = {"limit": str(lim), "offset": str(offset + off)}
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
"""Shared row-filter syntax for `preview`, `datasets rows`, and `eval samples`.
|
|
2
|
+
|
|
3
|
+
One filtering language across the CLI: `--filter "col<op>value"` (repeatable, AND).
|
|
4
|
+
The operator symbols map to the platform filter API's operator names, which work
|
|
5
|
+
both server-side (`POST /v1/datasets/{id}/rows/filter`) and locally (preview's
|
|
6
|
+
client-side matcher for local files).
|
|
7
|
+
"""
|
|
8
|
+
from typing import List, Optional
|
|
9
|
+
|
|
10
|
+
import typer
|
|
11
|
+
|
|
12
|
+
from .output import print_error
|
|
13
|
+
|
|
14
|
+
# Help text for the --filter option, kept here so the filter syntax is
# documented in one place instead of being repeated per command.
FILTER_HELP = (
    "Filter rows: 'col=value', 'col!=value', 'col~text' (contains), or 'col>/</>=/<= N'. "
    "Repeatable (AND)."
)
|
|
19
|
+
|
|
20
|
+
# Maps each CLI symbol to the platform filter API's operator name. parse_filters
# picks the earliest operator in the spec (longest on a tie), so list order
# doesn't affect correctness.
_FILTER_OPS = [
    (">=", "gte"),
    ("<=", "lte"),
    ("!=", "neq"),
    ("~", "contains"),
    ("=", "eq"),
    (">", "gt"),
    ("<", "lt"),
]
# Numeric comparisons keyed by operator name; cell_matches calls these with
# both operands already converted to float.
_NUMERIC_OPS = {
    "gt": lambda c, v: c > v,
    "lt": lambda c, v: c < v,
    "gte": lambda c, v: c >= v,
    "lte": lambda c, v: c <= v,
}


def parse_filters(specs: Optional[List[str]]) -> List[tuple]:
    """Parse ['col=value', 'reward>=0.5', 'name~kod'] → [(col, op, value), ...].

    Splits on the EARLIEST operator (longest on a tie, so 'reward>=5' is gte not
    gt), keeping operator chars in the value intact (e.g. 'q=a>b' → col 'q', value
    'a>b'). Column and value are stripped of surrounding whitespace.

    Args:
        specs: Raw --filter strings; None or an empty list yields [].

    Returns:
        One (column, operator_name, value) tuple per spec, in input order.

    Raises:
        typer.Exit: exit code 1, after print_error(..., "bad_filter"), when a
            spec contains no operator or has an empty column or value.
    """
    out: List[tuple] = []
    for spec in specs or []:
        chosen = None  # (index, symbol, op_name)
        for sym, op in _FILTER_OPS:
            i = spec.find(sym)
            # Accept a match at index 0 as well: otherwise a spec with no column
            # such as '>=5' skips '>=' and is split on the later '=', yielding the
            # bogus column '>'. With the index-0 match chosen, the empty-column
            # check below rejects it, consistent with ' >=5'.
            if i >= 0 and (
                chosen is None
                or i < chosen[0]
                or (i == chosen[0] and len(sym) > len(chosen[1]))
            ):
                chosen = (i, sym, op)
        if chosen is None:
            print_error(
                f"Invalid --filter '{spec}'. Use col=value, col!=value, col~text, or col>/</>=/<= N.",
                "bad_filter",
            )
            raise typer.Exit(1)
        i, sym, op = chosen
        col, val = spec[:i].strip(), spec[i + len(sym) :].strip()
        if not col or not val:
            print_error(
                f"Invalid --filter '{spec}': both a column and a value are required.",
                "bad_filter",
            )
            raise typer.Exit(1)
        out.append((col, op, val))
    return out
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def to_api_filters(parsed: List[tuple]) -> List[dict]:
    """Turn (col, op, val) triples into the `filters` payload for POST /rows/filter.

    Each triple becomes a dict with the keys "column", "operator" and "value";
    input order is preserved.
    """
    payload: List[dict] = []
    for column, operator, value in parsed:
        payload.append({"column": column, "operator": operator, "value": value})
    return payload
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def cell_matches(cell: object, op: str, val: str) -> bool:
    """Return whether one cell satisfies a single (op, val) filter.

    'contains', 'eq' and 'neq' compare the cell's str() form case-insensitively
    ('eq'/'neq' also strip surrounding whitespace). Any other operator is looked
    up in _NUMERIC_OPS and applied to float(cell) and float(val); a cell or value
    that cannot be converted to float does not match.
    """
    if op in ("contains", "eq", "neq"):
        if cell is None:
            # A missing cell neither contains nor equals anything, so of the
            # three string operators only 'neq' holds.
            return op == "neq"
        text = str(cell)
        if op == "contains":
            return val.lower() in text.lower()
        same = text.strip().lower() == val.strip().lower()
        if op == "eq":
            return same
        return not same
    try:
        compare = _NUMERIC_OPS[op]  # gt/lt/gte/lte
        return compare(float(cell), float(val))
    except (TypeError, ValueError):
        return False
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def row_matches(row: object, filters: List[tuple]) -> bool:
    """Client-side predicate (local files): does `row` pass every filter?

    With no filters every row passes. A row that is not a dict cannot match a
    column filter. Otherwise each (col, op, val) is checked against
    row.get(col), and all filters must hold (AND).
    """
    if not filters:
        return True
    if not isinstance(row, dict):
        return False
    for col, op, val in filters:
        if not cell_matches(row.get(col), op, val):
            return False
    return True
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|