lql-cli 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lql/commands/evals.py ADDED
@@ -0,0 +1,285 @@
1
+ import json
2
+ import math
3
+ import os
4
+ import sys
5
+ from typing import Annotated, List, Optional
6
+
7
+ import typer
8
+
9
+ from .._opts import ApiUrlOpt, JsonOpt, ProfileOpt
10
+ from ..api import ApiClient
11
+ from ..output import print_error, print_json, print_table
12
+ from ..util import q
13
+
14
# Typer sub-application that the eval commands in this module register on via
# `@app.command(...)`.
app = typer.Typer(help="Inspect and analyze eval datasets (accuracy, failure modes, samples)")
15
+
16
+ # Mirrors front/src/lib/eval-dataset.ts so `eval samples --search` searches the
17
+ # same prompt/response columns the eval views do.
18
+ PROMPT_KEYS = ["prompt", "messages", "conversation", "input"]
19
+ RESPONSE_KEYS = ["response", "output", "completion", "generation"]
20
+
21
+
22
+ def _eval_search_columns(keys: List[str]) -> List[str]:
23
+ s = set(keys)
24
+ prompt = next((k for k in PROMPT_KEYS if k in s), None)
25
+ response = next((k for k in RESPONSE_KEYS if k in s), None)
26
+ return [k for k in (prompt, response) if k]
27
+
28
+
29
+ def _truncate(v: object) -> str:
30
+ s = json.dumps(v) if isinstance(v, (dict, list)) else ("" if v is None else str(v))
31
+ return s[:77] + "..." if len(s) > 80 else s
32
+
33
+
34
+ def _fmt_accuracy(acc: object) -> str:
35
+ try:
36
+ n = float(acc)
37
+ except (TypeError, ValueError):
38
+ return "—"
39
+ if acc is None or not math.isfinite(n):
40
+ return "—"
41
+ return f"{n * 100:.1f}%"
42
+
43
+
44
@app.command("list")
def list_evals(
    workspace: Annotated[Optional[str], typer.Option("--workspace", help="Workspace (defaults to LQL_EVAL_WORKSPACE)")] = None,
    json_out: JsonOpt = False,
    profile: ProfileOpt = None,
    api_url: ApiUrlOpt = None,
) -> None:
    """List eval datasets (those detected as evaluation-run output)."""
    api = ApiClient(profile=profile, api_url=api_url)

    # An explicit --workspace wins; otherwise fall back to the environment.
    target_ws = workspace if workspace else os.environ.get("LQL_EVAL_WORKSPACE")
    query = {"is_eval": "true"}
    if not target_ws:
        # The hint goes to stderr so it does not mix with the table/JSON on stdout.
        sys.stderr.write(
            "Note: listing only evals you own — set LQL_EVAL_WORKSPACE or pass --workspace "
            "to list the shared eval workspace.\n"
        )
    else:
        query["workspace_id"] = target_ws

    datasets = api.get("/v1/datasets", params=query).json()

    table_rows = []
    for entry in datasets:
        row_count = entry.get("row_count")
        table_rows.append(
            [
                entry.get("id") or "",
                entry.get("display_name") or entry.get("name") or "",
                # Keep a real 0 visible; only a missing count is shown as blank.
                "" if row_count is None else row_count,
                entry.get("source_type") or "",
            ]
        )
    print_table(["ID", "Name", "Rows", "Source"], table_rows, json_out, datasets)
77
+
78
+
79
@app.command("stats")
def stats(
    id: Annotated[str, typer.Argument(help="Dataset ID")],
    json_out: JsonOpt = False,
    profile: ProfileOpt = None,
    api_url: ApiUrlOpt = None,
) -> None:
    """Distribution stats: accuracy, error-type distribution, token stats."""
    api = ApiClient(profile=profile, api_url=api_url)
    payload = api.get(f"/v1/datasets/{q(id)}/eval-stats").json()

    # JSON mode emits the raw payload, including any skip_reason.
    if json_out:
        print_json(payload)
        return

    skip_reason = payload.get("skip_reason")
    if skip_reason:
        sys.stdout.write(f"Stats unavailable: {skip_reason}\n")
        return

    def or_default(value: object, default: object = "") -> object:
        # Substitute only for None so that 0 and other falsy values still print.
        return default if value is None else value

    def write_distribution(entries: list) -> None:
        # Two-column Value/Count table shared by both distribution sections.
        print_table(
            ["Value", "Count"],
            [[or_default(e.get("value"), "—"), or_default(e.get("count"))] for e in entries],
            False,
            entries,
        )

    summary = [["Accuracy", _fmt_accuracy(payload.get("accuracy"))]]
    for label, key in (
        ("Total", "total"),
        ("Correct", "correct"),
        ("Incorrect", "incorrect"),
        ("Missing", "missing"),
    ):
        summary.append([label, or_default(payload.get(key))])
    print_table(["Field", "Value"], summary, False, [payload])

    overall = payload.get("error_distribution") or []
    if overall:
        error_field = payload.get("error_field")
        suffix = f" ({error_field})" if error_field else ""
        sys.stdout.write(f"\nError distribution{suffix}:\n")
        write_distribution(overall)
        if payload.get("error_distribution_truncated"):
            sys.stdout.write("(distribution truncated — more values exist)\n")

    among_incorrect = payload.get("error_distribution_incorrect") or []
    if among_incorrect:
        sys.stdout.write("\nErrors among incorrect samples (the misses):\n")
        write_distribution(among_incorrect)
        if payload.get("error_distribution_incorrect_truncated"):
            sys.stdout.write("(distribution truncated — more values exist)\n")

    token_stats = payload.get("token_stats") or []
    if token_stats:
        sys.stdout.write("\nToken stats:\n")
        stat_keys = ("min", "mean", "p50", "p95", "max")
        token_rows = [
            [
                t.get("label") or t.get("field") or "",
                *(or_default(t.get(key)) for key in stat_keys),
            ]
            for t in token_stats
        ]
        print_table(["Field", "Min", "Mean", "P50", "P95", "Max"], token_rows, False, token_stats)
149
+
150
+
151
@app.command("correctness")
def correctness(
    id: Annotated[str, typer.Argument(help="Dataset ID")],
    json_out: JsonOpt = False,
    profile: ProfileOpt = None,
    api_url: ApiUrlOpt = None,
) -> None:
    """Fast correctness counts + accuracy."""
    api = ApiClient(profile=profile, api_url=api_url)
    counts = api.get(f"/v1/datasets/{q(id)}/eval-correctness").json()

    if json_out:
        print_json(counts)
        return

    table_rows = [["Accuracy", _fmt_accuracy(counts.get("accuracy"))]]
    for label, key in (
        ("Total", "total"),
        ("Correct", "correct"),
        ("Incorrect", "incorrect"),
        ("Missing", "missing"),
    ):
        value = counts.get(key)
        # Blank only when the count is absent; a count of 0 is still printed.
        table_rows.append([label, "" if value is None else value])
    print_table(["Field", "Value"], table_rows, False, [counts])
176
+
177
+
178
@app.command("samples")
def samples(
    id: Annotated[str, typer.Argument(help="Dataset ID")],
    filter_: Annotated[str, typer.Option("--filter", help="correct | incorrect | missing | all")] = "all",
    search: Annotated[Optional[str], typer.Option("--search", help="Substring match on prompt OR response column")] = None,
    search_columns: Annotated[Optional[str], typer.Option("--search-columns", help="Override which columns --search matches (comma-separated)")] = None,
    error_type: Annotated[Optional[str], typer.Option("--error-type", help="Filter to samples whose error field equals <value>")] = None,
    columns: Annotated[Optional[str], typer.Option("--columns", help="Comma-separated columns to project")] = None,
    limit: Annotated[str, typer.Option("--limit", help="Number of rows")] = "20",
    offset: Annotated[str, typer.Option("--offset", help="Row offset")] = "0",
    json_out: JsonOpt = False,
    profile: ProfileOpt = None,
    api_url: ApiUrlOpt = None,
) -> None:
    """List samples filtered by correctness / search / error type (for error analysis)."""
    api = ApiClient(profile=profile, api_url=api_url)

    # Validate the correctness filter first; matching is case-insensitive.
    correctness_kind = str(filter_ or "all").lower()
    if correctness_kind not in {"all", "correct", "incorrect", "missing"}:
        print_error("--filter must be one of: correct, incorrect, missing, all", "bad_filter")
        raise typer.Exit(1)

    row_filters: List[dict] = []

    if search:
        if search_columns:
            # Explicit override: comma-separated names, blanks dropped.
            stripped = (part.strip() for part in str(search_columns).split(","))
            targets = [name for name in stripped if name]
        else:
            # No override: pick prompt/response columns from the dataset schema.
            schema = api.get(f"/v1/datasets/{q(id)}/schema").json()
            column_names = []
            for col in schema.get("columns") or []:
                column_names.append(str(col.get("name") or ""))
            targets = _eval_search_columns(column_names)
        if not targets:
            print_error(
                "No searchable prompt/response columns — pass --search-columns to choose which to search.",
                "no_search_columns",
            )
            raise typer.Exit(1)
        row_filters.append({"columns": targets, "operator": "contains", "value": str(search)})

    if error_type:
        # The column holding the error label is taken from the eval-stats payload.
        eval_stats = api.get(f"/v1/datasets/{q(id)}/eval-stats").json()
        skip_reason = eval_stats.get("skip_reason")
        error_field = eval_stats.get("error_field")
        if skip_reason or not error_field:
            print_error(
                f"error-type filtering unavailable: {skip_reason or 'no error field discovered'}",
                "no_error_field",
            )
            raise typer.Exit(1)
        row_filters.append({"column": error_field, "operator": "eq", "value": str(error_type)})

    query = {"limit": limit, "offset": offset}
    if columns:
        query["columns"] = str(columns)
    if correctness_kind != "all":
        query["correctness"] = correctness_kind

    result = api.post(
        f"/v1/datasets/{q(id)}/rows/filter",
        json={"filters": row_filters},
        params=query,
    ).json()
    if json_out:
        print_json(result)
        return

    matched_rows = result.get("rows") or []
    row_indices = result.get("matched_indices") or []
    sys.stdout.write(f"{result.get('total_matched', len(matched_rows))} matched\n")
    if not matched_rows:
        sys.stdout.write("No rows.\n")
        return

    # Table columns come from the first row; other rows are read with .get().
    column_keys = list(matched_rows[0].keys())
    known_indices = len(row_indices)
    table_rows = []
    for pos, record in enumerate(matched_rows):
        # Leave the index cell blank when no index was returned for this position.
        index_cell = str(row_indices[pos]) if pos < known_indices else ""
        table_rows.append([index_cell, *(_truncate(record.get(key)) for key in column_keys)])
    print_table(["index", *column_keys], table_rows, False, matched_rows)
253
+
254
+
255
@app.command("sample")
def sample(
    id: Annotated[str, typer.Argument(help="Dataset ID")],
    row: Annotated[str, typer.Option("--row", help="Dataset row index")],
    json_out: JsonOpt = False,
    profile: ProfileOpt = None,
    api_url: ApiUrlOpt = None,
) -> None:
    """Read one full sample by its dataset row index (from `eval samples`)."""
    try:
        row_idx = int(row)
    except ValueError:
        # Unparseable input is handled by the same negative-index rejection below.
        row_idx = -1
    if row_idx < 0:
        print_error("--row must be a non-negative integer (a dataset row index).", "bad_row")
        raise typer.Exit(1)

    api = ApiClient(profile=profile, api_url=api_url)
    # Fetch a one-row page starting at the requested index.
    page = api.get(f"/v1/datasets/{q(id)}/rows", params={"offset": row_idx, "limit": 1}).json()
    fetched = page.get("rows") or []
    if not fetched or not fetched[0]:
        print_error(f"No row at index {row}.", "not_found")
        raise typer.Exit(3)
    record = fetched[0]

    if json_out:
        print_json(record)
        return

    # One "## <column>" section per field; nested values are pretty-printed JSON.
    for column, value in record.items():
        if isinstance(value, (dict, list)):
            rendered = json.dumps(value, indent=2)
        elif value is None:
            rendered = ""
        else:
            rendered = str(value)
        sys.stdout.write(f"## {column}\n{rendered}\n\n")
@@ -0,0 +1,89 @@
1
+ import sys
2
+ from typing import Annotated, Optional
3
+
4
+ import typer
5
+
6
+ from .._opts import ApiUrlOpt, JsonOpt, ProfileOpt
7
+ from ..api import ApiClient
8
+ from ..output import print_error, print_json, print_table
9
+ from ..sessions import resolve_session_id
10
+ from ..util import q
11
+
12
# Typer sub-application that the highlight commands in this module register on.
app = typer.Typer(help="Manage highlights")

# Shared `--session` option type: an optional review-session ID. The commands in
# this module pass it, with the dataset ID, to resolve_session_id.
SessionOpt = Annotated[Optional[str], typer.Option("--session", help="Target a specific review session (advanced)")]
15
+
16
+
17
@app.command("list")
def list_highlights(
    dataset_id: Annotated[str, typer.Argument(help="Dataset ID")],
    session: SessionOpt = None,
    json_out: JsonOpt = False,
    profile: ProfileOpt = None,
    api_url: ApiUrlOpt = None,
) -> None:
    """List highlights for a dataset."""
    client = ApiClient(profile=profile, api_url=api_url)
    # Highlights are fetched per session; the session is resolved from the
    # dataset ID and the optional --session value.
    session_id = resolve_session_id(client, dataset_id, session)
    items = client.get(f"/v1/sessions/{q(session_id)}/highlights").json()

    def offset_text(value: object) -> str:
        # dict.get's default only applies to a missing key, so a JSON null would
        # otherwise print as "None"; show "?" for both cases.
        return "?" if value is None else str(value)

    print_table(
        ["ID", "Row", "Column", "Span", "Text", "Issue"],
        [
            [
                h.get("id") or "",
                h.get("row_external_id") or "",
                h.get("source_column") or "",
                f"{offset_text(h.get('start_offset'))}-{offset_text(h.get('end_offset'))}",
                # The table shows only the first 40 characters of the span text.
                str(h.get("highlighted_text") or "")[:40],
                h.get("issue_id") or "",
            ]
            for h in items
        ],
        json_out,
        items,
    )
45
+
46
+
47
@app.command("add")
def add(
    dataset_id: Annotated[str, typer.Argument(help="Dataset ID")],
    row: Annotated[str, typer.Option("--row", help="Row external ID")],
    column: Annotated[str, typer.Option("--column", help="Source column the span lives in")],
    start: Annotated[str, typer.Option("--start", help="Start character offset")],
    end: Annotated[str, typer.Option("--end", help="End character offset")],
    text: Annotated[str, typer.Option("--text", help="The highlighted text span")],
    issue: Annotated[Optional[str], typer.Option("--issue", help="Issue taxonomy ID to link")] = None,
    color: Annotated[Optional[str], typer.Option("--color", help="Highlight color")] = None,
    note: Annotated[Optional[str], typer.Option("--note", help="Note attached to the highlight")] = None,
    session: SessionOpt = None,
    json_out: JsonOpt = False,
    profile: ProfileOpt = None,
    api_url: ApiUrlOpt = None,
) -> None:
    """Add a text-span highlight to a dataset row."""
    # Offsets arrive as strings; both must parse before any request is built.
    try:
        start_offset, end_offset = int(start), int(end)
    except ValueError:
        print_error("--start and --end must be integers", "invalid_offset")
        raise typer.Exit(1)

    api = ApiClient(profile=profile, api_url=api_url)
    session_id = resolve_session_id(api, dataset_id, session)

    payload: dict = {
        "row_external_id": row,
        "source_column": column,
        "start_offset": start_offset,
        "end_offset": end_offset,
        "highlighted_text": text,
    }
    # Optional fields are sent only when given a non-empty value.
    for key, value in (("issue_id", issue), ("color", color), ("note", note)):
        if value:
            payload[key] = value

    created = api.post(f"/v1/sessions/{q(session_id)}/highlights", json=payload).json()
    if json_out:
        print_json(created)
        return
    sys.stdout.write(f"Created highlight: {created.get('id', 'ok')}\n")
@@ -0,0 +1,248 @@
1
+ import sys
2
+
3
+ import typer
4
+
5
+ INSTRUCTIONS = r"""
6
+ # lql — Liquid Query Language CLI
7
+
8
+ CLI for the DataViewer platform. Gives agents and humans complete scriptable
9
+ control over workspaces, datasets, spec docs, annotations, and S3.
10
+
11
+ ## Authentication
12
+
13
+ lql login # Open browser → click Authorize → token stored automatically
14
+ lql logout # Revoke token and clear local config
15
+ lql whoami # Confirm current identity
16
+
17
+ Non-interactive (CI/agents): set LQL_API_KEY=<token> before any command.
18
+ Token is read from env first, then ~/.lql/config.json.
19
+
20
+ Config file: ~/.lql/config.json (mode 0600)
21
+ { "current_profile": "default", "profiles": { "default": { "token", "key_id", "api_url" } } }
22
+
23
+ Override API base URL: --api-url <url> or LQL_API_URL env var.
24
+ Use --profile <name> to switch between named credential sets.
25
+
26
+ ## Output
27
+
28
+ All commands accept --json for stable JSON output to stdout.
29
+ Errors always go to stderr as: { "error": "message", "code": "slug" }
30
+ Data always goes to stdout.
31
+
32
+ Exit codes:
33
+ 0 success
34
+ 1 usage / validation error
35
+ 2 auth error (no token, 401, 403)
36
+ 3 not found (404)
37
+ 4 conflict (409) — e.g. spec push version conflict
38
+ 5 server error (5xx)
39
+
40
+ Pagination: --limit N --offset N on list commands.
41
+
42
+ ## Workspaces
43
+
44
+ A workspace is the top-level container for datasets, spec docs, and members.
45
+
46
+ lql workspaces list
47
+ lql workspaces create <name>
48
+ lql workspaces show <id>
49
+ lql workspaces update <id> --name <new-name>
50
+ lql workspaces delete <id>
51
+ lql workspaces members list <workspace-id>
52
+ lql workspaces members add <workspace-id> <email>
53
+ lql workspaces members remove <workspace-id> <user-id>
54
+
55
+ ## Datasets
56
+
57
+ lql datasets list [--workspace <id>]
58
+ lql datasets show <id>
59
+ lql datasets create --workspace <id> --hf-repo <org/repo> [--name <display>] [--split <split>]
60
+ lql datasets create --workspace <id> --hf-bucket <org/bucket> --key <path-or-glob> [--name <display>]
61
+ # From an HF storage bucket (e.g. --key 'data/*.parquet'); syncs in background
62
+ lql datasets sync <id> # Re-fetch from HuggingFace, S3, or HF bucket
63
+ lql datasets schema <id> # Column names + types
64
+ lql datasets profile <id> # Per-column nulls/cardinality/numeric stats/top values + content token stats
65
+ # [--full-content] exact content scan (slow) [--skip-content] omit it
66
+ lql datasets rows <id> [--limit N] [--offset N]
67
+ lql datasets delete <id>
68
+ lql datasets push <id> # Push edits back to HuggingFace
69
+ lql datasets push-status <id> [--job <job-id>]
70
+
71
+ Upload a local file (uploads to HuggingFace liquid-ai org, then syncs):
72
+ LQL_HF_TOKEN=<token> lql datasets upload <file.parquet> --workspace <id> --name <repo-name>
73
+
74
+ ## Evals
75
+
76
+ Eval datasets (evaluation-run output: each row a sample with a model 'response'
77
+ + a 'correct' verdict) are detected automatically. These commands are the data
78
+ primitives for error analysis — YOU do the reasoning over what they return.
79
+
80
+ lql eval list [--workspace <id>] # Eval datasets only. Defaults to LQL_EVAL_WORKSPACE;
81
+ # without a workspace it lists only evals you own.
82
+ lql eval stats <id> # Accuracy + correctness counts + error-type
83
+ # distribution + token stats (the distribution view)
84
+ lql eval correctness <id> # Fast accuracy + correct/incorrect/missing counts
85
+ lql eval samples <id> [--filter correct|incorrect|missing|all] [--search <text>]
86
+ [--search-columns a,b] [--error-type <value>] [--columns a,b] [--limit N] [--offset N]
87
+ # Slice the dataset for error analysis. Filters AND
88
+ # together. Prints an 'index' column per row.
89
+ lql eval sample <id> --row <index> # Read one full sample (the conversation) by the
90
+ # 'index' from `eval samples`
91
+
92
+ Notes:
93
+ - --search matches a substring on the prompt OR response column (either one matching is a hit); pass --search-columns a,b to choose the searched columns yourself.
94
+ - --error-type values come from the `error_field` / `error_distribution` in `eval stats`.
95
+ - Use the 'index' from `eval samples` directly as `eval sample --row <index>`.
96
+
97
+ ## Row Edits
98
+
99
+ Edits are cell-level overrides layered on top of the source dataset.
100
+
101
+ lql edits list <dataset-id> [--limit N]
102
+ lql edits count <dataset-id>
103
+ lql edits add <dataset-id> --row <row-external-id> --column <col> --value <json>
104
+ lql edits delete <dataset-id> <edit-id>
105
+
106
+ Example: lql edits add abc123 --row row_0 --column label --value '"positive"'
107
+
108
+ ## Spec Docs
109
+
110
+ Each workspace has one spec doc (versioned markdown). Use pull/push to edit
111
+ it like a file. Conflicts are detected via base-version-id (exit code 4).
112
+
113
+ lql spec show [--workspace <id>] # print current doc to stdout
114
+ lql spec pull [--workspace <id>] [-o FILE] [--stdout] # writes SPEC.md by default
115
+ lql spec push [--workspace <id>] [--file SPEC.md] --message <msg> [--base-version-id <id>]
116
+ lql spec history [--workspace <id>]
117
+ lql spec diff [--workspace <id>] --version-id <id> [--compare-to <id>]
118
+ lql spec generate [--workspace <id>] # AI-generate from datasets
119
+
120
+ push auto-detects create-vs-update: with no existing doc it creates v1; otherwise
121
+ it commits on top of the current HEAD (auto-resolved unless --base-version-id is
122
+ given). --message is required. pull writes SPEC.md unless -o or --stdout is set.
123
+
124
+ Agentic edit loop:
125
+ lql spec pull --workspace <id> # writes ./SPEC.md
126
+ # modify SPEC.md
127
+ lql spec push --workspace <id> --message "Refine numeric rules"
128
+ # exit 4 means a conflict — pull again and re-apply
129
+
130
+ ## Annotations, highlights, issues, reports — all scoped to a dataset
131
+
132
+ These act directly on a dataset; the CLI resolves the dataset's review session
133
+ for you, so you never manage sessions by hand. (Advanced: pass --session <id>
134
+ to target a specific session for multi-pass review — a session id is returned in
135
+ the JSON of any annotation/highlight/report.)
136
+
137
+ ## Annotations
138
+
139
+ lql annotations list <dataset-id>
140
+ lql annotations add <dataset-id> --row <row-external-id> [--rating <number>] [--note <text>]
141
+
142
+ ## Highlights
143
+
144
+ Highlights mark specific text spans within a row.
145
+
146
+ lql highlights list <dataset-id>
147
+ lql highlights add <dataset-id> --row <id> --column <col> --start <n> --end <n> --text <span> [--issue <issue-id>] [--color <hex>] [--note <text>]
148
+
149
+ start/end are character offsets into the row's <column> value. Link a highlight to
150
+ an issue taxonomy entry with --issue (see "lql issues create <dataset-id>").
151
+
152
+ ## Issues
153
+
154
+ Issues are a per-dataset taxonomy (name/color) used to tag highlights.
155
+
156
+ lql issues list <dataset-id>
157
+ lql issues create <dataset-id> --name <str> [--description <str>] [--color <hex>]
158
+
159
+ ## Reports
160
+
161
+ lql reports list <dataset-id>
162
+ lql reports show <report-id>
163
+ lql reports create <dataset-id> --title <title> [--summary <text>]
164
+
165
+ ## Buckets
166
+
167
+ S3-compatible:
168
+ lql buckets list
169
+ lql buckets show <id>
170
+ lql buckets probe <id> # Verify connectivity + credentials
171
+ lql buckets objects <id> [--prefix <prefix>]
172
+ lql buckets attach <bucket-id> --workspace <id>
173
+ lql buckets detach <bucket-id> --workspace <id>
174
+
175
+ Hugging Face buckets (connect → add datasets; auth = your HF token):
176
+ lql buckets list-hf
177
+ lql buckets connect-hf <owner/bucket> --workspace <id> [--label <l>] [--hf-key <id>]
178
+ lql buckets create-dataset <bucket-id> --workspace <id> --key <path-or-glob> [--name <display>]
179
+
180
+ ## Skills (agent setup)
181
+
182
+ Install the lql skill so coding agents (Claude Code, Codex) know how to use lql.
183
+ The skill is a thin pointer that tells the agent to run `lql instructions`, so it
184
+ never goes stale.
185
+
186
+ lql skills install # both Claude Code + Codex, user-level (~/.claude, ~/.codex)
187
+ lql skills install --tool claude # or --tool codex
188
+ lql skills install --project # install into ./.claude and ./.codex instead
189
+ lql skills install --force # overwrite an existing skill file
190
+ lql skills uninstall # remove it
191
+
192
+ ## Common Agentic Workflows
193
+
194
+ ### Discover workspaces and datasets
195
+ lql workspaces list --json
196
+ lql datasets list --workspace <id> --json
197
+
198
+ ### Read dataset contents
199
+ lql datasets schema <id> --json
200
+ lql datasets rows <id> --limit 10 --json
201
+
202
+ ### Analyze a dataset (token distribution + quality)
203
+ lql datasets profile <id> --json # nulls/cardinality/numeric stats/top values per column
204
+ # + content_stats: char & ~token length p50/p95/max per text column
205
+ # content_stats is SAMPLED from the first shard by default (sampled=true; max_chars is sample-bound).
206
+ # For exact token counts over every row: lql datasets profile <id> --full-content --json
207
+ # Use this to judge context-window/truncation risk, spot all-null or single-value columns,
208
+ # and ground any narrative analysis in real numbers (don't eyeball a few rows).
209
+
210
+ ### Analyze an eval's failure modes
211
+ lql eval list --json # find the eval dataset
212
+ lql eval stats <id> --json # accuracy + error_distribution_incorrect
213
+ # = the common errors AMONG the misses
214
+ lql eval samples <id> --filter incorrect --json # pull the misses
215
+ lql eval samples <id> --filter incorrect --error-type <value> --json # focus one failure mode
216
+ lql eval sample <id> --row <index> --json # read the full conversation of a miss
217
+ # Then synthesize the common pattern across the misses yourself — the commands give you
218
+ # the data (counts, slices, conversations); the analysis is your job.
219
+
220
+ ### Edit a spec doc without conflicts
221
+ lql spec pull --workspace <id> -o /tmp/SPEC.md
222
+ # edit /tmp/SPEC.md
223
+ lql spec push --workspace <id> --file /tmp/SPEC.md --message "Refine rules"
224
+ # base version is auto-resolved; exit 4 means conflict — pull again and re-apply
225
+
226
+ ### Annotate dataset rows
227
+ lql annotations add <dataset-id> --row row_0 --rating 1 --note "correct"
228
+ lql annotations add <dataset-id> --row row_1 --rating 0 --note "wrong label"
229
+
230
+ ### Upload a new dataset from a local file
231
+ LQL_HF_TOKEN=hf_... lql datasets upload ./data.parquet \
232
+ --workspace <id> --name my-eval-run --json
233
+
234
+ ### Push edits to HuggingFace
235
+ lql datasets push <id> --json
236
+ lql datasets push-status <id> --json # poll until status != pending
237
+ """.strip()
238
+
239
+
240
app = typer.Typer()


# Registered with explicit help text, using the same decorator form as the
# other command modules.
@app.command(
    "instructions",
    help="Print a full reference for agents and humans (all commands, examples, workflows)",
)
def instructions() -> None:
    # Emit the reference text followed by a single trailing newline.
    sys.stdout.write(f"{INSTRUCTIONS}\n")
lql/commands/issues.py ADDED
@@ -0,0 +1,56 @@
1
+ import sys
2
+ from typing import Annotated, Optional
3
+
4
+ import typer
5
+
6
+ from .._opts import ApiUrlOpt, JsonOpt, ProfileOpt
7
+ from ..api import ApiClient
8
+ from ..output import print_json, print_table
9
+ from ..util import q
10
+
11
# Typer sub-application that the issue commands in this module register on.
app = typer.Typer(help="Manage issues")
12
+
13
+
14
@app.command("list")
def list_issues(
    dataset_id: Annotated[str, typer.Argument(help="Dataset ID")],
    json_out: JsonOpt = False,
    profile: ProfileOpt = None,
    api_url: ApiUrlOpt = None,
) -> None:
    """List issues in a dataset's taxonomy."""
    api = ApiClient(profile=profile, api_url=api_url)
    issues = api.get(f"/v1/datasets/{q(dataset_id)}/issues").json()

    # One table cell per field; absent or empty values are shown as blanks.
    fields = ("id", "name", "description", "color")
    table_rows = [[issue.get(field) or "" for field in fields] for issue in issues]
    print_table(["ID", "Name", "Description", "Color"], table_rows, json_out, issues)
33
+
34
+
35
@app.command("create")
def create(
    dataset_id: Annotated[str, typer.Argument(help="Dataset ID")],
    name: Annotated[str, typer.Option("--name", help="Issue name")],
    description: Annotated[Optional[str], typer.Option("--description", help="Issue description")] = None,
    color: Annotated[Optional[str], typer.Option("--color", help="Issue color (hex)")] = None,
    json_out: JsonOpt = False,
    profile: ProfileOpt = None,
    api_url: ApiUrlOpt = None,
) -> None:
    """Create an issue in a dataset's taxonomy."""
    api = ApiClient(profile=profile, api_url=api_url)

    payload: dict = {"name": name}
    # Optional fields are sent only when given a non-empty value.
    for key, value in (("description", description), ("color", color)):
        if value:
            payload[key] = value

    created = api.post(f"/v1/datasets/{q(dataset_id)}/issues", json=payload).json()
    if json_out:
        print_json(created)
        return
    sys.stdout.write(f"Created issue: {created.get('id', 'ok')}\n")