lql-cli 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lql/__init__.py +1 -0
- lql/_opts.py +7 -0
- lql/api.py +69 -0
- lql/cli.py +93 -0
- lql/commands/__init__.py +0 -0
- lql/commands/annotations.py +69 -0
- lql/commands/auth.py +162 -0
- lql/commands/buckets.py +190 -0
- lql/commands/datasets.py +398 -0
- lql/commands/edits.py +95 -0
- lql/commands/evals.py +285 -0
- lql/commands/highlights.py +89 -0
- lql/commands/instructions.py +248 -0
- lql/commands/issues.py +56 -0
- lql/commands/reports.py +92 -0
- lql/commands/skills.py +116 -0
- lql/commands/spec.py +165 -0
- lql/commands/workspaces.py +147 -0
- lql/config.py +103 -0
- lql/output.py +29 -0
- lql/sessions.py +27 -0
- lql/util.py +11 -0
- lql_cli-0.2.0.dist-info/METADATA +320 -0
- lql_cli-0.2.0.dist-info/RECORD +26 -0
- lql_cli-0.2.0.dist-info/WHEEL +4 -0
- lql_cli-0.2.0.dist-info/entry_points.txt +2 -0
lql/commands/evals.py
ADDED
|
@@ -0,0 +1,285 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import math
|
|
3
|
+
import os
|
|
4
|
+
import sys
|
|
5
|
+
from typing import Annotated, List, Optional
|
|
6
|
+
|
|
7
|
+
import typer
|
|
8
|
+
|
|
9
|
+
from .._opts import ApiUrlOpt, JsonOpt, ProfileOpt
|
|
10
|
+
from ..api import ApiClient
|
|
11
|
+
from ..output import print_error, print_json, print_table
|
|
12
|
+
from ..util import q
|
|
13
|
+
|
|
14
|
+
# Typer application that groups the eval commands registered below through
# @app.command (list, stats, correctness, samples, sample).
app = typer.Typer(help="Inspect and analyze eval datasets (accuracy, failure modes, samples)")
|
|
15
|
+
|
|
16
|
+
# Mirrors front/src/lib/eval-dataset.ts so `eval samples --search` searches the
|
|
17
|
+
# same prompt/response columns the eval views do.
|
|
18
|
+
PROMPT_KEYS = ["prompt", "messages", "conversation", "input"]
|
|
19
|
+
RESPONSE_KEYS = ["response", "output", "completion", "generation"]
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _eval_search_columns(keys: List[str]) -> List[str]:
|
|
23
|
+
s = set(keys)
|
|
24
|
+
prompt = next((k for k in PROMPT_KEYS if k in s), None)
|
|
25
|
+
response = next((k for k in RESPONSE_KEYS if k in s), None)
|
|
26
|
+
return [k for k in (prompt, response) if k]
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _truncate(v: object) -> str:
|
|
30
|
+
s = json.dumps(v) if isinstance(v, (dict, list)) else ("" if v is None else str(v))
|
|
31
|
+
return s[:77] + "..." if len(s) > 80 else s
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _fmt_accuracy(acc: object) -> str:
|
|
35
|
+
try:
|
|
36
|
+
n = float(acc)
|
|
37
|
+
except (TypeError, ValueError):
|
|
38
|
+
return "—"
|
|
39
|
+
if acc is None or not math.isfinite(n):
|
|
40
|
+
return "—"
|
|
41
|
+
return f"{n * 100:.1f}%"
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@app.command("list")
def list_evals(
    workspace: Annotated[
        Optional[str],
        typer.Option("--workspace", help="Workspace (defaults to LQL_EVAL_WORKSPACE)"),
    ] = None,
    json_out: JsonOpt = False,
    profile: ProfileOpt = None,
    api_url: ApiUrlOpt = None,
) -> None:
    """List eval datasets (those detected as evaluation-run output)."""
    client = ApiClient(profile=profile, api_url=api_url)

    # --workspace wins over the LQL_EVAL_WORKSPACE environment variable.
    query = {"is_eval": "true"}
    target_ws = workspace or os.environ.get("LQL_EVAL_WORKSPACE")
    if not target_ws:
        # The notice goes to stderr, so stdout carries only the listing.
        sys.stderr.write(
            "Note: listing only evals you own — set LQL_EVAL_WORKSPACE or pass --workspace "
            "to list the shared eval workspace.\n"
        )
    else:
        query["workspace_id"] = target_ws

    datasets = client.get("/v1/datasets", params=query).json()

    table_rows = []
    for entry in datasets:
        row_count = entry.get("row_count")
        table_rows.append(
            [
                entry.get("id") or "",
                entry.get("display_name") or entry.get("name") or "",
                # Only a missing count is blanked; a count of 0 is shown.
                "" if row_count is None else row_count,
                entry.get("source_type") or "",
            ]
        )
    print_table(["ID", "Name", "Rows", "Source"], table_rows, json_out, datasets)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
@app.command("stats")
def stats(
    id: Annotated[str, typer.Argument(help="Dataset ID")],
    json_out: JsonOpt = False,
    profile: ProfileOpt = None,
    api_url: ApiUrlOpt = None,
) -> None:
    """Distribution stats: accuracy, error-type distribution, token stats."""

    def shown(value: object, placeholder: object = "") -> object:
        # Only None is replaced; 0 and other falsy values are real data.
        return placeholder if value is None else value

    client = ApiClient(profile=profile, api_url=api_url)
    payload = client.get(f"/v1/datasets/{q(id)}/eval-stats").json()

    # --json prints the raw response and nothing else.
    if json_out:
        print_json(payload)
        return

    skip_reason = payload.get("skip_reason")
    if skip_reason:
        sys.stdout.write(f"Stats unavailable: {skip_reason}\n")
        return

    # Headline table: accuracy plus the four counters.
    summary = [["Accuracy", _fmt_accuracy(payload.get("accuracy"))]]
    for label, key in (
        ("Total", "total"),
        ("Correct", "correct"),
        ("Incorrect", "incorrect"),
        ("Missing", "missing"),
    ):
        summary.append([label, shown(payload.get(key))])
    print_table(["Field", "Value"], summary, False, [payload])

    # Two distributions share one layout: over all samples, then over the
    # incorrect samples only. Each is skipped entirely when empty.
    error_field = payload.get("error_field")
    field_suffix = f" ({error_field})" if error_field else ""
    sections = (
        (
            "error_distribution",
            f"\nError distribution{field_suffix}:\n",
            "error_distribution_truncated",
        ),
        (
            "error_distribution_incorrect",
            "\nErrors among incorrect samples (the misses):\n",
            "error_distribution_incorrect_truncated",
        ),
    )
    for data_key, heading, truncated_key in sections:
        buckets = payload.get(data_key) or []
        if not buckets:
            continue
        sys.stdout.write(heading)
        print_table(
            ["Value", "Count"],
            [[shown(b.get("value"), "—"), shown(b.get("count"))] for b in buckets],
            False,
            buckets,
        )
        if payload.get(truncated_key):
            sys.stdout.write("(distribution truncated — more values exist)\n")

    token_rows = payload.get("token_stats") or []
    if token_rows:
        sys.stdout.write("\nToken stats:\n")
        print_table(
            ["Field", "Min", "Mean", "P50", "P95", "Max"],
            [
                [
                    t.get("label") or t.get("field") or "",
                    *(shown(t.get(metric)) for metric in ("min", "mean", "p50", "p95", "max")),
                ]
                for t in token_rows
            ],
            False,
            token_rows,
        )
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
@app.command("correctness")
def correctness(
    id: Annotated[str, typer.Argument(help="Dataset ID")],
    json_out: JsonOpt = False,
    profile: ProfileOpt = None,
    api_url: ApiUrlOpt = None,
) -> None:
    """Fast correctness counts + accuracy."""
    client = ApiClient(profile=profile, api_url=api_url)
    counts = client.get(f"/v1/datasets/{q(id)}/eval-correctness").json()

    # --json prints the raw response and nothing else.
    if json_out:
        print_json(counts)
        return

    table = [["Accuracy", _fmt_accuracy(counts.get("accuracy"))]]
    for label, key in (
        ("Total", "total"),
        ("Correct", "correct"),
        ("Incorrect", "incorrect"),
        ("Missing", "missing"),
    ):
        value = counts.get(key)
        # Only a missing counter is blanked; a counter of 0 is shown.
        table.append([label, "" if value is None else value])
    print_table(["Field", "Value"], table, False, [counts])
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
@app.command("samples")
def samples(
    id: Annotated[str, typer.Argument(help="Dataset ID")],
    filter_: Annotated[str, typer.Option("--filter", help="correct | incorrect | missing | all")] = "all",
    search: Annotated[Optional[str], typer.Option("--search", help="Substring match on prompt OR response column")] = None,
    search_columns: Annotated[Optional[str], typer.Option("--search-columns", help="Override which columns --search matches (comma-separated)")] = None,
    error_type: Annotated[Optional[str], typer.Option("--error-type", help="Filter to samples whose error field equals <value>")] = None,
    columns: Annotated[Optional[str], typer.Option("--columns", help="Comma-separated columns to project")] = None,
    limit: Annotated[str, typer.Option("--limit", help="Number of rows")] = "20",
    offset: Annotated[str, typer.Option("--offset", help="Row offset")] = "0",
    json_out: JsonOpt = False,
    profile: ProfileOpt = None,
    api_url: ApiUrlOpt = None,
) -> None:
    """List samples filtered by correctness / search / error type (for error analysis)."""
    # Flow: validate options -> build the filter list (search, error type) ->
    # POST it to the rows/filter endpoint -> print raw JSON or a table.
    # Every validation failure prints an error and exits with status 1.
    client = ApiClient(profile=profile, api_url=api_url)
    filters: List[dict] = []

    kind = str(filter_ or "all").lower()
    if kind not in ("all", "correct", "incorrect", "missing"):
        print_error("--filter must be one of: correct, incorrect, missing, all", "bad_filter")
        raise typer.Exit(1)

    # Reject malformed paging values before any request is made, the same way
    # `sample --row` validates its index. Values are forwarded as strings, as
    # before, but normalized (e.g. " 20 " becomes "20").
    params = {}
    for opt_name, raw in (("limit", limit), ("offset", offset)):
        try:
            parsed = int(raw)
        except (TypeError, ValueError):
            parsed = -1
        if parsed < 0:
            print_error(f"--{opt_name} must be a non-negative integer.", f"bad_{opt_name}")
            raise typer.Exit(1)
        params[opt_name] = str(parsed)

    if search:
        if search_columns:
            search_cols = [c.strip() for c in str(search_columns).split(",") if c.strip()]
        else:
            # No override given: derive the prompt/response columns from the
            # dataset schema.
            schema = client.get(f"/v1/datasets/{q(id)}/schema").json()
            names = [str(c.get("name") or "") for c in (schema.get("columns") or [])]
            search_cols = _eval_search_columns(names)
        # Checked for both branches, so an override that parses to nothing
        # (e.g. ",") is rejected instead of sending an empty column list.
        if not search_cols:
            print_error(
                "No searchable prompt/response columns — pass --search-columns to choose which to search.",
                "no_search_columns",
            )
            raise typer.Exit(1)
        filters.append({"columns": search_cols, "operator": "contains", "value": str(search)})

    if error_type:
        # The column holding the error type is reported by the eval-stats
        # endpoint as "error_field".
        stats_data = client.get(f"/v1/datasets/{q(id)}/eval-stats").json()
        error_field = stats_data.get("error_field")
        if stats_data.get("skip_reason") or not error_field:
            print_error(
                f"error-type filtering unavailable: {stats_data.get('skip_reason') or 'no error field discovered'}",
                "no_error_field",
            )
            raise typer.Exit(1)
        filters.append({"column": error_field, "operator": "eq", "value": str(error_type)})

    if columns:
        params["columns"] = str(columns)
    if kind != "all":
        params["correctness"] = kind

    data = client.post(f"/v1/datasets/{q(id)}/rows/filter", json={"filters": filters}, params=params).json()
    if json_out:
        print_json(data)
        return

    rows_data = data.get("rows") or []
    indices = data.get("matched_indices") or []
    sys.stdout.write(f"{data.get('total_matched', len(rows_data))} matched\n")
    if not rows_data:
        sys.stdout.write("No rows.\n")
        return

    # Table columns come from the first row. Each row is prefixed with its
    # entry from "matched_indices", or a blank when that list is shorter
    # than the row list.
    keys = list(rows_data[0].keys())
    print_table(
        ["index", *keys],
        [
            [str(indices[i]) if i < len(indices) else "", *[_truncate(r.get(k)) for k in keys]]
            for i, r in enumerate(rows_data)
        ],
        False,
        rows_data,
    )
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
@app.command("sample")
def sample(
    id: Annotated[str, typer.Argument(help="Dataset ID")],
    row: Annotated[str, typer.Option("--row", help="Dataset row index")],
    json_out: JsonOpt = False,
    profile: ProfileOpt = None,
    api_url: ApiUrlOpt = None,
) -> None:
    """Read one full sample by its dataset row index (from `eval samples`)."""
    # --row arrives as text; anything that is not an integer >= 0 is rejected
    # before a client is created.
    try:
        position = int(row)
    except ValueError:
        position = -1  # sentinel: falls into the rejection branch below
    if position < 0:
        print_error("--row must be a non-negative integer (a dataset row index).", "bad_row")
        raise typer.Exit(1)

    client = ApiClient(profile=profile, api_url=api_url)
    # A one-row page starting at the requested index.
    response = client.get(
        f"/v1/datasets/{q(id)}/rows",
        params={"offset": position, "limit": 1},
    ).json()
    fetched = response.get("rows") or []
    record = fetched[0] if fetched else None
    if not record:
        print_error(f"No row at index {row}.", "not_found")
        raise typer.Exit(3)

    if json_out:
        print_json(record)
        return

    # One "## <column>" section per field; nested values are pretty-printed
    # as JSON and None is printed as an empty body.
    for column, value in record.items():
        if isinstance(value, (dict, list)):
            rendered = json.dumps(value, indent=2)
        elif value is None:
            rendered = ""
        else:
            rendered = str(value)
        sys.stdout.write(f"## {column}\n{rendered}\n\n")
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
from typing import Annotated, Optional
|
|
3
|
+
|
|
4
|
+
import typer
|
|
5
|
+
|
|
6
|
+
from .._opts import ApiUrlOpt, JsonOpt, ProfileOpt
|
|
7
|
+
from ..api import ApiClient
|
|
8
|
+
from ..output import print_error, print_json, print_table
|
|
9
|
+
from ..sessions import resolve_session_id
|
|
10
|
+
from ..util import q
|
|
11
|
+
|
|
12
|
+
# Typer application that groups the highlight commands registered below
# through @app.command (list, add).
app = typer.Typer(help="Manage highlights")
|
|
13
|
+
|
|
14
|
+
# Shared type for the optional `--session` flag of the highlight commands;
# the value (or None) is handed to resolve_session_id together with the
# dataset id.
SessionOpt = Annotated[Optional[str], typer.Option("--session", help="Target a specific review session (advanced)")]
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@app.command("list")
def list_highlights(
    dataset_id: Annotated[str, typer.Argument(help="Dataset ID")],
    session: SessionOpt = None,
    json_out: JsonOpt = False,
    profile: ProfileOpt = None,
    api_url: ApiUrlOpt = None,
) -> None:
    """List highlights for a dataset."""
    client = ApiClient(profile=profile, api_url=api_url)
    # Highlights are fetched per session, so the session id is resolved
    # first from the dataset id and the optional --session value.
    resolved_session = resolve_session_id(client, dataset_id, session)
    highlights = client.get(f"/v1/sessions/{q(resolved_session)}/highlights").json()

    table_rows = []
    for h in highlights:
        # "?" stands in for an offset key that is absent from the record.
        span = f"{h.get('start_offset', '?')}-{h.get('end_offset', '?')}"
        # The highlighted text is clipped to 40 characters for the table.
        preview = str(h.get("highlighted_text") or "")[:40]
        table_rows.append(
            [
                h.get("id") or "",
                h.get("row_external_id") or "",
                h.get("source_column") or "",
                span,
                preview,
                h.get("issue_id") or "",
            ]
        )
    print_table(
        ["ID", "Row", "Column", "Span", "Text", "Issue"],
        table_rows,
        json_out,
        highlights,
    )
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@app.command("add")
def add(
    dataset_id: Annotated[str, typer.Argument(help="Dataset ID")],
    row: Annotated[str, typer.Option("--row", help="Row external ID")],
    column: Annotated[str, typer.Option("--column", help="Source column the span lives in")],
    start: Annotated[str, typer.Option("--start", help="Start character offset")],
    end: Annotated[str, typer.Option("--end", help="End character offset")],
    text: Annotated[str, typer.Option("--text", help="The highlighted text span")],
    issue: Annotated[Optional[str], typer.Option("--issue", help="Issue taxonomy ID to link")] = None,
    color: Annotated[Optional[str], typer.Option("--color", help="Highlight color")] = None,
    note: Annotated[Optional[str], typer.Option("--note", help="Note attached to the highlight")] = None,
    session: SessionOpt = None,
    json_out: JsonOpt = False,
    profile: ProfileOpt = None,
    api_url: ApiUrlOpt = None,
) -> None:
    """Add a text-span highlight to a dataset row."""
    # Both offsets arrive as text and must parse as integers; otherwise the
    # command exits with status 1 before a client is created.
    try:
        span_start, span_end = int(start), int(end)
    except ValueError:
        print_error("--start and --end must be integers", "invalid_offset")
        raise typer.Exit(1)

    client = ApiClient(profile=profile, api_url=api_url)
    resolved_session = resolve_session_id(client, dataset_id, session)

    payload: dict = {
        "row_external_id": row,
        "source_column": column,
        "start_offset": span_start,
        "end_offset": span_end,
        "highlighted_text": text,
    }
    # Optional fields are sent only when given a non-empty value.
    for key, value in (("issue_id", issue), ("color", color), ("note", note)):
        if value:
            payload[key] = value

    created = client.post(f"/v1/sessions/{q(resolved_session)}/highlights", json=payload).json()
    if json_out:
        print_json(created)
        return
    sys.stdout.write(f"Created highlight: {created.get('id', 'ok')}\n")
|
|
@@ -0,0 +1,248 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
|
|
3
|
+
import typer
|
|
4
|
+
|
|
5
|
+
INSTRUCTIONS = r"""
|
|
6
|
+
# lql — Liquid Query Language CLI
|
|
7
|
+
|
|
8
|
+
CLI for the DataViewer platform. Gives agents and humans complete scriptable
|
|
9
|
+
control over workspaces, datasets, spec docs, annotations, and S3.
|
|
10
|
+
|
|
11
|
+
## Authentication
|
|
12
|
+
|
|
13
|
+
lql login # Open browser → click Authorize → token stored automatically
|
|
14
|
+
lql logout # Revoke token and clear local config
|
|
15
|
+
lql whoami # Confirm current identity
|
|
16
|
+
|
|
17
|
+
Non-interactive (CI/agents): set LQL_API_KEY=<token> before any command.
|
|
18
|
+
Token is read from env first, then ~/.lql/config.json.
|
|
19
|
+
|
|
20
|
+
Config file: ~/.lql/config.json (mode 0600)
|
|
21
|
+
{ "current_profile": "default", "profiles": { "default": { "token", "key_id", "api_url" } } }
|
|
22
|
+
|
|
23
|
+
Override API base URL: --api-url <url> or LQL_API_URL env var.
|
|
24
|
+
Use --profile <name> to switch between named credential sets.
|
|
25
|
+
|
|
26
|
+
## Output
|
|
27
|
+
|
|
28
|
+
All commands accept --json for stable JSON output to stdout.
|
|
29
|
+
Errors always go to stderr as: { "error": "message", "code": "slug" }
|
|
30
|
+
Data always goes to stdout.
|
|
31
|
+
|
|
32
|
+
Exit codes:
|
|
33
|
+
0 success
|
|
34
|
+
1 usage / validation error
|
|
35
|
+
2 auth error (no token, 401, 403)
|
|
36
|
+
3 not found (404)
|
|
37
|
+
4 conflict (409) — e.g. spec push version conflict
|
|
38
|
+
5 server error (5xx)
|
|
39
|
+
|
|
40
|
+
Pagination: --limit N --offset N on list commands.
|
|
41
|
+
|
|
42
|
+
## Workspaces
|
|
43
|
+
|
|
44
|
+
A workspace is the top-level container for datasets, spec docs, and members.
|
|
45
|
+
|
|
46
|
+
lql workspaces list
|
|
47
|
+
lql workspaces create <name>
|
|
48
|
+
lql workspaces show <id>
|
|
49
|
+
lql workspaces update <id> --name <new-name>
|
|
50
|
+
lql workspaces delete <id>
|
|
51
|
+
lql workspaces members list <workspace-id>
|
|
52
|
+
lql workspaces members add <workspace-id> <email>
|
|
53
|
+
lql workspaces members remove <workspace-id> <user-id>
|
|
54
|
+
|
|
55
|
+
## Datasets
|
|
56
|
+
|
|
57
|
+
lql datasets list [--workspace <id>]
|
|
58
|
+
lql datasets show <id>
|
|
59
|
+
lql datasets create --workspace <id> --hf-repo <org/repo> [--name <display>] [--split <split>]
|
|
60
|
+
lql datasets create --workspace <id> --hf-bucket <org/bucket> --key <path-or-glob> [--name <display>]
|
|
61
|
+
# From an HF storage bucket (e.g. --key 'data/*.parquet'); syncs in background
|
|
62
|
+
lql datasets sync <id> # Re-fetch from HuggingFace, S3, or HF bucket
|
|
63
|
+
lql datasets schema <id> # Column names + types
|
|
64
|
+
lql datasets profile <id> # Per-column nulls/cardinality/numeric stats/top values + content token stats
|
|
65
|
+
# [--full-content] exact content scan (slow) [--skip-content] omit it
|
|
66
|
+
lql datasets rows <id> [--limit N] [--offset N]
|
|
67
|
+
lql datasets delete <id>
|
|
68
|
+
lql datasets push <id> # Push edits back to HuggingFace
|
|
69
|
+
lql datasets push-status <id> [--job <job-id>]
|
|
70
|
+
|
|
71
|
+
Upload a local file (uploads to HuggingFace liquid-ai org, then syncs):
|
|
72
|
+
LQL_HF_TOKEN=<token> lql datasets upload <file.parquet> --workspace <id> --name <repo-name>
|
|
73
|
+
|
|
74
|
+
## Evals
|
|
75
|
+
|
|
76
|
+
Eval datasets (evaluation-run output: each row a sample with a model 'response'
|
|
77
|
+
+ a 'correct' verdict) are detected automatically. These commands are the data
|
|
78
|
+
primitives for error analysis — YOU do the reasoning over what they return.
|
|
79
|
+
|
|
80
|
+
lql eval list [--workspace <id>] # Eval datasets only. Defaults to LQL_EVAL_WORKSPACE;
|
|
81
|
+
# without a workspace it lists only evals you own.
|
|
82
|
+
lql eval stats <id> # Accuracy + correctness counts + error-type
|
|
83
|
+
# distribution + token stats (the distribution view)
|
|
84
|
+
lql eval correctness <id> # Fast accuracy + correct/incorrect/missing counts
|
|
85
|
+
lql eval samples <id> [--filter correct|incorrect|missing|all] [--search <text>]
|
|
86
|
+
[--error-type <value>] [--columns a,b] [--limit N] [--offset N]
|
|
87
|
+
# Slice the dataset for error analysis. Filters AND
|
|
88
|
+
# together. Prints an 'index' column per row.
|
|
89
|
+
lql eval sample <id> --row <index> # Read one full sample (the conversation) by the
|
|
90
|
+
# 'index' from `eval samples`
|
|
91
|
+
|
|
92
|
+
Notes:
|
|
93
|
+
- --search matches a substring on the prompt OR response column (either one matching is a hit).
|
|
94
|
+
- --error-type values come from the `error_field` / `error_distribution` in `eval stats`.
|
|
95
|
+
- Use the 'index' from `eval samples` directly as `eval sample --row <index>`.
|
|
96
|
+
|
|
97
|
+
## Row Edits
|
|
98
|
+
|
|
99
|
+
Edits are cell-level overrides layered on top of the source dataset.
|
|
100
|
+
|
|
101
|
+
lql edits list <dataset-id> [--limit N]
|
|
102
|
+
lql edits count <dataset-id>
|
|
103
|
+
lql edits add <dataset-id> --row <row-external-id> --column <col> --value <json>
|
|
104
|
+
lql edits delete <dataset-id> <edit-id>
|
|
105
|
+
|
|
106
|
+
Example: lql edits add abc123 --row row_0 --column label --value '"positive"'
|
|
107
|
+
|
|
108
|
+
## Spec Docs
|
|
109
|
+
|
|
110
|
+
Each workspace has one spec doc (versioned markdown). Use pull/push to edit
|
|
111
|
+
it like a file. Conflicts are detected via base-version-id (exit code 4).
|
|
112
|
+
|
|
113
|
+
lql spec show [--workspace <id>] # print current doc to stdout
|
|
114
|
+
lql spec pull [--workspace <id>] [-o FILE] [--stdout] # writes SPEC.md by default
|
|
115
|
+
lql spec push [--workspace <id>] [--file SPEC.md] --message <msg> [--base-version-id <id>]
|
|
116
|
+
lql spec history [--workspace <id>]
|
|
117
|
+
lql spec diff [--workspace <id>] --version-id <id> [--compare-to <id>]
|
|
118
|
+
lql spec generate [--workspace <id>] # AI-generate from datasets
|
|
119
|
+
|
|
120
|
+
push auto-detects create-vs-update: with no existing doc it creates v1; otherwise
|
|
121
|
+
it commits on top of the current HEAD (auto-resolved unless --base-version-id is
|
|
122
|
+
given). --message is required. pull writes SPEC.md unless -o or --stdout is set.
|
|
123
|
+
|
|
124
|
+
Agentic edit loop:
|
|
125
|
+
lql spec pull --workspace <id> # writes ./SPEC.md
|
|
126
|
+
# modify SPEC.md
|
|
127
|
+
lql spec push --workspace <id> --message "Refine numeric rules"
|
|
128
|
+
# exit 4 means a conflict — pull again and re-apply
|
|
129
|
+
|
|
130
|
+
## Annotations, highlights, issues, reports — all scoped to a dataset
|
|
131
|
+
|
|
132
|
+
These act directly on a dataset; the CLI resolves the dataset's review session
|
|
133
|
+
for you, so you never manage sessions by hand. (Advanced: pass --session <id>
|
|
134
|
+
to target a specific session for multi-pass review — a session id is returned in
|
|
135
|
+
the JSON of any annotation/highlight/report.)
|
|
136
|
+
|
|
137
|
+
## Annotations
|
|
138
|
+
|
|
139
|
+
lql annotations list <dataset-id>
|
|
140
|
+
lql annotations add <dataset-id> --row <row-external-id> [--rating <number>] [--note <text>]
|
|
141
|
+
|
|
142
|
+
## Highlights
|
|
143
|
+
|
|
144
|
+
Highlights mark specific text spans within a row.
|
|
145
|
+
|
|
146
|
+
lql highlights list <dataset-id>
|
|
147
|
+
lql highlights add <dataset-id> --row <id> --column <col> --start <n> --end <n> --text <span> [--issue <issue-id>] [--color <hex>] [--note <text>]
|
|
148
|
+
|
|
149
|
+
start/end are character offsets into the row's <column> value. Link a highlight to
|
|
150
|
+
an issue taxonomy entry with --issue (see "lql issues create <dataset-id>").
|
|
151
|
+
|
|
152
|
+
## Issues
|
|
153
|
+
|
|
154
|
+
Issues are a per-dataset taxonomy (name/color) used to tag highlights.
|
|
155
|
+
|
|
156
|
+
lql issues list <dataset-id>
|
|
157
|
+
lql issues create <dataset-id> --name <str> [--description <str>] [--color <hex>]
|
|
158
|
+
|
|
159
|
+
## Reports
|
|
160
|
+
|
|
161
|
+
lql reports list <dataset-id>
|
|
162
|
+
lql reports show <report-id>
|
|
163
|
+
lql reports create <dataset-id> --title <title> [--summary <text>]
|
|
164
|
+
|
|
165
|
+
## Buckets
|
|
166
|
+
|
|
167
|
+
S3-compatible:
|
|
168
|
+
lql buckets list
|
|
169
|
+
lql buckets show <id>
|
|
170
|
+
lql buckets probe <id> # Verify connectivity + credentials
|
|
171
|
+
lql buckets objects <id> [--prefix <prefix>]
|
|
172
|
+
lql buckets attach <bucket-id> --workspace <id>
|
|
173
|
+
lql buckets detach <bucket-id> --workspace <id>
|
|
174
|
+
|
|
175
|
+
Hugging Face buckets (connect → add datasets; auth = your HF token):
|
|
176
|
+
lql buckets list-hf
|
|
177
|
+
lql buckets connect-hf <owner/bucket> --workspace <id> [--label <l>] [--hf-key <id>]
|
|
178
|
+
lql buckets create-dataset <bucket-id> --workspace <id> --key <path-or-glob> [--name <display>]
|
|
179
|
+
|
|
180
|
+
## Skills (agent setup)
|
|
181
|
+
|
|
182
|
+
Install the lql skill so coding agents (Claude Code, Codex) know how to use lql.
|
|
183
|
+
The skill is a thin pointer that tells the agent to run `lql instructions`, so it
|
|
184
|
+
never goes stale.
|
|
185
|
+
|
|
186
|
+
lql skills install # both Claude Code + Codex, user-level (~/.claude, ~/.codex)
|
|
187
|
+
lql skills install --tool claude # or --tool codex
|
|
188
|
+
lql skills install --project # install into ./.claude and ./.codex instead
|
|
189
|
+
lql skills install --force # overwrite an existing skill file
|
|
190
|
+
lql skills uninstall # remove it
|
|
191
|
+
|
|
192
|
+
## Common Agentic Workflows
|
|
193
|
+
|
|
194
|
+
### Discover workspaces and datasets
|
|
195
|
+
lql workspaces list --json
|
|
196
|
+
lql datasets list --workspace <id> --json
|
|
197
|
+
|
|
198
|
+
### Read dataset contents
|
|
199
|
+
lql datasets schema <id> --json
|
|
200
|
+
lql datasets rows <id> --limit 10 --json
|
|
201
|
+
|
|
202
|
+
### Analyze a dataset (token distribution + quality)
|
|
203
|
+
lql datasets profile <id> --json # nulls/cardinality/numeric stats/top values per column
|
|
204
|
+
# + content_stats: char & ~token length p50/p95/max per text column
|
|
205
|
+
# content_stats is SAMPLED from the first shard by default (sampled=true; max_chars is sample-bound).
|
|
206
|
+
# For exact token counts over every row: lql datasets profile <id> --full-content --json
|
|
207
|
+
# Use this to judge context-window/truncation risk, spot all-null or single-value columns,
|
|
208
|
+
# and ground any narrative analysis in real numbers (don't eyeball a few rows).
|
|
209
|
+
|
|
210
|
+
### Analyze an eval's failure modes
|
|
211
|
+
lql eval list --json # find the eval dataset
|
|
212
|
+
lql eval stats <id> --json # accuracy + error_distribution_incorrect
|
|
213
|
+
# = the common errors AMONG the misses
|
|
214
|
+
lql eval samples <id> --filter incorrect --json # pull the misses
|
|
215
|
+
lql eval samples <id> --filter incorrect --error-type <value> --json # focus one failure mode
|
|
216
|
+
lql eval sample <id> --row <index> --json # read the full conversation of a miss
|
|
217
|
+
# Then synthesize the common pattern across the misses yourself — the commands give you
|
|
218
|
+
# the data (counts, slices, conversations); the analysis is your job.
|
|
219
|
+
|
|
220
|
+
### Edit a spec doc without conflicts
|
|
221
|
+
lql spec pull --workspace <id> -o /tmp/SPEC.md
|
|
222
|
+
# edit /tmp/SPEC.md
|
|
223
|
+
lql spec push --workspace <id> --file /tmp/SPEC.md --message "Refine rules"
|
|
224
|
+
# base version is auto-resolved; exit 4 means conflict — pull again and re-apply
|
|
225
|
+
|
|
226
|
+
### Annotate dataset rows
|
|
227
|
+
lql annotations add <dataset-id> --row row_0 --rating 1 --note "correct"
|
|
228
|
+
lql annotations add <dataset-id> --row row_1 --rating 0 --note "wrong label"
|
|
229
|
+
|
|
230
|
+
### Upload a new dataset from a local file
|
|
231
|
+
LQL_HF_TOKEN=hf_... lql datasets upload ./data.parquet \
|
|
232
|
+
--workspace <id> --name my-eval-run --json
|
|
233
|
+
|
|
234
|
+
### Push edits to HuggingFace
|
|
235
|
+
lql datasets push <id> --json
|
|
236
|
+
lql datasets push-status <id> --json # poll until status != pending
|
|
237
|
+
""".strip()
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
# Typer application exposing the single `instructions` command.
app = typer.Typer()


# The help text is passed explicitly to the decorator, so this function
# carries no docstring of its own.
@app.command(
    "instructions",
    help="Print a full reference for agents and humans (all commands, examples, workflows)",
)
def instructions() -> None:
    # Writes the INSTRUCTIONS reference text to stdout with a trailing newline.
    sys.stdout.write(f"{INSTRUCTIONS}\n")
|
lql/commands/issues.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
from typing import Annotated, Optional
|
|
3
|
+
|
|
4
|
+
import typer
|
|
5
|
+
|
|
6
|
+
from .._opts import ApiUrlOpt, JsonOpt, ProfileOpt
|
|
7
|
+
from ..api import ApiClient
|
|
8
|
+
from ..output import print_json, print_table
|
|
9
|
+
from ..util import q
|
|
10
|
+
|
|
11
|
+
# Typer application that groups the issue commands registered below through
# @app.command (list, create).
app = typer.Typer(help="Manage issues")
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@app.command("list")
def list_issues(
    dataset_id: Annotated[str, typer.Argument(help="Dataset ID")],
    json_out: JsonOpt = False,
    profile: ProfileOpt = None,
    api_url: ApiUrlOpt = None,
) -> None:
    """List issues in a dataset's taxonomy."""
    client = ApiClient(profile=profile, api_url=api_url)
    issues = client.get(f"/v1/datasets/{q(dataset_id)}/issues").json()

    # One table row per issue; missing or empty fields are shown as blanks.
    shown_fields = ("id", "name", "description", "color")
    table_rows = []
    for issue in issues:
        table_rows.append([issue.get(field) or "" for field in shown_fields])

    print_table(["ID", "Name", "Description", "Color"], table_rows, json_out, issues)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@app.command("create")
def create(
    dataset_id: Annotated[str, typer.Argument(help="Dataset ID")],
    name: Annotated[str, typer.Option("--name", help="Issue name")],
    description: Annotated[Optional[str], typer.Option("--description", help="Issue description")] = None,
    color: Annotated[Optional[str], typer.Option("--color", help="Issue color (hex)")] = None,
    json_out: JsonOpt = False,
    profile: ProfileOpt = None,
    api_url: ApiUrlOpt = None,
) -> None:
    """Create an issue in a dataset's taxonomy."""
    client = ApiClient(profile=profile, api_url=api_url)

    payload: dict = {"name": name}
    # Optional fields are sent only when given a non-empty value.
    for key, value in (("description", description), ("color", color)):
        if value:
            payload[key] = value

    created = client.post(f"/v1/datasets/{q(dataset_id)}/issues", json=payload).json()
    if json_out:
        print_json(created)
        return
    sys.stdout.write(f"Created issue: {created.get('id', 'ok')}\n")
|