markback 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- markback/__init__.py +86 -0
- markback/cli.py +435 -0
- markback/config.py +181 -0
- markback/linter.py +312 -0
- markback/llm.py +175 -0
- markback/parser.py +587 -0
- markback/types.py +270 -0
- markback/workflow.py +351 -0
- markback/writer.py +249 -0
- markback-0.1.0.dist-info/METADATA +251 -0
- markback-0.1.0.dist-info/RECORD +14 -0
- markback-0.1.0.dist-info/WHEEL +4 -0
- markback-0.1.0.dist-info/entry_points.txt +2 -0
- markback-0.1.0.dist-info/licenses/LICENSE +21 -0
markback/__init__.py
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
"""MarkBack: A compact format for content + feedback."""
|
|
2
|
+
|
|
3
|
+
from .types import (
|
|
4
|
+
Diagnostic,
|
|
5
|
+
ErrorCode,
|
|
6
|
+
FeedbackParsed,
|
|
7
|
+
ParseResult,
|
|
8
|
+
Record,
|
|
9
|
+
Severity,
|
|
10
|
+
SourceRef,
|
|
11
|
+
WarningCode,
|
|
12
|
+
parse_feedback,
|
|
13
|
+
)
|
|
14
|
+
from .parser import (
|
|
15
|
+
parse_file,
|
|
16
|
+
parse_string,
|
|
17
|
+
parse_paired_files,
|
|
18
|
+
parse_directory,
|
|
19
|
+
discover_paired_files,
|
|
20
|
+
)
|
|
21
|
+
from .writer import (
|
|
22
|
+
OutputMode,
|
|
23
|
+
normalize_file,
|
|
24
|
+
write_file,
|
|
25
|
+
write_record_canonical,
|
|
26
|
+
write_records_multi,
|
|
27
|
+
write_records_compact,
|
|
28
|
+
write_label_file,
|
|
29
|
+
write_paired_files,
|
|
30
|
+
)
|
|
31
|
+
from .linter import (
|
|
32
|
+
lint_file,
|
|
33
|
+
lint_files,
|
|
34
|
+
lint_string,
|
|
35
|
+
format_diagnostics,
|
|
36
|
+
summarize_results,
|
|
37
|
+
)
|
|
38
|
+
from .config import (
|
|
39
|
+
Config,
|
|
40
|
+
LLMConfig,
|
|
41
|
+
load_config,
|
|
42
|
+
init_env,
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
__version__ = "0.1.0"
|
|
46
|
+
|
|
47
|
+
__all__ = [
|
|
48
|
+
# Types
|
|
49
|
+
"Diagnostic",
|
|
50
|
+
"ErrorCode",
|
|
51
|
+
"FeedbackParsed",
|
|
52
|
+
"ParseResult",
|
|
53
|
+
"Record",
|
|
54
|
+
"Severity",
|
|
55
|
+
"SourceRef",
|
|
56
|
+
"WarningCode",
|
|
57
|
+
"parse_feedback",
|
|
58
|
+
# Parser
|
|
59
|
+
"parse_file",
|
|
60
|
+
"parse_string",
|
|
61
|
+
"parse_paired_files",
|
|
62
|
+
"parse_directory",
|
|
63
|
+
"discover_paired_files",
|
|
64
|
+
# Writer
|
|
65
|
+
"OutputMode",
|
|
66
|
+
"normalize_file",
|
|
67
|
+
"write_file",
|
|
68
|
+
"write_record_canonical",
|
|
69
|
+
"write_records_multi",
|
|
70
|
+
"write_records_compact",
|
|
71
|
+
"write_label_file",
|
|
72
|
+
"write_paired_files",
|
|
73
|
+
# Linter
|
|
74
|
+
"lint_file",
|
|
75
|
+
"lint_files",
|
|
76
|
+
"lint_string",
|
|
77
|
+
"format_diagnostics",
|
|
78
|
+
"summarize_results",
|
|
79
|
+
# Config
|
|
80
|
+
"Config",
|
|
81
|
+
"LLMConfig",
|
|
82
|
+
"load_config",
|
|
83
|
+
"init_env",
|
|
84
|
+
# Version
|
|
85
|
+
"__version__",
|
|
86
|
+
]
|
markback/cli.py
ADDED
|
@@ -0,0 +1,435 @@
|
|
|
1
|
+
"""MarkBack command-line interface."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import sys
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Annotated, Optional
|
|
7
|
+
|
|
8
|
+
import typer
|
|
9
|
+
from rich.console import Console
|
|
10
|
+
from rich.table import Table
|
|
11
|
+
|
|
12
|
+
from .config import Config, init_env, load_config, validate_config
|
|
13
|
+
from .linter import format_diagnostics, lint_files, summarize_results
|
|
14
|
+
from .parser import parse_file, parse_string
|
|
15
|
+
from .types import Severity, parse_feedback
|
|
16
|
+
from .writer import OutputMode, normalize_file, write_file, write_records_multi
|
|
17
|
+
|
|
18
|
+
# Root Typer application. With no arguments the help text is shown
# (no_args_is_help=True).
app = typer.Typer(
    name="markback",
    help="MarkBack: A compact format for content + feedback",
    no_args_is_help=True,
)

# Rich console for regular command output.
console = Console()
# Separate Rich console bound to stderr, used for diagnostics and errors.
err_console = Console(stderr=True)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@app.command()
def init(
    path: Annotated[
        Path,
        typer.Argument(help="Path to create .env file"),
    ] = Path(".env"),
    force: Annotated[
        bool,
        typer.Option("--force", "-f", help="Overwrite existing file"),
    ] = False,
):
    """Initialize a .env configuration file."""
    # init_env() reports False when it declined to write the file; that is
    # surfaced to the shell as exit status 1.
    created = init_env(path, force=force)
    if not created:
        console.print(f"[yellow]{path} already exists. Use --force to overwrite.[/yellow]")
        raise typer.Exit(1)

    console.print(f"[green]Created {path}[/green]")
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@app.command()
def lint(
    paths: Annotated[
        list[Path],
        typer.Argument(help="Files or directories to lint"),
    ],
    output_json: Annotated[
        bool,
        typer.Option("--json", "-j", help="Output as JSON"),
    ] = False,
    no_source_check: Annotated[
        bool,
        typer.Option("--no-source-check", help="Skip checking if @source files exist"),
    ] = False,
    no_canonical_check: Annotated[
        bool,
        typer.Option("--no-canonical-check", help="Skip canonical format check"),
    ] = False,
):
    """Lint MarkBack files."""
    # The docstring above is what Typer shows in --help, so it is kept short.
    #
    # Output: with --json a single JSON document {"summary", "diagnostics"}
    # is written to stdout; otherwise diagnostics go to stderr and a summary
    # to stdout. Exit status is 1 when the summary reports any errors.
    from rich.markup import escape

    results = lint_files(
        paths,
        check_sources=not no_source_check,
        check_canonical=not no_canonical_check,
    )
    summary = summarize_results(results)

    # Flatten per-result diagnostics into one list, preserving result order.
    all_diagnostics = [d for result in results for d in result.diagnostics]

    if output_json:
        payload = {
            "summary": summary,
            "diagnostics": [d.to_dict() for d in all_diagnostics],
        }
        # Write JSON as plain text. Rich's console.print would treat "[...]"
        # inside strings as markup and hard-wrap long lines, either of which
        # can corrupt the document for downstream parsers.
        typer.echo(json.dumps(payload, indent=2))
    else:
        for d in all_diagnostics:
            colour = "red" if d.severity == Severity.ERROR else "yellow"
            # Escape the diagnostic text so brackets in paths or messages are
            # printed literally instead of being parsed as Rich markup.
            err_console.print(f"[{colour}]{escape(str(d))}[/{colour}]")

        console.print()
        console.print(f"Files: {summary['files']}")
        console.print(f"Records: {summary['records']}")
        console.print(f"Errors: {summary['errors']}")
        console.print(f"Warnings: {summary['warnings']}")

    # Exit with error code if there were errors
    if summary["errors"] > 0:
        raise typer.Exit(1)
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
@app.command()
def normalize(
    input_path: Annotated[
        Path,
        typer.Argument(help="Input MarkBack file"),
    ],
    output_path: Annotated[
        Optional[Path],
        typer.Argument(help="Output file (omit for in-place)"),
    ] = None,
    in_place: Annotated[
        bool,
        typer.Option("--in-place", "-i", help="Modify input file in place"),
    ] = False,
):
    """Normalize a MarkBack file to canonical format."""
    from rich.markup import escape

    # Omitting the output file means "in place" (see the argument help), so
    # the same effective flag drives both the call and the message below.
    # Previously the message tested only the raw --in-place option, so the
    # default invocation reported nothing and dumped the content instead.
    rewrite_in_place = in_place or output_path is None

    try:
        normalize_file(
            input_path,
            output_path=output_path,
            in_place=rewrite_in_place,
        )
    except (ValueError, OSError) as e:
        # OSError covers unreadable/missing input and unwritable output, which
        # would otherwise surface as a raw traceback.
        err_console.print(f"[red]Error: {escape(str(e))}[/red]")
        raise typer.Exit(1)

    if output_path:
        console.print(f"[green]Wrote {output_path}[/green]")
    else:
        console.print(f"[green]Normalized {input_path}[/green]")
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
@app.command("list")
def list_records(
    paths: Annotated[
        list[Path],
        typer.Argument(help="Files or directories to list"),
    ],
    output_json: Annotated[
        bool,
        typer.Option("--json", "-j", help="Output as JSON"),
    ] = False,
):
    """List records in MarkBack files."""
    from rich.markup import escape

    # Collect (file, record) pairs. Directories are searched recursively for
    # *.mb files (sorted so the listing is deterministic); any other path is
    # parsed as a single file.
    all_records = []
    for path in paths:
        files = sorted(path.glob("**/*.mb")) if path.is_dir() else [path]
        for mb_file in files:
            result = parse_file(mb_file)
            all_records.extend((mb_file, record) for record in result.records)

    if output_json:
        payload = [
            {
                "file": str(file_path),
                "uri": record.uri,
                "source": str(record.source) if record.source else None,
                "feedback": record.feedback,
                "has_content": record.has_inline_content(),
            }
            for file_path, record in all_records
        ]
        # Plain write: Rich would interpret brackets as markup and wrap long
        # lines, which can make the JSON unparseable.
        typer.echo(json.dumps(payload, indent=2))
        return

    table = Table(show_header=True)
    table.add_column("URI", style="cyan")
    table.add_column("Source", style="green")
    table.add_column("Feedback", style="white")

    for _file_path, record in all_records:
        uri = record.uri or "-"
        source = str(record.source) if record.source else "-"
        # Truncate feedback for display
        feedback = record.feedback
        if len(feedback) > 50:
            feedback = feedback[:47] + "..."

        # Cell text is user data; escape it so "[...]" is shown literally
        # rather than being parsed (or rejected) as Rich markup.
        table.add_row(escape(uri), escape(source), escape(feedback))

    console.print(table)
    console.print(f"\nTotal: {len(all_records)} records")
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
@app.command()
def convert(
    input_path: Annotated[
        Path,
        typer.Argument(help="Input MarkBack file or directory"),
    ],
    output_path: Annotated[
        Path,
        typer.Argument(help="Output file or directory"),
    ],
    to: Annotated[
        str,
        typer.Option("--to", "-t", help="Output format: single, multi, compact, paired"),
    ] = "multi",
):
    """Convert between MarkBack storage modes."""
    from rich.markup import escape

    mode_map = {
        "single": OutputMode.SINGLE,
        "multi": OutputMode.MULTI,
        "compact": OutputMode.COMPACT,
        "paired": OutputMode.PAIRED,
    }

    # Reject an unknown --to value before doing any parsing work.
    if to not in mode_map:
        err_console.print(f"[red]Unknown format: {escape(to)}. Use: single, multi, compact, paired[/red]")
        raise typer.Exit(1)
    mode = mode_map[to]

    # Parse input
    if input_path.is_dir():
        from .parser import parse_directory
        result = parse_directory(input_path)
    else:
        result = parse_file(input_path)

    if result.has_errors:
        err_console.print("[red]Cannot convert file with errors[/red]")
        for d in result.diagnostics:
            if d.severity == Severity.ERROR:
                # Escape so brackets in the diagnostic are printed literally.
                err_console.print(f"[red]{escape(str(d))}[/red]")
        raise typer.Exit(1)

    records = result.records

    if mode == OutputMode.PAIRED:
        # Paired mode: create output directory with label files
        output_path.mkdir(parents=True, exist_ok=True)
        from .writer import write_paired_files

        used_names: set[str] = set()
        for i, record in enumerate(records):
            # Determine filename from source, then URI, then index
            if record.source:
                basename = Path(str(record.source)).stem
            elif record.uri:
                basename = record.uri.split("/")[-1].split(":")[-1]
            else:
                basename = ""

            # A URI ending in "/" or ":" (or a source with no stem) yields an
            # empty name; fall back to the index-based name in that case.
            if not basename:
                basename = f"record_{i:04d}"

            # Two records can map to the same name; add a numeric suffix so a
            # later record never silently overwrites an earlier label file.
            stem, n = basename, 1
            while basename in used_names:
                basename = f"{stem}_{n}"
                n += 1
            used_names.add(basename)

            label_path = output_path / f"{basename}.label.txt"
            write_paired_files(label_path, None, record)

        console.print(f"[green]Wrote {len(records)} label files to {output_path}[/green]")

    elif mode == OutputMode.SINGLE:
        if len(records) != 1:
            err_console.print(f"[red]Single mode requires exactly 1 record, got {len(records)}[/red]")
            raise typer.Exit(1)
        write_file(output_path, records, mode=mode)
        console.print(f"[green]Wrote {output_path}[/green]")

    else:
        write_file(output_path, records, mode=mode)
        console.print(f"[green]Wrote {len(records)} records to {output_path}[/green]")
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
# Workflow subcommand group: a nested Typer app mounted on the root app under
# the name "workflow"; the @workflow_app.command(...) functions in this module
# register on it.
workflow_app = typer.Typer(
    name="workflow",
    help="Editor/Operator LLM workflow commands",
)
app.add_typer(workflow_app, name="workflow")
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
@workflow_app.command("run")
def workflow_run(
    dataset: Annotated[
        Path,
        typer.Argument(help="Path to MarkBack dataset file or directory"),
    ],
    prompt: Annotated[
        str,
        typer.Option("--prompt", "-p", help="Initial prompt (or path to prompt file)"),
    ] = "",
    output: Annotated[
        Path,
        typer.Option("--output", "-o", help="Output file for results"),
    ] = Path("workflow_result.json"),
    env_file: Annotated[
        Optional[Path],
        typer.Option("--env", "-e", help="Path to .env file"),
    ] = None,
):
    """Run the editor/operator workflow on a dataset."""
    from rich.markup import escape

    from .workflow import run_workflow, save_workflow_result

    # Load config
    config = load_config(env_file)

    # Validation issues are reported as warnings only; a missing editor or
    # operator configuration is the hard failure.
    for issue in validate_config(config):
        err_console.print(f"[yellow]Config warning: {escape(issue)}[/yellow]")

    if config.editor is None or config.operator is None:
        err_console.print("[red]Editor and Operator LLMs must be configured in .env[/red]")
        raise typer.Exit(1)

    # Load initial prompt: --prompt is either literal text or a path to a file.
    initial_prompt = prompt
    if prompt:
        try:
            # is_file() rather than exists(): a prompt that happens to name a
            # directory must not be read as a file.
            prompt_is_file = Path(prompt).is_file()
        except (OSError, ValueError):
            # Long literal prompts can fail the filesystem probe (e.g. "File
            # name too long"); such a value is prompt text, not a path.
            prompt_is_file = False

        if prompt_is_file:
            try:
                initial_prompt = Path(prompt).read_text(encoding="utf-8")
            except (OSError, UnicodeDecodeError) as e:
                err_console.print(f"[red]Cannot read prompt file {escape(prompt)}: {escape(str(e))}[/red]")
                raise typer.Exit(1)

    # Load dataset
    if dataset.is_dir():
        from .parser import parse_directory
        result = parse_directory(dataset)
    else:
        result = parse_file(dataset)

    if result.has_errors:
        err_console.print("[red]Dataset has errors:[/red]")
        for d in result.diagnostics:
            if d.severity == Severity.ERROR:
                err_console.print(f"[red]{escape(str(d))}[/red]")
        raise typer.Exit(1)

    if not result.records:
        err_console.print("[red]No records found in dataset[/red]")
        raise typer.Exit(1)

    console.print(f"Loaded {len(result.records)} records from {dataset}")
    console.print("Running workflow...")

    try:
        workflow_result = run_workflow(config, initial_prompt, result.records)

        # Save result
        output_file = save_workflow_result(workflow_result, output, config)
        console.print(f"[green]Results saved to {output_file}[/green]")

        # Print summary
        console.print("\n[bold]Workflow Results:[/bold]")
        console.print(f"Refined prompt length: {len(workflow_result.refined_prompt)} chars")
        console.print(f"Outputs generated: {len(workflow_result.outputs)}")

        eval_result = workflow_result.evaluation
        console.print("\n[bold]Evaluation:[/bold]")
        console.print(f"Total: {eval_result['total']}")
        console.print(f"Correct: {eval_result['correct']}")
        console.print(f"Incorrect: {eval_result['incorrect']}")
        console.print(f"Accuracy: {eval_result['accuracy']:.1%}")

    except Exception as e:
        # Top-level CLI boundary: report any workflow failure and exit 1.
        # The message is escaped because exception text often contains
        # brackets that Rich would otherwise parse as markup.
        err_console.print(f"[red]Workflow error: {escape(str(e))}[/red]")
        raise typer.Exit(1)
|
|
364
|
+
|
|
365
|
+
|
|
366
|
+
@workflow_app.command("evaluate")
def workflow_evaluate(
    results_file: Annotated[
        Path,
        typer.Argument(help="Path to workflow results JSON"),
    ],
    output_json: Annotated[
        bool,
        typer.Option("--json", "-j", help="Output as JSON"),
    ] = False,
):
    """Show evaluation details from a workflow run."""
    from rich.markup import escape

    if not results_file.exists():
        err_console.print(f"[red]File not found: {results_file}[/red]")
        raise typer.Exit(1)

    try:
        data = json.loads(results_file.read_text(encoding="utf-8"))
    except (OSError, ValueError) as e:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors.
        err_console.print(f"[red]Cannot read results file: {escape(str(e))}[/red]")
        raise typer.Exit(1)

    if not isinstance(data, dict):
        err_console.print("[red]Results file does not contain a JSON object[/red]")
        raise typer.Exit(1)

    # A missing or null "evaluation" entry is treated as an empty evaluation.
    evaluation = data.get("evaluation") or {}

    if output_json:
        # Plain write so Rich neither interprets markup nor wraps the JSON.
        typer.echo(json.dumps(evaluation, indent=2))
        return

    console.print("[bold]Evaluation Summary:[/bold]")
    console.print(f"Total: {evaluation.get('total', 0)}")
    console.print(f"Correct: {evaluation.get('correct', 0)}")
    console.print(f"Incorrect: {evaluation.get('incorrect', 0)}")
    console.print(f"Accuracy: {evaluation.get('accuracy', 0):.1%}")

    details = evaluation.get("details", [])
    if details:
        console.print("\n[bold]Details:[/bold]")
        for d in details:
            status = "[green]PASS[/green]" if d.get("match") else "[red]FAIL[/red]"
            uri = d.get("uri") or f"record {d.get('record_idx')}"
            # Only `status` is intentional markup; the data fields are escaped.
            expected = escape(str(d.get("expected_label")))
            console.print(f" {status} {escape(str(uri))}: expected={expected}")
|
|
401
|
+
|
|
402
|
+
|
|
403
|
+
@workflow_app.command("prompt")
def workflow_prompt(
    results_file: Annotated[
        Path,
        typer.Argument(help="Path to workflow results JSON"),
    ],
    output: Annotated[
        Optional[Path],
        typer.Option("--output", "-o", help="Save prompt to file"),
    ] = None,
):
    """Extract the refined prompt from a workflow run."""
    from rich.markup import escape

    if not results_file.exists():
        err_console.print(f"[red]File not found: {results_file}[/red]")
        raise typer.Exit(1)

    try:
        data = json.loads(results_file.read_text(encoding="utf-8"))
    except (OSError, ValueError) as e:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors.
        err_console.print(f"[red]Cannot read results file: {escape(str(e))}[/red]")
        raise typer.Exit(1)

    if not isinstance(data, dict):
        err_console.print("[red]Results file does not contain a JSON object[/red]")
        raise typer.Exit(1)

    prompt = data.get("refined_prompt", "")

    if output:
        output.write_text(prompt, encoding="utf-8")
        console.print(f"[green]Saved prompt to {output}[/green]")
    else:
        # The prompt is arbitrary text and must be emitted verbatim: Rich's
        # console.print would strip "[...]" sequences as markup and re-wrap
        # long lines, so a plain write is used instead.
        typer.echo(prompt)
|
|
427
|
+
|
|
428
|
+
|
|
429
|
+
def main():
    """Entry point for the CLI."""
    # Delegates to the root Typer application defined in this module.
    app()


# Allow running this module directly as a script.
if __name__ == "__main__":
    main()
|
markback/config.py
ADDED
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
"""Configuration management for MarkBack."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from dataclasses import dataclass, field
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Optional
|
|
7
|
+
|
|
8
|
+
from dotenv import load_dotenv
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# Default .env template content, written out verbatim by init_env().
# The variable names are the ones load_config() reads, and the placeholder
# "your-api-key-here" is the value validate_config() reports as "not set".
ENV_TEMPLATE = '''# MarkBack Configuration
# =====================

# File handling mode: "git" (in-place) or "versioned" (never overwrite)
FILE_MODE=git

# Label file discovery suffixes (comma-separated)
LABEL_SUFFIXES=.label.txt,.feedback.txt,.mb

# Editor LLM Configuration
# ------------------------
# The Editor LLM refines prompts based on examples and feedback
EDITOR_API_BASE=https://api.openai.com/v1
EDITOR_API_KEY=your-api-key-here
EDITOR_MODEL=gpt-4
EDITOR_MAX_TOKENS=2048
EDITOR_TEMPERATURE=0.7
EDITOR_TIMEOUT=60

# Operator LLM Configuration
# --------------------------
# The Operator LLM runs the refined prompt against examples
OPERATOR_API_BASE=https://api.openai.com/v1
OPERATOR_API_KEY=your-api-key-here
OPERATOR_MODEL=gpt-4
OPERATOR_MAX_TOKENS=1024
OPERATOR_TEMPERATURE=0.3
OPERATOR_TIMEOUT=60

# Evaluation Configuration
# ------------------------
# Labels that indicate positive/passing examples (comma-separated)
POSITIVE_LABELS=good,positive,pass,approved,excellent,correct
# Labels that indicate negative/failing examples (comma-separated)
NEGATIVE_LABELS=bad,negative,fail,rejected,needs work,incorrect
'''
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@dataclass
class LLMConfig:
    """Configuration for an LLM endpoint."""

    # Base URL of the API endpoint.
    api_base: str
    # Secret credential. Excluded from the generated repr so the key does not
    # leak into logs, tracebacks or debug output; it still takes part in
    # __init__ and equality, and stays positional argument number two.
    api_key: str = field(repr=False)
    # Model identifier sent to the endpoint.
    model: str
    # Generation limits and sampling settings.
    max_tokens: int = 1024
    temperature: float = 0.7
    # Request timeout (presumably seconds; confirm against the LLM client).
    timeout: int = 60
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
# Built-in defaults for Config, kept as immutable tuples; every Config
# instance receives its own fresh list copy.
_DEFAULT_LABEL_SUFFIXES = (".label.txt", ".feedback.txt", ".mb")
_DEFAULT_POSITIVE_LABELS = ("good", "positive", "pass", "approved", "excellent", "correct")
_DEFAULT_NEGATIVE_LABELS = ("bad", "negative", "fail", "rejected", "needs work", "incorrect")


@dataclass
class Config:
    """MarkBack configuration."""

    # File handling mode: "git" or "versioned".
    file_mode: str = "git"
    # Filename suffixes used to discover label files.
    label_suffixes: list[str] = field(default_factory=lambda: list(_DEFAULT_LABEL_SUFFIXES))

    # LLM endpoints; None while the corresponding endpoint is not configured.
    editor: Optional[LLMConfig] = None
    operator: Optional[LLMConfig] = None

    # Labels counted as positive / negative during evaluation.
    positive_labels: list[str] = field(default_factory=lambda: list(_DEFAULT_POSITIVE_LABELS))
    negative_labels: list[str] = field(default_factory=lambda: list(_DEFAULT_NEGATIVE_LABELS))
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _env_number(name: str, default: str, cast):
    """Read numeric environment variable *name*, falling back to *default* when unset or blank.

    Raises:
        ValueError: If the value cannot be converted; the message names the variable.
    """
    raw = (os.getenv(name) or "").strip() or default
    try:
        return cast(raw)
    except ValueError as err:
        raise ValueError(f"Invalid value for {name}: {raw!r}") from err


def _split_csv(raw: str, lower: bool = False) -> list[str]:
    """Split a comma-separated string into stripped, non-empty items (optionally lower-cased)."""
    items = [item.strip() for item in raw.split(",")]
    return [item.lower() if lower else item for item in items if item]


def _llm_config_from_env(prefix: str, max_tokens: str, temperature: str) -> Optional[LLMConfig]:
    """Build an LLMConfig from the ``<prefix>_*`` variables, or None unless base, key and model are all set."""
    api_base = os.getenv(f"{prefix}_API_BASE")
    api_key = os.getenv(f"{prefix}_API_KEY")
    model = os.getenv(f"{prefix}_MODEL")
    if not (api_base and api_key and model):
        return None

    return LLMConfig(
        api_base=api_base,
        api_key=api_key,
        model=model,
        max_tokens=_env_number(f"{prefix}_MAX_TOKENS", max_tokens, int),
        temperature=_env_number(f"{prefix}_TEMPERATURE", temperature, float),
        timeout=_env_number(f"{prefix}_TIMEOUT", "60", int),
    )


def load_config(env_path: Optional[Path] = None) -> Config:
    """Load configuration from .env file.

    Args:
        env_path: Path to .env file. If None, python-dotenv's default lookup is used.

    Returns:
        Loaded configuration. ``editor`` / ``operator`` stay None unless the
        API base, API key and model of that endpoint are all set.

    Raises:
        ValueError: If a numeric setting (``*_MAX_TOKENS``, ``*_TEMPERATURE``,
            ``*_TIMEOUT``) is set to a value that cannot be parsed.
    """
    # Populate os.environ from the .env file.
    if env_path:
        load_dotenv(env_path)
    else:
        load_dotenv()

    config = Config()

    # File mode: an unrecognised value is ignored and the default is kept.
    file_mode = os.getenv("FILE_MODE", "git")
    if file_mode in ("git", "versioned"):
        config.file_mode = file_mode

    # Label suffixes
    suffixes = os.getenv("LABEL_SUFFIXES")
    if suffixes:
        config.label_suffixes = _split_csv(suffixes)

    # Editor and Operator share one parsing path; only the variable prefix
    # and the default generation settings differ.
    config.editor = _llm_config_from_env("EDITOR", max_tokens="2048", temperature="0.7")
    config.operator = _llm_config_from_env("OPERATOR", max_tokens="1024", temperature="0.3")

    # Evaluation labels are normalised to lower case.
    positive = os.getenv("POSITIVE_LABELS")
    if positive:
        config.positive_labels = _split_csv(positive, lower=True)

    negative = os.getenv("NEGATIVE_LABELS")
    if negative:
        config.negative_labels = _split_csv(negative, lower=True)

    return config
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def init_env(path: Path, force: bool = False) -> bool:
    """Initialize a .env file with template.

    Args:
        path: Path to create .env file
        force: If True, overwrite existing file

    Returns:
        True if the template was written, False if ``path`` already exists
        and ``force`` is False.
    """
    # Mode "x" creates the file exclusively, so the existence check and the
    # write happen in one step instead of a racy exists()-then-write pair.
    # Mode "w" is used only when overwriting was explicitly requested.
    mode = "w" if force else "x"
    try:
        # Explicit encoding, matching the UTF-8 reads used elsewhere in the
        # package instead of the platform default.
        with path.open(mode, encoding="utf-8") as fh:
            fh.write(ENV_TEMPLATE)
    except FileExistsError:
        return False
    return True
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def validate_config(config: Config) -> list[str]:
    """Check a configuration and return human-readable problems.

    Returns:
        One message per problem, in the order file mode, label suffixes,
        editor key, operator key. The list is empty when nothing is wrong.
    """
    placeholder = "your-api-key-here"
    problems: list[str] = []

    if config.file_mode not in ("git", "versioned"):
        problems.append(f"Invalid FILE_MODE: {config.file_mode} (must be 'git' or 'versioned')")

    if not config.label_suffixes:
        problems.append("LABEL_SUFFIXES is empty")

    # An endpoint is only checked when it is configured; its key counts as
    # missing when empty or still the template placeholder.
    endpoint_checks = (
        (config.editor, "EDITOR_API_KEY not set"),
        (config.operator, "OPERATOR_API_KEY not set"),
    )
    for endpoint, message in endpoint_checks:
        if not endpoint:
            continue
        if not endpoint.api_key or endpoint.api_key == placeholder:
            problems.append(message)

    return problems
|