markback 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- markback/__init__.py +86 -0
- markback/cli.py +435 -0
- markback/config.py +181 -0
- markback/linter.py +312 -0
- markback/llm.py +175 -0
- markback/parser.py +587 -0
- markback/types.py +270 -0
- markback/workflow.py +351 -0
- markback/writer.py +249 -0
- markback-0.1.0.dist-info/METADATA +251 -0
- markback-0.1.0.dist-info/RECORD +14 -0
- markback-0.1.0.dist-info/WHEEL +4 -0
- markback-0.1.0.dist-info/entry_points.txt +2 -0
- markback-0.1.0.dist-info/licenses/LICENSE +21 -0
markback/linter.py
ADDED
|
@@ -0,0 +1,312 @@
|
|
|
1
|
+
"""MarkBack linter implementation."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Optional
|
|
6
|
+
|
|
7
|
+
from .parser import parse_file, parse_string
|
|
8
|
+
from .types import (
|
|
9
|
+
Diagnostic,
|
|
10
|
+
ErrorCode,
|
|
11
|
+
ParseResult,
|
|
12
|
+
Record,
|
|
13
|
+
Severity,
|
|
14
|
+
WarningCode,
|
|
15
|
+
parse_feedback,
|
|
16
|
+
)
|
|
17
|
+
from .writer import write_record_canonical, write_records_multi
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def lint_feedback_json(
    feedback: str,
    file: Optional[Path],
    line: Optional[int],
    record_idx: Optional[int],
) -> list[Diagnostic]:
    """Validate the payload of a ``json:``-prefixed feedback string.

    Feedback without the ``json:`` prefix is not JSON and passes through
    untouched; a prefixed payload that fails to decode yields one E007.
    """
    out: list[Diagnostic] = []

    if not feedback.startswith("json:"):
        return out

    payload = feedback[len("json:"):]
    try:
        json.loads(payload)
    except json.JSONDecodeError as exc:
        out.append(
            Diagnostic(
                file=file,
                line=line,
                column=None,
                severity=Severity.ERROR,
                code=ErrorCode.E007,
                message=f"Invalid JSON after json: prefix: {exc}",
                record_index=record_idx,
            )
        )
    return out
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def lint_feedback_structured(
    feedback: str,
    file: Optional[Path],
    line: Optional[int],
    record_idx: Optional[int],
) -> list[Diagnostic]:
    """Lint structured feedback for unclosed quotes.

    Scans the feedback with a tiny state machine: a backslash escapes the
    following character, and each unescaped double quote toggles the
    in-quote state.  Ending the scan inside a quote yields one E008 error.

    Args:
        feedback: Raw structured (non-``json:``) feedback text.
        file: File the feedback came from, if known.
        line: Line number to attach to any diagnostic.
        record_idx: Index of the record within its file.

    Returns:
        A list with at most one E008 diagnostic.
    """
    diagnostics: list[Diagnostic] = []

    # Check for unclosed quotes.
    in_quote = False
    escaped = False

    # The index from the original enumerate() was never used, so iterate
    # over the characters directly.
    for char in feedback:
        if escaped:
            escaped = False
            continue

        if char == '\\':
            escaped = True
            continue

        if char == '"':
            in_quote = not in_quote

    if in_quote:
        diagnostics.append(Diagnostic(
            file=file,
            line=line,
            column=None,
            severity=Severity.ERROR,
            code=ErrorCode.E008,
            message="Unclosed quote in structured attribute value",
            record_index=record_idx,
        ))

    return diagnostics
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def lint_source_exists(
    record: Record,
    base_path: Optional[Path],
    record_idx: int,
) -> list[Diagnostic]:
    """Emit W003 when a record's @source points at a missing local file.

    URIs (and records without a source) are not checked for existence.
    """
    out: list[Diagnostic] = []

    source = record.source
    if not source or source.is_uri:
        return out

    try:
        resolved = source.resolve(base_path)
        missing = not resolved.exists()
    except ValueError:
        # URI that can't be resolved to a filesystem path.
        return out

    if missing:
        out.append(
            Diagnostic(
                file=record._source_file,
                line=record._start_line,
                column=None,
                severity=Severity.WARNING,
                code=WarningCode.W003,
                message=f"@source file not found: {record.source}",
                record_index=record_idx,
            )
        )
    return out
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def lint_canonical_format(
    records: list[Record],
    original_text: str,
    file: Optional[Path],
) -> list[Diagnostic]:
    """Emit W008 when the original text differs from the canonical form."""
    # Render what the canonical writer would produce for these records.
    if len(records) == 1:
        expected = write_record_canonical(records[0]) + "\n"
    else:
        expected = write_records_multi(records)

    # CRLF endings are treated as equivalent to LF for this comparison.
    actual = original_text.replace('\r\n', '\n')

    if actual == expected:
        return []

    return [
        Diagnostic(
            file=file,
            line=1,
            column=None,
            severity=Severity.WARNING,
            code=WarningCode.W008,
            message="Non-canonical formatting detected",
        )
    ]
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def lint_string(
    text: str,
    source_file: Optional[Path] = None,
    check_sources: bool = True,
    check_canonical: bool = True,
) -> ParseResult:
    """Lint a MarkBack string.

    This runs the parser (which generates many diagnostics) and then
    performs additional linting checks on each parsed record.

    Args:
        text: MarkBack document text.
        source_file: Path the text came from, used in diagnostics.
        check_sources: Verify each record's @source file exists on disk.
        check_canonical: Warn when the text is not canonically formatted.

    Returns:
        The ParseResult with lint diagnostics appended.
    """
    # Parse first - this catches structural issues
    result = parse_string(text, source_file=source_file)

    # The base path depends only on source_file; compute it once instead
    # of once per record (it was loop-invariant).
    base_path = source_file.parent if source_file else None

    # Additional linting for each record
    for idx, record in enumerate(result.records):
        # Lint JSON feedback (feedback sits at the end of the record).
        result.diagnostics.extend(lint_feedback_json(
            record.feedback,
            source_file,
            record._end_line,
            idx,
        ))

        # Structured (non-JSON) feedback is checked for unclosed quotes.
        if not record.feedback.startswith("json:"):
            result.diagnostics.extend(lint_feedback_structured(
                record.feedback,
                source_file,
                record._end_line,
                idx,
            ))

        # Check source file existence
        if check_sources:
            result.diagnostics.extend(lint_source_exists(record, base_path, idx))

    # Canonical check only makes sense when the input parsed cleanly.
    if check_canonical and result.records and not result.has_errors:
        result.diagnostics.extend(lint_canonical_format(
            result.records,
            text,
            source_file,
        ))

    return result
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def _read_error_result(path: Path, message: str) -> ParseResult:
    """Build a ParseResult holding a single E006 read-failure diagnostic."""
    return ParseResult(
        records=[],
        diagnostics=[
            Diagnostic(
                file=path,
                line=None,
                column=None,
                severity=Severity.ERROR,
                code=ErrorCode.E006,
                message=message,
            )
        ],
        source_file=path,
    )


def lint_file(
    path: Path,
    check_sources: bool = True,
    check_canonical: bool = True,
) -> ParseResult:
    """Lint a MarkBack file.

    Read failures (missing file, invalid UTF-8) are reported as E006
    diagnostics in the result rather than raised.
    """
    try:
        text = path.read_text(encoding="utf-8")
    except UnicodeDecodeError:
        return _read_error_result(path, "File is not valid UTF-8")
    except FileNotFoundError:
        return _read_error_result(path, "File not found")

    return lint_string(
        text,
        source_file=path,
        check_sources=check_sources,
        check_canonical=check_canonical,
    )
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def lint_files(
    paths: list[Path],
    check_sources: bool = True,
    check_canonical: bool = True,
) -> list[ParseResult]:
    """Lint multiple MarkBack files.

    Directory paths are expanded recursively to every ``*.mb``,
    ``*.label.txt`` and ``*.feedback.txt`` file they contain.
    """
    def _lint(target: Path) -> ParseResult:
        # One place to thread the check flags through to lint_file.
        return lint_file(
            target,
            check_sources=check_sources,
            check_canonical=check_canonical,
        )

    results: list[ParseResult] = []

    for path in paths:
        if not path.is_dir():
            results.append(_lint(path))
            continue
        # Recursively lint every recognised MarkBack file in the tree,
        # .mb files first, then the label/feedback sidecar files.
        for pattern in ("**/*.mb", "**/*.label.txt", "**/*.feedback.txt"):
            for found in path.glob(pattern):
                results.append(_lint(found))

    return results
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
def format_diagnostics(
    diagnostics: list[Diagnostic],
    format: str = "human",
) -> str:
    """Format diagnostics for output.

    Args:
        diagnostics: List of diagnostics to format
        format: Output format ("human" or "json")

    Returns:
        Formatted string
    """
    if format == "json":
        payload = [diag.to_dict() for diag in diagnostics]
        return json.dumps(payload, indent=2)

    # Human-readable: one diagnostic per line via each one's __str__.
    return '\n'.join(str(diag) for diag in diagnostics)
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
def summarize_results(results: list[ParseResult]) -> dict:
    """Summarize lint results as aggregate counts across all files."""
    summary = {
        "files": len(results),
        "records": 0,
        "errors": 0,
        "warnings": 0,
        "files_with_errors": 0,
        "files_with_warnings": 0,
    }

    # Single pass over the results instead of one sum() per counter.
    for res in results:
        summary["records"] += len(res.records)
        summary["errors"] += res.error_count
        summary["warnings"] += res.warning_count
        if res.has_errors:
            summary["files_with_errors"] += 1
        if res.has_warnings:
            summary["files_with_warnings"] += 1

    return summary
|
markback/llm.py
ADDED
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
"""LLM client abstraction for MarkBack."""
|
|
2
|
+
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from typing import Optional, Protocol
|
|
6
|
+
|
|
7
|
+
import httpx
|
|
8
|
+
|
|
9
|
+
from .config import LLMConfig
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
class LLMResponse:
    """Response from an LLM call.

    Wraps the text returned by a provider together with the model name
    and, when available, the provider's usage accounting and raw payload.
    """
    # Text of the completion (the assistant message content).
    content: str
    # Model that produced the response (provider-reported when available).
    model: str
    # Token-usage accounting as returned by the provider, if any.
    usage: Optional[dict] = None
    # Full decoded provider payload for debugging; left None by mock clients.
    raw_response: Optional[dict] = None
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class LLMClient(ABC):
    """Abstract base class for LLM clients."""

    @abstractmethod
    def complete(
        self,
        prompt: str,
        system: Optional[str] = None,
        max_tokens: Optional[int] = None,
        temperature: Optional[float] = None,
    ) -> LLMResponse:
        """Request a single completion from the underlying model.

        Args:
            prompt: User prompt to send.
            system: Optional system prompt.
            max_tokens: Per-call override for the configured max tokens.
            temperature: Per-call override for the configured temperature.

        Returns:
            The LLMResponse produced by the concrete client.
        """
        ...
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class OpenAICompatibleClient(LLMClient):
    """Client for OpenAI-compatible chat-completions APIs.

    Posts to ``{api_base}/chat/completions`` with a bearer token.  The
    instance can be used as a context manager so the underlying HTTP
    client is always closed.
    """

    def __init__(self, config: LLMConfig):
        self.config = config
        self.client = httpx.Client(timeout=config.timeout)

    def complete(
        self,
        prompt: str,
        system: Optional[str] = None,
        max_tokens: Optional[int] = None,
        temperature: Optional[float] = None,
    ) -> LLMResponse:
        """Send a completion request to an OpenAI-compatible API.

        Args:
            prompt: The user prompt.
            system: Optional system prompt, sent as the first message.
            max_tokens: Per-call override for the configured max tokens.
            temperature: Per-call override for the configured temperature.

        Returns:
            The parsed LLMResponse, including the raw provider payload.

        Raises:
            httpx.HTTPStatusError: If the API returns an error status.
        """
        messages = []

        if system:
            messages.append({"role": "system", "content": system})

        messages.append({"role": "user", "content": prompt})

        payload = {
            "model": self.config.model,
            "messages": messages,
            # Explicit None checks so a legitimate falsy override
            # (max_tokens=0, matching the temperature=0.0 handling) is
            # not silently replaced by the configured default.
            "max_tokens": max_tokens if max_tokens is not None else self.config.max_tokens,
            "temperature": temperature if temperature is not None else self.config.temperature,
        }

        headers = {
            "Authorization": f"Bearer {self.config.api_key}",
            "Content-Type": "application/json",
        }

        url = f"{self.config.api_base.rstrip('/')}/chat/completions"

        response = self.client.post(url, json=payload, headers=headers)
        response.raise_for_status()

        data = response.json()

        return LLMResponse(
            content=data["choices"][0]["message"]["content"],
            model=data.get("model", self.config.model),
            usage=data.get("usage"),
            raw_response=data,
        )

    def close(self):
        """Close the HTTP client."""
        self.client.close()

    def __enter__(self) -> "OpenAICompatibleClient":
        """Support ``with OpenAICompatibleClient(cfg) as client:`` usage."""
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        """Close the HTTP client when leaving the ``with`` block."""
        self.close()
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
class MockLLMClient(LLMClient):
    """Mock LLM client for testing.

    Replays a canned list of responses in order, falls back to a default
    once the list is exhausted, and records every call it receives.
    """

    def __init__(
        self,
        responses: Optional[list[str]] = None,
        default_response: str = "Mock response",
    ):
        self.responses = responses or []
        self.default_response = default_response
        self.call_count = 0
        self.calls: list[dict] = []

    def complete(
        self,
        prompt: str,
        system: Optional[str] = None,
        max_tokens: Optional[int] = None,
        temperature: Optional[float] = None,
    ) -> LLMResponse:
        """Record the call and return the next canned response."""
        self.calls.append({
            "prompt": prompt,
            "system": system,
            "max_tokens": max_tokens,
            "temperature": temperature,
        })

        index = self.call_count
        self.call_count = index + 1

        # Replay canned responses in order; fall back once exhausted.
        if index < len(self.responses):
            canned = self.responses[index]
        else:
            canned = self.default_response

        return LLMResponse(
            content=canned,
            model="mock-model",
            usage={"prompt_tokens": 10, "completion_tokens": 10, "total_tokens": 20},
        )

    def reset(self):
        """Reset call tracking."""
        self.call_count = 0
        self.calls = []
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
class LLMClientFactory:
    """Factory for creating LLM clients."""

    # When set, create() returns this mock instead of a real client.
    _mock_client: Optional[MockLLMClient] = None

    @classmethod
    def set_mock(cls, client: Optional[MockLLMClient]):
        """Install (or, with None, remove) a mock client for testing."""
        cls._mock_client = client

    @classmethod
    def create(cls, config: LLMConfig) -> LLMClient:
        """Create an LLM client from config.

        If a mock client is set, returns that instead.
        """
        mock = cls._mock_client
        if mock is None:
            return OpenAICompatibleClient(config)
        return mock
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def create_editor_client(config: LLMConfig) -> LLMClient:
    """Build the LLM client used for the editor role."""
    client = LLMClientFactory.create(config)
    return client
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def create_operator_client(config: LLMConfig) -> LLMClient:
    """Build the LLM client used for the operator role."""
    client = LLMClientFactory.create(config)
    return client
|