markback 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
markback/linter.py ADDED
@@ -0,0 +1,312 @@
1
+ """MarkBack linter implementation."""
2
+
3
+ import json
4
+ from pathlib import Path
5
+ from typing import Optional
6
+
7
+ from .parser import parse_file, parse_string
8
+ from .types import (
9
+ Diagnostic,
10
+ ErrorCode,
11
+ ParseResult,
12
+ Record,
13
+ Severity,
14
+ WarningCode,
15
+ parse_feedback,
16
+ )
17
+ from .writer import write_record_canonical, write_records_multi
18
+
19
+
20
def lint_feedback_json(
    feedback: str,
    file: Optional[Path],
    line: Optional[int],
    record_idx: Optional[int],
) -> list[Diagnostic]:
    """Validate feedback carrying a ``json:`` prefix.

    Feedback without the prefix is ignored. With the prefix, the
    remainder must parse as JSON; otherwise an E007 error is emitted.
    """
    prefix = "json:"
    if not feedback.startswith(prefix):
        return []

    try:
        json.loads(feedback[len(prefix):])
    except json.JSONDecodeError as err:
        return [
            Diagnostic(
                file=file,
                line=line,
                column=None,
                severity=Severity.ERROR,
                code=ErrorCode.E007,
                message=f"Invalid JSON after json: prefix: {err}",
                record_index=record_idx,
            )
        ]

    return []
45
+
46
+
47
def lint_feedback_structured(
    feedback: str,
    file: Optional[Path],
    line: Optional[int],
    record_idx: Optional[int],
) -> list[Diagnostic]:
    """Check structured feedback for an unterminated quoted value.

    Scans the text counting unescaped double quotes; a backslash
    escapes the character that follows it. An odd quote count means
    a quote was left open, which is reported as E008.
    """
    quote_open = False
    i = 0
    length = len(feedback)
    while i < length:
        ch = feedback[i]
        if ch == '\\':
            # Skip the escaped character entirely.
            i += 2
            continue
        if ch == '"':
            quote_open = not quote_open
        i += 1

    if not quote_open:
        return []

    return [
        Diagnostic(
            file=file,
            line=line,
            column=None,
            severity=Severity.ERROR,
            code=ErrorCode.E008,
            message="Unclosed quote in structured attribute value",
            record_index=record_idx,
        )
    ]
84
+
85
+
86
def lint_source_exists(
    record: Record,
    base_path: Optional[Path],
    record_idx: int,
) -> list[Diagnostic]:
    """Warn (W003) when a record's non-URI @source points at a missing file."""
    source = record.source
    # Nothing to check: no source, or a URI we don't resolve to disk.
    if not source or source.is_uri:
        return []

    try:
        missing = not source.resolve(base_path).exists()
    except ValueError:
        # URI-like value that cannot be resolved to a filesystem path.
        return []

    if not missing:
        return []

    return [
        Diagnostic(
            file=record._source_file,
            line=record._start_line,
            column=None,
            severity=Severity.WARNING,
            code=WarningCode.W003,
            message=f"@source file not found: {source}",
            record_index=record_idx,
        )
    ]
111
+
112
+
113
def lint_canonical_format(
    records: list[Record],
    original_text: str,
    file: Optional[Path],
) -> list[Diagnostic]:
    """Warn (W008) when the file's text differs from its canonical rendering."""
    # Render the records back out in canonical form.
    canonical = (
        write_record_canonical(records[0]) + "\n"
        if len(records) == 1
        else write_records_multi(records)
    )

    # CRLF is normalized away so line endings alone never trigger the warning.
    if original_text.replace('\r\n', '\n') == canonical:
        return []

    return [
        Diagnostic(
            file=file,
            line=1,
            column=None,
            severity=Severity.WARNING,
            code=WarningCode.W008,
            message="Non-canonical formatting detected",
        )
    ]
141
+
142
+
143
def lint_string(
    text: str,
    source_file: Optional[Path] = None,
    check_sources: bool = True,
    check_canonical: bool = True,
) -> ParseResult:
    """Lint a MarkBack string.

    The parser runs first and supplies the structural diagnostics; the
    per-record lint checks below are layered on top of its result.
    """
    result = parse_string(text, source_file=source_file)

    # Directory used to resolve @source paths; invariant across records.
    base_path = source_file.parent if source_file else None

    for idx, record in enumerate(result.records):
        feedback = record.feedback
        feedback_line = record._end_line  # feedback sits at the record's end

        result.diagnostics.extend(
            lint_feedback_json(feedback, source_file, feedback_line, idx)
        )

        # JSON feedback is already covered above; quote checks apply
        # only to the structured form.
        if not feedback.startswith("json:"):
            result.diagnostics.extend(
                lint_feedback_structured(feedback, source_file, feedback_line, idx)
            )

        if check_sources:
            result.diagnostics.extend(lint_source_exists(record, base_path, idx))

    # Canonical-format comparison is only meaningful on a clean parse.
    if check_canonical and result.records and not result.has_errors:
        result.diagnostics.extend(
            lint_canonical_format(result.records, text, source_file)
        )

    return result
190
+
191
+
192
def _file_error_result(path: Path, message: str) -> ParseResult:
    """Build a ParseResult holding a single file-level E006 error."""
    return ParseResult(
        records=[],
        diagnostics=[
            Diagnostic(
                file=path,
                line=None,
                column=None,
                severity=Severity.ERROR,
                code=ErrorCode.E006,
                message=message,
            )
        ],
        source_file=path,
    )


def lint_file(
    path: Path,
    check_sources: bool = True,
    check_canonical: bool = True,
) -> ParseResult:
    """Lint a MarkBack file.

    Args:
        path: File to read (UTF-8) and lint.
        check_sources: Verify that @source references exist on disk.
        check_canonical: Warn when formatting is non-canonical.

    Returns:
        The ParseResult from linting; unreadable/missing files yield a
        result with a single E006 diagnostic instead of raising.
    """
    try:
        text = path.read_text(encoding="utf-8")
    except UnicodeDecodeError:
        return _file_error_result(path, "File is not valid UTF-8")
    except FileNotFoundError:
        return _file_error_result(path, "File not found")

    return lint_string(
        text,
        source_file=path,
        check_sources=check_sources,
        check_canonical=check_canonical,
    )
237
+
238
+
239
def lint_files(
    paths: list[Path],
    check_sources: bool = True,
    check_canonical: bool = True,
) -> list[ParseResult]:
    """Lint multiple MarkBack files.

    Directories are expanded recursively: all ``.mb`` files first, then
    sidecar ``.label.txt`` and ``.feedback.txt`` files. Plain paths are
    linted directly.
    """
    results: list[ParseResult] = []

    for path in paths:
        if not path.is_dir():
            results.append(lint_file(
                path,
                check_sources=check_sources,
                check_canonical=check_canonical,
            ))
            continue

        # Pattern order preserves lint order: .mb, then sidecar files.
        for pattern in ("**/*.mb", "**/*.label.txt", "**/*.feedback.txt"):
            for match in path.glob(pattern):
                results.append(lint_file(
                    match,
                    check_sources=check_sources,
                    check_canonical=check_canonical,
                ))

    return results
272
+
273
+
274
def format_diagnostics(
    diagnostics: list[Diagnostic],
    format: str = "human",
) -> str:
    """Render diagnostics as text.

    Args:
        diagnostics: Diagnostics to render.
        format: ``"human"`` for one ``str(d)`` per line, ``"json"`` for
            an indented JSON array of ``d.to_dict()`` objects.

    Returns:
        The rendered output.
    """
    if format == "json":
        return json.dumps([d.to_dict() for d in diagnostics], indent=2)

    return '\n'.join(str(d) for d in diagnostics)
295
+
296
+
297
def summarize_results(results: list[ParseResult]) -> dict:
    """Aggregate lint results into a flat summary dict of counts."""
    return {
        "files": len(results),
        "records": sum(len(r.records) for r in results),
        "errors": sum(r.error_count for r in results),
        "warnings": sum(r.warning_count for r in results),
        # bool sums count the results where the flag is set.
        "files_with_errors": sum(bool(r.has_errors) for r in results),
        "files_with_warnings": sum(bool(r.has_warnings) for r in results),
    }
markback/llm.py ADDED
@@ -0,0 +1,175 @@
1
+ """LLM client abstraction for MarkBack."""
2
+
3
+ from abc import ABC, abstractmethod
4
+ from dataclasses import dataclass
5
+ from typing import Optional, Protocol
6
+
7
+ import httpx
8
+
9
+ from .config import LLMConfig
10
+
11
+
12
@dataclass
class LLMResponse:
    """Result of a single LLM completion call."""
    # Generated text returned by the model.
    content: str
    # Model identifier reported by the provider.
    model: str
    # Token-usage accounting, when the provider supplies it.
    usage: Optional[dict] = None
    # Full decoded provider payload, kept for debugging.
    raw_response: Optional[dict] = None
19
+
20
+
21
class LLMClient(ABC):
    """Interface every LLM backend must implement."""

    @abstractmethod
    def complete(
        self,
        prompt: str,
        system: Optional[str] = None,
        max_tokens: Optional[int] = None,
        temperature: Optional[float] = None,
    ) -> LLMResponse:
        """Request a completion from the backing model.

        Args:
            prompt: The user prompt.
            system: Optional system prompt.
            max_tokens: Override of the configured token limit.
            temperature: Override of the configured temperature.

        Returns:
            The model's response.
        """
        ...
44
+
45
+
46
class OpenAICompatibleClient(LLMClient):
    """Client for OpenAI-compatible chat-completions APIs.

    Owns an ``httpx.Client`` connection pool; call :meth:`close` when
    done, or use the instance as a context manager so the pool is
    released even on error paths.
    """

    def __init__(self, config: LLMConfig):
        self.config = config
        # Shared connection pool for all requests from this client.
        self.client = httpx.Client(timeout=config.timeout)

    def __enter__(self) -> "OpenAICompatibleClient":
        """Enable ``with OpenAICompatibleClient(cfg) as client:`` usage."""
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        """Release the underlying HTTP connection pool on exit."""
        self.close()

    def complete(
        self,
        prompt: str,
        system: Optional[str] = None,
        max_tokens: Optional[int] = None,
        temperature: Optional[float] = None,
    ) -> LLMResponse:
        """Send a chat-completion request to the configured endpoint.

        Args:
            prompt: User prompt, sent as the ``user`` message.
            system: Optional system prompt, prepended when given.
            max_tokens: Per-call override of ``config.max_tokens``.
            temperature: Per-call override of ``config.temperature``
                (``0.0`` is a valid override, hence the ``is not None``).

        Returns:
            The parsed response; first choice's message content.

        Raises:
            httpx.HTTPStatusError: On non-2xx responses.
        """
        messages = []
        if system:
            messages.append({"role": "system", "content": system})
        messages.append({"role": "user", "content": prompt})

        payload = {
            "model": self.config.model,
            "messages": messages,
            "max_tokens": max_tokens or self.config.max_tokens,
            "temperature": temperature if temperature is not None else self.config.temperature,
        }

        headers = {
            "Authorization": f"Bearer {self.config.api_key}",
            "Content-Type": "application/json",
        }

        # rstrip guards against a trailing slash in the configured base URL.
        url = f"{self.config.api_base.rstrip('/')}/chat/completions"

        response = self.client.post(url, json=payload, headers=headers)
        response.raise_for_status()

        data = response.json()

        return LLMResponse(
            content=data["choices"][0]["message"]["content"],
            model=data.get("model", self.config.model),
            usage=data.get("usage"),
            raw_response=data,
        )

    def close(self):
        """Close the HTTP client and its connection pool."""
        self.client.close()
97
+
98
+
99
class MockLLMClient(LLMClient):
    """Scripted LLM client for tests.

    Replays ``responses`` in order, falling back to
    ``default_response`` once they run out, and records every call
    for later inspection.
    """

    def __init__(
        self,
        responses: Optional[list[str]] = None,
        default_response: str = "Mock response",
    ):
        self.responses = responses or []
        self.default_response = default_response
        self.call_count = 0
        self.calls: list[dict] = []

    def complete(
        self,
        prompt: str,
        system: Optional[str] = None,
        max_tokens: Optional[int] = None,
        temperature: Optional[float] = None,
    ) -> LLMResponse:
        """Record the call and return the next scripted response."""
        self.calls.append({
            "prompt": prompt,
            "system": system,
            "max_tokens": max_tokens,
            "temperature": temperature,
        })

        index = self.call_count
        self.call_count = index + 1

        content = (
            self.responses[index]
            if index < len(self.responses)
            else self.default_response
        )

        return LLMResponse(
            content=content,
            model="mock-model",
            usage={"prompt_tokens": 10, "completion_tokens": 10, "total_tokens": 20},
        )

    def reset(self):
        """Clear recorded calls and rewind the response cursor."""
        self.call_count = 0
        self.calls = []
144
+
145
+
146
class LLMClientFactory:
    """Factory that builds LLM clients, with a test-injection hook."""

    # When set, create() returns this instead of a real client.
    _mock_client: Optional[MockLLMClient] = None

    @classmethod
    def set_mock(cls, client: Optional[MockLLMClient]):
        """Install (or, with None, remove) the mock client used in tests."""
        cls._mock_client = client

    @classmethod
    def create(cls, config: LLMConfig) -> LLMClient:
        """Build a client for *config*, honoring any installed mock."""
        mock = cls._mock_client
        if mock is None:
            return OpenAICompatibleClient(config)
        return mock
166
+
167
+
168
def create_editor_client(config: LLMConfig) -> LLMClient:
    """Build the LLM client used for editor-side calls."""
    client = LLMClientFactory.create(config)
    return client
171
+
172
+
173
def create_operator_client(config: LLMConfig) -> LLMClient:
    """Build the LLM client used for operator-side calls."""
    client = LLMClientFactory.create(config)
    return client