markback 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- markback/linter.py +68 -1
- markback/parser.py +8 -2
- markback/types.py +47 -4
- markback/writer.py +9 -2
- {markback-0.1.0.dist-info → markback-0.1.2.dist-info}/METADATA +11 -1
- markback-0.1.2.dist-info/RECORD +14 -0
- markback-0.1.0.dist-info/RECORD +0 -14
- {markback-0.1.0.dist-info → markback-0.1.2.dist-info}/WHEEL +0 -0
- {markback-0.1.0.dist-info → markback-0.1.2.dist-info}/entry_points.txt +0 -0
- {markback-0.1.0.dist-info → markback-0.1.2.dist-info}/licenses/LICENSE +0 -0
markback/linter.py
CHANGED
|
@@ -110,6 +110,69 @@ def lint_source_exists(
|
|
|
110
110
|
return diagnostics
|
|
111
111
|
|
|
112
112
|
|
|
113
|
+
def lint_prior_exists(
|
|
114
|
+
record: Record,
|
|
115
|
+
base_path: Optional[Path],
|
|
116
|
+
record_idx: int,
|
|
117
|
+
) -> list[Diagnostic]:
|
|
118
|
+
"""Check if @prior file exists."""
|
|
119
|
+
diagnostics: list[Diagnostic] = []
|
|
120
|
+
|
|
121
|
+
if record.prior and not record.prior.is_uri:
|
|
122
|
+
try:
|
|
123
|
+
resolved = record.prior.resolve(base_path)
|
|
124
|
+
if not resolved.exists():
|
|
125
|
+
diagnostics.append(Diagnostic(
|
|
126
|
+
file=record._source_file,
|
|
127
|
+
line=record._start_line,
|
|
128
|
+
column=None,
|
|
129
|
+
severity=Severity.WARNING,
|
|
130
|
+
code=WarningCode.W009,
|
|
131
|
+
message=f"@prior file not found: {record.prior}",
|
|
132
|
+
record_index=record_idx,
|
|
133
|
+
))
|
|
134
|
+
except ValueError:
|
|
135
|
+
pass # URI that can't be resolved to path
|
|
136
|
+
|
|
137
|
+
return diagnostics
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def lint_line_range(
|
|
141
|
+
record: Record,
|
|
142
|
+
record_idx: int,
|
|
143
|
+
) -> list[Diagnostic]:
|
|
144
|
+
"""Check if line ranges are valid (end >= start)."""
|
|
145
|
+
diagnostics: list[Diagnostic] = []
|
|
146
|
+
|
|
147
|
+
# Check @source line range
|
|
148
|
+
if record.source and record.source.start_line is not None:
|
|
149
|
+
if record.source.end_line is not None and record.source.end_line < record.source.start_line:
|
|
150
|
+
diagnostics.append(Diagnostic(
|
|
151
|
+
file=record._source_file,
|
|
152
|
+
line=record._start_line,
|
|
153
|
+
column=None,
|
|
154
|
+
severity=Severity.ERROR,
|
|
155
|
+
code=ErrorCode.E011,
|
|
156
|
+
message=f"Invalid line range in @source: end line {record.source.end_line} is less than start line {record.source.start_line}",
|
|
157
|
+
record_index=record_idx,
|
|
158
|
+
))
|
|
159
|
+
|
|
160
|
+
# Check @prior line range
|
|
161
|
+
if record.prior and record.prior.start_line is not None:
|
|
162
|
+
if record.prior.end_line is not None and record.prior.end_line < record.prior.start_line:
|
|
163
|
+
diagnostics.append(Diagnostic(
|
|
164
|
+
file=record._source_file,
|
|
165
|
+
line=record._start_line,
|
|
166
|
+
column=None,
|
|
167
|
+
severity=Severity.ERROR,
|
|
168
|
+
code=ErrorCode.E011,
|
|
169
|
+
message=f"Invalid line range in @prior: end line {record.prior.end_line} is less than start line {record.prior.start_line}",
|
|
170
|
+
record_index=record_idx,
|
|
171
|
+
))
|
|
172
|
+
|
|
173
|
+
return diagnostics
|
|
174
|
+
|
|
175
|
+
|
|
113
176
|
def lint_canonical_format(
|
|
114
177
|
records: list[Record],
|
|
115
178
|
original_text: str,
|
|
@@ -173,10 +236,14 @@ def lint_string(
|
|
|
173
236
|
idx,
|
|
174
237
|
))
|
|
175
238
|
|
|
176
|
-
# Check source file existence
|
|
239
|
+
# Check source and prior file existence
|
|
177
240
|
if check_sources:
|
|
178
241
|
base_path = source_file.parent if source_file else None
|
|
179
242
|
result.diagnostics.extend(lint_source_exists(record, base_path, idx))
|
|
243
|
+
result.diagnostics.extend(lint_prior_exists(record, base_path, idx))
|
|
244
|
+
|
|
245
|
+
# Check line range validity
|
|
246
|
+
result.diagnostics.extend(lint_line_range(record, idx))
|
|
180
247
|
|
|
181
248
|
# Check canonical format
|
|
182
249
|
if check_canonical and result.records and not result.has_errors:
|
markback/parser.py
CHANGED
|
@@ -17,7 +17,7 @@ from .types import (
|
|
|
17
17
|
|
|
18
18
|
|
|
19
19
|
# Known header keywords
|
|
20
|
-
KNOWN_HEADERS = {"uri", "source"}
|
|
20
|
+
KNOWN_HEADERS = {"uri", "source", "prior"}
|
|
21
21
|
|
|
22
22
|
# Patterns
|
|
23
23
|
HEADER_PATTERN = re.compile(r"^@([a-z]+)\s+(.+)$")
|
|
@@ -147,6 +147,8 @@ def parse_string(
|
|
|
147
147
|
uri = current_headers.get("uri") or pending_uri
|
|
148
148
|
source_str = current_headers.get("source")
|
|
149
149
|
source = SourceRef(source_str) if source_str else None
|
|
150
|
+
prior_str = current_headers.get("prior")
|
|
151
|
+
prior = SourceRef(prior_str) if prior_str else None
|
|
150
152
|
|
|
151
153
|
content = None
|
|
152
154
|
if current_content_lines:
|
|
@@ -163,6 +165,7 @@ def parse_string(
|
|
|
163
165
|
feedback=feedback,
|
|
164
166
|
uri=uri,
|
|
165
167
|
source=source,
|
|
168
|
+
prior=prior,
|
|
166
169
|
content=content,
|
|
167
170
|
_source_file=source_file,
|
|
168
171
|
_start_line=current_start_line,
|
|
@@ -239,13 +242,16 @@ def parse_string(
|
|
|
239
242
|
line_num,
|
|
240
243
|
)
|
|
241
244
|
|
|
242
|
-
# Use any pending @uri from previous line
|
|
245
|
+
# Use any pending @uri from previous line and @prior if present
|
|
243
246
|
uri = pending_uri or current_headers.get("uri")
|
|
247
|
+
prior_str = current_headers.get("prior")
|
|
248
|
+
prior = SourceRef(prior_str) if prior_str else None
|
|
244
249
|
|
|
245
250
|
record = Record(
|
|
246
251
|
feedback=feedback or "",
|
|
247
252
|
uri=uri,
|
|
248
253
|
source=source,
|
|
254
|
+
prior=prior,
|
|
249
255
|
content=None,
|
|
250
256
|
_source_file=source_file,
|
|
251
257
|
_start_line=current_start_line,
|
markback/types.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"""Core types for MarkBack format."""
|
|
2
2
|
|
|
3
|
+
import re
|
|
3
4
|
from dataclasses import dataclass, field
|
|
4
5
|
from enum import Enum
|
|
5
6
|
from pathlib import Path
|
|
@@ -25,6 +26,7 @@ class ErrorCode(Enum):
|
|
|
25
26
|
E008 = "E008" # Unclosed quote in structured attribute value
|
|
26
27
|
E009 = "E009" # Empty feedback (nothing after <<< )
|
|
27
28
|
E010 = "E010" # Missing blank line before inline content
|
|
29
|
+
E011 = "E011" # Invalid line range (end < start)
|
|
28
30
|
|
|
29
31
|
|
|
30
32
|
class WarningCode(Enum):
|
|
@@ -37,6 +39,7 @@ class WarningCode(Enum):
|
|
|
37
39
|
W006 = "W006" # Missing @uri (record has no identifier)
|
|
38
40
|
W007 = "W007" # Paired feedback file not found
|
|
39
41
|
W008 = "W008" # Non-canonical formatting detected
|
|
42
|
+
W009 = "W009" # @prior file not found
|
|
40
43
|
|
|
41
44
|
|
|
42
45
|
@dataclass
|
|
@@ -75,29 +78,67 @@ class Diagnostic:
|
|
|
75
78
|
}
|
|
76
79
|
|
|
77
80
|
|
|
81
|
+
# Regex to parse line range from a path: path:start or path:start-end
|
|
82
|
+
_LINE_RANGE_PATTERN = re.compile(r'^(.+?):(\d+)(?:-(\d+))?$')
|
|
83
|
+
|
|
84
|
+
|
|
78
85
|
@dataclass
|
|
79
86
|
class SourceRef:
|
|
80
87
|
"""Reference to external content (file path or URI)."""
|
|
81
88
|
value: str
|
|
82
89
|
is_uri: bool = False
|
|
90
|
+
start_line: Optional[int] = None
|
|
91
|
+
end_line: Optional[int] = None
|
|
92
|
+
_path_only: str = ""
|
|
83
93
|
|
|
84
94
|
def __post_init__(self):
|
|
85
|
-
#
|
|
95
|
+
# Parse line range if present
|
|
96
|
+
self._parse_line_range()
|
|
97
|
+
|
|
98
|
+
# Determine if this is a URI or file path (using path without line range)
|
|
86
99
|
if not self.is_uri:
|
|
87
|
-
parsed = urlparse(self.value)
|
|
100
|
+
parsed = urlparse(self._path_only)
|
|
88
101
|
# Consider it a URI if it has a scheme that's not a Windows drive letter
|
|
89
102
|
self.is_uri = bool(parsed.scheme) and len(parsed.scheme) > 1
|
|
90
103
|
|
|
104
|
+
def _parse_line_range(self):
|
|
105
|
+
"""Parse optional line range from value."""
|
|
106
|
+
match = _LINE_RANGE_PATTERN.match(self.value)
|
|
107
|
+
if match:
|
|
108
|
+
self._path_only = match.group(1)
|
|
109
|
+
self.start_line = int(match.group(2))
|
|
110
|
+
if match.group(3):
|
|
111
|
+
self.end_line = int(match.group(3))
|
|
112
|
+
else:
|
|
113
|
+
# Single line reference: start and end are the same
|
|
114
|
+
self.end_line = self.start_line
|
|
115
|
+
else:
|
|
116
|
+
self._path_only = self.value
|
|
117
|
+
|
|
118
|
+
@property
|
|
119
|
+
def path(self) -> str:
|
|
120
|
+
"""Return path without line range."""
|
|
121
|
+
return self._path_only
|
|
122
|
+
|
|
123
|
+
@property
|
|
124
|
+
def line_range_str(self) -> Optional[str]:
|
|
125
|
+
"""Return formatted line range string, or None if no range."""
|
|
126
|
+
if self.start_line is None:
|
|
127
|
+
return None
|
|
128
|
+
if self.start_line == self.end_line:
|
|
129
|
+
return f":{self.start_line}"
|
|
130
|
+
return f":{self.start_line}-{self.end_line}"
|
|
131
|
+
|
|
91
132
|
def resolve(self, base_path: Optional[Path] = None) -> Path:
|
|
92
133
|
"""Resolve to a file path (relative paths resolved against base_path)."""
|
|
93
134
|
if self.is_uri:
|
|
94
|
-
parsed = urlparse(self.value)
|
|
135
|
+
parsed = urlparse(self._path_only)
|
|
95
136
|
if parsed.scheme == "file":
|
|
96
137
|
# file:// URI
|
|
97
138
|
return Path(parsed.path)
|
|
98
139
|
raise ValueError(f"Cannot resolve non-file URI to path: {self.value}")
|
|
99
140
|
|
|
100
|
-
path = Path(self.value)
|
|
141
|
+
path = Path(self._path_only)
|
|
101
142
|
if path.is_absolute():
|
|
102
143
|
return path
|
|
103
144
|
if base_path:
|
|
@@ -122,6 +163,7 @@ class Record:
|
|
|
122
163
|
feedback: str
|
|
123
164
|
uri: Optional[str] = None
|
|
124
165
|
source: Optional[SourceRef] = None
|
|
166
|
+
prior: Optional[SourceRef] = None
|
|
125
167
|
content: Optional[str] = None
|
|
126
168
|
metadata: dict = field(default_factory=dict)
|
|
127
169
|
|
|
@@ -154,6 +196,7 @@ class Record:
|
|
|
154
196
|
return {
|
|
155
197
|
"uri": self.uri,
|
|
156
198
|
"source": str(self.source) if self.source else None,
|
|
199
|
+
"prior": str(self.prior) if self.prior else None,
|
|
157
200
|
"content": self.content,
|
|
158
201
|
"feedback": self.feedback,
|
|
159
202
|
"metadata": self.metadata,
|
markback/writer.py
CHANGED
|
@@ -38,15 +38,19 @@ def write_record_canonical(
|
|
|
38
38
|
)
|
|
39
39
|
|
|
40
40
|
if use_compact:
|
|
41
|
-
# Compact format: @uri on its own line (if present), then @source ... <<<
|
|
41
|
+
# Compact format: @uri on its own line (if present), then @prior, then @source ... <<<
|
|
42
42
|
if record.uri:
|
|
43
43
|
lines.append(f"@uri {record.uri}")
|
|
44
|
+
if record.prior:
|
|
45
|
+
lines.append(f"@prior {record.prior}")
|
|
44
46
|
lines.append(f"@source {record.source} <<< {record.feedback}")
|
|
45
47
|
else:
|
|
46
48
|
# Full format
|
|
47
|
-
# Headers: @uri first, then @source
|
|
49
|
+
# Headers: @uri first, then @prior, then @source
|
|
48
50
|
if record.uri:
|
|
49
51
|
lines.append(f"@uri {record.uri}")
|
|
52
|
+
if record.prior:
|
|
53
|
+
lines.append(f"@prior {record.prior}")
|
|
50
54
|
if record.source:
|
|
51
55
|
lines.append(f"@source {record.source}")
|
|
52
56
|
|
|
@@ -147,6 +151,9 @@ def write_label_file(record: Record) -> str:
|
|
|
147
151
|
|
|
148
152
|
if record.uri:
|
|
149
153
|
lines.append(f"@uri {record.uri}")
|
|
154
|
+
|
|
155
|
+
if record.prior:
|
|
156
|
+
lines.append(f"@prior {record.prior}")
|
|
150
157
|
|
|
151
158
|
lines.append(f"<<< {record.feedback}")
|
|
152
159
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: markback
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.1.2
|
|
4
4
|
Summary: A compact, human-writable format for storing content paired with feedback/labels
|
|
5
5
|
Project-URL: Homepage, https://github.com/dandriscoll/markback
|
|
6
6
|
Project-URL: Repository, https://github.com/dandriscoll/markback
|
|
@@ -196,6 +196,16 @@ Second content.
|
|
|
196
196
|
@source ./images/003.jpg <<< approved; scene=mountain
|
|
197
197
|
```
|
|
198
198
|
|
|
199
|
+
### With Prior Reference
|
|
200
|
+
|
|
201
|
+
Use `@prior` to reference an item that precedes the source (e.g., a prompt that generated an image):
|
|
202
|
+
|
|
203
|
+
```
|
|
204
|
+
@uri local:generated-001
|
|
205
|
+
@prior ./prompts/sunset-prompt.txt
|
|
206
|
+
@source ./images/generated-sunset.jpg <<< accurate; matches prompt well
|
|
207
|
+
```
|
|
208
|
+
|
|
199
209
|
### Paired Files
|
|
200
210
|
|
|
201
211
|
**content.txt:**
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
markback/__init__.py,sha256=B0-2dpUu5nkbnUI0hPz-x7PHiOl7M-tiRi6s3UCYJFk,1540
|
|
2
|
+
markback/cli.py,sha256=5wMk1OUG7W_voS9DxeFxRJrBTMabEdOK_s_o3Irxuu0,13639
|
|
3
|
+
markback/config.py,sha256=eTVhb7UwDER9FRYo8QUAvneLHSqXD2ZtLUgtBtnljUs,5455
|
|
4
|
+
markback/linter.py,sha256=IctgPEFNfGDo5DcELdo0Ni3d7Dp0bIWtlDw61ccWDOQ,11210
|
|
5
|
+
markback/llm.py,sha256=ON5_2C6v4KIk7_aIceulfWjEEI6hmallaPlLv-1-s_o,4692
|
|
6
|
+
markback/parser.py,sha256=P7GRjlwhy8j6Tnub7XAqILtZ4pFdkfdHhB-aIjLVRYU,18881
|
|
7
|
+
markback/types.py,sha256=tFunBAoqUEVf9mi_4N1QwWmOsKt0nocZK7M7K_rybWg,10097
|
|
8
|
+
markback/workflow.py,sha256=zC1RUm1i1wgiciFDqUilJKJ0-bgInvctxhQ0h5WSdoQ,10485
|
|
9
|
+
markback/writer.py,sha256=3-LeupyuruGv4WZH9pV65hU4YDKuC5HgIIZ8YZ2SZnM,7896
|
|
10
|
+
markback-0.1.2.dist-info/METADATA,sha256=d_xMmpicyEYeakJ4hA8SsrlGgj5Qxpd6fMv7tKf-eaI,5133
|
|
11
|
+
markback-0.1.2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
12
|
+
markback-0.1.2.dist-info/entry_points.txt,sha256=Bc9aXvtlPxVPuOJ9BWGngAVrkx5dMvRgujjVzXC-V5U,46
|
|
13
|
+
markback-0.1.2.dist-info/licenses/LICENSE,sha256=lLK1n13C_CXb0M10O-6itEIDY6dsXKutZYQH-09n6s0,1068
|
|
14
|
+
markback-0.1.2.dist-info/RECORD,,
|
markback-0.1.0.dist-info/RECORD
DELETED
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
markback/__init__.py,sha256=B0-2dpUu5nkbnUI0hPz-x7PHiOl7M-tiRi6s3UCYJFk,1540
|
|
2
|
-
markback/cli.py,sha256=5wMk1OUG7W_voS9DxeFxRJrBTMabEdOK_s_o3Irxuu0,13639
|
|
3
|
-
markback/config.py,sha256=eTVhb7UwDER9FRYo8QUAvneLHSqXD2ZtLUgtBtnljUs,5455
|
|
4
|
-
markback/linter.py,sha256=6jrfngF4PiYFQlDddm09OEmVSSGwacE5YFxMub5mqlA,8707
|
|
5
|
-
markback/llm.py,sha256=ON5_2C6v4KIk7_aIceulfWjEEI6hmallaPlLv-1-s_o,4692
|
|
6
|
-
markback/parser.py,sha256=5CrLeOWGuiE0_BOK9dJUnLLrJ72KTmucyOQEzR1nDh4,18570
|
|
7
|
-
markback/types.py,sha256=rRy41h1ZYYP9lo_FhvP5X5-OlwEM7vzHCw--Sq5L2LA,8564
|
|
8
|
-
markback/workflow.py,sha256=zC1RUm1i1wgiciFDqUilJKJ0-bgInvctxhQ0h5WSdoQ,10485
|
|
9
|
-
markback/writer.py,sha256=v5KT2o2Ma2I9I4U-r06PgzKyqwQFSsx49Ri5qIsovhY,7645
|
|
10
|
-
markback-0.1.0.dist-info/METADATA,sha256=aqRMZiWqsEExkqi_dwSyWtcFb3uhOx2Ol6HbNBU7Ggw,4864
|
|
11
|
-
markback-0.1.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
12
|
-
markback-0.1.0.dist-info/entry_points.txt,sha256=Bc9aXvtlPxVPuOJ9BWGngAVrkx5dMvRgujjVzXC-V5U,46
|
|
13
|
-
markback-0.1.0.dist-info/licenses/LICENSE,sha256=lLK1n13C_CXb0M10O-6itEIDY6dsXKutZYQH-09n6s0,1068
|
|
14
|
-
markback-0.1.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|