markback 0.1.1__tar.gz → 0.1.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- markback-0.1.3/.claude/settings.local.json +17 -0
- {markback-0.1.1 → markback-0.1.3}/PKG-INFO +1 -1
- {markback-0.1.1 → markback-0.1.3}/SPEC.md +102 -13
- {markback-0.1.1 → markback-0.1.3}/markback/linter.py +64 -0
- {markback-0.1.1 → markback-0.1.3}/markback/parser.py +6 -2
- {markback-0.1.1 → markback-0.1.3}/markback/types.py +69 -4
- {markback-0.1.1 → markback-0.1.3}/markback/writer.py +10 -3
- {markback-0.1.1 → markback-0.1.3}/packages/markbackjs/package.json +1 -1
- {markback-0.1.1 → markback-0.1.3}/packages/markbackjs/src/linter.ts +68 -0
- {markback-0.1.1 → markback-0.1.3}/packages/markbackjs/src/parser.ts +6 -2
- {markback-0.1.1 → markback-0.1.3}/packages/markbackjs/src/types.ts +85 -7
- {markback-0.1.1 → markback-0.1.3}/packages/markbackjs/src/writer.ts +6 -0
- markback-0.1.3/packages/markbackjs/test/linter.test.js +265 -0
- {markback-0.1.1 → markback-0.1.3}/pyproject.toml +1 -1
- markback-0.1.3/scripts/publish-npm.sh +11 -0
- markback-0.1.3/scripts/publish-pypi.sh +14 -0
- markback-0.1.3/scripts/publish.sh +16 -0
- markback-0.1.3/tests/test_linter.py +455 -0
- markback-0.1.1/.claude/settings.local.json +0 -9
- markback-0.1.1/packages/markbackjs/test/linter.test.js +0 -107
- markback-0.1.1/tests/test_linter.py +0 -247
- {markback-0.1.1 → markback-0.1.3}/.gitignore +0 -0
- {markback-0.1.1 → markback-0.1.3}/.ishipped/card.md +0 -0
- {markback-0.1.1 → markback-0.1.3}/IMPLEMENTATION_NOTES.md +0 -0
- {markback-0.1.1 → markback-0.1.3}/LICENSE +0 -0
- {markback-0.1.1 → markback-0.1.3}/README.md +0 -0
- {markback-0.1.1 → markback-0.1.3}/markback/__init__.py +0 -0
- {markback-0.1.1 → markback-0.1.3}/markback/cli.py +0 -0
- {markback-0.1.1 → markback-0.1.3}/markback/config.py +0 -0
- {markback-0.1.1 → markback-0.1.3}/markback/llm.py +0 -0
- {markback-0.1.1 → markback-0.1.3}/markback/workflow.py +0 -0
- {markback-0.1.1 → markback-0.1.3}/packages/markbackjs/LICENSE +0 -0
- {markback-0.1.1 → markback-0.1.3}/packages/markbackjs/README.md +0 -0
- {markback-0.1.1 → markback-0.1.3}/packages/markbackjs/package-lock.json +0 -0
- {markback-0.1.1 → markback-0.1.3}/packages/markbackjs/src/index.ts +0 -0
- {markback-0.1.1 → markback-0.1.3}/packages/markbackjs/tsconfig.json +0 -0
- {markback-0.1.1 → markback-0.1.3}/tests/__init__.py +0 -0
- {markback-0.1.1 → markback-0.1.3}/tests/fixtures/compact_source.mb +0 -0
- {markback-0.1.1 → markback-0.1.3}/tests/fixtures/errors/content_with_source.mb +0 -0
- {markback-0.1.1 → markback-0.1.3}/tests/fixtures/errors/empty_feedback.mb +0 -0
- {markback-0.1.1 → markback-0.1.3}/tests/fixtures/errors/malformed_uri.mb +0 -0
- {markback-0.1.1 → markback-0.1.3}/tests/fixtures/errors/missing_feedback.mb +0 -0
- {markback-0.1.1 → markback-0.1.3}/tests/fixtures/errors/multiple_feedback.mb +0 -0
- {markback-0.1.1 → markback-0.1.3}/tests/fixtures/essay.label.txt +0 -0
- {markback-0.1.1 → markback-0.1.3}/tests/fixtures/essay.txt +0 -0
- {markback-0.1.1 → markback-0.1.3}/tests/fixtures/external_source.mb +0 -0
- {markback-0.1.1 → markback-0.1.3}/tests/fixtures/freeform_feedback.mb +0 -0
- {markback-0.1.1 → markback-0.1.3}/tests/fixtures/json_feedback.mb +0 -0
- {markback-0.1.1 → markback-0.1.3}/tests/fixtures/label_list.mb +0 -0
- {markback-0.1.1 → markback-0.1.3}/tests/fixtures/minimal.mb +0 -0
- {markback-0.1.1 → markback-0.1.3}/tests/fixtures/multi_record.mb +0 -0
- {markback-0.1.1 → markback-0.1.3}/tests/fixtures/with_uri.mb +0 -0
- {markback-0.1.1 → markback-0.1.3}/tests/test_cli.py +0 -0
- {markback-0.1.1 → markback-0.1.3}/tests/test_config.py +0 -0
- {markback-0.1.1 → markback-0.1.3}/tests/test_parser.py +0 -0
- {markback-0.1.1 → markback-0.1.3}/tests/test_types.py +0 -0
- {markback-0.1.1 → markback-0.1.3}/tests/test_workflow.py +0 -0
- {markback-0.1.1 → markback-0.1.3}/tests/test_writer.py +0 -0
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
{
|
|
2
|
+
"permissions": {
|
|
3
|
+
"allow": [
|
|
4
|
+
"Bash(python -m pytest:*)",
|
|
5
|
+
"Bash(npm test:*)",
|
|
6
|
+
"Bash(npm install)",
|
|
7
|
+
"Bash(npm run build:*)",
|
|
8
|
+
"Bash(echo:*)",
|
|
9
|
+
"Bash(python -m markback lint:*)",
|
|
10
|
+
"Bash(python:*)",
|
|
11
|
+
"Bash(python3 -m pytest:*)",
|
|
12
|
+
"Bash(pip3 install:*)",
|
|
13
|
+
"Bash(.venv/bin/python -m pytest:*)",
|
|
14
|
+
"Bash(chmod:*)"
|
|
15
|
+
]
|
|
16
|
+
}
|
|
17
|
+
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: markback
|
|
3
|
-
Version: 0.1.1
|
|
3
|
+
Version: 0.1.3
|
|
4
4
|
Summary: A compact, human-writable format for storing content paired with feedback/labels
|
|
5
5
|
Project-URL: Homepage, https://github.com/dandriscoll/markback
|
|
6
6
|
Project-URL: Repository, https://github.com/dandriscoll/markback
|
|
@@ -27,6 +27,7 @@ A MarkBack **record** is the fundamental unit. Every record has:
|
|
|
27
27
|
| `content` | Yes* | The content being labeled (inline or referenced) |
|
|
28
28
|
| `feedback` | Yes | Text after the `<<<` delimiter (always one line) |
|
|
29
29
|
| `uri` | No | Unique identifier for the item |
|
|
30
|
+
| `by` | No | Freeform identifier for who provided the feedback |
|
|
30
31
|
| `source` | No | Reference to external content (when content is not inline) |
|
|
31
32
|
| `prior` | No | Reference to an item that precedes the source (e.g., a prompt that generated the content) |
|
|
32
33
|
|
|
@@ -66,6 +67,7 @@ Header lines appear at the start of a record and begin with `@`. They define met
|
|
|
66
67
|
|
|
67
68
|
```
|
|
68
69
|
@uri <uri-value>
|
|
70
|
+
@by <freeform-text>
|
|
69
71
|
@source <path-or-uri>
|
|
70
72
|
@prior <path-or-uri>
|
|
71
73
|
```
|
|
@@ -95,7 +97,23 @@ Defines the unique identifier for this record.
|
|
|
95
97
|
|
|
96
98
|
**Validation:** URI MUST be valid per RFC 3986. Parsers MUST reject malformed URIs as errors.
|
|
97
99
|
|
|
98
|
-
#### 3.1.2 `@source` Header
|
|
100
|
+
#### 3.1.2 `@by` Header
|
|
101
|
+
|
|
102
|
+
Identifies who provided the feedback. The value is freeform text.
|
|
103
|
+
|
|
104
|
+
```
|
|
105
|
+
@by dan@example.com
|
|
106
|
+
@by Dan Driscoll
|
|
107
|
+
@by reviewer-42
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
**Rules:**
|
|
111
|
+
- Value is freeform text extending to end of line (trailing whitespace trimmed)
|
|
112
|
+
- Can contain any characters including spaces, special characters, etc.
|
|
113
|
+
- Commonly used for email addresses, usernames, or full names
|
|
114
|
+
- Optional - records without `@by` are valid
|
|
115
|
+
|
|
116
|
+
#### 3.1.3 `@source` Header
|
|
99
117
|
|
|
100
118
|
References external content instead of inline content.
|
|
101
119
|
|
|
@@ -111,7 +129,7 @@ References external content instead of inline content.
|
|
|
111
129
|
- When `@source` is present, inline content MUST be empty (or contain only whitespace)
|
|
112
130
|
- Parsers MUST verify referenced files exist (warning if missing)
|
|
113
131
|
|
|
114
|
-
#### 3.1.3 `@prior` Header
|
|
132
|
+
#### 3.1.4 `@prior` Header
|
|
115
133
|
|
|
116
134
|
References an item that precedes the source material. For example, if the source is an image generated by an LLM, the prior could be the prompt that was used to create it.
|
|
117
135
|
|
|
@@ -127,6 +145,37 @@ References an item that precedes the source material. For example, if the source
|
|
|
127
145
|
- `@prior` does not affect content handling (inline content or `@source` rules still apply)
|
|
128
146
|
- Parsers SHOULD verify referenced files exist (warning if missing)
|
|
129
147
|
|
|
148
|
+
#### 3.1.5 Line and Character Range Specification
|
|
149
|
+
|
|
150
|
+
Both `@source` and `@prior` headers support optional line and character range specifications using colon notation. This allows referencing specific positions within a file.
|
|
151
|
+
|
|
152
|
+
**Syntax:**
|
|
153
|
+
- Line only: `<path-or-uri>:<line>` or `<path-or-uri>:<start-line>-<end-line>`
|
|
154
|
+
- With columns: `<path-or-uri>:<line>:<col>` or `<path-or-uri>:<start-line>:<start-col>-<end-line>:<end-col>`
|
|
155
|
+
|
|
156
|
+
```
|
|
157
|
+
@source ./code.py:42
|
|
158
|
+
@source ./code.py:42-50
|
|
159
|
+
@source ./code.py:42:10
|
|
160
|
+
@source ./code.py:42:10-42:25
|
|
161
|
+
@source ./code.py:10:5-15:20
|
|
162
|
+
@prior ./prompts/template.txt:1-20
|
|
163
|
+
@source https://example.com/file.txt:100-150
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
**Rules:**
|
|
167
|
+
- Line and column numbers are 1-indexed (first line/column is 1)
|
|
168
|
+
- Single line: `:N` references line N only
|
|
169
|
+
- Line range: `:N-M` references lines N through M (inclusive)
|
|
170
|
+
- Single position: `:N:C` references line N, column C
|
|
171
|
+
- Character range: `:N:C-M:D` references from line N column C to line M column D (inclusive)
|
|
172
|
+
- End position must be greater than or equal to start position (E011 error otherwise)
|
|
173
|
+
- If on same line: end column must be >= start column
|
|
174
|
+
- If on different lines: end line must be >= start line
|
|
175
|
+
- Ranges are informational metadata; parsers do not validate that referenced positions exist in the file
|
|
176
|
+
- Windows drive letters (e.g., `C:\path`) are not confused with line ranges or URI schemes: a range suffix must be numeric, and scheme detection requires a scheme longer than one character
|
|
177
|
+
- Column specification is optional; you can specify `:10:5-20` (start with column, end without)
|
|
178
|
+
|
|
130
179
|
### 3.2 Content Block
|
|
131
180
|
|
|
132
181
|
Content is everything between headers and the `<<<` feedback delimiter.
|
|
@@ -460,7 +509,7 @@ Canonical form ensures consistent output for comparison and version control.
|
|
|
460
509
|
### 5.2 Canonicalization Rules
|
|
461
510
|
|
|
462
511
|
1. **Line endings:** Normalize to `\n` (LF)
|
|
463
|
-
2. **Header order:** `@uri` before `@prior` before `@source` before unknown headers (alphabetical)
|
|
512
|
+
2. **Header order:** `@uri` before `@by` before `@prior` before `@source` before unknown headers (alphabetical)
|
|
464
513
|
3. **Header spacing:** Exactly one space after keyword
|
|
465
514
|
4. **Trailing whitespace:** Remove from all lines
|
|
466
515
|
5. **Content whitespace:** Preserve internal whitespace; trim leading/trailing blank lines
|
|
@@ -575,6 +624,7 @@ Each line is classified as one of:
|
|
|
575
624
|
| E008 | Unclosed quote in structured attribute value (only in `structured` parse mode) |
|
|
576
625
|
| E009 | Empty feedback (nothing after `<<< `) |
|
|
577
626
|
| E010 | Missing blank line before inline content (content starts with `@`) |
|
|
627
|
+
| E011 | Invalid line or character range (end position before start position) |
|
|
578
628
|
|
|
579
629
|
### 7.2 Warnings (SHOULD fix)
|
|
580
630
|
|
|
@@ -656,7 +706,42 @@ Spring whispers goodbye.
|
|
|
656
706
|
<<< creative; follows haiku structure; quality=excellent
|
|
657
707
|
```
|
|
658
708
|
|
|
659
|
-
### 8.5 Single-File Example
|
|
709
|
+
### 8.5 Record with Attribution
|
|
710
|
+
|
|
711
|
+
```
|
|
712
|
+
@uri local:review-001
|
|
713
|
+
@by dan@example.com
|
|
714
|
+
|
|
715
|
+
This code needs better error handling.
|
|
716
|
+
<<< actionable; priority=high
|
|
717
|
+
```
|
|
718
|
+
|
|
719
|
+
Or with a full name:
|
|
720
|
+
```
|
|
721
|
+
@uri local:review-002
|
|
722
|
+
@by Dan Driscoll
|
|
723
|
+
@source ./src/app.py
|
|
724
|
+
<<< approved; good code quality
|
|
725
|
+
```
|
|
726
|
+
|
|
727
|
+
### 8.6 Character-Level References
|
|
728
|
+
|
|
729
|
+
Reference a specific position in a file:
|
|
730
|
+
```
|
|
731
|
+
@source ./code.py:42:10 <<< potential bug at this position
|
|
732
|
+
```
|
|
733
|
+
|
|
734
|
+
Reference a character range on a single line:
|
|
735
|
+
```
|
|
736
|
+
@source ./code.py:42:10-42:25 <<< consider renaming this variable
|
|
737
|
+
```
|
|
738
|
+
|
|
739
|
+
Reference a multi-line character range:
|
|
740
|
+
```
|
|
741
|
+
@source ./code.py:10:5-15:20 <<< this function needs refactoring
|
|
742
|
+
```
|
|
743
|
+
|
|
744
|
+
### 8.7 Single-File Example
|
|
660
745
|
|
|
661
746
|
**File:** `question.mb`
|
|
662
747
|
```
|
|
@@ -666,7 +751,7 @@ Explain quantum entanglement in simple terms.
|
|
|
666
751
|
<<< quality=excellent; accuracy=high; clarity=good
|
|
667
752
|
```
|
|
668
753
|
|
|
669
|
-
### 8.6 Label List Example (Compact Format)
|
|
754
|
+
### 8.8 Label List Example (Compact Format)
|
|
670
755
|
|
|
671
756
|
**File:** `image-annotations.mb`
|
|
672
757
|
```
|
|
@@ -698,7 +783,7 @@ Explain quantum entanglement in simple terms.
|
|
|
698
783
|
@source ./batch1/item3.txt <<< positive; excellent clarity
|
|
699
784
|
```
|
|
700
785
|
|
|
701
|
-
### 8.7 Multi-Record Example (Mixed Freeform and Structured)
|
|
786
|
+
### 8.9 Multi-Record Example (Mixed Freeform and Structured)
|
|
702
787
|
|
|
703
788
|
**File:** `training-data.mb`
|
|
704
789
|
```
|
|
@@ -729,7 +814,7 @@ Please write a formal letter requesting a meeting.
|
|
|
729
814
|
@source ./audio/sample-005.wav <<< transcription="Hello world"; quality=clear; language=en
|
|
730
815
|
```
|
|
731
816
|
|
|
732
|
-
### 8.8 Paired-File Example
|
|
817
|
+
### 8.10 Paired-File Example
|
|
733
818
|
|
|
734
819
|
**Content file:** `essay.txt`
|
|
735
820
|
```
|
|
@@ -745,7 +830,7 @@ agriculture, manufacturing, mining, and transport.
|
|
|
745
830
|
<<< good; grade=B+; well structured but needs more specific examples
|
|
746
831
|
```
|
|
747
832
|
|
|
748
|
-
### 8.9 Freeform Feedback Examples
|
|
833
|
+
### 8.11 Freeform Feedback Examples
|
|
749
834
|
|
|
750
835
|
Various styles of freeform feedback:
|
|
751
836
|
|
|
@@ -768,7 +853,7 @@ Explain machine learning to a child.
|
|
|
768
853
|
<<< needs work; the explanation assumes too much prior knowledge
|
|
769
854
|
```
|
|
770
855
|
|
|
771
|
-
### 8.10 Complex Structured Feedback (JSON)
|
|
856
|
+
### 8.12 Complex Structured Feedback (JSON)
|
|
772
857
|
|
|
773
858
|
```
|
|
774
859
|
@uri local:complex-example
|
|
@@ -777,7 +862,7 @@ Multi-attribute content with special characters.
|
|
|
777
862
|
<<< json:{"rating":4.5,"tags":["important","review"],"notes":"Contains \"quoted\" text and; semicolons","scores":{"accuracy":0.9,"relevance":0.85}}
|
|
778
863
|
```
|
|
779
864
|
|
|
780
|
-
### 8.11 Image with MarkBack Sidecar
|
|
865
|
+
### 8.13 Image with MarkBack Sidecar
|
|
781
866
|
|
|
782
867
|
**Content file:** `diagram.png` (binary)
|
|
783
868
|
|
|
@@ -860,11 +945,15 @@ feedback = "<<<" SP feedback-content LF
|
|
|
860
945
|
feedback-content = *VCHAR ; no LF allowed
|
|
861
946
|
|
|
862
947
|
; Compact record (single line, external source only)
|
|
863
|
-
compact-record = [uri-line] source-feedback-line
|
|
948
|
+
compact-record = [uri-line] [by-line] [prior-line] source-feedback-line
|
|
864
949
|
compact-list = compact-record *(1*blank-line compact-record)
|
|
865
950
|
uri-line = "@uri" SP value LF
|
|
866
|
-
|
|
867
|
-
|
|
951
|
+
by-line = "@by" SP value LF
|
|
952
|
+
prior-line = "@prior" SP path-with-range LF
|
|
953
|
+
source-feedback-line = "@source" SP path-with-range SP "<<<" SP feedback-content LF
|
|
954
|
+
path-with-range = path [position-range] ; path with optional position range
|
|
955
|
+
path = 1*VCHAR ; ends at space before <<< or position-range
|
|
956
|
+
position-range = ":" 1*DIGIT [":" 1*DIGIT] ["-" 1*DIGIT [":" 1*DIGIT]]
|
|
868
957
|
|
|
869
958
|
LOWER = %x61-7A ; a-z
|
|
870
959
|
SP = %x20 ; space
|
|
@@ -137,6 +137,67 @@ def lint_prior_exists(
|
|
|
137
137
|
return diagnostics
|
|
138
138
|
|
|
139
139
|
|
|
140
|
+
def _is_position_invalid(source_ref) -> tuple[bool, str]:
|
|
141
|
+
"""Check if a SourceRef has an invalid position range.
|
|
142
|
+
|
|
143
|
+
Returns (is_invalid, error_message).
|
|
144
|
+
Position is invalid if:
|
|
145
|
+
- end_line < start_line
|
|
146
|
+
- end_line == start_line and end_column < start_column
|
|
147
|
+
"""
|
|
148
|
+
if source_ref.start_line is None or source_ref.end_line is None:
|
|
149
|
+
return False, ""
|
|
150
|
+
|
|
151
|
+
if source_ref.end_line < source_ref.start_line:
|
|
152
|
+
return True, f"end line {source_ref.end_line} is less than start line {source_ref.start_line}"
|
|
153
|
+
|
|
154
|
+
if source_ref.end_line == source_ref.start_line:
|
|
155
|
+
if (source_ref.start_column is not None and
|
|
156
|
+
source_ref.end_column is not None and
|
|
157
|
+
source_ref.end_column < source_ref.start_column):
|
|
158
|
+
return True, f"end column {source_ref.end_column} is less than start column {source_ref.start_column} on line {source_ref.start_line}"
|
|
159
|
+
|
|
160
|
+
return False, ""
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def lint_line_range(
|
|
164
|
+
record: Record,
|
|
165
|
+
record_idx: int,
|
|
166
|
+
) -> list[Diagnostic]:
|
|
167
|
+
"""Check if line/character ranges are valid (end position >= start position)."""
|
|
168
|
+
diagnostics: list[Diagnostic] = []
|
|
169
|
+
|
|
170
|
+
# Check @source range
|
|
171
|
+
if record.source and record.source.start_line is not None:
|
|
172
|
+
is_invalid, error_msg = _is_position_invalid(record.source)
|
|
173
|
+
if is_invalid:
|
|
174
|
+
diagnostics.append(Diagnostic(
|
|
175
|
+
file=record._source_file,
|
|
176
|
+
line=record._start_line,
|
|
177
|
+
column=None,
|
|
178
|
+
severity=Severity.ERROR,
|
|
179
|
+
code=ErrorCode.E011,
|
|
180
|
+
message=f"Invalid range in @source: {error_msg}",
|
|
181
|
+
record_index=record_idx,
|
|
182
|
+
))
|
|
183
|
+
|
|
184
|
+
# Check @prior range
|
|
185
|
+
if record.prior and record.prior.start_line is not None:
|
|
186
|
+
is_invalid, error_msg = _is_position_invalid(record.prior)
|
|
187
|
+
if is_invalid:
|
|
188
|
+
diagnostics.append(Diagnostic(
|
|
189
|
+
file=record._source_file,
|
|
190
|
+
line=record._start_line,
|
|
191
|
+
column=None,
|
|
192
|
+
severity=Severity.ERROR,
|
|
193
|
+
code=ErrorCode.E011,
|
|
194
|
+
message=f"Invalid range in @prior: {error_msg}",
|
|
195
|
+
record_index=record_idx,
|
|
196
|
+
))
|
|
197
|
+
|
|
198
|
+
return diagnostics
|
|
199
|
+
|
|
200
|
+
|
|
140
201
|
def lint_canonical_format(
|
|
141
202
|
records: list[Record],
|
|
142
203
|
original_text: str,
|
|
@@ -206,6 +267,9 @@ def lint_string(
|
|
|
206
267
|
result.diagnostics.extend(lint_source_exists(record, base_path, idx))
|
|
207
268
|
result.diagnostics.extend(lint_prior_exists(record, base_path, idx))
|
|
208
269
|
|
|
270
|
+
# Check line range validity
|
|
271
|
+
result.diagnostics.extend(lint_line_range(record, idx))
|
|
272
|
+
|
|
209
273
|
# Check canonical format
|
|
210
274
|
if check_canonical and result.records and not result.has_errors:
|
|
211
275
|
result.diagnostics.extend(lint_canonical_format(
|
|
@@ -17,7 +17,7 @@ from .types import (
|
|
|
17
17
|
|
|
18
18
|
|
|
19
19
|
# Known header keywords
|
|
20
|
-
KNOWN_HEADERS = {"uri", "source", "prior"}
|
|
20
|
+
KNOWN_HEADERS = {"uri", "by", "source", "prior"}
|
|
21
21
|
|
|
22
22
|
# Patterns
|
|
23
23
|
HEADER_PATTERN = re.compile(r"^@([a-z]+)\s+(.+)$")
|
|
@@ -145,6 +145,7 @@ def parse_string(
|
|
|
145
145
|
nonlocal pending_uri, in_content, had_blank_line
|
|
146
146
|
|
|
147
147
|
uri = current_headers.get("uri") or pending_uri
|
|
148
|
+
by = current_headers.get("by")
|
|
148
149
|
source_str = current_headers.get("source")
|
|
149
150
|
source = SourceRef(source_str) if source_str else None
|
|
150
151
|
prior_str = current_headers.get("prior")
|
|
@@ -164,6 +165,7 @@ def parse_string(
|
|
|
164
165
|
record = Record(
|
|
165
166
|
feedback=feedback,
|
|
166
167
|
uri=uri,
|
|
168
|
+
by=by,
|
|
167
169
|
source=source,
|
|
168
170
|
prior=prior,
|
|
169
171
|
content=content,
|
|
@@ -242,14 +244,16 @@ def parse_string(
|
|
|
242
244
|
line_num,
|
|
243
245
|
)
|
|
244
246
|
|
|
245
|
-
# Use any pending @uri from previous line and @prior if present
|
|
247
|
+
# Use any pending @uri from previous line and @by, @prior if present
|
|
246
248
|
uri = pending_uri or current_headers.get("uri")
|
|
249
|
+
by = current_headers.get("by")
|
|
247
250
|
prior_str = current_headers.get("prior")
|
|
248
251
|
prior = SourceRef(prior_str) if prior_str else None
|
|
249
252
|
|
|
250
253
|
record = Record(
|
|
251
254
|
feedback=feedback or "",
|
|
252
255
|
uri=uri,
|
|
256
|
+
by=by,
|
|
253
257
|
source=source,
|
|
254
258
|
prior=prior,
|
|
255
259
|
content=None,
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"""Core types for MarkBack format."""
|
|
2
2
|
|
|
3
|
+
import re
|
|
3
4
|
from dataclasses import dataclass, field
|
|
4
5
|
from enum import Enum
|
|
5
6
|
from pathlib import Path
|
|
@@ -25,6 +26,7 @@ class ErrorCode(Enum):
|
|
|
25
26
|
E008 = "E008" # Unclosed quote in structured attribute value
|
|
26
27
|
E009 = "E009" # Empty feedback (nothing after <<< )
|
|
27
28
|
E010 = "E010" # Missing blank line before inline content
|
|
29
|
+
E011 = "E011" # Invalid line range (end < start)
|
|
28
30
|
|
|
29
31
|
|
|
30
32
|
class WarningCode(Enum):
|
|
@@ -76,29 +78,90 @@ class Diagnostic:
|
|
|
76
78
|
}
|
|
77
79
|
|
|
78
80
|
|
|
81
|
+
# Regex to parse line/character range from a path
|
|
82
|
+
# Supports: path:line, path:line:col, path:line-line, path:line:col-line:col
|
|
83
|
+
_LINE_RANGE_PATTERN = re.compile(r'^(.+?):(\d+)(?::(\d+))?(?:-(\d+)(?::(\d+))?)?$')
|
|
84
|
+
|
|
85
|
+
|
|
79
86
|
@dataclass
|
|
80
87
|
class SourceRef:
|
|
81
88
|
"""Reference to external content (file path or URI)."""
|
|
82
89
|
value: str
|
|
83
90
|
is_uri: bool = False
|
|
91
|
+
start_line: Optional[int] = None
|
|
92
|
+
end_line: Optional[int] = None
|
|
93
|
+
start_column: Optional[int] = None
|
|
94
|
+
end_column: Optional[int] = None
|
|
95
|
+
_path_only: str = ""
|
|
84
96
|
|
|
85
97
|
def __post_init__(self):
|
|
86
|
-
#
|
|
98
|
+
# Parse line range if present
|
|
99
|
+
self._parse_line_range()
|
|
100
|
+
|
|
101
|
+
# Determine if this is a URI or file path (using path without line range)
|
|
87
102
|
if not self.is_uri:
|
|
88
|
-
parsed = urlparse(self.
|
|
103
|
+
parsed = urlparse(self._path_only)
|
|
89
104
|
# Consider it a URI if it has a scheme that's not a Windows drive letter
|
|
90
105
|
self.is_uri = bool(parsed.scheme) and len(parsed.scheme) > 1
|
|
91
106
|
|
|
107
|
+
def _parse_line_range(self):
|
|
108
|
+
"""Parse optional line/character range from value."""
|
|
109
|
+
match = _LINE_RANGE_PATTERN.match(self.value)
|
|
110
|
+
if match:
|
|
111
|
+
self._path_only = match.group(1)
|
|
112
|
+
self.start_line = int(match.group(2))
|
|
113
|
+
if match.group(3):
|
|
114
|
+
self.start_column = int(match.group(3))
|
|
115
|
+
if match.group(4):
|
|
116
|
+
self.end_line = int(match.group(4))
|
|
117
|
+
if match.group(5):
|
|
118
|
+
self.end_column = int(match.group(5))
|
|
119
|
+
else:
|
|
120
|
+
# Single line/position reference: start and end are the same
|
|
121
|
+
self.end_line = self.start_line
|
|
122
|
+
self.end_column = self.start_column
|
|
123
|
+
else:
|
|
124
|
+
self._path_only = self.value
|
|
125
|
+
|
|
126
|
+
@property
|
|
127
|
+
def path(self) -> str:
|
|
128
|
+
"""Return path without line range."""
|
|
129
|
+
return self._path_only
|
|
130
|
+
|
|
131
|
+
@property
|
|
132
|
+
def line_range_str(self) -> Optional[str]:
|
|
133
|
+
"""Return formatted line/character range string, or None if no range."""
|
|
134
|
+
if self.start_line is None:
|
|
135
|
+
return None
|
|
136
|
+
|
|
137
|
+
# Build start position
|
|
138
|
+
if self.start_column is not None:
|
|
139
|
+
start = f":{self.start_line}:{self.start_column}"
|
|
140
|
+
else:
|
|
141
|
+
start = f":{self.start_line}"
|
|
142
|
+
|
|
143
|
+
# Check if end is the same as start (single position)
|
|
144
|
+
if self.start_line == self.end_line and self.start_column == self.end_column:
|
|
145
|
+
return start
|
|
146
|
+
|
|
147
|
+
# Build end position
|
|
148
|
+
if self.end_column is not None:
|
|
149
|
+
end = f"-{self.end_line}:{self.end_column}"
|
|
150
|
+
else:
|
|
151
|
+
end = f"-{self.end_line}"
|
|
152
|
+
|
|
153
|
+
return f"{start}{end}"
|
|
154
|
+
|
|
92
155
|
def resolve(self, base_path: Optional[Path] = None) -> Path:
|
|
93
156
|
"""Resolve to a file path (relative paths resolved against base_path)."""
|
|
94
157
|
if self.is_uri:
|
|
95
|
-
parsed = urlparse(self.
|
|
158
|
+
parsed = urlparse(self._path_only)
|
|
96
159
|
if parsed.scheme == "file":
|
|
97
160
|
# file:// URI
|
|
98
161
|
return Path(parsed.path)
|
|
99
162
|
raise ValueError(f"Cannot resolve non-file URI to path: {self.value}")
|
|
100
163
|
|
|
101
|
-
path = Path(self.
|
|
164
|
+
path = Path(self._path_only)
|
|
102
165
|
if path.is_absolute():
|
|
103
166
|
return path
|
|
104
167
|
if base_path:
|
|
@@ -122,6 +185,7 @@ class Record:
|
|
|
122
185
|
"""A MarkBack record containing content and feedback."""
|
|
123
186
|
feedback: str
|
|
124
187
|
uri: Optional[str] = None
|
|
188
|
+
by: Optional[str] = None
|
|
125
189
|
source: Optional[SourceRef] = None
|
|
126
190
|
prior: Optional[SourceRef] = None
|
|
127
191
|
content: Optional[str] = None
|
|
@@ -155,6 +219,7 @@ class Record:
|
|
|
155
219
|
"""Convert to JSON-serializable dict."""
|
|
156
220
|
return {
|
|
157
221
|
"uri": self.uri,
|
|
222
|
+
"by": self.by,
|
|
158
223
|
"source": str(self.source) if self.source else None,
|
|
159
224
|
"prior": str(self.prior) if self.prior else None,
|
|
160
225
|
"content": self.content,
|
|
@@ -38,17 +38,21 @@ def write_record_canonical(
|
|
|
38
38
|
)
|
|
39
39
|
|
|
40
40
|
if use_compact:
|
|
41
|
-
# Compact format: @uri on
|
|
41
|
+
# Compact format: @uri, @by, @prior on own lines (if present), then @source ... <<<
|
|
42
42
|
if record.uri:
|
|
43
43
|
lines.append(f"@uri {record.uri}")
|
|
44
|
+
if record.by:
|
|
45
|
+
lines.append(f"@by {record.by}")
|
|
44
46
|
if record.prior:
|
|
45
47
|
lines.append(f"@prior {record.prior}")
|
|
46
48
|
lines.append(f"@source {record.source} <<< {record.feedback}")
|
|
47
49
|
else:
|
|
48
50
|
# Full format
|
|
49
|
-
# Headers: @uri first, then @prior, then @source
|
|
51
|
+
# Headers: @uri first, then @by, then @prior, then @source
|
|
50
52
|
if record.uri:
|
|
51
53
|
lines.append(f"@uri {record.uri}")
|
|
54
|
+
if record.by:
|
|
55
|
+
lines.append(f"@by {record.by}")
|
|
52
56
|
if record.prior:
|
|
53
57
|
lines.append(f"@prior {record.prior}")
|
|
54
58
|
if record.source:
|
|
@@ -151,7 +155,10 @@ def write_label_file(record: Record) -> str:
|
|
|
151
155
|
|
|
152
156
|
if record.uri:
|
|
153
157
|
lines.append(f"@uri {record.uri}")
|
|
154
|
-
|
|
158
|
+
|
|
159
|
+
if record.by:
|
|
160
|
+
lines.append(f"@by {record.by}")
|
|
161
|
+
|
|
155
162
|
if record.prior:
|
|
156
163
|
lines.append(f"@prior {record.prior}")
|
|
157
164
|
|
|
@@ -138,6 +138,71 @@ function lintPriorExists(record: MarkbackRecord, basePath: string | null, record
|
|
|
138
138
|
return diagnostics;
|
|
139
139
|
}
|
|
140
140
|
|
|
141
|
+
interface PositionCheck {
|
|
142
|
+
isInvalid: boolean;
|
|
143
|
+
errorMsg: string;
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
function isPositionInvalid(sourceRef: { startLine: number | null; endLine: number | null; startColumn: number | null; endColumn: number | null }): PositionCheck {
|
|
147
|
+
if (sourceRef.startLine === null || sourceRef.endLine === null) {
|
|
148
|
+
return { isInvalid: false, errorMsg: "" };
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
if (sourceRef.endLine < sourceRef.startLine) {
|
|
152
|
+
return { isInvalid: true, errorMsg: `end line ${sourceRef.endLine} is less than start line ${sourceRef.startLine}` };
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
if (sourceRef.endLine === sourceRef.startLine) {
|
|
156
|
+
if (sourceRef.startColumn !== null && sourceRef.endColumn !== null && sourceRef.endColumn < sourceRef.startColumn) {
|
|
157
|
+
return { isInvalid: true, errorMsg: `end column ${sourceRef.endColumn} is less than start column ${sourceRef.startColumn} on line ${sourceRef.startLine}` };
|
|
158
|
+
}
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
return { isInvalid: false, errorMsg: "" };
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
function lintLineRange(record: MarkbackRecord, recordIdx: number): Diagnostic[] {
|
|
165
|
+
const diagnostics: Diagnostic[] = [];
|
|
166
|
+
|
|
167
|
+
// Check @source range
|
|
168
|
+
if (record.source && record.source.startLine !== null) {
|
|
169
|
+
const { isInvalid, errorMsg } = isPositionInvalid(record.source);
|
|
170
|
+
if (isInvalid) {
|
|
171
|
+
diagnostics.push(
|
|
172
|
+
new Diagnostic({
|
|
173
|
+
file: record._sourceFile ?? null,
|
|
174
|
+
line: record._startLine ?? null,
|
|
175
|
+
column: null,
|
|
176
|
+
severity: Severity.ERROR,
|
|
177
|
+
code: ErrorCode.E011,
|
|
178
|
+
message: `Invalid range in @source: ${errorMsg}`,
|
|
179
|
+
recordIndex: recordIdx,
|
|
180
|
+
}),
|
|
181
|
+
);
|
|
182
|
+
}
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
// Check @prior range
|
|
186
|
+
if (record.prior && record.prior.startLine !== null) {
|
|
187
|
+
const { isInvalid, errorMsg } = isPositionInvalid(record.prior);
|
|
188
|
+
if (isInvalid) {
|
|
189
|
+
diagnostics.push(
|
|
190
|
+
new Diagnostic({
|
|
191
|
+
file: record._sourceFile ?? null,
|
|
192
|
+
line: record._startLine ?? null,
|
|
193
|
+
column: null,
|
|
194
|
+
severity: Severity.ERROR,
|
|
195
|
+
code: ErrorCode.E011,
|
|
196
|
+
message: `Invalid range in @prior: ${errorMsg}`,
|
|
197
|
+
recordIndex: recordIdx,
|
|
198
|
+
}),
|
|
199
|
+
);
|
|
200
|
+
}
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
return diagnostics;
|
|
204
|
+
}
|
|
205
|
+
|
|
141
206
|
function lintCanonicalFormat(records: MarkbackRecord[], originalText: string, file?: string | null): Diagnostic[] {
|
|
142
207
|
const diagnostics: Diagnostic[] = [];
|
|
143
208
|
|
|
@@ -199,6 +264,9 @@ export function lintString(text: string, options: LintOptions = {}): ParseResult
|
|
|
199
264
|
result.diagnostics.push(...lintSourceExists(record, basePath, idx));
|
|
200
265
|
result.diagnostics.push(...lintPriorExists(record, basePath, idx));
|
|
201
266
|
}
|
|
267
|
+
|
|
268
|
+
// Check line range validity
|
|
269
|
+
result.diagnostics.push(...lintLineRange(record, idx));
|
|
202
270
|
});
|
|
203
271
|
|
|
204
272
|
if (checkCanonical && result.records.length > 0 && !result.hasErrors) {
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { Diagnostic, ErrorCode, ParseResult, Record as MarkbackRecord, Severity, SourceRef, WarningCode } from "./types";
|
|
2
2
|
|
|
3
|
-
const KNOWN_HEADERS = new Set(["uri", "source", "prior"]);
|
|
3
|
+
const KNOWN_HEADERS = new Set(["uri", "by", "source", "prior"]);
|
|
4
4
|
|
|
5
5
|
const HEADER_PATTERN = /^@([a-z]+)\s+(.+)$/;
|
|
6
6
|
const FEEDBACK_DELIMITER = "<<<";
|
|
@@ -114,6 +114,7 @@ export function parseString(text: string, sourceFile?: string | null): ParseResu
|
|
|
114
114
|
|
|
115
115
|
const finalizeRecord = (feedback: string, endLine: number, isCompact = false) => {
|
|
116
116
|
const uri = currentHeaders.uri ?? pendingUri;
|
|
117
|
+
const by = currentHeaders.by ?? null;
|
|
117
118
|
const sourceStr = currentHeaders.source;
|
|
118
119
|
const source = sourceStr ? new SourceRef(sourceStr) : null;
|
|
119
120
|
const priorStr = currentHeaders.prior;
|
|
@@ -135,6 +136,7 @@ export function parseString(text: string, sourceFile?: string | null): ParseResu
|
|
|
135
136
|
new MarkbackRecord({
|
|
136
137
|
feedback,
|
|
137
138
|
uri: uri ?? null,
|
|
139
|
+
by,
|
|
138
140
|
source,
|
|
139
141
|
prior,
|
|
140
142
|
content,
|
|
@@ -202,13 +204,15 @@ export function parseString(text: string, sourceFile?: string | null): ParseResu
|
|
|
202
204
|
}
|
|
203
205
|
|
|
204
206
|
const uri = pendingUri ?? currentHeaders.uri ?? null;
|
|
207
|
+
const by = currentHeaders.by ?? null;
|
|
205
208
|
const priorStr = currentHeaders.prior;
|
|
206
209
|
const prior = priorStr ? new SourceRef(priorStr) : null;
|
|
207
210
|
|
|
208
211
|
records.push(
|
|
209
|
-
|
|
212
|
+
new MarkbackRecord({
|
|
210
213
|
feedback: feedback ?? "",
|
|
211
214
|
uri,
|
|
215
|
+
by,
|
|
212
216
|
source,
|
|
213
217
|
prior,
|
|
214
218
|
content: null,
|