markback 0.1.0__tar.gz → 0.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- markback-0.1.2/.claude/settings.local.json +13 -0
- {markback-0.1.0 → markback-0.1.2}/.gitignore +5 -0
- markback-0.1.2/.ishipped/card.md +26 -0
- {markback-0.1.0 → markback-0.1.2}/PKG-INFO +11 -1
- {markback-0.1.0 → markback-0.1.2}/README.md +10 -0
- {markback-0.1.0 → markback-0.1.2}/SPEC.md +73 -10
- {markback-0.1.0 → markback-0.1.2}/markback/linter.py +68 -1
- {markback-0.1.0 → markback-0.1.2}/markback/parser.py +8 -2
- {markback-0.1.0 → markback-0.1.2}/markback/types.py +47 -4
- {markback-0.1.0 → markback-0.1.2}/markback/writer.py +9 -2
- markback-0.1.2/packages/markbackjs/LICENSE +21 -0
- markback-0.1.2/packages/markbackjs/README.md +47 -0
- markback-0.1.2/packages/markbackjs/package-lock.json +51 -0
- markback-0.1.2/packages/markbackjs/package.json +33 -0
- markback-0.1.2/packages/markbackjs/src/index.ts +23 -0
- markback-0.1.2/packages/markbackjs/src/linter.ts +368 -0
- markback-0.1.2/packages/markbackjs/src/parser.ts +357 -0
- markback-0.1.2/packages/markbackjs/src/types.ts +357 -0
- markback-0.1.2/packages/markbackjs/src/writer.ts +73 -0
- markback-0.1.2/packages/markbackjs/test/linter.test.js +161 -0
- markback-0.1.2/packages/markbackjs/tsconfig.json +17 -0
- {markback-0.1.0 → markback-0.1.2}/pyproject.toml +1 -1
- {markback-0.1.0 → markback-0.1.2}/tests/test_linter.py +100 -0
- {markback-0.1.0 → markback-0.1.2}/tests/test_parser.py +52 -0
- {markback-0.1.0 → markback-0.1.2}/tests/test_writer.py +58 -0
- {markback-0.1.0 → markback-0.1.2}/IMPLEMENTATION_NOTES.md +0 -0
- {markback-0.1.0 → markback-0.1.2}/LICENSE +0 -0
- {markback-0.1.0 → markback-0.1.2}/markback/__init__.py +0 -0
- {markback-0.1.0 → markback-0.1.2}/markback/cli.py +0 -0
- {markback-0.1.0 → markback-0.1.2}/markback/config.py +0 -0
- {markback-0.1.0 → markback-0.1.2}/markback/llm.py +0 -0
- {markback-0.1.0 → markback-0.1.2}/markback/workflow.py +0 -0
- {markback-0.1.0 → markback-0.1.2}/tests/__init__.py +0 -0
- {markback-0.1.0 → markback-0.1.2}/tests/fixtures/compact_source.mb +0 -0
- {markback-0.1.0 → markback-0.1.2}/tests/fixtures/errors/content_with_source.mb +0 -0
- {markback-0.1.0 → markback-0.1.2}/tests/fixtures/errors/empty_feedback.mb +0 -0
- {markback-0.1.0 → markback-0.1.2}/tests/fixtures/errors/malformed_uri.mb +0 -0
- {markback-0.1.0 → markback-0.1.2}/tests/fixtures/errors/missing_feedback.mb +0 -0
- {markback-0.1.0 → markback-0.1.2}/tests/fixtures/errors/multiple_feedback.mb +0 -0
- {markback-0.1.0 → markback-0.1.2}/tests/fixtures/essay.label.txt +0 -0
- {markback-0.1.0 → markback-0.1.2}/tests/fixtures/essay.txt +0 -0
- {markback-0.1.0 → markback-0.1.2}/tests/fixtures/external_source.mb +0 -0
- {markback-0.1.0 → markback-0.1.2}/tests/fixtures/freeform_feedback.mb +0 -0
- {markback-0.1.0 → markback-0.1.2}/tests/fixtures/json_feedback.mb +0 -0
- {markback-0.1.0 → markback-0.1.2}/tests/fixtures/label_list.mb +0 -0
- {markback-0.1.0 → markback-0.1.2}/tests/fixtures/minimal.mb +0 -0
- {markback-0.1.0 → markback-0.1.2}/tests/fixtures/multi_record.mb +0 -0
- {markback-0.1.0 → markback-0.1.2}/tests/fixtures/with_uri.mb +0 -0
- {markback-0.1.0 → markback-0.1.2}/tests/test_cli.py +0 -0
- {markback-0.1.0 → markback-0.1.2}/tests/test_config.py +0 -0
- {markback-0.1.0 → markback-0.1.2}/tests/test_types.py +0 -0
- {markback-0.1.0 → markback-0.1.2}/tests/test_workflow.py +0 -0
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: "MarkBack"
|
|
3
|
+
summary: "Human-writable format for pairing content with labels and feedback."
|
|
4
|
+
shipped: 2026-01-04
|
|
5
|
+
tags: [data-annotation, machine-learning, cli, python, typescript]
|
|
6
|
+
links:
|
|
7
|
+
- label: "markback.org"
|
|
8
|
+
url: "https://markback.org"
|
|
9
|
+
- label: "GitHub"
|
|
10
|
+
url: "https://github.com/dandriscoll/markback"
|
|
11
|
+
primary: true
|
|
12
|
+
- label: "NPM"
|
|
13
|
+
url: "https://www.npmjs.com/package/markbackjs"
|
|
14
|
+
---
|
|
15
|
+
|
|
16
|
+
## What is it?
|
|
17
|
+
|
|
18
|
+
MarkBack is a compact file format for storing content alongside feedback and labels. It's built for training data management, prompt engineering, and annotation workflows where you need human-readable files that machines can parse reliably.
|
|
19
|
+
|
|
20
|
+
## Key Features
|
|
21
|
+
|
|
22
|
+
- **Multiple storage modes** — Single-file, multi-record, compact one-liner, or paired files. Pick what fits your workflow.
|
|
23
|
+
- **Structured feedback parsing** — Labels, key-value attributes, JSON, and freeform comments in one line.
|
|
24
|
+
- **Comprehensive linting** — 18 diagnostic rules catch errors and style issues with precise line numbers.
|
|
25
|
+
- **External content references** — Point to files, URIs, or embed content inline. Works with text, images, and binary files.
|
|
26
|
+
- **Dual-language support** — Full implementations in Python (CLI + library) and TypeScript.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: markback
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.1.2
|
|
4
4
|
Summary: A compact, human-writable format for storing content paired with feedback/labels
|
|
5
5
|
Project-URL: Homepage, https://github.com/dandriscoll/markback
|
|
6
6
|
Project-URL: Repository, https://github.com/dandriscoll/markback
|
|
@@ -196,6 +196,16 @@ Second content.
|
|
|
196
196
|
@source ./images/003.jpg <<< approved; scene=mountain
|
|
197
197
|
```
|
|
198
198
|
|
|
199
|
+
### With Prior Reference
|
|
200
|
+
|
|
201
|
+
Use `@prior` to reference an item that precedes the source (e.g., a prompt that generated an image):
|
|
202
|
+
|
|
203
|
+
```
|
|
204
|
+
@uri local:generated-001
|
|
205
|
+
@prior ./prompts/sunset-prompt.txt
|
|
206
|
+
@source ./images/generated-sunset.jpg <<< accurate; matches prompt well
|
|
207
|
+
```
|
|
208
|
+
|
|
199
209
|
### Paired Files
|
|
200
210
|
|
|
201
211
|
**content.txt:**
|
|
@@ -161,6 +161,16 @@ Second content.
|
|
|
161
161
|
@source ./images/003.jpg <<< approved; scene=mountain
|
|
162
162
|
```
|
|
163
163
|
|
|
164
|
+
### With Prior Reference
|
|
165
|
+
|
|
166
|
+
Use `@prior` to reference an item that precedes the source (e.g., a prompt that generated an image):
|
|
167
|
+
|
|
168
|
+
```
|
|
169
|
+
@uri local:generated-001
|
|
170
|
+
@prior ./prompts/sunset-prompt.txt
|
|
171
|
+
@source ./images/generated-sunset.jpg <<< accurate; matches prompt well
|
|
172
|
+
```
|
|
173
|
+
|
|
164
174
|
### Paired Files
|
|
165
175
|
|
|
166
176
|
**content.txt:**
|
|
@@ -28,6 +28,7 @@ A MarkBack **record** is the fundamental unit. Every record has:
|
|
|
28
28
|
| `feedback` | Yes | Text after the `<<<` delimiter (always one line) |
|
|
29
29
|
| `uri` | No | Unique identifier for the item |
|
|
30
30
|
| `source` | No | Reference to external content (when content is not inline) |
|
|
31
|
+
| `prior` | No | Reference to an item that precedes the source (e.g., a prompt that generated the content) |
|
|
31
32
|
|
|
32
33
|
*Content is required but may be external (via `source` field).
|
|
33
34
|
|
|
@@ -66,6 +67,7 @@ Header lines appear at the start of a record and begin with `@`. They define met
|
|
|
66
67
|
```
|
|
67
68
|
@uri <uri-value>
|
|
68
69
|
@source <path-or-uri>
|
|
70
|
+
@prior <path-or-uri>
|
|
69
71
|
```
|
|
70
72
|
|
|
71
73
|
**Rules:**
|
|
@@ -109,6 +111,43 @@ References external content instead of inline content.
|
|
|
109
111
|
- When `@source` is present, inline content MUST be empty (or contain only whitespace)
|
|
110
112
|
- Parsers MUST verify referenced files exist (warning if missing)
|
|
111
113
|
|
|
114
|
+
#### 3.1.3 `@prior` Header
|
|
115
|
+
|
|
116
|
+
References an item that precedes the source material. For example, if the source is an image generated by an LLM, the prior could be the prompt that was used to create it.
|
|
117
|
+
|
|
118
|
+
```
|
|
119
|
+
@prior ./prompts/image-gen-prompt.txt
|
|
120
|
+
@prior https://example.com/prompts/123
|
|
121
|
+
@prior file:///path/to/prompt.txt
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
**Rules:**
|
|
125
|
+
- Relative paths are resolved relative to the MarkBack file location
|
|
126
|
+
- `@prior` can be used with or without `@source`
|
|
127
|
+
- `@prior` does not affect content handling (inline content or `@source` rules still apply)
|
|
128
|
+
- Parsers SHOULD verify referenced files exist (warning if missing)
|
|
129
|
+
|
|
130
|
+
#### 3.1.4 Line Range Specification
|
|
131
|
+
|
|
132
|
+
Both `@source` and `@prior` headers support optional line range specifications using colon notation. This allows referencing specific lines within a file.
|
|
133
|
+
|
|
134
|
+
**Syntax:** `<path-or-uri>:<start>` or `<path-or-uri>:<start>-<end>`
|
|
135
|
+
|
|
136
|
+
```
|
|
137
|
+
@source ./code.py:42
|
|
138
|
+
@source ./code.py:42-50
|
|
139
|
+
@prior ./prompts/template.txt:1-20
|
|
140
|
+
@source https://example.com/file.txt:100-150
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
**Rules:**
|
|
144
|
+
- Line numbers are 1-indexed (first line is line 1)
|
|
145
|
+
- Single line: `:N` references line N only
|
|
146
|
+
- Line range: `:N-M` references lines N through M (inclusive)
|
|
147
|
+
- End line must be greater than or equal to start line (E011 error otherwise)
|
|
148
|
+
- Line ranges are informational metadata; parsers do not validate that referenced lines exist in the file
|
|
149
|
+
- Windows drive letters (e.g., `C:\path`) are not confused with line ranges because scheme detection requires length > 1
|
|
150
|
+
|
|
112
151
|
### 3.2 Content Block
|
|
113
152
|
|
|
114
153
|
Content is everything between headers and the `<<<` feedback delimiter.
|
|
@@ -442,7 +481,7 @@ Canonical form ensures consistent output for comparison and version control.
|
|
|
442
481
|
### 5.2 Canonicalization Rules
|
|
443
482
|
|
|
444
483
|
1. **Line endings:** Normalize to `\n` (LF)
|
|
445
|
-
2. **Header order:** `@uri` before `@source` before unknown headers (alphabetical)
|
|
484
|
+
2. **Header order:** `@uri` before `@prior` before `@source` before unknown headers (alphabetical)
|
|
446
485
|
3. **Header spacing:** Exactly one space after keyword
|
|
447
486
|
4. **Trailing whitespace:** Remove from all lines
|
|
448
487
|
5. **Content whitespace:** Preserve internal whitespace; trim leading/trailing blank lines
|
|
@@ -557,6 +596,7 @@ Each line is classified as one of:
|
|
|
557
596
|
| E008 | Unclosed quote in structured attribute value (only in `structured` parse mode) |
|
|
558
597
|
| E009 | Empty feedback (nothing after `<<< `) |
|
|
559
598
|
| E010 | Missing blank line before inline content (content starts with `@`) |
|
|
599
|
+
| E011 | Invalid line range (end line less than start line) |
|
|
560
600
|
|
|
561
601
|
### 7.2 Warnings (SHOULD fix)
|
|
562
602
|
|
|
@@ -570,6 +610,7 @@ Each line is classified as one of:
|
|
|
570
610
|
| W006 | Missing `@uri` (record has no identifier) |
|
|
571
611
|
| W007 | Paired feedback file not found for content file |
|
|
572
612
|
| W008 | Non-canonical formatting detected |
|
|
613
|
+
| W009 | `@prior` file not found |
|
|
573
614
|
|
|
574
615
|
### 7.3 Lint Output Format
|
|
575
616
|
|
|
@@ -617,7 +658,27 @@ Or in compact form:
|
|
|
617
658
|
@source ./images/beach.jpg <<< appropriate; tags=landscape,beach,sunset; quality=high
|
|
618
659
|
```
|
|
619
660
|
|
|
620
|
-
### 8.4 Single-File Example
|
|
661
|
+
### 8.4 Record with Prior Reference (e.g., LLM-generated content)
|
|
662
|
+
|
|
663
|
+
```
|
|
664
|
+
@uri local:generated-image-001
|
|
665
|
+
@prior ./prompts/beach-sunset.txt
|
|
666
|
+
@source ./images/generated-beach.jpg
|
|
667
|
+
<<< accurate; matches prompt well; quality=high
|
|
668
|
+
```
|
|
669
|
+
|
|
670
|
+
Or with inline content:
|
|
671
|
+
```
|
|
672
|
+
@uri local:generated-text-001
|
|
673
|
+
@prior ./prompts/haiku-prompt.txt
|
|
674
|
+
|
|
675
|
+
Cherry blossoms fall,
|
|
676
|
+
Petals dance on gentle breeze,
|
|
677
|
+
Spring whispers goodbye.
|
|
678
|
+
<<< creative; follows haiku structure; quality=excellent
|
|
679
|
+
```
|
|
680
|
+
|
|
681
|
+
### 8.5 Single-File Example
|
|
621
682
|
|
|
622
683
|
**File:** `question.mb`
|
|
623
684
|
```
|
|
@@ -627,7 +688,7 @@ Explain quantum entanglement in simple terms.
|
|
|
627
688
|
<<< quality=excellent; accuracy=high; clarity=good
|
|
628
689
|
```
|
|
629
690
|
|
|
630
|
-
### 8.5 Label List Example (Compact Format)
|
|
691
|
+
### 8.6 Label List Example (Compact Format)
|
|
631
692
|
|
|
632
693
|
**File:** `image-annotations.mb`
|
|
633
694
|
```
|
|
@@ -659,7 +720,7 @@ Explain quantum entanglement in simple terms.
|
|
|
659
720
|
@source ./batch1/item3.txt <<< positive; excellent clarity
|
|
660
721
|
```
|
|
661
722
|
|
|
662
|
-
### 8.6 Multi-Record Example (Mixed Freeform and Structured)
|
|
723
|
+
### 8.7 Multi-Record Example (Mixed Freeform and Structured)
|
|
663
724
|
|
|
664
725
|
**File:** `training-data.mb`
|
|
665
726
|
```
|
|
@@ -690,7 +751,7 @@ Please write a formal letter requesting a meeting.
|
|
|
690
751
|
@source ./audio/sample-005.wav <<< transcription="Hello world"; quality=clear; language=en
|
|
691
752
|
```
|
|
692
753
|
|
|
693
|
-
### 8.7 Paired-File Example
|
|
754
|
+
### 8.8 Paired-File Example
|
|
694
755
|
|
|
695
756
|
**Content file:** `essay.txt`
|
|
696
757
|
```
|
|
@@ -706,7 +767,7 @@ agriculture, manufacturing, mining, and transport.
|
|
|
706
767
|
<<< good; grade=B+; well structured but needs more specific examples
|
|
707
768
|
```
|
|
708
769
|
|
|
709
|
-
### 8.8 Freeform Feedback Examples
|
|
770
|
+
### 8.9 Freeform Feedback Examples
|
|
710
771
|
|
|
711
772
|
Various styles of freeform feedback:
|
|
712
773
|
|
|
@@ -729,7 +790,7 @@ Explain machine learning to a child.
|
|
|
729
790
|
<<< needs work; the explanation assumes too much prior knowledge
|
|
730
791
|
```
|
|
731
792
|
|
|
732
|
-
### 8.9 Complex Structured Feedback (JSON)
|
|
793
|
+
### 8.10 Complex Structured Feedback (JSON)
|
|
733
794
|
|
|
734
795
|
```
|
|
735
796
|
@uri local:complex-example
|
|
@@ -738,7 +799,7 @@ Multi-attribute content with special characters.
|
|
|
738
799
|
<<< json:{"rating":4.5,"tags":["important","review"],"notes":"Contains \"quoted\" text and; semicolons","scores":{"accuracy":0.9,"relevance":0.85}}
|
|
739
800
|
```
|
|
740
801
|
|
|
741
|
-
### 8.10 Image with MarkBack Sidecar
|
|
802
|
+
### 8.11 Image with MarkBack Sidecar
|
|
742
803
|
|
|
743
804
|
**Content file:** `diagram.png` (binary)
|
|
744
805
|
|
|
@@ -824,8 +885,10 @@ feedback-content = *VCHAR ; no LF allowed
|
|
|
824
885
|
compact-record = [uri-line] source-feedback-line
|
|
825
886
|
compact-list = compact-record *(1*blank-line compact-record)
|
|
826
887
|
uri-line = "@uri" SP value LF
|
|
827
|
-
source-feedback-line = "@source" SP path SP "<<<" SP feedback-content LF
|
|
828
|
-
path = 1*VCHAR ; ends at space before <<<
|
|
888
|
+
source-feedback-line = "@source" SP path-with-range SP "<<<" SP feedback-content LF
|
|
889
|
+
path-with-range = path [line-range] ; path with optional line range
|
|
890
|
+
path = 1*VCHAR ; ends at space before <<< or line-range
|
|
891
|
+
line-range = ":" 1*DIGIT ["-" 1*DIGIT]
|
|
829
892
|
|
|
830
893
|
LOWER = %x61-7A ; a-z
|
|
831
894
|
SP = %x20 ; space
|
|
@@ -110,6 +110,69 @@ def lint_source_exists(
|
|
|
110
110
|
return diagnostics
|
|
111
111
|
|
|
112
112
|
|
|
113
|
+
def lint_prior_exists(
|
|
114
|
+
record: Record,
|
|
115
|
+
base_path: Optional[Path],
|
|
116
|
+
record_idx: int,
|
|
117
|
+
) -> list[Diagnostic]:
|
|
118
|
+
"""Check if @prior file exists."""
|
|
119
|
+
diagnostics: list[Diagnostic] = []
|
|
120
|
+
|
|
121
|
+
if record.prior and not record.prior.is_uri:
|
|
122
|
+
try:
|
|
123
|
+
resolved = record.prior.resolve(base_path)
|
|
124
|
+
if not resolved.exists():
|
|
125
|
+
diagnostics.append(Diagnostic(
|
|
126
|
+
file=record._source_file,
|
|
127
|
+
line=record._start_line,
|
|
128
|
+
column=None,
|
|
129
|
+
severity=Severity.WARNING,
|
|
130
|
+
code=WarningCode.W009,
|
|
131
|
+
message=f"@prior file not found: {record.prior}",
|
|
132
|
+
record_index=record_idx,
|
|
133
|
+
))
|
|
134
|
+
except ValueError:
|
|
135
|
+
pass # URI that can't be resolved to path
|
|
136
|
+
|
|
137
|
+
return diagnostics
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def lint_line_range(
|
|
141
|
+
record: Record,
|
|
142
|
+
record_idx: int,
|
|
143
|
+
) -> list[Diagnostic]:
|
|
144
|
+
"""Check if line ranges are valid (end >= start)."""
|
|
145
|
+
diagnostics: list[Diagnostic] = []
|
|
146
|
+
|
|
147
|
+
# Check @source line range
|
|
148
|
+
if record.source and record.source.start_line is not None:
|
|
149
|
+
if record.source.end_line is not None and record.source.end_line < record.source.start_line:
|
|
150
|
+
diagnostics.append(Diagnostic(
|
|
151
|
+
file=record._source_file,
|
|
152
|
+
line=record._start_line,
|
|
153
|
+
column=None,
|
|
154
|
+
severity=Severity.ERROR,
|
|
155
|
+
code=ErrorCode.E011,
|
|
156
|
+
message=f"Invalid line range in @source: end line {record.source.end_line} is less than start line {record.source.start_line}",
|
|
157
|
+
record_index=record_idx,
|
|
158
|
+
))
|
|
159
|
+
|
|
160
|
+
# Check @prior line range
|
|
161
|
+
if record.prior and record.prior.start_line is not None:
|
|
162
|
+
if record.prior.end_line is not None and record.prior.end_line < record.prior.start_line:
|
|
163
|
+
diagnostics.append(Diagnostic(
|
|
164
|
+
file=record._source_file,
|
|
165
|
+
line=record._start_line,
|
|
166
|
+
column=None,
|
|
167
|
+
severity=Severity.ERROR,
|
|
168
|
+
code=ErrorCode.E011,
|
|
169
|
+
message=f"Invalid line range in @prior: end line {record.prior.end_line} is less than start line {record.prior.start_line}",
|
|
170
|
+
record_index=record_idx,
|
|
171
|
+
))
|
|
172
|
+
|
|
173
|
+
return diagnostics
|
|
174
|
+
|
|
175
|
+
|
|
113
176
|
def lint_canonical_format(
|
|
114
177
|
records: list[Record],
|
|
115
178
|
original_text: str,
|
|
@@ -173,10 +236,14 @@ def lint_string(
|
|
|
173
236
|
idx,
|
|
174
237
|
))
|
|
175
238
|
|
|
176
|
-
# Check source file existence
|
|
239
|
+
# Check source and prior file existence
|
|
177
240
|
if check_sources:
|
|
178
241
|
base_path = source_file.parent if source_file else None
|
|
179
242
|
result.diagnostics.extend(lint_source_exists(record, base_path, idx))
|
|
243
|
+
result.diagnostics.extend(lint_prior_exists(record, base_path, idx))
|
|
244
|
+
|
|
245
|
+
# Check line range validity
|
|
246
|
+
result.diagnostics.extend(lint_line_range(record, idx))
|
|
180
247
|
|
|
181
248
|
# Check canonical format
|
|
182
249
|
if check_canonical and result.records and not result.has_errors:
|
|
@@ -17,7 +17,7 @@ from .types import (
|
|
|
17
17
|
|
|
18
18
|
|
|
19
19
|
# Known header keywords
|
|
20
|
-
KNOWN_HEADERS = {"uri", "source"}
|
|
20
|
+
KNOWN_HEADERS = {"uri", "source", "prior"}
|
|
21
21
|
|
|
22
22
|
# Patterns
|
|
23
23
|
HEADER_PATTERN = re.compile(r"^@([a-z]+)\s+(.+)$")
|
|
@@ -147,6 +147,8 @@ def parse_string(
|
|
|
147
147
|
uri = current_headers.get("uri") or pending_uri
|
|
148
148
|
source_str = current_headers.get("source")
|
|
149
149
|
source = SourceRef(source_str) if source_str else None
|
|
150
|
+
prior_str = current_headers.get("prior")
|
|
151
|
+
prior = SourceRef(prior_str) if prior_str else None
|
|
150
152
|
|
|
151
153
|
content = None
|
|
152
154
|
if current_content_lines:
|
|
@@ -163,6 +165,7 @@ def parse_string(
|
|
|
163
165
|
feedback=feedback,
|
|
164
166
|
uri=uri,
|
|
165
167
|
source=source,
|
|
168
|
+
prior=prior,
|
|
166
169
|
content=content,
|
|
167
170
|
_source_file=source_file,
|
|
168
171
|
_start_line=current_start_line,
|
|
@@ -239,13 +242,16 @@ def parse_string(
|
|
|
239
242
|
line_num,
|
|
240
243
|
)
|
|
241
244
|
|
|
242
|
-
# Use any pending @uri from previous line
|
|
245
|
+
# Use any pending @uri from previous line and @prior if present
|
|
243
246
|
uri = pending_uri or current_headers.get("uri")
|
|
247
|
+
prior_str = current_headers.get("prior")
|
|
248
|
+
prior = SourceRef(prior_str) if prior_str else None
|
|
244
249
|
|
|
245
250
|
record = Record(
|
|
246
251
|
feedback=feedback or "",
|
|
247
252
|
uri=uri,
|
|
248
253
|
source=source,
|
|
254
|
+
prior=prior,
|
|
249
255
|
content=None,
|
|
250
256
|
_source_file=source_file,
|
|
251
257
|
_start_line=current_start_line,
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"""Core types for MarkBack format."""
|
|
2
2
|
|
|
3
|
+
import re
|
|
3
4
|
from dataclasses import dataclass, field
|
|
4
5
|
from enum import Enum
|
|
5
6
|
from pathlib import Path
|
|
@@ -25,6 +26,7 @@ class ErrorCode(Enum):
|
|
|
25
26
|
E008 = "E008" # Unclosed quote in structured attribute value
|
|
26
27
|
E009 = "E009" # Empty feedback (nothing after <<< )
|
|
27
28
|
E010 = "E010" # Missing blank line before inline content
|
|
29
|
+
E011 = "E011" # Invalid line range (end < start)
|
|
28
30
|
|
|
29
31
|
|
|
30
32
|
class WarningCode(Enum):
|
|
@@ -37,6 +39,7 @@ class WarningCode(Enum):
|
|
|
37
39
|
W006 = "W006" # Missing @uri (record has no identifier)
|
|
38
40
|
W007 = "W007" # Paired feedback file not found
|
|
39
41
|
W008 = "W008" # Non-canonical formatting detected
|
|
42
|
+
W009 = "W009" # @prior file not found
|
|
40
43
|
|
|
41
44
|
|
|
42
45
|
@dataclass
|
|
@@ -75,29 +78,67 @@ class Diagnostic:
|
|
|
75
78
|
}
|
|
76
79
|
|
|
77
80
|
|
|
81
|
+
# Regex to parse line range from a path: path:start or path:start-end
|
|
82
|
+
_LINE_RANGE_PATTERN = re.compile(r'^(.+?):(\d+)(?:-(\d+))?$')
|
|
83
|
+
|
|
84
|
+
|
|
78
85
|
@dataclass
|
|
79
86
|
class SourceRef:
|
|
80
87
|
"""Reference to external content (file path or URI)."""
|
|
81
88
|
value: str
|
|
82
89
|
is_uri: bool = False
|
|
90
|
+
start_line: Optional[int] = None
|
|
91
|
+
end_line: Optional[int] = None
|
|
92
|
+
_path_only: str = ""
|
|
83
93
|
|
|
84
94
|
def __post_init__(self):
|
|
85
|
-
# Determine if this is a URI or file path
|
|
95
|
+
# Parse line range if present
|
|
96
|
+
self._parse_line_range()
|
|
97
|
+
|
|
98
|
+
# Determine if this is a URI or file path (using path without line range)
|
|
86
99
|
if not self.is_uri:
|
|
87
|
-
parsed = urlparse(self.value)
|
|
100
|
+
parsed = urlparse(self._path_only)
|
|
88
101
|
# Consider it a URI if it has a scheme that's not a Windows drive letter
|
|
89
102
|
self.is_uri = bool(parsed.scheme) and len(parsed.scheme) > 1
|
|
90
103
|
|
|
104
|
+
def _parse_line_range(self):
|
|
105
|
+
"""Parse optional line range from value."""
|
|
106
|
+
match = _LINE_RANGE_PATTERN.match(self.value)
|
|
107
|
+
if match:
|
|
108
|
+
self._path_only = match.group(1)
|
|
109
|
+
self.start_line = int(match.group(2))
|
|
110
|
+
if match.group(3):
|
|
111
|
+
self.end_line = int(match.group(3))
|
|
112
|
+
else:
|
|
113
|
+
# Single line reference: start and end are the same
|
|
114
|
+
self.end_line = self.start_line
|
|
115
|
+
else:
|
|
116
|
+
self._path_only = self.value
|
|
117
|
+
|
|
118
|
+
@property
|
|
119
|
+
def path(self) -> str:
|
|
120
|
+
"""Return path without line range."""
|
|
121
|
+
return self._path_only
|
|
122
|
+
|
|
123
|
+
@property
|
|
124
|
+
def line_range_str(self) -> Optional[str]:
|
|
125
|
+
"""Return formatted line range string, or None if no range."""
|
|
126
|
+
if self.start_line is None:
|
|
127
|
+
return None
|
|
128
|
+
if self.start_line == self.end_line:
|
|
129
|
+
return f":{self.start_line}"
|
|
130
|
+
return f":{self.start_line}-{self.end_line}"
|
|
131
|
+
|
|
91
132
|
def resolve(self, base_path: Optional[Path] = None) -> Path:
|
|
92
133
|
"""Resolve to a file path (relative paths resolved against base_path)."""
|
|
93
134
|
if self.is_uri:
|
|
94
|
-
parsed = urlparse(self.value)
|
|
135
|
+
parsed = urlparse(self._path_only)
|
|
95
136
|
if parsed.scheme == "file":
|
|
96
137
|
# file:// URI
|
|
97
138
|
return Path(parsed.path)
|
|
98
139
|
raise ValueError(f"Cannot resolve non-file URI to path: {self.value}")
|
|
99
140
|
|
|
100
|
-
path = Path(self.value)
|
|
141
|
+
path = Path(self._path_only)
|
|
101
142
|
if path.is_absolute():
|
|
102
143
|
return path
|
|
103
144
|
if base_path:
|
|
@@ -122,6 +163,7 @@ class Record:
|
|
|
122
163
|
feedback: str
|
|
123
164
|
uri: Optional[str] = None
|
|
124
165
|
source: Optional[SourceRef] = None
|
|
166
|
+
prior: Optional[SourceRef] = None
|
|
125
167
|
content: Optional[str] = None
|
|
126
168
|
metadata: dict = field(default_factory=dict)
|
|
127
169
|
|
|
@@ -154,6 +196,7 @@ class Record:
|
|
|
154
196
|
return {
|
|
155
197
|
"uri": self.uri,
|
|
156
198
|
"source": str(self.source) if self.source else None,
|
|
199
|
+
"prior": str(self.prior) if self.prior else None,
|
|
157
200
|
"content": self.content,
|
|
158
201
|
"feedback": self.feedback,
|
|
159
202
|
"metadata": self.metadata,
|
|
@@ -38,15 +38,19 @@ def write_record_canonical(
|
|
|
38
38
|
)
|
|
39
39
|
|
|
40
40
|
if use_compact:
|
|
41
|
-
# Compact format: @uri on its own line (if present), then @source ... <<<
|
|
41
|
+
# Compact format: @uri on its own line (if present), then @prior, then @source ... <<<
|
|
42
42
|
if record.uri:
|
|
43
43
|
lines.append(f"@uri {record.uri}")
|
|
44
|
+
if record.prior:
|
|
45
|
+
lines.append(f"@prior {record.prior}")
|
|
44
46
|
lines.append(f"@source {record.source} <<< {record.feedback}")
|
|
45
47
|
else:
|
|
46
48
|
# Full format
|
|
47
|
-
# Headers: @uri first, then @source
|
|
49
|
+
# Headers: @uri first, then @prior, then @source
|
|
48
50
|
if record.uri:
|
|
49
51
|
lines.append(f"@uri {record.uri}")
|
|
52
|
+
if record.prior:
|
|
53
|
+
lines.append(f"@prior {record.prior}")
|
|
50
54
|
if record.source:
|
|
51
55
|
lines.append(f"@source {record.source}")
|
|
52
56
|
|
|
@@ -147,6 +151,9 @@ def write_label_file(record: Record) -> str:
|
|
|
147
151
|
|
|
148
152
|
if record.uri:
|
|
149
153
|
lines.append(f"@uri {record.uri}")
|
|
154
|
+
|
|
155
|
+
if record.prior:
|
|
156
|
+
lines.append(f"@prior {record.prior}")
|
|
150
157
|
|
|
151
158
|
lines.append(f"<<< {record.feedback}")
|
|
152
159
|
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 dandriscoll
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# markbackjs
|
|
2
|
+
|
|
3
|
+
JavaScript/TypeScript linter for the MarkBack format.
|
|
4
|
+
|
|
5
|
+
## Install
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
npm install markbackjs
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Usage
|
|
12
|
+
|
|
13
|
+
```js
|
|
14
|
+
const { lintString, formatDiagnostics } = require("markbackjs");
|
|
15
|
+
|
|
16
|
+
const text = "Content here.\n<<< positive\n";
|
|
17
|
+
const result = lintString(text, { checkSources: false, checkCanonical: false });
|
|
18
|
+
|
|
19
|
+
if (result.hasErrors) {
|
|
20
|
+
console.log(formatDiagnostics(result.diagnostics));
|
|
21
|
+
}
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
### Supported Headers
|
|
25
|
+
|
|
26
|
+
- `@uri` - Unique identifier for the record
|
|
27
|
+
- `@source` - Reference to external content file
|
|
28
|
+
- `@prior` - Reference to a file that precedes the source (e.g., a prompt that generated it)
|
|
29
|
+
|
|
30
|
+
## API
|
|
31
|
+
|
|
32
|
+
- `lintString(text, options)`
|
|
33
|
+
- `lintFile(path, options)`
|
|
34
|
+
- `lintFiles(paths, options)`
|
|
35
|
+
- `formatDiagnostics(diagnostics, format)`
|
|
36
|
+
- `summarizeResults(results)`
|
|
37
|
+
|
|
38
|
+
Options:
|
|
39
|
+
- `sourceFile`: string
|
|
40
|
+
- `checkSources`: boolean (default true)
|
|
41
|
+
- `checkCanonical`: boolean (default true)
|
|
42
|
+
|
|
43
|
+
## Build
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
npm run build
|
|
47
|
+
```
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "markbackjs",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"lockfileVersion": 3,
|
|
5
|
+
"requires": true,
|
|
6
|
+
"packages": {
|
|
7
|
+
"": {
|
|
8
|
+
"name": "markbackjs",
|
|
9
|
+
"version": "0.1.0",
|
|
10
|
+
"license": "MIT",
|
|
11
|
+
"devDependencies": {
|
|
12
|
+
"@types/node": "^20.11.0",
|
|
13
|
+
"typescript": "^5.4.0"
|
|
14
|
+
},
|
|
15
|
+
"engines": {
|
|
16
|
+
"node": ">=18"
|
|
17
|
+
}
|
|
18
|
+
},
|
|
19
|
+
"node_modules/@types/node": {
|
|
20
|
+
"version": "20.19.27",
|
|
21
|
+
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.19.27.tgz",
|
|
22
|
+
"integrity": "sha512-N2clP5pJhB2YnZJ3PIHFk5RkygRX5WO/5f0WC08tp0wd+sv0rsJk3MqWn3CbNmT2J505a5336jaQj4ph1AdMug==",
|
|
23
|
+
"dev": true,
|
|
24
|
+
"license": "MIT",
|
|
25
|
+
"dependencies": {
|
|
26
|
+
"undici-types": "~6.21.0"
|
|
27
|
+
}
|
|
28
|
+
},
|
|
29
|
+
"node_modules/typescript": {
|
|
30
|
+
"version": "5.9.3",
|
|
31
|
+
"resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz",
|
|
32
|
+
"integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==",
|
|
33
|
+
"dev": true,
|
|
34
|
+
"license": "Apache-2.0",
|
|
35
|
+
"bin": {
|
|
36
|
+
"tsc": "bin/tsc",
|
|
37
|
+
"tsserver": "bin/tsserver"
|
|
38
|
+
},
|
|
39
|
+
"engines": {
|
|
40
|
+
"node": ">=14.17"
|
|
41
|
+
}
|
|
42
|
+
},
|
|
43
|
+
"node_modules/undici-types": {
|
|
44
|
+
"version": "6.21.0",
|
|
45
|
+
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz",
|
|
46
|
+
"integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==",
|
|
47
|
+
"dev": true,
|
|
48
|
+
"license": "MIT"
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
}
|