markback 0.1.0__tar.gz → 0.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. markback-0.1.2/.claude/settings.local.json +13 -0
  2. {markback-0.1.0 → markback-0.1.2}/.gitignore +5 -0
  3. markback-0.1.2/.ishipped/card.md +26 -0
  4. {markback-0.1.0 → markback-0.1.2}/PKG-INFO +11 -1
  5. {markback-0.1.0 → markback-0.1.2}/README.md +10 -0
  6. {markback-0.1.0 → markback-0.1.2}/SPEC.md +73 -10
  7. {markback-0.1.0 → markback-0.1.2}/markback/linter.py +68 -1
  8. {markback-0.1.0 → markback-0.1.2}/markback/parser.py +8 -2
  9. {markback-0.1.0 → markback-0.1.2}/markback/types.py +47 -4
  10. {markback-0.1.0 → markback-0.1.2}/markback/writer.py +9 -2
  11. markback-0.1.2/packages/markbackjs/LICENSE +21 -0
  12. markback-0.1.2/packages/markbackjs/README.md +47 -0
  13. markback-0.1.2/packages/markbackjs/package-lock.json +51 -0
  14. markback-0.1.2/packages/markbackjs/package.json +33 -0
  15. markback-0.1.2/packages/markbackjs/src/index.ts +23 -0
  16. markback-0.1.2/packages/markbackjs/src/linter.ts +368 -0
  17. markback-0.1.2/packages/markbackjs/src/parser.ts +357 -0
  18. markback-0.1.2/packages/markbackjs/src/types.ts +357 -0
  19. markback-0.1.2/packages/markbackjs/src/writer.ts +73 -0
  20. markback-0.1.2/packages/markbackjs/test/linter.test.js +161 -0
  21. markback-0.1.2/packages/markbackjs/tsconfig.json +17 -0
  22. {markback-0.1.0 → markback-0.1.2}/pyproject.toml +1 -1
  23. {markback-0.1.0 → markback-0.1.2}/tests/test_linter.py +100 -0
  24. {markback-0.1.0 → markback-0.1.2}/tests/test_parser.py +52 -0
  25. {markback-0.1.0 → markback-0.1.2}/tests/test_writer.py +58 -0
  26. {markback-0.1.0 → markback-0.1.2}/IMPLEMENTATION_NOTES.md +0 -0
  27. {markback-0.1.0 → markback-0.1.2}/LICENSE +0 -0
  28. {markback-0.1.0 → markback-0.1.2}/markback/__init__.py +0 -0
  29. {markback-0.1.0 → markback-0.1.2}/markback/cli.py +0 -0
  30. {markback-0.1.0 → markback-0.1.2}/markback/config.py +0 -0
  31. {markback-0.1.0 → markback-0.1.2}/markback/llm.py +0 -0
  32. {markback-0.1.0 → markback-0.1.2}/markback/workflow.py +0 -0
  33. {markback-0.1.0 → markback-0.1.2}/tests/__init__.py +0 -0
  34. {markback-0.1.0 → markback-0.1.2}/tests/fixtures/compact_source.mb +0 -0
  35. {markback-0.1.0 → markback-0.1.2}/tests/fixtures/errors/content_with_source.mb +0 -0
  36. {markback-0.1.0 → markback-0.1.2}/tests/fixtures/errors/empty_feedback.mb +0 -0
  37. {markback-0.1.0 → markback-0.1.2}/tests/fixtures/errors/malformed_uri.mb +0 -0
  38. {markback-0.1.0 → markback-0.1.2}/tests/fixtures/errors/missing_feedback.mb +0 -0
  39. {markback-0.1.0 → markback-0.1.2}/tests/fixtures/errors/multiple_feedback.mb +0 -0
  40. {markback-0.1.0 → markback-0.1.2}/tests/fixtures/essay.label.txt +0 -0
  41. {markback-0.1.0 → markback-0.1.2}/tests/fixtures/essay.txt +0 -0
  42. {markback-0.1.0 → markback-0.1.2}/tests/fixtures/external_source.mb +0 -0
  43. {markback-0.1.0 → markback-0.1.2}/tests/fixtures/freeform_feedback.mb +0 -0
  44. {markback-0.1.0 → markback-0.1.2}/tests/fixtures/json_feedback.mb +0 -0
  45. {markback-0.1.0 → markback-0.1.2}/tests/fixtures/label_list.mb +0 -0
  46. {markback-0.1.0 → markback-0.1.2}/tests/fixtures/minimal.mb +0 -0
  47. {markback-0.1.0 → markback-0.1.2}/tests/fixtures/multi_record.mb +0 -0
  48. {markback-0.1.0 → markback-0.1.2}/tests/fixtures/with_uri.mb +0 -0
  49. {markback-0.1.0 → markback-0.1.2}/tests/test_cli.py +0 -0
  50. {markback-0.1.0 → markback-0.1.2}/tests/test_config.py +0 -0
  51. {markback-0.1.0 → markback-0.1.2}/tests/test_types.py +0 -0
  52. {markback-0.1.0 → markback-0.1.2}/tests/test_workflow.py +0 -0
@@ -0,0 +1,13 @@
1
+ {
2
+ "permissions": {
3
+ "allow": [
4
+ "Bash(python -m pytest:*)",
5
+ "Bash(npm test:*)",
6
+ "Bash(npm install)",
7
+ "Bash(npm run build:*)",
8
+ "Bash(echo:*)",
9
+ "Bash(python -m markback lint:*)",
10
+ "Bash(python:*)"
11
+ ]
12
+ }
13
+ }
@@ -201,6 +201,11 @@ cython_debug/
201
201
  .cursorignore
202
202
  .cursorindexingignore
203
203
 
204
+ # Node
205
+ node_modules/
206
+ packages/markbackjs/dist/
207
+ packages/markbackjs/*.tsbuildinfo
208
+
204
209
  # Marimo
205
210
  marimo/_static/
206
211
  marimo/_lsp/
@@ -0,0 +1,26 @@
1
+ ---
2
+ title: "MarkBack"
3
+ summary: "Human-writable format for pairing content with labels and feedback."
4
+ shipped: 2026-01-04
5
+ tags: [data-annotation, machine-learning, cli, python, typescript]
6
+ links:
7
+ - label: "markback.org"
8
+ url: "https://markback.org"
9
+ - label: "GitHub"
10
+ url: "https://github.com/dandriscoll/markback"
11
+ primary: true
12
+ - label: "NPM"
13
+ url: "https://www.npmjs.com/package/markbackjs"
14
+ ---
15
+
16
+ ## What is it?
17
+
18
+ MarkBack is a compact file format for storing content alongside feedback and labels. It's built for training data management, prompt engineering, and annotation workflows where you need human-readable files that machines can parse reliably.
19
+
20
+ ## Key Features
21
+
22
+ - **Multiple storage modes** — Single-file, multi-record, compact one-liner, or paired files. Pick what fits your workflow.
23
+ - **Structured feedback parsing** — Labels, key-value attributes, JSON, and freeform comments in one line.
24
+ - **Comprehensive linting** — 20 diagnostic rules catch errors and style issues with precise line numbers.
25
+ - **External content references** — Point to files, URIs, or embed content inline. Works with text, images, and binary files.
26
+ - **Dual-language support** — Full implementations in Python (CLI + library) and TypeScript.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: markback
3
- Version: 0.1.0
3
+ Version: 0.1.2
4
4
  Summary: A compact, human-writable format for storing content paired with feedback/labels
5
5
  Project-URL: Homepage, https://github.com/dandriscoll/markback
6
6
  Project-URL: Repository, https://github.com/dandriscoll/markback
@@ -196,6 +196,16 @@ Second content.
196
196
  @source ./images/003.jpg <<< approved; scene=mountain
197
197
  ```
198
198
 
199
+ ### With Prior Reference
200
+
201
+ Use `@prior` to reference an item that precedes the source (e.g., a prompt that generated an image):
202
+
203
+ ```
204
+ @uri local:generated-001
205
+ @prior ./prompts/sunset-prompt.txt
206
+ @source ./images/generated-sunset.jpg <<< accurate; matches prompt well
207
+ ```
208
+
199
209
  ### Paired Files
200
210
 
201
211
  **content.txt:**
@@ -161,6 +161,16 @@ Second content.
161
161
  @source ./images/003.jpg <<< approved; scene=mountain
162
162
  ```
163
163
 
164
+ ### With Prior Reference
165
+
166
+ Use `@prior` to reference an item that precedes the source (e.g., a prompt that generated an image):
167
+
168
+ ```
169
+ @uri local:generated-001
170
+ @prior ./prompts/sunset-prompt.txt
171
+ @source ./images/generated-sunset.jpg <<< accurate; matches prompt well
172
+ ```
173
+
164
174
  ### Paired Files
165
175
 
166
176
  **content.txt:**
@@ -28,6 +28,7 @@ A MarkBack **record** is the fundamental unit. Every record has:
28
28
  | `feedback` | Yes | Text after the `<<<` delimiter (always one line) |
29
29
  | `uri` | No | Unique identifier for the item |
30
30
  | `source` | No | Reference to external content (when content is not inline) |
31
+ | `prior` | No | Reference to an item that precedes the source (e.g., a prompt that generated the content) |
31
32
 
32
33
  *Content is required but may be external (via `source` field).
33
34
 
@@ -66,6 +67,7 @@ Header lines appear at the start of a record and begin with `@`. They define met
66
67
  ```
67
68
  @uri <uri-value>
68
69
  @source <path-or-uri>
70
+ @prior <path-or-uri>
69
71
  ```
70
72
 
71
73
  **Rules:**
@@ -109,6 +111,43 @@ References external content instead of inline content.
109
111
  - When `@source` is present, inline content MUST be empty (or contain only whitespace)
110
112
  - Parsers MUST verify referenced files exist (warning if missing)
111
113
 
114
+ #### 3.1.3 `@prior` Header
115
+
116
+ References an item that precedes the source material. For example, if the source is an image generated by an LLM, the prior could be the prompt that was used to create it.
117
+
118
+ ```
119
+ @prior ./prompts/image-gen-prompt.txt
120
+ @prior https://example.com/prompts/123
121
+ @prior file:///path/to/prompt.txt
122
+ ```
123
+
124
+ **Rules:**
125
+ - Relative paths are resolved relative to the MarkBack file location
126
+ - `@prior` can be used with or without `@source`
127
+ - `@prior` does not affect content handling (inline content or `@source` rules still apply)
128
+ - Parsers SHOULD verify referenced files exist (warning if missing)
129
+
130
+ #### 3.1.4 Line Range Specification
131
+
132
+ Both `@source` and `@prior` headers support optional line range specifications using colon notation. This allows referencing specific lines within a file.
133
+
134
+ **Syntax:** `<path-or-uri>:<start>` or `<path-or-uri>:<start>-<end>`
135
+
136
+ ```
137
+ @source ./code.py:42
138
+ @source ./code.py:42-50
139
+ @prior ./prompts/template.txt:1-20
140
+ @source https://example.com/file.txt:100-150
141
+ ```
142
+
143
+ **Rules:**
144
+ - Line numbers are 1-indexed (first line is line 1)
145
+ - Single line: `:N` references line N only
146
+ - Line range: `:N-M` references lines N through M (inclusive)
147
+ - End line must be greater than or equal to start line (E011 error otherwise)
148
+ - Line ranges are informational metadata; parsers do not validate that referenced lines exist in the file
149
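+ - Windows drive letters (e.g., `C:\path`) are not confused with line ranges: a line range is recognized only as a trailing `:<start>` or `:<start>-<end>` suffix made of digits, and URI scheme detection requires a scheme longer than one character, so `C:` is treated as part of a path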
150
+
112
151
  ### 3.2 Content Block
113
152
 
114
153
  Content is everything between headers and the `<<<` feedback delimiter.
@@ -442,7 +481,7 @@ Canonical form ensures consistent output for comparison and version control.
442
481
  ### 5.2 Canonicalization Rules
443
482
 
444
483
  1. **Line endings:** Normalize to `\n` (LF)
445
- 2. **Header order:** `@uri` before `@source` before unknown headers (alphabetical)
484
+ 2. **Header order:** `@uri` before `@prior` before `@source` before unknown headers (alphabetical)
446
485
  3. **Header spacing:** Exactly one space after keyword
447
486
  4. **Trailing whitespace:** Remove from all lines
448
487
  5. **Content whitespace:** Preserve internal whitespace; trim leading/trailing blank lines
@@ -557,6 +596,7 @@ Each line is classified as one of:
557
596
  | E008 | Unclosed quote in structured attribute value (only in `structured` parse mode) |
558
597
  | E009 | Empty feedback (nothing after `<<< `) |
559
598
  | E010 | Missing blank line before inline content (content starts with `@`) |
599
+ | E011 | Invalid line range (end line less than start line) |
560
600
 
561
601
  ### 7.2 Warnings (SHOULD fix)
562
602
 
@@ -570,6 +610,7 @@ Each line is classified as one of:
570
610
  | W006 | Missing `@uri` (record has no identifier) |
571
611
  | W007 | Paired feedback file not found for content file |
572
612
  | W008 | Non-canonical formatting detected |
613
+ | W009 | `@prior` file not found |
573
614
 
574
615
  ### 7.3 Lint Output Format
575
616
 
@@ -617,7 +658,27 @@ Or in compact form:
617
658
  @source ./images/beach.jpg <<< appropriate; tags=landscape,beach,sunset; quality=high
618
659
  ```
619
660
 
620
- ### 8.4 Single-File Example
661
+ ### 8.4 Record with Prior Reference (e.g., LLM-generated content)
662
+
663
+ ```
664
+ @uri local:generated-image-001
665
+ @prior ./prompts/beach-sunset.txt
666
+ @source ./images/generated-beach.jpg
667
+ <<< accurate; matches prompt well; quality=high
668
+ ```
669
+
670
+ Or with inline content:
671
+ ```
672
+ @uri local:generated-text-001
673
+ @prior ./prompts/haiku-prompt.txt
674
+
675
+ Cherry blossoms fall,
676
+ Petals dance on gentle breeze,
677
+ Spring whispers goodbye.
678
+ <<< creative; follows haiku structure; quality=excellent
679
+ ```
680
+
681
+ ### 8.5 Single-File Example
621
682
 
622
683
  **File:** `question.mb`
623
684
  ```
@@ -627,7 +688,7 @@ Explain quantum entanglement in simple terms.
627
688
  <<< quality=excellent; accuracy=high; clarity=good
628
689
  ```
629
690
 
630
- ### 8.5 Label List Example (Compact Format)
691
+ ### 8.6 Label List Example (Compact Format)
631
692
 
632
693
  **File:** `image-annotations.mb`
633
694
  ```
@@ -659,7 +720,7 @@ Explain quantum entanglement in simple terms.
659
720
  @source ./batch1/item3.txt <<< positive; excellent clarity
660
721
  ```
661
722
 
662
- ### 8.6 Multi-Record Example (Mixed Freeform and Structured)
723
+ ### 8.7 Multi-Record Example (Mixed Freeform and Structured)
663
724
 
664
725
  **File:** `training-data.mb`
665
726
  ```
@@ -690,7 +751,7 @@ Please write a formal letter requesting a meeting.
690
751
  @source ./audio/sample-005.wav <<< transcription="Hello world"; quality=clear; language=en
691
752
  ```
692
753
 
693
- ### 8.7 Paired-File Example
754
+ ### 8.8 Paired-File Example
694
755
 
695
756
  **Content file:** `essay.txt`
696
757
  ```
@@ -706,7 +767,7 @@ agriculture, manufacturing, mining, and transport.
706
767
  <<< good; grade=B+; well structured but needs more specific examples
707
768
  ```
708
769
 
709
- ### 8.8 Freeform Feedback Examples
770
+ ### 8.9 Freeform Feedback Examples
710
771
 
711
772
  Various styles of freeform feedback:
712
773
 
@@ -729,7 +790,7 @@ Explain machine learning to a child.
729
790
  <<< needs work; the explanation assumes too much prior knowledge
730
791
  ```
731
792
 
732
- ### 8.9 Complex Structured Feedback (JSON)
793
+ ### 8.10 Complex Structured Feedback (JSON)
733
794
 
734
795
  ```
735
796
  @uri local:complex-example
@@ -738,7 +799,7 @@ Multi-attribute content with special characters.
738
799
  <<< json:{"rating":4.5,"tags":["important","review"],"notes":"Contains \"quoted\" text and; semicolons","scores":{"accuracy":0.9,"relevance":0.85}}
739
800
  ```
740
801
 
741
- ### 8.10 Image with MarkBack Sidecar
802
+ ### 8.11 Image with MarkBack Sidecar
742
803
 
743
804
  **Content file:** `diagram.png` (binary)
744
805
 
@@ -824,8 +885,10 @@ feedback-content = *VCHAR ; no LF allowed
824
885
  compact-record = [uri-line] source-feedback-line
825
886
  compact-list = compact-record *(1*blank-line compact-record)
826
887
  uri-line = "@uri" SP value LF
827
- source-feedback-line = "@source" SP path SP "<<<" SP feedback-content LF
828
- path = 1*VCHAR ; ends at space before <<<
888
+ source-feedback-line = "@source" SP path-with-range SP "<<<" SP feedback-content LF
889
+ path-with-range = path [line-range] ; path with optional line range
890
+ path = 1*VCHAR ; ends at space before <<< or line-range
891
+ line-range = ":" 1*DIGIT ["-" 1*DIGIT]
829
892
 
830
893
  LOWER = %x61-7A ; a-z
831
894
  SP = %x20 ; space
@@ -110,6 +110,69 @@ def lint_source_exists(
110
110
  return diagnostics
111
111
 
112
112
 
113
+ def lint_prior_exists(
114
+ record: Record,
115
+ base_path: Optional[Path],
116
+ record_idx: int,
117
+ ) -> list[Diagnostic]:
118
+ """Check if @prior file exists."""
119
+ diagnostics: list[Diagnostic] = []
120
+
121
+ if record.prior and not record.prior.is_uri:
122
+ try:
123
+ resolved = record.prior.resolve(base_path)
124
+ if not resolved.exists():
125
+ diagnostics.append(Diagnostic(
126
+ file=record._source_file,
127
+ line=record._start_line,
128
+ column=None,
129
+ severity=Severity.WARNING,
130
+ code=WarningCode.W009,
131
+ message=f"@prior file not found: {record.prior}",
132
+ record_index=record_idx,
133
+ ))
134
+ except ValueError:
135
+ pass # URI that can't be resolved to path
136
+
137
+ return diagnostics
138
+
139
+
140
+ def lint_line_range(
141
+ record: Record,
142
+ record_idx: int,
143
+ ) -> list[Diagnostic]:
144
+ """Check if line ranges are valid (end >= start)."""
145
+ diagnostics: list[Diagnostic] = []
146
+
147
+ # Check @source line range
148
+ if record.source and record.source.start_line is not None:
149
+ if record.source.end_line is not None and record.source.end_line < record.source.start_line:
150
+ diagnostics.append(Diagnostic(
151
+ file=record._source_file,
152
+ line=record._start_line,
153
+ column=None,
154
+ severity=Severity.ERROR,
155
+ code=ErrorCode.E011,
156
+ message=f"Invalid line range in @source: end line {record.source.end_line} is less than start line {record.source.start_line}",
157
+ record_index=record_idx,
158
+ ))
159
+
160
+ # Check @prior line range
161
+ if record.prior and record.prior.start_line is not None:
162
+ if record.prior.end_line is not None and record.prior.end_line < record.prior.start_line:
163
+ diagnostics.append(Diagnostic(
164
+ file=record._source_file,
165
+ line=record._start_line,
166
+ column=None,
167
+ severity=Severity.ERROR,
168
+ code=ErrorCode.E011,
169
+ message=f"Invalid line range in @prior: end line {record.prior.end_line} is less than start line {record.prior.start_line}",
170
+ record_index=record_idx,
171
+ ))
172
+
173
+ return diagnostics
174
+
175
+
113
176
  def lint_canonical_format(
114
177
  records: list[Record],
115
178
  original_text: str,
@@ -173,10 +236,14 @@ def lint_string(
173
236
  idx,
174
237
  ))
175
238
 
176
- # Check source file existence
239
+ # Check source and prior file existence
177
240
  if check_sources:
178
241
  base_path = source_file.parent if source_file else None
179
242
  result.diagnostics.extend(lint_source_exists(record, base_path, idx))
243
+ result.diagnostics.extend(lint_prior_exists(record, base_path, idx))
244
+
245
+ # Check line range validity
246
+ result.diagnostics.extend(lint_line_range(record, idx))
180
247
 
181
248
  # Check canonical format
182
249
  if check_canonical and result.records and not result.has_errors:
@@ -17,7 +17,7 @@ from .types import (
17
17
 
18
18
 
19
19
  # Known header keywords
20
- KNOWN_HEADERS = {"uri", "source"}
20
+ KNOWN_HEADERS = {"uri", "source", "prior"}
21
21
 
22
22
  # Patterns
23
23
  HEADER_PATTERN = re.compile(r"^@([a-z]+)\s+(.+)$")
@@ -147,6 +147,8 @@ def parse_string(
147
147
  uri = current_headers.get("uri") or pending_uri
148
148
  source_str = current_headers.get("source")
149
149
  source = SourceRef(source_str) if source_str else None
150
+ prior_str = current_headers.get("prior")
151
+ prior = SourceRef(prior_str) if prior_str else None
150
152
 
151
153
  content = None
152
154
  if current_content_lines:
@@ -163,6 +165,7 @@ def parse_string(
163
165
  feedback=feedback,
164
166
  uri=uri,
165
167
  source=source,
168
+ prior=prior,
166
169
  content=content,
167
170
  _source_file=source_file,
168
171
  _start_line=current_start_line,
@@ -239,13 +242,16 @@ def parse_string(
239
242
  line_num,
240
243
  )
241
244
 
242
- # Use any pending @uri from previous line
245
+ # Use any pending @uri from previous line and @prior if present
243
246
  uri = pending_uri or current_headers.get("uri")
247
+ prior_str = current_headers.get("prior")
248
+ prior = SourceRef(prior_str) if prior_str else None
244
249
 
245
250
  record = Record(
246
251
  feedback=feedback or "",
247
252
  uri=uri,
248
253
  source=source,
254
+ prior=prior,
249
255
  content=None,
250
256
  _source_file=source_file,
251
257
  _start_line=current_start_line,
@@ -1,5 +1,6 @@
1
1
  """Core types for MarkBack format."""
2
2
 
3
+ import re
3
4
  from dataclasses import dataclass, field
4
5
  from enum import Enum
5
6
  from pathlib import Path
@@ -25,6 +26,7 @@ class ErrorCode(Enum):
25
26
  E008 = "E008" # Unclosed quote in structured attribute value
26
27
  E009 = "E009" # Empty feedback (nothing after <<< )
27
28
  E010 = "E010" # Missing blank line before inline content
29
+ E011 = "E011" # Invalid line range (end < start)
28
30
 
29
31
 
30
32
  class WarningCode(Enum):
@@ -37,6 +39,7 @@ class WarningCode(Enum):
37
39
  W006 = "W006" # Missing @uri (record has no identifier)
38
40
  W007 = "W007" # Paired feedback file not found
39
41
  W008 = "W008" # Non-canonical formatting detected
42
+ W009 = "W009" # @prior file not found
40
43
 
41
44
 
42
45
  @dataclass
@@ -75,29 +78,67 @@ class Diagnostic:
75
78
  }
76
79
 
77
80
 
81
+ # Regex to parse line range from a path: path:start or path:start-end
82
+ _LINE_RANGE_PATTERN = re.compile(r'^(.+?):(\d+)(?:-(\d+))?$')
83
+
84
+
78
85
  @dataclass
79
86
  class SourceRef:
80
87
  """Reference to external content (file path or URI)."""
81
88
  value: str
82
89
  is_uri: bool = False
90
+ start_line: Optional[int] = None
91
+ end_line: Optional[int] = None
92
+ _path_only: str = ""
83
93
 
84
94
  def __post_init__(self):
85
- # Determine if this is a URI or file path
95
+ # Parse line range if present
96
+ self._parse_line_range()
97
+
98
+ # Determine if this is a URI or file path (using path without line range)
86
99
  if not self.is_uri:
87
- parsed = urlparse(self.value)
100
+ parsed = urlparse(self._path_only)
88
101
  # Consider it a URI if it has a scheme that's not a Windows drive letter
89
102
  self.is_uri = bool(parsed.scheme) and len(parsed.scheme) > 1
90
103
 
104
+ def _parse_line_range(self):
105
+ """Parse optional line range from value."""
106
+ match = _LINE_RANGE_PATTERN.match(self.value)
107
+ if match:
108
+ self._path_only = match.group(1)
109
+ self.start_line = int(match.group(2))
110
+ if match.group(3):
111
+ self.end_line = int(match.group(3))
112
+ else:
113
+ # Single line reference: start and end are the same
114
+ self.end_line = self.start_line
115
+ else:
116
+ self._path_only = self.value
117
+
118
+ @property
119
+ def path(self) -> str:
120
+ """Return path without line range."""
121
+ return self._path_only
122
+
123
+ @property
124
+ def line_range_str(self) -> Optional[str]:
125
+ """Return formatted line range string, or None if no range."""
126
+ if self.start_line is None:
127
+ return None
128
+ if self.start_line == self.end_line:
129
+ return f":{self.start_line}"
130
+ return f":{self.start_line}-{self.end_line}"
131
+
91
132
  def resolve(self, base_path: Optional[Path] = None) -> Path:
92
133
  """Resolve to a file path (relative paths resolved against base_path)."""
93
134
  if self.is_uri:
94
- parsed = urlparse(self.value)
135
+ parsed = urlparse(self._path_only)
95
136
  if parsed.scheme == "file":
96
137
  # file:// URI
97
138
  return Path(parsed.path)
98
139
  raise ValueError(f"Cannot resolve non-file URI to path: {self.value}")
99
140
 
100
- path = Path(self.value)
141
+ path = Path(self._path_only)
101
142
  if path.is_absolute():
102
143
  return path
103
144
  if base_path:
@@ -122,6 +163,7 @@ class Record:
122
163
  feedback: str
123
164
  uri: Optional[str] = None
124
165
  source: Optional[SourceRef] = None
166
+ prior: Optional[SourceRef] = None
125
167
  content: Optional[str] = None
126
168
  metadata: dict = field(default_factory=dict)
127
169
 
@@ -154,6 +196,7 @@ class Record:
154
196
  return {
155
197
  "uri": self.uri,
156
198
  "source": str(self.source) if self.source else None,
199
+ "prior": str(self.prior) if self.prior else None,
157
200
  "content": self.content,
158
201
  "feedback": self.feedback,
159
202
  "metadata": self.metadata,
@@ -38,15 +38,19 @@ def write_record_canonical(
38
38
  )
39
39
 
40
40
  if use_compact:
41
- # Compact format: @uri on its own line (if present), then @source ... <<<
41
+ # Compact format: @uri on its own line (if present), then @prior, then @source ... <<<
42
42
  if record.uri:
43
43
  lines.append(f"@uri {record.uri}")
44
+ if record.prior:
45
+ lines.append(f"@prior {record.prior}")
44
46
  lines.append(f"@source {record.source} <<< {record.feedback}")
45
47
  else:
46
48
  # Full format
47
- # Headers: @uri first, then @source
49
+ # Headers: @uri first, then @prior, then @source
48
50
  if record.uri:
49
51
  lines.append(f"@uri {record.uri}")
52
+ if record.prior:
53
+ lines.append(f"@prior {record.prior}")
50
54
  if record.source:
51
55
  lines.append(f"@source {record.source}")
52
56
 
@@ -147,6 +151,9 @@ def write_label_file(record: Record) -> str:
147
151
 
148
152
  if record.uri:
149
153
  lines.append(f"@uri {record.uri}")
154
+
155
+ if record.prior:
156
+ lines.append(f"@prior {record.prior}")
150
157
 
151
158
  lines.append(f"<<< {record.feedback}")
152
159
 
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 dandriscoll
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,47 @@
1
+ # markbackjs
2
+
3
+ JavaScript/TypeScript linter for the MarkBack format.
4
+
5
+ ## Install
6
+
7
+ ```bash
8
+ npm install markbackjs
9
+ ```
10
+
11
+ ## Usage
12
+
13
+ ```js
14
+ const { lintString, formatDiagnostics } = require("markbackjs");
15
+
16
+ const text = "Content here.\n<<< positive\n";
17
+ const result = lintString(text, { checkSources: false, checkCanonical: false });
18
+
19
+ if (result.hasErrors) {
20
+ console.log(formatDiagnostics(result.diagnostics));
21
+ }
22
+ ```
23
+
24
+ ### Supported Headers
25
+
26
+ - `@uri` - Unique identifier for the record
27
+ - `@source` - Reference to external content file
28
+ - `@prior` - Reference to a file that precedes the source (e.g., a prompt that generated it)
29
+
30
+ ## API
31
+
32
+ - `lintString(text, options)`
33
+ - `lintFile(path, options)`
34
+ - `lintFiles(paths, options)`
35
+ - `formatDiagnostics(diagnostics, format)`
36
+ - `summarizeResults(results)`
37
+
38
+ Options:
39
+ - `sourceFile`: string
40
+ - `checkSources`: boolean (default true)
41
+ - `checkCanonical`: boolean (default true)
42
+
43
+ ## Build
44
+
45
+ ```bash
46
+ npm run build
47
+ ```
@@ -0,0 +1,51 @@
1
+ {
2
+ "name": "markbackjs",
3
+ "version": "0.1.0",
4
+ "lockfileVersion": 3,
5
+ "requires": true,
6
+ "packages": {
7
+ "": {
8
+ "name": "markbackjs",
9
+ "version": "0.1.0",
10
+ "license": "MIT",
11
+ "devDependencies": {
12
+ "@types/node": "^20.11.0",
13
+ "typescript": "^5.4.0"
14
+ },
15
+ "engines": {
16
+ "node": ">=18"
17
+ }
18
+ },
19
+ "node_modules/@types/node": {
20
+ "version": "20.19.27",
21
+ "resolved": "https://registry.npmjs.org/@types/node/-/node-20.19.27.tgz",
22
+ "integrity": "sha512-N2clP5pJhB2YnZJ3PIHFk5RkygRX5WO/5f0WC08tp0wd+sv0rsJk3MqWn3CbNmT2J505a5336jaQj4ph1AdMug==",
23
+ "dev": true,
24
+ "license": "MIT",
25
+ "dependencies": {
26
+ "undici-types": "~6.21.0"
27
+ }
28
+ },
29
+ "node_modules/typescript": {
30
+ "version": "5.9.3",
31
+ "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz",
32
+ "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==",
33
+ "dev": true,
34
+ "license": "Apache-2.0",
35
+ "bin": {
36
+ "tsc": "bin/tsc",
37
+ "tsserver": "bin/tsserver"
38
+ },
39
+ "engines": {
40
+ "node": ">=14.17"
41
+ }
42
+ },
43
+ "node_modules/undici-types": {
44
+ "version": "6.21.0",
45
+ "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz",
46
+ "integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==",
47
+ "dev": true,
48
+ "license": "MIT"
49
+ }
50
+ }
51
+ }