markback 0.1.5__tar.gz → 0.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. {markback-0.1.5 → markback-0.2.1}/.claude/settings.local.json +12 -1
  2. {markback-0.1.5 → markback-0.2.1}/.gitignore +3 -0
  3. markback-0.2.1/AGENTS.md +135 -0
  4. markback-0.2.1/IMPLEMENTATION_NOTES.md +79 -0
  5. markback-0.2.1/PKG-INFO +240 -0
  6. markback-0.2.1/README.md +207 -0
  7. markback-0.2.1/SPEC.md +1311 -0
  8. {markback-0.1.5 → markback-0.2.1}/markback/__init__.py +36 -30
  9. markback-0.2.1/markback/cli.py +540 -0
  10. {markback-0.1.5 → markback-0.2.1}/markback/linter.py +99 -87
  11. markback-0.2.1/markback/parser.py +638 -0
  12. {markback-0.1.5 → markback-0.2.1}/markback/types.py +74 -42
  13. markback-0.2.1/markback/writer.py +357 -0
  14. {markback-0.1.5 → markback-0.2.1}/packages/markbackjs/package-lock.json +2 -2
  15. {markback-0.1.5 → markback-0.2.1}/packages/markbackjs/package.json +1 -1
  16. {markback-0.1.5 → markback-0.2.1}/packages/markbackjs/src/index.ts +2 -1
  17. {markback-0.1.5 → markback-0.2.1}/packages/markbackjs/src/linter.ts +87 -63
  18. markback-0.2.1/packages/markbackjs/src/parser.ts +450 -0
  19. {markback-0.1.5 → markback-0.2.1}/packages/markbackjs/src/types.ts +55 -29
  20. markback-0.2.1/packages/markbackjs/src/writer.ts +174 -0
  21. {markback-0.1.5 → markback-0.2.1}/packages/markbackjs/test/linter.test.js +117 -4
  22. markback-0.2.1/packages/markbackjs/test/multi_segment.test.js +167 -0
  23. {markback-0.1.5 → markback-0.2.1}/pyproject.toml +1 -3
  24. markback-0.2.1/scripts/bump-version.sh +91 -0
  25. markback-0.2.1/scripts/publish-pypi.sh +28 -0
  26. markback-0.2.1/tests/fixtures/compact_source.mb +2 -0
  27. markback-0.2.1/tests/fixtures/errors/content_with_source.mb +5 -0
  28. {markback-0.1.5 → markback-0.2.1}/tests/fixtures/errors/empty_feedback.mb +1 -1
  29. {markback-0.1.5 → markback-0.2.1}/tests/fixtures/errors/malformed_uri.mb +1 -1
  30. {markback-0.1.5 → markback-0.2.1}/tests/fixtures/errors/missing_feedback.mb +1 -1
  31. {markback-0.1.5 → markback-0.2.1}/tests/fixtures/errors/multiple_feedback.mb +1 -1
  32. {markback-0.1.5 → markback-0.2.1}/tests/fixtures/essay.label.txt +1 -1
  33. {markback-0.1.5 → markback-0.2.1}/tests/fixtures/external_source.mb +2 -2
  34. {markback-0.1.5 → markback-0.2.1}/tests/fixtures/freeform_feedback.mb +1 -1
  35. {markback-0.1.5 → markback-0.2.1}/tests/fixtures/json_feedback.mb +1 -1
  36. markback-0.2.1/tests/fixtures/label_list.mb +6 -0
  37. markback-0.2.1/tests/fixtures/minimal.mb.mb +1 -0
  38. {markback-0.1.5 → markback-0.2.1}/tests/fixtures/multi_record.mb +5 -5
  39. {markback-0.1.5 → markback-0.2.1}/tests/fixtures/with_uri.mb +1 -1
  40. markback-0.2.1/tests/test_cli.py +303 -0
  41. markback-0.2.1/tests/test_linter.py +323 -0
  42. markback-0.2.1/tests/test_parser.py +538 -0
  43. {markback-0.1.5 → markback-0.2.1}/tests/test_types.py +75 -150
  44. markback-0.2.1/tests/test_writer.py +391 -0
  45. markback-0.1.5/IMPLEMENTATION_NOTES.md +0 -119
  46. markback-0.1.5/PKG-INFO +0 -268
  47. markback-0.1.5/README.md +0 -233
  48. markback-0.1.5/SPEC.md +0 -981
  49. markback-0.1.5/markback/cli.py +0 -122
  50. markback-0.1.5/markback/config.py +0 -181
  51. markback-0.1.5/markback/llm.py +0 -175
  52. markback-0.1.5/markback/parser.py +0 -597
  53. markback-0.1.5/markback/workflow.py +0 -351
  54. markback-0.1.5/markback/writer.py +0 -263
  55. markback-0.1.5/packages/markbackjs/src/parser.ts +0 -361
  56. markback-0.1.5/packages/markbackjs/src/writer.ts +0 -79
  57. markback-0.1.5/scripts/publish-pypi.sh +0 -14
  58. markback-0.1.5/tests/fixtures/compact_source.mb +0 -2
  59. markback-0.1.5/tests/fixtures/errors/content_with_source.mb +0 -5
  60. markback-0.1.5/tests/fixtures/label_list.mb +0 -6
  61. markback-0.1.5/tests/test_cli.py +0 -208
  62. markback-0.1.5/tests/test_config.py +0 -233
  63. markback-0.1.5/tests/test_linter.py +0 -455
  64. markback-0.1.5/tests/test_parser.py +0 -389
  65. markback-0.1.5/tests/test_workflow.py +0 -260
  66. markback-0.1.5/tests/test_writer.py +0 -347
  67. {markback-0.1.5 → markback-0.2.1}/.ishipped/card.md +0 -0
  68. {markback-0.1.5 → markback-0.2.1}/LICENSE +0 -0
  69. {markback-0.1.5 → markback-0.2.1}/packages/markbackjs/LICENSE +0 -0
  70. {markback-0.1.5 → markback-0.2.1}/packages/markbackjs/README.md +0 -0
  71. {markback-0.1.5 → markback-0.2.1}/packages/markbackjs/tsconfig.json +0 -0
  72. {markback-0.1.5 → markback-0.2.1}/scripts/publish-npm.sh +0 -0
  73. {markback-0.1.5 → markback-0.2.1}/scripts/publish.sh +0 -0
  74. {markback-0.1.5 → markback-0.2.1}/tests/__init__.py +0 -0
  75. {markback-0.1.5 → markback-0.2.1}/tests/fixtures/essay.txt +0 -0
  76. {markback-0.1.5 → markback-0.2.1}/tests/fixtures/minimal.mb +0 -0
@@ -17,7 +17,18 @@
17
17
  "Bash(ls:*)",
18
18
  "Bash(pip install:*)",
19
19
  "Bash(PYTHONPATH=/src/markback EDITOR=cat python3:*)",
20
- "Bash(PYTHONPATH=/src/markback python3:*)"
20
+ "Bash(PYTHONPATH=/src/markback python3:*)",
21
+ "Bash(python3 -m markback.cli --help)",
22
+ "Read(//tmp/**)",
23
+ "Bash(python3 -c:*)",
24
+ "Bash(node -e:*)",
25
+ "Bash(git add:*)",
26
+ "Bash(git commit -m ':*)",
27
+ "Bash(git push:*)",
28
+ "Bash(git commit -m ' *)",
29
+ "Bash(tar tzf *)",
30
+ "Bash(.venv/bin/python *)",
31
+ "Bash(bash -n /src/markback/scripts/bump-version.sh)"
21
32
  ]
22
33
  }
23
34
  }
@@ -194,6 +194,9 @@ cython_debug/
194
194
  # PyPI configuration file
195
195
  .pypirc
196
196
 
197
+ # Devbox
198
+ .devbox/
199
+
197
200
  # Cursor
198
201
  # Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
199
202
  # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
@@ -0,0 +1,135 @@
1
+ # Writing MarkBack V2 (.mb) Files
2
+
3
+ MarkBack pairs content with single-line feedback using `<<<` as the delimiter.
4
+
5
+ ## Minimal record
6
+
7
+ ```
8
+ Some content here.
9
+ <<< positive
10
+ ```
11
+
12
+ ## Record with headers
13
+
14
+ ```
15
+ @id item-001
16
+ @by reviewer@example.com
17
+ @tag review p1
18
+ @input ./prompt.txt
19
+ @file ./file.txt
20
+
21
+ Inline content goes here.
22
+ <<< good; quality=high
23
+ ```
24
+
25
+ Headers: `@id`, `@by`, `@tag`, `@input`, `@file`. All optional. Order: id, by, tag, input, file.
26
+
27
+ ## Rules
28
+
29
+ - `<<<` must be followed by one space then feedback text — all on one line
30
+ - A blank line is **required** between headers and inline content
31
+ - `@file` + inline content can coexist (file is provenance, content is snapshot)
32
+ - Records in multi-record files are separated by `---`
33
+ - Files must be UTF-8 with LF line endings
34
+ - `@id` values are plain strings (no URI validation)
35
+
36
+ ## Compact format (one record per line)
37
+
38
+ ```
39
+ @file ./images/001.jpg <<< approved; scene=beach
40
+ @file ./images/002.jpg <<< rejected; too dark
41
+ ```
42
+
43
+ No `---` separator needed between compact records. `@id` can go on the line above:
44
+
45
+ ```
46
+ @id item-001
47
+ @file ./file.txt <<< good
48
+ ```
49
+
50
+ ## Multi-record file
51
+
52
+ ```
53
+ @id first
54
+
55
+ First content.
56
+ <<< positive
57
+
58
+ ---
59
+ @id second
60
+
61
+ Second content.
62
+ <<< negative; needs work
63
+ ```
64
+
65
+ ## File-level headers (% prefix)
66
+
67
+ ```
68
+ %markback 2
69
+ %scope issue-A issue-B
70
+ %covers ./gen/batch3/*.txt
71
+
72
+ @file ./gen/batch3/file2.txt <<< issue-B; tone is off
73
+ ```
74
+
75
+ - `%markback 2` — version declaration
76
+ - `%scope` — what issues are being checked (sweep pattern)
77
+ - `%covers` — glob of all files reviewed; a matching file with no record is implicitly clean for every `%scope` item
78
+
79
+ ## Tags
80
+
81
+ ```
82
+ @id item-001
83
+ @tag training positive-examples batch-2024-03
84
+ @file ./data/example.txt
85
+ <<< approved
86
+ ```
87
+
88
+ Tags are space-separated. Multiple `@tag` lines in one record are merged into a single tag list.
89
+
90
+ ## Feedback format
91
+
92
+ Feedback is freeform text. Optional structured convention:
93
+
94
+ | Pattern | Meaning |
95
+ |---------|---------|
96
+ | `<<< positive` | label |
97
+ | `<<< negative; too vague` | label + comment |
98
+ | `<<< good; quality=high` | label + attribute |
99
+ | `<<< quality=high; confidence=0.9` | attributes only |
100
+ | `<<< json:{"key":"value"}` | JSON mode |
101
+
102
+ Segments are separated by `; ` (semicolon + space). A segment containing `=` is a key-value attribute; a segment without `=` is a label or a comment.
103
+
104
+ ## Sidecar files
105
+
106
+ Content in `name.ext`, annotation in `name.ext.mb`:
107
+
108
+ **report.pdf** — the content
109
+ **report.pdf.mb:**
110
+ ```
111
+ @id report-001
112
+ <<< good; grade=B+
113
+ ```
114
+
115
+ ## Line/character ranges
116
+
117
+ `@file` and `@input` support position references:
118
+
119
+ ```
120
+ @file ./code.py:42 ← line 42
121
+ @file ./code.py:42-50 ← lines 42–50
122
+ @file ./code.py:10:5-15:20 ← line 10 col 5 to line 15 col 20
123
+ ```
124
+
125
+ ## V1 backward compatibility
126
+
127
+ V1 headers (`@uri`, `@source`, `@prior`) are automatically mapped to V2 (`@id`, `@file`, `@input`) with a W010 warning.
128
+
129
+ ## Quick checklist
130
+
131
+ - [ ] Every record has exactly one `<<<` line
132
+ - [ ] Feedback is a single line (no newlines)
133
+ - [ ] Blank line between headers and inline content
134
+ - [ ] `---` between full records; not needed between compact records
135
+ - [ ] File ends with a newline
@@ -0,0 +1,79 @@
1
+ # Implementation Notes — MarkBack V2
2
+
3
+ ## V1 → V2 Changes
4
+
5
+ ### Header Renames
6
+ - `@uri` → `@id` (plain string, no URI validation)
7
+ - `@source` → `@file` (the content being annotated)
8
+ - `@prior` → `@input` (what produced the content)
9
+
10
+ ### New Features
11
+ - `@tag` header — space-separated tags for categorization
12
+ - `%markback 2` — optional version declaration
13
+ - `%scope` / `%covers` — sweep pattern (meaningful absence)
14
+ - `@file` + inline content can coexist (file is provenance, content is snapshot)
15
+ - Sidecar convention simplified to `name.ext.mb` (V1 `.label.txt` / `.feedback.txt` sidecars are still discovered for backward compatibility)
16
+
17
+ ### Removed
18
+ - LLM workflow layer (`llm.py`, `workflow.py`) — not core to MarkBack
19
+ - `httpx` dependency
20
+ - `config.py` / `python-dotenv` dependency / `--init` command — not core to MarkBack
21
+ - RFC 3986 URI validation on `@id`
22
+ - E003 (malformed URI) no longer emitted
23
+ - E005 (content with @source) no longer emitted — coexistence is valid
24
+ - `.label.txt` / `.feedback.txt` as primary sidecar convention (kept as V1 legacy in discovery)
25
+
26
+ ### API Changes
27
+ - `SourceRef` → `FileRef` (alias preserved)
28
+ - `Record` fields: `uri→id`, `source→file`, `prior→input`, added `tags`
29
+ - New `write()` / `append()` / `write_string()` functions
30
+ - New `normalize()` function
31
+ - New `discover_sidecars()` function
32
+ - `ParseResult` gains `scope`, `covers`, `version`, `covered_files()`
33
+ - V1 compat aliases preserved for all renamed functions
34
+
35
+ ## Design Decisions
36
+
37
+ ### Parser Architecture
38
+ Same state-machine approach as V1. Now also handles:
39
+ - File-level `%` headers (parsed before records, must be at top of file)
40
+ - V1 header mapping (detected by keyword, mapped with W010 warning)
41
+ - `@tag` with whitespace splitting and merge across multiple lines
42
+
43
+ ### V1 Backward Compatibility
44
+ The parser transparently reads V1 files:
45
+ 1. `@uri` → mapped to `record.id`
46
+ 2. `@source` → mapped to `record.file`
47
+ 3. `@prior` → mapped to `record.input`
48
+ 4. `@source ... <<<` compact → handled alongside `@file ... <<<`
49
+ 5. Each V1 header emits W010 warning
50
+
51
+ ### Sweep Pattern
52
+ `%scope` + `%covers` enable "meaningful absence":
53
+ - `%scope` declares what issues are being checked
54
+ - `%covers` declares the complete file set under review
55
+ - Files matching `%covers` with no record are implicitly clean for all scope items
56
+ - `ParseResult.covered_files()` resolves the glob for programmatic access
57
+
58
+ ### Writer Simplification
59
+ V1 had 5+ writer functions. V2 has:
60
+ - `write()` — write records to a file (auto-format)
61
+ - `append()` — add a record to existing file
62
+ - `write_string()` — write records to string
63
+ - `normalize()` — canonical rewrite
64
+ All V1 functions preserved as aliases.
65
+
66
+ ### Sidecar Convention
67
+ V2: `name.ext.mb` (append `.mb` to the full filename)
68
+ V1 legacy: `.label.txt`, `.feedback.txt` still discovered for backward compat
69
+
70
+ ## Testing Strategy
71
+
72
+ ### Unit Tests
73
+ - Parser tests cover V2 format, V1 backward compat, file-level headers, tags, sweep pattern
74
+ - Writer tests cover canonical output, version headers, scope/covers, round-trip
75
+ - Linter tests verify all error/warning codes with V2 semantics
76
+ - Type tests verify FileRef, Record with new fields, V1 compat aliases
77
+
78
+ ### Fixtures
79
+ All fixtures updated to V2 format. Error fixtures updated to reflect V2 semantics (e.g., content_with_source is now valid).
@@ -0,0 +1,240 @@
1
+ Metadata-Version: 2.4
2
+ Name: markback
3
+ Version: 0.2.1
4
+ Summary: A compact, human-writable format for storing content paired with feedback/labels
5
+ Project-URL: Homepage, https://github.com/dandriscoll/markback
6
+ Project-URL: Repository, https://github.com/dandriscoll/markback
7
+ Project-URL: Documentation, https://github.com/dandriscoll/markback#readme
8
+ Project-URL: Issues, https://github.com/dandriscoll/markback/issues
9
+ Author: Dan Driscoll
10
+ License-Expression: MIT
11
+ License-File: LICENSE
12
+ Keywords: annotation,data-labeling,feedback,labeling,llm,markdown
13
+ Classifier: Development Status :: 3 - Alpha
14
+ Classifier: Environment :: Console
15
+ Classifier: Intended Audience :: Developers
16
+ Classifier: License :: OSI Approved :: MIT License
17
+ Classifier: Operating System :: OS Independent
18
+ Classifier: Programming Language :: Python :: 3
19
+ Classifier: Programming Language :: Python :: 3.10
20
+ Classifier: Programming Language :: Python :: 3.11
21
+ Classifier: Programming Language :: Python :: 3.12
22
+ Classifier: Topic :: Software Development :: Quality Assurance
23
+ Classifier: Topic :: Text Processing :: Markup
24
+ Requires-Python: >=3.10
25
+ Requires-Dist: rich>=13.0.0
26
+ Requires-Dist: typer>=0.9.0
27
+ Provides-Extra: dev
28
+ Requires-Dist: build>=1.0.0; extra == 'dev'
29
+ Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
30
+ Requires-Dist: pytest>=7.0.0; extra == 'dev'
31
+ Requires-Dist: twine>=5.0.0; extra == 'dev'
32
+ Description-Content-Type: text/markdown
33
+
34
+ # MarkBack V2
35
+
36
+ A compact, human-writable format for storing content paired with feedback/labels.
37
+
38
+ ## Installation
39
+
40
+ ```bash
41
+ pip install markback    # from PyPI; use `pip install -e .` inside a source checkout
42
+ ```
43
+
44
+ ## Quick Start
45
+
46
+ ### Parse a MarkBack file
47
+
48
+ ```python
49
+ from markback import parse_file, parse_string
50
+
51
+ # Parse a file
52
+ result = parse_file("labels.mb")
53
+ for record in result.records:
54
+     print(f"{record.id}: {record.feedback}")
55
+
56
+ # Parse a string
57
+ text = """
58
+ @id example
59
+
60
+ Some content here.
61
+ <<< positive; good quality
62
+ """
63
+ result = parse_string(text)
64
+ ```
65
+
66
+ ### Write MarkBack files
67
+
68
+ ```python
69
+ from markback import Record, FileRef, write, append
70
+
71
+ # Write records to a file
72
+ records = [
73
+ Record(feedback="good", id="item-1", content="First item"),
74
+ Record(feedback="bad", id="item-2", content="Second item"),
75
+ ]
76
+ write("output.mb", records)
77
+
78
+ # Append a single record
79
+ append("output.mb", Record(feedback="great", id="item-3", content="Third"))
80
+ ```
81
+
82
+ ### Lint files
83
+
84
+ ```python
85
+ from markback import lint_file
86
+
87
+ result = lint_file("myfile.mb")
88
+ if result.has_errors:
89
+     for d in result.diagnostics:
90
+         print(d)
91
+ ```
92
+
93
+ ## CLI Usage
94
+
95
+ The CLI is available via `markback` or `mb` (shorthand).
96
+
97
+ ### Annotate files
98
+
99
+ ```bash
100
+ # Single file — inline feedback, appends to myfile.txt.mb
101
+ mb myfile.txt "good; clear writing"
102
+
103
+ # URL target — derives sidecar from last path segment (or hostname)
104
+ mb https://example.com/blog/post.html "great explanation"
105
+ # → writes post.html.mb with @file https://example.com/blog/post.html
106
+
107
+ # Quote a passage by editing the .mb file directly: inline content
108
+ # under an @file header can be a full snapshot OR an excerpt.
109
+ # @file https://example.com/post.html
110
+ #
111
+ # the quick brown fox jumps over the lazy dog
112
+ # <<< awkward phrasing
113
+
114
+ # Multi-segment section: several comments on one source, no repeated headers.
115
+ # @file ./essay.txt
116
+ #
117
+ # the lazy fox
118
+ # <<< awkward
119
+ #
120
+ # weak ending
121
+ # <<< needs punch
122
+
123
+ # With input reference (what produced the file)
124
+ mb output.txt "accurate" --input prompt.txt
125
+
126
+ # With tags and attribution
127
+ mb file.txt "good" --tag "review p1" --by alice@example.com
128
+
129
+ # Multiple files — same feedback for all
130
+ mb *.jpg -f "approved"
131
+
132
+ # Interactive mode — steps through each file
133
+ mb *.jpg --print
134
+
135
+ # Sweep pattern — track issues across batches
136
+ mb *.txt -f "issue-A" --scope "issue-A issue-B" --covers "./*.txt"
137
+ ```
138
+
139
+ ### Utility commands
140
+
141
+ ```bash
142
+ # Lint
143
+ mb --lint myfile.mb
144
+ mb --lint --json ./data/
145
+
146
+ # List records
147
+ mb --list myfile.mb
148
+
149
+ # Statistics
150
+ mb --stats myfile.mb
151
+
152
+ # Normalize to canonical format
153
+ mb --normalize input.mb
154
+ mb --normalize --in-place input.mb
155
+
156
+ # Convert between formats
157
+ mb --convert --to multi -o output.mb input.mb
158
+ mb --convert --to compact -o output.mb input.mb
159
+
160
+ # Upgrade V1 files to V2
161
+ mb --upgrade *.mb # preview
162
+ mb --upgrade --apply --in-place *.mb # apply
163
+ ```
164
+
165
+ ## File Format
166
+
167
+ ### V2 Headers
168
+
169
+ | Header | Purpose |
170
+ |--------|---------|
171
+ | `@id` | Record identifier (plain string) |
172
+ | `@by` | Who provided feedback |
173
+ | `@tag` | Space-separated tags |
174
+ | `@input` | What produced the content (e.g., a prompt) |
175
+ | `@file` | Path to the content being annotated |
176
+
177
+ ### File-level headers (% prefix)
178
+
179
+ ```
180
+ %markback 2
181
+ %scope issue-A issue-B
182
+ %covers ./gen/batch3/*.txt
183
+ ```
184
+
185
+ ### Record examples
186
+
187
+ ```
188
+ @id review-001
189
+ @by alice@company.com
190
+ @file ./src/auth.py:45-67
191
+ @tag security p0
192
+
193
+ <<< vulnerable; sql-injection in query builder
194
+ ```
195
+
196
+ ### Compact label list
197
+
198
+ ```
199
+ @file ./images/001.jpg <<< approved; scene=beach
200
+ @file ./images/002.jpg <<< rejected; too dark
201
+ ```
202
+
203
+ ### Sidecar files
204
+
205
+ Content in `report.pdf`, annotation in `report.pdf.mb`:
206
+
207
+ ```
208
+ @id report-001
209
+ <<< good; grade=B+
210
+ ```
211
+
212
+ ### Sweep pattern
213
+
214
+ Track issues across batches with meaningful absence:
215
+
216
+ ```
217
+ %markback 2
218
+ %scope issue-A issue-B
219
+ %covers ./gen/batch3/*.txt
220
+
221
+ @file ./gen/batch3/file2.txt <<< issue-B; tone is off
222
+ @file ./gen/batch3/file5.txt <<< issue-A; issue-B; both problems
223
+ ```
224
+
225
+ Files matching `%covers` without annotations are implicitly clean for all `%scope` items.
226
+
227
+ ## V1 Backward Compatibility
228
+
229
+ V1 headers (`@uri`, `@source`, `@prior`) are automatically mapped to V2 equivalents with a W010 warning. The V2 parser reads V1 files transparently.
230
+
231
+ ## Development
232
+
233
+ ```bash
234
+ pip install -e ".[dev]"
235
+ pytest
236
+ ```
237
+
238
+ ## License
239
+
240
+ MIT
@@ -0,0 +1,207 @@
1
+ # MarkBack V2
2
+
3
+ A compact, human-writable format for storing content paired with feedback/labels.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ pip install markback    # from PyPI; use `pip install -e .` inside a source checkout
9
+ ```
10
+
11
+ ## Quick Start
12
+
13
+ ### Parse a MarkBack file
14
+
15
+ ```python
16
+ from markback import parse_file, parse_string
17
+
18
+ # Parse a file
19
+ result = parse_file("labels.mb")
20
+ for record in result.records:
21
+     print(f"{record.id}: {record.feedback}")
22
+
23
+ # Parse a string
24
+ text = """
25
+ @id example
26
+
27
+ Some content here.
28
+ <<< positive; good quality
29
+ """
30
+ result = parse_string(text)
31
+ ```
32
+
33
+ ### Write MarkBack files
34
+
35
+ ```python
36
+ from markback import Record, FileRef, write, append
37
+
38
+ # Write records to a file
39
+ records = [
40
+ Record(feedback="good", id="item-1", content="First item"),
41
+ Record(feedback="bad", id="item-2", content="Second item"),
42
+ ]
43
+ write("output.mb", records)
44
+
45
+ # Append a single record
46
+ append("output.mb", Record(feedback="great", id="item-3", content="Third"))
47
+ ```
48
+
49
+ ### Lint files
50
+
51
+ ```python
52
+ from markback import lint_file
53
+
54
+ result = lint_file("myfile.mb")
55
+ if result.has_errors:
56
+     for d in result.diagnostics:
57
+         print(d)
58
+ ```
59
+
60
+ ## CLI Usage
61
+
62
+ The CLI is available via `markback` or `mb` (shorthand).
63
+
64
+ ### Annotate files
65
+
66
+ ```bash
67
+ # Single file — inline feedback, appends to myfile.txt.mb
68
+ mb myfile.txt "good; clear writing"
69
+
70
+ # URL target — derives sidecar from last path segment (or hostname)
71
+ mb https://example.com/blog/post.html "great explanation"
72
+ # → writes post.html.mb with @file https://example.com/blog/post.html
73
+
74
+ # Quote a passage by editing the .mb file directly: inline content
75
+ # under an @file header can be a full snapshot OR an excerpt.
76
+ # @file https://example.com/post.html
77
+ #
78
+ # the quick brown fox jumps over the lazy dog
79
+ # <<< awkward phrasing
80
+
81
+ # Multi-segment section: several comments on one source, no repeated headers.
82
+ # @file ./essay.txt
83
+ #
84
+ # the lazy fox
85
+ # <<< awkward
86
+ #
87
+ # weak ending
88
+ # <<< needs punch
89
+
90
+ # With input reference (what produced the file)
91
+ mb output.txt "accurate" --input prompt.txt
92
+
93
+ # With tags and attribution
94
+ mb file.txt "good" --tag "review p1" --by alice@example.com
95
+
96
+ # Multiple files — same feedback for all
97
+ mb *.jpg -f "approved"
98
+
99
+ # Interactive mode — steps through each file
100
+ mb *.jpg --print
101
+
102
+ # Sweep pattern — track issues across batches
103
+ mb *.txt -f "issue-A" --scope "issue-A issue-B" --covers "./*.txt"
104
+ ```
105
+
106
+ ### Utility commands
107
+
108
+ ```bash
109
+ # Lint
110
+ mb --lint myfile.mb
111
+ mb --lint --json ./data/
112
+
113
+ # List records
114
+ mb --list myfile.mb
115
+
116
+ # Statistics
117
+ mb --stats myfile.mb
118
+
119
+ # Normalize to canonical format
120
+ mb --normalize input.mb
121
+ mb --normalize --in-place input.mb
122
+
123
+ # Convert between formats
124
+ mb --convert --to multi -o output.mb input.mb
125
+ mb --convert --to compact -o output.mb input.mb
126
+
127
+ # Upgrade V1 files to V2
128
+ mb --upgrade *.mb # preview
129
+ mb --upgrade --apply --in-place *.mb # apply
130
+ ```
131
+
132
+ ## File Format
133
+
134
+ ### V2 Headers
135
+
136
+ | Header | Purpose |
137
+ |--------|---------|
138
+ | `@id` | Record identifier (plain string) |
139
+ | `@by` | Who provided feedback |
140
+ | `@tag` | Space-separated tags |
141
+ | `@input` | What produced the content (e.g., a prompt) |
142
+ | `@file` | Path to the content being annotated |
143
+
144
+ ### File-level headers (% prefix)
145
+
146
+ ```
147
+ %markback 2
148
+ %scope issue-A issue-B
149
+ %covers ./gen/batch3/*.txt
150
+ ```
151
+
152
+ ### Record examples
153
+
154
+ ```
155
+ @id review-001
156
+ @by alice@company.com
157
+ @file ./src/auth.py:45-67
158
+ @tag security p0
159
+
160
+ <<< vulnerable; sql-injection in query builder
161
+ ```
162
+
163
+ ### Compact label list
164
+
165
+ ```
166
+ @file ./images/001.jpg <<< approved; scene=beach
167
+ @file ./images/002.jpg <<< rejected; too dark
168
+ ```
169
+
170
+ ### Sidecar files
171
+
172
+ Content in `report.pdf`, annotation in `report.pdf.mb`:
173
+
174
+ ```
175
+ @id report-001
176
+ <<< good; grade=B+
177
+ ```
178
+
179
+ ### Sweep pattern
180
+
181
+ Track issues across batches with meaningful absence:
182
+
183
+ ```
184
+ %markback 2
185
+ %scope issue-A issue-B
186
+ %covers ./gen/batch3/*.txt
187
+
188
+ @file ./gen/batch3/file2.txt <<< issue-B; tone is off
189
+ @file ./gen/batch3/file5.txt <<< issue-A; issue-B; both problems
190
+ ```
191
+
192
+ Files matching `%covers` without annotations are implicitly clean for all `%scope` items.
193
+
194
+ ## V1 Backward Compatibility
195
+
196
+ V1 headers (`@uri`, `@source`, `@prior`) are automatically mapped to V2 equivalents with a W010 warning. The V2 parser reads V1 files transparently.
197
+
198
+ ## Development
199
+
200
+ ```bash
201
+ pip install -e ".[dev]"
202
+ pytest
203
+ ```
204
+
205
+ ## License
206
+
207
+ MIT