markback 0.1.5__tar.gz → 0.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {markback-0.1.5 → markback-0.2.1}/.claude/settings.local.json +12 -1
- {markback-0.1.5 → markback-0.2.1}/.gitignore +3 -0
- markback-0.2.1/AGENTS.md +135 -0
- markback-0.2.1/IMPLEMENTATION_NOTES.md +79 -0
- markback-0.2.1/PKG-INFO +240 -0
- markback-0.2.1/README.md +207 -0
- markback-0.2.1/SPEC.md +1311 -0
- {markback-0.1.5 → markback-0.2.1}/markback/__init__.py +36 -30
- markback-0.2.1/markback/cli.py +540 -0
- {markback-0.1.5 → markback-0.2.1}/markback/linter.py +99 -87
- markback-0.2.1/markback/parser.py +638 -0
- {markback-0.1.5 → markback-0.2.1}/markback/types.py +74 -42
- markback-0.2.1/markback/writer.py +357 -0
- {markback-0.1.5 → markback-0.2.1}/packages/markbackjs/package-lock.json +2 -2
- {markback-0.1.5 → markback-0.2.1}/packages/markbackjs/package.json +1 -1
- {markback-0.1.5 → markback-0.2.1}/packages/markbackjs/src/index.ts +2 -1
- {markback-0.1.5 → markback-0.2.1}/packages/markbackjs/src/linter.ts +87 -63
- markback-0.2.1/packages/markbackjs/src/parser.ts +450 -0
- {markback-0.1.5 → markback-0.2.1}/packages/markbackjs/src/types.ts +55 -29
- markback-0.2.1/packages/markbackjs/src/writer.ts +174 -0
- {markback-0.1.5 → markback-0.2.1}/packages/markbackjs/test/linter.test.js +117 -4
- markback-0.2.1/packages/markbackjs/test/multi_segment.test.js +167 -0
- {markback-0.1.5 → markback-0.2.1}/pyproject.toml +1 -3
- markback-0.2.1/scripts/bump-version.sh +91 -0
- markback-0.2.1/scripts/publish-pypi.sh +28 -0
- markback-0.2.1/tests/fixtures/compact_source.mb +2 -0
- markback-0.2.1/tests/fixtures/errors/content_with_source.mb +5 -0
- {markback-0.1.5 → markback-0.2.1}/tests/fixtures/errors/empty_feedback.mb +1 -1
- {markback-0.1.5 → markback-0.2.1}/tests/fixtures/errors/malformed_uri.mb +1 -1
- {markback-0.1.5 → markback-0.2.1}/tests/fixtures/errors/missing_feedback.mb +1 -1
- {markback-0.1.5 → markback-0.2.1}/tests/fixtures/errors/multiple_feedback.mb +1 -1
- {markback-0.1.5 → markback-0.2.1}/tests/fixtures/essay.label.txt +1 -1
- {markback-0.1.5 → markback-0.2.1}/tests/fixtures/external_source.mb +2 -2
- {markback-0.1.5 → markback-0.2.1}/tests/fixtures/freeform_feedback.mb +1 -1
- {markback-0.1.5 → markback-0.2.1}/tests/fixtures/json_feedback.mb +1 -1
- markback-0.2.1/tests/fixtures/label_list.mb +6 -0
- markback-0.2.1/tests/fixtures/minimal.mb.mb +1 -0
- {markback-0.1.5 → markback-0.2.1}/tests/fixtures/multi_record.mb +5 -5
- {markback-0.1.5 → markback-0.2.1}/tests/fixtures/with_uri.mb +1 -1
- markback-0.2.1/tests/test_cli.py +303 -0
- markback-0.2.1/tests/test_linter.py +323 -0
- markback-0.2.1/tests/test_parser.py +538 -0
- {markback-0.1.5 → markback-0.2.1}/tests/test_types.py +75 -150
- markback-0.2.1/tests/test_writer.py +391 -0
- markback-0.1.5/IMPLEMENTATION_NOTES.md +0 -119
- markback-0.1.5/PKG-INFO +0 -268
- markback-0.1.5/README.md +0 -233
- markback-0.1.5/SPEC.md +0 -981
- markback-0.1.5/markback/cli.py +0 -122
- markback-0.1.5/markback/config.py +0 -181
- markback-0.1.5/markback/llm.py +0 -175
- markback-0.1.5/markback/parser.py +0 -597
- markback-0.1.5/markback/workflow.py +0 -351
- markback-0.1.5/markback/writer.py +0 -263
- markback-0.1.5/packages/markbackjs/src/parser.ts +0 -361
- markback-0.1.5/packages/markbackjs/src/writer.ts +0 -79
- markback-0.1.5/scripts/publish-pypi.sh +0 -14
- markback-0.1.5/tests/fixtures/compact_source.mb +0 -2
- markback-0.1.5/tests/fixtures/errors/content_with_source.mb +0 -5
- markback-0.1.5/tests/fixtures/label_list.mb +0 -6
- markback-0.1.5/tests/test_cli.py +0 -208
- markback-0.1.5/tests/test_config.py +0 -233
- markback-0.1.5/tests/test_linter.py +0 -455
- markback-0.1.5/tests/test_parser.py +0 -389
- markback-0.1.5/tests/test_workflow.py +0 -260
- markback-0.1.5/tests/test_writer.py +0 -347
- {markback-0.1.5 → markback-0.2.1}/.ishipped/card.md +0 -0
- {markback-0.1.5 → markback-0.2.1}/LICENSE +0 -0
- {markback-0.1.5 → markback-0.2.1}/packages/markbackjs/LICENSE +0 -0
- {markback-0.1.5 → markback-0.2.1}/packages/markbackjs/README.md +0 -0
- {markback-0.1.5 → markback-0.2.1}/packages/markbackjs/tsconfig.json +0 -0
- {markback-0.1.5 → markback-0.2.1}/scripts/publish-npm.sh +0 -0
- {markback-0.1.5 → markback-0.2.1}/scripts/publish.sh +0 -0
- {markback-0.1.5 → markback-0.2.1}/tests/__init__.py +0 -0
- {markback-0.1.5 → markback-0.2.1}/tests/fixtures/essay.txt +0 -0
- {markback-0.1.5 → markback-0.2.1}/tests/fixtures/minimal.mb +0 -0
|
@@ -17,7 +17,18 @@
|
|
|
17
17
|
"Bash(ls:*)",
|
|
18
18
|
"Bash(pip install:*)",
|
|
19
19
|
"Bash(PYTHONPATH=/src/markback EDITOR=cat python3:*)",
|
|
20
|
-
"Bash(PYTHONPATH=/src/markback python3:*)"
|
|
20
|
+
"Bash(PYTHONPATH=/src/markback python3:*)",
|
|
21
|
+
"Bash(python3 -m markback.cli --help)",
|
|
22
|
+
"Read(//tmp/**)",
|
|
23
|
+
"Bash(python3 -c:*)",
|
|
24
|
+
"Bash(node -e:*)",
|
|
25
|
+
"Bash(git add:*)",
|
|
26
|
+
"Bash(git commit -m ':*)",
|
|
27
|
+
"Bash(git push:*)",
|
|
28
|
+
"Bash(git commit -m ' *)",
|
|
29
|
+
"Bash(tar tzf *)",
|
|
30
|
+
"Bash(.venv/bin/python *)",
|
|
31
|
+
"Bash(bash -n /src/markback/scripts/bump-version.sh)"
|
|
21
32
|
]
|
|
22
33
|
}
|
|
23
34
|
}
|
|
@@ -194,6 +194,9 @@ cython_debug/
|
|
|
194
194
|
# PyPI configuration file
|
|
195
195
|
.pypirc
|
|
196
196
|
|
|
197
|
+
# Devbox
|
|
198
|
+
.devbox/
|
|
199
|
+
|
|
197
200
|
# Cursor
|
|
198
201
|
# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
|
|
199
202
|
# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
|
markback-0.2.1/AGENTS.md
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
# Writing MarkBack V2 (.mb) Files
|
|
2
|
+
|
|
3
|
+
MarkBack pairs content with single-line feedback using `<<<` as the delimiter.
|
|
4
|
+
|
|
5
|
+
## Minimal record
|
|
6
|
+
|
|
7
|
+
```
|
|
8
|
+
Some content here.
|
|
9
|
+
<<< positive
|
|
10
|
+
```
|
|
11
|
+
|
|
12
|
+
## Record with headers
|
|
13
|
+
|
|
14
|
+
```
|
|
15
|
+
@id item-001
|
|
16
|
+
@by reviewer@example.com
|
|
17
|
+
@tag review p1
|
|
18
|
+
@input ./prompt.txt
|
|
19
|
+
@file ./file.txt
|
|
20
|
+
|
|
21
|
+
Inline content goes here.
|
|
22
|
+
<<< good; quality=high
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
Headers: `@id`, `@by`, `@tag`, `@input`, `@file`. All optional. Order: id, by, tag, input, file.
|
|
26
|
+
|
|
27
|
+
## Rules
|
|
28
|
+
|
|
29
|
+
- `<<<` must be followed by one space then feedback text — all on one line
|
|
30
|
+
- A blank line is **required** between headers and inline content
|
|
31
|
+
- `@file` + inline content can coexist (the file is provenance; the inline content is a snapshot or excerpt of it)
|
|
32
|
+
- Records in multi-record files are separated by `---`
|
|
33
|
+
- Files must be UTF-8 with LF line endings
|
|
34
|
+
- `@id` values are plain strings (no URI validation)
|
|
35
|
+
|
|
36
|
+
## Compact format (one record per line)
|
|
37
|
+
|
|
38
|
+
```
|
|
39
|
+
@file ./images/001.jpg <<< approved; scene=beach
|
|
40
|
+
@file ./images/002.jpg <<< rejected; too dark
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
No `---` separator needed between compact records. `@id` can go on the line above:
|
|
44
|
+
|
|
45
|
+
```
|
|
46
|
+
@id item-001
|
|
47
|
+
@file ./file.txt <<< good
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## Multi-record file
|
|
51
|
+
|
|
52
|
+
```
|
|
53
|
+
@id first
|
|
54
|
+
|
|
55
|
+
First content.
|
|
56
|
+
<<< positive
|
|
57
|
+
|
|
58
|
+
---
|
|
59
|
+
@id second
|
|
60
|
+
|
|
61
|
+
Second content.
|
|
62
|
+
<<< negative; needs work
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
## File-level headers (% prefix)
|
|
66
|
+
|
|
67
|
+
```
|
|
68
|
+
%markback 2
|
|
69
|
+
%scope issue-A issue-B
|
|
70
|
+
%covers ./gen/batch3/*.txt
|
|
71
|
+
|
|
72
|
+
@file ./gen/batch3/file2.txt <<< issue-B; tone is off
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
- `%markback 2` — version declaration
|
|
76
|
+
- `%scope` — what issues are being checked (sweep pattern)
|
|
77
|
+
- `%covers` — glob of all files reviewed (a matching file with no record is implicitly clean for every `%scope` item)
|
|
78
|
+
|
|
79
|
+
## Tags
|
|
80
|
+
|
|
81
|
+
```
|
|
82
|
+
@id item-001
|
|
83
|
+
@tag training positive-examples batch-2024-03
|
|
84
|
+
@file ./data/example.txt
|
|
85
|
+
<<< approved
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
Space-separated. Multiple `@tag` lines merge.
|
|
89
|
+
|
|
90
|
+
## Feedback format
|
|
91
|
+
|
|
92
|
+
Feedback is freeform text. Optional structured convention:
|
|
93
|
+
|
|
94
|
+
| Pattern | Meaning |
|
|
95
|
+
|---------|---------|
|
|
96
|
+
| `<<< positive` | label |
|
|
97
|
+
| `<<< negative; too vague` | label + comment |
|
|
98
|
+
| `<<< good; quality=high` | label + attribute |
|
|
99
|
+
| `<<< quality=high; confidence=0.9` | attributes only |
|
|
100
|
+
| `<<< json:{"key":"value"}` | JSON mode |
|
|
101
|
+
|
|
102
|
+
Segments are separated by `; ` (semicolon + space). Segments containing `=` are key-value attributes; segments without `=` are labels or comments.
|
|
103
|
+
|
|
104
|
+
## Sidecar files
|
|
105
|
+
|
|
106
|
+
Content in `name.ext`, annotation in `name.ext.mb`:
|
|
107
|
+
|
|
108
|
+
**report.pdf** — the content
|
|
109
|
+
**report.pdf.mb:**
|
|
110
|
+
```
|
|
111
|
+
@id report-001
|
|
112
|
+
<<< good; grade=B+
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
## Line/character ranges
|
|
116
|
+
|
|
117
|
+
`@file` and `@input` support position references:
|
|
118
|
+
|
|
119
|
+
```
|
|
120
|
+
@file ./code.py:42 ← line 42
|
|
121
|
+
@file ./code.py:42-50 ← lines 42–50
|
|
122
|
+
@file ./code.py:10:5-15:20 ← line 10 col 5 to line 15 col 20
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
## V1 backward compatibility
|
|
126
|
+
|
|
127
|
+
V1 headers (`@uri`, `@source`, `@prior`) are automatically mapped to their V2 equivalents (`@id`, `@file`, `@input`, respectively), and each emits a W010 warning.
|
|
128
|
+
|
|
129
|
+
## Quick checklist
|
|
130
|
+
|
|
131
|
+
- [ ] Every record has exactly one `<<<` line
|
|
132
|
+
- [ ] Feedback is a single line (no newlines)
|
|
133
|
+
- [ ] Blank line before inline content
|
|
134
|
+
- [ ] `---` between full records; not needed between compact records
|
|
135
|
+
- [ ] File ends with a newline
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
# Implementation Notes — MarkBack V2
|
|
2
|
+
|
|
3
|
+
## V1 → V2 Changes
|
|
4
|
+
|
|
5
|
+
### Header Renames
|
|
6
|
+
- `@uri` → `@id` (plain string, no URI validation)
|
|
7
|
+
- `@source` → `@file` (the content being annotated)
|
|
8
|
+
- `@prior` → `@input` (what produced the content)
|
|
9
|
+
|
|
10
|
+
### New Features
|
|
11
|
+
- `@tag` header — space-separated tags for categorization
|
|
12
|
+
- `%markback 2` — optional version declaration
|
|
13
|
+
- `%scope` / `%covers` — sweep pattern (meaningful absence)
|
|
14
|
+
- `@file` + inline content can coexist (the file is provenance; the inline content is a snapshot or excerpt of it)
|
|
15
|
+
- Sidecar convention simplified to `name.ext.mb` only
|
|
16
|
+
|
|
17
|
+
### Removed
|
|
18
|
+
- LLM workflow layer (`llm.py`, `workflow.py`) — not core to MarkBack
|
|
19
|
+
- `httpx` dependency
|
|
20
|
+
- `config.py` / `python-dotenv` dependency / `--init` command — not core to MarkBack
|
|
21
|
+
- RFC 3986 URI validation on `@id`
|
|
22
|
+
- E003 (malformed URI) no longer emitted
|
|
23
|
+
- E005 (content with @source) no longer emitted — coexistence is valid
|
|
24
|
+
- `.label.txt` / `.feedback.txt` as primary sidecar convention (kept as V1 legacy in discovery)
|
|
25
|
+
|
|
26
|
+
### API Changes
|
|
27
|
+
- `SourceRef` → `FileRef` (alias preserved)
|
|
28
|
+
- `Record` fields: `uri→id`, `source→file`, `prior→input`, added `tags`
|
|
29
|
+
- New `write()` / `append()` / `write_string()` functions
|
|
30
|
+
- New `normalize()` function
|
|
31
|
+
- New `discover_sidecars()` function
|
|
32
|
+
- `ParseResult` gains `scope`, `covers`, `version`, `covered_files()`
|
|
33
|
+
- V1 compat aliases preserved for all renamed functions
|
|
34
|
+
|
|
35
|
+
## Design Decisions
|
|
36
|
+
|
|
37
|
+
### Parser Architecture
|
|
38
|
+
Same state-machine approach as V1. Now also handles:
|
|
39
|
+
- File-level `%` headers (parsed before records, must be at top of file)
|
|
40
|
+
- V1 header mapping (detected by keyword, mapped with W010 warning)
|
|
41
|
+
- `@tag` with whitespace splitting and merge across multiple lines
|
|
42
|
+
|
|
43
|
+
### V1 Backward Compatibility
|
|
44
|
+
The parser transparently reads V1 files:
|
|
45
|
+
1. `@uri` → mapped to `record.id`
|
|
46
|
+
2. `@source` → mapped to `record.file`
|
|
47
|
+
3. `@prior` → mapped to `record.input`
|
|
48
|
+
4. `@source ... <<<` compact → handled alongside `@file ... <<<`
|
|
49
|
+
5. Each V1 header emits a W010 warning
|
|
50
|
+
|
|
51
|
+
### Sweep Pattern
|
|
52
|
+
`%scope` + `%covers` enable "meaningful absence":
|
|
53
|
+
- `%scope` declares what issues are being checked
|
|
54
|
+
- `%covers` declares the complete file set under review
|
|
55
|
+
- Files matching `%covers` with no record are implicitly clean for all scope items
|
|
56
|
+
- `ParseResult.covered_files()` resolves the glob for programmatic access
|
|
57
|
+
|
|
58
|
+
### Writer Simplification
|
|
59
|
+
V1 had 5+ writer functions. V2 has:
|
|
60
|
+
- `write()` — write records to a file (auto-format)
|
|
61
|
+
- `append()` — add a record to existing file
|
|
62
|
+
- `write_string()` — write records to string
|
|
63
|
+
- `normalize()` — canonical rewrite
|
|
64
|
+
All V1 functions are preserved as aliases.
|
|
65
|
+
|
|
66
|
+
### Sidecar Convention
|
|
67
|
+
V2: `name.ext.mb` (append `.mb` to the full filename)
|
|
68
|
+
V1 legacy: `.label.txt`, `.feedback.txt` still discovered for backward compat
|
|
69
|
+
|
|
70
|
+
## Testing Strategy
|
|
71
|
+
|
|
72
|
+
### Unit Tests
|
|
73
|
+
- Parser tests cover V2 format, V1 backward compat, file-level headers, tags, sweep pattern
|
|
74
|
+
- Writer tests cover canonical output, version headers, scope/covers, round-trip
|
|
75
|
+
- Linter tests verify all error/warning codes with V2 semantics
|
|
76
|
+
- Type tests verify FileRef, Record with new fields, V1 compat aliases
|
|
77
|
+
|
|
78
|
+
### Fixtures
|
|
79
|
+
All fixtures were updated to V2 format. Error fixtures were updated to reflect V2 semantics (e.g., `content_with_source` is now valid).
|
markback-0.2.1/PKG-INFO
ADDED
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: markback
|
|
3
|
+
Version: 0.2.1
|
|
4
|
+
Summary: A compact, human-writable format for storing content paired with feedback/labels
|
|
5
|
+
Project-URL: Homepage, https://github.com/dandriscoll/markback
|
|
6
|
+
Project-URL: Repository, https://github.com/dandriscoll/markback
|
|
7
|
+
Project-URL: Documentation, https://github.com/dandriscoll/markback#readme
|
|
8
|
+
Project-URL: Issues, https://github.com/dandriscoll/markback/issues
|
|
9
|
+
Author: Dan Driscoll
|
|
10
|
+
License-Expression: MIT
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Keywords: annotation,data-labeling,feedback,labeling,llm,markdown
|
|
13
|
+
Classifier: Development Status :: 3 - Alpha
|
|
14
|
+
Classifier: Environment :: Console
|
|
15
|
+
Classifier: Intended Audience :: Developers
|
|
16
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
17
|
+
Classifier: Operating System :: OS Independent
|
|
18
|
+
Classifier: Programming Language :: Python :: 3
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
22
|
+
Classifier: Topic :: Software Development :: Quality Assurance
|
|
23
|
+
Classifier: Topic :: Text Processing :: Markup
|
|
24
|
+
Requires-Python: >=3.10
|
|
25
|
+
Requires-Dist: rich>=13.0.0
|
|
26
|
+
Requires-Dist: typer>=0.9.0
|
|
27
|
+
Provides-Extra: dev
|
|
28
|
+
Requires-Dist: build>=1.0.0; extra == 'dev'
|
|
29
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
|
|
30
|
+
Requires-Dist: pytest>=7.0.0; extra == 'dev'
|
|
31
|
+
Requires-Dist: twine>=5.0.0; extra == 'dev'
|
|
32
|
+
Description-Content-Type: text/markdown
|
|
33
|
+
|
|
34
|
+
# MarkBack V2
|
|
35
|
+
|
|
36
|
+
A compact, human-writable format for storing content paired with feedback/labels.
|
|
37
|
+
|
|
38
|
+
## Installation
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
pip install -e .
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Quick Start
|
|
45
|
+
|
|
46
|
+
### Parse a MarkBack file
|
|
47
|
+
|
|
48
|
+
```python
|
|
49
|
+
from markback import parse_file, parse_string
|
|
50
|
+
|
|
51
|
+
# Parse a file
|
|
52
|
+
result = parse_file("labels.mb")
|
|
53
|
+
for record in result.records:
|
|
54
|
+
print(f"{record.id}: {record.feedback}")
|
|
55
|
+
|
|
56
|
+
# Parse a string
|
|
57
|
+
text = """
|
|
58
|
+
@id example
|
|
59
|
+
|
|
60
|
+
Some content here.
|
|
61
|
+
<<< positive; good quality
|
|
62
|
+
"""
|
|
63
|
+
result = parse_string(text)
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
### Write MarkBack files
|
|
67
|
+
|
|
68
|
+
```python
|
|
69
|
+
from markback import Record, FileRef, write, append
|
|
70
|
+
|
|
71
|
+
# Write records to a file
|
|
72
|
+
records = [
|
|
73
|
+
Record(feedback="good", id="item-1", content="First item"),
|
|
74
|
+
Record(feedback="bad", id="item-2", content="Second item"),
|
|
75
|
+
]
|
|
76
|
+
write("output.mb", records)
|
|
77
|
+
|
|
78
|
+
# Append a single record
|
|
79
|
+
append("output.mb", Record(feedback="great", id="item-3", content="Third"))
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
### Lint files
|
|
83
|
+
|
|
84
|
+
```python
|
|
85
|
+
from markback import lint_file
|
|
86
|
+
|
|
87
|
+
result = lint_file("myfile.mb")
|
|
88
|
+
if result.has_errors:
|
|
89
|
+
for d in result.diagnostics:
|
|
90
|
+
print(d)
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
## CLI Usage
|
|
94
|
+
|
|
95
|
+
The CLI is available via `markback` or `mb` (shorthand).
|
|
96
|
+
|
|
97
|
+
### Annotate files
|
|
98
|
+
|
|
99
|
+
```bash
|
|
100
|
+
# Single file — inline feedback, appends to myfile.txt.mb
|
|
101
|
+
mb myfile.txt "good; clear writing"
|
|
102
|
+
|
|
103
|
+
# URL target — derives sidecar from last path segment (or hostname)
|
|
104
|
+
mb https://example.com/blog/post.html "great explanation"
|
|
105
|
+
# → writes post.html.mb with @file https://example.com/blog/post.html
|
|
106
|
+
|
|
107
|
+
# Quote a passage by editing the .mb file directly: inline content
|
|
108
|
+
# under an @file header can be a full snapshot OR an excerpt.
|
|
109
|
+
# @file https://example.com/post.html
|
|
110
|
+
#
|
|
111
|
+
# the quick brown fox jumps over the lazy dog
|
|
112
|
+
# <<< awkward phrasing
|
|
113
|
+
|
|
114
|
+
# Multi-segment section: several comments on one source, no repeated headers.
|
|
115
|
+
# @file ./essay.txt
|
|
116
|
+
#
|
|
117
|
+
# the lazy fox
|
|
118
|
+
# <<< awkward
|
|
119
|
+
#
|
|
120
|
+
# weak ending
|
|
121
|
+
# <<< needs punch
|
|
122
|
+
|
|
123
|
+
# With input reference (what produced the file)
|
|
124
|
+
mb output.txt "accurate" --input prompt.txt
|
|
125
|
+
|
|
126
|
+
# With tags and attribution
|
|
127
|
+
mb file.txt "good" --tag "review p1" --by alice@example.com
|
|
128
|
+
|
|
129
|
+
# Multiple files — same feedback for all
|
|
130
|
+
mb *.jpg -f "approved"
|
|
131
|
+
|
|
132
|
+
# Interactive mode — steps through each file
|
|
133
|
+
mb *.jpg --print
|
|
134
|
+
|
|
135
|
+
# Sweep pattern — track issues across batches
|
|
136
|
+
mb *.txt -f "issue-A" --scope "issue-A issue-B" --covers "./*.txt"
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
### Utility commands
|
|
140
|
+
|
|
141
|
+
```bash
|
|
142
|
+
# Lint
|
|
143
|
+
mb --lint myfile.mb
|
|
144
|
+
mb --lint --json ./data/
|
|
145
|
+
|
|
146
|
+
# List records
|
|
147
|
+
mb --list myfile.mb
|
|
148
|
+
|
|
149
|
+
# Statistics
|
|
150
|
+
mb --stats myfile.mb
|
|
151
|
+
|
|
152
|
+
# Normalize to canonical format
|
|
153
|
+
mb --normalize input.mb
|
|
154
|
+
mb --normalize --in-place input.mb
|
|
155
|
+
|
|
156
|
+
# Convert between formats
|
|
157
|
+
mb --convert --to multi -o output.mb input.mb
|
|
158
|
+
mb --convert --to compact -o output.mb input.mb
|
|
159
|
+
|
|
160
|
+
# Upgrade V1 files to V2
|
|
161
|
+
mb --upgrade *.mb # preview
|
|
162
|
+
mb --upgrade --apply --in-place *.mb # apply
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
## File Format
|
|
166
|
+
|
|
167
|
+
### V2 Headers
|
|
168
|
+
|
|
169
|
+
| Header | Purpose |
|
|
170
|
+
|--------|---------|
|
|
171
|
+
| `@id` | Record identifier (plain string) |
|
|
172
|
+
| `@by` | Who provided feedback |
|
|
173
|
+
| `@tag` | Space-separated tags |
|
|
174
|
+
| `@input` | What produced the content (e.g., a prompt) |
|
|
175
|
+
| `@file` | Path to the content being annotated |
|
|
176
|
+
|
|
177
|
+
### File-level headers (% prefix)
|
|
178
|
+
|
|
179
|
+
```
|
|
180
|
+
%markback 2
|
|
181
|
+
%scope issue-A issue-B
|
|
182
|
+
%covers ./gen/batch3/*.txt
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
### Record examples
|
|
186
|
+
|
|
187
|
+
```
|
|
188
|
+
@id review-001
|
|
189
|
+
@by alice@company.com
|
|
190
|
+
@file ./src/auth.py:45-67
|
|
191
|
+
@tag security p0
|
|
192
|
+
|
|
193
|
+
<<< vulnerable; sql-injection in query builder
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
### Compact label list
|
|
197
|
+
|
|
198
|
+
```
|
|
199
|
+
@file ./images/001.jpg <<< approved; scene=beach
|
|
200
|
+
@file ./images/002.jpg <<< rejected; too dark
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
### Sidecar files
|
|
204
|
+
|
|
205
|
+
Content in `report.pdf`, annotation in `report.pdf.mb`:
|
|
206
|
+
|
|
207
|
+
```
|
|
208
|
+
@id report-001
|
|
209
|
+
<<< good; grade=B+
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
### Sweep pattern
|
|
213
|
+
|
|
214
|
+
Track issues across batches with meaningful absence:
|
|
215
|
+
|
|
216
|
+
```
|
|
217
|
+
%markback 2
|
|
218
|
+
%scope issue-A issue-B
|
|
219
|
+
%covers ./gen/batch3/*.txt
|
|
220
|
+
|
|
221
|
+
@file ./gen/batch3/file2.txt <<< issue-B; tone is off
|
|
222
|
+
@file ./gen/batch3/file5.txt <<< issue-A; issue-B; both problems
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
Files matching `%covers` without annotations are implicitly clean for all `%scope` items.
|
|
226
|
+
|
|
227
|
+
## V1 Backward Compatibility
|
|
228
|
+
|
|
229
|
+
V1 headers (`@uri`, `@source`, `@prior`) are automatically mapped to their V2 equivalents (`@id`, `@file`, `@input`, respectively), each with a W010 warning, so the V2 parser reads V1 files transparently.
|
|
230
|
+
|
|
231
|
+
## Development
|
|
232
|
+
|
|
233
|
+
```bash
|
|
234
|
+
pip install -e ".[dev]"
|
|
235
|
+
pytest
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
## License
|
|
239
|
+
|
|
240
|
+
MIT
|
markback-0.2.1/README.md
ADDED
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
# MarkBack V2
|
|
2
|
+
|
|
3
|
+
A compact, human-writable format for storing content paired with feedback/labels.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install -e .
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Quick Start
|
|
12
|
+
|
|
13
|
+
### Parse a MarkBack file
|
|
14
|
+
|
|
15
|
+
```python
|
|
16
|
+
from markback import parse_file, parse_string
|
|
17
|
+
|
|
18
|
+
# Parse a file
|
|
19
|
+
result = parse_file("labels.mb")
|
|
20
|
+
for record in result.records:
|
|
21
|
+
print(f"{record.id}: {record.feedback}")
|
|
22
|
+
|
|
23
|
+
# Parse a string
|
|
24
|
+
text = """
|
|
25
|
+
@id example
|
|
26
|
+
|
|
27
|
+
Some content here.
|
|
28
|
+
<<< positive; good quality
|
|
29
|
+
"""
|
|
30
|
+
result = parse_string(text)
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
### Write MarkBack files
|
|
34
|
+
|
|
35
|
+
```python
|
|
36
|
+
from markback import Record, FileRef, write, append
|
|
37
|
+
|
|
38
|
+
# Write records to a file
|
|
39
|
+
records = [
|
|
40
|
+
Record(feedback="good", id="item-1", content="First item"),
|
|
41
|
+
Record(feedback="bad", id="item-2", content="Second item"),
|
|
42
|
+
]
|
|
43
|
+
write("output.mb", records)
|
|
44
|
+
|
|
45
|
+
# Append a single record
|
|
46
|
+
append("output.mb", Record(feedback="great", id="item-3", content="Third"))
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
### Lint files
|
|
50
|
+
|
|
51
|
+
```python
|
|
52
|
+
from markback import lint_file
|
|
53
|
+
|
|
54
|
+
result = lint_file("myfile.mb")
|
|
55
|
+
if result.has_errors:
|
|
56
|
+
for d in result.diagnostics:
|
|
57
|
+
print(d)
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
## CLI Usage
|
|
61
|
+
|
|
62
|
+
The CLI is available via `markback` or `mb` (shorthand).
|
|
63
|
+
|
|
64
|
+
### Annotate files
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
# Single file — inline feedback, appends to myfile.txt.mb
|
|
68
|
+
mb myfile.txt "good; clear writing"
|
|
69
|
+
|
|
70
|
+
# URL target — derives sidecar from last path segment (or hostname)
|
|
71
|
+
mb https://example.com/blog/post.html "great explanation"
|
|
72
|
+
# → writes post.html.mb with @file https://example.com/blog/post.html
|
|
73
|
+
|
|
74
|
+
# Quote a passage by editing the .mb file directly: inline content
|
|
75
|
+
# under an @file header can be a full snapshot OR an excerpt.
|
|
76
|
+
# @file https://example.com/post.html
|
|
77
|
+
#
|
|
78
|
+
# the quick brown fox jumps over the lazy dog
|
|
79
|
+
# <<< awkward phrasing
|
|
80
|
+
|
|
81
|
+
# Multi-segment section: several comments on one source, no repeated headers.
|
|
82
|
+
# @file ./essay.txt
|
|
83
|
+
#
|
|
84
|
+
# the lazy fox
|
|
85
|
+
# <<< awkward
|
|
86
|
+
#
|
|
87
|
+
# weak ending
|
|
88
|
+
# <<< needs punch
|
|
89
|
+
|
|
90
|
+
# With input reference (what produced the file)
|
|
91
|
+
mb output.txt "accurate" --input prompt.txt
|
|
92
|
+
|
|
93
|
+
# With tags and attribution
|
|
94
|
+
mb file.txt "good" --tag "review p1" --by alice@example.com
|
|
95
|
+
|
|
96
|
+
# Multiple files — same feedback for all
|
|
97
|
+
mb *.jpg -f "approved"
|
|
98
|
+
|
|
99
|
+
# Interactive mode — steps through each file
|
|
100
|
+
mb *.jpg --print
|
|
101
|
+
|
|
102
|
+
# Sweep pattern — track issues across batches
|
|
103
|
+
mb *.txt -f "issue-A" --scope "issue-A issue-B" --covers "./*.txt"
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
### Utility commands
|
|
107
|
+
|
|
108
|
+
```bash
|
|
109
|
+
# Lint
|
|
110
|
+
mb --lint myfile.mb
|
|
111
|
+
mb --lint --json ./data/
|
|
112
|
+
|
|
113
|
+
# List records
|
|
114
|
+
mb --list myfile.mb
|
|
115
|
+
|
|
116
|
+
# Statistics
|
|
117
|
+
mb --stats myfile.mb
|
|
118
|
+
|
|
119
|
+
# Normalize to canonical format
|
|
120
|
+
mb --normalize input.mb
|
|
121
|
+
mb --normalize --in-place input.mb
|
|
122
|
+
|
|
123
|
+
# Convert between formats
|
|
124
|
+
mb --convert --to multi -o output.mb input.mb
|
|
125
|
+
mb --convert --to compact -o output.mb input.mb
|
|
126
|
+
|
|
127
|
+
# Upgrade V1 files to V2
|
|
128
|
+
mb --upgrade *.mb # preview
|
|
129
|
+
mb --upgrade --apply --in-place *.mb # apply
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
## File Format
|
|
133
|
+
|
|
134
|
+
### V2 Headers
|
|
135
|
+
|
|
136
|
+
| Header | Purpose |
|
|
137
|
+
|--------|---------|
|
|
138
|
+
| `@id` | Record identifier (plain string) |
|
|
139
|
+
| `@by` | Who provided feedback |
|
|
140
|
+
| `@tag` | Space-separated tags |
|
|
141
|
+
| `@input` | What produced the content (e.g., a prompt) |
|
|
142
|
+
| `@file` | Path to the content being annotated |
|
|
143
|
+
|
|
144
|
+
### File-level headers (% prefix)
|
|
145
|
+
|
|
146
|
+
```
|
|
147
|
+
%markback 2
|
|
148
|
+
%scope issue-A issue-B
|
|
149
|
+
%covers ./gen/batch3/*.txt
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
### Record examples
|
|
153
|
+
|
|
154
|
+
```
|
|
155
|
+
@id review-001
|
|
156
|
+
@by alice@company.com
|
|
157
|
+
@file ./src/auth.py:45-67
|
|
158
|
+
@tag security p0
|
|
159
|
+
|
|
160
|
+
<<< vulnerable; sql-injection in query builder
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
### Compact label list
|
|
164
|
+
|
|
165
|
+
```
|
|
166
|
+
@file ./images/001.jpg <<< approved; scene=beach
|
|
167
|
+
@file ./images/002.jpg <<< rejected; too dark
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
### Sidecar files
|
|
171
|
+
|
|
172
|
+
Content in `report.pdf`, annotation in `report.pdf.mb`:
|
|
173
|
+
|
|
174
|
+
```
|
|
175
|
+
@id report-001
|
|
176
|
+
<<< good; grade=B+
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
### Sweep pattern
|
|
180
|
+
|
|
181
|
+
Track issues across batches with meaningful absence:
|
|
182
|
+
|
|
183
|
+
```
|
|
184
|
+
%markback 2
|
|
185
|
+
%scope issue-A issue-B
|
|
186
|
+
%covers ./gen/batch3/*.txt
|
|
187
|
+
|
|
188
|
+
@file ./gen/batch3/file2.txt <<< issue-B; tone is off
|
|
189
|
+
@file ./gen/batch3/file5.txt <<< issue-A; issue-B; both problems
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
Files matching `%covers` without annotations are implicitly clean for all `%scope` items.
|
|
193
|
+
|
|
194
|
+
## V1 Backward Compatibility
|
|
195
|
+
|
|
196
|
+
V1 headers (`@uri`, `@source`, `@prior`) are automatically mapped to their V2 equivalents (`@id`, `@file`, `@input`, respectively), each with a W010 warning, so the V2 parser reads V1 files transparently.
|
|
197
|
+
|
|
198
|
+
## Development
|
|
199
|
+
|
|
200
|
+
```bash
|
|
201
|
+
pip install -e ".[dev]"
|
|
202
|
+
pytest
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
## License
|
|
206
|
+
|
|
207
|
+
MIT
|