markback 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
markback/writer.py ADDED
@@ -0,0 +1,249 @@
1
+ """MarkBack canonical writer implementation."""
2
+
3
+ from enum import Enum
4
+ from pathlib import Path
5
+ from typing import Optional
6
+
7
+ from .types import Record, SourceRef
8
+
9
+
10
class OutputMode(Enum):
    """File layouts that the writer functions in this module can produce.

    The member values are the lowercase layout names as plain strings.
    """

    # Exactly one record per file.
    SINGLE = "single"

    # Several records stored together in one file.
    MULTI = "multi"

    # Compact label list format.
    COMPACT = "compact"

    # Content and label kept in separate files.
    PAIRED = "paired"
16
+
17
+
18
def write_record_canonical(
    record: Record,
    prefer_compact: bool = True,
) -> str:
    """Render a single record as canonical MarkBack text.

    Two layouts are possible:

    * Compact: an optional ``@uri`` line followed by a single
      ``@source <source> <<< <feedback>`` line. Chosen when
      ``prefer_compact`` is true, ``record.source`` is not ``None`` and the
      record reports no inline content.
    * Full: optional ``@uri`` and ``@source`` header lines, then (when the
      record has inline content) a blank line and the content with leading
      and trailing blank lines removed, then a ``<<< <feedback>`` line.

    Args:
        record: The record to write.
        prefer_compact: If True, use the compact layout when possible
            (source present and no inline content).

    Returns:
        The canonical string representation, without a trailing newline.
    """
    has_content = record.has_inline_content()

    # Both layouts start with the same optional @uri header.
    lines: list[str] = [f"@uri {record.uri}"] if record.uri else []

    if prefer_compact and record.source is not None and not has_content:
        lines.append(f"@source {record.source} <<< {record.feedback}")
        return "\n".join(lines)

    if record.source:
        lines.append(f"@source {record.source}")

    if has_content:
        lines.append("")  # Blank line separating headers from content
        content_lines = record.content.split("\n")
        # Find the first and last non-blank lines by index and slice once;
        # repeatedly popping from the front of a list is quadratic.
        start, end = 0, len(content_lines)
        while start < end and not content_lines[start].strip():
            start += 1
        while end > start and not content_lines[end - 1].strip():
            end -= 1
        lines.extend(content_lines[start:end])

    lines.append(f"<<< {record.feedback}")
    return "\n".join(lines)
68
+
69
+
70
def write_records_multi(
    records: list[Record],
    prefer_compact: bool = True,
) -> str:
    """Serialize several records into one multi-record document.

    Each record is rendered with ``write_record_canonical``. Two adjacent
    records that both qualify for the compact layout are joined by a single
    newline; every other pair is joined by a ``---`` separator line.

    Args:
        records: Records to write, in output order.
        prefer_compact: If True, use the compact layout when possible.

    Returns:
        The multi-record text ending in a newline, or an empty string when
        ``records`` is empty.
    """
    if not records:
        return ""

    chunks: list[str] = []
    previous_compact = False

    for position, record in enumerate(records):
        # Same eligibility rule that write_record_canonical applies.
        compact_now = (
            prefer_compact
            and record.source is not None
            and not record.has_inline_content()
        )

        if position:
            if compact_now and previous_compact:
                # Consecutive compact records only need a line break.
                joiner = "\n"
            else:
                # End the previous record's line, then a separator line.
                joiner = "\n---\n"
            chunks.append(joiner)

        chunks.append(write_record_canonical(record, prefer_compact=prefer_compact))
        previous_compact = compact_now

    return "".join(chunks) + "\n"
110
+
111
+
112
def write_records_compact(records: list[Record]) -> str:
    """Write records in compact label list format.

    Records that have a source are written as a single
    ``@source <source> <<< <feedback>`` line. When such a record also has a
    URI, an ``@uri`` line precedes it and a blank line follows it for
    grouping.

    Records without a source cannot use the compact form and no source is
    synthesized for them. They are written with an optional ``@uri`` line,
    then (if they have inline content) a blank line and the content as-is,
    then a ``<<< <feedback>`` line.

    Args:
        records: Records to write, in output order.

    Returns:
        The text ending in a single newline, or an empty string when there is
        nothing to write.
    """
    lines: list[str] = []

    for record in records:
        if record.source:
            compact_line = f"@source {record.source} <<< {record.feedback}"
            if record.uri:
                lines.append(f"@uri {record.uri}")
                lines.append(compact_line)
                lines.append("")  # Blank line for grouping
            else:
                lines.append(compact_line)
            continue

        # No source: fall back to the full layout for this record.
        # NOTE(review): no "---" separator is emitted around these records,
        # unlike write_records_multi - confirm the parser accepts this.
        if record.uri:
            lines.append(f"@uri {record.uri}")
        if record.has_inline_content():
            lines.append("")
            lines.extend(record.content.split("\n"))
        lines.append(f"<<< {record.feedback}")

    # Drop trailing grouping blanks so the output ends in exactly one newline.
    while lines and not lines[-1]:
        lines.pop()

    if not lines:
        return ""
    return "\n".join(lines) + "\n"
142
+
143
+
144
def write_label_file(record: Record) -> str:
    """Build the text of a paired-mode label file.

    The result holds an ``@uri`` line when the record has a URI, followed by
    the ``<<<`` feedback line. No content is included. Every line, including
    the last, ends with a newline.
    """
    parts: list[str] = [f"@uri {record.uri}"] if record.uri else []
    parts.append(f"<<< {record.feedback}")
    return "".join(f"{part}\n" for part in parts)
154
+
155
+
156
def write_file(
    path: Path | str,
    records: list[Record],
    mode: OutputMode = OutputMode.MULTI,
    prefer_compact: bool = True,
) -> None:
    """Write records to a file as UTF-8 text.

    Args:
        path: Output file path. A plain string is accepted as well as a
            ``Path``.
        records: Records to write.
        mode: Output format mode.
        prefer_compact: For SINGLE and MULTI modes, prefer the compact layout
            when possible. Ignored by COMPACT and PAIRED modes.

    Raises:
        ValueError: If SINGLE or PAIRED mode is given anything other than
            exactly one record, or if ``mode`` is not a known output mode.
    """
    if mode == OutputMode.SINGLE:
        if len(records) != 1:
            raise ValueError(f"SINGLE mode requires exactly 1 record, got {len(records)}")
        content = write_record_canonical(records[0], prefer_compact=prefer_compact) + "\n"

    elif mode == OutputMode.MULTI:
        content = write_records_multi(records, prefer_compact=prefer_compact)

    elif mode == OutputMode.COMPACT:
        content = write_records_compact(records)

    elif mode == OutputMode.PAIRED:
        # Only the label file is produced here; the content file is not written.
        if len(records) != 1:
            raise ValueError(f"PAIRED mode requires exactly 1 record, got {len(records)}")
        content = write_label_file(records[0])

    else:
        raise ValueError(f"Unknown output mode: {mode}")

    # Coerce so callers may pass a str path; str has no write_text method.
    Path(path).write_text(content, encoding="utf-8")
190
+
191
+
192
def write_paired_files(
    label_path: Path,
    content_path: Optional[Path],
    record: Record,
    write_content: bool = False,
) -> None:
    """Write the label file for a record and, optionally, its content file.

    The label file is always written. The content file is written only when
    ``write_content`` is true, a ``content_path`` was supplied and the record
    has non-empty content.

    Args:
        label_path: Path for the label file.
        content_path: Path for the content file (optional).
        record: The record to write.
        write_content: If True, write content to content_path (only for text content).
    """
    label_path.write_text(write_label_file(record), encoding="utf-8")

    # Skip the content file unless every precondition holds.
    if not (write_content and content_path and record.content):
        return

    content_path.write_text(record.content, encoding="utf-8")
213
+
214
+
215
def normalize_file(
    input_path: Path,
    output_path: Optional[Path] = None,
    in_place: bool = False,
) -> str:
    """Parse a MarkBack file and re-emit it in canonical form.

    A file holding exactly one record is rendered as a single canonical
    record; any other record count is rendered in multi-record format. The
    result goes to ``output_path`` when one is given, otherwise over the
    input file when ``in_place`` is true, otherwise it is only returned.

    Args:
        input_path: Input file path.
        output_path: Output file path; takes precedence over ``in_place``.
        in_place: If True and no output_path is given, overwrite the input file.

    Returns:
        The canonical content.

    Raises:
        ValueError: If parsing the input reported errors.
    """
    # Imported inside the function rather than at module level.
    from .parser import parse_file

    parsed = parse_file(input_path)
    if parsed.has_errors:
        raise ValueError(f"Cannot normalize file with errors: {input_path}")

    records = parsed.records
    canonical = (
        write_record_canonical(records[0]) + "\n"
        if len(records) == 1
        else write_records_multi(records)
    )

    if output_path:
        output_path.write_text(canonical, encoding="utf-8")
    elif in_place:
        input_path.write_text(canonical, encoding="utf-8")

    return canonical
@@ -0,0 +1,251 @@
1
+ Metadata-Version: 2.4
2
+ Name: markback
3
+ Version: 0.1.0
4
+ Summary: A compact, human-writable format for storing content paired with feedback/labels
5
+ Project-URL: Homepage, https://github.com/dandriscoll/markback
6
+ Project-URL: Repository, https://github.com/dandriscoll/markback
7
+ Project-URL: Documentation, https://github.com/dandriscoll/markback#readme
8
+ Project-URL: Issues, https://github.com/dandriscoll/markback/issues
9
+ Author: Dan Driscoll
10
+ License-Expression: MIT
11
+ License-File: LICENSE
12
+ Keywords: annotation,data-labeling,feedback,labeling,llm,markdown
13
+ Classifier: Development Status :: 3 - Alpha
14
+ Classifier: Environment :: Console
15
+ Classifier: Intended Audience :: Developers
16
+ Classifier: License :: OSI Approved :: MIT License
17
+ Classifier: Operating System :: OS Independent
18
+ Classifier: Programming Language :: Python :: 3
19
+ Classifier: Programming Language :: Python :: 3.10
20
+ Classifier: Programming Language :: Python :: 3.11
21
+ Classifier: Programming Language :: Python :: 3.12
22
+ Classifier: Topic :: Software Development :: Quality Assurance
23
+ Classifier: Topic :: Text Processing :: Markup
24
+ Requires-Python: >=3.10
25
+ Requires-Dist: httpx>=0.25.0
26
+ Requires-Dist: python-dotenv>=1.0.0
27
+ Requires-Dist: rich>=13.0.0
28
+ Requires-Dist: typer>=0.9.0
29
+ Provides-Extra: dev
30
+ Requires-Dist: build>=1.0.0; extra == 'dev'
31
+ Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
32
+ Requires-Dist: pytest>=7.0.0; extra == 'dev'
33
+ Requires-Dist: twine>=5.0.0; extra == 'dev'
34
+ Description-Content-Type: text/markdown
35
+
36
+ # MarkBack
37
+
38
+ A compact, human-writable format for storing content paired with feedback/labels.
39
+
40
+ ## Installation
41
+
42
+ ```bash
43
+ pip install -e .
44
+ ```
45
+
46
+ ## Quick Start
47
+
48
+ ### Parse a MarkBack file
49
+
50
+ ```python
51
+ from markback import parse_file, parse_string
52
+
53
+ # Parse a file
54
+ result = parse_file("labels.mb")
55
+ for record in result.records:
56
+ print(f"{record.uri}: {record.feedback}")
57
+
58
+ # Parse a string
59
+ text = """
60
+ @uri local:example
61
+
62
+ Some content here.
63
+ <<< positive; good quality
64
+ """
65
+ result = parse_string(text)
66
+ ```
67
+
68
+ ### Write MarkBack files
69
+
70
+ ```python
71
+ from markback import Record, SourceRef, write_file, OutputMode
72
+
73
+ records = [
74
+ Record(feedback="good", uri="local:1", content="First item"),
75
+ Record(feedback="bad", uri="local:2", content="Second item"),
76
+ ]
77
+
78
+ # Write multi-record file
79
+ write_file("output.mb", records, mode=OutputMode.MULTI)
80
+
81
+ # Write compact label list
82
+ write_file("labels.mb", records, mode=OutputMode.COMPACT)
83
+ ```
84
+
85
+ ### Lint files
86
+
87
+ ```python
88
+ from markback import lint_file
89
+
90
+ result = lint_file("myfile.mb")
91
+ if result.has_errors:
92
+ for d in result.diagnostics:
93
+ print(d)
94
+ ```
95
+
96
+ ## CLI Usage
97
+
98
+ ### Initialize configuration
99
+
100
+ ```bash
101
+ markback init
102
+ ```
103
+
104
+ Creates a `.env` file with all configuration options.
105
+
106
+ ### Lint files
107
+
108
+ ```bash
109
+ # Lint a single file
110
+ markback lint myfile.mb
111
+
112
+ # Lint a directory
113
+ markback lint ./data/
114
+
115
+ # JSON output
116
+ markback lint myfile.mb --json
117
+ ```
118
+
119
+ ### Normalize to canonical format
120
+
121
+ ```bash
122
+ # Output to stdout
123
+ markback normalize input.mb
124
+
125
+ # Output to file
126
+ markback normalize input.mb output.mb
127
+
128
+ # In-place normalization
129
+ markback normalize input.mb --in-place
130
+ ```
131
+
132
+ ### List records
133
+
134
+ ```bash
135
+ markback list myfile.mb
136
+ markback list ./data/ --json
137
+ ```
138
+
139
+ ### Convert between formats
140
+
141
+ ```bash
142
+ # Convert to multi-record format
143
+ markback convert input.mb output.mb --to multi
144
+
145
+ # Convert to compact label list
146
+ markback convert input.mb output.mb --to compact
147
+
148
+ # Convert to paired files
149
+ markback convert input.mb ./output_dir/ --to paired
150
+ ```
151
+
152
+ ### Run LLM workflow
153
+
154
+ ```bash
155
+ # Run editor/operator workflow
156
+ markback workflow run dataset.mb --prompt "Initial prompt" --output results.json
157
+
158
+ # View evaluation results
159
+ markback workflow evaluate results.json
160
+
161
+ # Extract refined prompt
162
+ markback workflow prompt results.json --output refined_prompt.txt
163
+ ```
164
+
165
+ ## File Formats
166
+
167
+ ### Single Record
168
+
169
+ ```
170
+ @uri local:example
171
+
172
+ Content goes here.
173
+ <<< positive; quality=high
174
+ ```
175
+
176
+ ### Multi-Record
177
+
178
+ ```
179
+ @uri local:item-1
180
+
181
+ First content.
182
+ <<< good
183
+
184
+ ---
185
+ @uri local:item-2
186
+
187
+ Second content.
188
+ <<< bad; needs improvement
189
+ ```
190
+
191
+ ### Compact Label List
192
+
193
+ ```
194
+ @source ./images/001.jpg <<< approved; scene=beach
195
+ @source ./images/002.jpg <<< rejected; too dark
196
+ @source ./images/003.jpg <<< approved; scene=mountain
197
+ ```
198
+
199
+ ### Paired Files
200
+
201
+ **content.txt:**
202
+ ```
203
+ The actual content goes here.
204
+ ```
205
+
206
+ **content.label.txt:**
207
+ ```
208
+ @uri local:content-id
209
+ <<< approved; reviewer=alice
210
+ ```
211
+
212
+ ## Configuration
213
+
214
+ Configuration is loaded from `.env`:
215
+
216
+ ```bash
217
+ # File handling mode
218
+ FILE_MODE=git # or "versioned"
219
+
220
+ # Label file discovery
221
+ LABEL_SUFFIXES=.label.txt,.feedback.txt,.mb
222
+
223
+ # Editor LLM
224
+ EDITOR_API_BASE=https://api.openai.com/v1
225
+ EDITOR_API_KEY=your-key
226
+ EDITOR_MODEL=gpt-4
227
+
228
+ # Operator LLM
229
+ OPERATOR_API_BASE=https://api.openai.com/v1
230
+ OPERATOR_API_KEY=your-key
231
+ OPERATOR_MODEL=gpt-4
232
+ ```
233
+
234
+ ## Development
235
+
236
+ ### Run tests
237
+
238
+ ```bash
239
+ pip install -e ".[dev]"
240
+ pytest
241
+ ```
242
+
243
+ ### Run with coverage
244
+
245
+ ```bash
246
+ pytest --cov=markback
247
+ ```
248
+
249
+ ## License
250
+
251
+ MIT
@@ -0,0 +1,14 @@
1
+ markback/__init__.py,sha256=B0-2dpUu5nkbnUI0hPz-x7PHiOl7M-tiRi6s3UCYJFk,1540
2
+ markback/cli.py,sha256=5wMk1OUG7W_voS9DxeFxRJrBTMabEdOK_s_o3Irxuu0,13639
3
+ markback/config.py,sha256=eTVhb7UwDER9FRYo8QUAvneLHSqXD2ZtLUgtBtnljUs,5455
4
+ markback/linter.py,sha256=6jrfngF4PiYFQlDddm09OEmVSSGwacE5YFxMub5mqlA,8707
5
+ markback/llm.py,sha256=ON5_2C6v4KIk7_aIceulfWjEEI6hmallaPlLv-1-s_o,4692
6
+ markback/parser.py,sha256=5CrLeOWGuiE0_BOK9dJUnLLrJ72KTmucyOQEzR1nDh4,18570
7
+ markback/types.py,sha256=rRy41h1ZYYP9lo_FhvP5X5-OlwEM7vzHCw--Sq5L2LA,8564
8
+ markback/workflow.py,sha256=zC1RUm1i1wgiciFDqUilJKJ0-bgInvctxhQ0h5WSdoQ,10485
9
+ markback/writer.py,sha256=v5KT2o2Ma2I9I4U-r06PgzKyqwQFSsx49Ri5qIsovhY,7645
10
+ markback-0.1.0.dist-info/METADATA,sha256=aqRMZiWqsEExkqi_dwSyWtcFb3uhOx2Ol6HbNBU7Ggw,4864
11
+ markback-0.1.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
12
+ markback-0.1.0.dist-info/entry_points.txt,sha256=Bc9aXvtlPxVPuOJ9BWGngAVrkx5dMvRgujjVzXC-V5U,46
13
+ markback-0.1.0.dist-info/licenses/LICENSE,sha256=lLK1n13C_CXb0M10O-6itEIDY6dsXKutZYQH-09n6s0,1068
14
+ markback-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.28.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ markback = markback.cli:app
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 dandriscoll
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.