PyPI - markback - Versions diffs - 0.1.0__py3-none-any.whl - Mend

markback 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

markback/__init__.py +86 -0
markback/cli.py +435 -0
markback/config.py +181 -0
markback/linter.py +312 -0
markback/llm.py +175 -0
markback/parser.py +587 -0
markback/types.py +270 -0
markback/workflow.py +351 -0
markback/writer.py +249 -0
markback-0.1.0.dist-info/METADATA +251 -0
markback-0.1.0.dist-info/RECORD +14 -0
markback-0.1.0.dist-info/WHEEL +4 -0
markback-0.1.0.dist-info/entry_points.txt +2 -0
markback-0.1.0.dist-info/licenses/LICENSE +21 -0

markback/writer.py ADDED Viewed

@@ -0,0 +1,249 @@
+"""MarkBack canonical writer implementation."""
+from enum import Enum
+from pathlib import Path
+from typing import Optional
+from .types import Record, SourceRef
+class OutputMode(Enum):
+    """Output format modes accepted by ``write_file``.
+    Each member's value is the lowercase string form of its name.
+    ``write_file`` requires exactly one record for SINGLE and PAIRED and
+    accepts any number of records for MULTI and COMPACT.
+    """
+    SINGLE = "single"      # One record per file
+    MULTI = "multi"        # Multiple records in one file
+    COMPACT = "compact"    # Compact label list format
+    PAIRED = "paired"      # Separate content and label files
+def write_record_canonical(
+    record: Record,
+    prefer_compact: bool = True,
+) -> str:
+    """Write a single record in canonical format.
+    Args:
+        record: The record to write
+        prefer_compact: If True, use compact format when possible (source + no content)
+    Returns:
+        Canonical string representation
+    """
+    lines: list[str] = []
+    # Determine if we should use compact format
+    use_compact = (
+        prefer_compact
+        and record.source is not None
+        and not record.has_inline_content()
+    )
+    if use_compact:
+        # Compact format: @uri on its own line (if present), then @source ... <<<
+        if record.uri:
+            lines.append(f"@uri {record.uri}")
+        lines.append(f"@source {record.source} <<< {record.feedback}")
+    else:
+        # Full format
+        # Headers: @uri first, then @source
+        if record.uri:
+            lines.append(f"@uri {record.uri}")
+        if record.source:
+            lines.append(f"@source {record.source}")
+        # Content block (with blank line if content present)
+        if record.has_inline_content():
+            lines.append("")  # Blank line before content
+            # Normalize content: trim leading/trailing blank lines
+            content_lines = record.content.split('\n')
+            while content_lines and not content_lines[0].strip():
+                content_lines.pop(0)
+            while content_lines and not content_lines[-1].strip():
+                content_lines.pop()
+            lines.extend(content_lines)
+        # Feedback line
+        lines.append(f"<<< {record.feedback}")
+    return '\n'.join(lines)
+def write_records_multi(
+    records: list[Record],
+    prefer_compact: bool = True,
+) -> str:
+    """Write multiple records in multi-record format.
+    Args:
+        records: List of records to write
+        prefer_compact: If True, use compact format when possible
+    Returns:
+        Canonical multi-record string
+    """
+    if not records:
+        return ""
+    result_parts: list[str] = []
+    prev_was_compact = False
+    for i, record in enumerate(records):
+        is_compact = (
+            prefer_compact
+            and record.source is not None
+            and not record.has_inline_content()
+        )
+        # Add separator between records
+        if i > 0:
+            # Compact records in sequence don't need separators
+            if is_compact and prev_was_compact:
+                result_parts.append("\n")
+            else:
+                # Add blank line then separator then newline
+                result_parts.append("\n---\n")
+        record_str = write_record_canonical(record, prefer_compact=prefer_compact)
+        result_parts.append(record_str)
+        prev_was_compact = is_compact
+    return ''.join(result_parts) + "\n"
+def write_records_compact(records: list[Record]) -> str:
+    """Write records in compact label list format.
+    All records are written as single-line @source ... <<< entries.
+    Records without source will have source derived from URI or index.
+    """
+    lines: list[str] = []
+    for i, record in enumerate(records):
+        if record.uri and record.source:
+            lines.append(f"@uri {record.uri}")
+            lines.append(f"@source {record.source} <<< {record.feedback}")
+            lines.append("")  # Blank line for grouping
+        elif record.source:
+            lines.append(f"@source {record.source} <<< {record.feedback}")
+        else:
+            # No source - need to create a placeholder or use full format
+            if record.uri:
+                lines.append(f"@uri {record.uri}")
+            if record.has_inline_content():
+                # Can't use compact for this record
+                lines.append("")
+                lines.extend(record.content.split('\n'))
+            lines.append(f"<<< {record.feedback}")
+    # Remove trailing empty lines and add final newline
+    while lines and not lines[-1]:
+        lines.pop()
+    return '\n'.join(lines) + "\n" if lines else ""
+def write_label_file(record: Record) -> str:
+    """Write a label file for paired mode (no content, just headers + feedback)."""
+    lines: list[str] = []
+    if record.uri:
+        lines.append(f"@uri {record.uri}")
+    lines.append(f"<<< {record.feedback}")
+    return '\n'.join(lines) + "\n"
+def write_file(
+    path: Path,
+    records: list[Record],
+    mode: OutputMode = OutputMode.MULTI,
+    prefer_compact: bool = True,
+) -> None:
+    """Write records to a file.
+    Args:
+        path: Output file path
+        records: Records to write
+        mode: Output format mode
+        prefer_compact: For MULTI mode, prefer compact format when possible
+    """
+    if mode == OutputMode.SINGLE:
+        if len(records) != 1:
+            raise ValueError(f"SINGLE mode requires exactly 1 record, got {len(records)}")
+        content = write_record_canonical(records[0], prefer_compact=prefer_compact) + "\n"
+    elif mode == OutputMode.MULTI:
+        content = write_records_multi(records, prefer_compact=prefer_compact)
+    elif mode == OutputMode.COMPACT:
+        content = write_records_compact(records)
+    elif mode == OutputMode.PAIRED:
+        if len(records) != 1:
+            raise ValueError(f"PAIRED mode requires exactly 1 record, got {len(records)}")
+        content = write_label_file(records[0])
+    else:
+        raise ValueError(f"Unknown output mode: {mode}")
+    path.write_text(content, encoding="utf-8")
+def write_paired_files(
+    label_path: Path,
+    content_path: Optional[Path],
+    record: Record,
+    write_content: bool = False,
+) -> None:
+    """Write paired label + content files.
+    Args:
+        label_path: Path for the label file
+        content_path: Path for the content file (optional)
+        record: The record to write
+        write_content: If True, write content to content_path (only for text content)
+    """
+    # Write label file
+    label_content = write_label_file(record)
+    label_path.write_text(label_content, encoding="utf-8")
+    # Optionally write content file
+    if write_content and content_path and record.content:
+        content_path.write_text(record.content, encoding="utf-8")
+def normalize_file(
+    input_path: Path,
+    output_path: Optional[Path] = None,
+    in_place: bool = False,
+) -> str:
+    """Read a MarkBack file and write it in canonical form.
+    Args:
+        input_path: Input file path
+        output_path: Output file path (if None and in_place=True, overwrites input)
+        in_place: If True and output_path is None, overwrite input file
+    Returns:
+        The canonical content
+    """
+    from .parser import parse_file
+    result = parse_file(input_path)
+    if result.has_errors:
+        raise ValueError(f"Cannot normalize file with errors: {input_path}")
+    # Determine output format based on input
+    if len(result.records) == 1:
+        content = write_record_canonical(result.records[0]) + "\n"
+    else:
+        content = write_records_multi(result.records)
+    # Write output
+    if output_path:
+        output_path.write_text(content, encoding="utf-8")
+    elif in_place:
+        input_path.write_text(content, encoding="utf-8")
+    return content

markback-0.1.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,251 @@
+Metadata-Version: 2.4
+Name: markback
+Version: 0.1.0
+Summary: A compact, human-writable format for storing content paired with feedback/labels
+Project-URL: Homepage, https://github.com/dandriscoll/markback
+Project-URL: Repository, https://github.com/dandriscoll/markback
+Project-URL: Documentation, https://github.com/dandriscoll/markback#readme
+Project-URL: Issues, https://github.com/dandriscoll/markback/issues
+Author: Dan Driscoll
+License-Expression: MIT
+License-File: LICENSE
+Keywords: annotation,data-labeling,feedback,labeling,llm,markdown
+Classifier: Development Status :: 3 - Alpha
+Classifier: Environment :: Console
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Software Development :: Quality Assurance
+Classifier: Topic :: Text Processing :: Markup
+Requires-Python: >=3.10
+Requires-Dist: httpx>=0.25.0
+Requires-Dist: python-dotenv>=1.0.0
+Requires-Dist: rich>=13.0.0
+Requires-Dist: typer>=0.9.0
+Provides-Extra: dev
+Requires-Dist: build>=1.0.0; extra == 'dev'
+Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
+Requires-Dist: pytest>=7.0.0; extra == 'dev'
+Requires-Dist: twine>=5.0.0; extra == 'dev'
+Description-Content-Type: text/markdown
+# MarkBack
+A compact, human-writable format for storing content paired with feedback/labels.
+## Installation
+```bash
+# From PyPI
+pip install markback
+# Or, from a source checkout (editable install)
+pip install -e .
+```
+## Quick Start
+### Parse a MarkBack file
+```python
+from markback import parse_file, parse_string
+# Parse a file
+result = parse_file("labels.mb")
+for record in result.records:
+    print(f"{record.uri}: {record.feedback}")
+# Parse a string
+text = """
+@uri local:example
+Some content here.
+<<< positive; good quality
+"""
+result = parse_string(text)
+```
+### Write MarkBack files
+```python
+from markback import Record, SourceRef, write_file, OutputMode
+records = [
+    Record(feedback="good", uri="local:1", content="First item"),
+    Record(feedback="bad", uri="local:2", content="Second item"),
+]
+# Write multi-record file
+write_file("output.mb", records, mode=OutputMode.MULTI)
+# Write compact label list
+write_file("labels.mb", records, mode=OutputMode.COMPACT)
+```
+### Lint files
+```python
+from markback import lint_file
+result = lint_file("myfile.mb")
+if result.has_errors:
+    for d in result.diagnostics:
+        print(d)
+```
+## CLI Usage
+### Initialize configuration
+```bash
+markback init
+```
+Creates a `.env` file with all configuration options.
+### Lint files
+```bash
+# Lint a single file
+markback lint myfile.mb
+# Lint a directory
+markback lint ./data/
+# JSON output
+markback lint myfile.mb --json
+```
+### Normalize to canonical format
+```bash
+# Output to stdout
+markback normalize input.mb
+# Output to file
+markback normalize input.mb output.mb
+# In-place normalization
+markback normalize input.mb --in-place
+```
+### List records
+```bash
+markback list myfile.mb
+markback list ./data/ --json
+```
+### Convert between formats
+```bash
+# Convert to multi-record format
+markback convert input.mb output.mb --to multi
+# Convert to compact label list
+markback convert input.mb output.mb --to compact
+# Convert to paired files
+markback convert input.mb ./output_dir/ --to paired
+```
+### Run LLM workflow
+```bash
+# Run editor/operator workflow
+markback workflow run dataset.mb --prompt "Initial prompt" --output results.json
+# View evaluation results
+markback workflow evaluate results.json
+# Extract refined prompt
+markback workflow prompt results.json --output refined_prompt.txt
+```
+## File Formats
+### Single Record
+```
+@uri local:example
+Content goes here.
+<<< positive; quality=high
+```
+### Multi-Record
+```
+@uri local:item-1
+First content.
+<<< good
+---
+@uri local:item-2
+Second content.
+<<< bad; needs improvement
+```
+### Compact Label List
+```
+@source ./images/001.jpg <<< approved; scene=beach
+@source ./images/002.jpg <<< rejected; too dark
+@source ./images/003.jpg <<< approved; scene=mountain
+```
+### Paired Files
+**content.txt:**
+```
+The actual content goes here.
+```
+**content.label.txt:**
+```
+@uri local:content-id
+<<< approved; reviewer=alice
+```
+## Configuration
+Configuration is loaded from `.env`:
+```bash
+# File handling mode
+FILE_MODE=git  # or "versioned"
+# Label file discovery
+LABEL_SUFFIXES=.label.txt,.feedback.txt,.mb
+# Editor LLM
+EDITOR_API_BASE=https://api.openai.com/v1
+EDITOR_API_KEY=your-key
+EDITOR_MODEL=gpt-4
+# Operator LLM
+OPERATOR_API_BASE=https://api.openai.com/v1
+OPERATOR_API_KEY=your-key
+OPERATOR_MODEL=gpt-4
+```
+## Development
+### Run tests
+```bash
+pip install -e ".[dev]"
+pytest
+```
+### Run with coverage
+```bash
+pytest --cov=markback
+```
+## License
+MIT

markback-0.1.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,14 @@
+markback/__init__.py,sha256=B0-2dpUu5nkbnUI0hPz-x7PHiOl7M-tiRi6s3UCYJFk,1540
+markback/cli.py,sha256=5wMk1OUG7W_voS9DxeFxRJrBTMabEdOK_s_o3Irxuu0,13639
+markback/config.py,sha256=eTVhb7UwDER9FRYo8QUAvneLHSqXD2ZtLUgtBtnljUs,5455
+markback/linter.py,sha256=6jrfngF4PiYFQlDddm09OEmVSSGwacE5YFxMub5mqlA,8707
+markback/llm.py,sha256=ON5_2C6v4KIk7_aIceulfWjEEI6hmallaPlLv-1-s_o,4692
+markback/parser.py,sha256=5CrLeOWGuiE0_BOK9dJUnLLrJ72KTmucyOQEzR1nDh4,18570
+markback/types.py,sha256=rRy41h1ZYYP9lo_FhvP5X5-OlwEM7vzHCw--Sq5L2LA,8564
+markback/workflow.py,sha256=zC1RUm1i1wgiciFDqUilJKJ0-bgInvctxhQ0h5WSdoQ,10485
+markback/writer.py,sha256=v5KT2o2Ma2I9I4U-r06PgzKyqwQFSsx49Ri5qIsovhY,7645
+markback-0.1.0.dist-info/METADATA,sha256=aqRMZiWqsEExkqi_dwSyWtcFb3uhOx2Ol6HbNBU7Ggw,4864
+markback-0.1.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+markback-0.1.0.dist-info/entry_points.txt,sha256=Bc9aXvtlPxVPuOJ9BWGngAVrkx5dMvRgujjVzXC-V5U,46
+markback-0.1.0.dist-info/licenses/LICENSE,sha256=lLK1n13C_CXb0M10O-6itEIDY6dsXKutZYQH-09n6s0,1068
+markback-0.1.0.dist-info/RECORD,,

markback-0.1.0.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,4 @@
+Wheel-Version: 1.0
+Generator: hatchling 1.28.0
+Root-Is-Purelib: true
+Tag: py3-none-any

markback-0.1.0.dist-info/entry_points.txt ADDED Viewed

@@ -0,0 +1,2 @@
+[console_scripts]
+markback = markback.cli:app

markback-0.1.0.dist-info/licenses/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 dandriscoll
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.