PyPI - docassert - Version diff - 0.4.0.tar.gz → 0.5.0.tar.gz - Mend

docassert 0.4.0.tar.gz → 0.5.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (109)

{docassert-0.4.0/docassert.egg-info → docassert-0.5.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: docassert
-Version: 0.4.0
+Version: 0.5.0
 Summary: Unit testing for business documents — validate structured Markdown docs against a configurable audit standard.
 Author: C4G Enterprises Inc.
 License: Apache-2.0
@@ -85,8 +85,8 @@ flagged as TODOs, never invented). The skill's source is
 | Command | What it does |
 |---|---|
-| `docassert validate <globs>` | Validate documents against their kind's criteria. Exit code = number of blocking failures (capped at 125). |
-| `docassert consistency` | Cross-document checks: referential integrity, coverage, required links, profile completeness. |
+| `docassert validate <globs>` | Validate documents against their kind's criteria. Exit code = number of blocking failures (capped at 125). Reports: `--junit` / `--markdown` / `--json`. |
+| `docassert consistency` | Cross-document checks: referential integrity, coverage, required links, profile completeness. Reports: `--junit` / `--markdown` / `--json`. |
 | `docassert rtm [--project ID]` | Requirements traceability matrix (Markdown or CSV). |
 | `docassert status [--project ID] [--index]` | Derived project status (md / json / html). |
 | `docassert pages --out DIR` | Build the portfolio site (index + a page per project). |
@@ -117,6 +117,17 @@ kind is adding a trio — no code for the common cases.
 - **Semantic — AI-graded, advisory.** Scored via the Anthropic API and posted to
   the PR — never blocking. Set `ANTHROPIC_API_KEY` to enable; skipped otherwise.
+## Privacy
+Structural checks run **entirely locally** — no document content leaves your
+machine or CI runner. Semantic checks are the one exception: when
+`ANTHROPIC_API_KEY` is set, the graded excerpts (section text, linked item
+text) are sent to the **Anthropic API** for scoring. Without the key, semantic
+checks are skipped and nothing is sent anywhere. Alignment grading is capped at
+`alignment_limit` links per run (default 25). If your documents are
+confidential, run without the key or review [Anthropic's data-usage
+policies](https://www.anthropic.com/legal/commercial-terms) first.
 ## Development
 ```bash

{docassert-0.4.0 → docassert-0.5.0}/README.md RENAMED Viewed

@@ -47,8 +47,8 @@ flagged as TODOs, never invented). The skill's source is
 | Command | What it does |
 |---|---|
-| `docassert validate <globs>` | Validate documents against their kind's criteria. Exit code = number of blocking failures (capped at 125). |
-| `docassert consistency` | Cross-document checks: referential integrity, coverage, required links, profile completeness. |
+| `docassert validate <globs>` | Validate documents against their kind's criteria. Exit code = number of blocking failures (capped at 125). Reports: `--junit` / `--markdown` / `--json`. |
+| `docassert consistency` | Cross-document checks: referential integrity, coverage, required links, profile completeness. Reports: `--junit` / `--markdown` / `--json`. |
 | `docassert rtm [--project ID]` | Requirements traceability matrix (Markdown or CSV). |
 | `docassert status [--project ID] [--index]` | Derived project status (md / json / html). |
 | `docassert pages --out DIR` | Build the portfolio site (index + a page per project). |
@@ -79,6 +79,17 @@ kind is adding a trio — no code for the common cases.
 - **Semantic — AI-graded, advisory.** Scored via the Anthropic API and posted to
   the PR — never blocking. Set `ANTHROPIC_API_KEY` to enable; skipped otherwise.
+## Privacy
+Structural checks run **entirely locally** — no document content leaves your
+machine or CI runner. Semantic checks are the one exception: when
+`ANTHROPIC_API_KEY` is set, the graded excerpts (section text, linked item
+text) are sent to the **Anthropic API** for scoring. Without the key, semantic
+checks are skipped and nothing is sent anywhere. Alignment grading is capped at
+`alignment_limit` links per run (default 25). If your documents are
+confidential, run without the key or review [Anthropic's data-usage
+policies](https://www.anthropic.com/legal/commercial-terms) first.
 ## Development
 ```bash

{docassert-0.4.0 → docassert-0.5.0}/docassert/__init__.py RENAMED Viewed

@@ -5,4 +5,4 @@ standard: deterministic structural checks that gate a merge, plus optional
 AI-graded semantic checks that advise.
 """
-__version__ = "0.4.0"
+__version__ = "0.5.0"

{docassert-0.4.0 → docassert-0.5.0}/docassert/cli.py RENAMED Viewed

@@ -116,6 +116,8 @@ def cmd_validate(args: argparse.Namespace) -> int:
         Path(args.junit).write_text(report.junit(results_by_doc))
     if args.markdown:
         Path(args.markdown).write_text(report.markdown(results_by_doc))
+    if args.json:
+        Path(args.json).write_text(report.json_report(results_by_doc))
     return _capped(sum(1 for rs in results_by_doc.values()
                        for r in rs if r.is_blocking_failure))
@@ -133,6 +135,8 @@ def cmd_consistency(args: argparse.Namespace) -> int:
     if args.markdown:
         Path(args.markdown).write_text(
             report.markdown(results_by_doc, title="docassert consistency"))
+    if args.json:
+        Path(args.json).write_text(report.json_report(results_by_doc))
     return _capped(sum(1 for r in results if r.is_blocking_failure))
@@ -292,12 +296,14 @@ def main(argv: list[str] | None = None) -> int:
     v.add_argument("paths", nargs="+", help="Markdown files or globs.")
     v.add_argument("--junit", help="Write a JUnit XML report to this path.")
     v.add_argument("--markdown", help="Write a PR-comment markdown report to this path.")
+    v.add_argument("--json", help="Write a machine-readable JSON report to this path.")
     docs_dir_opt(v)
     v.set_defaults(func=cmd_validate)
     c = sub.add_parser("consistency", help="Check cross-document traceability.")
     c.add_argument("--junit", help="Write a JUnit XML report to this path.")
     c.add_argument("--markdown", help="Write a PR-comment markdown report to this path.")
+    c.add_argument("--json", help="Write a machine-readable JSON report to this path.")
     c.add_argument("--no-semantic", action="store_true",
                    help="Skip AI alignment (structural consistency only).")
     docs_dir_opt(c)

{docassert-0.4.0 → docassert-0.5.0}/docassert/report.py RENAMED Viewed

@@ -1,6 +1,7 @@
-"""Render check results as console text, PR-comment markdown, or JUnit XML."""
+"""Render check results as console text, PR-comment markdown, JUnit XML, or JSON."""
 from __future__ import annotations
+import json as _json
 import xml.etree.ElementTree as ET
 from xml.dom import minidom
@@ -37,6 +38,31 @@ def summary_line(results_by_doc: dict[str, list[CheckResult]]) -> str:
     return f"{_TICK} All structural checks passed across {docs} document(s) {_DASH} clear to merge."
+def json_report(results_by_doc: dict[str, list[CheckResult]]) -> str:
+    """Machine-readable results: one entry per document, plus a summary."""
+    documents = {
+        path: [{
+            "check_id": r.check_id,
+            "passed": r.passed,
+            "blocking": r.blocking,
+            "kind": r.kind,
+            "score": r.score,
+            "detail": r.detail,
+        } for r in results]
+        for path, results in results_by_doc.items()
+    }
+    all_results = [r for rs in results_by_doc.values() for r in rs]
+    summary = {
+        "documents": len(results_by_doc),
+        "checks": len(all_results),
+        "blocking_failures": sum(1 for r in all_results if r.is_blocking_failure),
+        "advisory_failures": sum(1 for r in all_results
+                                 if not r.passed and not r.blocking),
+        "passed": not any(r.is_blocking_failure for r in all_results),
+    }
+    return _json.dumps({"summary": summary, "documents": documents}, indent=2) + "\n"
 def markdown(results_by_doc: dict[str, list[CheckResult]],
              title: str = "docassert audit") -> str:
     """PR-comment body."""

{docassert-0.4.0 → docassert-0.5.0/docassert.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: docassert
-Version: 0.4.0
+Version: 0.5.0
 Summary: Unit testing for business documents — validate structured Markdown docs against a configurable audit standard.
 Author: C4G Enterprises Inc.
 License: Apache-2.0
@@ -85,8 +85,8 @@ flagged as TODOs, never invented). The skill's source is
 | Command | What it does |
 |---|---|
-| `docassert validate <globs>` | Validate documents against their kind's criteria. Exit code = number of blocking failures (capped at 125). |
-| `docassert consistency` | Cross-document checks: referential integrity, coverage, required links, profile completeness. |
+| `docassert validate <globs>` | Validate documents against their kind's criteria. Exit code = number of blocking failures (capped at 125). Reports: `--junit` / `--markdown` / `--json`. |
+| `docassert consistency` | Cross-document checks: referential integrity, coverage, required links, profile completeness. Reports: `--junit` / `--markdown` / `--json`. |
 | `docassert rtm [--project ID]` | Requirements traceability matrix (Markdown or CSV). |
 | `docassert status [--project ID] [--index]` | Derived project status (md / json / html). |
 | `docassert pages --out DIR` | Build the portfolio site (index + a page per project). |
@@ -117,6 +117,17 @@ kind is adding a trio — no code for the common cases.
 - **Semantic — AI-graded, advisory.** Scored via the Anthropic API and posted to
   the PR — never blocking. Set `ANTHROPIC_API_KEY` to enable; skipped otherwise.
+## Privacy
+Structural checks run **entirely locally** — no document content leaves your
+machine or CI runner. Semantic checks are the one exception: when
+`ANTHROPIC_API_KEY` is set, the graded excerpts (section text, linked item
+text) are sent to the **Anthropic API** for scoring. Without the key, semantic
+checks are skipped and nothing is sent anywhere. Alignment grading is capped at
+`alignment_limit` links per run (default 25). If your documents are
+confidential, run without the key or review [Anthropic's data-usage
+policies](https://www.anthropic.com/legal/commercial-terms) first.
 ## Development
 ```bash

{docassert-0.4.0 → docassert-0.5.0}/docassert.egg-info/SOURCES.txt RENAMED Viewed

@@ -95,6 +95,7 @@ tests/test_consistency.py
 tests/test_defects.py
 tests/test_extract.py
 tests/test_graph.py
+tests/test_json_report.py
 tests/test_kinds_delivery.py
 tests/test_kinds_governance.py
 tests/test_kinds_operate.py

{docassert-0.4.0 → docassert-0.5.0}/tests/test_extract.py RENAMED Viewed

@@ -30,6 +30,38 @@ def test_unsupported_type_raises(tmp_path):
         E.extract(f)
+def _minimal_pdf(text: str) -> bytes:
+    """Assemble a one-page PDF with `text` in a content stream, xref included."""
+    stream = f"BT /F1 24 Tf 72 720 Td ({text}) Tj ET".encode()
+    objects = [
+        b"<< /Type /Catalog /Pages 2 0 R >>",
+        b"<< /Type /Pages /Kids [3 0 R] /Count 1 >>",
+        (b"<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] "
+         b"/Contents 4 0 R /Resources << /Font << /F1 5 0 R >> >> >>"),
+        b"<< /Length " + str(len(stream)).encode() + b" >>\nstream\n" + stream + b"\nendstream",
+        b"<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica >>",
+    ]
+    out = b"%PDF-1.4\n"
+    offsets = []
+    for i, body in enumerate(objects, start=1):
+        offsets.append(len(out))
+        out += f"{i} 0 obj\n".encode() + body + b"\nendobj\n"
+    xref_at = len(out)
+    out += f"xref\n0 {len(objects) + 1}\n0000000000 65535 f \n".encode()
+    for off in offsets:
+        out += f"{off:010d} 00000 n \n".encode()
+    out += (f"trailer\n<< /Size {len(objects) + 1} /Root 1 0 R >>\n"
+            f"startxref\n{xref_at}\n%%EOF\n").encode()
+    return out
+def test_extract_pdf(tmp_path):
+    pytest.importorskip("pypdf")  # needs the 'convert' extra
+    path = tmp_path / "s.pdf"
+    path.write_bytes(_minimal_pdf("Hello docassert PDF"))
+    assert "Hello docassert PDF" in E.extract(path)
 def test_extract_docx_paragraphs_and_tables(tmp_path):
     docx = pytest.importorskip("docx")  # needs the 'convert' extra
     d = docx.Document()

docassert-0.5.0/tests/test_json_report.py ADDED Viewed

@@ -0,0 +1,43 @@
+"""Tests for the machine-readable JSON report (`--json`)."""
+import json
+from pathlib import Path
+from docassert import report
+from docassert.cli import main
+from docassert.models import CheckResult
+ROOT = Path(__file__).resolve().parent.parent
+def test_json_report_shape():
+    results = {
+        "a.md": [CheckResult("c1", True, True, "ok"),
+                 CheckResult("c2", False, True, "bad")],
+        "b.md": [CheckResult("c3", False, False, "meh", kind="semantic", score=0.4)],
+    }
+    data = json.loads(report.json_report(results))
+    assert data["summary"] == {"documents": 2, "checks": 3, "blocking_failures": 1,
+                               "advisory_failures": 1, "passed": False}
+    assert data["documents"]["a.md"][1]["check_id"] == "c2"
+    assert data["documents"]["b.md"][0]["score"] == 0.4
+def test_cli_validate_writes_json(tmp_path, monkeypatch):
+    monkeypatch.chdir(ROOT)  # criteria/schema resolve; sample documents exist
+    out = tmp_path / "r.json"
+    code = main(["validate", "documents/PRJ-001-AUR/charter.md", "--json", str(out)])
+    assert code == 0
+    data = json.loads(out.read_text())
+    assert data["summary"]["passed"] is True
+    assert "documents/PRJ-001-AUR/charter.md" in data["documents"]
+def test_cli_consistency_writes_json(tmp_path, monkeypatch):
+    monkeypatch.chdir(ROOT)
+    monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
+    out = tmp_path / "c.json"
+    code = main(["consistency", "--no-semantic", "--json", str(out)])
+    assert code == 0
+    data = json.loads(out.read_text())
+    checks = {c["check_id"] for c in data["documents"]["consistency (cross-document)"]}
+    assert {"item-id-uniqueness", "referential-integrity", "coverage"} <= checks