docassert 0.1.0__tar.gz → 0.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. {docassert-0.1.0/docassert.egg-info → docassert-0.2.1}/PKG-INFO +15 -4
  2. {docassert-0.1.0 → docassert-0.2.1}/README.md +14 -3
  3. {docassert-0.1.0 → docassert-0.2.1}/docassert/__init__.py +1 -1
  4. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/consistency.yaml +4 -0
  5. {docassert-0.1.0 → docassert-0.2.1}/docassert/cli.py +65 -21
  6. {docassert-0.1.0 → docassert-0.2.1}/docassert/consistency.py +22 -2
  7. docassert-0.2.1/docassert/extract.py +55 -0
  8. {docassert-0.1.0 → docassert-0.2.1/docassert.egg-info}/PKG-INFO +15 -4
  9. {docassert-0.1.0 → docassert-0.2.1}/docassert.egg-info/SOURCES.txt +3 -0
  10. docassert-0.2.1/tests/test_defects.py +85 -0
  11. docassert-0.2.1/tests/test_extract.py +65 -0
  12. {docassert-0.1.0 → docassert-0.2.1}/LICENSE +0 -0
  13. {docassert-0.1.0 → docassert-0.2.1}/NOTICE +0 -0
  14. {docassert-0.1.0 → docassert-0.2.1}/docassert/__main__.py +0 -0
  15. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/criteria/adr.criteria.yaml +0 -0
  16. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/criteria/benefits-realization.criteria.yaml +0 -0
  17. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/criteria/brd.criteria.yaml +0 -0
  18. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/criteria/business-case.criteria.yaml +0 -0
  19. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/criteria/charter.criteria.yaml +0 -0
  20. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/criteria/data-migration-plan.criteria.yaml +0 -0
  21. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/criteria/frnfr.criteria.yaml +0 -0
  22. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/criteria/hypercare-plan.criteria.yaml +0 -0
  23. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/criteria/post-implementation-review.criteria.yaml +0 -0
  24. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/criteria/prd.criteria.yaml +0 -0
  25. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/criteria/project.criteria.yaml +0 -0
  26. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/criteria/qa-test-plan.criteria.yaml +0 -0
  27. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/criteria/raci-stakeholder.criteria.yaml +0 -0
  28. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/criteria/release-cutover-plan.criteria.yaml +0 -0
  29. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/criteria/risk-register.criteria.yaml +0 -0
  30. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/criteria/rollback-plan.criteria.yaml +0 -0
  31. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/criteria/runbook.criteria.yaml +0 -0
  32. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/criteria/status-report.criteria.yaml +0 -0
  33. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/criteria/test-cases.criteria.yaml +0 -0
  34. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/criteria/user-story.criteria.yaml +0 -0
  35. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/profiles/agile-delivery.yaml +0 -0
  36. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/profiles/lean-startup.yaml +0 -0
  37. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/profiles/regulated-industry.yaml +0 -0
  38. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/schema/adr.schema.json +0 -0
  39. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/schema/benefits-realization.schema.json +0 -0
  40. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/schema/brd.schema.json +0 -0
  41. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/schema/business-case.schema.json +0 -0
  42. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/schema/charter.schema.json +0 -0
  43. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/schema/data-migration-plan.schema.json +0 -0
  44. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/schema/frnfr.schema.json +0 -0
  45. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/schema/hypercare-plan.schema.json +0 -0
  46. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/schema/post-implementation-review.schema.json +0 -0
  47. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/schema/prd.schema.json +0 -0
  48. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/schema/project.schema.json +0 -0
  49. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/schema/qa-test-plan.schema.json +0 -0
  50. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/schema/raci-stakeholder.schema.json +0 -0
  51. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/schema/release-cutover-plan.schema.json +0 -0
  52. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/schema/risk-register.schema.json +0 -0
  53. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/schema/rollback-plan.schema.json +0 -0
  54. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/schema/runbook.schema.json +0 -0
  55. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/schema/status-report.schema.json +0 -0
  56. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/schema/test-cases.schema.json +0 -0
  57. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/schema/user-story.schema.json +0 -0
  58. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/templates/adr.template.md +0 -0
  59. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/templates/benefits-realization.template.md +0 -0
  60. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/templates/brd.template.md +0 -0
  61. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/templates/business-case.template.md +0 -0
  62. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/templates/charter.template.md +0 -0
  63. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/templates/data-migration-plan.template.md +0 -0
  64. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/templates/frnfr.template.md +0 -0
  65. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/templates/hypercare-plan.template.md +0 -0
  66. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/templates/post-implementation-review.template.md +0 -0
  67. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/templates/prd.template.md +0 -0
  68. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/templates/project.template.md +0 -0
  69. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/templates/qa-test-plan.template.md +0 -0
  70. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/templates/raci-stakeholder.template.md +0 -0
  71. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/templates/release-cutover-plan.template.md +0 -0
  72. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/templates/risk-register.template.md +0 -0
  73. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/templates/rollback-plan.template.md +0 -0
  74. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/templates/runbook.template.md +0 -0
  75. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/templates/status-report.template.md +0 -0
  76. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/templates/test-cases.template.md +0 -0
  77. {docassert-0.1.0 → docassert-0.2.1}/docassert/_data/templates/user-story.template.md +0 -0
  78. {docassert-0.1.0 → docassert-0.2.1}/docassert/config.py +0 -0
  79. {docassert-0.1.0 → docassert-0.2.1}/docassert/graph.py +0 -0
  80. {docassert-0.1.0 → docassert-0.2.1}/docassert/loader.py +0 -0
  81. {docassert-0.1.0 → docassert-0.2.1}/docassert/models.py +0 -0
  82. {docassert-0.1.0 → docassert-0.2.1}/docassert/profiles.py +0 -0
  83. {docassert-0.1.0 → docassert-0.2.1}/docassert/projects.py +0 -0
  84. {docassert-0.1.0 → docassert-0.2.1}/docassert/report.py +0 -0
  85. {docassert-0.1.0 → docassert-0.2.1}/docassert/rtm.py +0 -0
  86. {docassert-0.1.0 → docassert-0.2.1}/docassert/semantic.py +0 -0
  87. {docassert-0.1.0 → docassert-0.2.1}/docassert/status.py +0 -0
  88. {docassert-0.1.0 → docassert-0.2.1}/docassert/structural.py +0 -0
  89. {docassert-0.1.0 → docassert-0.2.1}/docassert.egg-info/dependency_links.txt +0 -0
  90. {docassert-0.1.0 → docassert-0.2.1}/docassert.egg-info/entry_points.txt +0 -0
  91. {docassert-0.1.0 → docassert-0.2.1}/docassert.egg-info/requires.txt +0 -0
  92. {docassert-0.1.0 → docassert-0.2.1}/docassert.egg-info/top_level.txt +0 -0
  93. {docassert-0.1.0 → docassert-0.2.1}/pyproject.toml +0 -0
  94. {docassert-0.1.0 → docassert-0.2.1}/setup.cfg +0 -0
  95. {docassert-0.1.0 → docassert-0.2.1}/tests/test_config.py +0 -0
  96. {docassert-0.1.0 → docassert-0.2.1}/tests/test_consistency.py +0 -0
  97. {docassert-0.1.0 → docassert-0.2.1}/tests/test_graph.py +0 -0
  98. {docassert-0.1.0 → docassert-0.2.1}/tests/test_kinds_delivery.py +0 -0
  99. {docassert-0.1.0 → docassert-0.2.1}/tests/test_kinds_governance.py +0 -0
  100. {docassert-0.1.0 → docassert-0.2.1}/tests/test_kinds_operate.py +0 -0
  101. {docassert-0.1.0 → docassert-0.2.1}/tests/test_kinds_reporting.py +0 -0
  102. {docassert-0.1.0 → docassert-0.2.1}/tests/test_profiles.py +0 -0
  103. {docassert-0.1.0 → docassert-0.2.1}/tests/test_projects.py +0 -0
  104. {docassert-0.1.0 → docassert-0.2.1}/tests/test_status.py +0 -0
  105. {docassert-0.1.0 → docassert-0.2.1}/tests/test_structural.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docassert
3
- Version: 0.1.0
3
+ Version: 0.2.1
4
4
  Summary: Unit testing for business documents — validate structured Markdown docs against a configurable audit standard.
5
5
  Author: C4G Enterprises Inc.
6
6
  License: Apache-2.0
@@ -38,6 +38,10 @@ Dynamic: license-file
38
38
 
39
39
  # docassert
40
40
 
41
+ [![PyPI](https://img.shields.io/pypi/v/docassert)](https://pypi.org/project/docassert/)
42
+ [![Python](https://img.shields.io/pypi/pyversions/docassert)](https://pypi.org/project/docassert/)
43
+ [![License](https://img.shields.io/badge/license-Apache--2.0-blue)](LICENSE)
44
+
41
45
  **Unit testing for business documents.** Validate structured Markdown documents
42
46
  (charters, BRDs, PRDs, risk registers, …) against a configurable audit standard:
43
47
  deterministic structural checks that gate a merge, plus optional AI-graded
@@ -50,9 +54,11 @@ a vendor-neutral standard for running a PMO from version-controlled, declarative
50
54
  ## Install
51
55
 
52
56
  ```bash
53
- pip install "docassert @ git+https://github.com/c4g-john/docassert" # PyPI release coming
57
+ pipx install docassert # recommended: installs the CLI in its own isolated env
58
+ # or:
59
+ pip install docassert
54
60
  # with the AI advisory extra:
55
- pip install "docassert[ai] @ git+https://github.com/c4g-john/docassert"
61
+ pip install "docassert[ai]"
56
62
  ```
57
63
 
58
64
  ## Quickstart
@@ -74,13 +80,18 @@ you can customize them.
74
80
 
75
81
  | Command | What it does |
76
82
  |---|---|
77
- | `docassert validate <globs>` | Validate documents against their kind's criteria. Exit code = number of blocking failures. |
83
+ | `docassert validate <globs>` | Validate documents against their kind's criteria. Exit code = number of blocking failures (capped at 125). |
78
84
  | `docassert consistency` | Cross-document checks: referential integrity, coverage, required links, profile completeness. |
79
85
  | `docassert rtm [--project ID]` | Requirements traceability matrix (Markdown or CSV). |
80
86
  | `docassert status [--project ID] [--index]` | Derived project status (md / json / html). |
81
87
  | `docassert pages --out DIR` | Build the portfolio site (index + a page per project). |
82
88
  | `docassert projects [--out] [--check]` | Generate / verify the project registry. |
83
89
  | `docassert init [DIR]` | Scaffold the default config into a repo. |
90
+ | `docassert extract <file>` | Extract plain text from a source `.docx` / `.pdf` / `.md` / `.txt` (the first step of doc-to-pmo conversion). Needs the `convert` extra: `pip install "docassert[convert]"`. |
91
+
92
+ Every document-reading command accepts `--documents-dir` (default `documents/`).
93
+ AI alignment grades at most `alignment_limit` links per run (default 25; set it
94
+ in `consistency.yaml`, `0` = no cap) so API cost stays bounded on large graphs.
84
95
 
85
96
  ## Document kinds
86
97
 
@@ -1,5 +1,9 @@
1
1
  # docassert
2
2
 
3
+ [![PyPI](https://img.shields.io/pypi/v/docassert)](https://pypi.org/project/docassert/)
4
+ [![Python](https://img.shields.io/pypi/pyversions/docassert)](https://pypi.org/project/docassert/)
5
+ [![License](https://img.shields.io/badge/license-Apache--2.0-blue)](LICENSE)
6
+
3
7
  **Unit testing for business documents.** Validate structured Markdown documents
4
8
  (charters, BRDs, PRDs, risk registers, …) against a configurable audit standard:
5
9
  deterministic structural checks that gate a merge, plus optional AI-graded
@@ -12,9 +16,11 @@ a vendor-neutral standard for running a PMO from version-controlled, declarative
12
16
  ## Install
13
17
 
14
18
  ```bash
15
- pip install "docassert @ git+https://github.com/c4g-john/docassert" # PyPI release coming
19
+ pipx install docassert # recommended: installs the CLI in its own isolated env
20
+ # or:
21
+ pip install docassert
16
22
  # with the AI advisory extra:
17
- pip install "docassert[ai] @ git+https://github.com/c4g-john/docassert"
23
+ pip install "docassert[ai]"
18
24
  ```
19
25
 
20
26
  ## Quickstart
@@ -36,13 +42,18 @@ you can customize them.
36
42
 
37
43
  | Command | What it does |
38
44
  |---|---|
39
- | `docassert validate <globs>` | Validate documents against their kind's criteria. Exit code = number of blocking failures. |
45
+ | `docassert validate <globs>` | Validate documents against their kind's criteria. Exit code = number of blocking failures (capped at 125). |
40
46
  | `docassert consistency` | Cross-document checks: referential integrity, coverage, required links, profile completeness. |
41
47
  | `docassert rtm [--project ID]` | Requirements traceability matrix (Markdown or CSV). |
42
48
  | `docassert status [--project ID] [--index]` | Derived project status (md / json / html). |
43
49
  | `docassert pages --out DIR` | Build the portfolio site (index + a page per project). |
44
50
  | `docassert projects [--out] [--check]` | Generate / verify the project registry. |
45
51
  | `docassert init [DIR]` | Scaffold the default config into a repo. |
52
+ | `docassert extract <file>` | Extract plain text from a source `.docx` / `.pdf` / `.md` / `.txt` (the first step of doc-to-pmo conversion). Needs the `convert` extra: `pip install "docassert[convert]"`. |
53
+
54
+ Every document-reading command accepts `--documents-dir` (default `documents/`).
55
+ AI alignment grades at most `alignment_limit` links per run (default 25; set it
56
+ in `consistency.yaml`, `0` = no cap) so API cost stays bounded on large graphs.
46
57
 
47
58
  ## Document kinds
48
59
 
@@ -5,4 +5,4 @@ standard: deterministic structural checks that gate a merge, plus optional
5
5
  AI-graded semantic checks that advise.
6
6
  """
7
7
 
8
- __version__ = "0.1.0"
8
+ __version__ = "0.2.1"
@@ -35,6 +35,10 @@ coverage:
35
35
 
36
36
  # Advisory AI alignment: for each relation, judge whether the child genuinely
37
37
  # fulfils the parent it links to. Never blocks.
38
+ # Each graded link costs one API call; `alignment_limit` caps calls per run
39
+ # (0 = no cap).
40
+ alignment_limit: 25
41
+
38
42
  alignment:
39
43
  - relation: traces
40
44
  prompt: >
@@ -3,8 +3,10 @@
3
3
  docassert validate documents/charters/aurora.md
4
4
  docassert validate documents/**/*.md --junit out.xml --markdown comment.md
5
5
 
6
- Exit code = number of BLOCKING (structural) failures. Advisory (AI) failures
7
- never affect the exit code, so CI is gated only by deterministic checks.
6
+ Exit code = number of BLOCKING (structural) failures, capped at 125 so large
7
+ counts can't wrap around the 8-bit exit-status space (256 failures must never
8
+ read as success). Advisory (AI) failures never affect the exit code, so CI is
9
+ gated only by deterministic checks.
8
10
  """
9
11
  from __future__ import annotations
10
12
 
@@ -23,15 +25,24 @@ from .models import CheckResult
23
25
  from .semantic import run_semantic
24
26
  from .structural import run_structural
25
27
 
26
- # The user's documents live here; criteria / schema / consistency.yaml / profiles
27
- # resolve via `config` (local override packaged default).
28
- DOCUMENTS_DIR = Path("documents")
28
+ # Default documents location; every document-reading command accepts
29
+ # --documents-dir to override it. Criteria / schema / consistency.yaml /
30
+ # profiles resolve via `config` (local override → packaged default).
31
+ DEFAULT_DOCUMENTS_DIR = "documents"
29
32
 
33
+ # POSIX exit statuses are 8-bit; 126+ carry shell meanings. Cap so a failure
34
+ # count can never wrap to 0.
35
+ _EXIT_CAP = 125
30
36
 
31
- def _build_id_index() -> dict[str, list[str]]:
32
- """Map document id -> [paths] across all documents/, for uniqueness checks."""
37
+
38
+ def _capped(failures: int) -> int:
39
+ return min(failures, _EXIT_CAP)
40
+
41
+
42
+ def _build_id_index(documents_dir: Path) -> dict[str, list[str]]:
43
+ """Map document id -> [paths] across the documents tree, for uniqueness checks."""
33
44
  index: dict[str, list[str]] = defaultdict(list)
34
- for path in DOCUMENTS_DIR.rglob("*.md"):
45
+ for path in documents_dir.rglob("*.md"):
35
46
  try:
36
47
  doc = load(path)
37
48
  except ValueError:
@@ -86,7 +97,7 @@ def cmd_validate(args: argparse.Namespace) -> int:
86
97
  print("docassert: no markdown documents matched.", file=sys.stderr)
87
98
  return 0
88
99
 
89
- id_index = _build_id_index()
100
+ id_index = _build_id_index(Path(args.documents_dir))
90
101
  results_by_doc: dict[str, list[CheckResult]] = {}
91
102
  for path in files:
92
103
  try:
@@ -106,12 +117,12 @@ def cmd_validate(args: argparse.Namespace) -> int:
106
117
  if args.markdown:
107
118
  Path(args.markdown).write_text(report.markdown(results_by_doc))
108
119
 
109
- return sum(1 for rs in results_by_doc.values()
110
- for r in rs if r.is_blocking_failure)
120
+ return _capped(sum(1 for rs in results_by_doc.values()
121
+ for r in rs if r.is_blocking_failure))
111
122
 
112
123
 
113
124
  def cmd_consistency(args: argparse.Namespace) -> int:
114
- results = run_consistency(DOCUMENTS_DIR, with_semantic=not args.no_semantic)
125
+ results = run_consistency(args.documents_dir, with_semantic=not args.no_semantic)
115
126
  results_by_doc = {"consistency (cross-document)": results}
116
127
 
117
128
  print(report.console(results_by_doc))
@@ -123,7 +134,7 @@ def cmd_consistency(args: argparse.Namespace) -> int:
123
134
  Path(args.markdown).write_text(
124
135
  report.markdown(results_by_doc, title="docassert consistency"))
125
136
 
126
- return sum(1 for r in results if r.is_blocking_failure)
137
+ return _capped(sum(1 for r in results if r.is_blocking_failure))
127
138
 
128
139
 
129
140
  def _project_code(value: str | None) -> str | None:
@@ -132,7 +143,7 @@ def _project_code(value: str | None) -> str | None:
132
143
 
133
144
 
134
145
  def cmd_rtm(args: argparse.Namespace) -> int:
135
- graph = build_graph(DOCUMENTS_DIR)
146
+ graph = build_graph(args.documents_dir)
136
147
  code = _project_code(args.project)
137
148
  text = rtm.render_csv(graph, code) if args.csv else rtm.render_markdown(graph, code)
138
149
  if args.out:
@@ -145,7 +156,7 @@ def cmd_rtm(args: argparse.Namespace) -> int:
145
156
 
146
157
  def cmd_projects(args: argparse.Namespace) -> int:
147
158
  from . import projects as proj
148
- plist = proj.load_projects(DOCUMENTS_DIR)
159
+ plist = proj.load_projects(args.documents_dir)
149
160
  issues = proj.registry_issues(plist)
150
161
  for issue in issues:
151
162
  print(f"docassert: {issue}", file=sys.stderr)
@@ -172,7 +183,7 @@ def cmd_projects(args: argparse.Namespace) -> int:
172
183
  def cmd_status(args: argparse.Namespace) -> int:
173
184
  from . import status as status_mod
174
185
  if args.index:
175
- index = status_mod.build_index(DOCUMENTS_DIR)
186
+ index = status_mod.build_index(args.documents_dir)
176
187
  if args.format == "json":
177
188
  text = status_mod.render_json(index)
178
189
  elif args.format == "html":
@@ -181,7 +192,7 @@ def cmd_status(args: argparse.Namespace) -> int:
181
192
  text = status_mod.render_index_markdown(index)
182
193
  tag = index["overall"]["rag"]
183
194
  else:
184
- model = status_mod.build_status(DOCUMENTS_DIR, project=args.project)
195
+ model = status_mod.build_status(args.documents_dir, project=args.project)
185
196
  if args.project and not model["documents"]:
186
197
  print(f"docassert: no documents for project {args.project!r}", file=sys.stderr)
187
198
  return 2
@@ -206,16 +217,17 @@ def cmd_pages(args: argparse.Namespace) -> int:
206
217
  from . import status as status_mod
207
218
  out = Path(args.out)
208
219
  out.mkdir(parents=True, exist_ok=True)
220
+ docs_dir = args.documents_dir
209
221
 
210
- index = status_mod.build_index(DOCUMENTS_DIR)
222
+ index = status_mod.build_index(docs_dir)
211
223
  (out / "index.html").write_text(status_mod.render_index_html(index))
212
224
 
213
- plist = projects_mod.load_projects(DOCUMENTS_DIR)
225
+ plist = projects_mod.load_projects(docs_dir)
214
226
  for p in plist:
215
- model = status_mod.build_status(DOCUMENTS_DIR, project=p["id"])
227
+ model = status_mod.build_status(docs_dir, project=p["id"])
216
228
  (out / f"{p['id']}.html").write_text(status_mod.render_html(model))
217
229
 
218
- (out / "RTM.md").write_text(rtm.render_markdown(build_graph(DOCUMENTS_DIR)))
230
+ (out / "RTM.md").write_text(rtm.render_markdown(build_graph(docs_dir)))
219
231
  print(f"docassert: wrote {out}/ — index + {len(plist)} project page(s) + RTM.md "
220
232
  f"(portfolio: {index['overall']['rag']})")
221
233
  return 0
@@ -232,6 +244,23 @@ def cmd_init(args: argparse.Namespace) -> int:
232
244
  return 0
233
245
 
234
246
 
247
+ def cmd_extract(args: argparse.Namespace) -> int:
248
+ """Extract plain text from a source document (.docx/.pdf/.md/.txt) — the
249
+ deterministic first step of doc-to-pmo conversion."""
250
+ from . import extract as extract_mod
251
+ try:
252
+ text = extract_mod.extract(args.file)
253
+ except (FileNotFoundError, ValueError, ImportError) as exc:
254
+ print(f"docassert: {exc}", file=sys.stderr)
255
+ return 2
256
+ if args.out:
257
+ Path(args.out).write_text(text, encoding="utf-8")
258
+ print(f"docassert: wrote {args.out} ({len(text)} chars)")
259
+ else:
260
+ sys.stdout.write(text)
261
+ return 0
262
+
263
+
235
264
  def main(argv: list[str] | None = None) -> int:
236
265
  from . import __version__
237
266
  parser = argparse.ArgumentParser(prog="docassert",
@@ -239,10 +268,15 @@ def main(argv: list[str] | None = None) -> int:
239
268
  parser.add_argument("--version", action="version", version=f"docassert {__version__}")
240
269
  sub = parser.add_subparsers(dest="command", required=True)
241
270
 
271
+ def docs_dir_opt(sp: argparse.ArgumentParser) -> None:
272
+ sp.add_argument("--documents-dir", default=DEFAULT_DOCUMENTS_DIR,
273
+ help=f"Documents tree to read (default: {DEFAULT_DOCUMENTS_DIR}/).")
274
+
242
275
  v = sub.add_parser("validate", help="Validate documents against their criteria.")
243
276
  v.add_argument("paths", nargs="+", help="Markdown files or globs.")
244
277
  v.add_argument("--junit", help="Write a JUnit XML report to this path.")
245
278
  v.add_argument("--markdown", help="Write a PR-comment markdown report to this path.")
279
+ docs_dir_opt(v)
246
280
  v.set_defaults(func=cmd_validate)
247
281
 
248
282
  c = sub.add_parser("consistency", help="Check cross-document traceability.")
@@ -250,12 +284,14 @@ def main(argv: list[str] | None = None) -> int:
250
284
  c.add_argument("--markdown", help="Write a PR-comment markdown report to this path.")
251
285
  c.add_argument("--no-semantic", action="store_true",
252
286
  help="Skip AI alignment (structural consistency only).")
287
+ docs_dir_opt(c)
253
288
  c.set_defaults(func=cmd_consistency)
254
289
 
255
290
  r = sub.add_parser("rtm", help="Generate the requirements traceability matrix.")
256
291
  r.add_argument("--out", help="Write to this path instead of stdout.")
257
292
  r.add_argument("--csv", action="store_true", help="Emit CSV instead of Markdown.")
258
293
  r.add_argument("--project", help="Scope to one project (PRJ-NNN-CODE id or CODE).")
294
+ docs_dir_opt(r)
259
295
  r.set_defaults(func=cmd_rtm)
260
296
 
261
297
  s = sub.add_parser("status", help="Derive a project status page from the documents.")
@@ -267,22 +303,30 @@ def main(argv: list[str] | None = None) -> int:
267
303
  s.add_argument("--index", action="store_true",
268
304
  help="Render the multi-project portfolio index instead of one status.")
269
305
  s.add_argument("--out", help="Write to this path instead of stdout.")
306
+ docs_dir_opt(s)
270
307
  s.set_defaults(func=cmd_status)
271
308
 
272
309
  pg = sub.add_parser("pages", help="Build the full Pages site (portfolio index + a page per project).")
273
310
  pg.add_argument("--out", default="_site", help="Output directory (default: _site).")
311
+ docs_dir_opt(pg)
274
312
  pg.set_defaults(func=cmd_pages)
275
313
 
276
314
  p = sub.add_parser("projects", help="Generate the project registry from the project.md anchors.")
277
315
  p.add_argument("--out", help="Write to this path instead of stdout (e.g. projects.yaml).")
278
316
  p.add_argument("--check", action="store_true",
279
317
  help="Exit non-zero if the registry file is stale (CI freshness gate).")
318
+ docs_dir_opt(p)
280
319
  p.set_defaults(func=cmd_projects)
281
320
 
282
321
  ini = sub.add_parser("init", help="Scaffold the default criteria/schema/profiles/templates into a repo.")
283
322
  ini.add_argument("dir", nargs="?", default=".", help="Target directory (default: current).")
284
323
  ini.set_defaults(func=cmd_init)
285
324
 
325
+ ex = sub.add_parser("extract", help="Extract plain text from a source doc (.docx/.pdf/.md/.txt) for conversion.")
326
+ ex.add_argument("file", help="Source document (.docx / .pdf / .md / .txt).")
327
+ ex.add_argument("--out", help="Write to this path instead of stdout.")
328
+ ex.set_defaults(func=cmd_extract)
329
+
286
330
  args = parser.parse_args(argv)
287
331
  return args.func(args)
288
332
 
@@ -130,6 +130,12 @@ def check_profile_completeness(documents_dir: str | Path = "documents") -> Check
130
130
 
131
131
 
132
132
  # ── semantic (advisory) ────────────────────────────────────────────────────
133
+ # Each alignment edge costs one API call, so a large graph could otherwise run
134
+ # away on cost. Cap per run; tune with `alignment_limit` in consistency.yaml
135
+ # (0 disables the cap).
136
+ DEFAULT_ALIGNMENT_LIMIT = 25
137
+
138
+
133
139
  def run_alignment_checks(graph, config) -> list[CheckResult]:
134
140
  edges = [] # (prompt, parent, child, relation)
135
141
  for rule in config.get("alignment", []):
@@ -142,12 +148,26 @@ def run_alignment_checks(graph, config) -> list[CheckResult]:
142
148
 
143
149
  if not edges:
144
150
  return []
151
+
152
+ limit = int(config.get("alignment_limit", DEFAULT_ALIGNMENT_LIMIT) or 0)
153
+ note: CheckResult | None = None
154
+ if limit and len(edges) > limit:
155
+ note = CheckResult(
156
+ "alignment-limit", True, False,
157
+ f"graded {limit} of {len(edges)} link(s) — raise `alignment_limit` "
158
+ f"in consistency.yaml to grade more per run",
159
+ kind="semantic", score=None)
160
+ edges = edges[:limit]
161
+
145
162
  if not os.environ.get("ANTHROPIC_API_KEY"):
146
163
  return [CheckResult("alignment", True, False,
147
164
  f"skipped — no ANTHROPIC_API_KEY ({len(edges)} link(s) to grade)",
148
165
  kind="semantic", score=None)]
149
- return [run_alignment(f"align:{c.id}-{rel}-{p.id}", prompt, p.text, c.text)
150
- for prompt, p, c, rel in edges]
166
+ results = [run_alignment(f"align:{c.id}-{rel}-{p.id}", prompt, p.text, c.text)
167
+ for prompt, p, c, rel in edges]
168
+ if note is not None:
169
+ results.append(note)
170
+ return results
151
171
 
152
172
 
153
173
  def run_consistency(documents_dir: str | Path = "documents",
@@ -0,0 +1,55 @@
1
+ """Extract plain text from a source document, for doc-to-pmo conversion.
2
+
3
+ The deterministic first step of the conversion front-door: turn an arbitrary
4
+ source file (.docx / .pdf / .md / .txt) into plain text that the doc-to-pmo
5
+ skill then maps into a standard template. It does not interpret or reshape the
6
+ content — that is the skill's job.
7
+
8
+ .docx / .pdf support needs the optional `convert` extra:
9
+ pip install "docassert[convert]"
10
+ """
11
+ from __future__ import annotations
12
+
13
+ from pathlib import Path
14
+
15
+ _NEED_CONVERT = 'extract needs the "convert" extra: pip install "docassert[convert]"'
16
+
17
+
18
+ def extract(path: str | Path) -> str:
19
+ """Return the plain text of a source document.
20
+
21
+ Raises FileNotFoundError (missing file), ValueError (unsupported type), or
22
+ ImportError (a .docx/.pdf without the `convert` extra installed).
23
+ """
24
+ p = Path(path)
25
+ if not p.is_file():
26
+ raise FileNotFoundError(f"no such file: {p}")
27
+ ext = p.suffix.lower()
28
+
29
+ if ext in {".md", ".txt"}:
30
+ return p.read_text(encoding="utf-8")
31
+
32
+ if ext == ".docx":
33
+ try:
34
+ import docx # python-docx
35
+ except ImportError as exc:
36
+ raise ImportError(_NEED_CONVERT) from exc
37
+ document = docx.Document(str(p))
38
+ blocks: list[str] = [para.text for para in document.paragraphs]
39
+ # include table cell text, which charters often use for milestones/risks
40
+ for table in document.tables:
41
+ for row in table.rows:
42
+ cells = [cell.text.strip() for cell in row.cells]
43
+ if any(cells):
44
+ blocks.append(" | ".join(cells))
45
+ return "\n".join(blocks)
46
+
47
+ if ext == ".pdf":
48
+ try:
49
+ from pypdf import PdfReader
50
+ except ImportError as exc:
51
+ raise ImportError(_NEED_CONVERT) from exc
52
+ reader = PdfReader(str(p))
53
+ return "\n".join((page.extract_text() or "") for page in reader.pages)
54
+
55
+ raise ValueError(f"unsupported source type '{ext}' (supported: .docx, .pdf, .md, .txt)")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docassert
3
- Version: 0.1.0
3
+ Version: 0.2.1
4
4
  Summary: Unit testing for business documents — validate structured Markdown docs against a configurable audit standard.
5
5
  Author: C4G Enterprises Inc.
6
6
  License: Apache-2.0
@@ -38,6 +38,10 @@ Dynamic: license-file
38
38
 
39
39
  # docassert
40
40
 
41
+ [![PyPI](https://img.shields.io/pypi/v/docassert)](https://pypi.org/project/docassert/)
42
+ [![Python](https://img.shields.io/pypi/pyversions/docassert)](https://pypi.org/project/docassert/)
43
+ [![License](https://img.shields.io/badge/license-Apache--2.0-blue)](LICENSE)
44
+
41
45
  **Unit testing for business documents.** Validate structured Markdown documents
42
46
  (charters, BRDs, PRDs, risk registers, …) against a configurable audit standard:
43
47
  deterministic structural checks that gate a merge, plus optional AI-graded
@@ -50,9 +54,11 @@ a vendor-neutral standard for running a PMO from version-controlled, declarative
50
54
  ## Install
51
55
 
52
56
  ```bash
53
- pip install "docassert @ git+https://github.com/c4g-john/docassert" # PyPI release coming
57
+ pipx install docassert # recommended: installs the CLI in its own isolated env
58
+ # or:
59
+ pip install docassert
54
60
  # with the AI advisory extra:
55
- pip install "docassert[ai] @ git+https://github.com/c4g-john/docassert"
61
+ pip install "docassert[ai]"
56
62
  ```
57
63
 
58
64
  ## Quickstart
@@ -74,13 +80,18 @@ you can customize them.
74
80
 
75
81
  | Command | What it does |
76
82
  |---|---|
77
- | `docassert validate <globs>` | Validate documents against their kind's criteria. Exit code = number of blocking failures. |
83
+ | `docassert validate <globs>` | Validate documents against their kind's criteria. Exit code = number of blocking failures (capped at 125). |
78
84
  | `docassert consistency` | Cross-document checks: referential integrity, coverage, required links, profile completeness. |
79
85
  | `docassert rtm [--project ID]` | Requirements traceability matrix (Markdown or CSV). |
80
86
  | `docassert status [--project ID] [--index]` | Derived project status (md / json / html). |
81
87
  | `docassert pages --out DIR` | Build the portfolio site (index + a page per project). |
82
88
  | `docassert projects [--out] [--check]` | Generate / verify the project registry. |
83
89
  | `docassert init [DIR]` | Scaffold the default config into a repo. |
90
+ | `docassert extract <file>` | Extract plain text from a source `.docx` / `.pdf` / `.md` / `.txt` (the first step of doc-to-pmo conversion). Reading `.docx` / `.pdf` needs the `convert` extra: `pip install "docassert[convert]"`. |
91
+
92
+ Every document-reading command accepts `--documents-dir` (default `documents/`).
93
+ AI alignment grades at most `alignment_limit` links per run (default 25; set it
94
+ in `consistency.yaml`, `0` = no cap) so API cost stays bounded on large graphs.
84
95
 
85
96
  ## Document kinds
86
97
 
@@ -7,6 +7,7 @@ docassert/__main__.py
7
7
  docassert/cli.py
8
8
  docassert/config.py
9
9
  docassert/consistency.py
10
+ docassert/extract.py
10
11
  docassert/graph.py
11
12
  docassert/loader.py
12
13
  docassert/models.py
@@ -89,6 +90,8 @@ docassert/_data/templates/test-cases.template.md
89
90
  docassert/_data/templates/user-story.template.md
90
91
  tests/test_config.py
91
92
  tests/test_consistency.py
93
+ tests/test_defects.py
94
+ tests/test_extract.py
92
95
  tests/test_graph.py
93
96
  tests/test_kinds_delivery.py
94
97
  tests/test_kinds_governance.py
@@ -0,0 +1,85 @@
1
+ """Tests for the 0.2.1 defect fixes: exit-code cap, --documents-dir, alignment cap."""
2
+ from docassert import consistency as C
3
+ from docassert.cli import _capped, main
4
+ from docassert.graph import Graph
5
+ from docassert.models import CheckResult, Item
6
+
7
+ PROJECT_MD = """---
8
+ kind: project
9
+ id: PRJ-009-TST
10
+ code: TST
11
+ name: Test Project
12
+ sponsor: jane.doe
13
+ status: proposed
14
+ ---
15
+
16
+ ## Overview
17
+ A test project.
18
+
19
+ ## Scope
20
+ Everything.
21
+ """
22
+
23
+
24
+ # ── exit-code cap ────────────────────────────────────────────────────────────
25
+ def test_exit_code_capped_below_wraparound():
26
+ assert _capped(0) == 0
27
+ assert _capped(3) == 3
28
+ assert _capped(125) == 125
29
+ assert _capped(256) == 125 # would otherwise wrap to exit status 0
30
+ assert _capped(1000) == 125
31
+
32
+
33
+ # ── --documents-dir ──────────────────────────────────────────────────────────
34
+ def test_projects_reads_documents_dir_flag(tmp_path, monkeypatch, capsys):
35
+ docs = tmp_path / "elsewhere"
36
+ (docs / "PRJ-009-TST").mkdir(parents=True)
37
+ (docs / "PRJ-009-TST" / "project.md").write_text(PROJECT_MD, encoding="utf-8")
38
+ monkeypatch.chdir(tmp_path) # cwd has no documents/ at all
39
+ assert main(["projects", "--documents-dir", str(docs)]) == 0
40
+ assert "PRJ-009-TST" in capsys.readouterr().out
41
+
42
+
43
+ def test_status_reads_documents_dir_flag(tmp_path, monkeypatch, capsys):
44
+ docs = tmp_path / "elsewhere"
45
+ (docs / "PRJ-009-TST").mkdir(parents=True)
46
+ (docs / "PRJ-009-TST" / "project.md").write_text(PROJECT_MD, encoding="utf-8")
47
+ monkeypatch.chdir(tmp_path)
48
+ assert main(["status", "--documents-dir", str(docs), "--summary"]) == 0
49
+ assert "Derived from 1 documents" in capsys.readouterr().out
50
+
51
+
52
+ # ── alignment call cap ───────────────────────────────────────────────────────
53
+ def _graph_with_edges(n):
54
+ g = Graph()
55
+ g.add(Item("TST-BR-001", "TST", "BR", "parent", {}, "d.md", "k", "approved", "S"))
56
+ for i in range(n):
57
+ g.add(Item(f"TST-PR-{i:03d}", "TST", "PR", "child",
58
+ {"traces": ["TST-BR-001"]}, "d.md", "k", "approved", "S"))
59
+ return g
60
+
61
+
62
+ def _stub_calls(monkeypatch):
63
+ calls = []
64
+ monkeypatch.setenv("ANTHROPIC_API_KEY", "test-key")
65
+ monkeypatch.setattr(C, "run_alignment",
66
+ lambda cid, *a: calls.append(cid) or CheckResult(
67
+ cid, True, False, "ok", kind="semantic", score=1.0))
68
+ return calls
69
+
70
+
71
+ def test_alignment_capped(monkeypatch):
72
+ calls = _stub_calls(monkeypatch)
73
+ cfg = {"alignment": [{"relation": "traces", "prompt": "judge"}], "alignment_limit": 2}
74
+ results = C.run_alignment_checks(_graph_with_edges(4), cfg)
75
+ assert len(calls) == 2
76
+ note = next(r for r in results if r.check_id == "alignment-limit")
77
+ assert "graded 2 of 4" in note.detail and not note.blocking
78
+
79
+
80
+ def test_alignment_cap_disabled_with_zero(monkeypatch):
81
+ calls = _stub_calls(monkeypatch)
82
+ cfg = {"alignment": [{"relation": "traces", "prompt": "judge"}], "alignment_limit": 0}
83
+ results = C.run_alignment_checks(_graph_with_edges(4), cfg)
84
+ assert len(calls) == 4
85
+ assert not any(r.check_id == "alignment-limit" for r in results)
@@ -0,0 +1,65 @@
1
+ """Tests for the extract module and the `docassert extract` command."""
2
+ import pytest
3
+
4
+ from docassert import extract as E
5
+ from docassert.cli import main
6
+
7
+
8
+ # ── the extract() function ──────────────────────────────────────────────────
9
+ def test_extract_md(tmp_path):
10
+ f = tmp_path / "s.md"
11
+ f.write_text("# Hello\nworld", encoding="utf-8")
12
+ assert E.extract(f) == "# Hello\nworld"
13
+
14
+
15
+ def test_extract_txt(tmp_path):
16
+ f = tmp_path / "s.txt"
17
+ f.write_text("plain text", encoding="utf-8")
18
+ assert E.extract(str(f)) == "plain text"
19
+
20
+
21
+ def test_missing_file_raises(tmp_path):
22
+ with pytest.raises(FileNotFoundError):
23
+ E.extract(tmp_path / "nope.md")
24
+
25
+
26
+ def test_unsupported_type_raises(tmp_path):
27
+ f = tmp_path / "s.rtf"
28
+ f.write_text("x", encoding="utf-8")
29
+ with pytest.raises(ValueError):
30
+ E.extract(f)
31
+
32
+
33
+ def test_extract_docx_paragraphs_and_tables(tmp_path):
34
+ docx = pytest.importorskip("docx") # needs the 'convert' extra
35
+ d = docx.Document()
36
+ d.add_paragraph("First para.")
37
+ table = d.add_table(rows=1, cols=2)
38
+ table.rows[0].cells[0].text = "Milestone"
39
+ table.rows[0].cells[1].text = "2026-09-30"
40
+ path = tmp_path / "s.docx"
41
+ d.save(str(path))
42
+ text = E.extract(path)
43
+ assert "First para." in text
44
+ assert "Milestone | 2026-09-30" in text # table cells joined
45
+
46
+
47
+ # ── the CLI command ─────────────────────────────────────────────────────────
48
+ def test_cli_extract_stdout(tmp_path, capsys):
49
+ f = tmp_path / "s.md"
50
+ f.write_text("hello cli", encoding="utf-8")
51
+ assert main(["extract", str(f)]) == 0
52
+ assert "hello cli" in capsys.readouterr().out
53
+
54
+
55
+ def test_cli_extract_out_file(tmp_path):
56
+ src = tmp_path / "s.txt"
57
+ src.write_text("abc", encoding="utf-8")
58
+ out = tmp_path / "out.txt"
59
+ assert main(["extract", str(src), "--out", str(out)]) == 0
60
+ assert out.read_text() == "abc"
61
+
62
+
63
+ def test_cli_extract_missing_returns_2(tmp_path, capsys):
64
+ assert main(["extract", str(tmp_path / "nope.md")]) == 2
65
+ assert "no such file" in capsys.readouterr().err
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes