docassert 0.1.0__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. {docassert-0.1.0/docassert.egg-info → docassert-0.2.0}/PKG-INFO +10 -3
  2. {docassert-0.1.0 → docassert-0.2.0}/README.md +9 -2
  3. {docassert-0.1.0 → docassert-0.2.0}/docassert/__init__.py +1 -1
  4. {docassert-0.1.0 → docassert-0.2.0}/docassert/cli.py +22 -0
  5. docassert-0.2.0/docassert/extract.py +55 -0
  6. {docassert-0.1.0 → docassert-0.2.0/docassert.egg-info}/PKG-INFO +10 -3
  7. {docassert-0.1.0 → docassert-0.2.0}/docassert.egg-info/SOURCES.txt +2 -0
  8. docassert-0.2.0/tests/test_extract.py +65 -0
  9. {docassert-0.1.0 → docassert-0.2.0}/LICENSE +0 -0
  10. {docassert-0.1.0 → docassert-0.2.0}/NOTICE +0 -0
  11. {docassert-0.1.0 → docassert-0.2.0}/docassert/__main__.py +0 -0
  12. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/consistency.yaml +0 -0
  13. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/criteria/adr.criteria.yaml +0 -0
  14. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/criteria/benefits-realization.criteria.yaml +0 -0
  15. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/criteria/brd.criteria.yaml +0 -0
  16. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/criteria/business-case.criteria.yaml +0 -0
  17. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/criteria/charter.criteria.yaml +0 -0
  18. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/criteria/data-migration-plan.criteria.yaml +0 -0
  19. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/criteria/frnfr.criteria.yaml +0 -0
  20. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/criteria/hypercare-plan.criteria.yaml +0 -0
  21. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/criteria/post-implementation-review.criteria.yaml +0 -0
  22. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/criteria/prd.criteria.yaml +0 -0
  23. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/criteria/project.criteria.yaml +0 -0
  24. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/criteria/qa-test-plan.criteria.yaml +0 -0
  25. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/criteria/raci-stakeholder.criteria.yaml +0 -0
  26. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/criteria/release-cutover-plan.criteria.yaml +0 -0
  27. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/criteria/risk-register.criteria.yaml +0 -0
  28. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/criteria/rollback-plan.criteria.yaml +0 -0
  29. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/criteria/runbook.criteria.yaml +0 -0
  30. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/criteria/status-report.criteria.yaml +0 -0
  31. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/criteria/test-cases.criteria.yaml +0 -0
  32. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/criteria/user-story.criteria.yaml +0 -0
  33. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/profiles/agile-delivery.yaml +0 -0
  34. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/profiles/lean-startup.yaml +0 -0
  35. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/profiles/regulated-industry.yaml +0 -0
  36. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/schema/adr.schema.json +0 -0
  37. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/schema/benefits-realization.schema.json +0 -0
  38. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/schema/brd.schema.json +0 -0
  39. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/schema/business-case.schema.json +0 -0
  40. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/schema/charter.schema.json +0 -0
  41. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/schema/data-migration-plan.schema.json +0 -0
  42. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/schema/frnfr.schema.json +0 -0
  43. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/schema/hypercare-plan.schema.json +0 -0
  44. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/schema/post-implementation-review.schema.json +0 -0
  45. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/schema/prd.schema.json +0 -0
  46. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/schema/project.schema.json +0 -0
  47. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/schema/qa-test-plan.schema.json +0 -0
  48. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/schema/raci-stakeholder.schema.json +0 -0
  49. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/schema/release-cutover-plan.schema.json +0 -0
  50. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/schema/risk-register.schema.json +0 -0
  51. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/schema/rollback-plan.schema.json +0 -0
  52. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/schema/runbook.schema.json +0 -0
  53. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/schema/status-report.schema.json +0 -0
  54. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/schema/test-cases.schema.json +0 -0
  55. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/schema/user-story.schema.json +0 -0
  56. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/templates/adr.template.md +0 -0
  57. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/templates/benefits-realization.template.md +0 -0
  58. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/templates/brd.template.md +0 -0
  59. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/templates/business-case.template.md +0 -0
  60. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/templates/charter.template.md +0 -0
  61. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/templates/data-migration-plan.template.md +0 -0
  62. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/templates/frnfr.template.md +0 -0
  63. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/templates/hypercare-plan.template.md +0 -0
  64. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/templates/post-implementation-review.template.md +0 -0
  65. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/templates/prd.template.md +0 -0
  66. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/templates/project.template.md +0 -0
  67. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/templates/qa-test-plan.template.md +0 -0
  68. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/templates/raci-stakeholder.template.md +0 -0
  69. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/templates/release-cutover-plan.template.md +0 -0
  70. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/templates/risk-register.template.md +0 -0
  71. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/templates/rollback-plan.template.md +0 -0
  72. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/templates/runbook.template.md +0 -0
  73. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/templates/status-report.template.md +0 -0
  74. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/templates/test-cases.template.md +0 -0
  75. {docassert-0.1.0 → docassert-0.2.0}/docassert/_data/templates/user-story.template.md +0 -0
  76. {docassert-0.1.0 → docassert-0.2.0}/docassert/config.py +0 -0
  77. {docassert-0.1.0 → docassert-0.2.0}/docassert/consistency.py +0 -0
  78. {docassert-0.1.0 → docassert-0.2.0}/docassert/graph.py +0 -0
  79. {docassert-0.1.0 → docassert-0.2.0}/docassert/loader.py +0 -0
  80. {docassert-0.1.0 → docassert-0.2.0}/docassert/models.py +0 -0
  81. {docassert-0.1.0 → docassert-0.2.0}/docassert/profiles.py +0 -0
  82. {docassert-0.1.0 → docassert-0.2.0}/docassert/projects.py +0 -0
  83. {docassert-0.1.0 → docassert-0.2.0}/docassert/report.py +0 -0
  84. {docassert-0.1.0 → docassert-0.2.0}/docassert/rtm.py +0 -0
  85. {docassert-0.1.0 → docassert-0.2.0}/docassert/semantic.py +0 -0
  86. {docassert-0.1.0 → docassert-0.2.0}/docassert/status.py +0 -0
  87. {docassert-0.1.0 → docassert-0.2.0}/docassert/structural.py +0 -0
  88. {docassert-0.1.0 → docassert-0.2.0}/docassert.egg-info/dependency_links.txt +0 -0
  89. {docassert-0.1.0 → docassert-0.2.0}/docassert.egg-info/entry_points.txt +0 -0
  90. {docassert-0.1.0 → docassert-0.2.0}/docassert.egg-info/requires.txt +0 -0
  91. {docassert-0.1.0 → docassert-0.2.0}/docassert.egg-info/top_level.txt +0 -0
  92. {docassert-0.1.0 → docassert-0.2.0}/pyproject.toml +0 -0
  93. {docassert-0.1.0 → docassert-0.2.0}/setup.cfg +0 -0
  94. {docassert-0.1.0 → docassert-0.2.0}/tests/test_config.py +0 -0
  95. {docassert-0.1.0 → docassert-0.2.0}/tests/test_consistency.py +0 -0
  96. {docassert-0.1.0 → docassert-0.2.0}/tests/test_graph.py +0 -0
  97. {docassert-0.1.0 → docassert-0.2.0}/tests/test_kinds_delivery.py +0 -0
  98. {docassert-0.1.0 → docassert-0.2.0}/tests/test_kinds_governance.py +0 -0
  99. {docassert-0.1.0 → docassert-0.2.0}/tests/test_kinds_operate.py +0 -0
  100. {docassert-0.1.0 → docassert-0.2.0}/tests/test_kinds_reporting.py +0 -0
  101. {docassert-0.1.0 → docassert-0.2.0}/tests/test_profiles.py +0 -0
  102. {docassert-0.1.0 → docassert-0.2.0}/tests/test_projects.py +0 -0
  103. {docassert-0.1.0 → docassert-0.2.0}/tests/test_status.py +0 -0
  104. {docassert-0.1.0 → docassert-0.2.0}/tests/test_structural.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docassert
3
- Version: 0.1.0
3
+ Version: 0.2.0
4
4
  Summary: Unit testing for business documents — validate structured Markdown docs against a configurable audit standard.
5
5
  Author: C4G Enterprises Inc.
6
6
  License: Apache-2.0
@@ -38,6 +38,10 @@ Dynamic: license-file
38
38
 
39
39
  # docassert
40
40
 
41
+ [![PyPI](https://img.shields.io/pypi/v/docassert)](https://pypi.org/project/docassert/)
42
+ [![Python](https://img.shields.io/pypi/pyversions/docassert)](https://pypi.org/project/docassert/)
43
+ [![License](https://img.shields.io/badge/license-Apache--2.0-blue)](LICENSE)
44
+
41
45
  **Unit testing for business documents.** Validate structured Markdown documents
42
46
  (charters, BRDs, PRDs, risk registers, …) against a configurable audit standard:
43
47
  deterministic structural checks that gate a merge, plus optional AI-graded
@@ -50,9 +54,11 @@ a vendor-neutral standard for running a PMO from version-controlled, declarative
50
54
  ## Install
51
55
 
52
56
  ```bash
53
- pip install "docassert @ git+https://github.com/c4g-john/docassert" # PyPI release coming
57
+ pipx install docassert # recommended — installs the CLI in its own isolated env
58
+ # or:
59
+ pip install docassert
54
60
  # with the AI advisory extra:
55
- pip install "docassert[ai] @ git+https://github.com/c4g-john/docassert"
61
+ pip install "docassert[ai]"
56
62
  ```
57
63
 
58
64
  ## Quickstart
@@ -81,6 +87,7 @@ you can customize them.
81
87
  | `docassert pages --out DIR` | Build the portfolio site (index + a page per project). |
82
88
  | `docassert projects [--out] [--check]` | Generate / verify the project registry. |
83
89
  | `docassert init [DIR]` | Scaffold the default config into a repo. |
90
+ | `docassert extract <file>` | Extract plain text from a source `.docx` / `.pdf` / `.md` / `.txt` (the first step of doc-to-pmo conversion). Needs the `convert` extra: `pip install "docassert[convert]"`. |
84
91
 
85
92
  ## Document kinds
86
93
 
@@ -1,5 +1,9 @@
1
1
  # docassert
2
2
 
3
+ [![PyPI](https://img.shields.io/pypi/v/docassert)](https://pypi.org/project/docassert/)
4
+ [![Python](https://img.shields.io/pypi/pyversions/docassert)](https://pypi.org/project/docassert/)
5
+ [![License](https://img.shields.io/badge/license-Apache--2.0-blue)](LICENSE)
6
+
3
7
  **Unit testing for business documents.** Validate structured Markdown documents
4
8
  (charters, BRDs, PRDs, risk registers, …) against a configurable audit standard:
5
9
  deterministic structural checks that gate a merge, plus optional AI-graded
@@ -12,9 +16,11 @@ a vendor-neutral standard for running a PMO from version-controlled, declarative
12
16
  ## Install
13
17
 
14
18
  ```bash
15
- pip install "docassert @ git+https://github.com/c4g-john/docassert" # PyPI release coming
19
+ pipx install docassert # recommended — installs the CLI in its own isolated env
20
+ # or:
21
+ pip install docassert
16
22
  # with the AI advisory extra:
17
- pip install "docassert[ai] @ git+https://github.com/c4g-john/docassert"
23
+ pip install "docassert[ai]"
18
24
  ```
19
25
 
20
26
  ## Quickstart
@@ -43,6 +49,7 @@ you can customize them.
43
49
  | `docassert pages --out DIR` | Build the portfolio site (index + a page per project). |
44
50
  | `docassert projects [--out] [--check]` | Generate / verify the project registry. |
45
51
  | `docassert init [DIR]` | Scaffold the default config into a repo. |
52
+ | `docassert extract <file>` | Extract plain text from a source `.docx` / `.pdf` / `.md` / `.txt` (the first step of doc-to-pmo conversion). Needs the `convert` extra: `pip install "docassert[convert]"`. |
46
53
 
47
54
  ## Document kinds
48
55
 
@@ -5,4 +5,4 @@ standard: deterministic structural checks that gate a merge, plus optional
5
5
  AI-graded semantic checks that advise.
6
6
  """
7
7
 
8
- __version__ = "0.1.0"
8
+ __version__ = "0.2.0"
@@ -232,6 +232,23 @@ def cmd_init(args: argparse.Namespace) -> int:
232
232
  return 0
233
233
 
234
234
 
235
+ def cmd_extract(args: argparse.Namespace) -> int:
236
+ """Extract plain text from a source document (.docx/.pdf/.md/.txt) — the
237
+ deterministic first step of doc-to-pmo conversion."""
238
+ from . import extract as extract_mod
239
+ try:
240
+ text = extract_mod.extract(args.file)
241
+ except (FileNotFoundError, ValueError, ImportError) as exc:
242
+ print(f"docassert: {exc}", file=sys.stderr)
243
+ return 2
244
+ if args.out:
245
+ Path(args.out).write_text(text, encoding="utf-8")
246
+ print(f"docassert: wrote {args.out} ({len(text)} chars)")
247
+ else:
248
+ sys.stdout.write(text)
249
+ return 0
250
+
251
+
235
252
  def main(argv: list[str] | None = None) -> int:
236
253
  from . import __version__
237
254
  parser = argparse.ArgumentParser(prog="docassert",
@@ -283,6 +300,11 @@ def main(argv: list[str] | None = None) -> int:
283
300
  ini.add_argument("dir", nargs="?", default=".", help="Target directory (default: current).")
284
301
  ini.set_defaults(func=cmd_init)
285
302
 
303
+ ex = sub.add_parser("extract", help="Extract plain text from a source doc (.docx/.pdf/.md/.txt) for conversion.")
304
+ ex.add_argument("file", help="Source document (.docx / .pdf / .md / .txt).")
305
+ ex.add_argument("--out", help="Write to this path instead of stdout.")
306
+ ex.set_defaults(func=cmd_extract)
307
+
286
308
  args = parser.parse_args(argv)
287
309
  return args.func(args)
288
310
 
@@ -0,0 +1,55 @@
1
+ """Extract plain text from a source document, for doc-to-pmo conversion.
2
+
3
+ The deterministic first step of the conversion front-door: turn an arbitrary
4
+ source file (.docx / .pdf / .md / .txt) into plain text that the doc-to-pmo
5
+ skill then maps into a standard template. It does not interpret or reshape the
6
+ content — that is the skill's job.
7
+
8
+ .docx / .pdf support needs the optional `convert` extra:
9
+ pip install "docassert[convert]"
10
+ """
11
+ from __future__ import annotations
12
+
13
+ from pathlib import Path
14
+
15
+ _NEED_CONVERT = 'extract needs the "convert" extra: pip install "docassert[convert]"'
16
+
17
+
18
+ def extract(path: str | Path) -> str:
19
+ """Return the plain text of a source document.
20
+
21
+ Raises FileNotFoundError (missing file), ValueError (unsupported type), or
22
+ ImportError (a .docx/.pdf without the `convert` extra installed).
23
+ """
24
+ p = Path(path)
25
+ if not p.is_file():
26
+ raise FileNotFoundError(f"no such file: {p}")
27
+ ext = p.suffix.lower()
28
+
29
+ if ext in {".md", ".txt"}:
30
+ return p.read_text(encoding="utf-8")
31
+
32
+ if ext == ".docx":
33
+ try:
34
+ import docx # python-docx
35
+ except ImportError as exc:
36
+ raise ImportError(_NEED_CONVERT) from exc
37
+ document = docx.Document(str(p))
38
+ blocks: list[str] = [para.text for para in document.paragraphs]
39
+ # include table cell text, which charters often use for milestones/risks
40
+ for table in document.tables:
41
+ for row in table.rows:
42
+ cells = [cell.text.strip() for cell in row.cells]
43
+ if any(cells):
44
+ blocks.append(" | ".join(cells))
45
+ return "\n".join(blocks)
46
+
47
+ if ext == ".pdf":
48
+ try:
49
+ from pypdf import PdfReader
50
+ except ImportError as exc:
51
+ raise ImportError(_NEED_CONVERT) from exc
52
+ reader = PdfReader(str(p))
53
+ return "\n".join((page.extract_text() or "") for page in reader.pages)
54
+
55
+ raise ValueError(f"unsupported source type '{ext}' (supported: .docx, .pdf, .md, .txt)")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docassert
3
- Version: 0.1.0
3
+ Version: 0.2.0
4
4
  Summary: Unit testing for business documents — validate structured Markdown docs against a configurable audit standard.
5
5
  Author: C4G Enterprises Inc.
6
6
  License: Apache-2.0
@@ -38,6 +38,10 @@ Dynamic: license-file
38
38
 
39
39
  # docassert
40
40
 
41
+ [![PyPI](https://img.shields.io/pypi/v/docassert)](https://pypi.org/project/docassert/)
42
+ [![Python](https://img.shields.io/pypi/pyversions/docassert)](https://pypi.org/project/docassert/)
43
+ [![License](https://img.shields.io/badge/license-Apache--2.0-blue)](LICENSE)
44
+
41
45
  **Unit testing for business documents.** Validate structured Markdown documents
42
46
  (charters, BRDs, PRDs, risk registers, …) against a configurable audit standard:
43
47
  deterministic structural checks that gate a merge, plus optional AI-graded
@@ -50,9 +54,11 @@ a vendor-neutral standard for running a PMO from version-controlled, declarative
50
54
  ## Install
51
55
 
52
56
  ```bash
53
- pip install "docassert @ git+https://github.com/c4g-john/docassert" # PyPI release coming
57
+ pipx install docassert # recommended — installs the CLI in its own isolated env
58
+ # or:
59
+ pip install docassert
54
60
  # with the AI advisory extra:
55
- pip install "docassert[ai] @ git+https://github.com/c4g-john/docassert"
61
+ pip install "docassert[ai]"
56
62
  ```
57
63
 
58
64
  ## Quickstart
@@ -81,6 +87,7 @@ you can customize them.
81
87
  | `docassert pages --out DIR` | Build the portfolio site (index + a page per project). |
82
88
  | `docassert projects [--out] [--check]` | Generate / verify the project registry. |
83
89
  | `docassert init [DIR]` | Scaffold the default config into a repo. |
90
+ | `docassert extract <file>` | Extract plain text from a source `.docx` / `.pdf` / `.md` / `.txt` (the first step of doc-to-pmo conversion). Needs the `convert` extra: `pip install "docassert[convert]"`. |
84
91
 
85
92
  ## Document kinds
86
93
 
@@ -7,6 +7,7 @@ docassert/__main__.py
7
7
  docassert/cli.py
8
8
  docassert/config.py
9
9
  docassert/consistency.py
10
+ docassert/extract.py
10
11
  docassert/graph.py
11
12
  docassert/loader.py
12
13
  docassert/models.py
@@ -89,6 +90,7 @@ docassert/_data/templates/test-cases.template.md
89
90
  docassert/_data/templates/user-story.template.md
90
91
  tests/test_config.py
91
92
  tests/test_consistency.py
93
+ tests/test_extract.py
92
94
  tests/test_graph.py
93
95
  tests/test_kinds_delivery.py
94
96
  tests/test_kinds_governance.py
@@ -0,0 +1,65 @@
1
+ """Tests for the extract module and the `docassert extract` command."""
2
+ import pytest
3
+
4
+ from docassert import extract as E
5
+ from docassert.cli import main
6
+
7
+
8
+ # ── the extract() function ──────────────────────────────────────────────────
9
+ def test_extract_md(tmp_path):
10
+ f = tmp_path / "s.md"
11
+ f.write_text("# Hello\nworld", encoding="utf-8")
12
+ assert E.extract(f) == "# Hello\nworld"
13
+
14
+
15
+ def test_extract_txt(tmp_path):
16
+ f = tmp_path / "s.txt"
17
+ f.write_text("plain text", encoding="utf-8")
18
+ assert E.extract(str(f)) == "plain text"
19
+
20
+
21
+ def test_missing_file_raises(tmp_path):
22
+ with pytest.raises(FileNotFoundError):
23
+ E.extract(tmp_path / "nope.md")
24
+
25
+
26
+ def test_unsupported_type_raises(tmp_path):
27
+ f = tmp_path / "s.rtf"
28
+ f.write_text("x", encoding="utf-8")
29
+ with pytest.raises(ValueError):
30
+ E.extract(f)
31
+
32
+
33
+ def test_extract_docx_paragraphs_and_tables(tmp_path):
34
+ docx = pytest.importorskip("docx") # needs the 'convert' extra
35
+ d = docx.Document()
36
+ d.add_paragraph("First para.")
37
+ table = d.add_table(rows=1, cols=2)
38
+ table.rows[0].cells[0].text = "Milestone"
39
+ table.rows[0].cells[1].text = "2026-09-30"
40
+ path = tmp_path / "s.docx"
41
+ d.save(str(path))
42
+ text = E.extract(path)
43
+ assert "First para." in text
44
+ assert "Milestone | 2026-09-30" in text # table cells joined
45
+
46
+
47
+ # ── the CLI command ─────────────────────────────────────────────────────────
48
+ def test_cli_extract_stdout(tmp_path, capsys):
49
+ f = tmp_path / "s.md"
50
+ f.write_text("hello cli", encoding="utf-8")
51
+ assert main(["extract", str(f)]) == 0
52
+ assert "hello cli" in capsys.readouterr().out
53
+
54
+
55
+ def test_cli_extract_out_file(tmp_path):
56
+ src = tmp_path / "s.txt"
57
+ src.write_text("abc", encoding="utf-8")
58
+ out = tmp_path / "out.txt"
59
+ assert main(["extract", str(src), "--out", str(out)]) == 0
60
+ assert out.read_text() == "abc"
61
+
62
+
63
+ def test_cli_extract_missing_returns_2(tmp_path, capsys):
64
+ assert main(["extract", str(tmp_path / "nope.md")]) == 2
65
+ assert "no such file" in capsys.readouterr().err
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes