decompose-mcp 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. decompose_mcp-0.1.0/.github/dependabot.yml +10 -0
  2. decompose_mcp-0.1.0/.github/release-drafter.yml +24 -0
  3. decompose_mcp-0.1.0/.github/workflows/benchmark.yml +29 -0
  4. decompose_mcp-0.1.0/.github/workflows/ci.yml +31 -0
  5. decompose_mcp-0.1.0/.github/workflows/pages.yml +35 -0
  6. decompose_mcp-0.1.0/.github/workflows/publish.yml +28 -0
  7. decompose_mcp-0.1.0/.github/workflows/release-drafter.yml +17 -0
  8. decompose_mcp-0.1.0/.gitignore +22 -0
  9. decompose_mcp-0.1.0/LAUNCH.md +33 -0
  10. decompose_mcp-0.1.0/PKG-INFO +165 -0
  11. decompose_mcp-0.1.0/README.md +138 -0
  12. decompose_mcp-0.1.0/benchmarks/latest.json +61 -0
  13. decompose_mcp-0.1.0/benchmarks/run.py +79 -0
  14. decompose_mcp-0.1.0/docs/CNAME +1 -0
  15. decompose_mcp-0.1.0/docs/about.html +92 -0
  16. decompose_mcp-0.1.0/docs/aecai.html +177 -0
  17. decompose_mcp-0.1.0/docs/blog.html +65 -0
  18. decompose_mcp-0.1.0/docs/contact.html +83 -0
  19. decompose_mcp-0.1.0/docs/decompose.html +242 -0
  20. decompose_mcp-0.1.0/docs/index.html +101 -0
  21. decompose_mcp-0.1.0/docs/style.css +358 -0
  22. decompose_mcp-0.1.0/pyproject.toml +66 -0
  23. decompose_mcp-0.1.0/server.json +21 -0
  24. decompose_mcp-0.1.0/src/decompose/__init__.py +10 -0
  25. decompose_mcp-0.1.0/src/decompose/__main__.py +5 -0
  26. decompose_mcp-0.1.0/src/decompose/chunker.py +153 -0
  27. decompose_mcp-0.1.0/src/decompose/classifier.py +166 -0
  28. decompose_mcp-0.1.0/src/decompose/cli.py +55 -0
  29. decompose_mcp-0.1.0/src/decompose/core.py +145 -0
  30. decompose_mcp-0.1.0/src/decompose/entities.py +82 -0
  31. decompose_mcp-0.1.0/src/decompose/irreducibility.py +57 -0
  32. decompose_mcp-0.1.0/src/decompose/mcp_server.py +128 -0
  33. decompose_mcp-0.1.0/src/decompose/py.typed +0 -0
  34. decompose_mcp-0.1.0/tests/__init__.py +0 -0
  35. decompose_mcp-0.1.0/tests/fixtures/agent_policy.txt +29 -0
  36. decompose_mcp-0.1.0/tests/fixtures/contract_excerpt.txt +59 -0
  37. decompose_mcp-0.1.0/tests/fixtures/model_card.txt +34 -0
  38. decompose_mcp-0.1.0/tests/fixtures/report_geotechnical.txt +76 -0
  39. decompose_mcp-0.1.0/tests/fixtures/spec_structural.txt +97 -0
  40. decompose_mcp-0.1.0/tests/test_chunker.py +69 -0
  41. decompose_mcp-0.1.0/tests/test_classifier.py +70 -0
  42. decompose_mcp-0.1.0/tests/test_core.py +65 -0
  43. decompose_mcp-0.1.0/tests/test_entities.py +47 -0
  44. decompose_mcp-0.1.0/tests/test_irreducibility.py +42 -0
@@ -0,0 +1,10 @@
1
+ version: 2
2
+ updates:
3
+ - package-ecosystem: pip
4
+ directory: "/"
5
+ schedule:
6
+ interval: weekly
7
+ - package-ecosystem: github-actions
8
+ directory: "/"
9
+ schedule:
10
+ interval: weekly
@@ -0,0 +1,24 @@
1
+ name-template: 'v$RESOLVED_VERSION'
2
+ tag-template: 'v$RESOLVED_VERSION'
3
+ template: |
4
+ ## What's Changed
5
+
6
+ $CHANGES
7
+
8
+ **Full Changelog**: https://github.com/echology-io/decompose/compare/$PREVIOUS_TAG...v$RESOLVED_VERSION
9
+ categories:
10
+ - title: 'Features'
11
+ labels: ['feature', 'enhancement']
12
+ - title: 'Bug Fixes'
13
+ labels: ['fix', 'bug']
14
+ - title: 'Maintenance'
15
+ labels: ['chore', 'deps', 'ci']
16
+ change-template: '- $TITLE @$AUTHOR (#$NUMBER)'
17
+ version-resolver:
18
+ major:
19
+ labels: ['major']
20
+ minor:
21
+ labels: ['minor', 'feature']
22
+ patch:
23
+ labels: ['patch', 'fix', 'bug']
24
+ default: patch
@@ -0,0 +1,29 @@
1
+ name: Benchmark
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ benchmark:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - uses: actions/checkout@v4
14
+
15
+ - uses: actions/setup-python@v5
16
+ with:
17
+ python-version: "3.12"
18
+
19
+ - name: Install
20
+ run: pip install -e .
21
+
22
+ - name: Run benchmarks
23
+ run: python benchmarks/run.py | tee benchmark.json
24
+
25
+ - name: Upload results
26
+ uses: actions/upload-artifact@v4
27
+ with:
28
+ name: benchmark-results
29
+ path: benchmark.json
@@ -0,0 +1,31 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ test:
11
+ runs-on: ubuntu-latest
12
+ strategy:
13
+ matrix:
14
+ python-version: ["3.10", "3.11", "3.12", "3.13"]
15
+
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+
19
+ - uses: actions/setup-python@v5
20
+ with:
21
+ python-version: ${{ matrix.python-version }}
22
+
23
+ - name: Install dependencies
24
+ run: |
25
+ pip install -e ".[dev]"
26
+
27
+ - name: Lint
28
+ run: ruff check src/ tests/
29
+
30
+ - name: Test
31
+ run: pytest -v
@@ -0,0 +1,35 @@
1
+ name: Deploy Pages
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ paths: [docs/**]
7
+
8
+ permissions:
9
+ contents: read
10
+ pages: write
11
+ id-token: write
12
+
13
+ concurrency:
14
+ group: pages
15
+ cancel-in-progress: true
16
+
17
+ jobs:
18
+ deploy:
19
+ runs-on: ubuntu-latest
20
+ environment:
21
+ name: github-pages
22
+ url: ${{ steps.deployment.outputs.page_url }}
23
+ steps:
24
+ - uses: actions/checkout@v4
25
+
26
+ - uses: actions/configure-pages@v5
27
+ with:
28
+ enablement: true
29
+
30
+ - uses: actions/upload-pages-artifact@v3
31
+ with:
32
+ path: docs
33
+
34
+ - id: deployment
35
+ uses: actions/deploy-pages@v4
@@ -0,0 +1,28 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ push:
5
+ tags: ["v*"]
6
+
7
+ jobs:
8
+ publish:
9
+ runs-on: ubuntu-latest
10
+ permissions:
11
+ id-token: write
12
+
13
+ steps:
14
+ - uses: actions/checkout@v4
15
+
16
+ - uses: actions/setup-python@v5
17
+ with:
18
+ python-version: "3.12"
19
+
20
+ - name: Build
21
+ run: |
22
+ pip install hatchling
23
+ python -m hatchling build
24
+
25
+ - name: Publish
26
+ uses: pypa/gh-action-pypi-publish@release/v1
27
+ with:
28
+ password: ${{ secrets.PYPI_API_TOKEN }}
@@ -0,0 +1,17 @@
1
+ name: Release Drafter
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+
7
+ permissions:
8
+ contents: write
9
+ pull-requests: read
10
+
11
+ jobs:
12
+ draft:
13
+ runs-on: ubuntu-latest
14
+ steps:
15
+ - uses: release-drafter/release-drafter@v6
16
+ env:
17
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -0,0 +1,22 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *$py.class
4
+ *.egg-info/
5
+ dist/
6
+ build/
7
+ .eggs/
8
+ *.egg
9
+ .venv/
10
+ venv/
11
+ env/
12
+ .env
13
+ .ruff_cache/
14
+ .mypy_cache/
15
+ .pytest_cache/
16
+ htmlcov/
17
+ .coverage
18
+ *.so
19
+ .DS_Store
20
+ *.swp
21
+ *.swo
22
+ *~
@@ -0,0 +1,33 @@
1
+ # Decompose — Launch Plan
2
+
3
+ **"Stop prompting. Start decomposing."**
4
+
5
+ **Repo:** `echology-io/decompose`
6
+ **Site:** echology.io
7
+
8
+ ---
9
+
10
+ ## What Decompose Is
11
+
12
+ The missing cognitive primitive for AI agents.
13
+
14
+ Agents fail on complex input because they receive unstructured chaos. Decompose gives any agent the ability to break any input into structured, composable intelligence — instantly. No LLM. No setup. One function call.
15
+
16
+ **Positioning:** Decompose is not a parser. It's a stability layer for agent reasoning.
17
+
18
+ **Philosophy:** All intelligence begins with decomposition.
19
+
20
+ ---
21
+
22
+ ## What We're NOT Building
23
+
24
+ | Temptation | Why No |
25
+ |---|---|
26
+ | Web service / API | MCP runs locally. Zero ops. |
27
+ | Database | Pure function. Stateless. |
28
+ | LLM dependency | Kills adoption. Pure Python. Works offline. |
29
+ | Config files | Sensible defaults. Env vars for power users. |
30
+ | Docker | `pip install decompose-mcp`. Done. |
31
+ | Auth / accounts | Local tool. No signup. |
32
+ | Plugin system | Premature. Hardcode extractors. |
33
+ | Multiple formats | JSON only. Agents consume JSON. |
@@ -0,0 +1,165 @@
1
+ Metadata-Version: 2.4
2
+ Name: decompose-mcp
3
+ Version: 0.1.0
4
+ Summary: The missing cognitive primitive for AI agents. Structured intelligence from any text.
5
+ Project-URL: Homepage, https://echology.io/decompose
6
+ Project-URL: Repository, https://github.com/echology-io/decompose
7
+ Project-URL: Issues, https://github.com/echology-io/decompose/issues
8
+ Author-email: Kyle Vines <kyle@echology.io>
9
+ License: Proprietary
10
+ Keywords: agents,ai,decompose,intelligence,mcp,semantic,structured
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: Other/Proprietary License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Topic :: Software Development :: Libraries
20
+ Classifier: Topic :: Text Processing :: Linguistic
21
+ Requires-Python: >=3.10
22
+ Requires-Dist: mcp>=1.0.0
23
+ Provides-Extra: dev
24
+ Requires-Dist: pytest>=8.0; extra == 'dev'
25
+ Requires-Dist: ruff>=0.8; extra == 'dev'
26
+ Description-Content-Type: text/markdown
27
+
28
+ # Decompose
29
+
30
+ [![CI](https://github.com/echology-io/decompose/actions/workflows/ci.yml/badge.svg)](https://github.com/echology-io/decompose/actions/workflows/ci.yml)
31
+ [![PyPI](https://img.shields.io/pypi/v/decompose-mcp)](https://pypi.org/project/decompose-mcp/)
32
+ [![Python](https://img.shields.io/pypi/pyversions/decompose-mcp)](https://pypi.org/project/decompose-mcp/)
33
+
34
+ <!-- mcp-name: io.github.echology-io/decompose -->
35
+
36
+ **Stop prompting. Start decomposing.**
37
+
38
+ The missing cognitive primitive for AI agents. Decompose turns any text into classified, structured semantic units — instantly. No LLM. No setup. One function call.
39
+
40
+ ---
41
+
42
+ ### Before: your agent reads this
43
+
44
+ ```
45
+ The contractor shall provide all materials per ASTM C150-20. Maximum load
46
+ shall not exceed 500 psf per ASCE 7-22. Notice to proceed within 14 calendar
47
+ days of contract execution. Retainage of 10% applies to all payments.
48
+ For general background, the project is located in Denver, CO...
49
+ ```
50
+
51
+ ### After: your agent reads this
52
+
53
+ ```json
54
+ [
55
+ {
56
+ "text": "The contractor shall provide all materials per ASTM C150-20.",
57
+ "authority": "mandatory",
58
+ "risk": "compliance",
59
+ "type": "requirement",
60
+ "irreducible": true,
61
+ "attention": 8.0,
62
+ "entities": ["ASTM C150-20"]
63
+ },
64
+ {
65
+ "text": "Maximum load shall not exceed 500 psf per ASCE 7-22.",
66
+ "authority": "prohibitive",
67
+ "risk": "safety_critical",
68
+ "type": "constraint",
69
+ "irreducible": true,
70
+ "attention": 10.0,
71
+ "entities": ["ASCE 7-22"]
72
+ }
73
+ ]
74
+ ```
75
+
76
+ Every unit classified. Every standard extracted. Every risk scored. Your agent knows what matters.
77
+
78
+ ---
79
+
80
+ ## Install
81
+
82
+ ```bash
83
+ pip install decompose-mcp
84
+ ```
85
+
86
+ ## Use as MCP Server
87
+
88
+ Add to your agent's MCP config (Claude Code, OpenClaw, Cursor, etc.):
89
+
90
+ ```json
91
+ {
92
+ "mcpServers": {
93
+ "decompose": {
94
+ "command": "uvx",
95
+ "args": ["decompose-mcp", "--serve"]
96
+ }
97
+ }
98
+ }
99
+ ```
100
+
101
+ Your agent gets two tools:
102
+ - **`decompose_text`** — decompose any text
103
+ - **`decompose_url`** — fetch a URL and decompose its content
104
+
105
+ ## Use as CLI
106
+
107
+ ```bash
108
+ # Pipe text
109
+ cat spec.txt | decompose --pretty
110
+
111
+ # Inline
112
+ decompose --text "The contractor shall provide all materials per ASTM C150-20."
113
+
114
+ # Compact output (smaller JSON)
115
+ cat document.md | decompose --compact
116
+ ```
117
+
118
+ ## Use as Library
119
+
120
+ ```python
121
+ from decompose import decompose
122
+
123
+ result = decompose("The contractor shall provide all materials per ASTM C150-20.")
124
+
125
+ for unit in result["units"]:
126
+ print(f"[{unit['authority']}] [{unit['risk']}] {unit['text'][:60]}...")
127
+ ```
128
+
129
+ ---
130
+
131
+ ## What Each Field Means
132
+
133
+ | Field | Values | What It Tells Your Agent |
134
+ |-------|--------|--------------------------|
135
+ | `authority` | mandatory, prohibitive, directive, permissive, conditional, informational | Is this a hard requirement or background? |
136
+ | `risk` | safety_critical, compliance, financial, contractual, advisory, informational | How much does this matter? |
137
+ | `type` | requirement, definition, reference, constraint, narrative, data | What kind of content is this? |
138
+ | `irreducible` | true/false | Must this be preserved verbatim? |
139
+ | `attention` | 0.0 - 10.0 | How much compute should the agent spend here? |
140
+ | `entities` | standards, codes, regulations | What formal references are cited? |
141
+ | `actionable` | true/false | Does someone need to do something? |
142
+
143
+ ---
144
+
145
+ ## Why No LLM?
146
+
147
+ Decompose runs on pure regex and heuristics. No Ollama, no API key, no GPU, no inference cost.
148
+
149
+ This is intentional:
150
+ - **Fast**: <500ms for a 50-page spec
151
+ - **Deterministic**: Same input always produces same output
152
+ - **Offline**: Works air-gapped, on a plane, on CI
153
+ - **Composable**: Your agent's LLM reasons over the structured output — decompose handles the preprocessing
154
+
155
+ The LLM is what *your agent* uses. Decompose makes whatever model you're running work better.
156
+
157
+ ---
158
+
159
+ ## Built by Echology
160
+
161
+ Decompose is extracted from [AECai](https://aecai.io), a document intelligence platform for Architecture, Engineering, and Construction firms. The classification patterns, entity extraction, and irreducibility detection are battle-tested against thousands of real AEC documents — specs, contracts, RFIs, inspection reports, pay applications.
162
+
163
+ **License:** Proprietary — Copyright (c) 2025-2026 Echology, Inc.
164
+
165
+ **Philosophy:** All intelligence begins with decomposition.
@@ -0,0 +1,138 @@
1
+ # Decompose
2
+
3
+ [![CI](https://github.com/echology-io/decompose/actions/workflows/ci.yml/badge.svg)](https://github.com/echology-io/decompose/actions/workflows/ci.yml)
4
+ [![PyPI](https://img.shields.io/pypi/v/decompose-mcp)](https://pypi.org/project/decompose-mcp/)
5
+ [![Python](https://img.shields.io/pypi/pyversions/decompose-mcp)](https://pypi.org/project/decompose-mcp/)
6
+
7
+ <!-- mcp-name: io.github.echology-io/decompose -->
8
+
9
+ **Stop prompting. Start decomposing.**
10
+
11
+ The missing cognitive primitive for AI agents. Decompose turns any text into classified, structured semantic units — instantly. No LLM. No setup. One function call.
12
+
13
+ ---
14
+
15
+ ### Before: your agent reads this
16
+
17
+ ```
18
+ The contractor shall provide all materials per ASTM C150-20. Maximum load
19
+ shall not exceed 500 psf per ASCE 7-22. Notice to proceed within 14 calendar
20
+ days of contract execution. Retainage of 10% applies to all payments.
21
+ For general background, the project is located in Denver, CO...
22
+ ```
23
+
24
+ ### After: your agent reads this
25
+
26
+ ```json
27
+ [
28
+ {
29
+ "text": "The contractor shall provide all materials per ASTM C150-20.",
30
+ "authority": "mandatory",
31
+ "risk": "compliance",
32
+ "type": "requirement",
33
+ "irreducible": true,
34
+ "attention": 8.0,
35
+ "entities": ["ASTM C150-20"]
36
+ },
37
+ {
38
+ "text": "Maximum load shall not exceed 500 psf per ASCE 7-22.",
39
+ "authority": "prohibitive",
40
+ "risk": "safety_critical",
41
+ "type": "constraint",
42
+ "irreducible": true,
43
+ "attention": 10.0,
44
+ "entities": ["ASCE 7-22"]
45
+ }
46
+ ]
47
+ ```
48
+
49
+ Every unit classified. Every standard extracted. Every risk scored. Your agent knows what matters.
50
+
51
+ ---
52
+
53
+ ## Install
54
+
55
+ ```bash
56
+ pip install decompose-mcp
57
+ ```
58
+
59
+ ## Use as MCP Server
60
+
61
+ Add to your agent's MCP config (Claude Code, OpenClaw, Cursor, etc.):
62
+
63
+ ```json
64
+ {
65
+ "mcpServers": {
66
+ "decompose": {
67
+ "command": "uvx",
68
+ "args": ["decompose-mcp", "--serve"]
69
+ }
70
+ }
71
+ }
72
+ ```
73
+
74
+ Your agent gets two tools:
75
+ - **`decompose_text`** — decompose any text
76
+ - **`decompose_url`** — fetch a URL and decompose its content
77
+
78
+ ## Use as CLI
79
+
80
+ ```bash
81
+ # Pipe text
82
+ cat spec.txt | decompose --pretty
83
+
84
+ # Inline
85
+ decompose --text "The contractor shall provide all materials per ASTM C150-20."
86
+
87
+ # Compact output (smaller JSON)
88
+ cat document.md | decompose --compact
89
+ ```
90
+
91
+ ## Use as Library
92
+
93
+ ```python
94
+ from decompose import decompose
95
+
96
+ result = decompose("The contractor shall provide all materials per ASTM C150-20.")
97
+
98
+ for unit in result["units"]:
99
+ print(f"[{unit['authority']}] [{unit['risk']}] {unit['text'][:60]}...")
100
+ ```
101
+
102
+ ---
103
+
104
+ ## What Each Field Means
105
+
106
+ | Field | Values | What It Tells Your Agent |
107
+ |-------|--------|--------------------------|
108
+ | `authority` | mandatory, prohibitive, directive, permissive, conditional, informational | Is this a hard requirement or background? |
109
+ | `risk` | safety_critical, compliance, financial, contractual, advisory, informational | How much does this matter? |
110
+ | `type` | requirement, definition, reference, constraint, narrative, data | What kind of content is this? |
111
+ | `irreducible` | true/false | Must this be preserved verbatim? |
112
+ | `attention` | 0.0 - 10.0 | How much compute should the agent spend here? |
113
+ | `entities` | standards, codes, regulations | What formal references are cited? |
114
+ | `actionable` | true/false | Does someone need to do something? |
115
+
116
+ ---
117
+
118
+ ## Why No LLM?
119
+
120
+ Decompose runs on pure regex and heuristics. No Ollama, no API key, no GPU, no inference cost.
121
+
122
+ This is intentional:
123
+ - **Fast**: <500ms for a 50-page spec
124
+ - **Deterministic**: Same input always produces same output
125
+ - **Offline**: Works air-gapped, on a plane, on CI
126
+ - **Composable**: Your agent's LLM reasons over the structured output — decompose handles the preprocessing
127
+
128
+ The LLM is what *your agent* uses. Decompose makes whatever model you're running work better.
129
+
130
+ ---
131
+
132
+ ## Built by Echology
133
+
134
+ Decompose is extracted from [AECai](https://aecai.io), a document intelligence platform for Architecture, Engineering, and Construction firms. The classification patterns, entity extraction, and irreducibility detection are battle-tested against thousands of real AEC documents — specs, contracts, RFIs, inspection reports, pay applications.
135
+
136
+ **License:** Proprietary — Copyright (c) 2025-2026 Echology, Inc.
137
+
138
+ **Philosophy:** All intelligence begins with decomposition.
@@ -0,0 +1,61 @@
1
+ {
2
+ "docs": 5,
3
+ "total_chars": 15683,
4
+ "total_units": 34,
5
+ "total_ms": 13.8,
6
+ "chars_per_ms": 1132,
7
+ "total_standards": 22,
8
+ "total_irreducible": 19,
9
+ "benchmarks": [
10
+ {
11
+ "file": "agent_policy.txt",
12
+ "chars": 2679,
13
+ "words": 404,
14
+ "units": 8,
15
+ "ms": 2.12,
16
+ "standards": 0,
17
+ "irreducible": 6,
18
+ "actionable": 7
19
+ },
20
+ {
21
+ "file": "contract_excerpt.txt",
22
+ "chars": 3693,
23
+ "words": 580,
24
+ "units": 2,
25
+ "ms": 3.51,
26
+ "standards": 1,
27
+ "irreducible": 2,
28
+ "actionable": 2
29
+ },
30
+ {
31
+ "file": "model_card.txt",
32
+ "chars": 2266,
33
+ "words": 339,
34
+ "units": 7,
35
+ "ms": 1.76,
36
+ "standards": 0,
37
+ "irreducible": 2,
38
+ "actionable": 2
39
+ },
40
+ {
41
+ "file": "report_geotechnical.txt",
42
+ "chars": 3242,
43
+ "words": 499,
44
+ "units": 14,
45
+ "ms": 2.65,
46
+ "standards": 6,
47
+ "irreducible": 6,
48
+ "actionable": 8
49
+ },
50
+ {
51
+ "file": "spec_structural.txt",
52
+ "chars": 3803,
53
+ "words": 580,
54
+ "units": 3,
55
+ "ms": 3.81,
56
+ "standards": 15,
57
+ "irreducible": 3,
58
+ "actionable": 3
59
+ }
60
+ ]
61
+ }
@@ -0,0 +1,79 @@
1
+ #!/usr/bin/env python3
2
+ """Benchmark decompose against reference fixtures. Outputs JSON."""
3
+
4
+ import json
5
+ import sys
6
+ import time
7
+ from pathlib import Path
8
+
9
+ from decompose.core import decompose_text
10
+
11
+ FIXTURES = Path(__file__).parent.parent / "tests" / "fixtures"
12
+
13
+
14
def bench_file(path: Path) -> dict:
    """Decompose one fixture file and return its benchmark record.

    Args:
        path: Text fixture to read and pass to ``decompose_text``.

    Returns:
        A JSON-serializable dict holding the file name, input size in
        characters and whitespace-separated words, elapsed processing time
        in milliseconds (rounded to one decimal), and counts derived from
        the result's ``meta`` and ``units`` entries.
    """
    # Decode explicitly so the measured input does not vary with the
    # platform's locale default (assumes fixtures are UTF-8 -- TODO confirm).
    text = path.read_text(encoding="utf-8")

    # Only the decompose_text call is timed, not the file read.
    # perf_counter is the high-resolution clock meant for short intervals.
    start = time.perf_counter()
    result = decompose_text(text)
    elapsed = round((time.perf_counter() - start) * 1000, 1)

    meta = result["meta"]
    units = result["units"]

    safety = sum(1 for u in units if u["risk"] == "safety_critical")
    mandatory = sum(1 for u in units if u["authority"] == "mandatory")
    irreducible = sum(1 for u in units if u["irreducible"])

    return {
        "file": path.name,
        "input_chars": len(text),
        "input_words": len(text.split()),
        "total_units": meta["total_units"],
        "processing_ms": elapsed,
        "standards_found": len(meta["standards_found"]),
        "dates_found": len(meta["dates_found"]),
        "token_estimate": meta["token_estimate"],
        "mandatory_units": mandatory,
        "safety_critical_units": safety,
        "irreducible_units": irreducible,
    }
40
+
41
+
42
def main():
    """Benchmark every ``*.txt`` fixture and write a JSON report to stdout.

    Prints a message to stderr and exits with status 1 when the fixtures
    directory does not exist or contains no ``.txt`` files.
    """
    if not FIXTURES.exists():
        print(f"No fixtures at {FIXTURES}", file=sys.stderr)
        sys.exit(1)

    fixture_paths = sorted(FIXTURES.glob("*.txt"))
    if not fixture_paths:
        print("No .txt fixtures found", file=sys.stderr)
        sys.exit(1)

    records = [bench_file(fixture) for fixture in fixture_paths]

    def column_total(key):
        # Sum one numeric field across all per-file records.
        return sum(record[key] for record in records)

    chars = column_total("input_chars")
    elapsed_ms = column_total("processing_ms")

    summary = {
        "files": len(records),
        "total_chars": chars,
        "total_units": column_total("total_units"),
        "total_standards": column_total("standards_found"),
        "total_ms": round(elapsed_ms, 1),
        # The divisor is clamped to 0.1 ms so a near-zero total cannot
        # divide by zero.
        "chars_per_ms": round(chars / max(elapsed_ms, 0.1)),
    }

    report = {"benchmarks": records, "summary": summary}
    json.dump(report, sys.stdout, indent=2)
    print()
76
+
77
+
78
+ if __name__ == "__main__":
79
+ main()
@@ -0,0 +1 @@
1
+ echology.io