bulla 0.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. bulla-0.7.0/.github/workflows/bulla.yml +54 -0
  2. bulla-0.7.0/.gitignore +8 -0
  3. bulla-0.7.0/CHANGELOG.md +137 -0
  4. bulla-0.7.0/LICENSE +21 -0
  5. bulla-0.7.0/PKG-INFO +302 -0
  6. bulla-0.7.0/README.md +277 -0
  7. bulla-0.7.0/action.yml +87 -0
  8. bulla-0.7.0/audit/AUDIT_REPORT.md +129 -0
  9. bulla-0.7.0/audit/audit_report.json +588 -0
  10. bulla-0.7.0/audit/financial_composition.yaml +110 -0
  11. bulla-0.7.0/audit/financial_composition_bridged.yaml +115 -0
  12. bulla-0.7.0/audit/financial_pipeline.json +91 -0
  13. bulla-0.7.0/audit/mcp_official_composition.yaml +77 -0
  14. bulla-0.7.0/audit/mcp_official_tools.json +966 -0
  15. bulla-0.7.0/bulla-manifest-schema.json +63 -0
  16. bulla-0.7.0/bulla-manifest-spec-v0.1.md +134 -0
  17. bulla-0.7.0/composition-schema.json +98 -0
  18. bulla-0.7.0/compositions/auth_pipeline.yaml +67 -0
  19. bulla-0.7.0/compositions/code_review_pipeline.yaml +92 -0
  20. bulla-0.7.0/compositions/data_etl_pipeline.yaml +94 -0
  21. bulla-0.7.0/compositions/financial_pipeline.yaml +73 -0
  22. bulla-0.7.0/compositions/mcp_fetch_filesystem_git.yaml +90 -0
  23. bulla-0.7.0/compositions/mcp_fetch_memory.yaml +66 -0
  24. bulla-0.7.0/compositions/mcp_filesystem_git.yaml +78 -0
  25. bulla-0.7.0/compositions/rag_pipeline.yaml +70 -0
  26. bulla-0.7.0/compositions/web_research_pipeline.yaml +97 -0
  27. bulla-0.7.0/examples/demo_reflexive.py +142 -0
  28. bulla-0.7.0/pyproject.toml +41 -0
  29. bulla-0.7.0/src/bulla/__init__.py +50 -0
  30. bulla-0.7.0/src/bulla/__main__.py +5 -0
  31. bulla-0.7.0/src/bulla/cli.py +733 -0
  32. bulla-0.7.0/src/bulla/coboundary.py +111 -0
  33. bulla-0.7.0/src/bulla/compositions/auth_pipeline.yaml +67 -0
  34. bulla-0.7.0/src/bulla/compositions/code_review_pipeline.yaml +92 -0
  35. bulla-0.7.0/src/bulla/compositions/data_etl_pipeline.yaml +94 -0
  36. bulla-0.7.0/src/bulla/compositions/financial_pipeline.yaml +73 -0
  37. bulla-0.7.0/src/bulla/compositions/mcp_fetch_filesystem_git.yaml +90 -0
  38. bulla-0.7.0/src/bulla/compositions/mcp_fetch_memory.yaml +66 -0
  39. bulla-0.7.0/src/bulla/compositions/mcp_filesystem_git.yaml +78 -0
  40. bulla-0.7.0/src/bulla/compositions/rag_pipeline.yaml +70 -0
  41. bulla-0.7.0/src/bulla/compositions/web_research_pipeline.yaml +97 -0
  42. bulla-0.7.0/src/bulla/diagnostic.py +117 -0
  43. bulla-0.7.0/src/bulla/formatters.py +320 -0
  44. bulla-0.7.0/src/bulla/guard.py +304 -0
  45. bulla-0.7.0/src/bulla/infer/__init__.py +25 -0
  46. bulla-0.7.0/src/bulla/infer/classifier.py +547 -0
  47. bulla-0.7.0/src/bulla/infer/mcp.py +193 -0
  48. bulla-0.7.0/src/bulla/init.py +110 -0
  49. bulla-0.7.0/src/bulla/manifest.py +130 -0
  50. bulla-0.7.0/src/bulla/model.py +346 -0
  51. bulla-0.7.0/src/bulla/ots.py +301 -0
  52. bulla-0.7.0/src/bulla/parser.py +156 -0
  53. bulla-0.7.0/src/bulla/scan.py +138 -0
  54. bulla-0.7.0/src/bulla/serve.py +379 -0
  55. bulla-0.7.0/src/bulla/taxonomy.yaml +260 -0
  56. bulla-0.7.0/src/bulla/witness.py +111 -0
  57. bulla-0.7.0/tests/__init__.py +0 -0
  58. bulla-0.7.0/tests/fixtures/real_mcp_tools.json +116 -0
  59. bulla-0.7.0/tests/fixtures/sample_mcp_manifest.json +65 -0
  60. bulla-0.7.0/tests/test_cli.py +136 -0
  61. bulla-0.7.0/tests/test_coboundary.py +109 -0
  62. bulla-0.7.0/tests/test_diagnostic.py +90 -0
  63. bulla-0.7.0/tests/test_guard.py +193 -0
  64. bulla-0.7.0/tests/test_infer.py +159 -0
  65. bulla-0.7.0/tests/test_init.py +70 -0
  66. bulla-0.7.0/tests/test_manifest.py +286 -0
  67. bulla-0.7.0/tests/test_multi_signal.py +794 -0
  68. bulla-0.7.0/tests/test_ots.py +185 -0
  69. bulla-0.7.0/tests/test_parser.py +145 -0
  70. bulla-0.7.0/tests/test_scan.py +82 -0
  71. bulla-0.7.0/tests/test_serve.py +476 -0
  72. bulla-0.7.0/tests/test_witness.py +409 -0
@@ -0,0 +1,54 @@
1
+ name: bulla
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ coherence-check:
11
+ runs-on: ubuntu-latest
12
+ permissions:
13
+ security-events: write
14
+ steps:
15
+ - uses: actions/checkout@v4
16
+
17
+ - uses: actions/setup-python@v5
18
+ with:
19
+ python-version: "3.11"
20
+
21
+ - name: Install bulla from source
22
+ run: pip install -e .
23
+
24
+ - name: Run bulla on bundled examples
25
+ run: |
26
+ bulla check --max-blind-spots 5 --format sarif compositions/ > bulla.sarif 2>/dev/null || true
27
+ bulla check --max-blind-spots 5 compositions/
28
+
29
+ - name: Upload SARIF
30
+ if: always()
31
+ uses: github/codeql-action/upload-sarif@v3
32
+ with:
33
+ sarif_file: bulla.sarif
34
+ continue-on-error: true
35
+
36
+ tests:
37
+ runs-on: ubuntu-latest
38
+ strategy:
39
+ matrix:
40
+ python-version: ["3.10", "3.11", "3.12", "3.13"]
41
+ steps:
42
+ - uses: actions/checkout@v4
43
+
44
+ - uses: actions/setup-python@v5
45
+ with:
46
+ python-version: ${{ matrix.python-version }}
47
+
48
+ - name: Install dependencies
49
+ run: |
50
+ pip install -e ".[ots]"
51
+ pip install pytest
52
+
53
+ - name: Run tests
54
+ run: python -m pytest tests/ -v
bulla-0.7.0/.gitignore ADDED
@@ -0,0 +1,8 @@
1
+ __pycache__/
2
+ *.pyc
3
+ *.egg-info/
4
+ dist/
5
+ build/
6
+ .eggs/
7
+ *.egg
8
+ .venv/
@@ -0,0 +1,137 @@
1
+ # Changelog
2
+
3
+ ## 0.7.0
4
+
5
+ ### Changed
6
+ - **Renamed from `seam-lint` to `bulla`** — the package, CLI, Python imports, and all public API names now use the `bulla` identity. SEAM remains the name for the underlying theory; Bulla is the protocol and tool; Glyph is the company.
7
+ - `SeamGuard` → `BullaGuard`, `SeamCheckError` → `BullaCheckError`
8
+ - `to_seam_patch()` → `to_bulla_patch()`, `seam_patch_version` → `bulla_patch_version`
9
+ - `seam_manifest` YAML key → `bulla_manifest` (parser accepts both for one version cycle)
10
+ - MCP server tools: `bulla.witness`, `bulla.bridge`; resource URI: `bulla://taxonomy`
11
+ - CLI entry point: `bulla` (was `seam-lint`)
12
+ - SARIF rule IDs: `bulla/blind-spot`, `bulla/bridge-recommendation`
13
+ - PyPI package: `pip install bulla`
14
+
15
+ ## 0.6.0
16
+
17
+ ### Added
18
+ - **Witness kernel** (`witness.py`): deterministic measurement → receipt pipeline with three-layer separation (measurement / binding / judgment)
19
+ - **Constitutional objects**: `Disposition` enum (5 levels), `BridgePatch` (frozen, Bulla Patch v0.1), `WitnessReceipt` (content-addressable, tamper-evident)
20
+ - **`bulla serve`** — MCP stdio server exposing 2 tools + 1 resource:
21
+ - `bulla.witness`: composition YAML → WitnessReceipt (atomic measure-bind-judge)
22
+ - `bulla.bridge`: composition YAML → patched composition + receipt + before/after metrics
23
+ - `bulla.taxonomy` resource: convention taxonomy for agent inspection
24
+ - **`bulla bridge`** — auto-generate bridged composition YAML or Bulla Patches from diagnosed composition
25
+ - **`bulla witness`** — diagnose and emit WitnessReceipt as JSON
26
+ - **`Diagnostic.content_hash()`** — deterministic SHA-256 of measurement content (excludes timestamps)
27
+ - **`load_composition(text=)`** — parser accepts string input for MCP server use
28
+ - **Policy profile**: `witness()` and `_resolve_disposition()` accept named `policy_profile` parameter (default: `witness.default.v1`), recorded in receipt and receipt hash
29
+ - **Bulla Patch v0.1**: `BridgePatch.to_bulla_patch()` — explicitly typed patch format, not RFC 6902
30
+ - **Typed error vocabulary**: `WitnessErrorCode` enum (4 codes), `WitnessError` exception
31
+ - **Anti-reflexivity enforcement**: AST-level test proves `diagnostic.py` has zero imports from `witness.py` (Law 1); bounded recursion via `depth` parameter with `MAX_DEPTH=10` (Law 7)
32
+ - **Three-hash boundary**: `composition_hash` (what was proposed), `diagnostic_hash` (what was measured), `receipt_hash` (what was witnessed) — tested for independence
33
+ - 33 new tests (233 total)
34
+
35
+ ### Fixed
36
+ - **Bridge generation bug**: when `from_field != to_field` and both sides were hidden, the destination tool received the wrong field. Bridge generation now emits a separate Bridge per side, each with the correct field.
37
+
38
+ ### Changed
39
+ - `to_json_patch()` renamed to `to_bulla_patch()` with `bulla_patch_version: "0.1.0"` field
40
+ - `receipt_hash` docstring documents timestamp inclusion semantics (unique event identity vs deduplication via `diagnostic_hash`)
41
+ - Bridge response includes `original_composition_hash` for traceability
42
+
43
+ ## 0.5.0
44
+
45
+ ### Added
46
+ - **Three-tier confidence: "unknown" tier now live** — single description-keyword-only or weak schema signals (enum partial overlap, integer type inference) now correctly produce `unknown` instead of the dead-branch `inferred`
47
+ - **0-100 range disambiguation** — fields with `minimum: 0, maximum: 100` now check field name and description for rate/percent indicators before choosing `rate_scale` vs `score_range`
48
+ - **Domain-aware prioritization** — `classify_tool_rich()` accepts `domain_hint` (e.g. `"financial"`, `"ml"`) to boost domain-relevant dimensions from `unknown` → `inferred`
49
+ - **`_normalize_enum_value()` helper** — single source of truth for enum normalization (lowercase, strip hyphens/underscores), replacing duplicated inline logic
50
+ - **Real MCP validation suite** — 5 realistic tool definitions (Stripe, GitHub, Datadog, Slack, ML) with per-tool coverage assertions
51
+ - **End-to-end coverage test** — real MCP JSON → generate manifests → validate → assert ≥6/10 dimensions detected
52
+ - **Domain map API** — `_get_domain_map()` loads taxonomy `domains` metadata (previously defined but unused)
53
+ - 16 new tests covering unknown tier, range disambiguation, domain boosting, normalization, real-tool coverage, and E2E pipeline (178 total)
54
+
55
+ ### Changed
56
+ - `_merge_signals()` accepts `domain_hint` parameter for confidence boosting
57
+ - `classify_tool_rich()` accepts `domain_hint` parameter (backward-compatible, defaults to `None`)
58
+ - Description-only signals now produce `unknown` confidence (was incorrectly `inferred`)
59
+
60
+ ### Fixed
61
+ - Dead `else` branch in `_merge_signals()` — the "unknown" confidence tier was unreachable (all paths produced "inferred")
62
+ - Field name propagation for description hits in `_merge_signals()` — description hits now inherit field names from co-occurring name/schema hits
63
+ - Circular import between `classifier.py` and `mcp.py` now documented with inline comment
64
+ - False positive: `format: "uri"` / `"email"` / `"uri-reference"` no longer mapped to `encoding` dimension — these are string formats, not encoding conventions
65
+ - False positive: `count` removed from `id_offset` field name patterns — count is a quantity, not an index
66
+ - Text formatter now explains fee-vs-blind-spots divergence when fee = 0 but blind spots exist
67
+
68
+ ## 0.4.0
69
+
70
+ ### Added
71
+ - **Multi-signal convention inference**: classifier now uses three independent signal sources instead of field-name regex alone
72
+ - Signal 1: Field name pattern matching (existing, now taxonomy-compiled)
73
+ - Signal 2: Description keyword matching — detects conventions from tool/field descriptions (e.g. "amounts in cents", "ISO-8601 timestamps")
74
+ - Signal 3: JSON Schema structural signals — `format`, `type`+range, `enum`, `pattern` metadata
75
+ - **Nested property extraction**: recursive extraction of fields from nested JSON Schema objects with dot-path naming (e.g. `invoice.total_amount`), depth limit 3
76
+ - **Taxonomy as single source of truth**: `field_patterns` from `taxonomy.yaml` now compile into classifier regex at load time; `known_values` drive enum matching
77
+ - **Three-tier confidence model**: `declared` (2+ independent signals agree), `inferred` (1 strong signal), `unknown` (weak/ambiguous) — replaces the binary high/medium system
78
+ - `FieldInfo` dataclass for rich field metadata (type, format, enum, min/max, pattern, description)
79
+ - `classify_tool_rich()` high-level API for full multi-signal classification of MCP tool definitions
80
+ - `classify_description()` for extracting dimension signals from tool descriptions
81
+ - `classify_schema_signal()` for extracting dimension signals from JSON Schema metadata
82
+ - `description_keywords` per dimension in taxonomy (v0.2)
83
+ - Currency codes (USD, EUR, GBP, JPY, CNY, BTC) added to `amount_unit` known_values
84
+ - `extract_field_infos()` public API for rich field extraction from tool schemas
85
+ - Manifest generation now uses multi-signal classifier with `sources` metadata in output
86
+ - 41 new tests covering all signal types, confidence tiers, and round-trip validation (162 total)
87
+
88
+ ### Changed
89
+ - Confidence values in generated manifests are now directly `declared`/`inferred`/`unknown` — the `_CONFIDENCE_MAP` translation layer is removed
90
+ - `infer_from_manifest()` output now includes signal sources in review comments
91
+ - Taxonomy version bumped to 0.2
92
+
93
+ ### Fixed
94
+ - Version string tests now use `__version__` import instead of hardcoded values
95
+
96
+ ## 0.3.0
97
+
98
+ ### Added
99
+ - `bulla manifest --publish` — anchor manifest commitment hash to Bitcoin timechain via OpenTimestamps
100
+ - `bulla manifest --verify` — verify OTS proof on a published manifest
101
+ - `bulla manifest --verify --upgrade` — upgrade pending proofs to confirmed after Bitcoin block inclusion
102
+ - Optional `[ots]` extra: `pip install bulla[ots]` (base install stays single-dependency)
103
+ - Commitment hash excludes OTS fields for deterministic verification after publish
104
+ - 11 new OTS tests (mocked calendars, no network required)
105
+
106
+ ## 0.2.0
107
+
108
+ ### Added
109
+ - `bulla manifest` — generate and validate Bulla Manifest files from MCP tool definitions
110
+ - `bulla manifest --from-json` — generate from MCP manifest JSON
111
+ - `bulla manifest --from-server` — generate from live MCP server
112
+ - `bulla manifest --validate` — validate existing manifest YAML
113
+ - `bulla manifest --examples` — generate example manifests to see the format
114
+ - `bulla scan` — scan live MCP server(s) via stdio and diagnose
115
+ - `bulla init` — interactive wizard to generate a composition YAML
116
+ - `bulla diagnose --brief` — one-line-per-file summary output
117
+ - `BullaGuard` Python API for programmatic composition analysis
118
+ - Convention taxonomy (10 dimensions) with field-pattern inference
119
+ - Auto-validation after manifest generation
120
+ - "Now what?" guidance in `check` output on failure
121
+ - Quickstart guide when running bare `bulla` with no subcommand
122
+ - SARIF output format for GitHub Code Scanning integration
123
+
124
+ ### Fixed
125
+ - Confidence mapping: classifier internal grades (`high`/`medium`) now correctly map to manifest spec vocabulary (`declared`/`inferred`/`unknown`)
126
+ - `_examples_dir()` portability for installed packages
127
+
128
+ ## 0.1.0
129
+
130
+ ### Added
131
+ - `bulla diagnose` — full sheaf cohomology diagnostic with blind spot detection
132
+ - `bulla check` — CI/CD gate with configurable thresholds
133
+ - `bulla infer` — infer proto-composition from MCP manifest JSON
134
+ - Text, JSON, and SARIF output formats
135
+ - Exact rational arithmetic (no floating-point) via Python `Fraction`
136
+ - 9 bundled example compositions (financial, code review, ETL, RAG, auth, MCP)
137
+ - 107 tests, single dependency (PyYAML)
bulla-0.7.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Res Agentica
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
bulla-0.7.0/PKG-INFO ADDED
@@ -0,0 +1,302 @@
1
+ Metadata-Version: 2.4
2
+ Name: bulla
3
+ Version: 0.7.0
4
+ Summary: Witness kernel for agent tool compositions — diagnose, attest, seal
5
+ Project-URL: Homepage, https://github.com/jkomkov/bulla
6
+ Project-URL: Issues, https://github.com/jkomkov/bulla/issues
7
+ Author: Glyph
8
+ License-Expression: MIT
9
+ License-File: LICENSE
10
+ Keywords: agents,bulla,coherence,composition,mcp,witness
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Topic :: Software Development :: Quality Assurance
20
+ Requires-Python: >=3.10
21
+ Requires-Dist: pyyaml>=6.0
22
+ Provides-Extra: ots
23
+ Requires-Dist: opentimestamps-client>=0.7; extra == 'ots'
24
+ Description-Content-Type: text/markdown
25
+
26
+ # bulla
27
+
28
+ Witness kernel for agent tool compositions — diagnose, attest, seal. Finds semantic blind spots that bilateral verification cannot reach and recommends bridge annotations to eliminate them.
29
+
30
+ **Zero heavy dependencies.** Only requires PyYAML. No numpy, no scipy, no LLM calls. Installs in under a second.
31
+
32
+ > **Naming**: *Bulla* is the protocol and tool. *SEAM* is the underlying theory (see [The Seam Protocol](../papers/seam/)). *Glyph* is the company.
33
+
34
+ ## Install
35
+
36
+ ```bash
37
+ pip install bulla
38
+ ```
39
+
40
+ ## Quick start
41
+
42
+ Run the built-in examples to see output immediately:
43
+
44
+ ```bash
45
+ bulla diagnose --examples
46
+ ```
47
+
48
+ Diagnose your own composition:
49
+
50
+ ```bash
51
+ bulla diagnose my_pipeline.yaml
52
+ ```
53
+
54
+ ## Library API
55
+
56
+ `BullaGuard` is the primary programmatic interface. Use it to embed coherence analysis in any Python application, agent framework, or CI pipeline.
57
+
58
+ ```python
59
+ from bulla import BullaGuard, BullaCheckError
60
+
61
+ # Path A: From raw tool definitions (the framework integration path)
62
+ guard = BullaGuard.from_tools({
63
+ "invoice_parser": {
64
+ "fields": ["total_amount", "due_date", "line_items", "currency"],
65
+ "conventions": {"amount_unit": "dollars", "date_format": "ISO-8601"},
66
+ },
67
+ "settlement_engine": {
68
+ "fields": ["amount", "settlement_date", "ledger_entry"],
69
+ "conventions": {"amount_unit": "cents"},
70
+ },
71
+ }, edges=[("invoice_parser", "settlement_engine")])
72
+
73
+ # Path B: From MCP manifest JSON
74
+ guard = BullaGuard.from_mcp_manifest("manifest.json")
75
+
76
+ # Path C: From YAML composition (the v0.1 path)
77
+ guard = BullaGuard.from_composition("pipeline.yaml")
78
+
79
+ # Path D: From a live MCP server via stdio
80
+ guard = BullaGuard.from_mcp_server("python my_server.py")
81
+
82
+ # Diagnose
83
+ diag = guard.diagnose()
84
+ diag.coherence_fee # int
85
+ diag.blind_spots # list[BlindSpot]
86
+ diag.bridges # list[Bridge]
87
+
88
+ # Check (raises BullaCheckError if thresholds exceeded)
89
+ guard.check(max_blind_spots=0, max_unbridged=0)
90
+
91
+ # Export
92
+ guard.to_yaml("pipeline.yaml") # save for CI
93
+ guard.to_json() # JSON string with version + hash
94
+ guard.to_sarif() # SARIF string
95
+ ```
96
+
97
+ ### Framework integration example
98
+
99
+ A LangChain integration becomes:
100
+
101
+ ```python
102
+ from bulla import BullaGuard
103
+
104
+ class BullaCoherenceCallback(BaseCallbackHandler):
105
+ def on_chain_start(self, serialized, inputs, **kwargs):
106
+ tools = extract_tools_from_chain(serialized)
107
+ guard = BullaGuard.from_tools(tools)
108
+ diag = guard.diagnose()
109
+ if diag.coherence_fee > 0:
110
+ warnings.warn(f"Composition has {len(diag.blind_spots)} blind spots")
111
+ ```
112
+
113
+ ## What it does
114
+
115
+ When tools in a pipeline share implicit conventions (date formats, unit scales, encoding schemes), some of those conventions may be invisible to bilateral verification -- each pair of tools looks correct in isolation, but the pipeline as a whole can silently produce wrong results.
116
+
117
+ bulla computes the **coherence fee**: the number of independent semantic dimensions that fall through the cracks of pairwise checks. For each blind spot, it recommends a **bridge** -- a specific field to expose in the tool's observable schema.
118
+
119
+ ```
120
+ Financial Analysis Pipeline
121
+ ═══════════════════════════
122
+
123
+ Topology: 3 tools, 3 edges, beta_1 = 1
124
+
125
+ Blind spots (2):
126
+ [1] day_conv_match (data_provider -> financial_analysis)
127
+ day_convention hidden on both sides
128
+ [2] metric_type_match (financial_analysis -> portfolio_verification)
129
+ risk_metric hidden on both sides
130
+
131
+ Recommended bridges:
132
+ [1] Add 'day_convention' to F(data_provider) and F(financial_analysis)
133
+ [2] Add 'risk_metric' to F(financial_analysis) and F(portfolio_verification)
134
+
135
+ After bridging: fee = 0
136
+ ```
137
+
138
+ ## Composition format
139
+
140
+ Compositions are YAML files that describe your tool pipeline. See [`composition-schema.json`](composition-schema.json) for the full schema.
141
+
142
+ ```yaml
143
+ name: My Pipeline
144
+
145
+ tools:
146
+ tool_a:
147
+ internal_state: [field_x, field_y, hidden_z]
148
+ observable_schema: [field_x, field_y]
149
+
150
+ tool_b:
151
+ internal_state: [field_x, hidden_z]
152
+ observable_schema: [field_x]
153
+
154
+ edges:
155
+ - from: tool_a
156
+ to: tool_b
157
+ dimensions:
158
+ - name: x_match
159
+ from_field: field_x
160
+ to_field: field_x
161
+ - name: z_match
162
+ from_field: hidden_z
163
+ to_field: hidden_z
164
+ ```
165
+
166
+ - **`internal_state`**: All semantic dimensions the tool operates on internally (the full stalk S(v)).
167
+ - **`observable_schema`**: Dimensions visible in the tool's API (the observable sub-sheaf F(v)). Must be a subset of `internal_state`.
168
+ - **`edges`**: Bilateral interfaces between tools. Each dimension names a shared convention.
169
+
170
+ A dimension is a **blind spot** when `from_field` or `to_field` is in `internal_state` but not in `observable_schema` of the respective tool.
171
+
172
+ ## Commands
173
+
174
+ ### `bulla diagnose`
175
+
176
+ Diagnose compositions and report blind spots, bridges, and the coherence fee.
177
+
178
+ ```bash
179
+ bulla diagnose pipeline.yaml # text output
180
+ bulla diagnose --format json pipeline.yaml # JSON with version + SHA-256
181
+ bulla diagnose --format sarif pipeline.yaml # SARIF for GitHub code scanning
182
+ bulla diagnose --examples # run on bundled examples
183
+ ```
184
+
185
+ ### `bulla check`
186
+
187
+ CI/CD gate. Exits with code 1 if any composition exceeds the specified thresholds.
188
+
189
+ ```bash
190
+ bulla check pipeline.yaml # default: --max-blind-spots 0 --max-unbridged 0
191
+ bulla check --max-blind-spots 2 compositions/ # allow up to 2 blind spots
192
+ bulla check --format sarif compositions/ > results.sarif # SARIF for GitHub Actions
193
+ ```
194
+
195
+ ### `bulla scan`
196
+
197
+ Scan live MCP servers via stdio. Zero configuration — no YAML required.
198
+
199
+ ```bash
200
+ bulla scan "python my_server.py" # single server
201
+ bulla scan "python server_a.py" "python server_b.py" # multi-server composition
202
+ bulla scan "python server.py" -o pipeline.yaml # save for CI
203
+ bulla scan "python server.py" --format json # JSON diagnostic
204
+ ```
205
+
206
+ The scanner spawns each server as a subprocess, performs the MCP initialize handshake, queries `tools/list`, and auto-generates a composition using the heuristic dimension classifier. No MCP SDK dependency.
207
+
208
+ ### `bulla manifest`
209
+
210
+ Generate or validate [Bulla Manifest](bulla-manifest-spec-v0.1.md) files.
211
+
212
+ ```bash
213
+ bulla manifest --from-json tools.json -o manifest.yaml # from MCP manifest JSON
214
+ bulla manifest --from-server "python server.py" # from live MCP server
215
+ bulla manifest --validate manifest.yaml # validate against spec
216
+ ```
217
+
218
+ ### `bulla init`
219
+
220
+ Interactive wizard to generate a composition YAML.
221
+
222
+ ```bash
223
+ bulla init
224
+ bulla init -o my_pipeline.yaml
225
+ ```
226
+
227
+ ### `bulla infer`
228
+
229
+ Infer a proto-composition from an MCP manifest JSON.
230
+
231
+ ```bash
232
+ bulla infer manifest.json # stdout
233
+ bulla infer manifest.json -o proto.yaml # save to file
234
+ ```
235
+
236
+ ### `bulla --version`
237
+
238
+ Print the installed version.
239
+
240
+ ## Bulla Manifest Specification
241
+
242
+ The [Bulla Manifest Spec v0.1](bulla-manifest-spec-v0.1.md) defines a per-tool convention declaration format. Each manifest declares what semantic conventions a single tool assumes (e.g. "amounts are in dollars", "dates are ISO-8601").
243
+
244
+ See the [spec](bulla-manifest-spec-v0.1.md), [JSON Schema](bulla-manifest-schema.json), and the built-in [taxonomy](src/bulla/taxonomy.yaml) of 10 convention dimensions.
245
+
246
+ ## CI integration
247
+
248
+ ### GitHub Actions with SARIF
249
+
250
+ ```yaml
251
+ name: bulla
252
+ on: [push, pull_request]
253
+ jobs:
254
+ lint:
255
+ runs-on: ubuntu-latest
256
+ steps:
257
+ - uses: actions/checkout@v4
258
+ - uses: actions/setup-python@v5
259
+ with:
260
+ python-version: "3.11"
261
+ - run: pip install bulla
262
+ - run: bulla check --format sarif compositions/ > bulla.sarif
263
+ - uses: github/codeql-action/upload-sarif@v3
264
+ if: always()
265
+ with:
266
+ sarif_file: bulla.sarif
267
+ ```
268
+
269
+ This uploads results to GitHub's code scanning tab, where blind spots appear as annotations on pull requests.
270
+
271
+ ### Simple pass/fail
272
+
273
+ ```yaml
274
+ - run: pip install bulla
275
+ - run: bulla check compositions/
276
+ ```
277
+
278
+ ## Output formats
279
+
280
+ | Format | Flag | Use case |
281
+ |--------|------|----------|
282
+ | Text | `--format text` (default) | Developer terminal |
283
+ | JSON | `--format json` | Orchestrator integration, includes version + SHA-256 |
284
+ | SARIF | `--format sarif` | GitHub code scanning, VS Code SARIF viewer |
285
+
286
+ ## How it works
287
+
288
+ bulla builds a discrete coboundary operator (delta-0) from C^0 (tool dimensions) to C^1 (edge dimensions) for both the observable sheaf F (written F_obs below) and the full sheaf S (written F_full below). The coherence fee is:
289
+
290
+ ```
291
+ fee = dim H^1(F_obs) - dim H^1(F_full)
292
+ = (dim C^1 - rank delta_obs) - (dim C^1 - rank delta_full)
293
+ = rank delta_full - rank delta_obs
294
+ ```
295
+
296
+ Each unit of fee corresponds to an independent semantic dimension that bilateral verification cannot detect. Bridging (exposing hidden fields in the observable schema) increases rank(delta_obs) until it matches rank(delta_full).
297
+
298
+ The rank computation uses exact arithmetic (Python's `fractions.Fraction` class) via Gaussian elimination -- no floating-point tolerance, no numpy dependency.
299
+
300
+ ## License
301
+
302
+ MIT