petra-nn 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. petra_nn-0.1.0/LICENSE +21 -0
  2. petra_nn-0.1.0/PKG-INFO +513 -0
  3. petra_nn-0.1.0/README.md +485 -0
  4. petra_nn-0.1.0/petra_nn.egg-info/PKG-INFO +513 -0
  5. petra_nn-0.1.0/petra_nn.egg-info/SOURCES.txt +39 -0
  6. petra_nn-0.1.0/petra_nn.egg-info/dependency_links.txt +1 -0
  7. petra_nn-0.1.0/petra_nn.egg-info/requires.txt +4 -0
  8. petra_nn-0.1.0/petra_nn.egg-info/top_level.txt +1 -0
  9. petra_nn-0.1.0/petri_net_nn/__init__.py +206 -0
  10. petra_nn-0.1.0/petri_net_nn/adapter.py +594 -0
  11. petra_nn-0.1.0/petri_net_nn/anomalies.py +140 -0
  12. petra_nn-0.1.0/petri_net_nn/bisimulation.py +395 -0
  13. petra_nn-0.1.0/petri_net_nn/bpmn.py +628 -0
  14. petra_nn-0.1.0/petri_net_nn/compiler.py +702 -0
  15. petra_nn-0.1.0/petri_net_nn/ctl.py +459 -0
  16. petra_nn-0.1.0/petri_net_nn/interpretability.py +1984 -0
  17. petra_nn-0.1.0/petri_net_nn/petri_net.py +591 -0
  18. petra_nn-0.1.0/petri_net_nn/pnml.py +274 -0
  19. petra_nn-0.1.0/petri_net_nn/sif.py +127 -0
  20. petra_nn-0.1.0/petri_net_nn/soundness.py +329 -0
  21. petra_nn-0.1.0/petri_net_nn/subnets.py +181 -0
  22. petra_nn-0.1.0/petri_net_nn/traces.py +332 -0
  23. petra_nn-0.1.0/petri_net_nn/xes.py +121 -0
  24. petra_nn-0.1.0/pyproject.toml +75 -0
  25. petra_nn-0.1.0/setup.cfg +4 -0
  26. petra_nn-0.1.0/tests/test_adapter.py +373 -0
  27. petra_nn-0.1.0/tests/test_anomalies.py +238 -0
  28. petra_nn-0.1.0/tests/test_bisimulation.py +439 -0
  29. petra_nn-0.1.0/tests/test_bpmn.py +686 -0
  30. petra_nn-0.1.0/tests/test_collaboration.py +182 -0
  31. petra_nn-0.1.0/tests/test_compiler.py +728 -0
  32. petra_nn-0.1.0/tests/test_ctl.py +353 -0
  33. petra_nn-0.1.0/tests/test_interpretability.py +1177 -0
  34. petra_nn-0.1.0/tests/test_non_bpmn_substrate.py +249 -0
  35. petra_nn-0.1.0/tests/test_petri_net.py +555 -0
  36. petra_nn-0.1.0/tests/test_pnml.py +231 -0
  37. petra_nn-0.1.0/tests/test_sif.py +160 -0
  38. petra_nn-0.1.0/tests/test_soundness.py +272 -0
  39. petra_nn-0.1.0/tests/test_subnets.py +305 -0
  40. petra_nn-0.1.0/tests/test_training_methodology.py +303 -0
  41. petra_nn-0.1.0/tests/test_xes.py +190 -0
petra_nn-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Edward Chalk / sapientronic.ai
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
petra_nn-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,513 @@
1
+ Metadata-Version: 2.4
2
+ Name: petra-nn
3
+ Version: 0.1.0
4
+ Summary: Petri-Net Trained Architecture — formally-verified learnable process intelligence
5
+ Author-email: Edward Chalk <edward@fleetingswallow.com>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://fleetingswallow.com
8
+ Project-URL: Repository, https://github.com/pcoz/formally-verified-learnable-process-intelligence
9
+ Project-URL: Documentation, https://github.com/pcoz/formally-verified-learnable-process-intelligence#readme
10
+ Project-URL: Changelog, https://github.com/pcoz/formally-verified-learnable-process-intelligence/blob/main/CHANGELOG.md
11
+ Keywords: petri-net,process-mining,bpmn,neural-network,interpretable-ml,formal-verification,bisimulation,anomaly-detection,workflow
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Intended Audience :: Science/Research
15
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
16
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Operating System :: OS Independent
21
+ Requires-Python: >=3.11
22
+ Description-Content-Type: text/markdown
23
+ License-File: LICENSE
24
+ Requires-Dist: torch>=2.0
25
+ Provides-Extra: dev
26
+ Requires-Dist: pytest>=7; extra == "dev"
27
+ Dynamic: license-file
28
+
29
+ # PETRA
30
+
31
+ [![PyPI](https://img.shields.io/pypi/v/petra-nn.svg)](https://pypi.org/project/petra-nn/)
32
+ [![Python](https://img.shields.io/pypi/pyversions/petra-nn.svg)](https://pypi.org/project/petra-nn/)
33
+ [![tests](https://github.com/pcoz/formally-verified-learnable-process-intelligence/actions/workflows/test.yml/badge.svg)](https://github.com/pcoz/formally-verified-learnable-process-intelligence/actions/workflows/test.yml)
34
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
35
+
36
+ > **Not a developer?** Start with the
37
+ > [**Business Analyst Guide**](docs/BUSINESS_ANALYST_GUIDE.md) —
38
+ > a no-code, no-maths walkthrough of every concept in this
39
+ > framework (Petri nets, BPMN translation, coloured tokens,
40
+ > bisimulation, training, rule extraction, the lot) aimed at
41
+ > process analysts, compliance officers, and project managers.
42
+
43
+ **PETRA** (*Petri-Net Trained Architecture*) learns how a
44
+ **discrete-event system** actually behaves from its **execution
45
+ logs**, and turns the learned behaviour into four things you can
46
+ act on:
47
+
48
+ - **Readable decision rules** — distilled from the trained weights
49
+ in domain vocabulary, e.g. *"if amount > £1,000 → credit-review."*
50
+ - **Anomaly scores** — pinned to specific named elements, not
51
+ opaque whole-trace numbers.
52
+ - **Formal equivalence proofs** — strong bisimulation between two
53
+ variants, *before* either is deployed.
54
+ - **Cost rankings** — over behaviour-preserving refactorings,
55
+ fitted to your observed workload.
56
+
57
+ **Take a loan-approval process and 10,000 recorded loans.** PETRA
58
+ tells you which rules the actual decisions follow (*"if amount >
59
+ £1,000 the application gets a credit-review"*), flags loans that
60
+ took unusual paths (*someone skipped the credit check on a
61
+ high-value application*), and lets you compare two candidate
62
+ redesigns of the process — **proving they do the same thing** and
63
+ showing which one **costs less to run** on the observed workload.
64
+ The same primitives cover distributed-system protocols,
65
+ manufacturing lines, laboratory recipes, multi-agent coordination,
66
+ IT incident management, and biology signalling pathways.
67
+
68
+ You give PETRA a **Petri net** describing the system's structure
69
+ and an execution log. A Petri net is the standard formal model
70
+ for this class of system:
71
+
72
+ - ***places*** hold tokens (work items, requests, messages);
73
+ - ***transitions*** move tokens between places (a step firing);
74
+ - ***arcs*** between places and transitions capture the control flow.
75
+
76
+ PETRA compiles the Petri net into a neural network whose
77
+ **topology *is* the Petri net** — *one trainable weight per arc,
78
+ one trainable threshold per transition, nothing else can be
79
+ learned.* Training fits the network to the log. Because every
80
+ parameter corresponds to a named element of the original structure,
81
+ the trained model stays:
82
+
83
+ - **interpretable** — every parameter has a name from your domain;
84
+ - **structurally verified** — by construction, before you train;
85
+ - **amenable to formal analysis** — bisimulation, soundness,
86
+ equivalence proofs over the structure.
87
+
88
+ That last property is what makes **equivalence proofs** and
89
+ **cost-ranked refactoring** possible — neither of which you can do
90
+ with a generic ML model.
91
+
92
+ ---
93
+
94
+ ## What it works on
95
+
96
+ PETRA fits any **finite-state, terminating, discrete-event system**
97
+ for which you have **observable execution traces of multiple
98
+ instances**. That class is much larger than it sounds — for each
99
+ domain below, the second column says why it doesn't *look* like an
100
+ ML target at first glance, and the third says why the substrate
101
+ fits anyway:
102
+
103
+ | Domain | Why this isn't obvious | Why it fits anyway | Shipped scenarios |
104
+ |---|---|---|---|
105
+ | **Business processes** | BPM tools handle workflows; generic ML handles logs. The two are usually treated as separate problems with separate tools. | A BPMN diagram **is** a Petri net. The structural verification supplies interpretability and formal analysis; the logs supply the learning signal. Both at once, in one trained model. | [`cost_ranked_refactoring`](examples/cost_ranked_refactoring/), [`credit_approval_coloured`](examples/credit_approval_coloured/), [`incident_management`](examples/incident_management/) |
106
+ | **Distributed-system protocols** | Protocols are hand-specified state machines. You write the spec; you don't *learn* it. | The spec gives the structure. Production traces show how the deployed system **actually** behaves — including attack patterns and Byzantine faults that aren't in the spec. PETRA flags the deviations. | [`distributed_consensus`](examples/distributed_consensus/) (2PC), [`network_protocol`](examples/network_protocol/) (TCP) |
107
+ | **Multi-agent coordination** | Multi-agent systems are usually studied with game theory or reinforcement learning, not formal models. | Coordination *protocols* (contract-net, FIPA-ACL) are explicit message-passing flows. Each agent's state machine plus the inter-agent message places compose as a multi-pool Petri net naturally. | [`multi_agent_coordination`](examples/multi_agent_coordination/) |
108
+ | **Manufacturing & supply chain** | Simulated with domain-specific tools (Simio, AnyLogic); ML in this space usually means demand forecasting, not workflow modelling. | Production lines literally **are** discrete-event token systems — parts moving between stations, batches accumulating, quality gates routing. The Phase 9 primitives (multi-token arcs, durations, inhibitor arcs) model these directly. | [`manufacturing_cell`](examples/manufacturing_cell/), [`paint_shop`](examples/paint_shop/), [`batch_packaging`](examples/batch_packaging/) |
109
+ | **Operational coordination** | Priority dispatch and mutex feel like OS-level concerns, not "process intelligence". | Any system using these primitives has them in its control flow. Modelling at the Petri-net level lets you analyse the system's *actual* coordination against the protocol it declares. | [`priority_dispatch`](examples/priority_dispatch/), [`resource_lock`](examples/resource_lock/) |
110
+ | **Laboratory & clinical protocols** | Lab protocols feel domain-specific (chemistry, biology) and are usually owned by proprietary LIMS systems. | A protocol is a sequenced, gated workflow with deviations to flag — same shape as a business process. The lab's electronic logs are already structured. | [`scientific_workflow`](examples/scientific_workflow/) (PCR) |
111
+ | **Cell-biology signalling pathways** | Pathway analysis is a biology problem; ML usually means gene expression or protein structure, not "train a model of the pathway". | Reactome-style pathway databases literally store pathways as Petri-net structures: *places are molecule pools, transitions are reactions.* The activation channel is the natural soft analogue of pathway flux. | [`biological_signalling`](examples/biological_signalling/), [`mapk_pathway`](examples/mapk_pathway/) (real Pathway Commons SIF) |
112
+
113
+ The same primitives cover more ground than the shipped scenarios
114
+ exercise: *state machines in embedded software*, *regulatory and
115
+ compliance workflows*, *games with bounded state*, *contract /
116
+ treaty / agreement workflows*, *scientific data pipelines*, *RPA
117
+ scripts*.
118
+
119
+ PETRA works best when **all four properties below** hold:
120
+
121
+ | Property | What it means |
122
+ |---|---|
123
+ | **Discrete events** | State changes at identifiable moments (a transition firing), rather than continuously over time. |
124
+ | **Multiple-instance trace data** | You have many recorded runs of the system to learn from. *One run isn't enough.* |
125
+ | **Stable structure for training** | The place/transition graph stays fixed while you learn the dynamics within it. |
126
+ | **Tractably finite state space** | The compiled Petri net fits in memory and trains in reasonable time. Thousands of places and transitions work fine; problems that need a whole economy or the entire internet at full resolution don't fit. |
127
+
128
+ Fluid dynamics, classical mechanics, analogue control, and similar
129
+ **continuous-time / continuous-state physics need a different
130
+ substrate** — Petri nets are discrete by design.
131
+
132
+ ## What you get out of it
133
+
134
+ PETRA combines a **fixed verified topology** with **learned
135
+ dynamics within it**. Each individual capability below is possible
136
+ in isolation with some other tool — but only PETRA gives you all
137
+ four over a single trained model:
138
+
139
+ | Capability | What it means in practice |
140
+ |---|---|
141
+ | **Interpretability at named domain elements** | Every parameter corresponds to a BPMN task, a biological pathway component, a protocol state — not an opaque vector index. |
142
+ | **Formal equivalence checks** | Strong bisimulation between two trained models, *before* either is deployed: *"this redesign behaves identically to the old version."* |
143
+ | **Anomaly detection at the transition level** | Residuals pinned to specific transitions — *"the credit-check step didn't fire when the data says it should have"* — not opaque whole-trace scores. |
144
+ | **Cost-ranked refactoring** | Provably-equivalent variants compared by realised-execution cost on the trained firing distribution: *"Variant B is provably equivalent to Variant A and 6× cheaper."* |
145
+
146
+ PETRA's shape fits problems with **explicit place/transition
147
+ structure**. Arbitrary sequence modelling (free-text,
148
+ unrestricted time-series) fits something else.
149
+
150
+ ---
151
+
152
+ ## What makes this approach unusual
153
+
154
+ | Capability | Most ML | Classical Petri-net analysis | This framework |
155
+ |---|---|---|---|
156
+ | Learns from execution traces | ✓ | ✗ | ✓ |
157
+ | Preserves verified structure | ✗ | ✓ | ✓ |
158
+ | Bisimulation-based equivalence | ✗ | partial | ✓ |
159
+ | Interpretable at named elements | ✗ | n/a (no learning) | ✓ |
160
+ | Detects structurally-grounded anomalies | weak | ✗ | ✓ |
161
+ | Ranks behaviour-preserving variants by cost | ✗ | ✗ | ✓ |
162
+
163
+ The bisimulation + cost-ranking combination is what makes
164
+ **provably-safe process refactoring** possible: refactor a process,
165
+ prove the new version is behaviourally equivalent to the old one,
166
+ then rank the variants by realised-execution cost. Nobody else has
167
+ that running with tests.
168
+
169
+ ---
170
+
171
+ ## How PETRA fits with classical Petri-net tools
172
+
173
+ PETRA is **complementary**, not competitive, to the established
174
+ Petri-net tool ecosystem. Each classical tool answers a different
175
+ question over the same Petri-net structure:
176
+
177
+ | Tool | What it's best at | Where PETRA differs |
178
+ |---|---|---|
179
+ | **CPN Tools** (Aarhus) | Reference implementation of Coloured Petri Nets — full Standard-ML-style colour-set typing (CPN ML), state-space verification, mature GUI simulator. | CPN Tools verifies a *given* CPN; PETRA *trains* a model of how the net's transitions are actually used from execution traces, including learning guard thresholds from per-token values rather than taking them as given. CPN Tools' colour sets are far richer than PETRA's CPN-lite scalar token values. |
180
+ | **GreatSPN** (Turin) | Generalised Stochastic Petri Nets — exponentially-distributed firing times, analytical CTMC throughput, performance bounds. | GreatSPN gives closed-form stationary throughput under a stipulated rate model; PETRA's stochastic rates are compiler-level multipliers used during training. Different question. |
181
+ | **TINA** (LAAS-CNRS) | Time Petri nets with intervals, state-space exploration, integrated CTL/LTL model checking via NuSMV. | TINA proves temporal-logic invariants about the *specified* behaviour; PETRA learns how the deployed system actually behaves and flags deviations. Phase 11 of the PETRA roadmap aims to wire model checking in directly. |
182
+ | **ProM** (Eindhoven) | Process mining — Alpha / Inductive / Heuristics miners discover a Petri net from execution logs; conformance checking; large plugin ecosystem. | ProM does *structure discovery* from logs, which PETRA does not do yet (it is Phase 12 of PETRA's roadmap, not yet built). The two are a natural pair: ProM discovers, PETRA trains dynamics on the result. |
183
+
184
+ **The thing PETRA does that none of them do:** combine a
185
+ *learned-from-traces* dynamics model with a *structurally verified*
186
+ Petri-net substrate, then extract interpretable rules from the
187
+ learned weights and rank behaviour-preserving refactorings by cost.
188
+ None of those four tools offers that combination.
189
+
190
+ ### A complementary analysis stack
191
+
192
+ The five tools naturally compose end-to-end on the same model:
193
+
194
+ 1. **ProM** discovers the structure from logs.
195
+ 2. **CPN Tools** verifies its soundness.
196
+ 3. **GreatSPN** gives stochastic throughput bounds.
197
+ 4. **TINA** proves temporal invariants.
198
+ 5. **PETRA** learns the dynamics that actually occur in production,
199
+ distils the routing rules, detects deviations, and ranks
200
+ refactorings.
201
+
202
+ **PNML support is the bridge that makes this stack possible** —
203
+ any of those tools' output can now feed straight into PETRA.
204
+ That's why PNML support is high-leverage despite being only a few hundred
205
+ lines of code: it converts PETRA from a standalone Python library
206
+ into an ecosystem citizen, *one PNML file away from any of the
207
+ above*.
208
+
209
+ ---
210
+
211
+ ## Using the whole toolchain together
212
+
213
+ Suppose a bank wants to **unify the loan-approval process across
214
+ two regional offices** that have drifted apart over the years. The
215
+ shared starting point is the offices' logs — *tens of thousands of
216
+ recorded applications each, all the routing decisions captured, no
217
+ documented "correct" process to refer back to.*
218
+
219
+ The five tools work through it in order:
220
+
221
+ 1. **ProM** runs an *inductive miner* over each office's log and
222
+ produces a Petri net per office. *You now have two structural
223
+ models discovered directly from data, where before there was
224
+ nothing.*
225
+
226
+ 2. **CPN Tools** opens each net and verifies elementary soundness
227
+ — *proper completion, deadlock-freedom, boundedness.* Both
228
+ pass; the offices' actual behaviour does conform to a sound
229
+ workflow net.
230
+
231
+ 3. **GreatSPN** annotates the nets with stochastic firing rates
232
+ derived from the same logs and computes closed-form throughput
233
+ bounds. *Office A maxes out at ~250 applications/day, Office B
234
+ at ~180/day.*
235
+
236
+ 4. **TINA** specifies the regulatory invariants the bank's
237
+ compliance team cares about — *"every approved loan eventually
238
+ fires the audit-log transition"*, *"no decline fires without a
239
+ prior credit-check"* — and model-checks each net against them
240
+ via CTL. Office A passes both; **Office B violates the audit-log
241
+ invariant** on a small subset of paths, surfaced as a
242
+ counterexample trace.
243
+
244
+ 5. **PETRA** takes the verified nets plus the original logs and:
245
+
246
+ - **Trains** each into a differentiable model whose weights
247
+ correspond to the offices' actual routing decisions.
248
+ - **Distils** the trained weights into readable rules — *Office
249
+ A approves at amount > £5,000 with a strict credit-check gate;
250
+ Office B at amount > £8,000 with a more lenient gate. Same
251
+ shape, different thresholds.*
252
+ - **Runs strong bisimulation** between the two trained nets.
253
+ They are **not** equivalent — which is the answer to *"are the
254
+ offices doing the same thing?"* (they aren't).
255
+ - **Scores held-out applications** for anomalies pinned to
256
+ specific transitions, so the compliance team can see which
257
+ Office B traces actually skipped the audit-log.
258
+ - **Ranks two proposed unified processes** by realised-execution
259
+ cost on the combined trace distribution, with bisimulation
260
+ proving each is behaviourally equivalent to a reference variant.
261
+
262
+ The output is something the bank's process team can act on:
263
+
264
+ - an *evidence-backed comparison* of the two offices,
265
+ - a *verified equivalence claim* (or proof that one doesn't hold),
266
+ - a *cost-ranked redesign*, and
267
+ - a *list of compliance-flagged traces* to investigate.
268
+
269
+ **None of the five tools alone produces all of that.** The PNML
270
+ format is the bridge — each tool's output can be read by the next
271
+ without bespoke glue.
272
+
273
+ ---
274
+
275
+ ## Why this matters
276
+
277
+ The walkthrough above isn't just a tidy demo. It collapses several
278
+ pieces of work that today require **separate teams and months of
279
+ effort** into one analytical pipeline. Three questions worth
280
+ asking about it: *how valuable is the capability, what are
281
+ organisations paying today to approximate it, and why is it
282
+ genuinely hard.*
283
+
284
+ ### How valuable is this capability?
285
+
286
+ In one pass over real logs, the chain produces four things any
287
+ large institution would want to know about its own operating model:
288
+
289
+ - **Evidence-backed comparison** of how two units *actually*
290
+ operate, not how they *think* they operate.
291
+ - **Verified equivalence claims** — proof that a redesign hasn't
292
+ silently changed behaviour.
293
+ - **Cost-ranked redesigns** fitted to real workload, not stipulated
294
+ by consultants.
295
+ - **Transition-level compliance flags** that point a regulator (or
296
+ an audit team) at specific named traces and the specific named
297
+ transition each one diverged at.
298
+
299
+ The strategic frame is *unlocking process change at organisational
300
+ scale*. Most large institutions are stuck in the change-aversion
301
+ equilibrium the [ROADMAP](docs/ROADMAP.md) describes: redesigns
302
+ are risky, so they don't happen, so legacy variation accumulates,
303
+ so the next redesign is even riskier. **Making refactoring safe
304
+ is the same shift that transformed software engineering** in the
305
+ 1990s and 2000s. Applied to banks, insurers, hospitals, and telcos,
306
+ this is operating-model-level value, not tooling-level.
307
+
308
+ ### What do organisations pay today to approximate this?
309
+
310
+ The work is currently spread across several budget lines, **none
311
+ of which delivers the full chain**:
312
+
313
+ | Spend category | Typical scale |
314
+ |---|---|
315
+ | **Process-mining licences** (Celonis, Signavio, UiPath Process Mining, Disco, Apromore) | £50k–£500k+/year per enterprise deployment; Celonis enterprise contracts routinely run into seven figures annually. |
316
+ | **Business-process / management consulting** for redesign and harmonisation (McKinsey, BCG, Bain, Deloitte, Accenture) | £2k–£5k+ per consultant-day; a typical *"unify the two offices"* project runs **£500k–£5M over 6–18 months**. |
317
+ | **Compliance and audit tooling** plus dedicated compliance headcount | Varies widely; for a regulated bank, easily **£1M+/year** on a single workflow class. |
318
+ | **BPM platforms** (Camunda, Pega, Appian, IBM BPM) | Six- to seven-figure annual licences, plus implementation partners on top. |
319
+
320
+ **Crucially, no current vendor sells the equivalence-proof +
321
+ cost-ranked-refactoring combination** the walkthrough produces.
322
+ Process mining tells you *what happened*; it doesn't **prove** two
323
+ redesigns are behaviourally equivalent, and it doesn't rank them
324
+ by cost with formal guarantees. That gap is where the consulting
325
+ spend goes — and consultants reach a *judgement*, not a *proof*.
326
+
327
+ ### Why is it hard to get today?
328
+
329
+ Several difficulties compound:
330
+
331
+ - **Skill scarcity.** The chain needs *process mining*, *formal
332
+ methods* (bisimulation, model checking), *stochastic modelling*,
333
+ *ML*, and *domain expertise*. Almost nobody has all of these;
334
+ assembled teams pay an integration tax.
335
+ - **Tool fragmentation.** ProM, CPN Tools, GreatSPN, and TINA each
336
+ have their own input formats, UIs, and learning curves. PNML
337
+ helps, but stitching the chain into a deployable workflow is
338
+ bespoke work each time.
339
+ - **Equivalence proofs aren't actually being established.** Even
340
+ with the tools in place, the load-bearing claim — *that a
341
+ redesign behaves identically to the original* — is rarely proved.
342
+ Teams settle for *"it passes UAT"* and *"stakeholders signed
343
+ off,"* which is why redesigns stay risky.
344
+ - **Months-to-years cycle time.** Process redesign at large
345
+ institutions is a multi-quarter project; ERP-class
346
+ reimplementations run multiple years. The risk is high enough
347
+ that change-aversion is rational — which keeps the cycle long,
348
+ which keeps the risk high.
349
+ - **Outputs aren't actionable at the transition level.**
350
+ Process-mining heatmaps tell you *"this area is slow"* but
351
+ rarely give a compliance officer a list of specific named
352
+ traces and the specific named transition each one diverged at.
353
+ The walkthrough above does exactly that.
354
+
355
+ **Net:** the capability is valuable, current spend on partial
356
+ substitutes is large and fragmented, and the gap PETRA targets
357
+ — verified equivalence and cost-ranked refactoring grounded in
358
+ real logs — isn't actually filled by anything else on the market.
359
+
360
+ ### What gets disrupted, and what doesn't
361
+
362
+ A reasonable follow-up question — *does this vaporise the
363
+ change-management market?* The honest answer is *yes for the
364
+ largest slice, but with three explicit qualifiers*.
365
+
366
+ **Where the framing holds.** The change-management market is
367
+ largely sized by the *risk* of process change. Consultants get
368
+ paid in proportion to that risk, because someone has to absorb it
369
+ — through interviews, workshops, target-state modelling,
370
+ shadow-running, UAT, post-go-live war rooms. If a redesign can be
371
+ **mechanically proven to behave identically** to the original and
372
+ **ranked by realised cost** against the actual workload, the risk
373
+ collapses. A lot of the spend that exists to manage that risk
374
+ loses its reason to exist. The walkthrough's four outputs replace
375
+ expensive *judgement calls* with cheap, repeatable, auditable
376
+ **artefacts**. That is the same shape as what happened to manual
377
+ QA once test automation matured, or to manual deployment once
378
+ CI/CD matured.
379
+
380
+ **Where it doesn't.** Three slices of the change-management market
381
+ survive intact:
382
+
383
+ | Slice | Why it survives |
384
+ |---|---|
385
+ | **The people side** | Stakeholder alignment, training, communications, org redesign, incentive restructuring. PETRA proves a redesign is behaviourally equivalent — *it doesn't make people accept it or rewire who reports to whom.* |
386
+ | **Deciding which redesigns to propose** | The substrate *verifies and ranks* candidates; it doesn't *generate* them. Until candidate-generation is automated (the [ROADMAP](docs/ROADMAP.md) flags this as a missing layer on top of PETRA), someone still has to imagine the alternatives — that's domain consulting work. |
387
+ | **The regulated-industry layer** | Compliance sign-off, regulator engagement, model-risk governance. A formal proof helps but doesn't replace the regulatory dance — and in some jurisdictions the regulator wants a *human* on the line. |
388
+
389
+ **Net:** PETRA collapses the **risk-absorption slice** of the
390
+ market — which is the largest and most expensive — but leaves
391
+ the **judgement**, **change**, and **governance** slices intact.
392
+ Roughly: the McKinsey/BCG/Bain *redesign-engagement* layer
393
+ shrinks dramatically; the Prosci/ADKAR *change-adoption* layer
394
+ doesn't.
395
+
396
+ ---
397
+
398
+ ## Worked examples
399
+
400
+ PETRA ships with **14 end-to-end scenarios** under `examples/`.
401
+ Each is a self-contained TOML configuration plus a paired test
402
+ that drives the full pipeline — *load the net, load the traces,
403
+ compile, train, extract rules, score anomalies.* They span
404
+ deliberately different domains to make the point that the
405
+ substrate isn't just for business processes.
406
+
407
+ Each scenario links to its own README with the long-form
408
+ explanation, the data source, the framework features it exercises,
409
+ and the load-bearing claims in its test.
410
+
411
+ | Scenario | What it demonstrates | Use case it represents |
412
+ |---|---|---|
413
+ | [**`cost_ranked_refactoring`**](examples/cost_ranked_refactoring/) | Two BPMN variants of the same approval process, proved equivalent by bisimulation, trained on a shared trace distribution, ranked by realised cost. **Variant B comes out ~6× cheaper** while doing provably the same thing. | *Provably-safe process refactoring* — pick redesigns with formal guarantees instead of guesswork. |
414
+ | [**`credit_approval_coloured`**](examples/credit_approval_coloured/) | Coloured tokens carry the loan amount; the compiled network **learns the approve/decline threshold from data** rather than taking the modeller's declared 1,000 as given. Learned thresholds land in the empirical band 900–1,500. | *Data-driven decision rules* — when the right threshold is in the data, not in someone's head. |
415
+ | [**`incident_management`**](examples/incident_management/) | Trains on the **real BPI Challenge 2013 incidents log** — 7,554 Volvo IT tickets, 65k events, shipped in the repo as a 1.3 MB gzipped XES file. Flags traces that skip the *Resolved* step before *Closing*. | *Real-world, large-scale, public business-process data* — proof that the framework scales beyond synthetic fixtures. |
416
+ | [**`distributed_consensus`**](examples/distributed_consensus/) | **Two-phase commit (2PC)** modelled as three composed pools (coordinator + two cohorts) with shared message places. Detects *Byzantine commit-after-low-vote* anomalies. | *Distributed-protocol verification* — flagging deviations against the spec from production traces. |
417
+ | [**`network_protocol`**](examples/network_protocol/) | **TCP three-way handshake** compiled from the RFC's state machine. After training on legitimate traces, flags **SYN-flood** and **half-open-connection** attacks as anomalies pinned to specific transitions. | *Security monitoring on protocol state machines* — attack-pattern detection grounded in the protocol's structural spec. |
418
+ | [**`multi_agent_coordination`**](examples/multi_agent_coordination/) | **Three-pool contract-net** protocol with bid-driven contractor selection. The AND-join rule extractor recovers the synchronisation rule over three input contributors; *pre-bid award* attempts are flagged as protocol violations. | *Coordination protocols between autonomous agents* — catching out-of-order coordination events. |
419
+ | [**`manufacturing_cell`**](examples/manufacturing_cell/) | Multi-station production line with **quality-gated ship-or-rework routing**. PETRA distils the quality-driven ship rule from production data; mis-shipped low-quality items are flagged as anomalies. | *Manufacturing and supply-chain analysis* — quality-conditional routing rules recovered from production data. |
420
+ | [**`paint_shop`**](examples/paint_shop/) | A cure step with declared **duration 3** — parts spend three time-steps in the cure transition before reaching inspection. Exercises the time-unrolled compiler's per-transition in-flight queue. | *Workflows with explicit step durations* — cure times, wait times, batched processing windows. |
421
+ | [**`batch_packaging`**](examples/batch_packaging/) | A bottle-to-crate transition with **input arc weight 6** — six bottles accumulate before the crate transition fires. Exercises multi-token arc multiplicities. | *Batching and aggregation patterns* — packaging lines, micro-batch processing, N-into-1 combination steps. |
422
+ | [**`priority_dispatch`**](examples/priority_dispatch/) | Three handlers with **declared firing-rate priors (3.0, 1.0, 0.5)** — a handler with a higher rate fires more eagerly for the same input. Training refines the priors against the observed dispatch distribution. | *Priority-aware task dispatch* — modeller priors carried through to training, then refined from data. |
423
+ | [**`resource_lock`**](examples/resource_lock/) | Two clients competing for a shared resource, with **inhibitor arcs enforcing the mutex** — lock-acquire fires only when lock-held is empty. Exercises the soft inhibitor gate `(1 − a(p))`. | *Mutex, semaphore, and other negative-precondition patterns* — exclusive access modelled directly in the dynamics. |
424
+ | [**`scientific_workflow`**](examples/scientific_workflow/) | **PCR (polymerase chain reaction)** modelled with a quality-gate transition that routes pass/fail. PETRA learns the gate from trace data and flags traces that skip it. | *Laboratory and clinical protocol conformance* — deviation analysis on scientific procedures. |
425
+ | [**`biological_signalling`**](examples/biological_signalling/) | A **kinase cascade** with signal-strength-conditioned fast/slow pathway routing; the XOR rule is distilled in the pathway components' vocabulary, not internal framework labels. | *Cell-biology pathway analysis* — Reactome-style pathways are essentially Petri nets; the same primitives that handle business processes model signalling networks too. |
426
+ | [**`mapk_pathway`**](examples/mapk_pathway/) | Loads the canonical **EGF → MAPK1/3 (ERK1/2) signalling cascade** from a Pathway Commons-style SIF file (real HGNC symbols, standard PC interaction types), compiles it, and propagates activation through the full receptor → adapter → small GTPase → MAP3K → MAP2K → MAPK → transcription-factor chain. | *Real biology format on real entities* — roadmap Phase 10 (ecosystem citizenship): any of the ~3,000 Reactome pathways is one Pathway Commons download away. |
427
+
428
+ Run any individual scenario with
429
+ `python -m pytest tests/scenarios/test_<scenario_name>.py`, or
430
+ the whole set with `python -m pytest tests/scenarios/`.
431
+
432
+ ---
433
+
434
+ ## Quick start
435
+
436
+ ```
437
+ pip install petra-nn
438
+ ```
439
+
440
+ Requires Python 3.11+ and brings `torch` in as a dependency.
441
+
442
+ ```python
443
+ from petri_net_nn import load_scenario
444
+
445
+ # Each examples/ subfolder contains a self-contained scenario as
446
+ # a TOML config plus an explanatory README.
447
+ ctx = load_scenario("examples/cost_ranked_refactoring/scenario.toml")
448
+ module, losses = ctx.train()
449
+ rules = ctx.extract_rules(module)
450
+ print(rules["xor"][0].description())
451
+ ```
452
+
453
+ The PyPI distribution name is `petra-nn` (the bare `petra` was
454
+ already taken on PyPI); the importable Python package is
455
+ `petri_net_nn`. For the framework-level API (build a `PetriNet`
456
+ by hand, compile, train, extract rules, score anomalies), see
457
+ [`docs/DEV_MANUAL.md`](docs/DEV_MANUAL.md).
458
+
459
+ ---
460
+
461
+ ## Repository layout
462
+
463
+ ```
464
+ petri_net_nn/ # the framework
465
+ petri_net.py # PetriNet dataclass + token-game semantics
466
+ bpmn.py # BPMN 2.0 → PetriNet parser
467
+ pnml.py # PNML 2009 P/T-net import / export
468
+ sif.py # Pathway Commons SIF import (biology pathways)
469
+ compiler.py # PetriNet → differentiable nn.Module
470
+ subnets.py # five hand-built reference subnets
471
+ traces.py # training, anomaly score, expected-cost, AUC
472
+ xes.py # IEEE XES log loader (plain + gzipped)
473
+ anomalies.py # corruption generators + frequency baseline
474
+ interpretability.py # distil learned weights into rules
475
+ bisimulation.py # strong + weak bisimulation equivalence checking
476
+ soundness.py # van der Aalst soundness + deadlock localisation
477
+ ctl.py # CTL temporal-logic model checking
478
+ adapter.py # config-driven scenario loader
479
+
480
+ examples/ # 14 end-to-end scenarios — see "Worked examples" above
481
+ tests/ # framework + scenario tests
482
+ docs/
483
+ BUSINESS_ANALYST_GUIDE.md # plain-English concepts primer for non-coders
484
+ ROADMAP.md # product roadmap, phase status, framing
485
+ DEV_MANUAL.md # framework + adapter usage guide
486
+ ```
487
+
488
+ ---
489
+
490
+ ## Reading order
491
+
492
+ 1. This README — what PETRA is and what to do with it.
493
+ 2. [`docs/BUSINESS_ANALYST_GUIDE.md`](docs/BUSINESS_ANALYST_GUIDE.md)
494
+ — a no-code, no-maths walkthrough of every framework concept
495
+ (Petri nets, BPMN translation, coloured tokens, bisimulation,
496
+ the lot) aimed at process analysts, compliance officers, and
497
+ project managers.
498
+ 3. [`docs/ROADMAP.md`](docs/ROADMAP.md) — framing, phase status,
499
+ what's next.
500
+ 4. Any [`examples/*/README.md`](examples/) — a concrete scenario in
501
+ your domain.
502
+ 5. [`docs/DEV_MANUAL.md`](docs/DEV_MANUAL.md) — adapter config and
503
+ framework API reference.
504
+
505
+ ---
506
+
507
+ ## Running tests
508
+
509
+ ```
510
+ python -m pytest # full suite (~379 tests)
511
+ python -m pytest tests/scenarios/ # only end-to-end scenarios
512
+ python -m pytest tests/test_compiler.py # only the compiler
513
+ ```