ergminer 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ergminer/ERGML_converter.py +229 -0
- ergminer/__init__.py +119 -0
- ergminer/conformance.py +187 -0
- ergminer/conformance_testing.py +1693 -0
- ergminer/delay_analyzer.py +892 -0
- ergminer/discovery.py +70 -0
- ergminer/erg_miner.py +615 -0
- ergminer/erg_playback.py +796 -0
- ergminer/erg_plotter.py +303 -0
- ergminer/erg_structure.py +310 -0
- ergminer/event_log_loader.py +220 -0
- ergminer/filtering.py +139 -0
- ergminer/guard_detector.py +228 -0
- ergminer/process_miner.py +438 -0
- ergminer/py.typed +0 -0
- ergminer/read.py +129 -0
- ergminer/sim.py +98 -0
- ergminer/state_variables.py +971 -0
- ergminer/utils.py +137 -0
- ergminer/vis.py +121 -0
- ergminer/write.py +59 -0
- ergminer-0.2.0.dist-info/METADATA +550 -0
- ergminer-0.2.0.dist-info/RECORD +26 -0
- ergminer-0.2.0.dist-info/WHEEL +5 -0
- ergminer-0.2.0.dist-info/licenses/LICENSE +674 -0
- ergminer-0.2.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ERGML Converter
|
|
3
|
+
|
|
4
|
+
Converts an ERG (Event Relationship Graph) model to ERGML (ERG Markup Language),
|
|
5
|
+
an XML-based format inspired by PNML (Petri Net Markup Language).
|
|
6
|
+
|
|
7
|
+
ERGML Schema overview
|
|
8
|
+
---------------------
|
|
9
|
+
<ergml version="1.0">
|
|
10
|
+
<erg id="..." >
|
|
11
|
+
<name> ... </name>
|
|
12
|
+
|
|
13
|
+
<parameters>
|
|
14
|
+
<simTime>...</simTime>
|
|
15
|
+
<start_events> <event ref="..."/> ... </start_events>
|
|
16
|
+
<end_events> <event ref="..."/> ... </end_events>
|
|
17
|
+
<statistics>
|
|
18
|
+
<stat name="..." value="..."/> ...
|
|
19
|
+
</statistics>
|
|
20
|
+
</parameters>
|
|
21
|
+
|
|
22
|
+
<states>
|
|
23
|
+
<stateVariable id="...">
|
|
24
|
+
<name>...</name>
|
|
25
|
+
<type>...</type>
|
|
26
|
+
<resource>...</resource>
|
|
27
|
+
<initialValue>...</initialValue>
|
|
28
|
+
</stateVariable>
|
|
29
|
+
...
|
|
30
|
+
</states>
|
|
31
|
+
|
|
32
|
+
<nodes>
|
|
33
|
+
<node id="..." type="...">
|
|
34
|
+
<name>...</name>
|
|
35
|
+
<eventType>...</eventType>
|
|
36
|
+
<originalActivity>...</originalActivity>
|
|
37
|
+
<resource>...</resource>
|
|
38
|
+
<frequency>...</frequency>
|
|
39
|
+
<stateUpdateEquations>
|
|
40
|
+
<equation>...</equation>
|
|
41
|
+
...
|
|
42
|
+
</stateUpdateEquations>
|
|
43
|
+
</node>
|
|
44
|
+
...
|
|
45
|
+
</nodes>
|
|
46
|
+
|
|
47
|
+
<arcs>
|
|
48
|
+
<arc id="..." source="..." target="...">
|
|
49
|
+
<probability>...</probability>
|
|
50
|
+
<guardCondition>...</guardCondition>
|
|
51
|
+
<isImmediate>...</isImmediate>
|
|
52
|
+
<delayDistribution>...</delayDistribution>
|
|
53
|
+
<distributionParams>
|
|
54
|
+
<param name="..." value="..."/> ...
|
|
55
|
+
</distributionParams>
|
|
56
|
+
<meanDelay>...</meanDelay>
|
|
57
|
+
</arc>
|
|
58
|
+
...
|
|
59
|
+
</arcs>
|
|
60
|
+
|
|
61
|
+
</erg>
|
|
62
|
+
</ergml>
|
|
63
|
+
"""
|
|
64
|
+
|
|
65
|
+
import xml.etree.ElementTree as ET
|
|
66
|
+
from xml.dom import minidom
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
# ---------------------------------------------------------------------------
|
|
70
|
+
# Internal helpers
|
|
71
|
+
# ---------------------------------------------------------------------------
|
|
72
|
+
|
|
73
|
+
def _sub(parent, tag, text=None):
    """Append a new ``tag`` child to *parent* and return it.

    When *text* is given (anything other than ``None``), its ``str()`` form
    becomes the text content of the new element.
    """
    child = ET.SubElement(parent, tag)
    if text is None:
        return child
    child.text = str(text)
    return child
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _set_text(element, value):
    """Store ``str(value)`` as the text of *element*; do nothing for ``None``."""
    if value is None:
        # Leave the element without text so it serialises as an empty tag.
        return
    element.text = str(value)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _prettify(element):
    """Serialise *element* and return it as an indented XML string.

    The element is rendered to text, re-parsed with ``minidom`` and
    pretty-printed; ``encoding=None`` makes ``toprettyxml`` return ``str``.
    """
    document = minidom.parseString(ET.tostring(element, encoding="unicode"))
    return document.toprettyxml(indent=" ", encoding=None)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
# ---------------------------------------------------------------------------
|
|
95
|
+
# Public API
|
|
96
|
+
# ---------------------------------------------------------------------------
|
|
97
|
+
|
|
98
|
+
def erg_to_ergml(erg, sim_time=5000.0):
    """
    Convert an ERG object to an ERGML XML string.

    The document contains, in order, the ``<name>``, ``<parameters>``,
    ``<states>``, ``<nodes>`` and ``<arcs>`` sections of a single ``<erg>``
    element. Besides the children listed in the module-level schema
    overview, every ``<arc>`` also carries an ``<arcType>`` element
    (``'DF'`` when the arc object has no ``arc_type`` attribute).

    Parameters
    ----------
    erg : ERG
        The ERG model to convert. The attributes read are ``name``,
        ``start_events``, ``end_events``, ``get_statistics()``,
        ``state_variables``, ``nodes`` and ``arcs``.
    sim_time : float
        Simulation horizon to embed in the ERGML (default 5000.0).

    Returns
    -------
    str
        Pretty-printed ERGML XML string.

    Notes
    -----
    XML attribute values are coerced with ``str()``: ElementTree refuses to
    serialise non-string attribute values, whereas element text is already
    coerced by ``_sub``. Arc ids are guaranteed to be unique within the
    document.
    """
    root = ET.Element("ergml", attrib={"version": "1.0"})
    erg_el = ET.SubElement(root, "erg", attrib={"id": str(erg.name)})

    # ------------------------------------------------------------------
    # <name>
    # ------------------------------------------------------------------
    _sub(erg_el, "name", erg.name)

    # ------------------------------------------------------------------
    # <parameters>
    # ------------------------------------------------------------------
    params_el = _sub(erg_el, "parameters")
    _sub(params_el, "simTime", sim_time)

    # Events are sorted so the output is deterministic.
    start_el = _sub(params_el, "start_events")
    for ev in sorted(erg.start_events):
        ET.SubElement(start_el, "event", attrib={"ref": str(ev)})

    end_el = _sub(params_el, "end_events")
    for ev in sorted(erg.end_events):
        ET.SubElement(end_el, "event", attrib={"ref": str(ev)})

    stats = erg.get_statistics()
    stats_el = _sub(params_el, "statistics")
    for stat_name, stat_value in stats.items():
        ET.SubElement(
            stats_el,
            "stat",
            attrib={"name": str(stat_name), "value": str(stat_value)},
        )

    # ------------------------------------------------------------------
    # <states>
    # ------------------------------------------------------------------
    states_el = _sub(erg_el, "states")
    for sv in erg.state_variables.values():
        sv_el = ET.SubElement(
            states_el, "stateVariable", attrib={"id": str(sv.name)}
        )
        _sub(sv_el, "name", sv.name)
        _sub(sv_el, "type", sv.variable_type)
        _sub(sv_el, "resource", sv.resource)
        _sub(sv_el, "initialValue", sv.initial_value)

    # ------------------------------------------------------------------
    # <nodes>
    # ------------------------------------------------------------------
    nodes_el = _sub(erg_el, "nodes")
    for node in erg.nodes.values():
        node_el = ET.SubElement(
            nodes_el,
            "node",
            attrib={"id": str(node.name), "type": str(node.event_type)},
        )
        # <name> holds the clean activity name without the resource suffix.
        _sub(node_el, "name", node.original_activity)
        _sub(node_el, "eventType", node.event_type)
        _sub(node_el, "originalActivity", node.original_activity)
        # <resource> is always emitted; it stays empty when resource is None.
        res_el = _sub(node_el, "resource")
        _set_text(res_el, node.resource)
        _sub(node_el, "frequency", node.frequency)
        eqs_el = _sub(node_el, "stateUpdateEquations")
        for eq in node.state_update_equations:
            _sub(eqs_el, "equation", eq)

    # ------------------------------------------------------------------
    # <arcs>
    # ------------------------------------------------------------------
    arcs_el = _sub(erg_el, "arcs")

    # Arc ids are "arc_<source>_<target>", with "_<n>" appended for repeated
    # source/target pairs. A per-base counter alone is not enough: two A->B
    # arcs yield "arc_A_B_1", which is also the base id of an A->"B_1" arc.
    # Every emitted id is therefore checked against the set of used ids.
    used_ids = set()
    next_suffix = {}
    for arc in erg.arcs:
        base_id = f"arc_{arc.source}_{arc.target}"
        count = next_suffix.get(base_id, 0)
        arc_id = base_id if count == 0 else f"{base_id}_{count}"
        while arc_id in used_ids:
            count += 1
            arc_id = f"{base_id}_{count}"
        next_suffix[base_id] = count + 1
        used_ids.add(arc_id)

        arc_el = ET.SubElement(
            arcs_el,
            "arc",
            attrib={
                "id": arc_id,
                "source": str(arc.source),
                "target": str(arc.target),
            },
        )
        _sub(arc_el, "probability", arc.probability)
        # <guardCondition> is always emitted; empty when there is no guard.
        guard_el = _sub(arc_el, "guardCondition")
        _set_text(guard_el, arc.guard_condition)
        _sub(arc_el, "arcType", getattr(arc, 'arc_type', 'DF'))
        # Booleans are written in lowercase ("true"/"false").
        _sub(arc_el, "isImmediate", str(arc.is_immediate).lower())
        _sub(arc_el, "delayDistribution", arc.delay_distribution)

        # <distributionParams> is emitted even when there are no params.
        dp_el = _sub(arc_el, "distributionParams")
        if arc.distribution_params:
            for param_name, param_val in arc.distribution_params.items():
                ET.SubElement(
                    dp_el,
                    "param",
                    attrib={"name": str(param_name), "value": str(param_val)},
                )

        _sub(arc_el, "meanDelay", arc.mean_delay)

    return _prettify(root)
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def save_ergml(erg, filepath, sim_time=5000.0):
    """
    Serialise an ERG model as ERGML and store it at *filepath*.

    Parameters
    ----------
    erg : ERG
        The ERG model to export.
    filepath : str
        Destination file path (conventionally ending in .ergml).
    sim_time : float
        Simulation horizon to embed in the ERGML (default 5000.0).
    """
    # Build the document first so the file is only opened once the
    # conversion has produced a result.
    document = erg_to_ergml(erg, sim_time=sim_time)
    with open(filepath, mode="w", encoding="utf-8") as out_file:
        out_file.write(document)
|
ergminer/__init__.py
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ERGminer - Event Relationship Graph Mining from Event Logs
|
|
3
|
+
|
|
4
|
+
Mines Event Relationship Graphs (ERGs) from event logs using process mining,
|
|
5
|
+
state variable identification, and statistical analysis.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
__version__ = "0.2.0"
|
|
9
|
+
__author__ = "Zach Eyde"
|
|
10
|
+
|
|
11
|
+
# ── Submodules (ergminer.read, ergminer.discovery, etc.) ─────────────────────
|
|
12
|
+
from ergminer import read
|
|
13
|
+
from ergminer import utils
|
|
14
|
+
from ergminer import filtering
|
|
15
|
+
from ergminer import discovery
|
|
16
|
+
from ergminer import write
|
|
17
|
+
from ergminer import sim
|
|
18
|
+
from ergminer import conformance as conformance_module
|
|
19
|
+
|
|
20
|
+
# ── ergminer.read ─────────────────────────────────────────────────────────────
|
|
21
|
+
from ergminer.read import (
|
|
22
|
+
read_csv,
|
|
23
|
+
read_xes,
|
|
24
|
+
read_dataframe,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
# ── ergminer.utils ────────────────────────────────────────────────────────────
|
|
28
|
+
from ergminer.utils import (
|
|
29
|
+
format_dataframe,
|
|
30
|
+
get_start_activities,
|
|
31
|
+
get_end_activities,
|
|
32
|
+
get_variants,
|
|
33
|
+
get_activity_labels,
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
# ── ergminer.filtering ────────────────────────────────────────────────────────
|
|
37
|
+
from ergminer.filtering import (
|
|
38
|
+
filter_start_activities,
|
|
39
|
+
filter_end_activities,
|
|
40
|
+
filter_case_size,
|
|
41
|
+
filter_variants,
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
# ── ergminer.discovery ────────────────────────────────────────────────────────
|
|
45
|
+
from ergminer.discovery import (
|
|
46
|
+
discover_erg,
|
|
47
|
+
)
|
|
48
|
+
|
|
49
|
+
# ── ergminer.write ────────────────────────────────────────────────────────────
|
|
50
|
+
from ergminer.write import (
|
|
51
|
+
write_ergml,
|
|
52
|
+
write_erg_json,
|
|
53
|
+
write_dot,
|
|
54
|
+
)
|
|
55
|
+
|
|
56
|
+
# ── ergminer.sim ──────────────────────────────────────────────────────────────
|
|
57
|
+
from ergminer.sim import (
|
|
58
|
+
play_out,
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
# ── ergminer.conformance ──────────────────────────────────────────────────────
|
|
62
|
+
from ergminer.conformance import (
|
|
63
|
+
conformance_erg,
|
|
64
|
+
)
|
|
65
|
+
|
|
66
|
+
# ── Data classes and configuration (backward-compatible) ─────────────────────
|
|
67
|
+
from ergminer.erg_miner import ERGMiner, DelayConfig
|
|
68
|
+
from ergminer.erg_structure import ERG, ERGNode, ERGArc, ERGStateVariable
|
|
69
|
+
from ergminer.conformance_testing import ConformanceResult
|
|
70
|
+
|
|
71
|
+
# Backward-compatible top-level aliases (deprecated in favour of write.*)
|
|
72
|
+
from ergminer.ERGML_converter import erg_to_ergml, save_ergml
|
|
73
|
+
from ergminer.erg_playback import run_simulations
|
|
74
|
+
from ergminer.conformance_testing import conformance_testing
|
|
75
|
+
|
|
76
|
+
# ── ergminer.vis (optional — requires matplotlib + networkx) ──────────────────
|
|
77
|
+
try:
    from ergminer.vis import view_erg, save_vis_erg
    from ergminer import vis
    _vis_available = True
except ImportError:
    # Visualisation is optional; the rest of the package works without it.
    _vis_available = False

# ── ergminer.erg_plotter alias (backward-compatible) ─────────────────────────
try:
    from ergminer.erg_plotter import plot_erg
    _plotter_available = True
except ImportError:
    _plotter_available = False

# ── __all__ ──────────────────────────────────────────────────────────────────
# Only names that are guaranteed to exist are listed unconditionally.  Names
# coming from the optional imports above are appended below, otherwise
# ``from ergminer import *`` raises AttributeError when they are missing.
__all__ = [
    # Submodules
    'read', 'utils', 'filtering', 'discovery', 'write', 'sim',
    # Read
    'read_csv', 'read_xes', 'read_dataframe',
    # Utils
    'format_dataframe', 'get_start_activities', 'get_end_activities',
    'get_variants', 'get_activity_labels',
    # Filtering
    'filter_start_activities', 'filter_end_activities',
    'filter_case_size', 'filter_variants',
    # Discovery
    'discover_erg',
    # Write
    'write_ergml', 'write_erg_json', 'write_dot',
    # Simulation
    'play_out',
    # Conformance
    'conformance_erg',
    # Data classes
    'ERGMiner', 'DelayConfig', 'ERG', 'ERGNode', 'ERGArc',
    'ERGStateVariable', 'ConformanceResult',
    # Backward-compatible
    'erg_to_ergml', 'save_ergml', 'run_simulations',
    'conformance_testing',
]

# Visualisation (only when ergminer.vis could be imported)
if _vis_available:
    __all__ += ['view_erg', 'save_vis_erg']

# Backward-compatible plotting alias (only when erg_plotter could be imported)
if _plotter_available:
    __all__ += ['plot_erg']
|
|
119
|
+
|
ergminer/conformance.py
ADDED
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
"""
|
|
2
|
+
conformance.py — ERG conformance checking functions for ERGminer.
|
|
3
|
+
|
|
4
|
+
Analogous to pm4py.conformance.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import tempfile
|
|
10
|
+
import os
|
|
11
|
+
from typing import TYPE_CHECKING, Dict, Optional
|
|
12
|
+
|
|
13
|
+
import numpy as np
|
|
14
|
+
import pandas as pd
|
|
15
|
+
|
|
16
|
+
from .conformance_testing import (
|
|
17
|
+
ConformanceResult,
|
|
18
|
+
ERGConformanceChecker,
|
|
19
|
+
CHECK_GROUPS,
|
|
20
|
+
)
|
|
21
|
+
from .ERGML_converter import erg_to_ergml
|
|
22
|
+
from .erg_playback import parse_ergml
|
|
23
|
+
from .sim import play_out
|
|
24
|
+
|
|
25
|
+
if TYPE_CHECKING:
|
|
26
|
+
from .erg_structure import ERG
|
|
27
|
+
from .conformance_testing import ConformanceResult
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def conformance_erg(
    log: pd.DataFrame,
    erg: 'ERG',
    sim_log: Optional[pd.DataFrame] = None,
    n_simulations: int = 10,
    sim_time: float = 5000.0,
    seed: int = 42,
    case_id_col: str = 'case_id',
    activity_col: str = 'activity_name',
    timestamp_col: str = 'timestamp',
    resource_col: str = 'resource_id',
    verbose: bool = False,
) -> 'ConformanceResult':
    """Run conformance checking between *log* and *erg*.

    Compares the observed behaviour in *log* against simulated behaviour of
    the ERG model using the checks run by ``ERGConformanceChecker``. If
    *sim_log* is not provided, ``play_out()`` is called automatically to
    generate simulation replications.

    Args:
        log:           Original event log DataFrame (user column names).
        erg:           The ``ERG`` model to check against.
        sim_log:       Pre-computed simulation log from ``play_out()`` (optional).
                       When it has an ``_erg_run`` column, case ids are made
                       unique per run; without that column it is treated as
                       a single run. If ``None``, simulations are run
                       automatically.
        n_simulations: Number of simulation replications (used only when
                       *sim_log* is ``None``).
        sim_time:      Simulation horizon passed to ``play_out()`` and
                       embedded in the intermediate ERGML document.
        seed:          Base random seed for simulations (used only when
                       *sim_log* is ``None``).
        case_id_col:   Column name for case identifiers in *log*.
        activity_col:  Column name for activity names in *log*.
        timestamp_col: Column name for timestamps in *log*.
        resource_col:  Column name for resource identifiers in *log*
                       (ignored when the column is absent).
        verbose:       Print progress to stdout.

    Returns:
        ``ConformanceResult`` populated with the ERG name, the number of
        simulation runs, the raw checker results, the summary DataFrame,
        the overall score and the per-group scores.
    """
    # ── 1. Generate sim log if not provided ──────────────────────────────────
    if sim_log is None:
        if verbose:
            print(f"Running {n_simulations} simulation(s) for conformance …")
        sim_log = play_out(
            erg,
            n=n_simulations,
            sim_time=sim_time,
            seed=seed,
            verbose=verbose,
        )
        actual_n = n_simulations
    else:
        actual_n = sim_log['_erg_run'].nunique() if '_erg_run' in sim_log.columns else 1

    # ── 2. Serialise ERG to temp ERGML and parse nodes/arcs ─────────────────
    ergml_xml = erg_to_ergml(erg, sim_time=sim_time)
    tmp_file = tempfile.NamedTemporaryFile(
        suffix='.ergml', delete=False, mode='w', encoding='utf-8'
    )
    # Capture the path before anything can fail so the cleanup below always
    # has a valid name, even when writing raises.
    tmp_path = tmp_file.name
    try:
        # The context manager closes the handle on success and on error; the
        # file must be closed before it is re-opened by parse_ergml().
        with tmp_file:
            tmp_file.write(ergml_xml)

        # Only the name, nodes, outgoing arcs and end events are used here.
        erg_name, nodes, arcs_from, _start_events, end_events, _res_caps, _ = (
            parse_ergml(tmp_path)
        )
    finally:
        # Best-effort cleanup: a leftover temp file must not mask the result
        # (or the original exception).
        try:
            os.remove(tmp_path)
        except OSError:
            pass

    # ── 3. Rename original log columns to internal standard names ────────────
    # The sim log always uses fixed internal column names (case_id, timestamp,
    # activity_name, resource_id).  Rename the original log to match so both
    # DataFrames use the same column names when passed to ERGConformanceChecker.
    rename_map = {
        case_id_col: 'case_id',
        activity_col: 'activity_name',
        timestamp_col: 'timestamp',
    }
    if resource_col and resource_col in log.columns:
        rename_map[resource_col] = 'resource_id'
    orig_log = log.rename(columns=rename_map).copy()

    # Drop incomplete cases from orig (mirror of conformance_testing behaviour):
    # keep only cases that contain at least one end event.
    if end_events:
        completed_cases = orig_log.loc[
            orig_log['activity_name'].isin(end_events), 'case_id'
        ].unique()
        orig_log = orig_log[orig_log['case_id'].isin(completed_cases)]

    # ── 4. Build combined sim log with globally unique case IDs ─────────────
    # play_out() restarts case_ids from 1 each run, so prefix them per run.
    combined_parts = []
    if '_erg_run' in sim_log.columns:
        for run_idx, run_df in sim_log.groupby('_erg_run', sort=True):
            part = run_df.copy()
            part['case_id'] = part['case_id'].astype(str).apply(
                lambda x, run_idx=run_idx: f"run{run_idx}_case{x}"
            )
            combined_parts.append(part)
    if not combined_parts:
        # No run column, or the run column produced no groups (e.g. an empty
        # simulation log): pd.concat() rejects an empty list, so fall back to
        # the log as-is.
        combined_parts = [sim_log.copy()]
    combined_sim_log = pd.concat(combined_parts, ignore_index=True)

    # ── 5. Run conformance checker ────────────────────────────────────────────
    if verbose:
        print("Running ERG conformance checks …")

    checker = ERGConformanceChecker(
        original_log=orig_log,
        sim_log=combined_sim_log,
        nodes=nodes,
        arcs_from=arcs_from,
        case_id_col='case_id',
        activity_col='activity_name',
        timestamp_col='timestamp',
        resource_col='resource_id',
    )
    checker.run_all_checks()

    # ── 6. Build ConformanceResult ────────────────────────────────────────────
    summary_df = checker.summary()

    # Only checks that reported a numeric score take part in the averages.
    numeric_scores = [
        r['score'] for r in checker.results.values()
        if isinstance(r.get('score'), (int, float))
    ]
    # np.mean([]) yields nan plus a RuntimeWarning; produce the nan directly.
    overall_score = round(
        float(np.mean(numeric_scores)) if numeric_scores else float('nan'), 4
    )

    group_scores: Dict = {}
    for grp, check_keys in CHECK_GROUPS.items():
        grp_vals = [
            checker.results[k]['score']
            for k in check_keys
            if k in checker.results
            and isinstance(checker.results[k].get('score'), (int, float))
        ]
        # Groups without any numeric score are left out of the result.
        if grp_vals:
            grp_score = round(float(np.mean(grp_vals)), 4)
            group_scores[grp] = {
                'score': grp_score,
                # A group passes when its mean score is at least 0.8.
                'pass': grp_score >= 0.8,
                'checks': len(grp_vals),
            }

    result = ConformanceResult()
    result.erg_name = erg_name or getattr(erg, 'name', 'ERG')
    result.n_simulations = actual_n
    result.checker_results = checker.results
    result.summary_df = summary_df
    result.overall_score = overall_score
    result.group_scores = group_scores

    return result
|
|
187
|
+
|