ergminer 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,229 @@
1
+ """
2
+ ERGML Converter
3
+
4
+ Converts an ERG (Event Relationship Graph) model to ERGML (ERG Markup Language),
5
+ an XML-based format inspired by PNML (Petri Net Markup Language).
6
+
7
+ ERGML Schema overview
8
+ ---------------------
9
+ <ergml version="1.0">
10
+ <erg id="..." >
11
+ <name> ... </name>
12
+
13
+ <parameters>
14
+ <simTime>...</simTime>
15
+ <start_events> <event ref="..."/> ... </start_events>
16
+ <end_events> <event ref="..."/> ... </end_events>
17
+ <statistics>
18
+ <stat name="..." value="..."/> ...
19
+ </statistics>
20
+ </parameters>
21
+
22
+ <states>
23
+ <stateVariable id="...">
24
+ <name>...</name>
25
+ <type>...</type>
26
+ <resource>...</resource>
27
+ <initialValue>...</initialValue>
28
+ </stateVariable>
29
+ ...
30
+ </states>
31
+
32
+ <nodes>
33
+ <node id="..." type="...">
34
+ <name>...</name>
35
+ <eventType>...</eventType>
36
+ <originalActivity>...</originalActivity>
37
+ <resource>...</resource>
38
+ <frequency>...</frequency>
39
+ <stateUpdateEquations>
40
+ <equation>...</equation>
41
+ ...
42
+ </stateUpdateEquations>
43
+ </node>
44
+ ...
45
+ </nodes>
46
+
47
+ <arcs>
48
+ <arc id="..." source="..." target="...">
49
+ <probability>...</probability>
50
+ <guardCondition>...</guardCondition>
+ <arcType>...</arcType>
51
+ <isImmediate>...</isImmediate>
52
+ <delayDistribution>...</delayDistribution>
53
+ <distributionParams>
54
+ <param name="..." value="..."/> ...
55
+ </distributionParams>
56
+ <meanDelay>...</meanDelay>
57
+ </arc>
58
+ ...
59
+ </arcs>
60
+
61
+ </erg>
62
+ </ergml>
63
+ """
64
+
65
+ import xml.etree.ElementTree as ET
66
+ from xml.dom import minidom
67
+
68
+
69
+ # ---------------------------------------------------------------------------
70
+ # Internal helpers
71
+ # ---------------------------------------------------------------------------
72
+
73
+ def _sub(parent, tag, text=None):
74
+ """Create a child element, optionally setting its text."""
75
+ el = ET.SubElement(parent, tag)
76
+ if text is not None:
77
+ el.text = str(text)
78
+ return el
79
+
80
+
81
+ def _set_text(element, value):
82
+ """Set element text; leave empty if value is None."""
83
+ if value is not None:
84
+ element.text = str(value)
85
+
86
+
87
+ def _prettify(element):
88
+ """Return a pretty-printed XML string for *element*."""
89
+ raw = ET.tostring(element, encoding="unicode")
90
+ reparsed = minidom.parseString(raw)
91
+ return reparsed.toprettyxml(indent=" ", encoding=None)
92
+
93
+
94
+ # ---------------------------------------------------------------------------
95
+ # Public API
96
+ # ---------------------------------------------------------------------------
97
+
98
def erg_to_ergml(erg, sim_time=5000.0):
    """
    Convert an ERG object to an ERGML XML string.

    The document is built as ``<ergml version="1.0">`` wrapping a single
    ``<erg>`` element whose children are, in order, ``<name>``,
    ``<parameters>``, ``<states>``, ``<nodes>`` and ``<arcs>``.

    Parameters
    ----------
    erg : ERG
        The ERG model to convert.  This function reads ``name``,
        ``start_events``, ``end_events``, ``state_variables`` and ``nodes``
        (both iterated via ``.values()``), ``arcs`` and calls
        ``get_statistics()`` (iterated via ``.items()``).
    sim_time : float
        Simulation horizon to embed in the ERGML (default 5000.0).

    Returns
    -------
    str
        Pretty-printed ERGML XML string.

    Notes
    -----
    Every value written into an XML *attribute* is passed through ``str()``
    first.  ElementTree only serialises string attribute values, so a model
    whose identifiers are, for example, integers would otherwise fail at the
    final serialisation step.  String identifiers produce exactly the same
    output as before.
    """
    # Root
    root = ET.Element("ergml", attrib={"version": "1.0"})

    # <erg id="...">
    erg_el = ET.SubElement(root, "erg", attrib={"id": str(erg.name)})

    # ------------------------------------------------------------------
    # <name>
    # ------------------------------------------------------------------
    _sub(erg_el, "name", erg.name)

    # ------------------------------------------------------------------
    # <parameters>
    # ------------------------------------------------------------------
    params_el = _sub(erg_el, "parameters")

    # sim_time
    _sub(params_el, "simTime", sim_time)

    # start_events (sorted so the output is stable across runs)
    start_el = _sub(params_el, "start_events")
    for ev in sorted(erg.start_events):
        ET.SubElement(start_el, "event", attrib={"ref": str(ev)})

    # end_events (sorted so the output is stable across runs)
    end_el = _sub(params_el, "end_events")
    for ev in sorted(erg.end_events):
        ET.SubElement(end_el, "event", attrib={"ref": str(ev)})

    # statistics
    stats_el = _sub(params_el, "statistics")
    for stat_name, stat_value in erg.get_statistics().items():
        ET.SubElement(
            stats_el,
            "stat",
            attrib={"name": str(stat_name), "value": str(stat_value)},
        )

    # ------------------------------------------------------------------
    # <states>
    # ------------------------------------------------------------------
    states_el = _sub(erg_el, "states")
    for sv in erg.state_variables.values():
        sv_el = ET.SubElement(
            states_el, "stateVariable", attrib={"id": str(sv.name)}
        )
        _sub(sv_el, "name", sv.name)
        _sub(sv_el, "type", sv.variable_type)
        _sub(sv_el, "resource", sv.resource)
        _sub(sv_el, "initialValue", sv.initial_value)

    # ------------------------------------------------------------------
    # <nodes>
    # ------------------------------------------------------------------
    nodes_el = _sub(erg_el, "nodes")
    for node in erg.nodes.values():
        node_el = ET.SubElement(
            nodes_el,
            "node",
            attrib={"id": str(node.name), "type": str(node.event_type)},
        )
        # <name> carries the clean activity name without resource suffix
        _sub(node_el, "name", node.original_activity)
        _sub(node_el, "eventType", node.event_type)
        _sub(node_el, "originalActivity", node.original_activity)
        # <resource> is always emitted; it stays empty when resource is None
        res_el = _sub(node_el, "resource")
        _set_text(res_el, node.resource)
        _sub(node_el, "frequency", node.frequency)
        eqs_el = _sub(node_el, "stateUpdateEquations")
        for eq in node.state_update_equations:
            _sub(eqs_el, "equation", eq)

    # ------------------------------------------------------------------
    # <arcs>
    # ------------------------------------------------------------------
    arcs_el = _sub(erg_el, "arcs")

    # Track arc IDs: repeated source->target pairs get a numeric suffix
    # (_1, _2, ...) so every <arc> id stays unique.
    arc_id_counter = {}
    for arc in erg.arcs:
        base_id = f"arc_{arc.source}_{arc.target}"
        count = arc_id_counter.get(base_id, 0)
        arc_id = base_id if count == 0 else f"{base_id}_{count}"
        arc_id_counter[base_id] = count + 1

        arc_el = ET.SubElement(
            arcs_el,
            "arc",
            attrib={
                "id": arc_id,
                "source": str(arc.source),
                "target": str(arc.target),
            },
        )
        _sub(arc_el, "probability", arc.probability)
        # <guardCondition> is always emitted; empty when there is no guard
        guard_el = _sub(arc_el, "guardCondition")
        _set_text(guard_el, arc.guard_condition)
        # Arcs without an arc_type attribute are written as 'DF'
        _sub(arc_el, "arcType", getattr(arc, 'arc_type', 'DF'))
        _sub(arc_el, "isImmediate", str(arc.is_immediate).lower())
        _sub(arc_el, "delayDistribution", arc.delay_distribution)

        # distributionParams (element is emitted even when there are none)
        dp_el = _sub(arc_el, "distributionParams")
        for param_name, param_val in (arc.distribution_params or {}).items():
            ET.SubElement(
                dp_el,
                "param",
                attrib={"name": str(param_name), "value": str(param_val)},
            )

        _sub(arc_el, "meanDelay", arc.mean_delay)

    return _prettify(root)
212
+
213
+
214
def save_ergml(erg, filepath, sim_time=5000.0):
    """
    Serialise an ERG model as ERGML and store it at *filepath*.

    Parameters
    ----------
    erg : ERG
        The ERG model to export.
    filepath : str
        Destination file path (conventionally ending in .ergml).  The file
        is opened in write mode with UTF-8 encoding.
    sim_time : float
        Simulation horizon to embed in the ERGML (default 5000.0).
    """
    # Build the document before opening the file so that a failed
    # conversion never truncates an existing file.
    document = erg_to_ergml(erg, sim_time=sim_time)
    with open(filepath, "w", encoding="utf-8") as out:
        out.write(document)
ergminer/__init__.py ADDED
@@ -0,0 +1,119 @@
1
+ """
2
+ ERGminer - Event Relationship Graph Mining from Event Logs
3
+
4
+ Mines Event Relationship Graphs (ERGs) from event logs using process mining,
5
+ state variable identification, and statistical analysis.
6
+ """
7
+
8
+ __version__ = "0.2.0"
9
+ __author__ = "Zach Eyde"
10
+
11
+ # ── Submodules (ergminer.read, ergminer.discovery, etc.) ─────────────────────
12
+ from ergminer import read
13
+ from ergminer import utils
14
+ from ergminer import filtering
15
+ from ergminer import discovery
16
+ from ergminer import write
17
+ from ergminer import sim
18
+ from ergminer import conformance as conformance_module
19
+
20
+ # ── ergminer.read ─────────────────────────────────────────────────────────────
21
+ from ergminer.read import (
22
+ read_csv,
23
+ read_xes,
24
+ read_dataframe,
25
+ )
26
+
27
+ # ── ergminer.utils ────────────────────────────────────────────────────────────
28
+ from ergminer.utils import (
29
+ format_dataframe,
30
+ get_start_activities,
31
+ get_end_activities,
32
+ get_variants,
33
+ get_activity_labels,
34
+ )
35
+
36
+ # ── ergminer.filtering ────────────────────────────────────────────────────────
37
+ from ergminer.filtering import (
38
+ filter_start_activities,
39
+ filter_end_activities,
40
+ filter_case_size,
41
+ filter_variants,
42
+ )
43
+
44
+ # ── ergminer.discovery ────────────────────────────────────────────────────────
45
+ from ergminer.discovery import (
46
+ discover_erg,
47
+ )
48
+
49
+ # ── ergminer.write ────────────────────────────────────────────────────────────
50
+ from ergminer.write import (
51
+ write_ergml,
52
+ write_erg_json,
53
+ write_dot,
54
+ )
55
+
56
+ # ── ergminer.sim ──────────────────────────────────────────────────────────────
57
+ from ergminer.sim import (
58
+ play_out,
59
+ )
60
+
61
+ # ── ergminer.conformance ──────────────────────────────────────────────────────
62
+ from ergminer.conformance import (
63
+ conformance_erg,
64
+ )
65
+
66
+ # ── Data classes and configuration (backward-compatible) ─────────────────────
67
+ from ergminer.erg_miner import ERGMiner, DelayConfig
68
+ from ergminer.erg_structure import ERG, ERGNode, ERGArc, ERGStateVariable
69
+ from ergminer.conformance_testing import ConformanceResult
70
+
71
+ # Backward-compatible top-level aliases (deprecated in favour of write.*)
72
+ from ergminer.ERGML_converter import erg_to_ergml, save_ergml
73
+ from ergminer.erg_playback import run_simulations
74
+ from ergminer.conformance_testing import conformance_testing
75
+
76
+ # ── ergminer.vis (optional — requires matplotlib + networkx) ──────────────────
77
+ try:
78
+ from ergminer.vis import view_erg, save_vis_erg
79
+ from ergminer import vis
80
+ _vis_available = True
81
+ except ImportError:
82
+ _vis_available = False
83
+
84
+ # ── ergminer.erg_plotter alias (backward-compatible) ─────────────────────────
85
+ try:
86
+ from ergminer.erg_plotter import plot_erg
87
+ except ImportError:
88
+ pass
89
+
90
+ # ── __all__ ──────────────────────────────────────────────────────────────────
91
# Names exported by ``from ergminer import *``.
__all__ = [
    # Submodules
    'read', 'utils', 'filtering', 'discovery', 'write', 'sim',
    # Read
    'read_csv', 'read_xes', 'read_dataframe',
    # Utils
    'format_dataframe', 'get_start_activities', 'get_end_activities',
    'get_variants', 'get_activity_labels',
    # Filtering
    'filter_start_activities', 'filter_end_activities',
    'filter_case_size', 'filter_variants',
    # Discovery
    'discover_erg',
    # Write
    'write_ergml', 'write_erg_json', 'write_dot',
    # Simulation
    'play_out',
    # Conformance
    'conformance_erg',
    # Data classes
    'ERGMiner', 'DelayConfig', 'ERG', 'ERGNode', 'ERGArc',
    'ERGStateVariable', 'ConformanceResult',
    # Backward-compatible
    'erg_to_ergml', 'save_ergml', 'run_simulations',
    'conformance_testing',
]

# Visualisation / plotting names come from imports guarded by
# ``except ImportError`` and may therefore be unbound.  Listing an unbound
# name in __all__ makes ``from ergminer import *`` raise AttributeError, so
# only advertise the ones that were actually imported.
__all__ += [
    name for name in ('view_erg', 'save_vis_erg', 'plot_erg')
    if name in globals()
]
119
+
@@ -0,0 +1,187 @@
1
+ """
2
+ conformance.py — ERG conformance checking functions for ERGminer.
3
+
4
+ Analogous to pm4py.conformance.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import tempfile
10
+ import os
11
+ from typing import TYPE_CHECKING, Dict, Optional
12
+
13
+ import numpy as np
14
+ import pandas as pd
15
+
16
+ from .conformance_testing import (
17
+ ConformanceResult,
18
+ ERGConformanceChecker,
19
+ CHECK_GROUPS,
20
+ )
21
+ from .ERGML_converter import erg_to_ergml
22
+ from .erg_playback import parse_ergml
23
+ from .sim import play_out
24
+
25
+ if TYPE_CHECKING:
26
+ from .erg_structure import ERG
27
+ from .conformance_testing import ConformanceResult
28
+
29
+
30
def conformance_erg(
    log: pd.DataFrame,
    erg: 'ERG',
    sim_log: Optional[pd.DataFrame] = None,
    n_simulations: int = 10,
    sim_time: float = 5000.0,
    seed: int = 42,
    case_id_col: str = 'case_id',
    activity_col: str = 'activity_name',
    timestamp_col: str = 'timestamp',
    resource_col: str = 'resource_id',
    verbose: bool = False,
) -> 'ConformanceResult':
    """Run conformance checking between *log* and *erg*.

    Compares the observed behaviour in *log* against simulated behaviour of
    the ERG model using the checks run by ``ERGConformanceChecker``.  If
    *sim_log* is not provided, ``play_out()`` is called automatically to
    generate simulation replications.

    Args:
        log:           Original event log DataFrame (user column names).
        erg:           The ``ERG`` model to check against.
        sim_log:       Pre-computed simulation log from ``play_out()``
                       (optional).  When it has an ``_erg_run`` column, each
                       run's case ids are prefixed to make them unique across
                       runs; without that column the log is treated as one
                       run.  If ``None``, simulations are run automatically.
        n_simulations: Number of simulation replications (used only when
                       *sim_log* is ``None``).
        sim_time:      Simulation horizon.  Passed to ``play_out()`` when
                       *sim_log* is ``None`` and always embedded in the
                       temporary ERGML serialisation of *erg*.
        seed:          Base random seed for simulations (used only when
                       *sim_log* is ``None``).
        case_id_col:   Column name for case identifiers in *log*.
        activity_col:  Column name for activity names in *log*.
        timestamp_col: Column name for timestamps in *log*.
        resource_col:  Column name for resource identifiers in *log*; it is
                       renamed only if present in *log*.
        verbose:       Print progress to stdout.

    Returns:
        ``ConformanceResult`` populated with ``erg_name``, ``n_simulations``,
        ``checker_results``, ``summary_df``, ``overall_score`` (mean of all
        numeric check scores, NaN when there are none) and ``group_scores``.

    Raises:
        ValueError: If *sim_log* has an ``_erg_run`` column but yields no
            run groups (for example, it is empty).
    """
    # ── 1. Generate sim log if not provided ──────────────────────────────────
    if sim_log is None:
        if verbose:
            print(f"Running {n_simulations} simulation(s) for conformance …")
        sim_log = play_out(
            erg,
            n=n_simulations,
            sim_time=sim_time,
            seed=seed,
            verbose=verbose,
        )
        actual_n = n_simulations
    else:
        actual_n = sim_log['_erg_run'].nunique() if '_erg_run' in sim_log.columns else 1

    # ── 2. Serialise ERG to temp ERGML and parse nodes/arcs ─────────────────
    ergml_xml = erg_to_ergml(erg, sim_time=sim_time)
    tmp_file = tempfile.NamedTemporaryFile(
        suffix='.ergml', delete=False, mode='w', encoding='utf-8'
    )
    # Record the path before anything can fail so the cleanup below always
    # has a valid name and cannot mask the real error.
    tmp_path = tmp_file.name
    try:
        # The handle is closed before parsing (and also if the write fails).
        with tmp_file:
            tmp_file.write(ergml_xml)

        (erg_name, nodes, arcs_from, _start_events, end_events,
         _res_caps, _) = parse_ergml(tmp_path)
    finally:
        # Best-effort cleanup: a leftover temp file must not fail the check.
        try:
            os.remove(tmp_path)
        except OSError:
            pass

    # ── 3. Rename original log columns to internal standard names ────────────
    # The sim log always uses fixed internal column names (case_id, timestamp,
    # activity_name, resource_id).  Rename the original log to match so both
    # DataFrames use the same column names when passed to ERGConformanceChecker.
    rename_map = {
        case_id_col: 'case_id',
        activity_col: 'activity_name',
        timestamp_col: 'timestamp',
    }
    if resource_col and resource_col in log.columns:
        rename_map[resource_col] = 'resource_id'
    orig_log = log.rename(columns=rename_map).copy()

    # Drop incomplete cases from orig (mirror of conformance_testing behaviour):
    # keep only cases that contain at least one end event.
    if end_events:
        completed_cases = orig_log.loc[
            orig_log['activity_name'].isin(end_events), 'case_id'
        ].unique()
        orig_log = orig_log[orig_log['case_id'].isin(completed_cases)]

    # ── 4. Build combined sim log with globally unique case IDs ─────────────
    # play_out() restarts case_ids from 1 each run, so prefix them per run.
    if '_erg_run' in sim_log.columns:
        combined_parts = []
        for run_idx, run_df in sim_log.groupby('_erg_run', sort=True):
            part = run_df.copy()
            part['case_id'] = f"run{run_idx}_case" + part['case_id'].astype(str)
            combined_parts.append(part)
        if not combined_parts:
            # pd.concat([]) would raise an opaque "No objects to concatenate".
            raise ValueError(
                "sim_log contains an '_erg_run' column but no simulation runs"
            )
    else:
        combined_parts = [sim_log.copy()]
    combined_sim_log = pd.concat(combined_parts, ignore_index=True)

    # ── 5. Run conformance checker ────────────────────────────────────────────
    if verbose:
        print("Running ERG conformance checks …")

    checker = ERGConformanceChecker(
        original_log=orig_log,
        sim_log=combined_sim_log,
        nodes=nodes,
        arcs_from=arcs_from,
        case_id_col='case_id',
        activity_col='activity_name',
        timestamp_col='timestamp',
        resource_col='resource_id',
    )
    checker.run_all_checks()

    # ── 6. Build ConformanceResult ────────────────────────────────────────────
    summary_df = checker.summary()

    # Only checks that reported a numeric score take part in the averages.
    numeric_scores = [
        r['score'] for r in checker.results.values()
        if isinstance(r.get('score'), (int, float))
    ]
    # NaN (without numpy's empty-mean warning) when nothing is numeric.
    overall_score = (
        round(float(np.mean(numeric_scores)), 4)
        if numeric_scores else float('nan')
    )

    pass_threshold = 0.8  # a group passes when its mean score reaches this
    group_scores: Dict = {}
    for grp, check_keys in CHECK_GROUPS.items():
        grp_vals = [
            checker.results[k]['score']
            for k in check_keys
            if k in checker.results
            and isinstance(checker.results[k].get('score'), (int, float))
        ]
        if not grp_vals:
            continue
        grp_score = round(float(np.mean(grp_vals)), 4)
        group_scores[grp] = {
            'score': grp_score,
            'pass': grp_score >= pass_threshold,
            'checks': len(grp_vals),
        }

    result = ConformanceResult()
    result.erg_name = erg_name or getattr(erg, 'name', 'ERG')
    result.n_simulations = actual_n
    result.checker_results = checker.results
    result.summary_df = summary_df
    result.overall_score = overall_score
    result.group_scores = group_scores

    return result
187
+