sim-tools 0.6.0__tar.gz → 0.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.3
1
+ Metadata-Version: 2.4
2
2
  Name: sim-tools
3
- Version: 0.6.0
3
+ Version: 0.7.0
4
4
  Summary: Simulation Tools for Education and Practice
5
5
  Project-URL: Homepage, https://github.com/TomMonks/sim-tools
6
6
  Project-URL: Bug Tracker, https://github.com/TomMonks/sim-tools/issues
@@ -16,8 +16,9 @@ Requires-Python: >=3.10
16
16
  Requires-Dist: matplotlib>=3.1.3
17
17
  Requires-Dist: numpy>=1.18.1
18
18
  Requires-Dist: pandas>=2.0.0
19
+ Requires-Dist: plotly>=6.0.0
20
+ Requires-Dist: rich>=13.7.1
19
21
  Requires-Dist: scikit-learn>=1.0.0
20
- Requires-Dist: scipy>=1.4.1
21
22
  Description-Content-Type: text/markdown
22
23
 
23
24
  # `sim-tools`: tools to support the Discrete-Event Simulation process in python.
@@ -46,6 +47,8 @@ Description-Content-Type: text/markdown
46
47
  1. Implementation of classic Optimisation via Simulation procedures such as KN, KN++, OBCA and OBCA-m
47
48
  2. Distributions module that includes classes that encapsulate a random number stream, seed, and distribution parameters.
48
49
  3. Implementation of Thinning to sample from Non-stationary poisson processes in a DES.
50
+ 4. Automatic selection of the number of replications to run via the Replications Algorithm.
51
+ 5. EXPERIMENTAL: model trace functionality to support debugging of simulation models.
49
52
 
50
53
  ## Installation
51
54
 
@@ -24,6 +24,8 @@
24
24
  1. Implementation of classic Optimisation via Simulation procedures such as KN, KN++, OBCA and OBCA-m
25
25
  2. Distributions module that includes classes that encapsulate a random number stream, seed, and distribution parameters.
26
26
  3. Implementation of Thinning to sample from Non-stationary poisson processes in a DES.
27
+ 4. Automatic selection of the number of replications to run via the Replications Algorithm.
28
+ 5. EXPERIMENTAL: model trace functionality to support debugging of simulation models.
27
29
 
28
30
  ## Installation
29
31
 
@@ -23,7 +23,9 @@ dependencies = [
23
23
  "numpy>=1.18.1",
24
24
  "pandas>=2.0.0",
25
25
  "scikit-learn>=1.0.0",
26
- "scipy>=1.4.1",
26
+ "rich>=13.7.1",
27
+ "plotly>=6.0.0"
28
+
27
29
  ]
28
30
 
29
31
  [project.urls]
@@ -38,3 +40,6 @@ path = "sim_tools/__init__.py"
38
40
  include = [
39
41
  "/sim_tools",
40
42
  ]
43
+
44
+ [[tool.hatch.envs.hatch-test.matrix]]
45
+ python = ["3.10", "3.11", "3.12", "3.13"]
@@ -0,0 +1,4 @@
1
+ __version__ = '0.7.0'
2
+ __author__ = 'Thomas Monks'
3
+
4
+ from . import datasets, distributions, time_dependent, ovs, output_analysis
@@ -0,0 +1,607 @@
1
+ """
2
+ module: output_analysis
3
+
4
+ Provides tools for selecting the number of
5
+ replications to run with a Discrete-Event Simulation.
6
+
7
+ The Confidence Interval Method (tables and visualisation)
8
+
9
+ The Replications Algorithm (Hoad et al. 2010).
10
+ """
11
+
12
+ import plotly.graph_objects as go
13
+ import numpy as np
14
+ import pandas as pd
15
+ from scipy.stats import t
16
+ import warnings
17
+
18
+ from typing import Protocol, runtime_checkable, Optional
19
+
20
# Error text used when an object registered as an observer does not
# satisfy the ReplicationObserver protocol.
OBSERVER_INTERFACE_ERROR = (
    "Observers of OnlineStatistics must implement "
    "ReplicationObserver interface. i.e. "
    "update(results: OnlineStatistics) -> None"
)

# Error text used when the model passed to the replications algorithm does
# not satisfy the ReplicationsAlgorithmModelAdapter protocol.
ALG_INTERFACE_ERROR = (
    "Parameter 'model' must implement "
    "ReplicationsAlgorithmModelAdapter interface. i.e. "
    "single_run(replication_no: int) -> float"
)
31
+
32
+
33
@runtime_checkable
class ReplicationObserver(Protocol):
    """
    Structural interface for objects that observe an OnlineStatistics
    instance.

    Any object that exposes an ``update(results)`` method satisfies the
    protocol; no inheritance is required.
    """

    def update(self, results) -> None:
        """
        Receive the statistics object after a new observation was added.

        Parameters:
        -----------
        results: OnlineStatistics
            The statistics object holding the latest replication.
        """
        ...
49
+
50
+
51
class OnlineStatistics:
    """
    Welford's algorithm for computing a running sample mean and
    variance. Allows computation of confidence intervals and of the half
    width expressed relative to the mean.

    This is a robust, accurate and old(ish) approach (1960s) described in
    Donald Knuth's The Art of Computer Programming, Vol 2.
    """

    def __init__(
        self,
        data: Optional[np.ndarray] = None,
        alpha: Optional[float] = 0.1,
        observer: Optional["ReplicationObserver"] = None,
    ) -> None:
        """
        Initialise Welford's algorithm for computing a running sample mean
        and variance.

        Parameters:
        -------
        data: array-like, optional (default = None)
            Contains an initial data sample. Any array-like (np.ndarray,
            list, tuple, pandas Series) is accepted.

        alpha: float
            To compute 100(1 - alpha) confidence interval

        observer: ReplicationObserver, optional (default=None)
            A user may optionally track the updates to the statistics using a
            ReplicationObserver (e.g. ReplicationTabulizer). This allows
            further tabular or visual analysis or saving results to file if
            required.
        """
        self.n = 0
        self.x_i = None
        self.mean = None
        # sum of squares of differences from the current mean
        self._sq = None
        self.alpha = alpha
        self._observers = []
        if observer is not None:
            self.register_observer(observer)

        # Accept every array-like, not only np.ndarray: a plain list used to
        # be ignored silently, which dropped the initial sample.
        if data is not None:
            for x in np.atleast_1d(data):
                self.update(x)

    def register_observer(self, observer: "ReplicationObserver") -> None:
        """
        Attach an observer that is notified after every update.

        Parameters:
        -----------
        observer: ReplicationObserver
            Object implementing update(results). Part of the observer
            pattern.

        Raises:
        -------
        ValueError
            If the object does not implement the ReplicationObserver
            interface.
        """
        if not isinstance(observer, ReplicationObserver):
            raise ValueError(OBSERVER_INTERFACE_ERROR)

        self._observers.append(observer)

    @property
    def variance(self) -> float:
        """
        Sample variance of data.
        Sum of squares of differences from the current mean divided by n - 1.
        Returns NaN while fewer than two observations are available.
        """
        # guard against division by zero (n == 1) and _sq being None (n == 0)
        if self.n < 2:
            return np.nan
        return self._sq / (self.n - 1)

    @property
    def std(self) -> float:
        """
        Standard deviation of data. NaN until more than two observations
        have been seen.
        """
        # NOTE(review): the n > 2 threshold is shared by std, lci, uci and
        # deviation and is kept as-is; confirm it is intentional (a sample
        # standard deviation is already defined at n == 2).
        if self.n > 2:
            return np.sqrt(self.variance)
        return np.nan

    @property
    def std_error(self) -> float:
        """
        Standard error of the mean
        """
        return self.std / np.sqrt(self.n)

    @property
    def half_width(self) -> float:
        """
        Confidence interval half width
        """
        dof = self.n - 1
        t_value = t.ppf(1 - (self.alpha / 2), dof)
        return t_value * self.std_error

    @property
    def lci(self) -> float:
        """
        Lower confidence interval bound
        """
        if self.n > 2:
            return self.mean - self.half_width
        return np.nan

    @property
    def uci(self) -> float:
        """
        Upper confidence interval bound
        """
        if self.n > 2:
            return self.mean + self.half_width
        return np.nan

    @property
    def deviation(self) -> float:
        """
        Precision of the confidence interval expressed as the ratio of the
        half width to the magnitude of the mean.
        """
        if self.n > 2:
            # abs() keeps the ratio non-negative for negative-valued metrics;
            # otherwise any precision target would be met trivially.
            return self.half_width / abs(self.mean)
        return np.nan

    def update(self, x: float) -> None:
        """
        Running update of mean and variance implemented using Welford's
        algorithm (1962). Observers are notified after the update.

        See Knuth. D `The Art of Computer Programming` Vol 2. 2nd ed. Page 216.

        Params:
        ------
        x: float
            A new observation
        """
        self.n += 1
        self.x_i = x

        if self.n == 1:
            # first observation initialises the running values
            self.mean = x
            self._sq = 0
        else:
            # compute the updated mean
            updated_mean = self.mean + ((x - self.mean) / self.n)

            # update the sum of squares of differences from the current mean
            self._sq += (x - self.mean) * (x - updated_mean)

            # update the tracked mean
            self.mean = updated_mean

        self.notify()

    def notify(self) -> None:
        """
        Notify any observers that an update has taken place.
        """
        for observer in self._observers:
            observer.update(self)
216
+
217
+
218
class ReplicationTabulizer:
    """
    Observer that stores the statistics reported after each replication and
    can return them as a pandas DataFrame.

    Implements the observer side of the observer pattern. The summary frame
    is equivalent to the output of confidence_interval_method.
    """

    def __init__(self):
        # number of updates received so far
        self.n = 0
        # one list per tracked statistic; each update appends one value
        self.x_i = []
        self.cumulative_mean = []
        self.stdev = []
        self.lower = []
        self.upper = []
        self.dev = []

    def update(self, results: "OnlineStatistics") -> None:
        """
        Record the current state of the observed statistics.

        Parameters:
        -----------
        results: OnlineStatistic
            The statistics object to read the latest values from.
        """
        # (target list, attribute read from results), in recording order
        recording_plan = (
            (self.x_i, "x_i"),
            (self.cumulative_mean, "mean"),
            (self.stdev, "std"),
            (self.lower, "lci"),
            (self.upper, "uci"),
            (self.dev, "deviation"),
        )
        for store, attribute in recording_plan:
            store.append(getattr(results, attribute))
        self.n += 1

    def summary_table(self) -> pd.DataFrame:
        """
        Return a dataframe of the recorded results, one row per replication,
        equivalent to the confidence interval method.
        """
        # stack the recorded series as rows, then flip to one row per rep
        series = [
            self.x_i,
            self.cumulative_mean,
            self.stdev,
            self.lower,
            self.upper,
            self.dev,
        ]
        table = pd.DataFrame(series).transpose()
        table.columns = [
            "Mean",
            "Cumulative Mean",
            "Standard Deviation",
            "Lower Interval",
            "Upper Interval",
            "% deviation",
        ]
        # replications are numbered from 1
        table.index = np.arange(1, self.n + 1)
        table.index.name = "replications"
        return table
282
+
283
+
284
def confidence_interval_method(
    replications,
    alpha: Optional[float] = 0.05,
    desired_precision: Optional[float] = 0.05,
    min_rep: Optional[int] = 5,
    decimal_places: Optional[int] = 2,
):
    """
    The confidence interval method for selecting the number of replications
    to run in a simulation.

    Finds the smallest number of replications where the deviation of the
    confidence interval half width from the cumulative mean is less than or
    equal to desired_precision.

    Returns both the number of replications and the full results dataframe.

    Parameters:
    ----------
    replications: arraylike
        Array (e.g. np.ndarray or list) of replications of a performance
        metric

    alpha: float, optional (default=0.05)
        procedure constructs a 100(1-alpha) confidence interval for the
        cumulative mean.

    desired_precision: float, optional (default=0.05)
        Desired mean deviation from confidence interval.

    min_rep: int, optional (default=5)
        The first min_rep rows of the results are ignored when selecting the
        number of replications to run to achieve the desired precision.
        Useful when the number of replications returned does not provide a
        stable precision below target.

    decimal_places: int, optional (default=2)
        sets the number of decimal places of the returned dataframe
        containing the results

    Returns:
    --------
    tuple: int, pd.DataFrame
        The selected number of replications (-1, with a warning, when the
        precision is never reached) and the rounded results table.
    """
    # Work on a positional ndarray: lists and pandas Series are documented
    # inputs, and label-based indexing or non-ndarray slices must not change
    # which observations are processed.
    replications = np.asarray(replications)

    # welford's method to track cumulative mean and construct CIs at each rep
    # track the process and construct data table using ReplicationTabulizer
    observer = ReplicationTabulizer()
    stats = OnlineStatistics(
        alpha=alpha, data=replications[:2], observer=observer
    )

    # feed the remaining replications one at a time
    for value in replications[2:]:
        stats.update(value)

    results = observer.summary_table()

    # smallest no. of reps (after skipping min_rep rows) meeting the target;
    # NaN deviations compare False and are therefore never selected
    candidates = results.iloc[min_rep:]
    achieved = candidates.index[
        candidates["% deviation"] <= desired_precision
    ]

    if len(achieved) > 0:
        n_reps = achieved[0]
    else:
        # no replications with desired precision
        message = "WARNING: the replications do not reach desired precision"
        warnings.warn(message)
        n_reps = -1

    return n_reps, results.round(decimal_places)
353
+
354
+
355
def plotly_confidence_interval_method(
    n_reps, conf_ints, metric_name, figsize=(1200, 400)
):
    """
    Interactive Plotly visualization with deviation hover information

    Parameters:
    ----------
    n_reps: int
        Minimum number of reps selected. When it is not a positive number
        (e.g. the -1 returned by `confidence_interval_method` when the
        precision is not reached) no threshold line is drawn.
    conf_ints: pandas.DataFrame
        Results from `confidence_interval_method` function
    metric_name: str
        Name of the performance measure
    figsize: tuple, optional (default=(1200,400))
        Plot dimensions in pixels (width, height)

    Returns:
    -------
    plotly.graph_objects.Figure
    """
    fig = go.Figure()

    # half width relative to the cumulative mean, as a percentage
    deviation_pct = (
        (conf_ints["Upper Interval"] - conf_ints["Cumulative Mean"])
        / conf_ints["Cumulative Mean"]
        * 100
    ).round(2)
    # same hover text for both bounds, so build it once
    hover_text = [f"Deviation: {d}%" for d in deviation_pct]

    # Confidence interval bands with hover info
    for col in ("Lower Interval", "Upper Interval"):
        fig.add_trace(
            go.Scatter(
                x=conf_ints.index,
                y=conf_ints[col],
                line=dict(color="lightblue", dash="dot"),
                name=col,
                text=hover_text,
                hoverinfo="x+y+name+text",
            )
        )

    # Cumulative mean line
    fig.add_trace(
        go.Scatter(
            x=conf_ints.index,
            y=conf_ints["Cumulative Mean"],
            line=dict(color="blue", width=2),
            name="Cumulative Mean",
            hoverinfo="x+y+name",
        )
    )

    # Vertical threshold line; skipped for the "not converged" sentinel so
    # the x axis is not stretched to a meaningless negative value
    if n_reps is not None and n_reps > 0:
        fig.add_shape(
            type="line",
            x0=n_reps,
            x1=n_reps,
            y0=0,
            y1=1,
            yref="paper",
            line=dict(color="red", dash="dash"),
        )

    # Configure layout
    fig.update_layout(
        width=figsize[0],
        height=figsize[1],
        yaxis_title=f"Cumulative Mean: {metric_name}",
        hovermode="x unified",
        showlegend=True,
    )

    return fig
432
+
433
+
434
@runtime_checkable
class ReplicationsAlgorithmModelAdapter(Protocol):
    """
    Adapter interface for models used with the "Replications Algorithm".

    Every model handed to ReplicationsAlgorithm has to expose a
    single_run(replication_number) method.
    """

    def single_run(self, replication_number: int) -> float:
        """
        Run one unique replication of the model and return the value of the
        performance measure.
        """
        ...
448
+
449
+
450
class ReplicationsAlgorithm:
    """
    An implementation of the "Replications Algorithm" from
    Hoad, Robinson, & Davies (2010).

    Given a model's performance measure, and a user set CI half width
    precision automatically select the number of replications.

    Combines the "confidence intervals" method with a sequential look-ahead
    procedure to determine if a desired precision in CI is maintained when
    achieved.

    Note only works with a single performance measure

    Sources:
    -------

    Please cite the authors of the algorithm if you use it in your work.

    Hoad, Robinson, & Davies (2010). Automated selection of the number of
    replications for a discrete-event simulation. Journal of the Operational
    Research Society. https://www.jstor.org/stable/40926090

    Please also cite sim-tools!
    """

    def __init__(
        self,
        alpha: Optional[float] = 0.05,
        half_width_precision: Optional[float] = 0.1,
        initial_replications: Optional[int] = 3,
        look_ahead: Optional[int] = 5,
        replication_budget: Optional[float] = 1000,
        verbose: Optional[bool] = False,
        observer: Optional["ReplicationObserver"] = None,
    ):
        """
        Initialise the replications algorithm

        Parameters:
        ----------
        alpha: float, optional (default = 0.05)
            Used to construct the 100(1-alpha) CI

        half_width_precision: float, optional (default = 0.1)
            The target half width precision for the algorithm
            i.e. deviation of the interval from mean.

        initial_replications: int, optional (default = 3)
            Number of replications run before precision is first assessed.

        look_ahead: int, optional (default = 5)
            Recommended no. replications to look ahead to assess stability
            of precision. When the number of replications n <= 100 the
            value of look ahead is used. When n > 100 then
            look_ahead / 100 * max(n, 100) is used.

        replication_budget: int, optional (default = 1000)
            A hard limit on the number of replications. Use for larger models
            where replication runtime is a constraint.

        verbose: bool, optional (default=False)
            Display the current replication number while running

        observer: ReplicationObserver, optional (default=None)
            Include an observer object to track how statistics change as the
            algorithm runs. For example ReplicationTabulizer to return a
            table equivalent to confidence_interval_method.
        """
        self.alpha = alpha
        self.half_width_precision = half_width_precision
        self.initial_replications = initial_replications
        # look ahead when n <= 100
        self.look_ahead = look_ahead

        # hard constraint will terminate alg...
        self.replication_budget = replication_budget

        # show current replication no.
        self.verbose = verbose

        # current replication number
        self.n = self.initial_replications
        # Nsol
        self._n_solution = self.replication_budget

        self.observer = observer

    def _klimit(self) -> int:
        """
        Return the current look ahead.
        If n <= 100 this is look_ahead. If n > 100 the look ahead is scaled
        as a fraction of n. The result is truncated to an int.
        """
        return int((self.look_ahead / 100) * max(self.n, 100))

    def select(self, model: "ReplicationsAlgorithmModelAdapter") -> int:
        """
        Run the replications algorithm against a model.

        Parameters:
        ----------
        model: ReplicationsAlgorithmModelAdapter
            Object exposing single_run(replication_number) -> float.

        Returns:
        -------
        int
            The number of replications at which the precision target was
            reached and maintained over the look-ahead period. If that never
            happens within the budget a warning is issued and the
            replication budget is returned.

        Raises:
        ------
        ValueError
            If model does not implement ReplicationsAlgorithmModelAdapter.
        """
        if not isinstance(model, ReplicationsAlgorithmModelAdapter):
            raise ValueError(ALG_INTERFACE_ERROR)

        # reset per-run state so repeated calls to select() are independent
        self.n = self.initial_replications
        self._n_solution = self.replication_budget

        # run initial replications of model
        initial_results = [
            model.single_run(rep) for rep in range(self.initial_replications)
        ]

        # initialise running mean and std dev
        self.stats = OnlineStatistics(
            data=np.array(initial_results),
            alpha=self.alpha,
            observer=self.observer,
        )

        # latest simulated result that is not yet part of the statistics
        x_i = None

        while self.n <= self.replication_budget:
            if self.n > self.initial_replications:
                # update X_n and d_req
                self.stats.update(x_i)

            # precision achieved? (NaN deviation compares False)
            if self.stats.deviation <= self.half_width_precision:
                # store current solution
                self._n_solution = self.n
                converged = True

                # look ahead scales with n once n > 100; self.n is constant
                # while the look-ahead succeeds so compute the limit once
                k_limit = self._klimit()
                k = 1

                # look ahead loop
                while converged and k <= k_limit:
                    if self.verbose:
                        print(f"{self.n+k}", end=", ")

                    # simulate replication n + k
                    x_i = model.single_run(self.n + k - 1)

                    # update X_n and d_req
                    self.stats.update(x_i)

                    # check new precision
                    if self.stats.deviation > self.half_width_precision:
                        # precision not maintained; the k look-ahead
                        # replications are now part of the sample
                        converged = False
                        self.n += k
                    else:
                        k += 1

                # terminate if precision maintained over lookahead
                if converged:
                    return self._n_solution

            # precision not achieved/maintained so simulate another
            # replication, unless the budget is already exhausted
            self.n += 1
            if self.n > self.replication_budget:
                break
            if self.verbose:
                print(f"{self.n}", end=", ")
            x_i = model.single_run(self.n - 1)

        # No solution found within budget. Discard any earlier candidate
        # whose precision was not maintained so the return value matches
        # the warning.
        self._n_solution = self.replication_budget
        warnings.warn(
            "Algorithm did not converge for metric. "
            + "Returning replication budget as solution"
        )
        return self._n_solution
@@ -1,4 +0,0 @@
1
- __version__ = '0.6.0'
2
- __author__ = 'Thomas Monks'
3
-
4
- from . import datasets, distributions, time_dependent, ovs
File without changes
File without changes
File without changes