orange-experiment-analytics 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. orange_experiment_analytics-1.0.0-py3.12-nspkg.pth +1 -0
  2. orange_experiment_analytics-1.0.0.dist-info/METADATA +26 -0
  3. orange_experiment_analytics-1.0.0.dist-info/RECORD +50 -0
  4. orange_experiment_analytics-1.0.0.dist-info/WHEEL +5 -0
  5. orange_experiment_analytics-1.0.0.dist-info/entry_points.txt +11 -0
  6. orange_experiment_analytics-1.0.0.dist-info/namespace_packages.txt +1 -0
  7. orange_experiment_analytics-1.0.0.dist-info/top_level.txt +1 -0
  8. orangecontrib/experiment_analytics/__init__.py +4 -0
  9. orangecontrib/experiment_analytics/aggregate/__init__.py +0 -0
  10. orangecontrib/experiment_analytics/aggregate/frequency.py +166 -0
  11. orangecontrib/experiment_analytics/excel_export.py +174 -0
  12. orangecontrib/experiment_analytics/letter_report.py +168 -0
  13. orangecontrib/experiment_analytics/stepwise_feature_selection.py +613 -0
  14. orangecontrib/experiment_analytics/tests/__init__.py +0 -0
  15. orangecontrib/experiment_analytics/tests/test_frequency.py +169 -0
  16. orangecontrib/experiment_analytics/tests/test_letter_report.py +119 -0
  17. orangecontrib/experiment_analytics/tests/test_stepwise_feature_selection.py +764 -0
  18. orangecontrib/experiment_analytics/tests/test_transformation_export.py +469 -0
  19. orangecontrib/experiment_analytics/transformation_export.py +209 -0
  20. orangecontrib/experiment_analytics/widgets/__init__.py +24 -0
  21. orangecontrib/experiment_analytics/widgets/icons/aggregate.svg +157 -0
  22. orangecontrib/experiment_analytics/widgets/icons/category.svg +33 -0
  23. orangecontrib/experiment_analytics/widgets/icons/comparemeans.svg +1 -0
  24. orangecontrib/experiment_analytics/widgets/icons/info.svg +8 -0
  25. orangecontrib/experiment_analytics/widgets/icons/inittransformation.svg +41 -0
  26. orangecontrib/experiment_analytics/widgets/icons/letterreport.svg +1 -0
  27. orangecontrib/experiment_analytics/widgets/icons/multifile.svg +16 -0
  28. orangecontrib/experiment_analytics/widgets/icons/savetransformations.svg +15 -0
  29. orangecontrib/experiment_analytics/widgets/icons/slicer.svg +15 -0
  30. orangecontrib/experiment_analytics/widgets/icons/stepwiseFeatureSelection.svg +167 -0
  31. orangecontrib/experiment_analytics/widgets/letter_report_widgets.py +365 -0
  32. orangecontrib/experiment_analytics/widgets/owaggregate.py +823 -0
  33. orangecontrib/experiment_analytics/widgets/owcomparemeans.py +419 -0
  34. orangecontrib/experiment_analytics/widgets/owinitializetransformation.py +73 -0
  35. orangecontrib/experiment_analytics/widgets/owletterreport.py +753 -0
  36. orangecontrib/experiment_analytics/widgets/owmultifile.py +567 -0
  37. orangecontrib/experiment_analytics/widgets/owsavetransformations.py +191 -0
  38. orangecontrib/experiment_analytics/widgets/owslicer.py +1282 -0
  39. orangecontrib/experiment_analytics/widgets/owstepwisefeatureselection.py +913 -0
  40. orangecontrib/experiment_analytics/widgets/tests/__init__.py +0 -0
  41. orangecontrib/experiment_analytics/widgets/tests/test_exported_transformations.py +806 -0
  42. orangecontrib/experiment_analytics/widgets/tests/test_letter_report_widgets.py +54 -0
  43. orangecontrib/experiment_analytics/widgets/tests/test_owaggregate.py +734 -0
  44. orangecontrib/experiment_analytics/widgets/tests/test_owcomparemeans.py +127 -0
  45. orangecontrib/experiment_analytics/widgets/tests/test_owinitializetransformation.py +82 -0
  46. orangecontrib/experiment_analytics/widgets/tests/test_owletterreport.py +352 -0
  47. orangecontrib/experiment_analytics/widgets/tests/test_owmultifile.py +305 -0
  48. orangecontrib/experiment_analytics/widgets/tests/test_owsavetransformations.py +343 -0
  49. orangecontrib/experiment_analytics/widgets/tests/test_owslicer.py +618 -0
  50. orangecontrib/experiment_analytics/widgets/tests/test_owstepwisefeatureselection.py +1501 -0
@@ -0,0 +1 @@
1
+ import sys, types, os;has_mfs = sys.version_info > (3, 5);p = os.path.join(sys._getframe(1).f_locals['sitedir'], *('orangecontrib',));importlib = has_mfs and __import__('importlib.util');has_mfs and __import__('importlib.machinery');m = has_mfs and sys.modules.setdefault('orangecontrib', importlib.util.module_from_spec(importlib.machinery.PathFinder.find_spec('orangecontrib', [os.path.dirname(p)])));m = m or sys.modules.setdefault('orangecontrib', types.ModuleType('orangecontrib'));mp = (m or []) and m.__dict__.setdefault('__path__',[]);(p not in mp) and mp.append(p)
@@ -0,0 +1,26 @@
1
+ Metadata-Version: 2.1
2
+ Name: orange-experiment-analytics
3
+ Version: 1.0.0
4
+ Summary: Add-on containing Experiment Analytics specific widgets
5
+ Home-page: https://revelo.ai/
6
+ Author: Revelo, d.o.o.
7
+ License: BSD
8
+ Keywords: orange3 add-on
9
+ Requires-Python: >=3.8
10
+ Description-Content-Type: text/markdown
11
+ Requires-Dist: AnyQt
12
+ Requires-Dist: numpy
13
+ Requires-Dist: Orange3 (>=3.31.0)
14
+ Requires-Dist: orange-widget-base
15
+ Requires-Dist: orange-canvas-core
16
+ Requires-Dist: pandas (>=0.23)
17
+ Requires-Dist: pymssql
18
+ Requires-Dist: scipy (>=1.8.0)
19
+ Requires-Dist: scikit-learn
20
+ Requires-Dist: statsmodels
21
+
22
+
23
+ Orange Experiment Analytics
24
+ ===========================
25
+
26
+ Experiment Analytics specific Orange3 add-on.
@@ -0,0 +1,50 @@
1
+ orange_experiment_analytics-1.0.0-py3.12-nspkg.pth,sha256=xeeGR3TjdoVxdFeF6T-zSwZWh6Et--EYuPWu67LxL_c,574
2
+ orangecontrib/experiment_analytics/__init__.py,sha256=ik4vFfOrN1Qy7jA722d4Ur_RR8_tXOIa9Rz8G8mJmGM,104
3
+ orangecontrib/experiment_analytics/excel_export.py,sha256=B1YLEI4AqPp2cgo2f-6fGAfoH2pvv2hPEqOU0qp4vKg,5270
4
+ orangecontrib/experiment_analytics/letter_report.py,sha256=lNhoEHlPuYilUWus6gFLijb5xX81a4xUKD_oF2vY6DQ,4716
5
+ orangecontrib/experiment_analytics/stepwise_feature_selection.py,sha256=FO6C1zMcTsxulcF7XpTmh84B4QoivG3WRXuL_6LpDUg,24369
6
+ orangecontrib/experiment_analytics/transformation_export.py,sha256=iwEda8QxFSSHCahMNeRXbkwhzFlwVE0iBlza8iTnMxk,7638
7
+ orangecontrib/experiment_analytics/aggregate/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
+ orangecontrib/experiment_analytics/aggregate/frequency.py,sha256=XEvlU2FPyOPRfpy-E8_7Sx7OjFwwTcLjyKwBfFxH81I,6059
9
+ orangecontrib/experiment_analytics/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
+ orangecontrib/experiment_analytics/tests/test_frequency.py,sha256=1SGwf8ivsUp1NniFAT0u-W1FSGzHbQobLSImbXbBfcs,5729
11
+ orangecontrib/experiment_analytics/tests/test_letter_report.py,sha256=evyajdp-bgAqV1zsIobUA6uxqntcz6-HTrS8-8A13cs,4192
12
+ orangecontrib/experiment_analytics/tests/test_stepwise_feature_selection.py,sha256=w7653RQXpNowYxGIUB89BRpSnWiwAvUJKMfsGN9yLx8,32857
13
+ orangecontrib/experiment_analytics/tests/test_transformation_export.py,sha256=SORnEOk5w8GUNk6Pc3oCWywk_-lOts6dsQ3EnwaVWVk,19931
14
+ orangecontrib/experiment_analytics/widgets/__init__.py,sha256=SpPtBSNvHO88i2x6qPPMHloizYuw8-u3JULdyV_BDWA,721
15
+ orangecontrib/experiment_analytics/widgets/letter_report_widgets.py,sha256=H1udjIR3lejyXT0Ja0U0_5JKTUnZzqB0T_ucgMzJdyg,13658
16
+ orangecontrib/experiment_analytics/widgets/owaggregate.py,sha256=5miNkl7XAs6p0Tjr7G-hPExHyoPpl5ynHUxe2FTqMMo,30632
17
+ orangecontrib/experiment_analytics/widgets/owcomparemeans.py,sha256=cpZh9IKUYo4GM6-qdX1P9NuTuFvzTkRmliUnAyHIfSA,15958
18
+ orangecontrib/experiment_analytics/widgets/owinitializetransformation.py,sha256=EQJ9CV-NGiZSoGwtB7Kxuuav67zQMldv7IuEEd2S1vY,2397
19
+ orangecontrib/experiment_analytics/widgets/owletterreport.py,sha256=6LF11JaUnVOWfBVEhAb9YBcEx3mwYVDdmscWb8M2Kqo,28501
20
+ orangecontrib/experiment_analytics/widgets/owmultifile.py,sha256=tCv_wqYm5I81Jr2eBQGuvMXgWpbW9Vr0YXCSQP_TsIM,19882
21
+ orangecontrib/experiment_analytics/widgets/owsavetransformations.py,sha256=wJgYFcjyyGCgOnmcgDddJjjOft8CaLgcRYoULAusQOA,6916
22
+ orangecontrib/experiment_analytics/widgets/owslicer.py,sha256=5pFRARaggnEbi5gaX9FJcfVzU1BzDk-CTm2ATRtK5Ks,45563
23
+ orangecontrib/experiment_analytics/widgets/owstepwisefeatureselection.py,sha256=V2JxE7jcFrzNtCIbyVipch1t1AmpoPQNB9yrv3n-qa0,35256
24
+ orangecontrib/experiment_analytics/widgets/icons/aggregate.svg,sha256=irZnCEQPC--4miJLhe0yy64b3Hr9Y4VA-oFQ-mYoFes,6073
25
+ orangecontrib/experiment_analytics/widgets/icons/category.svg,sha256=NxY45n5ZwIOBBnCtEvK7K-API2DKgsP2u_OrwsvyhBc,3291
26
+ orangecontrib/experiment_analytics/widgets/icons/comparemeans.svg,sha256=i0iq9mTbrj0eD-IdeuhICk7IRQK2ihKVYIWnZaOP3YE,2142
27
+ orangecontrib/experiment_analytics/widgets/icons/info.svg,sha256=6D-2lvHm8fwmoAA9WBmwtCs0NvyabxT8bVeK3zxp_EQ,672
28
+ orangecontrib/experiment_analytics/widgets/icons/inittransformation.svg,sha256=abkKlL1h57AQW-u1wD2v37OSOol7QKx7NJfhQfmnoDg,1717
29
+ orangecontrib/experiment_analytics/widgets/icons/letterreport.svg,sha256=Kq2zcvrh0Joxt5qEX_iX3N8DT4MKV-A13P6UZA0Y4UM,6120
30
+ orangecontrib/experiment_analytics/widgets/icons/multifile.svg,sha256=n-phOqjrx2C0NFcjxJ0m25Jx0zocH3WdRfBEQZDOuyM,1111
31
+ orangecontrib/experiment_analytics/widgets/icons/savetransformations.svg,sha256=WQCGWRi0b2Y2j-FHwpjeIRIT2qzu00ztw7Rqw6FlGgE,1656
32
+ orangecontrib/experiment_analytics/widgets/icons/slicer.svg,sha256=Qg4Vv36ZquBoSECB2XSnV9ZmjSji50Jw-RwNgu31CME,807
33
+ orangecontrib/experiment_analytics/widgets/icons/stepwiseFeatureSelection.svg,sha256=gQMY7K55cPtj4T0Zn2m3NM8O-axQLI_v0OU_pj0scd8,4528
34
+ orangecontrib/experiment_analytics/widgets/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
35
+ orangecontrib/experiment_analytics/widgets/tests/test_exported_transformations.py,sha256=nsseCebWUVsBnlzPrnwNpBcL4QKGLjGsOVoVA77V4TM,34574
36
+ orangecontrib/experiment_analytics/widgets/tests/test_letter_report_widgets.py,sha256=atYN-aI1han9dB-zx5rLiWKD1H4E3SHXAd7BjWDhvKs,1622
37
+ orangecontrib/experiment_analytics/widgets/tests/test_owaggregate.py,sha256=s42_X0PTPNiCcUdLhbMam5FrHlR5j5C0Votl2r21gm8,30412
38
+ orangecontrib/experiment_analytics/widgets/tests/test_owcomparemeans.py,sha256=YH-S0Q6Tf6lNaTJSzf-KuSvrYgVN_eeAqwLDJAaQ28I,5467
39
+ orangecontrib/experiment_analytics/widgets/tests/test_owinitializetransformation.py,sha256=ituMUyloAWuY0SI7aWy9iJU1xRtgp2UB66h2-KVYukY,3503
40
+ orangecontrib/experiment_analytics/widgets/tests/test_owletterreport.py,sha256=bKG0S-1izB4VDIKUtth5cowQ9921f20WzTEwBdReoBk,16432
41
+ orangecontrib/experiment_analytics/widgets/tests/test_owmultifile.py,sha256=yi3s3QZqq-5nAk7TErgX2L_jqxEooQjC_ZRgqIzt6r0,13426
42
+ orangecontrib/experiment_analytics/widgets/tests/test_owsavetransformations.py,sha256=kjoCGM98Qn8ajg1DM08Dd2jS8KTx1f7En4FP7ceIMoQ,14747
43
+ orangecontrib/experiment_analytics/widgets/tests/test_owslicer.py,sha256=-y5v7gddJ9EMKajDSAjBs1yFYgvSohiEuf-NUKkkmfM,26134
44
+ orangecontrib/experiment_analytics/widgets/tests/test_owstepwisefeatureselection.py,sha256=nlqz9eIkAdRh8BuNIIIrBkgwIo3jV1gMb1N3klzedHU,66338
45
+ orange_experiment_analytics-1.0.0.dist-info/METADATA,sha256=hznfiiAugrLl4BULxD2bxE2J7wIiAp3Fu7LTlogJmJY,679
46
+ orange_experiment_analytics-1.0.0.dist-info/WHEEL,sha256=AtBG6SXL3KF_v0NxLf0ehyVOh0cold-JbJYXNGorC6Q,92
47
+ orange_experiment_analytics-1.0.0.dist-info/entry_points.txt,sha256=SbKMPkaCNa7o4v-f3aDzHyK7ZBfDewRKLd_T1_g4pOQ,345
48
+ orange_experiment_analytics-1.0.0.dist-info/namespace_packages.txt,sha256=Iut-JTfT11SZHHm77_ZeszD7pZDWXcTweCbvrJpqDyQ,14
49
+ orange_experiment_analytics-1.0.0.dist-info/top_level.txt,sha256=Iut-JTfT11SZHHm77_ZeszD7pZDWXcTweCbvrJpqDyQ,14
50
+ orange_experiment_analytics-1.0.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: bdist_wheel (0.41.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,11 @@
1
+ [orange.canvas.help]
2
+ html-index = orangecontrib.experiment_analytics.widgets:WIDGET_HELP_PATH
3
+
4
+ [orange.widgets]
5
+ Experiment Analytics = orangecontrib.experiment_analytics.widgets
6
+
7
+ [orange.widgets.tutorials]
8
+ exampletutorials = orangecontrib.experiment_analytics.tutorials
9
+
10
+ [orange3.addon]
11
+ experiment_analytics = orangecontrib.experiment_analytics
@@ -0,0 +1 @@
1
+ orangecontrib
@@ -0,0 +1,4 @@
1
+ from AnyQt.QtCore import QSettings
2
+
3
# Import-time side effect: importing this package creates a default-constructed
# QSettings object and stores True under the "network/use-certs" key.
# NOTE(review): which component reads this key is not visible here -- confirm.
+ settings = QSettings()
4
+ settings.setValue("network/use-certs", True)
@@ -0,0 +1,166 @@
1
+ from functools import partial
2
+ from typing import Callable, Tuple, Optional
3
+
4
+ import numpy as np
5
+ import pandas as pd
6
+ from scipy.optimize import curve_fit
7
+ from scipy.signal import lombscargle
8
+ from sklearn.metrics import r2_score
9
+
10
# Second-level labels of the result columns, in output order. `__column_names`
# selects a subset of them with a boolean mask (the last two are optional).
+ COLUMNS = np.array(["Frequency", "Amplitude", "Half-life", "R2"])
11
+
12
+
13
def frequency(
    df: pd.DataFrame, use_damping: bool, compute_r2: bool, detrend_degree: Optional[int] = 1
) -> pd.DataFrame:
    """
    Estimate the dominant frequency of a signal and the amplitude at that
    frequency.

    Rows with missing values are dropped, at most 1000 rows are sampled (with
    a fixed random state) and the rows are sorted by the first column. The
    signal is optionally de-trended and the peak of its Lomb-Scargle
    periodogram is taken as the dominant frequency.

    Parameters
    ----------
    df
        The data -- the first column holds time, the second the signal values.
    use_damping
        Fit a damped cosine model by least squares and report the frequency
        and amplitude from that fit together with the half-life
        (``ln(2) / decay``).
    compute_r2
        Additionally report the R2 score of the fitted single-frequency model
        against the (de-trended) signal.
    detrend_degree
        Degree of the polynomial used for de-trending; None disables
        de-trending.

    Returns
    -------
    A one-row DataFrame whose columns form a MultiIndex of
    ``(name of the value column, measure)``. Every value is NaN when fewer than
    two distinct time points remain; the optional measures are NaN when the
    model fit raises a RuntimeError.
    """
    # Lomb-Scargle cost grows with the number of points, hence the cap at 1000
    cleaned = df.dropna()
    cleaned = cleaned.sample(n=min(len(cleaned), 1000), axis=0, random_state=0)
    cleaned = cleaned.sort_values(cleaned.columns[0])
    x, y = cleaned.iloc[:, 0], cleaned.iloc[:, 1]
    columns = __column_names(use_damping, compute_r2, y.name)

    if len(x.unique()) < 2:
        # neither the frequency grid nor a fit is defined for < 2 time points
        return pd.DataFrame([[np.nan] * len(columns)], columns=columns)

    x_det, y_det = __de_trending(x, y, detrend_degree)
    f, a = __periodogram(x_det, y_det)
    res = [f, a]

    if not (use_damping or compute_r2):
        return pd.DataFrame([res], columns=columns)

    # The model refines frequency/amplitude and yields the decay rate when
    # damping is requested; for R2 it is used to reconstruct the signal.
    try:
        popt, fun = __fit_model(x_det, y_det, f, a, use_damping)
        if use_damping:
            f, a, decay = popt[1:4]
            res = [f, a, np.log(2) / decay]
        if compute_r2:
            reconstructed = fun(x_det, *popt)
            res.append(r2_score(y_det, reconstructed))
    except RuntimeError:
        # The fit may not converge for signals that are not sinus-like; keep
        # the Lomb-Scargle estimates and fill the remaining measures with NaN.
        res += [np.nan for _ in columns[2:]]

    return pd.DataFrame([res], columns=columns)
81
+
82
+
83
def __column_names(damping: bool, r2: bool, series_name: str) -> pd.MultiIndex:
    """
    Build the two-level column index of the result: the first level is
    `series_name`, the second the selected measure names. "Frequency" and
    "Amplitude" are always present, "Half-life" and "R2" only on request.
    """
    selected = [True, True, damping, r2]
    pairs = [(series_name, measure) for measure in COLUMNS[selected]]
    return pd.MultiIndex.from_tuples(pairs)
86
+
87
+
88
def __freq_grid(x: pd.Series) -> np.ndarray:
    """
    Build the grid of candidate frequencies searched by Lomb-Scargle.

    The step is ``1 / (5 * span)`` and the grid stops below
    ``(n - 1) / (2 * span)``, where ``span`` is the range of `x` and ``n`` its
    length. The reasoning is described in
    https://jakevdp.github.io/blog/2015/06/13/lomb-scargle-in-python/#Frequency-spacing
    and in the paper Understanding the Lomb–Scargle Periodogram by VanderPlas JT.
    """
    span = x.max() - x.min()
    step = 1 / (5 * span)
    upper = (len(x) - 1) / 2 / span
    return np.arange(step, upper, step)
99
+
100
+
101
def __de_trending(x: pd.Series, y: pd.Series, deg: Optional[int]) -> Tuple[pd.Series, pd.Series]:
    """
    Remove a polynomial trend of degree `deg` from the signal.

    A polynomial is fitted to ``(x, y)`` and its values are subtracted from
    `y`; `x` is shifted so that its minimum becomes 0. When `deg` is None both
    series are returned untouched (`x` is not shifted in that case).
    """
    if deg is None:
        return x, y
    coefficients = np.polyfit(x, y, deg=deg)
    residual = y - np.polyval(coefficients, x)
    shifted = x - x.min()
    return shifted, residual
115
+
116
+
117
def __periodogram(x: pd.Series, y: pd.Series) -> Tuple[float, float]:
    """
    Compute the (un-normalized) Lomb-Scargle periodogram on the grid from
    `__freq_grid` and return the frequency with the highest power together
    with the amplitude derived from that power as ``sqrt(4 * power / n)``.
    """
    grid = __freq_grid(x)
    # lombscargle expects angular frequencies
    power = lombscargle(x, y, grid * 2 * np.pi, normalize=False)
    peak = np.argmax(power)
    return grid[peak], np.sqrt(power[peak] / len(x) * 4.0)
127
+
128
+
129
def __periodic_f_decay(x, theta, f, a, decay) -> float:
    """
    Model of the signal: a cosine with phase `theta`, frequency `f` and
    amplitude `a`, multiplied by ``exp(-decay * x)`` unless `decay` is None.
    Used to estimate damping and/or r2.
    """
    wave = a * np.cos(f * 2 * np.pi * x + theta)
    if decay is None:
        return wave
    return wave * np.exp(-decay * x)
135
+
136
+
137
def __fit_model(
    x: pd.Series, y: pd.Series, f: float, a: float, use_damping: bool
) -> Tuple[np.ndarray, Callable]:
    """
    Fit the periodic model with non-linear least squares.

    - With `use_damping`, phase, frequency, amplitude and decay rate are all
      fitted, starting from the Lomb-Scargle frequency and amplitude and a
      decay of 0; frequency and amplitude are bounded below by 0.
    - Otherwise frequency and amplitude stay fixed at the Lomb-Scargle values
      and only the phase is fitted (enough to reconstruct the signal for r2).

    Returns the optimal parameters and the model function they belong to.
    """
    if not use_damping:
        fun = partial(__periodic_f_decay, a=a, f=f, decay=None)
        p0 = (np.pi,)
        bounds = (-np.inf, np.inf)
    else:
        fun = __periodic_f_decay
        p0 = (np.pi, f, a, 0)
        # frequency and amplitude should be positive values
        lower = [-np.inf, 0, 0, -np.inf]
        upper = [np.inf, np.inf, np.inf, np.inf]
        bounds = (lower, upper)
    fitted = curve_fit(fun, x, y, p0=p0, bounds=bounds)
    return fitted[0], fun
157
+
158
+
159
if __name__ == "__main__":
    # Manual smoke test on a decaying sine wave. `frequency` takes the data
    # frame as its FIRST argument, followed by `use_damping` and `compute_r2`.
    x_ = np.linspace(0.1, 20 * np.pi, 1000)
    y_ = np.sin(x_) / (x_ * 0.2)
    df_ = pd.DataFrame({"x": x_, "y": y_})
    for damping_, r2_ in ((False, False), (True, False), (False, True), (True, True)):
        print(frequency(df_, damping_, r2_))
@@ -0,0 +1,174 @@
1
+ import os
2
+
3
+ import xlsxwriter
4
+ from AnyQt.QtCore import Qt
5
+ from AnyQt.QtWidgets import QFileDialog, QWidget
6
+
7
+ from Orange.widgets import gui
8
+ from Orange.widgets.utils.itemmodels import PyTableModel
9
+
10
# Custom item-data role drawn from Orange's user-role generator. `export` reads
# it for every cell; a truthy value selects a cell format with a top border.
+ BorderRole = next(gui.OrangeUserRole)
11
+
12
+
13
def export(model: PyTableModel, n_header: int, path: str):
    """
    Write the contents of a table model to an .xlsx workbook.

    The first `n_header` rows are treated as header rows (kept visible with
    frozen panes), the last of them being the "total" row. The first column is
    treated as the vertical header. Cells whose `BorderRole` data is truthy get
    a top border; the last row gets a bottom border and the first and last
    columns a right border.

    Parameters
    ----------
    model
        The model to export; the display-role data of every cell is written
        with ``write_string``.
        NOTE(review): assumes the display data is always a str -- confirm.
    n_header
        Number of header rows at the top of the model.
    path
        Path of the workbook to create.
    """
    n_rows, n_columns = model.rowCount(), model.columnCount()

    # The context manager closes the workbook even when writing a cell fails;
    # previously an exception skipped `workbook.close()` altogether.
    with xlsxwriter.Workbook(path) as workbook:

        def fmt(**properties):
            """Register a cell format with the given xlsxwriter properties."""
            return workbook.add_format(properties)

        worksheet = workbook.add_worksheet("Sheet 1")
        worksheet.freeze_panes(n_header, 0)
        worksheet.set_column(0, 0, width=30)
        worksheet.set_column(1, n_columns - 1, width=20)

        # -- header rows ------------------------------------------------------
        horizontal_vertical_header_format = fmt(
            align="center", right=1, text_wrap=True)
        horizontal_header_format = fmt(
            bold=True, align="center", text_wrap=True)
        right_horizontal_header_format = fmt(
            bold=True, align="center", text_wrap=True, right=1)

        # -- first column (vertical header) -----------------------------------
        border_vertical_header_format = fmt(
            align="left", top=1, right=1, text_wrap=True)
        vertical_header_format = fmt(
            align="left", right=1, text_wrap=True)
        bottom_border_vertical_header_format = fmt(
            align="left", top=1, bottom=1, right=1, text_wrap=True)
        bottom_vertical_header_format = fmt(
            align="left", bottom=1, right=1, text_wrap=True)

        # -- "total" row (last header row) ------------------------------------
        total_header_format = fmt(
            align="left", bold=True, italic=True, bottom=1, top=1, right=1,
            text_wrap=True)
        total_format = fmt(
            align="center", italic=True, bottom=1, top=1, text_wrap=True)
        # as in the original, this format deliberately carries no text_wrap
        right_total_format = fmt(
            align="center", italic=True, bottom=1, top=1, right=1)

        # -- body cells -------------------------------------------------------
        center_format = fmt(align="center", text_wrap=True)
        right_center_format = fmt(align="center", text_wrap=True, right=1)
        border_format = fmt(align="center", text_wrap=True, top=1)
        right_border_format = fmt(
            align="center", text_wrap=True, top=1, right=1)
        bottom_center_format = fmt(align="center", text_wrap=True, bottom=1)
        right_bottom_center_format = fmt(
            align="center", text_wrap=True, bottom=1, right=1)
        bottom_border_format = fmt(
            align="center", text_wrap=True, top=1, bottom=1)
        right_bottom_border_format = fmt(
            align="center", text_wrap=True, top=1, bottom=1, right=1)

        for i in range(n_rows):
            for j in range(n_columns):
                index = model.index(i, j)
                data = model.data(index, role=Qt.DisplayRole)
                border = model.data(index, role=BorderRole)

                # Order matters: the total row wins over the other header
                # rows, headers win over the first column, and so on.
                if i == n_header - 1 and j == n_columns - 1:
                    cell_format = right_total_format
                elif i == n_header - 1:
                    cell_format = total_header_format if j == 0 else total_format
                elif j == 0 and i < n_header:
                    cell_format = horizontal_vertical_header_format
                elif i < n_header and j == n_columns - 1:
                    cell_format = right_horizontal_header_format
                elif i < n_header:
                    cell_format = horizontal_header_format
                elif j == 0 and i == n_rows - 1:
                    cell_format = bottom_border_vertical_header_format \
                        if border else bottom_vertical_header_format
                elif j == 0:
                    cell_format = border_vertical_header_format \
                        if border else vertical_header_format
                elif i == n_rows - 1 and j == n_columns - 1:
                    cell_format = right_bottom_border_format \
                        if border else right_bottom_center_format
                elif i == n_rows - 1:
                    cell_format = bottom_border_format \
                        if border else bottom_center_format
                elif j == n_columns - 1:
                    cell_format = right_border_format \
                        if border else right_center_format
                else:
                    cell_format = border_format if border else center_format

                worksheet.write_string(i, j, data, cell_format)
166
+
167
+
168
def save(widget: QWidget, model: PyTableModel, n_rows: int):
    """
    Ask the user for a target .xlsx file (starting in the home directory) and
    export `model` to it. Nothing happens when the dialog returns no file name.

    `n_rows` is forwarded to `export` as its number of header rows.
    """
    start_dir = os.path.expanduser("~/")
    file_filter = "Microsoft Excel spreadsheet (*.xlsx)"
    filename, _ = QFileDialog.getSaveFileName(
        widget, "Save", start_dir, file_filter
    )
    if not filename:
        return
    export(model, n_rows, filename)
@@ -0,0 +1,168 @@
1
+ """
2
+ Create a letter report using the insert-and-absorb algorithm for solving CLD.
3
+ """
4
+ from typing import List, Callable
5
+
6
+ import numpy as np
7
+ from statsmodels.stats.multicomp import pairwise_tukeyhsd
8
+
9
+
10
def simple_letter_report(
        treatments: List[np.ndarray],
        threshold: float = 0.05,
) -> List[List[str]]:
    """
    Create a simple letter report.

    Every treatment is named with a capital letter (A, B, ...) in input order.
    For each treatment the report lists the letters of the treatments whose
    Tukey HSD p-value against it is at most `threshold`.

    Parameters
    ----------
    treatments : list
        List of arrays (at most 26).

    threshold : float
        Threshold for significant difference between two treatments.

    Returns
    -------
    letters: list
        One string per treatment: the comma-separated letters of the
        treatments it differs from significantly.
    """
    assert len(treatments) <= 26
    n_treatments = len(treatments)
    names = [chr(65 + k) for k in range(n_treatments)]

    # pairwise p-values
    endog = np.hstack(treatments)
    groups = np.hstack([np.full(treatment.shape, name)
                        for name, treatment in zip(names, treatments)])
    res = pairwise_tukeyhsd(endog=endog, groups=groups, alpha=threshold)

    # letters of significantly different treatments, row by row
    differs = _into_matrix(res.pvalues, n_treatments) <= threshold
    labels = np.array(names)
    return [",".join(labels[row]) for row in differs]
43
+
44
+
45
def _into_matrix(
        p_values: np.ndarray,
        n_treatments: int
) -> np.ndarray:
    """
    Expand the flat vector of pairwise p-values (upper triangle, row by row)
    into a symmetric ``n_treatments x n_treatments`` matrix with ones on the
    diagonal.
    """
    upper = np.zeros((n_treatments, n_treatments))
    upper[np.triu_indices(n_treatments, 1)] = p_values
    symmetric = upper + upper.T
    np.fill_diagonal(symmetric, 1)
    return symmetric
55
+
56
+
57
def letter_report(
        treatments: List[np.ndarray],
        threshold: float = 0.05
) -> List[List[str]]:
    """
    Create a letter report (compact letter display).

    The treatments are ranked by descending mean, compared pairwise with
    Tukey HSD, turned into a letter matrix by the insert-and-absorb algorithm
    and finally reported in the original treatment order.

    Parameters
    ----------
    treatments : list
        List of arrays.

    threshold : float
        Threshold for significant difference between two treatments.

    Returns
    -------
    letters: list
        A list of lists of letters, one inner list per treatment; positions of
        letters a treatment does not carry hold an empty string.
    """
    # rank treatments by mean, largest first
    order = np.argsort([np.mean(t) for t in treatments])[::-1]
    ranked = [treatments[k] for k in order]

    # pairwise p-values of the ranked treatments
    endog = np.hstack(ranked)
    groups = np.hstack([np.full(treatment.shape, chr(65 + position))
                        for position, treatment in enumerate(ranked)])
    res = pairwise_tukeyhsd(endog=endog, groups=groups, alpha=threshold)

    # letters in ranked order
    letters = _compute_letters(_cld(res.pvalues, len(ranked), threshold))

    # put every row back at the position of its treatment in the input
    restored = [["None"]] * len(letters)
    for original_position, row in zip(order, letters):
        restored[original_position] = row
    return restored
98
+
99
+
100
def _cld(
        p_values: np.ndarray,
        n_treatments: int,
        threshold: float = 0.05
) -> np.ndarray:
    """
    Create a compact letter display using the insert-and-absorb algorithm.
    Before obtaining p_values, the treatments should be sorted by mean.

    Parameters
    ----------
    p_values : np.ndarray
        Flat array of pairwise p-values, consumed in the order
        (0, 1), (0, 2), ..., (1, 2), ... of the treatment pairs.

    n_treatments : int
        Number of treatments.

    threshold : float, optional, default = 0.05
        Threshold for significant difference between two treatments.

    Returns
    -------
    matrix: np.ndarray of shape (n_treatments x n_letters)
        An array of 0 and 1.
    """
    assert n_treatments > 1
    assert len(p_values) > 0

    remaining = iter(p_values)
    # start with a single letter shared by all treatments
    matrix = np.ones((n_treatments, 1))
    for first in range(n_treatments):
        for second in range(first + 1, n_treatments):
            p_value = next(remaining)
            if p_value < threshold:
                # the pair differs: split the letters, then drop redundant ones
                matrix = _absorb(_insert(matrix, first, second))

    return matrix
137
+
138
+
139
def _insert(matrix: np.ndarray, t1_index: int, t2_index: int) -> np.ndarray:
    """
    Insert step: duplicate all letter columns; the first copy loses treatment
    `t2_index`, the second copy loses treatment `t1_index`, so that no column
    connects the two treatments any more. The input is left unmodified.
    """
    n_letters = matrix.shape[1]
    doubled = np.hstack((matrix, matrix))
    doubled[t2_index, :n_letters] = 0
    doubled[t1_index, n_letters:] = 0
    return doubled
145
+
146
+
147
def _absorb(matrix: np.ndarray) -> np.ndarray:
    """
    Absorb step: remove every letter column whose set of treatments is
    contained in the set of another column.

    Columns are examined from the last to the FIRST one (the first column was
    previously never examined, so a redundant first letter could survive). A
    column is only removed while another column still covers it, hence at
    least one column always remains.

    Parameters
    ----------
    matrix : np.ndarray of shape (n_treatments x n_letters)
        An array of 0 and 1.

    Returns
    -------
    matrix : np.ndarray
        The input without redundant columns; columns keep their order.
    """
    for i in range(matrix.shape[1] - 1, -1, -1):
        msk = matrix.astype(bool)
        column = msk[:, i]
        others = np.delete(msk, i, axis=1)
        # column i is a subset of another column when that column is set in
        # every row in which column i is set
        if others.shape[1] and np.all(others[column], axis=0).any():
            matrix = np.delete(matrix, i, axis=1)
    return matrix
154
+
155
+
156
def _compute_letters(matrix: np.ndarray) -> List[List[str]]:
    """
    Translate the 0/1 letter matrix into letters: column k stands for the
    k-th capital letter (column 0 is "A"); zero entries become empty strings.
    """
    n_treatments, n_letters = matrix.shape
    ordinals = np.tile(np.arange(n_letters), (n_treatments, 1)) + 65.0
    # NaN marks "treatment does not carry this letter"
    report = np.where(matrix == 0, np.nan, ordinals)
    return _to_chr_lst(report)
161
+
162
+
163
def _to_chr_lst(report: np.ndarray) -> List[List[str]]:
    """Convert a 2D array of character ordinals (NaN allowed) to nested lists
    of characters, row by row."""
    rows = []
    for ordinals in report:
        rows.append([_to_chr(ordinal) for ordinal in ordinals])
    return rows
165
+
166
+
167
def _to_chr(number: float):
    """Return the character with ordinal `number`, or "" when it is NaN."""
    if np.isnan(number):
        return ""
    return chr(int(number))