PyPI - orange-experiment-analytics - Versions diffs - 1.0.0__py3-none-any.whl - Mend

orange-experiment-analytics 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (50) hide show

orange_experiment_analytics-1.0.0-py3.12-nspkg.pth ADDED Viewed

@@ -0,0 +1 @@

+ import sys, types, os;has_mfs = sys.version_info > (3, 5);p = os.path.join(sys._getframe(1).f_locals['sitedir'], *('orangecontrib',));importlib = has_mfs and __import__('importlib.util');has_mfs and __import__('importlib.machinery');m = has_mfs and sys.modules.setdefault('orangecontrib', importlib.util.module_from_spec(importlib.machinery.PathFinder.find_spec('orangecontrib', [os.path.dirname(p)])));m = m or sys.modules.setdefault('orangecontrib', types.ModuleType('orangecontrib'));mp = (m or []) and m.__dict__.setdefault('__path__',[]);(p not in mp) and mp.append(p)

orange_experiment_analytics-1.0.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,26 @@
+Metadata-Version: 2.1
+Name: orange-experiment-analytics
+Version: 1.0.0
+Summary: Add-on containing Experiment Analytics specific widgets
+Home-page: https://revelo.ai/
+Author: Revelo, d.o.o.
+License: BSD
+Keywords: orange3 add-on
+Requires-Python: >=3.8
+Description-Content-Type: text/markdown
+Requires-Dist: AnyQt
+Requires-Dist: numpy
+Requires-Dist: Orange3 (>=3.31.0)
+Requires-Dist: orange-widget-base
+Requires-Dist: orange-canvas-core
+Requires-Dist: pandas (>=0.23)
+Requires-Dist: pymssql
+Requires-Dist: scipy (>=1.8.0)
+Requires-Dist: scikit-learn
+Requires-Dist: statsmodels
+Orange Experiment Analytics
+===========================
+Experiment Analytics specific Orange3 add-on.

orange_experiment_analytics-1.0.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,50 @@
+orange_experiment_analytics-1.0.0-py3.12-nspkg.pth,sha256=xeeGR3TjdoVxdFeF6T-zSwZWh6Et--EYuPWu67LxL_c,574
+orangecontrib/experiment_analytics/__init__.py,sha256=ik4vFfOrN1Qy7jA722d4Ur_RR8_tXOIa9Rz8G8mJmGM,104
+orangecontrib/experiment_analytics/excel_export.py,sha256=B1YLEI4AqPp2cgo2f-6fGAfoH2pvv2hPEqOU0qp4vKg,5270
+orangecontrib/experiment_analytics/letter_report.py,sha256=lNhoEHlPuYilUWus6gFLijb5xX81a4xUKD_oF2vY6DQ,4716
+orangecontrib/experiment_analytics/stepwise_feature_selection.py,sha256=FO6C1zMcTsxulcF7XpTmh84B4QoivG3WRXuL_6LpDUg,24369
+orangecontrib/experiment_analytics/transformation_export.py,sha256=iwEda8QxFSSHCahMNeRXbkwhzFlwVE0iBlza8iTnMxk,7638
+orangecontrib/experiment_analytics/aggregate/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+orangecontrib/experiment_analytics/aggregate/frequency.py,sha256=XEvlU2FPyOPRfpy-E8_7Sx7OjFwwTcLjyKwBfFxH81I,6059
+orangecontrib/experiment_analytics/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+orangecontrib/experiment_analytics/tests/test_frequency.py,sha256=1SGwf8ivsUp1NniFAT0u-W1FSGzHbQobLSImbXbBfcs,5729
+orangecontrib/experiment_analytics/tests/test_letter_report.py,sha256=evyajdp-bgAqV1zsIobUA6uxqntcz6-HTrS8-8A13cs,4192
+orangecontrib/experiment_analytics/tests/test_stepwise_feature_selection.py,sha256=w7653RQXpNowYxGIUB89BRpSnWiwAvUJKMfsGN9yLx8,32857
+orangecontrib/experiment_analytics/tests/test_transformation_export.py,sha256=SORnEOk5w8GUNk6Pc3oCWywk_-lOts6dsQ3EnwaVWVk,19931
+orangecontrib/experiment_analytics/widgets/__init__.py,sha256=SpPtBSNvHO88i2x6qPPMHloizYuw8-u3JULdyV_BDWA,721
+orangecontrib/experiment_analytics/widgets/letter_report_widgets.py,sha256=H1udjIR3lejyXT0Ja0U0_5JKTUnZzqB0T_ucgMzJdyg,13658
+orangecontrib/experiment_analytics/widgets/owaggregate.py,sha256=5miNkl7XAs6p0Tjr7G-hPExHyoPpl5ynHUxe2FTqMMo,30632
+orangecontrib/experiment_analytics/widgets/owcomparemeans.py,sha256=cpZh9IKUYo4GM6-qdX1P9NuTuFvzTkRmliUnAyHIfSA,15958
+orangecontrib/experiment_analytics/widgets/owinitializetransformation.py,sha256=EQJ9CV-NGiZSoGwtB7Kxuuav67zQMldv7IuEEd2S1vY,2397
+orangecontrib/experiment_analytics/widgets/owletterreport.py,sha256=6LF11JaUnVOWfBVEhAb9YBcEx3mwYVDdmscWb8M2Kqo,28501
+orangecontrib/experiment_analytics/widgets/owmultifile.py,sha256=tCv_wqYm5I81Jr2eBQGuvMXgWpbW9Vr0YXCSQP_TsIM,19882
+orangecontrib/experiment_analytics/widgets/owsavetransformations.py,sha256=wJgYFcjyyGCgOnmcgDddJjjOft8CaLgcRYoULAusQOA,6916
+orangecontrib/experiment_analytics/widgets/owslicer.py,sha256=5pFRARaggnEbi5gaX9FJcfVzU1BzDk-CTm2ATRtK5Ks,45563
+orangecontrib/experiment_analytics/widgets/owstepwisefeatureselection.py,sha256=V2JxE7jcFrzNtCIbyVipch1t1AmpoPQNB9yrv3n-qa0,35256
+orangecontrib/experiment_analytics/widgets/icons/aggregate.svg,sha256=irZnCEQPC--4miJLhe0yy64b3Hr9Y4VA-oFQ-mYoFes,6073
+orangecontrib/experiment_analytics/widgets/icons/category.svg,sha256=NxY45n5ZwIOBBnCtEvK7K-API2DKgsP2u_OrwsvyhBc,3291
+orangecontrib/experiment_analytics/widgets/icons/comparemeans.svg,sha256=i0iq9mTbrj0eD-IdeuhICk7IRQK2ihKVYIWnZaOP3YE,2142
+orangecontrib/experiment_analytics/widgets/icons/info.svg,sha256=6D-2lvHm8fwmoAA9WBmwtCs0NvyabxT8bVeK3zxp_EQ,672
+orangecontrib/experiment_analytics/widgets/icons/inittransformation.svg,sha256=abkKlL1h57AQW-u1wD2v37OSOol7QKx7NJfhQfmnoDg,1717
+orangecontrib/experiment_analytics/widgets/icons/letterreport.svg,sha256=Kq2zcvrh0Joxt5qEX_iX3N8DT4MKV-A13P6UZA0Y4UM,6120
+orangecontrib/experiment_analytics/widgets/icons/multifile.svg,sha256=n-phOqjrx2C0NFcjxJ0m25Jx0zocH3WdRfBEQZDOuyM,1111
+orangecontrib/experiment_analytics/widgets/icons/savetransformations.svg,sha256=WQCGWRi0b2Y2j-FHwpjeIRIT2qzu00ztw7Rqw6FlGgE,1656
+orangecontrib/experiment_analytics/widgets/icons/slicer.svg,sha256=Qg4Vv36ZquBoSECB2XSnV9ZmjSji50Jw-RwNgu31CME,807
+orangecontrib/experiment_analytics/widgets/icons/stepwiseFeatureSelection.svg,sha256=gQMY7K55cPtj4T0Zn2m3NM8O-axQLI_v0OU_pj0scd8,4528
+orangecontrib/experiment_analytics/widgets/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+orangecontrib/experiment_analytics/widgets/tests/test_exported_transformations.py,sha256=nsseCebWUVsBnlzPrnwNpBcL4QKGLjGsOVoVA77V4TM,34574
+orangecontrib/experiment_analytics/widgets/tests/test_letter_report_widgets.py,sha256=atYN-aI1han9dB-zx5rLiWKD1H4E3SHXAd7BjWDhvKs,1622
+orangecontrib/experiment_analytics/widgets/tests/test_owaggregate.py,sha256=s42_X0PTPNiCcUdLhbMam5FrHlR5j5C0Votl2r21gm8,30412
+orangecontrib/experiment_analytics/widgets/tests/test_owcomparemeans.py,sha256=YH-S0Q6Tf6lNaTJSzf-KuSvrYgVN_eeAqwLDJAaQ28I,5467
+orangecontrib/experiment_analytics/widgets/tests/test_owinitializetransformation.py,sha256=ituMUyloAWuY0SI7aWy9iJU1xRtgp2UB66h2-KVYukY,3503
+orangecontrib/experiment_analytics/widgets/tests/test_owletterreport.py,sha256=bKG0S-1izB4VDIKUtth5cowQ9921f20WzTEwBdReoBk,16432
+orangecontrib/experiment_analytics/widgets/tests/test_owmultifile.py,sha256=yi3s3QZqq-5nAk7TErgX2L_jqxEooQjC_ZRgqIzt6r0,13426
+orangecontrib/experiment_analytics/widgets/tests/test_owsavetransformations.py,sha256=kjoCGM98Qn8ajg1DM08Dd2jS8KTx1f7En4FP7ceIMoQ,14747
+orangecontrib/experiment_analytics/widgets/tests/test_owslicer.py,sha256=-y5v7gddJ9EMKajDSAjBs1yFYgvSohiEuf-NUKkkmfM,26134
+orangecontrib/experiment_analytics/widgets/tests/test_owstepwisefeatureselection.py,sha256=nlqz9eIkAdRh8BuNIIIrBkgwIo3jV1gMb1N3klzedHU,66338
+orange_experiment_analytics-1.0.0.dist-info/METADATA,sha256=hznfiiAugrLl4BULxD2bxE2J7wIiAp3Fu7LTlogJmJY,679
+orange_experiment_analytics-1.0.0.dist-info/WHEEL,sha256=AtBG6SXL3KF_v0NxLf0ehyVOh0cold-JbJYXNGorC6Q,92
+orange_experiment_analytics-1.0.0.dist-info/entry_points.txt,sha256=SbKMPkaCNa7o4v-f3aDzHyK7ZBfDewRKLd_T1_g4pOQ,345
+orange_experiment_analytics-1.0.0.dist-info/namespace_packages.txt,sha256=Iut-JTfT11SZHHm77_ZeszD7pZDWXcTweCbvrJpqDyQ,14
+orange_experiment_analytics-1.0.0.dist-info/top_level.txt,sha256=Iut-JTfT11SZHHm77_ZeszD7pZDWXcTweCbvrJpqDyQ,14
+orange_experiment_analytics-1.0.0.dist-info/RECORD,,

orange_experiment_analytics-1.0.0.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,5 @@
+Wheel-Version: 1.0
+Generator: bdist_wheel (0.41.0)
+Root-Is-Purelib: true
+Tag: py3-none-any

orange_experiment_analytics-1.0.0.dist-info/entry_points.txt ADDED Viewed

@@ -0,0 +1,11 @@
+[orange.canvas.help]
+html-index = orangecontrib.experiment_analytics.widgets:WIDGET_HELP_PATH
+[orange.widgets]
+Experiment Analytics = orangecontrib.experiment_analytics.widgets
+[orange.widgets.tutorials]
+exampletutorials = orangecontrib.experiment_analytics.tutorials
+[orange3.addon]
+experiment_analytics = orangecontrib.experiment_analytics

orange_experiment_analytics-1.0.0.dist-info/namespace_packages.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+ orangecontrib

orange_experiment_analytics-1.0.0.dist-info/top_level.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+ orangecontrib

orangecontrib/experiment_analytics/__init__.py ADDED Viewed

@@ -0,0 +1,4 @@
+from AnyQt.QtCore import QSettings
+# Import-time side effect: importing this package stores True under the
+# "network/use-certs" key of a default-constructed QSettings object.
+# NOTE(review): presumably this key is read by the host application's
+# networking code -- confirm the consumer.
+settings = QSettings()
+settings.setValue("network/use-certs", True)

orangecontrib/experiment_analytics/aggregate/__init__.py ADDED Viewed

File without changes

orangecontrib/experiment_analytics/aggregate/frequency.py ADDED Viewed

@@ -0,0 +1,166 @@
+from functools import partial
+from typing import Callable, Tuple, Optional
+import numpy as np
+import pandas as pd
+from scipy.optimize import curve_fit
+from scipy.signal import lombscargle
+from sklearn.metrics import r2_score
+# Labels of every statistic `frequency` can report. `__column_names` selects
+# from this array with a mask: the first two labels are always kept, the last
+# two only when damping / R2 are requested.
+COLUMNS = np.array(["Frequency", "Amplitude", "Half-life", "R2"])
+def frequency(
+    df: pd.DataFrame, use_damping: bool, compute_r2: bool, detrend_degree: Optional[int] = 1
+) -> pd.DataFrame:
+    """
+    Aggregate a signal into its dominant frequency and amplitude.
+    Rows with missing values are dropped, at most 1000 rows are sampled with a
+    fixed seed and the rows are sorted by the first column. The signal is
+    optionally de-trended and the Lomb-Scargle periodogram yields the dominant
+    frequency and its amplitude.
+    When `use_damping` is True a damped cosine model is fitted with least
+    squares and its frequency, amplitude and half-life (`ln(2) / decay`)
+    replace the periodogram estimates. When `compute_r2` is True the fitted
+    model is evaluated on the (de-trended) signal and its R2 is reported.
+    Parameters
+    ----------
+    df
+        The data -- the first column is time, the second the function values
+    use_damping
+        The signal has damping -- fit a model that corrects the frequency and
+        amplitude and report the half-life
+    compute_r2
+        Compute R2 of the single-frequency model predictions
+    detrend_degree
+        The degree of the polynomial used for de-trending the signal.
+        Use None to disable de-trending.
+    Returns
+    -------
+    A one-row DataFrame whose columns are (series name, statistic) pairs.
+    All values are NaN when there are fewer than two distinct time points;
+    the model-based values are NaN when the fit raises RuntimeError.
+    """
+    # Lomb-Scargle cost depends on the number of points, so cap it at 1000
+    data = df.dropna()
+    data = data.sample(n=min(len(data), 1000), axis=0, random_state=0)
+    data = data.sort_values(data.columns[0])
+    x = data.iloc[:, 0]
+    y = data.iloc[:, 1]
+    columns = __column_names(use_damping, compute_r2, y.name)
+    if len(x.unique()) < 2:
+        # neither the fit nor the frequency grid is defined for a single x
+        return pd.DataFrame([[np.nan] * len(columns)], columns=columns)
+    x_det, y_det = __de_trending(x, y, detrend_degree)
+    freq, amp = __periodogram(x_det, y_det)
+    row = [freq, amp]
+    if not (use_damping or compute_r2):
+        return pd.DataFrame([row], columns=columns)
+    try:
+        # the model either corrects frequency/amplitude (damping) or only
+        # reconstructs the signal so that R2 can be measured
+        params, model = __fit_model(x_det, y_det, freq, amp, use_damping)
+        if use_damping:
+            freq, amp, decay = params[1:4]
+            row = [freq, amp, np.log(2) / decay]
+        if compute_r2:
+            row.append(r2_score(y_det, model(x_det, *params)))
+    except RuntimeError:
+        # the fit may not converge for signals that are not sinus-like; keep
+        # the Lomb-Scargle estimates and pad the remaining columns with NaN
+        row += [np.nan for _ in columns[2:]]
+    return pd.DataFrame([row], columns=columns)
+def __column_names(damping: bool, r2: bool, series_name: str) -> pd.MultiIndex:
+    """
+    Build the two-level column index of the result: (series name, statistic).
+    "Frequency" and "Amplitude" are always included; "Half-life" and "R2" are
+    included when `damping` and `r2`, respectively, are true.
+    """
+    keep = [True, True, damping, r2]
+    labels = COLUMNS[keep]
+    pairs = [(series_name, label) for label in labels]
+    return pd.MultiIndex.from_tuples(pairs)
+def __freq_grid(x: pd.Series) -> np.ndarray:
+    """
+    Return the grid of frequencies on which Lomb-Scargle searches for the
+    dominating frequency. The step is one fifth of `1 / span` and the grid
+    stops before `(n - 1) / (2 * span)`, where `span` is the range of `x` and
+    `n` its length. The reasoning is explained in
+    https://jakevdp.github.io/blog/2015/06/13/lomb-scargle-in-python/#Frequency-spacing
+    and in the paper Understanding the Lomb–Scargle Periodogram by VanderPlas JT
+    """
+    span = x.max() - x.min()
+    step = 1 / (5 * span)
+    upper = (len(x) - 1) / 2 / span
+    return np.arange(step, upper, step)
+def __de_trending(x: pd.Series, y: pd.Series, deg: Optional[int]) -> Tuple[pd.Series, pd.Series]:
+    """
+    Remove a polynomial trend of degree `deg` from the signal.
+    The polynomial is fitted to (x, y) and its values are subtracted from `y`;
+    `x` is shifted so that its minimum becomes 0.
+    When `deg` is None both series are returned untouched (`x` is not shifted).
+    """
+    if deg is None:
+        return x, y
+    coefficients = np.polyfit(x, y, deg=deg)
+    return x - x.min(), y - np.polyval(coefficients, x)
+def __periodogram(x: pd.Series, y: pd.Series) -> Tuple[float, float]:
+    """
+    Compute the Lomb-Scargle periodogram on the frequency grid and return the
+    frequency with the highest power together with its amplitude, computed as
+    `sqrt(4 * power / len(x))`.
+    """
+    grid = __freq_grid(x)
+    # the grid holds ordinary frequencies; they are passed multiplied by 2 * pi
+    power = lombscargle(x, y, grid * 2 * np.pi, normalize=False)
+    peak = np.argmax(power)
+    return grid[peak], np.sqrt(power[peak] / len(x) * 4.0)
+def __periodic_f_decay(x, theta, f, a, decay) -> float:
+    """
+    Model used to estimate damping and/or r2: a cosine with frequency `f`,
+    amplitude `a` and phase `theta`, multiplied by `exp(-decay * x)` unless
+    `decay` is None.
+    """
+    wave = a * np.cos(f * 2 * np.pi * x + theta)
+    if decay is None:
+        return wave
+    return wave * np.exp(-decay * x)
+def __fit_model(
+    x: pd.Series, y: pd.Series, f: float, a: float, use_damping: bool
+) -> Tuple[np.ndarray, Callable]:
+    """
+    Fit the periodic model with non-linear least squares and return the
+    optimal parameters together with the fitted function:
+    - without damping only the phase is fitted; frequency and amplitude stay
+      fixed at the Lomb-Scargle values (used to measure r2)
+    - with damping the phase, frequency, amplitude and decay rate are fitted,
+      starting from the Lomb-Scargle frequency and amplitude and zero decay
+    """
+    if not use_damping:
+        model = partial(__periodic_f_decay, a=a, f=f, decay=None)
+        start = (np.pi,)
+        limits = (-np.inf, np.inf)
+    else:
+        model = __periodic_f_decay
+        start = (np.pi, f, a, 0)
+        # frequency and amplitude are constrained to be non-negative
+        limits = ([-np.inf, 0, 0, -np.inf], [np.inf, np.inf, np.inf, np.inf])
+    fitted = curve_fit(model, x, y, p0=start, bounds=limits)
+    return fitted[0], model
+if __name__ == "__main__":
+    # Demo on a decaying sine: sin(x) / (0.2 * x)
+    x_ = np.linspace(0.1, 20 * np.pi, 1000)
+    y_ = np.sin(x_) / (x_ * 0.2)
+    df_ = pd.DataFrame({"x": x_, "y": y_})
+    # `frequency` takes the data frame first, followed by use_damping and
+    # compute_r2; passing the flags first fails on `False.dropna()`
+    flag_combinations_ = (
+        (False, False),
+        (True, False),
+        (False, True),
+        (True, True),
+    )
+    for use_damping_, compute_r2_ in flag_combinations_:
+        print(frequency(df_, use_damping_, compute_r2_))

orangecontrib/experiment_analytics/excel_export.py ADDED Viewed

@@ -0,0 +1,174 @@
+import os
+import xlsxwriter
+from AnyQt.QtCore import Qt
+from AnyQt.QtWidgets import QFileDialog, QWidget
+from Orange.widgets import gui
+from Orange.widgets.utils.itemmodels import PyTableModel
+# Custom item-data role taken from Orange's user-role generator. `export`
+# queries it for every cell and picks a format with a top border when the
+# stored value is truthy.
+BorderRole = next(gui.OrangeUserRole)
+def export(model: PyTableModel, n_header: int, path: str):
+    """
+    Write the contents of `model` to an .xlsx workbook at `path`.
+    The sheet is named "Sheet 1"; the first `n_header` rows are frozen, the
+    first column is 30 wide and the remaining ones 20.
+    Every cell is written as a string with a format chosen from its position:
+    - row `n_header - 1` is the italic "total" row
+    - the other rows above it are header rows (bold, except the first column)
+    - body rows are left-aligned in the first column and centered elsewhere;
+      the first and last column get a right border, the last row a bottom
+      border and cells whose `BorderRole` data is truthy a top border
+    Parameters
+    ----------
+    model
+        The table model to export; `Qt.DisplayRole` data is written
+        (presumably always strings, since `write_string` is used)
+    n_header
+        Number of leading rows treated as header
+    path
+        Path of the workbook to create
+    """
+    workbook = xlsxwriter.Workbook(path)
+    worksheet = workbook.add_worksheet("Sheet 1")
+    worksheet.freeze_panes(n_header, 0)
+    worksheet.set_column(0, 0, width=30)
+    worksheet.set_column(1, model.columnCount() - 1, width=20)
+    def make(align, wrap=True, **extra):
+        # register a single cell format with the workbook
+        props = {"align": align}
+        props.update(extra)
+        if wrap:
+            props["text_wrap"] = True
+        return workbook.add_format(props)
+    # formats are registered in a fixed order
+    header_first = make("center", right=1)
+    header_inner = make("center", bold=True)
+    header_last = make("center", bold=True, right=1)
+    first_border = make("left", top=1, right=1)
+    first_plain = make("left", right=1)
+    first_bottom_border = make("left", top=1, bottom=1, right=1)
+    first_bottom_plain = make("left", bottom=1, right=1)
+    total_first = make("left", bold=True, italic=True, bottom=1, top=1, right=1)
+    total_inner = make("center", italic=True, bottom=1, top=1)
+    # the last cell of the total row is the only format without text wrapping
+    total_last = make("center", wrap=False, italic=True, bottom=1, top=1, right=1)
+    inner_plain = make("center")
+    last_plain = make("center", right=1)
+    inner_border = make("center", top=1)
+    last_border = make("center", top=1, right=1)
+    inner_bottom_plain = make("center", bottom=1)
+    last_bottom_plain = make("center", bottom=1, right=1)
+    inner_bottom_border = make("center", top=1, bottom=1)
+    last_bottom_border = make("center", top=1, bottom=1, right=1)
+    # body formats keyed by (column kind, is last row, has top border)
+    body_formats = {
+        ("first", False, False): first_plain,
+        ("first", False, True): first_border,
+        ("first", True, False): first_bottom_plain,
+        ("first", True, True): first_bottom_border,
+        ("inner", False, False): inner_plain,
+        ("inner", False, True): inner_border,
+        ("inner", True, False): inner_bottom_plain,
+        ("inner", True, True): inner_bottom_border,
+        ("last", False, False): last_plain,
+        ("last", False, True): last_border,
+        ("last", True, False): last_bottom_plain,
+        ("last", True, True): last_bottom_border,
+    }
+    n_rows, n_columns = model.rowCount(), model.columnCount()
+    total_row = n_header - 1
+    last_row = n_rows - 1
+    last_column = n_columns - 1
+    for i in range(n_rows):
+        for j in range(n_columns):
+            index = model.index(i, j)
+            data = model.data(index, role=Qt.DisplayRole)
+            border = model.data(index, role=BorderRole)
+            if i == total_row:
+                # in the total row the last column is checked before the first
+                if j == last_column:
+                    cell_format = total_last
+                elif j == 0:
+                    cell_format = total_first
+                else:
+                    cell_format = total_inner
+            elif i < n_header:
+                if j == 0:
+                    cell_format = header_first
+                elif j == last_column:
+                    cell_format = header_last
+                else:
+                    cell_format = header_inner
+            else:
+                # outside the total row the first column wins over the last
+                if j == 0:
+                    kind = "first"
+                elif j == last_column:
+                    kind = "last"
+                else:
+                    kind = "inner"
+                cell_format = body_formats[kind, i == last_row, bool(border)]
+            worksheet.write_string(i, j, data, cell_format)
+    workbook.close()
+def save(widget: QWidget, model: PyTableModel, n_rows: int):
+    """
+    Ask for a target .xlsx file with a save dialog parented to `widget`,
+    starting in the home directory, and export `model` to it.
+    `n_rows` is forwarded to `export` as the number of header rows.
+    Nothing is written when the dialog returns an empty file name.
+    """
+    start_dir = os.path.expanduser("~/")
+    file_filter = "Microsoft Excel spreadsheet (*.xlsx)"
+    filename, _ = QFileDialog.getSaveFileName(widget, "Save", start_dir, file_filter)
+    if not filename:
+        return
+    export(model, n_rows, filename)

orangecontrib/experiment_analytics/letter_report.py ADDED Viewed

@@ -0,0 +1,168 @@
+"""
+Create a letter report using the insert-and-absorb algorithm for solving CLD.
+"""
+from typing import List, Callable
+import numpy as np
+from statsmodels.stats.multicomp import pairwise_tukeyhsd
+def simple_letter_report(
+        treatments: List[np.ndarray],
+        threshold: float = 0.05,
+) -> List[str]:
+    """
+    Create a simple letter report.
+    Treatments are labelled "A", "B", ... in input order and compared pairwise
+    with Tukey's HSD test. The entry for a treatment lists the labels of all
+    treatments whose p-value against it is at most `threshold`.
+    Parameters
+    ----------
+    treatments : list
+        List of arrays; at most 26, one per letter.
+    threshold : float
+        Threshold for significant difference between two treatments.
+    Returns
+    -------
+    letters: list
+        One string per treatment: the matching letters joined with ",".
+    """
+    # labels run from "A" to "Z", one per treatment
+    assert len(treatments) <= 26
+    n_treatments = len(treatments)
+    letters = np.array([chr(65 + i) for i in range(n_treatments)])
+    # compute p-values
+    endog = np.hstack(treatments)
+    groups = np.hstack([np.full(treatment.shape, letter)
+                        for letter, treatment in zip(letters, treatments)])
+    res = pairwise_tukeyhsd(endog=endog, groups=groups, alpha=threshold)
+    # compute letters; the diagonal of the matrix is 1, so with the usual
+    # thresholds below 1 a treatment never lists its own letter
+    matrix = _into_matrix(res.pvalues, n_treatments)
+    mask = matrix <= threshold
+    return [",".join(letters[row]) for row in mask]
+def _into_matrix(
+        p_values: np.ndarray,
+        n_treatments: int
+) -> np.ndarray:
+    """
+    Expand the flat vector of pairwise p-values (ordered like the upper
+    triangle, row by row) into a symmetric `n_treatments` x `n_treatments`
+    matrix with ones on the diagonal.
+    """
+    matrix = np.zeros((n_treatments, n_treatments))
+    rows, cols = np.triu_indices(n_treatments, 1)
+    matrix[rows, cols] = p_values
+    # mirror the values into the lower triangle
+    matrix[cols, rows] = p_values
+    np.fill_diagonal(matrix, 1)
+    return matrix
+def letter_report(
+        treatments: List[np.ndarray],
+        threshold: float = 0.05
+) -> List[List[str]]:
+    """
+    Create a letter report.
+    Treatments are ordered by decreasing mean, compared pairwise with Tukey's
+    HSD test and turned into a compact letter display; the rows of letters are
+    then put back into the original order of `treatments`.
+    Parameters
+    ----------
+    treatments : list
+        List of arrays.
+    threshold : float
+        Threshold for significant difference between two treatments.
+    Returns
+    -------
+    letters: list
+        A list of lists of letters, one list per treatment.
+    """
+    # order treatments by decreasing mean
+    order = np.argsort([np.mean(t) for t in treatments])[::-1]
+    ranked = [treatments[i] for i in order]
+    # compute p-values; groups are labelled "A", "B", ... in ranked order
+    labels = [chr(65 + k) for k in range(len(ranked))]
+    endog = np.hstack(ranked)
+    groups = np.hstack([np.full(t.shape, label)
+                        for label, t in zip(labels, ranked)])
+    res = pairwise_tukeyhsd(endog=endog, groups=groups, alpha=threshold)
+    # compute letters
+    matrix = _cld(res.pvalues, len(ranked), threshold)
+    sorted_letters = _compute_letters(matrix)
+    # undo the ordering; every placeholder is overwritten in the loop
+    restored = [["None"]] * len(sorted_letters)
+    for position, row in zip(order, sorted_letters):
+        restored[position] = row
+    return restored
+def _cld(
+        p_values: np.ndarray,
+        n_treatments: int,
+        threshold: float = 0.05
+) -> np.ndarray:
+    """
+    Create a compact letter display using the insert-and-absorb algorithm.
+    Before obtaining p_values, the treatments should be sorted by mean.
+    Parameters
+    ----------
+    p_values : np.ndarray
+        Pairwise p-values, consumed one by one in the order
+        (0, 1), (0, 2), ..., (1, 2), ... of treatment pairs.
+    n_treatments : int
+        Number of treatments.
+    threshold : float, optional, default = 0.05
+        Threshold for significant difference between two treatments.
+    Returns
+    -------
+    matrix: np.ndarray of shape (n_treatments x n_letters)
+        An array of 0 and 1.
+    """
+    assert n_treatments > 1
+    assert len(p_values) > 0
+    remaining = iter(p_values)
+    # start with a single letter shared by all treatments
+    matrix = np.ones((n_treatments, 1))
+    pairs = [(first, second)
+             for first in range(n_treatments)
+             for second in range(first + 1, n_treatments)]
+    for first, second in pairs:
+        if next(remaining) < threshold:
+            # the pair differs significantly: split the letters, then drop
+            # the columns made redundant by the split
+            matrix = _absorb(_insert(matrix, first, second))
+    return matrix
+def _insert(matrix: np.ndarray, t1_index: int, t2_index: int) -> np.ndarray:
+    """
+    Duplicate all columns of `matrix` side by side; the row `t2_index` is
+    zeroed in the first copy and the row `t1_index` in the second one.
+    The input matrix is left unchanged.
+    """
+    width = matrix.shape[1]
+    doubled = np.hstack((matrix, matrix))
+    doubled[t2_index, :width] = 0
+    doubled[t1_index, width:] = 0
+    return doubled
+def _absorb(matrix: np.ndarray) -> np.ndarray:
+    """
+    Remove every column whose non-zero rows are all contained in another
+    column. Columns are visited from the last one down to index 1; column 0
+    itself is never a candidate for removal.
+    """
+    col = matrix.shape[1] - 1
+    while col > 0:
+        present = matrix.astype(bool)
+        candidate = present[:, col]
+        others = [k for k in range(matrix.shape[1]) if k != col]
+        # the candidate is covered when it has no row missing from the other
+        if any(not np.any(candidate & ~present[:, k]) for k in others):
+            matrix = np.delete(matrix, col, axis=1)
+        col -= 1
+    return matrix
+def _compute_letters(matrix: np.ndarray) -> List[List[str]]:
+    """
+    Translate the 0/1 matrix into letters: column k stands for the letter
+    with ordinal 65 + k ("A", "B", ...); zero cells become empty strings.
+    """
+    ordinals = np.arange(matrix.shape[1]) + 65.0
+    # NaN marks the cells without a letter
+    report = np.where(matrix == 0, np.nan, ordinals)
+    return _to_chr_lst(report)
+def _to_chr_lst(report: np.ndarray) -> List[List[str]]:
+    """Convert every row of ordinals in `report` into a list of characters."""
+    return [list(map(_to_chr, ordinals)) for ordinals in report]
+def _to_chr(number: float):
+    """Return the character with ordinal `number`, or "" when it is NaN."""
+    if np.isnan(number):
+        return ""
+    return chr(int(number))