PyPI - iita-python - Versions diffs - 1.1__tar.gz → 1.2__tar.gz - Mend

iita-python 1.1__tar.gz → 1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

{iita_python-1.1/iita_python.egg-info → iita_python-1.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: iita_python
-Version: 1.1
+Version: 1.2
 Summary: IITA algorithm in python
 Author-email: Aliaksei Badnarchuk <alexejbodnarchuk@gmail.com>
 Requires-Python: >=3.9
@@ -111,7 +111,7 @@ Returns: float (MSE, lower is better)
 ## Testing
-See the `test_ipynbs` folder. You can open the Jupyter notebooks in Google Colab and run all cells to see test results.
+See the `testing` branch. You can open the Jupyter notebooks in Google Colab and run all cells to see test results.
 I am comparing my results on the PISA dataset to those of Milan Segedinac ([his implementation](https://github.com/milansegedinac/kst))

{iita_python-1.1 → iita_python-1.2}/README.md RENAMED Viewed

@@ -101,7 +101,7 @@ Returns: float (MSE, lower is better)
 ## Testing
-See the `test_ipynbs` folder. You can open the Jupyter notebooks in Google Colab and run all cells to see test results.
+See the `testing` branch. You can open the Jupyter notebooks in Google Colab and run all cells to see test results.
 I am comparing my results on the PISA dataset to those of Milan Segedinac ([his implementation](https://github.com/milansegedinac/kst))

{iita_python-1.1 → iita_python-1.2}/iita_python/_version.py RENAMED Viewed

@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID
-__version__ = version = '1.1'
-__version_tuple__ = version_tuple = (1, 1)
+__version__ = version = '1.2'
+__version_tuple__ = version_tuple = (1, 2)
-__commit_id__ = commit_id = 'g0c3bef126'
+__commit_id__ = commit_id = 'gb97b590cd'

iita_python-1.2/iita_python/additional_ce.py ADDED Viewed

@@ -0,0 +1,101 @@
+import numpy as np
+import pandas as pd
+import numpy.typing as npt
+from typing import Self, List
+from iita_python.dataset import Dataset
+def pairwise_diff_ce(rp: pd.DataFrame) -> pd.DataFrame:
+    """
+    Computes counterexamples from a response pattern DataFrame by using pairwise differences of item correspondences\n
+    Can be used for non-binary data because of not relying on strict 0/1 values\n
+    Does not support missing values in the response patterns\n
+    """
+    items = rp.shape[1]
+    subjects = rp.shape[0]
+    ce = pd.DataFrame(0, index=np.arange(items), columns=np.arange(items))
+    for i in range(subjects):
+        # for subject i, if a < b, add b - a for all item pairs (a,b)
+        # this is equivalent to ce[a][b] += 1 if a=0 and b=1, but works for non-binary data as well
+        row = rp.loc[i].to_numpy()
+        ce -= np.clip(row[:, None] - row[None, :], None, 0)
+    return ce
+def missing_value_substitution_ce(rp: pd.DataFrame) -> pd.DataFrame:
+    """
+    Computes counterexamples from a response pattern DataFrame by using pairwise differences of item correspondences\n
+    Substitutes missing values in the response patterns with the mean of the item, making some counterexample amounts fractional\n
+    Can be used for non-binary data because of not relying on strict 0/1 values\n
+    """
+    items = rp.shape[1]
+    subjects = rp.shape[0]
+    rp1 = rp.copy()
+    for i in range(items):
+        # substitute missing values in item i with the mean of the item
+        col = rp1.loc[:, i].to_numpy()
+        mean_val = np.nanmean(col)
+        col = pd.Series(col).fillna(mean_val)
+        rp1.loc[:, i] = col
+    # then calculate pairwise difference counterexamples
+    return pairwise_diff_ce(rp1)
+def relativify(calculator: callable):
+    """
+    Decorator to relativify counterexample calculators\n
+    The counterexample amounts are divided by the number of cases for each item pair where both items are not missing\n
+    """
+    def wrapper(rp: pd.DataFrame):
+        f"""
+        Computes counterexamples relative to the amount of valid cases using {calculator.__name__} as base calculator\n
+        The counterexample amounts are divided by the number of cases for each item pair where both items are not missing\n
+        """
+        ce = calculator(rp)
+        items = rp.shape[1]
+        subjects = rp.shape[0]
+        valid_cases = pd.DataFrame(0, index=np.arange(items), columns=np.arange(items))
+        for i in range(subjects):
+            #for subject i, increment all cases where neither a nor b are NaN (valid case for counterexamples)
+            not_nan = np.logical_not(rp.loc[i].isna())
+            valid_cases += np.outer(not_nan, not_nan).astype(int)
+        # avoid division by zero
+        valid_cases = valid_cases.replace(0, 1)
+        return ce / valid_cases
+    return wrapper
+class AdditionalCEDataset(Dataset):
+    def __init__(self, response_patterns: pd.DataFrame | npt.NDArray | List[List[int]]):
+        """
+        Computes the counterexamples and equivalence examples from response patterns\n
+        Supports pandas dataframes, numpy arrays, and python lists\n
+        Rows represent the subjects, columns - the items\n
+        In addition to the base Dataset, this class provides additional counterexample calculators:\n
+        - pairwise_diff_ce: computes counterexamples using pairwise differences of item correspondences, allowing for non-binary data\n
+        - missing_value_substitution_ce: computes counterexamples using pairwise differences with missing values substituted by item means\n
+        """
+        super().__init__(response_patterns)
+        self.pairwise_diff_ce = lambda self, relative=False: (
+            relativify(pairwise_diff_ce) if relative else pairwise_diff_ce
+        )(self.rp)
+        self.pairwise_diff_ce.__doc__ = pairwise_diff_ce.__doc__
+        self.missing_value_substitution_ce = lambda self, relative=False: (
+            relativify(missing_value_substitution_ce) if relative else missing_value_substitution_ce
+        )(self.rp)
+        self.missing_value_substitution_ce.__doc__ = missing_value_substitution_ce.__doc__

{iita_python-1.1 → iita_python-1.2}/iita_python/dataset.py RENAMED Viewed

@@ -31,7 +31,7 @@ class Dataset():
     @property
     def items(self):
-        return self.ce.shape[0]
+        return self.rp.shape[1]
     @property
     def subjects(self):
@@ -52,23 +52,23 @@ class Dataset():
         self._eqe = None
         #counterexamples computation
-        self.ce = pd.DataFrame(0, index=np.arange(self.rp.shape[1]), columns=np.arange(self.rp.shape[1]))
+        self.ce = pd.DataFrame(0, index=np.arange(self.items), columns=np.arange(self.items))
-        for i in range(len(self.rp)):
+        for i in range(self.subjects):
             #for subject i, increment all cases where a=0 and b=1 (counterexamples to b->a or a <= b)
             not_a = (self.rp.loc[i] == 0)
             b = (self.rp.loc[i] == 1)
             self.ce.loc[not_a, b] += 1
         #equivalence examples computation
-        self.eqe = pd.DataFrame(0, index=np.arange(self.rp.shape[1]), columns=np.arange(self.rp.shape[1]))
-        for i in range(len(self.rp)):
+        self.eqe = pd.DataFrame(0, index=np.arange(self.items), columns=np.arange(self.items))
+        for i in range(self.subjects):
             #for subject i, increment all cases where a=b (examples of equivalence of a and b)
             row = self.rp.loc[i].to_numpy()
             self.eqe += np.equal.outer(row, row).astype(int)
-        self.valid_ce_cases = pd.DataFrame(0, index=np.arange(self.rp.shape[1]), columns=np.arange(self.rp.shape[1]))
-        for i in range(len(self.rp)):
+        self.valid_ce_cases = pd.DataFrame(0, index=np.arange(self.items), columns=np.arange(self.items))
+        for i in range(self.subjects):
             #for subject i, increment all cases where neither a nor b are NaN (valid case for counterexamples)
             not_nan = np.logical_not(self.rp.loc[i].isna())
             self.valid_ce_cases += np.outer(not_nan, not_nan).astype(int)
@@ -78,7 +78,7 @@ class Dataset():
         Add a second IITA_Dataset: concatenate the response patterns, add counterexamples and equivalence examples\n
         Item amounts must match, else ValueError
         """
-        if (self.rp.shape[1] != dataset_to_add.shape[1]):
+        if (self.items != dataset_to_add.items):
             raise ValueError('Item amounts must match')
         self.rp = pd.concat(self.rp, dataset_to_add.rp)

{iita_python-1.1 → iita_python-1.2/iita_python.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: iita_python
-Version: 1.1
+Version: 1.2
 Summary: IITA algorithm in python
 Author-email: Aliaksei Badnarchuk <alexejbodnarchuk@gmail.com>
 Requires-Python: >=3.9
@@ -111,7 +111,7 @@ Returns: float (MSE, lower is better)
 ## Testing
-See the `test_ipynbs` folder. You can open the Jupyter notebooks in Google Colab and run all cells to see test results.
+See the `testing` branch. You can open the Jupyter notebooks in Google Colab and run all cells to see test results.
 I am comparing my results on the PISA dataset to those of Milan Segedinac ([his implementation](https://github.com/milansegedinac/kst))

{iita_python-1.1 → iita_python-1.2}/iita_python.egg-info/SOURCES.txt RENAMED Viewed

@@ -6,6 +6,7 @@ setup.py
 .github/workflows/release.yaml
 iita_python/__init__.py
 iita_python/_version.py
+iita_python/additional_ce.py
 iita_python/dataset.py
 iita_python/fit_metrics.py
 iita_python/quasiorder.py