iita-python 0.0.post42__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,4 @@
1
+ from .dataset import Dataset
2
+ from .quasiorder import ind_gen, unfold_examples
3
+
4
+ __all__ = ['Dataset', 'unfold_examples', 'ind_gen']
@@ -0,0 +1,34 @@
1
# file generated by setuptools-scm
# don't change, don't track in version control

__all__ = [
    "__version__",
    "__version_tuple__",
    "version",
    "version_tuple",
    "__commit_id__",
    "commit_id",
]

# The typed aliases exist only for static type checkers; at runtime the
# placeholders below keep the names importable without pulling in typing.
TYPE_CHECKING = False
if TYPE_CHECKING:
    from typing import Tuple
    from typing import Union

    VERSION_TUPLE = Tuple[Union[int, str], ...]
    COMMIT_ID = Union[str, None]
else:
    VERSION_TUPLE = object
    COMMIT_ID = object

version: str
__version__: str
__version_tuple__: VERSION_TUPLE
version_tuple: VERSION_TUPLE
commit_id: COMMIT_ID
__commit_id__: COMMIT_ID

__version__ = version = '0.0.post42'
__version_tuple__ = version_tuple = (0, 0, 'post42')

__commit_id__ = commit_id = None
iita_python/dataset.py ADDED
@@ -0,0 +1,82 @@
1
+ import numpy as np
2
+ import numpy.typing as npt
3
+ from typing import Self, List
4
+ import pandas as pd
5
+
6
+ class Dataset():
7
+ #aliases for response_patterns, counterexamples, equiv_examples
8
+ @property
9
+ def rp(self) -> pd.DataFrame:
10
+ return self._rp
11
+ @rp.setter
12
+ def rp(self, inp: pd.DataFrame) -> None:
13
+ self._rp = inp
14
+ response_patterns = rp
15
+
16
+ @property
17
+ def ce(self) -> pd.DataFrame:
18
+ return self._ce
19
+ @ce.setter
20
+ def ce(self, inp: pd.DataFrame) -> None:
21
+ self._ce = inp
22
+ counterexamples = ce
23
+
24
+ @property
25
+ def eqe(self) -> pd.DataFrame:
26
+ return self._eqe
27
+ @eqe.setter
28
+ def eqe(self, inp: pd.DataFrame) -> None:
29
+ self._eqe = inp
30
+ equiv_examples = eqe
31
+
32
+ @property
33
+ def items(self):
34
+ return self.ce.shape[0]
35
+
36
+ @property
37
+ def subjects(self):
38
+ return self.rp.shape[0]
39
+
40
+ @property
41
+ def filled_vals(self):
42
+ return (~np.isnan(self.rp)).sum(axis=0)
43
+
44
+ def __init__(self, response_patterns: pd.DataFrame | npt.NDArray | List[List[int]]):
45
+ """
46
+ Computes the counterexamples and equivalence examples from response patterns\n
47
+ Supports pandas dataframes, numpy arrays, and python lists\n
48
+ Rows represent the subjects, columns - the items\n
49
+ """
50
+ self._rp = pd.DataFrame(response_patterns, index=None, columns=None)
51
+ self._ce = None
52
+ self._eqe = None
53
+
54
+ #counterexamples computation
55
+ self.ce = pd.DataFrame(0, index=np.arange(self.rp.shape[1]), columns=np.arange(self.rp.shape[1]))
56
+
57
+ for i in range(len(self.rp)):
58
+ #for subject i, find all cases where a=0 and b=1 (counterexamples to b->a or a <= b) and increment where they intersect
59
+ not_a = (self.rp.loc[i] == 0)
60
+ b = (self.rp.loc[i] == 1)
61
+ self.ce.loc[not_a, b] += 1
62
+
63
+ #equivalence examples computation
64
+ self.eqe = pd.DataFrame(0, index=np.arange(self.rp.shape[1]), columns=np.arange(self.rp.shape[1]))
65
+ for i in range(len(self.rp)):
66
+ #for subject i, increment all cases where a=b (examples of equivalence of a and b)
67
+ row = self.rp.loc[i].to_numpy()
68
+ self.eqe += np.equal.outer(row, row).astype(int)
69
+
70
+ def add(self, dataset_to_add: Self):
71
+ """
72
+ Add a second IITA_Dataset: concatenate the response patterns, add counterexamples and equivalence examples\n
73
+ Item amounts must match, else ValueError
74
+ """
75
+ if (self.rp.shape[1] != dataset_to_add.shape[1]):
76
+ raise ValueError('Item amounts must match')
77
+
78
+ self.rp = pd.concat(self.rp, dataset_to_add.rp)
79
+ self.ce = self.ce + dataset_to_add.ce
80
+ self.eqe = self.eqe + dataset_to_add.eqe
81
+
82
+ __iadd__ = add
@@ -0,0 +1,64 @@
1
+ import numpy as np
2
+ import numpy.typing as npt
3
+ import pandas as pd
4
+ from .dataset import Dataset
5
+ from .quasiorder import QuasiOrder
6
+
7
+ def orig_iita_fit(data: Dataset, qo: QuasiOrder):
8
+ """
9
+ Calculates the original IITA fit metric for a given dataset and quasiorder\n
10
+ """
11
+ qo_edges = qo.get_edge_list()
12
+ p = data.rp.to_numpy().sum(axis=0) / data.subjects
13
+
14
+ error = 0
15
+ for a, b in qo_edges:
16
+ error += data.ce.iloc[a, b] / (p[b] * data.subjects)
17
+
18
+ error /= len(qo_edges)
19
+
20
+ expected_ce = np.zeros(data.ce.shape)
21
+
22
+ for i in range(data.items):
23
+ for j in range(data.items):
24
+ if (i == j): continue
25
+
26
+ if (qo.full_matrix[i][j]):
27
+ expected_ce[i][j] = error * p[j] * data.subjects
28
+ else:
29
+ expected_ce[i][j] = (1 - p[i]) * p[j] * data.subjects * (1 - error)
30
+
31
+ ce = data.ce.to_numpy().flatten()
32
+ expected_ce = expected_ce.flatten()
33
+
34
+ return ((ce - expected_ce) ** 2).sum() / (data.items**2 - data.items)
35
+
36
+ def corr_iita_fit(data: Dataset, qo: QuasiOrder):
37
+ """
38
+ Calculates the corrected IITA fit metric for a given dataset and quasiorder\n
39
+ """
40
+ qo_edges = qo.get_edge_list()
41
+ p = data.rp.to_numpy().sum(axis=0) / data.subjects
42
+
43
+ error = 0
44
+ for a, b in qo_edges:
45
+ error += data.ce.iloc[a, b] / (p[b] * data.subjects)
46
+
47
+ error /= len(qo_edges)
48
+
49
+ expected_ce = np.zeros(data.ce.shape)
50
+
51
+ for i in range(data.items):
52
+ for j in range(data.items):
53
+ if (i == j): continue
54
+
55
+ if (qo.full_matrix[i][j]):
56
+ expected_ce[i][j] = error * p[j] * data.subjects
57
+ elif (not qo.full_matrix[j][i]):
58
+ expected_ce[i][j] = (1 - p[i]) * p[j] * data.subjects
59
+ else:
60
+ expected_ce[i][j] = (p[j] * data.subjects) - ((p[i] - p[i] * error) * data.subjects)
61
+
62
+ ce = data.ce.to_numpy().flatten()
63
+ expected_ce = expected_ce.flatten()
64
+ return ((ce - expected_ce) ** 2).sum() / (data.items**2 - data.items)
@@ -0,0 +1,100 @@
1
+ import numpy as np
2
+ import numpy.typing as npt
3
+ import pandas as pd
4
+
5
class QuasiOrder:
    """Wraps the full (reflexive) relation matrix of a quasiorder."""

    def __init__(self, matrix: npt.NDArray):
        # full_matrix[i][j] truthy means the pair (i, j) is in the relation
        self.full_matrix = matrix

    def get_edge_list(self, buff=0):
        """
        Returns the edge list of the quasiorder as a list of (a, b) pairs\n
        buff: int to add to each index (useful for 1-based indexing)
        """
        size = self.full_matrix.shape[0]
        # off-diagonal entries only; row-major order matches a nested loop
        return [
            [a + buff, b + buff]
            for a in range(size)
            for b in range(size)
            if a != b and self.full_matrix[a][b]
        ]
24
+
25
+ def unfold_examples(
26
+ matrix: pd.DataFrame,
27
+ relativity: npt.NDArray | None = None,
28
+ dtype=np.float32
29
+ ) -> npt.NDArray:
30
+ """
31
+ Turns an item/item metric DataFrame into
32
+ a list of tuples of the form (x, [i, j]), where matrix[i, j] = x.\n
33
+ Can input a relativity matrix, then exery x gets divided by relativity[i, j].
34
+ This can be used to account for missing values
35
+ """
36
+
37
+ dfmatrix = pd.DataFrame(matrix).astype(dtype)
38
+
39
+ rel = relativity
40
+ if (rel is None):
41
+ rel = np.ones(dfmatrix.shape, dtype=int)
42
+
43
+ dfmatrix = dfmatrix / rel
44
+
45
+ n = dfmatrix.shape[0]
46
+ pos = np.arange(n, dtype=np.int_)
47
+ i = np.repeat(pos, n)
48
+ j = np.tile(pos, n)
49
+ res = np.array(list(zip(dfmatrix.to_numpy()[i, j], i, j)), dtype=np.int_)
50
+ return res[res[:, 1] != res[:, 2]]
51
+
52
def ind_gen(counterexamples: npt.NDArray, n: int) -> list[QuasiOrder]:
    """
    Inductively generates quasiorders from counterexample edge list\n
    Counterexamples is expected to be of the form returned by unfold_examples (array of (x, i, j) tuples)\n
    """
    ce = counterexamples

    if (len(ce) == 0): raise ValueError("Counterexamples can't be empty")

    # sort examples by their counterexample count x, ascending
    ce = ce[ce[:, 0].argsort()]
    # group consecutive examples that share the same count into one level
    contracted_ce = [[]]
    for example in ce:
        if (len(contracted_ce[-1]) == 0): contracted_ce[-1].append(example)
        elif (contracted_ce[-1][0][0] == example[0]): contracted_ce[-1].append(example)
        else: contracted_ce.append([example])

    # keep only the (i, j) index pairs per group; the count is no longer needed
    ce = [[ex[1:] for ex in g] for g in contracted_ce]

    # start from the identity relation (every item implies itself)
    qos = [np.eye(n, dtype=np.int_)]
    # pairs rejected at an earlier level, retried together with later levels
    long_queue = np.empty((0, 2), dtype=np.int_)
    for group in ce:
        new_qo = qos[-1].copy()
        # candidate edges: this level's pairs plus previously rejected ones
        # NOTE(review): np.concat is the numpy >= 2.0 alias of np.concatenate
        queue = np.concat([group, long_queue], axis=0)
        queue = np.array(sorted(queue.tolist()), dtype=np.int_)
        # allow[i] stays 1 while queue[i] is still considered transitivity-safe
        allow = np.ones((len(queue)))

        # tentatively add every candidate edge to the relation
        for a, b in queue:
            new_qo[a][b] = 1

        # repeatedly drop edges that break transitivity until a fixed point:
        # removing one edge can invalidate another, hence the outer loop
        while (True):
            for i, (a, b) in enumerate(queue):
                for c in range(n):
                    if (c == a or c == b): continue

                    # (a, b) violates transitivity through c if b->c holds
                    # without a->c, or c->a holds without c->b
                    if (new_qo[b][c] and (not new_qo[a][c])) or (new_qo[c][a] and (not new_qo[c][b])):
                        new_qo[a][b] = 0
                        allow[i] = 0
                        break

            # fixed point reached: no edge was rejected in this pass
            if (allow.sum() == len(allow)): break

            # rejected pairs are deferred to later levels; survivors re-checked
            long_queue = queue[np.logical_not(allow)].copy()
            queue = queue[allow.astype(np.bool)].copy()
            allow = allow[allow.astype(np.bool)].copy()

        # only record the relation if this level actually changed anything
        if (not (qos[-1] == new_qo).all()):
            qos.append(new_qo)

    # drop the identity seed and wrap the raw matrices
    return [QuasiOrder(qo) for qo in qos[1:]]
iita_python/utils.py ADDED
@@ -0,0 +1,35 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ import os
4
+
5
+ def read_rp(
6
+ filename: str,
7
+ nan_vals: list = [],
8
+ separator: str = ',',
9
+ excel_sheet_id: int = 0
10
+ ) -> pd.DataFrame:
11
+ """
12
+ Reads a list of response patterns from a file\n
13
+ Supports all pandas-readable datatypes and .npy\n
14
+ Rows represent the respondents, columns - the items\n
15
+ Values in nan_vals get replaced by NaN in the data\n
16
+ """
17
+
18
+ #filename checks
19
+ if (not os.path.isfile(filename)):
20
+ raise ValueError('Invalid filename')
21
+ if (not os.access(filename, os.R_OK)):
22
+ raise ValueError('Unreadable file')
23
+
24
+ #response pattern reading
25
+ rp = None
26
+ if (filename[-3:] == 'xls' or filename[-4:] == 'xlsx'): #excel
27
+ rp = pd.read_excel(filename, sheet_name=excel_sheet_id, header=None, na_values=nan_vals)
28
+ elif (filename[-3:] == 'npy'): #npy
29
+ rp = pd.DataFrame(np.load(filename))
30
+
31
+ rp[rp in nan_vals] = np.nan
32
+ else: #sonstiges
33
+ rp = pd.read_table(filename, sep=separator, header=None, na_values=nan_vals)
34
+
35
+ return rp
@@ -0,0 +1,148 @@
1
+ Metadata-Version: 2.4
2
+ Name: iita_python
3
+ Version: 0.0.post42
4
+ Summary: IITA algorithm in python
5
+ Author-email: Aliaksei Badnarchuk <alexejbodnarchuk@gmail.com>
6
+ Requires-Python: >=3.9
7
+ Description-Content-Type: text/markdown
8
+ Requires-Dist: numpy
9
+ Requires-Dist: pandas
10
+
11
+ # IITA_Python
12
+
13
+ A Python implementation of the Inductive ITem Tree Analysis (IITA) algorithm for analyzing and validating quasi-orderings in psychometric data.
14
+
15
+ Intended to replicate the functionality DAKS package from R, with an OOP-style interface for simpler functionality expansion
16
+
17
+ ## Installation
18
+
19
+ ### From PyPI
20
+
21
+ ```bash
22
+ pip install iita_python
23
+ ```
24
+
25
+ ## Quick Start
26
+
27
+ ```python
28
+ from iita_python import Dataset, ind_gen, unfold_examples, orig_iita_fit
29
+ import iita_python.utils as utils
30
+
31
+ # Load response patterns from CSV
32
+ response_patterns = utils.read_rp('data.csv')
33
+
34
+ # Create Dataset: computes counterexamples and equivalence examples
35
+ data = Dataset(response_patterns)
36
+
37
+ # Extract counterexamples and generate quasi-orderings
38
+ ce = unfold_examples(data.ce)
39
+ qos = ind_gen(ce, data.items)
40
+
41
+ # Evaluate fit for each quasi-order
42
+ for i, qo in enumerate(qos):
43
+ fit = orig_iita_fit(data, qo)
44
+ print(f"Quasi-order {i}: fit = {fit:.2f}")
45
+ ```
46
+
47
+ ## Data Format
48
+
49
+ ### Input: Response Patterns
50
+
51
+ Response patterns should be a 2D array where:
52
+ - **Rows** represent subjects (respondents)
53
+ - **Columns** represent items (questions/tasks)
54
+ - **Values** are 0 (incorrect) or 1 (correct), with NaN for missing responses
55
+
56
+ Example (CSV):
57
+ ```
58
+ 1,0,1,0,1
59
+ 0,0,1,0,1
60
+ 1,1,1,1,1
61
+ ```
62
+
63
+ When reading from a file with `utils.read_rp()`, missing data can be specified via the `nan_vals` parameter.
64
+
65
+ ## Core Modules
66
+
67
+ ### `dataset.py`
68
+
69
+ **`Dataset` class**
70
+
71
+ Stores response patterns and computes derived metrics:
72
+
73
+ - `rp`: response patterns (DataFrame)
74
+ - `ce`: counterexamples - pairs (i, j) where subject has item i incorrect but item j correct
75
+ - `eqe`: equivalence examples - pairs (i, j) where subject answered items i and j identically
76
+ - `items`: number of items
77
+ - `subjects`: number of subjects
78
+ - `filled_vals`: number of non-missing responses per item
79
+
80
+ ### `quasiorder.py`
81
+
82
+ **`unfold_examples(matrix, relativity=None, dtype=np.float32)`**
83
+
84
+ Converts a 2D matrix (e.g., counterexamples or equivalence examples) into a list of (value, i, j) tuples, excluding diagonal entries. Optionally normalizes by a relativity matrix.
85
+
86
+ **`ind_gen(counterexamples, n)`**
87
+
88
+ Generates candidate quasi-orderings from counterexample data. Returns a list of quasi-order matrices (numpy arrays) that progressively include edges.
89
+
90
+ **`get_edge_list(qo_matrix, buff=0)`**
91
+
92
+ Extracts the edge list from a quasi-order matrix as a list of (i, j) pairs.
93
+
94
+ ### `fit_metrics.py`
95
+
96
+ **`orig_iita_fit(data, qo)`**
97
+
98
+ Computes the fit of a quasi-order to observed data using Schrepp's method:
99
+
100
+ 1. Estimates an error rate from counterexamples on edges in the quasi-order
101
+ 2. Predicts expected counterexamples for all item pairs under the quasi-order
102
+ 3. Computes mean squared error between observed and expected counterexamples
103
+
104
+ Returns: float (MSE, lower is better)
105
+
106
+ ## Requirements
107
+
108
+ - Python >= 3.9 (matching the package's `Requires-Python` constraint)
109
+ - numpy
110
+ - pandas
111
+
112
+ ## Testing
113
+
114
+ See the `test_ipynbs` folder. You can open the Jupyter notebooks in Google Colab and run all cells to see test results.
115
+
116
+ I am comparing my results on the PISA dataset to those of Milan Segedinac ([his implementation](https://github.com/milansegedinac/kst))
117
+
118
+ Please report any test failures in an issue
119
+
120
+ ## Contributing
121
+
122
+ Pull requests and issues are welcome. For major changes, please open an issue first to discuss.
123
+
124
+ ## IITA Overview
125
+
126
+ Schrepp (1999, 2003) developed IITA (Inductive itemm Tree Analysis) as a means to derive a surmise relation from dichotomous response patterns. Sargin and Ünlü (2009; Ünlü & Sargin, 2010) implemented two advanced versions of that procedure.
127
+
128
+ The three inductive item tree analysis algorithms are exploratory methods for extracting quasi orders (surmise relations) from data. In each algorithm, competing binary relations are generated (in the same way for all three versions), and a fit measure (differing from version to version) is computed for every relation of the selection set in order to find the quasi order that fits the data best. In all three algorithms, the idea is to estimate the numbers of counterexamples for each quasi order, and to find, over all competing quasi orders, the minimum value for the discrepancy between the observed and expected numbers of counterexamples.
129
+
130
+ The three data analysis methods differ in their choices of estimates for the expected numbers of counterexamples. (For an item pair (i,j), the number of subjects solving item j but failing to solve item i, is the corresponding number of counterexamples. Their response patterns contradict the interpretation of (i,j) as `mastering item j implies mastering item i.')
131
+
132
+ ## References
133
+
134
+ - Schrepp, M. (2001). IITA: A program for the analysis of individual item and step matrices. Unpublished technical report.
135
+ - Knowledge Space Theory: https://en.wikipedia.org/wiki/Knowledge_space
136
+
137
+ ## Author
138
+
139
+ Alexe1900, mentored and supervised by Peter Steiner from PHSG St. Gallen
140
+
141
+ ---
142
+
143
+ ## Roadmap
144
+
145
+ - [ ] Full DAKS functionality
146
+ - [ ] Performance optimizations for large datasets
147
+ - [ ] Visualization tools for quasi-orderings
148
+ - [ ] Comprehensive test suite (unit + integration)
@@ -0,0 +1,10 @@
1
+ iita_python/__init__.py,sha256=1Ja-ESGsB8qzxyobsszesm1gEpVQrtvl3LkM4nkoCCk,130
2
+ iita_python/_version.py,sha256=afZXHua5BWvwwMGeEV21Z-5LV9ovv9_qhG7bv1azHDw,716
3
+ iita_python/dataset.py,sha256=BkHPebJlfDD-2u7MwDB4vaPftxMR0qVJ-ITID8rPAYw,2800
4
+ iita_python/fit_metrics.py,sha256=nHSZYtvFaaPzRZtwpYbLcnWYmsgKAqJMn4DzhTs9k3I,2029
5
+ iita_python/quasiorder.py,sha256=6OC3g8mPiSe0EcYVzgpBTxePEpL2q4MaYTI_Z7aSFgE,3295
6
+ iita_python/utils.py,sha256=KOlf8fWDzSHqJ4SpQApMiTiUQWnpkunXE_sHiU6C6fE,1089
7
+ iita_python-0.0.post42.dist-info/METADATA,sha256=a4QA7JS05WdZCoY2cBPHNMuWhqxRSo3Sf1Lfw3C3Xm8,5309
8
+ iita_python-0.0.post42.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
9
+ iita_python-0.0.post42.dist-info/top_level.txt,sha256=ANioGbBjQ9-NRzQn_obPGc0id_6MtawEAoSEz8uqJBI,12
10
+ iita_python-0.0.post42.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ iita_python