iita-python 0.0.post42__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,4 @@
1
+ from .dataset import Dataset
2
+ from .quasiorder import ind_gen, unfold_examples
3
+
4
+ __all__ = ['Dataset', 'unfold_examples', 'ind_gen']
@@ -0,0 +1,34 @@
1
# file generated by setuptools-scm
# don't change, don't track in version control

__all__ = [
    "__version__",
    "__version_tuple__",
    "version",
    "version_tuple",
    "__commit_id__",
    "commit_id",
]

# The typed aliases exist only for static type checkers; at runtime the
# placeholders below keep the names importable without pulling in typing.
TYPE_CHECKING = False
if TYPE_CHECKING:
    from typing import Tuple
    from typing import Union

    VERSION_TUPLE = Tuple[Union[int, str], ...]
    COMMIT_ID = Union[str, None]
else:
    VERSION_TUPLE = object
    COMMIT_ID = object

version: str
__version__: str
__version_tuple__: VERSION_TUPLE
version_tuple: VERSION_TUPLE
commit_id: COMMIT_ID
__commit_id__: COMMIT_ID

__version__ = version = '0.0.post42'
__version_tuple__ = version_tuple = (0, 0, 'post42')

__commit_id__ = commit_id = None
iita_python/dataset.py ADDED
@@ -0,0 +1,82 @@
1
+ import numpy as np
2
+ import numpy.typing as npt
3
+ from typing import Self, List
4
+ import pandas as pd
5
+
6
+ class Dataset():
7
+ #aliases for response_patterns, counterexamples, equiv_examples
8
+ @property
9
+ def rp(self) -> pd.DataFrame:
10
+ return self._rp
11
+ @rp.setter
12
+ def rp(self, inp: pd.DataFrame) -> None:
13
+ self._rp = inp
14
+ response_patterns = rp
15
+
16
+ @property
17
+ def ce(self) -> pd.DataFrame:
18
+ return self._ce
19
+ @ce.setter
20
+ def ce(self, inp: pd.DataFrame) -> None:
21
+ self._ce = inp
22
+ counterexamples = ce
23
+
24
+ @property
25
+ def eqe(self) -> pd.DataFrame:
26
+ return self._eqe
27
+ @eqe.setter
28
+ def eqe(self, inp: pd.DataFrame) -> None:
29
+ self._eqe = inp
30
+ equiv_examples = eqe
31
+
32
+ @property
33
+ def items(self):
34
+ return self.ce.shape[0]
35
+
36
+ @property
37
+ def subjects(self):
38
+ return self.rp.shape[0]
39
+
40
+ @property
41
+ def filled_vals(self):
42
+ return (~np.isnan(self.rp)).sum(axis=0)
43
+
44
+ def __init__(self, response_patterns: pd.DataFrame | npt.NDArray | List[List[int]]):
45
+ """
46
+ Computes the counterexamples and equivalence examples from response patterns\n
47
+ Supports pandas dataframes, numpy arrays, and python lists\n
48
+ Rows represent the subjects, columns - the items\n
49
+ """
50
+ self._rp = pd.DataFrame(response_patterns, index=None, columns=None)
51
+ self._ce = None
52
+ self._eqe = None
53
+
54
+ #counterexamples computation
55
+ self.ce = pd.DataFrame(0, index=np.arange(self.rp.shape[1]), columns=np.arange(self.rp.shape[1]))
56
+
57
+ for i in range(len(self.rp)):
58
+ #for subject i, find all cases where a=0 and b=1 (counterexamples to b->a or a <= b) and increment where they intersect
59
+ not_a = (self.rp.loc[i] == 0)
60
+ b = (self.rp.loc[i] == 1)
61
+ self.ce.loc[not_a, b] += 1
62
+
63
+ #equivalence examples computation
64
+ self.eqe = pd.DataFrame(0, index=np.arange(self.rp.shape[1]), columns=np.arange(self.rp.shape[1]))
65
+ for i in range(len(self.rp)):
66
+ #for subject i, increment all cases where a=b (examples of equivalence of a and b)
67
+ row = self.rp.loc[i].to_numpy()
68
+ self.eqe += np.equal.outer(row, row).astype(int)
69
+
70
+ def add(self, dataset_to_add: Self):
71
+ """
72
+ Add a second IITA_Dataset: concatenate the response patterns, add counterexamples and equivalence examples\n
73
+ Item amounts must match, else ValueError
74
+ """
75
+ if (self.rp.shape[1] != dataset_to_add.shape[1]):
76
+ raise ValueError('Item amounts must match')
77
+
78
+ self.rp = pd.concat(self.rp, dataset_to_add.rp)
79
+ self.ce = self.ce + dataset_to_add.ce
80
+ self.eqe = self.eqe + dataset_to_add.eqe
81
+
82
+ __iadd__ = add
@@ -0,0 +1,64 @@
1
+ import numpy as np
2
+ import numpy.typing as npt
3
+ import pandas as pd
4
+ from .dataset import Dataset
5
+ from .quasiorder import QuasiOrder
6
+
7
+ def orig_iita_fit(data: Dataset, qo: QuasiOrder):
8
+ """
9
+ Calculates the original IITA fit metric for a given dataset and quasiorder\n
10
+ """
11
+ qo_edges = qo.get_edge_list()
12
+ p = data.rp.to_numpy().sum(axis=0) / data.subjects
13
+
14
+ error = 0
15
+ for a, b in qo_edges:
16
+ error += data.ce.iloc[a, b] / (p[b] * data.subjects)
17
+
18
+ error /= len(qo_edges)
19
+
20
+ expected_ce = np.zeros(data.ce.shape)
21
+
22
+ for i in range(data.items):
23
+ for j in range(data.items):
24
+ if (i == j): continue
25
+
26
+ if (qo.full_matrix[i][j]):
27
+ expected_ce[i][j] = error * p[j] * data.subjects
28
+ else:
29
+ expected_ce[i][j] = (1 - p[i]) * p[j] * data.subjects * (1 - error)
30
+
31
+ ce = data.ce.to_numpy().flatten()
32
+ expected_ce = expected_ce.flatten()
33
+
34
+ return ((ce - expected_ce) ** 2).sum() / (data.items**2 - data.items)
35
+
36
+ def corr_iita_fit(data: Dataset, qo: QuasiOrder):
37
+ """
38
+ Calculates the corrected IITA fit metric for a given dataset and quasiorder\n
39
+ """
40
+ qo_edges = qo.get_edge_list()
41
+ p = data.rp.to_numpy().sum(axis=0) / data.subjects
42
+
43
+ error = 0
44
+ for a, b in qo_edges:
45
+ error += data.ce.iloc[a, b] / (p[b] * data.subjects)
46
+
47
+ error /= len(qo_edges)
48
+
49
+ expected_ce = np.zeros(data.ce.shape)
50
+
51
+ for i in range(data.items):
52
+ for j in range(data.items):
53
+ if (i == j): continue
54
+
55
+ if (qo.full_matrix[i][j]):
56
+ expected_ce[i][j] = error * p[j] * data.subjects
57
+ elif (not qo.full_matrix[j][i]):
58
+ expected_ce[i][j] = (1 - p[i]) * p[j] * data.subjects
59
+ else:
60
+ expected_ce[i][j] = (p[j] * data.subjects) - ((p[i] - p[i] * error) * data.subjects)
61
+
62
+ ce = data.ce.to_numpy().flatten()
63
+ expected_ce = expected_ce.flatten()
64
+ return ((ce - expected_ce) ** 2).sum() / (data.items**2 - data.items)
@@ -0,0 +1,100 @@
1
+ import numpy as np
2
+ import numpy.typing as npt
3
+ import pandas as pd
4
+
5
class QuasiOrder:
    """Wraps the full (reflexive) relation matrix of a quasiorder."""

    def __init__(self, matrix: npt.NDArray):
        # full_matrix[i][j] truthy means the pair (i, j) is in the relation
        self.full_matrix = matrix

    def get_edge_list(self, buff=0):
        """
        Returns the edge list of the quasiorder as a list of (a, b) pairs\n
        buff: int to add to each index (useful for 1-based indexing)
        """
        size = self.full_matrix.shape[0]
        # off-diagonal entries only; row-major order matches a nested loop
        return [
            [a + buff, b + buff]
            for a in range(size)
            for b in range(size)
            if a != b and self.full_matrix[a][b]
        ]
24
+
25
+ def unfold_examples(
26
+ matrix: pd.DataFrame,
27
+ relativity: npt.NDArray | None = None,
28
+ dtype=np.float32
29
+ ) -> npt.NDArray:
30
+ """
31
+ Turns an item/item metric DataFrame into
32
+ a list of tuples of the form (x, [i, j]), where matrix[i, j] = x.\n
33
+ Can input a relativity matrix, then exery x gets divided by relativity[i, j].
34
+ This can be used to account for missing values
35
+ """
36
+
37
+ dfmatrix = pd.DataFrame(matrix).astype(dtype)
38
+
39
+ rel = relativity
40
+ if (rel is None):
41
+ rel = np.ones(dfmatrix.shape, dtype=int)
42
+
43
+ dfmatrix = dfmatrix / rel
44
+
45
+ n = dfmatrix.shape[0]
46
+ pos = np.arange(n, dtype=np.int_)
47
+ i = np.repeat(pos, n)
48
+ j = np.tile(pos, n)
49
+ res = np.array(list(zip(dfmatrix.to_numpy()[i, j], i, j)), dtype=np.int_)
50
+ return res[res[:, 1] != res[:, 2]]
51
+
52
def ind_gen(counterexamples: npt.NDArray, n: int) -> list[QuasiOrder]:
    """
    Inductively generates quasiorders from counterexample edge list\n
    Counterexamples is expected to be of the form returned by unfold_examples (array of (x, i, j) tuples)\n
    """
    ce = counterexamples

    if (len(ce) == 0): raise ValueError("Counterexamples can't be empty")

    # sort examples by their counterexample count x, ascending
    ce = ce[ce[:, 0].argsort()]
    # group consecutive examples that share the same count into one level
    contracted_ce = [[]]
    for example in ce:
        if (len(contracted_ce[-1]) == 0): contracted_ce[-1].append(example)
        elif (contracted_ce[-1][0][0] == example[0]): contracted_ce[-1].append(example)
        else: contracted_ce.append([example])

    # keep only the (i, j) index pairs per group; the count is no longer needed
    ce = [[ex[1:] for ex in g] for g in contracted_ce]

    # start from the identity relation (every item implies itself)
    qos = [np.eye(n, dtype=np.int_)]
    # pairs rejected at an earlier level, retried together with later levels
    long_queue = np.empty((0, 2), dtype=np.int_)
    for group in ce:
        new_qo = qos[-1].copy()
        # candidate edges: this level's pairs plus previously rejected ones
        # NOTE(review): np.concat is the numpy >= 2.0 alias of np.concatenate
        queue = np.concat([group, long_queue], axis=0)
        queue = np.array(sorted(queue.tolist()), dtype=np.int_)
        # allow[i] stays 1 while queue[i] is still considered transitivity-safe
        allow = np.ones((len(queue)))

        # tentatively add every candidate edge to the relation
        for a, b in queue:
            new_qo[a][b] = 1

        # repeatedly drop edges that break transitivity until a fixed point:
        # removing one edge can invalidate another, hence the outer loop
        while (True):
            for i, (a, b) in enumerate(queue):
                for c in range(n):
                    if (c == a or c == b): continue

                    # (a, b) violates transitivity through c if b->c holds
                    # without a->c, or c->a holds without c->b
                    if (new_qo[b][c] and (not new_qo[a][c])) or (new_qo[c][a] and (not new_qo[c][b])):
                        new_qo[a][b] = 0
                        allow[i] = 0
                        break

            # fixed point reached: no edge was rejected in this pass
            if (allow.sum() == len(allow)): break

            # rejected pairs are deferred to later levels; survivors re-checked
            long_queue = queue[np.logical_not(allow)].copy()
            queue = queue[allow.astype(np.bool)].copy()
            allow = allow[allow.astype(np.bool)].copy()

        # only record the relation if this level actually changed anything
        if (not (qos[-1] == new_qo).all()):
            qos.append(new_qo)

    # drop the identity seed and wrap the raw matrices
    return [QuasiOrder(qo) for qo in qos[1:]]
iita_python/utils.py ADDED
@@ -0,0 +1,35 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ import os
4
+
5
+ def read_rp(
6
+ filename: str,
7
+ nan_vals: list = [],
8
+ separator: str = ',',
9
+ excel_sheet_id: int = 0
10
+ ) -> pd.DataFrame:
11
+ """
12
+ Reads a list of response patterns from a file\n
13
+ Supports all pandas-readable datatypes and .npy\n
14
+ Rows represent the respondents, columns - the items\n
15
+ Values in nan_vals get replaced by NaN in the data\n
16
+ """
17
+
18
+ #filename checks
19
+ if (not os.path.isfile(filename)):
20
+ raise ValueError('Invalid filename')
21
+ if (not os.access(filename, os.R_OK)):
22
+ raise ValueError('Unreadable file')
23
+
24
+ #response pattern reading
25
+ rp = None
26
+ if (filename[-3:] == 'xls' or filename[-4:] == 'xlsx'): #excel
27
+ rp = pd.read_excel(filename, sheet_name=excel_sheet_id, header=None, na_values=nan_vals)
28
+ elif (filename[-3:] == 'npy'): #npy
29
+ rp = pd.DataFrame(np.load(filename))
30
+
31
+ rp[rp in nan_vals] = np.nan
32
+ else: #sonstiges
33
+ rp = pd.read_table(filename, sep=separator, header=None, na_values=nan_vals)
34
+
35
+ return rp
@@ -0,0 +1,148 @@
1
+ Metadata-Version: 2.4
2
+ Name: iita_python
3
+ Version: 0.0.post42
4
+ Summary: IITA algorithm in python
5
+ Author-email: Aliaksei Badnarchuk <alexejbodnarchuk@gmail.com>
6
+ Requires-Python: >=3.9
7
+ Description-Content-Type: text/markdown
8
+ Requires-Dist: numpy
9
+ Requires-Dist: pandas
10
+
11
+ # IITA_Python
12
+
13
+ A Python implementation of the Inductive ITem Tree Analysis (IITA) algorithm for analyzing and validating quasi-orderings in psychometric data.
14
+
15
+ Intended to replicate the functionality DAKS package from R, with an OOP-style interface for simpler functionality expansion
16
+
17
+ ## Installation
18
+
19
+ ### From PyPI
20
+
21
+ ```bash
22
+ pip install iita_python
23
+ ```
24
+
25
+ ## Quick Start
26
+
27
+ ```python
28
+ from iita_python import Dataset, ind_gen, unfold_examples, orig_iita_fit
29
+ import iita_python.utils as utils
30
+
31
+ # Load response patterns from CSV
32
+ response_patterns = utils.read_rp('data.csv')
33
+
34
+ # Create Dataset: computes counterexamples and equivalence examples
35
+ data = Dataset(response_patterns)
36
+
37
+ # Extract counterexamples and generate quasi-orderings
38
+ ce = unfold_examples(data.ce)
39
+ qos = ind_gen(ce, data.items)
40
+
41
+ # Evaluate fit for each quasi-order
42
+ for i, qo in enumerate(qos):
43
+ fit = orig_iita_fit(data, qo)
44
+ print(f"Quasi-order {i}: fit = {fit:.2f}")
45
+ ```
46
+
47
+ ## Data Format
48
+
49
+ ### Input: Response Patterns
50
+
51
+ Response patterns should be a 2D array where:
52
+ - **Rows** represent subjects (respondents)
53
+ - **Columns** represent items (questions/tasks)
54
+ - **Values** are 0 (incorrect) or 1 (correct), with NaN for missing responses
55
+
56
+ Example (CSV):
57
+ ```
58
+ 1,0,1,0,1
59
+ 0,0,1,0,1
60
+ 1,1,1,1,1
61
+ ```
62
+
63
+ When reading from a file with `utils.read_rp()`, missing data can be specified via the `nan_vals` parameter.
64
+
65
+ ## Core Modules
66
+
67
+ ### `dataset.py`
68
+
69
+ **`Dataset` class**
70
+
71
+ Stores response patterns and computes derived metrics:
72
+
73
+ - `rp`: response patterns (DataFrame)
74
+ - `ce`: counterexamples - pairs (i, j) where subject has item i incorrect but item j correct
75
+ - `eqe`: equivalence examples - pairs (i, j) where subject answered items i and j identically
76
+ - `items`: number of items
77
+ - `subjects`: number of subjects
78
+ - `filled_vals`: number of non-missing responses per item
79
+
80
+ ### `quasiorder.py`
81
+
82
+ **`unfold_examples(matrix, relativity=None, dtype=np.float32)`**
83
+
84
+ Converts a 2D matrix (e.g., counterexamples or equivalence examples) into a list of (value, i, j) tuples, excluding diagonal entries. Optionally normalizes by a relativity matrix.
85
+
86
+ **`ind_gen(counterexamples, n)`**
87
+
88
+ Generates candidate quasi-orderings from counterexample data. Returns a list of quasi-order matrices (numpy arrays) that progressively include edges.
89
+
90
+ **`get_edge_list(qo_matrix, buff=0)`**
91
+
92
+ Extracts the edge list from a quasi-order matrix as a list of (i, j) pairs.
93
+
94
+ ### `fit_metrics.py`
95
+
96
+ **`orig_iita_fit(data, qo)`**
97
+
98
+ Computes the fit of a quasi-order to observed data using Schrepp's method:
99
+
100
+ 1. Estimates an error rate from counterexamples on edges in the quasi-order
101
+ 2. Predicts expected counterexamples for all item pairs under the quasi-order
102
+ 3. Computes mean squared error between observed and expected counterexamples
103
+
104
+ Returns: float (MSE, lower is better)
105
+
106
+ ## Requirements
107
+
108
+ - Python >= 3.9 (matching the package's `Requires-Python` constraint)
109
+ - numpy
110
+ - pandas
111
+
112
+ ## Testing
113
+
114
+ See the `test_ipynbs` folder. You can open the Jupyter notebooks in Google Colab and run all cells to see test results.
115
+
116
+ I am comparing my results on the PISA dataset to those of Milan Segedinac ([his implementation](https://github.com/milansegedinac/kst))
117
+
118
+ Please report any test failures in an issue
119
+
120
+ ## Contributing
121
+
122
+ Pull requests and issues are welcome. For major changes, please open an issue first to discuss.
123
+
124
+ ## IITA Overview
125
+
126
+ Schrepp (1999, 2003) developed IITA (Inductive itemm Tree Analysis) as a means to derive a surmise relation from dichotomous response patterns. Sargin and Ünlü (2009; Ünlü & Sargin, 2010) implemented two advanced versions of that procedure.
127
+
128
+ The three inductive item tree analysis algorithms are exploratory methods for extracting quasi orders (surmise relations) from data. In each algorithm, competing binary relations are generated (in the same way for all three versions), and a fit measure (differing from version to version) is computed for every relation of the selection set in order to find the quasi order that fits the data best. In all three algorithms, the idea is to estimate the numbers of counterexamples for each quasi order, and to find, over all competing quasi orders, the minimum value for the discrepancy between the observed and expected numbers of counterexamples.
129
+
130
+ The three data analysis methods differ in their choices of estimates for the expected numbers of counterexamples. (For an item pair (i,j), the number of subjects solving item j but failing to solve item i, is the corresponding number of counterexamples. Their response patterns contradict the interpretation of (i,j) as `mastering item j implies mastering item i.')
131
+
132
+ ## References
133
+
134
+ - Schrepp, M. (2001). IITA: A program for the analysis of individual item and step matrices. Unpublished technical report.
135
+ - Knowledge Space Theory: https://en.wikipedia.org/wiki/Knowledge_space
136
+
137
+ ## Author
138
+
139
+ Alexe1900, mentored and supervised by Peter Steiner from PHSG St. Gallen
140
+
141
+ ---
142
+
143
+ ## Roadmap
144
+
145
+ - [ ] Full DAKS functionality
146
+ - [ ] Performance optimizations for large datasets
147
+ - [ ] Visualization tools for quasi-orderings
148
+ - [ ] Comprehensive test suite (unit + integration)
@@ -0,0 +1,10 @@
1
+ iita_python/__init__.py,sha256=1Ja-ESGsB8qzxyobsszesm1gEpVQrtvl3LkM4nkoCCk,130
2
+ iita_python/_version.py,sha256=afZXHua5BWvwwMGeEV21Z-5LV9ovv9_qhG7bv1azHDw,716
3
+ iita_python/dataset.py,sha256=BkHPebJlfDD-2u7MwDB4vaPftxMR0qVJ-ITID8rPAYw,2800
4
+ iita_python/fit_metrics.py,sha256=nHSZYtvFaaPzRZtwpYbLcnWYmsgKAqJMn4DzhTs9k3I,2029
5
+ iita_python/quasiorder.py,sha256=6OC3g8mPiSe0EcYVzgpBTxePEpL2q4MaYTI_Z7aSFgE,3295
6
+ iita_python/utils.py,sha256=KOlf8fWDzSHqJ4SpQApMiTiUQWnpkunXE_sHiU6C6fE,1089
7
+ iita_python-0.0.post42.dist-info/METADATA,sha256=a4QA7JS05WdZCoY2cBPHNMuWhqxRSo3Sf1Lfw3C3Xm8,5309
8
+ iita_python-0.0.post42.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
9
+ iita_python-0.0.post42.dist-info/top_level.txt,sha256=ANioGbBjQ9-NRzQn_obPGc0id_6MtawEAoSEz8uqJBI,12
10
+ iita_python-0.0.post42.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ iita_python