iita-python 1.1__tar.gz → 1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {iita_python-1.1/iita_python.egg-info → iita_python-1.2}/PKG-INFO +2 -2
- {iita_python-1.1 → iita_python-1.2}/README.md +1 -1
- {iita_python-1.1 → iita_python-1.2}/iita_python/_version.py +3 -3
- iita_python-1.2/iita_python/additional_ce.py +101 -0
- {iita_python-1.1 → iita_python-1.2}/iita_python/dataset.py +9 -9
- {iita_python-1.1 → iita_python-1.2/iita_python.egg-info}/PKG-INFO +2 -2
- {iita_python-1.1 → iita_python-1.2}/iita_python.egg-info/SOURCES.txt +1 -0
- {iita_python-1.1 → iita_python-1.2}/.github/workflows/release.yaml +0 -0
- {iita_python-1.1 → iita_python-1.2}/.gitignore +0 -0
- {iita_python-1.1 → iita_python-1.2}/iita_python/__init__.py +0 -0
- {iita_python-1.1 → iita_python-1.2}/iita_python/fit_metrics.py +0 -0
- {iita_python-1.1 → iita_python-1.2}/iita_python/quasiorder.py +0 -0
- {iita_python-1.1 → iita_python-1.2}/iita_python/utils.py +0 -0
- {iita_python-1.1 → iita_python-1.2}/iita_python.egg-info/dependency_links.txt +0 -0
- {iita_python-1.1 → iita_python-1.2}/iita_python.egg-info/requires.txt +0 -0
- {iita_python-1.1 → iita_python-1.2}/iita_python.egg-info/top_level.txt +0 -0
- {iita_python-1.1 → iita_python-1.2}/pyproject.toml +0 -0
- {iita_python-1.1 → iita_python-1.2}/setuf.cfg +0 -0
- {iita_python-1.1 → iita_python-1.2}/setup.cfg +0 -0
- {iita_python-1.1 → iita_python-1.2}/setup.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: iita_python
|
|
3
|
-
Version: 1.
|
|
3
|
+
Version: 1.2
|
|
4
4
|
Summary: IITA algorithm in python
|
|
5
5
|
Author-email: Aliaksei Badnarchuk <alexejbodnarchuk@gmail.com>
|
|
6
6
|
Requires-Python: >=3.9
|
|
@@ -111,7 +111,7 @@ Returns: float (MSE, lower is better)
|
|
|
111
111
|
|
|
112
112
|
## Testing
|
|
113
113
|
|
|
114
|
-
See the `
|
|
114
|
+
See the `testing` branch. You can open the Jupyter notebooks in Google Colab and run all cells to see test results.
|
|
115
115
|
|
|
116
116
|
I am comparing my results on the PISA dataset to those of Milan Segedinac ([his implementation](https://github.com/milansegedinac/kst))
|
|
117
117
|
|
|
@@ -101,7 +101,7 @@ Returns: float (MSE, lower is better)
|
|
|
101
101
|
|
|
102
102
|
## Testing
|
|
103
103
|
|
|
104
|
-
See the `
|
|
104
|
+
See the `testing` branch. You can open the Jupyter notebooks in Google Colab and run all cells to see test results.
|
|
105
105
|
|
|
106
106
|
I am comparing my results on the PISA dataset to those of Milan Segedinac ([his implementation](https://github.com/milansegedinac/kst))
|
|
107
107
|
|
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '1.
|
|
32
|
-
__version_tuple__ = version_tuple = (1,
|
|
31
|
+
__version__ = version = '1.2'
|
|
32
|
+
__version_tuple__ = version_tuple = (1, 2)
|
|
33
33
|
|
|
34
|
-
__commit_id__ = commit_id = '
|
|
34
|
+
__commit_id__ = commit_id = 'gb97b590cd'
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import pandas as pd
|
|
3
|
+
import numpy.typing as npt
|
|
4
|
+
from typing import Self, List
|
|
5
|
+
|
|
6
|
+
from iita_python.dataset import Dataset
|
|
7
|
+
|
|
8
|
+
def pairwise_diff_ce(rp: pd.DataFrame) -> pd.DataFrame:
    """
    Compute counterexample amounts from pairwise differences of item responses.

    For every subject and every item pair (a, b) whose responses satisfy
    response[a] < response[b], the amount response[b] - response[a] is added
    to cell (a, b) of the result. On 0/1 data this is the same as counting the
    cases a=0, b=1, but it also works for non-binary (graded) responses.

    Subjects are read by position, so the row index of ``rp`` may carry any
    labels. Missing values are not supported: a NaN response propagates into
    every cell of the affected item's row and column.

    Args:
        rp: response patterns; rows are subjects, columns are items.

    Returns:
        An items x items DataFrame whose rows and columns are labelled
        0..items-1.
    """
    items = rp.shape[1]
    responses = rp.to_numpy()

    # Accumulate in a plain array and wrap it once at the end; the
    # non-in-place subtraction lets the dtype widen to float for float data.
    totals = np.zeros((items, items), dtype=np.int64)
    for row in responses:
        # row[a] - row[b] is negative exactly where a < b; clipping at 0 keeps
        # only those cells, and subtracting them adds b - a to the total.
        totals = totals - np.clip(row[:, None] - row[None, :], None, 0)

    labels = np.arange(items)
    return pd.DataFrame(totals, index=labels, columns=labels)
|
|
27
|
+
|
|
28
|
+
def missing_value_substitution_ce(rp: pd.DataFrame) -> pd.DataFrame:
    """
    Compute pairwise-difference counterexamples after imputing missing values.

    Every missing response is replaced by the mean of the observed responses
    to the same item, so some counterexample amounts become fractional. The
    imputed data is then passed to ``pairwise_diff_ce``, which also makes this
    usable for non-binary data.

    The input is not modified. Row and column labels of ``rp`` may be
    arbitrary. An item with no observed response at all has no mean and
    therefore stays missing.

    Args:
        rp: response patterns; rows are subjects, columns are items.

    Returns:
        The DataFrame produced by ``pairwise_diff_ce`` on the imputed data.
    """
    # fillna with a Series fills each column with the value stored under that
    # column's label, i.e. the per-item mean (mean() skips NaN by default).
    filled = rp.fillna(rp.mean())

    # Only the values matter from here on; a default 0..n-1 row index keeps
    # the call safe even if the calculator addresses subjects by label.
    return pairwise_diff_ce(filled.reset_index(drop=True))
|
|
49
|
+
|
|
50
|
+
def relativify(calculator: callable):
    """
    Decorator that turns a counterexample calculator into a relative one.

    The wrapped calculator's result is divided, cell by cell, by the number of
    subjects for which both items of the pair are present (not NaN).

    Args:
        calculator: function taking the response-pattern DataFrame and
            returning an items x items DataFrame labelled 0..items-1.

    Returns:
        A function with the same single ``rp`` parameter that returns the
        relative counterexample amounts.
    """
    base_name = getattr(calculator, "__name__", repr(calculator))

    def wrapper(rp: pd.DataFrame):
        ce = calculator(rp)

        items = rp.shape[1]

        # present[s, a] is 1 when subject s answered item a. The product
        # present.T @ present therefore counts, for every pair (a, b), the
        # subjects that answered both items.
        present = rp.notna().to_numpy().astype(int)
        valid_cases = present.T @ present

        # avoid division by zero for pairs that no subject answered jointly
        valid_cases[valid_cases == 0] = 1

        labels = np.arange(items)
        return ce / pd.DataFrame(valid_cases, index=labels, columns=labels)

    # An f-string placed at the top of a function body is an ordinary
    # expression, not a docstring, so the documentation is attached here.
    wrapper.__doc__ = (
        f"Computes counterexamples relative to the amount of valid cases "
        f"using {base_name} as base calculator\n"
        f"The counterexample amounts are divided by the number of cases for "
        f"each item pair where both items are not missing\n"
    )

    return wrapper
|
|
79
|
+
|
|
80
|
+
class AdditionalCEDataset(Dataset):
    """
    Dataset that offers additional counterexample calculators.

    On top of the base Dataset it provides:
    - pairwise_diff_ce: counterexamples from pairwise differences of item
      responses, allowing for non-binary data
    - missing_value_substitution_ce: the same, with missing values substituted
      by item means
    """

    def __init__(self, response_patterns: pd.DataFrame | npt.NDArray | List[List[int]]):
        """
        Computes the counterexamples and equivalence examples from response patterns\n
        Supports pandas dataframes, numpy arrays, and python lists\n
        Rows represent the subjects, columns - the items\n
        """
        super().__init__(response_patterns)

    def _run_calculator(self, calculator, args, relative):
        """Apply ``calculator`` (optionally relativified) to the response patterns."""
        # The calculators used to be stored on the instance as lambdas that
        # took ``self``. Python does not bind instance attributes, so the only
        # call that worked was ds.method(ds[, relative]). Keep accepting that
        # form: a leading Dataset argument is used as the data source.
        pending = list(args)
        target = self
        if pending and isinstance(pending[0], Dataset):
            target = pending.pop(0)
        if pending:
            relative = pending.pop(0)
        if pending:
            raise TypeError(
                f"{calculator.__name__}() received too many positional arguments"
            )

        chosen = relativify(calculator) if relative else calculator
        return chosen(target.rp)

    def pairwise_diff_ce(self, *args, relative: bool = False):
        """
        Computes counterexamples by using pairwise differences of item responses\n
        Can be used for non-binary data because of not relying on strict 0/1 values\n
        Does not support missing values in the response patterns\n
        With relative=True the amounts are divided by the number of cases
        where both items of the pair are not missing\n
        """
        # Inside a method body the bare name resolves to the module-level
        # function, not to this method.
        return self._run_calculator(pairwise_diff_ce, args, relative)

    def missing_value_substitution_ce(self, *args, relative: bool = False):
        """
        Computes counterexamples by using pairwise differences of item responses\n
        Substitutes missing values with the mean of the item, making some
        counterexample amounts fractional\n
        With relative=True the amounts are divided by the number of cases
        where both items of the pair are not missing\n
        """
        return self._run_calculator(missing_value_substitution_ce, args, relative)
|
|
@@ -31,7 +31,7 @@ class Dataset():
|
|
|
31
31
|
|
|
32
32
|
@property
|
|
33
33
|
def items(self):
|
|
34
|
-
return self.
|
|
34
|
+
return self.rp.shape[1]
|
|
35
35
|
|
|
36
36
|
@property
|
|
37
37
|
def subjects(self):
|
|
@@ -52,23 +52,23 @@ class Dataset():
|
|
|
52
52
|
self._eqe = None
|
|
53
53
|
|
|
54
54
|
#counterexamples computation
|
|
55
|
-
self.ce = pd.DataFrame(0, index=np.arange(self.
|
|
55
|
+
self.ce = pd.DataFrame(0, index=np.arange(self.items), columns=np.arange(self.items))
|
|
56
56
|
|
|
57
|
-
for i in range(
|
|
57
|
+
for i in range(self.subjects):
|
|
58
58
|
#for subject i, increment all cases where a=0 and b=1 (counterexamples to b->a or a <= b)
|
|
59
59
|
not_a = (self.rp.loc[i] == 0)
|
|
60
60
|
b = (self.rp.loc[i] == 1)
|
|
61
61
|
self.ce.loc[not_a, b] += 1
|
|
62
62
|
|
|
63
63
|
#equivalence examples computation
|
|
64
|
-
self.eqe = pd.DataFrame(0, index=np.arange(self.
|
|
65
|
-
for i in range(
|
|
64
|
+
self.eqe = pd.DataFrame(0, index=np.arange(self.items), columns=np.arange(self.items))
|
|
65
|
+
for i in range(self.subjects):
|
|
66
66
|
#for subject i, increment all cases where a=b (examples of equivalence of a and b)
|
|
67
67
|
row = self.rp.loc[i].to_numpy()
|
|
68
68
|
self.eqe += np.equal.outer(row, row).astype(int)
|
|
69
|
-
|
|
70
|
-
self.valid_ce_cases = pd.DataFrame(0, index=np.arange(self.
|
|
71
|
-
for i in range(
|
|
69
|
+
|
|
70
|
+
self.valid_ce_cases = pd.DataFrame(0, index=np.arange(self.items), columns=np.arange(self.items))
|
|
71
|
+
for i in range(self.subjects):
|
|
72
72
|
#for subject i, increment all cases where neither a nor b are NaN (valid case for counterexamples)
|
|
73
73
|
not_nan = np.logical_not(self.rp.loc[i].isna())
|
|
74
74
|
self.valid_ce_cases += np.outer(not_nan, not_nan).astype(int)
|
|
@@ -78,7 +78,7 @@ class Dataset():
|
|
|
78
78
|
Add a second IITA_Dataset: concatenate the response patterns, add counterexamples and equivalence examples\n
|
|
79
79
|
Item amounts must match, else ValueError
|
|
80
80
|
"""
|
|
81
|
-
if (self.
|
|
81
|
+
if (self.items != dataset_to_add.items):
|
|
82
82
|
raise ValueError('Item amounts must match')
|
|
83
83
|
|
|
84
84
|
self.rp = pd.concat(self.rp, dataset_to_add.rp)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: iita_python
|
|
3
|
-
Version: 1.
|
|
3
|
+
Version: 1.2
|
|
4
4
|
Summary: IITA algorithm in python
|
|
5
5
|
Author-email: Aliaksei Badnarchuk <alexejbodnarchuk@gmail.com>
|
|
6
6
|
Requires-Python: >=3.9
|
|
@@ -111,7 +111,7 @@ Returns: float (MSE, lower is better)
|
|
|
111
111
|
|
|
112
112
|
## Testing
|
|
113
113
|
|
|
114
|
-
See the `
|
|
114
|
+
See the `testing` branch. You can open the Jupyter notebooks in Google Colab and run all cells to see test results.
|
|
115
115
|
|
|
116
116
|
I am comparing my results on the PISA dataset to those of Milan Segedinac ([his implementation](https://github.com/milansegedinac/kst))
|
|
117
117
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|