iita-python 1.1__tar.gz → 1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: iita_python
3
- Version: 1.1
3
+ Version: 1.2
4
4
  Summary: IITA algorithm in python
5
5
  Author-email: Aliaksei Badnarchuk <alexejbodnarchuk@gmail.com>
6
6
  Requires-Python: >=3.9
@@ -111,7 +111,7 @@ Returns: float (MSE, lower is better)
111
111
 
112
112
  ## Testing
113
113
 
114
- See the `test_ipynbs` folder. You can open the Jupyter notebooks in Google Colab and run all cells to see test results.
114
+ See the `testing` branch. You can open the Jupyter notebooks in Google Colab and run all cells to see test results.
115
115
 
116
116
  I am comparing my results on the PISA dataset to those of Milan Segedinac ([his implementation](https://github.com/milansegedinac/kst))
117
117
 
@@ -101,7 +101,7 @@ Returns: float (MSE, lower is better)
101
101
 
102
102
  ## Testing
103
103
 
104
- See the `test_ipynbs` folder. You can open the Jupyter notebooks in Google Colab and run all cells to see test results.
104
+ See the `testing` branch. You can open the Jupyter notebooks in Google Colab and run all cells to see test results.
105
105
 
106
106
  I am comparing my results on the PISA dataset to those of Milan Segedinac ([his implementation](https://github.com/milansegedinac/kst))
107
107
 
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '1.1'
32
- __version_tuple__ = version_tuple = (1, 1)
31
+ __version__ = version = '1.2'
32
+ __version_tuple__ = version_tuple = (1, 2)
33
33
 
34
- __commit_id__ = commit_id = 'g0c3bef126'
34
+ __commit_id__ = commit_id = 'gb97b590cd'
@@ -0,0 +1,101 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ import numpy.typing as npt
4
+ from typing import Self, List
5
+
6
+ from iita_python.dataset import Dataset
7
+
8
def pairwise_diff_ce(rp: pd.DataFrame) -> pd.DataFrame:
    """
    Compute counterexample amounts from pairwise differences of item responses.

    For every subject and every ordered item pair (a, b), the amount
    ``max(0, response[b] - response[a])`` is added to the cell in row ``a``,
    column ``b``. For binary data this counts the subjects with a = 0 and
    b = 1; for non-binary data the size of the difference is the weight.

    Missing values are not supported: a NaN response makes every cell that
    involves that item NaN.

    Args:
        rp: response patterns, one row per subject and one column per item.
            Row and column labels are ignored; only positions matter.

    Returns:
        Square DataFrame (items x items) labelled 0..items-1 on both axes.
    """
    values = rp.to_numpy()
    # Subtraction is rejected for booleans and wraps around for unsigned
    # integers, so move both to a signed integer type first.
    if values.dtype.kind in "bu":
        values = values.astype(np.int64)

    items = values.shape[1]
    totals = np.zeros((items, items), dtype=np.int64)

    # Rows are walked positionally, so the row labels of rp never matter.
    for row in values:
        # diff[a, b] = row[a] - row[b]; negative entries mark a < b, and
        # subtracting them adds row[b] - row[a] to the pair (a, b).
        diff = row[:, None] - row[None, :]
        # Not in-place on purpose: lets integer totals promote to float
        # when the responses are fractional.
        totals = totals - np.minimum(diff, 0)

    labels = np.arange(items)
    return pd.DataFrame(totals, index=labels, columns=labels)
27
+
28
def missing_value_substitution_ce(rp: pd.DataFrame) -> pd.DataFrame:
    """
    Compute pairwise-difference counterexamples after mean substitution.

    Every missing response is replaced by the mean of the observed responses
    to the same item, then the counterexamples are computed with
    ``pairwise_diff_ce``. Substituted means are usually fractional, so the
    resulting counterexample amounts can be fractional too. Works for
    non-binary data because it does not rely on strict 0/1 values.

    An item with no observed responses has no mean, so its missing values
    stay missing and propagate into the result.

    Args:
        rp: response patterns, one row per subject and one column per item.
            May contain NaN. The input is not modified.

    Returns:
        Square DataFrame (items x items) of counterexample amounts.
    """
    # Rebuild the frame from raw values so both axes get plain 0..n-1
    # labels: the fill below is keyed by column label, and the calculator
    # must not depend on how the caller labelled rows or columns.
    filled = pd.DataFrame(rp.to_numpy())

    # DataFrame.mean() skips NaN by default and yields one mean per column;
    # fillna() with that Series fills each column with its own mean.
    filled = filled.fillna(filled.mean())

    return pairwise_diff_ce(filled)
49
+
50
def relativify(calculator: callable):
    """
    Decorator that turns a counterexample calculator into a relative one.

    The wrapped calculator's result is divided, cell by cell, by the number
    of subjects for which both items of the pair are not missing.

    Args:
        calculator: function taking the response pattern DataFrame and
            returning a square (items x items) DataFrame labelled
            0..items-1 on both axes.

    Returns:
        A function with the same single ``rp`` parameter that returns the
        relative counterexample DataFrame.
    """
    base_name = getattr(calculator, "__name__", repr(calculator))

    def wrapper(rp: pd.DataFrame) -> pd.DataFrame:
        ce = calculator(rp)

        # present[s, a] is 1 when subject s has a value for item a.
        present = rp.notna().to_numpy().astype(np.int64)
        # (present.T @ present)[a, b] counts the subjects with both a and b
        # present; this equals summing the per-subject outer products.
        valid_cases = present.T @ present

        # avoid division by zero for pairs that never occur together
        valid_cases[valid_cases == 0] = 1

        labels = np.arange(valid_cases.shape[0])
        return ce / pd.DataFrame(valid_cases, index=labels, columns=labels)

    # An f-string placed where a docstring would go is only an expression
    # statement, so the documentation has to be attached explicitly.
    wrapper.__name__ = f"relative_{base_name}"
    wrapper.__doc__ = (
        f"Computes counterexamples relative to the amount of valid cases "
        f"using {base_name} as base calculator.\n\n"
        f"The counterexample amounts are divided by the number of cases for "
        f"each item pair where both items are not missing."
    )
    return wrapper
79
+
80
class AdditionalCEDataset(Dataset):
    """
    Dataset with additional counterexample calculators.

    On top of what the base Dataset computes, this class offers:

    - pairwise_diff_ce: counterexamples from pairwise differences of item
      responses, which allows non-binary data
    - missing_value_substitution_ce: the same calculation with missing
      values substituted by item means

    Both accept ``relative=True`` to divide the amounts by the number of
    cases in which both items of a pair are not missing.
    """

    def __init__(self, response_patterns: pd.DataFrame | npt.NDArray | List[List[int]]):
        """
        Build the dataset from response patterns.

        Supports pandas dataframes, numpy arrays, and python lists.
        Rows represent the subjects, columns the items.
        """
        super().__init__(response_patterns)

    # The calculators are real methods. A function stored on the instance is
    # never bound, so a lambda declaring `self` there cannot be called as
    # `dataset.pairwise_diff_ce()`.
    # Inside the method bodies the bare names resolve to the module-level
    # functions, because class scope is not visible from a method body.

    def pairwise_diff_ce(self, relative: bool = False) -> pd.DataFrame:
        """
        Compute pairwise-difference counterexamples for this dataset.

        Args:
            relative: when True, divide each amount by the number of cases
                where both items of the pair are not missing.

        Returns:
            Square DataFrame (items x items) of counterexample amounts.
        """
        calculator = relativify(pairwise_diff_ce) if relative else pairwise_diff_ce
        return calculator(self.rp)

    def missing_value_substitution_ce(self, relative: bool = False) -> pd.DataFrame:
        """
        Compute pairwise-difference counterexamples with mean substitution.

        Missing values are replaced by the mean of their item before the
        counterexamples are computed.

        Args:
            relative: when True, divide each amount by the number of cases
                where both items of the pair are not missing.

        Returns:
            Square DataFrame (items x items) of counterexample amounts.
        """
        calculator = (
            relativify(missing_value_substitution_ce)
            if relative
            else missing_value_substitution_ce
        )
        return calculator(self.rp)
@@ -31,7 +31,7 @@ class Dataset():
31
31
 
32
32
  @property
33
33
  def items(self):
34
- return self.ce.shape[0]
34
+ return self.rp.shape[1]
35
35
 
36
36
  @property
37
37
  def subjects(self):
@@ -52,23 +52,23 @@ class Dataset():
52
52
  self._eqe = None
53
53
 
54
54
  #counterexamples computation
55
- self.ce = pd.DataFrame(0, index=np.arange(self.rp.shape[1]), columns=np.arange(self.rp.shape[1]))
55
+ self.ce = pd.DataFrame(0, index=np.arange(self.items), columns=np.arange(self.items))
56
56
 
57
- for i in range(len(self.rp)):
57
+ for i in range(self.subjects):
58
58
  #for subject i, increment all cases where a=0 and b=1 (counterexamples to b->a or a <= b)
59
59
  not_a = (self.rp.loc[i] == 0)
60
60
  b = (self.rp.loc[i] == 1)
61
61
  self.ce.loc[not_a, b] += 1
62
62
 
63
63
  #equivalence examples computation
64
- self.eqe = pd.DataFrame(0, index=np.arange(self.rp.shape[1]), columns=np.arange(self.rp.shape[1]))
65
- for i in range(len(self.rp)):
64
+ self.eqe = pd.DataFrame(0, index=np.arange(self.items), columns=np.arange(self.items))
65
+ for i in range(self.subjects):
66
66
  #for subject i, increment all cases where a=b (examples of equivalence of a and b)
67
67
  row = self.rp.loc[i].to_numpy()
68
68
  self.eqe += np.equal.outer(row, row).astype(int)
69
-
70
- self.valid_ce_cases = pd.DataFrame(0, index=np.arange(self.rp.shape[1]), columns=np.arange(self.rp.shape[1]))
71
- for i in range(len(self.rp)):
69
+
70
+ self.valid_ce_cases = pd.DataFrame(0, index=np.arange(self.items), columns=np.arange(self.items))
71
+ for i in range(self.subjects):
72
72
  #for subject i, increment all cases where neither a nor b are NaN (valid case for counterexamples)
73
73
  not_nan = np.logical_not(self.rp.loc[i].isna())
74
74
  self.valid_ce_cases += np.outer(not_nan, not_nan).astype(int)
@@ -78,7 +78,7 @@ class Dataset():
78
78
  Add a second IITA_Dataset: concatenate the response patterns, add counterexamples and equivalence examples\n
79
79
  Item amounts must match, else ValueError
80
80
  """
81
- if (self.rp.shape[1] != dataset_to_add.shape[1]):
81
+ if (self.items != dataset_to_add.items):
82
82
  raise ValueError('Item amounts must match')
83
83
 
84
84
  self.rp = pd.concat(self.rp, dataset_to_add.rp)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: iita_python
3
- Version: 1.1
3
+ Version: 1.2
4
4
  Summary: IITA algorithm in python
5
5
  Author-email: Aliaksei Badnarchuk <alexejbodnarchuk@gmail.com>
6
6
  Requires-Python: >=3.9
@@ -111,7 +111,7 @@ Returns: float (MSE, lower is better)
111
111
 
112
112
  ## Testing
113
113
 
114
- See the `test_ipynbs` folder. You can open the Jupyter notebooks in Google Colab and run all cells to see test results.
114
+ See the `testing` branch. You can open the Jupyter notebooks in Google Colab and run all cells to see test results.
115
115
 
116
116
  I am comparing my results on the PISA dataset to those of Milan Segedinac ([his implementation](https://github.com/milansegedinac/kst))
117
117
 
@@ -6,6 +6,7 @@ setup.py
6
6
  .github/workflows/release.yaml
7
7
  iita_python/__init__.py
8
8
  iita_python/_version.py
9
+ iita_python/additional_ce.py
9
10
  iita_python/dataset.py
10
11
  iita_python/fit_metrics.py
11
12
  iita_python/quasiorder.py
File without changes
File without changes
File without changes
File without changes
File without changes