iita-python 0.0.post42__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- iita_python-0.0.post42/.github/workflows/release.yaml +136 -0
- iita_python-0.0.post42/.gitignore +6 -0
- iita_python-0.0.post42/PKG-INFO +148 -0
- iita_python-0.0.post42/README.md +138 -0
- iita_python-0.0.post42/iita_python/__init__.py +4 -0
- iita_python-0.0.post42/iita_python/_version.py +34 -0
- iita_python-0.0.post42/iita_python/dataset.py +82 -0
- iita_python-0.0.post42/iita_python/fit_metrics.py +64 -0
- iita_python-0.0.post42/iita_python/quasiorder.py +100 -0
- iita_python-0.0.post42/iita_python/utils.py +35 -0
- iita_python-0.0.post42/iita_python.egg-info/PKG-INFO +148 -0
- iita_python-0.0.post42/iita_python.egg-info/SOURCES.txt +20 -0
- iita_python-0.0.post42/iita_python.egg-info/dependency_links.txt +1 -0
- iita_python-0.0.post42/iita_python.egg-info/requires.txt +2 -0
- iita_python-0.0.post42/iita_python.egg-info/top_level.txt +1 -0
- iita_python-0.0.post42/pyproject.toml +22 -0
- iita_python-0.0.post42/setuf.cfg +17 -0
- iita_python-0.0.post42/setup.cfg +4 -0
- iita_python-0.0.post42/setup.py +6 -0
- iita_python-0.0.post42/test_ipynbs/fit_metrics.ipynb +235 -0
- iita_python-0.0.post42/test_ipynbs/pisa.csv +340 -0
- iita_python-0.0.post42/test_ipynbs/quasi-order-gen.ipynb +219 -0
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
# Release pipeline: triggered by pushing a version tag (v*.* / v*.*.*).
name: release

on:
  push:
    tags:
      - 'v*.*'
      - 'v*.*.*'

env:
  PACKAGE_NAME: "iita_python"
  OWNER: "Alexe1900"

jobs:
  # Parse the pushed tag into version / suffix / tag-name outputs for later jobs.
  details:
    runs-on: ubuntu-latest
    outputs:
      new_version: ${{ steps.release.outputs.new_version }}
      suffix: ${{ steps.release.outputs.suffix }}
      tag_name: ${{ steps.release.outputs.tag_name }}
    steps:
      - uses: actions/checkout@v4

      - name: Extract tag and Details
        id: release
        run: |
          if [ "${{ github.ref_type }}" = "tag" ]; then
            TAG_NAME=${GITHUB_REF#refs/tags/}
            NEW_VERSION=$(echo $TAG_NAME | awk -F'-' '{print $1}')
            SUFFIX=$(echo $TAG_NAME | grep -oP '[a-z]+[0-9]+' || echo "")
            echo "new_version=$NEW_VERSION" >> "$GITHUB_OUTPUT"
            echo "suffix=$SUFFIX" >> "$GITHUB_OUTPUT"
            echo "tag_name=$TAG_NAME" >> "$GITHUB_OUTPUT"
            echo "Version is $NEW_VERSION"
            echo "Suffix is $SUFFIX"
            echo "Tag name is $TAG_NAME"
          else
            echo "No tag found"
            exit 1
          fi

  # Abort the release if the tagged version is not newer than what PyPI already has.
  check_pypi:
    needs: details
    runs-on: ubuntu-latest
    steps:
      - name: Fetch information from PyPI
        run: |
          response=$(curl -s https://pypi.org/pypi/${{ env.PACKAGE_NAME }}/json || echo "{}")
          latest_previous_version=$(echo $response | jq --raw-output "select(.releases != null) | .releases | keys_unsorted | last")
          if [ -z "$latest_previous_version" ]; then
            echo "Package not found on PyPI."
            latest_previous_version="0.0.0"
          fi
          echo "Latest version on PyPI: $latest_previous_version"
          echo "latest_previous_version=$latest_previous_version" >> $GITHUB_ENV

      - name: Compare versions and exit if not newer
        run: |
          NEW_VERSION=${{ needs.details.outputs.new_version }}
          LATEST_VERSION=$latest_previous_version
          if [ "$(printf '%s\n' "$LATEST_VERSION" "$NEW_VERSION" | sort -rV | head -n 1)" != "$NEW_VERSION" ] || [ "$NEW_VERSION" == "$LATEST_VERSION" ]; then
            echo "The new version $NEW_VERSION is not greater than the latest version $LATEST_VERSION on PyPI."
            exit 1
          else
            echo "The new version $NEW_VERSION is greater than the latest version $LATEST_VERSION on PyPI."
          fi

  setup_and_build:
    needs: [details, check_pypi]
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          # full history so setuptools-scm can derive the version from tags
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Install build tools
        run: |
          python -m pip install --upgrade pip
          pip install build twine setuptools_scm

      - name: Build package
        run: |
          python -m build

      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: dist
          path: dist/

  # Publish via PyPI trusted publishing (OIDC; id-token permission, no API token).
  pypi_publish:
    name: Upload release to PyPI
    needs: [setup_and_build, details]
    runs-on: ubuntu-latest
    environment:
      name: release
    permissions:
      id-token: write
    steps:
      - name: Download artifacts
        uses: actions/download-artifact@v4
        with:
          name: dist
          path: dist/

      - name: Publish distribution to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1

  github_release:
    name: Create GitHub Release
    needs: [setup_and_build, details]
    runs-on: ubuntu-latest
    permissions:
      contents: write
    steps:
      - name: Checkout Code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Download artifacts
        uses: actions/download-artifact@v4
        with:
          name: dist
          path: dist/

      - name: Create GitHub Release
        id: create_release
        env:
          GH_TOKEN: ${{ github.token }}
        run: |
          gh release create ${{ needs.details.outputs.tag_name }} dist/* --title ${{ needs.details.outputs.tag_name }} --generate-notes
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: iita_python
|
|
3
|
+
Version: 0.0.post42
|
|
4
|
+
Summary: IITA algorithm in python
|
|
5
|
+
Author-email: Aliaksei Badnarchuk <alexejbodnarchuk@gmail.com>
|
|
6
|
+
Requires-Python: >=3.9
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
Requires-Dist: numpy
|
|
9
|
+
Requires-Dist: pandas
|
|
10
|
+
|
|
11
|
+
# IITA_Python
|
|
12
|
+
|
|
13
|
+
A Python implementation of the Inductive ITem Tree Analysis (IITA) algorithm for analyzing and validating quasi-orderings in psychometric data.
|
|
14
|
+
|
|
15
|
+
Intended to replicate the functionality of the DAKS package from R, with an OOP-style interface for simpler functionality expansion
|
|
16
|
+
|
|
17
|
+
## Installation
|
|
18
|
+
|
|
19
|
+
### From PyPI
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
pip install iita_python
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## Quick Start
|
|
26
|
+
|
|
27
|
+
```python
|
|
28
|
+
from iita_python import Dataset, ind_gen, unfold_examples, orig_iita_fit
|
|
29
|
+
import iita_python.utils as utils
|
|
30
|
+
|
|
31
|
+
# Load response patterns from CSV
|
|
32
|
+
response_patterns = utils.read_rp('data.csv')
|
|
33
|
+
|
|
34
|
+
# Create Dataset: computes counterexamples and equivalence examples
|
|
35
|
+
data = Dataset(response_patterns)
|
|
36
|
+
|
|
37
|
+
# Extract counterexamples and generate quasi-orderings
|
|
38
|
+
ce = unfold_examples(data.ce)
|
|
39
|
+
qos = ind_gen(ce, data.items)
|
|
40
|
+
|
|
41
|
+
# Evaluate fit for each quasi-order
|
|
42
|
+
for i, qo in enumerate(qos):
|
|
43
|
+
fit = orig_iita_fit(data, qo)
|
|
44
|
+
print(f"Quasi-order {i}: fit = {fit:.2f}")
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## Data Format
|
|
48
|
+
|
|
49
|
+
### Input: Response Patterns
|
|
50
|
+
|
|
51
|
+
Response patterns should be a 2D array where:
|
|
52
|
+
- **Rows** represent subjects (respondents)
|
|
53
|
+
- **Columns** represent items (questions/tasks)
|
|
54
|
+
- **Values** are 0 (incorrect) or 1 (correct), with NaN for missing responses
|
|
55
|
+
|
|
56
|
+
Example (CSV):
|
|
57
|
+
```
|
|
58
|
+
1,0,1,0,1
|
|
59
|
+
0,0,1,0,1
|
|
60
|
+
1,1,1,1,1
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
When reading from a file with `utils.read_rp()`, missing data can be specified via the `nan_vals` parameter.
|
|
64
|
+
|
|
65
|
+
## Core Modules
|
|
66
|
+
|
|
67
|
+
### `dataset.py`
|
|
68
|
+
|
|
69
|
+
**`Dataset` class**
|
|
70
|
+
|
|
71
|
+
Stores response patterns and computes derived metrics:
|
|
72
|
+
|
|
73
|
+
- `rp`: response patterns (DataFrame)
|
|
74
|
+
- `ce`: counterexamples - pairs (i, j) where subject has item i incorrect but item j correct
|
|
75
|
+
- `eqe`: equivalence examples - pairs (i, j) where subject answered items i and j identically
|
|
76
|
+
- `items`: number of items
|
|
77
|
+
- `subjects`: number of subjects
|
|
78
|
+
- `filled_vals`: number of non-missing responses per item
|
|
79
|
+
|
|
80
|
+
### `quasiorder.py`
|
|
81
|
+
|
|
82
|
+
**`unfold_examples(matrix, relativity=None, dtype=np.float32)`**
|
|
83
|
+
|
|
84
|
+
Converts a 2D matrix (e.g., counterexamples or equivalence examples) into a list of (value, i, j) tuples, excluding diagonal entries. Optionally normalizes by a relativity matrix.
|
|
85
|
+
|
|
86
|
+
**`ind_gen(counterexamples, n)`**
|
|
87
|
+
|
|
88
|
+
Generates candidate quasi-orderings from counterexample data. Returns a list of quasi-order matrices (numpy arrays) that progressively include edges.
|
|
89
|
+
|
|
90
|
+
**`get_edge_list(qo_matrix, buff=0)`**
|
|
91
|
+
|
|
92
|
+
Extracts the edge list from a quasi-order matrix as a list of (i, j) pairs.
|
|
93
|
+
|
|
94
|
+
### `fit_metrics.py`
|
|
95
|
+
|
|
96
|
+
**`orig_iita_fit(data, qo)`**
|
|
97
|
+
|
|
98
|
+
Computes the fit of a quasi-order to observed data using Schrepp's method:
|
|
99
|
+
|
|
100
|
+
1. Estimates an error rate from counterexamples on edges in the quasi-order
|
|
101
|
+
2. Predicts expected counterexamples for all item pairs under the quasi-order
|
|
102
|
+
3. Computes mean squared error between observed and expected counterexamples
|
|
103
|
+
|
|
104
|
+
Returns: float (MSE, lower is better)
|
|
105
|
+
|
|
106
|
+
## Requirements
|
|
107
|
+
|
|
108
|
+
- Python >= 3.9
|
|
109
|
+
- numpy
|
|
110
|
+
- pandas
|
|
111
|
+
|
|
112
|
+
## Testing
|
|
113
|
+
|
|
114
|
+
See the `test_ipynbs` folder. You can open the Jupyter notebooks in Google Colab and run all cells to see test results.
|
|
115
|
+
|
|
116
|
+
I am comparing my results on the PISA dataset to those of Milan Segedinac ([his implementation](https://github.com/milansegedinac/kst))
|
|
117
|
+
|
|
118
|
+
Please report any test failures in an issue
|
|
119
|
+
|
|
120
|
+
## Contributing
|
|
121
|
+
|
|
122
|
+
Pull requests and issues are welcome. For major changes, please open an issue first to discuss.
|
|
123
|
+
|
|
124
|
+
## IITA Overview
|
|
125
|
+
|
|
126
|
+
Schrepp (1999, 2003) developed IITA (Inductive Item Tree Analysis) as a means to derive a surmise relation from dichotomous response patterns. Sargin and Ünlü (2009; Ünlü & Sargin, 2010) implemented two advanced versions of that procedure.
|
|
127
|
+
|
|
128
|
+
The three inductive item tree analysis algorithms are exploratory methods for extracting quasi orders (surmise relations) from data. In each algorithm, competing binary relations are generated (in the same way for all three versions), and a fit measure (differing from version to version) is computed for every relation of the selection set in order to find the quasi order that fits the data best. In all three algorithms, the idea is to estimate the numbers of counterexamples for each quasi order, and to find, over all competing quasi orders, the minimum value for the discrepancy between the observed and expected numbers of counterexamples.
|
|
129
|
+
|
|
130
|
+
The three data analysis methods differ in their choices of estimates for the expected numbers of counterexamples. (For an item pair (i,j), the number of subjects solving item j but failing to solve item i, is the corresponding number of counterexamples. Their response patterns contradict the interpretation of (i,j) as `mastering item j implies mastering item i.')
|
|
131
|
+
|
|
132
|
+
## References
|
|
133
|
+
|
|
134
|
+
- Schrepp, M. (2001). IITA: A program for the analysis of individual item and step matrices. Unpublished technical report.
|
|
135
|
+
- Knowledge Space Theory: https://en.wikipedia.org/wiki/Knowledge_space
|
|
136
|
+
|
|
137
|
+
## Author
|
|
138
|
+
|
|
139
|
+
Alexe1900, mentored and supervised by Peter Steiner from PHSG St. Gallen
|
|
140
|
+
|
|
141
|
+
---
|
|
142
|
+
|
|
143
|
+
## Roadmap
|
|
144
|
+
|
|
145
|
+
- [ ] Full DAKS functionality
|
|
146
|
+
- [ ] Performance optimizations for large datasets
|
|
147
|
+
- [ ] Visualization tools for quasi-orderings
|
|
148
|
+
- [ ] Comprehensive test suite (unit + integration)
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
# IITA_Python
|
|
2
|
+
|
|
3
|
+
A Python implementation of the Inductive ITem Tree Analysis (IITA) algorithm for analyzing and validating quasi-orderings in psychometric data.
|
|
4
|
+
|
|
5
|
+
Intended to replicate the functionality of the DAKS package from R, with an OOP-style interface for simpler functionality expansion
|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
### From PyPI
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
pip install iita_python
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
## Quick Start
|
|
16
|
+
|
|
17
|
+
```python
|
|
18
|
+
from iita_python import Dataset, ind_gen, unfold_examples, orig_iita_fit
|
|
19
|
+
import iita_python.utils as utils
|
|
20
|
+
|
|
21
|
+
# Load response patterns from CSV
|
|
22
|
+
response_patterns = utils.read_rp('data.csv')
|
|
23
|
+
|
|
24
|
+
# Create Dataset: computes counterexamples and equivalence examples
|
|
25
|
+
data = Dataset(response_patterns)
|
|
26
|
+
|
|
27
|
+
# Extract counterexamples and generate quasi-orderings
|
|
28
|
+
ce = unfold_examples(data.ce)
|
|
29
|
+
qos = ind_gen(ce, data.items)
|
|
30
|
+
|
|
31
|
+
# Evaluate fit for each quasi-order
|
|
32
|
+
for i, qo in enumerate(qos):
|
|
33
|
+
fit = orig_iita_fit(data, qo)
|
|
34
|
+
print(f"Quasi-order {i}: fit = {fit:.2f}")
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
## Data Format
|
|
38
|
+
|
|
39
|
+
### Input: Response Patterns
|
|
40
|
+
|
|
41
|
+
Response patterns should be a 2D array where:
|
|
42
|
+
- **Rows** represent subjects (respondents)
|
|
43
|
+
- **Columns** represent items (questions/tasks)
|
|
44
|
+
- **Values** are 0 (incorrect) or 1 (correct), with NaN for missing responses
|
|
45
|
+
|
|
46
|
+
Example (CSV):
|
|
47
|
+
```
|
|
48
|
+
1,0,1,0,1
|
|
49
|
+
0,0,1,0,1
|
|
50
|
+
1,1,1,1,1
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
When reading from a file with `utils.read_rp()`, missing data can be specified via the `nan_vals` parameter.
|
|
54
|
+
|
|
55
|
+
## Core Modules
|
|
56
|
+
|
|
57
|
+
### `dataset.py`
|
|
58
|
+
|
|
59
|
+
**`Dataset` class**
|
|
60
|
+
|
|
61
|
+
Stores response patterns and computes derived metrics:
|
|
62
|
+
|
|
63
|
+
- `rp`: response patterns (DataFrame)
|
|
64
|
+
- `ce`: counterexamples - pairs (i, j) where subject has item i incorrect but item j correct
|
|
65
|
+
- `eqe`: equivalence examples - pairs (i, j) where subject answered items i and j identically
|
|
66
|
+
- `items`: number of items
|
|
67
|
+
- `subjects`: number of subjects
|
|
68
|
+
- `filled_vals`: number of non-missing responses per item
|
|
69
|
+
|
|
70
|
+
### `quasiorder.py`
|
|
71
|
+
|
|
72
|
+
**`unfold_examples(matrix, relativity=None, dtype=np.float32)`**
|
|
73
|
+
|
|
74
|
+
Converts a 2D matrix (e.g., counterexamples or equivalence examples) into a list of (value, i, j) tuples, excluding diagonal entries. Optionally normalizes by a relativity matrix.
|
|
75
|
+
|
|
76
|
+
**`ind_gen(counterexamples, n)`**
|
|
77
|
+
|
|
78
|
+
Generates candidate quasi-orderings from counterexample data. Returns a list of quasi-order matrices (numpy arrays) that progressively include edges.
|
|
79
|
+
|
|
80
|
+
**`get_edge_list(qo_matrix, buff=0)`**
|
|
81
|
+
|
|
82
|
+
Extracts the edge list from a quasi-order matrix as a list of (i, j) pairs.
|
|
83
|
+
|
|
84
|
+
### `fit_metrics.py`
|
|
85
|
+
|
|
86
|
+
**`orig_iita_fit(data, qo)`**
|
|
87
|
+
|
|
88
|
+
Computes the fit of a quasi-order to observed data using Schrepp's method:
|
|
89
|
+
|
|
90
|
+
1. Estimates an error rate from counterexamples on edges in the quasi-order
|
|
91
|
+
2. Predicts expected counterexamples for all item pairs under the quasi-order
|
|
92
|
+
3. Computes mean squared error between observed and expected counterexamples
|
|
93
|
+
|
|
94
|
+
Returns: float (MSE, lower is better)
|
|
95
|
+
|
|
96
|
+
## Requirements
|
|
97
|
+
|
|
98
|
+
- Python >= 3.9
|
|
99
|
+
- numpy
|
|
100
|
+
- pandas
|
|
101
|
+
|
|
102
|
+
## Testing
|
|
103
|
+
|
|
104
|
+
See the `test_ipynbs` folder. You can open the Jupyter notebooks in Google Colab and run all cells to see test results.
|
|
105
|
+
|
|
106
|
+
I am comparing my results on the PISA dataset to those of Milan Segedinac ([his implementation](https://github.com/milansegedinac/kst))
|
|
107
|
+
|
|
108
|
+
Please report any test failures in an issue
|
|
109
|
+
|
|
110
|
+
## Contributing
|
|
111
|
+
|
|
112
|
+
Pull requests and issues are welcome. For major changes, please open an issue first to discuss.
|
|
113
|
+
|
|
114
|
+
## IITA Overview
|
|
115
|
+
|
|
116
|
+
Schrepp (1999, 2003) developed IITA (Inductive Item Tree Analysis) as a means to derive a surmise relation from dichotomous response patterns. Sargin and Ünlü (2009; Ünlü & Sargin, 2010) implemented two advanced versions of that procedure.
|
|
117
|
+
|
|
118
|
+
The three inductive item tree analysis algorithms are exploratory methods for extracting quasi orders (surmise relations) from data. In each algorithm, competing binary relations are generated (in the same way for all three versions), and a fit measure (differing from version to version) is computed for every relation of the selection set in order to find the quasi order that fits the data best. In all three algorithms, the idea is to estimate the numbers of counterexamples for each quasi order, and to find, over all competing quasi orders, the minimum value for the discrepancy between the observed and expected numbers of counterexamples.
|
|
119
|
+
|
|
120
|
+
The three data analysis methods differ in their choices of estimates for the expected numbers of counterexamples. (For an item pair (i,j), the number of subjects solving item j but failing to solve item i, is the corresponding number of counterexamples. Their response patterns contradict the interpretation of (i,j) as `mastering item j implies mastering item i.')
|
|
121
|
+
|
|
122
|
+
## References
|
|
123
|
+
|
|
124
|
+
- Schrepp, M. (2001). IITA: A program for the analysis of individual item and step matrices. Unpublished technical report.
|
|
125
|
+
- Knowledge Space Theory: https://en.wikipedia.org/wiki/Knowledge_space
|
|
126
|
+
|
|
127
|
+
## Author
|
|
128
|
+
|
|
129
|
+
Alexe1900, mentored and supervised by Peter Steiner from PHSG St. Gallen
|
|
130
|
+
|
|
131
|
+
---
|
|
132
|
+
|
|
133
|
+
## Roadmap
|
|
134
|
+
|
|
135
|
+
- [ ] Full DAKS functionality
|
|
136
|
+
- [ ] Performance optimizations for large datasets
|
|
137
|
+
- [ ] Visualization tools for quasi-orderings
|
|
138
|
+
- [ ] Comprehensive test suite (unit + integration)
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# file generated by setuptools-scm
# don't change, don't track in version control

__all__ = [
    "__version__",
    "__version_tuple__",
    "version",
    "version_tuple",
    "__commit_id__",
    "commit_id",
]

# Constant-False guard: the typing imports and aliases below exist only for
# static type checkers; at runtime the aliases degrade to plain `object`.
TYPE_CHECKING = False
if TYPE_CHECKING:
    from typing import Tuple
    from typing import Union

    VERSION_TUPLE = Tuple[Union[int, str], ...]
    COMMIT_ID = Union[str, None]
else:
    VERSION_TUPLE = object
    COMMIT_ID = object

version: str
__version__: str
__version_tuple__: VERSION_TUPLE
version_tuple: VERSION_TUPLE
commit_id: COMMIT_ID
__commit_id__: COMMIT_ID

__version__ = version = '0.0.post42'
__version_tuple__ = version_tuple = (0, 0, 'post42')

__commit_id__ = commit_id = 'ga5c158b2b'
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import numpy.typing as npt
|
|
3
|
+
from typing import Self, List
|
|
4
|
+
import pandas as pd
|
|
5
|
+
|
|
6
|
+
class Dataset():
|
|
7
|
+
#aliases for response_patterns, counterexamples, equiv_examples
|
|
8
|
+
@property
|
|
9
|
+
def rp(self) -> pd.DataFrame:
|
|
10
|
+
return self._rp
|
|
11
|
+
@rp.setter
|
|
12
|
+
def rp(self, inp: pd.DataFrame) -> None:
|
|
13
|
+
self._rp = inp
|
|
14
|
+
response_patterns = rp
|
|
15
|
+
|
|
16
|
+
@property
|
|
17
|
+
def ce(self) -> pd.DataFrame:
|
|
18
|
+
return self._ce
|
|
19
|
+
@ce.setter
|
|
20
|
+
def ce(self, inp: pd.DataFrame) -> None:
|
|
21
|
+
self._ce = inp
|
|
22
|
+
counterexamples = ce
|
|
23
|
+
|
|
24
|
+
@property
|
|
25
|
+
def eqe(self) -> pd.DataFrame:
|
|
26
|
+
return self._eqe
|
|
27
|
+
@eqe.setter
|
|
28
|
+
def eqe(self, inp: pd.DataFrame) -> None:
|
|
29
|
+
self._eqe = inp
|
|
30
|
+
equiv_examples = eqe
|
|
31
|
+
|
|
32
|
+
@property
|
|
33
|
+
def items(self):
|
|
34
|
+
return self.ce.shape[0]
|
|
35
|
+
|
|
36
|
+
@property
|
|
37
|
+
def subjects(self):
|
|
38
|
+
return self.rp.shape[0]
|
|
39
|
+
|
|
40
|
+
@property
|
|
41
|
+
def filled_vals(self):
|
|
42
|
+
return (~np.isnan(self.rp)).sum(axis=0)
|
|
43
|
+
|
|
44
|
+
def __init__(self, response_patterns: pd.DataFrame | npt.NDArray | List[List[int]]):
|
|
45
|
+
"""
|
|
46
|
+
Computes the counterexamples and equivalence examples from response patterns\n
|
|
47
|
+
Supports pandas dataframes, numpy arrays, and python lists\n
|
|
48
|
+
Rows represent the subjects, columns - the items\n
|
|
49
|
+
"""
|
|
50
|
+
self._rp = pd.DataFrame(response_patterns, index=None, columns=None)
|
|
51
|
+
self._ce = None
|
|
52
|
+
self._eqe = None
|
|
53
|
+
|
|
54
|
+
#counterexamples computation
|
|
55
|
+
self.ce = pd.DataFrame(0, index=np.arange(self.rp.shape[1]), columns=np.arange(self.rp.shape[1]))
|
|
56
|
+
|
|
57
|
+
for i in range(len(self.rp)):
|
|
58
|
+
#for subject i, find all cases where a=0 and b=1 (counterexamples to b->a or a <= b) and increment where they intersect
|
|
59
|
+
not_a = (self.rp.loc[i] == 0)
|
|
60
|
+
b = (self.rp.loc[i] == 1)
|
|
61
|
+
self.ce.loc[not_a, b] += 1
|
|
62
|
+
|
|
63
|
+
#equivalence examples computation
|
|
64
|
+
self.eqe = pd.DataFrame(0, index=np.arange(self.rp.shape[1]), columns=np.arange(self.rp.shape[1]))
|
|
65
|
+
for i in range(len(self.rp)):
|
|
66
|
+
#for subject i, increment all cases where a=b (examples of equivalence of a and b)
|
|
67
|
+
row = self.rp.loc[i].to_numpy()
|
|
68
|
+
self.eqe += np.equal.outer(row, row).astype(int)
|
|
69
|
+
|
|
70
|
+
def add(self, dataset_to_add: Self):
|
|
71
|
+
"""
|
|
72
|
+
Add a second IITA_Dataset: concatenate the response patterns, add counterexamples and equivalence examples\n
|
|
73
|
+
Item amounts must match, else ValueError
|
|
74
|
+
"""
|
|
75
|
+
if (self.rp.shape[1] != dataset_to_add.shape[1]):
|
|
76
|
+
raise ValueError('Item amounts must match')
|
|
77
|
+
|
|
78
|
+
self.rp = pd.concat(self.rp, dataset_to_add.rp)
|
|
79
|
+
self.ce = self.ce + dataset_to_add.ce
|
|
80
|
+
self.eqe = self.eqe + dataset_to_add.eqe
|
|
81
|
+
|
|
82
|
+
__iadd__ = add
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import numpy.typing as npt
|
|
3
|
+
import pandas as pd
|
|
4
|
+
from .dataset import Dataset
|
|
5
|
+
from .quasiorder import QuasiOrder
|
|
6
|
+
|
|
7
|
+
def orig_iita_fit(data: 'Dataset', qo: 'QuasiOrder'):
    """
    Calculates the original IITA fit metric for a given dataset and quasiorder.

    1. Estimates a single careless-error rate from the counterexamples
       observed on the edges of the quasi-order.
    2. Predicts the expected number of counterexamples for every ordered
       item pair under the quasi-order.
    3. Returns the mean squared difference between observed and expected
       counterexamples over all off-diagonal pairs (lower is better).
    """
    qo_edges = qo.get_edge_list()
    # proportion of subjects solving each item
    # NOTE(review): a plain sum propagates NaN for missing responses —
    # confirm rp is NaN-free (or imputed) before calling this
    p = data.rp.to_numpy().sum(axis=0) / data.subjects

    # Average error rate over the quasi-order's edges. An empty relation has
    # no edges; its error rate is 0 by convention (the previous code divided
    # by len(qo_edges) == 0 and crashed on the edge-free candidate).
    error = 0.0
    if qo_edges:
        for a, b in qo_edges:
            error += data.ce.iloc[a, b] / (p[b] * data.subjects)
        error /= len(qo_edges)

    expected_ce = np.zeros(data.ce.shape)
    for i in range(data.items):
        for j in range(data.items):
            if i == j:
                continue

            if qo.full_matrix[i][j]:
                # implied pair: counterexamples arise only from careless errors
                expected_ce[i][j] = error * p[j] * data.subjects
            else:
                # independent pair: independence prediction, error-discounted
                expected_ce[i][j] = (1 - p[i]) * p[j] * data.subjects * (1 - error)

    observed = data.ce.to_numpy().flatten()
    expected = expected_ce.flatten()

    # mean squared error over the items^2 - items off-diagonal pairs
    return ((observed - expected) ** 2).sum() / (data.items**2 - data.items)
|
|
35
|
+
|
|
36
|
+
def corr_iita_fit(data: 'Dataset', qo: 'QuasiOrder'):
    """
    Calculates the corrected IITA fit metric for a given dataset and quasiorder.

    Same error-rate estimate as the original metric, but the expected
    counterexamples distinguish three cases per pair (i, j): the pair is in
    the quasi-order, neither direction is, or only the reverse (j, i) is.
    Returns the mean squared observed/expected difference over all
    off-diagonal pairs (lower is better).
    """
    qo_edges = qo.get_edge_list()
    # proportion of subjects solving each item
    # NOTE(review): a plain sum propagates NaN for missing responses —
    # confirm rp is NaN-free (or imputed) before calling this
    p = data.rp.to_numpy().sum(axis=0) / data.subjects

    # Average error rate over the quasi-order's edges; 0 for the edge-free
    # candidate (the previous code divided by len(qo_edges) == 0 and crashed).
    error = 0.0
    if qo_edges:
        for a, b in qo_edges:
            error += data.ce.iloc[a, b] / (p[b] * data.subjects)
        error /= len(qo_edges)

    expected_ce = np.zeros(data.ce.shape)
    for i in range(data.items):
        for j in range(data.items):
            if i == j:
                continue

            if qo.full_matrix[i][j]:
                # implied pair: counterexamples arise only from careless errors
                expected_ce[i][j] = error * p[j] * data.subjects
            elif not qo.full_matrix[j][i]:
                # neither direction implied: plain independence prediction
                expected_ce[i][j] = (1 - p[i]) * p[j] * data.subjects
            else:
                # only the reverse (j, i) is implied: corrected estimate
                expected_ce[i][j] = (p[j] * data.subjects) - ((p[i] - p[i] * error) * data.subjects)

    observed = data.ce.to_numpy().flatten()
    expected = expected_ce.flatten()
    # mean squared error over the items^2 - items off-diagonal pairs
    return ((observed - expected) ** 2).sum() / (data.items**2 - data.items)
|