irt_ruby 0.1.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +50 -0
- data/LICENSE.txt +21 -0
- data/README.md +168 -0
- data/benchmarks/README.md +135 -0
- data/benchmarks/convergence_benchmark.rb +265 -0
- data/benchmarks/performance_benchmark.rb +153 -0
- data/lib/irt_ruby/rasch_model.rb +123 -33
- data/lib/irt_ruby/three_parameter_model.rb +154 -41
- data/lib/irt_ruby/two_parameter_model.rb +131 -40
- data/lib/irt_ruby/version.rb +1 -1
- data/lib/irt_ruby.rb +1 -0
- metadata +69 -10
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7c8164ec4d62622374ff79fa5110baf742d68544719088c9dcae2e8851838895
|
4
|
+
data.tar.gz: 973555d52fd8bef2cd912d2890222d3089af65e127336c4332b2ff1f5afe9638
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4d193d94c2ee5b365337e26efe6efe7dc8c48b08e1d3e1f1043f0c7d360634f2ed79ec0a5c72f094482056adc15f2ec867f5798d8fdd84a3a39db348d8e98282
|
7
|
+
data.tar.gz: 6e8ef3b3c8d9d8e4e4de7783f0c39bada144a72e542de180a36f9b7f5a1f6d9d7860d4d2235a03e6936ed4776816fae5ed2144e261e415b9b033cd51ca3859f8
|
data/CHANGELOG.md
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
# Changelog
|
2
|
+
|
3
|
+
All notable changes to this project are documented in this file.
|
4
|
+
|
5
|
+
## [0.3.0] - 2025-01-14
|
6
|
+
|
7
|
+
### Changed
|
8
|
+
- **Code Quality**
|
9
|
+
- Updated RuboCop configuration to handle new cops and resolve style warnings
|
10
|
+
- Fixed operator precedence ambiguity in Three-Parameter Model calculations
|
11
|
+
- Added MFA requirement to gemspec metadata (RuboCop requirement)
|
12
|
+
|
13
|
+
### Notes
|
14
|
+
- This release maintains **full backward compatibility** with previous versions
|
15
|
+
- All 46 existing tests continue to pass
|
16
|
+
- Comprehensive performance benchmarking suite remains available via `bundle exec rake benchmark:all`
|
17
|
+
|
18
|
+
---
|
19
|
+
|
20
|
+
## [0.2.0] - 2025-03-01
|
21
|
+
|
22
|
+
### Added
|
23
|
+
- **Missing Data Strategies**
|
24
|
+
- Introduced a `missing_strategy` parameter for **Rasch**, **TwoParameterModel**, and **ThreeParameterModel** to handle `nil` responses:
|
25
|
+
- `:ignore` (default) – skip missing responses in log-likelihood and gradients.
|
26
|
+
- `:treat_as_incorrect` – interpret `nil` as `0`.
|
27
|
+
- `:treat_as_correct` – interpret `nil` as `1`.
|
28
|
+
- Updated RSpec tests to cover each strategy and ensure graceful handling of missing responses.
|
29
|
+
|
30
|
+
- **Expanded Test Coverage**
|
31
|
+
- Added tests for repeated fits, deterministic seeding, larger random datasets, and new edge cases (all-correct/all-incorrect).
|
32
|
+
- Improved specs for parameter clamping (discriminations, guessing in 2PL/3PL).
|
33
|
+
|
34
|
+
- **Adaptive Learning Rate Enhancements**
|
35
|
+
- Enhanced convergence checks combining log-likelihood changes and average parameter updates.
|
36
|
+
- Clearer revert-and-decay logic if the likelihood decreases on a given step.
|
37
|
+
|
38
|
+
### Changed
|
39
|
+
- **Documentation / README**
|
40
|
+
- Updated the README to reflect new missing data strategies, advanced usage (adaptive learning rate, parameter clamping), and test instructions.
|
41
|
+
- Added examples showcasing how to set `missing_strategy` for each model.
|
42
|
+
|
43
|
+
### Notes
|
44
|
+
- This release remains **backward-compatible** with `0.1.x` in terms of existing usage; the default `:ignore` missing-data approach matches prior behavior.
|
45
|
+
- If upgrading, simply update your gem and enjoy the new features.
|
46
|
+
- For more details, see the updated [README](./README.md) and expanded test suites.
|
47
|
+
|
48
|
+
---
|
49
|
+
|
50
|
+
*(If you have older versions below `0.2.0`, you can keep them documented similarly, e.g., `## [0.1.x] ...`, under this new entry.)*
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2024 Alex Kholodniak
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,168 @@
|
|
1
|
+
# IrtRuby
|
2
|
+
|
3
|
+
IrtRuby is a Ruby gem that provides implementations of the **Rasch model**, the **Two-Parameter (2PL)** model, and the **Three-Parameter (3PL)** model for Item Response Theory (IRT). It allows you to estimate the **abilities** of individuals and the **difficulties** (and optionally **discriminations** and **guessing** parameters) of items based on their responses.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
```ruby
|
10
|
+
gem 'irt_ruby'
|
11
|
+
```
|
12
|
+
|
13
|
+
And then execute:
|
14
|
+
|
15
|
+
```bash
|
16
|
+
bundle install
|
17
|
+
```
|
18
|
+
|
19
|
+
Or install it yourself as:
|
20
|
+
|
21
|
+
```bash
|
22
|
+
gem install irt_ruby
|
23
|
+
```
|
24
|
+
|
25
|
+
## Usage
|
26
|
+
|
27
|
+
Here's a quick example using the Rasch model:
|
28
|
+
|
29
|
+
```ruby
|
30
|
+
require 'irt_ruby'
|
31
|
+
require 'matrix'
|
32
|
+
|
33
|
+
# Create a sample response matrix
|
34
|
+
data = Matrix[
|
35
|
+
[1, 0, 1],
|
36
|
+
[0, 1, 0],
|
37
|
+
[1, 1, 1]
|
38
|
+
]
|
39
|
+
|
40
|
+
# Initialize the Rasch model with the response data
|
41
|
+
model = IrtRuby::RaschModel.new(data)
|
42
|
+
|
43
|
+
# Fit the model to estimate abilities and difficulties
|
44
|
+
result = model.fit
|
45
|
+
|
46
|
+
# Output the estimated abilities and difficulties
|
47
|
+
puts "Abilities: #{result[:abilities]}"
|
48
|
+
puts "Difficulties: #{result[:difficulties]}"
|
49
|
+
```
|
50
|
+
### Using 2PL and 3PL Models
|
51
|
+
```ruby
|
52
|
+
two_pl_model = IrtRuby::TwoParameterModel.new(data)
|
53
|
+
two_pl_result = two_pl_model.fit
|
54
|
+
puts two_pl_result[:abilities]
|
55
|
+
puts two_pl_result[:difficulties]
|
56
|
+
puts two_pl_result[:discriminations]
|
57
|
+
|
58
|
+
three_pl_model = IrtRuby::ThreeParameterModel.new(data)
|
59
|
+
three_pl_result = three_pl_model.fit
|
60
|
+
puts three_pl_result[:abilities]
|
61
|
+
puts three_pl_result[:difficulties]
|
62
|
+
puts three_pl_result[:discriminations]
|
63
|
+
puts three_pl_result[:guessings]
|
64
|
+
```
|
65
|
+
|
66
|
+
## Handling Missing Data
|
67
|
+
Real-world data often has missing responses. Each model (Rasch, 2PL, 3PL) accepts a `missing_strategy:` option to handle `nil` entries:
|
68
|
+
|
69
|
+
- `:ignore` (default): Skip `nil` responses entirely in the log-likelihood and gradient calculations.
|
70
|
+
- `:treat_as_incorrect`: Interpret `nil` as `0`.
|
71
|
+
- `:treat_as_correct`: Interpret `nil` as `1`.
|
72
|
+
|
73
|
+
For example:
|
74
|
+
```ruby
|
75
|
+
data_with_missing = [
|
76
|
+
[1, nil, 0],
|
77
|
+
[nil, 1, 0],
|
78
|
+
[0, 1, 1]
|
79
|
+
]
|
80
|
+
|
81
|
+
model = IrtRuby::RaschModel.new(
|
82
|
+
data_with_missing,
|
83
|
+
max_iter: 300,
|
84
|
+
learning_rate: 0.01,
|
85
|
+
missing_strategy: :treat_as_incorrect
|
86
|
+
)
|
87
|
+
result = model.fit
|
88
|
+
|
89
|
+
puts "Abilities: #{result[:abilities]}"
|
90
|
+
puts "Difficulties: #{result[:difficulties]}"
|
91
|
+
```
|
92
|
+
This flexibility helps you handle datasets where missingness might signify a skipped item or an unanswered question.
|
93
|
+
|
94
|
+
## Advanced Usage
|
95
|
+
|
96
|
+
### Adaptive Learning Rate & Convergence
|
97
|
+
By default, each model uses gradient ascent with:
|
98
|
+
|
99
|
+
- An adaptive learning rate (if log-likelihood decreases, it reverts the step and reduces the rate).
|
100
|
+
- Multiple convergence checks (change in log-likelihood and average parameter updates).
|
101
|
+
|
102
|
+
You can customize:
|
103
|
+
|
104
|
+
- `max_iter`: The maximum number of iterations.
|
105
|
+
- `tolerance` and `param_tolerance`: Convergence thresholds for log-likelihood change and parameter updates.
|
106
|
+
- `learning_rate`: Initial learning rate.
|
107
|
+
- `decay_factor`: Factor by which the learning rate is reduced on a failed step.
|
108
|
+
|
109
|
+
Example:
|
110
|
+
```ruby
|
111
|
+
IrtRuby::TwoParameterModel.new(
|
112
|
+
data,
|
113
|
+
max_iter: 500,
|
114
|
+
tolerance: 1e-7,
|
115
|
+
param_tolerance: 1e-7,
|
116
|
+
learning_rate: 0.05,
|
117
|
+
decay_factor: 0.5
|
118
|
+
)
|
119
|
+
```
|
120
|
+
### Parameter Clamping
|
121
|
+
For 2PL and 3PL:
|
122
|
+
|
123
|
+
- **Discriminations** (`a`) are clamped between `0.01` and `5.0`.
|
124
|
+
- **Guessings** (`c`, 3PL only) are clamped to `[0.0, 0.35]`.
|
125
|
+
|
126
|
+
This prevents extreme or invalid parameter estimates.
|
127
|
+
|
128
|
+
## Performance Benchmarks
|
129
|
+
|
130
|
+
IRT Ruby includes comprehensive performance benchmarks to help you understand the computational characteristics of different models:
|
131
|
+
|
132
|
+
```bash
|
133
|
+
# Run all benchmarks (takes 8-15 minutes)
|
134
|
+
bundle exec rake benchmark:all
|
135
|
+
|
136
|
+
# Quick performance check (2-3 minutes)
|
137
|
+
bundle exec rake benchmark:quick
|
138
|
+
|
139
|
+
# Individual benchmark suites
|
140
|
+
bundle exec rake benchmark:performance
|
141
|
+
bundle exec rake benchmark:convergence
|
142
|
+
```
|
143
|
+
|
144
|
+
The benchmarks test:
|
145
|
+
- **Performance**: Execution speed across dataset sizes (50 to 100,000 data points)
|
146
|
+
- **Memory Usage**: Object allocation and memory efficiency
|
147
|
+
- **Scaling**: How computational complexity grows with data size
|
148
|
+
- **Convergence**: Optimization behavior under different conditions
|
149
|
+
|
150
|
+
See `benchmarks/README.md` for detailed information about interpreting results.
|
151
|
+
|
152
|
+
## Development
|
153
|
+
|
154
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
155
|
+
|
156
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and the created tag, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
157
|
+
|
158
|
+
## Contributing
|
159
|
+
|
160
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/SyntaxSpirits/irt_ruby. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [code of conduct](https://github.com/SyntaxSpirits/irt_ruby/blob/main/CODE_OF_CONDUCT.md).
|
161
|
+
|
162
|
+
## License
|
163
|
+
|
164
|
+
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
165
|
+
|
166
|
+
## Code of Conduct
|
167
|
+
|
168
|
+
Everyone interacting in the IrtRuby project's codebases, issue trackers, chat rooms, and mailing lists is expected to follow the [code of conduct](https://github.com/SyntaxSpirits/irt_ruby/blob/main/CODE_OF_CONDUCT.md).
|
data/benchmarks/README.md
ADDED
@@ -0,0 +1,135 @@
|
|
1
|
+
# IRT Ruby Performance Benchmarks
|
2
|
+
|
3
|
+
This directory contains comprehensive performance benchmarks for the IRT Ruby gem, helping users understand the computational characteristics and scaling behavior of the different IRT models.
|
4
|
+
|
5
|
+
## Available Benchmarks
|
6
|
+
|
7
|
+
### 1. Performance Benchmark (`performance_benchmark.rb`)
|
8
|
+
|
9
|
+
**Purpose**: Comprehensive performance analysis across different dataset sizes and model types.
|
10
|
+
|
11
|
+
**What it measures**:
|
12
|
+
- Execution time (iterations per second) for Rasch, 2PL, and 3PL models
|
13
|
+
- Memory usage analysis (allocated/retained objects and memory)
|
14
|
+
- Scaling behavior analysis (how performance changes with dataset size)
|
15
|
+
- Impact of missing data strategies on performance
|
16
|
+
|
17
|
+
**Dataset sizes tested**:
|
18
|
+
- Tiny: 10 people × 5 items (50 data points)
|
19
|
+
- Small: 50 people × 20 items (1,000 data points)
|
20
|
+
- Medium: 100 people × 50 items (5,000 data points)
|
21
|
+
- Large: 200 people × 100 items (20,000 data points)
|
22
|
+
- XLarge: 500 people × 200 items (100,000 data points)
|
23
|
+
|
24
|
+
### 2. Convergence Benchmark (`convergence_benchmark.rb`)
|
25
|
+
|
26
|
+
**Purpose**: Detailed analysis of convergence behavior and optimization characteristics.
|
27
|
+
|
28
|
+
**What it measures**:
|
29
|
+
- Impact of tolerance settings on convergence time and success rate
|
30
|
+
- Learning rate optimization analysis
|
31
|
+
- Dataset characteristics impact on convergence
|
32
|
+
- Missing data pattern effects on convergence
|
33
|
+
|
34
|
+
**Key insights provided**:
|
35
|
+
- Optimal hyperparameter settings for different scenarios
|
36
|
+
- Convergence reliability across different conditions
|
37
|
+
- Trade-offs between speed and accuracy
|
38
|
+
|
39
|
+
## Running the Benchmarks
|
40
|
+
|
41
|
+
### Prerequisites
|
42
|
+
|
43
|
+
Install benchmark dependencies:
|
44
|
+
```bash
|
45
|
+
bundle install
|
46
|
+
```
|
47
|
+
|
48
|
+
### Running Individual Benchmarks
|
49
|
+
|
50
|
+
```bash
|
51
|
+
# Full performance benchmark suite (takes 5-10 minutes)
|
52
|
+
ruby benchmarks/performance_benchmark.rb
|
53
|
+
|
54
|
+
# Convergence analysis (takes 3-5 minutes)
|
55
|
+
ruby benchmarks/convergence_benchmark.rb
|
56
|
+
```
|
57
|
+
|
58
|
+
### Running All Benchmarks
|
59
|
+
|
60
|
+
```bash
|
61
|
+
# Run both benchmark suites
|
62
|
+
ruby benchmarks/performance_benchmark.rb && ruby benchmarks/convergence_benchmark.rb
|
63
|
+
```
|
64
|
+
|
65
|
+
## Understanding the Results
|
66
|
+
|
67
|
+
### Performance Benchmark Output
|
68
|
+
|
69
|
+
1. **Iterations per Second (IPS)**: Higher is better
|
70
|
+
- Shows relative speed between Rasch, 2PL, and 3PL models
|
71
|
+
- Includes confidence intervals and comparison ratios
|
72
|
+
|
73
|
+
2. **Memory Usage**:
|
74
|
+
- Total allocated: Memory used during computation
|
75
|
+
- Total retained: Memory still held after computation
|
76
|
+
- Object counts: Number of Ruby objects created
|
77
|
+
|
78
|
+
3. **Scaling Analysis**:
|
79
|
+
- Shows computational complexity (O(n^x))
|
80
|
+
- Helps predict performance for larger datasets
|
81
|
+
|
82
|
+
### Convergence Benchmark Output
|
83
|
+
|
84
|
+
1. **Convergence Rate**: Percentage of runs that converged within tolerance
|
85
|
+
2. **Average Iterations**: Typical number of iterations needed
|
86
|
+
3. **Time**: Wall-clock time to convergence
|
87
|
+
|
88
|
+
## Interpreting Results for Your Use Case
|
89
|
+
|
90
|
+
### For Educational Assessment (typical: 100-1000 students, 20-100 items)
|
91
|
+
- Focus on Medium to Large dataset results
|
92
|
+
- Rasch model typically fastest, 3PL slowest but most flexible
|
93
|
+
- Missing data strategies have < 10% performance impact
|
94
|
+
|
95
|
+
### For Psychological Testing (typical: 50-500 participants, 10-50 items)
|
96
|
+
- Focus on Small to Medium dataset results
|
97
|
+
- All models should complete in < 1 second
|
98
|
+
- Consider convergence reliability for different tolerance settings
|
99
|
+
|
100
|
+
### For Large-Scale Analysis (1000+ participants)
|
101
|
+
- Review XLarge dataset results and scaling analysis
|
102
|
+
- Consider batching or parallel processing for very large datasets
|
103
|
+
- Monitor memory usage to avoid system limits
|
104
|
+
|
105
|
+
## Customizing Benchmarks
|
106
|
+
|
107
|
+
You can modify the benchmark scripts to test your specific scenarios:
|
108
|
+
|
109
|
+
1. **Custom Dataset Sizes**: Edit `DATASET_CONFIGS` array
|
110
|
+
2. **Different Hyperparameters**: Modify tolerance, learning rate configs
|
111
|
+
3. **Specific Missing Data Patterns**: Adjust missing data generation
|
112
|
+
4. **Model-Specific Tests**: Focus on particular IRT models
|
113
|
+
|
114
|
+
## Performance Tips
|
115
|
+
|
116
|
+
Based on benchmark results:
|
117
|
+
|
118
|
+
1. **Choose the Right Model**: Rasch is fastest, use 2PL/3PL only when needed
|
119
|
+
2. **Optimize Tolerance**: `1e-5` typically good balance of speed/accuracy
|
120
|
+
3. **Adjust Learning Rate**: Start with `0.01`, increase for faster convergence
|
121
|
+
4. **Handle Missing Data**: `:ignore` strategy typically fastest
|
122
|
+
5. **Consider Iteration Limits**: 100-500 iterations usually sufficient
|
123
|
+
|
124
|
+
## Comparing with Other IRT Libraries
|
125
|
+
|
126
|
+
These benchmarks can help you compare IRT Ruby against other implementations. Key metrics to compare:
|
127
|
+
|
128
|
+
- Time per data point processed
|
129
|
+
- Memory efficiency
|
130
|
+
- Convergence reliability
|
131
|
+
- Scaling behavior with dataset size
|
132
|
+
|
133
|
+
---
|
134
|
+
|
135
|
+
*Note: Benchmark results will vary based on your hardware. Run benchmarks on your target deployment environment for most accurate performance estimates.*
|
data/benchmarks/convergence_benchmark.rb
ADDED
@@ -0,0 +1,265 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require "bundler/setup"
|
5
|
+
require "irt_ruby"
|
6
|
+
require "benchmark"
|
7
|
+
|
8
|
+
# Enhanced model classes that track iterations and convergence
|
9
|
+
class TrackedRaschModel < IrtRuby::RaschModel
|
10
|
+
attr_reader :iterations, :final_log_likelihood, :convergence_reason
|
11
|
+
|
12
|
+
def fit
|
13
|
+
@iterations = 0
|
14
|
+
prev_ll = log_likelihood
|
15
|
+
@final_log_likelihood = prev_ll
|
16
|
+
@convergence_reason = :max_iterations
|
17
|
+
|
18
|
+
@max_iter.times do
|
19
|
+
@iterations += 1
|
20
|
+
grad_abilities, grad_difficulties = compute_gradient
|
21
|
+
|
22
|
+
old_a, old_d = apply_gradient_update(grad_abilities, grad_difficulties)
|
23
|
+
|
24
|
+
current_ll = log_likelihood
|
25
|
+
param_delta = average_param_update(old_a, old_d)
|
26
|
+
|
27
|
+
if current_ll < prev_ll
|
28
|
+
@abilities = old_a
|
29
|
+
@difficulties = old_d
|
30
|
+
@learning_rate *= @decay_factor
|
31
|
+
else
|
32
|
+
ll_diff = (current_ll - prev_ll).abs
|
33
|
+
@final_log_likelihood = current_ll
|
34
|
+
|
35
|
+
if ll_diff < @tolerance && param_delta < @param_tolerance
|
36
|
+
@convergence_reason = :tolerance_reached
|
37
|
+
break
|
38
|
+
end
|
39
|
+
|
40
|
+
prev_ll = current_ll
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
{ abilities: @abilities, difficulties: @difficulties }
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
def generate_data(num_people, num_items, difficulty_range: (-2..2), ability_range: (-2..2))
|
49
|
+
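# Simulate Rasch-model responses from randomly drawn true abilities and difficulties (note: `rand` on an Integer range such as -2..2 returns integers only)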
|
50
|
+
true_abilities = Array.new(num_people) { rand(ability_range) }
|
51
|
+
true_difficulties = Array.new(num_items) { rand(difficulty_range) }
|
52
|
+
|
53
|
+
data = Array.new(num_people) do |person|
|
54
|
+
Array.new(num_items) do |item|
|
55
|
+
prob = 1.0 / (1.0 + Math.exp(-(true_abilities[person] - true_difficulties[item])))
|
56
|
+
rand < prob ? 1 : 0
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
{ data: data, true_abilities: true_abilities, true_difficulties: true_difficulties }
|
61
|
+
end
|
62
|
+
|
63
|
+
puts "=" * 70
|
64
|
+
puts "IRT Ruby Convergence Analysis"
|
65
|
+
puts "=" * 70
|
66
|
+
puts
|
67
|
+
|
68
|
+
# Test convergence with different tolerance settings
|
69
|
+
tolerance_configs = [
|
70
|
+
{ tolerance: 1e-3, param_tolerance: 1e-3, label: "Loose (1e-3)" },
|
71
|
+
{ tolerance: 1e-4, param_tolerance: 1e-4, label: "Medium (1e-4)" },
|
72
|
+
{ tolerance: 1e-5, param_tolerance: 1e-5, label: "Tight (1e-5)" },
|
73
|
+
{ tolerance: 1e-6, param_tolerance: 1e-6, label: "Very Tight (1e-6)" }
|
74
|
+
]
|
75
|
+
|
76
|
+
dataset = generate_data(100, 50)
|
77
|
+
data = dataset[:data]
|
78
|
+
|
79
|
+
puts "Convergence Analysis - Impact of Tolerance Settings"
|
80
|
+
puts "-" * 50
|
81
|
+
|
82
|
+
tolerance_configs.each do |config|
|
83
|
+
puts "\nTolerance: #{config[:label]}"
|
84
|
+
|
85
|
+
times = []
|
86
|
+
iterations = []
|
87
|
+
convergence_reasons = []
|
88
|
+
|
89
|
+
5.times do
|
90
|
+
time = Benchmark.measure do
|
91
|
+
model = TrackedRaschModel.new(
|
92
|
+
data,
|
93
|
+
max_iter: 2000,
|
94
|
+
tolerance: config[:tolerance],
|
95
|
+
param_tolerance: config[:param_tolerance],
|
96
|
+
learning_rate: 0.01
|
97
|
+
)
|
98
|
+
model.fit
|
99
|
+
iterations << model.iterations
|
100
|
+
convergence_reasons << model.convergence_reason
|
101
|
+
end.real
|
102
|
+
times << time
|
103
|
+
end
|
104
|
+
|
105
|
+
avg_time = times.sum / times.size
|
106
|
+
avg_iterations = iterations.sum.to_f / iterations.size
|
107
|
+
convergence_rate = convergence_reasons.count(:tolerance_reached) / 5.0
|
108
|
+
|
109
|
+
printf(" Time: %6.3fs Iterations: %6.1f Convergence Rate: %4.0f%%\n",
|
110
|
+
avg_time, avg_iterations, convergence_rate * 100)
|
111
|
+
end
|
112
|
+
|
113
|
+
# Test convergence with different learning rates
|
114
|
+
puts "\n#{"=" * 70}"
|
115
|
+
puts "Learning Rate Impact Analysis"
|
116
|
+
puts "-" * 50
|
117
|
+
|
118
|
+
learning_rate_configs = [
|
119
|
+
{ rate: 0.001, label: "Very Slow (0.001)" },
|
120
|
+
{ rate: 0.01, label: "Slow (0.01)" },
|
121
|
+
{ rate: 0.05, label: "Medium (0.05)" },
|
122
|
+
{ rate: 0.1, label: "Fast (0.1)" },
|
123
|
+
{ rate: 0.2, label: "Very Fast (0.2)" }
|
124
|
+
]
|
125
|
+
|
126
|
+
learning_rate_configs.each do |config|
|
127
|
+
puts "\nLearning Rate: #{config[:label]}"
|
128
|
+
|
129
|
+
times = []
|
130
|
+
iterations = []
|
131
|
+
convergence_reasons = []
|
132
|
+
|
133
|
+
5.times do
|
134
|
+
time = Benchmark.measure do
|
135
|
+
model = TrackedRaschModel.new(
|
136
|
+
data,
|
137
|
+
max_iter: 1000,
|
138
|
+
tolerance: 1e-5,
|
139
|
+
param_tolerance: 1e-5,
|
140
|
+
learning_rate: config[:rate]
|
141
|
+
)
|
142
|
+
model.fit
|
143
|
+
iterations << model.iterations
|
144
|
+
convergence_reasons << model.convergence_reason
|
145
|
+
end.real
|
146
|
+
times << time
|
147
|
+
end
|
148
|
+
|
149
|
+
avg_time = times.sum / times.size
|
150
|
+
avg_iterations = iterations.sum.to_f / iterations.size
|
151
|
+
convergence_rate = convergence_reasons.count(:tolerance_reached) / 5.0
|
152
|
+
|
153
|
+
printf(" Time: %6.3fs Iterations: %6.1f Convergence Rate: %4.0f%%\n",
|
154
|
+
avg_time, avg_iterations, convergence_rate * 100)
|
155
|
+
end
|
156
|
+
|
157
|
+
# Test convergence with different dataset characteristics
|
158
|
+
puts "\n#{"=" * 70}"
|
159
|
+
puts "Dataset Characteristics Impact"
|
160
|
+
puts "-" * 50
|
161
|
+
|
162
|
+
dataset_configs = [
|
163
|
+
{ people: 50, items: 25, diff_range: (-1..1), ability_range: (-1..1), label: "Easy (narrow ranges)" },
|
164
|
+
{ people: 100, items: 50, diff_range: (-2..2), ability_range: (-2..2), label: "Medium (standard ranges)" },
|
165
|
+
{ people: 100, items: 50, diff_range: (-3..3), ability_range: (-3..3), label: "Hard (wide ranges)" },
|
166
|
+
{ people: 200, items: 100, diff_range: (-2..2), ability_range: (-2..2), label: "Large (more data)" }
|
167
|
+
]
|
168
|
+
|
169
|
+
dataset_configs.each do |config|
|
170
|
+
puts "\nDataset: #{config[:label]}"
|
171
|
+
|
172
|
+
times = []
|
173
|
+
iterations = []
|
174
|
+
convergence_reasons = []
|
175
|
+
|
176
|
+
3.times do
|
177
|
+
dataset = generate_data(
|
178
|
+
config[:people],
|
179
|
+
config[:items],
|
180
|
+
difficulty_range: config[:diff_range],
|
181
|
+
ability_range: config[:ability_range]
|
182
|
+
)
|
183
|
+
|
184
|
+
time = Benchmark.measure do
|
185
|
+
model = TrackedRaschModel.new(
|
186
|
+
dataset[:data],
|
187
|
+
max_iter: 1000,
|
188
|
+
tolerance: 1e-5,
|
189
|
+
param_tolerance: 1e-5,
|
190
|
+
learning_rate: 0.01
|
191
|
+
)
|
192
|
+
model.fit
|
193
|
+
iterations << model.iterations
|
194
|
+
convergence_reasons << model.convergence_reason
|
195
|
+
end.real
|
196
|
+
times << time
|
197
|
+
end
|
198
|
+
|
199
|
+
avg_time = times.sum / times.size
|
200
|
+
avg_iterations = iterations.sum.to_f / iterations.size
|
201
|
+
convergence_rate = convergence_reasons.count(:tolerance_reached) / 3.0
|
202
|
+
|
203
|
+
printf(" Time: %6.3fs Iterations: %6.1f Convergence Rate: %4.0f%%\n",
|
204
|
+
avg_time, avg_iterations, convergence_rate * 100)
|
205
|
+
end
|
206
|
+
|
207
|
+
# Test different missing data patterns
|
208
|
+
puts "\n#{"=" * 70}"
|
209
|
+
puts "Missing Data Pattern Impact"
|
210
|
+
puts "-" * 50
|
211
|
+
|
212
|
+
missing_configs = [
|
213
|
+
{ rate: 0.0, strategy: :ignore, label: "No Missing Data" },
|
214
|
+
{ rate: 0.1, strategy: :ignore, label: "10% Missing (ignore)" },
|
215
|
+
{ rate: 0.2, strategy: :ignore, label: "20% Missing (ignore)" },
|
216
|
+
{ rate: 0.2, strategy: :treat_as_incorrect, label: "20% Missing (incorrect)" },
|
217
|
+
{ rate: 0.2, strategy: :treat_as_correct, label: "20% Missing (correct)" }
|
218
|
+
]
|
219
|
+
|
220
|
+
missing_configs.each do |config|
|
221
|
+
puts "\nMissing Data: #{config[:label]}"
|
222
|
+
|
223
|
+
# Generate data with missing values
|
224
|
+
base_data = generate_data(100, 50)[:data]
|
225
|
+
|
226
|
+
data_with_missing = if (config[:rate]).positive?
|
227
|
+
base_data.map do |row|
|
228
|
+
row.map { |resp| rand < config[:rate] ? nil : resp }
|
229
|
+
end
|
230
|
+
else
|
231
|
+
base_data
|
232
|
+
end
|
233
|
+
|
234
|
+
times = []
|
235
|
+
iterations = []
|
236
|
+
convergence_reasons = []
|
237
|
+
|
238
|
+
3.times do
|
239
|
+
time = Benchmark.measure do
|
240
|
+
model = TrackedRaschModel.new(
|
241
|
+
data_with_missing,
|
242
|
+
max_iter: 1000,
|
243
|
+
tolerance: 1e-5,
|
244
|
+
param_tolerance: 1e-5,
|
245
|
+
learning_rate: 0.01,
|
246
|
+
missing_strategy: config[:strategy]
|
247
|
+
)
|
248
|
+
model.fit
|
249
|
+
iterations << model.iterations
|
250
|
+
convergence_reasons << model.convergence_reason
|
251
|
+
end.real
|
252
|
+
times << time
|
253
|
+
end
|
254
|
+
|
255
|
+
avg_time = times.sum / times.size
|
256
|
+
avg_iterations = iterations.sum.to_f / iterations.size
|
257
|
+
convergence_rate = convergence_reasons.count(:tolerance_reached) / 3.0
|
258
|
+
|
259
|
+
printf(" Time: %6.3fs Iterations: %6.1f Convergence Rate: %4.0f%%\n",
|
260
|
+
avg_time, avg_iterations, convergence_rate * 100)
|
261
|
+
end
|
262
|
+
|
263
|
+
puts "\n#{"=" * 70}"
|
264
|
+
puts "Convergence Analysis Complete!"
|
265
|
+
puts "=" * 70
|