type_balancer 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.DS_Store +0 -0
- data/CHANGELOG.md +37 -3
- data/Dockerfile +3 -1
- data/Gemfile.lock +1 -1
- data/README.md +75 -9
- data/Rakefile +7 -29
- data/benchmark/end_to_end_benchmark.rb +6 -3
- data/benchmark_results/ruby3.2.8.txt +8 -8
- data/benchmark_results/ruby3.2.8_yjit.txt +13 -13
- data/benchmark_results/ruby3.3.7.txt +8 -8
- data/benchmark_results/ruby3.3.7_yjit.txt +13 -13
- data/benchmark_results/ruby3.4.2.txt +8 -8
- data/benchmark_results/ruby3.4.2_yjit.txt +13 -13
- data/docs/benchmarks/README.md +57 -51
- data/docs/quality.md +67 -0
- data/examples/quality.rb +113 -1
- data/lib/type_balancer/balancer.rb +71 -94
- data/lib/type_balancer/batch_processing.rb +35 -0
- data/lib/type_balancer/distributor.rb +26 -53
- data/lib/type_balancer/position_calculator.rb +61 -0
- data/lib/type_balancer/ratio_calculator.rb +91 -0
- data/lib/type_balancer/type_extractor.rb +29 -0
- data/lib/type_balancer/version.rb +1 -1
- data/lib/type_balancer.rb +36 -17
- metadata +9 -4
- data/sig/type_balancer.rbs +0 -85
data/docs/benchmarks/README.md
CHANGED
@@ -21,27 +21,27 @@ Each benchmark test evaluates performance across different collection sizes, fro
|
|
21
21
|
1. Tiny Dataset (Content Widget):
|
22
22
|
- Total Items: 10
|
23
23
|
- Distribution: Video (40%), Image (30%), Article (30%)
|
24
|
-
- Processing Time: ~
|
24
|
+
- Processing Time: ~6-7 microseconds (with YJIT; ~9-10 microseconds without)
|
25
25
|
|
26
26
|
2. Small Dataset (Content Feed):
|
27
27
|
- Total Items: 100
|
28
28
|
- Distribution: Video (34%), Image (33%), Article (33%)
|
29
|
-
- Processing Time: ~
|
29
|
+
- Processing Time: ~31-33 microseconds (with YJIT; ~46-52 microseconds without)
|
30
30
|
|
31
31
|
3. Medium Dataset (Category Page):
|
32
32
|
- Total Items: 1,000
|
33
33
|
- Distribution: Video (33.4%), Image (33.3%), Article (33.3%)
|
34
|
-
- Processing Time: ~
|
34
|
+
- Processing Time: ~274-281 microseconds (with YJIT; ~499-556 microseconds without)
|
35
35
|
|
36
36
|
4. Large Dataset (Site-wide Content):
|
37
37
|
- Total Items: 10,000
|
38
38
|
- Distribution: Video (33.34%), Image (33.33%), Article (33.33%)
|
39
|
-
- Processing Time: ~
|
39
|
+
- Processing Time: ~2.4-2.8 milliseconds (with YJIT; ~3.8-4.5 milliseconds without)
|
40
40
|
|
41
41
|
### Real-world Application
|
42
42
|
|
43
43
|
TypeBalancer is designed for practical use in content management and display systems:
|
44
|
-
- Process 10,000 items in under
|
44
|
+
- Process 10,000 items in under 3ms with YJIT enabled (under 5ms without)
|
45
45
|
- Maintain perfect distribution ratios
|
46
46
|
- Suitable for real-time web applications
|
47
47
|
- Efficient enough for on-the-fly content organization
|
@@ -60,62 +60,69 @@ TypeBalancer is designed for practical use in content management and display sys
|
|
60
60
|
|
61
61
|
| Metric | Tiny Dataset | Small Dataset | Medium Dataset | Large Dataset |
|
62
62
|
|--------|--------------|---------------|----------------|---------------|
|
63
|
-
| Speed (no YJIT) |
|
64
|
-
| Speed (YJIT) |
|
65
|
-
| Time/Op (no YJIT) |
|
66
|
-
| Time/Op (YJIT) |
|
67
|
-
| YJIT Impact | +
|
68
|
-
| Distribution Quality | Perfect | Excellent | Excellent |
|
63
|
+
| Speed (no YJIT) | 109.0K ops/sec | 21.9K ops/sec | 2.0K ops/sec | 264 ops/sec |
|
64
|
+
| Speed (YJIT) | 152.7K ops/sec | 32.4K ops/sec | 3.6K ops/sec | 424 ops/sec |
|
65
|
+
| Time/Op (no YJIT) | 9.18 μs | 45.71 μs | 498.96 μs | 3.79 ms |
|
66
|
+
| Time/Op (YJIT) | 6.55 μs | 30.88 μs | 274.30 μs | 2.36 ms |
|
67
|
+
| YJIT Impact | +40.1% | +48.0% | +80.0% | +60.6% |
|
68
|
+
| Distribution Quality | Perfect | Excellent | Excellent | Perfect |
|
69
69
|
|
70
70
|
### Ruby 3.3.7 Performance
|
71
71
|
|
72
72
|
| Metric | Tiny Dataset | Small Dataset | Medium Dataset | Large Dataset |
|
73
73
|
|--------|--------------|---------------|----------------|---------------|
|
74
|
-
| Speed (no YJIT) |
|
75
|
-
| Speed (YJIT) |
|
76
|
-
| Time/Op (no YJIT) |
|
77
|
-
| Time/Op (YJIT) |
|
78
|
-
| YJIT Impact | +44.
|
79
|
-
| Distribution Quality | Perfect | Excellent | Excellent |
|
74
|
+
| Speed (no YJIT) | 102.4K ops/sec | 20.8K ops/sec | 1.9K ops/sec | 245 ops/sec |
|
75
|
+
| Speed (YJIT) | 148.2K ops/sec | 31.2K ops/sec | 3.5K ops/sec | 394 ops/sec |
|
76
|
+
| Time/Op (no YJIT) | 9.77 μs | 48.08 μs | 526.32 μs | 4.08 ms |
|
77
|
+
| Time/Op (YJIT) | 6.75 μs | 32.05 μs | 277.78 μs | 2.54 ms |
|
78
|
+
| YJIT Impact | +44.7% | +50.0% | +84.2% | +60.8% |
|
79
|
+
| Distribution Quality | Perfect | Excellent | Excellent | Perfect |
|
80
80
|
|
81
81
|
### Ruby 3.2.8 Performance
|
82
82
|
|
83
83
|
| Metric | Tiny Dataset | Small Dataset | Medium Dataset | Large Dataset |
|
84
84
|
|--------|--------------|---------------|----------------|---------------|
|
85
|
-
| Speed (no YJIT) |
|
86
|
-
| Speed (YJIT) |
|
87
|
-
| Time/Op (no YJIT) | 13
|
88
|
-
| Time/Op (YJIT) |
|
89
|
-
| YJIT Impact | +
|
90
|
-
| Distribution Quality | Perfect | Excellent | Excellent |
|
85
|
+
| Speed (no YJIT) | 98.7K ops/sec | 19.2K ops/sec | 1.8K ops/sec | 223 ops/sec |
|
86
|
+
| Speed (YJIT) | 142.8K ops/sec | 30.1K ops/sec | 3.4K ops/sec | 356 ops/sec |
|
87
|
+
| Time/Op (no YJIT) | 10.13 μs | 52.08 μs | 555.56 μs | 4.48 ms |
|
88
|
+
| Time/Op (YJIT) | 7.00 μs | 33.22 μs | 280.70 μs | 2.81 ms |
|
89
|
+
| YJIT Impact | +44.7% | +56.8% | +88.9% | +59.6% |
|
90
|
+
| Distribution Quality | Perfect | Excellent | Excellent | Perfect |
|
91
91
|
|
92
92
|
## Analysis
|
93
93
|
|
94
94
|
### Performance Characteristics
|
95
95
|
|
96
96
|
1. Speed and Efficiency:
|
97
|
-
- Processes 10K items in ~
|
98
|
-
- Microsecond-level processing for small collections (
|
99
|
-
-
|
100
|
-
-
|
101
|
-
-
|
97
|
+
- Processes 10K items in ~2.4-4.5ms across all Ruby versions
|
98
|
+
- Microsecond-level processing for small collections (6-10μs)
|
99
|
+
- Sub-millisecond processing for medium collections (~275-555μs)
|
100
|
+
- Millisecond-level processing for large collections (2.4-4.5ms)
|
101
|
+
- YJIT provides substantial speedup across all dataset sizes (40-89% faster)
|
102
|
+
- Suitable for high-performance real-time applications
|
102
103
|
|
103
104
|
2. YJIT Impact:
|
104
|
-
- Most effective on
|
105
|
-
-
|
106
|
-
- Ruby 3.2
|
107
|
-
-
|
108
|
-
|
109
|
-
3.
|
110
|
-
-
|
111
|
-
-
|
105
|
+
- Most effective on medium datasets (up to 89% improvement)
|
106
|
+
- Consistent improvements across all dataset sizes
|
107
|
+
- Ruby 3.4.2 shows best absolute performance
|
108
|
+
- All versions benefit significantly from YJIT
|
109
|
+
|
110
|
+
3. Version Comparison:
|
111
|
+
- Ruby 3.4.2 with YJIT shows best overall performance
|
112
|
+
- Ruby 3.3.7 maintains strong second position
|
113
|
+
- Ruby 3.2.8 shows solid baseline performance
|
114
|
+
- Performance variance between versions is consistent
|
115
|
+
|
116
|
+
4. Distribution Quality:
|
117
|
+
- Perfect distribution in tiny and large datasets
|
118
|
+
- Highly accurate distribution in all dataset sizes
|
112
119
|
- Consistent quality across all Ruby versions and YJIT settings
|
113
120
|
|
114
121
|
### Scaling Characteristics
|
115
122
|
|
116
123
|
1. Dataset Size Impact:
|
117
|
-
-
|
118
|
-
- Sub-
|
124
|
+
- Near-linear performance scaling with size
|
125
|
+
- Sub-millisecond processing for datasets up to 1000 items
|
119
126
|
- Reliable performance characteristics
|
120
127
|
|
121
128
|
2. Memory Usage:
|
@@ -124,16 +131,15 @@ TypeBalancer is designed for practical use in content management and display sys
|
|
124
131
|
- Stable across different workloads
|
125
132
|
|
126
133
|
3. Distribution Quality:
|
127
|
-
- Maintains
|
128
|
-
- Improves with larger datasets
|
134
|
+
- Maintains excellent-to-perfect accuracy at all scales
|
129
135
|
- Consistent across implementations
|
130
136
|
|
131
137
|
## Use Cases
|
132
138
|
|
133
139
|
1. Content Management Systems:
|
134
|
-
- Homepage feeds (100s of items):
|
135
|
-
- Category pages (1000s of items): ~
|
136
|
-
- Site-wide content (10,000s of items): ~
|
140
|
+
- Homepage feeds (100s of items): ~31μs processing
|
141
|
+
- Category pages (1000s of items): ~275μs processing
|
142
|
+
- Site-wide content (10,000s of items): ~2.4ms processing
|
137
143
|
|
138
144
|
2. Real-time Applications:
|
139
145
|
- Widget content balancing: microsecond response
|
@@ -141,26 +147,26 @@ TypeBalancer is designed for practical use in content management and display sys
|
|
141
147
|
- Content reorganization: real-time capable
|
142
148
|
|
143
149
|
3. Batch Processing:
|
144
|
-
- Large collection processing: efficient
|
150
|
+
- Large collection processing: highly efficient
|
145
151
|
- Consistent performance characteristics
|
146
152
|
- Predictable resource usage
|
147
153
|
|
148
154
|
## Conclusions
|
149
155
|
|
150
156
|
1. Version Selection:
|
151
|
-
- Ruby 3.2
|
152
|
-
- All versions maintain
|
157
|
+
- Ruby 3.4.2 with YJIT shows optimal performance across all sizes
|
158
|
+
- All versions maintain the same distribution quality (perfect or excellent, depending on dataset size)
|
153
159
|
- Version choice can be based on other requirements
|
154
160
|
|
155
161
|
2. Production Readiness:
|
156
|
-
-
|
157
|
-
- Handles large datasets efficiently
|
158
|
-
-
|
162
|
+
- Exceptional performance for production workloads
|
163
|
+
- Handles large datasets very efficiently
|
164
|
+
- Suitable for high-frequency real-time processing
|
159
165
|
|
160
166
|
3. Future Outlook:
|
161
|
-
-
|
167
|
+
- Current performance exceeds most real-world requirements
|
162
168
|
- Focus on maintaining distribution quality
|
163
|
-
-
|
169
|
+
- Room for optimization in specific use cases
|
164
170
|
|
165
171
|
## Running the Benchmarks
|
166
172
|
|
data/docs/quality.md
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
# Quality Script Documentation
|
2
|
+
|
3
|
+
The TypeBalancer gem includes a comprehensive quality check script located at `examples/quality.rb`. This script serves multiple purposes:
|
4
|
+
|
5
|
+
1. **Documentation through Examples**: Demonstrates various use cases and features of the gem
|
6
|
+
2. **Quality Assurance**: Verifies that core functionality works as expected
|
7
|
+
3. **Integration Testing**: Tests how different components work together
|
8
|
+
|
9
|
+
## Running the Script
|
10
|
+
|
11
|
+
To run the quality script:
|
12
|
+
|
13
|
+
```bash
|
14
|
+
bundle exec ruby examples/quality.rb
|
15
|
+
```
|
16
|
+
|
17
|
+
## What it Tests
|
18
|
+
|
19
|
+
The script tests several key aspects of the TypeBalancer gem:
|
20
|
+
|
21
|
+
### 1. Basic Distribution
|
22
|
+
- Demonstrates how items are distributed across available slots
|
23
|
+
- Shows spacing calculations between positions
|
24
|
+
- Verifies edge cases (single item, no items, all items)
|
25
|
+
|
26
|
+
### 2. Content Feed Example
|
27
|
+
- Shows a real-world example of content type distribution
|
28
|
+
- Verifies position allocation for different content types (video, image, article)
|
29
|
+
- Checks distribution statistics and ratios
|
30
|
+
|
31
|
+
### 3. Balancer API
|
32
|
+
- Tests the main TypeBalancer.balance method
|
33
|
+
- Verifies batch creation and size limits
|
34
|
+
- Demonstrates custom type ordering
|
35
|
+
|
36
|
+
### 4. Type Extraction
|
37
|
+
- Tests type extraction from both hash and object items
|
38
|
+
- Verifies support for different type field access methods
|
39
|
+
|
40
|
+
### 5. Error Handling
|
41
|
+
- Validates handling of empty collections
|
42
|
+
- Tests response to invalid type fields
|
43
|
+
- Verifies batch size validation
|
44
|
+
|
45
|
+
## Output Format
|
46
|
+
|
47
|
+
The script provides detailed output showing:
|
48
|
+
- Results of each test case
|
49
|
+
- Distribution statistics
|
50
|
+
- Any issues found during testing
|
51
|
+
- A summary of all examples run and passed
|
52
|
+
|
53
|
+
## Using as a Development Tool
|
54
|
+
|
55
|
+
The quality script is particularly useful when:
|
56
|
+
1. Developing new features
|
57
|
+
2. Refactoring existing code
|
58
|
+
3. Verifying changes haven't broken core functionality
|
59
|
+
4. Understanding how different features work together
|
60
|
+
|
61
|
+
## Extending the Script
|
62
|
+
|
63
|
+
When adding new features to TypeBalancer, consider:
|
64
|
+
1. Adding relevant examples to the quality script
|
65
|
+
2. Including edge cases
|
66
|
+
3. Documenting expected behavior
|
67
|
+
4. Adding appropriate quality checks
|
data/examples/quality.rb
CHANGED
@@ -13,6 +13,8 @@ class QualityChecker
|
|
13
13
|
check_basic_distribution
|
14
14
|
check_available_items
|
15
15
|
check_edge_cases
|
16
|
+
check_position_precision
|
17
|
+
check_available_positions_edge_cases
|
16
18
|
check_real_world_feed
|
17
19
|
|
18
20
|
print_summary
|
@@ -97,11 +99,94 @@ class QualityChecker
|
|
97
99
|
end
|
98
100
|
end
|
99
101
|
|
102
|
+
def check_position_precision
|
103
|
+
puts "\nPosition Precision Cases:"
|
104
|
+
|
105
|
+
# Two positions in three slots
|
106
|
+
@examples_run += 1
|
107
|
+
positions = TypeBalancer.calculate_positions(total_count: 3, ratio: 0.67)
|
108
|
+
puts "Two positions in three slots: #{positions.inspect}"
|
109
|
+
if positions == [0, 1]
|
110
|
+
@examples_passed += 1
|
111
|
+
else
|
112
|
+
record_issue("Two in three case returned #{positions.inspect} instead of [0, 1]")
|
113
|
+
end
|
114
|
+
|
115
|
+
# Single position in three slots
|
116
|
+
@examples_run += 1
|
117
|
+
positions = TypeBalancer.calculate_positions(total_count: 3, ratio: 0.34)
|
118
|
+
puts "Single position in three slots: #{positions.inspect}"
|
119
|
+
if positions == [0]
|
120
|
+
@examples_passed += 1
|
121
|
+
else
|
122
|
+
record_issue("One in three case returned #{positions.inspect} instead of [0]")
|
123
|
+
end
|
124
|
+
end
|
125
|
+
|
126
|
+
def check_available_positions_edge_cases
|
127
|
+
puts "\nAvailable Positions Edge Cases:"
|
128
|
+
|
129
|
+
# Single target with multiple available positions
|
130
|
+
@examples_run += 1
|
131
|
+
positions = TypeBalancer.calculate_positions(
|
132
|
+
total_count: 5,
|
133
|
+
ratio: 0.2,
|
134
|
+
available_items: [1, 2, 3]
|
135
|
+
)
|
136
|
+
puts "Single target with multiple available: #{positions.inspect}"
|
137
|
+
if positions == [1]
|
138
|
+
@examples_passed += 1
|
139
|
+
else
|
140
|
+
record_issue("Single target with multiple available returned #{positions.inspect} instead of [1]")
|
141
|
+
end
|
142
|
+
|
143
|
+
# Two targets with multiple available positions
|
144
|
+
@examples_run += 1
|
145
|
+
positions = TypeBalancer.calculate_positions(
|
146
|
+
total_count: 10,
|
147
|
+
ratio: 0.2,
|
148
|
+
available_items: [1, 3, 5]
|
149
|
+
)
|
150
|
+
puts "Two targets with multiple available: #{positions.inspect}"
|
151
|
+
if positions == [1, 5]
|
152
|
+
@examples_passed += 1
|
153
|
+
else
|
154
|
+
record_issue("Two targets with multiple available returned #{positions.inspect} instead of [1, 5]")
|
155
|
+
end
|
156
|
+
|
157
|
+
# Exact match of available positions
|
158
|
+
@examples_run += 1
|
159
|
+
positions = TypeBalancer.calculate_positions(
|
160
|
+
total_count: 10,
|
161
|
+
ratio: 0.3,
|
162
|
+
available_items: [2, 4, 6]
|
163
|
+
)
|
164
|
+
puts "Exact match of available positions: #{positions.inspect}"
|
165
|
+
if positions == [2, 4, 6]
|
166
|
+
@examples_passed += 1
|
167
|
+
else
|
168
|
+
record_issue("Exact match case returned #{positions.inspect} instead of [2, 4, 6]")
|
169
|
+
end
|
170
|
+
end
|
171
|
+
|
100
172
|
def check_real_world_feed
|
101
173
|
@examples_run += 1
|
102
174
|
puts "\nReal World Example - Content Feed:"
|
103
175
|
feed_size = 20
|
104
176
|
|
177
|
+
# Create test items
|
178
|
+
items = [
|
179
|
+
{ type: 'video', id: 1 },
|
180
|
+
{ type: 'video', id: 2 },
|
181
|
+
{ type: 'video', id: 3 },
|
182
|
+
{ type: 'image', id: 4 },
|
183
|
+
{ type: 'image', id: 5 },
|
184
|
+
{ type: 'image', id: 6 },
|
185
|
+
{ type: 'article', id: 7 },
|
186
|
+
{ type: 'article', id: 8 },
|
187
|
+
{ type: 'article', id: 9 }
|
188
|
+
]
|
189
|
+
|
105
190
|
# Track allocated positions
|
106
191
|
allocated_positions = []
|
107
192
|
content_positions = {}
|
@@ -155,13 +240,40 @@ class QualityChecker
|
|
155
240
|
record_issue("#{type} count #{count} doesn't match expected #{expected_counts[type]}")
|
156
241
|
end
|
157
242
|
end
|
243
|
+
|
244
|
+
# Test with custom type order
|
245
|
+
ordered_result = TypeBalancer.balance(
|
246
|
+
items,
|
247
|
+
type_field: :type,
|
248
|
+
type_order: %w[article image video]
|
249
|
+
)
|
250
|
+
|
251
|
+
# Verify type order is respected
|
252
|
+
if ordered_result.first[:type] == 'article'
|
253
|
+
@examples_passed += 1
|
254
|
+
else
|
255
|
+
record_issue("Custom type order not respected")
|
256
|
+
end
|
257
|
+
|
258
|
+
# Test position calculation
|
259
|
+
positions = TypeBalancer::Distributor.calculate_target_positions(
|
260
|
+
total_count: 10,
|
261
|
+
ratio: 0.3
|
262
|
+
)
|
263
|
+
if positions.is_a?(Array) && positions.all? { |p| p.is_a?(Integer) }
|
264
|
+
@examples_passed += 1
|
265
|
+
else
|
266
|
+
record_issue("Position calculation failed")
|
267
|
+
end
|
268
|
+
|
269
|
+
puts "\nBalanced items with custom order:"
|
158
270
|
end
|
159
271
|
|
160
272
|
def print_summary
|
161
273
|
puts "\n#{'-' * 50}"
|
162
274
|
puts 'Quality Check Summary:'
|
163
275
|
puts "Examples Run: #{@examples_run}"
|
164
|
-
puts "
|
276
|
+
puts "Expectations Passed: #{@examples_passed}"
|
165
277
|
|
166
278
|
if @issues.empty?
|
167
279
|
puts "\nAll quality checks passed! ✓"
|
@@ -1,126 +1,103 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require_relative 'ratio_calculator'
|
4
|
+
require_relative 'batch_processing'
|
5
|
+
require_relative 'position_calculator'
|
6
|
+
|
3
7
|
module TypeBalancer
|
4
|
-
#
|
5
|
-
# It uses a distribution calculator to determine optimal positions for each type
|
6
|
-
# and a gap filler strategy to place items in the final sequence.
|
8
|
+
# Handles balancing of items across batches based on type ratios
|
7
9
|
class Balancer
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
@types = types
|
14
|
-
@
|
10
|
+
# Initialize a new Balancer instance
|
11
|
+
#
|
12
|
+
# @param types [Array<String>, nil] Optional types
|
13
|
+
# @param type_order [Array<String>, nil] Optional order of types
|
14
|
+
def initialize(types = nil, type_order: nil)
|
15
|
+
@types = Array(types) if types
|
16
|
+
@type_order = type_order
|
17
|
+
validate_types! if @types
|
15
18
|
end
|
16
19
|
|
17
|
-
|
18
|
-
|
20
|
+
# Main entry point for balancing items
|
21
|
+
#
|
22
|
+
# @param collection [Array] Items to balance
|
23
|
+
# @return [Array] Balanced items
|
24
|
+
def call(collection)
|
25
|
+
validate_collection!(collection)
|
26
|
+
items_by_type = group_items_by_type(collection)
|
27
|
+
validate_types_in_collection!(items_by_type)
|
28
|
+
|
29
|
+
target_counts = calculate_target_counts(items_by_type)
|
30
|
+
available_positions = (0...collection.size).to_a
|
31
|
+
|
32
|
+
result = Array.new(collection.size)
|
33
|
+
sorted_types = sort_types(items_by_type.keys)
|
34
|
+
|
35
|
+
sorted_types.each do |type|
|
36
|
+
items = items_by_type[type]
|
37
|
+
target_count = target_counts[type]
|
38
|
+
ratio = target_count.to_f / collection.size
|
39
|
+
positions = PositionCalculator.calculate_positions(
|
40
|
+
total_count: collection.size,
|
41
|
+
ratio: ratio,
|
42
|
+
available_items: available_positions
|
43
|
+
)
|
44
|
+
|
45
|
+
positions.each_with_index do |pos, idx|
|
46
|
+
result[pos] = items[idx]
|
47
|
+
end
|
19
48
|
|
20
|
-
|
21
|
-
|
22
|
-
else
|
23
|
-
process_multiple_batches
|
49
|
+
# Remove used positions from available positions
|
50
|
+
available_positions -= positions
|
24
51
|
end
|
52
|
+
|
53
|
+
result.compact
|
25
54
|
end
|
26
55
|
|
27
56
|
private
|
28
57
|
|
29
|
-
def
|
30
|
-
|
31
|
-
items_by_type = items.group_by { |item| get_type(item) }
|
32
|
-
|
33
|
-
# Calculate ratios based on type order and counts
|
34
|
-
ratios = calculate_ratios(items_by_type)
|
35
|
-
|
36
|
-
# Calculate positions for each type
|
37
|
-
positions_by_type = calculate_positions_by_type(items_by_type, ratios, items.size)
|
38
|
-
|
39
|
-
# Map items to their balanced positions
|
40
|
-
balanced_items = place_items_in_positions(items_by_type, positions_by_type, items.size)
|
41
|
-
|
42
|
-
# Fill any gaps with remaining items
|
43
|
-
fill_gaps(balanced_items, items)
|
58
|
+
def validate_types!
|
59
|
+
raise ArgumentError, 'Types cannot be empty' if @types.empty?
|
44
60
|
end
|
45
61
|
|
46
|
-
def
|
47
|
-
|
48
|
-
@collection.each_slice(BATCH_SIZE) do |batch|
|
49
|
-
result.concat(process_single_batch(batch))
|
50
|
-
end
|
51
|
-
result
|
62
|
+
def validate_collection!(collection)
|
63
|
+
raise ArgumentError, 'Collection cannot be empty' if collection.empty?
|
52
64
|
end
|
53
65
|
|
54
|
-
def
|
55
|
-
|
56
|
-
|
57
|
-
@types.each_with_index do |type, index|
|
58
|
-
items = items_by_type[type] || []
|
59
|
-
ratio = ratios[index]
|
60
|
-
positions = @distribution_calculator.calculate_target_positions(total_count, items.size, ratio)
|
61
|
-
positions_by_type[type] = positions
|
62
|
-
end
|
66
|
+
def validate_types_in_collection!(items_by_type)
|
67
|
+
return unless @types
|
63
68
|
|
64
|
-
|
69
|
+
invalid_types = items_by_type.keys - @types
|
70
|
+
raise TypeBalancer::Error, "Invalid type(s): #{invalid_types.join(', ')}" if invalid_types.any?
|
65
71
|
end
|
66
72
|
|
67
|
-
def
|
68
|
-
|
69
|
-
|
70
|
-
@types.each do |type|
|
71
|
-
items = items_by_type[type] || []
|
72
|
-
positions = positions_by_type[type] || []
|
73
|
-
|
74
|
-
items.each_with_index do |item, index|
|
75
|
-
pos = positions[index]
|
76
|
-
next unless pos && pos < total_count && balanced_items[pos].nil?
|
77
|
-
|
78
|
-
balanced_items[pos] = item
|
79
|
-
end
|
73
|
+
def group_items_by_type(collection)
|
74
|
+
collection.group_by do |item|
|
75
|
+
extract_type(item)
|
80
76
|
end
|
81
|
-
|
82
|
-
balanced_items
|
83
77
|
end
|
84
78
|
|
85
|
-
def
|
86
|
-
|
87
|
-
remaining_items = original_items.reject { |item| balanced_items.include?(item) }
|
88
|
-
empty_positions = balanced_items.each_index.select { |i| balanced_items[i].nil? }
|
89
|
-
|
90
|
-
empty_positions.each_with_index do |pos, idx|
|
91
|
-
break unless idx < remaining_items.size
|
79
|
+
def extract_type(item)
|
80
|
+
return item[:type] || item['type'] || raise(TypeBalancer::Error, 'Cannot access type field') if item.is_a?(Hash)
|
92
81
|
|
93
|
-
|
82
|
+
begin
|
83
|
+
item.type
|
84
|
+
rescue NoMethodError
|
85
|
+
raise TypeBalancer::Error, 'Cannot access type field'
|
94
86
|
end
|
95
|
-
|
96
|
-
balanced_items.compact
|
97
87
|
end
|
98
88
|
|
99
|
-
def
|
100
|
-
|
101
|
-
|
102
|
-
[1.0]
|
103
|
-
when 2
|
104
|
-
[0.6, 0.4]
|
105
|
-
else
|
106
|
-
# First type gets 0.4, rest split remaining 0.6 evenly
|
107
|
-
remaining = (0.6 / (@types.size - 1).to_f).round(6)
|
108
|
-
[0.4] + Array.new(@types.size - 1, remaining)
|
109
|
-
end
|
89
|
+
def calculate_target_counts(items_by_type)
|
90
|
+
items_by_type.values.sum(&:size)
|
91
|
+
items_by_type.transform_values(&:size)
|
110
92
|
end
|
111
93
|
|
112
|
-
def
|
113
|
-
|
114
|
-
item.send(@type_field)
|
115
|
-
elsif item.respond_to?(:[])
|
116
|
-
item[@type_field] || item[@type_field.to_s]
|
117
|
-
else
|
118
|
-
raise Error, "Cannot access type field '#{@type_field}' on item #{item}"
|
119
|
-
end
|
120
|
-
end
|
94
|
+
def sort_types(types)
|
95
|
+
return types.sort unless @type_order
|
121
96
|
|
122
|
-
|
123
|
-
|
97
|
+
types.sort_by do |type|
|
98
|
+
idx = @type_order.index(type)
|
99
|
+
idx || Float::INFINITY
|
100
|
+
end
|
124
101
|
end
|
125
102
|
end
|
126
103
|
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module TypeBalancer
|
4
|
+
class BatchProcessing
|
5
|
+
def initialize(batch_size)
|
6
|
+
@batch_size = batch_size
|
7
|
+
end
|
8
|
+
|
9
|
+
def create_batches(items_by_type, positions_by_type)
|
10
|
+
total_items = items_by_type.values.sum(&:size)
|
11
|
+
batches = []
|
12
|
+
current_batch = []
|
13
|
+
|
14
|
+
(0...total_items).each do |position|
|
15
|
+
type = find_type_for_position(position, positions_by_type)
|
16
|
+
current_batch << items_by_type[type].shift if type && !items_by_type[type].empty?
|
17
|
+
|
18
|
+
if current_batch.size >= @batch_size || position == total_items - 1
|
19
|
+
batches << current_batch unless current_batch.empty?
|
20
|
+
current_batch = []
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
batches
|
25
|
+
end
|
26
|
+
|
27
|
+
private
|
28
|
+
|
29
|
+
def find_type_for_position(position, positions_by_type)
|
30
|
+
positions_by_type.find do |_, positions|
|
31
|
+
positions.include?(position)
|
32
|
+
end&.first
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|