type_balancer 0.1.3 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +38 -2
- data/Gemfile.lock +1 -1
- data/README.md +65 -4
- data/Rakefile +19 -1
- data/docs/balance.md +113 -10
- data/docs/quality.md +10 -2
- data/examples/large_scale_balance_test.rb +289 -0
- data/examples/quality.rb +93 -0
- data/lib/type_balancer/balancer.rb +16 -19
- data/lib/type_balancer/calculator.rb +27 -91
- data/lib/type_balancer/distributor.rb +70 -17
- data/lib/type_balancer/strategies/base_strategy.rb +49 -0
- data/lib/type_balancer/strategies/sliding_window_strategy.rb +140 -0
- data/lib/type_balancer/strategies.rb +7 -0
- data/lib/type_balancer/strategy_factory.rb +41 -0
- data/lib/type_balancer/type_extractor.rb +7 -2
- data/lib/type_balancer/type_extractor_registry.rb +20 -0
- data/lib/type_balancer/version.rb +1 -1
- data/lib/type_balancer.rb +43 -27
- metadata +8 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 404bf713a39abab585e33e9f196d6f9e0ab21f11b07a14fbc91d37a8ac1f2ce7
|
4
|
+
data.tar.gz: 6f5975a9e2a4789645779d3d4fc8463cf8140250b15d650b32e2f8096ebbd9b7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8c8f8754168d393a49e0c705a633fefdfda11e0d18f56bc224459549eff14948ad659219fcf9bc0b97acc22b61147c75aace1d3fb33ae344b6158592a68720c5
|
7
|
+
data.tar.gz: d7a52bbff78479249faf11e3d38a1c8821324221d5b226db310a31918aa1a5f209358c2d375b8f0b876f08bb7d49dbaaac188aa3f2d06e61ba96232e327b9e07
|
data/CHANGELOG.md
CHANGED
@@ -1,11 +1,47 @@
|
|
1
1
|
# Changelog
|
2
2
|
|
3
|
-
## [0.
|
3
|
+
## [0.2.0] - 2025-04-30
|
4
|
+
|
5
|
+
### Added
|
6
|
+
- Introduced strategy pattern for flexible balancing algorithms
|
7
|
+
- Added sliding window strategy as the default balancing algorithm
|
8
|
+
- Configurable window size (default: 10)
|
9
|
+
- Maintains both local and global type ratios
|
10
|
+
- Adaptive behavior for remaining items
|
11
|
+
- Added comprehensive strategy documentation in README and balance.md
|
12
|
+
- Added large scale balance test suite for thorough strategy validation
|
13
|
+
|
14
|
+
### Enhanced
|
15
|
+
- Improved quality testing infrastructure
|
16
|
+
- Added quality:all rake task that runs both quality.rb and large_scale_balance_test.rb
|
17
|
+
- Enhanced CI workflow to run all quality checks
|
18
|
+
- Added strategy-specific test cases
|
19
|
+
- Updated documentation with detailed strategy explanations and use cases
|
20
|
+
- Added extensive test coverage for strategy system
|
21
|
+
|
22
|
+
### Fixed
|
23
|
+
- Improved handling of type distribution in edge cases
|
24
|
+
- Better handling of remaining items when types are depleted
|
25
|
+
- Enhanced transition handling between windows
|
26
|
+
|
27
|
+
## [0.1.4] - 2025-04-29
|
28
|
+
|
29
|
+
### Fixed
|
30
|
+
- Fixed issue with providing a custom type field
|
31
|
+
|
32
|
+
## [0.1.3] - 2025-04-27
|
33
|
+
|
34
|
+
### Fixed
|
35
|
+
- Fixed type balancing behavior to properly handle edge cases where type ratios need to be maintained while respecting original collection order
|
36
|
+
- Enhanced position calculation to ensure consistent type distribution across the balanced collection
|
37
|
+
- Improved test coverage to verify correct type ratio preservation
|
38
|
+
|
39
|
+
## [0.1.2] - 2025-04-11
|
4
40
|
|
5
41
|
- Re-release of 0.1.1 due to RubyGems.org publishing issue
|
6
42
|
- No functional changes from 0.1.1
|
7
43
|
|
8
|
-
## [0.1.1] -
|
44
|
+
## [0.1.1] - 2025-04-10
|
9
45
|
|
10
46
|
### Refactoring
|
11
47
|
- Major refactoring of core components to follow SOLID principles:
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -52,15 +52,69 @@ items = [
|
|
52
52
|
# ... more items
|
53
53
|
]
|
54
54
|
|
55
|
-
# Balance items by type
|
55
|
+
# Balance items by type (uses default sliding window strategy)
|
56
56
|
balanced_items = TypeBalancer.balance(items, type_field: :type)
|
57
|
+
|
58
|
+
# Use sliding window strategy with custom window size
|
59
|
+
balanced_items = TypeBalancer.balance(items,
|
60
|
+
type_field: :type,
|
61
|
+
strategy: :sliding_window,
|
62
|
+
window_size: 25
|
63
|
+
)
|
57
64
|
```
|
58
65
|
|
59
66
|
## Balancing Collections with `TypeBalancer.balance`
|
60
67
|
|
61
68
|
The primary method for balancing collections is `TypeBalancer.balance`. This method takes an array of items and distributes them by type, ensuring optimal spacing and respecting type ratios.
|
62
69
|
|
63
|
-
|
70
|
+
### Available Strategies
|
71
|
+
|
72
|
+
TypeBalancer uses a strategy pattern to provide different balancing algorithms. Currently, the gem implements a sophisticated sliding window strategy as its default approach:
|
73
|
+
|
74
|
+
#### Sliding Window Strategy (Default)
|
75
|
+
The sliding window strategy balances items by examining a fixed-size window of items at a time (default size: 10). Within each window, it maintains the overall ratio of types while ensuring each type gets fair representation. This creates both local and global balance in your content distribution.
|
76
|
+
|
77
|
+
**When to Use Sliding Window Strategy:**
|
78
|
+
- Content feeds where users might stop scrolling at any point
|
79
|
+
- When you want to ensure diversity in any segment of your list
|
80
|
+
- When you need to maintain both local and global balance
|
81
|
+
- When you want to prevent long runs of the same type while still allowing some natural clustering
|
82
|
+
|
83
|
+
**Window Size Selection Guide:**
|
84
|
+
- Small windows (5-10): Strict local balance, ideal for shorter lists or when immediate diversity is critical
|
85
|
+
- Medium windows (15-25): Balance between local and global distribution
|
86
|
+
- Large windows (30+): More gradual transitions, better for preserving some natural clustering
|
87
|
+
|
88
|
+
```ruby
|
89
|
+
# Basic usage with default window size (10)
|
90
|
+
balanced = TypeBalancer.balance(items, type_field: :type)
|
91
|
+
|
92
|
+
# Custom window size for stricter local balance
|
93
|
+
balanced = TypeBalancer.balance(items,
|
94
|
+
type_field: :type,
|
95
|
+
strategy: :sliding_window,
|
96
|
+
window_size: 5
|
97
|
+
)
|
98
|
+
|
99
|
+
# Larger window for more gradual transitions
|
100
|
+
balanced = TypeBalancer.balance(items,
|
101
|
+
type_field: :type,
|
102
|
+
strategy: :sliding_window,
|
103
|
+
window_size: 25
|
104
|
+
)
|
105
|
+
|
106
|
+
# With custom type ordering
|
107
|
+
balanced = TypeBalancer.balance(items,
|
108
|
+
type_field: :type,
|
109
|
+
strategy: :sliding_window,
|
110
|
+
window_size: 15,
|
111
|
+
type_order: %w[image video article]
|
112
|
+
)
|
113
|
+
```
|
114
|
+
|
115
|
+
The strategy system is designed to be extensible, allowing for future implementations of different balancing algorithms as needed.
|
116
|
+
|
117
|
+
### Basic Example
|
64
118
|
|
65
119
|
```ruby
|
66
120
|
items = [
|
@@ -73,18 +127,25 @@ balanced = TypeBalancer.balance(items, type_field: :type)
|
|
73
127
|
# => [ { type: 'article', ... }, { type: 'image', ... }, { type: 'video', ... }, ... ]
|
74
128
|
```
|
75
129
|
|
76
|
-
|
130
|
+
### Custom Type Order
|
77
131
|
|
78
132
|
You can specify a custom order for types using the `type_order` argument. This controls the priority of types in the balanced output.
|
79
133
|
|
80
134
|
```ruby
|
81
135
|
# Prioritize images, then videos, then articles
|
82
|
-
balanced = TypeBalancer.balance(items,
|
136
|
+
balanced = TypeBalancer.balance(items,
|
137
|
+
type_field: :type,
|
138
|
+
type_order: %w[image video article],
|
139
|
+
strategy: :sliding_window,
|
140
|
+
window_size: 15
|
141
|
+
)
|
83
142
|
# => [ { type: 'image', ... }, { type: 'video', ... }, { type: 'article', ... }, ... ]
|
84
143
|
```
|
85
144
|
|
86
145
|
- `type_field`: The key to use for type extraction (default: `:type`).
|
87
146
|
- `type_order`: An array of type names (as strings) specifying the desired order.
|
147
|
+
- `strategy`: The balancing strategy to use (default: `:sliding_window`).
|
148
|
+
- `window_size`: Size of the sliding window for the sliding window strategy (default: 10).
|
88
149
|
|
89
150
|
For more advanced usage and options, see [Detailed Balance Method Documentation](docs/balance.md).
|
90
151
|
|
data/Rakefile
CHANGED
@@ -17,6 +17,24 @@ Rake::ExtensionTask.new('type_balancer') do |ext|
|
|
17
17
|
ext.config_options = ['--with-cflags=-Wall -Wextra -O3']
|
18
18
|
end
|
19
19
|
|
20
|
+
# Quality check tasks
|
21
|
+
namespace :quality do
|
22
|
+
desc 'Run basic quality checks'
|
23
|
+
task :basic do
|
24
|
+
puts "\nRunning basic quality checks..."
|
25
|
+
ruby '-I lib examples/quality.rb'
|
26
|
+
end
|
27
|
+
|
28
|
+
desc 'Run large scale balance tests'
|
29
|
+
task :large_scale do
|
30
|
+
puts "\nRunning large scale balance tests..."
|
31
|
+
ruby '-I lib examples/large_scale_balance_test.rb'
|
32
|
+
end
|
33
|
+
|
34
|
+
desc 'Run all quality checks'
|
35
|
+
task all: %i[basic large_scale]
|
36
|
+
end
|
37
|
+
|
20
38
|
# Add GoogleTest task using CMake
|
21
39
|
namespace :gtest do
|
22
40
|
desc 'Build and run all GoogleTest tests'
|
@@ -62,7 +80,7 @@ task test_with_mocks: [:spec] do
|
|
62
80
|
Rake::Task['gtest:all'].invoke
|
63
81
|
end
|
64
82
|
|
65
|
-
task default: [:test_with_mocks, 'lint:all']
|
83
|
+
task default: [:test_with_mocks, 'lint:all', 'quality:all']
|
66
84
|
|
67
85
|
# Benchmark tasks
|
68
86
|
namespace :benchmark do
|
data/docs/balance.md
CHANGED
@@ -1,25 +1,124 @@
|
|
1
1
|
# Detailed Documentation: `TypeBalancer.balance`
|
2
2
|
|
3
|
-
`TypeBalancer.balance` is the main method for distributing items of different types across a sequence, ensuring optimal spacing and respecting type ratios. It is highly configurable and supports custom type fields and type orderings.
|
3
|
+
`TypeBalancer.balance` is the main method for distributing items of different types across a sequence, ensuring optimal spacing and respecting type ratios. It is highly configurable and supports custom type fields, type orderings, and different balancing strategies.
|
4
4
|
|
5
5
|
## Method Signature
|
6
6
|
|
7
7
|
```ruby
|
8
|
-
TypeBalancer.balance(items, type_field: :type, type_order: nil)
|
8
|
+
TypeBalancer.balance(items, type_field: :type, type_order: nil, strategy: nil, **strategy_options)
|
9
9
|
```
|
10
10
|
|
11
11
|
### Arguments
|
12
12
|
- `items` (Array<Hash>): The collection of items to balance. Each item should have a type field (default: `:type`).
|
13
13
|
- `type_field` (Symbol/String, optional): The key to use for extracting the type from each item. Default is `:type`.
|
14
14
|
- `type_order` (Array<String>, optional): An array specifying the desired order of types in the output. If omitted, the gem determines the order automatically.
|
15
|
+
- `strategy` (Symbol, optional): The balancing strategy to use. Default is `:sliding_window`.
|
16
|
+
- `strategy_options` (Hash, optional): Additional options specific to the chosen strategy.
|
15
17
|
|
16
|
-
##
|
17
|
-
|
18
|
-
|
18
|
+
## Available Strategies
|
19
|
+
|
20
|
+
### 1. Sliding Window Strategy (default)
|
21
|
+
The sliding window strategy is a sophisticated approach that balances items by examining fixed-size windows of items sequentially. For each window, it:
|
22
|
+
1. Calculates the target ratio of each type based on the overall collection
|
23
|
+
2. Ensures minimum representation of each type when possible
|
24
|
+
3. Distributes remaining slots to maintain target ratios
|
25
|
+
4. Handles transitions between windows to maintain smooth distribution
|
26
|
+
|
27
|
+
**Technical Details:**
|
28
|
+
- Default window size: 10 items
|
29
|
+
- Minimum representation: Each type gets at least one slot in a window if ratio > 0
|
30
|
+
- Ratio preservation: Maintains approximate global ratios while ensuring local diversity
|
31
|
+
- Adaptive sizing: Window size automatically adjusts near the end of the collection
|
32
|
+
|
33
|
+
**Configuration Options:**
|
34
|
+
```ruby
|
35
|
+
TypeBalancer.balance(items,
|
36
|
+
strategy: :sliding_window,
|
37
|
+
window_size: 25, # Size of the sliding window
|
38
|
+
type_field: :type, # Field containing type information
|
39
|
+
type_order: %w[...] # Optional: preferred type order
|
40
|
+
)
|
41
|
+
```
|
42
|
+
|
43
|
+
**When to Use:**
|
44
|
+
1. **Content Feed Optimization**
|
45
|
+
- Perfect for social media feeds, blog lists, or any paginated content
|
46
|
+
- Ensures users see a diverse mix regardless of where they stop scrolling
|
47
|
+
```ruby
|
48
|
+
TypeBalancer.balance(posts,
|
49
|
+
strategy: :sliding_window,
|
50
|
+
window_size: 10
|
51
|
+
)
|
52
|
+
```
|
53
|
+
|
54
|
+
2. **E-commerce Category Display**
|
55
|
+
- Balances product types in search results or category pages
|
56
|
+
- Maintains category ratios while ensuring variety
|
57
|
+
```ruby
|
58
|
+
TypeBalancer.balance(products,
|
59
|
+
strategy: :sliding_window,
|
60
|
+
window_size: 15,
|
61
|
+
type_field: :category
|
62
|
+
)
|
63
|
+
```
|
64
|
+
|
65
|
+
3. **News Feed Management**
|
66
|
+
- Mixes different news categories while maintaining importance
|
67
|
+
- Larger windows allow for some natural clustering
|
68
|
+
```ruby
|
69
|
+
TypeBalancer.balance(articles,
|
70
|
+
strategy: :sliding_window,
|
71
|
+
window_size: 25,
|
72
|
+
type_order: %w[breaking featured regular]
|
73
|
+
)
|
74
|
+
```
|
75
|
+
|
76
|
+
**Window Size Guidelines:**
|
77
|
+
- **Small (5-10 items)**
|
78
|
+
- Strictest local balance
|
79
|
+
- Best for: Short lists, critical diversity needs
|
80
|
+
- Example: Featured content sections
|
81
|
+
|
82
|
+
- **Medium (15-25 items)**
|
83
|
+
- Balanced local/global distribution
|
84
|
+
- Best for: Standard content feeds
|
85
|
+
- Example: Blog post listings
|
86
|
+
|
87
|
+
- **Large (30+ items)**
|
88
|
+
- More gradual transitions
|
89
|
+
- Best for: Long-form content, natural grouping
|
90
|
+
- Example: Search results with category clustering
|
91
|
+
|
92
|
+
**Implementation Notes:**
|
93
|
+
- The strategy maintains a queue for each type
|
94
|
+
- Window calculations consider both used and available items
|
95
|
+
- Edge cases (end of collection, single type) are handled gracefully
|
96
|
+
- Performance scales linearly with collection size
|
97
|
+
|
98
|
+
**Example with Analysis:**
|
99
|
+
```ruby
|
100
|
+
# Balance a feed with analytics
|
101
|
+
items = [
|
102
|
+
{ type: 'video', id: 1 },
|
103
|
+
{ type: 'article', id: 2 },
|
104
|
+
# ... more items
|
105
|
+
]
|
106
|
+
|
107
|
+
balanced = TypeBalancer.balance(items,
|
108
|
+
strategy: :sliding_window,
|
109
|
+
window_size: 15,
|
110
|
+
type_field: :type
|
111
|
+
)
|
112
|
+
|
113
|
+
# Analyze distribution in first window
|
114
|
+
first_window = balanced.first(15)
|
115
|
+
distribution = first_window.group_by { |i| i[:type] }
|
116
|
+
.transform_values(&:count)
|
117
|
+
```
|
19
118
|
|
20
119
|
## Usage Examples
|
21
120
|
|
22
|
-
### 1. Basic Balancing
|
121
|
+
### 1. Basic Balancing (Default Strategy)
|
23
122
|
```ruby
|
24
123
|
items = [
|
25
124
|
{ type: 'video', title: 'Video 1' },
|
@@ -33,11 +132,14 @@ balanced = TypeBalancer.balance(items)
|
|
33
132
|
# => [ { type: 'article', ... }, { type: 'image', ... }, { type: 'video', ... }, ... ]
|
34
133
|
```
|
35
134
|
|
36
|
-
### 2. Custom
|
135
|
+
### 2. Custom Strategy Options
|
37
136
|
```ruby
|
38
|
-
#
|
39
|
-
balanced = TypeBalancer.balance(items,
|
40
|
-
|
137
|
+
# Large window size for more gradual transitions
|
138
|
+
balanced = TypeBalancer.balance(items,
|
139
|
+
strategy: :sliding_window,
|
140
|
+
window_size: 50,
|
141
|
+
type_order: %w[image video article]
|
142
|
+
)
|
41
143
|
```
|
42
144
|
|
43
145
|
### 3. Custom Type Field
|
@@ -64,6 +166,7 @@ If a type in `type_order` is not present in the input, it is simply ignored in t
|
|
64
166
|
- The `type_order` argument must be an array of strings matching the type values in your items.
|
65
167
|
- If you use a custom `type_field`, ensure all items have that field.
|
66
168
|
- The method does not mutate the input array.
|
169
|
+
- Strategy options are specific to each strategy and are ignored by other strategies.
|
67
170
|
|
68
171
|
## See Also
|
69
172
|
- [README.md](../README.md) for general usage
|
data/docs/quality.md
CHANGED
@@ -53,15 +53,23 @@ The script tests several key aspects of the TypeBalancer gem:
|
|
53
53
|
- Shows spacing calculations between positions
|
54
54
|
- Verifies edge cases (single item, no items, all items)
|
55
55
|
|
56
|
-
### 2. Robust Balance Method Tests
|
56
|
+
### 2. Strategy System
|
57
|
+
- Tests the default sliding window strategy
|
58
|
+
- Verifies behavior with different window sizes
|
59
|
+
- Checks strategy options and customization
|
60
|
+
- Ensures backward compatibility with existing code
|
61
|
+
|
62
|
+
### 3. Robust Balance Method Tests
|
57
63
|
- Loads scenarios from a YAML file (`examples/balance_test_data.yml`)
|
58
64
|
- Tests `TypeBalancer.balance` with and without the `type_order` argument
|
65
|
+
- Tests different strategy configurations
|
59
66
|
- Checks type counts, custom order, and exception handling for empty input
|
60
67
|
- Prints a color-coded summary table for pass/fail counts
|
61
68
|
|
62
|
-
### 3. Content Feed Example
|
69
|
+
### 4. Content Feed Example
|
63
70
|
- Shows a real-world example of content type distribution
|
64
71
|
- Verifies position allocation for different content types (video, image, article)
|
72
|
+
- Tests strategy behavior with real-world data
|
65
73
|
- Checks distribution statistics and ratios
|
66
74
|
|
67
75
|
## Output Format
|
data/examples/large_scale_balance_test.rb
ADDED
@@ -0,0 +1,289 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
# rubocop:disable Metrics/ClassLength
|
5
|
+
# rubocop:disable Metrics/MethodLength
|
6
|
+
# rubocop:disable Metrics/AbcSize
|
7
|
+
# rubocop:disable Metrics/CyclomaticComplexity
|
8
|
+
# rubocop:disable Metrics/PerceivedComplexity
|
9
|
+
|
10
|
+
require 'type_balancer'
|
11
|
+
require 'yaml'
|
12
|
+
require 'json'
|
13
|
+
|
14
|
+
# Exercises TypeBalancer.balance against a shuffled 500-item collection made
# of three types and checks the result in three ways: per-window diversity,
# overall type ratios, and the length of same-type runs.
#
# Violations are collected in @failures instead of being raised, so a single
# run reports every problem it finds. #run returns true only when nothing was
# recorded.
class LargeScaleBalanceTest
  GREEN = "\e[32m"
  RED = "\e[31m"
  YELLOW = "\e[33m"
  RESET = "\e[0m"

  def initialize
    @total_records = 500
    @type_distribution = {
      'type_a' => 250, # 50%
      'type_b' => 175, # 35%
      'type_c' => 75   # 15%
    }
    @window_sizes = [10, 25, 50, 100]
    @failures = []
    @tests_run = 0
    @tests_passed = 0
  end

  # Runs every scenario (default strategy, each configured window size, and a
  # custom type order), prints the summary, and reports overall success.
  #
  # @return [Boolean] true when no failure was recorded
  def run
    puts "\n#{YELLOW}Running Large Scale Balance Test#{RESET}"
    puts "Total Records: #{@total_records}"
    puts 'Distribution:'
    @type_distribution.each do |type, count|
      puts " #{type}: #{count} (#{(count.to_f / @total_records * 100).round(1)}%)"
    end

    test_data = generate_test_data

    # Test default strategy
    puts "\n#{YELLOW}Testing Default Strategy (Sliding Window)#{RESET}"
    run_balance_test(test_data)

    # Test with different window sizes
    @window_sizes.each do |size|
      puts "\n#{YELLOW}Testing Sliding Window Strategy with Window Size #{size}#{RESET}"
      run_balance_test(test_data, strategy: :sliding_window, window_size: size)
    end

    # Test with custom type order. The documented keyword for ordering types
    # is `type_order:`; the original passed `types:`, which is not part of the
    # documented TypeBalancer.balance signature.
    puts "\n#{YELLOW}Testing with Custom Type Order#{RESET}"
    run_balance_test(
      test_data,
      strategy: :sliding_window,
      type_order: %w[type_c type_b type_a],
      window_size: 25
    )

    print_summary
    @failures.empty?
  end

  private

  # Remembers a failure message for the final summary.
  def record_failure(message)
    @failures << message
  end

  # Builds one hash per record ({ type:, id: }) following @type_distribution
  # and returns them in random order.
  def generate_test_data
    items = @type_distribution.flat_map do |type, count|
      Array.new(count) { |i| { type: type, id: "#{type}_#{i + 1}" } }
    end
    items.shuffle
  end

  # Balances +items+ with the given options and runs all three analyses on the
  # result. The scenario counts as passed only if every analysis passes.
  #
  # @param items [Array<Hash>] records with a :type key
  # @param strategy_options [Hash] keywords forwarded to TypeBalancer.balance
  def run_balance_test(items, strategy_options = {})
    @tests_run += 1
    puts "\nRunning balance test..."
    puts "Strategy options: #{strategy_options.inspect}" unless strategy_options.empty?

    balanced_items = TypeBalancer.balance(items, type_field: :type, **strategy_options)

    # Slice the output with the same window size the strategy used (default 10)
    window_size = strategy_options[:window_size] || 10

    # Per-type counts not yet consumed by earlier windows
    remaining_items = @type_distribution.dup

    windows_ok = true
    balanced_items.each_slice(window_size).with_index(1) do |window, number|
      windows_ok = false unless analyze_window(window, number, remaining_items)
      window.each { |item| remaining_items[item[:type]] -= 1 }
    end

    # Both analyses always run so that each can report its own failures
    distribution_ok = analyze_full_distribution(balanced_items)
    transitions_ok = analyze_type_transitions(balanced_items)

    @tests_passed += 1 if windows_ok && distribution_ok && transitions_ok
  end

  # Checks a single window of the balanced output.
  #
  # While every type still has at least a third of a window's worth of items
  # left, each type with items remaining must appear and no type may take more
  # than 70% of the window. Otherwise the only check is that no type appears
  # more often than it had items available.
  #
  # @param items [Array<Hash>] the window's records
  # @param window_number [Integer] 1-based window index, used in messages
  # @param remaining_items [Hash{String=>Integer}] per-type counts still
  #   available before this window
  # @return [Boolean] true when the window satisfies the checks
  def analyze_window(items, window_number, remaining_items)
    puts "\nAnalyzing window #{window_number} (#{items.size} items):"
    distribution = items.map { |item| item[:type] }.tally

    # Percentages are kept per type so the failure message below can use them;
    # a variable assigned inside the printing block would not be visible there.
    percentages = distribution.transform_values do |count|
      (count.to_f / items.size * 100).round(1)
    end
    distribution.each do |type, count|
      puts "#{type}: #{count} (#{percentages[type]}%)"
    end

    # Only enforce strict distribution when we have enough items of each type
    has_enough_items = remaining_items.values.all? { |count| count >= items.size / 3 }
    window_passed = true

    if has_enough_items
      # Each type that still has items left must appear at least once
      remaining_items.each do |type, count|
        next if count <= 0 || distribution.key?(type)

        record_failure("Window #{window_number}: #{type} does not appear but has #{count} items remaining")
        window_passed = false
      end

      # No type may dominate the window: allow up to 70%
      max_allowed = (items.size * 0.7).ceil
      distribution.each do |type, count|
        next unless count > max_allowed

        message = "Window #{window_number}: #{type} appears #{count} times (#{percentages[type]}%), " \
                  "exceeding maximum allowed #{max_allowed} when sufficient items remain"
        record_failure(message)
        window_passed = false
      end
    else
      # When running low on items, just verify nothing is over-used
      distribution.each do |type, count|
        max_possible = [remaining_items[type], items.size].min
        next unless count > max_possible

        message = "Window #{window_number}: #{type} appears #{count} times but only had #{max_possible} items available"
        record_failure(message)
        window_passed = false
      end
    end

    window_passed
  end

  # Compares each type's share of the balanced output with its share of the
  # input; a difference above 0.5 percentage points is a failure.
  #
  # @param balanced_items [Array<Hash>] the full balanced output
  # @return [Boolean] true when every type is within tolerance
  def analyze_full_distribution(balanced_items)
    puts "\nFull Distribution Analysis:"
    distribution = balanced_items.map { |item| item[:type] }.tally
    distribution_passed = true

    distribution.each do |type, count|
      percentage = (count.to_f / balanced_items.length * 100).round(1)
      target_percentage = (@type_distribution[type].to_f / @total_records * 100).round(1)
      diff = (percentage - target_percentage).abs.round(1)
      color = if diff <= 0.1
                GREEN
              elsif diff <= 0.5
                YELLOW
              else
                RED
              end
      puts "#{color}#{type}: #{count} (#{percentage}%) - Target: #{target_percentage}% (Diff: #{diff}%)#{RESET}"

      # Full distribution should be very close to target
      next unless diff > 0.5

      message = "Full distribution: #{type} off by #{diff}% " \
                "(expected #{target_percentage}%, got #{percentage}%)"
      record_failure(message)
      distribution_passed = false
    end

    distribution_passed
  end

  # Walks the balanced output looking for runs of one type that are too long
  # for the number of items and types still available, then prints a
  # from/to transition table (informational only).
  #
  # @param items [Array<Hash>] the full balanced output
  # @return [Boolean] false as soon as one over-long run is found
  def analyze_type_transitions(items)
    puts "\nType Transition Analysis:"
    transition_passed = true

    current_type = nil
    consecutive_count = 0
    remaining_items = @type_distribution.dup

    items.each do |item|
      remaining_items[item[:type]] -= 1

      unless item[:type] == current_type
        consecutive_count = 1
        current_type = item[:type]
        next
      end

      consecutive_count += 1
      total_remaining = remaining_items.values.sum
      available_types = remaining_items.count { |_, count| count.positive? }
      next unless consecutive_count > max_consecutive_for(available_types, total_remaining)

      message = "Found #{consecutive_count} consecutive #{current_type} items " \
                "when #{total_remaining} total items remained (#{available_types} types available)"
      record_failure(message)
      transition_passed = false
      break # Stop checking transitions once we find a violation
    end

    # Analyze transitions for information only
    transitions = Hash.new { |h, k| h[k] = Hash.new(0) }
    total_transitions = 0
    items.each_cons(2) do |a, b|
      transitions[a[:type]][b[:type]] += 1
      total_transitions += 1
    end

    transitions.each do |from_type, to_types|
      puts "\nTransitions from #{from_type}:"
      to_types.each do |to_type, count|
        percentage = (count.to_f / total_transitions * 100).round(1)
        puts " to #{to_type}: #{count} (#{percentage}%)"
      end
    end

    transition_passed
  end

  # Longest same-type run tolerated for the given supply. The limit loosens as
  # items run out, and disappears when almost nothing or only one type is left.
  def max_consecutive_for(available_types, total_remaining)
    if available_types >= 3 && total_remaining >= 100
      5 # Strict when we have lots of items and all types
    elsif available_types >= 2 && total_remaining >= 50
      8 # More lenient as we start running out
    elsif available_types >= 2 && total_remaining >= 20
      12 # Even more lenient with two types
    else
      Float::INFINITY
    end
  end

  # Prints the collected failures (or a success line) and the run counters.
  def print_summary
    puts "\n#{'-' * 50}"
    if @failures.empty?
      puts "#{GREEN}All tests passed!#{RESET}"
    else
      puts "#{RED}#{@failures.size} test failures:#{RESET}"
      @failures.each_with_index do |failure, index|
        puts "#{index + 1}. #{failure}"
      end
    end
    puts "Tests run: #{@tests_run}"
    puts "Tests passed: #{@tests_passed}"
    puts('-' * 50)
  end
end
|
279
|
+
|
280
|
+
if __FILE__ == $PROGRAM_NAME
|
281
|
+
test = LargeScaleBalanceTest.new
|
282
|
+
exit(test.run ? 0 : 1)
|
283
|
+
end
|
284
|
+
|
285
|
+
# rubocop:enable Metrics/ClassLength
|
286
|
+
# rubocop:enable Metrics/MethodLength
|
287
|
+
# rubocop:enable Metrics/AbcSize
|
288
|
+
# rubocop:enable Metrics/CyclomaticComplexity
|
289
|
+
# rubocop:enable Metrics/PerceivedComplexity
|