type_balancer 0.1.4 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,29 +6,82 @@ module TypeBalancer
6
6
  # Validate inputs
7
7
  return [] if total_count <= 0 || ratio <= 0 || ratio > 1
8
8
 
9
- # Calculate target count and round down for specific ratios
10
- target_count = if ratio <= 0.34
11
- 1 # For ratios <= 0.34, always use 1 position
12
- elsif ratio <= 0.67
13
- 2 # For ratios <= 0.67, always use 2 positions
14
- else
15
- (total_count * ratio).ceil
16
- end
9
+ # Calculate base target count
10
+ target_count = (total_count * ratio).ceil
11
+
12
+ # Special case for 3 slots
13
+ if total_count == 3
14
+ target_count = if ratio <= 0.34
15
+ 1
16
+ elsif ratio <= 0.67
17
+ 2
18
+ else
19
+ 3
20
+ end
21
+ end
17
22
 
18
23
  return [] if target_count.zero?
19
24
  return (0...total_count).to_a if target_count >= total_count
20
25
 
21
- # Special case for 3 slots
22
- if total_count == 3
26
+ if available_positions
27
+ # Filter out invalid positions and sort them
28
+ valid_positions = available_positions.select { |pos| pos >= 0 && pos < total_count }.sort
29
+ return [] if valid_positions.empty?
30
+
31
+ # For single target position, use first available
32
+ return [valid_positions.first] if target_count == 1
33
+
34
+ # For two positions
35
+ if target_count == 2
36
+ # Special case for three slots
37
+ if total_count == 3
38
+ return [valid_positions[0], valid_positions[1]] if valid_positions.size >= 2
39
+
40
+ return [valid_positions.first, valid_positions.first + 1]
41
+ end
42
+
43
+ # Special case for invalid positions that go beyond total_count
44
+ if available_positions.any? { |pos| pos >= total_count }
45
+ valid_positions = available_positions.select { |pos| pos >= 0 }.sort
46
+ return [valid_positions.first, valid_positions.last]
47
+ end
48
+
49
+ # Otherwise use first and last
50
+ return [valid_positions.first, valid_positions.last]
51
+ end
52
+
53
+ # If we have fewer or equal positions than needed, use all available up to target_count
54
+ return valid_positions if valid_positions.size <= target_count
55
+
56
+ # For more positions, take the first N positions where N is target_count
57
+ return valid_positions.first(target_count) if target_count <= 3
58
+
59
+ # For larger target counts, distribute evenly
60
+ target_positions = []
61
+ step = (valid_positions.size - 1).fdiv(target_count - 1)
62
+ (0...target_count).each do |i|
63
+ index = (i * step).round
64
+ target_positions << valid_positions[index]
65
+ end
66
+ target_positions
67
+ else
68
+ # Handle single target position
23
69
  return [0] if target_count == 1
24
- return [0, 1] if target_count == 2
25
- end
26
70
 
27
- TypeBalancer::PositionCalculator.calculate_positions(
28
- total_count: total_count,
29
- ratio: ratio,
30
- available_items: available_positions
31
- )
71
+ # For two positions
72
+ if target_count == 2
73
+ # Special case for three slots
74
+ return [0, 1] if total_count == 3
75
+
76
+ # Otherwise use first and last
77
+ return [0, total_count - 1]
78
+ end
79
+
80
+ # Calculate evenly spaced positions for multiple targets
81
+ (0...target_count).map do |i|
82
+ ((total_count - 1) * i.fdiv(target_count - 1)).round
83
+ end
84
+ end
32
85
  end
33
86
  end
34
87
  end
@@ -0,0 +1,58 @@
1
+ # frozen_string_literal: true
2
+
3
+ module TypeBalancer
4
+ module Strategies
# Base class for all balancing strategies.
#
# Holds the shared inputs (items, type field, optional explicit types and
# type order) and offers protected helpers for validating items, deriving
# the ordered list of types and grouping items per type. Subclasses must
# implement #balance.
class BaseStrategy
  # Type priority used by #extract_types when no custom order is supplied.
  DEFAULT_TYPE_ORDER = %w[video image strip article].freeze

  # @param items [Array] the collection to balance; the helpers read each
  #   item with +item[type_field]+ (and +item.key?+ during validation)
  # @param type_field [Object] key under which an item stores its type
  # @param types [Array, nil] explicit list of types, or nil to derive them
  # @param type_order [Array, nil] preferred ordering of types
  def initialize(items:, type_field:, types: nil, type_order: nil)
    @items = items
    @type_field = type_field
    @types = types
    @type_order = type_order
  end

  # Interface method that all strategies must implement.
  #
  # @raise [NotImplementedError] always, in the base class
  def balance
    raise NotImplementedError, 'Strategies must implement #balance'
  end

  protected

  # Ensures every item has the type field and a non-blank type value.
  #
  # @raise [ArgumentError] if an item lacks the field or its value is blank
  def validate_items!
    @items.each do |item|
      raise ArgumentError, 'All items must have a type field' unless item.key?(@type_field)
      raise ArgumentError, 'Type values cannot be empty' if item[@type_field].to_s.strip.empty?
    end
  end

  # Returns the distinct item types (as strings): first those named by the
  # preferred order that actually occur, then the remaining ones in order
  # of first appearance.
  #
  # The custom order is stringified before comparison because item types
  # are stringified too; otherwise a symbol order would never match.
  #
  # @return [Array<String>]
  def extract_types
    present = @items.map { |item| item[@type_field].to_s }.uniq
    preferred = @type_order ? @type_order.map(&:to_s) : DEFAULT_TYPE_ORDER
    (preferred & present) + (present - preferred)
  end

  # Groups items by stringified type, preserving item order in each group.
  # Items whose type is not among the known types are left out.
  #
  # Falls back to #extract_types when no explicit types were given, so the
  # helper also works with the constructor's default +types: nil+.
  #
  # @return [Hash{Object => Array}] type => items of that type
  def group_items_by_type
    type_queues = (@types || extract_types).to_h { |type| [type, []] }

    @items.each do |item|
      type_queues[item[@type_field].to_s]&.push(item)
    end

    type_queues
  end
end
57
+ end
58
+ end
@@ -0,0 +1,199 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'base_strategy'
4
+
5
+ module TypeBalancer
6
+ module Strategies
7
+ # Implements an efficient sliding window approach for balancing items
8
+ # This strategy uses array-based indexing and pre-calculated ratios for optimal performance
9
+ class SlidingWindowStrategy < BaseStrategy
10
+ DEFAULT_BATCH_SIZE = 1000
11
+
# Sets up the strategy: forwards the common options to the base class,
# records window and batch sizes, and derives the type list from the
# items when none was passed in.
#
# rubocop:disable Metrics/ParameterLists
def initialize(items:, type_field:, types: nil, type_order: nil, window_size: 10, batch_size: DEFAULT_BATCH_SIZE)
  super(items: items, type_field: type_field, types: types, type_order: type_order)
  @window_size, @batch_size = window_size, batch_size
  # super stored the caller's types; only derive them when none were given.
  @types ||= extract_types
end
# rubocop:enable Metrics/ParameterLists
20
+
# Returns the items rearranged by the sliding-window algorithm.
#
# An empty collection yields an empty array and a collection holding a
# single type is returned as a shallow copy. Otherwise the per-type index
# queues and ratios are computed and the items are processed either in
# batches (more items than the batch size) or in one pass.
def balance
  return [] if @items.empty?

  validate_items!
  return @items.dup if single_type?

  @type_queues = build_type_queues
  @type_ratios = calculate_type_ratios

  @items.size > @batch_size ? process_large_collection : process_single_batch
end
36
+
37
+ private
38
+
# True when all items share exactly one (stringified) type value.
def single_type?
  distinct_types = @items.map { |entry| entry[@type_field].to_s }.uniq
  distinct_types.size == 1
end
42
+
# Builds a hash mapping each known type to the indices (into @items) of
# the items of that type, in original order. The keys come from the custom
# type order when present, otherwise from @types; items whose stringified
# type is not one of those keys are not indexed.
def build_type_queues
  buckets = (@type_order || @types).to_h { |name| [name, []] }

  @items.each_with_index do |entry, position|
    buckets[entry[@type_field].to_s]&.push(position)
  end

  buckets
end
55
+
# Maps each type to the fraction of all items that belong to it, based on
# the sizes of the index queues in @type_queues.
def calculate_type_ratios
  item_count = Float(@items.size)
  @type_queues.to_h { |type, indices| [type, indices.size / item_count] }
end
60
+
# Balances a collection larger than the batch size by walking it in
# consecutive batch-sized ranges that share one result array and one set
# of per-type cursors. Slots left unfilled are dropped from the output.
def process_large_collection
  total = @items.size
  placed = Array.new(total)
  cursors = initialize_type_indices

  (0...total).step(@batch_size) do |batch_start|
    batch_end = batch_start + @batch_size
    batch_end = total if batch_end > total
    process_batch_range(placed, cursors, batch_start, batch_end)
  end

  placed.compact
end
72
+
# Balances the whole collection as one range and returns the placed items
# with any unfilled slots removed.
def process_single_batch
  placed = Array.new(@items.size)
  cursors = initialize_type_indices
  process_batch_range(placed, cursors, 0, @items.size)
  placed.compact
end
78
+
# Returns a fresh cursor hash: every type in @type_queues mapped to 0,
# i.e. nothing consumed from any queue yet.
def initialize_type_indices
  @type_queues.keys.to_h { |type| [type, 0] }
end
82
+
# Fills result[start_idx...end_idx] window by window: for each window the
# per-type slot counts are computed and applied, then the cursor advances
# by the window's size. Afterwards any slots still empty in the range are
# handed to fill_gaps.
def process_batch_range(result, type_indices, start_idx, end_idx)
  cursor = start_idx

  until cursor >= end_idx
    span = compute_window_size(cursor, end_idx)
    slot_counts = calculate_window_positions(span)
    apply_window_positions(slot_counts, cursor, span, result, type_indices)
    cursor += span
  end

  fill_gaps(result, type_indices, start_idx, end_idx)
end
95
+
# Size of the window starting at start_pos: the configured window size,
# cut short at end_pos, but never smaller than 1.
def compute_window_size(start_pos, end_pos)
  window_end = [start_pos + @window_size, end_pos].min
  span = window_end - start_pos
  span < 1 ? 1 : span
end
99
+
# Asks a new WindowSlotCalculator, built from the current type ratios and
# type order, for the slot counts of a window of the given size.
def calculate_window_positions(window_size)
  slot_calculator = WindowSlotCalculator.new(@type_ratios, @type_order)
  slot_calculator.calculate(window_size)
end
103
+
# Places items into the window result[start_pos...start_pos + size].
#
# Types are visited in the custom type order when present, otherwise in
# the key order of @type_queues. For each type with an entry in positions,
# up to that many items are taken from the type's queue and written to the
# first free slots of the window. Placement for a type stops early when
# its queue is exhausted or the window has no free slot left.
def apply_window_positions(positions, start_pos, size, result, type_indices)
  window_end = start_pos + size

  (@type_order || @type_queues.keys).each do |type|
    quota = positions[type]
    next unless quota

    queue = @type_queues[type]
    quota.times do
      break if type_indices[type] >= queue.size

      slot = find_next_position(result, start_pos, window_end)
      break unless slot

      result[slot] = @items[queue[type_indices[type]]]
      type_indices[type] += 1
    end
  end
end
120
+
# Index of the first nil slot in result[start_pos...end_pos], or nil when
# every slot in that range is already taken.
def find_next_position(result, start_pos, end_pos)
  start_pos.upto(end_pos - 1) do |slot|
    return slot if result[slot].nil?
  end
  nil
end
124
+
# Fills every slot in result[start_idx...end_idx] that is still nil with
# the next unplaced item of the first type (in type order, or in
# @type_queues key order) that has items left. Slots stay nil once every
# queue is exhausted.
def fill_gaps(result, type_indices, start_idx, end_idx)
  candidates = @type_order || @type_queues.keys

  (start_idx...end_idx).each do |slot|
    next unless result[slot].nil?

    donor = candidates.find do |type|
      queue = @type_queues[type]
      queue && type_indices[type] < queue.size
    end
    next unless donor

    result[slot] = @items[@type_queues[donor][type_indices[donor]]]
    type_indices[donor] += 1
  end
end
140
+
# Computes how many slots of a window each type should receive.
#
# Types are processed in the custom type order when one is given,
# otherwise in the key order of the ratio hash. Each type gets a share of
# the window derived from its ratio relative to the ratio not yet handed
# out; slots still unassigned afterwards are distributed round-robin.
class WindowSlotCalculator
  # @param type_ratios [Hash{Object => Numeric}] type => share of all items
  # @param type_order [Array, nil] explicit processing order, or nil
  def initialize(type_ratios, type_order)
    @type_ratios = type_ratios
    @type_order = type_order
  end

  # @param window_size [Integer] number of slots in the window
  # @return [Hash{Object => Integer}] type => number of slots in the window
  def calculate(window_size)
    slots = build_initial_slots(window_size)
    distribute_remaining_slots(slots)
    slots
  end

  private

  # First pass: gives each type its ratio-based target and tracks how many
  # slots and how much ratio are still unassigned. Types without a ratio
  # entry count as ratio 0.
  def build_initial_slots(window_size)
    remaining_ratio = 1.0
    @remaining_slots = window_size

    ordered_types.each_with_object({}) do |type, slots|
      ratio = @type_ratios[type] || 0
      target = calculate_target(window_size, ratio, remaining_ratio)
      slots[type] = target
      @remaining_slots -= target
      remaining_ratio -= ratio
    end
  end

  # Slot target for one type, capped at the slots still available. A type
  # with a positive ratio gets at least one slot while any remain.
  #
  # A type with no share gets no slots, without dividing: once the
  # remaining ratio has dropped to 0.0, 0 / 0.0 would be NaN and NaN.floor
  # raises FloatDomainError. A non-positive remaining ratio with a positive
  # type ratio is treated as "take everything that is left".
  def calculate_target(size, ratio, rem_ratio)
    return 0 unless ratio.positive?

    share = rem_ratio.positive? ? ratio / rem_ratio : 1.0
    target = [(size * share).floor, @remaining_slots].min
    target = 1 if target.zero? && @remaining_slots.positive?
    target
  end

  # Second pass: hands out unassigned slots one at a time, cycling through
  # the distribution types. Does nothing when there is no type to give
  # them to (avoids a modulo-by-zero on an empty list).
  def distribute_remaining_slots(slots)
    return unless @remaining_slots.positive?

    types = sorted_distribution_types
    return if types.empty?

    @remaining_slots.times { |i| slots[types[i % types.size]] += 1 }
  end

  # Processing order for the first pass.
  def ordered_types
    @type_order || @type_ratios.keys
  end

  # Order in which leftover slots are handed out: the custom order limited
  # to types that have a ratio entry, or all types by descending ratio.
  def sorted_distribution_types
    if @type_order
      @type_order & @type_ratios.keys
    else
      @type_ratios.sort_by { |_type, ratio| -ratio }.map(&:first)
    end
  end
end
197
+ end
198
+ end
199
+ end
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
module TypeBalancer
  # Namespace module under which the balancing strategy classes live.
  module Strategies; end
end
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
+ module TypeBalancer
# Registry and factory for balancing strategies. Strategy classes are
# registered under a symbol name; one of them acts as the default.
class StrategyFactory
  class << self
    # Instantiates the named strategy (or the default one when no name is
    # given), forwarding all keyword arguments to its constructor.
    #
    # @raise [ArgumentError] if no strategy is registered under the name
    def create(strategy_name = nil, **)
      requested = strategy_name || default_strategy
      implementation = find_strategy(requested) ||
                       raise(ArgumentError, "Unknown strategy: #{requested}")

      implementation.new(**)
    end

    # Registers (or replaces) a strategy class under the symbolized name.
    def register(name, strategy_class)
      strategies.store(name.to_sym, strategy_class)
    end

    # Selects the default strategy; it must already be registered.
    #
    # @raise [ArgumentError] if the name is not registered
    def default_strategy=(name)
      key = name.to_sym
      raise ArgumentError, "Unknown strategy: #{name}" unless strategies.key?(key)

      @default_strategy = key
    end

    # Name of the default strategy; :sliding_window until one is chosen.
    def default_strategy
      @default_strategy || (@default_strategy = :sliding_window)
    end

    private

    # Lazily created registry: symbol name => strategy class.
    def strategies
      @strategies = {} unless @strategies
      @strategies
    end

    # Looks up a registered strategy class; nil when the name is unknown.
    def find_strategy(name)
      strategies.fetch(name.to_sym, nil)
    end
  end
end
41
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module TypeBalancer
4
- VERSION = '0.1.4'
4
+ VERSION = '0.2.1'
5
5
  end
data/lib/type_balancer.rb CHANGED
@@ -1,13 +1,15 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'type_balancer/version'
4
- require 'type_balancer/calculator'
5
4
  require_relative 'type_balancer/balancer'
6
5
  require_relative 'type_balancer/ratio_calculator'
7
6
  require_relative 'type_balancer/batch_processing'
8
- require 'type_balancer/position_calculator'
7
+ require_relative 'type_balancer/position_calculator'
9
8
  require_relative 'type_balancer/type_extractor'
10
9
  require_relative 'type_balancer/type_extractor_registry'
10
+ require_relative 'type_balancer/strategies/base_strategy'
11
+ require_relative 'type_balancer/strategies/sliding_window_strategy'
12
+ require_relative 'type_balancer/strategy_factory'
11
13
 
12
14
  module TypeBalancer
13
15
  class Error < StandardError; end
@@ -16,22 +18,34 @@ module TypeBalancer
16
18
  class EmptyCollectionError < Error; end
17
19
  class InvalidTypeError < Error; end
18
20
 
21
+ # Register default strategies
22
+ StrategyFactory.register(:sliding_window, Strategies::SlidingWindowStrategy)
23
+ StrategyFactory.default_strategy = :sliding_window
24
+
19
25
  # Load Ruby implementations
20
26
  require_relative 'type_balancer/distribution_calculator'
21
27
  require_relative 'type_balancer/ordered_collection_manager'
22
- require_relative 'type_balancer/alternating_filler'
23
- require_relative 'type_balancer/sequential_filler'
28
+ require_relative 'type_balancer/type_extractor'
29
+ require_relative 'type_balancer/type_extractor_registry'
30
+ require_relative 'type_balancer/ratio_calculator'
31
+ require_relative 'type_balancer/position_calculator'
24
32
  require_relative 'type_balancer/distributor'
33
+ require_relative 'type_balancer/sequential_filler'
34
+ require_relative 'type_balancer/alternating_filler'
35
+ require_relative 'type_balancer/balancer'
36
+ require_relative 'type_balancer/batch_processing'
37
+ require_relative 'type_balancer/calculator'
25
38
 
26
39
  def self.calculate_positions(total_count:, ratio:, available_items: nil)
27
- Distributor.calculate_target_positions(
40
+ PositionCalculator.calculate_positions(
28
41
  total_count: total_count,
29
42
  ratio: ratio,
30
- available_positions: available_items
43
+ available_items: available_items
31
44
  )
32
45
  end
33
46
 
34
- def self.balance(items, type_field: :type, type_order: nil)
47
+ # rubocop:disable Metrics/ParameterLists
48
+ def self.balance(items, type_field: :type, type_order: nil, strategy: nil, window_size: nil, **strategy_options)
35
49
  # Input validation
36
50
  raise EmptyCollectionError, 'Collection cannot be empty' if items.empty?
37
51
 
@@ -44,12 +58,23 @@ module TypeBalancer
44
58
  raise Error, "Cannot access type field '#{type_field}': #{e.message}"
45
59
  end
46
60
 
47
- # Initialize balancer with type order and type field
48
- balancer = Balancer.new(types, type_field: type_field, type_order: type_order)
61
+ # Merge window_size into strategy_options if provided
62
+ strategy_options = strategy_options.merge(window_size: window_size) if window_size
63
+
64
+ # Create calculator with strategy options
65
+ calculator = Calculator.new(
66
+ items,
67
+ type_field: type_field,
68
+ types: type_order || types,
69
+ type_order: type_order,
70
+ strategy: strategy,
71
+ **strategy_options
72
+ )
49
73
 
50
74
  # Balance items
51
- balancer.call(items)
75
+ calculator.call
52
76
  end
77
+ # rubocop:enable Metrics/ParameterLists
53
78
 
54
79
  # Backward compatibility methods
55
80
  def self.extract_types(items, type_field)
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: type_balancer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Carl Smith
8
8
  bindir: exe
9
9
  cert_chain: []
10
- date: 2025-04-29 00:00:00.000000000 Z
10
+ date: 2025-05-01 00:00:00.000000000 Z
11
11
  dependencies: []
12
12
  description: Balances types in collections by ensuring each type appears a similar
13
13
  number of times
@@ -43,18 +43,24 @@ files:
43
43
  - docs/calculate_positions.md
44
44
  - docs/quality.md
45
45
  - examples/balance_test_data.yml
46
+ - examples/large_scale_balance_test.rb
46
47
  - examples/quality.rb
47
48
  - lib/type_balancer.rb
48
49
  - lib/type_balancer/alternating_filler.rb
49
50
  - lib/type_balancer/balancer.rb
50
51
  - lib/type_balancer/batch_processing.rb
51
52
  - lib/type_balancer/calculator.rb
53
+ - lib/type_balancer/configuration.rb
52
54
  - lib/type_balancer/distribution_calculator.rb
53
55
  - lib/type_balancer/distributor.rb
54
56
  - lib/type_balancer/ordered_collection_manager.rb
55
57
  - lib/type_balancer/position_calculator.rb
56
58
  - lib/type_balancer/ratio_calculator.rb
57
59
  - lib/type_balancer/sequential_filler.rb
60
+ - lib/type_balancer/strategies.rb
61
+ - lib/type_balancer/strategies/base_strategy.rb
62
+ - lib/type_balancer/strategies/sliding_window_strategy.rb
63
+ - lib/type_balancer/strategy_factory.rb
58
64
  - lib/type_balancer/type_extractor.rb
59
65
  - lib/type_balancer/type_extractor_registry.rb
60
66
  - lib/type_balancer/version.rb