string_to_number 0.1.4 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/benchmark.rb ADDED
@@ -0,0 +1,178 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # Performance benchmark script for StringToNumber gem
5
+ # Run with: ruby benchmark.rb
6
+
7
+ require_relative 'lib/string_to_number'
8
+ require 'benchmark'
9
+
10
+ class StringToNumberBenchmark
11
# Benchmark inputs grouped by how demanding they are to parse.
# Keys are category names; values are the French number phrases to convert.
TEST_CASES = {
  # Single words resolved by direct lookup.
  simple: ['un', 'vingt', 'cent', 'mille'],
  # Short multi-word phrases.
  medium: [
    'vingt et un',
    'deux cent cinquante',
    'mille deux cent'
  ],
  # Long phrases mixing several multipliers.
  complex: [
    'trois milliards cinq cents millions',
    'soixante-quinze million trois cent quarante six mille sept cent quatre-vingt-dix neuf'
  ],
  # Upper case, feminine form, regional word, plural "quatre-vingts".
  edge_cases: ['VINGT', 'une', 'septante', 'quatre-vingts']
}.freeze
27
+
28
+ def self.run_benchmark
29
+ puts 'StringToNumber Performance Benchmark'
30
+ puts '=' * 50
31
+ puts "Ruby version: #{RUBY_VERSION}"
32
+ puts "Platform: #{RUBY_PLATFORM}"
33
+ puts
34
+
35
+ # Warm up
36
+ puts 'Warming up...'
37
+ TEST_CASES.values.flatten.each { |text| StringToNumber.in_numbers(text) }
38
+ puts
39
+
40
+ total_results = {}
41
+
42
+ TEST_CASES.each do |category, test_cases|
43
+ puts "#{category.to_s.capitalize} Numbers:"
44
+ puts '-' * 30
45
+
46
+ results = benchmark_category(test_cases)
47
+ total_results[category] = results
48
+
49
+ puts "Cases: #{test_cases.size}"
50
+ puts "Total time: #{results[:total_time].round(4)}s"
51
+ puts "Average per conversion: #{results[:avg_time_ms].round(4)}ms"
52
+ puts "Conversions per second: #{results[:ops_per_sec].round(0)}"
53
+ puts
54
+
55
+ # Show individual case performance for complex numbers
56
+ next unless category == :complex
57
+
58
+ puts 'Individual case breakdown:'
59
+ test_cases.each_with_index do |text, index|
60
+ individual_time = Benchmark.realtime do
61
+ 1000.times { StringToNumber.in_numbers(text) }
62
+ end
63
+ avg_ms = (individual_time / 1000) * 1000
64
+ puts " #{index + 1}. #{avg_ms.round(4)}ms - '#{text[0..50]}#{'...' if text.length > 50}'"
65
+ end
66
+ puts
67
+ end
68
+
69
+ # Summary
70
+ puts '=' * 50
71
+ puts 'PERFORMANCE SUMMARY'
72
+ puts '=' * 50
73
+
74
+ total_results.each do |category, results|
75
+ status = case results[:avg_time_ms]
76
+ when 0..0.1 then '🟢 Excellent'
77
+ when 0.1..0.5 then '🟡 Good'
78
+ when 0.5..1.0 then '🟠 Acceptable'
79
+ else '🔴 Needs optimization'
80
+ end
81
+
82
+ puts "#{category.to_s.capitalize.ljust(12)} #{status.ljust(15)} #{results[:avg_time_ms].round(4)}ms avg"
83
+ end
84
+
85
+ puts
86
+ puts 'Memory efficiency test...'
87
+ test_memory_usage
88
+
89
+ puts
90
+ puts 'Scalability test...'
91
+ test_scalability
92
+ end
93
+
94
# Times repeated conversion of every phrase in one category.
#
# @param test_cases [Array<String>] phrases to convert
# @param iterations [Integer] how many times each phrase is converted
# @return [Hash] :total_time (seconds), :avg_time_ms (milliseconds per
#   conversion) and :ops_per_sec (conversions per second). Both derived
#   figures are 0.0 when nothing was converted or no time elapsed, instead
#   of the Infinity/NaN that a division by zero would produce.
def self.benchmark_category(test_cases, iterations = 2000)
  total_time = Benchmark.realtime do
    test_cases.each do |text|
      iterations.times { StringToNumber.in_numbers(text) }
    end
  end

  total_conversions = test_cases.size * iterations
  # Guard both divisions: an empty list or zero iterations gives zero
  # conversions, and a very coarse clock could report zero elapsed time.
  measurable = total_conversions.positive? && total_time.positive?

  {
    total_time: total_time,
    avg_time_ms: measurable ? (total_time / total_conversions) * 1000 : 0.0,
    ops_per_sec: measurable ? total_conversions / total_time : 0.0
  }
end
113
+
114
# Prints how many live objects remain after 500 passes over all test cases.
#
# The figure is the difference in live heap objects (occupied slots, i.e.
# TOTAL minus FREE from ObjectSpace.count_objects) measured right after a
# full GC before and after the workload. The raw :TOTAL value also counts
# free slots, so it reflects heap size rather than retained objects.
#
# @return [void]
def self.test_memory_usage
  unless Object.const_defined?(:ObjectSpace) && ObjectSpace.respond_to?(:count_objects)
    puts 'Memory tracking not available on this platform'
    return
  end

  # Collect garbage first so only surviving objects are counted.
  live_objects = lambda do
    GC.start
    counts = ObjectSpace.count_objects
    counts[:TOTAL] - counts[:FREE]
  end

  # Build the input list once, outside the measured section, so the
  # benchmark's own temporary arrays are not part of the workload.
  inputs = TEST_CASES.values.flatten

  initial_objects = live_objects.call

  # Perform intensive operations
  500.times do
    inputs.each { |text| StringToNumber.in_numbers(text) }
  end

  final_objects = live_objects.call
  object_growth = final_objects - initial_objects

  puts "Object creation: #{object_growth} new objects (#{object_growth > 1000 ? '🔴 High' : '🟢 Low'})"
end
134
+
135
# Prints how conversion time changes as the input phrase gets longer.
#
# Four phrases of increasing length are each converted 1000 times; the
# average time per conversion is reported next to its ratio to the shortest
# phrase, followed by an overall verdict based on the longest phrase.
#
# @return [void]
def self.test_scalability
  samples = [
    'un',                                                        # 2 chars
    'vingt et un',                                               # 11 chars
    'mille deux cent trente-quatre',                             # 29 chars
    'trois milliards cinq cents millions deux cent mille et une' # 58 chars
  ]

  puts 'Input length vs. performance:'

  measurements = samples.map do |sample|
    elapsed = Benchmark.realtime do
      1000.times { StringToNumber.in_numbers(sample) }
    end

    # Seconds for 1000 runs -> milliseconds per single run.
    { length: sample.length, time: (elapsed / 1000) * 1000, input: sample }
  end

  # The shortest phrase is the reference every other timing is compared to.
  baseline = measurements.first[:time]

  measurements.each do |entry|
    ratio = entry[:time] / baseline
    marker =
      if ratio < 5
        '🟢'
      elsif ratio < 10
        '🟡'
      else
        '🔴'
      end

    length_label = entry[:length].to_s.rjust(2)
    puts " #{length_label} chars: #{entry[:time].round(4)}ms #{marker} (#{ratio.round(1)}x baseline)"
  end

  # Overall verdict: longest phrase versus the baseline.
  degradation = measurements.last[:time] / baseline
  verdict = degradation < 10 ? '✅ Scalability: Good' : '❌ Scalability: Poor'
  puts "#{verdict} (#{degradation.round(1)}x degradation)"
end
175
+ end
176
+
177
+ # Run the benchmark
178
+ StringToNumberBenchmark.run_benchmark if __FILE__ == $PROGRAM_NAME
@@ -0,0 +1,232 @@
1
+ # frozen_string_literal: true
2
+
3
+ module StringToNumber
4
+ # High-performance French text to number parser
5
+ #
6
+ # This class provides a clean, optimized implementation that maintains
7
+ # compatibility with the original algorithm while adding significant
8
+ # performance improvements through caching and memoization.
9
+ #
10
+ # @example Basic usage
11
+ # parser = StringToNumber::Parser.new
12
+ # parser.parse('vingt et un') #=> 21
13
+ # parser.parse('trois millions') #=> 3_000_000
14
+ #
15
+ # @example Class method usage
16
+ # StringToNumber::Parser.convert('mille deux cent') #=> 1200
17
+ #
18
+ class Parser
19
# Lookup tables are shared with ToNumber rather than duplicated:
# word => value, and multiplier word => power-of-ten exponent.
WORD_VALUES = StringToNumber::ToNumber::EXCEPTIONS.freeze
MULTIPLIERS = StringToNumber::ToNumber::POWERS_OF_TEN.freeze

# Multiplier words usable in the regex alternation. 'un' and 'dix' are left
# out, and longer words come first so that e.g. "cents" wins over "cent".
MULTIPLIER_KEYS = (MULTIPLIERS.keys - %w[un dix]).sort_by(&:length).reverse.freeze

# "<factor> <multiplier>": lazily captures everything before the first
# multiplier word as :f and the multiplier word itself as :m.
MULTIPLIER_PATTERN = /(?<f>.*?)\s?(?<m>#{MULTIPLIER_KEYS.join('|')})/.freeze

# "quatre-vingt" family with hyphen or space separators, optional plural
# "s", optional "dix", and an optional trailing word (capture 8).
QUATRE_VINGT_PATTERN = /(quatre(-|\s)vingt(s?)((-|\s)dix)?)((-|\s)?)(\w*)/.freeze

# Upper bound on the number of memoized conversions.
MAX_CACHE_SIZE = 1000
private_constant :MAX_CACHE_SIZE

# Class-level cache state; each cache has its own mutex.
@conversion_cache = {}       # normalized text => numeric result
@cache_access_order = []     # normalized texts, least recently used first
@instance_cache = {}         # normalized text => Parser instance
@cache_mutex = Mutex.new     # guards @conversion_cache and @cache_access_order
@instance_mutex = Mutex.new  # guards @instance_cache
39
+
40
class << self
  # Converts French number text to its numeric value, memoizing results.
  #
  # The text is normalized (to_s, downcase, strip) before lookup, so
  # "Vingt " and "vingt" share one cache entry. Empty input yields 0.
  #
  # @param text [#to_s] French number text to convert
  # @return [Integer] the numeric value
  # @raise [ArgumentError] if text does not respond to to_s
  def convert(text)
    validate_input!(text)

    normalized = normalize_text(text)
    return 0 if normalized.empty?

    # Fast path: previously converted text.
    cached_result = get_cached_conversion(normalized)
    return cached_result unless cached_result.nil?

    parser = get_cached_instance(normalized)
    result = parser.parse_optimized(normalized)

    cache_conversion(normalized, result)
    result
  end

  # Empties both caches and resets the hit/miss counters.
  def clear_caches!
    @cache_mutex.synchronize do
      @conversion_cache.clear
      @cache_access_order.clear
      @cache_hits = 0
      @cache_misses = 0
    end

    @instance_mutex.synchronize do
      @instance_cache.clear
    end
  end

  # Reports cache sizes and the conversion-cache hit ratio.
  #
  # @return [Hash] :conversion_cache_size, :conversion_cache_limit,
  #   :instance_cache_size and :cache_hit_ratio (hits / lookups, 0.0..1.0)
  def cache_stats
    # Each cache is read under the mutex that guards it; the locks are
    # taken one after the other, never nested.
    instance_cache_size = @instance_mutex.synchronize { @instance_cache.size }

    @cache_mutex.synchronize do
      {
        conversion_cache_size: @conversion_cache.size,
        conversion_cache_limit: MAX_CACHE_SIZE,
        instance_cache_size: instance_cache_size,
        cache_hit_ratio: calculate_hit_ratio
      }
    end
  end

  private

  # NOTE(review): ordinary objects all respond to to_s, so this guard only
  # rejects exotic receivers; it is kept permissive for compatibility.
  def validate_input!(text)
    raise ArgumentError, 'Input must be a string' unless text.respond_to?(:to_s)
  end

  # Canonical cache key: string form, lower-cased, surrounding space removed.
  def normalize_text(text)
    text.to_s.downcase.strip
  end

  # Returns the cached result for the text, or nil on a miss. Records the
  # hit or miss and marks the entry as most recently used.
  def get_cached_conversion(normalized_text)
    @cache_mutex.synchronize do
      unless @conversion_cache.key?(normalized_text)
        # Counters start out unset; nil.to_i is 0.
        @cache_misses = @cache_misses.to_i + 1
        next nil
      end

      @cache_hits = @cache_hits.to_i + 1
      @cache_access_order.delete(normalized_text)
      @cache_access_order.push(normalized_text)
      @conversion_cache[normalized_text]
    end
  end

  # Stores a result, evicting the least recently used entry when full.
  def cache_conversion(normalized_text, result)
    @cache_mutex.synchronize do
      if @conversion_cache.key?(normalized_text)
        # Re-caching a known key (e.g. two threads missed on the same text):
        # refresh its position instead of adding a duplicate order entry or
        # evicting an unrelated one.
        @cache_access_order.delete(normalized_text)
      elsif @conversion_cache.size >= MAX_CACHE_SIZE
        oldest = @cache_access_order.shift
        @conversion_cache.delete(oldest)
      end

      @conversion_cache[normalized_text] = result
      @cache_access_order.push(normalized_text)
    end
  end

  # Returns the Parser instance for the text, creating it on first use.
  # The cache is capped at MAX_CACHE_SIZE; the oldest inserted entry is
  # dropped first (Hash keeps insertion order), so it can no longer grow
  # without bound.
  def get_cached_instance(normalized_text)
    @instance_mutex.synchronize do
      @instance_cache.fetch(normalized_text) do
        @instance_cache.shift if @instance_cache.size >= MAX_CACHE_SIZE
        @instance_cache[normalized_text] = new(normalized_text)
      end
    end
  end

  # Fraction of conversion-cache lookups that were hits since the last
  # clear (0.0 when there have been no lookups). Callers hold @cache_mutex.
  def calculate_hit_ratio
    hits = @cache_hits.to_i
    lookups = hits + @cache_misses.to_i
    return 0.0 if lookups.zero?

    hits.to_f / lookups
  end
end
136
+
137
# Builds a parser bound to one piece of text.
#
# The text is stored in normalized form, using the class-level (private)
# normalize_text helper so instances and Parser.convert agree on the format.
#
# @param text [#to_s] French number text; defaults to the empty string
def initialize(text = '')
  normalizer = self.class
  @normalized_text = normalizer.send(:normalize_text, text)
end
141
+
142
# Converts text to its numeric value through the class-level cached path.
#
# Called without an argument it converts the text given to the constructor.
# An explicit text may also be passed, which is the usage shown in the
# class-level examples (parser.parse('vingt et un')).
#
# @param text [#to_s, nil] text to convert; nil means "use the text this
#   parser was created with"
# @return [Integer] the numeric value
def parse(text = nil)
  self.class.convert(text.nil? ? @normalized_text : text)
end
146
+
147
# Converts already-normalized text without touching the conversion cache.
#
# @param text [String, nil] normalized French number text
# @return [Integer] 0 for nil or empty text, the table value for a single
#   known word, otherwise the result of the recursive extraction
def parse_optimized(text)
  if text.nil? || text.empty?
    0
  elsif WORD_VALUES.key?(text)
    # Single known word: answer straight from the lookup table.
    WORD_VALUES[text]
  else
    alternation = MULTIPLIER_KEYS.join('|')
    extract_optimized(text, alternation)
  end
end
158
+
159
+ private
160
+
161
# Recursively turns a French number phrase into its value.
#
# Strategy: find the first "<factor> <multiplier>" pair, compute
# factor * 10**exponent, and recurse on what is left of the sentence. When
# a larger multiplier still follows (e.g. "cinq cents" followed by
# "millions"), the pair is folded into that larger multiplier's factor.
# Phrases without a multiplier go through the "quatre-vingt" special case
# or a plain word-by-word sum.
#
# @param sentence [String, nil] normalized text still to be parsed
# @param keys [String] multiplier alternation; only forwarded to recursive
#   calls (the pre-compiled MULTIPLIER_PATTERN is what is actually matched)
# @param detail [Boolean] when true and a multiplier is found, return the
#   intermediate pieces instead of a final number
# @return [Integer, Hash] the value, or in detail mode a hash with
#   :factor, :multiple_of_ten and :sentence (the unparsed remainder)
def extract_optimized(sentence, keys, detail: false)
  return 0 if sentence.nil? || sentence.empty?

  # Direct lookup
  return WORD_VALUES[sentence] if WORD_VALUES.key?(sentence)

  if (result = MULTIPLIER_PATTERN.match(sentence))
    # Drop exactly the span that was matched. Removing every occurrence of
    # the matched text would also swallow a later repetition, e.g. the
    # second "deux cent" in "deux cent mille deux cent".
    sentence = result.pre_match + result.post_match

    factor = WORD_VALUES[result[:f]] || match_optimized(result[:f])
    # A bare multiplier ("mille") means one of it, except in detail mode,
    # where the caller supplies the factor.
    factor = 1 if factor.zero? && !detail
    multiple_of_ten = 10**(MULTIPLIERS[result[:m]] || 0)

    # A larger multiplier further on scales what has been read so far.
    if higher_multiple_exists?(result[:m], sentence)
      details = extract_optimized(sentence, keys, detail: true)
      factor = (factor * multiple_of_ten) + details[:factor]
      multiple_of_ten = details[:multiple_of_ten]
      sentence = details[:sentence]
    end

    if detail
      return {
        factor: factor,
        multiple_of_ten: multiple_of_ten,
        sentence: sentence
      }
    end

    extract_optimized(sentence, keys) + (factor * multiple_of_ten)
  elsif (m = QUATRE_VINGT_PATTERN.match(sentence))
    # Normalize "quatre vingts" / "quatre-vingts" to the hyphenated,
    # singular table key; the "-dix" variants are table keys as they are.
    normalize_str = m[1].tr(' ', '-')
    normalize_str = normalize_str[0...-1] if normalize_str[-1] == 's'

    sentence = m.pre_match + m.post_match

    # m[8] is the optional word right after the quatre-vingt part.
    extract_optimized(sentence, keys) +
      WORD_VALUES[normalize_str] + (WORD_VALUES[m[8]] || 0)
  else
    match_optimized(sentence)
  end
end
212
+
213
# Sums the values of the individual words of a phrase.
#
# Hyphens count as word separators, "et" contributes nothing, and unknown
# words count as 0. A multiplier word contributes 10**exponent (the earlier
# 10 * exponent turned "cent" into 20).
#
# @param sentence [String, nil] normalized text
# @return [Integer] the sum of the word values; 0 for nil
def match_optimized(sentence)
  return 0 if sentence.nil?

  # Addition is order-independent, so the words are summed as they come.
  sentence.tr('-', ' ').split.sum do |word|
    next 0 if word == 'et'

    WORD_VALUES[word] || (MULTIPLIERS.key?(word) ? 10**MULTIPLIERS[word] : 0)
  end
end
223
+
224
# Tells whether the sentence still mentions a multiplier word whose
# exponent is greater than that of the given multiplier.
#
# The check is a substring test on the raw sentence.
#
# @param multiple [String] multiplier word that was just consumed
# @param sentence [String] remaining text
# @return [Boolean]
def higher_multiple_exists?(multiple, sentence)
  threshold = MULTIPLIERS[multiple]
  larger_words = MULTIPLIERS.select { |_word, exponent| exponent > threshold }.keys
  larger_words.any? { |candidate| sentence.include?(candidate) }
end
231
+ end
232
+ end
@@ -1,13 +1,23 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module StringToNumber
4
+ # ToNumber class handles the conversion of French text to numbers
5
+ # It uses a complex recursive parsing algorithm to handle French number grammar
4
6
  class ToNumber
5
7
  attr_accessor :sentence, :keys
6
8
 
9
+ # EXCEPTIONS contains direct mappings from French words to their numeric values
10
+ # This includes:
11
+ # - Basic numbers 0-90
12
+ # - Feminine forms ("une" for "un")
13
+ # - Regional variations (Belgian/Swiss French: "septante", "huitante", "nonante")
14
+ # - Special cases for "quatre-vingt" variations with/without 's'
15
+ # - Compound numbers like "dix-sept", "soixante-dix"
7
16
  EXCEPTIONS = {
8
- 'zéro' => 0,
9
- 'zero' => 0,
10
- 'un' => 1,
17
+ 'zéro' => 0, # Zero with accent
18
+ 'zero' => 0, # Zero without accent
19
+ 'un' => 1, # Masculine "one"
20
+ 'une' => 1, # Feminine "one"
11
21
  'deux' => 2,
12
22
  'trois' => 3,
13
23
  'quatre' => 4,
@@ -23,29 +33,44 @@ module StringToNumber
23
33
  'quatorze' => 14,
24
34
  'quinze' => 15,
25
35
  'seize' => 16,
26
- 'dix-sept' => 17,
27
- 'dix-huit' => 18,
28
- 'dix-neuf' => 19,
36
+ 'dix-sept' => 17, # Compound: "ten-seven"
37
+ 'dix-huit' => 18, # Compound: "ten-eight"
38
+ 'dix-neuf' => 19, # Compound: "ten-nine"
29
39
  'vingt' => 20,
30
40
  'trente' => 30,
31
41
  'quarante' => 40,
32
42
  'cinquante' => 50,
33
43
  'soixante' => 60,
34
- 'soixante-dix' => 70,
35
- 'quatre-vingts' => 80,
36
- 'quatre-vingt' => 80,
37
- 'quatre-vingt-dix' => 90,
38
- 'quatre-vingts-dix' => 90
44
+ 'soixante-dix' => 70, # Standard French: "sixty-ten"
45
+ 'septante' => 70, # Belgian/Swiss French alternative
46
+ 'quatre-vingts' => 80, # Standard French: "four-twenties" (plural)
47
+ 'quatre-vingt' => 80, # Standard French: "four-twenty" (singular)
48
+ 'huitante' => 80, # Swiss French alternative
49
+ 'quatre-vingt-dix' => 90, # Standard French: "four-twenty-ten"
50
+ 'quatre-vingts-dix' => 90, # Alternative with plural "vingts"
51
+ 'nonante' => 90 # Belgian/Swiss French alternative
39
52
  }.freeze
40
53
 
54
+ # POWERS_OF_TEN maps French number words to their power of 10 exponents
55
+ # Used for multipliers like "cent" (10^2), "mille" (10^3), "million" (10^6)
56
+ # Includes both singular and plural forms for proper French grammar
57
+ # Uses French number scale where "billion" = 10^12 (not 10^9 as in English)
41
58
  POWERS_OF_TEN = {
42
- 'un' => 0,
43
- 'dix' => 1,
44
- 'cent' => 2,
45
- 'mille' => 3,
46
- 'million' => 6,
47
- 'billion' => 9,
48
- 'trillion' => 12,
59
+ 'un' => 0, # 10^0 = 1 (ones place)
60
+ 'dix' => 1, # 10^1 = 10 (tens place)
61
+ 'cent' => 2, # 10^2 = 100 (hundreds, singular)
62
+ 'cents' => 2, # 10^2 = 100 (hundreds, plural)
63
+ 'mille' => 3, # 10^3 = 1,000 (thousands, singular)
64
+ 'milles' => 3, # 10^3 = 1,000 (thousands, plural)
65
+ 'million' => 6, # 10^6 = 1,000,000 (millions, singular)
66
+ 'millions' => 6, # 10^6 = 1,000,000 (millions, plural)
67
+ 'milliard' => 9, # 10^9 = 1,000,000,000 (French billion, singular)
68
+ 'milliards' => 9, # 10^9 = 1,000,000,000 (French billion, plural)
69
+ 'billion' => 12, # 10^12 = 1,000,000,000,000 (French trillion, singular)
70
+ 'billions' => 12, # 10^12 = 1,000,000,000,000 (French trillion, plural)
71
+ 'trillion' => 15, # 10^15 (French quadrillion, singular)
72
+ 'trillions' => 15, # 10^15 (French quadrillion, plural)
73
+ # Extended list of large number names for completeness
49
74
  'quadrillion' => 15,
50
75
  'quintillion' => 18,
51
76
  'sextillion' => 21,
@@ -75,42 +100,88 @@ module StringToNumber
75
100
  'trigintillion' => 93,
76
101
  'untrigintillion' => 96,
77
102
  'duotrigintillion' => 99,
78
- 'googol' => 100
103
+ 'googol' => 100 # Special case: 10^100
79
104
  }.freeze
80
105
 
106
# Prepares a converter for one French sentence.
#
# @keys becomes a regex alternation of every multiplier word except 'un'
# and 'dix', longest words first so that "cents" is tried before "cent".
# @sentence is the input in lower case; nil is treated as an empty string.
#
# @param sentence [String, nil] the French text to be converted
def initialize(sentence = '')
  multiplier_words = POWERS_OF_TEN.keys - %w[un dix]
  longest_first = multiplier_words.sort_by(&:length).reverse
  @keys = longest_first.join('|')

  @sentence = sentence.nil? ? '' : sentence.downcase
end
85
118
 
119
# Converts the sentence given to the constructor into a number by handing
# it, together with the multiplier alternation, to the recursive extractor.
#
# @return [Integer] the numeric value of the French text
def to_number
  multiplier_alternation = keys
  extract(@sentence, multiplier_alternation)
end
89
124
 
90
125
  private
91
126
 
127
+ # Main recursive extraction method that parses French number patterns
128
+ # This is the core of the parsing algorithm
129
+ # @param sentence [String] The French text to parse
130
+ # @param keys [String] Regex pattern of power-of-ten multipliers
131
+ # @param detail [Boolean] If true, returns detailed parsing info for recursion
132
+ # @return [Integer, Hash] Numeric value or detailed parsing hash
92
133
  def extract(sentence, keys, detail: false)
134
+ # Base cases: handle empty/nil input
93
135
  return 0 if sentence.nil? || sentence.empty?
136
+
137
+ # Ensure case-insensitive matching
138
+ sentence = sentence.downcase
139
+
140
+ # Direct lookup for simple cases (e.g., "vingt" -> 20)
94
141
  return EXCEPTIONS[sentence] unless EXCEPTIONS[sentence].nil?
95
142
 
96
- if result = /(?<f>.*?)\s?(?<m>#{keys})/.match(sentence)
97
- # Deleting matching element
98
- sentence.gsub!($&, '') if $&
143
+ # Main parsing logic: look for pattern "factor + multiplier"
144
+ # Example: "cinq cents" -> factor="cinq", multiplier="cents"
145
+ # Regex explanation:
146
+ # (?<f>.*?) - Non-greedy capture of factor part (before multiplier)
147
+ # \s? - Optional space
148
+ # (?<m>#{keys}) - Named capture of multiplier from keys pattern
149
+ if (result = /(?<f>.*?)\s?(?<m>#{keys})/.match(sentence))
150
+ # Remove the matched portion from sentence for further processing
151
+ sentence.gsub!(::Regexp.last_match(0), '') if ::Regexp.last_match(0)
152
+
153
+ # Parse the factor part (number before the multiplier)
154
+ # Example: "cinq" -> 5, "deux cent" -> 200
155
+ factor = EXCEPTIONS[result[:f]] || match(result[:f])
99
156
 
100
- # Extract matching element
101
- factor = EXCEPTIONS[result[:f]] || match(result[:f])
102
- factor = 1 if factor.zero?
157
+ # Handle implicit factor of 1 for standalone multipliers
158
+ # Example: "million" -> factor=1, but only for top-level calls
159
+ # For recursive calls (detail=true), keep factor as 0 to avoid double-counting
160
+ factor = 1 if factor.zero? && !detail
161
+
162
+ # Calculate the multiplier value (10^exponent)
163
+ # Example: "cents" -> 10^2 = 100, "millions" -> 10^6 = 1,000,000
103
164
  multiple_of_ten = 10**(POWERS_OF_TEN[result[:m]] || 0)
104
165
 
105
- # Check if this multiple is over
166
+ # Handle compound numbers with higher-order multipliers
167
+ # Example: "cinq cents millions" - after matching "cinq cents",
168
+ # check if "millions" (a higher multiplier than "cents") remains
106
169
  if /#{higher_multiple(result[:m]).keys.join('|')}/.match(sentence)
170
+ # Recursively process the higher multiplier
107
171
  details = extract(sentence, keys, detail: true)
108
172
 
109
- factor = (factor * multiple_of_ten) + details[:factor]
173
+ # Combine the current factor*multiplier with the higher multiplier
174
+ # Example: For "cinq cents millions":
175
+ # - factor = 5, multiple_of_ten = 100 (from "cinq cents")
176
+ # - details[:factor] = 0, details[:multiple_of_ten] = 1000000 (from "millions")
177
+ # - result: factor = (5 * 100) + 0 = 500, multiple_of_ten = 1000000
178
+ # - final: 500 * 1000000 = 500,000,000
179
+ factor = (factor * multiple_of_ten) + details[:factor]
110
180
  multiple_of_ten = details[:multiple_of_ten]
111
- sentence = details[:sentence]
181
+ sentence = details[:sentence]
112
182
  end
113
183
 
184
+ # Return detailed parsing info for recursive calls
114
185
  if detail
115
186
  return {
116
187
  factor: factor,
@@ -119,33 +190,69 @@ module StringToNumber
119
190
  }
120
191
  end
121
192
 
122
- return extract(sentence, keys) + factor * multiple_of_ten
193
+ # Final calculation: process any remaining sentence + current factor*multiplier
194
+ # Example: For "trois millions cinq cents", this handles the "cinq cents" part
195
+ extract(sentence, keys) + (factor * multiple_of_ten)
123
196
 
124
- elsif m = /(quatre(-|\s)vingt(s?)((-|\s)dix)?)((-|\s)?)(\w*)/.match(sentence)
197
+ # Special case handling for "quatre-vingt" variations
198
+ # This complex regex handles the irregular French "eighty" patterns:
199
+ # - "quatre-vingt" / "quatre vingts" (with/without 's')
200
+ # - "quatre-vingt-dix" / "quatre vingts dix" (90)
201
+ # - Space vs hyphen variations
202
+ elsif (m = /(quatre(-|\s)vingt(s?)((-|\s)dix)?)((-|\s)?)(\w*)/.match(sentence))
203
+ # Normalize spacing to hyphens for consistent lookup
125
204
  normalize_str = m[1].tr(' ', '-')
126
- normalize_str = normalize_str[0...-1] if normalize_str[normalize_str.length] == 's'
127
205
 
206
+ # Remove trailing 's' from "quatre-vingts" if present
207
+ # Bug fix: use [-1] instead of [length] for last character
208
+ normalize_str = normalize_str[0...-1] if normalize_str[-1] == 's'
209
+
210
+ # Remove the matched portion from sentence
128
211
  sentence.gsub!(m[0], '')
129
212
 
130
- return extract(sentence, keys) +
131
- EXCEPTIONS[normalize_str] + (EXCEPTIONS[m[8]] || 0)
213
+ # Return sum of: remaining sentence + normalized quatre-vingt value + any suffix
214
+ # Example: "quatre-vingt-cinq" -> EXCEPTIONS["quatre-vingt"] + EXCEPTIONS["cinq"]
215
+ extract(sentence, keys) +
216
+ EXCEPTIONS[normalize_str] + (EXCEPTIONS[m[8]] || 0)
132
217
  else
133
- return match(sentence)
218
+ # Fallback: use match() method for simple word combinations
219
+ match(sentence)
134
220
  end
135
221
  end
136
222
 
223
# Fallback for phrases without a "<factor> <multiplier>" structure: sums
# the values of the individual words.
#
# The text is lower-cased, hyphens count as word separators, "et" is
# ignored and unknown words count as 0.
# Example: "vingt et un" -> 20 + 0 + 1 = 21
#
# A word found only in POWERS_OF_TEN contributes 10**exponent
# (e.g. "cent" -> 10**2 = 100); the earlier 10 * exponent gave 20.
#
# @param sentence [String, nil] French text to parse as individual words
# @return [Integer, nil] sum of the word values, or nil when sentence is nil
def match(sentence)
  return if sentence.nil?

  # Addition is order-independent, so the words are summed as they come.
  sentence.downcase.tr('-', ' ').split.sum do |word|
    next 0 if word == 'et'

    EXCEPTIONS[word] || (POWERS_OF_TEN.key?(word) ? 10**POWERS_OF_TEN[word] : 0)
  end
end
148
249
 
250
+ # Helper method to find multipliers with higher powers than the given one
251
+ # Used to detect when compound numbers have higher-order multipliers
252
+ # @param multiple [String] The current multiplier word (e.g., "cents")
253
+ # @return [Hash] Hash of multipliers with higher powers of 10
254
+ # Example: higher_multiple("cents") returns {"mille"=>3, "million"=>6, ...}
255
+ # because 10^3, 10^6, etc. are all > 10^2 (cents)
149
256
  def higher_multiple(multiple)
150
257
  POWERS_OF_TEN.select do |_k, v|
151
258
  v > POWERS_OF_TEN[multiple]
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
module StringToNumber
  # Release number of the gem.
  VERSION = '0.2.1'
end