string_to_number 0.2.0 → 0.3.0

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -2,7 +2,7 @@
2
2
 
3
3
  module StringToNumber
4
4
  # High-performance French text to number parser
5
- #
5
+ #
6
6
  # This class provides a clean, optimized implementation that maintains
7
7
  # compatibility with the original algorithm while adding significant
8
8
  # performance improvements through caching and memoization.
@@ -21,21 +21,21 @@ module StringToNumber
21
21
  MULTIPLIERS = StringToNumber::ToNumber::POWERS_OF_TEN.freeze
22
22
 
23
23
  # Pre-compiled regex patterns for optimal performance
24
- MULTIPLIER_KEYS = MULTIPLIERS.keys.reject { |k| %w[un dix].include?(k) }
24
+ MULTIPLIER_KEYS = MULTIPLIERS.keys
25
+ .reject { |k| %w[un dix].include?(k) }
25
26
  .sort_by(&:length).reverse.freeze
26
- MULTIPLIER_PATTERN = /(?<f>.*?)\s?(?<m>#{MULTIPLIER_KEYS.join('|')})/
27
- QUATRE_VINGT_PATTERN = /(quatre(-|\s)vingt(s?)((-|\s)dix)?)((-|\s)?)(\w*)/
28
-
27
+ MULTIPLIER_PATTERN = /(?<f>.*?)\s?(?<m>#{MULTIPLIER_KEYS.join('|')})/.freeze
28
+ QUATRE_VINGT_PATTERN = /(?<base>quatre[-\s]vingt(?:s?)(?:[-\s]dix)?)(?:[-\s]?)(?<suffix>\w*)/.freeze
29
+
29
30
  # Cache configuration
30
31
  MAX_CACHE_SIZE = 1000
31
32
  private_constant :MAX_CACHE_SIZE
32
33
 
33
- # Thread-safe class-level caches
34
- @conversion_cache = {}
35
- @cache_access_order = []
36
- @instance_cache = {}
34
+ # Thread-safe LRU cache using Hash insertion order (Ruby 1.9+)
35
+ @cache = {}
36
+ @cache_hits = 0
37
+ @cache_lookups = 0
37
38
  @cache_mutex = Mutex.new
38
- @instance_mutex = Mutex.new
39
39
 
40
40
  class << self
41
41
  # Convert French text to number using cached parser instance
@@ -45,32 +45,38 @@ module StringToNumber
45
45
  # @raise [ArgumentError] if text is not a string
46
46
  def convert(text)
47
47
  validate_input!(text)
48
-
48
+
49
49
  normalized = normalize_text(text)
50
50
  return 0 if normalized.empty?
51
51
 
52
- # Check conversion cache first
53
- cached_result = get_cached_conversion(normalized)
54
- return cached_result if cached_result
52
+ @cache_mutex.synchronize do
53
+ @cache_lookups += 1
54
+
55
+ if @cache.key?(normalized)
56
+ @cache_hits += 1
57
+ # Delete and reinsert to move to end (most recently used)
58
+ value = @cache.delete(normalized)
59
+ @cache[normalized] = value
60
+ return value
61
+ end
62
+ end
63
+
64
+ result = new(normalized).parse_optimized(normalized)
65
+
66
+ @cache_mutex.synchronize do
67
+ @cache.delete(@cache.first[0]) if @cache.size >= MAX_CACHE_SIZE
68
+ @cache[normalized] = result
69
+ end
55
70
 
56
- # Get or create parser instance and convert
57
- parser = get_cached_instance(normalized)
58
- result = parser.parse_optimized(normalized)
59
-
60
- # Cache the result
61
- cache_conversion(normalized, result)
62
71
  result
63
72
  end
64
73
 
65
74
  # Clear all caches
66
75
  def clear_caches!
67
76
  @cache_mutex.synchronize do
68
- @conversion_cache.clear
69
- @cache_access_order.clear
70
- end
71
-
72
- @instance_mutex.synchronize do
73
- @instance_cache.clear
77
+ @cache.clear
78
+ @cache_hits = 0
79
+ @cache_lookups = 0
74
80
  end
75
81
  end
76
82
 
@@ -78,10 +84,9 @@ module StringToNumber
78
84
  def cache_stats
79
85
  @cache_mutex.synchronize do
80
86
  {
81
- conversion_cache_size: @conversion_cache.size,
87
+ conversion_cache_size: @cache.size,
82
88
  conversion_cache_limit: MAX_CACHE_SIZE,
83
- instance_cache_size: @instance_cache.size,
84
- cache_hit_ratio: calculate_hit_ratio
89
+ cache_hit_ratio: @cache_lookups.zero? ? 0.0 : @cache_hits.to_f / @cache_lookups
85
90
  }
86
91
  end
87
92
  end
@@ -95,42 +100,6 @@ module StringToNumber
95
100
  def normalize_text(text)
96
101
  text.to_s.downcase.strip
97
102
  end
98
-
99
- def get_cached_conversion(normalized_text)
100
- @cache_mutex.synchronize do
101
- if @conversion_cache.key?(normalized_text)
102
- # Update LRU order
103
- @cache_access_order.delete(normalized_text)
104
- @cache_access_order.push(normalized_text)
105
- return @conversion_cache[normalized_text]
106
- end
107
- end
108
- nil
109
- end
110
-
111
- def cache_conversion(normalized_text, result)
112
- @cache_mutex.synchronize do
113
- # LRU eviction
114
- if @conversion_cache.size >= MAX_CACHE_SIZE
115
- oldest = @cache_access_order.shift
116
- @conversion_cache.delete(oldest)
117
- end
118
-
119
- @conversion_cache[normalized_text] = result
120
- @cache_access_order.push(normalized_text)
121
- end
122
- end
123
-
124
- def get_cached_instance(normalized_text)
125
- @instance_mutex.synchronize do
126
- @instance_cache[normalized_text] ||= new(normalized_text)
127
- end
128
- end
129
-
130
- def calculate_hit_ratio
131
- return 0.0 if @cache_access_order.empty?
132
- @conversion_cache.size.to_f / @cache_access_order.size
133
- end
134
103
  end
135
104
 
136
105
  # Initialize parser with normalized text
@@ -147,12 +116,12 @@ module StringToNumber
147
116
  # but with performance optimizations
148
117
  def parse_optimized(text)
149
118
  return 0 if text.nil? || text.empty?
150
-
119
+
151
120
  # Direct lookup (fastest path)
152
121
  return WORD_VALUES[text] if WORD_VALUES.key?(text)
153
122
 
154
123
  # Use the proven extraction algorithm from the original implementation
155
- extract_optimized(text, MULTIPLIER_KEYS.join('|'))
124
+ extract_optimized(text)
156
125
  end
157
126
 
158
127
  private
@@ -160,14 +129,14 @@ module StringToNumber
160
129
  # Optimized version of the original extract method
161
130
  # This maintains the exact logic of the working implementation
162
131
  # but with performance improvements
163
- def extract_optimized(sentence, keys, detail: false)
132
+ def extract_optimized(sentence, detail: false)
164
133
  return 0 if sentence.nil? || sentence.empty?
165
-
134
+
166
135
  # Direct lookup
167
136
  return WORD_VALUES[sentence] if WORD_VALUES.key?(sentence)
168
137
 
169
138
  # Main pattern matching using pre-compiled regex
170
- if result = MULTIPLIER_PATTERN.match(sentence)
139
+ if (result = MULTIPLIER_PATTERN.match(sentence))
171
140
  # Remove matched portion
172
141
  sentence = sentence.gsub(result[0], '') if result[0]
173
142
 
@@ -178,7 +147,7 @@ module StringToNumber
178
147
 
179
148
  # Handle compound numbers
180
149
  if higher_multiple_exists?(result[:m], sentence)
181
- details = extract_optimized(sentence, keys, detail: true)
150
+ details = extract_optimized(sentence, detail: true)
182
151
  factor = (factor * multiple_of_ten) + details[:factor]
183
152
  multiple_of_ten = details[:multiple_of_ten]
184
153
  sentence = details[:sentence]
@@ -193,19 +162,19 @@ module StringToNumber
193
162
  }
194
163
  end
195
164
 
196
- return extract_optimized(sentence, keys) + factor * multiple_of_ten
165
+ extract_optimized(sentence) + (factor * multiple_of_ten)
197
166
 
198
167
  # Quatre-vingt special handling
199
- elsif m = QUATRE_VINGT_PATTERN.match(sentence)
200
- normalize_str = m[1].tr(' ', '-')
168
+ elsif (m = QUATRE_VINGT_PATTERN.match(sentence))
169
+ normalize_str = m[:base].tr(' ', '-')
201
170
  normalize_str = normalize_str[0...-1] if normalize_str[-1] == 's'
202
171
 
203
172
  sentence = sentence.gsub(m[0], '')
204
173
 
205
- return extract_optimized(sentence, keys) +
206
- WORD_VALUES[normalize_str] + (WORD_VALUES[m[8]] || 0)
174
+ extract_optimized(sentence) +
175
+ WORD_VALUES[normalize_str] + (WORD_VALUES[m[:suffix]] || 0)
207
176
  else
208
- return match_optimized(sentence)
177
+ match_optimized(sentence)
209
178
  end
210
179
  end
211
180
 
@@ -213,8 +182,9 @@ module StringToNumber
213
182
  def match_optimized(sentence)
214
183
  return 0 if sentence.nil?
215
184
 
216
- sentence.tr('-', ' ').split(' ').reverse.sum do |word|
185
+ sentence.tr('-', ' ').split.reverse.sum do |word|
217
186
  next 0 if word == 'et'
187
+
218
188
  WORD_VALUES[word] || (MULTIPLIERS[word] ? 10 * MULTIPLIERS[word] : 0)
219
189
  end
220
190
  end
@@ -227,4 +197,4 @@ module StringToNumber
227
197
  end
228
198
  end
229
199
  end
230
- end
200
+ end
@@ -47,8 +47,8 @@ module StringToNumber
47
47
  'quatre-vingt' => 80, # Standard French: "four-twenty" (singular)
48
48
  'huitante' => 80, # Swiss French alternative
49
49
  'quatre-vingt-dix' => 90, # Standard French: "four-twenty-ten"
50
- 'quatre-vingts-dix' => 90,# Alternative with plural "vingts"
51
- 'nonante' => 90 # Belgian/Swiss French alternative
50
+ 'quatre-vingts-dix' => 90, # Alternative with plural "vingts"
51
+ 'nonante' => 90 # Belgian/Swiss French alternative
52
52
  }.freeze
53
53
 
54
54
  # POWERS_OF_TEN maps French number words to their power of 10 exponents
@@ -100,7 +100,7 @@ module StringToNumber
100
100
  'trigintillion' => 93,
101
101
  'untrigintillion' => 96,
102
102
  'duotrigintillion' => 99,
103
- 'googol' => 100 # Special case: 10^100
103
+ 'googol' => 100 # Special case: 10^100
104
104
  }.freeze
105
105
 
106
106
  # Initialize the ToNumber parser with a French sentence
@@ -111,7 +111,7 @@ module StringToNumber
111
111
  # Sort keys by length (longest first) to ensure longer matches are preferred
112
112
  # This prevents "cent" from matching before "cents" in "cinq cents"
113
113
  sorted_keys = POWERS_OF_TEN.keys.reject { |k| %w[un dix].include?(k) }.sort_by(&:length).reverse
114
- @keys = sorted_keys.join('|') # Create regex alternation pattern
114
+ @keys = sorted_keys.join('|') # Create regex alternation pattern
115
115
  # Normalize input to lowercase for case-insensitive matching
116
116
  @sentence = sentence&.downcase || ''
117
117
  end
@@ -133,10 +133,10 @@ module StringToNumber
133
133
  def extract(sentence, keys, detail: false)
134
134
  # Base cases: handle empty/nil input
135
135
  return 0 if sentence.nil? || sentence.empty?
136
-
136
+
137
137
  # Ensure case-insensitive matching
138
138
  sentence = sentence.downcase
139
-
139
+
140
140
  # Direct lookup for simple cases (e.g., "vingt" -> 20)
141
141
  return EXCEPTIONS[sentence] unless EXCEPTIONS[sentence].nil?
142
142
 
@@ -146,19 +146,19 @@ module StringToNumber
146
146
  # (?<f>.*?) - Non-greedy capture of factor part (before multiplier)
147
147
  # \s? - Optional space
148
148
  # (?<m>#{keys}) - Named capture of multiplier from keys pattern
149
- if result = /(?<f>.*?)\s?(?<m>#{keys})/.match(sentence)
149
+ if (result = /(?<f>.*?)\s?(?<m>#{keys})/.match(sentence))
150
150
  # Remove the matched portion from sentence for further processing
151
- sentence.gsub!($&, '') if $&
151
+ sentence.gsub!(::Regexp.last_match(0), '') if ::Regexp.last_match(0)
152
152
 
153
153
  # Parse the factor part (number before the multiplier)
154
154
  # Example: "cinq" -> 5, "deux cent" -> 200
155
155
  factor = EXCEPTIONS[result[:f]] || match(result[:f])
156
-
156
+
157
157
  # Handle implicit factor of 1 for standalone multipliers
158
158
  # Example: "million" -> factor=1, but only for top-level calls
159
159
  # For recursive calls (detail=true), keep factor as 0 to avoid double-counting
160
160
  factor = 1 if factor.zero? && !detail
161
-
161
+
162
162
  # Calculate the multiplier value (10^exponent)
163
163
  # Example: "cents" -> 10^2 = 100, "millions" -> 10^6 = 1,000,000
164
164
  multiple_of_ten = 10**(POWERS_OF_TEN[result[:m]] || 0)
@@ -192,19 +192,18 @@ module StringToNumber
192
192
 
193
193
  # Final calculation: process any remaining sentence + current factor*multiplier
194
194
  # Example: For "trois millions cinq cents", this handles the "cinq cents" part
195
- return extract(sentence, keys) + factor * multiple_of_ten
195
+ extract(sentence, keys) + (factor * multiple_of_ten)
196
196
 
197
197
  # Special case handling for "quatre-vingt" variations
198
198
  # This complex regex handles the irregular French "eighty" patterns:
199
199
  # - "quatre-vingt" / "quatre vingts" (with/without 's')
200
200
  # - "quatre-vingt-dix" / "quatre vingts dix" (90)
201
201
  # - Space vs hyphen variations
202
- elsif m = /(quatre(-|\s)vingt(s?)((-|\s)dix)?)((-|\s)?)(\w*)/.match(sentence)
202
+ elsif (m = /(?<base>quatre[-\s]vingt(?:s?)(?:[-\s]dix)?)(?:[-\s]?)(?<suffix>\w*)/.match(sentence))
203
203
  # Normalize spacing to hyphens for consistent lookup
204
- normalize_str = m[1].tr(' ', '-')
205
-
204
+ normalize_str = m[:base].tr(' ', '-')
205
+
206
206
  # Remove trailing 's' from "quatre-vingts" if present
207
- # Bug fix: use [-1] instead of [length] for last character
208
207
  normalize_str = normalize_str[0...-1] if normalize_str[-1] == 's'
209
208
 
210
209
  # Remove the matched portion from sentence
@@ -212,11 +211,11 @@ module StringToNumber
212
211
 
213
212
  # Return sum of: remaining sentence + normalized quatre-vingt value + any suffix
214
213
  # Example: "quatre-vingt-cinq" -> EXCEPTIONS["quatre-vingt"] + EXCEPTIONS["cinq"]
215
- return extract(sentence, keys) +
216
- EXCEPTIONS[normalize_str] + (EXCEPTIONS[m[8]] || 0)
214
+ extract(sentence, keys) +
215
+ EXCEPTIONS[normalize_str] + (EXCEPTIONS[m[:suffix]] || 0)
217
216
  else
218
217
  # Fallback: use match() method for simple word combinations
219
- return match(sentence)
218
+ match(sentence)
220
219
  end
221
220
  end
222
221
 
@@ -229,11 +228,11 @@ module StringToNumber
229
228
 
230
229
  # Process words in reverse order for proper French number logic
231
230
  # Example: "vingt et un" -> ["un", "et", "vingt"] -> 1 + 0 + 20 = 21
232
- sentence.downcase.tr('-', ' ').split(' ').reverse.sum do |word|
231
+ sentence.downcase.tr('-', ' ').split.reverse.sum do |word|
233
232
  # Handle French "et" (and) conjunction by ignoring it in calculations
234
233
  # Example: "vingt et un" -> ignore "et", sum "vingt" + "un"
235
234
  next 0 if word == 'et'
236
-
235
+
237
236
  # Look up word value in either EXCEPTIONS or POWERS_OF_TEN
238
237
  if EXCEPTIONS[word].nil? && POWERS_OF_TEN[word].nil?
239
238
  # Unknown words contribute 0 to the sum
@@ -241,8 +240,8 @@ module StringToNumber
241
240
  else
242
241
  # Use EXCEPTIONS value if available, otherwise use 10 * power_of_ten
243
242
  # Example: "dix" -> EXCEPTIONS["dix"] = 10
244
- # "cent" -> 10 * POWERS_OF_TEN["cent"] = 10 * 2 = 100
245
- (EXCEPTIONS[word] || (10 * POWERS_OF_TEN[word]))
243
+ # "cent" -> 10 * POWERS_OF_TEN["cent"] = 10 * 2 = 20 (ten times the exponent, not 10**2)
244
+ EXCEPTIONS[word] || (10 * POWERS_OF_TEN[word])
246
245
  end
247
246
  end
248
247
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module StringToNumber
2
- VERSION = '0.2.0'.freeze
4
+ VERSION = '0.3.0'
3
5
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'string_to_number/version'
2
4
 
3
5
  # Load original implementation first for constant definitions
@@ -78,20 +80,20 @@ module StringToNumber
78
80
  #
79
81
  def valid_french_number?(text)
80
82
  return false unless text.respond_to?(:to_s)
81
-
83
+
82
84
  normalized = text.to_s.downcase.strip
83
85
  return false if normalized.empty?
84
-
86
+
85
87
  # Check if any words are recognized French number words
86
88
  words = normalized.tr('-', ' ').split(/\s+/)
87
89
  recognized_words = words.count do |word|
88
- word == 'et' ||
89
- Parser::WORD_VALUES.key?(word) ||
90
- Parser::MULTIPLIERS.key?(word)
90
+ word == 'et' ||
91
+ Parser::WORD_VALUES.key?(word) ||
92
+ Parser::MULTIPLIERS.key?(word)
91
93
  end
92
-
94
+
93
95
  # Require at least 50% recognized words for validation
94
96
  recognized_words.to_f / words.size >= 0.5
95
97
  end
96
98
  end
97
- end
99
+ end