RubyGems - string_to_number - Versions diffs - 0.2.0 → 0.2.1 - Mend

string_to_number 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18)

checksums.yaml +4 -4
data/.github/workflows/ci.yml +83 -0
data/.rubocop.yml +110 -0
data/Gemfile +9 -0
data/Gemfile.lock +32 -1
data/README.md +9 -5
data/Rakefile +5 -1
data/benchmark.rb +41 -40
data/lib/string_to_number/parser.rb +20 -18
data/lib/string_to_number/to_number.rb +20 -20
data/lib/string_to_number/version.rb +3 -1
data/lib/string_to_number.rb +9 -7
data/logo.png +0 -0
data/microbenchmark.rb +81 -80
data/performance_comparison.rb +34 -35
data/profile.rb +44 -45
data/string_to_number.gemspec +5 -6
metadata +7 -45

data/lib/string_to_number/to_number.rb CHANGED Viewed

@@ -47,8 +47,8 @@ module StringToNumber
       'quatre-vingt' => 80,     # Standard French: "four-twenty" (singular)
       'huitante' => 80,         # Swiss French alternative
       'quatre-vingt-dix' => 90, # Standard French: "four-twenty-ten"
-      'quatre-vingts-dix' => 90,# Alternative with plural "vingts"
-      'nonante' => 90           # Belgian/Swiss French alternative
+      'quatre-vingts-dix' => 90, # Alternative with plural "vingts"
+      'nonante' => 90 # Belgian/Swiss French alternative
     }.freeze
     # POWERS_OF_TEN maps French number words to their power of 10 exponents
@@ -100,7 +100,7 @@ module StringToNumber
       'trigintillion' => 93,
       'untrigintillion' => 96,
       'duotrigintillion' => 99,
-      'googol' => 100      # Special case: 10^100
+      'googol' => 100 # Special case: 10^100
     }.freeze
     # Initialize the ToNumber parser with a French sentence
@@ -111,7 +111,7 @@ module StringToNumber
       # Sort keys by length (longest first) to ensure longer matches are preferred
       # This prevents "cent" from matching before "cents" in "cinq cents"
       sorted_keys = POWERS_OF_TEN.keys.reject { |k| %w[un dix].include?(k) }.sort_by(&:length).reverse
-      @keys = sorted_keys.join('|')  # Create regex alternation pattern
+      @keys = sorted_keys.join('|') # Create regex alternation pattern
       # Normalize input to lowercase for case-insensitive matching
       @sentence = sentence&.downcase || ''
     end
@@ -133,10 +133,10 @@ module StringToNumber
     def extract(sentence, keys, detail: false)
       # Base cases: handle empty/nil input
       return 0 if sentence.nil? || sentence.empty?
       # Ensure case-insensitive matching
       sentence = sentence.downcase
       # Direct lookup for simple cases (e.g., "vingt" -> 20)
       return EXCEPTIONS[sentence] unless EXCEPTIONS[sentence].nil?
@@ -146,19 +146,19 @@ module StringToNumber
       #   (?<f>.*?) - Non-greedy capture of factor part (before multiplier)
       #   \s?       - Optional space
       #   (?<m>#{keys}) - Named capture of multiplier from keys pattern
-      if result = /(?<f>.*?)\s?(?<m>#{keys})/.match(sentence)
+      if (result = /(?<f>.*?)\s?(?<m>#{keys})/.match(sentence))
         # Remove the matched portion from sentence for further processing
-        sentence.gsub!($&, '') if $&
+        sentence.gsub!(::Regexp.last_match(0), '') if ::Regexp.last_match(0)
         # Parse the factor part (number before the multiplier)
         # Example: "cinq" -> 5, "deux cent" -> 200
         factor = EXCEPTIONS[result[:f]] || match(result[:f])
         # Handle implicit factor of 1 for standalone multipliers
         # Example: "million" -> factor=1, but only for top-level calls
         # For recursive calls (detail=true), keep factor as 0 to avoid double-counting
         factor = 1 if factor.zero? && !detail
         # Calculate the multiplier value (10^exponent)
         # Example: "cents" -> 10^2 = 100, "millions" -> 10^6 = 1,000,000
         multiple_of_ten = 10**(POWERS_OF_TEN[result[:m]] || 0)
@@ -192,17 +192,17 @@ module StringToNumber
         # Final calculation: process any remaining sentence + current factor*multiplier
         # Example: For "trois millions cinq cents", this handles the "cinq cents" part
-        return extract(sentence, keys) + factor * multiple_of_ten
+        extract(sentence, keys) + (factor * multiple_of_ten)
       # Special case handling for "quatre-vingt" variations
       # This complex regex handles the irregular French "eighty" patterns:
       # - "quatre-vingt" / "quatre vingts" (with/without 's')
       # - "quatre-vingt-dix" / "quatre vingts dix" (90)
       # - Space vs hyphen variations
-      elsif m = /(quatre(-|\s)vingt(s?)((-|\s)dix)?)((-|\s)?)(\w*)/.match(sentence)
+      elsif (m = /(quatre(-|\s)vingt(s?)((-|\s)dix)?)((-|\s)?)(\w*)/.match(sentence))
         # Normalize spacing to hyphens for consistent lookup
         normalize_str = m[1].tr(' ', '-')
         # Remove trailing 's' from "quatre-vingts" if present
         # Bug fix: use [-1] instead of [length] for last character
         normalize_str = normalize_str[0...-1] if normalize_str[-1] == 's'
@@ -212,11 +212,11 @@ module StringToNumber
         # Return sum of: remaining sentence + normalized quatre-vingt value + any suffix
         # Example: "quatre-vingt-cinq" -> EXCEPTIONS["quatre-vingt"] + EXCEPTIONS["cinq"]
-        return extract(sentence, keys) +
-               EXCEPTIONS[normalize_str] + (EXCEPTIONS[m[8]] || 0)
+        extract(sentence, keys) +
+          EXCEPTIONS[normalize_str] + (EXCEPTIONS[m[8]] || 0)
       else
         # Fallback: use match() method for simple word combinations
-        return match(sentence)
+        match(sentence)
       end
     end
@@ -229,11 +229,11 @@ module StringToNumber
       # Process words in reverse order for proper French number logic
       # Example: "vingt et un" -> ["un", "et", "vingt"] -> 1 + 0 + 20 = 21
-      sentence.downcase.tr('-', ' ').split(' ').reverse.sum do |word|
+      sentence.downcase.tr('-', ' ').split.reverse.sum do |word|
         # Handle French "et" (and) conjunction by ignoring it in calculations
         # Example: "vingt et un" -> ignore "et", sum "vingt" + "un"
         next 0 if word == 'et'
         # Look up word value in either EXCEPTIONS or POWERS_OF_TEN
         if EXCEPTIONS[word].nil? && POWERS_OF_TEN[word].nil?
           # Unknown words contribute 0 to the sum
@@ -241,8 +241,8 @@ module StringToNumber
         else
           # Use EXCEPTIONS value if available, otherwise use 10 * power_of_ten
           # Example: "dix" -> EXCEPTIONS["dix"] = 10
-          #          "cent" -> 10 * POWERS_OF_TEN["cent"] = 10 * 2 = 100
-          (EXCEPTIONS[word] || (10 * POWERS_OF_TEN[word]))
+          #          "cent" -> 10 * POWERS_OF_TEN["cent"] = 10 * 2 = 100
+          EXCEPTIONS[word] || (10 * POWERS_OF_TEN[word])
         end
       end
     end

data/lib/string_to_number/version.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 module StringToNumber
-  VERSION = '0.2.0'.freeze
+  VERSION = '0.2.1'
 end

data/lib/string_to_number.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 require 'string_to_number/version'
 # Load original implementation first for constant definitions
@@ -78,20 +80,20 @@ module StringToNumber
     #
     def valid_french_number?(text)
       return false unless text.respond_to?(:to_s)
       normalized = text.to_s.downcase.strip
       return false if normalized.empty?
       # Check if any words are recognized French number words
       words = normalized.tr('-', ' ').split(/\s+/)
       recognized_words = words.count do |word|
-        word == 'et' ||
-        Parser::WORD_VALUES.key?(word) ||
-        Parser::MULTIPLIERS.key?(word)
+        word == 'et' ||
+          Parser::WORD_VALUES.key?(word) ||
+          Parser::MULTIPLIERS.key?(word)
       end
       # Require at least 50% recognized words for validation
       recognized_words.to_f / words.size >= 0.5
     end
   end
-end
+end

data/logo.png ADDED Viewed

Binary file

data/microbenchmark.rb CHANGED Viewed

@@ -9,10 +9,10 @@ require 'benchmark'
 class MicroBenchmark
   def self.run
-    puts "StringToNumber Micro-Benchmarks"
-    puts "=" * 50
+    puts 'StringToNumber Micro-Benchmarks'
+    puts '=' * 50
     puts
     # Test individual components
     test_initialization
     test_regex_compilation
@@ -20,43 +20,45 @@ class MicroBenchmark
     test_hash_lookups
     test_string_operations
     test_recursion_overhead
     puts "\nConclusions and Recommendations:"
-    puts "=" * 50
+    puts '=' * 50
     analyze_results
   end
   def self.test_initialization
-    puts "1. Initialization Performance"
-    puts "-" * 30
+    puts '1. Initialization Performance'
+    puts '-' * 30
     # Test the cost of creating new instances
     sentences = ['un', 'vingt et un', 'mille deux cent', 'trois milliards cinq cents millions']
     sentences.each do |sentence|
       time = Benchmark.realtime do
         1000.times { StringToNumber::ToNumber.new(sentence) }
       end
       puts "#{sentence.ljust(35)}: #{(time * 1000).round(4)}ms per 1000 instances"
     end
     puts
   end
   def self.test_regex_compilation
-    puts "2. Regex Compilation Performance"
-    puts "-" * 30
+    puts '2. Regex Compilation Performance'
+    puts '-' * 30
     # Test the cost of regex compilation vs pre-compiled regex
-    keys = StringToNumber::ToNumber::POWERS_OF_TEN.keys.reject { |k| %w[un dix].include?(k) }.sort_by(&:length).reverse.join('|')
+    keys = StringToNumber::ToNumber::POWERS_OF_TEN.keys.reject do |k|
+      %w[un dix].include?(k)
+    end.sort_by(&:length).reverse.join('|')
     # Dynamic compilation
     dynamic_time = Benchmark.realtime do
       1000.times do
         /(?<f>.*?)\s?(?<m>#{keys})/.match('trois milliards')
       end
     end
     # Pre-compiled regex
     compiled_regex = /(?<f>.*?)\s?(?<m>#{Regexp.escape(keys)})/
     precompiled_time = Benchmark.realtime do
@@ -64,7 +66,7 @@ class MicroBenchmark
         compiled_regex.match('trois milliards')
       end
     end
     puts "Dynamic regex compilation: #{(dynamic_time * 1000).round(4)}ms per 1000 matches"
     puts "Pre-compiled regex:        #{(precompiled_time * 1000).round(4)}ms per 1000 matches"
     puts "Compilation overhead:      #{((dynamic_time - precompiled_time) * 1000).round(4)}ms per 1000 matches"
@@ -72,62 +74,64 @@ class MicroBenchmark
   end
   def self.test_regex_matching
-    puts "3. Regex Pattern Complexity"
-    puts "-" * 30
+    puts '3. Regex Pattern Complexity'
+    puts '-' * 30
     # Test different regex patterns to see which are expensive
     test_patterns = {
       'Simple word match' => /vingt/,
       'Word boundary match' => /\bvingt\b/,
       'Named capture groups' => /(?<f>.*?)\s?(?<m>vingt)/,
       'Complex alternation' => /(?<f>.*?)\s?(?<m>vingt|trente|quarante|cinquante)/,
-      'Full keys pattern' => /(?<f>.*?)\s?(?<m>#{StringToNumber::ToNumber::POWERS_OF_TEN.keys.reject { |k| %w[un dix].include?(k) }.sort_by(&:length).reverse.join('|')})/
+      'Full keys pattern' => /(?<f>.*?)\s?(?<m>#{StringToNumber::ToNumber::POWERS_OF_TEN.keys.reject do |k|
+        %w[un dix].include?(k)
+      end.sort_by(&:length).reverse.join('|')})/
     }
     test_string = 'trois milliards cinq cents millions'
     test_patterns.each do |name, pattern|
       time = Benchmark.realtime do
         5000.times { pattern.match(test_string) }
       end
       puts "#{name.ljust(25)}: #{(time * 1000).round(4)}ms per 5000 matches"
     end
     puts
   end
   def self.test_hash_lookups
-    puts "4. Hash Lookup Performance"
-    puts "-" * 30
+    puts '4. Hash Lookup Performance'
+    puts '-' * 30
     exceptions = StringToNumber::ToNumber::EXCEPTIONS
     powers = StringToNumber::ToNumber::POWERS_OF_TEN
     # Test lookup performance
     exceptions_time = Benchmark.realtime do
-      10000.times do
+      10_000.times do
         exceptions['vingt']
         exceptions['trois']
         exceptions['cent']
       end
     end
     powers_time = Benchmark.realtime do
-      10000.times do
+      10_000.times do
         powers['million']
         powers['mille']
         powers['cent']
       end
     end
     # Test nil checks
     nil_check_time = Benchmark.realtime do
-      10000.times do
+      10_000.times do
         exceptions['nonexistent'].nil?
         powers['nonexistent'].nil?
       end
     end
     puts "EXCEPTIONS hash lookups:   #{(exceptions_time * 100).round(4)}ms per 10000 lookups"
     puts "POWERS_OF_TEN hash lookups: #{(powers_time * 100).round(4)}ms per 10000 lookups"
     puts "Nil check operations:      #{(nil_check_time * 100).round(4)}ms per 10000 checks"
@@ -135,28 +139,28 @@ class MicroBenchmark
   end
   def self.test_string_operations
-    puts "5. String Operations Performance"
-    puts "-" * 30
+    puts '5. String Operations Performance'
+    puts '-' * 30
     test_string = 'TROIS MILLIARDS CINQ CENTS MILLIONS'
     # Test different string operations
     downcase_time = Benchmark.realtime do
       5000.times { test_string.downcase }
     end
     gsub_time = Benchmark.realtime do
-      5000.times { test_string.gsub(/MILLIONS/, '') }
+      5000.times { test_string.gsub('MILLIONS', '') }
     end
     split_time = Benchmark.realtime do
-      5000.times { test_string.split(' ') }
+      5000.times { test_string.split }
     end
     tr_time = Benchmark.realtime do
       5000.times { test_string.tr('-', ' ') }
     end
     puts "String#downcase:  #{(downcase_time * 1000).round(4)}ms per 5000 operations"
     puts "String#gsub:      #{(gsub_time * 1000).round(4)}ms per 5000 operations"
     puts "String#split:     #{(split_time * 1000).round(4)}ms per 5000 operations"
@@ -165,29 +169,28 @@ class MicroBenchmark
   end
   def self.test_recursion_overhead
-    puts "6. Recursion vs Iteration Performance"
-    puts "-" * 30
+    puts '6. Recursion vs Iteration Performance'
+    puts '-' * 30
     # Compare recursive vs iterative approaches
-    def self.recursive_sum(arr, index = 0)
+    recursive_sum = lambda do |arr, index = 0|
       return 0 if index >= arr.length
-      arr[index] + recursive_sum(arr, index + 1)
-    end
-    def self.iterative_sum(arr)
-      arr.sum
+      arr[index] + recursive_sum.call(arr, index + 1)
     end
+    iterative_sum = :sum.to_proc
     test_array = Array.new(100) { rand(100) }
     recursive_time = Benchmark.realtime do
-      1000.times { recursive_sum(test_array) }
+      1000.times { recursive_sum.call(test_array) }
     end
     iterative_time = Benchmark.realtime do
-      1000.times { iterative_sum(test_array) }
+      1000.times { iterative_sum.call(test_array) }
     end
     puts "Recursive approach: #{(recursive_time * 1000).round(4)}ms per 1000 operations"
     puts "Iterative approach: #{(iterative_time * 1000).round(4)}ms per 1000 operations"
     puts "Recursion overhead: #{((recursive_time - iterative_time) * 1000).round(4)}ms per 1000 operations"
@@ -195,32 +198,30 @@ class MicroBenchmark
   end
   def self.analyze_results
-    puts "Key Performance Insights:"
+    puts 'Key Performance Insights:'
     puts
-    puts "1. 🔍 INITIALIZATION COST:"
-    puts "   - Creating new ToNumber instances is expensive (~13ms per 1000)"
-    puts "   - Consider caching or singleton pattern for repeated use"
+    puts '1. 🔍 INITIALIZATION COST:'
+    puts '   - Creating new ToNumber instances is expensive (~13ms per 1000)'
+    puts '   - Consider caching or singleton pattern for repeated use'
     puts
-    puts "2. 🔍 REGEX COMPLEXITY:"
-    puts "   - Complex alternation patterns are the main bottleneck"
-    puts "   - Keys pattern is 521 characters long - very expensive to match"
-    puts "   - Consider breaking down into simpler patterns or using different approach"
+    puts '2. 🔍 REGEX COMPLEXITY:'
+    puts '   - Complex alternation patterns are the main bottleneck'
+    puts '   - Keys pattern is 521 characters long - very expensive to match'
+    puts '   - Consider breaking down into simpler patterns or using different approach'
     puts
-    puts "3. 🔍 SCALABILITY ISSUES:"
-    puts "   - Performance degrades significantly with input length (43x for longest)"
-    puts "   - Recursive parsing creates overhead for complex numbers"
-    puts "   - String operations add up with multiple passes"
+    puts '3. 🔍 SCALABILITY ISSUES:'
+    puts '   - Performance degrades significantly with input length (43x for longest)'
+    puts '   - Recursive parsing creates overhead for complex numbers'
+    puts '   - String operations add up with multiple passes'
     puts
-    puts "📊 OPTIMIZATION RECOMMENDATIONS:"
-    puts "   1. Pre-compile regex patterns in class constants"
-    puts "   2. Use simpler regex patterns with multiple passes if needed"
-    puts "   3. Implement caching for repeated conversions"
-    puts "   4. Consider iterative parsing instead of recursive for complex cases"
-    puts "   5. Optimize string operations (minimize downcase/gsub calls)"
+    puts '📊 OPTIMIZATION RECOMMENDATIONS:'
+    puts '   1. Pre-compile regex patterns in class constants'
+    puts '   2. Use simpler regex patterns with multiple passes if needed'
+    puts '   3. Implement caching for repeated conversions'
+    puts '   4. Consider iterative parsing instead of recursive for complex cases'
+    puts '   5. Optimize string operations (minimize downcase/gsub calls)'
   end
 end
 # Run the micro-benchmarks
-if __FILE__ == $0
-  MicroBenchmark.run
-end
+MicroBenchmark.run if __FILE__ == $PROGRAM_NAME

data/performance_comparison.rb CHANGED Viewed

@@ -16,20 +16,20 @@ class PerformanceComparison
   ].freeze
   def self.run_comparison
-    puts "StringToNumber Performance Comparison"
-    puts "=" * 60
-    puts "Original vs Optimized Implementation"
-    puts "=" * 60
+    puts 'StringToNumber Performance Comparison'
+    puts '=' * 60
+    puts 'Original vs Optimized Implementation'
+    puts '=' * 60
     puts
     TEST_CASES.each_with_index do |test_case, index|
       puts "Test #{index + 1}: '#{test_case}'"
-      puts "-" * 50
+      puts '-' * 50
       # Verify both implementations produce same results
       original_result = StringToNumber.in_numbers(test_case, use_optimized: false)
       optimized_result = StringToNumber.in_numbers(test_case, use_optimized: true)
       if original_result == optimized_result
         puts "✅ Results match: #{original_result}"
       else
@@ -38,7 +38,7 @@ class PerformanceComparison
       end
       # Benchmark both implementations
-      iterations = 10000
+      iterations = 10_000
       original_time = Benchmark.realtime do
         iterations.times { StringToNumber.in_numbers(test_case, use_optimized: false) }
@@ -55,27 +55,27 @@ class PerformanceComparison
       puts "Original:  #{original_avg.round(4)}ms average"
       puts "Optimized: #{optimized_avg.round(4)}ms average"
       puts "Speedup:   #{speedup.round(1)}x faster"
       # Performance rating
       rating = case speedup
-               when 0..2 then "🟡 Minor improvement"
-               when 2..10 then "🟢 Good improvement"
-               when 10..50 then "🟢 Great improvement"
-               else "🚀 Exceptional improvement"
+               when 0..2 then '🟡 Minor improvement'
+               when 2..10 then '🟢 Good improvement'
+               when 10..50 then '🟢 Great improvement'
+               else '🚀 Exceptional improvement'
                end
       puts "Rating:    #{rating}"
       puts
     end
     # Overall comparison
-    puts "=" * 60
-    puts "OVERALL PERFORMANCE ANALYSIS"
-    puts "=" * 60
+    puts '=' * 60
+    puts 'OVERALL PERFORMANCE ANALYSIS'
+    puts '=' * 60
     # Test cache performance
     puts "\nCache Performance Test:"
-    puts "-" * 30
+    puts '-' * 30
     # Clear caches
     StringToNumber.clear_caches!
@@ -107,17 +107,17 @@ class PerformanceComparison
     # Scalability test
     puts "\nScalability Comparison:"
-    puts "-" * 30
+    puts '-' * 30
     scalability_tests = [
       'un',                                                           # 2 chars
       'vingt et un',                                                  # 11 chars
-      'mille deux cent trente-quatre',                               # 29 chars
+      'mille deux cent trente-quatre', # 29 chars
       'soixante-quinze million trois cent quarante six mille sept cent quatre-vingt-dix neuf' # 85 chars
     ]
-    puts "Input Length | Original | Optimized | Improvement"
-    puts "-------------|----------|-----------|------------"
+    puts 'Input Length | Original | Optimized | Improvement'
+    puts '-------------|----------|-----------|------------'
     scalability_tests.each do |test|
       original_time = Benchmark.realtime do
@@ -132,24 +132,23 @@ class PerformanceComparison
       optimized_ms = (optimized_time / 1000) * 1000
       improvement = original_ms / optimized_ms
-      puts "#{test.length.to_s.rjust(11)} | #{original_ms.round(4).to_s.rjust(8)} | #{optimized_ms.round(4).to_s.rjust(9)} | #{improvement.round(1).to_s.rjust(10)}x"
+      puts "#{test.length.to_s.rjust(11)} | #{original_ms.round(4).to_s.rjust(8)} | " \
+           "#{optimized_ms.round(4).to_s.rjust(9)} | #{improvement.round(1).to_s.rjust(10)}x"
     end
-    puts "\n" + "=" * 60
-    puts "SUMMARY"
-    puts "=" * 60
-    puts "✅ All test cases produce identical results"
-    puts "🚀 Significant performance improvements across all test cases"
-    puts "📈 Better scalability with input length"
-    puts "💾 Effective caching reduces repeated conversion time"
-    puts "🧠 Lower memory usage and object creation"
+    puts "\n#{'=' * 60}"
+    puts 'SUMMARY'
+    puts '=' * 60
+    puts '✅ All test cases produce identical results'
+    puts '🚀 Significant performance improvements across all test cases'
+    puts '📈 Better scalability with input length'
+    puts '💾 Effective caching reduces repeated conversion time'
+    puts '🧠 Lower memory usage and object creation'
     puts
-    puts "The optimized implementation successfully addresses all identified"
-    puts "performance bottlenecks while maintaining full compatibility."
+    puts 'The optimized implementation successfully addresses all identified'
+    puts 'performance bottlenecks while maintaining full compatibility.'
   end
 end
 # Run the comparison
-if __FILE__ == $0
-  PerformanceComparison.run_comparison
-end
+PerformanceComparison.run_comparison if __FILE__ == $PROGRAM_NAME