RubyGems - string_to_number - Versions diffs - 0.2.0 → 0.3.0 - Mend

string_to_number 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24)

checksums.yaml +4 -4
data/.github/workflows/ci.yml +81 -0
data/.github/workflows/release.yml +62 -0
data/.rubocop.yml +110 -0
data/CLAUDE.md +23 -85
data/Gemfile +9 -0
data/Gemfile.lock +32 -1
data/README.md +53 -163
data/Rakefile +5 -1
data/SECURITY.md +25 -0
data/benchmark.rb +41 -40
data/docs/ARCHITECTURE.md +131 -0
data/docs/demo.gif +0 -0
data/lib/string_to_number/parser.rb +49 -79
data/lib/string_to_number/to_number.rb +21 -22
data/lib/string_to_number/version.rb +3 -1
data/lib/string_to_number.rb +9 -7
data/microbenchmark.rb +81 -80
data/performance_comparison.rb +34 -35
data/profile.rb +44 -45
data/string_to_number.gemspec +5 -6
metadata +15 -51
data/.travis.yml +0 -5
/data/{LICENSE.txt → LICENSE} +0 -0

data/lib/string_to_number/parser.rb CHANGED

@@ -2,7 +2,7 @@
 module StringToNumber
   # High-performance French text to number parser
-  #
+  #
   # This class provides a clean, optimized implementation that maintains
   # compatibility with the original algorithm while adding significant
   # performance improvements through caching and memoization.
@@ -21,21 +21,21 @@ module StringToNumber
     MULTIPLIERS = StringToNumber::ToNumber::POWERS_OF_TEN.freeze
     # Pre-compiled regex patterns for optimal performance
-    MULTIPLIER_KEYS = MULTIPLIERS.keys.reject { |k| %w[un dix].include?(k) }
+    MULTIPLIER_KEYS = MULTIPLIERS.keys
+                                 .reject { |k| %w[un dix].include?(k) }
                                  .sort_by(&:length).reverse.freeze
-    MULTIPLIER_PATTERN = /(?<f>.*?)\s?(?<m>#{MULTIPLIER_KEYS.join('|')})/
-    QUATRE_VINGT_PATTERN = /(quatre(-|\s)vingt(s?)((-|\s)dix)?)((-|\s)?)(\w*)/
+    MULTIPLIER_PATTERN = /(?<f>.*?)\s?(?<m>#{MULTIPLIER_KEYS.join('|')})/.freeze
+    QUATRE_VINGT_PATTERN = /(?<base>quatre[-\s]vingt(?:s?)(?:[-\s]dix)?)(?:[-\s]?)(?<suffix>\w*)/.freeze
     # Cache configuration
     MAX_CACHE_SIZE = 1000
     private_constant :MAX_CACHE_SIZE
-    # Thread-safe class-level caches
-    @conversion_cache = {}
-    @cache_access_order = []
-    @instance_cache = {}
+    # Thread-safe LRU cache using Hash insertion order (Ruby 1.9+)
+    @cache = {}
+    @cache_hits = 0
+    @cache_lookups = 0
     @cache_mutex = Mutex.new
-    @instance_mutex = Mutex.new
     class << self
       # Convert French text to number using cached parser instance
@@ -45,32 +45,38 @@ module StringToNumber
       # @raise [ArgumentError] if text is not a string
       def convert(text)
         validate_input!(text)
         normalized = normalize_text(text)
         return 0 if normalized.empty?
-        # Check conversion cache first
-        cached_result = get_cached_conversion(normalized)
-        return cached_result if cached_result
+        @cache_mutex.synchronize do
+          @cache_lookups += 1
+          if @cache.key?(normalized)
+            @cache_hits += 1
+            # Delete and reinsert to move to end (most recently used)
+            value = @cache.delete(normalized)
+            @cache[normalized] = value
+            return value
+          end
+        end
+        result = new(normalized).parse_optimized(normalized)
+        @cache_mutex.synchronize do
+          @cache.delete(@cache.first[0]) if @cache.size >= MAX_CACHE_SIZE
+          @cache[normalized] = result
+        end
-        # Get or create parser instance and convert
-        parser = get_cached_instance(normalized)
-        result = parser.parse_optimized(normalized)
-        # Cache the result
-        cache_conversion(normalized, result)
         result
       end
       # Clear all caches
       def clear_caches!
         @cache_mutex.synchronize do
-          @conversion_cache.clear
-          @cache_access_order.clear
-        end
-        @instance_mutex.synchronize do
-          @instance_cache.clear
+          @cache.clear
+          @cache_hits = 0
+          @cache_lookups = 0
         end
       end
@@ -78,10 +84,9 @@ module StringToNumber
       def cache_stats
         @cache_mutex.synchronize do
           {
-            conversion_cache_size: @conversion_cache.size,
+            conversion_cache_size: @cache.size,
             conversion_cache_limit: MAX_CACHE_SIZE,
-            instance_cache_size: @instance_cache.size,
-            cache_hit_ratio: calculate_hit_ratio
+            cache_hit_ratio: @cache_lookups.zero? ? 0.0 : @cache_hits.to_f / @cache_lookups
           }
         end
       end
@@ -95,42 +100,6 @@ module StringToNumber
       def normalize_text(text)
         text.to_s.downcase.strip
       end
-      def get_cached_conversion(normalized_text)
-        @cache_mutex.synchronize do
-          if @conversion_cache.key?(normalized_text)
-            # Update LRU order
-            @cache_access_order.delete(normalized_text)
-            @cache_access_order.push(normalized_text)
-            return @conversion_cache[normalized_text]
-          end
-        end
-        nil
-      end
-      def cache_conversion(normalized_text, result)
-        @cache_mutex.synchronize do
-          # LRU eviction
-          if @conversion_cache.size >= MAX_CACHE_SIZE
-            oldest = @cache_access_order.shift
-            @conversion_cache.delete(oldest)
-          end
-          @conversion_cache[normalized_text] = result
-          @cache_access_order.push(normalized_text)
-        end
-      end
-      def get_cached_instance(normalized_text)
-        @instance_mutex.synchronize do
-          @instance_cache[normalized_text] ||= new(normalized_text)
-        end
-      end
-      def calculate_hit_ratio
-        return 0.0 if @cache_access_order.empty?
-        @conversion_cache.size.to_f / @cache_access_order.size
-      end
     end
     # Initialize parser with normalized text
@@ -147,12 +116,12 @@ module StringToNumber
     # but with performance optimizations
     def parse_optimized(text)
       return 0 if text.nil? || text.empty?
       # Direct lookup (fastest path)
       return WORD_VALUES[text] if WORD_VALUES.key?(text)
       # Use the proven extraction algorithm from the original implementation
-      extract_optimized(text, MULTIPLIER_KEYS.join('|'))
+      extract_optimized(text)
     end
     private
@@ -160,14 +129,14 @@ module StringToNumber
     # Optimized version of the original extract method
     # This maintains the exact logic of the working implementation
     # but with performance improvements
-    def extract_optimized(sentence, keys, detail: false)
+    def extract_optimized(sentence, detail: false)
       return 0 if sentence.nil? || sentence.empty?
       # Direct lookup
       return WORD_VALUES[sentence] if WORD_VALUES.key?(sentence)
       # Main pattern matching using pre-compiled regex
-      if result = MULTIPLIER_PATTERN.match(sentence)
+      if (result = MULTIPLIER_PATTERN.match(sentence))
         # Remove matched portion
         sentence = sentence.gsub(result[0], '') if result[0]
@@ -178,7 +147,7 @@ module StringToNumber
         # Handle compound numbers
         if higher_multiple_exists?(result[:m], sentence)
-          details = extract_optimized(sentence, keys, detail: true)
+          details = extract_optimized(sentence, detail: true)
           factor = (factor * multiple_of_ten) + details[:factor]
           multiple_of_ten = details[:multiple_of_ten]
           sentence = details[:sentence]
@@ -193,19 +162,19 @@ module StringToNumber
           }
         end
-        return extract_optimized(sentence, keys) + factor * multiple_of_ten
+        extract_optimized(sentence) + (factor * multiple_of_ten)
       # Quatre-vingt special handling
-      elsif m = QUATRE_VINGT_PATTERN.match(sentence)
-        normalize_str = m[1].tr(' ', '-')
+      elsif (m = QUATRE_VINGT_PATTERN.match(sentence))
+        normalize_str = m[:base].tr(' ', '-')
         normalize_str = normalize_str[0...-1] if normalize_str[-1] == 's'
         sentence = sentence.gsub(m[0], '')
-        return extract_optimized(sentence, keys) +
-               WORD_VALUES[normalize_str] + (WORD_VALUES[m[8]] || 0)
+        extract_optimized(sentence) +
+          WORD_VALUES[normalize_str] + (WORD_VALUES[m[:suffix]] || 0)
       else
-        return match_optimized(sentence)
+        match_optimized(sentence)
       end
     end
@@ -213,8 +182,9 @@ module StringToNumber
     def match_optimized(sentence)
       return 0 if sentence.nil?
-      sentence.tr('-', ' ').split(' ').reverse.sum do |word|
+      sentence.tr('-', ' ').split.reverse.sum do |word|
         next 0 if word == 'et'
         WORD_VALUES[word] || (MULTIPLIERS[word] ? 10 * MULTIPLIERS[word] : 0)
       end
     end
@@ -227,4 +197,4 @@ module StringToNumber
       end
     end
   end
-end
+end

data/lib/string_to_number/to_number.rb CHANGED

@@ -47,8 +47,8 @@ module StringToNumber
       'quatre-vingt' => 80,     # Standard French: "four-twenty" (singular)
       'huitante' => 80,         # Swiss French alternative
       'quatre-vingt-dix' => 90, # Standard French: "four-twenty-ten"
-      'quatre-vingts-dix' => 90,# Alternative with plural "vingts"
-      'nonante' => 90           # Belgian/Swiss French alternative
+      'quatre-vingts-dix' => 90, # Alternative with plural "vingts"
+      'nonante' => 90 # Belgian/Swiss French alternative
     }.freeze
     # POWERS_OF_TEN maps French number words to their power of 10 exponents
@@ -100,7 +100,7 @@ module StringToNumber
       'trigintillion' => 93,
       'untrigintillion' => 96,
       'duotrigintillion' => 99,
-      'googol' => 100      # Special case: 10^100
+      'googol' => 100 # Special case: 10^100
     }.freeze
     # Initialize the ToNumber parser with a French sentence
@@ -111,7 +111,7 @@ module StringToNumber
       # Sort keys by length (longest first) to ensure longer matches are preferred
       # This prevents "cent" from matching before "cents" in "cinq cents"
       sorted_keys = POWERS_OF_TEN.keys.reject { |k| %w[un dix].include?(k) }.sort_by(&:length).reverse
-      @keys = sorted_keys.join('|')  # Create regex alternation pattern
+      @keys = sorted_keys.join('|') # Create regex alternation pattern
       # Normalize input to lowercase for case-insensitive matching
       @sentence = sentence&.downcase || ''
     end
@@ -133,10 +133,10 @@ module StringToNumber
     def extract(sentence, keys, detail: false)
       # Base cases: handle empty/nil input
       return 0 if sentence.nil? || sentence.empty?
       # Ensure case-insensitive matching
       sentence = sentence.downcase
       # Direct lookup for simple cases (e.g., "vingt" -> 20)
       return EXCEPTIONS[sentence] unless EXCEPTIONS[sentence].nil?
@@ -146,19 +146,19 @@ module StringToNumber
       #   (?<f>.*?) - Non-greedy capture of factor part (before multiplier)
       #   \s?       - Optional space
       #   (?<m>#{keys}) - Named capture of multiplier from keys pattern
-      if result = /(?<f>.*?)\s?(?<m>#{keys})/.match(sentence)
+      if (result = /(?<f>.*?)\s?(?<m>#{keys})/.match(sentence))
         # Remove the matched portion from sentence for further processing
-        sentence.gsub!($&, '') if $&
+        sentence.gsub!(::Regexp.last_match(0), '') if ::Regexp.last_match(0)
         # Parse the factor part (number before the multiplier)
         # Example: "cinq" -> 5, "deux cent" -> 200
         factor = EXCEPTIONS[result[:f]] || match(result[:f])
         # Handle implicit factor of 1 for standalone multipliers
         # Example: "million" -> factor=1, but only for top-level calls
         # For recursive calls (detail=true), keep factor as 0 to avoid double-counting
         factor = 1 if factor.zero? && !detail
         # Calculate the multiplier value (10^exponent)
         # Example: "cents" -> 10^2 = 100, "millions" -> 10^6 = 1,000,000
         multiple_of_ten = 10**(POWERS_OF_TEN[result[:m]] || 0)
@@ -192,19 +192,18 @@ module StringToNumber
         # Final calculation: process any remaining sentence + current factor*multiplier
         # Example: For "trois millions cinq cents", this handles the "cinq cents" part
-        return extract(sentence, keys) + factor * multiple_of_ten
+        extract(sentence, keys) + (factor * multiple_of_ten)
       # Special case handling for "quatre-vingt" variations
       # This complex regex handles the irregular French "eighty" patterns:
       # - "quatre-vingt" / "quatre vingts" (with/without 's')
       # - "quatre-vingt-dix" / "quatre vingts dix" (90)
       # - Space vs hyphen variations
-      elsif m = /(quatre(-|\s)vingt(s?)((-|\s)dix)?)((-|\s)?)(\w*)/.match(sentence)
+      elsif (m = /(?<base>quatre[-\s]vingt(?:s?)(?:[-\s]dix)?)(?:[-\s]?)(?<suffix>\w*)/.match(sentence))
         # Normalize spacing to hyphens for consistent lookup
-        normalize_str = m[1].tr(' ', '-')
+        normalize_str = m[:base].tr(' ', '-')
         # Remove trailing 's' from "quatre-vingts" if present
-        # Bug fix: use [-1] instead of [length] for last character
         normalize_str = normalize_str[0...-1] if normalize_str[-1] == 's'
         # Remove the matched portion from sentence
@@ -212,11 +211,11 @@ module StringToNumber
         # Return sum of: remaining sentence + normalized quatre-vingt value + any suffix
         # Example: "quatre-vingt-cinq" -> EXCEPTIONS["quatre-vingt"] + EXCEPTIONS["cinq"]
-        return extract(sentence, keys) +
-               EXCEPTIONS[normalize_str] + (EXCEPTIONS[m[8]] || 0)
+        extract(sentence, keys) +
+          EXCEPTIONS[normalize_str] + (EXCEPTIONS[m[:suffix]] || 0)
       else
         # Fallback: use match() method for simple word combinations
-        return match(sentence)
+        match(sentence)
       end
     end
@@ -229,11 +228,11 @@ module StringToNumber
       # Process words in reverse order for proper French number logic
       # Example: "vingt et un" -> ["un", "et", "vingt"] -> 1 + 0 + 20 = 21
-      sentence.downcase.tr('-', ' ').split(' ').reverse.sum do |word|
+      sentence.downcase.tr('-', ' ').split.reverse.sum do |word|
         # Handle French "et" (and) conjunction by ignoring it in calculations
         # Example: "vingt et un" -> ignore "et", sum "vingt" + "un"
         next 0 if word == 'et'
         # Look up word value in either EXCEPTIONS or POWERS_OF_TEN
         if EXCEPTIONS[word].nil? && POWERS_OF_TEN[word].nil?
           # Unknown words contribute 0 to the sum
@@ -241,8 +240,8 @@ module StringToNumber
         else
           # Use EXCEPTIONS value if available, otherwise use 10 * power_of_ten
           # Example: "dix" -> EXCEPTIONS["dix"] = 10
-          #          "cent" -> 10 * POWERS_OF_TEN["cent"] = 10 * 2 = 100
-          (EXCEPTIONS[word] || (10 * POWERS_OF_TEN[word]))
+          #          "cent" -> 10 * POWERS_OF_TEN["cent"] = 10 * 2 = 100
+          EXCEPTIONS[word] || (10 * POWERS_OF_TEN[word])
         end
       end
     end

data/lib/string_to_number/version.rb CHANGED

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 module StringToNumber
-  VERSION = '0.2.0'.freeze
+  VERSION = '0.3.0'
 end

data/lib/string_to_number.rb CHANGED

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 require 'string_to_number/version'
 # Load original implementation first for constant definitions
@@ -78,20 +80,20 @@ module StringToNumber
     #
     def valid_french_number?(text)
       return false unless text.respond_to?(:to_s)
       normalized = text.to_s.downcase.strip
       return false if normalized.empty?
       # Check if any words are recognized French number words
       words = normalized.tr('-', ' ').split(/\s+/)
       recognized_words = words.count do |word|
-        word == 'et' ||
-        Parser::WORD_VALUES.key?(word) ||
-        Parser::MULTIPLIERS.key?(word)
+        word == 'et' ||
+          Parser::WORD_VALUES.key?(word) ||
+          Parser::MULTIPLIERS.key?(word)
       end
       # Require at least 50% recognized words for validation
       recognized_words.to_f / words.size >= 0.5
     end
   end
-end
+end