RubyGems - lucarecord - Versions diffs - 0.2.25 → 0.2.26 - Mend

lucarecord 0.2.25 → 0.2.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 1982b60b00eddc3d201368a2f436835fa51c6ba58538de3fcd0ce962c4529246
-  data.tar.gz: 894ef1778f5a8be1f2091575ec86e84ceb61d0d158ba0cb4b8bc297c7070593a
+  metadata.gz: 526cf95ca548f5f2b4617f0af76337fa1d9a05896b212a4468f2d68db4b3ab03
+  data.tar.gz: 203299e63b9835ca1da312059df927cc8d65d81bea25d299fd08cf1d5afca0fa
 SHA512:
-  metadata.gz: 396bfbd54619361753b576fdda9f715b115ab274df10dce07abb1a5f180ed161946c72bae02c4926ff05d828e20af6920c689a412c71234c5baef072f870b528
-  data.tar.gz: 633d8920f547c893b2b1ac6645bdd108439d3568cfa3ec738c70ec61f1f658374e598bce0144a5a85142a15e4353d102e35b48ea84988f4dae06b0658a3952f7
+  metadata.gz: d53ff1db376c14b4b39f2975dc70ec0cb74a04dc25f7b17da193c532b819f76614399ad019e4101d73acde439bfdef7a0cc6773a8bacda88b0bc22dedeb8c5e8
+  data.tar.gz: 4feed97a4a7b5f7505f6aee51a71c7301606387f3ce2c1bf7086edf1c3bb5e3ceab42b43e7a5daa2576e94ba4ae8e054a3d5ec88d735b5ff47c903ee0a31290a

data/CHANGELOG.md CHANGED Viewed

@@ -1,3 +1,8 @@
+## LucaRecord 0.2.26
+* Support #dig / #search for TSV dictionary
+* Fix: shorten n-gram split factor on search word length < specified factor
 ## LucaRecord 0.2.25
 * Implement `dir_digest()` for data validation.

data/lib/luca_record/dict.rb CHANGED Viewed

@@ -17,10 +17,11 @@ module LucaRecord
       set_driver
     end
-    # Search word with n-gram.
+    # Search code with n-gram word.
     # If dictionary has Hash or Array, it returns [label, options].
     #
     def search(word, default_word = nil, main_key: 'label', options: nil)
+      definitions_lazyload
       res, score = max_score_code(word.gsub(/[[:space:]]/, ''))
       return default_word if score < 0.4
@@ -34,6 +35,12 @@ module LucaRecord
       end
     end
+    # Search with unique code.
+    #
+    def dig(*args)
+      @data.dig(*args)
+    end
     # Separate main item from other options.
     # If options specified as Array of string, it works as safe list filter.
     #
@@ -49,7 +56,6 @@ module LucaRecord
       [obj[main_key], options.compact]
     end
-    #
     # Load CSV with config options
     #
     def load_csv(path)
@@ -58,7 +64,6 @@ module LucaRecord
       end
     end
-    #
     # load dictionary data
     #
     def self.load(file = @filename)
@@ -72,7 +77,6 @@ module LucaRecord
       end
     end
-    #
     # generate dictionary from TSV file. Minimum assumption is as bellows:
     # 1st row is converted symbol.
     #
@@ -101,7 +105,7 @@ module LucaRecord
         puts 'No error detected.'
         nil
       else
-        "Key #{errors.join(', ')} has nil #{target_key}."
+        puts "Key #{errors.join(', ')} has nil #{target_key}."
         errors.count
       end
     end
@@ -109,9 +113,15 @@ module LucaRecord
     private
     def set_driver
-      input = self.class.load(@path)
-      @config = input['config']
-      @definitions = input['definitions']
+      @data = self.class.load(@path)
+      @config = @data['config']
+      @definitions = @data['definitions']
+    end
+    # Build Reverse dictionary for TSV data
+    #
+    def definitions_lazyload
+      @definitions ||= @data.each_with_object({}) { |(k, entry), h| h[entry[:label]] = k if entry[:label] }
     end
     def self.dict_path(filename)
@@ -124,7 +134,7 @@ module LucaRecord
     def max_score_code(str)
       res = @definitions.map do |k, v|
-        [v, match_score(str, k, 3)]
+        [v, match_score(str, k, 2)]
       end
       res.max { |x, y| x[1] <=> y[1] }
     end

data/lib/luca_record/io.rb CHANGED Viewed

@@ -311,6 +311,14 @@ module LucaRecord # :nodoc:
             File.open(subpath, mode) { |f| yield(f, id_set) }
           end
         end
+        # Calculate md5sum with original digest, file content and filename(optional).
+        #
+        def update_digest(digest, str, filename = nil)
+          str = filename.nil? ? str : filename + str
+          content = Digest::MD5.new.update(str).hexdigest
+          Digest::MD5.new.update(digest + content).hexdigest
+        end
       end
       # git object like structure
@@ -433,13 +441,5 @@ module LucaRecord # :nodoc:
         {}
       end
     end
-    # Calculate md5sum with original digest, file content and filename(optional).
-    #
-    def update_digest(digest, str, filename = nil)
-      str = filename.nil? ? str : filename + str
-      content = Digest::MD5.new.update(str).hexdigest
-      Digest::MD5.new.update(digest + content).hexdigest
-    end
   end
 end

data/lib/luca_record/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module LucaRecord
-  VERSION = '0.2.25'
+  VERSION = '0.2.26'
 end

data/lib/luca_support/code.rb CHANGED Viewed

@@ -132,8 +132,9 @@ module LucaSupport
     end
     def match_score(a, b, n = 2)
-      v_a = to_ngram(a, n)
-      v_b = to_ngram(b, n)
+      split_factor = [a.length, b.length, n].min
+      v_a = to_ngram(a, split_factor)
+      v_b = to_ngram(b, split_factor)
       v_a.map { |item| v_b.include?(item) ? 1 : 0 }.sum / v_a.length.to_f
     end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: lucarecord
 version: !ruby/object:Gem::Version
-  version: 0.2.25
+  version: 0.2.26
 platform: ruby
 authors:
 - Chuma Takahiro
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2021-03-13 00:00:00.000000000 Z
+date: 2021-03-15 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: mail