lucarecord 0.2.25 → 0.2.26

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1982b60b00eddc3d201368a2f436835fa51c6ba58538de3fcd0ce962c4529246
4
- data.tar.gz: 894ef1778f5a8be1f2091575ec86e84ceb61d0d158ba0cb4b8bc297c7070593a
3
+ metadata.gz: 526cf95ca548f5f2b4617f0af76337fa1d9a05896b212a4468f2d68db4b3ab03
4
+ data.tar.gz: 203299e63b9835ca1da312059df927cc8d65d81bea25d299fd08cf1d5afca0fa
5
5
  SHA512:
6
- metadata.gz: 396bfbd54619361753b576fdda9f715b115ab274df10dce07abb1a5f180ed161946c72bae02c4926ff05d828e20af6920c689a412c71234c5baef072f870b528
7
- data.tar.gz: 633d8920f547c893b2b1ac6645bdd108439d3568cfa3ec738c70ec61f1f658374e598bce0144a5a85142a15e4353d102e35b48ea84988f4dae06b0658a3952f7
6
+ metadata.gz: d53ff1db376c14b4b39f2975dc70ec0cb74a04dc25f7b17da193c532b819f76614399ad019e4101d73acde439bfdef7a0cc6773a8bacda88b0bc22dedeb8c5e8
7
+ data.tar.gz: 4feed97a4a7b5f7505f6aee51a71c7301606387f3ce2c1bf7086edf1c3bb5e3ceab42b43e7a5daa2576e94ba4ae8e054a3d5ec88d735b5ff47c903ee0a31290a
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## LucaRecord 0.2.26
2
+
3
+ * Support #dig / #search for TSV dictionary
4
+ * Fix: shorten the n-gram split factor when the search word is shorter than the specified factor
5
+
1
6
  ## LucaRecord 0.2.25
2
7
 
3
8
  * Implement `dir_digest()` for data validation.
@@ -17,10 +17,11 @@ module LucaRecord
17
17
  set_driver
18
18
  end
19
19
 
20
- # Search word with n-gram.
20
+ # Search code with n-gram word.
21
21
  # If dictionary has Hash or Array, it returns [label, options].
22
22
  #
23
23
  def search(word, default_word = nil, main_key: 'label', options: nil)
24
+ definitions_lazyload
24
25
  res, score = max_score_code(word.gsub(/[[:space:]]/, ''))
25
26
  return default_word if score < 0.4
26
27
 
@@ -34,6 +35,12 @@ module LucaRecord
34
35
  end
35
36
  end
36
37
 
38
+ # Search with unique code.
39
+ #
40
+ def dig(*args)
41
+ @data.dig(*args)
42
+ end
43
+
37
44
  # Separate main item from other options.
38
45
  # If options specified as Array of string, it works as safe list filter.
39
46
  #
@@ -49,7 +56,6 @@ module LucaRecord
49
56
  [obj[main_key], options.compact]
50
57
  end
51
58
 
52
- #
53
59
  # Load CSV with config options
54
60
  #
55
61
  def load_csv(path)
@@ -58,7 +64,6 @@ module LucaRecord
58
64
  end
59
65
  end
60
66
 
61
- #
62
67
  # load dictionary data
63
68
  #
64
69
  def self.load(file = @filename)
@@ -72,7 +77,6 @@ module LucaRecord
72
77
  end
73
78
  end
74
79
 
75
- #
76
80
  # generate dictionary from TSV file. Minimum assumption is as follows:
77
81
  # 1st row is converted symbol.
78
82
  #
@@ -101,7 +105,7 @@ module LucaRecord
101
105
  puts 'No error detected.'
102
106
  nil
103
107
  else
104
- "Key #{errors.join(', ')} has nil #{target_key}."
108
+ puts "Key #{errors.join(', ')} has nil #{target_key}."
105
109
  errors.count
106
110
  end
107
111
  end
@@ -109,9 +113,15 @@ module LucaRecord
109
113
  private
110
114
 
111
115
  def set_driver
112
- input = self.class.load(@path)
113
- @config = input['config']
114
- @definitions = input['definitions']
116
+ @data = self.class.load(@path)
117
+ @config = @data['config']
118
+ @definitions = @data['definitions']
119
+ end
120
+
121
+ # Build Reverse dictionary for TSV data
122
+ #
123
+ def definitions_lazyload
124
+ @definitions ||= @data.each_with_object({}) { |(k, entry), h| h[entry[:label]] = k if entry[:label] }
115
125
  end
116
126
 
117
127
  def self.dict_path(filename)
@@ -124,7 +134,7 @@ module LucaRecord
124
134
 
125
135
  def max_score_code(str)
126
136
  res = @definitions.map do |k, v|
127
- [v, match_score(str, k, 3)]
137
+ [v, match_score(str, k, 2)]
128
138
  end
129
139
  res.max { |x, y| x[1] <=> y[1] }
130
140
  end
@@ -311,6 +311,14 @@ module LucaRecord # :nodoc:
311
311
  File.open(subpath, mode) { |f| yield(f, id_set) }
312
312
  end
313
313
  end
314
+
315
+ # Calculate md5sum with original digest, file content and filename(optional).
316
+ #
317
+ def update_digest(digest, str, filename = nil)
318
+ str = filename.nil? ? str : filename + str
319
+ content = Digest::MD5.new.update(str).hexdigest
320
+ Digest::MD5.new.update(digest + content).hexdigest
321
+ end
314
322
  end
315
323
 
316
324
  # git object like structure
@@ -433,13 +441,5 @@ module LucaRecord # :nodoc:
433
441
  {}
434
442
  end
435
443
  end
436
-
437
- # Calculate md5sum with original digest, file content and filename(optional).
438
- #
439
- def update_digest(digest, str, filename = nil)
440
- str = filename.nil? ? str : filename + str
441
- content = Digest::MD5.new.update(str).hexdigest
442
- Digest::MD5.new.update(digest + content).hexdigest
443
- end
444
444
  end
445
445
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module LucaRecord
4
- VERSION = '0.2.25'
4
+ VERSION = '0.2.26'
5
5
  end
@@ -132,8 +132,9 @@ module LucaSupport
132
132
  end
133
133
 
134
134
  def match_score(a, b, n = 2)
135
- v_a = to_ngram(a, n)
136
- v_b = to_ngram(b, n)
135
+ split_factor = [a.length, b.length, n].min
136
+ v_a = to_ngram(a, split_factor)
137
+ v_b = to_ngram(b, split_factor)
137
138
 
138
139
  v_a.map { |item| v_b.include?(item) ? 1 : 0 }.sum / v_a.length.to_f
139
140
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lucarecord
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.25
4
+ version: 0.2.26
5
5
  platform: ruby
6
6
  authors:
7
7
  - Chuma Takahiro
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-03-13 00:00:00.000000000 Z
11
+ date: 2021-03-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: mail