lucarecord 0.2.25 → 0.2.26
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/lib/luca_record/dict.rb +19 -9
- data/lib/luca_record/io.rb +8 -8
- data/lib/luca_record/version.rb +1 -1
- data/lib/luca_support/code.rb +3 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 526cf95ca548f5f2b4617f0af76337fa1d9a05896b212a4468f2d68db4b3ab03
|
4
|
+
data.tar.gz: 203299e63b9835ca1da312059df927cc8d65d81bea25d299fd08cf1d5afca0fa
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d53ff1db376c14b4b39f2975dc70ec0cb74a04dc25f7b17da193c532b819f76614399ad019e4101d73acde439bfdef7a0cc6773a8bacda88b0bc22dedeb8c5e8
|
7
|
+
data.tar.gz: 4feed97a4a7b5f7505f6aee51a71c7301606387f3ce2c1bf7086edf1c3bb5e3ceab42b43e7a5daa2576e94ba4ae8e054a3d5ec88d735b5ff47c903ee0a31290a
|
data/CHANGELOG.md
CHANGED
data/lib/luca_record/dict.rb
CHANGED
@@ -17,10 +17,11 @@ module LucaRecord
|
|
17
17
|
set_driver
|
18
18
|
end
|
19
19
|
|
20
|
-
# Search
|
20
|
+
# Search code with n-gram word.
|
21
21
|
# If dictionary has Hash or Array, it returns [label, options].
|
22
22
|
#
|
23
23
|
def search(word, default_word = nil, main_key: 'label', options: nil)
|
24
|
+
definitions_lazyload
|
24
25
|
res, score = max_score_code(word.gsub(/[[:space:]]/, ''))
|
25
26
|
return default_word if score < 0.4
|
26
27
|
|
@@ -34,6 +35,12 @@ module LucaRecord
|
|
34
35
|
end
|
35
36
|
end
|
36
37
|
|
38
|
+
# Search with unique code.
|
39
|
+
#
|
40
|
+
def dig(*args)
|
41
|
+
@data.dig(*args)
|
42
|
+
end
|
43
|
+
|
37
44
|
# Separate main item from other options.
|
38
45
|
# If options specified as Array of string, it works as safe list filter.
|
39
46
|
#
|
@@ -49,7 +56,6 @@ module LucaRecord
|
|
49
56
|
[obj[main_key], options.compact]
|
50
57
|
end
|
51
58
|
|
52
|
-
#
|
53
59
|
# Load CSV with config options
|
54
60
|
#
|
55
61
|
def load_csv(path)
|
@@ -58,7 +64,6 @@ module LucaRecord
|
|
58
64
|
end
|
59
65
|
end
|
60
66
|
|
61
|
-
#
|
62
67
|
# load dictionary data
|
63
68
|
#
|
64
69
|
def self.load(file = @filename)
|
@@ -72,7 +77,6 @@ module LucaRecord
|
|
72
77
|
end
|
73
78
|
end
|
74
79
|
|
75
|
-
#
|
76
80
|
# generate dictionary from TSV file. Minimum assumption is as follows:
|
77
81
|
# 1st row is converted symbol.
|
78
82
|
#
|
@@ -101,7 +105,7 @@ module LucaRecord
|
|
101
105
|
puts 'No error detected.'
|
102
106
|
nil
|
103
107
|
else
|
104
|
-
"Key #{errors.join(', ')} has nil #{target_key}."
|
108
|
+
puts "Key #{errors.join(', ')} has nil #{target_key}."
|
105
109
|
errors.count
|
106
110
|
end
|
107
111
|
end
|
@@ -109,9 +113,15 @@ module LucaRecord
|
|
109
113
|
private
|
110
114
|
|
111
115
|
def set_driver
|
112
|
-
|
113
|
-
@config =
|
114
|
-
@definitions =
|
116
|
+
@data = self.class.load(@path)
|
117
|
+
@config = @data['config']
|
118
|
+
@definitions = @data['definitions']
|
119
|
+
end
|
120
|
+
|
121
|
+
# Build Reverse dictionary for TSV data
|
122
|
+
#
|
123
|
+
def definitions_lazyload
|
124
|
+
@definitions ||= @data.each_with_object({}) { |(k, entry), h| h[entry[:label]] = k if entry[:label] }
|
115
125
|
end
|
116
126
|
|
117
127
|
def self.dict_path(filename)
|
@@ -124,7 +134,7 @@ module LucaRecord
|
|
124
134
|
|
125
135
|
def max_score_code(str)
|
126
136
|
res = @definitions.map do |k, v|
|
127
|
-
[v, match_score(str, k,
|
137
|
+
[v, match_score(str, k, 2)]
|
128
138
|
end
|
129
139
|
res.max { |x, y| x[1] <=> y[1] }
|
130
140
|
end
|
data/lib/luca_record/io.rb
CHANGED
@@ -311,6 +311,14 @@ module LucaRecord # :nodoc:
|
|
311
311
|
File.open(subpath, mode) { |f| yield(f, id_set) }
|
312
312
|
end
|
313
313
|
end
|
314
|
+
|
315
|
+
# Calculate md5sum with original digest, file content and filename(optional).
|
316
|
+
#
|
317
|
+
def update_digest(digest, str, filename = nil)
|
318
|
+
str = filename.nil? ? str : filename + str
|
319
|
+
content = Digest::MD5.new.update(str).hexdigest
|
320
|
+
Digest::MD5.new.update(digest + content).hexdigest
|
321
|
+
end
|
314
322
|
end
|
315
323
|
|
316
324
|
# git object like structure
|
@@ -433,13 +441,5 @@ module LucaRecord # :nodoc:
|
|
433
441
|
{}
|
434
442
|
end
|
435
443
|
end
|
436
|
-
|
437
|
-
# Calculate md5sum with original digest, file content and filename(optional).
|
438
|
-
#
|
439
|
-
def update_digest(digest, str, filename = nil)
|
440
|
-
str = filename.nil? ? str : filename + str
|
441
|
-
content = Digest::MD5.new.update(str).hexdigest
|
442
|
-
Digest::MD5.new.update(digest + content).hexdigest
|
443
|
-
end
|
444
444
|
end
|
445
445
|
end
|
data/lib/luca_record/version.rb
CHANGED
data/lib/luca_support/code.rb
CHANGED
@@ -132,8 +132,9 @@ module LucaSupport
|
|
132
132
|
end
|
133
133
|
|
134
134
|
def match_score(a, b, n = 2)
|
135
|
-
|
136
|
-
|
135
|
+
split_factor = [a.length, b.length, n].min
|
136
|
+
v_a = to_ngram(a, split_factor)
|
137
|
+
v_b = to_ngram(b, split_factor)
|
137
138
|
|
138
139
|
v_a.map { |item| v_b.include?(item) ? 1 : 0 }.sum / v_a.length.to_f
|
139
140
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lucarecord
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.25
|
4
|
+
version: 0.2.26
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Chuma Takahiro
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-03-
|
11
|
+
date: 2021-03-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: mail
|