lucarecord 0.2.25 → 0.2.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/lib/luca_record/dict.rb +19 -9
- data/lib/luca_record/io.rb +8 -8
- data/lib/luca_record/version.rb +1 -1
- data/lib/luca_support/code.rb +3 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 526cf95ca548f5f2b4617f0af76337fa1d9a05896b212a4468f2d68db4b3ab03
|
4
|
+
data.tar.gz: 203299e63b9835ca1da312059df927cc8d65d81bea25d299fd08cf1d5afca0fa
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d53ff1db376c14b4b39f2975dc70ec0cb74a04dc25f7b17da193c532b819f76614399ad019e4101d73acde439bfdef7a0cc6773a8bacda88b0bc22dedeb8c5e8
|
7
|
+
data.tar.gz: 4feed97a4a7b5f7505f6aee51a71c7301606387f3ce2c1bf7086edf1c3bb5e3ceab42b43e7a5daa2576e94ba4ae8e054a3d5ec88d735b5ff47c903ee0a31290a
|
data/CHANGELOG.md
CHANGED
data/lib/luca_record/dict.rb
CHANGED
@@ -17,10 +17,11 @@ module LucaRecord
|
|
17
17
|
set_driver
|
18
18
|
end
|
19
19
|
|
20
|
-
# Search
|
20
|
+
# Search code with n-gram word.
|
21
21
|
# If dictionary has Hash or Array, it returns [label, options].
|
22
22
|
#
|
23
23
|
def search(word, default_word = nil, main_key: 'label', options: nil)
|
24
|
+
definitions_lazyload
|
24
25
|
res, score = max_score_code(word.gsub(/[[:space:]]/, ''))
|
25
26
|
return default_word if score < 0.4
|
26
27
|
|
@@ -34,6 +35,12 @@ module LucaRecord
|
|
34
35
|
end
|
35
36
|
end
|
36
37
|
|
38
|
+
# Search with unique code.
|
39
|
+
#
|
40
|
+
def dig(*args)
|
41
|
+
@data.dig(*args)
|
42
|
+
end
|
43
|
+
|
37
44
|
# Separate main item from other options.
|
38
45
|
# If options specified as Array of string, it works as safe list filter.
|
39
46
|
#
|
@@ -49,7 +56,6 @@ module LucaRecord
|
|
49
56
|
[obj[main_key], options.compact]
|
50
57
|
end
|
51
58
|
|
52
|
-
#
|
53
59
|
# Load CSV with config options
|
54
60
|
#
|
55
61
|
def load_csv(path)
|
@@ -58,7 +64,6 @@ module LucaRecord
|
|
58
64
|
end
|
59
65
|
end
|
60
66
|
|
61
|
-
#
|
62
67
|
# load dictionary data
|
63
68
|
#
|
64
69
|
def self.load(file = @filename)
|
@@ -72,7 +77,6 @@ module LucaRecord
|
|
72
77
|
end
|
73
78
|
end
|
74
79
|
|
75
|
-
#
|
76
80
|
# generate dictionary from TSV file. Minimum assumption is as bellows:
|
77
81
|
# 1st row is converted symbol.
|
78
82
|
#
|
@@ -101,7 +105,7 @@ module LucaRecord
|
|
101
105
|
puts 'No error detected.'
|
102
106
|
nil
|
103
107
|
else
|
104
|
-
"Key #{errors.join(', ')} has nil #{target_key}."
|
108
|
+
puts "Key #{errors.join(', ')} has nil #{target_key}."
|
105
109
|
errors.count
|
106
110
|
end
|
107
111
|
end
|
@@ -109,9 +113,15 @@ module LucaRecord
|
|
109
113
|
private
|
110
114
|
|
111
115
|
def set_driver
|
112
|
-
|
113
|
-
@config =
|
114
|
-
@definitions =
|
116
|
+
@data = self.class.load(@path)
|
117
|
+
@config = @data['config']
|
118
|
+
@definitions = @data['definitions']
|
119
|
+
end
|
120
|
+
|
121
|
+
# Build Reverse dictionary for TSV data
|
122
|
+
#
|
123
|
+
def definitions_lazyload
|
124
|
+
@definitions ||= @data.each_with_object({}) { |(k, entry), h| h[entry[:label]] = k if entry[:label] }
|
115
125
|
end
|
116
126
|
|
117
127
|
def self.dict_path(filename)
|
@@ -124,7 +134,7 @@ module LucaRecord
|
|
124
134
|
|
125
135
|
def max_score_code(str)
|
126
136
|
res = @definitions.map do |k, v|
|
127
|
-
[v, match_score(str, k,
|
137
|
+
[v, match_score(str, k, 2)]
|
128
138
|
end
|
129
139
|
res.max { |x, y| x[1] <=> y[1] }
|
130
140
|
end
|
data/lib/luca_record/io.rb
CHANGED
@@ -311,6 +311,14 @@ module LucaRecord # :nodoc:
|
|
311
311
|
File.open(subpath, mode) { |f| yield(f, id_set) }
|
312
312
|
end
|
313
313
|
end
|
314
|
+
|
315
|
+
# Calculate md5sum with original digest, file content and filename(optional).
|
316
|
+
#
|
317
|
+
def update_digest(digest, str, filename = nil)
|
318
|
+
str = filename.nil? ? str : filename + str
|
319
|
+
content = Digest::MD5.new.update(str).hexdigest
|
320
|
+
Digest::MD5.new.update(digest + content).hexdigest
|
321
|
+
end
|
314
322
|
end
|
315
323
|
|
316
324
|
# git object like structure
|
@@ -433,13 +441,5 @@ module LucaRecord # :nodoc:
|
|
433
441
|
{}
|
434
442
|
end
|
435
443
|
end
|
436
|
-
|
437
|
-
# Calculate md5sum with original digest, file content and filename(optional).
|
438
|
-
#
|
439
|
-
def update_digest(digest, str, filename = nil)
|
440
|
-
str = filename.nil? ? str : filename + str
|
441
|
-
content = Digest::MD5.new.update(str).hexdigest
|
442
|
-
Digest::MD5.new.update(digest + content).hexdigest
|
443
|
-
end
|
444
444
|
end
|
445
445
|
end
|
data/lib/luca_record/version.rb
CHANGED
data/lib/luca_support/code.rb
CHANGED
@@ -132,8 +132,9 @@ module LucaSupport
|
|
132
132
|
end
|
133
133
|
|
134
134
|
def match_score(a, b, n = 2)
|
135
|
-
|
136
|
-
|
135
|
+
split_factor = [a.length, b.length, n].min
|
136
|
+
v_a = to_ngram(a, split_factor)
|
137
|
+
v_b = to_ngram(b, split_factor)
|
137
138
|
|
138
139
|
v_a.map { |item| v_b.include?(item) ? 1 : 0 }.sum / v_a.length.to_f
|
139
140
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lucarecord
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.25
|
4
|
+
version: 0.2.26
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Chuma Takahiro
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-03-
|
11
|
+
date: 2021-03-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: mail
|