mormor 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Changelog.md +11 -0
- data/exe/mormor-dump +1 -1
- data/lib/mormor/dictionary.rb +8 -6
- data/lib/mormor/fsa/enumerator.rb +1 -1
- data/lib/mormor/fsa.rb +3 -3
- metadata +5 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e56bbee95de83605fba20dc4f49fddf100e28a8da1151eb0138d7bf69e3e3093
|
4
|
+
data.tar.gz: c1a02c07b147374335616c60078a1ce6167ed12becf20b1a09855467da570649
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 15d7965789cec21ef0275d5dd2c018bb6a4c0fe33b5922e176946f7ae3f5723aa11b30476a2825d3840678e531ebb39a95bc457c27f68f84167f1b3f1aefb7e4
|
7
|
+
data.tar.gz: f794bd2f339c105e0975990c21e377f6862b91c01ecac679f06128ae816f602277903290111bc7b4425af2a8bb0ff2999ade4aa349a959dddcf3ae56731f32e2
|
data/Changelog.md
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
# MorMor changelog
|
2
|
+
|
3
|
+
## 0.0.2 / 2022-09-18
|
4
|
+
|
5
|
+
* Minimum Ruby version is 2.7
|
6
|
+
* Fixed bug with `Array.slice!`, affecting German dictionary reading ([@glaszig](https://github.com/glaszig))
|
7
|
+
* Improved handling of `utf8` encoding (seems to be a common way for Morfologik dictionaries to spell it)
|
8
|
+
|
9
|
+
## 0.0.1 / 2019-06-21
|
10
|
+
|
11
|
+
Initial release
|
data/exe/mormor-dump
CHANGED
data/lib/mormor/dictionary.rb
CHANGED
@@ -25,6 +25,9 @@ module MorMor
|
|
25
25
|
# @private
|
26
26
|
DECODERS = {'SUFFIX' => :suffix, 'PREFIX' => :prefix_suffix}.freeze
|
27
27
|
|
28
|
+
# @private
|
29
|
+
ENCODING_ALIASES = {'utf8' => 'UTF-8'}.freeze
|
30
|
+
|
28
31
|
# @private
|
29
32
|
attr_reader :fsa
|
30
33
|
# @return [Hash]
|
@@ -92,7 +95,7 @@ module MorMor
|
|
92
95
|
# NB: All possible values described in DictionaryAttribute.java
|
93
96
|
|
94
97
|
# Cache it to be quickly accessible
|
95
|
-
@encoding = @info.fetch('fsa.dict.encoding')
|
98
|
+
@encoding = @info.fetch('fsa.dict.encoding').then { ENCODING_ALIASES.fetch(_1, _1) }
|
96
99
|
@separator = @info.fetch('fsa.dict.separator')
|
97
100
|
@sepbyte = @separator.bytes.first
|
98
101
|
|
@@ -102,9 +105,8 @@ module MorMor
|
|
102
105
|
def read_values(path)
|
103
106
|
File.exist?(path) or fail ArgumentError, "#{path} does not exist"
|
104
107
|
File.read(path).split("\n")
|
105
|
-
.map {
|
106
|
-
.
|
107
|
-
.to_h
|
108
|
+
.map { _1.sub(/\#.*$/, '').strip }.reject(&:empty?)
|
109
|
+
.to_h { _1.split('=', 2) }
|
108
110
|
end
|
109
111
|
|
110
112
|
def choose_decoder(name)
|
@@ -115,14 +117,14 @@ module MorMor
|
|
115
117
|
def suffix(source, encoded)
|
116
118
|
truncate_suf = encoded[0...1].bytes.first.-(65) & 0xff # 65 is 'A'
|
117
119
|
# TODO: If remove == 255, means "remove all"
|
118
|
-
source[0...source.size - truncate_suf] + encoded[1
|
120
|
+
source[0...source.size - truncate_suf] + encoded[1..]
|
119
121
|
end
|
120
122
|
|
121
123
|
def prefix_suffix(source, encoded)
|
122
124
|
truncate_pref, truncate_suf = encoded[0...2].bytes.first(2).map { |b| (b - 65) & 0xff } # 65 is 'A'
|
123
125
|
# TODO: If remove == 255, means "remove all"
|
124
126
|
|
125
|
-
source[truncate_pref...source.size - truncate_suf] + encoded[2
|
127
|
+
source[truncate_pref...source.size - truncate_suf] + encoded[2..]
|
126
128
|
end
|
127
129
|
end
|
128
130
|
end
|
data/lib/mormor/fsa.rb
CHANGED
@@ -49,7 +49,7 @@ module MorMor
|
|
49
49
|
end
|
50
50
|
|
51
51
|
def each_sequence(from: root_node, &block)
|
52
|
-
Enumerator.new(self, from).then {
|
52
|
+
Enumerator.new(self, from).then { block ? _1.each(&block) : _1 }
|
53
53
|
end
|
54
54
|
|
55
55
|
def next_arc(arc)
|
@@ -67,12 +67,12 @@ module MorMor
|
|
67
67
|
end
|
68
68
|
|
69
69
|
def find_arc(node, label)
|
70
|
-
each_arc(from: node).detect {
|
70
|
+
each_arc(from: node).detect { arc_label(_1) == label } || 0
|
71
71
|
end
|
72
72
|
|
73
73
|
# Port of FSATraversal.java
|
74
74
|
# Method is left unsplit to leave original algorithm recognizable, hence rubocop:disable's
|
75
|
-
def match(sequence, node = root_node) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity
|
75
|
+
def match(sequence, node = root_node) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
|
76
76
|
return Match.new(:no) if node.zero?
|
77
77
|
|
78
78
|
sequence.each_with_index do |byte, i|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mormor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Victor Shepelev
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-09-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: backports
|
@@ -87,6 +87,7 @@ executables:
|
|
87
87
|
extensions: []
|
88
88
|
extra_rdoc_files: []
|
89
89
|
files:
|
90
|
+
- Changelog.md
|
90
91
|
- LICENSE.txt
|
91
92
|
- README.md
|
92
93
|
- exe/mormor-dump
|
@@ -108,15 +109,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
108
109
|
requirements:
|
109
110
|
- - ">="
|
110
111
|
- !ruby/object:Gem::Version
|
111
|
-
version: 2.
|
112
|
+
version: 2.7.0
|
112
113
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
113
114
|
requirements:
|
114
115
|
- - ">="
|
115
116
|
- !ruby/object:Gem::Version
|
116
117
|
version: '0'
|
117
118
|
requirements: []
|
118
|
-
|
119
|
-
rubygems_version: 2.7.7
|
119
|
+
rubygems_version: 3.1.6
|
120
120
|
signing_key:
|
121
121
|
specification_version: 4
|
122
122
|
summary: 'Morfologik dictionaries client in pure Ruby: POS tagging & spellcheck'
|