mormor 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Changelog.md +11 -0
- data/exe/mormor-dump +1 -1
- data/lib/mormor/dictionary.rb +8 -6
- data/lib/mormor/fsa/enumerator.rb +1 -1
- data/lib/mormor/fsa.rb +3 -3
- metadata +5 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e56bbee95de83605fba20dc4f49fddf100e28a8da1151eb0138d7bf69e3e3093
|
4
|
+
data.tar.gz: c1a02c07b147374335616c60078a1ce6167ed12becf20b1a09855467da570649
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 15d7965789cec21ef0275d5dd2c018bb6a4c0fe33b5922e176946f7ae3f5723aa11b30476a2825d3840678e531ebb39a95bc457c27f68f84167f1b3f1aefb7e4
|
7
|
+
data.tar.gz: f794bd2f339c105e0975990c21e377f6862b91c01ecac679f06128ae816f602277903290111bc7b4425af2a8bb0ff2999ade4aa349a959dddcf3ae56731f32e2
|
data/Changelog.md
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
# MorMor changelog
|
2
|
+
|
3
|
+
## 0.0.2 / 2022-09-18
|
4
|
+
|
5
|
+
* Minimum Ruby version is 2.7
|
6
|
+
* Fixed bug with `Array.slice!`, affecting German dictionary reading ([@glaszig](https://github.com/glaszig))
|
7
|
+
* Improved handling of `utf8` encoding (seems to be common way for Morfologik dictionaries to spell it)
|
8
|
+
|
9
|
+
## 0.0.1 / 2019-06-21
|
10
|
+
|
11
|
+
Initial release
|
data/exe/mormor-dump
CHANGED
data/lib/mormor/dictionary.rb
CHANGED
@@ -25,6 +25,9 @@ module MorMor
|
|
25
25
|
# @private
|
26
26
|
DECODERS = {'SUFFIX' => :suffix, 'PREFIX' => :prefix_suffix}.freeze
|
27
27
|
|
28
|
+
# @private
|
29
|
+
ENCODING_ALIASES = {'utf8' => 'UTF-8'}.freeze
|
30
|
+
|
28
31
|
# @private
|
29
32
|
attr_reader :fsa
|
30
33
|
# @return [Hash]
|
@@ -92,7 +95,7 @@ module MorMor
|
|
92
95
|
# NB: All possible values described in DictionaryAttribute.java
|
93
96
|
|
94
97
|
# Cache it to be quickly accessible
|
95
|
-
@encoding = @info.fetch('fsa.dict.encoding')
|
98
|
+
@encoding = @info.fetch('fsa.dict.encoding').then { ENCODING_ALIASES.fetch(_1, _1) }
|
96
99
|
@separator = @info.fetch('fsa.dict.separator')
|
97
100
|
@sepbyte = @separator.bytes.first
|
98
101
|
|
@@ -102,9 +105,8 @@ module MorMor
|
|
102
105
|
def read_values(path)
|
103
106
|
File.exist?(path) or fail ArgumentError, "#{path} does not exist"
|
104
107
|
File.read(path).split("\n")
|
105
|
-
.map {
|
106
|
-
.
|
107
|
-
.to_h
|
108
|
+
.map { _1.sub(/\#.*$/, '').strip }.reject(&:empty?)
|
109
|
+
.to_h { _1.split('=', 2) }
|
108
110
|
end
|
109
111
|
|
110
112
|
def choose_decoder(name)
|
@@ -115,14 +117,14 @@ module MorMor
|
|
115
117
|
def suffix(source, encoded)
|
116
118
|
truncate_suf = encoded[0...1].bytes.first.-(65) & 0xff # 65 is 'A'
|
117
119
|
# TODO: If remove == 255, means "remove all"
|
118
|
-
source[0...source.size - truncate_suf] + encoded[1
|
120
|
+
source[0...source.size - truncate_suf] + encoded[1..]
|
119
121
|
end
|
120
122
|
|
121
123
|
def prefix_suffix(source, encoded)
|
122
124
|
truncate_pref, truncate_suf = encoded[0...2].bytes.first(2).map { |b| (b - 65) & 0xff } # 65 is 'A'
|
123
125
|
# TODO: If remove == 255, means "remove all"
|
124
126
|
|
125
|
-
source[truncate_pref...source.size - truncate_suf] + encoded[2
|
127
|
+
source[truncate_pref...source.size - truncate_suf] + encoded[2..]
|
126
128
|
end
|
127
129
|
end
|
128
130
|
end
|
data/lib/mormor/fsa.rb
CHANGED
@@ -49,7 +49,7 @@ module MorMor
|
|
49
49
|
end
|
50
50
|
|
51
51
|
def each_sequence(from: root_node, &block)
|
52
|
-
Enumerator.new(self, from).then {
|
52
|
+
Enumerator.new(self, from).then { block ? _1.each(&block) : _1 }
|
53
53
|
end
|
54
54
|
|
55
55
|
def next_arc(arc)
|
@@ -67,12 +67,12 @@ module MorMor
|
|
67
67
|
end
|
68
68
|
|
69
69
|
def find_arc(node, label)
|
70
|
-
each_arc(from: node).detect {
|
70
|
+
each_arc(from: node).detect { arc_label(_1) == label } || 0
|
71
71
|
end
|
72
72
|
|
73
73
|
# Port of FSATraversal.java
|
74
74
|
# Method is left unsplit to leave original algorithm recognizable, hence rubocop:disable's
|
75
|
-
def match(sequence, node = root_node) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity
|
75
|
+
def match(sequence, node = root_node) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
|
76
76
|
return Match.new(:no) if node.zero?
|
77
77
|
|
78
78
|
sequence.each_with_index do |byte, i|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mormor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Victor Shepelev
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-09-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: backports
|
@@ -87,6 +87,7 @@ executables:
|
|
87
87
|
extensions: []
|
88
88
|
extra_rdoc_files: []
|
89
89
|
files:
|
90
|
+
- Changelog.md
|
90
91
|
- LICENSE.txt
|
91
92
|
- README.md
|
92
93
|
- exe/mormor-dump
|
@@ -108,15 +109,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
108
109
|
requirements:
|
109
110
|
- - ">="
|
110
111
|
- !ruby/object:Gem::Version
|
111
|
-
version: 2.
|
112
|
+
version: 2.7.0
|
112
113
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
113
114
|
requirements:
|
114
115
|
- - ">="
|
115
116
|
- !ruby/object:Gem::Version
|
116
117
|
version: '0'
|
117
118
|
requirements: []
|
118
|
-
|
119
|
-
rubygems_version: 2.7.7
|
119
|
+
rubygems_version: 3.1.6
|
120
120
|
signing_key:
|
121
121
|
specification_version: 4
|
122
122
|
summary: 'Morfologik dictionaries client in pure Ruby: POS tagging & spellcheck'
|