mormor 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2293b20144e224eff386a73bc985a03e360dcd9a73afcde01af33ec0b0bc9d32
4
- data.tar.gz: 2e0e577ee7925c5cdc453870152b8d30698371b9c31f65dc1aa0bff3c0475e4a
3
+ metadata.gz: e56bbee95de83605fba20dc4f49fddf100e28a8da1151eb0138d7bf69e3e3093
4
+ data.tar.gz: c1a02c07b147374335616c60078a1ce6167ed12becf20b1a09855467da570649
5
5
  SHA512:
6
- metadata.gz: 63f3bb8643b13a296a8fc3dfecab97dd88f948660b9aed395d9264066edd666137c321a35efeeeda2640099d98641784064495039fa9e3a3339102a2ea6f04ed
7
- data.tar.gz: 3c42a83dfcfbfe52348559d8171ea7d10a5bb5fa4087d90824b9e82d07057cfb9748e5f55e9a586561726106c6e37a25b8ec46d67cd4bf1872118a5704c54beb
6
+ metadata.gz: 15d7965789cec21ef0275d5dd2c018bb6a4c0fe33b5922e176946f7ae3f5723aa11b30476a2825d3840678e531ebb39a95bc457c27f68f84167f1b3f1aefb7e4
7
+ data.tar.gz: f794bd2f339c105e0975990c21e377f6862b91c01ecac679f06128ae816f602277903290111bc7b4425af2a8bb0ff2999ade4aa349a959dddcf3ae56731f32e2
data/Changelog.md ADDED
@@ -0,0 +1,11 @@
1
+ # MorMor changelog
2
+
3
+ ## 0.0.2 / 2022-09-18
4
+
5
+ * Minimum Ruby version is 2.7
6
+ * Fixed a bug with `Array#slice!` that affected reading of the German dictionary ([@glaszig](https://github.com/glaszig))
7
+ * Improved handling of the `utf8` encoding name (this seems to be a common way for Morfologik dictionaries to spell `UTF-8`)
8
+
9
+ ## 0.0.1 / 2019-06-21
10
+
11
+ Initial release
data/exe/mormor-dump CHANGED
@@ -4,4 +4,4 @@ require_relative '../lib/mormor'
4
4
  path = ARGV.shift or abort "Usage: mormor-dump <dictionary>.dict"
5
5
  File.exist?(path) or abort "#{path} does not exist"
6
6
 
7
- MorMor::FSA.new(path).each_sequence(&method(:puts))
7
+ MorMor::FSA.read(path).each_sequence(&method(:puts))
@@ -25,6 +25,9 @@ module MorMor
25
25
  # @private
26
26
  DECODERS = {'SUFFIX' => :suffix, 'PREFIX' => :prefix_suffix}.freeze
27
27
 
28
+ # @private
29
+ ENCODING_ALIASES = {'utf8' => 'UTF-8'}.freeze
30
+
28
31
  # @private
29
32
  attr_reader :fsa
30
33
  # @return [Hash]
@@ -92,7 +95,7 @@ module MorMor
92
95
  # NB: All possible values described in DictionaryAttribute.java
93
96
 
94
97
  # Cache it to be quickly accessible
95
- @encoding = @info.fetch('fsa.dict.encoding')
98
+ @encoding = @info.fetch('fsa.dict.encoding').then { ENCODING_ALIASES.fetch(_1, _1) }
96
99
  @separator = @info.fetch('fsa.dict.separator')
97
100
  @sepbyte = @separator.bytes.first
98
101
 
@@ -102,9 +105,8 @@ module MorMor
102
105
  def read_values(path)
103
106
  File.exist?(path) or fail ArgumentError, "#{path} does not exist"
104
107
  File.read(path).split("\n")
105
- .map { |ln| ln.sub(/\#.*$/, '').strip }.reject(&:empty?)
106
- .map { |ln| ln.split('=', 2) }
107
- .to_h
108
+ .map { _1.sub(/\#.*$/, '').strip }.reject(&:empty?)
109
+ .to_h { _1.split('=', 2) }
108
110
  end
109
111
 
110
112
  def choose_decoder(name)
@@ -115,14 +117,14 @@ module MorMor
115
117
  def suffix(source, encoded)
116
118
  truncate_suf = encoded[0...1].bytes.first.-(65) & 0xff # 65 is 'A'
117
119
  # TODO: If remove == 255, means "remove all"
118
- source[0...source.size - truncate_suf] + encoded[1..-1]
120
+ source[0...source.size - truncate_suf] + encoded[1..]
119
121
  end
120
122
 
121
123
  def prefix_suffix(source, encoded)
122
124
  truncate_pref, truncate_suf = encoded[0...2].bytes.first(2).map { |b| (b - 65) & 0xff } # 65 is 'A'
123
125
  # TODO: If remove == 255, means "remove all"
124
126
 
125
- source[truncate_pref...source.size - truncate_suf] + encoded[2..-1]
127
+ source[truncate_pref...source.size - truncate_suf] + encoded[2..]
126
128
  end
127
129
  end
128
130
  end
@@ -54,7 +54,7 @@ module MorMor
54
54
  arcs_stack.push(fsa.end_node(arc)) unless fsa.terminal_arc?(arc)
55
55
 
56
56
  if fsa.final_arc?(arc)
57
- sequence.slice!(arcs_stack.count)
57
+ sequence.slice!(arcs_stack.count..)
58
58
  return sequence
59
59
  end
60
60
  end
data/lib/mormor/fsa.rb CHANGED
@@ -49,7 +49,7 @@ module MorMor
49
49
  end
50
50
 
51
51
  def each_sequence(from: root_node, &block)
52
- Enumerator.new(self, from).then { |e| block ? e.each(&block) : e }
52
+ Enumerator.new(self, from).then { block ? _1.each(&block) : _1 }
53
53
  end
54
54
 
55
55
  def next_arc(arc)
@@ -67,12 +67,12 @@ module MorMor
67
67
  end
68
68
 
69
69
  def find_arc(node, label)
70
- each_arc(from: node).detect { |a| arc_label(a) == label } || 0
70
+ each_arc(from: node).detect { arc_label(_1) == label } || 0
71
71
  end
72
72
 
73
73
  # Port of FSATraversal.java
74
74
  # Method is left unsplit to leave original algorithm recognizable, hence rubocop:disable's
75
- def match(sequence, node = root_node) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity
75
+ def match(sequence, node = root_node) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
76
76
  return Match.new(:no) if node.zero?
77
77
 
78
78
  sequence.each_with_index do |byte, i|
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mormor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Victor Shepelev
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-06-21 00:00:00.000000000 Z
11
+ date: 2022-09-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: backports
@@ -87,6 +87,7 @@ executables:
87
87
  extensions: []
88
88
  extra_rdoc_files: []
89
89
  files:
90
+ - Changelog.md
90
91
  - LICENSE.txt
91
92
  - README.md
92
93
  - exe/mormor-dump
@@ -108,15 +109,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
108
109
  requirements:
109
110
  - - ">="
110
111
  - !ruby/object:Gem::Version
111
- version: 2.3.0
112
+ version: 2.7.0
112
113
  required_rubygems_version: !ruby/object:Gem::Requirement
113
114
  requirements:
114
115
  - - ">="
115
116
  - !ruby/object:Gem::Version
116
117
  version: '0'
117
118
  requirements: []
118
- rubyforge_project:
119
- rubygems_version: 2.7.7
119
+ rubygems_version: 3.1.6
120
120
  signing_key:
121
121
  specification_version: 4
122
122
  summary: 'Morfologik dictionaries client in pure Ruby: POS tagging & spellcheck'