mormor 0.0.1 → 0.0.2

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2293b20144e224eff386a73bc985a03e360dcd9a73afcde01af33ec0b0bc9d32
4
- data.tar.gz: 2e0e577ee7925c5cdc453870152b8d30698371b9c31f65dc1aa0bff3c0475e4a
3
+ metadata.gz: e56bbee95de83605fba20dc4f49fddf100e28a8da1151eb0138d7bf69e3e3093
4
+ data.tar.gz: c1a02c07b147374335616c60078a1ce6167ed12becf20b1a09855467da570649
5
5
  SHA512:
6
- metadata.gz: 63f3bb8643b13a296a8fc3dfecab97dd88f948660b9aed395d9264066edd666137c321a35efeeeda2640099d98641784064495039fa9e3a3339102a2ea6f04ed
7
- data.tar.gz: 3c42a83dfcfbfe52348559d8171ea7d10a5bb5fa4087d90824b9e82d07057cfb9748e5f55e9a586561726106c6e37a25b8ec46d67cd4bf1872118a5704c54beb
6
+ metadata.gz: 15d7965789cec21ef0275d5dd2c018bb6a4c0fe33b5922e176946f7ae3f5723aa11b30476a2825d3840678e531ebb39a95bc457c27f68f84167f1b3f1aefb7e4
7
+ data.tar.gz: f794bd2f339c105e0975990c21e377f6862b91c01ecac679f06128ae816f602277903290111bc7b4425af2a8bb0ff2999ade4aa349a959dddcf3ae56731f32e2
data/Changelog.md ADDED
@@ -0,0 +1,11 @@
1
+ # MorMor changelog
2
+
3
+ ## 0.0.2 / 2022-09-18
4
+
5
+ * Minimum Ruby version is 2.7
6
+ * Fixed bug with `Array.slice!`, affecting German dictionary reading ([@glaszig](https://github.com/glaszig))
7
+ * Improved handling of `utf8` encoding (seems to be common way for Morfologik dictionaries to spell it)
8
+
9
+ ## 0.0.1 / 2019-06-21
10
+
11
+ Initial release
data/exe/mormor-dump CHANGED
@@ -4,4 +4,4 @@ require_relative '../lib/mormor'
4
4
  path = ARGV.shift or abort "Usage: mormor-dump <dictionary>.dict"
5
5
  File.exist?(path) or abort "#{path} does not exist"
6
6
 
7
- MorMor::FSA.new(path).each_sequence(&method(:puts))
7
+ MorMor::FSA.read(path).each_sequence(&method(:puts))
@@ -25,6 +25,9 @@ module MorMor
25
25
  # @private
26
26
  DECODERS = {'SUFFIX' => :suffix, 'PREFIX' => :prefix_suffix}.freeze
27
27
 
28
+ # @private
29
+ ENCODING_ALIASES = {'utf8' => 'UTF-8'}.freeze
30
+
28
31
  # @private
29
32
  attr_reader :fsa
30
33
  # @return [Hash]
@@ -92,7 +95,7 @@ module MorMor
92
95
  # NB: All possible values described in DictionaryAttribute.java
93
96
 
94
97
  # Cache it to be quickly accessible
95
- @encoding = @info.fetch('fsa.dict.encoding')
98
+ @encoding = @info.fetch('fsa.dict.encoding').then { ENCODING_ALIASES.fetch(_1, _1) }
96
99
  @separator = @info.fetch('fsa.dict.separator')
97
100
  @sepbyte = @separator.bytes.first
98
101
 
@@ -102,9 +105,8 @@ module MorMor
102
105
  def read_values(path)
103
106
  File.exist?(path) or fail ArgumentError, "#{path} does not exist"
104
107
  File.read(path).split("\n")
105
- .map { |ln| ln.sub(/\#.*$/, '').strip }.reject(&:empty?)
106
- .map { |ln| ln.split('=', 2) }
107
- .to_h
108
+ .map { _1.sub(/\#.*$/, '').strip }.reject(&:empty?)
109
+ .to_h { _1.split('=', 2) }
108
110
  end
109
111
 
110
112
  def choose_decoder(name)
@@ -115,14 +117,14 @@ module MorMor
115
117
  def suffix(source, encoded)
116
118
  truncate_suf = encoded[0...1].bytes.first.-(65) & 0xff # 65 is 'A'
117
119
  # TODO: If remove == 255, means "remove all"
118
- source[0...source.size - truncate_suf] + encoded[1..-1]
120
+ source[0...source.size - truncate_suf] + encoded[1..]
119
121
  end
120
122
 
121
123
  def prefix_suffix(source, encoded)
122
124
  truncate_pref, truncate_suf = encoded[0...2].bytes.first(2).map { |b| (b - 65) & 0xff } # 65 is 'A'
123
125
  # TODO: If remove == 255, means "remove all"
124
126
 
125
- source[truncate_pref...source.size - truncate_suf] + encoded[2..-1]
127
+ source[truncate_pref...source.size - truncate_suf] + encoded[2..]
126
128
  end
127
129
  end
128
130
  end
@@ -54,7 +54,7 @@ module MorMor
54
54
  arcs_stack.push(fsa.end_node(arc)) unless fsa.terminal_arc?(arc)
55
55
 
56
56
  if fsa.final_arc?(arc)
57
- sequence.slice!(arcs_stack.count)
57
+ sequence.slice!(arcs_stack.count..)
58
58
  return sequence
59
59
  end
60
60
  end
data/lib/mormor/fsa.rb CHANGED
@@ -49,7 +49,7 @@ module MorMor
49
49
  end
50
50
 
51
51
  def each_sequence(from: root_node, &block)
52
- Enumerator.new(self, from).then { |e| block ? e.each(&block) : e }
52
+ Enumerator.new(self, from).then { block ? _1.each(&block) : _1 }
53
53
  end
54
54
 
55
55
  def next_arc(arc)
@@ -67,12 +67,12 @@ module MorMor
67
67
  end
68
68
 
69
69
  def find_arc(node, label)
70
- each_arc(from: node).detect { |a| arc_label(a) == label } || 0
70
+ each_arc(from: node).detect { arc_label(_1) == label } || 0
71
71
  end
72
72
 
73
73
  # Port of FSATraversal.java
74
74
  # Method is left unsplit to leave original algorithm recognizable, hence rubocop:disable's
75
- def match(sequence, node = root_node) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity
75
+ def match(sequence, node = root_node) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
76
76
  return Match.new(:no) if node.zero?
77
77
 
78
78
  sequence.each_with_index do |byte, i|
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mormor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Victor Shepelev
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-06-21 00:00:00.000000000 Z
11
+ date: 2022-09-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: backports
@@ -87,6 +87,7 @@ executables:
87
87
  extensions: []
88
88
  extra_rdoc_files: []
89
89
  files:
90
+ - Changelog.md
90
91
  - LICENSE.txt
91
92
  - README.md
92
93
  - exe/mormor-dump
@@ -108,15 +109,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
108
109
  requirements:
109
110
  - - ">="
110
111
  - !ruby/object:Gem::Version
111
- version: 2.3.0
112
+ version: 2.7.0
112
113
  required_rubygems_version: !ruby/object:Gem::Requirement
113
114
  requirements:
114
115
  - - ">="
115
116
  - !ruby/object:Gem::Version
116
117
  version: '0'
117
118
  requirements: []
118
- rubyforge_project:
119
- rubygems_version: 2.7.7
119
+ rubygems_version: 3.1.6
120
120
  signing_key:
121
121
  specification_version: 4
122
122
  summary: 'Morfologik dictionaries client in pure Ruby: POS tagging & spellcheck'