groonga-synonym 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,122 @@
1
+ # Copyright (C) 2021 Sutou Kouhei <kou@clear-code.com>
2
+ #
3
+ # This program is free software: you can redistribute it and/or modify
4
+ # it under the terms of the GNU General Public License as published by
5
+ # the Free Software Foundation, either version 3 of the License, or
6
+ # (at your option) any later version.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU General Public License
14
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
15
+
16
+ require "datasets"
17
+
18
+ require_relative "synonym"
19
+
20
module GroongaSynonym
  # Builds weighted synonym lists from the Sudachi synonym dictionary
  # dataset.
  #
  # Iterating yields a term (the notation of an entry whose expansion
  # type is :always) together with an array of Synonym objects for that
  # term. The array normally contains the term itself; its weight is nil
  # unless the term had to be re-weighted by #filter_groups.
  class Sudachi
    include Enumerable

    def initialize
      @dataset = Datasets::SudachiSynonymDictionary.new
    end

    # Yields +term+ and +synonyms+ for every collected term.
    #
    # Returns an Enumerator when called without a block.
    def each(&block)
      return to_enum(__method__) unless block_given?

      groups = {}
      group = nil
      @dataset.each do |synonym|
        # A group is a run of consecutive records sharing a group ID.
        # Checking +group.nil?+ first means the very first record always
        # opens a group, even when its group ID is nil.
        if group.nil? || synonym.group_id != group.first.group_id
          emit_synonyms(groups, group)
          group = [synonym]
        else
          group << synonym
        end
      end
      # Flush the last run; emit_synonyms ignores nil (empty dataset).
      emit_synonyms(groups, group)
      filter_groups(groups, &block)
    end

    private

    # Registers the synonyms of one dictionary group into +groups+
    # (a Hash of term => Array of Synonym).
    #
    # Entries with expansion type :never are dropped. Groups left with
    # at most one entry produce nothing. Every remaining entry whose
    # expansion type is :always becomes a term.
    def emit_synonyms(groups, group)
      return if group.nil?

      target_synonyms = group.reject do |synonym|
        synonym.expansion_type == :never
      end
      return if target_synonyms.size <= 1

      target_synonyms.each_with_index do |typical, i|
        next unless typical.expansion_type == :always

        term = typical.notation
        synonyms = target_synonyms.each_with_index.map do |synonym, j|
          Synonym.new(synonym.notation,
                      synonym_weight(typical, synonym, i == j))
        end
        # The same notation may already be registered by an earlier
        # group (e.g.: 働き手); merge instead of overwriting.
        if groups.key?(term)
          groups[term] |= synonyms
        else
          groups[term] = synonyms
        end
      end
    end

    # Weight of +synonym+ relative to +typical+: nil for the typical
    # entry itself, 0.8 when both share a lexeme ID, 0.6 otherwise.
    def synonym_weight(typical, synonym, is_typical)
      return nil if is_typical

      synonym.lexeme_id == typical.lexeme_id ? 0.8 : 0.6
    end

    # Prunes each collected synonym list and yields +term+ with the
    # pruned list.
    #
    # Among the weighted synonyms of a term:
    # * one whose text contains another weighted synonym's text is
    #   removed;
    # * one whose text contains the term is removed;
    # * of those contained in the term, only the shortest is kept. When
    #   any exist, the term's own nil-weight entry is replaced by one
    #   weighted 1.0 minus the kept synonym's weight, rounded to two
    #   decimals.
    def filter_groups(groups)
      groups.each do |term, synonyms|
        typicals, other_synonyms = synonyms.partition do |synonym|
          synonym.weight.nil?
        end
        typical_synonym = typicals.last

        others_sub_synonyms = []
        sub_synonyms = []
        super_synonyms = []
        other_synonyms.each do |synonym|
          contains_other = other_synonyms.any? do |other_synonym|
            other_synonym != synonym &&
              synonym.term.include?(other_synonym.term)
          end
          if contains_other
            others_sub_synonyms << synonym
          elsif term.include?(synonym.term)
            sub_synonyms << synonym
          elsif synonym.term.include?(term)
            super_synonyms << synonym
          end
        end

        # Array#- returns a new array, so the lists stored in +groups+
        # are never mutated below.
        synonyms -= others_sub_synonyms
        synonyms -= super_synonyms
        unless sub_synonyms.empty?
          sorted_sub_synonyms = sub_synonyms.sort_by do |synonym|
            synonym.term.size
          end
          shortest_sub_synonym, *longer_sub_synonyms = sorted_sub_synonyms
          synonyms -= longer_sub_synonyms
          synonyms.delete(typical_synonym)
          synonyms << Synonym.new(typical_synonym.term,
                                  (1.0 - shortest_sub_synonym.weight).round(2))
        end
        yield(term, synonyms)
      end
    end
  end
end
@@ -0,0 +1,61 @@
1
+ # Copyright (C) 2021 Sutou Kouhei <kou@clear-code.com>
2
+ #
3
+ # This program is free software: you can redistribute it and/or modify
4
+ # it under the terms of the GNU General Public License as published by
5
+ # the Free Software Foundation, either version 3 of the License, or
6
+ # (at your option) any later version.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU General Public License
14
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
15
+
16
module GroongaSynonym
  # A term paired with an optional weight.
  #
  # Two synonyms are equal (and hash alike) when both term and weight
  # match, so instances can be used with Array#|, Array#- and
  # Array#uniq.
  class Synonym
    attr_reader :term
    attr_reader :weight

    # term::   the synonym text.
    # weight:: numeric weight, or nil for "no weight".
    def initialize(term, weight=nil)
      @term = term
      @weight = weight
    end

    # Formats the synonym as query text.
    #
    # With no weight, or a weight equal to 1.0, the result is just the
    # escaped term. Otherwise the escaped term is prefixed with ">" and
    # the adjustment (weight - 1), e.g. weight 0.8 gives ">-0.2term".
    def to_groonga
      escaped = escape_term(@term)
      return escaped unless @weight && @weight != 1.0

      ">#{format_adjustment(@weight - 1)}#{escaped}"
    end

    def ==(other)
      other.is_a?(self.class) &&
        @term == other.term &&
        @weight == other.weight
    end

    def eql?(other)
      self == other
    end

    def hash
      [@term, @weight].hash
    end

    private

    # Renders +value+ with "%f" and trims trailing zeros. A decimal
    # point left dangling by the trim is removed as well, so 1.0 is
    # rendered as "1" rather than "1.".
    def format_adjustment(value)
      ("%f" % value).sub(/\.?0+\z/, "")
    end

    # Escapes +term+ for use inside a query.
    #
    # The literal term "OR" is wrapped in double quotes (presumably so
    # it is not read as the OR operator -- confirm against the Groonga
    # query syntax). Otherwise each of " ( ) \ * : + - is prefixed with
    # a backslash, and a term containing a space is wrapped in double
    # quotes.
    def escape_term(term)
      return "\"#{term}\"" if term == "OR"

      escaped = term.gsub(/["()\\*:+-]/) do |matched|
        "\\#{matched}"
      end
      escaped.include?(" ") ? "\"#{escaped}\"" : escaped
    end
  end
end
@@ -0,0 +1,18 @@
1
+ # Copyright (C) 2021 Sutou Kouhei <kou@clear-code.com>
2
+ #
3
+ # This program is free software: you can redistribute it and/or modify
4
+ # it under the terms of the GNU General Public License as published by
5
+ # the Free Software Foundation, either version 3 of the License, or
6
+ # (at your option) any later version.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU General Public License
14
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
15
+
16
module GroongaSynonym
  # Version of the groonga-synonym gem. Frozen so the shared constant
  # cannot be mutated in place.
  VERSION = "1.0.0".freeze
end
metadata ADDED
@@ -0,0 +1,71 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: groonga-synonym
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Sutou Kouhei
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2021-07-15 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: red-datasets
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 0.1.3
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 0.1.3
27
+ description: ''
28
+ email:
29
+ - kou@clear-code.com
30
+ executables:
31
+ - groonga-synonym-generate
32
+ extensions: []
33
+ extra_rdoc_files: []
34
+ files:
35
+ - Gemfile
36
+ - LICENSE.txt
37
+ - README.md
38
+ - Rakefile
39
+ - bin/groonga-synonym-generate
40
+ - groonga-synonym.gemspec
41
+ - lib/groonga-synonym.rb
42
+ - lib/groonga-synonym/command-line/generator.rb
43
+ - lib/groonga-synonym/groonga-generator.rb
44
+ - lib/groonga-synonym/pgroonga-generator.rb
45
+ - lib/groonga-synonym/sudachi.rb
46
+ - lib/groonga-synonym/synonym.rb
47
+ - lib/groonga-synonym/version.rb
48
+ homepage: https://github.com/groonga/groonga-synonym
49
+ licenses:
50
+ - GPL-3.0+
51
+ metadata: {}
52
+ post_install_message:
53
+ rdoc_options: []
54
+ require_paths:
55
+ - lib
56
+ required_ruby_version: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: '0'
61
+ required_rubygems_version: !ruby/object:Gem::Requirement
62
+ requirements:
63
+ - - ">="
64
+ - !ruby/object:Gem::Version
65
+ version: '0'
66
+ requirements: []
67
+ rubygems_version: 3.3.0.dev
68
+ signing_key:
69
+ specification_version: 4
70
+ summary: Groonga synonym provides tools for synonym of Groonga families.
71
+ test_files: []