groonga-synonym 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md ADDED
@@ -0,0 +1,21 @@
1
+ # Groonga synonym
2
+
3
+ ## Description
4
+
5
+ Groonga synonym provides tools for generating synonym data for the Groonga family, such as Groonga and PGroonga.
6
+
7
+ ## Install
8
+
9
+ ```bash
10
+ gem install groonga-synonym
11
+ ```
12
+
13
+ ## Usage
14
+
15
+ ```bash
16
+ groonga-synonym-generate --source sudachi --format groonga
17
+ ```
18
+
19
+ ## License
20
+
21
+ GPLv3 or later. See `LICENSE.txt` for details.
data/Rakefile ADDED
@@ -0,0 +1,36 @@
1
+ # -*- ruby -*-
2
+ #
3
+ # Copyright (C) 2021 Sutou Kouhei <kou@clear-code.com>
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
17
+
18
+ require "rubygems"
19
+ require "bundler/gem_helper"
20
+
21
# Build the gem helper from the directory that contains this Rakefile.
helper = Bundler::GemHelper.new(__dir__)

# Use the bare version string as the version tag.
def helper.version_tag
  version
end

# Define the gem related tasks provided by Bundler::GemHelper.
helper.install

desc "Run tests"
task :test do
  ruby("test/run.rb")
end

task default: :test
@@ -0,0 +1,21 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # Copyright (C) 2021 Sutou Kouhei <kou@clear-code.com>
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
17
+
18
+ require_relative "../lib/groonga-synonym"
19
+
20
# Run the generator with the command line arguments and use the value
# returned by #run as the process exit status.
exit(GroongaSynonym::CommandLine::Generator.new.run(ARGV))
@@ -0,0 +1,51 @@
1
+ # -*- ruby -*-
2
+ #
3
+ # Copyright (C) 2021 Sutou Kouhei <kou@clear-code.com>
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
17
+
18
# Removes leading and trailing newlines from a README section and
# terminates the result with exactly one newline.
clean_white_space = lambda do |entry|
  entry.gsub(/(\A\n+|\n+\z)/, '') + "\n"
end

require_relative "lib/groonga-synonym/version"

Gem::Specification.new do |spec|
  spec.name = "groonga-synonym"
  spec.version = GroongaSynonym::VERSION
  spec.homepage = "https://github.com/groonga/groonga-synonym"
  spec.authors = ["Sutou Kouhei"]
  spec.email = ["kou@clear-code.com"]

  readme = File.read("README.md")
  readme.force_encoding("UTF-8")
  # The capture group keeps each "## ..." heading text in the split
  # result, so the body of a section is the element right after its
  # heading.
  entries = readme.split(/^\#\#\s(.*)$/)
  description = clean_white_space.call(entries[entries.index("Description") + 1])
  # The first paragraph of the section is the summary and the second one
  # (if any) is the description.
  spec.summary, spec.description, = description.split(/\n\n+/, 3)
  spec.license = "GPL-3.0+"
  spec.files = [
    "README.md",
    "LICENSE.txt",
    "Rakefile",
    "Gemfile",
    "#{spec.name}.gemspec",
  ]
  spec.files += Dir.glob("lib/**/*.rb")
  # Every file directly under bin/ is installed as an executable.
  Dir.chdir("bin") do
    spec.executables = Dir.glob("*")
  end

  spec.add_runtime_dependency("red-datasets", ">= 0.1.3")
end
@@ -0,0 +1,20 @@
1
+ # Copyright (C) 2021 Sutou Kouhei <kou@clear-code.com>
2
+ #
3
+ # This program is free software: you can redistribute it and/or modify
4
+ # it under the terms of the GNU General Public License as published by
5
+ # the Free Software Foundation, either version 3 of the License, or
6
+ # (at your option) any later version.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU General Public License
14
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
15
+
16
+ require_relative "groonga-synonym/command-line/generator"
17
+ require_relative "groonga-synonym/groonga-generator"
18
+ require_relative "groonga-synonym/pgroonga-generator"
19
+ require_relative "groonga-synonym/sudachi"
20
+ require_relative "groonga-synonym/version"
@@ -0,0 +1,176 @@
1
+ # Copyright (C) 2021 Sutou Kouhei <kou@clear-code.com>
2
+ #
3
+ # This program is free software: you can redistribute it and/or modify
4
+ # it under the terms of the GNU General Public License as published by
5
+ # the Free Software Foundation, either version 3 of the License, or
6
+ # (at your option) any later version.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU General Public License
14
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
15
+
16
+ require "optparse"
17
+
18
module GroongaSynonym
  module CommandLine
    # Command line front end: parses options, creates the synonym source
    # and writes the synonyms with the generator of the selected format.
    class Generator
      # Values accepted by --source. The first one is the default.
      AVAILABLE_SOURCES = [
        :sudachi,
      ].freeze

      # Values accepted by --format. The first one is the default.
      AVAILABLE_FORMATS = [
        :groonga,
        :pgroonga,
      ].freeze

      # output: where the result is written. nil and "-" mean the standard
      # output, any other String is used as a path to open for writing
      # and any other object is passed to the generator as is.
      def initialize(output=nil)
        @source = AVAILABLE_SOURCES.first
        @format = AVAILABLE_FORMATS.first
        @table = nil
        @term_column = nil
        @synonyms_column = nil
        @synonyms_column_is_vector = true
        @output = output || "-"
        # Per format values used when the matching option isn't given.
        @defaults = {
          groonga: {
            table: "Thesaurus",
            term_column: "_key",
            synonyms_column: "synonyms",
          },
          pgroonga: {
            table: "thesaurus",
            term_column: "term",
            synonyms_column: "synonyms",
          },
        }
      end

      # Parses args and generates the synonyms.
      #
      # Returns true when the generation finished and also when --version
      # or --help was processed (they leave through throw(tag, true)).
      def run(args)
        catch do |tag|
          parse_args(args, tag)
          source = create_source
          open_output do |output|
            generator = create_generator(source, output)
            generator.generate
            true
          end
        end
      end

      private
      # Formats the available values for the help message: "[a, b]".
      def format_availables(availables)
        "[" + availables.join(", ") + "]"
      end

      # Returns one help line per format that shows the default of key
      # for the format: "FORMAT: (DEFAULT)".
      def format_defaults(key)
        AVAILABLE_FORMATS.map do |format|
          "#{format}: (#{@defaults[format][key]})"
        end
      end

      # Parses args into instance variables. args itself isn't modified
      # because a copy is parsed. --version and --help print their message
      # and throw tag with true to stop the run.
      def parse_args(args, tag)
        parser = OptionParser.new
        parser.on("--source=SOURCE",
                  AVAILABLE_SOURCES,
                  "Synonym source",
                  format_availables(AVAILABLE_SOURCES),
                  "(#{@source})") do |source|
          @source = source
        end
        parser.on("--format=FORMAT",
                  AVAILABLE_FORMATS,
                  "Output format",
                  format_availables(AVAILABLE_FORMATS),
                  "(#{@format})") do |format|
          @format = format
        end
        parser.on("--table=TABLE",
                  "Synonyms table's name",
                  *format_defaults(:table)) do |table|
          @table = table
        end
        parser.on("--term-column=COLUMN",
                  "Term column's name",
                  *format_defaults(:term_column)) do |column|
          @term_column = column
        end
        parser.on("--synonyms-column=COLUMN",
                  "Synonyms column's name",
                  *format_defaults(:synonyms_column)) do |column|
          @synonyms_column = column
        end
        # "groonga" is an output format, not a source: the value is only
        # passed to the generator of the groonga format.
        parser.on("--no-synonyms-column-is-vector",
                  "Synonyms column isn't a vector column",
                  "This is only for 'groonga' format") do |boolean|
          @synonyms_column_is_vector = boolean
        end
        parser.on("--output=OUTPUT",
                  "Output path",
                  "'-' means the standard output",
                  "(#{@output})") do |output|
          @output = output
        end
        parser.on("--version",
                  "Show version and exit") do
          puts(VERSION)
          throw(tag, true)
        end
        parser.on("--help",
                  "Show this message and exit") do
          puts(parser.help)
          throw(tag, true)
        end
        parser.parse!(args.dup)
      end

      # Yields the object to write to and returns the block's value.
      def open_output(&block)
        case @output
        when "-"
          yield($stdout)
        when String
          File.open(@output, "w", &block)
        else
          yield(@output)
        end
      end

      # Creates the synonym source selected by --source.
      def create_source
        case @source
        when :sudachi
          Sudachi.new
        end
      end

      # Creates the generator selected by --format. Options that weren't
      # given fall back to the defaults of the format.
      def create_generator(source, output)
        options = {
          output: output,
        }
        case @format
        when :groonga
          default = @defaults[:groonga]
          term_column = @term_column || default[:term_column]
          synonyms_column = @synonyms_column || default[:synonyms_column]
          options[:synonyms_column_is_vector] = @synonyms_column_is_vector
          GroongaGenerator.new(source,
                               term_column,
                               synonyms_column,
                               **options)
        when :pgroonga
          default = @defaults[:pgroonga]
          table = @table || default[:table]
          term_column = @term_column || default[:term_column]
          synonyms_column = @synonyms_column || default[:synonyms_column]
          PGroongaGenerator.new(source,
                                table,
                                term_column,
                                synonyms_column,
                                **options)
        end
      end
    end
  end
end
@@ -0,0 +1,55 @@
1
+ # Copyright (C) 2021 Sutou Kouhei <kou@clear-code.com>
2
+ #
3
+ # This program is free software: you can redistribute it and/or modify
4
+ # it under the terms of the GNU General Public License as published by
5
+ # the Free Software Foundation, either version 3 of the License, or
6
+ # (at your option) any later version.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU General Public License
14
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
15
+
16
+ require "json"
17
+
18
module GroongaSynonym
  # Writes the synonyms of a source as a JSON array: the first element is
  # the pair of column names and each following element is one record.
  class GroongaGenerator
    # source: object whose #each yields a term and its synonyms. Each
    #   synonym must respond to #to_groonga.
    # synonyms_column_is_vector: when true, the synonyms are written as
    #   an array. Otherwise they are written as one string of
    #   parenthesized synonyms joined with " OR ".
    # output: object that receives the result through #print.
    def initialize(source,
                   term_column,
                   synonyms_column,
                   synonyms_column_is_vector: true,
                   output: $stdout)
      @source = source
      @term_column = term_column
      @synonyms_column = synonyms_column
      @synonyms_column_is_vector = synonyms_column_is_vector
      @output = output
    end

    # Writes the column names followed by every record of the source.
    def generate
      @output.print("[\n")
      header = [@term_column, @synonyms_column]
      @output.print(header.to_json)
      @source.each do |term, synonyms|
        # Each record is preceded by a separator, so the last record is
        # never followed by a trailing comma.
        @output.print(",\n")
        @output.print([term, format_synonyms(synonyms)].to_json)
      end
      @output.print("\n]\n")
    end

    private
    # Converts the synonyms to the value stored in the synonyms column.
    def format_synonyms(synonyms)
      groonga_synonyms = synonyms.map(&:to_groonga)
      return groonga_synonyms if @synonyms_column_is_vector

      groonga_synonyms.map { |synonym| "(#{synonym})" }.join(" OR ")
    end
  end
end
@@ -0,0 +1,59 @@
1
+ # Copyright (C) 2021 Sutou Kouhei <kou@clear-code.com>
2
+ #
3
+ # This program is free software: you can redistribute it and/or modify
4
+ # it under the terms of the GNU General Public License as published by
5
+ # the Free Software Foundation, either version 3 of the License, or
6
+ # (at your option) any later version.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU General Public License
14
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
15
+
16
+ require "json"
17
+
18
module GroongaSynonym
  # Writes the synonyms of a source as one SQL INSERT statement whose
  # synonyms column value is an ARRAY of strings.
  class PGroongaGenerator
    # source: object whose #each yields a term and its synonyms. Each
    #   synonym must respond to #to_groonga.
    # table, term_column, synonyms_column: names that are embedded into
    #   the statement as is (they aren't quoted).
    # output: object that receives the result through #print.
    def initialize(source,
                   table,
                   term_column,
                   synonyms_column,
                   output: $stdout)
      @source = source
      @table = table
      @term_column = term_column
      @synonyms_column = synonyms_column
      @output = output
    end

    # Writes the INSERT statement. Nothing is written when the source
    # yields no record because "INSERT ... VALUES;" without any row isn't
    # a valid statement.
    def generate
      record_written = false
      @source.each do |term, synonyms|
        if record_written
          @output.print(",")
        else
          # The statement header is delayed until the first record so
          # that an empty source doesn't produce a broken statement.
          @output.print("INSERT INTO #{@table} ")
          @output.print("(#{@term_column}, #{@synonyms_column}) ")
          @output.print("VALUES")
          record_written = true
        end
        @output.print("\n")
        formatted_synonyms = synonyms.map do |synonym|
          escape(synonym.to_groonga)
        end
        @output.print(" (#{escape(term)}, ARRAY[")
        @output.print(formatted_synonyms.join(", "))
        @output.print("])")
      end
      @output.print(";\n") if record_written
    end

    private
    # Returns string as a single quoted SQL string literal. Single quotes
    # in string are doubled.
    def escape(string)
      "'#{string.gsub("'", "''")}'"
    end
  end
end