groonga-synonym 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.md ADDED
@@ -0,0 +1,21 @@
1
+ # Groonga synonym
2
+
3
+ ## Description
4
+
5
+ Groonga synonym provides tools for working with synonyms in the Groonga family (such as Groonga and PGroonga).
6
+
7
+ ## Install
8
+
9
+ ```bash
10
+ gem install groonga-synonym
11
+ ```
12
+
13
+ ## Usage
14
+
15
+ ```bash
16
+ groonga-synonym-generate --source sudachi --format groonga
17
+ ```
18
+
19
+ ## License
20
+
21
+ GPLv3 or later. See `LICENSE.txt` for details.
data/Rakefile ADDED
@@ -0,0 +1,36 @@
1
+ # -*- ruby -*-
2
+ #
3
+ # Copyright (C) 2021 Sutou Kouhei <kou@clear-code.com>
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
17
+
18
+ require "rubygems"
19
+ require "bundler/gem_helper"
20
+
21
# Gem tasks are built from the gemspec that lives next to this Rakefile.
helper = Bundler::GemHelper.new(__dir__)

# Use the bare version string as the release tag name.
# NOTE(review): Bundler's default tag is presumably "v" + version; confirm
# against the Bundler::GemHelper documentation.
def helper.version_tag
  version
end

# Register the gem tasks provided by Bundler::GemHelper.
helper.install

desc "Run tests"
task :test do
  ruby("test/run.rb")
end

task default: :test
@@ -0,0 +1,21 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # Copyright (C) 2021 Sutou Kouhei <kou@clear-code.com>
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
17
+
18
require_relative "../lib/groonga-synonym"

# Run the generator with the process arguments and hand its result to exit.
exit(GroongaSynonym::CommandLine::Generator.new.run(ARGV))
@@ -0,0 +1,51 @@
1
+ # -*- ruby -*-
2
+ #
3
+ # Copyright (C) 2021 Sutou Kouhei <kou@clear-code.com>
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
17
+
18
# Strips leading and trailing newlines from a README section and terminates
# the result with exactly one newline.
clean_white_space = lambda do |entry|
  entry.gsub(/(\A\n+|\n+\z)/, '') + "\n"
end

require_relative "lib/groonga-synonym/version"

Gem::Specification.new do |spec|
  spec.name = "groonga-synonym"
  spec.version = GroongaSynonym::VERSION
  spec.homepage = "https://github.com/groonga/groonga-synonym"
  spec.authors = ["Sutou Kouhei"]
  spec.email = ["kou@clear-code.com"]

  # The summary and description are taken from the "## Description" section
  # of README.md. Splitting on "## <title>" lines with a capture group yields
  # [preamble, title, body, title, body, ...], so the body of a section is the
  # element right after its title.
  readme = File.read("README.md")
  readme.force_encoding("UTF-8")
  entries = readme.split(/^\#\#\s(.*)$/)
  description_index = entries.index("Description") + 1
  description = clean_white_space.call(entries[description_index])
  # First paragraph is the summary, second paragraph (if any) the description.
  spec.summary, spec.description, = description.split(/\n\n+/, 3)
  spec.license = "GPL-3.0+"
  spec.files = [
    "README.md",
    "LICENSE.txt",
    "Rakefile",
    "Gemfile",
    "#{spec.name}.gemspec",
  ]
  spec.files += Dir.glob("lib/**/*.rb")
  # Every file directly under bin/ is installed as an executable.
  Dir.chdir("bin") do
    spec.executables = Dir.glob("*")
  end

  spec.add_runtime_dependency("red-datasets", ">= 0.1.3")
end
@@ -0,0 +1,20 @@
1
+ # Copyright (C) 2021 Sutou Kouhei <kou@clear-code.com>
2
+ #
3
+ # This program is free software: you can redistribute it and/or modify
4
+ # it under the terms of the GNU General Public License as published by
5
+ # the Free Software Foundation, either version 3 of the License, or
6
+ # (at your option) any later version.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU General Public License
14
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
15
+
16
+ require_relative "groonga-synonym/command-line/generator"
17
+ require_relative "groonga-synonym/groonga-generator"
18
+ require_relative "groonga-synonym/pgroonga-generator"
19
+ require_relative "groonga-synonym/sudachi"
20
+ require_relative "groonga-synonym/version"
@@ -0,0 +1,176 @@
1
+ # Copyright (C) 2021 Sutou Kouhei <kou@clear-code.com>
2
+ #
3
+ # This program is free software: you can redistribute it and/or modify
4
+ # it under the terms of the GNU General Public License as published by
5
+ # the Free Software Foundation, either version 3 of the License, or
6
+ # (at your option) any later version.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU General Public License
14
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
15
+
16
+ require "optparse"
17
+
18
module GroongaSynonym
  module CommandLine
    # Command line front end: parses options, builds the selected synonym
    # source and writes it through the generator for the selected format.
    class Generator
      # Values accepted by --source. The first entry is the default.
      AVAILABLE_SOURCES = [
        :sudachi,
      ].freeze

      # Values accepted by --format. The first entry is the default.
      AVAILABLE_FORMATS = [
        :groonga,
        :pgroonga,
      ].freeze

      # output: where the result is written. nil or "-" means the standard
      # output, any other String is a path opened for writing, and any other
      # object is handed to the format generator as is.
      def initialize(output=nil)
        @source = AVAILABLE_SOURCES.first
        @format = AVAILABLE_FORMATS.first
        # nil means "not given on the command line"; the per-format default
        # from @defaults is used in that case.
        @table = nil
        @term_column = nil
        @synonyms_column = nil
        @synonyms_column_is_vector = true
        @output = output || "-"
        @defaults = {
          groonga: {
            table: "Thesaurus",
            term_column: "_key",
            synonyms_column: "synonyms",
          },
          pgroonga: {
            table: "thesaurus",
            term_column: "term",
            synonyms_column: "synonyms",
          },
        }
      end

      # Parses args and generates the synonym data.
      #
      # Returns true when generation finished, and also true when --help or
      # --version was handled (those options throw to the catch below).
      # Errors raised while parsing options are not rescued here.
      def run(args)
        catch do |tag|
          parse_args(args, tag)
          source = create_source
          open_output do |output|
            create_generator(source, output).generate
            true
          end
        end
      end

      private
      # Formats a list of accepted values for the help message.
      def format_availables(availables)
        "[" + availables.join(", ") + "]"
      end

      # Returns one help line per format showing that format's default
      # value for key.
      def format_defaults(key)
        AVAILABLE_FORMATS.map do |format|
          "#{format}: (#{@defaults[format][key]})"
        end
      end

      # Defines all options and parses a copy of args, so the caller's array
      # is left untouched. --help and --version print to the standard output
      # and throw true to tag.
      def parse_args(args, tag)
        parser = OptionParser.new
        parser.on("--source=SOURCE",
                  AVAILABLE_SOURCES,
                  "Synonym source",
                  format_availables(AVAILABLE_SOURCES),
                  "(#{@source})") do |source|
          @source = source
        end
        parser.on("--format=FORMAT",
                  AVAILABLE_FORMATS,
                  "Output format",
                  format_availables(AVAILABLE_FORMATS),
                  "(#{@format})") do |format|
          @format = format
        end
        parser.on("--table=TABLE",
                  "Synonyms table's name",
                  *format_defaults(:table)) do |table|
          @table = table
        end
        parser.on("--term-column=COLUMN",
                  "Term column's name",
                  *format_defaults(:term_column)) do |column|
          @term_column = column
        end
        parser.on("--synonyms-column=COLUMN",
                  "Synonyms column's name",
                  *format_defaults(:synonyms_column)) do |column|
          @synonyms_column = column
        end
        # The value is only forwarded to the 'groonga' format generator;
        # 'groonga' is a format here, not a source.
        parser.on("--no-synonyms-column-is-vector",
                  "Synonyms column isn't a vector column",
                  "This is only for 'groonga' format") do |boolean|
          @synonyms_column_is_vector = boolean
        end
        parser.on("--output=OUTPUT",
                  "Output path",
                  "'-' means the standard output",
                  "(#{@output})") do |output|
          @output = output
        end
        parser.on("--version",
                  "Show version and exit") do
          puts(VERSION)
          throw(tag, true)
        end
        parser.on("--help",
                  "Show this message and exit") do
          puts(parser.help)
          throw(tag, true)
        end
        parser.parse!(args.dup)
      end

      # Yields the object to write to; see #initialize for how @output is
      # interpreted. A file opened here is closed when the block returns.
      def open_output(&block)
        case @output
        when "-"
          yield($stdout)
        when String
          File.open(@output, "w", &block)
        else
          yield(@output)
        end
      end

      # Builds the synonym source selected by --source.
      def create_source
        case @source
        when :sudachi
          Sudachi.new
        end
      end

      # Builds the generator for the selected format, filling in the
      # per-format defaults for names not given on the command line.
      def create_generator(source, output)
        options = {
          output: output,
        }
        default = @defaults[@format]
        case @format
        when :groonga
          options[:synonyms_column_is_vector] = @synonyms_column_is_vector
          GroongaGenerator.new(source,
                               @term_column || default[:term_column],
                               @synonyms_column || default[:synonyms_column],
                               **options)
        when :pgroonga
          PGroongaGenerator.new(source,
                                @table || default[:table],
                                @term_column || default[:term_column],
                                @synonyms_column || default[:synonyms_column],
                                **options)
        end
      end
    end
  end
end
@@ -0,0 +1,55 @@
1
+ # Copyright (C) 2021 Sutou Kouhei <kou@clear-code.com>
2
+ #
3
+ # This program is free software: you can redistribute it and/or modify
4
+ # it under the terms of the GNU General Public License as published by
5
+ # the Free Software Foundation, either version 3 of the License, or
6
+ # (at your option) any later version.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU General Public License
14
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
15
+
16
+ require "json"
17
+
18
module GroongaSynonym
  # Writes synonyms as a JSON array of records: a header record with the two
  # column names followed by one [term, synonyms] record per source entry.
  class GroongaGenerator
    # source: responds to #each and yields (term, synonyms); each synonym
    #   must respond to #to_groonga.
    # term_column, synonyms_column: names written in the header record.
    # synonyms_column_is_vector: when true the synonyms are written as a
    #   JSON array, otherwise as one string of "(...)" groups joined by
    #   " OR ".
    # output: object that responds to #print.
    def initialize(source,
                   term_column,
                   synonyms_column,
                   synonyms_column_is_vector: true,
                   output: $stdout)
      @source = source
      @term_column = term_column
      @synonyms_column = synonyms_column
      @synonyms_column_is_vector = synonyms_column_is_vector
      @output = output
    end

    # Writes the whole JSON document to the output.
    def generate
      @output.print("[\n")
      @output.print([@term_column, @synonyms_column].to_json)
      @source.each do |term, synonyms|
        # Each record is preceded by the separator, so the output never
        # ends with a trailing comma.
        @output.print(",\n")
        @output.print([term, format_synonyms(synonyms)].to_json)
      end
      @output.print("\n]\n")
    end

    private
    # Converts the synonyms of one term to the value stored in the synonyms
    # column. The vector check is done once per term, not once per synonym.
    def format_synonyms(synonyms)
      formatted_synonyms = synonyms.map(&:to_groonga)
      return formatted_synonyms if @synonyms_column_is_vector

      formatted_synonyms.map {|synonym| "(#{synonym})"}.join(" OR ")
    end
  end
end
@@ -0,0 +1,59 @@
1
+ # Copyright (C) 2021 Sutou Kouhei <kou@clear-code.com>
2
+ #
3
+ # This program is free software: you can redistribute it and/or modify
4
+ # it under the terms of the GNU General Public License as published by
5
+ # the Free Software Foundation, either version 3 of the License, or
6
+ # (at your option) any later version.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU General Public License
14
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
15
+
16
+ require "json"
17
+
18
module GroongaSynonym
  # Writes synonyms as one multi-row SQL INSERT statement whose synonyms
  # value is an ARRAY[...] of string literals.
  class PGroongaGenerator
    # source: responds to #each and yields (term, synonyms); each synonym
    #   must respond to #to_groonga.
    # table, term_column, synonyms_column: names interpolated into the
    #   statement as is (they are not quoted or escaped).
    # output: object that responds to #print.
    def initialize(source,
                   table,
                   term_column,
                   synonyms_column,
                   output: $stdout)
      @source = source
      @table = table
      @term_column = term_column
      @synonyms_column = synonyms_column
      @output = output
    end

    # Writes the INSERT statement to the output. Nothing is written when the
    # source yields no entries, because "INSERT ... VALUES;" without any row
    # is not valid SQL.
    def generate
      wrote_row = false
      @source.each do |term, synonyms|
        if wrote_row
          @output.print(",")
        else
          # The statement header is delayed until the first row is known to
          # exist.
          @output.print("INSERT INTO #{@table} ")
          @output.print("(#{@term_column}, #{@synonyms_column}) ")
          @output.print("VALUES")
          wrote_row = true
        end
        @output.print("\n")
        formatted_synonyms = synonyms.map do |synonym|
          escape(synonym.to_groonga)
        end
        @output.print(" (#{escape(term)}, ARRAY[")
        @output.print(formatted_synonyms.join(", "))
        @output.print("])")
      end
      @output.print(";\n") if wrote_row
    end

    private
    # Returns string as a single-quoted SQL string literal; embedded single
    # quotes are doubled. No string literal is mutated, so this also works
    # with frozen string literals.
    def escape(string)
      "'" + string.gsub("'", "''") + "'"
    end
  end
end