groonga-synonym 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +24 -0
- data/LICENSE.txt +674 -0
- data/README.md +21 -0
- data/Rakefile +36 -0
- data/bin/groonga-synonym-generate +21 -0
- data/groonga-synonym.gemspec +51 -0
- data/lib/groonga-synonym.rb +20 -0
- data/lib/groonga-synonym/command-line/generator.rb +176 -0
- data/lib/groonga-synonym/groonga-generator.rb +55 -0
- data/lib/groonga-synonym/pgroonga-generator.rb +59 -0
- data/lib/groonga-synonym/sudachi.rb +122 -0
- data/lib/groonga-synonym/synonym.rb +61 -0
- data/lib/groonga-synonym/version.rb +18 -0
- metadata +71 -0
data/README.md
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# Groonga synonym
|
2
|
+
|
3
|
+
## Description
|
4
|
+
|
5
|
+
Groonga synonym provides tools for generating synonym data for the Groonga family (such as Groonga and PGroonga).
|
6
|
+
|
7
|
+
## Install
|
8
|
+
|
9
|
+
```bash
|
10
|
+
gem install groonga-synonym
|
11
|
+
```
|
12
|
+
|
13
|
+
## Usage
|
14
|
+
|
15
|
+
```bash
|
16
|
+
groonga-synonym-generate --source sudachi --format groonga
|
17
|
+
```
|
18
|
+
|
19
|
+
## License
|
20
|
+
|
21
|
+
GPLv3 or later. See `LICENSE.txt` for details.
|
data/Rakefile
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# -*- ruby -*-
|
2
|
+
#
|
3
|
+
# Copyright (C) 2021 Sutou Kouhei <kou@clear-code.com>
|
4
|
+
#
|
5
|
+
# This program is free software: you can redistribute it and/or modify
|
6
|
+
# it under the terms of the GNU General Public License as published by
|
7
|
+
# the Free Software Foundation, either version 3 of the License, or
|
8
|
+
# (at your option) any later version.
|
9
|
+
#
|
10
|
+
# This program is distributed in the hope that it will be useful,
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
13
|
+
# GNU General Public License for more details.
|
14
|
+
#
|
15
|
+
# You should have received a copy of the GNU General Public License
|
16
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
17
|
+
|
18
|
+
require "rubygems"
|
19
|
+
require "bundler/gem_helper"
|
20
|
+
|
21
|
+
# Set up the Bundler gem helper for the gemspec that lives next to this
# Rakefile.
helper = Bundler::GemHelper.new(__dir__)

# Use the bare version string as the value returned by version_tag.
def helper.version_tag
  version
end

# Register the tasks provided by Bundler::GemHelper.
helper.install

desc "Run tests"
task :test do
  ruby("test/run.rb")
end

task default: :test
|
@@ -0,0 +1,21 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# Copyright (C) 2021 Sutou Kouhei <kou@clear-code.com>
|
4
|
+
#
|
5
|
+
# This program is free software: you can redistribute it and/or modify
|
6
|
+
# it under the terms of the GNU General Public License as published by
|
7
|
+
# the Free Software Foundation, either version 3 of the License, or
|
8
|
+
# (at your option) any later version.
|
9
|
+
#
|
10
|
+
# This program is distributed in the hope that it will be useful,
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
13
|
+
# GNU General Public License for more details.
|
14
|
+
#
|
15
|
+
# You should have received a copy of the GNU General Public License
|
16
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
17
|
+
|
18
|
+
require_relative "../lib/groonga-synonym"
|
19
|
+
|
20
|
+
# Run the generator command with the process arguments and use its
# result as the process exit status.
exit(GroongaSynonym::CommandLine::Generator.new.run(ARGV))
|
@@ -0,0 +1,51 @@
|
|
1
|
+
# -*- ruby -*-
|
2
|
+
#
|
3
|
+
# Copyright (C) 2021 Sutou Kouhei <kou@clear-code.com>
|
4
|
+
#
|
5
|
+
# This program is free software: you can redistribute it and/or modify
|
6
|
+
# it under the terms of the GNU General Public License as published by
|
7
|
+
# the Free Software Foundation, either version 3 of the License, or
|
8
|
+
# (at your option) any later version.
|
9
|
+
#
|
10
|
+
# This program is distributed in the hope that it will be useful,
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
13
|
+
# GNU General Public License for more details.
|
14
|
+
#
|
15
|
+
# You should have received a copy of the GNU General Public License
|
16
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
17
|
+
|
18
|
+
# Removes every leading and trailing newline from +entry+ and returns the
# result terminated by exactly one newline.
clean_white_space = ->(entry) do
  trimmed = entry.gsub(/(\A\n+|\n+\z)/, '')
  trimmed + "\n"
end
|
21
|
+
|
22
|
+
require_relative "lib/groonga-synonym/version"
|
23
|
+
|
24
|
+
Gem::Specification.new do |spec|
  spec.name = "groonga-synonym"
  spec.version = GroongaSynonym::VERSION
  spec.homepage = "https://github.com/groonga/groonga-synonym"
  spec.authors = ["Sutou Kouhei"]
  spec.email = ["kou@clear-code.com"]

  # Derive the summary and description from the "## Description" section
  # of README.md: splitting on level-2 headings yields
  # [preamble, heading, body, heading, body, ...], so the body follows
  # its heading in the array.
  readme = File.read("README.md")
  readme.force_encoding("UTF-8")
  entries = readme.split(/^\#\#\s(.*)$/)
  description = clean_white_space.call(entries[entries.index("Description") + 1])
  # The first paragraph becomes the summary and the second one, if
  # present, becomes the description.
  spec.summary, spec.description, = description.split(/\n\n+/, 3)
  spec.license = "GPL-3.0+"
  spec.files = [
    "README.md",
    "LICENSE.txt",
    "Rakefile",
    "Gemfile",
    "#{spec.name}.gemspec",
  ]
  spec.files += Dir.glob("lib/**/*.rb")
  # Every file directly under bin/ is published as an executable.
  Dir.chdir("bin") do
    spec.executables = Dir.glob("*")
  end

  spec.add_runtime_dependency("red-datasets", ">= 0.1.3")
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# Copyright (C) 2021 Sutou Kouhei <kou@clear-code.com>
|
2
|
+
#
|
3
|
+
# This program is free software: you can redistribute it and/or modify
|
4
|
+
# it under the terms of the GNU General Public License as published by
|
5
|
+
# the Free Software Foundation, either version 3 of the License, or
|
6
|
+
# (at your option) any later version.
|
7
|
+
#
|
8
|
+
# This program is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
+
# GNU General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the GNU General Public License
|
14
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
15
|
+
|
16
|
+
require_relative "groonga-synonym/command-line/generator"
|
17
|
+
require_relative "groonga-synonym/groonga-generator"
|
18
|
+
require_relative "groonga-synonym/pgroonga-generator"
|
19
|
+
require_relative "groonga-synonym/sudachi"
|
20
|
+
require_relative "groonga-synonym/version"
|
@@ -0,0 +1,176 @@
|
|
1
|
+
# Copyright (C) 2021 Sutou Kouhei <kou@clear-code.com>
|
2
|
+
#
|
3
|
+
# This program is free software: you can redistribute it and/or modify
|
4
|
+
# it under the terms of the GNU General Public License as published by
|
5
|
+
# the Free Software Foundation, either version 3 of the License, or
|
6
|
+
# (at your option) any later version.
|
7
|
+
#
|
8
|
+
# This program is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
+
# GNU General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the GNU General Public License
|
14
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
15
|
+
|
16
|
+
require "optparse"
|
17
|
+
|
18
|
+
module GroongaSynonym
  module CommandLine
    # Command line front end that reads synonyms from a source and writes
    # them in the selected output format.
    class Generator
      # Synonym sources selectable with --source. The first one is the
      # default.
      AVAILABLE_SOURCES = [
        :sudachi,
      ]

      # Output formats selectable with --format. The first one is the
      # default.
      AVAILABLE_FORMATS = [
        :groonga,
        :pgroonga,
      ]

      # @param output [String, #print, nil] Output destination. "-" (also
      #   used when nil is given) means the standard output, any other
      #   String is a path to write to and any other object is passed to
      #   the format generator as is.
      def initialize(output=nil)
        @source = AVAILABLE_SOURCES.first
        @format = AVAILABLE_FORMATS.first
        # nil means "not given on the command line": the per-format
        # default in @defaults is used instead.
        @table = nil
        @term_column = nil
        @synonyms_column = nil
        @synonyms_column_is_vector = true
        @output = output || "-"
        @defaults = {
          groonga: {
            table: "Thesaurus",
            term_column: "_key",
            synonyms_column: "synonyms",
          },
          pgroonga: {
            table: "thesaurus",
            term_column: "term",
            synonyms_column: "synonyms",
          },
        }
      end

      # Parses +args+ and generates the synonym data.
      #
      # --version and --help print their message and leave through the
      # catch tag, so nothing is generated for them.
      #
      # @param args [Array<String>] Command line arguments. The array
      #   itself isn't modified.
      # @return [true] on success, including --version and --help.
      def run(args)
        catch do |tag|
          parse_args(args, tag)
          source = create_source
          open_output do |output|
            generator = create_generator(source, output)
            generator.generate
            true
          end
        end
      end

      private
      # Formats the selectable values for the help message.
      def format_availables(availables)
        "[" + availables.join(", ") + "]"
      end

      # Builds one help line per format that shows the default value of
      # +key+ for the format.
      def format_defaults(key)
        AVAILABLE_FORMATS.collect do |format|
          "#{format}: (#{@defaults[format][key]})"
        end
      end

      # Defines the command line options and applies +args+ to the
      # instance variables. --version and --help throw +tag+ with true.
      def parse_args(args, tag)
        parser = OptionParser.new
        parser.on("--source=SOURCE",
                  AVAILABLE_SOURCES,
                  "Synonym source",
                  format_availables(AVAILABLE_SOURCES),
                  "(#{@source})") do |source|
          @source = source
        end
        parser.on("--format=FORMAT",
                  AVAILABLE_FORMATS,
                  "Output format",
                  format_availables(AVAILABLE_FORMATS),
                  "(#{@format})") do |format|
          @format = format
        end
        parser.on("--table=TABLE",
                  "Synonyms table's name",
                  *format_defaults(:table)) do |table|
          @table = table
        end
        parser.on("--term-column=COLUMN",
                  "Term column's name",
                  *format_defaults(:term_column)) do |column|
          @term_column = column
        end
        parser.on("--synonyms-column=COLUMN",
                  "Synonyms column's name",
                  *format_defaults(:synonyms_column)) do |column|
          @synonyms_column = column
        end
        # This flag is only read by the :groonga branch of
        # create_generator, so it is tied to the output format, not to
        # the synonym source.
        parser.on("--no-synonyms-column-is-vector",
                  "Synonyms column isn't a vector column",
                  "This is only for 'groonga' format") do |boolean|
          @synonyms_column_is_vector = boolean
        end
        parser.on("--output=OUTPUT",
                  "Output path",
                  "'-' means the standard output",
                  "(#{@output})") do |output|
          @output = output
        end
        parser.on("--version",
                  "Show version and exit") do
          puts(VERSION)
          throw(tag, true)
        end
        parser.on("--help",
                  "Show this message and exit") do
          puts(parser.help)
          throw(tag, true)
        end
        # parse! is destructive, so work on a copy of the caller's array.
        parser.parse!(args.dup)
      end

      # Yields the output destination selected by @output and returns the
      # block's value. A file opened here is closed when the block ends.
      def open_output(&block)
        case @output
        when "-"
          yield($stdout)
        when String
          File.open(@output, "w", &block)
        else
          yield(@output)
        end
      end

      # Creates the synonym source selected by @source.
      def create_source
        case @source
        when :sudachi
          Sudachi.new
        end
      end

      # Creates the generator for @format. Values that weren't given on
      # the command line fall back to the format's defaults.
      def create_generator(source, output)
        options = {
          output: output,
        }
        case @format
        when :groonga
          # NOTE: @table isn't passed to GroongaGenerator here.
          default = @defaults[:groonga]
          term_column = @term_column || default[:term_column]
          synonyms_column = @synonyms_column || default[:synonyms_column]
          options[:synonyms_column_is_vector] = @synonyms_column_is_vector
          GroongaGenerator.new(source,
                               term_column,
                               synonyms_column,
                               **options)
        when :pgroonga
          default = @defaults[:pgroonga]
          table = @table || default[:table]
          term_column = @term_column || default[:term_column]
          synonyms_column = @synonyms_column || default[:synonyms_column]
          PGroongaGenerator.new(source,
                                table,
                                term_column,
                                synonyms_column,
                                **options)
        end
      end
    end
  end
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
# Copyright (C) 2021 Sutou Kouhei <kou@clear-code.com>
|
2
|
+
#
|
3
|
+
# This program is free software: you can redistribute it and/or modify
|
4
|
+
# it under the terms of the GNU General Public License as published by
|
5
|
+
# the Free Software Foundation, either version 3 of the License, or
|
6
|
+
# (at your option) any later version.
|
7
|
+
#
|
8
|
+
# This program is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
+
# GNU General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the GNU General Public License
|
14
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
15
|
+
|
16
|
+
require "json"
|
17
|
+
|
18
|
+
module GroongaSynonym
  # Writes synonyms as a JSON array of records: the first record holds the
  # column names and each following record holds a term and its synonyms.
  class GroongaGenerator
    # @param source [#each] Yields a term and its synonyms. Each synonym
    #   must respond to +to_groonga+.
    # @param term_column [String] Name written as the first column.
    # @param synonyms_column [String] Name written as the second column.
    # @param synonyms_column_is_vector [Boolean] When true the synonyms
    #   are written as an array. Otherwise they are written as one string
    #   of parenthesized synonyms joined with " OR ".
    # @param output [#print] Where the result is written.
    def initialize(source,
                   term_column,
                   synonyms_column,
                   synonyms_column_is_vector: true,
                   output: $stdout)
      @source = source
      @term_column = term_column
      @synonyms_column = synonyms_column
      @synonyms_column_is_vector = synonyms_column_is_vector
      @output = output
    end

    # Writes the header record followed by one record per term.
    def generate
      header = [@term_column, @synonyms_column]
      @output.print("[\n")
      @output.print(header.to_json)
      @source.each do |term, synonyms|
        # Each record is preceded by a separator so that no trailing
        # comma is left after the last one.
        @output.print(",\n")
        @output.print([term, format_synonyms(synonyms)].to_json)
      end
      @output.print("\n]\n")
    end

    private
    # Converts +synonyms+ to the value stored in the synonyms column.
    def format_synonyms(synonyms)
      if @synonyms_column_is_vector
        synonyms.collect do |synonym|
          synonym.to_groonga
        end
      else
        grouped = synonyms.collect do |synonym|
          "(#{synonym.to_groonga})"
        end
        grouped.join(" OR ")
      end
    end
  end
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
# Copyright (C) 2021 Sutou Kouhei <kou@clear-code.com>
|
2
|
+
#
|
3
|
+
# This program is free software: you can redistribute it and/or modify
|
4
|
+
# it under the terms of the GNU General Public License as published by
|
5
|
+
# the Free Software Foundation, either version 3 of the License, or
|
6
|
+
# (at your option) any later version.
|
7
|
+
#
|
8
|
+
# This program is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
+
# GNU General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the GNU General Public License
|
14
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
15
|
+
|
16
|
+
require "json"
|
17
|
+
|
18
|
+
module GroongaSynonym
  # Writes synonyms as a single multi-row SQL INSERT statement whose
  # synonyms value is an ARRAY[...] of string literals.
  class PGroongaGenerator
    # @param source [#each] Yields a term and its synonyms. Each synonym
    #   must respond to +to_groonga+.
    # @param table [String] Table name. It's written as is (not quoted).
    # @param term_column [String] Term column name. Written as is.
    # @param synonyms_column [String] Synonyms column name. Written as is.
    # @param output [#print] Where the statement is written.
    def initialize(source,
                   table,
                   term_column,
                   synonyms_column,
                   output: $stdout)
      @source = source
      @table = table
      @term_column = term_column
      @synonyms_column = synonyms_column
      @output = output
    end

    # Writes the INSERT statement. Nothing is written when the source
    # yields no record, because "INSERT ... VALUES;" without any row is
    # not a valid statement.
    def generate
      wrote_record = false
      @source.each do |term, synonyms|
        if wrote_record
          @output.print(",")
        else
          # The statement header is written lazily, right before the
          # first row.
          @output.print("INSERT INTO #{@table} ")
          @output.print("(#{@term_column}, #{@synonyms_column}) ")
          @output.print("VALUES")
          wrote_record = true
        end
        @output.print("\n")
        formatted_synonyms = synonyms.collect do |synonym|
          escape(synonym.to_groonga)
        end
        @output.print(" (#{escape(term)}, ARRAY[")
        @output.print(formatted_synonyms.join(", "))
        @output.print("])")
      end
      @output.print(";\n") if wrote_record
    end

    private
    # Returns +string+ as a single-quoted SQL string literal with every
    # embedded single quote doubled.
    def escape(string)
      "'" + string.gsub("'", "''") + "'"
    end
  end
end
|