groonga-synonym 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +24 -0
- data/LICENSE.txt +674 -0
- data/README.md +21 -0
- data/Rakefile +36 -0
- data/bin/groonga-synonym-generate +21 -0
- data/groonga-synonym.gemspec +51 -0
- data/lib/groonga-synonym.rb +20 -0
- data/lib/groonga-synonym/command-line/generator.rb +176 -0
- data/lib/groonga-synonym/groonga-generator.rb +55 -0
- data/lib/groonga-synonym/pgroonga-generator.rb +59 -0
- data/lib/groonga-synonym/sudachi.rb +122 -0
- data/lib/groonga-synonym/synonym.rb +61 -0
- data/lib/groonga-synonym/version.rb +18 -0
- metadata +71 -0
data/README.md
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# Groonga synonym
|
2
|
+
|
3
|
+
## Description
|
4
|
+
|
5
|
+
Groonga synonym provides tools for working with synonyms in the Groonga family of products.
|
6
|
+
|
7
|
+
## Install
|
8
|
+
|
9
|
+
```bash
|
10
|
+
gem install groonga-synonym
|
11
|
+
```
|
12
|
+
|
13
|
+
## Usage
|
14
|
+
|
15
|
+
```bash
|
16
|
+
groonga-synonym-generate --source sudachi --format groonga
|
17
|
+
```
|
18
|
+
|
19
|
+
## License
|
20
|
+
|
21
|
+
GPLv3 or later. See `LICENSE.txt` for details.
|
data/Rakefile
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# -*- ruby -*-
|
2
|
+
#
|
3
|
+
# Copyright (C) 2021 Sutou Kouhei <kou@clear-code.com>
|
4
|
+
#
|
5
|
+
# This program is free software: you can redistribute it and/or modify
|
6
|
+
# it under the terms of the GNU General Public License as published by
|
7
|
+
# the Free Software Foundation, either version 3 of the License, or
|
8
|
+
# (at your option) any later version.
|
9
|
+
#
|
10
|
+
# This program is distributed in the hope that it will be useful,
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
13
|
+
# GNU General Public License for more details.
|
14
|
+
#
|
15
|
+
# You should have received a copy of the GNU General Public License
|
16
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
17
|
+
|
18
|
+
require "rubygems"
|
19
|
+
require "bundler/gem_helper"
|
20
|
+
|
21
|
+
# Directory that contains this Rakefile; handed to the gem helper as the
# project root.
base_dir = __dir__

helper = Bundler::GemHelper.new(base_dir)

# Override the helper's tag name so that it is the bare version string.
def helper.version_tag
  version
end

# Let the helper define its Rake tasks.
helper.install

desc "Run tests"
task :test do
  ruby("test/run.rb")
end

task default: :test
|
@@ -0,0 +1,21 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# Copyright (C) 2021 Sutou Kouhei <kou@clear-code.com>
|
4
|
+
#
|
5
|
+
# This program is free software: you can redistribute it and/or modify
|
6
|
+
# it under the terms of the GNU General Public License as published by
|
7
|
+
# the Free Software Foundation, either version 3 of the License, or
|
8
|
+
# (at your option) any later version.
|
9
|
+
#
|
10
|
+
# This program is distributed in the hope that it will be useful,
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
13
|
+
# GNU General Public License for more details.
|
14
|
+
#
|
15
|
+
# You should have received a copy of the GNU General Public License
|
16
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
17
|
+
|
18
|
+
require_relative "../lib/groonga-synonym"
|
19
|
+
|
20
|
+
# Run the command line generator with the process arguments and use the
# value it returns as the exit status.
exit(GroongaSynonym::CommandLine::Generator.new.run(ARGV))
|
@@ -0,0 +1,51 @@
|
|
1
|
+
# -*- ruby -*-
|
2
|
+
#
|
3
|
+
# Copyright (C) 2021 Sutou Kouhei <kou@clear-code.com>
|
4
|
+
#
|
5
|
+
# This program is free software: you can redistribute it and/or modify
|
6
|
+
# it under the terms of the GNU General Public License as published by
|
7
|
+
# the Free Software Foundation, either version 3 of the License, or
|
8
|
+
# (at your option) any later version.
|
9
|
+
#
|
10
|
+
# This program is distributed in the hope that it will be useful,
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
13
|
+
# GNU General Public License for more details.
|
14
|
+
#
|
15
|
+
# You should have received a copy of the GNU General Public License
|
16
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
17
|
+
|
18
|
+
# Removes the newlines at the very beginning and the very end of +entry+
# and returns the result terminated by a single newline.
clean_white_space = ->(entry) do
  trimmed = entry.gsub(/(\A\n+|\n+\z)/, '')
  trimmed + "\n"
end
|
21
|
+
|
22
|
+
require_relative "lib/groonga-synonym/version"
|
23
|
+
|
24
|
+
Gem::Specification.new do |spec|
  spec.name = "groonga-synonym"
  spec.version = GroongaSynonym::VERSION
  spec.homepage = "https://github.com/groonga/groonga-synonym"
  spec.authors = ["Sutou Kouhei"]
  spec.email = ["kou@clear-code.com"]

  # The summary and the description come from README.md: the text that
  # follows the "## Description" heading is split into paragraphs, the
  # first one is the summary and the second one (if any) the description.
  readme = File.read("README.md")
  readme.force_encoding("UTF-8")
  # Splitting with a capture group keeps the heading titles in the
  # result, so the section body is the element right after its title.
  entries = readme.split(/^\#\#\s(.*)$/)
  description = clean_white_space.call(entries[entries.index("Description") + 1])
  spec.summary, spec.description, = description.split(/\n\n+/, 3)
  spec.license = "GPL-3.0+"
  spec.files = [
    "README.md",
    "LICENSE.txt",
    "Rakefile",
    "Gemfile",
    "#{spec.name}.gemspec",
  ]
  spec.files += Dir.glob("lib/**/*.rb")
  # Every file directly under bin/ is an executable of the gem.
  Dir.chdir("bin") do
    spec.executables = Dir.glob("*")
  end

  spec.add_runtime_dependency("red-datasets", ">= 0.1.3")
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# Copyright (C) 2021 Sutou Kouhei <kou@clear-code.com>
|
2
|
+
#
|
3
|
+
# This program is free software: you can redistribute it and/or modify
|
4
|
+
# it under the terms of the GNU General Public License as published by
|
5
|
+
# the Free Software Foundation, either version 3 of the License, or
|
6
|
+
# (at your option) any later version.
|
7
|
+
#
|
8
|
+
# This program is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
+
# GNU General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the GNU General Public License
|
14
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
15
|
+
|
16
|
+
require_relative "groonga-synonym/command-line/generator"
|
17
|
+
require_relative "groonga-synonym/groonga-generator"
|
18
|
+
require_relative "groonga-synonym/pgroonga-generator"
|
19
|
+
require_relative "groonga-synonym/sudachi"
|
20
|
+
require_relative "groonga-synonym/version"
|
@@ -0,0 +1,176 @@
|
|
1
|
+
# Copyright (C) 2021 Sutou Kouhei <kou@clear-code.com>
|
2
|
+
#
|
3
|
+
# This program is free software: you can redistribute it and/or modify
|
4
|
+
# it under the terms of the GNU General Public License as published by
|
5
|
+
# the Free Software Foundation, either version 3 of the License, or
|
6
|
+
# (at your option) any later version.
|
7
|
+
#
|
8
|
+
# This program is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
+
# GNU General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the GNU General Public License
|
14
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
15
|
+
|
16
|
+
require "optparse"
|
17
|
+
|
18
|
+
module GroongaSynonym
  module CommandLine
    # Command line front end: parses the options, builds the synonym
    # source and the generator for the requested format, and writes the
    # generated data to the selected output.
    class Generator
      # Values accepted by --source.
      AVAILABLE_SOURCES = [
        :sudachi,
      ]

      # Values accepted by --format.
      AVAILABLE_FORMATS = [
        :groonga,
        :pgroonga,
      ]

      # @param output [String, Object, nil] where to write. "-" (also
      #   used when nil is given) means the standard output, any other
      #   String is a path to open for writing, and any other object is
      #   handed to the generator as is.
      def initialize(output=nil)
        @source = AVAILABLE_SOURCES.first
        @format = AVAILABLE_FORMATS.first
        # nil means "use the default of the selected format".
        @table = nil
        @term_column = nil
        @synonyms_column = nil
        @synonyms_column_is_vector = true
        @output = output || "-"
        @defaults = {
          groonga: {
            table: "Thesaurus",
            term_column: "_key",
            synonyms_column: "synonyms",
          },
          pgroonga: {
            table: "thesaurus",
            term_column: "term",
            synonyms_column: "synonyms",
          },
        }
      end

      # Parses +args+ and generates the synonym data.
      #
      # @param args [Array<String>] command line arguments. The array
      #   itself is not modified.
      # @return [true] when the data was generated, or when --help or
      #   --version was handled.
      def run(args)
        # --help and --version leave early by throwing to this tag.
        catch do |tag|
          parse_args(args, tag)
          source = create_source
          open_output do |output|
            generator = create_generator(source, output)
            generator.generate
            true
          end
        end
      end

      private
      # Formats the list of accepted values for the help message,
      # e.g. "[groonga, pgroonga]".
      def format_availables(availables)
        "[" + availables.join(", ") + "]"
      end

      # Returns one help line per format that shows the default value
      # of +key+ for that format, e.g. "groonga: (Thesaurus)".
      def format_defaults(key)
        AVAILABLE_FORMATS.map do |format|
          "#{format}: (#{@defaults[format][key]})"
        end
      end

      # Defines the command line options and applies +args+ to the
      # instance variables. Throws +tag+ with true after printing the
      # version or the help message.
      def parse_args(args, tag)
        parser = OptionParser.new
        parser.on("--source=SOURCE",
                  AVAILABLE_SOURCES,
                  "Synonym source",
                  format_availables(AVAILABLE_SOURCES),
                  "(#{@source})") do |source|
          @source = source
        end
        parser.on("--format=FORMAT",
                  AVAILABLE_FORMATS,
                  "Output format",
                  format_availables(AVAILABLE_FORMATS),
                  "(#{@format})") do |format|
          @format = format
        end
        parser.on("--table=TABLE",
                  "Synonyms table's name",
                  *format_defaults(:table)) do |table|
          @table = table
        end
        parser.on("--term-column=COLUMN",
                  "Term column's name",
                  *format_defaults(:term_column)) do |column|
          @term_column = column
        end
        parser.on("--synonyms-column=COLUMN",
                  "Synonyms column's name",
                  *format_defaults(:synonyms_column)) do |column|
          @synonyms_column = column
        end
        # "groonga" is an output format (see AVAILABLE_FORMATS), and this
        # flag is only forwarded to the generator of that format.
        parser.on("--no-synonyms-column-is-vector",
                  "Synonyms column isn't a vector column",
                  "This is only for 'groonga' format") do |boolean|
          @synonyms_column_is_vector = boolean
        end
        parser.on("--output=OUTPUT",
                  "Output path",
                  "'-' means the standard output",
                  "(#{@output})") do |output|
          @output = output
        end
        parser.on("--version",
                  "Show version and exit") do
          puts(VERSION)
          throw(tag, true)
        end
        parser.on("--help",
                  "Show this message and exit") do
          puts(parser.help)
          throw(tag, true)
        end
        # Parse a copy so that the caller's array is left untouched.
        parser.parse!(args.dup)
      end

      # Yields the object to write to and returns the block's value.
      # A file opened here is closed when the block finishes.
      def open_output(&block)
        case @output
        when "-"
          yield($stdout)
        when String
          File.open(@output, "w", &block)
        else
          yield(@output)
        end
      end

      # Builds the synonym source selected by --source.
      def create_source
        case @source
        when :sudachi
          Sudachi.new
        end
      end

      # Builds the generator for the selected format. Names that were
      # not given on the command line fall back to the format's defaults.
      def create_generator(source, output)
        options = {
          output: output,
        }
        case @format
        when :groonga
          default = @defaults[:groonga]
          term_column = @term_column || default[:term_column]
          synonyms_column = @synonyms_column || default[:synonyms_column]
          options[:synonyms_column_is_vector] = @synonyms_column_is_vector
          GroongaGenerator.new(source,
                               term_column,
                               synonyms_column,
                               **options)
        when :pgroonga
          default = @defaults[:pgroonga]
          table = @table || default[:table]
          term_column = @term_column || default[:term_column]
          synonyms_column = @synonyms_column || default[:synonyms_column]
          PGroongaGenerator.new(source,
                                table,
                                term_column,
                                synonyms_column,
                                **options)
        end
      end
    end
  end
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
# Copyright (C) 2021 Sutou Kouhei <kou@clear-code.com>
|
2
|
+
#
|
3
|
+
# This program is free software: you can redistribute it and/or modify
|
4
|
+
# it under the terms of the GNU General Public License as published by
|
5
|
+
# the Free Software Foundation, either version 3 of the License, or
|
6
|
+
# (at your option) any later version.
|
7
|
+
#
|
8
|
+
# This program is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
+
# GNU General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the GNU General Public License
|
14
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
15
|
+
|
16
|
+
require "json"
|
17
|
+
|
18
|
+
module GroongaSynonym
  # Writes synonyms as a JSON array: the first element lists the column
  # names and every following element is one [term, synonyms] record.
  class GroongaGenerator
    # @param source [#each] yields a term and its synonyms; each synonym
    #   must respond to +to_groonga+.
    # @param term_column [String] name written for the term column.
    # @param synonyms_column [String] name written for the synonyms column.
    # @param synonyms_column_is_vector [Boolean] when true the synonyms
    #   are written as an array, otherwise as one "(a) OR (b)" string.
    # @param output [#print] where the data is written.
    def initialize(source,
                   term_column,
                   synonyms_column,
                   synonyms_column_is_vector: true,
                   output: $stdout)
      @source = source
      @term_column = term_column
      @synonyms_column = synonyms_column
      @synonyms_column_is_vector = synonyms_column_is_vector
      @output = output
    end

    # Writes the header record followed by one record per term.
    def generate
      header = [@term_column, @synonyms_column]
      @output.print("[\n")
      @output.print(header.to_json)
      @source.each do |term, synonyms|
        # Each record is preceded by a separator, so nothing trails
        # the last one.
        @output.print(",\n")
        row = [term, format_synonyms(synonyms)]
        @output.print(row.to_json)
      end
      @output.print("\n]\n")
    end

    private
    # Returns the value stored in the synonyms column for +synonyms+.
    def format_synonyms(synonyms)
      if @synonyms_column_is_vector
        synonyms.map { |synonym| synonym.to_groonga }
      else
        grouped = synonyms.map { |synonym| "(#{synonym.to_groonga})" }
        grouped.join(" OR ")
      end
    end
  end
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
# Copyright (C) 2021 Sutou Kouhei <kou@clear-code.com>
|
2
|
+
#
|
3
|
+
# This program is free software: you can redistribute it and/or modify
|
4
|
+
# it under the terms of the GNU General Public License as published by
|
5
|
+
# the Free Software Foundation, either version 3 of the License, or
|
6
|
+
# (at your option) any later version.
|
7
|
+
#
|
8
|
+
# This program is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
+
# GNU General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the GNU General Public License
|
14
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
15
|
+
|
16
|
+
require "json"
|
17
|
+
|
18
|
+
module GroongaSynonym
  # Writes synonyms as a single SQL INSERT statement with one
  # (term, ARRAY[synonyms...]) row per term.
  class PGroongaGenerator
    # @param source [#each] yields a term and its synonyms; each synonym
    #   must respond to +to_groonga+.
    # @param table [String] table name, written as is.
    # @param term_column [String] term column name, written as is.
    # @param synonyms_column [String] synonyms column name, written as is.
    # @param output [#print] where the statement is written.
    def initialize(source,
                   table,
                   term_column,
                   synonyms_column,
                   output: $stdout)
      @source = source
      @table = table
      @term_column = term_column
      @synonyms_column = synonyms_column
      @output = output
    end

    # Writes the INSERT statement. Nothing is written when the source
    # yields no record, because "INSERT ... VALUES;" without any row is
    # not a valid statement.
    def generate
      started = false
      @source.each do |term, synonyms|
        if started
          @output.print(",")
        else
          # The statement header is delayed until the first record so
          # that an empty source produces no output at all.
          @output.print("INSERT INTO #{@table} ")
          @output.print("(#{@term_column}, #{@synonyms_column}) ")
          @output.print("VALUES")
          started = true
        end
        @output.print("\n")
        formatted_synonyms = synonyms.map do |synonym|
          escape(synonym.to_groonga)
        end
        @output.print(" (#{escape(term)}, ARRAY[")
        @output.print(formatted_synonyms.join(", "))
        @output.print("])")
      end
      @output.print(";\n") if started
    end

    private
    # Returns +string+ as a single-quoted SQL literal; embedded single
    # quotes are doubled.
    def escape(string)
      "'#{string.gsub("'", "''")}'"
    end
  end
end
|