rudachi 1.0.1 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +16 -10
- data/lib/rudachi/config.rb +2 -26
- data/lib/rudachi/configurable.rb +4 -5
- data/lib/rudachi/dependencies.rb +9 -8
- data/lib/rudachi/file_parser.rb +16 -13
- data/lib/rudachi/loader.rb +13 -2
- data/lib/rudachi/option/boolean_option.rb +14 -0
- data/lib/rudachi/option/config.rb +47 -0
- data/lib/rudachi/option/string_option.rb +14 -0
- data/lib/rudachi/stream_parser.rb +11 -0
- data/lib/rudachi/text_parser.rb +13 -9
- data/lib/rudachi/version.rb +1 -1
- data/lib/rudachi.rb +2 -0
- metadata +8 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 515a2588b078bce3a050c6e1c4cd115524daaf5edfb71b76024e44ca9cb1cf26
|
4
|
+
data.tar.gz: eeb369e360a30b5dc050820c0b77ce4f9e98f6f9babac3e7700009ba8d2b372f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 989c5f7acc769e3b9304592db515f30192bdf51d1aa2ce3578b380d30a06d229cfa8f96bc818d7245adeb3b72a8fa657178796a2a08cab3addb6658236219e7d
|
7
|
+
data.tar.gz: 704c0e1053077802101e7218fae201a53a2adf44203f66ef2bf8093c3c8f4de208b06c7e905b2c54aeb3c2a009bb392f73f92181633c4417081a4675245f866f
|
data/README.md
CHANGED
@@ -1,23 +1,29 @@
|
|
1
1
|
# Rudachi
|
2
|
-
[Sudachi](https://github.com/WorksApplications/Sudachi)
|
2
|
+
JRuby wrapper for [Sudachi](https://github.com/WorksApplications/Sudachi).
|
3
3
|
|
4
|
-
#### Text
|
4
|
+
#### Text
|
5
5
|
```rb
|
6
6
|
Rudachi::TextParser.parse('東京都へ行く')
|
7
7
|
=> "東京都\t名詞,固有名詞,地名,一般,*,*\t東京都\nへ\t助詞,格助詞,*,*,*,*\tへ\n行く\t動詞,非自立可能,*,*,五段-カ行,終止形-一般\t行く\nEOS\n"
|
8
8
|
```
|
9
9
|
|
10
|
-
#### File
|
10
|
+
#### File
|
11
11
|
```rb
|
12
|
-
File.open('
|
13
|
-
Rudachi::FileParser.parse('
|
12
|
+
File.open('input.txt', 'w') { |f| f << '東京都へ行く' }
|
13
|
+
Rudachi::FileParser.parse('input.txt')
|
14
|
+
=> "東京都\t名詞,固有名詞,地名,一般,*,*\t東京都\nへ\t助詞,格助詞,*,*,*,*\tへ\n行く\t動詞,非自立可能,*,*,五段-カ行,終止形-一般\t行く\nEOS\n"
|
15
|
+
```
|
16
|
+
|
17
|
+
#### IO
|
18
|
+
```rb
|
19
|
+
Rudachi::StreamParser.parse(StringIO.new('東京都へ行く'))
|
14
20
|
=> "東京都\t名詞,固有名詞,地名,一般,*,*\t東京都\nへ\t助詞,格助詞,*,*,*,*\tへ\n行く\t動詞,非自立可能,*,*,五段-カ行,終止形-一般\t行く\nEOS\n"
|
15
21
|
```
|
16
22
|
|
17
23
|
#### With [some options](https://github.com/WorksApplications/Sudachi#options)
|
18
24
|
```rb
|
19
|
-
Rudachi::TextParser.new(o: '
|
20
|
-
File.read('
|
25
|
+
Rudachi::TextParser.new(o: 'output.txt', m: 'A').parse('東京都へ行く')
|
26
|
+
File.read('output.txt')
|
21
27
|
=> "東京\t名詞,固有名詞,地名,一般,*,*\t東京\n都\t名詞,普通名詞,一般,*,*,*\t都\nへ\t助詞,格助詞,*,*,*,*\tへ\n行く\t動詞,非自立可能,*,*,五段-カ行,終止形-一般\t行く\nEOS\n"
|
22
28
|
```
|
23
29
|
|
@@ -54,11 +60,13 @@ LEGAL LICENSE-2.0.txt system_core.dic
|
|
54
60
|
gem 'rudachi'
|
55
61
|
```
|
56
62
|
|
57
|
-
Then run `bundle install
|
63
|
+
Then run `bundle install`.
|
58
64
|
|
59
65
|
3. Initialize Rudachi
|
60
66
|
|
61
67
|
```rb
|
68
|
+
require 'rudachi'
|
69
|
+
|
62
70
|
Rudachi.configure do |config|
|
63
71
|
config.jar_path = 'sudachi-0.5.3/sudachi-0.5.3.jar'
|
64
72
|
end
|
@@ -71,8 +79,6 @@ end
|
|
71
79
|
4. Did it !!
|
72
80
|
|
73
81
|
```rb
|
74
|
-
require 'rudachi'
|
75
|
-
|
76
82
|
Rudachi::TextParser.parse('こんにちは世界')
|
77
83
|
=> "こんにちは\t感動詞,一般,*,*,*,*\t今日は\n世界\t名詞,普通名詞,一般,*,*,*\t世界\nEOS\n"
|
78
84
|
```
|
data/lib/rudachi/config.rb
CHANGED
@@ -1,32 +1,8 @@
|
|
1
1
|
require 'rudachi/configurable'
|
2
|
+
require 'rudachi/option/string_option'
|
2
3
|
|
3
4
|
module Rudachi
|
4
5
|
extend Configurable
|
5
6
|
|
6
|
-
config_accessor :jar_path,
|
7
|
-
|
8
|
-
module Option
|
9
|
-
extend Configurable
|
10
|
-
|
11
|
-
# @see https://github.com/WorksApplications/Sudachi#options
|
12
|
-
config_accessor :r, default: nil
|
13
|
-
config_accessor :s, default: nil
|
14
|
-
config_accessor :p, default: '/usr/java/lib'
|
15
|
-
config_accessor :m, default: 'C'
|
16
|
-
config_accessor :o, default: nil
|
17
|
-
config_accessor :t, default: nil
|
18
|
-
config_accessor :ts, default: nil
|
19
|
-
config_accessor :a, default: nil
|
20
|
-
config_accessor :f, default: nil
|
21
|
-
config_accessor :d, default: nil
|
22
|
-
config_accessor :h, default: nil
|
23
|
-
|
24
|
-
def self.cmds(opts)
|
25
|
-
class_variables.each_with_object([]) do |name, flags|
|
26
|
-
key = name.to_s.delete('@@')
|
27
|
-
val = opts[key] || opts[key.to_sym] || class_variable_get(name) or next
|
28
|
-
flags << "-#{key}" << val.to_s
|
29
|
-
end
|
30
|
-
end
|
31
|
-
end
|
7
|
+
config_accessor :jar_path, klass: Option::StringOption, default: '/usr/java/lib/sudachi.jar'
|
32
8
|
end
|
data/lib/rudachi/configurable.rb
CHANGED
@@ -6,13 +6,12 @@ module Rudachi
|
|
6
6
|
|
7
7
|
private
|
8
8
|
|
9
|
-
def config_accessor(name, default:
|
10
|
-
|
9
|
+
def config_accessor(name, klass:, default:)
|
10
|
+
module_eval <<~EOS
|
11
11
|
def self.#{name}; @@#{name}; end
|
12
|
-
def self.#{name}=(val); @@#{name} = val; end
|
12
|
+
def self.#{name}=(val); @@#{name} = #{klass}.new(val); end
|
13
13
|
EOS
|
14
|
-
|
15
|
-
class_variable_set("@@#{name}", default)
|
14
|
+
public_send("#{name}=", default)
|
16
15
|
end
|
17
16
|
end
|
18
17
|
end
|
data/lib/rudachi/dependencies.rb
CHANGED
@@ -8,12 +8,13 @@ java_import 'java.nio.charset.StandardCharsets'
|
|
8
8
|
require Rudachi.jar_path
|
9
9
|
java_import 'com.worksap.nlp.sudachi.SudachiCommandLine'
|
10
10
|
|
11
|
-
module
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
11
|
+
module Rudachi
|
12
|
+
module Java
|
13
|
+
System = ::Java::JavaLang::System
|
14
|
+
ByteArrayInputStream = ::Java::JavaIo::ByteArrayInputStream
|
15
|
+
ByteArrayOutputStream = ::Java::JavaIo::ByteArrayOutputStream
|
16
|
+
PrintStream = ::Java::JavaIo::PrintStream
|
17
|
+
UTF_8 = ::Java::JavaNioCharset::StandardCharsets::UTF_8
|
18
|
+
SudachiCommandLine = ::Java::ComWorksapNlpSudachi::SudachiCommandLine
|
19
|
+
end
|
19
20
|
end
|
data/lib/rudachi/file_parser.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
require 'rudachi/config'
|
1
|
+
require 'rudachi/option/config'
|
2
2
|
require 'rudachi/loader'
|
3
3
|
|
4
4
|
module Rudachi
|
@@ -9,30 +9,33 @@ module Rudachi
|
|
9
9
|
|
10
10
|
def initialize(**opts)
|
11
11
|
Rudachi.load!
|
12
|
-
|
13
|
-
@output = Java::ByteArrayOutputStream.new
|
14
|
-
@opts = opts
|
12
|
+
@opts = Option.new(opts)
|
15
13
|
end
|
16
14
|
|
17
15
|
def parse(path)
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
16
|
+
output_stream do |output|
|
17
|
+
take_stdout(output) do
|
18
|
+
Java::SudachiCommandLine.main(
|
19
|
+
Option.cmds(@opts).push(path)
|
20
|
+
)
|
21
|
+
end
|
22
22
|
end
|
23
|
-
@output.toString
|
24
23
|
end
|
25
24
|
|
26
25
|
private
|
27
26
|
|
28
|
-
def take_stdout
|
27
|
+
def take_stdout(output)
|
29
28
|
stdout = Java::System.out
|
30
|
-
stream = Java::PrintStream.new(@output)
|
31
|
-
Java::System.setOut(stream)
|
32
29
|
|
30
|
+
Java::System.setOut(output)
|
33
31
|
yield
|
34
|
-
|
35
32
|
Java::System.setOut(stdout)
|
36
33
|
end
|
34
|
+
|
35
|
+
def output_stream
|
36
|
+
Java::ByteArrayOutputStream.new.tap do |output|
|
37
|
+
yield Java::PrintStream.new(output)
|
38
|
+
end.toString
|
39
|
+
end
|
37
40
|
end
|
38
41
|
end
|
data/lib/rudachi/loader.rb
CHANGED
@@ -1,5 +1,16 @@
|
|
1
1
|
module Rudachi
|
2
|
-
|
3
|
-
|
2
|
+
class UnavailableError < StandardError; end;
|
3
|
+
|
4
|
+
class << self
|
5
|
+
def load!
|
6
|
+
raise UnavailableError, 'jruby_required' unless jruby?
|
7
|
+
require 'rudachi/dependencies'
|
8
|
+
end
|
9
|
+
|
10
|
+
private
|
11
|
+
|
12
|
+
def jruby?
|
13
|
+
RUBY_PLATFORM == 'java'
|
14
|
+
end
|
4
15
|
end
|
5
16
|
end
|
data/lib/rudachi/option/boolean_option.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
module Rudachi
|
2
|
+
class Option
|
3
|
+
class BooleanOption < Delegator
|
4
|
+
def initialize(bool)
|
5
|
+
raise ArgumentError, 'must be `false` or `true`' unless bool.is_a?(FalseClass) || bool.is_a?(TrueClass)
|
6
|
+
@value = bool
|
7
|
+
end
|
8
|
+
|
9
|
+
def __getobj__; @value; end
|
10
|
+
def enable?; @value; end
|
11
|
+
def with_arg?; false; end
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
data/lib/rudachi/option/config.rb
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
require 'rudachi/configurable'
|
2
|
+
require 'rudachi/option/boolean_option'
|
3
|
+
require 'rudachi/option/string_option'
|
4
|
+
|
5
|
+
module Rudachi
|
6
|
+
class Option
|
7
|
+
extend Configurable
|
8
|
+
|
9
|
+
# @see https://github.com/WorksApplications/Sudachi#options
|
10
|
+
config_accessor :r, klass: StringOption, default: nil
|
11
|
+
config_accessor :s, klass: StringOption, default: nil
|
12
|
+
config_accessor :p, klass: StringOption, default: nil
|
13
|
+
config_accessor :m, klass: StringOption, default: nil
|
14
|
+
config_accessor :o, klass: StringOption, default: nil
|
15
|
+
config_accessor :a, klass: BooleanOption, default: false
|
16
|
+
config_accessor :d, klass: BooleanOption, default: false
|
17
|
+
config_accessor :t, klass: BooleanOption, default: false
|
18
|
+
config_accessor :ts, klass: BooleanOption, default: false
|
19
|
+
config_accessor :f, klass: BooleanOption, default: false
|
20
|
+
config_accessor :h, klass: BooleanOption, default: false
|
21
|
+
|
22
|
+
def self.cmds(opts=Option.new)
|
23
|
+
class_variables.each_with_object([]) do |name, flags|
|
24
|
+
key = name[2..-1].to_sym
|
25
|
+
opt = opts.get(key) { class_variable_get(name) }
|
26
|
+
next unless opt&.enable?
|
27
|
+
flags << "-#{key}"
|
28
|
+
flags << opt.to_s if opt.with_arg?
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
def initialize(**hash)
|
33
|
+
@opts = hash.each_with_object({}) do |(key, val), _hash|
|
34
|
+
raise ArgumentError, %{unknown option "#{key}"} unless self.class.class_variable_defined?("@@#{key}")
|
35
|
+
begin
|
36
|
+
_hash[key.to_sym] = self.class.class_variable_get("@@#{key}").class.new(val)
|
37
|
+
rescue ArgumentError => e
|
38
|
+
raise ArgumentError, %{"#{key}" #{e.message}}
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
def get(key, &block)
|
44
|
+
@opts.key?(key) ? @opts[key] : block&.call
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
data/lib/rudachi/option/string_option.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
module Rudachi
|
2
|
+
class Option
|
3
|
+
class StringOption < Delegator
|
4
|
+
def initialize(str)
|
5
|
+
raise ArgumentError, 'must be `nil` or `String`' unless str.nil? || str.is_a?(String)
|
6
|
+
@value = str
|
7
|
+
end
|
8
|
+
|
9
|
+
def __getobj__; @value; end
|
10
|
+
def enable?; !!@value; end
|
11
|
+
def with_arg?; true; end
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
data/lib/rudachi/text_parser.rb
CHANGED
@@ -3,25 +3,29 @@ require 'rudachi/file_parser'
|
|
3
3
|
module Rudachi
|
4
4
|
class TextParser < FileParser
|
5
5
|
def parse(text)
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
6
|
+
output_stream do |output|
|
7
|
+
take_stdin(input_stream(text)) do
|
8
|
+
take_stdout(output) do
|
9
|
+
Java::SudachiCommandLine.main(Option.cmds(@opts))
|
10
|
+
end
|
10
11
|
end
|
11
12
|
end
|
12
|
-
@output.toString
|
13
13
|
end
|
14
14
|
|
15
15
|
private
|
16
16
|
|
17
|
-
def take_stdin
|
17
|
+
def take_stdin(input)
|
18
18
|
stdin = Java::System.in
|
19
|
-
stream = Java::ByteArrayInputStream.new(@input.getBytes(Java::UTF_8))
|
20
|
-
Java::System.setIn(stream)
|
21
19
|
|
20
|
+
Java::System.setIn(input)
|
22
21
|
yield
|
23
|
-
|
24
22
|
Java::System.setIn(stdin)
|
25
23
|
end
|
24
|
+
|
25
|
+
def input_stream(text)
|
26
|
+
Java::ByteArrayInputStream.new(
|
27
|
+
text.to_java.getBytes(Java::UTF_8)
|
28
|
+
)
|
29
|
+
end
|
26
30
|
end
|
27
31
|
end
|
data/lib/rudachi/version.rb
CHANGED
data/lib/rudachi.rb
CHANGED
metadata
CHANGED
@@ -1,16 +1,16 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rudachi
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.1
|
4
|
+
version: 1.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- SongCastle
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-04-
|
11
|
+
date: 2022-04-12 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
|
-
description:
|
13
|
+
description: A JRuby wrapper for Sudachi.
|
14
14
|
email: "-"
|
15
15
|
executables: []
|
16
16
|
extensions: []
|
@@ -23,6 +23,10 @@ files:
|
|
23
23
|
- lib/rudachi/dependencies.rb
|
24
24
|
- lib/rudachi/file_parser.rb
|
25
25
|
- lib/rudachi/loader.rb
|
26
|
+
- lib/rudachi/option/boolean_option.rb
|
27
|
+
- lib/rudachi/option/config.rb
|
28
|
+
- lib/rudachi/option/string_option.rb
|
29
|
+
- lib/rudachi/stream_parser.rb
|
26
30
|
- lib/rudachi/text_parser.rb
|
27
31
|
- lib/rudachi/version.rb
|
28
32
|
homepage: https://github.com/SongCastle/rudachi
|
@@ -47,5 +51,5 @@ requirements: []
|
|
47
51
|
rubygems_version: 3.0.3
|
48
52
|
signing_key:
|
49
53
|
specification_version: 4
|
50
|
-
summary:
|
54
|
+
summary: A JRuby wrapper for Sudachi
|
51
55
|
test_files: []
|