rudachi 1.1.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +23 -11
- data/lib/rudachi/configurable.rb +1 -2
- data/lib/rudachi/dependencies/jruby.rb +42 -0
- data/lib/rudachi/dependencies/ruby.rb +37 -0
- data/lib/rudachi/file_parser.rb +15 -12
- data/lib/rudachi/lazy_load.rb +16 -0
- data/lib/rudachi/loader.rb +12 -2
- data/lib/rudachi/stream_parser.rb +10 -0
- data/lib/rudachi/text_parser.rb +13 -9
- data/lib/rudachi/version.rb +1 -1
- data/lib/rudachi.rb +1 -0
- metadata +23 -6
- data/lib/rudachi/dependencies.rb +0 -19
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b2a7d72805e33fa5e12884193bf931d6bdf7ce16d6f9243022c7fcedad8c2f23
|
4
|
+
data.tar.gz: '0050952b57a66b96a89ed7cebf5dc51caed87a2437cf9c4dc494b62578f90f20'
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a1a1799c8bcda90ec099a44fedc81f2c2018cd1fec7fbbf1065b1dcde5282ff0d427702b3f5276679adac88ebcf27cd6370f987ebb5aa751d660b96392b91c86
|
7
|
+
data.tar.gz: e25716b5b9b2483bb5c3433482d0b655ddb2eee944d18d1b9fdacca409aa26a8faf20ad350d478fffcbfe0f9761e867208e81a3396749b6f189195e35e4d1f03
|
data/README.md
CHANGED
@@ -1,29 +1,41 @@
|
|
1
1
|
# Rudachi
|
2
|
-
[Sudachi](https://github.com/WorksApplications/Sudachi)
|
2
|
+
Ruby wrapper for [Sudachi](https://github.com/WorksApplications/Sudachi).
|
3
3
|
|
4
|
-
####
|
4
|
+
#### Text
|
5
5
|
```rb
|
6
6
|
Rudachi::TextParser.parse('東京都へ行く')
|
7
7
|
=> "東京都\t名詞,固有名詞,地名,一般,*,*\t東京都\nへ\t助詞,格助詞,*,*,*,*\tへ\n行く\t動詞,非自立可能,*,*,五段-カ行,終止形-一般\t行く\nEOS\n"
|
8
8
|
```
|
9
9
|
|
10
|
-
####
|
10
|
+
#### File
|
11
11
|
```rb
|
12
|
-
File.open('
|
13
|
-
Rudachi::FileParser.parse('
|
12
|
+
File.open('input.txt', 'w') { |f| f << '東京都へ行く' }
|
13
|
+
Rudachi::FileParser.parse('input.txt')
|
14
|
+
=> "東京都\t名詞,固有名詞,地名,一般,*,*\t東京都\nへ\t助詞,格助詞,*,*,*,*\tへ\n行く\t動詞,非自立可能,*,*,五段-カ行,終止形-一般\t行く\nEOS\n"
|
15
|
+
```
|
16
|
+
|
17
|
+
#### IO
|
18
|
+
```rb
|
19
|
+
Rudachi::StreamParser.parse(StringIO.new('東京都へ行く'))
|
14
20
|
=> "東京都\t名詞,固有名詞,地名,一般,*,*\t東京都\nへ\t助詞,格助詞,*,*,*,*\tへ\n行く\t動詞,非自立可能,*,*,五段-カ行,終止形-一般\t行く\nEOS\n"
|
15
21
|
```
|
16
22
|
|
17
23
|
#### With [some options](https://github.com/WorksApplications/Sudachi#options)
|
18
24
|
```rb
|
19
|
-
Rudachi::TextParser.new(o: '
|
20
|
-
File.read('
|
25
|
+
Rudachi::TextParser.new(o: 'output.txt', m: 'A').parse('東京都へ行く')
|
26
|
+
File.read('output.txt')
|
21
27
|
=> "東京\t名詞,固有名詞,地名,一般,*,*\t東京\n都\t名詞,普通名詞,一般,*,*,*\t都\nへ\t助詞,格助詞,*,*,*,*\tへ\n行く\t動詞,非自立可能,*,*,五段-カ行,終止形-一般\t行く\nEOS\n"
|
22
28
|
```
|
23
29
|
|
24
30
|
## Requirements
|
25
31
|
|
26
|
-
|
32
|
+
#### Ruby
|
33
|
+
- Ruby 2.3.0 or newer
|
34
|
+
- [rjb](https://github.com/arton/rjb) 1.1.1 or newer
|
35
|
+
- [Sudachi](https://github.com/WorksApplications/Sudachi)
|
36
|
+
|
37
|
+
#### JRuby
|
38
|
+
- [JRuby](https://github.com/jruby/jruby) 9.1.3.0 or newer
|
27
39
|
- [Sudachi](https://github.com/WorksApplications/Sudachi)
|
28
40
|
|
29
41
|
## Installation
|
@@ -54,11 +66,13 @@ LEGAL LICENSE-2.0.txt system_core.dic
|
|
54
66
|
gem 'rudachi'
|
55
67
|
```
|
56
68
|
|
57
|
-
Then run `bundle install
|
69
|
+
Then run `bundle install`.
|
58
70
|
|
59
71
|
3. Initialize Rudachi
|
60
72
|
|
61
73
|
```rb
|
74
|
+
require 'rudachi'
|
75
|
+
|
62
76
|
Rudachi.configure do |config|
|
63
77
|
config.jar_path = 'sudachi-0.5.3/sudachi-0.5.3.jar'
|
64
78
|
end
|
@@ -71,8 +85,6 @@ end
|
|
71
85
|
4. Did it !!
|
72
86
|
|
73
87
|
```rb
|
74
|
-
require 'rudachi'
|
75
|
-
|
76
88
|
Rudachi::TextParser.parse('こんにちは世界')
|
77
89
|
=> "こんにちは\t感動詞,一般,*,*,*,*\t今日は\n世界\t名詞,普通名詞,一般,*,*,*\t世界\nEOS\n"
|
78
90
|
```
|
data/lib/rudachi/configurable.rb
CHANGED
@@ -7,11 +7,10 @@ module Rudachi
|
|
7
7
|
private
|
8
8
|
|
9
9
|
def config_accessor(name, klass:, default:)
|
10
|
-
|
10
|
+
module_eval <<~EOS
|
11
11
|
def self.#{name}; @@#{name}; end
|
12
12
|
def self.#{name}=(val); @@#{name} = #{klass}.new(val); end
|
13
13
|
EOS
|
14
|
-
module_eval(attr_def)
|
15
14
|
public_send("#{name}=", default)
|
16
15
|
end
|
17
16
|
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require 'java'
|
2
|
+
require 'rudachi/lazy_load'
|
3
|
+
require Rudachi.jar_path
|
4
|
+
|
5
|
+
java_import 'java.lang.System'
|
6
|
+
java_import 'java.io.PrintStream'
|
7
|
+
java_import 'java.io.ByteArrayInputStream'
|
8
|
+
java_import 'java.io.ByteArrayOutputStream'
|
9
|
+
java_import 'java.nio.charset.StandardCharsets'
|
10
|
+
java_import 'com.worksap.nlp.sudachi.SudachiCommandLine'
|
11
|
+
|
12
|
+
module Rudachi
|
13
|
+
module Java
|
14
|
+
String = ::Java::JavaLang::String
|
15
|
+
System = ::Java::JavaLang::System
|
16
|
+
PrintStream = ::Java::JavaIo::PrintStream
|
17
|
+
ByteArrayInputStream = ::Java::JavaIo::ByteArrayInputStream
|
18
|
+
ByteArrayOutputStream = ::Java::JavaIo::ByteArrayOutputStream
|
19
|
+
UTF_8 = ::Java::JavaNioCharset::StandardCharsets::UTF_8
|
20
|
+
SudachiCommandLine = ::Java::ComWorksapNlpSudachi::SudachiCommandLine
|
21
|
+
end
|
22
|
+
|
23
|
+
module StreamProcessor
|
24
|
+
class InvalidInclusion < StandardError; end
|
25
|
+
|
26
|
+
def self.included(base)
|
27
|
+
raise InvalidInclusion unless base.ancestors.include?(TextParser)
|
28
|
+
end
|
29
|
+
|
30
|
+
def parse(io)
|
31
|
+
output_stream do |output|
|
32
|
+
take_stdin(io.to_inputstream) do
|
33
|
+
take_stdout(output) do
|
34
|
+
Java::SudachiCommandLine.main(Option.cmds(@opts))
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
LazyLoad.run_load_hooks(:stream_processor)
|
41
|
+
end
|
42
|
+
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require 'rjb'
|
2
|
+
require 'rudachi/lazy_load'
|
3
|
+
|
4
|
+
Rjb::load(Rudachi.jar_path.to_s)
|
5
|
+
|
6
|
+
module Rudachi
|
7
|
+
module Java
|
8
|
+
String = Rjb::import('java.lang.String')
|
9
|
+
System = Rjb::import('java.lang.System')
|
10
|
+
PrintStream = Rjb::import('java.io.PrintStream')
|
11
|
+
ByteArrayInputStream = Rjb::import('java.io.ByteArrayInputStream')
|
12
|
+
ByteArrayOutputStream = Rjb::import('java.io.ByteArrayOutputStream')
|
13
|
+
UTF_8 = Rjb::import('java.nio.charset.StandardCharsets').UTF_8
|
14
|
+
SudachiCommandLine = Rjb::import('com.worksap.nlp.sudachi.SudachiCommandLine')
|
15
|
+
end
|
16
|
+
|
17
|
+
module StreamProcessor
|
18
|
+
class InvalidInclusion < StandardError; end
|
19
|
+
|
20
|
+
TERM = ?\n
|
21
|
+
|
22
|
+
def self.included(base)
|
23
|
+
raise InvalidInclusion unless base.ancestors.include?(TextParser)
|
24
|
+
end
|
25
|
+
|
26
|
+
def parse(io)
|
27
|
+
ret = []
|
28
|
+
while data = io.gets
|
29
|
+
ret << super(data).strip
|
30
|
+
end
|
31
|
+
|
32
|
+
ret.join(TERM)
|
33
|
+
end
|
34
|
+
|
35
|
+
LazyLoad.run_load_hooks(:stream_processor)
|
36
|
+
end
|
37
|
+
end
|
data/lib/rudachi/file_parser.rb
CHANGED
@@ -9,30 +9,33 @@ module Rudachi
|
|
9
9
|
|
10
10
|
def initialize(**opts)
|
11
11
|
Rudachi.load!
|
12
|
-
|
13
|
-
@output = Java::ByteArrayOutputStream.new
|
14
|
-
@opts = Option.new(opts)
|
12
|
+
@opts = Option.new(**opts)
|
15
13
|
end
|
16
14
|
|
17
15
|
def parse(path)
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
16
|
+
output_stream do |output|
|
17
|
+
take_stdout(output) do
|
18
|
+
Java::SudachiCommandLine.main(
|
19
|
+
Option.cmds(@opts).push(path)
|
20
|
+
)
|
21
|
+
end
|
22
22
|
end
|
23
|
-
@output.toString
|
24
23
|
end
|
25
24
|
|
26
25
|
private
|
27
26
|
|
28
|
-
def take_stdout
|
27
|
+
def take_stdout(output)
|
29
28
|
stdout = Java::System.out
|
30
|
-
stream = Java::PrintStream.new(@output)
|
31
|
-
Java::System.setOut(stream)
|
32
29
|
|
30
|
+
Java::System.setOut(output)
|
33
31
|
yield
|
34
|
-
|
35
32
|
Java::System.setOut(stdout)
|
36
33
|
end
|
34
|
+
|
35
|
+
def output_stream
|
36
|
+
Java::ByteArrayOutputStream.new.tap do |output|
|
37
|
+
yield Java::PrintStream.new(output)
|
38
|
+
end.toString
|
39
|
+
end
|
37
40
|
end
|
38
41
|
end
|
data/lib/rudachi/loader.rb
CHANGED
@@ -1,5 +1,15 @@
|
|
1
1
|
module Rudachi
|
2
|
-
|
3
|
-
|
2
|
+
class << self
|
3
|
+
def load!
|
4
|
+
if jruby?
|
5
|
+
require 'rudachi/dependencies/jruby'
|
6
|
+
else
|
7
|
+
require 'rudachi/dependencies/ruby'
|
8
|
+
end
|
9
|
+
end
|
10
|
+
|
11
|
+
def jruby?
|
12
|
+
RUBY_PLATFORM == 'java'
|
13
|
+
end
|
4
14
|
end
|
5
15
|
end
|
data/lib/rudachi/text_parser.rb
CHANGED
@@ -3,25 +3,29 @@ require 'rudachi/file_parser'
|
|
3
3
|
module Rudachi
|
4
4
|
class TextParser < FileParser
|
5
5
|
def parse(text)
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
6
|
+
output_stream do |output|
|
7
|
+
take_stdin(input_stream(text)) do
|
8
|
+
take_stdout(output) do
|
9
|
+
Java::SudachiCommandLine.main(Option.cmds(@opts))
|
10
|
+
end
|
10
11
|
end
|
11
12
|
end
|
12
|
-
@output.toString
|
13
13
|
end
|
14
14
|
|
15
15
|
private
|
16
16
|
|
17
|
-
def take_stdin
|
17
|
+
def take_stdin(input)
|
18
18
|
stdin = Java::System.in
|
19
|
-
stream = Java::ByteArrayInputStream.new(@input.getBytes(Java::UTF_8))
|
20
|
-
Java::System.setIn(stream)
|
21
19
|
|
20
|
+
Java::System.setIn(input)
|
22
21
|
yield
|
23
|
-
|
24
22
|
Java::System.setIn(stdin)
|
25
23
|
end
|
24
|
+
|
25
|
+
def input_stream(text)
|
26
|
+
Java::ByteArrayInputStream.new(
|
27
|
+
Java::String.new(text).getBytes(Java::UTF_8)
|
28
|
+
)
|
29
|
+
end
|
26
30
|
end
|
27
31
|
end
|
data/lib/rudachi/version.rb
CHANGED
data/lib/rudachi.rb
CHANGED
metadata
CHANGED
@@ -1,16 +1,30 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rudachi
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.0
|
4
|
+
version: 1.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- SongCastle
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-04-
|
12
|
-
dependencies:
|
13
|
-
|
11
|
+
date: 2022-04-26 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rjb
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 1.1.1
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 1.1.1
|
27
|
+
description: A Ruby wrapper for Sudachi.
|
14
28
|
email: "-"
|
15
29
|
executables: []
|
16
30
|
extensions: []
|
@@ -20,12 +34,15 @@ files:
|
|
20
34
|
- lib/rudachi.rb
|
21
35
|
- lib/rudachi/config.rb
|
22
36
|
- lib/rudachi/configurable.rb
|
23
|
-
- lib/rudachi/dependencies.rb
|
37
|
+
- lib/rudachi/dependencies/jruby.rb
|
38
|
+
- lib/rudachi/dependencies/ruby.rb
|
24
39
|
- lib/rudachi/file_parser.rb
|
40
|
+
- lib/rudachi/lazy_load.rb
|
25
41
|
- lib/rudachi/loader.rb
|
26
42
|
- lib/rudachi/option/boolean_option.rb
|
27
43
|
- lib/rudachi/option/config.rb
|
28
44
|
- lib/rudachi/option/string_option.rb
|
45
|
+
- lib/rudachi/stream_parser.rb
|
29
46
|
- lib/rudachi/text_parser.rb
|
30
47
|
- lib/rudachi/version.rb
|
31
48
|
homepage: https://github.com/SongCastle/rudachi
|
@@ -50,5 +67,5 @@ requirements: []
|
|
50
67
|
rubygems_version: 3.0.3
|
51
68
|
signing_key:
|
52
69
|
specification_version: 4
|
53
|
-
summary:
|
70
|
+
summary: A Ruby wrapper for Sudachi
|
54
71
|
test_files: []
|
data/lib/rudachi/dependencies.rb
DELETED
@@ -1,19 +0,0 @@
|
|
1
|
-
require 'java'
|
2
|
-
java_import 'java.lang.System'
|
3
|
-
java_import 'java.io.PrintStream'
|
4
|
-
java_import 'java.io.ByteArrayInputStream'
|
5
|
-
java_import 'java.io.ByteArrayOutputStream'
|
6
|
-
java_import 'java.nio.charset.StandardCharsets'
|
7
|
-
|
8
|
-
require Rudachi.jar_path
|
9
|
-
java_import 'com.worksap.nlp.sudachi.SudachiCommandLine'
|
10
|
-
|
11
|
-
module Java
|
12
|
-
String = JavaLang::String
|
13
|
-
System = JavaLang::System
|
14
|
-
ByteArrayInputStream = JavaIo::ByteArrayInputStream
|
15
|
-
ByteArrayOutputStream = JavaIo::ByteArrayOutputStream
|
16
|
-
PrintStream = JavaIo::PrintStream
|
17
|
-
UTF_8 = JavaNioCharset::StandardCharsets::UTF_8
|
18
|
-
SudachiCommandLine = ComWorksapNlpSudachi::SudachiCommandLine
|
19
|
-
end
|