rudachi 1.1.0 → 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 72d5093c7585e3ffdf4792c81489924ab73d55988f7648b0249ff4c6669e5cb0
4
- data.tar.gz: e780dc4a15c8575fc1ddee73c46923a3ea2c5d48ec175f54f6455d8b8fabb542
3
+ metadata.gz: b2a7d72805e33fa5e12884193bf931d6bdf7ce16d6f9243022c7fcedad8c2f23
4
+ data.tar.gz: '0050952b57a66b96a89ed7cebf5dc51caed87a2437cf9c4dc494b62578f90f20'
5
5
  SHA512:
6
- metadata.gz: 71f5f9c92949085bad0cacf1fffe6c7dfe9f3cc0974ae2ef1ce14b4888d72ba44ce9f74feba1b6937743c0c8c861a8a5cd3a1525d250ee499277a03fdf8890f8
7
- data.tar.gz: aa843e075e6cc61aa6d275bfe2fe6caf80faa6cca22c41d5ee38da0ab444e1ade89402d0f14815dcc4d16c0558347b62cfa6a79c467f011243afa92b7a22424f
6
+ metadata.gz: a1a1799c8bcda90ec099a44fedc81f2c2018cd1fec7fbbf1065b1dcde5282ff0d427702b3f5276679adac88ebcf27cd6370f987ebb5aa751d660b96392b91c86
7
+ data.tar.gz: e25716b5b9b2483bb5c3433482d0b655ddb2eee944d18d1b9fdacca409aa26a8faf20ad350d478fffcbfe0f9761e867208e81a3396749b6f189195e35e4d1f03
data/README.md CHANGED
@@ -1,29 +1,41 @@
1
1
  # Rudachi
2
- [Sudachi](https://github.com/WorksApplications/Sudachi) wrapper Gem for JRuby.
2
+ Ruby wrapper for [Sudachi](https://github.com/WorksApplications/Sudachi).
3
3
 
4
- #### For Text
4
+ #### Text
5
5
  ```rb
6
6
  Rudachi::TextParser.parse('東京都へ行く')
7
7
  => "東京都\t名詞,固有名詞,地名,一般,*,*\t東京都\nへ\t助詞,格助詞,*,*,*,*\tへ\n行く\t動詞,非自立可能,*,*,五段-カ行,終止形-一般\t行く\nEOS\n"
8
8
  ```
9
9
 
10
- #### For File
10
+ #### File
11
11
  ```rb
12
- File.open('sample.txt', 'w') { |f| f << '東京都へ行く' }
13
- Rudachi::FileParser.parse('sample.txt')
12
+ File.open('input.txt', 'w') { |f| f << '東京都へ行く' }
13
+ Rudachi::FileParser.parse('input.txt')
14
+ => "東京都\t名詞,固有名詞,地名,一般,*,*\t東京都\nへ\t助詞,格助詞,*,*,*,*\tへ\n行く\t動詞,非自立可能,*,*,五段-カ行,終止形-一般\t行く\nEOS\n"
15
+ ```
16
+
17
+ #### IO
18
+ ```rb
19
+ Rudachi::StreamParser.parse(StringIO.new('東京都へ行く'))
14
20
  => "東京都\t名詞,固有名詞,地名,一般,*,*\t東京都\nへ\t助詞,格助詞,*,*,*,*\tへ\n行く\t動詞,非自立可能,*,*,五段-カ行,終止形-一般\t行く\nEOS\n"
15
21
  ```
16
22
 
17
23
  #### With [some options](https://github.com/WorksApplications/Sudachi#options)
18
24
  ```rb
19
- Rudachi::TextParser.new(o: 'result.txt', m: 'A').parse('東京都へ行く')
20
- File.read('result.txt')
25
+ Rudachi::TextParser.new(o: 'output.txt', m: 'A').parse('東京都へ行く')
26
+ File.read('output.txt')
21
27
  => "東京\t名詞,固有名詞,地名,一般,*,*\t東京\n都\t名詞,普通名詞,一般,*,*,*\t都\nへ\t助詞,格助詞,*,*,*,*\tへ\n行く\t動詞,非自立可能,*,*,五段-カ行,終止形-一般\t行く\nEOS\n"
22
28
  ```
23
29
 
24
30
  ## Requirements
25
31
 
26
- - [JRuby](https://github.com/jruby/jruby) 9.1.3.0 or later
32
+ #### Ruby
33
+ - Ruby 2.3.0 or newer
34
+ - [rjb](https://github.com/arton/rjb) 1.1.1 or newer
35
+ - [Sudachi](https://github.com/WorksApplications/Sudachi)
36
+
37
+ #### JRuby
38
+ - [JRuby](https://github.com/jruby/jruby) 9.1.3.0 or newer
27
39
  - [Sudachi](https://github.com/WorksApplications/Sudachi)
28
40
 
29
41
  ## Installation
@@ -54,11 +66,13 @@ LEGAL LICENSE-2.0.txt system_core.dic
54
66
  gem 'rudachi'
55
67
  ```
56
68
 
57
- Then run `bundle install` .
69
+ Then run `bundle install`.
58
70
 
59
71
  3. Initialize Rudachi
60
72
 
61
73
  ```rb
74
+ require 'rudachi'
75
+
62
76
  Rudachi.configure do |config|
63
77
  config.jar_path = 'sudachi-0.5.3/sudachi-0.5.3.jar'
64
78
  end
@@ -71,8 +85,6 @@ end
71
85
  4. Did it !!
72
86
 
73
87
  ```rb
74
- require 'rudachi'
75
-
76
88
  Rudachi::TextParser.parse('こんにちは世界')
77
89
  => "こんにちは\t感動詞,一般,*,*,*,*\t今日は\n世界\t名詞,普通名詞,一般,*,*,*\t世界\nEOS\n"
78
90
  ```
@@ -7,11 +7,10 @@ module Rudachi
7
7
  private
8
8
 
9
9
  def config_accessor(name, klass:, default:)
10
- attr_def = <<~EOS
10
+ module_eval <<~EOS
11
11
  def self.#{name}; @@#{name}; end
12
12
  def self.#{name}=(val); @@#{name} = #{klass}.new(val); end
13
13
  EOS
14
- module_eval(attr_def)
15
14
  public_send("#{name}=", default)
16
15
  end
17
16
  end
@@ -0,0 +1,42 @@
1
+ require 'java'
2
+ require 'rudachi/lazy_load'
3
+ require Rudachi.jar_path
4
+
5
+ java_import 'java.lang.System'
6
+ java_import 'java.io.PrintStream'
7
+ java_import 'java.io.ByteArrayInputStream'
8
+ java_import 'java.io.ByteArrayOutputStream'
9
+ java_import 'java.nio.charset.StandardCharsets'
10
+ java_import 'com.worksap.nlp.sudachi.SudachiCommandLine'
11
+
12
+ module Rudachi
13
+ module Java
14
+ String = ::Java::JavaLang::String
15
+ System = ::Java::JavaLang::System
16
+ PrintStream = ::Java::JavaIo::PrintStream
17
+ ByteArrayInputStream = ::Java::JavaIo::ByteArrayInputStream
18
+ ByteArrayOutputStream = ::Java::JavaIo::ByteArrayOutputStream
19
+ UTF_8 = ::Java::JavaNioCharset::StandardCharsets::UTF_8
20
+ SudachiCommandLine = ::Java::ComWorksapNlpSudachi::SudachiCommandLine
21
+ end
22
+
23
+ module StreamProcessor
24
+ class InvalidInclusion < StandardError; end
25
+
26
+ def self.included(base)
27
+ raise InvalidInclusion unless base.ancestors.include?(TextParser)
28
+ end
29
+
30
+ def parse(io)
31
+ output_stream do |output|
32
+ take_stdin(io.to_inputstream) do
33
+ take_stdout(output) do
34
+ Java::SudachiCommandLine.main(Option.cmds(@opts))
35
+ end
36
+ end
37
+ end
38
+ end
39
+
40
+ LazyLoad.run_load_hooks(:stream_processor)
41
+ end
42
+ end
@@ -0,0 +1,37 @@
1
+ require 'rjb'
2
+ require 'rudachi/lazy_load'
3
+
4
+ Rjb::load(Rudachi.jar_path.to_s)
5
+
6
+ module Rudachi
7
+ module Java
8
+ String = Rjb::import('java.lang.String')
9
+ System = Rjb::import('java.lang.System')
10
+ PrintStream = Rjb::import('java.io.PrintStream')
11
+ ByteArrayInputStream = Rjb::import('java.io.ByteArrayInputStream')
12
+ ByteArrayOutputStream = Rjb::import('java.io.ByteArrayOutputStream')
13
+ UTF_8 = Rjb::import('java.nio.charset.StandardCharsets').UTF_8
14
+ SudachiCommandLine = Rjb::import('com.worksap.nlp.sudachi.SudachiCommandLine')
15
+ end
16
+
17
+ module StreamProcessor
18
+ class InvalidInclusion < StandardError; end
19
+
20
+ TERM = ?\n
21
+
22
+ def self.included(base)
23
+ raise InvalidInclusion unless base.ancestors.include?(TextParser)
24
+ end
25
+
26
+ def parse(io)
27
+ ret = []
28
+ while data = io.gets
29
+ ret << super(data).strip
30
+ end
31
+
32
+ ret.join(TERM)
33
+ end
34
+
35
+ LazyLoad.run_load_hooks(:stream_processor)
36
+ end
37
+ end
@@ -9,30 +9,33 @@ module Rudachi
9
9
 
10
10
  def initialize(**opts)
11
11
  Rudachi.load!
12
-
13
- @output = Java::ByteArrayOutputStream.new
14
- @opts = Option.new(opts)
12
+ @opts = Option.new(**opts)
15
13
  end
16
14
 
17
15
  def parse(path)
18
- take_stdout do
19
- Java::SudachiCommandLine.main(
20
- Option.cmds(@opts).push(Java::String.new(path))
21
- )
16
+ output_stream do |output|
17
+ take_stdout(output) do
18
+ Java::SudachiCommandLine.main(
19
+ Option.cmds(@opts).push(path)
20
+ )
21
+ end
22
22
  end
23
- @output.toString
24
23
  end
25
24
 
26
25
  private
27
26
 
28
- def take_stdout
27
+ def take_stdout(output)
29
28
  stdout = Java::System.out
30
- stream = Java::PrintStream.new(@output)
31
- Java::System.setOut(stream)
32
29
 
30
+ Java::System.setOut(output)
33
31
  yield
34
-
35
32
  Java::System.setOut(stdout)
36
33
  end
34
+
35
+ def output_stream
36
+ Java::ByteArrayOutputStream.new.tap do |output|
37
+ yield Java::PrintStream.new(output)
38
+ end.toString
39
+ end
37
40
  end
38
41
  end
@@ -0,0 +1,16 @@
1
+ module Rudachi
2
+ module LazyLoad
3
+ @@hooks = {}
4
+
5
+ class << self
6
+ def on_load(name, &block)
7
+ @@hooks[name] ||= []
8
+ @@hooks[name] << block
9
+ end
10
+
11
+ def run_load_hooks(name)
12
+ @@hooks[name]&.each(&:call)
13
+ end
14
+ end
15
+ end
16
+ end
@@ -1,5 +1,15 @@
1
1
  module Rudachi
2
- def self.load!
3
- require 'rudachi/dependencies'
2
+ class << self
3
+ def load!
4
+ if jruby?
5
+ require 'rudachi/dependencies/jruby'
6
+ else
7
+ require 'rudachi/dependencies/ruby'
8
+ end
9
+ end
10
+
11
+ def jruby?
12
+ RUBY_PLATFORM == 'java'
13
+ end
4
14
  end
5
15
  end
@@ -0,0 +1,10 @@
1
+ require 'rudachi/lazy_load'
2
+ require 'rudachi/text_parser'
3
+
4
+ module Rudachi
5
+ class StreamParser < TextParser
6
+ LazyLoad.on_load(:stream_processor) do
7
+ include StreamProcessor
8
+ end
9
+ end
10
+ end
@@ -3,25 +3,29 @@ require 'rudachi/file_parser'
3
3
  module Rudachi
4
4
  class TextParser < FileParser
5
5
  def parse(text)
6
- @input = Java::String.new(text)
7
- take_stdin do
8
- take_stdout do
9
- Java::SudachiCommandLine.main(Option.cmds(@opts))
6
+ output_stream do |output|
7
+ take_stdin(input_stream(text)) do
8
+ take_stdout(output) do
9
+ Java::SudachiCommandLine.main(Option.cmds(@opts))
10
+ end
10
11
  end
11
12
  end
12
- @output.toString
13
13
  end
14
14
 
15
15
  private
16
16
 
17
- def take_stdin
17
+ def take_stdin(input)
18
18
  stdin = Java::System.in
19
- stream = Java::ByteArrayInputStream.new(@input.getBytes(Java::UTF_8))
20
- Java::System.setIn(stream)
21
19
 
20
+ Java::System.setIn(input)
22
21
  yield
23
-
24
22
  Java::System.setIn(stdin)
25
23
  end
24
+
25
+ def input_stream(text)
26
+ Java::ByteArrayInputStream.new(
27
+ Java::String.new(text).getBytes(Java::UTF_8)
28
+ )
29
+ end
26
30
  end
27
31
  end
@@ -1,3 +1,3 @@
1
1
  module Rudachi
2
- VERSION = '1.1.0'
2
+ VERSION = '1.3.0'
3
3
  end
data/lib/rudachi.rb CHANGED
@@ -2,3 +2,4 @@ require 'rudachi/config'
2
2
  require 'rudachi/option/config'
3
3
  require 'rudachi/file_parser'
4
4
  require 'rudachi/text_parser'
5
+ require 'rudachi/stream_parser'
metadata CHANGED
@@ -1,16 +1,30 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rudachi
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - SongCastle
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-04-09 00:00:00.000000000 Z
12
- dependencies: []
13
- description: Sudachi wrapper for JRuby.
11
+ date: 2022-04-26 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rjb
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 1.1.1
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 1.1.1
27
+ description: A Ruby wrapper for Sudachi.
14
28
  email: "-"
15
29
  executables: []
16
30
  extensions: []
@@ -20,12 +34,15 @@ files:
20
34
  - lib/rudachi.rb
21
35
  - lib/rudachi/config.rb
22
36
  - lib/rudachi/configurable.rb
23
- - lib/rudachi/dependencies.rb
37
+ - lib/rudachi/dependencies/jruby.rb
38
+ - lib/rudachi/dependencies/ruby.rb
24
39
  - lib/rudachi/file_parser.rb
40
+ - lib/rudachi/lazy_load.rb
25
41
  - lib/rudachi/loader.rb
26
42
  - lib/rudachi/option/boolean_option.rb
27
43
  - lib/rudachi/option/config.rb
28
44
  - lib/rudachi/option/string_option.rb
45
+ - lib/rudachi/stream_parser.rb
29
46
  - lib/rudachi/text_parser.rb
30
47
  - lib/rudachi/version.rb
31
48
  homepage: https://github.com/SongCastle/rudachi
@@ -50,5 +67,5 @@ requirements: []
50
67
  rubygems_version: 3.0.3
51
68
  signing_key:
52
69
  specification_version: 4
53
- summary: Sudachi wrapper for JRuby
70
+ summary: A Ruby wrapper for Sudachi
54
71
  test_files: []
@@ -1,19 +0,0 @@
1
- require 'java'
2
- java_import 'java.lang.System'
3
- java_import 'java.io.PrintStream'
4
- java_import 'java.io.ByteArrayInputStream'
5
- java_import 'java.io.ByteArrayOutputStream'
6
- java_import 'java.nio.charset.StandardCharsets'
7
-
8
- require Rudachi.jar_path
9
- java_import 'com.worksap.nlp.sudachi.SudachiCommandLine'
10
-
11
- module Java
12
- String = JavaLang::String
13
- System = JavaLang::System
14
- ByteArrayInputStream = JavaIo::ByteArrayInputStream
15
- ByteArrayOutputStream = JavaIo::ByteArrayOutputStream
16
- PrintStream = JavaIo::PrintStream
17
- UTF_8 = JavaNioCharset::StandardCharsets::UTF_8
18
- SudachiCommandLine = ComWorksapNlpSudachi::SudachiCommandLine
19
- end