rudachi 1.1.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 72d5093c7585e3ffdf4792c81489924ab73d55988f7648b0249ff4c6669e5cb0
4
- data.tar.gz: e780dc4a15c8575fc1ddee73c46923a3ea2c5d48ec175f54f6455d8b8fabb542
3
+ metadata.gz: b2a7d72805e33fa5e12884193bf931d6bdf7ce16d6f9243022c7fcedad8c2f23
4
+ data.tar.gz: '0050952b57a66b96a89ed7cebf5dc51caed87a2437cf9c4dc494b62578f90f20'
5
5
  SHA512:
6
- metadata.gz: 71f5f9c92949085bad0cacf1fffe6c7dfe9f3cc0974ae2ef1ce14b4888d72ba44ce9f74feba1b6937743c0c8c861a8a5cd3a1525d250ee499277a03fdf8890f8
7
- data.tar.gz: aa843e075e6cc61aa6d275bfe2fe6caf80faa6cca22c41d5ee38da0ab444e1ade89402d0f14815dcc4d16c0558347b62cfa6a79c467f011243afa92b7a22424f
6
+ metadata.gz: a1a1799c8bcda90ec099a44fedc81f2c2018cd1fec7fbbf1065b1dcde5282ff0d427702b3f5276679adac88ebcf27cd6370f987ebb5aa751d660b96392b91c86
7
+ data.tar.gz: e25716b5b9b2483bb5c3433482d0b655ddb2eee944d18d1b9fdacca409aa26a8faf20ad350d478fffcbfe0f9761e867208e81a3396749b6f189195e35e4d1f03
data/README.md CHANGED
@@ -1,29 +1,41 @@
1
1
  # Rudachi
2
- [Sudachi](https://github.com/WorksApplications/Sudachi) wrapper Gem for JRuby.
2
+ Ruby wrapper for [Sudachi](https://github.com/WorksApplications/Sudachi).
3
3
 
4
- #### For Text
4
+ #### Text
5
5
  ```rb
6
6
  Rudachi::TextParser.parse('東京都へ行く')
7
7
  => "東京都\t名詞,固有名詞,地名,一般,*,*\t東京都\nへ\t助詞,格助詞,*,*,*,*\tへ\n行く\t動詞,非自立可能,*,*,五段-カ行,終止形-一般\t行く\nEOS\n"
8
8
  ```
9
9
 
10
- #### For File
10
+ #### File
11
11
  ```rb
12
- File.open('sample.txt', 'w') { |f| f << '東京都へ行く' }
13
- Rudachi::FileParser.parse('sample.txt')
12
+ File.open('input.txt', 'w') { |f| f << '東京都へ行く' }
13
+ Rudachi::FileParser.parse('input.txt')
14
+ => "東京都\t名詞,固有名詞,地名,一般,*,*\t東京都\nへ\t助詞,格助詞,*,*,*,*\tへ\n行く\t動詞,非自立可能,*,*,五段-カ行,終止形-一般\t行く\nEOS\n"
15
+ ```
16
+
17
+ #### IO
18
+ ```rb
19
+ Rudachi::StreamParser.parse(StringIO.new('東京都へ行く'))
14
20
  => "東京都\t名詞,固有名詞,地名,一般,*,*\t東京都\nへ\t助詞,格助詞,*,*,*,*\tへ\n行く\t動詞,非自立可能,*,*,五段-カ行,終止形-一般\t行く\nEOS\n"
15
21
  ```
16
22
 
17
23
  #### With [some options](https://github.com/WorksApplications/Sudachi#options)
18
24
  ```rb
19
- Rudachi::TextParser.new(o: 'result.txt', m: 'A').parse('東京都へ行く')
20
- File.read('result.txt')
25
+ Rudachi::TextParser.new(o: 'output.txt', m: 'A').parse('東京都へ行く')
26
+ File.read('output.txt')
21
27
  => "東京\t名詞,固有名詞,地名,一般,*,*\t東京\n都\t名詞,普通名詞,一般,*,*,*\t都\nへ\t助詞,格助詞,*,*,*,*\tへ\n行く\t動詞,非自立可能,*,*,五段-カ行,終止形-一般\t行く\nEOS\n"
22
28
  ```
23
29
 
24
30
  ## Requirements
25
31
 
26
- - [JRuby](https://github.com/jruby/jruby) 9.1.3.0 or later
32
+ #### Ruby
33
+ - Ruby 2.3.0 or newer
34
+ - [rjb](https://github.com/arton/rjb) 1.1.1 or newer
35
+ - [Sudachi](https://github.com/WorksApplications/Sudachi)
36
+
37
+ #### JRuby
38
+ - [JRuby](https://github.com/jruby/jruby) 9.1.3.0 or newer
27
39
  - [Sudachi](https://github.com/WorksApplications/Sudachi)
28
40
 
29
41
  ## Installation
@@ -54,11 +66,13 @@ LEGAL LICENSE-2.0.txt system_core.dic
54
66
  gem 'rudachi'
55
67
  ```
56
68
 
57
- Then run `bundle install` .
69
+ Then run `bundle install`.
58
70
 
59
71
  3. Initialize Rudachi
60
72
 
61
73
  ```rb
74
+ require 'rudachi'
75
+
62
76
  Rudachi.configure do |config|
63
77
  config.jar_path = 'sudachi-0.5.3/sudachi-0.5.3.jar'
64
78
  end
@@ -71,8 +85,6 @@ end
71
85
  4. Did it !!
72
86
 
73
87
  ```rb
74
- require 'rudachi'
75
-
76
88
  Rudachi::TextParser.parse('こんにちは世界')
77
89
  => "こんにちは\t感動詞,一般,*,*,*,*\t今日は\n世界\t名詞,普通名詞,一般,*,*,*\t世界\nEOS\n"
78
90
  ```
@@ -7,11 +7,10 @@ module Rudachi
7
7
  private
8
8
 
9
9
  def config_accessor(name, klass:, default:)
10
- attr_def = <<~EOS
10
+ module_eval <<~EOS
11
11
  def self.#{name}; @@#{name}; end
12
12
  def self.#{name}=(val); @@#{name} = #{klass}.new(val); end
13
13
  EOS
14
- module_eval(attr_def)
15
14
  public_send("#{name}=", default)
16
15
  end
17
16
  end
@@ -0,0 +1,42 @@
1
+ require 'java'
2
+ require 'rudachi/lazy_load'
3
+ require Rudachi.jar_path
4
+
5
+ java_import 'java.lang.System'
6
+ java_import 'java.io.PrintStream'
7
+ java_import 'java.io.ByteArrayInputStream'
8
+ java_import 'java.io.ByteArrayOutputStream'
9
+ java_import 'java.nio.charset.StandardCharsets'
10
+ java_import 'com.worksap.nlp.sudachi.SudachiCommandLine'
11
+
12
+ module Rudachi
13
+ module Java
14
+ String = ::Java::JavaLang::String
15
+ System = ::Java::JavaLang::System
16
+ PrintStream = ::Java::JavaIo::PrintStream
17
+ ByteArrayInputStream = ::Java::JavaIo::ByteArrayInputStream
18
+ ByteArrayOutputStream = ::Java::JavaIo::ByteArrayOutputStream
19
+ UTF_8 = ::Java::JavaNioCharset::StandardCharsets::UTF_8
20
+ SudachiCommandLine = ::Java::ComWorksapNlpSudachi::SudachiCommandLine
21
+ end
22
+
23
+ module StreamProcessor
24
+ class InvalidInclusion < StandardError; end
25
+
26
+ def self.included(base)
27
+ raise InvalidInclusion unless base.ancestors.include?(TextParser)
28
+ end
29
+
30
+ def parse(io)
31
+ output_stream do |output|
32
+ take_stdin(io.to_inputstream) do
33
+ take_stdout(output) do
34
+ Java::SudachiCommandLine.main(Option.cmds(@opts))
35
+ end
36
+ end
37
+ end
38
+ end
39
+
40
+ LazyLoad.run_load_hooks(:stream_processor)
41
+ end
42
+ end
@@ -0,0 +1,37 @@
1
+ require 'rjb'
2
+ require 'rudachi/lazy_load'
3
+
4
+ Rjb::load(Rudachi.jar_path.to_s)
5
+
6
+ module Rudachi
7
+ module Java
8
+ String = Rjb::import('java.lang.String')
9
+ System = Rjb::import('java.lang.System')
10
+ PrintStream = Rjb::import('java.io.PrintStream')
11
+ ByteArrayInputStream = Rjb::import('java.io.ByteArrayInputStream')
12
+ ByteArrayOutputStream = Rjb::import('java.io.ByteArrayOutputStream')
13
+ UTF_8 = Rjb::import('java.nio.charset.StandardCharsets').UTF_8
14
+ SudachiCommandLine = Rjb::import('com.worksap.nlp.sudachi.SudachiCommandLine')
15
+ end
16
+
17
+ module StreamProcessor
18
+ class InvalidInclusion < StandardError; end
19
+
20
+ TERM = ?\n
21
+
22
+ def self.included(base)
23
+ raise InvalidInclusion unless base.ancestors.include?(TextParser)
24
+ end
25
+
26
+ def parse(io)
27
+ ret = []
28
+ while data = io.gets
29
+ ret << super(data).strip
30
+ end
31
+
32
+ ret.join(TERM)
33
+ end
34
+
35
+ LazyLoad.run_load_hooks(:stream_processor)
36
+ end
37
+ end
@@ -9,30 +9,33 @@ module Rudachi
9
9
 
10
10
  def initialize(**opts)
11
11
  Rudachi.load!
12
-
13
- @output = Java::ByteArrayOutputStream.new
14
- @opts = Option.new(opts)
12
+ @opts = Option.new(**opts)
15
13
  end
16
14
 
17
15
  def parse(path)
18
- take_stdout do
19
- Java::SudachiCommandLine.main(
20
- Option.cmds(@opts).push(Java::String.new(path))
21
- )
16
+ output_stream do |output|
17
+ take_stdout(output) do
18
+ Java::SudachiCommandLine.main(
19
+ Option.cmds(@opts).push(path)
20
+ )
21
+ end
22
22
  end
23
- @output.toString
24
23
  end
25
24
 
26
25
  private
27
26
 
28
- def take_stdout
27
+ def take_stdout(output)
29
28
  stdout = Java::System.out
30
- stream = Java::PrintStream.new(@output)
31
- Java::System.setOut(stream)
32
29
 
30
+ Java::System.setOut(output)
33
31
  yield
34
-
35
32
  Java::System.setOut(stdout)
36
33
  end
34
+
35
+ def output_stream
36
+ Java::ByteArrayOutputStream.new.tap do |output|
37
+ yield Java::PrintStream.new(output)
38
+ end.toString
39
+ end
37
40
  end
38
41
  end
@@ -0,0 +1,16 @@
1
+ module Rudachi
2
+ module LazyLoad
3
+ @@hooks = {}
4
+
5
+ class << self
6
+ def on_load(name, &block)
7
+ @@hooks[name] ||= []
8
+ @@hooks[name] << block
9
+ end
10
+
11
+ def run_load_hooks(name)
12
+ @@hooks[name]&.each(&:call)
13
+ end
14
+ end
15
+ end
16
+ end
@@ -1,5 +1,15 @@
1
1
  module Rudachi
2
- def self.load!
3
- require 'rudachi/dependencies'
2
+ class << self
3
+ def load!
4
+ if jruby?
5
+ require 'rudachi/dependencies/jruby'
6
+ else
7
+ require 'rudachi/dependencies/ruby'
8
+ end
9
+ end
10
+
11
+ def jruby?
12
+ RUBY_PLATFORM == 'java'
13
+ end
4
14
  end
5
15
  end
@@ -0,0 +1,10 @@
1
+ require 'rudachi/lazy_load'
2
+ require 'rudachi/text_parser'
3
+
4
+ module Rudachi
5
+ class StreamParser < TextParser
6
+ LazyLoad.on_load(:stream_processor) do
7
+ include StreamProcessor
8
+ end
9
+ end
10
+ end
@@ -3,25 +3,29 @@ require 'rudachi/file_parser'
3
3
  module Rudachi
4
4
  class TextParser < FileParser
5
5
  def parse(text)
6
- @input = Java::String.new(text)
7
- take_stdin do
8
- take_stdout do
9
- Java::SudachiCommandLine.main(Option.cmds(@opts))
6
+ output_stream do |output|
7
+ take_stdin(input_stream(text)) do
8
+ take_stdout(output) do
9
+ Java::SudachiCommandLine.main(Option.cmds(@opts))
10
+ end
10
11
  end
11
12
  end
12
- @output.toString
13
13
  end
14
14
 
15
15
  private
16
16
 
17
- def take_stdin
17
+ def take_stdin(input)
18
18
  stdin = Java::System.in
19
- stream = Java::ByteArrayInputStream.new(@input.getBytes(Java::UTF_8))
20
- Java::System.setIn(stream)
21
19
 
20
+ Java::System.setIn(input)
22
21
  yield
23
-
24
22
  Java::System.setIn(stdin)
25
23
  end
24
+
25
+ def input_stream(text)
26
+ Java::ByteArrayInputStream.new(
27
+ Java::String.new(text).getBytes(Java::UTF_8)
28
+ )
29
+ end
26
30
  end
27
31
  end
@@ -1,3 +1,3 @@
1
1
  module Rudachi
2
- VERSION = '1.1.0'
2
+ VERSION = '1.3.0'
3
3
  end
data/lib/rudachi.rb CHANGED
@@ -2,3 +2,4 @@ require 'rudachi/config'
2
2
  require 'rudachi/option/config'
3
3
  require 'rudachi/file_parser'
4
4
  require 'rudachi/text_parser'
5
+ require 'rudachi/stream_parser'
metadata CHANGED
@@ -1,16 +1,30 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rudachi
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - SongCastle
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-04-09 00:00:00.000000000 Z
12
- dependencies: []
13
- description: Sudachi wrapper for JRuby.
11
+ date: 2022-04-26 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rjb
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 1.1.1
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 1.1.1
27
+ description: A Ruby wrapper for Sudachi.
14
28
  email: "-"
15
29
  executables: []
16
30
  extensions: []
@@ -20,12 +34,15 @@ files:
20
34
  - lib/rudachi.rb
21
35
  - lib/rudachi/config.rb
22
36
  - lib/rudachi/configurable.rb
23
- - lib/rudachi/dependencies.rb
37
+ - lib/rudachi/dependencies/jruby.rb
38
+ - lib/rudachi/dependencies/ruby.rb
24
39
  - lib/rudachi/file_parser.rb
40
+ - lib/rudachi/lazy_load.rb
25
41
  - lib/rudachi/loader.rb
26
42
  - lib/rudachi/option/boolean_option.rb
27
43
  - lib/rudachi/option/config.rb
28
44
  - lib/rudachi/option/string_option.rb
45
+ - lib/rudachi/stream_parser.rb
29
46
  - lib/rudachi/text_parser.rb
30
47
  - lib/rudachi/version.rb
31
48
  homepage: https://github.com/SongCastle/rudachi
@@ -50,5 +67,5 @@ requirements: []
50
67
  rubygems_version: 3.0.3
51
68
  signing_key:
52
69
  specification_version: 4
53
- summary: Sudachi wrapper for JRuby
70
+ summary: A Ruby wrapper for Sudachi
54
71
  test_files: []
@@ -1,19 +0,0 @@
1
- require 'java'
2
- java_import 'java.lang.System'
3
- java_import 'java.io.PrintStream'
4
- java_import 'java.io.ByteArrayInputStream'
5
- java_import 'java.io.ByteArrayOutputStream'
6
- java_import 'java.nio.charset.StandardCharsets'
7
-
8
- require Rudachi.jar_path
9
- java_import 'com.worksap.nlp.sudachi.SudachiCommandLine'
10
-
11
- module Java
12
- String = JavaLang::String
13
- System = JavaLang::System
14
- ByteArrayInputStream = JavaIo::ByteArrayInputStream
15
- ByteArrayOutputStream = JavaIo::ByteArrayOutputStream
16
- PrintStream = JavaIo::PrintStream
17
- UTF_8 = JavaNioCharset::StandardCharsets::UTF_8
18
- SudachiCommandLine = ComWorksapNlpSudachi::SudachiCommandLine
19
- end