rudachi-rb 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: '0136994c8ace08ad3b922acc5599ad8b35d5c1f0387826e24c4d4ad9efd26fe8'
4
+ data.tar.gz: 3fbbf61bdf9db17a35efad222c211aacc741f7b6b1221edf044fd19984695ff6
5
+ SHA512:
6
+ metadata.gz: 810dc0f49f4ee9d8dbceb25847c623c0cb84618d74c25542b0632c45a2f021b4ca127d7649e4bb129bcef6b8e7a7efa7e551865410f0b605a079eabb6375c582
7
+ data.tar.gz: 4135642a21814f82103eaf3c107ae4a5f1b2de3728f06ca9159b9286057fae5631fa36ee77073106ed0b815962a68d5162c94f913884fab57aee754e732feba9
data/README.md ADDED
@@ -0,0 +1,88 @@
1
+ # Rudachi-rb
2
+ Ruby wrapper for [Sudachi](https://github.com/WorksApplications/Sudachi).<br>
3
+ ([rudachi](https://github.com/SongCastle/rudachi) for Ruby)
4
+
5
+ #### Text
6
+ ```rb
7
+ Rudachi::TextParser.parse('東京都へ行く')
8
+ => "東京都\t名詞,固有名詞,地名,一般,*,*\t東京都\nへ\t助詞,格助詞,*,*,*,*\tへ\n行く\t動詞,非自立可能,*,*,五段-カ行,終止形-一般\t行く\nEOS\n"
9
+ ```
10
+
11
+ #### File
12
+ ```rb
13
+ File.open('input.txt', 'w') { |f| f << '東京都へ行く' }
14
+ Rudachi::FileParser.parse('input.txt')
15
+ => "東京都\t名詞,固有名詞,地名,一般,*,*\t東京都\nへ\t助詞,格助詞,*,*,*,*\tへ\n行く\t動詞,非自立可能,*,*,五段-カ行,終止形-一般\t行く\nEOS\n"
16
+ ```
17
+
18
+ #### IO
19
+ ```rb
20
+ Rudachi::StreamParser.parse(StringIO.new('東京都へ行く'))
21
+ => "東京都\t名詞,固有名詞,地名,一般,*,*\t東京都\nへ\t助詞,格助詞,*,*,*,*\tへ\n行く\t動詞,非自立可能,*,*,五段-カ行,終止形-一般\t行く\nEOS\n"
22
+ ```
23
+
24
+ #### With [some options](https://github.com/WorksApplications/Sudachi#options)
25
+ ```rb
26
+ Rudachi::TextParser.new(o: 'output.txt', m: 'A').parse('東京都へ行く')
27
+ File.read('output.txt')
28
+ => "東京\t名詞,固有名詞,地名,一般,*,*\t東京\n都\t名詞,普通名詞,一般,*,*,*\t都\nへ\t助詞,格助詞,*,*,*,*\tへ\n行く\t動詞,非自立可能,*,*,五段-カ行,終止形-一般\t行く\nEOS"
29
+ ```
30
+
31
+ ## Requirements
32
+
33
+ - Ruby 2.3.0 or newer
34
+ - [rjb](https://github.com/arton/rjb) 1.1.1 or newer
35
+ - [Sudachi](https://github.com/WorksApplications/Sudachi)
36
+
37
+ For JRuby, please use [rudachi](https://github.com/SongCastle/rudachi) instead.
38
+
39
+ ## Installation
40
+
41
+ 1. Install the Sudachi JAR and dictionary ([Details](https://github.com/WorksApplications/Sudachi/blob/develop/docs/tutorial.md#linux-%E3%81%AE%E5%A0%B4%E5%90%88))
42
+
43
+ ##### Install the Sudachi JAR file
44
+ ```sh
45
+ $ wget https://github.com/WorksApplications/Sudachi/releases/download/v0.5.3/sudachi-0.5.3-executable.zip
46
+ $ unzip sudachi-0.5.3-executable.zip
47
+ $ ls sudachi-0.5.3
48
+ LICENSE-2.0.txt README.md javax.json-1.1.jar jdartsclone-1.2.0.jar licenses sudachi-0.5.3.jar sudachi.json sudachi_fulldict.json
49
+ ```
50
+
51
+ ##### Install the Sudachi dictionary
52
+ ```sh
53
+ $ wget http://sudachi.s3-website-ap-northeast-1.amazonaws.com/sudachidict/sudachi-dictionary-latest-full.zip
54
+ $ unzip -j -d sudachi-dictionary-latest-full sudachi-dictionary-latest-full.zip
55
+ $ mv sudachi-dictionary-latest-full/system_full.dic sudachi-dictionary-latest-full/system_core.dic
56
+ $ ls sudachi-dictionary-latest-full
57
+ LEGAL LICENSE-2.0.txt system_core.dic
58
+ ```
59
+
60
+ 2. Install Rudachi
61
+
62
+ ```rb
63
+ # Gemfile
64
+ gem 'rudachi-rb'
65
+ ```
66
+
67
+ Then run `bundle install`.
68
+
69
+ 3. Initialize Rudachi
70
+
71
+ ```rb
72
+ require 'rudachi/rb'
73
+
74
+ Rudachi.configure do |config|
75
+ config.jar_path = 'sudachi-0.5.3/sudachi-0.5.3.jar'
76
+ end
77
+
78
+ Rudachi::Option.configure do |config|
79
+ config.p = 'sudachi-dictionary-latest-full'
80
+ end
81
+ ```
82
+
83
+ 4. Done! Try it out:
84
+
85
+ ```rb
86
+ Rudachi::TextParser.parse('こんにちは世界')
87
+ => "こんにちは\t感動詞,一般,*,*,*,*\t今日は\n世界\t名詞,普通名詞,一般,*,*,*\t世界\nEOS\n"
88
+ ```
@@ -0,0 +1,41 @@
1
+ require 'rjb'
2
+ require 'rudachi/lazy_load'
3
+
4
+ Rjb::load(Rudachi.jar_path.to_s)
5
+
6
+ module Rudachi
7
+ module RB
8
+ module Java # Constants bound to Java classes imported through Rjb.
9
+ String = Rjb::import('java.lang.String')
10
+ System = Rjb::import('java.lang.System')
11
+ PrintStream = Rjb::import('java.io.PrintStream')
12
+ ByteArrayInputStream = Rjb::import('java.io.ByteArrayInputStream')
13
+ ByteArrayOutputStream = Rjb::import('java.io.ByteArrayOutputStream')
14
+ UTF_8 = Rjb::import('java.nio.charset.StandardCharsets').UTF_8 # the UTF_8 field of StandardCharsets, not the class itself
15
+ SudachiCommandLine = Rjb::import('com.worksap.nlp.sudachi.SudachiCommandLine') # needs the Sudachi JAR, loaded earlier in this file via Rjb::load
16
+
17
+ Rudachi::LazyLoad.run_load_hooks(:java_rb, self) # hands this module to the hooks registered under :java_rb
18
+ end
19
+
20
+ module StreamProcessor # Mixin that parses an IO object one line at a time by delegating each line to super.
21
+ class InvalidInclusion < StandardError; end # raised when the including class does not descend from TextParser
22
+
23
+ TERM = ?\n # separator placed between the per-line results in #parse
24
+
25
+ def self.included(base) # inclusion hook: rejects bases whose ancestors lack TextParser
26
+ raise InvalidInclusion unless base.ancestors.include?(TextParser)
27
+ end
28
+
29
+ def parse(io) # io must respond to #gets; returns the per-line results joined with TERM
30
+ ret = []
31
+ while data = io.gets # assignment is intentional: gets returns nil at end of input, ending the loop
32
+ ret << super(data).strip # super is the parse of the next ancestor (presumably TextParser#parse -- confirm); strip drops surrounding whitespace of each result
33
+ end
34
+
35
+ ret.join(TERM)
36
+ end
37
+
38
+ Rudachi::LazyLoad.run_load_hooks(:stream_processor, self) # hands this module to the hooks registered under :stream_processor
39
+ end
40
+ end
41
+ end
@@ -0,0 +1,10 @@
1
+ module Rudachi
2
+ module RB
3
+ module Loader # Supplies load!; rudachi/rb.rb prepends this module onto Rudachi::Loader's singleton class.
4
+ def load! # raises on JRuby, otherwise requires the Rjb-backed dependencies; does not call super
5
+ raise Rudachi::Loader::UnavailableError, 'ruby_required' if jruby? # jruby? is not defined here -- presumably provided by Rudachi::Loader; confirm
6
+ require 'rudachi/rb/dependencies' # deliberate lazy require: that file calls Rjb::load at require time
7
+ end
8
+ end
9
+ end
10
+ end
@@ -0,0 +1,5 @@
1
+ module Rudachi
2
+ module RB
3
+ VERSION = '1.0.0' # gem version string; matches the version recorded in the gem metadata
4
+ end
5
+ end
data/lib/rudachi/rb.rb ADDED
@@ -0,0 +1,16 @@
1
+ require 'rudachi'
2
+ require 'rudachi/rb/loader'
3
+
4
+ module Rudachi
5
+ module Loader
6
+ class << self # singleton class: the prepend below affects Rudachi::Loader's module-level methods
7
+ prepend RB::Loader # RB::Loader#load! now takes precedence over any load! defined on the singleton itself
8
+ end
9
+ end
10
+
11
+ module Java
12
+ LazyLoad.on_load(:java_rb) do |mod| # registers a hook; RB::Java runs the :java_rb hooks with itself as the argument
13
+ include mod # presumably mixes RB::Java into Rudachi::Java -- depends on how LazyLoad evaluates the block; confirm
14
+ end
15
+ end
16
+ end
data/lib/rudachi-rb.rb ADDED
@@ -0,0 +1 @@
1
+ require 'rudachi/rb'
metadata ADDED
@@ -0,0 +1,76 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rudachi-rb
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - SongCastle
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2022-04-30 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rjb
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 1.1.1
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 1.1.1
27
+ - !ruby/object:Gem::Dependency
28
+ name: rudachi
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: 1.4.0
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: 1.4.0
41
+ description: A Ruby wrapper for Sudachi.
42
+ email: "-"
43
+ executables: []
44
+ extensions: []
45
+ extra_rdoc_files: []
46
+ files:
47
+ - README.md
48
+ - lib/rudachi-rb.rb
49
+ - lib/rudachi/rb.rb
50
+ - lib/rudachi/rb/dependencies.rb
51
+ - lib/rudachi/rb/loader.rb
52
+ - lib/rudachi/rb/version.rb
53
+ homepage: https://github.com/SongCastle/rudachi-rb
54
+ licenses:
55
+ - MIT
56
+ metadata: {}
57
+ post_install_message:
58
+ rdoc_options: []
59
+ require_paths:
60
+ - lib
61
+ required_ruby_version: !ruby/object:Gem::Requirement
62
+ requirements:
63
+ - - ">="
64
+ - !ruby/object:Gem::Version
65
+ version: '2.3'
66
+ required_rubygems_version: !ruby/object:Gem::Requirement
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ version: '0'
71
+ requirements: []
72
+ rubygems_version: 3.0.3
73
+ signing_key:
74
+ specification_version: 4
75
+ summary: A Ruby wrapper for Sudachi
76
+ test_files: []