rudachi-rb 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: '0136994c8ace08ad3b922acc5599ad8b35d5c1f0387826e24c4d4ad9efd26fe8'
4
+ data.tar.gz: 3fbbf61bdf9db17a35efad222c211aacc741f7b6b1221edf044fd19984695ff6
5
+ SHA512:
6
+ metadata.gz: 810dc0f49f4ee9d8dbceb25847c623c0cb84618d74c25542b0632c45a2f021b4ca127d7649e4bb129bcef6b8e7a7efa7e551865410f0b605a079eabb6375c582
7
+ data.tar.gz: 4135642a21814f82103eaf3c107ae4a5f1b2de3728f06ca9159b9286057fae5631fa36ee77073106ed0b815962a68d5162c94f913884fab57aee754e732feba9
data/README.md ADDED
@@ -0,0 +1,88 @@
1
+ # Rudachi-rb
2
+ Ruby wrapper for [Sudachi](https://github.com/WorksApplications/Sudachi).<br>
3
+ (The CRuby counterpart of [rudachi](https://github.com/SongCastle/rudachi), which targets JRuby.)
4
+
5
+ #### Text
6
+ ```rb
7
+ Rudachi::TextParser.parse('東京都へ行く')
8
+ => "東京都\t名詞,固有名詞,地名,一般,*,*\t東京都\nへ\t助詞,格助詞,*,*,*,*\tへ\n行く\t動詞,非自立可能,*,*,五段-カ行,終止形-一般\t行く\nEOS\n"
9
+ ```
10
+
11
+ #### File
12
+ ```rb
13
+ File.open('input.txt', 'w') { |f| f << '東京都へ行く' }
14
+ Rudachi::FileParser.parse('input.txt')
15
+ => "東京都\t名詞,固有名詞,地名,一般,*,*\t東京都\nへ\t助詞,格助詞,*,*,*,*\tへ\n行く\t動詞,非自立可能,*,*,五段-カ行,終止形-一般\t行く\nEOS\n"
16
+ ```
17
+
18
+ #### IO
19
+ ```rb
20
+ Rudachi::StreamParser.parse(StringIO.new('東京都へ行く'))
21
+ => "東京都\t名詞,固有名詞,地名,一般,*,*\t東京都\nへ\t助詞,格助詞,*,*,*,*\tへ\n行く\t動詞,非自立可能,*,*,五段-カ行,終止形-一般\t行く\nEOS\n"
22
+ ```
23
+
24
+ #### With [some options](https://github.com/WorksApplications/Sudachi#options)
25
+ ```rb
26
+ Rudachi::TextParser.new(o: 'output.txt', m: 'A').parse('東京都へ行く')
27
+ File.read('output.txt')
28
+ => "東京\t名詞,固有名詞,地名,一般,*,*\t東京\n都\t名詞,普通名詞,一般,*,*,*\t都\nへ\t助詞,格助詞,*,*,*,*\tへ\n行く\t動詞,非自立可能,*,*,五段-カ行,終止形-一般\t行く\nEOS"
29
+ ```
30
+
31
+ ## Requirements
32
+
33
+ - Ruby 2.3.0 or newer
34
+ - [rjb](https://github.com/arton/rjb) 1.1.1 or newer
35
+ - [Sudachi](https://github.com/WorksApplications/Sudachi)
36
+
37
+ For JRuby, please check [rudachi](https://github.com/SongCastle/rudachi).
38
+
39
+ ## Installation
40
+
41
+ 1. Install the Sudachi JAR and dictionary ([Details](https://github.com/WorksApplications/Sudachi/blob/develop/docs/tutorial.md#linux-%E3%81%AE%E5%A0%B4%E5%90%88))
42
+
43
+ ##### Install the Sudachi JAR file
44
+ ```sh
45
+ $ wget https://github.com/WorksApplications/Sudachi/releases/download/v0.5.3/sudachi-0.5.3-executable.zip
46
+ $ unzip sudachi-0.5.3-executable.zip
47
+ $ ls sudachi-0.5.3
48
+ LICENSE-2.0.txt README.md javax.json-1.1.jar jdartsclone-1.2.0.jar licenses sudachi-0.5.3.jar sudachi.json sudachi_fulldict.json
49
+ ```
50
+
51
+ ##### Install the Sudachi dictionary
52
+ ```sh
53
+ $ wget http://sudachi.s3-website-ap-northeast-1.amazonaws.com/sudachidict/sudachi-dictionary-latest-full.zip
54
+ $ unzip -j -d sudachi-dictionary-latest-full sudachi-dictionary-latest-full.zip
55
+ $ mv sudachi-dictionary-latest-full/system_full.dic sudachi-dictionary-latest-full/system_core.dic
56
+ $ ls sudachi-dictionary-latest-full
57
+ LEGAL LICENSE-2.0.txt system_core.dic
58
+ ```
59
+
60
+ 2. Install Rudachi
61
+
62
+ ```rb
63
+ # Gemfile
64
+ gem 'rudachi-rb'
65
+ ```
66
+
67
+ Then run `bundle install`.
68
+
69
+ 3. Initialize Rudachi
70
+
71
+ ```rb
72
+ require 'rudachi/rb'
73
+
74
+ Rudachi.configure do |config|
75
+ config.jar_path = 'sudachi-0.5.3/sudachi-0.5.3.jar'
76
+ end
77
+
78
+ Rudachi::Option.configure do |config|
79
+ config.p = 'sudachi-dictionary-latest-full'
80
+ end
81
+ ```
82
+
83
+ 4. Done!
84
+
85
+ ```rb
86
+ Rudachi::TextParser.parse('こんにちは世界')
87
+ => "こんにちは\t感動詞,一般,*,*,*,*\t今日は\n世界\t名詞,普通名詞,一般,*,*,*\t世界\nEOS\n"
88
+ ```
@@ -0,0 +1,41 @@
1
+ require 'rjb'
2
+ require 'rudachi/lazy_load'
3
+
4
+ Rjb::load(Rudachi.jar_path.to_s)
5
+
6
+ module Rudachi
7
+   module RB
8
+     module Java # Java classes imported through Rjb and exposed as Ruby constants.
9
+       String = Rjb::import('java.lang.String')
10
+       System = Rjb::import('java.lang.System')
11
+       PrintStream = Rjb::import('java.io.PrintStream')
12
+       ByteArrayInputStream = Rjb::import('java.io.ByteArrayInputStream')
13
+       ByteArrayOutputStream = Rjb::import('java.io.ByteArrayOutputStream')
14
+       UTF_8 = Rjb::import('java.nio.charset.StandardCharsets').UTF_8 # The UTF_8 member read from the imported StandardCharsets class.
15
+       SudachiCommandLine = Rjb::import('com.worksap.nlp.sudachi.SudachiCommandLine')
16
+
17
+       Rudachi::LazyLoad.run_load_hooks(:java_rb, self) # Hands this module to LazyLoad under the :java_rb key (hook semantics live in rudachi/lazy_load).
18
+     end
19
+
20
+     module StreamProcessor # Mixin whose #parse feeds an IO to the next #parse in the ancestor chain one line at a time.
21
+       class InvalidInclusion < StandardError; end # Raised by .included when the including class lacks TextParser among its ancestors.
22
+
23
+       TERM = ?\n # Separator placed between the per-line results in #parse.
24
+
25
+       def self.included(base) # Include callback: rejects any base that does not have TextParser in its ancestors.
26
+         raise InvalidInclusion unless base.ancestors.include?(TextParser)
27
+       end
28
+
29
+       def parse(io) # Returns the stripped result of super for every line of io, joined with TERM.
30
+         ret = []
31
+         while data = io.gets # Assignment is intentional: gets returns nil at end of input, which ends the loop.
32
+           ret << super(data).strip # Delegates each line to the #parse found further up the ancestor chain.
33
+         end
34
+
35
+         ret.join(TERM)
36
+       end
37
+
38
+       Rudachi::LazyLoad.run_load_hooks(:stream_processor, self) # Hands this module to LazyLoad under the :stream_processor key.
39
+     end
40
+   end
41
+ end
@@ -0,0 +1,10 @@
1
+ module Rudachi
2
+   module RB
3
+     module Loader # Prepended onto the singleton class of Rudachi::Loader by rudachi/rb.
4
+       def load! # Raises under JRuby; otherwise requires the Rjb-backed dependencies file.
5
+         raise Rudachi::Loader::UnavailableError, 'ruby_required' if jruby? # jruby? is not defined in this file — presumably provided by the receiver; confirm.
6
+         require 'rudachi/rb/dependencies' # Required inside the method so the file is only loaded when load! is called.
7
+       end
8
+     end
9
+   end
10
+ end
@@ -0,0 +1,5 @@
1
+ module Rudachi
2
+   module RB
3
+     VERSION = '1.0.0' # Version string of the rudachi-rb gem.
4
+   end
5
+ end
data/lib/rudachi/rb.rb ADDED
@@ -0,0 +1,16 @@
1
+ require 'rudachi'
2
+ require 'rudachi/rb/loader'
3
+
4
+ module Rudachi
5
+   module Loader
6
+     class << self # Singleton class: the prepend below affects Rudachi::Loader's module-level methods.
7
+       prepend RB::Loader # Methods of RB::Loader (load!) are looked up before the singleton's own definitions.
8
+     end
9
+   end
10
+
11
+   module Java
12
+     LazyLoad.on_load(:java_rb) do |mod| # Registers a block under :java_rb, the key RB::Java passes to run_load_hooks.
13
+       include mod # NOTE(review): the receiver depends on how LazyLoad evaluates the block — presumably Rudachi::Java; confirm.
14
+     end
15
+   end
16
+ end
data/lib/rudachi-rb.rb ADDED
@@ -0,0 +1 @@
1
+ require 'rudachi/rb'
metadata ADDED
@@ -0,0 +1,76 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rudachi-rb
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - SongCastle
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2022-04-30 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rjb
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 1.1.1
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 1.1.1
27
+ - !ruby/object:Gem::Dependency
28
+ name: rudachi
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: 1.4.0
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: 1.4.0
41
+ description: A Ruby wrapper for Sudachi.
42
+ email: "-"
43
+ executables: []
44
+ extensions: []
45
+ extra_rdoc_files: []
46
+ files:
47
+ - README.md
48
+ - lib/rudachi-rb.rb
49
+ - lib/rudachi/rb.rb
50
+ - lib/rudachi/rb/dependencies.rb
51
+ - lib/rudachi/rb/loader.rb
52
+ - lib/rudachi/rb/version.rb
53
+ homepage: https://github.com/SongCastle/rudachi-rb
54
+ licenses:
55
+ - MIT
56
+ metadata: {}
57
+ post_install_message:
58
+ rdoc_options: []
59
+ require_paths:
60
+ - lib
61
+ required_ruby_version: !ruby/object:Gem::Requirement
62
+ requirements:
63
+ - - ">="
64
+ - !ruby/object:Gem::Version
65
+ version: '2.3'
66
+ required_rubygems_version: !ruby/object:Gem::Requirement
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ version: '0'
71
+ requirements: []
72
+ rubygems_version: 3.0.3
73
+ signing_key:
74
+ specification_version: 4
75
+ summary: A Ruby wrapper for Sudachi
76
+ test_files: []