rudachi-rb 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.md +88 -0
- data/lib/rudachi/rb/dependencies.rb +41 -0
- data/lib/rudachi/rb/loader.rb +10 -0
- data/lib/rudachi/rb/version.rb +5 -0
- data/lib/rudachi/rb.rb +16 -0
- data/lib/rudachi-rb.rb +1 -0
- metadata +76 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: '0136994c8ace08ad3b922acc5599ad8b35d5c1f0387826e24c4d4ad9efd26fe8'
|
4
|
+
data.tar.gz: 3fbbf61bdf9db17a35efad222c211aacc741f7b6b1221edf044fd19984695ff6
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 810dc0f49f4ee9d8dbceb25847c623c0cb84618d74c25542b0632c45a2f021b4ca127d7649e4bb129bcef6b8e7a7efa7e551865410f0b605a079eabb6375c582
|
7
|
+
data.tar.gz: 4135642a21814f82103eaf3c107ae4a5f1b2de3728f06ca9159b9286057fae5631fa36ee77073106ed0b815962a68d5162c94f913884fab57aee754e732feba9
|
data/README.md
ADDED
@@ -0,0 +1,88 @@
|
|
1
|
+
# Rudachi-rb
|
2
|
+
Ruby wrapper for [Sudachi](https://github.com/WorksApplications/Sudachi).<br>
|
3
|
+
([rudachi](https://github.com/SongCastle/rudachi) for Ruby)
|
4
|
+
|
5
|
+
#### Text
|
6
|
+
```rb
|
7
|
+
Rudachi::TextParser.parse('東京都へ行く')
|
8
|
+
=> "東京都\t名詞,固有名詞,地名,一般,*,*\t東京都\nへ\t助詞,格助詞,*,*,*,*\tへ\n行く\t動詞,非自立可能,*,*,五段-カ行,終止形-一般\t行く\nEOS\n"
|
9
|
+
```
|
10
|
+
|
11
|
+
#### File
|
12
|
+
```rb
|
13
|
+
File.open('input.txt', 'w') { |f| f << '東京都へ行く' }
|
14
|
+
Rudachi::FileParser.parse('input.txt')
|
15
|
+
=> "東京都\t名詞,固有名詞,地名,一般,*,*\t東京都\nへ\t助詞,格助詞,*,*,*,*\tへ\n行く\t動詞,非自立可能,*,*,五段-カ行,終止形-一般\t行く\nEOS\n"
|
16
|
+
```
|
17
|
+
|
18
|
+
#### IO
|
19
|
+
```rb
|
20
|
+
Rudachi::StreamParser.parse(StringIO.new('東京都へ行く'))
|
21
|
+
=> "東京都\t名詞,固有名詞,地名,一般,*,*\t東京都\nへ\t助詞,格助詞,*,*,*,*\tへ\n行く\t動詞,非自立可能,*,*,五段-カ行,終止形-一般\t行く\nEOS\n"
|
22
|
+
```
|
23
|
+
|
24
|
+
#### With [some options](https://github.com/WorksApplications/Sudachi#options)
|
25
|
+
```rb
|
26
|
+
Rudachi::TextParser.new(o: 'output.txt', m: 'A').parse('東京都へ行く')
|
27
|
+
File.read('output.txt')
|
28
|
+
=> "東京\t名詞,固有名詞,地名,一般,*,*\t東京\n都\t名詞,普通名詞,一般,*,*,*\t都\nへ\t助詞,格助詞,*,*,*,*\tへ\n行く\t動詞,非自立可能,*,*,五段-カ行,終止形-一般\t行く\nEOS"
|
29
|
+
```
|
30
|
+
|
31
|
+
## Requirements
|
32
|
+
|
33
|
+
- Ruby 2.3.0 or newer
|
34
|
+
- [rjb](https://github.com/arton/rjb) 1.1.1 or newer
|
35
|
+
- [Sudachi](https://github.com/WorksApplications/Sudachi)
|
36
|
+
|
37
|
+
For JRuby, please check [rudachi](https://github.com/SongCastle/rudachi).
|
38
|
+
|
39
|
+
## Installation
|
40
|
+
|
41
|
+
1. Install the Sudachi JAR and dictionary ([Details](https://github.com/WorksApplications/Sudachi/blob/develop/docs/tutorial.md#linux-%E3%81%AE%E5%A0%B4%E5%90%88))
|
42
|
+
|
43
|
+
##### Install the Sudachi JAR file
|
44
|
+
```sh
|
45
|
+
$ wget https://github.com/WorksApplications/Sudachi/releases/download/v0.5.3/sudachi-0.5.3-executable.zip
|
46
|
+
$ unzip sudachi-0.5.3-executable.zip
|
47
|
+
$ ls sudachi-0.5.3
|
48
|
+
LICENSE-2.0.txt README.md javax.json-1.1.jar jdartsclone-1.2.0.jar licenses sudachi-0.5.3.jar sudachi.json sudachi_fulldict.json
|
49
|
+
```
|
50
|
+
|
51
|
+
##### Install the Sudachi dictionary
|
52
|
+
```sh
|
53
|
+
$ wget http://sudachi.s3-website-ap-northeast-1.amazonaws.com/sudachidict/sudachi-dictionary-latest-full.zip
|
54
|
+
$ unzip -j -d sudachi-dictionary-latest-full sudachi-dictionary-latest-full.zip
|
55
|
+
$ mv sudachi-dictionary-latest-full/system_full.dic sudachi-dictionary-latest-full/system_core.dic
|
56
|
+
$ ls sudachi-dictionary-latest-full
|
57
|
+
LEGAL LICENSE-2.0.txt system_core.dic
|
58
|
+
```
|
59
|
+
|
60
|
+
2. Install Rudachi
|
61
|
+
|
62
|
+
```rb
|
63
|
+
# Gemfile
|
64
|
+
gem 'rudachi-rb'
|
65
|
+
```
|
66
|
+
|
67
|
+
Then run `bundle install`.
|
68
|
+
|
69
|
+
3. Initialize Rudachi
|
70
|
+
|
71
|
+
```rb
|
72
|
+
require 'rudachi/rb'
|
73
|
+
|
74
|
+
Rudachi.configure do |config|
|
75
|
+
config.jar_path = 'sudachi-0.5.3/sudachi-0.5.3.jar'
|
76
|
+
end
|
77
|
+
|
78
|
+
Rudachi::Option.configure do |config|
|
79
|
+
config.p = 'sudachi-dictionary-latest-full'
|
80
|
+
end
|
81
|
+
```
|
82
|
+
|
83
|
+
4. Done!
|
84
|
+
|
85
|
+
```rb
|
86
|
+
Rudachi::TextParser.parse('こんにちは世界')
|
87
|
+
=> "こんにちは\t感動詞,一般,*,*,*,*\t今日は\n世界\t名詞,普通名詞,一般,*,*,*\t世界\nEOS\n"
|
88
|
+
```
|
data/lib/rudachi/rb/dependencies.rb
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
require 'rjb'
|
2
|
+
require 'rudachi/lazy_load'
|
3
|
+
|
4
|
+
# Hand the JAR path configured on Rudachi (Rudachi.jar_path) to Rjb at
# require time, ahead of the Rjb imports performed further down this file.
# NOTE(review): presumably jar_path must already be configured when this file
# is required — confirm against the loader.
Rjb::load(Rudachi.jar_path.to_s)
|
5
|
+
|
6
|
+
module Rudachi
  module RB
    # Handles for the Java classes used by this gem, each obtained through Rjb.
    module Java
      String = Rjb.import('java.lang.String')
      System = Rjb.import('java.lang.System')
      PrintStream = Rjb.import('java.io.PrintStream')
      ByteArrayInputStream = Rjb.import('java.io.ByteArrayInputStream')
      ByteArrayOutputStream = Rjb.import('java.io.ByteArrayOutputStream')

      # The UTF_8 member is read off the imported StandardCharsets class.
      standard_charsets = Rjb.import('java.nio.charset.StandardCharsets')
      UTF_8 = standard_charsets.UTF_8

      SudachiCommandLine = Rjb.import('com.worksap.nlp.sudachi.SudachiCommandLine')

      # Invoked only after every constant above has been assigned.
      # NOTE(review): presumably runs callbacks registered under :java_rb —
      # confirm in rudachi/lazy_load.
      Rudachi::LazyLoad.run_load_hooks(:java_rb, self)
    end

    # Mixin whose #parse consumes an IO-like object one line at a time and
    # delegates every line to the #parse found further up the ancestor chain.
    module StreamProcessor
      # Raised when the module is included into something that does not have
      # TextParser among its ancestors.
      class InvalidInclusion < StandardError
      end

      # Separator placed between the per-line results.
      TERM = "\n"

      # Inclusion hook: only descendants of TextParser may include this module.
      #
      # @param base [Module] the class or module performing the include
      # @raise [InvalidInclusion] when TextParser is not an ancestor of +base+
      def self.included(base)
        return if base.ancestors.include?(TextParser)

        raise InvalidInclusion
      end

      # Reads +io+ via #gets until it returns a falsy value, passes each line
      # to +super+, strips the result, and joins the pieces with TERM.
      #
      # @param io [#gets] source of input lines
      # @return [String] stripped per-line results joined by TERM
      def parse(io)
        pieces = []
        line = io.gets
        while line
          pieces << super(line).strip
          line = io.gets
        end

        pieces.join(TERM)
      end

      # NOTE(review): presumably runs callbacks registered under
      # :stream_processor — confirm in rudachi/lazy_load.
      Rudachi::LazyLoad.run_load_hooks(:stream_processor, self)
    end
  end
end
|
data/lib/rudachi/rb.rb
ADDED
data/lib/rudachi-rb.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'rudachi/rb'
|
metadata
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: rudachi-rb
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- SongCastle
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2022-04-30 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rjb
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 1.1.1
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 1.1.1
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rudachi
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 1.4.0
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 1.4.0
|
41
|
+
description: A Ruby wrapper for Sudachi.
|
42
|
+
email: "-"
|
43
|
+
executables: []
|
44
|
+
extensions: []
|
45
|
+
extra_rdoc_files: []
|
46
|
+
files:
|
47
|
+
- README.md
|
48
|
+
- lib/rudachi-rb.rb
|
49
|
+
- lib/rudachi/rb.rb
|
50
|
+
- lib/rudachi/rb/dependencies.rb
|
51
|
+
- lib/rudachi/rb/loader.rb
|
52
|
+
- lib/rudachi/rb/version.rb
|
53
|
+
homepage: https://github.com/SongCastle/rudachi-rb
|
54
|
+
licenses:
|
55
|
+
- MIT
|
56
|
+
metadata: {}
|
57
|
+
post_install_message:
|
58
|
+
rdoc_options: []
|
59
|
+
require_paths:
|
60
|
+
- lib
|
61
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
62
|
+
requirements:
|
63
|
+
- - ">="
|
64
|
+
- !ruby/object:Gem::Version
|
65
|
+
version: '2.3'
|
66
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
67
|
+
requirements:
|
68
|
+
- - ">="
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
version: '0'
|
71
|
+
requirements: []
|
72
|
+
rubygems_version: 3.0.3
|
73
|
+
signing_key:
|
74
|
+
specification_version: 4
|
75
|
+
summary: A Ruby wrapper for Sudachi
|
76
|
+
test_files: []
|