rudachi 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.md +78 -0
- data/lib/rudachi/config.rb +32 -0
- data/lib/rudachi/configurable.rb +18 -0
- data/lib/rudachi/dependencies.rb +19 -0
- data/lib/rudachi/file_parser.rb +38 -0
- data/lib/rudachi/loader.rb +5 -0
- data/lib/rudachi/text_parser.rb +27 -0
- data/lib/rudachi/version.rb +3 -0
- data/lib/rudachi.rb +3 -0
- metadata +51 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: f43021bc769b0005c5d46c3abde244756712ce1419b82ae884729508aab64780
|
4
|
+
data.tar.gz: 5f3b2f74e771d2b3feb8d6de4abf7d59864da6108904437c28a498f2ae6d1e98
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 38535bc9cee7b34ee2ab3219294484d30758c82b4c2a85f53cb36519700eb52d2e44f75160ba2aee24a626a7dc95c5a02774bfd10c86da18067a7d6afcd33996
|
7
|
+
data.tar.gz: 289e76174d4f7053e71c2c276635ab6b2283b48c220cb498615c88605ffec6a41f3888c89a80ad728383170c5052c3dd76be81a02bef91dc38f4049b18db1c21
|
data/README.md
ADDED
@@ -0,0 +1,78 @@
|
|
1
|
+
# Rudachi
|
2
|
+
[Sudachi](https://github.com/WorksApplications/Sudachi) wrapper Gem for JRuby.
|
3
|
+
|
4
|
+
- Text-based
|
5
|
+
```rb
|
6
|
+
Rudachi::TextParser.parse('東京都へ行く')
|
7
|
+
=> "東京都\t名詞,固有名詞,地名,一般,*,*\t東京都\nへ\t助詞,格助詞,*,*,*,*\tへ\n行く\t動詞,非自立可能,*,*,五段-カ行,終止形-一般\t行く\nEOS\n"
|
8
|
+
```
|
9
|
+
|
10
|
+
- File-based
|
11
|
+
```rb
|
12
|
+
File.open('sample.txt', 'w') { |f| f << '東京都へ行く' }
|
13
|
+
Rudachi::FileParser.parse('sample.txt')
|
14
|
+
=> "東京都\t名詞,固有名詞,地名,一般,*,*\t東京都\nへ\t助詞,格助詞,*,*,*,*\tへ\n行く\t動詞,非自立可能,*,*,五段-カ行,終止形-一般\t行く\nEOS\n"
|
15
|
+
```
|
16
|
+
|
17
|
+
- With some options
|
18
|
+
```rb
|
19
|
+
Rudachi::TextParser.new(o: 'result.txt', m: 'A').parse('東京都へ行く')
|
20
|
+
File.read('result.txt')
|
21
|
+
=> "東京\t名詞,固有名詞,地名,一般,*,*\t東京\n都\t名詞,普通名詞,一般,*,*,*\t都\nへ\t助詞,格助詞,*,*,*,*\tへ\n行く\t動詞,非自立可能,*,*,五段-カ行,終止形-一般\t行く\nEOS\n"
|
22
|
+
```
|
23
|
+
|
24
|
+
## Requirements
|
25
|
+
|
26
|
+
- [JRuby](https://github.com/jruby/jruby) 9.1.3.0 or later
|
27
|
+
- [Sudachi](https://github.com/WorksApplications/Sudachi)
|
28
|
+
|
29
|
+
## Installation
|
30
|
+
|
31
|
+
1. Install the Sudachi JAR and dictionary ([details](https://github.com/WorksApplications/Sudachi/blob/develop/docs/tutorial.md#linux-%E3%81%AE%E5%A0%B4%E5%90%88))
|
32
|
+
|
33
|
+
- Install the Sudachi JAR file
|
34
|
+
```sh
|
35
|
+
$ wget https://github.com/WorksApplications/Sudachi/releases/download/v0.5.3/sudachi-0.5.3-executable.zip
|
36
|
+
$ unzip sudachi-0.5.3-executable.zip
|
37
|
+
$ ls sudachi-0.5.3
|
38
|
+
LICENSE-2.0.txt README.md javax.json-1.1.jar jdartsclone-1.2.0.jar licenses sudachi-0.5.3.jar sudachi.json sudachi_fulldict.json
|
39
|
+
```
|
40
|
+
|
41
|
+
- Install the Sudachi dictionary
|
42
|
+
```sh
|
43
|
+
$ wget http://sudachi.s3-website-ap-northeast-1.amazonaws.com/sudachidict/sudachi-dictionary-latest-full.zip
|
44
|
+
$ unzip -j -d sudachi-dictionary-latest-full sudachi-dictionary-latest-full.zip
|
45
|
+
$ mv sudachi-dictionary-latest-full/system_full.dic sudachi-dictionary-latest-full/system_core.dic
|
46
|
+
$ ls sudachi-dictionary-latest-full
|
47
|
+
LEGAL LICENSE-2.0.txt system_core.dic
|
48
|
+
```
|
49
|
+
|
50
|
+
2. Install Rudachi
|
51
|
+
|
52
|
+
```rb
|
53
|
+
# Gemfile
|
54
|
+
gem 'rudachi', git: 'https://github.com/SongCastle/rudachi.git', tag: 'v1.0.0'
|
55
|
+
```
|
56
|
+
|
57
|
+
Then run `bundle install`.
|
58
|
+
|
59
|
+
3. Initialize Rudachi
|
60
|
+
|
61
|
+
```rb
|
62
|
+
Rudachi.configure do |config|
|
63
|
+
config.jar_path = 'sudachi-0.5.3/sudachi-0.5.3.jar'
|
64
|
+
end
|
65
|
+
|
66
|
+
Rudachi::Option.configure do |config|
|
67
|
+
config.p = 'sudachi-dictionary-latest-full'
|
68
|
+
end
|
69
|
+
```
|
70
|
+
|
71
|
+
4. Do it !!
|
72
|
+
|
73
|
+
```rb
|
74
|
+
require 'rudachi'
|
75
|
+
|
76
|
+
Rudachi::TextParser.parse('こんにちは世界')
|
77
|
+
=> "こんにちは\t感動詞,一般,*,*,*,*\t今日は\n世界\t名詞,普通名詞,一般,*,*,*\t世界\nEOS\n"
|
78
|
+
```
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'rudachi/configurable'
|
2
|
+
|
3
|
+
module Rudachi
|
4
|
+
extend Configurable
|
5
|
+
|
6
|
+
config_accessor :jar_path, default: '/usr/java/lib/sudachi.jar'
|
7
|
+
|
8
|
+
module Option
|
9
|
+
extend Configurable
|
10
|
+
|
11
|
+
# @see https://github.com/WorksApplications/Sudachi#options
|
12
|
+
config_accessor :r, default: nil
|
13
|
+
config_accessor :s, default: nil
|
14
|
+
config_accessor :p, default: '/usr/java/lib'
|
15
|
+
config_accessor :m, default: 'C'
|
16
|
+
config_accessor :o, default: nil
|
17
|
+
config_accessor :t, default: nil
|
18
|
+
config_accessor :ts, default: nil
|
19
|
+
config_accessor :a, default: nil
|
20
|
+
config_accessor :f, default: nil
|
21
|
+
config_accessor :d, default: nil
|
22
|
+
config_accessor :h, default: nil
|
23
|
+
|
24
|
+
# Builds the Sudachi command-line argument list from the configured options.
#
# Every option declared on this module (one class variable per option)
# becomes a "-<name>" flag. A per-call value in +opts+ (String or Symbol
# key) takes precedence over the module-level default. Options whose
# effective value is nil or false are omitted.
#
# A value of +true+ denotes a switch that takes no argument, so only the
# flag itself is emitted; stringifying it would hand Sudachi a stray
# "true" argument.
#
# @param opts [Hash] per-call option overrides
# @return [Array<String>] flags and their values, in declaration order
def self.cmds(opts)
  class_variables.each_with_object([]) do |name, flags|
    key = name.to_s.delete('@')
    val = opts[key] || opts[key.to_sym] || class_variable_get(name)
    next unless val

    flags << "-#{key}"
    flags << val.to_s unless val == true
  end
end
|
31
|
+
end
|
32
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Rudachi
|
2
|
+
# Mixin (meant to be used with `extend`) that gives a module block-style
# configuration plus a macro for declaring settings.
module Configurable
  # Yields the extending module itself so settings can be assigned
  # inside a block.
  #
  # @yieldparam config [Module] the module that extended Configurable
  def configure
    yield self
  end

  private

  # Declares a module-level setting stored in a class variable named
  # after the setting, and defines a singleton reader and writer for it.
  #
  # @param name [Symbol, String] setting name
  # @param default [Object, nil] initial value of the setting
  def config_accessor(name, default: nil)
    storage = "@@#{name}"

    define_singleton_method(name) { class_variable_get(storage) }
    define_singleton_method("#{name}=") { |val| class_variable_set(storage, val) }
    class_variable_set(storage, default)
  end
end
|
18
|
+
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'java'
|
2
|
+
java_import 'java.lang.System'
|
3
|
+
java_import 'java.io.PrintStream'
|
4
|
+
java_import 'java.io.ByteArrayInputStream'
|
5
|
+
java_import 'java.io.ByteArrayOutputStream'
|
6
|
+
java_import 'java.nio.charset.StandardCharsets'
|
7
|
+
|
8
|
+
require Rudachi.jar_path
|
9
|
+
java_import 'com.worksap.nlp.sudachi.SudachiCommandLine'
|
10
|
+
|
11
|
+
module Java
|
12
|
+
String = JavaLang::String
|
13
|
+
System = JavaLang::System
|
14
|
+
ByteArrayInputStream = JavaIo::ByteArrayInputStream
|
15
|
+
ByteArrayOutputStream = JavaIo::ByteArrayOutputStream
|
16
|
+
PrintStream = JavaIo::PrintStream
|
17
|
+
UTF_8 = JavaNioCharset::StandardCharsets::UTF_8
|
18
|
+
SudachiCommandLine = ComWorksapNlpSudachi::SudachiCommandLine
|
19
|
+
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
require 'rudachi/config'
|
2
|
+
require 'rudachi/loader'
|
3
|
+
|
4
|
+
module Rudachi
|
5
|
+
# Runs Sudachi's command-line entry point against a file and returns
# whatever it printed to standard output.
class FileParser
  # Parses the file at +path+ using only the module-level options.
  #
  # @param path [String] path of the file to analyse
  # @return [String] the text Sudachi wrote to standard output
  def self.parse(path)
    new.parse(path)
  end

  # @param opts [Hash] per-instance option overrides handed to Option.cmds
  def initialize(**opts)
    Rudachi.load!

    @output = Java::ByteArrayOutputStream.new
    @opts = opts
  end

  # Parses the file at +path+ with this instance's options.
  #
  # @param path [String] path of the file to analyse
  # @return [String] the text captured from standard output during this call
  def parse(path)
    take_stdout do
      Java::SudachiCommandLine.main(
        Option.cmds(@opts).push(Java::String.new(path))
      )
    end
    @output.toString
  end

  private

  # Redirects Java's System.out into the capture buffer while the block
  # runs, then puts the previous stream back.
  #
  # The buffer is cleared first so that reusing one parser instance does
  # not return earlier results glued in front of the current one. The
  # restore happens in +ensure+ so a failure inside the block cannot
  # leave System.out pointing at the buffer.
  #
  # NOTE(review): System.out is swapped for the whole JVM, so concurrent
  # parses from several threads would presumably interfere — confirm
  # before using in threaded code.
  def take_stdout
    stdout = Java::System.out
    @output.reset
    Java::System.setOut(Java::PrintStream.new(@output))

    yield
  ensure
    Java::System.setOut(stdout) if stdout
  end
end
|
38
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
require 'rudachi/file_parser'
|
2
|
+
|
3
|
+
module Rudachi
|
4
|
+
# Variant of FileParser that analyses an in-memory string by feeding it
# to Sudachi's command-line entry point through standard input.
class TextParser < FileParser
  # Parses +text+ with this instance's options.
  #
  # @param text [String] the text to analyse
  # @return [String] the text captured from standard output
  def parse(text)
    @input = Java::String.new(text)
    take_stdin do
      take_stdout do
        Java::SudachiCommandLine.main(Option.cmds(@opts))
      end
    end
    @output.toString
  end

  private

  # Replaces Java's System.in with a stream over the UTF-8 bytes of the
  # pending input while the block runs, then restores the previous
  # stream. The restore happens in +ensure+ so a failure inside the block
  # cannot leave System.in pointing at the already-consumed buffer.
  def take_stdin
    stdin = Java::System.in
    stream = Java::ByteArrayInputStream.new(@input.getBytes(Java::UTF_8))
    Java::System.setIn(stream)

    yield
  ensure
    Java::System.setIn(stdin) if stdin
  end
end
|
27
|
+
end
|
data/lib/rudachi.rb
ADDED
metadata
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: rudachi
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- SongCastle
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2022-04-06 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: Sudachi wrapper for JRuby.
|
14
|
+
email: "-"
|
15
|
+
executables: []
|
16
|
+
extensions: []
|
17
|
+
extra_rdoc_files: []
|
18
|
+
files:
|
19
|
+
- README.md
|
20
|
+
- lib/rudachi.rb
|
21
|
+
- lib/rudachi/config.rb
|
22
|
+
- lib/rudachi/configurable.rb
|
23
|
+
- lib/rudachi/dependencies.rb
|
24
|
+
- lib/rudachi/file_parser.rb
|
25
|
+
- lib/rudachi/loader.rb
|
26
|
+
- lib/rudachi/text_parser.rb
|
27
|
+
- lib/rudachi/version.rb
|
28
|
+
homepage: https://github.com/SongCastle/rudachi
|
29
|
+
licenses:
|
30
|
+
- MIT
|
31
|
+
metadata: {}
|
32
|
+
post_install_message:
|
33
|
+
rdoc_options: []
|
34
|
+
require_paths:
|
35
|
+
- lib
|
36
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '2.3'
|
41
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
42
|
+
requirements:
|
43
|
+
- - ">="
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
requirements: []
|
47
|
+
rubygems_version: 3.0.3
|
48
|
+
signing_key:
|
49
|
+
specification_version: 4
|
50
|
+
summary: Sudachi wrapper for JRuby
|
51
|
+
test_files: []
|