rudachi 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +78 -0
- data/lib/rudachi/config.rb +32 -0
- data/lib/rudachi/configurable.rb +18 -0
- data/lib/rudachi/dependencies.rb +19 -0
- data/lib/rudachi/file_parser.rb +38 -0
- data/lib/rudachi/loader.rb +5 -0
- data/lib/rudachi/text_parser.rb +27 -0
- data/lib/rudachi/version.rb +3 -0
- data/lib/rudachi.rb +3 -0
- metadata +51 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: f43021bc769b0005c5d46c3abde244756712ce1419b82ae884729508aab64780
|
4
|
+
data.tar.gz: 5f3b2f74e771d2b3feb8d6de4abf7d59864da6108904437c28a498f2ae6d1e98
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 38535bc9cee7b34ee2ab3219294484d30758c82b4c2a85f53cb36519700eb52d2e44f75160ba2aee24a626a7dc95c5a02774bfd10c86da18067a7d6afcd33996
|
7
|
+
data.tar.gz: 289e76174d4f7053e71c2c276635ab6b2283b48c220cb498615c88605ffec6a41f3888c89a80ad728383170c5052c3dd76be81a02bef91dc38f4049b18db1c21
|
data/README.md
ADDED
@@ -0,0 +1,78 @@
|
|
1
|
+
# Rudachi
|
2
|
+
[Sudachi](https://github.com/WorksApplications/Sudachi) wrapper Gem for JRuby.
|
3
|
+
|
4
|
+
- Text base
|
5
|
+
```rb
|
6
|
+
Rudachi::TextParser.parse('東京都へ行く')
|
7
|
+
=> "東京都\t名詞,固有名詞,地名,一般,*,*\t東京都\nへ\t助詞,格助詞,*,*,*,*\tへ\n行く\t動詞,非自立可能,*,*,五段-カ行,終止形-一般\t行く\nEOS\n"
|
8
|
+
```
|
9
|
+
|
10
|
+
- File base
|
11
|
+
```rb
|
12
|
+
File.open('sample.txt', 'w') { |f| f << '東京都へ行く' }
|
13
|
+
Rudachi::FileParser.parse('sample.txt')
|
14
|
+
=> "東京都\t名詞,固有名詞,地名,一般,*,*\t東京都\nへ\t助詞,格助詞,*,*,*,*\tへ\n行く\t動詞,非自立可能,*,*,五段-カ行,終止形-一般\t行く\nEOS\n"
|
15
|
+
```
|
16
|
+
|
17
|
+
- With some options
|
18
|
+
```rb
|
19
|
+
Rudachi::TextParser.new(o: 'result.txt', m: 'A').parse('東京都へ行く')
|
20
|
+
File.read('result.txt')
|
21
|
+
=> "東京\t名詞,固有名詞,地名,一般,*,*\t東京\n都\t名詞,普通名詞,一般,*,*,*\t都\nへ\t助詞,格助詞,*,*,*,*\tへ\n行く\t動詞,非自立可能,*,*,五段-カ行,終止形-一般\t行く\nEOS\n"
|
22
|
+
```
|
23
|
+
|
24
|
+
## Requirements
|
25
|
+
|
26
|
+
- [JRuby](https://github.com/jruby/jruby) 9.1.3.0 or later
|
27
|
+
- [Sudachi](https://github.com/WorksApplications/Sudachi)
|
28
|
+
|
29
|
+
## Installation
|
30
|
+
|
31
|
+
1. Install JAR and dictionary of Sudachi ([details](https://github.com/WorksApplications/Sudachi/blob/develop/docs/tutorial.md#linux-%E3%81%AE%E5%A0%B4%E5%90%88))
|
32
|
+
|
33
|
+
- Install the Sudachi JAR file
|
34
|
+
```sh
|
35
|
+
$ wget https://github.com/WorksApplications/Sudachi/releases/download/v0.5.3/sudachi-0.5.3-executable.zip
|
36
|
+
$ unzip sudachi-0.5.3-executable.zip
|
37
|
+
$ ls sudachi-0.5.3
|
38
|
+
LICENSE-2.0.txt README.md javax.json-1.1.jar jdartsclone-1.2.0.jar licenses sudachi-0.5.3.jar sudachi.json sudachi_fulldict.json
|
39
|
+
```
|
40
|
+
|
41
|
+
- Install the Sudachi dictionary
|
42
|
+
```sh
|
43
|
+
$ wget http://sudachi.s3-website-ap-northeast-1.amazonaws.com/sudachidict/sudachi-dictionary-latest-full.zip
|
44
|
+
$ unzip -j -d sudachi-dictionary-latest-full sudachi-dictionary-latest-full.zip
|
45
|
+
$ mv sudachi-dictionary-latest-full/system_full.dic sudachi-dictionary-latest-full/system_core.dic
|
46
|
+
$ ls sudachi-dictionary-latest-full
|
47
|
+
LEGAL LICENSE-2.0.txt system_core.dic
|
48
|
+
```
|
49
|
+
|
50
|
+
2. Install Rudachi
|
51
|
+
|
52
|
+
```rb
|
53
|
+
# Gemfile
|
54
|
+
gem 'rudachi', git: 'https://github.com/SongCastle/rudachi.git', tag: 'v1.0.0'
|
55
|
+
```
|
56
|
+
|
57
|
+
Then run `bundle install`.
|
58
|
+
|
59
|
+
3. Initialize Rudachi
|
60
|
+
|
61
|
+
```rb
|
62
|
+
Rudachi.configure do |config|
|
63
|
+
config.jar_path = 'sudachi-0.5.3/sudachi-0.5.3.jar'
|
64
|
+
end
|
65
|
+
|
66
|
+
Rudachi::Option.configure do |config|
|
67
|
+
config.p = 'sudachi-dictionary-latest-full'
|
68
|
+
end
|
69
|
+
```
|
70
|
+
|
71
|
+
4. Do it !!
|
72
|
+
|
73
|
+
```rb
|
74
|
+
require 'rudachi'
|
75
|
+
|
76
|
+
Rudachi::TextParser.parse('こんにちは世界')
|
77
|
+
=> "こんにちは\t感動詞,一般,*,*,*,*\t今日は\n世界\t名詞,普通名詞,一般,*,*,*\t世界\nEOS\n"
|
78
|
+
```
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'rudachi/configurable'
|
2
|
+
|
3
|
+
# Top-level namespace of the gem; holds the gem-wide settings.
module Rudachi
  extend Configurable

  # Path handed to +require+ when the Sudachi JAR is loaded.
  config_accessor :jar_path, default: '/usr/java/lib/sudachi.jar'

  # Default command-line options passed to Sudachi. Every setting is named
  # after the option letter(s) it maps to; a +nil+ default means the option
  # is left out of the generated argument list.
  module Option
    extend Configurable

    # @see https://github.com/WorksApplications/Sudachi#options
    config_accessor :r, default: nil
    config_accessor :s, default: nil
    config_accessor :p, default: '/usr/java/lib'
    config_accessor :m, default: 'C'
    config_accessor :o, default: nil
    config_accessor :t, default: nil
    config_accessor :ts, default: nil
    config_accessor :a, default: nil
    config_accessor :f, default: nil
    config_accessor :d, default: nil
    config_accessor :h, default: nil

    # Builds the argument list for the Sudachi command line.
    #
    # For each declared option the per-call value (looked up by String key
    # first, then by Symbol key) wins over the configured default. Options
    # whose resulting value is +nil+ or +false+ are skipped.
    #
    # A value of +true+ emits the bare flag only (e.g. +-a+). The Sudachi
    # README lists options such as +-a+ and +-d+ as switches that take no
    # argument, so appending "true" would presumably be read as an input
    # file name. Any other value is appended after the flag via +to_s+.
    #
    # @param opts [Hash] per-call overrides, e.g. <tt>{ m: 'A', a: true }</tt>
    # @return [Array<String>] flags and values in declaration order
    def self.cmds(opts)
      class_variables.each_with_object([]) do |name, flags|
        key = name.to_s.delete('@')
        val = opts[key] || opts[key.to_sym] || class_variable_get(name)
        next unless val

        flags << "-#{key}"
        # +true+ marks a switch: emit the flag without a value.
        flags << val.to_s unless val == true
      end
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Rudachi
  # Mixin intended to be used with +extend+: it gives the extending module a
  # +configure+ block helper and a private +config_accessor+ macro whose
  # values are stored in class variables of that module.
  module Configurable
    # Yields the receiver so settings can be assigned inside a block.
    #
    # @yieldparam config [Module] the object that extended Configurable
    def configure
      yield self
    end

    private

    # Declares one setting on the receiver: a singleton reader +name+, a
    # singleton writer +name=+, and the backing class variable +@@name+
    # initialised to +default+.
    #
    # @param name [Symbol, String] setting name
    # @param default [Object] initial value
    def config_accessor(name, default: nil)
      storage = "@@#{name}"

      define_singleton_method(name) { class_variable_get(storage) }
      define_singleton_method("#{name}=") { |val| class_variable_set(storage, val) }

      class_variable_set(storage, default)
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'java'
|
2
|
+
java_import 'java.lang.System'
|
3
|
+
java_import 'java.io.PrintStream'
|
4
|
+
java_import 'java.io.ByteArrayInputStream'
|
5
|
+
java_import 'java.io.ByteArrayOutputStream'
|
6
|
+
java_import 'java.nio.charset.StandardCharsets'
|
7
|
+
|
8
|
+
require Rudachi.jar_path
|
9
|
+
java_import 'com.worksap.nlp.sudachi.SudachiCommandLine'
|
10
|
+
|
11
|
+
# Short aliases, under JRuby's +Java+ namespace, for the Java classes the
# parsers use (e.g. +Java::PrintStream+ instead of +Java::JavaIo::PrintStream+).
module Java
  # java.lang
  String = Java::JavaLang::String
  System = Java::JavaLang::System

  # java.io
  ByteArrayInputStream = Java::JavaIo::ByteArrayInputStream
  ByteArrayOutputStream = Java::JavaIo::ByteArrayOutputStream
  PrintStream = Java::JavaIo::PrintStream

  # java.nio.charset
  UTF_8 = Java::JavaNioCharset::StandardCharsets::UTF_8

  # Sudachi command-line entry point; available once the Sudachi JAR has
  # been required.
  SudachiCommandLine = Java::ComWorksapNlpSudachi::SudachiCommandLine
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
require 'rudachi/config'
|
2
|
+
require 'rudachi/loader'
|
3
|
+
|
4
|
+
module Rudachi
  # Runs the Sudachi command line on a file and returns what it printed to
  # standard output.
  class FileParser
    # Convenience wrapper: parses +path+ with the configured default options.
    #
    # @param path [String] path of the file to analyse
    # @return [String] text Sudachi wrote to standard output
    def self.parse(path)
      new.parse(path)
    end

    # @param opts [Hash] per-instance overrides for Rudachi::Option settings
    #   (e.g. <tt>m: 'A'</tt>, <tt>o: 'result.txt'</tt>)
    def initialize(**opts)
      Rudachi.load!

      @output = Java::ByteArrayOutputStream.new
      @opts = opts
    end

    # Invokes +SudachiCommandLine.main+ with the merged options followed by
    # +path+, capturing standard output while it runs.
    #
    # @param path [String] path of the file to analyse
    # @return [String] captured output of this call only
    def parse(path)
      take_stdout do
        Java::SudachiCommandLine.main(
          Option.cmds(@opts).push(Java::String.new(path))
        )
      end
      @output.toString
    end

    private

    # Redirects +System.out+ into the internal buffer for the duration of
    # the block.
    #
    # The buffer is emptied first, so a parser instance that is reused does
    # not return output left over from earlier calls. The previous stream is
    # restored in +ensure+, so an exception raised by the block cannot leave
    # standard output redirected.
    #
    # NOTE(review): System.setOut swaps a JVM-wide stream, so concurrent
    # parses would presumably interfere with each other - confirm before
    # using from multiple threads.
    def take_stdout
      @output.reset
      original = Java::System.out
      capture = Java::PrintStream.new(@output)
      Java::System.setOut(capture)

      begin
        yield
      ensure
        # Push anything still buffered in the PrintStream into @output
        # before the original stream is put back.
        capture.flush
        Java::System.setOut(original)
      end
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
require 'rudachi/file_parser'
|
2
|
+
|
3
|
+
module Rudachi
  # Runs the Sudachi command line on an in-memory string by supplying it as
  # standard input, and returns what was printed to standard output.
  class TextParser < FileParser
    # Invokes +SudachiCommandLine.main+ with the merged options and no file
    # argument, with +text+ installed as standard input.
    #
    # @param text [String] text to analyse
    # @return [String] captured output of this call only
    def parse(text)
      @input = Java::String.new(text)
      # Start from an empty buffer so a reused parser does not return the
      # output of earlier calls as well.
      @output.reset

      take_stdin do
        take_stdout do
          Java::SudachiCommandLine.main(Option.cmds(@opts))
        end
      end
      @output.toString
    end

    private

    # Replaces +System.in+ with a stream over the UTF-8 bytes of +@input+
    # for the duration of the block. The previous stream is restored in
    # +ensure+, so an exception raised by the block cannot leave standard
    # input redirected.
    def take_stdin
      original = Java::System.in
      source = Java::ByteArrayInputStream.new(@input.getBytes(Java::UTF_8))
      Java::System.setIn(source)

      begin
        yield
      ensure
        Java::System.setIn(original)
      end
    end
  end
end
|
data/lib/rudachi.rb
ADDED
metadata
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: rudachi
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- SongCastle
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2022-04-06 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: Sudachi wrapper for JRuby.
|
14
|
+
email: "-"
|
15
|
+
executables: []
|
16
|
+
extensions: []
|
17
|
+
extra_rdoc_files: []
|
18
|
+
files:
|
19
|
+
- README.md
|
20
|
+
- lib/rudachi.rb
|
21
|
+
- lib/rudachi/config.rb
|
22
|
+
- lib/rudachi/configurable.rb
|
23
|
+
- lib/rudachi/dependencies.rb
|
24
|
+
- lib/rudachi/file_parser.rb
|
25
|
+
- lib/rudachi/loader.rb
|
26
|
+
- lib/rudachi/text_parser.rb
|
27
|
+
- lib/rudachi/version.rb
|
28
|
+
homepage: https://github.com/SongCastle/rudachi
|
29
|
+
licenses:
|
30
|
+
- MIT
|
31
|
+
metadata: {}
|
32
|
+
post_install_message:
|
33
|
+
rdoc_options: []
|
34
|
+
require_paths:
|
35
|
+
- lib
|
36
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '2.3'
|
41
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
42
|
+
requirements:
|
43
|
+
- - ">="
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
requirements: []
|
47
|
+
rubygems_version: 3.0.3
|
48
|
+
signing_key:
|
49
|
+
specification_version: 4
|
50
|
+
summary: Sudachi wrapper for JRuby
|
51
|
+
test_files: []
|