rudachi 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: f43021bc769b0005c5d46c3abde244756712ce1419b82ae884729508aab64780
4
+ data.tar.gz: 5f3b2f74e771d2b3feb8d6de4abf7d59864da6108904437c28a498f2ae6d1e98
5
+ SHA512:
6
+ metadata.gz: 38535bc9cee7b34ee2ab3219294484d30758c82b4c2a85f53cb36519700eb52d2e44f75160ba2aee24a626a7dc95c5a02774bfd10c86da18067a7d6afcd33996
7
+ data.tar.gz: 289e76174d4f7053e71c2c276635ab6b2283b48c220cb498615c88605ffec6a41f3888c89a80ad728383170c5052c3dd76be81a02bef91dc38f4049b18db1c21
data/README.md ADDED
@@ -0,0 +1,78 @@
1
+ # Rudachi
2
+ [Sudachi](https://github.com/WorksApplications/Sudachi) wrapper Gem for JRuby.
3
+
4
+ - Text-based
5
+ ```rb
6
+ Rudachi::TextParser.parse('東京都へ行く')
7
+ => "東京都\t名詞,固有名詞,地名,一般,*,*\t東京都\nへ\t助詞,格助詞,*,*,*,*\tへ\n行く\t動詞,非自立可能,*,*,五段-カ行,終止形-一般\t行く\nEOS\n"
8
+ ```
9
+
10
+ - File-based
11
+ ```rb
12
+ File.open('sample.txt', 'w') { |f| f << '東京都へ行く' }
13
+ Rudachi::FileParser.parse('sample.txt')
14
+ => "東京都\t名詞,固有名詞,地名,一般,*,*\t東京都\nへ\t助詞,格助詞,*,*,*,*\tへ\n行く\t動詞,非自立可能,*,*,五段-カ行,終止形-一般\t行く\nEOS\n"
15
+ ```
16
+
17
+ - With some options
18
+ ```rb
19
+ Rudachi::TextParser.new(o: 'result.txt', m: 'A').parse('東京都へ行く')
20
+ File.read('result.txt')
21
+ => "東京\t名詞,固有名詞,地名,一般,*,*\t東京\n都\t名詞,普通名詞,一般,*,*,*\t都\nへ\t助詞,格助詞,*,*,*,*\tへ\n行く\t動詞,非自立可能,*,*,五段-カ行,終止形-一般\t行く\nEOS\n"
22
+ ```
23
+
24
+ ## Requirements
25
+
26
+ - [JRuby](https://github.com/jruby/jruby) 9.1.3.0 or later
27
+ - [Sudachi](https://github.com/WorksApplications/Sudachi)
28
+
29
+ ## Installation
30
+
31
+ 1. Install JAR and dictionary of Sudachi ([details](https://github.com/WorksApplications/Sudachi/blob/develop/docs/tutorial.md#linux-%E3%81%AE%E5%A0%B4%E5%90%88))
32
+
33
+ - Install the Sudachi JAR file
34
+ ```sh
35
+ $ wget https://github.com/WorksApplications/Sudachi/releases/download/v0.5.3/sudachi-0.5.3-executable.zip
36
+ $ unzip sudachi-0.5.3-executable.zip
37
+ $ ls sudachi-0.5.3
38
+ LICENSE-2.0.txt README.md javax.json-1.1.jar jdartsclone-1.2.0.jar licenses sudachi-0.5.3.jar sudachi.json sudachi_fulldict.json
39
+ ```
40
+
41
+ - Install the Sudachi dictionary
42
+ ```sh
43
+ $ wget http://sudachi.s3-website-ap-northeast-1.amazonaws.com/sudachidict/sudachi-dictionary-latest-full.zip
44
+ $ unzip -j -d sudachi-dictionary-latest-full sudachi-dictionary-latest-full.zip
45
+ $ mv sudachi-dictionary-latest-full/system_full.dic sudachi-dictionary-latest-full/system_core.dic
46
+ $ ls sudachi-dictionary-latest-full
47
+ LEGAL LICENSE-2.0.txt system_core.dic
48
+ ```
49
+
50
+ 2. Install Rudachi
51
+
52
+ ```rb
53
+ # Gemfile
54
+ gem 'rudachi', git: 'https://github.com/SongCastle/rudachi.git', tag: 'v1.0.0'
55
+ ```
56
+
57
+ Then run `bundle install`.
58
+
59
+ 3. Initialize Rudachi
60
+
61
+ ```rb
62
+ Rudachi.configure do |config|
63
+ config.jar_path = 'sudachi-0.5.3/sudachi-0.5.3.jar'
64
+ end
65
+
66
+ Rudachi::Option.configure do |config|
67
+ config.p = 'sudachi-dictionary-latest-full'
68
+ end
69
+ ```
70
+
71
+ 4. Do it !!
72
+
73
+ ```rb
74
+ require 'rudachi'
75
+
76
+ Rudachi::TextParser.parse('こんにちは世界')
77
+ => "こんにちは\t感動詞,一般,*,*,*,*\t今日は\n世界\t名詞,普通名詞,一般,*,*,*\t世界\nEOS\n"
78
+ ```
@@ -0,0 +1,32 @@
1
+ require 'rudachi/configurable'
2
+
3
module Rudachi
  extend Configurable

  # Path of the Sudachi JAR file; change it through Rudachi.configure.
  config_accessor :jar_path, default: '/usr/java/lib/sudachi.jar'

  # Holds the default value of every Sudachi command-line option and turns
  # them (merged with per-call overrides) into an argument list.
  module Option
    extend Configurable

    # @see https://github.com/WorksApplications/Sudachi#options
    config_accessor :r, default: nil
    config_accessor :s, default: nil
    config_accessor :p, default: '/usr/java/lib'
    config_accessor :m, default: 'C'
    config_accessor :o, default: nil
    config_accessor :t, default: nil
    config_accessor :ts, default: nil
    config_accessor :a, default: nil
    config_accessor :f, default: nil
    config_accessor :d, default: nil
    config_accessor :h, default: nil

    # Builds the command-line arguments for Sudachi.
    #
    # For each declared option the value is looked up in +opts+ (String key
    # first, then Symbol key) and falls back to the configured default.
    # Options whose resolved value is nil or false are left out.
    #
    # @param opts [Hash] per-call overrides keyed by option name
    # @return [Array<String>] flags in declaration order, e.g. ["-p", "dir", "-m", "C"]
    def self.cmds(opts)
      class_variables.each_with_object([]) do |name, flags|
        # Class variables are named "@@<option>"; strip the sigils.
        key = name.to_s.delete('@')
        val = opts[key] || opts[key.to_sym] || class_variable_get(name)
        next unless val

        flags << "-#{key}"
        # Switches such as -a, -d, -f, -t, -ts and -h take no argument (see
        # the option list linked above). Emitting "true" after them would be
        # read by Sudachi as an input file name, so `true` yields a bare flag.
        flags << val.to_s unless val == true
      end
    end
  end
end
@@ -0,0 +1,18 @@
1
module Rudachi
  # Mixin meant to be used with `extend`. It gives the extending module or
  # class a `configure` block helper and a private `config_accessor` macro.
  module Configurable
    # Yields the receiver so settings can be assigned inside a block.
    #
    # @yieldparam config [Module] the receiver itself
    # @return [Object] whatever the block returns
    def configure
      yield self
    end

    private

    # Declares a setting: defines a singleton reader and writer called
    # +name+ on the receiver and stores the value in the class variable
    # "@@<name>", initialised to +default+.
    #
    # The accessors are built with define_singleton_method instead of
    # evaluating generated source text, so +name+ is never interpreted as
    # Ruby code.
    #
    # @param name [Symbol, String] setting name
    # @param default [Object] initial value
    # @return [Object] the default value
    def config_accessor(name, default: nil)
      owner = self
      variable = "@@#{name}"

      define_singleton_method(name) { owner.class_variable_get(variable) }
      define_singleton_method("#{name}=") { |val| owner.class_variable_set(variable, val) }

      class_variable_set(variable, default)
    end
  end
end
@@ -0,0 +1,19 @@
1
+ require 'java'
2
+ java_import 'java.lang.System'
3
+ java_import 'java.io.PrintStream'
4
+ java_import 'java.io.ByteArrayInputStream'
5
+ java_import 'java.io.ByteArrayOutputStream'
6
+ java_import 'java.nio.charset.StandardCharsets'
7
+
8
+ require Rudachi.jar_path
9
+ java_import 'com.worksap.nlp.sudachi.SudachiCommandLine'
10
+
11
# Binds short constant names under the Java namespace to the Java classes
# used by the parsers (referenced there as Java::String, Java::System, ...).
# NOTE(review): presumably this reopens JRuby's built-in Java module, and the
# JavaLang / JavaIo / ... names resolve to Java packages through it — confirm.
module Java
  String = JavaLang::String
  System = JavaLang::System
  ByteArrayInputStream = JavaIo::ByteArrayInputStream
  ByteArrayOutputStream = JavaIo::ByteArrayOutputStream
  PrintStream = JavaIo::PrintStream
  # Charset used when text is converted to bytes for Sudachi's standard input.
  UTF_8 = JavaNioCharset::StandardCharsets::UTF_8
  # Entry point of the Sudachi command-line tool.
  SudachiCommandLine = ComWorksapNlpSudachi::SudachiCommandLine
end
@@ -0,0 +1,38 @@
1
+ require 'rudachi/config'
2
+ require 'rudachi/loader'
3
+
4
module Rudachi
  # Runs the Sudachi command line on a file and returns what it printed to
  # standard output.
  class FileParser
    # Convenience wrapper: parses +path+ with the default options.
    #
    # @param path [String] file to analyse
    # @return [String] text printed by Sudachi
    def self.parse(path)
      new.parse(path)
    end

    # @param opts [Hash] per-instance overrides for the Rudachi::Option
    #   defaults
    def initialize(**opts)
      # Loaded here rather than at require time so that Rudachi.jar_path can
      # be configured first.
      Rudachi.load!

      @output = Java::ByteArrayOutputStream.new
      @opts = opts
    end

    # Analyses the file at +path+.
    #
    # @param path [String] file to analyse
    # @return [String] text printed by Sudachi during this call
    def parse(path)
      # Discard anything captured by an earlier call so that a reused parser
      # does not return previous results again.
      @output.reset
      take_stdout do
        Java::SudachiCommandLine.main(
          Option.cmds(@opts).push(Java::String.new(path))
        )
      end
      @output.toString
    end

    private

    # Redirects Java's System.out into @output while the block runs.
    # The original stream is restored even when the block raises, so a
    # failing parse cannot leave the whole JVM writing into this buffer.
    def take_stdout
      stdout = Java::System.out
      Java::System.setOut(Java::PrintStream.new(@output))

      yield
    ensure
      Java::System.setOut(stdout) if stdout
    end
  end
end
@@ -0,0 +1,5 @@
1
module Rudachi
  class << self
    # Requires 'rudachi/dependencies' on demand instead of at load time.
    #
    # @return [Boolean] the result of Kernel#require
    def load!
      require 'rudachi/dependencies'
    end
  end
end
@@ -0,0 +1,27 @@
1
+ require 'rudachi/file_parser'
2
+
3
module Rudachi
  # Feeds a string to the Sudachi command line through standard input and
  # returns what it printed to standard output.
  class TextParser < FileParser
    # Analyses +text+.
    #
    # @param text [String] text to analyse
    # @return [String] text printed by Sudachi during this call
    def parse(text)
      @input = Java::String.new(text)
      # Discard anything captured by an earlier call so that a reused parser
      # does not return previous results again.
      @output.reset
      take_stdin do
        take_stdout do
          Java::SudachiCommandLine.main(Option.cmds(@opts))
        end
      end
      @output.toString
    end

    private

    # Replaces Java's System.in with a stream over the UTF-8 bytes of @input
    # while the block runs. The original stream is restored even when the
    # block raises, so a failing parse cannot leave System.in hijacked.
    def take_stdin
      stdin = Java::System.in
      Java::System.setIn(
        Java::ByteArrayInputStream.new(@input.getBytes(Java::UTF_8))
      )

      yield
    ensure
      Java::System.setIn(stdin) if stdin
    end
  end
end
@@ -0,0 +1,3 @@
1
module Rudachi
  # Version of the gem. Frozen so the constant cannot be mutated in place.
  VERSION = '1.0.0'.freeze
end
data/lib/rudachi.rb ADDED
@@ -0,0 +1,3 @@
1
+ require 'rudachi/config'
2
+ require 'rudachi/file_parser'
3
+ require 'rudachi/text_parser'
metadata ADDED
@@ -0,0 +1,51 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rudachi
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - SongCastle
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2022-04-06 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Sudachi wrapper for JRuby.
14
+ email: "-"
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - README.md
20
+ - lib/rudachi.rb
21
+ - lib/rudachi/config.rb
22
+ - lib/rudachi/configurable.rb
23
+ - lib/rudachi/dependencies.rb
24
+ - lib/rudachi/file_parser.rb
25
+ - lib/rudachi/loader.rb
26
+ - lib/rudachi/text_parser.rb
27
+ - lib/rudachi/version.rb
28
+ homepage: https://github.com/SongCastle/rudachi
29
+ licenses:
30
+ - MIT
31
+ metadata: {}
32
+ post_install_message:
33
+ rdoc_options: []
34
+ require_paths:
35
+ - lib
36
+ required_ruby_version: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '2.3'
41
+ required_rubygems_version: !ruby/object:Gem::Requirement
42
+ requirements:
43
+ - - ">="
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ requirements: []
47
+ rubygems_version: 3.0.3
48
+ signing_key:
49
+ specification_version: 4
50
+ summary: Sudachi wrapper for JRuby
51
+ test_files: []