rudachi 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: f43021bc769b0005c5d46c3abde244756712ce1419b82ae884729508aab64780
4
+ data.tar.gz: 5f3b2f74e771d2b3feb8d6de4abf7d59864da6108904437c28a498f2ae6d1e98
5
+ SHA512:
6
+ metadata.gz: 38535bc9cee7b34ee2ab3219294484d30758c82b4c2a85f53cb36519700eb52d2e44f75160ba2aee24a626a7dc95c5a02774bfd10c86da18067a7d6afcd33996
7
+ data.tar.gz: 289e76174d4f7053e71c2c276635ab6b2283b48c220cb498615c88605ffec6a41f3888c89a80ad728383170c5052c3dd76be81a02bef91dc38f4049b18db1c21
data/README.md ADDED
@@ -0,0 +1,78 @@
1
+ # Rudachi
2
+ [Sudachi](https://github.com/WorksApplications/Sudachi) wrapper Gem for JRuby.
3
+
4
+ - Text-based
5
+ ```rb
6
+ Rudachi::TextParser.parse('東京都へ行く')
7
+ => "東京都\t名詞,固有名詞,地名,一般,*,*\t東京都\nへ\t助詞,格助詞,*,*,*,*\tへ\n行く\t動詞,非自立可能,*,*,五段-カ行,終止形-一般\t行く\nEOS\n"
8
+ ```
9
+
10
+ - File-based
11
+ ```rb
12
+ File.open('sample.txt', 'w') { |f| f << '東京都へ行く' }
13
+ Rudachi::FileParser.parse('sample.txt')
14
+ => "東京都\t名詞,固有名詞,地名,一般,*,*\t東京都\nへ\t助詞,格助詞,*,*,*,*\tへ\n行く\t動詞,非自立可能,*,*,五段-カ行,終止形-一般\t行く\nEOS\n"
15
+ ```
16
+
17
+ - With some options
18
+ ```rb
19
+ Rudachi::TextParser.new(o: 'result.txt', m: 'A').parse('東京都へ行く')
20
+ File.read('result.txt')
21
+ => "東京\t名詞,固有名詞,地名,一般,*,*\t東京\n都\t名詞,普通名詞,一般,*,*,*\t都\nへ\t助詞,格助詞,*,*,*,*\tへ\n行く\t動詞,非自立可能,*,*,五段-カ行,終止形-一般\t行く\nEOS\n"
22
+ ```
23
+
24
+ ## Requirements
25
+
26
+ - [JRuby](https://github.com/jruby/jruby) 9.1.3.0 or later
27
+ - [Sudachi](https://github.com/WorksApplications/Sudachi)
28
+
29
+ ## Installation
30
+
31
+ 1. Install the Sudachi JAR and dictionary ([details](https://github.com/WorksApplications/Sudachi/blob/develop/docs/tutorial.md#linux-%E3%81%AE%E5%A0%B4%E5%90%88))
32
+
33
+ - Install the Sudachi JAR file
34
+ ```sh
35
+ $ wget https://github.com/WorksApplications/Sudachi/releases/download/v0.5.3/sudachi-0.5.3-executable.zip
36
+ $ unzip sudachi-0.5.3-executable.zip
37
+ $ ls sudachi-0.5.3
38
+ LICENSE-2.0.txt README.md javax.json-1.1.jar jdartsclone-1.2.0.jar licenses sudachi-0.5.3.jar sudachi.json sudachi_fulldict.json
39
+ ```
40
+
41
+ - Install the Sudachi dictionary
42
+ ```sh
43
+ $ wget http://sudachi.s3-website-ap-northeast-1.amazonaws.com/sudachidict/sudachi-dictionary-latest-full.zip
44
+ $ unzip -j -d sudachi-dictionary-latest-full sudachi-dictionary-latest-full.zip
45
+ $ mv sudachi-dictionary-latest-full/system_full.dic sudachi-dictionary-latest-full/system_core.dic
46
+ $ ls sudachi-dictionary-latest-full
47
+ LEGAL LICENSE-2.0.txt system_core.dic
48
+ ```
49
+
50
+ 2. Install Rudachi
51
+
52
+ ```rb
53
+ # Gemfile
54
+ gem 'rudachi', git: 'https://github.com/SongCastle/rudachi.git', tag: 'v1.0.0'
55
+ ```
56
+
57
+ Then run `bundle install`.
58
+
59
+ 3. Initialize Rudachi
60
+
61
+ ```rb
62
+ Rudachi.configure do |config|
63
+ config.jar_path = 'sudachi-0.5.3/sudachi-0.5.3.jar'
64
+ end
65
+
66
+ Rudachi::Option.configure do |config|
67
+ config.p = 'sudachi-dictionary-latest-full'
68
+ end
69
+ ```
70
+
71
+ 4. Do it!!
72
+
73
+ ```rb
74
+ require 'rudachi'
75
+
76
+ Rudachi::TextParser.parse('こんにちは世界')
77
+ => "こんにちは\t感動詞,一般,*,*,*,*\t今日は\n世界\t名詞,普通名詞,一般,*,*,*\t世界\nEOS\n"
78
+ ```
@@ -0,0 +1,32 @@
1
+ require 'rudachi/configurable'
2
+
3
module Rudachi
  extend Configurable

  # Path of the Sudachi JAR file.
  config_accessor :jar_path, default: '/usr/java/lib/sudachi.jar'

  # Module-level defaults for the Sudachi command-line options.
  module Option
    extend Configurable

    # One accessor per Sudachi switch; a +nil+ default leaves the switch out.
    # @see https://github.com/WorksApplications/Sudachi#options
    config_accessor :r, default: nil
    config_accessor :s, default: nil
    config_accessor :p, default: '/usr/java/lib'
    config_accessor :m, default: 'C'
    config_accessor :o, default: nil
    config_accessor :t, default: nil
    config_accessor :ts, default: nil
    config_accessor :a, default: nil
    config_accessor :f, default: nil
    config_accessor :d, default: nil
    config_accessor :h, default: nil

    # Builds the argument list passed to the Sudachi command line.
    #
    # For each configured option the per-call value in +opts+ (looked up by
    # String key, then by Symbol key) takes precedence over the module-level
    # default. Options that resolve to +nil+ or +false+ are omitted.
    #
    # A value of +true+ produces the bare switch only. Several Sudachi
    # switches take no argument, and a trailing "true" token would otherwise
    # be handed to Sudachi as an extra positional argument.
    #
    # @param opts [Hash] per-call overrides keyed by option name
    # @return [Array<String>] e.g. ["-p", "/usr/java/lib", "-m", "C"]
    def self.cmds(opts)
      class_variables.each_with_object([]) do |name, flags|
        key = name.to_s.delete('@@')
        val = opts[key] || opts[key.to_sym] || class_variable_get(name)
        next unless val

        flags << "-#{key}"
        flags << val.to_s unless val == true
      end
    end
  end
end
@@ -0,0 +1,18 @@
1
module Rudachi
  # Mixin, applied with +extend+, that gives a module settings stored in
  # class variables.
  module Configurable
    # Yields the receiver so settings can be assigned inside a block.
    #
    # @yieldparam config [Module] the receiver itself
    # @return [Object] whatever the block returns
    def configure
      yield(self)
    end

    private

    # Defines a module-level reader and writer for +name+, backed by the
    # class variable +@@name+, and initialises that variable to +default+.
    #
    # @param name [Symbol, String] setting name
    # @param default [Object] initial value
    # @return [Object] +default+
    def config_accessor(name, default: nil)
      variable = "@@#{name}"
      owner = self

      define_singleton_method(name) { owner.class_variable_get(variable) }
      define_singleton_method("#{name}=") { |val| owner.class_variable_set(variable, val) }

      class_variable_set(variable, default)
    end
  end
end
@@ -0,0 +1,19 @@
1
+ require 'java'
2
+ java_import 'java.lang.System'
3
+ java_import 'java.io.PrintStream'
4
+ java_import 'java.io.ByteArrayInputStream'
5
+ java_import 'java.io.ByteArrayOutputStream'
6
+ java_import 'java.nio.charset.StandardCharsets'
7
+
8
+ require Rudachi.jar_path
9
+ java_import 'com.worksap.nlp.sudachi.SudachiCommandLine'
10
+
11
# Short constant names under +Java+ for the Java classes that the parser
# classes refer to.
module Java
  # java.lang
  System = JavaLang::System
  String = JavaLang::String

  # java.io
  PrintStream = JavaIo::PrintStream
  ByteArrayInputStream = JavaIo::ByteArrayInputStream
  ByteArrayOutputStream = JavaIo::ByteArrayOutputStream

  # java.nio.charset
  UTF_8 = JavaNioCharset::StandardCharsets::UTF_8

  # Sudachi command-line entry point
  SudachiCommandLine = ComWorksapNlpSudachi::SudachiCommandLine
end
@@ -0,0 +1,38 @@
1
+ require 'rudachi/config'
2
+ require 'rudachi/loader'
3
+
4
module Rudachi
  # Runs the Sudachi command line on a file and returns what it printed to
  # Java's standard output.
  class FileParser
    # Parses +path+ using only the module-level default options.
    #
    # @param path [String] input file handed to Sudachi
    # @return [String] text captured from +System.out+ during the run
    def self.parse(path)
      new.parse(path)
    end

    # @param opts [Hash] per-call Sudachi options; combined with the
    #   Rudachi::Option defaults by Option.cmds
    def initialize(**opts)
      Rudachi.load!

      @output = Java::ByteArrayOutputStream.new
      @opts = opts
    end

    # Runs Sudachi with +path+ as its trailing argument.
    #
    # @param path [String] input file handed to Sudachi
    # @return [String] text captured from +System.out+ during this call
    def parse(path)
      # Drop anything captured by an earlier call on this instance so the
      # result only reflects the current run.
      @output.reset
      take_stdout do
        Java::SudachiCommandLine.main(
          Option.cmds(@opts).push(Java::String.new(path))
        )
      end
      @output.toString
    end

    private

    # Redirects Java's +System.out+ into +@output+ while the block runs.
    # The previous stream is restored in +ensure+, so an exception raised
    # by the block does not leave standard output redirected.
    def take_stdout
      stdout = Java::System.out
      stream = Java::PrintStream.new(@output)
      Java::System.setOut(stream)

      yield
    ensure
      stream.flush if stream
      Java::System.setOut(stdout) if stdout
    end
  end
end
@@ -0,0 +1,5 @@
1
module Rudachi
  class << self
    # Requires 'rudachi/dependencies'. The require sits inside a method, so
    # it runs when this method is called, not when this file is loaded.
    # NOTE(review): presumably so callers can set the JAR path before the
    # dependencies are loaded -- confirm.
    #
    # @return [Boolean] result of +require+
    def load!
      require 'rudachi/dependencies'
    end
  end
end
@@ -0,0 +1,27 @@
1
+ require 'rudachi/file_parser'
2
+
3
module Rudachi
  # Variant of FileParser that feeds a string to Sudachi through Java's
  # standard input; no file argument is passed.
  class TextParser < FileParser
    # Runs Sudachi with +text+ supplied on +System.in+.
    #
    # @param text [String] text to analyse
    # @return [String] text captured from +System.out+ during this call
    def parse(text)
      @input = Java::String.new(text)
      # Drop anything captured by an earlier call on this instance so the
      # result only reflects the current run.
      @output.reset
      take_stdin do
        take_stdout do
          Java::SudachiCommandLine.main(Option.cmds(@opts))
        end
      end
      @output.toString
    end

    private

    # Replaces Java's +System.in+ with the UTF-8 bytes of +@input+ while the
    # block runs. The previous stream is restored in +ensure+, so an
    # exception raised by the block does not leave standard input replaced.
    def take_stdin
      stdin = Java::System.in
      stream = Java::ByteArrayInputStream.new(@input.getBytes(Java::UTF_8))
      Java::System.setIn(stream)

      yield
    ensure
      Java::System.setIn(stdin) if stdin
    end
  end
end
@@ -0,0 +1,3 @@
1
module Rudachi
  # Gem version string. Frozen so the constant cannot be mutated in place.
  VERSION = '1.0.0'.freeze
end
data/lib/rudachi.rb ADDED
@@ -0,0 +1,3 @@
1
+ require 'rudachi/config'
2
+ require 'rudachi/file_parser'
3
+ require 'rudachi/text_parser'
metadata ADDED
@@ -0,0 +1,51 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rudachi
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - SongCastle
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2022-04-06 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Sudachi wrapper for JRuby.
14
+ email: "-"
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - README.md
20
+ - lib/rudachi.rb
21
+ - lib/rudachi/config.rb
22
+ - lib/rudachi/configurable.rb
23
+ - lib/rudachi/dependencies.rb
24
+ - lib/rudachi/file_parser.rb
25
+ - lib/rudachi/loader.rb
26
+ - lib/rudachi/text_parser.rb
27
+ - lib/rudachi/version.rb
28
+ homepage: https://github.com/SongCastle/rudachi
29
+ licenses:
30
+ - MIT
31
+ metadata: {}
32
+ post_install_message:
33
+ rdoc_options: []
34
+ require_paths:
35
+ - lib
36
+ required_ruby_version: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '2.3'
41
+ required_rubygems_version: !ruby/object:Gem::Requirement
42
+ requirements:
43
+ - - ">="
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ requirements: []
47
+ rubygems_version: 3.0.3
48
+ signing_key:
49
+ specification_version: 4
50
+ summary: Sudachi wrapper for JRuby
51
+ test_files: []