furigana 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,6 @@
1
+ = furigana
2
+
3
+ Adds furigana (reading annotations) to Japanese text and prints the result as plain text, HTML ruby markup, or JSON.
4
+
5
+ :include:furigana.rdoc
6
+
@@ -0,0 +1,57 @@
1
+ #!/usr/bin/env ruby
2
+ # -*- coding: utf-8 -*-
3
+ require 'furigana'
4
+ require 'singleton'
5
+ require 'ostruct'
6
+ require 'optparse'
7
+
8
+ module Furigana
9
+ class CLI
10
+ include Singleton
11
+
12
+ def initialize
13
+ @settings = OpenStruct.new
14
+ @settings.format = :text
15
+ end
16
+
17
+ def parse_options
18
+ OptionParser.new do |opts|
19
+ opts.banner = "Usage: furigana [options] [file]"
20
+
21
+ opts.on("--text", "Add furigana and output text (default)") do
22
+ @settings.format = :text
23
+ end
24
+ opts.on("--html", "Add furigana and output HTML") do
25
+ @settings.format = :html
26
+ end
27
+ opts.on("--json", "Add furigana and output JSON") do
28
+ @settings.format = :json
29
+ end
30
+ opts.on_tail("-h", "--help", "Show this message") do
31
+ puts opts
32
+ exit
33
+ end
34
+ opts.on_tail("--version", "Show version") do
35
+ puts Furigana::VERSION
36
+ exit
37
+ end
38
+ end.parse!
39
+ end
40
+
41
+ def start
42
+ parse_options
43
+
44
+ input = ARGF.read
45
+ case @settings.format
46
+ when :text
47
+ puts Formatter::Text.format(input, Reader.new.reading(input))
48
+ when :html
49
+ puts Formatter::HTML.format(input, Reader.new.reading(input))
50
+ when :json
51
+ puts Formatter::JSON.format(input, Reader.new.reading(input))
52
+ end
53
+ end
54
+ end
55
+ end
56
+
57
+ Furigana::CLI.instance.start
@@ -0,0 +1,5 @@
1
+ = furigana
2
+
3
+ Generate this with
4
+ furigana rdoc
5
+ After you have described your command line interface
@@ -0,0 +1,8 @@
1
+ require 'diff/lcs'
2
+ require 'nkf'
3
+ require 'json'
4
+
5
+ require 'furigana/version'
6
+ require 'furigana/mecab'
7
+ require 'furigana/reader'
8
+ require 'furigana/formatters'
@@ -0,0 +1,41 @@
1
+ # -*- coding: utf-8 -*-
2
module Furigana
  module Formatter
    # Shared logic for formatters that splice furigana into the original text.
    #
    # Subclasses implement +replacement(surface_form, reading)+, which returns
    # the annotated markup for a single token.
    class Base
      class << self
        # Returns +text+ with each token's surface form replaced by the
        # subclass's annotated version.
        #
        # Tokens are consumed in order. Each one is located at its first
        # occurrence at or after the end of the previous match, and the text
        # between matches is copied through unchanged.
        #
        # @param text [String] the original input text
        # @param tokens [Array<Array(String, String)>] ordered
        #   [surface_form, reading] pairs
        # @return [String] the annotated text
        def format(text, tokens)
          output = ''.dup
          cursor = 0

          tokens.each do |surface_form, reading|
            next if surface_form.nil? || surface_form.empty?

            found_at = text.index(surface_form, cursor)
            # A token that does not occur in the remaining text (for example
            # because it spans a line break in the input) is skipped so that
            # it cannot block the tokens that follow it.
            next if found_at.nil?

            output << text[cursor...found_at]
            output << replacement(surface_form, reading)
            cursor = found_at + surface_form.length
          end

          # Whatever follows the last match is copied verbatim; with no
          # tokens at all this is the entire text.
          output << text[cursor..-1]
        end

        # Builds the annotated form of one token. Must be overridden.
        #
        # @raise [NotImplementedError] when the subclass does not override it
        def replacement(_surface_form, _reading)
          raise NotImplementedError, "#{name} must implement replacement"
        end
      end
    end
  end
end
@@ -0,0 +1,12 @@
1
+ # -*- coding: utf-8 -*-
2
+ module Furigana
3
+ module Formatter
4
+ class HTML < Formatter::Base
5
+ class << self
6
+ def replacement(surface_form, reading)
7
+ "<ruby><rb>%s</rb><rp>【</rp><rt>%s</rt><rp>】</rp></ruby>" % [surface_form, reading]
8
+ end
9
+ end
10
+ end
11
+ end
12
+ end
@@ -0,0 +1,11 @@
1
+ module Furigana
2
+ module Formatter
3
+ class JSON < Formatter::Base
4
+ class << self
5
+ def format(text, tokens)
6
+ tokens.to_json
7
+ end
8
+ end
9
+ end
10
+ end
11
+ end
@@ -0,0 +1,12 @@
1
+ # -*- coding: utf-8 -*-
2
+ module Furigana
3
+ module Formatter
4
+ class Text < Formatter::Base
5
+ class << self
6
+ def replacement(surface_form, reading)
7
+ "%s【%s】" % [surface_form, reading]
8
+ end
9
+ end
10
+ end
11
+ end
12
+ end
@@ -0,0 +1,4 @@
1
+ require_relative 'formatter/base'
2
+ require_relative 'formatter/text'
3
+ require_relative 'formatter/html'
4
+ require_relative 'formatter/json'
@@ -0,0 +1,25 @@
1
+ # -*- coding: utf-8 -*-
2
+ require 'open3'
3
+
4
module Furigana
  # Thin wrapper around the external +mecab+ command, run with ChaSen-style
  # output (+-Ochasen+).
  class Mecab
    # Line on which mecab marks the end of a sentence.
    EOS_MARKER = 'EOS'

    class << self
      # Removes newline characters before the text is handed to mecab.
      #
      # @param text [String]
      # @return [String] a copy of +text+ without "\n"
      def sanitize_text(text)
        text.delete("\n")
      end

      # Runs mecab on +text+ and returns one hash per token.
      #
      # @param text [String] the text to analyse
      # @return [Array<Hash>] hashes with +:surface_form+ (first output
      #   column) and +:reading+ (second output column)
      # @raise [RuntimeError] when mecab exits unsuccessfully
      def tokenize(text)
        stdout, stderr, status =
          Open3.capture3('mecab', '-Ochasen', :stdin_data => sanitize_text(text))
        unless status.success?
          raise "mecab failed (exit status #{status.exitstatus}): #{stderr.strip}"
        end

        parse_chasen(stdout)
      end

      private

      # Converts mecab's tab-separated output into token hashes.
      def parse_chasen(output)
        output.split("\n").each_with_object([]) do |line, tokens|
          # Only the bare marker line is skipped; a real token whose surface
          # form is "EOS" has further columns and is kept.
          next if line.empty? || line == EOS_MARKER

          columns = line.split("\t")
          tokens << { :surface_form => columns[0], :reading => columns[1] }
        end
      end
    end
  end
end
@@ -0,0 +1,62 @@
1
+ # -*- coding: utf-8 -*-
2
module Furigana
  # Turns text into an ordered list of [kanji, reading] pairs by diffing each
  # token's surface form against its reading.
  class Reader
    # Matches strings made up solely of hiragana, katakana (including the
    # voiced "vu" forms) and the prolonged sound mark.
    KANA_ONLY = /\A[ぁ-ゔァ-ヴー]+\z/

    # Returns the furigana pairs for +text+, in order of appearance.
    #
    # A token contributes one pair per run of characters whose surface form
    # differs from its reading, so a token such as "kanji kana kanji" yields
    # two pairs. Tokens whose surface form equals their reading yield none.
    #
    # @param text [String]
    # @return [Array<Array(String, String)>] [kanji, reading] pairs
    def reading(text)
      Mecab.tokenize(text).each_with_object([]) do |token, list|
        list.concat(add_furigana(yomi_to_hiragana(token)))
      end
    end

    private

    # Replaces the token's reading with the form used for diffing.
    # Note: mutates and returns the given token hash.
    def yomi_to_hiragana(token)
      token[:reading] = choose_reading(token[:surface_form], token[:reading])
      token
    end

    # Converts a reading to hiragana via NKF; nil is passed through.
    def k2h(k)
      return nil if k.nil?
      NKF.nkf("-h1 -w", k)
    end

    # Truthy when +str+ consists only of kana.
    def kana?(str)
      KANA_ONLY.match(str)
    end

    # A kana-only surface form is its own reading (so it produces no
    # furigana); anything else uses the hiragana form of mecab's reading.
    def choose_reading(surface_form, reading)
      kana?(surface_form) ? surface_form : k2h(reading)
    end

    def sdiff(first, second)
      Diff::LCS.sdiff(first, second)
    end

    def diff_token_surface_form_and_reading(token)
      sdiff(token[:surface_form], token[:reading])
    end

    # Collects every annotated run of a token as a [kanji, yomi] pair.
    #
    # Diff actions are interpreted as:
    #   '!' surface and reading differ - extends the current run
    #   '-' surface character with no reading counterpart - extends the
    #       base of the current run
    #   '+' reading character with no surface counterpart - extends the
    #       reading of the most recent run, if there is one
    #   '=' identical character (kana) - closes the current run
    #
    # @return [Array<Array(String, String)>] possibly empty list of pairs
    def add_furigana(token)
      return [] if token[:surface_form].nil? || token[:reading].nil?

      runs = []
      on_kanji = false
      diff_token_surface_form_and_reading(token).each do |part|
        case part.action
        when '!'
          runs << ['', ''] unless on_kanji
          runs.last[0] += part.old_element
          runs.last[1] += part.new_element
          on_kanji = true
        when '-'
          runs << ['', ''] unless on_kanji
          runs.last[0] += part.old_element
          on_kanji = true
        when '+'
          # Nothing to attach the extra reading to before the first run.
          runs.last[1] += part.new_element unless runs.empty?
        when '='
          on_kanji = false
        end
      end

      # A run that never received a reading cannot be annotated.
      runs.reject { |_kanji, yomi| yomi.empty? }
    end
  end
end
@@ -0,0 +1,3 @@
1
module Furigana
  # Gem version string, also printed by the executable's --version switch.
  VERSION = "0.0.1"
end
metadata ADDED
@@ -0,0 +1,145 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: furigana
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Michael V. O'Brien
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-01-19 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rake
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rdoc
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: diff-lcs
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ - !ruby/object:Gem::Dependency
63
+ name: debugger
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ type: :development
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ - !ruby/object:Gem::Dependency
79
+ name: diff-lcs
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
86
+ type: :runtime
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ description:
95
+ email: michael@michaelvobrien.com
96
+ executables:
97
+ - furigana
98
+ extensions: []
99
+ extra_rdoc_files:
100
+ - README.rdoc
101
+ - furigana.rdoc
102
+ files:
103
+ - bin/furigana
104
+ - lib/furigana.rb
105
+ - lib/furigana/formatter/base.rb
106
+ - lib/furigana/formatter/html.rb
107
+ - lib/furigana/formatter/json.rb
108
+ - lib/furigana/formatter/text.rb
109
+ - lib/furigana/formatters.rb
110
+ - lib/furigana/mecab.rb
111
+ - lib/furigana/reader.rb
112
+ - lib/furigana/version.rb
113
+ - README.rdoc
114
+ - furigana.rdoc
115
+ homepage: https://github.com/michaelvobrien/furigana
116
+ licenses: []
117
+ post_install_message:
118
+ rdoc_options:
119
+ - --title
120
+ - furigana
121
+ - --main
122
+ - README.rdoc
123
+ - -ri
124
+ require_paths:
125
+ - lib
126
+ - lib
127
+ required_ruby_version: !ruby/object:Gem::Requirement
128
+ none: false
129
+ requirements:
130
+ - - ! '>='
131
+ - !ruby/object:Gem::Version
132
+ version: '0'
133
+ required_rubygems_version: !ruby/object:Gem::Requirement
134
+ none: false
135
+ requirements:
136
+ - - ! '>='
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
139
+ requirements: []
140
+ rubyforge_project:
141
+ rubygems_version: 1.8.23
142
+ signing_key:
143
+ specification_version: 3
144
+ summary: Add furigana to text
145
+ test_files: []