furigana 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,6 @@
1
+ = furigana
2
+
3
+ Adds furigana (phonetic readings) to Japanese text and prints the result as plain text, HTML, or JSON.
4
+
5
+ :include:furigana.rdoc
6
+
@@ -0,0 +1,57 @@
1
+ #!/usr/bin/env ruby
2
+ # -*- coding: utf-8 -*-
3
+ require 'furigana'
4
+ require 'singleton'
5
+ require 'ostruct'
6
+ require 'optparse'
7
+
8
module Furigana
  # Command-line front end for the gem.
  #
  # Reads text from ARGF (the files named on the command line, or standard
  # input), asks Reader for the furigana tokens and prints the text rendered
  # by the formatter selected with --text (default), --html or --json.
  class CLI
    include Singleton

    # Starts with plain-text output selected.
    def initialize
      @settings = OpenStruct.new
      @settings.format = :text
    end

    # Parses ARGV in place, removing the recognised switches so that any
    # remaining arguments are left for ARGF to treat as input files.
    #
    # An unknown or malformed option prints the error and the usage banner
    # to standard error and exits with status 1 instead of surfacing an
    # uncaught OptionParser exception.
    def parse_options
      parser = build_option_parser
      parser.parse!
    rescue OptionParser::ParseError => error
      abort "furigana: #{error.message}\n\n#{parser}"
    end

    # Entry point: parse the options, read all input, print the result.
    def start
      parse_options

      input = ARGF.read
      puts formatter.format(input, Reader.new.reading(input))
    end

    private

    # Builds the OptionParser describing every supported switch.
    def build_option_parser
      OptionParser.new do |opts|
        opts.banner = "Usage: furigana [options] [file]"

        opts.on("--text", "Add furigana and output text (default)") do
          @settings.format = :text
        end
        opts.on("--html", "Add furigana and output HTML") do
          @settings.format = :html
        end
        opts.on("--json", "Add furigana and output JSON") do
          @settings.format = :json
        end
        opts.on_tail("-h", "--help", "Show this message") do
          puts opts
          exit
        end
        opts.on_tail("--version", "Show version") do
          puts Furigana::VERSION
          exit
        end
      end
    end

    # Formatter class matching the selected output format. Resolved at call
    # time so the formatter classes only need to be loaded before #start.
    def formatter
      case @settings.format
      when :text then Formatter::Text
      when :html then Formatter::HTML
      when :json then Formatter::JSON
      end
    end
  end
end
56
+
57
+ Furigana::CLI.instance.start # Run the command-line interface when this script is executed.
@@ -0,0 +1,5 @@
1
+ = furigana
2
+
3
+ Generate this with
4
+ furigana rdoc
5
+ After you have described your command line interface
@@ -0,0 +1,8 @@
1
+ require 'diff/lcs'
2
+ require 'nkf'
3
+ require 'json'
4
+
5
+ require 'furigana/version'
6
+ require 'furigana/mecab'
7
+ require 'furigana/reader'
8
+ require 'furigana/formatters'
@@ -0,0 +1,41 @@
1
+ # -*- coding: utf-8 -*-
2
module Furigana
  module Formatter
    # Shared rendering logic for the output formatters.
    #
    # Subclasses supply a class-level +replacement(surface_form, reading)+
    # that returns the markup for one annotated span.
    class Base
      class << self
        # Returns +text+ with every token's surface form replaced by the
        # subclass's rendering of it.
        #
        # text::   the original input string.
        # tokens:: an ordered collection of [surface_form, reading] pairs,
        #          in the order their surface forms occur in +text+.
        #
        # Each token is looked up at or after the end of the previous match,
        # so repeated surface forms are annotated one occurrence at a time.
        # A token that cannot be found (or has no surface form) is skipped
        # rather than blocking the tokens after it. An empty token list
        # returns the text unchanged.
        def format(text, tokens)
          surface_form, reading = 0, 1
          output = ''.dup
          cursor = 0

          tokens.each do |token|
            surface = token[surface_form]
            next if surface.nil? || surface.empty?

            start = text.index(surface, cursor)
            next if start.nil?

            # Copy the untouched text before the match, then the annotation.
            output << text[cursor...start]
            output << replacement(surface, token[reading])
            cursor = start + surface.length
          end

          output << text[cursor..-1]
        end
      end
    end
  end
end
@@ -0,0 +1,12 @@
1
+ # -*- coding: utf-8 -*-
2
module Furigana
  module Formatter
    # Formatter whose annotated spans are HTML <ruby> elements.
    class HTML < Formatter::Base
      # Returns the <ruby> markup for one span: the surface form in <rb>,
      # the reading in <rt>, with 【 】 as the <rp> fallback brackets.
      def self.replacement(surface_form, reading)
        "<ruby><rb>#{surface_form}</rb><rp>【</rp><rt>#{reading}</rt><rp>】</rp></ruby>"
      end
    end
  end
end
@@ -0,0 +1,11 @@
1
module Furigana
  module Formatter
    # Formatter that ignores the source text and emits the tokens as JSON.
    class JSON < Formatter::Base
      # Returns the JSON serialisation of +tokens+; +text+ is accepted only
      # to keep the same signature as the other formatters.
      def self.format(text, tokens)
        tokens.to_json
      end
    end
  end
end
@@ -0,0 +1,12 @@
1
+ # -*- coding: utf-8 -*-
2
module Furigana
  module Formatter
    # Formatter whose annotated spans are plain text.
    class Text < Formatter::Base
      # Returns the surface form followed by its reading inside 【 】.
      def self.replacement(surface_form, reading)
        "#{surface_form}【#{reading}】"
      end
    end
  end
end
@@ -0,0 +1,4 @@
1
+ require_relative 'formatter/base'
2
+ require_relative 'formatter/text'
3
+ require_relative 'formatter/html'
4
+ require_relative 'formatter/json'
@@ -0,0 +1,25 @@
1
+ # -*- coding: utf-8 -*-
2
+ require 'open3'
3
+
4
module Furigana
  # Thin wrapper around the external +mecab+ command-line tokenizer.
  class Mecab
    # Raised when the mecab process exits unsuccessfully.
    class Error < StandardError; end

    class << self
      # Returns +text+ with every newline removed before it is sent to mecab.
      def sanitize_text(text)
        text.delete("\n")
      end

      # Runs <tt>mecab -Ochasen</tt> on +text+ and returns one hash per
      # output token, with the first tab-separated column as :surface_form
      # and the second as :reading.
      #
      # Raises Furigana::Mecab::Error (carrying mecab's stderr) when the
      # process exits with a failure status, instead of silently returning
      # whatever happened to be on stdout.
      def tokenize(text)
        stdout, stderr, status = Open3.capture3("mecab -Ochasen", :stdin_data => sanitize_text(text))
        unless status.success?
          raise Error, "mecab exited with status #{status.exitstatus}: #{stderr.to_s.strip}"
        end

        parse_chasen(stdout)
      end

      private

      # Converts ChaSen-style output (tab-separated columns, one token per
      # line) into token hashes. Blank lines and the bare "EOS" terminator
      # line are dropped; a token whose surface form merely happens to be
      # "EOS" still has further columns and is kept.
      def parse_chasen(output)
        output.split("\n").inject([]) do |tokens, line|
          columns = line.split("\t")
          unless columns.empty? || columns == ['EOS']
            tokens << { :surface_form => columns[0], :reading => columns[1] }
          end
          tokens
        end
      end
    end
  end
end
@@ -0,0 +1,62 @@
1
+ # -*- coding: utf-8 -*-
2
module Furigana
  # Derives furigana from MeCab tokens.
  #
  # For every token whose surface form is not pure kana, the surface form is
  # diffed against its hiragana reading; each run of characters that differ
  # is reported as a [surface_characters, reading_characters] pair.
  class Reader
    # Returns an array of [surface, reading] pairs for +text+, in the order
    # the tokens were produced by Mecab.tokenize. A single token may
    # contribute several pairs when it contains more than one run of
    # differing characters (e.g. kanji separated by okurigana).
    def reading(text)
      Mecab.tokenize(text).inject([]) do |pairs, token|
        pairs.concat(add_furigana(yomi_to_hiragana(token)))
      end
    end

    private

    # Replaces the token's :reading (in place) with the reading to diff
    # against, and returns the token.
    def yomi_to_hiragana(token)
      token[:reading] = choose_reading(token[:surface_form], token[:reading])
      token
    end

    # Passes +k+ through NKF with the "-h1 -w" options; nil stays nil.
    # NOTE(review): presumably -h1 converts katakana to hiragana and -w
    # selects UTF-8 output - confirm against the NKF documentation.
    def k2h(k)
      return nil if k.nil?
      NKF.nkf("-h1 -w", k)
    end

    # Truthy when +str+ consists solely of hiragana, katakana or the long
    # vowel mark. Anchored with \A and \z so the whole string must match,
    # not just one of its lines.
    def kana?(str)
      /\A[ぁ-んァ-ンー]+\z/ =~ str
    end

    # A pure-kana surface form is its own reading; anything else uses the
    # converted dictionary reading.
    def choose_reading(surface_form, reading)
      kana?(surface_form) ? surface_form : k2h(reading)
    end

    def sdiff(first, second)
      Diff::LCS.sdiff(first, second)
    end

    def diff_token_surface_form_and_reading(token)
      sdiff(token[:surface_form], token[:reading])
    end

    # Walks the surface/reading diff and collects one [surface, reading]
    # pair per run of changed characters. Returns an empty array when the
    # token has nothing to annotate or lacks a surface form or reading.
    #
    # Diff actions handled:
    #   '!' surface and reading characters differ - extends the current run,
    #       starting a new pair when the previous character matched
    #   '+' reading-only character - appended to the most recent pair; if no
    #       pair exists yet there is nothing to attach it to and it is dropped
    #   '=' identical character - ends the current run
    def add_furigana(token)
      return [] if token[:surface_form].nil? || token[:reading].nil?

      pairs = []
      in_run = false
      diff_token_surface_form_and_reading(token).each do |part|
        case part.action
        when '!'
          pairs << ['', ''] unless in_run
          pairs.last[0] += part.old_element
          pairs.last[1] += part.new_element
          in_run = true
        when '+'
          pairs.last[1] += part.new_element unless pairs.empty?
        when '='
          in_run = false
        end
      end
      pairs
    end
  end
end
@@ -0,0 +1,3 @@
1
module Furigana
  # Gem version string, printed by the command-line tool's --version switch.
  # Frozen so the constant cannot be mutated at runtime.
  VERSION = '0.0.1'.freeze
end
metadata ADDED
@@ -0,0 +1,145 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: furigana
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Michael V. O'Brien
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-01-19 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rake
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rdoc
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: diff-lcs
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ - !ruby/object:Gem::Dependency
63
+ name: debugger
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ type: :development
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ - !ruby/object:Gem::Dependency
79
+ name: diff-lcs
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
86
+ type: :runtime
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ description:
95
+ email: michael@michaelvobrien.com
96
+ executables:
97
+ - furigana
98
+ extensions: []
99
+ extra_rdoc_files:
100
+ - README.rdoc
101
+ - furigana.rdoc
102
+ files:
103
+ - bin/furigana
104
+ - lib/furigana.rb
105
+ - lib/furigana/formatter/base.rb
106
+ - lib/furigana/formatter/html.rb
107
+ - lib/furigana/formatter/json.rb
108
+ - lib/furigana/formatter/text.rb
109
+ - lib/furigana/formatters.rb
110
+ - lib/furigana/mecab.rb
111
+ - lib/furigana/reader.rb
112
+ - lib/furigana/version.rb
113
+ - README.rdoc
114
+ - furigana.rdoc
115
+ homepage: https://github.com/michaelvobrien/furigana
116
+ licenses: []
117
+ post_install_message:
118
+ rdoc_options:
119
+ - --title
120
+ - furigana
121
+ - --main
122
+ - README.rdoc
123
+ - -ri
124
+ require_paths:
125
+ - lib
126
+ - lib
127
+ required_ruby_version: !ruby/object:Gem::Requirement
128
+ none: false
129
+ requirements:
130
+ - - ! '>='
131
+ - !ruby/object:Gem::Version
132
+ version: '0'
133
+ required_rubygems_version: !ruby/object:Gem::Requirement
134
+ none: false
135
+ requirements:
136
+ - - ! '>='
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
139
+ requirements: []
140
+ rubyforge_project:
141
+ rubygems_version: 1.8.23
142
+ signing_key:
143
+ specification_version: 3
144
+ summary: Add furigana to text
145
+ test_files: []