text-hyphen 1.0.0 → 1.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/.autotest +23 -0
- data/COPYING.txt +339 -0
- data/History.txt +23 -0
- data/{LICENCE → LICENCE.txt} +9 -9
- data/Manifest.txt +44 -0
- data/README.txt +82 -0
- data/Rakefile +16 -108
- data/bin/hyphen +3 -1
- data/lib/text-hyphen.rb +1 -0
- data/lib/text/hyphen.rb +135 -134
- data/lib/text/hyphen/language.rb +13 -9
- data/lib/text/hyphen/language/cs.rb +363 -363
- data/lib/text/hyphen/language/da.rb +1 -1
- data/lib/text/hyphen/language/de.rb +1 -0
- data/lib/text/hyphen/language/de1.rb +8 -6
- data/lib/text/hyphen/language/de2.rb +7 -6
- data/lib/text/hyphen/language/en_uk.rb +1 -1
- data/lib/text/hyphen/language/et.rb +1 -1
- data/lib/text/hyphen/language/hsb.rb +1 -1
- data/lib/text/hyphen/language/hu1.rb +1 -1
- data/lib/text/hyphen/language/hu2.rb +1 -1
- data/lib/text/hyphen/language/is.rb +1 -1
- data/lib/text/hyphen/language/mn.rb +1 -1
- data/lib/text/hyphen/language/pl.rb +1 -1
- data/test/test_bugs.rb +26 -0
- data/{tests/tc_text_hyphen.rb → test/test_text_hyphen.rb} +2 -8
- data/text-hyphen.gemspec +63 -0
- metadata +214 -82
- data/ChangeLog +0 -4
- data/Changelog +0 -4
- data/INSTALL +0 -6
- data/README +0 -56
data/README.txt
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
== text-hyphen
|
2
|
+
|
3
|
+
http://rubyforge.org/projects/text-format/
|
4
|
+
http://github.com/halostatue/text-hyphen/
|
5
|
+
|
6
|
+
== DESCRIPTION:
|
7
|
+
|
8
|
+
Text::Hyphen will hyphenate words using modified versions of TeX hyphenation
|
9
|
+
patterns.
|
10
|
+
|
11
|
+
Text::Hyphen will properly hyphenate various words according to the rules of
|
12
|
+
the language the word is written in. The algorithm is based on that of the TeX
|
13
|
+
typesetting system by Donald E. Knuth. This is based on the Perl implementation
|
14
|
+
of TeX::Hyphen[1] and the Ruby port[2]. The language hyphenation pattern files
|
15
|
+
are based on the sources available from CTAN[3] as of 2004.12.19 and have been
|
16
|
+
translated by Austin Ziegler.
|
17
|
+
|
18
|
+
This release is 1.0.2. It is a minor bugfix for the RubyGem release of
|
19
|
+
Text::Hyphen to enable the hyphen command-line program. Text::Hyphen represents
|
20
|
+
a significant improvement over its predecessor, TeX::Hyphen.
|
21
|
+
|
22
|
+
== SYNOPSIS:
|
23
|
+
|
24
|
+
require 'text/hyphen'
|
25
|
+
hh = Text::Hyphen.new(:language => 'en_us', :left => 2, :right => 2)
|
26
|
+
# Defaults to the above
|
27
|
+
hh = Text::Hyphen.new
|
28
|
+
|
29
|
+
word = "representation"
|
30
|
+
points = hh.hyphenate(word) #=> [3, 5, 8, 10]
|
31
|
+
puts hh.visualize(word) #=> rep-re-sen-ta-tion
|
32
|
+
|
33
|
+
Text::Hyphen is truly multilingual[4]. As an example, consider the difference
|
34
|
+
between the following:
|
35
|
+
|
36
|
+
require 'text/hyphen'
|
37
|
+
# Using left and right minimum values of 0 ensures that you will
|
38
|
+
# see all possible hyphenation points, not just those that meet
|
39
|
+
# the minimum width requirements.
|
40
|
+
en = Text::Hyphen.new(:left => 0, :right => 0)
|
41
|
+
fr = Text::Hyphen.new(:language => "fr", :left => 0, :right => 0)
|
42
|
+
|
43
|
+
puts en.visualise("organiser") #=> or-gan-iser
|
44
|
+
puts fr.visualise("organiser") #=> or-ga-ni-ser
|
45
|
+
|
46
|
+
As you can see, the hyphenation is distinct between the two hyphenators.
|
47
|
+
Additional improvements over TeX::Hyphen include thread safety (except for
|
48
|
+
debug control) and (minimal) support for UTF-8.
|
49
|
+
|
50
|
+
== FUTURE ENHANCEMENTS:
|
51
|
+
|
52
|
+
* Ruby 1.9 compatibility.
|
53
|
+
|
54
|
+
== INSTALL:
|
55
|
+
|
56
|
+
* This release of text-hyphen is only installed with RubyGems.
|
57
|
+
|
58
|
+
== DEVELOPERS:
|
59
|
+
|
60
|
+
After checking out the source, run:
|
61
|
+
|
62
|
+
$ rake newb
|
63
|
+
|
64
|
+
This task will install any missing dependencies, run the tests/specs,
|
65
|
+
and generate the RDoc.
|
66
|
+
|
67
|
+
== LICENSE:
|
68
|
+
|
69
|
+
The licensing for Text::Hyphen is complex and somewhat dependent upon the
|
70
|
+
languages being used during hyphenation; some languages are held under a more
|
71
|
+
strict licence than that granted in the LICENCE.txt file.
|
72
|
+
|
73
|
+
Copyright 2004 - 2005 Austin Ziegler <austin@rubyforge.org>
|
74
|
+
See the LICENCE.txt file for more information.
|
75
|
+
|
76
|
+
[1] <http://search.cpan.org/author/JANPAZ/TeX-Hyphen-0.140/lib/TeX/Hyphen.pm>
|
77
|
+
Maintained by Jan Pazdziora.
|
78
|
+
[2] Available at <http://rubyforge.org/projects/text-format>.
|
79
|
+
[3] <http://www.ctan.org>
|
80
|
+
[4] There are some bugs and design decisions in the original Perl
|
81
|
+
implementation of TeX::Hyphen that make it unsuitable for most multilingual
|
82
|
+
implementations; these carried over to the Ruby port of TeX::Hyphen.
|
data/Rakefile
CHANGED
@@ -1,116 +1,24 @@
|
|
1
|
-
|
2
|
-
$LOAD_PATH.unshift('lib')
|
1
|
+
# -*- ruby -*-
|
3
2
|
|
4
3
|
require 'rubygems'
|
5
|
-
require '
|
6
|
-
require '
|
7
|
-
require 'archive/tar/minitar'
|
8
|
-
require 'zlib'
|
4
|
+
require 'hoe'
|
5
|
+
require 'rubyforge'
|
9
6
|
|
10
|
-
|
11
|
-
|
7
|
+
Hoe.plugin :doofus
|
8
|
+
Hoe.plugin :gemspec
|
9
|
+
Hoe.plugin :git
|
10
|
+
Hoe.plugin :rubyforge
|
12
11
|
|
13
|
-
|
12
|
+
Hoe.spec 'text-hyphen' do
|
13
|
+
developer('Austin Ziegler', 'austin@rubyforge.org')
|
14
|
+
self.rubyforge_name = 'text-format'
|
14
15
|
|
15
|
-
|
16
|
-
year, month, day, hour, minute, second = DATE_RE.match(ENV['RELEASE_DATE']).captures
|
17
|
-
year ||= 0
|
18
|
-
month ||= 0
|
19
|
-
day ||= 0
|
20
|
-
hour ||= 0
|
21
|
-
minute ||= 0
|
22
|
-
second ||= 0
|
23
|
-
ReleaseDate = Time.mktime(year, month, day, hour, minute, second)
|
24
|
-
else
|
25
|
-
ReleaseDate = nil
|
26
|
-
end
|
27
|
-
|
28
|
-
task :test do |t|
|
29
|
-
require 'test/unit/testsuite'
|
30
|
-
require 'test/unit/ui/console/testrunner'
|
31
|
-
|
32
|
-
runner = Test::Unit::UI::Console::TestRunner
|
33
|
-
|
34
|
-
$LOAD_PATH.unshift('tests')
|
35
|
-
$stderr.puts "Checking for test cases:" if t.verbose
|
36
|
-
Dir['tests/tc_*.rb'].each do |testcase|
|
37
|
-
$stderr.puts "\t#{testcase}" if t.verbose
|
38
|
-
load testcase
|
39
|
-
end
|
40
|
-
|
41
|
-
suite = Test::Unit::TestSuite.new("Text::Hyphen")
|
16
|
+
self.spec_extras[:required_ruby_version] = '< 1.9'
|
42
17
|
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
runner.run(suite)
|
48
|
-
end
|
49
|
-
|
50
|
-
spec = eval(File.read("text-hyphen.gemspec"))
|
51
|
-
spec.version = Text::Hyphen::VERSION
|
52
|
-
desc "Build the RubyGem for Text::Hyphen"
|
53
|
-
task :gem => [ :test ]
|
54
|
-
Rake::GemPackageTask.new(spec) do |g|
|
55
|
-
g.need_tar = false
|
56
|
-
g.need_zip = false
|
57
|
-
g.package_dir = ".."
|
58
|
-
end
|
59
|
-
|
60
|
-
desc "Build a Text::Hyphen .tar.gz distribution."
|
61
|
-
task :tar => [ TARDIST ]
|
62
|
-
file TARDIST => [ :test ] do |t|
|
63
|
-
current = File.basename(Dir.pwd)
|
64
|
-
Dir.chdir("..") do
|
65
|
-
begin
|
66
|
-
files = Dir["#{current}/**/*"].select { |dd| dd !~ %r{(?:/CVS/?|~$)} }
|
67
|
-
files.map! do |dd|
|
68
|
-
ddnew = dd.gsub(/^#{current}/, DISTDIR)
|
69
|
-
mtime = ReleaseDate || File.stat(dd).mtime
|
70
|
-
if File.directory?(dd)
|
71
|
-
{ :name => ddnew, :mode => 0755, :dir => true, :mtime => mtime }
|
72
|
-
else
|
73
|
-
if dd =~ %r{bin/}
|
74
|
-
mode = 0755
|
75
|
-
else
|
76
|
-
mode = 0644
|
77
|
-
end
|
78
|
-
data = File.read(dd)
|
79
|
-
{ :name => ddnew, :mode => mode, :data => data, :size => data.size,
|
80
|
-
:mtime => mtime }
|
81
|
-
end
|
82
|
-
end
|
83
|
-
|
84
|
-
ff = File.open(t.name.gsub(%r{^\.\./}o, ''), "wb")
|
85
|
-
gz = Zlib::GzipWriter.new(ff)
|
86
|
-
tw = Archive::Tar::Minitar::Writer.new(gz)
|
87
|
-
|
88
|
-
files.each do |entry|
|
89
|
-
if entry[:dir]
|
90
|
-
tw.mkdir(entry[:name], entry)
|
91
|
-
else
|
92
|
-
tw.add_file_simple(entry[:name], entry) { |os| os.write(entry[:data]) }
|
93
|
-
end
|
94
|
-
end
|
95
|
-
ensure
|
96
|
-
tw.close if tw
|
97
|
-
gz.close if gz
|
98
|
-
end
|
99
|
-
end
|
100
|
-
end
|
101
|
-
task TARDIST => [ :test ]
|
102
|
-
|
103
|
-
def sign(file)
|
104
|
-
system %("C:/Program Files/Windows Privacy Tools/GnuPG/gpg.exe" -ba #{file}).gsub(%r{/}) { "\\" }
|
105
|
-
raise "Error signing with GPG." unless File.exists?("#{file}.asc")
|
106
|
-
end
|
107
|
-
|
108
|
-
task :signtar => [ :tar ] do
|
109
|
-
sign TARDIST
|
110
|
-
end
|
111
|
-
task :signgem => [ :gem ] do
|
112
|
-
sign "../#{DISTDIR}.gem"
|
18
|
+
self.extra_dev_deps << ['hoe-doofus', '~> 1.0']
|
19
|
+
self.extra_dev_deps << ['hoe-gemspec', '~> 1.0']
|
20
|
+
self.extra_dev_deps << ['hoe-git', '~> 1.0']
|
21
|
+
self.extra_dev_deps << ['hoe-seattlerb', '~> 1.0']
|
113
22
|
end
|
114
23
|
|
115
|
-
|
116
|
-
task :default => [ :signtar, :signgem ]
|
24
|
+
# vim: syntax=ruby
|
data/bin/hyphen
CHANGED
@@ -4,7 +4,7 @@
|
|
4
4
|
#
|
5
5
|
# Licensed under the same terms as Ruby.
|
6
6
|
#
|
7
|
-
# $Id
|
7
|
+
# $Id$
|
8
8
|
#++
|
9
9
|
|
10
10
|
require 'optparse'
|
@@ -84,6 +84,7 @@ when :visualise
|
|
84
84
|
size -= (vis.size + 1)
|
85
85
|
print "#{vis} "
|
86
86
|
end
|
87
|
+
puts
|
87
88
|
when :hyphenate
|
88
89
|
ARGV.each do |word|
|
89
90
|
hyp = hyphenator.hyphenate(word)
|
@@ -102,6 +103,7 @@ when :hyphenate_to
|
|
102
103
|
size -= (vis.size + 1)
|
103
104
|
print "#{vis} "
|
104
105
|
end
|
106
|
+
puts
|
105
107
|
when :stats
|
106
108
|
puts hyphenator.stats
|
107
109
|
end
|
data/lib/text-hyphen.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'text/hyphen'
|
data/lib/text/hyphen.rb
CHANGED
@@ -1,120 +1,116 @@
|
|
1
1
|
module Text; end
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
81
|
-
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
82
|
-
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
83
|
-
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
3
|
+
# = Introduction
|
4
|
+
# Text::Hyphen -- hyphenate words using modified versions of TeX hyphenation
|
5
|
+
# patterns.
|
6
|
+
#
|
7
|
+
# == Usage
|
8
|
+
# require 'text/hyphen'
|
9
|
+
# hh = Text::Hyphen.new(:language => 'en_us', :left => 2, :right => 2)
|
10
|
+
# # Defaults to the above
|
11
|
+
# hh = Text::Hyphen.new
|
12
|
+
#
|
13
|
+
# word = "representation"
|
14
|
+
# points = hh.hyphenate(word) #=> [3, 5, 8, 10]
|
15
|
+
# puts hh.visualize(word) #=> rep-re-sen-ta-tion
|
16
|
+
#
|
17
|
+
# en = Text::Hyphen.new(:left => 0, :right => 0)
|
18
|
+
# fr = Text::Hyphen.new(:language => "fr", :left => 0, :right => 0)
|
19
|
+
# puts en.visualise("organiser") #=> or-gan-iser
|
20
|
+
# puts fr.visualise("organiser") #=> or-ga-ni-ser
|
21
|
+
#
|
22
|
+
# == Description
|
23
|
+
# Creates a new Hyphen object and loads the language patterns into memory.
|
24
|
+
# The hyphenator can then be asked for the hyphenation of a word. If no
|
25
|
+
# language is specified, then the language en_us (EN_US) is used by default.
|
26
|
+
#
|
27
|
+
# Copyright:: Copyright (c) 2004 - 2005 Austin Ziegler
|
28
|
+
# Version:: 1.0.2
|
29
|
+
# Based On:: <tt>TeX::Hyphen</tt> 0.4 Copyright (c) 2003 - 2004
|
30
|
+
# Martin DeMello and Austin Ziegler, in turn based on
|
31
|
+
# Perl's <tt>TeX::Hyphen</tt>
|
32
|
+
# [http://search.cpan.org/author/JANPAZ/TeX-Hyphen-0.140/lib/TeX/Hyphen.pm]
|
33
|
+
# Copyright (c) 1997 - 2002 Jan Pazdziora
|
34
|
+
#
|
35
|
+
# == Licence
|
36
|
+
# Licensing for Text::Hyphen is unfortunately complex because of the various
|
37
|
+
# copyrights and licences of the source hyphenation files. Some of these
|
38
|
+
# files are available only under the TeX licence and others are available
|
39
|
+
# only under the GNU GPL while others are public domain. Each language file
|
40
|
+
# has these licences embedded within the file. Please consult each file's
|
41
|
+
# licence to ensure that it is compatible with your application.
|
42
|
+
#
|
43
|
+
# The copyright on the Text::Hyphen application/library and the Ruby
|
44
|
+
# translations of hyphenation files belongs to Austin Ziegler. All other
|
45
|
+
# copyrights on original versions still stand; Text::Hyphen is a derivative
|
46
|
+
# work of these and other projects.
|
47
|
+
#
|
48
|
+
# === Application and Compilation Licences
|
49
|
+
# Text::Hyphen, the application/library is licensed under the same terms as
|
50
|
+
# Ruby. Note that this specifically refers to the contents of bin/hyphen,
|
51
|
+
# lib/text/hyphen.rb, and lib/text/hyphen/language.rb.
|
52
|
+
#
|
53
|
+
# Individual language hyphenation files are NOT licensed under these terms,
|
54
|
+
# but under the following MIT-style licence and the original hyphenation
|
55
|
+
# pattern licenses. The copyright for the original TeX hyphenation files is
|
56
|
+
# held by the original authors; any mistakes in conversion of these files to
|
57
|
+
# Ruby is attributable to the contributors to the Text::Hyphen package only.
|
58
|
+
#
|
59
|
+
# The compilation package Text::Hyphen is licensed under the same terms as
|
60
|
+
# Ruby.
|
61
|
+
#
|
62
|
+
# === Blanket Language Hyphenation File Licence
|
63
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
64
|
+
# copy of this software and associated documentation files (the "Software"),
|
65
|
+
# to deal in the Software without restriction, including without limitation
|
66
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
67
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
68
|
+
# Software is furnished to do so, subject to the following conditions:
|
69
|
+
#
|
70
|
+
# The above copyright notice and this permission notice shall be included in
|
71
|
+
# all copies or substantial portions of the Software.
|
72
|
+
#
|
73
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
74
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
75
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
76
|
+
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
77
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
78
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
79
|
+
# DEALINGS IN THE SOFTWARE.
|
84
80
|
class Text::Hyphen
|
85
81
|
DEBUG = false
|
86
|
-
VERSION = '1.0.
|
82
|
+
VERSION = '1.0.2'
|
87
83
|
|
88
84
|
DEFAULT_MIN_LEFT = 2
|
89
85
|
DEFAULT_MIN_RIGHT = 2
|
90
86
|
|
91
|
-
|
92
|
-
|
87
|
+
# No fewer than this number of letters will show up to the left of the
|
88
|
+
# hyphen. This overrides the default specified in the language.
|
93
89
|
attr_accessor :left
|
94
|
-
|
95
|
-
|
90
|
+
# No fewer than this number of letters will show up to the right of the
|
91
|
+
# hyphen. This overrides the default specified in the language.
|
96
92
|
attr_accessor :right
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
# Text::Hyphen::Language.
|
93
|
+
# The name of the language to be used in hyphenating words. This will be a
|
94
|
+
# two or three character ISO 639 code, with the two character form being
|
95
|
+
# the canonical resource name. This will load the language hyphenation
|
96
|
+
# definitions from text/hyphen/language/<code> as a Ruby class. The
|
97
|
+
# resource 'text/hyphen/language/en_us' defines the language class
|
98
|
+
# Text::Hyphen::Language::EN_US. It also defines the secondary forms
|
99
|
+
# Text::Hyphen::Language::EN and Text::Hyphen::Language::ENG_US.
|
100
|
+
#
|
101
|
+
# Minimal transformations will be performed on the language code provided,
|
102
|
+
# such that any dashes are converted to underscores (e.g., 'en-us' becomes
|
103
|
+
# 'en_us') and all characters are regularised. Resource names will be
|
104
|
+
# downcased and class names will be upcased (e.g., 'Pt' for the Portuguese
|
105
|
+
# language becomes 'pt' and 'PT', respectively).
|
106
|
+
#
|
107
|
+
# The language may also be specified as an instance of
|
108
|
+
# Text::Hyphen::Language.
|
114
109
|
attr_accessor :language
|
115
|
-
|
110
|
+
undef :language=
|
111
|
+
def language=(lang)
|
116
112
|
require 'text/hyphen/language' unless defined?(Text::Hyphen::Language)
|
117
|
-
if lang.kind_of?
|
113
|
+
if lang.kind_of? Text::Hyphen::Language
|
118
114
|
@iso_language = lang.to_s.split(%r{::}o)[-1].downcase
|
119
115
|
@language = lang
|
120
116
|
else
|
@@ -123,17 +119,18 @@ class Text::Hyphen
|
|
123
119
|
end
|
124
120
|
@iso_language
|
125
121
|
end
|
126
|
-
|
122
|
+
# Returns the language's ISO 639 ID, e.g., "en_us" or "pt".
|
127
123
|
attr_reader :iso_language
|
128
124
|
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
125
|
+
# The following initializations are equivalent:
|
126
|
+
#
|
127
|
+
# hyp = Text::Hyphen.new(:language => "EU")
|
128
|
+
# hyp = Text::Hyphen.new { |h| h.language = "EU" }
|
133
129
|
def initialize(options = {}) # :yields self:
|
134
130
|
@iso_language = options[:language]
|
135
131
|
@left = options[:left]
|
136
132
|
@right = options[:right]
|
133
|
+
@language = nil
|
137
134
|
|
138
135
|
@cache = {}
|
139
136
|
@vcache = {}
|
@@ -154,12 +151,12 @@ class Text::Hyphen
|
|
154
151
|
@right ||= DEFAULT_MIN_RIGHT
|
155
152
|
end
|
156
153
|
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
154
|
+
# Returns a list of places where the word can be divided, as
|
155
|
+
#
|
156
|
+
# hyp.hyphenate('representation')
|
157
|
+
#
|
158
|
+
# returns [3, 5, 8, 10]. If the word has been hyphenated previously, it
|
159
|
+
# will be returned from a per-instance cache.
|
163
160
|
def hyphenate(word)
|
164
161
|
word = word.downcase
|
165
162
|
$stderr.puts "Hyphenating #{word}" if DEBUG
|
@@ -199,13 +196,13 @@ class Text::Hyphen
|
|
199
196
|
@cache[word] = make_result_list(result)
|
200
197
|
end
|
201
198
|
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
199
|
+
# Returns a visualization of the hyphenation points, so:
|
200
|
+
#
|
201
|
+
# hyp.visualize('representation')
|
202
|
+
#
|
203
|
+
# returns <tt>rep-re-sen-ta-tion</tt>, at least for English patterns. If
|
204
|
+
# the word has been visualised previously, it will be returned from a
|
205
|
+
# per-instance cache.
|
209
206
|
def visualise(word)
|
210
207
|
return @vcache[word] if @vcache.has_key?(word)
|
211
208
|
w = word.dup
|
@@ -214,7 +211,6 @@ class Text::Hyphen
|
|
214
211
|
end
|
215
212
|
@vcache[word] = w
|
216
213
|
end
|
217
|
-
|
218
214
|
alias visualize visualise
|
219
215
|
|
220
216
|
def clear_cache!
|
@@ -222,8 +218,8 @@ class Text::Hyphen
|
|
222
218
|
@vcache.clear
|
223
219
|
end
|
224
220
|
|
225
|
-
|
226
|
-
|
221
|
+
# This function will hyphenate a word so that the first point is at most
|
222
|
+
# +size+ characters.
|
227
223
|
def hyphenate_to(word, size)
|
228
224
|
point = hyphenate(word).delete_if { |e| e >= size }.max
|
229
225
|
if point.nil?
|
@@ -233,7 +229,7 @@ class Text::Hyphen
|
|
233
229
|
end
|
234
230
|
end
|
235
231
|
|
236
|
-
|
232
|
+
# Returns statistics
|
237
233
|
def stats
|
238
234
|
_b = @language.both.size
|
239
235
|
_s = @language.start.size
|
@@ -255,8 +251,7 @@ EOS
|
|
255
251
|
s % [ @iso_language, _T, _s, _e, _b, _h, _x ]
|
256
252
|
end
|
257
253
|
|
258
|
-
|
259
|
-
def updateresult(hash, str, pos) #:nodoc:
|
254
|
+
def updateresult(hash, str, pos)
|
260
255
|
if hash.has_key?(str)
|
261
256
|
STDERR.print "#{pos}: #{str}: #{hash[str]}" if DEBUG
|
262
257
|
hash[str].split('').each_with_index do |c, i|
|
@@ -266,24 +261,30 @@ private
|
|
266
261
|
STDERR.puts ": #{@result}" if DEBUG
|
267
262
|
end
|
268
263
|
end
|
264
|
+
private :updateresult
|
269
265
|
|
270
|
-
def make_result_list(res)
|
266
|
+
def make_result_list(res)
|
271
267
|
r = []
|
272
268
|
res.each_with_index { |c, i| r << i * (c.to_i % 2) }
|
273
269
|
r.reject { |i| i.to_i == 0 }
|
274
270
|
end
|
271
|
+
private :make_result_list
|
275
272
|
|
276
273
|
def load_language
|
277
274
|
return if @first_load
|
278
275
|
|
279
276
|
@iso_language ||= "en_us"
|
280
277
|
|
281
|
-
|
278
|
+
unless @language
|
279
|
+
require "text/hyphen/language/#{@iso_language}"
|
280
|
+
@language = Text::Hyphen::Language.const_get(@iso_language.upcase)
|
281
|
+
@iso_language = @language.isocode if @language.isocode
|
282
|
+
end
|
282
283
|
|
283
|
-
@
|
284
|
-
@
|
285
|
-
@right ||= @language.right
|
284
|
+
@left ||= @language.left
|
285
|
+
@right ||= @language.right
|
286
286
|
|
287
287
|
@iso_language
|
288
288
|
end
|
289
|
+
private :load_language
|
289
290
|
end
|