tex-hyphen 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
data/Changelog ADDED
@@ -0,0 +1,19 @@
1
+ == TeX::Hyphen 0.4.0
2
+ * This is the final release of TeX::Hyphen for Ruby. The next version will be
3
+ called Text::Hyphen and will provide significant enhancements to the
4
+ capabilities of hyphenation as well as an API change.
5
+ * Added bin/hyphen -- a program to demonstrate hyphenation.
6
+
7
+ == TeX::Hyphen 0.3.1
8
+ * Created a gemspec. Removed some unnecessary scaffolding code.
9
+
10
+ == TeX::Hyphen 0.3
11
+ * Added caching capabilities to both #hyphenate and #visualise.
12
+
13
+ == TeX::Hyphen 0.2
14
+ * Added TeX::Hyphen#hyphenate_to(word, size). This is in preparation for a
15
+ change to Text::Format for hyphenation of words. Created Test::Unit unit
16
+ tests.
17
+
18
+ == TeX::Hyphen 0.1
19
+ * Initial Ruby version by Martin DeMello.
data/INSTALL ADDED
@@ -0,0 +1,6 @@
1
+ Installing this package is as simple as:
2
+
3
+ % ruby install.rb
4
+
5
+ Alternatively, you can use the RubyGem version of TeX::Hyphen available as
6
+ tex-hyphen-0.4.0.gem from the usual sources.
data/README ADDED
@@ -0,0 +1,14 @@
1
+ TeX::Hyphen 0.4.0
2
+ Copyright © 2003 - 2004 Martin DeMello and Austin Ziegler
3
+
4
+ Hyphenates a word according to a TeX pattern file (defaults to Donald E.
5
+ Knuth's hyphen.tex, included in this distribution). This is a straightforward
6
+ port of the Perl implementation by Jan Pazdziora
7
+ [http://search.cpan.org/author/JANPAZ/TeX-Hyphen-0.140/lib/TeX/Hyphen.pm].
8
+ Because hyphenation can potentially be an expensive proposition, TeX::Hyphen
9
+ will now return from a cache.
10
+
11
+ TeX::Hyphen is licensed under the same terms as Ruby or under the GPL version
12
+ 2, or later.
13
+
14
+ This is the last version of TeX::Hyphen.
data/Rakefile ADDED
@@ -0,0 +1,116 @@
1
+ #! /usr/bin/env rake
2
+ $LOAD_PATH.unshift('lib')
3
+
4
+ require 'rubygems'
5
+ require 'rake/gempackagetask'
6
+ require 'tex/hyphen'
7
+ require 'archive/tar/minitar'
8
+ require 'zlib'
9
+
10
+ DISTDIR = "tex-hyphen-#{TeX::Hyphen::VERSION}"
11
+ TARDIST = "../#{DISTDIR}.tar.gz"
12
+
13
# Matches a release timestamp: a mandatory YYYY MM DD date (the parts may be
# separated by '.', '/' or '-') followed by an optional HH MM [SS] time that
# may be introduced by whitespace or 'T' and separated by ':' or '.'.
DATE_RE = %r<(\d{4})[./-]?(\d{2})[./-]?(\d{2})(?:[\sT]?(\d{2})[:.]?(\d{2})[:.]?(\d{2})?)?>

# Converts the text of the RELEASE_DATE environment variable into a Time.
#
# stamp:: the raw timestamp string, e.g. "2004-12-10" or "20041210T035654".
#
# Returns the local Time built by Time.mktime; time fields that are absent
# from +stamp+ default to 0.
#
# Raises ArgumentError when +stamp+ contains nothing DATE_RE recognises
# (previously this surfaced as a NoMethodError on a nil match).
def parse_release_date(stamp)
  match = DATE_RE.match(stamp)
  raise ArgumentError, "Unrecognised RELEASE_DATE: #{stamp.inspect}" unless match

  # to_i reads "08" as decimal 8 and turns the nil of an unmatched optional
  # group into 0, which replaces the per-field "||= 0" defaults.
  Time.mktime(*match.captures.map { |field| field.to_i })
end

# Timestamp stamped on every archive entry, or nil to keep each file's own
# modification time.
ReleaseDate = ENV['RELEASE_DATE'] ? parse_release_date(ENV['RELEASE_DATE']) : nil
27
+
28
# Loads every tests/tc_*.rb file, gathers all Test::Unit::TestCase subclasses
# that are then known to the interpreter into one suite and runs it on the
# console runner.
task :test do |t|
  require 'test/unit/testsuite'
  require 'test/unit/ui/console/testrunner'

  $LOAD_PATH.unshift('tests')

  $stderr.puts "Checking for test cases:" if t.verbose
  Dir['tests/tc_*.rb'].each do |path|
    $stderr.puts "\t#{path}" if t.verbose
    load path
  end

  suite = Test::Unit::TestSuite.new("TeX::Hyphen")
  ObjectSpace.each_object(Class) do |klass|
    # Class#< is true only for strict descendants of TestCase.
    next unless klass < Test::Unit::TestCase
    suite << klass.suite
  end

  Test::Unit::UI::Console::TestRunner.run(suite)
end
49
+
50
# The gemspec is plain Ruby that evaluates to a Gem::Specification; it is
# eval'd here, so it must remain a trusted file of this project.
spec = eval(File.read("tex-hyphen.gemspec"))
# The library's own VERSION constant overrides whatever the gemspec declares.
spec.version = TeX::Hyphen::VERSION

desc "Build the RubyGem for TeX::Hyphen"
task :gem => [ :test ]

# Only the .gem itself is produced (no tar or zip), written to the parent
# directory.
Rake::GemPackageTask.new(spec) { |pkg|
  pkg.need_tar    = false
  pkg.need_zip    = false
  pkg.package_dir = ".."
}
59
+
60
desc "Build a TeX::Hyphen .tar.gz distribution."
task :tar => [ TARDIST ]

# Builds TARDIST (a gzip-compressed tar written next to the project directory)
# from every file under the current directory, skipping CVS directories and
# editor backups ending in '~'. Inside the archive the project directory is
# renamed to DISTDIR. Entries get ReleaseDate as their mtime when it is set,
# otherwise each file's own mtime. Anything under a bin/ directory is stored
# with mode 0755, other files with 0644.
file TARDIST => [ :test ] do |t|
  current = File.basename(Dir.pwd)
  Dir.chdir("..") do
    ff = gz = tw = nil
    begin
      sources = Dir["#{current}/**/*"].reject { |path| path =~ %r{(?:/CVS/?|~$)} }

      # Escape the directory name so characters such as '.' or '+' in it are
      # matched literally when the leading path component is swapped.
      prefix = /\A#{Regexp.escape(current)}/

      entries = sources.map do |path|
        name  = path.sub(prefix, DISTDIR)
        mtime = ReleaseDate || File.stat(path).mtime
        if File.directory?(path)
          { :name => name, :mode => 0755, :dir => true, :mtime => mtime }
        else
          # Read in binary mode: a text-mode read translates line endings on
          # Windows, which corrupts the stored bytes and the recorded size.
          data = File.open(path, "rb") { |io| io.read }
          mode = (path =~ %r{bin/}) ? 0755 : 0644
          { :name => name, :mode => mode, :data => data, :size => data.size,
            :mtime => mtime }
        end
      end

      # The working directory is already "..", so drop that prefix from the
      # task name to get the output path.
      ff = File.open(t.name.gsub(%r{^\.\./}o, ''), "wb")
      gz = Zlib::GzipWriter.new(ff)
      tw = Archive::Tar::Minitar::Writer.new(gz)

      entries.each do |entry|
        if entry[:dir]
          tw.mkdir(entry[:name], entry)
        else
          tw.add_file_simple(entry[:name], entry) { |os| os.write(entry[:data]) }
        end
      end
    ensure
      tw.close if tw
      gz.close if gz
      # Closing the GzipWriter also closes the file; this only matters when
      # the writer could not be created.
      ff.close if ff && !ff.closed?
    end
  end
end
task TARDIST => [ :test ]
102
+
103
# Creates a detached, ASCII-armoured GPG signature for +file+ by running
# "gpg -ba", which is expected to leave the signature in "<file>.asc".
#
# file:: path of the file to sign.
#
# Raises RuntimeError ("Error signing with GPG.") when "<file>.asc" does not
# exist after the command has run.
def sign(file)
  # The gpg location is hard-coded to a Windows install path. Every forward
  # slash of the command line -- including those in +file+ -- is turned into a
  # backslash before it is handed to the shell.
  command = %("C:/Program Files/Windows Privacy Tools/GnuPG/gpg.exe" -ba #{file}).gsub(%r{/}) { "\\" }
  system command

  # File.exist? is available on every Ruby version; the File.exists? alias
  # used previously was removed in Ruby 3.2.
  raise "Error signing with GPG." unless File.exist?("#{file}.asc")
end
107
+
108
# Signing tasks: each builds its artefact first, then signs it with GPG.
task(:signtar => [ :tar ]) { sign TARDIST }
task(:signgem => [ :gem ]) { sign "../#{DISTDIR}.gem" }

desc "Build everything."
task :default => [ :signtar, :signgem ]
data/bin/hyphen ADDED
@@ -0,0 +1,99 @@
1
+ #!/usr/bin/env ruby
2
+ # Text::Hyphen
3
+ # Copyright 2003 - 2004, Martin DeMello and Austin Ziegler
4
+ #
5
+ # Licensed under the same terms as Ruby.
6
+ #
7
+ # $Id: hyphen,v 1.1 2004/12/10 03:56:54 austin Exp $
8
+ #++
9
+
10
+ require 'optparse'
11
+ require 'ostruct'
12
+
13
+ begin
14
+ require 'tex/hyphen'
15
+ rescue LoadError
16
+ require 'rubygems'
17
+ require 'tex/hyphen'
18
+ end
19
+
20
# Command-line front end: parses the options, builds a hyphenator and applies
# the selected mode to every word left on the command line.
options = OpenStruct.new
options.action = :visualise
ARGV.options do |opt|
  opt.banner = "Usage: #{File.basename($0)} [options] [mode] word+"
  opt.separator ""
  opt.separator "Modes"
  opt.on('-V', '--visualise', 'Visualises the hyphenation of the word.', 'Default action.') {
    options.action = :visualise
  }
  opt.on('-P', '--points', 'Shows the letters on which a word will', 'be hyphenated.') {
    options.action = :hyphenate
  }
  opt.on('-H', '--hyphenate-to SIZE', Numeric, 'Hyphenates the word so that the first', 'point is at least SIZE letters.') { |size|
    options.action = :hyphenate_to
    options.size = size
  }

  opt.separator ""
  opt.separator "Options"
  # Integer makes OptionParser convert (and validate) the sizes; without it
  # the hyphenator was handed Strings such as "3".
  opt.on('-L', '--left SIZE', Integer, 'Sets the minimum number of letters on', 'the left side of the word.') { |left|
    options.left = left
  }
  opt.on('-R', '--right SIZE', Integer, 'Sets the minimum number of letters on', 'the right side of the word.') { |right|
    options.right = right
  }
  opt.on('--pattern-file PATTERNS', 'Loads the specified TeX pattern file.') { |file|
    options.file = file
  }

  opt.separator ""
  opt.on_tail('-h', '--help', 'Shows this help') {
    $stderr.puts opt
    exit 0
  }
  opt.parse!
end

# With no words to process, show the usage text instead.
if ARGV.empty?
  $stderr.puts ARGV.options
  exit 0
end

# Prints the block's result for each word, separated by single spaces, and
# starts a new line whenever the next item (plus its trailing space) would
# not fit in what is left of +width+ columns.
#
# words:: the words to process, in order.
# width:: the number of columns per output line.
def print_wrapped(words, width = 80)
  remaining = width
  words.each do |word|
    piece = yield(word)
    if (remaining - piece.size - 1) < 0
      puts
      remaining = width
    end
    remaining -= (piece.size + 1)
    print "#{piece} "
  end
end

# NOTE(review): the block assigns instance variables, so TeX::Hyphen.new
# presumably evaluates it in the context of the new object -- confirm in
# tex/hyphen. The style is fixed to 'czech' here.
hyphenator = TeX::Hyphen.new do
  @min_left = options.left if options.left
  @min_right = options.right if options.right
  @style = 'czech'
  @file = options.file if options.file
end

case options.action
when :visualise
  print_wrapped(ARGV) { |word| hyphenator.visualise(word) }
when :hyphenate
  # One line per word: the word, then the letter found at each index that
  # hyphenate returned.
  ARGV.each do |word|
    hyp = hyphenator.hyphenate(word)
    print "#{word}: "
    hyp.each { |pt| print "#{word[pt, 1]} " }
    puts
  end
when :hyphenate_to
  print_wrapped(ARGV) { |word| hyphenator.visualise_to(word, options.size) }
end
@@ -0,0 +1,116 @@
1
+ module TeX #:nodoc:
2
+ class Hyphen #:nodoc:
3
+ # = TeX::Hyphen::Czech
4
+ #
5
+ # Provides parsing routine for Czech patterns
6
+ #
7
+ # == Synopsis
8
+ # require 'tex/hyphen'
9
+ # hyp = TeX::Hyphen.new
10
+ # # Default hyphenation style is 'czech'
11
+ #
12
+ # == Description
13
+ # This pattern processing happens to be the default. If you need to
14
+ # write your own style of parsing the pattern file, you might want to
15
+ # start with this file and hack it to suit your needs. There is nothing
16
+ # for end users here -- just specify the style parameter in call to new
17
+ # TeX::Hyphen.
18
+ #
19
+ # The language style specific modules have to define the following
20
+ # functions:
21
+ #
22
+ # * process_patterns
23
+ # * process_hyphenation
24
+ #
25
+ # Check the 'tex/hyphen/czech.rb' source to see the exact form of
26
+ # the values inserted into these hash structures.
27
+ #
28
+ # Each style module should also define DEFAULT_STYLE_MIN_LEFT and
29
+ # DEFAULT_STYLE_MIN_RIGHT constants, if they have different
30
+ # values than the default 2. The values should match the parameters used
31
+ # to generate the patterns. Since various pattern files could be
32
+ # generated with different values set, this is just a default that can
33
+ # be changed with parameters to the TeX::Hyphen constructor.
34
+ #
35
+ # Note: the encoding for this is ISO-8859-1 or ISO-8859-15.
36
+ module Czech
37
+ private
38
+ BACKV = { 'c' => '�', 'd' => '�', 'e' => '�', 'l' => '�', 'n' => '�',
39
+ 'r' => '�', 's' => '�', 't' => '�', 'z' => '�', 'C' => '�',
40
+ 'D' => '�', 'E' => '�', 'L' => '�', 'N' => '�', 'R' => '�',
41
+ 'S' => '�', 'T' => '�', 'Z' => '�' }
42
+ BACKAP = { 'a' => '�', 'e' => '�', 'i' => '�', 'l' => '�', 'o' => '�',
43
+ 'u' => '�', 'y' => '�', 'A' => '�', 'E' => '�', 'I' => '�',
44
+ 'L' => '�', 'O' => '�', 'U' => '�', 'Y' => '�' }
45
+
46
+ STYLE_VERSION = '0.121'
47
+
48
+ DEFAULT_STYLE_MIN_LEFT = 2
49
+ DEFAULT_STYLE_MIN_RIGHT = 2
50
+
51
# Returns a copy of +e+ in which upper-case letters are replaced by their
# lower-case counterparts using String#tr: the ASCII range A-Z plus an
# explicit list of accented letters.
# NOTE(review): the accented letters in both tr sets appear as replacement
# characters in this copy of the file, so the mapping cannot be verified from
# here -- confirm the file's real encoding before relying on it.
+ def cstolower(e)
52
+ e.tr('A-Z��������ť�����ة�����ݬ�', 'a-z������������������������')
53
+ end
54
+
55
# Parses one line from the \patterns section of a TeX pattern file.
#
# Each whitespace-separated pattern on the line is split into its letters
# (the tag, lower-cased with cstolower) and its digits (the value; a 0 is
# inserted wherever two letters are adjacent and in front of a leading
# letter). The pair is stored in @both_hyphen, @begin_hyphen, @end_hyphen
# or @hyphen, depending on whether the pattern carried a leading and/or a
# trailing dot.
#
# line:: one line of text from inside the \patterns group.
#
# Returns false, without storing anything, when the line contains the
# closing brace of the section; returns true when parsing should continue.
def process_patterns(line)
  return false if line =~ /\}/

  line.split(/\s+/).each do |w|
    next if w.empty?

    # sub! returns nil when nothing was replaced, i.e. there was no dot.
    start = !w.sub!(/^\./, '').nil?
    stop  = !w.sub!(/\.$/, '').nil?

    # NOTE(review): the line was split on whitespace, so "\v" followed by
    # whitespace cannot occur inside a single word; this substitution looks
    # unreachable here.
    w.gsub!(/\\v\s+(.)/) { BACKV[$1] }   # Process the \v tag
    w.gsub!(/\\'(.)/) { BACKAP[$1] }     # Process the \' tag
    # Convert things like ^^fc into the character with that code. (to_s
    # would insert the decimal digits of the code, which then end up in the
    # value.) NOTE(review): on Ruby 1.9+ codes above 0x7f yield a binary
    # string -- check this against the pattern file's encoding.
    w.gsub!(/\^\^(..)/) { $1.hex.chr }

    w.gsub!(/(\D)(?=\D)/) { "#{$1}0" }   # insert zeroes
    w.gsub!(/^(?=\D)/, '0')              # and start with some digit

    tag   = cstolower(w.gsub(/\d/, ''))  # the lower-cased letters...
    value = w.gsub(/\D/, '')             # and the numbers apart

    table = if start && stop
              @both_hyphen
            elsif start
              @begin_hyphen
            elsif stop
              @end_hyphen
            else
              @hyphen
            end
    table[tag] = value
  end
  true
end
97
+
98
# Parses one line from the \hyphenation section (the exception list).
#
# The line is taken as a single word whose permitted break points are
# written as '-'. The word without the hyphens, lower-cased with
# cstolower, becomes the key; the value is a string of 0s and 1s in which
# a 1 stands for each letter that is directly followed by a '-'. The pair
# is stored in @exception.
#
# line:: one line of text from inside the \hyphenation group.
#
# Returns false, without storing anything, when the line contains the
# closing brace of the section; returns true when parsing should continue.
def process_hyphenation(line)
  return false if line =~ /\}/

  l = line.gsub(/\\v\s+(.)/) { BACKV[$1] }   # Process the \v tag
  # BACKAP is a Hash: index it with []. (BACKAP{...} is parsed as a call
  # to a method named BACKAP with a block and raises NoMethodError.)
  l.gsub!(/\\'(.)/) { BACKAP[$1] }           # Process the \' tag

  tag = cstolower(l.gsub(/-/, ''))

  # A letter followed by another letter becomes 0, a letter followed by a
  # hyphen becomes 1, and whatever is left (the last letter) becomes 0.
  value = "0" + l.gsub(/[^-](?=[^-])/, '0').gsub(/[^-]-/, '1')
  value.gsub!(/[^01]/, '0')

  @exception[tag] = value
  true
end
114
+ end
115
+ end
116
+ end
@@ -0,0 +1,93 @@
1
+ module TeX #:nodoc:
2
+ class Hyphen #:nodoc:
3
+ # = TeX::Hyphen::German
4
+ #
5
+ # Provides parsing routines for German patterns.
6
+ #
7
+ # == Synopsis
8
+ # require 'tex/hyphen'
9
+ # hyp = TeX::Hyphen.new(:style => 'german')
10
+ #
11
+ # See TeX::Hyphen::Czech for more information.
12
+ module German
13
+ BACKV = { 'c' => '�', 'd' => '�', 'e' => '�', 'l' => '�', 'n' => '�',
14
+ 'r' => '�', 's' => '�', 't' => '�', 'z' => '�', 'C' => '�',
15
+ 'D' => '�', 'E' => '�', 'L' => '�', 'N' => '�', 'R' => '�',
16
+ 'S' => '�', 'T' => '�', 'Z' => '�' }
17
+ BACKAP = { 'a' => '�', 'e' => '�', 'i' => '�', 'l' => '�', 'o' => '�',
18
+ 'u' => '�', 'y' => '�', 'A' => '�', 'E' => '�', 'I' => '�',
19
+ 'L' => '�', 'O' => '�', 'U' => '�', 'Y' => '�' }
20
+
21
+ GERMAN = { 'a' => "�", 'o' => "�", 'u' => "�", '3' => "�", 'A' => "�",
22
+ 'O' => "�", 'U' => "�" }
23
+
24
+ STYLE_VERSION = '0.121'
25
+
26
+ DEFAULT_STYLE_MIN_LEFT = 2
27
+ DEFAULT_STYLE_MIN_RIGHT = 2
28
+
29
+ private
30
# Returns a copy of +e+ in which upper-case letters are replaced by their
# lower-case counterparts using String#tr: the ASCII range A-Z plus an
# explicit list of accented letters.
# NOTE(review): the accented letters in both tr sets appear as replacement
# characters in this copy of the file, so the mapping cannot be verified from
# here -- confirm the file's real encoding before relying on it.
+ def cstolower(e)
31
+ e.tr('A-Z��������ť�����ة�����ݬ�', 'a-z������������������������')
32
+ end
33
+
34
# Parses one line from the \patterns section of a German TeX pattern file.
#
# Each whitespace-separated pattern on the line is split into its letters
# (the tag, lower-cased with cstolower) and its digits (the value; a 0 is
# inserted wherever two letters are adjacent and in front of a leading
# letter). The pair is stored in @both_hyphen, @begin_hyphen, @end_hyphen
# or @hyphen, depending on whether the pattern carried a leading and/or a
# trailing dot.
#
# line:: one line of text from inside the \patterns group.
#
# Returns false, without storing anything, when the line contains
# "\endgroup" or a closing brace; returns true when parsing should
# continue.
def process_patterns(line)
  # The backslash must be escaped: /\endgroup/ would look for an ESC
  # character followed by "ndgroup".
  return false if line =~ /\\endgroup/ || line =~ /\}/

  line.split(/\s+/).each do |w|
    next if w.empty?

    # sub! returns nil when nothing was replaced, i.e. there was no dot.
    start = !w.sub!(/^\./, '').nil?
    stop  = !w.sub!(/\.$/, '').nil?

    # NOTE(review): a line containing '}' has already ended the section
    # above, so this \n{...} unwrapping looks unreachable as written.
    w.gsub!(/\\n\{([^\}]+)\}/) { $1 }
    # German umlaut / sharp-s shorthand: "a, "o, "u, "A, "O, "U, "3, with an
    # optional leading backslash. The letters form a character class;
    # without the brackets only the literal text "aouAOU3" would match.
    w.gsub!(/\\?"([aouAOU3])/) { GERMAN[$1] }
    # NOTE(review): words contain no whitespace after the split, so the
    # "\v" + whitespace form cannot match here.
    w.gsub!(/\\v\s+(.)/) { BACKV[$1] }   # Process the \v tag
    w.gsub!(/\\'(.)/) { BACKAP[$1] }     # Process the \' tag
    # Convert things like ^^fc into the character with that code. (to_s
    # would insert the decimal digits of the code, which then end up in the
    # value.) NOTE(review): on Ruby 1.9+ codes above 0x7f yield a binary
    # string -- check this against the pattern file's encoding.
    w.gsub!(/\^\^(..)/) { $1.hex.chr }

    w.gsub!(/(\D)(?=\D)/) { "#{$1}0" }   # insert zeroes
    w.gsub!(/^(?=\D)/, '0')              # and start with some digit

    tag   = cstolower(w.gsub(/\d/, ''))  # the lower-cased letters...
    value = w.gsub(/\D/, '')             # and the numbers apart

    table = if start && stop
              @both_hyphen
            elsif start
              @begin_hyphen
            elsif stop
              @end_hyphen
            else
              @hyphen
            end
    table[tag] = value
  end
  true
end
74
+
75
# Parses one line from the \hyphenation section (the exception list).
#
# The line is taken as a single word whose permitted break points are
# written as '-'. The word without the hyphens, lower-cased with
# cstolower, becomes the key; the value is a string of 0s and 1s in which
# a 1 stands for each letter that is directly followed by a '-'.
#
# line::      one line of text from inside the \hyphenation group.
# exception:: optional Hash to receive the entry. When omitted the entry
#             goes into @exception, so the method can be called with a
#             single argument exactly like the Czech style's version.
#
# Returns false, without storing anything, when the line contains the
# closing brace of the section; returns true when parsing should continue.
def process_hyphenation(line, exception = nil)
  return false if line =~ /\}/

  l = line.gsub(/\\v\s+(.)/) { BACKV[$1] }   # Process the \v tag
  # BACKAP is a Hash: index it with []. (BACKAP{...} is parsed as a call
  # to a method named BACKAP with a block and raises NoMethodError.)
  l.gsub!(/\\'(.)/) { BACKAP[$1] }           # Process the \' tag

  tag = cstolower(l.gsub(/-/, ''))

  # A letter followed by another letter becomes 0, a letter followed by a
  # hyphen becomes 1, and whatever is left (the last letter) becomes 0.
  value = "0" + l.gsub(/[^-](?=[^-])/, '0').gsub(/[^-]-/, '1')
  value.gsub!(/[^01]/, '0')

  (exception || @exception)[tag] = value
  true
end
91
+ end
92
+ end
93
+ end