tex-hyphen 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Changelog ADDED
@@ -0,0 +1,19 @@
1
+ == TeX::Hyphen 0.4.0
2
+ * This is the final release of TeX::Hyphen for Ruby. The next version will be
3
+ called Text::Hyphen and will provide significant enhancement to the
4
+ capabilities of hyphenation as well as an API change.
5
+ * Added bin/hyphen -- a program to demonstrate hyphenation.
6
+
7
+ == TeX::Hyphen 0.3.1
8
+ * Created a gemspec. Removed some unnecessary scaffolding code.
9
+
10
+ == TeX::Hyphen 0.3
11
+ * Added caching capabilities to both #hyphenate and #visualise.
12
+
13
+ == TeX::Hyphen 0.2
14
+ * Added TeX::Hyphen#hyphenate_to(word, size). This is in preparation for a
15
+ change to Text::Format for hyphenation of words. Created Test::Unit unit
16
+ tests.
17
+
18
+ == TeX::Hyphen 0.1
19
+ * Initial Ruby version by Martin DeMello.
data/INSTALL ADDED
@@ -0,0 +1,6 @@
1
+ Installing this package is as simple as:
2
+
3
+ % ruby install.rb
4
+
5
+ Alternatively, you can use the RubyGem version of TeX::Hyphen available as
6
+ tex-hyphen-0.4.0.gem from the usual sources.
data/README ADDED
@@ -0,0 +1,14 @@
1
+ TeX::Hyphen 0.4.0
2
+ Copyright © 2003 - 2004 Martin DeMello and Austin Ziegler
3
+
4
+ Hyphenates a word according to a TeX pattern file (defaults to Donald E.
5
+ Knuth's hyphen.tex, included in this distribution). This is a straightforward
6
+ port of the Perl implementation by Jan Pazdziora
7
+ [http://search.cpan.org/author/JANPAZ/TeX-Hyphen-0.140/lib/TeX/Hyphen.pm].
8
+ Because hyphenation can potentially be an expensive proposition, TeX::Hyphen
9
+ will now return from a cache.
10
+
11
+ TeX::Hyphen is licensed under the same terms as Ruby or under the GPL version
12
+ 2, or later.
13
+
14
+ This is the last version of TeX::Hyphen.
data/Rakefile ADDED
@@ -0,0 +1,116 @@
1
+ #! /usr/bin/env rake
2
+ $LOAD_PATH.unshift('lib')
3
+
4
+ require 'rubygems'
5
+ require 'rake/gempackagetask'
6
+ require 'tex/hyphen'
7
+ require 'archive/tar/minitar'
8
+ require 'zlib'
9
+
10
# Base name shared by the distribution directory and the release archives.
DISTDIR = "tex-hyphen-" + TeX::Hyphen::VERSION.to_s

# The tarball is written next to (not inside) the working directory.
TARDIST = "../" + DISTDIR + ".tar.gz"
12
+
13
# Matches a date written as YYYYMMDD with optional ".", "/" or "-" between
# the fields, optionally followed by a time (hh, mm and optional ss, with
# optional ":" or "." between them) introduced by whitespace or "T".
DATE_RE = %r<(\d{4})[./-]?(\d{2})[./-]?(\d{2})(?:[\sT]?(\d{2})[:.]?(\d{2})[:.]?(\d{2})?)?>

# Converts a RELEASE_DATE string into a local Time.
#
# stamp:: the raw date string, e.g. "2004-12-10" or "20041210T035654".
#
# Returns the Time built from the captured fields; time-of-day fields that
# were not supplied count as zero. Raises ArgumentError when DATE_RE finds
# no date in +stamp+ (previously this surfaced as a NoMethodError on nil).
def parse_release_date(stamp)
  match = DATE_RE.match(stamp)
  raise ArgumentError, "Unrecognised RELEASE_DATE: #{stamp.inspect}" unless match

  # Unmatched optional groups are nil, and nil.to_i is 0.
  Time.mktime(*match.captures.map { |field| field.to_i })
end

# Timestamp stamped on every archive entry, or nil to use each file's own
# modification time.
ReleaseDate = ENV['RELEASE_DATE'] ? parse_release_date(ENV['RELEASE_DATE']) : nil
27
+
28
# Runs the unit tests: every tests/tc_*.rb file is loaded, then each
# Test::Unit::TestCase subclass known to the interpreter contributes its
# suite to one "TeX::Hyphen" suite that is run with the console runner.
task :test do |t|
  require 'test/unit/testsuite'
  require 'test/unit/ui/console/testrunner'

  runner = Test::Unit::UI::Console::TestRunner
  # Progress messages go to stderr and only when the task is verbose.
  announce = lambda { |message| $stderr.puts message if t.verbose }

  $LOAD_PATH.unshift('tests')
  announce.call("Checking for test cases:")
  Dir['tests/tc_*.rb'].each do |path|
    announce.call("\t#{path}")
    load path
  end

  suite = Test::Unit::TestSuite.new("TeX::Hyphen")
  ObjectSpace.each_object(Class) do |klass|
    next unless klass < Test::Unit::TestCase
    suite << klass.suite
  end

  runner.run(suite)
end
49
+
50
# The gemspec file is Ruby source that evaluates to the specification
# object. NOTE(review): this executes the file's contents verbatim.
gemspec_source = File.read("tex-hyphen.gemspec")
spec = eval(gemspec_source)
# The library's own version constant overrides whatever the gemspec says.
spec.version = TeX::Hyphen::VERSION

desc "Build the RubyGem for TeX::Hyphen"
task :gem => [ :test ]
# Only the .gem is produced here, and it is placed in the parent directory.
Rake::GemPackageTask.new(spec) do |package|
  package.package_dir = ".."
  package.need_zip = false
  package.need_tar = false
end
59
+
60
desc "Build a TeX::Hyphen .tar.gz distribution."
task :tar => [ TARDIST ]
# Builds the gzip-compressed tar archive named by TARDIST. Entries are
# collected from the current directory (skipping CVS directories and
# editor backups ending in "~") and stored under DISTDIR instead of the
# working directory's own name. Runs only after the tests.
file TARDIST => [ :test ] do |t|
  current = File.basename(Dir.pwd)
  # Work from the parent directory so that entry paths start with the
  # directory name, which is then swapped for DISTDIR.
  Dir.chdir("..") do
    ff = gz = tw = nil
    begin
      files = Dir["#{current}/**/*"].select { |dd| dd !~ %r{(?:/CVS/?|~$)} }
      files.map! do |dd|
        # The directory name is literal text, not a pattern: escape it and
        # replace only the leading occurrence.
        ddnew = dd.sub(/\A#{Regexp.escape(current)}/, DISTDIR)
        mtime = ReleaseDate || File.stat(dd).mtime
        if File.directory?(dd)
          { :name => ddnew, :mode => 0755, :dir => true, :mtime => mtime }
        else
          # Anything under a bin/ directory is stored as executable.
          mode = (dd =~ %r{bin/}) ? 0755 : 0644
          # Read raw bytes and record the byte length: the size in the tar
          # header must equal the number of bytes written, which a
          # text-mode read or a character count does not guarantee.
          data = File.open(dd, "rb") { |io| io.read }
          { :name => ddnew, :mode => mode, :data => data, :size => data.bytesize,
            :mtime => mtime }
        end
      end

      # The task name is relative to the original directory; drop the
      # leading "../" now that the parent is the working directory.
      ff = File.open(t.name.sub(%r{\A\.\./}, ''), "wb")
      gz = Zlib::GzipWriter.new(ff)
      tw = Archive::Tar::Minitar::Writer.new(gz)

      files.each do |entry|
        if entry[:dir]
          tw.mkdir(entry[:name], entry)
        else
          tw.add_file_simple(entry[:name], entry) { |os| os.write(entry[:data]) }
        end
      end
    ensure
      tw.close if tw
      gz.close if gz
      # Closing the GzipWriter also closes the file; this covers the case
      # where the writer was never created.
      ff.close if ff && !ff.closed?
    end
  end
end
task TARDIST => [ :test ]
102
+
103
# Creates a detached, ASCII-armoured GnuPG signature ("<file>.asc") for
# +file+ by running gpg with the -ba switches.
#
# file:: path of the file to sign, written with forward slashes.
#
# Raises RuntimeError ("Error signing with GPG.") when the gpg command
# cannot be run or reports failure, or when no signature file exists
# afterwards.
def sign(file)
  # The whole command line, including +file+, is converted to backslash
  # separators because gpg is addressed by its Windows install path.
  command = %("C:/Program Files/Windows Privacy Tools/GnuPG/gpg.exe" -ba #{file}).gsub(%r{/}) { "\\" }
  succeeded = system(command)
  # Checking the exit status as well stops a stale .asc left by an earlier
  # run from masking a failed signing attempt.
  raise "Error signing with GPG." unless succeeded && File.exist?("#{file}.asc")
end
107
+
108
# Signing tasks: each one depends on the task that builds its artefact and
# then signs the resulting file.
task(:signtar => [ :tar ]) { sign TARDIST }
task(:signgem => [ :gem ]) { sign "../#{DISTDIR}.gem" }

desc "Build everything."
task :default => [ :signtar, :signgem ]
data/bin/hyphen ADDED
@@ -0,0 +1,99 @@
1
+ #!/usr/bin/env ruby
2
+ # Text::Hyphen
3
+ # Copyright 2003 - 2004, Martin DeMello and Austin Ziegler
4
+ #
5
+ # Licensed under the same terms as Ruby.
6
+ #
7
+ # $Id: hyphen,v 1.1 2004/12/10 03:56:54 austin Exp $
8
+ #++
9
+
10
+ require 'optparse'
11
+ require 'ostruct'
12
+
13
+ begin
14
+ require 'tex/hyphen'
15
+ rescue LoadError
16
+ require 'rubygems'
17
+ require 'tex/hyphen'
18
+ end
19
+
20
# Command-line settings. +action+ selects the output mode and defaults to
# :visualise; +size+, +left+, +right+ and +file+ are set only when the
# corresponding switch is given.
options = OpenStruct.new
options.action = :visualise
ARGV.options do |opt|
  opt.banner = "Usage: #{File.basename($0)} [options] [mode] word+"
  opt.separator ""
  opt.separator "Modes"
  opt.on('-V', '--visualise', 'Visualises the hyphenation of the word.', 'Default action.') {
    options.action = :visualise
  }
  opt.on('-P', '--points', 'Shows the letters on which a word will', 'be hyphenated.') {
    options.action = :hyphenate
  }
  # SIZE is a letter count, so it is coerced to an Integer; fractional
  # values are rejected by the parser.
  opt.on('-H', '--hyphenate-to SIZE', Integer, 'Hyphenates the word so that the first', 'point is at least SIZE letters.') { |size|
    options.action = :hyphenate_to
    options.size = size
  }

  opt.separator ""
  opt.separator "Options"
  # Without the Integer coercion these values would be stored as the raw
  # argument Strings.
  opt.on('-L', '--left SIZE', Integer, 'Sets the minimum number of letters on', 'the left side of the word.') { |left|
    options.left = left
  }
  opt.on('-R', '--right SIZE', Integer, 'Sets the minimum number of letters on', 'the right side of the word.') { |right|
    options.right = right
  }
  opt.on('--pattern-file PATTERNS', 'Loads the specified TeX pattern file.') { |file|
    options.file = file
  }

  opt.separator ""
  opt.on_tail('-h', '--help', 'Shows this help') {
    $stderr.puts opt
    exit 0
  }
  # Consumes the switches, leaving only the words to hyphenate in ARGV.
  opt.parse!
end
56
+
57
# With no words left after option parsing there is nothing to hyphenate:
# print the usage text and stop.
if ARGV.empty?
  $stderr.puts ARGV.options
  exit 0
end

# Only settings given on the command line are assigned; the style is
# always 'czech'.
hyphenator = TeX::Hyphen.new do
  @min_left = options.left if options.left
  @min_right = options.right if options.right
  @style = 'czech'
  @file = options.file if options.file
end

# Prints render.call(word) for every word in ARGV, each followed by one
# space, starting a new output line whenever the next item would not fit
# in what remains of an 80-column budget.
print_wrapped = lambda do |render|
  room = 80
  ARGV.each do |word|
    vis = render.call(word)
    needed = vis.size + 1
    if room - needed < 0
      puts
      room = 80
    end
    room -= needed
    print "#{vis} "
  end
end

case options.action
when :visualise
  print_wrapped.call(lambda { |word| hyphenator.visualise(word) })
when :hyphenate
  # One line per word: the word, then the letter found at each index
  # returned by #hyphenate.
  ARGV.each do |word|
    points = hyphenator.hyphenate(word)
    print "#{word}: "
    points.each { |pt| print "#{word[pt, 1]} " }
    puts
  end
when :hyphenate_to
  print_wrapped.call(lambda { |word| hyphenator.visualise_to(word, options.size) })
end
@@ -0,0 +1,116 @@
1
+ module TeX #:nodoc:
2
+ class Hyphen #:nodoc:
3
+ # = TeX::Hyphen::Czech
4
+ #
5
+ # Provides parsing routine for Czech patterns
6
+ #
7
+ # == Synopsis
8
+ # require 'tex/hyphen'
9
+ # hyp = TeX::Hyphen.new
10
+ # # Default hyphenation style is 'czech'
11
+ #
12
+ # == Description
13
+ # This pattern processing happens to be the default. If you need to
14
+ # write your own style of parsing the pattern file, you might want to
15
+ # start with this file and hack it to suit your needs. There is nothing
16
+ # for end users here -- just specify the style parameter in call to new
17
+ # TeX::Hyphen.
18
+ #
19
+ # The language style specific modules have to define the following
20
+ # functions:
21
+ #
22
+ # * process_patterns
23
+ # * process_hyphenation
24
+ #
25
+ # Check the 'tex/hyphen/czech.rb' source to see the exact form of
26
+ # the values inserted into these hash structures.
27
+ #
28
+ # Each style module should also define DEFAULT_STYLE_MIN_LEFT and
29
+ # DEFAULT_STYLE_MIN_RIGHT constants, if they have different
30
+ # values than the default 2. The values should match the parameters used
31
+ # to generate the patterns. Since various pattern files could be
32
+ # generated with different values set, this is just a default that can
33
+ # be changed with parameters to the TeX::Hyphen constructor.
34
+ #
35
+ # Note: the encoding for this is ISO-8859-1 or ISO-8859-15.
36
+ module Czech
37
+ private
38
+ BACKV = { 'c' => '�', 'd' => '�', 'e' => '�', 'l' => '�', 'n' => '�',
39
+ 'r' => '�', 's' => '�', 't' => '�', 'z' => '�', 'C' => '�',
40
+ 'D' => '�', 'E' => '�', 'L' => '�', 'N' => '�', 'R' => '�',
41
+ 'S' => '�', 'T' => '�', 'Z' => '�' }
42
+ BACKAP = { 'a' => '�', 'e' => '�', 'i' => '�', 'l' => '�', 'o' => '�',
43
+ 'u' => '�', 'y' => '�', 'A' => '�', 'E' => '�', 'I' => '�',
44
+ 'L' => '�', 'O' => '�', 'U' => '�', 'Y' => '�' }
45
+
46
+ STYLE_VERSION = '0.121'
47
+
48
+ DEFAULT_STYLE_MIN_LEFT = 2
49
+ DEFAULT_STYLE_MIN_RIGHT = 2
50
+
51
+ def cstolower(e)
52
+ e.tr('A-Z��������ť�����ة�����ݬ�', 'a-z������������������������')
53
+ end
54
+
55
# Parses one line of the \patterns content of a TeX pattern file and
# stores every pattern on it in @both_hyphen, @begin_hyphen, @end_hyphen
# or @hyphen, keyed by the pattern's lower-cased letters with its digit
# string as the value.
#
# line:: one line taken from inside the \patterns macro.
#
# Returns false when the line contains "}" (end of the pattern section),
# true if parsing should continue.
def process_patterns(line)
  return false if line =~ /\}/

  line.split(/\s+/).each do |w|
    next if w.empty?

    # A leading or trailing dot anchors the pattern to the start or end
    # of a word; sub! returns nil when there was no dot to strip.
    start = !w.sub!(/^\./, '').nil?
    stop = !w.sub!(/\.$/, '').nil?

    # NOTE(review): words come from a whitespace split, so the \s+ in the
    # \v expression cannot match here -- confirm how "\v c" is meant to
    # reach this point.
    w.gsub!(/\\v\s+(.)/) { BACKV[$1] }     # Process the \v tag
    w.gsub!(/\\'(.)/) { BACKAP[$1] }       # Process the \' tag
    # ^^fc stands for the character with hex code fc. It must become that
    # character, not the decimal text "252", whose digits would be read
    # as hyphenation values below.
    w.gsub!(/\^\^(..)/) { $1.hex.chr }

    w.gsub!(/(\D)(?=\D)/) { "#{$1}0" }     # insert zeroes
    w.gsub!(/^(?=\D)/, '0')                # and start with some digit

    tag = cstolower(w.gsub(/\d/, ''))      # get the lowercase string...
    value = w.gsub(/\D/, '')               # and numbers apart
    # The Perl maintainers say: (if we knew locales were fine
    # everywhere, we could use them)

    if start && stop
      @both_hyphen[tag] = value
    elsif start
      @begin_hyphen[tag] = value
    elsif stop
      @end_hyphen[tag] = value
    else
      @hyphen[tag] = value
    end
  end
  true
end
97
+
98
# Parses one line of the \hyphenation content (the exception list) and
# records it in @exception. The key is the lower-cased word without its
# hyphens; the value is a string of "0"/"1" digits, one more than the
# word has letters, with "1" marking each position where a hyphen was
# written.
#
# line:: one line taken from inside the \hyphenation macro.
#
# Returns false when the line contains "}" (end of the exception
# section), true if parsing should continue.
def process_hyphenation(line)
  return false if line =~ /\}/

  l = line.gsub(/\\v\s+(.)/) { BACKV[$1] }
  # BACKAP is a Hash and must be indexed with []; written with braces the
  # expression is a method call with a block and fails on any \' accent.
  l.gsub!(/\\'(.)/) { BACKAP[$1] }
  tag = cstolower(l.gsub(/-/, ''))
  # A letter followed by another letter becomes "0", a letter followed by
  # a hyphen becomes "1", and whatever remains is zeroed afterwards.
  value = "0" + l.gsub(/[^-](?=[^-])/, '0').gsub(/[^-]-/, '1')
  value.gsub!(/[^01]/, '0')
  @exception[tag] = value
  true
end
114
+ end
115
+ end
116
+ end
@@ -0,0 +1,93 @@
1
+ module TeX #:nodoc:
2
+ class Hyphen #:nodoc:
3
+ # = TeX::Hyphen::German
4
+ #
5
+ # Provides parsing routines for German patterns.
6
+ #
7
+ # == Synopsis
8
+ # require 'tex/hyphen'
9
+ # hyp = TeX::Hyphen.new(:style => 'german')
10
+ #
11
+ # See TeX::Hyphen::Czech for more information.
12
+ module German
13
+ BACKV = { 'c' => '�', 'd' => '�', 'e' => '�', 'l' => '�', 'n' => '�',
14
+ 'r' => '�', 's' => '�', 't' => '�', 'z' => '�', 'C' => '�',
15
+ 'D' => '�', 'E' => '�', 'L' => '�', 'N' => '�', 'R' => '�',
16
+ 'S' => '�', 'T' => '�', 'Z' => '�' }
17
+ BACKAP = { 'a' => '�', 'e' => '�', 'i' => '�', 'l' => '�', 'o' => '�',
18
+ 'u' => '�', 'y' => '�', 'A' => '�', 'E' => '�', 'I' => '�',
19
+ 'L' => '�', 'O' => '�', 'U' => '�', 'Y' => '�' }
20
+
21
+ GERMAN = { 'a' => "�", 'o' => "�", 'u' => "�", '3' => "�", 'A' => "�",
22
+ 'O' => "�", 'U' => "�" }
23
+
24
+ STYLE_VERSION = '0.121'
25
+
26
+ DEFAULT_STYLE_MIN_LEFT = 2
27
+ DEFAULT_STYLE_MIN_RIGHT = 2
28
+
29
+ private
30
+ def cstolower(e)
31
+ e.tr('A-Z��������ť�����ة�����ݬ�', 'a-z������������������������')
32
+ end
33
+
34
# Parses one line of the \patterns content of a German TeX pattern file
# and stores every pattern on it in @both_hyphen, @begin_hyphen,
# @end_hyphen or @hyphen, keyed by the pattern's lower-cased letters with
# its digit string as the value.
#
# line:: one line taken from inside the \patterns macro.
#
# Returns false when the line contains "\endgroup" or "}" (end of the
# pattern section), true if parsing should continue.
def process_patterns(line)
  # The backslash must be escaped: /\endgroup/ would look for an ESC
  # character followed by "ndgroup".
  # NOTE(review): because any "}" ends the section, a line that carries a
  # \n{...} group never reaches the substitution below -- confirm intent.
  return false if line =~ /\\endgroup/ || line =~ /\}/

  line.split(/\s+/).each do |w|
    next if w.empty?

    # A leading or trailing dot anchors the pattern to the start or end
    # of a word; sub! returns nil when there was no dot to strip.
    start = !w.sub!(/^\./, '').nil?
    stop = !w.sub!(/\.$/, '').nil?

    w.gsub!(/\\n\{([^\}]+)\}/) { $1 }
    # "a "o "u "A "O "U "3 are single-character escapes, so the key has to
    # be matched with a character class rather than the literal "aouAOU3".
    w.gsub!(/"([aouAOU3])/) { GERMAN[$1] } # Process \" German tags
    w.gsub!(/\\v\s+(.)/) { BACKV[$1] }     # Process the \v tag
    w.gsub!(/\\'(.)/) { BACKAP[$1] }       # Process the \' tag
    # ^^fc stands for the character with hex code fc. It must become that
    # character, not the decimal text "252", whose digits would be read
    # as hyphenation values below.
    w.gsub!(/\^\^(..)/) { $1.hex.chr }
    w.gsub!(/(\D)(?=\D)/) { "#{$1}0" }     # insert zeroes
    w.gsub!(/^(?=\D)/, '0')                # and start with some digit
    tag = cstolower(w.gsub(/\d/, ''))      # get the lowercase string...
    value = w.gsub(/\D/, '')               # and numbers apart
    # The Perl maintainers say: (if we knew locales were fine
    # everywhere, we could use them)

    if start && stop
      @both_hyphen[tag] = value
    elsif start
      @begin_hyphen[tag] = value
    elsif stop
      @end_hyphen[tag] = value
    else
      @hyphen[tag] = value
    end
  end
  true
end
74
+
75
# Parses one line of the \hyphenation content (the exception list) and
# records it in @exception. The key is the lower-cased word without its
# hyphens; the value is a string of "0"/"1" digits, one more than the
# word has letters, with "1" marking each position where a hyphen was
# written.
#
# line::      one line taken from inside the \hyphenation macro.
# exception:: optional and unused; results always go to @exception. It is
#             kept so that both one- and two-argument calls work, the
#             one-argument form matching the Czech style.
#
# Returns false when the line contains "}" (end of the exception
# section), true if parsing should continue.
def process_hyphenation(line, exception = nil)
  return false if line =~ /\}/

  l = line.gsub(/\\v\s+(.)/) { BACKV[$1] }
  # BACKAP is a Hash and must be indexed with []; written with braces the
  # expression is a method call with a block and fails on any \' accent.
  l.gsub!(/\\'(.)/) { BACKAP[$1] }
  tag = cstolower(l.gsub(/-/, ''))
  # A letter followed by another letter becomes "0", a letter followed by
  # a hyphen becomes "1", and whatever remains is zeroed afterwards.
  value = "0" + l.gsub(/[^-](?=[^-])/, '0').gsub(/[^-]-/, '1')
  value.gsub!(/[^01]/, '0')
  @exception[tag] = value
  true
end
91
+ end
92
+ end
93
+ end