text-reform 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,5 @@
1
+ == Text::Reform 0.2.0
2
+ * Initial release. Ported from the Perl by Kaspar Schiess with assistance from
3
+ Austin Ziegler.
4
+
5
+ $Id: Changelog,v 1.2 2005/01/18 11:21:01 eule Exp $
data/Install ADDED
@@ -0,0 +1,4 @@
1
+ Make sure that you have Phil Thomson's install-package on your system.
2
+
3
+ Simply run:
4
+ % ruby install.rb
data/README ADDED
@@ -0,0 +1,36 @@
1
+ Text::Reform README
2
+ ===================
3
+
4
+ Text::Reform class is a rewrite from the Perl module with the same name by
5
+ Damian Conway (damian@conway.org). Much of this documentation has been copied
6
+ from the original documentation and adapted to the Ruby version.
7
+
8
+ The interface is subject to change, since it will undergo major Rubyfication;
9
+ additionally, some features may not have been ported yet.
10
+
11
+ Synopsis
12
+ --------
13
+ require 'text/reform'
14
+ f = Text::Reform.new
15
+
16
+ puts f.format(template, data)
17
+
18
+ Author
19
+ ------
20
+ Kaspar Schiess (eule@space.ch).
21
+
22
+ Ported from the original Perl library and documentation by Damian Conway
23
+ (damian@conway.org).
24
+
25
+ Bugs
26
+ ----
27
+ There are undoubtedly serious bugs lurking somewhere in code this funky :-) Bug
28
+ reports and other feedback are most welcome.
29
+
30
+ Copyright
31
+ ---------
32
+ Copyright (c) 2005, Kaspar Schiess. All Rights Reserved. This module is free
33
+ software. It may be used, redistributed and/or modified under the terms of the
34
+ Ruby License (see http://www.ruby-lang.org/en/LICENSE.txt)
35
+
36
+ $Id: README,v 1.1.1.1 2005/01/18 11:15:40 eule Exp $
@@ -0,0 +1,116 @@
1
+ #! /usr/bin/env rake
2
+ $LOAD_PATH.unshift('lib')
3
+
4
+ require 'rubygems'
5
+ require 'rake/gempackagetask'
6
+ require 'text/reform'
7
+ require 'archive/tar/minitar'
8
+ require 'zlib'
9
+
10
+ DISTDIR = "text-reform-#{Text::Reform::VERSION}"
11
+ TARDIST = "../#{DISTDIR}.tar.gz"
12
+
13
+ DATE_RE = %r<(\d{4})[./-]?(\d{2})[./-]?(\d{2})(?:[\sT]?(\d{2})[:.]?(\d{2})[:.]?(\d{2})?)?>
14
+
15
+ if ENV['RELEASE_DATE']
16
+ year, month, day, hour, minute, second = DATE_RE.match(ENV['RELEASE_DATE']).captures
17
+ year ||= 0
18
+ month ||= 0
19
+ day ||= 0
20
+ hour ||= 0
21
+ minute ||= 0
22
+ second ||= 0
23
+ ReleaseDate = Time.mktime(year, month, day, hour, minute, second)
24
+ else
25
+ ReleaseDate = nil
26
+ end
27
+
28
+ task :test do |t|
29
+ require 'test/unit/testsuite'
30
+ require 'test/unit/ui/console/testrunner'
31
+
32
+ runner = Test::Unit::UI::Console::TestRunner
33
+
34
+ $LOAD_PATH.unshift('tests')
35
+ $stderr.puts "Checking for test cases:" if t.verbose
36
+ Dir['tests/tc_*.rb'].each do |testcase|
37
+ $stderr.puts "\t#{testcase}" if t.verbose
38
+ load testcase
39
+ end
40
+
41
+ suite = Test::Unit::TestSuite.new("Text::Reform")
42
+
43
+ ObjectSpace.each_object(Class) do |testcase|
44
+ suite << testcase.suite if testcase < Test::Unit::TestCase
45
+ end
46
+
47
+ runner.run(suite)
48
+ end
49
+
50
+ spec = eval(File.read("text-reform.gemspec"))
51
+ spec.version = Text::Reform::VERSION
52
+ desc "Build the RubyGem for Text::Reform"
53
+ task :gem => [ :test ]
54
+ Rake::GemPackageTask.new(spec) do |g|
55
+ g.need_tar = false
56
+ g.need_zip = false
57
+ g.package_dir = ".."
58
+ end
59
+
60
+ desc "Build a Text::Reform .tar.gz distribution."
61
+ task :tar => [ TARDIST ]
62
+ file TARDIST => [ :test ] do |t|
63
+ current = File.basename(Dir.pwd)
64
+ Dir.chdir("..") do
65
+ begin
66
+ files = Dir["#{current}/**/*"].select { |dd| dd !~ %r{(?:/CVS/?|~$)} }
67
+ files.map! do |dd|
68
+ ddnew = dd.gsub(/^#{current}/, DISTDIR)
69
+ mtime = ReleaseDate || File.stat(dd).mtime
70
+ if File.directory?(dd)
71
+ { :name => ddnew, :mode => 0755, :dir => true, :mtime => mtime }
72
+ else
73
+ if dd =~ %r{bin/}
74
+ mode = 0755
75
+ else
76
+ mode = 0644
77
+ end
78
+ data = File.read(dd)
79
+ { :name => ddnew, :mode => mode, :data => data, :size => data.size,
80
+ :mtime => mtime }
81
+ end
82
+ end
83
+
84
+ ff = File.open(t.name.gsub(%r{^\.\./}o, ''), "wb")
85
+ gz = Zlib::GzipWriter.new(ff)
86
+ tw = Archive::Tar::Minitar::Writer.new(gz)
87
+
88
+ files.each do |entry|
89
+ if entry[:dir]
90
+ tw.mkdir(entry[:name], entry)
91
+ else
92
+ tw.add_file_simple(entry[:name], entry) { |os| os.write(entry[:data]) }
93
+ end
94
+ end
95
+ ensure
96
+ tw.close if tw
97
+ gz.close if gz
98
+ end
99
+ end
100
+ end
101
+ task TARDIST => [ :test ]
102
+
103
+ def sign(file)
104
+ system %("C:/Program Files/Windows Privacy Tools/GnuPG/gpg.exe" -ba #{file}).gsub(%r{/}) { "\\" }
105
+ raise "Error signing with GPG." unless File.exists?("#{file}.asc")
106
+ end
107
+
108
+ task :signtar => [ :tar ] do
109
+ sign TARDIST
110
+ end
111
+ task :signgem => [ :gem ] do
112
+ sign "../#{DISTDIR}.gem"
113
+ end
114
+
115
+ desc "Build everything."
116
+ task :default => [ :signtar, :signgem ]
data/TODO ADDED
@@ -0,0 +1,5 @@
1
+
2
+ - For page header and footer, if you mix :center, :left and :right
3
+ in the same hash, only one of them will get used.
4
+
5
+ It would be nice if those were combined.
@@ -0,0 +1,1541 @@
1
+ # :title: Text::Reform
2
+ # :main: Text::Reform
3
+ #--
4
+ # Text::Reform for Ruby
5
+ # Version 0.2.0
6
+ #
7
+ # Copyright (c) 2004 by Kaspar Schiess
8
+ #
9
+ # $Id: reform.rb,v 1.1.1.1 2005/01/18 11:15:51 eule Exp $
10
+ #++
11
+
12
+ require 'scanf'
13
+ unless defined?(Text)
14
+ module Text; end
15
+ end
16
+
17
+ # = Introduction
18
+ #
19
+ # Text::Reform class is a rewrite from the perl module with the same name
20
+ # by Damian Conway (damian@conway.org). Much of this documentation has
21
+ # been copied from the original documentation and adapted to the Ruby
22
+ # version.
23
+ #
24
+ # The interface is subject to change, since it will undergo major
25
+ # Rubyfication.
26
+ #
27
+ # = Synopsis
28
+ # require 'text/reform'
29
+ # f = Text::Reform.new
30
+ #
31
+ # puts f.format(template, data)
32
+ #
33
+ # = Description
34
+ # == The Reform#format method
35
+ #
36
+ # Reform#format takes a series of format (or "picture") strings followed
37
+ # by replacement values, interpolates those values into each picture
38
+ # string, and returns the result.
39
+ #
40
+ # A picture string consists of sequences of the following characters:
41
+ # [<] Left-justified field indicator. A series of two or
42
+ # more sequential +<+'s specify a left-justified
43
+ # field to be filled by a subsequent value. A single
44
+ # +<+ is formatted as the literal character '<'.
45
+ # [>] Right-justified field indicator. A series of two
46
+ # or more sequential >'s specify a right-justified
47
+ # field to be filled by a subsequent value. A single
48
+ # > is formatted as the literal character '>'.
49
+ # [<<>>] Fully-justified field indicator. Field may be of
50
+ # any width, and brackets need not balance, but
51
+ # there must be at least 2 '<' and 2 '>'.
52
+ # [^] Centre-justified field indicator. A series of two
53
+ # or more sequential ^'s specify a centred field to
54
+ # be filled by a subsequent value. A single ^ is
55
+ # formatted as the literal character '^'.
56
+ # [>>.<<<<] A numerically formatted field with the specified
57
+ # number of digits to either side of the decimal
58
+ # place. See _Numerical formatting_ below.
59
+ # [[] Left-justified block field indicator. Just like a
60
+ # < field, except it repeats as required on
61
+ # subsequent lines. See below. A single [ is
62
+ # formatted as the literal character '['.
63
+ # []] Right-justified block field indicator. Just like a
64
+ # > field, except it repeats as required on
65
+ # subsequent lines. See below. A single ] is
66
+ # formatted as the literal character ']'.
67
+ # [[[]]] Fully-justified block field indicator. Just like a
68
+ # <<<>>> field, except it repeats as required on
69
+ # subsequent lines. See below. Field may be of any
70
+ # width, and brackets need not balance, but there
71
+ # must be at least 2 '[' and 2 ']'.
72
+ # [|] Centre-justified block field indicator. Just like
73
+ # a ^ field, except it repeats as required on
74
+ # subsequent lines. See below. A single | is
75
+ # formatted as the literal character '|'.
76
+ # []]].[[[[] A numerically formatted block field with the
77
+ # specified number of digits to either side of the
78
+ # decimal place. Just like a +>>>.<<<<+ field,
79
+ # except it repeats as required on subsequent lines.
80
+ # See below.
81
+ # [~] A one-character wide block field.
82
+ # [\] Literal escape of next character (e.g. +\~+ is
83
+ # formatted as '~', not a one character wide block
84
+ # field).
85
+ # [Any other character] That literal character.
86
+ #
87
+ # Any substitution value which is +nil+ (either explicitly so, or because
88
+ # it is missing) is replaced by an empty string.
89
+ #
90
+ # == Controlling Reform instance options
91
+ # There are several ways to influence options set in the Reform instance:
92
+ #
93
+ # 1. At creation:
94
+ # # using a hash
95
+ # r1 = Text::Reform.new(:squeeze => true)
96
+ #
97
+ # # using a block
98
+ # r2 = Text::Reform.new do |rf|
99
+ # rf.squeeze = true
100
+ # rf.fill = true
101
+ # end
102
+ #
103
+ # 2. Using accessors:
104
+ # r = Text::Reform.new
105
+ # r.squeeze = true
106
+ # r.fill = true
107
+ #
108
+ # The Perl way of interleaving option changes with picture strings and
109
+ # data is currently *NOT* supported.
110
+ #
111
+ # == Controlling line filling
112
+ # #squeeze causes sequences of spaces or tabs to be replaced with a
113
+ # single space; #fill removes newlines from the input. To minimize all
114
+ # whitespace, you need to specify both options. Hence:
115
+ #
116
+ # format = "EG> [[[[[[[[[[[[[[[[[[[[["
117
+ # data = "h e\t l lo\nworld\t\t\t\t\t"
118
+ # r = Text::Reform.new
119
+ # r.squeeze = false # default, implied
120
+ # r.fill = false # default, implied
121
+ # puts r.format(format, data)
122
+ # # all whitespace preserved:
123
+ # #
124
+ # # EG> h e l lo
125
+ # # EG> world
126
+ #
127
+ # r.squeeze = true
128
+ # r.fill = false # default, implied
129
+ # puts r.format(format, data)
130
+ # # only newlines preserved
131
+ # #
132
+ # # EG> h e l lo
133
+ # # EG> world
134
+ #
135
+ # r.squeeze = false # default, implied
136
+ # r.fill = true
137
+ # puts r.format(format, data)
138
+ # # only spaces/tabs preserved:
139
+ # #
140
+ # # EG> h e l lo world
141
+ #
142
+ # r.fill = true
143
+ # r.squeeze = true
144
+ # puts r.format(format, data)
145
+ # # no whitespace preserved:
146
+ # #
147
+ # # EG> h e l lo world
148
+ #
149
+ # Whether or not filling or squeezing is in effect, #format can also be
150
+ # directed to trim any extra whitespace from the end of each line it
151
+ # formats, using the #trim option. If this option is specified with a
152
+ # +true+ value, every line returned by #format will automatically have the
153
+ # substitution +.gsub!(/[ \t]+$/, '')+ applied to it.
154
+ #
155
+ # r.format("[[[[[[[[[[[", 'short').length # => 11
156
+ # r.trim = true
157
+ # r.format("[[[[[[[[[[[", 'short').length # => 6
158
+ #
159
+ # It is also possible to control the character used to fill lines that are
160
+ # too short, using the #filler option. If this option is specified the
161
+ # value of the #filler flag is used as the fill string, rather than the
162
+ # default +" "+.
163
+ #
164
+ # For example:
165
+ # r.filler = '*'
166
+ # print r.format("Pay bearer: ^^^^^^^^^^^^^^^^^^^^", '$123.4')
167
+ # prints:
168
+ # Pay bearer: *******$123.4*******
169
+ #
170
+ # If the filler string is longer than one character, it is truncated to
171
+ # the appropriate length. So:
172
+ # r.filler = '-->'
173
+ # print r.format("Pay bearer: ^^^^^^^^^^^^^^^^^^^^", '$123.4')
174
+ # print r.format("Pay bearer: ^^^^^^^^^^^^^^^^^^^^", '$13.4')
175
+ # print r.format("Pay bearer: ^^^^^^^^^^^^^^^^^^^^", '$1.4')
176
+ # prints:
177
+ # Pay bearer: -->-->-$123.4-->-->-
178
+ # Pay bearer: -->-->--$13.4-->-->-
179
+ # Pay bearer: -->-->--$1.4-->-->--
180
+ #
181
+ # If the value of the #filler option is a hash, then its +:left+ and
182
+ # +:right+ entries specify separate filler strings for each side of an
183
+ # interpolated value.
184
+ #
185
+ # == Options
186
+ # The Perl variant supports option switching during processing of the
187
+ # arguments of a single call to #format. This has been removed while
188
+ # porting to Ruby, since I believe that this does not add to clarity
189
+ # of code. So you have to change options explicitly.
190
+ #
191
+ # == Data argument types and handling
192
+ # The +data+ part of the call to format can be either in String form, the
193
+ # items being newline separated, or in Array form. The array form can
194
+ # contain any kind of type you want, as long as it supports #to_s.
195
+ #
196
+ # So all of the following examples return the same result:
197
+ # # String form
198
+ # r.format("]]]].[[", "1234\n123")
199
+ # # Array form
200
+ # r.format("]]]].[[", [ 1234, 123 ])
201
+ # # Array with another type
202
+ # r.format("]]]].[[", [ 1234.0, 123.0 ])
203
+ #
204
+ # == Multi-line format specifiers and interleaving
205
+ # By default, if a format specifier contains two or more lines (i.e. one
206
+ # or more newline characters), the entire format specifier is repeatedly
207
+ # filled as a unit, until all block fields have consumed their
208
+ # corresponding arguments. For example, to build a simple look-up table:
209
+ # values = (1..12).to_a
210
+ # squares = values.map { |el| sprintf "%.6g", el**2 }
211
+ # roots = values.map { |el| sprintf "%.6g", Math.sqrt(el) }
212
+ # logs = values.map { |el| sprintf "%.6g", Math.log(el) }
213
+ # inverses = values.map { |el| sprintf "%.6g", 1.0/el }
214
+ #
215
+ # puts reform.format(
216
+ # " N N**2 sqrt(N) log(N) 1/N",
217
+ # "=====================================================",
218
+ # "| [[ | [[[ | [[[[[[[[[[ | [[[[[[[[[ | [[[[[[[[[ |" +
219
+ # "-----------------------------------------------------",
220
+ # values, squares, roots, logs, inverses
221
+ # )
222
+ #
223
+ # The multiline format specifier:
224
+ # "| [[ | [[[ | [[[[[[[[[[ | [[[[[[[[[ | [[[[[[[[[ |" +
225
+ # "-----------------------------------------------------"
226
+ #
227
+ # is treated as a single logical line. So #format alternately fills the
228
+ # first physical line (interpolating one value from each of the arrays)
229
+ # and the second physical line (which puts a line of dashes between each
230
+ # row of the table) producing:
231
+ # N N**2 sqrt(N) log(N) 1/N
232
+ # =====================================================
233
+ # | 1 | 1 | 1 | 0 | 1 |
234
+ # -----------------------------------------------------
235
+ # | 2 | 4 | 1.41421 | 0.693147 | 0.5 |
236
+ # -----------------------------------------------------
237
+ # | 3 | 9 | 1.73205 | 1.09861 | 0.333333 |
238
+ # -----------------------------------------------------
239
+ # | 4 | 16 | 2 | 1.38629 | 0.25 |
240
+ # -----------------------------------------------------
241
+ # | 5 | 25 | 2.23607 | 1.60944 | 0.2 |
242
+ # -----------------------------------------------------
243
+ # | 6 | 36 | 2.44949 | 1.79176 | 0.166667 |
244
+ # -----------------------------------------------------
245
+ # | 7 | 49 | 2.64575 | 1.94591 | 0.142857 |
246
+ # -----------------------------------------------------
247
+ # | 8 | 64 | 2.82843 | 2.07944 | 0.125 |
248
+ # -----------------------------------------------------
249
+ # | 9 | 81 | 3 | 2.19722 | 0.111111 |
250
+ # -----------------------------------------------------
251
+ # | 10 | 100 | 3.16228 | 2.30259 | 0.1 |
252
+ # -----------------------------------------------------
253
+ # | 11 | 121 | 3.31662 | 2.3979 | 0.0909091 |
254
+ # -----------------------------------------------------
255
+ # | 12 | 144 | 3.4641 | 2.48491 | 0.0833333 |
256
+ # -----------------------------------------------------
257
+ #
258
+ # This implies that formats and the variables from which they're filled
259
+ # need to be interleaved. That is, a multi-line specification like this:
260
+ # puts r.format(
261
+ # "Passed: ##
262
+ # [[[[[[[[[[[[[[[ # single format specification
263
+ # Failed: # (needs two sets of data)
264
+ # [[[[[[[[[[[[[[[", ##
265
+ # passes, fails) ## data for previous format
266
+ # would print:
267
+ # Passed:
268
+ # <pass 1>
269
+ # Failed:
270
+ # <fail 1>
271
+ # Passed:
272
+ # <pass 2>
273
+ # Failed:
274
+ # <fail 2>
275
+ # Passed:
276
+ # <pass 3>
277
+ # Failed:
278
+ # <fail 3>
279
+ #
280
+ # because the four-line format specifier is treated as a single unit, to
281
+ # be repeatedly filled until all the data in +passes+ and +fails+ has been
282
+ # consumed.
283
+ #
284
+ # Unlike the table example, where this unit filling correctly put a line
285
+ # of dashes between lines of data, in this case the alternation of passes
286
+ # and fails is probably /not/ the desired effect.
287
+ #
288
+ # Judging by the labels, it is far more likely that the user wanted:
289
+ # Passed:
290
+ # <pass 1>
291
+ # <pass 2>
292
+ # <pass 3>
293
+ # Failed:
294
+ # <fail 1>
295
+ # <fail 2>
296
+ # <fail 3>
297
+ #
298
+ # To achieve that, either explicitly interleave the formats and their data
299
+ # sources:
300
+ # puts r.format(
301
+ # "Passed:", ## single format (no data required)
302
+ # " [[[[[[[[[[[[[[[", ## single format (needs one set of data)
303
+ # passes, ## data for previous format
304
+ # "Failed:", ## single format (no data required)
305
+ # " [[[[[[[[[[[[[[[", ## single format (needs one set of data)
306
+ # fails) ## data for previous format
307
+ # or instruct #format to do it for you automagically, by setting the
308
+ # 'interleave' flag +true+:
309
+ #
310
+ # r.interleave = true
311
+ # puts r.format(
312
+ # "Passed: ##
313
+ # [[[[[[[[[[[[[[[ # single format
314
+ # Failed: # (needs two sets of data)
315
+ # [[[[[[[[[[[[[[[", ##
316
+ # ## data to be automagically interleaved
317
+ # passes, fails) # as necessary between lines of previous
318
+ # ## format
319
+ #
320
+ # == How #format hyphenates
321
+ # Any line with a block field repeats on subsequent lines until all block
322
+ # fields on that line have consumed all their data. Non-block fields on
323
+ # these lines are replaced by the appropriate number of spaces.
324
+ #
325
+ # Words are wrapped whole, unless they will not fit into the field at all,
326
+ # in which case they are broken and (by default) hyphenated. Simple
327
+ # hyphenation is used (i.e. break at the +N-1+th character and insert a
328
+ # '-'), unless a suitable alternative subroutine is specified instead.
329
+ #
330
+ # Words will not be broken if the break would leave less than 2 characters
331
+ # on the current line. This minimum can be varied by setting the
332
+ # +min_break+ option to a numeric value indicating the minimum total broken
333
+ # characters (including hyphens) required on the current line. Note that,
334
+ # for very narrow fields, words will still be broken (but
335
+ # __unhyphenated__). For example:
336
+ #
337
+ # puts r.format('~', 'split')
338
+ #
339
+ # would print:
340
+ #
341
+ # s
342
+ # p
343
+ # l
344
+ # i
345
+ # t
346
+ #
347
+ # whilst:
348
+ #
349
+ # r.min_break= 1
350
+ # puts r.format('~', 'split')
351
+ #
352
+ # would print:
353
+ #
354
+ # s-
355
+ # p-
356
+ # l-
357
+ # i-
358
+ # t
359
+ #
360
+ # Alternative breaking strategies can be specified using the "break"
361
+ # option in a configuration hash. For example:
362
+ #
363
+ # r.break = MyBreaker.new
364
+ # r.format(fmt, data)
365
+ #
366
+ # #format expects a user-defined line-breaking strategy to listen to the
367
+ # method #break that takes three arguments (the string to be broken, the
368
+ # maximum permissible length of the initial section, and the total width
369
+ # of the field being filled). #break must return a list of two strings:
370
+ # the initial (broken) section of the word, and the remainder of the
371
+ # string, respectively.
372
+ #
373
+ # For example:
374
+ # class MyBreaker
375
+ # def break(str, initial, total)
376
+ # [ str[0, initial-1] + '~', str[initial-1..-1] ]
377
+ # end
378
+ # end
379
+ #
380
+ # r.break = MyBreaker.new
381
+ #
382
+ # makes '~' the hyphenation character, whilst:
383
+ # class WrapAndSlop
384
+ # def break(str, initial, total)
385
+ # if (initial == total)
386
+ # str =~ /\A(\s*\S*)(.*)/
387
+ # [ $1, $2 ]
388
+ # else
389
+ # [ '', str ]
390
+ # end
391
+ # end
392
+ # end
393
+ #
394
+ # r.break = WrapAndSlop.new
395
+ #
396
+ # wraps excessively long words to the next line and "slops" them over the
397
+ # right margin if necessary.
398
+ #
399
+ # The Text::Reform class provides three functions to simplify the use of
400
+ # variant hyphenation schemes. Text::Reform::break_wrap returns an
401
+ # instance implementing the "wrap-and-slop" algorithm shown in the last
402
+ # example, which could therefore be rewritten:
403
+ #
404
+ # r.break = Text::Reform.break_wrap
405
+ #
406
+ # Text::Reform::break_with takes a single string argument and returns an
407
+ # instance of a class which hyphenates by cutting off the text at the
408
+ # right margin and appending the string argument. Hence the first of the
409
+ # two examples could be rewritten:
410
+ #
411
+ # r.break = Text::Reform.break_with('~')
412
+ #
413
+ # The method Text::Reform::break_at takes a single string argument and
414
+ # returns an instance of a class which hyphenates by breaking immediately
415
+ # after that string. For example:
416
+ #
417
+ # r.break = Text::Reform.break_at('-')
418
+ # r.format("[[[[[[[[[[[[[[", "The Newton-Raphson methodology")
419
+ #
420
+ # returns:
421
+ # "The Newton-
422
+ # Raphson
423
+ # methodology"
424
+ #
425
+ # Note that this differs from the behaviour of Text::Reform::break_with,
426
+ # which would be:
427
+ #
428
+ # r.break = Text::Reform.break_with('-')
429
+ # r.format("[[[[[[[[[[[[[[", "The Newton-Raphson methodology")
430
+ #
431
+ # returns:
432
+ # "The Newton-R-
433
+ # aphson metho-
434
+ # dology"
435
+ #
436
+ # Choosing the correct breaking strategy depends on your kind of data.
437
+ #
438
+ # The method Text::Reform::break_hyphen returns an instance of a class
439
+ # which hyphenates using a Ruby hyphenator. The hyphenator must be
440
+ # provided to the method. At the time of release, there are two
441
+ # implementations of hyphenators available: TeX::Hyphen by Martin DeMello
442
+ # and Austin Ziegler (a Ruby port of Jan Pazdziora's TeX::Hyphen module);
443
+ # and Text::Hyphen by Austin Ziegler (a significant recoding of
444
+ # TeX::Hyphen to better support non-English languages).
445
+ #
446
+ # For example:
447
+ # r.break = Text::Reform.break_hyphen
448
+ #
449
+ # Note that in the previous example the calls to .break_at, .break_wrap
450
+ # and .break_hyphen produce instances of the corresponding strategy class.
451
+ #
452
+ # == The algorithm #format uses is:
453
+ #
454
+ # 1. If interleaving is specified, split the first string in the
455
+ # argument list into individual format lines and add a
456
+ # terminating newline (unless one is already present).
457
+ # Otherwise, treat the entire string as a single "line" (like
458
+ # /s does in regexes)
459
+ #
460
+ # 2. For each format line...
461
+ #
462
+ # 1. determine the number of fields and shift
463
+ # that many values off the argument list and
464
+ # into the filling list. If insufficient
465
+ # arguments are available, generate as many
466
+ # empty strings as are required.
467
+ #
468
+ # 2. generate a text line by filling each field
469
+ # in the format line with the initial contents
470
+ # of the corresponding arg in the filling list
471
+ # (and remove those initial contents from the arg).
472
+ #
473
+ # 3. replace any <,>, or ^ fields by an equivalent
474
+ # number of spaces. Splice out the corresponding
475
+ # args from the filling list.
476
+ #
477
+ # 4. Repeat from step 2.2 until all args in the
478
+ # filling list are empty.
479
+ #
480
+ # 3. concatenate the text lines generated in step 2
481
+ #
482
+ # Note that, unlike the Perl version of Text::Reform,
483
+ # this version does not currently loop over several format strings
484
+ # in one function call.
485
+ #
486
+ #
487
+ # == Reform#format examples
488
+ #
489
+ # As an example of the use of #format, the following:
490
+ #
491
+ # count = 1
492
+ # text = "A big long piece of text to be formatted exquisitely"
493
+ # output = ''
494
+ # output << r.format(" |||| <<<<<<<<<< ", count, text)
495
+ # output << r.format(" ---------------- ",
496
+ # " ^^^^ ]]]]]]]]]]| ", count+11, text)
497
+ #
498
+ # results in +output+:
499
+ # 1 A big lon-
500
+ # ----------------
501
+ # 12 g piece|
502
+ # of text|
503
+ # to be for-|
504
+ # matted ex-|
505
+ # quisitely|
506
+ #
507
+ # Note that block fields in a multi-line format string,
508
+ # cause the entire multi-line format to be repeated as
509
+ # often as necessary.
510
+ #
511
+ # Unlike traditional Perl #format arguments, picture strings and
512
+ # arguments cannot be interleaved in the Ruby version. This is partly
513
+ # by intention to see if the feature is a feature or if it
514
+ # can be dispensed with. Another example:
515
+ #
516
+ # report = ''
517
+ # report << r.format(
518
+ # 'Name Rank Serial Number',
519
+ # '==== ==== =============',
520
+ # '<<<<<<<<<<<<< ^^^^ <<<<<<<<<<<<<',
521
+ # name, rank, serial_number
522
+ # )
523
+ #
524
+ # results in:
525
+ #
526
+ # Name Rank Serial Number
527
+ # ==== ==== =============
528
+ # John Doe high 314159
529
+ #
530
+ # == Numerical formatting
531
+ #
532
+ # The ">>>.<<<" and "]]].[[[" field specifiers may be used to format
533
+ # numeric values about a fixed decimal place marker. For example:
534
+ #
535
+ # puts r.format('(]]]]].[[)', %w{
536
+ # 1
537
+ # 1.0
538
+ # 1.001
539
+ # 1.009
540
+ # 123.456
541
+ # 1234567
542
+ # one two
543
+ # })
544
+ #
545
+ # would print:
546
+ #
547
+ # ( 1.0)
548
+ # ( 1.0)
549
+ # ( 1.00)
550
+ # ( 1.01)
551
+ # ( 123.46)
552
+ # (#####.##)
553
+ # (?????.??)
554
+ # (?????.??)
555
+ #
556
+ # Fractions are rounded to the specified number of places after the
557
+ # decimal, but only significant digits are shown. That's why, in the
558
+ # above example, 1 and 1.0 are formatted as "1.0", whilst 1.001 is
559
+ # formatted as "1.00".
560
+ #
561
+ # You can specify that the maximal number of decimal places always be used
562
+ # by giving the configuration option 'numeric' the value NUMBERS_ALL_PLACES.
563
+ # For example:
564
+ #
565
+ # r.numeric = Text::Reform::NUMBERS_ALL_PLACES
566
+ # puts r.format('(]]]]].[[)', <<EONUMS)
567
+ # 1
568
+ # 1.0
569
+ # EONUMS
570
+ #
571
+ # would print:
572
+ #
573
+ # ( 1.00)
574
+ # ( 1.00)
575
+ #
576
+ # Note that although decimal digits are rounded to fit the specified width, the
577
+ # integral part of a number is never modified. If there are not enough places
578
+ # before the decimal place to represent the number, the entire number is
579
+ # replaced with hashes.
580
+ #
581
+ # If a non-numeric sequence is passed as data for a numeric field, it is
582
+ # formatted as a series of question marks. This querulous behaviour can be
583
+ # changed by giving the configuration option 'numeric' a value that
584
+ # matches /\bSkipNaN\b/i in which case, any invalid numeric data is simply
585
+ # ignored. For example:
586
+ #
587
+ #
588
+ # r.numeric = Text::Reform::NUMBERS_SKIP_NAN
589
+ # puts r.format('(]]]]].[[)', %w{
590
+ # 1
591
+ # two three
592
+ # 4
593
+ # })
594
+ #
595
+ #
596
+ # would print:
597
+ #
598
+ # ( 1.0)
599
+ # ( 4.0)
600
+ #
601
+ # == Filling block fields with lists of values
602
+ #
603
+ # If an argument contains an array, then #format
604
+ # automatically joins the elements of the array into a single string, separating
605
+ # each element with a newline character. As a result, a call like this:
606
+ #
607
+ # svalues = %w{ 1 10 100 1000 }
608
+ # nvalues = [1, 10, 100, 1000]
609
+ # puts r.format(
610
+ # "(]]]].[[)",
611
+ # svalues # you could also use nvalues here.
612
+ # )
613
+ #
614
+ # will print out
615
+ #
616
+ # ( 1.00)
617
+ # ( 10.00)
618
+ # (100.00)
619
+ # (1000.00)
620
+ #
621
+ # as might be expected.
622
+ #
623
+ # Note: While String arguments are consumed during formatting process
624
+ # and will be empty at the end of formatting, array arguments are not.
625
+ # So svalues (nvalues) still contains [1,10,100,1000] after the call
626
+ # to #format.
627
+ #
628
+ # == Headers, footers, and pages
629
+ #
630
+ # The #format method can also insert headers, footers, and page-feeds
631
+ # as it formats. These features are controlled by the "header", "footer",
632
+ # "page_feed", "page_len", and "page_num" options.
633
+ #
634
+ # If the +page_num+ option is set to an Integer value, page numbering
635
+ # will start at that value.
636
+ #
637
+ # The +page_len+ option specifies the total number of lines in a page (including
638
+ # headers, footers, and page-feeds).
639
+ #
640
+ # The +page_width+ option specifies the total number of columns in a page.
641
+ #
642
+ # If the +header+ option is specified with a string value, that string is
643
+ # used as the header of every page generated. If it is specified as a block,
644
+ # that block is called at the start of every page and
645
+ # its return value used as the header string. When called, the block is
646
+ # passed the current page number.
647
+ #
648
+ # Likewise, if the +footer+ option is specified with a string value, that
649
+ # string is used as the footer of every page generated. If it is specified
650
+ # as a block, that block is called at the *start*
651
+ # of every page and its return value used as the footer string. When called,
652
+ # the footer block is passed the current page number.
653
+ #
654
+ # Both the header and footer options can also be specified as hash references.
655
+ # In this case the hash entries for keys +left+, +centre+ (or +center+), and
656
+ # +right+ specify what is to appear on the left, centre, and right of the
657
+ # header/footer. The entry for the key +width+ specifies how wide the
658
+ # footer is to be. If the +width+ key is omitted, the +page_width+ configuration
659
+ # option (which defaults to 72 characters) is used.
660
+ #
661
+ # The +:left+, +:centre+, and +:right+ values may be literal
662
+ # strings, or blocks (just as a normal header/footer specification may
663
+ # be.) See the second example, below.
664
+ #
665
+ # Another alternative for header and footer options is to specify them as a
666
+ # block that returns a hash reference. The subroutine is called for each
667
+ # page, then the resulting hash is treated like the hashes described in the
668
+ # preceding paragraph. See the third example, below.
669
+ #
670
+ # The +page_feed+ option acts in exactly the same way, to produce a
671
+ # page_feed which is appended after the footer. But note that the page_feed
672
+ # is not counted as part of the page length.
673
+ #
674
+ # All three of these page components are recomputed at the *start of each
675
+ # new page*, before the page contents are formatted (recomputing the header
676
+ # and footer first makes it possible to determine how many lines of data to
677
+ # format so as to adhere to the specified page length).
678
+ #
679
+ # When the call to #format is complete and the data has been fully formatted,
680
+ # the footer subroutine is called one last time, with an extra argument of +true+.
681
+ # The string returned by this final call is used as the final footer.
682
+ #
683
+ # So for example, a 60-line per page report, starting at page 7,
684
+ # with appropriate headers and footers might be set up like so:
685
+ #
686
+ # small = Text::Reform.new
687
+ # r.header = lambda do |page| "Page #{page}\n\n" end
688
+ # r.footer = lambda do |page, last|
689
+ # if last
690
+ # ''
691
+ # else
692
+ # ('-'*50 + "\n" + small.format('>'*50, "...#{page+1}"))
693
+ # end
694
+ # end
695
+ # r.page_feed = "\n\n"
696
+ # r.page_len = 60
697
+ # r.page_num = 7
698
+ #
699
+ # r.format(template, data)
700
+ #
701
+ # Note that you can't reuse the +r+ instance of Text::Reform inside
1702
+ # the footer: it would end up calling itself recursively until the stack
1703
+ # is exhausted.
704
+ #
705
+ # Alternatively, to set up headers and footers such that the running
706
+ # head is right justified in the header and the page number is centred
707
+ # in the footer:
708
+ #
709
+ # r.header = { :right => 'Running head' }
710
+ # r.footer = { :centre => lambda do |page| "page #{page}" end }
711
+ # r.page_len = 60
712
+ #
713
+ # r.format(template, data)
714
+ #
715
+ # The footer in the previous example could also have been specified the other
716
+ # way around, as a block that returns a hash (rather than a hash containing
717
+ # a block):
718
+ #
719
+ # r.header = { :right => 'Running head' }
720
+ # r.footer = lambda do |page| { :center => "page #{page}" } end
721
+ #
722
+ #
723
+ # = AUTHOR
724
+ #
725
+ # Original Perl library and documentation:
726
+ # Damian Conway (damian at conway dot org)
727
+ #
728
+ # Translating everything to Ruby (and leaving a lot of stuff out):
729
+ # Kaspar Schiess (eule at space dot ch)
730
+ #
731
+ # = BUGS
732
+ #
733
+ # There are undoubtedly serious bugs lurking somewhere in code this funky :-)
734
+ # Bug reports and other feedback are most welcome.
735
+ #
736
+ # = COPYRIGHT
737
+ #
738
+ # Copyright (c) 2005, Kaspar Schiess. All Rights Reserved.
739
+ # This module is free software. It may be used, redistributed
740
+ # and/or modified under the terms of the Ruby License
741
+ # (see http://www.ruby-lang.org/en/LICENSE.txt)
742
+ class Text::Reform
743
VERSION = "0.2.0".freeze

# Regexp *source fragments* (plain strings, not Regexp objects) used to
# recognise field marks in a format string. They are interpolated into
# regexps compiled with the /x flag, so the literal spaces inside the
# fragments are insignificant. All of them are frozen: they are shared,
# read-only building blocks.
BSPECIALS = %w{ [ | ] }.freeze
LSPECIALS = %w{ < ^ > }.freeze
LJUSTIFIED = "[<]{2,} [>]{2,}".freeze
BJUSTIFIED = "[\\[]{2,} [\\]]{2,}".freeze
BSINGLE = "~+".freeze
SPECIALS = [BSPECIALS, LSPECIALS].flatten.map { |spec| Regexp.escape(spec) + "{2,}" }.freeze
FIXED_FIELDPAT = [LJUSTIFIED, BJUSTIFIED, BSINGLE, SPECIALS].flatten.join('|').freeze

DECIMAL = '.'.freeze # TODO: Make this locale dependent
# Matches one or more > followed by . followed by one or more <
LNUMERICAL = "[>]+ (?:#{Regexp.escape(DECIMAL)}[<]{1,})".freeze
# Matches one or more ] followed by . followed by one or more [
BNUMERICAL = "[\\]]+ (?: #{Regexp.escape(DECIMAL)} [\\[]{1,})".freeze

# Any field at all; used to split a format into fields and literal text.
FIELDPAT = [LNUMERICAL, BNUMERICAL, FIXED_FIELDPAT].join('|').freeze

# Marks that identify a field built from < ^ > characters ...
LFIELDMARK = [LNUMERICAL, LJUSTIFIED, LSPECIALS.map { |l| Regexp.escape(l) + "{2}" }].flatten.join('|').freeze
# ... and marks that identify a field built from [ | ] or ~ characters.
BFIELDMARK = [BNUMERICAL, BJUSTIFIED, BSINGLE, BSPECIALS.map { |l| Regexp.escape(l) + "{2}" }].flatten.join('|').freeze

# Marks of either family.
FIELDMARK = [LNUMERICAL, BNUMERICAL, BSINGLE, LJUSTIFIED, BJUSTIFIED, LFIELDMARK, BFIELDMARK].flatten.join('|').freeze
766
+
767
# For use with #header, #footer, and #page_feed; assigning it makes the
# header, footer, or page feed render as an empty string.
#
# The block accepts and ignores any number of arguments, because the
# formatter invokes these callbacks with one or two arguments (the page
# number and, for footers, the "last page" flag). A strict zero-parameter
# lambda would raise ArgumentError for such calls on Ruby 1.9 and later.
CLEAR_BLOCK = lambda { |*_args| "" }
770
+
771
+ # Proc returning page header. This is called before the page actually
772
+ # gets formatted to permit calculation of page length.
773
+ #
774
+ # *Default*:: +CLEAR_BLOCK+
775
+ attr_accessor :header
776
+
777
+ # Proc returning the page footer. This gets called before the
778
+ # page gets formatted to permit calculation of page length.
779
+ #
780
+ # *Default*:: +CLEAR_BLOCK+
781
+ attr_accessor :footer
782
+
783
+ # Proc to be called for page feed text. This is also called at
784
+ # the start of each page, but does not count towards page length.
785
+ #
786
+ # *Default*:: +CLEAR_BLOCK+
787
+ attr_accessor :page_feed
788
+
789
+ # Specifies the total number of lines in a page (including headers,
790
+ # footers, and page-feeds).
791
+ #
792
+ # *Default*:: +nil+
793
+ attr_accessor :page_len
794
+
795
+ # Where to start page numbering.
796
+ #
797
+ # *Default*:: +nil+
798
+ attr_accessor :page_num
799
+
800
+ # Specifies the total number of columns in a page.
801
+ #
802
+ # *Default*:: 72
803
+ attr_accessor :page_width
804
+
805
+ # Break class instance that is used to break words in hyphenation. This
806
+ # class must have a #break method accepting the three arguments +str+,
807
+ # +initial_max_length+ and +total_width+.
808
+ #
809
+ # You can directly call the break_* methods to produce such a class
810
+ # instance for you; available methods are #break_with, #break_at,
811
+ # #break_wrap, #break_hyphenator.
812
+ #
813
+ # *Default*:: Text::Reform.break_with('-')
814
+ attr_accessor :break
815
+
816
+ # Specifies the minimal number of characters that must be left on a
817
+ # line. This prevents breaking of words below its value.
818
+ #
819
+ # *Default*:: 2
820
+ attr_accessor :min_break
821
+
822
+ # If +true+, causes any sequence of spaces and/or tabs (but not
823
+ # newlines) in an interpolated string to be replaced with a single
824
+ # space.
825
+ #
826
+ # *Default*:: +false+
827
+ attr_accessor :squeeze
828
+
829
+ # If +true+, causes newlines to be removed from the input. If you want
830
+ # to squeeze all whitespace, set #fill and #squeeze to true.
831
+ #
832
+ # *Default*:: +false+
833
+ attr_accessor :fill
834
+
835
+ # Controls character that is used to fill lines that are too short.
836
+ # If this attribute has a hash value, the symbols :left and :right
837
+ # store the filler character to use on the left and the right,
838
+ # respectively.
839
+ #
840
+ # *Default*:: +' '+ on both sides
841
+ attr_accessor :filler
842
# Sets the filler used to pad fields that are shorter than their mark.
#
# +value+ is either a Hash holding both a +:left+ and a +:right+ filler
# (stored as given), or any other object, which is then used as the filler
# for both sides.
#
# Raises ArgumentError when a Hash lacks a (truthy) +:left+ or +:right+
# entry.
def filler=(value) #:nodoc:
  if value.kind_of?(Hash)
    unless value[:left] && value[:right]
      raise ArgumentError, "If #filler is provided as a Hash, it must contain the keys :left and :right"
    end
    @filler = value
  else
    @filler = { :left => value, :right => value }
  end
end
853
+
854
+ # This implies that formats and the variables from which they're filled
855
+ # need to be interleaved. That is, a multi-line specification like this:
856
+ #
857
+ # print format(
858
+ # "Passed: ##
859
+ # [[[[[[[[[[[[[[[ # single format specification
860
+ # Failed: # (needs two sets of data)
861
+ # [[[[[[[[[[[[[[[", ##
862
+ #
863
+ # fails, passes) ## two arrays, data for previous format
864
+ #
865
+ # would print:
866
+ #
867
+ # Passed:
868
+ # <pass 1>
869
+ # Failed:
870
+ # <fail 1>
871
+ # Passed:
872
+ # <pass 2>
873
+ # Failed:
874
+ # <fail 2>
875
+ # Passed:
876
+ # <pass 3>
877
+ # Failed:
878
+ # <fail 3>
879
+ #
880
+ # because the four-line format specifier is treated as a single unit, to
881
+ # be repeatedly filled until all the data in +passes+ and +fails+ has
882
+ # been consumed.
883
+ #
884
+ # *Default*:: false
885
+ attr_accessor :interleave
886
+
887
+ # Numbers are printed, leaving off unnecessary decimal places. Non-
888
+ # numeric data is printed as a series of question marks. This is the
889
+ # default for formatting numbers.
890
+ NUMBERS_NORMAL = 0
891
+ # Numbers are printed, retaining all decimal places. Non-numeric data is
892
+ # printed as a series of question marks.
893
+ #
894
+ # [[[[[.]] # format
895
+ # 1.0 -> 1.00
896
+ # 1 -> 1.00
897
+ NUMBERS_ALL_PLACES = 1
898
+ # Numbers are printed as for +NUMBERS_NORMAL+, but NaN ("not a number")
899
+ # values are skipped.
900
+ NUMBERS_SKIP_NAN = 2
901
+ # Numbers are printed as for +NUMBERS_ALL_PLACES+, but NaN values are
902
+ # skipped.
903
+ NUMBERS_ALL_AND_SKIP = NUMBERS_ALL_PLACES | NUMBERS_SKIP_NAN
904
+
905
+ # Specifies handling method for numerical data. Allowed values include:
906
+ # * +NUMBERS_NORMAL+
907
+ # * +NUMBERS_ALL_PLACES+
908
+ # * +NUMBERS_SKIP_NAN+
909
+ # * +NUMBERS_ALL_AND_SKIP+
910
+ #
911
+ # *Default*:: NUMBERS_NORMAL
912
+ attr_accessor :numeric
913
+
914
+ # Controls trimming of whitespace at end of lines.
915
+ #
916
+ # *Default*:: +false+
917
+ attr_accessor :trim
918
+
919
# Creates a Text::Reform object.
#
# +options+ is an optional Hash of construction options. Recognised keys
# are +:debug+, +:header+, +:footer+, +:page_feed+, +:page_len+,
# +:page_num+, +:page_width+, +:break+, +:min_break+, +:squeeze+, +:fill+,
# +:filler+, +:interleave+, +:numeric+ and +:trim+; a key that is missing
# (or whose value is +nil+ or +false+) gets the default assigned below.
#
# After the object has been set up it is yielded (as +self+) to the
# optional block for further configuration.
#
# Raises ArgumentError if +:filler+ is a Hash without both +:left+ and
# +:right+ entries.
def initialize(options = {}) #:yields self:
  @debug      = options[:debug]      || false
  @header     = options[:header]     || CLEAR_BLOCK
  @footer     = options[:footer]     || CLEAR_BLOCK
  @page_feed  = options[:page_feed]  || CLEAR_BLOCK
  @page_len   = options[:page_len]   || nil
  @page_num   = options[:page_num]   || nil
  @page_width = options[:page_width] || 72
  @break      = options[:break]      || Text::Reform.break_with('-')
  @min_break  = options[:min_break]  || 2
  @squeeze    = options[:squeeze]    || false
  @fill       = options[:fill]       || false
  @interleave = options[:interleave] || false
  @numeric    = options[:numeric]    || 0
  @trim       = options[:trim]       || false

  # Go through the writer instead of assigning @filler directly, so that a
  # scalar filler (e.g. :filler => '*') is expanded into the
  # { :left, :right } hash the formatter indexes into, and an incomplete
  # hash is rejected here rather than failing later during formatting.
  self.filler = options[:filler] || { :left => ' ', :right => ' ' }

  yield self if block_given?
end
944
+
945
# Formats data according to one or more format strings and returns the
# resulting text.
#
# +args+ is a flat list: a format string, followed by one data item for
# each field in that format, optionally followed by the next format and
# its data, and so on. An Array data item is joined with newlines; any
# other item is converted with +to_s+. Missing data items are treated as
# empty strings. String data items are not copied: #replace consumes them
# in place while the fields are filled.
#
# With #interleave set, each format is cut into lines and every line is
# processed as a format of its own.
#
# When #page_len is set, the footer, the page feed and a fresh header are
# inserted whenever the line count reaches the page length, and #page_num
# is advanced. Once everything is formatted the footer is requested one
# more time with +last+ = true and put in place of the final footer.
# Trailing spaces are removed from every line when #trim is set.
def format(*args)
  @page_num ||= 1

  __debug("Acquiring header and footer: ", @page_num)
  header = __header(@page_num)
  footer = __footer(@page_num, false)

  previous_footer = footer

  line_count = count_lines(header, footer)
  hf_count = line_count

  text = header
  format_stack = []

  until args.empty? && format_stack.empty?
    __debug("Arguments: ", args)
    __debug("Formats left: ", format_stack)

    if format_stack.empty?
      if @interleave
        # split format in its parts and recombine line by line
        format_stack += args.shift.split(%r{\n}o).collect { |fmtline| fmtline << "\n" }
      else
        format_stack << args.shift
      end
    end

    format = format_stack.shift

    parts = format.split(%r{( # Capture
        \n          | # newline... OR
        (?:\\.)+    | # one or more escapes... OR
        #{FIELDPAT} | # patterns
      )}ox)
    parts << "\n" unless parts[-1] == "\n"

    # Count all fields (inject 0, increment when field) and prepare
    # data.
    field_count = parts.inject(0) do |count, el|
      if (el =~ /#{LFIELDMARK}/ox) || (el =~ /#{FIELDMARK}/ox)
        count + 1
      else
        count
      end
    end

    if field_count.nonzero?
      data = args.first(field_count).collect do |el|
        if el.kind_of?(Array)
          el.join("\n")
        else
          el.to_s
        end
      end
      # Drop the arguments that were just consumed. Slicing past the end
      # of an Array yields nil, so fall back to an empty list; otherwise a
      # later format line with fields would fail on nil.
      args = args[field_count..-1] || []
      # Too few data items? Pad with empty strings, one distinct String
      # per field because #replace modifies its value in place.
      data.concat(Array.new(field_count - data.length) { '' })
    else
      data = [[]] # one line of data, contains nothing
    end

    first_line = true
    data_left = true
    while data_left
      idx = 0
      data_left = false

      parts.each do |part|
        # Is part an escaped format literal ?
        if part =~ /\A (?:\\.)+/ox
          __debug("esc literal: ", part)
          # Single quotes are essential: '\1' is the back-reference to the
          # escaped character, whereas "\1" would be the byte 0x01.
          text << part.gsub(/\\(.)/, '\1')
        # Is part a once field mark ?
        elsif part =~ /(#{LFIELDMARK})/ox
          if first_line
            type = __construct_type($1, LJUSTIFIED)

            __debug("once field: ", part)
            __debug("data is: ", data[idx])
            text << replace(type, part.length, data[idx])
            __debug("data now: ", data[idx])
          else
            # Once-fields are only filled on the first output line; on
            # continuation lines they are blanked with the left filler.
            text << (@filler[:left] * part.length)[0, part.length]
            __debug("missing once field: ", part)
          end
          idx += 1
        # Is part a multi field mark ?
        elsif (part =~ /(#{FIELDMARK})/ox) && part[0, 2] != '~~'
          type = __construct_type($1, BJUSTIFIED)

          __debug("multi field: ", part)
          __debug("data is: ", data[idx])
          text << replace(type, part.length, data[idx])
          __debug("data now: ", data[idx])
          # Repeat the whole format while any block field still has data.
          data_left = true if data[idx].strip.length > 0
          idx += 1
        # Part is a literal.
        else
          __debug("literal: ", part)
          text << part.gsub(/\0(\0*)/, '\1') # XXX: What is this gsub for ?

          # New line ?
          if part == "\n"
            line_count += 1
            if @page_len && line_count >= @page_len
              __debug("\tejecting page: #@page_num")

              @page_num += 1
              page_feed = __pagefeed
              header = __header(@page_num)

              text << footer + page_feed + header
              previous_footer = footer

              footer = __footer(@page_num, false)

              line_count = hf_count = (header.count("\n") + footer.count("\n"))

              # Remember exactly what was appended for the new page so it
              # can be stripped again if the page turns out to be empty.
              header = page_feed + header
            end
          end
        end # multiway if on part
      end # parts.each

      __debug("Accumulated: ", text)

      first_line = false
    end
  end # until no arguments and no formats are left

  # Adjust final page header or footer as required
  if hf_count > 0 && line_count == hf_count
    # there is a header that we don't need
    text.sub!(/#{Regexp.escape(header)}\Z/, '')
  elsif line_count > 0 && @page_len && @page_len > 0
    # missing footer:
    text << "\n" * (@page_len - line_count) + footer
    previous_footer = footer
  end

  # Replace last footer
  if previous_footer && !previous_footer.empty?
    last_footer = __footer(@page_num, true)
    footer_diff = last_footer.count("\n") - previous_footer.count("\n")

    # Enough space to squeeze the longer final footer in ?
    if footer_diff > 0 && text =~ /(#{'^[^\S\n]*\n' * footer_diff}#{Regexp.escape(previous_footer)})\Z/
      previous_footer = $1
      footer_diff = 0
    end

    # If not, create an empty page for it.
    if footer_diff > 0
      @page_num += 1
      last_header = __header(@page_num)
      last_footer = __footer(@page_num, true)

      text << last_header
      text << "\n" * (@page_len - last_header.count("\n") - last_footer.count("\n"))
      text << last_footer
    else
      last_footer = "\n" * (-footer_diff) + last_footer
      text[-(previous_footer.length), text.length] = last_footer
    end
  end

  # Trim text
  text.gsub!(/[ ]+$/m, '') if @trim
  text
end
1118
+
1119
+ # Fills one field: takes text from the front of +value+ (which is modified in place) and returns the text for the field.
1120
+ # +format+ is the field type. A type longer than two characters is handled as a numeric field (a mark such as ">>>.<<");
1121
+ # otherwise it is a text field: 'J' is fully justified, a type containing > or ] is right-justified, ^ or | is centred, anything else left-justified.
1122
+ def replace(format, length, value)
1123
+ text = ''
1124
+ remaining = length # columns of the field that are still free
1125
+ filled = 0 # 0: no word placed yet, 1: at least one word placed, 2: stopped at a line break in +value+
1126
+
1127
+ __debug("value is: ", value)
1128
+
1129
+ if @fill
1130
+ value.sub!(/\A\s*/m, '')
1131
+ else
1132
+ value.sub!(/\A[ \t]*/, '')
1133
+ end
1134
+
1135
+ if value and format.length > 2
1136
+ # Numeric field: ilen/dlen are the widths of the integer and decimal parts of the mark
1137
+ if format =~ /([\]>]+)#{Regexp.escape(DECIMAL)}([\[<]+)/
1138
+ ilen, dlen = $1.length, $2.length
1139
+ end
1140
+
1141
+ # Try to extract a numeric value from the front of +value+; +extra+ is what #scanf_remains reports as left over
1142
+ done = false
1143
+ while not done
1144
+ num, extra = scanf_remains(value, "%f")
1145
+ __debug "Number split into: ", [num, extra]
1146
+ done = true
1147
+
1148
+ if extra.length == value.length
1149
+ value.sub!(/\s*\S*/, '') # skip offending non number value
1150
+ if (@numeric & NUMBERS_SKIP_NAN) > 0 && value =~ /\S/
1151
+ __debug("Not a Number, retrying ", value)
1152
+ done = false
1153
+ else
1154
+ text = '?' * ilen + DECIMAL + '?' * dlen
1155
+ return text
1156
+ end
1157
+ end
1158
+ end
1159
+
1160
+ __debug("Finally number is: ", num)
1161
+
1162
+ formatted = "%#{format.length}.#{dlen}f"% num
1163
+ if formatted.length > format.length
1164
+ text = '#' * ilen + DECIMAL + '#'*dlen
1165
+ else
1166
+ text = formatted
1167
+ end
1168
+
1169
+ # Only output significant digits. Unless not all places were
1170
+ # explicitly requested or the number has more digits than we just
1171
+ # output replace trailing zeros with spaces.
1172
+ unless (@numeric & NUMBERS_ALL_PLACES > 0) or num.to_s =~ /#{Regexp.escape(DECIMAL)}\d\d{#{dlen},}$/
1173
+ text.sub!(/(#{Regexp.escape(DECIMAL)}\d+?)(0+)$/) do |match|
1174
+ $1 + ' '*$2.length
1175
+ end
1176
+ end
1177
+
1178
+ value.replace(extra)
1179
+ remaining = 0
1180
+ else
1181
+ while not (value =~ /\S/o).nil?
1182
+ # Is the rest of the current input line blank? Then consume the line break and stop filling this field.
1183
+ if ! @fill && value.sub!(/\A[ \t]*\n/, '')
1184
+ filled = 2
1185
+ break
1186
+ end
1187
+ break unless value =~ /\A(\s*)(\S+)(.*)\z/om;
1188
+
1189
+ ws, word, extra = $1, $2, $3
1190
+
1191
+ # Replace all newlines by spaces when fill was specified.
1192
+ nonnl = (ws =~ /[^\n]/o)
1193
+ if @fill
1194
+ ws.gsub!(/\n/) do |match|
1195
+ nonnl ? '' : ' '
1196
+ end
1197
+ end
1198
+
1199
+ # Replace all whitespace by one space if squeeze was specified.
1200
+ lead = @squeeze ? (ws.length > 0 ? ' ' : '') : ws
1201
+ match = lead + word
1202
+
1203
+ __debug("Extracted: ", match)
1204
+ break if text and match =~ /\n/o
1205
+
1206
+ if match.length <= remaining
1207
+ __debug("Accepted: ", match)
1208
+ text << match
1209
+ remaining -= match.length
1210
+ value.replace(extra)
1211
+ else
1212
+ __debug("Need to break: ", match)
1213
+ if (remaining - lead.length) >= @min_break
1214
+ __debug("Trying to break: ", match)
1215
+ broken, left = @break.break(match, remaining, length)
1216
+ text << broken
1217
+ __debug("Broke as: ", [broken, left])
1218
+ value.replace left + extra
1219
+
1220
+ # Adjust remaining chars, but allow for underflow.
1221
+ t = remaining-broken.length
1222
+ if t < 0
1223
+ remaining = 0
1224
+ else
1225
+ remaining = t
1226
+ end
1227
+ end
1228
+ break
1229
+ end
1230
+
1231
+ filled = 1
1232
+ end
1233
+ end
1234
+
1235
+ if filled.zero? and remaining > 0 and value =~ /\S/ and text.empty?
1236
+ value.sub!(/^\s*(.{1,#{remaining}})/, '')
1237
+ text = $1
1238
+ remaining -= text.length
1239
+ end
1240
+
1241
+ # Pad the text out according to the field type.
1242
+ if text =~ / /o and format == 'J' and value =~ /\S/o and filled != 2
1243
+ # Fully justified: widen the runs of spaces, starting from the right-hand end, until no free columns remain
1244
+ text.reverse!
1245
+ text.gsub!(/( +)/o) do |match|
1246
+ remaining -= 1
1247
+ if remaining > 0
1248
+ " #{$1}"
1249
+ else
1250
+ $1
1251
+ end
1252
+ end while remaining > 0
1253
+ text.reverse!
1254
+ elsif format =~ /\>|\]/o
1255
+ # Right justified
1256
+ text[0, 0] = (@filler[:left] * remaining)[0, remaining] if remaining > 0
1257
+ elsif format =~ /\^|\|/o
1258
+ # Center justified
1259
+ half_remaining = remaining / 2
1260
+ text[0, 0] = (@filler[:left] * half_remaining)[0, half_remaining]
1261
+ half_remaining = remaining - half_remaining
1262
+ text << (@filler[:right] * half_remaining)[0, half_remaining]
1263
+ else
1264
+ # Left justified
1265
+ text << (@filler[:right] * remaining)[0, remaining]
1266
+ end
1267
+
1268
+ text
1269
+ end
1270
+
1271
+ # Meant to quote the characters in +str+ that would otherwise be interpreted as field marks. Not implemented yet:
1272
+ # +str+ is returned unchanged, and a warning is printed to standard output when debugging is enabled.
1273
+ def quote(str)
1274
+ puts 'Text::Reform warning: not quoting string...' if @debug
1275
+ str
1276
+ end
1277
+
1278
# Turns on internal debugging output for the duration of the block.
#
# The previous debug setting is put back when the block finishes, even if
# the block raises (the exception still propagates). On normal completion
# the previous setting is returned.
def debug
  previous = @debug
  @debug = true
  yield
  previous
ensure
  @debug = previous
end
1286
+
1287
+ class << self
1288
+ # Returns a BreakWith word-breaker for the given +hyphen+ string; it breaks a word by
1289
+ # inserting that hyphen into the word.
1290
+ def break_with(hyphen)
1291
+ BreakWith.new(hyphen)
1292
+ end
1293
+
1294
+ # Returns a BreakAt word-breaker for the given +bat+ string; it breaks a word by looking
1295
+ # for that substring and breaking just after it.
1296
+ def break_at(bat)
1297
+ BreakAt.new(bat)
1298
+ end
1299
+
1300
+ # Returns a BreakWrap word-breaker, which uses a 'wrap and slop' algorithm.
1301
+ def break_wrap
1302
+ BreakWrap.new
1303
+ end
1304
+
1305
+ # Returns a BreakHyphenator word-breaker wrapping +hyphenator+, an object that implements
1306
+ # the API of TeX::Hyphen or Text::Hyphen.
1307
+ def break_hyphenator(hyphenator)
1308
+ BreakHyphenator.new(hyphenator)
1309
+ end
1310
+ end
1311
+
1312
+ # Returns the header text for page +page_num+ by resolving the #header setting
1313
+ # through #__header_or_footer, with the "last page" flag always false.
1314
+ def __header(page_num)
1315
+ __header_or_footer(@header, page_num, false)
1316
+ end
1317
+ private :__header
1318
+
1319
+ # Returns the footer text for page +page_num+ by resolving the #footer setting
1320
+ # through #__header_or_footer. +last+ is true if this is the last page.
1321
+ def __footer(page_num, last)
1322
+ __header_or_footer(@footer, page_num, last)
1323
+ end
1324
+ private :__footer
1325
+
1326
# Resolves a header or footer specification to the text to output.
#
# +element+ is the specification to process, +page+ is the current page
# number and +last+ indicates whether this is the last page.
#
# * A callable is invoked and its result is resolved again. How it is
#   called depends on its arity: no arguments for arity 0, +page+ for
#   arity 1, and +page+, +last+ otherwise. (Calling a zero-parameter
#   lambda with arguments raises ArgumentError on Ruby 1.9 and later.)
# * A Hash is rendered with an internal formatter into a field that is
#   +:width+ (default: #page_width) columns wide. Only one entry is
#   rendered: +:right+ takes precedence over +:center+/+:centre+, which
#   takes precedence over +:left+. The entry may itself be a callable.
#   A Hash with none of these keys yields an empty string.
# * Anything else is returned as a copy that ends in a newline (see
#   #unchomp).
def __header_or_footer(element, page, last)
  __debug("element: ", element)

  if element.respond_to?(:call)
    result =
      case element.arity
      when 0 then element.call
      when 1 then element.call(page)
      else element.call(page, last)
      end
    __header_or_footer(result, page, last)
  elsif element.kind_of?(Hash)
    page_width = element[:width] || @page_width
    @internal_formatter ||= self.class.new

    format = nil
    data = nil

    if element[:left]
      format = "<" * page_width
      data = element[:left]
    end

    if element[:center] || element[:centre]
      format = "^" * page_width
      data = element[:center] || element[:centre]
    end

    if element[:right]
      format = ">" * page_width
      data = element[:right]
    end

    if format
      if data.respond_to?(:call)
        # Resolve the callable with the same arity rules as above.
        @internal_formatter.format(format, __header_or_footer(data, page, last))
      else
        # Formatting consumes its data in place, so hand over a copy.
        @internal_formatter.format(format, data.dup)
      end
    else
      ""
    end
  else
    unchomp(element)
  end
end
private :__header_or_footer
1374
+
1375
# Returns the page feed text taken from the #page_feed attribute.
#
# If #page_feed is callable it is invoked, with the current #page_num
# unless it takes no parameters at all, and its result is returned. Any
# other value is returned as is.
def __pagefeed
  return @page_feed unless @page_feed.respond_to?(:call)

  if @page_feed.arity.zero?
    @page_feed.call
  else
    @page_feed.call(@page_num)
  end
end
private :__pagefeed
1385
+
1386
+ # Scans +value+ with the scanf format +fstr+ (a String or a Scanf::FormatString). Without a block, returns a two-element array:
1387
+ # the result of FormatString#match and FormatString#string_left (presumably the unmatched rest -- confirm against Scanf). With a block, delegates to String#block_scanf.
1388
+ def scanf_remains(value, fstr, &block)
1389
+ if block.nil?
1390
+ unless fstr.kind_of?(Scanf::FormatString)
1391
+ fstr = Scanf::FormatString.new(fstr)
1392
+ end
1393
+ [ fstr.match(value), fstr.string_left ]
1394
+ else
1395
+ value.block_scanf(fstr, &block)
1396
+ end
1397
+ end
1398
+
1399
+ # Returns the total number of \n characters (lines) in all the strings passed as
1400
+ # parameters; 0 when called without arguments.
1401
+ def count_lines(*args)
1402
+ args.inject(0) do |sum, el|
1403
+ sum + el.count("\n")
1404
+ end
1405
+ end
1406
+
1407
+ # Builds the field type that is passed to #replace: 'J' when +str+ matches +justifiedPattern+ (a regexp source, applied with the /x flag), otherwise +str+ itself.
1408
+ def __construct_type(str, justifiedPattern)
1409
+ if str =~ /#{justifiedPattern}/x
1410
+ 'J'
1411
+ else
1412
+ str
1413
+ end
1414
+ end
1415
+
1416
+ # Returns a copy of +str+ that ends in a \n character: one is appended unless the copy
1417
+ # is empty or already ends in \n. +str+ itself is left untouched.
1418
+ def unchomp(str)
1419
+ unchomp!(str.dup)
1420
+ end
1421
+
1422
+ # Appends a \n character to +str+ in place unless +str+ is empty or already ends in
1423
+ # \n. Returns +str+.
1424
+ def unchomp!(str)
1425
+ if str.empty? or str[-1] == ?\n
1426
+ str
1427
+ else
1428
+ str << "\n"
1429
+ end
1430
+ end
1431
+
1432
+ # Debug output; does nothing unless debugging is enabled. Message +msg+ is printed at start of line, then +obj+
1433
+ # is output using +pp+ (which is only loaded when actually needed).
1434
+ def __debug(msg, obj = nil)
1435
+ return unless @debug
1436
+ require 'pp'
1437
+ print msg
1438
+ pp obj
1439
+ end
1440
+ private :__debug
1441
+
1442
+ class BreakWith
1443
+ def initialize hyphen
1444
+ @hyphen = hyphen
1445
+ @hylen = hyphen.length
1446
+ end
1447
+
1448
+ # Breaks +str+ by inserting the hyphen string. Returns a two-element array [head, rest].
1449
+ #
1450
+ # +initial_max_length+:: The room left on the current line; head is cut so that, together
1451
+ # with the hyphen, it is at most this long.
1452
+ # +total_width+:: The total width of the field. If it is not wider than the hyphen,
1453
+ # head is just the first character. If head would be blank, ['', str] is returned.
1454
+ def break(str, initial_max_length, total_width)
1455
+ if total_width <= @hylen
1456
+ ret = [str[0...1], str[1..-1]]
1457
+ else
1458
+ ret = [str[0...(initial_max_length-@hylen)], str[(initial_max_length-@hylen)..-1]]
1459
+ end
1460
+
1461
+ if ret.first =~ /\A\s*\Z/
1462
+ return ['', str]
1463
+ else
1464
+ return [ret.first + @hyphen, ret.last]
1465
+ end
1466
+ end
1467
+ end
1468
+
1469
# Word-breaker that breaks a word just after an existing occurrence of a
# given substring (for instance after a '-' that is already part of the
# word).
class BreakAt
  def initialize hyphen
    @hyphen = hyphen
  end

  # Breaks +str+ just after the first usable occurrence of the break
  # string. Returns a two-element array [head, rest].
  #
  # +initial_max_length+:: The room that is left on the current line.
  # +total_width+:: The total width of the field.
  #
  # * If the field is not wider than the break string, +str+ is split
  #   after its first character.
  # * Otherwise, if the break string occurs within the first +total_width+
  #   characters of +str+, head is everything up to and including it.
  # * Otherwise, if +str+ is longer than the field, it is cut to fit
  #   +initial_max_length+ and the break string is appended to head.
  # * Otherwise nothing is broken off: ['', str].
  #
  # ['', str] is also returned whenever head would be blank.
  def break(str, initial_max_length, total_width)
    max = total_width - @hyphen.length
    if max <= 0
      # str[1..-1] is nil for an empty string, hence the fallback.
      ret = [str[0, 1], str[1..-1] || '']
    elsif str =~ /\A(.{1,#{max}}#{Regexp.escape(@hyphen)})(.*)/m
      # Anchored so that no text in front of the match is dropped, the
      # break string is escaped so that it is matched literally, and /m
      # lets "." match newlines too (the Perl-style /s is an encoding
      # flag in Ruby).
      ret = [$1, $2]
    elsif str.length > total_width
      sep = initial_max_length - @hyphen.length
      ret = [str[0, sep] + @hyphen, str[sep..-1]]
    else
      ret = ['', str]
    end

    return '', str if ret[0] =~ /\A\s*\Z/
    return ret
  end
end
1500
+
1501
# Word-breaker that never splits a word: a word either moves to the next
# line as a whole or, when it has a whole line to itself, is allowed to
# overflow ("slop over") the field.
class BreakWrap
  # Breaks by wrapping and slopping to the next line. Returns two values,
  # head and rest.
  #
  # +initial+:: The room that is left on the current line.
  # +total+:: The total width of the field.
  #
  # When the whole field is still free (+initial+ == +total+), head is the
  # leading whitespace plus the first word of +text+ and rest is everything
  # after it. Otherwise head is empty and +text+ is returned untouched so
  # that it wraps to the next line.
  def break(text, initial, total)
    return '', text unless initial == total

    # /m lets "." match newlines; without it everything after the first
    # newline in +text+ would silently be lost. Every part of the pattern
    # is optional, so it always matches.
    text =~ /\A(\s*\S*)(.*)/m
    return $1, $2
  end
end
1520
+
1521
+ # This word-breaker delegates to an object that implements the API presented by the
1522
+ # TeX::Hyphen and Text::Hyphen modules (only #hyphenate_to is used here).
1523
+ class BreakHyphenator
1524
+ def initialize(hyphenator)
1525
+ @hyphenator = hyphenator
1526
+ end
1527
+
1528
+ # Breaks a word by calling #hyphenate_to(str, initial_max_length) on the hyphenator and returns
1529
+ # its result with every +nil+ entry replaced by an empty string (the result is modified in place).
1530
+ #
1531
+ # +initial_max_length+:: The maximum size of the first part of the
1532
+ # word that will remain on the first line.
1533
+ # +total_width+:: The total width that can be appended to this
1534
+ # first line. It is not used by this breaker.
1535
+ def break(str, initial_max_length, total_width)
1536
+ res = @hyphenator.hyphenate_to(str, initial_max_length)
1537
+ res.map! { |ee| ee.nil? ? "" : ee }
1538
+ res
1539
+ end
1540
+ end
1541
+ end