resumetools 0.2.7.0 → 0.2.7.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1044 @@
1
+ # :title: Text::Format
2
+ # :main: Text::Format
3
+ #--
4
+ # Text::Format for Ruby
5
+ # Version 1.0.0
6
+ #
7
+ # Copyright (c) 2002 - 2005 Austin Ziegler
8
+ #
9
+ # $Id: format.rb,v 1.5 2005/04/20 01:43:55 austin Exp $
10
+ #++
11
+
12
+ # = Introduction
13
+ #
14
+ # Text::Format provides the ability to nicely format fixed-width text with
15
+ # knowledge of the writeable space (number of columns), margins, and
16
+ # indentation settings.
17
+ #
18
+ # Copyright:: Copyright (c) 2002 - 2005 by Austin Ziegler
19
+ # Version:: 1.0.0
20
+ # Based On:: Perl
21
+ # Text::Format[http://search.cpan.org/author/GABOR/Text-Format0.52/lib/Text/Format.pm],
22
+ # Copyright (c) 1998 Gábor Egressy
23
+ # Licence:: Ruby's, Perl Artistic, or GPL version 2 (or later)
24
+ #
25
+ module Text
26
+ class Format
27
+ VERSION = '1.0.0'
28
+
29
+ SPACES_RE = %r{\s+}mo.freeze
30
+ NEWLINE_RE = %r{\n}o.freeze
31
+ TAB = "\t".freeze
32
+ NEWLINE = "\n".freeze
33
+
34
+ # Global common English abbreviations. More can be added with
35
+ # #abbreviations.
36
+ ABBREV = %w(Mr Mrs Ms Jr Sr Dr)
37
+
38
+ # Formats text flush to the left margin with a visual and physical
39
+ # ragged right margin.
40
+ #
41
+ # >A paragraph that is<
42
+ # >left aligned.<
43
+ LEFT_ALIGN = :left
44
+ # Formats text flush to the right margin with a visual ragged left
45
+ # margin. The actual left margin is padded with spaces from the
46
+ # beginning of the line to the start of the text such that the right
47
+ # margin will be flush.
48
+ #
49
+ # >A paragraph that is<
50
+ # > right aligned.<
51
+ RIGHT_ALIGN = :right
52
+ # Formats text flush to the left margin with a visual ragged right
53
+ # margin. The line is padded with spaces from the end of the text to the
54
+ # right margin.
55
+ #
56
+ # >A paragraph that is<
57
+ # >right filled. <
58
+ RIGHT_FILL = :fill
59
+ # Formats the text flush to both the left and right margins. The last
60
+ # line will not be justified if it consists of a single word (it will be
61
+ # treated as +RIGHT_FILL+ in this case). Spacing between words is
62
+ # increased to ensure that the text is flush with both margins.
63
+ #
64
+ # |A paragraph that|
65
+ # |is justified.|
66
+ #
67
+ # |A paragraph that is|
68
+ # |justified. |
69
+ JUSTIFY = :justify
70
+
71
+ # When #hard_margins is enabled, a word that extends over the right
72
+ # margin will be split at the number of characters needed. This is
73
+ # similar to how characters wrap on a terminal. This is the default
74
+ # split mechanism when #hard_margins is enabled.
75
+ #
76
+ # repre
77
+ # senta
78
+ # tion
79
+ SPLIT_FIXED = 1
80
+ # When #hard_margins is enabled, a word that extends over the right
81
+ # margin will be split at one less than the number of characters needed
82
+ # with a C-style continuation character (\). If the word cannot be split
83
+ # using the rules of SPLIT_CONTINUATION, and the word will not fit
84
+ # wholly into the next line, then SPLIT_FIXED will be used.
85
+ #
86
+ # repr\
87
+ # esen\
88
+ # tati\
89
+ # on
90
+ SPLIT_CONTINUATION = 2
91
+ # When #hard_margins is enabled, a word that extends over the right
92
+ # margin will be split according to the hyphenator specified by the
93
+ # #hyphenator object; if there is no hyphenation library supplied, then
94
+ # the hyphenator of Text::Format itself is used, which is the same as
95
+ # SPLIT_CONTINUATION. See #hyphenator for more information about
96
+ # hyphenation libraries. The example below is valid with either
97
+ # TeX::Hyphen or Text::Hyphen. If the word cannot be split using the
98
+ # hyphenator's rules, and the word will not fit wholly into the next
99
+ # line, then SPLIT_FIXED will be used.
100
+ #
101
+ # rep-
102
+ # re-
103
+ # sen-
104
+ # ta-
105
+ # tion
106
+ #
107
+ SPLIT_HYPHENATION = 4
108
+ # When #hard_margins is enabled, a word that extends over the right
109
+ # margin will be split at one less than the number of characters needed
110
+ # with a C-style continuation character (\). If the word cannot be split
111
+ # using the rules of SPLIT_CONTINUATION, then SPLIT_FIXED will be used.
112
+ SPLIT_CONTINUATION_FIXED = SPLIT_CONTINUATION | SPLIT_FIXED
113
+ # When #hard_margins is enabled, a word that extends over the right
114
+ # margin will be split according to the hyphenator specified by the
115
+ # #hyphenator object; if there is no hyphenation library supplied, then
116
+ # the hyphenator of Text::Format itself is used, which is the same as
117
+ # SPLIT_CONTINUATION. See #hyphenator for more information about
118
+ # hyphenation libraries. The example below is valid with either
119
+ # TeX::Hyphen or Text::Hyphen. If the word cannot be split using the
120
+ # hyphenator's rules, then SPLIT_FIXED will be used.
121
+ SPLIT_HYPHENATION_FIXED = SPLIT_HYPHENATION | SPLIT_FIXED
122
+ # Attempts to split words according to the rules of the supplied
123
+ # hyphenator (e.g., SPLIT_HYPHENATION); if the word cannot be split
124
+ # using these rules, then the rules of SPLIT_CONTINUATION will be
125
+ # followed. In all cases, if the word cannot be split using either
126
+ # SPLIT_HYPHENATION or SPLIT_CONTINUATION, and the word will not fit
127
+ # wholly into the next line, then SPLIT_FIXED will be used.
128
+ SPLIT_HYPHENATION_CONTINUATION = SPLIT_HYPHENATION | SPLIT_CONTINUATION
129
+ # Attempts to split words according to the rules of the supplied
130
+ # hyphenator (e.g., SPLIT_HYPHENATION); if the word cannot be split
131
+ # using these rules, then the rules of SPLIT_CONTINUATION will be
132
+ # followed. In all cases, if the word cannot be split using either
133
+ # SPLIT_HYPHENATION or SPLIT_CONTINUATION, then SPLIT_FIXED will be
134
+ # used.
135
+ SPLIT_ALL = SPLIT_HYPHENATION | SPLIT_CONTINUATION | SPLIT_FIXED
136
+
137
+ # Words forcibly split by Text::Format will be stored as split words.
138
+ # This class represents a word forcibly split.
139
# Immutable record describing one word that the formatter had to break
# across two lines.
class SplitWord
  # +word+ is the original, unsplit word; +first+ is the fragment that was
  # kept on the current line; +rest+ is whatever was carried over.
  attr_reader :word, :first, :rest

  # Captures the original word together with its two fragments.
  def initialize(word, first, rest)
    @word, @first, @rest = word, first, rest
  end
end
153
+
154
+ # Indicates punctuation characters that terminate a sentence, as some
155
+ # English typesetting rules indicate that sentences should be followed
156
+ # by two spaces. This is an archaic rule, but is supported with
157
+ # #extra_space. This is the default set of terminal punctuation
158
+ # characters. Additional terminal punctuation may be added to the
159
+ # formatting object through #terminal_punctuation.
160
+ TERMINAL_PUNCTUATION = %q(.?!)
161
+ # Indicates quote characters that may follow terminal punctuation under
162
+ # the current formatting rules. This satisfies the English formatting
163
+ # rule that indicates that sentences terminated inside of quotes should
164
+ # have the punctuation inside of the quoted text, not outside of the
165
+ # terminal quote. Additional terminal quotes may be added to the
166
+ # formatting object through #terminal_quotes. See TERMINAL_PUNCTUATION
167
+ # for more information.
168
+ TERMINAL_QUOTES = %q('")
169
+
170
+ # This method returns the regular expression used to detect the end of a
171
+ # sentence under the current definition of TERMINAL_PUNCTUATION,
172
+ # #terminal_punctuation, TERMINAL_QUOTES, and #terminal_quotes.
173
# Builds the regular expression that recognises the end of a sentence: one
# terminal punctuation character, optionally followed by one terminal quote
# character, at the end of the line.
#
# The default sets (TERMINAL_PUNCTUATION, TERMINAL_QUOTES) are combined with
# the per-object additions (#terminal_punctuation, #terminal_quotes). Every
# character is escaped before being placed in the character class so that
# additions such as "]", "^", "-" or "\" are matched literally instead of
# changing the meaning of the expression.
#
# Returns a Regexp.
def __sentence_end_re
  punctuation = Regexp.escape("#{TERMINAL_PUNCTUATION}#{terminal_punctuation}")
  quotes = Regexp.escape("#{TERMINAL_QUOTES}#{terminal_quotes}")
  %r{[#{punctuation}][#{quotes}]?$}
end
private :__sentence_end_re
177
+
178
+ # Returns a regular expression for a set of characters (at least one
179
+ # non-whitespace followed by at least one space) of the specified size
180
+ # followed by one or more of any character.
181
+ RE_BREAK_SIZE = lambda { |size| %r[((?:\S+\s+){#{size}})(.+)] }
182
+
183
+ # Compares the formatting rules, excepting #hyphenator, of two
184
+ # Text::Format objects. Generated results (e.g., #split_words) are not
185
+ # compared.
186
# Equality on formatting rules. Two formatters are equal when every rule
# listed below has the same value in both; #hyphenator and generated
# results such as #split_words are deliberately not compared.
#
# +o+ may be any object. If it does not expose one of the rule readers
# (for example +nil+ or a String) the result is +false+ rather than a
# NoMethodError.
#
# Returns +true+ or +false+.
def ==(o)
  rule_names = [
    :text, :columns, :left_margin, :right_margin, :hard_margins,
    :split_rules, :first_indent, :body_indent, :tag_text, :tabstop,
    :format_style, :extra_space, :tag_paragraph, :nobreak,
    :terminal_punctuation, :terminal_quotes, :abbreviations, :nobreak_regex
  ]

  # Rules are checked in the order above and the scan stops at the first
  # difference.
  rule_names.all? do |name|
    o.respond_to?(name) && (instance_variable_get("@#{name}") == o.__send__(name))
  end
end
206
+
207
+ # The default text to be manipulated. Note that value is optional, but
208
+ # if the formatting functions are called without values, this text is
209
+ # what will be formatted.
210
+ #
211
+ # *Default*:: <tt>[]</tt>
212
+ # <b>Used in</b>:: All methods
213
+ attr_accessor :text
214
+
215
+ # The total width of the format area. The margins, indentation, and text
216
+ # are formatted into this space. Any value provided is silently
217
+ # converted to a positive integer.
218
+ #
219
+ # COLUMNS
220
+ # <-------------------------------------------------------------->
221
+ # <-----------><------><---------------------------><------------>
222
+ # left margin indent text is formatted into here right margin
223
+ #
224
+ # *Default*:: <tt>72</tt>
225
+ # <b>Used in</b>:: #format, #paragraphs, #center
226
+ attr_accessor :columns
227
# Writer for #columns: the argument is coerced with #to_i and stored as a
# non-negative Integer.
def columns=(col) #:nodoc:
  width = col.to_i
  @columns = width < 0 ? -width : width
end
230
+
231
+ # The number of spaces used for the left margin. The value provided is
232
+ # silently converted to a positive integer value.
233
+ #
234
+ # columns
235
+ # <-------------------------------------------------------------->
236
+ # <-----------><------><---------------------------><------------>
237
+ # LEFT MARGIN indent text is formatted into here right margin
238
+ #
239
+ # *Default*:: <tt>0</tt>
240
+ # <b>Used in</b>:: #format, #paragraphs, #center
241
+ attr_accessor :left_margin
242
# Writer for #left_margin: coerces the argument with #to_i and drops the
# sign before storing it.
def left_margin=(left) #:nodoc:
  spaces = left.to_i
  spaces = -spaces if spaces < 0
  @left_margin = spaces
end
245
+
246
+ # The number of spaces used for the right margin. The value provided is
247
+ # silently converted to a positive integer value.
248
+ #
249
+ # columns
250
+ # <-------------------------------------------------------------->
251
+ # <-----------><------><---------------------------><------------>
252
+ # left margin indent text is formatted into here RIGHT MARGIN
253
+ #
254
+ # *Default*:: <tt>0</tt>
255
+ # <b>Used in</b>:: #format, #paragraphs, #center
256
+ attr_accessor :right_margin
257
# Writer for #right_margin: stores the magnitude of the argument after
# integer coercion via #to_i.
def right_margin=(right) #:nodoc:
  spaces = right.to_i
  @right_margin = (spaces < 0) ? -spaces : spaces
end
260
+
261
+ # The number of spaces to indent the first line of a paragraph. The
262
+ # value provided is silently converted to a positive integer value.
263
+ #
264
+ # columns
265
+ # <-------------------------------------------------------------->
266
+ # <-----------><------><---------------------------><------------>
267
+ # left margin INDENT text is formatted into here right margin
268
+ #
269
+ # *Default*:: <tt>4</tt>
270
+ # <b>Used in</b>:: #format, #paragraphs
271
+ attr_accessor :first_indent
272
# Writer for #first_indent: integer-coerces the argument and stores its
# absolute value.
def first_indent=(first) #:nodoc:
  depth = first.to_i
  depth = -depth if depth < 0
  @first_indent = depth
end
275
+
276
+ # The number of spaces to indent all lines after the first line of a
277
+ # paragraph. The value provided is silently converted to a positive
278
+ # integer value.
279
+ #
280
+ # columns
281
+ # <-------------------------------------------------------------->
282
+ # <-----------><------><---------------------------><------------>
283
+ # left margin INDENT text is formatted into here right margin
284
+ #
285
+ # *Default*:: <tt>0</tt>
286
+ # <b>Used in</b>:: #format, #paragraphs
287
+ attr_accessor :body_indent
288
# Writer for #body_indent: the value is run through #to_i and saved
# without its sign.
def body_indent=(body) #:nodoc:
  depth = body.to_i
  @body_indent = depth < 0 ? -depth : depth
end
291
+
292
+ # Normally, words larger than the format area will be placed on a line
293
+ # by themselves. Setting this value to +true+ will force words larger
294
+ # than the format area to be split into one or more "words" each at most
295
+ # the size of the format area. The first line and the original word will
296
+ # be placed into #split_words. Note that this will cause the output to
297
+ # look *similar* to a #format_style of JUSTIFY. (Lines will be filled as
298
+ # much as possible.)
299
+ #
300
+ # *Default*:: +false+
301
+ # <b>Used in</b>:: #format, #paragraphs
302
+ attr_accessor :hard_margins
303
+
304
+ # An array of words split during formatting if #hard_margins is set to
305
+ # +true+.
306
+ # #split_words << Text::Format::SplitWord.new(word, first, rest)
307
+ attr_reader :split_words
308
+
309
+ # The object responsible for hyphenating. It must respond to
310
+ # #hyphenate_to(word, size) or #hyphenate_to(word, size, formatter) and
311
+ # return an array of the word split into two parts (e.g., <tt>[part1,
312
+ # part2]</tt>; if there is a hyphenation mark to be applied,
313
+ # responsibility belongs to the hyphenator object. The size is the
314
+ # MAXIMUM size permitted, including any hyphenation marks.
315
+ #
316
+ # If the #hyphenate_to method has an arity of 3, the current formatter
317
+ # (+self+) will be provided to the method. This allows the hyphenator to
318
+ # make decisions about the hyphenation based on the formatting rules.
319
+ #
320
+ # #hyphenate_to should return <tt>[nil, word]</tt> if the word cannot be
321
+ # hyphenated.
322
+ #
323
+ # *Default*:: +self+ (SPLIT_CONTINUATION)
324
+ # <b>Used in</b>:: #format, #paragraphs
325
+ attr_accessor :hyphenator
326
# Writer for #hyphenator. A +nil+ (or +false+) argument selects the
# formatter itself. The candidate must respond to #hyphenate_to and that
# method must report an arity of exactly 2 or 3; otherwise ArgumentError
# is raised. The accepted object and its arity are both remembered.
def hyphenator=(h) #:nodoc:
  candidate = h || self

  unless candidate.respond_to?(:hyphenate_to)
    raise ArgumentError, "#{candidate.inspect} is not a valid hyphenator."
  end

  arg_count = candidate.method(:hyphenate_to).arity
  unless arg_count.between?(2, 3)
    raise ArgumentError, "#{candidate.inspect} must have exactly two or three arguments."
  end

  @hyphenator = candidate
  @hyphenator_arity = arg_count
end
336
+
337
+ # Specifies the split mode; used only when #hard_margins is set to
338
+ # +true+. Allowable values are:
339
+ #
340
+ # * +SPLIT_FIXED+
341
+ # * +SPLIT_CONTINUATION+
342
+ # * +SPLIT_HYPHENATION+
343
+ # * +SPLIT_CONTINUATION_FIXED+
344
+ # * +SPLIT_HYPHENATION_FIXED+
345
+ # * +SPLIT_HYPHENATION_CONTINUATION+
346
+ # * +SPLIT_ALL+
347
+ #
348
+ # *Default*:: <tt>Text::Format::SPLIT_FIXED</tt>
349
+ # <b>Used in</b>:: #format, #paragraphs
350
+ attr_accessor :split_rules
351
# Writer for #split_rules. Values below SPLIT_FIXED or above SPLIT_ALL are
# rejected with ArgumentError; anything in between is stored unchanged.
def split_rules=(s) #:nodoc:
  if s < SPLIT_FIXED || s > SPLIT_ALL
    raise ArgumentError, "Invalid value provided for #split_rules."
  end

  @split_rules = s
end
355
+
356
+ # Indicates whether sentence terminators should be followed by a single
357
+ # space (+false+), or two spaces (+true+). See #abbreviations for more
358
+ # information.
359
+ #
360
+ # *Default*:: +false+
361
+ # <b>Used in</b>:: #format, #paragraphs
362
+ attr_accessor :extra_space
363
+
364
+ # Defines the current abbreviations as an array. This is only used if
365
+ # extra_space is turned on.
366
+ #
367
+ # If one is abbreviating "President" as "Pres." (abbreviations =
368
+ # ["Pres"]), then the results of formatting will be as illustrated in
369
+ # the table below:
370
+ #
371
+ # abbreviations
372
+ # extra_space | #include?("Pres") | not #include?("Pres")
373
+ # ------------+-------------------+----------------------
374
+ # true | Pres. Lincoln | Pres. Lincoln
375
+ # false | Pres. Lincoln | Pres. Lincoln
376
+ # ------------+-------------------+----------------------
377
+ # extra_space | #include?("Mrs") | not #include?("Mrs")
378
+ # true | Mrs. Lincoln | Mrs. Lincoln
379
+ # false | Mrs. Lincoln | Mrs. Lincoln
380
+ #
381
+ # Note that abbreviations should not have the terminal period as part of
382
+ # their definitions.
383
+ #
384
+ # This automatic abbreviation handling *will* cause some issues with
385
+ # uncommon sentence structures. The two sentences below will not be
386
+ # formatted correctly:
387
+ #
388
+ # You're in trouble now, Mr.
389
+ # Just wait until your father gets home.
390
+ #
391
+ # Under no circumstances (because Mr is a predefined abbreviation) will
392
+ # this ever be separated by two spaces.
393
+ #
394
+ # *Default*:: <tt>[]</tt>
395
+ # <b>Used in</b>:: #format, #paragraphs
396
+ attr_accessor :abbreviations
397
+
398
+ # Specifies additional punctuation characters that terminate a sentence,
399
+ # as some English typesetting rules indicate that sentences should be
400
+ # followed by two spaces. This is an archaic rule, but is supported with
401
+ # #extra_space. This is added to the default set of terminal punctuation
402
+ # defined in TERMINAL_PUNCTUATION.
403
+ #
404
+ # *Default*:: <tt>""</tt>
405
+ # <b>Used in</b>:: #format, #paragraphs
406
+ attr_accessor :terminal_punctuation
407
+ # Specifies additional quote characters that may follow
408
+ # terminal punctuation under the current formatting rules. This
409
+ # satisfies the English formatting rule that indicates that sentences
410
+ # terminated inside of quotes should have the punctuation inside of the
411
+ # quoted text, not outside of the terminal quote. This is added to the
412
+ # default set of terminal quotes defined in TERMINAL_QUOTES.
413
+ #
414
+ # *Default*:: <tt>""</tt>
415
+ # <b>Used in</b>:: #format, #paragraphs
416
+ attr_accessor :terminal_quotes
417
+
418
+ # Indicates whether the formatting of paragraphs should be done with
419
+ # tagged paragraphs. Useful only with #tag_text.
420
+ #
421
+ # *Default*:: +false+
422
+ # <b>Used in</b>:: #format, #paragraphs
423
+ attr_accessor :tag_paragraph
424
+
425
+ # The text to be placed before each paragraph when #tag_paragraph is
426
+ # +true+. When #format is called, only the first element (#tag_text[0])
427
+ # is used. When #paragraphs is called, then each successive element
428
+ # (#tag_text[n]) will be used once, with corresponding paragraphs. If
429
+ # the tag elements are exhausted before the text is exhausted, then the
430
+ # remaining paragraphs will not be tagged. Regardless of indentation
431
+ # settings, a blank line will be inserted between all paragraphs when
432
+ # #tag_paragraph is +true+.
433
+ #
434
+ # The Text::Format package provides three number generators,
435
+ # Text::Format::Alpha, Text::Format::Number, and Text::Format::Roman to
436
+ # assist with the numbering of paragraphs.
437
+ #
438
+ # *Default*:: <tt>[]</tt>
439
+ # <b>Used in</b>:: #format, #paragraphs
440
+ attr_accessor :tag_text
441
+
442
+ # Indicates whether or not the non-breaking space feature should be
443
+ # used.
444
+ #
445
+ # *Default*:: +false+
446
+ # <b>Used in</b>:: #format, #paragraphs
447
+ attr_accessor :nobreak
448
+
449
+ # A hash which holds the regular expressions on which spaces should not
450
+ # be broken. The hash is set up such that the key is the first word and
451
+ # the value is the second word.
452
+ #
453
+ # For example, if +nobreak_regex+ contains the following hash:
454
+ #
455
+ # { %r{Mrs?\.?} => %r{\S+}, %r{\S+} => %r{(?:[SJ])r\.?} }
456
+ #
457
+ # Then "Mr. Jones", "Mrs Jones", and "Jones Jr." would not be broken. If
458
+ # this simple matching algorithm indicates that there should not be a
459
+ # break at the current end of line, then a backtrack is done until there
460
+ # are two words on which line breaking is permitted. If two such words
461
+ # are not found, then the end of the line will be broken *regardless*.
462
+ # If there is a single word on the current line, then no backtrack is
463
+ # done and the word is stuck on the end.
464
+ #
465
+ # *Default*:: <tt>{}</tt>
466
+ # <b>Used in</b>:: #format, #paragraphs
467
+ attr_accessor :nobreak_regex
468
+
469
+ # Indicates the number of spaces that a single tab represents. Any value
470
+ # provided is silently converted to a positive integer.
471
+ #
472
+ # *Default*:: <tt>8</tt>
473
+ # <b>Used in</b>:: #expand, #unexpand,
474
+ # #paragraphs
475
+ attr_accessor :tabstop
476
# Writer for #tabstop: integer-coerces the argument with #to_i and keeps
# only its magnitude.
def tabstop=(tabs) #:nodoc:
  stop = tabs.to_i
  stop = -stop if stop < 0
  @tabstop = stop
end
479
+
480
+ # Specifies the format style. Allowable values are:
481
+ # *+LEFT_ALIGN+
482
+ # *+RIGHT_ALIGN+
483
+ # *+RIGHT_FILL+
484
+ # *+JUSTIFY+
485
+ #
486
+ # *Default*:: <tt>Text::Format::LEFT_ALIGN</tt>
487
+ # <b>Used in</b>:: #format, #paragraphs
488
+ attr_accessor :format_style
489
# Writer for #format_style. Only LEFT_ALIGN, RIGHT_ALIGN, RIGHT_FILL and
# JUSTIFY are accepted; any other value raises ArgumentError.
def format_style=(fs) #:nodoc:
  known_styles = [LEFT_ALIGN, RIGHT_ALIGN, RIGHT_FILL, JUSTIFY]
  unless known_styles.include?(fs)
    raise ArgumentError, "Invalid value provided for format_style."
  end

  @format_style = fs
end
493
+
494
+ # Indicates that the format style is left alignment.
495
+ #
496
+ # *Default*:: +true+
497
+ # <b>Used in</b>:: #format, #paragraphs
498
# True when the current format style is LEFT_ALIGN.
def left_align?
  LEFT_ALIGN == @format_style
end
501
+
502
+ # Indicates that the format style is right alignment.
503
+ #
504
+ # *Default*:: +false+
505
+ # <b>Used in</b>:: #format, #paragraphs
506
# True when the current format style is RIGHT_ALIGN.
def right_align?
  RIGHT_ALIGN == @format_style
end
509
+
510
+ # Indicates that the format style is right fill.
511
+ #
512
+ # *Default*:: +false+
513
+ # <b>Used in</b>:: #format, #paragraphs
514
# True when the current format style is RIGHT_FILL.
def right_fill?
  RIGHT_FILL == @format_style
end
517
+
518
+ # Indicates that the format style is full justification.
519
+ #
520
+ # *Default*:: +false+
521
+ # <b>Used in</b>:: #format, #paragraphs
522
# True when the current format style is JUSTIFY.
def justify?
  JUSTIFY == @format_style
end
525
+
526
+ # The formatting object itself can be used as a #hyphenator, where the
527
+ # default implementation of #hyphenate_to implements the conditions
528
+ # necessary to properly produce SPLIT_CONTINUATION.
529
# Built-in hyphenator producing C-style continuation splits.
#
# Returns <tt>[first, rest]</tt>, where +first+ holds the leading
# <tt>size - 1</tt> characters of +word+ followed by a backslash and
# +rest+ is what remains. When +size+ is smaller than 2 there is no room
# for a character plus the marker, so <tt>[nil, word]</tt> is returned.
def hyphenate_to(word, size)
  return [nil, word] if size < 2

  head = word[0 .. (size - 2)]
  tail = word[(size - 1) .. -1]
  [head + "\\", tail]
end
536
+
537
+ # Splits the provided word so that it is in two parts, <tt>word[0 ..
538
+ # (size - 1)]</tt> and <tt>word[size .. -1]</tt>.
539
# Cuts +word+ at a fixed position and returns both pieces as
# <tt>[head, tail]</tt>: +head+ is the inclusive range up to index
# <tt>size - 1</tt>, +tail+ is everything from index +size+ onwards.
def split_word_to(word, size)
  head = word.slice(0 .. (size - 1))
  tail = word.slice(size .. -1)
  [head, tail]
end
542
+
543
+ # Formats text into a nice paragraph format. The text is separated into
544
+ # words and then reassembled a word at a time using the settings of this
545
+ # Format object.
546
+ #
547
+ # If +text+ is +nil+, then the value of #text will be worked on.
548
# Formats a single paragraph. The text is broken into whitespace-separated
# words which are then reassembled, one word at a time, into lines no wider
# than the space left by #columns, the margins and the indent.
#
# +text+:: the paragraph to format; when +nil+ the value of #text is used.
#          If an Array is supplied only its first element is formatted.
#
# Returns the formatted paragraph as a single String (each line terminated
# by a newline). When #tag_paragraph is set, the current tag is merged into
# or placed ahead of the first line.
#
# Also available as #format.
def format_one_paragraph(text = nil)
  text ||= @text
  text = text[0] if text.kind_of?(Array)

  # Convert the provided paragraph to a list of words. The list is reversed
  # so that the next word can be taken cheaply with #pop.
  words = text.split(SPACES_RE).reverse.reject { |ww| ww.nil? or ww.empty? }

  text = []

  # Find the maximum line width and the initial indent string.
  # TODO 20050114 - allow the left and right margins to be specified as
  # strings. If they are strings, then we need to use the sizes of the
  # strings. Also: allow the indent string to be set manually and
  # indicate whether the indent string will have a following space.
  max_line_width = @columns - @first_indent - @left_margin - @right_margin
  indent_str = ' ' * @first_indent

  first_line = true

  if words.empty?
    line = []
    line_size = 0
    extra_space = false
  else
    line = [ words.pop ]
    line_size = line[-1].size
    extra_space = __add_extra_space?(line[-1])
  end

  while next_word = words.pop
    next_word.strip! unless next_word.nil?
    new_line_size = (next_word.size + line_size) + 1

    # The previous word only earns the extra space if it really ends a
    # sentence.
    if extra_space
      if (line[-1] !~ __sentence_end_re)
        extra_space = false
      end
    end

    # Increase the width of the new line if there's a sentence
    # terminator and we are applying extra_space.
    new_line_size += 1 if extra_space

    # Will the word fit onto the current line? If so, simply append it
    # to the end of the line.

    if new_line_size <= max_line_width
      if line.empty?
        line << next_word
      else
        if extra_space
          # Two separating spaces, matching the extra column that was
          # added to new_line_size above.
          line << "  #{next_word}"
        else
          line << " #{next_word}"
        end
      end
    else
      # Forcibly wrap the line if nonbreaking spaces are turned on and
      # there is a condition where words must be wrapped. If we have
      # returned more than one word, readjust the word list.
      line, next_word = __wrap_line(line, next_word) if @nobreak
      if next_word.kind_of?(Array)
        if next_word.size > 1
          words.push(*(next_word.reverse))
          next_word = words.pop
        else
          next_word = next_word[0]
        end
        next_word.strip! unless next_word.nil?
      end

      # Check to see if the line needs to be hyphenated. If a word has a
      # hyphen in it (e.g., "fixed-width"), then we can ALWAYS wrap at
      # that hyphenation, even if #hard_margins is not turned on. More
      # elaborate forms of hyphenation will only be performed if
      # #hard_margins is turned on. If we have returned more than one
      # word, readjust the word list.
      line, new_line_size, next_word = __hyphenate(line, line_size, next_word, max_line_width)
      if next_word.kind_of?(Array)
        if next_word.size > 1
          words.push(*(next_word.reverse))
          next_word = words.pop
        else
          next_word = next_word[0]
        end
        next_word.strip! unless next_word.nil?
      end

      text << __make_line(line, indent_str, max_line_width, next_word.nil?) unless line.nil?

      # Every line after the first uses the body indent.
      if first_line
        first_line = false
        max_line_width = @columns - @body_indent - @left_margin - @right_margin
        indent_str = ' ' * @body_indent
      end

      if next_word.nil?
        line = []
        new_line_size = 0
      else
        line = [ next_word ]
        new_line_size = next_word.size
      end
    end

    line_size = new_line_size
    extra_space = __add_extra_space?(next_word) unless next_word.nil?
  end

  # Flush whatever is left on the current line; __hyphenate may hand back
  # an overflow (ww) that becomes the next line to flush.
  loop do
    break if line.nil? or line.empty?
    line, line_size, ww = __hyphenate(line, line_size, ww, max_line_width)#if @hard_margins
    text << __make_line(line, indent_str, max_line_width, ww.nil?)
    line = ww
    ww = nil
  end

  if (@tag_paragraph and (not text.empty?))
    if @tag_cur.nil? or @tag_cur.empty?
      @tag_cur = @tag_text[0]
    end

    # Position of the first non-blank character on the first line.
    fchar = /(\S)/o.match(text[0])[1]
    white = text[0].index(fchar)

    unless @tag_cur.nil?
      if ((white - @left_margin - 1) > @tag_cur.size) then
        # The tag fits into the leading whitespace: overwrite it in place.
        white = @tag_cur.size + @left_margin
        text[0].gsub!(/^ {#{white}}/, "#{' ' * @left_margin}#{@tag_cur}")
      else
        # Otherwise the tag gets a line of its own ahead of the paragraph.
        text.unshift("#{' ' * @left_margin}#{@tag_cur}\n")
      end
    end
  end

  text.join('')
end
alias format format_one_paragraph
686
+
687
+ # Considers each element of text (provided or internal) as a paragraph.
688
+ # If #first_indent is the same as #body_indent, then paragraphs will be
689
+ # separated by a single empty line in the result; otherwise, the
690
+ # paragraphs will follow immediately after each other. Uses #format to
691
+ # do the heavy lifting.
692
+ #
693
+ # If +to_wrap+ responds to #split, then it will be split into an array
694
+ # of elements by calling #split with the value of +split_on+. The
695
+ # default value of split_on is $/, or the default record separator,
696
+ # repeated twice (e.g., /\n\n/).
697
# Formats several paragraphs and returns them joined into one String.
#
# +to_wrap+::  the paragraphs; defaults to #text. Anything responding to
#              #split is split with +split_on+, anything else is treated
#              as (or flattened into) an Array of paragraphs.
# +split_on+:: pattern separating paragraphs. The default is the record
#              separator ($/) repeated twice. The group is non-capturing:
#              String#split returns captured separators as extra elements,
#              and those would be counted as paragraphs and consume
#              entries of #tag_text.
#
# Paragraphs are separated by an empty line when #first_indent equals
# #body_indent or when #tag_paragraph is set; otherwise they follow each
# other directly. Paragraphs that format to nothing are omitted. Each
# element is formatted with #format, using the #tag_text entry at the same
# index when #tag_paragraph is set.
def paragraphs(to_wrap = nil, split_on = /(?:#{$/}){2}/o)
  to_wrap = @text if to_wrap.nil?
  if to_wrap.respond_to?(:split)
    to_wrap = to_wrap.split(split_on)
  else
    to_wrap = [to_wrap].flatten
  end

  p_end = ((@first_indent == @body_indent) || @tag_paragraph) ? NEWLINE : ''

  ret = []
  to_wrap.each_with_index do |tw, index|
    @tag_cur = @tag_text[index] if @tag_paragraph
    @tag_cur = '' if @tag_cur.nil?
    line = format(tw)
    ret << "#{line}#{p_end}" unless line.nil? || line.empty?
  end

  # The final paragraph does not need a trailing separator.
  ret[-1].chomp! unless ret.empty?
  ret.join('')
end
724
+
725
+ # Centers the text, preserving empty lines and tabs.
726
# Centers each entry of +to_center+ (defaults to #text) on its own line and
# returns the lines joined into one String. Entries are stripped of
# surrounding whitespace first; embedded tab characters are counted and
# weighted by #tabstop when computing the padding.
#
# NOTE(review): the margins are subtracted once for +width+ and again for
# the pad target, and the left margin is never prepended. This looks
# unintended, but the behaviour is preserved here; confirm before changing.
def center(to_center = nil)
  to_center = @text if to_center.nil?
  width = @columns - @left_margin - @right_margin

  rows = [to_center].flatten.map do |tc|
    s = tc.strip
    tabs = s.count(TAB)
    half = (width - s.size - (tabs * @tabstop) + tabs) / 2
    target = (width - @left_margin - @right_margin) - half
    "#{s.rjust(target)}\n"
  end

  rows.join('')
end
743
+
744
+ # Replaces all tab characters in the text with #tabstop spaces.
745
# Replaces every tab character with #tabstop spaces.
#
# +to_expand+:: a String, or an Array of Strings; defaults to #text.
#
# Returns a String for String input and an Array of Strings for Array
# input. Each string is processed line by line and rejoined with single
# newlines. Lines without a tab are returned unchanged; the non-bang
# #gsub is used because #gsub! returns +nil+ when nothing was replaced,
# which would blank those lines.
def expand(to_expand = nil)
  to_expand = @text if to_expand.nil?

  spaces = ' ' * @tabstop
  changer = lambda do |text|
    text.split(NEWLINE_RE).map { |ln| ln.gsub(/\t/, spaces) }.join(NEWLINE)
  end

  if to_expand.kind_of?(Array)
    to_expand.collect { |te| changer[te] }
  else
    changer[to_expand]
  end
end
761
+
762
+ # Replaces all occurrences of #tabstop consecutive spaces with a tab
763
+ # character.
764
# Replaces every run of #tabstop consecutive spaces with one tab
# character.
#
# +to_unexpand+:: a String, or an Array of Strings; defaults to #text.
#
# Returns a String for String input and an Array of Strings for Array
# input. Each string is processed line by line and rejoined with single
# newlines. Lines without such a run are returned unchanged; the non-bang
# #gsub is used because #gsub! returns +nil+ when nothing was replaced,
# which would blank those lines.
def unexpand(to_unexpand = nil)
  to_unexpand = @text if to_unexpand.nil?

  run = / {#{@tabstop}}/
  changer = lambda do |text|
    text.split(NEWLINE_RE).map { |ln| ln.gsub(run, TAB) }.join(NEWLINE)
  end

  if to_unexpand.kind_of?(Array)
    to_unexpand.collect { |tu| changer[tu] }
  else
    changer[to_unexpand]
  end
end
780
+
781
+ # Return +true+ if the word may have an extra space added after it. This
782
+ # will only be the case if #extra_space is +true+ and the word is not an
783
+ # abbreviation.
784
# Decides whether +word+ may be followed by an additional space.
#
# Always +false+ when #extra_space is off. Otherwise a single trailing
# period is removed from the word and the result is looked up in ABBREV
# and in #abbreviations: known abbreviations give +false+, everything
# else gives +true+.
def __add_extra_space?(word)
  return false unless @extra_space

  bare = word.nil? ? word : word.gsub(/\.$/o, '')
  !(ABBREV.include?(bare) || @abbreviations.include?(bare))
end
private :__add_extra_space?
792
+
793
# Assembles one output line from its word fragments.
#
# +line+::   Array of Strings; the first is a bare word, each later one
#            carries its own leading whitespace. Fragments may be padded
#            in place when justifying.
# +indent+:: indentation string placed after the left margin.
# +width+::  number of columns available for the text itself.
# +last+::   +true+ for the final line of a paragraph, which is never
#            justified.
#
# Returns the finished line (left margin + indent + text + newline). For
# RIGHT_FILL the text is padded out to +width+; for RIGHT_ALIGN the whole
# line is shifted right so that it ends at the right margin; for JUSTIFY
# the missing columns are spread over the gaps between the words. An empty
# +line+ is returned unchanged.
def __make_line(line, indent, width, last = false) #:nodoc:
  return line if line.empty?

  line_size = line.inject(0) { |ls, el| ls + el.size }
  lmargin = " " * @left_margin
  fill = " " * (width - line_size) if right_fill? and (line_size <= width)

  if !last && justify? && (line.size > 1)
    # Only fragments that begin with whitespace mark a gap between words.
    gaps = line.reject { |word| word =~ /^\S/ }
    spaces = width - line_size

    if (spaces > 0) && !gaps.empty?
      # Spread the missing columns evenly over the gaps; any remainder
      # goes, one column each, to the gaps nearest the end of the line.
      word_spaces, extra = spaces.divmod(gaps.size)
      gaps.reverse.each_with_index do |word, idx|
        pad = word_spaces + (idx < extra ? 1 : 0)
        word.sub!(/^/, " " * pad)
      end
    end
  end

  line = "#{lmargin}#{indent}#{line.join('')}#{fill}\n"

  if right_align?
    # line.size - 1 ignores the trailing newline. A line that is already
    # too wide is left where it is.
    shift = @columns - @right_margin - (line.size - 1)
    shift > 0 ? (" " * shift) + line : line
  else
    line
  end
end
824
+ # private :__make_line
825
+
826
# Applies the enabled split rules to +word+ so that its first part fits in
# +size+ columns; returns <tt>[first, rest]</tt>, with +first+ nil when no
# rule produced a split. Without hard margins the word is returned whole.
#
# +force_over+ is the length above which an unsplit remainder is cut with
# #split_word_to even though SPLIT_FIXED is not enabled.
def __split_to_fit(word, size, force_over) #:nodoc:
  return [word, nil] unless @hard_margins

  first = rest = nil

  if (@split_rules & SPLIT_HYPHENATION) == SPLIT_HYPHENATION
    if @hyphenator_arity == 2
      first, rest = @hyphenator.hyphenate_to(word, size)
    else
      first, rest = @hyphenator.hyphenate_to(word, size, self)
    end
  end

  if first.nil? and (@split_rules & SPLIT_CONTINUATION) == SPLIT_CONTINUATION
    first, rest = hyphenate_to(word, size)
  end

  if first.nil?
    if (@split_rules & SPLIT_FIXED) == SPLIT_FIXED
      first, rest = split_word_to(word, size)
    elsif (not rest.nil?) and rest.size > force_over
      first, rest = split_word_to(word, size)
    end
  end

  [first, rest]
end
private :__split_to_fit

# Adjusts a line that does not exactly fill +width+ by splitting a word
# across the line boundary.
#
# line::      array of string fragments on the current line. It is
#             duplicated; the caller's array is not modified.
# line_size:: combined size of the fragments in +line+.
# next_word:: the word waiting to follow +line+, or +nil+.
# width::     number of columns available.
#
# If the line is too wide, trailing fragments are taken off the end until
# one of them straddles the boundary; that one is split and its head put
# back. If the line is too narrow, the head of the pending word is appended
# when a split fits. Every split performed is recorded in @split_words.
#
# Returns <tt>[line, line_size, next_word]</tt> untouched when +line+ is
# nil or empty. Otherwise returns <tt>[new_line, new_size, pending]</tt>,
# where +pending+ is an array of the fragments that still have to be
# placed, in order.
def __hyphenate(line, line_size, next_word, width) #:nodoc:
  return [line, line_size, next_word] if line.nil? or line.empty?

  rline = line.dup
  rsize = line_size
  rnext = []
  rnext << next_word.dup unless next_word.nil?

  loop do
    break if rsize == width

    if rsize > width
      word = rline.pop
      # Guards against a +line_size+ that overstates the line: without it
      # the next statement would call nil.size.
      break if word.nil?

      # Account for the removed fragment right away so that the room
      # computed below, and the size reported to the caller, stay in step
      # with what is actually left on the line.
      rsize -= word.size
      size = width - rsize

      if size < 1
        # The fragment starts beyond the margin: move it over whole and
        # look at the one before it.
        rnext.unshift word
        next
      end

      first, rest = __split_to_fit(word, size, size)

      if first.nil?
        rest = word
      else
        if rline.empty?
          rline << first
          rsize += first.size
        else
          rline << " #{first}"
          rsize += first.size + 1
        end
        @split_words << SplitWord.new(word, first, rest)
      end

      rnext.unshift rest unless rest.nil?
    else
      break if rnext.empty?

      word = rnext.shift.dup
      size = width - rsize - 1 # one column is needed for the joining space

      if size <= 0
        rnext.unshift word
        break
      end

      first, rest = __split_to_fit(word, size, width)

      if first.nil? or (rsize + first.size) >= width
        # No usable split: the whole word stays pending. (It used to be
        # replaced by nil when the split rules returned nothing at all.)
        rest = word
      else
        @split_words << SplitWord.new(word, first, rest)
        rsize += first.size + 1
        rline << " #{first}"
      end

      # NOTE(review): +rest+ is nil here when the word fitted entirely; it
      # is still pushed, as before, so callers see the same array shape.
      rnext.unshift rest
    end

    break
  end

  [rline, rsize, rnext]
end
private :__hyphenate
963
+
964
+ # The line must be broken. Typically, this is done by moving the last
965
+ # word on the current line to the next line. However, it may be possible
966
+ # that certain combinations of words may not be broken (see
967
+ # #nobreak_regex for more information). Therefore, it may be necessary
968
+ # to move multiple words from the current line to the next line. This
969
+ # function does this.
970
# Moves elements from the end of +line+ to the following line when the
# last element and +next_word+ match a pair in @nobreak_regex.
#
# Returns <tt>[line, moved]</tt>. +moved+ is the array of elements sliced
# off the end of +line+ with +next_word+ appended, or +nil+ when nothing
# was moved. +line+ itself is shortened in place (Array#slice!).
def __wrap_line(line, next_word)
  # True when at least one (first, second) pair of @nobreak_regex matches
  # +left+ and +right+ respectively. Every pair is consulted.
  glued = lambda do |left, right|
    matched = false
    @nobreak_regex.each_pair do |first, second|
      matched = true if left =~ first && right =~ second
    end
    matched
  end

  moved = nil
  cursor = line.size - 1

  if glued[line[cursor], next_word] and cursor.nonzero?
    # Walk towards the start of the line for as long as the element under
    # the cursor is tied to the one after it.
    cursor -= 1
    cursor -= 1 while cursor.nonzero? && glued[line[cursor], line[cursor + 1]]

    # NOTE(review): the cut is made AT the cursor, so the element that was
    # just found to be separable from its successor moves too. Whether that
    # is intended depends on how the caller lays out +line+ -- confirm.
    unless cursor.zero?
      moved = line.slice!(cursor..-1)
      moved << next_word
    end
  end

  [line, moved]
end
private :__wrap_line
1008
+
1009
+ # Create a Text::Format object. Accepts an optional hash of construction
1010
+ # options (this will be changed to named parameters in Ruby 2.0). After
1011
+ # the initial object is constructed (with either the provided or default
1012
+ # values), the object will be yielded (as +self+) to an optional block
1013
+ # for further construction and operation.
1014
# Builds a formatter from +options+. Each recognised key sets the instance
# variable of the same name; a missing, +nil+ or +false+ value falls back
# to the default listed below. The hyphenator defaults to the object
# itself, and the arity of its +hyphenate_to+ method is cached in
# @hyphenator_arity. @tag_cur starts as an empty string and @split_words as
# an empty array. When a block is given it is called with the new object
# once everything has been set.
def initialize(options = {}) #:yields self:
  # Built per call so that every object gets its own default arrays,
  # hash and strings.
  defaults = [
    [:text,                 []],
    [:columns,              72],
    [:tabstop,              8],
    [:first_indent,         4],
    [:body_indent,          0],
    [:format_style,         LEFT_ALIGN],
    [:left_margin,          0],
    [:right_margin,         0],
    [:extra_space,          false],
    [:tag_paragraph,        false],
    [:tag_text,             []],
    [:abbreviations,        []],
    [:terminal_punctuation, ""],
    [:terminal_quotes,      ""],
    [:nobreak,              false],
    [:nobreak_regex,        {}],
    [:hard_margins,         false],
    [:split_rules,          SPLIT_FIXED],
    [:hyphenator,           self]
  ]

  defaults.each do |key, fallback|
    instance_variable_set("@#{key}", options[key] || fallback)
  end

  @hyphenator_arity = @hyphenator.method(:hyphenate_to).arity
  @tag_cur = ""
  @split_words = []

  yield self if block_given?
end
1041
+
1042
+ end # class Format
1043
+
1044
+ end # module Text