resumetools 0.2.7.0 → 0.2.7.6

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,1044 @@
1
+ # :title: Text::Format
2
+ # :main: Text::Format
3
+ #--
4
+ # Text::Format for Ruby
5
+ # Version 1.0.0
6
+ #
7
+ # Copyright (c) 2002 - 2005 Austin Ziegler
8
+ #
9
+ # $Id: format.rb,v 1.5 2005/04/20 01:43:55 austin Exp $
10
+ #++
11
+
12
+ # = Introduction
13
+ #
14
+ # Text::Format provides the ability to nicely format fixed-width text with
15
+ # knowledge of the writeable space (number of columns), margins, and
16
+ # indentation settings.
17
+ #
18
+ # Copyright:: Copyright (c) 2002 - 2005 by Austin Ziegler
19
+ # Version:: 1.0.0
20
+ # Based On:: Perl
21
+ # Text::Format[http://search.cpan.org/author/GABOR/Text-Format0.52/lib/Text/Format.pm],
22
+ # Copyright (c) 1998 Gábor Egressy
23
+ # Licence:: Ruby's, Perl Artistic, or GPL version 2 (or later)
24
+ #
25
+ module Text
26
+ class Format
27
+ VERSION = '1.0.0'
28
+
29
+ SPACES_RE = %r{\s+}mo.freeze
30
+ NEWLINE_RE = %r{\n}o.freeze
31
+ TAB = "\t".freeze
32
+ NEWLINE = "\n".freeze
33
+
34
+ # Global common English abbreviations. More can be added with
35
+ # #abbreviations.
36
+ ABBREV = %w(Mr Mrs Ms Jr Sr Dr)
37
+
38
+ # Formats text flush to the left margin with a visual and physical
39
+ # ragged right margin.
40
+ #
41
+ # >A paragraph that is<
42
+ # >left aligned.<
43
+ LEFT_ALIGN = :left
44
+ # Formats text flush to the right margin with a visual ragged left
45
+ # margin. The actual left margin is padded with spaces from the
46
+ # beginning of the line to the start of the text such that the right
47
+ # margin will be flush.
48
+ #
49
+ # >A paragraph that is<
50
+ # >      right aligned.<
51
+ RIGHT_ALIGN = :right
52
+ # Formats text flush to the left margin with a visual ragged right
53
+ # margin. The line is padded with spaces from the end of the text to the
54
+ # right margin.
55
+ #
56
+ # >A paragraph that is<
57
+ # >right filled.      <
58
+ RIGHT_FILL = :fill
59
+ # Formats the text flush to both the left and right margins. The last
60
+ # line will not be justified if it consists of a single word (it will be
61
+ # treated as +RIGHT_FILL+ in this case). Spacing between words is
62
+ # increased to ensure that the text is flush with both margins.
63
+ #
64
+ # |A paragraph  that|
65
+ # |is     justified.|
66
+ #
67
+ # |A paragraph that is|
68
+ # |justified.         |
69
+ JUSTIFY = :justify
70
+
71
+ # When #hard_margins is enabled, a word that extends over the right
72
+ # margin will be split at the number of characters needed. This is
73
+ # similar to how characters wrap on a terminal. This is the default
74
+ # split mechanism when #hard_margins is enabled.
75
+ #
76
+ # repre
77
+ # senta
78
+ # tion
79
+ SPLIT_FIXED = 1
80
+ # When #hard_margins is enabled, a word that extends over the right
81
+ # margin will be split at one less than the number of characters needed
82
+ # with a C-style continuation character (\). If the word cannot be split
83
+ # using the rules of SPLIT_CONTINUATION, and the word will not fit
84
+ # wholly into the next line, then SPLIT_FIXED will be used.
85
+ #
86
+ # repr\
87
+ # esen\
88
+ # tati\
89
+ # on
90
+ SPLIT_CONTINUATION = 2
91
+ # When #hard_margins is enabled, a word that extends over the right
92
+ # margin will be split according to the hyphenator specified by the
93
+ # #hyphenator object; if there is no hyphenation library supplied, then
94
+ # the hyphenator of Text::Format itself is used, which is the same as
95
+ # SPLIT_CONTINUATION. See #hyphenator for more information about
96
+ # hyphenation libraries. The example below is valid with either
97
+ # TeX::Hyphen or Text::Hyphen. If the word cannot be split using the
98
+ # hyphenator's rules, and the word will not fit wholly into the next
99
+ # line, then SPLIT_FIXED will be used.
100
+ #
101
+ # rep-
102
+ # re-
103
+ # sen-
104
+ # ta-
105
+ # tion
106
+ #
107
+ SPLIT_HYPHENATION = 4
108
+ # When #hard_margins is enabled, a word that extends over the right
109
+ # margin will be split at one less than the number of characters needed
110
+ # with a C-style continuation character (\). If the word cannot be split
111
+ # using the rules of SPLIT_CONTINUATION, then SPLIT_FIXED will be used.
112
+ SPLIT_CONTINUATION_FIXED = SPLIT_CONTINUATION | SPLIT_FIXED
113
+ # When #hard_margins is enabled, a word that extends over the right
114
+ # margin will be split according to the hyphenator specified by the
115
+ # #hyphenator object; if there is no hyphenation library supplied, then
116
+ # the hyphenator of Text::Format itself is used, which is the same as
117
+ # SPLIT_CONTINUATION. See #hyphenator for more information about
118
+ # hyphenation libraries. The example below is valid with either
119
+ # TeX::Hyphen or Text::Hyphen. If the word cannot be split using the
120
+ # hyphenator's rules, then SPLIT_FIXED will be used.
121
+ SPLIT_HYPHENATION_FIXED = SPLIT_HYPHENATION | SPLIT_FIXED
122
+ # Attempts to split words according to the rules of the supplied
123
+ # hyphenator (e.g., SPLIT_HYPHENATION); if the word cannot be split
124
+ # using these rules, then the rules of SPLIT_CONTINUATION will be
125
+ # followed. In all cases, if the word cannot be split using either
126
+ # SPLIT_HYPHENATION or SPLIT_CONTINUATION, and the word will not fit
127
+ # wholly into the next line, then SPLIT_FIXED will be used.
128
+ SPLIT_HYPHENATION_CONTINUATION = SPLIT_HYPHENATION | SPLIT_CONTINUATION
129
+ # Attempts to split words according to the rules of the supplied
130
+ # hyphenator (e.g., SPLIT_HYPHENATION); if the word cannot be split
131
+ # using these rules, then the rules of SPLIT_CONTINUATION will be
132
+ # followed. In all cases, if the word cannot be split using either
133
+ # SPLIT_HYPHENATION or SPLIT_CONTINUATION, then SPLIT_FIXED will be
134
+ # used.
135
+ SPLIT_ALL = SPLIT_HYPHENATION | SPLIT_CONTINUATION | SPLIT_FIXED
136
+
137
+ # Words forcibly split by Text::Format will be stored as split words.
138
+ # This class represents a word forcibly split.
139
class SplitWord
  # word::  the complete word as it was before being split.
  # first:: the leading piece of the split word.
  # rest::  the remaining piece of the split word.
  attr_reader :word, :first, :rest

  # Records the original +word+ together with its +first+ and +rest+ pieces
  # exactly as they are given.
  def initialize(word, first, rest)
    @word, @first, @rest = word, first, rest
  end
end
153
+
154
+ # Indicates punctuation characters that terminate a sentence, as some
155
+ # English typesetting rules indicate that sentences should be followed
156
+ # by two spaces. This is an archaic rule, but is supported with
157
+ # #extra_space. This is the default set of terminal punctuation
158
+ # characters. Additional terminal punctuation may be added to the
159
+ # formatting object through #terminal_punctuation.
160
+ TERMINAL_PUNCTUATION = %q(.?!)
161
+ # Indicates quote characters that may follow terminal punctuation under
162
+ # the current formatting rules. This satisfies the English formatting
163
+ # rule that indicates that sentences terminated inside of quotes should
164
+ # have the punctuation inside of the quoted text, not outside of the
165
+ # terminal quote. Additional terminal quotes may be added to the
166
+ # formatting object through #terminal_quotes. See TERMINAL_PUNCTUATION
167
+ # for more information.
168
+ TERMINAL_QUOTES = %q('")
169
+
170
+ # This method returns the regular expression used to detect the end of a
171
+ # sentence under the current definition of TERMINAL_PUNCTUATION,
172
+ # #terminal_punctuation, TERMINAL_QUOTES, and #terminal_quotes.
173
def __sentence_end_re
  # Both sets are interpolated into character classes. Escape them first so
  # that caller-supplied characters such as "]", "\" or "^" are matched
  # literally instead of corrupting the pattern.
  punctuation = Regexp.escape("#{TERMINAL_PUNCTUATION}#{self.terminal_punctuation}")
  quotes = Regexp.escape("#{TERMINAL_QUOTES}#{self.terminal_quotes}")

  # One terminal punctuation character, optionally followed by one terminal
  # quote, at the end of the word.
  %r{[#{punctuation}][#{quotes}]?$}
end
176
+ private :__sentence_end_re
177
+
178
+ # Returns a regular expression for a set of characters (at least one
179
+ # non-whitespace followed by at least one space) of the specified size
180
+ # followed by one or more of any character.
181
# Given +size+, builds a regular expression whose first group captures +size+
# repetitions of "non-whitespace run followed by whitespace run" and whose
# second group captures the rest (at least one character).
RE_BREAK_SIZE = lambda do |size|
  Regexp.new("((?:\\S+\\s+){#{size}})(.+)")
end
182
+
183
+ # Compares the formatting rules, excepting #hyphenator, of two
184
+ # Text::Format objects. Generated results (e.g., #split_words) are not
185
+ # compared.
186
def ==(o)
  # Each formatting rule of this object is compared with the value the other
  # object reports through its reader of the same name. The other object is
  # first asked whether it has that reader, so comparing against something
  # that is not a formatter (for example +nil+) answers +false+ instead of
  # raising NoMethodError.
  [ :text, :columns, :left_margin, :right_margin, :hard_margins,
    :split_rules, :first_indent, :body_indent, :tag_text, :tabstop,
    :format_style, :extra_space, :tag_paragraph, :nobreak,
    :terminal_punctuation, :terminal_quotes, :abbreviations,
    :nobreak_regex ].all? do |attribute|
    o.respond_to?(attribute) and
      (instance_variable_get("@#{attribute}") == o.send(attribute))
  end
end
206
+
207
+ # The default text to be manipulated. Note that value is optional, but
208
+ # if the formatting functions are called without values, this text is
209
+ # what will be formatted.
210
+ #
211
+ # *Default*:: <tt>[]</tt>
212
+ # <b>Used in</b>:: All methods
213
+ attr_accessor :text
214
+
215
+ # The total width of the format area. The margins, indentation, and text
216
+ # are formatted into this space. Any value provided is silently
217
+ # converted to a positive integer.
218
+ #
219
+ # COLUMNS
220
+ # <-------------------------------------------------------------->
221
+ # <-----------><------><---------------------------><------------>
222
+ # left margin indent text is formatted into here right margin
223
+ #
224
+ # *Default*:: <tt>72</tt>
225
+ # <b>Used in</b>:: #format, #paragraphs, #center
226
+ attr_accessor :columns
227
def columns=(col) #:nodoc:
  # Coerce to an integer first, then drop the sign, so the stored width is
  # never negative.
  width = col.to_i
  @columns = width.abs
end
230
+
231
+ # The number of spaces used for the left margin. The value provided is
232
+ # silently converted to a positive integer value.
233
+ #
234
+ # columns
235
+ # <-------------------------------------------------------------->
236
+ # <-----------><------><---------------------------><------------>
237
+ # LEFT MARGIN indent text is formatted into here right margin
238
+ #
239
+ # *Default*:: <tt>0</tt>
240
+ # <b>Used in</b>:: #format, #paragraphs, #center
241
+ attr_accessor :left_margin
242
def left_margin=(left) #:nodoc:
  # Store the magnitude of the integer form of +left+.
  margin = left.to_i
  @left_margin = margin.abs
end
245
+
246
+ # The number of spaces used for the right margin. The value provided is
247
+ # silently converted to a positive integer value.
248
+ #
249
+ # columns
250
+ # <-------------------------------------------------------------->
251
+ # <-----------><------><---------------------------><------------>
252
+ # left margin indent text is formatted into here RIGHT MARGIN
253
+ #
254
+ # *Default*:: <tt>0</tt>
255
+ # <b>Used in</b>:: #format, #paragraphs, #center
256
+ attr_accessor :right_margin
257
def right_margin=(right) #:nodoc:
  # Store the magnitude of the integer form of +right+.
  margin = right.to_i
  @right_margin = margin.abs
end
260
+
261
+ # The number of spaces to indent the first line of a paragraph. The
262
+ # value provided is silently converted to a positive integer value.
263
+ #
264
+ # columns
265
+ # <-------------------------------------------------------------->
266
+ # <-----------><------><---------------------------><------------>
267
+ # left margin INDENT text is formatted into here right margin
268
+ #
269
+ # *Default*:: <tt>4</tt>
270
+ # <b>Used in</b>:: #format, #paragraphs
271
+ attr_accessor :first_indent
272
def first_indent=(first) #:nodoc:
  # Store the magnitude of the integer form of +first+.
  indent = first.to_i
  @first_indent = indent.abs
end
275
+
276
+ # The number of spaces to indent all lines after the first line of a
277
+ # paragraph. The value provided is silently converted to a positive
278
+ # integer value.
279
+ #
280
+ # columns
281
+ # <-------------------------------------------------------------->
282
+ # <-----------><------><---------------------------><------------>
283
+ # left margin INDENT text is formatted into here right margin
284
+ #
285
+ # *Default*:: <tt>0</tt>
286
+ # <b>Used in</b>:: #format, #paragraphs
287
+ attr_accessor :body_indent
288
def body_indent=(body) #:nodoc:
  # Store the magnitude of the integer form of +body+.
  indent = body.to_i
  @body_indent = indent.abs
end
291
+
292
+ # Normally, words larger than the format area will be placed on a line
293
+ # by themselves. Setting this value to +true+ will force words larger
294
+ # than the format area to be split into one or more "words" each at most
295
+ # the size of the format area. The first line and the original word will
296
+ # be placed into #split_words. Note that this will cause the output to
297
+ # look *similar* to a #format_style of JUSTIFY. (Lines will be filled as
298
+ # much as possible.)
299
+ #
300
+ # *Default*:: +false+
301
+ # <b>Used in</b>:: #format, #paragraphs
302
+ attr_accessor :hard_margins
303
+
304
+ # An array of words split during formatting if #hard_margins is set to
305
+ # +true+.
306
+ # #split_words << Text::Format::SplitWord.new(word, first, rest)
307
+ attr_reader :split_words
308
+
309
+ # The object responsible for hyphenating. It must respond to
310
+ # #hyphenate_to(word, size) or #hyphenate_to(word, size, formatter) and
311
+ # return an array of the word split into two parts (e.g., <tt>[part1,
312
+ # part2]</tt>); if there is a hyphenation mark to be applied,
313
+ # responsibility belongs to the hyphenator object. The size is the
314
+ # MAXIMUM size permitted, including any hyphenation marks.
315
+ #
316
+ # If the #hyphenate_to method has an arity of 3, the current formatter
317
+ # (+self+) will be provided to the method. This allows the hyphenator to
318
+ # make decisions about the hyphenation based on the formatting rules.
319
+ #
320
+ # #hyphenate_to should return <tt>[nil, word]</tt> if the word cannot be
321
+ # hyphenated.
322
+ #
323
+ # *Default*:: +self+ (SPLIT_CONTINUATION)
324
+ # <b>Used in</b>:: #format, #paragraphs
325
+ attr_accessor :hyphenator
326
def hyphenator=(h) #:nodoc:
  # When no hyphenator is supplied, this formatter acts as its own.
  candidate = h || self

  unless candidate.respond_to?(:hyphenate_to)
    raise ArgumentError, "#{candidate.inspect} is not a valid hyphenator."
  end

  # Only (word, size) and (word, size, formatter) signatures are accepted.
  arg_count = candidate.method(:hyphenate_to).arity
  unless arg_count.between?(2, 3)
    raise ArgumentError, "#{candidate.inspect} must have exactly two or three arguments."
  end

  @hyphenator = candidate
  @hyphenator_arity = arg_count
end
336
+
337
+ # Specifies the split mode; used only when #hard_margins is set to
338
+ # +true+. Allowable values are:
339
+ #
340
+ # * +SPLIT_FIXED+
341
+ # * +SPLIT_CONTINUATION+
342
+ # * +SPLIT_HYPHENATION+
343
+ # * +SPLIT_CONTINUATION_FIXED+
344
+ # * +SPLIT_HYPHENATION_FIXED+
345
+ # * +SPLIT_HYPHENATION_CONTINUATION+
346
+ # * +SPLIT_ALL+
347
+ #
348
+ # *Default*:: <tt>Text::Format::SPLIT_FIXED</tt>
349
+ # <b>Used in</b>:: #format, #paragraphs
350
+ attr_accessor :split_rules
351
def split_rules=(s) #:nodoc:
  # Accept only values lying between SPLIT_FIXED and SPLIT_ALL inclusive.
  too_small = s < SPLIT_FIXED
  out_of_range = too_small || (s > SPLIT_ALL)
  raise ArgumentError, "Invalid value provided for #split_rules." if out_of_range

  @split_rules = s
end
355
+
356
+ # Indicates whether sentence terminators should be followed by a single
357
+ # space (+false+), or two spaces (+true+). See #abbreviations for more
358
+ # information.
359
+ #
360
+ # *Default*:: +false+
361
+ # <b>Used in</b>:: #format, #paragraphs
362
+ attr_accessor :extra_space
363
+
364
+ # Defines the current abbreviations as an array. This is only used if
365
+ # extra_space is turned on.
366
+ #
367
+ # If one is abbreviating "President" as "Pres." (abbreviations =
368
+ # ["Pres"]), then the results of formatting will be as illustrated in
369
+ # the table below:
370
+ #
371
+ # abbreviations
372
+ # extra_space | #include?("Pres") | not #include?("Pres")
373
+ # ------------+-------------------+----------------------
374
+ # true | Pres. Lincoln | Pres.  Lincoln
375
+ # false | Pres. Lincoln | Pres. Lincoln
376
+ # ------------+-------------------+----------------------
377
+ # extra_space | #include?("Mrs") | not #include?("Mrs")
378
+ # true | Mrs. Lincoln | Mrs. Lincoln
379
+ # false | Mrs. Lincoln | Mrs. Lincoln
380
+ #
381
+ # Note that abbreviations should not have the terminal period as part of
382
+ # their definitions.
383
+ #
384
+ # This automatic abbreviation handling *will* cause some issues with
385
+ # uncommon sentence structures. The two sentences below will not be
386
+ # formatted correctly:
387
+ #
388
+ # You're in trouble now, Mr.
389
+ # Just wait until your father gets home.
390
+ #
391
+ # Under no circumstances (because Mr is a predefined abbreviation) will
392
+ # this ever be separated by two spaces.
393
+ #
394
+ # *Default*:: <tt>[]</tt>
395
+ # <b>Used in</b>:: #format, #paragraphs
396
+ attr_accessor :abbreviations
397
+
398
+ # Specifies additional punctuation characters that terminate a sentence,
399
+ # as some English typesetting rules indicate that sentences should be
400
+ # followed by two spaces. This is an archaic rule, but is supported with
401
+ # #extra_space. This is added to the default set of terminal punctuation
402
+ # defined in TERMINAL_PUNCTUATION.
403
+ #
404
+ # *Default*:: <tt>""</tt>
405
+ # <b>Used in</b>:: #format, #paragraphs
406
+ attr_accessor :terminal_punctuation
407
+ # Specifies additional quote characters that may follow
408
+ # terminal punctuation under the current formatting rules. This
409
+ # satisfies the English formatting rule that indicates that sentences
410
+ # terminated inside of quotes should have the punctuation inside of the
411
+ # quoted text, not outside of the terminal quote. This is added to the
412
+ # default set of terminal quotes defined in TERMINAL_QUOTES.
413
+ #
414
+ # *Default*:: <tt>""</tt>
415
+ # <b>Used in</b>:: #format, #paragraphs
416
+ attr_accessor :terminal_quotes
417
+
418
+ # Indicates whether the formatting of paragraphs should be done with
419
+ # tagged paragraphs. Useful only with #tag_text.
420
+ #
421
+ # *Default*:: +false+
422
+ # <b>Used in</b>:: #format, #paragraphs
423
+ attr_accessor :tag_paragraph
424
+
425
+ # The text to be placed before each paragraph when #tag_paragraph is
426
+ # +true+. When #format is called, only the first element (#tag_text[0])
427
+ # is used. When #paragraphs is called, then each successive element
428
+ # (#tag_text[n]) will be used once, with corresponding paragraphs. If
429
+ # the tag elements are exhausted before the text is exhausted, then the
430
+ # remaining paragraphs will not be tagged. Regardless of indentation
431
+ # settings, a blank line will be inserted between all paragraphs when
432
+ # #tag_paragraph is +true+.
433
+ #
434
+ # The Text::Format package provides three number generators,
435
+ # Text::Format::Alpha, Text::Format::Number, and Text::Format::Roman to
436
+ # assist with the numbering of paragraphs.
437
+ #
438
+ # *Default*:: <tt>[]</tt>
439
+ # <b>Used in</b>:: #format, #paragraphs
440
+ attr_accessor :tag_text
441
+
442
+ # Indicates whether or not the non-breaking space feature should be
443
+ # used.
444
+ #
445
+ # *Default*:: +false+
446
+ # <b>Used in</b>:: #format, #paragraphs
447
+ attr_accessor :nobreak
448
+
449
+ # A hash which holds the regular expressions on which spaces should not
450
+ # be broken. The hash is set up such that the key is the first word and
451
+ # the value is the second word.
452
+ #
453
+ # For example, if +nobreak_regex+ contains the following hash:
454
+ #
455
+ # { %r{Mrs?\.?} => %r{\S+}, %r{\S+} => %r{(?:[SJ])r\.?} }
456
+ #
457
+ # Then "Mr. Jones", "Mrs Jones", and "Jones Jr." would not be broken. If
458
+ # this simple matching algorithm indicates that there should not be a
459
+ # break at the current end of line, then a backtrack is done until there
460
+ # are two words on which line breaking is permitted. If two such words
461
+ # are not found, then the end of the line will be broken *regardless*.
462
+ # If there is a single word on the current line, then no backtrack is
463
+ # done and the word is stuck on the end.
464
+ #
465
+ # *Default*:: <tt>{}</tt>
466
+ # <b>Used in</b>:: #format, #paragraphs
467
+ attr_accessor :nobreak_regex
468
+
469
+ # Indicates the number of spaces that a single tab represents. Any value
470
+ # provided is silently converted to a positive integer.
471
+ #
472
+ # *Default*:: <tt>8</tt>
473
+ # <b>Used in</b>:: #expand, #unexpand,
474
+ # #paragraphs
475
+ attr_accessor :tabstop
476
def tabstop=(tabs) #:nodoc:
  # Store the magnitude of the integer form of +tabs+.
  stop = tabs.to_i
  @tabstop = stop.abs
end
479
+
480
+ # Specifies the format style. Allowable values are:
481
+ # * +LEFT_ALIGN+
482
+ # * +RIGHT_ALIGN+
483
+ # * +RIGHT_FILL+
484
+ # * +JUSTIFY+
485
+ #
486
+ # *Default*:: <tt>Text::Format::LEFT_ALIGN</tt>
487
+ # <b>Used in</b>:: #format, #paragraphs
488
+ attr_accessor :format_style
489
def format_style=(fs) #:nodoc:
  # Only the four named alignment styles are accepted.
  allowed = [LEFT_ALIGN, RIGHT_ALIGN, RIGHT_FILL, JUSTIFY]
  unless allowed.include?(fs)
    raise ArgumentError, "Invalid value provided for format_style."
  end

  @format_style = fs
end
493
+
494
+ # Indicates that the format style is left alignment.
495
+ #
496
+ # *Default*:: +true+
497
+ # <b>Used in</b>:: #format, #paragraphs
498
def left_align?
  # True exactly when the stored style is LEFT_ALIGN.
  style = @format_style
  style == LEFT_ALIGN
end
501
+
502
+ # Indicates that the format style is right alignment.
503
+ #
504
+ # *Default*:: +false+
505
+ # <b>Used in</b>:: #format, #paragraphs
506
def right_align?
  # True exactly when the stored style is RIGHT_ALIGN.
  style = @format_style
  style == RIGHT_ALIGN
end
509
+
510
+ # Indicates that the format style is right fill.
511
+ #
512
+ # *Default*:: +false+
513
+ # <b>Used in</b>:: #format, #paragraphs
514
def right_fill?
  # True exactly when the stored style is RIGHT_FILL.
  style = @format_style
  style == RIGHT_FILL
end
517
+
518
+ # Indicates that the format style is full justification.
519
+ #
520
+ # *Default*:: +false+
521
+ # <b>Used in</b>:: #format, #paragraphs
522
def justify?
  # True exactly when the stored style is JUSTIFY.
  style = @format_style
  style == JUSTIFY
end
525
+
526
+ # The formatting object itself can be used as a #hyphenator, where the
527
+ # default implementation of #hyphenate_to implements the conditions
528
+ # necessary to properly produce SPLIT_CONTINUATION.
529
def hyphenate_to(word, size)
  # With fewer than two columns there is no room for one character plus the
  # continuation mark, so report the word as unsplittable.
  return [nil, word] if (size - 2) < 0

  # The head keeps size - 1 characters and gains a trailing backslash; the
  # tail is everything from that cut point onward.
  cut = size - 1
  head = word[0 .. (cut - 1)]
  tail = word[cut .. -1]
  [head + "\\", tail]
end
536
+
537
+ # Splits the provided word so that it is in two parts, <tt>word[0 ..
538
+ # (size - 1)]</tt> and <tt>word[size .. -1]</tt>.
539
def split_word_to(word, size)
  # Head is characters 0 through size - 1; tail is everything from index
  # +size+ onward.
  last_of_head = size - 1
  head = word[0 .. last_of_head]
  tail = word[size .. -1]
  [head, tail]
end
542
+
543
+ # Formats text into a nice paragraph format. The text is separated into
544
+ # words and then reassembled a word at a time using the settings of this
545
+ # Format object.
546
+ #
547
+ # If +text+ is +nil+, then the value of #text will be worked on.
548
def format_one_paragraph(text = nil)
  text ||= @text
  text = text[0] if text.kind_of?(Array)

  # Convert the provided paragraph to a list of words. The list is reversed
  # so that the next word in reading order is always taken with #pop.
  words = text.split(SPACES_RE).reverse.reject { |ww| ww.nil? or ww.empty? }

  # From here on +text+ accumulates the finished output lines.
  text = []

  # Find the maximum line width and the initial indent string.
  # TODO 20050114 - allow the left and right margins to be specified as
  # strings. If they are strings, then we need to use the sizes of the
  # strings. Also: allow the indent string to be set manually and
  # indicate whether the indent string will have a following space.
  max_line_width = @columns - @first_indent - @left_margin - @right_margin
  indent_str = ' ' * @first_indent

  first_line = true

  if words.empty?
    line = []
    line_size = 0
    extra_space = false
  else
    line = [ words.pop ]
    line_size = line[-1].size
    extra_space = __add_extra_space?(line[-1])
  end

  while next_word = words.pop
    next_word.strip! unless next_word.nil?
    new_line_size = (next_word.size + line_size) + 1

    # The extra space only applies when the previous word really ends a
    # sentence.
    if extra_space
      if (line[-1] !~ __sentence_end_re)
        extra_space = false
      end
    end

    # Increase the width of the new line if there's a sentence
    # terminator and we are applying extra_space.
    new_line_size += 1 if extra_space

    # Will the word fit onto the current line? If so, simply append it
    # to the end of the line.
    if new_line_size <= max_line_width
      if line.empty?
        line << next_word
      else
        if extra_space
          # Two separating spaces, matching the extra column that was
          # added to new_line_size above.
          line << "  #{next_word}"
        else
          line << " #{next_word}"
        end
      end
    else
      # Forcibly wrap the line if nonbreaking spaces are turned on and
      # there is a condition where words must be wrapped. If we have
      # returned more than one word, readjust the word list.
      line, next_word = __wrap_line(line, next_word) if @nobreak
      if next_word.kind_of?(Array)
        if next_word.size > 1
          words.push(*(next_word.reverse))
          next_word = words.pop
        else
          next_word = next_word[0]
        end
        next_word.strip! unless next_word.nil?
      end

      # Check to see if the line needs to be hyphenated. If a word has a
      # hyphen in it (e.g., "fixed-width"), then we can ALWAYS wrap at
      # that hyphenation, even if #hard_margins is not turned on. More
      # elaborate forms of hyphenation will only be performed if
      # #hard_margins is turned on. If we have returned more than one
      # word, readjust the word list.
      line, new_line_size, next_word = __hyphenate(line, line_size, next_word, max_line_width)
      if next_word.kind_of?(Array)
        if next_word.size > 1
          words.push(*(next_word.reverse))
          next_word = words.pop
        else
          next_word = next_word[0]
        end
        next_word.strip! unless next_word.nil?
      end

      text << __make_line(line, indent_str, max_line_width, next_word.nil?) unless line.nil?

      # Every line after the first uses the body indent.
      if first_line
        first_line = false
        max_line_width = @columns - @body_indent - @left_margin - @right_margin
        indent_str = ' ' * @body_indent
      end

      if next_word.nil?
        line = []
        new_line_size = 0
      else
        line = [ next_word ]
        new_line_size = next_word.size
      end
    end

    line_size = new_line_size
    extra_space = __add_extra_space?(next_word) unless next_word.nil?
  end

  # Flush whatever remains on the current line; hyphenation may hand back a
  # remainder that becomes the next line to flush.
  loop do
    break if line.nil? or line.empty?
    line, line_size, ww = __hyphenate(line, line_size, nil, max_line_width)
    text << __make_line(line, indent_str, max_line_width, ww.nil?)
    line = ww
  end

  if (@tag_paragraph and (not text.empty?))
    if @tag_cur.nil? or @tag_cur.empty?
      @tag_cur = @tag_text[0]
    end

    # Column of the first non-blank character on the first line.
    fchar = /(\S)/o.match(text[0])[1]
    white = text[0].index(fchar)

    unless @tag_cur.nil?
      if ((white - @left_margin - 1) > @tag_cur.size) then
        # The tag fits inside the leading whitespace: overwrite it in place.
        white = @tag_cur.size + @left_margin
        text[0].gsub!(/^ {#{white}}/, "#{' ' * @left_margin}#{@tag_cur}")
      else
        # Otherwise the tag goes on a line of its own above the paragraph.
        text.unshift("#{' ' * @left_margin}#{@tag_cur}\n")
      end
    end
  end

  text.join('')
end
685
+ alias format format_one_paragraph
686
+
687
+ # Considers each element of text (provided or internal) as a paragraph.
688
+ # If #first_indent is the same as #body_indent, then paragraphs will be
689
+ # separated by a single empty line in the result; otherwise, the
690
+ # paragraphs will follow immediately after each other. Uses #format to
691
+ # do the heavy lifting.
692
+ #
693
+ # If +to_wrap+ responds to #split, then it will be split into an array
694
+ # of elements by calling #split with the value of +split_on+. The
695
+ # default value of split_on is $/, or the default record separator,
696
+ # repeated twice (e.g., /\n\n/).
697
def paragraphs(to_wrap = nil, split_on = /(?:#{$/}){2}/o)
  # The default separator uses a non-capturing group. With a capturing group
  # String#split also returns the captured separators as elements, and each
  # of those consumed one entry of #tag_text, mis-tagging later paragraphs.
  to_wrap = @text if to_wrap.nil?
  if to_wrap.respond_to?(:split)
    to_wrap = to_wrap.split(split_on)
  else
    to_wrap = [to_wrap].flatten
  end

  # Paragraphs are separated by an empty line when the indents match or when
  # tagging is on; otherwise they follow each other directly.
  if ((@first_indent == @body_indent) or @tag_paragraph) then
    p_end = NEWLINE
  else
    p_end = ''
  end

  ret = []
  to_wrap.each_with_index do |tw, cnt|
    # The n-th element is offered the n-th tag; a missing tag means untagged.
    @tag_cur = @tag_text[cnt] if @tag_paragraph
    @tag_cur = '' if @tag_cur.nil?
    line = format(tw)
    ret << "#{line}#{p_end}" if (not line.nil?) and (line.size > 0)
  end

  # Drop the separator that would otherwise trail the final paragraph.
  ret[-1].chomp! unless ret.empty?
  ret.join('')
end
724
+
725
+ # Centers the text, preserving empty lines and tabs.
726
def center(to_center = nil)
  to_center = @text if to_center.nil?
  usable = @columns - @left_margin - @right_margin

  rendered = [to_center].flatten.map do |entry|
    stripped = entry.strip
    tab_count = stripped.count(TAB)
    # Half of the room left over once each tab is counted as #tabstop
    # columns instead of one character.
    slack = (usable - stripped.size - (tab_count * @tabstop) + tab_count) / 2
    # Column at which the right-justified text must end.
    target = (usable - @left_margin - @right_margin) - slack
    "#{stripped.rjust(target)}\n"
  end

  rendered.join('')
end
743
+
744
+ # Replaces all tab characters in the text with #tabstop spaces.
745
def expand(to_expand = nil)
  to_expand = @text if to_expand.nil?

  tmp = ' ' * @tabstop
  changer = lambda do |text|
    # String#gsub! returns nil when nothing was replaced, which blanked every
    # line that contained no tab. The non-destructive form always returns the
    # resulting line.
    text.split(NEWLINE_RE).collect { |ln| ln.gsub(/\t/, tmp) }.join(NEWLINE)
  end

  # An array is expanded element by element; anything else as a single text.
  if to_expand.kind_of?(Array)
    to_expand.collect { |te| changer[te] }
  else
    changer[to_expand]
  end
end
761
+
762
+ # Replaces all occurrences of #tabstop consecutive spaces with a tab
763
+ # character.
764
def unexpand(to_unexpand = nil)
  to_unexpand = @text if to_unexpand.nil?

  tmp = / {#{@tabstop}}/
  changer = lambda do |text|
    # String#gsub! returns nil when nothing was replaced, which blanked every
    # line that had no run of #tabstop spaces. The non-destructive form
    # always returns the resulting line.
    text.split(NEWLINE_RE).collect { |ln| ln.gsub(tmp, TAB) }.join(NEWLINE)
  end

  # An array is processed element by element; anything else as a single text.
  if to_unexpand.kind_of?(Array)
    to_unexpand.collect { |tu| changer[tu] }
  else
    changer[to_unexpand]
  end
end
780
+
781
+ # Return +true+ if the word may have an extra space added after it. This
782
+ # will only be the case if #extra_space is +true+ and the word is not an
783
+ # abbreviation.
784
def __add_extra_space?(word)
  return false unless @extra_space

  # Compare without a trailing period, so "Mr." is looked up as "Mr".
  stem = word.nil? ? word : word.gsub(/\.$/o, '')

  # Known abbreviations, global or per-formatter, never get the extra space.
  known = ABBREV.include?(stem) || @abbreviations.include?(stem)
  !known
end
791
+ private :__add_extra_space?
792
+
793
# Renders one output line from +line+, an array of word strings in which
# every word after the first carries its own leading space(s).
#
# line::   the words of the line; padded in place when justifying.
# indent:: the indent string placed after the left margin.
# width::  the number of columns available to the words.
# last::   +true+ for the final line of a paragraph, which is never
#          justified.
#
# Returns the finished line ending in a newline, or +line+ itself untouched
# when it is empty.
def __make_line(line, indent, width, last = false) #:nodoc:
  line_size = line.inject(0) { |ls, el| ls + el.size }
  lmargin = " " * @left_margin
  fill = " " * (width - line_size) if right_fill? and (line_size <= width)

  if (not last) and justify? and (line.size > 1)
    # A gap is any word that starts with whitespace; only those are padded
    # below, so the missing columns must be divided among exactly that many
    # words. Dividing by line.size / 2 overfilled lines of three or more
    # words.
    gaps = line.select { |word| word !~ /^\S/ }.size
    spaces = width - line_size

    if (gaps > 0) and (spaces > 0)
      word_spaces, spaces = spaces.divmod(gaps)
      # Work from the end of the line so leftover columns go to the
      # right-most gaps first.
      line.reverse.each do |word|
        next if (word =~ /^\S/)

        word.sub!(/^/, " " * word_spaces)

        next unless (spaces > 0)

        word.sub!(/^/, " ")
        spaces -= 1
      end
    end
  end

  # Nothing to render: hand the empty list back unchanged.
  return line if line.empty?

  line = "#{lmargin}#{indent}#{line.join('')}#{fill}\n"

  if right_align?
    # Pad on the left so the text ends at the right margin. A line that is
    # already too long is returned as-is instead of raising on a negative
    # repeat count.
    pad = @columns - @right_margin - (line.size - 1)
    pad > 0 ? (" " * pad) + line : line
  else
    line
  end
end
824
+ # private :__make_line
825
+
826
# Try to split +word+ so that its first part takes at most +size+
# characters. Returns <tt>[first, rest]</tt>; +first+ is +nil+ when no
# split was produced.
#
# Without hard margins words are never split: the whole word is returned
# as +first+ and +rest+ is +nil+. With hard margins the split rules are
# tried in order: the configured hyphenator (SPLIT_HYPHENATION), this
# object's own #hyphenate_to (SPLIT_CONTINUATION), and finally a fixed
# split (SPLIT_FIXED). When SPLIT_FIXED is not enabled, a fixed split is
# still made if an earlier rule left a remainder longer than +limit+.
def __split_to_fit(word, size, limit) #:nodoc:
  return [ word, nil ] unless @hard_margins

  first = rest = nil

  if (@split_rules & SPLIT_HYPHENATION) == SPLIT_HYPHENATION
    # The hyphenator may or may not want the formatter as a third argument.
    if @hyphenator_arity == 2
      first, rest = @hyphenator.hyphenate_to(word, size)
    else
      first, rest = @hyphenator.hyphenate_to(word, size, self)
    end
  end

  if first.nil? and (@split_rules & SPLIT_CONTINUATION) == SPLIT_CONTINUATION
    first, rest = self.hyphenate_to(word, size)
  end

  if first.nil?
    if (@split_rules & SPLIT_FIXED) == SPLIT_FIXED
      first, rest = split_word_to(word, size)
    elsif (not rest.nil?) and (rest.size > limit)
      first, rest = split_word_to(word, size)
    end
  end

  [ first, rest ]
end

# Adjust +line+ (an array of words whose combined size is +line_size+) to
# +width+ columns by moving or splitting words at the end of the line.
#
# * When the line is too long, words are taken off its end. The first one
#   that can partly stay is split; its first part is put back on the line
#   and the remainder is carried over.
# * When the line is too short, +next_word+ is split and its first part is
#   appended to the line if it fits.
#
# Returns <tt>[line, line_size, next_words]</tt>: a modified copy of
# +line+, its updated size, and an array of the words carried over to the
# next line. If +line+ is +nil+ or empty, the three arguments are returned
# unchanged (+next_word+ is then not wrapped in an array). Each split that
# is kept is recorded in <tt>@split_words</tt>.
#
# TODO: Words that already contain a hyphen could be split on that hyphen
# before the split rules are consulted.
def __hyphenate(line, line_size, next_word, width) #:nodoc:
  return [ line, line_size, next_word ] if line.nil? or line.empty?

  rline = line.dup
  rsize = line_size

  rnext = []
  rnext << next_word.dup unless next_word.nil?

  loop do
    break if rsize == width

    if rsize > width
      # Nothing left to take off the line.
      break if rline.empty?

      word = rline.pop
      # Room left for this word once it has been removed from the line.
      size = width - rsize + word.size

      if size < 1
        # Even without this word the line is full: move the word over
        # whole, account for its removal, and look at the previous word.
        rnext.unshift word
        rsize -= word.size
        next
      end

      first, rest = __split_to_fit(word, size, size)

      if first.nil?
        # Could not be split: the whole word moves to the next line.
        rest = word
        rsize -= word.size
      else
        rsize = rsize - word.size + first.size
        if rline.empty?
          rline << first
        else
          rsize += 1
          rline << " #{first}"
        end
        @split_words << SplitWord.new(word, first, rest)
      end
      rnext.unshift rest unless rest.nil?
      break
    else
      break if rnext.empty?

      word = rnext.shift.dup
      # Room left on the line after a separating space.
      size = width - rsize - 1

      if size <= 0
        rnext.unshift word
        break
      end

      # NOTE(review): the remainder-length fallback compares against
      # +width+ here but against +size+ in the too-long branch above; kept
      # as found.
      first, rest = __split_to_fit(word, size, width)

      if first.nil?
        # No split was produced: carry the whole word over so it is not
        # lost.
        rest = word
      elsif (rsize + first.size) < width
        @split_words << SplitWord.new(word, first, rest)

        rsize += first.size + 1
        rline << " #{first}"
      else
        # The first part does not fit after all.
        rest = word
      end

      # NOTE(review): +rest+ is nil when the whole word was placed on the
      # line, so a nil is pushed here; the too-long branch skips nil.
      # Left as found because callers may rely on it.
      rnext.unshift rest
      break
    end
  end

  [ rline, rsize, rnext ]
end
private :__hyphenate, :__split_to_fit
963
+
964
# The line must be broken. Typically this is done by moving only
# +next_word+ to the next line. However, some pairs of adjacent words may
# not be separated (see #nobreak_regex), so it may be necessary to move
# one or more words from the end of the current line to the next line
# together with +next_word+. This function does that.
#
# A pair is unbreakable when, for some entry of <tt>@nobreak_regex</tt>,
# the left word matches the key and the right word matches the value.
#
# Returns <tt>[line, words]</tt>. When trailing words had to be moved they
# are removed from +line+ in place and +words+ holds them followed by
# +next_word+. Otherwise +line+ is untouched and +words+ is +nil+; this
# includes the case where every word of the line is chained to the next
# one, because moving them all would leave the line empty.
def __wrap_line(line, next_word)
  unbreakable = lambda do |left, right|
    @nobreak_regex.any? { |first, second| (left =~ first) && (right =~ second) }
  end

  # Walk back from the end of the line while the break in front of the
  # follower is forbidden. +keep+ ends up as the index of the last word
  # that stays on the line; the break is placed directly after it, at the
  # pair that was actually checked.
  keep = line.size - 1
  follower = next_word
  while keep >= 0 && unbreakable.call(line[keep], follower)
    follower = line[keep]
    keep -= 1
  end

  words = nil
  if keep >= 0 && keep < line.size - 1
    words = line.slice!((keep + 1) .. -1)
    words << next_word
  end

  [line, words]
end
private :__wrap_line
1008
+
1009
# Create a Text::Format object. Accepts an optional hash of construction
# options (this will be changed to named parameters in Ruby 2.0). Any
# option that is missing, +nil+ or +false+ falls back to its default:
#
#   :text => [], :columns => 72, :tabstop => 8, :first_indent => 4,
#   :body_indent => 0, :format_style => LEFT_ALIGN, :left_margin => 0,
#   :right_margin => 0, :extra_space => false, :tag_paragraph => false,
#   :tag_text => [], :abbreviations => [], :terminal_punctuation => "",
#   :terminal_quotes => "", :nobreak => false, :nobreak_regex => {},
#   :hard_margins => false, :split_rules => SPLIT_FIXED,
#   :hyphenator => self
#
# After the object has been set up (with either the provided or the
# default values) it is yielded (as +self+) to an optional block for
# further construction and operation.
def initialize(options = {}) #:yields self:
  defaults = {
    :text                 => [],
    :columns              => 72,
    :tabstop              => 8,
    :first_indent         => 4,
    :body_indent          => 0,
    :format_style         => LEFT_ALIGN,
    :left_margin          => 0,
    :right_margin         => 0,
    :extra_space          => false,
    :tag_paragraph        => false,
    :tag_text             => [],
    :abbreviations        => [],
    :terminal_punctuation => "",
    :terminal_quotes      => "",
    :nobreak              => false,
    :nobreak_regex        => {},
    :hard_margins         => false,
    :split_rules          => SPLIT_FIXED,
    :hyphenator           => self
  }

  # Each setting lives in the instance variable named after its option key.
  defaults.each do |name, fallback|
    instance_variable_set("@#{name}", options[name] || fallback)
  end

  # Remember how many arguments the hyphenator's #hyphenate_to takes.
  @hyphenator_arity = @hyphenator.method(:hyphenate_to).arity
  @tag_cur = ""
  @split_words = []

  yield self if block_given?
end
1041
+
1042
+ end # class Format
1043
+
1044
+ end # module Text