plain_text 0.4 → 0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ChangeLog +28 -0
- data/README.en.rdoc +52 -8
- data/bin/head.rb +27 -13
- data/bin/tail.rb +28 -12
- data/bin/yard2md_afterclean +213 -0
- data/lib/plain_text/parse_rule.rb +4 -1
- data/lib/plain_text/part.rb +103 -0
- data/lib/plain_text/split.rb +74 -0
- data/lib/plain_text/util.rb +71 -10
- data/lib/plain_text.rb +153 -28
- data/plain_text.gemspec +9 -5
- data/test/test_plain_text.rb +110 -1
- data/test/test_plain_text_part.rb +80 -0
- data/test/test_plain_text_split.rb +29 -0
- data/test/test_plain_text_util.rb +36 -0
- data/test/testhead_rb.rb +59 -4
- data/test/testtail_rb.rb +58 -8
- data/test/testyard2md_afterclean.rb +71 -0
- metadata +11 -3
data/lib/plain_text/part.rb
CHANGED
@@ -81,6 +81,7 @@ module PlainText
|
|
81
81
|
even_num: 'even number of elements must be specified.',
|
82
82
|
use_to_a: 'To handle it as an Array, use to_a first.',
|
83
83
|
}
|
84
|
+
private_constant :ERR_MSGS
|
84
85
|
|
85
86
|
# @param arin [Array] of [Paragraph1, Boundary1, Para2, Bd2, ...] or Part/Paragraph if boundaries is given
|
86
87
|
# @param boundaries [Array] of Boundary
|
@@ -283,6 +284,108 @@ module PlainText
|
|
283
284
|
map_part_core(with_index: false, **kwd, &bl)
|
284
285
|
end
|
285
286
|
|
287
|
+
# merge parts/paragraphs if they satisfy the conditions.
|
288
|
+
#
|
289
|
+
# A group of two Parts/Paragraphs and the Boundaries in between and before and after
|
290
|
+
# is passed to the block consecutively.
|
291
|
+
#
|
292
|
+
# @yield [ary, b1, b2, i] Returns true if the two paragraphs should be merged.
|
293
|
+
# @yieldparam [Array] ary of [Para1st, BoundaryBetween, Para2nd]
|
294
|
+
# @yieldparam [Boundary] b1 Boundary-String before the first part/paragraph (nil for the first one)
|
295
|
+
# @yieldparam [Boundary] b2 Boundary-String after the second part/paragraph
|
296
|
+
# @yieldparam [Integer] i Index of the first Para
|
297
|
+
# @yieldreturn [Boolean, Symbol] True if they should be merged. :abort to cancel the whole operation (the method then returns false).
|
298
|
+
# @return [self, false] false if no pairs of parts/paragraphs are merged, else self.
|
299
|
+
def merge_para_if()
|
300
|
+
arind2del = [] # Indices to delete (both paras and boundaries)
|
301
|
+
each_index do |ei|
|
302
|
+
break if ei >= size - 3 # 2nd last paragraph or later.
|
303
|
+
next if !index_para?(ei, skip_check: true)
|
304
|
+
ar1st = self.to_a[ei..ei+2]
|
305
|
+
ar2nd = ((ei==0) ? nil : self[ei-1])
|
306
|
+
do_merge = yield(ar1st, ar2nd, self[ei+3], ei)
|
307
|
+
return false if do_merge == :abort
|
308
|
+
arind2del.push ei, ei+1, ei+2 if do_merge
|
309
|
+
end
|
310
|
+
|
311
|
+
return false if arind2del.empty?
|
312
|
+
arind2del.uniq!
|
313
|
+
|
314
|
+
(arind2ranges arind2del).reverse.each do |er|
|
315
|
+
merge_para!(er)
|
316
|
+
end
|
317
|
+
return self
|
318
|
+
end
|
319
|
+
|
320
|
+
# merge multiple paragraphs
|
321
|
+
#
|
322
|
+
# The boundaries between them are simply joined as String as they are.
|
323
|
+
#
|
324
|
+
# @overload set(index1, index2, *rest)
|
325
|
+
# With a list of indices. Unless use_para_index is true, this means the main Array index. Namely, if Part is [P0, B0, P1, B1, P2, B2, B3] and if you want to merge P1 and P2, you specify as (2,3,4) or (2,4). If use_para_index is true, specify as (1,2).
|
326
|
+
# @param index1 [Integer] the first index to merge
|
327
|
+
# @param index2 [Integer] the second index to merge, and so on...
|
328
|
+
# @overload set(range)
|
329
|
+
# With a range of the indices to merge. Unless use_para_index is true, this means the main Array index. See the first overload set about it.
|
330
|
+
# @param range [Range] Range of the indices to merge.
|
331
|
+
# @param use_para_index: [Boolean] If false (Default), the indices are for the main indices (alternative between Parts/Paragraphs and Boundaries, starting from Part/Paragraph). If true, the indices are as obtained with {#parts}, namely the array containing only Parts/Paragraphs.
|
332
|
+
# @return [self, nil] nil if nothing is merged (because of wrong indices).
|
333
|
+
def merge_para!(*rest, use_para_index: false)
|
334
|
+
$myd = true
|
335
|
+
#warn "DEBUG:m00: #{rest}\n"
|
336
|
+
(ranchk = build_index_range_for_merge_para!(*rest, use_para_index: use_para_index)) || (return self) # Do nothing.
|
337
|
+
# ranchk is guaranteed to have a size of 2 or greater.
|
338
|
+
#warn "DEBUG:m0: #{ranchk}\n"
|
339
|
+
self[ranchk] = [self[ranchk].to_a[0..-2].join, self[ranchk.end]] # 2-elements (Para, Boundary)
|
340
|
+
self
|
341
|
+
end
|
342
|
+
|
343
|
+
# Building a proper array for the indices to merge
|
344
|
+
#
|
345
|
+
# Always returns a Range covering an even number of indices, starting from a Paragraph,
|
346
|
+
# like (2..5), the last of which is a Boundary which is not merged.
|
347
|
+
# In this example, Para(i=2)/Boundary(3)/Para(4) is merged,
|
348
|
+
# while Boundary(5) stays as it is.
|
349
|
+
#
|
350
|
+
# @param (see #merge_para!)
|
351
|
+
# @param use_para_index: [Boolean] false
|
352
|
+
# @return [Range, nil] nil if no range is selected.
|
353
|
+
def build_index_range_for_merge_para!(*rest, use_para_index: false)
|
354
|
+
#warn "DEBUG:b0: #{rest.inspect} to_a=#{to_a}\n"
|
355
|
+
inary = rest.flatten
|
356
|
+
return nil if inary.empty?
|
357
|
+
# inary = inary[0] if like_range?(inary[0])
|
358
|
+
#warn "DEBUG:b1: #{inary.inspect}\n"
|
359
|
+
(ary = to_ary_positive_index(inary, to_a)) || return # Guaranteed to be an array of positive indices (sorted and uniq-ed).
|
360
|
+
#warn "DEBUG:b3: #{ary}\n"
|
361
|
+
return nil if ary.empty?
|
362
|
+
|
363
|
+
# Normalize so the array contains both Paragraph and Boundaries in between.
|
364
|
+
# After this, ary must be [Para1-index, Boundary1-index, P2, B2, ..., Pn-index, Bn-index]
|
365
|
+
# Note: In the input, the index is probably for Paragraph. But,
|
366
|
+
# for the sake of later processing, make the array contain an even number
|
367
|
+
# of elements, ending with Boundary.
|
368
|
+
if use_para_index
|
369
|
+
ary = ary.map{|i| [i*2, i*2+1]}.flatten
|
370
|
+
elsif index_para?(ary[-1], skip_check: true)
|
371
|
+
# The last index in the given Array or Range was for Paragraph (Likely case).
|
372
|
+
ary.push(ary[-1]+1)
|
373
|
+
end
|
374
|
+
|
375
|
+
# Exception if they are not consecutive.
|
376
|
+
ary.inject{|memo, val| (val == memo+1) ? val : raise(ArgumentError, "Given (Paragraph) indices are not consecutive.")}
|
377
|
+
|
378
|
+
$myd = false
|
379
|
+
# Exception if the first index is for Boundary and no Paragraph.
|
380
|
+
raise ArgumentError, "The first index (#{ary[0]}) is not for Paragraph." if !index_para?(ary[0], skip_check: true)
|
381
|
+
|
382
|
+
i_end = [ary[-1], size-1].min
|
383
|
+
return if i_end - ary[0] < 3 # No more than 1 para selected.
|
384
|
+
|
385
|
+
(ary[0]..ary[-1])
|
386
|
+
end
|
387
|
+
private :build_index_range_for_merge_para!
|
388
|
+
|
286
389
|
# Normalize the content, making sure it has an even number of elements
|
287
390
|
#
|
288
391
|
# The even and odd number elements are, if bare Strings or Array, converted into
|
data/lib/plain_text/split.rb
CHANGED
@@ -46,6 +46,42 @@ module PlainText
|
|
46
46
|
adjust_last_element(arret) # => Array
|
47
47
|
end
|
48
48
|
|
49
|
+
# The class-method version of the instance method of the same name.
|
50
|
+
#
|
51
|
+
# One more parameter (input String) is required to specify.
|
52
|
+
#
|
53
|
+
# @param instr [String] String that is examined.
|
54
|
+
# @param re_in [Regexp, String] If String, it is interpreted literally as in String#split.
|
55
|
+
# @param like_linenum: [Boolean] if true (Def: false), it counts like the line number.
|
56
|
+
# @param with_if_end: [Boolean] a special case (see the description).
|
57
|
+
# @return [Integer, Array<Integer, Boolean>] non-negative count (Array<Integer, Boolean> if with_if_end is true)
|
58
|
+
# @see PlainText::Split#count_regexp
|
59
|
+
def self.count_regexp(instr, re_in, like_linenum: false, with_if_end: false)
|
60
|
+
like_linenum = true if with_if_end
|
61
|
+
return (with_if_end ? [0, true] : 0) if instr.empty?
|
62
|
+
allsize = split_with_delimiter(instr, re_in).size
|
63
|
+
|
64
|
+
n_normal = allsize.div(2)
|
65
|
+
return n_normal if !like_linenum
|
66
|
+
n_lines = (allsize.even? ? allsize : allsize+1).div 2
|
67
|
+
with_if_end ? [n_normal, (n_normal == n_lines)] : n_lines
|
68
|
+
end
|
69
|
+
|
70
|
+
# The class-method version of the instance method of the same name.
|
71
|
+
#
|
72
|
+
# One more parameter (input String) is required to specify.
|
73
|
+
#
|
74
|
+
# @param instr [String] String that is examined.
|
75
|
+
# @param linebreak: [String] +\n+ etc (Default: $/).
|
76
|
+
# @return [Integer] always positive
|
77
|
+
# @see #count_lines
|
78
|
+
def self.count_lines(instr, linebreak: $/)
|
79
|
+
return 0 if instr.empty?
|
80
|
+
ar = instr.split(linebreak, -1) # -1 is specified to preserve the last linebreak(s).
|
81
|
+
ar.pop if "" == ar[-1]
|
82
|
+
ar.size
|
83
|
+
end
|
84
|
+
|
49
85
|
####################################################
|
50
86
|
# Class methods (Private)
|
51
87
|
####################################################
|
@@ -93,6 +129,44 @@ module PlainText
|
|
93
129
|
def split_with_delimiter(*rest)
|
94
130
|
PlainText::Split.public_send(__method__, self, *rest)
|
95
131
|
end
|
132
|
+
|
133
|
+
# Count the number of matches to self that satisfy the given Regexp
|
134
|
+
#
|
135
|
+
# If like_linenum option is specified, it is counted like the number of
|
136
|
+
# lines, namely the returned value is incremented from the number of
|
137
|
+
# matches by 1 unless the very last characters of the String is
|
138
|
+
# the last match.
|
139
|
+
# For example, if no matches are found, this still returns one.
|
140
|
+
#
|
141
|
+
# Note if the String (self) is empty, this always returns 0.
|
142
|
+
#
|
143
|
+
# The special option is +with_if_end+. If given true,
|
144
|
+
# this returns Array<Integer, Boolean> instead of a simple Integer,
|
145
|
+
# with the first parameter being the Integer of the count as with
|
146
|
+
# the default like_linenum=false, and the second parameter gives
|
147
|
+
# true if the number is the same even if it was like_linenum=true,
|
148
|
+
# namely if the end of the String coincides with the last match,
|
149
|
+
# else false.
|
150
|
+
# (This parameter is introduced just to reduce the overhead of
|
151
|
+
# potentially calling this routine twice or user's making their own check.)
|
152
|
+
#
|
153
|
+
# @param re_in [Regexp, String] If String, it is interpreted literally as in String#split.
|
154
|
+
# @param like_linenum: [Boolean] if true (Def: false), it counts like the line number.
|
155
|
+
# @param with_if_end: [Boolean] a special case (see the description).
|
156
|
+
# @return [Integer, Array<Integer, Boolean>] always positive
|
157
|
+
# @see PlainText::Split#count_regexp
|
158
|
+
def count_regexp(*rest, **kwd)
|
159
|
+
PlainText::Split.public_send(__method__, self, *rest, **kwd)
|
160
|
+
end
|
161
|
+
|
162
|
+
# Returns the number of lines.
|
163
|
+
#
|
164
|
+
# @param linebreak: [String] +\n+ etc (Default: $/).
|
165
|
+
# @return [Integer] always positive
|
166
|
+
# @see PlainText::Split#count_regexp
|
167
|
+
def count_lines(**kwd)
|
168
|
+
PlainText::Split.public_send(__method__, self, **kwd)
|
169
|
+
end
|
96
170
|
end # module Split
|
97
171
|
end # module PlainText
|
98
172
|
|
data/lib/plain_text/util.rb
CHANGED
@@ -11,6 +11,32 @@ module PlainText
|
|
11
11
|
# All methods in this Module are module functions.
|
12
12
|
module_function
|
13
13
|
|
14
|
+
private
|
15
|
+
|
16
|
+
# Make the Array of Ranges from an Array of positive Integers
|
17
|
+
#
|
18
|
+
# @example
|
19
|
+
# arind2ranges [1,2,3,6,7,9]
|
20
|
+
# # => [(1..3), (6..7), (9..9)]
|
21
|
+
#
|
22
|
+
# @param arin [Array<Integer>]
|
23
|
+
# @return [Array<Range>]
|
24
|
+
def arind2ranges(arin)
|
25
|
+
arout = []
|
26
|
+
(curi = curf = arin[0]) || raise("arin should not be empty.")
|
27
|
+
arin.each do |i|
|
28
|
+
if i > curf + 1
|
29
|
+
arout.push(curi..curf)
|
30
|
+
curi = curf = i
|
31
|
+
else
|
32
|
+
curf = i
|
33
|
+
end
|
34
|
+
end
|
35
|
+
arout.push(curi..curf)
|
36
|
+
arout
|
37
|
+
end
|
38
|
+
private :arind2ranges
|
39
|
+
|
14
40
|
# Returns a pair of Arrays of even and odd number-indices of the original Array
|
15
41
|
#
|
16
42
|
# @example
|
@@ -36,14 +62,15 @@ module PlainText
|
|
36
62
|
# If the negative index is out of range, it returns nil.
|
37
63
|
#
|
38
64
|
# @param i [Integer]
|
39
|
-
# @param ary [Array] Reference Array.
|
65
|
+
# @param ary [Array, Integer, nil] Reference Array or its size (Array#size) or nil (interpreted as self#size (untested)).
|
40
66
|
# @return [Integer, NilClass] nil if out of range to the negative. Note in most cases in Ruby default, it raises IndexError. See the code of {#positive_array_index_checked}
|
41
67
|
# @raise [TypeError] if non-integer is specified.
|
42
|
-
# @raise [ArgumentError] if ary is
|
43
|
-
def positive_array_index(i, ary)
|
44
|
-
|
68
|
+
# @raise [ArgumentError] if ary is neither an Array nor Integer, or more specifically, it does not have size method or ary.size does not return Integer or similar.
|
69
|
+
def positive_array_index(i, ary=nil)
|
70
|
+
arysize = (ary ? (ary.respond_to?(:to_int) ? ary.to_int : ary.size) : size)
|
71
|
+
i2 = i.to_int rescue (raise TypeError, sprintf("no implicit conversion of #{i.class} into Integer (i=#{i.inspect},ary=#{ary.inspect})"))
|
45
72
|
return i2 if i2 >= 0
|
46
|
-
ret =
|
73
|
+
ret = arysize + i2 rescue (raise ArgumentError, "Reference is neither an Array nor Integer.")
|
47
74
|
(ret < 0) ? nil : ret
|
48
75
|
end
|
49
76
|
|
@@ -55,22 +82,24 @@ module PlainText
|
|
55
82
|
# Wrapper for {#positive_array_index}
|
56
83
|
#
|
57
84
|
# @param index_in [Integer] Index to check and convert from. Potentially negative integer.
|
58
|
-
# @param ary [Array] Reference Array.
|
85
|
+
# @param ary [Array, Integer, nil] Reference Array or its size (Array#size) or nil (interpreted as self#size (untested)).
|
59
86
|
# @param accept_too_big: [Boolean, NilClass] if true (Default), a positive index larger than the last array index is returned as it is. If nil, the last index + 1 is accepted but raises an Exception for anything larger. If false, any index larger than the last index raises an Exception.
|
60
87
|
# @param varname: [NilClass, String] Name of the variable (or nil) to be used for error messages.
|
61
88
|
# @return [Integer] Non-negative index; i.e., if index=-1 is specified for an Array with a size of 3, the returned value is 2 (the last index of it).
|
62
89
|
# @raise [IndexError] if the index is out of the range to negative.
|
63
|
-
|
90
|
+
# @raise [ArgumentError] if ary is neither an Array nor Integer, or more specifically, it does not have size method or ary.size does not return Integer or similar.
|
91
|
+
def positive_array_index_checked(index_in, ary=nil, accept_too_big: true, varname: nil)
|
64
92
|
# def self.positive_valid_index_for_array(index_in, ary, varname: nil)
|
65
93
|
errmsgs = {}
|
66
94
|
%w(of for).each do |i|
|
67
95
|
errmsgs[i] = (varname ? "." : sprintf(" %s %s.", i, varname))
|
68
96
|
end
|
69
97
|
|
70
|
-
|
71
|
-
|
98
|
+
arysize = (ary ? (ary.respond_to?(:to_int) ? ary.to_int : ary.size) : size)
|
99
|
+
index = positive_array_index(index_in, arysize) # guaranteed to be Integer or nil (or ArgumentError)
|
100
|
+
raise IndexError, sprintf("index (%s) too small for array; minimum: -%d", index_in, arysize) if !index # Ruby default Error message (except the variable "index" as opposed to "index_in is used in the true Ruby default).
|
72
101
|
if index_in >= 0
|
73
|
-
last_index =
|
102
|
+
last_index = arysize - 1
|
74
103
|
errnote1 = nil
|
75
104
|
if (index > last_index + 1) && !accept_too_big
|
76
105
|
errnote1 = ' (or +1)'
|
@@ -82,6 +111,38 @@ module PlainText
|
|
82
111
|
index
|
83
112
|
end
|
84
113
|
|
114
|
+
# Converts Array or Range to an Array with positive indices.
|
115
|
+
#
|
116
|
+
# @param from [Array, Range]
|
117
|
+
# @param arref [Array, Integer] Reference Array or its size (Array#size) or nil (interpreted as self#size).
|
118
|
+
# @param flatten: [Boolean] If true (Default), if elements are Range, they are unfolded. If false and if an Array containing a Range, Exception is raised.
|
119
|
+
# @param sortuniq: [Boolean] If true (Default), the return is sorted and uniq-ed.
|
120
|
+
# @return [Array, nil] nil if arref is empty or if out of range to the negative. Note in most cases in Ruby default, it raises IndexError. See the code of {#positive_array_index_checked}
|
121
|
+
# @raise [TypeError] if non-integer is specified.
|
122
|
+
# @raise [ArgumentError] if arref is neither an Array nor Integer, or more specifically, it does not have size method or arref.size does not return Integer or similar.
|
123
|
+
def to_ary_positive_index(from, arref, flatten: true, sortuniq: true, **kwd)
|
124
|
+
arrefsize = (arref ? (arref.respond_to?(:to_int) ? arref.to_int : arref.size) : size)
|
125
|
+
return nil if arrefsize < 1
|
126
|
+
if flatten
|
127
|
+
from = [from].flatten.map{|ec| like_range?(ec) ? send(__method__, ec, arrefsize, flatten: false, sortuniq: sortuniq, **kwd) : ec }.flatten
|
128
|
+
end
|
129
|
+
if like_range?(from)
|
130
|
+
i_beg = positive_array_index_checked(from.begin, arrefsize, **kwd)
|
131
|
+
n = from.end
|
132
|
+
i_end = ((n.nil? || n == Float::INFINITY) ? arrefsize-1 : positive_array_index_checked(n, arrefsize, **kwd))
|
133
|
+
return (from.exclude_end? ? (i_beg...i_end) : (i_beg..i_end)).to_a
|
134
|
+
end
|
135
|
+
ret = from.map{|i| positive_array_index_checked(i, arrefsize, **kwd)}
|
136
|
+
(sortuniq ? ret.sort.uniq : ret)
|
137
|
+
end
|
138
|
+
|
139
|
+
# Returns true if obj is like Range (duck-typing).
|
140
|
+
#
|
141
|
+
# @param obj [Object]
|
142
|
+
def like_range?(obj)
|
143
|
+
obj.respond_to? :exclude_end?
|
144
|
+
end
|
145
|
+
|
85
146
|
# Raise TypeError
|
86
147
|
#
|
87
148
|
# Call as +raise_typeerror(var_name)+ from instance methods,
|
data/lib/plain_text.rb
CHANGED
@@ -591,7 +591,7 @@ module PlainText
|
|
591
591
|
# @param inclusive: [Boolean] read only when unit is :line. If inclusive (Default), the (entire) line that matches is included in the result.
|
592
592
|
# @param linebreak: [String] +\n+ etc (Default: +$/+), used when +unit==:line+ (Default)
|
593
593
|
# @return [String] as self
|
594
|
-
def head(num_in=DEF_HEADTAIL_N_LINES, unit: :line, inclusive: true, linebreak: $/)
|
594
|
+
def head(num_in=DEF_HEADTAIL_N_LINES, unit: :line, inclusive: true, padding: 0, linebreak: $/)
|
595
595
|
if num_in.class.method_defined? :to_int
|
596
596
|
num = num_in.to_int
|
597
597
|
raise ArgumentError, "Non-positive num (#{num_in}) is given in #{__method__}" if num.to_int < 1
|
@@ -604,10 +604,10 @@ module PlainText
|
|
604
604
|
case unit
|
605
605
|
when :line, "-n"
|
606
606
|
# Regexp (for boundary)
|
607
|
-
return head_regexp(re_in, inclusive: inclusive, linebreak: linebreak) if re_in
|
607
|
+
return head_regexp(re_in, inclusive: inclusive, padding: padding, linebreak: linebreak) if re_in
|
608
608
|
|
609
609
|
# Integer (a number of lines)
|
610
|
-
ret = split(linebreak)[0..(num-1)].join(linebreak)
|
610
|
+
ret = split(linebreak, -1)[0..(num-1)].join(linebreak) # -1 is specified to preserve the last linebreak(s).
|
611
611
|
return ret if size <= ret.size # Specified line is larger than the original or the last NL is missing.
|
612
612
|
return(ret << linebreak) # NL is added to the tail as in the original.
|
613
613
|
when :char
|
@@ -634,7 +634,7 @@ module PlainText
|
|
634
634
|
# @return same as self
|
635
635
|
def head_inverse(*rest, **key)
|
636
636
|
s2 = head(*rest, **key)
|
637
|
-
(s2.size >= size) ?
|
637
|
+
(s2.size >= size) ? self[0,0] : self[s2.size..-1]
|
638
638
|
end
|
639
639
|
|
640
640
|
# Normalizes line-breaks
|
@@ -779,7 +779,8 @@ module PlainText
|
|
779
779
|
# @param inclusive: [Boolean] read only when unit is :line. If inclusive (Default), the (entire) line that matches is included in the result.
|
780
780
|
# @param linebreak: [String] +\n+ etc (Default: +$/+), used when unit==:line (Default)
|
781
781
|
# @return [String] as self
|
782
|
-
def tail(num_in=DEF_HEADTAIL_N_LINES, unit: :line, inclusive: true, linebreak: $/)
|
782
|
+
def tail(num_in=DEF_HEADTAIL_N_LINES, unit: :line, inclusive: true, padding: 0, linebreak: $/)
|
783
|
+
|
783
784
|
if num_in.class.method_defined? :to_int
|
784
785
|
num = num_in.to_int
|
785
786
|
raise ArgumentError, "num of zero is given in #{__method__}" if num == 0
|
@@ -793,7 +794,7 @@ module PlainText
|
|
793
794
|
case unit
|
794
795
|
when :line, '-n'
|
795
796
|
# Regexp (for boundary)
|
796
|
-
return tail_regexp(re_in, inclusive: inclusive, linebreak: linebreak) if re_in
|
797
|
+
return tail_regexp(re_in, inclusive: inclusive, padding: padding, linebreak: linebreak) if re_in
|
797
798
|
|
798
799
|
# Integer (a number of lines)
|
799
800
|
return tail_linenum(num_in, num, linebreak: linebreak)
|
@@ -822,7 +823,7 @@ module PlainText
|
|
822
823
|
# @return same as self
|
823
824
|
def tail_inverse(*rest, **key)
|
824
825
|
s2 = tail(*rest, **key)
|
825
|
-
(s2.size >= size) ?
|
826
|
+
(s2.size >= size) ? self[0,0] : self[0..(size-s2.size-1)]
|
826
827
|
end
|
827
828
|
|
828
829
|
|
@@ -832,25 +833,71 @@ module PlainText
|
|
832
833
|
|
833
834
|
# head command with Regexp
|
834
835
|
#
|
836
|
+
# @todo Improve the algorithm like {#tail_regexp}
|
837
|
+
#
|
835
838
|
# @param re_in [Regexp] Regexp to determine the boundary.
|
836
839
|
# @param inclusive: [Boolean] If true (Default), the (entire) line that matches re_in is included in the result. Else the entire line is excluded.
|
840
|
+
# @param padding: [Integer] Number of lines (positive or negative) to add to the number of lines returned.
|
837
841
|
# @param linebreak: [String] +\n+ etc (Default: $/).
|
838
842
|
# @return [String] as self
|
839
843
|
# @see #head
|
840
|
-
def head_regexp(re_in, inclusive: true, linebreak: $/)
|
844
|
+
def head_regexp(re_in, inclusive: true, padding: 0, linebreak: $/)
|
845
|
+
return self if empty?
|
841
846
|
mat = re_in.match self
|
842
847
|
return self if !mat
|
843
848
|
if inclusive
|
844
|
-
|
849
|
+
postmat = post_match_in_line(mat, linebreak: linebreak)
|
850
|
+
strmain = mat.pre_match+mat[0]+(postmat ? postmat[0] : "")
|
845
851
|
else
|
846
|
-
|
852
|
+
strmain = pre_match_in_line(mat.pre_match, linebreak: linebreak).pre_match
|
847
853
|
end
|
854
|
+
return strmain if padding == 0
|
855
|
+
|
856
|
+
## Adds padding
|
857
|
+
|
858
|
+
lines_main_deli, nlines_main = split_lines_with_nlines(strmain, linebreak: linebreak)
|
859
|
+
n_lines2ret = nlines_main + padding
|
860
|
+
|
861
|
+
return self[0,0] if n_lines2ret <= 0 # padding is too negatively large and hence no lines left.
|
862
|
+
# [0,0] instead of "" is used to preserve its class (in case it is a child class of String).
|
863
|
+
return lines_main_deli[0..(n_lines2ret*2-1)].join if padding < 0
|
864
|
+
|
865
|
+
# positive padding
|
866
|
+
lines_self_deli, nlines_self = split_lines_with_nlines(linebreak: linebreak)
|
867
|
+
return self if n_lines2ret > nlines_self
|
868
|
+
return lines_self_deli[0..(n_lines2ret*2-1)].join
|
848
869
|
end
|
849
870
|
private :head_regexp
|
850
871
|
|
872
|
+
# Returns Array of [Line,NL,Line,NL, ...] and number of lines
|
873
|
+
#
|
874
|
+
# Wrapper of {PlainText::Split::split_with_delimiter}(str, linebreak)
|
875
|
+
#
|
876
|
+
# See {PlainText::Split::count_lines} (and {PlainText::Split#count_lines})
|
877
|
+
#
|
878
|
+
# @param str [String]
|
879
|
+
# @return [Array<Array, Integer>]
|
880
|
+
def split_lines_with_nlines(str=self, linebreak: $/)
|
881
|
+
arline = PlainText::Split::split_with_delimiter(str, linebreak)
|
882
|
+
nlines = arline.size
|
883
|
+
nlines += 1 if nlines.odd? # There is no newline at the tail.
|
884
|
+
|
885
|
+
# This is the NUMBER of the lines (NOT index)
|
886
|
+
nlines = nlines.quo 2 # "/" MUST be fine...
|
887
|
+
[arline, nlines]
|
888
|
+
end
|
889
|
+
private :split_lines_with_nlines
|
890
|
+
|
851
891
|
|
852
892
|
# Returns MatchData of the String at and before the first linebreak before the MatchData (inclusive)
|
853
893
|
#
|
894
|
+
# Basically this returns String after the last linebreak of the input
|
895
|
+
#
|
896
|
+
# @example
|
897
|
+
# pre_match_in_line("1\n2\n__abc") # => #<MatchData "__abc"> pre_match=="1\n2\n"
|
898
|
+
# pre_match_in_line("1\n2\n") # => #<MatchData "" > pre_match=="1\n2\n"
|
899
|
+
# pre_match_in_line( "__abc") # => #<MatchData "__abc"> pre_match==" "
|
900
|
+
#
|
854
901
|
# @param strpre [String] String of prematch of the last MatchData
|
855
902
|
# @param linebreak: [String] +\n+ etc (Default: $/)
|
856
903
|
# @return [MatchData] m[0] is the string after the last linebreak before the matched data (exclusive) and m.pre_match is all the lines before that.
|
@@ -861,46 +908,124 @@ module PlainText
|
|
861
908
|
end
|
862
909
|
private :pre_match_in_line
|
863
910
|
|
911
|
+
# Get the line index numbers of the first and last lines of the matched string
|
912
|
+
#
|
913
|
+
# It is the Ruby index number as used in the each_line method.
|
914
|
+
#
|
915
|
+
# Matched String can span for multi-lines.
|
916
|
+
#
|
917
|
+
# Note if matched string is empty, it still is treated as significant.
|
918
|
+
#
|
919
|
+
# @param mat [MatchData, String] If String, it is User's (last) matched String.
|
920
|
+
# @param strpre [String, nil] Pre-match from the beginning of self to the matched string, if mat is String.
|
921
|
+
# @param linebreak: [String] +\n+ etc (Default: $/)
|
922
|
+
# @return [Hash<Integer, nil>] 2 keys: :first_matched, :last_matched (:last_prematch and :first_post_match are currently not set)
|
923
|
+
def _matched_line_indices(mat, strpre=nil, linebreak: $/)
|
924
|
+
if mat.class.method_defined? :post_match
|
925
|
+
# mat is MatchData
|
926
|
+
strmatched, strpre = mat[0], mat.pre_match
|
927
|
+
else
|
928
|
+
strmatched = mat.to_str rescue raise_typeerror(mat, 'String')
|
929
|
+
end
|
930
|
+
|
931
|
+
hsret = {
|
932
|
+
# last_prematch: nil,
|
933
|
+
first_matched: nil,
|
934
|
+
last_matched: nil,
|
935
|
+
# first_post_match: nil
|
936
|
+
}
|
937
|
+
|
938
|
+
_, hsret[:first_matched] = _ilines_consecutive(strpre, linebreak: linebreak)
|
939
|
+
hsret[:last_matched], _ = _ilines_consecutive(strpre+strmatched, linebreak: linebreak)
|
940
|
+
|
941
|
+
hsret
|
942
|
+
end
|
943
|
+
private :_matched_line_indices
|
944
|
+
|
945
|
+
# Returns a pair of line index numbers.
|
946
|
+
#
|
947
|
+
# In the following order:
|
948
|
+
#
|
949
|
+
# 1. Index of the last line of the argument String (number of lines - 1) (or nil if it becomes negative).
|
950
|
+
# 2. the 1st one plus 1 IF the last line ends with a linebreak. Or, 0 if the first one is empty. Otherwise the same as the 1st.
|
951
|
+
#
|
952
|
+
# @return [Array<Integer, nil>] nil if the value is invalid.
|
953
|
+
def _ilines_consecutive(str, linebreak: $/)
|
954
|
+
return [nil, 0] if str.empty?
|
955
|
+
|
956
|
+
# Only if first ends with a linebreak, increment by 1 for the second.
|
957
|
+
nmatched, with_linened = PlainText::Split.count_regexp(str, linebreak, with_if_end: true)
|
958
|
+
first = nmatched + (with_linened ? 0 : 1) - 1
|
959
|
+
second = first + (with_linened ? 1 : 0)
|
960
|
+
first = nil if first < 0
|
961
|
+
[first, second]
|
962
|
+
end
|
963
|
+
private :_ilines_consecutive
|
964
|
+
|
864
965
|
# Returns MatchData of the String after the MatchData to the linebreak (inclusive)
|
865
966
|
#
|
866
|
-
# @param mat [MatchData, String]
|
867
|
-
# @param strpost [String, nil] Post-match, if mat is String.
|
967
|
+
# @param mat [MatchData, String] If String, it is User's (last) matched String.
|
968
|
+
# @param strpost [String, nil] Post-match, if mat is String. After User's last match.
|
868
969
|
# @param linebreak: [String] +\n+ etc (Default: $/)
|
869
|
-
# @return [MatchData] m[0] is the string after matched data and up to the next first linebreak (inclusive) (or empty string if the last character(s) of matched data is the linebreak) and m.post_match is all the lines after that.
|
970
|
+
# @return [MatchData] m[0] is the string after matched data and up to the next first linebreak (inclusive) (or empty string if the last character(s) of matched data is the linebreak) and m.post_match is all the lines after that. (maybe nil?? not sure...)
|
870
971
|
def post_match_in_line(mat, strpost=nil, linebreak: $/)
|
972
|
+
lb_quo = Regexp.quote linebreak
|
871
973
|
if mat.class.method_defined? :post_match
|
872
974
|
# mat is MatchData
|
873
975
|
strmatched, strpost = mat[0], mat.post_match
|
874
976
|
else
|
875
977
|
strmatched = mat.to_str rescue raise_typeerror(mat, 'String')
|
876
978
|
end
|
877
|
-
lb_quo
|
878
|
-
return
|
979
|
+
return(/\A/.match strpost) if /#{lb_quo}\z/ =~ strmatched
|
980
|
+
return(/\A/.match strpost) if strpost.empty?
|
879
981
|
/.*?#{lb_quo}/m.match strpost # non-greedy match and m option are required, as linebreak can be any characters.
|
880
982
|
end
|
881
983
|
private :post_match_in_line
|
882
984
|
|
883
985
|
# tail command with Regexp
|
884
986
|
#
|
987
|
+
# == Algorithm
|
988
|
+
#
|
989
|
+
# 1. Split the String with Regexp with {PlainText::Split#split_with_delimiter}
|
990
|
+
# 2. Find the last matched String.
|
991
|
+
# 3. Find the "line"-index-number of the matched String.
|
992
|
+
# 4. Adjust the line index number depending on inclusive/exclusive
|
993
|
+
# 5. Add positive/negative padding number
|
994
|
+
# 6. pass it to {#head_inverse} (after Line-1).
|
995
|
+
#
|
885
996
|
# @param re_in [Regexp] Regexp to determine the boundary.
|
886
997
|
# @param inclusive: [Boolean] If true (Default), the (entire) line that matches re_in is included in the result. Else the entire line is excluded.
|
887
998
|
# @param linebreak: [String] +\n+ etc (Default: $/).
|
888
999
|
# @return [String] as self
|
889
1000
|
# @see #tail
|
890
|
-
def tail_regexp(re_in, inclusive: true, linebreak: $/)
|
891
|
-
|
892
|
-
|
893
|
-
|
894
|
-
|
895
|
-
|
896
|
-
|
897
|
-
|
898
|
-
|
899
|
-
|
1001
|
+
def tail_regexp(re_in, inclusive: true, padding: 0, linebreak: $/)
|
1002
|
+
return self if empty?
|
1003
|
+
|
1004
|
+
# "split" with the given Regexp pattern (NOT with linebreak!)
|
1005
|
+
# arst == (Array<String>) [PreMatch, UsersMatch1, Str, UsersMatch2,..., UsersMatchN [, PostMatch]]
|
1006
|
+
# If the user's match comes at the very end, the last element does not exist.
|
1007
|
+
arst = split_with_delimiter re_in # PlainText::Split#split_with_delimiter (included in this module (hence maybe String))
|
1008
|
+
|
1009
|
+
# UsersMatch basically failed - no match.
|
1010
|
+
return self[0,0] if arst.size <= 1
|
1011
|
+
|
1012
|
+
arst.push "" if arst.size.even?
|
1013
|
+
# Now the last component is guaranteed not to be the delimiter (= String of User's match)
|
1014
|
+
# arst == [PreMatch, UsersMatch1, ..., UsersMatchN, PostMatch(maybe-empty)]
|
1015
|
+
# Minimum:
|
1016
|
+
# arst == [PreMatch, UsersMatch1, PostMatch] (<= maybe much more PreMatch-es)
|
1017
|
+
# (Either/both PreMatch and PostMatch can be empty).
|
1018
|
+
|
1019
|
+
hslinenum = _matched_line_indices(arst[-2], arst[0..-3].join, linebreak: $/)
|
1020
|
+
|
1021
|
+
# Note: hslinenum[] is for indices, whereas the number of lines is
|
1022
|
+
# required here to pass to head_inverse().
|
1023
|
+
nlines_remove_to = (inclusive ? hslinenum[:first_matched] : hslinenum[:last_matched]+1) - padding
|
1024
|
+
return self if nlines_remove_to <= 0 # everything
|
1025
|
+
return head_inverse(nlines_remove_to)
|
900
1026
|
end
|
901
1027
|
private :tail_regexp
|
902
1028
|
|
903
|
-
|
904
1029
|
# tail command based on the number of lines
|
905
1030
|
#
|
906
1031
|
# @param num_in [Integer] Original argument of the specified number of lines
|
@@ -910,7 +1035,7 @@ module PlainText
|
|
910
1035
|
# @see #tail
|
911
1036
|
def tail_linenum(num_in, num, linebreak: $/)
|
912
1037
|
arret = split(linebreak, -1) # -1 is specified to preserve the last linebreak(s).
|
913
|
-
return self
|
1038
|
+
return self[0,0] if arret.empty?
|
914
1039
|
|
915
1040
|
lb_quo = Regexp.quote linebreak
|
916
1041
|
if num_in > 0
|
@@ -918,7 +1043,7 @@ module PlainText
|
|
918
1043
|
num = 0 if num >= arret.size
|
919
1044
|
end
|
920
1045
|
ar = arret[(-num)..-1]
|
921
|
-
(ar.nil? || ar.empty?) ? self
|
1046
|
+
(ar.nil? || ar.empty?) ? self[0,0] : ar.join(linebreak)
|
922
1047
|
end
|
923
1048
|
private :tail_linenum
|
924
1049
|
|
data/plain_text.gemspec
CHANGED
@@ -5,17 +5,17 @@ require 'date'
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{plain_text}.sub(/.*/){|c| (c == File.basename(Dir.pwd)) ? c : raise("ERROR: s.name=(#{c}) in gemspec seems wrong!")}
|
8
|
-
s.version = "0.
|
8
|
+
s.version = "0.5".sub(/.*/){|c| fs = Dir.glob('changelog{,.*}', File::FNM_CASEFOLD); raise('More than one ChangeLog exist!') if fs.size > 1; warn("WARNING: Version(s.version=#{c}) already exists in #{fs[0]} - ok?") if fs.size == 1 && !IO.readlines(fs[0]).grep(/^\(Version: #{Regexp.quote c}\)$/).empty? ; c } # n.b., In macOS, changelog and ChangeLog are identical in default.
|
9
9
|
# s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
10
|
-
|
10
|
+
s.bindir = 'bin'
|
11
|
+
%w(countchar textclean head.rb tail.rb yard2md_afterclean).each do |f|
|
11
12
|
path = s.bindir+'/'+f
|
12
13
|
File.executable?(path) ? s.executables << f : raise("ERROR: Executable (#{path}) is not executable!")
|
13
14
|
end
|
14
|
-
s.bindir = 'bin'
|
15
15
|
s.authors = ["Masa Sakano"]
|
16
|
-
s.date = %q{2019-
|
16
|
+
s.date = %q{2019-11-07}.sub(/.*/){|c| (Date.parse(c) == Date.today) ? c : raise("ERROR: s.date=(#{c}) is not today!")}
|
17
17
|
s.summary = %q{Module to handle Plain-Text}
|
18
|
-
s.description = %q{This module provides utility functions and methods to handle plain text, classes Part/Paragraph/Boundary to represent the logical structure of a document and ParseRule to describe the rules to parse plain text to produce a Part-type Ruby instance.}
|
18
|
+
s.description = %q{This module provides utility functions and methods to handle plain text, classes Part/Paragraph/Boundary to represent the logical structure of a document and ParseRule to describe the rules to parse plain text to produce a Part-type Ruby instance. A few handy Ruby executable scripts to make use of them are included.}
|
19
19
|
# s.email = %q{abc@example.com}
|
20
20
|
s.extra_rdoc_files = [
|
21
21
|
# "LICENSE",
|
@@ -34,6 +34,7 @@ Gem::Specification.new do |s|
|
|
34
34
|
ret
|
35
35
|
}
|
36
36
|
s.files.reject! { |fn| File.symlink? fn }
|
37
|
+
|
37
38
|
# s.add_runtime_dependency 'rails'
|
38
39
|
# s.add_development_dependency "bourne", [">= 0"]
|
39
40
|
s.homepage = %q{https://www.wisebabel.com}
|
@@ -47,5 +48,8 @@ Gem::Specification.new do |s|
|
|
47
48
|
# s.rubygems_version = %q{1.3.5} # This is always set automatically!!
|
48
49
|
|
49
50
|
s.metadata["yard.run"] = "yri" # use "yard" to build full HTML docs.
|
51
|
+
s.metadata["changelog_uri"] = "https://github.com/masasakano/#{s.name}/blob/master/ChangeLog"
|
52
|
+
s.metadata["source_code_uri"] = "https://github.com/masasakano/#{s.name}"
|
53
|
+
# s.metadata["documentation_uri"] = "https://www.example.info/gems/bestgemever/0.0.1"
|
50
54
|
end
|
51
55
|
|