plain_text 0.6 → 0.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ChangeLog +13 -0
- data/Makefile +1 -1
- data/README.en.rdoc +19 -19
- data/bin/yard2md_afterclean +14 -3
- data/lib/plain_text/parse_rule.rb +3 -3
- data/lib/plain_text/part.rb +7 -8
- data/lib/plain_text/split.rb +9 -8
- data/lib/plain_text/util.rb +6 -6
- data/lib/plain_text.rb +34 -34
- data/plain_text.gemspec +3 -3
- data/test/testyard2md_afterclean.rb +38 -2
- metadata +12 -12
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 21f798fe1e22424b48114466382f56a8c27a065ee63d6e9c68c98f5c7e505f14
|
4
|
+
data.tar.gz: 0dde006503a336e1e96960dedd7e04c09ea88723495df93ca11bfa177d0f390e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 30a1f8819371a6b2204df7e47b671c95b227fe14d8b134373c3d0768e35e89bd0c3386707cebbc499aab026631aa0b6fb3838112f198a46ae35328e19ab66eec
|
7
|
+
data.tar.gz: baca6464f9e66e01154fe72c7fee869ae8218f4a65f46679b2d9f934780eeef0f893f7d01c3a417a9beb007c390ce0b6c7f3aa40b9c49130ca4dcc5a40c2ba4e
|
data/ChangeLog
CHANGED
@@ -1,3 +1,16 @@
|
|
1
|
+
-----
|
2
|
+
(Version: 0.7)
|
3
|
+
2022-08-25 Masa Sakano
|
4
|
+
* fixed many yard-doc warnings.
|
5
|
+
|
6
|
+
-----
|
7
|
+
2022-08-25 Masa Sakano
|
8
|
+
* Now auto-judges languages. Fixed a bug of chopping some tails.
|
9
|
+
|
10
|
+
-----
|
11
|
+
2019-11-07 Masa Sakano
|
12
|
+
* Modified .github/README.md
|
13
|
+
|
1
14
|
-----
|
2
15
|
(Version: 0.6)
|
3
16
|
2019-11-07 Masa Sakano
|
data/Makefile
CHANGED
@@ -20,5 +20,5 @@ test:
|
|
20
20
|
|
21
21
|
## yard2md_afterclean in Gem plain_text https://rubygems.org/gems/plain_text
|
22
22
|
doc:
|
23
|
-
yard doc; [[ -x ".github" && ( "README.en.rdoc" -nt ".github/README.md" ) ]] && ( ruby -r rdoc -e 'puts RDoc::Markup::ToMarkdown.new.convert ARGF.read' < README.en.rdoc | yard2md_afterclean > .github/.README.md && mv .github/.README.md .github/README.md && echo ".github/README.md is updated." ) || exit 0
|
23
|
+
yard doc; [[ -x ".github" && ( "README.en.rdoc" -nt ".github/README.md" ) ]] && ( ruby -r rdoc -e 'puts RDoc::Markup::ToMarkdown.new.convert ARGF.read' < README.en.rdoc | yard2md_afterclean | ruby -e 'puts ARGF.read.sub(/(```)ruby(\nPart )/){$$1+"text"+$$2}' > .github/.README.md && mv .github/.README.md .github/README.md && echo ".github/README.md is updated." ) || exit 0
|
24
24
|
|
data/README.en.rdoc
CHANGED
@@ -9,7 +9,7 @@ which represent the logical structure of a document and another class
|
|
9
9
|
ParseRule, which describes the rules to parse plain text to produce a Part-type Ruby instance.
|
10
10
|
This package also provides a few command-line programs, such as counting the number
|
11
11
|
of characters (especially useful for documents in Asian (CJK)
|
12
|
-
|
12
|
+
characters) and advanced head/tail commands.
|
13
13
|
|
14
14
|
The master of this README file, as well as the document for all the methods, is found in
|
15
15
|
{RubyGems/plain_text}[https://rubygems.org/gems/plain_text]
|
@@ -119,7 +119,7 @@ Counts the number of characters in a file(s) or STDIN.
|
|
119
119
|
|
120
120
|
The simplest example to run the command-line script is
|
121
121
|
|
122
|
-
countchar YourFile.txt
|
122
|
+
% countchar YourFile.txt
|
123
123
|
|
124
124
|
=== textclean
|
125
125
|
|
@@ -132,30 +132,30 @@ into 2. See the reference of {PlainText.clean_text} for detail.
|
|
132
132
|
This gives advanced functions, in addition to the standard +head+, including
|
133
133
|
|
134
134
|
Regexp:: It can accept Ruby Regexp to determine the boundary (beginning to the first-matched line), including ignore-case, multi-line, extra *padding-line* etc.
|
135
|
-
Character-based:: With +--char+ option, it handles the file in units of a
|
136
|
-
Reverse:: It can *
|
135
|
+
Character-based:: With +--char+ option, it handles the file in units of a character, which is especially handy to deal with multi-byte characters like UTF-8.
|
136
|
+
Reverse:: It can *reverse* the behaviour - invert the counting to output everything but the initial NUM lines.
|
137
137
|
|
138
138
|
A few examples are
|
139
139
|
|
140
|
-
head.rb -n 5 < try.txt
|
140
|
+
% head.rb -n 5 < try.txt
|
141
141
|
# the same as the UNIX head; printing the first 5 lines
|
142
142
|
|
143
|
-
head.rb -i -n 5 try.txt
|
143
|
+
% head.rb -i -n 5 try.txt
|
144
144
|
# printing everything but the first 5 lines
|
145
145
|
# The same as the UNIX command: tail -n +5
|
146
146
|
|
147
|
-
head.rb -e '^===+' try.txt
|
147
|
+
% head.rb -e '^===+' try.txt
|
148
148
|
# => from the top up to the line that begins with more than 3 "="
|
149
149
|
|
150
|
-
head.rb -x -e '^===+' try.txt
|
150
|
+
% head.rb -x -e '^===+' try.txt
|
151
151
|
# => from the top up to the line before what begins with more than 3 "="
|
152
152
|
|
153
|
-
head.rb -e '^===+' -p 3 try.txt
|
153
|
+
% head.rb -e '^===+' -p 3 try.txt
|
154
154
|
# => from the top up to 3 lines after what begins with more than 3 "="
|
155
155
|
|
156
|
-
head.rb -e '([a-z])\1$' --padding=-2 try.txt
|
156
|
+
% head.rb -e '([a-z])\1$' --padding=-2 try.txt
|
157
157
|
# => from the top up to 2 lines before what ends with 2
|
158
|
-
# consecutive same letters (case-
|
158
|
+
# consecutive same letters (case-insensitive) like "AA" or "qQ"
|
159
159
|
|
160
160
|
The suffix +.rb+ is used to distinguish this command from the UNIX-shell standard command.
|
161
161
|
|
@@ -164,18 +164,18 @@ The suffix +.rb+ is used to distinguish this command from the UNIX-shell standar
|
|
164
164
|
This gives advanced functions, in addition to the standard +tail+, including
|
165
165
|
|
166
166
|
Regexp:: It can accept Ruby Regexp to determine the boundary (last-matched line to the end), including ignore-case, multi-line, extra *padding-line* etc.
|
167
|
-
Character-based:: With +--char+ option, it handles the file in units of a
|
168
|
-
Reverse:: It can *
|
167
|
+
Character-based:: With +--char+ option, it handles the file in units of a character, which is especially handy to deal with multi-byte characters like UTF-8.
|
168
|
+
Reverse:: It can *reverse* the behaviour - invert the counting to output everything but the last NUM lines.
|
169
169
|
|
170
170
|
See +head.rb+ for practical examples.
|
171
171
|
|
172
172
|
Note the UNIX form of
|
173
173
|
|
174
|
-
tail -n +5
|
174
|
+
% tail -n +5
|
175
175
|
|
176
|
-
(which I think is a bit counter-
|
176
|
+
(which I think is a bit counter-intuitive format) is equivalent to
|
177
177
|
|
178
|
-
head.rb -i -n 5
|
178
|
+
% head.rb -i -n 5
|
179
179
|
|
180
180
|
The suffix +.rb+ is used to distinguish this command from the UNIX-shell standard command.
|
181
181
|
|
@@ -185,7 +185,7 @@ This stands for "yard to markdown - after-clean".
|
|
185
185
|
|
186
186
|
The standard conversion way of RDoc (written for yard) with +rdoc+ library
|
187
187
|
|
188
|
-
RDoc::Markup::ToMarkdown.new.convert
|
188
|
+
RDoc::Markup::ToMarkdown.new.convert
|
189
189
|
|
190
190
|
is limited, with the produced markdown having a fair number of flaws.
|
191
191
|
This command tries to botch-fix it. The result is
|
@@ -222,7 +222,7 @@ Work in progress...
|
|
222
222
|
== Install
|
223
223
|
|
224
224
|
This script requires {Ruby}[http://www.ruby-lang.org] Version 2.0
|
225
|
-
or above (
|
225
|
+
or above (possibly 2.2 or above?).
|
226
226
|
|
227
227
|
For use of the library, if your Ruby script declares
|
228
228
|
|
@@ -243,7 +243,7 @@ You may need to modify the first line (Shebang line) of the script to suit your
|
|
243
243
|
environment (it should be unnecessary for Linux and MacOS), or run it
|
244
244
|
explicitly with your Ruby command as
|
245
245
|
|
246
|
-
|
246
|
+
% /YOUR/ENV/ruby /YOUR/INSTALLED/countchar
|
247
247
|
|
248
248
|
== Developer's note
|
249
249
|
|
data/bin/yard2md_afterclean
CHANGED
@@ -8,6 +8,7 @@ require 'plain_text'
|
|
8
8
|
BANNER = <<"__EOF__"
|
9
9
|
USAGE: #{File.basename($0)} [options] [INFILE.txt] < STDIN
|
10
10
|
Clean the partially ill-formated (Github) Markdown converted from yard-Rdoc.
|
11
|
+
Create <dl>, fix "+", add code-block languages etc.
|
11
12
|
__EOF__
|
12
13
|
|
13
14
|
# Initialising the hash for the command-line options.
|
@@ -25,7 +26,7 @@ OPTS = {
|
|
25
26
|
#
|
26
27
|
def handle_argv
|
27
28
|
opt = OptionParser.new(BANNER)
|
28
|
-
opt.on( '--lang=LANGUAGE', sprintf("Programming Language like ruby (Def: %s).", OPTS[:lang])) { |v| OPTS[:lang]=v.strip }
|
29
|
+
opt.on( '--lang=LANGUAGE', sprintf("Programming Language like ruby (Def: %s).", OPTS[:lang]), ' NOTE: blocks starting with "% " => sh, "<[a-z]" => HTML in default.') { |v| OPTS[:lang]=v.strip }
|
29
30
|
# opt.on( '--version', "Display the version and exits.", TrueClass) {|v| OPTS[:version] = v} # Consider opts.on_tail
|
30
31
|
opt.on( '--[no-]debug', "Debug (Def: false)", TrueClass) {|v| OPTS[:debug] = v}
|
31
32
|
# opt.separator "" # Way to control a help message.
|
@@ -65,7 +66,7 @@ end
|
|
65
66
|
def fix_def_list(str)
|
66
67
|
str.gsub(/^(\S+[^\n]*)\n:((?:\s+[^\n]+(?:\n|\z))+)/m){
|
67
68
|
sdt, sdd = $1, $2
|
68
|
-
"<dt>%s</dt>\n<dd>%s</dd>\n"%[remove_mdfmt_raw(sdt), remove_mdfmt(sdd.
|
69
|
+
"<dt>%s</dt>\n<dd>%s</dd>\n"%[remove_mdfmt_raw(sdt), remove_mdfmt(sdd.chomp)]
|
69
70
|
}.gsub(/(\s+\n|\A)(<dt>)/m, '\1<dl>'+"\n"+'\2').gsub(%r@(</dd>[[:blank:]]*)(\n(?:\s+|\z))@, '\1'+"\n"+'</dl>\2')
|
70
71
|
end
|
71
72
|
|
@@ -195,6 +196,7 @@ mdpara.merge_para_if{ |pbp, _, _|
|
|
195
196
|
false
|
196
197
|
}
|
197
198
|
|
199
|
+
## Add a programming language to each code block.
|
198
200
|
indent_next = 0
|
199
201
|
mdpara = mdpara.map_para{|ec|
|
200
202
|
indent_prev = indent_next
|
@@ -202,7 +204,16 @@ mdpara = mdpara.map_para{|ec|
|
|
202
204
|
next fix_string_based(ec) if !md_code_block?(ec, indent_prev)
|
203
205
|
inde = " "*indent_prev
|
204
206
|
st = ec.gsub(/^ /, '')
|
205
|
-
|
207
|
+
lang =
|
208
|
+
if (/\A\s*<[a-z]/i =~ st) && /^(javascript|x?html|xml|rss|xsd|wsdl)$/ !~ opts[:lang].downcase.strip
|
209
|
+
'html'
|
210
|
+
elsif (/\A\s*[%\$] /i =~ st) && /^(bash|zsh|shell-script|tex|latex)$/ !~ opts[:lang].downcase.strip
|
211
|
+
# NOTE: "postscr" (PostScript) starts from "%!PS" with no spaces in between.
|
212
|
+
'sh'
|
213
|
+
else
|
214
|
+
opts[:lang]
|
215
|
+
end
|
216
|
+
"%s```%s\n%s\n%s```"%[inde, lang, st, inde]
|
206
217
|
}
|
207
218
|
|
208
219
|
puts mdpara.join('')
|
@@ -122,7 +122,7 @@ module PlainText
|
|
122
122
|
# Optionally, when a non-Array argument or block is given, a name can be specified as the human-readable name for the rule.
|
123
123
|
#
|
124
124
|
# @option rule [ParseRule, Array, Regexp, Proc]
|
125
|
-
# @param name
|
125
|
+
# @param name [String, Symbol]
|
126
126
|
#
|
127
127
|
# @yield [inprm] Block to register.
|
128
128
|
# @yieldparam [String, Array<Part, Paragraph, Boundary>, Part] inprm Input String/Part/Array to apply the rule to.
|
@@ -221,7 +221,7 @@ module PlainText
|
|
221
221
|
# Optionally, providing non-Array argument or block is given, a name can be specified as the human-readable name for the rule.
|
222
222
|
#
|
223
223
|
# @option *rule [Regexp, Proc]
|
224
|
-
# @param name
|
224
|
+
# @param name [String, Symbol, NilClass, Array<String, Symbol, NilClass>] Array is not supported, yet.
|
225
225
|
# @return [self]
|
226
226
|
#
|
227
227
|
# @yield [inprm] Block to register.
|
@@ -398,7 +398,7 @@ module PlainText
|
|
398
398
|
# #=> ["abc", "==", "DEF", "==\n"])
|
399
399
|
#
|
400
400
|
# @param inprm [String, Array, PlainText::Part]
|
401
|
-
# @param index
|
401
|
+
# @param index [Array, Range, Integer, String, Symbol] If given, the rule(s) at the given index (indices) or key(s) only are applied in the given order.
|
402
402
|
# @return [Array] array of String, Paragraph, Boundary, Array, Part, etc
|
403
403
|
def apply(inprm, index: nil, from_string: true, from_array: true)
|
404
404
|
allrules = (index ? rules_at(index) : @rules)
|
data/lib/plain_text/part.rb
CHANGED
@@ -328,7 +328,7 @@ module PlainText
|
|
328
328
|
# @overload set(range)
|
329
329
|
# With a range of the indices to merge. Unless use_para_index is true, this means the main Array index. See the first overload set about it.
|
330
330
|
# @param range [Range] describe value param
|
331
|
-
# @param use_para_index
|
331
|
+
# @param use_para_index [Boolean] If false (Default), the indices are for the main indices (alternative between Paras and Boundaries, starting from Para). If true, the indices are as obtained with {#paras}, namely the array containing only Paras.
|
332
332
|
# @return [self, nil] nil if nothing is merged (because of wrong indices).
|
333
333
|
def merge_para!(*rest, use_para_index: false)
|
334
334
|
$myd = true
|
@@ -348,7 +348,7 @@ $myd = true
|
|
348
348
|
# while Boundary(5) stays as it is.
|
349
349
|
#
|
350
350
|
# @param (see #merge_para!)
|
351
|
-
# @param use_para_index
|
351
|
+
# @param use_para_index [Boolean] false
|
352
352
|
# @return [Range, nil] nil if no range is selected.
|
353
353
|
def build_index_range_for_merge_para!(*rest, use_para_index: false)
|
354
354
|
#warn "DEBUG:b0: #{rest.inspect} to_a=#{to_a}\n"
|
@@ -460,10 +460,9 @@ $myd = false
|
|
460
460
|
|
461
461
|
# Reparses self or a part of it.
|
462
462
|
#
|
463
|
-
# @
|
464
|
-
# @option
|
465
|
-
# @option
|
466
|
-
# @option range: [Range, nil] Range of indices of self to reparse. In Default, the entire self.
|
463
|
+
# @option rule [PlainText::ParseRule] (PlainText::ParseRule::RuleConsecutiveLbs)
|
464
|
+
# @option name [String, Symbol, Integer, nil] Identifier of rule, if need to specify.
|
465
|
+
# @option range [Range, nil] Range of indices of self to reparse. In Default, the entire self.
|
467
466
|
# @return [self]
|
468
467
|
def reparse!(rule: PlainText::ParseRule::RuleConsecutiveLbs, name: nil, range: (0..-1))
|
469
468
|
insert range.begin, self.class.parse((range ? self[range] : self), rule: rule, name: name)
|
@@ -825,7 +824,7 @@ $myd = false
|
|
825
824
|
#
|
826
825
|
# @see #insert
|
827
826
|
#
|
828
|
-
# @param
|
827
|
+
# @param rest [Array<Array>]
|
829
828
|
# @return [self]
|
830
829
|
def concat(*rest)
|
831
830
|
insert(size, *(rest.sum([])))
|
@@ -835,7 +834,7 @@ $myd = false
|
|
835
834
|
#
|
836
835
|
# @see #concat
|
837
836
|
#
|
838
|
-
# @param
|
837
|
+
# @param rest [Array]
|
839
838
|
# @return [self]
|
840
839
|
def push(*rest)
|
841
840
|
concat(rest)
|
data/lib/plain_text/split.rb
CHANGED
@@ -52,8 +52,8 @@ module PlainText
|
|
52
52
|
#
|
53
53
|
# @param instr [String] String that is examined.
|
54
54
|
# @param re_in [Regexp, String] If String, it is interpreted literally as in String#split.
|
55
|
-
# @param like_linenum
|
56
|
-
# @param with_if_end
|
55
|
+
# @param like_linenum [Boolean] if true (Def: false), it counts like the line number.
|
56
|
+
# @param with_if_end [Boolean] a special case (see the description).
|
57
57
|
# @return [Integer] always positive
|
58
58
|
# @see PlainText::Split#count_regexp
|
59
59
|
def self.count_regexp(instr, re_in, like_linenum: false, with_if_end: false)
|
@@ -72,7 +72,7 @@ module PlainText
|
|
72
72
|
# One more parameter (input String) is required to specify.
|
73
73
|
#
|
74
74
|
# @param instr [String] String that is examined.
|
75
|
-
# @param linebreak
|
75
|
+
# @param linebreak [String] +\n+ etc (Default: $/).
|
76
76
|
# @return [Integer] always positive
|
77
77
|
# @see #count_lines
|
78
78
|
def self.count_lines(instr, linebreak: $/)
|
@@ -124,7 +124,7 @@ module PlainText
|
|
124
124
|
# s.split_with_delimiter(/X+(Q?)/)
|
125
125
|
# #=> ["", "XQ", "ab", "XX", "c", "XQ"]
|
126
126
|
#
|
127
|
-
# @param
|
127
|
+
# @param rest [Regexp, String] If String, it is interpreted literally as in String#split.
|
128
128
|
# @return [Array]
|
129
129
|
def split_with_delimiter(*rest)
|
130
130
|
PlainText::Split.public_send(__method__, self, *rest)
|
@@ -150,9 +150,10 @@ module PlainText
|
|
150
150
|
# (This parameter is introduced just to reduce the overhead of
|
151
151
|
# potentially calling this routine twice or user's making their own check.)
|
152
152
|
#
|
153
|
-
# @param
|
154
|
-
# @param like_linenum:
|
155
|
-
#
|
153
|
+
# @param rest [Regexp, String] re_in: If String, it is interpreted literally as in String#split.
|
154
|
+
# @param kwd [Hash<like_linenum: Boolean, with_if_end: Boolean>]
|
155
|
+
# if like_linenum: true (Def: false), it counts like the line number.
|
156
|
+
# with_if_end: a special case (see the description).
|
156
157
|
# @return [Integer, Array<Integer, Boolean>] always positive
|
157
158
|
# @see PlainText::Split#count_regexp
|
158
159
|
def count_regexp(*rest, **kwd)
|
@@ -161,7 +162,7 @@ module PlainText
|
|
161
162
|
|
162
163
|
# Returns the number of lines.
|
163
164
|
#
|
164
|
-
# @param linebreak:
|
165
|
+
# @param kwd [Hash<linebreak: String>] +\n+ etc (Default: $/).
|
165
166
|
# @return [Integer] always positive
|
166
167
|
# @see PlainText::Split#count_regexp
|
167
168
|
def count_lines(**kwd)
|
data/lib/plain_text/util.rb
CHANGED
@@ -44,8 +44,8 @@ module PlainText
|
|
44
44
|
# # => [[33, 55], [44, ""]]
|
45
45
|
#
|
46
46
|
# @param ary [Array]
|
47
|
-
# @param size_even
|
48
|
-
# @param filler
|
47
|
+
# @param size_even [Boolean] if true (Def: false), the sizes of the returned arrays are guaranteed to be identical.
|
48
|
+
# @param filler [Object] if size_even: is true and if matching is performed, this filler is added at the end of the last element.
|
49
49
|
def even_odd_arrays(ary, size_even: false, filler: "")
|
50
50
|
ar_even = select.with_index { |_, i| i.even? } rescue select.each_with_index { |_, i| i.even? } # Rescue for Ruby 2.1 or earlier
|
51
51
|
ar_odd = select.with_index { |_, i| i.odd? } rescue select.each_with_index { |_, i| i.odd? } # Rescue for Ruby 2.1 or earlier
|
@@ -83,8 +83,8 @@ module PlainText
|
|
83
83
|
#
|
84
84
|
# @param index_in [Integer] Index to check and convert from. Potentially negative integer.
|
85
85
|
# @param ary [Array, Integer, nil] Reference Array or its size (Array#size) or nil (interpreted as self#size (untested)).
|
86
|
-
# @param accept_too_big
|
87
|
-
# @param varname
|
86
|
+
# @param accept_too_big [Boolean, NilClass] if true (Default), a positive index larger than the last array index is returned as it is. If nil, the last index + 1 is accepted but raises an Exception for anything larger. If false, any index larger than the last index raises an Exception.
|
87
|
+
# @param varname [NilClass, String] Name of the variable (or nil) to be used for error messages.
|
88
88
|
# @return [Integer] Non-negative index; i.e., if index=-1 is specified for an Array with a size of 3, the returned value is 2 (the last index of it).
|
89
89
|
# @raise [IndexError] if the index is out of the range to negative.
|
90
90
|
# @raise [ArgumentError] if ary is neither an Array nor Integer, or more specifically, it does not have size method or ary.size does not return Integer or similar.
|
@@ -115,8 +115,8 @@ module PlainText
|
|
115
115
|
#
|
116
116
|
# @param from [Array, Range]
|
117
117
|
# @param arref [Array, Integer] Reference Array or its size (Array#size) or nil (interpreted as self#size).
|
118
|
-
# @param flatten
|
119
|
-
# @param sortuniq
|
118
|
+
# @param flatten [Boolean] If true (Default), if elements are Range, they are unfolded. If false and if an Array containing a Range, Exception is raised.
|
119
|
+
# @param sortuniq [Boolean] If true (Default), the return is sorted and uniq-ed.
|
120
120
|
# @return [Array, nil] nil if arref is empty or if out of range to the negative. Note in most cases in Ruby default, it raises IndexError. See the code of {#positive_array_index_checked}
|
121
121
|
# @raise [TypeError] if non-integer is specified.
|
122
122
|
# @raise [ArgumentError] if arref is neither an Array nor Integer, or more specifically, it does not have size method or arref.size does not return Integer or similar.
|
data/lib/plain_text.rb
CHANGED
@@ -131,18 +131,18 @@ module PlainText
|
|
131
131
|
# /(\A[[:blank:]]+|\n[[:space:]]+)/
|
132
132
|
#
|
133
133
|
# @param prt [PlainText::Part, String] {Part} or String to examine.
|
134
|
-
# @param preserve_paragraph
|
135
|
-
# @param boundary_style
|
136
|
-
# @param lbs_style
|
137
|
-
# @param sps_style
|
138
|
-
# @param lb_is_space
|
139
|
-
# @param delete_asian_space
|
140
|
-
# @param linehead_style
|
141
|
-
# @param linetail_style
|
142
|
-
# @param firstlbs_style
|
143
|
-
# @param lastsps_style
|
144
|
-
# @param lb
|
145
|
-
# @param lb_out
|
134
|
+
# @param preserve_paragraph [Boolean] Paragraphs are taken into account if true (Def: False). In the input, paragraphs are defined to be separated with more than one +lb+ with potentially some space characters in between. Their output style is specified with +boundary_style+.
|
135
|
+
# @param boundary_style [String, Symbol] One of +(:truncate|:truncate2|:delete|:none)+ or String. If String, the boundaries between paragraphs are replaced with this String (Def: +lb_out*2+). If +:truncate+, consecutive linebreaks and spaces are truncated into 2 linebreaks. +:truncate2+ are similar, but they are not truncated beyond 3 linebreaks (ie., up to 2 blank lines between Paragraphs). If +:none+, nothing is done about them. Unless :none, all the white spaces between linebreaks are deleted.
|
136
|
+
# @param lbs_style [Symbol] One of +(:truncate|:delete|:none)+ (Def: +:truncate+). If :delete, all the linebreaks within paragraphs are deleted. +:truncate+ is meaningful only when +preserve_paragraph=false+ and consecutive linebreaks are truncated into 1 linebreak.
|
137
|
+
# @param sps_style [Symbol] One of +(:truncate|:delete|:none)+ (Def: +:truncate+). If +:truncate+, the consecutive white spaces within paragraphs, *except* for those at the line-head or line-tail (which are controlled by +linehead_style+ and +linetail_style+, respectively), are truncated into a single white space. If :delete, they are deleted.
|
138
|
+
# @param lb_is_space [Boolean] If true, a line-break, except those for the boundaries (unless +preserve_paragraph+ is false), is equivalent to a space (Def: False).
|
139
|
+
# @param delete_asian_space [Boolean] Any spaces between, before, after Asian characters (but punctuation) are deleted, if true (Default).
|
140
|
+
# @param linehead_style [Symbol] One of +(:truncate|:delete|:none)+ (Def: :none). Determines how to handle consecutive white spaces at the beginning of each line.
|
141
|
+
# @param linetail_style [Symbol] One of +(:truncate|:delete|:markdown|:none)+ (Def: :delete). Determines how to handle consecutive white spaces at the end of each line. If +:markdown+, 1 space is always deleted, and two or more spaces are truncated into two ASCII whitespaces *if* the last two spaces are ASCII whitespaces, or else untouched.
|
142
|
+
# @param firstlbs_style [Symbol, String] One of +(:truncate|:delete|:none)+ or String (Def: :default). If +:truncate+, any linebreaks at the very beginning of self (and whitespaces in between), if exist, are truncated to a single linebreak. If String, they are, even if not exists, replaced with the specified String (such as a linebreak). If +:delete+, they are deleted. Note This option has nothing to do with the whitespaces at the beginning of the first significant line (hence the name of the option). Note if a (random) Part is given, this option only considers the first significant element of it.
|
143
|
+
# @param lastsps_style [Symbol, String] One of +(:truncate|:delete|:none|:linebreak)+ or String (Def: :truncate). If +:truncate+, any of linebreaks *AND* white spaces at the tail of self, if exist, are truncated to a single linebreak. If +:delete+, they are deleted. If String, they are, even if not exists, replaced with the specified String (such as a linebreak, in which case +lb_out+ is used as String, i.e., it guarantees only 1 linebreak to exist at the end of the String). Note if a (random) Part is given, this option only considers the last significant element of it.
|
144
|
+
# @param lb [String] Linebreak character like +\n+ etc (Default: $/). If this is one of the standard line-breaks, irregular line-breaks (for example, existence of CR when only LF should be there) are corrected.
|
145
|
+
# @param lb_out [String] Linebreak used for output (Default: +lb+)
|
146
146
|
# @return same as prt
|
147
147
|
#
|
148
148
|
def self.clean_text(
|
@@ -587,9 +587,9 @@ module PlainText
|
|
587
587
|
# if num is +/ABC/+ (Regexp), String of the lines from the beginning up to the line that contains the character +"ABC"+ is returned.
|
588
588
|
#
|
589
589
|
# @param num_in [Integer, Regexp] Number (positive or negative, but not 0) of :unit to extract (Def: 10), or Regexp, which is valid only if unit is :line.
|
590
|
-
# @param unit
|
591
|
-
# @param inclusive
|
592
|
-
# @param linebreak
|
590
|
+
# @param unit [Symbol, String] One of +:line+ (or +"-n"+), +:char+, +:byte+ (or +"-c"+)
|
591
|
+
# @param inclusive [Boolean] read only when unit is :line. If inclusive (Default), the (entire) line that matches is included in the result.
|
592
|
+
# @param linebreak [String] +\n+ etc (Default: +$/+), used when +unit==:line+ (Default)
|
593
593
|
# @return [String] as self
|
594
594
|
def head(num_in=DEF_HEADTAIL_N_LINES, unit: :line, inclusive: true, padding: 0, linebreak: $/)
|
595
595
|
if num_in.class.method_defined? :to_int
|
@@ -670,10 +670,10 @@ module PlainText
|
|
670
670
|
|
671
671
|
# String#strip! for each line
|
672
672
|
#
|
673
|
-
# @param strip_head
|
674
|
-
# @param strip_tail
|
675
|
-
# @param markdown
|
676
|
-
# @param linebreak
|
673
|
+
# @param strip_head [Boolean] if true (Default), spaces at each line head are removed.
|
674
|
+
# @param strip_tail [Boolean] if true (Default), spaces at each line tail are removed (see +markdown+ option).
|
675
|
+
# @param markdown [Boolean] if true (Def: false), a double space at each tail remains and +strip_head+ is forcibly false.
|
676
|
+
# @param linebreak [String] +\n+ etc (Default: +$/+)
|
677
677
|
# @return [self, NilClass] nil if gsub! does not match at all, i.e., there are no spaces to remove.
|
678
678
|
def strip_at_lines!(strip_head: true, strip_tail: true, markdown: false, linebreak: $/)
|
679
679
|
strip_head = false if markdown
|
@@ -695,7 +695,7 @@ module PlainText
|
|
695
695
|
|
696
696
|
# String#strip! for each line but only for the head part (NOT tail part)
|
697
697
|
#
|
698
|
-
# @param linebreak
|
698
|
+
# @param linebreak [String] "\n" etc (Default: $/)
|
699
699
|
# @return [self, NilClass] nil if gsub! does not match at all, i.e., there are no spaces to remove.
|
700
700
|
def strip_at_lines_head!(linebreak: $/)
|
701
701
|
lb_quo = Regexp.quote linebreak
|
@@ -714,8 +714,8 @@ module PlainText
|
|
714
714
|
|
715
715
|
# String#strip! for each line but only for the tail part (NOT head part)
|
716
716
|
#
|
717
|
-
# @param markdown
|
718
|
-
# @param linebreak
|
717
|
+
# @param markdown [Boolean] if true (Def: false), a double space at each tail remains.
|
718
|
+
# @param linebreak [String] "\n" etc (Default: $/)
|
719
719
|
# @return [self, NilClass] nil if gsub! does not match at all, i.e., there are no spaces to remove.
|
720
720
|
def strip_at_lines_tail!(markdown: false, linebreak: $/)
|
721
721
|
lb_quo = Regexp.quote linebreak
|
@@ -775,9 +775,9 @@ module PlainText
|
|
775
775
|
# *all the lines from Line 1* would be included, which is most likely not what the caller wants.
|
776
776
|
#
|
777
777
|
# @param num_in [Integer, Regexp] Number (positive or negative, but not 0) of :unit to extract (Def: 10), or Regexp, which is valid only if unit is :line. If positive, the last num_in lines are returned. If negative, the lines from the num-in-th line from the head are returned. In short, calling this method as +tail(3)+ and +tail(-3)+ is similar to the UNIX commands "tail -n 3" and "tail -n +3", respectively.
|
778
|
-
# @param unit
|
779
|
-
# @param inclusive
|
780
|
-
# @param linebreak
|
778
|
+
# @param unit [Symbol] One of :line (as in -n option), :char, :byte (-c option)
|
779
|
+
# @param inclusive [Boolean] read only when unit is :line. If inclusive (Default), the (entire) line that matches is included in the result.
|
780
|
+
# @param linebreak [String] +\n+ etc (Default: +$/+), used when unit==:line (Default)
|
781
781
|
# @return [String] as self
|
782
782
|
def tail(num_in=DEF_HEADTAIL_N_LINES, unit: :line, inclusive: true, padding: 0, linebreak: $/)
|
783
783
|
|
@@ -836,9 +836,9 @@ module PlainText
|
|
836
836
|
# @todo Improve the algorithm like {#tail_regexp}
|
837
837
|
#
|
838
838
|
# @param re_in [Regexp] Regexp to determine the boundary.
|
839
|
-
# @param inclusive
|
840
|
-
# @param padding
|
841
|
-
# @param linebreak
|
839
|
+
# @param inclusive [Boolean] If true (Default), the (entire) line that matches re_in is included in the result. Else the entire line is excluded.
|
840
|
+
# @param padding [Integer] Number of lines (positive or negative) to add to the lines returned.
|
841
|
+
# @param linebreak [String] +\n+ etc (Default: $/).
|
842
842
|
# @return [String] as self
|
843
843
|
# @see #head
|
844
844
|
def head_regexp(re_in, inclusive: true, padding: 0, linebreak: $/)
|
@@ -899,7 +899,7 @@ module PlainText
|
|
899
899
|
# pre_match_in_line( "__abc") # => #<MatchData "__abc"> pre_match==" "
|
900
900
|
#
|
901
901
|
# @param strpre [String] String of prematch of the last MatchData
|
902
|
-
# @param linebreak
|
902
|
+
# @param linebreak [String] +\n+ etc (Default: $/)
|
903
903
|
# @return [MatchData] m[0] is the string after the last linebreak before the matched data (exclusive) and m.pre_match is all the lines before that.
|
904
904
|
def pre_match_in_line(strpre, linebreak: $/)
|
905
905
|
lb_quo = Regexp.quote linebreak
|
@@ -918,7 +918,7 @@ module PlainText
|
|
918
918
|
#
|
919
919
|
# @param mat [MatchData, String] If String, it is User's (last) matched String.
|
920
920
|
# @param strpre [String, nil] Pre-match from the beginning of self to the matched string, if mat is String.
|
921
|
-
# @param linebreak
|
921
|
+
# @param linebreak [String] +\n+ etc (Default: $/)
|
922
922
|
# @return [Hash<Integer, nil>] 4 keys: :last_prematch, :first_matched, :last_matched, :first_post_match
|
923
923
|
def _matched_line_indices(mat, strpre=nil, linebreak: $/)
|
924
924
|
if mat.class.method_defined? :post_match
|
@@ -966,7 +966,7 @@ module PlainText
|
|
966
966
|
#
|
967
967
|
# @param mat [MatchData, String] If String, it is User's (last) matched String.
|
968
968
|
# @param strpost [String, nil] Post-match, if mat is String. After User's last match.
|
969
|
-
# @param linebreak
|
969
|
+
# @param linebreak [String] +\n+ etc (Default: $/)
|
970
970
|
# @return [MatchData] m[0] is the string after matched data and up to the next first linebreak (inclusive) (or empty string if the last character(s) of matched data is the linebreak) and m.post_match is all the lines after that. (maybe nil?? not sure...)
|
971
971
|
def post_match_in_line(mat, strpost=nil, linebreak: $/)
|
972
972
|
lb_quo = Regexp.quote linebreak
|
@@ -994,8 +994,8 @@ module PlainText
|
|
994
994
|
# 6. pass it to {#head_inverse} (after Line-1).
|
995
995
|
#
|
996
996
|
# @param re_in [Regexp] Regexp to determine the boundary.
|
997
|
-
# @param inclusive
|
998
|
-
# @param linebreak
|
997
|
+
# @param inclusive [Boolean] If true (Default), the (entire) line that matches re_in is included in the result. Else the entire line is excluded.
|
998
|
+
# @param linebreak [String] +\n+ etc (Default: $/).
|
999
999
|
# @return [String] as self
|
1000
1000
|
# @see #tail
|
1001
1001
|
def tail_regexp(re_in, inclusive: true, padding: 0, linebreak: $/)
|
@@ -1030,7 +1030,7 @@ module PlainText
|
|
1030
1030
|
#
|
1031
1031
|
# @param num_in [Integer] Original argument of the specified number of lines
|
1032
1032
|
# @param num [Integer] Converted integer for num_in
|
1033
|
-
# @param linebreak
|
1033
|
+
# @param linebreak [String] +\n+ etc (Default: $/).
|
1034
1034
|
# @return [String] as self
|
1035
1035
|
# @see #tail
|
1036
1036
|
def tail_linenum(num_in, num, linebreak: $/)
|
data/plain_text.gemspec
CHANGED
@@ -5,7 +5,7 @@ require 'date'
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{plain_text}.sub(/.*/){|c| (c == File.basename(Dir.pwd)) ? c : raise("ERROR: s.name=(#{c}) in gemspec seems wrong!")}
|
8
|
-
s.version = "0.
|
8
|
+
s.version = "0.7".sub(/.*/){|c| fs = Dir.glob('changelog{,.*}', File::FNM_CASEFOLD); raise('More than one ChangeLog exist!') if fs.size > 1; warn("WARNING: Version(s.version=#{c}) already exists in #{fs[0]} - ok?") if fs.size == 1 && !IO.readlines(fs[0]).grep(/^\(Version: #{Regexp.quote c}\)$/).empty? ; c } # n.b., In macOS, changelog and ChangeLog are identical in default.
|
9
9
|
# s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
10
10
|
s.bindir = 'bin'
|
11
11
|
%w(countchar textclean head.rb tail.rb yard2md_afterclean).each do |f|
|
@@ -13,12 +13,12 @@ Gem::Specification.new do |s|
|
|
13
13
|
File.executable?(path) ? s.executables << f : raise("ERROR: Executable (#{path}) is not executable!")
|
14
14
|
end
|
15
15
|
s.authors = ["Masa Sakano"]
|
16
|
-
s.date = %q{
|
16
|
+
s.date = %q{2022-08-25}.sub(/.*/){|c| (Date.parse(c) == Date.today) ? c : raise("ERROR: s.date=(#{c}) is not today!")}
|
17
17
|
s.summary = %q{Module to handle Plain-Text}
|
18
18
|
s.description = %q{This module provides utility functions and methods to handle plain text, classes Part/Paragraph/Boundary to represent the logical structure of a document and ParseRule to describe the rules to parse plain text to produce a Part-type Ruby instance. A few handy Ruby executable scripts to make use of them are included.}
|
19
19
|
# s.email = %q{abc@example.com}
|
20
20
|
s.extra_rdoc_files = [
|
21
|
-
# "LICENSE",
|
21
|
+
# "LICENSE.txt",
|
22
22
|
"README.en.rdoc",
|
23
23
|
]
|
24
24
|
s.license = 'MIT'
|
@@ -58,14 +58,50 @@ class TestUnitYard2mdRb < MiniTest::Test
|
|
58
58
|
assert_equal exp, o, "期待:#{exp.inspect} ⇔ \n実際:#{o.inspect}"
|
59
59
|
assert_empty e
|
60
60
|
|
61
|
-
stin = "
|
61
|
+
stin = " abc def " + "\n\n\n efg\n"
|
62
62
|
srub = "```ruby\n"
|
63
|
-
exp = srub+"
|
63
|
+
exp = srub+"abc def \n```\n\n\n efg\n"
|
64
64
|
o, e, s = Open3.capture3 EXE, stdin_data: stin
|
65
65
|
assert_equal 0, s.exitstatus
|
66
66
|
assert_equal exp, o, "期待:#{exp.inspect} ⇔ \n実際:#{o.inspect}"
|
67
67
|
assert_empty e
|
68
68
|
|
69
|
+
# automated judge: sh
|
70
|
+
stin = " % abc def " + "\n\n\n efg\n"
|
71
|
+
srub = "```sh\n"
|
72
|
+
exp = srub+"% abc def \n```\n\n\n efg\n"
|
73
|
+
o, e, s = Open3.capture3 EXE, stdin_data: stin
|
74
|
+
assert_equal 0, s.exitstatus
|
75
|
+
assert_equal exp, o, "期待:#{exp.inspect} ⇔ \n実際:#{o.inspect}"
|
76
|
+
assert_empty e
|
77
|
+
|
78
|
+
# automated judge unchanged: tex
|
79
|
+
stin = " % abc def " + "\n\n\n efg\n"
|
80
|
+
srub = "```tex\n"
|
81
|
+
exp = srub+"% abc def \n```\n\n\n efg\n"
|
82
|
+
o, e, s = Open3.capture3 EXE+" --lang=tex", stdin_data: stin
|
83
|
+
assert_equal 0, s.exitstatus
|
84
|
+
assert_equal exp, o, "期待:#{exp.inspect} ⇔ \n実際:#{o.inspect}"
|
85
|
+
assert_empty e
|
86
|
+
|
87
|
+
# automated judge: html
|
88
|
+
stin = " <abc>def " + "\n\n\n efg\n"
|
89
|
+
srub = "```html\n"
|
90
|
+
exp = srub+"<abc>def \n```\n\n\n efg\n"
|
91
|
+
o, e, s = Open3.capture3 EXE, stdin_data: stin
|
92
|
+
assert_equal 0, s.exitstatus
|
93
|
+
assert_equal exp, o, "期待:#{exp.inspect} ⇔ \n実際:#{o.inspect}"
|
94
|
+
assert_empty e
|
95
|
+
|
96
|
+
# automated judge unchanged: javascript
|
97
|
+
stin = " <abc>def " + "\n\n\n efg\n"
|
98
|
+
srub = "```javascript\n"
|
99
|
+
exp = srub+"<abc>def \n```\n\n\n efg\n"
|
100
|
+
o, e, s = Open3.capture3 EXE+" --lang=javascript", stdin_data: stin
|
101
|
+
assert_equal 0, s.exitstatus
|
102
|
+
assert_equal exp, o, "期待:#{exp.inspect} ⇔ \n実際:#{o.inspect}"
|
103
|
+
assert_empty e
|
104
|
+
|
69
105
|
end
|
70
106
|
end
|
71
107
|
|
metadata
CHANGED
@@ -1,20 +1,20 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: plain_text
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.6'
|
4
|
+
version: '0.7'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Masa Sakano
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-08-25 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: This module provides utility functions and methods to handle plain text,
|
14
14
|
classes Part/Paragraph/Boundary to represent the logical structure of a document
|
15
15
|
and ParseRule to describe the rules to parse plain text to produce a Part-type Ruby
|
16
16
|
instance. A few handy Ruby executable scripts to make use of them are included.
|
17
|
-
email:
|
17
|
+
email:
|
18
18
|
executables:
|
19
19
|
- countchar
|
20
20
|
- textclean
|
@@ -59,7 +59,7 @@ licenses:
|
|
59
59
|
- MIT
|
60
60
|
metadata:
|
61
61
|
yard.run: yri
|
62
|
-
post_install_message:
|
62
|
+
post_install_message:
|
63
63
|
rdoc_options:
|
64
64
|
- "--charset=UTF-8"
|
65
65
|
require_paths:
|
@@ -75,18 +75,18 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
75
75
|
- !ruby/object:Gem::Version
|
76
76
|
version: '0'
|
77
77
|
requirements: []
|
78
|
-
rubygems_version: 3.
|
79
|
-
signing_key:
|
78
|
+
rubygems_version: 3.3.7
|
79
|
+
signing_key:
|
80
80
|
specification_version: 4
|
81
81
|
summary: Module to handle Plain-Text
|
82
82
|
test_files:
|
83
|
+
- test/test_plain_text.rb
|
83
84
|
- test/test_plain_text_parse_rule.rb
|
84
|
-
- test/testtail_rb.rb
|
85
85
|
- test/test_plain_text_part.rb
|
86
|
-
- test/test_plain_text.rb
|
87
|
-
- test/testyard2md_afterclean.rb
|
88
|
-
- test/testcountchar.rb
|
89
|
-
- test/testtextclean.rb
|
90
86
|
- test/test_plain_text_split.rb
|
91
87
|
- test/test_plain_text_util.rb
|
88
|
+
- test/testcountchar.rb
|
92
89
|
- test/testhead_rb.rb
|
90
|
+
- test/testtail_rb.rb
|
91
|
+
- test/testtextclean.rb
|
92
|
+
- test/testyard2md_afterclean.rb
|