plain_text 0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +51 -0
- data/ChangeLog +5 -0
- data/Makefile +23 -0
- data/README.en.rdoc +172 -0
- data/Rakefile +9 -0
- data/bin/countchar +89 -0
- data/lib/plain_text/parse_rule.rb +474 -0
- data/lib/plain_text/part/boundary.rb +44 -0
- data/lib/plain_text/part/paragraph.rb +35 -0
- data/lib/plain_text/part.rb +973 -0
- data/lib/plain_text/split.rb +103 -0
- data/lib/plain_text/util.rb +104 -0
- data/lib/plain_text.rb +839 -0
- data/plain_text.gemspec +49 -0
- data/test/test_plain_text.rb +280 -0
- data/test/test_plain_text_parse_rule.rb +146 -0
- data/test/test_plain_text_part.rb +353 -0
- data/test/test_plain_text_split.rb +78 -0
- metadata +72 -0
data/plain_text.gemspec
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
require 'rake'
|
4
|
+
|
5
|
+
# Gem specification for plain_text.
#
# The list of packaged files is built from a set of glob patterns (via
# Rake's FileList) and then filtered: any file whose basename matches a
# pattern listed in .gitignore is excluded, and so are symbolic links.
Gem::Specification.new do |s|
  s.name = %q{plain_text}
  s.version = "0.1"
  # s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  %w(countchar).each do |f|
    s.executables << f
  end
  s.bindir = 'bin'
  s.authors = ["Masa Sakano"]
  s.date = %q{2019-10-25}
  s.summary = %q{Module to handle Plain-Text}
  s.description = %q{This module provides utility functions and methods to handle plain text, classes Part/Paragraph/Boundary to represent the logical structure of a document and ParseRule to describe the rules to parse plain text to produce a Part-type Ruby instance.}
  # s.email = %q{abc@example.com}
  s.extra_rdoc_files = [
    # "LICENSE",
    "README.en.rdoc",
  ]
  s.license = 'MIT'

  # Read the ignore patterns once, up front (they do not depend on the file
  # being examined).  A missing .gitignore simply means nothing is ignored.
  ignore_patterns = File.exist?('.gitignore') ? File.readlines('.gitignore').map(&:chomp) : []

  s.files = FileList['.gitignore','lib/**/*.rb','[A-Z]*','test/**/*.rb', '*.gemspec', 'bin'].to_a.reject { |f|
    # Patterns are matched against the basename only, as glob patterns.
    basename = File.basename(f)
    ignore_patterns.any? { |pattern| File.fnmatch(pattern, basename) }
  }
  s.files.reject! { |fn| File.symlink? fn }
  # s.add_runtime_dependency 'rails'
  # s.add_development_dependency "bourne", [">= 0"]
  s.homepage = %q{https://www.wisebabel.com}
  s.rdoc_options = ["--charset=UTF-8"]

  # s.require_paths = ["lib"] # Default "lib"
  s.required_ruby_version = '>= 2.0'
  s.test_files = Dir['test/**/*.rb']
  s.test_files.reject! { |fn| File.symlink? fn }
  # s.requirements << 'libmagick, v6.0' # Simply, info to users.
  # s.rubygems_version = %q{1.3.5} # This is always set automatically!!

  s.metadata["yard.run"] = "yri" # use "yard" to build full HTML docs.
end
|
49
|
+
|
@@ -0,0 +1,280 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
# Author: M. Sakano (Wise Babel Ltd)
|
4
|
+
|
5
|
+
require 'plain_text'
|
6
|
+
|
7
|
+
$stdout.sync=true
|
8
|
+
$stderr.sync=true
|
9
|
+
# print '$LOAD_PATH=';p $LOAD_PATH
|
10
|
+
|
11
|
+
#################################################
|
12
|
+
# Unit Test
|
13
|
+
#################################################
|
14
|
+
|
15
|
+
#if $0 == __FILE__
|
16
|
+
gem "minitest"
|
17
|
+
# require 'minitest/unit'
|
18
|
+
require 'minitest/autorun'
|
19
|
+
# MiniTest::Unit.autorun
|
20
|
+
|
21
|
+
# Unit tests for the module-level functions of PlainText that are exercised
# here: clean_text, count_char, head, head_inverse, tail and tail_inverse.
#
# The superclass is spelled Minitest::Test: the legacy constant "MiniTest"
# is only a compatibility alias and is not defined by recent Minitest
# releases unless explicitly requested.
class TestUnitPlainText < Minitest::Test
  T = true
  F = false
  SCFNAME = File.basename(__FILE__)
  PT = PlainText

  class ChString < String
    # Test sub-class.
  end

  def setup
  end

  def teardown
  end

  # The option trailing_s: is expected to be rejected with ArgumentError.
  def test_clean_text01
    assert_raises(ArgumentError){ PT.clean_text("abc\n\ndef\n\n", trailing_s: false) }
  end

  # clean_text with default options and with various combinations of
  # lbs_style, lastsps_style, lb_out, boundary_style and preserve_paragraph.
  def test_clean_text02
    assert_equal 3, PT.clean_text("abc").size
    assert_equal 7, PT.clean_text("abc\ndef").size
    assert_equal 8, PT.clean_text("abc\ndef\n").size
    assert_equal 8, PT.clean_text("abc\ndef\n\n").size
    assert_equal 8, PT.clean_text("abc\ndef\n\n\n").size

    s0 = "abc\n\ndef\n\n"

    sr = PT.clean_text(s0)
    assert_equal s0[0..-2], sr, "#{s0[0..-2].inspect}(Expected) != #{sr.inspect}"
    sr = PT.clean_text(s0, lastsps_style: :delete) # preserve_paragraph=true
    assert_equal s0[0..-3], sr
    sr = PT.clean_text(s0, lbs_style: :delete, lastsps_style: :delete, lb_out: "\n") # preserve_paragraph=true
    assert_equal s0[0..-3], sr, "#{s0[0..-3].inspect}(Expected) != #{sr.inspect}"

    s2 = "abcXXdefXX"
    sr = PT.clean_text(s0, lbs_style: :delete, lastsps_style: :none, lb_out: "X") # preserve_paragraph=true
    assert_equal s2, sr, "#{s2.inspect}(Expected) != #{sr.inspect}"

    s1 = "abc\n\n\ndef\n\n\n"
    s2 = "abcdef"
    assert_equal s2, PT.clean_text(s1, preserve_paragraph: false, lbs_style: :delete, lastsps_style: :none)

    s2 = "abc\n\ndef\n\n"
    sr = PT.clean_text(s1, lbs_style: :delete, lastsps_style: :none) # preserve_paragraph=true
    assert_equal s2, sr, "#{s2.inspect}(Expected) != #{sr.inspect}"

    s2 = "abcXYZdefXYZ"
    sr = PT.clean_text(s1, lbs_style: :delete, lastsps_style: :none, boundary_style: "XYZ") # preserve_paragraph=true
    assert_equal s2, sr, "#{s2.inspect}(Expected) != #{sr.inspect}"

    s2 = "あいうえお"
    assert_equal s2, PT.clean_text("あいう\nえお\n", lbs_style: :delete, lastsps_style: :delete) # preserve_paragraph=true
  end

  # boundary_style: nil is rejected; :none, a String and :truncate are accepted.
  def test_clean_text03
    assert_raises(ArgumentError){ PT.clean_text("abc", boundary_style: nil) }
    s1 = "abc \n \n def\n\n"
    s20 = "abc\n \ndef\n"
    s21 = "abcXYZdefXYZ"
    s22 = "abc\n\ndef\n"
    sr = PT.clean_text(s1, boundary_style: :none)
    assert_equal s20, sr, prerr(s20, sr)
    sr = PT.clean_text(s1, boundary_style: "XYZ")
    assert_equal s21, sr, prerr(s21, sr)
    sr = PT.clean_text(s1, boundary_style: :truncate)
    assert_equal s22, sr, prerr(s22, sr)
  end

  # lastsps_style: nil is rejected; :none, :delete and a String are accepted,
  # also in combination with firstsps_style.
  def test_clean_text_lastsps_style01
    assert_raises(ArgumentError){ PT.clean_text("abc", lastsps_style: nil) }
    s1 = "\nabc\n\ndef\n"
    s20 = "abc\n\ndef\n"
    s21 = "abc\n\ndef"
    s22 = "abc\n\ndefTT"

    sr = PT.clean_text(s1)
    assert_equal s20, sr, prerr(s20, sr)
    sr = PT.clean_text(s1, lastsps_style: :none)
    assert_equal s20, sr, prerr(s20, sr)
    sr = PT.clean_text(s1, lastsps_style: :delete)
    assert_equal s21, sr, prerr(s21, sr)
    sr = PT.clean_text(s1, lastsps_style: 'TT')
    assert_equal s22, sr, prerr(s22, sr)

    s3 = "\nabc\n\ndef"
    s41 = " abc\n\ndefTT"
    s42 = "\nabc\n\ndef"
    sr = PT.clean_text(s3, firstsps_style: :truncate, lastsps_style: 'TT')
    assert_equal s41, sr, prerr(s41, sr)
    sr = PT.clean_text(s3, firstsps_style: :none, lastsps_style: :delete)
    assert_equal s42, sr, prerr(s42, sr)
  end

  # Combinations of the abbreviated style symbols (:n, :t, :t2) for
  # boundary_style, lastsps_style, linehead_style, firstsps_style and sps_style.
  def test_clean_text_boundary01
    assert_raises(ArgumentError){ PT.clean_text("abc", boundary_style: nil) }
    s1 = "\n ab\n \ncd\n \n \n ef\n \n \n \n gh\n \n \n \n"
    s21 = " ab\n \ncd\n \n \n ef\n \n \n \n gh\n"
    s22 = "\n ab\n\ncd\n\n ef\n\n gh\n\n"
    s23 = "\n ab\n\ncd\n\n\n ef\n\n\n gh\n\n\n"
    sr = PT.clean_text(s1, boundary_style: :n, lastsps_style: :t, linehead_style: :n, firstsps_style: :t, sps_style: :n)
    assert_equal s21, sr, prerr(s21, sr)
    sr = PT.clean_text(s1, boundary_style: :t, lastsps_style: :n, linehead_style: :n, firstsps_style: :n, sps_style: :n)
    assert_equal s22, sr, prerr(s22, sr)
    sr = PT.clean_text(s1, boundary_style: :t2, lastsps_style: :n, linehead_style: :t, firstsps_style: :n, sps_style: :n)
    assert_equal s23, sr, prerr(s23, sr)
  end

  # clean_text accepts a PlainText::Part as well as a String; the result
  # for a Part is a Part and the argument must be left unmodified.
  def test_clean_text_part01
    s0 = "\n \n abc\n\n \ndef\n\n \n\n"
    s1 = "TTabc\n\ndef\n"
    p00 = PT::Part.parse s0
    p0 = PT::Part.parse s0
    sr = PT.clean_text(s0, firstsps_style: 'TT')
    assert_equal s1, sr, prerr(s1, sr)
    sr = PT.clean_text(p0, firstsps_style: 'TT')
    assert_equal PT::Part, sr.class
    assert_equal s1, sr.join
    assert_equal p00, p0, prerr(p00, p0) # p0 is deepcopied?
  end

  # count_char with default options for single- and multi-paragraph input.
  def test_count_char02
    assert_equal 3, PT.count_char("abc")
    assert_equal 6, PT.count_char("abc\ndef")
    assert_equal 6, PT.count_char("abc\ndef\n")
    assert_equal 6, PT.count_char("abc\ndef\n\n")
    assert_equal 6, PT.count_char("abc\ndef\n\n\n")

    assert_equal 3, PT.count_char("abc")
    assert_equal 3, PT.count_char("abc\n")
    assert_equal 3, PT.count_char("abc\n\n")
    assert_equal 8, PT.count_char("abc\n\ndef")
    assert_equal 8, PT.count_char("abc\n\ndef\n")
    assert_equal 8, PT.count_char("abc\n\ndef\n\n")
    assert_equal 8, PT.count_char("abc\n\ndef\n\n\n")
  end

  # head/head_inverse with a line count and with the char/byte units.
  def test_head01
    assert_raises(TypeError){ PT.head("abc", :wrong) }

    s = "\n2\n\n四\n5\n6\n\n8\n9\n10\n11\n\n13\n14\n15\n16\n\n18\n19\n\n"
    assert_equal s.sub(/(([^\n]*\n){#{PT::DEF_HEADTAIL_N_LINES}}).*/m, '\1'), PT.head(s) # 10 lines
    s = "\nab四\n\n\nd\nef"
    s1 = "\nab四\n\n"
    s2 = "\nd\nef"
    assert_equal s1, PT.head(s, 3)
    assert_equal s2, PT.head_inverse(s, 3)

    # char & byte options
    assert_equal s1, PT.head(s, 6, unit: :char)
    assert_equal "", PT.head("", 8, unit: :char)
    assert_equal ?a, PT.head(?a, 8, unit: :char)
    assert_equal s1, PT.head(s, 8, unit: :byte)
    assert_equal s1, PT.head(s, 8, unit: '-c')
    assert_equal ?a, PT.head(?a, 8, unit: :byte)
    assert_equal "", PT.head("", 10, unit: :byte)
  end


  # head/head_inverse with a Regexp: split at the first matching line.
  def test_head_re02
    s = "\n\n\n 04==\n\n 06==\n07\n08\n\n10\n11\n12\n14\n\n16\n17\n18\n19\n\n21\n22\n\n\n"
    s1 = "\n\n\n 04==\n"
    s2 = "\n 06==\n07\n08\n\n10\n11\n12\n14\n\n16\n17\n18\n19\n\n21\n22\n\n\n"
    assert_equal s1, PT.head(s, /==/) # Up to Line 4
    assert_equal s2, PT.head_inverse(s, /==/) # From Line 5
  end

  # head/head_inverse with a Regexp, with and without inclusive: false.
  def test_head_re03
    s = "\n2\n\n四\n5\n6\n\n8\n9\n10\n1T\n\n13\n14\n15\n16\n\n壱T\n19\n\n"
    s3 = "\n2\n\n四\n5\n6\n\n8\n9\n10\n1T\n\n13\n14\n15\n16\n\n"
    s4 = "壱T\n19\n\n"
    s5 = "\n2\n\n四\n5\n6\n\n8\n9\n10\n"
    s6 = "\n2\n\n四\n5\n6\n\n8\n9\n"
    s7 = "\n2\n\n四\n5\n6\n\n8\n9\n10\n1T\n"
    # The failure message shows the expected value (s3), matching the assertion.
    assert_equal s3, PT.head(s, /壱/, inclusive: false), s3.inspect+" <=> \n"+PT.head(s, /壱/, inclusive: false).inspect # Up to 17
    assert_equal s6, PT.head(s, /1/ , inclusive: false), s6.inspect+" <=> \n"+PT.head(s, /1/ , inclusive: false).inspect # Up to 9
    assert_equal s4, PT.head_inverse(s, /壱/, inclusive: false) # After 17
    assert_equal s5, PT.head(s, /1/), s5.inspect+" <=> \n"+PT.head(s, /1/).inspect # Up to 10
    assert_equal s7, PT.head(s, /T/), s7.inspect+" <=> \n"+PT.head(s, /T/).inspect # Up to 11
  end

  # tail/tail_inverse with a line count, char/byte units, negative counts
  # and a String subclass as the input.
  def test_tail01
    assert_equal "", PT.tail("")
    assert_equal 'abc', PT.tail("abc")
    assert_raises(TypeError){ PT.tail("abc", :wrong) }
    assert_raises(ArgumentError){ PT.tail("abc", 0) }

    s = "\n2\n\n四\n5\n6\n\n8\n9\n10\n11\n\n13\n14\n15\n16\n\n壱8\n19\n\n"

    s2 = s.sub(/.*11/m, '11')
    assert_equal s2, PT.tail(s), s2.inspect+' <=> '+PT.tail(s).inspect # 10 lines
    assert_equal s2.sub(/.\z/m, "X"), PT.tail(s.sub(/.\z/m, "X")) # Ending with no linebreak
    se = "\n壱8\n19\n\n"
    assert_equal se, PT.tail(s, 4), se.inspect+" <=> \n"+PT.tail(s,4).inspect
    assert_equal se[1..-1], PT.tail(s, 3)
    assert_equal se[1..-1], PT.tail(s, 3, unit: '-n')

    # char & byte options
    assert_equal se, PT.tail(s, 8, unit: :char)
    assert_equal "", PT.tail("", 8, unit: :char)
    assert_equal ?a, PT.tail(?a, 8, unit: :char)
    assert_equal se, PT.tail(s, 10, unit: :byte)
    assert_equal se, PT.tail(s, 10, unit: '-c')
    assert_equal ?a, PT.tail(?a, 8, unit: :byte)
    assert_equal "", PT.tail("", 10, unit: :byte)

    # Negative index
    assert_equal s, PT.tail(s, -1)
    assert_equal "", PT.tail(s, -100)

    assert_equal PT.head(s, 17), PT.tail_inverse(s, 3)
    assert_equal "", PT.tail_inverse("", 3)

    # Child class of String
    chs = ChString.new ""
    nam = chs.class.name
    assert_equal "", chs
    assert_equal chs, PT.tail(chs)
    assert_equal nam, PT.tail(chs).class.name, nam+" <=> \n"+PT.tail(chs).class.name.inspect
    # NOTE(review): the next line passes the class *name* String to tail;
    # possibly PT.tail(chs).class.name was intended -- confirm with the author.
    assert_equal nam, PT.tail(chs.class.name)
    assert_equal nam, PT.tail(chs, -100).class.name
  end

  # tail/tail_inverse with a Regexp, with and without inclusive: false.
  def test_tail_re02
    s = "\n2\n\n四\n5\n6\n\n8\n9\n10\n11\n\n13\n14\n15\n16\n\n壱8\n19\n\n"
    s1 = "\n2\n\n四\n5\n6\n\n8\n9\n10\n11\n\n13\n14\n15\n"
    s2 = "16\n\n壱8\n19\n\n"
    assert_equal s2, PT.tail(s, /16/), s2.inspect+" <=> \n"+PT.tail(s, /16/).inspect # After 15
    assert_equal s1, PT.tail_inverse(s, /16/) # Up to 15
    assert_equal s2, PT.tail(s, /15/, inclusive: false), s2.inspect+" <=> \n"+PT.tail(s, /15/, inclusive: false).inspect # From Line 16 (after 15)

    s3 = "\n2\n\n四\n5\n6\n\n8\n9\n10\n11\n\n13\n14\n15\n16\n\n"
    s4 = "壱8\n19\n\n"
    assert_equal s4, PT.tail(s, /壱/), s4.inspect+" <=> \n"+PT.tail(s, /壱/).inspect # After 17
    assert_equal s4, PT.tail(s, /8/), s4.inspect+" <=> \n"+PT.tail(s, /8/ ).inspect # After 17
    assert_equal s3, PT.tail_inverse(s, /壱/) # Up to 17
    assert_equal s2, PT.tail(s, /5/, inclusive: false), s2.inspect+" <=> \n"+PT.tail(s, /5/, inclusive: false).inspect # From Line 16 (after 15)
  end

  def test_tail_re03
    # Boundary condition tests - when the first line is included!
    s = "abc\ndef"
    assert_equal s, PT.tail(s, /a/), prerr(s, PT.tail(s, /a/), long: nil)
    assert_equal s, PT.tail(s, /a/), prerr(s, PT.tail(s, /a/))
    assert_equal s, PT.tail(s, /b/), prerr(s, PT.tail(s, /b/))
    assert_equal "def", PT.tail(s, /a/, inclusive: false)
    assert_equal "def", PT.tail(s, /b/, inclusive: false), prerr('"def"', PT.tail(s, /b/, inclusive: false))
  end

  # Builds the failure message passed to the assertions above.
  # The Japanese labels in the message mean "Expected" and "Actual".
  #
  # @param *rest [Object] Parameters to print. Expected first, Actual second.
  # @param long: [Boolean] If true, linefeed is inserted (Better for String comparison).
  # @return [String] Error message when failed.
  def prerr(*rest, long: true)
    '[期待] '+rest.map(&:inspect).join(" ⇔ "+(long ? "\n" : "")+'[実際] ')
  end
end # class TestUnitPlainText < Minitest::Test
|
278
|
+
|
279
|
+
#end # if $0 == __FILE__
|
280
|
+
|
@@ -0,0 +1,146 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
# Author: M. Sakano (Wise Babel Ltd)
|
4
|
+
|
5
|
+
require 'plain_text'
|
6
|
+
require 'plain_text/parse_rule'
|
7
|
+
|
8
|
+
$stdout.sync=true
|
9
|
+
$stderr.sync=true
|
10
|
+
# print '$LOAD_PATH=';p $LOAD_PATH
|
11
|
+
|
12
|
+
#################################################
|
13
|
+
# Unit Test
|
14
|
+
#################################################
|
15
|
+
|
16
|
+
#if $0 == __FILE__
|
17
|
+
gem "minitest"
|
18
|
+
# require 'minitest/unit'
|
19
|
+
require 'minitest/autorun'
|
20
|
+
# MiniTest::Unit.autorun
|
21
|
+
|
22
|
+
# Unit tests for PlainText::ParseRule: construction, push/pop, naming,
# clone/dup and apply.
#
# The superclass is spelled Minitest::Test: the legacy constant "MiniTest"
# is only a compatibility alias and is not defined by recent Minitest
# releases unless explicitly requested.
class TestUnitPlainTextParseRule < Minitest::Test
  T = true
  F = false
  SCFNAME = File.basename(__FILE__)
  PR = PlainText::ParseRule
  PRLb = PR::RuleConsecutiveLbs

  def setup
  end

  def teardown
  end

  # Construction with one Regexp and a name, then push of a second rule.
  def test_new01
    re1 = /(\n{2,})/
    pr1 = PR.new re1, name: :std
    assert_equal [re1], pr1.rules
    assert_equal ['std'], pr1.names

    re2 = /\s*=\s*/i
    pr1.push re2, name: 'equ'
    assert_equal [re1, /(\s*=\s*)/i], pr1.rules # Grouping in Regexp added by ParseRule#add_grouping (Private method)
    assert_equal ['std', 'equ'], pr1.names
  end

  # clone of the predefined rule is expected to write a frozen/freeze notice
  # to STDERR; clone of an ordinary instance is silent and independent.
  def test_clone
    re2 = /(\s*=\s*)/
    assert_output(nil, /frozen|freeze/i){ PRLb.clone } # STDOUT is whatever (to suppress it)

    pr1 = PR.new(/abc/)
    assert_output("", ""){ pr1.clone }
    pr2 = pr1.clone
    pr2.push re2, name: 'new'
    assert_equal 1, pr1.size, "pr1.rules-id=#{pr1.rules.object_id}, pr2.rules-id=#{pr2.rules.object_id}"
    assert_equal 2, pr2.size
  end

  # dup of the predefined rule can be extended without affecting the original.
  def test_dup
    pr1 = PRLb.dup
    re2 = /(\s*=\s*)/
    pr1.push re2, name: 'new'
    assert_equal 1, PRLb.size
    assert_equal 2, pr1.size
  end

  # apply of the predefined rule splits a String at consecutive linebreaks.
  def test_apply01
    str = "\n\n\nFirst = para. \n\n"
    ar = PRLb.apply str
    assert_equal ['ConsecutiveLbs'], PRLb.names
    assert_equal Array, ar.class
    assert_equal 4, ar.size, "Wrong returned array = #{ar.inspect}"
    assert_equal ["", "\n\n\n", 'First = para. ', "\n\n"], ar
  end

  # apply with several rules (Regexp and Proc), selected by name or index,
  # plus set_name_at, rule_at and pop.
  def test_apply02
    str = "\n\n\nFirst = para. \n\n"
    re1 = /(\n{2,})/
    re2 = /(\s*=\s*)/

    pr2 = PR.new re1, name: :std
    ar1 = pr2.apply str
    assert_equal 4, ar1.size, "Wrong returned array = #{ar1.inspect}"
    assert_equal ["", "\n\n\n", 'First = para. ', "\n\n"], ar1

    # Two Regexp applied freshly.
    pr2.push re2
    assert_equal [re1, re2], pr2.rules
    assert_equal ['std', nil], pr2.names
    pr2.set_name_at(:myname, 1) # Test of ParseRule#set_name_at
    assert_equal ['std', 'myname'], pr2.names
    assert_equal re2, pr2.rule_at('myname') # Tests of ParseRule#rule_at
    assert_equal re2, pr2.rule_at(:myname)
    assert_equal re2, pr2.rule_at(1)
    assert_nil pr2.rule_at('naiyo')
    ar2 = pr2.apply str
    assert_equal 6, ar2.size, "Wrong returned array = #{ar2.inspect}"
    assert_equal ["", "\n\n\n", "First", " = ", "para. ", "\n\n"], ar2, "Wrong returned array = #{ar2.inspect}"

    # Third Proc, applied independently for an Array, called by name.
    pr2.push(name: 'paranize'){ |arin| (defined?(arin.map) ? arin : [arin]).map{|i| ("First"==i) ? PlainText::Part::Paragraph.new(i) : i} }
    ar3 = pr2.apply ar2, index: :paranize # index can be specified either String or Symbol (or index)
    assert_equal ["", "\n\n\n", "First", " = ", "para. ", "\n\n"], ar3, "Wrong returned array = #{ar3.inspect}"
    assert_equal PlainText::Part::Paragraph, ar3[2].class

    # Fourth Proc, applied independently for an Array, called by index.
    mk_bound = Proc.new{ |arin| (defined?(arin.map) ? arin : [arin]).map{|i| (/\n+/m =~ i) ? PlainText::Part::Boundary.new(i) : i} }
    pr2.push(mk_bound)
    ar4 = pr2.apply ar3, index: 3
    assert_equal ["", "\n\n\n", "First", " = ", "para. ", "\n\n"], ar4, "Wrong returned array = #{ar4.inspect}"
    assert ar4[0].empty?
    assert_equal PlainText::Part::Boundary, ar4[1].class
    assert_equal PlainText::Part::Paragraph, ar4[2].class
    assert_equal String, ar4[3].class
    assert_equal String, ar4[4].class
    assert_equal PlainText::Part::Boundary, ar4[5].class

    # Tests of pop
    assert_equal 4, pr2.rules.size
    assert_equal 4, pr2.names.size
    pr2.pop
    assert_equal 3, pr2.rules.size
    assert_equal 3, pr2.names.size

    assert_raises(RuntimeError){ pr2.set_name_at( 'myname', 2) } # name already used
    assert_raises(RuntimeError){ pr2.send(:set_name_at, :myname, 2) } # name already used
  end

  # Construction from an Array of two Regexps, both applied in one call.
  def test_apply03
    str = "\n\n\nFirst = para. \n\n"
    re1 = /(\n{2,})/
    re2 = /(\s)/

    pr2 = PR.new [re1, re2]
    ar2 = pr2.apply str
    assert_equal 2, pr2.size
    assert_equal 8, ar2.size, "Wrong returned array = #{ar2.inspect}"
    assert_equal ["", "\n\n\n", "First", " ", "=", " ", "para.", " \n\n"], ar2, "Wrong returned array = #{ar2.inspect}"
  end

  #assert_operator pt2, '!=', a2
  #assert_match(/^\s*ADD CONSTRAINT/ , s1.instance_eval{ @strall })
end # class TestUnitPlainTextParseRule < Minitest::Test
|
144
|
+
|
145
|
+
#end # if $0 == __FILE__
|
146
|
+
|