ae_easy-text 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CODE_OF_CONDUCT.md +1 -1
- data/Gemfile +1 -1
- data/README.md +8 -4
- data/Rakefile +0 -10
- data/ae_easy-text.gemspec +6 -9
- data/lib/ae_easy/text.rb +3 -309
- metadata +23 -67
- data/doc/AeEasy.html +0 -117
- data/doc/AeEasy/Text.html +0 -2146
- data/doc/_index.html +0 -122
- data/doc/class_list.html +0 -51
- data/doc/css/common.css +0 -1
- data/doc/css/full_list.css +0 -58
- data/doc/css/style.css +0 -496
- data/doc/file.README.html +0 -91
- data/doc/file_list.html +0 -56
- data/doc/frames.html +0 -17
- data/doc/index.html +0 -91
- data/doc/js/app.js +0 -292
- data/doc/js/full_list.js +0 -216
- data/doc/js/jquery.js +0 -4
- data/doc/method_list.html +0 -131
- data/doc/top-level-namespace.html +0 -110
- data/lib/ae_easy/text/version.rb +0 -6
data/doc/AeEasy.html
DELETED
@@ -1,117 +0,0 @@
|
|
1
|
-
<!DOCTYPE html>
|
2
|
-
<html>
|
3
|
-
<head>
|
4
|
-
<meta charset="utf-8">
|
5
|
-
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
6
|
-
<title>
|
7
|
-
Module: AeEasy
|
8
|
-
|
9
|
-
— Documentation by YARD 0.9.18
|
10
|
-
|
11
|
-
</title>
|
12
|
-
|
13
|
-
<link rel="stylesheet" href="css/style.css" type="text/css" charset="utf-8" />
|
14
|
-
|
15
|
-
<link rel="stylesheet" href="css/common.css" type="text/css" charset="utf-8" />
|
16
|
-
|
17
|
-
<script type="text/javascript" charset="utf-8">
|
18
|
-
pathId = "AeEasy";
|
19
|
-
relpath = '';
|
20
|
-
</script>
|
21
|
-
|
22
|
-
|
23
|
-
<script type="text/javascript" charset="utf-8" src="js/jquery.js"></script>
|
24
|
-
|
25
|
-
<script type="text/javascript" charset="utf-8" src="js/app.js"></script>
|
26
|
-
|
27
|
-
|
28
|
-
</head>
|
29
|
-
<body>
|
30
|
-
<div class="nav_wrap">
|
31
|
-
<iframe id="nav" src="class_list.html?1"></iframe>
|
32
|
-
<div id="resizer"></div>
|
33
|
-
</div>
|
34
|
-
|
35
|
-
<div id="main" tabindex="-1">
|
36
|
-
<div id="header">
|
37
|
-
<div id="menu">
|
38
|
-
|
39
|
-
<a href="_index.html">Index (A)</a> »
|
40
|
-
|
41
|
-
|
42
|
-
<span class="title">AeEasy</span>
|
43
|
-
|
44
|
-
</div>
|
45
|
-
|
46
|
-
<div id="search">
|
47
|
-
|
48
|
-
<a class="full_list_link" id="class_list_link"
|
49
|
-
href="class_list.html">
|
50
|
-
|
51
|
-
<svg width="24" height="24">
|
52
|
-
<rect x="0" y="4" width="24" height="4" rx="1" ry="1"></rect>
|
53
|
-
<rect x="0" y="12" width="24" height="4" rx="1" ry="1"></rect>
|
54
|
-
<rect x="0" y="20" width="24" height="4" rx="1" ry="1"></rect>
|
55
|
-
</svg>
|
56
|
-
</a>
|
57
|
-
|
58
|
-
</div>
|
59
|
-
<div class="clear"></div>
|
60
|
-
</div>
|
61
|
-
|
62
|
-
<div id="content"><h1>Module: AeEasy
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
</h1>
|
67
|
-
<div class="box_info">
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
<dl>
|
80
|
-
<dt>Defined in:</dt>
|
81
|
-
<dd>lib/ae_easy/text.rb<span class="defines">,<br />
|
82
|
-
lib/ae_easy/text/version.rb</span>
|
83
|
-
</dd>
|
84
|
-
</dl>
|
85
|
-
|
86
|
-
</div>
|
87
|
-
|
88
|
-
<h2>Defined Under Namespace</h2>
|
89
|
-
<p class="children">
|
90
|
-
|
91
|
-
|
92
|
-
<strong class="modules">Modules:</strong> <span class='object_link'><a href="AeEasy/Text.html" title="AeEasy::Text (module)">Text</a></span>
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
</p>
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
</div>
|
108
|
-
|
109
|
-
<div id="footer">
|
110
|
-
Generated on Mon Mar 11 21:38:55 2019 by
|
111
|
-
<a href="http://yardoc.org" title="Yay! A Ruby Documentation Tool" target="_parent">yard</a>
|
112
|
-
0.9.18 (ruby-2.5.3).
|
113
|
-
</div>
|
114
|
-
|
115
|
-
</div>
|
116
|
-
</body>
|
117
|
-
</html>
|
data/doc/AeEasy/Text.html
DELETED
@@ -1,2146 +0,0 @@
|
|
1
|
-
<!DOCTYPE html>
|
2
|
-
<html>
|
3
|
-
<head>
|
4
|
-
<meta charset="utf-8">
|
5
|
-
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
6
|
-
<title>
|
7
|
-
Module: AeEasy::Text
|
8
|
-
|
9
|
-
— Documentation by YARD 0.9.18
|
10
|
-
|
11
|
-
</title>
|
12
|
-
|
13
|
-
<link rel="stylesheet" href="../css/style.css" type="text/css" charset="utf-8" />
|
14
|
-
|
15
|
-
<link rel="stylesheet" href="../css/common.css" type="text/css" charset="utf-8" />
|
16
|
-
|
17
|
-
<script type="text/javascript" charset="utf-8">
|
18
|
-
pathId = "AeEasy::Text";
|
19
|
-
relpath = '../';
|
20
|
-
</script>
|
21
|
-
|
22
|
-
|
23
|
-
<script type="text/javascript" charset="utf-8" src="../js/jquery.js"></script>
|
24
|
-
|
25
|
-
<script type="text/javascript" charset="utf-8" src="../js/app.js"></script>
|
26
|
-
|
27
|
-
|
28
|
-
</head>
|
29
|
-
<body>
|
30
|
-
<div class="nav_wrap">
|
31
|
-
<iframe id="nav" src="../class_list.html?1"></iframe>
|
32
|
-
<div id="resizer"></div>
|
33
|
-
</div>
|
34
|
-
|
35
|
-
<div id="main" tabindex="-1">
|
36
|
-
<div id="header">
|
37
|
-
<div id="menu">
|
38
|
-
|
39
|
-
<a href="../_index.html">Index (T)</a> »
|
40
|
-
<span class='title'><span class='object_link'><a href="../AeEasy.html" title="AeEasy (module)">AeEasy</a></span></span>
|
41
|
-
»
|
42
|
-
<span class="title">Text</span>
|
43
|
-
|
44
|
-
</div>
|
45
|
-
|
46
|
-
<div id="search">
|
47
|
-
|
48
|
-
<a class="full_list_link" id="class_list_link"
|
49
|
-
href="../class_list.html">
|
50
|
-
|
51
|
-
<svg width="24" height="24">
|
52
|
-
<rect x="0" y="4" width="24" height="4" rx="1" ry="1"></rect>
|
53
|
-
<rect x="0" y="12" width="24" height="4" rx="1" ry="1"></rect>
|
54
|
-
<rect x="0" y="20" width="24" height="4" rx="1" ry="1"></rect>
|
55
|
-
</svg>
|
56
|
-
</a>
|
57
|
-
|
58
|
-
</div>
|
59
|
-
<div class="clear"></div>
|
60
|
-
</div>
|
61
|
-
|
62
|
-
<div id="content"><h1>Module: AeEasy::Text
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
</h1>
|
67
|
-
<div class="box_info">
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
<dl>
|
80
|
-
<dt>Defined in:</dt>
|
81
|
-
<dd>lib/ae_easy/text.rb<span class="defines">,<br />
|
82
|
-
lib/ae_easy/text/version.rb</span>
|
83
|
-
</dd>
|
84
|
-
</dl>
|
85
|
-
|
86
|
-
</div>
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
<h2>
|
91
|
-
Constant Summary
|
92
|
-
<small><a href="#" class="constants_summary_toggle">collapse</a></small>
|
93
|
-
</h2>
|
94
|
-
|
95
|
-
<dl class="constants">
|
96
|
-
|
97
|
-
<dt id="VERSION-constant" class="">VERSION =
|
98
|
-
<div class="docstring">
|
99
|
-
<div class="discussion">
|
100
|
-
|
101
|
-
<p>Gem version</p>
|
102
|
-
|
103
|
-
|
104
|
-
</div>
|
105
|
-
</div>
|
106
|
-
<div class="tags">
|
107
|
-
|
108
|
-
|
109
|
-
</div>
|
110
|
-
</dt>
|
111
|
-
<dd><pre class="code"><span class='tstring'><span class='tstring_beg'>"</span><span class='tstring_content'>0.0.4</span><span class='tstring_end'>"</span></span></pre></dd>
|
112
|
-
|
113
|
-
</dl>
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
<h2>
|
124
|
-
Class Method Summary
|
125
|
-
<small><a href="#" class="summary_toggle">collapse</a></small>
|
126
|
-
</h2>
|
127
|
-
|
128
|
-
<ul class="summary">
|
129
|
-
|
130
|
-
<li class="public ">
|
131
|
-
<span class="summary_signature">
|
132
|
-
|
133
|
-
<a href="#decode_html-class_method" title="decode_html (class method)">.<strong>decode_html</strong>(text) ⇒ String </a>
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
</span>
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
<span class="summary_desc"><div class='inline'>
|
148
|
-
<p>Decode HTML entities from text .</p>
|
149
|
-
</div></span>
|
150
|
-
|
151
|
-
</li>
|
152
|
-
|
153
|
-
|
154
|
-
<li class="public ">
|
155
|
-
<span class="summary_signature">
|
156
|
-
|
157
|
-
<a href="#default_parser-class_method" title="default_parser (class method)">.<strong>default_parser</strong>(cell_element, data, key) ⇒ Object </a>
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
</span>
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
<span class="summary_desc"><div class='inline'>
|
172
|
-
<p>Default cell content parser used to parse cell element.</p>
|
173
|
-
</div></span>
|
174
|
-
|
175
|
-
</li>
|
176
|
-
|
177
|
-
|
178
|
-
<li class="public ">
|
179
|
-
<span class="summary_signature">
|
180
|
-
|
181
|
-
<a href="#encode_html-class_method" title="encode_html (class method)">.<strong>encode_html</strong>(text) ⇒ String </a>
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
</span>
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
<span class="summary_desc"><div class='inline'>
|
196
|
-
<p>Encode text for valid HTML entities.</p>
|
197
|
-
</div></span>
|
198
|
-
|
199
|
-
</li>
|
200
|
-
|
201
|
-
|
202
|
-
<li class="public ">
|
203
|
-
<span class="summary_signature">
|
204
|
-
|
205
|
-
<a href="#hash-class_method" title="hash (class method)">.<strong>hash</strong>(object) ⇒ String </a>
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
</span>
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
<span class="summary_desc"><div class='inline'>
|
220
|
-
<p>Create a hash from object.</p>
|
221
|
-
</div></span>
|
222
|
-
|
223
|
-
</li>
|
224
|
-
|
225
|
-
|
226
|
-
<li class="public ">
|
227
|
-
<span class="summary_signature">
|
228
|
-
|
229
|
-
<a href="#parse_content-class_method" title="parse_content (class method)">.<strong>parse_content</strong>(opts) {|data, row, header_map| ... } ⇒ Array<Hash><sup>?</sup> </a>
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
</span>
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
<span class="summary_desc"><div class='inline'>
|
244
|
-
<p>Parse row data matching a selector using a header map to translate
|
245
|
-
between columns and friendly keys.</p>
|
246
|
-
</div></span>
|
247
|
-
|
248
|
-
</li>
|
249
|
-
|
250
|
-
|
251
|
-
<li class="public ">
|
252
|
-
<span class="summary_signature">
|
253
|
-
|
254
|
-
<a href="#parse_header_map-class_method" title="parse_header_map (class method)">.<strong>parse_header_map</strong>(opts = {}) ⇒ Hash{Symbol,String => Integer}<sup>?</sup> </a>
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
</span>
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
<span class="summary_desc"><div class='inline'>
|
269
|
-
<p>Parse header from selector and create a header map to match a column key
|
270
|
-
with column index.</p>
|
271
|
-
</div></span>
|
272
|
-
|
273
|
-
</li>
|
274
|
-
|
275
|
-
|
276
|
-
<li class="public ">
|
277
|
-
<span class="summary_signature">
|
278
|
-
|
279
|
-
<a href="#parse_table-class_method" title="parse_table (class method)">.<strong>parse_table</strong>(opts = {}) {|data, row, header_map| ... } ⇒ Hash{Symbol => Array,Hash,nil} </a>
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
</span>
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
<span class="summary_desc"><div class='inline'>
|
294
|
-
<p>Parse data from a horizontal table like structure matching a selectors and
|
295
|
-
using a header map to match columns.</p>
|
296
|
-
</div></span>
|
297
|
-
|
298
|
-
</li>
|
299
|
-
|
300
|
-
|
301
|
-
<li class="public ">
|
302
|
-
<span class="summary_signature">
|
303
|
-
|
304
|
-
<a href="#parse_vertical_table-class_method" title="parse_vertical_table (class method)">.<strong>parse_vertical_table</strong>(opts = {}) {|data, row, header_map| ... } ⇒ Hash{Symbol => Array,Hash,nil} </a>
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
</span>
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
<span class="summary_desc"><div class='inline'>
|
319
|
-
<p>Parse data from a vertical table like structure matching a selectors and
|
320
|
-
using a header map to match columns.</p>
|
321
|
-
</div></span>
|
322
|
-
|
323
|
-
</li>
|
324
|
-
|
325
|
-
|
326
|
-
<li class="public ">
|
327
|
-
<span class="summary_signature">
|
328
|
-
|
329
|
-
<a href="#strip-class_method" title="strip (class method)">.<strong>strip</strong>(raw_text, orig_encoding = 'ASCII') ⇒ String<sup>?</sup> </a>
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
</span>
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
<span class="summary_desc"><div class='inline'>
|
344
|
-
<p>Strip a value by trimming spaces, reducing secuential spaces into a
|
345
|
-
single space, decode HTML entities and change encoding to UTF-8.</p>
|
346
|
-
</div></span>
|
347
|
-
|
348
|
-
</li>
|
349
|
-
|
350
|
-
|
351
|
-
<li class="public ">
|
352
|
-
<span class="summary_signature">
|
353
|
-
|
354
|
-
<a href="#translate_label_to_key-class_method" title="translate_label_to_key (class method)">.<strong>translate_label_to_key</strong>(element, label_map) ⇒ Symbol, String </a>
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
</span>
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
<span class="summary_desc"><div class='inline'>
|
369
|
-
<p>Extract column label and translate it into a frienly key.</p>
|
370
|
-
</div></span>
|
371
|
-
|
372
|
-
</li>
|
373
|
-
|
374
|
-
|
375
|
-
</ul>
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
<div id="class_method_details" class="method_details_list">
|
381
|
-
<h2>Class Method Details</h2>
|
382
|
-
|
383
|
-
|
384
|
-
<div class="method_details first">
|
385
|
-
<h3 class="signature first" id="decode_html-class_method">
|
386
|
-
|
387
|
-
.<strong>decode_html</strong>(text) ⇒ <tt>String</tt>
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
</h3><div class="docstring">
|
394
|
-
<div class="discussion">
|
395
|
-
|
396
|
-
<p>Decode HTML entities from text .</p>
|
397
|
-
|
398
|
-
|
399
|
-
</div>
|
400
|
-
</div>
|
401
|
-
<div class="tags">
|
402
|
-
<p class="tag_title">Parameters:</p>
|
403
|
-
<ul class="param">
|
404
|
-
|
405
|
-
<li>
|
406
|
-
|
407
|
-
<span class='name'>text</span>
|
408
|
-
|
409
|
-
|
410
|
-
<span class='type'>(<tt>String</tt>)</span>
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
—
|
415
|
-
<div class='inline'>
|
416
|
-
<p>Text to decode.</p>
|
417
|
-
</div>
|
418
|
-
|
419
|
-
</li>
|
420
|
-
|
421
|
-
</ul>
|
422
|
-
|
423
|
-
<p class="tag_title">Returns:</p>
|
424
|
-
<ul class="return">
|
425
|
-
|
426
|
-
<li>
|
427
|
-
|
428
|
-
|
429
|
-
<span class='type'>(<tt>String</tt>)</span>
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
</li>
|
434
|
-
|
435
|
-
</ul>
|
436
|
-
|
437
|
-
</div><table class="source_code">
|
438
|
-
<tr>
|
439
|
-
<td>
|
440
|
-
<pre class="lines">
|
441
|
-
|
442
|
-
|
443
|
-
33
|
444
|
-
34
|
445
|
-
35</pre>
|
446
|
-
</td>
|
447
|
-
<td>
|
448
|
-
<pre class="code"><span class="info file"># File 'lib/ae_easy/text.rb', line 33</span>
|
449
|
-
|
450
|
-
<span class='kw'>def</span> <span class='kw'>self</span><span class='period'>.</span><span class='id identifier rubyid_decode_html'>decode_html</span> <span class='id identifier rubyid_text'>text</span>
|
451
|
-
<span class='const'>CGI</span><span class='period'>.</span><span class='id identifier rubyid_unescapeHTML'>unescapeHTML</span> <span class='id identifier rubyid_text'>text</span>
|
452
|
-
<span class='kw'>end</span></pre>
|
453
|
-
</td>
|
454
|
-
</tr>
|
455
|
-
</table>
|
456
|
-
</div>
|
457
|
-
|
458
|
-
<div class="method_details ">
|
459
|
-
<h3 class="signature " id="default_parser-class_method">
|
460
|
-
|
461
|
-
.<strong>default_parser</strong>(cell_element, data, key) ⇒ <tt>Object</tt>
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
</h3><div class="docstring">
|
468
|
-
<div class="discussion">
|
469
|
-
|
470
|
-
<p>Default cell content parser used to parse cell element.</p>
|
471
|
-
|
472
|
-
|
473
|
-
</div>
|
474
|
-
</div>
|
475
|
-
<div class="tags">
|
476
|
-
<p class="tag_title">Parameters:</p>
|
477
|
-
<ul class="param">
|
478
|
-
|
479
|
-
<li>
|
480
|
-
|
481
|
-
<span class='name'>cell_element</span>
|
482
|
-
|
483
|
-
|
484
|
-
<span class='type'>(<tt>Nokogiri::Element</tt>)</span>
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
—
|
489
|
-
<div class='inline'>
|
490
|
-
<p>Cell element to parse.</p>
|
491
|
-
</div>
|
492
|
-
|
493
|
-
</li>
|
494
|
-
|
495
|
-
<li>
|
496
|
-
|
497
|
-
<span class='name'>data</span>
|
498
|
-
|
499
|
-
|
500
|
-
<span class='type'>(<tt>Hash</tt>)</span>
|
501
|
-
|
502
|
-
|
503
|
-
|
504
|
-
—
|
505
|
-
<div class='inline'>
|
506
|
-
<p>Data hash to save parsed data into.</p>
|
507
|
-
</div>
|
508
|
-
|
509
|
-
</li>
|
510
|
-
|
511
|
-
<li>
|
512
|
-
|
513
|
-
<span class='name'>key</span>
|
514
|
-
|
515
|
-
|
516
|
-
<span class='type'>(<tt>String</tt>, <tt>Symbol</tt>)</span>
|
517
|
-
|
518
|
-
|
519
|
-
|
520
|
-
—
|
521
|
-
<div class='inline'>
|
522
|
-
<p>Header column key being parsed.</p>
|
523
|
-
</div>
|
524
|
-
|
525
|
-
</li>
|
526
|
-
|
527
|
-
</ul>
|
528
|
-
|
529
|
-
|
530
|
-
</div><table class="source_code">
|
531
|
-
<tr>
|
532
|
-
<td>
|
533
|
-
<pre class="lines">
|
534
|
-
|
535
|
-
|
536
|
-
62
|
537
|
-
63
|
538
|
-
64
|
539
|
-
65
|
540
|
-
66</pre>
|
541
|
-
</td>
|
542
|
-
<td>
|
543
|
-
<pre class="code"><span class="info file"># File 'lib/ae_easy/text.rb', line 62</span>
|
544
|
-
|
545
|
-
<span class='kw'>def</span> <span class='kw'>self</span><span class='period'>.</span><span class='id identifier rubyid_default_parser'>default_parser</span> <span class='id identifier rubyid_cell_element'>cell_element</span><span class='comma'>,</span> <span class='id identifier rubyid_data'>data</span><span class='comma'>,</span> <span class='id identifier rubyid_key'>key</span>
|
546
|
-
<span class='kw'>return</span> <span class='kw'>if</span> <span class='id identifier rubyid_cell_element'>cell_element</span><span class='period'>.</span><span class='id identifier rubyid_nil?'>nil?</span>
|
547
|
-
<span class='id identifier rubyid_cell_element'>cell_element</span><span class='period'>.</span><span class='id identifier rubyid_search'>search</span><span class='lparen'>(</span><span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>//i</span><span class='tstring_end'>'</span></span><span class='rparen'>)</span><span class='period'>.</span><span class='id identifier rubyid_remove'>remove</span> <span class='kw'>if</span> <span class='id identifier rubyid_cell_element'>cell_element</span><span class='period'>.</span><span class='id identifier rubyid_search'>search</span><span class='lparen'>(</span><span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>//i</span><span class='tstring_end'>'</span></span><span class='rparen'>)</span><span class='period'>.</span><span class='id identifier rubyid_count'>count</span> <span class='op'>></span> <span class='int'>0</span>
|
548
|
-
<span class='id identifier rubyid_data'>data</span><span class='lbracket'>[</span><span class='id identifier rubyid_key'>key</span><span class='rbracket'>]</span> <span class='op'>=</span> <span class='id identifier rubyid_strip'>strip</span> <span class='id identifier rubyid_cell_element'>cell_element</span><span class='period'>.</span><span class='id identifier rubyid_text'>text</span>
|
549
|
-
<span class='kw'>end</span></pre>
|
550
|
-
</td>
|
551
|
-
</tr>
|
552
|
-
</table>
|
553
|
-
</div>
|
554
|
-
|
555
|
-
<div class="method_details ">
|
556
|
-
<h3 class="signature " id="encode_html-class_method">
|
557
|
-
|
558
|
-
.<strong>encode_html</strong>(text) ⇒ <tt>String</tt>
|
559
|
-
|
560
|
-
|
561
|
-
|
562
|
-
|
563
|
-
|
564
|
-
</h3><div class="docstring">
|
565
|
-
<div class="discussion">
|
566
|
-
|
567
|
-
<p>Encode text for valid HTML entities.</p>
|
568
|
-
|
569
|
-
|
570
|
-
</div>
|
571
|
-
</div>
|
572
|
-
<div class="tags">
|
573
|
-
<p class="tag_title">Parameters:</p>
|
574
|
-
<ul class="param">
|
575
|
-
|
576
|
-
<li>
|
577
|
-
|
578
|
-
<span class='name'>text</span>
|
579
|
-
|
580
|
-
|
581
|
-
<span class='type'>(<tt>String</tt>)</span>
|
582
|
-
|
583
|
-
|
584
|
-
|
585
|
-
—
|
586
|
-
<div class='inline'>
|
587
|
-
<p>Text to encode.</p>
|
588
|
-
</div>
|
589
|
-
|
590
|
-
</li>
|
591
|
-
|
592
|
-
</ul>
|
593
|
-
|
594
|
-
<p class="tag_title">Returns:</p>
|
595
|
-
<ul class="return">
|
596
|
-
|
597
|
-
<li>
|
598
|
-
|
599
|
-
|
600
|
-
<span class='type'>(<tt>String</tt>)</span>
|
601
|
-
|
602
|
-
|
603
|
-
|
604
|
-
</li>
|
605
|
-
|
606
|
-
</ul>
|
607
|
-
|
608
|
-
</div><table class="source_code">
|
609
|
-
<tr>
|
610
|
-
<td>
|
611
|
-
<pre class="lines">
|
612
|
-
|
613
|
-
|
614
|
-
24
|
615
|
-
25
|
616
|
-
26</pre>
|
617
|
-
</td>
|
618
|
-
<td>
|
619
|
-
<pre class="code"><span class="info file"># File 'lib/ae_easy/text.rb', line 24</span>
|
620
|
-
|
621
|
-
<span class='kw'>def</span> <span class='kw'>self</span><span class='period'>.</span><span class='id identifier rubyid_encode_html'>encode_html</span> <span class='id identifier rubyid_text'>text</span>
|
622
|
-
<span class='const'>CGI</span><span class='period'>.</span><span class='id identifier rubyid_escapeHTML'>escapeHTML</span> <span class='id identifier rubyid_text'>text</span>
|
623
|
-
<span class='kw'>end</span></pre>
|
624
|
-
</td>
|
625
|
-
</tr>
|
626
|
-
</table>
|
627
|
-
</div>
|
628
|
-
|
629
|
-
<div class="method_details ">
|
630
|
-
<h3 class="signature " id="hash-class_method">
|
631
|
-
|
632
|
-
.<strong>hash</strong>(object) ⇒ <tt>String</tt>
|
633
|
-
|
634
|
-
|
635
|
-
|
636
|
-
|
637
|
-
|
638
|
-
</h3><div class="docstring">
|
639
|
-
<div class="discussion">
|
640
|
-
|
641
|
-
<p>Create a hash from object</p>
|
642
|
-
|
643
|
-
|
644
|
-
</div>
|
645
|
-
</div>
|
646
|
-
<div class="tags">
|
647
|
-
<p class="tag_title">Parameters:</p>
|
648
|
-
<ul class="param">
|
649
|
-
|
650
|
-
<li>
|
651
|
-
|
652
|
-
<span class='name'>object</span>
|
653
|
-
|
654
|
-
|
655
|
-
<span class='type'>(<tt>String</tt>, <tt>Hash</tt>, <tt>Object</tt>)</span>
|
656
|
-
|
657
|
-
|
658
|
-
|
659
|
-
—
|
660
|
-
<div class='inline'>
|
661
|
-
<p>Object to create hash from.</p>
|
662
|
-
</div>
|
663
|
-
|
664
|
-
</li>
|
665
|
-
|
666
|
-
</ul>
|
667
|
-
|
668
|
-
<p class="tag_title">Returns:</p>
|
669
|
-
<ul class="return">
|
670
|
-
|
671
|
-
<li>
|
672
|
-
|
673
|
-
|
674
|
-
<span class='type'>(<tt>String</tt>)</span>
|
675
|
-
|
676
|
-
|
677
|
-
|
678
|
-
</li>
|
679
|
-
|
680
|
-
</ul>
|
681
|
-
|
682
|
-
</div><table class="source_code">
|
683
|
-
<tr>
|
684
|
-
<td>
|
685
|
-
<pre class="lines">
|
686
|
-
|
687
|
-
|
688
|
-
14
|
689
|
-
15
|
690
|
-
16
|
691
|
-
17</pre>
|
692
|
-
</td>
|
693
|
-
<td>
|
694
|
-
<pre class="code"><span class="info file"># File 'lib/ae_easy/text.rb', line 14</span>
|
695
|
-
|
696
|
-
<span class='kw'>def</span> <span class='kw'>self</span><span class='period'>.</span><span class='id identifier rubyid_hash'>hash</span> <span class='id identifier rubyid_object'>object</span>
|
697
|
-
<span class='id identifier rubyid_object'>object</span> <span class='op'>=</span> <span class='id identifier rubyid_object'>object</span><span class='period'>.</span><span class='id identifier rubyid_hash'>hash</span> <span class='kw'>if</span> <span class='id identifier rubyid_object'>object</span><span class='period'>.</span><span class='id identifier rubyid_is_a?'>is_a?</span> <span class='const'>Hash</span>
|
698
|
-
<span class='const'>Digest</span><span class='op'>::</span><span class='const'>SHA1</span><span class='period'>.</span><span class='id identifier rubyid_hexdigest'>hexdigest</span> <span class='id identifier rubyid_object'>object</span><span class='period'>.</span><span class='id identifier rubyid_to_s'>to_s</span>
|
699
|
-
<span class='kw'>end</span></pre>
|
700
|
-
</td>
|
701
|
-
</tr>
|
702
|
-
</table>
|
703
|
-
</div>
|
704
|
-
|
705
|
-
<div class="method_details ">
|
706
|
-
<h3 class="signature " id="parse_content-class_method">
|
707
|
-
|
708
|
-
.<strong>parse_content</strong>(opts) {|data, row, header_map| ... } ⇒ <tt>Array<Hash></tt><sup>?</sup>
|
709
|
-
|
710
|
-
|
711
|
-
|
712
|
-
|
713
|
-
|
714
|
-
</h3><div class="docstring">
|
715
|
-
<div class="discussion">
|
716
|
-
|
717
|
-
<p>Parse row data matching a selector using a header map to translate</p>
|
718
|
-
|
719
|
-
<pre class="code ruby"><code class="ruby">between columns and friendly keys.
|
720
|
-
</code></pre>
|
721
|
-
|
722
|
-
|
723
|
-
</div>
|
724
|
-
</div>
|
725
|
-
<div class="tags">
|
726
|
-
<p class="tag_title">Parameters:</p>
|
727
|
-
<ul class="param">
|
728
|
-
|
729
|
-
<li>
|
730
|
-
|
731
|
-
<span class='name'>opts</span>
|
732
|
-
|
733
|
-
|
734
|
-
<span class='type'>(<tt>Hash</tt>)</span>
|
735
|
-
|
736
|
-
|
737
|
-
|
738
|
-
—
|
739
|
-
<div class='inline'>
|
740
|
-
<p>({}) Configuration options.</p>
|
741
|
-
</div>
|
742
|
-
|
743
|
-
</li>
|
744
|
-
|
745
|
-
</ul>
|
746
|
-
|
747
|
-
|
748
|
-
|
749
|
-
|
750
|
-
<p class="tag_title">Options Hash (<tt>opts</tt>):</p>
|
751
|
-
<ul class="option">
|
752
|
-
|
753
|
-
<li>
|
754
|
-
<span class="name">:html</span>
|
755
|
-
<span class="type">(<tt>Nokogiri::Element</tt>)</span>
|
756
|
-
<span class="default">
|
757
|
-
|
758
|
-
</span>
|
759
|
-
|
760
|
-
— <div class='inline'>
|
761
|
-
<p>Container element to search into.</p>
|
762
|
-
</div>
|
763
|
-
|
764
|
-
</li>
|
765
|
-
|
766
|
-
<li>
|
767
|
-
<span class="name">:selector</span>
|
768
|
-
<span class="type">(<tt>String</tt>)</span>
|
769
|
-
<span class="default">
|
770
|
-
|
771
|
-
</span>
|
772
|
-
|
773
|
-
— <div class='inline'>
|
774
|
-
<p>CSS selector to match content cells.</p>
|
775
|
-
</div>
|
776
|
-
|
777
|
-
</li>
|
778
|
-
|
779
|
-
<li>
|
780
|
-
<span class="name">:first_row_header</span>
|
781
|
-
<span class="type">(<tt>Boolean</tt>)</span>
|
782
|
-
<span class="default">
|
783
|
-
|
784
|
-
— default:
|
785
|
-
<tt>false</tt>
|
786
|
-
|
787
|
-
</span>
|
788
|
-
|
789
|
-
— <div class='inline'>
|
790
|
-
<p>If true then first matching element will be assumed to be header and
|
791
|
-
ignored.</p>
|
792
|
-
</div>
|
793
|
-
|
794
|
-
</li>
|
795
|
-
|
796
|
-
<li>
|
797
|
-
<span class="name">:header_map</span>
|
798
|
-
<span class="type">(<tt>Hash{Symbol,String => Integer}</tt>)</span>
|
799
|
-
<span class="default">
|
800
|
-
|
801
|
-
</span>
|
802
|
-
|
803
|
-
— <div class='inline'>
|
804
|
-
<p>Header key vs index dictionary.</p>
|
805
|
-
</div>
|
806
|
-
|
807
|
-
</li>
|
808
|
-
|
809
|
-
<li>
|
810
|
-
<span class="name">:column_parsers</span>
|
811
|
-
<span class="type">(<tt>Hash{Symbol,String => lambda,proc}</tt>)</span>
|
812
|
-
<span class="default">
|
813
|
-
|
814
|
-
— default:
|
815
|
-
<tt>{}</tt>
|
816
|
-
|
817
|
-
</span>
|
818
|
-
|
819
|
-
— <div class='inline'>
|
820
|
-
<p>Custom column parsers for advance data extraction.</p>
|
821
|
-
</div>
|
822
|
-
|
823
|
-
</li>
|
824
|
-
|
825
|
-
<li>
|
826
|
-
<span class="name">:ignore_text_nodes</span>
|
827
|
-
<span class="type">(<tt>Boolean</tt>)</span>
|
828
|
-
<span class="default">
|
829
|
-
|
830
|
-
— default:
|
831
|
-
<tt>true</tt>
|
832
|
-
|
833
|
-
</span>
|
834
|
-
|
835
|
-
— <div class='inline'>
|
836
|
-
<p>Ignore text nodes when retriving content cells and rows.</p>
|
837
|
-
</div>
|
838
|
-
|
839
|
-
</li>
|
840
|
-
|
841
|
-
</ul>
|
842
|
-
|
843
|
-
|
844
|
-
<p class="tag_title">Yield Parameters:</p>
|
845
|
-
<ul class="yieldparam">
|
846
|
-
|
847
|
-
<li>
|
848
|
-
|
849
|
-
<span class='name'>data</span>
|
850
|
-
|
851
|
-
|
852
|
-
<span class='type'>(<tt>Hash{Symbol,String => Object}</tt>)</span>
|
853
|
-
|
854
|
-
|
855
|
-
|
856
|
-
—
|
857
|
-
<div class='inline'>
|
858
|
-
<p>Parsed row data.</p>
|
859
|
-
</div>
|
860
|
-
|
861
|
-
</li>
|
862
|
-
|
863
|
-
<li>
|
864
|
-
|
865
|
-
<span class='name'>row</span>
|
866
|
-
|
867
|
-
|
868
|
-
<span class='type'>(<tt>Array</tt>)</span>
|
869
|
-
|
870
|
-
|
871
|
-
|
872
|
-
—
|
873
|
-
<div class='inline'>
|
874
|
-
<p>Raw row data.</p>
|
875
|
-
</div>
|
876
|
-
|
877
|
-
</li>
|
878
|
-
|
879
|
-
<li>
|
880
|
-
|
881
|
-
<span class='name'>header_map</span>
|
882
|
-
|
883
|
-
|
884
|
-
<span class='type'>(<tt>Hash{Symbol,String => Integer}</tt>)</span>
|
885
|
-
|
886
|
-
|
887
|
-
|
888
|
-
—
|
889
|
-
<div class='inline'>
|
890
|
-
<p>Header map used.</p>
|
891
|
-
</div>
|
892
|
-
|
893
|
-
</li>
|
894
|
-
|
895
|
-
</ul>
|
896
|
-
<p class="tag_title">Yield Returns:</p>
|
897
|
-
<ul class="yieldreturn">
|
898
|
-
|
899
|
-
<li>
|
900
|
-
|
901
|
-
|
902
|
-
<span class='type'>(<tt>Boolean</tt>)</span>
|
903
|
-
|
904
|
-
|
905
|
-
|
906
|
-
—
|
907
|
-
<div class='inline'>
|
908
|
-
<p>`true` when valid, else `false`.</p>
|
909
|
-
</div>
|
910
|
-
|
911
|
-
</li>
|
912
|
-
|
913
|
-
</ul>
|
914
|
-
<p class="tag_title">Returns:</p>
|
915
|
-
<ul class="return">
|
916
|
-
|
917
|
-
<li>
|
918
|
-
|
919
|
-
|
920
|
-
<span class='type'>(<tt>Array<Hash></tt>, <tt>nil</tt>)</span>
|
921
|
-
|
922
|
-
|
923
|
-
|
924
|
-
—
|
925
|
-
<div class='inline'>
|
926
|
-
<p>Parsed rows data.</p>
|
927
|
-
</div>
|
928
|
-
|
929
|
-
</li>
|
930
|
-
|
931
|
-
</ul>
|
932
|
-
|
933
|
-
</div><table class="source_code">
|
934
|
-
<tr>
|
935
|
-
<td>
|
936
|
-
<pre class="lines">
|
937
|
-
|
938
|
-
|
939
|
-
89
|
940
|
-
90
|
941
|
-
91
|
942
|
-
92
|
943
|
-
93
|
944
|
-
94
|
945
|
-
95
|
946
|
-
96
|
947
|
-
97
|
948
|
-
98
|
949
|
-
99
|
950
|
-
100
|
951
|
-
101
|
952
|
-
102
|
953
|
-
103
|
954
|
-
104
|
955
|
-
105
|
956
|
-
106
|
957
|
-
107
|
958
|
-
108
|
959
|
-
109
|
960
|
-
110
|
961
|
-
111
|
962
|
-
112
|
963
|
-
113
|
964
|
-
114
|
965
|
-
115
|
966
|
-
116
|
967
|
-
117
|
968
|
-
118
|
969
|
-
119
|
970
|
-
120
|
971
|
-
121
|
972
|
-
122
|
973
|
-
123
|
974
|
-
124
|
975
|
-
125
|
976
|
-
126
|
977
|
-
127
|
978
|
-
128
|
979
|
-
129
|
980
|
-
130
|
981
|
-
131
|
982
|
-
132
|
983
|
-
133</pre>
|
984
|
-
</td>
|
985
|
-
<td>
|
986
|
-
<pre class="code"><span class="info file"># File 'lib/ae_easy/text.rb', line 89</span>
|
987
|
-
|
988
|
-
<span class='kw'>def</span> <span class='kw'>self</span><span class='period'>.</span><span class='id identifier rubyid_parse_content'>parse_content</span> <span class='id identifier rubyid_opts'>opts</span><span class='comma'>,</span> <span class='op'>&</span><span class='id identifier rubyid_filter'>filter</span>
|
989
|
-
<span class='id identifier rubyid_opts'>opts</span> <span class='op'>=</span> <span class='lbrace'>{</span>
|
990
|
-
<span class='label'>html:</span> <span class='kw'>nil</span><span class='comma'>,</span>
|
991
|
-
<span class='label'>selector:</span> <span class='kw'>nil</span><span class='comma'>,</span>
|
992
|
-
<span class='label'>first_row_header:</span> <span class='kw'>false</span><span class='comma'>,</span>
|
993
|
-
<span class='label'>header_map:</span> <span class='lbrace'>{</span><span class='rbrace'>}</span><span class='comma'>,</span>
|
994
|
-
<span class='label'>column_parsers:</span> <span class='lbrace'>{</span><span class='rbrace'>}</span><span class='comma'>,</span>
|
995
|
-
<span class='label'>ignore_text_nodes:</span> <span class='kw'>true</span>
|
996
|
-
<span class='rbrace'>}</span><span class='period'>.</span><span class='id identifier rubyid_merge'>merge</span> <span class='id identifier rubyid_opts'>opts</span>
|
997
|
-
|
998
|
-
<span class='comment'># Setup config
|
999
|
-
</span> <span class='id identifier rubyid_data'>data</span> <span class='op'>=</span> <span class='lbracket'>[</span><span class='rbracket'>]</span>
|
1000
|
-
<span class='id identifier rubyid_row_data'>row_data</span> <span class='op'>=</span> <span class='id identifier rubyid_child_element'>child_element</span> <span class='op'>=</span> <span class='kw'>nil</span>
|
1001
|
-
<span class='id identifier rubyid_first'>first</span> <span class='op'>=</span> <span class='id identifier rubyid_first_row_header'>first_row_header</span> <span class='op'>=</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:first_row_header</span><span class='rbracket'>]</span>
|
1002
|
-
<span class='id identifier rubyid_header_map'>header_map</span> <span class='op'>=</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:header_map</span><span class='rbracket'>]</span>
|
1003
|
-
<span class='id identifier rubyid_column_parsers'>column_parsers</span> <span class='op'>=</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:column_parsers</span><span class='rbracket'>]</span>
|
1004
|
-
<span class='id identifier rubyid_ignore_text_nodes'>ignore_text_nodes</span> <span class='op'>=</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:ignore_text_nodes</span><span class='rbracket'>]</span>
|
1005
|
-
|
1006
|
-
<span class='comment'># Get and parse rows
|
1007
|
-
</span> <span class='id identifier rubyid_html_rows'>html_rows</span> <span class='op'>=</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:html</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_css'>css</span><span class='lparen'>(</span><span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:selector</span><span class='rbracket'>]</span><span class='rparen'>)</span>
|
1008
|
-
<span class='id identifier rubyid_html_rows'>html_rows</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_row'>row</span><span class='op'>|</span>
|
1009
|
-
<span class='kw'>next</span> <span class='kw'>if</span> <span class='id identifier rubyid_ignore_text_nodes'>ignore_text_nodes</span> <span class='op'>&&</span> <span class='id identifier rubyid_row'>row</span><span class='period'>.</span><span class='id identifier rubyid_name'>name</span> <span class='op'>==</span> <span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>text</span><span class='tstring_end'>'</span></span>
|
1010
|
-
|
1011
|
-
<span class='comment'># First row header validation
|
1012
|
-
</span> <span class='kw'>if</span> <span class='id identifier rubyid_first'>first</span> <span class='op'>&&</span> <span class='id identifier rubyid_first_row_header'>first_row_header</span>
|
1013
|
-
<span class='id identifier rubyid_first'>first</span> <span class='op'>=</span> <span class='kw'>false</span>
|
1014
|
-
<span class='kw'>next</span>
|
1015
|
-
<span class='kw'>end</span>
|
1016
|
-
|
1017
|
-
<span class='comment'># Extract content data
|
1018
|
-
</span> <span class='id identifier rubyid_row_data'>row_data</span> <span class='op'>=</span> <span class='lbrace'>{</span><span class='rbrace'>}</span>
|
1019
|
-
<span class='id identifier rubyid_header_map'>header_map</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_key'>key</span><span class='comma'>,</span> <span class='id identifier rubyid_index'>index</span><span class='op'>|</span>
|
1020
|
-
<span class='comment'># Parse column html with default or custom parser
|
1021
|
-
</span> <span class='id identifier rubyid_children'>children</span> <span class='op'>=</span> <span class='id identifier rubyid_row'>row</span><span class='period'>.</span><span class='id identifier rubyid_children'>children</span>
|
1022
|
-
<span class='id identifier rubyid_children'>children</span> <span class='op'>=</span> <span class='id identifier rubyid_children'>children</span><span class='period'>.</span><span class='id identifier rubyid_select'>select</span><span class='lbrace'>{</span><span class='op'>|</span><span class='id identifier rubyid_i'>i</span><span class='op'>|</span><span class='id identifier rubyid_i'>i</span><span class='period'>.</span><span class='id identifier rubyid_name'>name</span> <span class='op'>!=</span> <span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>text</span><span class='tstring_end'>'</span></span><span class='rbrace'>}</span> <span class='kw'>if</span> <span class='id identifier rubyid_ignore_text_nodes'>ignore_text_nodes</span>
|
1023
|
-
<span class='id identifier rubyid_child_element'>child_element</span> <span class='op'>=</span> <span class='id identifier rubyid_children'>children</span><span class='lbracket'>[</span><span class='id identifier rubyid_index'>index</span><span class='rbracket'>]</span>
|
1024
|
-
<span class='id identifier rubyid_column_parsers'>column_parsers</span><span class='lbracket'>[</span><span class='id identifier rubyid_key'>key</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_nil?'>nil?</span> <span class='op'>?</span>
|
1025
|
-
<span class='id identifier rubyid_default_parser'>default_parser</span><span class='lparen'>(</span><span class='id identifier rubyid_child_element'>child_element</span><span class='comma'>,</span> <span class='id identifier rubyid_row_data'>row_data</span><span class='comma'>,</span> <span class='id identifier rubyid_key'>key</span><span class='rparen'>)</span> <span class='op'>:</span>
|
1026
|
-
<span class='id identifier rubyid_column_parsers'>column_parsers</span><span class='lbracket'>[</span><span class='id identifier rubyid_key'>key</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_call'>call</span><span class='lparen'>(</span><span class='id identifier rubyid_child_element'>child_element</span><span class='comma'>,</span> <span class='id identifier rubyid_row_data'>row_data</span><span class='comma'>,</span> <span class='id identifier rubyid_key'>key</span><span class='rparen'>)</span>
|
1027
|
-
<span class='kw'>end</span>
|
1028
|
-
<span class='kw'>next</span> <span class='kw'>unless</span> <span class='id identifier rubyid_filter'>filter</span><span class='period'>.</span><span class='id identifier rubyid_nil?'>nil?</span> <span class='op'>||</span> <span class='id identifier rubyid_filter'>filter</span><span class='period'>.</span><span class='id identifier rubyid_call'>call</span><span class='lparen'>(</span><span class='id identifier rubyid_row_data'>row_data</span><span class='comma'>,</span> <span class='id identifier rubyid_row'>row</span><span class='comma'>,</span> <span class='id identifier rubyid_header_map'>header_map</span><span class='rparen'>)</span>
|
1029
|
-
<span class='id identifier rubyid_data'>data</span> <span class='op'><<</span> <span class='id identifier rubyid_row_data'>row_data</span>
|
1030
|
-
<span class='kw'>end</span>
|
1031
|
-
<span class='id identifier rubyid_data'>data</span>
|
1032
|
-
<span class='kw'>end</span></pre>
|
1033
|
-
</td>
|
1034
|
-
</tr>
|
1035
|
-
</table>
|
1036
|
-
</div>
|
1037
|
-
|
1038
|
-
<div class="method_details ">
|
1039
|
-
<h3 class="signature " id="parse_header_map-class_method">
|
1040
|
-
|
1041
|
-
.<strong>parse_header_map</strong>(opts = {}) ⇒ <tt>Hash{Symbol,String => Integer}</tt><sup>?</sup>
|
1042
|
-
|
1043
|
-
|
1044
|
-
|
1045
|
-
|
1046
|
-
|
1047
|
-
</h3><div class="docstring">
|
1048
|
-
<div class="discussion">
|
1049
|
-
|
1050
|
-
<p>Parse header from selector and create a header map to match a column key</p>
|
1051
|
-
|
1052
|
-
<pre class="code ruby"><code class="ruby">with column index.
|
1053
|
-
</code></pre>
|
1054
|
-
|
1055
|
-
|
1056
|
-
</div>
|
1057
|
-
</div>
|
1058
|
-
<div class="tags">
|
1059
|
-
<p class="tag_title">Parameters:</p>
|
1060
|
-
<ul class="param">
|
1061
|
-
|
1062
|
-
<li>
|
1063
|
-
|
1064
|
-
<span class='name'>opts</span>
|
1065
|
-
|
1066
|
-
|
1067
|
-
<span class='type'>(<tt>Hash</tt>)</span>
|
1068
|
-
|
1069
|
-
|
1070
|
-
<em class="default">(defaults to: <tt>{}</tt>)</em>
|
1071
|
-
|
1072
|
-
|
1073
|
-
—
|
1074
|
-
<div class='inline'>
|
1075
|
-
<p>({}) Configuration options.</p>
|
1076
|
-
</div>
|
1077
|
-
|
1078
|
-
</li>
|
1079
|
-
|
1080
|
-
</ul>
|
1081
|
-
|
1082
|
-
|
1083
|
-
|
1084
|
-
|
1085
|
-
<p class="tag_title">Options Hash (<tt>opts</tt>):</p>
|
1086
|
-
<ul class="option">
|
1087
|
-
|
1088
|
-
<li>
|
1089
|
-
<span class="name">:html</span>
|
1090
|
-
<span class="type">(<tt>Nokogiri::Element</tt>)</span>
|
1091
|
-
<span class="default">
|
1092
|
-
|
1093
|
-
</span>
|
1094
|
-
|
1095
|
-
— <div class='inline'>
|
1096
|
-
<p>Container element to search into.</p>
|
1097
|
-
</div>
|
1098
|
-
|
1099
|
-
</li>
|
1100
|
-
|
1101
|
-
<li>
|
1102
|
-
<span class="name">:selector</span>
|
1103
|
-
<span class="type">(<tt>String</tt>)</span>
|
1104
|
-
<span class="default">
|
1105
|
-
|
1106
|
-
</span>
|
1107
|
-
|
1108
|
-
— <div class='inline'>
|
1109
|
-
<p>CSS selector to match header cells.</p>
|
1110
|
-
</div>
|
1111
|
-
|
1112
|
-
</li>
|
1113
|
-
|
1114
|
-
<li>
|
1115
|
-
<span class="name">:column_key_label_map</span>
|
1116
|
-
<span class="type">(<tt>Hash{Symbol,String => Regex,String}</tt>)</span>
|
1117
|
-
<span class="default">
|
1118
|
-
|
1119
|
-
</span>
|
1120
|
-
|
1121
|
-
— <div class='inline'>
|
1122
|
-
<p>Key vs. label dictionary.</p>
|
1123
|
-
</div>
|
1124
|
-
|
1125
|
-
</li>
|
1126
|
-
|
1127
|
-
<li>
|
1128
|
-
<span class="name">:first_row_header</span>
|
1129
|
-
<span class="type">(<tt>Boolean</tt>)</span>
|
1130
|
-
<span class="default">
|
1131
|
-
|
1132
|
-
— default:
|
1133
|
-
<tt>false</tt>
|
1134
|
-
|
1135
|
-
</span>
|
1136
|
-
|
1137
|
-
— <div class='inline'>
|
1138
|
-
<p>If true, the first row matching the selector will be used as the header for
|
1139
|
-
parsing.</p>
|
1140
|
-
</div>
|
1141
|
-
|
1142
|
-
</li>
|
1143
|
-
|
1144
|
-
<li>
|
1145
|
-
<span class="name">:ignore_text_nodes</span>
|
1146
|
-
<span class="type">(<tt>Boolean</tt>)</span>
|
1147
|
-
<span class="default">
|
1148
|
-
|
1149
|
-
— default:
|
1150
|
-
<tt>true</tt>
|
1151
|
-
|
1152
|
-
</span>
|
1153
|
-
|
1154
|
-
— <div class='inline'>
|
1155
|
-
<p>Ignore text nodes when retrieving header cells and rows.</p>
|
1156
|
-
</div>
|
1157
|
-
|
1158
|
-
</li>
|
1159
|
-
|
1160
|
-
</ul>
|
1161
|
-
|
1162
|
-
|
1163
|
-
<p class="tag_title">Returns:</p>
|
1164
|
-
<ul class="return">
|
1165
|
-
|
1166
|
-
<li>
|
1167
|
-
|
1168
|
-
|
1169
|
-
<span class='type'>(<tt>Hash{Symbol,String => Integer}</tt>, <tt>nil</tt>)</span>
|
1170
|
-
|
1171
|
-
|
1172
|
-
|
1173
|
-
—
|
1174
|
-
<div class='inline'>
|
1175
|
-
<p>Key vs. column index map.</p>
|
1176
|
-
</div>
|
1177
|
-
|
1178
|
-
</li>
|
1179
|
-
|
1180
|
-
</ul>
|
1181
|
-
|
1182
|
-
</div><table class="source_code">
|
1183
|
-
<tr>
|
1184
|
-
<td>
|
1185
|
-
<pre class="lines">
|
1186
|
-
|
1187
|
-
|
1188
|
-
166
|
1189
|
-
167
|
1190
|
-
168
|
1191
|
-
169
|
1192
|
-
170
|
1193
|
-
171
|
1194
|
-
172
|
1195
|
-
173
|
1196
|
-
174
|
1197
|
-
175
|
1198
|
-
176
|
1199
|
-
177
|
1200
|
-
178
|
1201
|
-
179
|
1202
|
-
180
|
1203
|
-
181
|
1204
|
-
182
|
1205
|
-
183
|
1206
|
-
184
|
1207
|
-
185
|
1208
|
-
186
|
1209
|
-
187
|
1210
|
-
188
|
1211
|
-
189
|
1212
|
-
190
|
1213
|
-
191
|
1214
|
-
192
|
1215
|
-
193
|
1216
|
-
194
|
1217
|
-
195
|
1218
|
-
196
|
1219
|
-
197
|
1220
|
-
198
|
1221
|
-
199
|
1222
|
-
200</pre>
|
1223
|
-
</td>
|
1224
|
-
<td>
|
1225
|
-
<pre class="code"><span class="info file"># File 'lib/ae_easy/text.rb', line 166</span>
|
1226
|
-
|
1227
|
-
<span class='kw'>def</span> <span class='kw'>self</span><span class='period'>.</span><span class='id identifier rubyid_parse_header_map'>parse_header_map</span> <span class='id identifier rubyid_opts'>opts</span> <span class='op'>=</span> <span class='lbrace'>{</span><span class='rbrace'>}</span>
|
1228
|
-
<span class='id identifier rubyid_opts'>opts</span> <span class='op'>=</span> <span class='lbrace'>{</span>
|
1229
|
-
<span class='label'>html:</span> <span class='kw'>nil</span><span class='comma'>,</span>
|
1230
|
-
<span class='label'>selector:</span> <span class='kw'>nil</span><span class='comma'>,</span>
|
1231
|
-
<span class='label'>column_key_label_map:</span> <span class='lbrace'>{</span><span class='rbrace'>}</span><span class='comma'>,</span>
|
1232
|
-
<span class='label'>first_row_header:</span> <span class='kw'>false</span><span class='comma'>,</span>
|
1233
|
-
<span class='label'>ignore_text_nodes:</span> <span class='kw'>true</span>
|
1234
|
-
<span class='rbrace'>}</span><span class='period'>.</span><span class='id identifier rubyid_merge'>merge</span> <span class='id identifier rubyid_opts'>opts</span>
|
1235
|
-
|
1236
|
-
<span class='comment'># Setup config
|
1237
|
-
</span> <span class='id identifier rubyid_dictionary'>dictionary</span> <span class='op'>=</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:column_key_label_map</span><span class='rbracket'>]</span>
|
1238
|
-
<span class='id identifier rubyid_ignore_text_nodes'>ignore_text_nodes</span> <span class='op'>=</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:ignore_text_nodes</span><span class='rbracket'>]</span>
|
1239
|
-
<span class='id identifier rubyid_data'>data</span> <span class='op'>=</span> <span class='lbracket'>[</span><span class='rbracket'>]</span>
|
1240
|
-
<span class='id identifier rubyid_column_map'>column_map</span> <span class='op'>=</span> <span class='kw'>nil</span>
|
1241
|
-
|
1242
|
-
<span class='comment'># Extract and parse header rows
|
1243
|
-
</span> <span class='id identifier rubyid_html_rows'>html_rows</span> <span class='op'>=</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:html</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_css'>css</span><span class='lparen'>(</span><span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:selector</span><span class='rbracket'>]</span><span class='rparen'>)</span> <span class='kw'>rescue</span> <span class='kw'>nil</span>
|
1244
|
-
<span class='kw'>return</span> <span class='kw'>nil</span> <span class='kw'>if</span> <span class='id identifier rubyid_html_rows'>html_rows</span><span class='period'>.</span><span class='id identifier rubyid_nil?'>nil?</span>
|
1245
|
-
<span class='id identifier rubyid_html_rows'>html_rows</span> <span class='op'>=</span> <span class='lbracket'>[</span><span class='id identifier rubyid_html_rows'>html_rows</span><span class='period'>.</span><span class='id identifier rubyid_first'>first</span><span class='rbracket'>]</span> <span class='kw'>if</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:first_row_header</span><span class='rbracket'>]</span>
|
1246
|
-
<span class='id identifier rubyid_html_rows'>html_rows</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_row'>row</span><span class='op'>|</span>
|
1247
|
-
<span class='kw'>next</span> <span class='kw'>if</span> <span class='id identifier rubyid_ignore_text_nodes'>ignore_text_nodes</span> <span class='op'>&&</span> <span class='id identifier rubyid_row'>row</span><span class='period'>.</span><span class='id identifier rubyid_name'>name</span> <span class='op'>==</span> <span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>text</span><span class='tstring_end'>'</span></span>
|
1248
|
-
|
1249
|
-
<span class='id identifier rubyid_column_map'>column_map</span> <span class='op'>=</span> <span class='lbrace'>{</span><span class='rbrace'>}</span>
|
1250
|
-
<span class='id identifier rubyid_children'>children</span> <span class='op'>=</span> <span class='id identifier rubyid_row'>row</span><span class='period'>.</span><span class='id identifier rubyid_children'>children</span>
|
1251
|
-
<span class='id identifier rubyid_children'>children</span> <span class='op'>=</span> <span class='id identifier rubyid_children'>children</span><span class='period'>.</span><span class='id identifier rubyid_select'>select</span><span class='lbrace'>{</span><span class='op'>|</span><span class='id identifier rubyid_i'>i</span><span class='op'>|</span><span class='id identifier rubyid_i'>i</span><span class='period'>.</span><span class='id identifier rubyid_name'>name</span> <span class='op'>!=</span> <span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>text</span><span class='tstring_end'>'</span></span><span class='rbrace'>}</span> <span class='kw'>if</span> <span class='id identifier rubyid_ignore_text_nodes'>ignore_text_nodes</span>
|
1252
|
-
<span class='id identifier rubyid_children'>children</span><span class='period'>.</span><span class='id identifier rubyid_each_with_index'>each_with_index</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_col'>col</span><span class='comma'>,</span> <span class='id identifier rubyid_index'>index</span><span class='op'>|</span>
|
1253
|
-
<span class='comment'># Parse and map column header
|
1254
|
-
</span> <span class='id identifier rubyid_column_key'>column_key</span> <span class='op'>=</span> <span class='id identifier rubyid_translate_label_to_key'>translate_label_to_key</span> <span class='id identifier rubyid_col'>col</span><span class='comma'>,</span> <span class='id identifier rubyid_dictionary'>dictionary</span>
|
1255
|
-
<span class='kw'>next</span> <span class='kw'>if</span> <span class='id identifier rubyid_column_key'>column_key</span><span class='period'>.</span><span class='id identifier rubyid_nil?'>nil?</span>
|
1256
|
-
<span class='id identifier rubyid_column_map'>column_map</span><span class='lbracket'>[</span><span class='id identifier rubyid_column_key'>column_key</span><span class='rbracket'>]</span> <span class='op'>=</span> <span class='id identifier rubyid_index'>index</span>
|
1257
|
-
<span class='kw'>end</span>
|
1258
|
-
<span class='id identifier rubyid_data'>data</span> <span class='op'><<</span> <span class='id identifier rubyid_column_map'>column_map</span>
|
1259
|
-
<span class='kw'>end</span>
|
1260
|
-
<span class='id identifier rubyid_data'>data</span><span class='op'>&.</span><span class='id identifier rubyid_first'>first</span>
|
1261
|
-
<span class='kw'>end</span></pre>
|
1262
|
-
</td>
|
1263
|
-
</tr>
|
1264
|
-
</table>
|
1265
|
-
</div>
|
1266
|
-
|
1267
|
-
<div class="method_details ">
|
1268
|
-
<h3 class="signature " id="parse_table-class_method">
|
1269
|
-
|
1270
|
-
.<strong>parse_table</strong>(opts = {}) {|data, row, header_map| ... } ⇒ <tt>Hash{Symbol => Array,Hash,nil}</tt>
|
1271
|
-
|
1272
|
-
|
1273
|
-
|
1274
|
-
|
1275
|
-
|
1276
|
-
</h3><div class="docstring">
|
1277
|
-
<div class="discussion">
|
1278
|
-
|
1279
|
-
<p>Parse data from a horizontal table-like structure matching the given selectors and</p>
|
1280
|
-
|
1281
|
-
<pre class="code ruby"><code class="ruby">using a header map to match columns.
|
1282
|
-
</code></pre>
|
1283
|
-
|
1284
|
-
|
1285
|
-
</div>
|
1286
|
-
</div>
|
1287
|
-
<div class="tags">
|
1288
|
-
<p class="tag_title">Parameters:</p>
|
1289
|
-
<ul class="param">
|
1290
|
-
|
1291
|
-
<li>
|
1292
|
-
|
1293
|
-
<span class='name'>opts</span>
|
1294
|
-
|
1295
|
-
|
1296
|
-
<span class='type'>(<tt>Hash</tt>)</span>
|
1297
|
-
|
1298
|
-
|
1299
|
-
<em class="default">(defaults to: <tt>{}</tt>)</em>
|
1300
|
-
|
1301
|
-
|
1302
|
-
—
|
1303
|
-
<div class='inline'>
|
1304
|
-
<p>({}) Configuration options.</p>
|
1305
|
-
</div>
|
1306
|
-
|
1307
|
-
</li>
|
1308
|
-
|
1309
|
-
</ul>
|
1310
|
-
|
1311
|
-
|
1312
|
-
|
1313
|
-
|
1314
|
-
<p class="tag_title">Options Hash (<tt>opts</tt>):</p>
|
1315
|
-
<ul class="option">
|
1316
|
-
|
1317
|
-
<li>
|
1318
|
-
<span class="name">:html</span>
|
1319
|
-
<span class="type">(<tt>Nokogiri::Element</tt>)</span>
|
1320
|
-
<span class="default">
|
1321
|
-
|
1322
|
-
</span>
|
1323
|
-
|
1324
|
-
— <div class='inline'>
|
1325
|
-
<p>Container element to search into.</p>
|
1326
|
-
</div>
|
1327
|
-
|
1328
|
-
</li>
|
1329
|
-
|
1330
|
-
<li>
|
1331
|
-
<span class="name">:header_selector</span>
|
1332
|
-
<span class="type">(<tt>String</tt>)</span>
|
1333
|
-
<span class="default">
|
1334
|
-
|
1335
|
-
</span>
|
1336
|
-
|
1337
|
-
— <div class='inline'>
|
1338
|
-
<p>Header column elements selector.</p>
|
1339
|
-
</div>
|
1340
|
-
|
1341
|
-
</li>
|
1342
|
-
|
1343
|
-
<li>
|
1344
|
-
<span class="name">:header_key_label_map</span>
|
1345
|
-
<span class="type">(<tt>Hash{Symbol,String => Regex,String}</tt>)</span>
|
1346
|
-
<span class="default">
|
1347
|
-
|
1348
|
-
</span>
|
1349
|
-
|
1350
|
-
— <div class='inline'>
|
1351
|
-
<p>Header key vs. label dictionary to match column indexes.</p>
|
1352
|
-
</div>
|
1353
|
-
|
1354
|
-
</li>
|
1355
|
-
|
1356
|
-
<li>
|
1357
|
-
<span class="name">:content_selector</span>
|
1358
|
-
<span class="type">(<tt>String</tt>)</span>
|
1359
|
-
<span class="default">
|
1360
|
-
|
1361
|
-
</span>
|
1362
|
-
|
1363
|
-
— <div class='inline'>
|
1364
|
-
<p>Content row elements selector.</p>
|
1365
|
-
</div>
|
1366
|
-
|
1367
|
-
</li>
|
1368
|
-
|
1369
|
-
<li>
|
1370
|
-
<span class="name">:first_row_header</span>
|
1371
|
-
<span class="type">(<tt>Boolean</tt>)</span>
|
1372
|
-
<span class="default">
|
1373
|
-
|
1374
|
-
— default:
|
1375
|
-
<tt>false</tt>
|
1376
|
-
|
1377
|
-
</span>
|
1378
|
-
|
1379
|
-
— <div class='inline'>
|
1380
|
-
<p>If true, the first row matching the selector will be used as the header for
|
1381
|
-
parsing.</p>
|
1382
|
-
</div>
|
1383
|
-
|
1384
|
-
</li>
|
1385
|
-
|
1386
|
-
<li>
|
1387
|
-
<span class="name">:column_parsers</span>
|
1388
|
-
<span class="type">(<tt>Hash{Symbol,String => lambda,proc}</tt>)</span>
|
1389
|
-
<span class="default">
|
1390
|
-
|
1391
|
-
— default:
|
1392
|
-
<tt>{}</tt>
|
1393
|
-
|
1394
|
-
</span>
|
1395
|
-
|
1396
|
-
— <div class='inline'>
|
1397
|
-
<p>Custom column parsers for advanced data extraction.</p>
|
1398
|
-
</div>
|
1399
|
-
|
1400
|
-
</li>
|
1401
|
-
|
1402
|
-
<li>
|
1403
|
-
<span class="name">:ignore_text_nodes</span>
|
1404
|
-
<span class="type">(<tt>Boolean</tt>)</span>
|
1405
|
-
<span class="default">
|
1406
|
-
|
1407
|
-
— default:
|
1408
|
-
<tt>true</tt>
|
1409
|
-
|
1410
|
-
</span>
|
1411
|
-
|
1412
|
-
— <div class='inline'>
|
1413
|
-
<p>Ignore text nodes when retrieving cells and rows.</p>
|
1414
|
-
</div>
|
1415
|
-
|
1416
|
-
</li>
|
1417
|
-
|
1418
|
-
</ul>
|
1419
|
-
|
1420
|
-
|
1421
|
-
<p class="tag_title">Yield Parameters:</p>
|
1422
|
-
<ul class="yieldparam">
|
1423
|
-
|
1424
|
-
<li>
|
1425
|
-
|
1426
|
-
<span class='name'>data</span>
|
1427
|
-
|
1428
|
-
|
1429
|
-
<span class='type'>(<tt>Hash{Symbol,String => Object}</tt>)</span>
|
1430
|
-
|
1431
|
-
|
1432
|
-
|
1433
|
-
—
|
1434
|
-
<div class='inline'>
|
1435
|
-
<p>Parsed content row data.</p>
|
1436
|
-
</div>
|
1437
|
-
|
1438
|
-
</li>
|
1439
|
-
|
1440
|
-
<li>
|
1441
|
-
|
1442
|
-
<span class='name'>row</span>
|
1443
|
-
|
1444
|
-
|
1445
|
-
<span class='type'>(<tt>Array</tt>)</span>
|
1446
|
-
|
1447
|
-
|
1448
|
-
|
1449
|
-
—
|
1450
|
-
<div class='inline'>
|
1451
|
-
<p>Raw content row data.</p>
|
1452
|
-
</div>
|
1453
|
-
|
1454
|
-
</li>
|
1455
|
-
|
1456
|
-
<li>
|
1457
|
-
|
1458
|
-
<span class='name'>header_map</span>
|
1459
|
-
|
1460
|
-
|
1461
|
-
<span class='type'>(<tt>Hash{Symbol,String => Integer}</tt>)</span>
|
1462
|
-
|
1463
|
-
|
1464
|
-
|
1465
|
-
—
|
1466
|
-
<div class='inline'>
|
1467
|
-
<p>Header map used.</p>
|
1468
|
-
</div>
|
1469
|
-
|
1470
|
-
</li>
|
1471
|
-
|
1472
|
-
</ul>
|
1473
|
-
<p class="tag_title">Yield Returns:</p>
|
1474
|
-
<ul class="yieldreturn">
|
1475
|
-
|
1476
|
-
<li>
|
1477
|
-
|
1478
|
-
|
1479
|
-
<span class='type'>(<tt>Boolean</tt>)</span>
|
1480
|
-
|
1481
|
-
|
1482
|
-
|
1483
|
-
—
|
1484
|
-
<div class='inline'>
|
1485
|
-
<p>`true` when valid, else `false`.</p>
|
1486
|
-
</div>
|
1487
|
-
|
1488
|
-
</li>
|
1489
|
-
|
1490
|
-
</ul>
|
1491
|
-
<p class="tag_title">Returns:</p>
|
1492
|
-
<ul class="return">
|
1493
|
-
|
1494
|
-
<li>
|
1495
|
-
|
1496
|
-
|
1497
|
-
<span class='type'>(<tt>Hash{Symbol => Array,Hash,nil}</tt>)</span>
|
1498
|
-
|
1499
|
-
|
1500
|
-
|
1501
|
-
—
|
1502
|
-
<div class='inline'>
|
1503
|
-
<p>Hash data is as follows:</p>
|
1504
|
-
<ul><li>
|
1505
|
-
<p>`[Hash] :header_map` Header map used.</p>
|
1506
|
-
</li><li>
|
1507
|
-
<p>`[Array<Hash>,nil] :data` Parsed rows data.</p>
|
1508
|
-
</li></ul>
|
1509
|
-
</div>
|
1510
|
-
|
1511
|
-
</li>
|
1512
|
-
|
1513
|
-
</ul>
|
1514
|
-
|
1515
|
-
</div><table class="source_code">
|
1516
|
-
<tr>
|
1517
|
-
<td>
|
1518
|
-
<pre class="lines">
|
1519
|
-
|
1520
|
-
|
1521
|
-
226
|
1522
|
-
227
|
1523
|
-
228
|
1524
|
-
229
|
1525
|
-
230
|
1526
|
-
231
|
1527
|
-
232
|
1528
|
-
233
|
1529
|
-
234
|
1530
|
-
235
|
1531
|
-
236
|
1532
|
-
237
|
1533
|
-
238
|
1534
|
-
239
|
1535
|
-
240
|
1536
|
-
241
|
1537
|
-
242
|
1538
|
-
243
|
1539
|
-
244
|
1540
|
-
245
|
1541
|
-
246
|
1542
|
-
247
|
1543
|
-
248
|
1544
|
-
249
|
1545
|
-
250
|
1546
|
-
251</pre>
|
1547
|
-
</td>
|
1548
|
-
<td>
|
1549
|
-
<pre class="code"><span class="info file"># File 'lib/ae_easy/text.rb', line 226</span>
|
1550
|
-
|
1551
|
-
<span class='kw'>def</span> <span class='kw'>self</span><span class='period'>.</span><span class='id identifier rubyid_parse_table'>parse_table</span> <span class='id identifier rubyid_opts'>opts</span> <span class='op'>=</span> <span class='lbrace'>{</span><span class='rbrace'>}</span><span class='comma'>,</span> <span class='op'>&</span><span class='id identifier rubyid_filter'>filter</span>
|
1552
|
-
<span class='id identifier rubyid_opts'>opts</span> <span class='op'>=</span> <span class='lbrace'>{</span>
|
1553
|
-
<span class='label'>html:</span> <span class='kw'>nil</span><span class='comma'>,</span>
|
1554
|
-
<span class='label'>header_selector:</span> <span class='kw'>nil</span><span class='comma'>,</span>
|
1555
|
-
<span class='label'>header_key_label_map:</span> <span class='lbrace'>{</span><span class='rbrace'>}</span><span class='comma'>,</span>
|
1556
|
-
<span class='label'>content_selector:</span> <span class='kw'>nil</span><span class='comma'>,</span>
|
1557
|
-
<span class='label'>first_row_header:</span> <span class='kw'>false</span><span class='comma'>,</span>
|
1558
|
-
<span class='label'>column_parsers:</span> <span class='lbrace'>{</span><span class='rbrace'>}</span><span class='comma'>,</span>
|
1559
|
-
<span class='label'>ignore_text_nodes:</span> <span class='kw'>true</span>
|
1560
|
-
<span class='rbrace'>}</span><span class='period'>.</span><span class='id identifier rubyid_merge'>merge</span> <span class='id identifier rubyid_opts'>opts</span>
|
1561
|
-
<span class='kw'>return</span> <span class='kw'>nil</span> <span class='kw'>if</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:html</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_nil?'>nil?</span>
|
1562
|
-
<span class='id identifier rubyid_header_map'>header_map</span> <span class='op'>=</span> <span class='kw'>self</span><span class='period'>.</span><span class='id identifier rubyid_parse_header_map'>parse_header_map</span> <span class='label'>html:</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:html</span><span class='rbracket'>]</span><span class='comma'>,</span>
|
1563
|
-
<span class='label'>selector:</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:header_selector</span><span class='rbracket'>]</span><span class='comma'>,</span>
|
1564
|
-
<span class='label'>column_key_label_map:</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:header_key_label_map</span><span class='rbracket'>]</span><span class='comma'>,</span>
|
1565
|
-
<span class='label'>first_row_header:</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:first_row_header</span><span class='rbracket'>]</span><span class='comma'>,</span>
|
1566
|
-
<span class='label'>ignore_text_nodes:</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:ignore_text_nodes</span><span class='rbracket'>]</span>
|
1567
|
-
<span class='kw'>return</span> <span class='kw'>nil</span> <span class='kw'>if</span> <span class='id identifier rubyid_header_map'>header_map</span><span class='period'>.</span><span class='id identifier rubyid_nil?'>nil?</span>
|
1568
|
-
<span class='id identifier rubyid_data'>data</span> <span class='op'>=</span> <span class='kw'>self</span><span class='period'>.</span><span class='id identifier rubyid_parse_content'>parse_content</span> <span class='label'>html:</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:html</span><span class='rbracket'>]</span><span class='comma'>,</span>
|
1569
|
-
<span class='label'>selector:</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:content_selector</span><span class='rbracket'>]</span><span class='comma'>,</span>
|
1570
|
-
<span class='label'>header_map:</span> <span class='id identifier rubyid_header_map'>header_map</span><span class='comma'>,</span>
|
1571
|
-
<span class='label'>first_row_header:</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:first_row_header</span><span class='rbracket'>]</span><span class='comma'>,</span>
|
1572
|
-
<span class='label'>column_parsers:</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:column_parsers</span><span class='rbracket'>]</span><span class='comma'>,</span>
|
1573
|
-
<span class='label'>ignore_text_nodes:</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:ignore_text_nodes</span><span class='rbracket'>]</span><span class='comma'>,</span>
|
1574
|
-
<span class='op'>&</span><span class='id identifier rubyid_filter'>filter</span>
|
1575
|
-
<span class='lbrace'>{</span><span class='label'>header_map:</span> <span class='id identifier rubyid_header_map'>header_map</span><span class='comma'>,</span> <span class='label'>data:</span> <span class='id identifier rubyid_data'>data</span><span class='rbrace'>}</span>
|
1576
|
-
<span class='kw'>end</span></pre>
|
1577
|
-
</td>
|
1578
|
-
</tr>
|
1579
|
-
</table>
|
1580
|
-
</div>
|
1581
|
-
|
1582
|
-
<div class="method_details ">
|
1583
|
-
<h3 class="signature " id="parse_vertical_table-class_method">
|
1584
|
-
|
1585
|
-
.<strong>parse_vertical_table</strong>(opts = {}) {|data, row, header_map| ... } ⇒ <tt>Hash{Symbol => Array,Hash,nil}</tt>
|
1586
|
-
|
1587
|
-
|
1588
|
-
|
1589
|
-
|
1590
|
-
|
1591
|
-
</h3><div class="docstring">
|
1592
|
-
<div class="discussion">
|
1593
|
-
|
1594
|
-
<p>Parse data from a vertical table-like structure matching the given selectors and</p>
|
1595
|
-
|
1596
|
-
<pre class="code ruby"><code class="ruby">using a header map to match columns.
|
1597
|
-
</code></pre>
|
1598
|
-
|
1599
|
-
|
1600
|
-
</div>
|
1601
|
-
</div>
|
1602
|
-
<div class="tags">
|
1603
|
-
<p class="tag_title">Parameters:</p>
|
1604
|
-
<ul class="param">
|
1605
|
-
|
1606
|
-
<li>
|
1607
|
-
|
1608
|
-
<span class='name'>opts</span>
|
1609
|
-
|
1610
|
-
|
1611
|
-
<span class='type'>(<tt>Hash</tt>)</span>
|
1612
|
-
|
1613
|
-
|
1614
|
-
<em class="default">(defaults to: <tt>{}</tt>)</em>
|
1615
|
-
|
1616
|
-
|
1617
|
-
—
|
1618
|
-
<div class='inline'>
|
1619
|
-
<p>({}) Configuration options.</p>
|
1620
|
-
</div>
|
1621
|
-
|
1622
|
-
</li>
|
1623
|
-
|
1624
|
-
</ul>
|
1625
|
-
|
1626
|
-
|
1627
|
-
|
1628
|
-
|
1629
|
-
<p class="tag_title">Options Hash (<tt>opts</tt>):</p>
|
1630
|
-
<ul class="option">
|
1631
|
-
|
1632
|
-
<li>
|
1633
|
-
<span class="name">:html</span>
|
1634
|
-
<span class="type">(<tt>Nokogiri::Element</tt>)</span>
|
1635
|
-
<span class="default">
|
1636
|
-
|
1637
|
-
</span>
|
1638
|
-
|
1639
|
-
— <div class='inline'>
|
1640
|
-
<p>Container element to search into.</p>
|
1641
|
-
</div>
|
1642
|
-
|
1643
|
-
</li>
|
1644
|
-
|
1645
|
-
<li>
|
1646
|
-
<span class="name">:row_selector</span>
|
1647
|
-
<span class="type">(<tt>String</tt>)</span>
|
1648
|
-
<span class="default">
|
1649
|
-
|
1650
|
-
</span>
|
1651
|
-
|
1652
|
-
— <div class='inline'>
|
1653
|
-
<p>Vertical row like elements selector.</p>
|
1654
|
-
</div>
|
1655
|
-
|
1656
|
-
</li>
|
1657
|
-
|
1658
|
-
<li>
|
1659
|
-
<span class="name">:header_selector</span>
|
1660
|
-
<span class="type">(<tt>String</tt>)</span>
|
1661
|
-
<span class="default">
|
1662
|
-
|
1663
|
-
</span>
|
1664
|
-
|
1665
|
-
— <div class='inline'>
|
1666
|
-
<p>Header column elements selector.</p>
|
1667
|
-
</div>
|
1668
|
-
|
1669
|
-
</li>
|
1670
|
-
|
1671
|
-
<li>
|
1672
|
-
<span class="name">:header_key_label_map</span>
|
1673
|
-
<span class="type">(<tt>Hash{Symbol,String => Regexp,String}</tt>)</span>
|
1674
|
-
<span class="default">
|
1675
|
-
|
1676
|
-
</span>
|
1677
|
-
|
1678
|
-
— <div class='inline'>
|
1679
|
-
<p>Header key vs. label dictionary to match column indexes.</p>
|
1680
|
-
</div>
|
1681
|
-
|
1682
|
-
</li>
|
1683
|
-
|
1684
|
-
<li>
|
1685
|
-
<span class="name">:content_selector</span>
|
1686
|
-
<span class="type">(<tt>String</tt>)</span>
|
1687
|
-
<span class="default">
|
1688
|
-
|
1689
|
-
</span>
|
1690
|
-
|
1691
|
-
— <div class='inline'>
|
1692
|
-
<p>Content row elements selector.</p>
|
1693
|
-
</div>
|
1694
|
-
|
1695
|
-
</li>
|
1696
|
-
|
1697
|
-
<li>
|
1698
|
-
<span class="name">:column_parsers</span>
|
1699
|
-
<span class="type">(<tt>Hash{Symbol,String => lambda,proc}</tt>)</span>
|
1700
|
-
<span class="default">
|
1701
|
-
|
1702
|
-
— default:
|
1703
|
-
<tt>{}</tt>
|
1704
|
-
|
1705
|
-
</span>
|
1706
|
-
|
1707
|
-
— <div class='inline'>
|
1708
|
-
<p>Custom column parsers for advanced data extraction.</p>
|
1709
|
-
</div>
|
1710
|
-
|
1711
|
-
</li>
|
1712
|
-
|
1713
|
-
<li>
|
1714
|
-
<span class="name">:ignore_text_nodes</span>
|
1715
|
-
<span class="type">(<tt>Boolean</tt>)</span>
|
1716
|
-
<span class="default">
|
1717
|
-
|
1718
|
-
— default:
|
1719
|
-
<tt>true</tt>
|
1720
|
-
|
1721
|
-
</span>
|
1722
|
-
|
1723
|
-
— <div class='inline'>
|
1724
|
-
<p>Ignore text nodes when retrieving cells and rows.</p>
|
1725
|
-
</div>
|
1726
|
-
|
1727
|
-
</li>
|
1728
|
-
|
1729
|
-
</ul>
|
1730
|
-
|
1731
|
-
|
1732
|
-
<p class="tag_title">Yield Parameters:</p>
|
1733
|
-
<ul class="yieldparam">
|
1734
|
-
|
1735
|
-
<li>
|
1736
|
-
|
1737
|
-
<span class='name'>data</span>
|
1738
|
-
|
1739
|
-
|
1740
|
-
<span class='type'>(<tt>Hash{Symbol,String => Object}</tt>)</span>
|
1741
|
-
|
1742
|
-
|
1743
|
-
|
1744
|
-
—
|
1745
|
-
<div class='inline'>
|
1746
|
-
<p>Parsed content row data.</p>
|
1747
|
-
</div>
|
1748
|
-
|
1749
|
-
</li>
|
1750
|
-
|
1751
|
-
<li>
|
1752
|
-
|
1753
|
-
<span class='name'>row</span>
|
1754
|
-
|
1755
|
-
|
1756
|
-
<span class='type'>(<tt>Array</tt>)</span>
|
1757
|
-
|
1758
|
-
|
1759
|
-
|
1760
|
-
—
|
1761
|
-
<div class='inline'>
|
1762
|
-
<p>Raw content row data.</p>
|
1763
|
-
</div>
|
1764
|
-
|
1765
|
-
</li>
|
1766
|
-
|
1767
|
-
<li>
|
1768
|
-
|
1769
|
-
<span class='name'>header_map</span>
|
1770
|
-
|
1771
|
-
|
1772
|
-
<span class='type'>(<tt>Hash{Symbol,String => Integer}</tt>)</span>
|
1773
|
-
|
1774
|
-
|
1775
|
-
|
1776
|
-
—
|
1777
|
-
<div class='inline'>
|
1778
|
-
<p>Header map used.</p>
|
1779
|
-
</div>
|
1780
|
-
|
1781
|
-
</li>
|
1782
|
-
|
1783
|
-
</ul>
|
1784
|
-
<p class="tag_title">Yield Returns:</p>
|
1785
|
-
<ul class="yieldreturn">
|
1786
|
-
|
1787
|
-
<li>
|
1788
|
-
|
1789
|
-
|
1790
|
-
<span class='type'>(<tt>Boolean</tt>)</span>
|
1791
|
-
|
1792
|
-
|
1793
|
-
|
1794
|
-
—
|
1795
|
-
<div class='inline'>
|
1796
|
-
<p>`true` when valid, else `false`.</p>
|
1797
|
-
</div>
|
1798
|
-
|
1799
|
-
</li>
|
1800
|
-
|
1801
|
-
</ul>
|
1802
|
-
<p class="tag_title">Returns:</p>
|
1803
|
-
<ul class="return">
|
1804
|
-
|
1805
|
-
<li>
|
1806
|
-
|
1807
|
-
|
1808
|
-
<span class='type'>(<tt>Hash{Symbol => Array,Hash,nil}</tt>)</span>
|
1809
|
-
|
1810
|
-
|
1811
|
-
|
1812
|
-
—
|
1813
|
-
<div class='inline'>
|
1814
|
-
<p>Hash data is as follows:</p>
|
1815
|
-
<ul><li>
|
1816
|
-
<p>`[Hash] :header_map` Header map used.</p>
|
1817
|
-
</li><li>
|
1818
|
-
<p>`[Array<Hash>,nil] :data` Parsed rows data.</p>
|
1819
|
-
</li></ul>
|
1820
|
-
</div>
|
1821
|
-
|
1822
|
-
</li>
|
1823
|
-
|
1824
|
-
</ul>
|
1825
|
-
|
1826
|
-
</div><table class="source_code">
|
1827
|
-
<tr>
|
1828
|
-
<td>
|
1829
|
-
<pre class="lines">
|
1830
|
-
|
1831
|
-
|
1832
|
-
276
|
1833
|
-
277
|
1834
|
-
278
|
1835
|
-
279
|
1836
|
-
280
|
1837
|
-
281
|
1838
|
-
282
|
1839
|
-
283
|
1840
|
-
284
|
1841
|
-
285
|
1842
|
-
286
|
1843
|
-
287
|
1844
|
-
288
|
1845
|
-
289
|
1846
|
-
290
|
1847
|
-
291
|
1848
|
-
292
|
1849
|
-
293
|
1850
|
-
294
|
1851
|
-
295
|
1852
|
-
296
|
1853
|
-
297
|
1854
|
-
298
|
1855
|
-
299
|
1856
|
-
300
|
1857
|
-
301
|
1858
|
-
302
|
1859
|
-
303
|
1860
|
-
304
|
1861
|
-
305
|
1862
|
-
306
|
1863
|
-
307
|
1864
|
-
308
|
1865
|
-
309</pre>
|
1866
|
-
</td>
|
1867
|
-
<td>
|
1868
|
-
<pre class="code"><span class="info file"># File 'lib/ae_easy/text.rb', line 276</span>
|
1869
|
-
|
1870
|
-
<span class='kw'>def</span> <span class='kw'>self</span><span class='period'>.</span><span class='id identifier rubyid_parse_vertical_table'>parse_vertical_table</span> <span class='id identifier rubyid_opts'>opts</span> <span class='op'>=</span> <span class='lbrace'>{</span><span class='rbrace'>}</span><span class='comma'>,</span> <span class='op'>&</span><span class='id identifier rubyid_filter'>filter</span>
|
1871
|
-
<span class='id identifier rubyid_opts'>opts</span> <span class='op'>=</span> <span class='lbrace'>{</span>
|
1872
|
-
<span class='label'>html:</span> <span class='kw'>nil</span><span class='comma'>,</span>
|
1873
|
-
<span class='label'>row_selector:</span> <span class='kw'>nil</span><span class='comma'>,</span>
|
1874
|
-
<span class='label'>header_selector:</span> <span class='kw'>nil</span><span class='comma'>,</span>
|
1875
|
-
<span class='label'>header_key_label_map:</span> <span class='lbrace'>{</span><span class='rbrace'>}</span><span class='comma'>,</span>
|
1876
|
-
<span class='label'>content_selector:</span> <span class='kw'>nil</span><span class='comma'>,</span>
|
1877
|
-
<span class='label'>column_parsers:</span> <span class='lbrace'>{</span><span class='rbrace'>}</span><span class='comma'>,</span>
|
1878
|
-
<span class='label'>ignore_text_nodes:</span> <span class='kw'>true</span>
|
1879
|
-
<span class='rbrace'>}</span><span class='period'>.</span><span class='id identifier rubyid_merge'>merge</span> <span class='id identifier rubyid_opts'>opts</span>
|
1880
|
-
<span class='kw'>return</span> <span class='kw'>nil</span> <span class='kw'>if</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:html</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_nil?'>nil?</span>
|
1881
|
-
|
1882
|
-
<span class='comment'># Setup config
|
1883
|
-
</span> <span class='id identifier rubyid_data'>data</span> <span class='op'>=</span> <span class='lbrace'>{</span><span class='rbrace'>}</span>
|
1884
|
-
<span class='id identifier rubyid_dictionary'>dictionary</span> <span class='op'>=</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:header_key_label_map</span><span class='rbracket'>]</span>
|
1885
|
-
<span class='id identifier rubyid_column_parsers'>column_parsers</span> <span class='op'>=</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:column_parsers</span><span class='rbracket'>]</span>
|
1886
|
-
|
1887
|
-
<span class='comment'># Extract headers and content
|
1888
|
-
</span> <span class='id identifier rubyid_html_rows'>html_rows</span> <span class='op'>=</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:html</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_css'>css</span><span class='lparen'>(</span><span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:row_selector</span><span class='rbracket'>]</span><span class='rparen'>)</span> <span class='kw'>rescue</span> <span class='kw'>nil</span>
|
1889
|
-
<span class='kw'>return</span> <span class='kw'>nil</span> <span class='kw'>if</span> <span class='id identifier rubyid_html_rows'>html_rows</span><span class='period'>.</span><span class='id identifier rubyid_nil?'>nil?</span>
|
1890
|
-
<span class='id identifier rubyid_html_rows'>html_rows</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_row'>row</span><span class='op'>|</span>
|
1891
|
-
<span class='comment'># Parse and map column header
|
1892
|
-
</span> <span class='id identifier rubyid_header_element'>header_element</span> <span class='op'>=</span> <span class='id identifier rubyid_row'>row</span><span class='period'>.</span><span class='id identifier rubyid_css'>css</span><span class='lparen'>(</span><span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:header_selector</span><span class='rbracket'>]</span><span class='rparen'>)</span>
|
1893
|
-
<span class='id identifier rubyid_key'>key</span> <span class='op'>=</span> <span class='id identifier rubyid_translate_label_to_key'>translate_label_to_key</span> <span class='id identifier rubyid_header_element'>header_element</span><span class='comma'>,</span> <span class='id identifier rubyid_dictionary'>dictionary</span>
|
1894
|
-
<span class='kw'>next</span> <span class='kw'>if</span> <span class='id identifier rubyid_key'>key</span><span class='period'>.</span><span class='id identifier rubyid_nil?'>nil?</span> <span class='op'>||</span> <span class='id identifier rubyid_key'>key</span> <span class='op'>==</span> <span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_end'>'</span></span>
|
1895
|
-
|
1896
|
-
<span class='comment'># Parse column html with default or custom parser
|
1897
|
-
</span> <span class='id identifier rubyid_content_element'>content_element</span> <span class='op'>=</span> <span class='id identifier rubyid_row'>row</span><span class='period'>.</span><span class='id identifier rubyid_css'>css</span><span class='lparen'>(</span><span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:content_selector</span><span class='rbracket'>]</span><span class='rparen'>)</span>
|
1898
|
-
<span class='id identifier rubyid_column_parsers'>column_parsers</span><span class='lbracket'>[</span><span class='id identifier rubyid_key'>key</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_nil?'>nil?</span> <span class='op'>?</span>
|
1899
|
-
<span class='id identifier rubyid_default_parser'>default_parser</span><span class='lparen'>(</span><span class='id identifier rubyid_content_element'>content_element</span><span class='comma'>,</span> <span class='id identifier rubyid_data'>data</span><span class='comma'>,</span> <span class='id identifier rubyid_key'>key</span><span class='rparen'>)</span> <span class='op'>:</span>
|
1900
|
-
<span class='id identifier rubyid_column_parsers'>column_parsers</span><span class='lbracket'>[</span><span class='id identifier rubyid_key'>key</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_call'>call</span><span class='lparen'>(</span><span class='id identifier rubyid_content_element'>content_element</span><span class='comma'>,</span> <span class='id identifier rubyid_data'>data</span><span class='comma'>,</span> <span class='id identifier rubyid_key'>key</span><span class='rparen'>)</span>
|
1901
|
-
<span class='kw'>end</span>
|
1902
|
-
<span class='id identifier rubyid_data'>data</span>
|
1903
|
-
<span class='kw'>end</span></pre>
|
1904
|
-
</td>
|
1905
|
-
</tr>
|
1906
|
-
</table>
|
1907
|
-
</div>
|
1908
|
-
|
1909
|
-
<div class="method_details ">
|
1910
|
-
<h3 class="signature " id="strip-class_method">
|
1911
|
-
|
1912
|
-
.<strong>strip</strong>(raw_text, orig_encoding = 'ASCII') ⇒ <tt>String</tt><sup>?</sup>
|
1913
|
-
|
1914
|
-
|
1915
|
-
|
1916
|
-
|
1917
|
-
|
1918
|
-
</h3><div class="docstring">
|
1919
|
-
<div class="discussion">
|
1920
|
-
|
1921
|
-
<p>Strip a value by trimming spaces, reducing sequential spaces into a</p>
|
1922
|
-
|
1923
|
-
<pre class="code ruby"><code class="ruby">single space, decode HTML entities and change encoding to UTF-8.
|
1924
|
-
</code></pre>
|
1925
|
-
|
1926
|
-
|
1927
|
-
</div>
|
1928
|
-
</div>
|
1929
|
-
<div class="tags">
|
1930
|
-
<p class="tag_title">Parameters:</p>
|
1931
|
-
<ul class="param">
|
1932
|
-
|
1933
|
-
<li>
|
1934
|
-
|
1935
|
-
<span class='name'>raw_text</span>
|
1936
|
-
|
1937
|
-
|
1938
|
-
<span class='type'>(<tt>String</tt>, <tt>Object</tt>, <tt>nil</tt>)</span>
|
1939
|
-
|
1940
|
-
|
1941
|
-
|
1942
|
-
—
|
1943
|
-
<div class='inline'>
|
1944
|
-
<p>Text to strip.</p>
|
1945
|
-
</div>
|
1946
|
-
|
1947
|
-
</li>
|
1948
|
-
|
1949
|
-
<li>
|
1950
|
-
|
1951
|
-
<span class='name'>orig_encoding</span>
|
1952
|
-
|
1953
|
-
|
1954
|
-
<span class='type'>(<tt>String</tt>)</span>
|
1955
|
-
|
1956
|
-
|
1957
|
-
<em class="default">(defaults to: <tt>'ASCII'</tt>)</em>
|
1958
|
-
|
1959
|
-
|
1960
|
-
—
|
1961
|
-
<div class='inline'>
|
1962
|
-
<p>Text original encoding.</p>
|
1963
|
-
</div>
|
1964
|
-
|
1965
|
-
</li>
|
1966
|
-
|
1967
|
-
</ul>
|
1968
|
-
|
1969
|
-
<p class="tag_title">Returns:</p>
|
1970
|
-
<ul class="return">
|
1971
|
-
|
1972
|
-
<li>
|
1973
|
-
|
1974
|
-
|
1975
|
-
<span class='type'>(<tt>String</tt>, <tt>nil</tt>)</span>
|
1976
|
-
|
1977
|
-
|
1978
|
-
|
1979
|
-
—
|
1980
|
-
<div class='inline'>
|
1981
|
-
<p>`nil` when <code>raw_text</code> is nil, else `String`.</p>
|
1982
|
-
</div>
|
1983
|
-
|
1984
|
-
</li>
|
1985
|
-
|
1986
|
-
</ul>
|
1987
|
-
|
1988
|
-
</div><table class="source_code">
|
1989
|
-
<tr>
|
1990
|
-
<td>
|
1991
|
-
<pre class="lines">
|
1992
|
-
|
1993
|
-
|
1994
|
-
44
|
1995
|
-
45
|
1996
|
-
46
|
1997
|
-
47
|
1998
|
-
48
|
1999
|
-
49
|
2000
|
-
50
|
2001
|
-
51
|
2002
|
-
52
|
2003
|
-
53
|
2004
|
-
54
|
2005
|
-
55</pre>
|
2006
|
-
</td>
|
2007
|
-
<td>
|
2008
|
-
<pre class="code"><span class="info file"># File 'lib/ae_easy/text.rb', line 44</span>
|
2009
|
-
|
2010
|
-
<span class='kw'>def</span> <span class='kw'>self</span><span class='period'>.</span><span class='id identifier rubyid_strip'>strip</span> <span class='id identifier rubyid_raw_text'>raw_text</span><span class='comma'>,</span> <span class='id identifier rubyid_orig_encoding'>orig_encoding</span> <span class='op'>=</span> <span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>ASCII</span><span class='tstring_end'>'</span></span>
|
2011
|
-
<span class='kw'>return</span> <span class='kw'>nil</span> <span class='kw'>if</span> <span class='id identifier rubyid_raw_text'>raw_text</span><span class='period'>.</span><span class='id identifier rubyid_nil?'>nil?</span>
|
2012
|
-
<span class='id identifier rubyid_raw_text'>raw_text</span> <span class='op'>=</span> <span class='id identifier rubyid_raw_text'>raw_text</span><span class='period'>.</span><span class='id identifier rubyid_to_s'>to_s</span> <span class='kw'>unless</span> <span class='id identifier rubyid_raw_text'>raw_text</span><span class='period'>.</span><span class='id identifier rubyid_is_a?'>is_a?</span> <span class='const'>String</span>
|
2013
|
-
<span class='id identifier rubyid_regex'>regex</span> <span class='op'>=</span> <span class='tstring'><span class='regexp_beg'>/</span><span class='tstring_content'>(\s|\u3000|\u00a0)+</span><span class='regexp_end'>/</span></span>
|
2014
|
-
<span class='id identifier rubyid_good_encoding'>good_encoding</span> <span class='op'>=</span> <span class='lparen'>(</span><span class='id identifier rubyid_raw_text'>raw_text</span> <span class='op'>=~</span> <span class='tstring'><span class='regexp_beg'>/</span><span class='tstring_content'>\u3000</span><span class='regexp_end'>/</span></span> <span class='op'>||</span> <span class='kw'>true</span><span class='rparen'>)</span> <span class='kw'>rescue</span> <span class='kw'>false</span>
|
2015
|
-
<span class='kw'>unless</span> <span class='id identifier rubyid_good_encoding'>good_encoding</span>
|
2016
|
-
<span class='id identifier rubyid_raw_text'>raw_text</span> <span class='op'>=</span> <span class='id identifier rubyid_raw_text'>raw_text</span><span class='period'>.</span><span class='id identifier rubyid_force_encoding'>force_encoding</span><span class='lparen'>(</span><span class='id identifier rubyid_orig_encoding'>orig_encoding</span><span class='rparen'>)</span><span class='period'>.</span><span class='id identifier rubyid_encode'>encode</span><span class='lparen'>(</span><span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>UTF-8</span><span class='tstring_end'>'</span></span><span class='comma'>,</span> <span class='label'>invalid:</span> <span class='symbol'>:replace</span><span class='comma'>,</span> <span class='label'>undef:</span> <span class='symbol'>:replace</span><span class='rparen'>)</span>
|
2017
|
-
<span class='id identifier rubyid_regex'>regex</span> <span class='op'>=</span> <span class='tstring'><span class='regexp_beg'>/</span><span class='tstring_content'>(\s|\u3000|\u00a0|\u00c2\u00a0)+</span><span class='regexp_end'>/</span></span>
|
2018
|
-
<span class='kw'>end</span>
|
2019
|
-
<span class='id identifier rubyid_text'>text</span> <span class='op'>=</span> <span class='id identifier rubyid_raw_text'>raw_text</span><span class='period'>.</span><span class='id identifier rubyid_gsub'>gsub</span><span class='lparen'>(</span><span class='id identifier rubyid_regex'>regex</span><span class='comma'>,</span> <span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'> </span><span class='tstring_end'>'</span></span><span class='rparen'>)</span><span class='period'>.</span><span class='id identifier rubyid_strip'>strip</span>
|
2020
|
-
<span class='id identifier rubyid_text'>text</span><span class='period'>.</span><span class='id identifier rubyid_nil?'>nil?</span> <span class='op'>?</span> <span class='kw'>nil</span> <span class='op'>:</span> <span class='id identifier rubyid_decode_html'>decode_html</span><span class='lparen'>(</span><span class='id identifier rubyid_text'>text</span><span class='rparen'>)</span>
|
2021
|
-
<span class='kw'>end</span></pre>
|
2022
|
-
</td>
|
2023
|
-
</tr>
|
2024
|
-
</table>
|
2025
|
-
</div>
|
2026
|
-
|
2027
|
-
<div class="method_details ">
|
2028
|
-
<h3 class="signature " id="translate_label_to_key-class_method">
|
2029
|
-
|
2030
|
-
.<strong>translate_label_to_key</strong>(element, label_map) ⇒ <tt>Symbol</tt>, <tt>String</tt>
|
2031
|
-
|
2032
|
-
|
2033
|
-
|
2034
|
-
|
2035
|
-
|
2036
|
-
</h3><div class="docstring">
|
2037
|
-
<div class="discussion">
|
2038
|
-
|
2039
|
-
<p>Extract column label and translate it into a friendly key.</p>
|
2040
|
-
|
2041
|
-
|
2042
|
-
</div>
|
2043
|
-
</div>
|
2044
|
-
<div class="tags">
|
2045
|
-
<p class="tag_title">Parameters:</p>
|
2046
|
-
<ul class="param">
|
2047
|
-
|
2048
|
-
<li>
|
2049
|
-
|
2050
|
-
<span class='name'>element</span>
|
2051
|
-
|
2052
|
-
|
2053
|
-
<span class='type'>(<tt>Nokogiri::Element</tt>)</span>
|
2054
|
-
|
2055
|
-
|
2056
|
-
|
2057
|
-
—
|
2058
|
-
<div class='inline'>
|
2059
|
-
<p>Html element to parse.</p>
|
2060
|
-
</div>
|
2061
|
-
|
2062
|
-
</li>
|
2063
|
-
|
2064
|
-
<li>
|
2065
|
-
|
2066
|
-
<span class='name'>label_map</span>
|
2067
|
-
|
2068
|
-
|
2069
|
-
<span class='type'>(<tt>Hash{Symbol,String => Regexp,String}</tt>)</span>
|
2070
|
-
|
2071
|
-
|
2072
|
-
|
2073
|
-
—
|
2074
|
-
<div class='inline'>
|
2075
|
-
<p>Label dictionary for translation into key.</p>
|
2076
|
-
</div>
|
2077
|
-
|
2078
|
-
</li>
|
2079
|
-
|
2080
|
-
</ul>
|
2081
|
-
|
2082
|
-
<p class="tag_title">Returns:</p>
|
2083
|
-
<ul class="return">
|
2084
|
-
|
2085
|
-
<li>
|
2086
|
-
|
2087
|
-
|
2088
|
-
<span class='type'>(<tt>Symbol</tt>, <tt>String</tt>)</span>
|
2089
|
-
|
2090
|
-
|
2091
|
-
|
2092
|
-
—
|
2093
|
-
<div class='inline'>
|
2094
|
-
<p>Translated key.</p>
|
2095
|
-
</div>
|
2096
|
-
|
2097
|
-
</li>
|
2098
|
-
|
2099
|
-
</ul>
|
2100
|
-
|
2101
|
-
</div><table class="source_code">
|
2102
|
-
<tr>
|
2103
|
-
<td>
|
2104
|
-
<pre class="lines">
|
2105
|
-
|
2106
|
-
|
2107
|
-
142
|
2108
|
-
143
|
2109
|
-
144
|
2110
|
-
145
|
2111
|
-
146
|
2112
|
-
147
|
2113
|
-
148
|
2114
|
-
149
|
2115
|
-
150</pre>
|
2116
|
-
</td>
|
2117
|
-
<td>
|
2118
|
-
<pre class="code"><span class="info file"># File 'lib/ae_easy/text.rb', line 142</span>
|
2119
|
-
|
2120
|
-
<span class='kw'>def</span> <span class='kw'>self</span><span class='period'>.</span><span class='id identifier rubyid_translate_label_to_key'>translate_label_to_key</span> <span class='id identifier rubyid_element'>element</span><span class='comma'>,</span> <span class='id identifier rubyid_label_map'>label_map</span>
|
2121
|
-
<span class='kw'>return</span> <span class='kw'>nil</span> <span class='kw'>if</span> <span class='id identifier rubyid_element'>element</span><span class='period'>.</span><span class='id identifier rubyid_nil?'>nil?</span>
|
2122
|
-
<span class='id identifier rubyid_element'>element</span><span class='period'>.</span><span class='id identifier rubyid_search'>search</span><span class='lparen'>(</span><span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>//i</span><span class='tstring_end'>'</span></span><span class='rparen'>)</span><span class='period'>.</span><span class='id identifier rubyid_remove'>remove</span> <span class='kw'>if</span> <span class='id identifier rubyid_element'>element</span><span class='period'>.</span><span class='id identifier rubyid_search'>search</span><span class='lparen'>(</span><span class='tstring'><span class='tstring_beg'>'</span><span class='tstring_content'>//i</span><span class='tstring_end'>'</span></span><span class='rparen'>)</span><span class='period'>.</span><span class='id identifier rubyid_count'>count</span> <span class='op'>></span> <span class='int'>0</span>
|
2123
|
-
<span class='id identifier rubyid_text'>text</span> <span class='op'>=</span> <span class='id identifier rubyid_strip'>strip</span> <span class='id identifier rubyid_element'>element</span><span class='period'>.</span><span class='id identifier rubyid_text'>text</span>
|
2124
|
-
<span class='id identifier rubyid_key_pair'>key_pair</span> <span class='op'>=</span> <span class='id identifier rubyid_label_map'>label_map</span><span class='period'>.</span><span class='id identifier rubyid_find'>find</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_k'>k</span><span class='comma'>,</span><span class='id identifier rubyid_v'>v</span><span class='op'>|</span>
|
2125
|
-
<span class='id identifier rubyid_v'>v</span><span class='period'>.</span><span class='id identifier rubyid_is_a?'>is_a?</span><span class='lparen'>(</span><span class='const'>Regexp</span><span class='rparen'>)</span> <span class='op'>?</span> <span class='lparen'>(</span><span class='id identifier rubyid_text'>text</span> <span class='op'>=~</span> <span class='id identifier rubyid_v'>v</span><span class='rparen'>)</span> <span class='op'>:</span> <span class='lparen'>(</span><span class='id identifier rubyid_text'>text</span> <span class='op'>==</span> <span class='id identifier rubyid_v'>v</span><span class='rparen'>)</span>
|
2126
|
-
<span class='kw'>end</span>
|
2127
|
-
<span class='id identifier rubyid_key'>key</span> <span class='op'>=</span> <span class='id identifier rubyid_key_pair'>key_pair</span><span class='period'>.</span><span class='id identifier rubyid_nil?'>nil?</span> <span class='op'>?</span> <span class='kw'>nil</span> <span class='op'>:</span> <span class='id identifier rubyid_key_pair'>key_pair</span><span class='lbracket'>[</span><span class='int'>0</span><span class='rbracket'>]</span>
|
2128
|
-
<span class='kw'>end</span></pre>
|
2129
|
-
</td>
|
2130
|
-
</tr>
|
2131
|
-
</table>
|
2132
|
-
</div>
|
2133
|
-
|
2134
|
-
</div>
|
2135
|
-
|
2136
|
-
</div>
|
2137
|
-
|
2138
|
-
<div id="footer">
|
2139
|
-
Generated on Mon Mar 11 21:38:55 2019 by
|
2140
|
-
<a href="http://yardoc.org" title="Yay! A Ruby Documentation Tool" target="_parent">yard</a>
|
2141
|
-
0.9.18 (ruby-2.5.3).
|
2142
|
-
</div>
|
2143
|
-
|
2144
|
-
</div>
|
2145
|
-
</body>
|
2146
|
-
</html>
|