ae_easy-text 0.0.4 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,117 +0,0 @@
1
- <!DOCTYPE html>
2
- <html>
3
- <head>
4
- <meta charset="utf-8">
5
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
- <title>
7
- Module: AeEasy
8
-
9
- &mdash; Documentation by YARD 0.9.18
10
-
11
- </title>
12
-
13
- <link rel="stylesheet" href="css/style.css" type="text/css" charset="utf-8" />
14
-
15
- <link rel="stylesheet" href="css/common.css" type="text/css" charset="utf-8" />
16
-
17
- <script type="text/javascript" charset="utf-8">
18
- pathId = "AeEasy";
19
- relpath = '';
20
- </script>
21
-
22
-
23
- <script type="text/javascript" charset="utf-8" src="js/jquery.js"></script>
24
-
25
- <script type="text/javascript" charset="utf-8" src="js/app.js"></script>
26
-
27
-
28
- </head>
29
- <body>
30
- <div class="nav_wrap">
31
- <iframe id="nav" src="class_list.html?1"></iframe>
32
- <div id="resizer"></div>
33
- </div>
34
-
35
- <div id="main" tabindex="-1">
36
- <div id="header">
37
- <div id="menu">
38
-
39
- <a href="_index.html">Index (A)</a> &raquo;
40
-
41
-
42
- <span class="title">AeEasy</span>
43
-
44
- </div>
45
-
46
- <div id="search">
47
-
48
- <a class="full_list_link" id="class_list_link"
49
- href="class_list.html">
50
-
51
- <svg width="24" height="24">
52
- <rect x="0" y="4" width="24" height="4" rx="1" ry="1"></rect>
53
- <rect x="0" y="12" width="24" height="4" rx="1" ry="1"></rect>
54
- <rect x="0" y="20" width="24" height="4" rx="1" ry="1"></rect>
55
- </svg>
56
- </a>
57
-
58
- </div>
59
- <div class="clear"></div>
60
- </div>
61
-
62
- <div id="content"><h1>Module: AeEasy
63
-
64
-
65
-
66
- </h1>
67
- <div class="box_info">
68
-
69
-
70
-
71
-
72
-
73
-
74
-
75
-
76
-
77
-
78
-
79
- <dl>
80
- <dt>Defined in:</dt>
81
- <dd>lib/ae_easy/text.rb<span class="defines">,<br />
82
- lib/ae_easy/text/version.rb</span>
83
- </dd>
84
- </dl>
85
-
86
- </div>
87
-
88
- <h2>Defined Under Namespace</h2>
89
- <p class="children">
90
-
91
-
92
- <strong class="modules">Modules:</strong> <span class='object_link'><a href="AeEasy/Text.html" title="AeEasy::Text (module)">Text</a></span>
93
-
94
-
95
-
96
-
97
- </p>
98
-
99
-
100
-
101
-
102
-
103
-
104
-
105
-
106
-
107
- </div>
108
-
109
- <div id="footer">
110
- Generated on Mon Mar 11 21:38:55 2019 by
111
- <a href="http://yardoc.org" title="Yay! A Ruby Documentation Tool" target="_parent">yard</a>
112
- 0.9.18 (ruby-2.5.3).
113
- </div>
114
-
115
- </div>
116
- </body>
117
- </html>
@@ -1,2146 +0,0 @@
1
- <!DOCTYPE html>
2
- <html>
3
- <head>
4
- <meta charset="utf-8">
5
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
- <title>
7
- Module: AeEasy::Text
8
-
9
- &mdash; Documentation by YARD 0.9.18
10
-
11
- </title>
12
-
13
- <link rel="stylesheet" href="../css/style.css" type="text/css" charset="utf-8" />
14
-
15
- <link rel="stylesheet" href="../css/common.css" type="text/css" charset="utf-8" />
16
-
17
- <script type="text/javascript" charset="utf-8">
18
- pathId = "AeEasy::Text";
19
- relpath = '../';
20
- </script>
21
-
22
-
23
- <script type="text/javascript" charset="utf-8" src="../js/jquery.js"></script>
24
-
25
- <script type="text/javascript" charset="utf-8" src="../js/app.js"></script>
26
-
27
-
28
- </head>
29
- <body>
30
- <div class="nav_wrap">
31
- <iframe id="nav" src="../class_list.html?1"></iframe>
32
- <div id="resizer"></div>
33
- </div>
34
-
35
- <div id="main" tabindex="-1">
36
- <div id="header">
37
- <div id="menu">
38
-
39
- <a href="../_index.html">Index (T)</a> &raquo;
40
- <span class='title'><span class='object_link'><a href="../AeEasy.html" title="AeEasy (module)">AeEasy</a></span></span>
41
- &raquo;
42
- <span class="title">Text</span>
43
-
44
- </div>
45
-
46
- <div id="search">
47
-
48
- <a class="full_list_link" id="class_list_link"
49
- href="../class_list.html">
50
-
51
- <svg width="24" height="24">
52
- <rect x="0" y="4" width="24" height="4" rx="1" ry="1"></rect>
53
- <rect x="0" y="12" width="24" height="4" rx="1" ry="1"></rect>
54
- <rect x="0" y="20" width="24" height="4" rx="1" ry="1"></rect>
55
- </svg>
56
- </a>
57
-
58
- </div>
59
- <div class="clear"></div>
60
- </div>
61
-
62
- <div id="content"><h1>Module: AeEasy::Text
63
-
64
-
65
-
66
- </h1>
67
- <div class="box_info">
68
-
69
-
70
-
71
-
72
-
73
-
74
-
75
-
76
-
77
-
78
-
79
- <dl>
80
- <dt>Defined in:</dt>
81
- <dd>lib/ae_easy/text.rb<span class="defines">,<br />
82
- lib/ae_easy/text/version.rb</span>
83
- </dd>
84
- </dl>
85
-
86
- </div>
87
-
88
-
89
-
90
- <h2>
91
- Constant Summary
92
- <small><a href="#" class="constants_summary_toggle">collapse</a></small>
93
- </h2>
94
-
95
- <dl class="constants">
96
-
97
- <dt id="VERSION-constant" class="">VERSION =
98
- <div class="docstring">
99
- <div class="discussion">
100
-
101
- <p>Gem version</p>
102
-
103
-
104
- </div>
105
- </div>
106
- <div class="tags">
107
-
108
-
109
- </div>
110
- </dt>
111
- <dd><pre class="code"><span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>0.0.4</span><span class='tstring_end'>&quot;</span></span></pre></dd>
112
-
113
- </dl>
114
-
115
-
116
-
117
-
118
-
119
-
120
-
121
-
122
-
123
- <h2>
124
- Class Method Summary
125
- <small><a href="#" class="summary_toggle">collapse</a></small>
126
- </h2>
127
-
128
- <ul class="summary">
129
-
130
- <li class="public ">
131
- <span class="summary_signature">
132
-
133
- <a href="#decode_html-class_method" title="decode_html (class method)">.<strong>decode_html</strong>(text) &#x21d2; String </a>
134
-
135
-
136
-
137
- </span>
138
-
139
-
140
-
141
-
142
-
143
-
144
-
145
-
146
-
147
- <span class="summary_desc"><div class='inline'>
148
- <p>Decode HTML entities from text.</p>
149
- </div></span>
150
-
151
- </li>
152
-
153
-
154
- <li class="public ">
155
- <span class="summary_signature">
156
-
157
- <a href="#default_parser-class_method" title="default_parser (class method)">.<strong>default_parser</strong>(cell_element, data, key) &#x21d2; Object </a>
158
-
159
-
160
-
161
- </span>
162
-
163
-
164
-
165
-
166
-
167
-
168
-
169
-
170
-
171
- <span class="summary_desc"><div class='inline'>
172
- <p>Default cell content parser used to parse cell element.</p>
173
- </div></span>
174
-
175
- </li>
176
-
177
-
178
- <li class="public ">
179
- <span class="summary_signature">
180
-
181
- <a href="#encode_html-class_method" title="encode_html (class method)">.<strong>encode_html</strong>(text) &#x21d2; String </a>
182
-
183
-
184
-
185
- </span>
186
-
187
-
188
-
189
-
190
-
191
-
192
-
193
-
194
-
195
- <span class="summary_desc"><div class='inline'>
196
- <p>Encode text for valid HTML entities.</p>
197
- </div></span>
198
-
199
- </li>
200
-
201
-
202
- <li class="public ">
203
- <span class="summary_signature">
204
-
205
- <a href="#hash-class_method" title="hash (class method)">.<strong>hash</strong>(object) &#x21d2; String </a>
206
-
207
-
208
-
209
- </span>
210
-
211
-
212
-
213
-
214
-
215
-
216
-
217
-
218
-
219
- <span class="summary_desc"><div class='inline'>
220
- <p>Create a hash from object.</p>
221
- </div></span>
222
-
223
- </li>
224
-
225
-
226
- <li class="public ">
227
- <span class="summary_signature">
228
-
229
- <a href="#parse_content-class_method" title="parse_content (class method)">.<strong>parse_content</strong>(opts) {|data, row, header_map| ... } &#x21d2; Array&lt;Hash&gt;<sup>?</sup> </a>
230
-
231
-
232
-
233
- </span>
234
-
235
-
236
-
237
-
238
-
239
-
240
-
241
-
242
-
243
- <span class="summary_desc"><div class='inline'>
244
- <p>Parse row data matching a selector using a header map to translate
245
- between columns and friendly keys.</p>
246
- </div></span>
247
-
248
- </li>
249
-
250
-
251
- <li class="public ">
252
- <span class="summary_signature">
253
-
254
- <a href="#parse_header_map-class_method" title="parse_header_map (class method)">.<strong>parse_header_map</strong>(opts = {}) &#x21d2; Hash{Symbol,String =&gt; Integer}<sup>?</sup> </a>
255
-
256
-
257
-
258
- </span>
259
-
260
-
261
-
262
-
263
-
264
-
265
-
266
-
267
-
268
- <span class="summary_desc"><div class='inline'>
269
- <p>Parse header from selector and create a header map to match a column key
270
- with column index.</p>
271
- </div></span>
272
-
273
- </li>
274
-
275
-
276
- <li class="public ">
277
- <span class="summary_signature">
278
-
279
- <a href="#parse_table-class_method" title="parse_table (class method)">.<strong>parse_table</strong>(opts = {}) {|data, row, header_map| ... } &#x21d2; Hash{Symbol =&gt; Array,Hash,nil} </a>
280
-
281
-
282
-
283
- </span>
284
-
285
-
286
-
287
-
288
-
289
-
290
-
291
-
292
-
293
- <span class="summary_desc"><div class='inline'>
294
- <p>Parse data from a horizontal table-like structure matching a selector and
295
- using a header map to match columns.</p>
296
- </div></span>
297
-
298
- </li>
299
-
300
-
301
- <li class="public ">
302
- <span class="summary_signature">
303
-
304
- <a href="#parse_vertical_table-class_method" title="parse_vertical_table (class method)">.<strong>parse_vertical_table</strong>(opts = {}) {|data, row, header_map| ... } &#x21d2; Hash{Symbol =&gt; Array,Hash,nil} </a>
305
-
306
-
307
-
308
- </span>
309
-
310
-
311
-
312
-
313
-
314
-
315
-
316
-
317
-
318
- <span class="summary_desc"><div class='inline'>
319
- <p>Parse data from a vertical table-like structure matching a selector and
320
- using a header map to match columns.</p>
321
- </div></span>
322
-
323
- </li>
324
-
325
-
326
- <li class="public ">
327
- <span class="summary_signature">
328
-
329
- <a href="#strip-class_method" title="strip (class method)">.<strong>strip</strong>(raw_text, orig_encoding = &#39;ASCII&#39;) &#x21d2; String<sup>?</sup> </a>
330
-
331
-
332
-
333
- </span>
334
-
335
-
336
-
337
-
338
-
339
-
340
-
341
-
342
-
343
- <span class="summary_desc"><div class='inline'>
344
- <p>Strip a value by trimming spaces, reducing sequential spaces into a
345
- single space, decode HTML entities and change encoding to UTF-8.</p>
346
- </div></span>
347
-
348
- </li>
349
-
350
-
351
- <li class="public ">
352
- <span class="summary_signature">
353
-
354
- <a href="#translate_label_to_key-class_method" title="translate_label_to_key (class method)">.<strong>translate_label_to_key</strong>(element, label_map) &#x21d2; Symbol, String </a>
355
-
356
-
357
-
358
- </span>
359
-
360
-
361
-
362
-
363
-
364
-
365
-
366
-
367
-
368
- <span class="summary_desc"><div class='inline'>
369
- <p>Extract column label and translate it into a friendly key.</p>
370
- </div></span>
371
-
372
- </li>
373
-
374
-
375
- </ul>
376
-
377
-
378
-
379
-
380
- <div id="class_method_details" class="method_details_list">
381
- <h2>Class Method Details</h2>
382
-
383
-
384
- <div class="method_details first">
385
- <h3 class="signature first" id="decode_html-class_method">
386
-
387
- .<strong>decode_html</strong>(text) &#x21d2; <tt>String</tt>
388
-
389
-
390
-
391
-
392
-
393
- </h3><div class="docstring">
394
- <div class="discussion">
395
-
396
- <p>Decode HTML entities from text.</p>
397
-
398
-
399
- </div>
400
- </div>
401
- <div class="tags">
402
- <p class="tag_title">Parameters:</p>
403
- <ul class="param">
404
-
405
- <li>
406
-
407
- <span class='name'>text</span>
408
-
409
-
410
- <span class='type'>(<tt>String</tt>)</span>
411
-
412
-
413
-
414
- &mdash;
415
- <div class='inline'>
416
- <p>Text to decode.</p>
417
- </div>
418
-
419
- </li>
420
-
421
- </ul>
422
-
423
- <p class="tag_title">Returns:</p>
424
- <ul class="return">
425
-
426
- <li>
427
-
428
-
429
- <span class='type'>(<tt>String</tt>)</span>
430
-
431
-
432
-
433
- </li>
434
-
435
- </ul>
436
-
437
- </div><table class="source_code">
438
- <tr>
439
- <td>
440
- <pre class="lines">
441
-
442
-
443
- 33
444
- 34
445
- 35</pre>
446
- </td>
447
- <td>
448
- <pre class="code"><span class="info file"># File 'lib/ae_easy/text.rb', line 33</span>
449
-
450
- <span class='kw'>def</span> <span class='kw'>self</span><span class='period'>.</span><span class='id identifier rubyid_decode_html'>decode_html</span> <span class='id identifier rubyid_text'>text</span>
451
- <span class='const'>CGI</span><span class='period'>.</span><span class='id identifier rubyid_unescapeHTML'>unescapeHTML</span> <span class='id identifier rubyid_text'>text</span>
452
- <span class='kw'>end</span></pre>
453
- </td>
454
- </tr>
455
- </table>
456
- </div>
457
-
458
- <div class="method_details ">
459
- <h3 class="signature " id="default_parser-class_method">
460
-
461
- .<strong>default_parser</strong>(cell_element, data, key) &#x21d2; <tt>Object</tt>
462
-
463
-
464
-
465
-
466
-
467
- </h3><div class="docstring">
468
- <div class="discussion">
469
-
470
- <p>Default cell content parser used to parse cell element.</p>
471
-
472
-
473
- </div>
474
- </div>
475
- <div class="tags">
476
- <p class="tag_title">Parameters:</p>
477
- <ul class="param">
478
-
479
- <li>
480
-
481
- <span class='name'>cell_element</span>
482
-
483
-
484
- <span class='type'>(<tt>Nokogiri::Element</tt>)</span>
485
-
486
-
487
-
488
- &mdash;
489
- <div class='inline'>
490
- <p>Cell element to parse.</p>
491
- </div>
492
-
493
- </li>
494
-
495
- <li>
496
-
497
- <span class='name'>data</span>
498
-
499
-
500
- <span class='type'>(<tt>Hash</tt>)</span>
501
-
502
-
503
-
504
- &mdash;
505
- <div class='inline'>
506
- <p>Data hash to save parsed data into.</p>
507
- </div>
508
-
509
- </li>
510
-
511
- <li>
512
-
513
- <span class='name'>key</span>
514
-
515
-
516
- <span class='type'>(<tt>String</tt>, <tt>Symbol</tt>)</span>
517
-
518
-
519
-
520
- &mdash;
521
- <div class='inline'>
522
- <p>Header column key being parsed.</p>
523
- </div>
524
-
525
- </li>
526
-
527
- </ul>
528
-
529
-
530
- </div><table class="source_code">
531
- <tr>
532
- <td>
533
- <pre class="lines">
534
-
535
-
536
- 62
537
- 63
538
- 64
539
- 65
540
- 66</pre>
541
- </td>
542
- <td>
543
- <pre class="code"><span class="info file"># File 'lib/ae_easy/text.rb', line 62</span>
544
-
545
- <span class='kw'>def</span> <span class='kw'>self</span><span class='period'>.</span><span class='id identifier rubyid_default_parser'>default_parser</span> <span class='id identifier rubyid_cell_element'>cell_element</span><span class='comma'>,</span> <span class='id identifier rubyid_data'>data</span><span class='comma'>,</span> <span class='id identifier rubyid_key'>key</span>
546
- <span class='kw'>return</span> <span class='kw'>if</span> <span class='id identifier rubyid_cell_element'>cell_element</span><span class='period'>.</span><span class='id identifier rubyid_nil?'>nil?</span>
547
- <span class='id identifier rubyid_cell_element'>cell_element</span><span class='period'>.</span><span class='id identifier rubyid_search'>search</span><span class='lparen'>(</span><span class='tstring'><span class='tstring_beg'>&#39;</span><span class='tstring_content'>//i</span><span class='tstring_end'>&#39;</span></span><span class='rparen'>)</span><span class='period'>.</span><span class='id identifier rubyid_remove'>remove</span> <span class='kw'>if</span> <span class='id identifier rubyid_cell_element'>cell_element</span><span class='period'>.</span><span class='id identifier rubyid_search'>search</span><span class='lparen'>(</span><span class='tstring'><span class='tstring_beg'>&#39;</span><span class='tstring_content'>//i</span><span class='tstring_end'>&#39;</span></span><span class='rparen'>)</span><span class='period'>.</span><span class='id identifier rubyid_count'>count</span> <span class='op'>&gt;</span> <span class='int'>0</span>
548
- <span class='id identifier rubyid_data'>data</span><span class='lbracket'>[</span><span class='id identifier rubyid_key'>key</span><span class='rbracket'>]</span> <span class='op'>=</span> <span class='id identifier rubyid_strip'>strip</span> <span class='id identifier rubyid_cell_element'>cell_element</span><span class='period'>.</span><span class='id identifier rubyid_text'>text</span>
549
- <span class='kw'>end</span></pre>
550
- </td>
551
- </tr>
552
- </table>
553
- </div>
554
-
555
- <div class="method_details ">
556
- <h3 class="signature " id="encode_html-class_method">
557
-
558
- .<strong>encode_html</strong>(text) &#x21d2; <tt>String</tt>
559
-
560
-
561
-
562
-
563
-
564
- </h3><div class="docstring">
565
- <div class="discussion">
566
-
567
- <p>Encode text for valid HTML entities.</p>
568
-
569
-
570
- </div>
571
- </div>
572
- <div class="tags">
573
- <p class="tag_title">Parameters:</p>
574
- <ul class="param">
575
-
576
- <li>
577
-
578
- <span class='name'>text</span>
579
-
580
-
581
- <span class='type'>(<tt>String</tt>)</span>
582
-
583
-
584
-
585
- &mdash;
586
- <div class='inline'>
587
- <p>Text to encode.</p>
588
- </div>
589
-
590
- </li>
591
-
592
- </ul>
593
-
594
- <p class="tag_title">Returns:</p>
595
- <ul class="return">
596
-
597
- <li>
598
-
599
-
600
- <span class='type'>(<tt>String</tt>)</span>
601
-
602
-
603
-
604
- </li>
605
-
606
- </ul>
607
-
608
- </div><table class="source_code">
609
- <tr>
610
- <td>
611
- <pre class="lines">
612
-
613
-
614
- 24
615
- 25
616
- 26</pre>
617
- </td>
618
- <td>
619
- <pre class="code"><span class="info file"># File 'lib/ae_easy/text.rb', line 24</span>
620
-
621
- <span class='kw'>def</span> <span class='kw'>self</span><span class='period'>.</span><span class='id identifier rubyid_encode_html'>encode_html</span> <span class='id identifier rubyid_text'>text</span>
622
- <span class='const'>CGI</span><span class='period'>.</span><span class='id identifier rubyid_escapeHTML'>escapeHTML</span> <span class='id identifier rubyid_text'>text</span>
623
- <span class='kw'>end</span></pre>
624
- </td>
625
- </tr>
626
- </table>
627
- </div>
628
-
629
- <div class="method_details ">
630
- <h3 class="signature " id="hash-class_method">
631
-
632
- .<strong>hash</strong>(object) &#x21d2; <tt>String</tt>
633
-
634
-
635
-
636
-
637
-
638
- </h3><div class="docstring">
639
- <div class="discussion">
640
-
641
- <p>Create a hash from object</p>
642
-
643
-
644
- </div>
645
- </div>
646
- <div class="tags">
647
- <p class="tag_title">Parameters:</p>
648
- <ul class="param">
649
-
650
- <li>
651
-
652
- <span class='name'>object</span>
653
-
654
-
655
- <span class='type'>(<tt>String</tt>, <tt>Hash</tt>, <tt>Object</tt>)</span>
656
-
657
-
658
-
659
- &mdash;
660
- <div class='inline'>
661
- <p>Object to create hash from.</p>
662
- </div>
663
-
664
- </li>
665
-
666
- </ul>
667
-
668
- <p class="tag_title">Returns:</p>
669
- <ul class="return">
670
-
671
- <li>
672
-
673
-
674
- <span class='type'>(<tt>String</tt>)</span>
675
-
676
-
677
-
678
- </li>
679
-
680
- </ul>
681
-
682
- </div><table class="source_code">
683
- <tr>
684
- <td>
685
- <pre class="lines">
686
-
687
-
688
- 14
689
- 15
690
- 16
691
- 17</pre>
692
- </td>
693
- <td>
694
- <pre class="code"><span class="info file"># File 'lib/ae_easy/text.rb', line 14</span>
695
-
696
- <span class='kw'>def</span> <span class='kw'>self</span><span class='period'>.</span><span class='id identifier rubyid_hash'>hash</span> <span class='id identifier rubyid_object'>object</span>
697
- <span class='id identifier rubyid_object'>object</span> <span class='op'>=</span> <span class='id identifier rubyid_object'>object</span><span class='period'>.</span><span class='id identifier rubyid_hash'>hash</span> <span class='kw'>if</span> <span class='id identifier rubyid_object'>object</span><span class='period'>.</span><span class='id identifier rubyid_is_a?'>is_a?</span> <span class='const'>Hash</span>
698
- <span class='const'>Digest</span><span class='op'>::</span><span class='const'>SHA1</span><span class='period'>.</span><span class='id identifier rubyid_hexdigest'>hexdigest</span> <span class='id identifier rubyid_object'>object</span><span class='period'>.</span><span class='id identifier rubyid_to_s'>to_s</span>
699
- <span class='kw'>end</span></pre>
700
- </td>
701
- </tr>
702
- </table>
703
- </div>
704
-
705
- <div class="method_details ">
706
- <h3 class="signature " id="parse_content-class_method">
707
-
708
- .<strong>parse_content</strong>(opts) {|data, row, header_map| ... } &#x21d2; <tt>Array&lt;Hash&gt;</tt><sup>?</sup>
709
-
710
-
711
-
712
-
713
-
714
- </h3><div class="docstring">
715
- <div class="discussion">
716
-
717
- <p>Parse row data matching a selector using a header map to translate
718
-
719
- between columns and friendly keys.</p>
720
-
721
-
722
-
723
- </div>
724
- </div>
725
- <div class="tags">
726
- <p class="tag_title">Parameters:</p>
727
- <ul class="param">
728
-
729
- <li>
730
-
731
- <span class='name'>opts</span>
732
-
733
-
734
- <span class='type'>(<tt>Hash</tt>)</span>
735
-
736
-
737
-
738
- &mdash;
739
- <div class='inline'>
740
- <p>({}) Configuration options.</p>
741
- </div>
742
-
743
- </li>
744
-
745
- </ul>
746
-
747
-
748
-
749
-
750
- <p class="tag_title">Options Hash (<tt>opts</tt>):</p>
751
- <ul class="option">
752
-
753
- <li>
754
- <span class="name">:html</span>
755
- <span class="type">(<tt>Nokogiri::Element</tt>)</span>
756
- <span class="default">
757
-
758
- </span>
759
-
760
- &mdash; <div class='inline'>
761
- <p>Container element to search into.</p>
762
- </div>
763
-
764
- </li>
765
-
766
- <li>
767
- <span class="name">:selector</span>
768
- <span class="type">(<tt>String</tt>)</span>
769
- <span class="default">
770
-
771
- </span>
772
-
773
- &mdash; <div class='inline'>
774
- <p>CSS selector to match content cells.</p>
775
- </div>
776
-
777
- </li>
778
-
779
- <li>
780
- <span class="name">:first_row_header</span>
781
- <span class="type">(<tt>Boolean</tt>)</span>
782
- <span class="default">
783
-
784
- &mdash; default:
785
- <tt>false</tt>
786
-
787
- </span>
788
-
789
- &mdash; <div class='inline'>
790
- <p>If true then first matching element will be assumed to be header and
791
- ignored.</p>
792
- </div>
793
-
794
- </li>
795
-
796
- <li>
797
- <span class="name">:header_map</span>
798
- <span class="type">(<tt>Hash{Symbol,String =&gt; Integer}</tt>)</span>
799
- <span class="default">
800
-
801
- </span>
802
-
803
- &mdash; <div class='inline'>
804
- <p>Header key vs index dictionary.</p>
805
- </div>
806
-
807
- </li>
808
-
809
- <li>
810
- <span class="name">:column_parsers</span>
811
- <span class="type">(<tt>Hash{Symbol,String =&gt; lambda,proc}</tt>)</span>
812
- <span class="default">
813
-
814
- &mdash; default:
815
- <tt>{}</tt>
816
-
817
- </span>
818
-
819
- &mdash; <div class='inline'>
820
- <p>Custom column parsers for advanced data extraction.</p>
821
- </div>
822
-
823
- </li>
824
-
825
- <li>
826
- <span class="name">:ignore_text_nodes</span>
827
- <span class="type">(<tt>Boolean</tt>)</span>
828
- <span class="default">
829
-
830
- &mdash; default:
831
- <tt>true</tt>
832
-
833
- </span>
834
-
835
- &mdash; <div class='inline'>
836
- <p>Ignore text nodes when retrieving content cells and rows.</p>
837
- </div>
838
-
839
- </li>
840
-
841
- </ul>
842
-
843
-
844
- <p class="tag_title">Yield Parameters:</p>
845
- <ul class="yieldparam">
846
-
847
- <li>
848
-
849
- <span class='name'>data</span>
850
-
851
-
852
- <span class='type'>(<tt>Hash{Symbol,String =&gt; Object}</tt>)</span>
853
-
854
-
855
-
856
- &mdash;
857
- <div class='inline'>
858
- <p>Parsed row data.</p>
859
- </div>
860
-
861
- </li>
862
-
863
- <li>
864
-
865
- <span class='name'>row</span>
866
-
867
-
868
- <span class='type'>(<tt>Array</tt>)</span>
869
-
870
-
871
-
872
- &mdash;
873
- <div class='inline'>
874
- <p>Raw row data.</p>
875
- </div>
876
-
877
- </li>
878
-
879
- <li>
880
-
881
- <span class='name'>header_map</span>
882
-
883
-
884
- <span class='type'>(<tt>Hash{Symbol,String =&gt; Integer}</tt>)</span>
885
-
886
-
887
-
888
- &mdash;
889
- <div class='inline'>
890
- <p>Header map used.</p>
891
- </div>
892
-
893
- </li>
894
-
895
- </ul>
896
- <p class="tag_title">Yield Returns:</p>
897
- <ul class="yieldreturn">
898
-
899
- <li>
900
-
901
-
902
- <span class='type'>(<tt>Boolean</tt>)</span>
903
-
904
-
905
-
906
- &mdash;
907
- <div class='inline'>
908
- <p><tt>true</tt> when valid, else <tt>false</tt>.</p>
909
- </div>
910
-
911
- </li>
912
-
913
- </ul>
914
- <p class="tag_title">Returns:</p>
915
- <ul class="return">
916
-
917
- <li>
918
-
919
-
920
- <span class='type'>(<tt>Array&lt;Hash&gt;</tt>, <tt>nil</tt>)</span>
921
-
922
-
923
-
924
- &mdash;
925
- <div class='inline'>
926
- <p>Parsed rows data.</p>
927
- </div>
928
-
929
- </li>
930
-
931
- </ul>
932
-
933
- </div><table class="source_code">
934
- <tr>
935
- <td>
936
- <pre class="lines">
937
-
938
-
939
- 89
940
- 90
941
- 91
942
- 92
943
- 93
944
- 94
945
- 95
946
- 96
947
- 97
948
- 98
949
- 99
950
- 100
951
- 101
952
- 102
953
- 103
954
- 104
955
- 105
956
- 106
957
- 107
958
- 108
959
- 109
960
- 110
961
- 111
962
- 112
963
- 113
964
- 114
965
- 115
966
- 116
967
- 117
968
- 118
969
- 119
970
- 120
971
- 121
972
- 122
973
- 123
974
- 124
975
- 125
976
- 126
977
- 127
978
- 128
979
- 129
980
- 130
981
- 131
982
- 132
983
- 133</pre>
984
- </td>
985
- <td>
986
- <pre class="code"><span class="info file"># File 'lib/ae_easy/text.rb', line 89</span>
987
-
988
- <span class='kw'>def</span> <span class='kw'>self</span><span class='period'>.</span><span class='id identifier rubyid_parse_content'>parse_content</span> <span class='id identifier rubyid_opts'>opts</span><span class='comma'>,</span> <span class='op'>&amp;</span><span class='id identifier rubyid_filter'>filter</span>
989
- <span class='id identifier rubyid_opts'>opts</span> <span class='op'>=</span> <span class='lbrace'>{</span>
990
- <span class='label'>html:</span> <span class='kw'>nil</span><span class='comma'>,</span>
991
- <span class='label'>selector:</span> <span class='kw'>nil</span><span class='comma'>,</span>
992
- <span class='label'>first_row_header:</span> <span class='kw'>false</span><span class='comma'>,</span>
993
- <span class='label'>header_map:</span> <span class='lbrace'>{</span><span class='rbrace'>}</span><span class='comma'>,</span>
994
- <span class='label'>column_parsers:</span> <span class='lbrace'>{</span><span class='rbrace'>}</span><span class='comma'>,</span>
995
- <span class='label'>ignore_text_nodes:</span> <span class='kw'>true</span>
996
- <span class='rbrace'>}</span><span class='period'>.</span><span class='id identifier rubyid_merge'>merge</span> <span class='id identifier rubyid_opts'>opts</span>
997
-
998
- <span class='comment'># Setup config
999
- </span> <span class='id identifier rubyid_data'>data</span> <span class='op'>=</span> <span class='lbracket'>[</span><span class='rbracket'>]</span>
1000
- <span class='id identifier rubyid_row_data'>row_data</span> <span class='op'>=</span> <span class='id identifier rubyid_child_element'>child_element</span> <span class='op'>=</span> <span class='kw'>nil</span>
1001
- <span class='id identifier rubyid_first'>first</span> <span class='op'>=</span> <span class='id identifier rubyid_first_row_header'>first_row_header</span> <span class='op'>=</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:first_row_header</span><span class='rbracket'>]</span>
1002
- <span class='id identifier rubyid_header_map'>header_map</span> <span class='op'>=</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:header_map</span><span class='rbracket'>]</span>
1003
- <span class='id identifier rubyid_column_parsers'>column_parsers</span> <span class='op'>=</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:column_parsers</span><span class='rbracket'>]</span>
1004
- <span class='id identifier rubyid_ignore_text_nodes'>ignore_text_nodes</span> <span class='op'>=</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:ignore_text_nodes</span><span class='rbracket'>]</span>
1005
-
1006
- <span class='comment'># Get and parse rows
1007
- </span> <span class='id identifier rubyid_html_rows'>html_rows</span> <span class='op'>=</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:html</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_css'>css</span><span class='lparen'>(</span><span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:selector</span><span class='rbracket'>]</span><span class='rparen'>)</span>
1008
- <span class='id identifier rubyid_html_rows'>html_rows</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_row'>row</span><span class='op'>|</span>
1009
- <span class='kw'>next</span> <span class='kw'>if</span> <span class='id identifier rubyid_ignore_text_nodes'>ignore_text_nodes</span> <span class='op'>&amp;&amp;</span> <span class='id identifier rubyid_row'>row</span><span class='period'>.</span><span class='id identifier rubyid_name'>name</span> <span class='op'>==</span> <span class='tstring'><span class='tstring_beg'>&#39;</span><span class='tstring_content'>text</span><span class='tstring_end'>&#39;</span></span>
1010
-
1011
- <span class='comment'># First row header validation
1012
- </span> <span class='kw'>if</span> <span class='id identifier rubyid_first'>first</span> <span class='op'>&amp;&amp;</span> <span class='id identifier rubyid_first_row_header'>first_row_header</span>
1013
- <span class='id identifier rubyid_first'>first</span> <span class='op'>=</span> <span class='kw'>false</span>
1014
- <span class='kw'>next</span>
1015
- <span class='kw'>end</span>
1016
-
1017
- <span class='comment'># Extract content data
1018
- </span> <span class='id identifier rubyid_row_data'>row_data</span> <span class='op'>=</span> <span class='lbrace'>{</span><span class='rbrace'>}</span>
1019
- <span class='id identifier rubyid_header_map'>header_map</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_key'>key</span><span class='comma'>,</span> <span class='id identifier rubyid_index'>index</span><span class='op'>|</span>
1020
- <span class='comment'># Parse column html with default or custom parser
1021
- </span> <span class='id identifier rubyid_children'>children</span> <span class='op'>=</span> <span class='id identifier rubyid_row'>row</span><span class='period'>.</span><span class='id identifier rubyid_children'>children</span>
1022
- <span class='id identifier rubyid_children'>children</span> <span class='op'>=</span> <span class='id identifier rubyid_children'>children</span><span class='period'>.</span><span class='id identifier rubyid_select'>select</span><span class='lbrace'>{</span><span class='op'>|</span><span class='id identifier rubyid_i'>i</span><span class='op'>|</span><span class='id identifier rubyid_i'>i</span><span class='period'>.</span><span class='id identifier rubyid_name'>name</span> <span class='op'>!=</span> <span class='tstring'><span class='tstring_beg'>&#39;</span><span class='tstring_content'>text</span><span class='tstring_end'>&#39;</span></span><span class='rbrace'>}</span> <span class='kw'>if</span> <span class='id identifier rubyid_ignore_text_nodes'>ignore_text_nodes</span>
1023
- <span class='id identifier rubyid_child_element'>child_element</span> <span class='op'>=</span> <span class='id identifier rubyid_children'>children</span><span class='lbracket'>[</span><span class='id identifier rubyid_index'>index</span><span class='rbracket'>]</span>
1024
- <span class='id identifier rubyid_column_parsers'>column_parsers</span><span class='lbracket'>[</span><span class='id identifier rubyid_key'>key</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_nil?'>nil?</span> <span class='op'>?</span>
1025
- <span class='id identifier rubyid_default_parser'>default_parser</span><span class='lparen'>(</span><span class='id identifier rubyid_child_element'>child_element</span><span class='comma'>,</span> <span class='id identifier rubyid_row_data'>row_data</span><span class='comma'>,</span> <span class='id identifier rubyid_key'>key</span><span class='rparen'>)</span> <span class='op'>:</span>
1026
- <span class='id identifier rubyid_column_parsers'>column_parsers</span><span class='lbracket'>[</span><span class='id identifier rubyid_key'>key</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_call'>call</span><span class='lparen'>(</span><span class='id identifier rubyid_child_element'>child_element</span><span class='comma'>,</span> <span class='id identifier rubyid_row_data'>row_data</span><span class='comma'>,</span> <span class='id identifier rubyid_key'>key</span><span class='rparen'>)</span>
1027
- <span class='kw'>end</span>
1028
- <span class='kw'>next</span> <span class='kw'>unless</span> <span class='id identifier rubyid_filter'>filter</span><span class='period'>.</span><span class='id identifier rubyid_nil?'>nil?</span> <span class='op'>||</span> <span class='id identifier rubyid_filter'>filter</span><span class='period'>.</span><span class='id identifier rubyid_call'>call</span><span class='lparen'>(</span><span class='id identifier rubyid_row_data'>row_data</span><span class='comma'>,</span> <span class='id identifier rubyid_row'>row</span><span class='comma'>,</span> <span class='id identifier rubyid_header_map'>header_map</span><span class='rparen'>)</span>
1029
- <span class='id identifier rubyid_data'>data</span> <span class='op'>&lt;&lt;</span> <span class='id identifier rubyid_row_data'>row_data</span>
1030
- <span class='kw'>end</span>
1031
- <span class='id identifier rubyid_data'>data</span>
1032
- <span class='kw'>end</span></pre>
1033
- </td>
1034
- </tr>
1035
- </table>
1036
- </div>
1037
-
1038
- <div class="method_details ">
1039
- <h3 class="signature " id="parse_header_map-class_method">
1040
-
1041
- .<strong>parse_header_map</strong>(opts = {}) &#x21d2; <tt>Hash{Symbol,String =&gt; Integer}</tt><sup>?</sup>
1042
-
1043
-
1044
-
1045
-
1046
-
1047
- </h3><div class="docstring">
1048
- <div class="discussion">
1049
-
1050
- <p>Parse header from selector and create a header map to match a column key
1050
- with column index.</p>
1051
-
1052
-
1054
-
1055
-
1056
- </div>
1057
- </div>
1058
- <div class="tags">
1059
- <p class="tag_title">Parameters:</p>
1060
- <ul class="param">
1061
-
1062
- <li>
1063
-
1064
- <span class='name'>opts</span>
1065
-
1066
-
1067
- <span class='type'>(<tt>Hash</tt>)</span>
1068
-
1069
-
1070
- <em class="default">(defaults to: <tt>{}</tt>)</em>
1071
-
1072
-
1073
- &mdash;
1074
- <div class='inline'>
1075
- <p>({}) Configuration options.</p>
1076
- </div>
1077
-
1078
- </li>
1079
-
1080
- </ul>
1081
-
1082
-
1083
-
1084
-
1085
- <p class="tag_title">Options Hash (<tt>opts</tt>):</p>
1086
- <ul class="option">
1087
-
1088
- <li>
1089
- <span class="name">:html</span>
1090
- <span class="type">(<tt>Nokogiri::Element</tt>)</span>
1091
- <span class="default">
1092
-
1093
- </span>
1094
-
1095
- &mdash; <div class='inline'>
1096
- <p>Container element to search into.</p>
1097
- </div>
1098
-
1099
- </li>
1100
-
1101
- <li>
1102
- <span class="name">:selector</span>
1103
- <span class="type">(<tt>String</tt>)</span>
1104
- <span class="default">
1105
-
1106
- </span>
1107
-
1108
- &mdash; <div class='inline'>
1109
- <p>CSS selector to match header cells.</p>
1110
- </div>
1111
-
1112
- </li>
1113
-
1114
- <li>
1115
- <span class="name">:column_key_label_map</span>
1116
- <span class="type">(<tt>Hash{Symbol,String =&gt; Regex,String}</tt>)</span>
1117
- <span class="default">
1118
-
1119
- </span>
1120
-
1121
- &mdash; <div class='inline'>
1122
- <p>Key vs. label dictionary.</p>
1123
- </div>
1124
-
1125
- </li>
1126
-
1127
- <li>
1128
- <span class="name">:first_row_header</span>
1129
- <span class="type">(<tt>Boolean</tt>)</span>
1130
- <span class="default">
1131
-
1132
- &mdash; default:
1133
- <tt>false</tt>
1134
-
1135
- </span>
1136
-
1137
- &mdash; <div class='inline'>
1138
- <p>If true, the first row matching the selector will be used as the header
1139
- for parsing.</p>
1140
- </div>
1141
-
1142
- </li>
1143
-
1144
- <li>
1145
- <span class="name">:ignore_text_nodes</span>
1146
- <span class="type">(<tt>Boolean</tt>)</span>
1147
- <span class="default">
1148
-
1149
- &mdash; default:
1150
- <tt>true</tt>
1151
-
1152
- </span>
1153
-
1154
- &mdash; <div class='inline'>
1155
- <p>Ignore text nodes when retrieving header cells and rows.</p>
1156
- </div>
1157
-
1158
- </li>
1159
-
1160
- </ul>
1161
-
1162
-
1163
- <p class="tag_title">Returns:</p>
1164
- <ul class="return">
1165
-
1166
- <li>
1167
-
1168
-
1169
- <span class='type'>(<tt>Hash{Symbol,String =&gt; Integer}</tt>, <tt>nil</tt>)</span>
1170
-
1171
-
1172
-
1173
- &mdash;
1174
- <div class='inline'>
1175
- <p>Key vs. column index map.</p>
1176
- </div>
1177
-
1178
- </li>
1179
-
1180
- </ul>
1181
-
1182
- </div><table class="source_code">
1183
- <tr>
1184
- <td>
1185
- <pre class="lines">
1186
-
1187
-
1188
- 166
1189
- 167
1190
- 168
1191
- 169
1192
- 170
1193
- 171
1194
- 172
1195
- 173
1196
- 174
1197
- 175
1198
- 176
1199
- 177
1200
- 178
1201
- 179
1202
- 180
1203
- 181
1204
- 182
1205
- 183
1206
- 184
1207
- 185
1208
- 186
1209
- 187
1210
- 188
1211
- 189
1212
- 190
1213
- 191
1214
- 192
1215
- 193
1216
- 194
1217
- 195
1218
- 196
1219
- 197
1220
- 198
1221
- 199
1222
- 200</pre>
1223
- </td>
1224
- <td>
1225
- <pre class="code"><span class="info file"># File 'lib/ae_easy/text.rb', line 166</span>
1226
-
1227
- <span class='kw'>def</span> <span class='kw'>self</span><span class='period'>.</span><span class='id identifier rubyid_parse_header_map'>parse_header_map</span> <span class='id identifier rubyid_opts'>opts</span> <span class='op'>=</span> <span class='lbrace'>{</span><span class='rbrace'>}</span>
1228
- <span class='id identifier rubyid_opts'>opts</span> <span class='op'>=</span> <span class='lbrace'>{</span>
1229
- <span class='label'>html:</span> <span class='kw'>nil</span><span class='comma'>,</span>
1230
- <span class='label'>selector:</span> <span class='kw'>nil</span><span class='comma'>,</span>
1231
- <span class='label'>column_key_label_map:</span> <span class='lbrace'>{</span><span class='rbrace'>}</span><span class='comma'>,</span>
1232
- <span class='label'>first_row_header:</span> <span class='kw'>false</span><span class='comma'>,</span>
1233
- <span class='label'>ignore_text_nodes:</span> <span class='kw'>true</span>
1234
- <span class='rbrace'>}</span><span class='period'>.</span><span class='id identifier rubyid_merge'>merge</span> <span class='id identifier rubyid_opts'>opts</span>
1235
-
1236
- <span class='comment'># Setup config
1237
- </span> <span class='id identifier rubyid_dictionary'>dictionary</span> <span class='op'>=</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:column_key_label_map</span><span class='rbracket'>]</span>
1238
- <span class='id identifier rubyid_ignore_text_nodes'>ignore_text_nodes</span> <span class='op'>=</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:ignore_text_nodes</span><span class='rbracket'>]</span>
1239
- <span class='id identifier rubyid_data'>data</span> <span class='op'>=</span> <span class='lbracket'>[</span><span class='rbracket'>]</span>
1240
- <span class='id identifier rubyid_column_map'>column_map</span> <span class='op'>=</span> <span class='kw'>nil</span>
1241
-
1242
- <span class='comment'># Extract and parse header rows
1243
- </span> <span class='id identifier rubyid_html_rows'>html_rows</span> <span class='op'>=</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:html</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_css'>css</span><span class='lparen'>(</span><span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:selector</span><span class='rbracket'>]</span><span class='rparen'>)</span> <span class='kw'>rescue</span> <span class='kw'>nil</span>
1244
- <span class='kw'>return</span> <span class='kw'>nil</span> <span class='kw'>if</span> <span class='id identifier rubyid_html_rows'>html_rows</span><span class='period'>.</span><span class='id identifier rubyid_nil?'>nil?</span>
1245
- <span class='id identifier rubyid_html_rows'>html_rows</span> <span class='op'>=</span> <span class='lbracket'>[</span><span class='id identifier rubyid_html_rows'>html_rows</span><span class='period'>.</span><span class='id identifier rubyid_first'>first</span><span class='rbracket'>]</span> <span class='kw'>if</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:first_row_header</span><span class='rbracket'>]</span>
1246
- <span class='id identifier rubyid_html_rows'>html_rows</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_row'>row</span><span class='op'>|</span>
1247
- <span class='kw'>next</span> <span class='kw'>if</span> <span class='id identifier rubyid_ignore_text_nodes'>ignore_text_nodes</span> <span class='op'>&amp;&amp;</span> <span class='id identifier rubyid_row'>row</span><span class='period'>.</span><span class='id identifier rubyid_name'>name</span> <span class='op'>==</span> <span class='tstring'><span class='tstring_beg'>&#39;</span><span class='tstring_content'>text</span><span class='tstring_end'>&#39;</span></span>
1248
-
1249
- <span class='id identifier rubyid_column_map'>column_map</span> <span class='op'>=</span> <span class='lbrace'>{</span><span class='rbrace'>}</span>
1250
- <span class='id identifier rubyid_children'>children</span> <span class='op'>=</span> <span class='id identifier rubyid_row'>row</span><span class='period'>.</span><span class='id identifier rubyid_children'>children</span>
1251
- <span class='id identifier rubyid_children'>children</span> <span class='op'>=</span> <span class='id identifier rubyid_children'>children</span><span class='period'>.</span><span class='id identifier rubyid_select'>select</span><span class='lbrace'>{</span><span class='op'>|</span><span class='id identifier rubyid_i'>i</span><span class='op'>|</span><span class='id identifier rubyid_i'>i</span><span class='period'>.</span><span class='id identifier rubyid_name'>name</span> <span class='op'>!=</span> <span class='tstring'><span class='tstring_beg'>&#39;</span><span class='tstring_content'>text</span><span class='tstring_end'>&#39;</span></span><span class='rbrace'>}</span> <span class='kw'>if</span> <span class='id identifier rubyid_ignore_text_nodes'>ignore_text_nodes</span>
1252
- <span class='id identifier rubyid_children'>children</span><span class='period'>.</span><span class='id identifier rubyid_each_with_index'>each_with_index</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_col'>col</span><span class='comma'>,</span> <span class='id identifier rubyid_index'>index</span><span class='op'>|</span>
1253
- <span class='comment'># Parse and map column header
1254
- </span> <span class='id identifier rubyid_column_key'>column_key</span> <span class='op'>=</span> <span class='id identifier rubyid_translate_label_to_key'>translate_label_to_key</span> <span class='id identifier rubyid_col'>col</span><span class='comma'>,</span> <span class='id identifier rubyid_dictionary'>dictionary</span>
1255
- <span class='kw'>next</span> <span class='kw'>if</span> <span class='id identifier rubyid_column_key'>column_key</span><span class='period'>.</span><span class='id identifier rubyid_nil?'>nil?</span>
1256
- <span class='id identifier rubyid_column_map'>column_map</span><span class='lbracket'>[</span><span class='id identifier rubyid_column_key'>column_key</span><span class='rbracket'>]</span> <span class='op'>=</span> <span class='id identifier rubyid_index'>index</span>
1257
- <span class='kw'>end</span>
1258
- <span class='id identifier rubyid_data'>data</span> <span class='op'>&lt;&lt;</span> <span class='id identifier rubyid_column_map'>column_map</span>
1259
- <span class='kw'>end</span>
1260
- <span class='id identifier rubyid_data'>data</span><span class='op'>&amp;.</span><span class='id identifier rubyid_first'>first</span>
1261
- <span class='kw'>end</span></pre>
1262
- </td>
1263
- </tr>
1264
- </table>
1265
- </div>
1266
-
1267
- <div class="method_details ">
1268
- <h3 class="signature " id="parse_table-class_method">
1269
-
1270
- .<strong>parse_table</strong>(opts = {}) {|data, row, header_map| ... } &#x21d2; <tt>Hash{Symbol =&gt; Array,Hash,nil}</tt>
1271
-
1272
-
1273
-
1274
-
1275
-
1276
- </h3><div class="docstring">
1277
- <div class="discussion">
1278
-
1279
- <p>Parse data from a horizontal table-like structure matching selectors and
1280
- using a header map to match columns.</p>
1281
-
1282
-
1283
-
1284
-
1285
- </div>
1286
- </div>
1287
- <div class="tags">
1288
- <p class="tag_title">Parameters:</p>
1289
- <ul class="param">
1290
-
1291
- <li>
1292
-
1293
- <span class='name'>opts</span>
1294
-
1295
-
1296
- <span class='type'>(<tt>Hash</tt>)</span>
1297
-
1298
-
1299
- <em class="default">(defaults to: <tt>{}</tt>)</em>
1300
-
1301
-
1302
- &mdash;
1303
- <div class='inline'>
1304
- <p>({}) Configuration options.</p>
1305
- </div>
1306
-
1307
- </li>
1308
-
1309
- </ul>
1310
-
1311
-
1312
-
1313
-
1314
- <p class="tag_title">Options Hash (<tt>opts</tt>):</p>
1315
- <ul class="option">
1316
-
1317
- <li>
1318
- <span class="name">:html</span>
1319
- <span class="type">(<tt>Nokogiri::Element</tt>)</span>
1320
- <span class="default">
1321
-
1322
- </span>
1323
-
1324
- &mdash; <div class='inline'>
1325
- <p>Container element to search into.</p>
1326
- </div>
1327
-
1328
- </li>
1329
-
1330
- <li>
1331
- <span class="name">:header_selector</span>
1332
- <span class="type">(<tt>String</tt>)</span>
1333
- <span class="default">
1334
-
1335
- </span>
1336
-
1337
- &mdash; <div class='inline'>
1338
- <p>Header column elements selector.</p>
1339
- </div>
1340
-
1341
- </li>
1342
-
1343
- <li>
1344
- <span class="name">:header_key_label_map</span>
1345
- <span class="type">(<tt>Hash{Symbol,String =&gt; Regex,String}</tt>)</span>
1346
- <span class="default">
1347
-
1348
- </span>
1349
-
1350
- &mdash; <div class='inline'>
1351
- <p>Header key vs. label dictionary to match column indexes.</p>
1352
- </div>
1353
-
1354
- </li>
1355
-
1356
- <li>
1357
- <span class="name">:content_selector</span>
1358
- <span class="type">(<tt>String</tt>)</span>
1359
- <span class="default">
1360
-
1361
- </span>
1362
-
1363
- &mdash; <div class='inline'>
1364
- <p>Content row elements selector.</p>
1365
- </div>
1366
-
1367
- </li>
1368
-
1369
- <li>
1370
- <span class="name">:first_row_header</span>
1371
- <span class="type">(<tt>Boolean</tt>)</span>
1372
- <span class="default">
1373
-
1374
- &mdash; default:
1375
- <tt>false</tt>
1376
-
1377
- </span>
1378
-
1379
- &mdash; <div class='inline'>
1380
- <p>If true, the first row matching the selector will be used as the header
1381
- for parsing.</p>
1382
- </div>
1383
-
1384
- </li>
1385
-
1386
- <li>
1387
- <span class="name">:column_parsers</span>
1388
- <span class="type">(<tt>Hash{Symbol,String =&gt; lambda,proc}</tt>)</span>
1389
- <span class="default">
1390
-
1391
- &mdash; default:
1392
- <tt>{}</tt>
1393
-
1394
- </span>
1395
-
1396
- &mdash; <div class='inline'>
1397
- <p>Custom column parsers for advanced data extraction.</p>
1398
- </div>
1399
-
1400
- </li>
1401
-
1402
- <li>
1403
- <span class="name">:ignore_text_nodes</span>
1404
- <span class="type">(<tt>Boolean</tt>)</span>
1405
- <span class="default">
1406
-
1407
- &mdash; default:
1408
- <tt>true</tt>
1409
-
1410
- </span>
1411
-
1412
- &mdash; <div class='inline'>
1413
- <p>Ignore text nodes when retrieving cells and rows.</p>
1414
- </div>
1415
-
1416
- </li>
1417
-
1418
- </ul>
1419
-
1420
-
1421
- <p class="tag_title">Yield Parameters:</p>
1422
- <ul class="yieldparam">
1423
-
1424
- <li>
1425
-
1426
- <span class='name'>data</span>
1427
-
1428
-
1429
- <span class='type'>(<tt>Hash{Symbol,String =&gt; Object}</tt>)</span>
1430
-
1431
-
1432
-
1433
- &mdash;
1434
- <div class='inline'>
1435
- <p>Parsed content row data.</p>
1436
- </div>
1437
-
1438
- </li>
1439
-
1440
- <li>
1441
-
1442
- <span class='name'>row</span>
1443
-
1444
-
1445
- <span class='type'>(<tt>Array</tt>)</span>
1446
-
1447
-
1448
-
1449
- &mdash;
1450
- <div class='inline'>
1451
- <p>Raw content row data.</p>
1452
- </div>
1453
-
1454
- </li>
1455
-
1456
- <li>
1457
-
1458
- <span class='name'>header_map</span>
1459
-
1460
-
1461
- <span class='type'>(<tt>Hash{Symbol,String =&gt; Integer}</tt>)</span>
1462
-
1463
-
1464
-
1465
- &mdash;
1466
- <div class='inline'>
1467
- <p>Header map used.</p>
1468
- </div>
1469
-
1470
- </li>
1471
-
1472
- </ul>
1473
- <p class="tag_title">Yield Returns:</p>
1474
- <ul class="yieldreturn">
1475
-
1476
- <li>
1477
-
1478
-
1479
- <span class='type'>(<tt>Boolean</tt>)</span>
1480
-
1481
-
1482
-
1483
- &mdash;
1484
- <div class='inline'>
1485
- <p><tt>true</tt> when valid, else <tt>false</tt>.</p>
1486
- </div>
1487
-
1488
- </li>
1489
-
1490
- </ul>
1491
- <p class="tag_title">Returns:</p>
1492
- <ul class="return">
1493
-
1494
- <li>
1495
-
1496
-
1497
- <span class='type'>(<tt>Hash{Symbol =&gt; Array,Hash,nil}</tt>)</span>
1498
-
1499
-
1500
-
1501
- &mdash;
1502
- <div class='inline'>
1503
- <p>Hash data is as follows:</p>
1504
- <ul><li>
1505
- <p>`[Hash] :header_map` Header map used.</p>
1506
- </li><li>
1507
- <p>`[Array&lt;Hash&gt;,nil] :data` Parsed rows data.</p>
1508
- </li></ul>
1509
- </div>
1510
-
1511
- </li>
1512
-
1513
- </ul>
1514
-
1515
- </div><table class="source_code">
1516
- <tr>
1517
- <td>
1518
- <pre class="lines">
1519
-
1520
-
1521
- 226
1522
- 227
1523
- 228
1524
- 229
1525
- 230
1526
- 231
1527
- 232
1528
- 233
1529
- 234
1530
- 235
1531
- 236
1532
- 237
1533
- 238
1534
- 239
1535
- 240
1536
- 241
1537
- 242
1538
- 243
1539
- 244
1540
- 245
1541
- 246
1542
- 247
1543
- 248
1544
- 249
1545
- 250
1546
- 251</pre>
1547
- </td>
1548
- <td>
1549
- <pre class="code"><span class="info file"># File 'lib/ae_easy/text.rb', line 226</span>
1550
-
1551
- <span class='kw'>def</span> <span class='kw'>self</span><span class='period'>.</span><span class='id identifier rubyid_parse_table'>parse_table</span> <span class='id identifier rubyid_opts'>opts</span> <span class='op'>=</span> <span class='lbrace'>{</span><span class='rbrace'>}</span><span class='comma'>,</span> <span class='op'>&amp;</span><span class='id identifier rubyid_filter'>filter</span>
1552
- <span class='id identifier rubyid_opts'>opts</span> <span class='op'>=</span> <span class='lbrace'>{</span>
1553
- <span class='label'>html:</span> <span class='kw'>nil</span><span class='comma'>,</span>
1554
- <span class='label'>header_selector:</span> <span class='kw'>nil</span><span class='comma'>,</span>
1555
- <span class='label'>header_key_label_map:</span> <span class='lbrace'>{</span><span class='rbrace'>}</span><span class='comma'>,</span>
1556
- <span class='label'>content_selector:</span> <span class='kw'>nil</span><span class='comma'>,</span>
1557
- <span class='label'>first_row_header:</span> <span class='kw'>false</span><span class='comma'>,</span>
1558
- <span class='label'>column_parsers:</span> <span class='lbrace'>{</span><span class='rbrace'>}</span><span class='comma'>,</span>
1559
- <span class='label'>ignore_text_nodes:</span> <span class='kw'>true</span>
1560
- <span class='rbrace'>}</span><span class='period'>.</span><span class='id identifier rubyid_merge'>merge</span> <span class='id identifier rubyid_opts'>opts</span>
1561
- <span class='kw'>return</span> <span class='kw'>nil</span> <span class='kw'>if</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:html</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_nil?'>nil?</span>
1562
- <span class='id identifier rubyid_header_map'>header_map</span> <span class='op'>=</span> <span class='kw'>self</span><span class='period'>.</span><span class='id identifier rubyid_parse_header_map'>parse_header_map</span> <span class='label'>html:</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:html</span><span class='rbracket'>]</span><span class='comma'>,</span>
1563
- <span class='label'>selector:</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:header_selector</span><span class='rbracket'>]</span><span class='comma'>,</span>
1564
- <span class='label'>column_key_label_map:</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:header_key_label_map</span><span class='rbracket'>]</span><span class='comma'>,</span>
1565
- <span class='label'>first_row_header:</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:first_row_header</span><span class='rbracket'>]</span><span class='comma'>,</span>
1566
- <span class='label'>ignore_text_nodes:</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:ignore_text_nodes</span><span class='rbracket'>]</span>
1567
- <span class='kw'>return</span> <span class='kw'>nil</span> <span class='kw'>if</span> <span class='id identifier rubyid_header_map'>header_map</span><span class='period'>.</span><span class='id identifier rubyid_nil?'>nil?</span>
1568
- <span class='id identifier rubyid_data'>data</span> <span class='op'>=</span> <span class='kw'>self</span><span class='period'>.</span><span class='id identifier rubyid_parse_content'>parse_content</span> <span class='label'>html:</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:html</span><span class='rbracket'>]</span><span class='comma'>,</span>
1569
- <span class='label'>selector:</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:content_selector</span><span class='rbracket'>]</span><span class='comma'>,</span>
1570
- <span class='label'>header_map:</span> <span class='id identifier rubyid_header_map'>header_map</span><span class='comma'>,</span>
1571
- <span class='label'>first_row_header:</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:first_row_header</span><span class='rbracket'>]</span><span class='comma'>,</span>
1572
- <span class='label'>column_parsers:</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:column_parsers</span><span class='rbracket'>]</span><span class='comma'>,</span>
1573
- <span class='label'>ignore_text_nodes:</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:ignore_text_nodes</span><span class='rbracket'>]</span><span class='comma'>,</span>
1574
- <span class='op'>&amp;</span><span class='id identifier rubyid_filter'>filter</span>
1575
- <span class='lbrace'>{</span><span class='label'>header_map:</span> <span class='id identifier rubyid_header_map'>header_map</span><span class='comma'>,</span> <span class='label'>data:</span> <span class='id identifier rubyid_data'>data</span><span class='rbrace'>}</span>
1576
- <span class='kw'>end</span></pre>
1577
- </td>
1578
- </tr>
1579
- </table>
1580
- </div>
1581
-
1582
- <div class="method_details ">
1583
- <h3 class="signature " id="parse_vertical_table-class_method">
1584
-
1585
- .<strong>parse_vertical_table</strong>(opts = {}) {|data, row, header_map| ... } &#x21d2; <tt>Hash{Symbol =&gt; Array,Hash,nil}</tt>
1586
-
1587
-
1588
-
1589
-
1590
-
1591
- </h3><div class="docstring">
1592
- <div class="discussion">
1593
-
1594
- <p>Parse data from a vertical table-like structure matching selectors and
1595
- using a header map to match columns.</p>
1596
-
1597
-
1598
-
1599
-
1600
- </div>
1601
- </div>
1602
- <div class="tags">
1603
- <p class="tag_title">Parameters:</p>
1604
- <ul class="param">
1605
-
1606
- <li>
1607
-
1608
- <span class='name'>opts</span>
1609
-
1610
-
1611
- <span class='type'>(<tt>Hash</tt>)</span>
1612
-
1613
-
1614
- <em class="default">(defaults to: <tt>{}</tt>)</em>
1615
-
1616
-
1617
- &mdash;
1618
- <div class='inline'>
1619
- <p>({}) Configuration options.</p>
1620
- </div>
1621
-
1622
- </li>
1623
-
1624
- </ul>
1625
-
1626
-
1627
-
1628
-
1629
- <p class="tag_title">Options Hash (<tt>opts</tt>):</p>
1630
- <ul class="option">
1631
-
1632
- <li>
1633
- <span class="name">:html</span>
1634
- <span class="type">(<tt>Nokogiri::Element</tt>)</span>
1635
- <span class="default">
1636
-
1637
- </span>
1638
-
1639
- &mdash; <div class='inline'>
1640
- <p>Container element to search into.</p>
1641
- </div>
1642
-
1643
- </li>
1644
-
1645
- <li>
1646
- <span class="name">:row_selector</span>
1647
- <span class="type">(<tt>String</tt>)</span>
1648
- <span class="default">
1649
-
1650
- </span>
1651
-
1652
- &mdash; <div class='inline'>
1653
- <p>Vertical row like elements selector.</p>
1654
- </div>
1655
-
1656
- </li>
1657
-
1658
- <li>
1659
- <span class="name">:header_selector</span>
1660
- <span class="type">(<tt>String</tt>)</span>
1661
- <span class="default">
1662
-
1663
- </span>
1664
-
1665
- &mdash; <div class='inline'>
1666
- <p>Header column elements selector.</p>
1667
- </div>
1668
-
1669
- </li>
1670
-
1671
- <li>
1672
- <span class="name">:header_key_label_map</span>
1673
- <span class="type">(<tt>Hash{Symbol,String =&gt; Regex,String}</tt>)</span>
1674
- <span class="default">
1675
-
1676
- </span>
1677
-
1678
- &mdash; <div class='inline'>
1679
- <p>Header key vs. label dictionary to match column indexes.</p>
1680
- </div>
1681
-
1682
- </li>
1683
-
1684
- <li>
1685
- <span class="name">:content_selector</span>
1686
- <span class="type">(<tt>String</tt>)</span>
1687
- <span class="default">
1688
-
1689
- </span>
1690
-
1691
- &mdash; <div class='inline'>
1692
- <p>Content row elements selector.</p>
1693
- </div>
1694
-
1695
- </li>
1696
-
1697
- <li>
1698
- <span class="name">:column_parsers</span>
1699
- <span class="type">(<tt>Hash{Symbol,String =&gt; lambda,proc}</tt>)</span>
1700
- <span class="default">
1701
-
1702
- &mdash; default:
1703
- <tt>{}</tt>
1704
-
1705
- </span>
1706
-
1707
- &mdash; <div class='inline'>
1708
- <p>Custom column parsers for advanced data extraction.</p>
1709
- </div>
1710
-
1711
- </li>
1712
-
1713
- <li>
1714
- <span class="name">:ignore_text_nodes</span>
1715
- <span class="type">(<tt>Boolean</tt>)</span>
1716
- <span class="default">
1717
-
1718
- &mdash; default:
1719
- <tt>true</tt>
1720
-
1721
- </span>
1722
-
1723
- &mdash; <div class='inline'>
1724
- <p>Ignore text nodes when retrieving cells and rows.</p>
1725
- </div>
1726
-
1727
- </li>
1728
-
1729
- </ul>
1730
-
1731
-
1732
- <p class="tag_title">Yield Parameters:</p>
1733
- <ul class="yieldparam">
1734
-
1735
- <li>
1736
-
1737
- <span class='name'>data</span>
1738
-
1739
-
1740
- <span class='type'>(<tt>Hash{Symbol,String =&gt; Object}</tt>)</span>
1741
-
1742
-
1743
-
1744
- &mdash;
1745
- <div class='inline'>
1746
- <p>Parsed content row data.</p>
1747
- </div>
1748
-
1749
- </li>
1750
-
1751
- <li>
1752
-
1753
- <span class='name'>row</span>
1754
-
1755
-
1756
- <span class='type'>(<tt>Array</tt>)</span>
1757
-
1758
-
1759
-
1760
- &mdash;
1761
- <div class='inline'>
1762
- <p>Raw content row data.</p>
1763
- </div>
1764
-
1765
- </li>
1766
-
1767
- <li>
1768
-
1769
- <span class='name'>header_map</span>
1770
-
1771
-
1772
- <span class='type'>(<tt>Hash{Symbol,String =&gt; Integer}</tt>)</span>
1773
-
1774
-
1775
-
1776
- &mdash;
1777
- <div class='inline'>
1778
- <p>Header map used.</p>
1779
- </div>
1780
-
1781
- </li>
1782
-
1783
- </ul>
1784
- <p class="tag_title">Yield Returns:</p>
1785
- <ul class="yieldreturn">
1786
-
1787
- <li>
1788
-
1789
-
1790
- <span class='type'>(<tt>Boolean</tt>)</span>
1791
-
1792
-
1793
-
1794
- &mdash;
1795
- <div class='inline'>
1796
- <p><tt>true</tt> when valid, else <tt>false</tt>.</p>
1797
- </div>
1798
-
1799
- </li>
1800
-
1801
- </ul>
1802
- <p class="tag_title">Returns:</p>
1803
- <ul class="return">
1804
-
1805
- <li>
1806
-
1807
-
1808
- <span class='type'>(<tt>Hash{Symbol =&gt; Array,Hash,nil}</tt>)</span>
1809
-
1810
-
1811
-
1812
- &mdash;
1813
- <div class='inline'>
1814
- <p>Hash data is as follows:</p>
1815
- <ul><li>
1816
- <p>`[Hash] :header_map` Header map used.</p>
1817
- </li><li>
1818
- <p>`[Array&lt;Hash&gt;,nil] :data` Parsed rows data.</p>
1819
- </li></ul>
1820
- </div>
1821
-
1822
- </li>
1823
-
1824
- </ul>
1825
-
1826
- </div><table class="source_code">
1827
- <tr>
1828
- <td>
1829
- <pre class="lines">
1830
-
1831
-
1832
- 276
1833
- 277
1834
- 278
1835
- 279
1836
- 280
1837
- 281
1838
- 282
1839
- 283
1840
- 284
1841
- 285
1842
- 286
1843
- 287
1844
- 288
1845
- 289
1846
- 290
1847
- 291
1848
- 292
1849
- 293
1850
- 294
1851
- 295
1852
- 296
1853
- 297
1854
- 298
1855
- 299
1856
- 300
1857
- 301
1858
- 302
1859
- 303
1860
- 304
1861
- 305
1862
- 306
1863
- 307
1864
- 308
1865
- 309</pre>
1866
- </td>
1867
- <td>
1868
- <pre class="code"><span class="info file"># File 'lib/ae_easy/text.rb', line 276</span>
1869
-
1870
- <span class='kw'>def</span> <span class='kw'>self</span><span class='period'>.</span><span class='id identifier rubyid_parse_vertical_table'>parse_vertical_table</span> <span class='id identifier rubyid_opts'>opts</span> <span class='op'>=</span> <span class='lbrace'>{</span><span class='rbrace'>}</span><span class='comma'>,</span> <span class='op'>&amp;</span><span class='id identifier rubyid_filter'>filter</span>
1871
- <span class='id identifier rubyid_opts'>opts</span> <span class='op'>=</span> <span class='lbrace'>{</span>
1872
- <span class='label'>html:</span> <span class='kw'>nil</span><span class='comma'>,</span>
1873
- <span class='label'>row_selector:</span> <span class='kw'>nil</span><span class='comma'>,</span>
1874
- <span class='label'>header_selector:</span> <span class='kw'>nil</span><span class='comma'>,</span>
1875
- <span class='label'>header_key_label_map:</span> <span class='lbrace'>{</span><span class='rbrace'>}</span><span class='comma'>,</span>
1876
- <span class='label'>content_selector:</span> <span class='kw'>nil</span><span class='comma'>,</span>
1877
- <span class='label'>column_parsers:</span> <span class='lbrace'>{</span><span class='rbrace'>}</span><span class='comma'>,</span>
1878
- <span class='label'>ignore_text_nodes:</span> <span class='kw'>true</span>
1879
- <span class='rbrace'>}</span><span class='period'>.</span><span class='id identifier rubyid_merge'>merge</span> <span class='id identifier rubyid_opts'>opts</span>
1880
- <span class='kw'>return</span> <span class='kw'>nil</span> <span class='kw'>if</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:html</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_nil?'>nil?</span>
1881
-
1882
- <span class='comment'># Setup config
1883
- </span> <span class='id identifier rubyid_data'>data</span> <span class='op'>=</span> <span class='lbrace'>{</span><span class='rbrace'>}</span>
1884
- <span class='id identifier rubyid_dictionary'>dictionary</span> <span class='op'>=</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:header_key_label_map</span><span class='rbracket'>]</span>
1885
- <span class='id identifier rubyid_column_parsers'>column_parsers</span> <span class='op'>=</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:column_parsers</span><span class='rbracket'>]</span>
1886
-
1887
- <span class='comment'># Extract headers and content
1888
- </span> <span class='id identifier rubyid_html_rows'>html_rows</span> <span class='op'>=</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:html</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_css'>css</span><span class='lparen'>(</span><span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:row_selector</span><span class='rbracket'>]</span><span class='rparen'>)</span> <span class='kw'>rescue</span> <span class='kw'>nil</span>
1889
- <span class='kw'>return</span> <span class='kw'>nil</span> <span class='kw'>if</span> <span class='id identifier rubyid_html_rows'>html_rows</span><span class='period'>.</span><span class='id identifier rubyid_nil?'>nil?</span>
1890
- <span class='id identifier rubyid_html_rows'>html_rows</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_row'>row</span><span class='op'>|</span>
1891
- <span class='comment'># Parse and map column header
1892
- </span> <span class='id identifier rubyid_header_element'>header_element</span> <span class='op'>=</span> <span class='id identifier rubyid_row'>row</span><span class='period'>.</span><span class='id identifier rubyid_css'>css</span><span class='lparen'>(</span><span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:header_selector</span><span class='rbracket'>]</span><span class='rparen'>)</span>
1893
- <span class='id identifier rubyid_key'>key</span> <span class='op'>=</span> <span class='id identifier rubyid_translate_label_to_key'>translate_label_to_key</span> <span class='id identifier rubyid_header_element'>header_element</span><span class='comma'>,</span> <span class='id identifier rubyid_dictionary'>dictionary</span>
1894
- <span class='kw'>next</span> <span class='kw'>if</span> <span class='id identifier rubyid_key'>key</span><span class='period'>.</span><span class='id identifier rubyid_nil?'>nil?</span> <span class='op'>||</span> <span class='id identifier rubyid_key'>key</span> <span class='op'>==</span> <span class='tstring'><span class='tstring_beg'>&#39;</span><span class='tstring_end'>&#39;</span></span>
1895
-
1896
- <span class='comment'># Parse column html with default or custom parser
1897
- </span> <span class='id identifier rubyid_content_element'>content_element</span> <span class='op'>=</span> <span class='id identifier rubyid_row'>row</span><span class='period'>.</span><span class='id identifier rubyid_css'>css</span><span class='lparen'>(</span><span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:content_selector</span><span class='rbracket'>]</span><span class='rparen'>)</span>
1898
- <span class='id identifier rubyid_column_parsers'>column_parsers</span><span class='lbracket'>[</span><span class='id identifier rubyid_key'>key</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_nil?'>nil?</span> <span class='op'>?</span>
1899
- <span class='id identifier rubyid_default_parser'>default_parser</span><span class='lparen'>(</span><span class='id identifier rubyid_content_element'>content_element</span><span class='comma'>,</span> <span class='id identifier rubyid_data'>data</span><span class='comma'>,</span> <span class='id identifier rubyid_key'>key</span><span class='rparen'>)</span> <span class='op'>:</span>
1900
- <span class='id identifier rubyid_column_parsers'>column_parsers</span><span class='lbracket'>[</span><span class='id identifier rubyid_key'>key</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_call'>call</span><span class='lparen'>(</span><span class='id identifier rubyid_content_element'>content_element</span><span class='comma'>,</span> <span class='id identifier rubyid_data'>data</span><span class='comma'>,</span> <span class='id identifier rubyid_key'>key</span><span class='rparen'>)</span>
1901
- <span class='kw'>end</span>
1902
- <span class='id identifier rubyid_data'>data</span>
1903
- <span class='kw'>end</span></pre>
1904
- </td>
1905
- </tr>
1906
- </table>
1907
- </div>
1908
-
1909
- <div class="method_details ">
1910
- <h3 class="signature " id="strip-class_method">
1911
-
1912
- .<strong>strip</strong>(raw_text, orig_encoding = &#39;ASCII&#39;) &#x21d2; <tt>String</tt><sup>?</sup>
1913
-
1914
-
1915
-
1916
-
1917
-
1918
- </h3><div class="docstring">
1919
- <div class="discussion">
1920
-
1921
- <p>Strip a value by trimming spaces, reducing sequential spaces into a</p>
1922
-
1923
- <pre class="code ruby"><code class="ruby">single space, decode HTML entities and change encoding to UTF-8.
1924
- </code></pre>
1925
-
1926
-
1927
- </div>
1928
- </div>
1929
- <div class="tags">
1930
- <p class="tag_title">Parameters:</p>
1931
- <ul class="param">
1932
-
1933
- <li>
1934
-
1935
- <span class='name'>raw_text</span>
1936
-
1937
-
1938
- <span class='type'>(<tt>String</tt>, <tt>Object</tt>, <tt>nil</tt>)</span>
1939
-
1940
-
1941
-
1942
- &mdash;
1943
- <div class='inline'>
1944
- <p>Text to strip.</p>
1945
- </div>
1946
-
1947
- </li>
1948
-
1949
- <li>
1950
-
1951
- <span class='name'>orig_encoding</span>
1952
-
1953
-
1954
- <span class='type'>(<tt>String</tt>)</span>
1955
-
1956
-
1957
- <em class="default">(defaults to: <tt>&#39;ASCII&#39;</tt>)</em>
1958
-
1959
-
1960
- &mdash;
1961
- <div class='inline'>
1962
- <p>Text original encoding.</p>
1963
- </div>
1964
-
1965
- </li>
1966
-
1967
- </ul>
1968
-
1969
- <p class="tag_title">Returns:</p>
1970
- <ul class="return">
1971
-
1972
- <li>
1973
-
1974
-
1975
- <span class='type'>(<tt>String</tt>, <tt>nil</tt>)</span>
1976
-
1977
-
1978
-
1979
- &mdash;
1980
- <div class='inline'>
1981
- <p><code>nil</code> when <code>raw_text</code> is nil, else <code>String</code>.</p>
1982
- </div>
1983
-
1984
- </li>
1985
-
1986
- </ul>
1987
-
1988
- </div><table class="source_code">
1989
- <tr>
1990
- <td>
1991
- <pre class="lines">
1992
-
1993
-
1994
- 44
1995
- 45
1996
- 46
1997
- 47
1998
- 48
1999
- 49
2000
- 50
2001
- 51
2002
- 52
2003
- 53
2004
- 54
2005
- 55</pre>
2006
- </td>
2007
- <td>
2008
- <pre class="code"><span class="info file"># File 'lib/ae_easy/text.rb', line 44</span>
2009
-
2010
- <span class='kw'>def</span> <span class='kw'>self</span><span class='period'>.</span><span class='id identifier rubyid_strip'>strip</span> <span class='id identifier rubyid_raw_text'>raw_text</span><span class='comma'>,</span> <span class='id identifier rubyid_orig_encoding'>orig_encoding</span> <span class='op'>=</span> <span class='tstring'><span class='tstring_beg'>&#39;</span><span class='tstring_content'>ASCII</span><span class='tstring_end'>&#39;</span></span>
2011
- <span class='kw'>return</span> <span class='kw'>nil</span> <span class='kw'>if</span> <span class='id identifier rubyid_raw_text'>raw_text</span><span class='period'>.</span><span class='id identifier rubyid_nil?'>nil?</span>
2012
- <span class='id identifier rubyid_raw_text'>raw_text</span> <span class='op'>=</span> <span class='id identifier rubyid_raw_text'>raw_text</span><span class='period'>.</span><span class='id identifier rubyid_to_s'>to_s</span> <span class='kw'>unless</span> <span class='id identifier rubyid_raw_text'>raw_text</span><span class='period'>.</span><span class='id identifier rubyid_is_a?'>is_a?</span> <span class='const'>String</span>
2013
- <span class='id identifier rubyid_regex'>regex</span> <span class='op'>=</span> <span class='tstring'><span class='regexp_beg'>/</span><span class='tstring_content'>(\s|\u3000|\u00a0)+</span><span class='regexp_end'>/</span></span>
2014
- <span class='id identifier rubyid_good_encoding'>good_encoding</span> <span class='op'>=</span> <span class='lparen'>(</span><span class='id identifier rubyid_raw_text'>raw_text</span> <span class='op'>=~</span> <span class='tstring'><span class='regexp_beg'>/</span><span class='tstring_content'>\u3000</span><span class='regexp_end'>/</span></span> <span class='op'>||</span> <span class='kw'>true</span><span class='rparen'>)</span> <span class='kw'>rescue</span> <span class='kw'>false</span>
2015
- <span class='kw'>unless</span> <span class='id identifier rubyid_good_encoding'>good_encoding</span>
2016
- <span class='id identifier rubyid_raw_text'>raw_text</span> <span class='op'>=</span> <span class='id identifier rubyid_raw_text'>raw_text</span><span class='period'>.</span><span class='id identifier rubyid_force_encoding'>force_encoding</span><span class='lparen'>(</span><span class='id identifier rubyid_orig_encoding'>orig_encoding</span><span class='rparen'>)</span><span class='period'>.</span><span class='id identifier rubyid_encode'>encode</span><span class='lparen'>(</span><span class='tstring'><span class='tstring_beg'>&#39;</span><span class='tstring_content'>UTF-8</span><span class='tstring_end'>&#39;</span></span><span class='comma'>,</span> <span class='label'>invalid:</span> <span class='symbol'>:replace</span><span class='comma'>,</span> <span class='label'>undef:</span> <span class='symbol'>:replace</span><span class='rparen'>)</span>
2017
- <span class='id identifier rubyid_regex'>regex</span> <span class='op'>=</span> <span class='tstring'><span class='regexp_beg'>/</span><span class='tstring_content'>(\s|\u3000|\u00a0|\u00c2\u00a0)+</span><span class='regexp_end'>/</span></span>
2018
- <span class='kw'>end</span>
2019
- <span class='id identifier rubyid_text'>text</span> <span class='op'>=</span> <span class='id identifier rubyid_raw_text'>raw_text</span><span class='period'>.</span><span class='id identifier rubyid_gsub'>gsub</span><span class='lparen'>(</span><span class='id identifier rubyid_regex'>regex</span><span class='comma'>,</span> <span class='tstring'><span class='tstring_beg'>&#39;</span><span class='tstring_content'> </span><span class='tstring_end'>&#39;</span></span><span class='rparen'>)</span><span class='period'>.</span><span class='id identifier rubyid_strip'>strip</span>
2020
- <span class='id identifier rubyid_text'>text</span><span class='period'>.</span><span class='id identifier rubyid_nil?'>nil?</span> <span class='op'>?</span> <span class='kw'>nil</span> <span class='op'>:</span> <span class='id identifier rubyid_decode_html'>decode_html</span><span class='lparen'>(</span><span class='id identifier rubyid_text'>text</span><span class='rparen'>)</span>
2021
- <span class='kw'>end</span></pre>
2022
- </td>
2023
- </tr>
2024
- </table>
2025
- </div>
2026
-
2027
- <div class="method_details ">
2028
- <h3 class="signature " id="translate_label_to_key-class_method">
2029
-
2030
- .<strong>translate_label_to_key</strong>(element, label_map) &#x21d2; <tt>Symbol</tt>, <tt>String</tt>
2031
-
2032
-
2033
-
2034
-
2035
-
2036
- </h3><div class="docstring">
2037
- <div class="discussion">
2038
-
2039
- <p>Extract column label and translate it into a friendly key.</p>
2040
-
2041
-
2042
- </div>
2043
- </div>
2044
- <div class="tags">
2045
- <p class="tag_title">Parameters:</p>
2046
- <ul class="param">
2047
-
2048
- <li>
2049
-
2050
- <span class='name'>element</span>
2051
-
2052
-
2053
- <span class='type'>(<tt>Nokogiri::Element</tt>)</span>
2054
-
2055
-
2056
-
2057
- &mdash;
2058
- <div class='inline'>
2059
- <p>Html element to parse.</p>
2060
- </div>
2061
-
2062
- </li>
2063
-
2064
- <li>
2065
-
2066
- <span class='name'>label_map</span>
2067
-
2068
-
2069
- <span class='type'>(<tt>Hash{Symbol,String =&gt; Regexp,String}</tt>)</span>
2070
-
2071
-
2072
-
2073
- &mdash;
2074
- <div class='inline'>
2075
- <p>Label dictionary for translation into key.</p>
2076
- </div>
2077
-
2078
- </li>
2079
-
2080
- </ul>
2081
-
2082
- <p class="tag_title">Returns:</p>
2083
- <ul class="return">
2084
-
2085
- <li>
2086
-
2087
-
2088
- <span class='type'>(<tt>Symbol</tt>, <tt>String</tt>)</span>
2089
-
2090
-
2091
-
2092
- &mdash;
2093
- <div class='inline'>
2094
- <p>Translated key.</p>
2095
- </div>
2096
-
2097
- </li>
2098
-
2099
- </ul>
2100
-
2101
- </div><table class="source_code">
2102
- <tr>
2103
- <td>
2104
- <pre class="lines">
2105
-
2106
-
2107
- 142
2108
- 143
2109
- 144
2110
- 145
2111
- 146
2112
- 147
2113
- 148
2114
- 149
2115
- 150</pre>
2116
- </td>
2117
- <td>
2118
- <pre class="code"><span class="info file"># File 'lib/ae_easy/text.rb', line 142</span>
2119
-
2120
- <span class='kw'>def</span> <span class='kw'>self</span><span class='period'>.</span><span class='id identifier rubyid_translate_label_to_key'>translate_label_to_key</span> <span class='id identifier rubyid_element'>element</span><span class='comma'>,</span> <span class='id identifier rubyid_label_map'>label_map</span>
2121
- <span class='kw'>return</span> <span class='kw'>nil</span> <span class='kw'>if</span> <span class='id identifier rubyid_element'>element</span><span class='period'>.</span><span class='id identifier rubyid_nil?'>nil?</span>
2122
- <span class='id identifier rubyid_element'>element</span><span class='period'>.</span><span class='id identifier rubyid_search'>search</span><span class='lparen'>(</span><span class='tstring'><span class='tstring_beg'>&#39;</span><span class='tstring_content'>//i</span><span class='tstring_end'>&#39;</span></span><span class='rparen'>)</span><span class='period'>.</span><span class='id identifier rubyid_remove'>remove</span> <span class='kw'>if</span> <span class='id identifier rubyid_element'>element</span><span class='period'>.</span><span class='id identifier rubyid_search'>search</span><span class='lparen'>(</span><span class='tstring'><span class='tstring_beg'>&#39;</span><span class='tstring_content'>//i</span><span class='tstring_end'>&#39;</span></span><span class='rparen'>)</span><span class='period'>.</span><span class='id identifier rubyid_count'>count</span> <span class='op'>&gt;</span> <span class='int'>0</span>
2123
- <span class='id identifier rubyid_text'>text</span> <span class='op'>=</span> <span class='id identifier rubyid_strip'>strip</span> <span class='id identifier rubyid_element'>element</span><span class='period'>.</span><span class='id identifier rubyid_text'>text</span>
2124
- <span class='id identifier rubyid_key_pair'>key_pair</span> <span class='op'>=</span> <span class='id identifier rubyid_label_map'>label_map</span><span class='period'>.</span><span class='id identifier rubyid_find'>find</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_k'>k</span><span class='comma'>,</span><span class='id identifier rubyid_v'>v</span><span class='op'>|</span>
2125
- <span class='id identifier rubyid_v'>v</span><span class='period'>.</span><span class='id identifier rubyid_is_a?'>is_a?</span><span class='lparen'>(</span><span class='const'>Regexp</span><span class='rparen'>)</span> <span class='op'>?</span> <span class='lparen'>(</span><span class='id identifier rubyid_text'>text</span> <span class='op'>=~</span> <span class='id identifier rubyid_v'>v</span><span class='rparen'>)</span> <span class='op'>:</span> <span class='lparen'>(</span><span class='id identifier rubyid_text'>text</span> <span class='op'>==</span> <span class='id identifier rubyid_v'>v</span><span class='rparen'>)</span>
2126
- <span class='kw'>end</span>
2127
- <span class='id identifier rubyid_key'>key</span> <span class='op'>=</span> <span class='id identifier rubyid_key_pair'>key_pair</span><span class='period'>.</span><span class='id identifier rubyid_nil?'>nil?</span> <span class='op'>?</span> <span class='kw'>nil</span> <span class='op'>:</span> <span class='id identifier rubyid_key_pair'>key_pair</span><span class='lbracket'>[</span><span class='int'>0</span><span class='rbracket'>]</span>
2128
- <span class='kw'>end</span></pre>
2129
- </td>
2130
- </tr>
2131
- </table>
2132
- </div>
2133
-
2134
- </div>
2135
-
2136
- </div>
2137
-
2138
- <div id="footer">
2139
- Generated on Mon Mar 11 21:38:55 2019 by
2140
- <a href="http://yardoc.org" title="Yay! A Ruby Documentation Tool" target="_parent">yard</a>
2141
- 0.9.18 (ruby-2.5.3).
2142
- </div>
2143
-
2144
- </div>
2145
- </body>
2146
- </html>