ae_easy-text 0.0.4 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,117 +0,0 @@
1
- <!DOCTYPE html>
2
- <html>
3
- <head>
4
- <meta charset="utf-8">
5
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
- <title>
7
- Module: AeEasy
8
-
9
- &mdash; Documentation by YARD 0.9.18
10
-
11
- </title>
12
-
13
- <link rel="stylesheet" href="css/style.css" type="text/css" charset="utf-8" />
14
-
15
- <link rel="stylesheet" href="css/common.css" type="text/css" charset="utf-8" />
16
-
17
- <script type="text/javascript" charset="utf-8">
18
- pathId = "AeEasy";
19
- relpath = '';
20
- </script>
21
-
22
-
23
- <script type="text/javascript" charset="utf-8" src="js/jquery.js"></script>
24
-
25
- <script type="text/javascript" charset="utf-8" src="js/app.js"></script>
26
-
27
-
28
- </head>
29
- <body>
30
- <div class="nav_wrap">
31
- <iframe id="nav" src="class_list.html?1"></iframe>
32
- <div id="resizer"></div>
33
- </div>
34
-
35
- <div id="main" tabindex="-1">
36
- <div id="header">
37
- <div id="menu">
38
-
39
- <a href="_index.html">Index (A)</a> &raquo;
40
-
41
-
42
- <span class="title">AeEasy</span>
43
-
44
- </div>
45
-
46
- <div id="search">
47
-
48
- <a class="full_list_link" id="class_list_link"
49
- href="class_list.html">
50
-
51
- <svg width="24" height="24">
52
- <rect x="0" y="4" width="24" height="4" rx="1" ry="1"></rect>
53
- <rect x="0" y="12" width="24" height="4" rx="1" ry="1"></rect>
54
- <rect x="0" y="20" width="24" height="4" rx="1" ry="1"></rect>
55
- </svg>
56
- </a>
57
-
58
- </div>
59
- <div class="clear"></div>
60
- </div>
61
-
62
- <div id="content"><h1>Module: AeEasy
63
-
64
-
65
-
66
- </h1>
67
- <div class="box_info">
68
-
69
-
70
-
71
-
72
-
73
-
74
-
75
-
76
-
77
-
78
-
79
- <dl>
80
- <dt>Defined in:</dt>
81
- <dd>lib/ae_easy/text.rb<span class="defines">,<br />
82
- lib/ae_easy/text/version.rb</span>
83
- </dd>
84
- </dl>
85
-
86
- </div>
87
-
88
- <h2>Defined Under Namespace</h2>
89
- <p class="children">
90
-
91
-
92
- <strong class="modules">Modules:</strong> <span class='object_link'><a href="AeEasy/Text.html" title="AeEasy::Text (module)">Text</a></span>
93
-
94
-
95
-
96
-
97
- </p>
98
-
99
-
100
-
101
-
102
-
103
-
104
-
105
-
106
-
107
- </div>
108
-
109
- <div id="footer">
110
- Generated on Mon Mar 11 21:38:55 2019 by
111
- <a href="http://yardoc.org" title="Yay! A Ruby Documentation Tool" target="_parent">yard</a>
112
- 0.9.18 (ruby-2.5.3).
113
- </div>
114
-
115
- </div>
116
- </body>
117
- </html>
@@ -1,2146 +0,0 @@
1
- <!DOCTYPE html>
2
- <html>
3
- <head>
4
- <meta charset="utf-8">
5
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
- <title>
7
- Module: AeEasy::Text
8
-
9
- &mdash; Documentation by YARD 0.9.18
10
-
11
- </title>
12
-
13
- <link rel="stylesheet" href="../css/style.css" type="text/css" charset="utf-8" />
14
-
15
- <link rel="stylesheet" href="../css/common.css" type="text/css" charset="utf-8" />
16
-
17
- <script type="text/javascript" charset="utf-8">
18
- pathId = "AeEasy::Text";
19
- relpath = '../';
20
- </script>
21
-
22
-
23
- <script type="text/javascript" charset="utf-8" src="../js/jquery.js"></script>
24
-
25
- <script type="text/javascript" charset="utf-8" src="../js/app.js"></script>
26
-
27
-
28
- </head>
29
- <body>
30
- <div class="nav_wrap">
31
- <iframe id="nav" src="../class_list.html?1"></iframe>
32
- <div id="resizer"></div>
33
- </div>
34
-
35
- <div id="main" tabindex="-1">
36
- <div id="header">
37
- <div id="menu">
38
-
39
- <a href="../_index.html">Index (T)</a> &raquo;
40
- <span class='title'><span class='object_link'><a href="../AeEasy.html" title="AeEasy (module)">AeEasy</a></span></span>
41
- &raquo;
42
- <span class="title">Text</span>
43
-
44
- </div>
45
-
46
- <div id="search">
47
-
48
- <a class="full_list_link" id="class_list_link"
49
- href="../class_list.html">
50
-
51
- <svg width="24" height="24">
52
- <rect x="0" y="4" width="24" height="4" rx="1" ry="1"></rect>
53
- <rect x="0" y="12" width="24" height="4" rx="1" ry="1"></rect>
54
- <rect x="0" y="20" width="24" height="4" rx="1" ry="1"></rect>
55
- </svg>
56
- </a>
57
-
58
- </div>
59
- <div class="clear"></div>
60
- </div>
61
-
62
- <div id="content"><h1>Module: AeEasy::Text
63
-
64
-
65
-
66
- </h1>
67
- <div class="box_info">
68
-
69
-
70
-
71
-
72
-
73
-
74
-
75
-
76
-
77
-
78
-
79
- <dl>
80
- <dt>Defined in:</dt>
81
- <dd>lib/ae_easy/text.rb<span class="defines">,<br />
82
- lib/ae_easy/text/version.rb</span>
83
- </dd>
84
- </dl>
85
-
86
- </div>
87
-
88
-
89
-
90
- <h2>
91
- Constant Summary
92
- <small><a href="#" class="constants_summary_toggle">collapse</a></small>
93
- </h2>
94
-
95
- <dl class="constants">
96
-
97
- <dt id="VERSION-constant" class="">VERSION =
98
- <div class="docstring">
99
- <div class="discussion">
100
-
101
- <p>Gem version</p>
102
-
103
-
104
- </div>
105
- </div>
106
- <div class="tags">
107
-
108
-
109
- </div>
110
- </dt>
111
- <dd><pre class="code"><span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>0.0.4</span><span class='tstring_end'>&quot;</span></span></pre></dd>
112
-
113
- </dl>
114
-
115
-
116
-
117
-
118
-
119
-
120
-
121
-
122
-
123
- <h2>
124
- Class Method Summary
125
- <small><a href="#" class="summary_toggle">collapse</a></small>
126
- </h2>
127
-
128
- <ul class="summary">
129
-
130
- <li class="public ">
131
- <span class="summary_signature">
132
-
133
- <a href="#decode_html-class_method" title="decode_html (class method)">.<strong>decode_html</strong>(text) &#x21d2; String </a>
134
-
135
-
136
-
137
- </span>
138
-
139
-
140
-
141
-
142
-
143
-
144
-
145
-
146
-
147
- <span class="summary_desc"><div class='inline'>
148
- <p>Decode HTML entities from text.</p>
149
- </div></span>
150
-
151
- </li>
152
-
153
-
154
- <li class="public ">
155
- <span class="summary_signature">
156
-
157
- <a href="#default_parser-class_method" title="default_parser (class method)">.<strong>default_parser</strong>(cell_element, data, key) &#x21d2; Object </a>
158
-
159
-
160
-
161
- </span>
162
-
163
-
164
-
165
-
166
-
167
-
168
-
169
-
170
-
171
- <span class="summary_desc"><div class='inline'>
172
- <p>Default cell content parser used to parse cell element.</p>
173
- </div></span>
174
-
175
- </li>
176
-
177
-
178
- <li class="public ">
179
- <span class="summary_signature">
180
-
181
- <a href="#encode_html-class_method" title="encode_html (class method)">.<strong>encode_html</strong>(text) &#x21d2; String </a>
182
-
183
-
184
-
185
- </span>
186
-
187
-
188
-
189
-
190
-
191
-
192
-
193
-
194
-
195
- <span class="summary_desc"><div class='inline'>
196
- <p>Encode text for valid HTML entities.</p>
197
- </div></span>
198
-
199
- </li>
200
-
201
-
202
- <li class="public ">
203
- <span class="summary_signature">
204
-
205
- <a href="#hash-class_method" title="hash (class method)">.<strong>hash</strong>(object) &#x21d2; String </a>
206
-
207
-
208
-
209
- </span>
210
-
211
-
212
-
213
-
214
-
215
-
216
-
217
-
218
-
219
- <span class="summary_desc"><div class='inline'>
220
- <p>Create a hash from object.</p>
221
- </div></span>
222
-
223
- </li>
224
-
225
-
226
- <li class="public ">
227
- <span class="summary_signature">
228
-
229
- <a href="#parse_content-class_method" title="parse_content (class method)">.<strong>parse_content</strong>(opts) {|data, row, header_map| ... } &#x21d2; Array&lt;Hash&gt;<sup>?</sup> </a>
230
-
231
-
232
-
233
- </span>
234
-
235
-
236
-
237
-
238
-
239
-
240
-
241
-
242
-
243
- <span class="summary_desc"><div class='inline'>
244
- <p>Parse row data matching a selector using a header map to translate
245
- between columns and friendly keys.</p>
246
- </div></span>
247
-
248
- </li>
249
-
250
-
251
- <li class="public ">
252
- <span class="summary_signature">
253
-
254
- <a href="#parse_header_map-class_method" title="parse_header_map (class method)">.<strong>parse_header_map</strong>(opts = {}) &#x21d2; Hash{Symbol,String =&gt; Integer}<sup>?</sup> </a>
255
-
256
-
257
-
258
- </span>
259
-
260
-
261
-
262
-
263
-
264
-
265
-
266
-
267
-
268
- <span class="summary_desc"><div class='inline'>
269
- <p>Parse header from selector and create a header map to match a column key
270
- with column index.</p>
271
- </div></span>
272
-
273
- </li>
274
-
275
-
276
- <li class="public ">
277
- <span class="summary_signature">
278
-
279
- <a href="#parse_table-class_method" title="parse_table (class method)">.<strong>parse_table</strong>(opts = {}) {|data, row, header_map| ... } &#x21d2; Hash{Symbol =&gt; Array,Hash,nil} </a>
280
-
281
-
282
-
283
- </span>
284
-
285
-
286
-
287
-
288
-
289
-
290
-
291
-
292
-
293
- <span class="summary_desc"><div class='inline'>
294
- <p>Parse data from a horizontal table-like structure matching selectors and
295
- using a header map to match columns.</p>
296
- </div></span>
297
-
298
- </li>
299
-
300
-
301
- <li class="public ">
302
- <span class="summary_signature">
303
-
304
- <a href="#parse_vertical_table-class_method" title="parse_vertical_table (class method)">.<strong>parse_vertical_table</strong>(opts = {}) {|data, row, header_map| ... } &#x21d2; Hash{Symbol =&gt; Array,Hash,nil} </a>
305
-
306
-
307
-
308
- </span>
309
-
310
-
311
-
312
-
313
-
314
-
315
-
316
-
317
-
318
- <span class="summary_desc"><div class='inline'>
319
- <p>Parse data from a vertical table-like structure matching selectors and
320
- using a header map to match columns.</p>
321
- </div></span>
322
-
323
- </li>
324
-
325
-
326
- <li class="public ">
327
- <span class="summary_signature">
328
-
329
- <a href="#strip-class_method" title="strip (class method)">.<strong>strip</strong>(raw_text, orig_encoding = &#39;ASCII&#39;) &#x21d2; String<sup>?</sup> </a>
330
-
331
-
332
-
333
- </span>
334
-
335
-
336
-
337
-
338
-
339
-
340
-
341
-
342
-
343
- <span class="summary_desc"><div class='inline'>
344
- <p>Strip a value by trimming spaces, reducing sequential spaces into a
345
- single space, decoding HTML entities and changing encoding to UTF-8.</p>
346
- </div></span>
347
-
348
- </li>
349
-
350
-
351
- <li class="public ">
352
- <span class="summary_signature">
353
-
354
- <a href="#translate_label_to_key-class_method" title="translate_label_to_key (class method)">.<strong>translate_label_to_key</strong>(element, label_map) &#x21d2; Symbol, String </a>
355
-
356
-
357
-
358
- </span>
359
-
360
-
361
-
362
-
363
-
364
-
365
-
366
-
367
-
368
- <span class="summary_desc"><div class='inline'>
369
- <p>Extract column label and translate it into a friendly key.</p>
370
- </div></span>
371
-
372
- </li>
373
-
374
-
375
- </ul>
376
-
377
-
378
-
379
-
380
- <div id="class_method_details" class="method_details_list">
381
- <h2>Class Method Details</h2>
382
-
383
-
384
- <div class="method_details first">
385
- <h3 class="signature first" id="decode_html-class_method">
386
-
387
- .<strong>decode_html</strong>(text) &#x21d2; <tt>String</tt>
388
-
389
-
390
-
391
-
392
-
393
- </h3><div class="docstring">
394
- <div class="discussion">
395
-
396
- <p>Decode HTML entities from text.</p>
397
-
398
-
399
- </div>
400
- </div>
401
- <div class="tags">
402
- <p class="tag_title">Parameters:</p>
403
- <ul class="param">
404
-
405
- <li>
406
-
407
- <span class='name'>text</span>
408
-
409
-
410
- <span class='type'>(<tt>String</tt>)</span>
411
-
412
-
413
-
414
- &mdash;
415
- <div class='inline'>
416
- <p>Text to decode.</p>
417
- </div>
418
-
419
- </li>
420
-
421
- </ul>
422
-
423
- <p class="tag_title">Returns:</p>
424
- <ul class="return">
425
-
426
- <li>
427
-
428
-
429
- <span class='type'>(<tt>String</tt>)</span>
430
-
431
-
432
-
433
- </li>
434
-
435
- </ul>
436
-
437
- </div><table class="source_code">
438
- <tr>
439
- <td>
440
- <pre class="lines">
441
-
442
-
443
- 33
444
- 34
445
- 35</pre>
446
- </td>
447
- <td>
448
- <pre class="code"><span class="info file"># File 'lib/ae_easy/text.rb', line 33</span>
449
-
450
- <span class='kw'>def</span> <span class='kw'>self</span><span class='period'>.</span><span class='id identifier rubyid_decode_html'>decode_html</span> <span class='id identifier rubyid_text'>text</span>
451
- <span class='const'>CGI</span><span class='period'>.</span><span class='id identifier rubyid_unescapeHTML'>unescapeHTML</span> <span class='id identifier rubyid_text'>text</span>
452
- <span class='kw'>end</span></pre>
453
- </td>
454
- </tr>
455
- </table>
456
- </div>
457
-
458
- <div class="method_details ">
459
- <h3 class="signature " id="default_parser-class_method">
460
-
461
- .<strong>default_parser</strong>(cell_element, data, key) &#x21d2; <tt>Object</tt>
462
-
463
-
464
-
465
-
466
-
467
- </h3><div class="docstring">
468
- <div class="discussion">
469
-
470
- <p>Default cell content parser used to parse cell element.</p>
471
-
472
-
473
- </div>
474
- </div>
475
- <div class="tags">
476
- <p class="tag_title">Parameters:</p>
477
- <ul class="param">
478
-
479
- <li>
480
-
481
- <span class='name'>cell_element</span>
482
-
483
-
484
- <span class='type'>(<tt>Nokogiri::Element</tt>)</span>
485
-
486
-
487
-
488
- &mdash;
489
- <div class='inline'>
490
- <p>Cell element to parse.</p>
491
- </div>
492
-
493
- </li>
494
-
495
- <li>
496
-
497
- <span class='name'>data</span>
498
-
499
-
500
- <span class='type'>(<tt>Hash</tt>)</span>
501
-
502
-
503
-
504
- &mdash;
505
- <div class='inline'>
506
- <p>Data hash to save parsed data into.</p>
507
- </div>
508
-
509
- </li>
510
-
511
- <li>
512
-
513
- <span class='name'>key</span>
514
-
515
-
516
- <span class='type'>(<tt>String</tt>, <tt>Symbol</tt>)</span>
517
-
518
-
519
-
520
- &mdash;
521
- <div class='inline'>
522
- <p>Header column key being parsed.</p>
523
- </div>
524
-
525
- </li>
526
-
527
- </ul>
528
-
529
-
530
- </div><table class="source_code">
531
- <tr>
532
- <td>
533
- <pre class="lines">
534
-
535
-
536
- 62
537
- 63
538
- 64
539
- 65
540
- 66</pre>
541
- </td>
542
- <td>
543
- <pre class="code"><span class="info file"># File 'lib/ae_easy/text.rb', line 62</span>
544
-
545
- <span class='kw'>def</span> <span class='kw'>self</span><span class='period'>.</span><span class='id identifier rubyid_default_parser'>default_parser</span> <span class='id identifier rubyid_cell_element'>cell_element</span><span class='comma'>,</span> <span class='id identifier rubyid_data'>data</span><span class='comma'>,</span> <span class='id identifier rubyid_key'>key</span>
546
- <span class='kw'>return</span> <span class='kw'>if</span> <span class='id identifier rubyid_cell_element'>cell_element</span><span class='period'>.</span><span class='id identifier rubyid_nil?'>nil?</span>
547
- <span class='id identifier rubyid_cell_element'>cell_element</span><span class='period'>.</span><span class='id identifier rubyid_search'>search</span><span class='lparen'>(</span><span class='tstring'><span class='tstring_beg'>&#39;</span><span class='tstring_content'>//i</span><span class='tstring_end'>&#39;</span></span><span class='rparen'>)</span><span class='period'>.</span><span class='id identifier rubyid_remove'>remove</span> <span class='kw'>if</span> <span class='id identifier rubyid_cell_element'>cell_element</span><span class='period'>.</span><span class='id identifier rubyid_search'>search</span><span class='lparen'>(</span><span class='tstring'><span class='tstring_beg'>&#39;</span><span class='tstring_content'>//i</span><span class='tstring_end'>&#39;</span></span><span class='rparen'>)</span><span class='period'>.</span><span class='id identifier rubyid_count'>count</span> <span class='op'>&gt;</span> <span class='int'>0</span>
548
- <span class='id identifier rubyid_data'>data</span><span class='lbracket'>[</span><span class='id identifier rubyid_key'>key</span><span class='rbracket'>]</span> <span class='op'>=</span> <span class='id identifier rubyid_strip'>strip</span> <span class='id identifier rubyid_cell_element'>cell_element</span><span class='period'>.</span><span class='id identifier rubyid_text'>text</span>
549
- <span class='kw'>end</span></pre>
550
- </td>
551
- </tr>
552
- </table>
553
- </div>
554
-
555
- <div class="method_details ">
556
- <h3 class="signature " id="encode_html-class_method">
557
-
558
- .<strong>encode_html</strong>(text) &#x21d2; <tt>String</tt>
559
-
560
-
561
-
562
-
563
-
564
- </h3><div class="docstring">
565
- <div class="discussion">
566
-
567
- <p>Encode text for valid HTML entities.</p>
568
-
569
-
570
- </div>
571
- </div>
572
- <div class="tags">
573
- <p class="tag_title">Parameters:</p>
574
- <ul class="param">
575
-
576
- <li>
577
-
578
- <span class='name'>text</span>
579
-
580
-
581
- <span class='type'>(<tt>String</tt>)</span>
582
-
583
-
584
-
585
- &mdash;
586
- <div class='inline'>
587
- <p>Text to encode.</p>
588
- </div>
589
-
590
- </li>
591
-
592
- </ul>
593
-
594
- <p class="tag_title">Returns:</p>
595
- <ul class="return">
596
-
597
- <li>
598
-
599
-
600
- <span class='type'>(<tt>String</tt>)</span>
601
-
602
-
603
-
604
- </li>
605
-
606
- </ul>
607
-
608
- </div><table class="source_code">
609
- <tr>
610
- <td>
611
- <pre class="lines">
612
-
613
-
614
- 24
615
- 25
616
- 26</pre>
617
- </td>
618
- <td>
619
- <pre class="code"><span class="info file"># File 'lib/ae_easy/text.rb', line 24</span>
620
-
621
- <span class='kw'>def</span> <span class='kw'>self</span><span class='period'>.</span><span class='id identifier rubyid_encode_html'>encode_html</span> <span class='id identifier rubyid_text'>text</span>
622
- <span class='const'>CGI</span><span class='period'>.</span><span class='id identifier rubyid_escapeHTML'>escapeHTML</span> <span class='id identifier rubyid_text'>text</span>
623
- <span class='kw'>end</span></pre>
624
- </td>
625
- </tr>
626
- </table>
627
- </div>
628
-
629
- <div class="method_details ">
630
- <h3 class="signature " id="hash-class_method">
631
-
632
- .<strong>hash</strong>(object) &#x21d2; <tt>String</tt>
633
-
634
-
635
-
636
-
637
-
638
- </h3><div class="docstring">
639
- <div class="discussion">
640
-
641
- <p>Create a hash from object.</p>
642
-
643
-
644
- </div>
645
- </div>
646
- <div class="tags">
647
- <p class="tag_title">Parameters:</p>
648
- <ul class="param">
649
-
650
- <li>
651
-
652
- <span class='name'>object</span>
653
-
654
-
655
- <span class='type'>(<tt>String</tt>, <tt>Hash</tt>, <tt>Object</tt>)</span>
656
-
657
-
658
-
659
- &mdash;
660
- <div class='inline'>
661
- <p>Object to create hash from.</p>
662
- </div>
663
-
664
- </li>
665
-
666
- </ul>
667
-
668
- <p class="tag_title">Returns:</p>
669
- <ul class="return">
670
-
671
- <li>
672
-
673
-
674
- <span class='type'>(<tt>String</tt>)</span>
675
-
676
-
677
-
678
- </li>
679
-
680
- </ul>
681
-
682
- </div><table class="source_code">
683
- <tr>
684
- <td>
685
- <pre class="lines">
686
-
687
-
688
- 14
689
- 15
690
- 16
691
- 17</pre>
692
- </td>
693
- <td>
694
- <pre class="code"><span class="info file"># File 'lib/ae_easy/text.rb', line 14</span>
695
-
696
- <span class='kw'>def</span> <span class='kw'>self</span><span class='period'>.</span><span class='id identifier rubyid_hash'>hash</span> <span class='id identifier rubyid_object'>object</span>
697
- <span class='id identifier rubyid_object'>object</span> <span class='op'>=</span> <span class='id identifier rubyid_object'>object</span><span class='period'>.</span><span class='id identifier rubyid_hash'>hash</span> <span class='kw'>if</span> <span class='id identifier rubyid_object'>object</span><span class='period'>.</span><span class='id identifier rubyid_is_a?'>is_a?</span> <span class='const'>Hash</span>
698
- <span class='const'>Digest</span><span class='op'>::</span><span class='const'>SHA1</span><span class='period'>.</span><span class='id identifier rubyid_hexdigest'>hexdigest</span> <span class='id identifier rubyid_object'>object</span><span class='period'>.</span><span class='id identifier rubyid_to_s'>to_s</span>
699
- <span class='kw'>end</span></pre>
700
- </td>
701
- </tr>
702
- </table>
703
- </div>
704
-
705
- <div class="method_details ">
706
- <h3 class="signature " id="parse_content-class_method">
707
-
708
- .<strong>parse_content</strong>(opts) {|data, row, header_map| ... } &#x21d2; <tt>Array&lt;Hash&gt;</tt><sup>?</sup>
709
-
710
-
711
-
712
-
713
-
714
- </h3><div class="docstring">
715
- <div class="discussion">
716
-
717
- <p>Parse row data matching a selector using a header map to translate
718
-
719
- between columns and friendly keys.
720
- </p>
721
-
722
-
723
- </div>
724
- </div>
725
- <div class="tags">
726
- <p class="tag_title">Parameters:</p>
727
- <ul class="param">
728
-
729
- <li>
730
-
731
- <span class='name'>opts</span>
732
-
733
-
734
- <span class='type'>(<tt>Hash</tt>)</span>
735
-
736
-
737
-
738
- &mdash;
739
- <div class='inline'>
740
- <p>({}) Configuration options.</p>
741
- </div>
742
-
743
- </li>
744
-
745
- </ul>
746
-
747
-
748
-
749
-
750
- <p class="tag_title">Options Hash (<tt>opts</tt>):</p>
751
- <ul class="option">
752
-
753
- <li>
754
- <span class="name">:html</span>
755
- <span class="type">(<tt>Nokogiri::Element</tt>)</span>
756
- <span class="default">
757
-
758
- </span>
759
-
760
- &mdash; <div class='inline'>
761
- <p>Container element to search into.</p>
762
- </div>
763
-
764
- </li>
765
-
766
- <li>
767
- <span class="name">:selector</span>
768
- <span class="type">(<tt>String</tt>)</span>
769
- <span class="default">
770
-
771
- </span>
772
-
773
- &mdash; <div class='inline'>
774
- <p>CSS selector to match content cells.</p>
775
- </div>
776
-
777
- </li>
778
-
779
- <li>
780
- <span class="name">:first_row_header</span>
781
- <span class="type">(<tt>Boolean</tt>)</span>
782
- <span class="default">
783
-
784
- &mdash; default:
785
- <tt>false</tt>
786
-
787
- </span>
788
-
789
- &mdash; <div class='inline'>
790
- <p>If true then first matching element will be assumed to be header and
791
- ignored.</p>
792
- </div>
793
-
794
- </li>
795
-
796
- <li>
797
- <span class="name">:header_map</span>
798
- <span class="type">(<tt>Hash{Symbol,String =&gt; Integer}</tt>)</span>
799
- <span class="default">
800
-
801
- </span>
802
-
803
- &mdash; <div class='inline'>
804
- <p>Header key vs index dictionary.</p>
805
- </div>
806
-
807
- </li>
808
-
809
- <li>
810
- <span class="name">:column_parsers</span>
811
- <span class="type">(<tt>Hash{Symbol,String =&gt; lambda,proc}</tt>)</span>
812
- <span class="default">
813
-
814
- &mdash; default:
815
- <tt>{}</tt>
816
-
817
- </span>
818
-
819
- &mdash; <div class='inline'>
820
- <p>Custom column parsers for advanced data extraction.</p>
821
- </div>
822
-
823
- </li>
824
-
825
- <li>
826
- <span class="name">:ignore_text_nodes</span>
827
- <span class="type">(<tt>Boolean</tt>)</span>
828
- <span class="default">
829
-
830
- &mdash; default:
831
- <tt>true</tt>
832
-
833
- </span>
834
-
835
- &mdash; <div class='inline'>
836
- <p>Ignore text nodes when retrieving content cells and rows.</p>
837
- </div>
838
-
839
- </li>
840
-
841
- </ul>
842
-
843
-
844
- <p class="tag_title">Yield Parameters:</p>
845
- <ul class="yieldparam">
846
-
847
- <li>
848
-
849
- <span class='name'>data</span>
850
-
851
-
852
- <span class='type'>(<tt>Hash{Symbol,String =&gt; Object}</tt>)</span>
853
-
854
-
855
-
856
- &mdash;
857
- <div class='inline'>
858
- <p>Parsed row data.</p>
859
- </div>
860
-
861
- </li>
862
-
863
- <li>
864
-
865
- <span class='name'>row</span>
866
-
867
-
868
- <span class='type'>(<tt>Array</tt>)</span>
869
-
870
-
871
-
872
- &mdash;
873
- <div class='inline'>
874
- <p>Raw row data.</p>
875
- </div>
876
-
877
- </li>
878
-
879
- <li>
880
-
881
- <span class='name'>header_map</span>
882
-
883
-
884
- <span class='type'>(<tt>Hash{Symbol,String =&gt; Integer}</tt>)</span>
885
-
886
-
887
-
888
- &mdash;
889
- <div class='inline'>
890
- <p>Header map used.</p>
891
- </div>
892
-
893
- </li>
894
-
895
- </ul>
896
- <p class="tag_title">Yield Returns:</p>
897
- <ul class="yieldreturn">
898
-
899
- <li>
900
-
901
-
902
- <span class='type'>(<tt>Boolean</tt>)</span>
903
-
904
-
905
-
906
- &mdash;
907
- <div class='inline'>
908
- <p>`true` when valid, else `false`.</p>
909
- </div>
910
-
911
- </li>
912
-
913
- </ul>
914
- <p class="tag_title">Returns:</p>
915
- <ul class="return">
916
-
917
- <li>
918
-
919
-
920
- <span class='type'>(<tt>Array&lt;Hash&gt;</tt>, <tt>nil</tt>)</span>
921
-
922
-
923
-
924
- &mdash;
925
- <div class='inline'>
926
- <p>Parsed rows data.</p>
927
- </div>
928
-
929
- </li>
930
-
931
- </ul>
932
-
933
- </div><table class="source_code">
934
- <tr>
935
- <td>
936
- <pre class="lines">
937
-
938
-
939
- 89
940
- 90
941
- 91
942
- 92
943
- 93
944
- 94
945
- 95
946
- 96
947
- 97
948
- 98
949
- 99
950
- 100
951
- 101
952
- 102
953
- 103
954
- 104
955
- 105
956
- 106
957
- 107
958
- 108
959
- 109
960
- 110
961
- 111
962
- 112
963
- 113
964
- 114
965
- 115
966
- 116
967
- 117
968
- 118
969
- 119
970
- 120
971
- 121
972
- 122
973
- 123
974
- 124
975
- 125
976
- 126
977
- 127
978
- 128
979
- 129
980
- 130
981
- 131
982
- 132
983
- 133</pre>
984
- </td>
985
- <td>
986
- <pre class="code"><span class="info file"># File 'lib/ae_easy/text.rb', line 89</span>
987
-
988
- <span class='kw'>def</span> <span class='kw'>self</span><span class='period'>.</span><span class='id identifier rubyid_parse_content'>parse_content</span> <span class='id identifier rubyid_opts'>opts</span><span class='comma'>,</span> <span class='op'>&amp;</span><span class='id identifier rubyid_filter'>filter</span>
989
- <span class='id identifier rubyid_opts'>opts</span> <span class='op'>=</span> <span class='lbrace'>{</span>
990
- <span class='label'>html:</span> <span class='kw'>nil</span><span class='comma'>,</span>
991
- <span class='label'>selector:</span> <span class='kw'>nil</span><span class='comma'>,</span>
992
- <span class='label'>first_row_header:</span> <span class='kw'>false</span><span class='comma'>,</span>
993
- <span class='label'>header_map:</span> <span class='lbrace'>{</span><span class='rbrace'>}</span><span class='comma'>,</span>
994
- <span class='label'>column_parsers:</span> <span class='lbrace'>{</span><span class='rbrace'>}</span><span class='comma'>,</span>
995
- <span class='label'>ignore_text_nodes:</span> <span class='kw'>true</span>
996
- <span class='rbrace'>}</span><span class='period'>.</span><span class='id identifier rubyid_merge'>merge</span> <span class='id identifier rubyid_opts'>opts</span>
997
-
998
- <span class='comment'># Setup config
999
- </span> <span class='id identifier rubyid_data'>data</span> <span class='op'>=</span> <span class='lbracket'>[</span><span class='rbracket'>]</span>
1000
- <span class='id identifier rubyid_row_data'>row_data</span> <span class='op'>=</span> <span class='id identifier rubyid_child_element'>child_element</span> <span class='op'>=</span> <span class='kw'>nil</span>
1001
- <span class='id identifier rubyid_first'>first</span> <span class='op'>=</span> <span class='id identifier rubyid_first_row_header'>first_row_header</span> <span class='op'>=</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:first_row_header</span><span class='rbracket'>]</span>
1002
- <span class='id identifier rubyid_header_map'>header_map</span> <span class='op'>=</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:header_map</span><span class='rbracket'>]</span>
1003
- <span class='id identifier rubyid_column_parsers'>column_parsers</span> <span class='op'>=</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:column_parsers</span><span class='rbracket'>]</span>
1004
- <span class='id identifier rubyid_ignore_text_nodes'>ignore_text_nodes</span> <span class='op'>=</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:ignore_text_nodes</span><span class='rbracket'>]</span>
1005
-
1006
- <span class='comment'># Get and parse rows
1007
- </span> <span class='id identifier rubyid_html_rows'>html_rows</span> <span class='op'>=</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:html</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_css'>css</span><span class='lparen'>(</span><span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:selector</span><span class='rbracket'>]</span><span class='rparen'>)</span>
1008
- <span class='id identifier rubyid_html_rows'>html_rows</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_row'>row</span><span class='op'>|</span>
1009
- <span class='kw'>next</span> <span class='kw'>if</span> <span class='id identifier rubyid_ignore_text_nodes'>ignore_text_nodes</span> <span class='op'>&amp;&amp;</span> <span class='id identifier rubyid_row'>row</span><span class='period'>.</span><span class='id identifier rubyid_name'>name</span> <span class='op'>==</span> <span class='tstring'><span class='tstring_beg'>&#39;</span><span class='tstring_content'>text</span><span class='tstring_end'>&#39;</span></span>
1010
-
1011
- <span class='comment'># First row header validation
1012
- </span> <span class='kw'>if</span> <span class='id identifier rubyid_first'>first</span> <span class='op'>&amp;&amp;</span> <span class='id identifier rubyid_first_row_header'>first_row_header</span>
1013
- <span class='id identifier rubyid_first'>first</span> <span class='op'>=</span> <span class='kw'>false</span>
1014
- <span class='kw'>next</span>
1015
- <span class='kw'>end</span>
1016
-
1017
- <span class='comment'># Extract content data
1018
- </span> <span class='id identifier rubyid_row_data'>row_data</span> <span class='op'>=</span> <span class='lbrace'>{</span><span class='rbrace'>}</span>
1019
- <span class='id identifier rubyid_header_map'>header_map</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_key'>key</span><span class='comma'>,</span> <span class='id identifier rubyid_index'>index</span><span class='op'>|</span>
1020
- <span class='comment'># Parse column html with default or custom parser
1021
- </span> <span class='id identifier rubyid_children'>children</span> <span class='op'>=</span> <span class='id identifier rubyid_row'>row</span><span class='period'>.</span><span class='id identifier rubyid_children'>children</span>
1022
- <span class='id identifier rubyid_children'>children</span> <span class='op'>=</span> <span class='id identifier rubyid_children'>children</span><span class='period'>.</span><span class='id identifier rubyid_select'>select</span><span class='lbrace'>{</span><span class='op'>|</span><span class='id identifier rubyid_i'>i</span><span class='op'>|</span><span class='id identifier rubyid_i'>i</span><span class='period'>.</span><span class='id identifier rubyid_name'>name</span> <span class='op'>!=</span> <span class='tstring'><span class='tstring_beg'>&#39;</span><span class='tstring_content'>text</span><span class='tstring_end'>&#39;</span></span><span class='rbrace'>}</span> <span class='kw'>if</span> <span class='id identifier rubyid_ignore_text_nodes'>ignore_text_nodes</span>
1023
- <span class='id identifier rubyid_child_element'>child_element</span> <span class='op'>=</span> <span class='id identifier rubyid_children'>children</span><span class='lbracket'>[</span><span class='id identifier rubyid_index'>index</span><span class='rbracket'>]</span>
1024
- <span class='id identifier rubyid_column_parsers'>column_parsers</span><span class='lbracket'>[</span><span class='id identifier rubyid_key'>key</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_nil?'>nil?</span> <span class='op'>?</span>
1025
- <span class='id identifier rubyid_default_parser'>default_parser</span><span class='lparen'>(</span><span class='id identifier rubyid_child_element'>child_element</span><span class='comma'>,</span> <span class='id identifier rubyid_row_data'>row_data</span><span class='comma'>,</span> <span class='id identifier rubyid_key'>key</span><span class='rparen'>)</span> <span class='op'>:</span>
1026
- <span class='id identifier rubyid_column_parsers'>column_parsers</span><span class='lbracket'>[</span><span class='id identifier rubyid_key'>key</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_call'>call</span><span class='lparen'>(</span><span class='id identifier rubyid_child_element'>child_element</span><span class='comma'>,</span> <span class='id identifier rubyid_row_data'>row_data</span><span class='comma'>,</span> <span class='id identifier rubyid_key'>key</span><span class='rparen'>)</span>
1027
- <span class='kw'>end</span>
1028
- <span class='kw'>next</span> <span class='kw'>unless</span> <span class='id identifier rubyid_filter'>filter</span><span class='period'>.</span><span class='id identifier rubyid_nil?'>nil?</span> <span class='op'>||</span> <span class='id identifier rubyid_filter'>filter</span><span class='period'>.</span><span class='id identifier rubyid_call'>call</span><span class='lparen'>(</span><span class='id identifier rubyid_row_data'>row_data</span><span class='comma'>,</span> <span class='id identifier rubyid_row'>row</span><span class='comma'>,</span> <span class='id identifier rubyid_header_map'>header_map</span><span class='rparen'>)</span>
1029
- <span class='id identifier rubyid_data'>data</span> <span class='op'>&lt;&lt;</span> <span class='id identifier rubyid_row_data'>row_data</span>
1030
- <span class='kw'>end</span>
1031
- <span class='id identifier rubyid_data'>data</span>
1032
- <span class='kw'>end</span></pre>
1033
- </td>
1034
- </tr>
1035
- </table>
1036
- </div>
1037
-
1038
- <div class="method_details ">
1039
- <h3 class="signature " id="parse_header_map-class_method">
1040
-
1041
- .<strong>parse_header_map</strong>(opts = {}) &#x21d2; <tt>Hash{Symbol,String =&gt; Integer}</tt><sup>?</sup>
1042
-
1043
-
1044
-
1045
-
1046
-
1047
- </h3><div class="docstring">
1048
- <div class="discussion">
1049
-
1050
- <p>Parse header from selector and create a header map to match a column key with column index.</p>
1051
-
1052
-
1053
-
1054
-
1055
-
1056
- </div>
1057
- </div>
1058
- <div class="tags">
1059
- <p class="tag_title">Parameters:</p>
1060
- <ul class="param">
1061
-
1062
- <li>
1063
-
1064
- <span class='name'>opts</span>
1065
-
1066
-
1067
- <span class='type'>(<tt>Hash</tt>)</span>
1068
-
1069
-
1070
- <em class="default">(defaults to: <tt>{}</tt>)</em>
1071
-
1072
-
1073
- &mdash;
1074
- <div class='inline'>
1075
- <p>({}) Configuration options.</p>
1076
- </div>
1077
-
1078
- </li>
1079
-
1080
- </ul>
1081
-
1082
-
1083
-
1084
-
1085
- <p class="tag_title">Options Hash (<tt>opts</tt>):</p>
1086
- <ul class="option">
1087
-
1088
- <li>
1089
- <span class="name">:html</span>
1090
- <span class="type">(<tt>Nokogiri::Element</tt>)</span>
1091
- <span class="default">
1092
-
1093
- </span>
1094
-
1095
- &mdash; <div class='inline'>
1096
- <p>Container element to search into.</p>
1097
- </div>
1098
-
1099
- </li>
1100
-
1101
- <li>
1102
- <span class="name">:selector</span>
1103
- <span class="type">(<tt>String</tt>)</span>
1104
- <span class="default">
1105
-
1106
- </span>
1107
-
1108
- &mdash; <div class='inline'>
1109
- <p>CSS selector to match header cells.</p>
1110
- </div>
1111
-
1112
- </li>
1113
-
1114
- <li>
1115
- <span class="name">:column_key_label_map</span>
1116
- <span class="type">(<tt>Hash{Symbol,String =&gt; Regex,String}</tt>)</span>
1117
- <span class="default">
1118
-
1119
- </span>
1120
-
1121
- &mdash; <div class='inline'>
1122
- <p>Key vs. label dictionary.</p>
1123
- </div>
1124
-
1125
- </li>
1126
-
1127
- <li>
1128
- <span class="name">:first_row_header</span>
1129
- <span class="type">(<tt>Boolean</tt>)</span>
1130
- <span class="default">
1131
-
1132
- &mdash; default:
1133
- <tt>false</tt>
1134
-
1135
- </span>
1136
-
1137
- &mdash; <div class='inline'>
1138
- <p>If true, the first row matching the selector will be used as the header for
1139
- parsing.</p>
1140
- </div>
1141
-
1142
- </li>
1143
-
1144
- <li>
1145
- <span class="name">:ignore_text_nodes</span>
1146
- <span class="type">(<tt>Boolean</tt>)</span>
1147
- <span class="default">
1148
-
1149
- &mdash; default:
1150
- <tt>true</tt>
1151
-
1152
- </span>
1153
-
1154
- &mdash; <div class='inline'>
1155
- <p>Ignore text nodes when retrieving header cells and rows.</p>
1156
- </div>
1157
-
1158
- </li>
1159
-
1160
- </ul>
1161
-
1162
-
1163
- <p class="tag_title">Returns:</p>
1164
- <ul class="return">
1165
-
1166
- <li>
1167
-
1168
-
1169
- <span class='type'>(<tt>Hash{Symbol,String =&gt; Integer}</tt>, <tt>nil</tt>)</span>
1170
-
1171
-
1172
-
1173
- &mdash;
1174
- <div class='inline'>
1175
- <p>Key vs. column index map.</p>
1176
- </div>
1177
-
1178
- </li>
1179
-
1180
- </ul>
1181
-
1182
- </div><table class="source_code">
1183
- <tr>
1184
- <td>
1185
- <pre class="lines">
1186
-
1187
-
1188
- 166
1189
- 167
1190
- 168
1191
- 169
1192
- 170
1193
- 171
1194
- 172
1195
- 173
1196
- 174
1197
- 175
1198
- 176
1199
- 177
1200
- 178
1201
- 179
1202
- 180
1203
- 181
1204
- 182
1205
- 183
1206
- 184
1207
- 185
1208
- 186
1209
- 187
1210
- 188
1211
- 189
1212
- 190
1213
- 191
1214
- 192
1215
- 193
1216
- 194
1217
- 195
1218
- 196
1219
- 197
1220
- 198
1221
- 199
1222
- 200</pre>
1223
- </td>
1224
- <td>
1225
- <pre class="code"><span class="info file"># File 'lib/ae_easy/text.rb', line 166</span>
1226
-
1227
- <span class='kw'>def</span> <span class='kw'>self</span><span class='period'>.</span><span class='id identifier rubyid_parse_header_map'>parse_header_map</span> <span class='id identifier rubyid_opts'>opts</span> <span class='op'>=</span> <span class='lbrace'>{</span><span class='rbrace'>}</span>
1228
- <span class='id identifier rubyid_opts'>opts</span> <span class='op'>=</span> <span class='lbrace'>{</span>
1229
- <span class='label'>html:</span> <span class='kw'>nil</span><span class='comma'>,</span>
1230
- <span class='label'>selector:</span> <span class='kw'>nil</span><span class='comma'>,</span>
1231
- <span class='label'>column_key_label_map:</span> <span class='lbrace'>{</span><span class='rbrace'>}</span><span class='comma'>,</span>
1232
- <span class='label'>first_row_header:</span> <span class='kw'>false</span><span class='comma'>,</span>
1233
- <span class='label'>ignore_text_nodes:</span> <span class='kw'>true</span>
1234
- <span class='rbrace'>}</span><span class='period'>.</span><span class='id identifier rubyid_merge'>merge</span> <span class='id identifier rubyid_opts'>opts</span>
1235
-
1236
- <span class='comment'># Setup config
1237
- </span> <span class='id identifier rubyid_dictionary'>dictionary</span> <span class='op'>=</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:column_key_label_map</span><span class='rbracket'>]</span>
1238
- <span class='id identifier rubyid_ignore_text_nodes'>ignore_text_nodes</span> <span class='op'>=</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:ignore_text_nodes</span><span class='rbracket'>]</span>
1239
- <span class='id identifier rubyid_data'>data</span> <span class='op'>=</span> <span class='lbracket'>[</span><span class='rbracket'>]</span>
1240
- <span class='id identifier rubyid_column_map'>column_map</span> <span class='op'>=</span> <span class='kw'>nil</span>
1241
-
1242
- <span class='comment'># Extract and parse header rows
1243
- </span> <span class='id identifier rubyid_html_rows'>html_rows</span> <span class='op'>=</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:html</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_css'>css</span><span class='lparen'>(</span><span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:selector</span><span class='rbracket'>]</span><span class='rparen'>)</span> <span class='kw'>rescue</span> <span class='kw'>nil</span>
1244
- <span class='kw'>return</span> <span class='kw'>nil</span> <span class='kw'>if</span> <span class='id identifier rubyid_html_rows'>html_rows</span><span class='period'>.</span><span class='id identifier rubyid_nil?'>nil?</span>
1245
- <span class='id identifier rubyid_html_rows'>html_rows</span> <span class='op'>=</span> <span class='lbracket'>[</span><span class='id identifier rubyid_html_rows'>html_rows</span><span class='period'>.</span><span class='id identifier rubyid_first'>first</span><span class='rbracket'>]</span> <span class='kw'>if</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:first_row_header</span><span class='rbracket'>]</span>
1246
- <span class='id identifier rubyid_html_rows'>html_rows</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_row'>row</span><span class='op'>|</span>
1247
- <span class='kw'>next</span> <span class='kw'>if</span> <span class='id identifier rubyid_ignore_text_nodes'>ignore_text_nodes</span> <span class='op'>&amp;&amp;</span> <span class='id identifier rubyid_row'>row</span><span class='period'>.</span><span class='id identifier rubyid_name'>name</span> <span class='op'>==</span> <span class='tstring'><span class='tstring_beg'>&#39;</span><span class='tstring_content'>text</span><span class='tstring_end'>&#39;</span></span>
1248
-
1249
- <span class='id identifier rubyid_column_map'>column_map</span> <span class='op'>=</span> <span class='lbrace'>{</span><span class='rbrace'>}</span>
1250
- <span class='id identifier rubyid_children'>children</span> <span class='op'>=</span> <span class='id identifier rubyid_row'>row</span><span class='period'>.</span><span class='id identifier rubyid_children'>children</span>
1251
- <span class='id identifier rubyid_children'>children</span> <span class='op'>=</span> <span class='id identifier rubyid_children'>children</span><span class='period'>.</span><span class='id identifier rubyid_select'>select</span><span class='lbrace'>{</span><span class='op'>|</span><span class='id identifier rubyid_i'>i</span><span class='op'>|</span><span class='id identifier rubyid_i'>i</span><span class='period'>.</span><span class='id identifier rubyid_name'>name</span> <span class='op'>!=</span> <span class='tstring'><span class='tstring_beg'>&#39;</span><span class='tstring_content'>text</span><span class='tstring_end'>&#39;</span></span><span class='rbrace'>}</span> <span class='kw'>if</span> <span class='id identifier rubyid_ignore_text_nodes'>ignore_text_nodes</span>
1252
- <span class='id identifier rubyid_children'>children</span><span class='period'>.</span><span class='id identifier rubyid_each_with_index'>each_with_index</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_col'>col</span><span class='comma'>,</span> <span class='id identifier rubyid_index'>index</span><span class='op'>|</span>
1253
- <span class='comment'># Parse and map column header
1254
- </span> <span class='id identifier rubyid_column_key'>column_key</span> <span class='op'>=</span> <span class='id identifier rubyid_translate_label_to_key'>translate_label_to_key</span> <span class='id identifier rubyid_col'>col</span><span class='comma'>,</span> <span class='id identifier rubyid_dictionary'>dictionary</span>
1255
- <span class='kw'>next</span> <span class='kw'>if</span> <span class='id identifier rubyid_column_key'>column_key</span><span class='period'>.</span><span class='id identifier rubyid_nil?'>nil?</span>
1256
- <span class='id identifier rubyid_column_map'>column_map</span><span class='lbracket'>[</span><span class='id identifier rubyid_column_key'>column_key</span><span class='rbracket'>]</span> <span class='op'>=</span> <span class='id identifier rubyid_index'>index</span>
1257
- <span class='kw'>end</span>
1258
- <span class='id identifier rubyid_data'>data</span> <span class='op'>&lt;&lt;</span> <span class='id identifier rubyid_column_map'>column_map</span>
1259
- <span class='kw'>end</span>
1260
- <span class='id identifier rubyid_data'>data</span><span class='op'>&amp;.</span><span class='id identifier rubyid_first'>first</span>
1261
- <span class='kw'>end</span></pre>
1262
- </td>
1263
- </tr>
1264
- </table>
1265
- </div>
1266
-
1267
- <div class="method_details ">
1268
- <h3 class="signature " id="parse_table-class_method">
1269
-
1270
- .<strong>parse_table</strong>(opts = {}) {|data, row, header_map| ... } &#x21d2; <tt>Hash{Symbol =&gt; Array,Hash,nil}</tt>
1271
-
1272
-
1273
-
1274
-
1275
-
1276
- </h3><div class="docstring">
1277
- <div class="discussion">
1278
-
1279
- <p>Parse data from a horizontal table-like structure matching selectors and using a header map to match columns.</p>
1280
-
1281
-
1282
-
1283
-
1284
-
1285
- </div>
1286
- </div>
1287
- <div class="tags">
1288
- <p class="tag_title">Parameters:</p>
1289
- <ul class="param">
1290
-
1291
- <li>
1292
-
1293
- <span class='name'>opts</span>
1294
-
1295
-
1296
- <span class='type'>(<tt>Hash</tt>)</span>
1297
-
1298
-
1299
- <em class="default">(defaults to: <tt>{}</tt>)</em>
1300
-
1301
-
1302
- &mdash;
1303
- <div class='inline'>
1304
- <p>({}) Configuration options.</p>
1305
- </div>
1306
-
1307
- </li>
1308
-
1309
- </ul>
1310
-
1311
-
1312
-
1313
-
1314
- <p class="tag_title">Options Hash (<tt>opts</tt>):</p>
1315
- <ul class="option">
1316
-
1317
- <li>
1318
- <span class="name">:html</span>
1319
- <span class="type">(<tt>Nokogiri::Element</tt>)</span>
1320
- <span class="default">
1321
-
1322
- </span>
1323
-
1324
- &mdash; <div class='inline'>
1325
- <p>Container element to search into.</p>
1326
- </div>
1327
-
1328
- </li>
1329
-
1330
- <li>
1331
- <span class="name">:header_selector</span>
1332
- <span class="type">(<tt>String</tt>)</span>
1333
- <span class="default">
1334
-
1335
- </span>
1336
-
1337
- &mdash; <div class='inline'>
1338
- <p>Header column elements selector.</p>
1339
- </div>
1340
-
1341
- </li>
1342
-
1343
- <li>
1344
- <span class="name">:header_key_label_map</span>
1345
- <span class="type">(<tt>Hash{Symbol,String =&gt; Regex,String}</tt>)</span>
1346
- <span class="default">
1347
-
1348
- </span>
1349
-
1350
- &mdash; <div class='inline'>
1351
- <p>Header key vs. label dictionary to match column indexes.</p>
1352
- </div>
1353
-
1354
- </li>
1355
-
1356
- <li>
1357
- <span class="name">:content_selector</span>
1358
- <span class="type">(<tt>String</tt>)</span>
1359
- <span class="default">
1360
-
1361
- </span>
1362
-
1363
- &mdash; <div class='inline'>
1364
- <p>Content row elements selector.</p>
1365
- </div>
1366
-
1367
- </li>
1368
-
1369
- <li>
1370
- <span class="name">:first_row_header</span>
1371
- <span class="type">(<tt>Boolean</tt>)</span>
1372
- <span class="default">
1373
-
1374
- &mdash; default:
1375
- <tt>false</tt>
1376
-
1377
- </span>
1378
-
1379
- &mdash; <div class='inline'>
1380
- <p>If true, the first row matching the selector will be used as the header for
1381
- parsing.</p>
1382
- </div>
1383
-
1384
- </li>
1385
-
1386
- <li>
1387
- <span class="name">:column_parsers</span>
1388
- <span class="type">(<tt>Hash{Symbol,String =&gt; lambda,proc}</tt>)</span>
1389
- <span class="default">
1390
-
1391
- &mdash; default:
1392
- <tt>{}</tt>
1393
-
1394
- </span>
1395
-
1396
- &mdash; <div class='inline'>
1397
- <p>Custom column parsers for advanced data extraction.</p>
1398
- </div>
1399
-
1400
- </li>
1401
-
1402
- <li>
1403
- <span class="name">:ignore_text_nodes</span>
1404
- <span class="type">(<tt>Boolean</tt>)</span>
1405
- <span class="default">
1406
-
1407
- &mdash; default:
1408
- <tt>true</tt>
1409
-
1410
- </span>
1411
-
1412
- &mdash; <div class='inline'>
1413
- <p>Ignore text nodes when retrieving cells and rows.</p>
1414
- </div>
1415
-
1416
- </li>
1417
-
1418
- </ul>
1419
-
1420
-
1421
- <p class="tag_title">Yield Parameters:</p>
1422
- <ul class="yieldparam">
1423
-
1424
- <li>
1425
-
1426
- <span class='name'>data</span>
1427
-
1428
-
1429
- <span class='type'>(<tt>Hash{Symbol,String =&gt; Object}</tt>)</span>
1430
-
1431
-
1432
-
1433
- &mdash;
1434
- <div class='inline'>
1435
- <p>Parsed content row data.</p>
1436
- </div>
1437
-
1438
- </li>
1439
-
1440
- <li>
1441
-
1442
- <span class='name'>row</span>
1443
-
1444
-
1445
- <span class='type'>(<tt>Array</tt>)</span>
1446
-
1447
-
1448
-
1449
- &mdash;
1450
- <div class='inline'>
1451
- <p>Raw content row data.</p>
1452
- </div>
1453
-
1454
- </li>
1455
-
1456
- <li>
1457
-
1458
- <span class='name'>header_map</span>
1459
-
1460
-
1461
- <span class='type'>(<tt>Hash{Symbol,String =&gt; Integer}</tt>)</span>
1462
-
1463
-
1464
-
1465
- &mdash;
1466
- <div class='inline'>
1467
- <p>Header map used.</p>
1468
- </div>
1469
-
1470
- </li>
1471
-
1472
- </ul>
1473
- <p class="tag_title">Yield Returns:</p>
1474
- <ul class="yieldreturn">
1475
-
1476
- <li>
1477
-
1478
-
1479
- <span class='type'>(<tt>Boolean</tt>)</span>
1480
-
1481
-
1482
-
1483
- &mdash;
1484
- <div class='inline'>
1485
- <p>`true` when valid, else `false`.</p>
1486
- </div>
1487
-
1488
- </li>
1489
-
1490
- </ul>
1491
- <p class="tag_title">Returns:</p>
1492
- <ul class="return">
1493
-
1494
- <li>
1495
-
1496
-
1497
- <span class='type'>(<tt>Hash{Symbol =&gt; Array,Hash,nil}</tt>)</span>
1498
-
1499
-
1500
-
1501
- &mdash;
1502
- <div class='inline'>
1503
- <p>Hash data is as follows:</p>
1504
- <ul><li>
1505
- <p>`[Hash] :header_map` Header map used.</p>
1506
- </li><li>
1507
- <p>`[Array&lt;Hash&gt;,nil] :data` Parsed rows data.</p>
1508
- </li></ul>
1509
- </div>
1510
-
1511
- </li>
1512
-
1513
- </ul>
1514
-
1515
- </div><table class="source_code">
1516
- <tr>
1517
- <td>
1518
- <pre class="lines">
1519
-
1520
-
1521
- 226
1522
- 227
1523
- 228
1524
- 229
1525
- 230
1526
- 231
1527
- 232
1528
- 233
1529
- 234
1530
- 235
1531
- 236
1532
- 237
1533
- 238
1534
- 239
1535
- 240
1536
- 241
1537
- 242
1538
- 243
1539
- 244
1540
- 245
1541
- 246
1542
- 247
1543
- 248
1544
- 249
1545
- 250
1546
- 251</pre>
1547
- </td>
1548
- <td>
1549
- <pre class="code"><span class="info file"># File 'lib/ae_easy/text.rb', line 226</span>
1550
-
1551
- <span class='kw'>def</span> <span class='kw'>self</span><span class='period'>.</span><span class='id identifier rubyid_parse_table'>parse_table</span> <span class='id identifier rubyid_opts'>opts</span> <span class='op'>=</span> <span class='lbrace'>{</span><span class='rbrace'>}</span><span class='comma'>,</span> <span class='op'>&amp;</span><span class='id identifier rubyid_filter'>filter</span>
1552
- <span class='id identifier rubyid_opts'>opts</span> <span class='op'>=</span> <span class='lbrace'>{</span>
1553
- <span class='label'>html:</span> <span class='kw'>nil</span><span class='comma'>,</span>
1554
- <span class='label'>header_selector:</span> <span class='kw'>nil</span><span class='comma'>,</span>
1555
- <span class='label'>header_key_label_map:</span> <span class='lbrace'>{</span><span class='rbrace'>}</span><span class='comma'>,</span>
1556
- <span class='label'>content_selector:</span> <span class='kw'>nil</span><span class='comma'>,</span>
1557
- <span class='label'>first_row_header:</span> <span class='kw'>false</span><span class='comma'>,</span>
1558
- <span class='label'>column_parsers:</span> <span class='lbrace'>{</span><span class='rbrace'>}</span><span class='comma'>,</span>
1559
- <span class='label'>ignore_text_nodes:</span> <span class='kw'>true</span>
1560
- <span class='rbrace'>}</span><span class='period'>.</span><span class='id identifier rubyid_merge'>merge</span> <span class='id identifier rubyid_opts'>opts</span>
1561
- <span class='kw'>return</span> <span class='kw'>nil</span> <span class='kw'>if</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:html</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_nil?'>nil?</span>
1562
- <span class='id identifier rubyid_header_map'>header_map</span> <span class='op'>=</span> <span class='kw'>self</span><span class='period'>.</span><span class='id identifier rubyid_parse_header_map'>parse_header_map</span> <span class='label'>html:</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:html</span><span class='rbracket'>]</span><span class='comma'>,</span>
1563
- <span class='label'>selector:</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:header_selector</span><span class='rbracket'>]</span><span class='comma'>,</span>
1564
- <span class='label'>column_key_label_map:</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:header_key_label_map</span><span class='rbracket'>]</span><span class='comma'>,</span>
1565
- <span class='label'>first_row_header:</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:first_row_header</span><span class='rbracket'>]</span><span class='comma'>,</span>
1566
- <span class='label'>ignore_text_nodes:</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:ignore_text_nodes</span><span class='rbracket'>]</span>
1567
- <span class='kw'>return</span> <span class='kw'>nil</span> <span class='kw'>if</span> <span class='id identifier rubyid_header_map'>header_map</span><span class='period'>.</span><span class='id identifier rubyid_nil?'>nil?</span>
1568
- <span class='id identifier rubyid_data'>data</span> <span class='op'>=</span> <span class='kw'>self</span><span class='period'>.</span><span class='id identifier rubyid_parse_content'>parse_content</span> <span class='label'>html:</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:html</span><span class='rbracket'>]</span><span class='comma'>,</span>
1569
- <span class='label'>selector:</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:content_selector</span><span class='rbracket'>]</span><span class='comma'>,</span>
1570
- <span class='label'>header_map:</span> <span class='id identifier rubyid_header_map'>header_map</span><span class='comma'>,</span>
1571
- <span class='label'>first_row_header:</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:first_row_header</span><span class='rbracket'>]</span><span class='comma'>,</span>
1572
- <span class='label'>column_parsers:</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:column_parsers</span><span class='rbracket'>]</span><span class='comma'>,</span>
1573
- <span class='label'>ignore_text_nodes:</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:ignore_text_nodes</span><span class='rbracket'>]</span><span class='comma'>,</span>
1574
- <span class='op'>&amp;</span><span class='id identifier rubyid_filter'>filter</span>
1575
- <span class='lbrace'>{</span><span class='label'>header_map:</span> <span class='id identifier rubyid_header_map'>header_map</span><span class='comma'>,</span> <span class='label'>data:</span> <span class='id identifier rubyid_data'>data</span><span class='rbrace'>}</span>
1576
- <span class='kw'>end</span></pre>
1577
- </td>
1578
- </tr>
1579
- </table>
1580
- </div>
1581
-
1582
- <div class="method_details ">
1583
- <h3 class="signature " id="parse_vertical_table-class_method">
1584
-
1585
- .<strong>parse_vertical_table</strong>(opts = {}) {|data, row, header_map| ... } &#x21d2; <tt>Hash{Symbol =&gt; Array,Hash,nil}</tt>
1586
-
1587
-
1588
-
1589
-
1590
-
1591
- </h3><div class="docstring">
1592
- <div class="discussion">
1593
-
1594
- <p>Parse data from a vertical table-like structure matching selectors and using a header map to match columns.</p>
1595
-
1596
-
1597
-
1598
-
1599
-
1600
- </div>
1601
- </div>
1602
- <div class="tags">
1603
- <p class="tag_title">Parameters:</p>
1604
- <ul class="param">
1605
-
1606
- <li>
1607
-
1608
- <span class='name'>opts</span>
1609
-
1610
-
1611
- <span class='type'>(<tt>Hash</tt>)</span>
1612
-
1613
-
1614
- <em class="default">(defaults to: <tt>{}</tt>)</em>
1615
-
1616
-
1617
- &mdash;
1618
- <div class='inline'>
1619
- <p>({}) Configuration options.</p>
1620
- </div>
1621
-
1622
- </li>
1623
-
1624
- </ul>
1625
-
1626
-
1627
-
1628
-
1629
- <p class="tag_title">Options Hash (<tt>opts</tt>):</p>
1630
- <ul class="option">
1631
-
1632
- <li>
1633
- <span class="name">:html</span>
1634
- <span class="type">(<tt>Nokogiri::Element</tt>)</span>
1635
- <span class="default">
1636
-
1637
- </span>
1638
-
1639
- &mdash; <div class='inline'>
1640
- <p>Container element to search into.</p>
1641
- </div>
1642
-
1643
- </li>
1644
-
1645
- <li>
1646
- <span class="name">:row_selector</span>
1647
- <span class="type">(<tt>String</tt>)</span>
1648
- <span class="default">
1649
-
1650
- </span>
1651
-
1652
- &mdash; <div class='inline'>
1653
- <p>Vertical row like elements selector.</p>
1654
- </div>
1655
-
1656
- </li>
1657
-
1658
- <li>
1659
- <span class="name">:header_selector</span>
1660
- <span class="type">(<tt>String</tt>)</span>
1661
- <span class="default">
1662
-
1663
- </span>
1664
-
1665
- &mdash; <div class='inline'>
1666
- <p>Header column elements selector.</p>
1667
- </div>
1668
-
1669
- </li>
1670
-
1671
- <li>
1672
- <span class="name">:header_key_label_map</span>
1673
- <span class="type">(<tt>Hash{Symbol,String =&gt; Regex,String}</tt>)</span>
1674
- <span class="default">
1675
-
1676
- </span>
1677
-
1678
- &mdash; <div class='inline'>
1679
- <p>Header key vs. label dictionary to match column indexes.</p>
1680
- </div>
1681
-
1682
- </li>
1683
-
1684
- <li>
1685
- <span class="name">:content_selector</span>
1686
- <span class="type">(<tt>String</tt>)</span>
1687
- <span class="default">
1688
-
1689
- </span>
1690
-
1691
- &mdash; <div class='inline'>
1692
- <p>Content row elements selector.</p>
1693
- </div>
1694
-
1695
- </li>
1696
-
1697
- <li>
1698
- <span class="name">:column_parsers</span>
1699
- <span class="type">(<tt>Hash{Symbol,String =&gt; lambda,proc}</tt>)</span>
1700
- <span class="default">
1701
-
1702
- &mdash; default:
1703
- <tt>{}</tt>
1704
-
1705
- </span>
1706
-
1707
- &mdash; <div class='inline'>
1708
- <p>Custom column parsers for advanced data extraction.</p>
1709
- </div>
1710
-
1711
- </li>
1712
-
1713
- <li>
1714
- <span class="name">:ignore_text_nodes</span>
1715
- <span class="type">(<tt>Boolean</tt>)</span>
1716
- <span class="default">
1717
-
1718
- &mdash; default:
1719
- <tt>true</tt>
1720
-
1721
- </span>
1722
-
1723
- &mdash; <div class='inline'>
1724
- <p>Ignore text nodes when retrieving cells and rows.</p>
1725
- </div>
1726
-
1727
- </li>
1728
-
1729
- </ul>
1730
-
1731
-
1732
- <p class="tag_title">Yield Parameters:</p>
1733
- <ul class="yieldparam">
1734
-
1735
- <li>
1736
-
1737
- <span class='name'>data</span>
1738
-
1739
-
1740
- <span class='type'>(<tt>Hash{Symbol,String =&gt; Object}</tt>)</span>
1741
-
1742
-
1743
-
1744
- &mdash;
1745
- <div class='inline'>
1746
- <p>Parsed content row data.</p>
1747
- </div>
1748
-
1749
- </li>
1750
-
1751
- <li>
1752
-
1753
- <span class='name'>row</span>
1754
-
1755
-
1756
- <span class='type'>(<tt>Array</tt>)</span>
1757
-
1758
-
1759
-
1760
- &mdash;
1761
- <div class='inline'>
1762
- <p>Raw content row data.</p>
1763
- </div>
1764
-
1765
- </li>
1766
-
1767
- <li>
1768
-
1769
- <span class='name'>header_map</span>
1770
-
1771
-
1772
- <span class='type'>(<tt>Hash{Symbol,String =&gt; Integer}</tt>)</span>
1773
-
1774
-
1775
-
1776
- &mdash;
1777
- <div class='inline'>
1778
- <p>Header map used.</p>
1779
- </div>
1780
-
1781
- </li>
1782
-
1783
- </ul>
1784
- <p class="tag_title">Yield Returns:</p>
1785
- <ul class="yieldreturn">
1786
-
1787
- <li>
1788
-
1789
-
1790
- <span class='type'>(<tt>Boolean</tt>)</span>
1791
-
1792
-
1793
-
1794
- &mdash;
1795
- <div class='inline'>
1796
- <p>`true` when valid, else `false`.</p>
1797
- </div>
1798
-
1799
- </li>
1800
-
1801
- </ul>
1802
- <p class="tag_title">Returns:</p>
1803
- <ul class="return">
1804
-
1805
- <li>
1806
-
1807
-
1808
- <span class='type'>(<tt>Hash{Symbol =&gt; Array,Hash,nil}</tt>)</span>
1809
-
1810
-
1811
-
1812
- &mdash;
1813
- <div class='inline'>
1814
- <p>Hash data is as follows:</p>
1815
- <ul><li>
1816
- <p>`[Hash] :header_map` Header map used.</p>
1817
- </li><li>
1818
- <p>`[Array&lt;Hash&gt;,nil] :data` Parsed rows data.</p>
1819
- </li></ul>
1820
- </div>
1821
-
1822
- </li>
1823
-
1824
- </ul>
1825
-
1826
- </div><table class="source_code">
1827
- <tr>
1828
- <td>
1829
- <pre class="lines">
1830
-
1831
-
1832
- 276
1833
- 277
1834
- 278
1835
- 279
1836
- 280
1837
- 281
1838
- 282
1839
- 283
1840
- 284
1841
- 285
1842
- 286
1843
- 287
1844
- 288
1845
- 289
1846
- 290
1847
- 291
1848
- 292
1849
- 293
1850
- 294
1851
- 295
1852
- 296
1853
- 297
1854
- 298
1855
- 299
1856
- 300
1857
- 301
1858
- 302
1859
- 303
1860
- 304
1861
- 305
1862
- 306
1863
- 307
1864
- 308
1865
- 309</pre>
1866
- </td>
1867
- <td>
1868
- <pre class="code"><span class="info file"># File 'lib/ae_easy/text.rb', line 276</span>
1869
-
1870
- <span class='kw'>def</span> <span class='kw'>self</span><span class='period'>.</span><span class='id identifier rubyid_parse_vertical_table'>parse_vertical_table</span> <span class='id identifier rubyid_opts'>opts</span> <span class='op'>=</span> <span class='lbrace'>{</span><span class='rbrace'>}</span><span class='comma'>,</span> <span class='op'>&amp;</span><span class='id identifier rubyid_filter'>filter</span>
1871
- <span class='id identifier rubyid_opts'>opts</span> <span class='op'>=</span> <span class='lbrace'>{</span>
1872
- <span class='label'>html:</span> <span class='kw'>nil</span><span class='comma'>,</span>
1873
- <span class='label'>row_selector:</span> <span class='kw'>nil</span><span class='comma'>,</span>
1874
- <span class='label'>header_selector:</span> <span class='kw'>nil</span><span class='comma'>,</span>
1875
- <span class='label'>header_key_label_map:</span> <span class='lbrace'>{</span><span class='rbrace'>}</span><span class='comma'>,</span>
1876
- <span class='label'>content_selector:</span> <span class='kw'>nil</span><span class='comma'>,</span>
1877
- <span class='label'>column_parsers:</span> <span class='lbrace'>{</span><span class='rbrace'>}</span><span class='comma'>,</span>
1878
- <span class='label'>ignore_text_nodes:</span> <span class='kw'>true</span>
1879
- <span class='rbrace'>}</span><span class='period'>.</span><span class='id identifier rubyid_merge'>merge</span> <span class='id identifier rubyid_opts'>opts</span>
1880
- <span class='kw'>return</span> <span class='kw'>nil</span> <span class='kw'>if</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:html</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_nil?'>nil?</span>
1881
-
1882
- <span class='comment'># Setup config
1883
- </span> <span class='id identifier rubyid_data'>data</span> <span class='op'>=</span> <span class='lbrace'>{</span><span class='rbrace'>}</span>
1884
- <span class='id identifier rubyid_dictionary'>dictionary</span> <span class='op'>=</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:header_key_label_map</span><span class='rbracket'>]</span>
1885
- <span class='id identifier rubyid_column_parsers'>column_parsers</span> <span class='op'>=</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:column_parsers</span><span class='rbracket'>]</span>
1886
-
1887
- <span class='comment'># Extract headers and content
1888
- </span> <span class='id identifier rubyid_html_rows'>html_rows</span> <span class='op'>=</span> <span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:html</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_css'>css</span><span class='lparen'>(</span><span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:row_selector</span><span class='rbracket'>]</span><span class='rparen'>)</span> <span class='kw'>rescue</span> <span class='kw'>nil</span>
1889
- <span class='kw'>return</span> <span class='kw'>nil</span> <span class='kw'>if</span> <span class='id identifier rubyid_html_rows'>html_rows</span><span class='period'>.</span><span class='id identifier rubyid_nil?'>nil?</span>
1890
- <span class='id identifier rubyid_html_rows'>html_rows</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_row'>row</span><span class='op'>|</span>
1891
- <span class='comment'># Parse and map column header
1892
- </span> <span class='id identifier rubyid_header_element'>header_element</span> <span class='op'>=</span> <span class='id identifier rubyid_row'>row</span><span class='period'>.</span><span class='id identifier rubyid_css'>css</span><span class='lparen'>(</span><span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:header_selector</span><span class='rbracket'>]</span><span class='rparen'>)</span>
1893
- <span class='id identifier rubyid_key'>key</span> <span class='op'>=</span> <span class='id identifier rubyid_translate_label_to_key'>translate_label_to_key</span> <span class='id identifier rubyid_header_element'>header_element</span><span class='comma'>,</span> <span class='id identifier rubyid_dictionary'>dictionary</span>
1894
- <span class='kw'>next</span> <span class='kw'>if</span> <span class='id identifier rubyid_key'>key</span><span class='period'>.</span><span class='id identifier rubyid_nil?'>nil?</span> <span class='op'>||</span> <span class='id identifier rubyid_key'>key</span> <span class='op'>==</span> <span class='tstring'><span class='tstring_beg'>&#39;</span><span class='tstring_end'>&#39;</span></span>
1895
-
1896
- <span class='comment'># Parse column html with default or custom parser
1897
- </span> <span class='id identifier rubyid_content_element'>content_element</span> <span class='op'>=</span> <span class='id identifier rubyid_row'>row</span><span class='period'>.</span><span class='id identifier rubyid_css'>css</span><span class='lparen'>(</span><span class='id identifier rubyid_opts'>opts</span><span class='lbracket'>[</span><span class='symbol'>:content_selector</span><span class='rbracket'>]</span><span class='rparen'>)</span>
1898
- <span class='id identifier rubyid_column_parsers'>column_parsers</span><span class='lbracket'>[</span><span class='id identifier rubyid_key'>key</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_nil?'>nil?</span> <span class='op'>?</span>
1899
- <span class='id identifier rubyid_default_parser'>default_parser</span><span class='lparen'>(</span><span class='id identifier rubyid_content_element'>content_element</span><span class='comma'>,</span> <span class='id identifier rubyid_data'>data</span><span class='comma'>,</span> <span class='id identifier rubyid_key'>key</span><span class='rparen'>)</span> <span class='op'>:</span>
1900
- <span class='id identifier rubyid_column_parsers'>column_parsers</span><span class='lbracket'>[</span><span class='id identifier rubyid_key'>key</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_call'>call</span><span class='lparen'>(</span><span class='id identifier rubyid_content_element'>content_element</span><span class='comma'>,</span> <span class='id identifier rubyid_data'>data</span><span class='comma'>,</span> <span class='id identifier rubyid_key'>key</span><span class='rparen'>)</span>
1901
- <span class='kw'>end</span>
1902
- <span class='id identifier rubyid_data'>data</span>
1903
- <span class='kw'>end</span></pre>
1904
- </td>
1905
- </tr>
1906
- </table>
1907
- </div>
1908
-
1909
- <div class="method_details ">
1910
- <h3 class="signature " id="strip-class_method">
1911
-
1912
- .<strong>strip</strong>(raw_text, orig_encoding = &#39;ASCII&#39;) &#x21d2; <tt>String</tt><sup>?</sup>
1913
-
1914
-
1915
-
1916
-
1917
-
1918
- </h3><div class="docstring">
1919
- <div class="discussion">
1920
-
1921
- <p>Strip a value by trimming spaces, reducing sequential spaces into a</p>
1922
-
1923
- <pre class="code ruby"><code class="ruby">single space, decode HTML entities and change encoding to UTF-8.
1924
- </code></pre>
1925
-
1926
-
1927
- </div>
1928
- </div>
1929
- <div class="tags">
1930
- <p class="tag_title">Parameters:</p>
1931
- <ul class="param">
1932
-
1933
- <li>
1934
-
1935
- <span class='name'>raw_text</span>
1936
-
1937
-
1938
- <span class='type'>(<tt>String</tt>, <tt>Object</tt>, <tt>nil</tt>)</span>
1939
-
1940
-
1941
-
1942
- &mdash;
1943
- <div class='inline'>
1944
- <p>Text to strip.</p>
1945
- </div>
1946
-
1947
- </li>
1948
-
1949
- <li>
1950
-
1951
- <span class='name'>orig_encoding</span>
1952
-
1953
-
1954
- <span class='type'>(<tt>String</tt>)</span>
1955
-
1956
-
1957
- <em class="default">(defaults to: <tt>&#39;ASCII&#39;</tt>)</em>
1958
-
1959
-
1960
- &mdash;
1961
- <div class='inline'>
1962
- <p>Text original encoding.</p>
1963
- </div>
1964
-
1965
- </li>
1966
-
1967
- </ul>
1968
-
1969
- <p class="tag_title">Returns:</p>
1970
- <ul class="return">
1971
-
1972
- <li>
1973
-
1974
-
1975
- <span class='type'>(<tt>String</tt>, <tt>nil</tt>)</span>
1976
-
1977
-
1978
-
1979
- &mdash;
1980
- <div class='inline'>
1981
- <p><code>nil</code> when <code>raw_text</code> is nil, else <code>String</code>.</p>
1982
- </div>
1983
-
1984
- </li>
1985
-
1986
- </ul>
1987
-
1988
- </div><table class="source_code">
1989
- <tr>
1990
- <td>
1991
- <pre class="lines">
1992
-
1993
-
1994
- 44
1995
- 45
1996
- 46
1997
- 47
1998
- 48
1999
- 49
2000
- 50
2001
- 51
2002
- 52
2003
- 53
2004
- 54
2005
- 55</pre>
2006
- </td>
2007
- <td>
2008
- <pre class="code"><span class="info file"># File 'lib/ae_easy/text.rb', line 44</span>
2009
-
2010
- <span class='kw'>def</span> <span class='kw'>self</span><span class='period'>.</span><span class='id identifier rubyid_strip'>strip</span> <span class='id identifier rubyid_raw_text'>raw_text</span><span class='comma'>,</span> <span class='id identifier rubyid_orig_encoding'>orig_encoding</span> <span class='op'>=</span> <span class='tstring'><span class='tstring_beg'>&#39;</span><span class='tstring_content'>ASCII</span><span class='tstring_end'>&#39;</span></span>
2011
- <span class='kw'>return</span> <span class='kw'>nil</span> <span class='kw'>if</span> <span class='id identifier rubyid_raw_text'>raw_text</span><span class='period'>.</span><span class='id identifier rubyid_nil?'>nil?</span>
2012
- <span class='id identifier rubyid_raw_text'>raw_text</span> <span class='op'>=</span> <span class='id identifier rubyid_raw_text'>raw_text</span><span class='period'>.</span><span class='id identifier rubyid_to_s'>to_s</span> <span class='kw'>unless</span> <span class='id identifier rubyid_raw_text'>raw_text</span><span class='period'>.</span><span class='id identifier rubyid_is_a?'>is_a?</span> <span class='const'>String</span>
2013
- <span class='id identifier rubyid_regex'>regex</span> <span class='op'>=</span> <span class='tstring'><span class='regexp_beg'>/</span><span class='tstring_content'>(\s|\u3000|\u00a0)+</span><span class='regexp_end'>/</span></span>
2014
- <span class='id identifier rubyid_good_encoding'>good_encoding</span> <span class='op'>=</span> <span class='lparen'>(</span><span class='id identifier rubyid_raw_text'>raw_text</span> <span class='op'>=~</span> <span class='tstring'><span class='regexp_beg'>/</span><span class='tstring_content'>\u3000</span><span class='regexp_end'>/</span></span> <span class='op'>||</span> <span class='kw'>true</span><span class='rparen'>)</span> <span class='kw'>rescue</span> <span class='kw'>false</span>
2015
- <span class='kw'>unless</span> <span class='id identifier rubyid_good_encoding'>good_encoding</span>
2016
- <span class='id identifier rubyid_raw_text'>raw_text</span> <span class='op'>=</span> <span class='id identifier rubyid_raw_text'>raw_text</span><span class='period'>.</span><span class='id identifier rubyid_force_encoding'>force_encoding</span><span class='lparen'>(</span><span class='id identifier rubyid_orig_encoding'>orig_encoding</span><span class='rparen'>)</span><span class='period'>.</span><span class='id identifier rubyid_encode'>encode</span><span class='lparen'>(</span><span class='tstring'><span class='tstring_beg'>&#39;</span><span class='tstring_content'>UTF-8</span><span class='tstring_end'>&#39;</span></span><span class='comma'>,</span> <span class='label'>invalid:</span> <span class='symbol'>:replace</span><span class='comma'>,</span> <span class='label'>undef:</span> <span class='symbol'>:replace</span><span class='rparen'>)</span>
2017
- <span class='id identifier rubyid_regex'>regex</span> <span class='op'>=</span> <span class='tstring'><span class='regexp_beg'>/</span><span class='tstring_content'>(\s|\u3000|\u00a0|\u00c2\u00a0)+</span><span class='regexp_end'>/</span></span>
2018
- <span class='kw'>end</span>
2019
- <span class='id identifier rubyid_text'>text</span> <span class='op'>=</span> <span class='id identifier rubyid_raw_text'>raw_text</span><span class='period'>.</span><span class='id identifier rubyid_gsub'>gsub</span><span class='lparen'>(</span><span class='id identifier rubyid_regex'>regex</span><span class='comma'>,</span> <span class='tstring'><span class='tstring_beg'>&#39;</span><span class='tstring_content'> </span><span class='tstring_end'>&#39;</span></span><span class='rparen'>)</span><span class='period'>.</span><span class='id identifier rubyid_strip'>strip</span>
2020
- <span class='id identifier rubyid_text'>text</span><span class='period'>.</span><span class='id identifier rubyid_nil?'>nil?</span> <span class='op'>?</span> <span class='kw'>nil</span> <span class='op'>:</span> <span class='id identifier rubyid_decode_html'>decode_html</span><span class='lparen'>(</span><span class='id identifier rubyid_text'>text</span><span class='rparen'>)</span>
2021
- <span class='kw'>end</span></pre>
2022
- </td>
2023
- </tr>
2024
- </table>
2025
- </div>
2026
-
2027
- <div class="method_details ">
2028
- <h3 class="signature " id="translate_label_to_key-class_method">
2029
-
2030
- .<strong>translate_label_to_key</strong>(element, label_map) &#x21d2; <tt>Symbol</tt>, <tt>String</tt>
2031
-
2032
-
2033
-
2034
-
2035
-
2036
- </h3><div class="docstring">
2037
- <div class="discussion">
2038
-
2039
- <p>Extract column label and translate it into a friendly key.</p>
2040
-
2041
-
2042
- </div>
2043
- </div>
2044
- <div class="tags">
2045
- <p class="tag_title">Parameters:</p>
2046
- <ul class="param">
2047
-
2048
- <li>
2049
-
2050
- <span class='name'>element</span>
2051
-
2052
-
2053
- <span class='type'>(<tt>Nokogiri::Element</tt>)</span>
2054
-
2055
-
2056
-
2057
- &mdash;
2058
- <div class='inline'>
2059
- <p>HTML element to parse.</p>
2060
- </div>
2061
-
2062
- </li>
2063
-
2064
- <li>
2065
-
2066
- <span class='name'>label_map</span>
2067
-
2068
-
2069
- <span class='type'>(<tt>Hash{Symbol,String =&gt; Regexp,String}</tt>)</span>
2070
-
2071
-
2072
-
2073
- &mdash;
2074
- <div class='inline'>
2075
- <p>Label dictionary for translation into key.</p>
2076
- </div>
2077
-
2078
- </li>
2079
-
2080
- </ul>
2081
-
2082
- <p class="tag_title">Returns:</p>
2083
- <ul class="return">
2084
-
2085
- <li>
2086
-
2087
-
2088
- <span class='type'>(<tt>Symbol</tt>, <tt>String</tt>)</span>
2089
-
2090
-
2091
-
2092
- &mdash;
2093
- <div class='inline'>
2094
- <p>Translated key.</p>
2095
- </div>
2096
-
2097
- </li>
2098
-
2099
- </ul>
2100
-
2101
- </div><table class="source_code">
2102
- <tr>
2103
- <td>
2104
- <pre class="lines">
2105
-
2106
-
2107
- 142
2108
- 143
2109
- 144
2110
- 145
2111
- 146
2112
- 147
2113
- 148
2114
- 149
2115
- 150</pre>
2116
- </td>
2117
- <td>
2118
- <pre class="code"><span class="info file"># File 'lib/ae_easy/text.rb', line 142</span>
2119
-
2120
- <span class='kw'>def</span> <span class='kw'>self</span><span class='period'>.</span><span class='id identifier rubyid_translate_label_to_key'>translate_label_to_key</span> <span class='id identifier rubyid_element'>element</span><span class='comma'>,</span> <span class='id identifier rubyid_label_map'>label_map</span>
2121
- <span class='kw'>return</span> <span class='kw'>nil</span> <span class='kw'>if</span> <span class='id identifier rubyid_element'>element</span><span class='period'>.</span><span class='id identifier rubyid_nil?'>nil?</span>
2122
- <span class='id identifier rubyid_element'>element</span><span class='period'>.</span><span class='id identifier rubyid_search'>search</span><span class='lparen'>(</span><span class='tstring'><span class='tstring_beg'>&#39;</span><span class='tstring_content'>//i</span><span class='tstring_end'>&#39;</span></span><span class='rparen'>)</span><span class='period'>.</span><span class='id identifier rubyid_remove'>remove</span> <span class='kw'>if</span> <span class='id identifier rubyid_element'>element</span><span class='period'>.</span><span class='id identifier rubyid_search'>search</span><span class='lparen'>(</span><span class='tstring'><span class='tstring_beg'>&#39;</span><span class='tstring_content'>//i</span><span class='tstring_end'>&#39;</span></span><span class='rparen'>)</span><span class='period'>.</span><span class='id identifier rubyid_count'>count</span> <span class='op'>&gt;</span> <span class='int'>0</span>
2123
- <span class='id identifier rubyid_text'>text</span> <span class='op'>=</span> <span class='id identifier rubyid_strip'>strip</span> <span class='id identifier rubyid_element'>element</span><span class='period'>.</span><span class='id identifier rubyid_text'>text</span>
2124
- <span class='id identifier rubyid_key_pair'>key_pair</span> <span class='op'>=</span> <span class='id identifier rubyid_label_map'>label_map</span><span class='period'>.</span><span class='id identifier rubyid_find'>find</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_k'>k</span><span class='comma'>,</span><span class='id identifier rubyid_v'>v</span><span class='op'>|</span>
2125
- <span class='id identifier rubyid_v'>v</span><span class='period'>.</span><span class='id identifier rubyid_is_a?'>is_a?</span><span class='lparen'>(</span><span class='const'>Regexp</span><span class='rparen'>)</span> <span class='op'>?</span> <span class='lparen'>(</span><span class='id identifier rubyid_text'>text</span> <span class='op'>=~</span> <span class='id identifier rubyid_v'>v</span><span class='rparen'>)</span> <span class='op'>:</span> <span class='lparen'>(</span><span class='id identifier rubyid_text'>text</span> <span class='op'>==</span> <span class='id identifier rubyid_v'>v</span><span class='rparen'>)</span>
2126
- <span class='kw'>end</span>
2127
- <span class='id identifier rubyid_key'>key</span> <span class='op'>=</span> <span class='id identifier rubyid_key_pair'>key_pair</span><span class='period'>.</span><span class='id identifier rubyid_nil?'>nil?</span> <span class='op'>?</span> <span class='kw'>nil</span> <span class='op'>:</span> <span class='id identifier rubyid_key_pair'>key_pair</span><span class='lbracket'>[</span><span class='int'>0</span><span class='rbracket'>]</span>
2128
- <span class='kw'>end</span></pre>
2129
- </td>
2130
- </tr>
2131
- </table>
2132
- </div>
2133
-
2134
- </div>
2135
-
2136
- </div>
2137
-
2138
- <div id="footer">
2139
- Generated on Mon Mar 11 21:38:55 2019 by
2140
- <a href="http://yardoc.org" title="Yay! A Ruby Documentation Tool" target="_parent">yard</a>
2141
- 0.9.18 (ruby-2.5.3).
2142
- </div>
2143
-
2144
- </div>
2145
- </body>
2146
- </html>