html2rss 0.3.3 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +18 -11
  3. data/.travis.yml +3 -3
  4. data/.yardopts +6 -0
  5. data/Gemfile.lock +23 -5
  6. data/README.md +2 -1
  7. data/docs/Html2rss.html +353 -0
  8. data/docs/Html2rss/AttributePostProcessors.html +203 -0
  9. data/docs/Html2rss/AttributePostProcessors/ParseTime.html +332 -0
  10. data/docs/Html2rss/AttributePostProcessors/ParseUri.html +314 -0
  11. data/docs/Html2rss/AttributePostProcessors/SanitizeHtml.html +346 -0
  12. data/docs/Html2rss/AttributePostProcessors/Substring.html +321 -0
  13. data/docs/Html2rss/AttributePostProcessors/Template.html +336 -0
  14. data/docs/Html2rss/Config.html +795 -0
  15. data/docs/Html2rss/FeedBuilder.html +295 -0
  16. data/docs/Html2rss/Item.html +654 -0
  17. data/docs/Html2rss/ItemExtractors.html +297 -0
  18. data/docs/Html2rss/ItemExtractors/Attribute.html +317 -0
  19. data/docs/Html2rss/ItemExtractors/CurrentTime.html +297 -0
  20. data/docs/Html2rss/ItemExtractors/Href.html +319 -0
  21. data/docs/Html2rss/ItemExtractors/Html.html +314 -0
  22. data/docs/Html2rss/ItemExtractors/Static.html +301 -0
  23. data/docs/Html2rss/ItemExtractors/Text.html +312 -0
  24. data/docs/Html2rss/Utils.html +115 -0
  25. data/docs/Html2rss/Utils/IndifferentAccessHash.html +142 -0
  26. data/docs/_index.html +300 -0
  27. data/docs/class_list.html +51 -0
  28. data/docs/css/common.css +1 -0
  29. data/docs/css/full_list.css +58 -0
  30. data/docs/css/style.css +496 -0
  31. data/docs/file.README.html +135 -0
  32. data/docs/file_list.html +56 -0
  33. data/docs/frames.html +17 -0
  34. data/docs/index.html +135 -0
  35. data/docs/js/app.js +303 -0
  36. data/docs/js/full_list.js +216 -0
  37. data/docs/js/jquery.js +4 -0
  38. data/docs/method_list.html +435 -0
  39. data/docs/top-level-namespace.html +110 -0
  40. data/html2rss.gemspec +3 -0
  41. data/lib/html2rss.rb +19 -4
  42. data/lib/html2rss/attribute_post_processors.rb +5 -3
  43. data/lib/html2rss/attribute_post_processors/parse_time.rb +29 -3
  44. data/lib/html2rss/attribute_post_processors/parse_uri.rb +20 -1
  45. data/lib/html2rss/attribute_post_processors/sanitize_html.rb +65 -3
  46. data/lib/html2rss/attribute_post_processors/substring.rb +24 -3
  47. data/lib/html2rss/attribute_post_processors/template.rb +37 -10
  48. data/lib/html2rss/config.rb +11 -12
  49. data/lib/html2rss/feed_builder.rb +8 -6
  50. data/lib/html2rss/item.rb +28 -19
  51. data/lib/html2rss/item_extractors.rb +29 -0
  52. data/lib/html2rss/item_extractors/attribute.rb +37 -0
  53. data/lib/html2rss/item_extractors/current_time.rb +21 -0
  54. data/lib/html2rss/item_extractors/href.rb +36 -0
  55. data/lib/html2rss/item_extractors/html.rb +34 -0
  56. data/lib/html2rss/item_extractors/static.rb +28 -0
  57. data/lib/html2rss/item_extractors/text.rb +32 -0
  58. data/lib/html2rss/utils.rb +25 -0
  59. data/lib/html2rss/version.rb +1 -1
  60. metadata +88 -4
  61. data/lib/html2rss/item_extractor.rb +0 -37
@@ -0,0 +1,297 @@
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <meta charset="utf-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>
7
+ Class: Html2rss::ItemExtractors::CurrentTime
8
+
9
+ &mdash; Documentation by YARD 0.9.20
10
+
11
+ </title>
12
+
13
+ <link rel="stylesheet" href="../../css/style.css" type="text/css" charset="utf-8" />
14
+
15
+ <link rel="stylesheet" href="../../css/common.css" type="text/css" charset="utf-8" />
16
+
17
+ <script type="text/javascript" charset="utf-8">
18
+ pathId = "Html2rss::ItemExtractors::CurrentTime";
19
+ relpath = '../../';
20
+ </script>
21
+
22
+
23
+ <script type="text/javascript" charset="utf-8" src="../../js/jquery.js"></script>
24
+
25
+ <script type="text/javascript" charset="utf-8" src="../../js/app.js"></script>
26
+
27
+
28
+ </head>
29
+ <body>
30
+ <div class="nav_wrap">
31
+ <iframe id="nav" src="../../class_list.html?1"></iframe>
32
+ <div id="resizer"></div>
33
+ </div>
34
+
35
+ <div id="main" tabindex="-1">
36
+ <div id="header">
37
+ <div id="menu">
38
+
39
+ <a href="../../_index.html">Index (C)</a> &raquo;
40
+ <span class='title'><span class='object_link'><a href="../../Html2rss.html" title="Html2rss (module)">Html2rss</a></span></span> &raquo; <span class='title'><span class='object_link'><a href="../ItemExtractors.html" title="Html2rss::ItemExtractors (module)">ItemExtractors</a></span></span>
41
+ &raquo;
42
+ <span class="title">CurrentTime</span>
43
+
44
+ </div>
45
+
46
+ <div id="search">
47
+
48
+ <a class="full_list_link" id="class_list_link"
49
+ href="../../class_list.html">
50
+
51
+ <svg width="24" height="24">
52
+ <rect x="0" y="4" width="24" height="4" rx="1" ry="1"></rect>
53
+ <rect x="0" y="12" width="24" height="4" rx="1" ry="1"></rect>
54
+ <rect x="0" y="20" width="24" height="4" rx="1" ry="1"></rect>
55
+ </svg>
56
+ </a>
57
+
58
+ </div>
59
+ <div class="clear"></div>
60
+ </div>
61
+
62
+ <div id="content"><h1>Class: Html2rss::ItemExtractors::CurrentTime
63
+
64
+
65
+
66
+ </h1>
67
+ <div class="box_info">
68
+
69
+ <dl>
70
+ <dt>Inherits:</dt>
71
+ <dd>
72
+ <span class="inheritName">Object</span>
73
+
74
+ <ul class="fullTree">
75
+ <li>Object</li>
76
+
77
+ <li class="next">Html2rss::ItemExtractors::CurrentTime</li>
78
+
79
+ </ul>
80
+ <a href="#" class="inheritanceTree">show all</a>
81
+
82
+ </dd>
83
+ </dl>
84
+
85
+
86
+
87
+
88
+
89
+
90
+
91
+
92
+
93
+
94
+
95
+ <dl>
96
+ <dt>Defined in:</dt>
97
+ <dd>lib/html2rss/item_extractors/current_time.rb</dd>
98
+ </dl>
99
+
100
+ </div>
101
+
102
+ <h2>Overview</h2><div class="docstring">
103
+ <div class="discussion">
104
+
105
+ <p>Returns the current Time.</p>
106
+
107
+ <p>YAML usage example:</p>
108
+
109
+ <pre class="code ruby"><code class="ruby">selectors:
110
+ updated:
111
+ extractor: current_time
112
+ </code></pre>
113
+
114
+
115
+ </div>
116
+ </div>
117
+ <div class="tags">
118
+
119
+
120
+ </div>
121
+
122
+
123
+
124
+
125
+
126
+
127
+
128
+ <h2>
129
+ Instance Method Summary
130
+ <small><a href="#" class="summary_toggle">collapse</a></small>
131
+ </h2>
132
+
133
+ <ul class="summary">
134
+
135
+ <li class="public ">
136
+ <span class="summary_signature">
137
+
138
+ <a href="#get-instance_method" title="#get (instance method)">#<strong>get</strong> &#x21d2; Time </a>
139
+
140
+
141
+
142
+ </span>
143
+
144
+
145
+
146
+
147
+
148
+
149
+
150
+
151
+
152
+ <span class="summary_desc"><div class='inline'></div></span>
153
+
154
+ </li>
155
+
156
+
157
+ <li class="public ">
158
+ <span class="summary_signature">
159
+
160
+ <a href="#initialize-instance_method" title="#initialize (instance method)">#<strong>initialize</strong>(_xml, _options) &#x21d2; CurrentTime </a>
161
+
162
+
163
+
164
+ </span>
165
+
166
+
167
+ <span class="note title constructor">constructor</span>
168
+
169
+
170
+
171
+
172
+
173
+
174
+
175
+
176
+ <span class="summary_desc"><div class='inline'>
177
+ <p>A new instance of CurrentTime.</p>
178
+ </div></span>
179
+
180
+ </li>
181
+
182
+
183
+ </ul>
184
+
185
+
186
+ <div id="constructor_details" class="method_details_list">
187
+ <h2>Constructor Details</h2>
188
+
189
+ <div class="method_details first">
190
+ <h3 class="signature first" id="initialize-instance_method">
191
+
192
+ #<strong>initialize</strong>(_xml, _options) &#x21d2; <tt><span class='object_link'><a href="" title="Html2rss::ItemExtractors::CurrentTime (class)">CurrentTime</a></span></tt>
193
+
194
+
195
+
196
+
197
+
198
+ </h3><div class="docstring">
199
+ <div class="discussion">
200
+
201
+ <p>Returns a new instance of CurrentTime</p>
202
+
203
+
204
+ </div>
205
+ </div>
206
+ <div class="tags">
207
+
208
+
209
+ </div><table class="source_code">
210
+ <tr>
211
+ <td>
212
+ <pre class="lines">
213
+
214
+
215
+ 12</pre>
216
+ </td>
217
+ <td>
218
+ <pre class="code"><span class="info file"># File 'lib/html2rss/item_extractors/current_time.rb', line 12</span>
219
+
220
+ <span class='kw'>def</span> <span class='id identifier rubyid_initialize'>initialize</span><span class='lparen'>(</span><span class='id identifier rubyid__xml'>_xml</span><span class='comma'>,</span> <span class='id identifier rubyid__options'>_options</span><span class='rparen'>)</span><span class='semicolon'>;</span> <span class='kw'>end</span></pre>
221
+ </td>
222
+ </tr>
223
+ </table>
224
+ </div>
225
+
226
+ </div>
227
+
228
+
229
+ <div id="instance_method_details" class="method_details_list">
230
+ <h2>Instance Method Details</h2>
231
+
232
+
233
+ <div class="method_details first">
234
+ <h3 class="signature first" id="get-instance_method">
235
+
236
+ #<strong>get</strong> &#x21d2; <tt>Time</tt>
237
+
238
+
239
+
240
+
241
+
242
+ </h3><div class="docstring">
243
+ <div class="discussion">
244
+
245
+
246
+ </div>
247
+ </div>
248
+ <div class="tags">
249
+
250
+ <p class="tag_title">Returns:</p>
251
+ <ul class="return">
252
+
253
+ <li>
254
+
255
+
256
+ <span class='type'>(<tt>Time</tt>)</span>
257
+
258
+
259
+
260
+ </li>
261
+
262
+ </ul>
263
+
264
+ </div><table class="source_code">
265
+ <tr>
266
+ <td>
267
+ <pre class="lines">
268
+
269
+
270
+ 16
271
+ 17
272
+ 18</pre>
273
+ </td>
274
+ <td>
275
+ <pre class="code"><span class="info file"># File 'lib/html2rss/item_extractors/current_time.rb', line 16</span>
276
+
277
+ <span class='kw'>def</span> <span class='id identifier rubyid_get'>get</span>
278
+ <span class='const'>Time</span><span class='period'>.</span><span class='id identifier rubyid_new'>new</span>
279
+ <span class='kw'>end</span></pre>
280
+ </td>
281
+ </tr>
282
+ </table>
283
+ </div>
284
+
285
+ </div>
286
+
287
+ </div>
288
+
289
+ <div id="footer">
290
+ Generated on Sun Jul 14 19:35:06 2019 by
291
+ <a href="http://yardoc.org" title="Yay! A Ruby Documentation Tool" target="_parent">yard</a>
292
+ 0.9.20 (ruby-2.6.3).
293
+ </div>
294
+
295
+ </div>
296
+ </body>
297
+ </html>
@@ -0,0 +1,319 @@
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <meta charset="utf-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>
7
+ Class: Html2rss::ItemExtractors::Href
8
+
9
+ &mdash; Documentation by YARD 0.9.20
10
+
11
+ </title>
12
+
13
+ <link rel="stylesheet" href="../../css/style.css" type="text/css" charset="utf-8" />
14
+
15
+ <link rel="stylesheet" href="../../css/common.css" type="text/css" charset="utf-8" />
16
+
17
+ <script type="text/javascript" charset="utf-8">
18
+ pathId = "Html2rss::ItemExtractors::Href";
19
+ relpath = '../../';
20
+ </script>
21
+
22
+
23
+ <script type="text/javascript" charset="utf-8" src="../../js/jquery.js"></script>
24
+
25
+ <script type="text/javascript" charset="utf-8" src="../../js/app.js"></script>
26
+
27
+
28
+ </head>
29
+ <body>
30
+ <div class="nav_wrap">
31
+ <iframe id="nav" src="../../class_list.html?1"></iframe>
32
+ <div id="resizer"></div>
33
+ </div>
34
+
35
+ <div id="main" tabindex="-1">
36
+ <div id="header">
37
+ <div id="menu">
38
+
39
+ <a href="../../_index.html">Index (H)</a> &raquo;
40
+ <span class='title'><span class='object_link'><a href="../../Html2rss.html" title="Html2rss (module)">Html2rss</a></span></span> &raquo; <span class='title'><span class='object_link'><a href="../ItemExtractors.html" title="Html2rss::ItemExtractors (module)">ItemExtractors</a></span></span>
41
+ &raquo;
42
+ <span class="title">Href</span>
43
+
44
+ </div>
45
+
46
+ <div id="search">
47
+
48
+ <a class="full_list_link" id="class_list_link"
49
+ href="../../class_list.html">
50
+
51
+ <svg width="24" height="24">
52
+ <rect x="0" y="4" width="24" height="4" rx="1" ry="1"></rect>
53
+ <rect x="0" y="12" width="24" height="4" rx="1" ry="1"></rect>
54
+ <rect x="0" y="20" width="24" height="4" rx="1" ry="1"></rect>
55
+ </svg>
56
+ </a>
57
+
58
+ </div>
59
+ <div class="clear"></div>
60
+ </div>
61
+
62
+ <div id="content"><h1>Class: Html2rss::ItemExtractors::Href
63
+
64
+
65
+
66
+ </h1>
67
+ <div class="box_info">
68
+
69
+ <dl>
70
+ <dt>Inherits:</dt>
71
+ <dd>
72
+ <span class="inheritName">Object</span>
73
+
74
+ <ul class="fullTree">
75
+ <li>Object</li>
76
+
77
+ <li class="next">Html2rss::ItemExtractors::Href</li>
78
+
79
+ </ul>
80
+ <a href="#" class="inheritanceTree">show all</a>
81
+
82
+ </dd>
83
+ </dl>
84
+
85
+
86
+
87
+
88
+
89
+
90
+
91
+
92
+
93
+
94
+
95
+ <dl>
96
+ <dt>Defined in:</dt>
97
+ <dd>lib/html2rss/item_extractors/href.rb</dd>
98
+ </dl>
99
+
100
+ </div>
101
+
102
+ <h2>Overview</h2><div class="docstring">
103
+ <div class="discussion">
104
+
105
+ <p>Returns the value of the <code>href</code> attribute. It always returns absolute URLs. If the extracted <code>href</code> value is a relative URL, it prepends the channel&#39;s URL.</p>
106
+
107
+ <p>Imagine this <code>a</code> HTML element with an <code>href</code> attribute:</p>
108
+
109
+ <pre class="code ruby"><code class="ruby">&lt;a href=&quot;/posts/latest-findings&quot;&gt;...&lt;/a&gt;
110
+ </code></pre>
111
+
112
+ <p>YAML usage example:</p>
113
+
114
+ <pre class="code ruby"><code class="ruby">channel:
115
+ url: http://blog-without-a-feed.example.com
116
+ ...
117
+ selectors:
118
+ link:
119
+ selector: a
120
+ extractor: href
121
+ </code></pre>
122
+
123
+ <p>Would return:</p>
124
+
125
+ <pre class="code ruby"><code class="ruby"><span class='tstring'><span class='tstring_beg'>&#39;</span><span class='tstring_content'>http://blog-without-a-feed.example.com/posts/latest-findings</span><span class='tstring_end'>&#39;</span></span>
126
+ </code></pre>
127
+
128
+
129
+ </div>
130
+ </div>
131
+ <div class="tags">
132
+
133
+
134
+ </div>
135
+
136
+
137
+
138
+
139
+
140
+
141
+
142
+ <h2>
143
+ Instance Method Summary
144
+ <small><a href="#" class="summary_toggle">collapse</a></small>
145
+ </h2>
146
+
147
+ <ul class="summary">
148
+
149
+ <li class="public ">
150
+ <span class="summary_signature">
151
+
152
+ <a href="#get-instance_method" title="#get (instance method)">#<strong>get</strong> &#x21d2; URI::HTTPS, URI::HTTP </a>
153
+
154
+
155
+
156
+ </span>
157
+
158
+
159
+
160
+
161
+
162
+
163
+
164
+
165
+
166
+ <span class="summary_desc"><div class='inline'></div></span>
167
+
168
+ </li>
169
+
170
+
171
+ <li class="public ">
172
+ <span class="summary_signature">
173
+
174
+ <a href="#initialize-instance_method" title="#initialize (instance method)">#<strong>initialize</strong>(xml, options) &#x21d2; Href </a>
175
+
176
+
177
+
178
+ </span>
179
+
180
+
181
+ <span class="note title constructor">constructor</span>
182
+
183
+
184
+
185
+
186
+
187
+
188
+
189
+
190
+ <span class="summary_desc"><div class='inline'>
191
+ <p>A new instance of Href.</p>
192
+ </div></span>
193
+
194
+ </li>
195
+
196
+
197
+ </ul>
198
+
199
+
200
+ <div id="constructor_details" class="method_details_list">
201
+ <h2>Constructor Details</h2>
202
+
203
+ <div class="method_details first">
204
+ <h3 class="signature first" id="initialize-instance_method">
205
+
206
+ #<strong>initialize</strong>(xml, options) &#x21d2; <tt><span class='object_link'><a href="" title="Html2rss::ItemExtractors::Href (class)">Href</a></span></tt>
207
+
208
+
209
+
210
+
211
+
212
+ </h3><div class="docstring">
213
+ <div class="discussion">
214
+
215
+ <p>Returns a new instance of Href</p>
216
+
217
+
218
+ </div>
219
+ </div>
220
+ <div class="tags">
221
+
222
+
223
+ </div><table class="source_code">
224
+ <tr>
225
+ <td>
226
+ <pre class="lines">
227
+
228
+
229
+ 24
230
+ 25
231
+ 26
232
+ 27
233
+ 28</pre>
234
+ </td>
235
+ <td>
236
+ <pre class="code"><span class="info file"># File 'lib/html2rss/item_extractors/href.rb', line 24</span>
237
+
238
+ <span class='kw'>def</span> <span class='id identifier rubyid_initialize'>initialize</span><span class='lparen'>(</span><span class='id identifier rubyid_xml'>xml</span><span class='comma'>,</span> <span class='id identifier rubyid_options'>options</span><span class='rparen'>)</span>
239
+ <span class='ivar'>@options</span> <span class='op'>=</span> <span class='id identifier rubyid_options'>options</span>
240
+ <span class='id identifier rubyid_element'>element</span> <span class='op'>=</span> <span class='const'><span class='object_link'><a href="../ItemExtractors.html" title="Html2rss::ItemExtractors (module)">ItemExtractors</a></span></span><span class='period'>.</span><span class='id identifier rubyid_element'><span class='object_link'><a href="../ItemExtractors.html#element-class_method" title="Html2rss::ItemExtractors.element (method)">element</a></span></span><span class='lparen'>(</span><span class='id identifier rubyid_xml'>xml</span><span class='comma'>,</span> <span class='id identifier rubyid_options'>options</span><span class='rparen'>)</span>
241
+ <span class='ivar'>@href</span> <span class='op'>=</span> <span class='id identifier rubyid_element'>element</span><span class='period'>.</span><span class='id identifier rubyid_attr'>attr</span><span class='lparen'>(</span><span class='tstring'><span class='tstring_beg'>&#39;</span><span class='tstring_content'>href</span><span class='tstring_end'>&#39;</span></span><span class='rparen'>)</span><span class='period'>.</span><span class='id identifier rubyid_to_s'>to_s</span>
242
+ <span class='kw'>end</span></pre>
243
+ </td>
244
+ </tr>
245
+ </table>
246
+ </div>
247
+
248
+ </div>
249
+
250
+
251
+ <div id="instance_method_details" class="method_details_list">
252
+ <h2>Instance Method Details</h2>
253
+
254
+
255
+ <div class="method_details first">
256
+ <h3 class="signature first" id="get-instance_method">
257
+
258
+ #<strong>get</strong> &#x21d2; <tt>URI::HTTPS</tt>, <tt>URI::HTTP</tt>
259
+
260
+
261
+
262
+
263
+
264
+ </h3><div class="docstring">
265
+ <div class="discussion">
266
+
267
+
268
+ </div>
269
+ </div>
270
+ <div class="tags">
271
+
272
+ <p class="tag_title">Returns:</p>
273
+ <ul class="return">
274
+
275
+ <li>
276
+
277
+
278
+ <span class='type'>(<tt>URI::HTTPS</tt>, <tt>URI::HTTP</tt>)</span>
279
+
280
+
281
+
282
+ </li>
283
+
284
+ </ul>
285
+
286
+ </div><table class="source_code">
287
+ <tr>
288
+ <td>
289
+ <pre class="lines">
290
+
291
+
292
+ 31
293
+ 32
294
+ 33</pre>
295
+ </td>
296
+ <td>
297
+ <pre class="code"><span class="info file"># File 'lib/html2rss/item_extractors/href.rb', line 31</span>
298
+
299
+ <span class='kw'>def</span> <span class='id identifier rubyid_get'>get</span>
300
+ <span class='id identifier rubyid_href'>href</span><span class='period'>.</span><span class='id identifier rubyid_start_with?'>start_with?</span><span class='lparen'>(</span><span class='tstring'><span class='tstring_beg'>&#39;</span><span class='tstring_content'>http</span><span class='tstring_end'>&#39;</span></span><span class='rparen'>)</span> <span class='op'>?</span> <span class='id identifier rubyid_absolute_url'>absolute_url</span> <span class='op'>:</span> <span class='id identifier rubyid_build_absolute_url_from_relative'>build_absolute_url_from_relative</span>
301
+ <span class='kw'>end</span></pre>
302
+ </td>
303
+ </tr>
304
+ </table>
305
+ </div>
306
+
307
+ </div>
308
+
309
+ </div>
310
+
311
+ <div id="footer">
312
+ Generated on Sun Jul 14 19:35:05 2019 by
313
+ <a href="http://yardoc.org" title="Yay! A Ruby Documentation Tool" target="_parent">yard</a>
314
+ 0.9.20 (ruby-2.6.3).
315
+ </div>
316
+
317
+ </div>
318
+ </body>
319
+ </html>