html2rss 0.3.3 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +18 -11
  3. data/.travis.yml +3 -3
  4. data/.yardopts +6 -0
  5. data/Gemfile.lock +23 -5
  6. data/README.md +2 -1
  7. data/docs/Html2rss.html +353 -0
  8. data/docs/Html2rss/AttributePostProcessors.html +203 -0
  9. data/docs/Html2rss/AttributePostProcessors/ParseTime.html +332 -0
  10. data/docs/Html2rss/AttributePostProcessors/ParseUri.html +314 -0
  11. data/docs/Html2rss/AttributePostProcessors/SanitizeHtml.html +346 -0
  12. data/docs/Html2rss/AttributePostProcessors/Substring.html +321 -0
  13. data/docs/Html2rss/AttributePostProcessors/Template.html +336 -0
  14. data/docs/Html2rss/Config.html +795 -0
  15. data/docs/Html2rss/FeedBuilder.html +295 -0
  16. data/docs/Html2rss/Item.html +654 -0
  17. data/docs/Html2rss/ItemExtractors.html +297 -0
  18. data/docs/Html2rss/ItemExtractors/Attribute.html +317 -0
  19. data/docs/Html2rss/ItemExtractors/CurrentTime.html +297 -0
  20. data/docs/Html2rss/ItemExtractors/Href.html +319 -0
  21. data/docs/Html2rss/ItemExtractors/Html.html +314 -0
  22. data/docs/Html2rss/ItemExtractors/Static.html +301 -0
  23. data/docs/Html2rss/ItemExtractors/Text.html +312 -0
  24. data/docs/Html2rss/Utils.html +115 -0
  25. data/docs/Html2rss/Utils/IndifferentAccessHash.html +142 -0
  26. data/docs/_index.html +300 -0
  27. data/docs/class_list.html +51 -0
  28. data/docs/css/common.css +1 -0
  29. data/docs/css/full_list.css +58 -0
  30. data/docs/css/style.css +496 -0
  31. data/docs/file.README.html +135 -0
  32. data/docs/file_list.html +56 -0
  33. data/docs/frames.html +17 -0
  34. data/docs/index.html +135 -0
  35. data/docs/js/app.js +303 -0
  36. data/docs/js/full_list.js +216 -0
  37. data/docs/js/jquery.js +4 -0
  38. data/docs/method_list.html +435 -0
  39. data/docs/top-level-namespace.html +110 -0
  40. data/html2rss.gemspec +3 -0
  41. data/lib/html2rss.rb +19 -4
  42. data/lib/html2rss/attribute_post_processors.rb +5 -3
  43. data/lib/html2rss/attribute_post_processors/parse_time.rb +29 -3
  44. data/lib/html2rss/attribute_post_processors/parse_uri.rb +20 -1
  45. data/lib/html2rss/attribute_post_processors/sanitize_html.rb +65 -3
  46. data/lib/html2rss/attribute_post_processors/substring.rb +24 -3
  47. data/lib/html2rss/attribute_post_processors/template.rb +37 -10
  48. data/lib/html2rss/config.rb +11 -12
  49. data/lib/html2rss/feed_builder.rb +8 -6
  50. data/lib/html2rss/item.rb +28 -19
  51. data/lib/html2rss/item_extractors.rb +29 -0
  52. data/lib/html2rss/item_extractors/attribute.rb +37 -0
  53. data/lib/html2rss/item_extractors/current_time.rb +21 -0
  54. data/lib/html2rss/item_extractors/href.rb +36 -0
  55. data/lib/html2rss/item_extractors/html.rb +34 -0
  56. data/lib/html2rss/item_extractors/static.rb +28 -0
  57. data/lib/html2rss/item_extractors/text.rb +32 -0
  58. data/lib/html2rss/utils.rb +25 -0
  59. data/lib/html2rss/version.rb +1 -1
  60. metadata +88 -4
  61. data/lib/html2rss/item_extractor.rb +0 -37
@@ -0,0 +1,203 @@
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <meta charset="utf-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>
7
+ Module: Html2rss::AttributePostProcessors
8
+
9
+ &mdash; Documentation by YARD 0.9.20
10
+
11
+ </title>
12
+
13
+ <link rel="stylesheet" href="../css/style.css" type="text/css" charset="utf-8" />
14
+
15
+ <link rel="stylesheet" href="../css/common.css" type="text/css" charset="utf-8" />
16
+
17
+ <script type="text/javascript" charset="utf-8">
18
+ pathId = "Html2rss::AttributePostProcessors";
19
+ relpath = '../';
20
+ </script>
21
+
22
+
23
+ <script type="text/javascript" charset="utf-8" src="../js/jquery.js"></script>
24
+
25
+ <script type="text/javascript" charset="utf-8" src="../js/app.js"></script>
26
+
27
+
28
+ </head>
29
+ <body>
30
+ <div class="nav_wrap">
31
+ <iframe id="nav" src="../class_list.html?1"></iframe>
32
+ <div id="resizer"></div>
33
+ </div>
34
+
35
+ <div id="main" tabindex="-1">
36
+ <div id="header">
37
+ <div id="menu">
38
+
39
+ <a href="../_index.html">Index (A)</a> &raquo;
40
+ <span class='title'><span class='object_link'><a href="../Html2rss.html" title="Html2rss (module)">Html2rss</a></span></span>
41
+ &raquo;
42
+ <span class="title">AttributePostProcessors</span>
43
+
44
+ </div>
45
+
46
+ <div id="search">
47
+
48
+ <a class="full_list_link" id="class_list_link"
49
+ href="../class_list.html">
50
+
51
+ <svg width="24" height="24">
52
+ <rect x="0" y="4" width="24" height="4" rx="1" ry="1"></rect>
53
+ <rect x="0" y="12" width="24" height="4" rx="1" ry="1"></rect>
54
+ <rect x="0" y="20" width="24" height="4" rx="1" ry="1"></rect>
55
+ </svg>
56
+ </a>
57
+
58
+ </div>
59
+ <div class="clear"></div>
60
+ </div>
61
+
62
+ <div id="content"><h1>Module: Html2rss::AttributePostProcessors
63
+
64
+
65
+
66
+ </h1>
67
+ <div class="box_info">
68
+
69
+
70
+
71
+
72
+
73
+
74
+
75
+
76
+
77
+
78
+
79
+ <dl>
80
+ <dt>Defined in:</dt>
81
+ <dd>lib/html2rss/attribute_post_processors.rb<span class="defines">,<br />
82
+ lib/html2rss/attribute_post_processors/template.rb,<br /> lib/html2rss/attribute_post_processors/parse_uri.rb,<br /> lib/html2rss/attribute_post_processors/substring.rb,<br /> lib/html2rss/attribute_post_processors/parse_time.rb,<br /> lib/html2rss/attribute_post_processors/sanitize_html.rb</span>
83
+ </dd>
84
+ </dl>
85
+
86
+ </div>
87
+
88
+ <h2>Overview</h2><div class="docstring">
89
+ <div class="discussion">
90
+
91
+ <p>Provides a namespace for attribute post processors.</p>
92
+
93
+
94
+ </div>
95
+ </div>
96
+ <div class="tags">
97
+
98
+
99
+ </div><h2>Defined Under Namespace</h2>
100
+ <p class="children">
101
+
102
+
103
+
104
+
105
+ <strong class="classes">Classes:</strong> <span class='object_link'><a href="AttributePostProcessors/ParseTime.html" title="Html2rss::AttributePostProcessors::ParseTime (class)">ParseTime</a></span>, <span class='object_link'><a href="AttributePostProcessors/ParseUri.html" title="Html2rss::AttributePostProcessors::ParseUri (class)">ParseUri</a></span>, <span class='object_link'><a href="AttributePostProcessors/SanitizeHtml.html" title="Html2rss::AttributePostProcessors::SanitizeHtml (class)">SanitizeHtml</a></span>, <span class='object_link'><a href="AttributePostProcessors/Substring.html" title="Html2rss::AttributePostProcessors::Substring (class)">Substring</a></span>, <span class='object_link'><a href="AttributePostProcessors/Template.html" title="Html2rss::AttributePostProcessors::Template (class)">Template</a></span>
106
+
107
+
108
+ </p>
109
+
110
+
111
+
112
+
113
+
114
+
115
+
116
+
117
+ <h2>
118
+ Class Method Summary
119
+ <small><a href="#" class="summary_toggle">collapse</a></small>
120
+ </h2>
121
+
122
+ <ul class="summary">
123
+
124
+ <li class="public ">
125
+ <span class="summary_signature">
126
+
127
+ <a href="#get_processor-class_method" title="get_processor (class method)">.<strong>get_processor</strong>(name) &#x21d2; Object </a>
128
+
129
+
130
+
131
+ </span>
132
+
133
+
134
+
135
+
136
+
137
+
138
+
139
+
140
+
141
+ <span class="summary_desc"><div class='inline'></div></span>
142
+
143
+ </li>
144
+
145
+
146
+ </ul>
147
+
148
+
149
+
150
+
151
+ <div id="class_method_details" class="method_details_list">
152
+ <h2>Class Method Details</h2>
153
+
154
+
155
+ <div class="method_details first">
156
+ <h3 class="signature first" id="get_processor-class_method">
157
+
158
+ .<strong>get_processor</strong>(name) &#x21d2; <tt>Object</tt>
159
+
160
+
161
+
162
+
163
+
164
+ </h3><table class="source_code">
165
+ <tr>
166
+ <td>
167
+ <pre class="lines">
168
+
169
+
170
+ 11
171
+ 12
172
+ 13
173
+ 14
174
+ 15
175
+ 16</pre>
176
+ </td>
177
+ <td>
178
+ <pre class="code"><span class="info file"># File 'lib/html2rss/attribute_post_processors.rb', line 11</span>
179
+
180
+ <span class='kw'>def</span> <span class='kw'>self</span><span class='period'>.</span><span class='id identifier rubyid_get_processor'>get_processor</span><span class='lparen'>(</span><span class='id identifier rubyid_name'>name</span><span class='rparen'>)</span>
181
+ <span class='id identifier rubyid_camel_cased_name'>camel_cased_name</span> <span class='op'>=</span> <span class='id identifier rubyid_name'>name</span><span class='period'>.</span><span class='id identifier rubyid_split'>split</span><span class='lparen'>(</span><span class='tstring'><span class='tstring_beg'>&#39;</span><span class='tstring_content'>_</span><span class='tstring_end'>&#39;</span></span><span class='rparen'>)</span><span class='period'>.</span><span class='id identifier rubyid_map'>map</span><span class='lparen'>(</span><span class='op'>&amp;</span><span class='symbol'>:capitalize</span><span class='rparen'>)</span><span class='period'>.</span><span class='id identifier rubyid_join'>join</span>
182
+ <span class='id identifier rubyid_class_name'>class_name</span> <span class='op'>=</span> <span class='lbracket'>[</span><span class='tstring'><span class='tstring_beg'>&#39;</span><span class='tstring_content'>Html2rss</span><span class='tstring_end'>&#39;</span></span><span class='comma'>,</span> <span class='tstring'><span class='tstring_beg'>&#39;</span><span class='tstring_content'>AttributePostProcessors</span><span class='tstring_end'>&#39;</span></span><span class='comma'>,</span> <span class='id identifier rubyid_camel_cased_name'>camel_cased_name</span><span class='rbracket'>]</span><span class='period'>.</span><span class='id identifier rubyid_join'>join</span><span class='lparen'>(</span><span class='tstring'><span class='tstring_beg'>&#39;</span><span class='tstring_content'>::</span><span class='tstring_end'>&#39;</span></span><span class='rparen'>)</span>
183
+
184
+ <span class='const'>Object</span><span class='period'>.</span><span class='id identifier rubyid_const_get'>const_get</span><span class='lparen'>(</span><span class='id identifier rubyid_class_name'>class_name</span><span class='rparen'>)</span>
185
+ <span class='kw'>end</span></pre>
186
+ </td>
187
+ </tr>
188
+ </table>
189
+ </div>
190
+
191
+ </div>
192
+
193
+ </div>
194
+
195
+ <div id="footer">
196
+ Generated on Sun Jul 14 19:35:05 2019 by
197
+ <a href="http://yardoc.org" title="Yay! A Ruby Documentation Tool" target="_parent">yard</a>
198
+ 0.9.20 (ruby-2.6.3).
199
+ </div>
200
+
201
+ </div>
202
+ </body>
203
+ </html>
@@ -0,0 +1,332 @@
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <meta charset="utf-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>
7
+ Class: Html2rss::AttributePostProcessors::ParseTime
8
+
9
+ &mdash; Documentation by YARD 0.9.20
10
+
11
+ </title>
12
+
13
+ <link rel="stylesheet" href="../../css/style.css" type="text/css" charset="utf-8" />
14
+
15
+ <link rel="stylesheet" href="../../css/common.css" type="text/css" charset="utf-8" />
16
+
17
+ <script type="text/javascript" charset="utf-8">
18
+ pathId = "Html2rss::AttributePostProcessors::ParseTime";
19
+ relpath = '../../';
20
+ </script>
21
+
22
+
23
+ <script type="text/javascript" charset="utf-8" src="../../js/jquery.js"></script>
24
+
25
+ <script type="text/javascript" charset="utf-8" src="../../js/app.js"></script>
26
+
27
+
28
+ </head>
29
+ <body>
30
+ <div class="nav_wrap">
31
+ <iframe id="nav" src="../../class_list.html?1"></iframe>
32
+ <div id="resizer"></div>
33
+ </div>
34
+
35
+ <div id="main" tabindex="-1">
36
+ <div id="header">
37
+ <div id="menu">
38
+
39
+ <a href="../../_index.html">Index (P)</a> &raquo;
40
+ <span class='title'><span class='object_link'><a href="../../Html2rss.html" title="Html2rss (module)">Html2rss</a></span></span> &raquo; <span class='title'><span class='object_link'><a href="../AttributePostProcessors.html" title="Html2rss::AttributePostProcessors (module)">AttributePostProcessors</a></span></span>
41
+ &raquo;
42
+ <span class="title">ParseTime</span>
43
+
44
+ </div>
45
+
46
+ <div id="search">
47
+
48
+ <a class="full_list_link" id="class_list_link"
49
+ href="../../class_list.html">
50
+
51
+ <svg width="24" height="24">
52
+ <rect x="0" y="4" width="24" height="4" rx="1" ry="1"></rect>
53
+ <rect x="0" y="12" width="24" height="4" rx="1" ry="1"></rect>
54
+ <rect x="0" y="20" width="24" height="4" rx="1" ry="1"></rect>
55
+ </svg>
56
+ </a>
57
+
58
+ </div>
59
+ <div class="clear"></div>
60
+ </div>
61
+
62
+ <div id="content"><h1>Class: Html2rss::AttributePostProcessors::ParseTime
63
+
64
+
65
+
66
+ </h1>
67
+ <div class="box_info">
68
+
69
+ <dl>
70
+ <dt>Inherits:</dt>
71
+ <dd>
72
+ <span class="inheritName">Object</span>
73
+
74
+ <ul class="fullTree">
75
+ <li>Object</li>
76
+
77
+ <li class="next">Html2rss::AttributePostProcessors::ParseTime</li>
78
+
79
+ </ul>
80
+ <a href="#" class="inheritanceTree">show all</a>
81
+
82
+ </dd>
83
+ </dl>
84
+
85
+
86
+
87
+
88
+
89
+
90
+
91
+
92
+
93
+
94
+
95
+ <dl>
96
+ <dt>Defined in:</dt>
97
+ <dd>lib/html2rss/attribute_post_processors/parse_time.rb</dd>
98
+ </dl>
99
+
100
+ </div>
101
+
102
+ <h2>Overview</h2><div class="docstring">
103
+ <div class="discussion">
104
+
105
+ <p>Returns the <a href="https://www.w3.org/Protocols/rfc822" target="_parent" title="RFC822">RFC822</a> representation of a time.</p>
106
+
107
+ <p>Imagine this HTML structure:</p>
108
+
109
+ <pre class="code ruby"><code class="ruby">&lt;p&gt;Published on &lt;span&gt;2019-07-02&lt;/span&gt;&lt;/p&gt;
110
+ </code></pre>
111
+
112
+ <p>YAML usage example:</p>
113
+
114
+ <pre class="code ruby"><code class="ruby">selectors:
115
+ description:
116
+ selector: span
117
+ post_process:
118
+ name: &#39;parse_time&#39;
119
+ </code></pre>
120
+
121
+ <p>Would return:</p>
122
+
123
+ <pre class="code ruby"><code class="ruby"><span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>Tue, 02 Jul 2019 00:00:00 +0000</span><span class='tstring_end'>&quot;</span></span>
124
+ </code></pre>
125
+
126
+ <p>It uses <a href="https://ruby-doc.org/stdlib-2.5.3/libdoc/time/rdoc/Time.html#method-c-parse" target="_parent" title="Time.parse">Time.parse</a>. As of now it ignores time zones and always falls back to the UTC time zone.</p>
127
+
128
+
129
+ </div>
130
+ </div>
131
+ <div class="tags">
132
+
133
+
134
+ </div>
135
+
136
+
137
+
138
+
139
+
140
+
141
+
142
+ <h2>
143
+ Instance Method Summary
144
+ <small><a href="#" class="summary_toggle">collapse</a></small>
145
+ </h2>
146
+
147
+ <ul class="summary">
148
+
149
+ <li class="public ">
150
+ <span class="summary_signature">
151
+
152
+ <a href="#get-instance_method" title="#get (instance method)">#<strong>get</strong> &#x21d2; String </a>
153
+
154
+
155
+
156
+ </span>
157
+
158
+
159
+
160
+
161
+
162
+
163
+
164
+
165
+
166
+ <span class="summary_desc"><div class='inline'>
167
+ <p>Rfc822 formatted time.</p>
168
+ </div></span>
169
+
170
+ </li>
171
+
172
+
173
+ <li class="public ">
174
+ <span class="summary_signature">
175
+
176
+ <a href="#initialize-instance_method" title="#initialize (instance method)">#<strong>initialize</strong>(value, _options, _item) &#x21d2; ParseTime </a>
177
+
178
+
179
+
180
+ </span>
181
+
182
+
183
+ <span class="note title constructor">constructor</span>
184
+
185
+
186
+
187
+
188
+
189
+
190
+
191
+
192
+ <span class="summary_desc"><div class='inline'>
193
+ <p>A new instance of ParseTime.</p>
194
+ </div></span>
195
+
196
+ </li>
197
+
198
+
199
+ </ul>
200
+
201
+
202
+ <div id="constructor_details" class="method_details_list">
203
+ <h2>Constructor Details</h2>
204
+
205
+ <div class="method_details first">
206
+ <h3 class="signature first" id="initialize-instance_method">
207
+
208
+ #<strong>initialize</strong>(value, _options, _item) &#x21d2; <tt><span class='object_link'><a href="" title="Html2rss::AttributePostProcessors::ParseTime (class)">ParseTime</a></span></tt>
209
+
210
+
211
+
212
+
213
+
214
+ </h3><div class="docstring">
215
+ <div class="discussion">
216
+
217
+ <p>Returns a new instance of ParseTime</p>
218
+
219
+
220
+ </div>
221
+ </div>
222
+ <div class="tags">
223
+
224
+
225
+ </div><table class="source_code">
226
+ <tr>
227
+ <td>
228
+ <pre class="lines">
229
+
230
+
231
+ 24
232
+ 25
233
+ 26</pre>
234
+ </td>
235
+ <td>
236
+ <pre class="code"><span class="info file"># File 'lib/html2rss/attribute_post_processors/parse_time.rb', line 24</span>
237
+
238
+ <span class='kw'>def</span> <span class='id identifier rubyid_initialize'>initialize</span><span class='lparen'>(</span><span class='id identifier rubyid_value'>value</span><span class='comma'>,</span> <span class='id identifier rubyid__options'>_options</span><span class='comma'>,</span> <span class='id identifier rubyid__item'>_item</span><span class='rparen'>)</span>
239
+ <span class='ivar'>@value</span> <span class='op'>=</span> <span class='id identifier rubyid_value'>value</span><span class='period'>.</span><span class='id identifier rubyid_to_s'>to_s</span>
240
+ <span class='kw'>end</span></pre>
241
+ </td>
242
+ </tr>
243
+ </table>
244
+ </div>
245
+
246
+ </div>
247
+
248
+
249
+ <div id="instance_method_details" class="method_details_list">
250
+ <h2>Instance Method Details</h2>
251
+
252
+
253
+ <div class="method_details first">
254
+ <h3 class="signature first" id="get-instance_method">
255
+
256
+ #<strong>get</strong> &#x21d2; <tt>String</tt>
257
+
258
+
259
+
260
+
261
+
262
+ </h3><div class="docstring">
263
+ <div class="discussion">
264
+
265
+ <p>Returns rfc822 formatted time</p>
266
+
267
+
268
+ </div>
269
+ </div>
270
+ <div class="tags">
271
+
272
+ <p class="tag_title">Returns:</p>
273
+ <ul class="return">
274
+
275
+ <li>
276
+
277
+
278
+ <span class='type'>(<tt>String</tt>)</span>
279
+
280
+
281
+
282
+ &mdash;
283
+ <div class='inline'>
284
+ <p>rfc822 formatted time</p>
285
+ </div>
286
+
287
+ </li>
288
+
289
+ </ul>
290
+
291
+ </div><table class="source_code">
292
+ <tr>
293
+ <td>
294
+ <pre class="lines">
295
+
296
+
297
+ 30
298
+ 31
299
+ 32
300
+ 33
301
+ 34
302
+ 35
303
+ 36</pre>
304
+ </td>
305
+ <td>
306
+ <pre class="code"><span class="info file"># File 'lib/html2rss/attribute_post_processors/parse_time.rb', line 30</span>
307
+
308
+ <span class='kw'>def</span> <span class='id identifier rubyid_get'>get</span>
309
+ <span class='id identifier rubyid_prev_tz'>prev_tz</span> <span class='op'>=</span> <span class='const'>ENV</span><span class='lbracket'>[</span><span class='tstring'><span class='tstring_beg'>&#39;</span><span class='tstring_content'>TZ</span><span class='tstring_end'>&#39;</span></span><span class='rbracket'>]</span>
310
+ <span class='const'>ENV</span><span class='lbracket'>[</span><span class='tstring'><span class='tstring_beg'>&#39;</span><span class='tstring_content'>TZ</span><span class='tstring_end'>&#39;</span></span><span class='rbracket'>]</span> <span class='op'>=</span> <span class='tstring'><span class='tstring_beg'>&#39;</span><span class='tstring_content'>UTC</span><span class='tstring_end'>&#39;</span></span>
311
+ <span class='const'>Time</span><span class='period'>.</span><span class='id identifier rubyid_parse'>parse</span><span class='lparen'>(</span><span class='ivar'>@value</span><span class='rparen'>)</span><span class='period'>.</span><span class='id identifier rubyid_rfc822'>rfc822</span>
312
+ <span class='kw'>ensure</span>
313
+ <span class='const'>ENV</span><span class='lbracket'>[</span><span class='tstring'><span class='tstring_beg'>&#39;</span><span class='tstring_content'>TZ</span><span class='tstring_end'>&#39;</span></span><span class='rbracket'>]</span> <span class='op'>=</span> <span class='id identifier rubyid_prev_tz'>prev_tz</span>
314
+ <span class='kw'>end</span></pre>
315
+ </td>
316
+ </tr>
317
+ </table>
318
+ </div>
319
+
320
+ </div>
321
+
322
+ </div>
323
+
324
+ <div id="footer">
325
+ Generated on Sun Jul 14 19:35:06 2019 by
326
+ <a href="http://yardoc.org" title="Yay! A Ruby Documentation Tool" target="_parent">yard</a>
327
+ 0.9.20 (ruby-2.6.3).
328
+ </div>
329
+
330
+ </div>
331
+ </body>
332
+ </html>