html2rss 0.8.1 → 0.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -1
- data/Gemfile.lock +5 -3
- data/README.md +6 -4
- data/html2rss.gemspec +1 -1
- data/lib/html2rss/attribute_post_processors/gsub.rb +3 -2
- data/lib/html2rss/attribute_post_processors/parse_uri.rb +1 -1
- data/lib/html2rss/attribute_post_processors/substring.rb +2 -2
- data/lib/html2rss/attribute_post_processors/template.rb +15 -12
- data/lib/html2rss/config.rb +34 -26
- data/lib/html2rss/feed_builder.rb +35 -32
- data/lib/html2rss/item.rb +16 -14
- data/lib/html2rss/item_extractors.rb +1 -1
- data/lib/html2rss/item_extractors/attribute.rb +1 -1
- data/lib/html2rss/item_extractors/href.rb +2 -2
- data/lib/html2rss/item_extractors/static.rb +2 -2
- data/lib/html2rss/utils.rb +12 -12
- data/lib/html2rss/version.rb +1 -1
- metadata +16 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dc1a99cdc487b0367e8af1145d876b84d043cf5abe31fdf8e7662e6ac3ab72c9
|
4
|
+
data.tar.gz: 57c41b955c3d35c7681d2213518d7faca1f5fe536f08fb6715547180b31c3312
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9d47c5c10bacc651a25fa90b1054d5658c7d4a1fe803a22dbcbeda42e931acc55cbbb39b8d7eafaf948bcc497d7961c4dca7c795cd955a7b6bbfe58c6a3925f2
|
7
|
+
data.tar.gz: 8521bcc416c72c86407c99b2e60950c5b3c683ea8aa05012e0ee829e6cfc33e8320564480c99f824d2c2da7f1732d638da26589fc4a4875e0e0a75b3c4e6a1d2
|
data/CHANGELOG.md
CHANGED
@@ -1,4 +1,13 @@
|
|
1
|
-
# [](https://github.com/gildesmarais/html2rss/compare/v0.8.
|
1
|
+
# [](https://github.com/gildesmarais/html2rss/compare/v0.8.2...v) (2019-11-09)
|
2
|
+
|
3
|
+
|
4
|
+
|
5
|
+
## [0.8.2](https://github.com/gildesmarais/html2rss/compare/v0.8.1...v0.8.2) (2019-11-09)
|
6
|
+
|
7
|
+
|
8
|
+
### Features
|
9
|
+
|
10
|
+
* improve url handling by sanitizing and normalizing urls ([#70](https://github.com/gildesmarais/html2rss/issues/70)) ([02cd551](https://github.com/gildesmarais/html2rss/commit/02cd551))
|
2
11
|
|
3
12
|
|
4
13
|
|
data/Gemfile.lock
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
html2rss (0.8.
|
4
|
+
html2rss (0.8.2)
|
5
5
|
activesupport (~> 5.0)
|
6
|
+
addressable (~> 2.7)
|
6
7
|
builder
|
7
8
|
faraday (~> 0.15)
|
8
9
|
faraday_middleware (~> 0.13)
|
9
|
-
hashie (~> 3.6)
|
10
10
|
kramdown
|
11
11
|
mime-types (> 3.0)
|
12
12
|
nokogiri (>= 1.10, < 2.0)
|
@@ -23,6 +23,8 @@ GEM
|
|
23
23
|
i18n (>= 0.7, < 2)
|
24
24
|
minitest (~> 5.1)
|
25
25
|
tzinfo (~> 1.1)
|
26
|
+
addressable (2.7.0)
|
27
|
+
public_suffix (>= 2.0.2, < 5.0)
|
26
28
|
ast (2.4.0)
|
27
29
|
builder (3.2.3)
|
28
30
|
byebug (11.0.1)
|
@@ -40,7 +42,6 @@ GEM
|
|
40
42
|
multipart-post (>= 1.2, < 3)
|
41
43
|
faraday_middleware (0.13.1)
|
42
44
|
faraday (>= 0.7.4, < 1.0)
|
43
|
-
hashie (3.6.0)
|
44
45
|
i18n (1.7.0)
|
45
46
|
concurrent-ruby (~> 1.0)
|
46
47
|
jaro_winkler (1.5.4)
|
@@ -59,6 +60,7 @@ GEM
|
|
59
60
|
parallel (1.18.0)
|
60
61
|
parser (2.6.5.0)
|
61
62
|
ast (~> 2.4.0)
|
63
|
+
public_suffix (4.0.1)
|
62
64
|
rainbow (3.0.0)
|
63
65
|
reverse_markdown (1.3.0)
|
64
66
|
nokogiri
|
data/README.md
CHANGED
@@ -61,6 +61,8 @@ The contents of both hashes are explained below.
|
|
61
61
|
| `description` | optional | String | auto-generated | |
|
62
62
|
| `ttl` | optional | Integer | `360` | TTL in _minutes_ |
|
63
63
|
| `time_zone` | optional | String | `'UTC'` | TimeZone name |
|
64
|
+
| `language` | optional | String | `'en'` | Language code |
|
65
|
+
| `author` | optional | String | | Format: `email (Name)'` |
|
64
66
|
| `headers` | optional | Hash | `{}` | Set HTTP request headers. See notes below. |
|
65
67
|
| `json` | optional | Boolean | `false` | Handle JSON response. See notes below. |
|
66
68
|
|
@@ -413,10 +415,10 @@ Use this to e.g. have Cookie or Authorization information sent or to spoof the U
|
|
413
415
|
url: 'https://example.com',
|
414
416
|
title: "Example with http headers",
|
415
417
|
headers: {
|
416
|
-
"User-Agent"
|
417
|
-
"X-Something"
|
418
|
-
"Authorization"
|
419
|
-
"Cookie"
|
418
|
+
"User-Agent": "html2rss-request",
|
419
|
+
"X-Something": "Foobar",
|
420
|
+
"Authorization": "Token deadbea7",
|
421
|
+
"Cookie": "monster=MeWantCookie"
|
420
422
|
}
|
421
423
|
},
|
422
424
|
selectors: {}
|
data/html2rss.gemspec
CHANGED
@@ -30,10 +30,10 @@ Gem::Specification.new do |spec|
|
|
30
30
|
spec.require_paths = ['lib']
|
31
31
|
|
32
32
|
spec.add_dependency 'activesupport', '~> 5.0'
|
33
|
+
spec.add_dependency 'addressable', '~> 2.7'
|
33
34
|
spec.add_dependency 'builder'
|
34
35
|
spec.add_dependency 'faraday', '~> 0.15'
|
35
36
|
spec.add_dependency 'faraday_middleware', '~> 0.13'
|
36
|
-
spec.add_dependency 'hashie', '~> 3.6'
|
37
37
|
spec.add_dependency 'kramdown'
|
38
38
|
spec.add_dependency 'mime-types', '> 3.0'
|
39
39
|
spec.add_dependency 'nokogiri', '>= 1.10', '< 2.0'
|
@@ -27,8 +27,9 @@ module Html2rss
|
|
27
27
|
class Gsub
|
28
28
|
def initialize(value, env)
|
29
29
|
@value = value
|
30
|
-
|
31
|
-
@
|
30
|
+
options = env[:options]
|
31
|
+
@pattern = options[:pattern].to_regexp || options[:pattern]
|
32
|
+
@replacement = options[:replacement]
|
32
33
|
end
|
33
34
|
|
34
35
|
##
|
@@ -34,8 +34,8 @@ module Html2rss
|
|
34
34
|
##
|
35
35
|
# @return [String]
|
36
36
|
def get
|
37
|
-
ending = @options.fetch(
|
38
|
-
@value[@options[
|
37
|
+
ending = @options.fetch(:end, @value.length).to_i
|
38
|
+
@value[@options[:start].to_i..ending]
|
39
39
|
end
|
40
40
|
end
|
41
41
|
end
|
@@ -33,33 +33,36 @@ module Html2rss
|
|
33
33
|
@value = value
|
34
34
|
@options = env[:options]
|
35
35
|
@item = env[:item]
|
36
|
+
@string = @options[:string]
|
36
37
|
end
|
37
38
|
|
38
39
|
##
|
39
40
|
# @return [String]
|
40
41
|
def get
|
41
|
-
if @options[
|
42
|
-
string % methods
|
43
|
-
else
|
44
|
-
names = string.scan(/%[<|{](\w*)[>|}]/).flatten
|
45
|
-
names.uniq!
|
42
|
+
return format_string_with_methods if @options[:methods]
|
46
43
|
|
47
|
-
|
48
|
-
|
44
|
+
names = string.scan(/%[<|{](\w*)[>|}]/)
|
45
|
+
names.flatten!
|
46
|
+
names.compact!
|
47
|
+
names.map!(&:to_sym)
|
48
|
+
|
49
|
+
format(string, names.map { |name| [name, item_value(name)] }.to_h)
|
49
50
|
end
|
50
51
|
|
51
52
|
private
|
52
53
|
|
53
|
-
|
54
|
-
@options['string']
|
55
|
-
end
|
54
|
+
attr_reader :string
|
56
55
|
|
57
56
|
def methods
|
58
|
-
@methods ||= @options[
|
57
|
+
@methods ||= @options[:methods].map(&method(:item_value))
|
58
|
+
end
|
59
|
+
|
60
|
+
def format_string_with_methods
|
61
|
+
string % methods
|
59
62
|
end
|
60
63
|
|
61
64
|
def item_value(method_name)
|
62
|
-
method_name.
|
65
|
+
method_name.to_sym == :self ? @value.to_s : @item.public_send(method_name).to_s
|
63
66
|
end
|
64
67
|
end
|
65
68
|
end
|
data/lib/html2rss/config.rb
CHANGED
@@ -1,79 +1,87 @@
|
|
1
|
+
require 'active_support/core_ext/hash'
|
2
|
+
|
1
3
|
module Html2rss
|
2
4
|
##
|
3
5
|
# The Config class abstracts from the config data structure and
|
4
6
|
# provides default values.
|
5
7
|
class Config
|
6
8
|
def initialize(feed_config, global_config = {})
|
7
|
-
@global_config =
|
8
|
-
@feed_config =
|
9
|
-
@channel_config =
|
9
|
+
@global_config = global_config.deep_symbolize_keys
|
10
|
+
@feed_config = feed_config.deep_symbolize_keys
|
11
|
+
@channel_config = @feed_config.fetch(:channel, {})
|
10
12
|
end
|
11
13
|
|
12
14
|
def author
|
13
|
-
channel_config.fetch
|
15
|
+
channel_config.fetch :author, 'html2rss'
|
14
16
|
end
|
15
17
|
|
16
18
|
def ttl
|
17
|
-
channel_config.fetch
|
19
|
+
channel_config.fetch :ttl, 360
|
18
20
|
end
|
19
21
|
|
20
22
|
def title
|
21
|
-
channel_config.fetch
|
22
|
-
|
23
|
+
channel_config.fetch(:title) { generated_title }
|
24
|
+
end
|
25
|
+
|
26
|
+
def generated_title
|
27
|
+
uri = URI(url)
|
23
28
|
|
24
|
-
|
25
|
-
|
26
|
-
nicer_path.map!(&:titleize)
|
29
|
+
nicer_path = uri.path.split('/')
|
30
|
+
nicer_path.reject! { |part| part == '' }
|
27
31
|
|
28
|
-
|
29
|
-
end
|
32
|
+
nicer_path.any? ? "#{uri.host}: #{nicer_path.join(' ').titleize}" : uri.host
|
30
33
|
end
|
31
34
|
|
32
35
|
def language
|
33
|
-
channel_config.fetch
|
36
|
+
channel_config.fetch :language, 'en'
|
34
37
|
end
|
35
38
|
|
36
39
|
def description
|
37
|
-
channel_config.fetch
|
40
|
+
channel_config.fetch :description, "Latest items from #{url}."
|
38
41
|
end
|
39
42
|
|
40
43
|
def url
|
41
|
-
channel_config.dig
|
44
|
+
channel_config.dig :url
|
42
45
|
end
|
43
46
|
alias link url
|
44
47
|
|
45
48
|
def time_zone
|
46
|
-
channel_config.fetch
|
49
|
+
channel_config.fetch :time_zone, 'UTC'
|
47
50
|
end
|
48
51
|
|
49
52
|
def json?
|
50
|
-
channel_config.fetch
|
53
|
+
channel_config.fetch :json, false
|
51
54
|
end
|
52
55
|
|
53
56
|
def headers
|
54
|
-
global_config.fetch(
|
57
|
+
global_config.fetch(:headers, {}).merge(channel_config.fetch(:headers, {}))
|
55
58
|
end
|
56
59
|
|
57
60
|
def attribute_options(name)
|
58
|
-
feed_config.dig(
|
61
|
+
feed_config.dig(:selectors).fetch(name, {}).merge(channel: channel_config)
|
59
62
|
end
|
60
63
|
|
61
64
|
def attribute?(name)
|
62
|
-
attribute_names.include?(name
|
65
|
+
attribute_names.include?(name)
|
63
66
|
end
|
64
67
|
|
65
|
-
def
|
66
|
-
feed_config.dig(
|
68
|
+
def category_selectors
|
69
|
+
categories = feed_config.dig(:selectors, :categories)
|
70
|
+
return [] unless categories
|
71
|
+
|
72
|
+
categories = categories.keep_if { |category| category.to_s != '' }
|
73
|
+
categories.map!(&:to_sym)
|
74
|
+
categories.uniq!
|
75
|
+
|
76
|
+
categories
|
67
77
|
end
|
68
78
|
|
69
79
|
def selector(name)
|
70
|
-
feed_config.dig(
|
80
|
+
feed_config.dig(:selectors, name, :selector)
|
71
81
|
end
|
72
82
|
|
73
83
|
def attribute_names
|
74
|
-
@attribute_names ||= feed_config.fetch(
|
75
|
-
attrs.delete('items')
|
76
|
-
end
|
84
|
+
@attribute_names ||= feed_config.fetch(:selectors, {}).keys.tap { |attrs| attrs.delete(:items) }
|
77
85
|
end
|
78
86
|
|
79
87
|
private
|
@@ -18,55 +18,58 @@ module Html2rss
|
|
18
18
|
# @return [RSS:Rss]
|
19
19
|
def rss
|
20
20
|
RSS::Maker.make('2.0') do |maker|
|
21
|
-
add_channel(maker)
|
21
|
+
add_channel(maker.channel)
|
22
22
|
|
23
|
-
|
23
|
+
items.each { |item| add_item(item, maker.items.new_item) }
|
24
24
|
end
|
25
25
|
end
|
26
26
|
|
27
|
-
|
27
|
+
def self.add_categories(categories, item_maker)
|
28
|
+
categories.each { |category| item_maker.categories.new_category.content = category }
|
29
|
+
end
|
28
30
|
|
29
|
-
|
31
|
+
def self.add_enclosure_from_url(url, item_maker)
|
32
|
+
return unless url
|
30
33
|
|
31
|
-
|
32
|
-
|
33
|
-
maker.channel.public_send("#{attribute_name}=".to_sym, config.public_send(attribute_name))
|
34
|
-
end
|
34
|
+
enclosure = item_maker.enclosure
|
35
|
+
content_type = MIME::Types.type_for(File.extname(url).delete('.'))
|
35
36
|
|
36
|
-
|
37
|
-
|
37
|
+
enclosure.type = content_type.any? ? content_type.first.to_s : 'application/octet-stream'
|
38
|
+
enclosure.length = 0
|
39
|
+
enclosure.url = url
|
38
40
|
end
|
39
41
|
|
40
|
-
def
|
41
|
-
|
42
|
+
def self.add_guid(item, item_maker)
|
43
|
+
guid = item_maker.guid
|
44
|
+
guid.content = Digest::SHA1.hexdigest(item.title)
|
45
|
+
guid.isPermaLink = false
|
42
46
|
end
|
43
47
|
|
44
|
-
|
45
|
-
feed_item.available_attributes.each do |attribute_name|
|
46
|
-
rss_item.public_send("#{attribute_name}=".to_sym, feed_item.public_send(attribute_name))
|
47
|
-
end
|
48
|
+
private
|
48
49
|
|
49
|
-
|
50
|
-
add_enclosure_from_url(feed_item.enclosure_url, rss_item) if config.attribute?(:enclosure)
|
50
|
+
attr_reader :config
|
51
51
|
|
52
|
-
|
53
|
-
|
52
|
+
def add_channel(channel_maker)
|
53
|
+
%i[language author title description link ttl].each do |attribute_name|
|
54
|
+
channel_maker.public_send("#{attribute_name}=", config.public_send(attribute_name))
|
55
|
+
end
|
54
56
|
|
55
|
-
|
56
|
-
|
57
|
+
channel_maker.generator = "html2rss V. #{::Html2rss::VERSION}"
|
58
|
+
channel_maker.lastBuildDate = Time.now
|
59
|
+
end
|
57
60
|
|
58
|
-
|
59
|
-
|
60
|
-
else
|
61
|
-
'application/octet-stream'
|
62
|
-
end
|
63
|
-
rss_item.enclosure.length = 0
|
64
|
-
rss_item.enclosure.url = url
|
61
|
+
def items
|
62
|
+
@items ||= Item.from_url(config.url, config)
|
65
63
|
end
|
66
64
|
|
67
|
-
def
|
68
|
-
|
69
|
-
|
65
|
+
def add_item(item, item_maker)
|
66
|
+
item.available_attributes.each do |attribute_name|
|
67
|
+
item_maker.public_send("#{attribute_name}=", item.public_send(attribute_name))
|
68
|
+
end
|
69
|
+
|
70
|
+
self.class.add_categories(item.categories, item_maker)
|
71
|
+
self.class.add_enclosure_from_url(item.enclosure_url, item_maker) if item.enclosure?
|
72
|
+
self.class.add_guid(item, item_maker)
|
70
73
|
end
|
71
74
|
end
|
72
75
|
end
|
data/lib/html2rss/item.rb
CHANGED
@@ -23,15 +23,15 @@ module Html2rss
|
|
23
23
|
|
24
24
|
attribute_options = config.attribute_options(method_name)
|
25
25
|
|
26
|
-
extractor = ItemExtractors.get_extractor(attribute_options[
|
26
|
+
extractor = ItemExtractors.get_extractor(attribute_options[:extractor])
|
27
27
|
value = extractor.new(xml, attribute_options).get
|
28
28
|
|
29
|
-
post_process(value, attribute_options.fetch(
|
29
|
+
post_process(value, attribute_options.fetch(:post_process, false))
|
30
30
|
end
|
31
31
|
|
32
32
|
def available_attributes
|
33
|
-
@available_attributes ||= (%
|
34
|
-
@config.attribute_names) - %
|
33
|
+
@available_attributes ||= (%i[title link description author comments updated] &
|
34
|
+
@config.attribute_names) - %i[categories enclosure]
|
35
35
|
end
|
36
36
|
|
37
37
|
##
|
@@ -45,17 +45,17 @@ module Html2rss
|
|
45
45
|
##
|
46
46
|
# @return [Array]
|
47
47
|
def categories
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
48
|
+
config.category_selectors.map(&method(:method_missing))
|
49
|
+
end
|
50
|
+
|
51
|
+
def enclosure?
|
52
|
+
config.attribute?(:enclosure)
|
52
53
|
end
|
53
54
|
|
54
55
|
def enclosure_url
|
55
|
-
enclosure = method_missing(:enclosure)
|
56
|
-
return if enclosure.to_s == ''
|
56
|
+
enclosure = Html2rss::Utils.sanitize_url(method_missing(:enclosure))
|
57
57
|
|
58
|
-
Html2rss::Utils.build_absolute_url_from_relative(enclosure, config.url).to_s
|
58
|
+
Html2rss::Utils.build_absolute_url_from_relative(enclosure, config.url).to_s if enclosure
|
59
59
|
end
|
60
60
|
|
61
61
|
##
|
@@ -63,7 +63,9 @@ module Html2rss
|
|
63
63
|
def self.from_url(url, config)
|
64
64
|
body = get_body_from_url(url, config)
|
65
65
|
|
66
|
-
Nokogiri.HTML(body).css(config.selector(
|
66
|
+
Nokogiri.HTML(body).css(config.selector(:items))
|
67
|
+
.map { |xml_item| new xml_item, config }
|
68
|
+
.keep_if(&:valid?)
|
67
69
|
end
|
68
70
|
|
69
71
|
private
|
@@ -74,7 +76,7 @@ module Html2rss
|
|
74
76
|
faraday.adapter Faraday.default_adapter
|
75
77
|
end.get.body
|
76
78
|
|
77
|
-
config.json? ? Html2rss::Utils.
|
79
|
+
config.json? ? Html2rss::Utils.object_to_xml(JSON.parse(body)) : body
|
78
80
|
end
|
79
81
|
private_class_method :get_body_from_url
|
80
82
|
|
@@ -84,7 +86,7 @@ module Html2rss
|
|
84
86
|
return value unless post_process_options
|
85
87
|
|
86
88
|
[post_process_options].flatten.each do |options|
|
87
|
-
value = AttributePostProcessors.get_processor(options[
|
89
|
+
value = AttributePostProcessors.get_processor(options[:name])
|
88
90
|
.new(value, options: options, item: self, config: @config)
|
89
91
|
.get
|
90
92
|
end
|
@@ -24,12 +24,12 @@ module Html2rss
|
|
24
24
|
def initialize(xml, options)
|
25
25
|
@options = options
|
26
26
|
element = ItemExtractors.element(xml, options)
|
27
|
-
@href = element.attr('href')
|
27
|
+
@href = Html2rss::Utils.sanitize_url(element.attr('href'))
|
28
28
|
end
|
29
29
|
|
30
30
|
# @return [URI::HTTPS, URI::HTTP]
|
31
31
|
def get
|
32
|
-
Html2rss::Utils.build_absolute_url_from_relative(@href, @options[
|
32
|
+
Html2rss::Utils.build_absolute_url_from_relative(@href, @options[:channel][:url])
|
33
33
|
end
|
34
34
|
end
|
35
35
|
end
|
@@ -15,13 +15,13 @@ module Html2rss
|
|
15
15
|
@options = options
|
16
16
|
end
|
17
17
|
|
18
|
-
# Returns what options[
|
18
|
+
# Returns what options[:static] holds.
|
19
19
|
#
|
20
20
|
# options = { static: 'Foobar' }
|
21
21
|
# Static.new(xml, options).get
|
22
22
|
# # => 'Foobar'
|
23
23
|
def get
|
24
|
-
@options[
|
24
|
+
@options[:static]
|
25
25
|
end
|
26
26
|
end
|
27
27
|
end
|
data/lib/html2rss/utils.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
require 'active_support/core_ext/hash'
|
2
|
+
require 'addressable/uri'
|
2
3
|
require 'builder'
|
3
|
-
require 'hashie'
|
4
4
|
require 'json'
|
5
5
|
require 'nokogiri'
|
6
6
|
|
@@ -8,27 +8,20 @@ module Html2rss
|
|
8
8
|
##
|
9
9
|
# The collecting tank for utility methods.
|
10
10
|
module Utils
|
11
|
-
|
12
|
-
# A Hash with indifferent access, build with {https://github.com/intridea/hashie Hashie}.
|
13
|
-
class IndifferentAccessHash < Hash
|
14
|
-
include Hashie::Extensions::MergeInitializer
|
15
|
-
include Hashie::Extensions::IndifferentAccess
|
16
|
-
end
|
17
|
-
|
18
|
-
def self.build_absolute_url_from_relative(url, channel_url)
|
11
|
+
def self.build_absolute_url_from_relative(url, base_url)
|
19
12
|
url = URI(url) if url.is_a?(String)
|
20
13
|
|
21
14
|
return url if url.absolute?
|
22
15
|
|
23
|
-
URI(
|
16
|
+
URI(base_url).tap do |uri|
|
24
17
|
uri.path = url.path.to_s.start_with?('/') ? url.path : "/#{url.path}"
|
25
18
|
uri.query = url.query
|
26
19
|
uri.fragment = url.fragment if url.fragment
|
27
20
|
end
|
28
21
|
end
|
29
22
|
|
30
|
-
def self.
|
31
|
-
|
23
|
+
def self.object_to_xml(object)
|
24
|
+
object.to_xml(skip_instruct: true, skip_types: true)
|
32
25
|
end
|
33
26
|
|
34
27
|
def self.get_class_from_name(snake_cased_name, module_name)
|
@@ -36,5 +29,12 @@ module Html2rss
|
|
36
29
|
class_name = ['Html2rss', module_name, camel_cased_name].join('::')
|
37
30
|
Object.const_get(class_name)
|
38
31
|
end
|
32
|
+
|
33
|
+
def self.sanitize_url(url)
|
34
|
+
squished_url = url.to_s.split(' ').join
|
35
|
+
return if squished_url.to_s == ''
|
36
|
+
|
37
|
+
Addressable::URI.parse(squished_url).normalize.to_s
|
38
|
+
end
|
39
39
|
end
|
40
40
|
end
|
data/lib/html2rss/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html2rss
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.
|
4
|
+
version: 0.8.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gil Desmarais
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-11-
|
11
|
+
date: 2019-11-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -24,6 +24,20 @@ dependencies:
|
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '5.0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: addressable
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '2.7'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '2.7'
|
27
41
|
- !ruby/object:Gem::Dependency
|
28
42
|
name: builder
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -66,20 +80,6 @@ dependencies:
|
|
66
80
|
- - "~>"
|
67
81
|
- !ruby/object:Gem::Version
|
68
82
|
version: '0.13'
|
69
|
-
- !ruby/object:Gem::Dependency
|
70
|
-
name: hashie
|
71
|
-
requirement: !ruby/object:Gem::Requirement
|
72
|
-
requirements:
|
73
|
-
- - "~>"
|
74
|
-
- !ruby/object:Gem::Version
|
75
|
-
version: '3.6'
|
76
|
-
type: :runtime
|
77
|
-
prerelease: false
|
78
|
-
version_requirements: !ruby/object:Gem::Requirement
|
79
|
-
requirements:
|
80
|
-
- - "~>"
|
81
|
-
- !ruby/object:Gem::Version
|
82
|
-
version: '3.6'
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
84
|
name: kramdown
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|