yarss 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/yarss/atom/feed_parser.rb +1 -1
- data/lib/yarss/atom/item_parser.rb +13 -5
- data/lib/yarss/attribute.rb +26 -2
- data/lib/yarss/rdf/feed_parser.rb +1 -1
- data/lib/yarss/rdf/item_parser.rb +12 -4
- data/lib/yarss/rss/feed_parser.rb +1 -1
- data/lib/yarss/rss/item_parser.rb +21 -8
- data/lib/yarss/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: df8227811b0fa37c7605e53cbe37106a67fd4f5f
|
4
|
+
data.tar.gz: 239430b572285f2db67d39c3bc29ca743118eee3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 01c7ee7bb21d06cf598ad31f9b80980487ad52e62e3380c6125c2bc14204855faed438347b5109e2a272bcba3128ee620ca1b428e2a05bdf74f2dff8655a4e04
|
7
|
+
data.tar.gz: 318a7f1a2d7589e1e2ed8a887ddc15ff31126c4ce2ac8e9c49ca45f66957887241efa9a8304958bdf3617cc05dc054b99cb1bd68d5858893ae313a46112825f3
|
@@ -78,7 +78,7 @@ module Yarss
|
|
78
78
|
def items
|
79
79
|
items = feed.fetch('entry')
|
80
80
|
items = [items] unless items.is_a?(Array)
|
81
|
-
items.map { |d| ItemParser.new(d).parse }
|
81
|
+
items.map { |d| ItemParser.new(d, feed_link: link).parse }
|
82
82
|
rescue KeyError => e
|
83
83
|
raise ParseError, e
|
84
84
|
end
|
@@ -11,9 +11,16 @@ module Yarss
|
|
11
11
|
# @return [Hash]
|
12
12
|
attr_accessor :data
|
13
13
|
|
14
|
-
#
|
15
|
-
|
16
|
-
|
14
|
+
# Feed link URL.
|
15
|
+
#
|
16
|
+
# @return [String]
|
17
|
+
attr_accessor :feed_link
|
18
|
+
|
19
|
+
# @param data [Hash] Parsed Atom feed item.
|
20
|
+
# @param feed_link [String] Feed link URL.
|
21
|
+
def initialize(data, feed_link: '')
|
22
|
+
self.data = data
|
23
|
+
self.feed_link = feed_link
|
17
24
|
end
|
18
25
|
|
19
26
|
# Parse out the feed item id, title, updated, link and content and wrap
|
@@ -92,8 +99,9 @@ module Yarss
|
|
92
99
|
def content
|
93
100
|
summary = Attribute.value(data['summary'] || '')
|
94
101
|
content = Attribute.value(data['content'] || '')
|
95
|
-
|
96
|
-
|
102
|
+
content = summary if content.empty?
|
103
|
+
|
104
|
+
Attribute.absolutize_urls(content, feed_link)
|
97
105
|
end
|
98
106
|
end
|
99
107
|
end
|
data/lib/yarss/attribute.rb
CHANGED
@@ -79,8 +79,8 @@ module Yarss
|
|
79
79
|
link_value(value.fetch('href'))
|
80
80
|
when Array
|
81
81
|
item = value.find { |l| l.is_a?(String) } ||
|
82
|
-
value.find { |l| l['rel'] && l['rel'] == '
|
83
|
-
value.find { |l| l['rel']
|
82
|
+
value.find { |l| l['rel'] && l['rel'] == 'alternate' } ||
|
83
|
+
value.find { |l| l['rel'].nil? }
|
84
84
|
raise KeyError unless item
|
85
85
|
link_value(item)
|
86
86
|
when String
|
@@ -91,5 +91,29 @@ module Yarss
|
|
91
91
|
rescue KeyError => e
|
92
92
|
raise ParseError, e
|
93
93
|
end
|
94
|
+
|
95
|
+
# Make relative URLs absolute.
|
96
|
+
#
|
97
|
+
# @param content [String] Item content.
|
98
|
+
# @param base [String] Base URL.
|
99
|
+
#
|
100
|
+
# @return [String]
|
101
|
+
def self.absolutize_urls(content, base)
|
102
|
+
return content if base.empty? || content.empty?
|
103
|
+
|
104
|
+
regex = %r{
|
105
|
+
(?<=src="|href="|src='|href=')
|
106
|
+
/
|
107
|
+
([^/"'].+?)? # Don't match "//xx" but do match "/".
|
108
|
+
(?="|')
|
109
|
+
}x
|
110
|
+
|
111
|
+
content = content.gsub(regex) do |url|
|
112
|
+
"#{base.chomp('/')}#{url}"
|
113
|
+
end
|
114
|
+
content.gsub!("\r\n", "\n")
|
115
|
+
|
116
|
+
content.freeze
|
117
|
+
end
|
94
118
|
end
|
95
119
|
end
|
@@ -101,7 +101,7 @@ module Yarss
|
|
101
101
|
def items
|
102
102
|
items = feed.fetch('item')
|
103
103
|
items = [items] unless items.is_a?(Array)
|
104
|
-
items.map { |d| ItemParser.new(d).parse }
|
104
|
+
items.map { |d| ItemParser.new(d, feed_link: link).parse }
|
105
105
|
rescue KeyError => e
|
106
106
|
raise ParseError, e
|
107
107
|
end
|
@@ -9,9 +9,16 @@ module Yarss
|
|
9
9
|
# @return [Hash]
|
10
10
|
attr_accessor :data
|
11
11
|
|
12
|
-
#
|
13
|
-
|
14
|
-
|
12
|
+
# Feed link URL.
|
13
|
+
#
|
14
|
+
# @return [String]
|
15
|
+
attr_accessor :feed_link
|
16
|
+
|
17
|
+
# @param data [Hash] Parsed Rdf feed item.
|
18
|
+
# @param feed_link [String] Feed link URL.
|
19
|
+
def initialize(data, feed_link: '')
|
20
|
+
self.data = data
|
21
|
+
self.feed_link = feed_link
|
15
22
|
end
|
16
23
|
|
17
24
|
# Parse out the feed item id, title, updated, link and content and wrap
|
@@ -105,7 +112,8 @@ module Yarss
|
|
105
112
|
#
|
106
113
|
# @return [String]
|
107
114
|
def description
|
108
|
-
Attribute.value(data.fetch('description'))
|
115
|
+
description = Attribute.value(data.fetch('description'))
|
116
|
+
Attribute.absolutize_urls(description, feed_link)
|
109
117
|
rescue KeyError => e
|
110
118
|
raise ParseError, e
|
111
119
|
end
|
@@ -84,7 +84,7 @@ module Yarss
|
|
84
84
|
def items
|
85
85
|
items = feed.fetch('item')
|
86
86
|
items = [items] unless items.is_a?(Array)
|
87
|
-
items.map { |d| ItemParser.new(d).parse }
|
87
|
+
items.map { |d| ItemParser.new(d, feed_link: link).parse }
|
88
88
|
rescue KeyError => e
|
89
89
|
raise ParseError, e
|
90
90
|
end
|
@@ -1,5 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require 'digest'
|
4
|
+
|
3
5
|
module Yarss
|
4
6
|
module Rss
|
5
7
|
# Extract id, title, updated, link and content from a feed item.
|
@@ -11,9 +13,16 @@ module Yarss
|
|
11
13
|
# @return [Hash]
|
12
14
|
attr_accessor :data
|
13
15
|
|
14
|
-
#
|
15
|
-
|
16
|
-
|
16
|
+
# Feed link URL.
|
17
|
+
#
|
18
|
+
# @return [String]
|
19
|
+
attr_accessor :feed_link
|
20
|
+
|
21
|
+
# @param data [Hash] Parsed RSS feed item.
|
22
|
+
# @param feed_link [String] Feed link URL.
|
23
|
+
def initialize(data, feed_link: '')
|
24
|
+
self.data = data
|
25
|
+
self.feed_link = feed_link
|
17
26
|
end
|
18
27
|
|
19
28
|
# Parse out the feed item id, title, updated, link and content and wrap
|
@@ -102,12 +111,16 @@ module Yarss
|
|
102
111
|
#
|
103
112
|
# @return [String]
|
104
113
|
def description
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
114
|
+
content = if data['content:encoded']
|
115
|
+
data['content:encoded']
|
116
|
+
elsif data['description']
|
117
|
+
data['description']
|
118
|
+
else
|
119
|
+
''
|
120
|
+
end
|
109
121
|
|
110
|
-
|
122
|
+
content = Attribute.value(content)
|
123
|
+
Attribute.absolutize_urls(content, feed_link)
|
111
124
|
end
|
112
125
|
end
|
113
126
|
end
|
data/lib/yarss/version.rb
CHANGED