feed_searcher 0.0.5 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +7 -3
- data/lib/feed_searcher/page.rb +8 -4
- data/lib/feed_searcher/version.rb +1 -1
- data/spec/feed_searcher_spec.rb +52 -7
- metadata +2 -8
- data/spec/fixtures/example.atom +0 -15
- data/spec/fixtures/example.html +0 -18
- data/spec/fixtures/example.rss +0 -12
data/README.md
CHANGED
@@ -13,8 +13,12 @@ $ gem install feed_searcher
|
|
13
13
|
## Usage
|
14
14
|
```ruby
|
15
15
|
require "feed_searcher"
|
16
|
-
|
17
|
-
|
16
|
+
|
17
|
+
FeedSearcher.search("https://github.com/fastladder/feed_searcher")
|
18
|
+
#=> ["https://github.com/fastladder/feed_searcher/commits/master.atom"]
|
19
|
+
|
20
|
+
FeedSearcher.search("https://github.com/fastladder/feed_searcher/commits/master.atom")
|
21
|
+
#=> ["https://github.com/fastladder/feed_searcher/commits/master.atom"]
|
18
22
|
```
|
19
23
|
|
20
24
|
|
@@ -25,7 +29,7 @@ Let me explain how FeedSearcher works along its execution sequence.
|
|
25
29
|
2. Finds link elements (represented as XPath format)
|
26
30
|
3. Extracts URLs from the elements via its `href` attribute
|
27
31
|
4. Includes the given URL if its resource itself is a feed
|
28
|
-
5. Converts
|
32
|
+
5. Converts from relative path to absolute path
|
29
33
|
|
30
34
|
FeedSearcher finds link elements matching the following XPath patterns.
|
31
35
|
|
data/lib/feed_searcher/page.rb
CHANGED
@@ -20,14 +20,14 @@ class FeedSearcher
|
|
20
20
|
|
21
21
|
def feed_urls
|
22
22
|
urls = []
|
23
|
-
urls << url if
|
23
|
+
urls << url if like_xml? && parsable_as_xml? && has_feed_element?
|
24
24
|
urls += links.map {|link| link["href"] }
|
25
25
|
end
|
26
26
|
|
27
27
|
private
|
28
28
|
|
29
29
|
def has_xml_declaration?
|
30
|
-
!!body.
|
30
|
+
!!body.start_with?("<?xml")
|
31
31
|
end
|
32
32
|
|
33
33
|
def has_feed_mime_type?
|
@@ -38,12 +38,16 @@ class FeedSearcher
|
|
38
38
|
EXTENSIONS.include?(extension)
|
39
39
|
end
|
40
40
|
|
41
|
+
def has_feed_element?
|
42
|
+
root.xpath("contains(' feed RDF rss ', concat(' ', local-name(/*), ' '))")
|
43
|
+
end
|
44
|
+
|
41
45
|
def parsable_as_xml?
|
42
46
|
!!xml
|
43
47
|
end
|
44
48
|
|
45
|
-
def
|
46
|
-
has_xml_declaration?
|
49
|
+
def like_xml?
|
50
|
+
has_xml_declaration? || has_feed_mime_type? || has_feed_extension?
|
47
51
|
end
|
48
52
|
|
49
53
|
def url
|
data/spec/feed_searcher_spec.rb
CHANGED
@@ -3,7 +3,7 @@ require "active_support/core_ext/string/strip"
|
|
3
3
|
|
4
4
|
describe FeedSearcher do
|
5
5
|
describe ".search" do
|
6
|
-
context "when
|
6
|
+
context "when there are link elements of feeds in the resource" do
|
7
7
|
before do
|
8
8
|
stub_request(:get, "http://example.com/").to_return(
|
9
9
|
:body => <<-EOS.strip_heredoc
|
@@ -38,7 +38,7 @@ describe FeedSearcher do
|
|
38
38
|
# * it keeps other domain
|
39
39
|
# * it converts relative path to absolute url
|
40
40
|
#
|
41
|
-
it "
|
41
|
+
it "includes hrefs of them as feed URLs" do
|
42
42
|
FeedSearcher.search("http://example.com/").should == %w[
|
43
43
|
http://example.com/1
|
44
44
|
http://example.com/2
|
@@ -50,10 +50,36 @@ describe FeedSearcher do
|
|
50
50
|
end
|
51
51
|
end
|
52
52
|
|
53
|
-
context "when the
|
53
|
+
context "when the resource has feed MIME type and parsable XML and rss element" do
|
54
54
|
before do
|
55
55
|
stub_request(:get, "http://example.com/").to_return(
|
56
56
|
:headers => { "Content-Type" => "application/rss+xml; charset=UTF-8" },
|
57
|
+
:body => <<-EOS.strip_heredoc
|
58
|
+
<rss>
|
59
|
+
<channel>
|
60
|
+
<title>title</title>
|
61
|
+
<link>http://exmple.com/</link>
|
62
|
+
<item>
|
63
|
+
<title>item title</title>
|
64
|
+
<link>http://example.com/item</link>
|
65
|
+
<description>item description</description>
|
66
|
+
</item>
|
67
|
+
</channel>
|
68
|
+
</rss>
|
69
|
+
EOS
|
70
|
+
)
|
71
|
+
end
|
72
|
+
|
73
|
+
it "includes the given URL as a feed URL" do
|
74
|
+
FeedSearcher.search("http://example.com/").should == %w[
|
75
|
+
http://example.com/
|
76
|
+
]
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
context "when the resource has XML declaration and parsable XML and rss element" do
|
81
|
+
before do
|
82
|
+
stub_request(:get, "http://example.com/").to_return(
|
57
83
|
:body => <<-EOS.strip_heredoc
|
58
84
|
<?xml version="1.0" encoding="UTF-8"?>
|
59
85
|
<rss>
|
@@ -71,18 +97,17 @@ describe FeedSearcher do
|
|
71
97
|
)
|
72
98
|
end
|
73
99
|
|
74
|
-
it "
|
100
|
+
it "includes the given URL as a feed URL" do
|
75
101
|
FeedSearcher.search("http://example.com/").should == %w[
|
76
102
|
http://example.com/
|
77
103
|
]
|
78
104
|
end
|
79
105
|
end
|
80
106
|
|
81
|
-
context "when the
|
107
|
+
context "when the resource has feed extension and parsable XML and feed element" do
|
82
108
|
before do
|
83
109
|
stub_request(:get, "http://example.com/feed.atom").to_return(
|
84
110
|
:body => <<-EOS.strip_heredoc
|
85
|
-
<?xml version="1.0" encoding="UTF-8"?>
|
86
111
|
<feed xmlns="http://www.w3.org/2005/Atom">
|
87
112
|
<title>title</title>
|
88
113
|
<link rel="self" href="http://example.com/1"/>
|
@@ -101,11 +126,31 @@ describe FeedSearcher do
|
|
101
126
|
)
|
102
127
|
end
|
103
128
|
|
104
|
-
it "
|
129
|
+
it "includes the given URL as a feed URL" do
|
105
130
|
FeedSearcher.search("http://example.com/feed.atom").should == %w[
|
106
131
|
http://example.com/feed.atom
|
107
132
|
]
|
108
133
|
end
|
109
134
|
end
|
135
|
+
|
136
|
+
context "when the resource has XML declaration and parsable XML and no feed element" do
|
137
|
+
before do
|
138
|
+
stub_request(:get, "http://example.com/p3p.xml").to_return(
|
139
|
+
:headers => { "Content-Type" => "application/xhtml+xml" },
|
140
|
+
:body => <<-EOS.strip_heredoc
|
141
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
142
|
+
<META xmlns="http://www.w3.org/2002/01/P3Pv1">
|
143
|
+
<POLICY-REFERENCES>
|
144
|
+
</POLICY-REFERENCES>
|
145
|
+
</META>
|
146
|
+
EOS
|
147
|
+
)
|
148
|
+
end
|
149
|
+
|
150
|
+
it "does not includes the given URL as a feed URL" do
|
151
|
+
FeedSearcher.search("http://example.com/p3p.xml").should == %w[
|
152
|
+
]
|
153
|
+
end
|
154
|
+
end
|
110
155
|
end
|
111
156
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: feed_searcher
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.6
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-03-
|
12
|
+
date: 2013-03-20 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: mechanize
|
@@ -174,9 +174,6 @@ files:
|
|
174
174
|
- lib/feed_searcher/page.rb
|
175
175
|
- lib/feed_searcher/version.rb
|
176
176
|
- spec/feed_searcher_spec.rb
|
177
|
-
- spec/fixtures/example.atom
|
178
|
-
- spec/fixtures/example.html
|
179
|
-
- spec/fixtures/example.rss
|
180
177
|
- spec/spec_helper.rb
|
181
178
|
homepage: https://github.com/r7kamura/feed_searcher
|
182
179
|
licenses:
|
@@ -205,8 +202,5 @@ specification_version: 3
|
|
205
202
|
summary: Search RSS feed URLs from the given URL
|
206
203
|
test_files:
|
207
204
|
- spec/feed_searcher_spec.rb
|
208
|
-
- spec/fixtures/example.atom
|
209
|
-
- spec/fixtures/example.html
|
210
|
-
- spec/fixtures/example.rss
|
211
205
|
- spec/spec_helper.rb
|
212
206
|
has_rdoc:
|
data/spec/fixtures/example.atom
DELETED
@@ -1,15 +0,0 @@
|
|
1
|
-
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
-
<feed xmlns="http://www.w3.org/2005/Atom">
|
3
|
-
<title>title</title>
|
4
|
-
<link rel="self" href="http://example.com/1"/>
|
5
|
-
<link rel="alternate" href="http://example.com/"/>
|
6
|
-
<entry>
|
7
|
-
<title>item title</title>
|
8
|
-
<link rel="alternate" href="http://example.com/"/>
|
9
|
-
<content type="html">
|
10
|
-
<div xmlns="http://www.w3.org/1999/xhtml">
|
11
|
-
<p>item content</p>
|
12
|
-
</div>
|
13
|
-
</content>
|
14
|
-
</entry>
|
15
|
-
</feed>
|
data/spec/fixtures/example.html
DELETED
@@ -1,18 +0,0 @@
|
|
1
|
-
<!DOCTYPE HTML>
|
2
|
-
<html>
|
3
|
-
<head>
|
4
|
-
<meta charset="UTF-8">
|
5
|
-
<link href="http://example.com/1" rel="alternate" type="application/atom+xml" />
|
6
|
-
<link href="http://example.com/2" rel="alternate" type="application/rdf+xml" />
|
7
|
-
<link href="http://example.com/3" rel="alternate" type="application/rss+xml" />
|
8
|
-
<link href="http://example.com/4" rel="alternate" type="application/xml" />
|
9
|
-
<link href="http://example.com/5" rel="resource" type="application/rss+xml" />
|
10
|
-
<link href="http://www.example.com/6" rel="alternate" type="application/rss+xml" />
|
11
|
-
<link href="http://other-example.com/7" rel="alternate" type="application/rss+xml" />
|
12
|
-
<link href="/8" rel="alternate" type="application/rss+xml" />
|
13
|
-
</head>
|
14
|
-
<body>
|
15
|
-
body
|
16
|
-
</body>
|
17
|
-
</html>
|
18
|
-
|
data/spec/fixtures/example.rss
DELETED
@@ -1,12 +0,0 @@
|
|
1
|
-
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
-
<rss>
|
3
|
-
<channel>
|
4
|
-
<title>title</title>
|
5
|
-
<link>http://exmple.com/</link>
|
6
|
-
<item>
|
7
|
-
<title>item title</title>
|
8
|
-
<link>http://example.com/item</link>
|
9
|
-
<description>item description</description>
|
10
|
-
</item>
|
11
|
-
</channel>
|
12
|
-
</rss>
|