feed_searcher 0.0.5 → 0.0.6
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +7 -3
- data/lib/feed_searcher/page.rb +8 -4
- data/lib/feed_searcher/version.rb +1 -1
- data/spec/feed_searcher_spec.rb +52 -7
- metadata +2 -8
- data/spec/fixtures/example.atom +0 -15
- data/spec/fixtures/example.html +0 -18
- data/spec/fixtures/example.rss +0 -12
data/README.md
CHANGED
@@ -13,8 +13,12 @@ $ gem install feed_searcher
|
|
13
13
|
## Usage
|
14
14
|
```ruby
|
15
15
|
require "feed_searcher"
|
16
|
-
|
17
|
-
|
16
|
+
|
17
|
+
FeedSearcher.search("https://github.com/fastladder/feed_searcher")
|
18
|
+
#=> ["https://github.com/fastladder/feed_searcher/commits/master.atom"]
|
19
|
+
|
20
|
+
FeedSearcher.search("https://github.com/fastladder/feed_searcher/commits/master.atom")
|
21
|
+
#=> ["https://github.com/fastladder/feed_searcher/commits/master.atom"]
|
18
22
|
```
|
19
23
|
|
20
24
|
|
@@ -25,7 +29,7 @@ Let me explain how FeedSearcher works along its execution sequence.
|
|
25
29
|
2. Finds link elements (represented as XPath format)
|
26
30
|
3. Extracts URLs from the elements via its `href` attribute
|
27
31
|
4. Includes the given URL if its resource itself is a feed
|
28
|
-
5. Converts
|
32
|
+
5. Converts from relative path to absolute path
|
29
33
|
|
30
34
|
FeedSearcher finds link elements matching the following XPath patterns.
|
31
35
|
|
data/lib/feed_searcher/page.rb
CHANGED
@@ -20,14 +20,14 @@ class FeedSearcher
|
|
20
20
|
|
21
21
|
def feed_urls
|
22
22
|
urls = []
|
23
|
-
urls << url if
|
23
|
+
urls << url if like_xml? && parsable_as_xml? && has_feed_element?
|
24
24
|
urls += links.map {|link| link["href"] }
|
25
25
|
end
|
26
26
|
|
27
27
|
private
|
28
28
|
|
29
29
|
def has_xml_declaration?
|
30
|
-
!!body.
|
30
|
+
!!body.start_with?("<?xml")
|
31
31
|
end
|
32
32
|
|
33
33
|
def has_feed_mime_type?
|
@@ -38,12 +38,16 @@ class FeedSearcher
|
|
38
38
|
EXTENSIONS.include?(extension)
|
39
39
|
end
|
40
40
|
|
41
|
+
def has_feed_element?
|
42
|
+
root.xpath("contains(' feed RDF rss ', concat(' ', local-name(/*), ' '))")
|
43
|
+
end
|
44
|
+
|
41
45
|
def parsable_as_xml?
|
42
46
|
!!xml
|
43
47
|
end
|
44
48
|
|
45
|
-
def
|
46
|
-
has_xml_declaration?
|
49
|
+
def like_xml?
|
50
|
+
has_xml_declaration? || has_feed_mime_type? || has_feed_extension?
|
47
51
|
end
|
48
52
|
|
49
53
|
def url
|
data/spec/feed_searcher_spec.rb
CHANGED
@@ -3,7 +3,7 @@ require "active_support/core_ext/string/strip"
|
|
3
3
|
|
4
4
|
describe FeedSearcher do
|
5
5
|
describe ".search" do
|
6
|
-
context "when
|
6
|
+
context "when there are link elements of feeds in the resource" do
|
7
7
|
before do
|
8
8
|
stub_request(:get, "http://example.com/").to_return(
|
9
9
|
:body => <<-EOS.strip_heredoc
|
@@ -38,7 +38,7 @@ describe FeedSearcher do
|
|
38
38
|
# * it keeps other domain
|
39
39
|
# * it converts relative path to absolute url
|
40
40
|
#
|
41
|
-
it "
|
41
|
+
it "includes hrefs of them as feed URLs" do
|
42
42
|
FeedSearcher.search("http://example.com/").should == %w[
|
43
43
|
http://example.com/1
|
44
44
|
http://example.com/2
|
@@ -50,10 +50,36 @@ describe FeedSearcher do
|
|
50
50
|
end
|
51
51
|
end
|
52
52
|
|
53
|
-
context "when the
|
53
|
+
context "when the resource has feed MIME type and parsable XML and rss element" do
|
54
54
|
before do
|
55
55
|
stub_request(:get, "http://example.com/").to_return(
|
56
56
|
:headers => { "Content-Type" => "application/rss+xml; charset=UTF-8" },
|
57
|
+
:body => <<-EOS.strip_heredoc
|
58
|
+
<rss>
|
59
|
+
<channel>
|
60
|
+
<title>title</title>
|
61
|
+
<link>http://exmple.com/</link>
|
62
|
+
<item>
|
63
|
+
<title>item title</title>
|
64
|
+
<link>http://example.com/item</link>
|
65
|
+
<description>item description</description>
|
66
|
+
</item>
|
67
|
+
</channel>
|
68
|
+
</rss>
|
69
|
+
EOS
|
70
|
+
)
|
71
|
+
end
|
72
|
+
|
73
|
+
it "includes the given URL as a feed URL" do
|
74
|
+
FeedSearcher.search("http://example.com/").should == %w[
|
75
|
+
http://example.com/
|
76
|
+
]
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
context "when the resource has XML declaration and parsable XML and rss element" do
|
81
|
+
before do
|
82
|
+
stub_request(:get, "http://example.com/").to_return(
|
57
83
|
:body => <<-EOS.strip_heredoc
|
58
84
|
<?xml version="1.0" encoding="UTF-8"?>
|
59
85
|
<rss>
|
@@ -71,18 +97,17 @@ describe FeedSearcher do
|
|
71
97
|
)
|
72
98
|
end
|
73
99
|
|
74
|
-
it "
|
100
|
+
it "includes the given URL as a feed URL" do
|
75
101
|
FeedSearcher.search("http://example.com/").should == %w[
|
76
102
|
http://example.com/
|
77
103
|
]
|
78
104
|
end
|
79
105
|
end
|
80
106
|
|
81
|
-
context "when the
|
107
|
+
context "when the resource has feed extension and parsable XML and feed element" do
|
82
108
|
before do
|
83
109
|
stub_request(:get, "http://example.com/feed.atom").to_return(
|
84
110
|
:body => <<-EOS.strip_heredoc
|
85
|
-
<?xml version="1.0" encoding="UTF-8"?>
|
86
111
|
<feed xmlns="http://www.w3.org/2005/Atom">
|
87
112
|
<title>title</title>
|
88
113
|
<link rel="self" href="http://example.com/1"/>
|
@@ -101,11 +126,31 @@ describe FeedSearcher do
|
|
101
126
|
)
|
102
127
|
end
|
103
128
|
|
104
|
-
it "
|
129
|
+
it "includes the given URL as a feed URL" do
|
105
130
|
FeedSearcher.search("http://example.com/feed.atom").should == %w[
|
106
131
|
http://example.com/feed.atom
|
107
132
|
]
|
108
133
|
end
|
109
134
|
end
|
135
|
+
|
136
|
+
context "when the resource has XML declaration and parsable XML and no feed element" do
|
137
|
+
before do
|
138
|
+
stub_request(:get, "http://example.com/p3p.xml").to_return(
|
139
|
+
:headers => { "Content-Type" => "application/xhtml+xml" },
|
140
|
+
:body => <<-EOS.strip_heredoc
|
141
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
142
|
+
<META xmlns="http://www.w3.org/2002/01/P3Pv1">
|
143
|
+
<POLICY-REFERENCES>
|
144
|
+
</POLICY-REFERENCES>
|
145
|
+
</META>
|
146
|
+
EOS
|
147
|
+
)
|
148
|
+
end
|
149
|
+
|
150
|
+
it "does not includes the given URL as a feed URL" do
|
151
|
+
FeedSearcher.search("http://example.com/p3p.xml").should == %w[
|
152
|
+
]
|
153
|
+
end
|
154
|
+
end
|
110
155
|
end
|
111
156
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: feed_searcher
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.6
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-03-
|
12
|
+
date: 2013-03-20 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: mechanize
|
@@ -174,9 +174,6 @@ files:
|
|
174
174
|
- lib/feed_searcher/page.rb
|
175
175
|
- lib/feed_searcher/version.rb
|
176
176
|
- spec/feed_searcher_spec.rb
|
177
|
-
- spec/fixtures/example.atom
|
178
|
-
- spec/fixtures/example.html
|
179
|
-
- spec/fixtures/example.rss
|
180
177
|
- spec/spec_helper.rb
|
181
178
|
homepage: https://github.com/r7kamura/feed_searcher
|
182
179
|
licenses:
|
@@ -205,8 +202,5 @@ specification_version: 3
|
|
205
202
|
summary: Search RSS feed URLs from the given URL
|
206
203
|
test_files:
|
207
204
|
- spec/feed_searcher_spec.rb
|
208
|
-
- spec/fixtures/example.atom
|
209
|
-
- spec/fixtures/example.html
|
210
|
-
- spec/fixtures/example.rss
|
211
205
|
- spec/spec_helper.rb
|
212
206
|
has_rdoc:
|
data/spec/fixtures/example.atom
DELETED
@@ -1,15 +0,0 @@
|
|
1
|
-
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
-
<feed xmlns="http://www.w3.org/2005/Atom">
|
3
|
-
<title>title</title>
|
4
|
-
<link rel="self" href="http://example.com/1"/>
|
5
|
-
<link rel="alternate" href="http://example.com/"/>
|
6
|
-
<entry>
|
7
|
-
<title>item title</title>
|
8
|
-
<link rel="alternate" href="http://example.com/"/>
|
9
|
-
<content type="html">
|
10
|
-
<div xmlns="http://www.w3.org/1999/xhtml">
|
11
|
-
<p>item content</p>
|
12
|
-
</div>
|
13
|
-
</content>
|
14
|
-
</entry>
|
15
|
-
</feed>
|
data/spec/fixtures/example.html
DELETED
@@ -1,18 +0,0 @@
|
|
1
|
-
<!DOCTYPE HTML>
|
2
|
-
<html>
|
3
|
-
<head>
|
4
|
-
<meta charset="UTF-8">
|
5
|
-
<link href="http://example.com/1" rel="alternate" type="application/atom+xml" />
|
6
|
-
<link href="http://example.com/2" rel="alternate" type="application/rdf+xml" />
|
7
|
-
<link href="http://example.com/3" rel="alternate" type="application/rss+xml" />
|
8
|
-
<link href="http://example.com/4" rel="alternate" type="application/xml" />
|
9
|
-
<link href="http://example.com/5" rel="resource" type="application/rss+xml" />
|
10
|
-
<link href="http://www.example.com/6" rel="alternate" type="application/rss+xml" />
|
11
|
-
<link href="http://other-example.com/7" rel="alternate" type="application/rss+xml" />
|
12
|
-
<link href="/8" rel="alternate" type="application/rss+xml" />
|
13
|
-
</head>
|
14
|
-
<body>
|
15
|
-
body
|
16
|
-
</body>
|
17
|
-
</html>
|
18
|
-
|
data/spec/fixtures/example.rss
DELETED
@@ -1,12 +0,0 @@
|
|
1
|
-
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
-
<rss>
|
3
|
-
<channel>
|
4
|
-
<title>title</title>
|
5
|
-
<link>http://exmple.com/</link>
|
6
|
-
<item>
|
7
|
-
<title>item title</title>
|
8
|
-
<link>http://example.com/item</link>
|
9
|
-
<description>item description</description>
|
10
|
-
</item>
|
11
|
-
</channel>
|
12
|
-
</rss>
|