feed_searcher 0.0.5 → 0.0.6

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -13,8 +13,12 @@ $ gem install feed_searcher
13
13
  ## Usage
14
14
  ```ruby
15
15
  require "feed_searcher"
16
- FeedSearcher.search("https://github.com/r7kamura/feed_searcher")
17
- #=> ["https://github.com/r7kamura/feed_searcher/commits/master.atom"]
16
+
17
+ FeedSearcher.search("https://github.com/fastladder/feed_searcher")
18
+ #=> ["https://github.com/fastladder/feed_searcher/commits/master.atom"]
19
+
20
+ FeedSearcher.search("https://github.com/fastladder/feed_searcher/commits/master.atom")
21
+ #=> ["https://github.com/fastladder/feed_searcher/commits/master.atom"]
18
22
  ```
19
23
 
20
24
 
@@ -25,7 +29,7 @@ Let me explain how FeedSearcher works along its execution sequence.
25
29
  2. Finds link elements (represented as XPath format)
26
30
  3. Extracts URLs from the elements via its `href` attribute
27
31
  4. Includes the given URL if its resource itself is a feed
28
- 5. Converts to relative path to absolute path
32
+ 5. Converts from relative path to absolute path
29
33
 
30
34
  FeedSearcher finds link elements matching the following XPath patterns.
31
35
 
@@ -20,14 +20,14 @@ class FeedSearcher
20
20
 
21
21
  def feed_urls
22
22
  urls = []
23
- urls << url if (has_feed_mime_type? || has_feed_extension?) && xml?
23
+ urls << url if like_xml? && parsable_as_xml? && has_feed_element?
24
24
  urls += links.map {|link| link["href"] }
25
25
  end
26
26
 
27
27
  private
28
28
 
29
29
  def has_xml_declaration?
30
- !!body.index("<?xml")
30
+ !!body.start_with?("<?xml")
31
31
  end
32
32
 
33
33
  def has_feed_mime_type?
@@ -38,12 +38,16 @@ class FeedSearcher
38
38
  EXTENSIONS.include?(extension)
39
39
  end
40
40
 
41
+ def has_feed_element?
42
+ root.xpath("contains(' feed RDF rss ', concat(' ', local-name(/*), ' '))")
43
+ end
44
+
41
45
  def parsable_as_xml?
42
46
  !!xml
43
47
  end
44
48
 
45
- def xml?
46
- has_xml_declaration? && parsable_as_xml?
49
+ def like_xml?
50
+ has_xml_declaration? || has_feed_mime_type? || has_feed_extension?
47
51
  end
48
52
 
49
53
  def url
@@ -1,3 +1,3 @@
1
1
  class FeedSearcher
2
- VERSION = "0.0.5"
2
+ VERSION = "0.0.6"
3
3
  end
@@ -3,7 +3,7 @@ require "active_support/core_ext/string/strip"
3
3
 
4
4
  describe FeedSearcher do
5
5
  describe ".search" do
6
- context "when the specified resource is HTML" do
6
+ context "when there are link elements of feeds in the resource" do
7
7
  before do
8
8
  stub_request(:get, "http://example.com/").to_return(
9
9
  :body => <<-EOS.strip_heredoc
@@ -38,7 +38,7 @@ describe FeedSearcher do
38
38
  # * it keeps other domain
39
39
  # * it converts relative path to absolute url
40
40
  #
41
- it "returns feed URLs from link elements in the specified resource" do
41
+ it "includes hrefs of them as feed URLs" do
42
42
  FeedSearcher.search("http://example.com/").should == %w[
43
43
  http://example.com/1
44
44
  http://example.com/2
@@ -50,10 +50,36 @@ describe FeedSearcher do
50
50
  end
51
51
  end
52
52
 
53
- context "when the specified resource has feed MIME type and be parsable as XML" do
53
+ context "when the resource has feed MIME type and parsable XML and rss element" do
54
54
  before do
55
55
  stub_request(:get, "http://example.com/").to_return(
56
56
  :headers => { "Content-Type" => "application/rss+xml; charset=UTF-8" },
57
+ :body => <<-EOS.strip_heredoc
58
+ <rss>
59
+ <channel>
60
+ <title>title</title>
61
+ <link>http://exmple.com/</link>
62
+ <item>
63
+ <title>item title</title>
64
+ <link>http://example.com/item</link>
65
+ <description>item description</description>
66
+ </item>
67
+ </channel>
68
+ </rss>
69
+ EOS
70
+ )
71
+ end
72
+
73
+ it "includes the given URL as a feed URL" do
74
+ FeedSearcher.search("http://example.com/").should == %w[
75
+ http://example.com/
76
+ ]
77
+ end
78
+ end
79
+
80
+ context "when the resource has XML declaration and parsable XML and rss element" do
81
+ before do
82
+ stub_request(:get, "http://example.com/").to_return(
57
83
  :body => <<-EOS.strip_heredoc
58
84
  <?xml version="1.0" encoding="UTF-8"?>
59
85
  <rss>
@@ -71,18 +97,17 @@ describe FeedSearcher do
71
97
  )
72
98
  end
73
99
 
74
- it "returns itself as a feed url" do
100
+ it "includes the given URL as a feed URL" do
75
101
  FeedSearcher.search("http://example.com/").should == %w[
76
102
  http://example.com/
77
103
  ]
78
104
  end
79
105
  end
80
106
 
81
- context "when the specified resource has feed extension and be parsable as XML" do
107
+ context "when the resource has feed extension and parsable XML and feed element" do
82
108
  before do
83
109
  stub_request(:get, "http://example.com/feed.atom").to_return(
84
110
  :body => <<-EOS.strip_heredoc
85
- <?xml version="1.0" encoding="UTF-8"?>
86
111
  <feed xmlns="http://www.w3.org/2005/Atom">
87
112
  <title>title</title>
88
113
  <link rel="self" href="http://example.com/1"/>
@@ -101,11 +126,31 @@ describe FeedSearcher do
101
126
  )
102
127
  end
103
128
 
104
- it "returns itself as a feed url" do
129
+ it "includes the given URL as a feed URL" do
105
130
  FeedSearcher.search("http://example.com/feed.atom").should == %w[
106
131
  http://example.com/feed.atom
107
132
  ]
108
133
  end
109
134
  end
135
+
136
+ context "when the resource has XML declaration and parsable XML and no feed element" do
137
+ before do
138
+ stub_request(:get, "http://example.com/p3p.xml").to_return(
139
+ :headers => { "Content-Type" => "application/xhtml+xml" },
140
+ :body => <<-EOS.strip_heredoc
141
+ <?xml version="1.0" encoding="UTF-8"?>
142
+ <META xmlns="http://www.w3.org/2002/01/P3Pv1">
143
+ <POLICY-REFERENCES>
144
+ </POLICY-REFERENCES>
145
+ </META>
146
+ EOS
147
+ )
148
+ end
149
+
150
+ it "does not includes the given URL as a feed URL" do
151
+ FeedSearcher.search("http://example.com/p3p.xml").should == %w[
152
+ ]
153
+ end
154
+ end
110
155
  end
111
156
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: feed_searcher
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-03-19 00:00:00.000000000 Z
12
+ date: 2013-03-20 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: mechanize
@@ -174,9 +174,6 @@ files:
174
174
  - lib/feed_searcher/page.rb
175
175
  - lib/feed_searcher/version.rb
176
176
  - spec/feed_searcher_spec.rb
177
- - spec/fixtures/example.atom
178
- - spec/fixtures/example.html
179
- - spec/fixtures/example.rss
180
177
  - spec/spec_helper.rb
181
178
  homepage: https://github.com/r7kamura/feed_searcher
182
179
  licenses:
@@ -205,8 +202,5 @@ specification_version: 3
205
202
  summary: Search RSS feed URLs from the given URL
206
203
  test_files:
207
204
  - spec/feed_searcher_spec.rb
208
- - spec/fixtures/example.atom
209
- - spec/fixtures/example.html
210
- - spec/fixtures/example.rss
211
205
  - spec/spec_helper.rb
212
206
  has_rdoc:
@@ -1,15 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <feed xmlns="http://www.w3.org/2005/Atom">
3
- <title>title</title>
4
- <link rel="self" href="http://example.com/1"/>
5
- <link rel="alternate" href="http://example.com/"/>
6
- <entry>
7
- <title>item title</title>
8
- <link rel="alternate" href="http://example.com/"/>
9
- <content type="html">
10
- <div xmlns="http://www.w3.org/1999/xhtml">
11
- <p>item content</p>
12
- </div>
13
- </content>
14
- </entry>
15
- </feed>
@@ -1,18 +0,0 @@
1
- <!DOCTYPE HTML>
2
- <html>
3
- <head>
4
- <meta charset="UTF-8">
5
- <link href="http://example.com/1" rel="alternate" type="application/atom+xml" />
6
- <link href="http://example.com/2" rel="alternate" type="application/rdf+xml" />
7
- <link href="http://example.com/3" rel="alternate" type="application/rss+xml" />
8
- <link href="http://example.com/4" rel="alternate" type="application/xml" />
9
- <link href="http://example.com/5" rel="resource" type="application/rss+xml" />
10
- <link href="http://www.example.com/6" rel="alternate" type="application/rss+xml" />
11
- <link href="http://other-example.com/7" rel="alternate" type="application/rss+xml" />
12
- <link href="/8" rel="alternate" type="application/rss+xml" />
13
- </head>
14
- <body>
15
- body
16
- </body>
17
- </html>
18
-
@@ -1,12 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <rss>
3
- <channel>
4
- <title>title</title>
5
- <link>http://exmple.com/</link>
6
- <item>
7
- <title>item title</title>
8
- <link>http://example.com/item</link>
9
- <description>item description</description>
10
- </item>
11
- </channel>
12
- </rss>