feed_searcher 0.0.5 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -13,8 +13,12 @@ $ gem install feed_searcher
13
13
  ## Usage
14
14
  ```ruby
15
15
  require "feed_searcher"
16
- FeedSearcher.search("https://github.com/r7kamura/feed_searcher")
17
- #=> ["https://github.com/r7kamura/feed_searcher/commits/master.atom"]
16
+
17
+ FeedSearcher.search("https://github.com/fastladder/feed_searcher")
18
+ #=> ["https://github.com/fastladder/feed_searcher/commits/master.atom"]
19
+
20
+ FeedSearcher.search("https://github.com/fastladder/feed_searcher/commits/master.atom")
21
+ #=> ["https://github.com/fastladder/feed_searcher/commits/master.atom"]
18
22
  ```
19
23
 
20
24
 
@@ -25,7 +29,7 @@ Let me explain how FeedSearcher works along its execution sequence.
25
29
  2. Finds link elements (represented as XPath format)
26
30
  3. Extracts URLs from the elements via its `href` attribute
27
31
  4. Includes the given URL if its resource itself is a feed
28
- 5. Converts to relative path to absolute path
32
+ 5. Converts from relative path to absolute path
29
33
 
30
34
  FeedSearcher finds link elements matching the following XPath patterns.
31
35
 
@@ -20,14 +20,14 @@ class FeedSearcher
20
20
 
21
21
  def feed_urls
22
22
  urls = []
23
- urls << url if (has_feed_mime_type? || has_feed_extension?) && xml?
23
+ urls << url if like_xml? && parsable_as_xml? && has_feed_element?
24
24
  urls += links.map {|link| link["href"] }
25
25
  end
26
26
 
27
27
  private
28
28
 
29
29
  def has_xml_declaration?
30
- !!body.index("<?xml")
30
+ !!body.start_with?("<?xml")
31
31
  end
32
32
 
33
33
  def has_feed_mime_type?
@@ -38,12 +38,16 @@ class FeedSearcher
38
38
  EXTENSIONS.include?(extension)
39
39
  end
40
40
 
41
+ def has_feed_element?
42
+ root.xpath("contains(' feed RDF rss ', concat(' ', local-name(/*), ' '))")
43
+ end
44
+
41
45
  def parsable_as_xml?
42
46
  !!xml
43
47
  end
44
48
 
45
- def xml?
46
- has_xml_declaration? && parsable_as_xml?
49
+ def like_xml?
50
+ has_xml_declaration? || has_feed_mime_type? || has_feed_extension?
47
51
  end
48
52
 
49
53
  def url
@@ -1,3 +1,3 @@
1
1
  class FeedSearcher
2
- VERSION = "0.0.5"
2
+ VERSION = "0.0.6"
3
3
  end
@@ -3,7 +3,7 @@ require "active_support/core_ext/string/strip"
3
3
 
4
4
  describe FeedSearcher do
5
5
  describe ".search" do
6
- context "when the specified resource is HTML" do
6
+ context "when there are link elements of feeds in the resource" do
7
7
  before do
8
8
  stub_request(:get, "http://example.com/").to_return(
9
9
  :body => <<-EOS.strip_heredoc
@@ -38,7 +38,7 @@ describe FeedSearcher do
38
38
  # * it keeps other domain
39
39
  # * it converts relative path to absolute url
40
40
  #
41
- it "returns feed URLs from link elements in the specified resource" do
41
+ it "includes hrefs of them as feed URLs" do
42
42
  FeedSearcher.search("http://example.com/").should == %w[
43
43
  http://example.com/1
44
44
  http://example.com/2
@@ -50,10 +50,36 @@ describe FeedSearcher do
50
50
  end
51
51
  end
52
52
 
53
- context "when the specified resource has feed MIME type and be parsable as XML" do
53
+ context "when the resource has feed MIME type and parsable XML and rss element" do
54
54
  before do
55
55
  stub_request(:get, "http://example.com/").to_return(
56
56
  :headers => { "Content-Type" => "application/rss+xml; charset=UTF-8" },
57
+ :body => <<-EOS.strip_heredoc
58
+ <rss>
59
+ <channel>
60
+ <title>title</title>
61
+ <link>http://exmple.com/</link>
62
+ <item>
63
+ <title>item title</title>
64
+ <link>http://example.com/item</link>
65
+ <description>item description</description>
66
+ </item>
67
+ </channel>
68
+ </rss>
69
+ EOS
70
+ )
71
+ end
72
+
73
+ it "includes the given URL as a feed URL" do
74
+ FeedSearcher.search("http://example.com/").should == %w[
75
+ http://example.com/
76
+ ]
77
+ end
78
+ end
79
+
80
+ context "when the resource has XML declaration and parsable XML and rss element" do
81
+ before do
82
+ stub_request(:get, "http://example.com/").to_return(
57
83
  :body => <<-EOS.strip_heredoc
58
84
  <?xml version="1.0" encoding="UTF-8"?>
59
85
  <rss>
@@ -71,18 +97,17 @@ describe FeedSearcher do
71
97
  )
72
98
  end
73
99
 
74
- it "returns itself as a feed url" do
100
+ it "includes the given URL as a feed URL" do
75
101
  FeedSearcher.search("http://example.com/").should == %w[
76
102
  http://example.com/
77
103
  ]
78
104
  end
79
105
  end
80
106
 
81
- context "when the specified resource has feed extension and be parsable as XML" do
107
+ context "when the resource has feed extension and parsable XML and feed element" do
82
108
  before do
83
109
  stub_request(:get, "http://example.com/feed.atom").to_return(
84
110
  :body => <<-EOS.strip_heredoc
85
- <?xml version="1.0" encoding="UTF-8"?>
86
111
  <feed xmlns="http://www.w3.org/2005/Atom">
87
112
  <title>title</title>
88
113
  <link rel="self" href="http://example.com/1"/>
@@ -101,11 +126,31 @@ describe FeedSearcher do
101
126
  )
102
127
  end
103
128
 
104
- it "returns itself as a feed url" do
129
+ it "includes the given URL as a feed URL" do
105
130
  FeedSearcher.search("http://example.com/feed.atom").should == %w[
106
131
  http://example.com/feed.atom
107
132
  ]
108
133
  end
109
134
  end
135
+
136
+ context "when the resource has XML declaration and parsable XML and no feed element" do
137
+ before do
138
+ stub_request(:get, "http://example.com/p3p.xml").to_return(
139
+ :headers => { "Content-Type" => "application/xhtml+xml" },
140
+ :body => <<-EOS.strip_heredoc
141
+ <?xml version="1.0" encoding="UTF-8"?>
142
+ <META xmlns="http://www.w3.org/2002/01/P3Pv1">
143
+ <POLICY-REFERENCES>
144
+ </POLICY-REFERENCES>
145
+ </META>
146
+ EOS
147
+ )
148
+ end
149
+
150
+ it "does not includes the given URL as a feed URL" do
151
+ FeedSearcher.search("http://example.com/p3p.xml").should == %w[
152
+ ]
153
+ end
154
+ end
110
155
  end
111
156
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: feed_searcher
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-03-19 00:00:00.000000000 Z
12
+ date: 2013-03-20 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: mechanize
@@ -174,9 +174,6 @@ files:
174
174
  - lib/feed_searcher/page.rb
175
175
  - lib/feed_searcher/version.rb
176
176
  - spec/feed_searcher_spec.rb
177
- - spec/fixtures/example.atom
178
- - spec/fixtures/example.html
179
- - spec/fixtures/example.rss
180
177
  - spec/spec_helper.rb
181
178
  homepage: https://github.com/r7kamura/feed_searcher
182
179
  licenses:
@@ -205,8 +202,5 @@ specification_version: 3
205
202
  summary: Search RSS feed URLs from the given URL
206
203
  test_files:
207
204
  - spec/feed_searcher_spec.rb
208
- - spec/fixtures/example.atom
209
- - spec/fixtures/example.html
210
- - spec/fixtures/example.rss
211
205
  - spec/spec_helper.rb
212
206
  has_rdoc:
@@ -1,15 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <feed xmlns="http://www.w3.org/2005/Atom">
3
- <title>title</title>
4
- <link rel="self" href="http://example.com/1"/>
5
- <link rel="alternate" href="http://example.com/"/>
6
- <entry>
7
- <title>item title</title>
8
- <link rel="alternate" href="http://example.com/"/>
9
- <content type="html">
10
- <div xmlns="http://www.w3.org/1999/xhtml">
11
- <p>item content</p>
12
- </div>
13
- </content>
14
- </entry>
15
- </feed>
@@ -1,18 +0,0 @@
1
- <!DOCTYPE HTML>
2
- <html>
3
- <head>
4
- <meta charset="UTF-8">
5
- <link href="http://example.com/1" rel="alternate" type="application/atom+xml" />
6
- <link href="http://example.com/2" rel="alternate" type="application/rdf+xml" />
7
- <link href="http://example.com/3" rel="alternate" type="application/rss+xml" />
8
- <link href="http://example.com/4" rel="alternate" type="application/xml" />
9
- <link href="http://example.com/5" rel="resource" type="application/rss+xml" />
10
- <link href="http://www.example.com/6" rel="alternate" type="application/rss+xml" />
11
- <link href="http://other-example.com/7" rel="alternate" type="application/rss+xml" />
12
- <link href="/8" rel="alternate" type="application/rss+xml" />
13
- </head>
14
- <body>
15
- body
16
- </body>
17
- </html>
18
-
@@ -1,12 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <rss>
3
- <channel>
4
- <title>title</title>
5
- <link>http://exmple.com/</link>
6
- <item>
7
- <title>item title</title>
8
- <link>http://example.com/item</link>
9
- <description>item description</description>
10
- </item>
11
- </channel>
12
- </rss>