feed_searcher 0.0.4 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.travis.yml ADDED
@@ -0,0 +1,6 @@
1
+ rvm:
2
+ - 1.9.3
3
+ - 2.0.0
4
+ branches:
5
+ only:
6
+ - master
data/README.md CHANGED
@@ -1,6 +1,8 @@
1
1
  # FeedSearcher
2
2
  Search RSS feed URLs from the given URL.
3
3
 
4
+ [![Build Status](https://travis-ci.org/fastladder/feed_searcher.png?branch=master)](https://travis-ci.org/fastladder/feed_searcher)
5
+
4
6
 
5
7
  ## Installation
6
8
  ```
@@ -14,3 +16,19 @@ require "feed_searcher"
14
16
  FeedSearcher.search("https://github.com/r7kamura/feed_searcher")
15
17
  #=> ["https://github.com/r7kamura/feed_searcher/commits/master.atom"]
16
18
  ```
19
+
20
+
21
+ ## Internal
22
+ Let me explain how FeedSearcher works along its execution sequence.
23
+
24
+ 1. Fetches the HTML source of the given URL
25
+ 2. Finds link elements (represented as XPath format)
26
+ 3. Extracts URLs from the elements via its `href` attribute
27
+ 4. Includes the given URL if its resource itself is a feed
28
+ 5. Converts relative paths to absolute URLs
29
+
30
+ FeedSearcher finds link elements matching the following XPath patterns.
31
+
32
+ * //link[@rel='alternate'][@type='application/atom+xml']
33
+ * //link[@rel='alternate'][@type='application/rdf+xml']
34
+ * //link[@rel='alternate'][@type='application/rss+xml']
data/Rakefile CHANGED
@@ -1 +1,5 @@
1
1
  require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+ task :default => :spec
@@ -20,8 +20,11 @@ Gem::Specification.new do |spec|
20
20
 
21
21
  spec.add_dependency "mechanize", ">= 1.0.0"
22
22
  spec.add_dependency "nokogiri"
23
+ spec.add_development_dependency "activesupport"
23
24
  spec.add_development_dependency "bundler", "~> 1.3"
25
+ spec.add_development_dependency "pry"
24
26
  spec.add_development_dependency "rake"
25
27
  spec.add_development_dependency "rspec", ">= 2.13.0"
28
+ spec.add_development_dependency "simplecov"
26
29
  spec.add_development_dependency "webmock"
27
30
  end
@@ -1,5 +1,11 @@
1
1
  class FeedSearcher
2
2
  class Page
3
+ EXTENSIONS = %w[
4
+ atom
5
+ rdf
6
+ rss
7
+ ]
8
+
3
9
  MIME_TYPES = %w[
4
10
  application/atom+xml
5
11
  application/rdf+xml
@@ -13,12 +19,54 @@ class FeedSearcher
13
19
  end
14
20
 
15
21
  def feed_urls
16
- feed_attributes.map {|attribute| attribute["href"] }
22
+ urls = []
23
+ urls << url if (has_feed_mime_type? || has_feed_extension?) && xml?
24
+ urls += links.map {|link| link["href"] }
17
25
  end
18
26
 
19
27
  private
20
28
 
21
- def feed_attributes
29
+ def has_xml_declaration?
30
+ !!body.index("<?xml")
31
+ end
32
+
33
+ def has_feed_mime_type?
34
+ MIME_TYPES.include?(mime_type)
35
+ end
36
+
37
+ def has_feed_extension?
38
+ EXTENSIONS.include?(extension)
39
+ end
40
+
41
+ def parsable_as_xml?
42
+ !!xml
43
+ end
44
+
45
+ def xml?
46
+ has_xml_declaration? && parsable_as_xml?
47
+ end
48
+
49
+ def url
50
+ page.uri.to_s
51
+ end
52
+
53
+ def content_type
54
+ page.response["content-type"]
55
+ end
56
+
57
+ def mime_type
58
+ content_type.sub(/;.*\z/, "") if content_type
59
+ end
60
+
61
+ def extension
62
+ File.extname(page.uri.path).sub(/^\./, "")
63
+ end
64
+
65
+ def body
66
+ page.body
67
+ end
68
+
69
+ def links
22
70
  root.xpath("//link[@rel='alternate' and (#{types_query})]")
23
71
  end
24
72
 
@@ -27,7 +75,27 @@ class FeedSearcher
27
75
  end
28
76
 
29
77
  def root
30
- Nokogiri.HTML(page.body)
78
+ xml || html
79
+ end
80
+
81
+ def xml
82
+ if @xml.nil?
83
+ @xml = parse_xml
84
+ else
85
+ @xml
86
+ end
87
+ end
88
+
89
+ def html
90
+ Nokogiri.HTML(body)
91
+ end
92
+
93
+ def parse_xml
94
+ Nokogiri.XML(body) do |config|
95
+ config.options = Nokogiri::XML::ParseOptions::STRICT | Nokogiri::XML::ParseOptions::NOENT
96
+ end
97
+ rescue
98
+ false
31
99
  end
32
100
  end
33
101
  end
@@ -1,3 +1,3 @@
1
1
  class FeedSearcher
2
- VERSION = "0.0.4"
2
+ VERSION = "0.0.5"
3
3
  end
@@ -1,50 +1,111 @@
1
1
  require "spec_helper"
2
+ require "active_support/core_ext/string/strip"
2
3
 
3
4
  describe FeedSearcher do
4
5
  describe ".search" do
5
- before do
6
- stub_request(:get, "http://example.com/").to_return(
7
- :body => <<-EOF
8
- <!DOCTYPE HTML>
9
- <html>
10
- <head>
11
- <meta charset="UTF-8">
12
- <link href="http://example.com/1" rel="alternate" type="application/atom+xml" />
13
- <link href="http://example.com/2" rel="alternate" type="application/rdf+xml" />
14
- <link href="http://example.com/3" rel="alternate" type="application/rss+xml" />
15
- <link href="http://example.com/4" rel="alternate" type="application/xml" />
16
- <link href="http://example.com/5" rel="resource" type="application/rss+xml" />
17
- <link href="http://www.example.com/6" rel="alternate" type="application/rss+xml" />
18
- <link href="http://other-example.com/7" rel="alternate" type="application/rss+xml" />
19
- <link href="/8" rel="alternate" type="application/rss+xml" />
20
- </head>
21
- <body>
22
- body
23
- </body>
24
- </html>
25
- EOF
26
- )
6
+ context "when the specified resource is HTML" do
7
+ before do
8
+ stub_request(:get, "http://example.com/").to_return(
9
+ :body => <<-EOS.strip_heredoc
10
+ <!DOCTYPE HTML>
11
+ <html>
12
+ <head>
13
+ <meta charset="UTF-8">
14
+ <link href="http://example.com/1" rel="alternate" type="application/atom+xml" />
15
+ <link href="http://example.com/2" rel="alternate" type="application/rdf+xml" />
16
+ <link href="http://example.com/3" rel="alternate" type="application/rss+xml" />
17
+ <link href="http://example.com/4" rel="alternate" type="application/xml" />
18
+ <link href="http://example.com/5" rel="resource" type="application/rss+xml" />
19
+ <link href="http://www.example.com/6" rel="alternate" type="application/rss+xml" />
20
+ <link href="http://other-example.com/7" rel="alternate" type="application/rss+xml" />
21
+ <link href="/8" rel="alternate" type="application/rss+xml" />
22
+ </head>
23
+ <body>
24
+ body
25
+ </body>
26
+ </html>
27
+ EOS
28
+ )
29
+ end
30
+
31
+ # This example makes sure the following specifications.
32
+ #
33
+ # * it recognizes application/atom+xml
34
+ # * it recognizes application/rdf+xml
35
+ # * it recognizes application/rss+xml
36
+ # * it does not recognize application/xml
37
+ # * it keeps subdomain
38
+ # * it keeps other domain
39
+ # * it converts relative path to absolute url
40
+ #
41
+ it "returns feed URLs from link elements in the specified resource" do
42
+ FeedSearcher.search("http://example.com/").should == %w[
43
+ http://example.com/1
44
+ http://example.com/2
45
+ http://example.com/3
46
+ http://www.example.com/6
47
+ http://other-example.com/7
48
+ http://example.com/8
49
+ ]
50
+ end
27
51
  end
28
52
 
29
- # This example makes sure the following specifications.
30
- #
31
- # * it recognizes application/atom+xml
32
- # * it recognizes application/rdf+xml
33
- # * it recognizes application/rss+xml
34
- # * it does not recognizes application/xml
35
- # * it keeps subdomain
36
- # * it keeps other domain
37
- # * it converts absolute url
38
- #
39
- it "returns feed URLs from given URL" do
40
- FeedSearcher.search("http://example.com/").should == %w[
41
- http://example.com/1
42
- http://example.com/2
43
- http://example.com/3
44
- http://www.example.com/6
45
- http://other-example.com/7
46
- http://example.com/8
47
- ]
53
+ context "when the specified resource has feed MIME type and be parsable as XML" do
54
+ before do
55
+ stub_request(:get, "http://example.com/").to_return(
56
+ :headers => { "Content-Type" => "application/rss+xml; charset=UTF-8" },
57
+ :body => <<-EOS.strip_heredoc
58
+ <?xml version="1.0" encoding="UTF-8"?>
59
+ <rss>
60
+ <channel>
61
+ <title>title</title>
62
+ <link>http://exmple.com/</link>
63
+ <item>
64
+ <title>item title</title>
65
+ <link>http://example.com/item</link>
66
+ <description>item description</description>
67
+ </item>
68
+ </channel>
69
+ </rss>
70
+ EOS
71
+ )
72
+ end
73
+
74
+ it "returns itself as a feed url" do
75
+ FeedSearcher.search("http://example.com/").should == %w[
76
+ http://example.com/
77
+ ]
78
+ end
79
+ end
80
+
81
+ context "when the specified resource has feed extension and be parsable as XML" do
82
+ before do
83
+ stub_request(:get, "http://example.com/feed.atom").to_return(
84
+ :body => <<-EOS.strip_heredoc
85
+ <?xml version="1.0" encoding="UTF-8"?>
86
+ <feed xmlns="http://www.w3.org/2005/Atom">
87
+ <title>title</title>
88
+ <link rel="self" href="http://example.com/1"/>
89
+ <link rel="alternate" href="http://example.com/"/>
90
+ <entry>
91
+ <title>item title</title>
92
+ <link rel="alternate" href="http://example.com/"/>
93
+ <content type="html">
94
+ <div xmlns="http://www.w3.org/1999/xhtml">
95
+ <p>item content</p>
96
+ </div>
97
+ </content>
98
+ </entry>
99
+ </feed>
100
+ EOS
101
+ )
102
+ end
103
+
104
+ it "returns itself as a feed url" do
105
+ FeedSearcher.search("http://example.com/feed.atom").should == %w[
106
+ http://example.com/feed.atom
107
+ ]
108
+ end
48
109
  end
49
110
  end
50
111
  end
@@ -0,0 +1,15 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <feed xmlns="http://www.w3.org/2005/Atom">
3
+ <title>title</title>
4
+ <link rel="self" href="http://example.com/1"/>
5
+ <link rel="alternate" href="http://example.com/"/>
6
+ <entry>
7
+ <title>item title</title>
8
+ <link rel="alternate" href="http://example.com/"/>
9
+ <content type="html">
10
+ <div xmlns="http://www.w3.org/1999/xhtml">
11
+ <p>item content</p>
12
+ </div>
13
+ </content>
14
+ </entry>
15
+ </feed>
@@ -0,0 +1,18 @@
1
+ <!DOCTYPE HTML>
2
+ <html>
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <link href="http://example.com/1" rel="alternate" type="application/atom+xml" />
6
+ <link href="http://example.com/2" rel="alternate" type="application/rdf+xml" />
7
+ <link href="http://example.com/3" rel="alternate" type="application/rss+xml" />
8
+ <link href="http://example.com/4" rel="alternate" type="application/xml" />
9
+ <link href="http://example.com/5" rel="resource" type="application/rss+xml" />
10
+ <link href="http://www.example.com/6" rel="alternate" type="application/rss+xml" />
11
+ <link href="http://other-example.com/7" rel="alternate" type="application/rss+xml" />
12
+ <link href="/8" rel="alternate" type="application/rss+xml" />
13
+ </head>
14
+ <body>
15
+ body
16
+ </body>
17
+ </html>
18
+
@@ -0,0 +1,12 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <rss>
3
+ <channel>
4
+ <title>title</title>
5
+ <link>http://exmple.com/</link>
6
+ <item>
7
+ <title>item title</title>
8
+ <link>http://example.com/item</link>
9
+ <description>item description</description>
10
+ </item>
11
+ </channel>
12
+ </rss>
data/spec/spec_helper.rb CHANGED
@@ -1,3 +1,6 @@
1
+ require "simplecov"
2
+ SimpleCov.start
3
+
1
4
  $LOAD_PATH.unshift File.expand_path("../../lib/feed_searcher", __FILE__)
2
5
  require "feed_searcher"
3
6
  require "webmock/rspec"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: feed_searcher
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-03-17 00:00:00.000000000 Z
12
+ date: 2013-03-19 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: mechanize
@@ -43,6 +43,22 @@ dependencies:
43
43
  - - ! '>='
44
44
  - !ruby/object:Gem::Version
45
45
  version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: activesupport
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
46
62
  - !ruby/object:Gem::Dependency
47
63
  name: bundler
48
64
  requirement: !ruby/object:Gem::Requirement
@@ -59,6 +75,22 @@ dependencies:
59
75
  - - ~>
60
76
  - !ruby/object:Gem::Version
61
77
  version: '1.3'
78
+ - !ruby/object:Gem::Dependency
79
+ name: pry
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
86
+ type: :development
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
62
94
  - !ruby/object:Gem::Dependency
63
95
  name: rake
64
96
  requirement: !ruby/object:Gem::Requirement
@@ -91,6 +123,22 @@ dependencies:
91
123
  - - ! '>='
92
124
  - !ruby/object:Gem::Version
93
125
  version: 2.13.0
126
+ - !ruby/object:Gem::Dependency
127
+ name: simplecov
128
+ requirement: !ruby/object:Gem::Requirement
129
+ none: false
130
+ requirements:
131
+ - - ! '>='
132
+ - !ruby/object:Gem::Version
133
+ version: '0'
134
+ type: :development
135
+ prerelease: false
136
+ version_requirements: !ruby/object:Gem::Requirement
137
+ none: false
138
+ requirements:
139
+ - - ! '>='
140
+ - !ruby/object:Gem::Version
141
+ version: '0'
94
142
  - !ruby/object:Gem::Dependency
95
143
  name: webmock
96
144
  requirement: !ruby/object:Gem::Requirement
@@ -115,6 +163,7 @@ extensions: []
115
163
  extra_rdoc_files: []
116
164
  files:
117
165
  - .gitignore
166
+ - .travis.yml
118
167
  - Gemfile
119
168
  - LICENSE.txt
120
169
  - README.md
@@ -125,6 +174,9 @@ files:
125
174
  - lib/feed_searcher/page.rb
126
175
  - lib/feed_searcher/version.rb
127
176
  - spec/feed_searcher_spec.rb
177
+ - spec/fixtures/example.atom
178
+ - spec/fixtures/example.html
179
+ - spec/fixtures/example.rss
128
180
  - spec/spec_helper.rb
129
181
  homepage: https://github.com/r7kamura/feed_searcher
130
182
  licenses:
@@ -153,5 +205,8 @@ specification_version: 3
153
205
  summary: Search RSS feed URLs from the given URL
154
206
  test_files:
155
207
  - spec/feed_searcher_spec.rb
208
+ - spec/fixtures/example.atom
209
+ - spec/fixtures/example.html
210
+ - spec/fixtures/example.rss
156
211
  - spec/spec_helper.rb
157
212
  has_rdoc: