rss_detector 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ vendor/
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in rss_detector.gemspec
4
+ gemspec
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 yukihir0
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,29 @@
1
+ # RssDetector
2
+
3
+ 'rss_detector' detects the RSS and Atom feeds advertised in an HTML document.
4
+
5
+ ## Install
6
+
7
+ ```
8
+ gem install 'rss_detector'
9
+ ```
10
+
11
+ or
12
+
13
+ ```
14
+ gem 'rss_detector'
15
+ ```
16
+
17
+ in your Gemfile.
18
+
19
+ ## How to use
20
+
21
+ ```
22
+ feed = RSSDetector::detect(html)
23
+ ```
24
+
25
+ For a complete example, see [sample/sample.rb](https://github.com/yukihir0/rss_detector/blob/master/sample/sample.rb).
26
+
27
+ ## License
28
+
29
+ Copyright © 2013 yukihir0. Released under the MIT License; see LICENSE.txt.
@@ -0,0 +1,9 @@
1
+ require "bundler/gem_tasks"
2
+
3
+ Bundler.setup
4
+ require 'rspec/core/rake_task'
5
+
6
# Rake task that runs the spec suite with coloured, documentation-style output.
desc "run spec"
RSpec::Core::RakeTask.new(:spec) do |t|
  # Long option names are accepted by both RSpec 2 and RSpec 3; the short
  # "-fs" formatter alias used previously is rejected by RSpec 3.
  t.rspec_opts = ["--color", "--format", "documentation"]
end
@@ -0,0 +1,41 @@
1
+ require "rss_detector/version"
2
+ require 'nokogiri'
3
+
4
+ class RSSDetector
5
+ # xpath for feed
6
+ RSS_XPATH = '//link[@rel="alternate"][@type="application/rss+xml"]'
7
+ ATOM_XPATH = '//link[@rel="alternate"][@type="application/atom+xml"]'
8
+
9
+ public
10
+ def RSSDetector::detect(doc)
11
+
12
+ # create html from string
13
+ html = Nokogiri::HTML(doc)
14
+
15
+ # detect rss and atom
16
+ @rss_feeds = detectFeed(html, RSS_XPATH)
17
+ @atom_feeds = detectFeed(html, ATOM_XPATH)
18
+
19
+ return @rss_feeds + @atom_feeds
20
+ end
21
+
22
+ private
23
+ def RSSDetector::detectFeed(html, feed_xpath)
24
+
25
+ # feed list
26
+ @feeds = Array.new
27
+
28
+ # discover feed
29
+ html.xpath(feed_xpath).each do |link|
30
+
31
+ # get feed title and url
32
+ @feed_title = link.attribute("title")
33
+ @feed_url = link.attribute("href")
34
+
35
+ # push hash to array
36
+ @feeds << {title: @feed_title, url: @feed_url}
37
+ end
38
+
39
+ return @feeds
40
+ end
41
+ end
@@ -0,0 +1,3 @@
1
# Namespace holding the gem's release number.
# Note: this constant (RssDetector) is distinct from the RSSDetector class.
module RssDetector
  # Version string of the gem.
  VERSION = '0.0.2'
end
@@ -0,0 +1,22 @@
1
+ # -*- encoding: utf-8 -*-
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'rss_detector/version'
5
+
6
# Packaging definition for the rss_detector gem.
Gem::Specification.new do |gem|
  gem.name          = "rss_detector"
  gem.version       = RssDetector::VERSION
  gem.authors       = ["yukihir0"]
  gem.email         = ["yukihiro.cotori@gmail.com"]
  gem.description   = %q{'rss_detector' provides feature for feed detection from html document.}
  gem.summary       = %q{'rss_detector' provides feature for feed detection from html document.}
  gem.homepage      = "https://github.com/yukihir0/rss_detector"
  # Declare the licence so the built gem's metadata does not carry an empty
  # licence list; the project is distributed under the MIT License.
  gem.license       = "MIT"

  # Package every file tracked by git; executables and test files are
  # derived from that list by path prefix.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  gem.add_dependency "nokogiri", "~>1.5"
  gem.add_development_dependency "rspec"
end
@@ -0,0 +1,36 @@
1
+ # coding: utf-8
2
+ require 'rubygems'
3
+ require 'open-uri'
4
+ require 'rss_detector'
5
+
6
# Fetches the content at +url+ using open-uri.
#
# url - address to read.
#
# Returns the content as a String, or nil when the server answers with
# status 304 (the error message is then reported through warn).
# Raises OpenURI::HTTPError for every other HTTP error status.
def get_html(url)
  # Kernel#open stopped opening URLs in Ruby 3.0, so prefer URI.open where
  # open-uri provides it and fall back to Kernel#open on older rubies.
  # The block form closes the underlying IO once the body has been read.
  if URI.respond_to?(:open)
    URI.open(url) { |io| io.read }
  else
    open(url) { |io| io.read }
  end
rescue OpenURI::HTTPError => ex
  # Anything other than 304 is re-raised unchanged.
  raise unless ex.io.status[0] == '304'

  warn ex.message
  nil
end
21
+
22
# Prints the detected feeds for +url+ to standard output.
#
# url   - address the feeds were detected on; shown in the banner line.
# feeds - collection of Hashes read through the :title and :url keys.
#
# Output is a banner line, one "title : url" line per feed, then a dashed
# line as wide as the banner followed by a blank line.
def print_feed(url, feeds)
  banner = "---------- #{url} ----------"

  puts banner
  feeds.each do |entry|
    puts "#{entry[:title]} : #{entry[:url]}"
  end
  puts "#{'-' * banner.length}\n\n"
end
30
+
31
+
32
# main: download the page, detect its feeds and print them
target_url = 'http://d.hatena.ne.jp/yukihir0/'
page = get_html(target_url)
print_feed(target_url, RSSDetector::detect(page))
@@ -0,0 +1,118 @@
1
+ # coding: utf-8
2
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
3
+
4
# Behaviour of RSSDetector.detect for empty input and for documents that
# advertise RSS and/or Atom feeds through <link rel="alternate"> tags.
describe RSSDetector do
  # Builds one feed <link> tag; +kind+ is "rss" or "atom".
  def feed_link(kind, title, url)
    %(<link rel="alternate" type="application/#{kind}+xml" title="#{title}" href="#{url}"/>)
  end

  # Wraps the given tags in the minimal HTML document used by the examples.
  def html_page(*links)
    (["<html><head>"] + links + ["</head></html>"]).join("\n") + "\n"
  end

  # Runs detection and reduces the result to [title, url] string pairs.
  # to_s is applied because detect returns the raw attribute values.
  def detected(input)
    RSSDetector::detect(input).map { |feed| [feed[:title].to_s, feed[:url].to_s] }
  end

  context "init" do
    describe "#detect" do
      context "nil input" do
        it "should be no feed" do
          expect(RSSDetector::detect(nil).size).to eq(0)
        end
      end

      context "null string input" do
        it "should be no feed" do
          expect(RSSDetector::detect("").size).to eq(0)
        end
      end

      context "no contain feed input" do
        it "should be no feed" do
          expect(RSSDetector::detect(html_page).size).to eq(0)
        end
      end

      context "contain 1 rss feed input" do
        it "should be 1 rss feed" do
          input = html_page(feed_link("rss", "test_rss_feed_title", "http://test_rss_feed_url/"))

          expect(detected(input)).to eq([
            ["test_rss_feed_title", "http://test_rss_feed_url/"]
          ])
        end
      end

      context "contain 2 rss feed input" do
        it "should be 2 rss feed" do
          input = html_page(
            feed_link("rss", "test_rss_feed_title_1", "http://test_rss_feed_url_1/"),
            feed_link("rss", "test_rss_feed_title_2", "http://test_rss_feed_url_2/")
          )

          expect(detected(input)).to eq([
            ["test_rss_feed_title_1", "http://test_rss_feed_url_1/"],
            ["test_rss_feed_title_2", "http://test_rss_feed_url_2/"]
          ])
        end
      end

      context "contain 1 atom feed input" do
        it "should be 1 atom feed" do
          input = html_page(feed_link("atom", "test_atom_feed_title", "http://test_atom_feed_url/"))

          expect(detected(input)).to eq([
            ["test_atom_feed_title", "http://test_atom_feed_url/"]
          ])
        end
      end

      context "contain 2 atom feed input" do
        it "should be 2 atom feed" do
          input = html_page(
            feed_link("atom", "test_atom_feed_title_1", "http://test_atom_feed_url_1/"),
            feed_link("atom", "test_atom_feed_title_2", "http://test_atom_feed_url_2/")
          )

          expect(detected(input)).to eq([
            ["test_atom_feed_title_1", "http://test_atom_feed_url_1/"],
            ["test_atom_feed_title_2", "http://test_atom_feed_url_2/"]
          ])
        end
      end

      context "contain rss and atom feed input" do
        it "should be rss and atom feed" do
          input = html_page(
            feed_link("rss", "test_rss_feed_title", "http://test_rss_feed_url/"),
            feed_link("atom", "test_atom_feed_title", "http://test_atom_feed_url/")
          )

          # RSS feeds are expected before Atom feeds.
          expect(detected(input)).to eq([
            ["test_rss_feed_title", "http://test_rss_feed_url/"],
            ["test_atom_feed_title", "http://test_atom_feed_url/"]
          ])
        end
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
+ # coding: utf-8
2
+ require 'rubygems'
3
+ require 'rss_detector'
metadata ADDED
@@ -0,0 +1,96 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rss_detector
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - yukihir0
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-01-06 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: nokogiri
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: '1.5'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: '1.5'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rspec
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ description: ! '''rss_detector'' provides feature for feed detection from html document.'
47
+ email:
48
+ - yukihiro.cotori@gmail.com
49
+ executables: []
50
+ extensions: []
51
+ extra_rdoc_files: []
52
+ files:
53
+ - .gitignore
54
+ - Gemfile
55
+ - LICENSE.txt
56
+ - README.md
57
+ - Rakefile
58
+ - lib/rss_detector.rb
59
+ - lib/rss_detector/version.rb
60
+ - rss_detector.gemspec
61
+ - sample/sample.rb
62
+ - spec/rss_detector_spec.rb
63
+ - spec/spec_helper.rb
64
+ homepage: https://github.com/yukihir0/rss_detector
65
+ licenses: []
66
+ post_install_message:
67
+ rdoc_options: []
68
+ require_paths:
69
+ - lib
70
+ required_ruby_version: !ruby/object:Gem::Requirement
71
+ none: false
72
+ requirements:
73
+ - - ! '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ segments:
77
+ - 0
78
+ hash: -2779202382641332666
79
+ required_rubygems_version: !ruby/object:Gem::Requirement
80
+ none: false
81
+ requirements:
82
+ - - ! '>='
83
+ - !ruby/object:Gem::Version
84
+ version: '0'
85
+ segments:
86
+ - 0
87
+ hash: -2779202382641332666
88
+ requirements: []
89
+ rubyforge_project:
90
+ rubygems_version: 1.8.24
91
+ signing_key:
92
+ specification_version: 3
93
+ summary: ! '''rss_detector'' provides feature for feed detection from html document.'
94
+ test_files:
95
+ - spec/rss_detector_spec.rb
96
+ - spec/spec_helper.rb