metainspector 1.3.0 → 1.4.0

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore CHANGED
@@ -1,5 +1,7 @@
1
1
  *.gem
2
2
  .bundle
3
3
  .rvmrc
4
+ .rspec
4
5
  Gemfile.lock
5
6
  pkg/*
7
+ .idea/
data/.rspec.example ADDED
@@ -0,0 +1 @@
1
+ --colour --format d
data/README.rdoc CHANGED
@@ -31,6 +31,7 @@ Then you can see the scraped data like this:
31
31
  page.meta_description # meta description, as string
32
32
  page.meta_keywords # meta keywords, as string
33
33
  page.image # Most relevant image, if defined with og:image
34
+ page.feed # First RSS or Atom feed URL found in the page's link tags, as string
34
35
 
35
36
  MetaInspector uses dynamic methods for meta_tag discovery, so all these will work, and will be converted to a search of a meta tag by the corresponding name, and return its content attribute
36
37
 
@@ -84,7 +85,10 @@ You can find some sample scripts on the samples folder, including a basic scrapi
84
85
 
85
86
  = ZOMG Fork! Thank you!
86
87
 
87
- You're welcome to fork this project and send pull requests. I want to thank Ryan Romanchuk for his help https://github.com/rromanchuk
88
+ You're welcome to fork this project and send pull requests. Special thanks to:
89
+
90
+ * Ryan Romanchuk https://github.com/rromanchuk
91
+ * Edmund Haselwanter https://github.com/ehaselwanter
88
92
 
89
93
  = To Do
90
94
 
data/Rakefile CHANGED
@@ -1,2 +1,5 @@
1
1
  require 'bundler'
2
+ require 'rspec/core/rake_task'
3
+
2
4
  Bundler::GemHelper.install_tasks
5
+ RSpec::Core::RakeTask.new :spec
@@ -1,6 +1,6 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
- require_relative 'meta_inspector/scraper'
3
+ require File.expand_path(File.join(File.dirname(__FILE__), 'meta_inspector/scraper'))
4
4
 
5
5
  module MetaInspector
6
6
  extend self
@@ -28,6 +28,15 @@ module MetaInspector
28
28
  @links ||= parsed_document.search("//a").map {|link| link.attributes["href"].to_s.strip} rescue nil
29
29
  end
30
30
 
31
+ # Returns the first RSS or Atom feed URL declared in the parsed document's link tags, as an absolute URL (nil if none is found)
32
+ def feed
33
+ @feed ||= parsed_document.xpath("//link").select{ |link|
34
+ link.attributes["type"] && link.attributes["type"].value =~ /(atom|rss)/
35
+ }.map { |link|
36
+ absolutify_url(link.attributes["href"].value)
37
+ }.first rescue nil
38
+ end
39
+
31
40
  # Returns the parsed image from Facebook's open graph property tags
32
41
  # Most major websites now define this property, and it is usually very relevant
33
42
  # See doc at http://developers.facebook.com/docs/opengraph/
@@ -81,5 +90,11 @@ module MetaInspector
81
90
  super
82
91
  end
83
92
  end
93
+
94
+ private
95
+
96
+ def absolutify_url(url)
97
+ url =~ /^http.*/ ? url : File.join(@url,url)
98
+ end
84
99
  end
85
100
  end
@@ -1,5 +1,5 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
3
  module MetaInspector
4
- VERSION = "1.3.0"
4
+ VERSION = "1.4.0"
5
5
  end
@@ -22,5 +22,5 @@ Gem::Specification.new do |s|
22
22
  s.add_dependency 'nokogiri', '1.4.4'
23
23
  s.add_dependency 'charguess', '1.3.20110226181011'
24
24
 
25
- s.add_development_dependency 'rspec', '2.5.0'
25
+ s.add_development_dependency 'rspec', '~> 2.6.0'
26
26
  end
@@ -37,7 +37,7 @@ describe MetaInspector do
37
37
  end
38
38
 
39
39
  it "should get the links" do
40
- @m.links.size.should == 8
40
+ @m.links.size.should == 9
41
41
  end
42
42
 
43
43
  it "should have a Nokogiri::HTML::Document as parsed_document" do
@@ -47,6 +47,16 @@ describe MetaInspector do
47
47
  it "should have a String as document" do
48
48
  @m.document.class.should == String
49
49
  end
50
+
51
+ it "should get rss feed" do
52
+ @m = MetaInspector.new('http://www.iteh.at')
53
+ @m.feed.should == 'http://www.iteh.at/de/rss/'
54
+ end
55
+
56
+ it "should get atom feed" do
57
+ @m = MetaInspector.new('http://www.tea-tron.com/jbravo/blog/')
58
+ @m.feed.should == 'http://www.tea-tron.com/jbravo/blog/feed/'
59
+ end
50
60
  end
51
61
 
52
62
  context 'Getting meta tags by ghost methods' do
metadata CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
4
4
  prerelease: false
5
5
  segments:
6
6
  - 1
7
- - 3
7
+ - 4
8
8
  - 0
9
- version: 1.3.0
9
+ version: 1.4.0
10
10
  platform: ruby
11
11
  authors:
12
12
  - Jaime Iniesta
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2011-05-09 00:00:00 +02:00
17
+ date: 2011-05-30 00:00:00 +02:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
@@ -53,13 +53,13 @@ dependencies:
53
53
  requirement: &id003 !ruby/object:Gem::Requirement
54
54
  none: false
55
55
  requirements:
56
- - - "="
56
+ - - ~>
57
57
  - !ruby/object:Gem::Version
58
58
  segments:
59
59
  - 2
60
- - 5
60
+ - 6
61
61
  - 0
62
- version: 2.5.0
62
+ version: 2.6.0
63
63
  type: :development
64
64
  version_requirements: *id003
65
65
  description: MetaInspector lets you scrape a web page and get its title, charset, link and meta tags
@@ -73,6 +73,7 @@ extra_rdoc_files: []
73
73
 
74
74
  files:
75
75
  - .gitignore
76
+ - .rspec.example
76
77
  - Gemfile
77
78
  - MIT-LICENSE
78
79
  - README.rdoc