metainspector 1.3.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore CHANGED
@@ -1,5 +1,7 @@
1
1
  *.gem
2
2
  .bundle
3
3
  .rvmrc
4
+ .rspec
4
5
  Gemfile.lock
5
6
  pkg/*
7
+ .idea/
data/.rspec.example ADDED
@@ -0,0 +1 @@
1
+ --colour --format d
data/README.rdoc CHANGED
@@ -31,6 +31,7 @@ Then you can see the scraped data like this:
31
31
  page.meta_description # meta description, as string
32
32
  page.meta_keywords # meta keywords, as string
33
33
  page.image # Most relevant image, if defined with og:image
34
+ page.feed # First rss or atom feed link found in the page's link tags, as string
34
35
 
35
36
  MetaInspector uses dynamic methods for meta_tag discovery, so all these will work, and will be converted to a search of a meta tag by the corresponding name, and return its content attribute
36
37
 
@@ -84,7 +85,10 @@ You can find some sample scripts on the samples folder, including a basic scrapi
84
85
 
85
86
  = ZOMG Fork! Thank you!
86
87
 
87
- You're welcome to fork this project and send pull requests. I want to thank Ryan Romanchuk for his help https://github.com/rromanchuk
88
+ You're welcome to fork this project and send pull requests. I would especially like to thank:
89
+
90
+ * Ryan Romanchuk https://github.com/rromanchuk
91
+ * Edmund Haselwanter https://github.com/ehaselwanter
88
92
 
89
93
  = To Do
90
94
 
data/Rakefile CHANGED
@@ -1,2 +1,5 @@
1
1
  require 'bundler'
2
+ require 'rspec/core/rake_task'
3
+
2
4
  Bundler::GemHelper.install_tasks
5
+ RSpec::Core::RakeTask.new :spec
@@ -1,6 +1,6 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
- require_relative 'meta_inspector/scraper'
3
+ require File.expand_path(File.join(File.dirname(__FILE__), 'meta_inspector/scraper'))
4
4
 
5
5
  module MetaInspector
6
6
  extend self
@@ -28,6 +28,15 @@ module MetaInspector
28
28
  @links ||= parsed_document.search("//a").map {|link| link.attributes["href"].to_s.strip} rescue nil
29
29
  end
30
30
 
31
+ # Returns the first RSS or Atom feed link found in the parsed document's link tags, as an absolute URL (nil on error)
32
+ def feed
33
+ @feed ||= parsed_document.xpath("//link").select{ |link|
34
+ link.attributes["type"] && link.attributes["type"].value =~ /(atom|rss)/
35
+ }.map { |link|
36
+ absolutify_url(link.attributes["href"].value)
37
+ }.first rescue nil
38
+ end
39
+
31
40
  # Returns the parsed image from Facebook's open graph property tags
32
41
  # Most major websites now define this property, and it is usually very relevant
33
42
  # See doc at http://developers.facebook.com/docs/opengraph/
@@ -81,5 +90,11 @@ module MetaInspector
81
90
  super
82
91
  end
83
92
  end
93
+
94
+ private
95
+
96
+ def absolutify_url(url)
97
+ url =~ /^http.*/ ? url : File.join(@url,url)
98
+ end
84
99
  end
85
100
  end
@@ -1,5 +1,5 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
3
  module MetaInspector
4
- VERSION = "1.3.0"
4
+ VERSION = "1.4.0"
5
5
  end
@@ -22,5 +22,5 @@ Gem::Specification.new do |s|
22
22
  s.add_dependency 'nokogiri', '1.4.4'
23
23
  s.add_dependency 'charguess', '1.3.20110226181011'
24
24
 
25
- s.add_development_dependency 'rspec', '2.5.0'
25
+ s.add_development_dependency 'rspec', '~> 2.6.0'
26
26
  end
@@ -37,7 +37,7 @@ describe MetaInspector do
37
37
  end
38
38
 
39
39
  it "should get the links" do
40
- @m.links.size.should == 8
40
+ @m.links.size.should == 9
41
41
  end
42
42
 
43
43
  it "should have a Nokogiri::HTML::Document as parsed_document" do
@@ -47,6 +47,16 @@ describe MetaInspector do
47
47
  it "should have a String as document" do
48
48
  @m.document.class.should == String
49
49
  end
50
+
51
+ it "should get rss feed" do
52
+ @m = MetaInspector.new('http://www.iteh.at')
53
+ @m.feed.should == 'http://www.iteh.at/de/rss/'
54
+ end
55
+
56
+ it "should get atom feed" do
57
+ @m = MetaInspector.new('http://www.tea-tron.com/jbravo/blog/')
58
+ @m.feed.should == 'http://www.tea-tron.com/jbravo/blog/feed/'
59
+ end
50
60
  end
51
61
 
52
62
  context 'Getting meta tags by ghost methods' do
metadata CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
4
4
  prerelease: false
5
5
  segments:
6
6
  - 1
7
- - 3
7
+ - 4
8
8
  - 0
9
- version: 1.3.0
9
+ version: 1.4.0
10
10
  platform: ruby
11
11
  authors:
12
12
  - Jaime Iniesta
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2011-05-09 00:00:00 +02:00
17
+ date: 2011-05-30 00:00:00 +02:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
@@ -53,13 +53,13 @@ dependencies:
53
53
  requirement: &id003 !ruby/object:Gem::Requirement
54
54
  none: false
55
55
  requirements:
56
- - - "="
56
+ - - ~>
57
57
  - !ruby/object:Gem::Version
58
58
  segments:
59
59
  - 2
60
- - 5
60
+ - 6
61
61
  - 0
62
- version: 2.5.0
62
+ version: 2.6.0
63
63
  type: :development
64
64
  version_requirements: *id003
65
65
  description: MetaInspector lets you scrape a web page and get its title, charset, link and meta tags
@@ -73,6 +73,7 @@ extra_rdoc_files: []
73
73
 
74
74
  files:
75
75
  - .gitignore
76
+ - .rspec.example
76
77
  - Gemfile
77
78
  - MIT-LICENSE
78
79
  - README.rdoc