metainspector 1.4.0 → 1.5.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -5,6 +5,9 @@ require File.join(File.dirname(__FILE__), "/spec_helper")
5
5
  describe MetaInspector do
6
6
 
7
7
  context 'Initialization' do
8
+
9
+ FakeWeb.register_uri(:get, "http://pagerankalert.com", :response => fixture_file("pagerankalert.com.response"))
10
+
8
11
  it 'should accept an URL with a scheme' do
9
12
  @m = MetaInspector.new('http://pagerankalert.com')
10
13
  @m.url.should == 'http://pagerankalert.com'
@@ -17,6 +20,12 @@ describe MetaInspector do
17
20
  end
18
21
 
19
22
  context 'Doing a basic scrape' do
23
+
24
+ FakeWeb.register_uri(:get, "http://pagerankalert.com", :response => fixture_file("pagerankalert.com.response"))
25
+ FakeWeb.register_uri(:get, "http://www.theonion.com/articles/apple-claims-new-iphone-only-visible-to-most-loyal,2772/", :response => fixture_file("theonion.com.response"))
26
+ FakeWeb.register_uri(:get, "http://www.iteh.at", :response => fixture_file("iteh.at.response"))
27
+ FakeWeb.register_uri(:get, "http://www.tea-tron.com/jbravo/blog/", :response => fixture_file("tea-tron.com.response"))
28
+
20
29
  EXPECTED_TITLE = 'PageRankAlert.com :: Track your PageRank changes'
21
30
 
22
31
  before(:each) do
@@ -36,10 +45,6 @@ describe MetaInspector do
36
45
  @m.image.should == "http://o.onionstatic.com/images/articles/article/2772/Apple-Claims-600w-R_jpg_130x110_q85.jpg"
37
46
  end
38
47
 
39
- it "should get the links" do
40
- @m.links.size.should == 9
41
- end
42
-
43
48
  it "should have a Nokogiri::HTML::Document as parsed_document" do
44
49
  @m.parsed_document.class.should == Nokogiri::HTML::Document
45
50
  end
@@ -59,6 +64,36 @@ describe MetaInspector do
59
64
  end
60
65
  end
61
66
 
67
+ context 'Links' do
68
+ before(:each) do
69
+ @m = MetaInspector.new('http://pagerankalert.com')
70
+ end
71
+
72
+ it "should get the links" do
73
+ @m.links.should == [
74
+ "/",
75
+ "/es?language=es",
76
+ "/users/sign_up",
77
+ "/users/sign_in",
78
+ "http://pagerankalert.posterous.com",
79
+ "http://twitter.com/pagerankalert",
80
+ "http://twitter.com/share"
81
+ ]
82
+ end
83
+
84
+ it "should convert links to absolute urls" do
85
+ @m.absolute_links.should == [
86
+ "http://pagerankalert.com/",
87
+ "http://pagerankalert.com/es?language=es",
88
+ "http://pagerankalert.com/users/sign_up",
89
+ "http://pagerankalert.com/users/sign_in",
90
+ "http://pagerankalert.posterous.com",
91
+ "http://twitter.com/pagerankalert",
92
+ "http://twitter.com/share"
93
+ ]
94
+ end
95
+ end
96
+
62
97
  context 'Getting meta tags by ghost methods' do
63
98
  before(:each) do
64
99
  @m = MetaInspector.new('http://pagerankalert.com')
@@ -97,6 +132,10 @@ describe MetaInspector do
97
132
  end
98
133
 
99
134
  context 'Charset detection' do
135
+
136
+ FakeWeb.register_uri(:get, "http://www.pagerankalert.com", :response => fixture_file("pagerankalert.com.response"))
137
+ FakeWeb.register_uri(:get, "http://www.alazan.com", :response => fixture_file("alazan.com.response"))
138
+
100
139
  it "should detect windows-1252 charset" do
101
140
  @m = MetaInspector.new('http://www.alazan.com')
102
141
  @m.charset.should == "windows-1252"
data/spec/spec_helper.rb CHANGED
@@ -1,4 +1,13 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
3
  $: << File.join(File.dirname(__FILE__), "/../lib")
4
- require 'meta_inspector'
4
+ require 'meta_inspector'
5
+ require 'fakeweb'
6
+
7
+ FakeWeb.allow_net_connect = false
8
+
9
+ def fixture_file(filename)
10
+ return '' if filename == ''
11
+ file_path = File.expand_path(File.dirname(__FILE__) + '/fixtures/' + filename)
12
+ File.read(file_path)
13
+ end
metadata CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
4
4
  prerelease: false
5
5
  segments:
6
6
  - 1
7
- - 4
7
+ - 5
8
8
  - 0
9
- version: 1.4.0
9
+ version: 1.5.0
10
10
  platform: ruby
11
11
  authors:
12
12
  - Jaime Iniesta
@@ -62,6 +62,21 @@ dependencies:
62
62
  version: 2.6.0
63
63
  type: :development
64
64
  version_requirements: *id003
65
+ - !ruby/object:Gem::Dependency
66
+ name: fakeweb
67
+ prerelease: false
68
+ requirement: &id004 !ruby/object:Gem::Requirement
69
+ none: false
70
+ requirements:
71
+ - - ~>
72
+ - !ruby/object:Gem::Version
73
+ segments:
74
+ - 1
75
+ - 3
76
+ - 0
77
+ version: 1.3.0
78
+ type: :development
79
+ version_requirements: *id004
65
80
  description: MetaInspector lets you scrape a web page and get its title, charset, link and meta tags
66
81
  email:
67
82
  - jaimeiniesta@gmail.com
@@ -85,10 +100,15 @@ files:
85
100
  - meta_inspector.gemspec
86
101
  - samples/basic_scraping.rb
87
102
  - samples/spider.rb
103
+ - spec/fixtures/alazan.com.response
104
+ - spec/fixtures/iteh.at.response
105
+ - spec/fixtures/pagerankalert.com.response
106
+ - spec/fixtures/tea-tron.com.response
107
+ - spec/fixtures/theonion.com.response
88
108
  - spec/metainspector_spec.rb
89
109
  - spec/spec_helper.rb
90
110
  has_rdoc: true
91
- homepage: https://rubygems.org/gems/metainspector
111
+ homepage: https://github.com/jaimeiniesta/metainspector
92
112
  licenses: []
93
113
 
94
114
  post_install_message:
@@ -120,5 +140,10 @@ signing_key:
120
140
  specification_version: 3
121
141
  summary: MetaInspector is a ruby gem for web scraping purposes, that returns a hash with metadata from a given URL
122
142
  test_files:
143
+ - spec/fixtures/alazan.com.response
144
+ - spec/fixtures/iteh.at.response
145
+ - spec/fixtures/pagerankalert.com.response
146
+ - spec/fixtures/tea-tron.com.response
147
+ - spec/fixtures/theonion.com.response
123
148
  - spec/metainspector_spec.rb
124
149
  - spec/spec_helper.rb