metainspector 1.15.4 → 1.16.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.travis.yml +2 -1
- data/MIT-LICENSE +1 -1
- data/README.md +27 -4
- data/lib/meta_inspector/scraper.rb +9 -6
- data/lib/meta_inspector/version.rb +1 -1
- data/meta_inspector.gemspec +1 -1
- data/spec/metainspector_spec.rb +20 -1
- metadata +8 -32
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: f52d3692c9305059affd0cecbe31856a9c67ef08
|
4
|
+
data.tar.gz: 9936b40ceb9430f22c1a77aeb57004e90c7dd3c4
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 25dc0a75ee2c5a464c781de219cfc34744d5853bc96c4bc6b996ca7860bf093e990982e333650e9fc4cbc21d27747d4cb480bc8d506e0890c91a9d7d21f6d3bf
|
7
|
+
data.tar.gz: c278db782849963eecb68b37195f50ea5661d363574182930926f300f2182bb99062ec1dda42faba8833b1465cef2b3f9371da2a51c146990424e39b0466c334
|
data/.travis.yml
CHANGED
data/MIT-LICENSE
CHANGED
data/README.md
CHANGED
@@ -16,7 +16,7 @@ If you're using it on a Rails application, just add it to your Gemfile and run `
|
|
16
16
|
|
17
17
|
gem 'metainspector'
|
18
18
|
|
19
|
-
This gem is tested on Ruby versions 1.9.2
|
19
|
+
This gem is tested on Ruby versions 1.9.2, 1.9.3 and 2.0.0.
|
20
20
|
|
21
21
|
## Usage
|
22
22
|
|
@@ -50,8 +50,6 @@ Then you can see the scraped data like this:
|
|
50
50
|
page.image # Most relevant image, if defined with og:image
|
51
51
|
page.images # array of strings, with every img found on the page as an absolute URL
|
52
52
|
page.feed # Get rss or atom links in meta data fields as array
|
53
|
-
page.meta_og_title # opengraph title
|
54
|
-
page.meta_og_image # opengraph image
|
55
53
|
page.charset # UTF-8
|
56
54
|
page.content_type # content-type returned by the server when the url was requested
|
57
55
|
|
@@ -82,6 +80,25 @@ And the full scraped document is accessible from:
|
|
82
80
|
|
83
81
|
page.parsed_document # Nokogiri doc that you can use to get any element from the page
|
84
82
|
|
83
|
+
## Opengraph and Twitter card meta tags
|
84
|
+
|
85
|
+
Twitter cards and Open Graph tags make it possible to attach media experiences to Tweets and Facebook posts. Nowadays most content creators add these meta tags to their page headers so that the content on the page can be identified quickly. These tags can also be nested. For example, when a site wants to provide information about the primary image used on a page, it could use:
|
86
|
+
|
87
|
+
<meta name="og:image" content="http://www.somedomain.com/assets/images/abc.jpeg">
|
88
|
+
<meta name="og:image:width" content="200">
|
89
|
+
<meta name="twitter:image" value="http://www.somedomain.com/assets/images/abc.jpeg">
|
90
|
+
<meta property="twitter:image:width" value="200">
|
91
|
+
|
92
|
+
Many sites also use the name and property attributes, and the content and value attributes, interchangeably. With MetaInspector, accessing this information is as easy as:
|
93
|
+
|
94
|
+
page.meta_og_image
|
95
|
+
page.meta_twitter_image_width
|
96
|
+
|
97
|
+
Note that MetaInspector gives priority to content over value. In other words, given a tag like the one below, the content attribute is the one returned:
|
98
|
+
|
99
|
+
<meta property="og:something" value="100" content="real value">
|
100
|
+
page.meta_og_something #=> "real value"
|
101
|
+
|
85
102
|
## Options
|
86
103
|
|
87
104
|
### Timeout
|
@@ -176,4 +193,10 @@ Thanks to all the contributors:
|
|
176
193
|
|
177
194
|
[https://github.com/jaimeiniesta/metainspector/graphs/contributors](https://github.com/jaimeiniesta/metainspector/graphs/contributors)
|
178
195
|
|
179
|
-
|
196
|
+
## Related projects
|
197
|
+
|
198
|
+
* [go-metainspector](https://github.com/fern4lvarez/go-metainspector), a port of MetaInspector for Go.
|
199
|
+
* [Node-MetaInspector](https://github.com/gabceb/node-metainspector), a port of MetaInspector for Node.
|
200
|
+
|
201
|
+
## License
|
202
|
+
MetaInspector is released under the [MIT license](MIT-LICENSE).
|
@@ -72,7 +72,7 @@ module MetaInspector
|
|
72
72
|
# Most major websites now define this property, and it is usually very relevant
|
73
73
|
# See doc at http://developers.facebook.com/docs/opengraph/
|
74
74
|
def image
|
75
|
-
meta_og_image
|
75
|
+
meta_og_image || meta_twitter_image
|
76
76
|
end
|
77
77
|
|
78
78
|
# Returns the parsed document meta rss link
|
@@ -152,7 +152,9 @@ module MetaInspector
|
|
152
152
|
def method_missing(method_name)
|
153
153
|
if method_name.to_s =~ /^meta_(.*)/
|
154
154
|
key = $1
|
155
|
-
|
155
|
+
|
156
|
+
#special treatment for opengraph (og:) and twitter card (twitter:) tags
|
157
|
+
key.gsub!("_",":") if key =~ /^og_(.*)/ || key =~ /^twitter_(.*)/
|
156
158
|
|
157
159
|
scrape_meta_data
|
158
160
|
|
@@ -187,10 +189,11 @@ module MetaInspector
|
|
187
189
|
|
188
190
|
# Store meta tag value, looking at meta name or meta property
|
189
191
|
def get_meta_name_or_property(element)
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
192
|
+
name_or_property = element.attributes["name"] ? "name" : (element.attributes["property"] ? "property" : nil)
|
193
|
+
content_or_value = element.attributes["content"] ? "content" : (element.attributes["value"] ? "value" : nil)
|
194
|
+
|
195
|
+
if !name_or_property.nil? && !content_or_value.nil?
|
196
|
+
@data.meta.name[element.attributes[name_or_property].value.downcase] = element.attributes[content_or_value].value
|
194
197
|
end
|
195
198
|
end
|
196
199
|
|
data/meta_inspector.gemspec
CHANGED
@@ -6,7 +6,7 @@ Gem::Specification.new do |gem|
|
|
6
6
|
gem.email = ["jaimeiniesta@gmail.com"]
|
7
7
|
gem.description = %q{MetaInspector lets you scrape a web page and get its title, charset, link and meta tags}
|
8
8
|
gem.summary = %q{MetaInspector is a ruby gem for web scraping purposes, that returns a hash with metadata from a given URL}
|
9
|
-
gem.homepage = "
|
9
|
+
gem.homepage = "http://jaimeiniesta.github.io/metainspector/"
|
10
10
|
|
11
11
|
gem.files = `git ls-files`.split("\n")
|
12
12
|
gem.test_files = `git ls-files -- {spec}/*`.split("\n")
|
data/spec/metainspector_spec.rb
CHANGED
@@ -227,7 +227,7 @@ describe MetaInspector do
|
|
227
227
|
m = MetaInspector.new('http://example.com/malformed_href')
|
228
228
|
expect {
|
229
229
|
m.external_links.should == ["skype:joeuser?call", "telnet://telnet.cdrom.com",
|
230
|
-
"javascript:alert('ok');", "mailto:email(at)example.com"]
|
230
|
+
"javascript:alert('ok');", "javascript://", "mailto:email(at)example.com"]
|
231
231
|
m.should_not be_ok
|
232
232
|
}.to_not raise_error
|
233
233
|
end
|
@@ -354,6 +354,25 @@ describe MetaInspector do
|
|
354
354
|
@m.meta_og_something.should == nil
|
355
355
|
end
|
356
356
|
|
357
|
+
it "should find a meta_twitter_site" do
|
358
|
+
@m = MetaInspector.new('http://www.youtube.com/watch?v=iaGSSrp49uc')
|
359
|
+
@m.meta_twitter_site.should == "@youtube"
|
360
|
+
end
|
361
|
+
|
362
|
+
it "should find a meta_twitter_player_width" do
|
363
|
+
@m = MetaInspector.new('http://www.youtube.com/watch?v=iaGSSrp49uc')
|
364
|
+
@m.meta_twitter_player_width.should == "1920"
|
365
|
+
end
|
366
|
+
|
367
|
+
it "should not find a meta_twitter_dummy" do
|
368
|
+
@m = MetaInspector.new('http://www.youtube.com/watch?v=iaGSSrp49uc')
|
369
|
+
@m.meta_twitter_dummy.should == nil
|
370
|
+
end
|
371
|
+
|
372
|
+
it "should find a meta_og_video_width" do
|
373
|
+
@m = MetaInspector.new('http://www.youtube.com/watch?v=iaGSSrp49uc')
|
374
|
+
@m.meta_og_video_width.should == "1920"
|
375
|
+
end
|
357
376
|
end
|
358
377
|
|
359
378
|
describe 'Charset detection' do
|
metadata
CHANGED
@@ -1,20 +1,18 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: metainspector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
5
|
-
prerelease:
|
4
|
+
version: 1.16.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Jaime Iniesta
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date: 2013-04
|
11
|
+
date: 2013-09-04 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: nokogiri
|
16
15
|
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
16
|
requirements:
|
19
17
|
- - ~>
|
20
18
|
- !ruby/object:Gem::Version
|
@@ -22,7 +20,6 @@ dependencies:
|
|
22
20
|
type: :runtime
|
23
21
|
prerelease: false
|
24
22
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
23
|
requirements:
|
27
24
|
- - ~>
|
28
25
|
- !ruby/object:Gem::Version
|
@@ -30,7 +27,6 @@ dependencies:
|
|
30
27
|
- !ruby/object:Gem::Dependency
|
31
28
|
name: rash
|
32
29
|
requirement: !ruby/object:Gem::Requirement
|
33
|
-
none: false
|
34
30
|
requirements:
|
35
31
|
- - ~>
|
36
32
|
- !ruby/object:Gem::Version
|
@@ -38,7 +34,6 @@ dependencies:
|
|
38
34
|
type: :runtime
|
39
35
|
prerelease: false
|
40
36
|
version_requirements: !ruby/object:Gem::Requirement
|
41
|
-
none: false
|
42
37
|
requirements:
|
43
38
|
- - ~>
|
44
39
|
- !ruby/object:Gem::Version
|
@@ -46,7 +41,6 @@ dependencies:
|
|
46
41
|
- !ruby/object:Gem::Dependency
|
47
42
|
name: open_uri_redirections
|
48
43
|
requirement: !ruby/object:Gem::Requirement
|
49
|
-
none: false
|
50
44
|
requirements:
|
51
45
|
- - ~>
|
52
46
|
- !ruby/object:Gem::Version
|
@@ -54,7 +48,6 @@ dependencies:
|
|
54
48
|
type: :runtime
|
55
49
|
prerelease: false
|
56
50
|
version_requirements: !ruby/object:Gem::Requirement
|
57
|
-
none: false
|
58
51
|
requirements:
|
59
52
|
- - ~>
|
60
53
|
- !ruby/object:Gem::Version
|
@@ -62,7 +55,6 @@ dependencies:
|
|
62
55
|
- !ruby/object:Gem::Dependency
|
63
56
|
name: addressable
|
64
57
|
requirement: !ruby/object:Gem::Requirement
|
65
|
-
none: false
|
66
58
|
requirements:
|
67
59
|
- - ~>
|
68
60
|
- !ruby/object:Gem::Version
|
@@ -70,7 +62,6 @@ dependencies:
|
|
70
62
|
type: :runtime
|
71
63
|
prerelease: false
|
72
64
|
version_requirements: !ruby/object:Gem::Requirement
|
73
|
-
none: false
|
74
65
|
requirements:
|
75
66
|
- - ~>
|
76
67
|
- !ruby/object:Gem::Version
|
@@ -78,7 +69,6 @@ dependencies:
|
|
78
69
|
- !ruby/object:Gem::Dependency
|
79
70
|
name: rspec
|
80
71
|
requirement: !ruby/object:Gem::Requirement
|
81
|
-
none: false
|
82
72
|
requirements:
|
83
73
|
- - '='
|
84
74
|
- !ruby/object:Gem::Version
|
@@ -86,7 +76,6 @@ dependencies:
|
|
86
76
|
type: :development
|
87
77
|
prerelease: false
|
88
78
|
version_requirements: !ruby/object:Gem::Requirement
|
89
|
-
none: false
|
90
79
|
requirements:
|
91
80
|
- - '='
|
92
81
|
- !ruby/object:Gem::Version
|
@@ -94,7 +83,6 @@ dependencies:
|
|
94
83
|
- !ruby/object:Gem::Dependency
|
95
84
|
name: fakeweb
|
96
85
|
requirement: !ruby/object:Gem::Requirement
|
97
|
-
none: false
|
98
86
|
requirements:
|
99
87
|
- - '='
|
100
88
|
- !ruby/object:Gem::Version
|
@@ -102,7 +90,6 @@ dependencies:
|
|
102
90
|
type: :development
|
103
91
|
prerelease: false
|
104
92
|
version_requirements: !ruby/object:Gem::Requirement
|
105
|
-
none: false
|
106
93
|
requirements:
|
107
94
|
- - '='
|
108
95
|
- !ruby/object:Gem::Version
|
@@ -110,7 +97,6 @@ dependencies:
|
|
110
97
|
- !ruby/object:Gem::Dependency
|
111
98
|
name: awesome_print
|
112
99
|
requirement: !ruby/object:Gem::Requirement
|
113
|
-
none: false
|
114
100
|
requirements:
|
115
101
|
- - '='
|
116
102
|
- !ruby/object:Gem::Version
|
@@ -118,7 +104,6 @@ dependencies:
|
|
118
104
|
type: :development
|
119
105
|
prerelease: false
|
120
106
|
version_requirements: !ruby/object:Gem::Requirement
|
121
|
-
none: false
|
122
107
|
requirements:
|
123
108
|
- - '='
|
124
109
|
- !ruby/object:Gem::Version
|
@@ -126,7 +111,6 @@ dependencies:
|
|
126
111
|
- !ruby/object:Gem::Dependency
|
127
112
|
name: rake
|
128
113
|
requirement: !ruby/object:Gem::Requirement
|
129
|
-
none: false
|
130
114
|
requirements:
|
131
115
|
- - ~>
|
132
116
|
- !ruby/object:Gem::Version
|
@@ -134,7 +118,6 @@ dependencies:
|
|
134
118
|
type: :development
|
135
119
|
prerelease: false
|
136
120
|
version_requirements: !ruby/object:Gem::Requirement
|
137
|
-
none: false
|
138
121
|
requirements:
|
139
122
|
- - ~>
|
140
123
|
- !ruby/object:Gem::Version
|
@@ -190,35 +173,28 @@ files:
|
|
190
173
|
- spec/metainspector_spec.rb
|
191
174
|
- spec/redirections_spec.rb
|
192
175
|
- spec/spec_helper.rb
|
193
|
-
homepage:
|
176
|
+
homepage: http://jaimeiniesta.github.io/metainspector/
|
194
177
|
licenses: []
|
178
|
+
metadata: {}
|
195
179
|
post_install_message:
|
196
180
|
rdoc_options: []
|
197
181
|
require_paths:
|
198
182
|
- lib
|
199
183
|
required_ruby_version: !ruby/object:Gem::Requirement
|
200
|
-
none: false
|
201
184
|
requirements:
|
202
|
-
- -
|
185
|
+
- - '>='
|
203
186
|
- !ruby/object:Gem::Version
|
204
187
|
version: '0'
|
205
|
-
segments:
|
206
|
-
- 0
|
207
|
-
hash: -4602043206768445405
|
208
188
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
209
|
-
none: false
|
210
189
|
requirements:
|
211
|
-
- -
|
190
|
+
- - '>='
|
212
191
|
- !ruby/object:Gem::Version
|
213
192
|
version: '0'
|
214
|
-
segments:
|
215
|
-
- 0
|
216
|
-
hash: -4602043206768445405
|
217
193
|
requirements: []
|
218
194
|
rubyforge_project:
|
219
|
-
rubygems_version:
|
195
|
+
rubygems_version: 2.0.3
|
220
196
|
signing_key:
|
221
|
-
specification_version:
|
197
|
+
specification_version: 4
|
222
198
|
summary: MetaInspector is a ruby gem for web scraping purposes, that returns a hash
|
223
199
|
with metadata from a given URL
|
224
200
|
test_files: []
|