metapage 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/README.md +26 -6
- data/Rakefile +11 -3
- data/lib/metapage/version.rb +1 -1
- data/lib/metapage.rb +46 -10
- data/metapage.gemspec +1 -1
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 840338d16496a43238591046aefc0a503e272a3b
|
4
|
+
data.tar.gz: 83ae64c6920ee76d32fb63953c47bf820a95f992
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ba8dad8ccd4478ea90ea72e7b5bcb5ee253f1b9bd9c3dffd6e293840bcd325260f603db97d8a79fabbaf9896ce85029fd02afe31943b1937019826f0bb1997c1
|
7
|
+
data.tar.gz: 21813f6a22cc4bdf4d6c9a5c09c9dfc3584437956e0b342b03d8d11fbafd92093fc2c84b0061e4d2c734bf117bc29485420780fd2e1270528e0b622a61f2d56c
|
data/CHANGELOG.md
ADDED
data/README.md
CHANGED
@@ -20,15 +20,35 @@ Add `gem 'metapage'` to your `Gemfile` or `gem install metapage` on your command
|
|
20
20
|
|
21
21
|
## Usage
|
22
22
|
|
23
|
-
Fetch a specific URL. Returns `nil` if the content is not html or loading fails due to invalid url, http response or timeout.
|
23
|
+
Fetch a specific URL. Returns `nil` if the content is neither HTML nor an image, or if loading fails due to an invalid URL, an unsuccessful HTTP response, or a timeout.
|
24
24
|
|
25
|
-
Metapage.fetch(
|
26
|
-
{:
|
27
|
-
:
|
25
|
+
pp Metapage.fetch("https://github.com/colszowka/metapage").to_h
|
26
|
+
{:id=>"9fa08a8cef7450e558723a53c8a122a096599709",
|
27
|
+
:title=>"colszowka/metapage",
|
28
|
+
:description=>
|
29
|
+
"metapage - A tiny class for extracting title, description and some further information from given urls",
|
28
30
|
:image_url=>"https://avatars0.githubusercontent.com/u/13972?v=3&s=400",
|
29
31
|
:type=>"object",
|
30
|
-
:canonical_url=>"https://github.com/colszowka/
|
31
|
-
:site_name=>"GitHub"
|
32
|
+
:canonical_url=>"https://github.com/colszowka/metapage",
|
33
|
+
:site_name=>"GitHub",
|
34
|
+
:media_type=>"text",
|
35
|
+
:content_type=>"text/html"}
|
36
|
+
|
37
|
+
For images, the result contains far fewer populated fields:
|
38
|
+
|
39
|
+
pp Metapage.fetch("https://s-media-cache-ak0.pinimg.com/736x/e3/ce/b3/e3ceb3fe3224e104ad0f019117b8e1f0.jpg").to_h
|
40
|
+
{:id=>"7bd710044d61b55c63d1d0089632a6417f370f53",
|
41
|
+
:title=>nil,
|
42
|
+
:description=>nil,
|
43
|
+
:image_url=>
|
44
|
+
"https://s-media-cache-ak0.pinimg.com/736x/e3/ce/b3/e3ceb3fe3224e104ad0f019117b8e1f0.jpg",
|
45
|
+
:type=>"image",
|
46
|
+
:canonical_url=>
|
47
|
+
"https://s-media-cache-ak0.pinimg.com/736x/e3/ce/b3/e3ceb3fe3224e104ad0f019117b8e1f0.jpg",
|
48
|
+
:site_name=>nil,
|
49
|
+
:media_type=>"image",
|
50
|
+
:content_type=>"image/jpeg"}
|
51
|
+
|
32
52
|
|
33
53
|
Extract urls from a given string and fetch the metadata for them. Only returns successfully retrieved results.
|
34
54
|
|
data/Rakefile
CHANGED
@@ -12,7 +12,15 @@ end
|
|
12
12
|
desc "Generates examples for the readme on the fly"
|
13
13
|
task examples: :env do
|
14
14
|
require 'pp'
|
15
|
-
|
16
|
-
|
17
|
-
|
15
|
+
|
16
|
+
[
|
17
|
+
'https://github.com/colszowka/metapage',
|
18
|
+
'https://s-media-cache-ak0.pinimg.com/736x/e3/ce/b3/e3ceb3fe3224e104ad0f019117b8e1f0.jpg'
|
19
|
+
].each do |url|
|
20
|
+
|
21
|
+
cmd = "pp Metapage.fetch(#{url.inspect}).to_h"
|
22
|
+
puts cmd
|
23
|
+
eval cmd
|
24
|
+
|
25
|
+
end
|
18
26
|
end
|
data/lib/metapage/version.rb
CHANGED
data/lib/metapage.rb
CHANGED
@@ -5,6 +5,7 @@ require 'uri'
|
|
5
5
|
require 'resolv'
|
6
6
|
require 'ipaddr'
|
7
7
|
require 'digest/sha1'
|
8
|
+
require 'mimemagic'
|
8
9
|
|
9
10
|
module Metapage
|
10
11
|
class ResolveError < StandardError; end;
|
@@ -40,24 +41,40 @@ module Metapage
|
|
40
41
|
end
|
41
42
|
|
42
43
|
def title
|
43
|
-
|
44
|
+
unless image?
|
45
|
+
@title ||= metatag_content('og:title') || html_content('title')
|
46
|
+
end
|
44
47
|
end
|
45
48
|
|
46
49
|
def description
|
47
|
-
|
50
|
+
unless image?
|
51
|
+
@description ||= metatag_content('og:description') || metatag_content('description')
|
52
|
+
end
|
48
53
|
end
|
49
54
|
|
50
55
|
def image_url
|
51
|
-
|
52
|
-
|
56
|
+
if image?
|
57
|
+
url
|
58
|
+
else
|
59
|
+
# Fallback to apple-touch-icon, fluid-icon, ms-tileicon etc
|
60
|
+
@image_url ||= metatag_content('og:image:secure_url') || metatag_content('og:image') || link_rel('apple-touch-icon-precomposed')
|
61
|
+
end
|
53
62
|
end
|
54
63
|
|
55
64
|
def type
|
56
|
-
|
65
|
+
if image?
|
66
|
+
'image'
|
67
|
+
else
|
68
|
+
@type ||= metatag_content('og:type') || 'website'
|
69
|
+
end
|
57
70
|
end
|
58
71
|
|
59
72
|
def canonical_url
|
60
|
-
|
73
|
+
if image?
|
74
|
+
url
|
75
|
+
else
|
76
|
+
@canonical_url ||= metatag_content('og:url') || link_rel('canonical') || url
|
77
|
+
end
|
61
78
|
end
|
62
79
|
|
63
80
|
def id
|
@@ -67,7 +84,17 @@ module Metapage
|
|
67
84
|
end
|
68
85
|
|
69
86
|
def site_name
|
70
|
-
|
87
|
+
unless image?
|
88
|
+
@site_name ||= metatag_content('og:site_name') || host
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
def media_type
|
93
|
+
mimemagic and mimemagic.mediatype
|
94
|
+
end
|
95
|
+
|
96
|
+
def content_type
|
97
|
+
mimemagic and mimemagic.type
|
71
98
|
end
|
72
99
|
|
73
100
|
def to_h
|
@@ -78,7 +105,9 @@ module Metapage
|
|
78
105
|
image_url: image_url,
|
79
106
|
type: type,
|
80
107
|
canonical_url: canonical_url,
|
81
|
-
site_name: site_name
|
108
|
+
site_name: site_name,
|
109
|
+
media_type: media_type,
|
110
|
+
content_type: content_type
|
82
111
|
}
|
83
112
|
end
|
84
113
|
|
@@ -86,7 +115,6 @@ module Metapage
|
|
86
115
|
to_h.to_json
|
87
116
|
end
|
88
117
|
|
89
|
-
|
90
118
|
private
|
91
119
|
|
92
120
|
def uri
|
@@ -137,8 +165,16 @@ module Metapage
|
|
137
165
|
end
|
138
166
|
end
|
139
167
|
|
168
|
+
def image?
|
169
|
+
media_type == 'image'
|
170
|
+
end
|
171
|
+
|
172
|
+
def mimemagic
|
173
|
+
@mimemagic ||= MimeMagic.by_magic(content)
|
174
|
+
end
|
175
|
+
|
140
176
|
def content
|
141
|
-
http_response.body
|
177
|
+
http_response.body
|
142
178
|
end
|
143
179
|
|
144
180
|
def http_response
|
data/metapage.gemspec
CHANGED
@@ -20,7 +20,7 @@ Gem::Specification.new do |spec|
|
|
20
20
|
|
21
21
|
spec.add_dependency 'httpclient'
|
22
22
|
spec.add_dependency 'nokogiri'
|
23
|
-
spec.add_dependency '
|
23
|
+
spec.add_dependency 'mimemagic'
|
24
24
|
|
25
25
|
spec.add_development_dependency "bundler", "~> 1.10"
|
26
26
|
spec.add_development_dependency "rake", "~> 10.0"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: metapage
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Christoph Olszowka
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-11-
|
11
|
+
date: 2015-11-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: httpclient
|
@@ -39,7 +39,7 @@ dependencies:
|
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
|
-
name:
|
42
|
+
name: mimemagic
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - ">="
|
@@ -147,6 +147,7 @@ files:
|
|
147
147
|
- ".gitignore"
|
148
148
|
- ".rspec"
|
149
149
|
- ".travis.yml"
|
150
|
+
- CHANGELOG.md
|
150
151
|
- Gemfile
|
151
152
|
- LICENSE.txt
|
152
153
|
- README.md
|