metapage 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/README.md +26 -6
- data/Rakefile +11 -3
- data/lib/metapage/version.rb +1 -1
- data/lib/metapage.rb +46 -10
- data/metapage.gemspec +1 -1
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 840338d16496a43238591046aefc0a503e272a3b
|
4
|
+
data.tar.gz: 83ae64c6920ee76d32fb63953c47bf820a95f992
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ba8dad8ccd4478ea90ea72e7b5bcb5ee253f1b9bd9c3dffd6e293840bcd325260f603db97d8a79fabbaf9896ce85029fd02afe31943b1937019826f0bb1997c1
|
7
|
+
data.tar.gz: 21813f6a22cc4bdf4d6c9a5c09c9dfc3584437956e0b342b03d8d11fbafd92093fc2c84b0061e4d2c734bf117bc29485420780fd2e1270528e0b622a61f2d56c
|
data/CHANGELOG.md
ADDED
data/README.md
CHANGED
@@ -20,15 +20,35 @@ Add `gem 'metapage'` to your `Gemfile` or `gem install metapage` on your command
|
|
20
20
|
|
21
21
|
## Usage
|
22
22
|
|
23
|
-
Fetch a specific URL. Returns `nil` if the content is not html or loading fails due to invalid url, http response or timeout.
|
23
|
+
Fetch a specific URL. Returns `nil` if the content is neither HTML nor an image, or if loading fails due to an invalid URL, HTTP response, or timeout.
|
24
24
|
|
25
|
-
Metapage.fetch(
|
26
|
-
{:
|
27
|
-
:
|
25
|
+
pp Metapage.fetch("https://github.com/colszowka/metapage").to_h
|
26
|
+
{:id=>"9fa08a8cef7450e558723a53c8a122a096599709",
|
27
|
+
:title=>"colszowka/metapage",
|
28
|
+
:description=>
|
29
|
+
"metapage - A tiny class for extracting title, description and some further information from given urls",
|
28
30
|
:image_url=>"https://avatars0.githubusercontent.com/u/13972?v=3&s=400",
|
29
31
|
:type=>"object",
|
30
|
-
:canonical_url=>"https://github.com/colszowka/
|
31
|
-
:site_name=>"GitHub"
|
32
|
+
:canonical_url=>"https://github.com/colszowka/metapage",
|
33
|
+
:site_name=>"GitHub",
|
34
|
+
:media_type=>"text",
|
35
|
+
:content_type=>"text/html"}
|
36
|
+
|
37
|
+
For images, the resulting content is much more minimal:
|
38
|
+
|
39
|
+
pp Metapage.fetch("https://s-media-cache-ak0.pinimg.com/736x/e3/ce/b3/e3ceb3fe3224e104ad0f019117b8e1f0.jpg").to_h
|
40
|
+
{:id=>"7bd710044d61b55c63d1d0089632a6417f370f53",
|
41
|
+
:title=>nil,
|
42
|
+
:description=>nil,
|
43
|
+
:image_url=>
|
44
|
+
"https://s-media-cache-ak0.pinimg.com/736x/e3/ce/b3/e3ceb3fe3224e104ad0f019117b8e1f0.jpg",
|
45
|
+
:type=>"image",
|
46
|
+
:canonical_url=>
|
47
|
+
"https://s-media-cache-ak0.pinimg.com/736x/e3/ce/b3/e3ceb3fe3224e104ad0f019117b8e1f0.jpg",
|
48
|
+
:site_name=>nil,
|
49
|
+
:media_type=>"image",
|
50
|
+
:content_type=>"image/jpeg"}
|
51
|
+
|
32
52
|
|
33
53
|
Extract urls from a given string and fetch the metadata for them. Only returns successfully retrieved results.
|
34
54
|
|
data/Rakefile
CHANGED
@@ -12,7 +12,15 @@ end
|
|
12
12
|
desc "Generates examples for the readme on the fly"
|
13
13
|
task examples: :env do
|
14
14
|
require 'pp'
|
15
|
-
|
16
|
-
|
17
|
-
|
15
|
+
|
16
|
+
[
|
17
|
+
'https://github.com/colszowka/metapage',
|
18
|
+
'https://s-media-cache-ak0.pinimg.com/736x/e3/ce/b3/e3ceb3fe3224e104ad0f019117b8e1f0.jpg'
|
19
|
+
].each do |url|
|
20
|
+
|
21
|
+
cmd = "pp Metapage.fetch(#{url.inspect}).to_h"
|
22
|
+
puts cmd
|
23
|
+
eval cmd
|
24
|
+
|
25
|
+
end
|
18
26
|
end
|
data/lib/metapage/version.rb
CHANGED
data/lib/metapage.rb
CHANGED
@@ -5,6 +5,7 @@ require 'uri'
|
|
5
5
|
require 'resolv'
|
6
6
|
require 'ipaddr'
|
7
7
|
require 'digest/sha1'
|
8
|
+
require 'mimemagic'
|
8
9
|
|
9
10
|
module Metapage
|
10
11
|
class ResolveError < StandardError; end;
|
@@ -40,24 +41,40 @@ module Metapage
|
|
40
41
|
end
|
41
42
|
|
42
43
|
def title
|
43
|
-
|
44
|
+
unless image?
|
45
|
+
@title ||= metatag_content('og:title') || html_content('title')
|
46
|
+
end
|
44
47
|
end
|
45
48
|
|
46
49
|
def description
|
47
|
-
|
50
|
+
unless image?
|
51
|
+
@description ||= metatag_content('og:description') || metatag_content('description')
|
52
|
+
end
|
48
53
|
end
|
49
54
|
|
50
55
|
def image_url
|
51
|
-
|
52
|
-
|
56
|
+
if image?
|
57
|
+
url
|
58
|
+
else
|
59
|
+
# Fallback to apple-touch-icon, fluid-icon, ms-tileicon etc
|
60
|
+
@image_url ||= metatag_content('og:image:secure_url') || metatag_content('og:image') || link_rel('apple-touch-icon-precomposed')
|
61
|
+
end
|
53
62
|
end
|
54
63
|
|
55
64
|
def type
|
56
|
-
|
65
|
+
if image?
|
66
|
+
'image'
|
67
|
+
else
|
68
|
+
@type ||= metatag_content('og:type') || 'website'
|
69
|
+
end
|
57
70
|
end
|
58
71
|
|
59
72
|
def canonical_url
|
60
|
-
|
73
|
+
if image?
|
74
|
+
url
|
75
|
+
else
|
76
|
+
@canonical_url ||= metatag_content('og:url') || link_rel('canonical') || url
|
77
|
+
end
|
61
78
|
end
|
62
79
|
|
63
80
|
def id
|
@@ -67,7 +84,17 @@ module Metapage
|
|
67
84
|
end
|
68
85
|
|
69
86
|
def site_name
|
70
|
-
|
87
|
+
unless image?
|
88
|
+
@site_name ||= metatag_content('og:site_name') || host
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
def media_type
|
93
|
+
mimemagic and mimemagic.mediatype
|
94
|
+
end
|
95
|
+
|
96
|
+
def content_type
|
97
|
+
mimemagic and mimemagic.type
|
71
98
|
end
|
72
99
|
|
73
100
|
def to_h
|
@@ -78,7 +105,9 @@ module Metapage
|
|
78
105
|
image_url: image_url,
|
79
106
|
type: type,
|
80
107
|
canonical_url: canonical_url,
|
81
|
-
site_name: site_name
|
108
|
+
site_name: site_name,
|
109
|
+
media_type: media_type,
|
110
|
+
content_type: content_type
|
82
111
|
}
|
83
112
|
end
|
84
113
|
|
@@ -86,7 +115,6 @@ module Metapage
|
|
86
115
|
to_h.to_json
|
87
116
|
end
|
88
117
|
|
89
|
-
|
90
118
|
private
|
91
119
|
|
92
120
|
def uri
|
@@ -137,8 +165,16 @@ module Metapage
|
|
137
165
|
end
|
138
166
|
end
|
139
167
|
|
168
|
+
def image?
|
169
|
+
media_type == 'image'
|
170
|
+
end
|
171
|
+
|
172
|
+
def mimemagic
|
173
|
+
@mimemagic ||= MimeMagic.by_magic(content)
|
174
|
+
end
|
175
|
+
|
140
176
|
def content
|
141
|
-
http_response.body
|
177
|
+
http_response.body
|
142
178
|
end
|
143
179
|
|
144
180
|
def http_response
|
data/metapage.gemspec
CHANGED
@@ -20,7 +20,7 @@ Gem::Specification.new do |spec|
|
|
20
20
|
|
21
21
|
spec.add_dependency 'httpclient'
|
22
22
|
spec.add_dependency 'nokogiri'
|
23
|
-
spec.add_dependency '
|
23
|
+
spec.add_dependency 'mimemagic'
|
24
24
|
|
25
25
|
spec.add_development_dependency "bundler", "~> 1.10"
|
26
26
|
spec.add_development_dependency "rake", "~> 10.0"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: metapage
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Christoph Olszowka
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-11-
|
11
|
+
date: 2015-11-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: httpclient
|
@@ -39,7 +39,7 @@ dependencies:
|
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
|
-
name:
|
42
|
+
name: mimemagic
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - ">="
|
@@ -147,6 +147,7 @@ files:
|
|
147
147
|
- ".gitignore"
|
148
148
|
- ".rspec"
|
149
149
|
- ".travis.yml"
|
150
|
+
- CHANGELOG.md
|
150
151
|
- Gemfile
|
151
152
|
- LICENSE.txt
|
152
153
|
- README.md
|