href-preview 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG +3 -0
- data/Gemfile +15 -0
- data/LICENSE +202 -0
- data/README.md +39 -0
- data/Rakefile +42 -0
- data/lib/href-preview.rb +17 -0
- data/lib/href_preview.rb +34 -0
- data/lib/href_preview/faraday_common_request.rb +40 -0
- data/lib/href_preview/fastimage_uri.rb +20 -0
- data/lib/href_preview/preview.rb +451 -0
- data/lib/href_preview/version.rb +27 -0
- data/spec/spec.opts +2 -0
- data/spec/spec_helper.rb +7 -0
- data/tasks/clobber.rake +2 -0
- data/tasks/gem.rake +95 -0
- data/tasks/git.rake +40 -0
- data/tasks/metrics.rake +22 -0
- data/tasks/spec.rake +57 -0
- data/tasks/yard.rake +26 -0
- metadata +234 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: a67ef243d53323e6ece890b63a587c0b6a298db3
|
4
|
+
data.tar.gz: 4843646e829511658a361df0fe24a81fea30d512
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: bd3ea4180c61cab25a689adecc200cded13d3747bcefa1ab6061cbd13dc014e77be8fd2b3cca8cedc17ca4d15a5d9b998cb9a2f83d1e4abc6950de98fe3b9389
|
7
|
+
data.tar.gz: 88b207d8bcc1b6a5574e0987bd717257d1b90c98b48277f09d53ae282cfcbd17f8c34db9d783e35322569048901546387e4179d46e447d545e97a40b839163e1
|
data/CHANGELOG
ADDED
data/Gemfile
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
# Bundler manifest. Only development and test tooling is declared here.
source('https://rubygems.org')

# dependencies go here

# Gems that are only needed in the development group.
group(:development) do
  gem('launchy', '>= 2.1.1')
  gem('yard')
  gem('kramdown')
end

# Gems shared by the test and development groups.
group(:test, :development) do
  gem('rake', '>= 0.9.0')
  gem('rspec', '>= 2.11.0')
  # rcov is limited to the MRI 1.8 platform.
  gem('rcov', '>= 0.9.9', :platform => :mri_18)
end
|
data/LICENSE
ADDED
@@ -0,0 +1,202 @@
|
|
1
|
+
|
2
|
+
Apache License
|
3
|
+
Version 2.0, January 2004
|
4
|
+
http://www.apache.org/licenses/
|
5
|
+
|
6
|
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
7
|
+
|
8
|
+
1. Definitions.
|
9
|
+
|
10
|
+
"License" shall mean the terms and conditions for use, reproduction,
|
11
|
+
and distribution as defined by Sections 1 through 9 of this document.
|
12
|
+
|
13
|
+
"Licensor" shall mean the copyright owner or entity authorized by
|
14
|
+
the copyright owner that is granting the License.
|
15
|
+
|
16
|
+
"Legal Entity" shall mean the union of the acting entity and all
|
17
|
+
other entities that control, are controlled by, or are under common
|
18
|
+
control with that entity. For the purposes of this definition,
|
19
|
+
"control" means (i) the power, direct or indirect, to cause the
|
20
|
+
direction or management of such entity, whether by contract or
|
21
|
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
22
|
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
23
|
+
|
24
|
+
"You" (or "Your") shall mean an individual or Legal Entity
|
25
|
+
exercising permissions granted by this License.
|
26
|
+
|
27
|
+
"Source" form shall mean the preferred form for making modifications,
|
28
|
+
including but not limited to software source code, documentation
|
29
|
+
source, and configuration files.
|
30
|
+
|
31
|
+
"Object" form shall mean any form resulting from mechanical
|
32
|
+
transformation or translation of a Source form, including but
|
33
|
+
not limited to compiled object code, generated documentation,
|
34
|
+
and conversions to other media types.
|
35
|
+
|
36
|
+
"Work" shall mean the work of authorship, whether in Source or
|
37
|
+
Object form, made available under the License, as indicated by a
|
38
|
+
copyright notice that is included in or attached to the work
|
39
|
+
(an example is provided in the Appendix below).
|
40
|
+
|
41
|
+
"Derivative Works" shall mean any work, whether in Source or Object
|
42
|
+
form, that is based on (or derived from) the Work and for which the
|
43
|
+
editorial revisions, annotations, elaborations, or other modifications
|
44
|
+
represent, as a whole, an original work of authorship. For the purposes
|
45
|
+
of this License, Derivative Works shall not include works that remain
|
46
|
+
separable from, or merely link (or bind by name) to the interfaces of,
|
47
|
+
the Work and Derivative Works thereof.
|
48
|
+
|
49
|
+
"Contribution" shall mean any work of authorship, including
|
50
|
+
the original version of the Work and any modifications or additions
|
51
|
+
to that Work or Derivative Works thereof, that is intentionally
|
52
|
+
submitted to Licensor for inclusion in the Work by the copyright owner
|
53
|
+
or by an individual or Legal Entity authorized to submit on behalf of
|
54
|
+
the copyright owner. For the purposes of this definition, "submitted"
|
55
|
+
means any form of electronic, verbal, or written communication sent
|
56
|
+
to the Licensor or its representatives, including but not limited to
|
57
|
+
communication on electronic mailing lists, source code control systems,
|
58
|
+
and issue tracking systems that are managed by, or on behalf of, the
|
59
|
+
Licensor for the purpose of discussing and improving the Work, but
|
60
|
+
excluding communication that is conspicuously marked or otherwise
|
61
|
+
designated in writing by the copyright owner as "Not a Contribution."
|
62
|
+
|
63
|
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
64
|
+
on behalf of whom a Contribution has been received by Licensor and
|
65
|
+
subsequently incorporated within the Work.
|
66
|
+
|
67
|
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
68
|
+
this License, each Contributor hereby grants to You a perpetual,
|
69
|
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
70
|
+
copyright license to reproduce, prepare Derivative Works of,
|
71
|
+
publicly display, publicly perform, sublicense, and distribute the
|
72
|
+
Work and such Derivative Works in Source or Object form.
|
73
|
+
|
74
|
+
3. Grant of Patent License. Subject to the terms and conditions of
|
75
|
+
this License, each Contributor hereby grants to You a perpetual,
|
76
|
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
77
|
+
(except as stated in this section) patent license to make, have made,
|
78
|
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
79
|
+
where such license applies only to those patent claims licensable
|
80
|
+
by such Contributor that are necessarily infringed by their
|
81
|
+
Contribution(s) alone or by combination of their Contribution(s)
|
82
|
+
with the Work to which such Contribution(s) was submitted. If You
|
83
|
+
institute patent litigation against any entity (including a
|
84
|
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
85
|
+
or a Contribution incorporated within the Work constitutes direct
|
86
|
+
or contributory patent infringement, then any patent licenses
|
87
|
+
granted to You under this License for that Work shall terminate
|
88
|
+
as of the date such litigation is filed.
|
89
|
+
|
90
|
+
4. Redistribution. You may reproduce and distribute copies of the
|
91
|
+
Work or Derivative Works thereof in any medium, with or without
|
92
|
+
modifications, and in Source or Object form, provided that You
|
93
|
+
meet the following conditions:
|
94
|
+
|
95
|
+
(a) You must give any other recipients of the Work or
|
96
|
+
Derivative Works a copy of this License; and
|
97
|
+
|
98
|
+
(b) You must cause any modified files to carry prominent notices
|
99
|
+
stating that You changed the files; and
|
100
|
+
|
101
|
+
(c) You must retain, in the Source form of any Derivative Works
|
102
|
+
that You distribute, all copyright, patent, trademark, and
|
103
|
+
attribution notices from the Source form of the Work,
|
104
|
+
excluding those notices that do not pertain to any part of
|
105
|
+
the Derivative Works; and
|
106
|
+
|
107
|
+
(d) If the Work includes a "NOTICE" text file as part of its
|
108
|
+
distribution, then any Derivative Works that You distribute must
|
109
|
+
include a readable copy of the attribution notices contained
|
110
|
+
within such NOTICE file, excluding those notices that do not
|
111
|
+
pertain to any part of the Derivative Works, in at least one
|
112
|
+
of the following places: within a NOTICE text file distributed
|
113
|
+
as part of the Derivative Works; within the Source form or
|
114
|
+
documentation, if provided along with the Derivative Works; or,
|
115
|
+
within a display generated by the Derivative Works, if and
|
116
|
+
wherever such third-party notices normally appear. The contents
|
117
|
+
of the NOTICE file are for informational purposes only and
|
118
|
+
do not modify the License. You may add Your own attribution
|
119
|
+
notices within Derivative Works that You distribute, alongside
|
120
|
+
or as an addendum to the NOTICE text from the Work, provided
|
121
|
+
that such additional attribution notices cannot be construed
|
122
|
+
as modifying the License.
|
123
|
+
|
124
|
+
You may add Your own copyright statement to Your modifications and
|
125
|
+
may provide additional or different license terms and conditions
|
126
|
+
for use, reproduction, or distribution of Your modifications, or
|
127
|
+
for any such Derivative Works as a whole, provided Your use,
|
128
|
+
reproduction, and distribution of the Work otherwise complies with
|
129
|
+
the conditions stated in this License.
|
130
|
+
|
131
|
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
132
|
+
any Contribution intentionally submitted for inclusion in the Work
|
133
|
+
by You to the Licensor shall be under the terms and conditions of
|
134
|
+
this License, without any additional terms or conditions.
|
135
|
+
Notwithstanding the above, nothing herein shall supersede or modify
|
136
|
+
the terms of any separate license agreement you may have executed
|
137
|
+
with Licensor regarding such Contributions.
|
138
|
+
|
139
|
+
6. Trademarks. This License does not grant permission to use the trade
|
140
|
+
names, trademarks, service marks, or product names of the Licensor,
|
141
|
+
except as required for reasonable and customary use in describing the
|
142
|
+
origin of the Work and reproducing the content of the NOTICE file.
|
143
|
+
|
144
|
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
145
|
+
agreed to in writing, Licensor provides the Work (and each
|
146
|
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
147
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
148
|
+
implied, including, without limitation, any warranties or conditions
|
149
|
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
150
|
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
151
|
+
appropriateness of using or redistributing the Work and assume any
|
152
|
+
risks associated with Your exercise of permissions under this License.
|
153
|
+
|
154
|
+
8. Limitation of Liability. In no event and under no legal theory,
|
155
|
+
whether in tort (including negligence), contract, or otherwise,
|
156
|
+
unless required by applicable law (such as deliberate and grossly
|
157
|
+
negligent acts) or agreed to in writing, shall any Contributor be
|
158
|
+
liable to You for damages, including any direct, indirect, special,
|
159
|
+
incidental, or consequential damages of any character arising as a
|
160
|
+
result of this License or out of the use or inability to use the
|
161
|
+
Work (including but not limited to damages for loss of goodwill,
|
162
|
+
work stoppage, computer failure or malfunction, or any and all
|
163
|
+
other commercial damages or losses), even if such Contributor
|
164
|
+
has been advised of the possibility of such damages.
|
165
|
+
|
166
|
+
9. Accepting Warranty or Additional Liability. While redistributing
|
167
|
+
the Work or Derivative Works thereof, You may choose to offer,
|
168
|
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
169
|
+
or other liability obligations and/or rights consistent with this
|
170
|
+
License. However, in accepting such obligations, You may act only
|
171
|
+
on Your own behalf and on Your sole responsibility, not on behalf
|
172
|
+
of any other Contributor, and only if You agree to indemnify,
|
173
|
+
defend, and hold each Contributor harmless for any liability
|
174
|
+
incurred by, or claims asserted against, such Contributor by reason
|
175
|
+
of your accepting any such warranty or additional liability.
|
176
|
+
|
177
|
+
END OF TERMS AND CONDITIONS
|
178
|
+
|
179
|
+
APPENDIX: How to apply the Apache License to your work.
|
180
|
+
|
181
|
+
To apply the Apache License to your work, attach the following
|
182
|
+
boilerplate notice, with the fields enclosed by brackets "[]"
|
183
|
+
replaced with your own identifying information. (Don't include
|
184
|
+
the brackets!) The text should be enclosed in the appropriate
|
185
|
+
comment syntax for the file format. We also recommend that a
|
186
|
+
file or class name and description of purpose be included on the
|
187
|
+
same "printed page" as the copyright notice for easier
|
188
|
+
identification within third-party archives.
|
189
|
+
|
190
|
+
Copyright [yyyy] [name of copyright owner]
|
191
|
+
|
192
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
193
|
+
you may not use this file except in compliance with the License.
|
194
|
+
You may obtain a copy of the License at
|
195
|
+
|
196
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
197
|
+
|
198
|
+
Unless required by applicable law or agreed to in writing, software
|
199
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
200
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
201
|
+
See the License for the specific language governing permissions and
|
202
|
+
limitations under the License.
|
data/README.md
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
# HREF Preview
|
2
|
+
|
3
|
+
<dl>
|
4
|
+
<dt>Homepage</dt><dd><a href="https://github.com/sporkmonger/href-preview">https://github.com/sporkmonger/href-preview</a></dd>
|
5
|
+
<dt>Author</dt><dd><a href="mailto:bob@sporkmonger.com">Bob Aman</a></dd>
|
6
|
+
<dt>Copyright</dt><dd>Copyright © 2014 Bob Aman</dd>
|
7
|
+
<dt>License</dt><dd>Apache 2.0</dd>
|
8
|
+
</dl>
|
9
|
+
|
10
|
+
# Description
|
11
|
+
|
12
|
+
A comprehensive library for efficiently previewing links.
|
13
|
+
|
14
|
+
# Features
|
15
|
+
|
16
|
+
Capable of detecting:
|
17
|
+
* OpenGraph metadata
|
18
|
+
* Twitter metadata
|
19
|
+
* Microdata metadata
|
20
|
+
* Assorted microformat metadata
|
21
|
+
* RDFa metadata
|
22
|
+
* rel="canonical" links
|
23
|
+
|
24
|
+
# Example Usage
|
25
|
+
|
26
|
+
require 'href_preview'
|
27
|
+
p = HRefPreview.open('http://nyti.ms/1c1zNtX')
|
28
|
+
p.title
|
29
|
+
# => "A Successor to Sagan Reboots ‘Cosmos’"
|
30
|
+
p.description
|
31
|
+
# =>
|
32
|
+
p.article_html
|
33
|
+
p.article_text
|
34
|
+
p.published
|
35
|
+
p.canonical_uri
|
36
|
+
|
37
|
+
# Install
|
38
|
+
|
39
|
+
* gem install href-preview
|
data/Rakefile
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
# Put the gem's own lib directory at the front of the load path.
lib_dir = File.expand_path(File.join(File.dirname(__FILE__), 'lib'))
$:.unshift(lib_dir)
$:.uniq!

require 'rubygems'
require 'rake'

require File.join(File.dirname(__FILE__), 'lib/href_preview', 'version')

# Package identity.
# NOTE(review): presumably consumed by the task files loaded at the bottom of
# this file -- confirm against tasks/*.rake.
PKG_DISPLAY_NAME = 'HREF Preview'
PKG_NAME = PKG_DISPLAY_NAME.gsub(/ /, '-').downcase
PKG_VERSION = HRefPreview::VERSION::STRING
PKG_FILE_NAME = "#{PKG_NAME}-#{PKG_VERSION}"

RELEASE_NAME = "REL #{PKG_VERSION}"
GIT_HUB_URL = "https://github.com/sporkmonger/href-preview"

PKG_AUTHOR = 'Bob Aman'
PKG_AUTHOR_EMAIL = 'bob@sporkmonger.com'
PKG_HOMEPAGE = GIT_HUB_URL
# Real summary instead of the template placeholder 'Package Summary'.
PKG_SUMMARY = 'A library for efficiently previewing links.'
PKG_DESCRIPTION = <<-TEXT
A simple library for efficiently previewing links.
TEXT

# Files considered part of the package; local database config, git metadata
# and the Bundler lockfile are excluded.
PKG_FILES = FileList[
  'lib/**/*', 'spec/**/*', 'vendor/**/*',
  'tasks/**/*', 'website/**/*',
  '[A-Z]*', 'Rakefile'
].exclude(/database\.yml/).exclude(/[_\.]git$/).exclude(/Gemfile\.lock/)

# rcov is only used on non-JRuby 1.8 interpreters; everywhere else the
# default task runs the plain specs.
RCOV_ENABLED = (RUBY_PLATFORM != 'java' && RUBY_VERSION =~ /^1\.8/)
if RCOV_ENABLED
  task :default => 'spec:rcov'
else
  task :default => 'spec'
end

# Matching a String against a Regexp cannot raise, so the former inline
# `rescue false` was dropped; the value is still a match index or nil.
WINDOWS = (RUBY_PLATFORM =~ /mswin|win32|mingw|bccwin|cygwin/)
# nil (not '') when SUDOLESS is set on a non-Windows platform.
SUDO = WINDOWS ? '' : ('sudo' unless ENV['SUDOLESS'])

Dir['tasks/**/*.rake'].each { |rake| load rake }
|
data/lib/href-preview.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
# Copyright 2014 Bob Aman
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
|
16
|
+
# Just a require alias.
|
17
|
+
require 'href_preview'
|
data/lib/href_preview.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
# Copyright 2014 Bob Aman
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
|
16
|
+
require 'addressable/uri'
|
17
|
+
require 'faraday'
|
18
|
+
require 'faraday_middleware'
|
19
|
+
require 'href_preview/version'
|
20
|
+
require 'href_preview/preview'
|
21
|
+
require 'href_preview/faraday_common_request'
|
22
|
+
|
23
|
+
##
# Top-level namespace; `HRefPreview.open` fetches a URI and wraps the
# response in a `Preview`.
module HRefPreview
  # Connection used when the caller supplies none: follows up to five
  # redirects, applies the CommonRequest middleware and uses the
  # :httpclient adapter.
  DEFAULT_CONNECTION = Faraday.new do |builder|
    builder.use(FaradayMiddleware::FollowRedirects, {:limit => 5})
    builder.use(Faraday::CommonRequest)
    builder.adapter(:httpclient)
  end

  class << self
    ##
    # Issues a GET for `uri` on `connection` and wraps the response.
    #
    # @param uri The address to fetch; it is passed to
    #   `Addressable::URI.parse` before the request is made.
    # @param connection The object whose `get` performs the request; it is
    #   also handed to the resulting preview.
    # @return [HRefPreview::Preview] The preview built from the response.
    def open(uri, connection=DEFAULT_CONNECTION)
      target = Addressable::URI.parse(uri)
      HRefPreview::Preview.new(connection.get(target), connection)
    end
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# Copyright 2014 Bob Aman
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
|
16
|
+
module Faraday
  ##
  # Middleware that sets the `User-Agent` and `Accept` headers on every
  # request passing through it.
  class CommonRequest < Faraday::Middleware
    ##
    # @param app [#call] The next handler in the middleware stack.
    # @param args [Array] Optional. The first element may be a Hash whose
    #   `:user_agent` and `:accept` entries override the defaults below.
    def initialize(app, *args)
      @app = app
      defaults = {
        :user_agent => (
          "Mozilla/5.0 (compatible; " +
          "HRefPreview/#{HRefPreview::VERSION::STRING}; " +
          "+https://github.com/sporkmonger/href-preview)"
        ),
        :accept => "*/*"
      }
      # Caller-supplied values must win. The previous `merge!` applied the
      # defaults on top of the caller's hash, which silently discarded any
      # override and also mutated the hash the caller passed in.
      @options = defaults.merge(args.first || {})
      @user_agent = @options[:user_agent]
      @accept = @options[:accept]
    end

    ##
    # Adds the configured headers to the request and hands it to the next
    # handler.
    #
    # @param env The request environment; its `:request_headers` entry is
    #   updated in place.
    # @return Whatever the wrapped handler returns.
    def call(env)
      env[:request_headers].merge!(
        'User-Agent' => @user_agent,
        'Accept' => @accept
      )
      @app.call(env)
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# Copyright 2014 Bob Aman
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
|
16
|
+
require 'fastimage'
|
17
|
+
|
18
|
+
# Reopens FastImage to expose its `@uri` instance variable through a reader.
class FastImage
  attr_reader(:uri)
end
|
@@ -0,0 +1,451 @@
|
|
1
|
+
# Copyright 2014 Bob Aman
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
|
16
|
+
require 'mime/types'
|
17
|
+
require 'faraday'
|
18
|
+
require 'nokogiri'
|
19
|
+
require 'sanitize'
|
20
|
+
require 'fastimage'
|
21
|
+
require 'href_preview/fastimage_uri'
|
22
|
+
require 'time'
|
23
|
+
|
24
|
+
module HRefPreview
|
25
|
+
class Preview
|
26
|
+
##
|
27
|
+
# Initializes a `Preview` from an HTTP response.
|
28
|
+
def initialize(response, connection=DEFAULT_CONNECTION)
  # Store the response to inspect and the connection that came with it.
  @response, @connection = response, connection
end
|
32
|
+
|
33
|
+
attr_reader :response
|
34
|
+
|
35
|
+
##
|
36
|
+
# Returns the MIME type declared in the HTTP headers or HTML meta
|
37
|
+
# tags.
|
38
|
+
#
|
39
|
+
# @return [MIME::Type] The MIME type of the HTTP response.
|
40
|
+
def mime_type
  @mime_type ||= begin
    # The HTTP header is consulted first; the DOM is only touched when the
    # header yields nothing.
    declared = MIME::Types[response.headers['Content-Type']].first
    unless declared
      [
        "//*/meta[@http-equiv='Content-Type']/@content",
        "//*/meta[@name='dc.format']/@content"
      ].each do |query|
        attr = dom.xpath(query).first
        declared = MIME::Types[attr.value].first if attr && attr.value
        break if declared
      end
    end
    declared
  end
end
|
53
|
+
|
54
|
+
##
|
55
|
+
# Returns the charset declared in the HTTP headers or HTML meta
|
56
|
+
# tags.
|
57
|
+
#
|
58
|
+
# @return [String] The charset of the HTTP response.
|
59
|
+
def charset
  @charset ||= begin
    pattern = /;\s*charset=([^;,]*)/
    # `||` is required here. With `or`, which binds more loosely than `=`,
    # only the header value was ever assigned and both meta-tag fallbacks
    # were evaluated and then thrown away.
    found = response.headers['Content-Type'].to_s[pattern, 1] ||
      begin
        node = dom.xpath("//*/meta[@http-equiv='Content-Type']/@content").first
        node.value.to_s[pattern, 1] if node
      end ||
      begin
        node = dom.xpath("//*/meta/@charset").first
        node.value if node
      end
    found.strip if found
  end
end
|
73
|
+
|
74
|
+
##
|
75
|
+
# @return [String] The two-letter language code for the content.
|
76
|
+
def language
  @language ||= begin
    # `||` rather than `or`: `or` binds more loosely than `=`, so the
    # meta-tag fallbacks never reached the assignment.
    declared = response.headers['Content-Language'] ||
      begin
        node = dom.xpath("//*/meta[@http-equiv='Content-Language']/@content").first
        node.value if node
      end ||
      begin
        node = dom.xpath("//*/meta[@name='dc.language']/@content").first
        node.value if node
      end
    if declared
      # Strip the irrelevant '-US' from 'en-US' if it appears. The match is
      # case-insensitive so that a tag such as 'EN-US' still yields 'en'
      # once downcased, instead of an empty string.
      declared[/^([a-z]{2})/i, 1].to_s.downcase
    end
  end
end
|
93
|
+
|
94
|
+
##
|
95
|
+
# Returns true if the response had a 2xx HTTP code and the mime type
|
96
|
+
# is either HTML or XHTML.
|
97
|
+
#
|
98
|
+
# @return [true, false] Whether successful HTML response or not.
|
99
|
+
def is_html?
  status = response.status
  return false unless status >= 200 && status < 300
  declared = mime_type
  return false unless declared
  # `=~` yields a match index or nil; convert it so callers receive the
  # documented true/false.
  !(declared.sub_type =~ /^x?html/).nil?
end
|
105
|
+
|
106
|
+
##
|
107
|
+
# The DOM for the response body.
|
108
|
+
#
|
109
|
+
# @return [Nokogiri::HTML::Document]
|
110
|
+
# The DOM, as generated by Nokogiri.
|
111
|
+
def dom
  # Parse the body once and reuse the document afterwards.
  return @dom if @dom
  @dom = Nokogiri::HTML(response.body)
end
|
114
|
+
|
115
|
+
##
|
116
|
+
# @return [String] The title of the page.
|
117
|
+
def title
  @title ||= begin
    if is_html?
      # Value of the first attribute node matching `query`, or nil.
      attr_value = lambda do |query|
        hit = dom.xpath(query).first
        hit.value if hit
      end
      # Text of the first node matching `query` below `scope`, or nil.
      node_text = lambda do |scope, query|
        hit = scope.xpath(query).first
        hit.text if hit
      end
      # Sources are tried in order and joined with `||`. The earlier `or`
      # chain bound more loosely than `=`, so only og:title was ever
      # assigned and every fallback was discarded.
      raw =
        attr_value.call("//*/meta[@property='og:title']/@content") ||
        attr_value.call("//*/meta[@name='dc.title']/@content") ||
        (article_node && node_text.call(article_node, "*[@itemprop='headline']")) ||
        node_text.call(dom, "//*/*[(self::h1 or self::h2) and @itemprop='headline']") ||
        node_text.call(dom, "//*/head/title") ||
        # The last two are unlikely to ever be reached.
        attr_value.call("//*/meta[@name='twitter:title']/@content") ||
        attr_value.call("//*/meta[@name='sailthru.title']/@content")
      if raw
        # The site name is escaped so that names containing regex
        # metacharacters (parentheses, dots, ...) are matched literally.
        name = Regexp.escape(site_name.to_s)
        # NOTE(review): as extracted, the first substitution replaces a plain
        # space with a plain space; the original pattern was presumably a
        # non-breaking space -- confirm against the repository.
        cleaned = raw.gsub(/ /, ' ')
        cleaned = cleaned.gsub(/^#{name}[\s\|\-\:]*/, '')
        cleaned = cleaned.gsub(/[\s\|\-\:]*#{name}$/, '')
        cleaned.strip
      end
    end
  end
end
|
161
|
+
|
162
|
+
##
# @return [String, nil] The page description taken from the first metadata
#   source that supplies one, or nil for non-HTML responses or when no
#   source matches.
def description
  @description ||= begin
    if is_html?
      # Value of the first attribute node matching `query`, or nil.
      content_of = lambda do |query|
        hit = dom.xpath(query).first
        hit.value if hit
      end
      # Sources are tried in order and joined with `||`. The earlier `or`
      # chain bound more loosely than `=`, so only og:description was ever
      # assigned and every fallback was discarded.
      raw =
        content_of.call("//*/meta[@property='og:description']/@content") ||
        content_of.call("//*/meta[@name='dc.description']/@content") ||
        content_of.call("//*/meta[@itemprop='description']/@content") ||
        content_of.call("//*/meta[@name='description']/@content") ||
        content_of.call("//*/meta[@name='dcterms.abstract']/@content") ||
        # The last two are unlikely to ever be reached.
        content_of.call("//*/meta[@name='twitter:description']/@content") ||
        content_of.call("//*/meta[@name='sailthru.description']/@content")
      if raw
        # NOTE(review): as extracted, this replaces a plain space with a
        # plain space; the original pattern was presumably a non-breaking
        # space -- confirm against the repository.
        raw.gsub(/ /, ' ').strip
      end
    end
  end
end
|
202
|
+
|
203
|
+
##
# @return [Addressable::URI] The address from rel="canonical", else from
#   og:url, else the URL recorded on the response. Non-HTML responses always
#   use the response URL.
def canonical_uri
  @canonical_uri ||= begin
    declared = nil
    if is_html?
      [
        "//*/link[@rel='canonical']/@href",
        "//*/meta[@property='og:url']/@content"
      ].each do |query|
        attr = dom.xpath(query).first
        next unless attr && attr.value && attr.value != ''
        declared = Addressable::URI.parse(attr.value)
        break if declared
      end
    end
    declared || Addressable::URI.parse(response.env.url.to_s)
  end
end
|
218
|
+
|
219
|
+
##
# @return [Addressable::URI, nil] The address from rel="shortlink", else
#   from a link inside an element with class "story-short-url"; nil when
#   neither is present or the response is not HTML.
def shortlink_uri
  @shortlink_uri ||= begin
    short = nil
    if is_html?
      [
        "//*/link[@rel='shortlink']/@href",
        "//*[@class='story-short-url']/a/@href"
      ].each do |query|
        attr = dom.xpath(query).first
        next unless attr && attr.value && attr.value != ''
        short = Addressable::URI.parse(attr.value)
        break if short
      end
    end
    short
  end
end
|
231
|
+
|
232
|
+
##
# @return [Addressable::URI, nil] The URI of the first entry in `images`,
#   or nil when there are no images.
def image_uri
  return @image_uri if @image_uri
  lead = images.first
  @image_uri = lead ? Addressable::URI.parse(lead.uri) : nil
end
|
235
|
+
|
236
|
+
##
# Collects image addresses for the response and wraps each distinct one in
# a FastImage created with a 0.5 timeout.
#
# For HTML the addresses come from og:image meta tags, followed by
# `thumbnailurl` meta tags directly under the article node when one exists.
# For a response whose media type is 'image', the canonical URI is used.
#
# @return [Array<FastImage>]
def images
  @images ||= begin
    found = []
    # Appends the parsed value of every non-empty attribute node.
    collect = lambda do |attrs|
      attrs.each do |attr|
        if attr && attr.value && attr.value != ''
          found << Addressable::URI.parse(attr.value)
        end
      end
    end
    if is_html?
      collect.call(dom.xpath("//*/meta[@property='og:image']/@content"))
      if article_node
        collect.call(article_node.xpath("meta[@itemprop='thumbnailurl']/@content"))
      end
    elsif mime_type && mime_type.media_type == 'image'
      found << canonical_uri
    end
    found.uniq.map { |uri| FastImage.new(uri, :timeout => 0.5) }
  end
end
|
260
|
+
|
261
|
+
##
# @return [String, nil] The og:type value when declared; otherwise
#   'article' when any element carries the schema.org NewsArticle itemtype;
#   nil for everything else, including non-HTML responses.
def item_type
  @item_type ||= begin
    kind = nil
    if is_html?
      og = dom.xpath("//*/meta[@property='og:type']/@content").first
      kind = og.value if og
      unless kind
        marker = dom.xpath("//*[@itemtype='http://schema.org/NewsArticle']").first
        kind = 'article' unless marker.nil?
      end
    end
    kind
  end
end
|
272
|
+
|
273
|
+
##
# @return [String, nil] The og:site_name value, else the dc.publisher
#   value; nil when neither is present or the response is not HTML.
def site_name
  @site_name ||= begin
    name = nil
    if is_html?
      [
        "//*/meta[@property='og:site_name']/@content",
        "//*/meta[@name='dc.publisher']/@content"
      ].each do |query|
        attr = dom.xpath(query).first
        name = attr.value if attr
        break if name
      end
    end
    name
  end
end
|
285
|
+
|
286
|
+
##
|
287
|
+
# @return [String] The Twitter handle used by the site.
|
288
|
+
def twitter
  @twitter ||= begin
    handle = nil
    if is_html?
      attr = dom.xpath("//*/meta[@name='twitter:site']/@content").first
      # Only values that start with '@' are accepted.
      handle = attr.value if attr && attr.value && attr.value =~ /^@/
    end
    handle
  end
end
|
296
|
+
|
297
|
+
##
# Finds the element that holds the article body by trying a fixed list of
# selectors in order. A selector only counts when it matches exactly one
# node.
#
# @return The matched node, or nil when no selector matches exactly once or
#   the response is not HTML.
def article_node
  @article_node ||= begin
    found = nil
    if is_html?
      candidates = [
        [:xpath, "/html[@itemtype='http://schema.org/NewsArticle']//article[@id='story']"],
        [:xpath, "//*/*[@itemtype='http://schema.org/NewsArticle']"],
        [:xpath, "//*/*[@itemprop='articleBody']"],
        [:css, "article div.article-entry"],
        [:css, "article.post div.entry-content"],
        [:css, "div.post div.postBody"],
        [:css, ".pg_story div#leftcolumn div.body"]
      ]
      candidates.each do |finder, selector|
        matches = finder == :xpath ? dom.xpath(selector) : dom.css(selector)
        next unless matches.size == 1
        found = matches.first
        break if found
      end
    end
    found
  end
end
|
329
|
+
|
330
|
+
# Sanitizer settings applied to extracted article markup: Sanitize's RELAXED
# configuration with `:remove_contents` enabled, the element list below, and
# no attributes permitted on <span>.
#
# The attribute map is rebuilt with `merge` rather than edited in place.
# `Hash#merge` on the top-level config is shallow, so the previous
# `options[:attributes]['span'] = []` wrote into the hash owned by
# `Sanitize::Config::RELAXED`: it changed the library-wide configuration for
# every other user and raises when that hash is frozen.
SANITIZE_OPTIONS = Sanitize::Config::RELAXED.merge(
  :remove_contents => true,
  :elements => %w[
    a abbr address b bdi bdo blockquote br caption cite code col colgroup dd
    del dfn dl dt em figcaption figure h1 h2 h3 h4 h5 h6 hgroup hr i img ins
    kbd li mark ol p pre q rp rt ruby s samp small span strike strong sub
    summary sup table tbody td tfoot th thead time tr u ul var wbr
  ],
  :attributes => Sanitize::Config::RELAXED[:attributes].merge('span' => [])
).freeze
|
341
|
+
|
342
|
+
# Builds a cleaned-up HTML rendering of the article body.
#
# Steps: serialise the children of #article_node (skipping a few known
# "related links" / share-button wrappers), run the result through
# Sanitize with SANITIZE_OPTIONS, normalise whitespace, then re-parse the
# markup and drop elements that carry no attributes and no text.
#
# @return [String, nil] the cleaned HTML, or nil when the page is not HTML
#   or no article node was found.
def article_html
  @article_html ||= if is_html?
    html = nil
    if article_node
      # Children whose class attribute is exactly one of these are dropped.
      skipped_classes = [
        'related_links_inline',
        'inline-share-btn-label',
        'inline-share-btn'
      ]
      html = article_node.children.reject do |child|
        css_class = child.attribute('class')
        css_class && skipped_classes.include?(css_class.value)
      end.map(&:to_s).join('')
    end
    if html
      html = Sanitize.clean(html, SANITIZE_OPTIONS)
      html.gsub!("\r\n", "\n")           # CRLF -> LF
      html.gsub!("\t", " ")              # tabs -> space
      html.gsub!(/ *\n */, "\n")         # trim spaces around newlines
      html.gsub!(/\n\n+/, "\n\n")        # at most one blank line in a row
      html.gsub!(/<p>\n+/, "<p>\n")      # single newline after <p>
      html.gsub!(/\n+<\/p>/, "\n</p>")   # single newline before </p>
      html.gsub!(/<\/p>\n+/, "</p>\n")   # single newline after </p>
      html.strip!

      # Excise empty elements
      reparsed = Nokogiri::HTML.fragment(html)
      excise_empty = lambda do |node|
        if node.respond_to?(:name) && node.name == "script"
          node.unlink
        else
          # Visit descendants first so that a parent left with nothing but
          # whitespace after its children are removed is excised as well.
          # The block parameter has its own name so it cannot clobber `node`.
          node.children.each do |child|
            excise_empty.call(child) if child.element?
          end
          if node.respond_to?(:attribute_nodes) && node.respond_to?(:text)
            # strip.empty? tests the whole text. The previous /^\s*$/ match
            # anchored per line, so any text with a blank line in the middle
            # was treated as empty and the element was removed.
            # NOTE(review): attribute-less void elements such as <br> and
            # <hr> also satisfy this condition and are removed; confirm
            # that this is intended.
            if node.attribute_nodes.size == 0 && node.text.to_s.strip.empty? &&
                node.children.all? { |grandchild| grandchild.text? }
              node.unlink
            end
          end
        end
      end
      excise_empty.call(reparsed)
      html = reparsed.to_s
    end
    html
  end
end
|
391
|
+
|
392
|
+
# Article body with markup removed: the output of #article_html passed
# through Sanitize.clean with no explicit configuration.
#
# @return [String, nil] the sanitized text, or nil when the page is not HTML.
def article_text
  return @article_text if @article_text

  @article_text = (Sanitize.clean(article_html) if is_html?)
end
|
395
|
+
|
396
|
+
# Determines when the page was published.
#
# Sources are consulted in this order; the first one that yields a
# parseable value wins:
# 1. the `article:published_time` meta property,
# 2. a `datepublished` itemprop meta directly under #article_node,
# 3. a `datepublished` itemprop meta anywhere in the document,
# 4. the `dcterms.created` meta tag,
# 5. the `dc.date` meta tag.
#
# @return [Time, nil] the publication time, or nil when no source provides
#   a value that Time.parse accepts.
def published
  @published ||= begin
    # Converts an attribute node into a Time. Missing, blank and
    # unparseable values all yield nil so the next source is tried; page
    # metadata is untrusted and must not abort the lookup.
    parse_time = lambda do |attr|
      raw = attr && attr.value
      if raw && raw != ''
        begin
          Time.parse(raw)
        rescue ArgumentError
          nil
        end
      end
    end

    parse_time.call(dom.xpath("//*/meta[@property='article:published_time']/@content").first) ||
      (article_node &&
        parse_time.call(article_node.xpath("meta[@itemprop='datepublished']/@content").first)) ||
      parse_time.call(dom.xpath("//*/meta[@itemprop='datepublished']/@content").first) ||
      parse_time.call(dom.xpath("//*/meta[@name='dcterms.created']/@content").first) ||
      # Only a date, not a time, and not particularly specific,
      # so this is a fallback at best.
      parse_time.call(dom.xpath("//*/meta[@name='dc.date']/@content").first)
  end
end
|
423
|
+
|
424
|
+
# Determines when the page was last modified.
#
# Sources are consulted in this order; the first one that yields a
# parseable value wins:
# 1. the `article:modified_time` meta property,
# 2. a `datemodified` itemprop meta directly under #article_node,
# 3. a `datemodified` itemprop meta anywhere in the document,
# 4. the `dcterms.modified` meta tag.
#
# @return [Time, nil] the modification time, or nil when no source provides
#   a value that Time.parse accepts.
def updated
  @updated ||= begin
    # Converts an attribute node into a Time. Missing, blank and
    # unparseable values all yield nil so the next source is tried; page
    # metadata is untrusted and must not abort the lookup.
    parse_time = lambda do |attr|
      raw = attr && attr.value
      if raw && raw != ''
        begin
          Time.parse(raw)
        rescue ArgumentError
          nil
        end
      end
    end

    parse_time.call(dom.xpath("//*/meta[@property='article:modified_time']/@content").first) ||
      (article_node &&
        parse_time.call(article_node.xpath("meta[@itemprop='datemodified']/@content").first)) ||
      # Document-wide search: needs the "//*/" prefix, matching the
      # equivalent `datepublished` query in #published.
      parse_time.call(dom.xpath("//*/meta[@itemprop='datemodified']/@content").first) ||
      parse_time.call(dom.xpath("//*/meta[@name='dcterms.modified']/@content").first)
  end
end
|
445
|
+
|
446
|
+
# Short debugging representation: the class name, a zero-padded hex id
# derived from object_id, and the inspected title.
#
# @return [String]
def inspect
  hex_id = format('%x', object_id << 1).rjust(14, '0')
  "#<HRefPreview::Preview:0x#{hex_id} TITLE=#{title.inspect}>"
end
|
450
|
+
end
|
451
|
+
end
|