onebox 1.8.81 → 1.8.82
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +0 -0
- data/.rspec +0 -0
- data/.rubocop.yml +0 -0
- data/.ruby-gemset +0 -0
- data/.travis.yml +0 -0
- data/CHANGELOG.md +26 -26
- data/Gemfile +0 -0
- data/Gemfile.lock +154 -154
- data/Guardfile +0 -0
- data/LICENSE.txt +0 -0
- data/README.md +223 -223
- data/Rakefile +0 -0
- data/lib/onebox.rb +0 -0
- data/lib/onebox/engine.rb +188 -188
- data/lib/onebox/engine/amazon_onebox.rb +167 -167
- data/lib/onebox/engine/asciinema_onebox.rb +0 -0
- data/lib/onebox/engine/audio_onebox.rb +0 -0
- data/lib/onebox/engine/audioboom_onebox.rb +24 -24
- data/lib/onebox/engine/bandcamp_onebox.rb +32 -32
- data/lib/onebox/engine/cloudapp_onebox.rb +51 -51
- data/lib/onebox/engine/coub_onebox.rb +21 -21
- data/lib/onebox/engine/douban_onebox.rb +0 -0
- data/lib/onebox/engine/five_hundred_px_onebox.rb +17 -17
- data/lib/onebox/engine/flickr_onebox.rb +0 -0
- data/lib/onebox/engine/flickr_shortened_onebox.rb +0 -0
- data/lib/onebox/engine/gfycat_onebox.rb +0 -0
- data/lib/onebox/engine/giphy_onebox.rb +22 -22
- data/lib/onebox/engine/github_blob_onebox.rb +0 -0
- data/lib/onebox/engine/github_commit_onebox.rb +0 -0
- data/lib/onebox/engine/github_gist_onebox.rb +0 -0
- data/lib/onebox/engine/github_issue_onebox.rb +0 -0
- data/lib/onebox/engine/github_pullrequest_onebox.rb +0 -0
- data/lib/onebox/engine/gitlab_blob_onebox.rb +0 -0
- data/lib/onebox/engine/google_calendar_onebox.rb +0 -0
- data/lib/onebox/engine/google_docs_onebox.rb +0 -0
- data/lib/onebox/engine/google_maps_onebox.rb +0 -0
- data/lib/onebox/engine/google_photos_onebox.rb +57 -57
- data/lib/onebox/engine/google_play_app_onebox.rb +0 -0
- data/lib/onebox/engine/html.rb +0 -0
- data/lib/onebox/engine/image_onebox.rb +0 -0
- data/lib/onebox/engine/imgur_onebox.rb +65 -65
- data/lib/onebox/engine/instagram_onebox.rb +32 -32
- data/lib/onebox/engine/json.rb +0 -0
- data/lib/onebox/engine/kaltura_onebox.rb +31 -31
- data/lib/onebox/engine/mixcloud_onebox.rb +20 -20
- data/lib/onebox/engine/opengraph_image.rb +12 -12
- data/lib/onebox/engine/pastebin_onebox.rb +0 -0
- data/lib/onebox/engine/pdf_onebox.rb +0 -0
- data/lib/onebox/engine/pubmed_onebox.rb +0 -0
- data/lib/onebox/engine/replit_onebox.rb +24 -24
- data/lib/onebox/engine/sketchfab_onebox.rb +31 -31
- data/lib/onebox/engine/slides_onebox.rb +0 -0
- data/lib/onebox/engine/soundcloud_onebox.rb +31 -31
- data/lib/onebox/engine/stack_exchange_onebox.rb +0 -0
- data/lib/onebox/engine/standard_embed.rb +145 -145
- data/lib/onebox/engine/steam_store_onebox.rb +37 -37
- data/lib/onebox/engine/trello_onebox.rb +0 -0
- data/lib/onebox/engine/twitch_clips_onebox.rb +0 -0
- data/lib/onebox/engine/twitch_stream_onebox.rb +0 -0
- data/lib/onebox/engine/twitch_video_onebox.rb +0 -0
- data/lib/onebox/engine/twitter_status_onebox.rb +0 -0
- data/lib/onebox/engine/typeform_onebox.rb +41 -41
- data/lib/onebox/engine/video_onebox.rb +0 -0
- data/lib/onebox/engine/vimeo_onebox.rb +20 -20
- data/lib/onebox/engine/wechat_mp_onebox.rb +0 -0
- data/lib/onebox/engine/whitelisted_generic_onebox.rb +366 -366
- data/lib/onebox/engine/wikimedia_onebox.rb +0 -0
- data/lib/onebox/engine/wikipedia_onebox.rb +0 -0
- data/lib/onebox/engine/wistia_onebox.rb +27 -27
- data/lib/onebox/engine/xkcd_onebox.rb +0 -0
- data/lib/onebox/engine/youku_onebox.rb +0 -0
- data/lib/onebox/engine/youtube_onebox.rb +163 -163
- data/lib/onebox/file_type_finder.rb +0 -0
- data/lib/onebox/helpers.rb +188 -188
- data/lib/onebox/layout.rb +0 -0
- data/lib/onebox/layout_support.rb +0 -0
- data/lib/onebox/matcher.rb +0 -0
- data/lib/onebox/mixins/git_blob_onebox.rb +1 -1
- data/lib/onebox/mixins/twitch_onebox.rb +0 -0
- data/lib/onebox/oembed.rb +15 -15
- data/lib/onebox/open_graph.rb +90 -90
- data/lib/onebox/preview.rb +0 -0
- data/lib/onebox/sanitize_config.rb +0 -0
- data/lib/onebox/status_check.rb +0 -0
- data/lib/onebox/template_support.rb +0 -0
- data/lib/onebox/version.rb +5 -5
- data/lib/onebox/view.rb +0 -0
- data/lib/onebox/web.rb +0 -0
- data/lib/onebox/web_helpers.rb +0 -0
- data/onebox.gemspec +0 -0
- data/templates/_layout.mustache +0 -0
- data/templates/amazon.mustache +0 -0
- data/templates/douban.mustache +0 -0
- data/templates/githubblob.mustache +1 -1
- data/templates/githubcommit.mustache +0 -0
- data/templates/githubgist.mustache +0 -0
- data/templates/githubissue.mustache +0 -0
- data/templates/githubpullrequest.mustache +0 -0
- data/templates/gitlabblob.mustache +0 -0
- data/templates/googledocs.mustache +0 -0
- data/templates/googleplayapp.mustache +0 -0
- data/templates/instagram.mustache +0 -0
- data/templates/pastebin.mustache +0 -0
- data/templates/pdf.mustache +0 -0
- data/templates/pubmed.mustache +0 -0
- data/templates/stackexchange.mustache +0 -0
- data/templates/twitterstatus.mustache +0 -0
- data/templates/wechatmp.mustache +0 -0
- data/templates/whitelistedgeneric.mustache +0 -0
- data/templates/wikimedia.mustache +0 -0
- data/templates/wikipedia.mustache +0 -0
- data/templates/xkcd.mustache +0 -0
- metadata +3 -4
data/Rakefile
CHANGED
File without changes
|
data/lib/onebox.rb
CHANGED
File without changes
|
data/lib/onebox/engine.rb
CHANGED
@@ -1,188 +1,188 @@
|
|
1
|
-
module Onebox
  # Mixin shared by every onebox engine class.
  #
  # Including it gives the class the DSL in ClassMethods (`matches_regexp`,
  # `always_https`, ...) and gives instances the common constructor, option
  # handling, cache-backed `record` lookup and the abstract hooks
  # (`to_html`, `raw`, `data`) that concrete engines must implement.
  module Engine
    # Hook invoked by Ruby when a class includes Engine; extends that class
    # with the ClassMethods DSL.
    def self.included(object)
      object.extend(ClassMethods)
    end

    # Returns the values of all constants defined under Onebox::Engine whose
    # name ends in "Onebox".
    def self.engines
      constants.select do |constant|
        constant.to_s =~ /Onebox$/
      end.map(&method(:const_get))
    end

    attr_reader :url, :uri
    attr_reader :cache
    attr_reader :timeout

    # Baseline options each engine instance starts from. Instances work on
    # their own copy (see #initialize); this hash itself is never mutated.
    DEFAULT = {}

    # Returns the options hash of this engine instance.
    def options
      @options
    end

    # Merges +opt+ (a Hash or an OpenStruct) into this instance's options.
    # A nil argument leaves the options untouched. Returns the options hash.
    def options=(opt)
      return @options if opt.nil? # make sure options were provided
      opt = opt.to_h if opt.instance_of?(OpenStruct)
      @options.merge!(opt)
      @options
    end

    # link    - URL string the engine should render.
    # cache   - optional cache object; falls back to Onebox.options.cache.
    # timeout - optional timeout; falls back to Onebox.options.timeout.
    #
    # Raises whatever URI() raises when +link+ cannot be parsed.
    def initialize(link, cache = nil, timeout = nil)
      # Copy the defaults: options= merges in place, so sharing the DEFAULT
      # object would leak one engine's options into every other engine.
      @options = DEFAULT.dup
      class_name = self.class.name.split("::").last.to_s
      # Apply the engine-specific options found in the global options, keyed
      # by the unqualified class name.
      self.options = Onebox.options[class_name] || {}

      @url = link
      @uri = URI(link)
      if always_https?
        @uri.scheme = 'https'
        @url = @uri.to_s
      end
      @cache = cache || Onebox.options.cache
      @timeout = timeout || Onebox.options.timeout
    end

    # raises error if not defined in onebox engine.
    # This is the output method for an engine.
    def to_html
      fail NoMethodError, "Engines need to implement this method"
    end

    # Some oneboxes create iframes or other complicated controls. If you're using
    # a live editor with HTML preview, rendering those complicated controls can
    # be slow or cause flickering.
    #
    # This method allows engines to produce a placeholder such as static image
    # frame of a video.
    #
    # By default it just calls `to_html` unless implemented.
    def placeholder_html
      to_html
    end

    private

    # Returns the engine's data for `url`, read from the cache when present
    # and computed through `data` otherwise. The result is written back when
    # the cache responds to `key?`.
    def record
      url_result = url
      result = cache.fetch(url_result) { data }
      cache[url_result] = result if cache.respond_to?(:key?)
      result
    end

    # raises error if not defined in onebox engine
    # in each onebox, uses either Nokogiri or StandardEmbed to get raw HTML from url
    def raw
      fail NoMethodError, "Engines need to implement this method"
    end

    # raises error if not defined in onebox engine
    # in each onebox, returns hash of desired onebox content
    def data
      fail NoMethodError, "Engines need this method defined"
    end

    # Returns @url with quotes and angle brackets replaced by HTML entities.
    def link
      @url.gsub(/['\"<>]/,
        "'" => '&#39;',
        '"' => '&quot;',
        '<' => '&lt;',
        '>' => '&gt;',
      )
    end

    # True when the engine class declared `always_https`.
    def always_https?
      self.class.always_https?
    end

    # Class-level DSL added to every class that includes Engine.
    module ClassMethods
      # For a URI argument, reports whether its string form matches the
      # regexp registered with `matches_regexp`; any other argument gets the
      # inherited `===` behaviour.
      def ===(other)
        if other.kind_of?(URI)
          !!(other.to_s =~ class_variable_get(:@@matcher))
        else
          super
        end
      end

      # Priority value of the engine; 100 unless an engine overrides it.
      def priority
        100
      end

      # Registers the regexp that decides which URLs this engine handles.
      # Stored as the class variable @@matcher, which engines may read directly.
      def matches_regexp(r)
        class_variable_set :@@matcher, r
      end

      # calculates a name for onebox using the class name of engine
      def onebox_name
        name.split("::").last.downcase.gsub(/onebox/, "")
      end

      # Declares that URLs handled by this engine are rewritten to https.
      def always_https
        @https = true
      end

      # Truthy once `always_https` has been declared on the class.
      def always_https?
        @https
      end
    end
  end
end
|
128
|
-
|
129
|
-
require_relative "helpers"
|
130
|
-
require_relative "layout_support"
|
131
|
-
require_relative "file_type_finder"
|
132
|
-
require_relative "engine/standard_embed"
|
133
|
-
require_relative "engine/html"
|
134
|
-
require_relative "engine/json"
|
135
|
-
require_relative "engine/amazon_onebox"
|
136
|
-
require_relative "engine/github_issue_onebox"
|
137
|
-
require_relative "engine/github_blob_onebox"
|
138
|
-
require_relative "engine/github_commit_onebox"
|
139
|
-
require_relative "engine/github_gist_onebox"
|
140
|
-
require_relative "engine/github_pullrequest_onebox"
|
141
|
-
require_relative "engine/google_calendar_onebox"
|
142
|
-
require_relative "engine/google_docs_onebox"
|
143
|
-
require_relative "engine/google_maps_onebox"
|
144
|
-
require_relative "engine/google_play_app_onebox"
|
145
|
-
require_relative "engine/image_onebox"
|
146
|
-
require_relative "engine/video_onebox"
|
147
|
-
require_relative "engine/audio_onebox"
|
148
|
-
require_relative "engine/stack_exchange_onebox"
|
149
|
-
require_relative "engine/twitter_status_onebox"
|
150
|
-
require_relative "engine/wikimedia_onebox"
|
151
|
-
require_relative "engine/wikipedia_onebox"
|
152
|
-
require_relative "engine/youtube_onebox"
|
153
|
-
require_relative "engine/youku_onebox"
|
154
|
-
require_relative "engine/douban_onebox"
|
155
|
-
require_relative "engine/whitelisted_generic_onebox"
|
156
|
-
require_relative "engine/pubmed_onebox"
|
157
|
-
require_relative "engine/soundcloud_onebox"
|
158
|
-
require_relative "engine/imgur_onebox"
|
159
|
-
require_relative "engine/pastebin_onebox"
|
160
|
-
require_relative "engine/slides_onebox"
|
161
|
-
require_relative "engine/xkcd_onebox"
|
162
|
-
require_relative "engine/giphy_onebox"
|
163
|
-
require_relative "engine/gfycat_onebox"
|
164
|
-
require_relative "engine/typeform_onebox"
|
165
|
-
require_relative "engine/vimeo_onebox"
|
166
|
-
require_relative "engine/steam_store_onebox"
|
167
|
-
require_relative "engine/sketchfab_onebox"
|
168
|
-
require_relative "engine/audioboom_onebox"
|
169
|
-
require_relative "engine/replit_onebox"
|
170
|
-
require_relative "engine/asciinema_onebox"
|
171
|
-
require_relative "engine/mixcloud_onebox"
|
172
|
-
require_relative "engine/bandcamp_onebox"
|
173
|
-
require_relative "engine/coub_onebox"
|
174
|
-
require_relative "engine/flickr_onebox"
|
175
|
-
require_relative "engine/flickr_shortened_onebox"
|
176
|
-
require_relative "engine/five_hundred_px_onebox"
|
177
|
-
require_relative "engine/pdf_onebox"
|
178
|
-
require_relative "engine/twitch_clips_onebox"
|
179
|
-
require_relative "engine/twitch_stream_onebox"
|
180
|
-
require_relative "engine/twitch_video_onebox"
|
181
|
-
require_relative "engine/trello_onebox"
|
182
|
-
require_relative "engine/wechat_mp_onebox"
|
183
|
-
require_relative "engine/cloudapp_onebox"
|
184
|
-
require_relative "engine/wistia_onebox"
|
185
|
-
require_relative "engine/instagram_onebox"
|
186
|
-
require_relative "engine/gitlab_blob_onebox"
|
187
|
-
require_relative "engine/google_photos_onebox"
|
188
|
-
require_relative "engine/kaltura_onebox"
|
1
|
+
module Onebox
  # Mixin shared by every onebox engine class.
  #
  # Including it gives the class the DSL in ClassMethods (`matches_regexp`,
  # `always_https`, ...) and gives instances the common constructor, option
  # handling, cache-backed `record` lookup and the abstract hooks
  # (`to_html`, `raw`, `data`) that concrete engines must implement.
  module Engine
    # Hook invoked by Ruby when a class includes Engine; extends that class
    # with the ClassMethods DSL.
    def self.included(object)
      object.extend(ClassMethods)
    end

    # Returns the values of all constants defined under Onebox::Engine whose
    # name ends in "Onebox".
    def self.engines
      constants.select do |constant|
        constant.to_s =~ /Onebox$/
      end.map(&method(:const_get))
    end

    attr_reader :url, :uri
    attr_reader :cache
    attr_reader :timeout

    # Baseline options each engine instance starts from. Instances work on
    # their own copy (see #initialize); this hash itself is never mutated.
    DEFAULT = {}

    # Returns the options hash of this engine instance.
    def options
      @options
    end

    # Merges +opt+ (a Hash or an OpenStruct) into this instance's options.
    # A nil argument leaves the options untouched. Returns the options hash.
    def options=(opt)
      return @options if opt.nil? # make sure options were provided
      opt = opt.to_h if opt.instance_of?(OpenStruct)
      @options.merge!(opt)
      @options
    end

    # link    - URL string the engine should render.
    # cache   - optional cache object; falls back to Onebox.options.cache.
    # timeout - optional timeout; falls back to Onebox.options.timeout.
    #
    # Raises whatever URI() raises when +link+ cannot be parsed.
    def initialize(link, cache = nil, timeout = nil)
      # Copy the defaults: options= merges in place, so sharing the DEFAULT
      # object would leak one engine's options into every other engine.
      @options = DEFAULT.dup
      class_name = self.class.name.split("::").last.to_s
      # Apply the engine-specific options found in the global options, keyed
      # by the unqualified class name.
      self.options = Onebox.options[class_name] || {}

      @url = link
      @uri = URI(link)
      if always_https?
        @uri.scheme = 'https'
        @url = @uri.to_s
      end
      @cache = cache || Onebox.options.cache
      @timeout = timeout || Onebox.options.timeout
    end

    # raises error if not defined in onebox engine.
    # This is the output method for an engine.
    def to_html
      fail NoMethodError, "Engines need to implement this method"
    end

    # Some oneboxes create iframes or other complicated controls. If you're using
    # a live editor with HTML preview, rendering those complicated controls can
    # be slow or cause flickering.
    #
    # This method allows engines to produce a placeholder such as static image
    # frame of a video.
    #
    # By default it just calls `to_html` unless implemented.
    def placeholder_html
      to_html
    end

    private

    # Returns the engine's data for `url`, read from the cache when present
    # and computed through `data` otherwise. The result is written back when
    # the cache responds to `key?`.
    def record
      url_result = url
      result = cache.fetch(url_result) { data }
      cache[url_result] = result if cache.respond_to?(:key?)
      result
    end

    # raises error if not defined in onebox engine
    # in each onebox, uses either Nokogiri or StandardEmbed to get raw HTML from url
    def raw
      fail NoMethodError, "Engines need to implement this method"
    end

    # raises error if not defined in onebox engine
    # in each onebox, returns hash of desired onebox content
    def data
      fail NoMethodError, "Engines need this method defined"
    end

    # Returns @url with quotes and angle brackets replaced by HTML entities.
    def link
      @url.gsub(/['\"<>]/,
        "'" => '&#39;',
        '"' => '&quot;',
        '<' => '&lt;',
        '>' => '&gt;',
      )
    end

    # True when the engine class declared `always_https`.
    def always_https?
      self.class.always_https?
    end

    # Class-level DSL added to every class that includes Engine.
    module ClassMethods
      # For a URI argument, reports whether its string form matches the
      # regexp registered with `matches_regexp`; any other argument gets the
      # inherited `===` behaviour.
      def ===(other)
        if other.kind_of?(URI)
          !!(other.to_s =~ class_variable_get(:@@matcher))
        else
          super
        end
      end

      # Priority value of the engine; 100 unless an engine overrides it.
      def priority
        100
      end

      # Registers the regexp that decides which URLs this engine handles.
      # Stored as the class variable @@matcher, which engines may read directly.
      def matches_regexp(r)
        class_variable_set :@@matcher, r
      end

      # calculates a name for onebox using the class name of engine
      def onebox_name
        name.split("::").last.downcase.gsub(/onebox/, "")
      end

      # Declares that URLs handled by this engine are rewritten to https.
      def always_https
        @https = true
      end

      # Truthy once `always_https` has been declared on the class.
      def always_https?
        @https
      end
    end
  end
end
|
128
|
+
|
129
|
+
require_relative "helpers"
|
130
|
+
require_relative "layout_support"
|
131
|
+
require_relative "file_type_finder"
|
132
|
+
require_relative "engine/standard_embed"
|
133
|
+
require_relative "engine/html"
|
134
|
+
require_relative "engine/json"
|
135
|
+
require_relative "engine/amazon_onebox"
|
136
|
+
require_relative "engine/github_issue_onebox"
|
137
|
+
require_relative "engine/github_blob_onebox"
|
138
|
+
require_relative "engine/github_commit_onebox"
|
139
|
+
require_relative "engine/github_gist_onebox"
|
140
|
+
require_relative "engine/github_pullrequest_onebox"
|
141
|
+
require_relative "engine/google_calendar_onebox"
|
142
|
+
require_relative "engine/google_docs_onebox"
|
143
|
+
require_relative "engine/google_maps_onebox"
|
144
|
+
require_relative "engine/google_play_app_onebox"
|
145
|
+
require_relative "engine/image_onebox"
|
146
|
+
require_relative "engine/video_onebox"
|
147
|
+
require_relative "engine/audio_onebox"
|
148
|
+
require_relative "engine/stack_exchange_onebox"
|
149
|
+
require_relative "engine/twitter_status_onebox"
|
150
|
+
require_relative "engine/wikimedia_onebox"
|
151
|
+
require_relative "engine/wikipedia_onebox"
|
152
|
+
require_relative "engine/youtube_onebox"
|
153
|
+
require_relative "engine/youku_onebox"
|
154
|
+
require_relative "engine/douban_onebox"
|
155
|
+
require_relative "engine/whitelisted_generic_onebox"
|
156
|
+
require_relative "engine/pubmed_onebox"
|
157
|
+
require_relative "engine/soundcloud_onebox"
|
158
|
+
require_relative "engine/imgur_onebox"
|
159
|
+
require_relative "engine/pastebin_onebox"
|
160
|
+
require_relative "engine/slides_onebox"
|
161
|
+
require_relative "engine/xkcd_onebox"
|
162
|
+
require_relative "engine/giphy_onebox"
|
163
|
+
require_relative "engine/gfycat_onebox"
|
164
|
+
require_relative "engine/typeform_onebox"
|
165
|
+
require_relative "engine/vimeo_onebox"
|
166
|
+
require_relative "engine/steam_store_onebox"
|
167
|
+
require_relative "engine/sketchfab_onebox"
|
168
|
+
require_relative "engine/audioboom_onebox"
|
169
|
+
require_relative "engine/replit_onebox"
|
170
|
+
require_relative "engine/asciinema_onebox"
|
171
|
+
require_relative "engine/mixcloud_onebox"
|
172
|
+
require_relative "engine/bandcamp_onebox"
|
173
|
+
require_relative "engine/coub_onebox"
|
174
|
+
require_relative "engine/flickr_onebox"
|
175
|
+
require_relative "engine/flickr_shortened_onebox"
|
176
|
+
require_relative "engine/five_hundred_px_onebox"
|
177
|
+
require_relative "engine/pdf_onebox"
|
178
|
+
require_relative "engine/twitch_clips_onebox"
|
179
|
+
require_relative "engine/twitch_stream_onebox"
|
180
|
+
require_relative "engine/twitch_video_onebox"
|
181
|
+
require_relative "engine/trello_onebox"
|
182
|
+
require_relative "engine/wechat_mp_onebox"
|
183
|
+
require_relative "engine/cloudapp_onebox"
|
184
|
+
require_relative "engine/wistia_onebox"
|
185
|
+
require_relative "engine/instagram_onebox"
|
186
|
+
require_relative "engine/gitlab_blob_onebox"
|
187
|
+
require_relative "engine/google_photos_onebox"
|
188
|
+
require_relative "engine/kaltura_onebox"
|
@@ -1,167 +1,167 @@
|
|
1
|
-
require 'json'
|
2
|
-
require "onebox/open_graph"
|
3
|
-
|
4
|
-
module Onebox
  module Engine
    # Onebox engine for Amazon product URLs (amazon.* / amzn.* on the TLDs
    # listed in the matcher). It reads title, authors, image, price and, for
    # books and ebooks, publication details out of the fetched page.
    class AmazonOnebox
      include Engine
      include LayoutSupport
      include HTML

      always_https
      matches_regexp(/^https?:\/\/(?:www\.)?(?:smile\.)?(amazon|amzn)\.(?<tld>com|ca|de|it|es|fr|co\.jp|co\.uk|cn|in|com\.br)\//)

      # URL that is fetched for this product. When a product id can be
      # extracted from the original link it is rewritten to
      # "https://www.amazon.<tld>/gp/aw/d/<id>"; otherwise the original URL
      # is returned unchanged.
      def url
        if match && match[:id]
          # URI::DEFAULT_PARSER.escape is what URI::encode delegated to;
          # URI::encode itself no longer exists on Ruby 3.0+.
          return "https://www.amazon.#{tld}/gp/aw/d/#{URI::DEFAULT_PARSER.escape(match[:id])}"
        end

        @url
      end

      # Top-level domain captured from the original URL by the class matcher
      # (e.g. "com", "co.uk"). Memoized.
      def tld
        @tld ||= @@matcher.match(@url)["tld"]
      end

      # Request headers used when fetching the page: an iPhone Safari
      # User-Agent. NOTE(review): presumably sent so Amazon serves the mobile
      # markup that the selectors in #data target -- confirm against the
      # HTML mixin that consumes this hash.
      def http_params
        {
          'User-Agent' =>
            'Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9A405 Safari/7534.48.3'
        }
      end

      private

      # MatchData for the product id in paths such as "/dp/<id>",
      # "/gp/<id>" or "/gp/product/<id>"; nil when the URL has no such part.
      def match
        @match ||= @url.match(/(?:d|g)p\/(?:product\/)?(?<id>[^\/]+)(?:\/|$)/mi)
      end

      # Best product image URL found in the page markup, or nil.
      # Candidates are tried in order: #main-image, #landingImage,
      # #ebooksImgBlkFront. Every branch returns explicitly; previously the
      # #landingImage result was computed and then discarded.
      def image
        if (main_image = raw.css("#main-image")) && main_image.any?
          attributes = main_image.first.attributes

          return attributes["data-a-hires"].to_s if attributes["data-a-hires"]

          if attributes["data-a-dynamic-image"]
            # The attribute holds a JSON object keyed by image URL.
            return ::JSON.parse(attributes["data-a-dynamic-image"].value).keys.first
          end
        end

        if (landing_image = raw.css("#landingImage")) && landing_image.any?
          return landing_image.first["src"].to_s
        end

        if (ebook_image = raw.css("#ebooksImgBlkFront")) && ebook_image.any?
          # Guard against the attribute being absent instead of calling
          # .value on nil.
          dynamic_image = ebook_image.first.attributes["data-a-dynamic-image"]
          return ::JSON.parse(dynamic_image.value).keys.first if dynamic_image
        end

        nil
      end

      # Item price as display text; may be an empty string when no known
      # price element is present.
      def price
        # get item price (Amazon markup is inconsistent, deal with it)
        rest_of_price = raw.css("#priceblock_ourprice .restOfPrice")
        buying_price = raw.css("#priceblock_ourprice .buyingPrice")[0]

        # The split layout is only used when all three fragments exist;
        # otherwise fall through to the simpler layouts instead of raising.
        if rest_of_price[0] && rest_of_price[1] && buying_price
          "#{rest_of_price[0].inner_text}#{buying_price.inner_text}.#{rest_of_price[1].inner_text}"
        elsif (dealprice = raw.css("#priceblock_dealprice span")[0])
          dealprice.inner_text
        elsif !(our_price = raw.css("#priceblock_ourprice").inner_text).empty?
          our_price
        else
          raw.css(".mediaMatrixListItem.a-active .a-color-price").inner_text
        end
      end

      # Joins the stripped text of every node matched by +authors_xpath+
      # with ", ".
      def multiple_authors(authors_xpath)
        raw.xpath(authors_xpath).map { |author| author.inner_text.strip }.join(", ")
      end

      # Builds the hash handed to the template. Three page layouts are
      # distinguished: printed books, ebooks and everything else.
      def data
        og = ::Onebox::OpenGraph.new(raw)
        # Looked up once; #price walks the document on every call.
        item_price = price

        if raw.at_css('#dp.book_mobile') # printed books
          title = raw.at("h1#title")&.inner_text
          authors = raw.at_css('#byline_secondary_view_div') ? multiple_authors("//div[@id='byline_secondary_view_div']//span[@class='a-text-bold']") : raw.at("#byline")&.inner_text
          rating = raw.at("#averageCustomerReviews_feature_div .a-icon")&.inner_text || raw.at("#cmrsArcLink .a-icon")&.inner_text

          table_xpath = "//div[@id='productDetails_secondary_view_div']//table[@id='productDetails_techSpec_section_1']"
          isbn = raw.xpath("#{table_xpath}//tr[8]//td").inner_text.strip

          # if ISBN is misplaced or absent it's hard to find out which data is
          # available and where to find it so just set it all to nil
          if /^\d(\-?\d){12}$/.match(isbn)
            publisher = raw.xpath("#{table_xpath}//tr[1]//td").inner_text.strip
            published = raw.xpath("#{table_xpath}//tr[2]//td").inner_text.strip
          else
            isbn = publisher = published = nil
          end

          result = {
            link: link,
            title: title,
            by_info: authors,
            image: og.image || image,
            description: raw.at("#productDescription")&.inner_text,
            rating: "#{rating}#{', ' if rating && (!isbn&.empty? || !item_price&.empty?)}",
            price: item_price,
            isbn_asin_text: "ISBN",
            isbn_asin: isbn,
            publisher: publisher,
            published: "#{published}#{', ' if published && !item_price&.empty?}"
          }

        elsif raw.at_css('#dp.ebooks_mobile') # ebooks
          title = raw.at("#ebooksTitle")&.inner_text
          authors = raw.at_css('#a-popover-mobile-udp-contributor-popover-id') ? multiple_authors("//div[@id='a-popover-mobile-udp-contributor-popover-id']//span[contains(@class,'a-text-bold')]") : (raw.at("#byline")&.inner_text&.strip || raw.at("#bylineInfo")&.inner_text&.strip)
          rating = raw.at("#averageCustomerReviews_feature_div .a-icon")&.inner_text || raw.at("#cmrsArcLink .a-icon")&.inner_text || raw.at("#acrCustomerReviewLink .a-icon")&.inner_text

          table_xpath = "//div[@id='detailBullets_secondary_view_div']//ul"
          asin = raw.xpath("#{table_xpath}//li[4]/span/span[2]").inner_text

          # if ASIN is misplaced or absent it's hard to find out which data is
          # available and where to find it so just set it all to nil
          if /^[0-9A-Z]{10}$/.match(asin)
            publisher = raw.xpath("#{table_xpath}//li[2]/span/span[2]").inner_text
            published = raw.xpath("#{table_xpath}//li[1]/span/span[2]").inner_text
          else
            asin = publisher = published = nil
          end

          result = {
            link: link,
            title: title,
            by_info: authors,
            image: og.image || image,
            description: raw.at("#productDescription")&.inner_text,
            rating: "#{rating}#{', ' if rating && (!asin&.empty? || !item_price&.empty?)}",
            price: item_price,
            isbn_asin_text: "ASIN",
            isbn_asin: asin,
            publisher: publisher,
            published: "#{published}#{', ' if published && !item_price&.empty?}"
          }

        else
          title = og.title || CGI.unescapeHTML(raw.css("title").inner_text)
          result = {
            link: link,
            title: title,
            image: og.image || image,
            price: item_price
          }

          result[:by_info] = raw.at("#by-line")
          result[:by_info] = Onebox::Helpers.clean(result[:by_info].inner_html) if result[:by_info]

          summary = raw.at("#productDescription")
          result[:description] = og.description || (summary && summary.inner_text)
        end

        # Hide prices whose text starts with "$0". The former `== 0` check
        # was dropped: the price is always a String, so it could never match.
        # NOTE(review): this also hides prices such as "$0.99" -- confirm
        # whether only free items were meant.
        result[:price] = nil if result[:price].start_with?("$0")

        result
      end
    end
  end
end
|
1
|
+
require 'json'
|
2
|
+
require "onebox/open_graph"
|
3
|
+
|
4
|
+
module Onebox
  module Engine
    # Onebox engine for Amazon product URLs (amazon.* / amzn.* on the TLDs
    # listed in the matcher). It reads title, authors, image, price and, for
    # books and ebooks, publication details out of the fetched page.
    class AmazonOnebox
      include Engine
      include LayoutSupport
      include HTML

      always_https
      matches_regexp(/^https?:\/\/(?:www\.)?(?:smile\.)?(amazon|amzn)\.(?<tld>com|ca|de|it|es|fr|co\.jp|co\.uk|cn|in|com\.br)\//)

      # URL that is fetched for this product. When a product id can be
      # extracted from the original link it is rewritten to
      # "https://www.amazon.<tld>/gp/aw/d/<id>"; otherwise the original URL
      # is returned unchanged.
      def url
        if match && match[:id]
          # URI::DEFAULT_PARSER.escape is what URI::encode delegated to;
          # URI::encode itself no longer exists on Ruby 3.0+.
          return "https://www.amazon.#{tld}/gp/aw/d/#{URI::DEFAULT_PARSER.escape(match[:id])}"
        end

        @url
      end

      # Top-level domain captured from the original URL by the class matcher
      # (e.g. "com", "co.uk"). Memoized.
      def tld
        @tld ||= @@matcher.match(@url)["tld"]
      end

      # Request headers used when fetching the page: an iPhone Safari
      # User-Agent. NOTE(review): presumably sent so Amazon serves the mobile
      # markup that the selectors in #data target -- confirm against the
      # HTML mixin that consumes this hash.
      def http_params
        {
          'User-Agent' =>
            'Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9A405 Safari/7534.48.3'
        }
      end

      private

      # MatchData for the product id in paths such as "/dp/<id>",
      # "/gp/<id>" or "/gp/product/<id>"; nil when the URL has no such part.
      def match
        @match ||= @url.match(/(?:d|g)p\/(?:product\/)?(?<id>[^\/]+)(?:\/|$)/mi)
      end

      # Best product image URL found in the page markup, or nil.
      # Candidates are tried in order: #main-image, #landingImage,
      # #ebooksImgBlkFront. Every branch returns explicitly; previously the
      # #landingImage result was computed and then discarded.
      def image
        if (main_image = raw.css("#main-image")) && main_image.any?
          attributes = main_image.first.attributes

          return attributes["data-a-hires"].to_s if attributes["data-a-hires"]

          if attributes["data-a-dynamic-image"]
            # The attribute holds a JSON object keyed by image URL.
            return ::JSON.parse(attributes["data-a-dynamic-image"].value).keys.first
          end
        end

        if (landing_image = raw.css("#landingImage")) && landing_image.any?
          return landing_image.first["src"].to_s
        end

        if (ebook_image = raw.css("#ebooksImgBlkFront")) && ebook_image.any?
          # Guard against the attribute being absent instead of calling
          # .value on nil.
          dynamic_image = ebook_image.first.attributes["data-a-dynamic-image"]
          return ::JSON.parse(dynamic_image.value).keys.first if dynamic_image
        end

        nil
      end

      # Item price as display text; may be an empty string when no known
      # price element is present.
      def price
        # get item price (Amazon markup is inconsistent, deal with it)
        rest_of_price = raw.css("#priceblock_ourprice .restOfPrice")
        buying_price = raw.css("#priceblock_ourprice .buyingPrice")[0]

        # The split layout is only used when all three fragments exist;
        # otherwise fall through to the simpler layouts instead of raising.
        if rest_of_price[0] && rest_of_price[1] && buying_price
          "#{rest_of_price[0].inner_text}#{buying_price.inner_text}.#{rest_of_price[1].inner_text}"
        elsif (dealprice = raw.css("#priceblock_dealprice span")[0])
          dealprice.inner_text
        elsif !(our_price = raw.css("#priceblock_ourprice").inner_text).empty?
          our_price
        else
          raw.css(".mediaMatrixListItem.a-active .a-color-price").inner_text
        end
      end

      # Joins the stripped text of every node matched by +authors_xpath+
      # with ", ".
      def multiple_authors(authors_xpath)
        raw.xpath(authors_xpath).map { |author| author.inner_text.strip }.join(", ")
      end

      # Builds the hash handed to the template. Three page layouts are
      # distinguished: printed books, ebooks and everything else.
      def data
        og = ::Onebox::OpenGraph.new(raw)
        # Looked up once; #price walks the document on every call.
        item_price = price

        if raw.at_css('#dp.book_mobile') # printed books
          title = raw.at("h1#title")&.inner_text
          authors = raw.at_css('#byline_secondary_view_div') ? multiple_authors("//div[@id='byline_secondary_view_div']//span[@class='a-text-bold']") : raw.at("#byline")&.inner_text
          rating = raw.at("#averageCustomerReviews_feature_div .a-icon")&.inner_text || raw.at("#cmrsArcLink .a-icon")&.inner_text

          table_xpath = "//div[@id='productDetails_secondary_view_div']//table[@id='productDetails_techSpec_section_1']"
          isbn = raw.xpath("#{table_xpath}//tr[8]//td").inner_text.strip

          # if ISBN is misplaced or absent it's hard to find out which data is
          # available and where to find it so just set it all to nil
          if /^\d(\-?\d){12}$/.match(isbn)
            publisher = raw.xpath("#{table_xpath}//tr[1]//td").inner_text.strip
            published = raw.xpath("#{table_xpath}//tr[2]//td").inner_text.strip
          else
            isbn = publisher = published = nil
          end

          result = {
            link: link,
            title: title,
            by_info: authors,
            image: og.image || image,
            description: raw.at("#productDescription")&.inner_text,
            rating: "#{rating}#{', ' if rating && (!isbn&.empty? || !item_price&.empty?)}",
            price: item_price,
            isbn_asin_text: "ISBN",
            isbn_asin: isbn,
            publisher: publisher,
            published: "#{published}#{', ' if published && !item_price&.empty?}"
          }

        elsif raw.at_css('#dp.ebooks_mobile') # ebooks
          title = raw.at("#ebooksTitle")&.inner_text
          authors = raw.at_css('#a-popover-mobile-udp-contributor-popover-id') ? multiple_authors("//div[@id='a-popover-mobile-udp-contributor-popover-id']//span[contains(@class,'a-text-bold')]") : (raw.at("#byline")&.inner_text&.strip || raw.at("#bylineInfo")&.inner_text&.strip)
          rating = raw.at("#averageCustomerReviews_feature_div .a-icon")&.inner_text || raw.at("#cmrsArcLink .a-icon")&.inner_text || raw.at("#acrCustomerReviewLink .a-icon")&.inner_text

          table_xpath = "//div[@id='detailBullets_secondary_view_div']//ul"
          asin = raw.xpath("#{table_xpath}//li[4]/span/span[2]").inner_text

          # if ASIN is misplaced or absent it's hard to find out which data is
          # available and where to find it so just set it all to nil
          if /^[0-9A-Z]{10}$/.match(asin)
            publisher = raw.xpath("#{table_xpath}//li[2]/span/span[2]").inner_text
            published = raw.xpath("#{table_xpath}//li[1]/span/span[2]").inner_text
          else
            asin = publisher = published = nil
          end

          result = {
            link: link,
            title: title,
            by_info: authors,
            image: og.image || image,
            description: raw.at("#productDescription")&.inner_text,
            rating: "#{rating}#{', ' if rating && (!asin&.empty? || !item_price&.empty?)}",
            price: item_price,
            isbn_asin_text: "ASIN",
            isbn_asin: asin,
            publisher: publisher,
            published: "#{published}#{', ' if published && !item_price&.empty?}"
          }

        else
          title = og.title || CGI.unescapeHTML(raw.css("title").inner_text)
          result = {
            link: link,
            title: title,
            image: og.image || image,
            price: item_price
          }

          result[:by_info] = raw.at("#by-line")
          result[:by_info] = Onebox::Helpers.clean(result[:by_info].inner_html) if result[:by_info]

          summary = raw.at("#productDescription")
          result[:description] = og.description || (summary && summary.inner_text)
        end

        # Hide prices whose text starts with "$0". The former `== 0` check
        # was dropped: the price is always a String, so it could never match.
        # NOTE(review): this also hides prices such as "$0.99" -- confirm
        # whether only free items were meant.
        result[:price] = nil if result[:price].start_with?("$0")

        result
      end
    end
  end
end
|