onebox 1.8.81 → 1.8.82

Sign up to get free protection for your applications and to get access to all the features.
Files changed (114) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +0 -0
  3. data/.rspec +0 -0
  4. data/.rubocop.yml +0 -0
  5. data/.ruby-gemset +0 -0
  6. data/.travis.yml +0 -0
  7. data/CHANGELOG.md +26 -26
  8. data/Gemfile +0 -0
  9. data/Gemfile.lock +154 -154
  10. data/Guardfile +0 -0
  11. data/LICENSE.txt +0 -0
  12. data/README.md +223 -223
  13. data/Rakefile +0 -0
  14. data/lib/onebox.rb +0 -0
  15. data/lib/onebox/engine.rb +188 -188
  16. data/lib/onebox/engine/amazon_onebox.rb +167 -167
  17. data/lib/onebox/engine/asciinema_onebox.rb +0 -0
  18. data/lib/onebox/engine/audio_onebox.rb +0 -0
  19. data/lib/onebox/engine/audioboom_onebox.rb +24 -24
  20. data/lib/onebox/engine/bandcamp_onebox.rb +32 -32
  21. data/lib/onebox/engine/cloudapp_onebox.rb +51 -51
  22. data/lib/onebox/engine/coub_onebox.rb +21 -21
  23. data/lib/onebox/engine/douban_onebox.rb +0 -0
  24. data/lib/onebox/engine/five_hundred_px_onebox.rb +17 -17
  25. data/lib/onebox/engine/flickr_onebox.rb +0 -0
  26. data/lib/onebox/engine/flickr_shortened_onebox.rb +0 -0
  27. data/lib/onebox/engine/gfycat_onebox.rb +0 -0
  28. data/lib/onebox/engine/giphy_onebox.rb +22 -22
  29. data/lib/onebox/engine/github_blob_onebox.rb +0 -0
  30. data/lib/onebox/engine/github_commit_onebox.rb +0 -0
  31. data/lib/onebox/engine/github_gist_onebox.rb +0 -0
  32. data/lib/onebox/engine/github_issue_onebox.rb +0 -0
  33. data/lib/onebox/engine/github_pullrequest_onebox.rb +0 -0
  34. data/lib/onebox/engine/gitlab_blob_onebox.rb +0 -0
  35. data/lib/onebox/engine/google_calendar_onebox.rb +0 -0
  36. data/lib/onebox/engine/google_docs_onebox.rb +0 -0
  37. data/lib/onebox/engine/google_maps_onebox.rb +0 -0
  38. data/lib/onebox/engine/google_photos_onebox.rb +57 -57
  39. data/lib/onebox/engine/google_play_app_onebox.rb +0 -0
  40. data/lib/onebox/engine/html.rb +0 -0
  41. data/lib/onebox/engine/image_onebox.rb +0 -0
  42. data/lib/onebox/engine/imgur_onebox.rb +65 -65
  43. data/lib/onebox/engine/instagram_onebox.rb +32 -32
  44. data/lib/onebox/engine/json.rb +0 -0
  45. data/lib/onebox/engine/kaltura_onebox.rb +31 -31
  46. data/lib/onebox/engine/mixcloud_onebox.rb +20 -20
  47. data/lib/onebox/engine/opengraph_image.rb +12 -12
  48. data/lib/onebox/engine/pastebin_onebox.rb +0 -0
  49. data/lib/onebox/engine/pdf_onebox.rb +0 -0
  50. data/lib/onebox/engine/pubmed_onebox.rb +0 -0
  51. data/lib/onebox/engine/replit_onebox.rb +24 -24
  52. data/lib/onebox/engine/sketchfab_onebox.rb +31 -31
  53. data/lib/onebox/engine/slides_onebox.rb +0 -0
  54. data/lib/onebox/engine/soundcloud_onebox.rb +31 -31
  55. data/lib/onebox/engine/stack_exchange_onebox.rb +0 -0
  56. data/lib/onebox/engine/standard_embed.rb +145 -145
  57. data/lib/onebox/engine/steam_store_onebox.rb +37 -37
  58. data/lib/onebox/engine/trello_onebox.rb +0 -0
  59. data/lib/onebox/engine/twitch_clips_onebox.rb +0 -0
  60. data/lib/onebox/engine/twitch_stream_onebox.rb +0 -0
  61. data/lib/onebox/engine/twitch_video_onebox.rb +0 -0
  62. data/lib/onebox/engine/twitter_status_onebox.rb +0 -0
  63. data/lib/onebox/engine/typeform_onebox.rb +41 -41
  64. data/lib/onebox/engine/video_onebox.rb +0 -0
  65. data/lib/onebox/engine/vimeo_onebox.rb +20 -20
  66. data/lib/onebox/engine/wechat_mp_onebox.rb +0 -0
  67. data/lib/onebox/engine/whitelisted_generic_onebox.rb +366 -366
  68. data/lib/onebox/engine/wikimedia_onebox.rb +0 -0
  69. data/lib/onebox/engine/wikipedia_onebox.rb +0 -0
  70. data/lib/onebox/engine/wistia_onebox.rb +27 -27
  71. data/lib/onebox/engine/xkcd_onebox.rb +0 -0
  72. data/lib/onebox/engine/youku_onebox.rb +0 -0
  73. data/lib/onebox/engine/youtube_onebox.rb +163 -163
  74. data/lib/onebox/file_type_finder.rb +0 -0
  75. data/lib/onebox/helpers.rb +188 -188
  76. data/lib/onebox/layout.rb +0 -0
  77. data/lib/onebox/layout_support.rb +0 -0
  78. data/lib/onebox/matcher.rb +0 -0
  79. data/lib/onebox/mixins/git_blob_onebox.rb +1 -1
  80. data/lib/onebox/mixins/twitch_onebox.rb +0 -0
  81. data/lib/onebox/oembed.rb +15 -15
  82. data/lib/onebox/open_graph.rb +90 -90
  83. data/lib/onebox/preview.rb +0 -0
  84. data/lib/onebox/sanitize_config.rb +0 -0
  85. data/lib/onebox/status_check.rb +0 -0
  86. data/lib/onebox/template_support.rb +0 -0
  87. data/lib/onebox/version.rb +5 -5
  88. data/lib/onebox/view.rb +0 -0
  89. data/lib/onebox/web.rb +0 -0
  90. data/lib/onebox/web_helpers.rb +0 -0
  91. data/onebox.gemspec +0 -0
  92. data/templates/_layout.mustache +0 -0
  93. data/templates/amazon.mustache +0 -0
  94. data/templates/douban.mustache +0 -0
  95. data/templates/githubblob.mustache +1 -1
  96. data/templates/githubcommit.mustache +0 -0
  97. data/templates/githubgist.mustache +0 -0
  98. data/templates/githubissue.mustache +0 -0
  99. data/templates/githubpullrequest.mustache +0 -0
  100. data/templates/gitlabblob.mustache +0 -0
  101. data/templates/googledocs.mustache +0 -0
  102. data/templates/googleplayapp.mustache +0 -0
  103. data/templates/instagram.mustache +0 -0
  104. data/templates/pastebin.mustache +0 -0
  105. data/templates/pdf.mustache +0 -0
  106. data/templates/pubmed.mustache +0 -0
  107. data/templates/stackexchange.mustache +0 -0
  108. data/templates/twitterstatus.mustache +0 -0
  109. data/templates/wechatmp.mustache +0 -0
  110. data/templates/whitelistedgeneric.mustache +0 -0
  111. data/templates/wikimedia.mustache +0 -0
  112. data/templates/wikipedia.mustache +0 -0
  113. data/templates/xkcd.mustache +0 -0
  114. metadata +3 -4
data/Rakefile CHANGED
File without changes
data/lib/onebox.rb CHANGED
File without changes
data/lib/onebox/engine.rb CHANGED
@@ -1,188 +1,188 @@
1
- module Onebox
2
- module Engine
3
- def self.included(object)
4
- object.extend(ClassMethods)
5
- end
6
-
7
- def self.engines
8
- constants.select do |constant|
9
- constant.to_s =~ /Onebox$/
10
- end.map(&method(:const_get))
11
- end
12
-
13
- attr_reader :url, :uri
14
- attr_reader :cache
15
- attr_reader :timeout
16
-
17
- DEFAULT = {}
18
- def options
19
- @options
20
- end
21
-
22
- def options=(opt)
23
- return @options if opt.nil? #make sure options provided
24
- opt = opt.to_h if opt.instance_of?(OpenStruct)
25
- @options.merge!(opt)
26
- @options
27
- end
28
-
29
- def initialize(link, cache = nil, timeout = nil)
30
- @options = DEFAULT
31
- class_name = self.class.name.split("::").last.to_s
32
- self.options = Onebox.options[class_name] || {} #Set the engine options extracted from global options.
33
-
34
- @url = link
35
- @uri = URI(link)
36
- if always_https?
37
- @uri.scheme = 'https'
38
- @url = @uri.to_s
39
- end
40
- @cache = cache || Onebox.options.cache
41
- @timeout = timeout || Onebox.options.timeout
42
- end
43
-
44
- # raises error if not defined in onebox engine.
45
- # This is the output method for an engine.
46
- def to_html
47
- fail NoMethodError, "Engines need to implement this method"
48
- end
49
-
50
- # Some oneboxes create iframes or other complicated controls. If you're using
51
- # a live editor with HTML preview, rendering those complicated controls can
52
- # be slow or cause flickering.
53
- #
54
- # This method allows engines to produce a placeholder such as static image
55
- # frame of a video.
56
- #
57
- # By default it just calls `to_html` unless implemented.
58
- def placeholder_html
59
- to_html
60
- end
61
-
62
- private
63
-
64
- def record
65
- url_result = url
66
- result = cache.fetch(url_result) { data }
67
- cache[url_result] = result if cache.respond_to?(:key?)
68
- result
69
- end
70
-
71
- # raises error if not defined in onebox engine
72
- # in each onebox, uses either Nokogiri or StandardEmbed to get raw HTML from url
73
- def raw
74
- fail NoMethodError, "Engines need to implement this method"
75
- end
76
-
77
- # raises error if not defined in onebox engine
78
- # in each onebox, returns hash of desired onebox content
79
- def data
80
- fail NoMethodError, "Engines need this method defined"
81
- end
82
-
83
- def link
84
- @url.gsub(/['\"<>]/,
85
- "'" => '&#39;',
86
- '"' => '&quot;',
87
- '<' => '&lt;',
88
- '>' => '&gt;',
89
- )
90
- end
91
-
92
- def always_https?
93
- self.class.always_https?
94
- end
95
-
96
- module ClassMethods
97
- def ===(other)
98
- if other.kind_of?(URI)
99
- !!(other.to_s =~ class_variable_get(:@@matcher))
100
- else
101
- super
102
- end
103
- end
104
-
105
- def priority
106
- 100
107
- end
108
-
109
- def matches_regexp(r)
110
- class_variable_set :@@matcher, r
111
- end
112
-
113
- # calculates a name for onebox using the class name of engine
114
- def onebox_name
115
- name.split("::").last.downcase.gsub(/onebox/, "")
116
- end
117
-
118
- def always_https
119
- @https = true
120
- end
121
-
122
- def always_https?
123
- @https
124
- end
125
- end
126
- end
127
- end
128
-
129
- require_relative "helpers"
130
- require_relative "layout_support"
131
- require_relative "file_type_finder"
132
- require_relative "engine/standard_embed"
133
- require_relative "engine/html"
134
- require_relative "engine/json"
135
- require_relative "engine/amazon_onebox"
136
- require_relative "engine/github_issue_onebox"
137
- require_relative "engine/github_blob_onebox"
138
- require_relative "engine/github_commit_onebox"
139
- require_relative "engine/github_gist_onebox"
140
- require_relative "engine/github_pullrequest_onebox"
141
- require_relative "engine/google_calendar_onebox"
142
- require_relative "engine/google_docs_onebox"
143
- require_relative "engine/google_maps_onebox"
144
- require_relative "engine/google_play_app_onebox"
145
- require_relative "engine/image_onebox"
146
- require_relative "engine/video_onebox"
147
- require_relative "engine/audio_onebox"
148
- require_relative "engine/stack_exchange_onebox"
149
- require_relative "engine/twitter_status_onebox"
150
- require_relative "engine/wikimedia_onebox"
151
- require_relative "engine/wikipedia_onebox"
152
- require_relative "engine/youtube_onebox"
153
- require_relative "engine/youku_onebox"
154
- require_relative "engine/douban_onebox"
155
- require_relative "engine/whitelisted_generic_onebox"
156
- require_relative "engine/pubmed_onebox"
157
- require_relative "engine/soundcloud_onebox"
158
- require_relative "engine/imgur_onebox"
159
- require_relative "engine/pastebin_onebox"
160
- require_relative "engine/slides_onebox"
161
- require_relative "engine/xkcd_onebox"
162
- require_relative "engine/giphy_onebox"
163
- require_relative "engine/gfycat_onebox"
164
- require_relative "engine/typeform_onebox"
165
- require_relative "engine/vimeo_onebox"
166
- require_relative "engine/steam_store_onebox"
167
- require_relative "engine/sketchfab_onebox"
168
- require_relative "engine/audioboom_onebox"
169
- require_relative "engine/replit_onebox"
170
- require_relative "engine/asciinema_onebox"
171
- require_relative "engine/mixcloud_onebox"
172
- require_relative "engine/bandcamp_onebox"
173
- require_relative "engine/coub_onebox"
174
- require_relative "engine/flickr_onebox"
175
- require_relative "engine/flickr_shortened_onebox"
176
- require_relative "engine/five_hundred_px_onebox"
177
- require_relative "engine/pdf_onebox"
178
- require_relative "engine/twitch_clips_onebox"
179
- require_relative "engine/twitch_stream_onebox"
180
- require_relative "engine/twitch_video_onebox"
181
- require_relative "engine/trello_onebox"
182
- require_relative "engine/wechat_mp_onebox"
183
- require_relative "engine/cloudapp_onebox"
184
- require_relative "engine/wistia_onebox"
185
- require_relative "engine/instagram_onebox"
186
- require_relative "engine/gitlab_blob_onebox"
187
- require_relative "engine/google_photos_onebox"
188
- require_relative "engine/kaltura_onebox"
1
+ module Onebox
2
+ module Engine
3
+ def self.included(object)
4
+ object.extend(ClassMethods)
5
+ end
6
+
7
+ def self.engines
8
+ constants.select do |constant|
9
+ constant.to_s =~ /Onebox$/
10
+ end.map(&method(:const_get))
11
+ end
12
+
13
+ attr_reader :url, :uri
14
+ attr_reader :cache
15
+ attr_reader :timeout
16
+
17
+ DEFAULT = {}
18
+ def options
19
+ @options
20
+ end
21
+
22
+ def options=(opt)
23
+ return @options if opt.nil? #make sure options provided
24
+ opt = opt.to_h if opt.instance_of?(OpenStruct)
25
+ @options.merge!(opt)
26
+ @options
27
+ end
28
+
29
+ def initialize(link, cache = nil, timeout = nil)
30
+ @options = DEFAULT
31
+ class_name = self.class.name.split("::").last.to_s
32
+ self.options = Onebox.options[class_name] || {} #Set the engine options extracted from global options.
33
+
34
+ @url = link
35
+ @uri = URI(link)
36
+ if always_https?
37
+ @uri.scheme = 'https'
38
+ @url = @uri.to_s
39
+ end
40
+ @cache = cache || Onebox.options.cache
41
+ @timeout = timeout || Onebox.options.timeout
42
+ end
43
+
44
+ # raises error if not defined in onebox engine.
45
+ # This is the output method for an engine.
46
+ def to_html
47
+ fail NoMethodError, "Engines need to implement this method"
48
+ end
49
+
50
+ # Some oneboxes create iframes or other complicated controls. If you're using
51
+ # a live editor with HTML preview, rendering those complicated controls can
52
+ # be slow or cause flickering.
53
+ #
54
+ # This method allows engines to produce a placeholder such as static image
55
+ # frame of a video.
56
+ #
57
+ # By default it just calls `to_html` unless implemented.
58
+ def placeholder_html
59
+ to_html
60
+ end
61
+
62
+ private
63
+
64
+ def record
65
+ url_result = url
66
+ result = cache.fetch(url_result) { data }
67
+ cache[url_result] = result if cache.respond_to?(:key?)
68
+ result
69
+ end
70
+
71
+ # raises error if not defined in onebox engine
72
+ # in each onebox, uses either Nokogiri or StandardEmbed to get raw HTML from url
73
+ def raw
74
+ fail NoMethodError, "Engines need to implement this method"
75
+ end
76
+
77
+ # raises error if not defined in onebox engine
78
+ # in each onebox, returns hash of desired onebox content
79
+ def data
80
+ fail NoMethodError, "Engines need this method defined"
81
+ end
82
+
83
+ def link
84
+ @url.gsub(/['\"<>]/,
85
+ "'" => '&#39;',
86
+ '"' => '&quot;',
87
+ '<' => '&lt;',
88
+ '>' => '&gt;',
89
+ )
90
+ end
91
+
92
+ def always_https?
93
+ self.class.always_https?
94
+ end
95
+
96
+ module ClassMethods
97
+ def ===(other)
98
+ if other.kind_of?(URI)
99
+ !!(other.to_s =~ class_variable_get(:@@matcher))
100
+ else
101
+ super
102
+ end
103
+ end
104
+
105
+ def priority
106
+ 100
107
+ end
108
+
109
+ def matches_regexp(r)
110
+ class_variable_set :@@matcher, r
111
+ end
112
+
113
+ # calculates a name for onebox using the class name of engine
114
+ def onebox_name
115
+ name.split("::").last.downcase.gsub(/onebox/, "")
116
+ end
117
+
118
+ def always_https
119
+ @https = true
120
+ end
121
+
122
+ def always_https?
123
+ @https
124
+ end
125
+ end
126
+ end
127
+ end
128
+
129
+ require_relative "helpers"
130
+ require_relative "layout_support"
131
+ require_relative "file_type_finder"
132
+ require_relative "engine/standard_embed"
133
+ require_relative "engine/html"
134
+ require_relative "engine/json"
135
+ require_relative "engine/amazon_onebox"
136
+ require_relative "engine/github_issue_onebox"
137
+ require_relative "engine/github_blob_onebox"
138
+ require_relative "engine/github_commit_onebox"
139
+ require_relative "engine/github_gist_onebox"
140
+ require_relative "engine/github_pullrequest_onebox"
141
+ require_relative "engine/google_calendar_onebox"
142
+ require_relative "engine/google_docs_onebox"
143
+ require_relative "engine/google_maps_onebox"
144
+ require_relative "engine/google_play_app_onebox"
145
+ require_relative "engine/image_onebox"
146
+ require_relative "engine/video_onebox"
147
+ require_relative "engine/audio_onebox"
148
+ require_relative "engine/stack_exchange_onebox"
149
+ require_relative "engine/twitter_status_onebox"
150
+ require_relative "engine/wikimedia_onebox"
151
+ require_relative "engine/wikipedia_onebox"
152
+ require_relative "engine/youtube_onebox"
153
+ require_relative "engine/youku_onebox"
154
+ require_relative "engine/douban_onebox"
155
+ require_relative "engine/whitelisted_generic_onebox"
156
+ require_relative "engine/pubmed_onebox"
157
+ require_relative "engine/soundcloud_onebox"
158
+ require_relative "engine/imgur_onebox"
159
+ require_relative "engine/pastebin_onebox"
160
+ require_relative "engine/slides_onebox"
161
+ require_relative "engine/xkcd_onebox"
162
+ require_relative "engine/giphy_onebox"
163
+ require_relative "engine/gfycat_onebox"
164
+ require_relative "engine/typeform_onebox"
165
+ require_relative "engine/vimeo_onebox"
166
+ require_relative "engine/steam_store_onebox"
167
+ require_relative "engine/sketchfab_onebox"
168
+ require_relative "engine/audioboom_onebox"
169
+ require_relative "engine/replit_onebox"
170
+ require_relative "engine/asciinema_onebox"
171
+ require_relative "engine/mixcloud_onebox"
172
+ require_relative "engine/bandcamp_onebox"
173
+ require_relative "engine/coub_onebox"
174
+ require_relative "engine/flickr_onebox"
175
+ require_relative "engine/flickr_shortened_onebox"
176
+ require_relative "engine/five_hundred_px_onebox"
177
+ require_relative "engine/pdf_onebox"
178
+ require_relative "engine/twitch_clips_onebox"
179
+ require_relative "engine/twitch_stream_onebox"
180
+ require_relative "engine/twitch_video_onebox"
181
+ require_relative "engine/trello_onebox"
182
+ require_relative "engine/wechat_mp_onebox"
183
+ require_relative "engine/cloudapp_onebox"
184
+ require_relative "engine/wistia_onebox"
185
+ require_relative "engine/instagram_onebox"
186
+ require_relative "engine/gitlab_blob_onebox"
187
+ require_relative "engine/google_photos_onebox"
188
+ require_relative "engine/kaltura_onebox"
data/lib/onebox/engine/amazon_onebox.rb CHANGED
@@ -1,167 +1,167 @@
1
- require 'json'
2
- require "onebox/open_graph"
3
-
4
- module Onebox
5
- module Engine
6
- class AmazonOnebox
7
- include Engine
8
- include LayoutSupport
9
- include HTML
10
-
11
- always_https
12
- matches_regexp(/^https?:\/\/(?:www\.)?(?:smile\.)?(amazon|amzn)\.(?<tld>com|ca|de|it|es|fr|co\.jp|co\.uk|cn|in|com\.br)\//)
13
-
14
- def url
15
- if match && match[:id]
16
- return "https://www.amazon.#{tld}/gp/aw/d/#{URI::encode(match[:id])}"
17
- end
18
-
19
- @url
20
- end
21
-
22
- def tld
23
- @tld || @@matcher.match(@url)["tld"]
24
- end
25
-
26
- def http_params
27
- {
28
- 'User-Agent' =>
29
- 'Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9A405 Safari/7534.48.3'
30
- }
31
- end
32
-
33
- private
34
-
35
- def match
36
- @match ||= @url.match(/(?:d|g)p\/(?:product\/)?(?<id>[^\/]+)(?:\/|$)/mi)
37
- end
38
-
39
- def image
40
- if (main_image = raw.css("#main-image")) && main_image.any?
41
- attributes = main_image.first.attributes
42
-
43
- return attributes["data-a-hires"].to_s if attributes["data-a-hires"]
44
-
45
- if attributes["data-a-dynamic-image"]
46
- return ::JSON.parse(attributes["data-a-dynamic-image"].value).keys.first
47
- end
48
- end
49
-
50
- if (landing_image = raw.css("#landingImage")) && landing_image.any?
51
- landing_image.first["src"].to_s
52
- end
53
-
54
- if (ebook_image = raw.css("#ebooksImgBlkFront")) && ebook_image.any?
55
- ::JSON.parse(ebook_image.first.attributes["data-a-dynamic-image"].value).keys.first
56
- end
57
- end
58
-
59
- def price
60
- # get item price (Amazon markup is inconsistent, deal with it)
61
- if raw.css("#priceblock_ourprice .restOfPrice")[0] && raw.css("#priceblock_ourprice .restOfPrice")[0].inner_text
62
- "#{raw.css("#priceblock_ourprice .restOfPrice")[0].inner_text}#{raw.css("#priceblock_ourprice .buyingPrice")[0].inner_text}.#{raw.css("#priceblock_ourprice .restOfPrice")[1].inner_text}"
63
- elsif raw.css("#priceblock_dealprice") && (dealprice = raw.css("#priceblock_dealprice span")[0])
64
- dealprice.inner_text
65
- elsif !raw.css("#priceblock_ourprice").inner_text.empty?
66
- raw.css("#priceblock_ourprice").inner_text
67
- else
68
- raw.css(".mediaMatrixListItem.a-active .a-color-price").inner_text
69
- end
70
- end
71
-
72
- def multiple_authors(authors_xpath)
73
- author_list = raw.xpath(authors_xpath)
74
- authors = []
75
- author_list.each { |a| authors << a.inner_text.strip }
76
- authors.join(", ")
77
- end
78
-
79
- def data
80
- og = ::Onebox::OpenGraph.new(raw)
81
-
82
- if raw.at_css('#dp.book_mobile') #printed books
83
- title = raw.at("h1#title")&.inner_text
84
- authors = raw.at_css('#byline_secondary_view_div') ? multiple_authors("//div[@id='byline_secondary_view_div']//span[@class='a-text-bold']") : raw.at("#byline")&.inner_text
85
- rating = raw.at("#averageCustomerReviews_feature_div .a-icon")&.inner_text || raw.at("#cmrsArcLink .a-icon")&.inner_text
86
-
87
- table_xpath = "//div[@id='productDetails_secondary_view_div']//table[@id='productDetails_techSpec_section_1']"
88
- isbn = raw.xpath("#{table_xpath}//tr[8]//td").inner_text.strip
89
-
90
- # if ISBN is misplaced or absent it's hard to find out which data is
91
- # available and where to find it so just set it all to nil
92
- if /^\d(\-?\d){12}$/.match(isbn)
93
- publisher = raw.xpath("#{table_xpath}//tr[1]//td").inner_text.strip
94
- published = raw.xpath("#{table_xpath}//tr[2]//td").inner_text.strip
95
- book_length = raw.xpath("#{table_xpath}//tr[6]//td").inner_text.strip
96
- else
97
- isbn = publisher = published = book_length = nil
98
- end
99
-
100
- result = {
101
- link: link,
102
- title: title,
103
- by_info: authors,
104
- image: og.image || image,
105
- description: raw.at("#productDescription")&.inner_text,
106
- rating: "#{rating}#{', ' if rating && (!isbn&.empty? || !price&.empty?)}",
107
- price: price,
108
- isbn_asin_text: "ISBN",
109
- isbn_asin: isbn,
110
- publisher: publisher,
111
- published: "#{published}#{', ' if published && !price&.empty?}"
112
- }
113
-
114
- elsif raw.at_css('#dp.ebooks_mobile') # ebooks
115
- title = raw.at("#ebooksTitle")&.inner_text
116
- authors = raw.at_css('#a-popover-mobile-udp-contributor-popover-id') ? multiple_authors("//div[@id='a-popover-mobile-udp-contributor-popover-id']//span[contains(@class,'a-text-bold')]") : (raw.at("#byline")&.inner_text&.strip || raw.at("#bylineInfo")&.inner_text&.strip)
117
- rating = raw.at("#averageCustomerReviews_feature_div .a-icon")&.inner_text || raw.at("#cmrsArcLink .a-icon")&.inner_text || raw.at("#acrCustomerReviewLink .a-icon")&.inner_text
118
-
119
- table_xpath = "//div[@id='detailBullets_secondary_view_div']//ul"
120
- asin = raw.xpath("#{table_xpath}//li[4]/span/span[2]").inner_text
121
-
122
- # if ASIN is misplaced or absent it's hard to find out which data is
123
- # available and where to find it so just set it all to nil
124
- if /^[0-9A-Z]{10}$/.match(asin)
125
- publisher = raw.xpath("#{table_xpath}//li[2]/span/span[2]").inner_text
126
- published = raw.xpath("#{table_xpath}//li[1]/span/span[2]").inner_text
127
- else
128
- asin = publisher = published = nil
129
- end
130
-
131
- result = {
132
- link: link,
133
- title: title,
134
- by_info: authors,
135
- image: og.image || image,
136
- description: raw.at("#productDescription")&.inner_text,
137
- rating: "#{rating}#{', ' if rating && (!asin&.empty? || !price&.empty?)}",
138
- price: price,
139
- isbn_asin_text: "ASIN",
140
- isbn_asin: asin,
141
- publisher: publisher,
142
- published: "#{published}#{', ' if published && !price&.empty?}"
143
- }
144
-
145
- else
146
- title = og.title || CGI.unescapeHTML(raw.css("title").inner_text)
147
- result = {
148
- link: link,
149
- title: title,
150
- image: og.image || image,
151
- price: price
152
- }
153
-
154
- result[:by_info] = raw.at("#by-line")
155
- result[:by_info] = Onebox::Helpers.clean(result[:by_info].inner_html) if result[:by_info]
156
-
157
- summary = raw.at("#productDescription")
158
- result[:description] = og.description || (summary && summary.inner_text)
159
- end
160
-
161
- result[:price] = nil if result[:price].start_with?("$0") || result[:price] == 0
162
-
163
- result
164
- end
165
- end
166
- end
167
- end
1
+ require 'json'
2
+ require "onebox/open_graph"
3
+
4
+ module Onebox
5
+ module Engine
6
+ class AmazonOnebox
7
+ include Engine
8
+ include LayoutSupport
9
+ include HTML
10
+
11
+ always_https
12
+ matches_regexp(/^https?:\/\/(?:www\.)?(?:smile\.)?(amazon|amzn)\.(?<tld>com|ca|de|it|es|fr|co\.jp|co\.uk|cn|in|com\.br)\//)
13
+
14
+ def url
15
+ if match && match[:id]
16
+ return "https://www.amazon.#{tld}/gp/aw/d/#{URI::encode(match[:id])}"
17
+ end
18
+
19
+ @url
20
+ end
21
+
22
+ def tld
23
+ @tld || @@matcher.match(@url)["tld"]
24
+ end
25
+
26
+ def http_params
27
+ {
28
+ 'User-Agent' =>
29
+ 'Mozilla/5.0 (iPhone; CPU iPhone OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9A405 Safari/7534.48.3'
30
+ }
31
+ end
32
+
33
+ private
34
+
35
+ def match
36
+ @match ||= @url.match(/(?:d|g)p\/(?:product\/)?(?<id>[^\/]+)(?:\/|$)/mi)
37
+ end
38
+
39
+ def image
40
+ if (main_image = raw.css("#main-image")) && main_image.any?
41
+ attributes = main_image.first.attributes
42
+
43
+ return attributes["data-a-hires"].to_s if attributes["data-a-hires"]
44
+
45
+ if attributes["data-a-dynamic-image"]
46
+ return ::JSON.parse(attributes["data-a-dynamic-image"].value).keys.first
47
+ end
48
+ end
49
+
50
+ if (landing_image = raw.css("#landingImage")) && landing_image.any?
51
+ landing_image.first["src"].to_s
52
+ end
53
+
54
+ if (ebook_image = raw.css("#ebooksImgBlkFront")) && ebook_image.any?
55
+ ::JSON.parse(ebook_image.first.attributes["data-a-dynamic-image"].value).keys.first
56
+ end
57
+ end
58
+
59
+ def price
60
+ # get item price (Amazon markup is inconsistent, deal with it)
61
+ if raw.css("#priceblock_ourprice .restOfPrice")[0] && raw.css("#priceblock_ourprice .restOfPrice")[0].inner_text
62
+ "#{raw.css("#priceblock_ourprice .restOfPrice")[0].inner_text}#{raw.css("#priceblock_ourprice .buyingPrice")[0].inner_text}.#{raw.css("#priceblock_ourprice .restOfPrice")[1].inner_text}"
63
+ elsif raw.css("#priceblock_dealprice") && (dealprice = raw.css("#priceblock_dealprice span")[0])
64
+ dealprice.inner_text
65
+ elsif !raw.css("#priceblock_ourprice").inner_text.empty?
66
+ raw.css("#priceblock_ourprice").inner_text
67
+ else
68
+ raw.css(".mediaMatrixListItem.a-active .a-color-price").inner_text
69
+ end
70
+ end
71
+
72
+ def multiple_authors(authors_xpath)
73
+ author_list = raw.xpath(authors_xpath)
74
+ authors = []
75
+ author_list.each { |a| authors << a.inner_text.strip }
76
+ authors.join(", ")
77
+ end
78
+
79
+ def data
80
+ og = ::Onebox::OpenGraph.new(raw)
81
+
82
+ if raw.at_css('#dp.book_mobile') #printed books
83
+ title = raw.at("h1#title")&.inner_text
84
+ authors = raw.at_css('#byline_secondary_view_div') ? multiple_authors("//div[@id='byline_secondary_view_div']//span[@class='a-text-bold']") : raw.at("#byline")&.inner_text
85
+ rating = raw.at("#averageCustomerReviews_feature_div .a-icon")&.inner_text || raw.at("#cmrsArcLink .a-icon")&.inner_text
86
+
87
+ table_xpath = "//div[@id='productDetails_secondary_view_div']//table[@id='productDetails_techSpec_section_1']"
88
+ isbn = raw.xpath("#{table_xpath}//tr[8]//td").inner_text.strip
89
+
90
+ # if ISBN is misplaced or absent it's hard to find out which data is
91
+ # available and where to find it so just set it all to nil
92
+ if /^\d(\-?\d){12}$/.match(isbn)
93
+ publisher = raw.xpath("#{table_xpath}//tr[1]//td").inner_text.strip
94
+ published = raw.xpath("#{table_xpath}//tr[2]//td").inner_text.strip
95
+ book_length = raw.xpath("#{table_xpath}//tr[6]//td").inner_text.strip
96
+ else
97
+ isbn = publisher = published = book_length = nil
98
+ end
99
+
100
+ result = {
101
+ link: link,
102
+ title: title,
103
+ by_info: authors,
104
+ image: og.image || image,
105
+ description: raw.at("#productDescription")&.inner_text,
106
+ rating: "#{rating}#{', ' if rating && (!isbn&.empty? || !price&.empty?)}",
107
+ price: price,
108
+ isbn_asin_text: "ISBN",
109
+ isbn_asin: isbn,
110
+ publisher: publisher,
111
+ published: "#{published}#{', ' if published && !price&.empty?}"
112
+ }
113
+
114
+ elsif raw.at_css('#dp.ebooks_mobile') # ebooks
115
+ title = raw.at("#ebooksTitle")&.inner_text
116
+ authors = raw.at_css('#a-popover-mobile-udp-contributor-popover-id') ? multiple_authors("//div[@id='a-popover-mobile-udp-contributor-popover-id']//span[contains(@class,'a-text-bold')]") : (raw.at("#byline")&.inner_text&.strip || raw.at("#bylineInfo")&.inner_text&.strip)
117
+ rating = raw.at("#averageCustomerReviews_feature_div .a-icon")&.inner_text || raw.at("#cmrsArcLink .a-icon")&.inner_text || raw.at("#acrCustomerReviewLink .a-icon")&.inner_text
118
+
119
+ table_xpath = "//div[@id='detailBullets_secondary_view_div']//ul"
120
+ asin = raw.xpath("#{table_xpath}//li[4]/span/span[2]").inner_text
121
+
122
+ # if ASIN is misplaced or absent it's hard to find out which data is
123
+ # available and where to find it so just set it all to nil
124
+ if /^[0-9A-Z]{10}$/.match(asin)
125
+ publisher = raw.xpath("#{table_xpath}//li[2]/span/span[2]").inner_text
126
+ published = raw.xpath("#{table_xpath}//li[1]/span/span[2]").inner_text
127
+ else
128
+ asin = publisher = published = nil
129
+ end
130
+
131
+ result = {
132
+ link: link,
133
+ title: title,
134
+ by_info: authors,
135
+ image: og.image || image,
136
+ description: raw.at("#productDescription")&.inner_text,
137
+ rating: "#{rating}#{', ' if rating && (!asin&.empty? || !price&.empty?)}",
138
+ price: price,
139
+ isbn_asin_text: "ASIN",
140
+ isbn_asin: asin,
141
+ publisher: publisher,
142
+ published: "#{published}#{', ' if published && !price&.empty?}"
143
+ }
144
+
145
+ else
146
+ title = og.title || CGI.unescapeHTML(raw.css("title").inner_text)
147
+ result = {
148
+ link: link,
149
+ title: title,
150
+ image: og.image || image,
151
+ price: price
152
+ }
153
+
154
+ result[:by_info] = raw.at("#by-line")
155
+ result[:by_info] = Onebox::Helpers.clean(result[:by_info].inner_html) if result[:by_info]
156
+
157
+ summary = raw.at("#productDescription")
158
+ result[:description] = og.description || (summary && summary.inner_text)
159
+ end
160
+
161
+ result[:price] = nil if result[:price].start_with?("$0") || result[:price] == 0
162
+
163
+ result
164
+ end
165
+ end
166
+ end
167
+ end