rcarvalho-link_thumbnailer 1.0.9.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. data/.gitignore +19 -0
  2. data/.rspec +2 -0
  3. data/.travis.yml +6 -0
  4. data/CHANGELOG.md +91 -0
  5. data/Gemfile +12 -0
  6. data/LICENSE +22 -0
  7. data/README.md +184 -0
  8. data/Rakefile +7 -0
  9. data/app/controllers/link_thumbnailer/application_controller.rb +4 -0
  10. data/app/controllers/link_thumbnailer/previews_controller.rb +11 -0
  11. data/lib/generators/link_thumbnailer/install_generator.rb +19 -0
  12. data/lib/generators/templates/initializer.rb +41 -0
  13. data/lib/link_thumbnailer.rb +96 -0
  14. data/lib/link_thumbnailer/configuration.rb +6 -0
  15. data/lib/link_thumbnailer/doc.rb +65 -0
  16. data/lib/link_thumbnailer/doc_parser.rb +15 -0
  17. data/lib/link_thumbnailer/engine.rb +9 -0
  18. data/lib/link_thumbnailer/fetcher.rb +34 -0
  19. data/lib/link_thumbnailer/img_comparator.rb +18 -0
  20. data/lib/link_thumbnailer/img_parser.rb +46 -0
  21. data/lib/link_thumbnailer/img_url_filter.rb +13 -0
  22. data/lib/link_thumbnailer/object.rb +41 -0
  23. data/lib/link_thumbnailer/opengraph.rb +20 -0
  24. data/lib/link_thumbnailer/rails/routes.rb +47 -0
  25. data/lib/link_thumbnailer/rails/routes/mapper.rb +30 -0
  26. data/lib/link_thumbnailer/rails/routes/mapping.rb +33 -0
  27. data/lib/link_thumbnailer/version.rb +3 -0
  28. data/lib/link_thumbnailer/web_image.rb +18 -0
  29. data/link_thumbnailer.gemspec +28 -0
  30. data/spec/doc_parser_spec.rb +25 -0
  31. data/spec/doc_spec.rb +23 -0
  32. data/spec/examples/empty_example.html +11 -0
  33. data/spec/examples/example.html +363 -0
  34. data/spec/examples/og_example.html +12 -0
  35. data/spec/fetcher_spec.rb +97 -0
  36. data/spec/img_comparator_spec.rb +16 -0
  37. data/spec/img_url_filter_spec.rb +31 -0
  38. data/spec/link_thumbnailer_spec.rb +205 -0
  39. data/spec/object_spec.rb +130 -0
  40. data/spec/opengraph_spec.rb +7 -0
  41. data/spec/spec_helper.rb +13 -0
  42. data/spec/web_image_spec.rb +57 -0
  43. metadata +245 -0
@@ -0,0 +1,25 @@
1
+ require 'spec_helper'
2
+
3
+ describe LinkThumbnailer::DocParser do # Spec for DocParser: checks #parse's arity and the interface of the object it returns
4
+
5
+ it { should respond_to(:parse).with(1).arguments } # uses the implicit subject; #parse must accept a single-argument call
6
+
7
+ let(:instance) { LinkThumbnailer::DocParser.new }
8
+
9
+ describe "#parse" do
10
+
11
+ let(:source_url) { 'http://foo.com' }
12
+
13
+ subject { instance.parse('', source_url) } # parses an empty string and passes source_url as a second argument. NOTE(review): the arity example above only checks one argument, so the second parameter is presumably optional - confirm
14
+
15
+ it { expect(subject.source_url).to eq(source_url) } # the URL handed to #parse must be readable back from the result
16
+ it { expect(subject).to respond_to(:doc_base_href) } # the remaining examples only assert that the result exposes these methods, not what they return
17
+ it { expect(subject).to respond_to(:img_srcs) }
18
+ it { expect(subject).to respond_to(:img_abs_urls) }
19
+ it { expect(subject).to respond_to(:title) }
20
+ it { expect(subject).to respond_to(:description) }
21
+ it { expect(subject).to respond_to(:source_url) }
22
+
23
+ end
24
+
25
+ end
@@ -0,0 +1,23 @@
1
+ require 'spec_helper'
2
+
3
+ describe LinkThumbnailer::Doc do # Spec for the Doc mixin: an object extended with it must expose the document reader methods
4
+
5
+ class Foo # empty host class for the mixin. NOTE(review): a class declared inside a describe block may leak the constant Foo into other specs - confirm and consider an anonymous class
6
+ end
7
+
8
+ let(:foo) { Foo.new }
9
+
10
+ before do
11
+ foo.extend LinkThumbnailer::Doc # mixes the module into this single instance before every example
12
+ end
13
+
14
+ subject { foo }
15
+
16
+ it { should respond_to :doc_base_href } # examples below only assert that the methods exist on the extended object
17
+ it { should respond_to :img_srcs }
18
+ it { should respond_to :img_abs_urls }
19
+ it { should respond_to :title }
20
+ it { should respond_to :description }
21
+ it { should respond_to :source_url }
22
+
23
+ end
@@ -0,0 +1,11 @@
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
5
+ <meta charset="UTF-8">
6
+ <title>Foo.com</title>
7
+ </head>
8
+
9
+ <body>
10
+ </body>
11
+ </html>
@@ -0,0 +1,363 @@
1
+ <!DOCTYPE html>
2
+ <!-- saved from url=(0019)http://www.foo.com/ -->
3
+ <html><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
4
+ <meta charset="UTF-8">
5
+ <title>Foo.com</title>
6
+ <link href="./example_files/style-3706572431311a3a4cf5c6f3266c9bce.css" media="all" rel="stylesheet" type="text/css">
7
+ <link href="./example_files/belt_layout1_google-1c8ee8bd905a6121e7a7ac73b7ffb888.css" media="all" rel="stylesheet" type="text/css">
8
+ <script src="./example_files/application-e8b3208729190cb9cc8a800ff3283fd8.js" type="text/javascript"></script>
9
+ <link href="./example_files/load_style.css" media="all" rel="stylesheet" type="text/css">
10
+ <meta content="authenticity_token" name="csrf-param">
11
+ <meta content="Mh9Ervhd7HlDNhhJdZ+nKec9RE0sn2FO8nyJXUrKUng=" name="csrf-token">
12
+ </head>
13
+ <body>
14
+
15
+
16
+ <style>
17
+ .listing .sitelinks{
18
+ padding:5px 15px;
19
+ }
20
+
21
+ .listing .sitelinks td{
22
+ padding-right:16px;
23
+
24
+ }
25
+ .listing .sitelinks a, .listing .ratings a{
26
+ color:#0000DE;
27
+ }
28
+ .listing .url a:hover{
29
+ color:#a02934;
30
+ }
31
+ .ratings{
32
+ color: #666666;
33
+ }
34
+ .listing{
35
+ margin-bottom:15px;
36
+ }
37
+ .listing .url {
38
+ font-size:16px;
39
+ }
40
+ .listing .host {
41
+ color:#a02934;
42
+ display:block;
43
+ font-size:16px;
44
+ text-decoration:none;
45
+ }
46
+
47
+ .listing .url a{
48
+ color:#0000DE;
49
+ line-height:auto;
50
+ }
51
+ .listing .description a{
52
+ text-decoration:none;
53
+ color:black;
54
+ font-size:16px;
55
+ line-height:12pt;
56
+ }
57
+ .listing .host a{
58
+ text-decoration:none;
59
+ color:#008000;
60
+ font-size:16px;
61
+ line-height:12pt;
62
+ }
63
+ </style>
64
+ <script src="./example_files/jquery-1.5.js"></script>
65
+ <script>
66
+ var google_about_url = '';
67
+ var ads_array = new Array();
68
+ var bottom_ads_array = new Array();
69
+ var webSearch = "";
70
+ var google_tag = "";
71
+
72
+ var genCatSearches_array=new Array();
73
+ var relatedSearches_array=new Array();
74
+ var popularCategories_array = new Array();
75
+ var digi_keyword_links_array = new Array();
76
+ var global_afd_response = "";
77
+
78
+ var call_num = 1;
79
+ function redirect(){ // Despite its name, performs no navigation: it only overwrites the #results element
80
+ document.getElementById('results').innerHTML = "No results found"; // throws if no element with id "results" exists
81
+ }
82
+
83
+ function jscript_log(name, severity, message, page){ // Reports a client-side event by POSTing it to /log_error through jQuery's $.ajax
84
+ $.ajax({
85
+ type: "POST",
86
+ url: "/log_error",
87
+ data: { // payload: the four arguments plus the current hostname and the global google_tag
88
+ name: name,
89
+ severity: severity,
90
+ domain: window.location.hostname,
91
+ message: message,
92
+ tag: google_tag,
93
+ page: page
94
+ },
95
+ context: document.body
96
+ }) // no success or error handlers are attached, so the outcome of the request is ignored
97
+ }
98
+
99
+ function token_link(keyword){ // Navigates to /results for the given keyword, appending the token from the last stored ad response
100
+ window.location = '/results' + '?q=' + encodeURIComponent(keyword) + '&token=' + global_afd_response.token; // the keyword is URL-encoded, the token is appended as-is
101
+ return false; // always returns false (presumably so an inline click handler cancels the default action - confirm against callers)
102
+ }
103
+
104
+ function link_to_google(){ // Opens the stored feedback/about URL in a new window, if one has been set
105
+ if(google_about_url!=''){ // google_about_url is a global that starts out as the empty string
106
+ window.open(google_about_url,'_blank');
107
+ }
108
+ }
109
+
110
+ function gadlink(ga, inner_text, visible_url){ // Builds the HTML string for an anchor that shows visible_url as href but opens ga.url when clicked
111
+ visible_url = visible_url.replace(/<b>/gi,'') // strip opening bold tags, case-insensitively
112
+ visible_url = visible_url.replace(/<\/b>/gi,'') // strip closing bold tags
113
+ return '<a ' +
114
+ 'href="http://' + visible_url + '"' +
115
+ ' onclick="window.open(\'' + ga.url + '\');return false;"' +
116
+ ' target="_blank" ' +
117
+ '>' +
118
+ inner_text + '</a>'; // NOTE(review): ga.url, visible_url and inner_text are concatenated into markup without escaping
119
+
120
+ }
121
+
122
+ function google_afd_ad_request_done(google_afd_response) { // Callback that turns an ad response object into HTML fragments and writes them into the page
123
+ if(google_afd_response.faillisted==true){ // report "faillisted" responses to the server-side log
124
+ jscript_log('faillisted', 10, JSON.stringify(google_afd_response), "/")
125
+ }
126
+ if(google_afd_response.token || google_afd_response.ads){ // nothing below runs unless the response carries a token or ads
127
+
128
+ global_afd_response = google_afd_response; // keep the response in a global so token_link can read its token later
129
+ if(google_afd_response.feedback_url){
130
+ google_about_url = google_afd_response.feedback_url;
131
+ }
132
+
133
+ if (google_afd_response.ads && google_afd_response.ads.length > 0) { // section 1: one "listing" fragment per ad
134
+ var google_ads = google_afd_response.ads;
135
+ for (var i = 0; i < google_ads.length; i++) {
136
+ seller_ratings = google_ads[i].seller_ratings; // NOTE(review): seller_ratings, site_links and ads have no var declaration in this function, so they look like implicit globals
137
+ site_links = google_ads[i].sitelinks;
138
+ ads="";
139
+ ads +=
140
+ '<div class="listing"><div class="url">' +
141
+ gadlink(google_ads[i], google_ads[i].line1, google_ads[i].visible_url) +
142
+ '</div>';
143
+ if (seller_ratings) {
144
+ ads += '<div class="ratings">'+seller_ratings.advertiser_info+' <img src="/assets/'+seller_ratings.rating+'star.gif" border="0"> <a href="'+seller_ratings.source_url+'" target="_blank">('+seller_ratings.review_count+')</a></div>';
145
+ }
146
+ ads +=
147
+ '<div class="description">' + google_ads[i].line2 + ' ' +
148
+ (google_ads[i].line3 != undefined ? google_ads[i].line3 + '' : '') +
149
+ '</div>' +
150
+ '<div class="host">' +
151
+ gadlink(google_ads[i], google_ads[i].visible_url, google_ads[i].visible_url) +
152
+ '</div>';
153
+ if (site_links && site_links.length > 0) { // optional sitelinks table, two cells per row
154
+ ads += '<table class="sitelinks" cellpadding=0 cellspacing=0 border=0>';
155
+ for (var x = 0; x < site_links.length; x++) {
156
+ if(x % 2 == 0){ // even index opens a row
157
+ ads += '<tr>';
158
+ }
159
+ ads +=
160
+ '<td class="sitelink">' +
161
+ gadlink(site_links[x], site_links[x].link_text, google_ads[i].visible_url) +
162
+ '</td>';
163
+ if(x % 2 == 1){ // odd index closes the row
164
+ ads += '</tr>';
165
+ }
166
+ }
167
+ if(x % 2 == 0){ // NOTE(review): here x equals site_links.length; an odd count leaves the last row open (x % 2 == 1), so this check looks inverted - confirm
168
+ ads += '</tr>';
169
+ }
170
+ ads += '</table>';
171
+ }
172
+
173
+ ads += '</div>';
174
+ if(google_ads[i].position=="Top"){ // ads marked "Top" go to the upper list, everything else to the bottom list
175
+ ads_array.push(ads);
176
+ }else{
177
+ bottom_ads_array.push(ads);
178
+ }
179
+ }
180
+ }
181
+
182
+ if (google_afd_response.categories && google_afd_response.categories.length > 0) { // section 2: one "termgroup" per category with its subcategories
183
+ var google_categories = google_afd_response.categories;
184
+ for (var i = 0; i < google_categories.length; i++) {
185
+ subcats = google_categories[i].subcategories; // also assigned without var; the loop below assumes subcategories is always present
186
+ popularCategories =
187
+ '<div class="termgroup">' +
188
+ '<h3><a class="popular_category" ' +
189
+ 'href="/#' + google_categories[i].term + '"' +
190
+ ' onclick="window.location=\'/results' +
191
+ '?q=' + encodeURIComponent(google_categories[i].term) +
192
+ '&token=' + google_categories[i].token + '\';return false;"' +
193
+ '>' +
194
+ google_categories[i].term +
195
+ '</a></h3>';
196
+ for(var gci =0; gci < subcats.length; gci++){
197
+ popularCategories = popularCategories +
198
+ '<div class="term">' +
199
+ '<a class="popular_category" ' +
200
+ 'href="/#' + subcats[gci].term + '"' +
201
+ ' onclick="window.location=\'/results' +
202
+ '?q=' + encodeURIComponent(subcats[gci].term) +
203
+ '&token=' + subcats[gci].token + '\';return false;"' +
204
+ '>' +
205
+ subcats[gci].term +
206
+ '</a></div>';
207
+ }
208
+ popularCategories = popularCategories + '</div>';
209
+ popularCategories_array.push(popularCategories);
210
+ }
211
+ }
212
+
213
+ if (google_afd_response.link_units && google_afd_response.link_units.length > 0) { // section 3: flat list of related-search links
214
+ var google_link_units = google_afd_response.link_units;
215
+ for (var i = 0; i < google_link_units.length; i++) {
216
+ relatedSearches =
217
+ '<div class="term">' +
218
+ '<a class="related_searches" ' +
219
+ 'href="/#' + google_link_units[i].term + '"' +
220
+ ' onclick="window.location=\'/results' +
221
+ '?q=' + encodeURIComponent(google_link_units[i].term) +
222
+ '&token=' + google_link_units[i].token + '\';return false;"' +
223
+ '>' +
224
+ google_link_units[i].term +
225
+ '</a>' +
226
+ '</div>';
227
+ relatedSearches_array.push(relatedSearches);
228
+ }
229
+
230
+ }
231
+ if (google_afd_response.link_units && google_afd_response.link_units.length > 0) { // section 4: second pass over the same link_units, this time wrapped in groups of five
232
+ var google_link_units = google_afd_response.link_units;
233
+ for (var i = 0; i < google_link_units.length; i++) {
234
+ relatedSearches = "";
235
+ if(i % 5 == 0){ // every fifth item opens a new group
236
+ relatedSearches += "<div class='termgroup'>";
237
+ }
238
+ relatedSearches +=
239
+ '<div class="term">' +
240
+ '<a class="related_searches" ' +
241
+ 'href="/#' + google_link_units[i].term + '"' +
242
+ ' onclick="window.location=\'/results' +
243
+ '?q=' + encodeURIComponent(google_link_units[i].term) +
244
+ '&token=' + google_link_units[i].token + '\';return false;"' +
245
+ '>' +
246
+ google_link_units[i].term +
247
+ '</a>' +
248
+ '</div>';
249
+ if(((i % 5) == 4) || ((i-1)==google_link_units.length)){ // NOTE(review): i < length inside this loop, so (i-1)==length can never hold; (i+1)==length was presumably intended to close the last group - confirm
250
+ relatedSearches += "</div>";
251
+ }
252
+ if(((i+1) % 10 == 0)){ // a clearing element after every tenth item
253
+ relatedSearches += "<div class='clear'>&nbsp;</div>";
254
+ }
255
+ genCatSearches_array.push(relatedSearches);
256
+ }
257
+
258
+ }
259
+ try{document.getElementById('results').innerHTML = ads_array.join("\n");}catch(error){} // each write is wrapped in try/catch so a missing element is silently ignored
260
+ try{document.getElementById('bottomresults').innerHTML = bottom_ads_array.join("\n");}catch(error){}
261
+
262
+ if(ads_array.length>1){ // NOTE(review): the heading is shown only when there are at least two ads; >0 may have been intended - confirm. This access is not guarded by try/catch
263
+ document.getElementById('sponsoredresults').style.display='block';
264
+ }
265
+ if(bottom_ads_array.length>1){ // same threshold and same unguarded element access as above
266
+ document.getElementById('bottomsponsoredresults').style.display='block';
267
+ }
268
+
269
+ try{document.getElementById('keywords').innerHTML = relatedSearches_array.join("\n");}catch(error){}
270
+ try{document.getElementById('gencats').innerHTML = genCatSearches_array.join("\n");}catch(error){}
271
+ // if(genCatSearches_array.length==0){
272
+ // try{
273
+ // document.getElementById('gencats').innerHTML = popularCategories_array.join("\n");
274
+ // }catch(error){}
275
+ // }
276
+ try{document.getElementById('popcats').innerHTML = popularCategories_array.join("\n");}catch(error){}
277
+ try{document.getElementById('token').value = google_afd_response.search_token;}catch(error){} // copy the search token into the hidden form field
278
+ console.log(google_afd_response); // debug output left in the captured page
279
+ try{document.getElementById('token2').value = google_afd_response.search_token;}catch(error){}
280
+ }
281
+ }
282
+
283
+ function getParam(name) { // Returns the decoded value of query-string parameter `name` from location.search, or null when it is absent
284
+ var match = new RegExp('[\?&]' + name + "=([^&]+)","i").exec(location.search); // case-insensitive match. NOTE(review): name is inserted into the pattern without regex escaping
285
+ if (match==null) return null;
286
+ else return decodeURIComponent(match[1]).replace(/\+/g,' '); // '+' is turned into a space after decoding, so an encoded %2B also ends up as a space
287
+ }
288
+ </script>
289
+
290
+
291
+
292
+ <center>
293
+ <div id="container">
294
+ <div id="header">
295
+ <div class="logo">
296
+ <a href="./example_files/example.html">Foo.com</a>
297
+ </div>
298
+ </div>
299
+
300
+
301
+
302
+
303
+
304
+
305
+
306
+ <div id="main">
307
+ <div class="image">
308
+ <img alt="Cordovabeach" border="0" src="./example_files/cordovabeach.jpg">
309
+ </div>
310
+ <div class="search">
311
+ <div class="catch_phrase">
312
+ <div id="wysiwyg_id_322" class="mercury-region" data-type="editable">Search FOO.com
313
+ </div>
314
+ </div>
315
+ <div id="search">
316
+ <form action="http://www.foo.com/results" method="/get">
317
+ <input class="search_field" name="q" type="text">
318
+ <input id="token" name="token" type="hidden" value="AG06ipADDhq1IogUPtnbN7KiSVdr6HlXngoTCITm8PO41bICFSENtAodnwEA0RgDIAA4AUCwAlCUjqIBUIO6sAhQ4a65DlDN_pgPUOK53A9Qqo3jD1C1q_YPUM3B_g9QivCIEVCU-qcTUIfrhBtQx4yNHVCsxv8eUPD02SBQ1PXZIFDd2ZEhUIOcrSlQu5ytKVDSsa0pUPKxrSlQ3KCvKVCTnKYtULKU5y1QsKX8U1CTtJSVAVDL74OWAVDqp6KyAVCslfPAAVC6lfPAAVCOl_PAAVCYrMGbA1DlhNOdA2iUjqIBcQL5qk1K1hZJggETCOW09_O41bICFfILtAodzTkAq5EBp8Lq2utL_VI">
319
+ <input class="search_button" type="submit" value="Search">
320
+ </form>
321
+ </div>
322
+
323
+ </div>
324
+ </div>
325
+
326
+ <div id="footer">
327
+ <a href="http://www.foo.com/digimedia_privacy_policy.html" target="_blank">Privacy Policy</a>
328
+ -
329
+ © 2012 Digimedia.com, L.P.
330
+ </div>
331
+
332
+ </div>
333
+ </center>
334
+ <script type="text/javascript">
335
+ var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
336
+ document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
337
+ </script><script src="./example_files/ga.js" type="text/javascript"></script>
338
+ <script type="text/javascript">
339
+ var pageTracker = _gat._getTracker('UA-1726084-83');
340
+ pageTracker._initData();
341
+ pageTracker._setDomainName('foo.com');
342
+ pageTracker._trackPageview('/');
343
+ </script>
344
+
345
+ <script>
346
+ google_tag = 'dp-digimedia_js';
347
+ var google_afd_request = {
348
+ client: 'ca-dp-digimedia-rs_js',
349
+ domain_name: 'www.foo.com',
350
+ num_ads: 0,
351
+ num_radlinks: 10,
352
+ token: getParam('token'),
353
+ q: getParam('q'),
354
+ kw: 'foo',
355
+ channel: 'afsonly'
356
+ }
357
+ </script>
358
+ <script language="JavaScript" src="./example_files/show_afd_ads.js" type="text/javascript"></script><script src="./example_files/domainpark.cgi"></script>
359
+
360
+
361
+
362
+
363
+ </body></html>
@@ -0,0 +1,12 @@
1
+ <html xmlns:og="http://opengraphprotocol.org/schema/">
2
+ <head>
3
+ <meta charset="utf-8"/>
4
+ <meta property="og:type" content="website"/>
5
+ <meta property="og:site_name" content="foo.com">
6
+ <meta property="og:title" content="Foo Title">
7
+ <meta property="og:description" content="Foo description">
8
+ <meta property="og:image" content="http://foo.com/img/front/facebook.png">
9
+ <title>Foo</title>
10
+ </head>
11
+
12
+ </html>
@@ -0,0 +1,97 @@
1
+ require 'spec_helper'
2
+
3
+ describe LinkThumbnailer::Fetcher do # Spec for Fetcher: public interface, redirect limit, redirect following and HTTP error propagation
4
+
5
+ it { should respond_to :fetch } # interface checks against the implicit subject
6
+ it { should respond_to :url }
7
+ it { should respond_to :url= }
8
+
9
+ let(:fetcher) { LinkThumbnailer::Fetcher.new }
10
+ let(:url) { 'http://foo.com' }
11
+
12
+ describe ".fetch" do
13
+
14
+ before do # NOTE(review): the group label above uses the class-method prefix "." although fetch is called on an instance throughout
15
+ LinkThumbnailer.configure {|config| config.redirect_count = 3} # every example in this group runs with a redirect limit of 3
16
+ end
17
+
18
+ context "when redirect_count is more than config" do
19
+
20
+ it { expect { fetcher.fetch(url, 10) }.to raise_exception(ArgumentError) } # a second argument of 10 exceeds the configured 3 and must raise
21
+
22
+ end
23
+
24
+ context "when no http error" do
25
+
26
+ before do
27
+ stub_request(:get, url).to_return(status: 200, body: 'foo', headers: {}) # stubbed GET (presumably WebMock, loaded via spec_helper - confirm), so no real network access
28
+ end
29
+
30
+ it "returns body response" do
31
+ fetcher.fetch(url).should eq('foo') # NOTE(review): this file mixes the older should syntax with expect
32
+ end
33
+
34
+ it "sets fetcher url" do
35
+ fetcher.fetch(url)
36
+ fetcher.url.to_s.should eq(url) # the fetched URL is remembered on the fetcher
37
+ end
38
+
39
+ end
40
+
41
+ context "when http redirection" do
42
+
43
+ context "and relative uri" do
44
+
45
+ let(:another_url) { '/bar' }
46
+
47
+ before do
48
+ stub_request(:get, url).to_return(status: 300, body: 'foo', headers: { 'Location' => another_url }) # first response is a 300 with a relative Location header
49
+ stub_request(:get, url + another_url).to_return(status: 200, body: 'bar', headers: {}) # the relative target is expected to be requested as url + '/bar'
50
+ end
51
+
52
+ it "returns body response" do
53
+ fetcher.fetch(url).should eq('bar') # the body of the redirect target is returned, not the body of the first response
54
+ end
55
+
56
+ it "sets fetcher url" do
57
+ fetcher.fetch(url)
58
+ fetcher.url.to_s.should eq(url + another_url) # after the redirect the fetcher reports the final URL
59
+ end
60
+
61
+ end
62
+
63
+ context "and absolute uri" do
64
+
65
+ let(:another_url) { 'http://bar.com' }
66
+
67
+ before do
68
+ stub_request(:get, url).to_return(status: 300, body: 'foo', headers: { 'Location' => another_url }) # same scenario with an absolute Location on a different host
69
+ stub_request(:get, another_url).to_return(status: 200, body: 'bar', headers: {})
70
+ end
71
+
72
+ it "returns body response" do
73
+ fetcher.fetch(url).should eq('bar')
74
+ end
75
+
76
+ it "sets fetcher url" do
77
+ fetcher.fetch(url)
78
+ fetcher.url.to_s.should eq(another_url)
79
+ end
80
+
81
+ end
82
+
83
+ end
84
+
85
+ context "when http error" do
86
+
87
+ before do
88
+ stub_request(:get, url).to_return(status: 500, body: 'foo', headers: {})
89
+ end
90
+
91
+ it { expect { fetcher.fetch(url) }.to raise_exception(Net::HTTPFatalError) } # a 500 response must surface as Net::HTTPFatalError rather than return a body
92
+
93
+ end
94
+
95
+ end
96
+
97
+ end