rcarvalho-link_thumbnailer 1.0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. data/.gitignore +19 -0
  2. data/.rspec +2 -0
  3. data/.travis.yml +6 -0
  4. data/CHANGELOG.md +91 -0
  5. data/Gemfile +12 -0
  6. data/LICENSE +22 -0
  7. data/README.md +184 -0
  8. data/Rakefile +7 -0
  9. data/app/controllers/link_thumbnailer/application_controller.rb +4 -0
  10. data/app/controllers/link_thumbnailer/previews_controller.rb +11 -0
  11. data/lib/generators/link_thumbnailer/install_generator.rb +19 -0
  12. data/lib/generators/templates/initializer.rb +41 -0
  13. data/lib/link_thumbnailer.rb +96 -0
  14. data/lib/link_thumbnailer/configuration.rb +6 -0
  15. data/lib/link_thumbnailer/doc.rb +65 -0
  16. data/lib/link_thumbnailer/doc_parser.rb +15 -0
  17. data/lib/link_thumbnailer/engine.rb +9 -0
  18. data/lib/link_thumbnailer/fetcher.rb +34 -0
  19. data/lib/link_thumbnailer/img_comparator.rb +18 -0
  20. data/lib/link_thumbnailer/img_parser.rb +46 -0
  21. data/lib/link_thumbnailer/img_url_filter.rb +13 -0
  22. data/lib/link_thumbnailer/object.rb +41 -0
  23. data/lib/link_thumbnailer/opengraph.rb +20 -0
  24. data/lib/link_thumbnailer/rails/routes.rb +47 -0
  25. data/lib/link_thumbnailer/rails/routes/mapper.rb +30 -0
  26. data/lib/link_thumbnailer/rails/routes/mapping.rb +33 -0
  27. data/lib/link_thumbnailer/version.rb +3 -0
  28. data/lib/link_thumbnailer/web_image.rb +18 -0
  29. data/link_thumbnailer.gemspec +28 -0
  30. data/spec/doc_parser_spec.rb +25 -0
  31. data/spec/doc_spec.rb +23 -0
  32. data/spec/examples/empty_example.html +11 -0
  33. data/spec/examples/example.html +363 -0
  34. data/spec/examples/og_example.html +12 -0
  35. data/spec/fetcher_spec.rb +97 -0
  36. data/spec/img_comparator_spec.rb +16 -0
  37. data/spec/img_url_filter_spec.rb +31 -0
  38. data/spec/link_thumbnailer_spec.rb +205 -0
  39. data/spec/object_spec.rb +130 -0
  40. data/spec/opengraph_spec.rb +7 -0
  41. data/spec/spec_helper.rb +13 -0
  42. data/spec/web_image_spec.rb +57 -0
  43. metadata +245 -0
@@ -0,0 +1,25 @@
1
+ require 'spec_helper'
2
+
3
+ describe LinkThumbnailer::DocParser do
4
+
5
+ it { should respond_to(:parse).with(1).arguments }
6
+
7
+ let(:instance) { LinkThumbnailer::DocParser.new }
8
+
9
+ describe "#parse" do
10
+
11
+ let(:source_url) { 'http://foo.com' }
12
+
13
+ subject { instance.parse('', source_url) }
14
+
15
+ it { expect(subject.source_url).to eq(source_url) }
16
+ it { expect(subject).to respond_to(:doc_base_href) }
17
+ it { expect(subject).to respond_to(:img_srcs) }
18
+ it { expect(subject).to respond_to(:img_abs_urls) }
19
+ it { expect(subject).to respond_to(:title) }
20
+ it { expect(subject).to respond_to(:description) }
21
+ it { expect(subject).to respond_to(:source_url) }
22
+
23
+ end
24
+
25
+ end
@@ -0,0 +1,23 @@
1
+ require 'spec_helper'
2
+
3
+ describe LinkThumbnailer::Doc do
4
+
5
+ class Foo
6
+ end
7
+
8
+ let(:foo) { Foo.new }
9
+
10
+ before do
11
+ foo.extend LinkThumbnailer::Doc
12
+ end
13
+
14
+ subject { foo }
15
+
16
+ it { should respond_to :doc_base_href }
17
+ it { should respond_to :img_srcs }
18
+ it { should respond_to :img_abs_urls }
19
+ it { should respond_to :title }
20
+ it { should respond_to :description }
21
+ it { should respond_to :source_url }
22
+
23
+ end
@@ -0,0 +1,11 @@
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
5
+ <meta charset="UTF-8">
6
+ <title>Foo.com</title>
7
+ </head>
8
+
9
+ <body>
10
+ </body>
11
+ </html>
@@ -0,0 +1,363 @@
1
+ <!DOCTYPE html>
2
+ <!-- saved from url=(0019)http://www.foo.com/ -->
3
+ <html><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
4
+ <meta charset="UTF-8">
5
+ <title>Foo.com</title>
6
+ <link href="./example_files/style-3706572431311a3a4cf5c6f3266c9bce.css" media="all" rel="stylesheet" type="text/css">
7
+ <link href="./example_files/belt_layout1_google-1c8ee8bd905a6121e7a7ac73b7ffb888.css" media="all" rel="stylesheet" type="text/css">
8
+ <script src="./example_files/application-e8b3208729190cb9cc8a800ff3283fd8.js" type="text/javascript"></script>
9
+ <link href="./example_files/load_style.css" media="all" rel="stylesheet" type="text/css">
10
+ <meta content="authenticity_token" name="csrf-param">
11
+ <meta content="Mh9Ervhd7HlDNhhJdZ+nKec9RE0sn2FO8nyJXUrKUng=" name="csrf-token">
12
+ </head>
13
+ <body>
14
+
15
+
16
+ <style>
17
+ .listing .sitelinks{
18
+ padding:5px 15px;
19
+ }
20
+
21
+ .listing .sitelinks td{
22
+ padding-right:16px;
23
+
24
+ }
25
+ .listing .sitelinks a, .listing .ratings a{
26
+ color:#0000DE;
27
+ }
28
+ .listing .url a:hover{
29
+ color:#a02934;
30
+ }
31
+ .ratings{
32
+ color: #666666;
33
+ }
34
+ .listing{
35
+ margin-bottom:15px;
36
+ }
37
+ .listing .url {
38
+ font-size:16px;
39
+ }
40
+ .listing .host {
41
+ color:#a02934;
42
+ display:block;
43
+ font-size:16px;
44
+ text-decoration:none;
45
+ }
46
+
47
+ .listing .url a{
48
+ color:#0000DE;
49
+ line-height:auto;
50
+ }
51
+ .listing .description a{
52
+ text-decoration:none;
53
+ color:black;
54
+ font-size:16px;
55
+ line-height:12pt;
56
+ }
57
+ .listing .host a{
58
+ text-decoration:none;
59
+ color:#008000;
60
+ font-size:16px;
61
+ line-height:12pt;
62
+ }
63
+ </style>
64
+ <script src="./example_files/jquery-1.5.js"></script>
65
+ <script>
66
+ var google_about_url = '';
67
+ var ads_array = new Array();
68
+ var bottom_ads_array = new Array();
69
+ var webSearch = "";
70
+ var google_tag = "";
71
+
72
+ var genCatSearches_array=new Array();
73
+ var relatedSearches_array=new Array();
74
+ var popularCategories_array = new Array();
75
+ var digi_keyword_links_array = new Array();
76
+ var global_afd_response = "";
77
+
78
+ var call_num = 1;
79
+ function redirect(){ // Despite its name, performs no navigation: it only overwrites the #results element's content.
80
+ document.getElementById('results').innerHTML = "No results found";
81
+ }
82
+
83
+ function jscript_log(name, severity, message, page){ // POSTs a log record to /log_error via jQuery; no success/error callbacks are attached.
84
+ $.ajax({
85
+ type: "POST",
86
+ url: "/log_error",
87
+ data: {
88
+ name: name,
89
+ severity: severity,
90
+ domain: window.location.hostname, // host of the page doing the logging
91
+ message: message,
92
+ tag: google_tag, // global set by a later script block on the page
93
+ page: page
94
+ },
95
+ context: document.body
96
+ })
97
+ }
98
+
99
+ function token_link(keyword){ // Navigates to /results for `keyword`, appending the token of the last stored AFD response.
100
+ window.location = '/results' + '?q=' + encodeURIComponent(keyword) + '&token=' + global_afd_response.token; // NOTE(review): the token is appended without URL-encoding
101
+ return false; // presumably so an onclick handler can cancel the link's default action; confirm against callers
102
+ }
103
+
104
+ function link_to_google(){ // Opens google_about_url in a new window; does nothing while that global is still empty.
105
+ if(google_about_url!=''){
106
+ window.open(google_about_url,'_blank');
107
+ }
108
+ }
109
+
110
+ function gadlink(ga, inner_text, visible_url){ // Returns an <a> HTML string that shows visible_url as href but opens ga.url on click.
111
+ visible_url = visible_url.replace(/<b>/gi,'') // strip opening <b> tags from the display URL
112
+ visible_url = visible_url.replace(/<\/b>/gi,'') // strip closing </b> tags
113
+ return '<a ' +
114
+ 'href="http://' + visible_url + '"' +
115
+ ' onclick="window.open(\'' + ga.url + '\');return false;"' + // NOTE(review): ga.url and inner_text are inserted without escaping
116
+ ' target="_blank" ' +
117
+ '>' +
118
+ inner_text + '</a>';
119
+
120
+ }
121
+
122
+ function google_afd_ad_request_done(google_afd_response) {
123
+ if(google_afd_response.faillisted==true){
124
+ jscript_log('faillisted', 10, JSON.stringify(google_afd_response), "/")
125
+ }
126
+ if(google_afd_response.token || google_afd_response.ads){
127
+
128
+ global_afd_response = google_afd_response;
129
+ if(google_afd_response.feedback_url){
130
+ google_about_url = google_afd_response.feedback_url;
131
+ }
132
+
133
+ if (google_afd_response.ads && google_afd_response.ads.length > 0) {
134
+ var google_ads = google_afd_response.ads;
135
+ for (var i = 0; i < google_ads.length; i++) {
136
+ seller_ratings = google_ads[i].seller_ratings;
137
+ site_links = google_ads[i].sitelinks;
138
+ ads="";
139
+ ads +=
140
+ '<div class="listing"><div class="url">' +
141
+ gadlink(google_ads[i], google_ads[i].line1, google_ads[i].visible_url) +
142
+ '</div>';
143
+ if (seller_ratings) {
144
+ ads += '<div class="ratings">'+seller_ratings.advertiser_info+' <img src="/assets/'+seller_ratings.rating+'star.gif" border="0"> <a href="'+seller_ratings.source_url+'" target="_blank">('+seller_ratings.review_count+')</a></div>';
145
+ }
146
+ ads +=
147
+ '<div class="description">' + google_ads[i].line2 + ' ' +
148
+ (google_ads[i].line3 != undefined ? google_ads[i].line3 + '' : '') +
149
+ '</div>' +
150
+ '<div class="host">' +
151
+ gadlink(google_ads[i], google_ads[i].visible_url, google_ads[i].visible_url) +
152
+ '</div>';
153
+ if (site_links && site_links.length > 0) {
154
+ ads += '<table class="sitelinks" cellpadding=0 cellspacing=0 border=0>';
155
+ for (var x = 0; x < site_links.length; x++) {
156
+ if(x % 2 == 0){
157
+ ads += '<tr>';
158
+ }
159
+ ads +=
160
+ '<td class="sitelink">' +
161
+ gadlink(site_links[x], site_links[x].link_text, google_ads[i].visible_url) +
162
+ '</td>';
163
+ if(x % 2 == 1){
164
+ ads += '</tr>';
165
+ }
166
+ }
167
+ if(x % 2 == 0){
168
+ ads += '</tr>';
169
+ }
170
+ ads += '</table>';
171
+ }
172
+
173
+ ads += '</div>';
174
+ if(google_ads[i].position=="Top"){
175
+ ads_array.push(ads);
176
+ }else{
177
+ bottom_ads_array.push(ads);
178
+ }
179
+ }
180
+ }
181
+
182
+ if (google_afd_response.categories && google_afd_response.categories.length > 0) {
183
+ var google_categories = google_afd_response.categories;
184
+ for (var i = 0; i < google_categories.length; i++) {
185
+ subcats = google_categories[i].subcategories;
186
+ popularCategories =
187
+ '<div class="termgroup">' +
188
+ '<h3><a class="popular_category" ' +
189
+ 'href="/#' + google_categories[i].term + '"' +
190
+ ' onclick="window.location=\'/results' +
191
+ '?q=' + encodeURIComponent(google_categories[i].term) +
192
+ '&token=' + google_categories[i].token + '\';return false;"' +
193
+ '>' +
194
+ google_categories[i].term +
195
+ '</a></h3>';
196
+ for(var gci =0; gci < subcats.length; gci++){
197
+ popularCategories = popularCategories +
198
+ '<div class="term">' +
199
+ '<a class="popular_category" ' +
200
+ 'href="/#' + subcats[gci].term + '"' +
201
+ ' onclick="window.location=\'/results' +
202
+ '?q=' + encodeURIComponent(subcats[gci].term) +
203
+ '&token=' + subcats[gci].token + '\';return false;"' +
204
+ '>' +
205
+ subcats[gci].term +
206
+ '</a></div>';
207
+ }
208
+ popularCategories = popularCategories + '</div>';
209
+ popularCategories_array.push(popularCategories);
210
+ }
211
+ }
212
+
213
+ if (google_afd_response.link_units && google_afd_response.link_units.length > 0) {
214
+ var google_link_units = google_afd_response.link_units;
215
+ for (var i = 0; i < google_link_units.length; i++) {
216
+ relatedSearches =
217
+ '<div class="term">' +
218
+ '<a class="related_searches" ' +
219
+ 'href="/#' + google_link_units[i].term + '"' +
220
+ ' onclick="window.location=\'/results' +
221
+ '?q=' + encodeURIComponent(google_link_units[i].term) +
222
+ '&token=' + google_link_units[i].token + '\';return false;"' +
223
+ '>' +
224
+ google_link_units[i].term +
225
+ '</a>' +
226
+ '</div>';
227
+ relatedSearches_array.push(relatedSearches);
228
+ }
229
+
230
+ }
231
+ if (google_afd_response.link_units && google_afd_response.link_units.length > 0) {
232
+ var google_link_units = google_afd_response.link_units;
233
+ for (var i = 0; i < google_link_units.length; i++) {
234
+ relatedSearches = "";
235
+ if(i % 5 == 0){
236
+ relatedSearches += "<div class='termgroup'>";
237
+ }
238
+ relatedSearches +=
239
+ '<div class="term">' +
240
+ '<a class="related_searches" ' +
241
+ 'href="/#' + google_link_units[i].term + '"' +
242
+ ' onclick="window.location=\'/results' +
243
+ '?q=' + encodeURIComponent(google_link_units[i].term) +
244
+ '&token=' + google_link_units[i].token + '\';return false;"' +
245
+ '>' +
246
+ google_link_units[i].term +
247
+ '</a>' +
248
+ '</div>';
249
+ if(((i % 5) == 4) || ((i-1)==google_link_units.length)){
250
+ relatedSearches += "</div>";
251
+ }
252
+ if(((i+1) % 10 == 0)){
253
+ relatedSearches += "<div class='clear'>&nbsp;</div>";
254
+ }
255
+ genCatSearches_array.push(relatedSearches);
256
+ }
257
+
258
+ }
259
+ try{document.getElementById('results').innerHTML = ads_array.join("\n");}catch(error){}
260
+ try{document.getElementById('bottomresults').innerHTML = bottom_ads_array.join("\n");}catch(error){}
261
+
262
+ if(ads_array.length>1){
263
+ document.getElementById('sponsoredresults').style.display='block';
264
+ }
265
+ if(bottom_ads_array.length>1){
266
+ document.getElementById('bottomsponsoredresults').style.display='block';
267
+ }
268
+
269
+ try{document.getElementById('keywords').innerHTML = relatedSearches_array.join("\n");}catch(error){}
270
+ try{document.getElementById('gencats').innerHTML = genCatSearches_array.join("\n");}catch(error){}
271
+ // if(genCatSearches_array.length==0){
272
+ // try{
273
+ // document.getElementById('gencats').innerHTML = popularCategories_array.join("\n");
274
+ // }catch(error){}
275
+ // }
276
+ try{document.getElementById('popcats').innerHTML = popularCategories_array.join("\n");}catch(error){}
277
+ try{document.getElementById('token').value = google_afd_response.search_token;}catch(error){}
278
+ console.log(google_afd_response);
279
+ try{document.getElementById('token2').value = google_afd_response.search_token;}catch(error){}
280
+ }
281
+ }
282
+
283
+ function getParam(name) {
284
+ var match = new RegExp('[\?&]' + name + "=([^&]+)","i").exec(location.search);
285
+ if (match==null) return null;
286
+ else return decodeURIComponent(match[1]).replace(/\+/g,' ');
287
+ }
288
+ </script>
289
+
290
+
291
+
292
+ <center>
293
+ <div id="container">
294
+ <div id="header">
295
+ <div class="logo">
296
+ <a href="./example_files/example.html">Foo.com</a>
297
+ </div>
298
+ </div>
299
+
300
+
301
+
302
+
303
+
304
+
305
+
306
+ <div id="main">
307
+ <div class="image">
308
+ <img alt="Cordovabeach" border="0" src="./example_files/cordovabeach.jpg">
309
+ </div>
310
+ <div class="search">
311
+ <div class="catch_phrase">
312
+ <div id="wysiwyg_id_322" class="mercury-region" data-type="editable">Search FOO.com
313
+ </div>
314
+ </div>
315
+ <div id="search">
316
+ <form action="http://www.foo.com/results" method="/get">
317
+ <input class="search_field" name="q" type="text">
318
+ <input id="token" name="token" type="hidden" value="AG06ipADDhq1IogUPtnbN7KiSVdr6HlXngoTCITm8PO41bICFSENtAodnwEA0RgDIAA4AUCwAlCUjqIBUIO6sAhQ4a65DlDN_pgPUOK53A9Qqo3jD1C1q_YPUM3B_g9QivCIEVCU-qcTUIfrhBtQx4yNHVCsxv8eUPD02SBQ1PXZIFDd2ZEhUIOcrSlQu5ytKVDSsa0pUPKxrSlQ3KCvKVCTnKYtULKU5y1QsKX8U1CTtJSVAVDL74OWAVDqp6KyAVCslfPAAVC6lfPAAVCOl_PAAVCYrMGbA1DlhNOdA2iUjqIBcQL5qk1K1hZJggETCOW09_O41bICFfILtAodzTkAq5EBp8Lq2utL_VI">
319
+ <input class="search_button" type="submit" value="Search">
320
+ </form>
321
+ </div>
322
+
323
+ </div>
324
+ </div>
325
+
326
+ <div id="footer">
327
+ <a href="http://www.foo.com/digimedia_privacy_policy.html" target="_blank">Privacy Policy</a>
328
+ -
329
+ © 2012 Digimedia.com, L.P.
330
+ </div>
331
+
332
+ </div>
333
+ </center>
334
+ <script type="text/javascript">
335
+ var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
336
+ document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
337
+ </script><script src="./example_files/ga.js" type="text/javascript"></script>
338
+ <script type="text/javascript">
339
+ var pageTracker = _gat._getTracker('UA-1726084-83');
340
+ pageTracker._initData();
341
+ pageTracker._setDomainName('foo.com');
342
+ pageTracker._trackPageview('/');
343
+ </script>
344
+
345
+ <script>
346
+ google_tag = 'dp-digimedia_js';
347
+ var google_afd_request = {
348
+ client: 'ca-dp-digimedia-rs_js',
349
+ domain_name: 'www.foo.com',
350
+ num_ads: 0,
351
+ num_radlinks: 10,
352
+ token: getParam('token'),
353
+ q: getParam('q'),
354
+ kw: 'foo',
355
+ channel: 'afsonly'
356
+ }
357
+ </script>
358
+ <script language="JavaScript" src="./example_files/show_afd_ads.js" type="text/javascript"></script><script src="./example_files/domainpark.cgi"></script>
359
+
360
+
361
+
362
+
363
+ </body></html>
@@ -0,0 +1,12 @@
1
+ <html xmlns:og="http://opengraphprotocol.org/schema/">
2
+ <head>
3
+ <meta charset="utf-8"/>
4
+ <meta property="og:type" content="website"/>
5
+ <meta property="og:site_name" content="foo.com">
6
+ <meta property="og:title" content="Foo Title">
7
+ <meta property="og:description" content="Foo description">
8
+ <meta property="og:image" content="http://foo.com/img/front/facebook.png">
9
+ <title>Foo</title>
10
+ </head>
11
+
12
+ </html>
@@ -0,0 +1,97 @@
1
+ require 'spec_helper'
2
+
3
+ describe LinkThumbnailer::Fetcher do # Covers Fetcher#fetch: plain responses, redirects, HTTP errors and the redirect limit.
4
+
5
+ it { expect(subject).to respond_to :fetch }
6
+ it { expect(subject).to respond_to :url }
7
+ it { expect(subject).to respond_to :url= }
8
+
9
+ let(:fetcher) { LinkThumbnailer::Fetcher.new }
10
+ let(:url) { 'http://foo.com' }
11
+
12
+ describe "#fetch" do
13
+
14
+ before do
15
+ LinkThumbnailer.configure {|config| config.redirect_count = 3}
16
+ end
17
+
18
+ context "when redirect_count is more than config" do
19
+
20
+ it { expect { fetcher.fetch(url, 10) }.to raise_exception(ArgumentError) }
21
+
22
+ end
23
+
24
+ context "when no http error" do
25
+
26
+ before do
27
+ stub_request(:get, url).to_return(status: 200, body: 'foo', headers: {})
28
+ end
29
+
30
+ it "returns body response" do
31
+ expect(fetcher.fetch(url)).to eq('foo')
32
+ end
33
+
34
+ it "sets fetcher url" do
35
+ fetcher.fetch(url)
36
+ expect(fetcher.url.to_s).to eq(url)
37
+ end
38
+
39
+ end
40
+
41
+ context "when http redirection" do
42
+
43
+ context "and relative uri" do
44
+
45
+ let(:another_url) { '/bar' }
46
+
47
+ before do
48
+ stub_request(:get, url).to_return(status: 300, body: 'foo', headers: { 'Location' => another_url })
49
+ stub_request(:get, url + another_url).to_return(status: 200, body: 'bar', headers: {}) # the relative Location is expected to resolve against the original host
50
+ end
51
+
52
+ it "returns body response" do
53
+ expect(fetcher.fetch(url)).to eq('bar')
54
+ end
55
+
56
+ it "sets fetcher url" do
57
+ fetcher.fetch(url)
58
+ expect(fetcher.url.to_s).to eq(url + another_url)
59
+ end
60
+
61
+ end
62
+
63
+ context "and absolute uri" do
64
+
65
+ let(:another_url) { 'http://bar.com' }
66
+
67
+ before do
68
+ stub_request(:get, url).to_return(status: 300, body: 'foo', headers: { 'Location' => another_url })
69
+ stub_request(:get, another_url).to_return(status: 200, body: 'bar', headers: {})
70
+ end
71
+
72
+ it "returns body response" do
73
+ expect(fetcher.fetch(url)).to eq('bar')
74
+ end
75
+
76
+ it "sets fetcher url" do
77
+ fetcher.fetch(url)
78
+ expect(fetcher.url.to_s).to eq(another_url)
79
+ end
80
+
81
+ end
82
+
83
+ end
84
+
85
+ context "when http error" do
86
+
87
+ before do
88
+ stub_request(:get, url).to_return(status: 500, body: 'foo', headers: {})
89
+ end
90
+
91
+ it { expect { fetcher.fetch(url) }.to raise_exception(Net::HTTPFatalError) }
92
+
93
+ end
94
+
95
+ end
96
+
97
+ end