link_thumbnailer 1.1.2 → 2.0.0

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. checksums.yaml +5 -13
  2. data/.travis.yml +1 -1
  3. data/CHANGELOG.md +117 -104
  4. data/Gemfile +1 -1
  5. data/{LICENSE → LICENSE.txt} +21 -21
  6. data/README.md +153 -184
  7. data/lib/generators/link_thumbnailer/install_generator.rb +0 -4
  8. data/lib/generators/templates/initializer.rb +63 -41
  9. data/lib/link_thumbnailer/configuration.rb +52 -10
  10. data/lib/link_thumbnailer/exceptions.rb +6 -0
  11. data/lib/link_thumbnailer/grader.rb +37 -0
  12. data/lib/link_thumbnailer/graders/base.rb +32 -0
  13. data/lib/link_thumbnailer/graders/html_attribute.rb +49 -0
  14. data/lib/link_thumbnailer/graders/length.rb +19 -0
  15. data/lib/link_thumbnailer/graders/link_density.rb +21 -0
  16. data/lib/link_thumbnailer/graders/position.rb +11 -0
  17. data/lib/link_thumbnailer/image_comparator.rb +24 -0
  18. data/lib/link_thumbnailer/image_comparators/base.rb +17 -0
  19. data/lib/link_thumbnailer/image_comparators/size.rb +11 -0
  20. data/lib/link_thumbnailer/image_parser.rb +18 -0
  21. data/lib/link_thumbnailer/image_parsers/size.rb +15 -0
  22. data/lib/link_thumbnailer/image_parsers/type.rb +15 -0
  23. data/lib/link_thumbnailer/image_validator.rb +30 -0
  24. data/lib/link_thumbnailer/model.rb +16 -0
  25. data/lib/link_thumbnailer/models/description.rb +34 -0
  26. data/lib/link_thumbnailer/models/image.rb +54 -0
  27. data/lib/link_thumbnailer/models/title.rb +20 -0
  28. data/lib/link_thumbnailer/models/website.rb +39 -0
  29. data/lib/link_thumbnailer/page.rb +40 -0
  30. data/lib/link_thumbnailer/parser.rb +13 -0
  31. data/lib/link_thumbnailer/processor.rb +94 -0
  32. data/lib/link_thumbnailer/railtie.rb +9 -9
  33. data/lib/link_thumbnailer/scraper.rb +64 -0
  34. data/lib/link_thumbnailer/scrapers/base.rb +63 -0
  35. data/lib/link_thumbnailer/scrapers/default/base.rb +10 -0
  36. data/lib/link_thumbnailer/scrapers/default/description.rb +47 -0
  37. data/lib/link_thumbnailer/scrapers/default/images.rb +64 -0
  38. data/lib/link_thumbnailer/scrapers/default/title.rb +25 -0
  39. data/lib/link_thumbnailer/scrapers/opengraph/base.rb +43 -0
  40. data/lib/link_thumbnailer/scrapers/opengraph/description.rb +10 -0
  41. data/lib/link_thumbnailer/scrapers/opengraph/image.rb +30 -0
  42. data/lib/link_thumbnailer/scrapers/opengraph/images.rb +16 -0
  43. data/lib/link_thumbnailer/scrapers/opengraph/title.rb +10 -0
  44. data/lib/link_thumbnailer/version.rb +3 -3
  45. data/lib/link_thumbnailer.rb +36 -119
  46. data/link_thumbnailer.gemspec +26 -28
  47. data/spec/configuration_spec.rb +51 -0
  48. data/spec/examples/empty_og_image_example.html +9 -0
  49. data/spec/fixture_spec.rb +88 -0
  50. data/spec/fixtures/bar.png +2907 -0
  51. data/spec/fixtures/default_from_body.html +12 -0
  52. data/spec/fixtures/default_from_meta.html +11 -0
  53. data/spec/{examples → fixtures}/example.html +53 -53
  54. data/spec/fixtures/foo.png +0 -0
  55. data/spec/fixtures/og_not_valid_example.html +12 -0
  56. data/spec/fixtures/og_valid_example.html +12 -0
  57. data/spec/fixtures/og_valid_multi_image_example.html +13 -0
  58. data/spec/grader_spec.rb +24 -0
  59. data/spec/graders/base_spec.rb +12 -0
  60. data/spec/graders/html_attribute_spec.rb +48 -0
  61. data/spec/graders/length_spec.rb +81 -0
  62. data/spec/graders/link_density_spec.rb +22 -0
  63. data/spec/image_comparators/size_spec.rb +39 -0
  64. data/spec/image_parsers/size_spec.rb +34 -0
  65. data/spec/image_parsers/type_spec.rb +34 -0
  66. data/spec/image_validator_spec.rb +35 -0
  67. data/spec/model_spec.rb +17 -0
  68. data/spec/models/description_spec.rb +64 -0
  69. data/spec/models/image_spec.rb +71 -0
  70. data/spec/models/title_spec.rb +24 -0
  71. data/spec/models/website_spec.rb +49 -0
  72. data/spec/page_spec.rb +26 -0
  73. data/spec/processor_spec.rb +349 -0
  74. data/spec/scraper_spec.rb +95 -0
  75. data/spec/scrapers/base_spec.rb +67 -0
  76. data/spec/scrapers/opengraph/base_spec.rb +94 -0
  77. data/spec/spec_helper.rb +15 -13
  78. metadata +126 -120
  79. data/app/controllers/link_thumbnailer/application_controller.rb +0 -4
  80. data/app/controllers/link_thumbnailer/previews_controller.rb +0 -11
  81. data/lib/link_thumbnailer/doc.rb +0 -65
  82. data/lib/link_thumbnailer/doc_parser.rb +0 -15
  83. data/lib/link_thumbnailer/engine.rb +0 -4
  84. data/lib/link_thumbnailer/fetcher.rb +0 -34
  85. data/lib/link_thumbnailer/img_comparator.rb +0 -17
  86. data/lib/link_thumbnailer/img_parser.rb +0 -41
  87. data/lib/link_thumbnailer/img_url_filter.rb +0 -13
  88. data/lib/link_thumbnailer/object.rb +0 -41
  89. data/lib/link_thumbnailer/opengraph.rb +0 -20
  90. data/lib/link_thumbnailer/rails/routes/mapper.rb +0 -30
  91. data/lib/link_thumbnailer/rails/routes/mapping.rb +0 -33
  92. data/lib/link_thumbnailer/rails/routes.rb +0 -47
  93. data/lib/link_thumbnailer/web_image.rb +0 -19
  94. data/spec/doc_parser_spec.rb +0 -25
  95. data/spec/doc_spec.rb +0 -23
  96. data/spec/examples/empty_example.html +0 -11
  97. data/spec/examples/og_example.html +0 -12
  98. data/spec/fetcher_spec.rb +0 -97
  99. data/spec/img_comparator_spec.rb +0 -16
  100. data/spec/img_url_filter_spec.rb +0 -31
  101. data/spec/link_thumbnailer_spec.rb +0 -205
  102. data/spec/object_spec.rb +0 -130
  103. data/spec/opengraph_spec.rb +0 -7
  104. data/spec/web_image_spec.rb +0 -57
@@ -1,119 +1,36 @@
1
- require 'link_thumbnailer/configuration'
2
- require 'link_thumbnailer/object'
3
- require 'link_thumbnailer/fetcher'
4
- require 'link_thumbnailer/doc_parser'
5
- require 'link_thumbnailer/doc'
6
- require 'link_thumbnailer/img_url_filter'
7
- require 'link_thumbnailer/img_parser'
8
- require 'link_thumbnailer/img_comparator'
9
- require 'link_thumbnailer/web_image'
10
- require 'link_thumbnailer/opengraph'
11
- require 'link_thumbnailer/version'
12
-
13
- module LinkThumbnailer
14
-
15
- module Rails
16
- autoload :Routes, 'link_thumbnailer/rails/routes'
17
- end
18
-
19
- class << self
20
-
21
- attr_accessor :configuration, :object, :fetcher, :doc_parser,
22
- :img_url_filters, :img_parser, :logger
23
-
24
- def logger
25
- @logger ||= ::Rails.logger
26
- end
27
-
28
- def config
29
- self.configuration ||= Configuration.new(
30
- mandatory_attributes: %w(url title images),
31
- strict: true,
32
- redirect_limit: 3,
33
- blacklist_urls: [
34
- %r{^http://ad\.doubleclick\.net/},
35
- %r{^http://b\.scorecardresearch\.com/},
36
- %r{^http://pixel\.quantserve\.com/},
37
- %r{^http://s7\.addthis\.com/}
38
- ],
39
- image_attributes: %w(source_url size type),
40
- limit: 10,
41
- top: 5,
42
- user_agent: 'linkthumbnailer',
43
- verify_ssl: true,
44
- http_timeout: 5
45
- )
46
- end
47
-
48
- def configure
49
- yield config
50
- end
51
-
52
- def generate(url, options = {})
53
- set_options(options)
54
- instantiate_classes
55
-
56
- doc = doc_parser.parse(fetcher.fetch(url), url)
57
-
58
- self.object[:url] = fetcher.url.to_s
59
- opengraph(doc) || custom(doc)
60
- end
61
-
62
- private
63
-
64
- def set_options(options)
65
- config
66
- options.each {|k, v| config[k] = v }
67
- end
68
-
69
- def instantiate_classes
70
- self.object = LinkThumbnailer::Object.new
71
- self.fetcher = LinkThumbnailer::Fetcher.new
72
- self.doc_parser = LinkThumbnailer::DocParser.new
73
- self.img_url_filters = [LinkThumbnailer::ImgUrlFilter.new]
74
- self.img_parser = LinkThumbnailer::ImgParser.new(fetcher, img_url_filters)
75
- end
76
-
77
- def opengraph(doc)
78
- return unless opengraph?(doc)
79
- self.object = LinkThumbnailer::Opengraph.parse(object, doc)
80
- return object if object.valid?
81
- nil
82
- end
83
-
84
- def custom(doc)
85
- self.object[:title] = doc.title
86
- self.object[:description] = doc.description
87
- self.object[:images] = img_parser.parse(doc.img_abs_urls.dup)
88
- self.object[:url] = doc.canonical_url || object[:url]
89
- return object if object.valid?
90
- nil
91
- end
92
-
93
- def opengraph?(doc)
94
- !doc.xpath('//meta[starts-with(@property, "og:") and @content]').empty?
95
- end
96
-
97
- end
98
-
99
- end
100
-
101
- begin
102
- require 'rails'
103
- rescue LoadError
104
- end
105
-
106
- $stderr.puts <<-EOC if !defined?(Rails)
107
- warning: no framework detected.
108
-
109
- Your Gemfile might not be configured properly.
110
- ---- e.g. ----
111
- Rails:
112
- gem 'link_thumbnailer'
113
-
114
- EOC
115
-
116
- if defined?(Rails)
117
- require 'link_thumbnailer/engine'
118
- require 'link_thumbnailer/railtie'
119
- end
1
+ require 'json'
2
+ require 'link_thumbnailer/version'
3
+ require 'link_thumbnailer/configuration'
4
+ require 'link_thumbnailer/exceptions'
5
+ require 'link_thumbnailer/page'
6
+
7
+ module LinkThumbnailer
8
+
9
+ class << self
10
+
11
+ attr_reader :page
12
+
13
+ def generate(url, options = {})
14
+ @page = ::LinkThumbnailer::Page.new(url, options)
15
+
16
+ page.generate
17
+ end
18
+
19
+ end
20
+
21
+ end
22
+
23
+ begin
24
+ require 'rails'
25
+ rescue LoadError
26
+ end
27
+
28
+ $stderr.puts <<-EOC if !defined?(Rails)
29
+ warning: no framework detected.
30
+
31
+ Your Gemfile might not be configured properly.
32
+ ---- e.g. ----
33
+ Rails:
34
+ gem 'link_thumbnailer'
35
+
36
+ EOC
@@ -1,28 +1,26 @@
1
- # -*- encoding: utf-8 -*-
2
- require File.expand_path('../lib/link_thumbnailer/version', __FILE__)
3
-
4
- Gem::Specification.new do |gem|
5
- gem.authors = ["Pierre-Louis Gottfrois"]
6
- gem.email = ["pierrelouis.gottfrois@gmail.com"]
7
- gem.description = %q{Ruby gem generating thumbnail images from a given URL.}
8
- gem.summary = %q{Ruby gem ranking images from a given URL returning an object containing images and website informations.}
9
- gem.homepage = "https://github.com/gottfrois/link_thumbnailer"
10
-
11
- gem.files = `git ls-files`.split($\)
12
- gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
13
- gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
14
- gem.name = "link_thumbnailer"
15
- gem.require_paths = ["lib"]
16
- gem.version = LinkThumbnailer::VERSION
17
-
18
- gem.add_dependency 'rake', '>= 0.9'
19
- gem.add_dependency 'nokogiri', '>= 1.5.5', '< 1.7'
20
- gem.add_dependency 'hashie', '>= 1.2.0'
21
- gem.add_dependency 'net-http-persistent', '~> 2.7'
22
- gem.add_dependency 'fastimage', '~> 1.5.5'
23
- gem.add_dependency 'json', '>= 1.7.6', '< 1.9'
24
-
25
- gem.add_development_dependency 'bundler', '~> 1.3'
26
- gem.add_development_dependency 'rspec', '~> 2.14'
27
- gem.add_development_dependency 'pry', '~> 0.9'
28
- end
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'link_thumbnailer/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "link_thumbnailer"
8
+ spec.version = LinkThumbnailer::VERSION
9
+ spec.authors = ["Pierre-Louis Gottfrois"]
10
+ spec.email = ["pierrelouis.gottfrois@gmail.com"]
11
+ spec.description = %q{Ruby gem generating thumbnail images from a given URL.}
12
+ spec.summary = %q{Ruby gem ranking images from a given URL returning an object containing images and website informations.}
13
+ spec.homepage = "https://github.com/gottfrois/link_thumbnailer"
14
+
15
+ spec.files = `git ls-files`.split($\)
16
+ spec.executables = spec.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
17
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
18
+ spec.require_paths = ['lib']
19
+
20
+ spec.add_dependency 'activesupport', '>= 3.0'
21
+ spec.add_dependency 'json', ['>= 1.7.7', '~> 1.7']
22
+ spec.add_dependency 'rake', '>= 0.9'
23
+ spec.add_dependency 'nokogiri', '~> 1.6'
24
+ spec.add_dependency 'net-http-persistent', '~> 2.9'
25
+ spec.add_dependency 'fastimage', '~> 1.5'
26
+ end
@@ -0,0 +1,51 @@
1
+ require 'spec_helper'
2
+
3
+ describe LinkThumbnailer::Configuration do
4
+
5
+ let(:instance) { described_class.new }
6
+
7
+ it { expect(instance.redirect_limit).to eq(3) }
8
+ it { expect(instance.user_agent).to eq('link_thumbnailer') }
9
+ it { expect(instance.verify_ssl).to eq(true) }
10
+ it { expect(instance.http_timeout).to eq(5) }
11
+ it { expect(instance.blacklist_urls).to_not be_empty }
12
+ it { expect(instance.attributes).to_not be_empty }
13
+ it { expect(instance.graders).to_not be_empty }
14
+ it { expect(instance.description_min_length).to eq(25) }
15
+ it { expect(instance.positive_regex).to_not be_nil }
16
+ it { expect(instance.negative_regex).to_not be_nil }
17
+ it { expect(instance.image_limit).to eq(5) }
18
+
19
+ describe '.config' do
20
+
21
+ it { expect(LinkThumbnailer.config).to be_a(described_class) }
22
+
23
+ end
24
+
25
+ describe '.configure' do
26
+
27
+ before do
28
+ LinkThumbnailer.stub(:config).and_return(instance)
29
+ end
30
+
31
+ context 'when block given' do
32
+
33
+ it 'yields' do
34
+ expect(LinkThumbnailer).to receive(:configure).and_yield(instance)
35
+ LinkThumbnailer.configure {|config|}
36
+ end
37
+
38
+ end
39
+
40
+ context 'when no block given' do
41
+
42
+ it 'does nothing' do
43
+ expect(LinkThumbnailer.configure).to be_nil
44
+ end
45
+
46
+ end
47
+
48
+ end
49
+
50
+ end
51
+
@@ -0,0 +1,9 @@
1
+ <html xmlns:og="http://opengraphprotocol.org/schema/">
2
+ <head>
3
+ <meta charset="utf-8"/>
4
+ <meta property="og:title" content="Foo Title">
5
+ <meta property="og:image" content="">
6
+ <title>Foo</title>
7
+ </head>
8
+
9
+ </html>
@@ -0,0 +1,88 @@
1
+ require 'spec_helper'
2
+
3
+ describe 'Fixture' do
4
+
5
+ let(:url) { 'http://foo.com' }
6
+ let(:png_url) { 'http://foo.com/foo.png' }
7
+ let(:png) { File.open(File.dirname(__FILE__) + '/fixtures/foo.png') }
8
+ let(:action) { LinkThumbnailer.generate(url) }
9
+
10
+ before do
11
+ stub_request(:get, url).to_return(status: 200, body: html, headers: {})
12
+ stub_request(:get, png_url).to_return(status: 200, body: png, headers: {})
13
+ end
14
+
15
+ describe 'Opengraph' do
16
+
17
+ let(:title) { 'Title from og' }
18
+ let(:description) { 'Description from og' }
19
+
20
+ context 'when valid' do
21
+
22
+ let(:html) { File.open(File.dirname(__FILE__) + '/fixtures/og_valid_example.html').read() }
23
+
24
+ it { expect(action.title).to eq(title) }
25
+ it { expect(action.description).to eq(description) }
26
+ it { expect(action.images.count).to eq(1) }
27
+ it { expect(action.images.first.src.to_s).to eq(png_url) }
28
+
29
+ end
30
+
31
+ context 'with multi image' do
32
+
33
+ let(:png_url_2) { 'http://foo.com/bar.png' }
34
+ let(:png_2) { File.open(File.dirname(__FILE__) + '/fixtures/bar.png') }
35
+ let(:html) { File.open(File.dirname(__FILE__) + '/fixtures/og_valid_multi_image_example.html').read() }
36
+
37
+ before do
38
+ stub_request(:get, png_url_2).to_return(status: 200, body: png_2, headers: {})
39
+ end
40
+
41
+ it { expect(action.title).to eq(title) }
42
+ it { expect(action.description).to eq(description) }
43
+ it { expect(action.images.count).to eq(2) }
44
+ it { expect(action.images.first.src.to_s).to eq(png_url) }
45
+ it { expect(action.images.last.src.to_s).to eq(png_url_2) }
46
+
47
+ end
48
+
49
+ context 'when not valid' do
50
+
51
+ let(:html) { File.open(File.dirname(__FILE__) + '/fixtures/og_not_valid_example.html').read() }
52
+
53
+ it { expect(action.title).to eq(title) }
54
+ it { expect(action.description).to eq(description) }
55
+ it { expect(action.images.count).to eq(1) }
56
+ it { expect(action.images.first.src.to_s).to eq(png_url) }
57
+
58
+ end
59
+
60
+ end
61
+
62
+ describe 'Default' do
63
+
64
+ context 'from meta' do
65
+
66
+ let(:html) { File.open(File.dirname(__FILE__) + '/fixtures/default_from_meta.html').read() }
67
+ let(:title) { 'Title from meta' }
68
+ let(:description) { 'Description from meta' }
69
+
70
+ it { expect(action.title).to eq(title) }
71
+ it { expect(action.description).to eq(description) }
72
+
73
+ end
74
+
75
+ context 'from body' do
76
+
77
+ let(:html) { File.open(File.dirname(__FILE__) + '/fixtures/default_from_body.html').read() }
78
+ let(:description) { 'Description from body' }
79
+
80
+ it { expect(action.description).to eq(description) }
81
+ it { expect(action.images.count).to eq(1) }
82
+ it { expect(action.images.first.src.to_s).to eq(png_url) }
83
+
84
+ end
85
+
86
+ end
87
+
88
+ end