webstract 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 66bbaffe0a470b990d080a7b5a8abd8c52f1b504
4
- data.tar.gz: 25f7ff96946ce8cf6ce57abf5a41a3352be3373b
3
+ metadata.gz: 6243396588bd377a63b7bde8eda4753f74580d2e
4
+ data.tar.gz: 1a3a31eb9db5c4e83b197798d00a8c049ca6acb6
5
5
  SHA512:
6
- metadata.gz: dd4c4ad6619ce8542105e27ec8e29eeb589656a3f7225365ee48d6f6f7df05f5c9da4e6e7fd1ffc88374c11173967453780b73ccad179bbd28f8bba39683cda3
7
- data.tar.gz: 6219e01b43583d324d523cb9a214d6d6e7b9d963333e3bf27cc39757b25d3c64769e371b2a6d8ff2e8e6e0c62d5b3b9c5169a11a20c6d4e99ab4ddd6bd56b80e
6
+ metadata.gz: 4a1b8514db48474677b8a2c51e59a4ad2da3a61d4f482372d2cc467dedfc98fbb8ad34abaee5fdca140b45afcf9f32cceefb473932da24a25761571e462c2a7e
7
+ data.tar.gz: 463d81732822cb8dea73e1c8055d71bc9f9824d8dc335d78ed1ef8557a5f48e2149da8132e74acc373e787e5c3658e4f05231a4816364304ee33d4e96e3db166
@@ -1,7 +1,10 @@
1
1
  module Webstract
2
2
 
3
+ autoload :Errors, 'webstract/errors'
4
+ autoload :ScreenCapture, 'webstract/screen_capture'
5
+ autoload :ScreenshotBackend, 'webstract/screenshot_backend'
3
6
  autoload :Screenshot, 'webstract/screenshot'
4
- autoload :Favicon, 'webstract/favicon'
7
+ autoload :Favicon, 'webstract/favicon'
5
8
 
6
9
  def self.screenshot(options = {})
7
10
  Webstract::Screenshot.new(options)
@@ -0,0 +1,4 @@
1
+ module Webstract
2
+ class Error < StandardError
3
+ end
4
+ end
@@ -0,0 +1,60 @@
1
+ require 'capybara/dsl'
2
+
3
+ module Webstract
4
+ class ScreenCapture
5
+ include Capybara::DSL
6
+
7
+ attr_reader :width, :height, :user_agent, :accept_language, :path
8
+
9
+ def initialize(opts = {})
10
+ Webstract::ScreenshotBackend.capybara_setup!
11
+ @width = opts.fetch(:width, Webstract::ScreenshotBackend.width)
12
+ @height = opts.fetch(:height, Webstract::ScreenshotBackend.height)
13
+ @user_agent = opts.fetch(:user_agent, Webstract::ScreenshotBackend.user_agent)
14
+ @accept_language = opts.fetch(:accept_language, Webstract::ScreenshotBackend.accept_language)
15
+
16
+ # Browser settings
17
+ page.driver.resize(@width, @height)
18
+ page.driver.headers = {
19
+ "User-Agent" => @user_agent,
20
+ 'Accept-Language' => @accept_language
21
+ }
22
+ end
23
+
24
+ def start_session(&block)
25
+ Capybara.reset_sessions!
26
+ Capybara.current_session.instance_eval(&block) if block_given?
27
+ @session_started = true
28
+ self
29
+ end
30
+
31
+ # Captures a full-page screenshot of +url+, saving it to +path+. Raises Webstract::Error on a non-200/3xx status or a Poltergeist failure.
32
+ def capture(url, path, opts = {})
33
+ begin
34
+ # Per-call size overrides, applied only when a truthy value is given. NOTE(review): the driver is only resized in #initialize - confirm whether it should be resized here too.
35
+ @width = opts[:width] if opts[:width]
36
+ @height = opts[:height] if opts[:height]
37
+
38
+ # Reset session before visiting url, unless #start_session already did so
39
+ Capybara.reset_sessions! unless @session_started
40
+ @session_started = false
41
+
42
+ # Open page
43
+ visit(url)
44
+
45
+ # Optional fixed delay (opts[:timeout], passed to sleep) before checking the response
46
+ sleep opts[:timeout] if opts[:timeout]
47
+
48
+ # Check response code: only 200 and 3xx are treated as success
49
+ if page.driver.status_code.to_i == 200 || page.driver.status_code.to_i / 100 == 3
50
+ page.driver.save_screenshot(path, :full => true)
51
+ else
52
+ raise Webstract::Error.new("Could not fetch page: #{url.inspect}, error code: #{page.driver.status_code}")
53
+ end
54
+ rescue Capybara::Poltergeist::BrowserError, Capybara::Poltergeist::DeadClient, Capybara::Poltergeist::TimeoutError, Errno::EPIPE => e
55
+ # TODO: Handle Errno::EPIPE and Errno::ECONNRESET
56
+ raise Webstract::Error.new("Capybara error: #{e.message.inspect}")
57
+ end
58
+ end
59
+ end
60
+ end
@@ -1,23 +1,19 @@
1
- require 'webshot'
2
-
3
1
  module Webstract
4
2
  class Screenshot
5
- attr_accessor :url, :path, :width, :height, :quality
3
+ attr_accessor :url, :path, :width, :height, :user_agent, :accept_language
6
4
  attr_reader :handle
7
5
 
8
6
  def initialize(options = {})
9
- @handle = Webshot::Screenshot.instance
7
+ @handle = Webstract::ScreenCapture.new(options)
10
8
 
11
9
  options.each do |k, value|
12
10
  setter = "#{k}="
13
11
  self.public_send(setter, value) if self.respond_to?(setter)
14
12
  end
15
-
16
- @quality = 85 unless self.quality
17
13
  end
18
14
 
19
15
  def capture
20
- handle.capture(url, path, width: width, height: height, quality: quality)
16
+ handle.capture(url, path, width: width, height: height, user_agent: user_agent, accept_language: accept_language)
21
17
  end
22
18
 
23
19
  end
@@ -0,0 +1,64 @@
1
+ require "capybara/dsl"
2
+ require "capybara/poltergeist"
3
+ require "active_support"
4
+ require "active_support/core_ext"
5
+
6
+ module Webstract
7
+ module ScreenshotBackend
8
+
9
+ USER_AGENTS = {
10
+ web: 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.43 Safari/537.31',
11
+ android: 'Mozilla/5.0 (Linux; Android 4.2.1; en-us; Nexus 4 Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Mobile Safari/535.19',
12
+ ios: 'Mozilla/5.0 (iPhone; CPU iPhone OS 5_0 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9A334 Safari/7534.48.3'
13
+ }
14
+
15
+ ## Browser settings
16
+ # Width
17
+ mattr_accessor :width
18
+ @@width = 1024
19
+
20
+ # Height
21
+ mattr_accessor :height
22
+ @@height = 768
23
+
24
+ mattr_accessor :accept_language
25
+ @@accept_language = 'en-us,en;q=0.5'
26
+
27
+
28
+ # User agent
29
+ class << self
30
+
31
+ def user_agent
32
+ @user_agent ||= USER_AGENT[:web]
33
+ end
34
+ def user_agent=(ua)
35
+ agent_string = USER_AGENT[ua]
36
+ raise(ArgumentError.new('must be one of #{USER_AGENTS.inspect}')) unless agent_string
37
+ @user_agent = agent_string
38
+ end
39
+
40
+ end
41
+
42
+ # Customize settings
43
+ def self.setup
44
+ yield(self)
45
+ end
46
+
47
+ # Capybara setup: turns off Capybara's rack server, then registers and selects the :poltergeist driver
48
+ def self.capybara_setup!
49
+ # By default Capybara will try to boot a rack application
50
+ # automatically. You might want to switch off Capybara's
51
+ # rack server if you are running against a remote application
52
+ Capybara.run_server = false
53
+ Capybara.register_driver :poltergeist do |app|
54
+ Capybara::Poltergeist::Driver.new(app, {
55
+ # Do not re-raise JavaScript errors in Ruby
56
+ js_errors: false,
57
+ # Additional command line options for PhantomJS
58
+ phantomjs_options: ['--ignore-ssl-errors=yes'],
59
+ })
60
+ end
61
+ Capybara.current_driver = :poltergeist
62
+ end
63
+ end
64
+ end
@@ -1,3 +1,3 @@
1
1
  module Webstract
2
- VERSION = '0.0.1'
2
+ VERSION = '0.0.2'
3
3
  end
metadata CHANGED
@@ -1,15 +1,63 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: webstract
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - John Faucett
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-10-08 00:00:00.000000000 Z
11
+ date: 2014-10-11 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: activesupport
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '4.1'
20
+ - - "<"
21
+ - !ruby/object:Gem::Version
22
+ version: '5'
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ version: '4.1'
30
+ - - "<"
31
+ - !ruby/object:Gem::Version
32
+ version: '5'
33
+ - !ruby/object:Gem::Dependency
34
+ name: poltergeist
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ">="
38
+ - !ruby/object:Gem::Version
39
+ version: '1.5'
40
+ type: :runtime
41
+ prerelease: false
42
+ version_requirements: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: '1.5'
47
+ - !ruby/object:Gem::Dependency
48
+ name: faviconduit
49
+ requirement: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :runtime
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: '0'
13
61
  - !ruby/object:Gem::Dependency
14
62
  name: bundler
15
63
  requirement: !ruby/object:Gem::Requirement
@@ -39,33 +87,33 @@ dependencies:
39
87
  - !ruby/object:Gem::Version
40
88
  version: '10.0'
41
89
  - !ruby/object:Gem::Dependency
42
- name: webshot
90
+ name: rspec
43
91
  requirement: !ruby/object:Gem::Requirement
44
92
  requirements:
45
- - - ">="
93
+ - - "~>"
46
94
  - !ruby/object:Gem::Version
47
- version: '0'
48
- type: :runtime
95
+ version: '3'
96
+ type: :development
49
97
  prerelease: false
50
98
  version_requirements: !ruby/object:Gem::Requirement
51
99
  requirements:
52
- - - ">="
100
+ - - "~>"
53
101
  - !ruby/object:Gem::Version
54
- version: '0'
102
+ version: '3'
55
103
  - !ruby/object:Gem::Dependency
56
- name: faviconduit
104
+ name: pry
57
105
  requirement: !ruby/object:Gem::Requirement
58
106
  requirements:
59
107
  - - ">="
60
108
  - !ruby/object:Gem::Version
61
- version: '0'
62
- type: :runtime
109
+ version: 0.10.0
110
+ type: :development
63
111
  prerelease: false
64
112
  version_requirements: !ruby/object:Gem::Requirement
65
113
  requirements:
66
114
  - - ">="
67
115
  - !ruby/object:Gem::Version
68
- version: '0'
116
+ version: 0.10.0
69
117
  description: Extract images, favicons, and meta info from websites
70
118
  email:
71
119
  - jwaterfaucett@gmail.com
@@ -73,16 +121,15 @@ executables: []
73
121
  extensions: []
74
122
  extra_rdoc_files: []
75
123
  files:
76
- - ".gitignore"
77
- - Gemfile
78
124
  - LICENSE.txt
79
125
  - README.md
80
- - Rakefile
81
126
  - lib/webstract.rb
127
+ - lib/webstract/errors.rb
82
128
  - lib/webstract/favicon.rb
129
+ - lib/webstract/screen_capture.rb
83
130
  - lib/webstract/screenshot.rb
131
+ - lib/webstract/screenshot_backend.rb
84
132
  - lib/webstract/version.rb
85
- - webstract.gemspec
86
133
  homepage:
87
134
  licenses:
88
135
  - MIT
data/.gitignore DELETED
@@ -1,14 +0,0 @@
1
- /.bundle/
2
- /.yardoc
3
- /Gemfile.lock
4
- /_yardoc/
5
- /coverage/
6
- /doc/
7
- /pkg/
8
- /spec/reports/
9
- /tmp/
10
- *.bundle
11
- *.so
12
- *.o
13
- *.a
14
- mkmf.log
data/Gemfile DELETED
@@ -1,4 +0,0 @@
1
- source 'https://rubygems.org'
2
-
3
- # Specify your gem's dependencies in webstract.gemspec
4
- gemspec
data/Rakefile DELETED
@@ -1,2 +0,0 @@
1
- require "bundler/gem_tasks"
2
-
@@ -1,24 +0,0 @@
1
- # coding: utf-8
2
- lib = File.expand_path('../lib', __FILE__)
3
- $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
- require 'webstract/version'
5
-
6
- Gem::Specification.new do |spec|
7
- spec.name = "webstract"
8
- spec.version = Webstract::VERSION
9
- spec.authors = ["John Faucett"]
10
- spec.email = ["jwaterfaucett@gmail.com"]
11
- spec.summary = 'Extract information from websites'
12
- spec.description = 'Extract images, favicons, and meta info from websites'
13
- spec.license = 'MIT'
14
-
15
- spec.files = `git ls-files -z`.split("\x0")
16
- spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
17
- spec.require_paths = ['lib']
18
-
19
- spec.add_development_dependency 'bundler', '~> 1.7'
20
- spec.add_development_dependency 'rake', '~> 10.0'
21
-
22
- spec.add_runtime_dependency 'webshot'
23
- spec.add_runtime_dependency 'faviconduit'
24
- end