webstract 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 66bbaffe0a470b990d080a7b5a8abd8c52f1b504
4
- data.tar.gz: 25f7ff96946ce8cf6ce57abf5a41a3352be3373b
3
+ metadata.gz: 6243396588bd377a63b7bde8eda4753f74580d2e
4
+ data.tar.gz: 1a3a31eb9db5c4e83b197798d00a8c049ca6acb6
5
5
  SHA512:
6
- metadata.gz: dd4c4ad6619ce8542105e27ec8e29eeb589656a3f7225365ee48d6f6f7df05f5c9da4e6e7fd1ffc88374c11173967453780b73ccad179bbd28f8bba39683cda3
7
- data.tar.gz: 6219e01b43583d324d523cb9a214d6d6e7b9d963333e3bf27cc39757b25d3c64769e371b2a6d8ff2e8e6e0c62d5b3b9c5169a11a20c6d4e99ab4ddd6bd56b80e
6
+ metadata.gz: 4a1b8514db48474677b8a2c51e59a4ad2da3a61d4f482372d2cc467dedfc98fbb8ad34abaee5fdca140b45afcf9f32cceefb473932da24a25761571e462c2a7e
7
+ data.tar.gz: 463d81732822cb8dea73e1c8055d71bc9f9824d8dc335d78ed1ef8557a5f48e2149da8132e74acc373e787e5c3658e4f05231a4816364304ee33d4e96e3db166
data/lib/webstract.rb CHANGED
@@ -1,7 +1,10 @@
1
1
  module Webstract
2
2
 
3
+ autoload :Errors, 'webstract/errors'
4
+ autoload :ScreenCapture, 'webstract/screen_capture'
5
+ autoload :ScreenshotBackend, 'webstract/screenshot_backend'
3
6
  autoload :Screenshot, 'webstract/screenshot'
4
- autoload :Favicon, 'webstract/favicon'
7
+ autoload :Favicon, 'webstract/favicon'
5
8
 
6
9
  def self.screenshot(options = {})
7
10
  Webstract::Screenshot.new(options)
data/lib/webstract/errors.rb ADDED
@@ -0,0 +1,4 @@
1
+ module Webstract
2
+ class Error < StandardError
3
+ end
4
+ end
data/lib/webstract/screen_capture.rb ADDED
@@ -0,0 +1,60 @@
1
+ require 'capybara/dsl'
2
+
3
+ module Webstract
4
+ class ScreenCapture
5
+ include Capybara::DSL
6
+
7
+ attr_reader :width, :height, :user_agent, :accept_language, :path
8
+
9
+ def initialize(opts = {})
10
+ Webstract::ScreenshotBackend.capybara_setup!
11
+ @width = opts.fetch(:width, Webstract::ScreenshotBackend.width)
12
+ @height = opts.fetch(:height, Webstract::ScreenshotBackend.height)
13
+ @user_agent = opts.fetch(:user_agent, Webstract::ScreenshotBackend.user_agent)
14
+ @accept_language = opts.fetch(:accept_language, Webstract::ScreenshotBackend.accept_language)
15
+
16
+ # Browser settings
17
+ page.driver.resize(@width, @height)
18
+ page.driver.headers = {
19
+ "User-Agent" => @user_agent,
20
+ 'Accept-Language' => @accept_language
21
+ }
22
+ end
23
+
24
+ def start_session(&block)
25
+ Capybara.reset_sessions!
26
+ Capybara.current_session.instance_eval(&block) if block_given?
27
+ @session_started = true
28
+ self
29
+ end
30
+
31
+ # Captures a screenshot of +url+ saving it to +path+.
32
+ def capture(url, path, opts = {})
33
+ begin
34
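+ # Per-call size overrides. NOTE(review): the :height line below is guarded by opts[:width], so :height is ignored unless :width is also passed — probably meant opts[:height].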
35
+ @width = opts.fetch(:width, 120) if opts[:width]
36
+ @height = opts.fetch(:height, 90) if opts[:width]
37
+
38
+ # Reset session before visiting url
39
+ Capybara.reset_sessions! unless @session_started
40
+ @session_started = false
41
+
42
+ # Open page
43
+ visit(url)
44
+
45
+ # Optional pause: sleep for opts[:timeout] seconds after loading the page
46
+ sleep opts[:timeout] if opts[:timeout]
47
+
48
+ # Check response code
49
+ if page.driver.status_code.to_i == 200 || page.driver.status_code.to_i / 100 == 3
50
+ page.driver.save_screenshot(path, :full => true)
51
+ else
52
+ raise Webstract::Error.new("Could not fetch page: #{url.inspect}, error code: #{page.driver.status_code}")
53
+ end
54
+ rescue Capybara::Poltergeist::BrowserError, Capybara::Poltergeist::DeadClient, Capybara::Poltergeist::TimeoutError, Errno::EPIPE => e
55
+ # TODO: Also handle Errno::ECONNRESET (Errno::EPIPE is already rescued above)
56
+ raise Webstract::Error.new("Capybara error: #{e.message.inspect}")
57
+ end
58
+ end
59
+ end
60
+ end
data/lib/webstract/screenshot.rb CHANGED
@@ -1,23 +1,19 @@
1
- require 'webshot'
2
-
3
1
  module Webstract
4
2
  class Screenshot
5
- attr_accessor :url, :path, :width, :height, :quality
3
+ attr_accessor :url, :path, :width, :height, :user_agent, :accept_language
6
4
  attr_reader :handle
7
5
 
8
6
  def initialize(options = {})
9
- @handle = Webshot::Screenshot.instance
7
+ @handle = Webstract::ScreenCapture.new(options)
10
8
 
11
9
  options.each do |k, value|
12
10
  setter = "#{k}="
13
11
  self.public_send(setter, value) if self.respond_to?(setter)
14
12
  end
15
-
16
- @quality = 85 unless self.quality
17
13
  end
18
14
 
19
15
  def capture
20
- handle.capture(url, path, width: width, height: height, quality: quality)
16
+ handle.capture(url, path, width: width, height: height, user_agent: user_agent, accept_language: accept_language)
21
17
  end
22
18
 
23
19
  end
data/lib/webstract/screenshot_backend.rb ADDED
@@ -0,0 +1,64 @@
1
+ require "capybara/dsl"
2
+ require "capybara/poltergeist"
3
+ require "active_support"
4
+ require "active_support/core_ext"
5
+
6
+ module Webstract
7
+ module ScreenshotBackend
8
+
9
+ USER_AGENTS = {
10
+ web: 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.43 Safari/537.31',
11
+ android: 'Mozilla/5.0 (Linux; Android 4.2.1; en-us; Nexus 4 Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Mobile Safari/535.19',
12
+ ios: 'Mozilla/5.0 (iPhone; CPU iPhone OS 5_0 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9A334 Safari/7534.48.3'
13
+ }
14
+
15
+ ## Browser settings
16
+ # Width
17
+ mattr_accessor :width
18
+ @@width = 1024
19
+
20
+ # Height
21
+ mattr_accessor :height
22
+ @@height = 768
23
+
24
+ mattr_accessor :accept_language
25
+ @@accept_language = 'en-us,en;q=0.5'
26
+
27
+
28
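+ # User agent. NOTE(review): the accessors below read USER_AGENT, but the constant defined above is USER_AGENTS (likely a typo; USER_AGENT is not defined in this file). The ArgumentError message is also single-quoted, so its #{...} is not interpolated.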
29
+ class << self
30
+
31
+ def user_agent
32
+ @user_agent ||= USER_AGENT[:web]
33
+ end
34
+ def user_agent=(ua)
35
+ agent_string = USER_AGENT[ua]
36
+ raise(ArgumentError.new('must be one of #{USER_AGENTS.inspect}')) unless agent_string
37
+ @user_agent = agent_string
38
+ end
39
+
40
+ end
41
+
42
+ # Customize settings
43
+ def self.setup
44
+ yield(self)
45
+ end
46
+
47
+ # Capybara setup
48
+ def self.capybara_setup!
49
+ # By default Capybara will try to boot a rack application
50
+ # automatically. You might want to switch off Capybara's
51
+ # rack server if you are running against a remote application
52
+ Capybara.run_server = false
53
+ Capybara.register_driver :poltergeist do |app|
54
+ Capybara::Poltergeist::Driver.new(app, {
55
+ # Do not re-raise JavaScript errors in Ruby
56
+ js_errors: false,
57
+ # Additional command line options for PhantomJS
58
+ phantomjs_options: ['--ignore-ssl-errors=yes'],
59
+ })
60
+ end
61
+ Capybara.current_driver = :poltergeist
62
+ end
63
+ end
64
+ end
data/lib/webstract/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Webstract
2
- VERSION = '0.0.1'
2
+ VERSION = '0.0.2'
3
3
  end
metadata CHANGED
@@ -1,15 +1,63 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: webstract
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - John Faucett
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-10-08 00:00:00.000000000 Z
11
+ date: 2014-10-11 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: activesupport
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '4.1'
20
+ - - "<"
21
+ - !ruby/object:Gem::Version
22
+ version: '5'
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ version: '4.1'
30
+ - - "<"
31
+ - !ruby/object:Gem::Version
32
+ version: '5'
33
+ - !ruby/object:Gem::Dependency
34
+ name: poltergeist
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ">="
38
+ - !ruby/object:Gem::Version
39
+ version: '1.5'
40
+ type: :runtime
41
+ prerelease: false
42
+ version_requirements: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: '1.5'
47
+ - !ruby/object:Gem::Dependency
48
+ name: faviconduit
49
+ requirement: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :runtime
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: '0'
13
61
  - !ruby/object:Gem::Dependency
14
62
  name: bundler
15
63
  requirement: !ruby/object:Gem::Requirement
@@ -39,33 +87,33 @@ dependencies:
39
87
  - !ruby/object:Gem::Version
40
88
  version: '10.0'
41
89
  - !ruby/object:Gem::Dependency
42
- name: webshot
90
+ name: rspec
43
91
  requirement: !ruby/object:Gem::Requirement
44
92
  requirements:
45
- - - ">="
93
+ - - "~>"
46
94
  - !ruby/object:Gem::Version
47
- version: '0'
48
- type: :runtime
95
+ version: '3'
96
+ type: :development
49
97
  prerelease: false
50
98
  version_requirements: !ruby/object:Gem::Requirement
51
99
  requirements:
52
- - - ">="
100
+ - - "~>"
53
101
  - !ruby/object:Gem::Version
54
- version: '0'
102
+ version: '3'
55
103
  - !ruby/object:Gem::Dependency
56
- name: faviconduit
104
+ name: pry
57
105
  requirement: !ruby/object:Gem::Requirement
58
106
  requirements:
59
107
  - - ">="
60
108
  - !ruby/object:Gem::Version
61
- version: '0'
62
- type: :runtime
109
+ version: 0.10.0
110
+ type: :development
63
111
  prerelease: false
64
112
  version_requirements: !ruby/object:Gem::Requirement
65
113
  requirements:
66
114
  - - ">="
67
115
  - !ruby/object:Gem::Version
68
- version: '0'
116
+ version: 0.10.0
69
117
  description: Extract images, favicons, and meta info from websites
70
118
  email:
71
119
  - jwaterfaucett@gmail.com
@@ -73,16 +121,15 @@ executables: []
73
121
  extensions: []
74
122
  extra_rdoc_files: []
75
123
  files:
76
- - ".gitignore"
77
- - Gemfile
78
124
  - LICENSE.txt
79
125
  - README.md
80
- - Rakefile
81
126
  - lib/webstract.rb
127
+ - lib/webstract/errors.rb
82
128
  - lib/webstract/favicon.rb
129
+ - lib/webstract/screen_capture.rb
83
130
  - lib/webstract/screenshot.rb
131
+ - lib/webstract/screenshot_backend.rb
84
132
  - lib/webstract/version.rb
85
- - webstract.gemspec
86
133
  homepage:
87
134
  licenses:
88
135
  - MIT
data/.gitignore DELETED
@@ -1,14 +0,0 @@
1
- /.bundle/
2
- /.yardoc
3
- /Gemfile.lock
4
- /_yardoc/
5
- /coverage/
6
- /doc/
7
- /pkg/
8
- /spec/reports/
9
- /tmp/
10
- *.bundle
11
- *.so
12
- *.o
13
- *.a
14
- mkmf.log
data/Gemfile DELETED
@@ -1,4 +0,0 @@
1
- source 'https://rubygems.org'
2
-
3
- # Specify your gem's dependencies in webstract.gemspec
4
- gemspec
data/Rakefile DELETED
@@ -1,2 +0,0 @@
1
- require "bundler/gem_tasks"
2
-
data/webstract.gemspec DELETED
@@ -1,24 +0,0 @@
1
- # coding: utf-8
2
- lib = File.expand_path('../lib', __FILE__)
3
- $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
- require 'webstract/version'
5
-
6
- Gem::Specification.new do |spec|
7
- spec.name = "webstract"
8
- spec.version = Webstract::VERSION
9
- spec.authors = ["John Faucett"]
10
- spec.email = ["jwaterfaucett@gmail.com"]
11
- spec.summary = 'Extract information from websites'
12
- spec.description = 'Extract images, favicons, and meta info from websites'
13
- spec.license = 'MIT'
14
-
15
- spec.files = `git ls-files -z`.split("\x0")
16
- spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
17
- spec.require_paths = ['lib']
18
-
19
- spec.add_development_dependency 'bundler', '~> 1.7'
20
- spec.add_development_dependency 'rake', '~> 10.0'
21
-
22
- spec.add_runtime_dependency 'webshot'
23
- spec.add_runtime_dependency 'faviconduit'
24
- end