auto_browse 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/Gemfile +9 -0
- data/Gemfile.lock +79 -0
- data/LICENSE.txt +21 -0
- data/README.md +37 -0
- data/Rakefile +8 -0
- data/bin/console +15 -0
- data/bin/setup +8 -0
- data/lib/auto_browse/browser.rb +133 -0
- data/lib/auto_browse/capybara_ext.rb +10 -0
- data/lib/auto_browse/ferrum_ext.rb +14 -0
- data/lib/auto_browse/mouse.rb +233 -0
- data/lib/auto_browse/page.rb +131 -0
- data/lib/auto_browse/version.rb +5 -0
- data/lib/auto_browse.rb +8 -0
- data/sig/auto_browse.rbs +4 -0
- metadata +146 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: e5ceb905050f6a63790be2ee88058061771ff255bce8b253359aac9902e50136
|
4
|
+
data.tar.gz: 7e0cbdc6c451a39093dc8417a0912dc311506960be8b5f47aa6c1069bbe477d8
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: f984164b1089eb63196be99133f86e34d1f62eac9006a3ee7c68dfa4134bba27d550a47cc25936d8b77fcd6c49f732f854850e0afff7c7a369c3c5508652610d
|
7
|
+
data.tar.gz: bc79944ab2330e528c926d1faee0902bf845bf2aebb6aadd888e21585504934149d19d76e25c47d78529e7aa3f25d2409a13abd4866a2cd777166f8283a44c51
|
data/.rspec
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
auto_browse (0.1.0)
|
5
|
+
bezier_curve
|
6
|
+
capybara
|
7
|
+
cuprite
|
8
|
+
ferrum
|
9
|
+
mini_magick
|
10
|
+
victor
|
11
|
+
|
12
|
+
GEM
|
13
|
+
remote: https://rubygems.org/
|
14
|
+
specs:
|
15
|
+
addressable (2.8.0)
|
16
|
+
public_suffix (>= 2.0.2, < 5.0)
|
17
|
+
bezier_curve (0.9.0)
|
18
|
+
capybara (3.36.0)
|
19
|
+
addressable
|
20
|
+
matrix
|
21
|
+
mini_mime (>= 0.1.3)
|
22
|
+
nokogiri (~> 1.8)
|
23
|
+
rack (>= 1.6.0)
|
24
|
+
rack-test (>= 0.6.3)
|
25
|
+
regexp_parser (>= 1.5, < 3.0)
|
26
|
+
xpath (~> 3.2)
|
27
|
+
cliver (0.3.2)
|
28
|
+
concurrent-ruby (1.1.10)
|
29
|
+
cuprite (0.13)
|
30
|
+
capybara (>= 2.1, < 4)
|
31
|
+
ferrum (~> 0.11.0)
|
32
|
+
diff-lcs (1.5.0)
|
33
|
+
ferrum (0.11)
|
34
|
+
addressable (~> 2.5)
|
35
|
+
cliver (~> 0.3)
|
36
|
+
concurrent-ruby (~> 1.1)
|
37
|
+
websocket-driver (>= 0.6, < 0.8)
|
38
|
+
matrix (0.4.2)
|
39
|
+
mini_magick (4.11.0)
|
40
|
+
mini_mime (1.1.2)
|
41
|
+
nokogiri (1.13.4-x86_64-linux)
|
42
|
+
racc (~> 1.4)
|
43
|
+
public_suffix (4.0.7)
|
44
|
+
racc (1.6.0)
|
45
|
+
rack (2.2.3)
|
46
|
+
rack-test (1.1.0)
|
47
|
+
rack (>= 1.0, < 3)
|
48
|
+
rake (13.0.6)
|
49
|
+
regexp_parser (2.3.0)
|
50
|
+
rspec (3.11.0)
|
51
|
+
rspec-core (~> 3.11.0)
|
52
|
+
rspec-expectations (~> 3.11.0)
|
53
|
+
rspec-mocks (~> 3.11.0)
|
54
|
+
rspec-core (3.11.0)
|
55
|
+
rspec-support (~> 3.11.0)
|
56
|
+
rspec-expectations (3.11.0)
|
57
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
58
|
+
rspec-support (~> 3.11.0)
|
59
|
+
rspec-mocks (3.11.1)
|
60
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
61
|
+
rspec-support (~> 3.11.0)
|
62
|
+
rspec-support (3.11.0)
|
63
|
+
victor (0.3.3)
|
64
|
+
websocket-driver (0.7.5)
|
65
|
+
websocket-extensions (>= 0.1.0)
|
66
|
+
websocket-extensions (0.1.5)
|
67
|
+
xpath (3.2.0)
|
68
|
+
nokogiri (~> 1.8)
|
69
|
+
|
70
|
+
PLATFORMS
|
71
|
+
x86_64-linux
|
72
|
+
|
73
|
+
DEPENDENCIES
|
74
|
+
auto_browse!
|
75
|
+
rake (~> 13.0)
|
76
|
+
rspec (~> 3.0)
|
77
|
+
|
78
|
+
BUNDLED WITH
|
79
|
+
2.2.33
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2022 David Ellis
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
# auto_browse
|
2
|
+
|
3
|
+
auto_browse is a browser driver that I use for screen scraping.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
```ruby
|
10
|
+
gem 'auto_browse'
|
11
|
+
```
|
12
|
+
|
13
|
+
And then execute:
|
14
|
+
|
15
|
+
$ bundle install
|
16
|
+
|
17
|
+
Or install it yourself as:
|
18
|
+
|
19
|
+
$ gem install auto_browse
|
20
|
+
|
21
|
+
## Usage
|
22
|
+
|
23
|
+
I'll put up some examples later.
|
24
|
+
|
25
|
+
## Development
|
26
|
+
|
27
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
28
|
+
|
29
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and the created tag, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
30
|
+
|
31
|
+
## Contributing
|
32
|
+
|
33
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/davidkellis/auto_browse.
|
34
|
+
|
35
|
+
## License
|
36
|
+
|
37
|
+
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require "bundler/setup"
|
5
|
+
require "auto_browse"
|
6
|
+
|
7
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
8
|
+
# with your gem easier. You can also use a different console, if you like.
|
9
|
+
|
10
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
11
|
+
# require "pry"
|
12
|
+
# Pry.start
|
13
|
+
|
14
|
+
require "irb"
|
15
|
+
IRB.start(__FILE__)
|
data/bin/setup
ADDED
@@ -0,0 +1,133 @@
|
|
1
|
+
require "ferrum"
|
2
|
+
# require "ferrum/browser/options/chrome"
|
3
|
+
require "capybara"
|
4
|
+
require "capybara/cuprite"
|
5
|
+
|
6
|
+
require_relative "capybara_ext"
|
7
|
+
require_relative "ferrum_ext"
|
8
|
+
require_relative "mouse"
|
9
|
+
require_relative "page"
|
10
|
+
|
11
|
+
module AutoBrowse
|
12
|
+
# Abstract browser interface.
#
# Every method here raises a RuntimeError with the message "not implemented";
# concrete browsers are expected to subclass this and override what they support.
class Browser
  # Run a snippet of JavaScript in the browser.
  def execute_script(javascript)
    raise RuntimeError, "not implemented"
  end

  # Navigate to the given URL.
  def goto(url)
    raise RuntimeError, "not implemented"
  end

  # Set the default timeout for this browser (the parameter defaults to 30).
  def set_default_timeout(timeout = 30)
    raise RuntimeError, "not implemented"
  end

  # Shut the browser down.
  def quit
    raise RuntimeError, "not implemented"
  end

  # Return the underlying driver object.
  def driver
    raise RuntimeError, "not implemented"
  end
end
|
33
|
+
|
34
|
+
|
35
|
+
# Browser implementation backed by a Capybara session that uses the :cuprite driver.
#
# Call CupriteBrowser.register once before instantiating, so that the :cuprite
# driver is known to Capybara.
class CupriteBrowser < Browser
  # Sets Capybara's global default_max_wait_time.
  def self.set_global_default_timeout(timeout = 30)
    Capybara.default_max_wait_time = timeout
  end

  # Makes :cuprite the Capybara JavaScript driver and registers the factory
  # block that builds the Cuprite driver.
  def self.register
    Capybara.javascript_driver = :cuprite
    Capybara.register_driver(:cuprite) do |app|
      # Chrome command line switches; a nil value yields a bare "--flag".
      # the list of command line flags is enormous: https://peter.sh/experiments/chromium-command-line-switches/
      #
      # Switches that were tried and are deliberately left off:
      #   hide-scrollbars, mute-audio, enable-automation,
      #   disable-session-crashed-bubble, disable-sync, keep-alive-for-test,
      #   disable-popup-blocking, disable-extensions, disable-translate,
      #   disable-background-networking, disable-background-timer-throttling,
      #   disable-backgrounding-occluded-windows,
      #   disable-client-side-phishing-detection, disable-default-apps,
      #   disable-ipc-flooding-protection, disable-prompt-on-repost,
      #   disable-renderer-backgrounding, force-color-profile (srgb),
      #   safebrowsing-disable-auto-update
      flags = {
        "disable-web-security" => nil,
        "disable-breakpad" => nil,
        "no-first-run" => nil,
        "use-mock-keychain" => nil,
        "disable-hang-monitor" => nil,
        "disable-features" => "site-per-process,TranslateUI",
        "enable-features" => "NetworkService,NetworkServiceInProcess",
        "disable-dev-shm-usage" => nil,
        "metrics-recording-only" => nil,
        "password-store" => "basic"
      }

      # Headless mode is requested through a browser switch because the
      # driver-level `headless:` option was observed to have no effect.
      flags[:headless] = nil if ENV["HEADLESS"] == "true"

      cuprite_settings = {
        window_size: [1440, 900],
        ignore_default_browser_options: true,
        browser_options: flags
      }

      Capybara::Cuprite::Driver.new(app, cuprite_settings)
    end
  end

  # Windows known to this browser: the session's initial window plus every
  # window opened through #new_window.
  attr_reader :windows

  # Starts a new Capybara session on the :cuprite driver and records its
  # current window.
  def initialize
    @session = Capybara::Session.new(:cuprite)
    @windows = [@session.current_window]
  end

  # Returns the Capybara session.
  def driver
    @session
  end

  # Returns the mouse object of the session's underlying browser.
  def mouse
    @session.driver.browser.mouse
  end

  # Moves the mouse to (x, y), then pauses for 10ms.
  def move(x, y)
    mouse.move(x: x, y: y)
    sleep 0.01
  end

  # Navigates the session to url.
  def goto(url)
    @session.visit(url)
  end

  # Same as #goto.
  def visit(url)
    goto(url)
  end

  # Opens a new window, remembers it in #windows and returns it.
  def new_window
    @session.open_new_window.tap { |opened| @windows << opened }
  end

  # Makes the given window the session's current window.
  def switch_to_window(window)
    @session.switch_to_window(window)
  end

  # Quits the session.
  def quit
    @session.quit
  end
end
|
132
|
+
|
133
|
+
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
require "ferrum"
|
2
|
+
|
3
|
+
# redefining internals for debugging
|
4
|
+
module Ferrum
  class Browser
    class Command
      # Builds the command line as an array: the browser path followed by one
      # switch per entry of @flags ("--name" when the value is nil, otherwise
      # "--name=value").
      #
      # The assembled command is also printed to stdout, for debugging.
      def to_a
        switches = @flags.map do |name, value|
          value.nil? ? "--#{name}" : "--#{name}=#{value}"
        end
        argv = [path, *switches]
        puts "Ferrum::Browser::Command -> #{argv.join(" ")}"
        argv
      end
    end
  end
end
|
@@ -0,0 +1,233 @@
|
|
1
|
+
require "bezier_curve"
|
2
|
+
|
3
|
+
module AutoBrowse
|
4
|
+
module MouseMover
|
5
|
+
|
6
|
+
# Small numeric helpers, meant to be mixed into the classes that need them.
module MathUtils
  # Returns a random number between min and max, obtained by scaling
  # Kernel#rand (which is in [0, 1)) onto that range.
  def random_number_range(min, max)
    rand * (max - min) + min
  end

  # Restricts target to the range min..max:
  #   - returns target when min <= target <= max
  #   - returns min when target is below min
  #   - returns max when target is above max
  def clamp(target, min, max)
    [max, [min, target].max].min
  end

  # Same contract as #clamp, written as explicit comparisons.
  def confine(target, min, max)
    case
    when target < min
      min
    when target > max
      max
    else
      target
    end
  end

  # Calculate the amount of time needed to move across the given distance
  # to a target of the given width (the width of the element being clicked on).
  # https://en.wikipedia.org/wiki/Fitts%27s_law
  #
  # Computes a + b * log2(distance / width + 1) with a = 0 and b = 2.
  # width must be non-zero.
  def fitts(distance, width)
    a = 0
    b = 2
    # Force floating point division: with two Integers, `distance / width`
    # would truncate (e.g. 50 / 100 == 0) and distort the index of difficulty.
    id = Math.log2(distance.to_f / width + 1)
    a + b * id
  end

  # returns 0 if the supplied value is less than 0; otherwise returns the given value
  def identity_floor0(x)
    x < 0 ? 0 : x
  end

  # coord is a pair: [x, y]
  # returns [identity_floor0(x), identity_floor0(y)]
  def coordinate_floor0(coord)
    coord.map { |val| identity_floor0(val) }
  end
end
|
49
|
+
|
50
|
+
# vector math comes from https://github.com/Xetera/ghost-cursor/blob/master/src/math.ts
|
51
|
+
# Two-dimensional vector with Float components, plus the randomised helpers
# used to build curved cursor paths.
class Vector
  include MathUtils

  attr_accessor :x, :y

  # Both components are coerced with #to_f.
  def initialize(x, y)
    @x, @y = x.to_f, y.to_f
  end

  # Returns the components as an [x, y] pair.
  def to_a
    [x, y]
  end

  # Component-wise sum with another vector.
  def +(other)
    Vector.new(x + other.x, y + other.y)
  end

  # Component-wise difference with another vector.
  def -(other)
    Vector.new(x - other.x, y - other.y)
  end

  # Scales both components by a number.
  def *(multiplier)
    Vector.new(x * multiplier, y * multiplier)
  end

  # Divides both components by a number.
  def /(divisor)
    Vector.new(x / divisor, y / divisor)
  end

  # Returns the vector that leads from self to other.
  def direction(other)
    other - self
  end

  # Returns this vector rotated by a quarter turn: (y, -x).
  def perpendicular
    Vector.new(y, -1 * x)
  end

  # Euclidean length.
  def magnitude
    Math.sqrt(x ** 2 + y ** 2)
  end

  # Returns the vector of length 1 that points the same way as self.
  #
  # A zero-length vector has no direction; it yields the zero vector instead
  # of dividing 0.0 by 0.0, which would produce NaN components that then
  # leak into every path computed from them (e.g. when moving from a point
  # to itself).
  def unit
    length = magnitude
    return Vector.new(0, 0) if length.zero?

    self / length
  end

  # Returns a vector pointing the same way as self, with the given length.
  def set_magnitude(magnitude)
    unit * magnitude
  end

  # returns a randomly chosen vector that points to some point on the vector from self to other.
  def random_vector_on_line(other)
    vec = direction(other)
    multiplier = rand
    self + (vec * multiplier)
  end

  # Picks a random point on the segment from self to other and returns
  # [that point, a vector of the given magnitude perpendicular to the segment].
  def random_normal_line(other, magnitude)
    rand_mid = random_vector_on_line(other)
    normal_vector = direction(rand_mid).perpendicular.set_magnitude(magnitude)
    [rand_mid, normal_vector]
  end

  # Returns two random anchor points for a curve from self to other, ordered
  # by their x component. Both anchors lie on the same, randomly chosen side
  # of the segment, at most `spread` away from it.
  def generate_bezier_anchors(other, spread)
    side = rand.round == 1 ? 1 : -1
    calc = ->() do
      rand_mid, normal_vector = self.random_normal_line(other, spread)
      choice = normal_vector * side
      rand_mid.random_vector_on_line(rand_mid + choice)
    end
    [calc.(), calc.()].sort { |a, b| a.x - b.x }
  end

  # Returns a random point within `radius` of self: a random angle combined
  # with a distance of radius * sqrt(rand).
  def overshoot(radius)
    a = rand * 2 * Math::PI
    rad = radius * Math.sqrt(rand)
    vector = Vector.new(rad * Math.cos(a), rad * Math.sin(a))
    self + vector
  end

  # Builds a BezierCurve from self to finish with four control points:
  # self, two random anchors, and finish. The anchors deviate from the
  # straight line by 20% of the distance, confined to the range 10..90.
  def bezier_curve(finish)
    min = 10
    max = 90

    start = self
    deviation = start.direction(finish).magnitude * 0.2
    spread = confine(deviation, min, max)
    anchors = start.generate_bezier_anchors(finish, spread)

    control_point_vectors = [start] + anchors + [finish]
    control_points = control_point_vectors.map(&:to_a)
    BezierCurve.new(*control_points)
  end

  # Currently identical to #bezier_curve.
  # NOTE(review): intermediate_points is accepted but not used yet; the
  # resulting curve does not pass through those points.
  def bezier_curve_through_points(finish, intermediate_points = [])
    bezier_curve(finish)
  end
end
|
158
|
+
|
159
|
+
# Moves a browser's mouse cursor along curved, randomised paths instead of
# jumping straight to the target.
class BrowserMouseMover
  include MathUtils

  # browser must have the following methods:
  # #move(x, y) - moves the mouse cursor to the given (x, y) coordinates
  attr_accessor :browser

  # The [x, y] pair most recently passed to #set_coords.
  attr_reader :current_coords

  # Remembers the browser and places the cursor at the initial coordinates.
  def initialize(browser, initial_x = 0, initial_y = 0)
    @browser = browser
    set_coords(initial_x, initial_y)
  end

  # Moves the browser's cursor to (x, y) and records that position.
  def set_coords(x, y)
    browser.move(x, y)
    @current_coords = [x, y]
  end

  # Moves the cursor from its current position to (x2, y2).
  #
  # With overshoot enabled (the default) the cursor first travels to a random
  # point near the target and then corrects to the target itself. The miss
  # radius is a third of the travel distance, capped at 100 and truncated to
  # an integer.
  def move(x2, y2, overshoot = true)
    x1, y1 = *current_coords
    return move_over_path(x1, y1, x2, y2) unless overshoot

    origin = Vector.new(x1, y1)
    target = Vector.new(x2, y2)
    miss_radius = [origin.direction(target).magnitude * 0.33, 100].min.to_i
    miss_x, miss_y = *target.overshoot(miss_radius).to_a

    move_over_path(x1, y1, miss_x, miss_y)
    move_over_path(miss_x, miss_y, x2, y2)
  end

  # Walks the cursor over every point of the path from (x1, y1) to (x2, y2).
  # The overshoot argument is accepted but has no effect here; overshooting
  # is handled by #move.
  def move_over_path(x1, y1, x2, y2, overshoot = false)
    path(x1, y1, x2, y2).each do |point|
      px, py = *point
      set_coords(px, py)
    end
  end

  # reimplements https://github.com/Xetera/ghost-cursor/blob/master/src/spoof.ts#L92
  #
  # Returns the points of a random bezier curve from (x1, y1) to (x2, y2).
  # When fitts is truthy, the number of points is derived from Fitts's law
  # plus a random base, and negative coordinates are raised to 0; otherwise
  # the curve's default points are returned unmodified.
  def path(x1, y1, x2, y2, fitts = true)
    origin = Vector.new(x1, y1)
    target = Vector.new(x2, y2)
    curve = origin.bezier_curve(target)
    return curve.points unless fitts

    target_width = 100
    min_steps = 25
    # 80% of the straight-line distance stands in for the curve's length.
    travel = origin.direction(target).magnitude * 0.8
    base_time = rand * min_steps
    # `fitts(...)` with arguments is the MathUtils method, not the flag above.
    step_count = ((Math.log2(fitts(travel, target_width) + 1) + base_time) * 3).ceil

    curve.points(count: step_count).map { |point| coordinate_floor0(point) }
  end
end
|
232
|
+
end
|
233
|
+
end
|
@@ -0,0 +1,131 @@
|
|
1
|
+
require "fileutils"
|
2
|
+
require "mini_magick"
|
3
|
+
|
4
|
+
require_relative "ferrum_ext"
|
5
|
+
|
6
|
+
module AutoBrowse
|
7
|
+
|
8
|
+
# Page-level helpers: navigation, element geometry and screenshots.
#
# The including class must provide a #browser that responds to #driver,
# #mouse and #move(x, y).
module Pagelike
  # Returns the browser's driver, which every helper below talks to.
  def page
    browser.driver
  end

  # Forwards all arguments to page.scroll_to.
  def scroll_to(*args, **options)
    page.scroll_to(*args, **options)
  end

  # Returns the browser's mouse.
  def mouse
    browser.mouse
  end

  # Moves the browser's mouse to (x, y).
  def move(x, y)
    browser.move(x, y)
  end

  # Visits the given URL.
  def goto(product_url)
    page.visit(product_url)
  end

  # returns [top, left, bottom, right] of element
  # Each edge is read from getBoundingClientRect() with its own script call
  # and truncated with #to_i.
  def bounding_box(element)
    %w[top left bottom right].map do |edge|
      element.evaluate_script("this.getBoundingClientRect().#{edge};").to_i
    end
  end

  # returns [x, y] of element
  def coords(element)
    top, left, _bottom, _right = *bounding_box(element)
    [left, top]
  end

  # returns [width, height] of element
  def dimensions(element)
    top, left, bottom, right = *bounding_box(element)
    [right - left, bottom - top]
  end

  # returns [x, y, width, height] of element
  def coords_dimensions(element)
    top, left, bottom, right = *bounding_box(element)
    [left, top, right - left, bottom - top]
  end

  # this method will only work if the browser window has focus; otherwise, this method will raise a timeout exception
  def viewport_screenshot(path)
    page.save_screenshot(path, full: false) # this delegates to https://github.com/rubycdp/ferrum/blob/3a2dc276ba312831487b05cb6e176cae5a7375a4/lib/ferrum/page/screenshot.rb#L31
  end

  # this method will only work if the browser window has focus; otherwise, this method will raise a timeout exception
  def full_page_screenshot(path)
    page.save_screenshot(path, full: true) # this delegates to https://github.com/rubycdp/ferrum/blob/3a2dc276ba312831487b05cb6e176cae5a7375a4/lib/ferrum/page/screenshot.rb#L31
  end

  # Crops the element out of the most recent temporary screenshot and writes
  # the result to output_file_path (creating its directory if needed).
  #
  # containing_frame_offset_coords is an array of the form: [ [outer_frame.x, outer_frame.y], [inner_frame.x, inner_frame.y] ]
  # that lists the [x, y] (i.e. [left, top]) coords of all (if any) frames that contain the given element
  #
  # Raises a RuntimeError when no temporary screenshot has been taken yet
  # (see #temp_screenshot / #with_temp_screenshot).
  def element_screenshot(element, output_file_path, containing_frame_offset_coords = [])
    raise "A temporary screenshot must be taken before trying to capture an element screenshot." unless @last_screenshot

    x, y, width, height = *coords_dimensions(element)

    # Sum the offsets of all containing frames to translate the element's
    # frame-relative position into page coordinates.
    frame_offset_x, frame_offset_y = *containing_frame_offset_coords.reduce([0, 0]) { |memo, coord_pair| [memo[0] + coord_pair[0], memo[1] + coord_pair[1]] }
    page_x = frame_offset_x + x
    page_y = frame_offset_y + y

    # this imagemagick command performs the crop on the full screenshot
    # convert images/captcha/20210128_161014/tiles.png -crop 95x95+123+154 +repage images/captcha/20210128_161014/tile0.png
    image = MiniMagick::Image.open(@last_screenshot_path)
    image.combine_options do |cmd|
      cmd.crop "#{width}x#{height}+#{page_x}+#{page_y}"
      cmd.repage.+
    end

    FileUtils.mkdir_p(File.dirname(output_file_path))
    image.write(output_file_path)
  end

  # Takes a temporary viewport screenshot, yields its path, and always
  # removes the screenshot afterwards - also when the block raises.
  # Returns the value of the block.
  #
  # Raises ArgumentError (before anything is captured) when no block is given.
  #
  # example:
  # with_temp_screenshot do |tmp_path|
  #   puts "temp screenshot is at #{tmp_path}"
  # end
  def with_temp_screenshot(temp_screenshot_path = "./tmp_screenshot.png", &blk)
    raise ArgumentError, "with_temp_screenshot requires a block" unless blk

    begin
      temp_screenshot(temp_screenshot_path)
      blk.call(@last_screenshot_path)
    ensure
      delete_temp_screenshot
    end
  end

  # Takes a viewport screenshot at path and remembers it as the current
  # temporary screenshot, discarding the previous one if there is any.
  def temp_screenshot(path = "./tmp_screenshot.png")
    delete_temp_screenshot if @last_screenshot

    @last_screenshot_path = path
    @last_screenshot = viewport_screenshot(@last_screenshot_path)
  end

  # Deletes the temporary screenshot file if it still exists and forgets it.
  # The bookkeeping is reset even when the file is already gone, so that
  # #element_screenshot never works from a stale path.
  def delete_temp_screenshot
    File.delete(@last_screenshot_path) if @last_screenshot_path && File.exist?(@last_screenshot_path)

    @last_screenshot_path = nil
    @last_screenshot = nil
  end
end
|
119
|
+
|
120
|
+
|
121
|
+
# A page driven through a browser; all behaviour comes from Pagelike.
class Page
  include Pagelike

  # The browser this page delegates to.
  attr_accessor :browser

  # Wraps the given browser.
  def initialize(browser)
    self.browser = browser
  end
end
|
130
|
+
|
131
|
+
end
|
data/lib/auto_browse.rb
ADDED
data/sig/auto_browse.rbs
ADDED
metadata
ADDED
@@ -0,0 +1,146 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: auto_browse
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- David Ellis
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2022-04-20 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: capybara
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: cuprite
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: ferrum
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: bezier_curve
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: victor
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: mini_magick
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
description: auto_browse is a browser driver.
|
98
|
+
email:
|
99
|
+
- david@conquerthelawn.com
|
100
|
+
executables: []
|
101
|
+
extensions: []
|
102
|
+
extra_rdoc_files: []
|
103
|
+
files:
|
104
|
+
- ".rspec"
|
105
|
+
- Gemfile
|
106
|
+
- Gemfile.lock
|
107
|
+
- LICENSE.txt
|
108
|
+
- README.md
|
109
|
+
- Rakefile
|
110
|
+
- bin/console
|
111
|
+
- bin/setup
|
112
|
+
- lib/auto_browse.rb
|
113
|
+
- lib/auto_browse/browser.rb
|
114
|
+
- lib/auto_browse/capybara_ext.rb
|
115
|
+
- lib/auto_browse/ferrum_ext.rb
|
116
|
+
- lib/auto_browse/mouse.rb
|
117
|
+
- lib/auto_browse/page.rb
|
118
|
+
- lib/auto_browse/version.rb
|
119
|
+
- sig/auto_browse.rbs
|
120
|
+
homepage: https://github.com/davidkellis/auto_browse
|
121
|
+
licenses:
|
122
|
+
- MIT
|
123
|
+
metadata:
|
124
|
+
homepage_uri: https://github.com/davidkellis/auto_browse
|
125
|
+
source_code_uri: https://github.com/davidkellis/auto_browse
|
126
|
+
changelog_uri: https://github.com/davidkellis/auto_browse
|
127
|
+
post_install_message:
|
128
|
+
rdoc_options: []
|
129
|
+
require_paths:
|
130
|
+
- lib
|
131
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
132
|
+
requirements:
|
133
|
+
- - ">="
|
134
|
+
- !ruby/object:Gem::Version
|
135
|
+
version: 2.6.0
|
136
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
137
|
+
requirements:
|
138
|
+
- - ">="
|
139
|
+
- !ruby/object:Gem::Version
|
140
|
+
version: '0'
|
141
|
+
requirements: []
|
142
|
+
rubygems_version: 3.2.33
|
143
|
+
signing_key:
|
144
|
+
specification_version: 4
|
145
|
+
summary: auto_browse is a browser driver.
|
146
|
+
test_files: []
|