web_loader 2.1.0 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/exe/wl +16 -53
- data/lib/web_loader/downloader.rb +66 -0
- data/lib/web_loader/version.rb +1 -1
- data/lib/web_loader.rb +1 -0
- metadata +2 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 803b7f81240d732a305705cc0fa46610781688ce19203f0b7d8710822b088558
|
|
4
|
+
data.tar.gz: ec8ed1467ab473fa5fbae5a370304e3bb2281e525ecfc4f54c752828f49c1d00
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 51c54de50e0faa9a4886cc3c84efbf88887003c12e8cafcc9cfa1ea9c5ae5beeb795a2795d08d48031dd373f6cb56de1e28116c7d0450a18b288c04e81e54e0f
|
|
7
|
+
data.tar.gz: 62cd1ecd932a88fb51f919a5565833e5d14bbef94414acdcc55c90ab075fb387d0d98050753d05438bcaf3d8a371b731a2b146ac57cb98d4420a2b9ff97ffccc
|
data/Gemfile.lock
CHANGED
data/exe/wl
CHANGED
|
@@ -4,58 +4,21 @@ require "web_loader"
|
|
|
4
4
|
require 'web_loader/drivers/selenium_driver'
|
|
5
5
|
require 'optparse'
|
|
6
6
|
|
|
7
|
-
class Wl
|
|
8
|
-
def self.run(argv)
|
|
9
|
-
STDOUT.sync = true
|
|
10
|
-
opts = {}
|
|
11
|
-
opt = OptionParser.new(argv)
|
|
12
|
-
opt.banner = "Usage: #{opt.program_name} [-h|--help] [Options] <URL> "
|
|
13
|
-
opt.version = WebLoader::VERSION
|
|
14
|
-
opt.separator('')
|
|
15
|
-
opt.separator("Options:")
|
|
16
|
-
opt.on_head('-h', '--help', 'Show this message') do |v|
|
|
17
|
-
puts opt.help
|
|
18
|
-
exit
|
|
19
|
-
end
|
|
20
|
-
opt.on('-v', '--verbose', 'Verbose message') {|v| opts[:v] = v}
|
|
21
|
-
drivers = ['pureruby', 'selenium']
|
|
22
|
-
opt.on('-d DRIVER', '--driver=DRIVER', drivers, drivers.join("|") + "(default pureruby)") {|v| opts[:d] = v }
|
|
23
|
-
opt.on("--disable-cache", "Disable cache") {|v| opts[:disable_cache] = v }
|
|
24
|
-
opt.parse!(argv)
|
|
25
|
-
if argv.empty?
|
|
26
|
-
puts "Error: URL is required."
|
|
27
|
-
puts opt.help
|
|
28
|
-
exit
|
|
29
|
-
end
|
|
30
|
-
command = Wl.new(opts)
|
|
31
|
-
url = argv[0]
|
|
32
|
-
command.execute(url)
|
|
33
|
-
end
|
|
34
7
|
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
end
|
|
8
|
+
result = WebLoader::Downloader.run(ARGV)
|
|
9
|
+
puts result
|
|
38
10
|
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
driver = WebLoader::Drivers::SeleniumDriver.new
|
|
54
|
-
else
|
|
55
|
-
driver = WebLoader::Drivers::HttpDriver.new
|
|
56
|
-
end
|
|
57
|
-
driver
|
|
58
|
-
end
|
|
59
|
-
end
|
|
60
|
-
|
|
61
|
-
Wl.run(ARGV)
|
|
11
|
+
# custom downloader example(selenium with custom wait proc)
|
|
12
|
+
# wl --driver=selenium https://www.example.com
|
|
13
|
+
#
|
|
14
|
+
# class MyDownloader < WebLoader::Downloader
|
|
15
|
+
# def create_wait_proc
|
|
16
|
+
# # puts "Using custom wait proc..."
|
|
17
|
+
# proc do |driver|
|
|
18
|
+
# # Example wait condition: wait until the document is fully loaded
|
|
19
|
+
# driver.execute_script("return document.readyState") == "complete"
|
|
20
|
+
# end
|
|
21
|
+
# end
|
|
22
|
+
# end
|
|
23
|
+
# result = MyDownloader.run(ARGV)
|
|
24
|
+
# puts result
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
|
|
2
|
+
module WebLoader
|
|
3
|
+
class Downloader
|
|
4
|
+
|
|
5
|
+
def self.run(argv)
|
|
6
|
+
STDOUT.sync = true
|
|
7
|
+
opts = {}
|
|
8
|
+
opt = OptionParser.new(argv)
|
|
9
|
+
opt.banner = "Usage: #{opt.program_name} [-h|--help] [Options] <URL> "
|
|
10
|
+
opt.version = WebLoader::VERSION
|
|
11
|
+
opt.separator('')
|
|
12
|
+
opt.separator("Options:")
|
|
13
|
+
opt.on_head('-h', '--help', 'Show this message') do |v|
|
|
14
|
+
puts opt.help
|
|
15
|
+
exit
|
|
16
|
+
end
|
|
17
|
+
opt.on('-v', '--verbose', 'Verbose message') {|v| opts[:v] = v}
|
|
18
|
+
drivers = ['pureruby', 'selenium']
|
|
19
|
+
opt.on('-d DRIVER', '--driver=DRIVER', drivers, drivers.join("|") + "(default pureruby)") {|v| opts[:d] = v }
|
|
20
|
+
opt.on("--disable-cache", "Disable cache") {|v| opts[:disable_cache] = v }
|
|
21
|
+
opt.parse!(argv)
|
|
22
|
+
if argv.empty?
|
|
23
|
+
puts "Error: URL is required."
|
|
24
|
+
puts opt.help
|
|
25
|
+
exit
|
|
26
|
+
end
|
|
27
|
+
command = self.new(opts)
|
|
28
|
+
url = argv[0]
|
|
29
|
+
command.execute(url)
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
def initialize(opts)
|
|
33
|
+
@opts = opts
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
def execute(url)
|
|
37
|
+
driver = create_driver
|
|
38
|
+
loader = WebLoader::Command.new(driver)
|
|
39
|
+
if @opts[:disable_cache]
|
|
40
|
+
loader.use_cache = false
|
|
41
|
+
end
|
|
42
|
+
loader.load(url)
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
private
|
|
46
|
+
def create_driver
|
|
47
|
+
case @opts[:d]
|
|
48
|
+
when 'selenium'
|
|
49
|
+
driver = WebLoader::Drivers::SeleniumDriver.new
|
|
50
|
+
driver.wait_proc = create_wait_proc
|
|
51
|
+
else
|
|
52
|
+
driver = WebLoader::Drivers::HttpDriver.new
|
|
53
|
+
end
|
|
54
|
+
driver
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
def create_wait_proc
|
|
58
|
+
# proc do |driver|
|
|
59
|
+
# # Example wait condition: wait until the document is fully loaded
|
|
60
|
+
# ready_state = driver.execute_script('return document.readyState')
|
|
61
|
+
# ready_state == 'complete'
|
|
62
|
+
# end
|
|
63
|
+
end
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
end
|
data/lib/web_loader/version.rb
CHANGED
data/lib/web_loader.rb
CHANGED
|
@@ -4,6 +4,7 @@ require_relative "web_loader/version"
|
|
|
4
4
|
require_relative "web_loader/utils"
|
|
5
5
|
require_relative "web_loader/cache"
|
|
6
6
|
require_relative "web_loader/command"
|
|
7
|
+
require_relative "web_loader/downloader"
|
|
7
8
|
require_relative "web_loader/response"
|
|
8
9
|
require_relative "web_loader/drivers/base_driver"
|
|
9
10
|
require_relative "web_loader/drivers/http_driver"
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: web_loader
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 2.1.0
|
|
4
|
+
version: 2.2.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- src
|
|
@@ -39,6 +39,7 @@ files:
|
|
|
39
39
|
- lib/web_loader.rb
|
|
40
40
|
- lib/web_loader/cache.rb
|
|
41
41
|
- lib/web_loader/command.rb
|
|
42
|
+
- lib/web_loader/downloader.rb
|
|
42
43
|
- lib/web_loader/drivers/base_driver.rb
|
|
43
44
|
- lib/web_loader/drivers/http_driver.rb
|
|
44
45
|
- lib/web_loader/drivers/selenium_driver.rb
|