dh_easy-core 0.3.2 → 0.3.4
Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1f2ac7aed05a99e4908c4d69e5d7f523b371fa5bedafa28eb3ea366e79dccbb1
|
4
|
+
data.tar.gz: 0156a723554b7983b390cf06c204c306ac0d7e682c78a5064629983d1c5b47b5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e66d2bb8d41916a6c54029050b9665c0d0be2f00b3821dd2d183604f120f33735e685580d689560419a6836d28cbbd3b15b2e3a81ca7c8ca62c76b1b2a0bc5a4
|
7
|
+
data.tar.gz: 13ab9cd4131501c917c4e2bd666dd4ff3f10ad5326dc5fc47b866160ecf0c949c52a5c3d3d0120b5aa01c424f1644177c89e91110ac89fabd4c4adfb534df779
|
@@ -102,7 +102,8 @@ module DhEasy
|
|
102
102
|
# @return [URI::HTTPS]
|
103
103
|
def self.clean_uri_obj raw_url
|
104
104
|
url = URI.parse(raw_url)
|
105
|
-
url
|
105
|
+
return url if raw_url =~ /^\s*about:blank\s*$/i
|
106
|
+
url.hostname = url.hostname.downcase unless url.hostname.nil?
|
106
107
|
url.fragment = nil
|
107
108
|
|
108
109
|
# Sort query string keys
|
@@ -175,6 +176,7 @@ module DhEasy
|
|
175
176
|
return false if driver['pre_code'].to_s.strip != ''
|
176
177
|
return false if !driver['stealth'].nil? && !!driver['stealth']
|
177
178
|
return false if !driver['enable_images'].nil? && !!driver['enable_images']
|
179
|
+
return false if !driver['disable_adblocker'].nil? && !!driver['disable_adblocker']
|
178
180
|
return false if !driver['goto_options'].nil? && driver['goto_options'].is_a?(Hash) && driver['goto_options'].keys.length > 0
|
179
181
|
true
|
180
182
|
end
|
@@ -499,33 +501,61 @@ module DhEasy
|
|
499
501
|
@page_defaults ||= {
|
500
502
|
'job_id' => lambda{|page| job_id},
|
501
503
|
'url' => nil,
|
504
|
+
'effective_url' => nil,
|
502
505
|
'status' => 'to_fetch',
|
506
|
+
'hostname' => '',
|
503
507
|
'page_type' => 'default',
|
504
508
|
'method' => 'GET',
|
505
509
|
'headers' => {},
|
506
510
|
'fetch_type' => DEFAULT_FETCH_TYPE,
|
507
511
|
'cookie' => nil,
|
512
|
+
'no_default_headers' => false,
|
508
513
|
'no_redirect' => false,
|
509
514
|
'body' => nil,
|
510
515
|
'ua_type' => 'desktop',
|
516
|
+
'freshness' => nil,
|
517
|
+
'fresh' => nil,
|
518
|
+
'proxy_type' => '',
|
511
519
|
'no_url_encode' => false,
|
512
520
|
'http2' => false,
|
513
521
|
'priority' => 0,
|
514
522
|
'parsing_try_count' => 0,
|
515
523
|
'parsing_fail_count' => 0,
|
524
|
+
'parsing_at' => nil,
|
525
|
+
'parsing_failed_at' => nil,
|
526
|
+
'parsed_at' => nil,
|
516
527
|
'fetching_at' => '0001-01-01T00:00:00Z',
|
517
528
|
'fetching_try_count' => 0,
|
518
529
|
'refetch_count' => 0,
|
530
|
+
'to_fetch' => nil,
|
531
|
+
'fetched_at' => nil,
|
519
532
|
'fetched_from' => '',
|
520
533
|
'content_size' => 0,
|
534
|
+
'content_type' => nil,
|
521
535
|
'force_fetch' => false,
|
536
|
+
'response_checksum' => nil,
|
537
|
+
'response_cookie' => nil,
|
538
|
+
'response_headers' => nil,
|
539
|
+
'response_proto' => nil,
|
540
|
+
'response_status' => nil,
|
541
|
+
'response_status_code' => nil,
|
542
|
+
'failed_at' => nil,
|
543
|
+
'failed_content_type' => nil,
|
544
|
+
'failed_effective_url' => nil,
|
545
|
+
'failed_response_checksum' => nil,
|
546
|
+
'failed_response_cookie' => nil,
|
547
|
+
'failed_response_headers' => nil,
|
548
|
+
'failed_response_proto' => nil,
|
549
|
+
'failed_response_status' => nil,
|
550
|
+
'failed_response_status_code' => nil,
|
522
551
|
'driver' => {
|
523
552
|
'name' => '',
|
524
553
|
'pre_code' => '',
|
525
554
|
'code' => '',
|
526
555
|
'goto_options' => nil,
|
527
556
|
'stealth' => false,
|
528
|
-
'enable_images' => false
|
557
|
+
'enable_images' => false,
|
558
|
+
'disable_adblocker' => false
|
529
559
|
},
|
530
560
|
'display' => {
|
531
561
|
'width' => 0,
|
@@ -536,7 +566,11 @@ module DhEasy
|
|
536
566
|
'options' => nil
|
537
567
|
},
|
538
568
|
'driver_log' => nil,
|
539
|
-
'
|
569
|
+
'max_size' => 0,
|
570
|
+
'enable_global_cache' => nil,
|
571
|
+
'retry_interval' => nil,
|
572
|
+
'vars' => {},
|
573
|
+
'created_at' => nil,
|
540
574
|
}
|
541
575
|
end
|
542
576
|
|
@@ -574,7 +608,7 @@ module DhEasy
|
|
574
608
|
item['headers'] = nil if self.class.is_hash_empty? item['headers']
|
575
609
|
item['vars'] = nil if self.class.is_hash_empty? item['vars']
|
576
610
|
uri = self.class.clean_uri_obj(item['url'])
|
577
|
-
item['hostname'] = uri.hostname
|
611
|
+
item['hostname'] = (item['url'] =~ /^about:blank$/i) ? '127.0.0.1' : uri.hostname
|
578
612
|
uri = nil
|
579
613
|
if item['gid'].nil? || !allow_page_gid_override?
|
580
614
|
item['gid'] = generate_page_gid item
|
@@ -15,7 +15,10 @@ module DhEasy
|
|
15
15
|
:outputs,
|
16
16
|
:save_outputs,
|
17
17
|
:find_output,
|
18
|
-
:find_outputs
|
18
|
+
:find_outputs,
|
19
|
+
:get_content,
|
20
|
+
:get_failed_content,
|
21
|
+
:finish
|
19
22
|
]
|
20
23
|
DhEasy::Core::Mock::FakeExecutor.check_compatibility real_methods, mock_methods
|
21
24
|
mock_methods << :job_id
|
@@ -22,7 +22,11 @@ module DhEasy
|
|
22
22
|
:find_output,
|
23
23
|
:find_outputs,
|
24
24
|
:refetch,
|
25
|
-
:reparse
|
25
|
+
:reparse,
|
26
|
+
:limbo,
|
27
|
+
:get_content,
|
28
|
+
:get_failed_content,
|
29
|
+
:finish
|
26
30
|
].freeze
|
27
31
|
DhEasy::Core::Mock::FakeExecutor.check_compatibility real_methods, mock_methods
|
28
32
|
mock_methods
|
data/lib/dh_easy/core/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dh_easy-core
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.2
|
4
|
+
version: 0.3.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Eduardo Rosales
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-02-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: datahen
|