pulse-downloader 0.1.33 → 0.1.34
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3 -0
- data/lib/pulse/downloader/client.rb +3 -1
- data/lib/pulse/downloader/version.rb +1 -1
- data/lib/pulse/downloader/web_page_parser.rb +21 -3
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ee4cd8b3e10c09d8a52ddf222d3d423ffb188a0b7eb4993febbcc97b74644764
|
4
|
+
data.tar.gz: 2126a1a1cecc91c2b0aa275ca17322deac30779682acbd6c08d4dad9f48d412e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0b38229caa3120433d2434acc5ca8122c86fef5957db323895ea781473ef1cf40a9ab202f581c48de5ab27f438bfd0d046248867e5ea924ff9d473828d84763f
|
7
|
+
data.tar.gz: fd5908c8b30f58f07fa6496db39b88ba24a9f1dde4a6d87df5e65ba133b00241e6e836aeb4d66badbe832b6c4eb8621f98c1cbca03d3e94cfc2c7cbc18bef2d6
|
data/README.md
CHANGED
@@ -29,6 +29,7 @@ client = Pulse::Downloader::Client.new(
|
|
29
29
|
save_data: true,
|
30
30
|
save_path: '',
|
31
31
|
read_from_save_path: false,
|
32
|
+
traverse_folders: false,
|
32
33
|
verify_ssl: true,
|
33
34
|
drop_exitsing_files_in_path: false,
|
34
35
|
save_and_dont_return: true,
|
@@ -38,6 +39,8 @@ client = Pulse::Downloader::Client.new(
|
|
38
39
|
)
|
39
40
|
|
40
41
|
client.call!
|
42
|
+
|
43
|
+
client.file_paths # Will give you the list of file paths
|
41
44
|
```
|
42
45
|
|
43
46
|
## Development
|
@@ -12,6 +12,7 @@ module Pulse
|
|
12
12
|
:save_data,
|
13
13
|
:save_path,
|
14
14
|
:read_from_save_path,
|
15
|
+
:traverse_folders,
|
15
16
|
:verify_ssl,
|
16
17
|
:headers,
|
17
18
|
:drop_exitsing_files_in_path,
|
@@ -29,7 +30,6 @@ module Pulse
|
|
29
30
|
# TODO: Validation
|
30
31
|
# TODO: Retry
|
31
32
|
# TODO: DNS
|
32
|
-
# TODO: Multiple filetypes
|
33
33
|
# TODO: lib/pulse/downloader/file_downloader.rb:13: warning: URI.escape is obsolete
|
34
34
|
def initialize(url:,
|
35
35
|
file_type:,
|
@@ -37,6 +37,7 @@ module Pulse
|
|
37
37
|
save_data: false,
|
38
38
|
save_path: '',
|
39
39
|
read_from_save_path: false,
|
40
|
+
traverse_folders: false,
|
40
41
|
verify_ssl: true,
|
41
42
|
headers: nil,
|
42
43
|
drop_exitsing_files_in_path: false,
|
@@ -50,6 +51,7 @@ module Pulse
|
|
50
51
|
@save_data = save_data
|
51
52
|
@save_path = save_path
|
52
53
|
@read_from_save_path = read_from_save_path
|
54
|
+
@traverse_folders = traverse_folders
|
53
55
|
@verify_ssl = verify_ssl
|
54
56
|
@headers = headers
|
55
57
|
@drop_exitsing_files_in_path = drop_exitsing_files_in_path
|
@@ -2,9 +2,25 @@ module Pulse
|
|
2
2
|
module Downloader
|
3
3
|
module WebPageParser
|
4
4
|
def fetch_file_paths(custom_path_root=nil)
|
5
|
+
if traverse_folders
|
6
|
+
fetch_folders(url).each do |folder_url|
|
7
|
+
fetch_and_parse_response(folder_url, custom_path_root)
|
8
|
+
end
|
9
|
+
else
|
10
|
+
fetch_and_parse_response(url, custom_path_root)
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
private
|
15
|
+
|
16
|
+
def fetch_folders(base_url)
|
17
|
+
|
18
|
+
end
|
19
|
+
|
20
|
+
def fetch_and_parse_response(folder_url, custom_path_root)
|
5
21
|
@start_time = get_micro_second_time
|
6
22
|
|
7
|
-
response = HTTParty.get(
|
23
|
+
response = HTTParty.get(folder_url, verify: verify_ssl, headers: headers)
|
8
24
|
|
9
25
|
@end_time = get_micro_second_time
|
10
26
|
|
@@ -12,6 +28,10 @@ module Pulse
|
|
12
28
|
print_time
|
13
29
|
end
|
14
30
|
|
31
|
+
parse_response(response, custom_path_root, file_type)
|
32
|
+
end
|
33
|
+
|
34
|
+
def parse_response(response, custom_path_root, file_type)
|
15
35
|
if file_type.is_a?(Array)
|
16
36
|
file_type.flat_map do |type|
|
17
37
|
extract_file_urls(response, custom_path_root, type)
|
@@ -21,8 +41,6 @@ module Pulse
|
|
21
41
|
end
|
22
42
|
end
|
23
43
|
|
24
|
-
private
|
25
|
-
|
26
44
|
def extract_file_urls(response, custom_path_root, type)
|
27
45
|
return [] if response.body.nil? || response.body.empty?
|
28
46
|
|