pulse-downloader 0.1.31 → 0.1.35

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3036b6512e69bffe3a1c2b4dbfb156c4b932910dbf95ca4d61d7e89307a86de5
4
- data.tar.gz: 2aa3cd36ea992d59463d3127ec589eb848b1e59258e1714b9673d2dbd93c7e8d
3
+ metadata.gz: df940062ab2c0dcdc96b256b76e8198683776d7bdb834198c0f75da85d5e63fb
4
+ data.tar.gz: 158f5746ceb0d820934e126cae7ca53dfca0a841ae8a588b2b955c85de5abc7f
5
5
  SHA512:
6
- metadata.gz: 9df1e6f4b4136a6061e6222df80c2899b28b73f7b68648c7c2ecb7007eb8aa6c26c4a18fd6d65f9c11651504068ef053d9007614dd6fc09765ccdcae2b9946e9
7
- data.tar.gz: 5ac6bfb3d0bed3bddbefdd752418b6ae6f86c50aa76dfff4dbc1806fcfc0f2336215646af825c787d5cd3cd6bc08b1fd9ec0c5747f50cefa0ed5da449bf7f7e3
6
+ metadata.gz: 2494b72940a92e4b13fc44c8c9c81ff63e48a35940e9bcc6437235afdd4c1ca2fdc43098b71f976ce42d5fa65b53979aa3b055b511c91c1ca6b0ff64a0dd472c
7
+ data.tar.gz: 1c40920e57fdbb4184034cc91953f82c5babf1c09146027b9b887b1bac5d8b45f4e386b21133bd44625f8adb8843daac8b4652e35e94ebf25188738d02d8a36c
data/README.md CHANGED
@@ -29,6 +29,7 @@ client = Pulse::Downloader::Client.new(
29
29
  save_data: true,
30
30
  save_path: '',
31
31
  read_from_save_path: false,
32
+ traverse_folders: false,
32
33
  verify_ssl: true,
33
34
  drop_exitsing_files_in_path: false,
34
35
  save_and_dont_return: true,
@@ -38,6 +39,8 @@ client = Pulse::Downloader::Client.new(
38
39
  )
39
40
 
40
41
  client.call!
42
+
43
+ client.file_paths # Will give you the list of file paths
41
44
  ```
42
45
 
43
46
  ## Development
@@ -12,6 +12,7 @@ module Pulse
12
12
  :save_data,
13
13
  :save_path,
14
14
  :read_from_save_path,
15
+ :traverse_folders,
15
16
  :verify_ssl,
16
17
  :headers,
17
18
  :drop_exitsing_files_in_path,
@@ -20,7 +21,9 @@ module Pulse
20
21
  :start_time,
21
22
  :end_time,
22
23
  :progress_bar,
23
- :base_url
24
+ :base_url,
25
+ :file_paths,
26
+ :folder_urls
24
27
 
25
28
  # Does not continue downloads-
26
29
  # Will only save once the file has been downloaded in memory
@@ -28,7 +31,6 @@ module Pulse
28
31
  # TODO: Validation
29
32
  # TODO: Retry
30
33
  # TODO: DNS
31
- # TODO: Multiple filetypes
32
34
  # TODO: lib/pulse/downloader/file_downloader.rb:13: warning: URI.escape is obsolete
33
35
  def initialize(url:,
34
36
  file_type:,
@@ -36,6 +38,7 @@ module Pulse
36
38
  save_data: false,
37
39
  save_path: '',
38
40
  read_from_save_path: false,
41
+ traverse_folders: false,
39
42
  verify_ssl: true,
40
43
  headers: nil,
41
44
  drop_exitsing_files_in_path: false,
@@ -49,6 +52,7 @@ module Pulse
49
52
  @save_data = save_data
50
53
  @save_path = save_path
51
54
  @read_from_save_path = read_from_save_path
55
+ @traverse_folders = traverse_folders
52
56
  @verify_ssl = verify_ssl
53
57
  @headers = headers
54
58
  @drop_exitsing_files_in_path = drop_exitsing_files_in_path
@@ -57,6 +61,7 @@ module Pulse
57
61
  @progress_bar = progress_bar
58
62
 
59
63
  @base_url = get_base_url
64
+ @folder_urls = []
60
65
  end
61
66
 
62
67
  def call!
@@ -66,12 +71,14 @@ module Pulse
66
71
  def call
67
72
  return false unless valid?
68
73
 
74
+ @file_paths = fetch_file_paths
75
+
69
76
  if @progress_bar
70
- @progress_bar = ::ProgressBar.new(fetch_file_paths.size)
77
+ @progress_bar = ::ProgressBar.new(file_paths.size)
71
78
  end
72
79
 
73
- fetch_file_paths.map do |file_path|
74
- download(file_path, @progress_bar)
80
+ file_paths.map do |file_path|
81
+ download(file_path, @progress_bar) if save_data
75
82
  @progress_bar.increment!
76
83
  end
77
84
  end
@@ -1,5 +1,5 @@
1
1
  module Pulse
2
2
  module Downloader
3
- VERSION = "0.1.31"
3
+ VERSION = "0.1.35"
4
4
  end
5
5
  end
@@ -2,9 +2,38 @@ module Pulse
2
2
  module Downloader
3
3
  module WebPageParser
4
4
  def fetch_file_paths(custom_path_root=nil)
5
+ if traverse_folders
6
+ fetch_folders(url).each do |folder_url|
7
+ fetch_and_parse_response(folder_url, custom_path_root)
8
+ end
9
+ else
10
+ fetch_and_parse_response(url, custom_path_root)
11
+ end
12
+ end
13
+
14
+ private
15
+
16
+ def fetch_folders(base_url)
17
+ current_paths = extract_hrefs(get_response(folder_url), custom_path_root)
18
+ return unless current_paths.compact.size > 0
19
+
20
+ @folder_urls = folder_urls.union(current_paths).uniq.compact
21
+
22
+ current_paths.each do |path|
23
+ fetch_folders(path)
24
+ end
25
+
26
+ folder_urls
27
+ end
28
+
29
+ def fetch_and_parse_response(folder_url, custom_path_root)
30
+ parse_response(get_response(folder_url), custom_path_root, file_type)
31
+ end
32
+
33
+ def get_response(folder_url)
5
34
  @start_time = get_micro_second_time
6
35
 
7
- response = HTTParty.get(url, verify: verify_ssl, headers: headers)
36
+ response = HTTParty.get(folder_url, verify: verify_ssl, headers: headers)
8
37
 
9
38
  @end_time = get_micro_second_time
10
39
 
@@ -12,6 +41,10 @@ module Pulse
12
41
  print_time
13
42
  end
14
43
 
44
+ response
45
+ end
46
+
47
+ def parse_response(response, custom_path_root, file_type)
15
48
  if file_type.is_a?(Array)
16
49
  file_type.flat_map do |type|
17
50
  extract_file_urls(response, custom_path_root, type)
@@ -21,8 +54,6 @@ module Pulse
21
54
  end
22
55
  end
23
56
 
24
- private
25
-
26
57
  def extract_file_urls(response, custom_path_root, type)
27
58
  return [] if response.body.nil? || response.body.empty?
28
59
 
@@ -33,6 +64,14 @@ module Pulse
33
64
  ).uniq
34
65
  end
35
66
 
67
+ def extract_hrefs(response, custom_path_root)
68
+ parse_html(response.body)
69
+ .css('a')
70
+ .map { |link| link['href'] }
71
+ .reject { |link| link == "../" }
72
+ .map { |link| add_base_url(link, custom_path_root) }
73
+ end
74
+
36
75
  def extract_all_urls(response, custom_path_root, type)
37
76
  parse_html(response.body)
38
77
  .to_s
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pulse-downloader
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.31
4
+ version: 0.1.35
5
5
  platform: ruby
6
6
  authors:
7
7
  - trex22