pulse-downloader 0.1.31 → 0.1.35

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3036b6512e69bffe3a1c2b4dbfb156c4b932910dbf95ca4d61d7e89307a86de5
4
- data.tar.gz: 2aa3cd36ea992d59463d3127ec589eb848b1e59258e1714b9673d2dbd93c7e8d
3
+ metadata.gz: df940062ab2c0dcdc96b256b76e8198683776d7bdb834198c0f75da85d5e63fb
4
+ data.tar.gz: 158f5746ceb0d820934e126cae7ca53dfca0a841ae8a588b2b955c85de5abc7f
5
5
  SHA512:
6
- metadata.gz: 9df1e6f4b4136a6061e6222df80c2899b28b73f7b68648c7c2ecb7007eb8aa6c26c4a18fd6d65f9c11651504068ef053d9007614dd6fc09765ccdcae2b9946e9
7
- data.tar.gz: 5ac6bfb3d0bed3bddbefdd752418b6ae6f86c50aa76dfff4dbc1806fcfc0f2336215646af825c787d5cd3cd6bc08b1fd9ec0c5747f50cefa0ed5da449bf7f7e3
6
+ metadata.gz: 2494b72940a92e4b13fc44c8c9c81ff63e48a35940e9bcc6437235afdd4c1ca2fdc43098b71f976ce42d5fa65b53979aa3b055b511c91c1ca6b0ff64a0dd472c
7
+ data.tar.gz: 1c40920e57fdbb4184034cc91953f82c5babf1c09146027b9b887b1bac5d8b45f4e386b21133bd44625f8adb8843daac8b4652e35e94ebf25188738d02d8a36c
data/README.md CHANGED
@@ -29,6 +29,7 @@ client = Pulse::Downloader::Client.new(
29
29
  save_data: true,
30
30
  save_path: '',
31
31
  read_from_save_path: false,
32
+ traverse_folders: false,
32
33
  verify_ssl: true,
33
34
  drop_exitsing_files_in_path: false,
34
35
  save_and_dont_return: true,
@@ -38,6 +39,8 @@ client = Pulse::Downloader::Client.new(
38
39
  )
39
40
 
40
41
  client.call!
42
+
43
+ client.file_paths # Will give you the list of file paths
41
44
  ```
42
45
 
43
46
  ## Development
@@ -12,6 +12,7 @@ module Pulse
12
12
  :save_data,
13
13
  :save_path,
14
14
  :read_from_save_path,
15
+ :traverse_folders,
15
16
  :verify_ssl,
16
17
  :headers,
17
18
  :drop_exitsing_files_in_path,
@@ -20,7 +21,9 @@ module Pulse
20
21
  :start_time,
21
22
  :end_time,
22
23
  :progress_bar,
23
- :base_url
24
+ :base_url,
25
+ :file_paths,
26
+ :folder_urls
24
27
 
25
28
  # Does not continue downloads.
26
29
  # Will only save once the file has been downloaded in memory
@@ -28,7 +31,6 @@ module Pulse
28
31
  # TODO: Validation
29
32
  # TODO: Retry
30
33
  # TODO: DNS
31
- # TODO: Multiple filetypes
32
34
  # TODO: lib/pulse/downloader/file_downloader.rb:13: warning: URI.escape is obsolete
33
35
  def initialize(url:,
34
36
  file_type:,
@@ -36,6 +38,7 @@ module Pulse
36
38
  save_data: false,
37
39
  save_path: '',
38
40
  read_from_save_path: false,
41
+ traverse_folders: false,
39
42
  verify_ssl: true,
40
43
  headers: nil,
41
44
  drop_exitsing_files_in_path: false,
@@ -49,6 +52,7 @@ module Pulse
49
52
  @save_data = save_data
50
53
  @save_path = save_path
51
54
  @read_from_save_path = read_from_save_path
55
+ @traverse_folders = traverse_folders
52
56
  @verify_ssl = verify_ssl
53
57
  @headers = headers
54
58
  @drop_exitsing_files_in_path = drop_exitsing_files_in_path
@@ -57,6 +61,7 @@ module Pulse
57
61
  @progress_bar = progress_bar
58
62
 
59
63
  @base_url = get_base_url
64
+ @folder_urls = []
60
65
  end
61
66
 
62
67
  def call!
@@ -66,12 +71,14 @@ module Pulse
66
71
  def call
67
72
  return false unless valid?
68
73
 
74
+ @file_paths = fetch_file_paths
75
+
69
76
  if @progress_bar
70
- @progress_bar = ::ProgressBar.new(fetch_file_paths.size)
77
+ @progress_bar = ::ProgressBar.new(file_paths.size)
71
78
  end
72
79
 
73
- fetch_file_paths.map do |file_path|
74
- download(file_path, @progress_bar)
80
+ file_paths.map do |file_path|
81
+ download(file_path, @progress_bar) if save_data
75
82
  @progress_bar.increment!
76
83
  end
77
84
  end
@@ -1,5 +1,5 @@
1
1
  module Pulse
2
2
  module Downloader
3
- VERSION = "0.1.31"
3
+ VERSION = "0.1.35"
4
4
  end
5
5
  end
@@ -2,9 +2,38 @@ module Pulse
2
2
  module Downloader
3
3
  module WebPageParser
4
4
  def fetch_file_paths(custom_path_root=nil)
5
+ if traverse_folders
6
+ fetch_folders(url).each do |folder_url|
7
+ fetch_and_parse_response(folder_url, custom_path_root)
8
+ end
9
+ else
10
+ fetch_and_parse_response(url, custom_path_root)
11
+ end
12
+ end
13
+
14
+ private
15
+
16
+ def fetch_folders(base_url)
17
+ current_paths = extract_hrefs(get_response(folder_url), custom_path_root)
18
+ return unless current_paths.compact.size > 0
19
+
20
+ @folder_urls = folder_urls.union(current_paths).uniq.compact
21
+
22
+ current_paths.each do |path|
23
+ fetch_folders(path)
24
+ end
25
+
26
+ folder_urls
27
+ end
28
+
29
+ def fetch_and_parse_response(folder_url, custom_path_root)
30
+ parse_response(get_response(folder_url), custom_path_root, file_type)
31
+ end
32
+
33
+ def get_response(folder_url)
5
34
  @start_time = get_micro_second_time
6
35
 
7
- response = HTTParty.get(url, verify: verify_ssl, headers: headers)
36
+ response = HTTParty.get(folder_url, verify: verify_ssl, headers: headers)
8
37
 
9
38
  @end_time = get_micro_second_time
10
39
 
@@ -12,6 +41,10 @@ module Pulse
12
41
  print_time
13
42
  end
14
43
 
44
+ response
45
+ end
46
+
47
+ def parse_response(response, custom_path_root, file_type)
15
48
  if file_type.is_a?(Array)
16
49
  file_type.flat_map do |type|
17
50
  extract_file_urls(response, custom_path_root, type)
@@ -21,8 +54,6 @@ module Pulse
21
54
  end
22
55
  end
23
56
 
24
- private
25
-
26
57
  def extract_file_urls(response, custom_path_root, type)
27
58
  return [] if response.body.nil? || response.body.empty?
28
59
 
@@ -33,6 +64,14 @@ module Pulse
33
64
  ).uniq
34
65
  end
35
66
 
67
# Lists the link targets of every anchor on a fetched page, each passed
# through +add_base_url+.
#
# Anchors without an +href+ attribute and the parent-directory link ("../")
# are dropped before +add_base_url+ is called, so it never receives nil.
#
# @param response [#body] response whose body is handed to +parse_html+
# @param custom_path_root [Object, nil] forwarded unchanged to +add_base_url+
# @return [Array] one entry per remaining anchor, in page order
def extract_hrefs(response, custom_path_root)
  parse_html(response.body)
    .css('a')
    .map { |link| link['href'] }
    .reject { |href| href.nil? || href == "../" }
    .map { |href| add_base_url(href, custom_path_root) }
end
74
+
36
75
  def extract_all_urls(response, custom_path_root, type)
37
76
  parse_html(response.body)
38
77
  .to_s
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pulse-downloader
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.31
4
+ version: 0.1.35
5
5
  platform: ruby
6
6
  authors:
7
7
  - trex22