pulse-downloader 0.1.33 → 0.1.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ea8f7f1cba1f575af4fb82c8d83d7108decbdc8eabf38be38693b2f94c5d4b95
4
- data.tar.gz: 69deb413bdce72404ca9a76e5d5d01014214559c71a1aad421c8b3e32c400f52
3
+ metadata.gz: b0db887eeb1a5bdd42489b608d9ed59322af9532aa904a5c8ad6d0181aab5277
4
+ data.tar.gz: 1e0fd11d6462bd9c0f2c731c3e6fc1028a78577a1874d8959b1fd3b8af21123a
5
5
  SHA512:
6
- metadata.gz: 1f1633832d05cf665d116cd9180c66b02d7c0a0c5dbf77db03d758fa9f0f051982761125cbdfc4914e3dd53b5ee6c91fc9b74fe16cd9c992098e7f72df78ac52
7
- data.tar.gz: 4f571acd0d6019506a9cb58b5e86bcb8c79cbc856406342a1325864ee6f771b7c3bdc09e65d8bba6ee79d1d77b59f4a37b5e771e6b8b51d959350d32142bf378
6
+ metadata.gz: 189b1b83effd9aa0a71c4cf41dfcdbdc97d25dee9f5ee14a0a9d7e2a8de4652147a7df31575dfea6379ace00e45d21ada15a891cf1277c178e47622e0d573bd6
7
+ data.tar.gz: 35193c286218971b45a8a75b716c5f503ac239f7f7efb5f7cca7593827585bdb2194eae6879590bda4bb971ac4080677db560296d62fdf3c405b2635c4470411
data/README.md CHANGED
@@ -29,6 +29,7 @@ client = Pulse::Downloader::Client.new(
29
29
  save_data: true,
30
30
  save_path: '',
31
31
  read_from_save_path: false,
32
+ traverse_folders: false,
32
33
  verify_ssl: true,
33
34
  drop_exitsing_files_in_path: false,
34
35
  save_and_dont_return: true,
@@ -38,6 +39,8 @@ client = Pulse::Downloader::Client.new(
38
39
  )
39
40
 
40
41
  client.call!
42
+
43
+ client.file_paths # Will give you the list of file paths
41
44
  ```
42
45
 
43
46
  ## Development
@@ -12,6 +12,7 @@ module Pulse
12
12
  :save_data,
13
13
  :save_path,
14
14
  :read_from_save_path,
15
+ :traverse_folders,
15
16
  :verify_ssl,
16
17
  :headers,
17
18
  :drop_exitsing_files_in_path,
@@ -21,7 +22,8 @@ module Pulse
21
22
  :end_time,
22
23
  :progress_bar,
23
24
  :base_url,
24
- :file_paths
25
+ :file_paths,
26
+ :folder_urls
25
27
 
26
28
  # Does not continue downloads-
27
29
  # Will only save once the file has been downloaded in memory
@@ -29,7 +31,6 @@ module Pulse
29
31
  # TODO: Validation
30
32
  # TODO: Retry
31
33
  # TODO: DNS
32
- # TODO: Multiple filetypes
33
34
  # TODO: lib/pulse/downloader/file_downloader.rb:13: warning: URI.escape is obsolete
34
35
  def initialize(url:,
35
36
  file_type:,
@@ -37,6 +38,7 @@ module Pulse
37
38
  save_data: false,
38
39
  save_path: '',
39
40
  read_from_save_path: false,
41
+ traverse_folders: false,
40
42
  verify_ssl: true,
41
43
  headers: nil,
42
44
  drop_exitsing_files_in_path: false,
@@ -50,6 +52,7 @@ module Pulse
50
52
  @save_data = save_data
51
53
  @save_path = save_path
52
54
  @read_from_save_path = read_from_save_path
55
+ @traverse_folders = traverse_folders
53
56
  @verify_ssl = verify_ssl
54
57
  @headers = headers
55
58
  @drop_exitsing_files_in_path = drop_exitsing_files_in_path
@@ -58,6 +61,7 @@ module Pulse
58
61
  @progress_bar = progress_bar
59
62
 
60
63
  @base_url = get_base_url
64
+ @folder_urls = []
61
65
  end
62
66
 
63
67
  def call!
@@ -1,5 +1,5 @@
1
1
  module Pulse
2
2
  module Downloader
3
- VERSION = "0.1.33"
3
+ VERSION = "0.1.37"
4
4
  end
5
5
  end
@@ -2,9 +2,38 @@ module Pulse
2
2
  module Downloader
3
3
  module WebPageParser
4
4
  def fetch_file_paths(custom_path_root=nil)
5
+ if traverse_folders
6
+ fetch_folders(url, custom_path_root).each do |folder_url|
7
+ fetch_and_parse_response(folder_url, custom_path_root)
8
+ end
9
+ else
10
+ fetch_and_parse_response(url, custom_path_root)
11
+ end
12
+ end
13
+
14
+ private
15
+
16
+ def fetch_folders(folder_url, custom_path_root)
17
+ current_paths = extract_hrefs(get_response(folder_url), custom_path_root)
18
+ return unless current_paths.compact.size > 0
19
+
20
+ @folder_urls = folder_urls.union(current_paths).uniq.compact
21
+
22
+ current_paths.each do |path|
23
+ fetch_folders(path, custom_path_root)
24
+ end
25
+
26
+ folder_urls
27
+ end
28
+
29
+ def fetch_and_parse_response(folder_url, custom_path_root)
30
+ parse_response(get_response(folder_url), custom_path_root, file_type)
31
+ end
32
+
33
+ def get_response(folder_url)
5
34
  @start_time = get_micro_second_time
6
35
 
7
- response = HTTParty.get(url, verify: verify_ssl, headers: headers)
36
+ response = HTTParty.get(folder_url, verify: verify_ssl, headers: headers)
8
37
 
9
38
  @end_time = get_micro_second_time
10
39
 
@@ -12,6 +41,10 @@ module Pulse
12
41
  print_time
13
42
  end
14
43
 
44
+ response
45
+ end
46
+
47
+ def parse_response(response, custom_path_root, file_type)
15
48
  if file_type.is_a?(Array)
16
49
  file_type.flat_map do |type|
17
50
  extract_file_urls(response, custom_path_root, type)
@@ -21,8 +54,6 @@ module Pulse
21
54
  end
22
55
  end
23
56
 
24
- private
25
-
26
57
  def extract_file_urls(response, custom_path_root, type)
27
58
  return [] if response.body.nil? || response.body.empty?
28
59
 
@@ -33,6 +64,14 @@ module Pulse
33
64
  ).uniq
34
65
  end
35
66
 
67
+ def extract_hrefs(response, custom_path_root)
68
+ parse_html(response.body)
69
+ .css('a')
70
+ .map { |link| link['href'] }
71
+ .reject { |link| link == "../" }
72
+ .map { |link| add_base_url(link, custom_path_root) }
73
+ end
74
+
36
75
  def extract_all_urls(response, custom_path_root, type)
37
76
  parse_html(response.body)
38
77
  .to_s
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pulse-downloader
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.33
4
+ version: 0.1.37
5
5
  platform: ruby
6
6
  authors:
7
7
  - trex22