pulse-downloader 0.1.33 → 0.1.37

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ea8f7f1cba1f575af4fb82c8d83d7108decbdc8eabf38be38693b2f94c5d4b95
4
- data.tar.gz: 69deb413bdce72404ca9a76e5d5d01014214559c71a1aad421c8b3e32c400f52
3
+ metadata.gz: b0db887eeb1a5bdd42489b608d9ed59322af9532aa904a5c8ad6d0181aab5277
4
+ data.tar.gz: 1e0fd11d6462bd9c0f2c731c3e6fc1028a78577a1874d8959b1fd3b8af21123a
5
5
  SHA512:
6
- metadata.gz: 1f1633832d05cf665d116cd9180c66b02d7c0a0c5dbf77db03d758fa9f0f051982761125cbdfc4914e3dd53b5ee6c91fc9b74fe16cd9c992098e7f72df78ac52
7
- data.tar.gz: 4f571acd0d6019506a9cb58b5e86bcb8c79cbc856406342a1325864ee6f771b7c3bdc09e65d8bba6ee79d1d77b59f4a37b5e771e6b8b51d959350d32142bf378
6
+ metadata.gz: 189b1b83effd9aa0a71c4cf41dfcdbdc97d25dee9f5ee14a0a9d7e2a8de4652147a7df31575dfea6379ace00e45d21ada15a891cf1277c178e47622e0d573bd6
7
+ data.tar.gz: 35193c286218971b45a8a75b716c5f503ac239f7f7efb5f7cca7593827585bdb2194eae6879590bda4bb971ac4080677db560296d62fdf3c405b2635c4470411
data/README.md CHANGED
@@ -29,6 +29,7 @@ client = Pulse::Downloader::Client.new(
29
29
  save_data: true,
30
30
  save_path: '',
31
31
  read_from_save_path: false,
32
+ traverse_folders: false,
32
33
  verify_ssl: true,
33
34
  drop_exitsing_files_in_path: false,
34
35
  save_and_dont_return: true,
@@ -38,6 +39,8 @@ client = Pulse::Downloader::Client.new(
38
39
  )
39
40
 
40
41
  client.call!
42
+
43
+ client.file_paths # Will give you the list of file paths
41
44
  ```
42
45
 
43
46
  ## Development
@@ -12,6 +12,7 @@ module Pulse
12
12
  :save_data,
13
13
  :save_path,
14
14
  :read_from_save_path,
15
+ :traverse_folders,
15
16
  :verify_ssl,
16
17
  :headers,
17
18
  :drop_exitsing_files_in_path,
@@ -21,7 +22,8 @@ module Pulse
21
22
  :end_time,
22
23
  :progress_bar,
23
24
  :base_url,
24
- :file_paths
25
+ :file_paths,
26
+ :folder_urls
25
27
 
26
28
  # Does not continue downloads-
27
29
  # Will only save once the file has been downloaded in memory
@@ -29,7 +31,6 @@ module Pulse
29
31
  # TODO: Validation
30
32
  # TODO: Retry
31
33
  # TODO: DNS
32
- # TODO: Multiple filetypes
33
34
  # TODO: lib/pulse/downloader/file_downloader.rb:13: warning: URI.escape is obsolete
34
35
  def initialize(url:,
35
36
  file_type:,
@@ -37,6 +38,7 @@ module Pulse
37
38
  save_data: false,
38
39
  save_path: '',
39
40
  read_from_save_path: false,
41
+ traverse_folders: false,
40
42
  verify_ssl: true,
41
43
  headers: nil,
42
44
  drop_exitsing_files_in_path: false,
@@ -50,6 +52,7 @@ module Pulse
50
52
  @save_data = save_data
51
53
  @save_path = save_path
52
54
  @read_from_save_path = read_from_save_path
55
+ @traverse_folders = traverse_folders
53
56
  @verify_ssl = verify_ssl
54
57
  @headers = headers
55
58
  @drop_exitsing_files_in_path = drop_exitsing_files_in_path
@@ -58,6 +61,7 @@ module Pulse
58
61
  @progress_bar = progress_bar
59
62
 
60
63
  @base_url = get_base_url
64
+ @folder_urls = []
61
65
  end
62
66
 
63
67
  def call!
@@ -1,5 +1,5 @@
1
1
  module Pulse
2
2
  module Downloader
3
- VERSION = "0.1.33"
3
+ VERSION = "0.1.37"
4
4
  end
5
5
  end
@@ -2,9 +2,38 @@ module Pulse
2
2
  module Downloader
3
3
  module WebPageParser
4
4
  def fetch_file_paths(custom_path_root=nil)
5
+ if traverse_folders
6
+ fetch_folders(url, custom_path_root).each do |folder_url|
7
+ fetch_and_parse_response(folder_url, custom_path_root)
8
+ end
9
+ else
10
+ fetch_and_parse_response(url, custom_path_root)
11
+ end
12
+ end
13
+
14
+ private
15
+
16
+ def fetch_folders(folder_url, custom_path_root)
17
+ current_paths = extract_hrefs(get_response(folder_url), custom_path_root)
18
+ return unless current_paths.compact.size > 0
19
+
20
+ @folder_urls = folder_urls.union(current_paths).uniq.compact
21
+
22
+ current_paths.each do |path|
23
+ fetch_folders(path, custom_path_root)
24
+ end
25
+
26
+ folder_urls
27
+ end
28
+
29
+ def fetch_and_parse_response(folder_url, custom_path_root)
30
+ parse_response(get_response(folder_url), custom_path_root, file_type)
31
+ end
32
+
33
+ def get_response(folder_url)
5
34
  @start_time = get_micro_second_time
6
35
 
7
- response = HTTParty.get(url, verify: verify_ssl, headers: headers)
36
+ response = HTTParty.get(folder_url, verify: verify_ssl, headers: headers)
8
37
 
9
38
  @end_time = get_micro_second_time
10
39
 
@@ -12,6 +41,10 @@ module Pulse
12
41
  print_time
13
42
  end
14
43
 
44
+ response
45
+ end
46
+
47
+ def parse_response(response, custom_path_root, file_type)
15
48
  if file_type.is_a?(Array)
16
49
  file_type.flat_map do |type|
17
50
  extract_file_urls(response, custom_path_root, type)
@@ -21,8 +54,6 @@ module Pulse
21
54
  end
22
55
  end
23
56
 
24
- private
25
-
26
57
  def extract_file_urls(response, custom_path_root, type)
27
58
  return [] if response.body.nil? || response.body.empty?
28
59
 
@@ -33,6 +64,14 @@ module Pulse
33
64
  ).uniq
34
65
  end
35
66
 
67
+ def extract_hrefs(response, custom_path_root)
68
+ parse_html(response.body)
69
+ .css('a')
70
+ .map { |link| link['href'] }
71
+ .reject { |link| link == "../" }
72
+ .map { |link| add_base_url(link, custom_path_root) }
73
+ end
74
+
36
75
  def extract_all_urls(response, custom_path_root, type)
37
76
  parse_html(response.body)
38
77
  .to_s
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pulse-downloader
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.33
4
+ version: 0.1.37
5
5
  platform: ruby
6
6
  authors:
7
7
  - trex22