wayback_machine_downloader_straw 2.3.8 → 2.3.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: df42d96c68c19fd39b6da3c9e9d51934197484ccb1ceb7a9387116622b0214a7
4
- data.tar.gz: d6f04e3dc44c9f216b9d3dc631275fac5e48447ebd963a33818e82baf1ff79b3
3
+ metadata.gz: ef661bf573b09f79453cf6343d737c24715f343b6593cf313f2502ecd9a650cb
4
+ data.tar.gz: b80be4aaae7ab4ff695af6cc85273ac437fab1e6a68d3d8bdad67a9661be17e4
5
5
  SHA512:
6
- metadata.gz: b9654877bb591082e1ef1c5dfdacff0bf887ed68f8ae1b2d995a99b87232523aa3350aede2d8cbb4045dbb15b380a1e93451004a45f881ad323615c0f66632c5
7
- data.tar.gz: eb8753d3ceb689e9b8c3f3dbaeeac7c9dd818497f916882d5d3271f1901c099f8b7103e7b49bcef51d71aab86b2607174ac2eece768a092242b0d5e0dcec9b28
6
+ metadata.gz: 3dfb6477b142eebb45741e1b5a4552dd33feac34baa1eae5453baaa08a9a5be242ba46d4f1162e2dd2b68e8903e6de8402d6b6fa86128f312defac74f2e8da29
7
+ data.tar.gz: 39758aef4bda77babb81d479ef9f266e3fa328af163c7c3c053290796fda95ccb8ec8d3725a9dae5164b79debc6530919cd79df3f7421842f951b0ee6ef79e60
@@ -7,7 +7,8 @@ module ArchiveAPI
7
7
  # Automatically append /* if the URL doesn't contain a path after the domain
8
8
  # This is a workaround for an issue with the API and *some* domains.
9
9
  # See https://github.com/StrawberryMaster/wayback-machine-downloader/issues/6
10
- if url && !url.match(/^https?:\/\/.*\//i)
10
+ # But don't do this when exact_url flag is set
11
+ if url && !url.match(/^https?:\/\/.*\//i) && !@exact_url
11
12
  url = "#{url}/*"
12
13
  end
13
14
 
@@ -113,7 +113,7 @@ class WaybackMachineDownloader
113
113
 
114
114
  include ArchiveAPI
115
115
 
116
- VERSION = "2.3.8"
116
+ VERSION = "2.3.10"
117
117
  DEFAULT_TIMEOUT = 30
118
118
  MAX_RETRIES = 3
119
119
  RETRY_DELAY = 2
@@ -131,7 +131,11 @@ class WaybackMachineDownloader
131
131
  validate_params(params)
132
132
  @base_url = params[:base_url]
133
133
  @exact_url = params[:exact_url]
134
- @directory = params[:directory]
134
+ if params[:directory]
135
+ @directory = File.expand_path(params[:directory])
136
+ else
137
+ @directory = nil
138
+ end
135
139
  @all_timestamps = params[:all_timestamps]
136
140
  @from_timestamp = params[:from_timestamp].to_i
137
141
  @to_timestamp = params[:to_timestamp].to_i
@@ -165,13 +169,11 @@ class WaybackMachineDownloader
165
169
 
166
170
  def backup_path
167
171
  if @directory
168
- if @directory[-1] == '/'
169
- @directory
170
- else
171
- @directory + '/'
172
- end
172
+ # @directory was already expanded to an absolute path when it was assigned, so return it as-is
173
+ @directory
173
174
  else
174
- 'websites/' + backup_name + '/'
175
+ # ensure the default path is absolute and normalized
176
+ File.expand_path(File.join('websites', backup_name))
175
177
  end
176
178
  end
177
179
 
@@ -382,7 +384,7 @@ class WaybackMachineDownloader
382
384
  end
383
385
  else
384
386
  file_list_curated = get_file_list_curated
385
- file_list_curated = file_list_curated.sort_by { |k,v| v[:timestamp] }.reverse
387
+ file_list_curated = file_list_curated.sort_by { |_,v| v[:timestamp].to_s }.reverse
386
388
  file_list_curated.map do |file_remote_info|
387
389
  file_remote_info[1][:file_id] = file_remote_info[0]
388
390
  file_remote_info[1]
@@ -638,21 +640,35 @@ class WaybackMachineDownloader
638
640
  file_url = file_remote_info[:file_url].encode(current_encoding)
639
641
  file_id = file_remote_info[:file_id]
640
642
  file_timestamp = file_remote_info[:timestamp]
641
- file_path_elements = file_id.split('/')
643
+
644
+ # sanitize file_id to ensure it is a valid path component
645
+ raw_path_elements = file_id.split('/')
646
+
647
+ sanitized_path_elements = raw_path_elements.map do |element|
648
+ if Gem.win_platform?
649
+ # for Windows, we need to sanitize path components to avoid invalid characters
650
+ # this prevents issues with file names that contain characters not allowed in
651
+ # Windows file systems. See https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file#naming-conventions
652
+ element.gsub(/[:\*?"<>\|\&\=\/\\]/) { |match| '%' + match.ord.to_s(16).upcase }
653
+ else
654
+ element
655
+ end
656
+ end
657
+
658
+ current_backup_path = backup_path
642
659
 
643
660
  if file_id == ""
644
- dir_path = backup_path
645
- file_path = backup_path + 'index.html'
646
- elsif file_url[-1] == '/' or not file_path_elements[-1].include? '.'
647
- dir_path = backup_path + file_path_elements[0..-1].join('/')
648
- file_path = backup_path + file_path_elements[0..-1].join('/') + '/index.html'
661
+ dir_path = current_backup_path
662
+ file_path = File.join(dir_path, 'index.html')
663
+ elsif file_url[-1] == '/' || (sanitized_path_elements.last && !sanitized_path_elements.last.include?('.'))
664
+ # if file_id is a directory, we treat it as such
665
+ dir_path = File.join(current_backup_path, *sanitized_path_elements)
666
+ file_path = File.join(dir_path, 'index.html')
649
667
  else
650
- dir_path = backup_path + file_path_elements[0..-2].join('/')
651
- file_path = backup_path + file_path_elements[0..-1].join('/')
652
- end
653
- if Gem.win_platform?
654
- dir_path = dir_path.gsub(/[:*?&=<>\\|]/) {|s| '%' + s.ord.to_s(16) }
655
- file_path = file_path.gsub(/[:*?&=<>\\|]/) {|s| '%' + s.ord.to_s(16) }
668
+ # if file_id is a file, we treat it as such
669
+ filename = sanitized_path_elements.pop
670
+ dir_path = File.join(current_backup_path, *sanitized_path_elements)
671
+ file_path = File.join(dir_path, filename)
656
672
  end
657
673
 
658
674
  # check existence *before* download attempt
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wayback_machine_downloader_straw
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.3.8
4
+ version: 2.3.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - strawberrymaster
8
8
  bindir: bin
9
9
  cert_chain: []
10
- date: 2025-06-05 00:00:00.000000000 Z
10
+ date: 2025-06-27 00:00:00.000000000 Z
11
11
  dependencies:
12
12
  - !ruby/object:Gem::Dependency
13
13
  name: concurrent-ruby