RubyGems - wayback_machine_downloader_straw - Versions diffs - 2.3.8 → 2.3.10 - Mend

wayback_machine_downloader_straw 2.3.8 → 2.3.10

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the contents of each package version as it appears in its respective public registry.

Files changed (4) hide show

checksums.yaml +4 -4
data/lib/wayback_machine_downloader/archive_api.rb +2 -1
data/lib/wayback_machine_downloader.rb +37 -21
metadata +2 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: df42d96c68c19fd39b6da3c9e9d51934197484ccb1ceb7a9387116622b0214a7
-  data.tar.gz: d6f04e3dc44c9f216b9d3dc631275fac5e48447ebd963a33818e82baf1ff79b3
+  metadata.gz: ef661bf573b09f79453cf6343d737c24715f343b6593cf313f2502ecd9a650cb
+  data.tar.gz: b80be4aaae7ab4ff695af6cc85273ac437fab1e6a68d3d8bdad67a9661be17e4
 SHA512:
-  metadata.gz: b9654877bb591082e1ef1c5dfdacff0bf887ed68f8ae1b2d995a99b87232523aa3350aede2d8cbb4045dbb15b380a1e93451004a45f881ad323615c0f66632c5
-  data.tar.gz: eb8753d3ceb689e9b8c3f3dbaeeac7c9dd818497f916882d5d3271f1901c099f8b7103e7b49bcef51d71aab86b2607174ac2eece768a092242b0d5e0dcec9b28
+  metadata.gz: 3dfb6477b142eebb45741e1b5a4552dd33feac34baa1eae5453baaa08a9a5be242ba46d4f1162e2dd2b68e8903e6de8402d6b6fa86128f312defac74f2e8da29
+  data.tar.gz: 39758aef4bda77babb81d479ef9f266e3fa328af163c7c3c053290796fda95ccb8ec8d3725a9dae5164b79debc6530919cd79df3f7421842f951b0ee6ef79e60

data/lib/wayback_machine_downloader/archive_api.rb CHANGED Viewed

@@ -7,7 +7,8 @@ module ArchiveAPI
     # Automatically append /* if the URL doesn't contain a path after the domain
     # This is a workaround for an issue with the API and *some* domains.
     # See https://github.com/StrawberryMaster/wayback-machine-downloader/issues/6
-    if url && !url.match(/^https?:\/\/.*\//i)
+    # But don't do this when exact_url flag is set
+    if url && !url.match(/^https?:\/\/.*\//i) && !@exact_url
       url = "#{url}/*"
     end

data/lib/wayback_machine_downloader.rb CHANGED Viewed

@@ -113,7 +113,7 @@ class WaybackMachineDownloader
   include ArchiveAPI
-  VERSION = "2.3.8"
+  VERSION = "2.3.10"
   DEFAULT_TIMEOUT = 30
   MAX_RETRIES = 3
   RETRY_DELAY = 2
@@ -131,7 +131,11 @@ class WaybackMachineDownloader
     validate_params(params)
     @base_url = params[:base_url]
     @exact_url = params[:exact_url]
-    @directory = params[:directory]
+    if params[:directory]
+      @directory = File.expand_path(params[:directory])
+    else
+      @directory = nil
+    end
     @all_timestamps = params[:all_timestamps]
     @from_timestamp = params[:from_timestamp].to_i
     @to_timestamp = params[:to_timestamp].to_i
@@ -165,13 +169,11 @@ class WaybackMachineDownloader
   def backup_path
     if @directory
-      if @directory[-1] == '/'
-        @directory
-      else
-        @directory + '/'
-      end
+      # @directory was already expanded to an absolute path when it was assigned, so return it as-is
+      @directory
     else
-      'websites/' + backup_name + '/'
+      # ensure the default path is absolute and normalized
+      File.expand_path(File.join('websites', backup_name))
     end
   end
@@ -382,7 +384,7 @@ class WaybackMachineDownloader
       end
     else
       file_list_curated = get_file_list_curated
-      file_list_curated = file_list_curated.sort_by { |k,v| v[:timestamp] }.reverse
+      file_list_curated = file_list_curated.sort_by { |_,v| v[:timestamp].to_s }.reverse
       file_list_curated.map do |file_remote_info|
         file_remote_info[1][:file_id] = file_remote_info[0]
         file_remote_info[1]
@@ -638,21 +640,35 @@ class WaybackMachineDownloader
     file_url = file_remote_info[:file_url].encode(current_encoding)
     file_id = file_remote_info[:file_id]
     file_timestamp = file_remote_info[:timestamp]
-    file_path_elements = file_id.split('/')
+    # sanitize file_id to ensure it is a valid path component
+    raw_path_elements = file_id.split('/')
+    sanitized_path_elements = raw_path_elements.map do |element|
+      if Gem.win_platform?
+        # for Windows, we need to sanitize path components to avoid invalid characters
+        # this prevents issues with file names that contain characters not allowed in
+        # Windows file systems. See https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file#naming-conventions
+        element.gsub(/[:\*?"<>\|\&\=\/\\]/) { |match| '%' + match.ord.to_s(16).upcase }
+      else
+        element
+      end
+    end
+    current_backup_path = backup_path
     if file_id == ""
-      dir_path = backup_path
-      file_path = backup_path + 'index.html'
-    elsif file_url[-1] == '/' or not file_path_elements[-1].include? '.'
-      dir_path = backup_path + file_path_elements[0..-1].join('/')
-      file_path = backup_path + file_path_elements[0..-1].join('/') + '/index.html'
+      dir_path = current_backup_path
+      file_path = File.join(dir_path, 'index.html')
+    elsif file_url[-1] == '/' || (sanitized_path_elements.last && !sanitized_path_elements.last.include?('.'))
+      # if file_id is a directory, we treat it as such
+      dir_path = File.join(current_backup_path, *sanitized_path_elements)
+      file_path = File.join(dir_path, 'index.html')
     else
-      dir_path = backup_path + file_path_elements[0..-2].join('/')
-      file_path = backup_path + file_path_elements[0..-1].join('/')
-    end
-    if Gem.win_platform?
-      dir_path = dir_path.gsub(/[:*?&=<>\\|]/) {|s| '%' + s.ord.to_s(16) }
-      file_path = file_path.gsub(/[:*?&=<>\\|]/) {|s| '%' + s.ord.to_s(16) }
+      # if file_id is a file, we treat it as such
+      filename = sanitized_path_elements.pop
+      dir_path = File.join(current_backup_path, *sanitized_path_elements)
+      file_path = File.join(dir_path, filename)
     end
     # check existence *before* download attempt

metadata CHANGED Viewed

@@ -1,13 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: wayback_machine_downloader_straw
 version: !ruby/object:Gem::Version
-  version: 2.3.8
+  version: 2.3.10
 platform: ruby
 authors:
 - strawberrymaster
 bindir: bin
 cert_chain: []
-date: 2025-06-05 00:00:00.000000000 Z
+date: 2025-06-27 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: concurrent-ruby