wayback_machine_downloader_hhr 2.3.2
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
---
SHA256:
  metadata.gz: 89aa753a924055b41a371b5c616158dc5b65dfa63b136fff078588d839949f64
  data.tar.gz: be143940de3f24c545a8bf202b1fb28f601b124f69927213e834b49bada36cf3
SHA512:
  metadata.gz: adf23257485832a2e6c4ccc443cf43583e59851e39d2e474bbad097ff9332f71ec63e1e0ade769b72485ca6e38234fac5f18b62ba3ca9858cf6bd46ebc1a4835
  data.tar.gz: 14779e8b3bc933186d33671047411a10bdb27f2b04e311013b9c630849b07393251ee6db6445419cea71ad8c012bd1fc81aa2b94f6ad51129fef9702e8d4fa42
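These digests are the SHA-256/SHA-512 checksums of the metadata.gz and data.tar.gz entries packed inside the .gem archive. As a rough sketch of how one of them could be recomputed locally (the .gem path is a placeholder and this snippet is only an illustration, not part of the gem):

    require 'digest'
    require 'rubygems/package'

    # Open the downloaded .gem (a plain tar archive) and hash its metadata.gz entry.
    tar = Gem::Package::TarReader.new(File.open("wayback_machine_downloader_hhr-2.3.2.gem", "rb"))
    tar.each do |entry|
      puts Digest::SHA256.hexdigest(entry.read) if entry.full_name == "metadata.gz"
    end
    tar.close

If the gem is intact, the printed value should match the SHA256 metadata.gz digest above.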
bin/wayback_machine_downloader
ADDED
@@ -0,0 +1,79 @@
#!/usr/bin/env ruby

require_relative '../lib/wayback_machine_downloader'
require 'optparse'
require 'pp'

options = {}
option_parser = OptionParser.new do |opts|
  opts.banner = "Usage: wayback_machine_downloader http://example.com"

  opts.separator ""
  opts.separator "Download an entire website from the Wayback Machine."

  opts.separator ""
  opts.separator "Optional options:"

  opts.on("-d", "--directory PATH", String, "Directory to save the downloaded files into", "Default is ./websites/ plus the domain name") do |t|
    options[:directory] = t
  end

  opts.on("-s", "--all-timestamps", "Download all snapshots/timestamps for a given website") do |t|
    options[:all_timestamps] = true
  end

  opts.on("-f", "--from TIMESTAMP", Integer, "Only files on or after the timestamp supplied (i.e. 20060716231334)") do |t|
    options[:from_timestamp] = t
  end

  opts.on("-t", "--to TIMESTAMP", Integer, "Only files on or before the timestamp supplied (i.e. 20100916231334)") do |t|
    options[:to_timestamp] = t
  end

  opts.on("-e", "--exact-url", "Download only the url provided and not the full site") do |t|
    options[:exact_url] = t
  end

  opts.on("-o", "--only ONLY_FILTER", String, "Restrict downloading to urls that match this filter", "(use // notation for the filter to be treated as a regex)") do |t|
    options[:only_filter] = t
  end

  opts.on("-x", "--exclude EXCLUDE_FILTER", String, "Skip downloading of urls that match this filter", "(use // notation for the filter to be treated as a regex)") do |t|
    options[:exclude_filter] = t
  end

  opts.on("-a", "--all", "Expand downloading to error files (40x and 50x) and redirections (30x)") do |t|
    options[:all] = true
  end

  opts.on("-c", "--concurrency NUMBER", Integer, "Number of multiple files to download at a time", "Default is one file at a time (i.e. 20)") do |t|
    options[:threads_count] = t
  end

  opts.on("-p", "--maximum-snapshot NUMBER", Integer, "Maximum snapshot pages to consider (Default is 100)", "Count an average of 150,000 snapshots per page") do |t|
    options[:maximum_pages] = t
  end

  opts.on("-l", "--list", "Only list file urls in a JSON format with the archived timestamps, won't download anything") do |t|
    options[:list] = true
  end

  opts.on("-v", "--version", "Display version") do |t|
    options[:version] = t
  end
end.parse!

if (base_url = ARGV[-1])
  options[:base_url] = base_url
  wayback_machine_downloader = WaybackMachineDownloader.new options
  if options[:list]
    wayback_machine_downloader.list_files
  else
    wayback_machine_downloader.download_files
  end
elsif options[:version]
  puts WaybackMachineDownloader::VERSION
else
  puts "You need to specify a website to backup. (e.g., http://example.com)"
  puts "Run `wayback_machine_downloader --help` for more help."
end
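For reference, when given a URL the executable above simply builds an options hash from the parsed flags and hands it to the library class. A minimal sketch of the equivalent Ruby, assuming the gem is installed; the URL and concurrency value are placeholders:

    require 'wayback_machine_downloader'

    # Roughly what `wayback_machine_downloader https://example.com -c 20` does:
    options = {
      base_url: "https://example.com", # taken from ARGV[-1]
      threads_count: 20                # -c / --concurrency
    }
    WaybackMachineDownloader.new(options).download_files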
lib/wayback_machine_downloader/archive_api.rb
ADDED
@@ -0,0 +1,40 @@
require 'json'
require 'uri'

module ArchiveAPI

  def get_raw_list_from_api url, page_index, http
    request_url = URI("https://web.archive.org/cdx/search/xd")
    params = [["output", "json"], ["url", url]]
    params += parameters_for_api page_index
    request_url.query = URI.encode_www_form(params)

    begin
      json = JSON.parse(http.get(URI(request_url)).body)
      if (json[0] <=> ["timestamp", "original"]) == 0
        json.shift
      end
      json
    rescue JSON::ParserError
      []
    end
  end

  def parameters_for_api page_index
    parameters = [["fl", "timestamp,original"], ["collapse", "digest"], ["gzip", "false"]]
    if !@all
      parameters.push(["filter", "statuscode:200"])
    end
    if @from_timestamp and @from_timestamp != 0
      parameters.push(["from", @from_timestamp.to_s])
    end
    if @to_timestamp and @to_timestamp != 0
      parameters.push(["to", @to_timestamp.to_s])
    end
    if page_index
      parameters.push(["page", page_index])
    end
    parameters
  end

end
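To make the request shape concrete, here is a rough illustration of the query string that get_raw_list_from_api ends up building via URI.encode_www_form; the URL and page index are placeholder values, not output from a real run:

    require 'uri'

    params = [["output", "json"], ["url", "example.com"],
              ["fl", "timestamp,original"], ["collapse", "digest"],
              ["gzip", "false"], ["filter", "statuscode:200"], ["page", 0]]
    puts URI.encode_www_form(params)
    # => output=json&url=example.com&fl=timestamp%2Coriginal&collapse=digest&gzip=false&filter=statuscode%3A200&page=0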
lib/wayback_machine_downloader/tidy_bytes.rb
ADDED
@@ -0,0 +1,122 @@
module TibyBytes

  # CP-1252 decimal byte => UTF-8 approximation as an array of bytes
  CP1252 = {
    128 => [226, 130, 172],
    129 => nil,
    130 => [226, 128, 154],
    131 => [198, 146],
    132 => [226, 128, 158],
    133 => [226, 128, 166],
    134 => [226, 128, 160],
    135 => [226, 128, 161],
    136 => [203, 134],
    137 => [226, 128, 176],
    138 => [197, 160],
    139 => [226, 128, 185],
    140 => [197, 146],
    141 => nil,
    142 => [197, 189],
    143 => nil,
    144 => nil,
    145 => [226, 128, 152],
    146 => [226, 128, 153],
    147 => [226, 128, 156],
    148 => [226, 128, 157],
    149 => [226, 128, 162],
    150 => [226, 128, 147],
    151 => [226, 128, 148],
    152 => [203, 156],
    153 => [226, 132, 162],
    154 => [197, 161],
    155 => [226, 128, 186],
    156 => [197, 147],
    157 => nil,
    158 => [197, 190],
    159 => [197, 184]
  }

  module StringMixin

    # Attempt to replace invalid UTF-8 bytes with valid ones. This method
    # naively assumes that if you have invalid UTF-8 bytes, they are either
    # Windows CP-1252 or ISO-8859-1. In practice this isn't a bad assumption,
    # but it may not always work.
    #
    # Passing +true+ will forcibly tidy all bytes, assuming that the string's
    # encoding is CP-1252 or ISO-8859-1.
    def tidy_bytes(force = false)

      if force
        return unpack("C*").map do |b|
          tidy_byte(b)
        end.flatten.compact.pack("C*").unpack("U*").pack("U*")
      end

      bytes = unpack("C*")
      conts_expected = 0
      last_lead = 0

      bytes.each_index do |i|

        byte = bytes[i]
        _is_ascii = byte < 128
        is_cont = byte > 127 && byte < 192
        is_lead = byte > 191 && byte < 245
        is_unused = byte > 240
        is_restricted = byte > 244

        # Impossible or highly unlikely byte? Clean it.
        if is_unused || is_restricted
          bytes[i] = tidy_byte(byte)
        elsif is_cont
          # Not expecting continuation byte? Clean up. Otherwise, now expect one less.
          conts_expected == 0 ? bytes[i] = tidy_byte(byte) : conts_expected -= 1
        else
          if conts_expected > 0
            # Expected continuation, but got ASCII or leading? Clean backwards up to
            # the leading byte.
            begin
              (1..(i - last_lead)).each {|j| bytes[i - j] = tidy_byte(bytes[i - j])}
            rescue NoMethodError
              next
            end
            conts_expected = 0
          end
          if is_lead
            # Final byte is leading? Clean it.
            if i == bytes.length - 1
              bytes[i] = tidy_byte(bytes.last)
            else
              # Valid leading byte? Expect continuations determined by position of
              # first zero bit, with max of 3.
              conts_expected = byte < 224 ? 1 : byte < 240 ? 2 : 3
              last_lead = i
            end
          end
        end
      end
      begin
        bytes.empty? ? nil : bytes.flatten.compact.pack("C*").unpack("U*").pack("U*")
      rescue ArgumentError
        nil
      end
    end

    # Tidy bytes in-place.
    def tidy_bytes!(force = false)
      replace tidy_bytes(force)
    end

    private

    def tidy_byte(byte)
      byte < 160 ? TibyBytes::CP1252[byte] : byte < 192 ? [194, byte] : [195, byte - 64]
    end

  end
end

class String
  include TibyBytes::StringMixin
end
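As a quick illustration of the mixin above (assuming the file has been required; the sample bytes are arbitrary), byte 0xE9 is repaired through the ISO-8859-1 branch and 0x99 through the CP-1252 table:

    str = "Caf\xE9 corp\x99".dup.force_encoding("UTF-8") # stray CP-1252 / ISO-8859-1 bytes
    puts str.valid_encoding? # => false
    puts str.tidy_bytes      # => "Café corp™"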
lib/wayback_machine_downloader/to_regex.rb
ADDED
@@ -0,0 +1,81 @@
module ToRegex
  module StringMixin
    class << self
      def literal?(str)
        REGEXP_DELIMITERS.none? { |s, e| str.start_with?(s) and str =~ /#{e}#{INLINE_OPTIONS}\z/ }
      end
    end

    INLINE_OPTIONS = /[imxnesu]*/
    REGEXP_DELIMITERS = {
      '%r{' => '}',
      '/' => '/',
    }

    # Get a regex back
    #
    # Without :literal or :detect, `"foo".to_regex` will return nil.
    #
    # @param [optional, Hash] options
    # @option options [true,false] :literal Treat meta characters and other regexp codes as just text; always return a regexp
    # @option options [true,false] :detect If the string starts and ends with valid regexp delimiters, treat it as a regexp; otherwise, interpret it literally
    # @option options [true,false] :ignore_case /foo/i
    # @option options [true,false] :multiline /foo/m
    # @option options [true,false] :extended /foo/x
    # @option options [true,false] :lang /foo/[nesu]
    def to_regex(options = {})
      if args = as_regexp(options)
        ::Regexp.new(*args)
      end
    end

    # Return arguments that can be passed to `Regexp.new`
    # @see to_regex
    def as_regexp(options = {})
      unless options.is_a?(::Hash)
        raise ::ArgumentError, "[to_regex] Options must be a Hash"
      end
      str = self

      return if options[:detect] and str == ''

      if options[:literal] or (options[:detect] and ToRegex::StringMixin.literal?(str))
        content = ::Regexp.escape str
      elsif delim_set = REGEXP_DELIMITERS.detect { |k, _| str.start_with?(k) }
        delim_start, delim_end = delim_set
        /\A#{delim_start}(.*)#{delim_end}(#{INLINE_OPTIONS})\z/u =~ str
        content = $1
        inline_options = $2
        return unless content.is_a?(::String)
        content.gsub! '\\/', '/'
        if inline_options
          options[:ignore_case] = true if inline_options.include?('i')
          options[:multiline] = true if inline_options.include?('m')
          options[:extended] = true if inline_options.include?('x')
          # 'n', 'N' = none, 'e', 'E' = EUC, 's', 'S' = SJIS, 'u', 'U' = UTF-8
          options[:lang] = inline_options.scan(/[nesu]/i).join.downcase
        end
      else
        return
      end

      ignore_case = options[:ignore_case] ? ::Regexp::IGNORECASE : 0
      multiline = options[:multiline] ? ::Regexp::MULTILINE : 0
      extended = options[:extended] ? ::Regexp::EXTENDED : 0
      lang = options[:lang] || ''
      if ::RUBY_VERSION > '1.9' and lang.include?('u')
        lang = lang.delete 'u'
      end

      if lang.empty?
        [ content, (ignore_case|multiline|extended) ]
      else
        [ content, (ignore_case|multiline|extended), lang ]
      end
    end
  end
end

class String
  include ToRegex::StringMixin
end
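This mixin is what lets the --only and --exclude filters accept the // notation mentioned in the executable's help text. A small illustration, with made-up filter values:

    puts "/\\.(gif|jpg)$/i".to_regex.inspect # => /\.(gif|jpg)$/i
    puts "wp-content".to_regex.inspect       # => nil; plain strings fall back to substring matching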
lib/wayback_machine_downloader.rb
ADDED
@@ -0,0 +1,323 @@
# encoding: UTF-8

require 'thread'
require 'net/http'
require 'open-uri'
require 'fileutils'
require 'cgi'
require 'json'
require_relative 'wayback_machine_downloader/tidy_bytes'
require_relative 'wayback_machine_downloader/to_regex'
require_relative 'wayback_machine_downloader/archive_api'

class WaybackMachineDownloader

  include ArchiveAPI

  VERSION = "2.3.2"

  attr_accessor :base_url, :exact_url, :directory, :all_timestamps,
    :from_timestamp, :to_timestamp, :only_filter, :exclude_filter,
    :all, :maximum_pages, :threads_count

  def initialize params
    @base_url = params[:base_url]
    @exact_url = params[:exact_url]
    @directory = params[:directory]
    @all_timestamps = params[:all_timestamps]
    @from_timestamp = params[:from_timestamp].to_i
    @to_timestamp = params[:to_timestamp].to_i
    @only_filter = params[:only_filter]
    @exclude_filter = params[:exclude_filter]
    @all = params[:all]
    @maximum_pages = params[:maximum_pages] ? params[:maximum_pages].to_i : 100
    @threads_count = params[:threads_count].to_i
  end

  def backup_name
    if @base_url.include? '//'
      @base_url.split('/')[2]
    else
      @base_url
    end
  end

  def backup_path
    if @directory
      if @directory[-1] == '/'
        @directory
      else
        @directory + '/'
      end
    else
      'websites/' + backup_name + '/'
    end
  end

  def match_only_filter file_url
    if @only_filter
      only_filter_regex = @only_filter.to_regex
      if only_filter_regex
        only_filter_regex =~ file_url
      else
        file_url.downcase.include? @only_filter.downcase
      end
    else
      true
    end
  end

  def match_exclude_filter file_url
    if @exclude_filter
      exclude_filter_regex = @exclude_filter.to_regex
      if exclude_filter_regex
        exclude_filter_regex =~ file_url
      else
        file_url.downcase.include? @exclude_filter.downcase
      end
    else
      false
    end
  end

  def get_all_snapshots_to_consider
    # Note: Passing a page index parameter allows us to get more snapshots,
    # but from a less fresh index
    http = Net::HTTP.new("web.archive.org", 443)
    http.use_ssl = true
    http.start()
    print "Getting snapshot pages"
    snapshot_list_to_consider = []
    snapshot_list_to_consider += get_raw_list_from_api(@base_url, nil, http)
    print "."
    unless @exact_url
      @maximum_pages.times do |page_index|
        snapshot_list = get_raw_list_from_api(@base_url + '/*', page_index, http)
        break if snapshot_list.empty?
        snapshot_list_to_consider += snapshot_list
        print "."
      end
    end
    http.finish()
    puts " found #{snapshot_list_to_consider.length} snapshots to consider."
    puts
    snapshot_list_to_consider
  end

  def get_file_list_curated
    file_list_curated = Hash.new
    get_all_snapshots_to_consider.each do |file_timestamp, file_url|
      next unless file_url.include?('/')
      file_id = file_url.split('/')[3..-1].join('/')
      file_id = CGI::unescape file_id
      file_id = file_id.tidy_bytes unless file_id == ""
      if file_id.nil?
        puts "Malformed file url, ignoring: #{file_url}"
      else
        if match_exclude_filter(file_url)
          puts "File url matches exclude filter, ignoring: #{file_url}"
        elsif not match_only_filter(file_url)
          puts "File url doesn't match only filter, ignoring: #{file_url}"
        elsif file_list_curated[file_id]
          unless file_list_curated[file_id][:timestamp] > file_timestamp
            file_list_curated[file_id] = {file_url: file_url, timestamp: file_timestamp}
          end
        else
          file_list_curated[file_id] = {file_url: file_url, timestamp: file_timestamp}
        end
      end
    end
    file_list_curated
  end

  def get_file_list_all_timestamps
    file_list_curated = Hash.new
    get_all_snapshots_to_consider.each do |file_timestamp, file_url|
      next unless file_url.include?('/')
      file_id = file_url.split('/')[3..-1].join('/')
      file_id_and_timestamp = [file_timestamp, file_id].join('/')
      file_id_and_timestamp = CGI::unescape file_id_and_timestamp
      file_id_and_timestamp = file_id_and_timestamp.tidy_bytes unless file_id_and_timestamp == ""
      if file_id.nil?
        puts "Malformed file url, ignoring: #{file_url}"
      else
        if match_exclude_filter(file_url)
          puts "File url matches exclude filter, ignoring: #{file_url}"
        elsif not match_only_filter(file_url)
          puts "File url doesn't match only filter, ignoring: #{file_url}"
        elsif file_list_curated[file_id_and_timestamp]
          puts "Duplicate file and timestamp combo, ignoring: #{file_id}" if @verbose
        else
          file_list_curated[file_id_and_timestamp] = {file_url: file_url, timestamp: file_timestamp}
        end
      end
    end
    puts "file_list_curated: " + file_list_curated.count.to_s
    file_list_curated
  end


  def get_file_list_by_timestamp
    if @all_timestamps
      file_list_curated = get_file_list_all_timestamps
      file_list_curated.map do |file_remote_info|
        file_remote_info[1][:file_id] = file_remote_info[0]
        file_remote_info[1]
      end
    else
      file_list_curated = get_file_list_curated
      file_list_curated = file_list_curated.sort_by { |k, v| v[:timestamp] }.reverse
      file_list_curated.map do |file_remote_info|
        file_remote_info[1][:file_id] = file_remote_info[0]
        file_remote_info[1]
      end
    end
  end

  def list_files
    # retrieval produces its own output
    @orig_stdout = $stdout
    $stdout = $stderr
    files = get_file_list_by_timestamp
    $stdout = @orig_stdout
    puts "["
    files[0...-1].each do |file|
      puts file.to_json + ","
    end
    puts files[-1].to_json
    puts "]"
  end

  def download_files
    start_time = Time.now
    puts "Downloading #{@base_url} to #{backup_path} from Wayback Machine archives."
    puts

    if file_list_by_timestamp.count == 0
      puts "No files to download."
      puts "Possible reasons:"
      puts "\t* Site is not in Wayback Machine Archive."
      puts "\t* From timestamp too far in the future." if @from_timestamp and @from_timestamp != 0
      puts "\t* To timestamp too far in the past." if @to_timestamp and @to_timestamp != 0
      puts "\t* Only filter too restrictive (#{only_filter.to_s})" if @only_filter
      puts "\t* Exclude filter too wide (#{exclude_filter.to_s})" if @exclude_filter
      return
    end

    puts "#{file_list_by_timestamp.count} files to download:"

    threads = []
    @processed_file_count = 0
    @threads_count = 1 if @threads_count == 0
    @threads_count.times do
      http = Net::HTTP.new("web.archive.org", 443)
      http.use_ssl = true
      http.start()
      threads << Thread.new do
        until file_queue.empty?
          file_remote_info = file_queue.pop(true) rescue nil
          download_file(file_remote_info, http) if file_remote_info
        end
        http.finish()
      end
    end

    threads.each(&:join)
    end_time = Time.now
    puts
    puts "Download completed in #{(end_time - start_time).round(2)}s, saved in #{backup_path} (#{file_list_by_timestamp.size} files)"
  end

  def structure_dir_path dir_path
    begin
      FileUtils::mkdir_p dir_path unless File.exist? dir_path
    rescue Errno::EEXIST => e
      error_to_string = e.to_s
      puts "# #{error_to_string}"
      if error_to_string.include? "File exists @ dir_s_mkdir - "
        file_already_existing = error_to_string.split("File exists @ dir_s_mkdir - ")[-1]
      elsif error_to_string.include? "File exists - "
        file_already_existing = error_to_string.split("File exists - ")[-1]
      else
        raise "Unhandled directory restructure error # #{error_to_string}"
      end
      file_already_existing_temporary = file_already_existing + '.temp'
      file_already_existing_permanent = file_already_existing + '/index.html'
      FileUtils::mv file_already_existing, file_already_existing_temporary
      FileUtils::mkdir_p file_already_existing
      FileUtils::mv file_already_existing_temporary, file_already_existing_permanent
      puts "#{file_already_existing} -> #{file_already_existing_permanent}"
      structure_dir_path dir_path
    end
  end

  def download_file (file_remote_info, http)
    current_encoding = "".encoding
    file_url = file_remote_info[:file_url].encode(current_encoding)
    file_id = file_remote_info[:file_id]
    file_timestamp = file_remote_info[:timestamp]
    file_path_elements = file_id.split('/')
    if file_id == ""
      dir_path = backup_path
      file_path = backup_path + 'index.html'
    elsif file_url[-1] == '/' or not file_path_elements[-1].include? '.'
      dir_path = backup_path + file_path_elements[0..-1].join('/')
      file_path = backup_path + file_path_elements[0..-1].join('/') + '/index.html'
    else
      dir_path = backup_path + file_path_elements[0..-2].join('/')
      file_path = backup_path + file_path_elements[0..-1].join('/')
    end
    if Gem.win_platform?
      dir_path = dir_path.gsub(/[:*?&=<>\\|]/) {|s| '%' + s.ord.to_s(16) }
      file_path = file_path.gsub(/[:*?&=<>\\|]/) {|s| '%' + s.ord.to_s(16) }
    end
    unless File.exist? file_path
      begin
        structure_dir_path dir_path
        open(file_path, "wb") do |file|
          begin
            http.get(URI("https://web.archive.org/web/#{file_timestamp}id_/#{file_url}")) do |body|
              file.write(body)
            end
          rescue OpenURI::HTTPError => e
            puts "#{file_url} # #{e}"
            if @all
              file.write(e.io.read)
              puts "#{file_path} saved anyway."
            end
          rescue StandardError => e
            puts "#{file_url} # #{e}"
          end
        end
      rescue StandardError => e
        puts "#{file_url} # #{e}"
      ensure
        if not @all and File.exist?(file_path) and File.size(file_path) == 0
          File.delete(file_path)
          puts "#{file_path} was empty and was removed."
        end
      end
      semaphore.synchronize do
        @processed_file_count += 1
        puts "#{file_url} -> #{file_path} (#{@processed_file_count}/#{file_list_by_timestamp.size})"
      end
    else
      semaphore.synchronize do
        @processed_file_count += 1
        puts "#{file_url} # #{file_path} already exists. (#{@processed_file_count}/#{file_list_by_timestamp.size})"
      end
    end
  end

  def file_queue
    @file_queue ||= file_list_by_timestamp.each_with_object(Queue.new) { |file_info, q| q << file_info }
  end

  def file_list_by_timestamp
    @file_list_by_timestamp ||= get_file_list_by_timestamp
  end

  def semaphore
    @semaphore ||= Mutex.new
  end
end
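The class can also be driven directly instead of through bin/wayback_machine_downloader. A minimal sketch, with a placeholder URL and option values mirroring the --from/--to/--only flags:

    downloader = WaybackMachineDownloader.new(
      base_url: "https://example.com",
      from_timestamp: 20060716231334, # same semantics as --from
      to_timestamp: 20100916231334,   # same semantics as --to
      only_filter: "/\\.html$/"       # same semantics as --only
    )
    downloader.list_files # prints a JSON array of {file_url, timestamp, file_id} entries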
metadata
ADDED
@@ -0,0 +1,81 @@
--- !ruby/object:Gem::Specification
name: wayback_machine_downloader_hhr
version: !ruby/object:Gem::Version
  version: 2.3.2
platform: ruby
authors:
- hehaorui
autorequire:
bindir: bin
cert_chain: []
date: 2024-11-03 00:00:00.000000000 Z
dependencies:
- !ruby/object:Gem::Dependency
  name: rake
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: '10.2'
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: '10.2'
- !ruby/object:Gem::Dependency
  name: minitest
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: '5.2'
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: '5.2'
description: Download an entire website from the Wayback Machine. Wayback Machine
  by Internet Archive (archive.org) is an awesome tool to view any website at any
  point in time, but it lacks an export feature. Wayback Machine Downloader provides
  exactly that. This version carries minor fixes on the original version and is for
  hehaorui's personal use.
email: mail@hehaorui.com
executables:
- wayback_machine_downloader
extensions: []
extra_rdoc_files: []
files:
- bin/wayback_machine_downloader
- lib/wayback_machine_downloader.rb
- lib/wayback_machine_downloader/archive_api.rb
- lib/wayback_machine_downloader/tidy_bytes.rb
- lib/wayback_machine_downloader/to_regex.rb
homepage: https://github.com/hehaorui/wayback-machine-downloader
licenses:
- MIT
metadata: {}
post_install_message:
rdoc_options: []
require_paths:
- lib
required_ruby_version: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
    - !ruby/object:Gem::Version
      version: 1.9.2
required_rubygems_version: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
    - !ruby/object:Gem::Version
      version: '0'
requirements: []
rubygems_version: 3.5.22
signing_key:
specification_version: 4
summary: Download an entire website from the Wayback Machine, with minor fixes. For
  hehaorui's personal use.
test_files: []