pulse-downloader 0.1.11 → 0.1.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 2b4edaf42a90782470ab4956cc6d9cf4214d6bb81c03b8e5b55193ac1867330e
         | 
| 4 | 
            +
              data.tar.gz: 81da683b9c0aa82f17197a280318b529ab4f6630dd20fa385925739f02165ca2
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: d8771a9ece20d74d44bb8e365d7f51be288ee8a9f5ff5dbec826013aec57998147d58d0e17ffd6ab5c7213f2dc441bfd4b471716f99e8df40e06cf2c7c00ddf0
         | 
| 7 | 
            +
              data.tar.gz: 053c925e99adeeb3cae3f277bb0e3ceda00d327ac57fdc76ce25e9a86220d66a291c0f388aa1aebe66a92e7b9b8971409c7b59dc4d188fa87ee4504914b52052
         | 
| @@ -8,6 +8,7 @@ module Pulse | |
| 8 8 |  | 
| 9 9 | 
             
                  attr_reader :url,
         | 
| 10 10 | 
             
                    :file_type,
         | 
| 11 | 
            +
                    :scrape_images,
         | 
| 11 12 | 
             
                    :save_data,
         | 
| 12 13 | 
             
                    :save_path,
         | 
| 13 14 | 
             
                    :read_from_save_path,
         | 
| @@ -29,6 +30,7 @@ module Pulse | |
| 29 30 | 
             
                  # TODO: lib/pulse/downloader/file_downloader.rb:13: warning: URI.escape is obsolete
         | 
| 30 31 | 
             
                  def initialize(url:,
         | 
| 31 32 | 
             
                    file_type:,
         | 
| 33 | 
            +
                    scrape_images: false,
         | 
| 32 34 | 
             
                    save_data: false,
         | 
| 33 35 | 
             
                    save_path: '',
         | 
| 34 36 | 
             
                    read_from_save_path: false,
         | 
| @@ -40,6 +42,7 @@ module Pulse | |
| 40 42 |  | 
| 41 43 | 
             
                    @url = url
         | 
| 42 44 | 
             
                    @file_type = file_type
         | 
| 45 | 
            +
                    @scrape_images = scrape_images
         | 
| 43 46 | 
             
                    @save_data = save_data
         | 
| 44 47 | 
             
                    @save_path = save_path
         | 
| 45 48 | 
             
                    @read_from_save_path = read_from_save_path
         | 
| @@ -1,8 +1,6 @@ | |
| 1 1 | 
             
            module Pulse
         | 
| 2 2 | 
             
              module Downloader
         | 
| 3 3 | 
             
                module FileDownloader
         | 
| 4 | 
            -
                  require 'uri'
         | 
| 5 | 
            -
             | 
| 6 4 | 
             
                  # save_path and verify_ssl are defined in client.rb
         | 
| 7 5 | 
             
                  def download(file_path, progress_bar=nil)
         | 
| 8 6 | 
             
                    raise "save_path is undefined" if save_data && save_path == ''
         | 
| @@ -10,7 +8,7 @@ module Pulse | |
| 10 8 |  | 
| 11 9 | 
             
                    @start_time = get_micro_second_time
         | 
| 12 10 |  | 
| 13 | 
            -
                    file_data = HTTParty.get( | 
| 11 | 
            +
                    file_data = HTTParty.get(escape(compute_file_link(file_path)), verify: verify_ssl)
         | 
| 14 12 |  | 
| 15 13 | 
             
                    @end_time = get_micro_second_time
         | 
| 16 14 |  | 
| @@ -68,6 +66,37 @@ module Pulse | |
| 68 66 | 
             
                  def section?(file_path)
         | 
| 69 67 | 
             
                    file_path[0] == '#'
         | 
| 70 68 | 
             
                  end
         | 
| 69 | 
            +
             | 
| 70 | 
            +
                  # Percent-encodes a fixed set of URL-unsafe characters in +str+.
                  #
                  # The substitution runs in a single pass, so a "%" introduced by one
                  # replacement is never re-encoded by another (the sequential gsub!
                  # version turned "a b" into "a%2520b"). The input is not mutated, so
                  # frozen strings are accepted.
                  #
                  # ":" and "/" are deliberately left untouched so the scheme and path
                  # separators of a link survive. The curly quotes “ and ‘ are mapped to
                  # the codes of their ASCII counterparts (%22 and %27).
                  #
                  # @param str [String] the link to encode
                  # @return [String] a new string with the listed characters encoded
                  def escape(str)
                    replacements = {
                      ' ' => '%20',
                      '$' => '%24',
                      '&' => '%26',
                      '`' => '%60',
                      '<' => '%3C',
                      '>' => '%3E',
                      '[' => '%5B',
                      ']' => '%5D',
                      '{' => '%7B',
                      '}' => '%7D',
                      '“' => '%22',
                      '"' => '%22',
                      '+' => '%2B',
                      '#' => '%23',
                      '%' => '%25',
                      '@' => '%40',
                      ';' => '%3B',
                      '=' => '%3D',
                      '?' => '%3F',
                      '\\' => '%5C',
                      '^' => '%5E',
                      '|' => '%7C',
                      '~' => '%7E',
                      '‘' => '%27',
                      ',' => '%2C'
                    }

                    # gsub with a Hash looks each matched character up in the table.
                    str.gsub(Regexp.union(replacements.keys), replacements)
                  end
         | 
| 71 100 | 
             
                end
         | 
| 72 101 | 
             
              end
         | 
| 73 102 | 
             
            end
         | 
| @@ -19,18 +19,44 @@ module Pulse | |
| 19 19 |  | 
| 20 20 | 
             
                  def extract_file_urls(response)
         | 
| 21 21 | 
             
                    return [] if response.body.nil? || response.body.empty?
         | 
| 22 | 
            +
                    extract_download_links(response) + extract_embedded_images(response)
         | 
| 23 | 
            +
                  end
         | 
| 22 24 |  | 
| 25 | 
            +
                  def extract_download_links(response)
         | 
| 23 26 | 
             
                    parse_html(response.body)
         | 
| 24 27 | 
             
                      .css('a')
         | 
| 25 28 | 
             
                      .to_a
         | 
| 26 29 | 
             
                      .map { |link| link['href'] }
         | 
| 27 30 | 
             
                      .compact
         | 
| 28 31 | 
             
                      .select { |link| link.include? file_type }
         | 
| 32 | 
            +
                      .map { |link| add_base_url(link) }
         | 
| 33 | 
            +
                  end
         | 
| 34 | 
            +
             | 
| 35 | 
            +
                  def extract_embedded_images(response)
         | 
| 36 | 
            +
                    return [] unless scrape_images
         | 
| 37 | 
            +
             | 
| 38 | 
            +
                    parse_html(response.body)
         | 
| 39 | 
            +
                      .css('img')
         | 
| 40 | 
            +
                      .to_a
         | 
| 41 | 
            +
                      .map { |e| e["src"] }
         | 
| 42 | 
            +
                      .compact
         | 
| 43 | 
            +
                      .select { |link| link.include? file_type }
         | 
| 44 | 
            +
                      .map { |link| add_base_url(link) }
         | 
| 29 45 | 
             
                  end
         | 
| 30 46 |  | 
| 31 47 | 
             
                  def parse_html(raw_html)
         | 
| 32 48 | 
             
                    Nokogiri::HTML(raw_html)
         | 
| 33 49 | 
             
                  end
         | 
| 50 | 
            +
             | 
| 51 | 
            +
                  # Turns a link scraped from the page into an absolute one by
                  # prefixing it with the origin (scheme and host) of the page url.
                  #
                  # Links that already carry a scheme are returned unchanged. The
                  # earlier body discarded +str+ and returned only the host, relied on
                  # ActiveSupport's Array#third, and returned "http:" for http:// urls.
                  #
                  # NOTE(review): assumes callers expect a full link back, as the method
                  # name suggests — confirm against compute_file_link, which is not
                  # visible here.
                  #
                  # @param str [String] link taken from an href/src attribute
                  # @return [String] +str+ itself when absolute, otherwise origin + path
                  def add_base_url(str)
                    return str if str.match?(%r{\A[a-z][a-z0-9+.\-]*://}i)

                    # Optional scheme followed by everything up to the first path slash.
                    origin = url[%r{\A(?:[a-z][a-z0-9+.\-]*://)?[^/]+}i]
                    path = str.start_with?('/') ? str : "/#{str}"

                    "#{origin}#{path}"
                  end
         | 
| 34 60 | 
             
                end
         | 
| 35 61 | 
             
              end
         | 
| 36 62 | 
             
            end
         | 
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: pulse-downloader
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.1.11 | 
| 4 | 
            +
              version: 0.1.16
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - trex22
         | 
| 8 8 | 
             
            autorequire: 
         | 
| 9 9 | 
             
            bindir: exe
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date: 2021-05- | 
| 11 | 
            +
            date: 2021-05-10 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 14 | 
             
              name: httparty
         |