zorki 0.1.29 → 0.2.1
This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions exactly as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/zorki/scrapers/scraper.rb +10 -7
- data/lib/zorki/version.rb +1 -1
- metadata +1 -1
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 360620c86ba91caf49a0e41a0362654620f60da706c761f3f9429757f570c7a4
         | 
| 4 | 
            +
              data.tar.gz: 442628f21ce102c28e22f2aa0cc9243fe1c836267a6901cd53c5f094fe778444
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: bb9d411327c46d63229dfc8e43cfcfb43d14a4b6a87615d07aa7871a29a9d0fe7390ef018134eb358314300fa1a67c0e976a43fd0357e7eea0efac4b4afede30
         | 
| 7 | 
            +
              data.tar.gz: 5ef69f54c80920ebfacf7520c851b06649b81262df809041bb6e97a981e8fd237ccea5b7d8972b4332e2a78fe25330171af9d0f62c9dfc2ccf2029cd0409044a
         | 
| @@ -81,7 +81,6 @@ module Zorki | |
| 81 81 | 
             
                      puts response.body
         | 
| 82 82 | 
             
                      puts "***********************************************************"
         | 
| 83 83 |  | 
| 84 | 
            -
                      # responses << response
         | 
| 85 84 | 
             
                      # Check if not a CORS prefetch and finish up if not
         | 
| 86 85 | 
             
                      if !response.body&.empty? && response.body
         | 
| 87 86 | 
             
                        check_passed = true
         | 
| @@ -103,6 +102,7 @@ module Zorki | |
| 103 102 | 
             
                          puts "checking FAILED request: #{request.url}"
         | 
| 104 103 | 
             
                          puts response.body
         | 
| 105 104 | 
             
                          puts "***********************************************************"
         | 
| 105 | 
            +
                          next
         | 
| 106 106 | 
             
                        end
         | 
| 107 107 |  | 
| 108 108 | 
             
                        response_body = response.body if check_passed == true
         | 
| @@ -110,10 +110,10 @@ module Zorki | |
| 110 110 | 
             
                    end
         | 
| 111 111 | 
             
                  rescue Selenium::WebDriver::Error::WebDriverError
         | 
| 112 112 | 
             
                    # Eat them
         | 
| 113 | 
            -
             | 
| 114 | 
            -
                    puts "***********************************************************"
         | 
| 115 | 
            -
                    puts "Error in intercept: #{e}"
         | 
| 116 | 
            -
                    puts "***********************************************************"
         | 
| 113 | 
            +
                    # rescue StandardError => e
         | 
| 114 | 
            +
                    # puts "***********************************************************"
         | 
| 115 | 
            +
                    # puts "Error in intercept: #{e}"
         | 
| 116 | 
            +
                    # puts "***********************************************************"
         | 
| 117 117 | 
             
                  end
         | 
| 118 118 |  | 
| 119 119 | 
             
                  # Now that the intercept is set up, we visit the page we want
         | 
| @@ -132,7 +132,6 @@ module Zorki | |
| 132 132 | 
             
                  # If this is a page that has not been marked as misinfo we can just pull the data
         | 
| 133 133 | 
             
                  # TODO: put this before the whole load loop
         | 
| 134 134 | 
             
                  if response_body.nil?
         | 
| 135 | 
            -
             | 
| 136 135 | 
             
                    doc = Nokogiri::HTML(page.driver.browser.page_source)
         | 
| 137 136 | 
             
                    # elements = doc.search("script").find_all do |e|
         | 
| 138 137 | 
             
                    #   e.attributes.has_key?("type") && e.attributes["type"].value == "application/ld+json"
         | 
| @@ -141,7 +140,7 @@ module Zorki | |
| 141 140 | 
             
                    elements = doc.search("script").filter_map do |element|
         | 
| 142 141 | 
             
                      parsed_element_json = nil
         | 
| 143 142 | 
             
                      begin
         | 
| 144 | 
            -
                        element_json =  | 
| 143 | 
            +
                        element_json = Oj.load(element.text)
         | 
| 145 144 |  | 
| 146 145 | 
             
                        # if element.text.include?("jokoy.komi.io")
         | 
| 147 146 | 
             
                        # debugger
         | 
| @@ -155,6 +154,10 @@ module Zorki | |
| 155 154 |  | 
| 156 155 | 
             
                        parsed_element_json = element_json["require"].last.last.first["__bbox"]["require"].first.last.last["__bbox"]["result"]["data"]["xdt_api__v1__media__shortcode__web_info"]
         | 
| 157 156 | 
             
                      rescue StandardError
         | 
| 157 | 
            +
                        # puts "***********************************************************"
         | 
| 158 | 
            +
                        # puts "Error in parsing JSON: #{e}"
         | 
| 159 | 
            +
                        # puts e.backtrace
         | 
| 160 | 
            +
                        # puts "***********************************************************"
         | 
| 158 161 | 
             
                        next
         | 
| 159 162 | 
             
                      end
         | 
| 160 163 |  | 
    
        data/lib/zorki/version.rb
    CHANGED