indeed_scraper2022 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/lib/indeed_scraper2022.rb +44 -3
- data.tar.gz.sig +0 -0
- metadata +2 -2
- metadata.gz.sig +0 -0
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 972f811430fae59121e39c9c4752b64fc43b37165a52dcec8c3eac42cf1e4555
         | 
| 4 | 
            +
              data.tar.gz: 85e987eb264b098b4c892e2e05d2ab082e3b8968fff2bdd519552e889f014f9d
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: e7d3c2a13e315383248c557806dd0184d8831f46a2e314816395f62fcb886ba2e38a3e1f2deb180ceb33b614cf0b7be8a13379028fc56982e388948575bdb02c
         | 
| 7 | 
            +
              data.tar.gz: 6ca4792d260c43b22fcee5ead8928525df17f1db112c49ac4cc7d7a5c9b29a8f483b94c18cd39bace1d4a4ee553ff83254a7fb445d76abf06c1705d19bac455c
         | 
    
        checksums.yaml.gz.sig
    CHANGED
    
    | Binary file | 
    
        data/lib/indeed_scraper2022.rb
    CHANGED
    
    | @@ -4,11 +4,13 @@ | |
| 4 4 |  | 
| 5 5 | 
             
            require 'ferrumwizard'
         | 
| 6 6 | 
             
            require 'nokorexi'
         | 
| 7 | 
            +
            require 'yaml'
         | 
| 7 8 |  | 
| 8 9 | 
             
            # Given the nature of changes to jobsearch websites,
         | 
| 9 10 | 
             
            # don't rely upon this gem working in the near future.
         | 
| 10 11 |  | 
| 11 12 |  | 
| 13 | 
            +
             | 
| 12 14 | 
             
            class IndeedScraper2022Err < Exception
         | 
| 13 15 | 
             
            end
         | 
| 14 16 |  | 
| @@ -218,14 +220,53 @@ class IS22Plus < IndeedScraper2022 | |
| 218 220 | 
             
                      debug: debug)
         | 
| 219 221 | 
             
              end
         | 
| 220 222 |  | 
| 221 | 
            -
              def archive()
         | 
| 223 | 
            +
              def archive(filepath='/tmp/indeed')
         | 
| 222 224 |  | 
| 223 225 | 
             
                return unless @results
         | 
| 224 226 |  | 
| 225 | 
            -
                 | 
| 226 | 
            -
             | 
| 227 | 
            +
                FileUtils.mkdir_p filepath
         | 
| 228 | 
            +
             | 
| 229 | 
            +
                idxfile = File.join(filepath, 'index.yml')
         | 
| 230 | 
            +
             | 
| 231 | 
            +
                index = if File.exists? idxfile then
         | 
| 232 | 
            +
                  YAML.load(File.read(idxfile))
         | 
| 233 | 
            +
                else
         | 
| 234 | 
            +
                  {}
         | 
| 227 235 | 
             
                end
         | 
| 228 236 |  | 
| 237 | 
            +
                @results.each.with_index do |item, i|
         | 
| 238 | 
            +
             | 
| 239 | 
            +
                  puts 'saving ' + item[:title] if @debug
         | 
| 240 | 
            +
                  puts 'link: ' + item[:link].inspect
         | 
| 241 | 
            +
                  links = RXFReader.reveal(item[:link])
         | 
| 242 | 
            +
                  puts 'links: ' + links.inspect
         | 
| 243 | 
            +
             | 
| 244 | 
            +
                  url = links.last
         | 
| 245 | 
            +
                  id = url[/(?<=\?jk=)[^&]+/]
         | 
| 246 | 
            +
             | 
| 247 | 
            +
                  if index[id.to_sym] then
         | 
| 248 | 
            +
                    next
         | 
| 249 | 
            +
                  else
         | 
| 250 | 
            +
             | 
| 251 | 
            +
                    File.write File.join(filepath, 'j' + id + '.txt'), page(i+1)
         | 
| 252 | 
            +
             | 
| 253 | 
            +
                    h = {
         | 
| 254 | 
            +
                      link: url[/^[^&]+/],
         | 
| 255 | 
            +
                      title: item[:title].to_s,
         | 
| 256 | 
            +
                      salary: item[:salary].to_s,
         | 
| 257 | 
            +
                      company: item[:company].to_s.strip,
         | 
| 258 | 
            +
                      location: item[:location].to_s,
         | 
| 259 | 
            +
                      jobsnippet: item[:jobsnippet],
         | 
| 260 | 
            +
                      date: item[:date]
         | 
| 261 | 
            +
                    }
         | 
| 262 | 
            +
             | 
| 263 | 
            +
                    index[id.to_sym] = h
         | 
| 264 | 
            +
                  end
         | 
| 265 | 
            +
             | 
| 266 | 
            +
                end
         | 
| 267 | 
            +
             | 
| 268 | 
            +
                File.write idxfile, index.to_yaml
         | 
| 269 | 
            +
             | 
| 229 270 | 
             
              end
         | 
| 230 271 |  | 
| 231 272 | 
             
              def list()
         | 
    
        data.tar.gz.sig
    CHANGED
    
    | Binary file | 
    
        metadata
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: indeed_scraper2022
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.2.1 | 
| 4 | 
            +
              version: 0.3.0
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - James Robertson
         | 
| @@ -35,7 +35,7 @@ cert_chain: | |
| 35 35 | 
             
              YLGLkwmbiUHX5dRU3RwOwOrZiyvND5BIj7S6dZ6jYHe0I727apgQNc3swTz5mW6I
         | 
| 36 36 | 
             
              SW/2zInu2bkj/meWm5eBoWHT
         | 
| 37 37 | 
             
              -----END CERTIFICATE-----
         | 
| 38 | 
            -
            date: 2022- | 
| 38 | 
            +
            date: 2022-04-01 00:00:00.000000000 Z
         | 
| 39 39 | 
             
            dependencies:
         | 
| 40 40 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 41 41 | 
             
              name: nokorexi
         | 
    
        metadata.gz.sig
    CHANGED
    
    | Binary file |