jobparser 0.13.8 → 0.13.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/jobparser/cache/mongostore.rb +28 -30
- data/lib/jobparser/cache/textfile.rb +17 -14
- data/lib/jobparser/parsehtml.rb +7 -7
- data/lib/jobparser/parser.rb +7 -5
- data/lib/jobparser/version.rb +1 -1
- metadata +2 -2
| @@ -4,51 +4,46 @@ module JobParser | |
| 4 4 | 
             
                class MongoStore
         | 
| 5 5 |  | 
| 6 6 | 
             
                  def has_cache_for_url?(url)
         | 
| 7 | 
            -
                     | 
| 7 | 
            +
                    job_for_url(url).count > 0
         | 
| 8 8 | 
             
                  end
         | 
| 9 9 |  | 
| 10 10 | 
             
                  def store(hash)
         | 
| 11 | 
            -
                     | 
| 12 | 
            -
                    hash =  | 
| 11 | 
            +
                    job_for_url(hash[:url]).delete
         | 
| 12 | 
            +
                    hash = strip_fields_not_stored(hash)
         | 
| 13 13 | 
             
                    Job.create(hash)
         | 
| 14 14 | 
             
                  end
         | 
| 15 15 |  | 
| 16 16 | 
             
                  def cache_expired?(url)
         | 
| 17 | 
            -
                    job =  | 
| 17 | 
            +
                    job = job_for_url(url).first
         | 
| 18 18 | 
             
                    expire_time = (job.created_at + JobParser.config[:cache_expire])
         | 
| 19 19 | 
             
                    Time.now > expire_time
         | 
| 20 20 | 
             
                  end
         | 
| 21 21 |  | 
| 22 22 | 
             
                  def get(url)
         | 
| 23 | 
            -
                    job =  | 
| 24 | 
            -
                     | 
| 25 | 
            -
             | 
| 26 | 
            -
             | 
| 27 | 
            -
                        new_obj[k.to_sym] = v
         | 
| 23 | 
            +
                    job = job_for_url(url).first
         | 
| 24 | 
            +
                    {}.tap do |job_obj|
         | 
| 25 | 
            +
                      job.attributes.each do |k, v|
         | 
| 26 | 
            +
                        job_obj[k.to_sym] = v unless %w{created_at _id updated_at}.include?(k)
         | 
| 28 27 | 
             
                      end
         | 
| 29 | 
            -
             | 
| 30 | 
            -
                     | 
| 31 | 
            -
                    new_obj
         | 
| 28 | 
            +
                      job_obj[:from_cache] = true
         | 
| 29 | 
            +
                    end
         | 
| 32 30 | 
             
                  end
         | 
| 33 31 |  | 
| 34 32 | 
             
                  def clear_all
         | 
| 35 | 
            -
                    MongoStore::Job.each | 
| 36 | 
            -
                      job.delete
         | 
| 37 | 
            -
                    end
         | 
| 33 | 
            +
                    MongoStore::Job.each(&:delete)
         | 
| 38 34 | 
             
                  end
         | 
| 39 35 |  | 
| 40 36 | 
             
                  def view_cache
         | 
| 41 | 
            -
                     | 
| 42 | 
            -
             | 
| 43 | 
            -
             | 
| 44 | 
            -
             | 
| 45 | 
            -
             | 
| 46 | 
            -
             | 
| 37 | 
            +
                    [].tap do |res|
         | 
| 38 | 
            +
                      Job.each do |job|
         | 
| 39 | 
            +
                        res.push({
         | 
| 40 | 
            +
                          :url => job.url,
         | 
| 41 | 
            +
                          :created => job.created_at
         | 
| 42 | 
            +
                        })
         | 
| 43 | 
            +
                      end
         | 
| 47 44 | 
             
                    end
         | 
| 48 | 
            -
                    res
         | 
| 49 45 | 
             
                  end
         | 
| 50 46 |  | 
| 51 | 
            -
             | 
| 52 47 | 
             
                  class Job
         | 
| 53 48 | 
             
                    include Mongoid::Document
         | 
| 54 49 | 
             
                    include Mongoid::Timestamps
         | 
| @@ -57,6 +52,7 @@ module JobParser | |
| 57 52 | 
             
                      benefits education_requirements incentives industry
         | 
| 58 53 | 
             
                      occupational_category qualifications responsibilities skills special_commitments work_hours
         | 
| 59 54 | 
             
                    }
         | 
| 55 | 
            +
             | 
| 60 56 | 
             
                    EXTRA_SCHEMA_TEXT_FIELDS.each do |f|
         | 
| 61 57 | 
             
                      field f.to_sym, :type => String
         | 
| 62 58 | 
             
                    end
         | 
| @@ -68,20 +64,22 @@ module JobParser | |
| 68 64 | 
             
                    field :salary_string, :type => String
         | 
| 69 65 | 
             
                    field :location,      :type => String
         | 
| 70 66 | 
             
                    field :deadline,      :type => String
         | 
| 71 | 
            -
                    field :postcode, | 
| 67 | 
            +
                    field :postcode,      :type => String
         | 
| 72 68 | 
             
                    field :schema,        :type => Boolean, :default => false
         | 
| 73 | 
            -
             | 
| 74 69 | 
             
                  end
         | 
| 75 70 |  | 
| 76 71 | 
             
                  private
         | 
| 77 72 |  | 
| 78 | 
            -
                  def  | 
| 79 | 
            -
                     | 
| 80 | 
            -
                    excluded_fields = [:from_cache]
         | 
| 81 | 
            -
                    hash.each { |k, v| new_hash[k] = v unless excluded_fields.include?(k) }
         | 
| 82 | 
            -
                    new_hash
         | 
| 73 | 
            +
                  def job_for_url(url)
         | 
| 74 | 
            +
                    Job.where(:url => url)
         | 
| 83 75 | 
             
                  end
         | 
| 84 76 |  | 
| 77 | 
            +
                  def strip_fields_not_stored(hash)
         | 
| 78 | 
            +
                    {}.tap do |new_hash|
         | 
| 79 | 
            +
                      excluded_fields = [:from_cache]
         | 
| 80 | 
            +
                      hash.each { |k, v| new_hash[k] = v unless excluded_fields.include?(k) }
         | 
| 81 | 
            +
                    end
         | 
| 82 | 
            +
                  end
         | 
| 85 83 | 
             
                end
         | 
| 86 84 | 
             
              end
         | 
| 87 85 | 
             
            end
         | 
| @@ -4,20 +4,17 @@ module JobParser | |
| 4 4 | 
             
              class Cache
         | 
| 5 5 | 
             
                class TextFile
         | 
| 6 6 | 
             
                  def has_cache_for_url?(url)
         | 
| 7 | 
            -
                     | 
| 8 | 
            -
                    File.exist?(path)
         | 
| 7 | 
            +
                    File.exist?(path_for_url(url))
         | 
| 9 8 | 
             
                  end
         | 
| 10 9 |  | 
| 11 10 | 
             
                  def store(job_hash)
         | 
| 12 | 
            -
                     | 
| 13 | 
            -
                    write_to_file(path_for_url(url), job_hash.to_json)
         | 
| 11 | 
            +
                    write_to_file(path_for_url(job_hash[:url]), job_hash.to_json)
         | 
| 14 12 | 
             
                  end
         | 
| 15 13 |  | 
| 16 14 | 
             
                  def get(url)
         | 
| 17 15 | 
             
                    path = path_for_url(url)
         | 
| 18 16 | 
             
                    obj = JSON.parse(IO.read(path))
         | 
| 19 | 
            -
                    sym_obj =  | 
| 20 | 
            -
                    obj.each { |k, v| sym_obj[k.to_sym] = v }
         | 
| 17 | 
            +
                    sym_obj = make_object_keys_symbols(obj)
         | 
| 21 18 | 
             
                    sym_obj[:from_cache] = true
         | 
| 22 19 | 
             
                    sym_obj
         | 
| 23 20 | 
             
                  end
         | 
| @@ -33,19 +30,25 @@ module JobParser | |
| 33 30 | 
             
                  end
         | 
| 34 31 |  | 
| 35 32 | 
             
                  def view_cache
         | 
| 36 | 
            -
                     | 
| 37 | 
            -
             | 
| 38 | 
            -
             | 
| 39 | 
            -
             | 
| 40 | 
            -
             | 
| 41 | 
            -
             | 
| 42 | 
            -
             | 
| 33 | 
            +
                    [].tap do |res|
         | 
| 34 | 
            +
                      cache_files.each do |f|
         | 
| 35 | 
            +
                        contents = JSON.parse(IO.read(f))
         | 
| 36 | 
            +
                        res.push({
         | 
| 37 | 
            +
                          :url => contents["url"],
         | 
| 38 | 
            +
                          :created => File.mtime(f)
         | 
| 39 | 
            +
                        })
         | 
| 40 | 
            +
                      end
         | 
| 43 41 | 
             
                    end
         | 
| 44 | 
            -
                    res
         | 
| 45 42 | 
             
                  end
         | 
| 46 43 |  | 
| 47 44 | 
             
                  private
         | 
| 48 45 |  | 
| 46 | 
            +
                  def make_object_keys_symbols(obj)
         | 
| 47 | 
            +
                    {}.tap do |sym_obj|
         | 
| 48 | 
            +
                      obj.each { |k, v| sym_obj[k.to_sym] = v }
         | 
| 49 | 
            +
                    end
         | 
| 50 | 
            +
                  end
         | 
| 51 | 
            +
             | 
| 49 52 | 
             
                  def cache_files
         | 
| 50 53 | 
             
                    Dir[File.join(JobParser.config[:cache_location], "*.txt")]
         | 
| 51 54 | 
             
                  end
         | 
    
        data/lib/jobparser/parsehtml.rb
    CHANGED
    
    | @@ -10,31 +10,31 @@ module JobParser | |
| 10 10 | 
             
                private
         | 
| 11 11 |  | 
| 12 12 | 
             
                def job_location
         | 
| 13 | 
            -
                  Facets::Location.new( | 
| 13 | 
            +
                  Facets::Location.new(*facet_args).parse
         | 
| 14 14 | 
             
                end
         | 
| 15 15 |  | 
| 16 16 | 
             
                def job_salary_string
         | 
| 17 | 
            -
                  Facets::SalaryString.new( | 
| 17 | 
            +
                  Facets::SalaryString.new(*facet_args).parse
         | 
| 18 18 | 
             
                end
         | 
| 19 19 |  | 
| 20 20 | 
             
                def job_salary
         | 
| 21 | 
            -
                  Facets::Salary.new( | 
| 21 | 
            +
                  Facets::Salary.new(*facet_args).parse
         | 
| 22 22 | 
             
                end
         | 
| 23 23 |  | 
| 24 24 | 
             
                def job_title
         | 
| 25 | 
            -
                  Facets::Title.new( | 
| 25 | 
            +
                  Facets::Title.new(*facet_args).parse
         | 
| 26 26 | 
             
                end
         | 
| 27 27 |  | 
| 28 28 | 
             
                def apply_link
         | 
| 29 | 
            -
                  Facets::Apply.new( | 
| 29 | 
            +
                  Facets::Apply.new(*facet_args).parse
         | 
| 30 30 | 
             
                end
         | 
| 31 31 |  | 
| 32 32 | 
             
                def deadline
         | 
| 33 | 
            -
                  Facets::Deadline.new( | 
| 33 | 
            +
                  Facets::Deadline.new(*facet_args).parse
         | 
| 34 34 | 
             
                end
         | 
| 35 35 |  | 
| 36 36 | 
             
                def job_postcode
         | 
| 37 | 
            -
                  Facets::Postcode.new( | 
| 37 | 
            +
                  Facets::Postcode.new(*facet_args).parse
         | 
| 38 38 | 
             
                end
         | 
| 39 39 | 
             
              end
         | 
| 40 40 | 
             
            end
         | 
    
        data/lib/jobparser/parser.rb
    CHANGED
    
    | @@ -6,6 +6,8 @@ module JobParser | |
| 6 6 | 
             
                def initialize(html, from_url)
         | 
| 7 7 | 
             
                  @url = from_url
         | 
| 8 8 | 
             
                  @html = html
         | 
| 9 | 
            +
                  @doc = strip_bad_elements(Nokogiri::HTML(@html))
         | 
| 10 | 
            +
                  @plain_text = get_plain_text
         | 
| 9 11 | 
             
                end
         | 
| 10 12 |  | 
| 11 13 | 
             
                def job
         | 
| @@ -13,10 +15,7 @@ module JobParser | |
| 13 15 | 
             
                    return JobParser.cache.fetch_result_for_url(@url)
         | 
| 14 16 | 
             
                  end
         | 
| 15 17 |  | 
| 16 | 
            -
                   | 
| 17 | 
            -
                  @plain_text = get_plain_text
         | 
| 18 | 
            -
             | 
| 19 | 
            -
                  result = { :url => @url,
         | 
| 18 | 
            +
                  { :url => @url,
         | 
| 20 19 | 
             
                    :salary => job_salary,
         | 
| 21 20 | 
             
                    :title => job_title,
         | 
| 22 21 | 
             
                    :apply => apply_link,
         | 
| @@ -25,11 +24,14 @@ module JobParser | |
| 25 24 | 
             
                    :deadline => deadline,
         | 
| 26 25 | 
             
                    :postcode => job_postcode
         | 
| 27 26 | 
             
                  }
         | 
| 28 | 
            -
                  result
         | 
| 29 27 | 
             
                end
         | 
| 30 28 |  | 
| 31 29 | 
             
                private
         | 
| 32 30 |  | 
| 31 | 
            +
                def facet_args
         | 
| 32 | 
            +
                  [@doc, @url, @plain_text]
         | 
| 33 | 
            +
                end
         | 
| 34 | 
            +
             | 
| 33 35 | 
             
                def cache(result)
         | 
| 34 36 | 
             
                  if JobParser.config[:cache_on]
         | 
| 35 37 | 
             
                    store_result_to_cache(result)
         | 
    
        data/lib/jobparser/version.rb
    CHANGED
    
    
    
        metadata
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: jobparser
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.13.8 | 
| 4 | 
            +
              version: 0.13.9
         | 
| 5 5 | 
             
              prerelease: 
         | 
| 6 6 | 
             
            platform: ruby
         | 
| 7 7 | 
             
            authors:
         | 
| @@ -9,7 +9,7 @@ authors: | |
| 9 9 | 
             
            autorequire: 
         | 
| 10 10 | 
             
            bindir: bin
         | 
| 11 11 | 
             
            cert_chain: []
         | 
| 12 | 
            -
            date: 2013-08- | 
| 12 | 
            +
            date: 2013-08-23 00:00:00.000000000 Z
         | 
| 13 13 | 
             
            dependencies:
         | 
| 14 14 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 15 15 | 
             
              name: bundler
         |