jobparser 0.13.8 → 0.13.9

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
@@ -4,51 +4,46 @@ module JobParser
4
4
  class MongoStore
5
5
 
6
6
  def has_cache_for_url?(url)
7
- Job.where(:url => url).count > 0
7
+ job_for_url(url).count > 0
8
8
  end
9
9
 
10
10
  def store(hash)
11
- Job.where(:url => hash[:url]).delete
12
- hash = strip_job_fields(hash)
11
+ job_for_url(hash[:url]).delete
12
+ hash = strip_fields_not_stored(hash)
13
13
  Job.create(hash)
14
14
  end
15
15
 
16
16
  def cache_expired?(url)
17
- job = Job.where(:url => url).first
17
+ job = job_for_url(url).first
18
18
  expire_time = (job.created_at + JobParser.config[:cache_expire])
19
19
  Time.now > expire_time
20
20
  end
21
21
 
22
22
  def get(url)
23
- job = Job.where(:url => url).first
24
- new_obj = {}
25
- job.attributes.each { |k, v|
26
- unless %w{created_at _id updated_at}.include?(k)
27
- new_obj[k.to_sym] = v
23
+ job = job_for_url(url).first
24
+ {}.tap do |job_obj|
25
+ job.attributes.each do |k, v|
26
+ job_obj[k.to_sym] = v unless %w{created_at _id updated_at}.include?(k)
28
27
  end
29
- }
30
- new_obj[:from_cache] = true
31
- new_obj
28
+ job_obj[:from_cache] = true
29
+ end
32
30
  end
33
31
 
34
32
  def clear_all
35
- MongoStore::Job.each do |job|
36
- job.delete
37
- end
33
+ MongoStore::Job.each(&:delete)
38
34
  end
39
35
 
40
36
  def view_cache
41
- res = []
42
- Job.each do |job|
43
- res.push({
44
- :url => job.url,
45
- :created => job.created_at
46
- })
37
+ [].tap do |res|
38
+ Job.each do |job|
39
+ res.push({
40
+ :url => job.url,
41
+ :created => job.created_at
42
+ })
43
+ end
47
44
  end
48
- res
49
45
  end
50
46
 
51
-
52
47
  class Job
53
48
  include Mongoid::Document
54
49
  include Mongoid::Timestamps
@@ -57,6 +52,7 @@ module JobParser
57
52
  benefits education_requirements incentives industry
58
53
  occupational_category qualifications responsibilities skills special_commitments work_hours
59
54
  }
55
+
60
56
  EXTRA_SCHEMA_TEXT_FIELDS.each do |f|
61
57
  field f.to_sym, :type => String
62
58
  end
@@ -68,20 +64,22 @@ module JobParser
68
64
  field :salary_string, :type => String
69
65
  field :location, :type => String
70
66
  field :deadline, :type => String
71
- field :postcode, :type => String
67
+ field :postcode, :type => String
72
68
  field :schema, :type => Boolean, :default => false
73
-
74
69
  end
75
70
 
76
71
  private
77
72
 
78
- def strip_job_fields(hash)
79
- new_hash = {}
80
- excluded_fields = [:from_cache]
81
- hash.each { |k, v| new_hash[k] = v unless excluded_fields.include?(k) }
82
- new_hash
73
+ def job_for_url(url)
74
+ Job.where(:url => url)
83
75
  end
84
76
 
77
+ def strip_fields_not_stored(hash)
78
+ {}.tap do |new_hash|
79
+ excluded_fields = [:from_cache]
80
+ hash.each { |k, v| new_hash[k] = v unless excluded_fields.include?(k) }
81
+ end
82
+ end
85
83
  end
86
84
  end
87
85
  end
@@ -4,20 +4,17 @@ module JobParser
4
4
  class Cache
5
5
  class TextFile
6
6
  def has_cache_for_url?(url)
7
- path = path_for_url(url)
8
- File.exist?(path)
7
+ File.exist?(path_for_url(url))
9
8
  end
10
9
 
11
10
  def store(job_hash)
12
- url = job_hash[:url]
13
- write_to_file(path_for_url(url), job_hash.to_json)
11
+ write_to_file(path_for_url(job_hash[:url]), job_hash.to_json)
14
12
  end
15
13
 
16
14
  def get(url)
17
15
  path = path_for_url(url)
18
16
  obj = JSON.parse(IO.read(path))
19
- sym_obj = {}
20
- obj.each { |k, v| sym_obj[k.to_sym] = v }
17
+ sym_obj = make_object_keys_symbols(obj)
21
18
  sym_obj[:from_cache] = true
22
19
  sym_obj
23
20
  end
@@ -33,19 +30,25 @@ module JobParser
33
30
  end
34
31
 
35
32
  def view_cache
36
- res = []
37
- cache_files.each do |f|
38
- contents = JSON.parse(IO.read(f))
39
- res.push({
40
- :url => contents["url"],
41
- :created => File.mtime(f)
42
- })
33
+ [].tap do |res|
34
+ cache_files.each do |f|
35
+ contents = JSON.parse(IO.read(f))
36
+ res.push({
37
+ :url => contents["url"],
38
+ :created => File.mtime(f)
39
+ })
40
+ end
43
41
  end
44
- res
45
42
  end
46
43
 
47
44
  private
48
45
 
46
+ def make_object_keys_symbols(obj)
47
+ {}.tap do |sym_obj|
48
+ obj.each { |k, v| sym_obj[k.to_sym] = v }
49
+ end
50
+ end
51
+
49
52
  def cache_files
50
53
  Dir[File.join(JobParser.config[:cache_location], "*.txt")]
51
54
  end
@@ -10,31 +10,31 @@ module JobParser
10
10
  private
11
11
 
12
12
  def job_location
13
- Facets::Location.new(@doc, @url, @plain_text).parse
13
+ Facets::Location.new(*facet_args).parse
14
14
  end
15
15
 
16
16
  def job_salary_string
17
- Facets::SalaryString.new(@doc, @url, @plain_text).parse
17
+ Facets::SalaryString.new(*facet_args).parse
18
18
  end
19
19
 
20
20
  def job_salary
21
- Facets::Salary.new(@doc, @url, @plain_text).parse
21
+ Facets::Salary.new(*facet_args).parse
22
22
  end
23
23
 
24
24
  def job_title
25
- Facets::Title.new(@doc, @url, @pplain_text).parse
25
+ Facets::Title.new(*facet_args).parse
26
26
  end
27
27
 
28
28
  def apply_link
29
- Facets::Apply.new(@doc, @url, @plain_text).parse
29
+ Facets::Apply.new(*facet_args).parse
30
30
  end
31
31
 
32
32
  def deadline
33
- Facets::Deadline.new(@doc, @url, @plain_text).parse
33
+ Facets::Deadline.new(*facet_args).parse
34
34
  end
35
35
 
36
36
  def job_postcode
37
- Facets::Postcode.new(@doc, @url, @plain_text).parse
37
+ Facets::Postcode.new(*facet_args).parse
38
38
  end
39
39
  end
40
40
  end
@@ -6,6 +6,8 @@ module JobParser
6
6
  def initialize(html, from_url)
7
7
  @url = from_url
8
8
  @html = html
9
+ @doc = strip_bad_elements(Nokogiri::HTML(@html))
10
+ @plain_text = get_plain_text
9
11
  end
10
12
 
11
13
  def job
@@ -13,10 +15,7 @@ module JobParser
13
15
  return JobParser.cache.fetch_result_for_url(@url)
14
16
  end
15
17
 
16
- @doc = strip_bad_elements(Nokogiri::HTML(@html))
17
- @plain_text = get_plain_text
18
-
19
- result = { :url => @url,
18
+ { :url => @url,
20
19
  :salary => job_salary,
21
20
  :title => job_title,
22
21
  :apply => apply_link,
@@ -25,11 +24,14 @@ module JobParser
25
24
  :deadline => deadline,
26
25
  :postcode => job_postcode
27
26
  }
28
- result
29
27
  end
30
28
 
31
29
  private
32
30
 
31
+ def facet_args
32
+ [@doc, @url, @plain_text]
33
+ end
34
+
33
35
  def cache(result)
34
36
  if JobParser.config[:cache_on]
35
37
  store_result_to_cache(result)
@@ -1,3 +1,3 @@
1
1
  module JobParser
2
- VERSION = "0.13.8"
2
+ VERSION = "0.13.9"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jobparser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.13.8
4
+ version: 0.13.9
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-08-22 00:00:00.000000000 Z
12
+ date: 2013-08-23 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler