jobparser 0.7.2 → 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,9 +1,16 @@
1
1
  # encoding: utf-8
2
2
  module JobParser
3
3
  class ParseSchema < Parser
4
+
5
+ def initialize(html, from_url)
6
+ Parser::ACCEPTED_ELEMENTS.push("span")
7
+ super(html, from_url)
8
+ end
4
9
  def job
5
10
  res = super
6
11
  res[:schema] = true
12
+ res[:benefits] = job_benefits
13
+ res[:education_requirements] = job_education_requirements
7
14
  res
8
15
  end
9
16
  private
@@ -23,21 +30,38 @@ module JobParser
23
30
  Facets::Apply.new(@doc, @url, @plain_text).parse
24
31
  end
25
32
 
33
+ def job_benefits
34
+ get_content_at_prop("benefits")
35
+ end
36
+
37
+ def job_education_requirements
38
+ get_content_at_prop("educationRequirements")
39
+ end
40
+
26
41
  def job_salary_string
27
42
  get_content_at_prop("baseSalary")
28
43
  end
29
44
 
30
45
  def job_location
31
- # some sites don't use the address stuff properly
32
- if is_content_at_prop?("addressLocality")
33
- get_content_at_prop("addressLocality")
46
+ if @doc.css("*").any? { |elem| elem['itemtype'] == "http://schema.org/PostalAddress" }
47
+ fields = %w{ streetAddress addressLocality addressRegion addressCountry postalCode }
48
+ address = []
49
+ fields.each do |field|
50
+ content = get_content_at_prop(field)
51
+ address.push(content) unless content.empty?
52
+ end
53
+ address.join(", ")
34
54
  else
35
- get_content_at_prop("jobLocation")
55
+ # some sites don't use the address stuff properly
56
+ if is_content_at_prop?("addressLocality")
57
+ get_content_at_prop("addressLocality")
58
+ else
59
+ get_content_at_prop("jobLocation")
60
+ end
36
61
  end
37
62
  end
38
63
 
39
64
  def deadline
40
-
41
65
  end
42
66
 
43
67
  def does_use_schema?
@@ -49,15 +73,15 @@ module JobParser
49
73
  def get_content_at_prop(prop)
50
74
  elem = find_with_itemprop(prop)
51
75
  if elem
52
- Cleaner.strip_all_white_space(find_with_itemprop(prop).content)
76
+ Cleaner.strip_all_white_space(elem.content)
53
77
  else
54
78
  ""
55
79
  end
56
80
  end
57
81
 
58
82
  def is_content_at_prop?(prop)
59
- elem = find_with_itemprop("prop")
60
- elem && !elem.empty?
83
+ elem = find_with_itemprop(prop)
84
+ elem && !elem.content.empty?
61
85
  end
62
86
 
63
87
  def find_with_itemprop(prop)
@@ -1,3 +1,3 @@
1
1
  module JobParser
2
- VERSION = "0.7.2"
2
+ VERSION = "0.8.0"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jobparser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.2
4
+ version: 0.8.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-08-06 00:00:00.000000000 Z
12
+ date: 2013-08-07 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler