jobparser 0.7.2 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,16 @@
1
1
  # encoding: utf-8
2
2
  module JobParser
3
3
  class ParseSchema < Parser
4
+
5
+ def initialize(html, from_url)
6
+ Parser::ACCEPTED_ELEMENTS.push("span")
7
+ super(html, from_url)
8
+ end
4
9
  def job
5
10
  res = super
6
11
  res[:schema] = true
12
+ res[:benefits] = job_benefits
13
+ res[:education_requirements] = job_education_requirements
7
14
  res
8
15
  end
9
16
  private
@@ -23,21 +30,38 @@ module JobParser
23
30
  Facets::Apply.new(@doc, @url, @plain_text).parse
24
31
  end
25
32
 
33
+ def job_benefits
34
+ get_content_at_prop("benefits")
35
+ end
36
+
37
+ def job_education_requirements
38
+ get_content_at_prop("educationRequirements")
39
+ end
40
+
26
41
  def job_salary_string
27
42
  get_content_at_prop("baseSalary")
28
43
  end
29
44
 
30
45
  def job_location
31
- # some sites don't use the address stuff properly
32
- if is_content_at_prop?("addressLocality")
33
- get_content_at_prop("addressLocality")
46
+ if @doc.css("*").any? { |elem| elem['itemtype'] == "http://schema.org/PostalAddress" }
47
+ fields = %w{ streetAddress addressLocality addressRegion addressCountry postalCode }
48
+ address = []
49
+ fields.each do |field|
50
+ content = get_content_at_prop(field)
51
+ address.push(content) unless content.empty?
52
+ end
53
+ address.join(", ")
34
54
  else
35
- get_content_at_prop("jobLocation")
55
+ # some sites don't use the address stuff properly
56
+ if is_content_at_prop?("addressLocality")
57
+ get_content_at_prop("addressLocality")
58
+ else
59
+ get_content_at_prop("jobLocation")
60
+ end
36
61
  end
37
62
  end
38
63
 
39
64
  def deadline
40
-
41
65
  end
42
66
 
43
67
  def does_use_schema?
@@ -49,15 +73,15 @@ module JobParser
49
73
  def get_content_at_prop(prop)
50
74
  elem = find_with_itemprop(prop)
51
75
  if elem
52
- Cleaner.strip_all_white_space(find_with_itemprop(prop).content)
76
+ Cleaner.strip_all_white_space(elem.content)
53
77
  else
54
78
  ""
55
79
  end
56
80
  end
57
81
 
58
82
  def is_content_at_prop?(prop)
59
- elem = find_with_itemprop("prop")
60
- elem && !elem.empty?
83
+ elem = find_with_itemprop(prop)
84
+ elem && !elem.content.empty?
61
85
  end
62
86
 
63
87
  def find_with_itemprop(prop)
@@ -1,3 +1,3 @@
1
1
  module JobParser
2
- VERSION = "0.7.2"
2
+ VERSION = "0.8.0"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jobparser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.2
4
+ version: 0.8.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-08-06 00:00:00.000000000 Z
12
+ date: 2013-08-07 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler