jobparser 0.7.2 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/jobparser/parseschema.rb +32 -8
- data/lib/jobparser/version.rb +1 -1
- metadata +2 -2
@@ -1,9 +1,16 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
module JobParser
|
3
3
|
class ParseSchema < Parser
|
4
|
+
|
5
|
+
def initialize(html, from_url)
|
6
|
+
Parser::ACCEPTED_ELEMENTS.push("span")
|
7
|
+
super(html, from_url)
|
8
|
+
end
|
4
9
|
def job
|
5
10
|
res = super
|
6
11
|
res[:schema] = true
|
12
|
+
res[:benefits] = job_benefits
|
13
|
+
res[:education_requirements] = job_education_requirements
|
7
14
|
res
|
8
15
|
end
|
9
16
|
private
|
@@ -23,21 +30,38 @@ module JobParser
|
|
23
30
|
Facets::Apply.new(@doc, @url, @plain_text).parse
|
24
31
|
end
|
25
32
|
|
33
|
+
def job_benefits
|
34
|
+
get_content_at_prop("benefits")
|
35
|
+
end
|
36
|
+
|
37
|
+
def job_education_requirements
|
38
|
+
get_content_at_prop("educationRequirements")
|
39
|
+
end
|
40
|
+
|
26
41
|
def job_salary_string
|
27
42
|
get_content_at_prop("baseSalary")
|
28
43
|
end
|
29
44
|
|
30
45
|
def job_location
|
31
|
-
|
32
|
-
|
33
|
-
|
46
|
+
if @doc.css("*").any? { |elem| elem['itemtype'] == "http://schema.org/PostalAddress" }
|
47
|
+
fields = %w{ streetAddress addressLocality addressRegion addressCountry postalCode }
|
48
|
+
address = []
|
49
|
+
fields.each do |field|
|
50
|
+
content = get_content_at_prop(field)
|
51
|
+
address.push(content) unless content.empty?
|
52
|
+
end
|
53
|
+
address.join(", ")
|
34
54
|
else
|
35
|
-
|
55
|
+
# some sites don't use the address stuff properly
|
56
|
+
if is_content_at_prop?("addressLocality")
|
57
|
+
get_content_at_prop("addressLocality")
|
58
|
+
else
|
59
|
+
get_content_at_prop("jobLocation")
|
60
|
+
end
|
36
61
|
end
|
37
62
|
end
|
38
63
|
|
39
64
|
def deadline
|
40
|
-
|
41
65
|
end
|
42
66
|
|
43
67
|
def does_use_schema?
|
@@ -49,15 +73,15 @@ module JobParser
|
|
49
73
|
def get_content_at_prop(prop)
|
50
74
|
elem = find_with_itemprop(prop)
|
51
75
|
if elem
|
52
|
-
Cleaner.strip_all_white_space(
|
76
|
+
Cleaner.strip_all_white_space(elem.content)
|
53
77
|
else
|
54
78
|
""
|
55
79
|
end
|
56
80
|
end
|
57
81
|
|
58
82
|
def is_content_at_prop?(prop)
|
59
|
-
elem = find_with_itemprop(
|
60
|
-
elem && !elem.empty?
|
83
|
+
elem = find_with_itemprop(prop)
|
84
|
+
elem && !elem.content.empty?
|
61
85
|
end
|
62
86
|
|
63
87
|
def find_with_itemprop(prop)
|
data/lib/jobparser/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: jobparser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.2
|
4
|
+
version: 0.8.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-08-
|
12
|
+
date: 2013-08-07 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|