jobparser 0.12.0 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/jobparser/facets/title.rb +6 -2
- data/lib/jobparser/regex.rb +1 -1
- data/lib/jobparser/version.rb +1 -1
- metadata +1 -1
data/lib/jobparser/facets/title.rb
CHANGED
@@ -7,7 +7,7 @@ module JobParser
|
|
7
7
|
|
8
8
|
title_scorer = Scorer.new
|
9
9
|
page_title = @doc.at_css("title").content
|
10
|
-
title_scorer.store_and_score(page_title,
|
10
|
+
title_scorer.store_and_score(page_title, 10)
|
11
11
|
|
12
12
|
# first see if we find something with a matching id
|
13
13
|
loop_over_elements do |name, elem|
|
@@ -35,7 +35,7 @@ module JobParser
|
|
35
35
|
}
|
36
36
|
end
|
37
37
|
|
38
|
-
title_scorer.top_match.strip.gsub(NBSP, "")
|
38
|
+
clean_title(title_scorer.top_match.strip.gsub(NBSP, ""))
|
39
39
|
end
|
40
40
|
|
41
41
|
private
|
@@ -43,6 +43,10 @@ module JobParser
|
|
43
43
|
def elem_is_heading?(name)
|
44
44
|
%w{h1 h2 h3 h4 h5}.include?(name)
|
45
45
|
end
|
46
|
+
|
47
|
+
def clean_title(title)
|
48
|
+
Cleaner.strip_all_white_space(title.split(":").last)
|
49
|
+
end
|
46
50
|
end
|
47
51
|
end
|
48
52
|
end
|
data/lib/jobparser/regex.rb
CHANGED
@@ -15,5 +15,5 @@ module JobParser
|
|
15
15
|
|
16
16
|
# words commonly used in job listings - not sure if this is a good way to go but I think it's worth a go
|
17
17
|
# could scope this regex just to headers
|
18
|
-
JOB_TITLE_WORDS = /representative|sales|nurse|manager/i
|
18
|
+
JOB_TITLE_WORDS = /representative|sales|nurse|manager|assistant/i
|
19
19
|
end
|
data/lib/jobparser/version.rb
CHANGED