jobparser 0.12.0 → 0.13.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/jobparser/facets/title.rb +6 -2
- data/lib/jobparser/regex.rb +1 -1
- data/lib/jobparser/version.rb +1 -1
- metadata +1 -1
@@ -7,7 +7,7 @@ module JobParser
|
|
7
7
|
|
8
8
|
title_scorer = Scorer.new
|
9
9
|
page_title = @doc.at_css("title").content
|
10
|
-
title_scorer.store_and_score(page_title,
|
10
|
+
title_scorer.store_and_score(page_title, 10)
|
11
11
|
|
12
12
|
# first see if we find something with a matching id
|
13
13
|
loop_over_elements do |name, elem|
|
@@ -35,7 +35,7 @@ module JobParser
|
|
35
35
|
}
|
36
36
|
end
|
37
37
|
|
38
|
-
title_scorer.top_match.strip.gsub(NBSP, "")
|
38
|
+
clean_title(title_scorer.top_match.strip.gsub(NBSP, ""))
|
39
39
|
end
|
40
40
|
|
41
41
|
private
|
@@ -43,6 +43,10 @@ module JobParser
|
|
43
43
|
def elem_is_heading?(name)
|
44
44
|
%w{h1 h2 h3 h4 h5}.include?(name)
|
45
45
|
end
|
46
|
+
|
47
|
+
def clean_title(title)
|
48
|
+
Cleaner.strip_all_white_space(title.split(":").last)
|
49
|
+
end
|
46
50
|
end
|
47
51
|
end
|
48
52
|
end
|
data/lib/jobparser/regex.rb
CHANGED
@@ -15,5 +15,5 @@ module JobParser
|
|
15
15
|
|
16
16
|
# words commonly used in job listings - not sure if this is a good way to go but I think it's worth a go
|
17
17
|
# could scope this regex just to headers
|
18
|
-
JOB_TITLE_WORDS = /representative|sales|nurse|manager/i
|
18
|
+
JOB_TITLE_WORDS = /representative|sales|nurse|manager|assistant/i
|
19
19
|
end
|
data/lib/jobparser/version.rb
CHANGED