jobparser 0.0.2 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -7,7 +7,6 @@ module JobParser
7
7
 
8
8
  SALARY_GROUP_REGEX = /£([\d,]*)(?:.+)£([\d,]*)/
9
9
  CLEAN_SALARY_REGEX = /,|\s/
10
- NBSP = Nokogiri::HTML(" ").text
11
10
 
12
11
  def initialize(ary, opts = {})
13
12
  @subject = ary
@@ -28,6 +27,10 @@ module JobParser
28
27
  str.gsub('/n', '').gsub(NBSP, '').strip
29
28
  end
30
29
 
30
+ def self.clean_plain_text(str)
31
+ str.gsub(/\r|\t/, "").gsub(NBSP, " ")
32
+ end
33
+
31
34
  def self.make_link_absolute(url, href)
32
35
  if href.include?("http")
33
36
  href
@@ -27,7 +27,10 @@ module JobParser
27
27
  private
28
28
 
29
29
  def job_location
30
- LOCATION_REGEX.match(@plain_text.gsub(/\r|\t/, "")) { |m|
30
+ special_case_result = use_special_case(:location)
31
+ return special_case_result unless special_case_result.nil?
32
+
33
+ LOCATION_REGEX.match(Cleaner.clean_plain_text(@plain_text)) { |m|
31
34
  Cleaner.strip_string(m[1].to_s)
32
35
  } || ""
33
36
  end
@@ -54,7 +57,16 @@ module JobParser
54
57
  Cleaner.new(ary, :type => type).clean
55
58
  end
56
59
 
60
+ def use_special_case(name)
61
+ if special_case = SpecialCases.case_for_url(@url)
62
+ special_case[name].call(@doc)
63
+ end
64
+ end
65
+
57
66
  def job_salary_string
67
+ special_case_result = use_special_case(:salary_string)
68
+ return special_case_result unless special_case_result.nil?
69
+
58
70
  salary = ""
59
71
  loop_over_elements do |name, elem|
60
72
  SALARY_STRING_REGEX.match(@plain_text) { |m|
@@ -65,6 +77,9 @@ module JobParser
65
77
  end
66
78
 
67
79
  def job_salary
80
+ special_case_result = use_special_case(:salary)
81
+ return special_case_result unless special_case_result.nil?
82
+
68
83
  salary = ""
69
84
  loop_over_elements do |name, elem|
70
85
  SALARY_REGEX.match(@plain_text) { |m|
@@ -75,6 +90,9 @@ module JobParser
75
90
  end
76
91
 
77
92
  def job_title
93
+ special_case_result = use_special_case(:title)
94
+ return special_case_result unless special_case_result.nil?
95
+
78
96
  title_scorer = Scorer.new
79
97
  page_title = @doc.at_css("title").content
80
98
  title_scorer.store(page_title, 20).and_score_now
@@ -0,0 +1,56 @@
1
+ # encoding: utf-8
2
+ module JobParser
3
+ class SpecialCases
4
+ def self.dictionary
5
+ {
6
+ "jobsearch.direct.gov.uk" => {
7
+ :title => Proc.new { |doc| doc.css(".jobViewContent h2")[1].content },
8
+ :location => Proc.new { |doc|
9
+ location = ""
10
+ listings = doc.css(".jobViewSummary dl dt")
11
+ listings.each do |dt|
12
+ if dt.content == "Location"
13
+ location = dt.next_element.content
14
+ break
15
+ end
16
+ end
17
+ location
18
+ },
19
+ :salary_string => Proc.new { |doc|
20
+ salary = ""
21
+ listings = doc.css(".jobViewSummary dl dt")
22
+ listings.each do |dt|
23
+ if dt.content == "Salary"
24
+ salary = Cleaner.strip_string(dt.next_element.content)
25
+ break
26
+ end
27
+ end
28
+ salary
29
+ },
30
+ :salary => Proc.new { |doc|
31
+ # get string by calling salary_string special case
32
+ salary = nil
33
+ salary_string = SpecialCases.call_special_case("jobsearch.direct.gov.uk", :salary_string, doc)
34
+ /£?([0-9,\.]+)\D*£?([0-9,\.]+)/.match(salary_string) { |m|
35
+ low = m[1].gsub(",", "").to_i
36
+ high = m[2].gsub(",", "").to_i
37
+ salary = [low, high]
38
+ }
39
+ salary
40
+ }
41
+ }
42
+ }
43
+ end
44
+
45
+ def self.call_special_case(key, method, doc)
46
+ self.dictionary[key][method].call(doc)
47
+ end
48
+
49
+ def self.case_for_url(url)
50
+ self.dictionary.keys.each do |key|
51
+ return self.dictionary[key] if url.include?(key)
52
+ end
53
+ false
54
+ end
55
+ end
56
+ end
@@ -1,3 +1,3 @@
1
1
  module JobParser
2
- VERSION = "0.0.2"
2
+ VERSION = "0.1.0"
3
3
  end
data/lib/jobparser.rb CHANGED
@@ -3,6 +3,7 @@ require "jobparser/parsehtml"
3
3
  require "jobparser/parseurl"
4
4
  require "jobparser/cleaner"
5
5
  require "jobparser/scorer"
6
+ require "jobparser/specialcases"
6
7
 
7
8
  module JobParser
8
9
  # Your code goes here...
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jobparser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.1.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-07-22 00:00:00.000000000 Z
12
+ date: 2013-07-23 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
@@ -103,6 +103,7 @@ files:
103
103
  - lib/jobparser/parseurl.rb
104
104
  - lib/jobparser/regex.rb
105
105
  - lib/jobparser/scorer.rb
106
+ - lib/jobparser/specialcases.rb
106
107
  - lib/jobparser/version.rb
107
108
  - lib/jobparser.rb
108
109
  homepage: ''