jobparser 0.0.2 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,7 +7,6 @@ module JobParser
7
7
 
8
8
  SALARY_GROUP_REGEX = /£([\d,]*)(?:.+)£([\d,]*)/
9
9
  CLEAN_SALARY_REGEX = /,|\s/
10
- NBSP = Nokogiri::HTML(" ").text
11
10
 
12
11
  def initialize(ary, opts = {})
13
12
  @subject = ary
@@ -28,6 +27,10 @@ module JobParser
28
27
  str.gsub('/n', '').gsub(NBSP, '').strip
29
28
  end
30
29
 
30
# Prepares raw page text for regex matching: deletes every carriage return
# and tab character, then replaces each occurrence of NBSP with a single
# ordinary space. Returns a new string; `str` itself is not mutated.
# NOTE(review): NBSP is defined outside this hunk -- presumably the
# non-breaking-space character; confirm against its definition.
+ def self.clean_plain_text(str)
31
+ str.gsub(/\r|\t/, "").gsub(NBSP, " ")
32
+ end
33
+
31
34
  def self.make_link_absolute(url, href)
32
35
  if href.include?("http")
33
36
  href
@@ -27,7 +27,10 @@ module JobParser
27
27
  private
28
28
 
29
29
  # Extracts the job's location.
  #
  # First asks use_special_case(:location) for a site-specific answer and
  # returns it whenever it is not nil. Otherwise matches LOCATION_REGEX against
  # the plain text after Cleaner.clean_plain_text has processed it, and returns
  # the first capture group passed through Cleaner.strip_string. Returns ""
  # when the regex does not match.
  #
  # The "-" line is the previous implementation, which stripped \r and \t
  # inline instead of delegating to Cleaner.clean_plain_text.
  def job_location
30
- LOCATION_REGEX.match(@plain_text.gsub(/\r|\t/, "")) { |m|
30
+ special_case_result = use_special_case(:location)
31
+ return special_case_result unless special_case_result.nil?
32
+
33
+ LOCATION_REGEX.match(Cleaner.clean_plain_text(@plain_text)) { |m|
31
34
  Cleaner.strip_string(m[1].to_s)
32
35
  } || ""
33
36
  end
@@ -54,7 +57,16 @@ module JobParser
54
57
  Cleaner.new(ary, :type => type).clean
55
58
  end
56
59
 
60
# Runs the site-specific extractor registered under `name` (callers in this
# diff pass :location, :salary_string, :salary and :title) for the page at @url.
#
# Returns whatever the handler returns when SpecialCases.case_for_url(@url)
# finds an entry, and nil when it does not (case_for_url then returns false,
# so the `if` body is skipped). Callers treat nil as "fall back to the
# generic parsing".
#
# The matched entry is expected to contain a handler for `name`; if it does
# not, special_case[name] is nil and the .call raises NoMethodError.
+ def use_special_case(name)
61
+ if special_case = SpecialCases.case_for_url(@url)
62
+ special_case[name].call(@doc)
63
+ end
64
+ end
65
+
57
66
  def job_salary_string
67
+ special_case_result = use_special_case(:salary_string)
68
+ return special_case_result unless special_case_result.nil?
69
+
58
70
  salary = ""
59
71
  loop_over_elements do |name, elem|
60
72
  SALARY_STRING_REGEX.match(@plain_text) { |m|
@@ -65,6 +77,9 @@ module JobParser
65
77
  end
66
78
 
67
79
  def job_salary
80
+ special_case_result = use_special_case(:salary)
81
+ return special_case_result unless special_case_result.nil?
82
+
68
83
  salary = ""
69
84
  loop_over_elements do |name, elem|
70
85
  SALARY_REGEX.match(@plain_text) { |m|
@@ -75,6 +90,9 @@ module JobParser
75
90
  end
76
91
 
77
92
  def job_title
93
+ special_case_result = use_special_case(:title)
94
+ return special_case_result unless special_case_result.nil?
95
+
78
96
  title_scorer = Scorer.new
79
97
  page_title = @doc.at_css("title").content
80
98
  title_scorer.store(page_title, 20).and_score_now
@@ -0,0 +1,56 @@
1
+ # encoding: utf-8
2
module JobParser
  # Hand-written extraction rules for sites whose markup the generic parsing
  # heuristics handle badly.
  #
  # Each entry of ::dictionary maps a host fragment to a hash of callables
  # keyed by field name (:title, :location, :salary_string, :salary). Every
  # callable takes the parsed document and returns the extracted value.
  # The document only needs to respond to #css, returning nodes that respond
  # to #content and #next_element.
  class SpecialCases
    DIRECT_GOV_HOST = "jobsearch.direct.gov.uk".freeze
    SUMMARY_LABEL_SELECTOR = ".jobViewSummary dl dt".freeze
    TITLE_SELECTOR = ".jobViewContent h2".freeze
    # A figure must start with a digit, so stray "," or "." characters in the
    # surrounding text are never mistaken for a number.
    SALARY_FIGURE_REGEX = /\d[\d,.]*/

    # Returns the table of special cases: { host_fragment => { field => callable } }.
    # The table is built once and frozen; it holds no per-request state.
    def self.dictionary
      @dictionary ||= {
        DIRECT_GOV_HOST => {
          # nil (rather than an exception) when the second heading is missing,
          # so callers can fall back to the generic title detection.
          :title => lambda { |doc| direct_gov_title(doc) },
          # "" when the summary list has no usable "Location" entry.
          :location => lambda { |doc| summary_value(doc, "Location") || "" },
          # Cleaned salary text, or "" when there is no usable "Salary" entry.
          :salary_string => lambda { |doc|
            raw = summary_value(doc, "Salary")
            raw ? Cleaner.strip_string(raw) : ""
          },
          # [low, high] as integers, derived from the :salary_string case;
          # nil when the salary text contains no figure.
          :salary => lambda { |doc|
            parse_salary_range(call_special_case(DIRECT_GOV_HOST, :salary_string, doc))
          }
        }.freeze
      }.freeze
    end

    # Invokes the handler stored under dictionary[key][method] with `doc` and
    # returns its result. `key` and `method` must both exist in the dictionary.
    def self.call_special_case(key, method, doc)
      dictionary[key][method].call(doc)
    end

    # Returns the handler hash of the first dictionary entry whose key occurs
    # anywhere in `url`, or false when none matches (including a nil url).
    def self.case_for_url(url)
      address = url.to_s
      _host, handlers = dictionary.find { |host, _| address.include?(host) }
      handlers || false
    end

    # Content of the element following the first summary <dt> whose content is
    # exactly `label`; nil when the label or its following element is absent.
    def self.summary_value(doc, label)
      label_node = doc.css(SUMMARY_LABEL_SELECTOR).find { |dt| dt.content == label }
      value_node = label_node && label_node.next_element
      value_node && value_node.content
    end

    # Content of the second heading inside the job view, or nil if absent.
    def self.direct_gov_title(doc)
      heading = doc.css(TITLE_SELECTOR)[1]
      heading && heading.content
    end

    # Turns salary text into [low, high] integers using the first two figures
    # found. A single figure yields [n, n] instead of being split in two by
    # regex backtracking. Thousands separators are dropped; anything after a
    # decimal point is truncated by to_i. Returns nil when no figure is present.
    def self.parse_salary_range(salary_string)
      figures = salary_string.to_s.scan(SALARY_FIGURE_REGEX).first(2)
      return nil if figures.empty?

      amounts = figures.map { |figure| figure.delete(",").to_i }
      [amounts.first, amounts.last]
    end

    private_class_method :summary_value, :direct_gov_title, :parse_salary_range
  end
end
@@ -1,3 +1,3 @@
1
1
  module JobParser
2
- VERSION = "0.0.2"
2
+ VERSION = "0.1.0"
3
3
  end
data/lib/jobparser.rb CHANGED
@@ -3,6 +3,7 @@ require "jobparser/parsehtml"
3
3
  require "jobparser/parseurl"
4
4
  require "jobparser/cleaner"
5
5
  require "jobparser/scorer"
6
+ require "jobparser/specialcases"
6
7
 
7
8
  module JobParser
8
9
  # Your code goes here...
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jobparser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.1.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-07-22 00:00:00.000000000 Z
12
+ date: 2013-07-23 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
@@ -103,6 +103,7 @@ files:
103
103
  - lib/jobparser/parseurl.rb
104
104
  - lib/jobparser/regex.rb
105
105
  - lib/jobparser/scorer.rb
106
+ - lib/jobparser/specialcases.rb
106
107
  - lib/jobparser/version.rb
107
108
  - lib/jobparser.rb
108
109
  homepage: ''