jobparser 0.13.8 → 0.13.9
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/jobparser/cache/mongostore.rb +28 -30
- data/lib/jobparser/cache/textfile.rb +17 -14
- data/lib/jobparser/parsehtml.rb +7 -7
- data/lib/jobparser/parser.rb +7 -5
- data/lib/jobparser/version.rb +1 -1
- metadata +2 -2
@@ -4,51 +4,46 @@ module JobParser
|
|
4
4
|
class MongoStore
|
5
5
|
|
6
6
|
def has_cache_for_url?(url)
|
7
|
-
|
7
|
+
job_for_url(url).count > 0
|
8
8
|
end
|
9
9
|
|
10
10
|
def store(hash)
|
11
|
-
|
12
|
-
hash =
|
11
|
+
job_for_url(hash[:url]).delete
|
12
|
+
hash = strip_fields_not_stored(hash)
|
13
13
|
Job.create(hash)
|
14
14
|
end
|
15
15
|
|
16
16
|
def cache_expired?(url)
|
17
|
-
job =
|
17
|
+
job = job_for_url(url).first
|
18
18
|
expire_time = (job.created_at + JobParser.config[:cache_expire])
|
19
19
|
Time.now > expire_time
|
20
20
|
end
|
21
21
|
|
22
22
|
def get(url)
|
23
|
-
job =
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
new_obj[k.to_sym] = v
|
23
|
+
job = job_for_url(url).first
|
24
|
+
{}.tap do |job_obj|
|
25
|
+
job.attributes.each do |k, v|
|
26
|
+
job_obj[k.to_sym] = v unless %w{created_at _id updated_at}.include?(k)
|
28
27
|
end
|
29
|
-
|
30
|
-
|
31
|
-
new_obj
|
28
|
+
job_obj[:from_cache] = true
|
29
|
+
end
|
32
30
|
end
|
33
31
|
|
34
32
|
def clear_all
|
35
|
-
MongoStore::Job.each
|
36
|
-
job.delete
|
37
|
-
end
|
33
|
+
MongoStore::Job.each(&:delete)
|
38
34
|
end
|
39
35
|
|
40
36
|
def view_cache
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
37
|
+
[].tap do |res|
|
38
|
+
Job.each do |job|
|
39
|
+
res.push({
|
40
|
+
:url => job.url,
|
41
|
+
:created => job.created_at
|
42
|
+
})
|
43
|
+
end
|
47
44
|
end
|
48
|
-
res
|
49
45
|
end
|
50
46
|
|
51
|
-
|
52
47
|
class Job
|
53
48
|
include Mongoid::Document
|
54
49
|
include Mongoid::Timestamps
|
@@ -57,6 +52,7 @@ module JobParser
|
|
57
52
|
benefits education_requirements incentives industry
|
58
53
|
occupational_category qualifications responsibilities skills special_commitments work_hours
|
59
54
|
}
|
55
|
+
|
60
56
|
EXTRA_SCHEMA_TEXT_FIELDS.each do |f|
|
61
57
|
field f.to_sym, :type => String
|
62
58
|
end
|
@@ -68,20 +64,22 @@ module JobParser
|
|
68
64
|
field :salary_string, :type => String
|
69
65
|
field :location, :type => String
|
70
66
|
field :deadline, :type => String
|
71
|
-
field :postcode,
|
67
|
+
field :postcode, :type => String
|
72
68
|
field :schema, :type => Boolean, :default => false
|
73
|
-
|
74
69
|
end
|
75
70
|
|
76
71
|
private
|
77
72
|
|
78
|
-
def
|
79
|
-
|
80
|
-
excluded_fields = [:from_cache]
|
81
|
-
hash.each { |k, v| new_hash[k] = v unless excluded_fields.include?(k) }
|
82
|
-
new_hash
|
73
|
+
def job_for_url(url)
|
74
|
+
Job.where(:url => url)
|
83
75
|
end
|
84
76
|
|
77
|
+
def strip_fields_not_stored(hash)
|
78
|
+
{}.tap do |new_hash|
|
79
|
+
excluded_fields = [:from_cache]
|
80
|
+
hash.each { |k, v| new_hash[k] = v unless excluded_fields.include?(k) }
|
81
|
+
end
|
82
|
+
end
|
85
83
|
end
|
86
84
|
end
|
87
85
|
end
|
@@ -4,20 +4,17 @@ module JobParser
|
|
4
4
|
class Cache
|
5
5
|
class TextFile
|
6
6
|
def has_cache_for_url?(url)
|
7
|
-
|
8
|
-
File.exist?(path)
|
7
|
+
File.exist?(path_for_url(url))
|
9
8
|
end
|
10
9
|
|
11
10
|
def store(job_hash)
|
12
|
-
|
13
|
-
write_to_file(path_for_url(url), job_hash.to_json)
|
11
|
+
write_to_file(path_for_url(job_hash[:url]), job_hash.to_json)
|
14
12
|
end
|
15
13
|
|
16
14
|
def get(url)
|
17
15
|
path = path_for_url(url)
|
18
16
|
obj = JSON.parse(IO.read(path))
|
19
|
-
sym_obj =
|
20
|
-
obj.each { |k, v| sym_obj[k.to_sym] = v }
|
17
|
+
sym_obj = make_object_keys_symbols(obj)
|
21
18
|
sym_obj[:from_cache] = true
|
22
19
|
sym_obj
|
23
20
|
end
|
@@ -33,19 +30,25 @@ module JobParser
|
|
33
30
|
end
|
34
31
|
|
35
32
|
def view_cache
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
33
|
+
[].tap do |res|
|
34
|
+
cache_files.each do |f|
|
35
|
+
contents = JSON.parse(IO.read(f))
|
36
|
+
res.push({
|
37
|
+
:url => contents["url"],
|
38
|
+
:created => File.mtime(f)
|
39
|
+
})
|
40
|
+
end
|
43
41
|
end
|
44
|
-
res
|
45
42
|
end
|
46
43
|
|
47
44
|
private
|
48
45
|
|
46
|
+
def make_object_keys_symbols(obj)
|
47
|
+
{}.tap do |sym_obj|
|
48
|
+
obj.each { |k, v| sym_obj[k.to_sym] = v }
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
49
52
|
def cache_files
|
50
53
|
Dir[File.join(JobParser.config[:cache_location], "*.txt")]
|
51
54
|
end
|
data/lib/jobparser/parsehtml.rb
CHANGED
@@ -10,31 +10,31 @@ module JobParser
|
|
10
10
|
private
|
11
11
|
|
12
12
|
def job_location
|
13
|
-
Facets::Location.new(
|
13
|
+
Facets::Location.new(*facet_args).parse
|
14
14
|
end
|
15
15
|
|
16
16
|
def job_salary_string
|
17
|
-
Facets::SalaryString.new(
|
17
|
+
Facets::SalaryString.new(*facet_args).parse
|
18
18
|
end
|
19
19
|
|
20
20
|
def job_salary
|
21
|
-
Facets::Salary.new(
|
21
|
+
Facets::Salary.new(*facet_args).parse
|
22
22
|
end
|
23
23
|
|
24
24
|
def job_title
|
25
|
-
Facets::Title.new(
|
25
|
+
Facets::Title.new(*facet_args).parse
|
26
26
|
end
|
27
27
|
|
28
28
|
def apply_link
|
29
|
-
Facets::Apply.new(
|
29
|
+
Facets::Apply.new(*facet_args).parse
|
30
30
|
end
|
31
31
|
|
32
32
|
def deadline
|
33
|
-
Facets::Deadline.new(
|
33
|
+
Facets::Deadline.new(*facet_args).parse
|
34
34
|
end
|
35
35
|
|
36
36
|
def job_postcode
|
37
|
-
Facets::Postcode.new(
|
37
|
+
Facets::Postcode.new(*facet_args).parse
|
38
38
|
end
|
39
39
|
end
|
40
40
|
end
|
data/lib/jobparser/parser.rb
CHANGED
@@ -6,6 +6,8 @@ module JobParser
|
|
6
6
|
def initialize(html, from_url)
|
7
7
|
@url = from_url
|
8
8
|
@html = html
|
9
|
+
@doc = strip_bad_elements(Nokogiri::HTML(@html))
|
10
|
+
@plain_text = get_plain_text
|
9
11
|
end
|
10
12
|
|
11
13
|
def job
|
@@ -13,10 +15,7 @@ module JobParser
|
|
13
15
|
return JobParser.cache.fetch_result_for_url(@url)
|
14
16
|
end
|
15
17
|
|
16
|
-
|
17
|
-
@plain_text = get_plain_text
|
18
|
-
|
19
|
-
result = { :url => @url,
|
18
|
+
{ :url => @url,
|
20
19
|
:salary => job_salary,
|
21
20
|
:title => job_title,
|
22
21
|
:apply => apply_link,
|
@@ -25,11 +24,14 @@ module JobParser
|
|
25
24
|
:deadline => deadline,
|
26
25
|
:postcode => job_postcode
|
27
26
|
}
|
28
|
-
result
|
29
27
|
end
|
30
28
|
|
31
29
|
private
|
32
30
|
|
31
|
+
def facet_args
|
32
|
+
[@doc, @url, @plain_text]
|
33
|
+
end
|
34
|
+
|
33
35
|
def cache(result)
|
34
36
|
if JobParser.config[:cache_on]
|
35
37
|
store_result_to_cache(result)
|
data/lib/jobparser/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: jobparser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.13.
|
4
|
+
version: 0.13.9
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-08-
|
12
|
+
date: 2013-08-23 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|