jobparser 0.13.7 → 0.13.8
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/jobparser/cache.rb +15 -6
- data/lib/jobparser/parser.rb +2 -6
- data/lib/jobparser/version.rb +1 -1
- data/lib/jobparser.rb +2 -14
- metadata +2 -2
data/lib/jobparser/cache.rb
CHANGED
@@ -1,27 +1,36 @@
|
|
1
1
|
module JobParser
|
2
2
|
class Cache
|
3
|
+
|
4
|
+
def cache_instance
|
5
|
+
@cache_instance || JobParser.config[:cache_storage_class].new
|
6
|
+
end
|
7
|
+
|
8
|
+
def valid_for_url?(url)
|
9
|
+
JobParser.config[:cache_on] && has_cache_for_url?(url) && !cache_expired?(url)
|
10
|
+
end
|
11
|
+
|
3
12
|
def has_cache_for_url?(url)
|
4
|
-
|
13
|
+
cache_instance.has_cache_for_url?(url)
|
5
14
|
end
|
6
15
|
|
7
16
|
def fetch_result_for_url(url)
|
8
|
-
|
17
|
+
cache_instance.get(url)
|
9
18
|
end
|
10
19
|
|
11
20
|
def store_to_file(job_hash)
|
12
|
-
|
21
|
+
cache_instance.store(job_hash)
|
13
22
|
end
|
14
23
|
|
15
24
|
def cache_expired?(url)
|
16
|
-
|
25
|
+
cache_instance.cache_expired?(url)
|
17
26
|
end
|
18
27
|
|
19
28
|
def view_cache
|
20
|
-
|
29
|
+
cache_instance.view_cache
|
21
30
|
end
|
22
31
|
|
23
32
|
def clear_all
|
24
|
-
|
33
|
+
cache_instance.clear_all
|
25
34
|
end
|
26
35
|
end
|
27
36
|
end
|
data/lib/jobparser/parser.rb
CHANGED
@@ -9,12 +9,8 @@ module JobParser
|
|
9
9
|
end
|
10
10
|
|
11
11
|
def job
|
12
|
-
if JobParser.
|
13
|
-
|
14
|
-
if !JobParser.cache.cache_expired?(@url)
|
15
|
-
return JobParser.cache.fetch_result_for_url(@url)
|
16
|
-
end
|
17
|
-
end
|
12
|
+
if JobParser.cache.valid_for_url?(@url)
|
13
|
+
return JobParser.cache.fetch_result_for_url(@url)
|
18
14
|
end
|
19
15
|
|
20
16
|
@doc = strip_bad_elements(Nokogiri::HTML(@html))
|
data/lib/jobparser/version.rb
CHANGED
data/lib/jobparser.rb
CHANGED
@@ -24,20 +24,14 @@ require "open-uri"
|
|
24
24
|
module JobParser
|
25
25
|
def self.parser(url)
|
26
26
|
puts "Warning: JobParser.parser is old. Use JobParser.parse"
|
27
|
-
|
28
27
|
JobParser.parse(url)
|
29
28
|
end
|
30
29
|
|
31
30
|
def self.parse(url)
|
32
|
-
if JobParser.has_cache_for_url?(url)
|
31
|
+
if JobParser.cache.valid_for_url?(url)
|
33
32
|
res = JobParser.cache.fetch_result_for_url(url)
|
34
|
-
|
35
|
-
ParseSchema.new(nil, url)
|
36
|
-
else
|
37
|
-
ParseHtml.new(nil, url)
|
38
|
-
end
|
33
|
+
res[:schema] ? ParseSchema.new(nil, url) : ParseHtml.new(nil, url)
|
39
34
|
else
|
40
|
-
# we only need the HTML if there's no cache
|
41
35
|
begin
|
42
36
|
html = open(url, :allow_redirections => :safe).read
|
43
37
|
if html.include?("http://schema.org/JobPosting")
|
@@ -48,13 +42,7 @@ module JobParser
|
|
48
42
|
rescue URI::InvalidURIError
|
49
43
|
raise JobParser::Error::InvalidUrl, "The URI given was not valid"
|
50
44
|
end
|
51
|
-
|
52
45
|
end
|
53
|
-
|
54
|
-
end
|
55
|
-
|
56
|
-
def self.has_cache_for_url?(url)
|
57
|
-
JobParser.config[:cache_on] && JobParser.cache.has_cache_for_url?(url) && !JobParser.cache.cache_expired?(url)
|
58
46
|
end
|
59
47
|
|
60
48
|
def self.config
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: jobparser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.13.7
|
4
|
+
version: 0.13.8
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-08-
|
12
|
+
date: 2013-08-22 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|