jobparser 0.13.7 → 0.13.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/jobparser/cache.rb +15 -6
- data/lib/jobparser/parser.rb +2 -6
- data/lib/jobparser/version.rb +1 -1
- data/lib/jobparser.rb +2 -14
- metadata +2 -2
data/lib/jobparser/cache.rb
CHANGED
@@ -1,27 +1,36 @@
|
|
1
1
|
module JobParser
|
2
2
|
class Cache
|
3
|
+
|
4
|
+
def cache_instance
|
5
|
+
@cache_instance || JobParser.config[:cache_storage_class].new
|
6
|
+
end
|
7
|
+
|
8
|
+
def valid_for_url?(url)
|
9
|
+
JobParser.config[:cache_on] && has_cache_for_url?(url) && !cache_expired?(url)
|
10
|
+
end
|
11
|
+
|
3
12
|
def has_cache_for_url?(url)
|
4
|
-
|
13
|
+
cache_instance.has_cache_for_url?(url)
|
5
14
|
end
|
6
15
|
|
7
16
|
def fetch_result_for_url(url)
|
8
|
-
|
17
|
+
cache_instance.get(url)
|
9
18
|
end
|
10
19
|
|
11
20
|
def store_to_file(job_hash)
|
12
|
-
|
21
|
+
cache_instance.store(job_hash)
|
13
22
|
end
|
14
23
|
|
15
24
|
def cache_expired?(url)
|
16
|
-
|
25
|
+
cache_instance.cache_expired?(url)
|
17
26
|
end
|
18
27
|
|
19
28
|
def view_cache
|
20
|
-
|
29
|
+
cache_instance.view_cache
|
21
30
|
end
|
22
31
|
|
23
32
|
def clear_all
|
24
|
-
|
33
|
+
cache_instance.clear_all
|
25
34
|
end
|
26
35
|
end
|
27
36
|
end
|
data/lib/jobparser/parser.rb
CHANGED
@@ -9,12 +9,8 @@ module JobParser
|
|
9
9
|
end
|
10
10
|
|
11
11
|
def job
|
12
|
-
if JobParser.
|
13
|
-
|
14
|
-
if !JobParser.cache.cache_expired?(@url)
|
15
|
-
return JobParser.cache.fetch_result_for_url(@url)
|
16
|
-
end
|
17
|
-
end
|
12
|
+
if JobParser.cache.valid_for_url?(@url)
|
13
|
+
return JobParser.cache.fetch_result_for_url(@url)
|
18
14
|
end
|
19
15
|
|
20
16
|
@doc = strip_bad_elements(Nokogiri::HTML(@html))
|
data/lib/jobparser/version.rb
CHANGED
data/lib/jobparser.rb
CHANGED
@@ -24,20 +24,14 @@ require "open-uri"
|
|
24
24
|
module JobParser
|
25
25
|
def self.parser(url)
|
26
26
|
puts "Warning: JobParser.parser is old. Use JobParser.parse"
|
27
|
-
|
28
27
|
JobParser.parse(url)
|
29
28
|
end
|
30
29
|
|
31
30
|
def self.parse(url)
|
32
|
-
if JobParser.has_cache_for_url?(url)
|
31
|
+
if JobParser.cache.valid_for_url?(url)
|
33
32
|
res = JobParser.cache.fetch_result_for_url(url)
|
34
|
-
|
35
|
-
ParseSchema.new(nil, url)
|
36
|
-
else
|
37
|
-
ParseHtml.new(nil, url)
|
38
|
-
end
|
33
|
+
res[:schema] ? ParseSchema.new(nil, url) : ParseHtml.new(nil, url)
|
39
34
|
else
|
40
|
-
# we only need the HTML if there's no cache
|
41
35
|
begin
|
42
36
|
html = open(url, :allow_redirections => :safe).read
|
43
37
|
if html.include?("http://schema.org/JobPosting")
|
@@ -48,13 +42,7 @@ module JobParser
|
|
48
42
|
rescue URI::InvalidURIError
|
49
43
|
raise JobParser::Error::InvalidUrl, "The URI given was not valid"
|
50
44
|
end
|
51
|
-
|
52
45
|
end
|
53
|
-
|
54
|
-
end
|
55
|
-
|
56
|
-
def self.has_cache_for_url?(url)
|
57
|
-
JobParser.config[:cache_on] && JobParser.cache.has_cache_for_url?(url) && !JobParser.cache.cache_expired?(url)
|
58
46
|
end
|
59
47
|
|
60
48
|
def self.config
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: jobparser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.13.7
|
4
|
+
version: 0.13.8
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-08-
|
12
|
+
date: 2013-08-22 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|