jobparser 0.10.1 → 0.11.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/jobparser.rb +21 -6
- data/lib/jobparser/parsehtml.rb +5 -0
- data/lib/jobparser/parser.rb +7 -2
- data/lib/jobparser/parseschema.rb +7 -5
- data/lib/jobparser/version.rb +1 -1
- metadata +17 -2
- data/lib/jobparser/textfile.rb +0 -0
data/lib/jobparser.rb
CHANGED
@@ -27,16 +27,31 @@ module JobParser
|
|
27
27
|
end
|
28
28
|
|
29
29
|
def self.parse(url)
|
30
|
-
|
30
|
+
if JobParser.cache.has_cache_for_url?(url)
|
31
|
+
res = JobParser.cache.fetch_result_for_url(url)
|
32
|
+
if res[:schema]
|
33
|
+
ParseSchema.new(nil, url)
|
34
|
+
else
|
35
|
+
ParseHtml.new(nil, url)
|
36
|
+
end
|
37
|
+
else
|
31
38
|
# we only need the HTML if there's no cache
|
32
39
|
html = open(url, :allow_redirections => :safe).read
|
40
|
+
if html.include?("http://schema.org/JobPosting")
|
41
|
+
ParseSchema.new(html, url)
|
42
|
+
else
|
43
|
+
ParseHtml.new(html, url)
|
44
|
+
end
|
33
45
|
end
|
34
46
|
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
47
|
+
end
|
48
|
+
|
49
|
+
def self.has_cache_for_url?(url)
|
50
|
+
JobParser.config[:cache_on] && JobParser.cache.has_cache_for_url?(url) && !JobParser.cache.cache_expired?(url)
|
51
|
+
end
|
52
|
+
|
53
|
+
def self.get_cache_for_url(url)
|
54
|
+
|
40
55
|
end
|
41
56
|
|
42
57
|
def self.config
|
data/lib/jobparser/parsehtml.rb
CHANGED
data/lib/jobparser/parser.rb
CHANGED
@@ -28,13 +28,18 @@ module JobParser
|
|
28
28
|
:location => job_location,
|
29
29
|
:deadline => deadline
|
30
30
|
}
|
31
|
-
|
32
|
-
store_result_to_cache(result) if JobParser.config[:cache_on]
|
33
31
|
result
|
34
32
|
end
|
35
33
|
|
36
34
|
private
|
37
35
|
|
36
|
+
def cache(result)
|
37
|
+
if JobParser.config[:cache_on]
|
38
|
+
store_result_to_cache(result)
|
39
|
+
end
|
40
|
+
result
|
41
|
+
end
|
42
|
+
|
38
43
|
def store_result_to_cache(result)
|
39
44
|
JobParser.cache.store_to_file(result)
|
40
45
|
end
|
data/lib/jobparser/parseschema.rb
CHANGED
@@ -31,12 +31,14 @@ module JobParser
|
|
31
31
|
def job
|
32
32
|
res = super
|
33
33
|
res[:schema] = true
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
34
|
+
unless res[:from_cache]
|
35
|
+
EXTRA_SCHEMA_TEXT_FIELDS.each do |field|
|
36
|
+
underscore_name = underscore(field).to_sym
|
37
|
+
result = send("job_#{underscore_name}")
|
38
|
+
res[underscore_name] = result unless result.nil?
|
39
|
+
end
|
38
40
|
end
|
39
|
-
res
|
41
|
+
cache(res)
|
40
42
|
end
|
41
43
|
|
42
44
|
private
|
data/lib/jobparser/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: jobparser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.10.1
|
4
|
+
version: 0.11.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -75,6 +75,22 @@ dependencies:
|
|
75
75
|
- - ! '>='
|
76
76
|
- !ruby/object:Gem::Version
|
77
77
|
version: '0'
|
78
|
+
- !ruby/object:Gem::Dependency
|
79
|
+
name: webmock
|
80
|
+
requirement: !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
82
|
+
requirements:
|
83
|
+
- - ! '>='
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: '0'
|
86
|
+
type: :development
|
87
|
+
prerelease: false
|
88
|
+
version_requirements: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
90
|
+
requirements:
|
91
|
+
- - ! '>='
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '0'
|
78
94
|
- !ruby/object:Gem::Dependency
|
79
95
|
name: open_uri_redirections
|
80
96
|
requirement: !ruby/object:Gem::Requirement
|
@@ -148,7 +164,6 @@ files:
|
|
148
164
|
- lib/jobparser/regex.rb
|
149
165
|
- lib/jobparser/scorer.rb
|
150
166
|
- lib/jobparser/specialcases.rb
|
151
|
-
- lib/jobparser/textfile.rb
|
152
167
|
- lib/jobparser/version.rb
|
153
168
|
- lib/jobparser.rb
|
154
169
|
homepage: ''
|
data/lib/jobparser/textfile.rb
DELETED
File without changes
|