jobparser 0.6.1 → 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/jobparser/cache/mongostore.rb +51 -0
- data/lib/jobparser/cache/textfile.rb +52 -0
- data/lib/jobparser/cache.rb +5 -39
- data/lib/jobparser/parser.rb +1 -1
- data/lib/jobparser/textfile.rb +0 -0
- data/lib/jobparser/version.rb +1 -1
- data/lib/jobparser.rb +4 -1
- metadata +20 -1
@@ -0,0 +1,51 @@
|
|
1
|
+
require 'mongoid'
|
2
|
+
module JobParser
  class Cache
    # Cache backend that persists parsed job hashes as Mongoid documents,
    # keyed by the job's URL.
    class MongoStore
      # Mongoid document holding one cached job result. Mongoid::Timestamps
      # supplies the created_at value that #cache_expired? relies on.
      class Job
        include Mongoid::Document
        include Mongoid::Timestamps

        field :url, :type => String
        field :salary, :type => Array
        field :title, :type => String
        field :apply, :type => String
        field :salary_string, :type => String
        field :location, :type => String
        field :deadline, :type => String
      end

      # Reports whether a cached document exists for the given URL.
      #
      # @param url [String] job page URL used as the cache key
      # @return [Boolean]
      def has_cache_for_url?(url)
        Job.where(:url => url).exists?
      end

      # Replaces any cached document for hash[:url] with a fresh one.
      #
      # @param hash [Hash] job attributes; must include :url
      # @raise whatever Job#save! raises when the document cannot be saved
      def store(hash)
        # Remove the previous entry first so only one document per URL exists
        # and the new document gets a fresh created_at.
        Job.where(:url => hash[:url]).delete
        Job.new(hash).save!
      end

      # Reports whether the cached entry for the URL is older than
      # JobParser.config[:cache_expire] seconds. A URL with no cached entry
      # is reported as expired instead of raising NoMethodError on nil.
      #
      # @param url [String] job page URL used as the cache key
      # @return [Boolean]
      def cache_expired?(url)
        job = find_job(url)
        return true unless job

        Time.now > job.created_at + JobParser.config[:cache_expire]
      end

      # Returns the cached attributes for the URL as a symbol-keyed hash,
      # without the Mongo "_id" and with :from_cache set to true.
      #
      # @param url [String] job page URL used as the cache key
      # @return [Hash, nil] nil when nothing is cached for the URL
      def get(url)
        job = find_job(url)
        return nil unless job

        result = {}
        job.attributes.each do |key, value|
          result[key.to_sym] = value unless key == "_id"
        end
        result[:from_cache] = true
        result
      end

      # Removes every cached job document in a single delete_all call rather
      # than deleting documents one at a time.
      def clear_all
        Job.delete_all
      end

      private

      # Looks up the first cached document stored for the URL (nil if none).
      def find_job(url)
        Job.where(:url => url).first
      end
    end
  end
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
require 'digest/md5'
require 'fileutils'
require 'json'
|
3
|
+
module JobParser
  class Cache
    # Cache backend that stores each parsed job as a JSON text file inside
    # JobParser.config[:cache_location]. The file name is the MD5 hex digest
    # of the job URL plus a ".txt" extension.
    class TextFile
      # Reports whether a cache file exists for the given URL.
      #
      # @param url [String] job page URL used as the cache key
      # @return [Boolean]
      def has_cache_for_url?(url)
        File.exist?(path_for_url(url))
      end

      # Serialises the job hash to JSON and writes it to the cache file for
      # job_hash[:url], overwriting any previous entry.
      #
      # @param job_hash [Hash] job attributes; must include :url
      def store(job_hash)
        path = path_for_url(job_hash[:url])
        # Create the cache directory on first use; otherwise the write fails
        # with Errno::ENOENT when the configured location does not exist yet.
        FileUtils.mkdir_p(File.dirname(path))
        write_to_file(path, job_hash.to_json)
      end

      # Reads the cached job for the URL and returns it with its top-level
      # keys symbolized and :from_cache set to true.
      #
      # @param url [String] job page URL used as the cache key
      # @return [Hash]
      # @raise [Errno::ENOENT] when no cache file exists for the URL
      def get(url)
        cached = JSON.parse(File.read(path_for_url(url)))
        result = cached.each_with_object({}) do |(key, value), memo|
          memo[key.to_sym] = value
        end
        result[:from_cache] = true
        result
      end

      # Deletes every "*.txt" file in the configured cache directory.
      def clear_all
        pattern = File.join(JobParser.config[:cache_location], "*.txt")
        Dir[pattern].each { |file| File.delete(file) }
      end

      # Reports whether the cache file for the URL was last modified more
      # than JobParser.config[:cache_expire] seconds ago. A URL without a
      # cache file is reported as expired instead of raising Errno::ENOENT.
      #
      # @param url [String] job page URL used as the cache key
      # @return [Boolean]
      def cache_expired?(url)
        path = path_for_url(url)
        return true unless File.exist?(path)

        Time.now > File.mtime(path) + JobParser.config[:cache_expire]
      end

      private

      # Writes contents (followed by a newline, via puts) to path.
      def write_to_file(path, contents)
        File.open(path, "w") { |file| file.puts(contents) }
      end

      # Builds the cache file path for a URL inside the cache directory.
      def path_for_url(url)
        File.join(JobParser.config[:cache_location], md5_url(url))
      end

      # Cache file name for a URL: MD5 hex digest plus ".txt".
      def md5_url(url)
        "#{Digest::MD5.hexdigest(url)}.txt"
      end
    end
  end
end
|
data/lib/jobparser/cache.rb
CHANGED
@@ -1,57 +1,23 @@
|
|
1
|
-
require 'digest/md5'
|
2
|
-
require 'json'
|
3
|
-
|
4
1
|
module JobParser
|
5
2
|
class Cache
|
6
|
-
|
7
3
|
def has_cache_for_url?(url)
|
8
|
-
|
9
|
-
File.exist?(path)
|
4
|
+
JobParser.config[:cache_storage_class].new.has_cache_for_url?(url)
|
10
5
|
end
|
11
6
|
|
12
7
|
def fetch_result_for_url(url)
|
13
|
-
|
14
|
-
obj = JSON.parse(IO.read(path))
|
15
|
-
sym_obj = {}
|
16
|
-
obj.each { |k, v| sym_obj[k.to_sym] = v }
|
17
|
-
sym_obj[:from_cache] = true
|
18
|
-
sym_obj
|
8
|
+
JobParser.config[:cache_storage_class].new.get(url)
|
19
9
|
end
|
20
10
|
|
21
11
|
def store_to_file(job_hash)
|
22
|
-
|
23
|
-
write_to_file(path_for_url(url), job_hash.to_json)
|
12
|
+
JobParser.config[:cache_storage_class].new.store(job_hash)
|
24
13
|
end
|
25
14
|
|
26
15
|
def cache_expired?(url)
|
27
|
-
|
28
|
-
end
|
29
|
-
|
30
|
-
def cache_not_expired?(url)
|
31
|
-
time = File.mtime(path_for_url(url))
|
32
|
-
expire_time = time + JobParser.config[:cache_expire]
|
33
|
-
Time.now < expire_time
|
16
|
+
JobParser.config[:cache_storage_class].new.cache_expired?(url)
|
34
17
|
end
|
35
18
|
|
36
19
|
def clear_all
|
37
|
-
|
38
|
-
files.each { |f| File.delete(f) }
|
39
|
-
end
|
40
|
-
|
41
|
-
private
|
42
|
-
|
43
|
-
|
44
|
-
def write_to_file(path, contents)
|
45
|
-
File.open(path, "w") { |f| f.puts(contents) }
|
46
|
-
end
|
47
|
-
|
48
|
-
def path_for_url(url)
|
49
|
-
cache_dir = JobParser.config[:cache_location]
|
50
|
-
File.join(cache_dir, md5_url(url))
|
51
|
-
end
|
52
|
-
|
53
|
-
def md5_url(url)
|
54
|
-
"#{Digest::MD5.hexdigest(url)}.txt"
|
20
|
+
JobParser.config[:cache_storage_class].new.clear_all
|
55
21
|
end
|
56
22
|
end
|
57
23
|
end
|
data/lib/jobparser/parser.rb
CHANGED
@@ -11,7 +11,7 @@ module JobParser
|
|
11
11
|
def job
|
12
12
|
if JobParser.config[:cache_on]
|
13
13
|
if JobParser.cache.has_cache_for_url?(@url)
|
14
|
-
if JobParser.cache.cache_not_expired?(@url)
|
14
|
+
if !JobParser.cache.cache_expired?(@url)
|
15
15
|
return JobParser.cache.fetch_result_for_url(@url)
|
16
16
|
end
|
17
17
|
end
|
File without changes
|
data/lib/jobparser/version.rb
CHANGED
data/lib/jobparser.rb
CHANGED
@@ -7,6 +7,8 @@ require "jobparser/cleaner"
|
|
7
7
|
require "jobparser/scorer"
|
8
8
|
require "jobparser/specialcases"
|
9
9
|
require "jobparser/cache"
|
10
|
+
require "jobparser/cache/textfile"
|
11
|
+
require "jobparser/cache/mongostore"
|
10
12
|
require "jobparser/facets/facet"
|
11
13
|
require "jobparser/facets/salary"
|
12
14
|
require "jobparser/facets/salarystring"
|
@@ -47,7 +49,8 @@ module JobParser
|
|
47
49
|
@config = {
|
48
50
|
:cache_on => false,
|
49
51
|
:cache_expire => (1 * 60 * 60), # an hour
|
50
|
-
:cache_location => "cache"
|
52
|
+
:cache_location => "cache",
|
53
|
+
:cache_storage_class => JobParser::Cache::TextFile
|
51
54
|
}
|
52
55
|
|
53
56
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: jobparser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.1
|
4
|
+
version: 0.7.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -107,6 +107,22 @@ dependencies:
|
|
107
107
|
- - ! '>='
|
108
108
|
- !ruby/object:Gem::Version
|
109
109
|
version: '0'
|
110
|
+
- !ruby/object:Gem::Dependency
|
111
|
+
name: mongoid
|
112
|
+
requirement: !ruby/object:Gem::Requirement
|
113
|
+
none: false
|
114
|
+
requirements:
|
115
|
+
- - ! '>='
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
type: :runtime
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
none: false
|
122
|
+
requirements:
|
123
|
+
- - ! '>='
|
124
|
+
- !ruby/object:Gem::Version
|
125
|
+
version: '0'
|
110
126
|
description: A parser for Job sites
|
111
127
|
email:
|
112
128
|
- jack@jackfranklin.net
|
@@ -114,6 +130,8 @@ executables: []
|
|
114
130
|
extensions: []
|
115
131
|
extra_rdoc_files: []
|
116
132
|
files:
|
133
|
+
- lib/jobparser/cache/mongostore.rb
|
134
|
+
- lib/jobparser/cache/textfile.rb
|
117
135
|
- lib/jobparser/cache.rb
|
118
136
|
- lib/jobparser/cleaner.rb
|
119
137
|
- lib/jobparser/facets/apply.rb
|
@@ -130,6 +148,7 @@ files:
|
|
130
148
|
- lib/jobparser/regex.rb
|
131
149
|
- lib/jobparser/scorer.rb
|
132
150
|
- lib/jobparser/specialcases.rb
|
151
|
+
- lib/jobparser/textfile.rb
|
133
152
|
- lib/jobparser/version.rb
|
134
153
|
- lib/jobparser.rb
|
135
154
|
homepage: ''
|