jobparser 0.6.1 → 0.7.0

This diff shows the changes between publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -0,0 +1,51 @@
1
+ require 'mongoid'
2
+ module JobParser
3
+ class Cache
4
+ class MongoStore
5
+
6
+ def has_cache_for_url?(url)
7
+ Job.where(:url => url).count > 0
8
+ end
9
+
10
+ def store(hash)
11
+ Job.where(:url => hash[:url]).delete
12
+ Job.new(hash).save!
13
+ end
14
+
15
+ def cache_expired?(url)
16
+ job = Job.where(:url => url).first
17
+ expire_time = (job.created_at + JobParser.config[:cache_expire])
18
+ Time.now > expire_time
19
+ end
20
+
21
+ def get(url)
22
+ job = Job.where(:url => url).first
23
+ new_obj = {}
24
+ job.attributes.each { |k, v| new_obj[k.to_sym] = v unless k == "_id" }
25
+ new_obj[:from_cache] = true
26
+ new_obj
27
+ end
28
+
29
+ def clear_all
30
+ MongoStore::Job.each do |job|
31
+ job.delete
32
+ end
33
+ end
34
+
35
+
36
+ class Job
37
+ include Mongoid::Document
38
+ include Mongoid::Timestamps
39
+
40
+ field :url, :type => String
41
+ field :salary, :type => Array
42
+ field :title, :type => String
43
+ field :apply, :type => String
44
+ field :salary_string, :type => String
45
+ field :location, :type => String
46
+ field :deadline, :type => String
47
+ end
48
+
49
+ end
50
+ end
51
+ end
@@ -0,0 +1,52 @@
1
+ require 'digest/md5'
2
+ require 'json'
3
+ module JobParser
4
+ class Cache
5
+ class TextFile
6
+ def has_cache_for_url?(url)
7
+ path = path_for_url(url)
8
+ File.exist?(path)
9
+ end
10
+
11
+ def store(job_hash)
12
+ url = job_hash[:url]
13
+ write_to_file(path_for_url(url), job_hash.to_json)
14
+ end
15
+
16
+ def get(url)
17
+ path = path_for_url(url)
18
+ obj = JSON.parse(IO.read(path))
19
+ sym_obj = {}
20
+ obj.each { |k, v| sym_obj[k.to_sym] = v }
21
+ sym_obj[:from_cache] = true
22
+ sym_obj
23
+ end
24
+
25
+ def clear_all
26
+ files = Dir[File.join(JobParser.config[:cache_location], "*.txt")]
27
+ files.each { |f| File.delete(f) }
28
+ end
29
+
30
+ def cache_expired?(url)
31
+ time = File.mtime(path_for_url(url))
32
+ expire_time = time + JobParser.config[:cache_expire]
33
+ Time.now > expire_time
34
+ end
35
+
36
+ private
37
+
38
+ def write_to_file(path, contents)
39
+ File.open(path, "w") { |f| f.puts(contents) }
40
+ end
41
+
42
+ def path_for_url(url)
43
+ cache_dir = JobParser.config[:cache_location]
44
+ File.join(cache_dir, md5_url(url))
45
+ end
46
+
47
+ def md5_url(url)
48
+ "#{Digest::MD5.hexdigest(url)}.txt"
49
+ end
50
+ end
51
+ end
52
+ end
@@ -1,57 +1,23 @@
1
- require 'digest/md5'
2
- require 'json'
3
-
4
1
  module JobParser
5
2
  class Cache
6
-
7
3
  def has_cache_for_url?(url)
8
- path = path_for_url(url)
9
- File.exist?(path)
4
+ JobParser.config[:cache_storage_class].new.has_cache_for_url?(url)
10
5
  end
11
6
 
12
7
  def fetch_result_for_url(url)
13
- path = path_for_url(url)
14
- obj = JSON.parse(IO.read(path))
15
- sym_obj = {}
16
- obj.each { |k, v| sym_obj[k.to_sym] = v }
17
- sym_obj[:from_cache] = true
18
- sym_obj
8
+ JobParser.config[:cache_storage_class].new.get(url)
19
9
  end
20
10
 
21
11
  def store_to_file(job_hash)
22
- url = job_hash[:url]
23
- write_to_file(path_for_url(url), job_hash.to_json)
12
+ JobParser.config[:cache_storage_class].new.store(job_hash)
24
13
  end
25
14
 
26
15
  def cache_expired?(url)
27
- !cache_not_expired?(url)
28
- end
29
-
30
- def cache_not_expired?(url)
31
- time = File.mtime(path_for_url(url))
32
- expire_time = time + JobParser.config[:cache_expire]
33
- Time.now < expire_time
16
+ JobParser.config[:cache_storage_class].new.cache_expired?(url)
34
17
  end
35
18
 
36
19
  def clear_all
37
- files = Dir[File.join(JobParser.config[:cache_location], "*.txt")]
38
- files.each { |f| File.delete(f) }
39
- end
40
-
41
- private
42
-
43
-
44
- def write_to_file(path, contents)
45
- File.open(path, "w") { |f| f.puts(contents) }
46
- end
47
-
48
- def path_for_url(url)
49
- cache_dir = JobParser.config[:cache_location]
50
- File.join(cache_dir, md5_url(url))
51
- end
52
-
53
- def md5_url(url)
54
- "#{Digest::MD5.hexdigest(url)}.txt"
20
+ JobParser.config[:cache_storage_class].new.clear_all
55
21
  end
56
22
  end
57
23
  end
@@ -11,7 +11,7 @@ module JobParser
11
11
  def job
12
12
  if JobParser.config[:cache_on]
13
13
  if JobParser.cache.has_cache_for_url?(@url)
14
- if JobParser.cache.cache_not_expired?(@url)
14
+ if !JobParser.cache.cache_expired?(@url)
15
15
  return JobParser.cache.fetch_result_for_url(@url)
16
16
  end
17
17
  end
File without changes
@@ -1,3 +1,3 @@
1
1
  module JobParser
2
- VERSION = "0.6.1"
2
+ VERSION = "0.7.0"
3
3
  end
data/lib/jobparser.rb CHANGED
@@ -7,6 +7,8 @@ require "jobparser/cleaner"
7
7
  require "jobparser/scorer"
8
8
  require "jobparser/specialcases"
9
9
  require "jobparser/cache"
10
+ require "jobparser/cache/textfile"
11
+ require "jobparser/cache/mongostore"
10
12
  require "jobparser/facets/facet"
11
13
  require "jobparser/facets/salary"
12
14
  require "jobparser/facets/salarystring"
@@ -47,7 +49,8 @@ module JobParser
47
49
  @config = {
48
50
  :cache_on => false,
49
51
  :cache_expire => (1 * 60 * 60), # an hour
50
- :cache_location => "cache"
52
+ :cache_location => "cache",
53
+ :cache_storage_class => JobParser::Cache::TextFile
51
54
  }
52
55
 
53
56
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jobparser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.1
4
+ version: 0.7.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -107,6 +107,22 @@ dependencies:
107
107
  - - ! '>='
108
108
  - !ruby/object:Gem::Version
109
109
  version: '0'
110
+ - !ruby/object:Gem::Dependency
111
+ name: mongoid
112
+ requirement: !ruby/object:Gem::Requirement
113
+ none: false
114
+ requirements:
115
+ - - ! '>='
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ none: false
122
+ requirements:
123
+ - - ! '>='
124
+ - !ruby/object:Gem::Version
125
+ version: '0'
110
126
  description: A parser for Job sites
111
127
  email:
112
128
  - jack@jackfranklin.net
@@ -114,6 +130,8 @@ executables: []
114
130
  extensions: []
115
131
  extra_rdoc_files: []
116
132
  files:
133
+ - lib/jobparser/cache/mongostore.rb
134
+ - lib/jobparser/cache/textfile.rb
117
135
  - lib/jobparser/cache.rb
118
136
  - lib/jobparser/cleaner.rb
119
137
  - lib/jobparser/facets/apply.rb
@@ -130,6 +148,7 @@ files:
130
148
  - lib/jobparser/regex.rb
131
149
  - lib/jobparser/scorer.rb
132
150
  - lib/jobparser/specialcases.rb
151
+ - lib/jobparser/textfile.rb
133
152
  - lib/jobparser/version.rb
134
153
  - lib/jobparser.rb
135
154
  homepage: ''