jobparser 0.9.1 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/jobparser/parseschema.rb +11 -4
- data/lib/jobparser/version.rb +1 -1
- data/lib/jobparser.rb +11 -1
- metadata +2 -18
@@ -1,5 +1,12 @@
|
|
1
1
|
# encoding: utf-8
|
2
|
-
|
2
|
+
def underscore(camel_cased_word)
|
3
|
+
camel_cased_word.to_s.gsub(/::/, '/').
|
4
|
+
gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2').
|
5
|
+
gsub(/([a-z\d])([A-Z])/,'\1_\2').
|
6
|
+
tr("-", "_").
|
7
|
+
downcase
|
8
|
+
end
|
9
|
+
|
3
10
|
module JobParser
|
4
11
|
class ParseSchema < Parser
|
5
12
|
|
@@ -9,11 +16,11 @@ module JobParser
|
|
9
16
|
}
|
10
17
|
|
11
18
|
EXTRA_SCHEMA_TEXT_FIELDS.each do |field|
|
12
|
-
define_method("job_#{field
|
19
|
+
define_method("job_#{underscore(field)}") {
|
13
20
|
get_content_at_prop(field)
|
14
21
|
}
|
15
22
|
|
16
|
-
private "job_#{field
|
23
|
+
private "job_#{underscore(field)}".to_sym
|
17
24
|
end
|
18
25
|
|
19
26
|
def initialize(html, from_url)
|
@@ -25,7 +32,7 @@ module JobParser
|
|
25
32
|
res = super
|
26
33
|
res[:schema] = true
|
27
34
|
EXTRA_SCHEMA_TEXT_FIELDS.each do |field|
|
28
|
-
underscore_name = field.
|
35
|
+
underscore_name = underscore(field).to_sym
|
29
36
|
result = send("job_#{underscore_name}")
|
30
37
|
res[underscore_name] = result unless result.nil?
|
31
38
|
end
|
data/lib/jobparser/version.rb
CHANGED
data/lib/jobparser.rb
CHANGED
@@ -21,7 +21,17 @@ require "open-uri"
|
|
21
21
|
|
22
22
|
module JobParser
|
23
23
|
def self.parser(url)
|
24
|
-
|
24
|
+
puts "Warning: JobParser.parser is old. Use JobParser.parse"
|
25
|
+
|
26
|
+
JobParser.parse(url)
|
27
|
+
end
|
28
|
+
|
29
|
+
def self.parse(url)
|
30
|
+
|
31
|
+
unless JobParser.config[:cache_on] && JobParser.cache.has_cache_for_url?(@url) && !JobParser.cache.cache_expired?(@url)
|
32
|
+
# we only need the HTML if there's no cache
|
33
|
+
html = open(url, :allow_redirections => :safe).read
|
34
|
+
end
|
25
35
|
|
26
36
|
if html.include?("http://schema.org/JobPosting")
|
27
37
|
ParseSchema.new(html, url)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: jobparser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.1
|
4
|
+
version: 0.10.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-08-
|
12
|
+
date: 2013-08-12 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -123,22 +123,6 @@ dependencies:
|
|
123
123
|
- - ! '>='
|
124
124
|
- !ruby/object:Gem::Version
|
125
125
|
version: '0'
|
126
|
-
- !ruby/object:Gem::Dependency
|
127
|
-
name: activesupport
|
128
|
-
requirement: !ruby/object:Gem::Requirement
|
129
|
-
none: false
|
130
|
-
requirements:
|
131
|
-
- - '='
|
132
|
-
- !ruby/object:Gem::Version
|
133
|
-
version: 4.0.0
|
134
|
-
type: :runtime
|
135
|
-
prerelease: false
|
136
|
-
version_requirements: !ruby/object:Gem::Requirement
|
137
|
-
none: false
|
138
|
-
requirements:
|
139
|
-
- - '='
|
140
|
-
- !ruby/object:Gem::Version
|
141
|
-
version: 4.0.0
|
142
126
|
description: A parser for Job sites
|
143
127
|
email:
|
144
128
|
- jack@jackfranklin.net
|