jobboards-parser 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/README.textile ADDED
@@ -0,0 +1,38 @@
1
+ Simple Ruby library for parsing tech jobboards
2
+
3
+ h1. Available jobboards
4
+
5
+ * Authentic jobs
6
+ * Crunchboard
7
+ * Github
8
+ * Joel on Software
9
+ * Krop
10
+ * Ruby inside
11
+ * Ruby now
12
+ * 37 Signals
13
+ * Smashing magazine
14
+ * Startuply
15
+
16
+ h1. Examples
17
+
18
+ Parse all jobboards:
19
+
20
+ bq. JobboardsParser.load
21
+
22
+ or
23
+
24
+ bq. JobboardsParser.load(:all)
25
+
26
+ If you want to parse specific jobboards:
27
+
28
+ bq. JobboardsParser.load(:crunchboard, :github)
29
+
30
+ h1. Upcoming jobboards
31
+
32
+ * Job 4 dev
33
+ * Top ruby jobs
34
+
35
+ h1. TODO
36
+ * Use regex in boards class
37
+ * Complete the readme file
38
+ * Write tests
data/Rakefile ADDED
File without changes
@@ -0,0 +1,35 @@
1
# Gem specification for jobboards-parser: identity, runtime dependencies and
# the list of files shipped in the package.
Gem::Specification.new do |s|
  s.name        = "jobboards-parser"
  s.version     = "0.0.1"
  s.date        = "2010-05-15"
  s.summary     = "Simple ruby library for parsing tech jobboards"
  s.email       = "g.marcilhacy@gmail.com"
  s.homepage    = ""
  s.description = "Simple ruby library for parsing tech jobboards"
  s.authors     = ["Grégory Marcilhacy"]

  # has_rdoc= is deprecated in RubyGems and may be missing from newer
  # releases; only call it where the setter still exists so the gemspec keeps
  # loading everywhere.
  s.has_rdoc = false if s.respond_to?(:has_rdoc=)

  s.require_paths = %w[lib]

  # Runtime dependencies.
  s.add_dependency('boilerpipe', ">= 0.0.4")
  s.add_dependency('simple-rss', ">= 1.2.3")
  s.add_dependency('activesupport', ">= 2.3")

  # Files packaged with the gem.
  s.files = %w[
    jobboards-parser.gemspec
    README.textile
    Rakefile
    lib/jobboards_parser.rb
    lib/jobboards/core.rb
    lib/jobboards/boards/37_signals.rb
    lib/jobboards/boards/authentic_jobs.rb
    lib/jobboards/boards/crunchboard.rb
    lib/jobboards/boards/github.rb
    lib/jobboards/boards/joel_on_software.rb
    lib/jobboards/boards/krop.rb
    lib/jobboards/boards/ruby_inside.rb
    lib/jobboards/boards/ruby_now.rb
    lib/jobboards/boards/smashing_magazine.rb
    lib/jobboards/boards/startuply.rb
  ]
end
@@ -0,0 +1,27 @@
1
module JobboardsParser
  module Jobboard
    # Parser for the "37 Signals" job board feed (registered under the
    # :signal key of JobboardsParser::BOARDS).
    class Signal < Jobboard

      # Builds one posting and tags it with this board's display name.
      def initialize(title, url, location, company, content, published_at)
        super(title, url, location, company, content, published_at)
        @jobboard = self.class.jobboard_name
      end

      # Fetches the feed and converts every item into an attributes Hash.
      #
      # Item titles are split on ":" — the first part is used as the company
      # and the last part as the job title.
      #
      # Returns an Array of Hashes (one per feed item, see Jobboard#attributes).
      def self.parse
        (open_feed.items || []).map do |item|
          title        = item.title.split(":").last.strip
          url          = item.link
          location     = extract_location(item.description)
          company      = item.title.split(":").first.strip
          content      = item.description
          published_at = item.pubDate

          new(title, url, location, company, content, published_at).attributes
        end
      end

      # Returns the text following the last "&gt;" on the first description
      # line that mentions "Location", or nil when no such line exists.
      #
      # The previous implementation used Array#each, which returns the whole
      # array of lines instead of the extracted value.
      def self.extract_location(description)
        line = description.to_s.split("\n").find { |text| text.include?("Location") }
        line && line.split("&gt;").last.strip
      end

    end
  end
end
@@ -0,0 +1,26 @@
1
module JobboardsParser
  module Jobboard
    # Parser for the "Authentic jobs" feed.
    class AuthenticJob < Jobboard

      # Creates a posting and stamps it with this board's display name.
      def initialize(title, url, location, company, content, published_at)
        super
        @jobboard = self.class.jobboard_name
      end

      # Turns every feed item into an attributes Hash.
      #
      # The item title is split on " at ": the text before the first
      # separator is the job title, the text after the last one is the
      # company. This feed provides no location, so it is always nil.
      def self.parse
        entries = open_feed.items || []

        entries.map do |entry|
          pieces    = entry.title.split(" at ")
          job_title = pieces.first.strip
          link      = entry.link
          employer  = pieces.last.strip
          body      = entry.description
          posted_on = entry.pubDate

          new(job_title, link, nil, employer, body, posted_on).attributes
        end
      end

    end
  end
end
@@ -0,0 +1,25 @@
1
module JobboardsParser
  module Jobboard
    # Parser for the "Crunchboard" feed.
    class Crunchboard < Jobboard

      # Creates a posting and stamps it with this board's display name.
      def initialize(title, url, location, company, content, published_at)
        super
        @jobboard = self.class.jobboard_name
      end

      # Turns every feed item into an attributes Hash.
      #
      # Company and location are both cut out of the second "&gt;"-delimited
      # chunk of the item description: the company is the text between the
      # parentheses, the location is what follows the last "-" up to the next
      # "&lt;".
      def self.parse
        entries = open_feed.items || []

        entries.map do |entry|
          job_title = entry.title.strip
          link      = entry.link
          place     = entry.description.split("&gt;")[1].split("-").last.split("&lt;").first.strip
          employer  = entry.description.split("&gt;")[1].split(")").first.split("(").last.strip
          body      = entry.description
          posted_on = entry.updated

          new(job_title, link, place, employer, body, posted_on).attributes
        end
      end
    end
  end
end
@@ -0,0 +1,26 @@
1
module JobboardsParser
  module Jobboard
    # Parser for the "Github" jobs feed.
    class Github < Jobboard

      # Builds one posting and tags it with this board's display name.
      def initialize(title, url, location, company, content, published_at)
        super(title, url, location, company, content, published_at)
        @jobboard = self.class.jobboard_name
      end

      # Fetches the feed and converts every item into an attributes Hash.
      #
      # NOTE(review): the splitting below assumes item titles look like
      # "<prefix>: <title> at <company> in <location>" — confirm against the
      # live feed.
      #
      # Separators are matched with surrounding spaces (" at ", " in ") so
      # that words merely containing "at" or "in" (e.g. "Data", "Engine") are
      # not cut in the middle, which the earlier bare "at"/"in" splits did.
      #
      # Returns an Array of Hashes (one per feed item, see Jobboard#attributes).
      def self.parse
        (open_feed.items || []).map do |item|
          title        = item.title.split(":").last.split(" at ").first.strip
          url          = item.link
          location     = item.title.split(" in ").last.strip
          company      = item.title.split(" at ").last.split(" in ").first.strip
          content      = item.content
          published_at = item.updated

          new(title, url, location, company, content, published_at).attributes
        end
      end

    end
  end
end
@@ -0,0 +1,26 @@
1
module JobboardsParser
  module Jobboard
    # Parser for the "Joel on software" careers feed.
    class JoelOnSoftware < Jobboard

      # Creates a posting and stamps it with this board's display name.
      def initialize(title, url, location, company, content, published_at)
        super
        @jobboard = self.class.jobboard_name
      end

      # Turns every feed item into an attributes Hash.
      #
      # The job title is the text before the first " at "; the company is the
      # text after the last " at " up to the first "("; the location is the
      # text inside the last pair of parentheses of the item title.
      def self.parse
        entries = open_feed.items || []

        entries.map do |entry|
          around_at = entry.title.split(" at ")
          job_title = around_at.first.strip
          link      = entry.link
          place     = entry.title.split("(").last.split(")").first.strip
          employer  = around_at.last.split("(").first.strip
          body      = entry.description
          posted_on = entry.updated

          new(job_title, link, place, employer, body, posted_on).attributes
        end
      end

    end
  end
end
@@ -0,0 +1,26 @@
1
module JobboardsParser
  module Jobboard
    # Parser for the "Krop" feed.
    class Krop < Jobboard

      # Creates a posting and stamps it with this board's display name.
      def initialize(title, url, location, company, content, published_at)
        super
        @jobboard = self.class.jobboard_name
      end

      # Turns every feed item into an attributes Hash.
      #
      # The item title is cut on the phrase " is looking for a": what comes
      # before is the company; what comes after is split again on " in " /
      # " in" to obtain the job title and the location.
      def self.parse
        entries = open_feed.items || []

        entries.map do |entry|
          halves    = entry.title.split(" is looking for a")
          job_title = halves.last.split(" in ").first.strip
          link      = entry.link
          place     = halves.last.split(" in").last.strip
          employer  = halves.first.strip
          body      = entry.description
          posted_on = entry.updated

          new(job_title, link, place, employer, body, posted_on).attributes
        end
      end

    end
  end
end
@@ -0,0 +1,26 @@
1
module JobboardsParser
  module Jobboard
    # Parser for the "Ruby Inside" jobs feed.
    class RubyInside < Jobboard

      # Creates a posting and stamps it with this board's display name.
      def initialize(title, url, location, company, content, published_at)
        super
        @jobboard = self.class.jobboard_name
      end

      # Turns every feed item into an attributes Hash.
      #
      # The job title is the text before the first " at ". The remainder
      # (after the last " at ") holds the company, followed by the location
      # after the last "(". The content is not taken from the feed: it is
      # extracted from the page the item links to.
      def self.parse
        entries = open_feed.items || []

        entries.map do |entry|
          around_at = entry.title.split(" at ")
          job_title = around_at.first.strip
          link      = entry.link
          trailer   = around_at.last
          place     = trailer.split("(").last.sub(")","").strip
          employer  = trailer.split("(").first.strip
          body      = Jobboard.extract_content(entry.link)
          posted_on = entry.pubDate

          new(job_title, link, place, employer, body, posted_on).attributes
        end
      end

    end
  end
end
@@ -0,0 +1,26 @@
1
module JobboardsParser
  module Jobboard
    # Parser for the "Ruby now" jobs feed.
    class RubyNow < Jobboard

      # Builds one posting and tags it with this board's display name.
      def initialize(title, url, location, company, content, published_at)
        super(title, url, location, company, content, published_at)
        @jobboard = self.class.jobboard_name
      end

      # Fetches the feed and converts every item into an attributes Hash.
      #
      # The job title is the text before the first " at " of the item title
      # and the location is the text after the last " at:". This board does
      # not expose a company, so it is always nil. The item guid is used as
      # the posting URL.
      #
      # Returns an Array of Hashes (one per feed item, see Jobboard#attributes).
      def self.parse
        (open_feed.items || []).map do |item|
          title        = item.title.split(" at ").first.strip
          url          = item.guid
          location     = item.title.split(" at:").last.strip
          company      = nil
          content      = item.description
          # Was misspelled "pudDate", which never matched the feed's date field.
          published_at = item.pubDate

          new(title, url, location, company, content, published_at).attributes
        end
      end

    end
  end
end
@@ -0,0 +1,26 @@
1
module JobboardsParser
  module Jobboard
    # Parser for the "Smashing Magazine" jobs feed.
    class SmashingMagazine < Jobboard

      # Creates a posting and stamps it with this board's display name.
      def initialize(title, url, location, company, content, published_at)
        super
        @jobboard = self.class.jobboard_name
      end

      # Turns every feed item into an attributes Hash.
      #
      # The item title is split on "-". The location is the parenthesised
      # text of the third segment; the content is extracted from the linked
      # page rather than from the feed.
      #
      # NOTE(review): title and company are both read from the second
      # segment, so they are always equal — one of them probably should use a
      # different segment; confirm against the feed format.
      def self.parse
        entries = open_feed.items || []

        entries.map do |entry|
          job_title = entry.title.split("-")[1].strip
          link      = entry.link
          place     = entry.title.split("-")[2].split("(").last.split(")").first.strip
          employer  = entry.title.split("-")[1].strip
          body      = extract_content(entry.link)
          posted_on = entry.updated

          new(job_title, link, place, employer, body, posted_on).attributes
        end
      end

    end
  end
end
@@ -0,0 +1,26 @@
1
module JobboardsParser
  module Jobboard
    # Parser for the "Startuply" jobs feed.
    class Startuply < Jobboard

      # Builds one posting and tags it with this board's display name.
      def initialize(title, url, location, company, content, published_at)
        super(title, url, location, company, content, published_at)
        @jobboard = self.class.jobboard_name
      end

      # Fetches the feed and converts every item into an attributes Hash.
      #
      # The job title is the text before the first "-" of the item title and
      # the location is the text after the last " in ". The content is
      # extracted from the linked page rather than from the feed.
      #
      # Returns an Array of Hashes (one per feed item, see Jobboard#attributes).
      def self.parse
        (open_feed.items || []).map do |item|
          title        = item.title.split("-").first.strip
          url          = item.link
          location     = item.title.split(" in ").last.strip
          company      = company_from(item.description)
          content      = extract_content(item.link)
          # Was "pubdate"; the sibling RSS parsers read the date as "pubDate".
          published_at = item.pubDate

          new(title, url, location, company, content, published_at).attributes
        end
      end

      # Derives the company name from the first link found in the description.
      #
      # NOTE(review): presumably the link points at
      # ".../Companies/<Words_Of_Name>_<id>.aspx" — confirm against the feed.
      # The path is cut between "Companies" and ".aspx", split on "_", pieces
      # that convert to a positive integer are dropped, and the rest is
      # joined with spaces; only the part after the last "/" is kept.
      def self.company_from(description)
        path  = description.split("href")[1].split("Companies")[1].split(".aspx").first
        words = path.split("_").delete_if { |part| part.to_i > 0 }
        words.join(" ").split("/").last
      end

    end
  end
end
@@ -0,0 +1,56 @@
1
module JobboardsParser
  module Jobboard
    # Base class shared by every job board parser.
    #
    # An instance holds a single job posting. The class-level helpers look up
    # the board's feed URL and display name in JobboardsParser::BOARDS, fetch
    # the feed and extract the main text of a linked page.
    class Jobboard

      # Stores the raw fields of one posting. No validation or conversion is
      # applied here.
      def initialize(title, url, location, company, content, published_at)
        @title        = title
        @url          = url
        @location     = location
        @company      = company
        @content      = content
        @published_at = published_at
      end

      # Returns the posting as a Hash keyed by :title, :url, :location,
      # :company, :content, :published_at and :jobboard.
      #
      # String values are returned stripped. Copies are stripped rather than
      # the stored strings, so frozen values are accepted and the instance is
      # left untouched. :location was previously missing from the result even
      # though every parser supplies it.
      def attributes
        raw = {
          :title        => @title,
          :url          => @url,
          :location     => @location,
          :company      => @company,
          :content      => @content,
          :published_at => @published_at,
          :jobboard     => @jobboard
        }

        raw.inject({}) do |attrs, (key, value)|
          attrs[key] = value.is_a?(String) ? value.strip : value
          attrs
        end
      end

      # Sets the content after decoding the HTML entities handled by htmlize.
      def content=(text)
        @content = htmlize(text)
      end

      # Key of this class in JobboardsParser::BOARDS: the demodulized class
      # name, underscored and symbolized (e.g. RubyInside => :ruby_inside).
      def self.board_key
        ActiveSupport::Inflector.underscore(self.to_s.gsub(/^.*::/, '')).to_sym
      end

      # Feed URL registered for this board.
      # Raises NoMethodError when the board is not registered in BOARDS.
      def self.feed
        JobboardsParser::BOARDS[board_key][:url]
      end

      # Display name registered for this board, or '' when it is unknown.
      def self.jobboard_name
        board = JobboardsParser::BOARDS[board_key]
        board.is_a?(Hash) ? board[:name] : ''
      end

      # Downloads and parses this board's feed with SimpleRSS.
      #
      # Kernel#open only handles URLs through open-uri on older Rubies and no
      # longer does so on Ruby 3, so URI.open is preferred when available.
      def self.open_feed
        io = (defined?(URI) && URI.respond_to?(:open)) ? URI.open(feed) : open(feed)
        SimpleRSS.parse(io)
      end

      # Extracts the main content of the page at +url+ through Boilerpipe.
      #
      # Returns "" when the response cannot be decoded or lacks the expected
      # "response" / "content" keys. Errors raised by the Boilerpipe call
      # itself are not rescued.
      def self.extract_content(url)
        page = Boilerpipe.extract(url, { :output => :json })
        begin
          ActiveSupport::JSON.decode(page)["response"]["content"]
        rescue StandardError
          ""
        end
      end

      private

      # Decodes &lt;, &gt;, &nbsp; and &amp; and strips surrounding
      # whitespace. &amp; is decoded last so that double-escaped entities are
      # unescaped by one level only. The &nbsp; pattern now includes the
      # leading "&"; it previously left a stray ampersand behind.
      def htmlize(content)
        content.gsub(/&lt;/, "<").gsub(/&gt;/, ">").gsub(/&nbsp;/, " ").gsub(/&amp;/, "&").strip
      end

    end
  end
end
@@ -0,0 +1,77 @@
1
+ begin
2
+ require 'rubygems'
3
+ require 'boilerpipe'
4
+ require 'simple-rss'
5
+ require 'active_support'
6
+ require "open-uri"
7
+
8
+ require "active_support/core_ext/string/inflections.rb"
9
+ require "active_support/json.rb"
10
+
11
+ rescue LoadError
12
+ require 'rubygems'
13
+ begin
14
+ gem 'simple-rss'
15
+ gem 'boilerpipe'
16
+ gem 'activesupport'
17
+ require 'simple-rss'
18
+ require 'Boilerpipe'
19
+ require 'active_support'
20
+
21
+ require "active_support/core_ext/string/inflections.rb"
22
+ require "active_support/json.rb"
23
+
24
+ rescue Gem::LoadError => e
25
+ puts "WARNING: Gem LoadError: #{e.message}"
26
+ end
27
+ end
28
+
29
+ require "jobboards/core"
30
+ require "jobboards/boards/authentic_jobs"
31
+ require "jobboards/boards/crunchboard"
32
+ require "jobboards/boards/github"
33
+ require "jobboards/boards/joel_on_software"
34
+ require "jobboards/boards/krop"
35
+ require "jobboards/boards/ruby_inside"
36
+ require "jobboards/boards/ruby_now"
37
+ require "jobboards/boards/37_signals"
38
+ require "jobboards/boards/smashing_magazine"
39
+ require "jobboards/boards/startuply"
40
+
41
module JobboardsParser

  # Registry of supported job boards. Each key is the underscored name of the
  # parser class under JobboardsParser::Jobboard; each value holds the feed
  # :url and the human readable :name of the board.
  BOARDS = {
    :authentic_job      => { :url => "http://www.authenticjobs.com/rss/index.xml", :name => "Authentic jobs" },
    :crunchboard        => { :url => "http://feeds.feedburner.com/CrunchboardJobs?format=xml", :name => "Crunchboard" },
    :github             => { :url => "http://jobs.github.com/positions.atom", :name => "Github" },
    :joel_on_software   => { :url => "http://careers.joelonsoftware.com/Jobs/Feed?", :name => "Joel on software" },
    :krop               => { :url => "http://www.krop.com/services/feeds/rss/latest/", :name => "Krop" },
    :ruby_inside        => { :url => "http://jobs.rubyinside.com/a/jbb/find-jobs-rss", :name => "Ruby Inside" },
    :ruby_now           => { :url => "http://feeds.feedburner.com/jobsrubynow?format=xml", :name => "Ruby now" },
    :signal             => { :url => "http://jobs.37signals.com/jobs.rss", :name => "37 Signals" },
    :smashing_magazine  => { :url => "http://jobs.smashingmagazine.com/rss/all/all", :name => "Smashing Magazine" },
    :startuply          => { :url => "http://startuply.com/Rss/HomePage.aspx", :name => "Startuply" },
  }.freeze

  # Raised by JobboardsParser.load when a requested board is not in BOARDS.
  class InvalidJobboard < StandardError; end

  # Loads and parses job boards.
  #
  # Pass the boards you want as arguments:
  #   JobboardsParser.load(:crunchboard, :github)
  # Pass :all, or nothing at all, to parse every board:
  #   JobboardsParser.load(:all)
  #
  # Returns a flat Array with the results of every selected board's +parse+.
  # Raises InvalidJobboard when any requested board is not a key of BOARDS.
  def self.load(*opts)
    # Array#empty? replaces ActiveSupport's blank?, so no core extension is
    # needed to select the boards.
    boards = (opts.empty? || opts.first == :all) ? BOARDS.keys : opts.map { |opt| opt.to_sym }

    unknown = boards - BOARDS.keys
    unless unknown.empty?
      raise InvalidJobboard, "Unknown jobboard(s): #{unknown.join(', ')}"
    end

    boards.map { |board|
      # Resolve the parser class by name with const_get instead of eval.
      parser = Jobboard.const_get(board.to_s.classify)
      parser.respond_to?(:parse) ? parser.parse : []
    }.flatten
  end

end
metadata ADDED
@@ -0,0 +1,127 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: jobboards-parser
3
+ version: !ruby/object:Gem::Version
4
+ hash: 29
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 1
10
+ version: 0.0.1
11
+ platform: ruby
12
+ authors:
13
+ - "Gr\xC3\xA9gory Marcilhacy"
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2010-05-15 00:00:00 +02:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: boilerpipe
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ hash: 23
30
+ segments:
31
+ - 0
32
+ - 0
33
+ - 4
34
+ version: 0.0.4
35
+ type: :runtime
36
+ version_requirements: *id001
37
+ - !ruby/object:Gem::Dependency
38
+ name: simple-rss
39
+ prerelease: false
40
+ requirement: &id002 !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ">="
44
+ - !ruby/object:Gem::Version
45
+ hash: 25
46
+ segments:
47
+ - 1
48
+ - 2
49
+ - 3
50
+ version: 1.2.3
51
+ type: :runtime
52
+ version_requirements: *id002
53
+ - !ruby/object:Gem::Dependency
54
+ name: activesupport
55
+ prerelease: false
56
+ requirement: &id003 !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ hash: 5
62
+ segments:
63
+ - 2
64
+ - 3
65
+ version: "2.3"
66
+ type: :runtime
67
+ version_requirements: *id003
68
+ description: Simple ruby library for parsing tech jobboards
69
+ email: g.marcilhacy@gmail.com
70
+ executables: []
71
+
72
+ extensions: []
73
+
74
+ extra_rdoc_files: []
75
+
76
+ files:
77
+ - jobboards-parser.gemspec
78
+ - README.textile
79
+ - Rakefile
80
+ - lib/jobboards_parser.rb
81
+ - lib/jobboards/core.rb
82
+ - lib/jobboards/boards/37_signals.rb
83
+ - lib/jobboards/boards/authentic_jobs.rb
84
+ - lib/jobboards/boards/crunchboard.rb
85
+ - lib/jobboards/boards/github.rb
86
+ - lib/jobboards/boards/joel_on_software.rb
87
+ - lib/jobboards/boards/krop.rb
88
+ - lib/jobboards/boards/ruby_inside.rb
89
+ - lib/jobboards/boards/ruby_now.rb
90
+ - lib/jobboards/boards/smashing_magazine.rb
91
+ - lib/jobboards/boards/startuply.rb
92
+ has_rdoc: true
93
+ homepage: ""
94
+ licenses: []
95
+
96
+ post_install_message:
97
+ rdoc_options: []
98
+
99
+ require_paths:
100
+ - lib
101
+ required_ruby_version: !ruby/object:Gem::Requirement
102
+ none: false
103
+ requirements:
104
+ - - ">="
105
+ - !ruby/object:Gem::Version
106
+ hash: 3
107
+ segments:
108
+ - 0
109
+ version: "0"
110
+ required_rubygems_version: !ruby/object:Gem::Requirement
111
+ none: false
112
+ requirements:
113
+ - - ">="
114
+ - !ruby/object:Gem::Version
115
+ hash: 3
116
+ segments:
117
+ - 0
118
+ version: "0"
119
+ requirements: []
120
+
121
+ rubyforge_project:
122
+ rubygems_version: 1.6.2
123
+ signing_key:
124
+ specification_version: 3
125
+ summary: Simple ruby library for parsing tech jobboards
126
+ test_files: []
127
+