jobboards-parser 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.textile ADDED
@@ -0,0 +1,38 @@
1
+ Simple Ruby library for parsing tech jobboards
2
+
3
+ h1. Available jobboards
4
+
5
+ * Authentic jobs
6
+ * Crunchboard
7
+ * Github
8
+ * Joel on Software
9
+ * Krop
10
+ * Ruby inside
11
+ * Ruby now
12
+ * 37 Signals
13
+ * Smashing magazine
14
+ * Startuply
15
+
16
+ h1. Examples
17
+
18
+ Parse all jobboards:
19
+
20
+ bq. JobboardsParser.load
21
+
22
+ or
23
+
24
+ bq. JobboardsParser.load(:all)
25
+
26
+ If you want to parse specific jobboards:
27
+
28
+ bq. JobboardsParser.load(:crunchboard, :github)
29
+
30
+ h1. Upcoming jobboards
31
+
32
+ * Job 4 dev
33
+ * Top ruby jobs
34
+
35
+ h1. TODO
36
+ * Use regex in boards class
37
+ * Complete the readme file
38
+ * Write tests
data/Rakefile ADDED
File without changes
@@ -0,0 +1,35 @@
# Gem specification for jobboards-parser: package metadata, runtime
# dependencies and the explicit list of files shipped with the gem.
Gem::Specification.new do |s|
  s.name        = "jobboards-parser"
  s.version     = "0.0.1"
  s.date        = "2010-05-15"
  s.summary     = "Simple ruby library for parsing tech jobboards"
  s.description = "Simple ruby library for parsing tech jobboards"
  s.authors     = ["Grégory Marcilhacy"]
  s.email       = "g.marcilhacy@gmail.com"
  s.homepage    = ""

  # NOTE: the deprecated `has_rdoc=` setter is intentionally not called;
  # RubyGems ignores the value (the packaged metadata reports its own default).

  s.require_paths = %w[lib]

  # Runtime dependencies used by the board parsers.
  s.add_dependency("boilerpipe", ">= 0.0.4")
  s.add_dependency("simple-rss", ">= 1.2.3")
  s.add_dependency("activesupport", ">= 2.3")

  # Every file packaged in the gem is listed explicitly.
  s.files = %w[
    jobboards-parser.gemspec
    README.textile
    Rakefile
    lib/jobboards_parser.rb
    lib/jobboards/core.rb
    lib/jobboards/boards/37_signals.rb
    lib/jobboards/boards/authentic_jobs.rb
    lib/jobboards/boards/crunchboard.rb
    lib/jobboards/boards/github.rb
    lib/jobboards/boards/joel_on_software.rb
    lib/jobboards/boards/krop.rb
    lib/jobboards/boards/ruby_inside.rb
    lib/jobboards/boards/ruby_now.rb
    lib/jobboards/boards/smashing_magazine.rb
    lib/jobboards/boards/startuply.rb
  ]
end
@@ -0,0 +1,27 @@
module JobboardsParser
  module Jobboard
    # Job posting parsed from the 37 Signals job board feed.
    class Signal < Jobboard

      # Takes the same arguments as Jobboard#initialize and additionally
      # records the display name of this board in @jobboard.
      def initialize(title, url, location, company, content, published_at)
        super(title, url, location, company, content, published_at)
        @jobboard = self.class.jobboard_name
      end

      # Reads the board feed and builds one posting per feed item.
      #
      # Item titles are split on ":"; the text before the first colon is used
      # as the company and the text after the last colon as the job title.
      # The location is taken from the first description line that mentions
      # "Location" (the text after its last "&gt;"), or nil when no such line
      # exists.
      #
      # Returns an Array holding the attributes Hash of every feed item
      # (empty when the feed has no items).
      def self.parse
        (open_feed.items || []).map do |item|
          title        = item.title.split(":").last.strip
          url          = item.link
          # `find` returns the matching line itself; the previous `each`
          # returned the whole array of lines instead of the location.
          location_line = item.description.split("\n").find { |line| line.include?("Location") }
          location     = location_line && location_line.split("&gt;").last.strip
          company      = item.title.split(":").first.strip
          content      = item.description
          published_at = item.pubDate

          new(title, url, location, company, content, published_at).attributes
        end
      end

    end
  end
end
@@ -0,0 +1,26 @@
module JobboardsParser
  module Jobboard
    # Job posting parsed from the Authentic Jobs feed.
    class AuthenticJob < Jobboard

      # Forwards every argument to Jobboard#initialize, then stores the
      # display name of this board in @jobboard.
      def initialize(title, url, location, company, content, published_at)
        super
        @jobboard = self.class.jobboard_name
      end

      # Builds one attributes Hash per feed item.
      #
      # The item title is split on " at ": the first piece becomes the job
      # title and the last piece the company. No location is extracted (nil).
      #
      # Returns an Array of attribute Hashes (empty when the feed has no items).
      def self.parse
        entries = open_feed.items || []

        entries.map do |entry|
          job_title = entry.title.split(" at ").first.strip
          link      = entry.link
          employer  = entry.title.split(" at ").last.strip
          body      = entry.description
          posted_at = entry.pubDate

          new(job_title, link, nil, employer, body, posted_at).attributes
        end
      end

    end
  end
end
@@ -0,0 +1,25 @@
module JobboardsParser
  module Jobboard
    # Job posting parsed from the Crunchboard feed.
    class Crunchboard < Jobboard

      # Forwards every argument to Jobboard#initialize, then stores the
      # display name of this board in @jobboard.
      def initialize(title, url, location, company, content, published_at)
        super
        @jobboard = self.class.jobboard_name
      end

      # Builds one attributes Hash per feed item.
      #
      # Location and company are both cut out of the second "&gt;"-delimited
      # fragment of the item description: the location is the text after the
      # last "-" (up to the next "&lt;"), the company is the text between the
      # first pair of parentheses.
      #
      # Returns an Array of attribute Hashes (empty when the feed has no items).
      def self.parse
        entries = open_feed.items || []

        entries.map do |entry|
          job_title = entry.title.strip
          link      = entry.link
          place     = entry.description.split("&gt;")[1].split("-").last.split("&lt;").first.strip
          employer  = entry.description.split("&gt;")[1].split(")").first.split("(").last.strip
          body      = entry.description
          posted_at = entry.updated

          new(job_title, link, place, employer, body, posted_at).attributes
        end
      end
    end
  end
end
@@ -0,0 +1,26 @@
module JobboardsParser
  module Jobboard
    # Job posting parsed from the Github jobs feed.
    class Github < Jobboard

      # Takes the same arguments as Jobboard#initialize and additionally
      # records the display name of this board in @jobboard.
      def initialize(title, url, location, company, content, published_at)
        super(title, url, location, company, content, published_at)
        @jobboard = self.class.jobboard_name
      end

      # Reads the board feed and builds one posting per feed item.
      #
      # The item title is presumably shaped like
      # "<kind>: <title> at <company> in <location>" (TODO confirm against
      # the live feed). The words " at " and " in " are matched with their
      # surrounding spaces so that titles or company names merely containing
      # the letters "at" / "in" (e.g. "Data", "Twinings") are not cut in half.
      #
      # Returns an Array holding the attributes Hash of every feed item
      # (empty when the feed has no items).
      def self.parse
        (open_feed.items || []).map do |item|
          title        = item.title.split(":").last.split(" at ").first.strip
          url          = item.link
          location     = item.title.split(" in ").last.strip
          company      = item.title.split(" at ").last.split(" in ").first.strip
          content      = item.content
          published_at = item.updated

          new(title, url, location, company, content, published_at).attributes
        end
      end

    end
  end
end
@@ -0,0 +1,26 @@
module JobboardsParser
  module Jobboard
    # Job posting parsed from the Joel on Software careers feed.
    class JoelOnSoftware < Jobboard

      # Forwards every argument to Jobboard#initialize, then stores the
      # display name of this board in @jobboard.
      def initialize(title, url, location, company, content, published_at)
        super
        @jobboard = self.class.jobboard_name
      end

      # Builds one attributes Hash per feed item.
      #
      # From the item title: the job title is the text before the first
      # " at ", the company is the text after the last " at " up to the first
      # "(", and the location is the text inside the last pair of parentheses.
      #
      # Returns an Array of attribute Hashes (empty when the feed has no items).
      def self.parse
        entries = open_feed.items || []

        entries.map do |entry|
          job_title = entry.title.split(" at ").first.strip
          link      = entry.link
          place     = entry.title.split("(").last.split(")").first.strip
          employer  = entry.title.split(" at ").last.split("(").first.strip
          body      = entry.description
          posted_at = entry.updated

          new(job_title, link, place, employer, body, posted_at).attributes
        end
      end

    end
  end
end
@@ -0,0 +1,26 @@
module JobboardsParser
  module Jobboard
    # Job posting parsed from the Krop feed.
    class Krop < Jobboard

      # Separator between the company and the position in an item title.
      # Accepts both "a" and "an" so that "... is looking for an Art Director"
      # no longer yields the title "n Art Director".
      LOOKING_FOR = / is looking for an?\b/

      # Takes the same arguments as Jobboard#initialize and additionally
      # records the display name of this board in @jobboard.
      def initialize(title, url, location, company, content, published_at)
        super(title, url, location, company, content, published_at)
        @jobboard = self.class.jobboard_name
      end

      # Reads the board feed and builds one posting per feed item.
      #
      # The item title is presumably shaped like
      # "<company> is looking for a(n) <title> in <location>" (TODO confirm
      # against the live feed). Title and location are both split on " in "
      # (with surrounding spaces) so the two fields use the same delimiter.
      #
      # Returns an Array holding the attributes Hash of every feed item
      # (empty when the feed has no items).
      def self.parse
        (open_feed.items || []).map do |item|
          title        = item.title.split(LOOKING_FOR).last.split(" in ").first.strip
          url          = item.link
          location     = item.title.split(LOOKING_FOR).last.split(" in ").last.strip
          company      = item.title.split(LOOKING_FOR).first.strip
          content      = item.description
          published_at = item.updated

          new(title, url, location, company, content, published_at).attributes
        end
      end

    end
  end
end
@@ -0,0 +1,26 @@
module JobboardsParser
  module Jobboard
    # Job posting parsed from the Ruby Inside jobs feed.
    class RubyInside < Jobboard

      # Forwards every argument to Jobboard#initialize, then stores the
      # display name of this board in @jobboard.
      def initialize(title, url, location, company, content, published_at)
        super
        @jobboard = self.class.jobboard_name
      end

      # Builds one attributes Hash per feed item.
      #
      # From the item title: the job title is the text before the first
      # " at "; in the text after the last " at ", the company is everything
      # before the first "(" and the location is everything after the last
      # "(" with the first ")" removed. The content is obtained by running
      # Jobboard.extract_content on the item link.
      #
      # Returns an Array of attribute Hashes (empty when the feed has no items).
      def self.parse
        entries = open_feed.items || []

        entries.map do |entry|
          job_title = entry.title.split(" at ").first.strip
          link      = entry.link
          place     = entry.title.split(" at ").last.split("(").last.sub(")","").strip
          employer  = entry.title.split(" at ").last.split("(").first.strip
          body      = Jobboard.extract_content(entry.link)
          posted_at = entry.pubDate

          new(job_title, link, place, employer, body, posted_at).attributes
        end
      end

    end
  end
end
@@ -0,0 +1,26 @@
module JobboardsParser
  module Jobboard
    # Job posting parsed from the Ruby Now jobs feed.
    class RubyNow < Jobboard

      # Takes the same arguments as Jobboard#initialize and additionally
      # records the display name of this board in @jobboard.
      def initialize(title, url, location, company, content, published_at)
        super(title, url, location, company, content, published_at)
        @jobboard = self.class.jobboard_name
      end

      # Reads the board feed and builds one posting per feed item.
      #
      # The job title is the text before the first " at " of the item title,
      # the location is the text after the last " at:". The item guid is used
      # as the URL and no company is extracted (nil).
      #
      # Returns an Array holding the attributes Hash of every feed item
      # (empty when the feed has no items).
      def self.parse
        (open_feed.items || []).map do |item|
          title        = item.title.split(" at ").first.strip
          url          = item.guid
          location     = item.title.split(" at:").last.strip
          company      = nil
          content      = item.description
          # Was `item.pudDate`, a misspelling of the `pubDate` field that the
          # other RSS-based boards read.
          published_at = item.pubDate

          new(title, url, location, company, content, published_at).attributes
        end
      end

    end
  end
end
@@ -0,0 +1,26 @@
module JobboardsParser
  module Jobboard
    # Job posting parsed from the Smashing Magazine jobs feed.
    class SmashingMagazine < Jobboard

      # Forwards every argument to Jobboard#initialize, then stores the
      # display name of this board in @jobboard.
      def initialize(title, url, location, company, content, published_at)
        super
        @jobboard = self.class.jobboard_name
      end

      # Builds one attributes Hash per feed item.
      #
      # The item title is split on "-": segment 1 is used for both the job
      # title and the company, and the location is the text inside the last
      # pair of parentheses of segment 2. The content is obtained by running
      # extract_content on the item link.
      #
      # NOTE(review): title and company read the same segment, which looks
      # like a copy/paste slip - confirm against the feed which segment each
      # one should use.
      #
      # Returns an Array of attribute Hashes (empty when the feed has no items).
      def self.parse
        entries = open_feed.items || []

        entries.map do |entry|
          job_title = entry.title.split("-")[1].strip
          link      = entry.link
          place     = entry.title.split("-")[2].split("(").last.split(")").first.strip
          employer  = entry.title.split("-")[1].strip
          body      = extract_content(entry.link)
          posted_at = entry.updated

          new(job_title, link, place, employer, body, posted_at).attributes
        end
      end

    end
  end
end
@@ -0,0 +1,26 @@
module JobboardsParser
  module Jobboard
    # Job posting parsed from the Startuply feed.
    class Startuply < Jobboard

      # Takes the same arguments as Jobboard#initialize and additionally
      # records the display name of this board in @jobboard.
      def initialize(title, url, location, company, content, published_at)
        super(title, url, location, company, content, published_at)
        @jobboard = self.class.jobboard_name
      end

      # Reads the board feed and builds one posting per feed item.
      #
      # The job title is the text before the first "-" of the item title and
      # the location is the text after its last " in ". The company is
      # recovered from the first link of the description (see the inline
      # comments). The content is obtained by running extract_content on the
      # item link.
      #
      # Returns an Array holding the attributes Hash of every feed item
      # (empty when the feed has no items).
      def self.parse
        (open_feed.items || []).map do |item|
          title    = item.title.split("-").first.strip
          url      = item.link
          location = item.title.split(" in ").last.strip

          # Take the path that follows "Companies" in the first href, up to
          # ".aspx", then drop the "_"-separated tokens that start with a
          # positive number (presumably ids - TODO confirm) and keep the
          # last "/" segment of what remains.
          company_path = item.description.split("href")[1].split("Companies")[1].split(".aspx").first
          name_tokens  = company_path.split("_").reject { |token| token.to_i > 0 }
          company      = name_tokens.join(" ").split("/").last

          content = extract_content(item.link)
          # Was `item.pubdate`; the field read by the other RSS-based boards
          # is spelled `pubDate`.
          published_at = item.pubDate

          new(title, url, location, company, content, published_at).attributes
        end
      end

    end
  end
end
@@ -0,0 +1,56 @@
module JobboardsParser
  module Jobboard
    # Base class for a job posting read from a job board feed.
    #
    # Subclasses implement `self.parse` and set @jobboard; this class holds
    # the posting fields, exposes them as a Hash and provides the helpers
    # used to look up a board's configuration, open its feed and extract the
    # text of a linked page.
    class Jobboard

      # title, url, location, company, content, published_at - the posting
      # fields, stored as given.
      def initialize(title, url, location, company, content, published_at)
        @title        = title
        @url          = url
        @location     = location
        @company      = company
        @content      = content
        @published_at = published_at
      end

      # Returns a Hash with the keys :title, :url, :location, :company,
      # :content, :published_at and :jobboard.
      #
      # String values are returned as stripped copies, so the strings handed
      # to the constructor are never modified (and may be frozen).
      # :jobboard is nil unless a subclass has set @jobboard.
      def attributes
        attrs = {
          :title        => @title,
          :url          => @url,
          :location     => @location, # previously stored but never returned
          :company      => @company,
          :content      => @content,
          :published_at => @published_at,
          :jobboard     => @jobboard
        }
        attrs.each { |key, value| attrs[key] = value.strip if value.is_a?(String) }
        attrs
      end

      # Stores +text+ as the content after decoding the HTML entities handled
      # by #htmlize.
      def content=(text)
        @content = htmlize(text)
      end

      # Key of this class in JobboardsParser::BOARDS: the underscored class
      # name without its namespace, as a Symbol.
      def self.board_key
        ActiveSupport::Inflector.underscore(self.to_s.gsub(/^.*::/, '')).to_sym
      end

      # Returns the feed URL configured for this board in
      # JobboardsParser::BOARDS.
      def self.feed
        JobboardsParser::BOARDS[board_key][:url]
      end

      # Returns the display name configured for this board, or '' when the
      # board has no Hash entry in JobboardsParser::BOARDS.
      def self.jobboard_name
        board = JobboardsParser::BOARDS[board_key]
        board.is_a?(Hash) ? board[:name] : ''
      end

      # Opens the board feed and returns it parsed by SimpleRSS.
      def self.open_feed
        SimpleRSS.parse(open(feed))
      end

      # Runs Boilerpipe on +url+ and returns the "content" entry of its JSON
      # response, or "" when the response cannot be decoded or lacks it.
      def self.extract_content(url)
        page = Boilerpipe.extract(url, { :output => :json })
        begin
          ActiveSupport::JSON.decode(page)["response"]["content"]
        rescue StandardError
          ""
        end
      end

      private

      # Decodes &lt;, &gt;, &nbsp; and &amp; and strips the result.
      # &amp; is decoded last so that escaped entities are not decoded twice.
      def htmlize(content)
        content.gsub(/&lt;/, "<").gsub(/&gt;/, ">").gsub(/&nbsp;/, " ").gsub(/&amp;/, "&").strip
      end

    end
  end
end
@@ -0,0 +1,77 @@
# Load the runtime dependencies. A plain require is tried first; if that
# fails, the gems are activated explicitly through RubyGems and required again.
begin
  require 'rubygems'
  require 'boilerpipe'
  require 'simple-rss'
  require 'active_support'
  require "open-uri"

  require "active_support/core_ext/string/inflections.rb"
  require "active_support/json.rb"

rescue LoadError
  require 'rubygems'
  begin
    gem 'simple-rss'
    gem 'boilerpipe'
    gem 'activesupport'
    require 'simple-rss'
    # Lower-case, like the first attempt: 'Boilerpipe' cannot be found on a
    # case-sensitive file system.
    require 'boilerpipe'
    require 'active_support'
    # open-uri is needed to open the feed URLs; the fallback path used to
    # skip it.
    require "open-uri"

    require "active_support/core_ext/string/inflections.rb"
    require "active_support/json.rb"

  rescue Gem::LoadError => e
    puts "WARNING: Gem LoadError: #{e.message}"
  end
end

# The base class must be loaded before the boards that inherit from it.
require "jobboards/core"
require "jobboards/boards/authentic_jobs"
require "jobboards/boards/crunchboard"
require "jobboards/boards/github"
require "jobboards/boards/joel_on_software"
require "jobboards/boards/krop"
require "jobboards/boards/ruby_inside"
require "jobboards/boards/ruby_now"
require "jobboards/boards/37_signals"
require "jobboards/boards/smashing_magazine"
require "jobboards/boards/startuply"

module JobboardsParser

  # Supported boards: key => feed URL and display name.
  # Each key is the underscored name of the matching class under
  # JobboardsParser::Jobboard.
  BOARDS = {
    :authentic_job     => { :url => "http://www.authenticjobs.com/rss/index.xml", :name => "Authentic jobs" },
    :crunchboard       => { :url => "http://feeds.feedburner.com/CrunchboardJobs?format=xml", :name => "Crunchboard" },
    :github            => { :url => "http://jobs.github.com/positions.atom", :name => "Github" },
    :joel_on_software  => { :url => "http://careers.joelonsoftware.com/Jobs/Feed?", :name => "Joel on software" },
    :krop              => { :url => "http://www.krop.com/services/feeds/rss/latest/", :name => "Krop" },
    :ruby_inside       => { :url => "http://jobs.rubyinside.com/a/jbb/find-jobs-rss", :name => "Ruby Inside" },
    :ruby_now          => { :url => "http://feeds.feedburner.com/jobsrubynow?format=xml", :name => "Ruby now" },
    :signal            => { :url => "http://jobs.37signals.com/jobs.rss", :name => "37 Signals" },
    :smashing_magazine => { :url => "http://jobs.smashingmagazine.com/rss/all/all", :name => "Smashing Magazine" },
    :startuply         => { :url => "http://startuply.com/Rss/HomePage.aspx", :name => "Startuply" },
  }.freeze

  # Load jobboards.
  #
  # Pass the keys of the jobboards you want to load:
  #   JobboardsParser.load(:crunchboard, :github)
  # Or pass :all (or nothing) to load every jobboard:
  #   JobboardsParser.load(:all)
  #
  # Returns a flat Array with the results of every requested board's `parse`.
  # Raises InvalidJobboard when any requested key is not in BOARDS.
  def self.load(*opts)
    opts = BOARDS.keys if opts.empty? || opts.first == :all

    unknown = opts.map { |board| board.to_sym } - BOARDS.keys
    raise InvalidJobboard, "unknown jobboard(s): #{unknown.join(', ')}" unless unknown.empty?

    opts.map { |board|
      # Look the class up by name instead of eval'ing a string of code.
      parser = JobboardsParser::Jobboard.const_get(board.to_s.classify)
      parser.respond_to?(:parse) ? parser.parse : []
    }.flatten
  end

  # Raised when a requested board does not exist.
  class InvalidJobboard < StandardError; end

end
# Manual smoke run: fetch and print the Crunchboard postings only when this
# file is executed directly, so that merely requiring the library no longer
# hits the network or writes to stdout.
print JobboardsParser.load(:crunchboard) if __FILE__ == $PROGRAM_NAME
metadata ADDED
@@ -0,0 +1,127 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: jobboards-parser
3
+ version: !ruby/object:Gem::Version
4
+ hash: 29
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 1
10
+ version: 0.0.1
11
+ platform: ruby
12
+ authors:
13
+ - "Gr\xC3\xA9gory Marcilhacy"
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2010-05-15 00:00:00 +02:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: boilerpipe
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ hash: 23
30
+ segments:
31
+ - 0
32
+ - 0
33
+ - 4
34
+ version: 0.0.4
35
+ type: :runtime
36
+ version_requirements: *id001
37
+ - !ruby/object:Gem::Dependency
38
+ name: simple-rss
39
+ prerelease: false
40
+ requirement: &id002 !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ">="
44
+ - !ruby/object:Gem::Version
45
+ hash: 25
46
+ segments:
47
+ - 1
48
+ - 2
49
+ - 3
50
+ version: 1.2.3
51
+ type: :runtime
52
+ version_requirements: *id002
53
+ - !ruby/object:Gem::Dependency
54
+ name: activesupport
55
+ prerelease: false
56
+ requirement: &id003 !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ hash: 5
62
+ segments:
63
+ - 2
64
+ - 3
65
+ version: "2.3"
66
+ type: :runtime
67
+ version_requirements: *id003
68
+ description: Simple ruby library for parsing tech jobboards
69
+ email: g.marcilhacy@gmail.com
70
+ executables: []
71
+
72
+ extensions: []
73
+
74
+ extra_rdoc_files: []
75
+
76
+ files:
77
+ - jobboards-parser.gemspec
78
+ - README.textile
79
+ - Rakefile
80
+ - lib/jobboards_parser.rb
81
+ - lib/jobboards/core.rb
82
+ - lib/jobboards/boards/37_signals.rb
83
+ - lib/jobboards/boards/authentic_jobs.rb
84
+ - lib/jobboards/boards/crunchboard.rb
85
+ - lib/jobboards/boards/github.rb
86
+ - lib/jobboards/boards/joel_on_software.rb
87
+ - lib/jobboards/boards/krop.rb
88
+ - lib/jobboards/boards/ruby_inside.rb
89
+ - lib/jobboards/boards/ruby_now.rb
90
+ - lib/jobboards/boards/smashing_magazine.rb
91
+ - lib/jobboards/boards/startuply.rb
92
+ has_rdoc: true
93
+ homepage: ""
94
+ licenses: []
95
+
96
+ post_install_message:
97
+ rdoc_options: []
98
+
99
+ require_paths:
100
+ - lib
101
+ required_ruby_version: !ruby/object:Gem::Requirement
102
+ none: false
103
+ requirements:
104
+ - - ">="
105
+ - !ruby/object:Gem::Version
106
+ hash: 3
107
+ segments:
108
+ - 0
109
+ version: "0"
110
+ required_rubygems_version: !ruby/object:Gem::Requirement
111
+ none: false
112
+ requirements:
113
+ - - ">="
114
+ - !ruby/object:Gem::Version
115
+ hash: 3
116
+ segments:
117
+ - 0
118
+ version: "0"
119
+ requirements: []
120
+
121
+ rubyforge_project:
122
+ rubygems_version: 1.6.2
123
+ signing_key:
124
+ specification_version: 3
125
+ summary: Simple ruby library for parsing tech jobboards
126
+ test_files: []
127
+