rubyscraper 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 6f4cfba8b1442632b6c54f30886d254ff25fbbd3
4
+ data.tar.gz: 4bc8431e8294d5900819d29735f69650e93448da
5
+ SHA512:
6
+ metadata.gz: 73ff93065f3079602dfcc58e35e08686ce4fff2d693b7f80c63e8cd37f011826156731b7d8a42a84472232c4a0c5b18dcb8efde1e7a7ede2ded37c431d22f95f
7
+ data.tar.gz: 04e30fe957b8d95a25cb0620539eca3b8956eb1c5638c8bd038aa6b461f4868555889b033201b350dc651f5faff7e2e38c2fac86ea98ce4bf13ac76de21834c5
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in rubyscraper.gemspec
4
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,57 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ rubyscraper (0.1.0)
5
+ capybara
6
+ poltergeist
7
+ rest-client
8
+
9
+ GEM
10
+ remote: https://rubygems.org/
11
+ specs:
12
+ capybara (2.4.4)
13
+ mime-types (>= 1.16)
14
+ nokogiri (>= 1.3.3)
15
+ rack (>= 1.0.0)
16
+ rack-test (>= 0.5.4)
17
+ xpath (~> 2.0)
18
+ cliver (0.3.2)
19
+ domain_name (0.5.24)
20
+ unf (>= 0.0.5, < 1.0.0)
21
+ http-cookie (1.0.2)
22
+ domain_name (~> 0.5)
23
+ mime-types (2.4.3)
24
+ mini_portile (0.6.2)
25
+ multi_json (1.11.0)
26
+ netrc (0.10.3)
27
+ nokogiri (1.6.6.2)
28
+ mini_portile (~> 0.6.0)
29
+ poltergeist (1.6.0)
30
+ capybara (~> 2.1)
31
+ cliver (~> 0.3.1)
32
+ multi_json (~> 1.0)
33
+ websocket-driver (>= 0.2.0)
34
+ rack (1.6.0)
35
+ rack-test (0.6.3)
36
+ rack (>= 1.0)
37
+ rake (10.4.2)
38
+ rest-client (1.8.0)
39
+ http-cookie (>= 1.0.2, < 2.0)
40
+ mime-types (>= 1.16, < 3.0)
41
+ netrc (~> 0.7)
42
+ unf (0.1.4)
43
+ unf_ext
44
+ unf_ext (0.0.7.1)
45
+ websocket-driver (0.5.4)
46
+ websocket-extensions (>= 0.1.0)
47
+ websocket-extensions (0.1.2)
48
+ xpath (2.0.0)
49
+ nokogiri (~> 1.3)
50
+
51
+ PLATFORMS
52
+ ruby
53
+
54
+ DEPENDENCIES
55
+ bundler (~> 1.9)
56
+ rake (~> 10.0)
57
+ rubyscraper!
data/README.md ADDED
@@ -0,0 +1,39 @@
1
+ # RubyScraper
2
+
3
+ Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/rubyscraper`. To experiment with that code, run `bin/console` for an interactive prompt.
4
+
5
+ RubyScraper scrapes job sites for job details and sends each job to a server in a POST request.
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ ```ruby
12
+ gem 'rubyscraper'
13
+ ```
14
+
15
+ And then execute:
16
+
17
+ $ bundle
18
+
19
+ Or install it yourself as:
20
+
21
+ $ gem install rubyscraper
22
+
23
+ ## Usage
24
+
25
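+ Run the `rubyscraper` executable with the URL of the endpoint that should receive the scraped jobs as its first argument, e.g. `rubyscraper http://localhost:3000/jobs`. Each scraped job is sent to that endpoint in a POST request.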
26
+
27
+ ## Development
28
+
29
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `bin/console` for an interactive prompt that will allow you to experiment.
30
+
31
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release` to create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
32
+
33
+ ## Contributing
34
+
35
+ 1. Fork it ( https://github.com/ndwhtlssthr/rubyscraper/fork )
36
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
37
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
38
+ 4. Push to the branch (`git push origin my-new-feature`)
39
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
data/bin/console ADDED
@@ -0,0 +1,4 @@
1
#!/usr/bin/env ruby

# Development console: loads the bundle and the rubyscraper library, then
# opens an interactive IRB session so the code can be explored by hand.
require "bundler/setup"
require "rubyscraper"

# Without an explicit REPL the script would load the library and exit
# immediately, so start IRB here.
require "irb"
IRB.start
data/bin/rubyscraper ADDED
@@ -0,0 +1,5 @@
1
#!/usr/bin/env ruby
# Command-line entry point: makes the gem's lib directory loadable and hands
# the arguments and standard streams over to RubyScraper::Binary.
lib_dir = File.expand_path(File.join('..', 'lib'), File.dirname(__FILE__))
$LOAD_PATH.unshift(lib_dir)

require 'rubyscraper/binary'

RubyScraper::Binary.call(ARGV, $stdout, $stderr)
5
+
data/bin/setup ADDED
@@ -0,0 +1,7 @@
1
+ #!/bin/bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+
5
+ bundle install
6
+
7
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,5 @@
1
+ ruby
2
+ ruby+on+rails
3
+ javascript
4
+ junior
5
+ full-stack
@@ -0,0 +1,90 @@
1
+ require 'capybara'
2
+ require 'capybara/poltergeist'
3
+ require 'rest-client'
4
+ require 'rubyscraper/version'
5
+
6
# Scrapes job postings from Stack Overflow Careers through a headless
# Poltergeist browser session and forwards each posting to a server endpoint.
#
# Typical use:
#
#   scraped, posted = RubyScraper.new(endpoint).scrape
class RubyScraper
  include Capybara::DSL

  # Site that is scraped; job links are appended to this host.
  BASE_URL = "http://careers.stackoverflow.com"

  # Result pages fetched for every search term.
  RESULT_PAGES = (1..2)

  # Registers and selects the Poltergeist driver and loads the search terms.
  #
  # endpoint - URL that scraped jobs are POSTed to.
  #
  # Search terms are read from assets/search-terms.txt, one per line. They are
  # interpolated into the query string as-is, so the file is expected to hold
  # URL-ready values (e.g. "ruby+on+rails"). Blank lines are ignored.
  def initialize(endpoint)
    Capybara.register_driver :poltergeist do |app|
      Capybara::Poltergeist::Driver.new(app, js_errors: false)
    end
    Capybara.default_driver = :poltergeist
    @jobs = []
    @posted_jobs = 0
    @endpoint = endpoint
    @search_terms_file = File.expand_path('../assets/search-terms.txt', __FILE__)
    @search_terms = File.foreach(@search_terms_file).map(&:strip).reject(&:empty?)
  end

  # Runs the whole pipeline: collect summaries, collect details, post.
  #
  # Returns a two-element array: the number of jobs scraped and the number of
  # jobs the server reported as newly created.
  def scrape
    get_summaries
    get_bodies
    send_to_server
    [@jobs.length, @posted_jobs]
  end

  # Collects position, link and posting date of every listing on the result
  # pages of each search term and appends them to @jobs.
  def get_summaries
    @search_terms.each do |term|
      RESULT_PAGES.each do |page|
        # The search term must be part of the paged URL; otherwise every term
        # would return the same listings.
        visit "#{BASE_URL}/jobs?searchTerm=#{term}&sort=p&pg=#{page}"
        all(".listResults .-item").each do |listing|
          title_link = listing.find("h3.-title a")
          url = title_link["href"]
          # A job can match several search terms; keep it only once so it is
          # not fetched and posted repeatedly.
          next if @jobs.any? { |job| job[:url] == url }

          # `first` may yield nil when a listing carries no date element.
          date_node = listing.first("p._muted")
          @jobs << {
            position: title_link.text,
            url: url,
            posting_date: date_node && date_node.text
          }
        end
      end
      puts "Pulled #{term} job summaries."
    end
  end

  # Visits every collected job page and adds company, location, description
  # and tags to the job hash. Company and location are only set when the page
  # contains the corresponding element.
  def get_bodies
    @jobs.each_with_index do |job, i|
      # Pause between requests so the site is not hit in a tight loop.
      sleep 1
      visit "#{BASE_URL}#{job[:url]}"
      job[:company] = find("a.employer").text if has_css?("a.employer")
      job[:location] = find("span.location").text if has_css?("span.location")
      job[:description] = all("div.description p").map(&:text).join("\n")
      job[:tags] = all("div.tags a.post-tag").map(&:text)
      puts "Job #{i+1} pulled."
    end
  end

  # POSTs every job to the endpoint and counts the ones the server created.
  #
  # A 201 response counts as saved, a 302 is reported as an already existing
  # job, and any other status is reported as a bad request.
  def send_to_server
    @jobs.each do |job|
      new_job = {
        position: job[:position],
        location: job[:location],
        description: job[:description],
        source: "#{BASE_URL}#{job[:url]}"
      }
      # Passing a block makes RestClient hand over the response instead of
      # raising on non-2xx status codes.
      RestClient.post(@endpoint, job: new_job) do |response, _request, _result|
        case response.code
        when 201
          @posted_jobs += 1
          puts "Job saved."
        when 302
          puts "Job already exists."
        else
          puts "Bad request."
        end
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
+ require 'rubyscraper'
2
+
3
class RubyScraper
  # Command-line front end for the scraper.
  class Binary
    # Runs a full scrape and reports progress.
    #
    # argv      - command-line arguments; the first one is the endpoint URL
    #             that scraped jobs are POSTed to.
    # outstream - IO-like object that receives progress output.
    # errstream - IO-like object that receives error output.
    #
    # When no endpoint is given, a usage message is written to errstream and
    # nothing is scraped. Returns nil.
    def self.call(argv, outstream, errstream)
      endpoint = argv.first
      # Fail fast: without an endpoint the POST step could only fail after
      # the entire (slow) scrape had already run.
      if endpoint.to_s.strip.empty?
        errstream.puts "Usage: rubyscraper ENDPOINT_URL"
        return
      end

      outstream.puts "StackOverflow Job Scraper"
      outstream.puts "---------------------------------------------"
      outstream.puts "Started scraping..."
      outstream.puts "Sending post requests to #{endpoint}"
      jobs_scraped, jobs_saved = RubyScraper.new(endpoint).scrape
      outstream.puts "Scraped #{jobs_scraped} jobs, successfully posted #{jobs_saved} jobs."
      outstream.puts "---------------------------------------------"
      outstream.puts "Completed!"
    end
  end
end
+ end
@@ -0,0 +1,3 @@
1
class RubyScraper
  # Current gem version; read by the gemspec. Frozen so the shared constant
  # cannot be mutated in place.
  VERSION = "0.1.0".freeze
end
+ end
@@ -0,0 +1,24 @@
1
+ $LOAD_PATH.unshift File.expand_path('../lib', __FILE__)
2
+ require 'rubyscraper/version'
3
+
4
# Packaging metadata for the rubyscraper gem.
Gem::Specification.new do |spec|
  spec.name        = 'rubyscraper'
  spec.version     = RubyScraper::VERSION
  spec.authors     = ["Nathan Owsiany"]
  spec.email       = 'nowsiany@gmail.com'
  spec.homepage    = 'https://github.com/ndwhtlssthr/rubyscraper'
  spec.licenses    = ['MIT']
  spec.summary     = "Scrapes the sites..."
  spec.description = "Scrapes job sites for job details and sends post request to server."

  # Ship every regular file below the current directory, except gem archives
  # that were built earlier.
  regular_files = Dir["**/*"].select { |path| File.file?(path) }
  spec.files = regular_files - Dir['*.gem']
  spec.executables << 'rubyscraper'

  # Runtime dependencies (no version constraints).
  %w[capybara poltergeist rest-client].each do |gem_name|
    spec.add_dependency gem_name
  end

  # Development-only dependencies.
  spec.add_development_dependency "bundler", "~> 1.9"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency 'rspec', '~> 3.0'
  spec.add_development_dependency 'pry'
end
@@ -0,0 +1,11 @@
1
+ require 'spec_helper'
2
+
3
# Smoke tests for the gem. The class is named RubyScraper (capital S);
# referring to it as Rubyscraper raises NameError before any example runs.
describe RubyScraper do
  it 'has a version number' do
    expect(RubyScraper::VERSION).not_to be nil
  end

  # An example without a block is reported as pending, which keeps the suite
  # green until real scraping behaviour is covered.
  it 'scrapes job summaries and posts them to the endpoint'
end
@@ -0,0 +1,2 @@
1
+ $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
2
+ require 'rubyscraper'
metadata ADDED
@@ -0,0 +1,156 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rubyscraper
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Nathan Owsiany
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-04-23 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: capybara
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: poltergeist
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rest-client
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: bundler
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '1.9'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '1.9'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rake
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '10.0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '10.0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rspec
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '3.0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '3.0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: pry
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ description: Scrapes job sites for job details and sends post request to server.
112
+ email: nowsiany@gmail.com
113
+ executables:
114
+ - rubyscraper
115
+ extensions: []
116
+ extra_rdoc_files: []
117
+ files:
118
+ - Gemfile
119
+ - Gemfile.lock
120
+ - README.md
121
+ - Rakefile
122
+ - bin/console
123
+ - bin/rubyscraper
124
+ - bin/setup
125
+ - lib/assets/search-terms.txt
126
+ - lib/rubyscraper.rb
127
+ - lib/rubyscraper/binary.rb
128
+ - lib/rubyscraper/version.rb
129
+ - rubyscraper.gemspec
130
+ - spec/rubyscraper_spec.rb
131
+ - spec/spec_helper.rb
132
+ homepage: https://github.com/ndwhtlssthr/rubyscraper
133
+ licenses:
134
+ - MIT
135
+ metadata: {}
136
+ post_install_message:
137
+ rdoc_options: []
138
+ require_paths:
139
+ - lib
140
+ required_ruby_version: !ruby/object:Gem::Requirement
141
+ requirements:
142
+ - - ">="
143
+ - !ruby/object:Gem::Version
144
+ version: '0'
145
+ required_rubygems_version: !ruby/object:Gem::Requirement
146
+ requirements:
147
+ - - ">="
148
+ - !ruby/object:Gem::Version
149
+ version: '0'
150
+ requirements: []
151
+ rubyforge_project:
152
+ rubygems_version: 2.4.6
153
+ signing_key:
154
+ specification_version: 4
155
+ summary: Scrapes the sites...
156
+ test_files: []