linkedincrawler 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/lib/linkedin_crawler.rb +47 -0
  3. metadata +87 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 266cfd3d2297b67906c17aa1781ddf37b9519683
4
+ data.tar.gz: 9c46996cd2eef74e646e4378099afcfe5d357cc7
5
+ SHA512:
6
+ metadata.gz: 350fa16c241578c88e840b9ccaab17267bdc9b7af9bb38882e6b73a5a9c72b06511b752dc8e47015bc2698840b38870559c5bd40f58a4d3cdb914884244cc6cd
7
+ data.tar.gz: de3aef7908c810900828f4a9a399cf939950ddf32495835dba4cd613804b80044e4f9757f5cec707b2f009aa680ca2fbf5eb0c545e046e08f323c3603822c6d2
data/lib/linkedin_crawler.rb ADDED
@@ -0,0 +1,47 @@
1
+ require 'linkedinparser'
2
+ require 'generalscraper'
3
+ require 'selenium-webdriver'
4
+ require 'pry'
5
+
6
+ class LinkedinCrawler # Runs a GeneralScraper search, scrapes each returned URL, and collects the parsed results in @output
7
+ include ProxyManager # NOTE(review): presumably supplies getPage used in scrape; it is not defined in this file - confirm
8
+ def initialize(search_terms) # search_terms is stored and later passed unchanged to GeneralScraper.new
9
+ @search_terms = search_terms
10
+ @output = Array.new # grows with every scrape call; serialized by gen_json
11
+ end
12
+
13
+ # Build the search, start the browser, then scrape every URL the search returns
14
+ def search
15
+ # Run Google search restricted to site:linkedin.com/pub; the third argument is a hard-coded local path (presumably a proxy list - confirm)
16
+ g = GeneralScraper.new("site:linkedin.com/pub", @search_terms, "/home/shidash/proxies", false)
17
+
18
+ # Scrape each resulting LinkedIn page; the driver is created once and reused for every URL
19
+ gen_driver
20
+ JSON.parse(g.getURLs).each do |profile| # NOTE(review): this file has no require 'json'; presumably loaded by a dependency - confirm
21
+ scrape(profile)
22
+ end
23
+ end
24
+
25
+ # Create a Firefox WebDriver (accept-language 'en', JavaScript disabled) and store it in @driver
26
+ def gen_driver
27
+ profile = Selenium::WebDriver::Firefox::Profile.new
28
+ profile['intl.accept_languages'] = 'en'
29
+ profile["javascript.enabled"] = false
30
+ @driver = Selenium::WebDriver.for :firefox, profile: profile # NOTE(review): nothing in this class quits or closes @driver
31
+ end
32
+
33
+ # Scrape one profile URL and append its parsed results to @output
34
+ def scrape(profile_url)
35
+ # Get the profile page source through getPage, using the shared @driver
36
+ profile_html = getPage(profile_url, @driver, nil, 5, false).page_source
37
+
38
+ # Parse the profile, passing the current time as the timestamp option, and add to output
39
+ l = LinkedinParser.new(profile_html, profile_url, {timestamp: Time.now})
40
+ @output += JSON.parse(l.results_by_job) # assumes results_by_job is a JSON array - TODO confirm
41
+ end
42
+
43
+ # Return the collected output as a pretty-printed JSON string (nothing is printed here)
44
+ def gen_json
45
+ JSON.pretty_generate(@output)
46
+ end
47
+ end
metadata ADDED
@@ -0,0 +1,87 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: linkedincrawler
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - M. C. McGrath
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-11-01 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: linkedinparser
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: generalscraper
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: selenium-webdriver
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ description: Crawls public LinkedIn profiles via Google
56
+ email: shidash@shidash.com
57
+ executables: []
58
+ extensions: []
59
+ extra_rdoc_files: []
60
+ files:
61
+ - lib/linkedin_crawler.rb
62
+ homepage: https://github.com/TransparencyToolkit/linkedincrawler
63
+ licenses:
64
+ - GPL
65
+ metadata: {}
66
+ post_install_message:
67
+ rdoc_options: []
68
+ require_paths:
69
+ - lib
70
+ required_ruby_version: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ version: '0'
75
+ required_rubygems_version: !ruby/object:Gem::Requirement
76
+ requirements:
77
+ - - ">="
78
+ - !ruby/object:Gem::Version
79
+ version: '0'
80
+ requirements: []
81
+ rubyforge_project:
82
+ rubygems_version: 2.4.8
83
+ signing_key:
84
+ specification_version: 4
85
+ summary: Crawls public LinkedIn profiles
86
+ test_files: []
87
+ has_rdoc: