linkedincrawler 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +7 -0
  2. data/lib/linkedin_crawler.rb +47 -0
  3. metadata +87 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 266cfd3d2297b67906c17aa1781ddf37b9519683
4
+ data.tar.gz: 9c46996cd2eef74e646e4378099afcfe5d357cc7
5
+ SHA512:
6
+ metadata.gz: 350fa16c241578c88e840b9ccaab17267bdc9b7af9bb38882e6b73a5a9c72b06511b752dc8e47015bc2698840b38870559c5bd40f58a4d3cdb914884244cc6cd
7
+ data.tar.gz: de3aef7908c810900828f4a9a399cf939950ddf32495835dba4cd613804b80044e4f9757f5cec707b2f009aa680ca2fbf5eb0c545e046e08f323c3603822c6d2
@@ -0,0 +1,47 @@
1
+ require 'linkedinparser'
2
+ require 'generalscraper'
3
+ require 'selenium-webdriver'
4
+ require 'pry'
5
+
6
# Crawls public LinkedIn profiles: runs a Google search restricted to
# linkedin.com/pub via GeneralScraper, loads every resulting profile URL in a
# Firefox WebDriver session, parses each page with LinkedinParser and
# accumulates the parsed records in memory.
class LinkedinCrawler
  # getPage is not defined in this class; presumably it is supplied by
  # ProxyManager (from the generalscraper gem) -- confirm.
  include ProxyManager

  # Proxy list path used when the caller does not supply one. This is the
  # value that was previously hard-coded inside #search.
  DEFAULT_PROXY_LIST = "/home/shidash/proxies"

  # @param search_terms search terms handed to GeneralScraper
  # @param proxy_list [String] path handed to GeneralScraper as its third
  #   argument; defaults to the historical hard-coded location so existing
  #   one-argument callers keep working
  def initialize(search_terms, proxy_list = DEFAULT_PROXY_LIST)
    @search_terms = search_terms
    @proxy_list = proxy_list
    @output = []
  end

  # Run the search terms and scrape every profile URL that comes back.
  #
  # The browser started by #gen_driver is always shut down when this method
  # finishes, even if scraping raises, so repeated calls do not leak Firefox
  # sessions.
  #
  # @return the parsed list of URLs that was iterated over
  def search
    # Run Google search.
    # NOTE(review): the meaning of the trailing `false` is defined by
    # GeneralScraper, not visible here -- confirm.
    g = GeneralScraper.new("site:linkedin.com/pub", @search_terms, @proxy_list, false)

    # Scrape each resulting LinkedIn page
    gen_driver
    JSON.parse(g.getURLs).each do |profile|
      scrape(profile)
    end
  ensure
    # Release the browser; clear the reference so a dead driver is never reused.
    @driver.quit if @driver
    @driver = nil
  end

  # Generate the Firefox driver used for profile page loads.
  # The profile asks for English content and disables JavaScript.
  def gen_driver
    profile = Selenium::WebDriver::Firefox::Profile.new
    profile['intl.accept_languages'] = 'en'
    profile["javascript.enabled"] = false
    @driver = Selenium::WebDriver.for :firefox, profile: profile
  end

  # Scrape a single profile page and append its parsed records to the output.
  #
  # @param profile_url URL of the profile page to load
  def scrape(profile_url)
    # Get profile page.
    # NOTE(review): the `nil, 5, false` arguments are interpreted by getPage,
    # which is defined outside this file -- confirm their meaning there.
    profile_html = getPage(profile_url, @driver, nil, 5, false).page_source

    # Parse profile and add to output (appended in place)
    l = LinkedinParser.new(profile_html, profile_url, {timestamp: Time.now})
    @output.concat(JSON.parse(l.results_by_job))
  end

  # Return everything collected so far as pretty-printed JSON.
  #
  # @return [String]
  def gen_json
    JSON.pretty_generate(@output)
  end
end
metadata ADDED
@@ -0,0 +1,87 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: linkedincrawler
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - M. C. McGrath
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-11-01 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: linkedinparser
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: generalscraper
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: selenium-webdriver
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ description: Crawls public LinkedIn profiles via Google
56
+ email: shidash@shidash.com
57
+ executables: []
58
+ extensions: []
59
+ extra_rdoc_files: []
60
+ files:
61
+ - lib/linkedin_crawler.rb
62
+ homepage: https://github.com/TransparencyToolkit/linkedincrawler
63
+ licenses:
64
+ - GPL
65
+ metadata: {}
66
+ post_install_message:
67
+ rdoc_options: []
68
+ require_paths:
69
+ - lib
70
+ required_ruby_version: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ version: '0'
75
+ required_rubygems_version: !ruby/object:Gem::Requirement
76
+ requirements:
77
+ - - ">="
78
+ - !ruby/object:Gem::Version
79
+ version: '0'
80
+ requirements: []
81
+ rubyforge_project:
82
+ rubygems_version: 2.4.8
83
+ signing_key:
84
+ specification_version: 4
85
+ summary: Crawls public LinkedIn profiles
86
+ test_files: []
87
+ has_rdoc: