linkedincrawler 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/linkedin_crawler.rb +47 -0
- metadata +87 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 266cfd3d2297b67906c17aa1781ddf37b9519683
|
4
|
+
data.tar.gz: 9c46996cd2eef74e646e4378099afcfe5d357cc7
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 350fa16c241578c88e840b9ccaab17267bdc9b7af9bb38882e6b73a5a9c72b06511b752dc8e47015bc2698840b38870559c5bd40f58a4d3cdb914884244cc6cd
|
7
|
+
data.tar.gz: de3aef7908c810900828f4a9a399cf939950ddf32495835dba4cd613804b80044e4f9757f5cec707b2f009aa680ca2fbf5eb0c545e046e08f323c3603822c6d2
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require 'linkedinparser'
|
2
|
+
require 'generalscraper'
|
3
|
+
require 'selenium-webdriver'
|
4
|
+
require 'pry'
|
5
|
+
|
6
|
+
# Crawls public LinkedIn profiles: runs a Google search restricted to
# linkedin.com/pub for the given terms, loads every resulting profile URL in
# Firefox and accumulates the parsed results, which gen_json renders as JSON.
#
# Typical use:
#   crawler = LinkedinCrawler.new("some search terms")
#   crawler.search
#   puts crawler.gen_json
class LinkedinCrawler
  include ProxyManager

  # Proxy list path handed to GeneralScraper when the caller supplies none.
  # Kept as the default so existing one-argument callers behave as before.
  DEFAULT_PROXY_LIST = "/home/shidash/proxies"

  # search_terms - passed through unchanged to GeneralScraper.
  # proxy_list   - third argument for GeneralScraper (presumably a path to a
  #                proxy list file; verify against GeneralScraper).
  def initialize(search_terms, proxy_list = DEFAULT_PROXY_LIST)
    @search_terms = search_terms
    @proxy_list = proxy_list
    @output = []
  end

  # Run search terms and get results.
  #
  # Starts a Firefox driver, scrapes every URL returned by the Google search
  # and always shuts the driver down afterwards, including when a scrape
  # raises, so no browser process is left behind.
  # Returns the array of profile URLs that was iterated.
  def search
    # Run Google search
    scraper = GeneralScraper.new("site:linkedin.com/pub", @search_terms, @proxy_list, false)

    # Scrape each resulting LinkedIn page
    gen_driver
    # getURLs is parsed as JSON here, so it is expected to return a JSON
    # string holding a list of URLs.
    JSON.parse(scraper.getURLs).each do |profile|
      scrape(profile)
    end
  ensure
    close_driver
  end

  # Generate driver for searches.
  #
  # Builds a Firefox profile with the accepted language set to 'en' and
  # JavaScript turned off, then stores the new driver in @driver.
  def gen_driver
    profile = Selenium::WebDriver::Firefox::Profile.new
    profile['intl.accept_languages'] = 'en'
    profile["javascript.enabled"] = false
    @driver = Selenium::WebDriver.for :firefox, profile: profile
  end

  # Quit the current driver, if any, and forget it. Safe to call repeatedly.
  def close_driver
    @driver.quit if @driver
  ensure
    # Drop the reference even if quit raised, so a dead driver is never reused.
    @driver = nil
  end

  # Scrape a single profile page and append its parsed results to the output.
  #
  # A driver is started on demand when none is active (for example when this
  # is called directly after search has already closed its browser); call
  # close_driver when finished in that case.
  def scrape(profile_url)
    gen_driver unless @driver

    # Get profile page
    profile_html = getPage(profile_url, @driver, nil, 5, false).page_source

    # Parse profile and add to output
    parser = LinkedinParser.new(profile_html, profile_url, {timestamp: Time.now})
    # results_by_job is parsed as JSON and appended to an Array, so it is
    # expected to be a JSON string encoding a list.
    @output.concat(JSON.parse(parser.results_by_job))
  end

  # Return everything collected so far as pretty-printed JSON.
  def gen_json
    JSON.pretty_generate(@output)
  end
end
|
metadata
ADDED
@@ -0,0 +1,87 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: linkedincrawler
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- M. C. McGrath
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-11-01 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: linkedinparser
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: generalscraper
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: selenium-webdriver
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
description: Crawls public LinkedIn profiles via Google
|
56
|
+
email: shidash@shidash.com
|
57
|
+
executables: []
|
58
|
+
extensions: []
|
59
|
+
extra_rdoc_files: []
|
60
|
+
files:
|
61
|
+
- lib/linkedin_crawler.rb
|
62
|
+
homepage: https://github.com/TransparencyToolkit/linkedincrawler
|
63
|
+
licenses:
|
64
|
+
- GPL
|
65
|
+
metadata: {}
|
66
|
+
post_install_message:
|
67
|
+
rdoc_options: []
|
68
|
+
require_paths:
|
69
|
+
- lib
|
70
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
71
|
+
requirements:
|
72
|
+
- - ">="
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: '0'
|
75
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
76
|
+
requirements:
|
77
|
+
- - ">="
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: '0'
|
80
|
+
requirements: []
|
81
|
+
rubyforge_project:
|
82
|
+
rubygems_version: 2.4.8
|
83
|
+
signing_key:
|
84
|
+
specification_version: 4
|
85
|
+
summary: Crawls public LinkedIn profiles
|
86
|
+
test_files: []
|
87
|
+
has_rdoc:
|