linsc 0.0.11 → 0.0.12
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +46 -13
- data/lib/linsc/lin.rb +1 -1
- data/lib/linsc/merger.rb +1 -1
- data/lib/linsc/proxy.rb +6 -4
- data/lib/linsc/proxy_handler.rb +3 -0
- data/linsc.gemspec +1 -1
- metadata +1 -3
- data/linsc-0.0.1.gem +0 -0
- data/linsc-0.0.2.gem +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a921c0d961c96ce6b775700e1937513f1e1b7c92
|
4
|
+
data.tar.gz: e849dd28fdaf09b7e5592cde52a27ee04045205d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 20d14e5800c05b0417b1917e6983c44b05bf2b898d8e3cb601cd952ec846ff13489a2a6eb6368452bc0cb6681361688ea0504a979dc33bc0f253d7d23c6da0a5
|
7
|
+
data.tar.gz: f763c237238b6f116d69949f04dc9fdae4be1d716dcb2e936ba1f71b6519ea2f4d52f5e36b20bca0ff93f77f8e376c1e8d54f691e43fb01f4b0a5acc25d4ec80
|
data/README.md
CHANGED
@@ -1,34 +1,67 @@
|
|
1
1
|
# Linsc
|
2
2
|
|
3
|
-
|
3
|
+
## Installation
|
4
4
|
|
5
|
-
|
5
|
+
### Windows
|
6
6
|
|
7
|
-
|
7
|
+
- Make sure PowerShell is installed
|
8
|
+
|
9
|
+
- Download the Windows Ruby installer from here: http://rubyinstaller.org/downloads/
|
10
|
+
|
11
|
+
- Install Ruby
|
8
12
|
|
9
|
-
|
13
|
+
- Open PowerShell and run the command:
|
14
|
+
```ruby
|
15
|
+
gem install linsc
|
16
|
+
```
|
10
17
|
|
18
|
+
- When installation is complete, find your gem directory with:
|
11
19
|
```ruby
|
12
|
-
gem
|
20
|
+
gem environment
|
13
21
|
```
|
14
22
|
|
15
|
-
|
23
|
+
- Navigate to the path listed for INSTALLATION DIRECTORY, then gems -> linsc-(version) -> data
|
16
24
|
|
17
|
-
|
25
|
+
- Here you will find an empty proxies.txt and recruiters.txt
|
18
26
|
|
19
|
-
|
27
|
+
- Fill the recruiters file with your recruiters, ordered by priority. One recruiter per line, comma separated like this:
|
28
|
+
```
|
29
|
+
LIN1,
|
30
|
+
LIN2,
|
31
|
+
LIN3,
|
32
|
+
LIN4
|
33
|
+
```
|
20
34
|
|
21
|
-
|
35
|
+
- Fill proxies.txt, one proxy per line in the form ip:port or ip:port:username:password if using a username/password with the proxies.
|
36
|
+
```
|
37
|
+
108.123.523.23:8080:admin:pass
|
38
|
+
108.123.523.123:8080:admin:pass
|
39
|
+
108.123.523.535:8080:admin:pass
|
40
|
+
```
|
41
|
+
(username and password optional)
|
22
42
|
|
23
43
|
## Usage
|
24
44
|
|
25
|
-
|
45
|
+
- Download your LinkedIn connections export CSV files and put them in an empty directory. Each of these files should have exactly the same filename as one of the lines from your recruiters.txt file.
|
46
|
+
These CSV files are expected to have the following fields: First Name, Last Name, E-mail Address, Company, Job Title. All other fields are ignored.
|
47
|
+
|
48
|
+
- Run a Salesforce report and put the csv in the same directory as your LIN files. The required fields for the Salesforce file are: Contact ID, LIN ID, Email.
|
49
|
+
This file should be named sf_ref.csv
|
50
|
+
|
51
|
+
- In PowerShell (or any terminal), navigate to the directory and run this command:
|
52
|
+
```
|
53
|
+
linsc
|
54
|
+
```
|
26
55
|
|
27
|
-
|
56
|
+
- There are several optional flags you can provide to modify behaviour. To see the full list of available flags, run:
|
57
|
+
```
|
58
|
+
linsc -h
|
59
|
+
```
|
28
60
|
|
29
|
-
|
61
|
+
- If all proxies get blocked, the program will exit. Wait a few hours or a day for the proxies to cool off, then run linsc again. It will pick up where it left off.
|
30
62
|
|
31
|
-
|
63
|
+
- When scraping is complete, import your data. If you had the -i flag set, you must import the contact_insert csv, then export a new report with the fields: LIN ID, Contact ID.
|
64
|
+
Name this csv history_ref.csv and put it in the same folder as your data. Run linsc again with the -e flag set and the new Contact IDs will be mapped to the education and employment histories for the new contacts. You can now import these new history objects.
|
32
65
|
|
33
66
|
## Contributing
|
34
67
|
|
data/lib/linsc/lin.rb
CHANGED
@@ -167,6 +167,7 @@ class LinScraper
|
|
167
167
|
sleep(@cooldown) if @noproxy
|
168
168
|
page = agent.get(url)
|
169
169
|
puts 'ACCESS GRANTED'
|
170
|
+
proxy.good if proxy
|
170
171
|
|
171
172
|
return false unless page.at_css("#name") && page.css("#experience .positions .position")
|
172
173
|
return false unless name_check(page.at_css("#name").text, "#{row['First Name']} #{row['Last Name']}")
|
@@ -184,7 +185,6 @@ class LinScraper
|
|
184
185
|
end
|
185
186
|
end
|
186
187
|
end
|
187
|
-
proxy.good if proxy
|
188
188
|
if match
|
189
189
|
return [url, page]
|
190
190
|
else
|
data/lib/linsc/merger.rb
CHANGED
@@ -28,7 +28,7 @@ class Merger
|
|
28
28
|
emails = {}
|
29
29
|
@lin_files.each do |pn|
|
30
30
|
lin_file = pn.to_s
|
31
|
-
recruiter_name =
|
31
|
+
recruiter_name = pn.basename.to_s.match(/LIN[^.]+/)[0]
|
32
32
|
puts "merging #{recruiter_name}"
|
33
33
|
clean_file = File.read(lin_file, encoding: 'windows-1252').strip
|
34
34
|
CSV.parse(clean_file, headers: true, encoding: 'windows-1252') do |row|
|
data/lib/linsc/proxy.rb
CHANGED
@@ -1,19 +1,21 @@
|
|
1
1
|
class Proxy
|
2
|
-
attr_accessor :ip, :port, :username, :password, :status, :last_used, :user_agent
|
2
|
+
attr_accessor :ip, :port, :username, :password, :status, :last_used, :user_agent, :burnout_time, :pages_before_burnout
|
3
3
|
|
4
|
-
def initialize(ip:, port: 80, username: nil, password: nil, status: nil, last_used: nil, user_agent: nil)
|
5
|
-
@ip, @port, @username, @password, @status, @last_used =
|
6
|
-
ip, port, username, password, status, last_used
|
4
|
+
def initialize(ip:, port: 80, username: nil, password: nil, status: nil, last_used: nil, user_agent: nil, burnout_time: nil, pages_before_burnout: 0)
|
5
|
+
@ip, @port, @username, @password, @status, @last_used, @user_agent, @burnout_time, @pages_before_burnout =
|
6
|
+
ip, port, username, password, status, last_used, user_agent, burnout_time, pages_before_burnout
|
7
7
|
end
|
8
8
|
|
9
9
|
def dead
|
10
10
|
@status = 'dead'
|
11
11
|
@last_used = Time.now
|
12
|
+
@burnout_time = Time.now
|
12
13
|
end
|
13
14
|
|
14
15
|
def good
|
15
16
|
@status = 'good'
|
16
17
|
@last_used = Time.now
|
18
|
+
@pages_before_burnout += 1
|
17
19
|
end
|
18
20
|
|
19
21
|
def good?
|
data/lib/linsc/proxy_handler.rb
CHANGED
@@ -35,6 +35,9 @@ class ProxyHandler
|
|
35
35
|
best_proxy
|
36
36
|
else
|
37
37
|
puts "All proxies are dead. Wait a few hours before resuming."
|
38
|
+
@proxies.each do |proxy|
|
39
|
+
puts "ip: #{proxy.ip} .... pages before burnout: #{proxy.pages_before_burnout} .... time of burnout: #{proxy.burnout_time}"
|
40
|
+
end
|
38
41
|
exit
|
39
42
|
end
|
40
43
|
end
|
data/linsc.gemspec
CHANGED
@@ -4,7 +4,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
4
4
|
|
5
5
|
Gem::Specification.new do |spec|
|
6
6
|
spec.name = "linsc"
|
7
|
-
spec.version = "0.0.11"
|
7
|
+
spec.version = "0.0.12"
|
8
8
|
spec.authors = ["Dan Molloy"]
|
9
9
|
spec.email = ["danieljmolloy1@gmail.com"]
|
10
10
|
spec.date = '2016-03-31'
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: linsc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.11
|
4
|
+
version: 0.0.12
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dan Molloy
|
@@ -112,8 +112,6 @@ files:
|
|
112
112
|
- lib/linsc/parsers.rb
|
113
113
|
- lib/linsc/proxy.rb
|
114
114
|
- lib/linsc/proxy_handler.rb
|
115
|
-
- linsc-0.0.1.gem
|
116
|
-
- linsc-0.0.2.gem
|
117
115
|
- linsc.gemspec
|
118
116
|
homepage: https://github.com/danmolloy/linsc
|
119
117
|
licenses:
|
data/linsc-0.0.1.gem
DELETED
Binary file
|
data/linsc-0.0.2.gem
DELETED
Binary file
|