linsc 0.0.5 → 0.0.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6592e6700dca1d71ba381695184a01b2893dd35a
4
- data.tar.gz: 229bb6f3900be5049e472400c75c6d447e62b0ed
3
+ metadata.gz: 2a7c54e13a9f1ade26f5330d410af6e23d6fcb72
4
+ data.tar.gz: b5f5b89e4169506b415f8a3fceeaff678a0615ca
5
5
  SHA512:
6
- metadata.gz: 50f58ae940d139005e6ba95aad136a9bb263c0df45a07db492500c3c6153a2942d8c735ade047648df7ed46bf45f7fadbdc1a125d7fcd279270346f98791d784
7
- data.tar.gz: 279f1cfe337987b1e348a8a2beb7240a82dc2253123284f9244aa1b4081c2d4b69f12e7ec857f8134155250654439ddc2c1ec276be8d011912d061b59f24b568
6
+ metadata.gz: e79102d02acfcec2064b49611aa352b193d89bb90b93ff126995ea0e5a5e5584da6d3624208ce7bafcd223f82ea6c5baed1964d2cd1fa204dfdee0e326e19015
7
+ data.tar.gz: 48b1af2a42a580a5ad495abe53dd6f550057c7280c6942bde0126fe6226d792dbdcba62016054f9bfdbf2639e0bf57086a478f67a04fa2dec2d3a16a5f0aa7a7
@@ -42,20 +42,20 @@ class CrossRef
42
42
  b = y[@master_lookup_field]
43
43
  a && b ? a <=> b : a ? -1 : 1
44
44
  end
45
- master_lookup_values = master_data.collect {|row| row[@master_lookup_field]&.downcase}
45
+ master_lookup_values = master_data.collect {|row| row[@master_lookup_field] && row[@master_lookup_field].downcase}
46
46
  i = 0
47
47
  CSV.foreach(@child_path, headers: true, encoding: 'utf-8') do |child_row|
48
48
  i += 1
49
49
  puts "email lookup - row: #{i}/#{@child_length}"
50
- child_lookup_value = child_row[@child_lookup_field]&.downcase
51
- if child_lookup_value&.include?('@') || !@email_key ## generalize this
50
+ child_lookup_value = child_row[@child_lookup_field].downcase if child_row[@child_lookup_field]
51
+ if (child_lookup_value && child_lookup_value.include?('@')) || !@email_key ## generalize this
52
52
  match_index = master_lookup_values.bsearch_index do |master_lookup_value|
53
53
  child_lookup_value && master_lookup_value ?
54
54
  child_lookup_value <=> master_lookup_value : child_lookup_value ? -1 : 1
55
55
  end
56
56
  if !match_index
57
57
  match_index = master_data.find_index do |master_row|
58
- master_secondary_lookups = @master_secondary_lookups.collect{|x| x&.downcase}
58
+ master_secondary_lookups = @master_secondary_lookups.collect{|x| x && x.downcase}
59
59
  master_secondary_lookups.include?(child_lookup_value)
60
60
  end
61
61
  end
@@ -94,7 +94,7 @@ class CrossRef
94
94
  end
95
95
  master_row_new = CSV::Row.new(@headers, [])
96
96
  master_row.each do |key, value|
97
- master_row_new[key] = value&.encode('utf-8', invalid: :replace, undef: :replace, replace: '#')
97
+ master_row_new[key] = value.encode('utf-8', invalid: :replace, undef: :replace, replace: '#') if value
98
98
  end
99
99
  master_row_new
100
100
  end
@@ -3,7 +3,7 @@ module CSVHandlers
3
3
  values = []
4
4
  headers.each do |header|
5
5
  if encoding
6
- values << row[header]&.encode(encoding)
6
+ values << row[header].encode(encoding) if row[header]
7
7
  else
8
8
  values << row[header]
9
9
  end
@@ -33,7 +33,7 @@ module CSVHandlers
33
33
  unless File.exist?(f)
34
34
  FileUtils.touch(f)
35
35
  csv = CSV.open(f, "w+")
36
- csv << @headers.collect {|x| x&.encode('utf-8')}
36
+ csv << @headers.collect {|x| x && x.encode('utf-8')}
37
37
  csv.close
38
38
  end
39
39
  end
@@ -42,7 +42,7 @@ module CSVHandlers
42
42
  unless File.exist?(f)
43
43
  FileUtils.touch(f)
44
44
  csv = CSV.open(f, "w+")
45
- csv << headers.collect {|x| x&.encode('utf-8')}
45
+ csv << headers.collect {|x| x && x.encode('utf-8')}
46
46
  csv.close
47
47
  end
48
48
  end
data/lib/linsc/duck.rb CHANGED
@@ -42,7 +42,11 @@ class DuckScraper
42
42
  CSV.foreach(@input_file, headers: true) do |input_row|
43
43
  count += 1
44
44
  next if @start && @start >= count
45
- tries = @proxies&.length || 3
45
+ if @proxies
46
+ tries = @proxies.length
47
+ else
48
+ tries = 3
49
+ end
46
50
  puts "ddg #{count}/#{@input_length}"
47
51
  begin
48
52
  unless sufficient_data?(input_row)
data/lib/linsc/merger.rb CHANGED
@@ -29,7 +29,7 @@ class Merger
29
29
  clean_file = File.read(lin_file, encoding: 'windows-1252').strip
30
30
  CSV.parse(clean_file, headers: true, encoding: 'windows-1252') do |row|
31
31
  row["Recruiter"] = recruiter_name
32
- email = row['E-mail Address']&.downcase
32
+ email = row['E-mail Address'].downcase if row['E-mail Address']
33
33
  if emails.has_key?(email)
34
34
  emails[email] << row
35
35
  else
@@ -56,10 +56,10 @@ class Merger
56
56
  output_row = CSV::Row.new(@headers, [])
57
57
  correct_row.each do |key, value|
58
58
  if @mapping[key]
59
- output_row[@mapping[key]] = value&.encode('utf-8')
59
+ output_row[@mapping[key]] = value.encode('utf-8') if value
60
60
  end
61
61
  end
62
- output_row['Email'] = output_row['Email']&.downcase
62
+ output_row['Email'] = output_row['Email'].downcase if output_row['Email']
63
63
  else
64
64
  output_row = create_row(correct_row, @headers, 'utf-8')
65
65
  end
data/lib/linsc/parsers.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  module Parsers
2
2
  def scrape_contact(input_row, page, mode)
3
3
  row = CSV::Row.new(@headers, [])
4
- name = page.at_css("#name")&.text&.split
4
+ name = page.at_css("#name").text.split
5
5
  contact_id = input_row["Contact ID"]
6
6
  lin_id = input_row["LIN ID"]
7
7
  cv_tr = input_row["CV TR"]
@@ -11,53 +11,53 @@ module Parsers
11
11
  lin_profile = input_row["Linkedin Profile"]
12
12
  cand_id = input_row["Candidate ID"]
13
13
  cand_source = input_row["LIN 1st Degree"]
14
- title = page.at_css(".headline.title")&.text
15
- country = page.at_css("#demographics .locality")&.text
16
- sector = page.at_css("#demographics .descriptor:not(.adr)")&.text
14
+ title = page.at_css(".headline.title").text
15
+ country = page.at_css("#demographics .locality").text
16
+ sector = page.at_css("#demographics .descriptor:not(.adr)").text
17
17
 
18
18
  positions = page.css("#experience .positions .position")
19
19
  if positions
20
- e1_title = positions[0]&.at_css(".item-title")&.text
21
- e1_org = positions[0]&.at_css(".item-subtitle")&.text
22
- e1_start = positions[0]&.css(".date-range time")[0]&.text
23
- e1_end = positions[0]&.css(".date-range time")[1]&.text
24
- e1_loc = positions[0]&.at_css(".location")&.text
25
- e1_desc = positions[0]&.at_css(".description")&.text
26
- e2_title = positions[1]&.at_css(".item-title")&.text
27
- e2_org = positions[1]&.at_css(".item-subtitle")&.text
28
- e2_start = positions[1]&.css(".date-range time")[0]&.text
29
- e2_end = positions[1]&.css(".date-range time")[1]&.text
30
- e2_loc = positions[1]&.at_css(".location")&.text
31
- e2_desc = positions[1]&.at_css(".description")&.text
32
- e3_title = positions[2]&.at_css(".item-title")&.text
33
- e3_org = positions[2]&.at_css(".item-subtitle")&.text
34
- e3_start = positions[2]&.css(".date-range time")[0]&.text
35
- e3_end = positions[2]&.css(".date-range time")[1]&.text
36
- e3_loc = positions[2]&.at_css(".location")&.text
37
- e3_desc = positions[2]&.at_css(".description")&.text
20
+ e1_title = positions[0].at_css(".item-title").text
21
+ e1_org = positions[0].at_css(".item-subtitle").text
22
+ e1_start = positions[0].css(".date-range time")[0].text
23
+ e1_end = positions[0].css(".date-range time")[1].text
24
+ e1_loc = positions[0].at_css(".location").text
25
+ e1_desc = positions[0].at_css(".description").text
26
+ e2_title = positions[1].at_css(".item-title").text
27
+ e2_org = positions[1].at_css(".item-subtitle").text
28
+ e2_start = positions[1].css(".date-range time")[0].text
29
+ e2_end = positions[1].css(".date-range time")[1].text
30
+ e2_loc = positions[1].at_css(".location").text
31
+ e2_desc = positions[1].at_css(".description").text
32
+ e3_title = positions[2].at_css(".item-title").text
33
+ e3_org = positions[2].at_css(".item-subtitle").text
34
+ e3_start = positions[2].css(".date-range time")[0].text
35
+ e3_end = positions[2].css(".date-range time")[1].text
36
+ e3_loc = positions[2].at_css(".location").text
37
+ e3_desc = positions[2].at_css(".description").text
38
38
  end
39
39
 
40
40
  certs = page.css(".certifications .certification")
41
41
  if certs
42
- c1_name = certs[0]&.at_css(".item-title")&.text
43
- c2_name = certs[1]&.at_css(".item-title")&.text
44
- c_type = certs[0]&.at_css(".item-subtitle")&.text
42
+ c1_name = certs[0].at_css(".item-title").text
43
+ c2_name = certs[1].at_css(".item-title").text
44
+ c_type = certs[0].at_css(".item-subtitle").text
45
45
  end
46
46
 
47
47
  schools = page.css("#education .schools .school")
48
48
  if schools
49
- s1_name = schools[0]&.at_css(".item-title")&.text
50
- s2_name = schools[1]&.at_css(".item-title")&.text
51
- s1_start = schools[0]&.css(".date-range time")[0]&.text
52
- s2_start = schools[1]&.css(".date-range time")[0]&.text
53
- s1_end = schools[0]&.css(".date-range time")[1]&.text
54
- s2_end = schools[1]&.css(".date-range time")[1]&.text
55
- s1_degree = schools[0]&.at_css(".item-subtitle")&.text
56
- s2_degree = schools[1]&.at_css(".item-subtitle")&.text
49
+ s1_name = schools[0].at_css(".item-title").text
50
+ s2_name = schools[1].at_css(".item-title").text
51
+ s1_start = schools[0].css(".date-range time")[0].text
52
+ s2_start = schools[1].css(".date-range time")[0].text
53
+ s1_end = schools[0].css(".date-range time")[1].text
54
+ s2_end = schools[1].css(".date-range time")[1].text
55
+ s1_degree = schools[0].at_css(".item-subtitle").text
56
+ s2_degree = schools[1].at_css(".item-subtitle").text
57
57
  end
58
58
 
59
59
  summary = page.at_css("#summary .description")
60
- summary&.css('br').each{|br| br.replace "\n"} if summary
60
+ summary.css('br').each{|br| br.replace "\n"} if summary
61
61
 
62
62
  text_resume = "\n\n***IMPORTED FROM LINKEDIN***\n#{lin_profile}\n\n"
63
63
  text_resume += name.join(" ")
@@ -105,7 +105,7 @@ module Parsers
105
105
  text_resume += "\nINTERESTS\n" if interests && interests.length > 0
106
106
  ints = []
107
107
  interests.each do |interest|
108
- int = interest.at_css(".wrap")&.text
108
+ int = interest.at_css(".wrap").text
109
109
  if int
110
110
  ints << int unless (int == "See less") || (int.match(/See \d+\+/))
111
111
  end
@@ -115,7 +115,7 @@ module Parsers
115
115
  text_resume += "\n\nSKILLS\n" if skills && skills.length > 0
116
116
  sks = []
117
117
  skills.each do |skill|
118
- sk = skill.at_css(".wrap")&.text
118
+ sk = skill.at_css(".wrap").text
119
119
  if sk
120
120
  sks << sk unless (sk == "See less") || (sk.match(/See \d+\+/))
121
121
  end
@@ -125,7 +125,7 @@ module Parsers
125
125
  text_resume += "\n\nLANGUAGES\n" if languages.length > 0
126
126
  langs = []
127
127
  languages.each do |language|
128
- lang = language.at_css(".name")&.text
128
+ lang = language.at_css(".name").text
129
129
  prof = language.at_css(".proficiency")
130
130
  lang += " (#{prof.text})" if prof && prof.text.length > 0
131
131
  langs << lang if lang
@@ -230,43 +230,43 @@ module Parsers
230
230
  row["CV TR"] = "1"
231
231
  row["Account Name"] = acc_name
232
232
  row["Linkedin Import Status"] = import_status
233
- row["First Name"] = name[0]&.slice(0, 39)
234
- row["Last Name"] = name[1..-1]&.join(" ")&.slice(0, 79)
233
+ row["First Name"] = name[0].slice(0, 39)
234
+ row["Last Name"] = name[1..-1].join(" ").slice(0, 79)
235
235
  row["Email"] = email
236
236
  row["Candidate ID"] = cand_id
237
237
  row["LIN 1st Degree"] = cand_source
238
- row["Title"] = title&.slice(0, 127)
238
+ row["Title"] = title.slice(0, 127)
239
239
  row["Contact Country"] = country
240
- row["Contact LIN Sector"] = sector&.slice(0, 99)
241
- row["Employer 1 Title"] = e1_title&.slice(0, 31999)
242
- row["Employer Organization Name 1"] = e1_org&.slice(0, 254)
240
+ row["Contact LIN Sector"] = sector.slice(0, 99)
241
+ row["Employer 1 Title"] = e1_title.slice(0, 31999)
242
+ row["Employer Organization Name 1"] = e1_org.slice(0, 254)
243
243
  row["Employer 1 Start Date"] = format_date(e1_start) #format
244
244
  row["Employer 1 End Date"] = format_date(e1_end) #format
245
- row["Employer 1 Location"] = e1_loc&.slice(0, 254)
246
- row["Employer 1 Description"] = e1_desc&.slice(0, 31999)
247
- row["Employer 2 Title"] = e2_title&.slice(0, 31999)
248
- row["Employer Organization Name 2"] = e2_org&.slice(0, 254)
245
+ row["Employer 1 Location"] = e1_loc.slice(0, 254)
246
+ row["Employer 1 Description"] = e1_desc.slice(0, 31999)
247
+ row["Employer 2 Title"] = e2_title.slice(0, 31999)
248
+ row["Employer Organization Name 2"] = e2_org.slice(0, 254)
249
249
  row["Employer 2 Start Date"] = format_date(e2_start) #format
250
250
  row["Employer 2 End Date"] = format_date(e2_end) #format
251
- row["Employer 2 Location"] = e2_loc&.slice(0, 254)
252
- row["Employer 2 Description"] = e2_desc&.slice(0, 31999)
253
- row["Employer 3 Title"] = e3_title&.slice(0, 31999)
254
- row["Employer Organization Name 3"] = e3_org&.slice(0, 254)
251
+ row["Employer 2 Location"] = e2_loc.slice(0, 254)
252
+ row["Employer 2 Description"] = e2_desc.slice(0, 31999)
253
+ row["Employer 3 Title"] = e3_title.slice(0, 31999)
254
+ row["Employer Organization Name 3"] = e3_org.slice(0, 254)
255
255
  row["Employer 3 Start Date"] = format_date(e3_start) #format
256
256
  row["Employer 3 End Date"] = format_date(e3_end) #format
257
- row["Employer 3 Location"] = e3_loc&.slice(0, 254)
258
- row["Employer 3 Description"] = e3_desc&.slice(0, 31999)
259
- row["License or Certification Name 1"] = c1_name&.slice(0, 254)
260
- row["License or Certification Name 2"] = c2_name&.slice(0, 254)
261
- row["License or Certification Credential Type"] = c_type&.slice(0, 254)
262
- row["Education School 1"] = s1_name&.slice(0, 124)
263
- row["Education Degree Name 1"] = s1_degree&.slice(0, 254)
257
+ row["Employer 3 Location"] = e3_loc.slice(0, 254)
258
+ row["Employer 3 Description"] = e3_desc.slice(0, 31999)
259
+ row["License or Certification Name 1"] = c1_name.slice(0, 254)
260
+ row["License or Certification Name 2"] = c2_name.slice(0, 254)
261
+ row["License or Certification Credential Type"] = c_type.slice(0, 254)
262
+ row["Education School 1"] = s1_name.slice(0, 124)
263
+ row["Education Degree Name 1"] = s1_degree.slice(0, 254)
264
264
  row["Education Degree Date 1"] = format_date(s1_end)
265
- row["Education School 2"] = s2_name&.slice(0, 124)
266
- row["Education Degree Name 2"] = s2_degree&.slice(0, 254)
265
+ row["Education School 2"] = s2_name.slice(0, 124)
266
+ row["Education Degree Name 2"] = s2_degree.slice(0, 254)
267
267
  row["Education Degree Date 2"] = format_date(s2_end)
268
- row["Text Resume"] = text_resume&.slice(0, 31999)
269
- row["LinkedIn Profile"] = lin_profile&.slice(0, 254)
268
+ row["Text Resume"] = text_resume.slice(0, 31999)
269
+ row["LinkedIn Profile"] = lin_profile.slice(0, 254)
270
270
  row["Resume Last Updated"] = Time.now.strftime('%Y-%m-%d %H:%M:%S')
271
271
  row["LIN Import Date"] = Time.now.strftime('%Y-%m-%d')
272
272
  row["CV Uploaded"] = "1"
@@ -281,7 +281,7 @@ module Parsers
281
281
 
282
282
  schools.each do |school|
283
283
  row = CSV::Row.new(@education_headers, [])
284
- row["Contact"] = input_row["Contact ID"]
284
+ row["Contact ID"] = input_row["Contact ID"]
285
285
  row["LIN ID"] = input_row["LIN ID"]
286
286
  row["School Name"] = school.at_css(".item-title").text.slice(0, 149)
287
287
  row["Major"] = school.at_css(".item-subtitle").text.slice(0, 254)
@@ -303,7 +303,7 @@ module Parsers
303
303
 
304
304
  positions.each do |position|
305
305
  row = CSV::Row.new(@employment_headers, [])
306
- row["Contact"] = input_row["Contact ID"]
306
+ row["Contact ID"] = input_row["Contact ID"]
307
307
  row["LIN ID"] = input_row["LIN ID"]
308
308
  row["Job Title"] = position.at_css(".item-title").text.slice(0, 74)
309
309
  row["Employer Name"] = position.at_css(".item-subtitle").text.slice(0, 149)
data/lib/linsc.rb CHANGED
@@ -135,7 +135,7 @@ class Linsc
135
135
  else
136
136
  @options[:insert] = false
137
137
  end
138
- if ids.any?{|id| id && id&.length > 0}
138
+ if ids.any?{|id| id && id.length > 0}
139
139
  @options[:update] = true
140
140
  else
141
141
  @options[:update] = false
data/linsc.gemspec CHANGED
@@ -4,7 +4,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
4
 
5
5
  Gem::Specification.new do |spec|
6
6
  spec.name = "linsc"
7
- spec.version = "0.0.5"
7
+ spec.version = "0.0.6"
8
8
  spec.authors = ["Dan Molloy"]
9
9
  spec.email = ["danieljmolloy1@gmail.com"]
10
10
  spec.date = '2016-03-31'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: linsc
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dan Molloy