linsc 0.0.5 → 0.0.6

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of each package version as it appears in its respective public registry.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6592e6700dca1d71ba381695184a01b2893dd35a
4
- data.tar.gz: 229bb6f3900be5049e472400c75c6d447e62b0ed
3
+ metadata.gz: 2a7c54e13a9f1ade26f5330d410af6e23d6fcb72
4
+ data.tar.gz: b5f5b89e4169506b415f8a3fceeaff678a0615ca
5
5
  SHA512:
6
- metadata.gz: 50f58ae940d139005e6ba95aad136a9bb263c0df45a07db492500c3c6153a2942d8c735ade047648df7ed46bf45f7fadbdc1a125d7fcd279270346f98791d784
7
- data.tar.gz: 279f1cfe337987b1e348a8a2beb7240a82dc2253123284f9244aa1b4081c2d4b69f12e7ec857f8134155250654439ddc2c1ec276be8d011912d061b59f24b568
6
+ metadata.gz: e79102d02acfcec2064b49611aa352b193d89bb90b93ff126995ea0e5a5e5584da6d3624208ce7bafcd223f82ea6c5baed1964d2cd1fa204dfdee0e326e19015
7
+ data.tar.gz: 48b1af2a42a580a5ad495abe53dd6f550057c7280c6942bde0126fe6226d792dbdcba62016054f9bfdbf2639e0bf57086a478f67a04fa2dec2d3a16a5f0aa7a7
@@ -42,20 +42,20 @@ class CrossRef
42
42
  b = y[@master_lookup_field]
43
43
  a && b ? a <=> b : a ? -1 : 1
44
44
  end
45
- master_lookup_values = master_data.collect {|row| row[@master_lookup_field]&.downcase}
45
+ master_lookup_values = master_data.collect {|row| row[@master_lookup_field] && row[@master_lookup_field].downcase}
46
46
  i = 0
47
47
  CSV.foreach(@child_path, headers: true, encoding: 'utf-8') do |child_row|
48
48
  i += 1
49
49
  puts "email lookup - row: #{i}/#{@child_length}"
50
- child_lookup_value = child_row[@child_lookup_field]&.downcase
51
- if child_lookup_value&.include?('@') || !@email_key ## generalize this
50
+ child_lookup_value = child_row[@child_lookup_field].downcase if child_row[@child_lookup_field]
51
+ if (child_lookup_value && child_lookup_value.include?('@')) || !@email_key ## generalize this
52
52
  match_index = master_lookup_values.bsearch_index do |master_lookup_value|
53
53
  child_lookup_value && master_lookup_value ?
54
54
  child_lookup_value <=> master_lookup_value : child_lookup_value ? -1 : 1
55
55
  end
56
56
  if !match_index
57
57
  match_index = master_data.find_index do |master_row|
58
- master_secondary_lookups = @master_secondary_lookups.collect{|x| x&.downcase}
58
+ master_secondary_lookups = @master_secondary_lookups.collect{|x| x && x.downcase}
59
59
  master_secondary_lookups.include?(child_lookup_value)
60
60
  end
61
61
  end
@@ -94,7 +94,7 @@ class CrossRef
94
94
  end
95
95
  master_row_new = CSV::Row.new(@headers, [])
96
96
  master_row.each do |key, value|
97
- master_row_new[key] = value&.encode('utf-8', invalid: :replace, undef: :replace, replace: '#')
97
+ master_row_new[key] = value.encode('utf-8', invalid: :replace, undef: :replace, replace: '#') if value
98
98
  end
99
99
  master_row_new
100
100
  end
@@ -3,7 +3,7 @@ module CSVHandlers
3
3
  values = []
4
4
  headers.each do |header|
5
5
  if encoding
6
- values << row[header]&.encode(encoding)
6
+ values << row[header].encode(encoding) if row[header]
7
7
  else
8
8
  values << row[header]
9
9
  end
@@ -33,7 +33,7 @@ module CSVHandlers
33
33
  unless File.exist?(f)
34
34
  FileUtils.touch(f)
35
35
  csv = CSV.open(f, "w+")
36
- csv << @headers.collect {|x| x&.encode('utf-8')}
36
+ csv << @headers.collect {|x| x && x.encode('utf-8')}
37
37
  csv.close
38
38
  end
39
39
  end
@@ -42,7 +42,7 @@ module CSVHandlers
42
42
  unless File.exist?(f)
43
43
  FileUtils.touch(f)
44
44
  csv = CSV.open(f, "w+")
45
- csv << headers.collect {|x| x&.encode('utf-8')}
45
+ csv << headers.collect {|x| x && x.encode('utf-8')}
46
46
  csv.close
47
47
  end
48
48
  end
data/lib/linsc/duck.rb CHANGED
@@ -42,7 +42,11 @@ class DuckScraper
42
42
  CSV.foreach(@input_file, headers: true) do |input_row|
43
43
  count += 1
44
44
  next if @start && @start >= count
45
- tries = @proxies&.length || 3
45
+ if @proxies
46
+ tries = @proxies.length
47
+ else
48
+ tries = 3
49
+ end
46
50
  puts "ddg #{count}/#{@input_length}"
47
51
  begin
48
52
  unless sufficient_data?(input_row)
data/lib/linsc/merger.rb CHANGED
@@ -29,7 +29,7 @@ class Merger
29
29
  clean_file = File.read(lin_file, encoding: 'windows-1252').strip
30
30
  CSV.parse(clean_file, headers: true, encoding: 'windows-1252') do |row|
31
31
  row["Recruiter"] = recruiter_name
32
- email = row['E-mail Address']&.downcase
32
+ email = row['E-mail Address'].downcase if row['E-mail Address']
33
33
  if emails.has_key?(email)
34
34
  emails[email] << row
35
35
  else
@@ -56,10 +56,10 @@ class Merger
56
56
  output_row = CSV::Row.new(@headers, [])
57
57
  correct_row.each do |key, value|
58
58
  if @mapping[key]
59
- output_row[@mapping[key]] = value&.encode('utf-8')
59
+ output_row[@mapping[key]] = value.encode('utf-8') if value
60
60
  end
61
61
  end
62
- output_row['Email'] = output_row['Email']&.downcase
62
+ output_row['Email'] = output_row['Email'].downcase if output_row['Email']
63
63
  else
64
64
  output_row = create_row(correct_row, @headers, 'utf-8')
65
65
  end
data/lib/linsc/parsers.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  module Parsers
2
2
  def scrape_contact(input_row, page, mode)
3
3
  row = CSV::Row.new(@headers, [])
4
- name = page.at_css("#name")&.text&.split
4
+ name = page.at_css("#name").text.split
5
5
  contact_id = input_row["Contact ID"]
6
6
  lin_id = input_row["LIN ID"]
7
7
  cv_tr = input_row["CV TR"]
@@ -11,53 +11,53 @@ module Parsers
11
11
  lin_profile = input_row["Linkedin Profile"]
12
12
  cand_id = input_row["Candidate ID"]
13
13
  cand_source = input_row["LIN 1st Degree"]
14
- title = page.at_css(".headline.title")&.text
15
- country = page.at_css("#demographics .locality")&.text
16
- sector = page.at_css("#demographics .descriptor:not(.adr)")&.text
14
+ title = page.at_css(".headline.title").text
15
+ country = page.at_css("#demographics .locality").text
16
+ sector = page.at_css("#demographics .descriptor:not(.adr)").text
17
17
 
18
18
  positions = page.css("#experience .positions .position")
19
19
  if positions
20
- e1_title = positions[0]&.at_css(".item-title")&.text
21
- e1_org = positions[0]&.at_css(".item-subtitle")&.text
22
- e1_start = positions[0]&.css(".date-range time")[0]&.text
23
- e1_end = positions[0]&.css(".date-range time")[1]&.text
24
- e1_loc = positions[0]&.at_css(".location")&.text
25
- e1_desc = positions[0]&.at_css(".description")&.text
26
- e2_title = positions[1]&.at_css(".item-title")&.text
27
- e2_org = positions[1]&.at_css(".item-subtitle")&.text
28
- e2_start = positions[1]&.css(".date-range time")[0]&.text
29
- e2_end = positions[1]&.css(".date-range time")[1]&.text
30
- e2_loc = positions[1]&.at_css(".location")&.text
31
- e2_desc = positions[1]&.at_css(".description")&.text
32
- e3_title = positions[2]&.at_css(".item-title")&.text
33
- e3_org = positions[2]&.at_css(".item-subtitle")&.text
34
- e3_start = positions[2]&.css(".date-range time")[0]&.text
35
- e3_end = positions[2]&.css(".date-range time")[1]&.text
36
- e3_loc = positions[2]&.at_css(".location")&.text
37
- e3_desc = positions[2]&.at_css(".description")&.text
20
+ e1_title = positions[0].at_css(".item-title").text
21
+ e1_org = positions[0].at_css(".item-subtitle").text
22
+ e1_start = positions[0].css(".date-range time")[0].text
23
+ e1_end = positions[0].css(".date-range time")[1].text
24
+ e1_loc = positions[0].at_css(".location").text
25
+ e1_desc = positions[0].at_css(".description").text
26
+ e2_title = positions[1].at_css(".item-title").text
27
+ e2_org = positions[1].at_css(".item-subtitle").text
28
+ e2_start = positions[1].css(".date-range time")[0].text
29
+ e2_end = positions[1].css(".date-range time")[1].text
30
+ e2_loc = positions[1].at_css(".location").text
31
+ e2_desc = positions[1].at_css(".description").text
32
+ e3_title = positions[2].at_css(".item-title").text
33
+ e3_org = positions[2].at_css(".item-subtitle").text
34
+ e3_start = positions[2].css(".date-range time")[0].text
35
+ e3_end = positions[2].css(".date-range time")[1].text
36
+ e3_loc = positions[2].at_css(".location").text
37
+ e3_desc = positions[2].at_css(".description").text
38
38
  end
39
39
 
40
40
  certs = page.css(".certifications .certification")
41
41
  if certs
42
- c1_name = certs[0]&.at_css(".item-title")&.text
43
- c2_name = certs[1]&.at_css(".item-title")&.text
44
- c_type = certs[0]&.at_css(".item-subtitle")&.text
42
+ c1_name = certs[0].at_css(".item-title").text
43
+ c2_name = certs[1].at_css(".item-title").text
44
+ c_type = certs[0].at_css(".item-subtitle").text
45
45
  end
46
46
 
47
47
  schools = page.css("#education .schools .school")
48
48
  if schools
49
- s1_name = schools[0]&.at_css(".item-title")&.text
50
- s2_name = schools[1]&.at_css(".item-title")&.text
51
- s1_start = schools[0]&.css(".date-range time")[0]&.text
52
- s2_start = schools[1]&.css(".date-range time")[0]&.text
53
- s1_end = schools[0]&.css(".date-range time")[1]&.text
54
- s2_end = schools[1]&.css(".date-range time")[1]&.text
55
- s1_degree = schools[0]&.at_css(".item-subtitle")&.text
56
- s2_degree = schools[1]&.at_css(".item-subtitle")&.text
49
+ s1_name = schools[0].at_css(".item-title").text
50
+ s2_name = schools[1].at_css(".item-title").text
51
+ s1_start = schools[0].css(".date-range time")[0].text
52
+ s2_start = schools[1].css(".date-range time")[0].text
53
+ s1_end = schools[0].css(".date-range time")[1].text
54
+ s2_end = schools[1].css(".date-range time")[1].text
55
+ s1_degree = schools[0].at_css(".item-subtitle").text
56
+ s2_degree = schools[1].at_css(".item-subtitle").text
57
57
  end
58
58
 
59
59
  summary = page.at_css("#summary .description")
60
- summary&.css('br').each{|br| br.replace "\n"} if summary
60
+ summary.css('br').each{|br| br.replace "\n"} if summary
61
61
 
62
62
  text_resume = "\n\n***IMPORTED FROM LINKEDIN***\n#{lin_profile}\n\n"
63
63
  text_resume += name.join(" ")
@@ -105,7 +105,7 @@ module Parsers
105
105
  text_resume += "\nINTERESTS\n" if interests && interests.length > 0
106
106
  ints = []
107
107
  interests.each do |interest|
108
- int = interest.at_css(".wrap")&.text
108
+ int = interest.at_css(".wrap").text
109
109
  if int
110
110
  ints << int unless (int == "See less") || (int.match(/See \d+\+/))
111
111
  end
@@ -115,7 +115,7 @@ module Parsers
115
115
  text_resume += "\n\nSKILLS\n" if skills && skills.length > 0
116
116
  sks = []
117
117
  skills.each do |skill|
118
- sk = skill.at_css(".wrap")&.text
118
+ sk = skill.at_css(".wrap").text
119
119
  if sk
120
120
  sks << sk unless (sk == "See less") || (sk.match(/See \d+\+/))
121
121
  end
@@ -125,7 +125,7 @@ module Parsers
125
125
  text_resume += "\n\nLANGUAGES\n" if languages.length > 0
126
126
  langs = []
127
127
  languages.each do |language|
128
- lang = language.at_css(".name")&.text
128
+ lang = language.at_css(".name").text
129
129
  prof = language.at_css(".proficiency")
130
130
  lang += " (#{prof.text})" if prof && prof.text.length > 0
131
131
  langs << lang if lang
@@ -230,43 +230,43 @@ module Parsers
230
230
  row["CV TR"] = "1"
231
231
  row["Account Name"] = acc_name
232
232
  row["Linkedin Import Status"] = import_status
233
- row["First Name"] = name[0]&.slice(0, 39)
234
- row["Last Name"] = name[1..-1]&.join(" ")&.slice(0, 79)
233
+ row["First Name"] = name[0].slice(0, 39)
234
+ row["Last Name"] = name[1..-1].join(" ").slice(0, 79)
235
235
  row["Email"] = email
236
236
  row["Candidate ID"] = cand_id
237
237
  row["LIN 1st Degree"] = cand_source
238
- row["Title"] = title&.slice(0, 127)
238
+ row["Title"] = title.slice(0, 127)
239
239
  row["Contact Country"] = country
240
- row["Contact LIN Sector"] = sector&.slice(0, 99)
241
- row["Employer 1 Title"] = e1_title&.slice(0, 31999)
242
- row["Employer Organization Name 1"] = e1_org&.slice(0, 254)
240
+ row["Contact LIN Sector"] = sector.slice(0, 99)
241
+ row["Employer 1 Title"] = e1_title.slice(0, 31999)
242
+ row["Employer Organization Name 1"] = e1_org.slice(0, 254)
243
243
  row["Employer 1 Start Date"] = format_date(e1_start) #format
244
244
  row["Employer 1 End Date"] = format_date(e1_end) #format
245
- row["Employer 1 Location"] = e1_loc&.slice(0, 254)
246
- row["Employer 1 Description"] = e1_desc&.slice(0, 31999)
247
- row["Employer 2 Title"] = e2_title&.slice(0, 31999)
248
- row["Employer Organization Name 2"] = e2_org&.slice(0, 254)
245
+ row["Employer 1 Location"] = e1_loc.slice(0, 254)
246
+ row["Employer 1 Description"] = e1_desc.slice(0, 31999)
247
+ row["Employer 2 Title"] = e2_title.slice(0, 31999)
248
+ row["Employer Organization Name 2"] = e2_org.slice(0, 254)
249
249
  row["Employer 2 Start Date"] = format_date(e2_start) #format
250
250
  row["Employer 2 End Date"] = format_date(e2_end) #format
251
- row["Employer 2 Location"] = e2_loc&.slice(0, 254)
252
- row["Employer 2 Description"] = e2_desc&.slice(0, 31999)
253
- row["Employer 3 Title"] = e3_title&.slice(0, 31999)
254
- row["Employer Organization Name 3"] = e3_org&.slice(0, 254)
251
+ row["Employer 2 Location"] = e2_loc.slice(0, 254)
252
+ row["Employer 2 Description"] = e2_desc.slice(0, 31999)
253
+ row["Employer 3 Title"] = e3_title.slice(0, 31999)
254
+ row["Employer Organization Name 3"] = e3_org.slice(0, 254)
255
255
  row["Employer 3 Start Date"] = format_date(e3_start) #format
256
256
  row["Employer 3 End Date"] = format_date(e3_end) #format
257
- row["Employer 3 Location"] = e3_loc&.slice(0, 254)
258
- row["Employer 3 Description"] = e3_desc&.slice(0, 31999)
259
- row["License or Certification Name 1"] = c1_name&.slice(0, 254)
260
- row["License or Certification Name 2"] = c2_name&.slice(0, 254)
261
- row["License or Certification Credential Type"] = c_type&.slice(0, 254)
262
- row["Education School 1"] = s1_name&.slice(0, 124)
263
- row["Education Degree Name 1"] = s1_degree&.slice(0, 254)
257
+ row["Employer 3 Location"] = e3_loc.slice(0, 254)
258
+ row["Employer 3 Description"] = e3_desc.slice(0, 31999)
259
+ row["License or Certification Name 1"] = c1_name.slice(0, 254)
260
+ row["License or Certification Name 2"] = c2_name.slice(0, 254)
261
+ row["License or Certification Credential Type"] = c_type.slice(0, 254)
262
+ row["Education School 1"] = s1_name.slice(0, 124)
263
+ row["Education Degree Name 1"] = s1_degree.slice(0, 254)
264
264
  row["Education Degree Date 1"] = format_date(s1_end)
265
- row["Education School 2"] = s2_name&.slice(0, 124)
266
- row["Education Degree Name 2"] = s2_degree&.slice(0, 254)
265
+ row["Education School 2"] = s2_name.slice(0, 124)
266
+ row["Education Degree Name 2"] = s2_degree.slice(0, 254)
267
267
  row["Education Degree Date 2"] = format_date(s2_end)
268
- row["Text Resume"] = text_resume&.slice(0, 31999)
269
- row["LinkedIn Profile"] = lin_profile&.slice(0, 254)
268
+ row["Text Resume"] = text_resume.slice(0, 31999)
269
+ row["LinkedIn Profile"] = lin_profile.slice(0, 254)
270
270
  row["Resume Last Updated"] = Time.now.strftime('%Y-%m-%d %H:%M:%S')
271
271
  row["LIN Import Date"] = Time.now.strftime('%Y-%m-%d')
272
272
  row["CV Uploaded"] = "1"
@@ -281,7 +281,7 @@ module Parsers
281
281
 
282
282
  schools.each do |school|
283
283
  row = CSV::Row.new(@education_headers, [])
284
- row["Contact"] = input_row["Contact ID"]
284
+ row["Contact ID"] = input_row["Contact ID"]
285
285
  row["LIN ID"] = input_row["LIN ID"]
286
286
  row["School Name"] = school.at_css(".item-title").text.slice(0, 149)
287
287
  row["Major"] = school.at_css(".item-subtitle").text.slice(0, 254)
@@ -303,7 +303,7 @@ module Parsers
303
303
 
304
304
  positions.each do |position|
305
305
  row = CSV::Row.new(@employment_headers, [])
306
- row["Contact"] = input_row["Contact ID"]
306
+ row["Contact ID"] = input_row["Contact ID"]
307
307
  row["LIN ID"] = input_row["LIN ID"]
308
308
  row["Job Title"] = position.at_css(".item-title").text.slice(0, 74)
309
309
  row["Employer Name"] = position.at_css(".item-subtitle").text.slice(0, 149)
data/lib/linsc.rb CHANGED
@@ -135,7 +135,7 @@ class Linsc
135
135
  else
136
136
  @options[:insert] = false
137
137
  end
138
- if ids.any?{|id| id && id&.length > 0}
138
+ if ids.any?{|id| id && id.length > 0}
139
139
  @options[:update] = true
140
140
  else
141
141
  @options[:update] = false
data/linsc.gemspec CHANGED
@@ -4,7 +4,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
4
 
5
5
  Gem::Specification.new do |spec|
6
6
  spec.name = "linsc"
7
- spec.version = "0.0.5"
7
+ spec.version = "0.0.6"
8
8
  spec.authors = ["Dan Molloy"]
9
9
  spec.email = ["danieljmolloy1@gmail.com"]
10
10
  spec.date = '2016-03-31'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: linsc
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dan Molloy