linkedindata 0.0.11 → 0.0.12

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +4 -4
  2. metadata +2 -3
  3. data/lib/linkedindata.rb +0 -85
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 94bb03be0b7d5757a4ec8175b829db45bf1ea0d5
4
- data.tar.gz: c51b94a00043330d9ba9b821c7ecdb47a11b3340
3
+ metadata.gz: 8f7c5ffe3de4948c6b28d505079581a10825ea91
4
+ data.tar.gz: 05df543799dd12fbc1e6c25671f539d5644aefab
5
5
  SHA512:
6
- metadata.gz: 306b9d008c6f5958357ff94b2b07379e5bbf66631a343133d7471c459a5eaf66a5e6fd3d5d8cdeb6caca3d6b56873718c7f07ee049f2c4c329f8f106105dd6a4
7
- data.tar.gz: 15d39ed929395b149807868875a614c188a22bacf773d546cc5fce84080ab8db732558bd29de5e9819f12ccb429c21c4a10c85544dd02e48bec4bc4a649df4ad
6
+ metadata.gz: a62b36e080463295eb988c37e8538f2f0181561f1fa48b7e61f0f8c13334990953c830fafd7db852fbaddace5f7c204ea441becc3f9c7995b372bb7feff90dc6
7
+ data.tar.gz: 7d1c02373a972dbaf6851179b46fcee58c30e04a321cfa54143ef5b96b053b973b4b9123531a95722393d02953d2050ecbc45ccbec34f6dd7b46faa4108e77b0
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: linkedindata
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.11
4
+ version: 0.0.12
5
5
  platform: ruby
6
6
  authors:
7
7
  - M. C. McGrath
@@ -15,8 +15,7 @@ email: shidash@shidash.com
15
15
  executables: []
16
16
  extensions: []
17
17
  extra_rdoc_files: []
18
- files:
19
- - lib/linkedindata.rb
18
+ files: []
20
19
  homepage: https://github.com/transparencytoolkit/linkedindata
21
20
  licenses:
22
21
  - GPL
data/lib/linkedindata.rb DELETED
@@ -1,85 +0,0 @@
1
- require 'mechanize'
2
- require 'linkedin-scraper'
3
- require 'json'
4
- require 'nokogiri'
5
- require 'open-uri'
6
- load 'parseprofile.rb'
7
- require 'pry'
8
- require 'urlarchiver'
9
-
10
- class LinkedinData
11
- def initialize(input, todegree)
12
- @input = input
13
- @output = Array.new
14
- @startindex = 10
15
- end
16
-
17
- # Searches for profiles on Google
18
- def search
19
- agent = Mechanize.new
20
- agent.user_agent_alias = 'Linux Firefox'
21
- gform = agent.get("http://google.com").form("f")
22
- gform.q = "site:linkedin.com/pub " + @input
23
- page = agent.submit(gform, gform.buttons.first)
24
- examine(page)
25
- end
26
-
27
- # Examines a search page
28
- def examine(page)
29
- # Separate getting profile links and going to next page
30
- # Method for getting links to all result pages
31
- # Different method for getting all profile links on page and scraping (split to new thread for this)
32
- # Has own output set, merge into full one at end (make sure threadsafe)
33
-
34
- # Have own input and output
35
- page.links.each do |link|
36
- if (link.href.include? "linkedin.com") && (!link.href.include? "webcache") && (!link.href.include? "site:linkedin.com/pub+")
37
- saveurl = link.href.split("?q=")
38
-
39
- if saveurl[1]
40
- url = saveurl[1].split("&")
41
- begin
42
- scrape(url[0])
43
- rescue
44
- end
45
- end
46
- end
47
-
48
- # Find the link to the next page and go to it
49
- if (link.href.include? "&sa=N") && (link.href.include? "&start=")
50
- url1 = link.href.split("&start=")
51
- url2 = url1[1].split("&sa=N")
52
-
53
- if url2[0].to_i == @startindex
54
- sleep(rand(5..10))
55
- @startindex += 10
56
- agent = Mechanize.new
57
- examine(agent.get("http://google.com" + link.href))
58
- end
59
- end
60
- end
61
- end
62
-
63
- # Scrapes profile
64
- def scrape(url)
65
- # Download profile and rescue on error
66
- begin
67
- url.gsub!("https", "http")
68
- profile = Linkedin::Profile.get_profile(url)
69
- rescue
70
- end
71
-
72
- # Parse profile if returned
73
- if profile
74
- p = ParseProfile.new(profile, url)
75
- @output.concat(p.parse)
76
- end
77
- end
78
-
79
- # Gets all data and returns in JSON
80
- def getData
81
- search
82
- formatted_json = JSON.pretty_generate(@output)
83
- return formatted_json
84
- end
85
- end