linkedindata 0.0.11 → 0.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +4 -4
  2. metadata +2 -3
  3. data/lib/linkedindata.rb +0 -85
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 94bb03be0b7d5757a4ec8175b829db45bf1ea0d5
4
- data.tar.gz: c51b94a00043330d9ba9b821c7ecdb47a11b3340
3
+ metadata.gz: 8f7c5ffe3de4948c6b28d505079581a10825ea91
4
+ data.tar.gz: 05df543799dd12fbc1e6c25671f539d5644aefab
5
5
  SHA512:
6
- metadata.gz: 306b9d008c6f5958357ff94b2b07379e5bbf66631a343133d7471c459a5eaf66a5e6fd3d5d8cdeb6caca3d6b56873718c7f07ee049f2c4c329f8f106105dd6a4
7
- data.tar.gz: 15d39ed929395b149807868875a614c188a22bacf773d546cc5fce84080ab8db732558bd29de5e9819f12ccb429c21c4a10c85544dd02e48bec4bc4a649df4ad
6
+ metadata.gz: a62b36e080463295eb988c37e8538f2f0181561f1fa48b7e61f0f8c13334990953c830fafd7db852fbaddace5f7c204ea441becc3f9c7995b372bb7feff90dc6
7
+ data.tar.gz: 7d1c02373a972dbaf6851179b46fcee58c30e04a321cfa54143ef5b96b053b973b4b9123531a95722393d02953d2050ecbc45ccbec34f6dd7b46faa4108e77b0
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: linkedindata
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.11
4
+ version: 0.0.12
5
5
  platform: ruby
6
6
  authors:
7
7
  - M. C. McGrath
@@ -15,8 +15,7 @@ email: shidash@shidash.com
15
15
  executables: []
16
16
  extensions: []
17
17
  extra_rdoc_files: []
18
- files:
19
- - lib/linkedindata.rb
18
+ files: []
20
19
  homepage: https://github.com/transparencytoolkit/linkedindata
21
20
  licenses:
22
21
  - GPL
data/lib/linkedindata.rb DELETED
@@ -1,85 +0,0 @@
1
- require 'mechanize'
2
- require 'linkedin-scraper'
3
- require 'json'
4
- require 'nokogiri'
5
- require 'open-uri'
6
- load 'parseprofile.rb'
7
- require 'pry'
8
- require 'urlarchiver'
9
-
10
- class LinkedinData
11
- def initialize(input, todegree)
12
- @input = input
13
- @output = Array.new
14
- @startindex = 10
15
- end
16
-
17
- # Searches for profiles on Google
18
- def search
19
- agent = Mechanize.new
20
- agent.user_agent_alias = 'Linux Firefox'
21
- gform = agent.get("http://google.com").form("f")
22
- gform.q = "site:linkedin.com/pub " + @input
23
- page = agent.submit(gform, gform.buttons.first)
24
- examine(page)
25
- end
26
-
27
- # Examines a search page
28
- def examine(page)
29
- # Separate getting profile links and going to next page
30
- # Method for getting links to all result pages
31
- # Different method for getting all profile links on page and scraping (split to new thread for this)
32
- # Has own output set, merge into full one at end (make sure threadsafe)
33
-
34
- # Have own input and output
35
- page.links.each do |link|
36
- if (link.href.include? "linkedin.com") && (!link.href.include? "webcache") && (!link.href.include? "site:linkedin.com/pub+")
37
- saveurl = link.href.split("?q=")
38
-
39
- if saveurl[1]
40
- url = saveurl[1].split("&")
41
- begin
42
- scrape(url[0])
43
- rescue
44
- end
45
- end
46
- end
47
-
48
- # Find the link to the next page and go to it
49
- if (link.href.include? "&sa=N") && (link.href.include? "&start=")
50
- url1 = link.href.split("&start=")
51
- url2 = url1[1].split("&sa=N")
52
-
53
- if url2[0].to_i == @startindex
54
- sleep(rand(5..10))
55
- @startindex += 10
56
- agent = Mechanize.new
57
- examine(agent.get("http://google.com" + link.href))
58
- end
59
- end
60
- end
61
- end
62
-
63
- # Scrapes profile
64
- def scrape(url)
65
- # Download profile and rescue on error
66
- begin
67
- url.gsub!("https", "http")
68
- profile = Linkedin::Profile.get_profile(url)
69
- rescue
70
- end
71
-
72
- # Parse profile if returned
73
- if profile
74
- p = ParseProfile.new(profile, url)
75
- @output.concat(p.parse)
76
- end
77
- end
78
-
79
- # Gets all data and returns in JSON
80
- def getData
81
- search
82
- formatted_json = JSON.pretty_generate(@output)
83
- return formatted_json
84
- end
85
- end