linkedindata 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. data/lib/linkedindata.rb +9 -6
  2. metadata +2 -2
data/lib/linkedindata.rb CHANGED
@@ -6,6 +6,7 @@ class LinkedinData
6
6
  def initialize(input)
7
7
  @input = input
8
8
  @output = Array.new
9
+ @startindex = 10
9
10
  end
10
11
 
11
12
  # Searches for links on Google
@@ -16,11 +17,9 @@ class LinkedinData
16
17
  page = agent.submit(gform, gform.buttons.first)
17
18
  examine(page)
18
19
  end
19
-
20
+
20
21
  # Examines a search page
21
22
  def examine(page)
22
- startindex = 0
23
-
24
23
  page.links.each do |link|
25
24
  if (link.href.include? "linkedin.com") && (!link.href.include? "webcache") && (!link.href.include? "site:linkedin.com/pub+")
26
25
  saveurl = link.href.split("?q=")
@@ -35,10 +34,11 @@ class LinkedinData
35
34
  url1 = link.href.split("&start=")
36
35
  url2 = url1[1].split("&sa=N")
37
36
 
38
- if url2[0].to_i < startindex
37
+ if url2[0].to_i == @startindex
38
+ sleep(20)
39
+ @startindex += 10
39
40
  agent = Mechanize.new
40
41
  examine(agent.get("http://google.com" + link.href))
41
- else startindex = url2[0].to_i
42
42
  end
43
43
  end
44
44
  end
@@ -64,6 +64,9 @@ class LinkedinData
64
64
  # Gets all data and returns in JSON
65
65
  def getData
66
66
  search
67
- return @output.to_json
67
+ return JSON.pretty_generate(@output)
68
68
  end
69
69
  end
70
+
71
+ l = LinkedinData.new("National Security Agency")
72
+ puts l.getData
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: linkedindata
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-01-07 00:00:00.000000000 Z
12
+ date: 2014-02-14 00:00:00.000000000 Z
13
13
  dependencies: []
14
14
  description: Scrapes all LinkedIn profiles including terms you specify.
15
15
  email: shidash@shidash.com