linkedindata 0.0.12 → 0.0.13

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8f7c5ffe3de4948c6b28d505079581a10825ea91
4
- data.tar.gz: 05df543799dd12fbc1e6c25671f539d5644aefab
3
+ metadata.gz: 3bc1d73941a6228073ef054491452a8ecc34f0d6
4
+ data.tar.gz: 35046bb711b902b5a8650b6995d291afc48c3702
5
5
  SHA512:
6
- metadata.gz: a62b36e080463295eb988c37e8538f2f0181561f1fa48b7e61f0f8c13334990953c830fafd7db852fbaddace5f7c204ea441becc3f9c7995b372bb7feff90dc6
7
- data.tar.gz: 7d1c02373a972dbaf6851179b46fcee58c30e04a321cfa54143ef5b96b053b973b4b9123531a95722393d02953d2050ecbc45ccbec34f6dd7b46faa4108e77b0
6
+ metadata.gz: 5286e02124965b5f02ecbad34699840071ca0938c52e9da7d796683f150e0f796fa5d12ddca85763e997c95fbc1b99e573e89cc21344dddfd5709beaaa3434c8
7
+ data.tar.gz: 0015c5ef88cb4c14e187412e0da448ef40194c24d12fbbddc71ea50682e5769e9fa76930925eb08e654d1db7de91d1e5a3be7027cd41003b2beba0019f6ff82b
data/lib/getrelated.rb ADDED
@@ -0,0 +1,50 @@
1
+ require 'json'
2
+ require 'nokogiri'
3
+ require 'open-uri'
4
+
5
# Scrapes the "people also viewed" sidebar of a LinkedIn profile page.
class GetRelated
  # url - String URL of the profile page to fetch.
  def initialize(url)
    @url = url
    @relatedlist = Array.new
  end

  # Get the list of names of related people.
  #
  # Returns an Array of String names. Returns an empty Array (not nil,
  # as the previous version did) when the page parses to nothing or has
  # no related-people module, so callers can iterate unconditionally.
  def getList
    # URI.open: Kernel#open no longer delegates to open-uri on Ruby 3.0+,
    # and passing a URL to Kernel#open was unsafe on older Rubies anyway.
    html = Nokogiri::HTML(URI.open(@url))

    namelist = Array.new

    if html
      # Each "insights-browse-map" module holds one <li> per related
      # person, with the person's name in an <h4>.
      html.css("div.insights-browse-map").each do |d|
        d.css("li").each do |l|
          namelist.push(l.css("h4").text)
        end
      end
    end

    return namelist
  end
end
29
+
30
+ # This is just an outline for the next version of getrelated
31
+
32
+ # Add degree back as field (0 by default)
33
+ # Loop through all profiles
34
+ # Load n times (need to determine optimal num)
35
+ # Save list of related people (for profile- make list and append if seen listed as related or in related list)
36
+ # Save overall list of related people (with URLs and min degree)
37
+ # Track min degrees out
38
+
39
+ # Go through overall list of related people
40
+ # Parse profile
41
+ # Make sure degree is correct when saved
42
+ # Maybe save in JSONs by degree
43
+
44
+
45
+ # Info:
46
+ # Profiles of related people
47
+ # Degrees for all profiles
48
+ # Related people list on each profile (complete)
49
+
50
+ # Deduplicate
@@ -0,0 +1,85 @@
1
+ require 'mechanize'
2
+ require 'linkedin-scraper'
3
+ require 'json'
4
+ require 'nokogiri'
5
+ require 'open-uri'
6
+ load 'parseprofile.rb'
7
+ require 'pry'
8
+ require 'urlarchiver'
9
+
10
# Finds LinkedIn profiles via a Google "site:linkedin.com/pub" search,
# scrapes each result, and aggregates the parsed records.
class LinkedinData
  # input    - String search terms used to find profiles.
  # todegree - Integer degree-of-separation limit. BUG FIX: the previous
  #            version accepted this argument but silently discarded it;
  #            it is now stored for the planned related-people crawl
  #            (see the outline in getrelated.rb).
  def initialize(input, todegree)
    @input = input
    @todegree = todegree
    @output = Array.new
    @startindex = 10
  end

  # Searches for profiles on Google and examines the first result page.
  def search
    agent = Mechanize.new
    agent.user_agent_alias = 'Linux Firefox'
    gform = agent.get("http://google.com").form("f")
    gform.q = "site:linkedin.com/pub " + @input
    page = agent.submit(gform, gform.buttons.first)
    examine(page)
  end

  # Examines one search-result page: scrapes every profile link on it,
  # then recurses into the next result page (tracked via @startindex).
  def examine(page)
    page.links.each do |link|
      # Google result links embed the real target URL after "?q=".
      if (link.href.include? "linkedin.com") && (!link.href.include? "webcache") && (!link.href.include? "site:linkedin.com/pub+")
        saveurl = link.href.split("?q=")

        if saveurl[1]
          url = saveurl[1].split("&")
          begin
            scrape(url[0])
          rescue StandardError
            # Best-effort: skip profiles that fail to scrape. (Was a bare
            # rescue; StandardError keeps SignalException/SystemExit fatal.)
          end
        end
      end

      # Find the link to the next result page and follow it, but only the
      # one matching the next expected start index so each page is visited
      # exactly once.
      if (link.href.include? "&sa=N") && (link.href.include? "&start=")
        url1 = link.href.split("&start=")
        url2 = url1[1].split("&sa=N")

        if url2[0].to_i == @startindex
          sleep(rand(5..10)) # rate-limit so the crawl looks less bot-like
          @startindex += 10
          agent = Mechanize.new
          examine(agent.get("http://google.com" + link.href))
        end
      end
    end
  end

  # Scrapes a single profile URL and appends its parsed records to @output.
  def scrape(url)
    # Download the profile, leaving `profile` nil on failure.
    begin
      # sub (local reassignment), not gsub!: only the first "https" should
      # become "http", and the caller's string must not be mutated.
      url = url.sub("https", "http")
      profile = Linkedin::Profile.get_profile(url)
    rescue StandardError
      # Best-effort: a failed download simply yields no records.
    end

    # Parse the profile only if the download returned one.
    if profile
      p = ParseProfile.new(profile, url)
      @output.concat(p.parse)
    end
  end

  # Runs the search and returns all collected data as pretty-printed JSON.
  def getData
    search
    formatted_json = JSON.pretty_generate(@output)
    return formatted_json
  end
end
@@ -0,0 +1,76 @@
1
+ require 'json'
2
+ load 'getrelated.rb'
3
+
4
# Turns one scraped LinkedIn profile into a flat list of per-company
# records, each annotated with the person's details and related people.
class ParseProfile
  # profile - a Linkedin::Profile (linkedin-scraper) object.
  # url     - String URL the profile was scraped from.
  def initialize(profile, url)
    @profile = profile
    @url = url
    @output = Array.new
    # Filled in by #parse. (The previous version had a bare, no-op
    # `@related_people` statement here; an explicit nil is equivalent
    # and makes the intent clear.)
    @related_people = nil
  end

  # Parses the profile: one record per current company ("Yes") and one
  # per past company ("No"). Returns the Array of record Hashes.
  def parse
    begin
      g = GetRelated.new(@url)
      @related_people = g.getList
    rescue StandardError
      # Best-effort: @related_people stays nil if the lookup fails.
    end

    # Parse profiles for current companies
    @profile.current_companies.each do |c|
      @output.push(parseCompany(c, "Yes"))
    end

    # Parse past position/company info
    @profile.past_companies.each do |c|
      @output.push(parseCompany(c, "No"))
    end

    # Clean up stray in-progress downloads left in the uploads directory.
    pics = Dir["public/uploads/*.jpg.*"]
    pics.each do |p|
      File.delete(p)
    end

    return @output
  end

  # Merge person-level data into one company/role Hash `c`.
  # status is "Yes" for a current position, "No" for a past one.
  # Note: mutates and returns `c`.
  def parseCompany(c, status)
    c.merge!(
      :skills => @profile.skills,
      :certifications => @profile.certifications,
      :languages => @profile.languages,
      :name => @profile.first_name + " " + @profile.last_name,
      :location => @profile.location,
      :area => @profile.country,
      :industry => @profile.industry,
      :picture => @profile.picture,
      :organizations => @profile.organizations,
      :groups => @profile.groups,
      :education => @profile.education,
      :websites => @profile.websites,
      :profile_url => @url,
      :current => status,
      :timestamp => Time.now,
      :related_people => @related_people)
    c.merge!(:pic_path => getPic)
    return c
  end

  # Downloads the profile picture (if any) into public/uploads/pictures
  # and returns its local path, or nil when the profile has no picture.
  def getPic
    if @profile.picture
      path = @profile.picture.split("/")
      # Compute the target filename once (was duplicated in the original).
      filename = path[path.length-1].chomp.strip
      if !File.file?("public/uploads/pictures/" + filename)
        begin
          # SECURITY FIX: the argument-list form of system bypasses the
          # shell, so a crafted picture URL cannot inject shell commands
          # (the old backtick interpolation `wget ... #{...}` could).
          system("wget", "-P", "public/uploads/pictures", @profile.picture)
        rescue StandardError
          # Best-effort: a failed download still returns the expected path.
        end
      end

      return "public/uploads/pictures/" + filename
    end
  end
end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: linkedindata
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.12
4
+ version: 0.0.13
5
5
  platform: ruby
6
6
  authors:
7
7
  - M. C. McGrath
@@ -15,7 +15,10 @@ email: shidash@shidash.com
15
15
  executables: []
16
16
  extensions: []
17
17
  extra_rdoc_files: []
18
- files: []
18
+ files:
19
+ - lib/linkedindata.rb
20
+ - lib/parseprofile.rb
21
+ - lib/getrelated.rb
19
22
  homepage: https://github.com/transparencytoolkit/linkedindata
20
23
  licenses:
21
24
  - GPL