linkedindata 0.0.12 → 0.0.13
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/getrelated.rb +50 -0
- data/lib/linkedindata.rb +85 -0
- data/lib/parseprofile.rb +76 -0
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3bc1d73941a6228073ef054491452a8ecc34f0d6
|
4
|
+
data.tar.gz: 35046bb711b902b5a8650b6995d291afc48c3702
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5286e02124965b5f02ecbad34699840071ca0938c52e9da7d796683f150e0f796fa5d12ddca85763e997c95fbc1b99e573e89cc21344dddfd5709beaaa3434c8
|
7
|
+
data.tar.gz: 0015c5ef88cb4c14e187412e0da448ef40194c24d12fbbddc71ea50682e5769e9fa76930925eb08e654d1db7de91d1e5a3be7027cd41003b2beba0019f6ff82b
|
data/lib/getrelated.rb
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'nokogiri'
|
3
|
+
require 'open-uri'
|
4
|
+
|
5
|
+
class GetRelated
  # Scrapes the "People Also Viewed" sidebar of a LinkedIn public
  # profile page and extracts the names listed there.
  #
  # url - String URL of the LinkedIn profile to fetch.
  def initialize(url)
    @url = url
    @relatedlist = Array.new
  end

  # Get the list of names of related people.
  #
  # Returns an Array of String names (possibly empty), or nil if the
  # page could not be parsed into a document.
  def getList
    # URI.open instead of Kernel#open: Kernel#open on an attacker-influenced
    # string is a command-injection hazard (a URL beginning with "|" is
    # executed as a shell command) and its URL behavior is removed in
    # Ruby 3. open-uri is already required by this file.
    html = Nokogiri::HTML(URI.open(@url))

    if html
      namelist = Array.new

      # Each related person is an <li> inside the browse-map module;
      # the <h4> element holds the person's display name.
      html.css("div.insights-browse-map").each do |d|
        d.css("li").each do |l|
          namelist.push(l.css("h4").text)
        end
      end

      return namelist
    end
  end
end
|
29
|
+
|
30
|
+
# This is just an outline for the next version of getrelated
|
31
|
+
|
32
|
+
# Add degree back as field (0 by default)
|
33
|
+
# Loop through all profiles
|
34
|
+
# Load n times (need to determine optimal num)
|
35
|
+
# Save list of related people (for profile- make list and append if seen listed as related or in related list)
|
36
|
+
# Save overall list of related people (with URLs and min degree)
|
37
|
+
# Track min degrees out
|
38
|
+
|
39
|
+
# Go through overall list of related people
|
40
|
+
# Parse profile
|
41
|
+
# Make sure degree is correct when saved
|
42
|
+
# Maybe save in JSONs by degree
|
43
|
+
|
44
|
+
|
45
|
+
# Info:
|
46
|
+
# Profiles of related people
|
47
|
+
# Degrees for all profiles
|
48
|
+
# Related people list on each profile (complete)
|
49
|
+
|
50
|
+
# Deduplicate
|
data/lib/linkedindata.rb
ADDED
@@ -0,0 +1,85 @@
|
|
1
|
+
require 'mechanize'
|
2
|
+
require 'linkedin-scraper'
|
3
|
+
require 'json'
|
4
|
+
require 'nokogiri'
|
5
|
+
require 'open-uri'
|
6
|
+
load 'parseprofile.rb'
|
7
|
+
require 'pry'
|
8
|
+
require 'urlarchiver'
|
9
|
+
|
10
|
+
class LinkedinData
  # Scrapes LinkedIn profile data discovered through a Google
  # "site:linkedin.com/pub" search and accumulates it into @output.
  #
  # input    - String search terms appended to the site: query.
  # todegree - degree-of-separation limit intended for the related-profile
  #            crawl. The original code silently discarded this argument;
  #            it is now stored (unused by this version) so the planned
  #            crawl can consume it.
  def initialize(input, todegree)
    @input = input
    @todegree = todegree
    @output = Array.new
    @startindex = 10 # Google "start" offset of the next results page
  end

  # Searches for profiles on Google and hands the first results page
  # to #examine.
  def search
    agent = Mechanize.new
    agent.user_agent_alias = 'Linux Firefox'
    gform = agent.get("http://google.com").form("f")
    gform.q = "site:linkedin.com/pub " + @input
    page = agent.submit(gform, gform.buttons.first)
    examine(page)
  end

  # Examines a search results page: scrapes every LinkedIn profile link
  # on it, then recurses into the next results page (rate-limited).
  #
  # TODO(next version): separate link collection from paging; scrape each
  # page's profile links in its own thread with its own output set and a
  # thread-safe merge at the end.
  def examine(page)
    page.links.each do |link|
      # Profile links only: skip Google cache copies and the query echo.
      if (link.href.include? "linkedin.com") && (!link.href.include? "webcache") && (!link.href.include? "site:linkedin.com/pub+")
        saveurl = link.href.split("?q=")

        if saveurl[1]
          url = saveurl[1].split("&")
          begin
            scrape(url[0])
          rescue StandardError
            # Best effort: one bad profile must not abort the crawl.
          end
        end
      end

      # Find the link to the next page and go to it. Only the link whose
      # start offset matches @startindex is followed, so each results
      # page is visited exactly once.
      if (link.href.include? "&sa=N") && (link.href.include? "&start=")
        url1 = link.href.split("&start=")
        url2 = url1[1].split("&sa=N")

        if url2[0].to_i == @startindex
          sleep(rand(5..10)) # throttle between pages to avoid blocking
          @startindex += 10
          agent = Mechanize.new
          examine(agent.get("http://google.com" + link.href))
        end
      end
    end
  end

  # Scrapes a single profile URL and appends its parsed records to
  # @output. Download failures are swallowed: `profile` stays nil and
  # the guard below skips parsing.
  def scrape(url)
    begin
      url.gsub!("https", "http") # the scraper expects plain http URLs
      profile = Linkedin::Profile.get_profile(url)
    rescue StandardError
      # Deliberate best-effort download; leave `profile` nil.
    end

    if profile
      p = ParseProfile.new(profile, url)
      @output.concat(p.parse)
    end
  end

  # Gets all data (runs the search) and returns it as pretty-printed JSON.
  def getData
    search
    formatted_json = JSON.pretty_generate(@output)
    return formatted_json
  end
end
|
data/lib/parseprofile.rb
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
require 'json'
|
2
|
+
load 'getrelated.rb'
|
3
|
+
|
4
|
+
class ParseProfile
  # Flattens a scraped LinkedIn profile into one output record per
  # company/position, each merged with the person-level fields.
  #
  # profile - the scraped profile object (duck-typed: must respond to the
  #           accessors used in #parseCompany and #getPic).
  # url     - String URL of the profile being parsed.
  def initialize(profile, url)
    @profile = profile
    @url = url
    @output = Array.new
    # The original code had a bare `@related_people` expression here,
    # which is a no-op; initialize it explicitly for clarity. It is
    # populated by #parse before the merges that read it.
    @related_people = nil
  end

  # Parse profile: collect related people, emit one record per current
  # and past company, clean up stray picture downloads, and return the
  # accumulated records.
  #
  # Returns the Array of record Hashes.
  def parse
    begin
      g = GetRelated.new(@url)
      @related_people = g.getList
    rescue StandardError
      # Best effort: related-people scraping is optional.
    end

    # Parse profiles for current companies
    @profile.current_companies.each do |c|
      @output.push(parseCompany(c, "Yes"))
    end

    # Parse past position/company info
    @profile.past_companies.each do |c|
      @output.push(parseCompany(c, "No"))
    end

    # Clean up directories: remove duplicate wget downloads
    # (e.g. "photo.jpg.1") left in the uploads root.
    pics = Dir["public/uploads/*.jpg.*"]
    pics.each do |p|
      File.delete(p)
    end

    return @output
  end

  # Merge person data with role data.
  #
  # c      - Hash of company/position fields (mutated in place).
  # status - "Yes" for a current position, "No" for a past one.
  #
  # Returns the merged Hash.
  def parseCompany(c, status)
    c.merge!(
      :skills => @profile.skills,
      :certifications => @profile.certifications,
      :languages => @profile.languages,
      :name => @profile.first_name + " " + @profile.last_name,
      :location => @profile.location,
      :area => @profile.country,
      :industry => @profile.industry,
      :picture => @profile.picture,
      :organizations => @profile.organizations,
      :groups => @profile.groups,
      :education => @profile.education,
      :websites => @profile.websites,
      :profile_url => @url,
      :current => status,
      :timestamp => Time.now,
      :related_people => @related_people)
    c.merge!(:pic_path => getPic)
    return c
  end

  # Download the profile picture into public/uploads/pictures (if not
  # already cached) and return its local path. Returns nil when the
  # profile has no picture.
  def getPic
    if @profile.picture
      path = @profile.picture.split("/")
      filename = path[path.length - 1].chomp.strip

      if !File.file?("public/uploads/pictures/" + filename)
        begin
          # Argument-vector form of Kernel#system bypasses the shell, so
          # the scraped (untrusted) picture URL cannot inject commands.
          # The old backtick call interpolated it into a shell string.
          system("wget", "-P", "public/uploads/pictures", @profile.picture)
        rescue StandardError
          # Best effort: a failed download must not abort parsing.
        end
      end

      return "public/uploads/pictures/" + filename
    end
  end
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: linkedindata
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.13
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- M. C. McGrath
|
@@ -15,7 +15,10 @@ email: shidash@shidash.com
|
|
15
15
|
executables: []
|
16
16
|
extensions: []
|
17
17
|
extra_rdoc_files: []
|
18
|
-
files:
|
18
|
+
files:
|
19
|
+
- lib/linkedindata.rb
|
20
|
+
- lib/parseprofile.rb
|
21
|
+
- lib/getrelated.rb
|
19
22
|
homepage: https://github.com/transparencytoolkit/linkedindata
|
20
23
|
licenses:
|
21
24
|
- GPL
|