RubyGems - linkedin-scraper - Versions diffs - 0.0.3 → 0.0.5 - Mend

linkedin-scraper 0.0.3 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

data/README.rdoc +0 -36
data/lib/linkedin-scraper.rb +1 -4
data/lib/linkedin-scraper/profile.rb +9 -22
data/lib/linkedin-scraper/version.rb +1 -1
data/linkedin-scraper.gemspec +3 -3
metadata +34 -80
data/lib/linkedin-scraper/client.rb +0 -125
data/lib/linkedin-scraper/contact.rb +0 -134

data/README.rdoc CHANGED Viewed

@@ -71,42 +71,6 @@ Then you can see the scraped data like this:
   #        :company => "Better Labs"
   #    },
-= Examples
-When a link is given, it scrapes the profile and gets the data
-  attr_accessor :country = "India",
-    attr_accessor :current_companies = [
-        [0] {
-            :current_company => "Better Labs",
-              :current_title => "Software Engineer Core Platform"
-        }
-    ],
-  attr_accessor :first_name = "Yatish",
-  attr_accessor :industry = "Information Technology and Services",
-  attr_accessor :last_name = "Mehta",
-  attr_accessor :linkedin_url = "http://in.linkedin.com/pub/yatish-mehta/22/460/a86",
-  attr_accessor :location = "Pune",
-  attr_accessor :past_companies = [
-      [0] {
-          :past_company => "Consumyze Software",
-            :past_title => "Trainee"
-      },
-      [1] {
-          :past_company => "SunGard Global Services",
-            :past_title => "Project Intern"
-      }
-  ],
-  attr_accessor :recommended_visitors = [
-      [0] {
-             :link =>  href="http://in.linkedin.com/in/nileshavhad?trk=pub-pbmap",
-             :name => "Nilesh Avhad",
-            :title => "Engineering Manager",
-          :company => "Better Labs"
-      },
-    ],
-    attr_accessor :title = "Software Engineer Core Platform at BetterLabs"
 = ZOMG Fork! Thank you!

data/lib/linkedin-scraper.rb CHANGED Viewed

@@ -1,10 +1,7 @@
 require "linkedin-scraper/version"
 require "rubygems"
 require "mechanize"
-require "awesome_print"
+Dir["#{File.expand_path(File.dirname(__FILE__))}/linkedin-scraper/*.rb"].each {|file| require file }
-%w(client contact profile).each do |file|
-  require File.join(File.dirname(__FILE__), 'linkedin-scraper', file)
-end

data/lib/linkedin-scraper/profile.rb CHANGED Viewed

@@ -1,22 +1,11 @@
-# To change this template, choose Tools | Templates
-# and open the template in the editor.
+USER_AGENTS = ["Windows IE 6", "Windows IE 7", "Windows Mozilla", "Mac Safari", "Mac FireFox", "Mac Mozilla", "Linux Mozilla", "Linux Firefox", "Linux Konqueror"]
 module Linkedin
-  class Profile
-    USER_AGENTS = ["Windows IE 6", "Windows IE 7", "Windows Mozilla", "Mac Safari", "Mac FireFox", "Mac Mozilla", "Linux Mozilla", "Linux Firefox", "Linux Konqueror"]
+  class Profile
     #the First name of the contact
-    attr_accessor :first_name
-    #the last name of the contact
-    attr_accessor :last_name
-    #the linkedin job title
-    attr_accessor :title
-    #the location of the contact
-    attr_accessor :location
-    #the country of the contact
-    attr_accessor :country
-    #the domain for which the contact belongs
-    attr_accessor :industry
-    #the entire profile of the contact
-    attr_accessor :profile
+    attr_accessor :first_name,:last_name,:title,:location,:country,
+                  :industry, :linkedin_url,:recommended_visitors,:profile,
+                  :page
     #Array of hash containing its past job companies and job profile
     #Example
@@ -46,9 +35,7 @@ module Linkedin
     #  ]
     attr_accessor :current_companies
     #url of the profile
-    attr_accessor :linkedin_url
-    #Array of hash containing its recommended visitors which come on the
-    attr_accessor :recommended_visitors
     def initialize(page,url)
       @first_name=get_first_name(page)
@@ -68,8 +55,8 @@ module Linkedin
         @agent=Mechanize.new
         @agent.user_agent_alias = USER_AGENTS.sample
         @agent.max_history = 0
-        page=@agent.get url
-        return Linkedin::Profile.new(page, url)
+        @page=@agent.get url
+        return Linkedin::Profile.new(@page, url)
       rescue=>e
         puts e
       end

data/lib/linkedin-scraper/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 module Linkedin
   module Scraper
-    VERSION = "0.0.3"
+    VERSION = "0.0.5"
   end
 end

data/linkedin-scraper.gemspec CHANGED Viewed

@@ -7,9 +7,9 @@ Gem::Specification.new do |gem|
   gem.description   = %q{Scrapes the linkedin profile when a url is given }
   gem.summary       = %q{when a url of  public linkedin profile page is given it scrapes the entire page and converts into a accessible object}
   gem.homepage      = "https://github.com/yatishmehta27/linkedin-scraper"
-   gem.add_dependency(%q<httparty>, [">= 0"])
-gem.add_dependency(%q<mechanize>, [">= 0"])
-gem.add_dependency(%q<awesome_print>, [">= 0"])
+   gem.add_dependency(%q<mechanize>, [">= 0"])
   gem.files         = `git ls-files`.split($\)
   gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
   gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})

metadata CHANGED Viewed

@@ -1,117 +1,71 @@
---- !ruby/object:Gem::Specification
+--- !ruby/object:Gem::Specification
 name: linkedin-scraper
-version: !ruby/object:Gem::Version
-  hash: 25
+version: !ruby/object:Gem::Version
+  version: 0.0.5
   prerelease:
-  segments:
-  - 0
-  - 0
-  - 3
-  version: 0.0.3
 platform: ruby
-authors:
+authors:
 - Yatish Mehta
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2012-04-12 00:00:00 Z
-dependencies:
-- !ruby/object:Gem::Dependency
-  name: httparty
-  prerelease: false
-  requirement: &id001 !ruby/object:Gem::Requirement
-    none: false
-    requirements:
-    - - ">="
-      - !ruby/object:Gem::Version
-        hash: 3
-        segments:
-        - 0
-        version: "0"
-  type: :runtime
-  version_requirements: *id001
-- !ruby/object:Gem::Dependency
+date: 2012-07-23 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
   name: mechanize
-  prerelease: false
-  requirement: &id002 !ruby/object:Gem::Requirement
+  requirement: !ruby/object:Gem::Requirement
     none: false
-    requirements:
-    - - ">="
-      - !ruby/object:Gem::Version
-        hash: 3
-        segments:
-        - 0
-        version: "0"
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
   type: :runtime
-  version_requirements: *id002
-- !ruby/object:Gem::Dependency
-  name: awesome_print
   prerelease: false
-  requirement: &id003 !ruby/object:Gem::Requirement
+  version_requirements: !ruby/object:Gem::Requirement
     none: false
-    requirements:
-    - - ">="
-      - !ruby/object:Gem::Version
-        hash: 3
-        segments:
-        - 0
-        version: "0"
-  type: :runtime
-  version_requirements: *id003
-description: "Scrapes the linkedin profile when a url is given "
-email:
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+description: ! 'Scrapes the linkedin profile when a url is given '
+email:
 - yatishmehta27@gmail.com
 executables: []
 extensions: []
 extra_rdoc_files: []
-files:
+files:
 - .gitignore
 - Gemfile
 - LICENSE
 - README.rdoc
 - Rakefile
 - lib/linkedin-scraper.rb
-- lib/linkedin-scraper/client.rb
-- lib/linkedin-scraper/contact.rb
 - lib/linkedin-scraper/profile.rb
 - lib/linkedin-scraper/version.rb
 - linkedin-scraper.gemspec
 homepage: https://github.com/yatishmehta27/linkedin-scraper
 licenses: []
 post_install_message:
 rdoc_options: []
-require_paths:
+require_paths:
 - lib
-required_ruby_version: !ruby/object:Gem::Requirement
+required_ruby_version: !ruby/object:Gem::Requirement
   none: false
-  requirements:
-  - - ">="
-    - !ruby/object:Gem::Version
-      hash: 3
-      segments:
-      - 0
-      version: "0"
-required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
   none: false
-  requirements:
-  - - ">="
-    - !ruby/object:Gem::Version
-      hash: 3
-      segments:
-      - 0
-      version: "0"
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 1.8.10
+rubygems_version: 1.8.24
 signing_key:
 specification_version: 3
-summary: when a url of  public linkedin profile page is given it scrapes the entire page and converts into a accessible object
+summary: when a url of  public linkedin profile page is given it scrapes the entire
+  page and converts into a accessible object
 test_files: []

data/lib/linkedin-scraper/client.rb DELETED Viewed

@@ -1,125 +0,0 @@
-# To change this template, choose Tools | Templates
-# and open the template in the editor.
-module Linkedin
-  class Client
-    USER_AGENTS = ["Windows IE 6", "Windows IE 7", "Windows Mozilla", "Mac Safari", "Mac FireFox", "Mac Mozilla", "Linux Mozilla", "Linux Firefox", "Linux Konqueror"]
-    attr_accessor :contacts ,:matched_tag,:probability
-    def initialize(first_name,last_name ,company,options={})
-      @first_name=first_name.downcase
-      @last_name=last_name.downcase
-      @company=company
-      @country=options[:country] || "us"
-      @search_linkedin_url="http://#{@country}.linkedin.com/pub/dir/#{@first_name}/#{@last_name}"
-      @contacts=[]
-      @links=[]
-      get_agent
-    end
-    def get_agent
-      @agent=Mechanize.new
-      @agent.user_agent_alias = USER_AGENTS.sample
-      @agent.max_history = 0
-      @agent
-    end
-    def get_contacts
-      begin
-        sleep(2+rand(4))
-        puts "===>Father:Scrapping linkedin url "+ @search_linkedin_url
-        @page=@agent.get @search_linkedin_url
-        @page.search(".vcard").each do |node|
-          @contacts<<Linkedin::Contact.new(node)
-        end
-      rescue Mechanize::ResponseCodeError=>e
-        puts "RESCUE"
-      end
-      return @contacts
-    end
-    #TODO need to refactor this function need seperate function of each case
-    def get_verified_contact
-      get_contacts
-      @contacts.each do |contact|
-        #check current company
-        contact.current_companies.each do |company|
-          if company[:current_company]
-            if company[:current_company].match(/#{@company}/i)
-              @matched_tag="CURRENT"
-              return contact
-            end
-          end
-        end if contact.current_companies
-        #title of profile
-        if contact.title.match(/#{@company}/i)
-          @matched_tag="CURRENT"
-          return contact
-        end
-        #check past companies
-        contact.past_companies.each do |company|
-          if company[:past_company]
-            if company[:past_company].match(/#{@company}/i)
-              @matched_tag="PAST"
-              return contact
-            end
-          end
-        end if contact.past_companies
-        #
-        #Going in to profile homepage and then checking
-        #
-        sleep(2+rand(4))
-        puts "===>Child:Scrapping linkedin url: "+ contact.linkedin_url
-        profile=contact.get_profile(get_agent.get(contact.linkedin_url),contact.linkedin_url)
-        #check current company
-        profile.current_companies.each do |company|
-          if company[:current_company]
-            if company[:current_company].match(/#{@company}/i)
-              @matched_tag="CURRENT"
-              return profile
-            end
-          end
-        end if profile.current_companies
-        #title of profile
-        if profile.title
-          if profile.title.match(/#{@company}/i)
-            @matched_tag="CURRENT"
-            return profile
-          end
-        end
-        #check past companies
-        profile.past_companies.each do |company|
-          if company[:past_company]
-            if company[:past_company].match(/#{@company}/i)
-              @matched_tag="PAST"
-              return profile
-            end
-          end
-        end if profile.past_companies
-        #check recommended visitors
-        if profile.recommended_visitors
-          cnt=0
-          profile.recommended_visitors.each do |visitor|
-            if visitor[:company]
-              if visitor[:company].match(/#{@company}/i)
-                cnt+=1
-              end
-            end
-          end
-          @probability=cnt/profile.recommended_visitors.length.to_f
-          @matched_tag="RECOMMENDED"
-          return profile if @probability>=0.5
-        end
-      end unless @contacts.empty?
-      return nil
-    end
-  end
-end

data/lib/linkedin-scraper/contact.rb DELETED Viewed

@@ -1,134 +0,0 @@
-# To change this template, choose Tools | Templates
-# and open the template in the editor.
-module Linkedin
-  class Contact
-    #the First name of the contact
-    attr_accessor :first_name
-    #the last name of the contact
-    attr_accessor :last_name
-    #the linkedin job title
-    attr_accessor :title
-    #the location of the contact
-    attr_accessor :location
-    #the country of the contact
-    attr_accessor :country
-    #the domain for which the contact belongs
-    attr_accessor :industry
-    #the entire profile of the contact
-    attr_accessor :profile
-    #Array of hash containing its past job companies and job profile
-    #Example
-    #  [
-    #    [0] {
-    #          :past_title => "Intern",
-    #        :past_company => "Sungard"
-    #        },
-    #    [1] {
-    #          :past_title => "Software Developer",
-    #        :past_company => "Microsoft"
-    #        }
-    #  ]
-    attr_accessor :past_companies
-    #Array of hash containing its current job companies and job profile
-    #Example
-    #  [
-    #    [0] {
-    #          :current_title => "Intern",
-    #        :current_company => "Sungard"
-    #        },
-    #    [1] {
-    #          :current_title => "Software Developer",
-    #        :current_company => "Microsoft"
-    #        }
-    #  ]
-    attr_accessor :current_companies
-    attr_accessor :linkedin_url
-    attr_accessor :profile
-    def initialize(node=[])
-      unless node.class==Array
-        @first_name=get_first_name(node)
-        @last_name=get_last_name(node)
-        @title=get_title(node)
-        @location=get_location(node)
-        @country=get_country(node)
-        @industry=get_industry(node)
-        @current_companies=get_current_companies node
-        @past_companies=get_past_companies node
-        @linkedin_url=get_linkedin_url node
-      end
-    end
-    #page is a Nokogiri::XML node of the profile page
-    #returns object of Linkedin::Profile
-    def get_profile page,url
-      @profile=Linkedin::Profile.new(page,url)
-    end
-    private
-    def get_first_name node
-      return node.at(".given-name").text.strip if node.search(".given-name").first
-    end
-    def get_last_name node
-      return node.at(".family-name").text.strip if node.search(".family-name").first
-    end
-    def get_title node
-      return node.at(".title").text.gsub(/\s+/, " ").strip if node.search(".title").first
-    end
-    def get_location node
-      return node.at(".location").text.split(",").first.strip if node.search(".location").first
-    end
-    def get_country node
-      return node.at(".location").text.split(",").last.strip if node.search(".location").first
-    end
-    def get_industry node
-      return node.at(".industry").text.strip if node.search(".industry").first
-    end
-    def get_linkedin_url node
-      node.at("h2/strong/a").attributes["href"]
-    end
-    def get_current_companies node
-      current_cs=[]
-      if node.search(".current-content").first
-        node.at(".current-content").text.split(",").each do |content|
-          title,company=content.split(" at ")
-          company=company.gsub(/\s+/, " ").strip if company
-          title=title.gsub(/\s+/, " ").strip if title
-          current_company={:current_company=>company,:current_title=> title}
-          current_cs<<current_company
-        end
-        return current_cs
-      end
-    end
-    def get_past_companies node
-      past_cs=[]
-      if node.search(".past-content").first
-        node.at(".past-content").text.split(",").each do |content|
-          title,company=content.split(" at ")
-          company=company.gsub(/\s+/, " ").strip if company
-          title=title.gsub(/\s+/, " ").strip if title
-          past_company={:past_company=>company,:past_title=> title }
-          past_cs<<past_company
-        end
-        return past_cs
-      end
-    end
-  end
-end