RubyGems - linkedin-scraper - Versions diffs - 0.0.3 → 0.0.5 - Mend

linkedin-scraper 0.0.3 → 0.0.5

Files changed (8)

data/README.rdoc +0 -36
data/lib/linkedin-scraper.rb +1 -4
data/lib/linkedin-scraper/profile.rb +9 -22
data/lib/linkedin-scraper/version.rb +1 -1
data/linkedin-scraper.gemspec +3 -3
metadata +34 -80
data/lib/linkedin-scraper/client.rb +0 -125
data/lib/linkedin-scraper/contact.rb +0 -134

data/README.rdoc CHANGED

@@ -71,42 +71,6 @@ Then you can see the scraped data like this:
   #        :company => "Better Labs"
   #    },
-= Examples
-When a link is given, it scrapes the profile and gets the data
-  attr_accessor :country = "India",
-    attr_accessor :current_companies = [
-        [0] {
-            :current_company => "Better Labs",
-              :current_title => "Software Engineer Core Platform"
-        }
-    ],
-  attr_accessor :first_name = "Yatish",
-  attr_accessor :industry = "Information Technology and Services",
-  attr_accessor :last_name = "Mehta",
-  attr_accessor :linkedin_url = "http://in.linkedin.com/pub/yatish-mehta/22/460/a86",
-  attr_accessor :location = "Pune",
-  attr_accessor :past_companies = [
-      [0] {
-          :past_company => "Consumyze Software",
-            :past_title => "Trainee"
-      },
-      [1] {
-          :past_company => "SunGard Global Services",
-            :past_title => "Project Intern"
-      }
-  ],
-  attr_accessor :recommended_visitors = [
-      [0] {
-             :link =>  href="http://in.linkedin.com/in/nileshavhad?trk=pub-pbmap",
-             :name => "Nilesh Avhad",
-            :title => "Engineering Manager",
-          :company => "Better Labs"
-      },
-    ],
-    attr_accessor :title = "Software Engineer Core Platform at BetterLabs"
 = ZOMG Fork! Thank you!

data/lib/linkedin-scraper.rb CHANGED

@@ -1,10 +1,7 @@
 require "linkedin-scraper/version"
 require "rubygems"
 require "mechanize"
-require "awesome_print"
+Dir["#{File.expand_path(File.dirname(__FILE__))}/linkedin-scraper/*.rb"].each {|file| require file }
-%w(client contact profile).each do |file|
-  require File.join(File.dirname(__FILE__), 'linkedin-scraper', file)
-end

data/lib/linkedin-scraper/profile.rb CHANGED

@@ -1,22 +1,11 @@
-# To change this template, choose Tools | Templates
-# and open the template in the editor.
+USER_AGENTS = ["Windows IE 6", "Windows IE 7", "Windows Mozilla", "Mac Safari", "Mac FireFox", "Mac Mozilla", "Linux Mozilla", "Linux Firefox", "Linux Konqueror"]
 module Linkedin
-  class Profile
-    USER_AGENTS = ["Windows IE 6", "Windows IE 7", "Windows Mozilla", "Mac Safari", "Mac FireFox", "Mac Mozilla", "Linux Mozilla", "Linux Firefox", "Linux Konqueror"]
+  class Profile
     #the First name of the contact
-    attr_accessor :first_name
-    #the last name of the contact
-    attr_accessor :last_name
-    #the linkedin job title
-    attr_accessor :title
-    #the location of the contact
-    attr_accessor :location
-    #the country of the contact
-    attr_accessor :country
-    #the domain for which the contact belongs
-    attr_accessor :industry
-    #the entire profile of the contact
-    attr_accessor :profile
+    attr_accessor :first_name,:last_name,:title,:location,:country,
+                  :industry, :linkedin_url,:recommended_visitors,:profile,
+                  :page
     #Array of hash containing its past job companies and job profile
     #Example
@@ -46,9 +35,7 @@ module Linkedin
     #  ]
     attr_accessor :current_companies
     #url of the profile
-    attr_accessor :linkedin_url
-    #Array of hash containing its recommended visitors which come on the
-    attr_accessor :recommended_visitors
     def initialize(page,url)
       @first_name=get_first_name(page)
@@ -68,8 +55,8 @@ module Linkedin
         @agent=Mechanize.new
         @agent.user_agent_alias = USER_AGENTS.sample
         @agent.max_history = 0
-        page=@agent.get url
-        return Linkedin::Profile.new(page, url)
+        @page=@agent.get url
+        return Linkedin::Profile.new(@page, url)
       rescue=>e
         puts e
       end

data/lib/linkedin-scraper/version.rb CHANGED

@@ -1,5 +1,5 @@
 module Linkedin
   module Scraper
-    VERSION = "0.0.3"
+    VERSION = "0.0.5"
   end
 end

data/linkedin-scraper.gemspec CHANGED

@@ -7,9 +7,9 @@ Gem::Specification.new do |gem|
   gem.description   = %q{Scrapes the linkedin profile when a url is given }
   gem.summary       = %q{when a url of  public linkedin profile page is given it scrapes the entire page and converts into a accessible object}
   gem.homepage      = "https://github.com/yatishmehta27/linkedin-scraper"
-   gem.add_dependency(%q<httparty>, [">= 0"])
-gem.add_dependency(%q<mechanize>, [">= 0"])
-gem.add_dependency(%q<awesome_print>, [">= 0"])
+   gem.add_dependency(%q<mechanize>, [">= 0"])
   gem.files         = `git ls-files`.split($\)
   gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
   gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})

metadata CHANGED

@@ -1,117 +1,71 @@
---- !ruby/object:Gem::Specification
+--- !ruby/object:Gem::Specification
 name: linkedin-scraper
-version: !ruby/object:Gem::Version
-  hash: 25
+version: !ruby/object:Gem::Version
+  version: 0.0.5
   prerelease:
-  segments:
-  - 0
-  - 0
-  - 3
-  version: 0.0.3
 platform: ruby
-authors:
+authors:
 - Yatish Mehta
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2012-04-12 00:00:00 Z
-dependencies:
-- !ruby/object:Gem::Dependency
-  name: httparty
-  prerelease: false
-  requirement: &id001 !ruby/object:Gem::Requirement
-    none: false
-    requirements:
-    - - ">="
-      - !ruby/object:Gem::Version
-        hash: 3
-        segments:
-        - 0
-        version: "0"
-  type: :runtime
-  version_requirements: *id001
-- !ruby/object:Gem::Dependency
+date: 2012-07-23 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
   name: mechanize
-  prerelease: false
-  requirement: &id002 !ruby/object:Gem::Requirement
+  requirement: !ruby/object:Gem::Requirement
     none: false
-    requirements:
-    - - ">="
-      - !ruby/object:Gem::Version
-        hash: 3
-        segments:
-        - 0
-        version: "0"
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
   type: :runtime
-  version_requirements: *id002
-- !ruby/object:Gem::Dependency
-  name: awesome_print
   prerelease: false
-  requirement: &id003 !ruby/object:Gem::Requirement
+  version_requirements: !ruby/object:Gem::Requirement
     none: false
-    requirements:
-    - - ">="
-      - !ruby/object:Gem::Version
-        hash: 3
-        segments:
-        - 0
-        version: "0"
-  type: :runtime
-  version_requirements: *id003
-description: "Scrapes the linkedin profile when a url is given "
-email:
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+description: ! 'Scrapes the linkedin profile when a url is given '
+email:
 - yatishmehta27@gmail.com
 executables: []
 extensions: []
 extra_rdoc_files: []
-files:
+files:
 - .gitignore
 - Gemfile
 - LICENSE
 - README.rdoc
 - Rakefile
 - lib/linkedin-scraper.rb
-- lib/linkedin-scraper/client.rb
-- lib/linkedin-scraper/contact.rb
 - lib/linkedin-scraper/profile.rb
 - lib/linkedin-scraper/version.rb
 - linkedin-scraper.gemspec
 homepage: https://github.com/yatishmehta27/linkedin-scraper
 licenses: []
 post_install_message:
 rdoc_options: []
-require_paths:
+require_paths:
 - lib
-required_ruby_version: !ruby/object:Gem::Requirement
+required_ruby_version: !ruby/object:Gem::Requirement
   none: false
-  requirements:
-  - - ">="
-    - !ruby/object:Gem::Version
-      hash: 3
-      segments:
-      - 0
-      version: "0"
-required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
   none: false
-  requirements:
-  - - ">="
-    - !ruby/object:Gem::Version
-      hash: 3
-      segments:
-      - 0
-      version: "0"
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 1.8.10
+rubygems_version: 1.8.24
 signing_key:
 specification_version: 3
-summary: when a url of  public linkedin profile page is given it scrapes the entire page and converts into a accessible object
+summary: when a url of  public linkedin profile page is given it scrapes the entire
+  page and converts into a accessible object
 test_files: []

data/lib/linkedin-scraper/client.rb DELETED

@@ -1,125 +0,0 @@
-# To change this template, choose Tools | Templates
-# and open the template in the editor.
-module Linkedin
-  class Client
-    USER_AGENTS = ["Windows IE 6", "Windows IE 7", "Windows Mozilla", "Mac Safari", "Mac FireFox", "Mac Mozilla", "Linux Mozilla", "Linux Firefox", "Linux Konqueror"]
-    attr_accessor :contacts ,:matched_tag,:probability
-    def initialize(first_name,last_name ,company,options={})
-      @first_name=first_name.downcase
-      @last_name=last_name.downcase
-      @company=company
-      @country=options[:country] || "us"
-      @search_linkedin_url="http://#{@country}.linkedin.com/pub/dir/#{@first_name}/#{@last_name}"
-      @contacts=[]
-      @links=[]
-      get_agent
-    end
-    def get_agent
-      @agent=Mechanize.new
-      @agent.user_agent_alias = USER_AGENTS.sample
-      @agent.max_history = 0
-      @agent
-    end
-    def get_contacts
-      begin
-        sleep(2+rand(4))
-        puts "===>Father:Scrapping linkedin url "+ @search_linkedin_url
-        @page=@agent.get @search_linkedin_url
-        @page.search(".vcard").each do |node|
-          @contacts<<Linkedin::Contact.new(node)
-        end
-      rescue Mechanize::ResponseCodeError=>e
-        puts "RESCUE"
-      end
-      return @contacts
-    end
-    #TODO need to refactor this function need seperate function of each case
-    def get_verified_contact
-      get_contacts
-      @contacts.each do |contact|
-        #check current company
-        contact.current_companies.each do |company|
-          if company[:current_company]
-            if company[:current_company].match(/#{@company}/i)
-              @matched_tag="CURRENT"
-              return contact
-            end
-          end
-        end if contact.current_companies
-        #title of profile
-        if contact.title.match(/#{@company}/i)
-          @matched_tag="CURRENT"
-          return contact
-        end
-        #check past companies
-        contact.past_companies.each do |company|
-          if company[:past_company]
-            if company[:past_company].match(/#{@company}/i)
-              @matched_tag="PAST"
-              return contact
-            end
-          end
-        end if contact.past_companies
-        #
-        #Going in to profile homepage and then checking
-        #
-        sleep(2+rand(4))
-        puts "===>Child:Scrapping linkedin url: "+ contact.linkedin_url
-        profile=contact.get_profile(get_agent.get(contact.linkedin_url),contact.linkedin_url)
-        #check current company
-        profile.current_companies.each do |company|
-          if company[:current_company]
-            if company[:current_company].match(/#{@company}/i)
-              @matched_tag="CURRENT"
-              return profile
-            end
-          end
-        end if profile.current_companies
-        #title of profile
-        if profile.title
-          if profile.title.match(/#{@company}/i)
-            @matched_tag="CURRENT"
-            return profile
-          end
-        end
-        #check past companies
-        profile.past_companies.each do |company|
-          if company[:past_company]
-            if company[:past_company].match(/#{@company}/i)
-              @matched_tag="PAST"
-              return profile
-            end
-          end
-        end if profile.past_companies
-        #check recommended visitors
-        if profile.recommended_visitors
-          cnt=0
-          profile.recommended_visitors.each do |visitor|
-            if visitor[:company]
-              if visitor[:company].match(/#{@company}/i)
-                cnt+=1
-              end
-            end
-          end
-          @probability=cnt/profile.recommended_visitors.length.to_f
-          @matched_tag="RECOMMENDED"
-          return profile if @probability>=0.5
-        end
-      end unless @contacts.empty?
-      return nil
-    end
-  end
-end

data/lib/linkedin-scraper/contact.rb DELETED

@@ -1,134 +0,0 @@
-# To change this template, choose Tools | Templates
-# and open the template in the editor.
-module Linkedin
-  class Contact
-    #the First name of the contact
-    attr_accessor :first_name
-    #the last name of the contact
-    attr_accessor :last_name
-    #the linkedin job title
-    attr_accessor :title
-    #the location of the contact
-    attr_accessor :location
-    #the country of the contact
-    attr_accessor :country
-    #the domain for which the contact belongs
-    attr_accessor :industry
-    #the entire profile of the contact
-    attr_accessor :profile
-    #Array of hash containing its past job companies and job profile
-    #Example
-    #  [
-    #    [0] {
-    #          :past_title => "Intern",
-    #        :past_company => "Sungard"
-    #        },
-    #    [1] {
-    #          :past_title => "Software Developer",
-    #        :past_company => "Microsoft"
-    #        }
-    #  ]
-    attr_accessor :past_companies
-    #Array of hash containing its current job companies and job profile
-    #Example
-    #  [
-    #    [0] {
-    #          :current_title => "Intern",
-    #        :current_company => "Sungard"
-    #        },
-    #    [1] {
-    #          :current_title => "Software Developer",
-    #        :current_company => "Microsoft"
-    #        }
-    #  ]
-    attr_accessor :current_companies
-    attr_accessor :linkedin_url
-    attr_accessor :profile
-    def initialize(node=[])
-      unless node.class==Array
-        @first_name=get_first_name(node)
-        @last_name=get_last_name(node)
-        @title=get_title(node)
-        @location=get_location(node)
-        @country=get_country(node)
-        @industry=get_industry(node)
-        @current_companies=get_current_companies node
-        @past_companies=get_past_companies node
-        @linkedin_url=get_linkedin_url node
-      end
-    end
-    #page is a Nokogiri::XML node of the profile page
-    #returns object of Linkedin::Profile
-    def get_profile page,url
-      @profile=Linkedin::Profile.new(page,url)
-    end
-    private
-    def get_first_name node
-      return node.at(".given-name").text.strip if node.search(".given-name").first
-    end
-    def get_last_name node
-      return node.at(".family-name").text.strip if node.search(".family-name").first
-    end
-    def get_title node
-      return node.at(".title").text.gsub(/\s+/, " ").strip if node.search(".title").first
-    end
-    def get_location node
-      return node.at(".location").text.split(",").first.strip if node.search(".location").first
-    end
-    def get_country node
-      return node.at(".location").text.split(",").last.strip if node.search(".location").first
-    end
-    def get_industry node
-      return node.at(".industry").text.strip if node.search(".industry").first
-    end
-    def get_linkedin_url node
-      node.at("h2/strong/a").attributes["href"]
-    end
-    def get_current_companies node
-      current_cs=[]
-      if node.search(".current-content").first
-        node.at(".current-content").text.split(",").each do |content|
-          title,company=content.split(" at ")
-          company=company.gsub(/\s+/, " ").strip if company
-          title=title.gsub(/\s+/, " ").strip if title
-          current_company={:current_company=>company,:current_title=> title}
-          current_cs<<current_company
-        end
-        return current_cs
-      end
-    end
-    def get_past_companies node
-      past_cs=[]
-      if node.search(".past-content").first
-        node.at(".past-content").text.split(",").each do |content|
-          title,company=content.split(" at ")
-          company=company.gsub(/\s+/, " ").strip if company
-          title=title.gsub(/\s+/, " ").strip if title
-          past_company={:past_company=>company,:past_title=> title }
-          past_cs<<past_company
-        end
-        return past_cs
-      end
-    end
-  end
-end