RubyGems - socialinvestigator - Versions diffs - 0.0.3 → 0.0.4 - Mend

socialinvestigator 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

checksums.yaml +4 -4
data/.gitignore +1 -0
data/Gemfile +1 -0
data/README.md +24 -3
data/lib/socialinvestigator/cli.rb +4 -0
data/lib/socialinvestigator/cli/net.rb +27 -0
data/lib/socialinvestigator/cli/twitter.rb +5 -1
data/lib/socialinvestigator/client/net.rb +451 -0
data/lib/socialinvestigator/client/standalone_net.rb +458 -0
data/lib/socialinvestigator/config.rb +20 -0
data/lib/socialinvestigator/version.rb +1 -1
data/socialinvestigator.gemspec +3 -0
metadata +47 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 68ac5910584d162d37369db68b69401cc5f85213
-  data.tar.gz: 0cb5ce2e0e7311a36be9a6b0a8c227a7507935cf
+  metadata.gz: b05afd1645671efac34a27455b466ae120bd0796
+  data.tar.gz: d8e96eb8befbf7fb8124b3cba94d711e18157f52
 SHA512:
-  metadata.gz: 56f8c7088524492a8e9a3f6eae31c48d73ae0a958fc41f1a569be0a0810ffd92fdbe0e040babeb72f4e5c0600980b406f7a906015c1e8abee487fdf5c7f72201
-  data.tar.gz: eb6f2479c6db084d9955632b418bfc90a7145c10d8fdf9fba696008adbf2cc84141f14a7f69edffc1a24fe185be78d5cb3f1472acbe3a89b0b5e8568174687b1
+  metadata.gz: 9481ba394a0fdc6380c48a0d2f1eeaeda14b1abedb993f83d95a423acd5be718bbedbd272e3b95e0ce6074b409fa8580550f9df1955f05f9f3dd6aec586d2661
+  data.tar.gz: 3f94308447f5a9bb6a28bd013066637c2cb588c84db01f2ff840511cd6acf9a2890a4fbfc149f037d9026d4c9643dcd359c7f9593504a01d8c2b5b5d5aa24596

data/.gitignore CHANGED Viewed

@@ -20,3 +20,4 @@ tmp
 *.o
 *.a
 mkmf.log
+apps.json

data/Gemfile CHANGED Viewed

@@ -1,4 +1,5 @@
 source 'https://rubygems.org'
+# gem 'whois', git: "https://github.com/mfasanya/whois"
 # Specify your gem's dependencies in socialinvestigator.gemspec
 gemspec

data/README.md CHANGED Viewed

@@ -17,14 +17,35 @@ Then you can run the command 'socialinvestigator' to begin using it.
 Full help
     $ socialinvestigator help
+## Hacker News Search
+Code walk through: http://willschenk.com/making-a-command-line-utility-with-gems-and-thor
 Search hacker news for a url:
     $ socialinvestigator hn search http://willschenk.com
-Setting up twitter.  You'll need to register a twitter app for this to work.
-Full walk through is here http://willschenk.com/scripting-twitter.
+## Looking up information from a URL
+Code walk through: http://willschenk.com/personal-information-from-only-a-url
+Start with a URL, figure out what you can find:
+    $ socialinvestigator net page_info http://willschenk.com
+To analyse the technology stack, you need to load the datafile from
+https://github.com/ElbertF/Wappalyzer
+which can be done with this command:
+    $ socialinvestigator net get_apps_json
+## Twitter Scripting
+_This will be documented soon_
+Code walk through: http://willschenk.com/scripting-twitter
-Once you have the twitter info, you put it in using the twitter config command:
+You'll need to register a twitter app for this to work.  Once you have the twitter info, you put it in using the twitter config command:
     $ socialinvestigator twitter config

data/lib/socialinvestigator/cli.rb CHANGED Viewed

@@ -1,6 +1,7 @@
 require 'thor'
 require 'socialinvestigator/cli/hn'
 require 'socialinvestigator/cli/twitter'
+require 'socialinvestigator/cli/net'
 module Socialinvestigator
   class HammerOfTheGods < Thor
@@ -29,5 +30,8 @@ module Socialinvestigator
     desc "twitter COMMANDS", "Twitter Control Module"
     subcommand "twitter", Socialinvestigator::CLI::TwitterCli
+    desc "net COMMANDS", "Net control Module"
+    subcommand "net", Socialinvestigator::CLI::Net
   end
 end

data/lib/socialinvestigator/cli/net.rb ADDED Viewed

@@ -0,0 +1,27 @@
+require 'socialinvestigator/client/net'
+module Socialinvestigator
+  module CLI
+    class Net < Thor
+      desc "page_info URL", "Looks at a page to see what social links it finds"
+      def page_info( url )
+        knowledge = client.get_knowledge( url )
+        knowledge.print
+      end
+      desc "get_apps_json", "Download the apps.json file form Wappalyzer"
+      def get_apps_json
+        puts "Loading from https://raw.githubusercontent.com/ElbertF/Wappalyzer/master/share/apps.json"
+        json_data = HTTParty.get "https://raw.githubusercontent.com/ElbertF/Wappalyzer/master/share/apps.json"
+        Socialinvestigator::Config.config.apps_json= json_data
+        puts "Saved"
+      end
+      private
+      def client
+        @client ||= Socialinvestigator::Client::NetClient.new
+      end
+    end
+  end
+end

data/lib/socialinvestigator/cli/twitter.rb CHANGED Viewed

@@ -6,7 +6,7 @@ module Socialinvestigator
     class TwitterCli < Thor
       desc "user SCREENAME", "Look up info for a specific user."
       def user( username )
-        agent.print_user_info client.user( "wschenk" )
+        agent.print_user_info client.user( username )
       end
       desc "lookup URL", "Resolve a link"
@@ -150,4 +150,8 @@ module Socialinvestigator
       end
     end
   end
+end
+if __FILE__ == $0
+  Socialinvestigator::CLI::TwitterCli.start( ARGV )
 end

data/lib/socialinvestigator/client/net.rb ADDED Viewed

@@ -0,0 +1,451 @@
+#!/usr/bin/env ruby
+require 'httparty'
+require 'nokogiri'
+require 'dnsruby'
+require 'whois'
+module Socialinvestigator
+  module Client
+    class PageKnowledge
+      DEBUG = false
+      TEMPLATE = "%20s: %s\n"
+      def initialize; @knowledge = {} end
+      def remember( key, value )
+        return if value.nil?
+        p key, value if DEBUG
+        @knowledge[key] = value
+      end
+      def another( key, value )
+        return if value.nil?
+        p key, value if DEBUG
+        @knowledge[key] ||= []
+        @knowledge[key] << value
+        @knowledge[key] = @knowledge[key].uniq
+      end
+      def print
+        p :domain
+        p :created_on
+        p :expires_on
+        p :updated_on
+        p :registrar_name
+        p :registrar_url
+        p :registrant_contact
+        p :admin_contact
+        p :technical_contact
+        p :emails
+        p :title, title
+        p :description, description
+        p :twitter_author, twitter_author
+        p :twitter_ids
+        p :image, image
+        p :responsive
+        p :rss_feed
+        p :atom_feed
+        p :twitter_links
+        p :linkedin_links
+        p :instagram_links
+        p :facebook_links
+        p :googleplus_links
+        p :github_links
+        p :technologies
+      end
+      def p( key, val = nil )
+        val = @knowledge[key] if val.nil?
+        if val.is_a?( Array )
+          printf TEMPLATE, key, val.join( ", ") if val.size > 0
+        elsif val.is_a?( Whois::Record::Contact )
+          printf TEMPLATE, key, ""
+          [:name, :organization, :address, :city, :zip, :state, :country, :country_code, :phone, :fax, :email, :url, :created_on, :updated_on].each do |key|
+            out = val.send( key )
+            printf "%25s: %s\n", key, out if out && out != ""
+          end
+        else
+          printf TEMPLATE, key, val if val
+        end
+      end
+      def title
+        @knowledge[:twitter_title] || @knowledge[:og_title] || @knowledge[:page_title]
+      end
+      def twitter_author
+        @knowledge[:twitter_creator] || @knowledge[:twitter_by] || @knowledge[:twitter_site_author] || (@knowledge[:twitter_ids] || []).first
+      end
+      def description
+        @knowledge[:twitter_description] || @knowledge[:og_description] || @knowledge[:description]
+      end
+      def image
+        @knowledge[:twitter_image] || @knowledge[:og_image]
+      end
+    end
+    class NetClient
+      # Look up the domain
+      def find_domain( hostname )
+        # puts "Looking for SOA of #{hostname}"
+        dns = Dnsruby::Resolver.new
+        soa = dns.query( hostname, "SOA" ).answer.select do |rr|
+          rr.is_a? Dnsruby::RR::IN::SOA
+        end
+        return hostname if soa.length > 0
+        parts = hostname.split( /\./ )
+        return nil if parts.length <= 2
+        find_domain( parts.slice(1,100).join( "." ) )
+      end
+      def get_knowledge( url )
+        data = PageKnowledge.new
+        uri = URI( url )
+        data.remember( :hostname, uri.hostname )
+        domain = find_domain(uri.hostname)
+        data.remember( :domain, domain )
+        # Look at the domain info
+        whois = Whois.lookup( domain )
+        data.remember( :registered?, whois.registered? )
+        if whois.registrar
+          data.remember( :registrar_name, whois.registrar.name )
+          data.remember( :registrar_url, whois.registrar.url )
+        end
+        data.remember( :created_on, whois.created_on.strftime( "%Y-%m-%d") ) if whois.created_on
+        data.remember( :expires_on, whois.expires_on.strftime( "%Y-%m-%d") ) if whois.expires_on
+        data.remember( :updated_on, whois.updated_on.strftime( "%Y-%m-%d") ) if whois.updated_on
+        whois.contacts.each do |c|
+          data.another( :emails, c.email.downcase ) if c.email
+          case c.type
+          when Whois::Record::Contact::TYPE_REGISTRANT
+            data.remember( :registrant_contact, c )
+          when Whois::Record::Contact::TYPE_ADMINISTRATIVE
+            data.remember( :admin_contact, c )
+          when Whois::Record::Contact::TYPE_TECHNICAL
+            data.remember( :technical_contact, c )
+          end
+        end
+        #   [
+        #     :name,:organization,:address,:city,
+        #     :zip,:state,:country,:country_code,
+        #     :phone,:fax,:email,:url].each do |k|
+        #       val = c.send(k)
+        #       printf "%15s : %s\n", k.to_s, val if !val.nil?
+        #   end
+        # end
+        require 'whois/record/parser/blank'
+        whois.parts.each do |p|
+          if Whois::Record::Parser.parser_for(p).is_a? Whois::Record::Parser::Blank
+            puts "Couldn't find a parser for #{p.host}:"
+            data.another( :unparsed_whois, p.body )
+          end
+        end
+        # Load up the response
+        # client = HTTPClient.new
+        # client.ssl_config.verify_mode = OpenSSL::SSL::VERIFY_NONE
+        # response = client.get( url )
+        #       # @ssl = p.peer_cert
+        response = HTTParty.get url
+        # require 'pp'
+        # pp response.headers
+        data.remember( :server, response.headers['server'] )
+        # Parse the HTML
+        parsed = Nokogiri.parse response.body
+        data.remember( :page_title, parsed.title )
+        # RSS Feed:
+        if feed = parsed.css( 'link[type="application/rss+xml"]' ).first
+          feed = feed.attributes['href'].value
+          data.remember( :rss_feed, feed )
+        end
+        # Atom Feed:
+        if feed = parsed.css( 'link[type="application/atom+xml"]' ).first
+          feed = feed.attributes['href'].value
+          data.remember( :atom_feed, feed )
+        end
+        # Meta tags
+        meta = {}
+        parsed.css( "meta[name]" ).each do |t|
+          meta[t.attributes["name"].value] = t.attributes["content"].value if t.attributes["content"]
+        end
+        parsed.css( "meta[property]" ).each do |t|
+          meta[t.attributes["property"].value] = t.attributes["content"].value
+        end
+        # require 'pp'
+        # pp meta
+        data.remember( :author, meta['author'] )
+        data.remember( :description, meta['description'] )
+        data.remember( :keywords, meta['keywords'] )
+        data.remember( :generator, meta['generator'])
+        data.remember( :responsive, true )  if meta["viewport"] =~ /width=device-width/
+        # Check Twitter Card:
+        data.remember( :twitter_title, meta["twitter:title"] )
+        data.remember( :twitter_creator, meta["twitter:creator"] )
+        if /@(.*)/.match( meta["twitter:creator"] )
+          data.another( :twitter_ids, $1 )
+        end
+        data.remember( :twitter_site_author, meta["twitter:site"] )
+        if /@(.*)/.match( meta["twitter:site"] )
+          data.another( :twitter_ids, $1 )
+        end
+        data.remember( :twitter_image, meta["twitter:image"] )
+        data.remember( :twitter_description, meta["twitter:description"] )
+        # Open Graph
+        data.remember( :og_title, meta["og:title"] )
+        data.remember( :og_description, meta["og:description"] )
+        data.remember( :og_type, meta["og:type"] )
+        data.remember( :og_image, meta["og:image"] )
+        # Look inside the body:
+        # Twitter
+        # Look for twitter links
+        twitter_links = hrefs( matching_links( parsed, /twitter.com\/[^\/]*$/ ), true )
+        data.remember( :twitter_links, twitter_links )
+        twitter_ids = find_id_path( twitter_links, /twitter.com\/([^\/]*$)/  ).each do |id|
+          data.another( :twitter_ids, id )
+        end
+        # Look for twitter shared links
+        twitter_shared = matching_links( parsed, /twitter.com\/share/ )
+        twitter_shared.each do |l|
+          text = l['data-text']
+          # See if there's a "by @user" in the text
+          if /by\s*@([^\s]*)/.match text
+            data.another( :twitter_ids, $1 )
+            data.remember( :twitter_by, $1 )
+          end
+          # Look for all "@usernames" in the text
+          if text
+            text.split.select { |x| x =~ /@\s*/ }.each do |id|
+              data.another( :twitter_ids, id.slice( 1,100 ) ) # We don't want the @
+            end
+          end
+          # See if there's a via link on the anchor tag
+          if l['data-via']
+            data.another( :twitter_ids, l['data-via'])
+          end
+          possible_via = URI.decode( (URI(l['href']).query) || "" ).split( /&amp;/ ).collect { |x| x.split( /=/  ) }.select { |x| x[0] == 'via' }
+          if possible_via.size > 0
+            data.another( :twitter_ids, possible_via[0][1] )
+          end
+        end
+        # Look for intent
+        twitter_intent = hrefs( matching_links( parsed, /twitter.com\/intent/ ) )
+        twitter_intent.each do |t|
+          URI.decode( URI(t.gsub( / /, "+" )).query ).split( /&/ ).select do |x|
+            x =~ /via/
+          end.collect do |x|
+            x.gsub( /via=/, "" )
+          end.each do |via|
+            data.another( :twitter_ids, via )
+          end
+        end
+        # Look for email
+        email_links = hrefs( matching_links( parsed, /mailto:/ ) )
+        email_address = find_id_path( email_links, /mailto:(.*@.*\..*)/ ).each do |email|
+          data.another( :emails, email )
+        end
+        # Linkedin
+        linkedin_links = hrefs( matching_links( parsed, /linkedin.com/ ), true )
+        data.remember( :linkedin_links, linkedin_links )
+        # Instagram
+        instagram_links = hrefs( matching_links( parsed, /instagram.com/ ) )
+        data.remember( :instagram_links, instagram_links )
+        # Facebook
+        facebook_links = hrefs( matching_links( parsed, /facebook.com\/[^\/]*$/ ) )
+        data.remember( :facebook_links, facebook_links )
+        # Google plus
+        googleplus_links = hrefs( matching_links( parsed, /plus.google.com\/[^\/]*$/ ) )
+        data.remember( :googleplus_links, googleplus_links )
+        # Github
+        github_links = hrefs( matching_links( parsed, /github.com\/[^\/]*$/ ) )
+        data.remember( :github_links, github_links )
+        # Bonus!
+        # Get this file from https://github.com/ElbertF/Wappalyzer/tree/master/share
+        apps = Socialinvestigator::Config.config.apps_json
+        if apps
+          scripts = parsed.css( "script" ).collect { |x| x['src'] }.select { |x| x }
+          # puts scripts
+          apps['apps'].each do |app,checks|
+            if checks['html']
+              html_array = checks['html']
+              html_array = [checks['html']] if html_array.is_a? String
+              html_array.each do |html|
+                result = check_regex( html, response.body )
+                if result
+                  data.another :technologies, app
+                  data.another :technologies, checks['implies']
+                end
+              end
+            end
+            if checks['meta']
+              checks['meta'].each do |k,code|
+                result = check_regex( code, meta[k] )
+                if result
+                  data.another :technologies, app
+                  data.another :technologies, checks['implies']
+                end
+              end
+            end
+            if checks['headers']
+              checks['headers'].each do |k,code|
+                result = check_regex( code, response.headers[k] )
+                if result
+                  data.another :technologies, app
+                  data.another :technologies, checks['implies']
+                end
+              end
+            end
+            if checks['script']
+              script_array = checks['script']
+              script_array = [checks['script']] if script_array.is_a? String
+              script_array.each do |script_regex|
+                scripts.each do |script|
+                  result = check_regex( script_regex, script)
+                  if result
+                    data.another :technologies, app
+                    data.another :technologies, checks['implies']
+                  end
+                end
+              end
+            end
+          end
+        end
+        data
+      end
+      def matching_links( parsed, regex )
+        parsed.css( "a" ).collect do |x|
+          if regex.match( x['href'] )
+            x
+          else
+            nil
+          end
+        end.select do |x|
+          x
+        end
+      end
+      def hrefs( links, filter_shared = false )
+        links.collect do |x|
+          x['href']
+        end.select do |url|
+          if filter_shared
+            !(url =~ /share/)
+          else
+            true
+          end
+        end.uniq
+      end
+      def find_id_path( links, regex )
+        links.collect do |link|
+          if regex.match( link )
+            res = $1 || link
+            if (res =~ /share/)
+              nil
+            else
+              res
+            end
+          end
+        end.select do |x|
+          x
+        end.uniq
+      end
+      def check_regex( mashed_regex, value )
+        regex,result = mashed_regex.split( /\\;/ )
+        md = Regexp.new( regex ).match( value )
+        if md
+          if result
+            result = result.gsub( /\\1/, (md[1] || "" )).gsub( /\\2/, (md[2] || "") )
+          else
+            true
+          end
+        else
+          false
+        end
+      end
+    end
+  end
+end

data/lib/socialinvestigator/client/standalone_net.rb ADDED Viewed

@@ -0,0 +1,458 @@
+#!/usr/bin/env ruby
+require 'httparty'
+require 'nokogiri'
+require 'dnsruby'
+require 'whois'
+url = ARGV[0] || "http://www.fastcolabs.com/3038014/product-bootcamp-week-six-worth-it"
+class PageKnowledge
+  DEBUG = false
+  TEMPLATE = "%20s: %s\n"
+  def initialize; @knowledge = {} end
+  def remember( key, value )
+    return if value.nil?
+    p key, value if DEBUG
+    @knowledge[key] = value
+  end
+  def another( key, value )
+    return if value.nil?
+    p key, value if DEBUG
+    @knowledge[key] ||= []
+    @knowledge[key] << value
+    @knowledge[key] = @knowledge[key].uniq
+  end
+  def print
+    p :domain
+    p :created_on
+    p :expires_on
+    p :updated_on
+    p :registrar_name
+    p :registrar_url
+    p :registrant_contact
+    p :admin_contact
+    p :technical_contact
+    p :emails
+    p :title, title
+    p :description, description
+    p :twitter_author, twitter_author
+    p :twitter_ids
+    p :image, image
+    p :responsive
+    p :rss_feed
+    p :atom_feed
+    p :twitter_links
+    p :linkedin_links
+    p :instagram_links
+    p :facebook_links
+    p :googleplus_links
+    p :github_links
+    # pp @knowledge
+  end
+  def p( key, val = nil )
+    val = @knowledge[key] if val.nil?
+    if val.is_a?( Array )
+      printf TEMPLATE, key, val.join( ", ") if val.size > 0
+    elsif val.is_a?( Whois::Record::Contact )
+      printf TEMPLATE, key, ""
+      [:name, :organization, :address, :city, :zip, :state, :country, :country_code, :phone, :fax, :email, :url, :created_on, :updated_on].each do |key|
+        out = val.send( key )
+        printf "%25s: %s\n", key, out if out && out != ""
+      end
+    else
+      printf TEMPLATE, key, val if val
+    end
+  end
+  def title
+    @knowledge[:twitter_title] || @knowledge[:og_title] || @knowledge[:page_title]
+  end
+  def twitter_author
+    @knowledge[:twitter_creator] || @knowledge[:twitter_by] || @knowledge[:twitter_site_author] || (@knowledge[:twitter_ids] || []).first
+  end
+  def description
+    @knowledge[:twitter_description] || @knowledge[:og_description] || @knowledge[:description]
+  end
+  def image
+    @knowledge[:twitter_image] || @knowledge[:og_image]
+  end
+end
+data = PageKnowledge.new
+uri = URI( url )
+data.remember( :hostname, uri.hostname )
+# Look up the domain
+def find_domain( hostname )
+  # puts "Looking for SOA of #{hostname}"
+  dns = Dnsruby::Resolver.new
+  soa = dns.query( hostname, "SOA" ).answer.select do |rr|
+    rr.is_a? Dnsruby::RR::IN::SOA
+  end
+  return hostname if soa.length > 0
+  parts = hostname.split( /\./ )
+  return nil if parts.length <= 2
+  find_domain( parts.slice(1,100).join( "." ) )
+end
+domain = find_domain(uri.hostname)
+data.remember( :domain, domain )
+# Look at the domain info
+whois = Whois.lookup( domain )
+data.remember( :registered?, whois.registered? )
+if whois.registrar
+  data.remember( :registrar_name, whois.registrar.name )
+  data.remember( :registrar_url, whois.registrar.url )
+end
+data.remember( :created_on, whois.created_on.strftime( "%Y-%m-%d") ) if whois.created_on
+data.remember( :expires_on, whois.expires_on.strftime( "%Y-%m-%d") ) if whois.expires_on
+data.remember( :updated_on, whois.updated_on.strftime( "%Y-%m-%d") ) if whois.updated_on
+whois.contacts.each do |c|
+  data.another( :emails, c.email.downcase ) if c.email
+  case c.type
+  when Whois::Record::Contact::TYPE_REGISTRANT
+    data.remember( :registrant_contact, c )
+  when Whois::Record::Contact::TYPE_ADMINISTRATIVE
+    data.remember( :admin_contact, c )
+  when Whois::Record::Contact::TYPE_TECHNICAL
+    data.remember( :technical_contact, c )
+  end
+end
+#   [
+#     :name,:organization,:address,:city,
+#     :zip,:state,:country,:country_code,
+#     :phone,:fax,:email,:url].each do |k|
+#       val = c.send(k)
+#       printf "%15s : %s\n", k.to_s, val if !val.nil?
+#   end
+# end
+require 'whois/record/parser/blank'
+whois.parts.each do |p|
+  if Whois::Record::Parser.parser_for(p).is_a? Whois::Record::Parser::Blank
+    puts "Couldn't find a parser for #{p.host}:"
+    data.another( :unparsed_whois, p.body )
+  end
+end
+# Load up the response
+# client = HTTPClient.new
+# client.ssl_config.verify_mode = OpenSSL::SSL::VERIFY_NONE
+# response = client.get( url )
+#       # @ssl = p.peer_cert
+response = HTTParty.get url
+# require 'pp'
+# pp response.headers
+data.remember( :server, response.headers['server'] )
+# Parse the HTML
+parsed = Nokogiri.parse response.body
+data.remember( :page_title, parsed.title )
+# RSS Feed:
+if feed = parsed.css( 'link[type="application/rss+xml"]' ).first
+  feed = feed.attributes['href'].value
+  data.remember( :rss_feed, feed )
+end
+# Atom Feed:
+if feed = parsed.css( 'link[type="application/atom+xml"]' ).first
+  feed = feed.attributes['href'].value
+  data.remember( :atom_feed, feed )
+end
+# Meta tags
+meta = {}
+parsed.css( "meta[name]" ).each do |t|
+  meta[t.attributes["name"].value] = t.attributes["content"].value if t.attributes["content"]
+end
+parsed.css( "meta[property]" ).each do |t|
+  meta[t.attributes["property"].value] = t.attributes["content"].value
+end
+# require 'pp'
+# pp meta
+data.remember( :author, meta['author'] )
+data.remember( :description, meta['description'] )
+data.remember( :keywords, meta['keywords'] )
+data.remember( :generator, meta['generator'])
+data.remember( :responsive, true )  if meta["viewport"] =~ /width=device-width/
+# Check Twitter Card:
+data.remember( :twitter_title, meta["twitter:title"] )
+data.remember( :twitter_creator, meta["twitter:creator"] )
+if /@(.*)/.match( meta["twitter:creator"] )
+  data.another( :twitter_ids, $1 )
+end
+data.remember( :twitter_site_author, meta["twitter:site"] )
+if /@(.*)/.match( meta["twitter:site"] )
+  data.another( :twitter_ids, $1 )
+end
+data.remember( :twitter_image, meta["twitter:image"] )
+data.remember( :twitter_description, meta["twitter:description"] )
+# Open Graph
+data.remember( :og_title, meta["og:title"] )
+data.remember( :og_description, meta["og:description"] )
+data.remember( :og_type, meta["og:type"] )
+data.remember( :og_image, meta["og:image"] )
+# Look inside the body:
+def matching_links( parsed, regex )
+  parsed.css( "a" ).collect do |x|
+    if regex.match( x['href'] )
+      x
+    else
+      nil
+    end
+  end.select do |x|
+    x
+  end
+end
+def hrefs( links, filter_shared = false )
+  links.collect do |x|
+    x['href']
+  end.select do |url|
+    if filter_shared
+      !(url =~ /share/)
+    else
+      true
+    end
+  end.uniq
+end
+def find_id_path( links, regex )
+  links.collect do |link|
+    if regex.match( link )
+      res = $1 || link
+      if (res =~ /share/)
+        nil
+      else
+        res
+      end
+    end
+  end.select do |x|
+    x
+  end.uniq
+end
+# Twitter
+# Look for twitter links
+twitter_links = hrefs( matching_links( parsed, /twitter.com\/[^\/]*$/ ), true )
+data.remember( :twitter_links, twitter_links )
+twitter_ids = find_id_path( twitter_links, /twitter.com\/([^\/]*$)/  ).each do |id|
+  data.another( :twitter_ids, id )
+end
+# Look for twitter shared links
+twitter_shared = matching_links( parsed, /twitter.com\/share/ )
+twitter_shared.each do |l|
+  text = l['data-text']
+  # See if there's a "by @user" in the text
+  if /by\s*@([^\s]*)/.match text
+    data.another( :twitter_ids, $1 )
+    data.remember( :twitter_by, $1 )
+  end
+  # Look for all "@usernames" in the text
+  if text
+    text.split.select { |x| x =~ /@\s*/ }.each do |id|
+      data.another( :twitter_ids, id.slice( 1,100 ) ) # We don't want the @
+    end
+  end
+  # See if there's a via link on the anchor tag
+  if l['data-via']
+    data.another( :twitter_ids, l['data-via'])
+  end
+  possible_via = URI.decode( (URI(l['href']).query) || "" ).split( /&amp;/ ).collect { |x| x.split( /=/  ) }.select { |x| x[0] == 'via' }
+  if possible_via.size > 0
+    data.another( :twitter_ids, possible_via[0][1] )
+  end
+end
+# Look for intent
+twitter_intent = hrefs( matching_links( parsed, /twitter.com\/intent/ ) )
+twitter_intent.each do |t|
+  URI.decode( URI(t.gsub( / /, "+" )).query ).split( /&/ ).select do |x|
+    x =~ /via/
+  end.collect do |x|
+    x.gsub( /via=/, "" )
+  end.each do |via|
+    data.another( :twitter_ids, via )
+  end
+end
+# Look for email
+email_links = hrefs( matching_links( parsed, /mailto:/ ) )
+email_address = find_id_path( email_links, /mailto:(.*@.*\..*)/ ).each do |email|
+  data.another( :emails, email )
+end
+# Linkedin
+linkedin_links = hrefs( matching_links( parsed, /linkedin.com/ ), true )
+data.remember( :linkedin_links, linkedin_links )
+# Instagram
+instagram_links = hrefs( matching_links( parsed, /instagram.com/ ) )
+data.remember( :instagram_links, instagram_links )
+# Facebook
+facebook_links = hrefs( matching_links( parsed, /facebook.com\/[^\/]*$/ ) )
+data.remember( :facebook_links, facebook_links )
+# Google plus
+googleplus_links = hrefs( matching_links( parsed, /plus.google.com\/[^\/]*$/ ) )
+data.remember( :googleplus_links, googleplus_links )
+# Github
+github_links = hrefs( matching_links( parsed, /github.com\/[^\/]*$/ ) )
+data.remember( :github_links, github_links )
+puts
+puts "This is what we've figured out:"
+data.print
+# Bonus!
+# Get this file from https://github.com/ElbertF/Wappalyzer/tree/master/share
+if File.exists? "apps.json"
+  apps_json = File.read( "apps.json" )
+  technologies = []
+  apps = JSON.parse( apps_json )
+  def check_regex( mashed_regex, value )
+    regex,result = mashed_regex.split( /\\;/ )
+    md = Regexp.new( regex ).match( value )
+    if md
+      if result
+        result = result.gsub( /\\1/, (md[1] || "" )).gsub( /\\2/, (md[2] || "") )
+      else
+        true
+      end
+    else
+      false
+    end
+  end
+  scripts = parsed.css( "script" ).collect { |x| x['src'] }.select { |x| x }
+  # puts scripts
+  apps['apps'].each do |app,checks|
+    if checks['html']
+      html_array = checks['html']
+      html_array = [checks['html']] if html_array.is_a? String
+      html_array.each do |html|
+        result = check_regex( html, response.body )
+        if result
+          technologies << app
+          technologies << checks['implies'] if checks['implies']
+        end
+      end
+    end
+    if checks['meta']
+      checks['meta'].each do |k,code|
+        result = check_regex( code, meta[k] )
+        if result
+          technologies << app
+          technologies << checks['implies'] if checks['implies']
+        end
+      end
+    end
+    if checks['headers']
+      checks['headers'].each do |k,code|
+        result = check_regex( code, response.headers[k] )
+        if result
+          technologies << app
+          technologies << checks['implies'] if checks['implies']
+        end
+      end
+    end
+    if checks['script']
+      script_array = checks['script']
+      script_array = [checks['script']] if script_array.is_a? String
+      script_array.each do |script_regex|
+        scripts.each do |script|
+          result = check_regex( script_regex, script)
+          if result
+            technologies << app
+            technologies << checks['implies'] if checks['implies']
+          end
+        end
+      end
+    end
+  end
+  printf "%20s: %s\n", "technologies", technologies.sort.uniq.join( ", ")
+end

data/lib/socialinvestigator/config.rb CHANGED Viewed

@@ -27,6 +27,16 @@ module Socialinvestigator
         save_yaml( "twitter.yml", config )
       end
+      # Returns whatever read_json yields for "apps.json".
+      def apps_json
+        read_json( "apps.json" )
+      end
+      def apps_json=( data )
+        File.open( "#{@dir}/apps.json", "w" ) do |out|
+          out << data
+        end
+      end
       def read_yaml( name )
         file = "#{@dir}/#{name}"
@@ -42,6 +52,16 @@ module Socialinvestigator
           out.write obj.to_yaml
         end
       end
+      def read_json( name )
+        file = "#{@dir}/#{name}"
+        if File.exists? file
+          return JSON.parse( File.read( file ) )
+        end
+        nil
+      end
     end
   end
 end

data/lib/socialinvestigator/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module Socialinvestigator
-  VERSION = "0.0.3"
+  VERSION = "0.0.4"
 end

data/socialinvestigator.gemspec CHANGED Viewed

@@ -21,6 +21,9 @@ Gem::Specification.new do |spec|
   spec.add_dependency 'thor'
   spec.add_dependency 'httparty'
   spec.add_dependency 'twitter'
+  spec.add_dependency 'nokogiri'
+  spec.add_dependency 'whois'
+  spec.add_dependency 'dnsruby'
   spec.add_development_dependency "bundler", "~> 1.6"
   spec.add_development_dependency "rake"

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: socialinvestigator
 version: !ruby/object:Gem::Version
-  version: 0.0.3
+  version: 0.0.4
 platform: ruby
 authors:
 - Will Schenk
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-11-10 00:00:00.000000000 Z
+date: 2014-11-14 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: thor
@@ -52,6 +52,48 @@ dependencies:
     - - '>='
       - !ruby/object:Gem::Version
         version: '0'
+- !ruby/object:Gem::Dependency
+  name: nokogiri
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+- !ruby/object:Gem::Dependency
+  name: whois
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+- !ruby/object:Gem::Dependency
+  name: dnsruby
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
 - !ruby/object:Gem::Dependency
   name: bundler
   requirement: !ruby/object:Gem::Requirement
@@ -97,8 +139,11 @@ files:
 - lib/socialinvestigator.rb
 - lib/socialinvestigator/cli.rb
 - lib/socialinvestigator/cli/hn.rb
+- lib/socialinvestigator/cli/net.rb
 - lib/socialinvestigator/cli/twitter.rb
 - lib/socialinvestigator/client/hn.rb
+- lib/socialinvestigator/client/net.rb
+- lib/socialinvestigator/client/standalone_net.rb
 - lib/socialinvestigator/client/twitter.rb
 - lib/socialinvestigator/config.rb
 - lib/socialinvestigator/version.rb