socialinvestigator 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 68ac5910584d162d37369db68b69401cc5f85213
4
- data.tar.gz: 0cb5ce2e0e7311a36be9a6b0a8c227a7507935cf
3
+ metadata.gz: b05afd1645671efac34a27455b466ae120bd0796
4
+ data.tar.gz: d8e96eb8befbf7fb8124b3cba94d711e18157f52
5
5
  SHA512:
6
- metadata.gz: 56f8c7088524492a8e9a3f6eae31c48d73ae0a958fc41f1a569be0a0810ffd92fdbe0e040babeb72f4e5c0600980b406f7a906015c1e8abee487fdf5c7f72201
7
- data.tar.gz: eb6f2479c6db084d9955632b418bfc90a7145c10d8fdf9fba696008adbf2cc84141f14a7f69edffc1a24fe185be78d5cb3f1472acbe3a89b0b5e8568174687b1
6
+ metadata.gz: 9481ba394a0fdc6380c48a0d2f1eeaeda14b1abedb993f83d95a423acd5be718bbedbd272e3b95e0ce6074b409fa8580550f9df1955f05f9f3dd6aec586d2661
7
+ data.tar.gz: 3f94308447f5a9bb6a28bd013066637c2cb588c84db01f2ff840511cd6acf9a2890a4fbfc149f037d9026d4c9643dcd359c7f9593504a01d8c2b5b5d5aa24596
data/.gitignore CHANGED
@@ -20,3 +20,4 @@ tmp
20
20
  *.o
21
21
  *.a
22
22
  mkmf.log
23
+ apps.json
data/Gemfile CHANGED
@@ -1,4 +1,5 @@
1
1
  source 'https://rubygems.org'
2
2
 
3
+ # gem 'whois', git: "https://github.com/mfasanya/whois"
3
4
  # Specify your gem's dependencies in socialinvestigator.gemspec
4
5
  gemspec
data/README.md CHANGED
@@ -17,14 +17,35 @@ Then you can run the command 'socialinvestigator' to begin using it.
17
17
  Full help
18
18
  $ socialinvestigator help
19
19
 
20
+ ## Hacker News Search
21
+
22
+ Code walk through: http://willschenk.com/making-a-command-line-utility-with-gems-and-thor
23
+
20
24
  Search hacker news for a url:
21
25
 
22
26
  $ socialinvestigator hn search http://willschenk.com
23
27
 
24
- Setting up twitter. You'll need to register a twitter app for this to work.
25
- Full walk through is here http://willschenk.com/scripting-twitter.
28
+ ## Looking up information from a URL
29
+
30
+ Code walk through: http://willschenk.com/personal-information-from-only-a-url
31
+
32
+ Start with a URL, figure out what you can find:
33
+
34
+ $ socialinvestigator net page_info http://willschenk.com
35
+
36
+ To analyse the technology stack, you need to load the datafile from
37
+ https://github.com/ElbertF/Wappalyzer
38
+ which can be done with this command:
39
+
40
+ $ socialinvestigator net get_apps_json
41
+
42
+ ## Twitter Scripting
43
+
44
+ _This will be documented soon_
45
+
46
+ Code walk through: http://willschenk.com/scripting-twitter
26
47
 
27
- Once you have the twitter info, you put it in using the twitter config command:
48
+ You'll need to register a twitter app for this to work. Once you have the twitter info, you put it in using the twitter config command:
28
49
 
29
50
  $ socialinvestigator twitter config
30
51
 
@@ -1,6 +1,7 @@
1
1
  require 'thor'
2
2
  require 'socialinvestigator/cli/hn'
3
3
  require 'socialinvestigator/cli/twitter'
4
+ require 'socialinvestigator/cli/net'
4
5
 
5
6
  module Socialinvestigator
6
7
  class HammerOfTheGods < Thor
@@ -29,5 +30,8 @@ module Socialinvestigator
29
30
 
30
31
  desc "twitter COMMANDS", "Twitter Control Module"
31
32
  subcommand "twitter", Socialinvestigator::CLI::TwitterCli
33
+
34
+ desc "net COMMANDS", "Net Control Module"
35
+ subcommand "net", Socialinvestigator::CLI::Net
32
36
  end
33
37
  end
@@ -0,0 +1,27 @@
1
+ require 'socialinvestigator/client/net'
2
+
3
+ module Socialinvestigator
4
+ module CLI
5
+ class Net < Thor
6
+ desc "page_info URL", "Looks at a page to see what social links it finds"
7
+ def page_info( url )
8
+ knowledge = client.get_knowledge( url )
9
+ knowledge.print
10
+ end
11
+
12
+ desc "get_apps_json", "Download the apps.json file from Wappalyzer"
13
+ def get_apps_json
14
+ puts "Loading from https://raw.githubusercontent.com/ElbertF/Wappalyzer/master/share/apps.json"
15
+ json_data = HTTParty.get "https://raw.githubusercontent.com/ElbertF/Wappalyzer/master/share/apps.json"
16
+ Socialinvestigator::Config.config.apps_json= json_data
17
+ puts "Saved"
18
+ end
19
+
20
+
21
+ private
22
+ def client
23
+ @client ||= Socialinvestigator::Client::NetClient.new
24
+ end
25
+ end
26
+ end
27
+ end
@@ -6,7 +6,7 @@ module Socialinvestigator
6
6
  class TwitterCli < Thor
7
7
  desc "user SCREENAME", "Look up info for a specific user."
8
8
  def user( username )
9
- agent.print_user_info client.user( "wschenk" )
9
+ agent.print_user_info client.user( username )
10
10
  end
11
11
 
12
12
  desc "lookup URL", "Resolve a link"
@@ -150,4 +150,8 @@ module Socialinvestigator
150
150
  end
151
151
  end
152
152
  end
153
+ end
154
+
155
+ if __FILE__ == $0
156
+ Socialinvestigator::CLI::TwitterCli.start( ARGV )
153
157
  end
@@ -0,0 +1,451 @@
1
+ #!/usr/bin/env ruby
2
+ require 'httparty'
3
+ require 'nokogiri'
4
+ require 'dnsruby'
5
+ require 'whois'
6
+
7
+ module Socialinvestigator
8
+ module Client
9
+ class PageKnowledge
10
+ DEBUG = false
11
+ TEMPLATE = "%20s: %s\n"
12
+
13
+ def initialize; @knowledge = {} end
14
+
15
+ def remember( key, value )
16
+ return if value.nil?
17
+ p key, value if DEBUG
18
+
19
+ @knowledge[key] = value
20
+ end
21
+
22
+ def another( key, value )
23
+ return if value.nil?
24
+ p key, value if DEBUG
25
+
26
+ @knowledge[key] ||= []
27
+ @knowledge[key] << value
28
+ @knowledge[key] = @knowledge[key].uniq
29
+ end
30
+
31
+ def print
32
+ p :domain
33
+ p :created_on
34
+ p :expires_on
35
+ p :updated_on
36
+ p :registrar_name
37
+ p :registrar_url
38
+ p :registrant_contact
39
+ p :admin_contact
40
+ p :technical_contact
41
+ p :emails
42
+ p :title, title
43
+ p :description, description
44
+ p :twitter_author, twitter_author
45
+ p :twitter_ids
46
+ p :image, image
47
+ p :responsive
48
+ p :rss_feed
49
+ p :atom_feed
50
+
51
+ p :twitter_links
52
+ p :linkedin_links
53
+ p :instagram_links
54
+ p :facebook_links
55
+ p :googleplus_links
56
+ p :github_links
57
+ p :technologies
58
+ end
59
+
60
+ def p( key, val = nil )
61
+ val = @knowledge[key] if val.nil?
62
+ if val.is_a?( Array )
63
+ printf TEMPLATE, key, val.join( ", ") if val.size > 0
64
+ elsif val.is_a?( Whois::Record::Contact )
65
+ printf TEMPLATE, key, ""
66
+ [:name, :organization, :address, :city, :zip, :state, :country, :country_code, :phone, :fax, :email, :url, :created_on, :updated_on].each do |key|
67
+ out = val.send( key )
68
+ printf "%25s: %s\n", key, out if out && out != ""
69
+ end
70
+ else
71
+ printf TEMPLATE, key, val if val
72
+ end
73
+ end
74
+
75
+ def title
76
+ @knowledge[:twitter_title] || @knowledge[:og_title] || @knowledge[:page_title]
77
+ end
78
+
79
+ def twitter_author
80
+ @knowledge[:twitter_creator] || @knowledge[:twitter_by] || @knowledge[:twitter_site_author] || (@knowledge[:twitter_ids] || []).first
81
+ end
82
+
83
+ def description
84
+ @knowledge[:twitter_description] || @knowledge[:og_description] || @knowledge[:description]
85
+ end
86
+
87
+ def image
88
+ @knowledge[:twitter_image] || @knowledge[:og_image]
89
+ end
90
+ end
91
+
92
+ class NetClient
93
+ # Look up the domain
94
+
95
+ def find_domain( hostname )
96
+ # puts "Looking for SOA of #{hostname}"
97
+ dns = Dnsruby::Resolver.new
98
+ soa = dns.query( hostname, "SOA" ).answer.select do |rr|
99
+ rr.is_a? Dnsruby::RR::IN::SOA
100
+ end
101
+
102
+ return hostname if soa.length > 0
103
+
104
+ parts = hostname.split( /\./ )
105
+ return nil if parts.length <= 2
106
+
107
+ find_domain( parts.slice(1,100).join( "." ) )
108
+ end
109
+
110
+ def get_knowledge( url )
111
+ data = PageKnowledge.new
112
+
113
+ uri = URI( url )
114
+
115
+ data.remember( :hostname, uri.hostname )
116
+
117
+ domain = find_domain(uri.hostname)
118
+
119
+ data.remember( :domain, domain )
120
+
121
+ # Look at the domain info
122
+
123
+ whois = Whois.lookup( domain )
124
+
125
+ data.remember( :registered?, whois.registered? )
126
+ if whois.registrar
127
+ data.remember( :registrar_name, whois.registrar.name )
128
+ data.remember( :registrar_url, whois.registrar.url )
129
+ end
130
+
131
+ data.remember( :created_on, whois.created_on.strftime( "%Y-%m-%d") ) if whois.created_on
132
+ data.remember( :expires_on, whois.expires_on.strftime( "%Y-%m-%d") ) if whois.expires_on
133
+ data.remember( :updated_on, whois.updated_on.strftime( "%Y-%m-%d") ) if whois.updated_on
134
+
135
+ whois.contacts.each do |c|
136
+ data.another( :emails, c.email.downcase ) if c.email
137
+ case c.type
138
+ when Whois::Record::Contact::TYPE_REGISTRANT
139
+ data.remember( :registrant_contact, c )
140
+ when Whois::Record::Contact::TYPE_ADMINISTRATIVE
141
+ data.remember( :admin_contact, c )
142
+ when Whois::Record::Contact::TYPE_TECHNICAL
143
+ data.remember( :technical_contact, c )
144
+ end
145
+ end
146
+ # [
147
+ # :name,:organization,:address,:city,
148
+ # :zip,:state,:country,:country_code,
149
+ # :phone,:fax,:email,:url].each do |k|
150
+ # val = c.send(k)
151
+ # printf "%15s : %s\n", k.to_s, val if !val.nil?
152
+ # end
153
+ # end
154
+
155
+ require 'whois/record/parser/blank'
156
+ whois.parts.each do |p|
157
+ if Whois::Record::Parser.parser_for(p).is_a? Whois::Record::Parser::Blank
158
+ puts "Couldn't find a parser for #{p.host}:"
159
+ data.another( :unparsed_whois, p.body )
160
+ end
161
+ end
162
+
163
+
164
+
165
+
166
+ # Load up the response
167
+
168
+ # client = HTTPClient.new
169
+ # client.ssl_config.verify_mode = OpenSSL::SSL::VERIFY_NONE
170
+ # response = client.get( url )
171
+ # # @ssl = p.peer_cert
172
+
173
+ response = HTTParty.get url
174
+
175
+ # require 'pp'
176
+ # pp response.headers
177
+
178
+ data.remember( :server, response.headers['server'] )
179
+
180
+
181
+ # Parse the HTML
182
+
183
+ parsed = Nokogiri.parse response.body
184
+
185
+ data.remember( :page_title, parsed.title )
186
+
187
+ # RSS Feed:
188
+ if feed = parsed.css( 'link[type="application/rss+xml"]' ).first
189
+ feed = feed.attributes['href'].value
190
+ data.remember( :rss_feed, feed )
191
+ end
192
+
193
+ # Atom Feed:
194
+ if feed = parsed.css( 'link[type="application/atom+xml"]' ).first
195
+ feed = feed.attributes['href'].value
196
+ data.remember( :atom_feed, feed )
197
+ end
198
+
199
+
200
+
201
+ # Meta tags
202
+
203
+ meta = {}
204
+ parsed.css( "meta[name]" ).each do |t|
205
+ meta[t.attributes["name"].value] = t.attributes["content"].value if t.attributes["content"]
206
+ end
207
+
208
+ parsed.css( "meta[property]" ).each do |t|
209
+ meta[t.attributes["property"].value] = t.attributes["content"].value
210
+ end
211
+
212
+ # require 'pp'
213
+ # pp meta
214
+
215
+ data.remember( :author, meta['author'] )
216
+ data.remember( :description, meta['description'] )
217
+ data.remember( :keywords, meta['keywords'] )
218
+ data.remember( :generator, meta['generator'])
219
+
220
+ data.remember( :responsive, true ) if meta["viewport"] =~ /width=device-width/
221
+
222
+
223
+ # Check Twitter Card:
224
+
225
+ data.remember( :twitter_title, meta["twitter:title"] )
226
+ data.remember( :twitter_creator, meta["twitter:creator"] )
227
+ if /@(.*)/.match( meta["twitter:creator"] )
228
+ data.another( :twitter_ids, $1 )
229
+ end
230
+ data.remember( :twitter_site_author, meta["twitter:site"] )
231
+ if /@(.*)/.match( meta["twitter:site"] )
232
+ data.another( :twitter_ids, $1 )
233
+ end
234
+ data.remember( :twitter_image, meta["twitter:image"] )
235
+ data.remember( :twitter_description, meta["twitter:description"] )
236
+
237
+ # Open Graph
238
+
239
+ data.remember( :og_title, meta["og:title"] )
240
+ data.remember( :og_description, meta["og:description"] )
241
+ data.remember( :og_type, meta["og:type"] )
242
+ data.remember( :og_image, meta["og:image"] )
243
+
244
+
245
+ # Look inside the body:
246
+
247
+
248
+ # Twitter
249
+
250
+ # Look for twitter links
251
+ twitter_links = hrefs( matching_links( parsed, /twitter.com\/[^\/]*$/ ), true )
252
+ data.remember( :twitter_links, twitter_links )
253
+
254
+ twitter_ids = find_id_path( twitter_links, /twitter.com\/([^\/]*$)/ ).each do |id|
255
+ data.another( :twitter_ids, id )
256
+ end
257
+
258
+ # Look for twitter shared links
259
+
260
+ twitter_shared = matching_links( parsed, /twitter.com\/share/ )
261
+
262
+ twitter_shared.each do |l|
263
+ text = l['data-text']
264
+
265
+ # See if there's a "by @user" in the text
266
+ if /by\s*@([^\s]*)/.match text
267
+ data.another( :twitter_ids, $1 )
268
+ data.remember( :twitter_by, $1 )
269
+ end
270
+
271
+ # Look for all "@usernames" in the text
272
+ if text
273
+ text.split.select { |x| x =~ /@\s*/ }.each do |id|
274
+ data.another( :twitter_ids, id.slice( 1,100 ) ) # We don't want the @
275
+ end
276
+ end
277
+
278
+ # See if there's a via link on the anchor tag
279
+ if l['data-via']
280
+ data.another( :twitter_ids, l['data-via'])
281
+ end
282
+
283
+
284
+ possible_via = URI.decode( (URI(l['href']).query) || "" ).split( /&amp;/ ).collect { |x| x.split( /=/ ) }.select { |x| x[0] == 'via' }
285
+ if possible_via.size > 0
286
+ data.another( :twitter_ids, possible_via[0][1] )
287
+ end
288
+ end
289
+
290
+ # Look for intent
291
+
292
+ twitter_intent = hrefs( matching_links( parsed, /twitter.com\/intent/ ) )
293
+
294
+ twitter_intent.each do |t|
295
+ URI.decode( URI(t.gsub( / /, "+" )).query ).split( /&/ ).select do |x|
296
+ x =~ /via/
297
+ end.collect do |x|
298
+ x.gsub( /via=/, "" )
299
+ end.each do |via|
300
+ data.another( :twitter_ids, via )
301
+ end
302
+ end
303
+ # Look for email
304
+
305
+ email_links = hrefs( matching_links( parsed, /mailto:/ ) )
306
+ email_address = find_id_path( email_links, /mailto:(.*@.*\..*)/ ).each do |email|
307
+ data.another( :emails, email )
308
+ end
309
+
310
+ # Linkedin
311
+
312
+ linkedin_links = hrefs( matching_links( parsed, /linkedin.com/ ), true )
313
+ data.remember( :linkedin_links, linkedin_links )
314
+
315
+ # Instagram
316
+
317
+ instagram_links = hrefs( matching_links( parsed, /instagram.com/ ) )
318
+ data.remember( :instagram_links, instagram_links )
319
+
320
+ # Facebook
321
+
322
+ facebook_links = hrefs( matching_links( parsed, /facebook.com\/[^\/]*$/ ) )
323
+ data.remember( :facebook_links, facebook_links )
324
+
325
+ # Google plus
326
+
327
+ googleplus_links = hrefs( matching_links( parsed, /plus.google.com\/[^\/]*$/ ) )
328
+ data.remember( :googleplus_links, googleplus_links )
329
+
330
+ # Github
331
+
332
+ github_links = hrefs( matching_links( parsed, /github.com\/[^\/]*$/ ) )
333
+ data.remember( :github_links, github_links )
334
+
335
+
336
+ # Bonus!
337
+
338
+ # Get this file from https://github.com/ElbertF/Wappalyzer/tree/master/share
339
+
340
+ apps = Socialinvestigator::Config.config.apps_json
341
+ if apps
342
+ scripts = parsed.css( "script" ).collect { |x| x['src'] }.select { |x| x }
343
+ # puts scripts
344
+
345
+ apps['apps'].each do |app,checks|
346
+ if checks['html']
347
+ html_array = checks['html']
348
+ html_array = [checks['html']] if html_array.is_a? String
349
+
350
+ html_array.each do |html|
351
+ result = check_regex( html, response.body )
352
+ if result
353
+ data.another :technologies, app
354
+ data.another :technologies, checks['implies']
355
+ end
356
+ end
357
+ end
358
+
359
+ if checks['meta']
360
+ checks['meta'].each do |k,code|
361
+ result = check_regex( code, meta[k] )
362
+ if result
363
+ data.another :technologies, app
364
+ data.another :technologies, checks['implies']
365
+ end
366
+ end
367
+ end
368
+
369
+ if checks['headers']
370
+ checks['headers'].each do |k,code|
371
+ result = check_regex( code, response.headers[k] )
372
+ if result
373
+ data.another :technologies, app
374
+ data.another :technologies, checks['implies']
375
+ end
376
+ end
377
+ end
378
+
379
+ if checks['script']
380
+ script_array = checks['script']
381
+ script_array = [checks['script']] if script_array.is_a? String
382
+ script_array.each do |script_regex|
383
+ scripts.each do |script|
384
+ result = check_regex( script_regex, script)
385
+ if result
386
+ data.another :technologies, app
387
+ data.another :technologies, checks['implies']
388
+ end
389
+ end
390
+ end
391
+ end
392
+ end
393
+ end
394
+ data
395
+ end
396
+
397
+ def matching_links( parsed, regex )
398
+ parsed.css( "a" ).collect do |x|
399
+ if regex.match( x['href'] )
400
+ x
401
+ else
402
+ nil
403
+ end
404
+ end.select do |x|
405
+ x
406
+ end
407
+ end
408
+
409
+ def hrefs( links, filter_shared = false )
410
+ links.collect do |x|
411
+ x['href']
412
+ end.select do |url|
413
+ if filter_shared
414
+ !(url =~ /share/)
415
+ else
416
+ true
417
+ end
418
+ end.uniq
419
+ end
420
+
421
+ def find_id_path( links, regex )
422
+ links.collect do |link|
423
+ if regex.match( link )
424
+ res = $1 || link
425
+ if (res =~ /share/)
426
+ nil
427
+ else
428
+ res
429
+ end
430
+ end
431
+ end.select do |x|
432
+ x
433
+ end.uniq
434
+ end
435
+
436
+ def check_regex( mashed_regex, value )
437
+ regex,result = mashed_regex.split( /\\;/ )
438
+ md = Regexp.new( regex ).match( value )
439
+ if md
440
+ if result
441
+ result = result.gsub( /\\1/, (md[1] || "" )).gsub( /\\2/, (md[2] || "") )
442
+ else
443
+ true
444
+ end
445
+ else
446
+ false
447
+ end
448
+ end
449
+ end
450
+ end
451
+ end
@@ -0,0 +1,458 @@
1
+ #!/usr/bin/env ruby
2
+ require 'httparty'
3
+ require 'nokogiri'
4
+ require 'dnsruby'
5
+ require 'whois'
6
+
7
+ url = ARGV[0] || "http://www.fastcolabs.com/3038014/product-bootcamp-week-six-worth-it"
8
+
9
+ class PageKnowledge
10
+ DEBUG = false
11
+ TEMPLATE = "%20s: %s\n"
12
+
13
+ def initialize; @knowledge = {} end
14
+
15
+ def remember( key, value )
16
+ return if value.nil?
17
+ p key, value if DEBUG
18
+
19
+ @knowledge[key] = value
20
+ end
21
+
22
+ def another( key, value )
23
+ return if value.nil?
24
+ p key, value if DEBUG
25
+
26
+ @knowledge[key] ||= []
27
+ @knowledge[key] << value
28
+ @knowledge[key] = @knowledge[key].uniq
29
+ end
30
+
31
+ def print
32
+ p :domain
33
+ p :created_on
34
+ p :expires_on
35
+ p :updated_on
36
+ p :registrar_name
37
+ p :registrar_url
38
+ p :registrant_contact
39
+ p :admin_contact
40
+ p :technical_contact
41
+ p :emails
42
+ p :title, title
43
+ p :description, description
44
+ p :twitter_author, twitter_author
45
+ p :twitter_ids
46
+ p :image, image
47
+ p :responsive
48
+ p :rss_feed
49
+ p :atom_feed
50
+
51
+ p :twitter_links
52
+ p :linkedin_links
53
+ p :instagram_links
54
+ p :facebook_links
55
+ p :googleplus_links
56
+ p :github_links
57
+
58
+ # pp @knowledge
59
+ end
60
+
61
+ def p( key, val = nil )
62
+ val = @knowledge[key] if val.nil?
63
+ if val.is_a?( Array )
64
+ printf TEMPLATE, key, val.join( ", ") if val.size > 0
65
+ elsif val.is_a?( Whois::Record::Contact )
66
+ printf TEMPLATE, key, ""
67
+ [:name, :organization, :address, :city, :zip, :state, :country, :country_code, :phone, :fax, :email, :url, :created_on, :updated_on].each do |key|
68
+ out = val.send( key )
69
+ printf "%25s: %s\n", key, out if out && out != ""
70
+ end
71
+ else
72
+ printf TEMPLATE, key, val if val
73
+ end
74
+ end
75
+
76
+ def title
77
+ @knowledge[:twitter_title] || @knowledge[:og_title] || @knowledge[:page_title]
78
+ end
79
+
80
+ def twitter_author
81
+ @knowledge[:twitter_creator] || @knowledge[:twitter_by] || @knowledge[:twitter_site_author] || (@knowledge[:twitter_ids] || []).first
82
+ end
83
+
84
+ def description
85
+ @knowledge[:twitter_description] || @knowledge[:og_description] || @knowledge[:description]
86
+ end
87
+
88
+ def image
89
+ @knowledge[:twitter_image] || @knowledge[:og_image]
90
+ end
91
+ end
92
+
93
+ data = PageKnowledge.new
94
+
95
+ uri = URI( url )
96
+
97
+ data.remember( :hostname, uri.hostname )
98
+
99
+
100
+
101
+ # Look up the domain
102
+
103
+ def find_domain( hostname )
104
+ # puts "Looking for SOA of #{hostname}"
105
+ dns = Dnsruby::Resolver.new
106
+ soa = dns.query( hostname, "SOA" ).answer.select do |rr|
107
+ rr.is_a? Dnsruby::RR::IN::SOA
108
+ end
109
+
110
+ return hostname if soa.length > 0
111
+
112
+ parts = hostname.split( /\./ )
113
+ return nil if parts.length <= 2
114
+
115
+ find_domain( parts.slice(1,100).join( "." ) )
116
+ end
117
+
118
+ domain = find_domain(uri.hostname)
119
+
120
+ data.remember( :domain, domain )
121
+
122
+ # Look at the domain info
123
+
124
+ whois = Whois.lookup( domain )
125
+
126
+ data.remember( :registered?, whois.registered? )
127
+ if whois.registrar
128
+ data.remember( :registrar_name, whois.registrar.name )
129
+ data.remember( :registrar_url, whois.registrar.url )
130
+ end
131
+
132
+ data.remember( :created_on, whois.created_on.strftime( "%Y-%m-%d") ) if whois.created_on
133
+ data.remember( :expires_on, whois.expires_on.strftime( "%Y-%m-%d") ) if whois.expires_on
134
+ data.remember( :updated_on, whois.updated_on.strftime( "%Y-%m-%d") ) if whois.updated_on
135
+
136
+ whois.contacts.each do |c|
137
+ data.another( :emails, c.email.downcase ) if c.email
138
+ case c.type
139
+ when Whois::Record::Contact::TYPE_REGISTRANT
140
+ data.remember( :registrant_contact, c )
141
+ when Whois::Record::Contact::TYPE_ADMINISTRATIVE
142
+ data.remember( :admin_contact, c )
143
+ when Whois::Record::Contact::TYPE_TECHNICAL
144
+ data.remember( :technical_contact, c )
145
+ end
146
+ end
147
+ # [
148
+ # :name,:organization,:address,:city,
149
+ # :zip,:state,:country,:country_code,
150
+ # :phone,:fax,:email,:url].each do |k|
151
+ # val = c.send(k)
152
+ # printf "%15s : %s\n", k.to_s, val if !val.nil?
153
+ # end
154
+ # end
155
+
156
+ require 'whois/record/parser/blank'
157
+ whois.parts.each do |p|
158
+ if Whois::Record::Parser.parser_for(p).is_a? Whois::Record::Parser::Blank
159
+ puts "Couldn't find a parser for #{p.host}:"
160
+ data.another( :unparsed_whois, p.body )
161
+ end
162
+ end
163
+
164
+
165
+
166
+
167
+ # Load up the response
168
+
169
+ # client = HTTPClient.new
170
+ # client.ssl_config.verify_mode = OpenSSL::SSL::VERIFY_NONE
171
+ # response = client.get( url )
172
+ # # @ssl = p.peer_cert
173
+
174
+ response = HTTParty.get url
175
+
176
+ # require 'pp'
177
+ # pp response.headers
178
+
179
+ data.remember( :server, response.headers['server'] )
180
+
181
+
182
+ # Parse the HTML
183
+
184
+ parsed = Nokogiri.parse response.body
185
+
186
+ data.remember( :page_title, parsed.title )
187
+
188
+ # RSS Feed:
189
+ if feed = parsed.css( 'link[type="application/rss+xml"]' ).first
190
+ feed = feed.attributes['href'].value
191
+ data.remember( :rss_feed, feed )
192
+ end
193
+
194
+ # Atom Feed:
195
+ if feed = parsed.css( 'link[type="application/atom+xml"]' ).first
196
+ feed = feed.attributes['href'].value
197
+ data.remember( :atom_feed, feed )
198
+ end
199
+
200
+
201
+
202
+ # Meta tags
203
+
204
+ meta = {}
205
+ parsed.css( "meta[name]" ).each do |t|
206
+ meta[t.attributes["name"].value] = t.attributes["content"].value if t.attributes["content"]
207
+ end
208
+
209
+ parsed.css( "meta[property]" ).each do |t|
210
+ meta[t.attributes["property"].value] = t.attributes["content"].value
211
+ end
212
+
213
+ # require 'pp'
214
+ # pp meta
215
+
216
+ data.remember( :author, meta['author'] )
217
+ data.remember( :description, meta['description'] )
218
+ data.remember( :keywords, meta['keywords'] )
219
+ data.remember( :generator, meta['generator'])
220
+
221
+ data.remember( :responsive, true ) if meta["viewport"] =~ /width=device-width/
222
+
223
+
224
+ # Check Twitter Card:
225
+
226
+ data.remember( :twitter_title, meta["twitter:title"] )
227
+ data.remember( :twitter_creator, meta["twitter:creator"] )
228
+ if /@(.*)/.match( meta["twitter:creator"] )
229
+ data.another( :twitter_ids, $1 )
230
+ end
231
+ data.remember( :twitter_site_author, meta["twitter:site"] )
232
+ if /@(.*)/.match( meta["twitter:site"] )
233
+ data.another( :twitter_ids, $1 )
234
+ end
235
+ data.remember( :twitter_image, meta["twitter:image"] )
236
+ data.remember( :twitter_description, meta["twitter:description"] )
237
+
238
+ # Open Graph
239
+
240
+ data.remember( :og_title, meta["og:title"] )
241
+ data.remember( :og_description, meta["og:description"] )
242
+ data.remember( :og_type, meta["og:type"] )
243
+ data.remember( :og_image, meta["og:image"] )
244
+
245
+
246
+ # Look inside the body:
247
+
248
+ def matching_links( parsed, regex )
249
+ parsed.css( "a" ).collect do |x|
250
+ if regex.match( x['href'] )
251
+ x
252
+ else
253
+ nil
254
+ end
255
+ end.select do |x|
256
+ x
257
+ end
258
+ end
259
+
260
+ def hrefs( links, filter_shared = false )
261
+ links.collect do |x|
262
+ x['href']
263
+ end.select do |url|
264
+ if filter_shared
265
+ !(url =~ /share/)
266
+ else
267
+ true
268
+ end
269
+ end.uniq
270
+ end
271
+
272
+ def find_id_path( links, regex )
273
+ links.collect do |link|
274
+ if regex.match( link )
275
+ res = $1 || link
276
+ if (res =~ /share/)
277
+ nil
278
+ else
279
+ res
280
+ end
281
+ end
282
+ end.select do |x|
283
+ x
284
+ end.uniq
285
+ end
286
+
287
+ # Twitter
288
+
289
+ # Look for twitter links
290
+ twitter_links = hrefs( matching_links( parsed, /twitter.com\/[^\/]*$/ ), true )
291
+ data.remember( :twitter_links, twitter_links )
292
+
293
+ twitter_ids = find_id_path( twitter_links, /twitter.com\/([^\/]*$)/ ).each do |id|
294
+ data.another( :twitter_ids, id )
295
+ end
296
+
297
+ # Look for twitter shared links
298
+
299
+ twitter_shared = matching_links( parsed, /twitter.com\/share/ )
300
+
301
+ twitter_shared.each do |l|
302
+ text = l['data-text']
303
+
304
+ # See if there's a "by @user" in the text
305
+ if /by\s*@([^\s]*)/.match text
306
+ data.another( :twitter_ids, $1 )
307
+ data.remember( :twitter_by, $1 )
308
+ end
309
+
310
+ # Look for all "@usernames" in the text
311
+ if text
312
+ text.split.select { |x| x =~ /@\s*/ }.each do |id|
313
+ data.another( :twitter_ids, id.slice( 1,100 ) ) # We don't want the @
314
+ end
315
+ end
316
+
317
+ # See if there's a via link on the anchor tag
318
+ if l['data-via']
319
+ data.another( :twitter_ids, l['data-via'])
320
+ end
321
+
322
+
323
+ possible_via = URI.decode( (URI(l['href']).query) || "" ).split( /&amp;/ ).collect { |x| x.split( /=/ ) }.select { |x| x[0] == 'via' }
324
+ if possible_via.size > 0
325
+ data.another( :twitter_ids, possible_via[0][1] )
326
+ end
327
+ end
328
+
329
+ # Look for intent
330
+
331
+ twitter_intent = hrefs( matching_links( parsed, /twitter.com\/intent/ ) )
332
+
333
+ twitter_intent.each do |t|
334
+ URI.decode( URI(t.gsub( / /, "+" )).query ).split( /&/ ).select do |x|
335
+ x =~ /via/
336
+ end.collect do |x|
337
+ x.gsub( /via=/, "" )
338
+ end.each do |via|
339
+ data.another( :twitter_ids, via )
340
+ end
341
+ end
342
+ # Look for email
343
+
344
+ email_links = hrefs( matching_links( parsed, /mailto:/ ) )
345
+ email_address = find_id_path( email_links, /mailto:(.*@.*\..*)/ ).each do |email|
346
+ data.another( :emails, email )
347
+ end
348
+
349
+ # Linkedin
350
+
351
+ linkedin_links = hrefs( matching_links( parsed, /linkedin.com/ ), true )
352
+ data.remember( :linkedin_links, linkedin_links )
353
+
354
+ # Instagram
355
+
356
+ instagram_links = hrefs( matching_links( parsed, /instagram.com/ ) )
357
+ data.remember( :instagram_links, instagram_links )
358
+
359
+ # Facebook
360
+
361
+ facebook_links = hrefs( matching_links( parsed, /facebook.com\/[^\/]*$/ ) )
362
+ data.remember( :facebook_links, facebook_links )
363
+
364
+ # Google plus
365
+
366
+ googleplus_links = hrefs( matching_links( parsed, /plus.google.com\/[^\/]*$/ ) )
367
+ data.remember( :googleplus_links, googleplus_links )
368
+
369
+ # Github
370
+
371
+ github_links = hrefs( matching_links( parsed, /github.com\/[^\/]*$/ ) )
372
+ data.remember( :github_links, github_links )
373
+
374
+
375
+ puts
376
+ puts "This is what we've figured out:"
377
+
378
+
379
+ data.print
380
+
381
+
382
+ # Bonus!
383
+
384
+ # Get this file from https://github.com/ElbertF/Wappalyzer/tree/master/share
385
+ if File.exists? "apps.json"
386
+ apps_json = File.read( "apps.json" )
387
+ technologies = []
388
+
389
+ apps = JSON.parse( apps_json )
390
+
391
+ def check_regex( mashed_regex, value )
392
+ regex,result = mashed_regex.split( /\\;/ )
393
+ md = Regexp.new( regex ).match( value )
394
+ if md
395
+ if result
396
+ result = result.gsub( /\\1/, (md[1] || "" )).gsub( /\\2/, (md[2] || "") )
397
+ else
398
+ true
399
+ end
400
+ else
401
+ false
402
+ end
403
+ end
404
+
405
+ scripts = parsed.css( "script" ).collect { |x| x['src'] }.select { |x| x }
406
+ # puts scripts
407
+
408
+ apps['apps'].each do |app,checks|
409
+ if checks['html']
410
+ html_array = checks['html']
411
+ html_array = [checks['html']] if html_array.is_a? String
412
+
413
+ html_array.each do |html|
414
+ result = check_regex( html, response.body )
415
+ if result
416
+ technologies << app
417
+ technologies << checks['implies'] if checks['implies']
418
+ end
419
+ end
420
+ end
421
+
422
+ if checks['meta']
423
+ checks['meta'].each do |k,code|
424
+ result = check_regex( code, meta[k] )
425
+ if result
426
+ technologies << app
427
+ technologies << checks['implies'] if checks['implies']
428
+ end
429
+ end
430
+ end
431
+
432
+ if checks['headers']
433
+ checks['headers'].each do |k,code|
434
+ result = check_regex( code, response.headers[k] )
435
+ if result
436
+ technologies << app
437
+ technologies << checks['implies'] if checks['implies']
438
+ end
439
+ end
440
+ end
441
+
442
+ if checks['script']
443
+ script_array = checks['script']
444
+ script_array = [checks['script']] if script_array.is_a? String
445
+ script_array.each do |script_regex|
446
+ scripts.each do |script|
447
+ result = check_regex( script_regex, script)
448
+ if result
449
+ technologies << app
450
+ technologies << checks['implies'] if checks['implies']
451
+ end
452
+ end
453
+ end
454
+ end
455
+ end
456
+
457
+ printf "%20s: %s\n", "technologies", technologies.sort.uniq.join( ", ")
458
+ end
@@ -27,6 +27,16 @@ module Socialinvestigator
27
27
  save_yaml( "twitter.yml", config )
28
28
  end
29
29
 
30
+ def apps_json
31
+ read_json( "apps.json" )
32
+ end
33
+
34
+ def apps_json=( data )
35
+ File.open( "#{@dir}/apps.json", "w" ) do |out|
36
+ out << data
37
+ end
38
+ end
39
+
30
40
  def read_yaml( name )
31
41
  file = "#{@dir}/#{name}"
32
42
 
@@ -42,6 +52,16 @@ module Socialinvestigator
42
52
  out.write obj.to_yaml
43
53
  end
44
54
  end
55
+
56
+ def read_json( name )
57
+ file = "#{@dir}/#{name}"
58
+
59
+ if File.exists? file
60
+ return JSON.parse( File.read( file ) )
61
+ end
62
+
63
+ nil
64
+ end
45
65
  end
46
66
  end
47
67
  end
@@ -1,3 +1,3 @@
1
1
  module Socialinvestigator
2
- VERSION = "0.0.3"
2
+ VERSION = "0.0.4"
3
3
  end
@@ -21,6 +21,9 @@ Gem::Specification.new do |spec|
21
21
  spec.add_dependency 'thor'
22
22
  spec.add_dependency 'httparty'
23
23
  spec.add_dependency 'twitter'
24
+ spec.add_dependency 'nokogiri'
25
+ spec.add_dependency 'whois'
26
+ spec.add_dependency 'dnsruby'
24
27
 
25
28
  spec.add_development_dependency "bundler", "~> 1.6"
26
29
  spec.add_development_dependency "rake"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: socialinvestigator
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Will Schenk
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-11-10 00:00:00.000000000 Z
11
+ date: 2014-11-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor
@@ -52,6 +52,48 @@ dependencies:
52
52
  - - '>='
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: nokogiri
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: whois
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: dnsruby
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - '>='
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
55
97
  - !ruby/object:Gem::Dependency
56
98
  name: bundler
57
99
  requirement: !ruby/object:Gem::Requirement
@@ -97,8 +139,11 @@ files:
97
139
  - lib/socialinvestigator.rb
98
140
  - lib/socialinvestigator/cli.rb
99
141
  - lib/socialinvestigator/cli/hn.rb
142
+ - lib/socialinvestigator/cli/net.rb
100
143
  - lib/socialinvestigator/cli/twitter.rb
101
144
  - lib/socialinvestigator/client/hn.rb
145
+ - lib/socialinvestigator/client/net.rb
146
+ - lib/socialinvestigator/client/standalone_net.rb
102
147
  - lib/socialinvestigator/client/twitter.rb
103
148
  - lib/socialinvestigator/config.rb
104
149
  - lib/socialinvestigator/version.rb