socialinvestigator 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 68ac5910584d162d37369db68b69401cc5f85213
4
- data.tar.gz: 0cb5ce2e0e7311a36be9a6b0a8c227a7507935cf
3
+ metadata.gz: b05afd1645671efac34a27455b466ae120bd0796
4
+ data.tar.gz: d8e96eb8befbf7fb8124b3cba94d711e18157f52
5
5
  SHA512:
6
- metadata.gz: 56f8c7088524492a8e9a3f6eae31c48d73ae0a958fc41f1a569be0a0810ffd92fdbe0e040babeb72f4e5c0600980b406f7a906015c1e8abee487fdf5c7f72201
7
- data.tar.gz: eb6f2479c6db084d9955632b418bfc90a7145c10d8fdf9fba696008adbf2cc84141f14a7f69edffc1a24fe185be78d5cb3f1472acbe3a89b0b5e8568174687b1
6
+ metadata.gz: 9481ba394a0fdc6380c48a0d2f1eeaeda14b1abedb993f83d95a423acd5be718bbedbd272e3b95e0ce6074b409fa8580550f9df1955f05f9f3dd6aec586d2661
7
+ data.tar.gz: 3f94308447f5a9bb6a28bd013066637c2cb588c84db01f2ff840511cd6acf9a2890a4fbfc149f037d9026d4c9643dcd359c7f9593504a01d8c2b5b5d5aa24596
data/.gitignore CHANGED
@@ -20,3 +20,4 @@ tmp
20
20
  *.o
21
21
  *.a
22
22
  mkmf.log
23
+ apps.json
data/Gemfile CHANGED
@@ -1,4 +1,5 @@
1
1
  source 'https://rubygems.org'
2
2
 
3
+ # gem 'whois', git: "https://github.com/mfasanya/whois"
3
4
  # Specify your gem's dependencies in socialinvestigator.gemspec
4
5
  gemspec
data/README.md CHANGED
@@ -17,14 +17,35 @@ Then you can run the command 'socialinvestigator' to begin using it.
17
17
  Full help
18
18
  $ socialinvestigator help
19
19
 
20
+ ## Hacker News Search
21
+
22
+ Code walk through: http://willschenk.com/making-a-command-line-utility-with-gems-and-thor
23
+
20
24
  Search hacker news for a url:
21
25
 
22
26
  $ socialinvestigator hn search http://willschenk.com
23
27
 
24
- Setting up twitter. You'll need to register a twitter app for this to work.
25
- Full walk through is here http://willschenk.com/scripting-twitter.
28
+ ## Looking up information from a URL
29
+
30
+ Code walk through: http://willschenk.com/personal-information-from-only-a-url
31
+
32
+ Start with a URL, figure out what you can find:
33
+
34
+ $ socialinvestigator net page_info http://willschenk.com
35
+
36
+ To analyse the technology stack, you need to load the datafile from
37
+ https://github.com/ElbertF/Wappalyzer
38
+ which can be done with this command:
39
+
40
+ $ socialinvestigator net get_apps_json
41
+
42
+ ## Twitter Scripting
43
+
44
+ _This will be documented soon_
45
+
46
+ Code walk through: http://willschenk.com/scripting-twitter
26
47
 
27
- Once you have the twitter info, you put it in using the twitter config command:
48
+ You'll need to register a twitter app for this to work. Once you have the twitter info, you put it in using the twitter config command:
28
49
 
29
50
  $ socialinvestigator twitter config
30
51
 
@@ -1,6 +1,7 @@
1
1
  require 'thor'
2
2
  require 'socialinvestigator/cli/hn'
3
3
  require 'socialinvestigator/cli/twitter'
4
+ require 'socialinvestigator/cli/net'
4
5
 
5
6
  module Socialinvestigator
6
7
  class HammerOfTheGods < Thor
@@ -29,5 +30,8 @@ module Socialinvestigator
29
30
 
30
31
  desc "twitter COMMANDS", "Twitter Control Module"
31
32
  subcommand "twitter", Socialinvestigator::CLI::TwitterCli
33
+
34
+ desc "net COMMANDS", "Net Control Module"
35
+ subcommand "net", Socialinvestigator::CLI::Net
32
36
  end
33
37
  end
@@ -0,0 +1,27 @@
1
+ require 'socialinvestigator/client/net'
2
+
3
+ module Socialinvestigator
4
+ module CLI
5
+ class Net < Thor
6
+ desc "page_info URL", "Looks at a page to see what social links it finds"
7
+ def page_info( url )
8
+ knowledge = client.get_knowledge( url )
9
+ knowledge.print
10
+ end
11
+
12
+ desc "get_apps_json", "Download the apps.json file from Wappalyzer"
13
+ def get_apps_json
14
+ puts "Loading from https://raw.githubusercontent.com/ElbertF/Wappalyzer/master/share/apps.json"
15
+ json_data = HTTParty.get "https://raw.githubusercontent.com/ElbertF/Wappalyzer/master/share/apps.json"
16
+ Socialinvestigator::Config.config.apps_json= json_data
17
+ puts "Saved"
18
+ end
19
+
20
+
21
+ private
22
+ def client
23
+ @client ||= Socialinvestigator::Client::NetClient.new
24
+ end
25
+ end
26
+ end
27
+ end
@@ -6,7 +6,7 @@ module Socialinvestigator
6
6
  class TwitterCli < Thor
7
7
  desc "user SCREENAME", "Look up info for a specific user."
8
8
  def user( username )
9
- agent.print_user_info client.user( "wschenk" )
9
+ agent.print_user_info client.user( username )
10
10
  end
11
11
 
12
12
  desc "lookup URL", "Resolve a link"
@@ -150,4 +150,8 @@ module Socialinvestigator
150
150
  end
151
151
  end
152
152
  end
153
+ end
154
+
155
+ if __FILE__ == $0
156
+ Socialinvestigator::CLI::TwitterCli.start( ARGV )
153
157
  end
@@ -0,0 +1,451 @@
1
+ #!/usr/bin/env ruby
2
+ require 'httparty'
3
+ require 'nokogiri'
4
+ require 'dnsruby'
5
+ require 'whois'
6
+
7
+ module Socialinvestigator
8
+ module Client
9
+ class PageKnowledge
10
+ DEBUG = false
11
+ TEMPLATE = "%20s: %s\n"
12
+
13
+ def initialize; @knowledge = {} end
14
+
15
+ def remember( key, value )
16
+ return if value.nil?
17
+ p key, value if DEBUG
18
+
19
+ @knowledge[key] = value
20
+ end
21
+
22
+ def another( key, value )
23
+ return if value.nil?
24
+ p key, value if DEBUG
25
+
26
+ @knowledge[key] ||= []
27
+ @knowledge[key] << value
28
+ @knowledge[key] = @knowledge[key].uniq
29
+ end
30
+
31
+ def print
32
+ p :domain
33
+ p :created_on
34
+ p :expires_on
35
+ p :updated_on
36
+ p :registrar_name
37
+ p :registrar_url
38
+ p :registrant_contact
39
+ p :admin_contact
40
+ p :technical_contact
41
+ p :emails
42
+ p :title, title
43
+ p :description, description
44
+ p :twitter_author, twitter_author
45
+ p :twitter_ids
46
+ p :image, image
47
+ p :responsive
48
+ p :rss_feed
49
+ p :atom_feed
50
+
51
+ p :twitter_links
52
+ p :linkedin_links
53
+ p :instagram_links
54
+ p :facebook_links
55
+ p :googleplus_links
56
+ p :github_links
57
+ p :technologies
58
+ end
59
+
60
+ def p( key, val = nil )
61
+ val = @knowledge[key] if val.nil?
62
+ if val.is_a?( Array )
63
+ printf TEMPLATE, key, val.join( ", ") if val.size > 0
64
+ elsif val.is_a?( Whois::Record::Contact )
65
+ printf TEMPLATE, key, ""
66
+ [:name, :organization, :address, :city, :zip, :state, :country, :country_code, :phone, :fax, :email, :url, :created_on, :updated_on].each do |key|
67
+ out = val.send( key )
68
+ printf "%25s: %s\n", key, out if out && out != ""
69
+ end
70
+ else
71
+ printf TEMPLATE, key, val if val
72
+ end
73
+ end
74
+
75
+ def title
76
+ @knowledge[:twitter_title] || @knowledge[:og_title] || @knowledge[:page_title]
77
+ end
78
+
79
+ def twitter_author
80
+ @knowledge[:twitter_creator] || @knowledge[:twitter_by] || @knowledge[:twitter_site_author] || (@knowledge[:twitter_ids] || []).first
81
+ end
82
+
83
+ def description
84
+ @knowledge[:twitter_description] || @knowledge[:og_description] || @knowledge[:description]
85
+ end
86
+
87
+ def image
88
+ @knowledge[:twitter_image] || @knowledge[:og_image]
89
+ end
90
+ end
91
+
92
+ class NetClient
93
+ # Look up the domain
94
+
95
+ def find_domain( hostname )
96
+ # puts "Looking for SOA of #{hostname}"
97
+ dns = Dnsruby::Resolver.new
98
+ soa = dns.query( hostname, "SOA" ).answer.select do |rr|
99
+ rr.is_a? Dnsruby::RR::IN::SOA
100
+ end
101
+
102
+ return hostname if soa.length > 0
103
+
104
+ parts = hostname.split( /\./ )
105
+ return nil if parts.length <= 2
106
+
107
+ find_domain( parts.slice(1,100).join( "." ) )
108
+ end
109
+
110
+ def get_knowledge( url )
111
+ data = PageKnowledge.new
112
+
113
+ uri = URI( url )
114
+
115
+ data.remember( :hostname, uri.hostname )
116
+
117
+ domain = find_domain(uri.hostname)
118
+
119
+ data.remember( :domain, domain )
120
+
121
+ # Look at the domain info
122
+
123
+ whois = Whois.lookup( domain )
124
+
125
+ data.remember( :registered?, whois.registered? )
126
+ if whois.registrar
127
+ data.remember( :registrar_name, whois.registrar.name )
128
+ data.remember( :registrar_url, whois.registrar.url )
129
+ end
130
+
131
+ data.remember( :created_on, whois.created_on.strftime( "%Y-%m-%d") ) if whois.created_on
132
+ data.remember( :expires_on, whois.expires_on.strftime( "%Y-%m-%d") ) if whois.expires_on
133
+ data.remember( :updated_on, whois.updated_on.strftime( "%Y-%m-%d") ) if whois.updated_on
134
+
135
+ whois.contacts.each do |c|
136
+ data.another( :emails, c.email.downcase ) if c.email
137
+ case c.type
138
+ when Whois::Record::Contact::TYPE_REGISTRANT
139
+ data.remember( :registrant_contact, c )
140
+ when Whois::Record::Contact::TYPE_ADMINISTRATIVE
141
+ data.remember( :admin_contact, c )
142
+ when Whois::Record::Contact::TYPE_TECHNICAL
143
+ data.remember( :technical_contact, c )
144
+ end
145
+ end
146
+ # [
147
+ # :name,:organization,:address,:city,
148
+ # :zip,:state,:country,:country_code,
149
+ # :phone,:fax,:email,:url].each do |k|
150
+ # val = c.send(k)
151
+ # printf "%15s : %s\n", k.to_s, val if !val.nil?
152
+ # end
153
+ # end
154
+
155
+ require 'whois/record/parser/blank'
156
+ whois.parts.each do |p|
157
+ if Whois::Record::Parser.parser_for(p).is_a? Whois::Record::Parser::Blank
158
+ puts "Couldn't find a parser for #{p.host}:"
159
+ data.another( :unparsed_whois, p.body )
160
+ end
161
+ end
162
+
163
+
164
+
165
+
166
+ # Load up the response
167
+
168
+ # client = HTTPClient.new
169
+ # client.ssl_config.verify_mode = OpenSSL::SSL::VERIFY_NONE
170
+ # response = client.get( url )
171
+ # # @ssl = p.peer_cert
172
+
173
+ response = HTTParty.get url
174
+
175
+ # require 'pp'
176
+ # pp response.headers
177
+
178
+ data.remember( :server, response.headers['server'] )
179
+
180
+
181
+ # Parse the HTML
182
+
183
+ parsed = Nokogiri.parse response.body
184
+
185
+ data.remember( :page_title, parsed.title )
186
+
187
+ # RSS Feed:
188
+ if feed = parsed.css( 'link[type="application/rss+xml"]' ).first
189
+ feed = feed.attributes['href'].value
190
+ data.remember( :rss_feed, feed )
191
+ end
192
+
193
+ # Atom Feed:
194
+ if feed = parsed.css( 'link[type="application/atom+xml"]' ).first
195
+ feed = feed.attributes['href'].value
196
+ data.remember( :atom_feed, feed )
197
+ end
198
+
199
+
200
+
201
+ # Meta tags
202
+
203
+ meta = {}
204
+ parsed.css( "meta[name]" ).each do |t|
205
+ meta[t.attributes["name"].value] = t.attributes["content"].value if t.attributes["content"]
206
+ end
207
+
208
+ parsed.css( "meta[property]" ).each do |t|
209
+ meta[t.attributes["property"].value] = t.attributes["content"].value
210
+ end
211
+
212
+ # require 'pp'
213
+ # pp meta
214
+
215
+ data.remember( :author, meta['author'] )
216
+ data.remember( :description, meta['description'] )
217
+ data.remember( :keywords, meta['keywords'] )
218
+ data.remember( :generator, meta['generator'])
219
+
220
+ data.remember( :responsive, true ) if meta["viewport"] =~ /width=device-width/
221
+
222
+
223
+ # Check Twitter Card:
224
+
225
+ data.remember( :twitter_title, meta["twitter:title"] )
226
+ data.remember( :twitter_creator, meta["twitter:creator"] )
227
+ if /@(.*)/.match( meta["twitter:creator"] )
228
+ data.another( :twitter_ids, $1 )
229
+ end
230
+ data.remember( :twitter_site_author, meta["twitter:site"] )
231
+ if /@(.*)/.match( meta["twitter:site"] )
232
+ data.another( :twitter_ids, $1 )
233
+ end
234
+ data.remember( :twitter_image, meta["twitter:image"] )
235
+ data.remember( :twitter_description, meta["twitter:description"] )
236
+
237
+ # Open Graph
238
+
239
+ data.remember( :og_title, meta["og:title"] )
240
+ data.remember( :og_description, meta["og:description"] )
241
+ data.remember( :og_type, meta["og:type"] )
242
+ data.remember( :og_image, meta["og:image"] )
243
+
244
+
245
+ # Look inside the body:
246
+
247
+
248
+ # Twitter
249
+
250
+ # Look for twitter links
251
+ twitter_links = hrefs( matching_links( parsed, /twitter.com\/[^\/]*$/ ), true )
252
+ data.remember( :twitter_links, twitter_links )
253
+
254
+ twitter_ids = find_id_path( twitter_links, /twitter.com\/([^\/]*$)/ ).each do |id|
255
+ data.another( :twitter_ids, id )
256
+ end
257
+
258
+ # Look for twitter shared links
259
+
260
+ twitter_shared = matching_links( parsed, /twitter.com\/share/ )
261
+
262
+ twitter_shared.each do |l|
263
+ text = l['data-text']
264
+
265
+ # See if there's a "by @user" in the text
266
+ if /by\s*@([^\s]*)/.match text
267
+ data.another( :twitter_ids, $1 )
268
+ data.remember( :twitter_by, $1 )
269
+ end
270
+
271
+ # Look for all "@usernames" in the text
272
+ if text
273
+ text.split.select { |x| x =~ /@\s*/ }.each do |id|
274
+ data.another( :twitter_ids, id.slice( 1,100 ) ) # We don't want the @
275
+ end
276
+ end
277
+
278
+ # See if there's a via link on the anchor tag
279
+ if l['data-via']
280
+ data.another( :twitter_ids, l['data-via'])
281
+ end
282
+
283
+
284
+ possible_via = URI.decode( (URI(l['href']).query) || "" ).split( /&amp;/ ).collect { |x| x.split( /=/ ) }.select { |x| x[0] == 'via' }
285
+ if possible_via.size > 0
286
+ data.another( :twitter_ids, possible_via[0][1] )
287
+ end
288
+ end
289
+
290
+ # Look for intent
291
+
292
+ twitter_intent = hrefs( matching_links( parsed, /twitter.com\/intent/ ) )
293
+
294
+ twitter_intent.each do |t|
295
+ URI.decode( URI(t.gsub( / /, "+" )).query ).split( /&/ ).select do |x|
296
+ x =~ /via/
297
+ end.collect do |x|
298
+ x.gsub( /via=/, "" )
299
+ end.each do |via|
300
+ data.another( :twitter_ids, via )
301
+ end
302
+ end
303
+ # Look for email
304
+
305
+ email_links = hrefs( matching_links( parsed, /mailto:/ ) )
306
+ email_address = find_id_path( email_links, /mailto:(.*@.*\..*)/ ).each do |email|
307
+ data.another( :emails, email )
308
+ end
309
+
310
+ # Linkedin
311
+
312
+ linkedin_links = hrefs( matching_links( parsed, /linkedin.com/ ), true )
313
+ data.remember( :linkedin_links, linkedin_links )
314
+
315
+ # Instagram
316
+
317
+ instagram_links = hrefs( matching_links( parsed, /instagram.com/ ) )
318
+ data.remember( :instagram_links, instagram_links )
319
+
320
+ # Facebook
321
+
322
+ facebook_links = hrefs( matching_links( parsed, /facebook.com\/[^\/]*$/ ) )
323
+ data.remember( :facebook_links, facebook_links )
324
+
325
+ # Google plus
326
+
327
+ googleplus_links = hrefs( matching_links( parsed, /plus.google.com\/[^\/]*$/ ) )
328
+ data.remember( :googleplus_links, googleplus_links )
329
+
330
+ # Github
331
+
332
+ github_links = hrefs( matching_links( parsed, /github.com\/[^\/]*$/ ) )
333
+ data.remember( :github_links, github_links )
334
+
335
+
336
+ # Bonus!
337
+
338
+ # Get this file from https://github.com/ElbertF/Wappalyzer/tree/master/share
339
+
340
+ apps = Socialinvestigator::Config.config.apps_json
341
+ if apps
342
+ scripts = parsed.css( "script" ).collect { |x| x['src'] }.select { |x| x }
343
+ # puts scripts
344
+
345
+ apps['apps'].each do |app,checks|
346
+ if checks['html']
347
+ html_array = checks['html']
348
+ html_array = [checks['html']] if html_array.is_a? String
349
+
350
+ html_array.each do |html|
351
+ result = check_regex( html, response.body )
352
+ if result
353
+ data.another :technologies, app
354
+ data.another :technologies, checks['implies']
355
+ end
356
+ end
357
+ end
358
+
359
+ if checks['meta']
360
+ checks['meta'].each do |k,code|
361
+ result = check_regex( code, meta[k] )
362
+ if result
363
+ data.another :technologies, app
364
+ data.another :technologies, checks['implies']
365
+ end
366
+ end
367
+ end
368
+
369
+ if checks['headers']
370
+ checks['headers'].each do |k,code|
371
+ result = check_regex( code, response.headers[k] )
372
+ if result
373
+ data.another :technologies, app
374
+ data.another :technologies, checks['implies']
375
+ end
376
+ end
377
+ end
378
+
379
+ if checks['script']
380
+ script_array = checks['script']
381
+ script_array = [checks['script']] if script_array.is_a? String
382
+ script_array.each do |script_regex|
383
+ scripts.each do |script|
384
+ result = check_regex( script_regex, script)
385
+ if result
386
+ data.another :technologies, app
387
+ data.another :technologies, checks['implies']
388
+ end
389
+ end
390
+ end
391
+ end
392
+ end
393
+ end
394
+ data
395
+ end
396
+
397
+ def matching_links( parsed, regex )
398
+ parsed.css( "a" ).collect do |x|
399
+ if regex.match( x['href'] )
400
+ x
401
+ else
402
+ nil
403
+ end
404
+ end.select do |x|
405
+ x
406
+ end
407
+ end
408
+
409
+ def hrefs( links, filter_shared = false )
410
+ links.collect do |x|
411
+ x['href']
412
+ end.select do |url|
413
+ if filter_shared
414
+ !(url =~ /share/)
415
+ else
416
+ true
417
+ end
418
+ end.uniq
419
+ end
420
+
421
+ def find_id_path( links, regex )
422
+ links.collect do |link|
423
+ if regex.match( link )
424
+ res = $1 || link
425
+ if (res =~ /share/)
426
+ nil
427
+ else
428
+ res
429
+ end
430
+ end
431
+ end.select do |x|
432
+ x
433
+ end.uniq
434
+ end
435
+
436
+ def check_regex( mashed_regex, value )
437
+ regex,result = mashed_regex.split( /\\;/ )
438
+ md = Regexp.new( regex ).match( value )
439
+ if md
440
+ if result
441
+ result = result.gsub( /\\1/, (md[1] || "" )).gsub( /\\2/, (md[2] || "") )
442
+ else
443
+ true
444
+ end
445
+ else
446
+ false
447
+ end
448
+ end
449
+ end
450
+ end
451
+ end
@@ -0,0 +1,458 @@
1
+ #!/usr/bin/env ruby
2
+ require 'httparty'
3
+ require 'nokogiri'
4
+ require 'dnsruby'
5
+ require 'whois'
6
+
7
+ url = ARGV[0] || "http://www.fastcolabs.com/3038014/product-bootcamp-week-six-worth-it"
8
+
9
+ class PageKnowledge
10
+ DEBUG = false
11
+ TEMPLATE = "%20s: %s\n"
12
+
13
+ def initialize; @knowledge = {} end
14
+
15
+ def remember( key, value )
16
+ return if value.nil?
17
+ p key, value if DEBUG
18
+
19
+ @knowledge[key] = value
20
+ end
21
+
22
+ def another( key, value )
23
+ return if value.nil?
24
+ p key, value if DEBUG
25
+
26
+ @knowledge[key] ||= []
27
+ @knowledge[key] << value
28
+ @knowledge[key] = @knowledge[key].uniq
29
+ end
30
+
31
+ def print
32
+ p :domain
33
+ p :created_on
34
+ p :expires_on
35
+ p :updated_on
36
+ p :registrar_name
37
+ p :registrar_url
38
+ p :registrant_contact
39
+ p :admin_contact
40
+ p :technical_contact
41
+ p :emails
42
+ p :title, title
43
+ p :description, description
44
+ p :twitter_author, twitter_author
45
+ p :twitter_ids
46
+ p :image, image
47
+ p :responsive
48
+ p :rss_feed
49
+ p :atom_feed
50
+
51
+ p :twitter_links
52
+ p :linkedin_links
53
+ p :instagram_links
54
+ p :facebook_links
55
+ p :googleplus_links
56
+ p :github_links
57
+
58
+ # pp @knowledge
59
+ end
60
+
61
+ def p( key, val = nil )
62
+ val = @knowledge[key] if val.nil?
63
+ if val.is_a?( Array )
64
+ printf TEMPLATE, key, val.join( ", ") if val.size > 0
65
+ elsif val.is_a?( Whois::Record::Contact )
66
+ printf TEMPLATE, key, ""
67
+ [:name, :organization, :address, :city, :zip, :state, :country, :country_code, :phone, :fax, :email, :url, :created_on, :updated_on].each do |key|
68
+ out = val.send( key )
69
+ printf "%25s: %s\n", key, out if out && out != ""
70
+ end
71
+ else
72
+ printf TEMPLATE, key, val if val
73
+ end
74
+ end
75
+
76
+ def title
77
+ @knowledge[:twitter_title] || @knowledge[:og_title] || @knowledge[:page_title]
78
+ end
79
+
80
+ def twitter_author
81
+ @knowledge[:twitter_creator] || @knowledge[:twitter_by] || @knowledge[:twitter_site_author] || (@knowledge[:twitter_ids] || []).first
82
+ end
83
+
84
+ def description
85
+ @knowledge[:twitter_description] || @knowledge[:og_description] || @knowledge[:description]
86
+ end
87
+
88
+ def image
89
+ @knowledge[:twitter_image] || @knowledge[:og_image]
90
+ end
91
+ end
92
+
93
+ data = PageKnowledge.new
94
+
95
+ uri = URI( url )
96
+
97
+ data.remember( :hostname, uri.hostname )
98
+
99
+
100
+
101
+ # Look up the domain
102
+
103
+ def find_domain( hostname )
104
+ # puts "Looking for SOA of #{hostname}"
105
+ dns = Dnsruby::Resolver.new
106
+ soa = dns.query( hostname, "SOA" ).answer.select do |rr|
107
+ rr.is_a? Dnsruby::RR::IN::SOA
108
+ end
109
+
110
+ return hostname if soa.length > 0
111
+
112
+ parts = hostname.split( /\./ )
113
+ return nil if parts.length <= 2
114
+
115
+ find_domain( parts.slice(1,100).join( "." ) )
116
+ end
117
+
118
+ domain = find_domain(uri.hostname)
119
+
120
+ data.remember( :domain, domain )
121
+
122
+ # Look at the domain info
123
+
124
+ whois = Whois.lookup( domain )
125
+
126
+ data.remember( :registered?, whois.registered? )
127
+ if whois.registrar
128
+ data.remember( :registrar_name, whois.registrar.name )
129
+ data.remember( :registrar_url, whois.registrar.url )
130
+ end
131
+
132
+ data.remember( :created_on, whois.created_on.strftime( "%Y-%m-%d") ) if whois.created_on
133
+ data.remember( :expires_on, whois.expires_on.strftime( "%Y-%m-%d") ) if whois.expires_on
134
+ data.remember( :updated_on, whois.updated_on.strftime( "%Y-%m-%d") ) if whois.updated_on
135
+
136
+ whois.contacts.each do |c|
137
+ data.another( :emails, c.email.downcase ) if c.email
138
+ case c.type
139
+ when Whois::Record::Contact::TYPE_REGISTRANT
140
+ data.remember( :registrant_contact, c )
141
+ when Whois::Record::Contact::TYPE_ADMINISTRATIVE
142
+ data.remember( :admin_contact, c )
143
+ when Whois::Record::Contact::TYPE_TECHNICAL
144
+ data.remember( :technical_contact, c )
145
+ end
146
+ end
147
+ # [
148
+ # :name,:organization,:address,:city,
149
+ # :zip,:state,:country,:country_code,
150
+ # :phone,:fax,:email,:url].each do |k|
151
+ # val = c.send(k)
152
+ # printf "%15s : %s\n", k.to_s, val if !val.nil?
153
+ # end
154
+ # end
155
+
156
+ require 'whois/record/parser/blank'
157
+ whois.parts.each do |p|
158
+ if Whois::Record::Parser.parser_for(p).is_a? Whois::Record::Parser::Blank
159
+ puts "Couldn't find a parser for #{p.host}:"
160
+ data.another( :unparsed_whois, p.body )
161
+ end
162
+ end
163
+
164
+
165
+
166
+
167
+ # Load up the response
168
+
169
+ # client = HTTPClient.new
170
+ # client.ssl_config.verify_mode = OpenSSL::SSL::VERIFY_NONE
171
+ # response = client.get( url )
172
+ # # @ssl = p.peer_cert
173
+
174
+ response = HTTParty.get url
175
+
176
+ # require 'pp'
177
+ # pp response.headers
178
+
179
+ data.remember( :server, response.headers['server'] )
180
+
181
+
182
+ # Parse the HTML
183
+
184
+ parsed = Nokogiri.parse response.body
185
+
186
+ data.remember( :page_title, parsed.title )
187
+
188
+ # RSS Feed:
189
+ if feed = parsed.css( 'link[type="application/rss+xml"]' ).first
190
+ feed = feed.attributes['href'].value
191
+ data.remember( :rss_feed, feed )
192
+ end
193
+
194
+ # Atom Feed:
195
+ if feed = parsed.css( 'link[type="application/atom+xml"]' ).first
196
+ feed = feed.attributes['href'].value
197
+ data.remember( :atom_feed, feed )
198
+ end
199
+
200
+
201
+
202
+ # Meta tags
203
+
204
+ meta = {}
205
+ parsed.css( "meta[name]" ).each do |t|
206
+ meta[t.attributes["name"].value] = t.attributes["content"].value if t.attributes["content"]
207
+ end
208
+
209
+ parsed.css( "meta[property]" ).each do |t|
210
+ meta[t.attributes["property"].value] = t.attributes["content"].value
211
+ end
212
+
213
+ # require 'pp'
214
+ # pp meta
215
+
216
+ data.remember( :author, meta['author'] )
217
+ data.remember( :description, meta['description'] )
218
+ data.remember( :keywords, meta['keywords'] )
219
+ data.remember( :generator, meta['generator'])
220
+
221
+ data.remember( :responsive, true ) if meta["viewport"] =~ /width=device-width/
222
+
223
+
224
+ # Check Twitter Card:
225
+
226
+ data.remember( :twitter_title, meta["twitter:title"] )
227
+ data.remember( :twitter_creator, meta["twitter:creator"] )
228
+ if /@(.*)/.match( meta["twitter:creator"] )
229
+ data.another( :twitter_ids, $1 )
230
+ end
231
+ data.remember( :twitter_site_author, meta["twitter:site"] )
232
+ if /@(.*)/.match( meta["twitter:site"] )
233
+ data.another( :twitter_ids, $1 )
234
+ end
235
+ data.remember( :twitter_image, meta["twitter:image"] )
236
+ data.remember( :twitter_description, meta["twitter:description"] )
237
+
238
+ # Open Graph
239
+
240
+ data.remember( :og_title, meta["og:title"] )
241
+ data.remember( :og_description, meta["og:description"] )
242
+ data.remember( :og_type, meta["og:type"] )
243
+ data.remember( :og_image, meta["og:image"] )
244
+
245
+
246
+ # Look inside the body:
247
+
248
+ def matching_links( parsed, regex )
249
+ parsed.css( "a" ).collect do |x|
250
+ if regex.match( x['href'] )
251
+ x
252
+ else
253
+ nil
254
+ end
255
+ end.select do |x|
256
+ x
257
+ end
258
+ end
259
+
260
+ def hrefs( links, filter_shared = false )
261
+ links.collect do |x|
262
+ x['href']
263
+ end.select do |url|
264
+ if filter_shared
265
+ !(url =~ /share/)
266
+ else
267
+ true
268
+ end
269
+ end.uniq
270
+ end
271
+
272
+ def find_id_path( links, regex )
273
+ links.collect do |link|
274
+ if regex.match( link )
275
+ res = $1 || link
276
+ if (res =~ /share/)
277
+ nil
278
+ else
279
+ res
280
+ end
281
+ end
282
+ end.select do |x|
283
+ x
284
+ end.uniq
285
+ end
286
+
287
+ # Twitter
288
+
289
+ # Look for twitter links
290
+ twitter_links = hrefs( matching_links( parsed, /twitter.com\/[^\/]*$/ ), true )
291
+ data.remember( :twitter_links, twitter_links )
292
+
293
+ twitter_ids = find_id_path( twitter_links, /twitter.com\/([^\/]*$)/ ).each do |id|
294
+ data.another( :twitter_ids, id )
295
+ end
296
+
297
+ # Look for twitter shared links
298
+
299
+ twitter_shared = matching_links( parsed, /twitter.com\/share/ )
300
+
301
+ twitter_shared.each do |l|
302
+ text = l['data-text']
303
+
304
+ # See if there's a "by @user" in the text
305
+ if /by\s*@([^\s]*)/.match text
306
+ data.another( :twitter_ids, $1 )
307
+ data.remember( :twitter_by, $1 )
308
+ end
309
+
310
+ # Look for all "@usernames" in the text
311
+ if text
312
+ text.split.select { |x| x =~ /@\s*/ }.each do |id|
313
+ data.another( :twitter_ids, id.slice( 1,100 ) ) # We don't want the @
314
+ end
315
+ end
316
+
317
+ # See if there's a via link on the anchor tag
318
+ if l['data-via']
319
+ data.another( :twitter_ids, l['data-via'])
320
+ end
321
+
322
+
323
+ possible_via = URI.decode( (URI(l['href']).query) || "" ).split( /&amp;/ ).collect { |x| x.split( /=/ ) }.select { |x| x[0] == 'via' }
324
+ if possible_via.size > 0
325
+ data.another( :twitter_ids, possible_via[0][1] )
326
+ end
327
+ end
328
+
329
+ # Look for intent
330
+
331
+ twitter_intent = hrefs( matching_links( parsed, /twitter.com\/intent/ ) )
332
+
333
+ twitter_intent.each do |t|
334
+ URI.decode( URI(t.gsub( / /, "+" )).query ).split( /&/ ).select do |x|
335
+ x =~ /via/
336
+ end.collect do |x|
337
+ x.gsub( /via=/, "" )
338
+ end.each do |via|
339
+ data.another( :twitter_ids, via )
340
+ end
341
+ end
342
+ # Look for email
343
+
344
+ email_links = hrefs( matching_links( parsed, /mailto:/ ) )
345
+ email_address = find_id_path( email_links, /mailto:(.*@.*\..*)/ ).each do |email|
346
+ data.another( :emails, email )
347
+ end
348
+
349
+ # Linkedin
350
+
351
+ linkedin_links = hrefs( matching_links( parsed, /linkedin.com/ ), true )
352
+ data.remember( :linkedin_links, linkedin_links )
353
+
354
+ # Instagram
355
+
356
+ instagram_links = hrefs( matching_links( parsed, /instagram.com/ ) )
357
+ data.remember( :instagram_links, instagram_links )
358
+
359
+ # Facebook
360
+
361
+ facebook_links = hrefs( matching_links( parsed, /facebook.com\/[^\/]*$/ ) )
362
+ data.remember( :facebook_links, facebook_links )
363
+
364
+ # Google plus
365
+
366
+ googleplus_links = hrefs( matching_links( parsed, /plus.google.com\/[^\/]*$/ ) )
367
+ data.remember( :googleplus_links, googleplus_links )
368
+
369
+ # Github
370
+
371
+ github_links = hrefs( matching_links( parsed, /github.com\/[^\/]*$/ ) )
372
+ data.remember( :github_links, github_links )
373
+
374
+
375
+ puts
376
+ puts "This is what we've figured out:"
377
+
378
+
379
+ data.print
380
+
381
+
382
+ # Bonus!
383
+
384
+ # Get this file from https://github.com/ElbertF/Wappalyzer/tree/master/share
385
+ if File.exists? "apps.json"
386
+ apps_json = File.read( "apps.json" )
387
+ technologies = []
388
+
389
+ apps = JSON.parse( apps_json )
390
+
391
+ def check_regex( mashed_regex, value )
392
+ regex,result = mashed_regex.split( /\\;/ )
393
+ md = Regexp.new( regex ).match( value )
394
+ if md
395
+ if result
396
+ result = result.gsub( /\\1/, (md[1] || "" )).gsub( /\\2/, (md[2] || "") )
397
+ else
398
+ true
399
+ end
400
+ else
401
+ false
402
+ end
403
+ end
404
+
405
+ scripts = parsed.css( "script" ).collect { |x| x['src'] }.select { |x| x }
406
+ # puts scripts
407
+
408
+ apps['apps'].each do |app,checks|
409
+ if checks['html']
410
+ html_array = checks['html']
411
+ html_array = [checks['html']] if html_array.is_a? String
412
+
413
+ html_array.each do |html|
414
+ result = check_regex( html, response.body )
415
+ if result
416
+ technologies << app
417
+ technologies << checks['implies'] if checks['implies']
418
+ end
419
+ end
420
+ end
421
+
422
+ if checks['meta']
423
+ checks['meta'].each do |k,code|
424
+ result = check_regex( code, meta[k] )
425
+ if result
426
+ technologies << app
427
+ technologies << checks['implies'] if checks['implies']
428
+ end
429
+ end
430
+ end
431
+
432
+ if checks['headers']
433
+ checks['headers'].each do |k,code|
434
+ result = check_regex( code, response.headers[k] )
435
+ if result
436
+ technologies << app
437
+ technologies << checks['implies'] if checks['implies']
438
+ end
439
+ end
440
+ end
441
+
442
+ if checks['script']
443
+ script_array = checks['script']
444
+ script_array = [checks['script']] if script_array.is_a? String
445
+ script_array.each do |script_regex|
446
+ scripts.each do |script|
447
+ result = check_regex( script_regex, script)
448
+ if result
449
+ technologies << app
450
+ technologies << checks['implies'] if checks['implies']
451
+ end
452
+ end
453
+ end
454
+ end
455
+ end
456
+
457
+ printf "%20s: %s\n", "technologies", technologies.sort.uniq.join( ", ")
458
+ end
@@ -27,6 +27,16 @@ module Socialinvestigator
27
27
  save_yaml( "twitter.yml", config )
28
28
  end
29
29
 
30
+ def apps_json
31
+ read_json( "apps.json" )
32
+ end
33
+
34
+ def apps_json=( data )
35
+ File.open( "#{@dir}/apps.json", "w" ) do |out|
36
+ out << data
37
+ end
38
+ end
39
+
30
40
  def read_yaml( name )
31
41
  file = "#{@dir}/#{name}"
32
42
 
@@ -42,6 +52,16 @@ module Socialinvestigator
42
52
  out.write obj.to_yaml
43
53
  end
44
54
  end
55
+
56
+ def read_json( name )
57
+ file = "#{@dir}/#{name}"
58
+
59
+ if File.exists? file
60
+ return JSON.parse( File.read( file ) )
61
+ end
62
+
63
+ nil
64
+ end
45
65
  end
46
66
  end
47
67
  end
@@ -1,3 +1,3 @@
1
1
  module Socialinvestigator
2
- VERSION = "0.0.3"
2
+ VERSION = "0.0.4"
3
3
  end
@@ -21,6 +21,9 @@ Gem::Specification.new do |spec|
21
21
  spec.add_dependency 'thor'
22
22
  spec.add_dependency 'httparty'
23
23
  spec.add_dependency 'twitter'
24
+ spec.add_dependency 'nokogiri'
25
+ spec.add_dependency 'whois'
26
+ spec.add_dependency 'dnsruby'
24
27
 
25
28
  spec.add_development_dependency "bundler", "~> 1.6"
26
29
  spec.add_development_dependency "rake"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: socialinvestigator
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Will Schenk
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-11-10 00:00:00.000000000 Z
11
+ date: 2014-11-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor
@@ -52,6 +52,48 @@ dependencies:
52
52
  - - '>='
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: nokogiri
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: whois
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: dnsruby
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - '>='
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
55
97
  - !ruby/object:Gem::Dependency
56
98
  name: bundler
57
99
  requirement: !ruby/object:Gem::Requirement
@@ -97,8 +139,11 @@ files:
97
139
  - lib/socialinvestigator.rb
98
140
  - lib/socialinvestigator/cli.rb
99
141
  - lib/socialinvestigator/cli/hn.rb
142
+ - lib/socialinvestigator/cli/net.rb
100
143
  - lib/socialinvestigator/cli/twitter.rb
101
144
  - lib/socialinvestigator/client/hn.rb
145
+ - lib/socialinvestigator/client/net.rb
146
+ - lib/socialinvestigator/client/standalone_net.rb
102
147
  - lib/socialinvestigator/client/twitter.rb
103
148
  - lib/socialinvestigator/config.rb
104
149
  - lib/socialinvestigator/version.rb