socialinvestigator 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 68ac5910584d162d37369db68b69401cc5f85213
4
- data.tar.gz: 0cb5ce2e0e7311a36be9a6b0a8c227a7507935cf
3
+ metadata.gz: b05afd1645671efac34a27455b466ae120bd0796
4
+ data.tar.gz: d8e96eb8befbf7fb8124b3cba94d711e18157f52
5
5
  SHA512:
6
- metadata.gz: 56f8c7088524492a8e9a3f6eae31c48d73ae0a958fc41f1a569be0a0810ffd92fdbe0e040babeb72f4e5c0600980b406f7a906015c1e8abee487fdf5c7f72201
7
- data.tar.gz: eb6f2479c6db084d9955632b418bfc90a7145c10d8fdf9fba696008adbf2cc84141f14a7f69edffc1a24fe185be78d5cb3f1472acbe3a89b0b5e8568174687b1
6
+ metadata.gz: 9481ba394a0fdc6380c48a0d2f1eeaeda14b1abedb993f83d95a423acd5be718bbedbd272e3b95e0ce6074b409fa8580550f9df1955f05f9f3dd6aec586d2661
7
+ data.tar.gz: 3f94308447f5a9bb6a28bd013066637c2cb588c84db01f2ff840511cd6acf9a2890a4fbfc149f037d9026d4c9643dcd359c7f9593504a01d8c2b5b5d5aa24596
data/.gitignore CHANGED
@@ -20,3 +20,4 @@ tmp
20
20
  *.o
21
21
  *.a
22
22
  mkmf.log
23
+ apps.json
data/Gemfile CHANGED
@@ -1,4 +1,5 @@
1
1
  source 'https://rubygems.org'
2
2
 
3
+ # gem 'whois', git: "https://github.com/mfasanya/whois"
3
4
  # Specify your gem's dependencies in socialinvestigator.gemspec
4
5
  gemspec
data/README.md CHANGED
@@ -17,14 +17,35 @@ Then you can run the command 'socialinvestigator' to begin using it.
17
17
  Full help
18
18
  $ socialinvestigator help
19
19
 
20
+ ## Hacker News Search
21
+
22
+ Code walk through: http://willschenk.com/making-a-command-line-utility-with-gems-and-thor
23
+
20
24
  Search hacker news for a url:
21
25
 
22
26
  $ socialinvestigator hn search http://willschenk.com
23
27
 
24
- Setting up twitter. You'll need to register a twitter app for this to work.
25
- Full walk through is here http://willschenk.com/scripting-twitter.
28
+ ## Looking up information from a URL
29
+
30
+ Code walk through: http://willschenk.com/personal-information-from-only-a-url
31
+
32
+ Start with a URL, figure out what you can find:
33
+
34
+ $ socialinvestigator net page_info http://willschenk.com
35
+
36
+ To analyse the technology stack, you need to load the datafile from
37
+ https://github.com/ElbertF/Wappalyzer
38
+ which can be done with this command:
39
+
40
+ $ socialinvestigator net get_apps_json
41
+
42
+ ## Twitter Scripting
43
+
44
+ _This will be documented soon_
45
+
46
+ Code walk through: http://willschenk.com/scripting-twitter
26
47
 
27
- Once you have the twitter info, you put it in using the twitter config command:
48
+ You'll need to register a twitter app for this to work. Once you have the twitter info, you put it in using the twitter config command:
28
49
 
29
50
  $ socialinvestigator twitter config
30
51
 
@@ -1,6 +1,7 @@
1
1
  require 'thor'
2
2
  require 'socialinvestigator/cli/hn'
3
3
  require 'socialinvestigator/cli/twitter'
4
+ require 'socialinvestigator/cli/net'
4
5
 
5
6
  module Socialinvestigator
6
7
  class HammerOfTheGods < Thor
@@ -29,5 +30,8 @@ module Socialinvestigator
29
30
 
30
31
  desc "twitter COMMANDS", "Twitter Control Module"
31
32
  subcommand "twitter", Socialinvestigator::CLI::TwitterCli
33
+
34
+ desc "net COMMANDS", "Net control Module"
35
+ subcommand "net", Socialinvestigator::CLI::Net
32
36
  end
33
37
  end
@@ -0,0 +1,27 @@
1
+ require 'socialinvestigator/client/net'
2
+
3
module Socialinvestigator
  module CLI
    # Thor subcommand ("net") exposing the page investigation client.
    class Net < Thor
      # Location of the Wappalyzer technology definitions.
      APPS_JSON_URL = "https://raw.githubusercontent.com/ElbertF/Wappalyzer/master/share/apps.json"

      desc "page_info URL", "Looks at a page to see what social links it finds"
      # Investigates URL and prints the resulting report.
      def page_info( url )
        client.get_knowledge( url ).print
      end

      desc "get_apps_json", "Download the apps.json file from Wappalyzer"
      # Downloads apps.json and stores it through the config object so that
      # page_info can report on technologies.
      def get_apps_json
        puts "Loading from #{APPS_JSON_URL}"
        response = HTTParty.get APPS_JSON_URL

        # Do not persist an error page: it would later be fed to JSON.parse
        # by the config reader.
        unless (200..299).cover?( response.code )
          raise Thor::Error, "Download failed with HTTP status #{response.code}"
        end

        Socialinvestigator::Config.config.apps_json = response.body
        puts "Saved"
      end

      private

      # Lazily built client shared by the commands of this instance.
      def client
        @client ||= Socialinvestigator::Client::NetClient.new
      end
    end
  end
end
@@ -6,7 +6,7 @@ module Socialinvestigator
6
6
  class TwitterCli < Thor
7
7
  desc "user SCREENAME", "Look up info for a specific user."
8
8
  def user( username )
9
- agent.print_user_info client.user( "wschenk" )
9
+ agent.print_user_info client.user( username )
10
10
  end
11
11
 
12
12
  desc "lookup URL", "Resolve a link"
@@ -150,4 +150,8 @@ module Socialinvestigator
150
150
  end
151
151
  end
152
152
  end
153
+ end
154
+
155
+ if __FILE__ == $0
156
+ Socialinvestigator::CLI::TwitterCli.start( ARGV )
153
157
  end
@@ -0,0 +1,451 @@
1
+ #!/usr/bin/env ruby
2
+ require 'httparty'
3
+ require 'nokogiri'
4
+ require 'dnsruby'
5
+ require 'whois'
6
+
7
+ module Socialinvestigator
8
+ module Client
9
# Collects facts about a page under symbol keys and prints them as a
# fixed-order report.
class PageKnowledge
  DEBUG = false
  TEMPLATE = "%20s: %s\n"
  CONTACT_TEMPLATE = "%25s: %s\n"

  # Attributes printed for a Whois::Record::Contact value.
  CONTACT_FIELDS = [
    :name, :organization, :address, :city, :zip, :state, :country,
    :country_code, :phone, :fax, :email, :url, :created_on, :updated_on
  ].freeze

  # Report keys whose value comes from a method of this class rather than
  # straight from the stored facts.
  DERIVED_KEYS = [:title, :description, :twitter_author, :image].freeze

  # Keys printed by #print, in output order.
  REPORT_KEYS = [
    :domain, :created_on, :expires_on, :updated_on,
    :registrar_name, :registrar_url,
    :registrant_contact, :admin_contact, :technical_contact,
    :emails, :title, :description, :twitter_author, :twitter_ids,
    :image, :responsive, :rss_feed, :atom_feed,
    :twitter_links, :linkedin_links, :instagram_links, :facebook_links,
    :googleplus_links, :github_links, :technologies
  ].freeze

  def initialize
    @knowledge = {}
  end

  # Stores a single value under key, replacing any previous one.
  # nil values are ignored.
  def remember( key, value )
    return if value.nil?
    # Kernel.p is named explicitly because this class defines its own #p;
    # presumably raw inspect output was intended for debugging.
    Kernel.p key, value if DEBUG

    @knowledge[key] = value
  end

  # Adds value to the de-duplicated list stored under key. An Array value
  # contributes its elements, so the stored list stays flat and uniq can
  # remove repeats. nil values are ignored.
  def another( key, value )
    return if value.nil?
    Kernel.p key, value if DEBUG

    additions = value.is_a?( Array ) ? value.flatten.compact : [value]
    @knowledge[key] = ( ( @knowledge[key] || [] ) + additions ).uniq
  end

  # Prints every report key that has something to show.
  def print
    REPORT_KEYS.each do |key|
      p key, ( DERIVED_KEYS.include?( key ) ? send( key ) : nil )
    end
  end

  # Prints one report line for key. When val is nil the stored fact is
  # used. Empty arrays, nil and false print nothing; a WHOIS contact prints
  # a heading followed by its non-blank attributes.
  def p( key, val = nil )
    val = @knowledge[key] if val.nil?
    return unless val

    case val
    when Array
      printf TEMPLATE, key, val.join( ", " ) unless val.empty?
    when Whois::Record::Contact
      printf TEMPLATE, key, ""
      CONTACT_FIELDS.each do |field|
        out = val.send( field )
        printf CONTACT_TEMPLATE, field, out if out && out != ""
      end
    else
      printf TEMPLATE, key, val
    end
  end

  # Best available title: Twitter Card, then Open Graph, then <title>.
  def title
    @knowledge[:twitter_title] || @knowledge[:og_title] || @knowledge[:page_title]
  end

  # Best guess at the author's Twitter handle.
  def twitter_author
    @knowledge[:twitter_creator] || @knowledge[:twitter_by] || @knowledge[:twitter_site_author] || (@knowledge[:twitter_ids] || []).first
  end

  # Best available description: Twitter Card, then Open Graph, then meta.
  def description
    @knowledge[:twitter_description] || @knowledge[:og_description] || @knowledge[:description]
  end

  # Best available image: Twitter Card, then Open Graph.
  def image
    @knowledge[:twitter_image] || @knowledge[:og_image]
  end
end
91
+
92
+ class NetClient
93
+ # Look up the domain
94
+
95
# Finds the registered domain for hostname by walking up its labels until
# a name answers an SOA query.
#
# hostname - String host name such as "www.example.com" (may be nil).
#
# Returns the first name (hostname itself or one of its parents) with an
# SOA record, or nil when none is found before the name is down to two
# labels, or when hostname is nil/empty.
def find_domain( hostname )
  return nil if hostname.nil? || hostname.empty?

  resolver = Dnsruby::Resolver.new   # one resolver for the whole walk
  candidate = hostname

  loop do
    begin
      answers = resolver.query( candidate, "SOA" ).answer
    rescue Dnsruby::NXDomain
      # NOTE(review): Dnsruby is expected to raise NXDomain for names that
      # do not exist; a parent name may still be a zone, so keep walking.
      answers = []
    end
    return candidate if answers.any? { |rr| rr.is_a?( Dnsruby::RR::IN::SOA ) }

    labels = candidate.split( "." )
    return nil if labels.length <= 2

    candidate = labels.drop( 1 ).join( "." )
  end
end
109
+
110
# Gathers everything that can be learned from url: WHOIS data for its
# domain, the Server header, feeds, meta tags (plain, Twitter Card, Open
# Graph), social and mailto links in the body and, when apps.json has been
# saved, the technologies the page appears to use.
#
# url - String URL of the page to inspect.
#
# Returns a PageKnowledge holding the collected facts.
def get_knowledge( url )
  data = PageKnowledge.new

  uri = URI( url )
  data.remember( :hostname, uri.hostname )

  domain = find_domain( uri.hostname )
  data.remember( :domain, domain )
  # find_domain returns nil when no SOA record was found; there is then
  # nothing to look up in WHOIS.
  record_whois( data, domain ) if domain

  response = HTTParty.get url
  data.remember( :server, response.headers['server'] )

  parsed = Nokogiri.parse response.body
  data.remember( :page_title, parsed.title )

  record_feeds( data, parsed )
  meta = record_meta_tags( data, parsed )
  record_twitter_links( data, parsed )
  record_email_links( data, parsed )
  record_social_links( data, parsed )
  record_technologies( data, parsed, response, meta )

  data
end

# Stores registrar, dates and contacts from the WHOIS record of domain.
def record_whois( data, domain )
  whois = Whois.lookup( domain )

  data.remember( :registered?, whois.registered? )
  if whois.registrar
    data.remember( :registrar_name, whois.registrar.name )
    data.remember( :registrar_url, whois.registrar.url )
  end

  created = whois.created_on
  expires = whois.expires_on
  updated = whois.updated_on
  data.remember( :created_on, created.strftime( "%Y-%m-%d" ) ) if created
  data.remember( :expires_on, expires.strftime( "%Y-%m-%d" ) ) if expires
  data.remember( :updated_on, updated.strftime( "%Y-%m-%d" ) ) if updated

  whois.contacts.each do |contact|
    data.another( :emails, contact.email.downcase ) if contact.email
    case contact.type
    when Whois::Record::Contact::TYPE_REGISTRANT
      data.remember( :registrant_contact, contact )
    when Whois::Record::Contact::TYPE_ADMINISTRATIVE
      data.remember( :admin_contact, contact )
    when Whois::Record::Contact::TYPE_TECHNICAL
      data.remember( :technical_contact, contact )
    end
  end

  # Keep the raw text of any response part the whois gem cannot parse.
  require 'whois/record/parser/blank'
  whois.parts.each do |part|
    if Whois::Record::Parser.parser_for( part ).is_a? Whois::Record::Parser::Blank
      puts "Couldn't find a parser for #{part.host}:"
      data.another( :unparsed_whois, part.body )
    end
  end
end

# Stores the first RSS and Atom feed links. A <link> without an href is
# ignored instead of raising.
def record_feeds( data, parsed )
  rss = parsed.css( 'link[type="application/rss+xml"]' ).first
  data.remember( :rss_feed, rss['href'] ) if rss

  atom = parsed.css( 'link[type="application/atom+xml"]' ).first
  data.remember( :atom_feed, atom['href'] ) if atom
end

# Reads <meta name=...> and <meta property=...> tags, stores the generic,
# Twitter Card and Open Graph facts, and returns the name/property =>
# content Hash. Tags without a content attribute are skipped.
def record_meta_tags( data, parsed )
  meta = {}
  parsed.css( "meta[name]" ).each do |tag|
    meta[tag['name']] = tag['content'] if tag['content']
  end
  parsed.css( "meta[property]" ).each do |tag|
    meta[tag['property']] = tag['content'] if tag['content']
  end

  data.remember( :author, meta['author'] )
  data.remember( :description, meta['description'] )
  data.remember( :keywords, meta['keywords'] )
  data.remember( :generator, meta['generator'] )

  data.remember( :responsive, true ) if meta["viewport"] =~ /width=device-width/

  # Twitter Card
  data.remember( :twitter_title, meta["twitter:title"] )
  data.remember( :twitter_creator, meta["twitter:creator"] )
  data.another( :twitter_ids, meta["twitter:creator"].to_s[/@(.*)/, 1] )
  data.remember( :twitter_site_author, meta["twitter:site"] )
  data.another( :twitter_ids, meta["twitter:site"].to_s[/@(.*)/, 1] )
  data.remember( :twitter_image, meta["twitter:image"] )
  data.remember( :twitter_description, meta["twitter:description"] )

  # Open Graph
  data.remember( :og_title, meta["og:title"] )
  data.remember( :og_description, meta["og:description"] )
  data.remember( :og_type, meta["og:type"] )
  data.remember( :og_image, meta["og:image"] )

  meta
end

# Collects Twitter handles from profile links, share buttons and intent
# links in the page body.
def record_twitter_links( data, parsed )
  # Profile links
  twitter_links = hrefs( matching_links( parsed, /twitter.com\/[^\/]*$/ ), true )
  data.remember( :twitter_links, twitter_links )
  find_id_path( twitter_links, /twitter.com\/([^\/]*$)/ ).each do |id|
    data.another( :twitter_ids, id )
  end

  # Share buttons
  matching_links( parsed, /twitter.com\/share/ ).each do |link|
    text = link['data-text'].to_s

    # "by @user" in the tweet text names the author
    by = text[/by\s*@(\w+)/, 1]
    if by
      data.another( :twitter_ids, by )
      data.remember( :twitter_by, by )
    end

    # Every "@user" mention. Only tokens that start with "@" count, so
    # e-mail addresses are not mistaken for handles.
    text.scan( /(?:\A|\s)@(\w+)/ ).flatten.each do |id|
      data.another( :twitter_ids, id )
    end

    data.another( :twitter_ids, link['data-via'] )

    twitter_via_handles( link['href'] ).each do |via|
      data.another( :twitter_ids, via )
    end
  end

  # Intent links
  hrefs( matching_links( parsed, /twitter.com\/intent/ ) ).each do |href|
    twitter_via_handles( href ).each do |via|
      data.another( :twitter_ids, via )
    end
  end
end

# Returns the non-empty values of every "via" query parameter in href, or
# [] when href has no query or cannot be parsed.
def twitter_via_handles( href )
  # Literal spaces make URI() raise, so encode them as "+" first.
  query = URI( href.to_s.gsub( / /, "+" ) ).query
  return [] if query.nil?

  URI.decode_www_form( query ).
    select { |name, _value| name == "via" }.
    map { |_name, value| value }.
    reject { |value| value.nil? || value.empty? }
rescue URI::InvalidURIError, ArgumentError
  []
end

# Stores addresses found in mailto: links.
def record_email_links( data, parsed )
  email_links = hrefs( matching_links( parsed, /mailto:/ ) )
  find_id_path( email_links, /mailto:(.*@.*\..*)/ ).each do |email|
    data.another( :emails, email )
  end
end

# Stores links to other social profiles found in the page body.
def record_social_links( data, parsed )
  data.remember( :linkedin_links, hrefs( matching_links( parsed, /linkedin.com/ ), true ) )
  data.remember( :instagram_links, hrefs( matching_links( parsed, /instagram.com/ ) ) )
  data.remember( :facebook_links, hrefs( matching_links( parsed, /facebook.com\/[^\/]*$/ ) ) )
  data.remember( :googleplus_links, hrefs( matching_links( parsed, /plus.google.com\/[^\/]*$/ ) ) )
  data.remember( :github_links, hrefs( matching_links( parsed, /github.com\/[^\/]*$/ ) ) )
end

# Matches the page against the saved apps.json definitions
# (https://github.com/ElbertF/Wappalyzer/tree/master/share) and stores each
# detected app plus whatever it implies. Does nothing when no apps.json
# has been saved.
def record_technologies( data, parsed, response, meta )
  apps = Socialinvestigator::Config.config.apps_json
  return unless apps

  scripts = parsed.css( "script" ).map { |tag| tag['src'] }.compact

  apps['apps'].each do |app, checks|
    # 'html' and 'script' may be a single pattern or a list of patterns.
    detected =
      Array( checks['html'] ).any? { |pattern| check_regex( pattern, response.body ) } ||
      ( checks['meta'] || {} ).any? { |name, pattern| check_regex( pattern, meta[name] ) } ||
      ( checks['headers'] || {} ).any? { |name, pattern| check_regex( pattern, response.headers[name] ) } ||
      Array( checks['script'] ).any? { |pattern| scripts.any? { |src| check_regex( pattern, src ) } }
    next unless detected

    data.another :technologies, app
    # presumably 'implies' can also be a name or a list of names; add each
    # one separately so the stored list stays flat.
    Array( checks['implies'] ).each { |implied| data.another :technologies, implied }
  end
end

private :record_whois, :record_feeds, :record_meta_tags, :record_twitter_links,
        :twitter_via_handles, :record_email_links, :record_social_links,
        :record_technologies
396
+
397
# Returns the <a> elements of parsed whose href matches regex.
#
# parsed - document responding to css( selector ).
# regex  - Regexp tested against each anchor's href.
#
# Anchors without an href are never returned.
def matching_links( parsed, regex )
  parsed.css( "a" ).select do |anchor|
    href = anchor['href']
    href && regex.match( href )
  end
end
408
+
409
# Returns the distinct href values of links.
#
# links         - elements answering ['href'].
# filter_shared - when true, URLs containing "share" are dropped.
#
# Elements without an href contribute nothing.
def hrefs( links, filter_shared = false )
  urls = links.map { |link| link['href'] }.compact
  urls = urls.reject { |url| url =~ /share/ } if filter_shared
  urls.uniq
end
420
+
421
# Extracts an identifier from every link that matches regex.
#
# links - Array of URL strings.
# regex - Regexp; its first capture group is the identifier. Without a
#         capture group the whole link is used.
#
# Returns the distinct identifiers, leaving out any that contain "share".
def find_id_path( links, regex )
  ids = links.map do |link|
    match = regex.match( link.to_s )
    next unless match

    id = match[1] || link
    id unless id =~ /share/
  end
  ids.compact.uniq
end
435
+
436
# Tests value against an apps.json pattern of the form
# "regex" or "regex\;template".
#
# mashed_regex - String pattern, optionally followed by "\;" and a
#                template in which \1 and \2 stand for capture groups.
# value        - String to test (may be nil).
#
# Returns false when there is no match, value is nil or the pattern is not
# a valid Ruby regex; true for a match without a template; otherwise the
# template with \1 and \2 replaced by the captured text.
def check_regex( mashed_regex, value )
  pattern, template = mashed_regex.to_s.split( /\\;/ )
  return false if pattern.nil? || value.nil?

  begin
    md = Regexp.new( pattern ).match( value.to_s )
  rescue RegexpError
    # One unusable pattern should not abort the whole scan.
    return false
  end
  return false unless md
  return true unless template

  # Block form: captured text is inserted literally, so backslashes in it
  # are not re-read as back-references.
  template.gsub( /\\([12])/ ) { md[Regexp.last_match( 1 ).to_i] || "" }
end
449
+ end
450
+ end
451
+ end
@@ -0,0 +1,458 @@
1
+ #!/usr/bin/env ruby
2
+ require 'httparty'
3
+ require 'nokogiri'
4
+ require 'dnsruby'
5
+ require 'whois'
6
+
7
+ url = ARGV[0] || "http://www.fastcolabs.com/3038014/product-bootcamp-week-six-worth-it"
8
+
9
# Collects facts about a page under symbol keys and prints them as a
# fixed-order report.
class PageKnowledge
  DEBUG = false
  TEMPLATE = "%20s: %s\n"
  CONTACT_TEMPLATE = "%25s: %s\n"

  # Attributes printed for a Whois::Record::Contact value.
  CONTACT_FIELDS = [
    :name, :organization, :address, :city, :zip, :state, :country,
    :country_code, :phone, :fax, :email, :url, :created_on, :updated_on
  ].freeze

  # Report keys whose value comes from a method of this class rather than
  # straight from the stored facts.
  DERIVED_KEYS = [:title, :description, :twitter_author, :image].freeze

  # Keys printed by #print, in output order.
  REPORT_KEYS = [
    :domain, :created_on, :expires_on, :updated_on,
    :registrar_name, :registrar_url,
    :registrant_contact, :admin_contact, :technical_contact,
    :emails, :title, :description, :twitter_author, :twitter_ids,
    :image, :responsive, :rss_feed, :atom_feed,
    :twitter_links, :linkedin_links, :instagram_links, :facebook_links,
    :googleplus_links, :github_links
  ].freeze

  def initialize
    @knowledge = {}
  end

  # Stores a single value under key, replacing any previous one.
  # nil values are ignored.
  def remember( key, value )
    return if value.nil?
    # Kernel.p is named explicitly because this class defines its own #p;
    # presumably raw inspect output was intended for debugging.
    Kernel.p key, value if DEBUG

    @knowledge[key] = value
  end

  # Adds value to the de-duplicated list stored under key. An Array value
  # contributes its elements, so the stored list stays flat. nil values
  # are ignored.
  def another( key, value )
    return if value.nil?
    Kernel.p key, value if DEBUG

    additions = value.is_a?( Array ) ? value.flatten.compact : [value]
    @knowledge[key] = ( ( @knowledge[key] || [] ) + additions ).uniq
  end

  # Prints every report key that has something to show.
  def print
    REPORT_KEYS.each do |key|
      p key, ( DERIVED_KEYS.include?( key ) ? send( key ) : nil )
    end

    # pp @knowledge
  end

  # Prints one report line for key. When val is nil the stored fact is
  # used. Empty arrays, nil and false print nothing; a WHOIS contact prints
  # a heading followed by its non-blank attributes.
  def p( key, val = nil )
    val = @knowledge[key] if val.nil?
    return unless val

    case val
    when Array
      printf TEMPLATE, key, val.join( ", " ) unless val.empty?
    when Whois::Record::Contact
      printf TEMPLATE, key, ""
      CONTACT_FIELDS.each do |field|
        out = val.send( field )
        printf CONTACT_TEMPLATE, field, out if out && out != ""
      end
    else
      printf TEMPLATE, key, val
    end
  end

  # Best available title: Twitter Card, then Open Graph, then <title>.
  def title
    @knowledge[:twitter_title] || @knowledge[:og_title] || @knowledge[:page_title]
  end

  # Best guess at the author's Twitter handle.
  def twitter_author
    @knowledge[:twitter_creator] || @knowledge[:twitter_by] || @knowledge[:twitter_site_author] || (@knowledge[:twitter_ids] || []).first
  end

  # Best available description: Twitter Card, then Open Graph, then meta.
  def description
    @knowledge[:twitter_description] || @knowledge[:og_description] || @knowledge[:description]
  end

  # Best available image: Twitter Card, then Open Graph.
  def image
    @knowledge[:twitter_image] || @knowledge[:og_image]
  end
end
92
+
93
+ data = PageKnowledge.new
94
+
95
+ uri = URI( url )
96
+
97
+ data.remember( :hostname, uri.hostname )
98
+
99
+
100
+
101
+ # Look up the domain
102
+
103
# Finds the registered domain for hostname by walking up its labels until
# a name answers an SOA query.
#
# hostname - String host name such as "www.example.com" (may be nil).
#
# Returns the first name (hostname itself or one of its parents) with an
# SOA record, or nil when none is found before the name is down to two
# labels, or when hostname is nil/empty.
def find_domain( hostname )
  return nil if hostname.nil? || hostname.empty?

  resolver = Dnsruby::Resolver.new   # one resolver for the whole walk
  candidate = hostname

  loop do
    begin
      answers = resolver.query( candidate, "SOA" ).answer
    rescue Dnsruby::NXDomain
      # NOTE(review): Dnsruby is expected to raise NXDomain for names that
      # do not exist; a parent name may still be a zone, so keep walking.
      answers = []
    end
    return candidate if answers.any? { |rr| rr.is_a?( Dnsruby::RR::IN::SOA ) }

    labels = candidate.split( "." )
    return nil if labels.length <= 2

    candidate = labels.drop( 1 ).join( "." )
  end
end
117
+
118
+ domain = find_domain(uri.hostname)
119
+
120
+ data.remember( :domain, domain )
121
+
122
+ # Look at the domain info
123
+
124
+ whois = Whois.lookup( domain )
125
+
126
+ data.remember( :registered?, whois.registered? )
127
+ if whois.registrar
128
+ data.remember( :registrar_name, whois.registrar.name )
129
+ data.remember( :registrar_url, whois.registrar.url )
130
+ end
131
+
132
+ data.remember( :created_on, whois.created_on.strftime( "%Y-%m-%d") ) if whois.created_on
133
+ data.remember( :expires_on, whois.expires_on.strftime( "%Y-%m-%d") ) if whois.expires_on
134
+ data.remember( :updated_on, whois.updated_on.strftime( "%Y-%m-%d") ) if whois.updated_on
135
+
136
+ whois.contacts.each do |c|
137
+ data.another( :emails, c.email.downcase ) if c.email
138
+ case c.type
139
+ when Whois::Record::Contact::TYPE_REGISTRANT
140
+ data.remember( :registrant_contact, c )
141
+ when Whois::Record::Contact::TYPE_ADMINISTRATIVE
142
+ data.remember( :admin_contact, c )
143
+ when Whois::Record::Contact::TYPE_TECHNICAL
144
+ data.remember( :technical_contact, c )
145
+ end
146
+ end
147
+ # [
148
+ # :name,:organization,:address,:city,
149
+ # :zip,:state,:country,:country_code,
150
+ # :phone,:fax,:email,:url].each do |k|
151
+ # val = c.send(k)
152
+ # printf "%15s : %s\n", k.to_s, val if !val.nil?
153
+ # end
154
+ # end
155
+
156
+ require 'whois/record/parser/blank'
157
+ whois.parts.each do |p|
158
+ if Whois::Record::Parser.parser_for(p).is_a? Whois::Record::Parser::Blank
159
+ puts "Couldn't find a parser for #{p.host}:"
160
+ data.another( :unparsed_whois, p.body )
161
+ end
162
+ end
163
+
164
+
165
+
166
+
167
+ # Load up the response
168
+
169
+ # client = HTTPClient.new
170
+ # client.ssl_config.verify_mode = OpenSSL::SSL::VERIFY_NONE
171
+ # response = client.get( url )
172
+ # # @ssl = p.peer_cert
173
+
174
+ response = HTTParty.get url
175
+
176
+ # require 'pp'
177
+ # pp response.headers
178
+
179
+ data.remember( :server, response.headers['server'] )
180
+
181
+
182
+ # Parse the HTML
183
+
184
+ parsed = Nokogiri.parse response.body
185
+
186
+ data.remember( :page_title, parsed.title )
187
+
188
+ # RSS Feed:
189
+ if feed = parsed.css( 'link[type="application/rss+xml"]' ).first
190
+ feed = feed.attributes['href'].value
191
+ data.remember( :rss_feed, feed )
192
+ end
193
+
194
+ # Atom Feed:
195
+ if feed = parsed.css( 'link[type="application/atom+xml"]' ).first
196
+ feed = feed.attributes['href'].value
197
+ data.remember( :atom_feed, feed )
198
+ end
199
+
200
+
201
+
202
+ # Meta tags
203
+
204
+ meta = {}
205
+ parsed.css( "meta[name]" ).each do |t|
206
+ meta[t.attributes["name"].value] = t.attributes["content"].value if t.attributes["content"]
207
+ end
208
+
209
+ parsed.css( "meta[property]" ).each do |t|
210
+ meta[t.attributes["property"].value] = t.attributes["content"].value
211
+ end
212
+
213
+ # require 'pp'
214
+ # pp meta
215
+
216
+ data.remember( :author, meta['author'] )
217
+ data.remember( :description, meta['description'] )
218
+ data.remember( :keywords, meta['keywords'] )
219
+ data.remember( :generator, meta['generator'])
220
+
221
+ data.remember( :responsive, true ) if meta["viewport"] =~ /width=device-width/
222
+
223
+
224
+ # Check Twitter Card:
225
+
226
+ data.remember( :twitter_title, meta["twitter:title"] )
227
+ data.remember( :twitter_creator, meta["twitter:creator"] )
228
+ if /@(.*)/.match( meta["twitter:creator"] )
229
+ data.another( :twitter_ids, $1 )
230
+ end
231
+ data.remember( :twitter_site_author, meta["twitter:site"] )
232
+ if /@(.*)/.match( meta["twitter:site"] )
233
+ data.another( :twitter_ids, $1 )
234
+ end
235
+ data.remember( :twitter_image, meta["twitter:image"] )
236
+ data.remember( :twitter_description, meta["twitter:description"] )
237
+
238
+ # Open Graph
239
+
240
+ data.remember( :og_title, meta["og:title"] )
241
+ data.remember( :og_description, meta["og:description"] )
242
+ data.remember( :og_type, meta["og:type"] )
243
+ data.remember( :og_image, meta["og:image"] )
244
+
245
+
246
+ # Look inside the body:
247
+
248
# Returns the <a> elements of parsed whose href matches regex.
#
# parsed - document responding to css( selector ).
# regex  - Regexp tested against each anchor's href.
#
# Anchors without an href are never returned.
def matching_links( parsed, regex )
  parsed.css( "a" ).select do |anchor|
    href = anchor['href']
    href && regex.match( href )
  end
end
259
+
260
# Returns the distinct href values of links.
#
# links         - elements answering ['href'].
# filter_shared - when true, URLs containing "share" are dropped.
#
# Elements without an href contribute nothing.
def hrefs( links, filter_shared = false )
  urls = links.map { |link| link['href'] }.compact
  urls = urls.reject { |url| url =~ /share/ } if filter_shared
  urls.uniq
end
271
+
272
# Extracts an identifier from every link that matches regex.
#
# links - Array of URL strings.
# regex - Regexp; its first capture group is the identifier. Without a
#         capture group the whole link is used.
#
# Returns the distinct identifiers, leaving out any that contain "share".
def find_id_path( links, regex )
  ids = links.map do |link|
    match = regex.match( link.to_s )
    next unless match

    id = match[1] || link
    id unless id =~ /share/
  end
  ids.compact.uniq
end
286
+
287
+ # Twitter
288
+
289
+ # Look for twitter links
290
+ twitter_links = hrefs( matching_links( parsed, /twitter.com\/[^\/]*$/ ), true )
291
+ data.remember( :twitter_links, twitter_links )
292
+
293
+ twitter_ids = find_id_path( twitter_links, /twitter.com\/([^\/]*$)/ ).each do |id|
294
+ data.another( :twitter_ids, id )
295
+ end
296
+
297
+ # Look for twitter shared links
298
+
299
+ twitter_shared = matching_links( parsed, /twitter.com\/share/ )
300
+
301
+ twitter_shared.each do |l|
302
+ text = l['data-text']
303
+
304
+ # See if there's a "by @user" in the text
305
+ if /by\s*@([^\s]*)/.match text
306
+ data.another( :twitter_ids, $1 )
307
+ data.remember( :twitter_by, $1 )
308
+ end
309
+
310
+ # Look for all "@usernames" in the text
311
+ if text
312
+ text.split.select { |x| x =~ /@\s*/ }.each do |id|
313
+ data.another( :twitter_ids, id.slice( 1,100 ) ) # We don't want the @
314
+ end
315
+ end
316
+
317
+ # See if there's a via link on the anchor tag
318
+ if l['data-via']
319
+ data.another( :twitter_ids, l['data-via'])
320
+ end
321
+
322
+
323
+ possible_via = URI.decode( (URI(l['href']).query) || "" ).split( /&amp;/ ).collect { |x| x.split( /=/ ) }.select { |x| x[0] == 'via' }
324
+ if possible_via.size > 0
325
+ data.another( :twitter_ids, possible_via[0][1] )
326
+ end
327
+ end
328
+
329
+ # Look for intent
330
+
331
+ twitter_intent = hrefs( matching_links( parsed, /twitter.com\/intent/ ) )
332
+
333
+ twitter_intent.each do |t|
334
+ URI.decode( URI(t.gsub( / /, "+" )).query ).split( /&/ ).select do |x|
335
+ x =~ /via/
336
+ end.collect do |x|
337
+ x.gsub( /via=/, "" )
338
+ end.each do |via|
339
+ data.another( :twitter_ids, via )
340
+ end
341
+ end
342
+ # Look for email
343
+
344
+ email_links = hrefs( matching_links( parsed, /mailto:/ ) )
345
+ email_address = find_id_path( email_links, /mailto:(.*@.*\..*)/ ).each do |email|
346
+ data.another( :emails, email )
347
+ end
348
+
349
+ # Linkedin
350
+
351
+ linkedin_links = hrefs( matching_links( parsed, /linkedin.com/ ), true )
352
+ data.remember( :linkedin_links, linkedin_links )
353
+
354
+ # Instagram
355
+
356
+ instagram_links = hrefs( matching_links( parsed, /instagram.com/ ) )
357
+ data.remember( :instagram_links, instagram_links )
358
+
359
+ # Facebook
360
+
361
+ facebook_links = hrefs( matching_links( parsed, /facebook.com\/[^\/]*$/ ) )
362
+ data.remember( :facebook_links, facebook_links )
363
+
364
+ # Google plus
365
+
366
+ googleplus_links = hrefs( matching_links( parsed, /plus.google.com\/[^\/]*$/ ) )
367
+ data.remember( :googleplus_links, googleplus_links )
368
+
369
+ # Github
370
+
371
+ github_links = hrefs( matching_links( parsed, /github.com\/[^\/]*$/ ) )
372
+ data.remember( :github_links, github_links )
373
+
374
+
375
+ puts
376
+ puts "This is what we've figured out:"
377
+
378
+
379
+ data.print
380
+
381
+
382
+ # Bonus!
383
+
384
+ # Get this file from https://github.com/ElbertF/Wappalyzer/tree/master/share
385
# Tests value against an apps.json pattern of the form
# "regex" or "regex\;template".
#
# Returns false when there is no match, value is nil or the pattern is not
# a valid Ruby regex; true for a match without a template; otherwise the
# template with \1 and \2 replaced by the captured text.
def check_regex( mashed_regex, value )
  pattern, template = mashed_regex.to_s.split( /\\;/ )
  return false if pattern.nil? || value.nil?

  begin
    md = Regexp.new( pattern ).match( value.to_s )
  rescue RegexpError
    # One unusable pattern should not abort the whole scan.
    return false
  end
  return false unless md
  return true unless template

  # Block form: captured text is inserted literally, so backslashes in it
  # are not re-read as back-references.
  template.gsub( /\\([12])/ ) { md[Regexp.last_match( 1 ).to_i] || "" }
end

require 'json'

# Technology detection only runs when apps.json is in the working directory.
# File.exist? replaces File.exists?, which newer Rubies no longer provide.
if File.exist?( "apps.json" )
  apps = JSON.parse( File.read( "apps.json" ) )
  technologies = []

  scripts = parsed.css( "script" ).map { |tag| tag['src'] }.compact

  apps['apps'].each do |app, checks|
    # 'html' and 'script' may be a single pattern or a list of patterns.
    detected =
      Array( checks['html'] ).any? { |pattern| check_regex( pattern, response.body ) } ||
      ( checks['meta'] || {} ).any? { |name, pattern| check_regex( pattern, meta[name] ) } ||
      ( checks['headers'] || {} ).any? { |name, pattern| check_regex( pattern, response.headers[name] ) } ||
      Array( checks['script'] ).any? { |pattern| scripts.any? { |src| check_regex( pattern, src ) } }
    next unless detected

    technologies << app
    # concat keeps the list flat when 'implies' is a list, so the sort
    # below never has to compare a String with an Array.
    technologies.concat( Array( checks['implies'] ) )
  end

  printf "%20s: %s\n", "technologies", technologies.sort.uniq.join( ", ")
end
@@ -27,6 +27,16 @@ module Socialinvestigator
27
27
  save_yaml( "twitter.yml", config )
28
28
  end
29
29
 
30
# Parsed contents of the saved apps.json, as produced by read_json
# (nil when the file does not exist).
def apps_json
  read_json 'apps.json'
end
33
+
34
# Saves data as apps.json inside the config directory (@dir), replacing any
# existing file.
#
# data - anything; its to_s form is what gets written.
def apps_json=( data )
  File.write( File.join( @dir, "apps.json" ), data.to_s )
end
39
+
30
40
  def read_yaml( name )
31
41
  file = "#{@dir}/#{name}"
32
42
 
@@ -42,6 +52,16 @@ module Socialinvestigator
42
52
  out.write obj.to_yaml
43
53
  end
44
54
  end
55
+
56
# Reads and parses a JSON file from the config directory (@dir).
#
# name - file name relative to @dir.
#
# Returns the parsed JSON, or nil when the file does not exist.
def read_json( name )
  file = "#{@dir}/#{name}"

  # File.exist? replaces File.exists?, which newer Rubies no longer provide.
  return nil unless File.exist?( file )

  JSON.parse( File.read( file ) )
end
45
65
  end
46
66
  end
47
67
  end
@@ -1,3 +1,3 @@
1
1
  module Socialinvestigator
2
- VERSION = "0.0.3"
2
+ VERSION = "0.0.4"
3
3
  end
@@ -21,6 +21,9 @@ Gem::Specification.new do |spec|
21
21
  spec.add_dependency 'thor'
22
22
  spec.add_dependency 'httparty'
23
23
  spec.add_dependency 'twitter'
24
+ spec.add_dependency 'nokogiri'
25
+ spec.add_dependency 'whois'
26
+ spec.add_dependency 'dnsruby'
24
27
 
25
28
  spec.add_development_dependency "bundler", "~> 1.6"
26
29
  spec.add_development_dependency "rake"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: socialinvestigator
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Will Schenk
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-11-10 00:00:00.000000000 Z
11
+ date: 2014-11-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor
@@ -52,6 +52,48 @@ dependencies:
52
52
  - - '>='
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: nokogiri
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: whois
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: dnsruby
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - '>='
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
55
97
  - !ruby/object:Gem::Dependency
56
98
  name: bundler
57
99
  requirement: !ruby/object:Gem::Requirement
@@ -97,8 +139,11 @@ files:
97
139
  - lib/socialinvestigator.rb
98
140
  - lib/socialinvestigator/cli.rb
99
141
  - lib/socialinvestigator/cli/hn.rb
142
+ - lib/socialinvestigator/cli/net.rb
100
143
  - lib/socialinvestigator/cli/twitter.rb
101
144
  - lib/socialinvestigator/client/hn.rb
145
+ - lib/socialinvestigator/client/net.rb
146
+ - lib/socialinvestigator/client/standalone_net.rb
102
147
  - lib/socialinvestigator/client/twitter.rb
103
148
  - lib/socialinvestigator/config.rb
104
149
  - lib/socialinvestigator/version.rb