socialinvestigator 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/Gemfile +1 -0
- data/README.md +24 -3
- data/lib/socialinvestigator/cli.rb +4 -0
- data/lib/socialinvestigator/cli/net.rb +27 -0
- data/lib/socialinvestigator/cli/twitter.rb +5 -1
- data/lib/socialinvestigator/client/net.rb +451 -0
- data/lib/socialinvestigator/client/standalone_net.rb +458 -0
- data/lib/socialinvestigator/config.rb +20 -0
- data/lib/socialinvestigator/version.rb +1 -1
- data/socialinvestigator.gemspec +3 -0
- metadata +47 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: b05afd1645671efac34a27455b466ae120bd0796
|
|
4
|
+
data.tar.gz: d8e96eb8befbf7fb8124b3cba94d711e18157f52
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 9481ba394a0fdc6380c48a0d2f1eeaeda14b1abedb993f83d95a423acd5be718bbedbd272e3b95e0ce6074b409fa8580550f9df1955f05f9f3dd6aec586d2661
|
|
7
|
+
data.tar.gz: 3f94308447f5a9bb6a28bd013066637c2cb588c84db01f2ff840511cd6acf9a2890a4fbfc149f037d9026d4c9643dcd359c7f9593504a01d8c2b5b5d5aa24596
|
data/.gitignore
CHANGED
data/Gemfile
CHANGED
data/README.md
CHANGED
|
@@ -17,14 +17,35 @@ Then you can run the command 'socialinvestigator' to begin using it.
|
|
|
17
17
|
Full help
|
|
18
18
|
$ socialinvestigator help
|
|
19
19
|
|
|
20
|
+
## Hacker News Search
|
|
21
|
+
|
|
22
|
+
Code walk through: http://willschenk.com/making-a-command-line-utility-with-gems-and-thor
|
|
23
|
+
|
|
20
24
|
Search hacker news for a url:
|
|
21
25
|
|
|
22
26
|
$ socialinvestigator hn search http://willschenk.com
|
|
23
27
|
|
|
24
|
-
|
|
25
|
-
|
|
28
|
+
## Looking up information from a URL
|
|
29
|
+
|
|
30
|
+
Code walk through: http://willschenk.com/personal-information-from-only-a-url
|
|
31
|
+
|
|
32
|
+
Start with a URL, figure out what you can find:
|
|
33
|
+
|
|
34
|
+
$ socialinvestigator net page_info http://willschenk.com
|
|
35
|
+
|
|
36
|
+
To analyse the technology stack, you need to load the datafile from
|
|
37
|
+
https://github.com/ElbertF/Wappalyzer
|
|
38
|
+
which can be done with this command:
|
|
39
|
+
|
|
40
|
+
$ socialinvestigator net get_apps_json
|
|
41
|
+
|
|
42
|
+
## Twitter Scripting
|
|
43
|
+
|
|
44
|
+
_This will be documented soon_
|
|
45
|
+
|
|
46
|
+
Code walk through: http://willschenk.com/scripting-twitter
|
|
26
47
|
|
|
27
|
-
Once you have the twitter info, you put it in using the twitter config command:
|
|
48
|
+
You'll need to register a twitter app for this to work. Once you have the twitter info, you put it in using the twitter config command:
|
|
28
49
|
|
|
29
50
|
$ socialinvestigator twitter config
|
|
30
51
|
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
require 'thor'
|
|
2
2
|
require 'socialinvestigator/cli/hn'
|
|
3
3
|
require 'socialinvestigator/cli/twitter'
|
|
4
|
+
require 'socialinvestigator/cli/net'
|
|
4
5
|
|
|
5
6
|
module Socialinvestigator
|
|
6
7
|
class HammerOfTheGods < Thor
|
|
@@ -29,5 +30,8 @@ module Socialinvestigator
|
|
|
29
30
|
|
|
30
31
|
desc "twitter COMMANDS", "Twitter Control Module"
|
|
31
32
|
subcommand "twitter", Socialinvestigator::CLI::TwitterCli
|
|
33
|
+
|
|
34
|
+
desc "net COMMANDS", "Net control Module"
|
|
35
|
+
subcommand "net", Socialinvestigator::CLI::Net
|
|
32
36
|
end
|
|
33
37
|
end
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
require 'socialinvestigator/client/net'
|
|
2
|
+
|
|
3
|
+
module Socialinvestigator
|
|
4
|
+
module CLI
|
|
5
|
+
class Net < Thor
|
|
6
|
+
desc "page_info URL", "Looks at a page to see what social links it finds"
|
|
7
|
+
def page_info( url )
|
|
8
|
+
knowledge = client.get_knowledge( url )
|
|
9
|
+
knowledge.print
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
desc "get_apps_json", "Download the apps.json file form Wappalyzer"
|
|
13
|
+
def get_apps_json
|
|
14
|
+
puts "Loading from https://raw.githubusercontent.com/ElbertF/Wappalyzer/master/share/apps.json"
|
|
15
|
+
json_data = HTTParty.get "https://raw.githubusercontent.com/ElbertF/Wappalyzer/master/share/apps.json"
|
|
16
|
+
Socialinvestigator::Config.config.apps_json= json_data
|
|
17
|
+
puts "Saved"
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
private
|
|
22
|
+
def client
|
|
23
|
+
@client ||= Socialinvestigator::Client::NetClient.new
|
|
24
|
+
end
|
|
25
|
+
end
|
|
26
|
+
end
|
|
27
|
+
end
|
|
@@ -6,7 +6,7 @@ module Socialinvestigator
|
|
|
6
6
|
class TwitterCli < Thor
|
|
7
7
|
desc "user SCREENAME", "Look up info for a specific user."
|
|
8
8
|
def user( username )
|
|
9
|
-
agent.print_user_info client.user(
|
|
9
|
+
agent.print_user_info client.user( username )
|
|
10
10
|
end
|
|
11
11
|
|
|
12
12
|
desc "lookup URL", "Resolve a link"
|
|
@@ -150,4 +150,8 @@ module Socialinvestigator
|
|
|
150
150
|
end
|
|
151
151
|
end
|
|
152
152
|
end
|
|
153
|
+
end
|
|
154
|
+
|
|
155
|
+
if __FILE__ == $0
|
|
156
|
+
Socialinvestigator::CLI::TwitterCli.start( ARGV )
|
|
153
157
|
end
|
|
@@ -0,0 +1,451 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
require 'httparty'
|
|
3
|
+
require 'nokogiri'
|
|
4
|
+
require 'dnsruby'
|
|
5
|
+
require 'whois'
|
|
6
|
+
|
|
7
|
+
module Socialinvestigator
|
|
8
|
+
module Client
|
|
9
|
+
class PageKnowledge
|
|
10
|
+
DEBUG = false
|
|
11
|
+
TEMPLATE = "%20s: %s\n"
|
|
12
|
+
|
|
13
|
+
def initialize; @knowledge = {} end
|
|
14
|
+
|
|
15
|
+
def remember( key, value )
|
|
16
|
+
return if value.nil?
|
|
17
|
+
p key, value if DEBUG
|
|
18
|
+
|
|
19
|
+
@knowledge[key] = value
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
def another( key, value )
|
|
23
|
+
return if value.nil?
|
|
24
|
+
p key, value if DEBUG
|
|
25
|
+
|
|
26
|
+
@knowledge[key] ||= []
|
|
27
|
+
@knowledge[key] << value
|
|
28
|
+
@knowledge[key] = @knowledge[key].uniq
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
def print
|
|
32
|
+
p :domain
|
|
33
|
+
p :created_on
|
|
34
|
+
p :expires_on
|
|
35
|
+
p :updated_on
|
|
36
|
+
p :registrar_name
|
|
37
|
+
p :registrar_url
|
|
38
|
+
p :registrant_contact
|
|
39
|
+
p :admin_contact
|
|
40
|
+
p :technical_contact
|
|
41
|
+
p :emails
|
|
42
|
+
p :title, title
|
|
43
|
+
p :description, description
|
|
44
|
+
p :twitter_author, twitter_author
|
|
45
|
+
p :twitter_ids
|
|
46
|
+
p :image, image
|
|
47
|
+
p :responsive
|
|
48
|
+
p :rss_feed
|
|
49
|
+
p :atom_feed
|
|
50
|
+
|
|
51
|
+
p :twitter_links
|
|
52
|
+
p :linkedin_links
|
|
53
|
+
p :instagram_links
|
|
54
|
+
p :facebook_links
|
|
55
|
+
p :googleplus_links
|
|
56
|
+
p :github_links
|
|
57
|
+
p :technologies
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
def p( key, val = nil )
|
|
61
|
+
val = @knowledge[key] if val.nil?
|
|
62
|
+
if val.is_a?( Array )
|
|
63
|
+
printf TEMPLATE, key, val.join( ", ") if val.size > 0
|
|
64
|
+
elsif val.is_a?( Whois::Record::Contact )
|
|
65
|
+
printf TEMPLATE, key, ""
|
|
66
|
+
[:name, :organization, :address, :city, :zip, :state, :country, :country_code, :phone, :fax, :email, :url, :created_on, :updated_on].each do |key|
|
|
67
|
+
out = val.send( key )
|
|
68
|
+
printf "%25s: %s\n", key, out if out && out != ""
|
|
69
|
+
end
|
|
70
|
+
else
|
|
71
|
+
printf TEMPLATE, key, val if val
|
|
72
|
+
end
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
def title
|
|
76
|
+
@knowledge[:twitter_title] || @knowledge[:og_title] || @knowledge[:page_title]
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
def twitter_author
|
|
80
|
+
@knowledge[:twitter_creator] || @knowledge[:twitter_by] || @knowledge[:twitter_site_author] || (@knowledge[:twitter_ids] || []).first
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
def description
|
|
84
|
+
@knowledge[:twitter_description] || @knowledge[:og_description] || @knowledge[:description]
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
def image
|
|
88
|
+
@knowledge[:twitter_image] || @knowledge[:og_image]
|
|
89
|
+
end
|
|
90
|
+
end
|
|
91
|
+
|
|
92
|
+
class NetClient
|
|
93
|
+
# Look up the domain
|
|
94
|
+
|
|
95
|
+
def find_domain( hostname )
|
|
96
|
+
# puts "Looking for SOA of #{hostname}"
|
|
97
|
+
dns = Dnsruby::Resolver.new
|
|
98
|
+
soa = dns.query( hostname, "SOA" ).answer.select do |rr|
|
|
99
|
+
rr.is_a? Dnsruby::RR::IN::SOA
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
return hostname if soa.length > 0
|
|
103
|
+
|
|
104
|
+
parts = hostname.split( /\./ )
|
|
105
|
+
return nil if parts.length <= 2
|
|
106
|
+
|
|
107
|
+
find_domain( parts.slice(1,100).join( "." ) )
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
def get_knowledge( url )
|
|
111
|
+
data = PageKnowledge.new
|
|
112
|
+
|
|
113
|
+
uri = URI( url )
|
|
114
|
+
|
|
115
|
+
data.remember( :hostname, uri.hostname )
|
|
116
|
+
|
|
117
|
+
domain = find_domain(uri.hostname)
|
|
118
|
+
|
|
119
|
+
data.remember( :domain, domain )
|
|
120
|
+
|
|
121
|
+
# Look at the domain info
|
|
122
|
+
|
|
123
|
+
whois = Whois.lookup( domain )
|
|
124
|
+
|
|
125
|
+
data.remember( :registered?, whois.registered? )
|
|
126
|
+
if whois.registrar
|
|
127
|
+
data.remember( :registrar_name, whois.registrar.name )
|
|
128
|
+
data.remember( :registrar_url, whois.registrar.url )
|
|
129
|
+
end
|
|
130
|
+
|
|
131
|
+
data.remember( :created_on, whois.created_on.strftime( "%Y-%m-%d") ) if whois.created_on
|
|
132
|
+
data.remember( :expires_on, whois.expires_on.strftime( "%Y-%m-%d") ) if whois.expires_on
|
|
133
|
+
data.remember( :updated_on, whois.updated_on.strftime( "%Y-%m-%d") ) if whois.updated_on
|
|
134
|
+
|
|
135
|
+
whois.contacts.each do |c|
|
|
136
|
+
data.another( :emails, c.email.downcase ) if c.email
|
|
137
|
+
case c.type
|
|
138
|
+
when Whois::Record::Contact::TYPE_REGISTRANT
|
|
139
|
+
data.remember( :registrant_contact, c )
|
|
140
|
+
when Whois::Record::Contact::TYPE_ADMINISTRATIVE
|
|
141
|
+
data.remember( :admin_contact, c )
|
|
142
|
+
when Whois::Record::Contact::TYPE_TECHNICAL
|
|
143
|
+
data.remember( :technical_contact, c )
|
|
144
|
+
end
|
|
145
|
+
end
|
|
146
|
+
# [
|
|
147
|
+
# :name,:organization,:address,:city,
|
|
148
|
+
# :zip,:state,:country,:country_code,
|
|
149
|
+
# :phone,:fax,:email,:url].each do |k|
|
|
150
|
+
# val = c.send(k)
|
|
151
|
+
# printf "%15s : %s\n", k.to_s, val if !val.nil?
|
|
152
|
+
# end
|
|
153
|
+
# end
|
|
154
|
+
|
|
155
|
+
require 'whois/record/parser/blank'
|
|
156
|
+
whois.parts.each do |p|
|
|
157
|
+
if Whois::Record::Parser.parser_for(p).is_a? Whois::Record::Parser::Blank
|
|
158
|
+
puts "Couldn't find a parser for #{p.host}:"
|
|
159
|
+
data.another( :unparsed_whois, p.body )
|
|
160
|
+
end
|
|
161
|
+
end
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
# Load up the response
|
|
167
|
+
|
|
168
|
+
# client = HTTPClient.new
|
|
169
|
+
# client.ssl_config.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
|
170
|
+
# response = client.get( url )
|
|
171
|
+
# # @ssl = p.peer_cert
|
|
172
|
+
|
|
173
|
+
response = HTTParty.get url
|
|
174
|
+
|
|
175
|
+
# require 'pp'
|
|
176
|
+
# pp response.headers
|
|
177
|
+
|
|
178
|
+
data.remember( :server, response.headers['server'] )
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
# Parse the HTML
|
|
182
|
+
|
|
183
|
+
parsed = Nokogiri.parse response.body
|
|
184
|
+
|
|
185
|
+
data.remember( :page_title, parsed.title )
|
|
186
|
+
|
|
187
|
+
# RSS Feed:
|
|
188
|
+
if feed = parsed.css( 'link[type="application/rss+xml"]' ).first
|
|
189
|
+
feed = feed.attributes['href'].value
|
|
190
|
+
data.remember( :rss_feed, feed )
|
|
191
|
+
end
|
|
192
|
+
|
|
193
|
+
# Atom Feed:
|
|
194
|
+
if feed = parsed.css( 'link[type="application/atom+xml"]' ).first
|
|
195
|
+
feed = feed.attributes['href'].value
|
|
196
|
+
data.remember( :atom_feed, feed )
|
|
197
|
+
end
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
# Meta tags
|
|
202
|
+
|
|
203
|
+
meta = {}
|
|
204
|
+
parsed.css( "meta[name]" ).each do |t|
|
|
205
|
+
meta[t.attributes["name"].value] = t.attributes["content"].value if t.attributes["content"]
|
|
206
|
+
end
|
|
207
|
+
|
|
208
|
+
parsed.css( "meta[property]" ).each do |t|
|
|
209
|
+
meta[t.attributes["property"].value] = t.attributes["content"].value
|
|
210
|
+
end
|
|
211
|
+
|
|
212
|
+
# require 'pp'
|
|
213
|
+
# pp meta
|
|
214
|
+
|
|
215
|
+
data.remember( :author, meta['author'] )
|
|
216
|
+
data.remember( :description, meta['description'] )
|
|
217
|
+
data.remember( :keywords, meta['keywords'] )
|
|
218
|
+
data.remember( :generator, meta['generator'])
|
|
219
|
+
|
|
220
|
+
data.remember( :responsive, true ) if meta["viewport"] =~ /width=device-width/
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
# Check Twitter Card:
|
|
224
|
+
|
|
225
|
+
data.remember( :twitter_title, meta["twitter:title"] )
|
|
226
|
+
data.remember( :twitter_creator, meta["twitter:creator"] )
|
|
227
|
+
if /@(.*)/.match( meta["twitter:creator"] )
|
|
228
|
+
data.another( :twitter_ids, $1 )
|
|
229
|
+
end
|
|
230
|
+
data.remember( :twitter_site_author, meta["twitter:site"] )
|
|
231
|
+
if /@(.*)/.match( meta["twitter:site"] )
|
|
232
|
+
data.another( :twitter_ids, $1 )
|
|
233
|
+
end
|
|
234
|
+
data.remember( :twitter_image, meta["twitter:image"] )
|
|
235
|
+
data.remember( :twitter_description, meta["twitter:description"] )
|
|
236
|
+
|
|
237
|
+
# Open Graph
|
|
238
|
+
|
|
239
|
+
data.remember( :og_title, meta["og:title"] )
|
|
240
|
+
data.remember( :og_description, meta["og:description"] )
|
|
241
|
+
data.remember( :og_type, meta["og:type"] )
|
|
242
|
+
data.remember( :og_image, meta["og:image"] )
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
# Look inside the body:
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
# Twitter
|
|
249
|
+
|
|
250
|
+
# Look for twitter links
|
|
251
|
+
twitter_links = hrefs( matching_links( parsed, /twitter.com\/[^\/]*$/ ), true )
|
|
252
|
+
data.remember( :twitter_links, twitter_links )
|
|
253
|
+
|
|
254
|
+
twitter_ids = find_id_path( twitter_links, /twitter.com\/([^\/]*$)/ ).each do |id|
|
|
255
|
+
data.another( :twitter_ids, id )
|
|
256
|
+
end
|
|
257
|
+
|
|
258
|
+
# Look for twitter shared links
|
|
259
|
+
|
|
260
|
+
twitter_shared = matching_links( parsed, /twitter.com\/share/ )
|
|
261
|
+
|
|
262
|
+
twitter_shared.each do |l|
|
|
263
|
+
text = l['data-text']
|
|
264
|
+
|
|
265
|
+
# See if there's a "by @user" in the text
|
|
266
|
+
if /by\s*@([^\s]*)/.match text
|
|
267
|
+
data.another( :twitter_ids, $1 )
|
|
268
|
+
data.remember( :twitter_by, $1 )
|
|
269
|
+
end
|
|
270
|
+
|
|
271
|
+
# Look for all "@usernames" in the text
|
|
272
|
+
if text
|
|
273
|
+
text.split.select { |x| x =~ /@\s*/ }.each do |id|
|
|
274
|
+
data.another( :twitter_ids, id.slice( 1,100 ) ) # We don't want the @
|
|
275
|
+
end
|
|
276
|
+
end
|
|
277
|
+
|
|
278
|
+
# See if there's a via link on the anchor tag
|
|
279
|
+
if l['data-via']
|
|
280
|
+
data.another( :twitter_ids, l['data-via'])
|
|
281
|
+
end
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
possible_via = URI.decode( (URI(l['href']).query) || "" ).split( /&/ ).collect { |x| x.split( /=/ ) }.select { |x| x[0] == 'via' }
|
|
285
|
+
if possible_via.size > 0
|
|
286
|
+
data.another( :twitter_ids, possible_via[0][1] )
|
|
287
|
+
end
|
|
288
|
+
end
|
|
289
|
+
|
|
290
|
+
# Look for intent
|
|
291
|
+
|
|
292
|
+
twitter_intent = hrefs( matching_links( parsed, /twitter.com\/intent/ ) )
|
|
293
|
+
|
|
294
|
+
twitter_intent.each do |t|
|
|
295
|
+
URI.decode( URI(t.gsub( / /, "+" )).query ).split( /&/ ).select do |x|
|
|
296
|
+
x =~ /via/
|
|
297
|
+
end.collect do |x|
|
|
298
|
+
x.gsub( /via=/, "" )
|
|
299
|
+
end.each do |via|
|
|
300
|
+
data.another( :twitter_ids, via )
|
|
301
|
+
end
|
|
302
|
+
end
|
|
303
|
+
# Look for email
|
|
304
|
+
|
|
305
|
+
email_links = hrefs( matching_links( parsed, /mailto:/ ) )
|
|
306
|
+
email_address = find_id_path( email_links, /mailto:(.*@.*\..*)/ ).each do |email|
|
|
307
|
+
data.another( :emails, email )
|
|
308
|
+
end
|
|
309
|
+
|
|
310
|
+
# Linkedin
|
|
311
|
+
|
|
312
|
+
linkedin_links = hrefs( matching_links( parsed, /linkedin.com/ ), true )
|
|
313
|
+
data.remember( :linkedin_links, linkedin_links )
|
|
314
|
+
|
|
315
|
+
# Instagram
|
|
316
|
+
|
|
317
|
+
instagram_links = hrefs( matching_links( parsed, /instagram.com/ ) )
|
|
318
|
+
data.remember( :instagram_links, instagram_links )
|
|
319
|
+
|
|
320
|
+
# Facebook
|
|
321
|
+
|
|
322
|
+
facebook_links = hrefs( matching_links( parsed, /facebook.com\/[^\/]*$/ ) )
|
|
323
|
+
data.remember( :facebook_links, facebook_links )
|
|
324
|
+
|
|
325
|
+
# Google plus
|
|
326
|
+
|
|
327
|
+
googleplus_links = hrefs( matching_links( parsed, /plus.google.com\/[^\/]*$/ ) )
|
|
328
|
+
data.remember( :googleplus_links, googleplus_links )
|
|
329
|
+
|
|
330
|
+
# Github
|
|
331
|
+
|
|
332
|
+
github_links = hrefs( matching_links( parsed, /github.com\/[^\/]*$/ ) )
|
|
333
|
+
data.remember( :github_links, github_links )
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
# Bonus!
|
|
337
|
+
|
|
338
|
+
# Get this file from https://github.com/ElbertF/Wappalyzer/tree/master/share
|
|
339
|
+
|
|
340
|
+
apps = Socialinvestigator::Config.config.apps_json
|
|
341
|
+
if apps
|
|
342
|
+
scripts = parsed.css( "script" ).collect { |x| x['src'] }.select { |x| x }
|
|
343
|
+
# puts scripts
|
|
344
|
+
|
|
345
|
+
apps['apps'].each do |app,checks|
|
|
346
|
+
if checks['html']
|
|
347
|
+
html_array = checks['html']
|
|
348
|
+
html_array = [checks['html']] if html_array.is_a? String
|
|
349
|
+
|
|
350
|
+
html_array.each do |html|
|
|
351
|
+
result = check_regex( html, response.body )
|
|
352
|
+
if result
|
|
353
|
+
data.another :technologies, app
|
|
354
|
+
data.another :technologies, checks['implies']
|
|
355
|
+
end
|
|
356
|
+
end
|
|
357
|
+
end
|
|
358
|
+
|
|
359
|
+
if checks['meta']
|
|
360
|
+
checks['meta'].each do |k,code|
|
|
361
|
+
result = check_regex( code, meta[k] )
|
|
362
|
+
if result
|
|
363
|
+
data.another :technologies, app
|
|
364
|
+
data.another :technologies, checks['implies']
|
|
365
|
+
end
|
|
366
|
+
end
|
|
367
|
+
end
|
|
368
|
+
|
|
369
|
+
if checks['headers']
|
|
370
|
+
checks['headers'].each do |k,code|
|
|
371
|
+
result = check_regex( code, response.headers[k] )
|
|
372
|
+
if result
|
|
373
|
+
data.another :technologies, app
|
|
374
|
+
data.another :technologies, checks['implies']
|
|
375
|
+
end
|
|
376
|
+
end
|
|
377
|
+
end
|
|
378
|
+
|
|
379
|
+
if checks['script']
|
|
380
|
+
script_array = checks['script']
|
|
381
|
+
script_array = [checks['script']] if script_array.is_a? String
|
|
382
|
+
script_array.each do |script_regex|
|
|
383
|
+
scripts.each do |script|
|
|
384
|
+
result = check_regex( script_regex, script)
|
|
385
|
+
if result
|
|
386
|
+
data.another :technologies, app
|
|
387
|
+
data.another :technologies, checks['implies']
|
|
388
|
+
end
|
|
389
|
+
end
|
|
390
|
+
end
|
|
391
|
+
end
|
|
392
|
+
end
|
|
393
|
+
end
|
|
394
|
+
data
|
|
395
|
+
end
|
|
396
|
+
|
|
397
|
+
def matching_links( parsed, regex )
|
|
398
|
+
parsed.css( "a" ).collect do |x|
|
|
399
|
+
if regex.match( x['href'] )
|
|
400
|
+
x
|
|
401
|
+
else
|
|
402
|
+
nil
|
|
403
|
+
end
|
|
404
|
+
end.select do |x|
|
|
405
|
+
x
|
|
406
|
+
end
|
|
407
|
+
end
|
|
408
|
+
|
|
409
|
+
def hrefs( links, filter_shared = false )
|
|
410
|
+
links.collect do |x|
|
|
411
|
+
x['href']
|
|
412
|
+
end.select do |url|
|
|
413
|
+
if filter_shared
|
|
414
|
+
!(url =~ /share/)
|
|
415
|
+
else
|
|
416
|
+
true
|
|
417
|
+
end
|
|
418
|
+
end.uniq
|
|
419
|
+
end
|
|
420
|
+
|
|
421
|
+
def find_id_path( links, regex )
|
|
422
|
+
links.collect do |link|
|
|
423
|
+
if regex.match( link )
|
|
424
|
+
res = $1 || link
|
|
425
|
+
if (res =~ /share/)
|
|
426
|
+
nil
|
|
427
|
+
else
|
|
428
|
+
res
|
|
429
|
+
end
|
|
430
|
+
end
|
|
431
|
+
end.select do |x|
|
|
432
|
+
x
|
|
433
|
+
end.uniq
|
|
434
|
+
end
|
|
435
|
+
|
|
436
|
+
def check_regex( mashed_regex, value )
|
|
437
|
+
regex,result = mashed_regex.split( /\\;/ )
|
|
438
|
+
md = Regexp.new( regex ).match( value )
|
|
439
|
+
if md
|
|
440
|
+
if result
|
|
441
|
+
result = result.gsub( /\\1/, (md[1] || "" )).gsub( /\\2/, (md[2] || "") )
|
|
442
|
+
else
|
|
443
|
+
true
|
|
444
|
+
end
|
|
445
|
+
else
|
|
446
|
+
false
|
|
447
|
+
end
|
|
448
|
+
end
|
|
449
|
+
end
|
|
450
|
+
end
|
|
451
|
+
end
|
|
@@ -0,0 +1,458 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
require 'httparty'
|
|
3
|
+
require 'nokogiri'
|
|
4
|
+
require 'dnsruby'
|
|
5
|
+
require 'whois'
|
|
6
|
+
|
|
7
|
+
url = ARGV[0] || "http://www.fastcolabs.com/3038014/product-bootcamp-week-six-worth-it"
|
|
8
|
+
|
|
9
|
+
class PageKnowledge
|
|
10
|
+
DEBUG = false
|
|
11
|
+
TEMPLATE = "%20s: %s\n"
|
|
12
|
+
|
|
13
|
+
def initialize; @knowledge = {} end
|
|
14
|
+
|
|
15
|
+
def remember( key, value )
|
|
16
|
+
return if value.nil?
|
|
17
|
+
p key, value if DEBUG
|
|
18
|
+
|
|
19
|
+
@knowledge[key] = value
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
def another( key, value )
|
|
23
|
+
return if value.nil?
|
|
24
|
+
p key, value if DEBUG
|
|
25
|
+
|
|
26
|
+
@knowledge[key] ||= []
|
|
27
|
+
@knowledge[key] << value
|
|
28
|
+
@knowledge[key] = @knowledge[key].uniq
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
def print
|
|
32
|
+
p :domain
|
|
33
|
+
p :created_on
|
|
34
|
+
p :expires_on
|
|
35
|
+
p :updated_on
|
|
36
|
+
p :registrar_name
|
|
37
|
+
p :registrar_url
|
|
38
|
+
p :registrant_contact
|
|
39
|
+
p :admin_contact
|
|
40
|
+
p :technical_contact
|
|
41
|
+
p :emails
|
|
42
|
+
p :title, title
|
|
43
|
+
p :description, description
|
|
44
|
+
p :twitter_author, twitter_author
|
|
45
|
+
p :twitter_ids
|
|
46
|
+
p :image, image
|
|
47
|
+
p :responsive
|
|
48
|
+
p :rss_feed
|
|
49
|
+
p :atom_feed
|
|
50
|
+
|
|
51
|
+
p :twitter_links
|
|
52
|
+
p :linkedin_links
|
|
53
|
+
p :instagram_links
|
|
54
|
+
p :facebook_links
|
|
55
|
+
p :googleplus_links
|
|
56
|
+
p :github_links
|
|
57
|
+
|
|
58
|
+
# pp @knowledge
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
def p( key, val = nil )
|
|
62
|
+
val = @knowledge[key] if val.nil?
|
|
63
|
+
if val.is_a?( Array )
|
|
64
|
+
printf TEMPLATE, key, val.join( ", ") if val.size > 0
|
|
65
|
+
elsif val.is_a?( Whois::Record::Contact )
|
|
66
|
+
printf TEMPLATE, key, ""
|
|
67
|
+
[:name, :organization, :address, :city, :zip, :state, :country, :country_code, :phone, :fax, :email, :url, :created_on, :updated_on].each do |key|
|
|
68
|
+
out = val.send( key )
|
|
69
|
+
printf "%25s: %s\n", key, out if out && out != ""
|
|
70
|
+
end
|
|
71
|
+
else
|
|
72
|
+
printf TEMPLATE, key, val if val
|
|
73
|
+
end
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
def title
|
|
77
|
+
@knowledge[:twitter_title] || @knowledge[:og_title] || @knowledge[:page_title]
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
def twitter_author
|
|
81
|
+
@knowledge[:twitter_creator] || @knowledge[:twitter_by] || @knowledge[:twitter_site_author] || (@knowledge[:twitter_ids] || []).first
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
def description
|
|
85
|
+
@knowledge[:twitter_description] || @knowledge[:og_description] || @knowledge[:description]
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
def image
|
|
89
|
+
@knowledge[:twitter_image] || @knowledge[:og_image]
|
|
90
|
+
end
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
data = PageKnowledge.new
|
|
94
|
+
|
|
95
|
+
uri = URI( url )
|
|
96
|
+
|
|
97
|
+
data.remember( :hostname, uri.hostname )
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
# Look up the domain
|
|
102
|
+
|
|
103
|
+
def find_domain( hostname )
|
|
104
|
+
# puts "Looking for SOA of #{hostname}"
|
|
105
|
+
dns = Dnsruby::Resolver.new
|
|
106
|
+
soa = dns.query( hostname, "SOA" ).answer.select do |rr|
|
|
107
|
+
rr.is_a? Dnsruby::RR::IN::SOA
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
return hostname if soa.length > 0
|
|
111
|
+
|
|
112
|
+
parts = hostname.split( /\./ )
|
|
113
|
+
return nil if parts.length <= 2
|
|
114
|
+
|
|
115
|
+
find_domain( parts.slice(1,100).join( "." ) )
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
domain = find_domain(uri.hostname)
|
|
119
|
+
|
|
120
|
+
data.remember( :domain, domain )
|
|
121
|
+
|
|
122
|
+
# Look at the domain info
|
|
123
|
+
|
|
124
|
+
whois = Whois.lookup( domain )
|
|
125
|
+
|
|
126
|
+
data.remember( :registered?, whois.registered? )
|
|
127
|
+
if whois.registrar
|
|
128
|
+
data.remember( :registrar_name, whois.registrar.name )
|
|
129
|
+
data.remember( :registrar_url, whois.registrar.url )
|
|
130
|
+
end
|
|
131
|
+
|
|
132
|
+
data.remember( :created_on, whois.created_on.strftime( "%Y-%m-%d") ) if whois.created_on
|
|
133
|
+
data.remember( :expires_on, whois.expires_on.strftime( "%Y-%m-%d") ) if whois.expires_on
|
|
134
|
+
data.remember( :updated_on, whois.updated_on.strftime( "%Y-%m-%d") ) if whois.updated_on
|
|
135
|
+
|
|
136
|
+
whois.contacts.each do |c|
|
|
137
|
+
data.another( :emails, c.email.downcase ) if c.email
|
|
138
|
+
case c.type
|
|
139
|
+
when Whois::Record::Contact::TYPE_REGISTRANT
|
|
140
|
+
data.remember( :registrant_contact, c )
|
|
141
|
+
when Whois::Record::Contact::TYPE_ADMINISTRATIVE
|
|
142
|
+
data.remember( :admin_contact, c )
|
|
143
|
+
when Whois::Record::Contact::TYPE_TECHNICAL
|
|
144
|
+
data.remember( :technical_contact, c )
|
|
145
|
+
end
|
|
146
|
+
end
|
|
147
|
+
# [
|
|
148
|
+
# :name,:organization,:address,:city,
|
|
149
|
+
# :zip,:state,:country,:country_code,
|
|
150
|
+
# :phone,:fax,:email,:url].each do |k|
|
|
151
|
+
# val = c.send(k)
|
|
152
|
+
# printf "%15s : %s\n", k.to_s, val if !val.nil?
|
|
153
|
+
# end
|
|
154
|
+
# end
|
|
155
|
+
|
|
156
|
+
require 'whois/record/parser/blank'
|
|
157
|
+
whois.parts.each do |p|
|
|
158
|
+
if Whois::Record::Parser.parser_for(p).is_a? Whois::Record::Parser::Blank
|
|
159
|
+
puts "Couldn't find a parser for #{p.host}:"
|
|
160
|
+
data.another( :unparsed_whois, p.body )
|
|
161
|
+
end
|
|
162
|
+
end
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
# Load up the response
|
|
168
|
+
|
|
169
|
+
# client = HTTPClient.new
|
|
170
|
+
# client.ssl_config.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
|
171
|
+
# response = client.get( url )
|
|
172
|
+
# # @ssl = p.peer_cert
|
|
173
|
+
|
|
174
|
+
response = HTTParty.get url
|
|
175
|
+
|
|
176
|
+
# require 'pp'
|
|
177
|
+
# pp response.headers
|
|
178
|
+
|
|
179
|
+
data.remember( :server, response.headers['server'] )
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
# Parse the HTML
|
|
183
|
+
|
|
184
|
+
parsed = Nokogiri.parse response.body
|
|
185
|
+
|
|
186
|
+
data.remember( :page_title, parsed.title )
|
|
187
|
+
|
|
188
|
+
# RSS Feed:
|
|
189
|
+
if feed = parsed.css( 'link[type="application/rss+xml"]' ).first
|
|
190
|
+
feed = feed.attributes['href'].value
|
|
191
|
+
data.remember( :rss_feed, feed )
|
|
192
|
+
end
|
|
193
|
+
|
|
194
|
+
# Atom Feed:
|
|
195
|
+
if feed = parsed.css( 'link[type="application/atom+xml"]' ).first
|
|
196
|
+
feed = feed.attributes['href'].value
|
|
197
|
+
data.remember( :atom_feed, feed )
|
|
198
|
+
end
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
# Meta tags
|
|
203
|
+
|
|
204
|
+
meta = {}
|
|
205
|
+
parsed.css( "meta[name]" ).each do |t|
|
|
206
|
+
meta[t.attributes["name"].value] = t.attributes["content"].value if t.attributes["content"]
|
|
207
|
+
end
|
|
208
|
+
|
|
209
|
+
parsed.css( "meta[property]" ).each do |t|
|
|
210
|
+
meta[t.attributes["property"].value] = t.attributes["content"].value
|
|
211
|
+
end
|
|
212
|
+
|
|
213
|
+
# require 'pp'
|
|
214
|
+
# pp meta
|
|
215
|
+
|
|
216
|
+
data.remember( :author, meta['author'] )
|
|
217
|
+
data.remember( :description, meta['description'] )
|
|
218
|
+
data.remember( :keywords, meta['keywords'] )
|
|
219
|
+
data.remember( :generator, meta['generator'])
|
|
220
|
+
|
|
221
|
+
data.remember( :responsive, true ) if meta["viewport"] =~ /width=device-width/
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
# Check Twitter Card:
|
|
225
|
+
|
|
226
|
+
data.remember( :twitter_title, meta["twitter:title"] )
|
|
227
|
+
data.remember( :twitter_creator, meta["twitter:creator"] )
|
|
228
|
+
if /@(.*)/.match( meta["twitter:creator"] )
|
|
229
|
+
data.another( :twitter_ids, $1 )
|
|
230
|
+
end
|
|
231
|
+
data.remember( :twitter_site_author, meta["twitter:site"] )
|
|
232
|
+
if /@(.*)/.match( meta["twitter:site"] )
|
|
233
|
+
data.another( :twitter_ids, $1 )
|
|
234
|
+
end
|
|
235
|
+
data.remember( :twitter_image, meta["twitter:image"] )
|
|
236
|
+
data.remember( :twitter_description, meta["twitter:description"] )
|
|
237
|
+
|
|
238
|
+
# Open Graph
|
|
239
|
+
|
|
240
|
+
data.remember( :og_title, meta["og:title"] )
|
|
241
|
+
data.remember( :og_description, meta["og:description"] )
|
|
242
|
+
data.remember( :og_type, meta["og:type"] )
|
|
243
|
+
data.remember( :og_image, meta["og:image"] )
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
# Look inside the body:
|
|
247
|
+
|
|
248
|
+
def matching_links( parsed, regex )
|
|
249
|
+
parsed.css( "a" ).collect do |x|
|
|
250
|
+
if regex.match( x['href'] )
|
|
251
|
+
x
|
|
252
|
+
else
|
|
253
|
+
nil
|
|
254
|
+
end
|
|
255
|
+
end.select do |x|
|
|
256
|
+
x
|
|
257
|
+
end
|
|
258
|
+
end
|
|
259
|
+
|
|
260
|
+
def hrefs( links, filter_shared = false )
|
|
261
|
+
links.collect do |x|
|
|
262
|
+
x['href']
|
|
263
|
+
end.select do |url|
|
|
264
|
+
if filter_shared
|
|
265
|
+
!(url =~ /share/)
|
|
266
|
+
else
|
|
267
|
+
true
|
|
268
|
+
end
|
|
269
|
+
end.uniq
|
|
270
|
+
end
|
|
271
|
+
|
|
272
|
+
def find_id_path( links, regex )
|
|
273
|
+
links.collect do |link|
|
|
274
|
+
if regex.match( link )
|
|
275
|
+
res = $1 || link
|
|
276
|
+
if (res =~ /share/)
|
|
277
|
+
nil
|
|
278
|
+
else
|
|
279
|
+
res
|
|
280
|
+
end
|
|
281
|
+
end
|
|
282
|
+
end.select do |x|
|
|
283
|
+
x
|
|
284
|
+
end.uniq
|
|
285
|
+
end
|
|
286
|
+
|
|
287
|
+
# Twitter
|
|
288
|
+
|
|
289
|
+
# Look for twitter links
|
|
290
|
+
twitter_links = hrefs( matching_links( parsed, /twitter.com\/[^\/]*$/ ), true )
|
|
291
|
+
data.remember( :twitter_links, twitter_links )
|
|
292
|
+
|
|
293
|
+
twitter_ids = find_id_path( twitter_links, /twitter.com\/([^\/]*$)/ ).each do |id|
|
|
294
|
+
data.another( :twitter_ids, id )
|
|
295
|
+
end
|
|
296
|
+
|
|
297
|
+
# Look for twitter shared links
|
|
298
|
+
|
|
299
|
+
twitter_shared = matching_links( parsed, /twitter.com\/share/ )
|
|
300
|
+
|
|
301
|
+
twitter_shared.each do |l|
|
|
302
|
+
text = l['data-text']
|
|
303
|
+
|
|
304
|
+
# See if there's a "by @user" in the text
|
|
305
|
+
if /by\s*@([^\s]*)/.match text
|
|
306
|
+
data.another( :twitter_ids, $1 )
|
|
307
|
+
data.remember( :twitter_by, $1 )
|
|
308
|
+
end
|
|
309
|
+
|
|
310
|
+
# Look for all "@usernames" in the text
|
|
311
|
+
if text
|
|
312
|
+
text.split.select { |x| x =~ /@\s*/ }.each do |id|
|
|
313
|
+
data.another( :twitter_ids, id.slice( 1,100 ) ) # We don't want the @
|
|
314
|
+
end
|
|
315
|
+
end
|
|
316
|
+
|
|
317
|
+
# See if there's a via link on the anchor tag
|
|
318
|
+
if l['data-via']
|
|
319
|
+
data.another( :twitter_ids, l['data-via'])
|
|
320
|
+
end
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
possible_via = URI.decode( (URI(l['href']).query) || "" ).split( /&/ ).collect { |x| x.split( /=/ ) }.select { |x| x[0] == 'via' }
|
|
324
|
+
if possible_via.size > 0
|
|
325
|
+
data.another( :twitter_ids, possible_via[0][1] )
|
|
326
|
+
end
|
|
327
|
+
end
|
|
328
|
+
|
|
329
|
+
# Look for intent
|
|
330
|
+
|
|
331
|
+
twitter_intent = hrefs( matching_links( parsed, /twitter.com\/intent/ ) )
|
|
332
|
+
|
|
333
|
+
twitter_intent.each do |t|
|
|
334
|
+
URI.decode( URI(t.gsub( / /, "+" )).query ).split( /&/ ).select do |x|
|
|
335
|
+
x =~ /via/
|
|
336
|
+
end.collect do |x|
|
|
337
|
+
x.gsub( /via=/, "" )
|
|
338
|
+
end.each do |via|
|
|
339
|
+
data.another( :twitter_ids, via )
|
|
340
|
+
end
|
|
341
|
+
end
|
|
342
|
+
# Look for email
|
|
343
|
+
|
|
344
|
+
email_links = hrefs( matching_links( parsed, /mailto:/ ) )
|
|
345
|
+
email_address = find_id_path( email_links, /mailto:(.*@.*\..*)/ ).each do |email|
|
|
346
|
+
data.another( :emails, email )
|
|
347
|
+
end
|
|
348
|
+
|
|
349
|
+
# Linkedin
|
|
350
|
+
|
|
351
|
+
linkedin_links = hrefs( matching_links( parsed, /linkedin.com/ ), true )
|
|
352
|
+
data.remember( :linkedin_links, linkedin_links )
|
|
353
|
+
|
|
354
|
+
# Instagram
|
|
355
|
+
|
|
356
|
+
instagram_links = hrefs( matching_links( parsed, /instagram.com/ ) )
|
|
357
|
+
data.remember( :instagram_links, instagram_links )
|
|
358
|
+
|
|
359
|
+
# Facebook
|
|
360
|
+
|
|
361
|
+
facebook_links = hrefs( matching_links( parsed, /facebook.com\/[^\/]*$/ ) )
|
|
362
|
+
data.remember( :facebook_links, facebook_links )
|
|
363
|
+
|
|
364
|
+
# Google plus
|
|
365
|
+
|
|
366
|
+
googleplus_links = hrefs( matching_links( parsed, /plus.google.com\/[^\/]*$/ ) )
|
|
367
|
+
data.remember( :googleplus_links, googleplus_links )
|
|
368
|
+
|
|
369
|
+
# Github
|
|
370
|
+
|
|
371
|
+
github_links = hrefs( matching_links( parsed, /github.com\/[^\/]*$/ ) )
|
|
372
|
+
data.remember( :github_links, github_links )
|
|
373
|
+
|
|
374
|
+
|
|
375
|
+
puts
|
|
376
|
+
puts "This is what we've figured out:"
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
data.print
|
|
380
|
+
|
|
381
|
+
|
|
382
|
+
# Bonus!
|
|
383
|
+
|
|
384
|
+
# Get this file from https://github.com/ElbertF/Wappalyzer/tree/master/share
|
|
385
|
+
# Tests one Wappalyzer-style pattern against a value.
#
# mashed_regex - pattern String from apps.json. A tag may follow the regex,
#                separated by a literal "\;" (only the first tag is used).
# value        - String to test, or nil when the page has no such attribute.
#
# Returns false when nothing matches (or the pattern is not a valid Ruby
# regex), true for a match that carries no tag, and otherwise the tag with
# "\1" and "\2" replaced by the first two capture groups.
def check_regex( mashed_regex, value )
  return false if value.nil?

  regex, result = mashed_regex.split( /\\;/ )
  # "".split yields [], so fall back to the empty pattern, which matches any
  # present value, exactly as a pattern like "\;tag" already does.
  md = Regexp.new( regex || "" ).match( value )
  return false unless md
  return true unless result

  # Block form keeps backslashes inside the captured text from being
  # re-interpreted as back-references by gsub.
  result.gsub( /\\1/ ) { md[1] || "" }.gsub( /\\2/ ) { md[2] || "" }
rescue RegexpError => e
  # The patterns come from an external file; one bad pattern should not
  # abort the whole scan.
  warn "Skipping invalid pattern #{regex.inspect}: #{e.message}"
  false
end

# True when any of an app's html, meta, headers or script checks matches.
#
# checks  - Hash of checks for one app ('html' and 'script' may be a String
#           or an Array of Strings; 'meta' and 'headers' map a name to a pattern).
# body    - response body String.
# meta    - object answering [] with the page's meta values by name.
# headers - object answering [] with the response header values by name.
# scripts - Array of script src Strings.
def app_matches?( checks, body, meta, headers, scripts )
  return true if Array( checks['html'] ).any? { |pattern| check_regex( pattern, body ) }
  return true if ( checks['meta'] || {} ).any? { |name, pattern| check_regex( pattern, meta[name] ) }
  return true if ( checks['headers'] || {} ).any? { |name, pattern| check_regex( pattern, headers[name] ) }

  Array( checks['script'] ).any? do |pattern|
    scripts.any? { |src| check_regex( pattern, src ) }
  end
end

# Names of every app whose checks match, plus whatever those apps imply.
#
# apps - Hash of app name => checks, i.e. the 'apps' entry of apps.json.
#
# Returns a sorted Array of unique Strings. 'implies' may be a String or an
# Array, so it is flattened in; pushing the Array itself would make the
# sort fail when comparing a String with an Array.
def detect_technologies( apps, body, meta, headers, scripts )
  found = []
  apps.each do |app, checks|
    next unless app_matches?( checks, body, meta, headers, scripts )
    found << app
    found.concat( Array( checks['implies'] ) )
  end
  found.uniq.sort
end

# File.exist? replaces File.exists?, which was removed in Ruby 3.2.
if File.exist? "apps.json"
  apps = JSON.parse( File.read( "apps.json" ) )

  # Only external scripts (those with a src attribute) are checked.
  scripts = parsed.css( "script" ).collect { |x| x['src'] }.compact

  technologies = detect_technologies( apps['apps'], response.body, meta, response.headers, scripts )

  printf "%20s: %s\n", "technologies", technologies.join( ", " )
end
|
|
@@ -27,6 +27,16 @@ module Socialinvestigator
|
|
|
27
27
|
save_yaml( "twitter.yml", config )
|
|
28
28
|
end
|
|
29
29
|
|
|
30
|
+
# Contents of "apps.json" in the config directory, as returned by
# read_json (parsed JSON, or nil when the file is absent).
def apps_json
  read_json "apps.json"
end
|
|
33
|
+
|
|
34
|
+
# Stores +data+ as "apps.json" in the config directory, replacing any
# existing file. The data is written as given, not re-encoded.
def apps_json=( data )
  target = "#{@dir}/apps.json"
  File.open( target, "w" ) { |io| io << data }
end
|
|
39
|
+
|
|
30
40
|
def read_yaml( name )
|
|
31
41
|
file = "#{@dir}/#{name}"
|
|
32
42
|
|
|
@@ -42,6 +52,16 @@ module Socialinvestigator
|
|
|
42
52
|
out.write obj.to_yaml
|
|
43
53
|
end
|
|
44
54
|
end
|
|
55
|
+
|
|
56
|
+
# Reads and parses a JSON file from the config directory.
#
# name - file name relative to @dir.
#
# Returns the parsed JSON, or nil when the file does not exist.
# Raises JSON::ParserError when the file is not valid JSON.
def read_json( name )
  file = "#{@dir}/#{name}"

  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  return nil unless File.exist?( file )

  JSON.parse( File.read( file ) )
end
|
|
45
65
|
end
|
|
46
66
|
end
|
|
47
67
|
end
|
data/socialinvestigator.gemspec
CHANGED
|
@@ -21,6 +21,9 @@ Gem::Specification.new do |spec|
|
|
|
21
21
|
spec.add_dependency 'thor'
|
|
22
22
|
spec.add_dependency 'httparty'
|
|
23
23
|
spec.add_dependency 'twitter'
|
|
24
|
+
spec.add_dependency 'nokogiri'
|
|
25
|
+
spec.add_dependency 'whois'
|
|
26
|
+
spec.add_dependency 'dnsruby'
|
|
24
27
|
|
|
25
28
|
spec.add_development_dependency "bundler", "~> 1.6"
|
|
26
29
|
spec.add_development_dependency "rake"
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: socialinvestigator
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.0.3
|
|
4
|
+
version: 0.0.4
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Will Schenk
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2014-11-
|
|
11
|
+
date: 2014-11-14 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: thor
|
|
@@ -52,6 +52,48 @@ dependencies:
|
|
|
52
52
|
- - '>='
|
|
53
53
|
- !ruby/object:Gem::Version
|
|
54
54
|
version: '0'
|
|
55
|
+
- !ruby/object:Gem::Dependency
|
|
56
|
+
name: nokogiri
|
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
|
58
|
+
requirements:
|
|
59
|
+
- - '>='
|
|
60
|
+
- !ruby/object:Gem::Version
|
|
61
|
+
version: '0'
|
|
62
|
+
type: :runtime
|
|
63
|
+
prerelease: false
|
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
65
|
+
requirements:
|
|
66
|
+
- - '>='
|
|
67
|
+
- !ruby/object:Gem::Version
|
|
68
|
+
version: '0'
|
|
69
|
+
- !ruby/object:Gem::Dependency
|
|
70
|
+
name: whois
|
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
|
72
|
+
requirements:
|
|
73
|
+
- - '>='
|
|
74
|
+
- !ruby/object:Gem::Version
|
|
75
|
+
version: '0'
|
|
76
|
+
type: :runtime
|
|
77
|
+
prerelease: false
|
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
79
|
+
requirements:
|
|
80
|
+
- - '>='
|
|
81
|
+
- !ruby/object:Gem::Version
|
|
82
|
+
version: '0'
|
|
83
|
+
- !ruby/object:Gem::Dependency
|
|
84
|
+
name: dnsruby
|
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
|
86
|
+
requirements:
|
|
87
|
+
- - '>='
|
|
88
|
+
- !ruby/object:Gem::Version
|
|
89
|
+
version: '0'
|
|
90
|
+
type: :runtime
|
|
91
|
+
prerelease: false
|
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
93
|
+
requirements:
|
|
94
|
+
- - '>='
|
|
95
|
+
- !ruby/object:Gem::Version
|
|
96
|
+
version: '0'
|
|
55
97
|
- !ruby/object:Gem::Dependency
|
|
56
98
|
name: bundler
|
|
57
99
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -97,8 +139,11 @@ files:
|
|
|
97
139
|
- lib/socialinvestigator.rb
|
|
98
140
|
- lib/socialinvestigator/cli.rb
|
|
99
141
|
- lib/socialinvestigator/cli/hn.rb
|
|
142
|
+
- lib/socialinvestigator/cli/net.rb
|
|
100
143
|
- lib/socialinvestigator/cli/twitter.rb
|
|
101
144
|
- lib/socialinvestigator/client/hn.rb
|
|
145
|
+
- lib/socialinvestigator/client/net.rb
|
|
146
|
+
- lib/socialinvestigator/client/standalone_net.rb
|
|
102
147
|
- lib/socialinvestigator/client/twitter.rb
|
|
103
148
|
- lib/socialinvestigator/config.rb
|
|
104
149
|
- lib/socialinvestigator/version.rb
|