dap 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +6 -0
- data/.rspec +2 -0
- data/Gemfile +15 -0
- data/Gemfile.lock +55 -0
- data/LICENSE +20 -0
- data/README.md +15 -0
- data/bin/dap +137 -0
- data/dap.gemspec +42 -0
- data/data/.gitkeep +0 -0
- data/lib/dap.rb +101 -0
- data/lib/dap/filter.rb +8 -0
- data/lib/dap/filter/base.rb +37 -0
- data/lib/dap/filter/geoip.rb +72 -0
- data/lib/dap/filter/http.rb +173 -0
- data/lib/dap/filter/names.rb +151 -0
- data/lib/dap/filter/openssl.rb +53 -0
- data/lib/dap/filter/recog.rb +23 -0
- data/lib/dap/filter/simple.rb +340 -0
- data/lib/dap/filter/udp.rb +401 -0
- data/lib/dap/input.rb +74 -0
- data/lib/dap/input/csv.rb +60 -0
- data/lib/dap/input/warc.rb +81 -0
- data/lib/dap/output.rb +117 -0
- data/lib/dap/proto/addp.rb +0 -0
- data/lib/dap/proto/dtls.rb +21 -0
- data/lib/dap/proto/ipmi.rb +94 -0
- data/lib/dap/proto/natpmp.rb +19 -0
- data/lib/dap/proto/wdbrpc.rb +58 -0
- data/lib/dap/utils/oui.rb +16586 -0
- data/lib/dap/version.rb +3 -0
- data/samples/http_get_reply.ic12.bz2 +0 -0
- data/samples/http_get_reply.ic12.sh +1 -0
- data/samples/http_get_reply_iframes.json.bz2 +0 -0
- data/samples/http_get_reply_iframes.json.sh +1 -0
- data/samples/http_get_reply_links.json.sh +1 -0
- data/samples/iawide.warc.bz2 +0 -0
- data/samples/iawide_warc.sh +1 -0
- data/samples/ipmi_chan_auth_replies.crd.bz2 +0 -0
- data/samples/ipmi_chan_auth_replies.sh +1 -0
- data/samples/ssl_certs.bz2 +0 -0
- data/samples/ssl_certs_geo.sh +1 -0
- data/samples/ssl_certs_names.sh +1 -0
- data/samples/ssl_certs_names_expanded.sh +1 -0
- data/samples/ssl_certs_org.sh +1 -0
- data/samples/udp-netbios.csv.bz2 +0 -0
- data/samples/udp-netbios.sh +1 -0
- data/spec/dap/proto/ipmi_spec.rb +19 -0
- data/tools/geo-ip-summary.rb +149 -0
- data/tools/ipmi-vulns.rb +27 -0
- data/tools/json-summarize.rb +81 -0
- data/tools/netbios-counts.rb +271 -0
- data/tools/upnp-vulns.rb +35 -0
- data/tools/value-counts-to-md-table.rb +23 -0
- metadata +264 -0
data/.gitignore
ADDED
data/.rspec
ADDED
data/Gemfile
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
source 'https://rubygems.org'
|
2
|
+
|
3
|
+
gem 'nokogiri'
|
4
|
+
gem 'oj'
|
5
|
+
gem 'htmlentities'
|
6
|
+
gem 'net-dns'
|
7
|
+
gem 'bit-struct'
|
8
|
+
gem 'geoip-c'
|
9
|
+
gem 'recog'
|
10
|
+
|
11
|
+
group :test do
|
12
|
+
gem 'rspec', '~> 2.14.1'
|
13
|
+
gem 'cucumber', '~> 1.3.8'
|
14
|
+
gem 'aruba', '~> 0.5.3'
|
15
|
+
end
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
GEM
|
2
|
+
remote: https://rubygems.org/
|
3
|
+
specs:
|
4
|
+
aruba (0.5.4)
|
5
|
+
childprocess (>= 0.3.6)
|
6
|
+
cucumber (>= 1.1.1)
|
7
|
+
rspec-expectations (>= 2.7.0)
|
8
|
+
bit-struct (0.15.0)
|
9
|
+
builder (3.2.2)
|
10
|
+
childprocess (0.5.3)
|
11
|
+
ffi (~> 1.0, >= 1.0.11)
|
12
|
+
cucumber (1.3.15)
|
13
|
+
builder (>= 2.1.2)
|
14
|
+
diff-lcs (>= 1.1.3)
|
15
|
+
gherkin (~> 2.12)
|
16
|
+
multi_json (>= 1.7.5, < 2.0)
|
17
|
+
multi_test (>= 0.1.1)
|
18
|
+
diff-lcs (1.2.5)
|
19
|
+
ffi (1.9.3)
|
20
|
+
geoip-c (0.9.1)
|
21
|
+
gherkin (2.12.2)
|
22
|
+
multi_json (~> 1.3)
|
23
|
+
htmlentities (4.3.1)
|
24
|
+
mini_portile (0.6.0)
|
25
|
+
multi_json (1.10.0)
|
26
|
+
multi_test (0.1.1)
|
27
|
+
net-dns (0.8.0)
|
28
|
+
nokogiri (1.6.2.1)
|
29
|
+
mini_portile (= 0.6.0)
|
30
|
+
oj (2.9.0)
|
31
|
+
recog (0.01)
|
32
|
+
nokogiri
|
33
|
+
rspec (2.14.1)
|
34
|
+
rspec-core (~> 2.14.0)
|
35
|
+
rspec-expectations (~> 2.14.0)
|
36
|
+
rspec-mocks (~> 2.14.0)
|
37
|
+
rspec-core (2.14.8)
|
38
|
+
rspec-expectations (2.14.5)
|
39
|
+
diff-lcs (>= 1.1.3, < 2.0)
|
40
|
+
rspec-mocks (2.14.6)
|
41
|
+
|
42
|
+
PLATFORMS
|
43
|
+
ruby
|
44
|
+
|
45
|
+
DEPENDENCIES
|
46
|
+
aruba (~> 0.5.3)
|
47
|
+
bit-struct
|
48
|
+
cucumber (~> 1.3.8)
|
49
|
+
geoip-c
|
50
|
+
htmlentities
|
51
|
+
net-dns
|
52
|
+
nokogiri
|
53
|
+
oj
|
54
|
+
recog
|
55
|
+
rspec (~> 2.14.1)
|
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2013 Rapid7
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
6
|
+
this software and associated documentation files (the "Software"), to deal in
|
7
|
+
the Software without restriction, including without limitation the rights to
|
8
|
+
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
9
|
+
the Software, and to permit persons to whom the Software is furnished to do so,
|
10
|
+
subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
17
|
+
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
18
|
+
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
19
|
+
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
20
|
+
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
# DAP: The Data Analysis Pipeline
|
2
|
+
|
3
|
+
DAP was created to transform text-based data on the command-line, specializing in transforms that are annoying or difficult to do with existing tools.
|
4
|
+
|
5
|
+
DAP reads data using an input plugin, transforms it through a series of filters, and prints it out again using an output plugin. Every record is treated as a document (aka: hash/dict) and filters are used to reduce, expand, and transform these documents as they pass through. Think of DAP as a mashup between sed, awk, grep, csvtool, and jq, with map/reduce capabilities.
|
6
|
+
|
7
|
+
DAP was written to process terabyte-sized public scan datasets, such as those provided by https://scans.io/. Although DAP isn't particularly fast, it can be used across multiple cores (and machines) by splitting the input source and wrapping the execution with GNU Parallel.
|
8
|
+
|
9
|
+
## Prerequisites
|
10
|
+
|
11
|
+
DAP depends on the GeoIP library (http://dev.maxmind.com/geoip/legacy/downloadable/) to append geographic metadata to the datasets it analyzes. On Ubuntu, the libgeoip-dev package provides this library.
|
12
|
+
|
13
|
+
## Usage
|
14
|
+
|
15
|
+
See [tree/master/samples](/tree/master/samples)
|
data/bin/dap
ADDED
@@ -0,0 +1,137 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
$:.unshift(File.join(File.dirname(__FILE__), "..", "lib"))
|
4
|
+
|
5
|
+
require 'rubygems'
|
6
|
+
require 'bundler/setup'
|
7
|
+
require 'shellwords'
|
8
|
+
require 'dap'
|
9
|
+
|
10
|
+
# Report the program name and version on standard error, then terminate
# the process with a success (0) exit status.
def version
  banner = "dap #{Dap::VERSION}"
  $stderr.puts(banner)
  exit 0
end
|
14
|
+
|
15
|
+
# Print the command-line help text to standard error and terminate the
# process with exit status 1.
def usage
  program = $0
  help = [
    "",
    " Usage: #{program} [input] + [filter] + [output]",
    " --inputs",
    " --outputs",
    " --filters",
    "",
    "Example: echo world | #{program} lines stdin + rename line=hello + json stdout",
    ""
  ]
  # puts emits one line per array element; empty strings become blank lines
  $stderr.puts(help)
  exit(1)
end
|
26
|
+
|
27
|
+
# List the name of every registered input plugin on standard error,
# followed by a blank line, then terminate the process with exit status 1.
def show_inputs
  $stderr.puts "Inputs:"
  # Only the registry keys (plugin names) are displayed
  Dap::Factory.inputs.each_key do |name|
    $stderr.puts " * #{name}"
  end
  $stderr.puts
  exit(1)
end
|
35
|
+
|
36
|
+
# List the name of every registered output plugin on standard error,
# followed by a blank line, then terminate the process with exit status 1.
def show_outputs
  $stderr.puts "Outputs:"
  # Only the registry keys (plugin names) are displayed
  Dap::Factory.outputs.each_key do |name|
    $stderr.puts " * #{name}"
  end
  $stderr.puts
  exit(1)
end
|
44
|
+
|
45
|
+
# List the name of every registered filter plugin on standard error,
# followed by a blank line, then terminate the process with exit status 1.
def show_filters
  $stderr.puts "Filters:"
  # Only the registry keys (plugin names) are displayed
  Dap::Factory.filters.each_key do |name|
    $stderr.puts " * #{name}"
  end
  $stderr.puts
  exit(1)
end
|
53
|
+
|
54
|
+
# Main program: parse the "+"-separated stages from the command line,
# build the input, filter, and output plugins, then run every record
# from the input through the filters and hand the results to the output.

trace = false
args  = []

#
# Tokenize on + then treat each stage as a separate name + argument list
#
ARGV.join(' ').split(/\s*\+\s*/).each do |bit|

  # Handle quoted arguments as needed
  # XXX: Doesn't work as expected since ARGV parsing gobbles them up
  aset = Shellwords.shellwords(bit)

  # --trace is a modifier, not a stage: consume it and then look at
  # whatever follows, so "--trace --inputs" and similar still work.
  if aset.first == "--trace"
    trace = true
    aset.shift
  end

  # Informational flags: each helper prints its text and exits
  case aset.first
  when "-h", "--help"    then usage
  when "--version", "-v" then version
  when "--inputs"        then show_inputs
  when "--outputs"       then show_outputs
  when "--filters"       then show_filters
  end

  args << aset unless aset.empty?
end

# The first stage is the input, the last stage is the output, and
# everything left in between is a filter.
inp_args = args.shift
out_args = args.pop

usage if inp_args.nil? || out_args.nil?

inp     = Dap::Factory.create_input(inp_args)
out     = Dap::Factory.create_output(out_args)
filters = args.map { |a| Dap::Factory.create_filter(a) }

out.start

while true
  data = inp.read_record
  break if data == Dap::Input::Error::EOF
  next  if data == Dap::Input::Error::Empty

  docs = [ data ]

  filters.each_with_index do |f, idx|
    if trace
      indent = " " * ((idx + 1) * 2)
      $stderr.puts "T: #{indent}#{f.name} -> #{docs.inspect} "
    end

    # A filter may return zero, one, or many documents per input document
    docs = docs.collect { |doc| f.process(doc) }.flatten

    $stderr.puts "T: #{indent}#{" " * f.name.length} == #{docs.inspect}" if trace

    # Nothing left to pass on to the remaining filters
    break if docs.empty?
  end

  begin
    docs.each { |doc| out.write_record(doc) }
  rescue ::Errno::EPIPE
    # Writing failed with a broken pipe: stop processing records
    break
  end
end

out.stop
|
data/dap.gemspec
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$LOAD_PATH.push File.expand_path('../lib', __FILE__)
|
3
|
+
require 'dap/version'
|
4
|
+
|
5
|
+
# Packaging metadata for the dap gem. File lists are taken from the git
# index, so the gem must be built from inside a git checkout.
Gem::Specification.new do |s|
  s.name        = 'dap'
  s.version     = Dap::VERSION
  s.authors     = [
    'Rapid7 Research'
  ]
  s.email       = [
    'research@rapid7.com'
  ]
  s.homepage    = "https://www.github.com/rapid7/dap"
  # Matches the LICENSE file shipped with the project
  s.license     = 'MIT'
  s.summary     = %q{DAP: The Data Analysis Pipeline}
  # Runs of whitespace (including the line breaks below) are collapsed to
  # single spaces so the description is one line.
  s.description = %q{
    DAP reads data using an input plugin, transforms it through a series of filters, and prints it out again
    using an output plugin. Every record is treated as a document (aka: hash/dict) and filters are used to
    reduce, expand, and transform these documents as they pass through. Think of DAP as a mashup between
    sed, awk, grep, csvtool, and jq, with map/reduce capabilities.
  }.gsub(/\s+/, ' ').strip

  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ['lib']

  # ---- Dependencies ----

  s.add_development_dependency 'rspec'
  s.add_development_dependency 'cucumber'
  s.add_development_dependency 'aruba'

  s.add_runtime_dependency 'nokogiri'
  s.add_runtime_dependency 'oj'
  s.add_runtime_dependency 'htmlentities'
  s.add_runtime_dependency 'net-dns'
  s.add_runtime_dependency 'bit-struct'
  s.add_runtime_dependency 'geoip-c'
  s.add_runtime_dependency 'recog'

end
|
data/data/.gitkeep
ADDED
File without changes
|
data/lib/dap.rb
ADDED
@@ -0,0 +1,101 @@
|
|
1
|
+
module Dap
|
2
|
+
|
3
|
+
require 'bundler/setup'
|
4
|
+
|
5
|
+
require 'dap/version'
|
6
|
+
require 'dap/input'
|
7
|
+
require 'dap/output'
|
8
|
+
require 'dap/filter'
|
9
|
+
|
10
|
+
# Registry and factory for the input, output, and filter plugins.
#
# Each registry is a Hash mapping a short plugin name (as produced by
# name_from_class) to the class that implements the plugin.
class Factory

  @@inputs  = {}
  @@outputs = {}
  @@filters = {}

  # Instantiate the input plugin named by the first element of +args+.
  #
  # @param args [Array<String>] plugin name followed by its arguments;
  #   the array is not modified
  # @return [Object] new instance of the registered class, constructed
  #   with the remaining arguments
  # @raise [RuntimeError] if no input plugin is registered under the name
  def self.create_input(args)
    instantiate_plugin(@@inputs, "input", args)
  end

  # Instantiate the output plugin named by the first element of +args+.
  #
  # @param args [Array<String>] plugin name followed by its arguments;
  #   the array is not modified
  # @return [Object] new instance of the registered class, constructed
  #   with the remaining arguments
  # @raise [RuntimeError] if no output plugin is registered under the name
  def self.create_output(args)
    instantiate_plugin(@@outputs, "output", args)
  end

  # Instantiate the filter plugin named by the first element of +args+.
  #
  # @param args [Array<String>] plugin name followed by its arguments;
  #   the array is not modified
  # @return [Object] new instance of the registered class, constructed
  #   with the remaining arguments
  # @raise [RuntimeError] if no filter plugin is registered under the name
  def self.create_filter(args)
    instantiate_plugin(@@filters, "filter", args)
  end

  #
  # Create nice-looking filter names from classes
  # Ex: FilterHTTPDecode => http_decode
  # Ex: FilterLimitLen => limit_len
  #
  # @param name [#to_s] class, symbol, or string; any namespace is dropped
  # @return [String] snake_case name without the input/filter/output prefix
  def self.name_from_class(name)
    name.to_s.split('::').last.
      # Underscore before each capitalized word: "LimitLen" => "_limit_len"
      gsub(/([A-Z][a-z])/) { |c| "_#{c[0,1].downcase}#{c[1,1]}" }.
      # Underscore where lowercase meets an acronym: "rHTTP" => "r_hTTP"
      gsub(/([a-z][A-Z])/) { |c| "#{c[0,1]}_#{c[1,1].downcase}" }.
      gsub(/_+/, '_').
      sub(/^_(input|filter|output)_/, '').downcase
  end

  #
  # Load input formats
  #
  def self.load_inputs
    register_plugins(Dap::Input, "Input", @@inputs)
  end

  #
  # Load output formats
  #
  def self.load_outputs
    register_plugins(Dap::Output, "Output", @@outputs)
  end

  #
  # Load filters
  #
  def self.load_filters
    register_plugins(Dap::Filter, "Filter", @@filters)
  end

  # @return [Hash] registered input plugins, keyed by name
  def self.inputs
    @@inputs
  end

  # @return [Hash] registered output plugins, keyed by name
  def self.outputs
    @@outputs
  end

  # @return [Hash] registered filter plugins, keyed by name
  def self.filters
    @@filters
  end

  # Populate all three registries.
  def self.load_modules
    self.load_inputs
    self.load_outputs
    self.load_filters
  end

  # Look up the class registered under the first element of +args+ and
  # construct it with the rest. Works on a copy so the caller's array is
  # left untouched; a nil or empty +args+ ends in the RuntimeError below.
  def self.instantiate_plugin(registry, kind, args)
    name, *plugin_args = args
    klass = registry[name]
    raise RuntimeError, "Invalid #{kind} plugin: #{name}" unless klass
    klass.new(plugin_args)
  end

  # Register every constant of +namespace+ whose name starts with
  # +prefix+ under its short name.
  def self.register_plugins(namespace, prefix, registry)
    namespace.constants.each do |c|
      next unless c.to_s.start_with?(prefix)
      registry[ name_from_class(c) ] = namespace.const_get(c)
    end
  end

  private_class_method :instantiate_plugin, :register_plugins
end
|
98
|
+
|
99
|
+
Factory.load_modules
|
100
|
+
|
101
|
+
end
|
data/lib/dap/filter.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
module Dap
module Filter

  # Behavior shared by filters: parses "key=value" arguments into +opts+
  # and records the filter's short name.
  module Base
    attr_accessor :name, :opts

    # @param args [Array<String>] arguments of the form "key=value";
    #   an argument without "=" is stored with a nil value
    def initialize(args)
      # Split on the first "=" only, so values may themselves contain "="
      self.opts = args.each_with_object({}) do |arg, parsed|
        key, value = arg.split("=", 2)
        parsed[key] = value
      end
      self.name = Dap::Factory.name_from_class(self.class)
    end

    # Placeholder that always raises; including classes are expected to
    # provide their own process(doc).
    #
    # @raise [RuntimeError] always
    def process(doc)
      raise RuntimeError, "No process() method defined for filter #{self.name}"
    end

  end

  # Base for filters that decode a field into several sub-fields. The
  # including class supplies decode(value), which returns a Hash (or nil).
  module BaseDecoder
    include Base

    # For every option key present in +doc+, decode that field and store
    # each decoded entry back into +doc+ under "<field>.<entry>".
    #
    # @param doc [Hash] document to decode; modified in place
    # @return [Array<Hash>] single-element array holding +doc+
    def process(doc)
      opts.each_key do |field|
        next unless doc.has_key?(field)

        # A nil result from decode means there is nothing to add
        decoded = decode(doc[field]) || {}
        decoded.each_pair do |sub_key, value|
          doc[ "#{field}.#{sub_key}" ] = value
        end
      end

      [ doc ]
    end
  end

end
end
|