dap 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +6 -0
- data/.rspec +2 -0
- data/Gemfile +15 -0
- data/Gemfile.lock +55 -0
- data/LICENSE +20 -0
- data/README.md +15 -0
- data/bin/dap +137 -0
- data/dap.gemspec +42 -0
- data/data/.gitkeep +0 -0
- data/lib/dap.rb +101 -0
- data/lib/dap/filter.rb +8 -0
- data/lib/dap/filter/base.rb +37 -0
- data/lib/dap/filter/geoip.rb +72 -0
- data/lib/dap/filter/http.rb +173 -0
- data/lib/dap/filter/names.rb +151 -0
- data/lib/dap/filter/openssl.rb +53 -0
- data/lib/dap/filter/recog.rb +23 -0
- data/lib/dap/filter/simple.rb +340 -0
- data/lib/dap/filter/udp.rb +401 -0
- data/lib/dap/input.rb +74 -0
- data/lib/dap/input/csv.rb +60 -0
- data/lib/dap/input/warc.rb +81 -0
- data/lib/dap/output.rb +117 -0
- data/lib/dap/proto/addp.rb +0 -0
- data/lib/dap/proto/dtls.rb +21 -0
- data/lib/dap/proto/ipmi.rb +94 -0
- data/lib/dap/proto/natpmp.rb +19 -0
- data/lib/dap/proto/wdbrpc.rb +58 -0
- data/lib/dap/utils/oui.rb +16586 -0
- data/lib/dap/version.rb +3 -0
- data/samples/http_get_reply.ic12.bz2 +0 -0
- data/samples/http_get_reply.ic12.sh +1 -0
- data/samples/http_get_reply_iframes.json.bz2 +0 -0
- data/samples/http_get_reply_iframes.json.sh +1 -0
- data/samples/http_get_reply_links.json.sh +1 -0
- data/samples/iawide.warc.bz2 +0 -0
- data/samples/iawide_warc.sh +1 -0
- data/samples/ipmi_chan_auth_replies.crd.bz2 +0 -0
- data/samples/ipmi_chan_auth_replies.sh +1 -0
- data/samples/ssl_certs.bz2 +0 -0
- data/samples/ssl_certs_geo.sh +1 -0
- data/samples/ssl_certs_names.sh +1 -0
- data/samples/ssl_certs_names_expanded.sh +1 -0
- data/samples/ssl_certs_org.sh +1 -0
- data/samples/udp-netbios.csv.bz2 +0 -0
- data/samples/udp-netbios.sh +1 -0
- data/spec/dap/proto/ipmi_spec.rb +19 -0
- data/tools/geo-ip-summary.rb +149 -0
- data/tools/ipmi-vulns.rb +27 -0
- data/tools/json-summarize.rb +81 -0
- data/tools/netbios-counts.rb +271 -0
- data/tools/upnp-vulns.rb +35 -0
- data/tools/value-counts-to-md-table.rb +23 -0
- metadata +264 -0
data/.gitignore
ADDED
data/.rspec
ADDED
data/Gemfile
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
source 'https://rubygems.org'
|
2
|
+
|
3
|
+
gem 'nokogiri'
|
4
|
+
gem 'oj'
|
5
|
+
gem 'htmlentities'
|
6
|
+
gem 'net-dns'
|
7
|
+
gem 'bit-struct'
|
8
|
+
gem 'geoip-c'
|
9
|
+
gem 'recog'
|
10
|
+
|
11
|
+
group :test do
|
12
|
+
gem 'rspec', '~> 2.14.1'
|
13
|
+
gem 'cucumber', '~> 1.3.8'
|
14
|
+
gem 'aruba', '~> 0.5.3'
|
15
|
+
end
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
GEM
|
2
|
+
remote: https://rubygems.org/
|
3
|
+
specs:
|
4
|
+
aruba (0.5.4)
|
5
|
+
childprocess (>= 0.3.6)
|
6
|
+
cucumber (>= 1.1.1)
|
7
|
+
rspec-expectations (>= 2.7.0)
|
8
|
+
bit-struct (0.15.0)
|
9
|
+
builder (3.2.2)
|
10
|
+
childprocess (0.5.3)
|
11
|
+
ffi (~> 1.0, >= 1.0.11)
|
12
|
+
cucumber (1.3.15)
|
13
|
+
builder (>= 2.1.2)
|
14
|
+
diff-lcs (>= 1.1.3)
|
15
|
+
gherkin (~> 2.12)
|
16
|
+
multi_json (>= 1.7.5, < 2.0)
|
17
|
+
multi_test (>= 0.1.1)
|
18
|
+
diff-lcs (1.2.5)
|
19
|
+
ffi (1.9.3)
|
20
|
+
geoip-c (0.9.1)
|
21
|
+
gherkin (2.12.2)
|
22
|
+
multi_json (~> 1.3)
|
23
|
+
htmlentities (4.3.1)
|
24
|
+
mini_portile (0.6.0)
|
25
|
+
multi_json (1.10.0)
|
26
|
+
multi_test (0.1.1)
|
27
|
+
net-dns (0.8.0)
|
28
|
+
nokogiri (1.6.2.1)
|
29
|
+
mini_portile (= 0.6.0)
|
30
|
+
oj (2.9.0)
|
31
|
+
recog (0.01)
|
32
|
+
nokogiri
|
33
|
+
rspec (2.14.1)
|
34
|
+
rspec-core (~> 2.14.0)
|
35
|
+
rspec-expectations (~> 2.14.0)
|
36
|
+
rspec-mocks (~> 2.14.0)
|
37
|
+
rspec-core (2.14.8)
|
38
|
+
rspec-expectations (2.14.5)
|
39
|
+
diff-lcs (>= 1.1.3, < 2.0)
|
40
|
+
rspec-mocks (2.14.6)
|
41
|
+
|
42
|
+
PLATFORMS
|
43
|
+
ruby
|
44
|
+
|
45
|
+
DEPENDENCIES
|
46
|
+
aruba (~> 0.5.3)
|
47
|
+
bit-struct
|
48
|
+
cucumber (~> 1.3.8)
|
49
|
+
geoip-c
|
50
|
+
htmlentities
|
51
|
+
net-dns
|
52
|
+
nokogiri
|
53
|
+
oj
|
54
|
+
recog
|
55
|
+
rspec (~> 2.14.1)
|
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2013 Rapid7
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
6
|
+
this software and associated documentation files (the "Software"), to deal in
|
7
|
+
the Software without restriction, including without limitation the rights to
|
8
|
+
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
9
|
+
the Software, and to permit persons to whom the Software is furnished to do so,
|
10
|
+
subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
17
|
+
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
18
|
+
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
19
|
+
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
20
|
+
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
# DAP: The Data Analysis Pipeline
|
2
|
+
|
3
|
+
DAP was created to transform text-based data on the command-line, specializing in transforms that are annoying or difficult to do with existing tools.
|
4
|
+
|
5
|
+
DAP reads data using an input plugin, transforms it through a series of filters, and prints it out again using an output plugin. Every record is treated as a document (aka: hash/dict) and filters are used to reduce, expand, and transform these documents as they pass through. Think of DAP as a mashup between sed, awk, grep, csvtool, and jq, with map/reduce capabilities.
|
6
|
+
|
7
|
+
DAP was written to process terabyte-sized public scan datasets, such as those provided by https://scans.io/. Although DAP isn't particularly fast, it can be used across multiple cores (and machines) by splitting the input source and wrapping the execution with GNU Parallel.
|
8
|
+
|
9
|
+
## Prerequisites
|
10
|
+
|
11
|
+
DAP depends on GeoIP (http://dev.maxmind.com/geoip/legacy/downloadable/) to be able to append geographic metadata to analyzed datasets. At least on Ubuntu, the libgeoip-dev package provides this capability.
|
12
|
+
|
13
|
+
## Usage
|
14
|
+
|
15
|
+
See the example scripts in the [samples](samples) directory.
|
data/bin/dap
ADDED
@@ -0,0 +1,137 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
$:.unshift(File.join(File.dirname(__FILE__), "..", "lib"))
|
4
|
+
|
5
|
+
require 'rubygems'
|
6
|
+
require 'bundler/setup'
|
7
|
+
require 'shellwords'
|
8
|
+
require 'dap'
|
9
|
+
|
10
|
+
# Write the "dap <version>" banner to standard error, then terminate the
# process with a success status (0). Never returns to the caller.
def version
  banner = "dap #{Dap::VERSION}"
  $stderr.puts(banner)
  exit 0
end
|
14
|
+
|
15
|
+
# Print the command-line synopsis and a worked example to standard error,
# then terminate the process with a failure status (1). Never returns.
def usage
  program = $0
  help_lines = [
    "",
    " Usage: #{program} [input] + [filter] + [output]",
    " --inputs",
    " --outputs",
    " --filters",
    "",
    "Example: echo world | #{program} lines stdin + rename line=hello + json stdout",
    ""
  ]
  # IO#puts given an array writes each element on its own line.
  $stderr.puts(help_lines)
  exit(1)
end
|
26
|
+
|
27
|
+
# List the name of every registered input plugin on standard error (one
# bullet per plugin, followed by a blank line), then exit with status 1.
def show_inputs
  $stderr.puts "Inputs:"
  Dap::Factory.inputs.each_key { |plugin_name| $stderr.puts " * #{plugin_name}" }
  $stderr.puts
  exit(1)
end
|
35
|
+
|
36
|
+
# List the name of every registered output plugin on standard error (one
# bullet per plugin, followed by a blank line), then exit with status 1.
def show_outputs
  $stderr.puts "Outputs:"
  Dap::Factory.outputs.each_key { |plugin_name| $stderr.puts " * #{plugin_name}" }
  $stderr.puts
  exit(1)
end
|
44
|
+
|
45
|
+
# List the name of every registered filter plugin on standard error (one
# bullet per plugin, followed by a blank line), then exit with status 1.
def show_filters
  $stderr.puts "Filters:"
  Dap::Factory.filters.each_key { |plugin_name| $stderr.puts " * #{plugin_name}" }
  $stderr.puts
  exit(1)
end
|
53
|
+
|
54
|
+
# ---------------------------------------------------------------------------
# Main pipeline driver.
#
# The command line is a list of stages separated by "+": the first stage is
# the input plugin, the last stage is the output plugin, and everything in
# between is a filter. Each stage is a plugin name followed by its arguments.
# ---------------------------------------------------------------------------

trace = false
args  = []

#
# Tokenize on + then treat each stage as a separate name + argument list
#
ARGV.join(' ').split(/\s*\+\s*/).each do |bit|

  # Handle quoted arguments as needed
  # XXX: Doesn't work as expected since ARGV parsing gobbles them up
  aset = Shellwords.shellwords(bit)

  # --trace is a modifier rather than a stage name: drop it from the stage,
  # then look at whatever follows it. (Previously the flag inspected below
  # was still "--trace" itself, so "--trace --version", "--trace --inputs",
  # etc. never reached their handlers.)
  if aset.first == "--trace"
    trace = true
    aset.shift
  end

  # Informational flags; each of these helpers exits the process.
  case aset.first
  when "-h", "--help"    then usage
  when "--version", "-v" then version
  when "--inputs"        then show_inputs
  when "--outputs"       then show_outputs
  when "--filters"       then show_filters
  end

  args << aset if aset.length > 0
end

# First stage is the input, last stage is the output, the rest are filters.
inp_args = args.shift
out_args = args.pop

# A pipeline needs at least an input stage and an output stage.
usage if inp_args.nil? || out_args.nil?

inp     = Dap::Factory.create_input(inp_args)
out     = Dap::Factory.create_output(out_args)
filters = args.map { |a| Dap::Factory.create_filter(a) }

out.start

while true
  data = inp.read_record
  break if data == Dap::Input::Error::EOF
  next if data == Dap::Input::Error::Empty

  # Every filter maps one document to zero or more documents, so a single
  # input record may fan out (or be dropped) as it moves down the chain.
  docs = [ data ]

  fcount = 1
  filters.each do |f|
    $stderr.puts "T: #{" " * (fcount * 2)}#{f.name} -> #{docs.inspect} " if trace
    docs = docs.collect {|doc| f.process(doc) }.flatten
    $stderr.puts "T: #{" " * (fcount * 2)}#{" " * f.name.length} == #{docs.inspect}" if trace
    fcount += 1
    # Nothing left to process: skip the remaining filters for this record.
    break if docs.length == 0
  end

  begin
    docs.each do |doc|
      out.write_record(doc)
    end
  rescue ::Errno::EPIPE
    # The downstream reader went away (e.g. piped into `head`); stop quietly.
    break
  end
end

out.stop
|
data/dap.gemspec
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$LOAD_PATH.push File.expand_path('../lib', __FILE__)
|
3
|
+
require 'dap/version'
|
4
|
+
|
5
|
+
# Gem packaging metadata for DAP. File lists are taken from the git index,
# so the gem must be built from inside a git checkout.
Gem::Specification.new do |s|
  s.name     = 'dap'
  s.version  = Dap::VERSION
  s.authors  = ['Rapid7 Research']
  s.email    = ['research@rapid7.com']
  s.homepage = "https://www.github.com/rapid7/dap"
  s.summary  = %q{DAP: The Data Analysis Pipeline}

  # Written across several lines for readability; all runs of whitespace are
  # collapsed to single spaces before the value is stored.
  long_description = %q{
    DAP reads data using an input plugin, transforms it through a series of filters, and prints it out again
    using an output plugin. Every record is treated as a document (aka: hash/dict) and filters are used to
    reduce, expand, and transform these documents as they pass through. Think of DAP as a mashup between
    sed, awk, grep, csvtool, and jq, with map/reduce capabilities.
  }
  s.description = long_description.gsub(/\s+/, ' ').strip

  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map { |path| File.basename(path) }
  s.require_paths = ['lib']

  # ---- Dependencies ----

  ['rspec', 'cucumber', 'aruba'].each do |gem_name|
    s.add_development_dependency gem_name
  end

  ['nokogiri', 'oj', 'htmlentities', 'net-dns', 'bit-struct', 'geoip-c', 'recog'].each do |gem_name|
    s.add_runtime_dependency gem_name
  end

end
|
data/data/.gitkeep
ADDED
File without changes
|
data/lib/dap.rb
ADDED
@@ -0,0 +1,101 @@
|
|
1
|
+
module Dap
|
2
|
+
|
3
|
+
require 'bundler/setup'
|
4
|
+
|
5
|
+
require 'dap/version'
|
6
|
+
require 'dap/input'
|
7
|
+
require 'dap/output'
|
8
|
+
require 'dap/filter'
|
9
|
+
|
10
|
+
#
# Registry and factory for DAP plugins.
#
# Plugin classes are discovered by scanning the constants of Dap::Input,
# Dap::Output and Dap::Filter for names starting with "Input", "Output" and
# "Filter" respectively. Each one is registered under the short name
# produced by name_from_class, which is the name used on the command line.
#
class Factory

  @@inputs  = {}
  @@outputs = {}
  @@filters = {}

  #
  # Instantiate the input plugin named by args[0], passing it the remaining
  # elements as its argument list. The caller's array is left untouched.
  # Raises RuntimeError when no input plugin is registered under that name.
  #
  def self.create_input(args)
    build_plugin(@@inputs, 'input', args)
  end

  #
  # Instantiate the output plugin named by args[0]; see create_input.
  # Raises RuntimeError when no output plugin is registered under that name.
  #
  def self.create_output(args)
    build_plugin(@@outputs, 'output', args)
  end

  #
  # Instantiate the filter plugin named by args[0]; see create_input.
  # Raises RuntimeError when no filter plugin is registered under that name.
  #
  def self.create_filter(args)
    build_plugin(@@filters, 'filter', args)
  end

  #
  # Create nice-looking filter names from classes
  # Ex: FilterHTTPDecode => http_decode
  # Ex: FilterLimitLen   => limit_len
  #
  # Accepts anything whose to_s is a (possibly namespaced) constant name;
  # only the last "::" segment is used.
  #
  def self.name_from_class(name)
    name.to_s.split('::').last.
      # Underscore before every "Xx" pair: FilterLimitLen -> _filter_limit_len
      gsub(/([A-Z][a-z])/) { |c| "_#{c[0,1].downcase}#{c[1,1]}" }.
      # Underscore at every lower->upper boundary: rHTTP -> r_hTTP
      gsub(/([a-z][A-Z])/) { |c| "#{c[0,1]}_#{c[1,1].downcase}" }.
      gsub(/_+/, '_').
      # Drop the plugin-kind prefix and normalise what is left of any acronym
      sub(/^_(input|filter|output)_/, '').downcase
  end

  #
  # Load input formats
  #
  def self.load_inputs
    register_plugins(Dap::Input, /^Input/, @@inputs)
  end

  #
  # Load output formats
  #
  def self.load_outputs
    register_plugins(Dap::Output, /^Output/, @@outputs)
  end

  #
  # Load filters
  #
  def self.load_filters
    register_plugins(Dap::Filter, /^Filter/, @@filters)
  end

  # Registered input plugins as a Hash of short name => class.
  def self.inputs
    @@inputs
  end

  # Registered output plugins as a Hash of short name => class.
  def self.outputs
    @@outputs
  end

  # Registered filter plugins as a Hash of short name => class.
  def self.filters
    @@filters
  end

  # Discover and register every kind of plugin.
  def self.load_modules
    self.load_inputs
    self.load_outputs
    self.load_filters
  end

  #
  # Look up args[0] in registry and instantiate the class found there with
  # the rest of args. Works on a copy so the caller's array is not modified.
  #
  def self.build_plugin(registry, kind, args)
    name, *plugin_args = args
    klass = registry[name]
    raise RuntimeError, "Invalid #{kind} plugin: #{name}" unless klass
    klass.new(plugin_args)
  end
  private_class_method :build_plugin

  #
  # Register every constant of namespace whose name matches pattern. The
  # name is checked first so unrelated constants are never resolved.
  #
  def self.register_plugins(namespace, pattern, registry)
    namespace.constants.each do |const|
      next unless const.to_s =~ pattern
      registry[ name_from_class(const) ] = namespace.const_get(const)
    end
  end
  private_class_method :register_plugins

end
|
98
|
+
|
99
|
+
Factory.load_modules
|
100
|
+
|
101
|
+
end
|
data/lib/dap/filter.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
module Dap
|
2
|
+
module Filter

  #
  # Behaviour shared by all filters: parses the stage's "key=value"
  # arguments into opts and derives the filter's display name from its class.
  #
  module Base
    attr_accessor :name, :opts

    #
    # args is the list of argument strings for this stage. Each one is split
    # on its first "=" only; an argument without "=" is stored with a nil
    # value.
    #
    def initialize(args)
      self.opts = args.each_with_object({}) do |pair, parsed|
        key, value = pair.split("=", 2)
        parsed[key] = value
      end
      self.name = Dap::Factory.name_from_class(self.class)
    end

    #
    # Placeholder that always raises RuntimeError; including classes are
    # expected to supply their own process(doc).
    #
    def process(doc)
      raise RuntimeError, "No process() method defined for filter #{self.name}"
    end

  end

  #
  # Base for filters that expand a field into several derived fields. The
  # including class supplies decode(value); see process for how its result
  # is used.
  #
  module BaseDecoder
    include Base

    #
    # For every option key that is present in doc, decode that field and
    # store each resulting pair back into doc under "<field>.<subkey>".
    # A falsy decode result contributes nothing. Returns the same (mutated)
    # doc wrapped in a one-element array.
    #
    def process(doc)
      self.opts.each_pair do |field, _unused|
        next unless doc.has_key?(field)

        decoded = decode(doc[field])
        next unless decoded

        decoded.each_pair do |subkey, value|
          doc[ "#{field}.#{subkey}" ] = value
        end
      end

      [ doc ]
    end
  end

end
|
37
|
+
end
|