dap 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (54) hide show
  1. data/.gitignore +6 -0
  2. data/.rspec +2 -0
  3. data/Gemfile +15 -0
  4. data/Gemfile.lock +55 -0
  5. data/LICENSE +20 -0
  6. data/README.md +15 -0
  7. data/bin/dap +137 -0
  8. data/dap.gemspec +42 -0
  9. data/data/.gitkeep +0 -0
  10. data/lib/dap.rb +101 -0
  11. data/lib/dap/filter.rb +8 -0
  12. data/lib/dap/filter/base.rb +37 -0
  13. data/lib/dap/filter/geoip.rb +72 -0
  14. data/lib/dap/filter/http.rb +173 -0
  15. data/lib/dap/filter/names.rb +151 -0
  16. data/lib/dap/filter/openssl.rb +53 -0
  17. data/lib/dap/filter/recog.rb +23 -0
  18. data/lib/dap/filter/simple.rb +340 -0
  19. data/lib/dap/filter/udp.rb +401 -0
  20. data/lib/dap/input.rb +74 -0
  21. data/lib/dap/input/csv.rb +60 -0
  22. data/lib/dap/input/warc.rb +81 -0
  23. data/lib/dap/output.rb +117 -0
  24. data/lib/dap/proto/addp.rb +0 -0
  25. data/lib/dap/proto/dtls.rb +21 -0
  26. data/lib/dap/proto/ipmi.rb +94 -0
  27. data/lib/dap/proto/natpmp.rb +19 -0
  28. data/lib/dap/proto/wdbrpc.rb +58 -0
  29. data/lib/dap/utils/oui.rb +16586 -0
  30. data/lib/dap/version.rb +3 -0
  31. data/samples/http_get_reply.ic12.bz2 +0 -0
  32. data/samples/http_get_reply.ic12.sh +1 -0
  33. data/samples/http_get_reply_iframes.json.bz2 +0 -0
  34. data/samples/http_get_reply_iframes.json.sh +1 -0
  35. data/samples/http_get_reply_links.json.sh +1 -0
  36. data/samples/iawide.warc.bz2 +0 -0
  37. data/samples/iawide_warc.sh +1 -0
  38. data/samples/ipmi_chan_auth_replies.crd.bz2 +0 -0
  39. data/samples/ipmi_chan_auth_replies.sh +1 -0
  40. data/samples/ssl_certs.bz2 +0 -0
  41. data/samples/ssl_certs_geo.sh +1 -0
  42. data/samples/ssl_certs_names.sh +1 -0
  43. data/samples/ssl_certs_names_expanded.sh +1 -0
  44. data/samples/ssl_certs_org.sh +1 -0
  45. data/samples/udp-netbios.csv.bz2 +0 -0
  46. data/samples/udp-netbios.sh +1 -0
  47. data/spec/dap/proto/ipmi_spec.rb +19 -0
  48. data/tools/geo-ip-summary.rb +149 -0
  49. data/tools/ipmi-vulns.rb +27 -0
  50. data/tools/json-summarize.rb +81 -0
  51. data/tools/netbios-counts.rb +271 -0
  52. data/tools/upnp-vulns.rb +35 -0
  53. data/tools/value-counts-to-md-table.rb +23 -0
  54. metadata +264 -0
data/.gitignore ADDED
@@ -0,0 +1,6 @@
1
+ # Ignore rvm files
2
+ .ruby-version
3
+ .ruby-gemset
4
+
5
+ # Ignore geoip data file
6
+ data/geoip.dat
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --colour
2
+ --format d
data/Gemfile ADDED
@@ -0,0 +1,15 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gem 'nokogiri'
4
+ gem 'oj'
5
+ gem 'htmlentities'
6
+ gem 'net-dns'
7
+ gem 'bit-struct'
8
+ gem 'geoip-c'
9
+ gem 'recog'
10
+
11
+ group :test do
12
+ gem 'rspec', '~> 2.14.1'
13
+ gem 'cucumber', '~> 1.3.8'
14
+ gem 'aruba', '~> 0.5.3'
15
+ end
data/Gemfile.lock ADDED
@@ -0,0 +1,55 @@
1
+ GEM
2
+ remote: https://rubygems.org/
3
+ specs:
4
+ aruba (0.5.4)
5
+ childprocess (>= 0.3.6)
6
+ cucumber (>= 1.1.1)
7
+ rspec-expectations (>= 2.7.0)
8
+ bit-struct (0.15.0)
9
+ builder (3.2.2)
10
+ childprocess (0.5.3)
11
+ ffi (~> 1.0, >= 1.0.11)
12
+ cucumber (1.3.15)
13
+ builder (>= 2.1.2)
14
+ diff-lcs (>= 1.1.3)
15
+ gherkin (~> 2.12)
16
+ multi_json (>= 1.7.5, < 2.0)
17
+ multi_test (>= 0.1.1)
18
+ diff-lcs (1.2.5)
19
+ ffi (1.9.3)
20
+ geoip-c (0.9.1)
21
+ gherkin (2.12.2)
22
+ multi_json (~> 1.3)
23
+ htmlentities (4.3.1)
24
+ mini_portile (0.6.0)
25
+ multi_json (1.10.0)
26
+ multi_test (0.1.1)
27
+ net-dns (0.8.0)
28
+ nokogiri (1.6.2.1)
29
+ mini_portile (= 0.6.0)
30
+ oj (2.9.0)
31
+ recog (0.01)
32
+ nokogiri
33
+ rspec (2.14.1)
34
+ rspec-core (~> 2.14.0)
35
+ rspec-expectations (~> 2.14.0)
36
+ rspec-mocks (~> 2.14.0)
37
+ rspec-core (2.14.8)
38
+ rspec-expectations (2.14.5)
39
+ diff-lcs (>= 1.1.3, < 2.0)
40
+ rspec-mocks (2.14.6)
41
+
42
+ PLATFORMS
43
+ ruby
44
+
45
+ DEPENDENCIES
46
+ aruba (~> 0.5.3)
47
+ bit-struct
48
+ cucumber (~> 1.3.8)
49
+ geoip-c
50
+ htmlentities
51
+ net-dns
52
+ nokogiri
53
+ oj
54
+ recog
55
+ rspec (~> 2.14.1)
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2013 Rapid7
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
6
+ this software and associated documentation files (the "Software"), to deal in
7
+ the Software without restriction, including without limitation the rights to
8
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9
+ the Software, and to permit persons to whom the Software is furnished to do so,
10
+ subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,15 @@
1
+ # DAP: The Data Analysis Pipeline
2
+
3
+ DAP was created to transform text-based data on the command-line, specializing in transforms that are annoying or difficult to do with existing tools.
4
+
5
+ DAP reads data using an input plugin, transforms it through a series of filters, and prints it out again using an output plugin. Every record is treated as a document (aka: hash/dict) and filters are used to reduce, expand, and transform these documents as they pass through. Think of DAP as a mashup between sed, awk, grep, csvtool, and jq, with map/reduce capabilities.
6
+
7
+ DAP was written to process terabyte-sized public scan datasets, such as those provided by https://scans.io/. Although DAP isn't particularly fast, it can be used across multiple cores (and machines) by splitting the input source and wrapping the execution with GNU Parallel.
8
+
9
+ ## Prerequisites
10
+
11
+ DAP depends on GeoIP (http://dev.maxmind.com/geoip/legacy/downloadable/) to be able to append geographic metadata to analyzed datasets. At least on Ubuntu, the libgeoip-dev package provides this capability.
12
+
13
+ ## Usage
14
+
15
+ See the example scripts in [samples/](/tree/master/samples).
data/bin/dap ADDED
@@ -0,0 +1,137 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ $:.unshift(File.join(File.dirname(__FILE__), "..", "lib"))
4
+
5
+ require 'rubygems'
6
+ require 'bundler/setup'
7
+ require 'shellwords'
8
+ require 'dap'
9
+
10
+ def version
11
+ $stderr.puts "dap #{Dap::VERSION}"
12
+ exit(0)
13
+ end
14
+
15
+ def usage
16
+ $stderr.puts ""
17
+ $stderr.puts " Usage: #{$0} [input] + [filter] + [output]"
18
+ $stderr.puts " --inputs"
19
+ $stderr.puts " --outputs"
20
+ $stderr.puts " --filters"
21
+ $stderr.puts ""
22
+ $stderr.puts "Example: echo world | #{$0} lines stdin + rename line=hello + json stdout"
23
+ $stderr.puts ""
24
+ exit(1)
25
+ end
26
+
27
+ def show_inputs
28
+ $stderr.puts "Inputs:"
29
+ Dap::Factory.inputs.each_pair do |k,v|
30
+ $stderr.puts " * #{k}"
31
+ end
32
+ $stderr.puts
33
+ exit(1)
34
+ end
35
+
36
+ def show_outputs
37
+ $stderr.puts "Outputs:"
38
+ Dap::Factory.outputs.each_pair do |k,v|
39
+ $stderr.puts " * #{k}"
40
+ end
41
+ $stderr.puts
42
+ exit(1)
43
+ end
44
+
45
+ def show_filters
46
+ $stderr.puts "Filters:"
47
+ Dap::Factory.filters.each_pair do |k,v|
48
+ $stderr.puts " * #{k}"
49
+ end
50
+ $stderr.puts
51
+ exit(1)
52
+ end
53
+
54
+ trace = false
55
+ args = []
56
+
57
+ #
58
+ # Tokenize on + then treat each stage as a separate name + argument list
59
+ #
60
+ ARGV.join(' ').split(/\s*\+\s*/).each do |bit|
61
+
62
+ # Handle quoted arguments as needed
63
+ # XXX: Doesn't work as expected: the shell strips the quotes before ARGV is built, and ARGV.join(' ') above discards the original argument boundaries
64
+ aset = Shellwords.shellwords(bit)
65
+
66
+ # Check the first argument for help or usage flags
67
+ arg = aset.first
68
+
69
+ if arg == "--trace"
70
+ trace = true
71
+ arg = aset.shift
72
+ end
73
+
74
+ if arg == "-h" or arg == "--help"
75
+ usage
76
+ end
77
+
78
+ if arg == "--version" or arg == "-v"
79
+ version
80
+ end
81
+
82
+ if arg == "--inputs"
83
+ show_inputs
84
+ end
85
+
86
+ if arg == "--outputs"
87
+ show_outputs
88
+ end
89
+
90
+ if arg == "--filters"
91
+ show_filters
92
+ end
93
+
94
+ args << aset if aset.length > 0
95
+ end
96
+
97
+ inp_args = args.shift
98
+ out_args = args.pop
99
+
100
+ usage if (inp_args == nil or out_args == nil)
101
+
102
+ filters = []
103
+
104
+ inp = Dap::Factory.create_input(inp_args)
105
+ out = Dap::Factory.create_output(out_args)
106
+ args.each do |a|
107
+ filters << Dap::Factory.create_filter(a)
108
+ end
109
+
110
+ out.start
111
+
112
+ while true
113
+ data = inp.read_record
114
+ break if data == Dap::Input::Error::EOF
115
+ next if data == Dap::Input::Error::Empty
116
+
117
+ docs = [ data ]
118
+
119
+ fcount = 1
120
+ filters.each do |f|
121
+ $stderr.puts "T: #{" " * (fcount * 2)}#{f.name} -> #{docs.inspect} " if trace
122
+ docs = docs.collect {|doc| f.process(doc) }.flatten
123
+ $stderr.puts "T: #{" " * (fcount * 2)}#{" " * f.name.length} == #{docs.inspect}" if trace
124
+ fcount += 1
125
+ break if docs.length == 0
126
+ end
127
+
128
+ begin
129
+ docs.each do |doc|
130
+ out.write_record(doc)
131
+ end
132
+ rescue ::Errno::EPIPE
133
+ break
134
+ end
135
+ end
136
+
137
+ out.stop
data/dap.gemspec ADDED
@@ -0,0 +1,42 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $LOAD_PATH.push File.expand_path('../lib', __FILE__)
3
+ require 'dap/version'
4
+
5
+ Gem::Specification.new do |s|
6
+ s.name = 'dap'
7
+ s.version = Dap::VERSION
8
+ s.authors = [
9
+ 'Rapid7 Research'
10
+ ]
11
+ s.email = [
12
+ 'research@rapid7.com'
13
+ ]
14
+ s.homepage = "https://www.github.com/rapid7/dap"
15
+ s.summary = %q{DAP: The Data Analysis Pipeline}
16
+ s.description = %q{
17
+ DAP reads data using an input plugin, transforms it through a series of filters, and prints it out again
18
+ using an output plugin. Every record is treated as a document (aka: hash/dict) and filters are used to
19
+ reduce, expand, and transform these documents as they pass through. Think of DAP as a mashup between
20
+ sed, awk, grep, csvtool, and jq, with map/reduce capabilities.
21
+ }.gsub(/\s+/, ' ').strip
22
+
23
+ s.files = `git ls-files`.split("\n")
24
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
25
+ s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
26
+ s.require_paths = ['lib']
27
+
28
+ # ---- Dependencies ----
29
+
30
+ s.add_development_dependency 'rspec'
31
+ s.add_development_dependency 'cucumber'
32
+ s.add_development_dependency 'aruba'
33
+
34
+ s.add_runtime_dependency 'nokogiri'
35
+ s.add_runtime_dependency 'oj'
36
+ s.add_runtime_dependency 'htmlentities'
37
+ s.add_runtime_dependency 'net-dns'
38
+ s.add_runtime_dependency 'bit-struct'
39
+ s.add_runtime_dependency 'geoip-c'
40
+ s.add_runtime_dependency 'recog'
41
+
42
+ end
data/data/.gitkeep ADDED
File without changes
data/lib/dap.rb ADDED
@@ -0,0 +1,101 @@
1
+ module Dap
2
+
3
+ require 'bundler/setup'
4
+
5
+ require 'dap/version'
6
+ require 'dap/input'
7
+ require 'dap/output'
8
+ require 'dap/filter'
9
+
10
+ class Factory
11
+
12
+ @@inputs = {}
13
+ @@outputs = {}
14
+ @@filters = {}
15
+
16
+ def self.create_input(args)
17
+ name = args.shift
18
+ raise RuntimeError, "Invalid input plugin: #{name}" unless @@inputs[name]
19
+ @@inputs[name].new(args)
20
+ end
21
+
22
+ def self.create_output(args)
23
+ name = args.shift
24
+ raise RuntimeError, "Invalid output plugin: #{name}" unless @@outputs[name]
25
+ @@outputs[name].new(args)
26
+ end
27
+
28
+ def self.create_filter(args)
29
+ name = args.shift
30
+ raise RuntimeError, "Invalid filter plugin: #{name}" unless @@filters[name]
31
+ @@filters[name].new(args)
32
+ end
33
+
34
+ #
35
+ # Create nice-looking filter names from classes
36
+ # Ex: FilterHTTPDecode => http_decode
37
+ # Ex: FilterLimitLen => limit_len
38
+ #
39
+ def self.name_from_class(name) # name: anything responding to to_s (callers in this file pass a Class or a constant Symbol); returns a lowercase snake_case String
40
+ name.to_s.split('::').last. # keep only the final namespace segment
41
+ gsub(/([A-Z][a-z])/) { |c| "_#{c[0,1].downcase}#{c[1,1]}" }. # prefix every Upper+lower pair with "_" and lowercase its first letter
42
+ gsub(/([a-z][A-Z])/) { |c| "#{c[0,1]}_#{c[1,1].downcase}" }. # split the remaining lower-to-Upper boundaries with "_"
43
+ gsub(/_+/, '_'). # collapse runs of underscores produced by the two passes above
44
+ sub(/^_(input|filter|output)_/, '').downcase # strip the leading plugin-kind marker, then lowercase what is left (e.g. acronyms)
45
+ end
46
+
47
+ #
48
+ # Load input formats
49
+ #
50
+ def self.load_inputs
51
+ Dap::Input.constants.each do |c|
52
+ next unless c.to_s =~ /^Input/
53
+ o = Dap::Input.const_get(c)
54
+ @@inputs[ name_from_class(c) ] = o
55
+ end
56
+ end
57
+
58
+ #
59
+ # Load output formats
60
+ #
61
+ def self.load_outputs
62
+ Dap::Output.constants.each do |c|
63
+ o = Dap::Output.const_get(c)
64
+ next unless c.to_s =~ /^Output/
65
+ @@outputs[ name_from_class(c) ] = o
66
+ end
67
+ end
68
+
69
+ #
70
+ # Load filters
71
+ #
72
+ def self.load_filters
73
+ Dap::Filter.constants.each do |c|
74
+ o = Dap::Filter.const_get(c)
75
+ next unless c.to_s =~ /^Filter/
76
+ @@filters[ name_from_class(c) ] = o
77
+ end
78
+ end
79
+
80
+ def self.inputs
81
+ @@inputs
82
+ end
83
+
84
+ def self.outputs
85
+ @@outputs
86
+ end
87
+
88
+ def self.filters
89
+ @@filters
90
+ end
91
+
92
+ def self.load_modules
93
+ self.load_inputs
94
+ self.load_outputs
95
+ self.load_filters
96
+ end
97
+ end
98
+
99
+ Factory.load_modules
100
+
101
+ end
data/lib/dap/filter.rb ADDED
@@ -0,0 +1,8 @@
1
+ require 'dap/filter/base'
2
+ require 'dap/filter/simple'
3
+ require 'dap/filter/http'
4
+ require 'dap/filter/udp'
5
+ require 'dap/filter/openssl'
6
+ require 'dap/filter/names'
7
+ require 'dap/filter/geoip'
8
+ require 'dap/filter/recog'
data/lib/dap/filter/base.rb ADDED
@@ -0,0 +1,37 @@
1
+ module Dap
2
+ module Filter
3
+
4
+ module Base
5
+ attr_accessor :name, :opts
6
+
7
+ def initialize(args)
8
+ self.opts = {}
9
+ args.each do |arg|
10
+ k,v = arg.split("=", 2)
11
+ self.opts[k] = v
12
+ end
13
+ self.name = Dap::Factory.name_from_class(self.class)
14
+ end
15
+
16
+ def process(doc)
17
+ raise RuntimeError, "No process() method defined for filter #{self.name}"
18
+ end
19
+
20
+ end
21
+
22
+ module BaseDecoder # Mixin for filters that decode selected fields and merge the decoded values back into the document
23
+ include Base
24
+ def process(doc) # mutates doc in place and returns it wrapped in a one-element array
25
+ self.opts.each_pair do |k,v| # only the option key (field name) is used here; v is ignored
26
+ next unless doc.has_key?(k) # skip fields that are absent from this document
27
+ info = decode(doc[k]) || {} # decode is not defined in this module - presumably supplied by the including class; a nil/false result is treated as empty
28
+ info.each_pair do |x,y|
29
+ doc[ "#{k}.#{x}" ] = y # store each decoded value under "<field>.<subkey>"
30
+ end
31
+ end
32
+ [ doc ]
33
+ end
34
+ end
35
+
36
+ end
37
+ end