dap 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. data/.gitignore +6 -0
  2. data/.rspec +2 -0
  3. data/Gemfile +15 -0
  4. data/Gemfile.lock +55 -0
  5. data/LICENSE +20 -0
  6. data/README.md +15 -0
  7. data/bin/dap +137 -0
  8. data/dap.gemspec +42 -0
  9. data/data/.gitkeep +0 -0
  10. data/lib/dap.rb +101 -0
  11. data/lib/dap/filter.rb +8 -0
  12. data/lib/dap/filter/base.rb +37 -0
  13. data/lib/dap/filter/geoip.rb +72 -0
  14. data/lib/dap/filter/http.rb +173 -0
  15. data/lib/dap/filter/names.rb +151 -0
  16. data/lib/dap/filter/openssl.rb +53 -0
  17. data/lib/dap/filter/recog.rb +23 -0
  18. data/lib/dap/filter/simple.rb +340 -0
  19. data/lib/dap/filter/udp.rb +401 -0
  20. data/lib/dap/input.rb +74 -0
  21. data/lib/dap/input/csv.rb +60 -0
  22. data/lib/dap/input/warc.rb +81 -0
  23. data/lib/dap/output.rb +117 -0
  24. data/lib/dap/proto/addp.rb +0 -0
  25. data/lib/dap/proto/dtls.rb +21 -0
  26. data/lib/dap/proto/ipmi.rb +94 -0
  27. data/lib/dap/proto/natpmp.rb +19 -0
  28. data/lib/dap/proto/wdbrpc.rb +58 -0
  29. data/lib/dap/utils/oui.rb +16586 -0
  30. data/lib/dap/version.rb +3 -0
  31. data/samples/http_get_reply.ic12.bz2 +0 -0
  32. data/samples/http_get_reply.ic12.sh +1 -0
  33. data/samples/http_get_reply_iframes.json.bz2 +0 -0
  34. data/samples/http_get_reply_iframes.json.sh +1 -0
  35. data/samples/http_get_reply_links.json.sh +1 -0
  36. data/samples/iawide.warc.bz2 +0 -0
  37. data/samples/iawide_warc.sh +1 -0
  38. data/samples/ipmi_chan_auth_replies.crd.bz2 +0 -0
  39. data/samples/ipmi_chan_auth_replies.sh +1 -0
  40. data/samples/ssl_certs.bz2 +0 -0
  41. data/samples/ssl_certs_geo.sh +1 -0
  42. data/samples/ssl_certs_names.sh +1 -0
  43. data/samples/ssl_certs_names_expanded.sh +1 -0
  44. data/samples/ssl_certs_org.sh +1 -0
  45. data/samples/udp-netbios.csv.bz2 +0 -0
  46. data/samples/udp-netbios.sh +1 -0
  47. data/spec/dap/proto/ipmi_spec.rb +19 -0
  48. data/tools/geo-ip-summary.rb +149 -0
  49. data/tools/ipmi-vulns.rb +27 -0
  50. data/tools/json-summarize.rb +81 -0
  51. data/tools/netbios-counts.rb +271 -0
  52. data/tools/upnp-vulns.rb +35 -0
  53. data/tools/value-counts-to-md-table.rb +23 -0
  54. metadata +264 -0
data/.gitignore ADDED
@@ -0,0 +1,6 @@
1
+ # Ignore rvm files
2
+ .ruby-version
3
+ .ruby-gemset
4
+
5
+ # Ignore geoip data file
6
+ data/geoip.dat
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --colour
2
+ --format d
data/Gemfile ADDED
@@ -0,0 +1,15 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gem 'nokogiri'
4
+ gem 'oj'
5
+ gem 'htmlentities'
6
+ gem 'net-dns'
7
+ gem 'bit-struct'
8
+ gem 'geoip-c'
9
+ gem 'recog'
10
+
11
+ group :test do
12
+ gem 'rspec', '~> 2.14.1'
13
+ gem 'cucumber', '~> 1.3.8'
14
+ gem 'aruba', '~> 0.5.3'
15
+ end
data/Gemfile.lock ADDED
@@ -0,0 +1,55 @@
1
+ GEM
2
+ remote: https://rubygems.org/
3
+ specs:
4
+ aruba (0.5.4)
5
+ childprocess (>= 0.3.6)
6
+ cucumber (>= 1.1.1)
7
+ rspec-expectations (>= 2.7.0)
8
+ bit-struct (0.15.0)
9
+ builder (3.2.2)
10
+ childprocess (0.5.3)
11
+ ffi (~> 1.0, >= 1.0.11)
12
+ cucumber (1.3.15)
13
+ builder (>= 2.1.2)
14
+ diff-lcs (>= 1.1.3)
15
+ gherkin (~> 2.12)
16
+ multi_json (>= 1.7.5, < 2.0)
17
+ multi_test (>= 0.1.1)
18
+ diff-lcs (1.2.5)
19
+ ffi (1.9.3)
20
+ geoip-c (0.9.1)
21
+ gherkin (2.12.2)
22
+ multi_json (~> 1.3)
23
+ htmlentities (4.3.1)
24
+ mini_portile (0.6.0)
25
+ multi_json (1.10.0)
26
+ multi_test (0.1.1)
27
+ net-dns (0.8.0)
28
+ nokogiri (1.6.2.1)
29
+ mini_portile (= 0.6.0)
30
+ oj (2.9.0)
31
+ recog (0.01)
32
+ nokogiri
33
+ rspec (2.14.1)
34
+ rspec-core (~> 2.14.0)
35
+ rspec-expectations (~> 2.14.0)
36
+ rspec-mocks (~> 2.14.0)
37
+ rspec-core (2.14.8)
38
+ rspec-expectations (2.14.5)
39
+ diff-lcs (>= 1.1.3, < 2.0)
40
+ rspec-mocks (2.14.6)
41
+
42
+ PLATFORMS
43
+ ruby
44
+
45
+ DEPENDENCIES
46
+ aruba (~> 0.5.3)
47
+ bit-struct
48
+ cucumber (~> 1.3.8)
49
+ geoip-c
50
+ htmlentities
51
+ net-dns
52
+ nokogiri
53
+ oj
54
+ recog
55
+ rspec (~> 2.14.1)
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2013 Rapid7
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
6
+ this software and associated documentation files (the "Software"), to deal in
7
+ the Software without restriction, including without limitation the rights to
8
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9
+ the Software, and to permit persons to whom the Software is furnished to do so,
10
+ subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,15 @@
1
+ # DAP: The Data Analysis Pipeline
2
+
3
+ DAP was created to transform text-based data on the command-line, specializing in transforms that are annoying or difficult to do with existing tools.
4
+
5
+ DAP reads data using an input plugin, transforms it through a series of filters, and prints it out again using an output plugin. Every record is treated as a document (aka: hash/dict) and filters are used to reduce, expand, and transform these documents as they pass through. Think of DAP as a mashup between sed, awk, grep, csvtool, and jq, with map/reduce capabilities.
6
+
7
+ DAP was written to process terabyte-sized public scan datasets, such as those provided by https://scans.io/. Although DAP isn't particularly fast, it can be used across multiple cores (and machines) by splitting the input source and wrapping the execution with GNU Parallel.
8
+
9
+ ## Prerequisites
10
+
11
+ DAP depends on GeoIP (http://dev.maxmind.com/geoip/legacy/downloadable/) to be able to append geographic metadata to analyzed datasets. At least on Ubuntu, the libgeoip-dev package provides this capability.
12
+
13
+ ## Usage
14
+
15
+ See the example pipelines in the [samples](samples/) directory.
data/bin/dap ADDED
@@ -0,0 +1,137 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ $:.unshift(File.join(File.dirname(__FILE__), "..", "lib"))
4
+
5
+ require 'rubygems'
6
+ require 'bundler/setup'
7
+ require 'shellwords'
8
+ require 'dap'
9
+
10
# Print the program name and version to standard error, then exit with
# status 0.
def version
  banner = "dap #{Dap::VERSION}"
  $stderr.puts(banner)
  exit(0)
end
14
+
15
# Print a short usage summary (including one example pipeline) to standard
# error, then exit with status 1.
def usage
  program = $0
  help_lines = [
    "",
    " Usage: #{program} [input] + [filter] + [output]",
    " --inputs",
    " --outputs",
    " --filters",
    "",
    "Example: echo world | #{program} lines stdin + rename line=hello + json stdout",
    ""
  ]
  help_lines.each { |text| $stderr.puts text }
  exit(1)
end
26
+
27
# Shared implementation of the three listing commands below.
#
# Writes +title+ to standard error, then one " * name" line for every key of
# +registry+, then an empty line, and finally exits with status 1.
#
# title    - heading line to print (String)
# registry - Hash keyed by plugin name; the values are not used here
def show_plugins(title, registry)
  $stderr.puts title
  registry.each_key { |plugin| $stderr.puts " * #{plugin}" }
  $stderr.puts
  exit(1)
end

# List the names of the registered input plugins and exit.
def show_inputs
  show_plugins("Inputs:", Dap::Factory.inputs)
end

# List the names of the registered output plugins and exit.
def show_outputs
  show_plugins("Outputs:", Dap::Factory.outputs)
end

# List the names of the registered filter plugins and exit.
def show_filters
  show_plugins("Filters:", Dap::Factory.filters)
end
53
+
54
# Set to true by a leading --trace flag; makes every filter step print the
# documents it received and produced to standard error.
trace = false

# One entry per pipeline stage; each entry is that stage's word list.
args = []

#
# Tokenize on + then treat each stage as a separate name + argument list
#
ARGV.join(' ').split(/\s*\+\s*/).each do |bit|

  # Handle quoted arguments as needed
  # XXX: Doesn't work as expected since ARGV parsing gobbles them up
  aset = Shellwords.shellwords(bit)

  # A leading --trace is consumed here. The flag checks below must look at
  # whatever follows it, so the first word is re-read after the shift
  # (Array#shift returns the element it removed, i.e. "--trace" itself).
  if aset.first == "--trace"
    trace = true
    aset.shift
  end

  # Each of these helpers prints its text and terminates the process.
  case aset.first
  when "-h", "--help"    then usage
  when "--version", "-v" then version
  when "--inputs"        then show_inputs
  when "--outputs"       then show_outputs
  when "--filters"       then show_filters
  end

  args << aset if aset.length > 0
end

# The first stage is the input, the last stage is the output, and everything
# left in between is a filter.
inp_args = args.shift
out_args = args.pop

usage if (inp_args == nil or out_args == nil)

inp = Dap::Factory.create_input(inp_args)
out = Dap::Factory.create_output(out_args)
filters = args.collect { |stage| Dap::Factory.create_filter(stage) }

out.start

while true
  data = inp.read_record
  break if data == Dap::Input::Error::EOF
  next if data == Dap::Input::Error::Empty

  docs = [ data ]

  # Run the record through every filter in order. A filter may return zero,
  # one or many documents; once nothing is left the remaining filters are
  # skipped for this record.
  filters.each_with_index do |f, idx|
    indent = " " * ((idx + 1) * 2)
    $stderr.puts "T: #{indent}#{f.name} -> #{docs.inspect} " if trace
    docs = docs.collect { |doc| f.process(doc) }.flatten
    $stderr.puts "T: #{indent}#{" " * f.name.length} == #{docs.inspect}" if trace
    break if docs.length == 0
  end

  begin
    docs.each do |doc|
      out.write_record(doc)
    end
  rescue ::Errno::EPIPE
    # The consumer of our output went away; stop reading input.
    break
  end
end

out.stop
data/dap.gemspec ADDED
@@ -0,0 +1,42 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $LOAD_PATH.push File.expand_path('../lib', __FILE__)
3
+ require 'dap/version'
4
+
5
# Gem specification for dap. The version comes from Dap::VERSION, which is
# loaded from lib/dap/version by the require above this block.
Gem::Specification.new do |s|
  s.name     = 'dap'
  s.version  = Dap::VERSION
  s.authors  = [
    'Rapid7 Research'
  ]
  s.email    = [
    'research@rapid7.com'
  ]
  s.homepage = "https://www.github.com/rapid7/dap"
  # The LICENSE file shipped with the gem is the MIT license; declare it in
  # the gem metadata as well.
  s.license  = 'MIT'
  s.summary  = %q{DAP: The Data Analysis Pipeline}
  # The multi-line text is collapsed to single spaces before it is stored.
  s.description = %q{
    DAP reads data using an input plugin, transforms it through a series of filters, and prints it out again
    using an output plugin. Every record is treated as a document (aka: hash/dict) and filters are used to
    reduce, expand, and transform these documents as they pass through. Think of DAP as a mashup between
    sed, awk, grep, csvtool, and jq, with map/reduce capabilities.
  }.gsub(/\s+/, ' ').strip

  # The file lists are taken from `git ls-files`, so the gem has to be built
  # from inside a git checkout.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ['lib']

  # ---- Dependencies ----

  s.add_development_dependency 'rspec'
  s.add_development_dependency 'cucumber'
  s.add_development_dependency 'aruba'

  s.add_runtime_dependency 'nokogiri'
  s.add_runtime_dependency 'oj'
  s.add_runtime_dependency 'htmlentities'
  s.add_runtime_dependency 'net-dns'
  s.add_runtime_dependency 'bit-struct'
  s.add_runtime_dependency 'geoip-c'
  s.add_runtime_dependency 'recog'

end
data/data/.gitkeep ADDED
File without changes
data/lib/dap.rb ADDED
@@ -0,0 +1,101 @@
1
+ module Dap
2
+
3
+ require 'bundler/setup'
4
+
5
+ require 'dap/version'
6
+ require 'dap/input'
7
+ require 'dap/output'
8
+ require 'dap/filter'
9
+
10
#
# Registry of the available input, output and filter plugin classes, keyed
# by their short names, plus helpers to instantiate a plugin from a stage's
# argument list.
#
class Factory

  @@inputs  = {}
  @@outputs = {}
  @@filters = {}

  #
  # Instantiate an input plugin. The first element of +args+ is the plugin
  # name; it is shifted off +args+ (the array is modified in place) and the
  # remaining elements are handed to the plugin constructor.
  # Raises RuntimeError when no input plugin has that name.
  #
  def self.create_input(args)
    create_plugin('input', @@inputs, args)
  end

  #
  # Instantiate an output plugin; same contract as create_input.
  #
  def self.create_output(args)
    create_plugin('output', @@outputs, args)
  end

  #
  # Instantiate a filter plugin; same contract as create_input.
  #
  def self.create_filter(args)
    create_plugin('filter', @@filters, args)
  end

  #
  # Create nice-looking filter names from classes
  # Ex: FilterHTTPDecode => http_decode
  # Ex: FilterLimitLen => limit_len
  #
  # Accepts anything that responds to to_s (class, symbol or string). Only
  # the part after the last "::" is used, and a leading Input/Filter/Output
  # word is dropped from the result.
  #
  def self.name_from_class(name)
    name.to_s.split('::').last.
      gsub(/([A-Z][a-z])/) { |c| "_#{c[0,1].downcase}#{c[1,1]}" }.
      gsub(/([a-z][A-Z])/) { |c| "#{c[0,1]}_#{c[1,1].downcase}" }.
      gsub(/_+/, '_').
      sub(/^_(input|filter|output)_/, '').downcase
  end

  #
  # Load input formats
  #
  def self.load_inputs
    register_plugins(Dap::Input, /^Input/, @@inputs)
  end

  #
  # Load output formats
  #
  def self.load_outputs
    register_plugins(Dap::Output, /^Output/, @@outputs)
  end

  #
  # Load filters
  #
  def self.load_filters
    register_plugins(Dap::Filter, /^Filter/, @@filters)
  end

  # Hash of input plugin name => class.
  def self.inputs
    @@inputs
  end

  # Hash of output plugin name => class.
  def self.outputs
    @@outputs
  end

  # Hash of filter plugin name => class.
  def self.filters
    @@filters
  end

  #
  # Populate all three registries from the constants currently defined
  # under Dap::Input, Dap::Output and Dap::Filter.
  #
  def self.load_modules
    self.load_inputs
    self.load_outputs
    self.load_filters
  end

  #
  # Shared body of create_input / create_output / create_filter.
  # +kind+ only appears in the error message.
  #
  def self.create_plugin(kind, registry, args)
    name = args.shift
    raise RuntimeError, "Invalid #{kind} plugin: #{name}" unless registry[name]
    registry[name].new(args)
  end
  private_class_method :create_plugin

  #
  # Shared body of the load_* methods: every constant of +namespace+ whose
  # name matches +pattern+ is stored in +registry+ under its short name.
  # The name is checked before the constant is fetched, so constants that
  # are not plugins are never resolved.
  #
  def self.register_plugins(namespace, pattern, registry)
    namespace.constants.each do |c|
      next unless c.to_s =~ pattern
      registry[ name_from_class(c) ] = namespace.const_get(c)
    end
  end
  private_class_method :register_plugins
end
98
+
99
+ Factory.load_modules
100
+
101
+ end
data/lib/dap/filter.rb ADDED
@@ -0,0 +1,8 @@
1
+ require 'dap/filter/base'
2
+ require 'dap/filter/simple'
3
+ require 'dap/filter/http'
4
+ require 'dap/filter/udp'
5
+ require 'dap/filter/openssl'
6
+ require 'dap/filter/names'
7
+ require 'dap/filter/geoip'
8
+ require 'dap/filter/recog'
data/lib/dap/filter/base.rb ADDED
@@ -0,0 +1,37 @@
1
module Dap
  module Filter

    #
    # Behaviour shared by every filter: option parsing and a default
    # process() that raises. Meant to be included into a filter class.
    #
    module Base
      # name - short filter name derived from the including class
      # opts - Hash of the "key=value" arguments given to the filter
      attr_accessor :name, :opts

      #
      # Parse the filter arguments into opts. Each argument is split on its
      # first "=" only, so values may themselves contain "="; an argument
      # without "=" is stored with a nil value. A nil +args+ is treated as
      # an empty argument list.
      #
      def initialize(args)
        self.opts = {}
        Array(args).each do |arg|
          k,v = arg.split("=", 2)
          self.opts[k] = v
        end
        self.name = Dap::Factory.name_from_class(self.class)
      end

      #
      # Placeholder that always raises RuntimeError; including classes are
      # expected to supply their own process(doc).
      #
      def process(doc)
        raise RuntimeError, "No process() method defined for filter #{self.name}"
      end

    end

    #
    # Base for filters that decode selected fields. The including class
    # must provide decode(value), returning a Hash of decoded values or nil.
    #
    module BaseDecoder
      include Base

      #
      # For every option key that is present in +doc+, decode that field and
      # store each decoded pair back into +doc+ as "<field>.<name>". Option
      # values are not used. +doc+ is modified in place and returned wrapped
      # in a one-element array.
      #
      def process(doc)
        self.opts.each_key do |k|
          next unless doc.has_key?(k)
          info = decode(doc[k]) || {}
          info.each_pair do |x,y|
            doc[ "#{k}.#{x}" ] = y
          end
        end
        [ doc ]
      end
    end

  end
end