RubyGems - pcap_tools - Versions diffs - 0.0.2 - Mend

pcap_tools 0.0.2

Files changed (5) hide show

data/README.markdown ADDED Viewed

@@ -0,0 +1,90 @@
+# What is it ?
+It's a ruby library to help tcpdump file processing : do some offline analysis on tcpdump files.
+Main functionalities :
+* Rebuild tcp streams
+* Extract and parse http request
+# How to use it
+## Make a tcpdump
+* `tcpdump -w out.pcap -s 4096 <filter>`
+* Get the output file out.pcap
+Please adjust the 4096 value, to the max packet size to capture.
+## Write a ruby script
+    require 'pcap_tools'
+    # Load tcpdump file
+    captures = [PacketFu::PcapFile.file_to_array('out.pcap')]
+## Available functions
+### Extract tcp streams
+This function rebuilds tcp streams from an array of pcap capture objects.
+    tcp_streams = PcapTools::extract_tcp_streams(captures)
+`tcp_streams` is an array of hashes, each hash has three keys :
+* `:type` : `:in` or `:out`, if the packet was sent or received
+* `:time` : timestamp of packet
+* `:data` : payload of packet
+Remarks :
+* Packets are in the right order
+* Packets are not merged (eg an http response can be split across several consecutive packets,
+with the same type `:in` or `:out`).
+To reassemble packets of the same type, please use `stream.rebuild_packets`
+### Extract http calls
+This function extracts http calls from a tcp stream, returned from the `extract_tcp_streams` function.
+    http_calls = PcapTools::extract_http_calls(stream)
+`http_calls` is an array of `http_call`.
+A `http_call` is an array of two objects :
+* The http request, an instance of `Net::HTTPRequest`, eg `Net::HTTPGet` or `Net::HTTPPost`. You can use this object
+like any http request of [std lib `net/http`](http://www.ruby-doc.org/stdlib/libdoc/net/http/rdoc/index.html)
+  * `req.path` : get the request path
+  * `req['User-Agent']` : get the User-Agent
+  * `req.body` : get the request body
+  * ...
+* The http response, an instance of `Net::HTTPResponse`, eg `Net::HTTPOk` or `Net::HTTPMovedPermanently`. You can use this object
+  like any http response of [std lib `net/http`](http://www.ruby-doc.org/stdlib/libdoc/net/http/rdoc/index.html)
+  * `resp.code` : get the http return code
+  * `resp['User-Agent']` : get the User-Agent
+  * `resp.body` : get the response body
+  * ...
+The response can be `nil` if there is no response in the tcp stream.
+The request and response object have some new attributes
+* `req.time` : get the time when the request or response was captured
+For the response object body, the following "Content-Encoding" types are honored :
+* gzip
+### Extract http calls from captures
+The two in one : extract http calls from an array of captures objects
+    http_calls = PcapTools::extract_http_calls_from_captures(captures)
+### Load multiple files
+Load multiple pcap files, in time order. Useful when you use `tcpdump -C 5 -W 100000`, to split captured data into pieces of 5M
+    captures = PcapTools::load_mutliple_files '*pcap*'

data/bin/pcap_tools_http ADDED Viewed

@@ -0,0 +1,37 @@
+#!/usr/bin/env ruby
+require 'pcap_tools'
+require 'optparse'
+options = {}
+OptionParser.new do |opts|
+  opts.banner = "Usage: pcap_tools_http [options] pcap_files"
+  opts.on("--no-body", "Do not display body") do
+    options[:no_body] = true
+  end
+end.parse!
+data = ARGV.map{|f| PacketFu::PcapFile.file_to_array(f)}
+tcps = PcapTools::extract_tcp_streams(data)
+tcps.each do |tcp|
+  PcapTools::extract_http_calls(tcp).each do |req, resp|
+    puts ">>>> #{req["pcap-src"]}:#{req["pcap-src-port"]} > #{req["pcap-dst"]}:#{req["pcap-dst-port"]}"
+    puts "#{req.method} #{req.path}"
+    req.each_capitalized_name.reject{|x| x =~ /^Pcap/ }.each do |x|
+      puts "#{x}: #{req[x]}"
+    end
+    puts
+    puts req.body unless options[:no_body]
+    puts "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #{resp.time}"
+    puts "#{resp.code} #{resp.message}"
+    resp.each_capitalized_name.reject{|x| x =~ /^Pcap/ }.each do |x|
+      puts "#{x}: #{resp[x]}"
+    end
+    puts
+    puts resp.body unless options[:no_body]
+    puts
+  end
+end

data/lib/pcap_tools.rb ADDED Viewed

@@ -0,0 +1,191 @@
+require 'rubygems'
+require 'packetfu'
+require 'net/http'
+require 'zlib'
+module Net
+  class HTTPRequest
+    attr_accessor :time
+  end
+  class HTTPResponse
+    attr_accessor :time
+    def body= body
+      @body = body
+      @read = true
+    end
+  end
+end
+module PcapTools
+  class TcpStream < Array
+    def insert_tcp sym, packet
+      data = packet.payload
+      return if data.size == 0
+      self << {:type => sym, :data => data, :from => packet.ip_saddr, :to => packet.ip_daddr, :from_port => packet.tcp_src, :to_port => packet.tcp_dst}
+    end
+    def rebuild_packets
+      out = TcpStream.new
+      current = nil
+      self.each do |packet|
+        if current
+          if packet[:type] == current[:type]
+            current[:data] += packet[:data]
+          else
+            out << current
+            current = packet.clone
+          end
+        else
+          current = packet.clone
+        end
+      end
+      out << current if current
+      out
+    end
+  end
+  def load_mutliple_files dir
+    Dir.glob(dir).sort{|a, b| File.new(a).mtime <=> File.new(b).mtime}.map{|file| PacketFu::PcapFile.file_to_array(file)}
+  end
+  module_function :load_mutliple_files
+  def extract_http_calls_from_captures captures
+    calls = []
+    extract_tcp_streams(captures).each do |tcp|
+      calls.concat(extract_http_calls(tcp))
+    end
+    calls
+  end
+  module_function :extract_http_calls_from_captures
+  def extract_tcp_streams captures
+    packets = []
+    captures.each do |capture|
+      capture.each do |packet|
+        packets << PacketFu::Packet.parse(packet)
+      end
+    end
+    streams = []
+    packets.each_with_index do |packet, k|
+      if packet.is_a?(PacketFu::TCPPacket) && packet.tcp_flags.syn == 1 && packet.tcp_flags.ack == 0
+        kk = k
+        tcp = TcpStream.new
+        while kk < packets.size
+          packet2 = packets[kk]
+          if packet2.is_a?(PacketFu::TCPPacket)
+            if packet.tcp_dst == packet2.tcp_dst && packet.tcp_src == packet2.tcp_src
+              tcp.insert_tcp :out, packet2
+              break if packet.tcp_flags.fin == 1 || packet2.tcp_flags.fin == 1
+            end
+            if packet.tcp_dst == packet2.tcp_src && packet.tcp_src == packet2.tcp_dst
+              tcp.insert_tcp :in, packet2
+              break if packet.tcp_flags.fin == 1 || packet2.tcp_flags.fin == 1
+            end
+          end
+          kk += 1
+        end
+        streams << tcp
+      end
+    end
+    streams
+  end
+  module_function :extract_tcp_streams
+  def extract_http_calls stream
+    rebuilded = stream.rebuild_packets
+    calls = []
+    data_out = ""
+    data_in = nil
+    k = 0
+    while k < rebuilded.size
+      begin
+        req = HttpParser::parse_request(rebuilded[k])
+        resp = k + 1 < rebuilded.size ? HttpParser::parse_response(rebuilded[k + 1]) : nil
+        calls << [req, resp]
+      rescue Exception => e
+        warn "Unable to parse http call : #{e}"
+      end
+      k += 2
+    end
+    calls
+  end
+  module_function :extract_http_calls
+  module HttpParser
+    def parse_request stream
+      headers, body = split_headers(stream[:data])
+      line0 = headers.shift
+      m = /(\S+)\s+(\S+)\s+(\S+)/.match(line0) or raise "Unable to parse first line of http request #{line0}"
+      clazz = {'POST' => Net::HTTP::Post, 'GET' => Net::HTTP::Get, 'PUT' => Net::HTTP::Put}[m[1]] or raise "Unknown http request type #{m[1]}"
+      req = clazz.new m[2]
+      req['Pcap-Src'] = stream[:from]
+      req['Pcap-Src-Port'] = stream[:from_port]
+      req['Pcap-Dst'] = stream[:to]
+      req['Pcap-Dst-Port'] = stream[:to_port]
+      req.time = stream[:time]
+      req.body = body
+      add_headers req, headers
+      req.body.size == req['Content-Length'].to_i or raise "Wrong content-length for http request, header say #{req['Content-Length'].chomp}, found #{req.body.size}"
+      req
+    end
+    module_function :parse_request
+    def parse_response stream
+      headers, body = split_headers(stream[:data])
+      line0 = headers.shift
+      m = /^(\S+)\s+(\S+)\s+(.*)$/.match(line0) or raise "Unable to parse first line of http response #{line0}"
+      resp = Net::HTTPResponse.send(:response_class, m[2]).new(m[1], m[2], m[3])
+      resp.time = stream[:time]
+      add_headers resp, headers
+      if resp.chunked?
+        resp.body = read_chunked("\r\n" + body)
+      else
+        resp.body = body
+        resp.body.size == resp['Content-Length'].to_i or raise "Wrong content-length for http response, header say #{resp['Content-Length'].chomp}, found #{resp.body.size}"
+      end
+      resp.body = Zlib::GzipReader.new(StringIO.new(resp.body)).read if resp['Content-Encoding'] == 'gzip'
+      resp
+    end
+    module_function :parse_response
+    private
+    def self.add_headers o, headers
+      headers.each do |line|
+        m = /\A([^:]+):\s*/.match(line) or raise "Unable to parse line #{line}"
+        o[m[1]] = m.post_match
+      end
+    end
+    def self.split_headers str
+      index = str.index("\r\n\r\n")
+      return str[0 .. index].split("\r\n"), str[index + 4 .. -1]
+    end
+    def self.read_chunked str
+      return "" if str == "\r\n"
+      m = /\r\n([0-9a-fA-F]+)\r\n/.match(str) or raise "Unable to read chunked body in #{str.split("\r\n")[0]}"
+      len = m[1].hex
+      return "" if len == 0
+      m.post_match[0..len - 1] + read_chunked(m.post_match[len .. -1])
+    end
+  end
+end

data/pcap_tools.gemspec ADDED Viewed

@@ -0,0 +1,15 @@
+require 'rake'
+Gem::Specification.new do |s|
+  s.name        = 'pcap_tools'
+  s.version     = '0.0.2'
+  s.authors     = ['Bertrand Paquet']
+  s.email       = 'bertrand.paquet@gmail.com'
+  s.summary     = 'Tools for extracting data from pcap files'
+  s.homepage    = 'https://github.com/bpaquet/pcap_tools'
+  s.executables << 'pcap_tools_http'
+  s.files       = `git ls-files`.split($/)
+  s.license     = 'BSD'
+  s.add_development_dependency('packetfu', '>= 1.1.9')
+end

metadata ADDED Viewed

@@ -0,0 +1,66 @@
+--- !ruby/object:Gem::Specification
+name: pcap_tools
+version: !ruby/object:Gem::Version
+  version: 0.0.2
+  prerelease:
+platform: ruby
+authors:
+- Bertrand Paquet
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2013-09-25 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: packetfu
+  requirement: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: 1.1.9
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: 1.1.9
+description:
+email: bertrand.paquet@gmail.com
+executables:
+- pcap_tools_http
+extensions: []
+extra_rdoc_files: []
+files:
+- README.markdown
+- bin/pcap_tools_http
+- lib/pcap_tools.rb
+- pcap_tools.gemspec
+homepage: https://github.com/bpaquet/pcap_tools
+licenses:
+- BSD
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project:
+rubygems_version: 1.8.24
+signing_key:
+specification_version: 3
+summary: Tools for extracting data from pcap files
+test_files: []