opendns-dnsdb 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +18 -0
  3. data/.rspec +4 -0
  4. data/Gemfile +8 -0
  5. data/LICENSE +20 -0
  6. data/README.md +64 -0
  7. data/Rakefile +6 -0
  8. data/THANKS +1 -0
  9. data/docs/Makefile +177 -0
  10. data/docs/_themes/LICENSE +45 -0
  11. data/docs/_themes/README.rst +25 -0
  12. data/docs/_themes/flask_theme_support.py +86 -0
  13. data/docs/_themes/kr/layout.html +32 -0
  14. data/docs/_themes/kr/relations.html +19 -0
  15. data/docs/_themes/kr/static/flasky.css_t +469 -0
  16. data/docs/_themes/kr/static/small_flask.css +70 -0
  17. data/docs/_themes/kr/theme.conf +7 -0
  18. data/docs/_themes/kr_small/layout.html +22 -0
  19. data/docs/_themes/kr_small/static/flasky.css_t +287 -0
  20. data/docs/_themes/kr_small/theme.conf +10 -0
  21. data/docs/conf.py +261 -0
  22. data/docs/index.rst +101 -0
  23. data/docs/make.bat +242 -0
  24. data/docs/operations/by_ip.rst +229 -0
  25. data/docs/operations/by_name.rst +256 -0
  26. data/docs/operations/label.rst +217 -0
  27. data/docs/operations/related.rst +127 -0
  28. data/docs/operations/traffic.rst +126 -0
  29. data/lib/opendns-dnsdb.rb +5 -0
  30. data/lib/opendns-dnsdb/dnsdb.rb +58 -0
  31. data/lib/opendns-dnsdb/dnsdb/by_ip.rb +69 -0
  32. data/lib/opendns-dnsdb/dnsdb/by_name.rb +93 -0
  33. data/lib/opendns-dnsdb/dnsdb/label.rb +105 -0
  34. data/lib/opendns-dnsdb/dnsdb/related.rb +92 -0
  35. data/lib/opendns-dnsdb/dnsdb/response.rb +41 -0
  36. data/lib/opendns-dnsdb/dnsdb/rrutils.rb +11 -0
  37. data/lib/opendns-dnsdb/dnsdb/siphash.rb +94 -0
  38. data/lib/opendns-dnsdb/dnsdb/traffic.rb +80 -0
  39. data/lib/opendns-dnsdb/version.rb +5 -0
  40. data/opendns-dnsdb.gemspec +20 -0
  41. data/spec/by_ip_spec.rb +54 -0
  42. data/spec/by_name_spec.rb +88 -0
  43. data/spec/label_spec.rb +88 -0
  44. data/spec/related_spec.rb +92 -0
  45. data/spec/spec_helper.rb +5 -0
  46. data/spec/traffic_spec.rb +36 -0
  47. metadata +123 -0
@@ -0,0 +1,127 @@
1
+ Related names
2
+ =============
3
+
4
+ Related names are names that have been frequently observed shortly
5
+ before or after a reference name.
6
+
7
+ This has proven to be very useful to discover command and control
8
+ domains used by malware when only a few of them were previously known.
9
+ This is also useful to investigate an infection chain.
10
+
11
+ Internally, multiple complementary matching algorithms are used, but
12
+ this client library takes care of aggregating and normalizing the
13
+ results.
14
+
15
+ Getting the list of related names
16
+ ---------------------------------
17
+
18
+ Related names for a single name can be looked up, as well as for
19
+ a vector of names:
20
+
21
+ .. code-block:: ruby
22
+
23
+ db.related_names('www.github.com')
24
+ db.related_names(['www.github.com', 'www.mozilla.org'])
25
+
26
+ These functions return a ``Response::Distinct`` object, if a single
27
+ name was used as a starting point, or a ``Response::HashByName`` if a
28
+ vector was provided.
29
+
30
+ The maximum number of results can be specified:
31
+
32
+ .. code-block:: ruby
33
+
34
+ db.related_names('www.skyrock.com', max_names: 50)
35
+
36
+ An optional block can also be given.
37
+
38
+ This block is a filter: it will be given each (name, score) as an
39
+ argument, and only names for which the return value of this block is
40
+ not ``false``/``nil`` will be kept.
41
+
42
+ For example, this only retrieves names matching a given regular
43
+ expression:
44
+
45
+ .. code-block:: ruby
46
+
47
+ db.related_names('www.skyrock.com') { |name| name.match /^miss-/ }
48
+
49
+ And this only retrieves names whose score is more than 0.1:
50
+
51
+ .. code-block:: ruby
52
+
53
+ db.related_names('www.skyrock.com') { |name, score| score > 0.1 }
54
+
55
+ Getting the list of related names, with scores
56
+ ----------------------------------------------
57
+
58
+ In addition to a list of names, a "score" can be returned for each
59
+ name found. This score is in the [0.0, 1.0] range, 1.0 meaning that a
60
+ name is likely to be closely related to the reference name, 0.0
61
+ meaning that these have not been observed together very frequently.
62
+
63
+ Related names for a single name can be looked up, as well as for
64
+ a vector of names:
65
+
66
+ .. code-block:: ruby
67
+
68
+ db.related_names_with_score('www.github.com')
69
+ db.related_names_with_score(['www.github.com', 'www.mozilla.org'])
70
+
71
+ These functions return a ``Response::HashByName``.
72
+
73
+ An optional filter can be provided:
74
+
75
+ .. code-block:: ruby
76
+
77
+ db.related_names_with_score('www.skyrock.com') do |name|
78
+ name.match /^miss-/
79
+ end
80
+
81
+ Getting a set of distinct related names for a list of names
82
+ -----------------------------------------------------------
83
+
84
+ Given a list of names, this returns a set of names related to these.
85
+
86
+ .. code-block:: ruby
87
+
88
+ db.distinct_related_names(['www.github.com', 'www.github.io'])
89
+
90
+ This returns a ``Response::Distinct`` object.
91
+
92
+ The maximum number of results can be specified:
93
+
94
+ .. code-block:: ruby
95
+
96
+ db.distinct_related_names(['www.github.com', 'www.github.io'],
97
+ max_results: 250)
98
+
99
+ By default, only direct neighbors of the given names are returned, but
100
+ deep traversal is also fully supported.
101
+
102
+ This will return a list of names related to those provided in the
103
+ vector, but also names related to these newly found names, names
104
+ related to these related names:
105
+
106
+ .. code-block:: ruby
107
+
108
+ db.distinct_related_names(['www.github.com', 'www.github.io'],
109
+ max_results: 250,
110
+ max_depth: 3)
111
+
112
+ Since a deep traversal can return a lot of results, some not being of
113
+ interest, a filter can be provided. This filter will be automatically applied
114
+ after each iteration:
115
+
116
+ .. code-block:: ruby
117
+
118
+ db.distinct_related_names(['www.github.com', 'www.github.io'],
119
+ max_results: 250,
120
+ max_depth: 3) do |name, score|
121
+ name.match(/^com-/) && score > 0.1
122
+ end
123
+
124
+ A single name can also be given instead of a vector. This is
125
+ equivalent to ``related_names`` when a deep traversal is not performed.
126
+
127
+ This function returns a ``Response::Distinct`` object.
@@ -0,0 +1,126 @@
1
+ DNS traffic
2
+ ===========
3
+
4
+ The number of DNS queries observed for a name over a time period can
5
+ be retrieved.
6
+
7
+ This is especially useful to see if a domain is popular, and to spot
8
+ anomalies in its traffic.
9
+
10
+ Getting the number of queries observed for a name
11
+ -------------------------------------------------
12
+
13
+ The ``daily_traffic_by_name`` method returns a vector with the number
14
+ of queries observed for each day, within a time period.
15
+
16
+ By default, the time period starts 7 days before the current day, and
17
+ ends at the current day, a day starting at 00:00 UTC.
18
+
19
+ .. code-block:: ruby
20
+
21
+ db.daily_traffic_by_name('www.github.com')
22
+
23
+ The output is a ``Response::TimeSeries`` object:
24
+
25
+ ::
26
+
27
+ [
28
+ [0] 6152525,
29
+ [1] 4756714,
30
+ [2] 4670300,
31
+ [3] 5954983,
32
+ [4] 6140915,
33
+ [5] 6040669,
34
+ [6] 5529869
35
+ ]
36
+
37
+ This method accepts several options:
38
+
39
+ - ``start``: a ``Date`` object representing the lower bound of the time interval
40
+ - ``end``: a ``Date`` object representing the upper bound of the time interval
41
+ - ``days_back``: if ``start`` is not provided, this represents the number of days to go back in time.
42
+
43
+ Here are some examples featuring these options:
44
+
45
+ .. code-block:: ruby
46
+
47
+ db.daily_traffic_by_name('www.github.com', end: Date.today - 2, days_back: 10)
48
+
49
+ db.daily_traffic_by_name('www.github.com', start: Date.today - 10)
50
+
51
+ The traffic for multiple domains can be looked up, provided that a
52
+ vector is given instead of a single name. In that case, the output is
53
+ a ``Response::HashByName`` object.
54
+
55
+ .. code-block:: ruby
56
+
57
+ db.daily_traffic_by_name(['www.github.com', 'www.github.io'])
58
+
59
+ For example, the following snippet compares the median number of
60
+ queries for a set of domains:
61
+
62
+ .. code-block:: ruby
63
+
64
+ ts = db.daily_traffic_by_name(['www.github.com', 'www.github.io'])
65
+ ts.merge(ts) { |name, ts| ts.median.to_i }
66
+
67
+ ::
68
+
69
+ {
70
+ "www.github.com" => 5954983,
71
+ "www.github.io" => 528002
72
+ }
73
+
74
+ Anomaly detection in traffic
75
+ ----------------------------
76
+
77
+ A benign web site tends to have comparable traffic every day. Sudden
78
+ spikes or drops in traffic usually indicate a major event (incident,
79
+ unusual volume of sent email), or some suspicious activity.
80
+
81
+ Domain names used as C&C typically receive very little traffic, and
82
+ suddenly get a spike of traffic for a short period of time. The same
83
+ can be observed with compromised hosts acting as intermediaries.
84
+
85
+ After having retrieved the traffic for a name, computing the relative
86
+ standard deviation is a simple and efficient way to detect anomalies.
87
+
88
+ To do so, the library includes the ``descriptive_statistics`` module
89
+ and implements a ``relative_standard_deviation`` method. This method
90
+ can work on the time series of a single domain, as well as on a set
91
+ of multiple time series.
92
+
93
+ .. code-block:: ruby
94
+
95
+ ts = db.daily_traffic_by_name(['skyrock.com', 'github.com', 'ooctmxmgwigqt.info'])
96
+ ap db.relative_standard_deviation(ts)
97
+
98
+ This outputs either a ``Response::TimeSeries`` or a ``Response::HashByName`` object:
99
+
100
+ ::
101
+
102
+ {
103
+ "skyrock.com" => 2.4300100908269657,
104
+ "github.com" => 10.628632305278618,
105
+ "ooctmxmgwigqt.info" => 244.18566965045403
106
+ }
107
+
108
+ In this example, we can clearly spot a domain name whose traffic
109
+ doesn't follow what we usually observe for a benign domain.
110
+
111
+ High-pass filter
112
+ ----------------
113
+
114
+ Domains receiving little traffic are frequently receiving more noise
115
+ (bots, internal traffic) than queries sent by actual users.
116
+
117
+ A simple high pass filter sets to 0 all entries of a time series below
118
+ a cutoff value. This is provided by the ``high_pass_filter`` method:
119
+
120
+ .. code-block:: ruby
121
+
122
+ ts = db.high_pass_filter(ts, cutoff: 5.0)
123
+
124
+ This method works on the time series of a single domain, as well as on
125
+ a set of multiple time series. The result is either a
126
+ ``Response::TimeSeries`` or a ``Response::HashByName`` object.
@@ -0,0 +1,5 @@
1
+ require_relative 'opendns-dnsdb/version'
2
+ require_relative 'opendns-dnsdb/dnsdb'
3
+
4
# Top-level namespace of the gem. It is intentionally empty here: the
# constants living under it are defined by the files required above.
module OpenDNS; end
@@ -0,0 +1,58 @@
1
+
2
+ require 'date'
3
+ require 'ethon'
4
+ require 'hashie'
5
+ require 'multi_json'
6
+
7
+ require_relative 'dnsdb/response'
8
+ require_relative 'dnsdb/by_ip'
9
+ require_relative 'dnsdb/by_name'
10
+ require_relative 'dnsdb/label'
11
+ require_relative 'dnsdb/related'
12
+ require_relative 'dnsdb/traffic'
13
+
14
module OpenDNS
  # HTTP client for the service rooted at SGRAPH_API_BASE_URL.
  #
  # The Response, ByIP, ByName, Label, Related and Traffic modules are mixed
  # in; this class itself only stores the connection settings and builds the
  # Ethon request handles.
  class DNSDB
    include OpenDNS::DNSDB::Response
    include OpenDNS::DNSDB::ByIP
    include OpenDNS::DNSDB::ByName
    include OpenDNS::DNSDB::Label
    include OpenDNS::DNSDB::Related
    include OpenDNS::DNSDB::Traffic

    # Value handed to Ethon as :timeout when the caller supplies none.
    DEFAULT_TIMEOUT = 15
    # Value exposed through #maxconnects when the caller supplies none.
    DEFAULT_MAXCONNECTS = 10
    # Prefix prepended to every endpoint passed to #query_handler.
    SGRAPH_API_BASE_URL = 'https://sgraph.umbrella.com'

    attr_reader :timeout, :sslcert, :sslcerttype, :sslcertpasswd, :maxconnects

    # Stores the connection settings and prepares the option hash that every
    # Ethon::Easy handle is created with.
    #
    # @param params [Hash] recognised keys:
    #   :sslcert       - client certificate (required)
    #   :sslcerttype   - certificate type, defaults to 'p12'
    #   :sslcertpasswd - certificate password, defaults to ''
    #   :timeout       - converted with #to_f, defaults to DEFAULT_TIMEOUT
    #   :maxconnects   - converted with #to_i, defaults to DEFAULT_MAXCONNECTS
    # @raise [UsageError] when :sslcert is missing
    def initialize(params = {})
      raise UsageError, 'Missing certificate file' unless params[:sslcert]

      @sslcert       = params[:sslcert]
      @timeout       = params[:timeout] ? params[:timeout].to_f : DEFAULT_TIMEOUT
      @maxconnects   = params[:maxconnects] ? params[:maxconnects].to_i : DEFAULT_MAXCONNECTS
      @sslcerttype   = params[:sslcerttype] || 'p12'
      @sslcertpasswd = params[:sslcertpasswd] || ''
      @options = {
        followlocation: true,
        timeout:        @timeout,
        sslcert:        @sslcert,
        sslcerttype:    @sslcerttype,
        sslcertpasswd:  @sslcertpasswd
      }
    end

    # Builds an Ethon::Easy handle for +endpoint+ (appended to
    # SGRAPH_API_BASE_URL) and returns it. Nothing in this method performs
    # the request; callers such as history_by_ip add the handle to an
    # Ethon::Multi and run that.
    #
    # @param endpoint [String] path appended to the base URL
    # @param method [Symbol] HTTP verb forwarded to Ethon, :get by default
    # @param options [Hash] extra request options forwarded to Ethon
    # @return [Ethon::Easy] the configured handle
    def query_handler(endpoint, method = :get, options = {})
      target = SGRAPH_API_BASE_URL + endpoint
      Ethon::Easy.new(@options).tap do |easy|
        easy.http_request(target, method, options)
      end
    end
  end
end
@@ -0,0 +1,69 @@
1
+
2
+ require_relative 'rrutils'
3
+
4
module OpenDNS
  class DNSDB
    # Lookups keyed by IP address. Relies on #query_handler from the
    # including class and on distinct_rrs from RRUtils.
    module ByIP
      include OpenDNS::DNSDB::RRUtils

      # Reduces record histories to the distinct +rr+ values they contain.
      #
      # @param responses [Hash, Enumerable] either one history (a collection
      #   of entries responding to +rr+) or a Hash mapping keys to histories
      # @return [Response::Distinct, Hash] a single Response::Distinct for a
      #   lone history; for a Hash, the same Hash with every value replaced
      #   in place by a Response::Distinct
      def rr_only_for_ips(responses)
        distinct = lambda do |history|
          Response::Distinct.new(history.map(&:rr).flatten.uniq)
        end
        return distinct.call(responses) unless responses.kind_of?(Hash)

        responses.each_pair { |key, history| responses[key] = distinct.call(history) }
        responses
      end

      # Fetches "/dnsdb/ip/<type>/<ip>.json" for every distinct IP, running
      # all requests through a single Ethon::Multi, and parses each body
      # as JSON.
      #
      # @param ips [Object, Enumerable] one IP or a collection of IPs
      # @param type [String] record type inserted into the URL path
      # @return [Response::HashByIP, Object] a Response::HashByIP keyed by IP
      #   when a collection was given, otherwise the +rrs+ of the only reply
      def history_by_ip(ips, type)
        single = !ips.kind_of?(Enumerable)
        targets = single ? [ips] : ips

        multi = Ethon::Multi.new
        pending = {}
        targets.each do |ip|
          next if pending[ip] # each IP is requested only once

          pending[ip] = query_handler("/dnsdb/ip/#{type}/#{ip}.json").tap do |query|
            multi.add(query)
          end
        end
        multi.perform

        parsed = pending.each_with_object({}) do |(ip, query), acc|
          acc[ip] = Response::Raw.new(MultiJson.load(query.response_body)).rrs
        end
        by_ip = Response::HashByIP[parsed]
        single ? by_ip.values.first : by_ip
      end

      # 'ns' history for the given IP(s); see #history_by_ip.
      def names_history_by_nameserver_ip(ips)
        history_by_ip(ips, 'ns')
      end

      # 'ns' history for the given IP(s), reduced with #rr_only_for_ips.
      def names_by_nameserver_ip(ips)
        rr_only_for_ips(names_history_by_nameserver_ip(ips))
      end

      # Result of #names_by_nameserver_ip passed through distinct_rrs.
      def distinct_names_by_nameserver_ip(ips)
        distinct_rrs(names_by_nameserver_ip(ips))
      end

      # 'a' history for the given IP(s); see #history_by_ip.
      def names_history_by_ip(ips)
        history_by_ip(ips, 'a')
      end

      # 'a' history for the given IP(s), reduced with #rr_only_for_ips.
      def names_by_ip(ips)
        rr_only_for_ips(names_history_by_ip(ips))
      end

      # Result of #names_by_ip passed through distinct_rrs.
      def distinct_names_by_ip(ips)
        distinct_rrs(names_by_ip(ips))
      end
    end
  end
end
@@ -0,0 +1,93 @@
1
+
2
+ require_relative 'rrutils'
3
+
4
module OpenDNS
  class DNSDB
    # Lookups keyed by domain name. Relies on #query_handler from the
    # including class and on distinct_rrs from RRUtils.
    module ByName
      include OpenDNS::DNSDB::RRUtils

      # Reduces record histories to the distinct +rr+ values they contain.
      # Each history entry is expected to respond to +rrs+, whose elements
      # in turn respond to +rr+.
      #
      # @param responses [Hash, Enumerable] either one history or a Hash
      #   mapping keys to histories
      # @return [Response::Distinct, Hash] a single Response::Distinct for a
      #   lone history; for a Hash, the same Hash with every value replaced
      #   in place by a Response::Distinct
      def rr_only_for_names(responses)
        distinct = lambda do |history|
          nested = history.map { |hrecord| hrecord.rrs.map(&:rr) }
          Response::Distinct.new(nested.flatten.uniq)
        end
        return distinct.call(responses) unless responses.kind_of?(Hash)

        responses.each_pair { |key, history| responses[key] = distinct.call(history) }
        responses
      end

      # Fetches "/dnsdb/name/<type>/<name>.json" for every distinct name,
      # running all requests through a single Ethon::Multi, and parses each
      # body as JSON.
      #
      # @param names [Object, Enumerable] one name or a collection of names
      # @param type [String] record type inserted into the URL path
      # @return [Response::HashByName, Object] a Response::HashByName keyed
      #   by name when a collection was given, otherwise the +rrs_tf+ of the
      #   only reply
      def history_by_name(names, type)
        single = !names.kind_of?(Enumerable)
        targets = single ? [names] : names

        multi = Ethon::Multi.new
        pending = {}
        targets.each do |name|
          next if pending[name] # each name is requested only once

          pending[name] = query_handler("/dnsdb/name/#{type}/#{name}.json").tap do |query|
            multi.add(query)
          end
        end
        multi.perform

        parsed = pending.each_with_object({}) do |(name, query), acc|
          acc[name] = Response::Raw.new(MultiJson.load(query.response_body)).rrs_tf
        end
        by_name = Response::HashByName[parsed]
        single ? by_name.values.first : by_name
      end

      # 'ns' history for the given name(s); see #history_by_name.
      def nameservers_ips_history_by_name(names)
        history_by_name(names, 'ns')
      end

      # 'ns' history for the given name(s), reduced with #rr_only_for_names.
      def nameservers_ips_by_name(names)
        rr_only_for_names(nameservers_ips_history_by_name(names))
      end

      # Result of #nameservers_ips_by_name passed through distinct_rrs.
      # NOTE(review): unlike the other distinct_* methods in this module, the
      # result is additionally wrapped in Response::Distinct - confirm
      # whether that difference is intentional.
      def distinct_nameservers_ips_by_name(names)
        Response::Distinct.new(distinct_rrs(nameservers_ips_by_name(names)))
      end

      # 'a' history for the given name(s); see #history_by_name.
      def ips_history_by_name(names)
        history_by_name(names, 'a')
      end

      # 'a' history for the given name(s), reduced with #rr_only_for_names.
      def ips_by_name(names)
        rr_only_for_names(ips_history_by_name(names))
      end

      # Result of #ips_by_name passed through distinct_rrs.
      def distinct_ips_by_name(names)
        distinct_rrs(ips_by_name(names))
      end

      # 'mx' history for the given name(s); see #history_by_name.
      def mxs_history_by_name(names)
        history_by_name(names, 'mx')
      end

      # 'mx' history for the given name(s), reduced with #rr_only_for_names.
      def mxs_by_name(names)
        rr_only_for_names(mxs_history_by_name(names))
      end

      # Result of #mxs_by_name passed through distinct_rrs.
      def distinct_mxs_by_name(names)
        distinct_rrs(mxs_by_name(names))
      end

      # 'cname' history for the given name(s); see #history_by_name.
      def cnames_history_by_name(names)
        history_by_name(names, 'cname')
      end

      # 'cname' history for the given name(s), reduced with
      # #rr_only_for_names.
      def cnames_by_name(names)
        rr_only_for_names(cnames_history_by_name(names))
      end

      # Result of #cnames_by_name passed through distinct_rrs.
      def distinct_cnames_by_name(names)
        distinct_rrs(cnames_by_name(names))
      end
    end
  end
end