recog 2.3.23 → 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116)
  1. checksums.yaml +4 -4
  2. data/LICENSE +1 -1
  3. data/README.md +25 -16
  4. data/Rakefile +2 -9
  5. data/lib/recog/db_manager.rb +1 -1
  6. data/lib/recog/version.rb +1 -1
  7. data/{bin → recog/bin}/recog_match +0 -1
  8. data/{xml → recog/xml}/apache_modules.xml +0 -0
  9. data/{xml → recog/xml}/apache_os.xml +0 -0
  10. data/{xml → recog/xml}/architecture.xml +0 -0
  11. data/{xml → recog/xml}/dhcp_vendor_class.xml +9 -9
  12. data/{xml → recog/xml}/dns_versionbind.xml +0 -0
  13. data/{xml → recog/xml}/favicons.xml +63 -1
  14. data/{xml → recog/xml}/fingerprints.xsd +0 -0
  15. data/{xml → recog/xml}/ftp_banners.xml +0 -0
  16. data/{xml → recog/xml}/h323_callresp.xml +0 -0
  17. data/{xml → recog/xml}/hp_pjl_id.xml +0 -0
  18. data/{xml → recog/xml}/html_title.xml +47 -0
  19. data/{xml → recog/xml}/http_cookies.xml +19 -0
  20. data/{xml → recog/xml}/http_servers.xml +74 -1
  21. data/{xml → recog/xml}/http_wwwauth.xml +13 -0
  22. data/{xml → recog/xml}/imap_banners.xml +0 -0
  23. data/{xml → recog/xml}/ldap_searchresult.xml +0 -0
  24. data/{xml → recog/xml}/mdns_device-info_txt.xml +0 -0
  25. data/{xml → recog/xml}/mdns_workstation_txt.xml +0 -0
  26. data/{xml → recog/xml}/mysql_banners.xml +0 -0
  27. data/{xml → recog/xml}/mysql_error.xml +0 -0
  28. data/{xml → recog/xml}/nntp_banners.xml +0 -0
  29. data/{xml → recog/xml}/ntp_banners.xml +0 -0
  30. data/{xml → recog/xml}/operating_system.xml +0 -0
  31. data/{xml → recog/xml}/pop_banners.xml +0 -0
  32. data/{xml → recog/xml}/rsh_resp.xml +0 -0
  33. data/{xml → recog/xml}/rtsp_servers.xml +0 -0
  34. data/{xml → recog/xml}/sip_banners.xml +0 -0
  35. data/{xml → recog/xml}/sip_user_agents.xml +0 -0
  36. data/{xml → recog/xml}/smb_native_lm.xml +0 -0
  37. data/{xml → recog/xml}/smb_native_os.xml +0 -0
  38. data/{xml → recog/xml}/smtp_banners.xml +0 -0
  39. data/{xml → recog/xml}/smtp_debug.xml +0 -0
  40. data/{xml → recog/xml}/smtp_ehlo.xml +0 -0
  41. data/{xml → recog/xml}/smtp_expn.xml +0 -0
  42. data/{xml → recog/xml}/smtp_help.xml +0 -0
  43. data/{xml → recog/xml}/smtp_mailfrom.xml +0 -0
  44. data/{xml → recog/xml}/smtp_noop.xml +0 -0
  45. data/{xml → recog/xml}/smtp_quit.xml +0 -0
  46. data/{xml → recog/xml}/smtp_rcptto.xml +0 -0
  47. data/{xml → recog/xml}/smtp_rset.xml +0 -0
  48. data/{xml → recog/xml}/smtp_turn.xml +0 -0
  49. data/{xml → recog/xml}/smtp_vrfy.xml +0 -0
  50. data/{xml → recog/xml}/snmp_sysdescr.xml +21 -6
  51. data/{xml → recog/xml}/snmp_sysobjid.xml +11 -0
  52. data/{xml → recog/xml}/ssh_banners.xml +0 -0
  53. data/{xml → recog/xml}/telnet_banners.xml +34 -1
  54. data/{xml → recog/xml}/tls_jarm.xml +8 -0
  55. data/{xml → recog/xml}/x11_banners.xml +0 -0
  56. data/{xml → recog/xml}/x509_issuers.xml +13 -2
  57. data/{xml → recog/xml}/x509_subjects.xml +0 -0
  58. data/recog.gemspec +9 -5
  59. data/spec/spec_helper.rb +4 -0
  60. metadata +56 -145
  61. data/.github/ISSUE_TEMPLATE/bug_report.md +0 -37
  62. data/.github/ISSUE_TEMPLATE/feature_request.md +0 -17
  63. data/.github/ISSUE_TEMPLATE/fingerprint_request.md +0 -27
  64. data/.github/PULL_REQUEST_TEMPLATE +0 -24
  65. data/.github/SECURITY.md +0 -35
  66. data/.github/dependabot.yml +0 -8
  67. data/.github/workflows/ci.yml +0 -26
  68. data/.github/workflows/verify.yml +0 -89
  69. data/.gitignore +0 -23
  70. data/.rspec +0 -3
  71. data/.ruby-gemset +0 -1
  72. data/.ruby-version +0 -1
  73. data/.snyk +0 -10
  74. data/.travis.yml +0 -25
  75. data/.vscode/bin/monitor-recog-fingerprints.sh +0 -54
  76. data/.vscode/extensions.json +0 -5
  77. data/.vscode/settings.json +0 -8
  78. data/.vscode/tasks.json +0 -77
  79. data/CONTRIBUTING.md +0 -278
  80. data/bin/recog_cleanup +0 -16
  81. data/bin/recog_export +0 -81
  82. data/bin/recog_standardize +0 -163
  83. data/bin/recog_verify +0 -98
  84. data/cpe-remap.yaml +0 -374
  85. data/features/data/failing_banners_fingerprints.xml +0 -20
  86. data/features/data/matching_banners_fingerprints.xml +0 -23
  87. data/features/data/multiple_banners_fingerprints.xml +0 -32
  88. data/features/data/no_tests.xml +0 -3
  89. data/features/data/sample_banner.txt +0 -2
  90. data/features/data/schema_failure.xml +0 -4
  91. data/features/data/successful_tests.xml +0 -18
  92. data/features/data/tests_with_failures.xml +0 -26
  93. data/features/data/tests_with_warnings.xml +0 -17
  94. data/features/match.feature +0 -36
  95. data/features/support/aruba.rb +0 -3
  96. data/features/support/env.rb +0 -6
  97. data/features/support/hooks.rb +0 -9
  98. data/features/verify.feature +0 -112
  99. data/identifiers/README.md +0 -70
  100. data/identifiers/fields.txt +0 -105
  101. data/identifiers/hw_device.txt +0 -86
  102. data/identifiers/hw_family.txt +0 -121
  103. data/identifiers/hw_product.txt +0 -463
  104. data/identifiers/os_architecture.txt +0 -10
  105. data/identifiers/os_device.txt +0 -77
  106. data/identifiers/os_family.txt +0 -235
  107. data/identifiers/os_product.txt +0 -357
  108. data/identifiers/service_family.txt +0 -249
  109. data/identifiers/service_product.txt +0 -778
  110. data/identifiers/vendor.txt +0 -859
  111. data/misc/convert_mysql_err +0 -61
  112. data/misc/order.xsl +0 -17
  113. data/requirements.txt +0 -2
  114. data/spec/lib/fingerprint_self_test_spec.rb +0 -175
  115. data/tools/dev/hooks/pre-commit +0 -21
  116. data/update_cpes.py +0 -343
data/misc/convert_mysql_err DELETED
@@ -1,61 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # Takes the MySQL error messages from sql/share/errmsg-utf8.txt, locates the
4
- # provided error message type (for example, ER_HOST_NOT_PRIVILEGED), then
5
- # creates XML snippets for each locale to be used in Recog. Note that this
6
- # cannot be used as-is to generate mysql_errors.xml, or oftentimes even parts
7
- # -- it merely spits out XML snippets that you can start with; many will still
8
- # need to be modified by hand.
9
-
10
- require 'builder'
11
- require 'open-uri'
12
- require 'securerandom'
13
-
14
- def generate_recog(error_name, locale, error_message)
15
- xml = Builder::XmlMarkup.new(target: STDOUT, indent: 2)
16
- xml.fingerprint(pattern: error_message) do
17
- xml.description "Oracle MySQL error #{error_name} (#{locale})"
18
- xml.example(error_message)
19
- xml.param(pos: 0, name: 'service.vendor', value: 'Oracle')
20
- xml.param(pos: 0, name: 'service.family', value: 'MySQL')
21
- xml.param(pos: 0, name: 'service.product', value: 'MySQL')
22
- end
23
- end
24
-
25
- unless ARGV.size == 2
26
- fail "Usage: #{$PROGRAM_NAME} <path/URI for errmsg-utf8.txt> <error name>"
27
- end
28
-
29
- path = ARGV.first
30
- error_name = ARGV.last
31
-
32
- lines = IO.readlines(open(path))
33
-
34
- fail "Nothing read from #{path}" if lines.empty?
35
-
36
- unless (error_start = lines.find_index { |line| line.strip =~ /^#{error_name}(?:\s+\S+)?$/ })
37
- fail "Unable to find #{error_name} in #{path}"
38
- end
39
-
40
- locale_map = {}
41
- lines.slice(error_start + 1, lines.size).each do |line|
42
- if /^\s+(?<locale>\S+)\s+"(?<error_message>.*)",?$/ =~ line
43
- locale_map[locale] = error_message
44
- else
45
- break
46
- end
47
- end
48
-
49
- # Many of the error messages contain format strings. This can be problematic
50
- # in that they need to be removed or otherwise handled as part of the 'pattern'
51
- # attribute and appropriately filled in in any example elements. So simply try
52
- # a rough count of the possible format strings and warn the user so that they
53
- # can deal with it.
54
- format_count = locale_map.values.map { |error_message| error_message.scan(/%/).size }.inject(&:+)
55
- unless format_count == 0
56
- warn("#{format_count} possible format strings found -- you'll need to deal with this")
57
- end
58
-
59
- Hash[locale_map.sort].map do |locale, error_message|
60
- generate_recog(error_name, locale, error_message)
61
- end
data/misc/order.xsl DELETED
@@ -1,17 +0,0 @@
1
- <?xml version="1.0"?>
2
- <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
3
- <xsl:output encoding="UTF-8" indent="yes" method="xml"/>
4
- <xsl:template match="@*|node()">
5
- <xsl:copy>
6
- <xsl:apply-templates select="@*|node()"/>
7
- </xsl:copy>
8
- </xsl:template>
9
- <xsl:template match="fingerprints/fingerprint">
10
- <xsl:copy>
11
- <xsl:copy-of select="@*"/>
12
- <xsl:apply-templates select="description"/>
13
- <xsl:apply-templates select="example"/>
14
- <xsl:apply-templates select="param"/>
15
- </xsl:copy>
16
- </xsl:template>
17
- </xsl:stylesheet>
data/requirements.txt DELETED
@@ -1,2 +0,0 @@
1
- lxml==4.6.5
2
- pyyaml
data/spec/lib/fingerprint_self_test_spec.rb DELETED
@@ -1,175 +0,0 @@
1
- require 'recog/db'
2
- require 'regexp_parser'
3
- require 'nokogiri'
4
-
5
- describe Recog::DB do
6
- let(:schema) { Nokogiri::XML::Schema(open(File.expand_path(File.join(%w(xml fingerprints.xsd))))) }
7
- Dir[File.expand_path File.join('xml', '*.xml')].each do |xml_file_name|
8
-
9
- describe "##{File.basename(xml_file_name)}" do
10
-
11
- it "is valid XML" do
12
- doc = Nokogiri::XML(open(xml_file_name))
13
- errors = schema.validate(doc)
14
- expect(errors).to be_empty, "#{xml_file_name} is invalid recog XML -- #{errors.inspect}"
15
- end
16
-
17
- db = Recog::DB.new(xml_file_name)
18
-
19
- it "has a match key" do
20
- expect(db.match_key).not_to be_nil
21
- expect(db.match_key).not_to be_empty
22
- end
23
-
24
- it "has valid 'preference' value" do
25
- # Reserve values below 0.10 and above 0.90 for users
26
- # See xml/fingerprints.xsd
27
- expect(db.preference.class).to be ::Float
28
- expect(db.preference).to be_between(0.10, 0.90)
29
- end
30
-
31
- fp_descriptions = []
32
- db.fingerprints.each_index do |i|
33
- fp = db.fingerprints[i]
34
-
35
- it "doesn't have a duplicate description" do
36
- if fp_descriptions.include?(fp.name)
37
- fail "'#{fp.name}'s description is not unique"
38
- else
39
- fp_descriptions << fp.name
40
- end
41
- end
42
-
43
- context "#{fp.name}" do
44
- param_names = []
45
- it "has consistent os.device and hw.device" do
46
- if fp.params['os.device'] && fp.params['hw.device'] && (fp.params['os.device'] != fp.params['hw.device'])
47
- fail "#{fp.name} has both hw.device and os.device but with differing values"
48
- end
49
- end
50
- fp.params.each do |param_name, pos_value|
51
- pos, value = pos_value
52
- it "has valid looking fingerprint parameter names" do
53
- unless param_name =~ /^(?:cookie|[^\.]+\..*)$/
54
- fail "'#{param_name}' is invalid"
55
- end
56
- end
57
-
58
- it "doesn't have param values for capture params" do
59
- if pos > 0 && !value.to_s.empty?
60
- fail "'#{fp.name}'s #{param_name} is a non-zero pos but specifies a value of '#{value}'"
61
- end
62
- end
63
-
64
- it "has parameter values other than General, Server or Unknown, which are not helpful" do
65
- if pos == 0 && value =~ /^(?i:general|server|unknown)$/
66
- fail "'#{param_name}' has general/server/unknown value '#{value}'"
67
- end
68
- end
69
-
70
- it "doesn't omit values for non-capture params" do
71
- if pos == 0 && value.to_s.empty?
72
- fail "'#{fp.name}'s #{param_name} is not a capture (pos=0) but doesn't specify a value"
73
- end
74
- end
75
-
76
- it "doesn't have duplicate params" do
77
- if param_names.include?(param_name)
78
- fail "'#{fp.name}'s has duplicate #{param_name}"
79
- else
80
- param_names << param_name
81
- end
82
- end
83
-
84
- it "uses interpolation correctly" do
85
- if pos == 0 && /\{(?<interpolated>[^\s{}]+)\}/ =~ value
86
- unless fp.params.key?(interpolated)
87
- fail "'#{fp.name}' uses interpolated value '#{interpolated}' that does not exist"
88
- end
89
- end
90
- end
91
- end
92
- end
93
-
94
- context "#{fp.regex}" do
95
-
96
- it "has a valid looking name" do
97
- expect(fp.name).not_to be_nil
98
- expect(fp.name).not_to be_empty
99
- end
100
-
101
- it "has a regex" do
102
- expect(fp.regex).not_to be_nil
103
- expect(fp.regex.class).to be ::Regexp
104
- end
105
-
106
- it 'uses capturing regular expressions properly' do
107
- # the list of index-based captures that the fingerprint is expecting
108
- expected_capture_positions = fp.params.values.map(&:first).map(&:to_i).select { |position| position > 0 }
109
- if fp.params.empty? && expected_capture_positions.size > 0
110
- fail "Non-asserting fingerprint with regex #{fp.regex} captures #{expected_capture_positions.size} time(s); 0 are needed"
111
- else
112
- # parse the regex and count the number of captures
113
- actual_capture_positions = []
114
- capture_number = 1
115
- Regexp::Scanner.scan(fp.regex).each do |token_parts|
116
- if token_parts.first == :group && ![:close, :passive, :options, :options_switch].include?(token_parts[1])
117
- actual_capture_positions << capture_number
118
- capture_number += 1
119
- end
120
- end
121
- # compare the captures actually performed to those being used and ensure that they contain
122
- # the same elements regardless of order, preventing, over-, under- and other forms of mis-capturing.
123
- actual_capture_positions = actual_capture_positions.sort.uniq
124
- expected_capture_positions = expected_capture_positions.sort.uniq
125
- expect(actual_capture_positions).to eq(expected_capture_positions),
126
- "Regex has #{actual_capture_positions.size} capture groups, but the fingerprint expected #{expected_capture_positions.size} extractions."
127
- end
128
- end
129
-
130
- # Not yet enforced
131
- # it "has test cases" do
132
- # expect(fp.tests.length).not_to equal(0)
133
- # end
134
-
135
- it "Has a reasonable number (<= 20) of test cases" do
136
- expect(fp.tests.length).to be <= 20
137
- end
138
-
139
- fp_examples = []
140
- fp.tests.each do |example|
141
- it "doesn't have a duplicate examples" do
142
- if fp_examples.include?(example.content)
143
- fail "'#{fp.name}' has duplicate example '#{example.content}'"
144
- else
145
- fp_examples << example.content
146
- end
147
- end
148
- it "Example '#{example.content}' matches this regex" do
149
- match = fp.match(example.content)
150
- expect(match).to_not be_nil, 'Regex did not match'
151
- # test any extractions specified in the example
152
- example.attributes.each_pair do |k,v|
153
- next if k == '_encoding'
154
- next if k == '_filename'
155
- expect(match[k]).to eq(v), "Regex didn't extract expected value for fingerprint attribute #{k} -- got #{match[k]} instead of #{v}"
156
- end
157
- end
158
-
159
- it "Example '#{example.content}' matches this regex first" do
160
- db.fingerprints.slice(0, i).each_index do |previous_i|
161
- prev_fp = db.fingerprints[previous_i]
162
- prev_fp.tests.each do |prev_example|
163
- match = prev_fp.match(example.content)
164
- expect(match).to be_nil, "Matched regex ##{previous_i} (#{db.fingerprints[previous_i].regex}) rather than ##{i} (#{db.fingerprints[i].regex})"
165
- end
166
- end
167
- end
168
- end
169
-
170
- end
171
- end
172
-
173
- end
174
- end
175
- end
data/tools/dev/hooks/pre-commit DELETED
@@ -1,21 +0,0 @@
1
- #!/bin/sh
2
- #
3
- # Hook script to verify changes about to be committed.
4
- # The hook should exit with non-zero status after issuing an appropriate
5
- # message if it wants to stop the commit.
6
-
7
- # Verify that each fingerprint asserts known identifiers.
8
- git diff --cached --name-only --diff-filter=ACM -z xml/*.xml | xargs -0 ./bin/recog_standardize --write
9
-
10
- # get status
11
- status=$?
12
-
13
- if [ $status -ne 0 ]; then
14
- echo "Please review any new additions to the text files under 'identifiers/'."
15
- echo "If any of these names are close to an existing name, update the offending"
16
- echo "fingerprint to use the existing name instead. Once the fingerprints are fixed,"
17
- echo "remove the 'extra' names from the identifiers files, and run the tool again."
18
- exit 1
19
- fi
20
-
21
- exit 0
data/update_cpes.py DELETED
@@ -1,343 +0,0 @@
1
- #!/usr/bin/env python
2
-
3
- import logging
4
- import re
5
- import sys
6
-
7
- import yaml
8
- from lxml import etree
9
-
10
- BASE_LOG_FORMAT = '%(levelname)s: %(message)s'
11
-
12
- # CPE w/o 2.3 component: cpe:/a:nginx:nginx:0.1.0"
13
- REGEX_CPE = re.compile('^cpe:/([aho]):([^:]+):([^:]+)')
14
- # CPE w/ 2.3 component: cpe:2.3:a:f5:nginx:0.1.0:*:*:*:*:*:*:*
15
- REGEX_CPE_23 = re.compile('^cpe:2.3:([aho]):([^:]+):([^:]+)')
16
-
17
- XML_PATH_DEPRECATED_BY = "./{http://scap.nist.gov/schema/cpe-extension/2.3}cpe23-item/{http://scap.nist.gov/schema/cpe-extension/2.3}deprecation/{http://scap.nist.gov/schema/cpe-extension/2.3}deprecated-by"
18
-
19
-
20
- def parse_r7_remapping(file):
21
- with open(file) as remap_file:
22
- return yaml.safe_load(remap_file)["mappings"]
23
-
24
-
25
- def update_vp_map(target_map, cpe_type, vendor, product):
26
- """Add an entry to the dict tracking valid combinations
27
- """
28
-
29
- if cpe_type not in target_map:
30
- target_map[cpe_type] = {}
31
-
32
- if vendor not in target_map[cpe_type]:
33
- target_map[cpe_type][vendor] = set()
34
-
35
- product = product.replace('%2f', '/')
36
- target_map[cpe_type][vendor].add(product)
37
-
38
-
39
- def update_deprecated_map(target_map, dep_string, entry):
40
- """Add an entry to the dict tracking deprecations
41
-
42
- target_map example:
43
-
44
- {
45
- "a:100plus:101eip":
46
- {
47
- "deprecated_date": "2021-06-10T15:28:05.490Z",
48
- "deprecated_by": "a:hundredplus:101eip"
49
- }
50
- }
51
-
52
- Args:
53
- target_map (dict): dict containing deprecations
54
- dep_string (str): key to add in the format of 'type:vendor:product'
55
- entry (lxml.etree._Element): XML element to pull additional data from
56
-
57
- Returns:
58
- None, target_map modified in place
59
- """
60
-
61
- deprecated_date = entry.get("deprecation_date", "")
62
-
63
- # Find the CPE that deprecated this entry
64
- raw_dep_by = entry.find(XML_PATH_DEPRECATED_BY).get('name')
65
-
66
- # Extract the type, vendor, product
67
- dep_by_match = REGEX_CPE_23.match(raw_dep_by)
68
- if not dep_by_match:
69
- logging.error("CPE %s is deprecated but we can't build the deprecation mapping entry for some reason.", dep_string)
70
- return
71
-
72
- dep_type, dep_vendor, dep_product = dep_by_match.group(1, 2, 3)
73
- deprecated_by = "{}:{}:{}".format(dep_type, dep_vendor, dep_product)
74
-
75
- if dep_string not in target_map:
76
- target_map[dep_string] = {}
77
-
78
- if not target_map[dep_string].get('deprecated_date'):
79
- target_map[dep_string]['deprecated_date'] = deprecated_date
80
-
81
- if not target_map[dep_string].get('deprecated_by'):
82
- target_map[dep_string]['deprecated_by'] = deprecated_by
83
-
84
-
85
- def parse_cpe_vp_map(file):
86
- deprecated_map = {}
87
- vp_map = {} # cpe_type -> vendor -> products
88
-
89
- parser = etree.XMLParser(remove_comments=False)
90
- doc = etree.parse(file, parser)
91
- namespaces = {
92
- 'ns': 'http://cpe.mitre.org/dictionary/2.0',
93
- 'meta': 'http://scap.nist.gov/schema/cpe-dictionary-metadata/0.2'
94
- }
95
- for entry in doc.xpath("//ns:cpe-list/ns:cpe-item", namespaces=namespaces):
96
- cpe_name = entry.get("name")
97
- if not cpe_name:
98
- continue
99
-
100
- cpe_match = REGEX_CPE.match(cpe_name)
101
- if cpe_match:
102
- cpe_type, vendor, product = cpe_match.group(1, 2, 3)
103
- # If the entry is deprecated then don't add it to our list of valid
104
- # CPEs, but instead add it to a list for reference later.
105
- if entry.get("deprecated"):
106
- # This will be the key under which we store the deprecation data
107
- deprecated_string = "{}:{}:{}".format(cpe_type, vendor, product)
108
-
109
- update_deprecated_map(deprecated_map, deprecated_string, entry)
110
- continue
111
-
112
- update_vp_map(vp_map, cpe_type, vendor, product)
113
-
114
- else:
115
- logging.error("Unexpected CPE %s", cpe_name)
116
-
117
- return vp_map, deprecated_map
118
-
119
-
120
- def lookup_cpe(vendor, product, cpe_type, cpe_table, remap, deprecated_map):
121
- """Identify the correct vendor and product values for a CPE
122
-
123
- This function attempts to determine the correct CPE using vendor and product
124
- values supplied by the caller as well as a remapping dictionary for mapping
125
- these values to more correct values used by NIST.
126
-
127
- For example, the remapping might tell us that a value of 'alpine' for the
128
- vendor string should be 'alpinelinux' instead, or for product 'solaris'
129
- should be 'sunos'.
130
-
131
- This function should only emit values seen in the official NIST CPE list
132
- which is provided to it in cpe_table.
133
-
134
- Lookup priority:
135
- 1. Original vendor / product
136
- 2. Original vendor / remap product
137
- 3. Remap vendor / original product
138
- 4. Remap vendor / remap product
139
-
140
- Args:
141
- vendor (str): vendor name
142
- product (str): product name
143
- cpe_type (str): CPE type - o, a, h, etc.
144
- cpe_table (dict): dict containing the official NIST CPE data
145
- remap (dict): dict containing the remapping values
146
- deprecated_cves (set): set of all deprecated CPEs in the format
147
- 'type:vendor:product'
148
- Returns:
149
- success, vendor, product
150
- """
151
-
152
- if (
153
- vendor in cpe_table[cpe_type]
154
- and product in cpe_table[cpe_type][vendor]
155
- ):
156
- # Hot path, success with original values
157
- return True, vendor, product
158
-
159
- # Everything else depends on a remap of some sort.
160
- # get the remappings for this one vendor string.
161
- vendor_remap = None
162
-
163
- remap_type = remap.get(cpe_type, None)
164
- if remap_type:
165
- vendor_remap = remap_type.get(vendor, None)
166
-
167
- if vendor_remap:
168
- # If we have product remappings, work that angle next
169
- possible_product = None
170
- if (
171
- vendor_remap.get('products', None)
172
- and product in vendor_remap['products']
173
- ):
174
- possible_product = vendor_remap['products'][product]
175
-
176
- if (vendor in cpe_table[cpe_type]
177
- and possible_product
178
- and possible_product in cpe_table[cpe_type][vendor]):
179
- # Found original vendor, remap product
180
- return True, vendor, possible_product
181
-
182
- # Start working the process to find a match with a remapped vendor name
183
- if vendor_remap.get('vendor', None):
184
- new_vendor = vendor_remap['vendor']
185
-
186
- if new_vendor in cpe_table[cpe_type]:
187
-
188
- if product in cpe_table[cpe_type][new_vendor]:
189
- # Found remap vendor, original product
190
- return True, new_vendor, product
191
-
192
- if possible_product and possible_product in cpe_table[cpe_type][new_vendor]:
193
- # Found remap vendor, remap product
194
- return True, new_vendor, possible_product
195
-
196
- deprecated_string = "{}:{}:{}".format(cpe_type, vendor, product)
197
- if deprecated_map.get(deprecated_string, False):
198
- dep_by = deprecated_map[deprecated_string].get("deprecated_by", "")
199
- dep_date = deprecated_map[deprecated_string].get("deprecated_date", "")
200
- logging.error("Product %s from vendor %s invalid for CPE %s and no mapping. This combination is DEPRECATED by %s at %s",
201
- product, vendor, cpe_type, dep_by, dep_date)
202
- else:
203
- logging.error("Product %s from vendor %s invalid for CPE %s and no mapping.",
204
- product, vendor, cpe_type)
205
-
206
- return False, None, None
207
-
208
-
209
- def update_cpes(xml_file, cpe_vp_map, r7_vp_map, deprecated_cves):
210
- parser = etree.XMLParser(remove_comments=False, remove_blank_text=True)
211
- doc = etree.parse(xml_file, parser)
212
-
213
- for fingerprint in doc.xpath('//fingerprint'):
214
-
215
- # collect all the params, grouping by os and service params that could be used to compute a CPE
216
- params = {}
217
- for param in fingerprint.xpath('./param'):
218
- name = param.attrib['name']
219
- # remove any existing CPE params
220
- if re.match(r'^.*\.cpe\d{0,2}$', name):
221
- param.getparent().remove(param)
222
- continue
223
-
224
- match = re.search(r'^(?P<fp_type>hw|os|service(?:\.component)?)\.', name)
225
- if match:
226
- fp_type = match.group('fp_type')
227
- if not fp_type in params:
228
- params[fp_type] = {}
229
- if name in params[fp_type]:
230
- raise ValueError('Duplicated fingerprint named {} in fingerprint {} in file {}'.format(name, fingerprint.attrib['pattern'], xml_file))
231
- params[fp_type][name] = param
232
-
233
- # for each of the applicable os/service param groups, build a CPE
234
- for fp_type in params:
235
- if fp_type == 'os':
236
- cpe_type = 'o'
237
- elif fp_type.startswith('service'):
238
- cpe_type = 'a'
239
- elif fp_type == 'hw':
240
- cpe_type = 'h'
241
- else:
242
- raise ValueError('Unhandled param type {}'.format(fp_type))
243
-
244
- # extract the vendor/product/version values from each os/service group,
245
- # using the static value ('Apache', for example) when pos is 0, and
246
- # otherwise use a value that contains interpolation markers such that
247
- # products/projects that use recog content can insert the value
248
- # extracted from the banner/other data via regex capturing groups
249
- fp_data = {
250
- 'vendor': None,
251
- 'product': None,
252
- 'version': '-',
253
- }
254
- for fp_datum in fp_data:
255
- fp_datum_param_name = "{}.{}".format(fp_type, fp_datum)
256
- if fp_datum_param_name in params[fp_type]:
257
- fp_datum_e = params[fp_type][fp_datum_param_name]
258
- if fp_datum_e.attrib['pos'] == '0':
259
- fp_data[fp_datum] = fp_datum_e.attrib['value']
260
- else:
261
- fp_data[fp_datum] = "{{{}}}".format(fp_datum_e.attrib['name'])
262
-
263
- vendor = fp_data['vendor']
264
- product = fp_data['product']
265
- version = fp_data['version']
266
-
267
- # build a reasonable looking CPE value from the vendor/product/version,
268
- # lowercasing, replacing whitespace with _, and more
269
- if vendor and product:
270
- if not cpe_type in cpe_vp_map:
271
- logging.error("Didn't find CPE type '%s' for '%s' '%s'", cpe_type, vendor, product)
272
- continue
273
-
274
- vendor = vendor.lower().replace(' ', '_').replace(',', '')
275
- product = product.lower().replace(' ', '_').replace(',', '').replace('!', '%21')
276
- if 'unknown' in [vendor, product]:
277
- continue
278
-
279
- if (vendor.startswith('{') and vendor.endswith('}')) or (product.startswith('{') and product.endswith('}')):
280
- continue
281
-
282
- success, vendor, product = lookup_cpe(vendor, product, cpe_type, cpe_vp_map, r7_vp_map, deprecated_cves)
283
- if not success:
284
- continue
285
-
286
- # Sanity check the value to ensure that no invalid values will
287
- # slip in due to logic or mapping bugs.
288
- # If it's not in the official NIST list then log it and kick it out
289
- if product not in cpe_vp_map[cpe_type][vendor]:
290
- logging.error("Invalid CPE type %s created for vendor %s and product %s. This may be due to an invalid mapping.", cpe_type, vendor, product)
291
- continue
292
-
293
- # building the CPE string
294
- # Last minute escaping of '/' and `!`
295
- product = product.replace('/', '\/').replace('%21', '\!')
296
- cpe_value = 'cpe:/{}:{}:{}'.format(cpe_type, vendor, product)
297
-
298
- if version:
299
- cpe_value += ":{}".format(version)
300
-
301
- cpe_param = etree.Element('param')
302
- cpe_param.attrib['pos'] = '0'
303
- cpe_param.attrib['name'] = '{}.cpe23'.format(fp_type)
304
- cpe_param.attrib['value'] = cpe_value
305
-
306
- for param_name in params[fp_type]:
307
- param = params[fp_type][param_name]
308
- parent = param.getparent()
309
- index = parent.index(param) + 1
310
- parent.insert(index, cpe_param)
311
-
312
- root = doc.getroot()
313
-
314
- with open(xml_file, 'wb') as xml_out:
315
- xml_out.write(etree.tostring(root, pretty_print=True, xml_declaration=True, encoding=doc.docinfo.encoding))
316
-
317
-
318
- def main():
319
- if len(sys.argv) != 4:
320
- logging.critical("Expecting exactly 3 arguments; recog XML file, CPE 2.3 XML dictionary, JSON remapping, got %s", (len(sys.argv) - 1))
321
- sys.exit(1)
322
-
323
- cpe_vp_map, deprecated_map = parse_cpe_vp_map(sys.argv[2])
324
- if not cpe_vp_map:
325
- logging.critical("No CPE vendor => product mappings read from CPE 2.3 XML dictionary %s", sys.argv[2])
326
- sys.exit(1)
327
-
328
- r7_vp_map = parse_r7_remapping(sys.argv[3])
329
- if not r7_vp_map:
330
- logging.warning("No Rapid7 vendor/product => CPE mapping read from %s", sys.argv[3])
331
-
332
- # update format string for the logging handler to include the recog XML filename
333
- logging.basicConfig(force=True, format=f"{sys.argv[1]}: {BASE_LOG_FORMAT}")
334
-
335
- update_cpes(sys.argv[1], cpe_vp_map, r7_vp_map, deprecated_map)
336
-
337
-
338
- if __name__ == '__main__':
339
- logging.basicConfig(format=BASE_LOG_FORMAT)
340
- try:
341
- sys.exit(main())
342
- except KeyboardInterrupt:
343
- pass