fingerprinter 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 1d85553bfd3f1487b0185188933d1e409f12cc8e8a96762e5f2e8e5a2ab7f7d1
4
+ data.tar.gz: bd5fc3fd49ceac7667bcaca1eaf4cb3e0fb7be4dc68d62bc58bbf15f6ff8751d
5
+ SHA512:
6
+ metadata.gz: e598f2f6025e1db35ca2089aace62176b048cc6e432868086199d5af999138257dcd6a3f8134c6b9c303c189e565d15ffb8e0e1e6736eae8b3be9f45c16f360b
7
+ data.tar.gz: c9829ca7d4288d7f096b4c3dd7dea07369bebbee12e9b308c743bd749176f03c9898a3de1b937bf30f1ea33b5ef675d317ccf1546aeb3feba22a57f0909fdf91
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
# ScanOptions : Keeps the options supplied for a scan as class-level state so that
# they can be read from anywhere without passing the options hash around.
class ScanOptions
  require 'uri'

  DEFAULT_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36'
  DEFAULT_TIMEOUT = 10
  DEFAULT_CONCURRENCY = 10

  # Stores the scan options.
  #
  # @param options [Hash] read keys: :proxy, :ua, :timeout, :concurrency, :silent
  def self.build(options)
    @proxy = options[:proxy]
    @user_agent = options[:ua]
    @timeout = options[:timeout]
    @concurrency = options[:concurrency]
    @silent = options[:silent]
  end

  # @return [Boolean] true when a proxy has been supplied
  def self.proxy?
    !!@proxy
  end

  # @return [String] host part of the proxy URL, or '' when there is no usable proxy
  def self.proxy_host
    uri = proxy_uri
    return '' unless uri

    uri.host.to_s
  end

  # @return [Object, nil] the proxy exactly as it was supplied
  def self.proxy_url
    @proxy
  end

  # @return [String, nil] port of the proxy URL as a string, or nil when there is no usable proxy
  def self.proxy_port
    uri = proxy_uri
    return nil unless uri

    uri.port.to_s
  end

  # @return [String] the supplied user agent, or a desktop Chrome one by default
  def self.user_agent
    @user_agent || DEFAULT_USER_AGENT
  end

  # @return [Integer] the supplied timeout converted with to_i, or 10 by default
  def self.timeout
    @timeout ? @timeout.to_i : DEFAULT_TIMEOUT
  end

  # @return [Object] the supplied concurrency (returned as given), or 10 by default
  def self.http_concurrency
    @concurrency || DEFAULT_CONCURRENCY
  end

  # @return [Boolean] true when the :silent option is truthy
  def self.silent?
    !!@silent
  end

  # Parses the stored proxy. URI() raises on nil and on malformed input, so both
  # cases are mapped to nil here to let the readers above fall back gracefully.
  #
  # @return [URI::Generic, nil]
  def self.proxy_uri
    return nil unless @proxy

    URI(@proxy)
  rescue ArgumentError, URI::InvalidURIError
    nil
  end
  private_class_method :proxy_uri
end
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
# Fingerprinter : Entry point
module Fingerprinter
  # Technologies : Shared detection helpers used by every technology check
  class Technologies
    # Tells whether any response header matches the regex registered for its name.
    #
    # @param response [#headers] object exposing the response headers (may be nil)
    # @param regexes [Hash] lowercase header name => regex
    # @return [Boolean]
    def self.response_headers_check(response, regexes)
      headers = response.headers
      return false unless headers

      headers.any? do |name, value|
        pattern = regexes[name.downcase]
        next false unless pattern

        # A header can hold a single value or a list of values
        candidates = value.is_a?(Array) ? value : [value]
        candidates.any? { |candidate| pattern.match?(candidate) }
      end
    end

    # Tells whether the response body matches at least one of the patterns.
    #
    # @param response [#body]
    # @param regexes [Enumerable] patterns accepted by String#match?
    # @return [Boolean] false when there is no body
    def self.whole_body_check(response, regexes)
      return false if response.body.nil?

      regexes.any? { |pattern| response.body.match?(pattern) }
    end

    # Tells whether the content of a <meta name="type"> tag matches one of the patterns.
    #
    # @param doc [#xpath] parsed document
    # @param regexes [Enumerable] patterns accepted by String#match?
    # @param type [String] value of the meta name attribute to look at
    # @return [Boolean]
    def self.meta_detection(doc, regexes, type = 'generator')
      contents = doc.xpath("//meta[@name='#{type}']/@content")
      return false unless contents

      contents.any? do |attribute|
        regexes.any? { |pattern| attribute.value.match?(pattern) }
      end
    end

    # Tells whether the document title equals the expected one, ignoring case.
    #
    # @param doc [#title] parsed document
    # @param title [String] expected title
    # @return [Boolean]
    def self.title_detection(doc, title)
      actual = doc.title&.downcase
      actual == title.downcase
    end
  end
end
@@ -0,0 +1,70 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'typhoeus'
4
+
5
# HTTP Client used to perform some requests (check for some content for example) or as a fallback in some
# browser processing (less resource intensive than using a Chrome instance but won't provide access to the DOM
# for example)
class HttpClient
  # Headers sent with every request; they take precedence over caller-supplied ones
  DEFAULT_HEADERS = {
    'Priority' => 'u=0, i',
    'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8'
  }.freeze

  # Applies the configured user agent to Typhoeus and creates the Hydra used to run requests.
  def initialize
    Typhoeus::Config.user_agent = ScanOptions.user_agent
    @hydra = Typhoeus::Hydra.new(max_concurrency: ScanOptions.http_concurrency)
  end

  # Builds the option hash handed to Typhoeus::Request.
  #
  # @param method [Symbol] HTTP method
  # @param options [Hash] read keys: :follow_location, :headers, :params
  # @param body [Object, nil] request body
  # @return [Hash]
  def request_options(method, options, body = nil)
    req_options = {
      # Certificate and host verification are disabled by these two settings
      ssl_verifypeer: false,
      ssl_verifyhost: 0,
      followlocation: options[:follow_location] || false,
      method: method,
      # Non-destructive merge: the caller's headers hash must not be modified (it may be shared or frozen)
      headers: (options[:headers] || {}).merge(DEFAULT_HEADERS),
      body: body
    }

    req_options[:params] = options[:params] if options[:params]

    req_options[:proxy] = ScanOptions.proxy_url if ScanOptions.proxy?
    req_options[:timeout] = ScanOptions.timeout

    req_options
  end

  # Performs GET requests.
  #
  # @param urls [String, Enumerable] one URL or a collection of URLs
  # @param options [Hash] see #request_options
  # @return [Hash] URL => response
  def get(urls, options = {})
    run_requests(:get, urls, options)
  end

  # Performs POST requests with the same body for every URL.
  #
  # @param urls [String, Enumerable] one URL or a collection of URLs
  # @param body [Object] request body
  # @param options [Hash] see #request_options
  # @return [Hash] URL => response
  def post(urls, body, options = {})
    run_requests(:post, urls, options, body)
  end

  private

  # Queues one request per URL, runs the Hydra and collects the responses by URL.
  def run_requests(method, urls, options, body = nil)
    responses = {}

    targets = urls.is_a?(String) ? [urls] : urls
    targets.each do |url|
      http_request = Typhoeus::Request.new(url, request_options(method, options, body))
      http_request.on_complete { |response| responses[url] = response }

      @hydra.queue(http_request)
    end

    @hydra.run

    responses
  end
end
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
# Magento Detection
class Magento < Fingerprinter::Technologies
  META_CONTENT_REGEX = [
    'Magento'
  ].freeze

  HEADERS_REGEX = {
    'x-magento-debug' => /\d/,
    'x-magento-cache-control' => /\w/
  }.freeze

  BODY_CONTENT_REGEX = [
    /Magento_PageCache/,
    /Mage\.Cookies\.path/,
    /data-requiremodule="(mage|Magento_)/,
    /mage\/cookies/,
    /MAGENTO_/,
    /Magento Security Scan/,
    /js\/mage\//,
    /x-magento-init/
  ].freeze

  # GraphQL query requested by the active check
  GRAPHQL_PATH = '/graphql?query=+{customerDownloadableProducts+{+items+{+date+download_url}}+}'

  # Strings that must all be present in the GraphQL response body
  GRAPHQL_MARKERS = ['The current customer', 'graphql-authorization'].freeze

  # Requests the GraphQL endpoint of the target, at most once per endpoint URL.
  #
  # @param url [String] URL of the target
  # @return [Object, nil] the HTTP response, or nil when the endpoint was already requested
  def self.get_graphql(url)
    url = File.join(Utilities::Urls.up_to_port(url), GRAPHQL_PATH)
    return if Utilities::Kb.inspected?(self, url)

    Utilities::Kb.inspected(self, url)
    Fingerprinter.http_client.get(url)[url]
  end

  # Runs the passive checks (headers, meta tag, body) and falls back to the GraphQL request
  # only when none of them matched.
  #
  # @param data [Hash] read keys: :response, :doc, :url
  # @return [String, nil] 'Magento' when detected, nil otherwise
  def self.run(data)
    # HEADERS_REGEX was declared but never consulted; check it like the other technologies do
    detected = response_headers_check(data[:response], HEADERS_REGEX) ||
               meta_detection(data[:doc], META_CONTENT_REGEX) ||
               whole_body_check(data[:response], BODY_CONTENT_REGEX)
    return 'Magento' if detected

    response = get_graphql(data[:url])
    return unless response&.code == 200

    # to_s guards against a response without body
    body = response.body.to_s
    return unless GRAPHQL_MARKERS.all? { |pattern| body.include?(pattern) }

    'Magento'
  end
end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
# Wordpress Detection
class Wordpress < Fingerprinter::Technologies
  HEADERS_REGEX = {
    # REST API discovery link, e.g. Link: <https://host/wp-json/>; rel="https://api.w.org/"
    'link' => %r{rel="https://api\.w\.org/"},
    'x-pingback' => %r{/xmlrpc\.php}
  }.freeze

  META_CONTENT_REGEX = [
    'WordPress'
  ].freeze

  BODY_CONTENT_REGEX = [
    %r{<script src=['"]https?://[\w./-]+wp-embed\.min\.js},
    %r{<link rel=['"]stylesheet['"] id=['"][\w-]+['"]\s+href=['"]https?://[\w.-]+/wp-(?:content|includes)/},
    %r{<script src=['"]https?://[\w./-]+wp-(?:content|includes)}
  ].freeze

  # Checks the response headers, the generator meta tag and the body for WordPress markers.
  #
  # @param data [Hash] read keys: :response, :doc
  # @return [String, nil] 'WordPress' when detected, nil otherwise
  def self.run(data)
    return unless response_headers_check(data[:response], HEADERS_REGEX) ||
                  meta_detection(data[:doc], META_CONTENT_REGEX) ||
                  whole_body_check(data[:response], BODY_CONTENT_REGEX)

    'WordPress'
  end
end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
# Apache Ofbiz Detection
class ApacheOfbiz < Fingerprinter::Technologies
  META_CONTENT_REGEX = [
    'Apache OFBiz'
  ].freeze

  HEADERS_REGEX = {
    'set-cookie' => /OFBiz\.Visitor/
  }.freeze

  BODY_CONTENT_REGEX = [
    /Powered by.*OFBiz/
  ].freeze

  # Requests the webtools xmlrpc endpoint of the target, at most once per endpoint URL.
  #
  # @param url [String] URL of the target
  # @return [Object, nil] the HTTP response, or nil when the endpoint was already requested
  def self.get_xmlrpc(url)
    url = File.join(Utilities::Urls.up_to_port(url), '/webtools/control/xmlrpc')
    return if Utilities::Kb.inspected?(self, url)

    Utilities::Kb.inspected(self, url)
    Fingerprinter.http_client.get(url)[url]
  end

  # Checks the response for OFBiz markers. When the original response is a 404, the checks are
  # run against the xmlrpc endpoint instead.
  #
  # @param data [Hash] read keys: :response, :doc, :url
  # @return [String, nil] 'Apache Ofbiz' when detected, nil otherwise
  def self.run(data)
    response = data[:response]
    doc = data[:doc]

    if response.code == 404
      response = get_xmlrpc(data[:url])
      return unless response&.code == 200

      # The meta check must look at the fallback page, not at the original 404 document
      doc = Utilities::Parser.doc(response.body)
    end

    return unless response_headers_check(response, HEADERS_REGEX) ||
                  meta_detection(doc, META_CONTENT_REGEX) ||
                  whole_body_check(response, BODY_CONTENT_REGEX)

    'Apache Ofbiz'
  end
end
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
# F5 Next Central Manager Detection
class F5NextCentralManager < Fingerprinter::Technologies
  # Detection relies on the page title only.
  #
  # @param data [Hash] read key: :doc
  # @return [String, nil] the technology name when the title matches, nil otherwise
  def self.run(data)
    'F5 Big-IP Next Central manager' if title_detection(data[:doc], 'BIG-IP Next | Central Manager')
  end
end
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
# Nexus Repository Detection
class NexusRepository < Fingerprinter::Technologies
  META_CONTENT_REGEX = ['Nexus Repository'].freeze

  # Requests the /nexus/ path of the target (following redirects), at most once per path URL.
  #
  # @param url [String] URL of the target
  # @return [Object, nil] the HTTP response, or nil when the path was already requested
  def self.check_path(url)
    target = File.join(Utilities::Urls.up_to_port(url), '/nexus/')
    return if Utilities::Kb.inspected?(self, target)

    Utilities::Kb.inspected(self, target)
    Fingerprinter.http_client.get(target, { follow_location: true })[target]
  end

  # Looks for Nexus markers in the given document first, then in the /nexus/ page.
  #
  # @param data [Hash] read keys: :doc, :url
  # @return [String, nil] 'Nexus Repository' when detected, nil otherwise
  def self.run(data)
    return 'Nexus Repository' if nexus_page?(data[:doc])

    response = check_path(data[:url])
    return unless response&.code == 200

    'Nexus Repository' if nexus_page?(Utilities::Parser.doc(response.body))
  end

  # Title or description meta tag identifying a Nexus Repository page.
  def self.nexus_page?(doc)
    title_detection(doc, 'Sonatype Nexus Repository') || meta_detection(doc, META_CONTENT_REGEX, 'description')
  end
  private_class_method :nexus_page?
end
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
# TinyProxy Detection
class TinyProxy < Fingerprinter::Technologies
  HEADERS_REGEX = { 'server' => /tinyproxy/ }.freeze

  # Detection relies on the Server response header only.
  #
  # @param data [Hash] read key: :response
  # @return [String, nil] 'TinyProxy' when the header matches, nil otherwise
  def self.run(data)
    'TinyProxy' if response_headers_check(data[:response], HEADERS_REGEX)
  end
end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
class Utilities
  # Kb : Records which (plugin, url) pairs have already been inspected
  class Kb
    class << self
      # Builds the identifier stored for a plugin/url pair.
      #
      # @return [String] "<plugin>:<url>"
      def coverage_id(plugin, url)
        [plugin.to_s, url.to_s].join(':')
      end

      # Marks the url as inspected by the plugin.
      def inspected(plugin, url)
        inspected_ids << coverage_id(plugin, url)
      end

      # @return [Boolean] true when the url was already marked as inspected by the plugin
      def inspected?(plugin, url)
        inspected_ids.include?(coverage_id(plugin, url))
      end

      private

      # List of identifiers kept in the knowledge base of Fingerprinter::Technologies
      def inspected_ids
        Fingerprinter::Technologies.kb[:inspected]
      end
    end
  end
end
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
class Utilities
  # Parser : Utilities related to parsing
  class Parser
    require 'nokogiri'

    class << self
      # Parses a body as HTML.
      #
      # @param body [Object] content handed to Nokogiri::HTML
      # @return [Object] the value returned by Nokogiri::HTML
      def doc(body)
        Nokogiri::HTML(body)
      end
    end
  end
end
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
class Utilities
  # Urls : Utilities related to urls
  class Urls
    require 'uri'

    # Parses a URL without raising.
    #
    # @param url [Object] value handed to URI()
    # @return [URI::Generic, nil] nil when the value cannot be parsed
    def self.uri_parse(url)
      URI(url)
    rescue ArgumentError, URI::InvalidURIError
      nil
    end

    # Reduces a URL to "scheme://host", followed by ":port" when the port is not the
    # default one of the scheme.
    #
    # @param url [String, URI] URL to reduce
    # @return [String, nil] nil when the URL cannot be parsed
    def self.up_to_port(url)
      uri = url.is_a?(URI) ? url : uri_parse(url)
      return unless uri

      base = "#{uri.scheme}://#{uri.host}"
      # Compare with the default port of the scheme: http://host:443 must keep its port,
      # otherwise the result would point at port 80
      base += ":#{uri.port}" unless uri.port == uri.default_port

      base
    end

    # Returns the URL up to (excluding) the first '?'.
    #
    # @param url [String, URI]
    # @return [String, nil] nil when the URL cannot be parsed
    def self.without_query(url)
      uri = url.is_a?(URI) ? url : uri_parse(url)
      return unless uri

      uri.to_s.split('?', 2).first.to_s
    end
  end
end
@@ -0,0 +1,72 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'concurrent'
4
+
5
# Fingerprinter : Entry point
module Fingerprinter
  Dir[File.join(__dir__, 'fingerprinter', 'configs/*.rb')].sort.each { |file| require file }
  Dir[File.join(__dir__, 'fingerprinter', 'core/*.rb')].sort.each { |file| require file }
  Dir[File.join(__dir__, 'fingerprinter', 'utilities/*.rb')].sort.each { |file| require file }

  # @return [HttpClient] the HTTP client, created on first use and then reused
  def self.http_client
    @http_client ||= HttpClient.new
  end

  # Technologies : Groups the different subcategories of technologies
  class Technologies
    Dir[File.join(__dir__, 'fingerprinter', 'technologies', '**/*.rb')].sort.each { |file| require file }

    attr_accessor :results
    attr_reader :http_client

    # @param options [Hash] scan options, handed to ScanOptions.build
    def initialize(options = {})
      ScanOptions.build(options)
      @results ||= Concurrent::Hash.new
    end

    # @return [Hash] knowledge base kept at class level; :inspected lists what was already requested
    def self.kb
      @kb ||= {
        inspected: Concurrent::Array.new
      }
    end

    # Runs every technology check against each URL and against the responses of its redirect chain.
    #
    # @param urls [Enumerable] URLs to fingerprint
    # @return [Hash] base URL (scheme, host, port) => names of the detected technologies
    def run(urls)
      urls.each do |url|
        final_response = get_response(url)
        next unless final_response

        base_url = Utilities::Urls.up_to_port(final_response.effective_url)

        # Created once per base URL: resetting it for each response would discard what was
        # detected on the earlier responses of the chain
        results[base_url] ||= Concurrent::Array.new

        # New array so that the list returned by #redirections is left untouched
        chain = final_response.redirections + [final_response]

        chain.each do |response|
          doc = Utilities::Parser.doc(response.body)

          data = { response: response, doc: doc, url: base_url }
          Technologies.subclasses.each { |technology| results[base_url] << technology.run(data) }
        end
      end

      # Checks return nil when nothing is detected
      results.transform_values! { |v| v.compact.uniq }
      results
    end

    private

    # Requests the URL following redirects; when the final URL is on another host, requests it
    # again without following redirects.
    #
    # @return [Object, nil] nil when the first response has code 0
    def get_response(url)
      response = Fingerprinter.http_client.get(url, { follow_location: true })[url]
      return if response&.code&.zero?
      return response if same_scope?(url, response)

      Fingerprinter.http_client.get(url, { follow_location: false })[url]
    end

    # @return [Boolean] true when the requested URL and the effective URL of the response share the same host
    def same_scope?(url, response)
      url = "https://#{url}" unless url.start_with?('http')

      Utilities::Urls.uri_parse(url)&.host == Utilities::Urls.uri_parse(response&.effective_url)&.host
    end
  end
end
metadata ADDED
@@ -0,0 +1,97 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fingerprinter
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ platform: ruby
6
+ authors:
7
+ - Joshua MARTINELLE
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2024-06-26 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: typhoeus
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: nokogiri
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: concurrent-ruby
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ description:
56
+ email:
57
+ - contact@jomar.fr
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - lib/fingerprinter.rb
63
+ - lib/fingerprinter/configs/scan_options.rb
64
+ - lib/fingerprinter/core/detector.rb
65
+ - lib/fingerprinter/core/http_client.rb
66
+ - lib/fingerprinter/technologies/cms/magento.rb
67
+ - lib/fingerprinter/technologies/cms/wordpress.rb
68
+ - lib/fingerprinter/technologies/softwares/apache_ofbiz.rb
69
+ - lib/fingerprinter/technologies/softwares/f5_next_central_manager.rb
70
+ - lib/fingerprinter/technologies/softwares/nexus_repository.rb
71
+ - lib/fingerprinter/technologies/softwares/tinyproxy.rb
72
+ - lib/fingerprinter/utilities/kb.rb
73
+ - lib/fingerprinter/utilities/parser.rb
74
+ - lib/fingerprinter/utilities/urls.rb
75
+ homepage:
76
+ licenses: []
77
+ metadata: {}
78
+ post_install_message:
79
+ rdoc_options: []
80
+ require_paths:
81
+ - lib
82
+ required_ruby_version: !ruby/object:Gem::Requirement
83
+ requirements:
84
+ - - ">="
85
+ - !ruby/object:Gem::Version
86
+ version: 2.7.1
87
+ required_rubygems_version: !ruby/object:Gem::Requirement
88
+ requirements:
89
+ - - ">="
90
+ - !ruby/object:Gem::Version
91
+ version: '0'
92
+ requirements: []
93
+ rubygems_version: 3.4.19
94
+ signing_key:
95
+ specification_version: 4
96
+ summary: Lorem Ipsum
97
+ test_files: []