sniffles 0.0.3 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.rspec +1 -1
- data/Guardfile +1 -1
- data/lib/sniffles/html.rb +19 -0
- data/lib/sniffles/sniffers/analytics/google_analytics.rb +29 -0
- data/lib/sniffles/sniffers/analytics/mixpanel.rb +24 -0
- data/lib/sniffles/sniffers/analytics/quantcast.rb +24 -0
- data/lib/sniffles/sniffers/cms/wordpress.rb +53 -0
- data/lib/sniffles/sniffers/javascript/jquery.rb +24 -0
- data/lib/sniffles/sniffers.rb +30 -0
- data/lib/sniffles/text.rb +15 -0
- data/lib/sniffles/utils.rb +7 -0
- data/lib/sniffles/version.rb +1 -1
- data/lib/sniffles.rb +33 -20
- data/spec/cassettes/google_com.yml +785 -0
- data/spec/cassettes/humemes_com.yml +496 -0
- data/spec/cassettes/pearsonified_com.yml +458 -0
- data/spec/cassettes/squidoo.yml +500 -227
- data/spec/cassettes/squidoo_com.yml +522 -0
- data/spec/cassettes/wordpress.yml +816 -814
- data/spec/sniffles/html_spec.rb +33 -0
- data/spec/sniffles/sniffers/analytics/google_analytics_spec.rb +47 -0
- data/spec/sniffles/sniffers/analytics/mixpanel_spec.rb +25 -0
- data/spec/sniffles/sniffers/analytics/quantcast_spec.rb +25 -0
- data/spec/sniffles/sniffers/cms/wordpress_spec.rb +64 -0
- data/spec/sniffles/sniffers/javascript/jquery_spec.rb +25 -0
- data/spec/sniffles/sniffers_spec.rb +21 -0
- data/spec/sniffles/text_spec.rb +48 -0
- data/spec/sniffles/utils_spec.rb +20 -0
- data/spec/sniffles_spec.rb +32 -24
- data/spec/spec_helper.rb +5 -1
- metadata +45 -10
data/.rspec
CHANGED
@@ -1,2 +1,2 @@
|
|
1
1
|
--color
|
2
|
-
--format
|
2
|
+
--format documentation
|
data/Guardfile
CHANGED
@@ -0,0 +1,19 @@
|
|
1
|
+
module Sniffles
  # Mixin with small helpers for querying a parsed HTML document.
  #
  # The including object keeps the parsed document in @doc; #parse must be
  # called before #text_at or #text_match? are used.
  module HTML
    # Parses +html+ with Nokogiri and stores the document in @doc.
    #
    # @param html [String] raw markup
    # @return the parsed document
    def parse(html)
      @doc = Nokogiri::HTML(html)
    end

    # Looks up +pattern+ in the parsed document.
    #
    # @param pattern [String] selector/XPath handed to @doc.search
    # @return the +text+ of the search result, or +false+ when nothing matched
    #   (presumably a Nokogiri NodeSet, whose text joins every matched node
    #   - TODO confirm against callers that expect a single value)
    def text_at(pattern)
      matches = @doc.search(pattern)
      return false unless matches.any?

      matches.text
    end

    # Tells whether the text found at +pattern+ equals +text+ exactly.
    #
    # @param pattern [String] selector/XPath handed to @doc.search
    # @param text [String] expected text
    # @return [Boolean]
    def text_match?(pattern, text)
      found = @doc.search(pattern).text
      found == text
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module Sniffles
  module Sniffers
    # Sniffer for Google Analytics.
    #
    # After construction, #output holds:
    #   :found - result of the Google Analytics signature check
    #   :ua    - result of capturing the quoted "UA-<account>-<property>" ID
    #
    # #parse, #match? and #capture are not defined here; presumably they come
    # from the Text mixin - verify against lib/sniffles/text.rb.
    class GoogleAnalytics
      include Text

      attr_accessor :doc
      attr_reader :output

      # @param response_body [String] raw response body to inspect
      def initialize(response_body)
        @output = {}
        # The document is only inspected when #parse returns a truthy value.
        parse(response_body) && process_document
      end

      # Fills #output with the detection result and the tracking ID.
      def process_document
        @output[:found] = google_analytics?
        parse_google_analytics_ua
      end

      private

      # True-ish when the body references google-analytics.com or the legacy
      # urchinTracker call (case-insensitive).
      def google_analytics?
        match?(/\.google\-analytics\.com|urchinTracker/i)
      end

      # Captures the tracking ID between single or double quotes.
      #
      # The property index after the second dash may have several digits
      # (e.g. "UA-12345-12"), so it is matched with \d+ rather than one digit.
      # The quote character class holds only quote characters, not "|".
      def parse_google_analytics_ua
        @output[:ua] = capture(/["'](UA-\d+-\d+)["']/)
      end
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module Sniffles
  module Sniffers
    # Sniffer reporting whether a response body references the Mixpanel
    # JavaScript library. The result is stored under :found in #output.
    #
    # #parse and #match? are not defined here; presumably they come from the
    # Text mixin - verify against lib/sniffles/text.rb.
    class Mixpanel
      include Text

      # "api.mixpanel.com", then one or more non-space characters, then
      # "mixpanel.js".
      SIGNATURE = /api\.mixpanel\.com\S+mixpanel\.js/

      attr_reader :output
      attr_accessor :doc

      # @param response_body [String] raw response body to inspect
      def initialize(response_body)
        @output = {}
        # Skip detection entirely when #parse returns a falsy value.
        process_document if parse(response_body)
      end

      # Records the detection result in #output.
      def process_document
        @output[:found] = mixpanel?
      end

      private

      def mixpanel?
        match?(SIGNATURE)
      end
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module Sniffles
  module Sniffers
    # Sniffer reporting whether a response body references the Quantcast
    # measurement script. The result is stored under :found in #output.
    #
    # #parse and #match? are not defined here; presumably they come from the
    # Text mixin - verify against lib/sniffles/text.rb.
    class Quantcast
      include Text

      # Path of the Quantcast script on any *.quantserve.com host.
      SIGNATURE = /\.quantserve\.com\/quant\.js/

      attr_reader :output
      attr_accessor :doc

      # @param response_body [String] raw response body to inspect
      def initialize(response_body)
        @output = {}
        # Skip detection entirely when #parse returns a falsy value.
        process_document if parse(response_body)
      end

      # Records the detection result in #output.
      def process_document
        @output[:found] = quantcast?
      end

      private

      def quantcast?
        match?(SIGNATURE)
      end
    end
  end
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
module Sniffles
  module Sniffers
    # Sniffer for WordPress sites.
    #
    # After construction, #output holds:
    #   :found    - whether any <link> href contains "wp-content"
    #   :version  - version number taken from the generator meta tag, or false
    #   :feed     - href text of the RSS alternate link(s), or false
    #   :theme    - theme directory name from the stylesheet href, or false
    #   :pingback - href text of the pingback link, or false
    #
    # #parse and #text_at come from the HTML mixin; #text_at yields false when
    # nothing matches, which is why false is the "absent" value throughout.
    class Wordpress
      include HTML

      attr_accessor :doc
      attr_reader :name, :group, :output, :response

      # @param response_body [String] raw HTML to inspect
      def initialize(response_body)
        @output = {}
        # The document is only inspected when #parse returns a truthy value.
        parse(response_body) && process_document
      end

      # Runs every detection step and fills #output.
      def process_document
        @output[:found] = wordpress?
        parse_version
        parse_feed
        parse_theme
        parse_pingback
      end

      private

      def wordpress?
        @doc.xpath('//link[contains(@href,"wp-content")]').any?
      end

      def parse_feed
        @output[:feed] = text_at("//link[@rel='alternate' and @type='application/rss+xml']/@href")
      end

      def parse_theme
        theme_uri = text_at("//link[@rel='stylesheet' and contains(@href,'wp-content/themes/')][1]/@href")
        # The href may contain "wp-content/themes/" without a theme directory
        # after it; in that case the regex finds nothing and we report false
        # instead of calling [] on nil.
        theme_match = theme_uri && clean_theme_uri(theme_uri)
        @output[:theme] = (theme_match ? theme_match[1] : false)
      end

      def parse_version
        version_meta_tag = text_at("//meta[@name='generator']/@content")
        # A generator tag without any version number yields no match; report
        # false rather than raising NoMethodError on nil.
        version_match = version_meta_tag && extract_version(version_meta_tag)
        @output[:version] = (version_match ? version_match[1] : false)
      end

      def parse_pingback
        @output[:pingback] = text_at("//link[@rel='pingback']/@href")
      end

      # @return [MatchData, nil] group 1 is the theme directory name
      def clean_theme_uri(uri)
        /wp-content\/themes\/([^\/]*)\//i.match uri
      end

      # Matches "major.minor" with an optional ".patch"; every component may
      # have several digits, and a trailing dot is never captured.
      #
      # @return [MatchData, nil] group 1 is the version string
      def extract_version(string)
        /(\d+\.\d+(?:\.\d+)?)/.match string
      end
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module Sniffles
  module Sniffers
    # Sniffer reporting whether a response body mentions jQuery.
    # The result is stored under :found in #output.
    #
    # #parse and #match? are not defined here; presumably they come from the
    # Text mixin - verify against lib/sniffles/text.rb.
    class Jquery
      include Text

      # Case-sensitive marker looked for in the body.
      SIGNATURE = /jQuery/

      attr_reader :output
      attr_accessor :doc

      # @param response_body [String] raw response body to inspect
      def initialize(response_body)
        @output = {}
        # Skip detection entirely when #parse returns a falsy value.
        process_document if parse(response_body)
      end

      # Records the detection result in #output.
      def process_document
        @output[:found] = jquery?
      end

      private

      def jquery?
        match?(SIGNATURE)
      end
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module Sniffles
  # Discovers, loads and runs the sniffer classes stored as
  # sniffers/<group>/<name>.rb next to this file.
  module Sniffers
    # Absolute directory holding one sub-directory per sniffer group. It is
    # resolved against this file at load time so discovery does not depend on
    # the process's working directory.
    SNIFFERS_DIR = File.expand_path("../sniffers", __FILE__).freeze

    # Loads the sniffer called +name+, runs it on +response_body+ and returns
    # its output.
    #
    # @param response_body [String] body handed to the sniffer's constructor
    # @param name [Symbol, String] sniffer file name without ".rb"
    # @return whatever the sniffer exposes through #output
    # @raise [LoadError] when no file for +name+ exists under SNIFFERS_DIR
    def self.use(response_body, name)
      sniffer = name.to_s
      file = Dir.glob(File.join(SNIFFERS_DIR, "**", "#{sniffer}.rb")).sort.first
      raise LoadError, "cannot load sniffer -- #{sniffer}" unless file

      require file
      # Resolve the class through constant lookup instead of eval so the name
      # is never executed as code.
      sniffer_class = get_sniffer_class(sniffer).split("::").inject(self) do |scope, const|
        scope.const_get(const)
      end
      sniffer_class.new(response_body).output
    end

    # Lists sniffer names, optionally limited to one group.
    #
    # @param group [Symbol, String] group directory name; "**" means all groups
    # @return [Array<Symbol>] file names without the ".rb" extension
    def self.list_all(group = "**")
      Dir.glob(File.join(SNIFFERS_DIR, group.to_s, "*.rb")).sort.map do |path|
        File.basename(path, ".rb").to_sym
      end
    end

    # Lists the group names, i.e. the sub-directories of SNIFFERS_DIR.
    #
    # @return [Array<Symbol>]
    def self.list_groups
      Dir.glob(File.join(SNIFFERS_DIR, "*")).sort
         .select { |path| File.directory?(path) }
         .map { |path| File.basename(path).to_sym }
    end

    # Maps every group name to the sniffer names it contains.
    #
    # @return [Hash{Symbol => Array<Symbol>}]
    def self.list_all_by_group
      list_groups.each_with_object({}) do |group, output|
        output[group] = list_all(group).to_a
      end
    end

    # Turns a file-style name into a class name: "google_analytics" becomes
    # "GoogleAnalytics", and a "/" becomes a "::" namespace separator.
    #
    # @param name [String]
    # @return [String]
    def self.get_sniffer_class(name)
      name.gsub(/\/(.?)/) { "::#{ $1.upcase }" }.gsub(/(?:^|[_-])(.)/) { $1.upcase }
    end
  end
end
|
data/lib/sniffles/version.rb
CHANGED
data/lib/sniffles.rb
CHANGED
@@ -1,33 +1,46 @@
|
|
1
1
|
require 'nokogiri'
|
2
2
|
|
3
|
-
require
|
3
|
+
require 'sniffles/version'
|
4
|
+
require 'sniffles/sniffers'
|
5
|
+
require 'sniffles/utils'
|
6
|
+
require 'sniffles/html'
|
7
|
+
require 'sniffles/text'
|
4
8
|
|
5
|
-
module Sniffles
|
6
|
-
def self.sniff(
|
7
|
-
doc = Nokogiri::HTML::parse(html)
|
8
|
-
|
9
|
+
module Sniffles
  # Raised by Sniffles.sniff when a requested name is neither a known sniffer
  # nor a known group. Derives from StandardError so a plain `rescue` (and
  # `rescue => e`) catches it; `rescue Sniffles::UnknownSniffer` keeps working.
  class UnknownSniffer < StandardError; end

  # Runs sniffers against +response_body+ and collects their outputs.
  #
  # @param response_body [String] body handed to every selected sniffer
  # @param sniffers_or_groups [Array<Symbol>] sniffer names and/or group names;
  #   when empty, every sniffer returned by list_all is run
  # @return [Hash{Symbol => Object}] sniffer name => that sniffer's output
  # @raise [UnknownSniffer] when a name matches neither a sniffer nor a group;
  #   names are validated before any sniffer runs
  def self.sniff(response_body, *sniffers_or_groups)
    # Each listing call goes to Sniffers, so look the names up once, not once
    # per requested name.
    known = list_all

    selected =
      if sniffers_or_groups.empty?
        known
      else
        groups = list_groups
        by_group = nil
        sniffers_or_groups.flat_map do |sniffer_or_group|
          if known.include?(sniffer_or_group)
            [sniffer_or_group]
          elsif groups.include?(sniffer_or_group)
            (by_group ||= list_all_by_group)[sniffer_or_group]
          else
            raise UnknownSniffer, "#{sniffer_or_group} not found!"
          end
        end
      end

    selected.each_with_object({}) do |sniffer, output|
      output[sniffer] = Sniffers.use(response_body, sniffer)
    end
  end

  # @return the names of all available sniffers (delegates to Sniffers)
  def self.list_all
    Sniffers.list_all
  end

  # @return the names of all sniffer groups (delegates to Sniffers)
  def self.list_groups
    Sniffers.list_groups
  end

  # @return a mapping of group name to sniffer names (delegates to Sniffers)
  def self.list_all_by_group
    Sniffers.list_all_by_group
  end
end
|