framework_guesser 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (47) hide show
  1. data/.gitignore +17 -0
  2. data/Gemfile +4 -0
  3. data/LICENSE.txt +22 -0
  4. data/README.md +45 -0
  5. data/Rakefile +1 -0
  6. data/framework_guesser.gemspec +24 -0
  7. data/lib/framework_guesser.rb +9 -0
  8. data/lib/framework_guesser/matcher.rb +327 -0
  9. data/lib/framework_guesser/version.rb +3 -0
  10. data/spec/fixtures/adsense-1.html +567 -0
  11. data/spec/fixtures/adsense-2.html +200 -0
  12. data/spec/fixtures/drupal-1.html +294 -0
  13. data/spec/fixtures/drupal-1.yaml +9 -0
  14. data/spec/fixtures/drupal-2.html +296 -0
  15. data/spec/fixtures/drupal-2.yaml +9 -0
  16. data/spec/fixtures/drupal-3.html +161 -0
  17. data/spec/fixtures/drupal-3.yaml +11 -0
  18. data/spec/fixtures/drupal-4.html +173 -0
  19. data/spec/fixtures/drupal-4.yaml +9 -0
  20. data/spec/fixtures/drupal-5.html +159 -0
  21. data/spec/fixtures/drupal-5.yaml +9 -0
  22. data/spec/fixtures/drupal-6.html +199 -0
  23. data/spec/fixtures/drupal-7.html +83 -0
  24. data/spec/fixtures/etarget-1.html +206 -0
  25. data/spec/fixtures/etarget-2.html +369 -0
  26. data/spec/fixtures/etarget-3.html +430 -0
  27. data/spec/fixtures/facebook-1.html +1256 -0
  28. data/spec/fixtures/facebook-1.yaml +11 -0
  29. data/spec/fixtures/facebook-2.html +119 -0
  30. data/spec/fixtures/facebook-2.yaml +11 -0
  31. data/spec/fixtures/facebook-3.html +315 -0
  32. data/spec/fixtures/facebook-3.yaml +8 -0
  33. data/spec/fixtures/opencart-1.html +625 -0
  34. data/spec/fixtures/opencart-2.html +271 -0
  35. data/spec/fixtures/prestashop-1.html +341 -0
  36. data/spec/fixtures/prestashop-2.html +435 -0
  37. data/spec/fixtures/rails-30-1.html +215 -0
  38. data/spec/fixtures/rails-30-2.html +209 -0
  39. data/spec/fixtures/rails-30-3.html +206 -0
  40. data/spec/fixtures/rails-31.html +120 -0
  41. data/spec/fixtures/ubercart-1.html +358 -0
  42. data/spec/fixtures/ubercart-2.html +447 -0
  43. data/spec/framework_guesser_spec.rb +19 -0
  44. data/spec/matcher_spec.rb +227 -0
  45. data/spec/spec_helper.rb +21 -0
  46. data/spec/support/helpers.rb +18 -0
  47. metadata +178 -0
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in framework_guesser.gemspec
4
+ gemspec
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Martin Lipták
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,45 @@
1
+ # framework_guesser
2
+
3
+ Framework guesser tries to detect web frameworks and JavaScript libraries from a page's HTML code
4
+ and a hash of its HTTP headers. Some extra information, such as the server, server-side programming language, doctype, meta description and keywords, is returned as well.
5
+
6
+ It is used by [statscrawler.com][1] to analyze sites and collect statistics about
7
+ Internet domains. This is a sample (though working and pretty usable) for everyone
8
+ interested in framework detection on statscrawler.com.
9
+
10
+ [1]: http://www.statscrawler.com
11
+
12
+ ## Usage
13
+
14
+ Requires nokogiri; rspec is needed only to run the tests.
15
+
16
+ ```ruby
17
+ require 'open-uri'
18
+ require 'openssl'
19
+ require 'framework_guesser'
20
+
21
# Fetch a few well-known sites and print what framework_guesser detects.
%w[rubyonrails.org drupal.org wordpress.org joomla.org].each do |domain|
  begin
    # Kernel#open no longer opens URLs on Ruby 3.0+; open-uri's URI.open does.
    # NOTE: VERIFY_NONE disables TLS certificate verification. That keeps this
    # demo tolerant of misconfigured hosts, but do not copy it into production.
    URI.open("http://www." + domain,
             :read_timeout => 10,
             :ssl_verify_mode => OpenSSL::SSL::VERIFY_NONE) do |file|
      # base_uri is the URL open-uri reports for the fetched document.
      url = file.base_uri.to_s

      # file.meta is the response header hash, file.read the HTML body.
      result = FrameworkGuesser.guess(file.meta, file.read)
      puts "#{domain} => #{url}"
      puts "Description: #{result[:description]}"
      puts "Keywords: #{result[:keywords]}"
      puts "Server: #{result[:server]}"
      puts "Engine: #{result[:engine]}"
      puts "Doctype: #{result[:doctype]}"
      puts "Framework: #{result[:framework]}"
      puts "Features: #{result[:features].join(', ')}"

      puts
    end
  rescue StandardError => err
    # Report the failure for this domain and carry on with the next one.
    puts "#{domain} => #{err.message}"
  end
end
45
+ ```
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,24 @@
1
+ # -*- encoding: utf-8 -*-
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'framework_guesser/version'
5
+
6
# Gem metadata for framework_guesser.
Gem::Specification.new do |gem|
  gem.name          = "framework_guesser"
  gem.version       = FrameworkGuesser::VERSION
  gem.authors       = ["Martin Lipták"]
  gem.email         = ["mliptak@gmail.com"]
  gem.description   = %q{Detection of framework and javascript libraries from HTML code.}
  gem.summary       = %q{framework_guesser tries to detect frameworks and javascript libraries from HTML code and HTTP headers hash. Some extra information like server, server-side programming language, doctype, meta description and keywords are returned as well.}
  gem.homepage      = "https://github.com/martinliptak/framework_guesser"
  # The package ships an MIT LICENSE.txt; declare it in the metadata as well.
  gem.license       = "MIT"

  # Runtime dependency: the matcher parses HTML with Nokogiri.
  gem.add_dependency "nokogiri"

  gem.add_development_dependency "rspec"
  gem.add_development_dependency "fakeweb"

  # Package exactly the files tracked by git ($/ is the input record separator).
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]
end
@@ -0,0 +1,9 @@
1
+ require "framework_guesser/version"
2
+ require "framework_guesser/matcher"
3
+
4
module FrameworkGuesser
  # Convenience entry point: runs a fresh Matcher over one HTTP response.
  #
  # headers - Hash of HTTP response headers.
  # body    - String with the HTML of the page.
  #
  # Returns whatever Matcher#match returns for these arguments.
  def self.guess(headers, body)
    Matcher.new.match(headers, body)
  end
end
@@ -0,0 +1,327 @@
1
+ require 'nokogiri.rb'
2
+
3
module FrameworkGuesser
  # Inspects one HTTP response (header hash plus HTML body) and collects what
  # can be guessed about the site into a result hash with these keys:
  #
  #   :features    - Array of detected libraries/services (always present)
  #   :description - content of the meta description tag, when found
  #   :keywords    - content of the meta keywords tag, when found
  #   :server      - web server (Symbol for known servers, else the raw header)
  #   :engine      - server-side language (Symbol when recognised, else raw header)
  #   :doctype     - normalised doctype declaration
  #   :framework   - detected framework/CMS
  #
  # Detection rules run in a fixed order. A later `framework` call overrides an
  # earlier one unless the value was set with `framework_final`.
  class Matcher
    # Frameworks whose presence implies a PHP backend.
    PHP_FRAMEWORKS = [:joomla, :wordpress, :drupal, :typo3, :virtuemart, :nette, :ubercart,
                      :cakephp, :symfony, :zencart, :prestashop, :opencart].freeze

    # Frameworks whose presence implies a Ruby backend.
    RUBY_FRAMEWORKS = [:rails].freeze

    # Rails asset fingerprints: either a ten-digit timestamp query string on a
    # file under /javascripts, /stylesheets or /images, or a 32-hex-digit
    # digest in a file name anywhere under /assets.
    RAILS_ASSET_PATH =
      %r{^/(javascripts|stylesheets|images)/([^/]+/)*[^/]*\.\w{2,4}\?\d{10}$|^/assets/([^/]*/)*[^/]*-[a-f0-9]{32}}

    # Analyses a response.
    #
    # headers - Hash of HTTP headers; looked up with the lower-case names
    #           "server", "x-powered-by" and "set-cookie". nil is treated as
    #           an empty hash.
    # body    - String with the raw HTML.
    #
    # Returns the result Hash described in the class comment.
    def match(headers, body)
      @headers = headers || {}
      @data = body
      @result = { :features => [] }
      # Reset the "final" locks so a reused Matcher starts from a clean state.
      @final = false
      @engine_final = false
      @document = Nokogiri::HTML(@data)

      description = @document.css("meta[name=description], meta[name=Description]").first
      @result[:description] = description['content'] if description

      # Accept both spellings of the attribute value, as for description.
      keywords = @document.css("meta[name=keywords], meta[name=Keywords]").first
      @result[:keywords] = keywords['content'] if keywords

      run
      @result[:features].uniq!
      @result
    end

    private

    # Runs every detection step. The order matters because later framework
    # guesses overwrite earlier ones (unless final).
    def run
      resources = []

      detect_server
      detect_engine
      detect_cookies
      detect_raw_markup
      detect_scripts(resources)
      detect_stylesheets(resources)
      detect_inline_styles
      collect_images(resources)
      detect_iframes
      detect_resource_paths(resources)
      detect_generator
      detect_dom_markers
      infer_engine
    end

    # Web server from the "server" header; unknown values are stored verbatim.
    def detect_server
      header "server" do |serv|
        case serv
        when /^apache/i
          server :apache
        when /^nginx/i
          server :nginx
        when /^microsoft-iis/i
          server :microsoft_iis
        when /^lighttpd/i
          server :lighttpd
        when /^mongrel/i
          engine :ruby
          server :mongrel
        else
          server serv
        end
      end
    end

    # Engine (and sometimes framework) from the "x-powered-by" header;
    # unknown values are stored verbatim as the engine.
    def detect_engine
      header "x-powered-by" do |x_powered_by|
        case x_powered_by
        when /^php/i
          engine :php
        when /^asp/i
          engine :asp
        when /^nette/i
          framework :nette
          engine :php
        when /^w3 total cache/i
          framework :wordpress
          engine :php
        when /^(servlet|jsp|jsf)/i
          engine :java
        when /^phusion passenger/i
          engine :ruby
          framework :rails
        when "Chuck Norris!"
          engine_final x_powered_by # chuck norris is final
        else
          engine x_powered_by
        end
      end
    end

    # Engine or framework from well-known session cookie names.
    def detect_cookies
      header "set-cookie" do |set_cookie|
        case set_cookie
        when /^PHPSESSID/
          engine :php
        when /^ASP\.NET/
          engine :asp
        when /^JSESSIONID/
          engine :java
        when /^CAKEPHP/
          framework :cakephp
        when /^symfony/
          framework :symfony
        when /^zenid/
          framework :zencart
        end
      end
    end

    # Rules evaluated against the raw HTML text rather than the parsed DOM.
    def detect_raw_markup
      # doctype, with an optional XML prolog in front of it
      regexp(/^\s*(<\?xml[^?]+\?>)?\s*<!doctype\s*(html\s*public\s*".[^"]*"|html)/i) do |found|
        doctype found[2].delete("\n\r").strip.squeeze(" ").downcase
      end

      # "Powered By OpenCart" HTML comment
      regexp(/<!--[^>]*Powered By OpenCart/) do
        framework :opencart
      end

      # google adsense section marker
      regexp(/<!--\s*google_ad_section_start/) do
        feature :google_adsense
      end
    end

    # <script> elements: full URL, file name and inline content.
    # Every src is appended to +resources+ for the later path checks.
    def detect_scripts(resources)
      element("script") do |script|
        attribute(script, "src") do |src|
          match_script_url(src)
          filename(src) { |name| match_script_filename(name) }
          resources << src
        end

        match_script_content(script)
      end
    end

    # Script src compared as a whole URL/path.
    def match_script_url(src)
      case src
      when "/media/system/js/mootools.js"
        framework :joomla
      when "http://www.google-analytics.com/ga.js"
        feature :google_analytics
      when %r{^(http://static\.ak\.fbcdn\.net|http://connect\.facebook\.net)}
        feature :facebook
      when %r{^http://\w{2,3}\.search\.etargetnet\.com}
        feature :etarget
      when "http://pagead2.googlesyndication.com/pagead/show_ads.js"
        feature :google_adsense
      end
    end

    # Script file name (last path segment).
    def match_script_filename(name)
      case name
      when /^jquery-ui/i
        # Must be tested before the plain jquery rule, which also matches it.
        feature :jquery_ui
        # Such files were always reported as :jquery as well; keep doing so.
        feature :jquery
      when /^jquery/i
        feature :jquery
      when /^prototype/i
        feature :prototype
      when /^scriptaculous/i
        feature :scriptaculous
      when /^mootools/i
        feature :mootools
      when /^drupal/i
        framework :drupal
      end
    end

    # Inline script text. Only the first matching rule applies per element.
    # NOTE(review): the regexps are matched against the node itself, so this
    # relies on the node converting implicitly to its text (to_str) - confirm.
    def match_script_content(script)
      case script
      when /^.{0,30}jQuery\.extend\(Drupal\.settings/
        framework :drupal
      when /jQuery/
        feature :jquery
      when /['"]UA-[\d-]{5,}["']/
        feature :google_analytics
      when %r{//connect\.facebook\.net}
        feature :facebook
      when %r{var\s*EtargetBannerIdent|http://\w{2,3}\.search\.etargetnet\.com}
        feature :etarget
      end
    end

    # Linked stylesheets: full href and file name.
    # Every href is appended to +resources+ for the later path checks.
    def detect_stylesheets(resources)
      element("link[rel=stylesheet]") do |style|
        attribute(style, "href") do |href|
          # full path
          case href
          when /modules\/node\/node\.css/
            framework :drupal
          when %r{/themes/[^/]+/css/global\.css}
            framework :prestashop
          when %r{^catalog/view/theme/[^/]+/stylesheet/stylesheet\.css}
            framework :opencart
          end

          # filename
          filename(href) do |name|
            case name
            when /^uc_product/i
              framework_final :ubercart
            end
          end

          resources << href
        end
      end
    end

    # Inline <style> blocks importing Drupal core stylesheets.
    def detect_inline_styles
      element("style") do |style|
        case style
        when /^@import "(\/modules\/node\/node\.css|\/misc\/drupal\.css)";/
          framework :drupal
        end
      end
    end

    # Image sources only feed the shared resource path checks.
    def collect_images(resources)
      element("img") do |img|
        attribute(img, 'src') do |src|
          resources << src
        end
      end
    end

    # Iframes pointing at facebook.com (like boxes and similar widgets).
    def detect_iframes
      element("iframe") do |iframe|
        attribute(iframe, "src") do |src|
          case src
          when %r{^http://www\.facebook\.com}
            feature :facebook
          end
        end
      end
    end

    # Path conventions shared by images, stylesheets and scripts.
    def detect_resource_paths(resources)
      resources.each do |link|
        case link
        when /^\/sites\/[\w\d\-.]+\/(files|themes|modules)/
          framework :drupal
        when /\/(wp-includes\/js|wp-content\/(plugins|themes))\//
          framework :wordpress
        when /com_virtuemart/
          framework_final :virtuemart
        when RAILS_ASSET_PATH
          framework :rails
        end
      end
    end

    # Framework named by the meta generator tag.
    def detect_generator
      generator do |content|
        case content
        when /^joomla/i
          framework :joomla
        when /^typo3/i
          framework :typo3
        when /^wordpress/i
          framework :wordpress
        when /^drupal/i
          framework :drupal
        when /^shopping cart program by Zen Cart/
          framework :zencart
        when /^prestashop/i
          framework :prestashop
        end
      end
    end

    # DOM elements that identify a framework by their mere presence.
    def detect_dom_markers
      # rails authenticity token / csrf meta tags
      element("form input[name=authenticity_token], meta[name=csrf-param], meta[name=csrf-token]") do
        framework :rails
      end

      # ubercart classes
      element(".uc-price, .uc-price-product, .block-uc_cart") do
        framework_final :ubercart
      end
    end

    # A detected framework implies its engine (unless the engine is final).
    def infer_engine
      engine :php if framework_in?(PHP_FRAMEWORKS)
      engine :ruby if framework_in?(RUBY_FRAMEWORKS)
    end

    # Yields the value of HTTP header +name+ when it is present.
    def header(name)
      value = @headers[name]
      yield value if value
    end

    # Matches +exp+ against the raw HTML and yields the MatchData on success.
    def regexp(exp)
      found = exp.match(@data)
      yield found if found
    rescue ArgumentError
      # very rare UTF-8 error; treated as "no match"
    end

    # Yields every DOM element selected by the CSS expression +path+.
    def element(path)
      @document.css(path).each do |node|
        yield node
      end
    end

    # Yields attribute +attr+ of +script+ when the element has it.
    def attribute(script, attr)
      value = script[attr]
      yield value if value
    end

    # Yields the part of +path+ after its last slash
    # (the whole path when it contains no slash).
    def filename(path)
      found = /(\/|^)([^\/]*$)/i.match(path)
      yield found[2] if found
    end

    # Yields the content of the meta generator tag, if there is one.
    def generator
      tag = @document.css("meta[name=generator], meta[name=Generator]").first
      yield tag["content"] if tag && tag["content"]
    end

    # True when the framework detected so far is one of +frameworks+.
    def framework_in?(frameworks)
      frameworks.include?(@result[:framework])
    end

    # Result setters used by the detection rules.

    def server(val)
      @result[:server] = val
    end

    # Sets the engine unless it has been locked by engine_final.
    def engine(val)
      @result[:engine] = val unless @engine_final
    end

    # Sets the engine and locks it against later changes.
    def engine_final(val)
      @result[:engine] = val
      @engine_final = true
    end

    def doctype(val)
      @result[:doctype] = val
    end

    # Sets the framework unless it has been locked by framework_final.
    def framework(val)
      @result[:framework] = val unless @final
    end

    # Sets the framework and locks it against later changes.
    def framework_final(val)
      @result[:framework] = val
      @final = true
    end

    def feature(val)
      @result[:features] << val
    end
  end
end