wwwtf 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in wwwtf.gemspec
4
+ gemspec
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 abhishekkr
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,33 @@
1
+ # WWWTF
2
+
3
+ World Wide Web's Trolls and Flaws
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'wwwtf'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install wwwtf
18
+
19
+ ## Usage
20
+
21
+ "Cache is King"-style summary
22
+ $ wwwtf --cache-king $URL
23
+
24
+ in Debug mode, lets you know of all dependent URLs
25
+ $ WWWTF_DEBUG=on bin/wwwtf --cache-king $URL
26
+
27
+ ## Contributing
28
+
29
+ 1. Fork it
30
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
31
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
32
+ 4. Push to the branch (`git push origin my-new-feature`)
33
+ 5. Create new Pull Request
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,17 @@
#!/usr/bin/env ruby
# Command-line entry point for wwwtf.
#
#   wwwtf --cache-king URL
#   wwwtf -ck URL
#
# Prints a cache summary (Wwwtf.cache_king) for each value supplied to the
# -ck / --cache-king switch.
$LOAD_PATH.unshift File.dirname(__FILE__) + '/../lib'

require 'arg0'
require 'wwwtf'

# All HTTP traffic is done by shelling out to curl, so stop early with a
# clear message when it cannot be found on the PATH.
if %x{which curl}.empty?
  puts 'Error: curl utility is \'currently\' required but not found in system path, fix it.'
  puts 'Notice: Will change curl dependency to Ruby HTTP but there are few missing features in internal library, WIP.'
  exit 1
end

# NOTE(review): presumably Arg0::Console.value_for returns the list of values
# given for the switch -- confirm against the arg0 gem.
cache_stats_for = Arg0::Console.value_for(['-ck', '--cache-king'])
cache_stats_for.each do |url|
  # Default to plain http when no scheme was typed. The check is anchored and
  # case-insensitive so "HTTP://host" is not turned into "http://HTTP://host".
  url = "http://#{url}" unless url =~ /\Ahttps*\:\/\//i
  Wwwtf.cache_king url
end
@@ -0,0 +1,24 @@
1
+ # Wwwtf
2
+ wwwtf_all = File.join(File.dirname(File.expand_path __FILE__), '*', '*.rb')
3
+ Dir.glob(wwwtf_all).each {|lib| require lib}
4
+ wwwtf_all_all = File.join(File.dirname(File.expand_path __FILE__), '*', '*', '*.rb')
5
+ Dir.glob(wwwtf_all_all).each {|lib| require lib}
6
+ require 'xml-motor'
7
+ require 'time'
8
+
module Wwwtf

  # Prints the "Cache is King" summary for +url+ to standard output.
  #
  # The statistics come from Wwwtf::Cache::King.stat. When the WWWTF_DEBUG
  # environment variable is set (to any value) the list of dependent URLs
  # that were inspected is printed as well.
  #
  # @param url [String] page to analyse
  # @return [nil]
  def self.cache_king(url)
    cache_stat = Wwwtf::Cache::King.stat url
    puts cache_king_summary(url, cache_stat)
    unless ENV['WWWTF_DEBUG'].nil?
      puts "\tDependent URLs:\n\t#{cache_stat['page_urls'].join("\n\t")}\n"
    end
  end

  # Builds the multi-line summary text printed by cache_king.
  #
  # @param url [String] page the statistics belong to
  # @param cache_stat [Hash] statistics hash keyed by 'total_count',
  #   'cache_control%', 'expire_in_day%', 'last_modified%' and
  #   'last_modified_hours_median'
  # @return [String] summary ending in a newline
  def self.cache_king_summary(url, cache_stat)
    lm_median_hrs = cache_stat['last_modified_hours_median']
    lm_median_line =
      if lm_median_hrs.nil?
        # A nil median means no resource sent Last-Modified; coercing it with
        # to_i used to report a misleading "0hrs. (i.e. 0 days)".
        'L-M median is unknown (no Last-Modified header was seen)'
      else
        hours = lm_median_hrs.to_i
        "#{hours}hrs. (i.e. #{hours / 24} days) is L-M median"
      end

    [
      "#{url} has:",
      "  #{cache_stat['total_count']} number of dependent content locations,",
      "  #{cache_stat['cache_control%']}% of it is Cache-Control decisive,",
      "  #{cache_stat['expire_in_day%']}% of cached expires within a day,",
      "  #{cache_stat['last_modified%']}% is aware of Last-Modified,",
      "  #{lm_median_line}",
      ''
    ].join("\n")
  end
end
@@ -0,0 +1,11 @@
# Wwwtf::Cache

module Wwwtf
  module Cache

    # Extracts the max-age value from a Cache-Control header value.
    #
    # The directive name is matched case-insensitively and at least one digit
    # is required, so a malformed "max-age=" yields nil rather than "".
    #
    # @param cache_control [String, nil] raw Cache-Control header value
    # @return [String, nil] the digits of the first max-age directive, or nil
    #   when the header is missing or carries no usable max-age
    def self.cache_control_max_age(cache_control)
      return nil if cache_control.nil?
      cache_control[/max-age\s*=\s*([0-9]+)/i, 1]
    end
  end
end
@@ -0,0 +1,95 @@
# Wwwtf::Cache::King
#
# "Cache is King" statistics: how well a page and the resources it pulls in
# via src attributes use the Cache-Control and Last-Modified headers.

require 'time'

module Wwwtf
  module Cache
    module King

      # Number of seconds in a day (60 * 60 * 24).
      SECONDS_PER_DAY = 86_400

      # Collects cache statistics for +url+ and everything it depends on.
      #
      # On any StandardError a failure message is printed and the process
      # exits with status 1; with WWWTF_DEBUG set the error is printed too.
      #
      # @param url [String] page to analyse
      # @return [Hash] with the keys 'page_urls', 'total_count',
      #   'cache_control%', 'expire_in_day%', 'last_modified%' and
      #   'last_modified_hours_median'
      def self.stat(url)
        begin
          page_urls = Wwwtf::HTTP.page_dependency(url)
          all_headers = headers(page_urls)
          cache_control, last_modified = [], []
          # The key is deliberately not called `url`: that would shadow the
          # argument that the rescue message below reports.
          all_headers.each do |_src, headr|
            cache_control.push Wwwtf::Cache.cache_control_max_age(headr['Cache-Control'])
            last_modified.push headr['Last-Modified']
          end
          {
            'page_urls' => page_urls,
            'total_count' => page_urls.size,
            'cache_control%' => cache_control_percent(cache_control),
            'expire_in_day%' => expire_in_day_percent(cache_control),
            'last_modified%' => last_modified_percent(last_modified),
            'last_modified_hours_median' => last_modified_median(last_modified)
          }
        rescue => e
          puts "WWWTF Cache King failed in collecting statistic for #{url}."
          puts "\t#{e.class}: #{e.message}" unless ENV['WWWTF_DEBUG'].nil?
          exit 1
        end
      end

      # Fetches the response headers of every URL and keeps only the
      # cache-related ones.
      #
      # @param page_urls [Array<String>]
      # @return [Hash{String => Hash}] url => filtered headers
      def self.headers(page_urls)
        fetched = {}
        page_urls.each { |src| fetched[src] = Wwwtf::HTTP.headers(src) }
        filter_cache_headers(fetched)
      end

      # Reduces each header hash to 'Cache-Control' and 'Last-Modified'.
      # A header that is absent is kept with a nil value.
      #
      # @param headers_batch [Hash{String => Hash}] url => all headers
      # @return [Hash{String => Hash}] url => the two cache headers
      def self.filter_cache_headers(headers_batch)
        filtered_headers = {}
        headers_batch.each do |url, headers|
          filtered_headers[url] = {
            'Cache-Control' => headers['Cache-Control'],
            'Last-Modified' => headers['Last-Modified']
          }
        end
        filtered_headers
      end

      # Integer percentage of entries that carry a max-age value.
      #
      # @param cache_control [Array<String, nil>] max-age per resource
      # @return [Integer] 0 for an empty list
      def self.cache_control_percent(cache_control)
        percent_of(cache_control.compact.size, cache_control.size)
      end

      # Integer percentage of ALL entries whose max-age is shorter than one
      # day, i.e. that expire within a day.
      #
      # The previous code removed exactly these entries and counted the
      # remaining ones, which is the opposite of what the name says.
      #
      # @param cache_control [Array<String, nil>] max-age per resource
      # @return [Integer] 0 for an empty list
      def self.expire_in_day_percent(cache_control)
        short_lived = cache_control.compact.select { |max_age|
          max_age.to_i < SECONDS_PER_DAY
        }
        percent_of(short_lived.size, cache_control.size)
      end

      # Integer percentage of entries that carry a Last-Modified value.
      #
      # @param last_modified [Array<String, nil>] header value per resource
      # @return [Integer] 0 for an empty list
      def self.last_modified_percent(last_modified)
        percent_of(last_modified.compact.size, last_modified.size)
      end

      # Median age, in whole hours, of the given Last-Modified values.
      #
      # Values are ordered chronologically. Sorting the raw header strings
      # would order them by weekday name instead. For an even count the two
      # middle ages are averaged with integer division.
      #
      # @param last_modified [Array<String, nil>] header value per resource
      # @return [Integer, nil] nil when no value is present
      # @raise [ArgumentError] when a value cannot be parsed as a time
      def self.last_modified_median(last_modified)
        last_modified = last_modified.compact
        return nil if last_modified.empty?
        last_mod_sorted = last_modified.sort_by { |mod_time| Time.parse(mod_time) }
        half_size = last_mod_sorted.size / 2
        median_mod_time = Wwwtf::UtilsTime.hours_until_now(last_mod_sorted[half_size])
        if last_mod_sorted.size.even?
          median_mod_time += Wwwtf::UtilsTime.hours_until_now(
            last_mod_sorted[half_size - 1]
          )
          median_mod_time /= 2
        end
        median_mod_time
      end

      # Integer percentage part/total that is safe for an empty population.
      def self.percent_of(part, total)
        return 0 if total.zero?
        (part * 100) / total
      end
      private_class_method :percent_of
    end
  end
end
@@ -0,0 +1,54 @@
# Wwwtf::HTTP
#
# Small HTTP helpers implemented by shelling out to the curl command line.

require 'shellwords'

module Wwwtf
  module HTTP

    class << self
      # Extra request header line handed to curl via -H on every request.
      attr_accessor :curl_headers
    end
    # The header name needs the hyphen ("User-Agent"); "User Agent" is not
    # the header servers look at.
    @curl_headers = 'User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.52 Safari/537.4'

    # Issues a HEAD request (curl -I), following redirects (-L), and returns
    # the response headers.
    #
    # @param url [String]
    # @return [Hash{String => String}] parsed headers (see parse_headers);
    #   empty when any response in the redirect chain has a 4xx/5xx status
    def self.headers(url)
      # Both values are shell-escaped: the URL comes from the command line or
      # from scraped page content and must not be interpreted by the shell.
      hdrs = %x{curl -IkLs -H #{Shellwords.escape(curl_headers)} #{Shellwords.escape(url)}}
      return {} if hdrs.match(/HTTP\/[0-9](?:\.[0-9])?\s+[45][0-9][0-9]/)
      parse_headers(hdrs)
    end

    # Parses the raw header text printed by curl.
    #
    # * status lines and the blank lines separating the responses of a
    #   redirect chain are skipped;
    # * only the first ':' splits name from value, so values that contain
    #   colons (times, URLs) stay intact;
    # * names are normalised to Hyphenated-Capital form, because HTTP/2
    #   responses carry lowercase names ("cache-control");
    # * when a header occurs more than once, the last occurrence wins.
    #
    # @param raw [String] header block(s) as printed by curl -I
    # @return [Hash{String => String}]
    def self.parse_headers(raw)
      parsed = {}
      raw.split(/\r?\n/).each do |line|
        name, value = line.split(':', 2)
        next if value.nil? || name.strip.empty?
        parsed[canonical_header_name(name)] = value.strip
      end
      parsed
    end

    # "cache-control" / "CACHE-CONTROL" -> "Cache-Control".
    def self.canonical_header_name(name)
      name.strip.split('-').collect { |part| part.capitalize }.join('-')
    end
    private_class_method :canonical_header_name

    # Downloads the response body of +url+, following redirects.
    #
    # @param url [String]
    # @return [String] whatever curl wrote to standard output
    def self.body(url)
      %x{curl -kLs -H #{Shellwords.escape(curl_headers)} #{Shellwords.escape(url)}}
    end

    # Lists +url+ itself plus the absolute URL of every `src` attribute found
    # in its body.
    #
    # NOTE(review): relies on XMLMotor.splitter / indexify / xmlattrib from
    # the xml-motor gem; presumably xmlattrib returns the raw attribute
    # values -- confirm against that gem.
    #
    # @param url [String]
    # @return [Array<String>] unique URLs, the page itself first
    def self.page_dependency(url)
      page_content = body url
      xtmp_nodes = XMLMotor.splitter page_content
      xtmp_tags = XMLMotor.indexify xtmp_nodes
      tag_src = XMLMotor.xmlattrib 'src', xtmp_nodes, xtmp_tags
      src_in_page = tag_src.uniq.collect{|src|
        fix_urls url, src
      }
      # fix_urls answers nil for values it cannot handle; drop those before
      # the URLs are normalised.
      Wwwtf::HTTP::URL.uniq(([url] + src_in_page).compact).compact
    end

    # Turns a src attribute value into an absolute URL.
    #
    # * absolute http(s) URLs are returned as they are;
    # * protocol-relative values ("//host/x") inherit the scheme of
    #   +base_url+ (http when the base has none);
    # * anything else is appended to +base_url+.
    #
    # @param base_url [String] URL of the page the value was found on
    # @param src_string [String] raw attribute value, possibly quoted and
    #   backslash-escaped
    # @return [String, nil] nil (after printing "ERR: ...") when the value
    #   cannot be processed
    def self.fix_urls(base_url, src_string)
      begin
        # String#unescape is this gem's own core extension.
        src_string = src_string.unescape.strip
        src_string = src_string[1..-2] if src_string.match(/^\".*\"$/)
        http = base_url.scan(/^(https*)\:\/\//i).flatten.join
        http = 'http' if http.empty?
        if src_string.match(/^https*\:\/\//i)
          src_string
        elsif src_string.match(/^\/\//)
          "#{http}:#{src_string}"
        else
          File.join base_url, src_string
        end
      rescue
        puts "ERR: #{src_string}"
        return nil
      end
    end
  end
end
@@ -0,0 +1,22 @@
# Wwwtf::HTTP::URL

module Wwwtf
  module HTTP
    module URL

      # Normalises +url+ so that trailing slashes do not make two spellings
      # of the same location look different.
      #
      # A bare "scheme://host" (with or without trailing slashes) is handled
      # separately: pushing it through File.dirname/File.basename would
      # collapse the "//" and yield "scheme:/host".
      #
      # @param url [String]
      # @return [String]
      def self.unroot(url)
        if url =~ %r{\A[a-z][a-z0-9+.\-]*://[^/]+/*\z}i
          return url.sub(%r{/+\z}, '')
        end
        File.join File.dirname(url), File.basename(url)
      end

      # Tells whether two URLs point at the same location when the http/https
      # scheme is ignored and "." / ".." segments are resolved.
      #
      # @param url1 [String]
      # @param url2 [String]
      # @return [Boolean]
      def self.same(url1, url2)
        url1 = url1.gsub(/^https*\:\/\//, '')
        url2 = url2.gsub(/^https*\:\/\//, '')
        File.expand_path(url1) == File.expand_path(url2)
      end

      # Normalises every URL with unroot and removes duplicates; nil entries
      # are dropped.
      #
      # @param urls [Array<String, nil>]
      # @return [Array<String>]
      def self.uniq(urls)
        urls.compact.collect{|url| unroot(url)}.uniq
      end
    end
  end
end
@@ -0,0 +1,14 @@
class String

  # Returns a copy of the string with backslash escapes removed: every
  # backslash is dropped and the character following it (a second backslash
  # or a newline included) is kept literally.
  #
  # A backslash that is the very last character is simply dropped; the
  # previous loop raised NoMethodError in that case because the remainder
  # of the string became nil.
  #
  # @return [String]
  def unescape
    gsub(/\\(.?)/m) { Regexp.last_match(1) }
  end
end
@@ -0,0 +1,16 @@
# Wwwtf::UtilsTime

# Time.parse lives in the 'time' standard library. Without this require the
# resulting NoMethodError would be swallowed by the rescue below and
# misreported as "Bad Times".
require 'time'

module Wwwtf
  module UtilsTime

    # Whole hours elapsed between +time+ and now.
    #
    # When +time+ is nil or cannot be parsed, a message is printed and the
    # process exits with status 1.
    #
    # @param time [String, Time] a timestamp Time.parse understands, or a
    #   Time instance
    # @return [Integer] elapsed seconds divided by 3600 (integer division)
    def self.hours_until_now(time)
      begin
        moment = time.is_a?(Time) ? time : Time.parse(time)
        (Time.now - moment).to_i / 3600
      rescue
        msg = time.nil? ? 'NIL Times' : "Bad Times #{time}"
        puts "This app has seen #{msg}"
        exit 1
      end
    end
  end
end
@@ -0,0 +1,3 @@
module Wwwtf
  # Release version of the gem; wwwtf.gemspec reads it for gem.version.
  # Frozen so the shared constant cannot be mutated in place.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,24 @@
# -*- encoding: utf-8 -*-
# Gem specification for wwwtf.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'wwwtf/version'

Gem::Specification.new do |gem|
  gem.name          = "wwwtf"
  gem.version       = Wwwtf::VERSION
  gem.authors       = ["abhishekkr"]
  gem.email         = ["abhikumar163@gmail.com"]
  gem.description   = %q{World Wide Web's Trolls and Flaws giving varied Trolls & Flaws analyzing the BAD WWW around.}
  gem.summary       = %q{World Wide Web's Trolls and Flaws}
  gem.homepage      = "https://github.com/abhishekkr/wwwtf"
  # The package ships an MIT LICENSE.txt; declare it so it shows in metadata.
  gem.license       = 'MIT'

  # Package everything tracked by git.
  gem.files         = `git ls-files`.split($/)
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # Single command-line entry point (bin/wwwtf). This used to be assigned
  # twice; only the explicit list ever took effect, so it is the one kept.
  gem.executables   = %w( wwwtf )

  gem.add_runtime_dependency 'xml-motor', '>= 0.1.6'
  gem.add_runtime_dependency 'arg0', '>= 0.0.2'
end
metadata ADDED
@@ -0,0 +1,94 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: wwwtf
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - abhishekkr
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-02-06 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: xml-motor
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: 0.1.6
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: 0.1.6
30
+ - !ruby/object:Gem::Dependency
31
+ name: arg0
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: 0.0.2
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: 0.0.2
46
+ description: World Wide Web's Trolls and Flaws giving varied Trolls & Flaws analyzing
47
+ the BAD WWW around.
48
+ email:
49
+ - abhikumar163@gmail.com
50
+ executables:
51
+ - wwwtf
52
+ extensions: []
53
+ extra_rdoc_files: []
54
+ files:
55
+ - .gitignore
56
+ - Gemfile
57
+ - LICENSE.txt
58
+ - README.md
59
+ - Rakefile
60
+ - bin/wwwtf
61
+ - lib/wwwtf.rb
62
+ - lib/wwwtf/cache.rb
63
+ - lib/wwwtf/cache/king.rb
64
+ - lib/wwwtf/http.rb
65
+ - lib/wwwtf/http/url.rb
66
+ - lib/wwwtf/utils/string.rb
67
+ - lib/wwwtf/utils/time.rb
68
+ - lib/wwwtf/version.rb
69
+ - wwwtf.gemspec
70
+ homepage: https://github.com/abhishekkr/wwwtf
71
+ licenses: []
72
+ post_install_message:
73
+ rdoc_options: []
74
+ require_paths:
75
+ - lib
76
+ required_ruby_version: !ruby/object:Gem::Requirement
77
+ none: false
78
+ requirements:
79
+ - - ! '>='
80
+ - !ruby/object:Gem::Version
81
+ version: '0'
82
+ required_rubygems_version: !ruby/object:Gem::Requirement
83
+ none: false
84
+ requirements:
85
+ - - ! '>='
86
+ - !ruby/object:Gem::Version
87
+ version: '0'
88
+ requirements: []
89
+ rubyforge_project:
90
+ rubygems_version: 1.8.24
91
+ signing_key:
92
+ specification_version: 3
93
+ summary: World Wide Web's Trolls and Flaws
94
+ test_files: []