snatch 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
data/.gitignore ADDED
@@ -0,0 +1,21 @@
1
+ ## MAC OS
2
+ .DS_Store
3
+
4
+ ## TEXTMATE
5
+ *.tmproj
6
+ tmtags
7
+
8
+ ## EMACS
9
+ *~
10
+ \#*
11
+ .\#*
12
+
13
+ ## VIM
14
+ *.swp
15
+
16
+ ## PROJECT::GENERAL
17
+ coverage
18
+ rdoc
19
+ pkg
20
+
21
+ ## PROJECT::SPECIFIC
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 AlphaSights
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,19 @@
1
+ = snatch
2
+
3
+ Ruby wrapper around wget to recursively download a site and copy it into a public directory, typically within Rails.
4
+
5
+ We use this gem to download the static files produced by a CMS used by our Marketing department and then serve them using Rack within our Rails app.
6
+
7
+ == Note on Patches/Pull Requests
8
+
9
+ * Fork the project.
10
+ * Make your feature addition or bug fix.
11
+ * Add tests for it. This is important so I don't break it in a
12
+ future version unintentionally.
13
+ * Commit, do not mess with rakefile, version, or history.
14
+ (if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
15
+ * Send me a pull request. Bonus points for topic branches.
16
+
17
+ == Copyright
18
+
19
+ Copyright (c) 2010 James Conroy-Finn. See LICENSE for details.
data/Rakefile ADDED
@@ -0,0 +1,57 @@
1
require 'rubygems'
require 'rake'

# Packaging tasks come from Jeweler. When Jeweler cannot be loaded we print a
# hint and carry on so the remaining tasks stay usable.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "snatch"
    gem.summary = %Q{wget your site and replace any nasty PHP CSS files}
    gem.description = %Q{Simple site downloader that wraps wget and converts PHP CSS files in to regular CSS files.}
    gem.email = "james@logi.cl"
    gem.homepage = "http://github.com/jcf/snatch"
    gem.authors = ["James Conroy-Finn"]
    gem.add_development_dependency "rspec", ">= 1.2.9"
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
  Jeweler::GemcutterTasks.new

  # check_dependencies is not defined anywhere in this Rakefile (it is supplied
  # by the Jeweler tasks above), so the prerequisite is only declared when
  # Jeweler loaded. Declaring it unconditionally made `rake spec` (and the
  # default task) fail with an unknown-task error on the LoadError path.
  task :spec => :check_dependencies
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end

# Run every spec under spec/.
require 'spec/rake/spectask'
Spec::Rake::SpecTask.new(:spec) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.spec_files = FileList['spec/**/*_spec.rb']
end

# Same specs, with rcov coverage enabled.
Spec::Rake::SpecTask.new(:rcov) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.pattern = 'spec/**/*_spec.rb'
  spec.rcov = true
end

# Roodi is optional: without it the :roodi task aborts with install instructions.
begin
  require 'roodi'
  require 'roodi_task'
  RoodiTask.new do |t|
    t.verbose = false
  end
rescue LoadError
  task :roodi do
    abort "Roodi is not available. In order to run roodi, you must: sudo gem install roodi"
  end
end

task :default => :spec

require 'rake/rdoctask'
Rake::RDocTask.new do |rdoc|
  # strip drops the trailing newline of the VERSION file so it does not end up
  # inside the generated title.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "snatch #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.1
data/lib/extensions.rb ADDED
@@ -0,0 +1,5 @@
1
# Core extension: adds a helper for wrapping a string in quote characters.
class String
  # Surround the receiver with the given quote string on both sides.
  #
  # quote - The value placed before and after the receiver, converted with
  #         to_s (default: a double quote).
  #
  # Returns a new String.
  def quote(quote = '"')
    "#{quote}#{self}#{quote}"
  end
end
@@ -0,0 +1,85 @@
1
+ require 'nokogiri'
2
+
3
require 'fileutils'

class Snatch
  # Rewrites the stylesheet links of a single HTML file: hrefs of the form
  # "stylesheet.php?cssid=N&mediatype=M" become "N-M.css", and the matching
  # file below the working directory is renamed accordingly.
  class Clean
    attr_reader :doc, :working_directory

    # file_name         - Path of the HTML file to parse (and later rewrite).
    # working_directory - Directory that stylesheet hrefs are resolved
    #                     against (default: Snatch::PUBLIC_PATH).
    def initialize(file_name, working_directory = nil)
      @file_name = file_name
      @working_directory = working_directory || Snatch::PUBLIC_PATH
      # Block form so the file handle is closed once parsing has finished;
      # the previous bare File.open left it open.
      @doc = File.open(@file_name, 'r') { |file| Nokogiri::HTML(file) }
    end

    # Convenience method for creating a Snatch::Clean and processing it.
    #
    # html              - Despite the name, this is handed to new as the
    #                     file name of the HTML document.
    # working_directory - See initialize.
    #
    # Returns instance of Snatch::Clean
    def self.process(html, working_directory = nil)
      instance = new(html, working_directory)
      instance.process
      instance
    end

    # Loop through each link with a stylesheet rel attribute and remove
    # dynamic PHP hrefs, replacing with plain CSS paths. The HTML file is
    # written back only when at least one href matched the PHP pattern.
    #
    # Returns nil when no href was rewritten, otherwise the result of the
    # write.
    def process
      @doc.css('link[rel=stylesheet]').each do |stylesheet_node|
        stylesheet_node['href'] = rewrite_href(stylesheet_node['href'])
      end

      return unless @css_path
      File.open(@file_name, 'w') { |f| f.write @doc.to_html }
    end

    private

    # Print the inspected messages in colour followed by a rule of "=" as
    # wide as the longest printed line.
    #
    # The width used to be computed with messages.max(&:size), which raises
    # for two or more messages (the block receives two elements) and for none.
    def log(*messages)
      lines = messages.map(&:inspect)
      width = lines.map(&:size).max.to_i
      puts "\e[36;1m#{lines.join("\n")}\e[0m"
      puts '=' * width
    end

    # Turn a dynamic stylesheet href into a static CSS path and move the
    # file. Blank hrefs and hrefs that do not match are returned untouched.
    #
    # Plain Ruby checks are used instead of ActiveSupport's present?, which
    # this library never loads.
    #
    # Returns the href to store on the link node.
    def rewrite_href(href)
      return href if href.nil? || href.strip.empty?

      matches = extract_path_components(href)
      return href unless matches

      # Three components means a leading directory was captured.
      path = matches.size == 3 ? matches.shift : nil
      css_path = File.join(*[path, "#{matches.join('-')}.css"].compact)

      mv_stylesheet(href, css_path)
      css_path
    end

    # Look for a match within our stylesheet link href and collect the
    # captured groups (optional leading path, cssid, mediatype).
    #
    # Returns Array of matches or nil
    def extract_path_components(href)
      m = href.match(%r{^(.+)?stylesheet\.php\?cssid=(\d+)(?:&amp;|&)mediatype=(\w+)})
      m ? m.captures.compact : nil
    end

    # Returns the href with everything after ".php" removed.
    def remove_query_params(href)
      href.sub(%r{\.php\?.*?$}, '.php')
    end

    # Convert any number of paths in to absolute paths prepending with
    # the working directory (e.g. /Users/jcf/git/static/public/#{path}).
    #
    # Returns an Array of expanded paths
    def expand_paths(*paths)
      paths.map { |path| File.expand_path(File.join(@working_directory, path)) }
    end

    # Rename the downloaded PHP stylesheet to its CSS name when it exists.
    # @css_path is recorded either way; process uses it to decide whether
    # the HTML file has to be written back.
    def mv_stylesheet(php_path, css_path)
      php_path, @css_path = *expand_paths(php_path, css_path)
      FileUtils.mv(php_path, @css_path) if File.exist?(php_path)
    end
  end
end
data/lib/snatch.rb ADDED
@@ -0,0 +1,81 @@
1
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
2
+
3
+ require "#{File.dirname(__FILE__)}/extensions"
4
+ require 'snatch/clean'
5
+
6
require 'shellwords'

# Downloads a site with wget into PUBLIC_PATH, converts its dynamic PHP
# stylesheets to static CSS files and pushes the result with git.
class Snatch
  PUBLIC_PATH = File.expand_path("#{File.dirname(__FILE__)}/../public")

  # url - The site to download (default: 'www.google.com').
  def initialize(url = nil)
    @url = url || 'www.google.com'
  end

  # Download, clean and push in one go. See #fetch.
  def self.fetch(url = nil)
    new(url).fetch
  end

  # Run only the download step, using the default URL.
  def self.wget
    new.send(:download_files)
  end

  # Run only the stylesheet conversion step.
  def self.clean
    new.send(:convert_dynamic_stylesheets)
  end

  # Run only the git step.
  def self.push
    new.send(:git_push)
  end

  # Run the three steps in order. Each step returns a truthy value (command
  # output or an Array), so the && chain does not stop when a command fails.
  def fetch
    download_files &&
      convert_dynamic_stylesheets &&
      git_push
  end

  private

  # Print a message prefixed with a coloured "=>".
  def log(message)
    bang = "\e[36;40;1m=>\e[0m"
    puts "#{bang} #{message}"
  end

  # Look up the path of an executable with `which`, caching the answer per
  # name.
  #
  # Returns the path as a String (empty when `which` printed nothing).
  def which(name)
    @which ||= {}
    @which[name] ||= `which #{name}`.strip
  end

  # Log and run wget with the given argument string.
  #
  # Returns the command's standard output.
  def wget(arguments = nil)
    wget_path = which :wget
    log "#{wget_path} #{arguments}"
    %x{#{wget_path} #{arguments}}
  end

  # Log and run a git command.
  #
  # command - The git sub-command (e.g. :add).
  # args    - Argument strings, optionally followed by an options Hash;
  #           :silent => true redirects standard output to /dev/null.
  #
  # Returns the command's standard output.
  def git(command, *args)
    options = args.last.is_a?(Hash) ? args.pop : {}
    arguments = args.join(' ')
    redirect = ' > /dev/null' if options[:silent]
    git_path = which :git
    log "#{git_path} #{command} #{arguments}#{redirect}"
    %x(#{git_path} #{command} #{arguments}#{redirect})
  end

  # Recursively download @url below PUBLIC_PATH.
  #
  # Both values are shell-escaped: previously PUBLIC_PATH was interpolated
  # bare (breaking on spaces) and the URL was merely wrapped in double
  # quotes, which still lets the shell expand $(...), backticks and quotes.
  def download_files
    puts "Downloading \"#{@url}\""
    wget "-P #{Shellwords.escape(PUBLIC_PATH)} -nH -rkq #{Shellwords.escape(@url)}"
  end

  # Run Clean over every HTML file below PUBLIC_PATH, resolving stylesheet
  # hrefs relative to the directory of each file.
  def convert_dynamic_stylesheets
    Dir.glob("#{PUBLIC_PATH}/**/*.html").each do |file|
      Clean.process(file, File.dirname(file))
    end
  end

  # Re-add the public directory and push a commit.
  # NOTE(review): `git add public` is relative, so this presumably expects to
  # be run from the repository root - confirm against the caller.
  def git_push
    git :rm, "-rq --cached #{Shellwords.escape(PUBLIC_PATH)}"
    git :add, "public"
    git :commit, "-q -m 'Automatic snatch'"
    git :push, :silent => true
  end
end
data/snatch.gemspec ADDED
@@ -0,0 +1,59 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
# Gem specification for snatch 0.0.1.
# NOTE(review): this file is generated from the Rakefile; regenerating it will
# overwrite manual changes made here.
Gem::Specification.new do |s|
  s.name = %q{snatch}
  s.version = "0.0.1"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["James Conroy-Finn"]
  s.date = %q{2010-02-02}
  s.description = %q{Simple site downloaded that wraps wget and converts PHP CSS files in to regular CSS files.}
  s.email = %q{james@logi.cl}
  s.extra_rdoc_files = [
    "LICENSE",
    "README.rdoc"
  ]
  s.files = [
    ".document",
    ".gitignore",
    "LICENSE",
    "README.rdoc",
    "Rakefile",
    "VERSION",
    "lib/extensions.rb",
    "lib/snatch.rb",
    "lib/snatch/clean.rb",
    "snatch.gemspec",
    "spec/snatch/clean_spec.rb",
    "spec/snatch_spec.rb",
    "spec/spec.opts",
    "spec/spec_helper.rb"
  ]
  s.homepage = %q{http://github.com/jcf/snatch}
  s.rdoc_options = ["--charset=UTF-8"]
  s.require_paths = ["lib"]
  s.rubygems_version = %q{1.3.5}
  s.summary = %q{wget your site and replace any nasty PHP CSS files}
  s.test_files = [
    "spec/snatch/clean_spec.rb",
    "spec/snatch_spec.rb",
    "spec/spec_helper.rb"
  ]

  s.specification_version = 3 if s.respond_to? :specification_version

  # The generated code compared Gem::RubyGemsVersion against 1.2.0 to decide
  # whether development dependencies are supported. Current RubyGems no longer
  # defines that constant, so loading the file raised NameError. Asking the
  # specification directly gives the same decision without the constant.
  if s.respond_to? :add_development_dependency
    s.add_development_dependency(%q<rspec>, [">= 1.2.9"])
  else
    s.add_dependency(%q<rspec>, [">= 1.2.9"])
  end
end
+ end
59
+
@@ -0,0 +1,94 @@
1
+ require 'spec_helper'
2
+
3
# Specs for the private helpers of Snatch::Clean. File.open is stubbed so that
# constructing an instance never touches the file system.
describe Snatch::Clean do
  before(:each) do
    File.stub!(:open)
  end

  it "should remove query params after a PHP CSS path" do
    clean = Snatch::Clean.new('file_name')
    href = '../../stylesheet.php?blah=woot'
    expected = '../../stylesheet.php'
    clean.send(:remove_query_params, href).should == expected
  end

  describe "rewrite_href" do
    before(:each) do
      @clean = Snatch::Clean.new('file_name')
    end

    it "should do nothing without an href" do
      @clean.send(:rewrite_href, '').should == ''
    end

    it "should move a top-level stylesheet file" do
      href = 'stylesheet.php?cssid=12&amp;mediatype=screen'
      @clean.should_receive(:mv_stylesheet).with(href, '12-screen.css')
      @clean.send(:rewrite_href, href)
    end

    it "should move a nested stylesheet file" do
      href = '../stylesheet.php?cssid=12&amp;mediatype=screen'
      @clean.should_receive(:mv_stylesheet).with(href, '../12-screen.css')
      @clean.send(:rewrite_href, href)
    end
  end

  describe "extract_path_components" do
    it "should find parents and values for cssid and mediatype" do
      clean = Snatch::Clean.new('file_name')
      php_path = '/css/something/stylesheet.php?cssid=12&amp;mediatype=screen'
      path = clean.send(:extract_path_components, php_path)
      path.to_a.should == ['/css/something/', '12', 'screen']
    end

    it "should find relative path components and values for cssid and mediatype" do
      clean = Snatch::Clean.new('file_name')
      php_path = '../../stylesheet.php?cssid=12&amp;mediatype=screen'
      path = clean.send(:extract_path_components, php_path)
      path.to_a.should == ['../../', '12', 'screen']
    end

    it "should find values for cssid and mediatype" do
      clean = Snatch::Clean.new('file_name')
      php_path = 'stylesheet.php?cssid=12&amp;mediatype=screen'
      path = clean.send(:extract_path_components, php_path)
      path.to_a.should == ['12', 'screen']
    end
  end

  describe "moving PHP files to CSS path" do
    it "should expand multiple paths to include the public directory" do
      clean = Snatch::Clean.new('file_name')
      public_path = Snatch::PUBLIC_PATH
      expected = [
        File.expand_path(File.join(public_path, 'a/b')),
        File.expand_path(File.join(public_path, 'c/d'))
      ]
      clean.send(:expand_paths, 'a/b', 'c/d').should == expected
    end

    it "should assign multiple paths with a splat" do
      clean = Snatch::Clean.new('file_name')
      public_path = Snatch::PUBLIC_PATH
      expected = [
        File.expand_path(File.join(public_path, 'a/b')),
        File.expand_path(File.join(public_path, 'c/d')),
        File.expand_path(File.join(public_path, 'e/f'))
      ]
      a, b, c = *clean.send(:expand_paths, 'a/b', 'c/d', 'e/f')
      a.should == expected.first
      # Plain indexing: Array#second only exists when ActiveSupport is
      # loaded, which the spec helper does not do.
      b.should == expected[1]
      c.should == expected.last
    end

    it "should expand absolute paths to include the public directory" do
      clean = Snatch::Clean.new('file_name')
      public_path = Snatch::PUBLIC_PATH
      expected = [
        File.expand_path(File.join(public_path, '/a/b'))
      ]
      clean.send(:expand_paths, '/a/b').should == expected
    end
  end
end
@@ -0,0 +1,7 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
# Replaces the generator's placeholder example, which failed unconditionally
# and therefore made the default rake task fail on every run.
describe "Snatch" do
  it "should fall back to www.google.com when no URL is given" do
    Snatch.new.instance_variable_get(:@url).should == 'www.google.com'
  end

  it "should keep the URL it was given" do
    Snatch.new('example.com').instance_variable_get(:@url).should == 'example.com'
  end

  it "should resolve PUBLIC_PATH to an absolute path" do
    Snatch::PUBLIC_PATH.should == File.expand_path(Snatch::PUBLIC_PATH)
  end
end
data/spec/spec.opts ADDED
@@ -0,0 +1 @@
1
+ --color
@@ -0,0 +1,9 @@
1
# Shared spec setup: put spec/ and then lib/ at the front of the load path
# (lib/ is unshifted last, so it is searched first), load the library and
# RSpec, and open the runner configuration hook.
spec_root = File.dirname(__FILE__)
lib_root = File.join(File.dirname(__FILE__), '..', 'lib')
[spec_root, lib_root].each { |dir| $LOAD_PATH.unshift(dir) }

%w[snatch spec spec/autorun].each { |feature| require feature }

Spec::Runner.configure do |config|
  # No custom configuration.
end
metadata ADDED
@@ -0,0 +1,80 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: snatch
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - James Conroy-Finn
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2010-02-02 00:00:00 +00:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: rspec
17
+ type: :development
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 1.2.9
24
+ version:
25
+ description: Simple site downloaded that wraps wget and converts PHP CSS files in to regular CSS files.
26
+ email: james@logi.cl
27
+ executables: []
28
+
29
+ extensions: []
30
+
31
+ extra_rdoc_files:
32
+ - LICENSE
33
+ - README.rdoc
34
+ files:
35
+ - .document
36
+ - .gitignore
37
+ - LICENSE
38
+ - README.rdoc
39
+ - Rakefile
40
+ - VERSION
41
+ - lib/extensions.rb
42
+ - lib/snatch.rb
43
+ - lib/snatch/clean.rb
44
+ - snatch.gemspec
45
+ - spec/snatch/clean_spec.rb
46
+ - spec/snatch_spec.rb
47
+ - spec/spec.opts
48
+ - spec/spec_helper.rb
49
+ has_rdoc: true
50
+ homepage: http://github.com/jcf/snatch
51
+ licenses: []
52
+
53
+ post_install_message:
54
+ rdoc_options:
55
+ - --charset=UTF-8
56
+ require_paths:
57
+ - lib
58
+ required_ruby_version: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ version: "0"
63
+ version:
64
+ required_rubygems_version: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: "0"
69
+ version:
70
+ requirements: []
71
+
72
+ rubyforge_project:
73
+ rubygems_version: 1.3.5
74
+ signing_key:
75
+ specification_version: 3
76
+ summary: wget your site and replace any nasty PHP CSS files
77
+ test_files:
78
+ - spec/snatch/clean_spec.rb
79
+ - spec/snatch_spec.rb
80
+ - spec/spec_helper.rb