surgescrape 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: b3d1e58be8a7ae49097955652334f148e92f0949
4
+ data.tar.gz: 9087e21894944b34a4a92b0ad89cf6a5cd3472b9
5
+ SHA512:
6
+ metadata.gz: a07cba90208e7c06d5c638a216ca0137ae5ea425a5063ad1c92a04f10f0508ce8858bdfff54df4657e86a5698c8d4c53556e46ad7fb47e2b44eda5b2ea525773
7
+ data.tar.gz: 62a55f03e9ec0b6fc68d56fe99a768eb2c626170e62e24bb0efda801da63d1ca13ed5ee5b4781621ab8afe37afae2100e280552cd3aca93e07a5f0d0c9d14812
data/.rake_tasks~ ADDED
@@ -0,0 +1,6 @@
1
+ build
2
+ clean
3
+ clobber
4
+ install
5
+ install:local
6
+ release[remote]
data/COPYING ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2016 Daniel E
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,40 @@
1
+ # SurgeScrape - Scrape your surge.sh website's files
2
+
3
+ ## Installation
4
+
5
+ + `[sudo] gem install surgescrape` Download & install via RubyGems
6
+
7
+ ## Usage
8
+
9
+ `sscrape website.surge.sh` Simple, really: just specify a domain
10
+ and watch all the files download. Run each time you need to update your
11
+ directory; the tool won't download untouched files.
12
+
13
+ ## What it looks like
14
+
15
+ ```
16
+ Installing new gem...
17
+ surgescrape (0.1.0) installed.
18
+ daniel@pancake:~/surgescrape$ sscrape
19
+ Usage : `sscrape website.surge.sh`
20
+ daniel@pancake:~/Desktop/surge$ sscrape surge.sh
21
+ Scraping the files from https://surge.sh...
22
+ The website has different versions of your files and/or new files, your version of these will be overwritten :
23
+ /css/index.css
24
+ /help/about-the-surge-cdn.html
25
+ [...]
26
+ /stickers/thanks.html
27
+ /tour.html
28
+
29
+ Downloading 107 file(s)...
30
+ Downloaded /css/index.css
31
+ Downloaded /help/about-the-surge-cdn.html
32
+ [...]
33
+ Downloaded /stickers/thanks.html
34
+ Downloaded /tour.html
35
+ Finished!
36
+ daniel@pancake:~/Desktop/surge$ sudo serve 80
37
+ Serving on http://pancake:80
38
+ ```
39
+
40
+ ## MIT License
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
require "bundler/gem_tasks"

# The gem ships no test suite and no :spec task is defined anywhere, so a
# default that depends on :spec makes a bare `rake` abort. Build the gem
# instead, which is a task bundler/gem_tasks does provide.
task :default => :build
data/bin/setup ADDED
@@ -0,0 +1,6 @@
1
#!/usr/bin/env bash
# Development bootstrap: installs the gem's dependencies through Bundler.

# Abort on any failing command, on use of an unset variable, and when any
# stage of a pipeline fails.
set -e -u -o pipefail
# Split words on newlines and tabs only, never on spaces.
IFS=$'\n\t'
# Echo each line as it is read and each command as it is executed.
set -v -x

bundle install
data/bin/sscrape ADDED
@@ -0,0 +1,52 @@
1
#!/usr/bin/env ruby
# SurgeScrape - Scrape your surge.sh website's files
# Daniel Ethridge
#
# Command line entry point. Takes exactly one argument: a help/version flag
# or the domain (optionally prefixed with https://) of the site to scrape.

require "surgescrape"
# The library file does not load the version constant itself, so pull it in
# here; this lets the version string below track the gem version instead of
# being hard-coded.
require "surgescrape/version"
include SurgeScrape

usage = "Usage : `sscrape website.surge.sh`"
version = "SurgeScrape version : #{SurgeScrape::VERSION}"

help = "\n#{usage}

#{version}

A simple utility, just specify a domain
and watch all the files download. Run each time
you need to update your directory, the tool won't
download untouched files.

--------------------------------------------------

If you find a bug, want extra features, or have a suggestion,
make a branch on github : https://github.com/wlib/surgescrape\n\n"

# If there are no arguments, print usage
if ARGV.empty?
  puts usage
  exit
end

# Argument "parser", though only takes one argument...
# The argument is lower-cased first, so the patterns below need no /i flag.
arg = ARGV[0].downcase
case arg
when "help", "--help", "h", "-h", "?"
  puts help
when "version", "--version", "v", "-v"
  puts version
when %r{\Ahttps://.+}
  scrape(arg)
when %r{\Ahttp://}
  # Plain HTTP is refused rather than silently upgraded.
  puts "https:// only"
else
  # A bare domain: assume https.
  scrape("https://#{arg}")
end
@@ -0,0 +1,3 @@
1
module SurgeScrape
  # Gem version string, read by the gemspec. Frozen so the constant cannot be
  # mutated in place by code that merely reads it.
  VERSION = "0.1.0".freeze
end
@@ -0,0 +1,94 @@
1
+ #!/usr/bin/env ruby
2
+ # "Scrape" the website based on its `/auto.appcache` file
3
+ # Daniel Ethridge
4
+
5
+ require 'net/http'
6
+ require 'digest'
7
+ require 'fileutils'
8
+
9
# Helpers for mirroring a site into the current directory, driven by the
# path/MD5 listing found in the site's `/auto.appcache` file.
#
# The module is meant to be mixed in (the executable does `include SurgeScrape`)
# and `scrape` is the entry point.
module SurgeScrape
  # Drop the first character of +path+, turning a local "./a/b" into the
  # "/a/b" form used for remote paths.
  def rm_dot(path)
    path[1..-1]
  end

  # Fetch +file+ from +site+ over HTTPS.
  #
  # site - base URL, e.g. "https://example.surge.sh"
  # file - path on the site, with or without a leading slash
  #
  # Returns the response body, or nil when the server does not answer with a
  # 2xx status (so callers never mistake an error page for file content).
  def get(site, file)
    # Join with exactly one slash; remote paths already start with "/".
    uri = URI.parse("#{site.to_s.chomp('/')}/#{file.to_s.sub(%r{\A/+}, '')}")
    http = Net::HTTP.new(uri.host, uri.port)
    http.use_ssl = true
    # No explicit Accept-Encoding header: Net::HTTP then negotiates gzip on
    # its own AND decodes the body. Setting the header by hand disables the
    # decoding and hands back raw compressed bytes.
    response = http.request(Net::HTTP::Get.new(uri.request_uri))
    response.is_a?(Net::HTTPSuccess) ? response.body : nil
  end

  # Download and parse the site's `/auto.appcache`.
  #
  # Returns a Hash of remote path => 32-character hex digest. Prints a hint
  # and exits the process when nothing usable is found.
  def get_cache_list(site)
    out = parse_cache_manifest(get(site, "auto.appcache").to_s)
    if out.empty?
      puts "Website does not have a `/auto.appcache` file, try a *.surge.sh URL"
      exit
    end
    out
  end

  # Hash every regular file below the current directory (dotfiles included).
  #
  # Returns a Hash of "./relative/path" => MD5 hex digest.
  def get_local_list()
    Dir.glob("./**/*", File::FNM_DOTMATCH)
       .select { |entry| File.file?(entry) }
       .each_with_object({}) { |path, out| out[path] = Digest::MD5.file(path).hexdigest }
  end

  # Work out which remote files are new or differ from the local copies.
  #
  # cache - Hash of "/path" => digest, as returned by get_cache_list
  # local - Hash of "./path" => digest, as returned by get_local_list
  #         (left untouched; a normalized copy is compared instead)
  #
  # Prints a notice about local-only files and about files that will be
  # overwritten. Returns the Array of remote paths to download, or nil when
  # there is nothing to fetch.
  def compare_lists(cache, local)
    normalized = local.each_with_object({}) do |(path, digest), out|
      out[rm_dot(path)] = digest
    end
    # Remote entries that are missing locally or whose digest differs.
    diffs = cache.reject { |path, digest| normalized[path] == digest }.keys
    # Local files the website does not list at all.
    extra = normalized.keys - cache.keys
    unless extra.empty?
      puts "This directory has file(s) that are not recorded on the website, " \
           "they will not be overwritten : \n#{extra.join("\n")} \n\n"
    end
    return nil if diffs.empty?

    puts "The website has different versions of your files and/or new files, " \
         "your version of these will be overwritten : \n#{diffs.join("\n")} \n\n"
    diffs
  end

  # Download each path in +files+ from +site+ into the current directory,
  # creating sub-directories as needed.
  def get_files(site, files)
    puts "Downloading #{files.length} file(s)..."
    root = File.expand_path(".")
    inside = root.end_with?(File::SEPARATOR) ? root : "#{root}#{File::SEPARATOR}"
    files.each do |file|
      # Paths come from the remote manifest; refuse anything that would
      # resolve outside the directory being synced (e.g. "/../x").
      target = File.expand_path("./#{file}", root)
      unless target.start_with?(inside)
        puts "#{file} skipped, it points outside the current directory"
        next
      end
      retrieved = get(site, file)
      if retrieved.nil?
        puts "#{file} could not be downloaded"
        next
      end
      # mkdir_p is a no-op for existing directories, so no existence check.
      FileUtils.mkdir_p(File.dirname(target))
      # Binary mode keeps the bytes (and therefore the MD5) intact on every
      # platform; the block form closes the handle.
      File.open(target, "wb") { |out| out.write(retrieved) }
      puts "Downloaded #{file}"
    end
  end

  # Entry point: sync the current directory with +site+.
  # Exits the process early when everything is already up to date.
  def scrape(site)
    puts "Scraping the files from #{site}..."
    cache = get_cache_list(site)
    local = get_local_list()
    files = compare_lists(cache, local)
    if files.nil?
      puts "Nothing to download"
      exit
    end
    get_files(site, files)
    puts "Finished!"
  end

  private

  # Pair the paths and the 32-hex-digit digests found in +manifest+ by
  # position. Pairing stops at the shorter of the two lists, so a stray
  # digest can no longer raise on a missing path.
  def parse_cache_manifest(manifest)
    files = manifest.scan(%r{^/.+\..+$})
    hashes = manifest.scan(/[a-f0-9]{32}/i)
    count = [files.length, hashes.length].min
    files.first(count).zip(hashes.first(count)).each_with_object({}) do |(path, digest), out|
      out[path.chomp] = digest.chomp
    end
  end
end
Binary file
@@ -0,0 +1,25 @@
1
# coding: utf-8
# Gem specification for surgescrape.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require "surgescrape/version"

Gem::Specification.new do |gem|
  gem.name          = "surgescrape"
  gem.version       = SurgeScrape::VERSION
  gem.authors       = ["Daniel Ethridge"]
  gem.email         = ["danielethridge@icloud.com"]
  gem.license       = "MIT"

  gem.summary       = %q{Scrape your surge.sh website's files}
  gem.homepage      = "https://github.com/wlib/surgescrape"

  # Package every tracked file except tests, previously built gems under
  # pkg/ (otherwise each release embeds the last one) and editor backup
  # files such as ".rake_tasks~".
  gem.files         = `git ls-files -z`.split("\x0").reject do |f|
    f.match(%r{^(test|spec|features|pkg)/}) || f.end_with?("~")
  end
  gem.bindir        = "bin"
  gem.executables   = ["sscrape"]
  gem.require_paths = ["lib"]

  gem.add_development_dependency "bundler", "~> 1.13"
  gem.add_development_dependency "rake", "~> 10.0"
end
metadata ADDED
@@ -0,0 +1,83 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: surgescrape
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Daniel Ethridge
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-12-21 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.13'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.13'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ description:
42
+ email:
43
+ - danielethridge@icloud.com
44
+ executables:
45
+ - sscrape
46
+ extensions: []
47
+ extra_rdoc_files: []
48
+ files:
49
+ - ".rake_tasks~"
50
+ - COPYING
51
+ - README.md
52
+ - Rakefile
53
+ - bin/setup
54
+ - bin/sscrape
55
+ - lib/surgescrape.rb
56
+ - lib/surgescrape/version.rb
57
+ - pkg/surgescrape-0.1.0.gem
58
+ - surgescrape.gemspec
59
+ homepage: https://github.com/wlib/surgescrape
60
+ licenses:
61
+ - MIT
62
+ metadata: {}
63
+ post_install_message:
64
+ rdoc_options: []
65
+ require_paths:
66
+ - lib
67
+ required_ruby_version: !ruby/object:Gem::Requirement
68
+ requirements:
69
+ - - ">="
70
+ - !ruby/object:Gem::Version
71
+ version: '0'
72
+ required_rubygems_version: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - ">="
75
+ - !ruby/object:Gem::Version
76
+ version: '0'
77
+ requirements: []
78
+ rubyforge_project:
79
+ rubygems_version: 2.5.1
80
+ signing_key:
81
+ specification_version: 4
82
+ summary: Scrape your surge.sh website's files
83
+ test_files: []