surgescrape 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: b3d1e58be8a7ae49097955652334f148e92f0949
4
+ data.tar.gz: 9087e21894944b34a4a92b0ad89cf6a5cd3472b9
5
+ SHA512:
6
+ metadata.gz: a07cba90208e7c06d5c638a216ca0137ae5ea425a5063ad1c92a04f10f0508ce8858bdfff54df4657e86a5698c8d4c53556e46ad7fb47e2b44eda5b2ea525773
7
+ data.tar.gz: 62a55f03e9ec0b6fc68d56fe99a768eb2c626170e62e24bb0efda801da63d1ca13ed5ee5b4781621ab8afe37afae2100e280552cd3aca93e07a5f0d0c9d14812
data/.rake_tasks~ ADDED
@@ -0,0 +1,6 @@
1
+ build
2
+ clean
3
+ clobber
4
+ install
5
+ install:local
6
+ release[remote]
data/COPYING ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2016 Daniel E
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,40 @@
1
+ # SurgeScrape - Scrape your surge.sh website's files
2
+
3
+ ## Installation
4
+
5
+ + `[sudo] gem install surgescrape` Download & install via RubyGems
6
+
7
+ ## Usage
8
+
9
+ `sscrape website.surge.sh` A simple utility, really: just specify a domain
10
+ and watch all the files download. Run each time you need to update your
11
+ directory; the tool won't download untouched files.
12
+
13
+ ## What it looks like
14
+
15
+ ```
16
+ Installing new gem...
17
+ surgescrape (0.1.0) installed.
18
+ daniel@pancake:~/surgescrape$ sscrape
19
+ Usage : `sscrape website.surge.sh`
20
+ daniel@pancake:~/Desktop/surge$ sscrape surge.sh
21
+ Scraping the files from https://surge.sh...
22
+ The website has different versions of your files and/or new files, your version of these will be overwritten :
23
+ /css/index.css
24
+ /help/about-the-surge-cdn.html
25
+ [...]
26
+ /stickers/thanks.html
27
+ /tour.html
28
+
29
+ Downloading 107 file(s)...
30
+ Downloaded /css/index.css
31
+ Downloaded /help/about-the-surge-cdn.html
32
+ [...]
33
+ Downloaded /stickers/thanks.html
34
+ Downloaded /tour.html
35
+ Finished!
36
+ daniel@pancake:~/Desktop/surge$ sudo serve 80
37
+ Serving on http://pancake:80
38
+ ```
39
+
40
+ ## MIT License
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
+ task :default => :spec
data/bin/setup ADDED
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
data/bin/sscrape ADDED
@@ -0,0 +1,52 @@
1
+ #!/usr/bin/env ruby
2
+ # SurgeScrape - Scrape your surge.sh website's files
3
+ # Daniel Ethridge
4
+
5
+ require "surgescrape"
6
+ include SurgeScrape
7
+
8
+ usage = "Usage : `sscrape website.surge.sh`"
9
+ version = "SurgeScrape version : 0.1.0" #{SurgeScrape::VERSION}"
10
+
11
+ help = "\n#{usage}
12
+
13
+ #{version}
14
+
15
+ A simple utility, just specify a domain
16
+ and watch all the files download. Run each time
17
+ you need to update your directory, the tool won't
18
+ download untouched files.
19
+
20
+ --------------------------------------------------
21
+
22
+ If you find a bug, want extra features, or have a suggestion,
23
+ make a branch on github : https://github.com/wlib/surgescrape\n\n"
24
+
25
+ # If there are no arguments, print usage
26
+ if ARGV.length == 0
27
+ puts usage
28
+ exit
29
+ end
30
+
31
+ # Argument "parser", though only takes one argument...
32
+ arg = ARGV[0].downcase
33
+ case (arg)
34
+ when "help", "--help", "h", "-h", "?"
35
+ puts help
36
+ exit
37
+ when "version", "--version", "v", "-v"
38
+ puts version
39
+ exit
40
+ else
41
+ if arg.match( /^https:\/\/.+/i )
42
+ scrape(arg)
43
+ exit
44
+ elsif arg.match( /^http:\/\/.+/i )
45
+ puts "https:// only"
46
+ exit
47
+ else
48
+ scrape("https://#{arg}")
49
+ exit
50
+ end
51
+ exit
52
+ end
@@ -0,0 +1,3 @@
1
+ module SurgeScrape
2
+ VERSION = "0.1.0"
3
+ end
@@ -0,0 +1,94 @@
1
+ #!/usr/bin/env ruby
2
+ # "Scrape" the website based on its `/auto.appcache` file
3
+ # Daniel Ethridge
4
+
5
+ require 'net/http'
6
+ require 'digest'
7
+ require 'fileutils'
8
+
9
+ module SurgeScrape
10
+ def rm_dot(path)
11
+ return path[1..-1]
12
+ end
13
+
14
+ def get(site, file)
15
+ uri = URI.parse("#{site}/#{file}")
16
+ http = Net::HTTP.new(uri.host, uri.port)
17
+ http.use_ssl = true
18
+ request = Net::HTTP::Get.new(uri.request_uri, {"Accept-Encoding" => "gzip"})
19
+ response = http.request(request)
20
+ return response.body
21
+ end
22
+
23
+ def get_cache_list(site)
24
+ cache_manifest = get(site, "auto.appcache")
25
+ cache = cache_manifest
26
+ files = cache.scan( /^\/.+\..+$/ )
27
+ hashes = cache.scan( /[a-f0-9]{32}/i )
28
+ out = {}
29
+ for i in 0...hashes.length
30
+ out[files[i].chomp] = hashes[i].chomp
31
+ end
32
+ if out.empty?
33
+ puts "Website does not have a `/auto.appcache` file, try a *.surge.sh URL"
34
+ exit
35
+ end
36
+ return out
37
+ end
38
+
39
+ def get_local_list()
40
+ files = Dir.glob("./**/*", File::FNM_DOTMATCH).select{ |e| File.file?(e) }
41
+ hashes = []
42
+ files.each { |f| hashes << Digest::MD5.file(f).hexdigest }
43
+ out = {}
44
+ for i in 0...hashes.length
45
+ out[files[i].chomp] = hashes[i].chomp
46
+ end
47
+ return out
48
+ end
49
+
50
+ def compare_lists(cache, local)
51
+ local.keys.each { |k| local[rm_dot(k)] = local.delete k }
52
+ diffs = ( cache.to_a - local.to_a ).to_h.keys
53
+ extra = ( local.to_a - cache.to_a ).to_h.keys - diffs
54
+ if ! extra.empty?
55
+ puts "This directory has file(s) that are not recorded on the website, \
56
+ they will not be overwritten : \n#{extra.join("\n")} \n\n"
57
+ end
58
+ if ! diffs.empty?
59
+ puts "The website has different versions of your files and/or new files, \
60
+ your version of these will be overwritten : \n#{diffs.join("\n")} \n\n"
61
+ return diffs
62
+ else
63
+ return nil
64
+ end
65
+ end
66
+
67
+ def get_files(site, files)
68
+ puts "Downloading #{files.length} file(s)..."
69
+ files.each do |file|
70
+ retrieved = get(site, file)
71
+ if retrieved.nil?
72
+ puts "#{file} could not be downloaded"
73
+ next
74
+ end
75
+ FileUtils.mkdir_p("./#{File.dirname(file)}") unless Dir.exists?("./#{File.dirname(file)}")
76
+ local = File.open("./#{file}", "w")
77
+ local.write retrieved
78
+ puts "Downloaded #{file}"
79
+ end
80
+ end
81
+
82
+ def scrape(site)
83
+ puts "Scraping the files from #{site}..."
84
+ cache = get_cache_list(site)
85
+ local = get_local_list()
86
+ files = compare_lists(cache, local)
87
+ if files.nil?
88
+ puts "Nothing to download"
89
+ exit
90
+ end
91
+ get_files(site, files)
92
+ puts "Finished!"
93
+ end
94
+ end
Binary file
@@ -0,0 +1,25 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require "surgescrape/version"
5
+
6
+ Gem::Specification.new do |gem|
7
+ gem.name = "surgescrape"
8
+ gem.version = SurgeScrape::VERSION
9
+ gem.authors = ["Daniel Ethridge"]
10
+ gem.email = ["danielethridge@icloud.com"]
11
+ gem.license = "MIT"
12
+
13
+ gem.summary = %q{Scrape your surge.sh website's files}
14
+ gem.homepage = "https://github.com/wlib/surgescrape"
15
+
16
+ gem.files = `git ls-files -z`.split("\x0").reject do |f|
17
+ f.match(%r{^(test|spec|features)/})
18
+ end
19
+ gem.bindir = "bin"
20
+ gem.executables = ["sscrape"]
21
+ gem.require_paths = ["lib"]
22
+
23
+ gem.add_development_dependency "bundler", "~> 1.13"
24
+ gem.add_development_dependency "rake", "~> 10.0"
25
+ end
metadata ADDED
@@ -0,0 +1,83 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: surgescrape
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Daniel Ethridge
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-12-21 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.13'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.13'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ description:
42
+ email:
43
+ - danielethridge@icloud.com
44
+ executables:
45
+ - sscrape
46
+ extensions: []
47
+ extra_rdoc_files: []
48
+ files:
49
+ - ".rake_tasks~"
50
+ - COPYING
51
+ - README.md
52
+ - Rakefile
53
+ - bin/setup
54
+ - bin/sscrape
55
+ - lib/surgescrape.rb
56
+ - lib/surgescrape/version.rb
57
+ - pkg/surgescrape-0.1.0.gem
58
+ - surgescrape.gemspec
59
+ homepage: https://github.com/wlib/surgescrape
60
+ licenses:
61
+ - MIT
62
+ metadata: {}
63
+ post_install_message:
64
+ rdoc_options: []
65
+ require_paths:
66
+ - lib
67
+ required_ruby_version: !ruby/object:Gem::Requirement
68
+ requirements:
69
+ - - ">="
70
+ - !ruby/object:Gem::Version
71
+ version: '0'
72
+ required_rubygems_version: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - ">="
75
+ - !ruby/object:Gem::Version
76
+ version: '0'
77
+ requirements: []
78
+ rubyforge_project:
79
+ rubygems_version: 2.5.1
80
+ signing_key:
81
+ specification_version: 4
82
+ summary: Scrape your surge.sh website's files
83
+ test_files: []