wayback_archiver 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: b50fa3a16a416da9fea6e7eb544173f9bd8c10ea
4
+ data.tar.gz: cfd8ebea5a0ed30fbc06d038a157e10961a4b5c9
5
+ SHA512:
6
+ metadata.gz: 0ab0b992bbc1da9433684c5d4f6266d603f07f73ecaa7c9c3a3fafcb1932bb30077e53d5f2e866990756943bb23638e542d7a9cee80d91e71768467042f26b68
7
+ data.tar.gz: 37ab8183c5738c0d1c9540b39395b25c06f80fbb5c8412848892f71a8ada68d5a0611d9fc831a427d638d350fd58c32aac27bf9d19df0eade960359b51b3b4d3
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in wayback_archiver.gemspec
4
+ gemspec
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Jacob Burenstam
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,30 @@
1
+ # WaybackArchiver
2
+ Send URLs to the [Wayback Machine](https://archive.org/web/) from a site's [/sitemap.xml](http://www.sitemaps.org), a single URL, or a file with URLs.
3
+
4
+ ## Installation
5
+ Install the gem:
6
+ ```bash
7
+ gem install wayback_archiver
8
+ ```
9
+
10
+ ## Usage
11
+
12
+ ```bash
13
+ wayback_archiver http://example.com # Send each URL listed in http://example.com/sitemap.xml
14
+ wayback_archiver http://example.com/some/path url # Only send http://example.com/some/path to Wayback Machine
15
+ wayback_archiver /path/to/some/file file # Send each URL in the file (one URL per line)
16
+ ```
17
+
18
+ View domain archive: https://web.archive.org/web/*/http://example.com
19
+
20
+
21
+ ## Contributing
22
+
23
+ 1. Fork it
24
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
25
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
26
+ 4. Push to the branch (`git push origin my-new-feature`)
27
+ 5. Create new Pull Request
28
+
29
+ ---------
30
+ Don't know what a sitemap is? [http://sitemaps.org](http://www.sitemaps.org)
@@ -0,0 +1 @@
1
+ require 'bundler/gem_tasks'
@@ -0,0 +1,4 @@
1
#!/usr/bin/env ruby

# Command-line entry point for the wayback_archiver gem.
#
# Usage: wayback_archiver <source> [sitemap|url|file]
#   source - site base URL, single URL, or path to a file with one URL per line
#   type   - how to interpret source; WaybackArchiver.archive treats a missing
#            type as "sitemap"
require 'wayback_archiver'

source, from = ARGV

# Without a source there is nothing to archive; fail early with a usage hint
# instead of crashing later while building a request URL from nil.
abort 'Usage: wayback_archiver <source> [sitemap|url|file]' if source.nil?

WaybackArchiver.archive(source, from)
@@ -0,0 +1,46 @@
1
+ require 'net/http'
2
+ require 'uri'
3
+ require 'rexml/document'
4
+
5
# Submits URLs to the Wayback Machine "save" endpoint. URLs can come from a
# site's sitemap.xml, from a single URL, or from a text file.
module WaybackArchiver
  # Prefix of the save endpoint; the URL to archive is appended to it.
  WAYBACK_BASE_URL = 'https://web.archive.org/save/'.freeze

  # Resolves the URLs described by +source+ and requests an archive of each,
  # printing progress and the HTTP status of every request to stdout.
  #
  # @param source [String] site base URL (:sitemap), a single URL (:url) or
  #   a path to a file with one URL per line (:file)
  # @param from [String, Symbol, nil] "sitemap", "url" or "file"; nil means
  #   :sitemap
  # @return [Array<String>] the URLs that were submitted (empty when +from+
  #   is not recognised)
  def self.archive(source, from)
    urls =
      case (from || :sitemap).to_sym
      when :sitemap
        # Drop a trailing slash so we never request ".../​/sitemap.xml".
        urls_from_sitemap("#{source.to_s.chomp('/')}/sitemap.xml")
      when :url
        [source]
      when :file
        urls_from_file(source)
      else
        # Previously this fell through silently; tell the user why nothing
        # is being archived.
        warn "Unknown source type '#{from}' (expected sitemap, url or file)"
        []
      end

    urls.each_with_index do |url, index|
      puts "Archiving (#{index + 1}/#{urls.length}): #{url}"
      request_url = URI("#{WAYBACK_BASE_URL}#{url}")
      res = Net::HTTP.get_response(request_url)

      puts "#{res.code} => #{res.message}"
    end
  end

  # Downloads the sitemap at +url+ and extracts every <loc> value.
  #
  # @param url [String] absolute URL of a sitemap.xml document
  # @return [Array<String>] non-empty, whitespace-trimmed <loc> values
  def self.urls_from_sitemap(url)
    xml_data = Net::HTTP.get_response(URI.parse(url)).body
    document = REXML::Document.new(xml_data)

    urls = []
    document.elements.each('urlset/url/loc') do |element|
      # element.text is nil for an empty <loc/> and is often padded with
      # whitespace when the sitemap is pretty-printed.
      loc = element.text.to_s.strip
      urls << loc unless loc.empty?
    end
    urls
  end

  # Reads URLs from a text file containing one URL per line.
  #
  # @param path [String] path to the file
  # @return [Array<String>] the URLs, without surrounding whitespace and
  #   without blank lines
  def self.urls_from_file(path)
    # File.read closes the handle for us (File.open(path).read leaked it).
    # Split on \r\n, lone \r and \n so all line-ending styles are accepted.
    File.read(path)
        .split(/\r\n?|\n/)
        .map(&:strip)
        .reject(&:empty?)
  end
end
@@ -0,0 +1,3 @@
1
# Namespace of the wayback_archiver gem.
module WaybackArchiver
  # Gem version string; frozen so the constant cannot be mutated in place.
  VERSION = '0.0.1'.freeze
end
@@ -0,0 +1,34 @@
1
# coding: utf-8
# Gem specification for wayback_archiver.

# Make lib/ loadable so the version constant can be read without installing
# the gem first.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'wayback_archiver/version'

Gem::Specification.new do |spec|
  spec.name          = 'wayback_archiver'
  spec.version       = WaybackArchiver::VERSION

  spec.required_rubygems_version = Gem::Requirement.new('>= 0') if spec.respond_to? :required_rubygems_version=
  spec.authors       = ['Jacob Burenstam']
  spec.email         = ['burenstam@gmail.com']
  spec.summary       = %q{Send URLs to Wayback Machine}
  spec.description   = %q{Send URLs to Wayback Machine. From: sitemap, file or single URL.}
  spec.homepage      = 'https://github.com/buren/wayback_archiver'
  spec.license       = 'MIT'

  # Package every file tracked by git; executables and test files are
  # derived from that list by path prefix.
  spec.files         = `git ls-files`.split("\n")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']

  # The former nested if/else branches around this assignment were empty and
  # have been removed; only the guarded assignment had any effect.
  spec.specification_version = 3 if spec.respond_to? :specification_version

  spec.add_development_dependency 'bundler', '~> 1.3'
  spec.add_development_dependency 'rake'
end
metadata ADDED
@@ -0,0 +1,82 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: wayback_archiver
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Jacob Burenstam
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-07-17 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.3'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ description: 'Send URLs to Wayback Machine. From: sitemap, file or single URL.'
42
+ email:
43
+ - burenstam@gmail.com
44
+ executables:
45
+ - wayback_archiver
46
+ extensions: []
47
+ extra_rdoc_files: []
48
+ files:
49
+ - .gitignore
50
+ - Gemfile
51
+ - LICENSE.txt
52
+ - README.md
53
+ - Rakefile
54
+ - bin/wayback_archiver
55
+ - lib/wayback_archiver.rb
56
+ - lib/wayback_archiver/version.rb
57
+ - wayback_archiver.gemspec
58
+ homepage: https://github.com/buren/wayback_archiver
59
+ licenses:
60
+ - MIT
61
+ metadata: {}
62
+ post_install_message:
63
+ rdoc_options: []
64
+ require_paths:
65
+ - lib
66
+ required_ruby_version: !ruby/object:Gem::Requirement
67
+ requirements:
68
+ - - '>='
69
+ - !ruby/object:Gem::Version
70
+ version: '0'
71
+ required_rubygems_version: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ requirements: []
77
+ rubyforge_project:
78
+ rubygems_version: 2.0.0
79
+ signing_key:
80
+ specification_version: 3
81
+ summary: Send URLs to Wayback Machine
82
+ test_files: []