pedophile 0.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +11 -0
- data/Gemfile.lock +90 -0
- data/LICENSE.txt +165 -0
- data/README.md +25 -0
- data/Rakefile +51 -0
- data/VERSION +1 -0
- data/lib/pedophile.rb +11 -0
- data/lib/pedophile/big_files.rb +107 -0
- data/lib/pedophile/downloader.rb +25 -0
- data/lib/pedophile/login.rb +54 -0
- data/lib/pedophile/offline_tree.rb +307 -0
- data/lib/pedophile/wget.rb +59 -0
- metadata +141 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: a88a4801a4b0f5e66827ee308edee32de2f9882b
|
4
|
+
data.tar.gz: 31a68e039b567c0085d810000e959071322ec6ad
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: cdcc88116835bc2678a3f8f4c28cac9767c2037bf2cf9ba7e4e83eda05c128b90f08920d90a7f2de1f53185aa8a0fa081b0b8ecf881f6d985347c66bed377c73
|
7
|
+
data.tar.gz: eb284fdaedd82df541ca2248280eb030778467f5339f068f41d942c6f381e3ce73bff2b2e59a77c1e1ed18dd26edb5adcc9c2e39865930615f5d9908e79dfd20
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,90 @@
|
|
1
|
+
GEM
|
2
|
+
remote: http://rubygems.org/
|
3
|
+
specs:
|
4
|
+
activesupport (4.2.0)
|
5
|
+
i18n (~> 0.7)
|
6
|
+
json (~> 1.7, >= 1.7.7)
|
7
|
+
minitest (~> 5.1)
|
8
|
+
thread_safe (~> 0.3, >= 0.3.4)
|
9
|
+
tzinfo (~> 1.1)
|
10
|
+
addressable (2.3.7)
|
11
|
+
builder (3.2.2)
|
12
|
+
colorize (0.7.5)
|
13
|
+
descendants_tracker (0.0.4)
|
14
|
+
thread_safe (~> 0.3, >= 0.3.1)
|
15
|
+
diff-lcs (1.2.5)
|
16
|
+
docile (1.1.5)
|
17
|
+
faraday (0.9.1)
|
18
|
+
multipart-post (>= 1.2, < 3)
|
19
|
+
git (1.2.9.1)
|
20
|
+
github_api (0.12.3)
|
21
|
+
addressable (~> 2.3)
|
22
|
+
descendants_tracker (~> 0.0.4)
|
23
|
+
faraday (~> 0.8, < 0.10)
|
24
|
+
hashie (>= 3.3)
|
25
|
+
multi_json (>= 1.7.5, < 2.0)
|
26
|
+
nokogiri (~> 1.6.3)
|
27
|
+
oauth2
|
28
|
+
hashie (3.4.0)
|
29
|
+
highline (1.7.1)
|
30
|
+
i18n (0.7.0)
|
31
|
+
jeweler (2.0.1)
|
32
|
+
builder
|
33
|
+
bundler (>= 1.0)
|
34
|
+
git (>= 1.2.5)
|
35
|
+
github_api
|
36
|
+
highline (>= 1.6.15)
|
37
|
+
nokogiri (>= 1.5.10)
|
38
|
+
rake
|
39
|
+
rdoc
|
40
|
+
json (1.8.2)
|
41
|
+
jwt (1.3.0)
|
42
|
+
mini_portile (0.6.2)
|
43
|
+
minitest (5.5.1)
|
44
|
+
multi_json (1.10.1)
|
45
|
+
multi_xml (0.5.5)
|
46
|
+
multipart-post (2.0.0)
|
47
|
+
nokogiri (1.6.6.2)
|
48
|
+
mini_portile (~> 0.6.0)
|
49
|
+
oauth2 (1.0.0)
|
50
|
+
faraday (>= 0.8, < 0.10)
|
51
|
+
jwt (~> 1.0)
|
52
|
+
multi_json (~> 1.3)
|
53
|
+
multi_xml (~> 0.5)
|
54
|
+
rack (~> 1.2)
|
55
|
+
rack (1.6.0)
|
56
|
+
rake (10.4.2)
|
57
|
+
rdoc (4.2.0)
|
58
|
+
json (~> 1.4)
|
59
|
+
rspec (3.2.0)
|
60
|
+
rspec-core (~> 3.2.0)
|
61
|
+
rspec-expectations (~> 3.2.0)
|
62
|
+
rspec-mocks (~> 3.2.0)
|
63
|
+
rspec-core (3.2.1)
|
64
|
+
rspec-support (~> 3.2.0)
|
65
|
+
rspec-expectations (3.2.0)
|
66
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
67
|
+
rspec-support (~> 3.2.0)
|
68
|
+
rspec-mocks (3.2.1)
|
69
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
70
|
+
rspec-support (~> 3.2.0)
|
71
|
+
rspec-support (3.2.2)
|
72
|
+
simplecov (0.9.2)
|
73
|
+
docile (~> 1.1.0)
|
74
|
+
multi_json (~> 1.0)
|
75
|
+
simplecov-html (~> 0.9.0)
|
76
|
+
simplecov-html (0.9.0)
|
77
|
+
thread_safe (0.3.4)
|
78
|
+
tzinfo (1.2.2)
|
79
|
+
thread_safe (~> 0.1)
|
80
|
+
|
81
|
+
PLATFORMS
|
82
|
+
ruby
|
83
|
+
|
84
|
+
DEPENDENCIES
|
85
|
+
activesupport
|
86
|
+
bundler
|
87
|
+
colorize
|
88
|
+
jeweler
|
89
|
+
rspec
|
90
|
+
simplecov
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,165 @@
|
|
1
|
+
GNU LESSER GENERAL PUBLIC LICENSE
|
2
|
+
Version 3, 29 June 2007
|
3
|
+
|
4
|
+
Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
|
5
|
+
Everyone is permitted to copy and distribute verbatim copies
|
6
|
+
of this license document, but changing it is not allowed.
|
7
|
+
|
8
|
+
|
9
|
+
This version of the GNU Lesser General Public License incorporates
|
10
|
+
the terms and conditions of version 3 of the GNU General Public
|
11
|
+
License, supplemented by the additional permissions listed below.
|
12
|
+
|
13
|
+
0. Additional Definitions.
|
14
|
+
|
15
|
+
As used herein, "this License" refers to version 3 of the GNU Lesser
|
16
|
+
General Public License, and the "GNU GPL" refers to version 3 of the GNU
|
17
|
+
General Public License.
|
18
|
+
|
19
|
+
"The Library" refers to a covered work governed by this License,
|
20
|
+
other than an Application or a Combined Work as defined below.
|
21
|
+
|
22
|
+
An "Application" is any work that makes use of an interface provided
|
23
|
+
by the Library, but which is not otherwise based on the Library.
|
24
|
+
Defining a subclass of a class defined by the Library is deemed a mode
|
25
|
+
of using an interface provided by the Library.
|
26
|
+
|
27
|
+
A "Combined Work" is a work produced by combining or linking an
|
28
|
+
Application with the Library. The particular version of the Library
|
29
|
+
with which the Combined Work was made is also called the "Linked
|
30
|
+
Version".
|
31
|
+
|
32
|
+
The "Minimal Corresponding Source" for a Combined Work means the
|
33
|
+
Corresponding Source for the Combined Work, excluding any source code
|
34
|
+
for portions of the Combined Work that, considered in isolation, are
|
35
|
+
based on the Application, and not on the Linked Version.
|
36
|
+
|
37
|
+
The "Corresponding Application Code" for a Combined Work means the
|
38
|
+
object code and/or source code for the Application, including any data
|
39
|
+
and utility programs needed for reproducing the Combined Work from the
|
40
|
+
Application, but excluding the System Libraries of the Combined Work.
|
41
|
+
|
42
|
+
1. Exception to Section 3 of the GNU GPL.
|
43
|
+
|
44
|
+
You may convey a covered work under sections 3 and 4 of this License
|
45
|
+
without being bound by section 3 of the GNU GPL.
|
46
|
+
|
47
|
+
2. Conveying Modified Versions.
|
48
|
+
|
49
|
+
If you modify a copy of the Library, and, in your modifications, a
|
50
|
+
facility refers to a function or data to be supplied by an Application
|
51
|
+
that uses the facility (other than as an argument passed when the
|
52
|
+
facility is invoked), then you may convey a copy of the modified
|
53
|
+
version:
|
54
|
+
|
55
|
+
a) under this License, provided that you make a good faith effort to
|
56
|
+
ensure that, in the event an Application does not supply the
|
57
|
+
function or data, the facility still operates, and performs
|
58
|
+
whatever part of its purpose remains meaningful, or
|
59
|
+
|
60
|
+
b) under the GNU GPL, with none of the additional permissions of
|
61
|
+
this License applicable to that copy.
|
62
|
+
|
63
|
+
3. Object Code Incorporating Material from Library Header Files.
|
64
|
+
|
65
|
+
The object code form of an Application may incorporate material from
|
66
|
+
a header file that is part of the Library. You may convey such object
|
67
|
+
code under terms of your choice, provided that, if the incorporated
|
68
|
+
material is not limited to numerical parameters, data structure
|
69
|
+
layouts and accessors, or small macros, inline functions and templates
|
70
|
+
(ten or fewer lines in length), you do both of the following:
|
71
|
+
|
72
|
+
a) Give prominent notice with each copy of the object code that the
|
73
|
+
Library is used in it and that the Library and its use are
|
74
|
+
covered by this License.
|
75
|
+
|
76
|
+
b) Accompany the object code with a copy of the GNU GPL and this license
|
77
|
+
document.
|
78
|
+
|
79
|
+
4. Combined Works.
|
80
|
+
|
81
|
+
You may convey a Combined Work under terms of your choice that,
|
82
|
+
taken together, effectively do not restrict modification of the
|
83
|
+
portions of the Library contained in the Combined Work and reverse
|
84
|
+
engineering for debugging such modifications, if you also do each of
|
85
|
+
the following:
|
86
|
+
|
87
|
+
a) Give prominent notice with each copy of the Combined Work that
|
88
|
+
the Library is used in it and that the Library and its use are
|
89
|
+
covered by this License.
|
90
|
+
|
91
|
+
b) Accompany the Combined Work with a copy of the GNU GPL and this license
|
92
|
+
document.
|
93
|
+
|
94
|
+
c) For a Combined Work that displays copyright notices during
|
95
|
+
execution, include the copyright notice for the Library among
|
96
|
+
these notices, as well as a reference directing the user to the
|
97
|
+
copies of the GNU GPL and this license document.
|
98
|
+
|
99
|
+
d) Do one of the following:
|
100
|
+
|
101
|
+
0) Convey the Minimal Corresponding Source under the terms of this
|
102
|
+
License, and the Corresponding Application Code in a form
|
103
|
+
suitable for, and under terms that permit, the user to
|
104
|
+
recombine or relink the Application with a modified version of
|
105
|
+
the Linked Version to produce a modified Combined Work, in the
|
106
|
+
manner specified by section 6 of the GNU GPL for conveying
|
107
|
+
Corresponding Source.
|
108
|
+
|
109
|
+
1) Use a suitable shared library mechanism for linking with the
|
110
|
+
Library. A suitable mechanism is one that (a) uses at run time
|
111
|
+
a copy of the Library already present on the user's computer
|
112
|
+
system, and (b) will operate properly with a modified version
|
113
|
+
of the Library that is interface-compatible with the Linked
|
114
|
+
Version.
|
115
|
+
|
116
|
+
e) Provide Installation Information, but only if you would otherwise
|
117
|
+
be required to provide such information under section 6 of the
|
118
|
+
GNU GPL, and only to the extent that such information is
|
119
|
+
necessary to install and execute a modified version of the
|
120
|
+
Combined Work produced by recombining or relinking the
|
121
|
+
Application with a modified version of the Linked Version. (If
|
122
|
+
you use option 4d0, the Installation Information must accompany
|
123
|
+
the Minimal Corresponding Source and Corresponding Application
|
124
|
+
Code. If you use option 4d1, you must provide the Installation
|
125
|
+
Information in the manner specified by section 6 of the GNU GPL
|
126
|
+
for conveying Corresponding Source.)
|
127
|
+
|
128
|
+
5. Combined Libraries.
|
129
|
+
|
130
|
+
You may place library facilities that are a work based on the
|
131
|
+
Library side by side in a single library together with other library
|
132
|
+
facilities that are not Applications and are not covered by this
|
133
|
+
License, and convey such a combined library under terms of your
|
134
|
+
choice, if you do both of the following:
|
135
|
+
|
136
|
+
a) Accompany the combined library with a copy of the same work based
|
137
|
+
on the Library, uncombined with any other library facilities,
|
138
|
+
conveyed under the terms of this License.
|
139
|
+
|
140
|
+
b) Give prominent notice with the combined library that part of it
|
141
|
+
is a work based on the Library, and explaining where to find the
|
142
|
+
accompanying uncombined form of the same work.
|
143
|
+
|
144
|
+
6. Revised Versions of the GNU Lesser General Public License.
|
145
|
+
|
146
|
+
The Free Software Foundation may publish revised and/or new versions
|
147
|
+
of the GNU Lesser General Public License from time to time. Such new
|
148
|
+
versions will be similar in spirit to the present version, but may
|
149
|
+
differ in detail to address new problems or concerns.
|
150
|
+
|
151
|
+
Each version is given a distinguishing version number. If the
|
152
|
+
Library as you received it specifies that a certain numbered version
|
153
|
+
of the GNU Lesser General Public License "or any later version"
|
154
|
+
applies to it, you have the option of following the terms and
|
155
|
+
conditions either of that published version or of any later version
|
156
|
+
published by the Free Software Foundation. If the Library as you
|
157
|
+
received it does not specify a version number of the GNU Lesser
|
158
|
+
General Public License, you may choose any version of the GNU Lesser
|
159
|
+
General Public License ever published by the Free Software Foundation.
|
160
|
+
|
161
|
+
If the Library as you received it specifies that a proxy can decide
|
162
|
+
whether future versions of the GNU Lesser General Public License shall
|
163
|
+
apply, that proxy's public statement of acceptance of any version is
|
164
|
+
permanent authorization for you to choose that version for the
|
165
|
+
Library.
|
data/README.md
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
Pedophile
|
2
|
+
=========
|
3
|
+
|
4
|
+
Download static web pages.
|
5
|
+
|
6
|
+
Sample usage
|
7
|
+
------------
|
8
|
+
|
9
|
+
<pre><code>
|
10
|
+
p = Pedophile::Downloader.new
|
11
|
+
|
12
|
+
p.url = "http://www.classnamer.com/"
|
13
|
+
|
14
|
+
# clear tmp directory
|
15
|
+
p.wget.clear!
|
16
|
+
|
17
|
+
# sign in using a Devise-like login form
|
18
|
+
#p.login.devise_login("http://www.classnamer.com/login", "email@email.com", "password")
|
19
|
+
|
20
|
+
# download, process
|
21
|
+
p.make_it_so
|
22
|
+
|
23
|
+
# zip into single file in tmp/site/site.zip
|
24
|
+
p.zip("site.zip")
|
25
|
+
</code></pre>
|
data/Rakefile
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
# encoding: utf-8
#
# Rake tasks for the gem: Jeweler packaging/release tasks, the RSpec `spec`
# task (default), and coverage helpers.

require 'rubygems'
require 'bundler'
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => e
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end
require 'rake'

require 'jeweler'
Jeweler::Tasks.new do |gem|
  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  gem.name = "pedophile"
  gem.homepage = "http://github.com/akwiatkowski/pedophile"
  gem.license = "LGPLv3"
  gem.summary = %Q{download static pages for offline usage}
  gem.description = %Q{download static pages for offline usage.}
  gem.email = "bobikx@poczta.fm"
  gem.authors = ["Aleksander Kwiatkowski"]
  # dependencies defined in Gemfile

  gem.files = FileList[
    "[A-Z]*", "{bin,generators,lib,test}/**/*"
  ]
end
Jeweler::RubygemsDotOrgTasks.new

require 'rspec/core'
require 'rspec/core/rake_task'
RSpec::Core::RakeTask.new(:spec) do |spec|
  spec.pattern = FileList['spec/**/*_spec.rb']
end

task :default => :spec

require 'rdoc/task'

desc "Run RSpec with code coverage"
task :coverage do
  # Set the flag in this process; the rspec child process spawned by the
  # :spec task inherits it. Unlike running rake in backticks, this shows
  # the spec output and propagates a failing exit status.
  ENV['COVERAGE'] = 'true'
  Rake::Task[:spec].invoke
  #`open coverage/index.html`
end

# The RSpec 3 rake task has no `rcov=` option (the locked rspec is 3.2), so
# the former `spec.rcov = true` raised NoMethodError when `rake rcov` ran.
# Keep the task name working by delegating to :coverage.
desc "Run RSpec with code coverage (alias of coverage)"
task :rcov => :coverage
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.0.0
|
data/lib/pedophile.rb
ADDED
@@ -0,0 +1,107 @@
|
|
1
|
+
require 'fileutils'
|
2
|
+
require 'yaml'
|
3
|
+
|
4
|
+
module Pedophile
  # Handles a folder of "big files" that is copied next to the mirrored site
  # instead of being downloaded, and rewrites links inside the mirrored pages
  # so they point at the copied files.
  class BigFiles
    # When true, #analyze also records each file's MIME type via `file --mime`.
    USE_MIME = false
    # YAML file used by #save_analyzed / #load_analyzed.
    TMP_STRUCTURE_PATH = File.absolute_path(File.join(Wget::TMP_PATH, "big_files.yaml"))

    attr_reader :downloader, :full_path, :files, :files_path

    # downloader - the owning Downloader; used to reach wget and offline_tree.
    def initialize(downloader)
      @downloader = downloader
      @files = []
    end

    # Directory of the mirrored site, as reported by the downloader's wget.
    def offline_path
      downloader.wget.offline_path
    end

    # Copies +path+ recursively into the mirrored site directory and records
    # it as the big-files folder. Returns +path+.
    def copy_folder(path)
      puts "copying big files path #{path.to_s.cyan}"
      FileUtils.cp_r(path, offline_path)
      puts "done copying path #{path.to_s.cyan}"
      # Must go through `self.`: a bare `big_files_path = path` only creates a
      # local variable and leaves files_path / full_path unset.
      self.big_files_path = path
    end

    # Records +path+ as the big-files folder (files_path) and its location
    # inside the mirrored site (full_path).
    def big_files_path=(path)
      @files_path = path
      @full_path = File.join(offline_path, path)
    end

    # Walks full_path recursively, appends one hash per regular file to
    # @files (:path, plus :mime when USE_MIME) and saves the list as YAML.
    def analyze
      glob_path = "#{full_path}/**/**"
      puts "big files path #{full_path.to_s.cyan}"

      Dir.glob(glob_path) do |item|
        next if item == '.' || item == '..' || File.directory?(item)

        puts "analyze file #{item.to_s.yellow}"

        entry = { path: item }
        entry[:mime] = detect_mime(item) if USE_MIME
        @files << entry
      end

      save_analyzed
    end

    # Writes @files as YAML to TMP_STRUCTURE_PATH.
    def save_analyzed
      # Block form closes the file even if serialisation raises.
      File.open(TMP_STRUCTURE_PATH, "w") { |f| f.puts @files.to_yaml }
    end

    # Replaces @files with the list stored in TMP_STRUCTURE_PATH.
    def load_analyzed
      @files = load_structure(TMP_STRUCTURE_PATH)
    end

    # For every analysed big file, rewrites links to it in the mirrored pages
    # and marks the entry as :done.
    def gsub_links
      files.each do |f|
        file_path = f[:path].clone
        smaller_path = file_path.gsub(full_path, "")
        smaller_path.gsub!(/^\//, '')

        gsub_big_file(smaller_path)
        f[:done] = true
      end
    end

    # Finds every link (in the offline tree's analysed files) that contains
    # +smaller_path+ and replaces it with files_path/smaller_path.
    def gsub_big_file(smaller_path)
      puts "process big file #{smaller_path.to_s.green}"

      downloader.offline_tree.files.each do |f|
        next unless f[:inside]

        to_rename = f[:inside].select { |fi| fi[:path].index(smaller_path) }

        # TODO gsub path issue with html files inside
        to_rename.each do |fi|
          original_string = fi[:path]
          new_string = File.join(files_path, smaller_path)

          puts "rename big file #{original_string.to_s.blue} to #{new_string.to_s.green}"

          downloader.offline_tree.process_massive_gsub(original_string, new_string, true)
        end
      end
    end

    private

    # Returns the "type/subtype" reported by `file --mime`, or nil when the
    # output has no such token. The array form of IO.popen bypasses the shell,
    # so file names with spaces or shell metacharacters are passed verbatim.
    def detect_mime(item)
      output = IO.popen(['file', '--mime', item], &:read)
      output[/(\w+\/\w+);/, 1]
    end

    # Loads a YAML file this class wrote itself. Psych 4's load_file is a safe
    # load that rejects aliases, so prefer unsafe_load_file where it exists;
    # older Psych versions only provide load_file.
    def load_structure(path)
      if YAML.respond_to?(:unsafe_load_file)
        YAML.unsafe_load_file(path)
      else
        YAML.load_file(path)
      end
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'active_support/all'
|
2
|
+
|
3
|
+
module Pedophile
  # Entry point of the gem: owns the login, wget, offline-tree and big-files
  # helpers and exposes the high-level "mirror then post-process" workflow.
  class Downloader
    # URL of the site to mirror.
    attr_accessor :url

    attr_reader :login, :wget, :offline_tree, :big_files

    # Builds the helper objects; each one receives this downloader.
    def initialize
      @login = Login.new(self)
      @wget = Wget.new(self)
      @offline_tree = OfflineTree.new(self)
      @big_files = BigFiles.new(self)
    end

    # Mirrors the site with wget, then runs the offline tree post-processing.
    # Returns whatever the offline tree's make_it_so returns.
    def make_it_so
      self.wget.mirror
      self.offline_tree.make_it_so
    end

    # Delegates to the offline tree to zip the mirrored site into +name+.
    def zip(name = "site.zip")
      self.offline_tree.zip(name)
    end
  end
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'uri'
|
2
|
+
require 'colorize'
|
3
|
+
|
4
|
+
module Pedophile
  # Signs in through a Devise-style HTML form using the downloader's wget
  # wrapper.
  class Login

    def initialize(downloader)
      @downloader = downloader
    end

    attr_reader :downloader

    # Downloads the page at +url+, extracts the authenticity token from it and
    # POSTs the credentials back to +url+.
    #
    # url      - address of the login page (also the POST target)
    # email    - value sent as user[email]
    # password - value sent as user[password]
    #
    # Returns the body returned by the POST request.
    def devise_login(url, email, password)
      host = URI.parse(url).host

      page = @downloader.wget.download(url)

      token_match = /<input name=\"authenticity_token\" type=\"hidden\" value=\"([^"]+)\" \/>/.match(page)
      token = token_match ? token_match[1] : nil
      puts "got devise token #{token.to_s.blue}" if token_match

      action_match = /action=\"([^"]+)\"/.match(page)
      action_url = action_match ? action_match[1] : nil
      puts "got action url #{action_url.to_s.blue}" if action_match

      # NOTE(review): sign_url is only printed; the POST below targets +url+,
      # not the form's action. Confirm whether that is intended.
      sign_url = "http://#{host}#{action_url}"
      puts "sign action url #{sign_url.to_s.blue}"

      # Flat, Rails-style form field names.
      form_fields = {
        "utf8"=>"✓",
        "authenticity_token" => token,
        "user[email]" => email,
        "user[password]" => password,
        "user[remember_me]" => 1
      }

      @downloader.wget.post(url, form_fields)
    end

  end
end
|
@@ -0,0 +1,307 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
require 'pathname'
|
3
|
+
|
4
|
+
module Pedophile
  # Post-processes a site mirrored by wget: catalogues the downloaded files,
  # renames files whose names carry query-string residue or unusual
  # characters, and rewrites the references to them inside text files.
  class OfflineTree
    # YAML dump of the analysed file list (@files).
    TMP_STRUCTURE_PATH = File.absolute_path(File.join(Wget::TMP_PATH, "files.yaml"))
    # YAML dump of the rename/gsub log (@changes).
    TMP_CHANGES_PATH = File.absolute_path(File.join(Wget::TMP_PATH, "changes.yaml"))
    # When true, process_massive_gsub rewrites replacements as paths relative
    # to the file being edited.
    FIX_RELATIVE_PATH = false

    def initialize(downloader)
      @downloader = downloader
      @files = []
      @changes = []
    end

    attr_reader :downloader, :files

    # Full pipeline: analyse, reload the analysis, clean up file names, then
    # persist the updated file list and the change log.
    def make_it_so
      analyze
      load_analyzed

      process_bad_suffix1
      process_bad_suffix2
      process_bad_filenames
      save_analyzed
      save_changes
    end

    # Zips the mirrored site directory into +output_file+ (created inside
    # Wget::TMP_OFFLINE_PATH). Returns the output of the shell command.
    # NOTE(review): both values are interpolated into a shell command as-is.
    def zip(output_file = 'site.zip')
      command = "cd #{Wget::TMP_OFFLINE_PATH}; zip -r #{output_file} #{self.downloader.wget.site_last_path}"
      puts command
      `#{command}`
    end

    # Destructive part
    # NOTE(review): load_processed, remove_bad_suffix and rename_files are not
    # defined in this class; unless they come from elsewhere this raises
    # NameError when called.
    def after_process
      load_processed
      remove_bad_suffix
      rename_files
    end

    # Directory of the mirrored site (memoised).
    def path
      @path ||= self.downloader.wget.offline_path
      @path
    end

    # Walks the mirrored site, recording for every regular file its path and
    # MIME type, plus (for text/html and text/plain) the quoted strings found
    # inside it. Appends to @files and saves the result.
    def analyze
      # because I don't want to read all wget options...
      glob_path = "#{path}/**/**"
      puts "offline path #{path.to_s.cyan}"

      Dir.glob(glob_path) do |item|
        next if item == '.' || item == '..' || File.directory?(item)

        puts "analyze file #{item.to_s.yellow}"

        h = {}
        h[:path] = item

        mime = detect_mime(item)
        h[:mime] = mime

        if mime == 'text/html' || mime == 'text/plain'
          h[:inside] = analyze_file(item)
        end

        @files << h
      end

      save_analyzed
    end

    # Writes @files as YAML to TMP_STRUCTURE_PATH.
    def save_analyzed
      # Block form closes the file even if serialisation raises.
      File.open(TMP_STRUCTURE_PATH, "w") { |f| f.puts @files.to_yaml }
    end

    # Writes @changes as YAML to TMP_CHANGES_PATH.
    def save_changes
      File.open(TMP_CHANGES_PATH, "w") { |f| f.puts @changes.to_yaml }
    end

    # Replaces @files with the list stored in TMP_STRUCTURE_PATH.
    def load_analyzed
      @files = load_structure(TMP_STRUCTURE_PATH)
    end

    # Collects every double- or single-quoted string in +file+ as a candidate
    # path. Returns an array of hashes with :exists, :is_file (both checked
    # relative to the file's directory) and :path (the raw string).
    def analyze_file(file)
      s = File.read(file)

      possible_paths = s.scan(/"([^"]+)"/).flatten.uniq
      possible_paths += s.scan(/'([^']+)'/).flatten.uniq

      relative_file_path = File.dirname(file)

      paths = []
      possible_paths.each do |pp|
        next unless is_path_ok?(pp)

        h = {}
        f = File.join(relative_file_path, pp)
        h[:exists] = File.exist?(f)
        h[:is_file] = File.file?(f)
        h[:path] = pp

        paths << h if should_add_path?(h)
      end

      paths
    end

    # TODO - check if this string is correct unix path
    # Currently only rejects strings of 200 characters or more.
    def is_path_ok?(pp)
      # pp =~ /\A(?:[0-9a-zA-Z\_\-]+\/?)+\z/
      pp.size < 200
    end

    # TODO
    # Currently accepts every candidate.
    def should_add_path?(h)
      return true
      #return h[:is_file]
    end

    # Directory of the mirrored site (memoised); stripped from paths when
    # building link replacements.
    def base_path
      @base_path ||= self.downloader.wget.offline_path
      @base_path
    end

    # PROCESSING

    # Removes "?body=1" from file names and "%3Fbody=1" from file contents.
    def process_bad_suffix2
      @files.each do |f|
        old_file = f[:path]
        rename_when_changed(old_file, old_file.gsub(/\?body=1/, ''))
      end

      process_massive_gsub("%3Fbody=1", "", false)
    end

    # Removes numeric query suffixes ("?123" / "%3F123") from file names, from
    # referenced paths that exist on disk, and from file contents.
    def process_bad_suffix1
      @files.each do |f|
        old_file = f[:path]
        rename_when_changed(old_file, strip_numeric_suffix(old_file))

        next unless f[:inside]

        f[:inside].each do |fi|
          old_file = fi[:path]
          next unless File.exist?(old_file)

          rename_when_changed(old_file, strip_numeric_suffix(old_file))
        end
      end

      process_massive_gsub(/\%3F\d+/, "", false)
    end

    # Replaces every character outside [0-9A-Za-z.-/:] with "_" in file names
    # and in referenced paths that exist on disk.
    def process_bad_filenames
      @files.each do |f|
        old_file = f[:path]
        rename_when_changed(old_file, sanitize_filename(old_file))

        next unless f[:inside]

        f[:inside].each do |fi|
          old_file = fi[:path]
          next unless File.exist?(old_file)

          rename_when_changed(old_file, sanitize_filename(old_file))
        end
      end
    end

    #def process_bad_filenames_links
    #  process_massive_gsub(/\%3F/, "_", false)
    #end

    # Renames a file on disk, updates every matching :path in @files, logs the
    # change, and rewrites references to the old name inside analysed files.
    def process_rename_file(old_file_path, new_file_path)
      puts "rename from #{old_file_path.to_s.blue} to #{new_file_path.to_s.green}"

      # clone to not allow modify of @files
      old_file = old_file_path.clone
      new_file = new_file_path.clone
      # this will be with full path
      old_file_with_path = old_file_path.clone

      old_file.gsub!(base_path, '')
      new_file.gsub!(base_path, '')

      # ignore slashes
      old_file.gsub!(/^\//, '')
      new_file.gsub!(/^\//, '')

      # 1. rename 1 file
      new_file_path = old_file_with_path.gsub(old_file, new_file)
      File.rename(old_file_with_path, new_file_path)

      # internal log-like
      @changes << { rename: { old: old_file_with_path, new: new_file_path } }

      # 2. rename in @files
      @files.each do |f|
        f[:path] = new_file_path if f[:path] == old_file_with_path

        next unless f[:inside]

        f[:inside].each do |fi|
          fi[:path] = new_file_path if fi[:path] == old_file_with_path
        end
      end

      # 3. gsub all files
      # gsub files after renaming
      process_massive_gsub(old_file, new_file, true)
      process_massive_gsub(old_file.gsub("?", "%3F"), new_file, true)

      puts "RENAMED #{old_file.to_s.blue} to #{new_file.to_s.green}"
    end

    # Replaces +from+ (String or Regexp) with +to+ in the contents of every
    # analysed text file and in the recorded :inside paths of those files.
    # Raises RuntimeError when a recorded file is missing on disk.
    #
    # check_paths - together with FIX_RELATIVE_PATH, rewrites the replacement
    #               as a path relative to the file being edited.
    def process_massive_gsub(from, to, check_paths = false)
      puts "massive gsub #{from.to_s.blue} to #{to.to_s.green}"

      @files.each do |f|
        # must be proper mime before, so not needed to check
        next unless f[:inside]

        file_path = f[:path].clone

        puts "  open #{file_path.to_s.red}"

        # Per-file replacement. The argument itself must not be overwritten,
        # otherwise each later file would start from the previous file's
        # relative path instead of the original target.
        replacement = to

        # relative path fix
        if check_paths && FIX_RELATIVE_PATH
          absolute_path = File.absolute_path(File.dirname(file_path))
          first = Pathname.new(absolute_path)

          to_path = File.join(path, to)
          second = Pathname.new(File.absolute_path(to_path))
          replacement = second.relative_path_from(first).to_s
        end

        raise "file #{file_path} not found" unless File.exist?(file_path)

        s = File.read(file_path)

        # logs
        if s.index(from)
          @changes << { gsub: { old: from, new: replacement, file: file_path, old_from: from.to_s, old_to: to.to_s } }
        end

        s.gsub!(from, replacement)

        File.open(file_path, "w") { |j| j.puts(s) }

        f[:inside].each do |fi|
          fi[:path].gsub!(from, replacement)
        end

        puts "  done #{file_path.to_s.red}"
      end
    end

    private

    # Returns the "type/subtype" reported by `file --mime`, or nil when the
    # output has no such token. The array form of IO.popen bypasses the shell,
    # so mirrored file names containing spaces, "?", "&" etc. are passed
    # verbatim instead of being interpreted.
    def detect_mime(item)
      output = IO.popen(['file', '--mime', item], &:read)
      output[/(\w+\/\w+);/, 1]
    end

    # Loads a YAML file this class wrote itself. Psych 4's load_file is a safe
    # load that rejects aliases, so prefer unsafe_load_file where it exists;
    # older Psych versions only provide load_file.
    def load_structure(yaml_path)
      if YAML.respond_to?(:unsafe_load_file)
        YAML.unsafe_load_file(yaml_path)
      else
        YAML.load_file(yaml_path)
      end
    end

    # Strips "?<digits>" and "%3F<digits>" from +name+.
    def strip_numeric_suffix(name)
      name.gsub(/\?\d+/, '').gsub(/\%3F\d+/, '')
    end

    # Replaces every character outside [0-9A-Za-z.-/:] with "_".
    def sanitize_filename(name)
      name.gsub(/[^0-9A-Za-z.\-\/:]/, '_')
    end

    # Runs process_rename_file only when the name actually changed.
    def rename_when_changed(old_file, new_file)
      process_rename_file(old_file, new_file) unless new_file == old_file
    end
  end
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
require 'active_support/all'
|
2
|
+
require 'fileutils'
|
3
|
+
|
4
|
+
require 'shellwords'

module Pedophile
  # Thin wrapper around the `wget` command line tool. All work happens in a
  # "tmp" directory relative to the current working directory.
  class Wget
    TMP_PATH = "tmp"
    TMP_ABSOLUTE_PATH = File.absolute_path(TMP_PATH)
    # Scratch file that receives the body of single-page downloads and POSTs.
    TMP_FILE_PATH = File.absolute_path(File.join(TMP_PATH, "tmp.tmp"))
    # Cookie jar shared by all wget calls so a login session persists.
    COOKIES_FILE_PATH = File.absolute_path(File.join(TMP_PATH, "cookies.txt"))
    # Directory the site is mirrored into.
    TMP_OFFLINE_PATH = File.join(TMP_PATH, "site")

    WGET_PARAMS = "-v --random-wait --user-agent=Mozilla/5.0 --keep-session-cookies --load-cookies #{COOKIES_FILE_PATH} --save-cookies #{COOKIES_FILE_PATH}"
    # http://www.gnu.org/software/wget/manual/html_node/Download-Options.html
    #WGET_RESTRICT_FILE_NAMES = "windows" # windows, ascii, unix
    WGET_RESTRICT_FILE_NAMES = "unix"
    WGET_MIRROR_PARAMS = "--adjust-extension --mirror --page-requisites --convert-links --restrict-file-names=#{WGET_RESTRICT_FILE_NAMES}"

    # downloader - the owning Downloader; its #url is what #mirror fetches.
    # Creates the tmp directories as a side effect.
    def initialize(downloader)
      @downloader = downloader
      prepare_tmp_path
    end

    attr_reader :downloader

    # Creates TMP_PATH and TMP_OFFLINE_PATH when they do not exist yet.
    def prepare_tmp_path
      # File.exist? rather than File.exists?, which was removed in Ruby 3.2.
      [TMP_PATH, TMP_OFFLINE_PATH].each do |dir|
        Dir.mkdir(dir) unless File.exist?(dir)
      end
    end

    # Downloads +url+ into the scratch file and returns its contents.
    def download(url)
      # The URL is escaped so "&", "?", ";" etc. are not interpreted by the shell.
      `wget #{WGET_PARAMS} #{Shellwords.escape(url.to_s)} -O#{TMP_FILE_PATH}`
      File.read(TMP_FILE_PATH)
    end

    # POSTs +params+ (serialised with #to_query, which must be available on
    # the object passed in) to +url+ and returns the response body.
    def post(url, params)
      post_data = params.to_query
      `wget #{WGET_PARAMS} #{Shellwords.escape(url.to_s)} --post-data #{Shellwords.escape(post_data)} -O#{TMP_FILE_PATH}`
      File.read(TMP_FILE_PATH)
    end

    # Mirrors downloader.url into TMP_OFFLINE_PATH. Returns wget's stdout.
    def mirror
      # "&&" so wget never runs in the wrong directory when the cd fails.
      `cd #{TMP_OFFLINE_PATH} && wget #{WGET_PARAMS} #{WGET_MIRROR_PARAMS} #{Shellwords.escape(self.downloader.url.to_s)}`
    end

    # Deletes the whole tmp directory and recreates the empty skeleton.
    def clear!
      FileUtils.rm_rf(TMP_ABSOLUTE_PATH)
      prepare_tmp_path
    end

    # Name of the first entry inside TMP_OFFLINE_PATH (nil when it is empty).
    # NOTE(review): presumably the directory wget created for the mirrored
    # host; Dir.entries order is not guaranteed if several entries exist.
    def site_last_path
      (Dir.entries(Wget::TMP_OFFLINE_PATH) - ["..", "."]).first
    end

    # Path of the mirrored site directory: TMP_OFFLINE_PATH/site_last_path.
    def offline_path
      File.join(TMP_OFFLINE_PATH, site_last_path)
    end

  end
end
|
metadata
ADDED
@@ -0,0 +1,141 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: pedophile
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Aleksander Kwiatkowski
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-03-16 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: activesupport
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: colorize
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: bundler
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: jeweler
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: simplecov
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
description: download static pages for offline usage.
|
98
|
+
email: bobikx@poczta.fm
|
99
|
+
executables: []
|
100
|
+
extensions: []
|
101
|
+
extra_rdoc_files:
|
102
|
+
- LICENSE.txt
|
103
|
+
- README.md
|
104
|
+
files:
|
105
|
+
- Gemfile
|
106
|
+
- Gemfile.lock
|
107
|
+
- LICENSE.txt
|
108
|
+
- README.md
|
109
|
+
- Rakefile
|
110
|
+
- VERSION
|
111
|
+
- lib/pedophile.rb
|
112
|
+
- lib/pedophile/big_files.rb
|
113
|
+
- lib/pedophile/downloader.rb
|
114
|
+
- lib/pedophile/login.rb
|
115
|
+
- lib/pedophile/offline_tree.rb
|
116
|
+
- lib/pedophile/wget.rb
|
117
|
+
homepage: http://github.com/akwiatkowski/pedophile
|
118
|
+
licenses:
|
119
|
+
- LGPLv3
|
120
|
+
metadata: {}
|
121
|
+
post_install_message:
|
122
|
+
rdoc_options: []
|
123
|
+
require_paths:
|
124
|
+
- lib
|
125
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
126
|
+
requirements:
|
127
|
+
- - ">="
|
128
|
+
- !ruby/object:Gem::Version
|
129
|
+
version: '0'
|
130
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
131
|
+
requirements:
|
132
|
+
- - ">="
|
133
|
+
- !ruby/object:Gem::Version
|
134
|
+
version: '0'
|
135
|
+
requirements: []
|
136
|
+
rubyforge_project:
|
137
|
+
rubygems_version: 2.2.2
|
138
|
+
signing_key:
|
139
|
+
specification_version: 4
|
140
|
+
summary: download static pages for offline usage
|
141
|
+
test_files: []
|