justflow 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/bin/justflow +12 -0
- data/lib/justflow.rb +179 -0
- metadata +46 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 6935ba30de3bac6f28a87fcecd09c428818548c0
|
4
|
+
data.tar.gz: 2b65dd78ba571d21e790391c469fd825f1385bd8
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 98c8f5c394d82f70a02829615e3bf001ca1b4b8b70d2e146373da9fe9d45338aed8ba0715cc03eb441b0c1dd1622d57a08f2ee8aa166a8ab0037e498f654e32b
|
7
|
+
data.tar.gz: f4a07631856cf67823471f5d4958620842e30b78a1503916979746f48aa74abf9ffc2b29f084b2e9622dd3ffd58a03a047ee83378dbb29fed3923aef6efacc4a
|
data/bin/justflow
ADDED
data/lib/justflow.rb
ADDED
@@ -0,0 +1,179 @@
|
|
1
|
+
require 'open-uri'
|
2
|
+
require 'nokogiri'
|
3
|
+
require 'net/http'
|
4
|
+
require 'colorize'
|
5
|
+
|
6
|
+
# JustFlow mirrors a single web page into a local directory: it downloads the
# page's scripts, stylesheets and images (plus the assets referenced from the
# stylesheets), rewrites the references to point at the local copies and
# writes the resulting document to index.html.
#
# All methods are module functions. +convert+ is the entry point; it stores
# the page URL in @url and the parsed document in @doc, which the other
# helpers read.
module JustFlow
  module_function

  # Extensions (lower case, leading dot) that are stored under img/.
  IMAGE_EXTENSIONS = %w[.png .jpg .jpeg .gif .svg .tif .tiff].freeze

  # Downloads +url+ and its assets into a directory named after the URL.
  #
  # The directory is created inside the current working directory; the
  # working directory is restored when the method returns or raises.
  #
  # @param url [String] absolute http/https/ftp URL of the page
  # @raise [ArgumentError] if the URL scheme cannot be fetched by open-uri
  def convert(url)
    puts "Converting".yellow + " #{url}..."
    @url = url
    url_parsed = URI.parse(url)
    # Only URI classes extended by open-uri (http, https, ftp) expose a public
    # #open; anything else could never be fetched, so fail before touching
    # the filesystem.
    unless url_parsed.respond_to?(:open)
      raise ArgumentError, "Unsupported URL (expected http, https or ftp): #{url}"
    end

    url_parsed2 = url_parsed.host.to_s + url_parsed.path.to_s + url_parsed.query.to_s
    # Replace characters that are not allowed in file names on common systems.
    target_dir = url_parsed2.gsub(/[\x00\/\\:\*\?\"<>\|]/, '_')
    ensure_mkdir(target_dir)

    # Block form so the process-wide working directory is always restored.
    Dir.chdir(target_dir) do
      # URI#open (from open-uri) instead of Kernel#open: the latter no longer
      # opens URLs on Ruby 3.0+. The block form closes the stream.
      @doc = url_parsed.open { |page| Nokogiri::HTML(page) }

      get_scripts
      get_css
      get_images

      File.open('index.html', 'wb') { |file| file.write(@doc) }
    end
    puts "We done.".green
  end

  # @param extension [String] file extension including the leading dot
  # @return [Boolean] true when the extension is a known image type
  def is_img?(extension)
    IMAGE_EXTENSIONS.include?(extension.to_s.downcase)
  end

  # Everything that is not an image is treated as a font (this is how CSS
  # assets are split between img/ and fonts/).
  #
  # @param extension [String] file extension including the leading dot
  # @return [Boolean]
  def is_font?(extension)
    !is_img?(extension)
  end

  # True for references that must not be downloaded or rewritten: missing or
  # blank values, inline data: URIs and fragment-only references (#id).
  #
  # @param uri [String, nil]
  # @return [Boolean]
  def skippable_uri?(uri)
    stripped = uri.to_s.strip
    stripped.empty? || stripped.start_with?('#') || stripped.downcase.start_with?('data:')
  end

  # Resolves +uri+ against the base +url+.
  #
  # Protocol-relative references (//host/path) inherit the http/https scheme
  # of the base URL and default to http when the base has neither. If the
  # reference cannot be resolved, a warning is printed and the stripped
  # reference is returned unchanged so the caller can still try it.
  #
  # @param url [String] base URL
  # @param uri [String] absolute, relative or protocol-relative reference
  # @return [String]
  def fix_uri(url, uri)
    uri = uri.strip
    if uri.start_with?('//')
      scheme = url.to_s[/\A(https?):/i, 1] || 'http'
      return "#{scheme.downcase}:#{uri}"
    end

    begin
      URI.join(url, uri).to_s
    rescue StandardError => ex
      puts "x".red + " Will try to download anyway. #{ex}"
      uri
    end
  end

  # Loose scheme check: true for any string beginning with "http" (which
  # already covers "https").
  #
  # @param uri [String]
  # @return [Boolean]
  def valid_uri_scheme?(uri)
    uri.start_with?('http')
  end

  # Performs a single GET request.
  # NOTE: redirects are not followed and the status code is not checked; the
  # response is returned as-is.
  #
  # @param uri [String] absolute URL
  # @return [Net::HTTPResponse]
  def get_contents(uri)
    Net::HTTP.get_response(URI.parse(uri))
  end

  # Creates +dirname+ (one level) unless it already is a directory.
  def ensure_mkdir(dirname)
    Dir.mkdir(dirname) unless File.directory?(dirname)
  end

  # Writes +resp.body+ to +save_path+ in binary mode.
  #
  # If the file already exists and is not an image, the new file gets a
  # "_<unix time>" suffix before its extension instead of overwriting the
  # existing one. Existing images are overwritten.
  #
  # @param resp [#body] response whose body is written
  # @param save_path [String] destination path
  # @return [String] the path actually written
  def save_contents(resp, save_path)
    extension = File.extname(save_path)
    if File.file?(save_path) && !is_img?(extension)
      stem = File.basename(save_path, extension)
      renamed = "#{stem}_#{Time.now.to_i}#{extension}"
      # Rebuild only the final path component so that a directory sharing the
      # file's name (css/css.css) is left untouched.
      dir = File.dirname(save_path)
      save_path = dir == '.' ? renamed : File.join(dir, renamed)
    end
    File.open(save_path, 'wb') { |file| file.write(resp.body) }
    save_path
  end

  # Returns +url+ without its query string (everything from the first "?").
  #
  # @param url [String]
  # @return [String] possibly empty, never nil
  def remove_args(url)
    url.to_s[/\A[^?]*/]
  end

  # Downloads every resource matched by +selector+ in @doc into +out_path+
  # and points +source_attr+ at the local copy. Failed downloads are reported
  # and leave the element untouched.
  #
  # @param selector [String] CSS selector for the elements to process
  # @param source_attr [String] attribute holding the resource URL
  # @param out_path [String] directory (relative) that receives the files
  def download_resource(selector, source_attr, out_path)
    @doc.search(selector).each do |resource|
      original_uri = resource[source_attr]
      next if skippable_uri?(original_uri)

      resource_uri = fix_uri(@url, original_uri)
      # Drop the query before taking the basename so a "/" inside the query
      # cannot end up in the file name.
      save_path = File.join(out_path, File.basename(remove_args(resource_uri)))

      begin
        puts "✓".green + " Downloading ... " + resource_uri
        resp = get_contents(resource_uri)
        ensure_mkdir(out_path)
        save_path = save_contents(resp, save_path)
        resource[source_attr] = save_path
      rescue StandardError => ex
        puts "✗".red + " FAIL. Couldn't do it: #{ex}"
      end
    end
  end

  # Downloads the assets referenced through url(...) in a stylesheet and
  # rewrites the references.
  #
  # Images go to img/, everything else to fonts/ (both relative to the
  # working directory); rewritten references are prefixed with "../" because
  # stylesheets live one level down in css/. Inline data: URIs, blank and
  # fragment-only references are left as they are. When a download fails the
  # reference is rewritten to the absolute remote URL so it keeps working.
  #
  # @param css_source [String] stylesheet text
  # @param original_css_url [String] the stylesheet's href as found in the page
  # @return [String] rewritten stylesheet text
  def process_css_urls(css_source, original_css_url)
    url_regex = /url\(['"]?(.*?)['"]?\)/i

    # Resolve every form of href (absolute, relative, protocol-relative)
    # against the page URL so it can serve as the base for the items below.
    original_css_url = fix_uri(@url, original_css_url)

    puts ">".yellow + " Parsing css ... #{original_css_url}"

    css_source.gsub(url_regex) do
      # Capture the match data first; later regexp calls in this block would
      # overwrite it.
      matched = Regexp.last_match(0)
      original_item_url = Regexp.last_match(1).strip
      next matched if skippable_uri?(original_item_url)

      absolute_item_url = fix_uri(original_css_url, original_item_url)
      # Local file name: no query string and no #fragment.
      local_name = File.basename(remove_args(original_item_url).sub(/#.*\z/, ''))
      next matched if local_name.empty?

      out_dir = is_img?(File.extname(local_name)) ? 'img' : 'fonts'

      begin
        resp = get_contents(absolute_item_url)
        ensure_mkdir(out_dir)
        # save_contents may pick a different name on collision; reference the
        # file that was really written.
        saved_path = save_contents(resp, File.join(out_dir, local_name))
        "url('#{File.join('..', saved_path)}')"
      rescue StandardError => ex
        puts "Failed. Couldnt download from CSS: #{ex}".red
        "url('#{absolute_item_url}')"
      end
    end
  end

  # Downloads all <img src> resources into img/.
  def get_images
    download_resource('img[src]', 'src', 'img')
  end

  # Downloads all <script src> resources into js/.
  def get_scripts
    download_resource('script[src]', 'src', 'js')
  end

  # Downloads all linked stylesheets into css/ and then localises the assets
  # each downloaded stylesheet references.
  def get_css
    selector = 'link[rel=stylesheet]'
    # Remember the remote hrefs: download_resource replaces them with local
    # paths, but they are needed as base URLs for the stylesheet's assets.
    original_hrefs = @doc.search(selector).map { |link| link['href'] }
    download_resource(selector, 'href', 'css')

    @doc.search(selector).each_with_index do |link, idx|
      original_css_path = original_hrefs[idx]
      local_css_path = link['href']
      next if original_css_path.nil? || local_css_path.nil?
      # A failed download leaves the remote href in place; nothing to rewrite.
      next unless File.file?(local_css_path)

      # Binary read/write: the file was saved byte-for-byte and may not be
      # valid UTF-8.
      rewritten = process_css_urls(File.binread(local_css_path), original_css_path)
      File.binwrite(local_css_path, rewritten)
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: justflow
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Eddie Flores
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-03-06 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: Fetches web resources and puts them into folders.
|
14
|
+
email: eddflrs@gmail.com
|
15
|
+
executables:
|
16
|
+
- justflow
|
17
|
+
extensions: []
|
18
|
+
extra_rdoc_files: []
|
19
|
+
files:
|
20
|
+
- bin/justflow
|
21
|
+
- lib/justflow.rb
|
22
|
+
homepage:
|
23
|
+
licenses:
|
24
|
+
- Private
|
25
|
+
metadata: {}
|
26
|
+
post_install_message:
|
27
|
+
rdoc_options: []
|
28
|
+
require_paths:
|
29
|
+
- lib
|
30
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - ">="
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '0'
|
35
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - ">="
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '0'
|
40
|
+
requirements: []
|
41
|
+
rubyforge_project:
|
42
|
+
rubygems_version: 2.2.2
|
43
|
+
signing_key:
|
44
|
+
specification_version: 4
|
45
|
+
summary: Gets you ridin
|
46
|
+
test_files: []
|