justflow 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/justflow +12 -0
- data/lib/justflow.rb +179 -0
- metadata +46 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 6935ba30de3bac6f28a87fcecd09c428818548c0
|
4
|
+
data.tar.gz: 2b65dd78ba571d21e790391c469fd825f1385bd8
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 98c8f5c394d82f70a02829615e3bf001ca1b4b8b70d2e146373da9fe9d45338aed8ba0715cc03eb441b0c1dd1622d57a08f2ee8aa166a8ab0037e498f654e32b
|
7
|
+
data.tar.gz: f4a07631856cf67823471f5d4958620842e30b78a1503916979746f48aa74abf9ffc2b29f084b2e9622dd3ffd58a03a047ee83378dbb29fed3923aef6efacc4a
|
data/bin/justflow
ADDED
data/lib/justflow.rb
ADDED
@@ -0,0 +1,179 @@
|
|
1
|
+
require 'open-uri'
|
2
|
+
require 'nokogiri'
|
3
|
+
require 'net/http'
|
4
|
+
require 'colorize'
|
5
|
+
|
6
|
+
# JustFlow mirrors a single web page into a local directory: it downloads the
# page's scripts, stylesheets and images (plus the assets referenced from the
# stylesheets via `url(...)`), rewrites the references to the local copies and
# writes the resulting markup to `index.html`.
#
# All methods are module functions and share state through the module-level
# instance variables `@url` (the page being converted) and `@doc` (the parsed
# Nokogiri document), both set by {JustFlow.convert}.
module JustFlow
  module_function

  # Lower-case file extensions (with leading dot) that are stored under `img/`.
  IMG_EXTS = %w[.png .jpg .jpeg .gif .svg .tif .tiff].freeze

  # Downloads +url+ and its assets into a directory named after the URL.
  #
  # The directory name is host + path + query with characters that are unsafe
  # in file names replaced by `_`. The working directory is switched only for
  # the duration of the conversion and restored afterwards, so repeated calls
  # do not nest their output directories.
  #
  # @param url [String] absolute URL of the page to mirror
  # @return [nil]
  def convert(url)
    puts "Converting".yellow + " #{url}..."
    @url = url
    url_parsed = URI.parse(url)
    flattened = url_parsed.host.to_s + url_parsed.path.to_s + url_parsed.query.to_s
    target_dir = flattened.gsub(/[\x00\/\\:\*\?\"<>\|]/, '_')
    ensure_mkdir(target_dir)

    Dir.chdir(target_dir) do
      @doc = fetch_document(url)

      get_scripts
      get_css
      get_images

      File.open('index.html', 'wb') { |file| file.write(@doc) }
    end
    puts "We done.".green
  end

  # @param extension [String] file extension including the leading dot
  # @return [Boolean] true when the extension (case-insensitive) is in IMG_EXTS
  def is_img?(extension)
    IMG_EXTS.include?(extension.downcase)
  end

  # Everything that is not an image is treated as a font.
  #
  # @param extension [String] file extension including the leading dot
  # @return [Boolean]
  def is_font?(extension)
    !is_img?(extension)
  end

  # Resolves +uri+ (absolute, relative or protocol-relative) against +url+.
  #
  # Protocol-relative references (`//host/path`) inherit the scheme of +url+
  # instead of being forced to `http`. When resolution fails the reference is
  # returned as-is (with `http:` prepended for protocol-relative ones) so the
  # caller can still attempt the download.
  #
  # @param url [String, nil] base URL
  # @param uri [String] reference found in the page or stylesheet
  # @return [String]
  def fix_uri(url, uri)
    uri = uri.strip
    begin
      URI.join(url, uri).to_s
    rescue StandardError => ex
      # No usable base: fall back to plain http for `//host/...` references.
      return 'http:' + uri if uri.start_with?('//')

      puts "x".red + " Will try to download anyway. #{ex}"
      uri
    end
  end

  # @param uri [String]
  # @return [Boolean] true only for `http://` and `https://` URIs
  #   (case-insensitive); strings such as "httpfoo" are rejected
  def valid_uri_scheme?(uri)
    uri.downcase.start_with?('http://', 'https://')
  end

  # Performs an HTTP GET, following up to +redirect_limit+ redirects.
  #
  # @param uri [String] absolute URL
  # @param redirect_limit [Integer] maximum number of redirects to follow
  # @return [Net::HTTPResponse] the final response (which may still be a
  #   redirect or an error response; callers decide what to do with it)
  def get_contents(uri, redirect_limit = 5)
    resp = Net::HTTP.get_response(URI.parse(uri))
    location = resp['location']
    return resp unless resp.is_a?(Net::HTTPRedirection) && location && redirect_limit > 0

    get_contents(URI.join(uri, location).to_s, redirect_limit - 1)
  end

  # Creates +dirname+ unless it already exists as a directory.
  #
  # @param dirname [String]
  def ensure_mkdir(dirname)
    Dir.mkdir(dirname) unless File.directory?(dirname)
  end

  # Writes the response body to +save_path+.
  #
  # An existing image is overwritten. Any other existing file is kept and the
  # new content goes to a sibling named `<name>_<unix time><ext>`; only the
  # file name is altered (never the directory part) and a counter is appended
  # if that name is taken too.
  #
  # @param resp [#body] response whose body is written
  # @param save_path [String] desired destination
  # @return [String] the path that was actually written
  def save_contents(resp, save_path)
    if File.file?(save_path)
      extension = File.extname(save_path)
      unless is_img?(extension)
        basename = File.basename(save_path)
        stem = File.basename(save_path, extension)
        dir_prefix = save_path[0, save_path.length - basename.length]
        stamp = Time.now.to_i
        candidate = "#{dir_prefix}#{stem}_#{stamp}#{extension}"
        counter = 1
        while File.file?(candidate)
          candidate = "#{dir_prefix}#{stem}_#{stamp}_#{counter}#{extension}"
          counter += 1
        end
        save_path = candidate
      end
    end
    File.open(save_path, 'wb') { |file| file.write(resp.body) }
    save_path
  end

  # Strips the query string and fragment from +url+.
  #
  # @param url [String]
  # @return [String] everything before the first `?` or `#` (possibly empty)
  def remove_args(url)
    url.sub(/[?#].*\z/m, '')
  end

  # Downloads every resource matched by +selector+ in `@doc` into +out_path+
  # and points +source_attr+ at the local copy. Elements whose attribute is
  # empty or a `data:` URI are left untouched; failed downloads are reported
  # and leave the original reference in place.
  #
  # @param selector [String] CSS selector, e.g. 'img[src]'
  # @param source_attr [String] attribute holding the URL
  # @param out_path [String] directory for the downloaded files
  def download_resource(selector, source_attr, out_path)
    @doc.search(selector).each do |resource|
      resource_uri = resource[source_attr].to_s
      next if resource_uri.strip.empty? || data_uri?(resource_uri)

      resource_uri = fix_uri(@url, resource_uri)
      # Strip the query first: it may itself contain '/' characters.
      save_path = File.join(out_path, File.basename(remove_args(resource_uri)))

      begin
        puts "✓".green + " Downloading ... " + resource_uri
        resource[source_attr] = fetch_and_save(resource_uri, out_path, save_path)
      rescue StandardError => ex
        puts "✗".red + " FAIL. Couldn't do it: #{ex}"
      end
    end
  end

  # Downloads every asset referenced through `url(...)` in +css_source+ into
  # `img/` or `fonts/` and rewrites the reference to `../<dir>/<file>`.
  # Empty references, `data:` URIs and assets that fail to download keep their
  # original `url(...)` text.
  #
  # @param css_source [String] stylesheet text
  # @param original_css_url [String] href the stylesheet was loaded from; it
  #   is resolved against `@url` and used as base for relative asset URLs
  # @return [String] the rewritten stylesheet
  def process_css_urls(css_source, original_css_url)
    url_regex = /url\(\s*['"]?(.*?)['"]?\s*\)/i
    original_css_url = fix_uri(@url, original_css_url)

    puts ">".yellow + " Parsing css ... #{original_css_url}"

    css_source.gsub(url_regex) do
      whole_match = Regexp.last_match(0)
      original_item_url = Regexp.last_match(1).strip
      next whole_match if original_item_url.empty? || data_uri?(original_item_url)

      absolute_item_url = fix_uri(original_css_url, original_item_url)
      basename = File.basename(remove_args(original_item_url))
      out_dir = is_img?(File.extname(basename)) ? 'img' : 'fonts'
      out_path = File.join(out_dir, basename)

      begin
        saved_path = fetch_and_save(absolute_item_url, out_dir, out_path)
        "url('#{File.join('..', saved_path)}')"
      rescue StandardError => ex
        puts "Failed. Couldnt download from CSS: #{ex}".red
        whole_match
      end
    end
  end

  # Downloads all `<img src>` targets into `img/`.
  def get_images
    download_resource('img[src]', 'src', 'img')
  end

  # Downloads all `<script src>` targets into `js/`.
  def get_scripts
    download_resource('script[src]', 'src', 'js')
  end

  # Downloads all linked stylesheets into `css/`, then rewrites the `url(...)`
  # references inside every stylesheet that now exists locally.
  def get_css
    selector = 'link[rel=stylesheet]'
    # Remember the remote hrefs; download_resource overwrites them in @doc.
    original_hrefs = @doc.search(selector).map { |link| link['href'] }
    download_resource(selector, 'href', 'css')

    @doc.search(selector).each_with_index do |link, idx|
      original_css_path = original_hrefs[idx].to_s
      local_css_path = link['href'].to_s
      next if original_css_path.empty? || !File.file?(local_css_path)

      # Binary I/O keeps stylesheets that are not valid UTF-8 byte-exact.
      rewritten = process_css_urls(File.binread(local_css_path), original_css_path)
      File.open(local_css_path, 'wb') { |file| file.write(rewritten) }
    end
  end

  # Fetches +url+ and parses it with Nokogiri, closing the connection handle.
  # `Kernel#open` stopped opening URLs in Ruby 3.0, so `URI.open` is used when
  # open-uri provides it; the `Kernel.open` branch is only for older Rubies
  # (NOTE(review): legacy path not exercised here - confirm if still needed).
  def fetch_document(url)
    opener = URI.respond_to?(:open) ? URI : Kernel
    opener.open(url) { |io| Nokogiri::HTML(io) }
  end

  # Downloads +uri+ into +save_path+ (inside +out_dir+) and returns the path
  # actually written. Raises IOError for non-2xx responses so that error pages
  # are never stored as assets.
  def fetch_and_save(uri, out_dir, save_path)
    resp = get_contents(uri)
    raise IOError, "HTTP #{resp.code} for #{uri}" unless resp.is_a?(Net::HTTPSuccess)

    ensure_mkdir(out_dir)
    save_contents(resp, save_path)
  end

  # True when +uri+ is an inline `data:` URI, which has nothing to download.
  def data_uri?(uri)
    !!(uri =~ /\A\s*data:/i)
  end

  private_class_method :fetch_document, :fetch_and_save, :data_uri?
end
|
metadata
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: justflow
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Eddie Flores
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-03-06 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: Fetches web resources and puts them into folders.
|
14
|
+
email: eddflrs@gmail.com
|
15
|
+
executables:
|
16
|
+
- justflow
|
17
|
+
extensions: []
|
18
|
+
extra_rdoc_files: []
|
19
|
+
files:
|
20
|
+
- bin/justflow
|
21
|
+
- lib/justflow.rb
|
22
|
+
homepage:
|
23
|
+
licenses:
|
24
|
+
- Private
|
25
|
+
metadata: {}
|
26
|
+
post_install_message:
|
27
|
+
rdoc_options: []
|
28
|
+
require_paths:
|
29
|
+
- lib
|
30
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - ">="
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '0'
|
35
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - ">="
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '0'
|
40
|
+
requirements: []
|
41
|
+
rubyforge_project:
|
42
|
+
rubygems_version: 2.2.2
|
43
|
+
signing_key:
|
44
|
+
specification_version: 4
|
45
|
+
summary: Gets you ridin
|
46
|
+
test_files: []
|