justflow 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4)
  1. checksums.yaml +7 -0
  2. data/bin/justflow +12 -0
  3. data/lib/justflow.rb +179 -0
  4. metadata +46 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 6935ba30de3bac6f28a87fcecd09c428818548c0
4
+ data.tar.gz: 2b65dd78ba571d21e790391c469fd825f1385bd8
5
+ SHA512:
6
+ metadata.gz: 98c8f5c394d82f70a02829615e3bf001ca1b4b8b70d2e146373da9fe9d45338aed8ba0715cc03eb441b0c1dd1622d57a08f2ee8aa166a8ab0037e498f654e32b
7
+ data.tar.gz: f4a07631856cf67823471f5d4958620842e30b78a1503916979746f48aa74abf9ffc2b29f084b2e9622dd3ffd58a03a047ee83378dbb29fed3923aef6efacc4a
@@ -0,0 +1,12 @@
1
#!/usr/bin/env ruby

# Command-line entry point for JustFlow.
#
# Usage: justflow URL
#
# Downloads the page at URL via JustFlow.convert. Exits with a non-zero
# status when no URL is given or when the conversion fails.

require 'justflow'

abort "Provide the url to get flowin (e.g: http://google.com)" if ARGV.empty?
url = ARGV[0]

begin
  JustFlow.convert(url)
rescue StandardError => ex
  # Rescue StandardError only, so Ctrl-C (Interrupt) and SystemExit are not
  # swallowed. `abort` writes the message to stderr and exits with status 1,
  # so shell callers can detect the failure.
  abort "We couldnt just flow :( -> #{ex}"
end
@@ -0,0 +1,179 @@
1
+ require 'open-uri'
2
+ require 'nokogiri'
3
+ require 'net/http'
4
+ require 'colorize'
5
+
6
# Downloads a web page together with its scripts, stylesheets and images
# (plus the assets referenced from the stylesheets) into a local directory,
# rewriting the references in the saved HTML/CSS to point at the local copies.
#
# All methods are module functions; call them as JustFlow.convert(url) etc.
# The page URL and the parsed document are kept in the module-level instance
# variables @url and @doc while a conversion is running.
module JustFlow
  module_function

  # File extensions (lower-case, with leading dot) that are treated as images.
  IMG_EXTENSIONS = %w[.png .jpg .jpeg .gif .svg .tif .tiff .webp .bmp .ico].freeze

  # Fetches +url+, saves the page as index.html inside a directory named
  # after the URL, and downloads its scripts, stylesheets and images into
  # the js/, css/ and img/ sub-directories.
  #
  # @param url [String] absolute http:// or https:// URL of the page
  # @raise [ArgumentError] if +url+ is not an http(s) URL
  def convert(url)
    unless valid_uri_scheme?(url)
      raise ArgumentError, "URL must start with http:// or https:// (got #{url.inspect})"
    end

    puts "Converting".yellow + " #{url}..."
    @url = url
    url_parsed = URI.parse(url)
    url_parsed2 = url_parsed.host.to_s + url_parsed.path.to_s + url_parsed.query.to_s
    target_dir = url_parsed2.gsub(/[\x00\/\\:\*\?\"<>\|]/, '_')
    ensure_mkdir(target_dir)

    # Block form restores the caller's working directory afterwards, even
    # when a download raises.
    Dir.chdir(target_dir) do
      # URI#open (from open-uri) instead of Kernel#open: the latter no longer
      # opens URLs on Ruby 3 and would execute "| cmd" style input.
      @doc = url_parsed.open { |io| Nokogiri::HTML(io) }

      get_scripts
      get_css
      get_images

      File.open('index.html', 'wb') { |file| file.write(@doc.to_s) }
    end
    puts "We done.".green
  end

  # @param extension [String, nil] file extension including the leading dot
  # @return [Boolean] true when the extension is a known image extension
  def is_img?(extension)
    IMG_EXTENSIONS.include?(extension.to_s.downcase)
  end

  # Everything that is not an image is treated as a font.
  #
  # @param extension [String, nil] file extension including the leading dot
  # @return [Boolean]
  def is_font?(extension)
    !is_img?(extension)
  end

  # Resolves +uri+ (absolute, relative or protocol-relative) against the
  # base +url+. Protocol-relative references ("//host/path") inherit the
  # scheme of the base URL, so https pages keep fetching over https.
  #
  # @param url [String] absolute base URL
  # @param uri [String] reference to resolve
  # @return [String] the resolved URL; when resolution fails the stripped
  #   input is returned (with "http:" prepended to "//..." references)
  def fix_uri(url, uri)
    uri = uri.to_s.strip
    begin
      URI.join(url, uri).to_s
    rescue StandardError => ex
      puts "x".red + " Will try to download anyway. #{ex}"
      uri.start_with?('//') ? 'http:' + uri : uri
    end
  end

  # @param uri [String, nil]
  # @return [Boolean] true only for http:// and https:// URLs
  def valid_uri_scheme?(uri)
    uri.to_s.downcase.start_with?('http://', 'https://')
  end

  # Performs an HTTP GET and returns the successful response, following
  # redirects up to +redirect_limit+ times.
  #
  # @param uri [String] absolute URL to fetch
  # @param redirect_limit [Integer] remaining redirects that may be followed
  # @return [Net::HTTPResponse] a 2xx response
  # @raise [ArgumentError] when the redirect limit is exhausted
  # @raise [IOError] on a non-success status or a redirect without Location
  def get_contents(uri, redirect_limit = 5)
    raise ArgumentError, "Too many redirects fetching #{uri}" if redirect_limit < 0

    resp = Net::HTTP.get_response(URI.parse(uri))
    case resp
    when Net::HTTPSuccess
      resp
    when Net::HTTPRedirection
      location = resp['location']
      raise IOError, "HTTP #{resp.code} without Location for #{uri}" unless location
      get_contents(URI.join(uri, location).to_s, redirect_limit - 1)
    else
      # Do not let error pages be saved as if they were the resource.
      raise IOError, "HTTP #{resp.code} for #{uri}"
    end
  end

  # Creates +dirname+ unless it already exists as a directory.
  def ensure_mkdir(dirname)
    Dir.mkdir(dirname) unless File.directory?(dirname)
  end

  # Writes the response body to +save_path+. Existing images are
  # overwritten; any other existing file is kept and the new one is stored
  # under a name suffixed with the current Unix timestamp.
  #
  # @param resp [#body] response whose body is written
  # @param save_path [String] desired destination path
  # @return [String] the path actually written to
  def save_contents(resp, save_path)
    extension = File.extname(save_path)
    if File.file?(save_path) && !is_img?(extension)
      # Rename only the final path component; a plain gsub on the whole path
      # would also rewrite directories/extensions that contain the stem
      # (e.g. "css/css.css").
      prefix = save_path.chomp(File.basename(save_path))
      stem = File.basename(save_path, extension)
      stamp = Time.now.to_i
      candidate = "#{prefix}#{stem}_#{stamp}#{extension}"
      counter = 0
      # Several clashes within the same second still get distinct names.
      while File.exist?(candidate)
        counter += 1
        candidate = "#{prefix}#{stem}_#{stamp}_#{counter}#{extension}"
      end
      save_path = candidate
    end
    File.open(save_path, 'wb') { |file| file.write(resp.body) }
    save_path
  end

  # @param url [String, nil]
  # @return [String] +url+ without its query string and fragment
  def remove_args(url)
    url.to_s[/\A[^?#]*/]
  end

  # Downloads every resource matched by +selector+ in @doc into +out_path+
  # and points the element's +source_attr+ at the local copy. Elements
  # without a usable http(s) URL (missing attribute, data: URIs, ...) are
  # left untouched, as are elements whose download fails.
  #
  # @param selector [String] selector passed to @doc.search
  # @param source_attr [String] attribute holding the resource URL
  # @param out_path [String] directory the files are saved into
  def download_resource(selector, source_attr, out_path)
    @doc.search(selector).each do |resource|
      raw_uri = resource[source_attr].to_s.strip
      next if raw_uri.empty?

      resource_uri = fix_uri(@url, raw_uri)
      next unless valid_uri_scheme?(resource_uri)

      # Strip the query string before taking the basename; otherwise a "/"
      # inside the query would be mistaken for a path separator.
      save_path = File.join(out_path, File.basename(remove_args(resource_uri)))

      begin
        puts "✓".green + " Downloading ... " + resource_uri
        resp = get_contents(resource_uri)
        ensure_mkdir(out_path)
        save_path = save_contents(resp, save_path)
        resource[source_attr] = save_path
      rescue StandardError => ex
        puts "✗".red + " FAIL. Couldn't do it: #{ex}"
      end
    end
  end

  # Downloads the assets referenced through url(...) in +css_source+ into
  # img/ or fonts/ and rewrites the references to "../img/..." or
  # "../fonts/..." (the stylesheet itself is expected to live in a sibling
  # directory). data: URIs, fragment-only and empty references are left
  # as they are; references whose download fails are rewritten to their
  # absolute remote URL so they keep working from the local stylesheet.
  #
  # @param css_source [String] stylesheet text
  # @param original_css_url [String] URL the stylesheet was loaded from,
  #   as written in the page (may be relative to the page URL)
  # @return [String] the rewritten stylesheet text
  def process_css_urls(css_source, original_css_url)
    url_regex = /url\(\s*['"]?(.*?)['"]?\s*\)/i

    # Resolve the stylesheet URL against the page URL when one is known;
    # this also covers hrefs such as "css/site.css" without a leading "./".
    original_css_url = fix_uri(@url, original_css_url) if @url

    puts ">".yellow + " Parsing css ... #{original_css_url}"

    saved = {} # remote URL (without query) => local path, per stylesheet

    css_source.gsub(url_regex) do |whole|
      original_item_url = Regexp.last_match(1).strip
      next whole if original_item_url.empty? || original_item_url.start_with?('#')
      next whole if original_item_url[0, 5].downcase == 'data:'

      absolute_item_url = fix_uri(original_css_url, original_item_url)
      next whole unless valid_uri_scheme?(absolute_item_url)

      cache_key = remove_args(absolute_item_url)
      local_path = saved[cache_key]

      unless local_path
        clean_item_url = remove_args(original_item_url)
        extension = File.extname(clean_item_url)
        basename = File.basename(clean_item_url)
        out_dir = is_img?(extension) ? 'img' : 'fonts'

        begin
          resp = get_contents(absolute_item_url)
          ensure_mkdir(out_dir)
          # save_contents may pick a different name on a clash; use its result.
          local_path = save_contents(resp, File.join(out_dir, basename))
          saved[cache_key] = local_path
        rescue StandardError => ex
          puts "Failed. Couldnt download from CSS: #{ex}".red
        end
      end

      local_path ? "url('#{File.join('..', local_path)}')" : "url('#{absolute_item_url}')"
    end
  end

  # Downloads all <img src> resources into img/.
  def get_images()
    download_resource('img[src]', 'src', 'img')
  end

  # Downloads all <script src> resources into js/.
  def get_scripts()
    download_resource('script[src]', 'src', 'js')
  end

  # Downloads all linked stylesheets into css/ and then localizes the
  # assets each downloaded stylesheet references.
  def get_css()
    link_tags = @doc.search('link[rel=stylesheet]')
    # Remember the remote hrefs before download_resource overwrites them.
    original_hrefs = link_tags.map { |link| link['href'] }
    download_resource('link[rel=stylesheet]', 'href', 'css')

    link_tags.each_with_index do |link, idx|
      local_css_path = link['href']
      # Skip links without href and those whose download failed (the href
      # is then still the remote URL, not a local file).
      next unless local_css_path && File.file?(local_css_path)

      src = process_css_urls(File.read(local_css_path), original_hrefs[idx])
      File.write(local_css_path, src)
    end
  end
end
metadata ADDED
@@ -0,0 +1,46 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: justflow
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Eddie Flores
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-03-06 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Fetches web resources and puts them into folders.
14
+ email: eddflrs@gmail.com
15
+ executables:
16
+ - justflow
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - bin/justflow
21
+ - lib/justflow.rb
22
+ homepage:
23
+ licenses:
24
+ - Private
25
+ metadata: {}
26
+ post_install_message:
27
+ rdoc_options: []
28
+ require_paths:
29
+ - lib
30
+ required_ruby_version: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: '0'
35
+ required_rubygems_version: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ">="
38
+ - !ruby/object:Gem::Version
39
+ version: '0'
40
+ requirements: []
41
+ rubyforge_project:
42
+ rubygems_version: 2.2.2
43
+ signing_key:
44
+ specification_version: 4
45
+ summary: Gets you ridin
46
+ test_files: []