elsmore 0.2.1 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/Gemfile.lock +1 -1
- data/Rakefile +0 -5
- data/lib/elsmore/command.rb +15 -4
- data/lib/elsmore/emitter.rb +1 -1
- data/lib/elsmore/resource.rb +25 -2
- data/lib/elsmore/rewriter.rb +5 -5
- data/lib/elsmore/scraper.rb +1 -5
- data/lib/elsmore/url.rb +8 -0
- data/lib/elsmore/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c5ea0ee5842f11d5c686b5904b963cccb7e55c5e
|
4
|
+
data.tar.gz: 41422ea9b6381c4d2f7e873c3e3b198dd66bfe2d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 06daf64396d3cef4e3fdb65d942d668161959af0eeb2f7b426804d8cc76bcfa66f1cdae19f35a4de1e8da41fe7a9e1a3d98c4c0333618f6217269d7113805ad9
|
7
|
+
data.tar.gz: 746aa284b811bfe5670f495a5a95491449d48628e3b4fd067e915fc55618625d11dcec60f5643d90c4db98951068f97a8848e5bdb458efbf93479d5a76c9ecc5
|
data/.gitignore
CHANGED
data/Gemfile.lock
CHANGED
data/Rakefile
CHANGED
data/lib/elsmore/command.rb
CHANGED
@@ -19,25 +19,36 @@ module Elsmore
|
|
19
19
|
global_option('--debug') { emitter.debug! }
|
20
20
|
|
21
21
|
command :snap do |c|
|
22
|
-
c.syntax = 'spider <url>
|
22
|
+
c.syntax = 'spider <url>'
|
23
23
|
c.description = 'Spiders a URL within from the given page, sticking within the original domain'
|
24
24
|
c.action do |args, options|
|
25
25
|
scraper = Elsmore::Scraper.new(args.first)
|
26
26
|
scraper.emitter = emitter
|
27
|
-
|
27
|
+
scraper.run
|
28
28
|
|
29
29
|
emitter.newline
|
30
30
|
emitter.newline
|
31
31
|
emitter.say "Processed"
|
32
|
-
emitter.pretty
|
32
|
+
emitter.pretty scraper.processed
|
33
33
|
emitter.newline
|
34
34
|
emitter.say "Could not be processed"
|
35
|
-
emitter.pretty
|
35
|
+
emitter.pretty scraper.invalid
|
36
|
+
|
37
|
+
emitter.newline
|
38
|
+
emitter.say "Run 'elsmore serve #{args.first}' to start a webserver on port 8000 with your local copy"
|
36
39
|
end
|
37
40
|
end
|
38
41
|
alias_command :'go fetch', :'snap'
|
39
42
|
default_command :snap
|
40
43
|
|
44
|
+
command :serve do |c|
|
45
|
+
c.syntax = 'serve <folder_name>'
|
46
|
+
c.description = 'Serve local folder'
|
47
|
+
c.action do |args, options|
|
48
|
+
exec "ruby -run -ehttpd ./#{ARGV[1]} -p8000"
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
41
52
|
run!
|
42
53
|
end
|
43
54
|
end
|
data/lib/elsmore/emitter.rb
CHANGED
data/lib/elsmore/resource.rb
CHANGED
@@ -2,13 +2,16 @@ require 'httparty'
|
|
2
2
|
|
3
3
|
module Elsmore
|
4
4
|
class Resource
|
5
|
-
attr_accessor :url, :filename, :emitter
|
5
|
+
attr_accessor :url, :filename, :emitter, :parent
|
6
6
|
|
7
7
|
def initialize url, parent
|
8
8
|
self.url = Elsmore::Url.new(url, parent)
|
9
|
+
self.parent = parent
|
9
10
|
end
|
10
11
|
|
11
|
-
def write!
|
12
|
+
def write! nested_urls = false
|
13
|
+
process_nested_urls if nested_urls
|
14
|
+
|
12
15
|
writer = Elsmore::Writer.new(self)
|
13
16
|
writer.emitter = emitter
|
14
17
|
writer.write
|
@@ -18,5 +21,25 @@ module Elsmore
|
|
18
21
|
def data
|
19
22
|
@data ||= HTTParty.get(url.canonical_url)
|
20
23
|
end
|
24
|
+
|
25
|
+
private
|
26
|
+
|
27
|
+
def process_nested_urls
|
28
|
+
urls = data.scan(/url\((.*?)\)/i).map do |match|
|
29
|
+
if match[0].start_with?('"') || match[0].start_with?("'")
|
30
|
+
match[0][1...-1]
|
31
|
+
else
|
32
|
+
match[0]
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
urls.each do |nested_url|
|
37
|
+
resource = Elsmore::Resource.new(nested_url, url)
|
38
|
+
resource.emitter = emitter
|
39
|
+
resource.write!
|
40
|
+
|
41
|
+
@data.gsub!(nested_url, resource.url.resource_path)
|
42
|
+
end
|
43
|
+
end
|
21
44
|
end
|
22
45
|
end
|
data/lib/elsmore/rewriter.rb
CHANGED
@@ -21,28 +21,28 @@ module Elsmore
|
|
21
21
|
|
22
22
|
def write_css
|
23
23
|
resource.doc.xpath('//link[@rel="stylesheet"]').each do |element|
|
24
|
-
write_element(element, 'href')
|
24
|
+
write_element(element, 'href', replace: true)
|
25
25
|
end
|
26
26
|
end
|
27
27
|
|
28
28
|
def write_images
|
29
29
|
resource.doc.xpath('//img').each do |element|
|
30
|
-
write_element(element, 'src')
|
30
|
+
write_element(element, 'src', replace: false)
|
31
31
|
end
|
32
32
|
end
|
33
33
|
|
34
34
|
def write_js
|
35
35
|
resource.doc.xpath('//script').each do |element|
|
36
|
-
write_element(element, 'src')
|
36
|
+
write_element(element, 'src', replace: false)
|
37
37
|
end
|
38
38
|
end
|
39
39
|
|
40
|
-
def write_element element, key
|
40
|
+
def write_element element, key, options = {}
|
41
41
|
return unless element.attribute(key)
|
42
42
|
url = element.attribute(key).value
|
43
43
|
_resource = Elsmore::Resource.new(url, resource.url)
|
44
44
|
_resource.emitter = emitter
|
45
|
-
_resource.write!
|
45
|
+
_resource.write!(options[:replace])
|
46
46
|
|
47
47
|
emitter.log("# Rewriting #{url} => #{_resource.filename}") if url != _resource.filename
|
48
48
|
element.attribute(key).value = _resource.filename
|
data/lib/elsmore/scraper.rb
CHANGED
data/lib/elsmore/url.rb
CHANGED
data/lib/elsmore/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: elsmore
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Cristiano Betta
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2016-10-
|
12
|
+
date: 2016-10-23 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|