elsmore 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/Gemfile.lock +1 -1
- data/Rakefile +0 -5
- data/lib/elsmore/command.rb +15 -4
- data/lib/elsmore/emitter.rb +1 -1
- data/lib/elsmore/resource.rb +25 -2
- data/lib/elsmore/rewriter.rb +5 -5
- data/lib/elsmore/scraper.rb +1 -5
- data/lib/elsmore/url.rb +8 -0
- data/lib/elsmore/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c5ea0ee5842f11d5c686b5904b963cccb7e55c5e
|
4
|
+
data.tar.gz: 41422ea9b6381c4d2f7e873c3e3b198dd66bfe2d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 06daf64396d3cef4e3fdb65d942d668161959af0eeb2f7b426804d8cc76bcfa66f1cdae19f35a4de1e8da41fe7a9e1a3d98c4c0333618f6217269d7113805ad9
|
7
|
+
data.tar.gz: 746aa284b811bfe5670f495a5a95491449d48628e3b4fd067e915fc55618625d11dcec60f5643d90c4db98951068f97a8848e5bdb458efbf93479d5a76c9ecc5
|
data/.gitignore
CHANGED
data/Gemfile.lock
CHANGED
data/Rakefile
CHANGED
data/lib/elsmore/command.rb
CHANGED
@@ -19,25 +19,36 @@ module Elsmore
|
|
19
19
|
global_option('--debug') { emitter.debug! }
|
20
20
|
|
21
21
|
command :snap do |c|
|
22
|
-
c.syntax = 'spider <url>
|
22
|
+
c.syntax = 'spider <url>'
|
23
23
|
c.description = 'Spiders a URL within from the given page, sticking within the original domain'
|
24
24
|
c.action do |args, options|
|
25
25
|
scraper = Elsmore::Scraper.new(args.first)
|
26
26
|
scraper.emitter = emitter
|
27
|
-
|
27
|
+
scraper.run
|
28
28
|
|
29
29
|
emitter.newline
|
30
30
|
emitter.newline
|
31
31
|
emitter.say "Processed"
|
32
|
-
emitter.pretty
|
32
|
+
emitter.pretty scraper.processed
|
33
33
|
emitter.newline
|
34
34
|
emitter.say "Could not be processed"
|
35
|
-
emitter.pretty
|
35
|
+
emitter.pretty scraper.invalid
|
36
|
+
|
37
|
+
emitter.newline
|
38
|
+
emitter.say "Run 'elsmore serve #{args.first}' to start a webserver on port 8000 with your local copy"
|
36
39
|
end
|
37
40
|
end
|
38
41
|
alias_command :'go fetch', :'snap'
|
39
42
|
default_command :snap
|
40
43
|
|
44
|
+
command :serve do |c|
|
45
|
+
c.syntax = 'serve <folder_name>'
|
46
|
+
c.description = 'Serve local folder'
|
47
|
+
c.action do |args, options|
|
48
|
+
exec "ruby -run -ehttpd ./#{ARGV[1]} -p8000"
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
41
52
|
run!
|
42
53
|
end
|
43
54
|
end
|
data/lib/elsmore/emitter.rb
CHANGED
data/lib/elsmore/resource.rb
CHANGED
@@ -2,13 +2,16 @@ require 'httparty'
|
|
2
2
|
|
3
3
|
module Elsmore
|
4
4
|
class Resource
|
5
|
-
attr_accessor :url, :filename, :emitter
|
5
|
+
attr_accessor :url, :filename, :emitter, :parent
|
6
6
|
|
7
7
|
def initialize url, parent
|
8
8
|
self.url = Elsmore::Url.new(url, parent)
|
9
|
+
self.parent = parent
|
9
10
|
end
|
10
11
|
|
11
|
-
def write!
|
12
|
+
def write! nested_urls = false
|
13
|
+
process_nested_urls if nested_urls
|
14
|
+
|
12
15
|
writer = Elsmore::Writer.new(self)
|
13
16
|
writer.emitter = emitter
|
14
17
|
writer.write
|
@@ -18,5 +21,25 @@ module Elsmore
|
|
18
21
|
def data
|
19
22
|
@data ||= HTTParty.get(url.canonical_url)
|
20
23
|
end
|
24
|
+
|
25
|
+
private
|
26
|
+
|
27
|
+
def process_nested_urls
|
28
|
+
urls = data.scan(/url\((.*?)\)/i).map do |match|
|
29
|
+
if match[0].start_with?('"') || match[0].start_with?("'")
|
30
|
+
match[0][1...-1]
|
31
|
+
else
|
32
|
+
match[0]
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
urls.each do |nested_url|
|
37
|
+
resource = Elsmore::Resource.new(nested_url, url)
|
38
|
+
resource.emitter = emitter
|
39
|
+
resource.write!
|
40
|
+
|
41
|
+
@data.gsub!(nested_url, resource.url.resource_path)
|
42
|
+
end
|
43
|
+
end
|
21
44
|
end
|
22
45
|
end
|
data/lib/elsmore/rewriter.rb
CHANGED
@@ -21,28 +21,28 @@ module Elsmore
|
|
21
21
|
|
22
22
|
def write_css
|
23
23
|
resource.doc.xpath('//link[@rel="stylesheet"]').each do |element|
|
24
|
-
write_element(element, 'href')
|
24
|
+
write_element(element, 'href', replace: true)
|
25
25
|
end
|
26
26
|
end
|
27
27
|
|
28
28
|
def write_images
|
29
29
|
resource.doc.xpath('//img').each do |element|
|
30
|
-
write_element(element, 'src')
|
30
|
+
write_element(element, 'src', replace: false)
|
31
31
|
end
|
32
32
|
end
|
33
33
|
|
34
34
|
def write_js
|
35
35
|
resource.doc.xpath('//script').each do |element|
|
36
|
-
write_element(element, 'src')
|
36
|
+
write_element(element, 'src', replace: false)
|
37
37
|
end
|
38
38
|
end
|
39
39
|
|
40
|
-
def write_element element, key
|
40
|
+
def write_element element, key, options = {}
|
41
41
|
return unless element.attribute(key)
|
42
42
|
url = element.attribute(key).value
|
43
43
|
_resource = Elsmore::Resource.new(url, resource.url)
|
44
44
|
_resource.emitter = emitter
|
45
|
-
_resource.write!
|
45
|
+
_resource.write!(options[:replace])
|
46
46
|
|
47
47
|
emitter.log("# Rewriting #{url} => #{_resource.filename}") if url != _resource.filename
|
48
48
|
element.attribute(key).value = _resource.filename
|
data/lib/elsmore/scraper.rb
CHANGED
data/lib/elsmore/url.rb
CHANGED
data/lib/elsmore/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: elsmore
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Cristiano Betta
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2016-10-
|
12
|
+
date: 2016-10-23 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|