elsmore 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2ff6fae9fc1f96066e8c755fa7ae3625e067a53c
4
- data.tar.gz: 0745051590992a18546840199a85d50defff3769
3
+ metadata.gz: c5ea0ee5842f11d5c686b5904b963cccb7e55c5e
4
+ data.tar.gz: 41422ea9b6381c4d2f7e873c3e3b198dd66bfe2d
5
5
  SHA512:
6
- metadata.gz: a156725315f92cc72094bac0a58e224158dc3d01433a194a66389df20f740c67491ed4ccdab5cbd0688e468170beefe68abfecb10e930bbdd50289b97bae70ef
7
- data.tar.gz: f8974e4d1fa3ae7e73a37b9f1047de010c25a6d6501a7e5a21ba1dc71279a8a4534a4f31cdb4b8b8891c0a65590a375563a2424ad77fe7262a9721fb64c63c1c
6
+ metadata.gz: 06daf64396d3cef4e3fdb65d942d668161959af0eeb2f7b426804d8cc76bcfa66f1cdae19f35a4de1e8da41fe7a9e1a3d98c4c0333618f6217269d7113805ad9
7
+ data.tar.gz: 746aa284b811bfe5670f495a5a95491449d48628e3b4fd067e915fc55618625d11dcec60f5643d90c4db98951068f97a8848e5bdb458efbf93479d5a76c9ecc5
data/.gitignore CHANGED
@@ -1,2 +1,3 @@
1
1
  site
2
2
  pkg
3
+ hackference.co.uk
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- elsmore (0.2.1)
4
+ elsmore (0.3.0)
5
5
  awesome_print
6
6
  colorize
7
7
  commander
data/Rakefile CHANGED
@@ -4,8 +4,3 @@ require 'rake/testtask'
4
4
  task :console do
5
5
  exec "irb -r elsmore -I ./lib"
6
6
  end
7
-
8
- task :bin do
9
- ARGV.shift
10
- exec "ruby -Ilib ./bin/elsmore #{ARGV.join(' ')}"
11
- end
@@ -19,25 +19,36 @@ module Elsmore
19
19
  global_option('--debug') { emitter.debug! }
20
20
 
21
21
  command :snap do |c|
22
- c.syntax = 'spider <url> [options]'
22
+ c.syntax = 'spider <url>'
23
23
  c.description = 'Spiders a URL within from the given page, sticking within the original domain'
24
24
  c.action do |args, options|
25
25
  scraper = Elsmore::Scraper.new(args.first)
26
26
  scraper.emitter = emitter
27
- result = scraper.run
27
+ scraper.run
28
28
 
29
29
  emitter.newline
30
30
  emitter.newline
31
31
  emitter.say "Processed"
32
- emitter.pretty result[:processed]
32
+ emitter.pretty scraper.processed
33
33
  emitter.newline
34
34
  emitter.say "Could not be processed"
35
- emitter.pretty result[:invalid]
35
+ emitter.pretty scraper.invalid
36
+
37
+ emitter.newline
38
+ emitter.say "Run 'elsmore serve #{args.first}' to start a webserver on port 8000 with your local copy"
36
39
  end
37
40
  end
38
41
  alias_command :'go fetch', :'snap'
39
42
  default_command :snap
40
43
 
44
+ command :serve do |c|
45
+ c.syntax = 'serve <folder_name>'
46
+ c.description = 'Serve local folder'
47
+ c.action do |args, options|
48
+ exec "ruby -run -ehttpd ./#{ARGV[1]} -p8000"
49
+ end
50
+ end
51
+
41
52
  run!
42
53
  end
43
54
  end
@@ -14,7 +14,7 @@ module Elsmore
14
14
  end
15
15
 
16
16
  def newline
17
- say "\n"
17
+ puts "\n"
18
18
  end
19
19
 
20
20
  def pretty value
@@ -2,13 +2,16 @@ require 'httparty'
2
2
 
3
3
  module Elsmore
4
4
  class Resource
5
- attr_accessor :url, :filename, :emitter
5
+ attr_accessor :url, :filename, :emitter, :parent
6
6
 
7
7
  def initialize url, parent
8
8
  self.url = Elsmore::Url.new(url, parent)
9
+ self.parent = parent
9
10
  end
10
11
 
11
- def write!
12
+ def write! nested_urls = false
13
+ process_nested_urls if nested_urls
14
+
12
15
  writer = Elsmore::Writer.new(self)
13
16
  writer.emitter = emitter
14
17
  writer.write
@@ -18,5 +21,25 @@ module Elsmore
18
21
  def data
19
22
  @data ||= HTTParty.get(url.canonical_url)
20
23
  end
24
+
25
+ private
26
+
27
+ def process_nested_urls
28
+ urls = data.scan(/url\((.*?)\)/i).map do |match|
29
+ if match[0].start_with?('"') || match[0].start_with?("'")
30
+ match[0][1...-1]
31
+ else
32
+ match[0]
33
+ end
34
+ end
35
+
36
+ urls.each do |nested_url|
37
+ resource = Elsmore::Resource.new(nested_url, url)
38
+ resource.emitter = emitter
39
+ resource.write!
40
+
41
+ @data.gsub!(nested_url, resource.url.resource_path)
42
+ end
43
+ end
21
44
  end
22
45
  end
@@ -21,28 +21,28 @@ module Elsmore
21
21
 
22
22
  def write_css
23
23
  resource.doc.xpath('//link[@rel="stylesheet"]').each do |element|
24
- write_element(element, 'href')
24
+ write_element(element, 'href', replace: true)
25
25
  end
26
26
  end
27
27
 
28
28
  def write_images
29
29
  resource.doc.xpath('//img').each do |element|
30
- write_element(element, 'src')
30
+ write_element(element, 'src', replace: false)
31
31
  end
32
32
  end
33
33
 
34
34
  def write_js
35
35
  resource.doc.xpath('//script').each do |element|
36
- write_element(element, 'src')
36
+ write_element(element, 'src', replace: false)
37
37
  end
38
38
  end
39
39
 
40
- def write_element element, key
40
+ def write_element element, key, options = {}
41
41
  return unless element.attribute(key)
42
42
  url = element.attribute(key).value
43
43
  _resource = Elsmore::Resource.new(url, resource.url)
44
44
  _resource.emitter = emitter
45
- _resource.write!
45
+ _resource.write!(options[:replace])
46
46
 
47
47
  emitter.log("# Rewriting #{url} => #{_resource.filename}") if url != _resource.filename
48
48
  element.attribute(key).value = _resource.filename
@@ -19,11 +19,7 @@ module Elsmore
19
19
  document = unprocessed.shift
20
20
  process document
21
21
  end
22
-
23
- {
24
- processed: processed,
25
- invalid: invalid
26
- }
22
+ self
27
23
  end
28
24
 
29
25
  private
data/lib/elsmore/url.rb CHANGED
@@ -36,6 +36,14 @@ module Elsmore
36
36
  end
37
37
  end
38
38
 
39
+ def resource_path
40
+ if parent && parent.host == host
41
+ uri.path
42
+ else
43
+ canonical_url.gsub('http:/', '').gsub('https:/', '')
44
+ end
45
+ end
46
+
39
47
  private
40
48
 
41
49
  def sanitize_string
@@ -1,3 +1,3 @@
1
1
  module Elsmore
2
- VERSION = '0.2.1'
2
+ VERSION = '0.3.0'
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: elsmore
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Cristiano Betta
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2016-10-22 00:00:00.000000000 Z
12
+ date: 2016-10-23 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake