elsmore 0.2.1 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2ff6fae9fc1f96066e8c755fa7ae3625e067a53c
4
- data.tar.gz: 0745051590992a18546840199a85d50defff3769
3
+ metadata.gz: c5ea0ee5842f11d5c686b5904b963cccb7e55c5e
4
+ data.tar.gz: 41422ea9b6381c4d2f7e873c3e3b198dd66bfe2d
5
5
  SHA512:
6
- metadata.gz: a156725315f92cc72094bac0a58e224158dc3d01433a194a66389df20f740c67491ed4ccdab5cbd0688e468170beefe68abfecb10e930bbdd50289b97bae70ef
7
- data.tar.gz: f8974e4d1fa3ae7e73a37b9f1047de010c25a6d6501a7e5a21ba1dc71279a8a4534a4f31cdb4b8b8891c0a65590a375563a2424ad77fe7262a9721fb64c63c1c
6
+ metadata.gz: 06daf64396d3cef4e3fdb65d942d668161959af0eeb2f7b426804d8cc76bcfa66f1cdae19f35a4de1e8da41fe7a9e1a3d98c4c0333618f6217269d7113805ad9
7
+ data.tar.gz: 746aa284b811bfe5670f495a5a95491449d48628e3b4fd067e915fc55618625d11dcec60f5643d90c4db98951068f97a8848e5bdb458efbf93479d5a76c9ecc5
data/.gitignore CHANGED
@@ -1,2 +1,3 @@
1
1
  site
2
2
  pkg
3
+ hackference.co.uk
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- elsmore (0.2.1)
4
+ elsmore (0.3.0)
5
5
  awesome_print
6
6
  colorize
7
7
  commander
data/Rakefile CHANGED
@@ -4,8 +4,3 @@ require 'rake/testtask'
4
4
  task :console do
5
5
  exec "irb -r elsmore -I ./lib"
6
6
  end
7
-
8
- task :bin do
9
- ARGV.shift
10
- exec "ruby -Ilib ./bin/elsmore #{ARGV.join(' ')}"
11
- end
@@ -19,25 +19,36 @@ module Elsmore
19
19
  global_option('--debug') { emitter.debug! }
20
20
 
21
21
  command :snap do |c|
22
- c.syntax = 'spider <url> [options]'
22
+ c.syntax = 'spider <url>'
23
23
  c.description = 'Spiders a URL within from the given page, sticking within the original domain'
24
24
  c.action do |args, options|
25
25
  scraper = Elsmore::Scraper.new(args.first)
26
26
  scraper.emitter = emitter
27
- result = scraper.run
27
+ scraper.run
28
28
 
29
29
  emitter.newline
30
30
  emitter.newline
31
31
  emitter.say "Processed"
32
- emitter.pretty result[:processed]
32
+ emitter.pretty scraper.processed
33
33
  emitter.newline
34
34
  emitter.say "Could not be processed"
35
- emitter.pretty result[:invalid]
35
+ emitter.pretty scraper.invalid
36
+
37
+ emitter.newline
38
+ emitter.say "Run 'elsmore serve #{args.first}' to start a webserver on port 8000 with your local copy"
36
39
  end
37
40
  end
38
41
  alias_command :'go fetch', :'snap'
39
42
  default_command :snap
40
43
 
44
+ command :serve do |c|
45
+ c.syntax = 'serve <folder_name>'
46
+ c.description = 'Serve local folder'
47
+ c.action do |args, options|
48
+ exec "ruby -run -ehttpd ./#{ARGV[1]} -p8000"
49
+ end
50
+ end
51
+
41
52
  run!
42
53
  end
43
54
  end
@@ -14,7 +14,7 @@ module Elsmore
14
14
  end
15
15
 
16
16
  def newline
17
- say "\n"
17
+ puts "\n"
18
18
  end
19
19
 
20
20
  def pretty value
@@ -2,13 +2,16 @@ require 'httparty'
2
2
 
3
3
  module Elsmore
4
4
  class Resource
5
- attr_accessor :url, :filename, :emitter
5
+ attr_accessor :url, :filename, :emitter, :parent
6
6
 
7
7
  def initialize url, parent
8
8
  self.url = Elsmore::Url.new(url, parent)
9
+ self.parent = parent
9
10
  end
10
11
 
11
- def write!
12
+ def write! nested_urls = false
13
+ process_nested_urls if nested_urls
14
+
12
15
  writer = Elsmore::Writer.new(self)
13
16
  writer.emitter = emitter
14
17
  writer.write
@@ -18,5 +21,25 @@ module Elsmore
18
21
  def data
19
22
  @data ||= HTTParty.get(url.canonical_url)
20
23
  end
24
+
25
+ private
26
+
27
+ def process_nested_urls
28
+ urls = data.scan(/url\((.*?)\)/i).map do |match|
29
+ if match[0].start_with?('"') || match[0].start_with?("'")
30
+ match[0][1...-1]
31
+ else
32
+ match[0]
33
+ end
34
+ end
35
+
36
+ urls.each do |nested_url|
37
+ resource = Elsmore::Resource.new(nested_url, url)
38
+ resource.emitter = emitter
39
+ resource.write!
40
+
41
+ @data.gsub!(nested_url, resource.url.resource_path)
42
+ end
43
+ end
21
44
  end
22
45
  end
@@ -21,28 +21,28 @@ module Elsmore
21
21
 
22
22
  def write_css
23
23
  resource.doc.xpath('//link[@rel="stylesheet"]').each do |element|
24
- write_element(element, 'href')
24
+ write_element(element, 'href', replace: true)
25
25
  end
26
26
  end
27
27
 
28
28
  def write_images
29
29
  resource.doc.xpath('//img').each do |element|
30
- write_element(element, 'src')
30
+ write_element(element, 'src', replace: false)
31
31
  end
32
32
  end
33
33
 
34
34
  def write_js
35
35
  resource.doc.xpath('//script').each do |element|
36
- write_element(element, 'src')
36
+ write_element(element, 'src', replace: false)
37
37
  end
38
38
  end
39
39
 
40
- def write_element element, key
40
+ def write_element element, key, options = {}
41
41
  return unless element.attribute(key)
42
42
  url = element.attribute(key).value
43
43
  _resource = Elsmore::Resource.new(url, resource.url)
44
44
  _resource.emitter = emitter
45
- _resource.write!
45
+ _resource.write!(options[:replace])
46
46
 
47
47
  emitter.log("# Rewriting #{url} => #{_resource.filename}") if url != _resource.filename
48
48
  element.attribute(key).value = _resource.filename
@@ -19,11 +19,7 @@ module Elsmore
19
19
  document = unprocessed.shift
20
20
  process document
21
21
  end
22
-
23
- {
24
- processed: processed,
25
- invalid: invalid
26
- }
22
+ self
27
23
  end
28
24
 
29
25
  private
data/lib/elsmore/url.rb CHANGED
@@ -36,6 +36,14 @@ module Elsmore
36
36
  end
37
37
  end
38
38
 
39
+ def resource_path
40
+ if parent && parent.host == host
41
+ uri.path
42
+ else
43
+ canonical_url.gsub('http:/', '').gsub('https:/', '')
44
+ end
45
+ end
46
+
39
47
  private
40
48
 
41
49
  def sanitize_string
@@ -1,3 +1,3 @@
1
1
  module Elsmore
2
- VERSION = '0.2.1'
2
+ VERSION = '0.3.0'
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: elsmore
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Cristiano Betta
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2016-10-22 00:00:00.000000000 Z
12
+ date: 2016-10-23 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake