elsmore 0.1.7 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (84) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +1 -1
  3. data/hackference.co.uk/assets/css/site.css +488 -0
  4. data/hackference.co.uk/assets/css/slicknav.css +178 -0
  5. data/hackference.co.uk/assets/css/tito.css +130 -0
  6. data/hackference.co.uk/assets/img/algolia.png +0 -0
  7. data/hackference.co.uk/assets/img/andrew-faraday.jpg +0 -0
  8. data/hackference.co.uk/assets/img/conf.png +0 -0
  9. data/hackference.co.uk/assets/img/contentful.jpg +0 -0
  10. data/hackference.co.uk/assets/img/cristiano-betta.jpg +0 -0
  11. data/hackference.co.uk/assets/img/dan-jenkins.png +0 -0
  12. data/hackference.co.uk/assets/img/daniel-knell.jpg +0 -0
  13. data/hackference.co.uk/assets/img/etiene-dalcol.jpg +0 -0
  14. data/hackference.co.uk/assets/img/felienne-hermans.jpg +0 -0
  15. data/hackference.co.uk/assets/img/hack.png +0 -0
  16. data/hackference.co.uk/assets/img/hugh-rawlinson.jpg +0 -0
  17. data/hackference.co.uk/assets/img/improbable.png +0 -0
  18. data/hackference.co.uk/assets/img/jessica-rose-cartoon.png +0 -0
  19. data/hackference.co.uk/assets/img/jonathan-kingsley.jpeg +0 -0
  20. data/hackference.co.uk/assets/img/logo-small.png +0 -0
  21. data/hackference.co.uk/assets/img/martin-splitt.jpg +0 -0
  22. data/hackference.co.uk/assets/img/microsoft.png +0 -0
  23. data/hackference.co.uk/assets/img/mozilla.png +0 -0
  24. data/hackference.co.uk/assets/img/nexmo.png +0 -0
  25. data/hackference.co.uk/assets/img/packt.png +0 -0
  26. data/hackference.co.uk/assets/img/pebble.png +0 -0
  27. data/hackference.co.uk/assets/img/proactive.png +0 -0
  28. data/hackference.co.uk/assets/img/pusher.png +0 -0
  29. data/hackference.co.uk/assets/img/remy-sharp.jpg +0 -0
  30. data/hackference.co.uk/assets/img/sam-wierema.jpg +0 -0
  31. data/hackference.co.uk/assets/img/samathy-barratt.jpg +0 -0
  32. data/hackference.co.uk/assets/img/soledad.png +0 -0
  33. data/hackference.co.uk/assets/img/technical-team-solutions.png +0 -0
  34. data/hackference.co.uk/assets/img/terence-eden.png +0 -0
  35. data/hackference.co.uk/cdn-cgi/l/email-protection/index.html +75 -0
  36. data/hackference.co.uk/cdn-cgi/scripts/cf.common.js +78 -0
  37. data/hackference.co.uk/cdn-cgi/scripts/zepto.min.js +2 -0
  38. data/hackference.co.uk/cdn-cgi/styles/cf.errors.css +1 -0
  39. data/hackference.co.uk/cdnjs.cloudflare.com/ajax/libs/SlickNav/1.0.7/jquery.slicknav.min.js +6 -0
  40. data/hackference.co.uk/code-of-conduct.html +525 -0
  41. data/hackference.co.uk/code.jquery.com/jquery-1.12.3.min.js +5 -0
  42. data/hackference.co.uk/conference.html +776 -0
  43. data/hackference.co.uk/contact.html +365 -0
  44. data/hackference.co.uk/credits.html +399 -0
  45. data/hackference.co.uk/hackathon.html +585 -0
  46. data/hackference.co.uk/index.html +543 -0
  47. data/hackference.co.uk/js.tito.io/v1/index.html +1 -0
  48. data/hackference.co.uk/maxcdn.bootstrapcdn.com/bootstrap/3.3.6/css/bootstrap.min.css +6 -0
  49. data/hackference.co.uk/maxcdn.bootstrapcdn.com/font-awesome/4.3.0/css/font-awesome.min.css +4 -0
  50. data/hackference.co.uk/platform.twitter.com/js/tweet.00c2bb81d4f3f312a9a021c715a1c9dc.js +1 -0
  51. data/hackference.co.uk/platform.twitter.com/widgets.js +11 -0
  52. data/hackference.co.uk/speakers/andrew-faraday.html +363 -0
  53. data/hackference.co.uk/speakers/dan-jenkins.html +378 -0
  54. data/hackference.co.uk/speakers/daniel-knell.html +382 -0
  55. data/hackference.co.uk/speakers/etiene-dalcol.html +378 -0
  56. data/hackference.co.uk/speakers/felienne-hermans.html +386 -0
  57. data/hackference.co.uk/speakers/hugh-rawlinson.html +378 -0
  58. data/hackference.co.uk/speakers/jonathan-kingsley.html +386 -0
  59. data/hackference.co.uk/speakers/martin-splitt.html +402 -0
  60. data/hackference.co.uk/speakers/remy-sharp.html +380 -0
  61. data/hackference.co.uk/speakers/sam-wierema.html +378 -0
  62. data/hackference.co.uk/speakers/samathy-barratt.html +397 -0
  63. data/hackference.co.uk/speakers/soledad-penades.html +382 -0
  64. data/hackference.co.uk/speakers/terence-eden.html +388 -0
  65. data/hackference.co.uk/sponsors/algolia.html +350 -0
  66. data/hackference.co.uk/sponsors/contentful.html +350 -0
  67. data/hackference.co.uk/sponsors/improbable.html +351 -0
  68. data/hackference.co.uk/sponsors/index.html +655 -0
  69. data/hackference.co.uk/sponsors/microsoft.html +350 -0
  70. data/hackference.co.uk/sponsors/nexmo.html +350 -0
  71. data/hackference.co.uk/sponsors/packt.html +350 -0
  72. data/hackference.co.uk/sponsors/pebble.html +350 -0
  73. data/hackference.co.uk/sponsors/pusher.html +350 -0
  74. data/hackference.co.uk/sponsors/sponsor-us.html +658 -0
  75. data/hackference.co.uk/tickets.html +406 -0
  76. data/lib/elsmore/command.rb +2 -1
  77. data/lib/elsmore/document.rb +7 -3
  78. data/lib/elsmore/emitter.rb +22 -12
  79. data/lib/elsmore/resource.rb +4 -3
  80. data/lib/elsmore/rewriter.rb +13 -3
  81. data/lib/elsmore/scraper.rb +42 -26
  82. data/lib/elsmore/version.rb +1 -1
  83. data/lib/elsmore/writer.rb +4 -1
  84. metadata +74 -1
@@ -1,8 +1,8 @@
1
- require 'open-uri'
1
+ require 'httparty'
2
2
 
3
3
  module Elsmore
4
4
  class Resource
5
- attr_accessor :url, :filename
5
+ attr_accessor :url, :filename, :emitter
6
6
 
7
7
  def initialize url, parent
8
8
  self.url = Elsmore::Url.new(url, parent)
@@ -10,12 +10,13 @@ module Elsmore
10
10
 
11
11
  def write!
12
12
  writer = Elsmore::Writer.new(self)
13
+ writer.emitter = emitter
13
14
  writer.write
14
15
  self.filename = writer.canonical_filename
15
16
  end
16
17
 
17
18
  def data
18
- @data ||= open(url.canonical_url).read
19
+ @data ||= HTTParty.get(url.canonical_url)
19
20
  end
20
21
  end
21
22
  end
@@ -1,6 +1,6 @@
1
1
  module Elsmore
2
2
  class Rewriter
3
- attr_accessor :resource
3
+ attr_accessor :resource, :emitter
4
4
 
5
5
  def initialize resource
6
6
  self.resource = resource
@@ -20,7 +20,7 @@ module Elsmore
20
20
  end
21
21
 
22
22
  def write_css
23
- resource.doc.xpath('//link[rel=stylesheet]').each do |element|
23
+ resource.doc.xpath('//link[@rel="stylesheet"]').each do |element|
24
24
  write_element(element, 'href')
25
25
  end
26
26
  end
@@ -41,7 +41,10 @@ module Elsmore
41
41
  return unless element.attribute(key)
42
42
  url = element.attribute(key).value
43
43
  _resource = Elsmore::Resource.new(url, resource.url)
44
+ _resource.emitter = emitter
44
45
  _resource.write!
46
+
47
+ emitter.log("# Rewriting #{url} => #{_resource.filename}") if url != _resource.filename
45
48
  element.attribute(key).value = _resource.filename
46
49
  end
47
50
 
@@ -49,8 +52,15 @@ module Elsmore
49
52
  resource.doc.xpath('//a').each do |element|
50
53
  return unless element.attribute('href')
51
54
  href = element.attribute('href').value
55
+
52
56
  url = Elsmore::Url.new(href, resource.url)
53
- element.attribute('href').value = url.absolute_path_or_external_url
57
+ if url.valid
58
+ new_url = url.absolute_path_or_external_url
59
+ else
60
+ new_url = href
61
+ end
62
+ emitter.log("# Rewriting #{href} => #{new_url}") if href != new_url
63
+ element.attribute('href').value = new_url
54
64
  end
55
65
  end
56
66
  end
@@ -1,49 +1,65 @@
1
1
  module Elsmore
2
2
  class Scraper
3
- attr_accessor :emitter
3
+ attr_accessor :emitter, :unprocessed, :processed, :invalid, :unprocessed_urls, :valid_domains
4
4
 
5
5
  def initialize initial_url
6
6
  seed = Elsmore::Document.new(initial_url)
7
7
 
8
- @valid_domains = [seed.url.host]
9
- @unprocessed = [seed]
10
- @processed = []
11
- @invalid = []
12
- end
8
+ self.valid_domains = [seed.url.host]
13
9
 
14
- def run
15
- while !@unprocessed.empty?
16
- document = @unprocessed.shift
17
- next if @processed.include?(document.url.canonical_url)
18
- emitter.dot
10
+ self.unprocessed = [seed]
11
+ self.unprocessed_urls = [seed.url.canonical_url]
19
12
 
20
- enqueue(document.links)
21
- document.rewrite
22
- document.write!
13
+ self.processed = []
14
+ self.invalid = []
15
+ end
23
16
 
24
- @processed << document.url.canonical_url
17
+ def run
18
+ while !unprocessed.empty?
19
+ document = unprocessed.shift
20
+ process document
25
21
  end
26
22
 
27
23
  {
28
- processed: @processed,
29
- invalid: @invalid
24
+ processed: processed,
25
+ invalid: invalid
30
26
  }
31
27
  end
32
28
 
33
29
  private
34
30
 
31
+ def process document
32
+ emitter.log(document.url.canonical_url.colorize(:green))
33
+
34
+ document.emitter = emitter
35
+
36
+ enqueue(document.links)
37
+ document.rewrite
38
+ document.write!
39
+
40
+ processed << document.url.canonical_url
41
+ end
42
+
35
43
  def enqueue links
36
44
  links.each_with_index do |document, index|
37
- if !document.url.valid
38
- emitter.unsure
39
- @invalid << document.url.raw_url
40
- next
41
- end
42
-
43
- next if !@valid_domains.include?(document.url.host)
44
- next if @processed.include?(document.url.canonical_url)
45
- @unprocessed << document
45
+ next unless valid?(document)
46
+ next if !valid_domains.include?(document.url.host)
47
+ next if processed.include?(document.url.canonical_url)
48
+ next if unprocessed_urls.include?(document.url.canonical_url)
49
+
50
+ emitter.log("> Enqueued: #{document.url.canonical_url}")
51
+
52
+ unprocessed << document
53
+ unprocessed_urls << document.url.canonical_url
54
+ end
55
+ end
56
+
57
+ def valid?(document)
58
+ if !document.url.valid && !self.invalid.include?(document.url.raw_url)
59
+ emitter.warning("> Invalid URL: #{document.url.raw_url}")
60
+ invalid << document.url.raw_url
46
61
  end
62
+ document.url.valid
47
63
  end
48
64
  end
49
65
  end
@@ -1,3 +1,3 @@
1
1
  module Elsmore
2
- VERSION = '0.1.7'
2
+ VERSION = '0.2.0'
3
3
  end
@@ -1,6 +1,6 @@
1
1
  module Elsmore
2
2
  class Writer
3
- attr_accessor :resource
3
+ attr_accessor :resource, :emitter
4
4
 
5
5
  def initialize resource
6
6
  self.resource = resource
@@ -24,6 +24,9 @@ module Elsmore
24
24
  def write_file
25
25
  return if File.exist?(full_filename)
26
26
  ensure_directory full_filename
27
+
28
+ emitter.log("! Saving #{full_filename}")
29
+
27
30
  File.open(full_filename, 'w') do |file|
28
31
  file.write(resource.data)
29
32
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: elsmore
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Cristiano Betta
@@ -111,6 +111,79 @@ files:
111
111
  - Rakefile
112
112
  - bin/elsmore
113
113
  - elsmore.gemspec
114
+ - hackference.co.uk/assets/css/site.css
115
+ - hackference.co.uk/assets/css/slicknav.css
116
+ - hackference.co.uk/assets/css/tito.css
117
+ - hackference.co.uk/assets/img/algolia.png
118
+ - hackference.co.uk/assets/img/andrew-faraday.jpg
119
+ - hackference.co.uk/assets/img/conf.png
120
+ - hackference.co.uk/assets/img/contentful.jpg
121
+ - hackference.co.uk/assets/img/cristiano-betta.jpg
122
+ - hackference.co.uk/assets/img/dan-jenkins.png
123
+ - hackference.co.uk/assets/img/daniel-knell.jpg
124
+ - hackference.co.uk/assets/img/etiene-dalcol.jpg
125
+ - hackference.co.uk/assets/img/felienne-hermans.jpg
126
+ - hackference.co.uk/assets/img/hack.png
127
+ - hackference.co.uk/assets/img/hugh-rawlinson.jpg
128
+ - hackference.co.uk/assets/img/improbable.png
129
+ - hackference.co.uk/assets/img/jessica-rose-cartoon.png
130
+ - hackference.co.uk/assets/img/jonathan-kingsley.jpeg
131
+ - hackference.co.uk/assets/img/logo-small.png
132
+ - hackference.co.uk/assets/img/martin-splitt.jpg
133
+ - hackference.co.uk/assets/img/microsoft.png
134
+ - hackference.co.uk/assets/img/mozilla.png
135
+ - hackference.co.uk/assets/img/nexmo.png
136
+ - hackference.co.uk/assets/img/packt.png
137
+ - hackference.co.uk/assets/img/pebble.png
138
+ - hackference.co.uk/assets/img/proactive.png
139
+ - hackference.co.uk/assets/img/pusher.png
140
+ - hackference.co.uk/assets/img/remy-sharp.jpg
141
+ - hackference.co.uk/assets/img/sam-wierema.jpg
142
+ - hackference.co.uk/assets/img/samathy-barratt.jpg
143
+ - hackference.co.uk/assets/img/soledad.png
144
+ - hackference.co.uk/assets/img/technical-team-solutions.png
145
+ - hackference.co.uk/assets/img/terence-eden.png
146
+ - hackference.co.uk/cdn-cgi/l/email-protection/index.html
147
+ - hackference.co.uk/cdn-cgi/scripts/cf.common.js
148
+ - hackference.co.uk/cdn-cgi/scripts/zepto.min.js
149
+ - hackference.co.uk/cdn-cgi/styles/cf.errors.css
150
+ - hackference.co.uk/cdnjs.cloudflare.com/ajax/libs/SlickNav/1.0.7/jquery.slicknav.min.js
151
+ - hackference.co.uk/code-of-conduct.html
152
+ - hackference.co.uk/code.jquery.com/jquery-1.12.3.min.js
153
+ - hackference.co.uk/conference.html
154
+ - hackference.co.uk/contact.html
155
+ - hackference.co.uk/credits.html
156
+ - hackference.co.uk/hackathon.html
157
+ - hackference.co.uk/index.html
158
+ - hackference.co.uk/js.tito.io/v1/index.html
159
+ - hackference.co.uk/maxcdn.bootstrapcdn.com/bootstrap/3.3.6/css/bootstrap.min.css
160
+ - hackference.co.uk/maxcdn.bootstrapcdn.com/font-awesome/4.3.0/css/font-awesome.min.css
161
+ - hackference.co.uk/platform.twitter.com/js/tweet.00c2bb81d4f3f312a9a021c715a1c9dc.js
162
+ - hackference.co.uk/platform.twitter.com/widgets.js
163
+ - hackference.co.uk/speakers/andrew-faraday.html
164
+ - hackference.co.uk/speakers/dan-jenkins.html
165
+ - hackference.co.uk/speakers/daniel-knell.html
166
+ - hackference.co.uk/speakers/etiene-dalcol.html
167
+ - hackference.co.uk/speakers/felienne-hermans.html
168
+ - hackference.co.uk/speakers/hugh-rawlinson.html
169
+ - hackference.co.uk/speakers/jonathan-kingsley.html
170
+ - hackference.co.uk/speakers/martin-splitt.html
171
+ - hackference.co.uk/speakers/remy-sharp.html
172
+ - hackference.co.uk/speakers/sam-wierema.html
173
+ - hackference.co.uk/speakers/samathy-barratt.html
174
+ - hackference.co.uk/speakers/soledad-penades.html
175
+ - hackference.co.uk/speakers/terence-eden.html
176
+ - hackference.co.uk/sponsors/algolia.html
177
+ - hackference.co.uk/sponsors/contentful.html
178
+ - hackference.co.uk/sponsors/improbable.html
179
+ - hackference.co.uk/sponsors/index.html
180
+ - hackference.co.uk/sponsors/microsoft.html
181
+ - hackference.co.uk/sponsors/nexmo.html
182
+ - hackference.co.uk/sponsors/packt.html
183
+ - hackference.co.uk/sponsors/pebble.html
184
+ - hackference.co.uk/sponsors/pusher.html
185
+ - hackference.co.uk/sponsors/sponsor-us.html
186
+ - hackference.co.uk/tickets.html
114
187
  - lib/elsmore.rb
115
188
  - lib/elsmore/command.rb
116
189
  - lib/elsmore/document.rb