elsmore 0.1.7 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +1 -1
  3. data/hackference.co.uk/assets/css/site.css +488 -0
  4. data/hackference.co.uk/assets/css/slicknav.css +178 -0
  5. data/hackference.co.uk/assets/css/tito.css +130 -0
  6. data/hackference.co.uk/assets/img/algolia.png +0 -0
  7. data/hackference.co.uk/assets/img/andrew-faraday.jpg +0 -0
  8. data/hackference.co.uk/assets/img/conf.png +0 -0
  9. data/hackference.co.uk/assets/img/contentful.jpg +0 -0
  10. data/hackference.co.uk/assets/img/cristiano-betta.jpg +0 -0
  11. data/hackference.co.uk/assets/img/dan-jenkins.png +0 -0
  12. data/hackference.co.uk/assets/img/daniel-knell.jpg +0 -0
  13. data/hackference.co.uk/assets/img/etiene-dalcol.jpg +0 -0
  14. data/hackference.co.uk/assets/img/felienne-hermans.jpg +0 -0
  15. data/hackference.co.uk/assets/img/hack.png +0 -0
  16. data/hackference.co.uk/assets/img/hugh-rawlinson.jpg +0 -0
  17. data/hackference.co.uk/assets/img/improbable.png +0 -0
  18. data/hackference.co.uk/assets/img/jessica-rose-cartoon.png +0 -0
  19. data/hackference.co.uk/assets/img/jonathan-kingsley.jpeg +0 -0
  20. data/hackference.co.uk/assets/img/logo-small.png +0 -0
  21. data/hackference.co.uk/assets/img/martin-splitt.jpg +0 -0
  22. data/hackference.co.uk/assets/img/microsoft.png +0 -0
  23. data/hackference.co.uk/assets/img/mozilla.png +0 -0
  24. data/hackference.co.uk/assets/img/nexmo.png +0 -0
  25. data/hackference.co.uk/assets/img/packt.png +0 -0
  26. data/hackference.co.uk/assets/img/pebble.png +0 -0
  27. data/hackference.co.uk/assets/img/proactive.png +0 -0
  28. data/hackference.co.uk/assets/img/pusher.png +0 -0
  29. data/hackference.co.uk/assets/img/remy-sharp.jpg +0 -0
  30. data/hackference.co.uk/assets/img/sam-wierema.jpg +0 -0
  31. data/hackference.co.uk/assets/img/samathy-barratt.jpg +0 -0
  32. data/hackference.co.uk/assets/img/soledad.png +0 -0
  33. data/hackference.co.uk/assets/img/technical-team-solutions.png +0 -0
  34. data/hackference.co.uk/assets/img/terence-eden.png +0 -0
  35. data/hackference.co.uk/cdn-cgi/l/email-protection/index.html +75 -0
  36. data/hackference.co.uk/cdn-cgi/scripts/cf.common.js +78 -0
  37. data/hackference.co.uk/cdn-cgi/scripts/zepto.min.js +2 -0
  38. data/hackference.co.uk/cdn-cgi/styles/cf.errors.css +1 -0
  39. data/hackference.co.uk/cdnjs.cloudflare.com/ajax/libs/SlickNav/1.0.7/jquery.slicknav.min.js +6 -0
  40. data/hackference.co.uk/code-of-conduct.html +525 -0
  41. data/hackference.co.uk/code.jquery.com/jquery-1.12.3.min.js +5 -0
  42. data/hackference.co.uk/conference.html +776 -0
  43. data/hackference.co.uk/contact.html +365 -0
  44. data/hackference.co.uk/credits.html +399 -0
  45. data/hackference.co.uk/hackathon.html +585 -0
  46. data/hackference.co.uk/index.html +543 -0
  47. data/hackference.co.uk/js.tito.io/v1/index.html +1 -0
  48. data/hackference.co.uk/maxcdn.bootstrapcdn.com/bootstrap/3.3.6/css/bootstrap.min.css +6 -0
  49. data/hackference.co.uk/maxcdn.bootstrapcdn.com/font-awesome/4.3.0/css/font-awesome.min.css +4 -0
  50. data/hackference.co.uk/platform.twitter.com/js/tweet.00c2bb81d4f3f312a9a021c715a1c9dc.js +1 -0
  51. data/hackference.co.uk/platform.twitter.com/widgets.js +11 -0
  52. data/hackference.co.uk/speakers/andrew-faraday.html +363 -0
  53. data/hackference.co.uk/speakers/dan-jenkins.html +378 -0
  54. data/hackference.co.uk/speakers/daniel-knell.html +382 -0
  55. data/hackference.co.uk/speakers/etiene-dalcol.html +378 -0
  56. data/hackference.co.uk/speakers/felienne-hermans.html +386 -0
  57. data/hackference.co.uk/speakers/hugh-rawlinson.html +378 -0
  58. data/hackference.co.uk/speakers/jonathan-kingsley.html +386 -0
  59. data/hackference.co.uk/speakers/martin-splitt.html +402 -0
  60. data/hackference.co.uk/speakers/remy-sharp.html +380 -0
  61. data/hackference.co.uk/speakers/sam-wierema.html +378 -0
  62. data/hackference.co.uk/speakers/samathy-barratt.html +397 -0
  63. data/hackference.co.uk/speakers/soledad-penades.html +382 -0
  64. data/hackference.co.uk/speakers/terence-eden.html +388 -0
  65. data/hackference.co.uk/sponsors/algolia.html +350 -0
  66. data/hackference.co.uk/sponsors/contentful.html +350 -0
  67. data/hackference.co.uk/sponsors/improbable.html +351 -0
  68. data/hackference.co.uk/sponsors/index.html +655 -0
  69. data/hackference.co.uk/sponsors/microsoft.html +350 -0
  70. data/hackference.co.uk/sponsors/nexmo.html +350 -0
  71. data/hackference.co.uk/sponsors/packt.html +350 -0
  72. data/hackference.co.uk/sponsors/pebble.html +350 -0
  73. data/hackference.co.uk/sponsors/pusher.html +350 -0
  74. data/hackference.co.uk/sponsors/sponsor-us.html +658 -0
  75. data/hackference.co.uk/tickets.html +406 -0
  76. data/lib/elsmore/command.rb +2 -1
  77. data/lib/elsmore/document.rb +7 -3
  78. data/lib/elsmore/emitter.rb +22 -12
  79. data/lib/elsmore/resource.rb +4 -3
  80. data/lib/elsmore/rewriter.rb +13 -3
  81. data/lib/elsmore/scraper.rb +42 -26
  82. data/lib/elsmore/version.rb +1 -1
  83. data/lib/elsmore/writer.rb +4 -1
  84. metadata +74 -1
@@ -1,8 +1,8 @@
1
- require 'open-uri'
1
+ require 'httparty'
2
2
 
3
3
  module Elsmore
4
4
  class Resource
5
- attr_accessor :url, :filename
5
+ attr_accessor :url, :filename, :emitter
6
6
 
7
7
  def initialize url, parent
8
8
  self.url = Elsmore::Url.new(url, parent)
@@ -10,12 +10,13 @@ module Elsmore
10
10
 
11
11
  def write!
12
12
  writer = Elsmore::Writer.new(self)
13
+ writer.emitter = emitter
13
14
  writer.write
14
15
  self.filename = writer.canonical_filename
15
16
  end
16
17
 
17
18
  def data
18
- @data ||= open(url.canonical_url).read
19
+ @data ||= HTTParty.get(url.canonical_url)
19
20
  end
20
21
  end
21
22
  end
@@ -1,6 +1,6 @@
1
1
  module Elsmore
2
2
  class Rewriter
3
- attr_accessor :resource
3
+ attr_accessor :resource, :emitter
4
4
 
5
5
  def initialize resource
6
6
  self.resource = resource
@@ -20,7 +20,7 @@ module Elsmore
20
20
  end
21
21
 
22
22
  def write_css
23
- resource.doc.xpath('//link[rel=stylesheet]').each do |element|
23
+ resource.doc.xpath('//link[@rel="stylesheet"]').each do |element|
24
24
  write_element(element, 'href')
25
25
  end
26
26
  end
@@ -41,7 +41,10 @@ module Elsmore
41
41
  return unless element.attribute(key)
42
42
  url = element.attribute(key).value
43
43
  _resource = Elsmore::Resource.new(url, resource.url)
44
+ _resource.emitter = emitter
44
45
  _resource.write!
46
+
47
+ emitter.log("# Rewriting #{url} => #{_resource.filename}") if url != _resource.filename
45
48
  element.attribute(key).value = _resource.filename
46
49
  end
47
50
 
@@ -49,8 +52,15 @@ module Elsmore
49
52
  resource.doc.xpath('//a').each do |element|
50
53
  return unless element.attribute('href')
51
54
  href = element.attribute('href').value
55
+
52
56
  url = Elsmore::Url.new(href, resource.url)
53
- element.attribute('href').value = url.absolute_path_or_external_url
57
+ if url.valid
58
+ new_url = url.absolute_path_or_external_url
59
+ else
60
+ new_url = href
61
+ end
62
+ emitter.log("# Rewriting #{href} => #{new_url}") if href != new_url
63
+ element.attribute('href').value = new_url
54
64
  end
55
65
  end
56
66
  end
@@ -1,49 +1,65 @@
1
1
  module Elsmore
2
2
  class Scraper
3
- attr_accessor :emitter
3
+ attr_accessor :emitter, :unprocessed, :processed, :invalid, :unprocessed_urls, :valid_domains
4
4
 
5
5
  def initialize initial_url
6
6
  seed = Elsmore::Document.new(initial_url)
7
7
 
8
- @valid_domains = [seed.url.host]
9
- @unprocessed = [seed]
10
- @processed = []
11
- @invalid = []
12
- end
8
+ self.valid_domains = [seed.url.host]
13
9
 
14
- def run
15
- while !@unprocessed.empty?
16
- document = @unprocessed.shift
17
- next if @processed.include?(document.url.canonical_url)
18
- emitter.dot
10
+ self.unprocessed = [seed]
11
+ self.unprocessed_urls = [seed.url.canonical_url]
19
12
 
20
- enqueue(document.links)
21
- document.rewrite
22
- document.write!
13
+ self.processed = []
14
+ self.invalid = []
15
+ end
23
16
 
24
- @processed << document.url.canonical_url
17
+ def run
18
+ while !unprocessed.empty?
19
+ document = unprocessed.shift
20
+ process document
25
21
  end
26
22
 
27
23
  {
28
- processed: @processed,
29
- invalid: @invalid
24
+ processed: processed,
25
+ invalid: invalid
30
26
  }
31
27
  end
32
28
 
33
29
  private
34
30
 
31
+ def process document
32
+ emitter.log(document.url.canonical_url.colorize(:green))
33
+
34
+ document.emitter = emitter
35
+
36
+ enqueue(document.links)
37
+ document.rewrite
38
+ document.write!
39
+
40
+ processed << document.url.canonical_url
41
+ end
42
+
35
43
  def enqueue links
36
44
  links.each_with_index do |document, index|
37
- if !document.url.valid
38
- emitter.unsure
39
- @invalid << document.url.raw_url
40
- next
41
- end
42
-
43
- next if !@valid_domains.include?(document.url.host)
44
- next if @processed.include?(document.url.canonical_url)
45
- @unprocessed << document
45
+ next unless valid?(document)
46
+ next if !valid_domains.include?(document.url.host)
47
+ next if processed.include?(document.url.canonical_url)
48
+ next if unprocessed_urls.include?(document.url.canonical_url)
49
+
50
+ emitter.log("> Enqueued: #{document.url.canonical_url}")
51
+
52
+ unprocessed << document
53
+ unprocessed_urls << document.url.canonical_url
54
+ end
55
+ end
56
+
57
+ def valid?(document)
58
+ if !document.url.valid && !self.invalid.include?(document.url.raw_url)
59
+ emitter.warning("> Invalid URL: #{document.url.raw_url}")
60
+ invalid << document.url.raw_url
46
61
  end
62
+ document.url.valid
47
63
  end
48
64
  end
49
65
  end
@@ -1,3 +1,3 @@
1
1
  module Elsmore
2
- VERSION = '0.1.7'
2
+ VERSION = '0.2.0'
3
3
  end
@@ -1,6 +1,6 @@
1
1
  module Elsmore
2
2
  class Writer
3
- attr_accessor :resource
3
+ attr_accessor :resource, :emitter
4
4
 
5
5
  def initialize resource
6
6
  self.resource = resource
@@ -24,6 +24,9 @@ module Elsmore
24
24
  def write_file
25
25
  return if File.exist?(full_filename)
26
26
  ensure_directory full_filename
27
+
28
+ emitter.log("! Saving #{full_filename}")
29
+
27
30
  File.open(full_filename, 'w') do |file|
28
31
  file.write(resource.data)
29
32
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: elsmore
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Cristiano Betta
@@ -111,6 +111,79 @@ files:
111
111
  - Rakefile
112
112
  - bin/elsmore
113
113
  - elsmore.gemspec
114
+ - hackference.co.uk/assets/css/site.css
115
+ - hackference.co.uk/assets/css/slicknav.css
116
+ - hackference.co.uk/assets/css/tito.css
117
+ - hackference.co.uk/assets/img/algolia.png
118
+ - hackference.co.uk/assets/img/andrew-faraday.jpg
119
+ - hackference.co.uk/assets/img/conf.png
120
+ - hackference.co.uk/assets/img/contentful.jpg
121
+ - hackference.co.uk/assets/img/cristiano-betta.jpg
122
+ - hackference.co.uk/assets/img/dan-jenkins.png
123
+ - hackference.co.uk/assets/img/daniel-knell.jpg
124
+ - hackference.co.uk/assets/img/etiene-dalcol.jpg
125
+ - hackference.co.uk/assets/img/felienne-hermans.jpg
126
+ - hackference.co.uk/assets/img/hack.png
127
+ - hackference.co.uk/assets/img/hugh-rawlinson.jpg
128
+ - hackference.co.uk/assets/img/improbable.png
129
+ - hackference.co.uk/assets/img/jessica-rose-cartoon.png
130
+ - hackference.co.uk/assets/img/jonathan-kingsley.jpeg
131
+ - hackference.co.uk/assets/img/logo-small.png
132
+ - hackference.co.uk/assets/img/martin-splitt.jpg
133
+ - hackference.co.uk/assets/img/microsoft.png
134
+ - hackference.co.uk/assets/img/mozilla.png
135
+ - hackference.co.uk/assets/img/nexmo.png
136
+ - hackference.co.uk/assets/img/packt.png
137
+ - hackference.co.uk/assets/img/pebble.png
138
+ - hackference.co.uk/assets/img/proactive.png
139
+ - hackference.co.uk/assets/img/pusher.png
140
+ - hackference.co.uk/assets/img/remy-sharp.jpg
141
+ - hackference.co.uk/assets/img/sam-wierema.jpg
142
+ - hackference.co.uk/assets/img/samathy-barratt.jpg
143
+ - hackference.co.uk/assets/img/soledad.png
144
+ - hackference.co.uk/assets/img/technical-team-solutions.png
145
+ - hackference.co.uk/assets/img/terence-eden.png
146
+ - hackference.co.uk/cdn-cgi/l/email-protection/index.html
147
+ - hackference.co.uk/cdn-cgi/scripts/cf.common.js
148
+ - hackference.co.uk/cdn-cgi/scripts/zepto.min.js
149
+ - hackference.co.uk/cdn-cgi/styles/cf.errors.css
150
+ - hackference.co.uk/cdnjs.cloudflare.com/ajax/libs/SlickNav/1.0.7/jquery.slicknav.min.js
151
+ - hackference.co.uk/code-of-conduct.html
152
+ - hackference.co.uk/code.jquery.com/jquery-1.12.3.min.js
153
+ - hackference.co.uk/conference.html
154
+ - hackference.co.uk/contact.html
155
+ - hackference.co.uk/credits.html
156
+ - hackference.co.uk/hackathon.html
157
+ - hackference.co.uk/index.html
158
+ - hackference.co.uk/js.tito.io/v1/index.html
159
+ - hackference.co.uk/maxcdn.bootstrapcdn.com/bootstrap/3.3.6/css/bootstrap.min.css
160
+ - hackference.co.uk/maxcdn.bootstrapcdn.com/font-awesome/4.3.0/css/font-awesome.min.css
161
+ - hackference.co.uk/platform.twitter.com/js/tweet.00c2bb81d4f3f312a9a021c715a1c9dc.js
162
+ - hackference.co.uk/platform.twitter.com/widgets.js
163
+ - hackference.co.uk/speakers/andrew-faraday.html
164
+ - hackference.co.uk/speakers/dan-jenkins.html
165
+ - hackference.co.uk/speakers/daniel-knell.html
166
+ - hackference.co.uk/speakers/etiene-dalcol.html
167
+ - hackference.co.uk/speakers/felienne-hermans.html
168
+ - hackference.co.uk/speakers/hugh-rawlinson.html
169
+ - hackference.co.uk/speakers/jonathan-kingsley.html
170
+ - hackference.co.uk/speakers/martin-splitt.html
171
+ - hackference.co.uk/speakers/remy-sharp.html
172
+ - hackference.co.uk/speakers/sam-wierema.html
173
+ - hackference.co.uk/speakers/samathy-barratt.html
174
+ - hackference.co.uk/speakers/soledad-penades.html
175
+ - hackference.co.uk/speakers/terence-eden.html
176
+ - hackference.co.uk/sponsors/algolia.html
177
+ - hackference.co.uk/sponsors/contentful.html
178
+ - hackference.co.uk/sponsors/improbable.html
179
+ - hackference.co.uk/sponsors/index.html
180
+ - hackference.co.uk/sponsors/microsoft.html
181
+ - hackference.co.uk/sponsors/nexmo.html
182
+ - hackference.co.uk/sponsors/packt.html
183
+ - hackference.co.uk/sponsors/pebble.html
184
+ - hackference.co.uk/sponsors/pusher.html
185
+ - hackference.co.uk/sponsors/sponsor-us.html
186
+ - hackference.co.uk/tickets.html
114
187
  - lib/elsmore.rb
115
188
  - lib/elsmore/command.rb
116
189
  - lib/elsmore/document.rb