link_scrapper 0.1.3 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/link_scrapper.rb +14 -4
  3. metadata +1 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 213970214a5f8e494141c4172373f5ae21190714
4
- data.tar.gz: 0ba291b9a887f6fc244ebd25939b342f77c61b46
3
+ metadata.gz: b6cb0bc3b1eacf9d8fdff2f6ee057bc411c6da03
4
+ data.tar.gz: 53c1f9da27e968db305e5fbaae3d3e4480905e90
5
5
  SHA512:
6
- metadata.gz: 579ab33563beb450bf54e78c88b2b19b4d034a87acb722224b91ddb15e075a527ef05fd2a6dfd7b5a69dac464f616bba161a4dedaaff14285dadfbb8a04e0717
7
- data.tar.gz: a7180512da166efd1f9322199e69d454128b159610306289a6575220852a678991fee7922b29fe9fb7ce26833e1ce7178121475d786cd3fdd0a442dec2a87469
6
+ metadata.gz: 5aa5eac8873d6275b298beaf2663441a530f57389a42794250e137084d1ad2d98705f5ab83db11c0fc71c66e14899e27f066c39985ade3d50486a39a290c5f66
7
+ data.tar.gz: 821fe2bfba46ed516be52a7803e15a78a2e00e4a656d07e787b1830d4d2b0089b85901a51945505fd719649094a5d06a12aeefd53fb9224b8570fec8931d7055
data/lib/link_scrapper.rb CHANGED
@@ -82,7 +82,7 @@ class LinkScrapper
82
82
  if !@external_links[@search_uri.to_sym]
83
83
  begin
84
84
  t1 = Time.now
85
- response = Net::HTTP.get_response(URI.parse(URI.encode(@search_uri)))
85
+ response = Net::HTTP.get_response(URI.parse(@search_uri))
86
86
  t2 = Time.now
87
87
  delta = t2 - t1
88
88
  code = response.code
@@ -170,7 +170,7 @@ class LinkScrapper
170
170
  # gather page request response
171
171
  begin
172
172
  t1 = Time.now
173
- response = Net::HTTP.get_response(URI.parse(URI.encode(@search_uri.strip)))
173
+ response = Net::HTTP.get_response(URI.parse(@search_uri.strip))
174
174
  t2 = Time.now
175
175
  delta = t2 - t1
176
176
 
@@ -184,7 +184,8 @@ class LinkScrapper
184
184
  links_array = body.scan(/<a[^>]+href\s*=\s*["']([^"']+)["'][^>]*>(.*?)<\/a>/mi)
185
185
 
186
186
  # update anchors and indirect links to use direct links
187
- links_array.each { |val|
187
+ links_array.each_with_index { |val, index|
188
+ skip = 0
188
189
  if (val[0][0,2] == "//" || val[0][0] == "/" || val[0][0,3] == "../") && val[0] !~ /^htt(p|ps):/
189
190
  if val[0][0,3] == "../"
190
191
  val[0][0,3] = ""
@@ -197,7 +198,16 @@ class LinkScrapper
197
198
  end
198
199
  val[0] = "#{@search_domain}#{val[0]}"
199
200
  end
200
- @link_parents[val[0].chomp.to_sym] = @search_uri.strip
201
+ @links.each { |lnk|
202
+ if val[0] == lnk[0]
203
+ skip = 1
204
+ end
205
+ }
206
+ if skip == 0
207
+ @link_parents[val[0].chomp.to_sym] = @search_uri.strip
208
+ else
209
+ val.delete_at(index)
210
+ end
201
211
  }
202
212
 
203
213
  # combine found links with links array
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: link_scrapper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Robert McDowell