link_scrapper 0.1.3 → 0.1.4

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions exactly as they appear in their respective public registries.
Files changed (3) hide show
  1. checksums.yaml +4 -4
  2. data/lib/link_scrapper.rb +14 -4
  3. metadata +1 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 213970214a5f8e494141c4172373f5ae21190714
4
- data.tar.gz: 0ba291b9a887f6fc244ebd25939b342f77c61b46
3
+ metadata.gz: b6cb0bc3b1eacf9d8fdff2f6ee057bc411c6da03
4
+ data.tar.gz: 53c1f9da27e968db305e5fbaae3d3e4480905e90
5
5
  SHA512:
6
- metadata.gz: 579ab33563beb450bf54e78c88b2b19b4d034a87acb722224b91ddb15e075a527ef05fd2a6dfd7b5a69dac464f616bba161a4dedaaff14285dadfbb8a04e0717
7
- data.tar.gz: a7180512da166efd1f9322199e69d454128b159610306289a6575220852a678991fee7922b29fe9fb7ce26833e1ce7178121475d786cd3fdd0a442dec2a87469
6
+ metadata.gz: 5aa5eac8873d6275b298beaf2663441a530f57389a42794250e137084d1ad2d98705f5ab83db11c0fc71c66e14899e27f066c39985ade3d50486a39a290c5f66
7
+ data.tar.gz: 821fe2bfba46ed516be52a7803e15a78a2e00e4a656d07e787b1830d4d2b0089b85901a51945505fd719649094a5d06a12aeefd53fb9224b8570fec8931d7055
data/lib/link_scrapper.rb CHANGED
@@ -82,7 +82,7 @@ class LinkScrapper
82
82
  if !@external_links[@search_uri.to_sym]
83
83
  begin
84
84
  t1 = Time.now
85
- response = Net::HTTP.get_response(URI.parse(URI.encode(@search_uri)))
85
+ response = Net::HTTP.get_response(URI.parse(@search_uri))
86
86
  t2 = Time.now
87
87
  delta = t2 - t1
88
88
  code = response.code
@@ -170,7 +170,7 @@ class LinkScrapper
170
170
  # gather page request response
171
171
  begin
172
172
  t1 = Time.now
173
- response = Net::HTTP.get_response(URI.parse(URI.encode(@search_uri.strip)))
173
+ response = Net::HTTP.get_response(URI.parse(@search_uri.strip))
174
174
  t2 = Time.now
175
175
  delta = t2 - t1
176
176
 
@@ -184,7 +184,8 @@ class LinkScrapper
184
184
  links_array = body.scan(/<a[^>]+href\s*=\s*["']([^"']+)["'][^>]*>(.*?)<\/a>/mi)
185
185
 
186
186
  # update anchors and indirect links to use direct links
187
- links_array.each { |val|
187
+ links_array.each_with_index { |val, index|
188
+ skip = 0
188
189
  if (val[0][0,2] == "//" || val[0][0] == "/" || val[0][0,3] == "../") && val[0] !~ /^htt(p|ps):/
189
190
  if val[0][0,3] == "../"
190
191
  val[0][0,3] = ""
@@ -197,7 +198,16 @@ class LinkScrapper
197
198
  end
198
199
  val[0] = "#{@search_domain}#{val[0]}"
199
200
  end
200
- @link_parents[val[0].chomp.to_sym] = @search_uri.strip
201
+ @links.each { |lnk|
202
+ if val[0] == lnk[0]
203
+ skip = 1
204
+ end
205
+ }
206
+ if skip == 0
207
+ @link_parents[val[0].chomp.to_sym] = @search_uri.strip
208
+ else
209
+ val.delete_at(index)
210
+ end
201
211
  }
202
212
 
203
213
  # combine found links with links array
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: link_scrapper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Robert McDowell