link_scrapper 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/link_scrapper.rb +16 -5
  3. metadata +3 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 98b6d54cc75862b1e063a04e355a6f638deff14b
4
- data.tar.gz: fbe3a8ecfbf27d81458862ec96543700e994a049
3
+ metadata.gz: 213970214a5f8e494141c4172373f5ae21190714
4
+ data.tar.gz: 0ba291b9a887f6fc244ebd25939b342f77c61b46
5
5
  SHA512:
6
- metadata.gz: 34296971fcebf4d437aba6703c36ca93a8599ee51d5ee73032e2e58217e8c25029feabb9c83c51f298fdbe5a27155196faaec2bf860e5751a228da843dcd499b
7
- data.tar.gz: f3d82c1973ec70e37815ad88f35523b5edab699d0c473e7c8ce61e9387e83b1ffbf3ec5b456eb05001cc7e653282b88954790bc4136c79e24b818c8cabf05b28
6
+ metadata.gz: 579ab33563beb450bf54e78c88b2b19b4d034a87acb722224b91ddb15e075a527ef05fd2a6dfd7b5a69dac464f616bba161a4dedaaff14285dadfbb8a04e0717
7
+ data.tar.gz: a7180512da166efd1f9322199e69d454128b159610306289a6575220852a678991fee7922b29fe9fb7ce26833e1ce7178121475d786cd3fdd0a442dec2a87469
data/lib/link_scrapper.rb CHANGED
@@ -19,6 +19,7 @@ class LinkScrapper
19
19
  @search_index = 0
20
20
  @search_iteration = 0
21
21
  @links = Array.new
22
+ @link_parents = Hash.new
22
23
  @checked_links = Hash.new
23
24
  @error_links = Hash.new
24
25
  @external_links = Hash.new
@@ -184,9 +185,19 @@ class LinkScrapper
184
185
 
185
186
  # update anchors and indirect links to use direct links
186
187
  links_array.each { |val|
187
- if val[0] != '/' || val !~ /^htt(p|ps):/ || val[0,2] != '//'
188
- val = "#{@search_uri}#{val}"
188
+ if (val[0][0,2] == "//" || val[0][0] == "/" || val[0][0,3] == "../") && val[0] !~ /^htt(p|ps):/
189
+ if val[0][0,3] == "../"
190
+ val[0][0,3] = ""
191
+ end
192
+ if val[0][0,2] == "//"
193
+ val[0][0,2] = ""
194
+ end
195
+ if val[0][0] == "/"
196
+ val[0][0] = ""
197
+ end
198
+ val[0] = "#{@search_domain}#{val[0]}"
189
199
  end
200
+ @link_parents[val[0].chomp.to_sym] = @search_uri.strip
190
201
  }
191
202
 
192
203
  # combine found links with links array
@@ -200,7 +211,7 @@ class LinkScrapper
200
211
  end
201
212
 
202
213
  # store results in checked hash
203
- @checked_links[@search_uri.to_sym] = {res: code, time: delta}
214
+ @checked_links[@search_uri.to_sym] = {res: code, time: delta, parent: @link_parents[@search_uri.to_sym]}
204
215
 
205
216
  end
206
217
 
@@ -215,13 +226,13 @@ class LinkScrapper
215
226
  # save search results
216
227
  CSV.open('results.csv', 'wb') {|csv|
217
228
  @checked_links.each {|link|
218
- csv << [link[0], link[1][:res], link[1][:time]]
229
+ csv << [link[0], link[1][:res], link[1][:time], link[1][:parent]]
219
230
  }
220
231
  }
221
232
  # save list of external links
222
233
  CSV.open('external-links.csv', 'wb') {|csv|
223
234
  @external_links.each do |link|
224
- csv << [link[0], link[1][:res], link[1][:time]]
235
+ csv << [link[0], link[1][:res], link[1][:time], link[1][:parent]]
225
236
  end
226
237
  }
227
238
  # save list of invalid links
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: link_scrapper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Robert McDowell
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-04-02 00:00:00.000000000 Z
11
+ date: 2017-02-24 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: A simple gem for scrapping links within an assigned website. Results
14
14
  for domain, external links, and invalid URLs can be saved as CSVs or returned as
@@ -19,7 +19,7 @@ extensions: []
19
19
  extra_rdoc_files: []
20
20
  files:
21
21
  - lib/link_scrapper.rb
22
- homepage: http://virginiabeachwebdevelopment.com
22
+ homepage: https://github.com/Studio-Center/Ruby-Link-Extractor
23
23
  licenses:
24
24
  - MIT
25
25
  metadata: {}