link_scrapper 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/link_scrapper.rb +16 -5
  3. metadata +3 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 98b6d54cc75862b1e063a04e355a6f638deff14b
4
- data.tar.gz: fbe3a8ecfbf27d81458862ec96543700e994a049
3
+ metadata.gz: 213970214a5f8e494141c4172373f5ae21190714
4
+ data.tar.gz: 0ba291b9a887f6fc244ebd25939b342f77c61b46
5
5
  SHA512:
6
- metadata.gz: 34296971fcebf4d437aba6703c36ca93a8599ee51d5ee73032e2e58217e8c25029feabb9c83c51f298fdbe5a27155196faaec2bf860e5751a228da843dcd499b
7
- data.tar.gz: f3d82c1973ec70e37815ad88f35523b5edab699d0c473e7c8ce61e9387e83b1ffbf3ec5b456eb05001cc7e653282b88954790bc4136c79e24b818c8cabf05b28
6
+ metadata.gz: 579ab33563beb450bf54e78c88b2b19b4d034a87acb722224b91ddb15e075a527ef05fd2a6dfd7b5a69dac464f616bba161a4dedaaff14285dadfbb8a04e0717
7
+ data.tar.gz: a7180512da166efd1f9322199e69d454128b159610306289a6575220852a678991fee7922b29fe9fb7ce26833e1ce7178121475d786cd3fdd0a442dec2a87469
data/lib/link_scrapper.rb CHANGED
@@ -19,6 +19,7 @@ class LinkScrapper
19
19
  @search_index = 0
20
20
  @search_iteration = 0
21
21
  @links = Array.new
22
+ @link_parents = Hash.new
22
23
  @checked_links = Hash.new
23
24
  @error_links = Hash.new
24
25
  @external_links = Hash.new
@@ -184,9 +185,19 @@ class LinkScrapper
184
185
 
185
186
  # update anchors and indirect links to use direct links
186
187
  links_array.each { |val|
187
- if val[0] != '/' || val !~ /^htt(p|ps):/ || val[0,2] != '//'
188
- val = "#{@search_uri}#{val}"
188
+ if (val[0][0,2] == "//" || val[0][0] == "/" || val[0][0,3] == "../") && val[0] !~ /^htt(p|ps):/
189
+ if val[0][0,3] == "../"
190
+ val[0][0,3] = ""
191
+ end
192
+ if val[0][0,2] == "//"
193
+ val[0][0,2] = ""
194
+ end
195
+ if val[0][0] == "/"
196
+ val[0][0] = ""
197
+ end
198
+ val[0] = "#{@search_domain}#{val[0]}"
189
199
  end
200
+ @link_parents[val[0].chomp.to_sym] = @search_uri.strip
190
201
  }
191
202
 
192
203
  # combine found links with links array
@@ -200,7 +211,7 @@ class LinkScrapper
200
211
  end
201
212
 
202
213
  # store results in checked hash
203
- @checked_links[@search_uri.to_sym] = {res: code, time: delta}
214
+ @checked_links[@search_uri.to_sym] = {res: code, time: delta, parent: @link_parents[@search_uri.to_sym]}
204
215
 
205
216
  end
206
217
 
@@ -215,13 +226,13 @@ class LinkScrapper
215
226
  # save search results
216
227
  CSV.open('results.csv', 'wb') {|csv|
217
228
  @checked_links.each {|link|
218
- csv << [link[0], link[1][:res], link[1][:time]]
229
+ csv << [link[0], link[1][:res], link[1][:time], link[1][:parent]]
219
230
  }
220
231
  }
221
232
  # save list of external links
222
233
  CSV.open('external-links.csv', 'wb') {|csv|
223
234
  @external_links.each do |link|
224
- csv << [link[0], link[1][:res], link[1][:time]]
235
+ csv << [link[0], link[1][:res], link[1][:time], link[1][:parent]]
225
236
  end
226
237
  }
227
238
  # save list of invalid links
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: link_scrapper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Robert McDowell
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-04-02 00:00:00.000000000 Z
11
+ date: 2017-02-24 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: A simple gem for scrapping links within an assigned website. Results
14
14
  for domain, external links, and invalid URLs can be saved as CSVs or returned as
@@ -19,7 +19,7 @@ extensions: []
19
19
  extra_rdoc_files: []
20
20
  files:
21
21
  - lib/link_scrapper.rb
22
- homepage: http://virginiabeachwebdevelopment.com
22
+ homepage: https://github.com/Studio-Center/Ruby-Link-Extractor
23
23
  licenses:
24
24
  - MIT
25
25
  metadata: {}