href_scraper 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4) hide show
  1. checksums.yaml +7 -0
  2. data/bin/href_scraper +2 -0
  3. data/lib/href_scraper.rb +25 -0
  4. metadata +81 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 8cceaad0196924b1a15e206e44d0dbe317e475ae3883e33cf1365a16d2aa7a6f
4
+ data.tar.gz: 0eb0a090760855586ec91029fe68e22cbfd0b292e9df233b1bf79770870fde7e
5
+ SHA512:
6
+ metadata.gz: fd34dc317c92e5b4fdbbd550798f77ea2b733e54d00bf08ce1fe6480124601d343e1d879a3c67dcecb12fa95574aec4b50b8aa62608ce4740c02405f81060082
7
+ data.tar.gz: c0ccddf4643773ccec9a67f9b1e65fcbcd47233e1d8765665591aae246e38a88ffa8d93f58fe0d3efb444e9bdef5fa7342da21091616e43cd33f291808fac68e
data/bin/href_scraper ADDED
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env ruby
2
+ require 'href_scraper'
data/lib/href_scraper.rb ADDED
@@ -0,0 +1,25 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'optparse'
4
+ require 'open-uri'
5
+ require 'nokogiri'
6
+
7
# Command-line helper that prints the absolute HTTP(S) links found on a page.
#
# NOTE: the option parser at the bottom of this module runs as soon as the
# file is loaded, so requiring this file parses (and consumes) ARGV.
module HrefScraper
  # Matches strings that begin with "http" (covers http:// and https://).
  # Anchored with \A rather than ^ so that text on a later line of a
  # multi-line value cannot satisfy the check.
  HTTP_PREFIX = /\Ahttp/.freeze

  # Downloads +url+ and parses the response as HTML.
  #
  # URI.open is used instead of a bare `open`: open-uri's patch of
  # Kernel#open was deprecated in Ruby 2.7 and removed in 3.0, after which a
  # bare `open(url)` treats the URL as a local file name. The block form
  # closes the underlying IO once Nokogiri has consumed it.
  #
  # NOTE(review): URI.open hands strings that do not look like URLs on to
  # Kernel#open, so callers should only pass validated URLs.
  #
  # @param url [String] address of the page to fetch
  # @return [Object] the document produced by Nokogiri::HTML
  def self.fetch_html(url)
    URI.open(url) { |io| Nokogiri::HTML(io) }
  end

  # Keeps only the values that start with "http".
  #
  # Anchors without an href attribute yield nil; those never match and are
  # dropped along with relative paths, mailto: links and the like.
  #
  # @param hrefs [Enumerable<String, nil>] raw href attribute values
  # @return [Array<String>] the absolute HTTP(S) links, in their original order
  def self.http_links(hrefs)
    hrefs.grep(HTTP_PREFIX)
  end

  OptionParser.new do |opt|
    opt.on('-u', '--url URL', 'URL to scrape links from') do |url|
      # Refuse anything that is not an http(s) URL before touching the network.
      abort('Invalid URL passed') unless url.match?(HTTP_PREFIX)

      hrefs = fetch_html(url).xpath('//a').map { |tag| tag['href'] }
      # Print one link per line; iterating keeps the output empty when no
      # links match (`puts []` would emit a blank line).
      http_links(hrefs).each { |link| puts link }
    end
  end.parse!
end
metadata ADDED
@@ -0,0 +1,81 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: href_scraper
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.0
5
+ platform: ruby
6
+ authors:
7
+ - Caleb Lemoine
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2018-08-18 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.16'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.16'
27
+ - !ruby/object:Gem::Dependency
28
+ name: nokogiri
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 1.8.4
34
+ - - ">="
35
+ - !ruby/object:Gem::Version
36
+ version: 1.8.4
37
+ type: :runtime
38
+ prerelease: false
39
+ version_requirements: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - "~>"
42
+ - !ruby/object:Gem::Version
43
+ version: 1.8.4
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: 1.8.4
47
+ description: Scrape hrefs from a single webpage
48
+ email:
49
+ - caleblemoine@gmail.com
50
+ executables:
51
+ - href_scraper
52
+ extensions: []
53
+ extra_rdoc_files: []
54
+ files:
55
+ - bin/href_scraper
56
+ - lib/href_scraper.rb
57
+ homepage: https://github.com/circa10a/href-scraper.git
58
+ licenses:
59
+ - MIT
60
+ metadata: {}
61
+ post_install_message:
62
+ rdoc_options: []
63
+ require_paths:
64
+ - lib
65
+ required_ruby_version: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ required_rubygems_version: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ version: '0'
75
+ requirements: []
76
+ rubyforge_project:
77
+ rubygems_version: 2.7.6
78
+ signing_key:
79
+ specification_version: 4
80
+ summary: CLI to scrape hrefs
81
+ test_files: []