href_scraper 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. checksums.yaml +7 -0
  2. data/bin/href_scraper +2 -0
  3. data/lib/href_scraper.rb +25 -0
  4. metadata +81 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 8cceaad0196924b1a15e206e44d0dbe317e475ae3883e33cf1365a16d2aa7a6f
4
+ data.tar.gz: 0eb0a090760855586ec91029fe68e22cbfd0b292e9df233b1bf79770870fde7e
5
+ SHA512:
6
+ metadata.gz: fd34dc317c92e5b4fdbbd550798f77ea2b733e54d00bf08ce1fe6480124601d343e1d879a3c67dcecb12fa95574aec4b50b8aa62608ce4740c02405f81060082
7
+ data.tar.gz: c0ccddf4643773ccec9a67f9b1e65fcbcd47233e1d8765665591aae246e38a88ffa8d93f58fe0d3efb444e9bdef5fa7342da21091616e43cd33f291808fac68e
data/bin/href_scraper ADDED
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env ruby
2
+ require 'href_scraper'
data/lib/href_scraper.rb ADDED
@@ -0,0 +1,25 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'optparse'
4
+ require 'open-uri'
5
+ require 'nokogiri'
6
+
7
# Command-line helper that prints the http(s) links found in the anchor
# tags of a single web page.
#
# NOTE: the option parser at the bottom of this module runs as a side effect
# of loading the file (it parses ARGV immediately).
module HrefScraper
  # Matches strings that begin with an http:// or https:// scheme.
  # Anchored with \A (start of string) rather than ^ (start of any line) so a
  # multi-line value such as "|cmd\nhttp://..." cannot pass the check.
  HTTP_URL = %r{\Ahttps?://}.freeze

  # Reports whether +url+ is acceptable input for the scraper.
  #
  # @param url [Object] candidate value (normally the String given to --url)
  # @return [Boolean] true only for a String starting with http:// or https://
  def self.valid_url?(url)
    url.is_a?(String) && url.match?(HTTP_URL)
  end

  # Keeps only the entries that are absolute http(s) links.
  #
  # @param hrefs [Array<String, nil>] href attribute values; nil entries
  #   (anchors without an href) are dropped
  # @return [Array<String>] matching links, in their original order
  def self.http_links(hrefs)
    hrefs.grep(HTTP_URL)
  end

  # Downloads +url+ and parses the response body as HTML.
  #
  # The URL is opened through the URI object instead of Kernel#open:
  # Kernel#open no longer handles URLs on Ruby 3.0+, and it executes a
  # command when given a string that starts with "|". The block form closes
  # the underlying handle once parsing is done.
  #
  # @param url [String] http(s) URL to fetch
  # @return [Object] whatever Nokogiri::HTML returns for the fetched body
  # @raise [URI::InvalidURIError] if +url+ cannot be parsed as a URI
  def self.fetch_html(url)
    URI.parse(url).open { |io| Nokogiri::HTML(io) }
  end

  OptionParser.new do |opt|
    opt.on('-u', '--url URL', 'URL to scrape links from') do |url|
      abort('Invalid URL passed') unless valid_url?(url)

      hrefs = fetch_html(url).xpath('//a').map { |tag| tag['href'] }
      http_links(hrefs).each { |link| puts link }
    end
  end.parse!
end
metadata ADDED
@@ -0,0 +1,81 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: href_scraper
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.0
5
+ platform: ruby
6
+ authors:
7
+ - Caleb Lemoine
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2018-08-18 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.16'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.16'
27
+ - !ruby/object:Gem::Dependency
28
+ name: nokogiri
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 1.8.4
34
+ - - ">="
35
+ - !ruby/object:Gem::Version
36
+ version: 1.8.4
37
+ type: :runtime
38
+ prerelease: false
39
+ version_requirements: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - "~>"
42
+ - !ruby/object:Gem::Version
43
+ version: 1.8.4
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: 1.8.4
47
+ description: Scrape hrefs from a single webpage
48
+ email:
49
+ - caleblemoine@gmail.com
50
+ executables:
51
+ - href_scraper
52
+ extensions: []
53
+ extra_rdoc_files: []
54
+ files:
55
+ - bin/href_scraper
56
+ - lib/href_scraper.rb
57
+ homepage: https://github.com/circa10a/href-scraper.git
58
+ licenses:
59
+ - MIT
60
+ metadata: {}
61
+ post_install_message:
62
+ rdoc_options: []
63
+ require_paths:
64
+ - lib
65
+ required_ruby_version: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ required_rubygems_version: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ version: '0'
75
+ requirements: []
76
+ rubyforge_project:
77
+ rubygems_version: 2.7.6
78
+ signing_key:
79
+ specification_version: 4
80
+ summary: CLI to scrape hrefs
81
+ test_files: []