scrapinator 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ NmY1NDcwNGU3MzRlYzIxNGZhMjg2NDEzNjFlYmQzNTM4NmRkOWYyOA==
5
+ data.tar.gz: !binary |-
6
+ MWYxYTIwY2FlMjM0NzA1OTk5MTc3YTQ2Yzc3ZWNiNTBhZjgzMjkzNQ==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ YWQxZDM3YTQ4OWNmMzc4OTRkMWY0M2UyOTQ0ODRiNzhlYzgyMDg0MDVjMmQw
10
+ MjljNTkxNmFiOTVjODQ5NjY3ODIwYmJhMDIyNzhjMjQ4NmZmOWRjYWVmZjI4
11
+ OTE3YTUxZGQxYTEwMzJjMDg4Mzc1NmM2Zjk4NWNkZWNjZDIxY2E=
12
+ data.tar.gz: !binary |-
13
+ ZGFkMGVmMzRiZjc0OGRkNDUwNmRlMDFmYTU2NzQ5MzM4MWY2MGFmMjE5OWFl
14
+ YmIzZmRhZDcyMWI2Yjc5ODEzZGYzOWY1MjgyMjNlZmNhODFiYjMzYTJlOGQ0
15
+ MzA0YjYyY2Y0ZDFhZGNlZTcwYzUyNjJmNWMxZTNmNDU3MDEwMzk=
@@ -0,0 +1,22 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ *.bundle
19
+ *.so
20
+ *.o
21
+ *.a
22
+ mkmf.log
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in scrapinator.gemspec
4
+ gemspec
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Karl Coelho
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,9 @@
1
+ # Scrapinator
2
+
3
+ A scraper that prints each and every link it finds on a page to the console, then follows those links and does the same for every page it reaches.
4
+
5
+ ## Usage
6
+
7
+ ```
8
+ scrapinator [URL]
9
+ ```
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
+
@@ -0,0 +1,33 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'nokogiri'
4
+ require 'open-uri'
5
+ require 'uri'
6
+
7
# Fetches the page at +link+ and returns the targets of its hyperlinks.
#
# Every <a> element's href is resolved against +link+, so relative
# references are returned as absolute URL strings. Anchors that have no
# href, or whose href cannot be parsed as a URI, are skipped so that one
# bad anchor does not discard the rest of the page.
#
# Errors raised while fetching or parsing +link+ itself are not rescued
# here; they propagate to the caller.
#
# @param link [String] absolute URL of the page to scan
# @return [Array<String>] absolute URLs, in document order
def find_links(link)
  # Go through URI#open (provided by open-uri) rather than Kernel#open:
  # Kernel#open runs a command when handed a string starting with "|",
  # and it no longer opens URLs on Ruby 3+. The block form also closes
  # the response body once the document has been parsed.
  page = URI.parse(link).open { |io| Nokogiri::HTML(io) }

  page.css('a').each_with_object([]) do |anchor, links|
    href = anchor['href']
    next unless href

    begin
      links.push(URI.join(link, href).to_s)
    rescue URI::Error
      # Malformed href (e.g. containing spaces): ignore this anchor only.
      next
    end
  end
end
17
+
18
+
19
# Crawl outward from the URL given as the first command-line argument,
# printing every link that is found along the way.
link = ARGV[0]
abort 'Usage: scrapinator [URL]' if link.to_s.empty?

# Work queue: newly found links are added at the front and taken from the
# back, so earlier discoveries are processed before later ones.
queue = find_links(link)
# Pages that have already been fetched. Without this, pages that link to
# each other would be downloaded again and again and the queue would grow
# without bound.
fetched = { link => true }
puts

while (url = queue.pop)
  puts "Link is: #{url}"
  next if fetched[url]

  fetched[url] = true
  begin
    queue.unshift(*find_links(url))
  rescue StandardError
    # Best effort: a page that cannot be fetched or parsed is skipped.
    # Only StandardError is rescued so that Ctrl-C (Interrupt) and
    # SystemExit still terminate the crawl.
    next
  end
end
32
+
33
+
@@ -0,0 +1,5 @@
1
+ require "scrapinator/version"
2
+
3
# Top-level namespace for the scrapinator gem.
#
# No library code is defined here; requiring this file only loads
# "scrapinator/version", which supplies the VERSION constant under this
# namespace.
module Scrapinator
end
@@ -0,0 +1,3 @@
1
# Namespace for the scrapinator gem.
module Scrapinator
  # Version of the gem. Frozen so the shared string cannot be mutated in
  # place by code that reads it.
  VERSION = "0.0.2".freeze
end
@@ -0,0 +1,24 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'scrapinator/version'
5
+
6
# Gem specification for scrapinator: package metadata, the list of files
# to ship, and the runtime/development dependencies.
Gem::Specification.new do |spec|
  spec.name = "scrapinator"
  spec.version = Scrapinator::VERSION
  spec.authors = ["Karl Coelho"]
  spec.email = ["karl.coelho1@gmail.com"]
  spec.summary = %q{A scraper which will output each and every link it finds on a page, to the console.}
  # Kept distinct from the summary: RubyGems warns at build time when the
  # description and the summary are identical.
  spec.description = %q{Command-line scraper that fetches a page, prints each and every link it finds to the console, and then follows those links to do the same on every page it reaches.}
  spec.homepage = "https://github.com/karlcoelho/scrapinator"
  spec.license = "MIT"

  # Ship exactly the files tracked by git; the listing is NUL-separated so
  # unusual file names are split correctly.
  spec.files = `git ls-files -z`.split("\x0")
  # Everything under bin/ is installed as an executable, by base name.
  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_runtime_dependency "nokogiri", "~> 1.6.1"
  spec.add_development_dependency "bundler", "~> 1.6"
  spec.add_development_dependency "rake"
end
metadata ADDED
@@ -0,0 +1,98 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: scrapinator
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Karl Coelho
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-06-20 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: 1.6.1
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: 1.6.1
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: '1.6'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: '1.6'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ! '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ description: A scraper which will output every and every link it finds on a page,
56
+ to the console.
57
+ email:
58
+ - karl.coelho1@gmail.com
59
+ executables:
60
+ - scrapinator
61
+ extensions: []
62
+ extra_rdoc_files: []
63
+ files:
64
+ - .gitignore
65
+ - Gemfile
66
+ - LICENSE.txt
67
+ - README.md
68
+ - Rakefile
69
+ - bin/scrapinator
70
+ - lib/scrapinator.rb
71
+ - lib/scrapinator/version.rb
72
+ - scrapinator.gemspec
73
+ homepage: http://github.com/karlcoelho/scrapinator
74
+ licenses:
75
+ - MIT
76
+ metadata: {}
77
+ post_install_message:
78
+ rdoc_options: []
79
+ require_paths:
80
+ - lib
81
+ required_ruby_version: !ruby/object:Gem::Requirement
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
86
+ required_rubygems_version: !ruby/object:Gem::Requirement
87
+ requirements:
88
+ - - ! '>='
89
+ - !ruby/object:Gem::Version
90
+ version: '0'
91
+ requirements: []
92
+ rubyforge_project:
93
+ rubygems_version: 2.2.0
94
+ signing_key:
95
+ specification_version: 4
96
+ summary: A scraper which will output every and every link it finds on a page, to the
97
+ console.
98
+ test_files: []