crawl 1.1.3 → 1.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.rspec +2 -0
- data/Gemfile +9 -0
- data/README.md +3 -0
- data/circle.yml +3 -0
- data/lib/crawl/page.rb +6 -2
- data/lib/crawl/version.rb +1 -1
- data/spec/lib/crawl/page_spec.rb +0 -0
- data/spec/page_spec.rb +11 -0
- data/spec/spec_helper.rb +17 -0
- metadata +12 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 103da22dba2987ec6ec5bbf700473cb243170fc5
|
4
|
+
data.tar.gz: 8197231448106e4fe0b198b53e734110eb1748a9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 206ff34e285a074b7dad8ebb68453295df14d67b95b3a4755d823a0d4bd2a3bae92859ff02fe52dee6b134c02966c7f4d9c61e52e1938c3815d56fc7debf7387
|
7
|
+
data.tar.gz: 2f8f655862a28cd4772317866b72ca31c392c7c63997226bac8c2ceccfe15fca203501189a4a0b9db0ebe002555fc9932265a016cd63c247b85b355398434592
|
data/.gitignore
CHANGED
data/.rspec
ADDED
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -1,5 +1,8 @@
|
|
1
1
|
# Crawl
|
2
2
|
|
3
|
+
[![Circle CI](https://circleci.com/gh/alphasights/crawl/tree/master.svg?style=shield&circle-token=188c328ee0669c8eba8dbda7f7f9943445a50c70)](https://circleci.com/gh/alphasights/crawl/tree/master)
|
4
|
+
|
5
|
+
|
3
6
|
Crawl pages within a domain, reporting any page that returns a bad response code
|
4
7
|
|
5
8
|
Usage:
|
data/circle.yml
ADDED
data/lib/crawl/page.rb
CHANGED
@@ -17,10 +17,14 @@ class Page
|
|
17
17
|
if url.start_with?('/')
|
18
18
|
url
|
19
19
|
else
|
20
|
-
"#{
|
20
|
+
"#{source_directory}/#{url}"
|
21
21
|
end
|
22
22
|
end
|
23
23
|
|
24
|
+
def source_directory
|
25
|
+
File.split(source).first.sub(/^\./, '').sub(/\/$/, '')
|
26
|
+
end
|
27
|
+
|
24
28
|
def <=>(other)
|
25
29
|
url <=> other.url
|
26
30
|
end
|
@@ -58,4 +62,4 @@ class Page
|
|
58
62
|
def to_s
|
59
63
|
"#{url} found on #{source} - #{error || 'OK'}"
|
60
64
|
end
|
61
|
-
end
|
65
|
+
end
|
data/lib/crawl/version.rb
CHANGED
File without changes
|
data/spec/page_spec.rb
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
require './lib/crawl/page'
|
2
|
+
|
3
|
+
RSpec.describe Page do
|
4
|
+
describe "#relative_url" do
|
5
|
+
specify { expect(Page.new(:register, "/", "/").relative_url).to eq "/" }
|
6
|
+
specify { expect(Page.new(:register, "page.html", "").relative_url).to eq "/page.html" }
|
7
|
+
specify { expect(Page.new(:register, "/interview", "/").relative_url).to eq "/interview" }
|
8
|
+
specify { expect(Page.new(:register, "overview.html", "/").relative_url).to eq "/overview.html" }
|
9
|
+
specify { expect(Page.new(:register, "post-5.html", "/posts/index.html").relative_url).to eq "/posts/post-5.html" }
|
10
|
+
end
|
11
|
+
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
RSpec.configure do |config|
|
2
|
+
config.expect_with :rspec do |expectations|
|
3
|
+
expectations.include_chain_clauses_in_custom_matcher_descriptions = true
|
4
|
+
end
|
5
|
+
|
6
|
+
if config.files_to_run.one?
|
7
|
+
config.default_formatter = 'doc'
|
8
|
+
end
|
9
|
+
|
10
|
+
config.shared_context_metadata_behavior = :apply_to_host_groups
|
11
|
+
config.filter_run_when_matching :focus
|
12
|
+
config.example_status_persistence_file_path = "spec/examples.txt"
|
13
|
+
config.disable_monkey_patching!
|
14
|
+
config.warnings = true
|
15
|
+
config.order = :random
|
16
|
+
Kernel.srand config.seed
|
17
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: crawl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.3
|
4
|
+
version: 1.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tor Erik Linnerud
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date:
|
13
|
+
date: 2016-09-07 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: nokogiri
|
@@ -79,11 +79,13 @@ extensions: []
|
|
79
79
|
extra_rdoc_files: []
|
80
80
|
files:
|
81
81
|
- ".gitignore"
|
82
|
+
- ".rspec"
|
82
83
|
- Gemfile
|
83
84
|
- LICENSE.txt
|
84
85
|
- README.md
|
85
86
|
- Rakefile
|
86
87
|
- bin/crawl
|
88
|
+
- circle.yml
|
87
89
|
- crawl.gemspec
|
88
90
|
- lib/crawl.rb
|
89
91
|
- lib/crawl/engine.rb
|
@@ -92,6 +94,9 @@ files:
|
|
92
94
|
- lib/crawl/register.rb
|
93
95
|
- lib/crawl/string.rb
|
94
96
|
- lib/crawl/version.rb
|
97
|
+
- spec/lib/crawl/page_spec.rb
|
98
|
+
- spec/page_spec.rb
|
99
|
+
- spec/spec_helper.rb
|
95
100
|
homepage: http://github.com/alphasights/crawl
|
96
101
|
licenses:
|
97
102
|
- MIT
|
@@ -112,10 +117,12 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
112
117
|
version: '0'
|
113
118
|
requirements: []
|
114
119
|
rubyforge_project:
|
115
|
-
rubygems_version: 2.
|
120
|
+
rubygems_version: 2.5.1
|
116
121
|
signing_key:
|
117
122
|
specification_version: 4
|
118
123
|
summary: Crawl pages witin a domain, reporting any page that returns a bad response
|
119
124
|
code
|
120
|
-
test_files:
|
121
|
-
|
125
|
+
test_files:
|
126
|
+
- spec/lib/crawl/page_spec.rb
|
127
|
+
- spec/page_spec.rb
|
128
|
+
- spec/spec_helper.rb
|