creepy-crawler 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- MGQ0ZWRlNjU1NjljMGU5MmI3YWQ4YmU3OTJiNTRkOWU2MTE5YTEzYQ==
4
+ YzA0Njc0ZTcyMTZjMmVmMGY0ZDljMjFmYmJiM2U1OTY1NGVmMmNkZg==
5
5
  data.tar.gz: !binary |-
6
- MDNkZGI0ZjUxOTliYmY0MzJmNzBkZjgyMzEyMzYzZTVkZDg1MTY3OA==
6
+ MmY2OTk0OWYzN2MyMGExMWZmZmRiZmY5YzNkNDNmZDQzZGE0OWUwZA==
7
7
  SHA512:
8
8
  metadata.gz: !binary |-
9
- MDlmMjk0MDNhMGI4NDg5YzkyOGM4YzQ5YjgyMWE2YjljYTBlZjgyMmI5NDBi
10
- MTdjOTNjOGZkYzcyMDE2ZmNhOGM5NmVmYWYzYTY4MTI4OTA0YThjMWE5Mjhh
11
- ZTkxZTNkYzI3MTdhMmYwOTM4MmIzODhkZDA4YmM5ODdjYjZjM2Y=
9
+ ZWFjMjU5NDRkZDRhODMyMTQ1OGEzZjE2N2Y2MDYxMDQ0M2EzYzc5ZWVjYTkx
10
+ MjAyZTMwZmU1Y2EyZjBlNzdjOTIzOWJmN2YyMGM0YTdiYTljOWRhMDI2NWEw
11
+ OGE1MmYwNDQ4ZDc3NDQ3ODAwNWJlOTEwM2ZmNTViYTliOGM1MDk=
12
12
  data.tar.gz: !binary |-
13
- ZmEyYzdhYjc3ZjgwNTdjM2EwN2MwOGFjNjFhZGUxZmQyZDhlY2UxNTk1MWNk
14
- NWZkMmRlMDFhMGRjYjdmOWVlMGEwODgwOTJiMDhjZGM1OTkzMDM5MzRjNTQ5
15
- ZWY1N2RiMjM1OGU1YjU3YWNmMTUyNzY3ODQ0NDdiMWNhNjc1NmM=
13
+ ZWVkMTg2NDU0ZmI1N2U5NWFhYTM5NjE3Y2JmMmJlZmRhYmJiNmU1MjA5NTM4
14
+ YWJmYmI3ODU3MTViYjBhMzYwZDBlYmU2Njg3ZmVkZjU1NmU0MjYwMTI1OGI0
15
+ MzQyZTQ0NWI5MGFmYzI2MjU0ZTA4OGVhYjYzOTMyNmE2YzBlZTk=
data/.gitignore CHANGED
@@ -1 +1,2 @@
1
- neo4j/
1
+ neo4j/
2
+ *.gem
data/README.md CHANGED
@@ -8,7 +8,7 @@ Ruby web crawler that takes a url as input and produces a sitemap using a neo4j
8
8
 
9
9
  ##Installation
10
10
  ####Clone
11
- git clone https://github.com/udryan10/creepy-crawler.git
11
+ git clone https://github.com/udryan10/creepy-crawler.git && cd creepy-crawler
12
12
  ####Install Required Gems
13
13
  bundle install
14
14
  ####Install graph database
@@ -1,7 +1,7 @@
1
1
  #!/bin/sh
2
2
 
3
3
  MAX_PAGE_CRAWL=50
4
- CRAWL_URL="http://www.yahoo.com"
4
+ CRAWL_URL="https://www.digitalocean.com"
5
5
  echo "Crawler is set to crawl ${CRAWL_URL}"
6
6
  echo "Crawler is set to crawl ${MAX_PAGE_CRAWL} pages"
7
7
 
@@ -42,6 +42,13 @@ end
42
42
 
43
43
  # allow the initiating of a crawl from command line
44
44
  if __FILE__==$0
45
+
46
+ # Exit cleanly from an early interrupt
47
+ Signal.trap("INT") {
48
+ puts "Received interrupt. Stopping crawl"
49
+ exit 1
50
+ }
51
+
45
52
  # setup options
46
53
  opts = Trollop::options do
47
54
  opt :site, "Url of site to crawl", :type => :string # flag --site
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: creepy-crawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ryan Grothouse
@@ -17,7 +17,7 @@ dependencies:
17
17
  - - ~>
18
18
  - !ruby/object:Gem::Version
19
19
  version: '1.5'
20
- type: :development
20
+ type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
@@ -31,21 +31,7 @@ dependencies:
31
31
  - - ~>
32
32
  - !ruby/object:Gem::Version
33
33
  version: '2.14'
34
- type: :development
35
- prerelease: false
36
- version_requirements: !ruby/object:Gem::Requirement
37
- requirements:
38
- - - ~>
39
- - !ruby/object:Gem::Version
40
- version: '2.14'
41
- - !ruby/object:Gem::Dependency
42
- name: rspec-core
43
- requirement: !ruby/object:Gem::Requirement
44
- requirements:
45
- - - ~>
46
- - !ruby/object:Gem::Version
47
- version: '2.14'
48
- type: :development
34
+ type: :runtime
49
35
  prerelease: false
50
36
  version_requirements: !ruby/object:Gem::Requirement
51
37
  requirements: