site_mapper 0.0.10 → 0.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 49d4c0ab215ac8872234e3c552275f89688f997d
4
- data.tar.gz: 4b75bae288524a38fe3a8f7d6df9ab42e715211a
3
+ metadata.gz: dc4b21c14dc15f1cc6df4b6406d12acf5cb821d9
4
+ data.tar.gz: 34ef0ab2fcd0a74bbcdd53d9e47681d6440f951d
5
5
  SHA512:
6
- metadata.gz: e0cad8aedfb27fc5a4d56b297098b76a9abd43df86b6c20b9d6f228256091191bb020e620217a9bb40bdc68c981bf8d2ca4fdf2930c55158bb321fa11dfc960b
7
- data.tar.gz: c3b1c1863e3de70793d127772b7a314b1e788483fe9151e441c1c209b7fed59c8b210cdae5d453017b0d49eff179f7b51eee47544f68a15c6a62c94783b7b24e
6
+ metadata.gz: 1d4da1f2753dfb5f06ea577c02183efbf4cb919b783ce128f07b46f29b6af7a330cc01d839895ea4d4fb53cf68db2a6b5adccba47530a0278aaca9bfe1fa4c02
7
+ data.tar.gz: c32dde9478240d63b63d6e521e04f3c914322b544c79eee7ee25e1b2ea46b5ad2529ddc7d778d30fe2c10cde074f88a72c53458a814fc5d2ab74bc87288e63ed
@@ -20,10 +20,19 @@ module SiteMapper
20
20
  # @param [String] link to domain
21
21
  # @param [Hash] options hash
22
22
  # @example Collect all URLs from example.com
23
- # SiteMapper.map('example.com')
23
+ # SiteMapper.map('example.com')
24
+ # @example Collect all URLs from example.com with custom User-agent
25
+ # SiteMapper.map('example.com', user_agent: 'MyUserAgent')
26
+ # @example Collect all URLs from example.com with custom logger class
27
+ # class MyLogger
28
+ # def self.log(msg); puts msg;end
29
+ # def self.err_log(msg); puts msg;end
30
+ # end
31
+ # SiteMapper.map('example.com', logger: MyLogger)
24
32
  def self.map(link, options = {})
25
- set_logger(options[:logger])
26
- Crawler.collect_urls(link) { |url| yield(url) if block_given? }
33
+ set_logger(options.delete(:logger))
34
+ options = { user_agent: USER_AGENT }.merge(options)
35
+ Crawler.collect_urls(link, options) { |url| yield(url) if block_given? }
27
36
  end
28
37
 
29
38
  # Set logger.
@@ -1,85 +1,50 @@
1
1
  module SiteMapper
2
2
  # Crawl URL formatter.
3
3
  class CrawlUrl
4
- attr_reader :resolved_base_url, :base_hostname
4
+ attr_reader :resolved_base_url
5
5
 
6
+ # Too many request error message
7
+ TOO_MANY_REQUEST_MSG = "You're being challenged with a 'too many requests' captcha"
8
+
9
+ # Initialize CrawlUrl
6
10
  # @param [String] base_url
11
+ # @example Initialize CrawlUrl with example.com as base_url
12
+ # CrawlUrl.new('example.com')
7
13
  def initialize(base_url)
8
- @resolved_base_url = Request.resolve_url(base_url, with_query: false)
14
+ @resolved_base_url = Request.resolve_url(base_url) # "#{protocol}#{host}"
9
15
  @base_hostname = URI.parse(@resolved_base_url).hostname
10
- @resolved_base_url.prepend('http://') unless @resolved_base_url.start_with?('http')
11
16
  end
12
17
 
13
18
  # Given a link it constructs the absolute path,
14
19
  # if valid URL & URL has same domain as @resolved_base_url.
15
- # @param [String] raw_url url found on page
16
- # @param [String] get_url current page url
20
+ # @param [String] page_url url found on page
21
+ # @param [String] current_url current page url
17
22
  # @return [String] with absolute path to resource
18
23
  # @example Construct absolute URL for '/path', example.com
19
24
  # cu = CrawlUrl.new('example.com')
20
25
  # cu.absolute_url_from('/path', 'example.com/some/path')
21
26
  # # => http://example.com/some/path
22
- def absolute_url_from(raw_url, get_url)
23
- return nil unless eligible_url?(raw_url)
24
- parsed_url = URI.parse(raw_url) rescue URI.parse('')
25
- if parsed_url.relative?
26
- url_from_relative(raw_url, get_url)
27
- elsif same_domain?(raw_url, @resolved_base_url)
28
- raw_url
29
- else
30
- nil
31
- end
27
+ def absolute_url_from(page_url, current_url)
28
+ return unless eligible_url?(page_url)
29
+ parsed_uri = URI.join(current_url, page_url) rescue return
30
+ return unless parsed_uri.hostname == @base_hostname
31
+ parsed_uri.to_s
32
32
  end
33
33
 
34
34
  private
35
35
 
36
- def url_from_relative(url, current_page_url)
37
- if url.start_with?('/')
38
- "#{without_path_suffix(resolved_base_url)}#{url}"
39
- elsif url.start_with?('../')
40
- "#{url_from_dotted_url(url, current_page_url)}"
41
- else
42
- "#{with_path_suffix(resolved_base_url)}#{url}"
43
- end
44
- end
45
-
46
- def url_from_dotted_url(url, current_page_url)
47
- absolute_url = with_path_suffix(current_page_url.dup)
48
- found_dots = without_path_suffix(url).scan('../').length
49
- removed_dots = 0
50
- max_levels = 4
51
- while found_dots >= removed_dots && max_levels > removed_dots
52
- index = absolute_url.rindex('/') or break
53
- absolute_url = absolute_url[0..(index - 1)]
54
- removed_dots += 1
55
- end
56
- "#{with_path_suffix(absolute_url)}#{url.gsub('../', '')}"
57
- end
58
-
59
- def with_path_suffix(passed_url)
60
- url = passed_url.dup
61
- url.end_with?('/') ? url : url << '/'
62
- end
63
-
64
- def without_path_suffix(passed_url)
65
- url = passed_url.dup
66
- url.end_with?('/') ? url[0...(url.length - 1)] : url
67
- end
68
-
69
36
  def eligible_url?(href)
70
37
  return false if href.nil? || href.empty?
71
38
  dont_start = %w(javascript: callto: mailto: tel: skype: facetime: wtai: #)
72
39
  dont_include = %w(/email-protection#)
73
- dont_end = %w(.zip .rar .pdf .exe .dmg .pkg .dpkg .bat)
40
+ err_include = %w(/sorry/IndexRedirect?)
41
+ dont_end = %w(.zip .rar .json .pdf .exe .dmg .pkg .dpkg .bat)
74
42
 
43
+ err_include.each { |pattern| fail TOO_MANY_REQUEST_MSG if href.include?(pattern) }
75
44
  dont_start.each { |pattern| return false if href.start_with?(pattern) }
76
45
  dont_include.each { |pattern| return false if href.include?(pattern) }
77
46
  dont_end.each { |pattern| return false if href.end_with?(pattern) }
78
47
  true
79
48
  end
80
-
81
- def same_domain?(first, second)
82
- first.include?(second)
83
- end
84
49
  end
85
50
  end
@@ -4,20 +4,39 @@ require 'nokogiri'
4
4
  module SiteMapper
5
5
  # Crawls a given site.
6
6
  class Crawler
7
+ # Default options
8
+ OPTIONS = {
9
+ resolve: false,
10
+ sleep_length: 0.5,
11
+ max_requests: Float::INFINITY
12
+ }
13
+
7
14
  # @param [String] url base url for crawler
8
15
  # @param [Hash] options hash, resolve key (optional false by default)
16
+ # add user_agent key to specify custom User-agent
17
+ # @example Create crawler with custom User-agent
18
+ # Crawler.new('example.com', user_agent: 'MyUserAgent')
19
+ # @example Create crawler and resolve all urls
20
+ # Crawler.new('example.com', resolve: true)
21
+ # @example Create crawler and sleep 1 second between each request
22
+ # Crawler.new('example.com', sleep_length: 1)
23
+ # @example Create crawler and perform max 3 requests
24
+ # Crawler.new('example.com', max_requests: 3)
9
25
  def initialize(url, options = {})
10
26
  @base_url = Request.resolve_url(url)
11
- @options = { resolve: false }.merge(options)
27
+ @options = OPTIONS.dup.merge(options)
28
+ @user_agent = @options.fetch(:user_agent)
12
29
  @crawl_url = CrawlUrl.new(@base_url)
13
30
  @fetch_queue = CrawlQueue.new
14
31
  @processed = Set.new
15
32
  @robots = nil
16
33
  end
17
34
 
35
+ # See documentation for the instance variant of this method.
36
+ # @return [Array] with links.
18
37
  # @see #collect_urls
19
- def self.collect_urls(base_url)
20
- new(base_url).collect_urls { |url| yield(url) }
38
+ def self.collect_urls(*args)
39
+ new(*args).collect_urls { |url| yield(url) }
21
40
  end
22
41
 
23
42
  # Collects all links on domain for domain.
@@ -32,13 +51,16 @@ module SiteMapper
32
51
  # end
33
52
  def collect_urls
34
53
  @fetch_queue << @crawl_url.resolved_base_url
35
- until @fetch_queue.empty?
54
+ until @fetch_queue.empty? || @processed.length >= @options[:max_requests]
36
55
  url = @fetch_queue.pop
37
56
  yield(url)
38
- page_links(url)
57
+ page_urls_for(url)
39
58
  end
40
- Logger.log "Crawling finished, #{@processed.length} links found"
41
- @processed.to_a
59
+ result = @processed + @fetch_queue
60
+ Logger.log "Crawling finished:"
61
+ Logger.log "Processed links: #{@processed.length}"
62
+ Logger.log "Found links: #{result.length}"
63
+ result.to_a
42
64
  rescue Interrupt, IRB::Abort
43
65
  Logger.err_log 'Crawl interrupted.'
44
66
  @fetch_queue.to_a
@@ -46,12 +68,13 @@ module SiteMapper
46
68
 
47
69
  private
48
70
 
49
- def page_links(get_url)
50
- Logger.log "Queue length: #{@fetch_queue.length}, Parsing: #{get_url}"
51
- link_elements = Request.get_page(get_url).css('a') rescue []
52
- @processed << get_url
71
+ def page_urls_for(current_url)
72
+ Logger.log "Queue length: #{@fetch_queue.length}, Parsing: #{current_url}"
73
+ link_elements = Request.document(current_url, user_agent: @options[:user_agent]).css('a')
74
+ wait
75
+ @processed << current_url
53
76
  link_elements.each do |page_link|
54
- url = @crawl_url.absolute_url_from(page_link.attr('href'), get_url)
77
+ url = @crawl_url.absolute_url_from(page_link.attr('href'), current_url)
55
78
  @fetch_queue << url if url && eligible_for_queue?(resolve(url))
56
79
  end
57
80
  end
@@ -62,7 +85,7 @@ module SiteMapper
62
85
 
63
86
  def robots
64
87
  return @robots unless @robots.nil?
65
- robots_body = Request.get_response_body("#{@base_url}/robots.txt")
88
+ robots_body = Request.response_body("#{@base_url}/robots.txt", user_agent: @options[:user_agent])
66
89
  @robots = Robots.new(robots_body, URI.parse(@base_url).host, SiteMapper::USER_AGENT)
67
90
  @robots
68
91
  end
@@ -71,13 +94,17 @@ module SiteMapper
71
94
  @options[:resolve] ? Request.resolve_url(url) : url
72
95
  end
73
96
 
97
+ def wait
98
+ sleep @options[:sleep_length]
99
+ end
100
+
74
101
  # Queue of urls to be crawled.
75
102
  class CrawlQueue
76
103
  # @return [Set] that extends EnumerablePop module
77
104
  def self.new
78
105
  Set.new.extend(EnumerablePop)
79
106
  end
80
-
107
+
81
108
  # Add pop method when added to class.
82
109
  # The class that extends this module need to implement #first and #delete.
83
110
  module EnumerablePop
@@ -90,5 +117,5 @@ module SiteMapper
90
117
  end
91
118
  end
92
119
  end
93
- end
120
+ end
94
121
  end
@@ -1,6 +1,9 @@
1
1
  module SiteMapper
2
2
  # Handles logging
3
3
  class Logger
4
+
5
+ # Choose what logger to use by type.
6
+ # @return [Object] returns the appropriate logger.
4
7
  # @param [Symbol] type of logger class to be used
5
8
  def self.use_logger_type(type)
6
9
  fail 'Logger already set' if defined?(@@log)
@@ -15,18 +18,22 @@ module SiteMapper
15
18
  @@log
16
19
  end
17
20
 
21
+ # Choose what logger to use.
22
+ # @return [Object] returns logger.
18
23
  # @param [Class, #log, #err_log] logger a logger class
19
24
  def self.use_logger(logger)
20
25
  fail 'Logger already set' if defined?(@@log)
21
26
  @@log = logger
22
27
  end
23
28
 
29
+ # Send a message to the logger
24
30
  # @param [String] msg to be logged
25
31
  def self.log(msg)
26
32
  @@log ||= use_logger_type(:default)
27
33
  @@log.log(msg)
28
34
  end
29
35
 
36
+ # Send an error message to the logger
30
37
  # @param [String] err_msg to be logged
31
38
  def self.err_log(err_msg)
32
39
  @@log ||= use_logger_type(:default)
@@ -35,11 +42,13 @@ module SiteMapper
35
42
 
36
43
  # Log to terminal.
37
44
  module SystemOutLogger
45
+ # Log to STDOUT
38
46
  # @param [String] msg to be logged to STDOUT
39
47
  def self.log(msg)
40
48
  STDOUT.puts(msg)
41
49
  end
42
50
 
51
+ # Log to STDERR
43
52
  # @param [String] msg to be logged to STDERR
44
53
  def self.err_log(msg)
45
54
  STDERR.puts("[ERROR] #{msg}")
@@ -48,8 +57,10 @@ module SiteMapper
48
57
 
49
58
  # Don't log
50
59
  module NilLogger
60
+ # Don't log
51
61
  # @param [String] msg to be ignored
52
62
  def self.log(msg);end
63
+ # Don't error log
53
64
  # @param [String] msg to be ignored
54
65
  def self.err_log(msg);end
55
66
  end
@@ -11,61 +11,62 @@ module SiteMapper
11
11
  class << self
12
12
  # Given an URL get it then parse it with Nokogiri::HTML.
13
13
  # @param [String] url
14
+ # @param [Hash] options
14
15
  # @return [Nokogiri::HTML] a nokogiri HTML object
15
- def get_page(url)
16
- Nokogiri::HTML(Request.get_response_body(url))
16
+ def document(url, options = {})
17
+ Nokogiri::HTML(Request.response_body(url, options))
17
18
  end
18
19
 
19
20
  # Given an URL get the response.
20
21
  # @param [String] url
21
- # @param [Boolean] resolve (optional and false by default)
22
+ # @param [Hash] options
22
23
  # @return [Net::HTTPOK] if response is successful, raises error otherwise
23
24
  # @example get example.com and resolve the URL
24
- # Request.get_response('example.com', true)
25
+ # Request.response('example.com', resolve: true)
25
26
  # @example get example.com and do *not* resolve the URL
26
- # Request.get_response('http://example.com')
27
- # Request.get_response('http://example.com', false)
28
- def get_response(url, resolve = false)
29
- resolved_url = resolve ? resolve_url(url) : url
27
+ # Request.response('http://example.com')
28
+ # @example get example.com and resolve the URL
29
+ # Request.response('http://example.com', resolve: true)
30
+ # @example get example.com and resolve the URL and use a custom User-Agent
31
+ # Request.response('http://example.com', resolve: true, user_agent: 'MyUserAgent')
32
+ def response(url, options = {})
33
+ options = {
34
+ resolve: false,
35
+ user_agent: SiteMapper::USER_AGENT
36
+ }.merge(options)
37
+ resolved_url = options[:resolve] ? resolve_url(url) : url
30
38
  uri = URI.parse(resolved_url)
31
39
  http = Net::HTTP.new(uri.host, uri.port)
32
- http.use_ssl = true if resolved_url.include?('https://')
40
+ http.use_ssl = true if resolved_url.start_with?('https://')
33
41
 
34
42
  request = Net::HTTP::Get.new(uri.request_uri)
35
- request['User-Agent'] = SiteMapper::USER_AGENT
43
+ request['User-Agent'] = options[:user_agent]
36
44
  http.request(request)
37
45
  end
38
46
 
39
47
  # Get response body, rescues with nil if an exception is raised.
40
- # @see Request#get_response
41
- def get_response_body(*args)
42
- get_response(*args).body rescue nil
48
+ # @see Request#response
49
+ def response_body(*args)
50
+ response(*args).body
43
51
  end
44
52
 
45
53
  # Resolve an URL string and follows redirects.
46
54
  # if the URL can't be resolved the original URL is returned.
47
- # @param [String] url
48
- # @param [Hash] options hash, with_query key (optional and true by default)
55
+ # @param [String] url to resolve
49
56
  # @return [String] a URL string that potentially is a redirected URL
50
57
  # @example Resolve google.com
51
58
  # resolve_url('google.com')
52
59
  # # => 'https://www.google.com'
53
- def resolve_url(url, options = {})
54
- options = { with_query: true }.merge(options)
60
+ def resolve_url(url)
55
61
  resolved = UrlResolver.resolve(url)
56
- resolved = remove_query(resolved) unless options[:with_query]
62
+ resolved = resolved.prepend('http://') unless has_protocol?(resolved)
57
63
  resolved
58
64
  end
59
65
 
60
- # Removes query string from URL string.
61
- # @param [String] url
62
- # @return [String] an URL string without query
63
- # @example Removes query string
64
- # remove_query('example.com/path?q=keyword')
65
- # # => 'example.com/path'
66
- def remove_query(url)
67
- index = url.index('?')
68
- index.nil? ? url : url[0...index]
66
+ private
67
+
68
+ def has_protocol?(url)
69
+ url.start_with?('https://') || url.start_with?('http://')
69
70
  end
70
71
  end
71
72
  end
@@ -6,6 +6,7 @@ module SiteMapper
6
6
  class Robots
7
7
  # Parses robots.txt
8
8
  class ParsedRobots
9
+ # Initializes ParsedRobots
9
10
  def initialize(body, user_agent)
10
11
  @other = {}
11
12
  @disallows = {}
@@ -61,7 +62,7 @@ module SiteMapper
61
62
  path = uri.request_uri
62
63
 
63
64
  user_agent.downcase!
64
-
65
+
65
66
  @disallows.each do |key, value|
66
67
  if user_agent =~ key
67
68
  value.each do |rule|
@@ -71,9 +72,9 @@ module SiteMapper
71
72
  end
72
73
  end
73
74
  end
74
-
75
+
75
76
  @allows.each do |key, value|
76
- unless allowed
77
+ unless allowed
77
78
  if user_agent =~ key
78
79
  value.each do |rule|
79
80
  if path =~ rule
@@ -93,7 +94,8 @@ module SiteMapper
93
94
  agent = to_regex(agent.downcase) if user_agent.is_a?(String)
94
95
  @delays[agent]
95
96
  end
96
-
97
+
98
+ # Return key/value pairs with unknown meaning.
97
99
  # @return [Hash] key/value pairs from robots.txt
98
100
  def other_values
99
101
  @other
@@ -103,9 +105,11 @@ module SiteMapper
103
105
  def sitemaps
104
106
  @sitemaps
105
107
  end
106
-
108
+
107
109
  protected
108
-
110
+
111
+ # @return [Regex] regex from pattern
112
+ # @param [String] pattern to compile to Regex
109
113
  def to_regex(pattern)
110
114
  return /should-not-match-anything-123456789/ if pattern.strip.empty?
111
115
  pattern = Regexp.escape(pattern)
@@ -123,7 +127,7 @@ module SiteMapper
123
127
  @user_agent = user_agent
124
128
  @parsed = {}
125
129
  end
126
-
130
+
127
131
  # @param [String, URI] uri String or URI to check
128
132
  # @return [Boolean] true if uri is allowed to be crawled
129
133
  # @example Check if http://www.google.com/googlesites is allowed to be crawled
@@ -134,8 +138,6 @@ module SiteMapper
134
138
  host = uri.host
135
139
  @parsed[host] ||= ParsedRobots.new(@robots_txt, @user_agent)
136
140
  @parsed[host].allowed?(uri, @user_agent)
137
- rescue
138
- true
139
141
  end
140
142
 
141
143
  # @return [Array] array of sitemaps defined in robots.txt
@@ -146,10 +148,8 @@ module SiteMapper
146
148
  host = @hostname
147
149
  @parsed[host] ||= ParsedRobots.new(@robots_txt, @user_agent)
148
150
  @parsed[host].sitemaps
149
- rescue
150
- []
151
151
  end
152
-
152
+
153
153
  # @param [String, URI] uri String or URI get other_values from
154
154
  # @return [Hash] key/value pairs from robots.txt
155
155
  # @example Get other values for google.com
@@ -159,12 +159,10 @@ module SiteMapper
159
159
  host = @hostname
160
160
  @parsed[host] ||= ParsedRobots.new(@robots_txt, @user_agent)
161
161
  @parsed[host].other_values
162
- rescue
163
- {}
164
162
  end
165
163
 
166
- private
167
-
164
+ private
165
+
168
166
  def to_uri(uri)
169
167
  uri = URI.parse(uri.to_s) unless uri.is_a?(URI)
170
168
  uri
@@ -1,4 +1,4 @@
1
1
  module SiteMapper
2
2
  # Gem version
3
- VERSION = '0.0.10'
3
+ VERSION = '0.0.12'
4
4
  end
metadata CHANGED
@@ -1,125 +1,125 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: site_mapper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.10
4
+ version: 0.0.12
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jacob Burenstam
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-10-24 00:00:00.000000000 Z
11
+ date: 2015-04-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ~>
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
19
  version: '1.6'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ~>
24
+ - - "~>"
25
25
  - !ruby/object:Gem::Version
26
26
  version: '1.6'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: url_resolver
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - ~>
31
+ - - "~>"
32
32
  - !ruby/object:Gem::Version
33
33
  version: '0.1'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - ~>
38
+ - - "~>"
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0.1'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: bundler
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - ~>
45
+ - - "~>"
46
46
  - !ruby/object:Gem::Version
47
47
  version: '1.3'
48
48
  type: :development
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - ~>
52
+ - - "~>"
53
53
  - !ruby/object:Gem::Version
54
54
  version: '1.3'
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: rake
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
- - - ~>
59
+ - - "~>"
60
60
  - !ruby/object:Gem::Version
61
61
  version: '10.3'
62
62
  type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
- - - ~>
66
+ - - "~>"
67
67
  - !ruby/object:Gem::Version
68
68
  version: '10.3'
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: rspec
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
- - - ~>
73
+ - - "~>"
74
74
  - !ruby/object:Gem::Version
75
75
  version: '3.1'
76
76
  type: :development
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
- - - ~>
80
+ - - "~>"
81
81
  - !ruby/object:Gem::Version
82
82
  version: '3.1'
83
83
  - !ruby/object:Gem::Dependency
84
84
  name: yard
85
85
  requirement: !ruby/object:Gem::Requirement
86
86
  requirements:
87
- - - ~>
87
+ - - "~>"
88
88
  - !ruby/object:Gem::Version
89
89
  version: '0.8'
90
90
  type: :development
91
91
  prerelease: false
92
92
  version_requirements: !ruby/object:Gem::Requirement
93
93
  requirements:
94
- - - ~>
94
+ - - "~>"
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0.8'
97
97
  - !ruby/object:Gem::Dependency
98
98
  name: redcarpet
99
99
  requirement: !ruby/object:Gem::Requirement
100
100
  requirements:
101
- - - ~>
101
+ - - "~>"
102
102
  - !ruby/object:Gem::Version
103
103
  version: '3.2'
104
104
  type: :development
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
- - - ~>
108
+ - - "~>"
109
109
  - !ruby/object:Gem::Version
110
110
  version: '3.2'
111
111
  - !ruby/object:Gem::Dependency
112
112
  name: coveralls
113
113
  requirement: !ruby/object:Gem::Requirement
114
114
  requirements:
115
- - - ~>
115
+ - - "~>"
116
116
  - !ruby/object:Gem::Version
117
117
  version: '0.7'
118
118
  type: :development
119
119
  prerelease: false
120
120
  version_requirements: !ruby/object:Gem::Requirement
121
121
  requirements:
122
- - - ~>
122
+ - - "~>"
123
123
  - !ruby/object:Gem::Version
124
124
  version: '0.7'
125
125
  description: Map all links on a given site.
@@ -131,13 +131,13 @@ extensions: []
131
131
  extra_rdoc_files: []
132
132
  files:
133
133
  - bin/site_mapper
134
+ - lib/site_mapper.rb
134
135
  - lib/site_mapper/crawl_url.rb
135
136
  - lib/site_mapper/crawler.rb
136
137
  - lib/site_mapper/logger.rb
137
138
  - lib/site_mapper/request.rb
138
139
  - lib/site_mapper/robots.rb
139
140
  - lib/site_mapper/version.rb
140
- - lib/site_mapper.rb
141
141
  homepage: https://github.com/buren/site_mapper
142
142
  licenses:
143
143
  - MIT
@@ -148,17 +148,17 @@ require_paths:
148
148
  - lib
149
149
  required_ruby_version: !ruby/object:Gem::Requirement
150
150
  requirements:
151
- - - '>='
151
+ - - ">="
152
152
  - !ruby/object:Gem::Version
153
153
  version: 1.9.3
154
154
  required_rubygems_version: !ruby/object:Gem::Requirement
155
155
  requirements:
156
- - - '>='
156
+ - - ">="
157
157
  - !ruby/object:Gem::Version
158
158
  version: '0'
159
159
  requirements: []
160
160
  rubyforge_project:
161
- rubygems_version: 2.0.0
161
+ rubygems_version: 2.2.2
162
162
  signing_key:
163
163
  specification_version: 4
164
164
  summary: Map all links on a given site.