site_mapper 0.0.10 → 0.0.12

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 49d4c0ab215ac8872234e3c552275f89688f997d
-  data.tar.gz: 4b75bae288524a38fe3a8f7d6df9ab42e715211a
+  metadata.gz: dc4b21c14dc15f1cc6df4b6406d12acf5cb821d9
+  data.tar.gz: 34ef0ab2fcd0a74bbcdd53d9e47681d6440f951d
 SHA512:
-  metadata.gz: e0cad8aedfb27fc5a4d56b297098b76a9abd43df86b6c20b9d6f228256091191bb020e620217a9bb40bdc68c981bf8d2ca4fdf2930c55158bb321fa11dfc960b
-  data.tar.gz: c3b1c1863e3de70793d127772b7a314b1e788483fe9151e441c1c209b7fed59c8b210cdae5d453017b0d49eff179f7b51eee47544f68a15c6a62c94783b7b24e
+  metadata.gz: 1d4da1f2753dfb5f06ea577c02183efbf4cb919b783ce128f07b46f29b6af7a330cc01d839895ea4d4fb53cf68db2a6b5adccba47530a0278aaca9bfe1fa4c02
+  data.tar.gz: c32dde9478240d63b63d6e521e04f3c914322b544c79eee7ee25e1b2ea46b5ad2529ddc7d778d30fe2c10cde074f88a72c53458a814fc5d2ab74bc87288e63ed
lib/site_mapper.rb CHANGED
@@ -20,10 +20,19 @@ module SiteMapper
   # @param [String] link to domain
   # @param [Hash] options hash
   # @example Collect all URLs from example.com
-  #   SiteMapper.map('example.com')
+  #   SiteMapper.map('example.com')
+  # @example Collect all URLs from example.com with custom User-agent
+  #   SiteMapper.map('example.com', user_agent: 'MyUserAgent')
+  # @example Collect all URLs from example.com with custom logger class
+  #   class MyLogger
+  #     def self.log(msg); puts msg; end
+  #     def self.err_log(msg); puts msg; end
+  #   end
+  #   SiteMapper.map('example.com', logger: MyLogger)
   def self.map(link, options = {})
-    set_logger(options[:logger])
-    Crawler.collect_urls(link) { |url| yield(url) if block_given? }
+    set_logger(options.delete(:logger))
+    options = { user_agent: USER_AGENT }.merge(options)
+    Crawler.collect_urls(link, options) { |url| yield(url) if block_given? }
   end
 
   # Set logger.
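The reworked SiteMapper.map pulls the :logger key out of the options hash, merges in the default User-agent, and forwards the rest to Crawler.collect_urls. A minimal usage sketch (QuietLogger is a hypothetical logger class; any object responding to .log and .err_log works):

    require 'site_mapper'

    # Hypothetical logger: swallow info messages, keep errors.
    class QuietLogger
      def self.log(msg); end
      def self.err_log(msg); STDERR.puts(msg); end
    end

    SiteMapper.map('example.com', logger: QuietLogger, user_agent: 'MyUserAgent') do |url|
      puts url # yielded once per URL found on the site
    end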
lib/site_mapper/crawl_url.rb CHANGED
@@ -1,85 +1,50 @@
 module SiteMapper
   # Crawl URL formatter.
   class CrawlUrl
-    attr_reader :resolved_base_url, :base_hostname
+    attr_reader :resolved_base_url
 
+    # Too many requests error message
+    TOO_MANY_REQUEST_MSG = "You're being challenged with a 'too many requests' captcha"
+
+    # Initialize CrawlUrl
     # @param [String] base_url
+    # @example Initialize CrawlUrl with example.com as base_url
+    #   CrawlUrl.new('example.com')
     def initialize(base_url)
-      @resolved_base_url = Request.resolve_url(base_url, with_query: false)
+      @resolved_base_url = Request.resolve_url(base_url) # "#{protocol}#{host}"
       @base_hostname = URI.parse(@resolved_base_url).hostname
-      @resolved_base_url.prepend('http://') unless @resolved_base_url.start_with?('http')
     end
 
     # Given a link it constructs the absolute path,
     # if valid URL & URL has same domain as @resolved_base_url.
-    # @param [String] raw_url url found on page
-    # @param [String] get_url current page url
+    # @param [String] page_url url found on page
+    # @param [String] current_url current page url
     # @return [String] with absolute path to resource
     # @example Construct absolute URL for '/path', example.com
     #   cu = CrawlUrl.new('example.com')
     #   cu.absolute_url_from('/path', 'example.com/some/path')
     #   # => http://example.com/some/path
-    def absolute_url_from(raw_url, get_url)
-      return nil unless eligible_url?(raw_url)
-      parsed_url = URI.parse(raw_url) rescue URI.parse('')
-      if parsed_url.relative?
-        url_from_relative(raw_url, get_url)
-      elsif same_domain?(raw_url, @resolved_base_url)
-        raw_url
-      else
-        nil
-      end
+    def absolute_url_from(page_url, current_url)
+      return unless eligible_url?(page_url)
+      parsed_uri = URI.join(current_url, page_url) rescue return
+      return unless parsed_uri.hostname == @base_hostname
+      parsed_uri.to_s
     end
 
     private
 
-    def url_from_relative(url, current_page_url)
-      if url.start_with?('/')
-        "#{without_path_suffix(resolved_base_url)}#{url}"
-      elsif url.start_with?('../')
-        "#{url_from_dotted_url(url, current_page_url)}"
-      else
-        "#{with_path_suffix(resolved_base_url)}#{url}"
-      end
-    end
-
-    def url_from_dotted_url(url, current_page_url)
-      absolute_url = with_path_suffix(current_page_url.dup)
-      found_dots = without_path_suffix(url).scan('../').length
-      removed_dots = 0
-      max_levels = 4
-      while found_dots >= removed_dots && max_levels > removed_dots
-        index = absolute_url.rindex('/') or break
-        absolute_url = absolute_url[0..(index - 1)]
-        removed_dots += 1
-      end
-      "#{with_path_suffix(absolute_url)}#{url.gsub('../', '')}"
-    end
-
-    def with_path_suffix(passed_url)
-      url = passed_url.dup
-      url.end_with?('/') ? url : url << '/'
-    end
-
-    def without_path_suffix(passed_url)
-      url = passed_url.dup
-      url.end_with?('/') ? url[0...(url.length - 1)] : url
-    end
-
     def eligible_url?(href)
       return false if href.nil? || href.empty?
       dont_start = %w(javascript: callto: mailto: tel: skype: facetime: wtai: #)
       dont_include = %w(/email-protection#)
-      dont_end = %w(.zip .rar .pdf .exe .dmg .pkg .dpkg .bat)
+      err_include = %w(/sorry/IndexRedirect?)
+      dont_end = %w(.zip .rar .json .pdf .exe .dmg .pkg .dpkg .bat)
 
+      err_include.each { |pattern| fail TOO_MANY_REQUEST_MSG if href.include?(pattern) }
       dont_start.each { |pattern| return false if href.start_with?(pattern) }
       dont_include.each { |pattern| return false if href.include?(pattern) }
       dont_end.each { |pattern| return false if href.end_with?(pattern) }
       true
     end
-
-    def same_domain?(first, second)
-      first.include?(second)
-    end
   end
 end
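The four hand-rolled path helpers are gone because URI.join from Ruby's stdlib already implements relative-reference resolution, and absolute_url_from now simply rejects anything whose hostname differs from the base. A stdlib-only illustration of what URI.join handles:

    require 'uri'

    base = 'http://example.com/a/b/page.html'
    URI.join(base, '/path').to_s          # => "http://example.com/path"
    URI.join(base, 'other.html').to_s     # => "http://example.com/a/b/other.html"
    URI.join(base, '../up.html').to_s     # => "http://example.com/a/up.html"
    URI.join(base, 'http://other.com/x')  # absolute URLs pass through unchanged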
lib/site_mapper/crawler.rb CHANGED
@@ -4,20 +4,39 @@ require 'nokogiri'
 module SiteMapper
   # Crawls a given site.
   class Crawler
+    # Default options
+    OPTIONS = {
+      resolve: false,
+      sleep_length: 0.5,
+      max_requests: Float::INFINITY
+    }
+
     # @param [String] url base url for crawler
     # @param [Hash] options hash, resolve key (optional false by default)
+    #   add user_agent key to specify custom User-agent
+    # @example Create crawler with custom User-agent
+    #   Crawler.new('example.com', user_agent: 'MyUserAgent')
+    # @example Create crawler and resolve all urls
+    #   Crawler.new('example.com', resolve: true)
+    # @example Create crawler and sleep 1 second between each request
+    #   Crawler.new('example.com', sleep_length: 1)
+    # @example Create crawler and perform max 3 requests
+    #   Crawler.new('example.com', max_requests: 3)
     def initialize(url, options = {})
       @base_url = Request.resolve_url(url)
-      @options = { resolve: false }.merge(options)
+      @options = OPTIONS.dup.merge(options)
+      @user_agent = @options.fetch(:user_agent)
       @crawl_url = CrawlUrl.new(@base_url)
       @fetch_queue = CrawlQueue.new
       @processed = Set.new
       @robots = nil
     end
 
+    # See documentation for the instance variant of this method.
+    # @return [Array] with links.
     # @see #collect_urls
-    def self.collect_urls(base_url)
-      new(base_url).collect_urls { |url| yield(url) }
+    def self.collect_urls(*args)
+      new(*args).collect_urls { |url| yield(url) }
     end
 
     # Collects all links on domain for domain.
@@ -32,13 +51,16 @@ module SiteMapper
     #   end
     def collect_urls
       @fetch_queue << @crawl_url.resolved_base_url
-      until @fetch_queue.empty?
+      until @fetch_queue.empty? || @processed.length >= @options[:max_requests]
         url = @fetch_queue.pop
         yield(url)
-        page_links(url)
+        page_urls_for(url)
       end
-      Logger.log "Crawling finished, #{@processed.length} links found"
-      @processed.to_a
+      result = @processed + @fetch_queue
+      Logger.log "Crawling finished:"
+      Logger.log "Processed links: #{@processed.length}"
+      Logger.log "Found links: #{result.length}"
+      result.to_a
     rescue Interrupt, IRB::Abort
       Logger.err_log 'Crawl interrupted.'
       @fetch_queue.to_a
@@ -46,12 +68,13 @@ module SiteMapper
 
     private
 
-    def page_links(get_url)
-      Logger.log "Queue length: #{@fetch_queue.length}, Parsing: #{get_url}"
-      link_elements = Request.get_page(get_url).css('a') rescue []
-      @processed << get_url
+    def page_urls_for(current_url)
+      Logger.log "Queue length: #{@fetch_queue.length}, Parsing: #{current_url}"
+      link_elements = Request.document(current_url, user_agent: @options[:user_agent]).css('a')
+      wait
+      @processed << current_url
       link_elements.each do |page_link|
-        url = @crawl_url.absolute_url_from(page_link.attr('href'), get_url)
+        url = @crawl_url.absolute_url_from(page_link.attr('href'), current_url)
         @fetch_queue << url if url && eligible_for_queue?(resolve(url))
       end
     end
@@ -62,7 +85,7 @@ module SiteMapper
 
     def robots
       return @robots unless @robots.nil?
-      robots_body = Request.get_response_body("#{@base_url}/robots.txt")
+      robots_body = Request.response_body("#{@base_url}/robots.txt", user_agent: @options[:user_agent])
       @robots = Robots.new(robots_body, URI.parse(@base_url).host, SiteMapper::USER_AGENT)
       @robots
     end
@@ -71,13 +94,17 @@ module SiteMapper
       @options[:resolve] ? Request.resolve_url(url) : url
     end
 
+    def wait
+      sleep @options[:sleep_length]
+    end
+
     # Queue of urls to be crawled.
     class CrawlQueue
       # @return [Set] that extends EnumerablePop module
       def self.new
         Set.new.extend(EnumerablePop)
       end
-
+
       # Add pop method when added to class.
      # The class that extends this module needs to implement #first and #delete.
      module EnumerablePop
@@ -90,5 +117,5 @@ module SiteMapper
         end
       end
     end
-  end
+  end
 end
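With OPTIONS as the merge baseline, every crawl now pauses between requests (sleep_length, 0.5 s by default) and can be capped with max_requests; the returned array also includes links that were discovered but still queued when the cap was hit. A usage sketch; note that user_agent must be supplied when calling Crawler directly, since the USER_AGENT default is merged in by SiteMapper.map, not by Crawler itself:

    # Crawl at most 3 pages, pausing 1 second between requests.
    urls = SiteMapper::Crawler.collect_urls(
      'example.com',
      max_requests: 3,
      sleep_length: 1,
      user_agent: 'MyUserAgent'
    ) { |url| puts url }
    urls.length # processed pages plus still-queued links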
lib/site_mapper/logger.rb CHANGED
@@ -1,6 +1,9 @@
 module SiteMapper
   # Handles logging
   class Logger
+
+    # Choose what logger to use by type.
+    # @return [Object] returns the appropriate logger.
     # @param [Symbol] type of logger class to be used
     def self.use_logger_type(type)
       fail 'Logger already set' if defined?(@@log)
@@ -15,18 +18,22 @@ module SiteMapper
       @@log
     end
 
+    # Choose what logger to use.
+    # @return [Object] returns logger.
     # @param [Class, #log, #err_log] logger a logger class
     def self.use_logger(logger)
       fail 'Logger already set' if defined?(@@log)
       @@log = logger
     end
 
+    # Send a message to the logger.
     # @param [String] msg to be logged
     def self.log(msg)
       @@log ||= use_logger_type(:default)
       @@log.log(msg)
     end
 
+    # Send an error message to the logger.
     # @param [String] err_msg to be logged
     def self.err_log(err_msg)
       @@log ||= use_logger_type(:default)
@@ -35,11 +42,13 @@ module SiteMapper
 
     # Log to terminal.
     module SystemOutLogger
+      # Log to STDOUT.
       # @param [String] msg to be logged to STDOUT
       def self.log(msg)
         STDOUT.puts(msg)
       end
 
+      # Log to STDERR.
       # @param [String] msg to be logged to STDERR
       def self.err_log(msg)
         STDERR.puts("[ERROR] #{msg}")
@@ -48,8 +57,10 @@ module SiteMapper
 
     # Don't log
     module NilLogger
+      # Don't log.
       # @param [String] msg to be ignored
       def self.log(msg);end
+      # Don't error log.
       # @param [String] msg to be ignored
       def self.err_log(msg);end
     end
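The logger is resolved lazily: the first log/err_log call falls back to the default type unless a logger was chosen earlier, and choosing one twice raises. A small sketch using the documented use_logger hook (MyLogger is hypothetical):

    class MyLogger
      def self.log(msg); puts msg; end
      def self.err_log(msg); warn msg; end
    end

    SiteMapper::Logger.use_logger(MyLogger)  # fails if a logger was already set
    SiteMapper::Logger.log('crawl started') # routed through MyLogger.log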
lib/site_mapper/request.rb CHANGED
@@ -11,61 +11,62 @@ module SiteMapper
   class << self
     # Given a URL get it then parse it with Nokogiri::HTML.
     # @param [String] url
+    # @param [Hash] options
     # @return [Nokogiri::HTML] a nokogiri HTML object
-    def get_page(url)
-      Nokogiri::HTML(Request.get_response_body(url))
+    def document(url, options = {})
+      Nokogiri::HTML(Request.response_body(url, options))
     end
 
     # Given a URL get the response.
     # @param [String] url
-    # @param [Boolean] resolve (optional and false by default)
+    # @param [Hash] options
     # @return [Net::HTTPOK] if response is successful, raises error otherwise
     # @example get example.com and resolve the URL
-    #   Request.get_response('example.com', true)
+    #   Request.response('example.com', resolve: true)
     # @example get example.com and do *not* resolve the URL
-    #   Request.get_response('http://example.com')
-    #   Request.get_response('http://example.com', false)
-    def get_response(url, resolve = false)
-      resolved_url = resolve ? resolve_url(url) : url
+    #   Request.response('http://example.com')
+    # @example get example.com and resolve the URL
+    #   Request.response('http://example.com', resolve: true)
+    # @example get example.com and resolve the URL and use a custom User-Agent
+    #   Request.response('http://example.com', resolve: true, user_agent: 'MyUserAgent')
+    def response(url, options = {})
+      options = {
+        resolve: false,
+        user_agent: SiteMapper::USER_AGENT
+      }.merge(options)
+      resolved_url = options[:resolve] ? resolve_url(url) : url
       uri = URI.parse(resolved_url)
       http = Net::HTTP.new(uri.host, uri.port)
-      http.use_ssl = true if resolved_url.include?('https://')
+      http.use_ssl = true if resolved_url.start_with?('https://')
 
       request = Net::HTTP::Get.new(uri.request_uri)
-      request['User-Agent'] = SiteMapper::USER_AGENT
+      request['User-Agent'] = options[:user_agent]
       http.request(request)
     end
 
     # Get response body, rescues with nil if an exception is raised.
-    # @see Request#get_response
-    def get_response_body(*args)
-      get_response(*args).body rescue nil
+    # @see Request#response
+    def response_body(*args)
+      response(*args).body
     end
 
     # Resolve a URL string and follow redirects;
     # if the URL can't be resolved the original URL is returned.
-    # @param [String] url
-    # @param [Hash] options hash, with_query key (optional and true by default)
+    # @param [String] url to resolve
     # @return [String] a URL string that potentially is a redirected URL
     # @example Resolve google.com
     #   resolve_url('google.com')
     #   # => 'https://www.google.com'
-    def resolve_url(url, options = {})
-      options = { with_query: true }.merge(options)
+    def resolve_url(url)
       resolved = UrlResolver.resolve(url)
-      resolved = remove_query(resolved) unless options[:with_query]
+      resolved = resolved.prepend('http://') unless has_protocol?(resolved)
       resolved
     end
 
-    # Removes query string from URL string.
-    # @param [String] url
-    # @return [String] an URL string without query
-    # @example Removes query string
-    #   remove_query('example.com/path?q=keyword')
-    #   # => 'example.com/path'
-    def remove_query(url)
-      index = url.index('?')
-      index.nil? ? url : url[0...index]
+    private
+
+    def has_protocol?(url)
+      url.start_with?('https://') || url.start_with?('http://')
     end
   end
 end
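The get_* prefixes are dropped, and both document and response_body take an options hash that is merged over { resolve: false, user_agent: SiteMapper::USER_AGENT }. Note that response_body no longer rescues to nil, so network errors now propagate to the caller. A usage sketch:

    # Fetch and parse a page with a custom User-agent; options pass straight
    # through document -> response_body -> response.
    doc = SiteMapper::Request.document('http://example.com', user_agent: 'MyUserAgent')
    hrefs = doc.css('a').map { |link| link.attr('href') }

    # Resolve a bare domain; a protocol is now prepended when missing.
    SiteMapper::Request.resolve_url('google.com') # => e.g. "https://www.google.com"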
lib/site_mapper/robots.rb CHANGED
@@ -6,6 +6,7 @@ module SiteMapper
   class Robots
     # Parses robots.txt
     class ParsedRobots
+      # Initializes ParsedRobots
       def initialize(body, user_agent)
         @other = {}
         @disallows = {}
@@ -61,7 +62,7 @@ module SiteMapper
         path = uri.request_uri
 
         user_agent.downcase!
-
+
         @disallows.each do |key, value|
           if user_agent =~ key
             value.each do |rule|
@@ -71,9 +72,9 @@ module SiteMapper
             end
           end
         end
-
+
         @allows.each do |key, value|
-          unless allowed
+          unless allowed
             if user_agent =~ key
               value.each do |rule|
                 if path =~ rule
@@ -93,7 +94,8 @@ module SiteMapper
         agent = to_regex(agent.downcase) if user_agent.is_a?(String)
         @delays[agent]
       end
-
+
+      # Return key/value pairs with unknown meaning.
       # @return [Hash] key/value pairs from robots.txt
       def other_values
         @other
@@ -103,9 +105,11 @@ module SiteMapper
       def sitemaps
         @sitemaps
       end
-
+
       protected
-
+
+      # @return [Regex] regex from pattern
+      # @param [String] pattern to compile to Regex
       def to_regex(pattern)
         return /should-not-match-anything-123456789/ if pattern.strip.empty?
         pattern = Regexp.escape(pattern)
@@ -123,7 +127,7 @@ module SiteMapper
       @user_agent = user_agent
       @parsed = {}
     end
-
+
     # @param [String, URI] uri String or URI to check
     # @return [Boolean] true if uri is allowed to be crawled
     # @example Check if http://www.google.com/googlesites is allowed to be crawled
@@ -134,8 +138,6 @@ module SiteMapper
       host = uri.host
       @parsed[host] ||= ParsedRobots.new(@robots_txt, @user_agent)
       @parsed[host].allowed?(uri, @user_agent)
-    rescue
-      true
     end
 
     # @return [Array] array of sitemaps defined in robots.txt
@@ -146,10 +148,8 @@ module SiteMapper
       host = @hostname
       @parsed[host] ||= ParsedRobots.new(@robots_txt, @user_agent)
       @parsed[host].sitemaps
-    rescue
-      []
     end
-
+
     # @param [String, URI] uri String or URI get other_values from
     # @return [Hash] key/value pairs from robots.txt
     # @example Get other values for google.com
@@ -159,12 +159,10 @@ module SiteMapper
       host = @hostname
       @parsed[host] ||= ParsedRobots.new(@robots_txt, @user_agent)
       @parsed[host].other_values
-    rescue
-      {}
     end
 
-    private
-
+    private
+
     def to_uri(uri)
       uri = URI.parse(uri.to_s) unless uri.is_a?(URI)
       uri
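The blanket rescue clauses around allowed?, sitemaps, and other_values are removed, so parse and lookup errors now surface instead of silently returning permissive defaults. A sketch of the public surface, with the constructor arguments taken from how crawler.rb calls it (robots_txt here is an assumed, previously fetched robots.txt body):

    robots = SiteMapper::Robots.new(robots_txt, 'example.com', 'MyUserAgent')
    robots.allowed?('http://example.com/some/path') # may now raise instead of defaulting to true
    robots.sitemaps                                 # sitemap URLs listed in robots.txt
    robots.other_values                             # remaining key/value pairs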
lib/site_mapper/version.rb CHANGED
@@ -1,4 +1,4 @@
 module SiteMapper
   # Gem version
-  VERSION = '0.0.10'
+  VERSION = '0.0.12'
 end
metadata CHANGED
@@ -1,125 +1,125 @@
 --- !ruby/object:Gem::Specification
 name: site_mapper
 version: !ruby/object:Gem::Version
-  version: 0.0.10
+  version: 0.0.12
 platform: ruby
 authors:
 - Jacob Burenstam
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-10-24 00:00:00.000000000 Z
+date: 2015-04-04 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: nokogiri
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - ~>
+    - - "~>"
       - !ruby/object:Gem::Version
         version: '1.6'
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - ~>
+    - - "~>"
       - !ruby/object:Gem::Version
         version: '1.6'
 - !ruby/object:Gem::Dependency
   name: url_resolver
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - ~>
+    - - "~>"
       - !ruby/object:Gem::Version
         version: '0.1'
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - ~>
+    - - "~>"
      - !ruby/object:Gem::Version
        version: '0.1'
 - !ruby/object:Gem::Dependency
   name: bundler
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - ~>
+    - - "~>"
       - !ruby/object:Gem::Version
         version: '1.3'
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - ~>
+    - - "~>"
       - !ruby/object:Gem::Version
         version: '1.3'
 - !ruby/object:Gem::Dependency
   name: rake
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - ~>
+    - - "~>"
       - !ruby/object:Gem::Version
         version: '10.3'
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - ~>
+    - - "~>"
       - !ruby/object:Gem::Version
         version: '10.3'
 - !ruby/object:Gem::Dependency
   name: rspec
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - ~>
+    - - "~>"
       - !ruby/object:Gem::Version
         version: '3.1'
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - ~>
+    - - "~>"
       - !ruby/object:Gem::Version
         version: '3.1'
 - !ruby/object:Gem::Dependency
   name: yard
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - ~>
+    - - "~>"
       - !ruby/object:Gem::Version
         version: '0.8'
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - ~>
+    - - "~>"
       - !ruby/object:Gem::Version
         version: '0.8'
 - !ruby/object:Gem::Dependency
   name: redcarpet
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - ~>
+    - - "~>"
       - !ruby/object:Gem::Version
         version: '3.2'
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - ~>
+    - - "~>"
       - !ruby/object:Gem::Version
         version: '3.2'
 - !ruby/object:Gem::Dependency
   name: coveralls
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - ~>
+    - - "~>"
       - !ruby/object:Gem::Version
         version: '0.7'
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - ~>
+    - - "~>"
       - !ruby/object:Gem::Version
         version: '0.7'
 description: Map all links on a given site.
@@ -131,13 +131,13 @@ extensions: []
 extra_rdoc_files: []
 files:
 - bin/site_mapper
+- lib/site_mapper.rb
 - lib/site_mapper/crawl_url.rb
 - lib/site_mapper/crawler.rb
 - lib/site_mapper/logger.rb
 - lib/site_mapper/request.rb
 - lib/site_mapper/robots.rb
 - lib/site_mapper/version.rb
-- lib/site_mapper.rb
 homepage: https://github.com/buren/site_mapper
 licenses:
 - MIT
@@ -148,17 +148,17 @@ require_paths:
 - lib
 required_ruby_version: !ruby/object:Gem::Requirement
   requirements:
-  - - '>='
+  - - ">="
     - !ruby/object:Gem::Version
       version: 1.9.3
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
-  - - '>='
+  - - ">="
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.0.0
+rubygems_version: 2.2.2
 signing_key:
 specification_version: 4
 summary: Map all links on a given site.