wgit 0.0.18 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/wgit.rb +0 -1
- data/lib/wgit/assertable.rb +20 -23
- data/lib/wgit/core_ext.rb +6 -14
- data/lib/wgit/crawler.rb +94 -183
- data/lib/wgit/database/database.rb +209 -185
- data/lib/wgit/database/model.rb +7 -7
- data/lib/wgit/document.rb +281 -241
- data/lib/wgit/indexer.rb +99 -92
- data/lib/wgit/logger.rb +5 -1
- data/lib/wgit/url.rb +171 -185
- data/lib/wgit/utils.rb +57 -68
- data/lib/wgit/version.rb +1 -1
- metadata +86 -60
- data/CHANGELOG.md +0 -61
- data/LICENSE.txt +0 -21
- data/README.md +0 -361
- data/TODO.txt +0 -34
- data/lib/wgit/database/connection_details.rb +0 -41
data/lib/wgit/utils.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Wgit
|
4
|
-
# Utility module containing generic methods.
|
4
|
+
# Utility module containing generic methods that don't belong to a Class.
|
5
5
|
module Utils
|
6
6
|
# Returns the current time stamp.
|
7
7
|
#
|
@@ -13,75 +13,70 @@ module Wgit
|
|
13
13
|
# Returns a Hash created from obj's instance vars and values.
|
14
14
|
#
|
15
15
|
# @param obj [Object] The object to process.
|
16
|
-
# @param ignore [Array<String>] Attributes to ignore.
|
17
|
-
# @param use_strings_as_keys [Boolean] Whether
|
18
|
-
#
|
16
|
+
# @param ignore [Array<String>] Attributes to ignore e.g. [':@html'].
|
17
|
+
# @param use_strings_as_keys [Boolean] Whether to use Strings or Symbols as
|
18
|
+
# keys.
|
19
19
|
# @return [Hash] A Hash created from obj's instance vars and values.
|
20
|
-
def self.to_h(obj, ignore
|
20
|
+
def self.to_h(obj, ignore: [], use_strings_as_keys: true)
|
21
21
|
hash = {}
|
22
|
+
|
22
23
|
obj.instance_variables.each do |var|
|
23
24
|
next if ignore.include?(var.to_s)
|
24
25
|
|
25
|
-
key = var.to_s[1..-1]
|
26
|
+
key = var.to_s[1..-1] # Remove the @ prefix.
|
26
27
|
key = key.to_sym unless use_strings_as_keys
|
27
28
|
hash[key] = obj.instance_variable_get(var)
|
28
29
|
end
|
29
|
-
hash
|
30
|
-
end
|
31
30
|
|
32
|
-
|
33
|
-
#
|
34
|
-
# @param model_hash [Hash] The model Hash to process.
|
35
|
-
# @return [Hash] The model Hash with non bson types removed.
|
36
|
-
def self.remove_non_bson_types(model_hash)
|
37
|
-
model_hash.select do |_k, v|
|
38
|
-
v.respond_to? :bson_type
|
39
|
-
end
|
31
|
+
hash
|
40
32
|
end
|
41
33
|
|
42
|
-
# An improved :each method which
|
34
|
+
# An improved :each method which supports both singleton and Enumerable
|
43
35
|
# objects (as opposed to just an Enumerable object).
|
44
36
|
#
|
45
|
-
# @yield [el] Gives each element of obj_or_objects if it's
|
46
|
-
#
|
37
|
+
# @yield [el] Gives each element (Object) of obj_or_objects if it's
|
38
|
+
# Enumerable, otherwise obj_or_objs itself is given.
|
39
|
+
# @return [Object] The obj_or_objs parameter is returned.
|
47
40
|
def self.each(obj_or_objs)
|
48
41
|
if obj_or_objs.respond_to?(:each)
|
49
42
|
obj_or_objs.each { |obj| yield(obj) }
|
50
43
|
else
|
51
44
|
yield(obj_or_objs)
|
52
45
|
end
|
46
|
+
|
47
|
+
obj_or_objs
|
53
48
|
end
|
54
49
|
|
55
50
|
# Formats the sentence (modifies the receiver) and returns its value.
|
56
51
|
# The formatting is essentially to shorten the sentence and ensure that
|
57
52
|
# the index is present somewhere in the sentence. Used for search query
|
58
|
-
# results.
|
53
|
+
# results with the index of the matching query.
|
59
54
|
#
|
60
55
|
# @param sentence [String] The sentence to be formatted.
|
61
56
|
# @param index [Integer] The first index of a word in sentence. This is
|
62
|
-
#
|
57
|
+
# usually a word in a search query.
|
63
58
|
# @param sentence_limit [Integer] The max length of the formatted sentence
|
64
|
-
#
|
65
|
-
#
|
66
|
-
#
|
59
|
+
# being returned. The length will be decided by the sentence_limit
|
60
|
+
# parameter or the full length of the original sentence, whichever
|
61
|
+
# is less. The full sentence is returned if the sentence_limit is 0.
|
67
62
|
# @return [String] The sentence once formatted.
|
68
63
|
def self.format_sentence_length(sentence, index, sentence_limit)
|
69
64
|
raise 'A sentence value must be provided' if sentence.empty?
|
70
65
|
raise 'The sentence length value must be even' if sentence_limit.odd?
|
71
|
-
if
|
66
|
+
if index.negative? || (index > sentence.length)
|
72
67
|
raise "Incorrect index value: #{index}"
|
73
68
|
end
|
74
69
|
|
75
|
-
return sentence if sentence_limit
|
70
|
+
return sentence if sentence_limit.zero?
|
76
71
|
|
77
|
-
start
|
72
|
+
start = 0
|
78
73
|
finish = sentence.length
|
79
74
|
|
80
75
|
if sentence.length > sentence_limit
|
81
|
-
start
|
76
|
+
start = index - (sentence_limit / 2)
|
82
77
|
finish = index + (sentence_limit / 2)
|
83
78
|
|
84
|
-
if start
|
79
|
+
if start.negative?
|
85
80
|
diff = 0 - start
|
86
81
|
if (finish + diff) > sentence.length
|
87
82
|
finish = sentence.length
|
@@ -91,7 +86,7 @@ module Wgit
|
|
91
86
|
start = 0
|
92
87
|
elsif finish > sentence.length
|
93
88
|
diff = finish - sentence.length
|
94
|
-
if (start - diff)
|
89
|
+
if (start - diff).negative?
|
95
90
|
start = 0
|
96
91
|
else
|
97
92
|
start -= diff
|
@@ -106,54 +101,38 @@ module Wgit
|
|
106
101
|
end
|
107
102
|
|
108
103
|
# Prints out the search results in a search engine like format.
|
109
|
-
# Most of the params are passed to Wgit::Document#search; see the docs.
|
110
104
|
# The format for each result looks like:
|
111
105
|
#
|
112
106
|
# Title
|
113
107
|
#
|
114
108
|
# Keywords (if there are some)
|
115
109
|
#
|
116
|
-
# Text Snippet (
|
110
|
+
# Text Snippet (formatted to show the searched for query, if provided)
|
117
111
|
#
|
118
112
|
# URL
|
119
113
|
#
|
120
114
|
# <empty_line_separator>
|
121
115
|
#
|
122
|
-
# @param results [Array<Wgit::Document>]
|
123
|
-
#
|
124
|
-
#
|
125
|
-
# @param
|
126
|
-
#
|
127
|
-
# @param
|
128
|
-
#
|
129
|
-
# @
|
130
|
-
|
131
|
-
|
132
|
-
# to output text somewhere e.g. STDOUT (the default).
|
133
|
-
# @return [nil]
|
134
|
-
def self.printf_search_results(results, query = nil, _case_sensitive = false,
|
135
|
-
sentence_length = 80, keyword_count = 5,
|
136
|
-
stream = Kernel)
|
137
|
-
raise 'stream must respond_to? :puts' unless stream.respond_to? :puts
|
138
|
-
|
139
|
-
keyword_count -= 1 # Because Array's are zero indexed.
|
116
|
+
# @param results [Array<Wgit::Document>] Array of Wgit::Document's which
|
117
|
+
# each have had #search!(query) called (to update its @text with the
|
118
|
+
# search results). The first @text sentence gets printed.
|
119
|
+
# @param keyword_limit [Integer] The max amount of keywords to be
|
120
|
+
# outputted to the stream.
|
121
|
+
# @param stream [#puts] Any object that respond_to?(:puts). It is used
|
122
|
+
# to output text somewhere e.g. a file or STDOUT.
|
123
|
+
# @return [NilClass] Returns nil.
|
124
|
+
def self.printf_search_results(results, keyword_limit: 5, stream: STDOUT)
|
125
|
+
raise 'stream must respond_to? :puts' unless stream.respond_to?(:puts)
|
140
126
|
|
141
127
|
results.each do |doc|
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
end
|
151
|
-
end
|
152
|
-
stream.puts doc.title
|
153
|
-
unless doc.keywords.nil? || doc.keywords.empty?
|
154
|
-
stream.puts doc.keywords[0..keyword_count].join(', ')
|
155
|
-
end
|
156
|
-
stream.puts sentence unless sentence.nil?
|
128
|
+
title = (doc.title || '<no title>')
|
129
|
+
missing_keywords = (doc.keywords.nil? || doc.keywords.empty?)
|
130
|
+
keywords = missing_keywords ? nil : doc.keywords.take(keyword_limit)
|
131
|
+
sentence = doc.text.first
|
132
|
+
|
133
|
+
stream.puts title
|
134
|
+
stream.puts keywords.join(', ') if keywords
|
135
|
+
stream.puts sentence if sentence
|
157
136
|
stream.puts doc.url
|
158
137
|
stream.puts
|
159
138
|
end
|
@@ -161,8 +140,8 @@ module Wgit
|
|
161
140
|
nil
|
162
141
|
end
|
163
142
|
|
164
|
-
# Processes a String to make it uniform. Strips
|
165
|
-
#
|
143
|
+
# Processes a String to make it uniform. Strips any leading/trailing white
|
144
|
+
# space and converts to UTF-8.
|
166
145
|
#
|
167
146
|
# @param str [String] The String to process. str is modified.
|
168
147
|
# @return [String] The processed str is both modified and then returned.
|
@@ -171,11 +150,12 @@ module Wgit
|
|
171
150
|
str.encode!('UTF-8', 'UTF-8', invalid: :replace)
|
172
151
|
str.strip!
|
173
152
|
end
|
153
|
+
|
174
154
|
str
|
175
155
|
end
|
176
156
|
|
177
157
|
# Processes an Array to make it uniform. Removes empty Strings and nils,
|
178
|
-
# processes non empty Strings using Wgit::Utils.process_str
|
158
|
+
# processes non empty Strings using Wgit::Utils.process_str and removes
|
179
159
|
# duplicates.
|
180
160
|
#
|
181
161
|
# @param arr [Enumerable] The Array to process. arr is modified.
|
@@ -187,7 +167,16 @@ module Wgit
|
|
187
167
|
arr.compact!
|
188
168
|
arr.uniq!
|
189
169
|
end
|
170
|
+
|
190
171
|
arr
|
191
172
|
end
|
173
|
+
|
174
|
+
# Returns the model having removed non bson types (for use with MongoDB).
|
175
|
+
#
|
176
|
+
# @param model_hash [Hash] The model Hash to process.
|
177
|
+
# @return [Hash] The model Hash with non bson types removed.
|
178
|
+
def self.remove_non_bson_types(model_hash)
|
179
|
+
model_hash.select { |_k, v| v.respond_to?(:bson_type) }
|
180
|
+
end
|
192
181
|
end
|
193
182
|
end
|
data/lib/wgit/version.rb
CHANGED
metadata
CHANGED
@@ -1,15 +1,57 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wgit
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Michael Telford
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2019-09-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: addressable
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 2.6.0
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 2.6.0
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: mongo
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 2.9.0
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 2.9.0
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: nokogiri
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 1.10.3
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 1.10.3
|
13
55
|
- !ruby/object:Gem::Dependency
|
14
56
|
name: byebug
|
15
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -53,19 +95,33 @@ dependencies:
|
|
53
95
|
- !ruby/object:Gem::Version
|
54
96
|
version: '1.3'
|
55
97
|
- !ruby/object:Gem::Dependency
|
56
|
-
name:
|
98
|
+
name: inch
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - "~>"
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0.8'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - "~>"
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0.8'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: maxitest
|
57
113
|
requirement: !ruby/object:Gem::Requirement
|
58
114
|
requirements:
|
59
115
|
- - "~>"
|
60
116
|
- !ruby/object:Gem::Version
|
61
|
-
version: '
|
117
|
+
version: '3.3'
|
62
118
|
type: :development
|
63
119
|
prerelease: false
|
64
120
|
version_requirements: !ruby/object:Gem::Requirement
|
65
121
|
requirements:
|
66
122
|
- - "~>"
|
67
123
|
- !ruby/object:Gem::Version
|
68
|
-
version: '
|
124
|
+
version: '3.3'
|
69
125
|
- !ruby/object:Gem::Dependency
|
70
126
|
name: pry
|
71
127
|
requirement: !ruby/object:Gem::Requirement
|
@@ -94,6 +150,20 @@ dependencies:
|
|
94
150
|
- - "~>"
|
95
151
|
- !ruby/object:Gem::Version
|
96
152
|
version: '12.3'
|
153
|
+
- !ruby/object:Gem::Dependency
|
154
|
+
name: rubocop
|
155
|
+
requirement: !ruby/object:Gem::Requirement
|
156
|
+
requirements:
|
157
|
+
- - "~>"
|
158
|
+
- !ruby/object:Gem::Version
|
159
|
+
version: '0.74'
|
160
|
+
type: :development
|
161
|
+
prerelease: false
|
162
|
+
version_requirements: !ruby/object:Gem::Requirement
|
163
|
+
requirements:
|
164
|
+
- - "~>"
|
165
|
+
- !ruby/object:Gem::Version
|
166
|
+
version: '0.74'
|
97
167
|
- !ruby/object:Gem::Dependency
|
98
168
|
name: webmock
|
99
169
|
requirement: !ruby/object:Gem::Requirement
|
@@ -128,55 +198,16 @@ dependencies:
|
|
128
198
|
- - "<"
|
129
199
|
- !ruby/object:Gem::Version
|
130
200
|
version: '1.0'
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
requirements:
|
142
|
-
- - "~>"
|
143
|
-
- !ruby/object:Gem::Version
|
144
|
-
version: 2.6.0
|
145
|
-
- !ruby/object:Gem::Dependency
|
146
|
-
name: mongo
|
147
|
-
requirement: !ruby/object:Gem::Requirement
|
148
|
-
requirements:
|
149
|
-
- - "~>"
|
150
|
-
- !ruby/object:Gem::Version
|
151
|
-
version: 2.9.0
|
152
|
-
type: :runtime
|
153
|
-
prerelease: false
|
154
|
-
version_requirements: !ruby/object:Gem::Requirement
|
155
|
-
requirements:
|
156
|
-
- - "~>"
|
157
|
-
- !ruby/object:Gem::Version
|
158
|
-
version: 2.9.0
|
159
|
-
- !ruby/object:Gem::Dependency
|
160
|
-
name: nokogiri
|
161
|
-
requirement: !ruby/object:Gem::Requirement
|
162
|
-
requirements:
|
163
|
-
- - "~>"
|
164
|
-
- !ruby/object:Gem::Version
|
165
|
-
version: 1.10.3
|
166
|
-
type: :runtime
|
167
|
-
prerelease: false
|
168
|
-
version_requirements: !ruby/object:Gem::Requirement
|
169
|
-
requirements:
|
170
|
-
- - "~>"
|
171
|
-
- !ruby/object:Gem::Version
|
172
|
-
version: 1.10.3
|
173
|
-
description: Fundamentally, Wgit is a WWW indexer/scraper which crawls URL's, retrieves
|
174
|
-
and serialises their page contents for later use. You can use Wgit to copy entire
|
175
|
-
websites if required. Wgit also provides a means to search indexed documents stored
|
176
|
-
in a database. Therefore, this library provides the main components of a WWW search
|
177
|
-
engine. The Wgit API is easily extended allowing you to pull out the parts of a
|
178
|
-
webpage that are important to you, the code snippets or tables for example. As Wgit
|
179
|
-
is a library, it has uses in many different application types.
|
201
|
+
description: 'Fundamentally, Wgit is a HTTP indexer/scraper which crawls URL''s to
|
202
|
+
retrieve and serialise their page contents for later use. You can use Wgit to copy
|
203
|
+
entire websites if required. Wgit also provides a means to search indexed documents
|
204
|
+
stored in a database. Therefore, this library provides the main components of a
|
205
|
+
WWW search engine. The Wgit API is easily extended allowing you to pull out the
|
206
|
+
parts of a webpage that are important to you, the code snippets or tables for example.
|
207
|
+
As Wgit is a library, it supports many different use cases including data mining,
|
208
|
+
analytics, web indexing and URL parsing to name a few.
|
209
|
+
|
210
|
+
'
|
180
211
|
email: michael.telford@live.com
|
181
212
|
executables: []
|
182
213
|
extensions: []
|
@@ -186,7 +217,6 @@ files:
|
|
186
217
|
- "./lib/wgit/assertable.rb"
|
187
218
|
- "./lib/wgit/core_ext.rb"
|
188
219
|
- "./lib/wgit/crawler.rb"
|
189
|
-
- "./lib/wgit/database/connection_details.rb"
|
190
220
|
- "./lib/wgit/database/database.rb"
|
191
221
|
- "./lib/wgit/database/model.rb"
|
192
222
|
- "./lib/wgit/document.rb"
|
@@ -196,10 +226,6 @@ files:
|
|
196
226
|
- "./lib/wgit/url.rb"
|
197
227
|
- "./lib/wgit/utils.rb"
|
198
228
|
- "./lib/wgit/version.rb"
|
199
|
-
- CHANGELOG.md
|
200
|
-
- LICENSE.txt
|
201
|
-
- README.md
|
202
|
-
- TODO.txt
|
203
229
|
homepage: https://github.com/michaeltelford/wgit
|
204
230
|
licenses:
|
205
231
|
- MIT
|
@@ -227,5 +253,5 @@ rubygems_version: 2.7.6
|
|
227
253
|
signing_key:
|
228
254
|
specification_version: 4
|
229
255
|
summary: Wgit is a Ruby gem similar in nature to GNU's `wget` tool. It provides an
|
230
|
-
easy to use API for programmatic
|
256
|
+
easy to use API for programmatic URL parsing, HTML indexing and searching.
|
231
257
|
test_files: []
|
data/CHANGELOG.md
DELETED
@@ -1,61 +0,0 @@
|
|
1
|
-
# Wgit Change Log
|
2
|
-
|
3
|
-
## v0.0.0 (TEMPLATE - DO NOT EDIT)
|
4
|
-
### Added
|
5
|
-
- ...
|
6
|
-
### Changed/Removed
|
7
|
-
- ...
|
8
|
-
### Fixed
|
9
|
-
- ...
|
10
|
-
---
|
11
|
-
|
12
|
-
## v0.0.18
|
13
|
-
### Added
|
14
|
-
- `Wgit::Url#to_brand` method and updated `Wgit::Url#is_relative?` to support it.
|
15
|
-
### Changed/Removed
|
16
|
-
- Updated the documentation by changing some `private` methods to `protected`. These methods are now documented (on rubydocs) as a result.
|
17
|
-
### Fixed
|
18
|
-
- ...
|
19
|
-
---
|
20
|
-
|
21
|
-
## v0.0.17
|
22
|
-
### Added
|
23
|
-
- Support for `<base>` element in `Wgit::Document`'s.
|
24
|
-
- New `Wgit::Url` methods: `without_query_string`, `is_query_string?`, `is_anchor?`, `replace` (override of `String#replace`).
|
25
|
-
### Changed/Removed
|
26
|
-
- Breaking changes: Removed `Wgit::Document#internal_links_without_anchors` method.
|
27
|
-
- Breaking changes (potentially): `Wgit::Url`'s are now replaced with the redirected to Url during a crawl.
|
28
|
-
- Updated `Wgit::Document#base_url` to support an optional `link:` named parameter.
|
29
|
-
- Updated `Wgit::Crawler#crawl_site` to allow the initial url to redirect to another host.
|
30
|
-
- Updated `Wgit::Url#is_relative?` to support an optional `domain:` named parameter.
|
31
|
-
### Fixed
|
32
|
-
- Bug in `Wgit::Document#internal_full_links` affecting anchor and query string links including those used during `Wgit::Crawler#crawl_site`.
|
33
|
-
- Bug causing an 'Invalid URL' error for `Wgit::Crawler#crawl_site`.
|
34
|
-
---
|
35
|
-
|
36
|
-
## v0.0.16
|
37
|
-
### Added
|
38
|
-
- Added `Url.parse` class method as alias for `Url.new`.
|
39
|
-
### Changed/Removed
|
40
|
-
- Breaking changes: Removed `Wgit::Url.relative_link?` (class method). Use `Wgit::Url#is_relative?` (instance method) instead e.g. `Wgit::Url.new('/blah').is_relative?`.
|
41
|
-
### Fixed
|
42
|
-
- Several URI related bugs in `Wgit::Url` affecting crawls.
|
43
|
-
---
|
44
|
-
|
45
|
-
## v0.0.15
|
46
|
-
### Added
|
47
|
-
- Support for IRI's (non ASCII based URL's).
|
48
|
-
### Changed/Removed
|
49
|
-
- Breaking changes: Removed `Document` and `Url#to_hash` aliases. Call `to_h` instead.
|
50
|
-
### Fixed
|
51
|
-
- Bug in `Crawler#crawl_site` where an internal redirect to an external site's page was being followed.
|
52
|
-
---
|
53
|
-
|
54
|
-
## v0.0.14
|
55
|
-
### Added
|
56
|
-
- `Indexer#index_this_page` method.
|
57
|
-
### Changed/Removed
|
58
|
-
- Breaking Changes: `Wgit::CONNECTION_DETAILS` now only requires `DB_CONNECTION_STRING`.
|
59
|
-
### Fixed
|
60
|
-
- Found and fixed a bug in `Document#new`.
|
61
|
-
---
|