wgit 0.4.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/wgit/crawler.rb +155 -66
- data/lib/wgit/database/database.rb +9 -8
- data/lib/wgit/database/model.rb +2 -2
- data/lib/wgit/document.rb +55 -62
- data/lib/wgit/document_extensions.rb +2 -2
- data/lib/wgit/indexer.rb +27 -15
- data/lib/wgit/response.rb +144 -0
- data/lib/wgit/url.rb +149 -85
- data/lib/wgit/utils.rb +6 -3
- data/lib/wgit/version.rb +7 -2
- metadata +3 -2
data/lib/wgit/utils.rb
CHANGED
@@ -166,13 +166,16 @@ module Wgit
|
|
166
166
|
end
|
167
167
|
|
168
168
|
# Processes a String to make it uniform. Strips any leading/trailing white
|
169
|
-
# space
|
169
|
+
# space. Also applies UTF-8 encoding (replacing invalid characters) if
|
170
|
+
# `encode: true`.
|
170
171
|
#
|
171
172
|
# @param str [String] The String to process. str is modified.
|
173
|
+
# @param encode [Boolean] Whether or not to encode to UTF-8 replacing
|
174
|
+
# invalid characters.
|
172
175
|
# @return [String] The processed str is both modified and then returned.
|
173
|
-
def self.process_str(str)
|
176
|
+
def self.process_str(str, encode: true)
|
174
177
|
if str.is_a?(String)
|
175
|
-
str.encode!('UTF-8',
|
178
|
+
str.encode!('UTF-8', undef: :replace, invalid: :replace) if encode
|
176
179
|
str.strip!
|
177
180
|
end
|
178
181
|
|
data/lib/wgit/version.rb
CHANGED
@@ -1,9 +1,14 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
# Wgit is a WWW indexer/scraper which crawls URL's and retrieves their page
|
4
|
-
# contents for later use.
|
4
|
+
# contents for later use by serialisation.
|
5
5
|
# @author Michael Telford
|
6
6
|
module Wgit
|
7
7
|
# The current gem version of Wgit.
|
8
|
-
VERSION = '0.4.1'
|
8
|
+
VERSION = '0.5.0'
|
9
|
+
|
10
|
+
# Returns the current gem version of Wgit as a String.
|
11
|
+
def self.version
|
12
|
+
VERSION
|
13
|
+
end
|
9
14
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wgit
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.1
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Michael Telford
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-11-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: addressable
|
@@ -209,6 +209,7 @@ files:
|
|
209
209
|
- "./lib/wgit/document_extensions.rb"
|
210
210
|
- "./lib/wgit/indexer.rb"
|
211
211
|
- "./lib/wgit/logger.rb"
|
212
|
+
- "./lib/wgit/response.rb"
|
212
213
|
- "./lib/wgit/url.rb"
|
213
214
|
- "./lib/wgit/utils.rb"
|
214
215
|
- "./lib/wgit/version.rb"
|