wgit 0.10.8 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/wgit/utils.rb CHANGED
@@ -18,17 +18,15 @@ module Wgit
18
18
  # keys.
19
19
  # @return [Hash] A Hash created from obj's instance vars and values.
20
20
  def self.to_h(obj, ignore: [], use_strings_as_keys: true)
21
- hash = {}
21
+ obj.instance_variables.reduce({}) do |hash, var|
22
+ next hash if ignore.include?(var.to_s)
22
23
 
23
- obj.instance_variables.each do |var|
24
- next if ignore.include?(var.to_s)
25
-
26
- key = var.to_s[1..-1] # Remove the @ prefix.
24
+ key = var.to_s[1..] # Remove the @ prefix.
27
25
  key = key.to_sym unless use_strings_as_keys
28
26
  hash[key] = obj.instance_variable_get(var)
29
- end
30
27
 
31
- hash
28
+ hash
29
+ end
32
30
  end
33
31
 
34
32
  # An improved :each method which supports both singleton and Enumerable
@@ -37,9 +35,9 @@ module Wgit
37
35
  # @yield [el] Gives each element (Object) of obj_or_objects if it's
38
36
  # Enumerable, otherwise obj_or_objs itself is given.
39
37
  # @return [Object] The obj_or_objs parameter is returned.
40
- def self.each(obj_or_objs)
38
+ def self.each(obj_or_objs, &block)
41
39
  if obj_or_objs.respond_to?(:each)
42
- obj_or_objs.each { |obj| yield(obj) }
40
+ obj_or_objs.each(&block)
43
41
  else
44
42
  yield(obj_or_objs)
45
43
  end
@@ -127,96 +125,261 @@ module Wgit
127
125
  end
128
126
 
129
127
  # Prints out the search results in a search engine like format.
130
- # The format for each result looks like:
131
- #
132
- # Title
133
128
  #
134
- # Keywords (if there are some)
129
+ # The given results should be matching documents from a DB and should have
130
+ # `doc.search_text!` called for each document - to turn doc.text into only
131
+ # matching text, which this method uses.
135
132
  #
136
- # Text Snippet (formatted to show the searched for query, if provided)
133
+ # The format for each result looks something like:
137
134
  #
135
+ # ```
136
+ # Title
137
+ # Keywords (if there are some)
138
+ # Text Snippet (formatted to show the searched for query)
138
139
  # URL
139
- #
140
+ # Score (if include_score: true)
140
141
  # <empty_line_separator>
142
+ # ```
141
143
  #
142
144
  # @param results [Array<Wgit::Document>] Array of Wgit::Document's which
143
145
  # each have had #search!(query) called (to update its @text with the
144
146
  # search results). The first @text sentence gets printed.
145
147
  # @param keyword_limit [Integer] The max amount of keywords to be
146
148
  # outputted to the stream.
149
+ # @param include_score [Boolean] Whether or not to output the document score.
147
150
  # @param stream [#puts] Any object that respond_to?(:puts). It is used
148
151
  # to output text somewhere e.g. a file or STDERR.
149
152
  # @return [Integer] The number of results.
150
- def self.printf_search_results(results, keyword_limit: 5, stream: STDOUT)
153
+ def self.pprint_top_search_results(
154
+ results, keyword_limit: 5, include_score: false, stream: $stdout
155
+ )
151
156
  raise 'stream must respond_to? :puts' unless stream.respond_to?(:puts)
152
157
 
153
158
  results.each do |doc|
154
- title = (doc.title || '<no title>')
159
+ title = doc.title || '<no title>'
155
160
  keywords = doc.keywords&.take(keyword_limit)&.join(', ')
156
161
  sentence = doc.text.first
157
162
  url = doc.url
163
+ score = doc.score
158
164
 
159
165
  stream.puts title
160
166
  stream.puts keywords if keywords
161
167
  stream.puts sentence
162
168
  stream.puts url
169
+ stream.puts score if include_score
163
170
  stream.puts
164
171
  end
165
172
 
166
173
  results.size
167
174
  end
168
175
 
176
+ # Prints out the search results listing all of the matching text in each
177
+ # document.
178
+ #
179
+ # The given results should be matching documents from a DB and should have
180
+ # `doc.search_text!` called for each document - to turn doc.text into only
181
+ # matching text, which this method uses.
182
+ #
183
+ # The format for each result looks something like:
184
+ #
185
+ # ```
186
+ # Title
187
+ # Keywords (if there are some)
188
+ # URL
189
+ # Score (if include_score: true)
190
+ #
191
+ # "<text_snippet_1>"
192
+ # "<text_snippet_2>"
193
+ # ...
194
+ #
195
+ # <separator>
196
+ # ```
197
+ #
198
+ # @param results [Array<Wgit::Document>] Array of Wgit::Document's which
199
+ # each have had #search!(query) called (to update its @text with the
200
+ # search results). Every @text item gets printed.
201
+ # @param keyword_limit [Integer] The max amount of keywords to be
202
+ # outputted to the stream.
203
+ # @param include_score [Boolean] Whether or not to output the document score.
204
+ # @param stream [#puts] Any object that respond_to?(:puts). It is used
205
+ # to output text somewhere e.g. a file or STDERR.
206
+ # @return [Integer] The number of results.
207
+ def self.pprint_all_search_results(
208
+ results, keyword_limit: 5, include_score: false, stream: $stdout
209
+ )
210
+ raise 'stream must respond_to? :puts' unless stream.respond_to?(:puts)
211
+
212
+ results.each_with_index do |doc, i|
213
+ last_result = i == (results.size-1)
214
+
215
+ title = doc.title || '<no title>'
216
+ keywords = doc.keywords&.take(keyword_limit)&.join(', ')
217
+ url = doc.url
218
+ score = doc.score
219
+
220
+ stream.puts title
221
+ stream.puts keywords if keywords
222
+ stream.puts url
223
+ stream.puts score if include_score
224
+ stream.puts
225
+ doc.text.each { |text| stream.puts text }
226
+ stream.puts
227
+ stream.puts "-----" unless last_result
228
+ stream.puts unless last_result
229
+ end
230
+
231
+ results.size
232
+ end
233
+
169
234
  # Sanitises the obj to make it uniform by calling the correct sanitize_*
170
- # method for its type e.g. if obj.is_a? String then sanitize(obj). Any type
171
- # not in the case statement will be ignored and returned as is.
235
+ # method for its type e.g. if obj.is_a? String then sanitize_str(obj) is called.
236
+ # Any type not in the case statement will be ignored and returned as is.
237
+ # Call this method if unsure what obj's type is.
172
238
  #
173
239
  # @param obj [Object] The object to be sanitized.
174
240
  # @param encode [Boolean] Whether or not to encode to UTF-8 replacing
175
241
  # invalid characters.
176
- # @return [Object] The sanitized obj is both modified and then returned.
242
+ # @return [Object] The sanitized obj.
177
243
  def self.sanitize(obj, encode: true)
178
244
  case obj
245
+ when Wgit::Url
246
+ sanitize_url(obj, encode:)
179
247
  when String
180
- sanitize_str(obj, encode: encode)
248
+ sanitize_str(obj, encode:)
181
249
  when Array
182
- sanitize_arr(obj, encode: encode)
250
+ sanitize_arr(obj, encode:)
183
251
  else
184
252
  obj
185
253
  end
186
254
  end
187
255
 
256
+ # Sanitises a Wgit::Url to make it uniform. First sanitizes the Url as a
257
+ # String before replacing the Url value with the sanitized version. This
258
+ # method therefore modifies the given url param and also returns it.
259
+ #
260
+ # @param url [Wgit::Url] The Wgit::Url to sanitize. url is modified.
261
+ # @param encode [Boolean] Whether or not to encode to UTF-8 replacing
262
+ # invalid characters.
263
+ # @return [Wgit::Url] The sanitized url, which is also modified.
264
+ def self.sanitize_url(url, encode: true)
265
+ str = sanitize_str(url.to_s, encode:)
266
+ url.replace(str)
267
+ end
268
+
188
269
  # Sanitises a String to make it uniform. Strips any leading/trailing white
189
270
  # space. Also applies UTF-8 encoding (replacing invalid characters) if
190
271
  # `encode: true`.
191
272
  #
192
- # @param str [String] The String to sanitize. str is modified.
273
+ # @param str [String] The String to sanitize. str is not modified.
193
274
  # @param encode [Boolean] Whether or not to encode to UTF-8 replacing
194
275
  # invalid characters.
195
- # @return [String] The sanitized str is both modified and then returned.
276
+ # @return [String] The sanitized str.
196
277
  def self.sanitize_str(str, encode: true)
197
- if str.is_a?(String)
198
- str.encode!('UTF-8', undef: :replace, invalid: :replace) if encode
199
- str.strip!
200
- end
278
+ return str unless str.is_a?(String)
201
279
 
202
- str
280
+ str = str.encode('UTF-8', undef: :replace, invalid: :replace) if encode
281
+ str.strip
203
282
  end
204
283
 
205
284
  # Sanitises an Array to make it uniform. Removes empty Strings and nils,
206
285
  # processes non empty Strings using Wgit::Utils.sanitize and removes
207
286
  # duplicates.
208
287
  #
209
- # @param arr [Enumerable] The Array to sanitize. arr is modified.
210
- # @return [Enumerable] The sanitized arr is both modified and then returned.
288
+ # @param arr [Enumerable] The Array to sanitize. arr is not modified.
289
+ # @return [Enumerable] The sanitized arr.
211
290
  def self.sanitize_arr(arr, encode: true)
212
- if arr.is_a?(Array)
213
- arr.map! { |str| sanitize(str, encode: encode) }
214
- arr.reject! { |str| str.is_a?(String) ? str.empty? : false }
215
- arr.compact!
216
- arr.uniq!
217
- end
291
+ return arr unless arr.is_a?(Array)
218
292
 
219
293
  arr
294
+ .map { |str| sanitize(str, encode:) }
295
+ .reject { |str| str.is_a?(String) && str.empty? }
296
+ .compact
297
+ .uniq
298
+ end
299
+
300
+ # Build a regular expression from a query string, for searching text with.
301
+ #
302
+ # All searches using this regex are always whole word based while whole
303
+ # sentence searches are configurable using the whole_sentence: param. For
304
+ # example:
305
+ #
306
+ # ```
307
+ # text = "hello world"
308
+ # query = "world hello", whole_sentence: true # => No match
309
+ # query = "world hello", whole_sentence: false # => Match
310
+ # query = "he" # => Never matches
311
+ # ```
312
+ #
313
+ # @param query [String, Regexp] The query string to build a regex from.
314
+ # @param case_sensitive [Boolean] Whether character case must match.
315
+ # @param whole_sentence [Boolean] Whether the query's words must match
316
+ # together in the given order; when false, each word is matched separately.
317
+ # @return [Regexp] The regex with which to search text.
318
+ def self.build_search_regex(
319
+ query, case_sensitive: false, whole_sentence: true
320
+ )
321
+ return query if query.is_a?(Regexp)
322
+
323
+ # query: "hello world", whole_sentence: false produces:
324
+ # (?<=^|\s|[^a-zA-Z0-9])hello(?=$|\s|[^a-zA-Z0-9])|(?<=^|\s|[^a-zA-Z0-9])world(?=$|\s|[^a-zA-Z0-9])
325
+
326
+ sep = whole_sentence ? " " : "|"
327
+ segs = query.split(" ").map do |word|
328
+ word = Regexp.escape(word)
329
+ "(?<=^|\\s|[^a-zA-Z0-9])#{word}(?=$|\\s|[^a-zA-Z0-9])"
330
+ end
331
+ query = segs.join(sep)
332
+
333
+ Regexp.new(query, !case_sensitive)
334
+ end
335
+
336
+ # Pretty prints a log statement, used for debugging purposes.
337
+ #
338
+ # Use like:
339
+ #
340
+ # ```
341
+ # Wgit::Utils.pprint 1, include_html: include_html, ignore: ignore_vars
342
+ # ```
343
+ #
344
+ # Which produces a log like:
345
+ #
346
+ # ```
347
+ # DEBUG_1 - include_html: true | ignore: ['@html', '@parser']
348
+ # ```
349
+ #
350
+ # @param identifier [#to_s] A log identifier e.g. "START" or 1 etc.
351
+ # @param display [Boolean] Setting as false will cause a noop, useful for
352
+ # switching off several/all pprint statements at once e.g. via ENV var.
353
+ # @param stream [#puts] Any object that respond_to? :puts and :print. It is
354
+ # used to output the log text somewhere e.g. a file or STDERR.
355
+ # @param prefix [String] The log prefix, useful for visibility/grepping.
356
+ # @param new_line [Boolean] Whether or not to use a new line (\n) as the
357
+ # separator.
358
+ # @param vars [Hash<#inspect, #inspect>] The vars to inspect in the log.
359
+ def self.pprint(identifier, display: true, stream: $stdout, prefix: 'DEBUG', new_line: false, **vars)
360
+ return unless display
361
+
362
+ sep1 = new_line ? "\n" : ' - '
363
+ sep1 = '' if vars.empty?
364
+ sep2 = new_line ? "\n" : ' | '
365
+
366
+ stream.print "\n#{prefix}_#{identifier}#{sep1}"
367
+
368
+ vars.each_with_index do |arr, i|
369
+ is_last_item = (i + 1) == vars.size
370
+ sep3 = sep2
371
+ sep3 = new_line ? "\n" : '' if is_last_item
372
+ k, v = arr
373
+
374
+ stream.print "#{k}: #{v.inspect}#{sep3}"
375
+ end
376
+
377
+ stream.puts "\n"
378
+ stream.puts "\n" unless new_line
379
+
380
+ nil
220
381
  end
382
+
383
+ self.singleton_class.alias_method :pprint_search_results, :pprint_top_search_results
221
384
  end
222
385
  end
data/lib/wgit/version.rb CHANGED
@@ -6,7 +6,7 @@
6
6
  # @author Michael Telford
7
7
  module Wgit
8
8
  # The current gem version of Wgit.
9
- VERSION = '0.10.8'
9
+ VERSION = "0.12.0"
10
10
 
11
11
  # Returns the current gem version of Wgit as a String.
12
12
  def self.version
data/lib/wgit.rb CHANGED
@@ -1,16 +1,21 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative 'wgit/version'
4
- require_relative 'wgit/logger'
5
- require_relative 'wgit/assertable'
6
- require_relative 'wgit/utils'
7
- require_relative 'wgit/url'
8
- require_relative 'wgit/document'
9
- require_relative 'wgit/document_extractors'
10
- require_relative 'wgit/crawler'
11
- require_relative 'wgit/database/model'
12
- require_relative 'wgit/database/database'
13
- require_relative 'wgit/indexer'
14
- require_relative 'wgit/dsl'
15
- require_relative 'wgit/base'
3
+ require_relative "wgit/version"
4
+ require_relative "wgit/logger"
5
+ require_relative "wgit/assertable"
6
+ require_relative "wgit/utils"
7
+ require_relative "wgit/url"
8
+ require_relative "wgit/html_to_text"
9
+ require_relative "wgit/document"
10
+ require_relative "wgit/document_extractors"
11
+ require_relative "wgit/crawler"
12
+ require_relative "wgit/model"
13
+ require_relative "wgit/database/database"
14
+ require_relative "wgit/database/database_adapter"
15
+ require_relative "wgit/database/adapters/mongo_db"
16
+ require_relative "wgit/database/adapters/in_memory"
17
+ require_relative "wgit/robots_parser"
18
+ require_relative "wgit/indexer"
19
+ require_relative "wgit/dsl"
20
+ require_relative "wgit/base"
16
21
  # require_relative 'wgit/core_ext' - Must be explicitly required.
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wgit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.10.8
4
+ version: 0.12.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Michael Telford
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-08-18 00:00:00.000000000 Z
11
+ date: 2024-10-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: addressable
@@ -16,188 +16,196 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '2.6'
19
+ version: '2.8'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '2.6'
26
+ version: '2.8'
27
27
  - !ruby/object:Gem::Dependency
28
- name: mongo
28
+ name: base64
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '2.9'
33
+ version: '0.2'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '2.9'
40
+ version: '0.2'
41
41
  - !ruby/object:Gem::Dependency
42
- name: nokogiri
42
+ name: ferrum
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: '1.10'
47
+ version: '0.14'
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '1.10'
54
+ version: '0.14'
55
55
  - !ruby/object:Gem::Dependency
56
- name: typhoeus
56
+ name: mongo
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
59
  - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: '1.3'
61
+ version: '2.19'
62
62
  type: :runtime
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
- version: '1.3'
68
+ version: '2.19'
69
69
  - !ruby/object:Gem::Dependency
70
- name: ferrum
70
+ name: nokogiri
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
73
  - - "~>"
74
74
  - !ruby/object:Gem::Version
75
- version: '0.8'
75
+ version: '1.15'
76
76
  type: :runtime
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
80
  - - "~>"
81
81
  - !ruby/object:Gem::Version
82
- version: '0.8'
82
+ version: '1.15'
83
+ - !ruby/object:Gem::Dependency
84
+ name: typhoeus
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '1.4'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '1.4'
83
97
  - !ruby/object:Gem::Dependency
84
98
  name: byebug
85
99
  requirement: !ruby/object:Gem::Requirement
86
100
  requirements:
87
101
  - - "~>"
88
102
  - !ruby/object:Gem::Version
89
- version: '10.0'
103
+ version: '11.1'
90
104
  type: :development
91
105
  prerelease: false
92
106
  version_requirements: !ruby/object:Gem::Requirement
93
107
  requirements:
94
108
  - - "~>"
95
109
  - !ruby/object:Gem::Version
96
- version: '10.0'
110
+ version: '11.1'
97
111
  - !ruby/object:Gem::Dependency
98
112
  name: dotenv
99
113
  requirement: !ruby/object:Gem::Requirement
100
114
  requirements:
101
115
  - - "~>"
102
116
  - !ruby/object:Gem::Version
103
- version: '2.5'
117
+ version: '2.8'
104
118
  type: :development
105
119
  prerelease: false
106
120
  version_requirements: !ruby/object:Gem::Requirement
107
121
  requirements:
108
122
  - - "~>"
109
123
  - !ruby/object:Gem::Version
110
- version: '2.5'
124
+ version: '2.8'
111
125
  - !ruby/object:Gem::Dependency
112
126
  name: maxitest
113
127
  requirement: !ruby/object:Gem::Requirement
114
128
  requirements:
115
129
  - - "~>"
116
130
  - !ruby/object:Gem::Version
117
- version: '3.3'
131
+ version: '5.4'
118
132
  type: :development
119
133
  prerelease: false
120
134
  version_requirements: !ruby/object:Gem::Requirement
121
135
  requirements:
122
136
  - - "~>"
123
137
  - !ruby/object:Gem::Version
124
- version: '3.3'
138
+ version: '5.4'
125
139
  - !ruby/object:Gem::Dependency
126
140
  name: pry
127
141
  requirement: !ruby/object:Gem::Requirement
128
142
  requirements:
129
143
  - - "~>"
130
144
  - !ruby/object:Gem::Version
131
- version: '0.12'
145
+ version: '0.14'
132
146
  type: :development
133
147
  prerelease: false
134
148
  version_requirements: !ruby/object:Gem::Requirement
135
149
  requirements:
136
150
  - - "~>"
137
151
  - !ruby/object:Gem::Version
138
- version: '0.12'
152
+ version: '0.14'
139
153
  - !ruby/object:Gem::Dependency
140
154
  name: rubocop
141
155
  requirement: !ruby/object:Gem::Requirement
142
156
  requirements:
143
157
  - - "~>"
144
158
  - !ruby/object:Gem::Version
145
- version: '0.74'
159
+ version: '1.57'
146
160
  type: :development
147
161
  prerelease: false
148
162
  version_requirements: !ruby/object:Gem::Requirement
149
163
  requirements:
150
164
  - - "~>"
151
165
  - !ruby/object:Gem::Version
152
- version: '0.74'
166
+ version: '1.57'
153
167
  - !ruby/object:Gem::Dependency
154
168
  name: toys
155
169
  requirement: !ruby/object:Gem::Requirement
156
170
  requirements:
157
171
  - - "~>"
158
172
  - !ruby/object:Gem::Version
159
- version: '0.8'
173
+ version: '0.15'
160
174
  type: :development
161
175
  prerelease: false
162
176
  version_requirements: !ruby/object:Gem::Requirement
163
177
  requirements:
164
178
  - - "~>"
165
179
  - !ruby/object:Gem::Version
166
- version: '0.8'
180
+ version: '0.15'
167
181
  - !ruby/object:Gem::Dependency
168
182
  name: webmock
169
183
  requirement: !ruby/object:Gem::Requirement
170
184
  requirements:
171
185
  - - "~>"
172
186
  - !ruby/object:Gem::Version
173
- version: '3.6'
187
+ version: '3.19'
174
188
  type: :development
175
189
  prerelease: false
176
190
  version_requirements: !ruby/object:Gem::Requirement
177
191
  requirements:
178
192
  - - "~>"
179
193
  - !ruby/object:Gem::Version
180
- version: '3.6'
194
+ version: '3.19'
181
195
  - !ruby/object:Gem::Dependency
182
196
  name: yard
183
197
  requirement: !ruby/object:Gem::Requirement
184
198
  requirements:
185
- - - ">="
186
- - !ruby/object:Gem::Version
187
- version: 0.9.20
188
- - - "<"
199
+ - - "~>"
189
200
  - !ruby/object:Gem::Version
190
- version: '1.0'
201
+ version: '0.9'
191
202
  type: :development
192
203
  prerelease: false
193
204
  version_requirements: !ruby/object:Gem::Requirement
194
205
  requirements:
195
- - - ">="
196
- - !ruby/object:Gem::Version
197
- version: 0.9.20
198
- - - "<"
206
+ - - "~>"
199
207
  - !ruby/object:Gem::Version
200
- version: '1.0'
208
+ version: '0.9'
201
209
  description: 'Wgit was primarily designed to crawl static HTML websites to index and
202
210
  search their content - providing the basis of any search engine; but Wgit is suitable
203
211
  for many application domains including: URL parsing, data mining and statistical
@@ -215,14 +223,19 @@ files:
215
223
  - "./lib/wgit/base.rb"
216
224
  - "./lib/wgit/core_ext.rb"
217
225
  - "./lib/wgit/crawler.rb"
226
+ - "./lib/wgit/database/adapters/in_memory.rb"
227
+ - "./lib/wgit/database/adapters/mongo_db.rb"
218
228
  - "./lib/wgit/database/database.rb"
219
- - "./lib/wgit/database/model.rb"
229
+ - "./lib/wgit/database/database_adapter.rb"
220
230
  - "./lib/wgit/document.rb"
221
231
  - "./lib/wgit/document_extractors.rb"
222
232
  - "./lib/wgit/dsl.rb"
233
+ - "./lib/wgit/html_to_text.rb"
223
234
  - "./lib/wgit/indexer.rb"
224
235
  - "./lib/wgit/logger.rb"
236
+ - "./lib/wgit/model.rb"
225
237
  - "./lib/wgit/response.rb"
238
+ - "./lib/wgit/robots_parser.rb"
226
239
  - "./lib/wgit/url.rb"
227
240
  - "./lib/wgit/utils.rb"
228
241
  - "./lib/wgit/version.rb"
@@ -251,7 +264,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
251
264
  requirements:
252
265
  - - ">="
253
266
  - !ruby/object:Gem::Version
254
- version: '2.6'
267
+ version: '3'
255
268
  - - "<"
256
269
  - !ruby/object:Gem::Version
257
270
  version: '4'
@@ -261,7 +274,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
261
274
  - !ruby/object:Gem::Version
262
275
  version: '0'
263
276
  requirements: []
264
- rubygems_version: 3.2.22
277
+ rubygems_version: 3.5.22
265
278
  signing_key:
266
279
  specification_version: 4
267
280
  summary: Wgit is a HTML web crawler, written in Ruby, that allows you to programmatically