wgit 0.8.0 → 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.yardopts +1 -1
- data/CHANGELOG.md +39 -0
- data/LICENSE.txt +1 -1
- data/README.md +118 -323
- data/bin/wgit +9 -5
- data/lib/wgit.rb +3 -1
- data/lib/wgit/assertable.rb +3 -3
- data/lib/wgit/base.rb +30 -0
- data/lib/wgit/crawler.rb +206 -76
- data/lib/wgit/database/database.rb +309 -134
- data/lib/wgit/database/model.rb +10 -3
- data/lib/wgit/document.rb +138 -95
- data/lib/wgit/{document_extensions.rb → document_extractors.rb} +11 -11
- data/lib/wgit/dsl.rb +324 -0
- data/lib/wgit/indexer.rb +65 -162
- data/lib/wgit/response.rb +5 -2
- data/lib/wgit/url.rb +133 -31
- data/lib/wgit/utils.rb +32 -20
- data/lib/wgit/version.rb +2 -1
- metadata +26 -14
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wgit
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.0
|
4
|
+
version: 0.9.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Michael Telford
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-07-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: addressable
|
@@ -66,6 +66,20 @@ dependencies:
|
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '1.3'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: ferrum
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0.8'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0.8'
|
69
83
|
- !ruby/object:Gem::Dependency
|
70
84
|
name: byebug
|
71
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -184,14 +198,10 @@ dependencies:
|
|
184
198
|
- - "<"
|
185
199
|
- !ruby/object:Gem::Version
|
186
200
|
version: '1.0'
|
187
|
-
description: '
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
WWW search engine. The Wgit API is easily extended allowing you to pull out the
|
192
|
-
parts of a webpage that are important to you, the code snippets or tables for example.
|
193
|
-
As Wgit is a library, it supports many different use cases including data mining,
|
194
|
-
analytics, web indexing and URL parsing to name a few.
|
201
|
+
description: 'Wgit was primarily designed to crawl static HTML websites to index and
|
202
|
+
search their content - providing the basis of any search engine; but Wgit is suitable
|
203
|
+
for many application domains including: URL parsing, data mining and statistical
|
204
|
+
analysis.
|
195
205
|
|
196
206
|
'
|
197
207
|
email: michael.telford@live.com
|
@@ -202,12 +212,14 @@ extra_rdoc_files: []
|
|
202
212
|
files:
|
203
213
|
- "./lib/wgit.rb"
|
204
214
|
- "./lib/wgit/assertable.rb"
|
215
|
+
- "./lib/wgit/base.rb"
|
205
216
|
- "./lib/wgit/core_ext.rb"
|
206
217
|
- "./lib/wgit/crawler.rb"
|
207
218
|
- "./lib/wgit/database/database.rb"
|
208
219
|
- "./lib/wgit/database/model.rb"
|
209
220
|
- "./lib/wgit/document.rb"
|
210
|
-
- "./lib/wgit/document_extensions.rb"
|
221
|
+
- "./lib/wgit/document_extractors.rb"
|
222
|
+
- "./lib/wgit/dsl.rb"
|
211
223
|
- "./lib/wgit/indexer.rb"
|
212
224
|
- "./lib/wgit/logger.rb"
|
213
225
|
- "./lib/wgit/response.rb"
|
@@ -246,9 +258,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
246
258
|
- !ruby/object:Gem::Version
|
247
259
|
version: '0'
|
248
260
|
requirements: []
|
249
|
-
rubygems_version: 3.
|
261
|
+
rubygems_version: 3.1.2
|
250
262
|
signing_key:
|
251
263
|
specification_version: 4
|
252
|
-
summary: Wgit is a
|
253
|
-
|
264
|
+
summary: Wgit is a HTML web crawler, written in Ruby, that allows you to programmatically
|
265
|
+
extract the data you want from the web.
|
254
266
|
test_files: []
|