RubyGems - wgit - Versions diffs - 0.8.0 → 0.9.0 - Mend

wgit 0.8.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21)

checksums.yaml +4 -4
data/.yardopts +1 -1
data/CHANGELOG.md +39 -0
data/LICENSE.txt +1 -1
data/README.md +118 -323
data/bin/wgit +9 -5
data/lib/wgit.rb +3 -1
data/lib/wgit/assertable.rb +3 -3
data/lib/wgit/base.rb +30 -0
data/lib/wgit/crawler.rb +206 -76
data/lib/wgit/database/database.rb +309 -134
data/lib/wgit/database/model.rb +10 -3
data/lib/wgit/document.rb +138 -95
data/lib/wgit/{document_extensions.rb → document_extractors.rb} +11 -11
data/lib/wgit/dsl.rb +324 -0
data/lib/wgit/indexer.rb +65 -162
data/lib/wgit/response.rb +5 -2
data/lib/wgit/url.rb +133 -31
data/lib/wgit/utils.rb +32 -20
data/lib/wgit/version.rb +2 -1
metadata +26 -14

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: wgit
 version: !ruby/object:Gem::Version
-  version: 0.8.0
+  version: 0.9.0
 platform: ruby
 authors:
 - Michael Telford
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2020-01-27 00:00:00.000000000 Z
+date: 2020-07-31 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: addressable
@@ -66,6 +66,20 @@ dependencies:
     - - "~>"
       - !ruby/object:Gem::Version
         version: '1.3'
+- !ruby/object:Gem::Dependency
+  name: ferrum
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '0.8'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '0.8'
 - !ruby/object:Gem::Dependency
   name: byebug
   requirement: !ruby/object:Gem::Requirement
@@ -184,14 +198,10 @@ dependencies:
     - - "<"
       - !ruby/object:Gem::Version
         version: '1.0'
-description: 'Fundamentally, Wgit is a HTTP indexer/scraper which crawls URL''s to
-  retrieve and serialise their page contents for later use. You can use Wgit to scrape
-  entire websites if required. Wgit also provides a means to search indexed documents
-  stored in a database. Therefore, this library provides the main components of a
-  WWW search engine. The Wgit API is easily extended allowing you to pull out the
-  parts of a webpage that are important to you, the code snippets or tables for example.
-  As Wgit is a library, it supports many different use cases including data mining,
-  analytics, web indexing and URL parsing to name a few.
+description: 'Wgit was primarily designed to crawl static HTML websites to index and
+  search their content - providing the basis of any search engine; but Wgit is suitable
+  for many application domains including: URL parsing, data mining and statistical
+  analysis.
   '
 email: michael.telford@live.com
@@ -202,12 +212,14 @@ extra_rdoc_files: []
 files:
 - "./lib/wgit.rb"
 - "./lib/wgit/assertable.rb"
+- "./lib/wgit/base.rb"
 - "./lib/wgit/core_ext.rb"
 - "./lib/wgit/crawler.rb"
 - "./lib/wgit/database/database.rb"
 - "./lib/wgit/database/model.rb"
 - "./lib/wgit/document.rb"
-- "./lib/wgit/document_extensions.rb"
+- "./lib/wgit/document_extractors.rb"
+- "./lib/wgit/dsl.rb"
 - "./lib/wgit/indexer.rb"
 - "./lib/wgit/logger.rb"
 - "./lib/wgit/response.rb"
@@ -246,9 +258,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.0.6
+rubygems_version: 3.1.2
 signing_key:
 specification_version: 4
-summary: Wgit is a Ruby library primarily used for crawling, indexing and searching
-  HTML webpages.
+summary: Wgit is a HTML web crawler, written in Ruby, that allows you to programmatically
+  extract the data you want from the web.
 test_files: []