RubyGems - rubyretriever - Versions diffs - 0.1.0 → 0.1.1 - Mend

rubyretriever 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: b3eba5658a9b3aa77a522d46a9564acb2a8eea5d
-  data.tar.gz: d3315f447ca908cf14f31fe1ff7196f9ca5e6744
+  metadata.gz: c9f829e46e94b82625b0c0c67e492ee2433e25cd
+  data.tar.gz: 2caeb5719fe47661d29097c9ab9818b6f493710a
 SHA512:
-  metadata.gz: 99c13086efcb81db33e48a1ebee4e7021059dddcacc3e5fa2ec6f8e6159a0d0840dbe3068fa35f98eca026d1f8d5c5f90ed5fade7e7e2abbbe111e70dc6dbdc8
-  data.tar.gz: 24c5de4333f44d0391d8bdca3325a820e42b37534a1005fc608150855eed852205c85d5687828cdc75475ca8788a6522d9ca82032b9df7b3c67a17c36ae184b1
+  metadata.gz: d031365939289932696c6483db762ad2ba8c5fd62234c2418f72b7548b18943aa94646e660c349b4a37cb4b00388b28bb6107ed8086d1264f1812dcd284c6343
+  data.tar.gz: ad75737457a5cf6ace00f0b83e60b961fde291d7b4a3c1a5e4b8af90c98a5b919e144ff85223175073ef2c6072e08d329ce5b2d10512ee8abbd1a314611134b7

data/bin/rr CHANGED Viewed

@@ -4,8 +4,15 @@ options = {}
  optparse = OptionParser.new do|opts|
    # Set a banner, displayed at the top
    # of the help screen.
-   opts.banner = "Usage: rr [options] Target_URL"
+   opts.banner = "Usage: rr [MODE FLAG] [options] Target_URL"
+  options[:sitemap] = false
+   opts.on( '-s', '--sitemap FORMAT', 'MODE FLAG: Sitemap mode - Crawl site and output sitemap, format choices: CSV or XML' ) do |output_type|
+     options[:sitemap] = output_type
+   end
+  options[:fileharvest] = false
+   opts.on( '-f', '--files FILETYPE', 'MODE FLAG: Fileharvest mode - Crawl site and collect links for files found, extension for filetype' ) do |file_ext|
+     options[:fileharvest] = file_ext
+   end
     options[:filename] = nil
    opts.on( '-o', '--out FILENAME', 'Dump output to selected filename' ) do|filename|
      options[:filename] = filename
@@ -15,32 +22,18 @@ options = {}
    opts.on( '-v', '--verbose', 'Output more information' ) do
      options[:verbose] = true
    end
    options[:progress] = false
-   opts.on( '-p', '--progressbar', 'Output more information' ) do
+   opts.on( '-p', '--progress', 'Output progress bar' ) do
      options[:progress] = true
    end
-  options[:sitemap] = false
-   opts.on( '-s', '--sitemap FORMAT', 'Crawl site and output sitemap' ) do |output_type|
-     options[:sitemap] = output_type
-   end
-  options[:fileharvest] = false
-   opts.on( '-f', '--files FILETYPE', 'Crawl site and collect links for files found' ) do |file_ext|
-     options[:fileharvest] = file_ext
-   end
   options[:maxpages] = false
    opts.on( '-l', '--limit PAGE_LIMIT_#', 'set a max on the total number of crawled pages' ) do |maxpages|
      options[:maxpages] = maxpages
    end
    options[:autodown] = false
    opts.on( '-a', '--auto', 'Automatically download all files of filetype located' ) do
      options[:autodown] = true
    end
    # This displays the help screen, all programs are
    # assumed to have this option.
    opts.on( '-h', '--help', 'Display this screen' ) do

data/lib/retriever/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module Retriever
-  VERSION = '0.1.0'
+  VERSION = '0.1.1'
 end

data/readme.md CHANGED Viewed

@@ -30,11 +30,11 @@ This would go to http://www.cnet.com and map it until it crawled a max of 100 pa
  **Example: File Harvesting mode**
 ```sh
-rr --files --ext pdf --progress --limit 1000 --output hubspot http://www.hubspot.com
+rr --files pdf --progress --limit 1000 --output hubspot http://www.hubspot.com
 ```
 OR -- SAME COMMAND
 ```sh
-rr -f -e pdf -p -l 1000 http://www.hubspot.com
+rr -f pdf -p -l 1000 http://www.hubspot.com
 ```
 This would go to http://www.hubspot.com and crawl it looking for filetype:PDF until it crawled a max of 1,000 pages, and then it would write out a list of filepaths to a csv named hubspot (based on the website host name). Optionally we could have the script then go and autodownload all the files by adding the -a/--auto flag -- however this current example would just dump to stdout a list of all the PDFs found.

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: rubyretriever
 version: !ruby/object:Gem::Version
-  version: 0.1.0
+  version: 0.1.1
 platform: ruby
 authors:
 - Joe Norton
@@ -108,7 +108,7 @@ dependencies:
     - - ~>
       - !ruby/object:Gem::Version
         version: '2.14'
-description: General purpose web crawler, site mapper, and file harvester
+description: Asynchronous web crawler, file harvester & autodownloader
 email:
 - joe@softwarebyjoe.com
 executables: