RubyGems - blogbot - Versions diffs - 0.0.4 → 0.0.5 - Mend

blogbot 0.0.4 → 0.0.5

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the public registry.

Files changed (12)

checksums.yaml +4 -4
data/blogbot.gemspec +2 -2
data/lib/blogbot.rb +4 -15
data/lib/blogbot/extraction.rb +17 -5
data/lib/blogbot/navigation.rb +5 -20
data/lib/blogbot/reflection.rb +1 -1
data/test/blogbot.rb +59 -0
data/test/blogbot/extraction.rb +39 -0
data/test/blogbot/memorization.rb +25 -0
data/test/blogbot/navigation.rb +62 -0
data/test/blogbot/reflection.rb +47 -0
metadata +13 -3

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 89e4fdb5e3c981f8fc1eba19c63b2643870ff903
-  data.tar.gz: 637a092abf302128fc5bdd3bfddb04f599b44978
+  metadata.gz: 5ba38caa6667e36e2396162590ecf399cc604a3a
+  data.tar.gz: acd1158109ad0e5c749e2ba8083a3a28f51ba6af
 SHA512:
-  metadata.gz: cfcfc8c1207fc3946302f67d3445668f2ef418dc8c3619a63cd1ae70fcec96c836cab2123d17949bc28c60f1ee2240f2a1f1a269c3ea9db85c0bba60423fa535
-  data.tar.gz: b4b6f70582599014fa00cad198e15493573350d3db99d3842e657898ac72e19df54e30b7e36ee465230ad93a3632578337ffc36e304b5acbbd6091ee10124ade
+  metadata.gz: 41f3cd5d74c3fa5ab90d3bbc4453add4c59216d49985b248701303c123c71b9c12f3e95db9ec8ce18fc06bd2fe3a1dbbd6fea458de8fa46b1024297fd3c6569c
+  data.tar.gz: 1b69e32e940fa81e48f5e13b6339859f1dd0d32f317e5f42f71b94a3465899918ef66bed864bbf84751a06b90806819aa1064eabf885caaa8aa47344a90f05c8

data/blogbot.gemspec CHANGED

@@ -2,8 +2,8 @@
 Gem::Specification.new do |gem|
   gem.name        = 'blogbot'
-  gem.version     = '0.0.4'
-  gem.date        = '2015-09-22'
+  gem.version     = '0.0.5'
+  gem.date        = '2015-09-25'
   gem.platform = Gem::Platform::RUBY
   gem.required_ruby_version = '>= 1.8'

data/lib/blogbot.rb CHANGED

@@ -6,17 +6,6 @@ require 'blogbot/memorization'
 require 'blogbot/navigation'
 require 'blogbot/reflection'
-# MASTER GAME PLAN:
-#
-# 1) accept target site
-# 2) scan for keyword popular
-#   a- if on homepage and nothing comes up, find blogpage then scan again
-#   b- if nothing is coming up, scan sitemap.xml, etc for keyword popular
-# 3) determine div id/class of where keyword popular is located
-# 4) extract all hyperlinks and store in hash
-# 5) index??
-#
 # Make a new blogbot that can scan pages and extract the most popular links!
 class Blogbot
   include Extraction
@@ -45,7 +34,7 @@ class Blogbot
     set_agent
     @current_page = nil
     @current_element = nil
-    @popular_links = {}
+    @popular_links = []
     @indicator = nil
   end
@@ -59,12 +48,12 @@ class Blogbot
     set_agent
     @current_page = nil
     @current_element = nil
-    @popular_links = {}
+    @popular_links = []
     @indicator = nil
   end
   def ignorance_error
-    raise "Sorry, either there are no popular links present
-          or this bot isn't smart enough to extract this site yet/n"
+    fail "Sorry, either there are no popular links present
+          or this bot isn't smart enough to extract this site yet\n"
   end
 end

data/lib/blogbot/extraction.rb CHANGED

@@ -8,12 +8,24 @@ module Extraction
     puts 'Not enough links to extract' if see_multiple_links? == false
     @current_element.css('a').each do |a|
-      next if a.text == '' || a['href'] == '#'
+      next if a.text == '' || a['href'] == '#'
       title = a.text
       link = a['href']
-      @popular_links[link] = title
+      hash = {title: title, link: link}
+      @popular_links << hash
     end
-    @popular_links # Returns entire hash.
+  end
+  def display_links
+    puts "-"*50
+    @popular_links.each do |hash|
+      hash.each do |k, v|
+        puts "#{k.upcase}: #{v}"
+      end
+      puts
+    end
+    puts "-"*50
+    @popular_links
   end
   def extract(url)
@@ -21,7 +33,7 @@ module Extraction
     puts "\nExtracting ...\n"
     scan url
     locate_popular_links
-    extract_links
-    @popular_links.nil? == true ? simple_error : @popular_links
+    extract_links
+    @popular_links.nil? == true ? simple_error : display_links
   end
 end

data/lib/blogbot/navigation.rb CHANGED

@@ -1,14 +1,12 @@
 # Adds capability to navigate through posts pages.
 module Navigation
-  # TODO: add method to auto navigate parents until proper links are present
   # Navigates to posts page based off of store_posts_url.
   def go_to_posts_page
     find_posts_url
     store_posts_url
     @current_page = @posts_url.click
   end
   # Navigates to previous Mechanize page.
   def previous_page
     @current_page = @agent.get(@agent.back['href'])
@@ -21,6 +19,7 @@ module Navigation
     else
       store_indicator
       @current_element = @indicator
+      @current_element
     end
   end
@@ -38,25 +37,11 @@ module Navigation
     end
   end
-  ####
-  # FIXME: this selects the first child and can lead
-  # bot into rabit hole.  Only use when at lower levels of html.
-  # Need to change this to an iteration in the future.
-  #
-  # Changes current Nokogiri element to first child.
-  def descend
-    if @current_element.children.empty? == true # no more room to descend
-      puts 'At lowest element.  Nothing left to descend.'
-    else
-      @current_element = @current_element.child
-    end
-  end
   def auto_ascend
     ascend until see_multiple_links? == true
     @current_element
   end
   def locate_popular_links
     if possible_success? == false
       ignorance_error
@@ -67,8 +52,8 @@ module Navigation
       see_popular? == true ? crawl_popular : ignorance_error
     end
   end
-  # Examine popular element and climb DOM tree until multiple
+  # Examine popular element and climb DOM tree until multiple
   # links are present.
   def crawl_popular
     go_to_popular

data/lib/blogbot/reflection.rb CHANGED

@@ -25,7 +25,7 @@ module Reflection
   def see_multiple_links?
     @current_element.css('a').length < 3 ? false : true
   end
   ####
   # Determines if bot can crawl a Popular section.
   # If it doesn't see popular at first glance, it searches and navigates

data/test/blogbot.rb ADDED

@@ -0,0 +1,59 @@
+#!/usr/bin/env ruby
+require 'mechanize'
+require 'blogbot/extraction'
+require 'blogbot/memorization'
+require 'blogbot/navigation'
+require 'blogbot/reflection'
+# Make a new blogbot that can scan pages and extract the most popular links!
+class Blogbot
+  include Extraction
+  include Memorization
+  include Navigation
+  include Reflection
+  attr_accessor(
+    :agent,
+    :article_link,
+    :blog_link,
+    :current_element,
+    :current_page,
+    :indicator,
+    :parent,
+    :posts_url,
+    :target_url
+  )
+  def set_agent
+    self.agent = Mechanize.new { |a| a.user_agent_alias = 'Mac Safari' }
+  end
+  def initialize
+    puts 'Powering up the rubatron generators!!'
+    set_agent
+    @current_page = nil
+    @current_element = nil
+    @popular_links = []
+    @indicator = nil
+  end
+  # GET a page using Mechanize and set to current page.
+  def scan(url)
+    @target_url = url
+    @current_page = @agent.get(@target_url)
+  end
+  def reset
+    set_agent
+    @current_page = nil
+    @current_element = nil
+    @popular_links = []
+    @indicator = nil
+  end
+  def ignorance_error
+    fail "Sorry, either there are no popular links present
+          or this bot isn't smart enough to extract this site yet\n"
+  end
+end

data/test/blogbot/extraction.rb ADDED

@@ -0,0 +1,39 @@
+# Adds capability to extract data in an organized format from webpage.
+module Extraction
+  ####
+  # Extracts titles and hyperlinks from element being examined.
+  # If the text is an empty '' it's an <img>.
+  # Images are typically duplicate links and ok to skip.
+  def extract_links
+    puts 'Not enough links to extract' if see_multiple_links? == false
+    @current_element.css('a').each do |a|
+      next if a.text == '' || a['href'] == '#'
+      title = a.text
+      link = a['href']
+      hash = {title: title, link: link}
+      @popular_links << hash
+    end
+  end
+  def display_links
+    puts "-"*50
+    @popular_links.each do |hash|
+      hash.each do |k, v|
+        puts "#{k.upcase}: #{v}"
+      end
+      puts
+    end
+    puts "-"*50
+    @popular_links
+  end
+  def extract(url)
+    reset
+    puts "\nExtracting ...\n"
+    scan url
+    locate_popular_links
+    extract_links
+    @popular_links.nil? == true ? simple_error : display_links
+  end
+end

data/test/blogbot/memorization.rb ADDED

@@ -0,0 +1,25 @@
+# Adds capability to memorize things such as URLs, etc.
+# Most variables are stored here.
+module Memorization
+  # Searches page for link that says Articles or Blog.
+  def find_posts_url
+    @article_link = @current_page.link_with(text: /Articles/)
+    @blog_link = @current_page.link_with(text: /Blog/)
+  end
+  # Memorize posts_url found by find_posts.
+  def store_posts_url
+    @posts_url =
+      case
+      when @article_link.nil? == false
+        @article_link
+      when @blog_link.nil? == false
+        @blog_link
+      end
+  end
+  # Sets search indicator to whatever had 'Popular' in its text.
+  def store_indicator
+    @indicator = @current_page.search("[text()*='Popular']").first
+  end
+end

data/test/blogbot/navigation.rb ADDED

@@ -0,0 +1,62 @@
+# Adds capability to navigate through posts pages.
+module Navigation
+  # Navigates to posts page based off of store_posts_url.
+  def go_to_posts_page
+    find_posts_url
+    store_posts_url
+    @current_page = @posts_url.click
+  end
+  # Navigates to previous Mechanize page.
+  def previous_page
+    @current_page = @agent.get(@agent.back['href'])
+  end
+  # Sets current element to 'Popular' indicator.
+  def go_to_popular
+    if see_popular? == false
+      puts 'Nothing says "Popular" on this page'
+    else
+      store_indicator
+      @current_element = @indicator
+      @current_element
+    end
+  end
+  # Returns parent of current Nokogiri element
+  def find_parent
+    @parent = @current_element.parent # one element higher, the div container
+  end
+  # Changes current Nokogiri element to its parent.
+  def ascend
+    if @current_element.ancestors.empty? == true # no more room to ascend
+      puts 'At highest element.  Nothing left to ascend.'
+    else
+      @current_element = @current_element.parent
+    end
+  end
+  def auto_ascend
+    ascend until see_multiple_links? == true
+    @current_element
+  end
+  def locate_popular_links
+    if possible_success? == false
+      ignorance_error
+    elsif see_popular? == true
+      crawl_popular
+    elsif see_posts? == true
+      go_to_posts_page
+      see_popular? == true ? crawl_popular : ignorance_error
+    end
+  end
+  # Examine popular element and climb DOM tree until multiple
+  # links are present.
+  def crawl_popular
+    go_to_popular
+    auto_ascend
+  end
+end

data/test/blogbot/reflection.rb ADDED

@@ -0,0 +1,47 @@
+# Adds capability to examine for posts, articles, and links.
+module Reflection
+  # Searches for link that says articles or blog.
+  def see_posts?
+    find_posts_url
+    if @article_link.nil? == true && @blog_link.nil? == true
+      false # => no article links found
+    else
+      true
+    end
+  end
+  # Searches for keyword 'Popular' on current page.
+  def see_popular?
+    search = @current_page.search "[text()*='Popular']"
+    search.empty? ? false : true
+  end
+  # Searches for presence of <a> tags in current element.
+  def see_links?
+    @current_element.css('a').empty? ? false : true
+  end
+  # Searches for presences of more than two <a> tags in current element.
+  def see_multiple_links?
+    @current_element.css('a').length < 3 ? false : true
+  end
+  ####
+  # Determines if bot can crawl a Popular section.
+  # If it doesn't see popular at first glance, it searches and navigates
+  # to Posts page (Articles or Blog).
+  #
+  # If Popular section still doesn't exist, it's a no-go.
+  def possible_success?
+    if see_popular? == true
+      true
+    elsif see_posts? == true
+      go_to_posts_page # Changes page to look for popular.
+      answer =  see_popular? == true ? true : false # true if popular is present
+      previous_page # Check is complete. Return to original page.
+      answer
+    else
+      false
+    end
+  end
+end

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: blogbot
 version: !ruby/object:Gem::Version
-  version: 0.0.4
+  version: 0.0.5
 platform: ruby
 authors:
 - John Mason
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-09-22 00:00:00.000000000 Z
+date: 2015-09-25 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: mechanize
@@ -37,6 +37,11 @@ files:
 - lib/blogbot/memorization.rb
 - lib/blogbot/navigation.rb
 - lib/blogbot/reflection.rb
+- test/blogbot.rb
+- test/blogbot/extraction.rb
+- test/blogbot/memorization.rb
+- test/blogbot/navigation.rb
+- test/blogbot/reflection.rb
 homepage: https://github.com/m8ss/blogbot
 licenses:
 - MIT
@@ -61,4 +66,9 @@ rubygems_version: 2.4.6
 signing_key:
 specification_version: 4
 summary: The internet is full of noise.  Only read the best.
-test_files: []
+test_files:
+- test/blogbot.rb
+- test/blogbot/extraction.rb
+- test/blogbot/memorization.rb
+- test/blogbot/navigation.rb
+- test/blogbot/reflection.rb