RubyGems - arrogance - Versions diffs - 0.0.1 - Mend

arrogance 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

data/.gitignore ADDED Viewed

@@ -0,0 +1,4 @@
+*.gem
+.bundle
+Gemfile.lock
+pkg/*

data/Gemfile ADDED Viewed

@@ -0,0 +1,4 @@
+# Fetch gems over TLS: a plain http:// source lets anyone on the network path tamper with downloads.
+source "https://rubygems.org"
+# Specify your gem's dependencies in arrogance.gemspec
+gemspec

data/Rakefile ADDED Viewed

	@@ -0,0 +1 @@
1	+ require "bundler/gem_tasks"

data/arrogance.gemspec ADDED Viewed

@@ -0,0 +1,25 @@
+# -*- encoding: utf-8 -*-
+$:.push File.expand_path("../lib", __FILE__)
+require "arrogance/version"
+Gem::Specification.new do |s|
+  s.name        = "arrogance"
+  s.version     = Arrogance::VERSION
+  s.authors     = ["Gideon Providence"]
+  s.email       = ["jprovidence@live.ca"]
+  s.homepage    = ""
+  s.summary     = %q{A tool to easily manage RSS feeds of all kinds}
+  s.description = %q{Easily manage RSS and Atom feeds of all kinds.}
+  s.rubyforge_project = "arrogance"
+  s.add_dependency 'nokogiri'
+  s.files         = `git ls-files`.split("\n")
+  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
+  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
+  s.require_paths = ["lib"]
+  # specify any dependencies here; for example:
+  # s.add_development_dependency "rspec"
+  # s.add_runtime_dependency "rest-client"
+end

data/lib/arrogance/objects.rb ADDED Viewed

@@ -0,0 +1,81 @@
+module Arrogance
+  class BlogObject
+    attr_accessor :aoh, :data, :positions
+    def initialize(array_of_hashes, data)
+      @aoh, @data = array_of_hashes, data
+      @positions = {:first => 0,
+                    :second => 1,
+                    :third => 2,
+                    :fourth => 3,
+                    :fifth => 4,
+                    :sixth => 5,
+                    :seventh => 6,
+                    :eighth => 7,
+                    :ninth => 8,
+                    :tenth => 10}
+    end
+    def guts
+      return @aoh
+    end
+    def most_recent
+      return @aoh[0]
+    end
+    def least_recent
+      return @aoh[@aoh.length - 1]
+    end
+    def last
+      return least_recent
+    end
+    def feed_title
+      return @data.feed_title
+    end
+    def site_link
+      return @data.site_link
+    end
+    def feed_image
+      return @data.feed_image
+    end
+    def get(*args)
+      ary = []
+      if args.length > 1
+        args.each do |pos|
+          ary << @aoh[@positions[pos.intern]]
+        end
+      else
+        return @aoh[@positions[args[0].intern]]
+      end
+      return ary
+    end
+    def method_missing(meth, *args, &block)
+      if contains_positions(meth.to_s)
+        if meth.to_s.split('_and_').length > 1
+          return meth.to_s.split('_and_').collect {|arg| get(arg)}
+        else
+          return get(meth.to_s)
+        end
+      else
+        super
+      end
+    end
+    #-- internal methods. I would usually make these private, but I was once the recipient of a rant
+    #-- on how private methods are Ruby's version of Satan. I shall refrain from marking them private.
+    def contains_positions(str)
+      str.split('_and_').each {|s| return false unless @positions.keys.collect {|k| k.to_s}.include?(s)}
+      return true
+    end
+  end
+end

data/lib/arrogance/tools.rb ADDED Viewed

@@ -0,0 +1,311 @@
+module Arrogance
+  #-- This class uses a bunch of hand coded rules to establish which tag is which.
+  #-- You may think that this would be super easy, but a lot of folks use unconventional
+  #-- tag names, and there are even variations on typical tags:
+  #-- Eg:
+      #-- <link>www.link.com</link>
+  #-- VS:
+      #-- <link www.link.com />   <-- if you do this, please stop. It makes life less delightful
+  class TagHandler
+    #-- clutter
+    require 'rubygems'
+    require 'nokogiri'
+    require 'open-uri'
+    attr_accessor :url, :date, :title, :author, :description, :link
+    #-- Add mode method to Enumerable. Don't you love ruby? :)
+    Enumerable.class_eval do
+      def mode
+        group_by do |e|
+          e
+        end.values.max_by(&:size).first
+      end
+    end
+    def initialize(url)
+      @url = url
+      discover_tags
+    end
+    def discover_tags
+      doc = Nokogiri::XML(open(self.url)).remove_namespaces!
+      set = (doc.xpath('//item').empty? ? doc.xpath('//entry') : doc.xpath('//item'))
+      discover_author_tag(set, doc)
+      discover_link_tag(set)
+      discover_description_tag(set)
+      discover_title_tag
+      discover_date_tag(set)
+    end
+    #-- get author_tag
+    def discover_author_tag(set, doc)
+      ary = []
+      set.each do |node|
+        node.children.each do |child|
+          str, class_ary = child.to_s.gsub(/<.*?>/, ''), []
+          str.split(//).each {|s| class_ary << s.class }
+          unless class_ary.empty?
+            if /<.*?>/.match(child.to_s) && class_ary.mode != Fixnum && str.split(//).length < 20
+              not_tags = ['title', 'total', 'category', 'comments', 'updated', 'id']
+              tag = /<.*?>/.match(child.to_s).to_s.gsub(/(<|>)/, '').strip.split(' ')[0] #string brutality...
+              unless not_tags.include?(tag)
+                ary << tag
+              end
+            end
+          end
+        end
+        unless ary.empty?
+          self.author = ary.mode.to_s.gsub(/(<|>)/, '')
+        else
+          if (auth = doc.xpath('//author//name')[0].to_s) && ! auth.empty?
+            self.author = '//author//name'
+          else
+            self.author = 'none'
+          end
+        end
+      end
+    end
+    #-- get link tag
+    def discover_link_tag(set)
+      ary = []
+      not_links = ['id', 'author']
+      set.each do |node|
+        node.children.each do |child|
+          str = /<.*?>/.match(child.to_s).to_s.gsub(/<.*?>/, '')
+          if str.length < 100 && /<.*?>/.match(child.to_s).to_s.gsub(/(<|>)/, '').strip.split(' ')[0] != 'content'
+            if /http:/.match(child.to_s) || /www./.match(child.to_s)
+              unless not_links.include?(/<.*?>/.match(child.to_s).to_s.gsub(/(<|>)/, ''))
+                ary << (/<.*?>/.match(child.to_s).to_s)
+              end
+            end
+          end
+        end
+      end
+      unless ary.empty?
+        if ary.include?('<origLink>')
+          self.link = 'origLink'
+        elsif !ary.include?('<origLink>') && ary.include?('<link>')
+          self.link = 'link'
+        else
+          self.link = (ary.mode.to_s.split(' ').length > 1 ? 'manual' : ary.mode.to_s.gsub(/(<|>)/, ''))
+        end
+      else
+        self.link = ''
+      end
+    end
+    #-- get description tag
+    def discover_description_tag(set)
+      ary, hash = [], {}
+      set.each do |node|
+        node.children.each do |child|
+          hash[/<.*?>/.match(child.to_s).to_s] ||= 0
+          hash[/<.*?>/.match(child.to_s).to_s] += child.to_s.length
+        end
+        largest = ''
+        hash.each_with_index do |kv, idx|
+          if idx == 0
+            largest = kv[0]
+          else
+            if kv[1] > hash[largest]
+              largest = kv[0]
+            end
+          end
+        end
+        ary << largest
+      end
+      self.description = ary.mode.to_s.gsub(/(<|>)/, '').strip.split(' ')[0]
+    end
+    #-- get title tag, its always title.
+    def discover_title_tag
+      return self.title = 'title'
+    end
+    #-- get date tag
+    def discover_date_tag(set)
+      ary = []
+      set.each do |node|
+        node.children.each do |child|
+          str = /<.*?>/.match(child.to_s)
+          if /[^(u(P|p)|U(P|p)](D|d)ate/.match(str.to_s) && str.to_s.length < 20
+            ary << str
+          end
+        end
+      end
+      if ary.empty?
+        self.date = 'updated'
+      else
+        self.date = ary.mode.to_s.gsub(/(<|>)/, '')
+      end
+      self.date = 'updated' unless self.date.length > 1
+      return self.date
+    end
+  end
+  #-- This gathers information on the feed, such as the site link and feed image
+  class DataHandler
+    require 'open-uri'
+    require 'rubygems'
+    require 'nokogiri'
+    attr_accessor :url, :feed_title, :feed_image, :site_link
+    def initialize(url)
+      self.url = url
+      discover_data
+    end
+    def discover_data
+      doc = Nokogiri::XML(open(self.url)).remove_namespaces!
+      [:discover_feed_title, :discover_feed_image, :discover_site_link].each do |meth|
+        self.send(meth, doc)
+      end
+    end
+    def discover_feed_title(doc)
+      xpath = doc.xpath('//channel//title')
+      unless (ft = xpath.inner_text).empty?
+        self.feed_title = doc.xpath('//channel//title').inner_text.to_s
+        if ft.length > 50
+          self.feed_title = xpath[0].inner_text
+        end
+      else
+        self.feed_title = ''
+      end
+    end
+    def discover_feed_image(doc)
+      unless (img = doc.xpath('//channel//image//url').inner_text).empty?
+        self.feed_image = img.to_s
+      else
+        #-- for some reason this is causing a seg-fault. hope to get to the bottom of it
+        #-- in fact, someone should really get to the bottom of why ruby seg-faults on most
+        #-- gems written in C. C is for cool, guys, lets show it some love.
+        #unless doc.xpath('//channel//image').to_s.empty?
+        #  tag = doc.xpath('//channel//image').to_s
+        #  self.feed_image = /".*?"/.match(tag).to_s
+        #else
+          self.feed_image = ''
+        #end
+      end
+    end
+    def discover_site_link(doc)
+      link = doc.xpath('//channel//link').inner_text.to_s
+      if link.length > 49
+        link = /(.*?.com).*?/.match(link)[0]
+      end
+      self.site_link = link
+    end
+  end
+  class PostHandler
+    require 'rubygems'
+    require 'nokogiri'
+    attr_accessor :url, :posts
+    def initialize(url, tag_set)
+      self.url = url
+      init(tag_set)
+    end
+    def init(tag_set)
+      doc, ary = Nokogiri::XML(open(self.url)).remove_namespaces!, []
+      unless doc.xpath('//entry').empty?
+        doc.xpath('//entry').each_with_index do |i, idx|
+          ary << build_post(i, idx, tag_set)
+        end
+      end
+      unless doc.xpath('//item').empty?
+        doc.xpath('//item').each_with_index do |i, idx|
+          ary << build_post(i, idx, tag_set)
+        end
+      end
+      self.posts = ary
+    end
+    def build_post(item, idx, tag_set)
+      hash = {}
+      unless tag_set.title.nil?
+        hash[:title] = item.xpath(tag_set.title).inner_text.to_s
+      end
+      unless tag_set.author.nil?
+        if tag_set.author == 'none'
+          hash[:author] == 'Unknown'
+        else
+          if tag_set.author == '//author//name'
+            if item.xpath(tag_set.author).length == 1
+              hash[:author] = item.xpath(tag_set.author).inner_text.to_s
+            else
+              hash[:author] = item.xpath(tag_set.author)[idx].inner_text.to_s
+            end
+          else
+            hash[:author] = item.xpath(tag_set.author).inner_text.to_s
+          end
+        end
+      end
+      unless tag_set.description.nil?
+        hash[:description] = item.xpath(tag_set.description).inner_text.to_s
+      end
+      unless tag_set.date.nil?
+        hash[:date] = item.xpath(tag_set.date).inner_text.to_s
+        if hash[:date].nil? || hash[:date].empty?
+          if (m = /[0-9]\{4, 4\}-[0-9]\{2,2\}-[0-9]\{2, 2\}/.match(item.xpath('id').to_s).to_s)
+            hash[:date] = /,.*?:/.match(item.xpath('id').to_s).to_s.gsub(/(,|:)/, '')
+          end
+        end
+      end
+      unless tag_set.link.nil?
+        if tag_set.link == 'manual'
+          item.xpath('link').each do |link|
+            if /rel=/.match(link.to_s)
+              if /type=/.match(link.to_s)
+                hash[:link] = /("|')http.*?("|')/.match(link.to_s).to_s.gsub(/('|")/, '')
+                break
+              elsif /href=/.match(link.to_s) && /alternate/.match(link.to_s)
+                hash[:link] = /("|')http.*?("|')/.match(link.to_s).to_s.gsub(/("|')/, '')
+                break
+              end
+            end
+          end
+          if hash[:link].nil? || hash[:link].empty?
+            item.xpath('link').each do |link|
+              if /href=/.match(link.to_s)
+                hash[:link] = /("|').*?("|')/.match(link.to_s).to_s.gsub(/("|')/, '')
+                break
+              end
+            end
+          end
+        else
+          hash[:link] = item.xpath(tag_set.link).inner_text.to_s
+          if hash[:link].nil? || hash[:link].empty?
+            item.xpath('link').each do |link|
+              if /type=("|')text\/html("|')/.match(link.to_s)
+                if /rel=("|')alternate("|') /.match(link.to_s)
+                  hash[:link] = /("|').*?("|')/.match(link.to_s).to_s
+                  break
+                end
+              end
+            end
+          end
+        end
+      end
+      [:link, :date, :description, :author, :title].each do |i|
+        if !hash[i]
+          hash[i] = ''
+        end
+      end
+      return hash
+    end
+  end
+end

data/lib/arrogance/version.rb ADDED Viewed

@@ -0,0 +1,3 @@
+module Arrogance
+  VERSION = "0.0.1"
+end

data/lib/arrogance/wrangler.rb ADDED Viewed

@@ -0,0 +1,15 @@
+module Arrogance
+  class Wrangler
+    class << self
+      def beat_into_submission(url)
+        tags = TagHandler.new(url)
+        data = DataHandler.new(url)
+        posts = PostHandler.new(url, tags)
+        blog_obj = BlogObject.new(posts.posts, data)
+      end
+    end
+  end
+end

data/lib/arrogance.rb ADDED Viewed

@@ -0,0 +1,64 @@
+require "arrogance/version"
+require "arrogance/wrangler"
+require "arrogance/tools"
+require "arrogance/objects"
+module Arrogance
+  class Lasso
+    #-- @param urls -> A list of urls. This param can take one of two forms:
+        #-- 1: Hash structured as follows:
+            #-- {:term => [Array, of, urls],
+            #--  :barack_obamas_second_term => [Another, array, of, urls]
+            #--  :etc ...
+            #-- }
+        #-- 2: Array:
+            #-- [Array, of, urls, that, can, be, as, long, as, you, want]
+    #-- Returns: In either case, the return will be of the same structure.
+        #-- However, any array(s) of urls passed in will be replaced (non-destructive)
+        #-- with array(s) of blogobjects:
+            #-- {:term => [BlogObject<#*@&#>, BlogObject<@&^#^@>, BlogObject<rubbish_chars>]}
+        #-- OR
+            #-- [BlogObject<#*@&#>, BlogObject<@&^#^@>, BlogObject<rubbish_chars>]
+    #NOTE: The returned BlogObject(s) can be manipulated as follows
+        #my_blog_object.most_recent
+        #-- => {:title => 'Blogger stating opinions!', :date => '2011-11-11...', :etc => '...'} <-- Thats a hash, treat it like one
+    #-- You can retrive individual posts by stating their position in reverse chronological order (newest first)
+        #my_blog_object.first
+        #-- => {:title => 'Blogger stating opinions!', :date => '2011-11-11 12:34:25', :etc => '...'}
+        #my_blog_object.first_and_seventh
+        #-- => [{:title => 'Blogger stating opinions', :yada => 'yada'}, {:title => 'Look at ma kittehs!!1!one', ...}]
+    #-- feel free to get rediculous:
+        #my_blog_object.first_and_second_and_fourth_and_ninth
+    #-- Arrogance will return a a max of 10
+    #-- Additionally, you can retrive the feed title, site link and feed_image (if they exist) by calling
+        #feed_title, #site_link and #feed_image methods of the BlogObject
+    #-- The guts of a BlogObject is really just an array of hashes. If you just want to deal with that yourself
+    #-- and not take advantage of all the cool methods I made for you, thats fine. Just call my_blog_object.guts
+    #-- and you can get that array of hashes to play with.
+    class << self
+      def read(urls)
+        if urls.kind_of? Hash
+          return build_a_hash_response urls
+        elsif urls.kind_of? Array
+          return build_an_array_reponse urls
+        end
+      end
+      def build_a_hash_response(urls)
+        arrogance = {}
+        urls.each do |k, v|
+          arrogance[k] = v.collect {|url| Wrangler.beat_into_submission(url)}
+        end
+        return arrogance
+      end
+      def build_an_array_reponse(urls)
+        return urls.collect {|url| Wrangler.beat_into_submission(url)}
+      end
+    end
+  end
+end

metadata ADDED Viewed

@@ -0,0 +1,73 @@
+--- !ruby/object:Gem::Specification
+name: arrogance
+version: !ruby/object:Gem::Version
+  prerelease:
+  version: 0.0.1
+platform: ruby
+authors:
+- Gideon Providence
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2011-08-17 00:00:00 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: nokogiri
+  prerelease: false
+  requirement: &id001 !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: "0"
+  type: :runtime
+  version_requirements: *id001
+description: Easily manage RSS and Atom feeds of all kinds.
+email:
+- jprovidence@live.ca
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- .gitignore
+- Gemfile
+- Rakefile
+- arrogance.gemspec
+- lib/arrogance.rb
+- lib/arrogance/objects.rb
+- lib/arrogance/tools.rb
+- lib/arrogance/version.rb
+- lib/arrogance/wrangler.rb
+homepage: ""
+licenses: []
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: "0"
+required_rubygems_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: "0"
+requirements: []
+rubyforge_project: arrogance
+rubygems_version: 1.8.7
+signing_key:
+specification_version: 3
+summary: A tool to easily manage RSS feeds of all kinds
+test_files: []