RubyGems - arrogance - Versions diffs - 0.0.1 - Mend

arrogance 0.0.1

Files changed (10) hide show

data/.gitignore ADDED Viewed

@@ -0,0 +1,4 @@
+*.gem
+.bundle
+Gemfile.lock
+pkg/*

data/Gemfile ADDED Viewed

@@ -0,0 +1,4 @@
+# Fetch gems over TLS so downloads cannot be altered in transit.
+source "https://rubygems.org"
+# Specify your gem's dependencies in arrogance.gemspec
+gemspec

data/Rakefile ADDED Viewed

@@ -0,0 +1 @@
+require "bundler/gem_tasks"

data/arrogance.gemspec ADDED Viewed

@@ -0,0 +1,25 @@
+# -*- encoding: utf-8 -*-
+$:.push File.expand_path("../lib", __FILE__)
+require "arrogance/version"
+Gem::Specification.new do |s|
+  s.name        = "arrogance"
+  s.version     = Arrogance::VERSION
+  s.authors     = ["Gideon Providence"]
+  s.email       = ["jprovidence@live.ca"]
+  s.homepage    = ""
+  s.summary     = %q{A tool to easily manage RSS feeds of all kinds}
+  s.description = %q{Easily manage RSS and Atom feeds of all kinds.}
+  s.rubyforge_project = "arrogance"
+  s.add_dependency 'nokogiri'
+  s.files         = `git ls-files`.split("\n")
+  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
+  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
+  s.require_paths = ["lib"]
+  # specify any dependencies here; for example:
+  # s.add_development_dependency "rspec"
+  # s.add_runtime_dependency "rest-client"
+end

data/lib/arrogance/objects.rb ADDED Viewed

@@ -0,0 +1,81 @@
+module Arrogance
+  class BlogObject
+    attr_accessor :aoh, :data, :positions
+    def initialize(array_of_hashes, data)
+      @aoh, @data = array_of_hashes, data
+      @positions = {:first => 0,
+                    :second => 1,
+                    :third => 2,
+                    :fourth => 3,
+                    :fifth => 4,
+                    :sixth => 5,
+                    :seventh => 6,
+                    :eighth => 7,
+                    :ninth => 8,
+                    :tenth => 10}
+    end
+    def guts
+      return @aoh
+    end
+    def most_recent
+      return @aoh[0]
+    end
+    def least_recent
+      return @aoh[@aoh.length - 1]
+    end
+    def last
+      return least_recent
+    end
+    def feed_title
+      return @data.feed_title
+    end
+    def site_link
+      return @data.site_link
+    end
+    def feed_image
+      return @data.feed_image
+    end
+    def get(*args)
+      ary = []
+      if args.length > 1
+        args.each do |pos|
+          ary << @aoh[@positions[pos.intern]]
+        end
+      else
+        return @aoh[@positions[args[0].intern]]
+      end
+      return ary
+    end
+    def method_missing(meth, *args, &block)
+      if contains_positions(meth.to_s)
+        if meth.to_s.split('_and_').length > 1
+          return meth.to_s.split('_and_').collect {|arg| get(arg)}
+        else
+          return get(meth.to_s)
+        end
+      else
+        super
+      end
+    end
+    #-- internal methods. I would usually make these private, but I was once the recipient of a rant
+    #-- on how private methods are Ruby's version of Satan. I shall refrain from marking them private.
+    def contains_positions(str)
+      str.split('_and_').each {|s| return false unless @positions.keys.collect {|k| k.to_s}.include?(s)}
+      return true
+    end
+  end
+end

data/lib/arrogance/tools.rb ADDED Viewed

@@ -0,0 +1,311 @@
+module Arrogance
+  #-- This class uses a bunch of hand coded rules to establish which tag is which.
+  #-- You may think that this would be super easy, but alot of folks use unconventional
+  #-- tag names, and there are even variations on typical tags:
+  #-- Eg:
+      #-- <link>www.link.com</link>
+  #-- VS:
+      #-- <link www.link.com />   <-- if you do this, please stop. It makes life less delightful
+  class TagHandler
+    #-- clutter
+    require 'rubygems'
+    require 'nokogiri'
+    require 'open-uri'
+    attr_accessor :url, :date, :title, :author, :description, :link
+    #-- Add mode method to Enumerable. Don't you love ruby? :)
+    Enumerable.class_eval do
+      def mode
+        group_by do |e|
+          e
+        end.values.max_by(&:size).first
+      end
+    end
+    def initialize(url)
+      @url = url
+      discover_tags
+    end
+    def discover_tags
+      doc = Nokogiri::XML(open(self.url)).remove_namespaces!
+      set = (doc.xpath('//item').empty? ? doc.xpath('//entry') : doc.xpath('//item'))
+      discover_author_tag(set, doc)
+      discover_link_tag(set)
+      discover_description_tag(set)
+      discover_title_tag
+      discover_date_tag(set)
+    end
+    #-- get author_tag
+    def discover_author_tag(set, doc)
+      ary = []
+      set.each do |node|
+        node.children.each do |child|
+          str, class_ary = child.to_s.gsub(/<.*?>/, ''), []
+          str.split(//).each {|s| class_ary << s.class }
+          unless class_ary.empty?
+            if /<.*?>/.match(child.to_s) && class_ary.mode != Fixnum && str.split(//).length < 20
+              not_tags = ['title', 'total', 'category', 'comments', 'updated', 'id']
+              tag = /<.*?>/.match(child.to_s).to_s.gsub(/(<|>)/, '').strip.split(' ')[0] #string brutality...
+              unless not_tags.include?(tag)
+                ary << tag
+              end
+            end
+          end
+        end
+        unless ary.empty?
+          self.author = ary.mode.to_s.gsub(/(<|>)/, '')
+        else
+          if (auth = doc.xpath('//author//name')[0].to_s) && ! auth.empty?
+            self.author = '//author//name'
+          else
+            self.author = 'none'
+          end
+        end
+      end
+    end
+    #-- get link tag
+    def discover_link_tag(set)
+      ary = []
+      not_links = ['id', 'author']
+      set.each do |node|
+        node.children.each do |child|
+          str = /<.*?>/.match(child.to_s).to_s.gsub(/<.*?>/, '')
+          if str.length < 100 && /<.*?>/.match(child.to_s).to_s.gsub(/(<|>)/, '').strip.split(' ')[0] != 'content'
+            if /http:/.match(child.to_s) || /www./.match(child.to_s)
+              unless not_links.include?(/<.*?>/.match(child.to_s).to_s.gsub(/(<|>)/, ''))
+                ary << (/<.*?>/.match(child.to_s).to_s)
+              end
+            end
+          end
+        end
+      end
+      unless ary.empty?
+        if ary.include?('<origLink>')
+          self.link = 'origLink'
+        elsif !ary.include?('<origLink>') && ary.include?('<link>')
+          self.link = 'link'
+        else
+          self.link = (ary.mode.to_s.split(' ').length > 1 ? 'manual' : ary.mode.to_s.gsub(/(<|>)/, ''))
+        end
+      else
+        self.link = ''
+      end
+    end
+    #-- get description tag
+    def discover_description_tag(set)
+      ary, hash = [], {}
+      set.each do |node|
+        node.children.each do |child|
+          hash[/<.*?>/.match(child.to_s).to_s] ||= 0
+          hash[/<.*?>/.match(child.to_s).to_s] += child.to_s.length
+        end
+        largest = ''
+        hash.each_with_index do |kv, idx|
+          if idx == 0
+            largest = kv[0]
+          else
+            if kv[1] > hash[largest]
+              largest = kv[0]
+            end
+          end
+        end
+        ary << largest
+      end
+      self.description = ary.mode.to_s.gsub(/(<|>)/, '').strip.split(' ')[0]
+    end
+    #-- get title tag, its always title.
+    def discover_title_tag
+      return self.title = 'title'
+    end
+    #-- get date tag
+    def discover_date_tag(set)
+      ary = []
+      set.each do |node|
+        node.children.each do |child|
+          str = /<.*?>/.match(child.to_s)
+          if /[^(u(P|p)|U(P|p)](D|d)ate/.match(str.to_s) && str.to_s.length < 20
+            ary << str
+          end
+        end
+      end
+      if ary.empty?
+        self.date = 'updated'
+      else
+        self.date = ary.mode.to_s.gsub(/(<|>)/, '')
+      end
+      self.date = 'updated' unless self.date.length > 1
+      return self.date
+    end
+  end
+  #-- This gathers information on the feed, such as the site link and feed image
+  class DataHandler
+    require 'open-uri'
+    require 'rubygems'
+    require 'nokogiri'
+    attr_accessor :url, :feed_title, :feed_image, :site_link
+    def initialize(url)
+      self.url = url
+      discover_data
+    end
+    def discover_data
+      doc = Nokogiri::XML(open(self.url)).remove_namespaces!
+      [:discover_feed_title, :discover_feed_image, :discover_site_link].each do |meth|
+        self.send(meth, doc)
+      end
+    end
+    def discover_feed_title(doc)
+      xpath = doc.xpath('//channel//title')
+      unless (ft = xpath.inner_text).empty?
+        self.feed_title = doc.xpath('//channel//title').inner_text.to_s
+        if ft.length > 50
+          self.feed_title = xpath[0].inner_text
+        end
+      else
+        self.feed_title = ''
+      end
+    end
+    def discover_feed_image(doc)
+      unless (img = doc.xpath('//channel//image//url').inner_text).empty?
+        self.feed_image = img.to_s
+      else
+        #-- for some reason this is causing a seg-fault. hope to get to the bottom of it
+        #-- in fact, someone should really get to the bottom of why ruby seg-faults on most
+        #-- gems written in C. C is for cool, guys, lets show it some love.
+        #unless doc.xpath('//channel//image').to_s.empty?
+        #  tag = doc.xpath('//channel//image').to_s
+        #  self.feed_image = /".*?"/.match(tag).to_s
+        #else
+          self.feed_image = ''
+        #end
+      end
+    end
+    def discover_site_link(doc)
+      link = doc.xpath('//channel//link').inner_text.to_s
+      if link.length > 49
+        link = /(.*?.com).*?/.match(link)[0]
+      end
+      self.site_link = link
+    end
+  end
+  class PostHandler
+    require 'rubygems'
+    require 'nokogiri'
+    attr_accessor :url, :posts
+    def initialize(url, tag_set)
+      self.url = url
+      init(tag_set)
+    end
+    def init(tag_set)
+      doc, ary = Nokogiri::XML(open(self.url)).remove_namespaces!, []
+      unless doc.xpath('//entry').empty?
+        doc.xpath('//entry').each_with_index do |i, idx|
+          ary << build_post(i, idx, tag_set)
+        end
+      end
+      unless doc.xpath('//item').empty?
+        doc.xpath('//item').each_with_index do |i, idx|
+          ary << build_post(i, idx, tag_set)
+        end
+      end
+      self.posts = ary
+    end
+    def build_post(item, idx, tag_set)
+      hash = {}
+      unless tag_set.title.nil?
+        hash[:title] = item.xpath(tag_set.title).inner_text.to_s
+      end
+      unless tag_set.author.nil?
+        if tag_set.author == 'none'
+          hash[:author] == 'Unknown'
+        else
+          if tag_set.author == '//author//name'
+            if item.xpath(tag_set.author).length == 1
+              hash[:author] = item.xpath(tag_set.author).inner_text.to_s
+            else
+              hash[:author] = item.xpath(tag_set.author)[idx].inner_text.to_s
+            end
+          else
+            hash[:author] = item.xpath(tag_set.author).inner_text.to_s
+          end
+        end
+      end
+      unless tag_set.description.nil?
+        hash[:description] = item.xpath(tag_set.description).inner_text.to_s
+      end
+      unless tag_set.date.nil?
+        hash[:date] = item.xpath(tag_set.date).inner_text.to_s
+        if hash[:date].nil? || hash[:date].empty?
+          if (m = /[0-9]\{4, 4\}-[0-9]\{2,2\}-[0-9]\{2, 2\}/.match(item.xpath('id').to_s).to_s)
+            hash[:date] = /,.*?:/.match(item.xpath('id').to_s).to_s.gsub(/(,|:)/, '')
+          end
+        end
+      end
+      unless tag_set.link.nil?
+        if tag_set.link == 'manual'
+          item.xpath('link').each do |link|
+            if /rel=/.match(link.to_s)
+              if /type=/.match(link.to_s)
+                hash[:link] = /("|')http.*?("|')/.match(link.to_s).to_s.gsub(/('|")/, '')
+                break
+              elsif /href=/.match(link.to_s) && /alternate/.match(link.to_s)
+                hash[:link] = /("|')http.*?("|')/.match(link.to_s).to_s.gsub(/("|')/, '')
+                break
+              end
+            end
+          end
+          if hash[:link].nil? || hash[:link].empty?
+            item.xpath('link').each do |link|
+              if /href=/.match(link.to_s)
+                hash[:link] = /("|').*?("|')/.match(link.to_s).to_s.gsub(/("|')/, '')
+                break
+              end
+            end
+          end
+        else
+          hash[:link] = item.xpath(tag_set.link).inner_text.to_s
+          if hash[:link].nil? || hash[:link].empty?
+            item.xpath('link').each do |link|
+              if /type=("|')text\/html("|')/.match(link.to_s)
+                if /rel=("|')alternate("|') /.match(link.to_s)
+                  hash[:link] = /("|').*?("|')/.match(link.to_s).to_s
+                  break
+                end
+              end
+            end
+          end
+        end
+      end
+      [:link, :date, :description, :author, :title].each do |i|
+        if !hash[i]
+          hash[i] = ''
+        end
+      end
+      return hash
+    end
+  end
+end

data/lib/arrogance/version.rb ADDED Viewed

@@ -0,0 +1,3 @@
+module Arrogance
+  #-- Version string of this release; arrogance.gemspec reads it for s.version.
+  VERSION = "0.0.1"
+end

data/lib/arrogance/wrangler.rb ADDED Viewed

@@ -0,0 +1,15 @@
+module Arrogance
+  class Wrangler
+    class << self
+      def beat_into_submission(url)
+        tags = TagHandler.new(url)
+        data = DataHandler.new(url)
+        posts = PostHandler.new(url, tags)
+        blog_obj = BlogObject.new(posts.posts, data)
+      end
+    end
+  end
+end

data/lib/arrogance.rb ADDED Viewed

@@ -0,0 +1,64 @@
+require "arrogance/version"
+require "arrogance/wrangler"
+require "arrogance/tools"
+require "arrogance/objects"
+module Arrogance
+  class Lasso
+    #-- @param urls -> A list of urls. This param can take one of two forms:
+        #-- 1: Hash structured as follows:
+            #-- {:term => [Array, of, urls],
+            #--  :barack_obamas_second_term => [Another, array, of, urls]
+            #--  :etc ...
+            #-- }
+        #-- 2: Array:
+            #-- [Array, of, urls, that, can, be, as, long, as, you, want]
+    #-- Returns: In either case, the return will be of the same structure.
+        #-- However, any array(s) of urls passed in will be replaced (non-destructive)
+        #-- with array(s) of blogobjects:
+            #-- {:term => [BlogObject<#*@&#>, BlogObject<@&^#^@>, BlogObject<rubbish_chars>]}
+        #-- OR
+            #-- [BlogObject<#*@&#>, BlogObject<@&^#^@>, BlogObject<rubbish_chars>]
+    #NOTE: The returned BlogObject(s) can be manipulated as follows
+        #my_blog_object.most_recent
+        #-- => {:title => 'Blogger stating opinions!', :date => '2011-11-11...', :etc => '...'} <-- Thats a hash, treat it like one
+    #-- You can retrive individual posts by stating their position in reverse chronological order (newest first)
+        #my_blog_object.first
+        #-- => {:title => 'Blogger stating opinions!', :date => '2011-11-11 12:34:25', :etc => '...'}
+        #my_blog_object.first_and_seventh
+        #-- => [{:title => 'Blogger stating opinions', :yada => 'yada'}, {:title => 'Look at ma kittehs!!1!one', ...}]
+    #-- feel free to get rediculous:
+        #my_blog_object.first_and_second_and_fourth_and_ninth
+    #-- Arrogance will return a a max of 10
+    #-- Additionally, you can retrive the feed title, site link and feed_image (if they exist) by calling
+        #feed_title, #site_link and #feed_image methods of the BlogObject
+    #-- The guts of a BlogObject is really just an array of hashes. If you just want to deal with that yourself
+    #-- and not take advantage of all the cool methods I made for you, thats fine. Just call my_blog_object.guts
+    #-- and you can get that array of hashes to play with.
+    class << self
+      def read(urls)
+        if urls.kind_of? Hash
+          return build_a_hash_response urls
+        elsif urls.kind_of? Array
+          return build_an_array_reponse urls
+        end
+      end
+      def build_a_hash_response(urls)
+        arrogance = {}
+        urls.each do |k, v|
+          arrogance[k] = v.collect {|url| Wrangler.beat_into_submission(url)}
+        end
+        return arrogance
+      end
+      def build_an_array_reponse(urls)
+        return urls.collect {|url| Wrangler.beat_into_submission(url)}
+      end
+    end
+  end
+end

metadata ADDED Viewed

@@ -0,0 +1,73 @@
+--- !ruby/object:Gem::Specification
+name: arrogance
+version: !ruby/object:Gem::Version
+  prerelease:
+  version: 0.0.1
+platform: ruby
+authors:
+- Gideon Providence
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2011-08-17 00:00:00 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: nokogiri
+  prerelease: false
+  requirement: &id001 !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: "0"
+  type: :runtime
+  version_requirements: *id001
+description: Easily manage RSS and Atom feeds of all kinds.
+email:
+- jprovidence@live.ca
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- .gitignore
+- Gemfile
+- Rakefile
+- arrogance.gemspec
+- lib/arrogance.rb
+- lib/arrogance/objects.rb
+- lib/arrogance/tools.rb
+- lib/arrogance/version.rb
+- lib/arrogance/wrangler.rb
+homepage: ""
+licenses: []
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: "0"
+required_rubygems_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: "0"
+requirements: []
+rubyforge_project: arrogance
+rubygems_version: 1.8.7
+signing_key:
+specification_version: 3
+summary: A tool to easily manage RSS feeds of all kinds
+test_files: []