ruby-reddit 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +14 -3
- data/README.txt +10 -2
- data/lib/reddit.rb +72 -40
- data/test/test_reddit.rb +69 -22
- metadata +4 -4
data/History.txt
CHANGED
@@ -1,9 +1,20 @@
-== 0.1.0 / 2008-01-22
+== 0.2.0 / 2008-02-18
 
-* 1 minor enhancement
-  * 1st release. Link scraping from reddit's hot and new pages.
+* 5 minor enhancements
+  * Added reading top links from subreddits.
+  * Defaulted read options hash to nil so that page number does not have to be passed to read. Page now defaults to first.
+  * Added author attribute to Link.
+  * Added points attribute to Link.
+  * Removed Data class, so read is now a Reddit module method. Deprecates Reddit::Data.read
+* 1 bug fix
+  * Unescaped submitted URLs.
 
 == 0.1.1 / 2008-01-22
 
 * 1 bug fix
   * Link attributes assigned in proper order.
+
+== 0.1.0 / 2008-01-22
+
+* 1 minor enhancement
+  * 1st release. Link scraping from reddit's hot and new pages.
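Taken together, the 0.2.0 entries above describe a simpler read API plus lazily fetched link metadata. A rough usage sketch pieced together from this changelog and the README/lib changes below (not an official example; it assumes the gem is installed and required as "reddit"):

  require "rubygems"
  require "reddit"

  # Page now defaults to the first page, so no options hash is needed.
  links = Reddit.read :ruby

  # author and points are parsed lazily from each link's comments page.
  links.each do |link|
    puts "#{link.rank}. #{link.title} (#{link.points} points, by #{link.author})"
  end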
data/README.txt
CHANGED
@@ -12,8 +12,10 @@ Interact with reddit.com. Read links and post links (coming soon!).
 
 == SYNOPSIS:
 
-
-
+  require "reddit"
+
+  # Get all the links from the "hot" page
+  links = Reddit.read :hot
 
   # Check out the links!
   for link in links
@@ -23,6 +25,12 @@ Interact with reddit.com. Read links and post links (coming soon!).
     puts link.title
     puts link.date
   end
+
+  # Get all the links from the first page of the ruby subreddit
+  ruby_links = Reddit.read :ruby
+
+  # Get all the links from the second page of the ruby subreddit
+  ruby_links_2 = Reddit.read :ruby, :page => 1
 
 == REQUIREMENTS:
 
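Pages are zero-indexed, which is why :page => 1 in the synopsis above returns the second page; calling Reddit.read without options falls back to DefaultOptions (:page => 0). A minimal sketch of the two calls side by side, under the same installed-gem assumption as above:

  require "rubygems"
  require "reddit"

  first_page  = Reddit.read :ruby               # same as :page => 0, links 1-25
  second_page = Reddit.read :ruby, :page => 1   # offset by 25 links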
data/lib/reddit.rb
CHANGED
@@ -4,29 +4,20 @@ require "rubygems"
 require "hpricot"
 require "open-uri"
 require "mechanize"
+require "cgi"
 
 module Reddit
 
-  VERSION = '0.1.1'
+  VERSION = '0.2.0'
 
   DefaultOptions = {
     :page => 0
   }
-
-
-
-
-
-  def self.read(section, options)
-    conf = Reddit::DefaultOptions.update options
-    reader = Reader.new section, conf[:page]
-    reader.links
-  end
-
-  def self.subreddit_url(subreddit)
-    "http://reddit.com/r/#{subreddit}/.rss"
-  end
-
+
+  def self.read(section, options={})
+    conf = Reddit::DefaultOptions.update options
+    reader = Reader.new section, conf[:page]
+    reader.links
   end
 
   class Reader
@@ -39,43 +30,60 @@ module Reddit
     }
 
     def initialize(section, page)
-      @section = section
       @page = page
+      @feed_url = generate_feed_url section
     end
 
     def links
       index=0
-
-
-
-
-
-
-
-
-
+      begin
+        collection = (page_data/:item).map do |item|
+          rank = link_start + (index += 1)
+          site_id = parse_guid((item/:guid).inner_html)
+          title = (item/:title).inner_html
+          date = (item/:"dc:date").inner_html
+          url = CGI.unescapeHTML(parse_description((item/:description).inner_html))
+
+          Link.new(rank, site_id, url, title, date)
+        end
+      rescue OpenURI::HTTPError
+        []
+      end
     end
 
-    def
-
+    def self.subreddit_url(subreddit)
+      "http://reddit.com/r/#{subreddit}/.rss"
     end
 
-
-
-
+    private
+    def generate_feed_url(section)
+      params = "?count=#{link_start}"
+      if Urls[section]
+        "#{Urls[section]}#{params}"
+      else
+        "#{self.class.subreddit_url(section)}#{params}"
+      end
+    end
 
-
-
-
-    end
+    def parse_guid(guid)
+      GuidRegExp.match(guid)[1]
+    end
 
-
-
-
+    def parse_description(description)
+      DescriptionRegExp.match(description)[1]
+    end
+
+    def page_data
+      Hpricot.XML(open(@feed_url))
+    end
+
+    def link_start
+      @page * 25
+    end
   end
 
   class Link
-    attr_accessor :rank, :site_id, :url, :title, :date
+    attr_accessor :rank, :site_id, :url, :title, :date
 
     def initialize(rank, site_id, url, title, date, points=nil, author=nil)
       @rank = rank
@@ -86,6 +94,30 @@
       @points = points
       @author = author
     end
+
+    def author
+      @author ||= parse_author
+    end
+
+    def points
+      @points ||= parse_points
+    end
+
+    private
+    def parse_author
+      doc = Hpricot(open(link_url))
+      (doc/"div.little a").first.inner_html
+    end
+
+    def parse_points
+      doc = Hpricot(open(link_url))
+      points_string = (doc/"div.little span.inside").inner_html
+      /\d+/.match(points_string)[0]
+    end
+
+    def link_url
+      "http://reddit.com/info/#{@site_id}/comments/"
+    end
+
   end
-
 end
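To make the new Reader plumbing easier to follow: generate_feed_url above appends a ?count offset of page * 25 to either a known section URL or the subreddit RSS fallback. A standalone sketch of that logic; the Urls hash itself is not shown in this diff, so the entry below is a hypothetical placeholder:

  # Hypothetical stand-in for Reddit::Reader::Urls, whose contents this diff does not show.
  URLS = { :hot => "http://reddit.com/.rss" }

  def feed_url(section, page)
    link_start = page * 25                                   # same arithmetic as Reader#link_start
    params     = "?count=#{link_start}"
    base       = URLS[section] || "http://reddit.com/r/#{section}/.rss"   # subreddit fallback
    "#{base}#{params}"
  end

  puts feed_url(:ruby, 1)   # => http://reddit.com/r/ruby/.rss?count=25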
data/test/test_reddit.rb
CHANGED
@@ -2,13 +2,41 @@ require File.dirname(__FILE__) + '/test_helper.rb'
 
 class RedditTest < Test::Unit::TestCase
 
-
+  # TODO: write tests for these assertions
+  def assert_not_blank(attribute, message=nil)
+    message = build_message message, '<?> is blank.', attribute
+    assert_block message do
+      !attribute.nil? && attribute != ""
+    end
+  end
+
+  def assert_valid_site_id(site_id, message=nil)
+    assert(/^[a-zA-Z0-9]+$/.match(site_id), message)
+  end
+
+  def assert_valid_url(url, message=nil)
+    uri = URI.parse(URI.encode(url))
+    assert_block message do
+      if uri.scheme
+        true
+      else
+        !/.*\/info\/.+/.match(url).nil?
+      end
+    end
   end
 
-  def
-
+  def assert_valid_date(date, message=nil)
+    assert_nothing_raised ArgumentError, message do
+      DateTime.parse date
+    end
+  end
+
+  def generate_random_string(size=15)
+    (1..size).collect { (i = Kernel.rand(62); i += ((i < 10) ? 48 : ((i < 36) ? 55 : 61 ))).chr }.join
   end
 
+  ####
+
   def test_main_urls
     for section, url in Reddit::Reader::Urls
       page = open(url)
@@ -16,52 +44,71 @@ class RedditTest < Test::Unit::TestCase
     end
   end
 
-  def
-
-
-
-
-
-
-
-      page = open(Reddit::Data.subreddit_url(subreddit))
-      assert_equal "200", page.status[0]
-    end
+  def test_get_subreddit_links_for_valid_subreddit
+    links = Reddit.read :ruby, :page => 0
+    assert_equal 25, links.length
+  end
+
+  def test_dont_get_subreddit_links_for_invalid_subreddit
+    links = Reddit.read generate_random_string.to_sym, :page =>0
+    assert_equal 0, links.length
   end
 
   def test_get_hot_links
-    links = Reddit
+    links = Reddit.read :hot, :page => 0
 
     assert_equal 25, links.length
     links.each_with_index do |link, index|
       assert_equal index + 1, link.rank
+
       assert_not_blank link.site_id
+      assert_valid_site_id link.site_id
+
       assert_not_blank link.url
+      assert_valid_url link.url
+
       assert_not_blank link.title
+
       assert_not_blank link.date
+      assert_valid_date link.date
     end
   end
+
+  def test_get_link_author
+    link = Reddit.read(:ruby).first
+    assert_not_nil link.author
+
+    page = open("http://reddit.com/user/#{link.author}")
+    assert_equal "200", page.status[0]
+  end
+
+  def test_get_link_points
+    link = Reddit.read(:ruby).first
+    assert_not_nil link.points
+
+    assert(/^\d+$/.match(link.points))
+  end
 
   def test_parse_guid
     reader = Reddit::Reader.new :hot, 0
-    link_start = reader.link_start
-    page_data = reader.page_data
+    link_start = reader.send(:link_start)
+    page_data = reader.send(:page_data)
 
     item = (page_data/:item)[0]
-    site_id = reader.parse_guid((item/:guid).inner_html)
+    site_id = reader.send(:parse_guid, ((item/:guid).inner_html))
 
     assert_not_blank site_id
     assert site_id.length >= 4
-
+    assert_valid_site_id site_id
   end
 
   def test_parse_description
     reader = Reddit::Reader.new :hot, 0
-    link_start = reader.link_start
-    page_data = reader.page_data
+    link_start = reader.send(:link_start)
+    page_data = reader.send(:page_data)
 
     (page_data/:item).each do |item|
-
+      assert_valid_url reader.send(:parse_description, ((item/:description).inner_html))
     end
   end
 
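Because the Reader helpers became private in 0.2.0, the parsing tests above now reach them through Object#send instead of direct calls. A condensed illustration of that pattern, using the same calls as test_parse_guid (assumes the gem is loaded and reddit's RSS feed is reachable):

  require "rubygems"
  require "reddit"

  reader    = Reddit::Reader.new :hot, 0
  page_data = reader.send(:page_data)                           # private in 0.2.0, so reached via send
  item      = (page_data/:item).first
  site_id   = reader.send(:parse_guid, (item/:guid).inner_html)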
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: ruby-reddit
 version: !ruby/object:Gem::Version
-  version: 0.1.1
+  version: 0.2.0
 platform: ruby
 authors:
 - Julia West
@@ -9,7 +9,7 @@ autorequire:
 bindir: bin
 cert_chain: []
 
-date: 2008-
+date: 2008-02-18 00:00:00 -05:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency
@@ -19,9 +19,9 @@ dependencies:
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: 1.
+        version: 1.5.0
     version:
-description: "== FEATURES/PROBLEMS: * Scrapes links from reddit's hot page and new page. == SYNOPSIS: # Get all the \"hot\"
+description: "== FEATURES/PROBLEMS: * Scrapes links from reddit's hot page and new page. == SYNOPSIS: require \"reddit\" # Get all the links from the \"hot\" page links = Reddit.read :hot # Check out the links! for link in links puts link.rank puts link.site_id puts link.url puts link.title puts link.date end # Get all the links from the first page of the ruby subreddit ruby_links = Reddit.read :ruby # Get all the links from the second page of the ruby subreddit ruby_links_2 = Reddit.read :ruby, :page => 1 == REQUIREMENTS: * hpricot * open-uri * mechanize"
 email: juliamae@gmail.com
 executables:
 - ruby-reddit