ruby-reddit 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (5)
  1. data/History.txt +14 -3
  2. data/README.txt +10 -2
  3. data/lib/reddit.rb +72 -40
  4. data/test/test_reddit.rb +69 -22
  5. metadata +4 -4
@@ -1,9 +1,20 @@
1
- == 0.1.0 / 2008-01-22
1
+ == 0.2.0 / 2008-02-18
2
2
 
3
- * 1 minor enhancement
4
- * 1st release. Link scraping from reddit's hot and new pages.
3
+ * 5 minor enhancements
4
+ * Added reading top links from subreddits.
5
+ * Defaulted read options hash to nil so that page number does not have to be passed to read. Page now defaults to first.
6
+ * Added author attribute to Link.
7
+ * Added points attribute to Link.
8
+ * Removed Data class, so read is now a Reddit module method. Deprecates Reddit::Data.read
9
+ * 1 bug fix
10
+ * Unescaped submitted URLs.
5
11
 
6
12
  == 0.1.1 / 2008-01-22
7
13
 
8
14
  * 1 bug fix
9
15
  * Link attributes assigned in proper order.
16
+
17
+ == 0.1.0 / 2008-01-22
18
+
19
+ * 1 minor enhancement
20
+ * 1st release. Link scraping from reddit's hot and new pages.
data/README.txt CHANGED
@@ -12,8 +12,10 @@ Interact with reddit.com. Read links and post links (coming soon!).
12
12
 
13
13
  == SYNOPSIS:
14
14
 
15
- # Get all the "hot" links on the first page
16
- links = Reddit::Data.read :hot, :page => 0
15
+ require "reddit"
16
+
17
+ # Get all the links from the "hot" page
18
+ links = Reddit.read :hot
17
19
 
18
20
  # Check out the links!
19
21
  for link in links
@@ -23,6 +25,12 @@ Interact with reddit.com. Read links and post links (coming soon!).
23
25
  puts link.title
24
26
  puts link.date
25
27
  end
28
+
29
+ # Get all the links from the first page of the ruby subreddit
30
+ ruby_links = Reddit.read :ruby
31
+
32
+ # Get all the links from the second page of the ruby subreddit
33
+ ruby_links_2 = Reddit.read :ruby, :page => 1
26
34
 
27
35
  == REQUIREMENTS:
28
36
 
@@ -4,29 +4,20 @@ require "rubygems"
4
4
  require "hpricot"
5
5
  require "open-uri"
6
6
  require "mechanize"
7
+ require "cgi"
7
8
 
8
9
  module Reddit
9
10
 
10
- VERSION = '0.1.1'
11
+ VERSION = '0.2.0'
11
12
 
12
13
  DefaultOptions = {
13
14
  :page => 0
14
15
  }
15
-
16
- Subreddits = %w{programming science politics business gadgets sports gaming entertainment netsec}
17
-
18
- class Data
19
-
20
- def self.read(section, options)
21
- conf = Reddit::DefaultOptions.update options
22
- reader = Reader.new section, conf[:page]
23
- reader.links
24
- end
25
-
26
- def self.subreddit_url(subreddit)
27
- "http://reddit.com/r/#{subreddit}/.rss"
28
- end
29
-
16
+
17
+ def self.read(section, options={})
18
+ conf = Reddit::DefaultOptions.update options
19
+ reader = Reader.new section, conf[:page]
20
+ reader.links
30
21
  end
31
22
 
32
23
  class Reader
@@ -39,43 +30,60 @@ module Reddit
39
30
  }
40
31
 
41
32
  def initialize(section, page)
42
- @section = section
43
33
  @page = page
34
+ @feed_url = generate_feed_url section
44
35
  end
45
36
 
46
37
  def links
47
38
  index=0
48
- collection = (page_data/:item).map do |item|
49
- rank = link_start + (index += 1)
50
- site_id = parse_guid((item/:guid).inner_html)
51
- title = (item/:title).inner_html
52
- date = (item/:"dc:date").inner_html
53
- url = parse_description((item/:description).inner_html)
54
-
55
- Link.new(rank, site_id, url, title, date)
56
- end
39
+ begin
40
+ collection = (page_data/:item).map do |item|
41
+ rank = link_start + (index += 1)
42
+ site_id = parse_guid((item/:guid).inner_html)
43
+ title = (item/:title).inner_html
44
+ date = (item/:"dc:date").inner_html
45
+ url = CGI.unescapeHTML(parse_description((item/:description).inner_html))
46
+
47
+ Link.new(rank, site_id, url, title, date)
48
+ end
49
+ rescue OpenURI::HTTPError
50
+ []
51
+ end
57
52
  end
58
53
 
59
- def parse_guid(guid)
60
- GuidRegExp.match(guid)[1]
54
+ def self.subreddit_url(subreddit)
55
+ "http://reddit.com/r/#{subreddit}/.rss"
61
56
  end
62
57
 
63
- def parse_description(description)
64
- DescriptionRegExp.match(description)[1]
65
- end
58
+ private
59
+ def generate_feed_url(section)
60
+ params = "?count=#{link_start}"
61
+ if Urls[section]
62
+ "#{Urls[section]}#{params}"
63
+ else
64
+ "#{self.class.subreddit_url(section)}#{params}"
65
+ end
66
+ end
66
67
 
67
- def page_data
68
- params = "?count=#{link_start}"
69
- doc = Hpricot.XML(open("#{Urls[@section]}#{params}"))
70
- end
68
+ def parse_guid(guid)
69
+ GuidRegExp.match(guid)[1]
70
+ end
71
71
 
72
- def link_start
73
- @page * 25
74
- end
72
+ def parse_description(description)
73
+ DescriptionRegExp.match(description)[1]
74
+ end
75
+
76
+ def page_data
77
+ Hpricot.XML(open(@feed_url))
78
+ end
79
+
80
+ def link_start
81
+ @page * 25
82
+ end
75
83
  end
76
84
 
77
85
  class Link
78
- attr_accessor :rank, :site_id, :url, :title, :date, :points, :author
86
+ attr_accessor :rank, :site_id, :url, :title, :date
79
87
 
80
88
  def initialize(rank, site_id, url, title, date, points=nil, author=nil)
81
89
  @rank = rank
@@ -86,6 +94,30 @@ module Reddit
86
94
  @points = points
87
95
  @author = author
88
96
  end
97
+
98
+ def author
99
+ @author ||= parse_author
100
+ end
101
+
102
+ def points
103
+ @points ||= parse_points
104
+ end
105
+
106
+ private
107
+ def parse_author
108
+ doc = Hpricot(open(link_url))
109
+ (doc/"div.little a").first.inner_html
110
+ end
111
+
112
+ def parse_points
113
+ doc = Hpricot(open(link_url))
114
+ points_string = (doc/"div.little span.inside").inner_html
115
+ /\d+/.match(points_string)[0]
116
+ end
117
+
118
+ def link_url
119
+ "http://reddit.com/info/#{@site_id}/comments/"
120
+ end
121
+
89
122
  end
90
-
91
123
  end
@@ -2,13 +2,41 @@ require File.dirname(__FILE__) + '/test_helper.rb'
2
2
 
3
3
  class RedditTest < Test::Unit::TestCase
4
4
 
5
- def setup
5
+ # TODO: write tests for these assertions
6
+ def assert_not_blank(attribute, message=nil)
7
+ message = build_message message, '<?> is blank.', attribute
8
+ assert_block message do
9
+ !attribute.nil? && attribute != ""
10
+ end
11
+ end
12
+
13
+ def assert_valid_site_id(site_id, message=nil)
14
+ assert(/^[a-zA-Z0-9]+$/.match(site_id), message)
15
+ end
16
+
17
+ def assert_valid_url(url, message=nil)
18
+ uri = URI.parse(URI.encode(url))
19
+ assert_block message do
20
+ if uri.scheme
21
+ true
22
+ else
23
+ !/.*\/info\/.+/.match(url).nil?
24
+ end
25
+ end
6
26
  end
7
27
 
8
- def assert_not_blank(attribute)
9
- !attribute.nil? && attribute != ""
28
+ def assert_valid_date(date, message=nil)
29
+ assert_nothing_raised ArgumentError, message do
30
+ DateTime.parse date
31
+ end
32
+ end
33
+
34
+ def generate_random_string(size=15)
35
+ (1..size).collect { (i = Kernel.rand(62); i += ((i < 10) ? 48 : ((i < 36) ? 55 : 61 ))).chr }.join
10
36
  end
11
37
 
38
+ ####
39
+
12
40
  def test_main_urls
13
41
  for section, url in Reddit::Reader::Urls
14
42
  page = open(url)
@@ -16,52 +44,71 @@ class RedditTest < Test::Unit::TestCase
16
44
  end
17
45
  end
18
46
 
19
- def test_subreddit_urls
20
- # First assert a bad subreddit will throw an HTTPError
21
- assert_raise OpenURI::HTTPError do
22
- page = open(Reddit::Data.subreddit_url("thefakestsubreddit"))
23
- end
24
-
25
- # Now make sure these are all 200s
26
- for subreddit in Reddit::Subreddits
27
- page = open(Reddit::Data.subreddit_url(subreddit))
28
- assert_equal "200", page.status[0]
29
- end
47
+ def test_get_subreddit_links_for_valid_subreddit
48
+ links = Reddit.read :ruby, :page => 0
49
+ assert_equal 25, links.length
50
+ end
51
+
52
+ def test_dont_get_subreddit_links_for_invalid_subreddit
53
+ links = Reddit.read generate_random_string.to_sym, :page =>0
54
+ assert_equal 0, links.length
30
55
  end
31
56
 
32
57
  def test_get_hot_links
33
- links = Reddit::Data.read :hot, :page => 0
58
+ links = Reddit.read :hot, :page => 0
34
59
 
35
60
  assert_equal 25, links.length
36
61
  links.each_with_index do |link, index|
37
62
  assert_equal index + 1, link.rank
63
+
38
64
  assert_not_blank link.site_id
65
+ assert_valid_site_id link.site_id
66
+
39
67
  assert_not_blank link.url
68
+ assert_valid_url link.url
69
+
40
70
  assert_not_blank link.title
71
+
41
72
  assert_not_blank link.date
73
+ assert_valid_date link.date
42
74
  end
43
75
  end
76
+
77
+ def test_get_link_author
78
+ link = Reddit.read(:ruby).first
79
+ assert_not_nil link.author
80
+
81
+ page = open("http://reddit.com/user/#{link.author}")
82
+ assert_equal "200", page.status[0]
83
+ end
84
+
85
+ def test_get_link_points
86
+ link = Reddit.read(:ruby).first
87
+ assert_not_nil link.points
88
+
89
+ assert(/^\d+$/.match(link.points))
90
+ end
44
91
 
45
92
  def test_parse_guid
46
93
  reader = Reddit::Reader.new :hot, 0
47
- link_start = reader.link_start
48
- page_data = reader.page_data
94
+ link_start = reader.send(:link_start)
95
+ page_data = reader.send(:page_data)
49
96
 
50
97
  item = (page_data/:item)[0]
51
- site_id = reader.parse_guid((item/:guid).inner_html)
98
+ site_id = reader.send(:parse_guid, ((item/:guid).inner_html))
52
99
 
53
100
  assert_not_blank site_id
54
101
  assert site_id.length >= 4
55
- assert /^[a-zA-Z0-9]+$/.match(site_id)
102
+ assert_valid_site_id site_id
56
103
  end
57
104
 
58
105
  def test_parse_description
59
106
  reader = Reddit::Reader.new :hot, 0
60
- link_start = reader.link_start
61
- page_data = reader.page_data
107
+ link_start = reader.send(:link_start)
108
+ page_data = reader.send(:page_data)
62
109
 
63
110
  (page_data/:item).each do |item|
64
- assert URI.parse(reader.parse_description((item/:description).inner_html))
111
+ assert_valid_url reader.send(:parse_description, ((item/:description).inner_html))
65
112
  end
66
113
  end
67
114
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ruby-reddit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Julia West
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2008-01-22 00:00:00 -05:00
12
+ date: 2008-02-18 00:00:00 -05:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -19,9 +19,9 @@ dependencies:
19
19
  requirements:
20
20
  - - ">="
21
21
  - !ruby/object:Gem::Version
22
- version: 1.4.0
22
+ version: 1.5.0
23
23
  version:
24
- description: "== FEATURES/PROBLEMS: * Scrapes links from reddit's hot page and new page. == SYNOPSIS: # Get all the \"hot\" links on the first page links = Reddit::Data.read :hot, :page => 0 # Check out the links! for link in links puts link.rank puts link.site_id puts link.url puts link.title puts link.date end == REQUIREMENTS: * hpricot * open-uri * mechanize"
24
+ description: "== FEATURES/PROBLEMS: * Scrapes links from reddit's hot page and new page. == SYNOPSIS: require \"reddit\" # Get all the links from the \"hot\" page links = Reddit.read :hot # Check out the links! for link in links puts link.rank puts link.site_id puts link.url puts link.title puts link.date end # Get all the links from the first page of the ruby subreddit ruby_links = Reddit.read :ruby # Get all the links from the second page of the ruby subreddit ruby_links_2 = Reddit.read :ruby, :page => 1 == REQUIREMENTS: * hpricot * open-uri * mechanize"
25
25
  email: juliamae@gmail.com
26
26
  executables:
27
27
  - ruby-reddit