ruby-reddit 0.1.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (5) hide show
  1. data/History.txt +14 -3
  2. data/README.txt +10 -2
  3. data/lib/reddit.rb +72 -40
  4. data/test/test_reddit.rb +69 -22
  5. metadata +4 -4
data/History.txt CHANGED
@@ -1,9 +1,20 @@
1
- == 0.1.0 / 2008-01-22
1
+ == 0.2.0 / 2008-02-18
2
2
 
3
- * 1 minor enhancement
4
- * 1st release. Link scraping from reddit's hot and new pages.
3
+ * 5 minor enhancements
4
+ * Added reading top links from subreddits.
5
+ * Defaulted read options hash to an empty hash so that the page number does not have to be passed to read. Page now defaults to the first page.
6
+ * Added author attribute to Link.
7
+ * Added points attribute to Link.
8
+ * Removed Data class, so read is now a Reddit module method. Deprecates Reddit::Data.read
9
+ * 1 bug fix
10
+ * Unescaped submitted URLs.
5
11
 
6
12
  == 0.1.1 / 2008-01-22
7
13
 
8
14
  * 1 bug fix
9
15
  * Link attributes assigned in proper order.
16
+
17
+ == 0.1.0 / 2008-01-22
18
+
19
+ * 1 minor enhancement
20
+ * 1st release. Link scraping from reddit's hot and new pages.
data/README.txt CHANGED
@@ -12,8 +12,10 @@ Interact with reddit.com. Read links and post links (coming soon!).
12
12
 
13
13
  == SYNOPSIS:
14
14
 
15
- # Get all the "hot" links on the first page
16
- links = Reddit::Data.read :hot, :page => 0
15
+ require "reddit"
16
+
17
+ # Get all the links from the "hot" page
18
+ links = Reddit.read :hot
17
19
 
18
20
  # Check out the links!
19
21
  for link in links
@@ -23,6 +25,12 @@ Interact with reddit.com. Read links and post links (coming soon!).
23
25
  puts link.title
24
26
  puts link.date
25
27
  end
28
+
29
+ # Get all the links from the first page of the ruby subreddit
30
+ ruby_links = Reddit.read :ruby
31
+
32
+ # Get all the links from the second page of the ruby subreddit
33
+ ruby_links_2 = Reddit.read :ruby, :page => 1
26
34
 
27
35
  == REQUIREMENTS:
28
36
 
data/lib/reddit.rb CHANGED
@@ -4,29 +4,20 @@ require "rubygems"
4
4
  require "hpricot"
5
5
  require "open-uri"
6
6
  require "mechanize"
7
+ require "cgi"
7
8
 
8
9
  module Reddit
9
10
 
10
- VERSION = '0.1.1'
11
+ VERSION = '0.2.0'
11
12
 
12
13
  DefaultOptions = {
13
14
  :page => 0
14
15
  }
15
-
16
- Subreddits = %w{programming science politics business gadgets sports gaming entertainment netsec}
17
-
18
- class Data
19
-
20
- def self.read(section, options)
21
- conf = Reddit::DefaultOptions.update options
22
- reader = Reader.new section, conf[:page]
23
- reader.links
24
- end
25
-
26
- def self.subreddit_url(subreddit)
27
- "http://reddit.com/r/#{subreddit}/.rss"
28
- end
29
-
16
+
17
+ def self.read(section, options={})
18
+ conf = Reddit::DefaultOptions.update options
19
+ reader = Reader.new section, conf[:page]
20
+ reader.links
30
21
  end
31
22
 
32
23
  class Reader
@@ -39,43 +30,60 @@ module Reddit
39
30
  }
40
31
 
41
32
  def initialize(section, page)
42
- @section = section
43
33
  @page = page
34
+ @feed_url = generate_feed_url section
44
35
  end
45
36
 
46
37
  def links
47
38
  index=0
48
- collection = (page_data/:item).map do |item|
49
- rank = link_start + (index += 1)
50
- site_id = parse_guid((item/:guid).inner_html)
51
- title = (item/:title).inner_html
52
- date = (item/:"dc:date").inner_html
53
- url = parse_description((item/:description).inner_html)
54
-
55
- Link.new(rank, site_id, url, title, date)
56
- end
39
+ begin
40
+ collection = (page_data/:item).map do |item|
41
+ rank = link_start + (index += 1)
42
+ site_id = parse_guid((item/:guid).inner_html)
43
+ title = (item/:title).inner_html
44
+ date = (item/:"dc:date").inner_html
45
+ url = CGI.unescapeHTML(parse_description((item/:description).inner_html))
46
+
47
+ Link.new(rank, site_id, url, title, date)
48
+ end
49
+ rescue OpenURI::HTTPError
50
+ []
51
+ end
57
52
  end
58
53
 
59
- def parse_guid(guid)
60
- GuidRegExp.match(guid)[1]
54
+ def self.subreddit_url(subreddit)
55
+ "http://reddit.com/r/#{subreddit}/.rss"
61
56
  end
62
57
 
63
- def parse_description(description)
64
- DescriptionRegExp.match(description)[1]
65
- end
58
+ private
59
+ def generate_feed_url(section)
60
+ params = "?count=#{link_start}"
61
+ if Urls[section]
62
+ "#{Urls[section]}#{params}"
63
+ else
64
+ "#{self.class.subreddit_url(section)}#{params}"
65
+ end
66
+ end
66
67
 
67
- def page_data
68
- params = "?count=#{link_start}"
69
- doc = Hpricot.XML(open("#{Urls[@section]}#{params}"))
70
- end
68
+ def parse_guid(guid)
69
+ GuidRegExp.match(guid)[1]
70
+ end
71
71
 
72
- def link_start
73
- @page * 25
74
- end
72
+ def parse_description(description)
73
+ DescriptionRegExp.match(description)[1]
74
+ end
75
+
76
+ def page_data
77
+ Hpricot.XML(open(@feed_url))
78
+ end
79
+
80
+ def link_start
81
+ @page * 25
82
+ end
75
83
  end
76
84
 
77
85
  class Link
78
- attr_accessor :rank, :site_id, :url, :title, :date, :points, :author
86
+ attr_accessor :rank, :site_id, :url, :title, :date
79
87
 
80
88
  def initialize(rank, site_id, url, title, date, points=nil, author=nil)
81
89
  @rank = rank
@@ -86,6 +94,30 @@ module Reddit
86
94
  @points = points
87
95
  @author = author
88
96
  end
97
+
98
+ def author
99
+ @author ||= parse_author
100
+ end
101
+
102
+ def points
103
+ @points ||= parse_points
104
+ end
105
+
106
+ private
107
+ def parse_author
108
+ doc = Hpricot(open(link_url))
109
+ (doc/"div.little a").first.inner_html
110
+ end
111
+
112
+ def parse_points
113
+ doc = Hpricot(open(link_url))
114
+ points_string = (doc/"div.little span.inside").inner_html
115
+ /\d+/.match(points_string)[0]
116
+ end
117
+
118
+ def link_url
119
+ "http://reddit.com/info/#{@site_id}/comments/"
120
+ end
121
+
89
122
  end
90
-
91
123
  end
data/test/test_reddit.rb CHANGED
@@ -2,13 +2,41 @@ require File.dirname(__FILE__) + '/test_helper.rb'
2
2
 
3
3
  class RedditTest < Test::Unit::TestCase
4
4
 
5
- def setup
5
+ # TODO: write tests for these assertions
6
+ def assert_not_blank(attribute, message=nil)
7
+ message = build_message message, '<?> is blank.', attribute
8
+ assert_block message do
9
+ !attribute.nil? && attribute != ""
10
+ end
11
+ end
12
+
13
+ def assert_valid_site_id(site_id, message=nil)
14
+ assert(/^[a-zA-Z0-9]+$/.match(site_id), message)
15
+ end
16
+
17
+ def assert_valid_url(url, message=nil)
18
+ uri = URI.parse(URI.encode(url))
19
+ assert_block message do
20
+ if uri.scheme
21
+ true
22
+ else
23
+ !/.*\/info\/.+/.match(url).nil?
24
+ end
25
+ end
6
26
  end
7
27
 
8
- def assert_not_blank(attribute)
9
- !attribute.nil? && attribute != ""
28
+ def assert_valid_date(date, message=nil)
29
+ assert_nothing_raised ArgumentError, message do
30
+ DateTime.parse date
31
+ end
32
+ end
33
+
34
+ def generate_random_string(size=15)
35
+ (1..size).collect { (i = Kernel.rand(62); i += ((i < 10) ? 48 : ((i < 36) ? 55 : 61 ))).chr }.join
10
36
  end
11
37
 
38
+ ####
39
+
12
40
  def test_main_urls
13
41
  for section, url in Reddit::Reader::Urls
14
42
  page = open(url)
@@ -16,52 +44,71 @@ class RedditTest < Test::Unit::TestCase
16
44
  end
17
45
  end
18
46
 
19
- def test_subreddit_urls
20
- # First assert a bad subreddit will throw an HTTPError
21
- assert_raise OpenURI::HTTPError do
22
- page = open(Reddit::Data.subreddit_url("thefakestsubreddit"))
23
- end
24
-
25
- # Now make sure these are all 200s
26
- for subreddit in Reddit::Subreddits
27
- page = open(Reddit::Data.subreddit_url(subreddit))
28
- assert_equal "200", page.status[0]
29
- end
47
+ def test_get_subreddit_links_for_valid_subreddit
48
+ links = Reddit.read :ruby, :page => 0
49
+ assert_equal 25, links.length
50
+ end
51
+
52
+ def test_dont_get_subreddit_links_for_invalid_subreddit
53
+ links = Reddit.read generate_random_string.to_sym, :page =>0
54
+ assert_equal 0, links.length
30
55
  end
31
56
 
32
57
  def test_get_hot_links
33
- links = Reddit::Data.read :hot, :page => 0
58
+ links = Reddit.read :hot, :page => 0
34
59
 
35
60
  assert_equal 25, links.length
36
61
  links.each_with_index do |link, index|
37
62
  assert_equal index + 1, link.rank
63
+
38
64
  assert_not_blank link.site_id
65
+ assert_valid_site_id link.site_id
66
+
39
67
  assert_not_blank link.url
68
+ assert_valid_url link.url
69
+
40
70
  assert_not_blank link.title
71
+
41
72
  assert_not_blank link.date
73
+ assert_valid_date link.date
42
74
  end
43
75
  end
76
+
77
+ def test_get_link_author
78
+ link = Reddit.read(:ruby).first
79
+ assert_not_nil link.author
80
+
81
+ page = open("http://reddit.com/user/#{link.author}")
82
+ assert_equal "200", page.status[0]
83
+ end
84
+
85
+ def test_get_link_points
86
+ link = Reddit.read(:ruby).first
87
+ assert_not_nil link.points
88
+
89
+ assert(/^\d+$/.match(link.points))
90
+ end
44
91
 
45
92
  def test_parse_guid
46
93
  reader = Reddit::Reader.new :hot, 0
47
- link_start = reader.link_start
48
- page_data = reader.page_data
94
+ link_start = reader.send(:link_start)
95
+ page_data = reader.send(:page_data)
49
96
 
50
97
  item = (page_data/:item)[0]
51
- site_id = reader.parse_guid((item/:guid).inner_html)
98
+ site_id = reader.send(:parse_guid, ((item/:guid).inner_html))
52
99
 
53
100
  assert_not_blank site_id
54
101
  assert site_id.length >= 4
55
- assert /^[a-zA-Z0-9]+$/.match(site_id)
102
+ assert_valid_site_id site_id
56
103
  end
57
104
 
58
105
  def test_parse_description
59
106
  reader = Reddit::Reader.new :hot, 0
60
- link_start = reader.link_start
61
- page_data = reader.page_data
107
+ link_start = reader.send(:link_start)
108
+ page_data = reader.send(:page_data)
62
109
 
63
110
  (page_data/:item).each do |item|
64
- assert URI.parse(reader.parse_description((item/:description).inner_html))
111
+ assert_valid_url reader.send(:parse_description, ((item/:description).inner_html))
65
112
  end
66
113
  end
67
114
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ruby-reddit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Julia West
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2008-01-22 00:00:00 -05:00
12
+ date: 2008-02-18 00:00:00 -05:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -19,9 +19,9 @@ dependencies:
19
19
  requirements:
20
20
  - - ">="
21
21
  - !ruby/object:Gem::Version
22
- version: 1.4.0
22
+ version: 1.5.0
23
23
  version:
24
- description: "== FEATURES/PROBLEMS: * Scrapes links from reddit's hot page and new page. == SYNOPSIS: # Get all the \"hot\" links on the first page links = Reddit::Data.read :hot, :page => 0 # Check out the links! for link in links puts link.rank puts link.site_id puts link.url puts link.title puts link.date end == REQUIREMENTS: * hpricot * open-uri * mechanize"
24
+ description: "== FEATURES/PROBLEMS: * Scrapes links from reddit's hot page and new page. == SYNOPSIS: require \"reddit\" # Get all the links from the \"hot\" page links = Reddit.read :hot # Check out the links! for link in links puts link.rank puts link.site_id puts link.url puts link.title puts link.date end # Get all the links from the first page of the ruby subreddit ruby_links = Reddit.read :ruby # Get all the links from the second page of the ruby subreddit ruby_links_2 = Reddit.read :ruby, :page => 1 == REQUIREMENTS: * hpricot * open-uri * mechanize"
25
25
  email: juliamae@gmail.com
26
26
  executables:
27
27
  - ruby-reddit