ruby-reddit 0.1.1 → 0.2.0
- data/History.txt +14 -3
- data/README.txt +10 -2
- data/lib/reddit.rb +72 -40
- data/test/test_reddit.rb +69 -22
- metadata +4 -4
data/History.txt
CHANGED

@@ -1,9 +1,20 @@
-== 0.1.0 / 2008-01-22
+== 0.2.0 / 2008-02-18
 
-* 1 minor enhancement
-  * 1st release. Link scraping from reddit's hot and new pages.
+* 5 minor enhancements
+  * Added reading top links from subreddits.
+  * Defaulted read options hash to nil so that page number does not have to be passed to read. Page now defaults to first.
+  * Added author attribute to Link.
+  * Added points attribute to Link.
+  * Removed Data class, so read is now a Reddit module method. Deprecates Reddit::Data.read
+* 1 bug fix
+  * Unescaped submitted URLs.
 
 == 0.1.1 / 2008-01-22
 
 * 1 bug fix
   * Link attributes assigned in proper order.
+
+== 0.1.0 / 2008-01-22
+
+* 1 minor enhancement
+  * 1st release. Link scraping from reddit's hot and new pages.
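Taken together, the read-related entries above change the gem's public entry point. A before/after sketch of the call (the 0.1.1 form is inferred from the "Removed Data class" bullet, so treat it as an assumption):

  # 0.1.1 - read lived on the Data class and the page had to be given
  links = Reddit::Data.read :hot, :page => 0

  # 0.2.0 - module-level method; the options hash defaults, so page 0 is implicit
  links = Reddit.read :hot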
data/README.txt
CHANGED

@@ -12,8 +12,10 @@ Interact with reddit.com. Read links and post links (coming soon!).
 
 == SYNOPSIS:
 
-
-
+  require "reddit"
+
+  # Get all the links from the "hot" page
+  links = Reddit.read :hot
 
   # Check out the links!
   for link in links
@@ -23,6 +25,12 @@ Interact with reddit.com. Read links and post links (coming soon!).
     puts link.title
     puts link.date
   end
+
+  # Get all the links from the first page of the ruby subreddit
+  ruby_links = Reddit.read :ruby
+
+  # Get all the links from the second page of the ruby subreddit
+  ruby_links_2 = Reddit.read :ruby, :page => 1
 
 == REQUIREMENTS:
 
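One detail the synopsis glosses over: :page is a zero-based index, which is why :page => 1 returns the second page. Going by the link_start helper in the lib/reddit.rb diff below, the reader turns the page number into a feed offset of page * 25 (the values shown are illustrative):

  Reddit.read :ruby               # :page => 0, feed fetched with ?count=0  (links 1-25)
  Reddit.read :ruby, :page => 1   # feed fetched with ?count=25 (links 26-50)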
data/lib/reddit.rb
CHANGED

@@ -4,29 +4,20 @@ require "rubygems"
 require "hpricot"
 require "open-uri"
 require "mechanize"
+require "cgi"
 
 module Reddit
 
-  VERSION = '0.1.1'
+  VERSION = '0.2.0'
 
   DefaultOptions = {
     :page => 0
   }
-
-
-
-
-
-    def self.read(section, options)
-      conf = Reddit::DefaultOptions.update options
-      reader = Reader.new section, conf[:page]
-      reader.links
-    end
-
-    def self.subreddit_url(subreddit)
-      "http://reddit.com/r/#{subreddit}/.rss"
-    end
-
+
+  def self.read(section, options={})
+    conf = Reddit::DefaultOptions.update options
+    reader = Reader.new section, conf[:page]
+    reader.links
   end
 
   class Reader
@@ -39,43 +30,60 @@ module Reddit
     }
 
     def initialize(section, page)
-      @section = section
       @page = page
+      @feed_url = generate_feed_url section
     end
 
     def links
       index=0
-
-
-
-
-
-
-
-
-
+      begin
+        collection = (page_data/:item).map do |item|
+          rank = link_start + (index += 1)
+          site_id = parse_guid((item/:guid).inner_html)
+          title = (item/:title).inner_html
+          date = (item/:"dc:date").inner_html
+          url = CGI.unescapeHTML(parse_description((item/:description).inner_html))
+
+          Link.new(rank, site_id, url, title, date)
+        end
+      rescue OpenURI::HTTPError
+        []
+      end
     end
 
-    def
-
+    def self.subreddit_url(subreddit)
+      "http://reddit.com/r/#{subreddit}/.rss"
     end
 
-
-
-
+    private
+    def generate_feed_url(section)
+      params = "?count=#{link_start}"
+      if Urls[section]
+        "#{Urls[section]}#{params}"
+      else
+        "#{self.class.subreddit_url(section)}#{params}"
+      end
+    end
 
-
-
-
-    end
+    def parse_guid(guid)
+      GuidRegExp.match(guid)[1]
+    end
 
-
-
-
+    def parse_description(description)
+      DescriptionRegExp.match(description)[1]
+    end
+
+    def page_data
+      Hpricot.XML(open(@feed_url))
+    end
+
+    def link_start
+      @page * 25
+    end
   end
 
   class Link
-    attr_accessor :rank, :site_id, :url, :title, :date
+    attr_accessor :rank, :site_id, :url, :title, :date
 
     def initialize(rank, site_id, url, title, date, points=nil, author=nil)
       @rank = rank
@@ -86,6 +94,30 @@ module Reddit
       @points = points
       @author = author
     end
+
+    def author
+      @author ||= parse_author
+    end
+
+    def points
+      @points ||= parse_points
+    end
+
+    private
+    def parse_author
+      doc = Hpricot(open(link_url))
+      (doc/"div.little a").first.inner_html
+    end
+
+    def parse_points
+      doc = Hpricot(open(link_url))
+      points_string = (doc/"div.little span.inside").inner_html
+      /\d+/.match(points_string)[0]
+    end
+
+    def link_url
+      "http://reddit.com/info/#{@site_id}/comments/"
+    end
+
   end
-
 end
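A note on the new Link accessors: author and points are lazy. When they are not passed to Link.new (the Reader never passes them), the first call scrapes the link's comments page, so each accessor can cost an HTTP round-trip. A sketch of the resulting behaviour, assuming reddit.com's 2008-era markup that parse_author and parse_points expect:

  link = Reddit.read(:ruby).first   # built from the RSS feed; author/points unset
  link.author   # fetches http://reddit.com/info/<site_id>/comments/ and memoizes
  link.points   # fetches the same page again via parse_points
  link.points   # memoized by @points ||= parse_points; no further request

Since parse_author and parse_points each call open(link_url) independently, reading both attributes fetches the comments page twice; caching the Hpricot document would halve that.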
data/test/test_reddit.rb
CHANGED

@@ -2,13 +2,41 @@ require File.dirname(__FILE__) + '/test_helper.rb'
 
 class RedditTest < Test::Unit::TestCase
 
-
+  # TODO: write tests for these assertions
+  def assert_not_blank(attribute, message=nil)
+    message = build_message message, '<?> is blank.', attribute
+    assert_block message do
+      !attribute.nil? && attribute != ""
+    end
+  end
+
+  def assert_valid_site_id(site_id, message=nil)
+    assert(/^[a-zA-Z0-9]+$/.match(site_id), message)
+  end
+
+  def assert_valid_url(url, message=nil)
+    uri = URI.parse(URI.encode(url))
+    assert_block message do
+      if uri.scheme
+        true
+      else
+        !/.*\/info\/.+/.match(url).nil?
+      end
+    end
   end
 
-  def
-
+  def assert_valid_date(date, message=nil)
+    assert_nothing_raised ArgumentError, message do
+      DateTime.parse date
+    end
+  end
+
+  def generate_random_string(size=15)
+    (1..size).collect { (i = Kernel.rand(62); i += ((i < 10) ? 48 : ((i < 36) ? 55 : 61 ))).chr }.join
   end
 
+  ####
+
   def test_main_urls
     for section, url in Reddit::Reader::Urls
       page = open(url)
@@ -16,52 +44,71 @@ class RedditTest < Test::Unit::TestCase
     end
   end
 
-  def
-
-
-
-
-
-
-
-    page = open(Reddit::Data.subreddit_url(subreddit))
-    assert_equal "200", page.status[0]
-  end
+  def test_get_subreddit_links_for_valid_subreddit
+    links = Reddit.read :ruby, :page => 0
+    assert_equal 25, links.length
+  end
+
+  def test_dont_get_subreddit_links_for_invalid_subreddit
+    links = Reddit.read generate_random_string.to_sym, :page =>0
+    assert_equal 0, links.length
   end
 
   def test_get_hot_links
-    links = Reddit
+    links = Reddit.read :hot, :page => 0
 
     assert_equal 25, links.length
     links.each_with_index do |link, index|
       assert_equal index + 1, link.rank
+
       assert_not_blank link.site_id
+      assert_valid_site_id link.site_id
+
       assert_not_blank link.url
+      assert_valid_url link.url
+
       assert_not_blank link.title
+
       assert_not_blank link.date
+      assert_valid_date link.date
     end
   end
+
+  def test_get_link_author
+    link = Reddit.read(:ruby).first
+    assert_not_nil link.author
+
+    page = open("http://reddit.com/user/#{link.author}")
+    assert_equal "200", page.status[0]
+  end
+
+  def test_get_link_points
+    link = Reddit.read(:ruby).first
+    assert_not_nil link.points
+
+    assert(/^\d+$/.match(link.points))
+  end
 
   def test_parse_guid
     reader = Reddit::Reader.new :hot, 0
-    link_start = reader.link_start
-    page_data = reader.page_data
+    link_start = reader.send(:link_start)
+    page_data = reader.send(:page_data)
 
     item = (page_data/:item)[0]
-    site_id = reader.parse_guid((item/:guid).inner_html)
+    site_id = reader.send(:parse_guid, ((item/:guid).inner_html))
 
     assert_not_blank site_id
    assert site_id.length >= 4
-
+    assert_valid_site_id site_id
  end
 
   def test_parse_description
     reader = Reddit::Reader.new :hot, 0
-    link_start = reader.link_start
-    page_data = reader.page_data
+    link_start = reader.send(:link_start)
+    page_data = reader.send(:page_data)
 
     (page_data/:item).each do |item|
-
+      assert_valid_url reader.send(:parse_description, ((item/:description).inner_html))
     end
   end
 
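Because link_start, page_data, parse_guid, and parse_description moved behind private, the tests now reach them with Object#send, which bypasses method visibility in Ruby 1.8. A minimal standalone illustration of the pattern (not code from the gem):

  class Example
    private
    def hidden; "reachable"; end
  end

  Example.new.hidden         # raises NoMethodError: private method called
  Example.new.send(:hidden)  # => "reachable"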
metadata
CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: ruby-reddit
 version: !ruby/object:Gem::Version
-  version: 0.1.1
+  version: 0.2.0
 platform: ruby
 authors:
 - Julia West
@@ -9,7 +9,7 @@ autorequire:
 bindir: bin
 cert_chain: []
 
-date: 2008-
+date: 2008-02-18 00:00:00 -05:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency
@@ -19,9 +19,9 @@ dependencies:
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: 1.
+        version: 1.5.0
     version:
-description: "== FEATURES/PROBLEMS: * Scrapes links from reddit's hot page and new page. == SYNOPSIS: # Get all the \"hot\"
+description: "== FEATURES/PROBLEMS: * Scrapes links from reddit's hot page and new page. == SYNOPSIS: require \"reddit\" # Get all the links from the \"hot\" page links = Reddit.read :hot # Check out the links! for link in links puts link.rank puts link.site_id puts link.url puts link.title puts link.date end # Get all the links from the first page of the ruby subreddit ruby_links = Reddit.read :ruby # Get all the links from the second page of the ruby subreddit ruby_links_2 = Reddit.read :ruby, :page => 1 == REQUIREMENTS: * hpricot * open-uri * mechanize"
 email: juliamae@gmail.com
 executables:
 - ruby-reddit