ruby-reddit 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +14 -3
- data/README.txt +10 -2
- data/lib/reddit.rb +72 -40
- data/test/test_reddit.rb +69 -22
- metadata +4 -4
data/History.txt
CHANGED
@@ -1,9 +1,20 @@
-== 0.1.0 / 2008-01-22
+== 0.2.0 / 2008-02-18
 
-* 1 minor enhancement
-  * 1st release. Link scraping from reddit's hot and new pages.
+* 5 minor enhancements
+  * Added reading top links from subreddits.
+  * Defaulted read options hash to nil so that page number does not have to be passed to read. Page now defaults to first.
+  * Added author attribute to Link.
+  * Added points attribute to Link.
+  * Removed Data class, so read is now a Reddit module method. Deprecates Reddit::Data.read
+* 1 bug fix
+  * Unescaped submitted URLs.
 
 == 0.1.1 / 2008-01-22
 
 * 1 bug fix
   * Link attributes assigned in proper order.
+
+== 0.1.0 / 2008-01-22
+
+* 1 minor enhancement
+  * 1st release. Link scraping from reddit's hot and new pages.
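Taken together, the 0.2.0 entries above describe a simpler read API plus lazily fetched link metadata. A rough usage sketch pieced together from this changelog and the README/lib changes below (not an official example; it assumes the gem is installed and required as "reddit"):

  require "rubygems"
  require "reddit"

  # Page now defaults to the first page, so no options hash is needed.
  links = Reddit.read :ruby

  # author and points are parsed lazily from each link's comments page.
  links.each do |link|
    puts "#{link.rank}. #{link.title} (#{link.points} points, by #{link.author})"
  end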
data/README.txt
CHANGED
@@ -12,8 +12,10 @@ Interact with reddit.com. Read links and post links (coming soon!).
 
 == SYNOPSIS:
 
-
-
+  require "reddit"
+
+  # Get all the links from the "hot" page
+  links = Reddit.read :hot
 
   # Check out the links!
   for link in links
@@ -23,6 +25,12 @@ Interact with reddit.com. Read links and post links (coming soon!).
     puts link.title
     puts link.date
   end
+
+  # Get all the links from the first page of the ruby subreddit
+  ruby_links = Reddit.read :ruby
+
+  # Get all the links from the second page of the ruby subreddit
+  ruby_links_2 = Reddit.read :ruby, :page => 1
 
 == REQUIREMENTS:
 
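Pages are zero-indexed, which is why :page => 1 in the synopsis above returns the second page; calling Reddit.read without options falls back to DefaultOptions (:page => 0). A minimal sketch of the two calls side by side, under the same installed-gem assumption as above:

  require "rubygems"
  require "reddit"

  first_page  = Reddit.read :ruby               # same as :page => 0, links 1-25
  second_page = Reddit.read :ruby, :page => 1   # offset by 25 links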
data/lib/reddit.rb
CHANGED
@@ -4,29 +4,20 @@ require "rubygems"
 require "hpricot"
 require "open-uri"
 require "mechanize"
+require "cgi"
 
 module Reddit
 
-  VERSION = '0.1.1'
+  VERSION = '0.2.0'
 
   DefaultOptions = {
     :page => 0
   }
-
-
-
-
-
-  def self.read(section, options)
-    conf = Reddit::DefaultOptions.update options
-    reader = Reader.new section, conf[:page]
-    reader.links
-  end
-
-  def self.subreddit_url(subreddit)
-    "http://reddit.com/r/#{subreddit}/.rss"
-  end
-
+
+  def self.read(section, options={})
+    conf = Reddit::DefaultOptions.update options
+    reader = Reader.new section, conf[:page]
+    reader.links
   end
 
   class Reader
@@ -39,43 +30,60 @@ module Reddit
     }
 
     def initialize(section, page)
-      @section = section
       @page = page
+      @feed_url = generate_feed_url section
     end
 
     def links
       index=0
-
-
-
-
-
-
-
-
-
+      begin
+        collection = (page_data/:item).map do |item|
+          rank = link_start + (index += 1)
+          site_id = parse_guid((item/:guid).inner_html)
+          title = (item/:title).inner_html
+          date = (item/:"dc:date").inner_html
+          url = CGI.unescapeHTML(parse_description((item/:description).inner_html))
+
+          Link.new(rank, site_id, url, title, date)
+        end
+      rescue OpenURI::HTTPError
+        []
+      end
     end
 
-    def
-
+    def self.subreddit_url(subreddit)
+      "http://reddit.com/r/#{subreddit}/.rss"
     end
 
-
-
-
+    private
+    def generate_feed_url(section)
+      params = "?count=#{link_start}"
+      if Urls[section]
+        "#{Urls[section]}#{params}"
+      else
+        "#{self.class.subreddit_url(section)}#{params}"
+      end
+    end
 
-
-
-
-    end
+    def parse_guid(guid)
+      GuidRegExp.match(guid)[1]
+    end
 
-
-
-
+    def parse_description(description)
+      DescriptionRegExp.match(description)[1]
+    end
+
+    def page_data
+      Hpricot.XML(open(@feed_url))
+    end
+
+    def link_start
+      @page * 25
+    end
   end
 
   class Link
-    attr_accessor :rank, :site_id, :url, :title, :date
+    attr_accessor :rank, :site_id, :url, :title, :date
 
     def initialize(rank, site_id, url, title, date, points=nil, author=nil)
       @rank = rank
@@ -86,6 +94,30 @@
       @points = points
       @author = author
     end
+
+    def author
+      @author ||= parse_author
+    end
+
+    def points
+      @points ||= parse_points
+    end
+
+    private
+    def parse_author
+      doc = Hpricot(open(link_url))
+      (doc/"div.little a").first.inner_html
+    end
+
+    def parse_points
+      doc = Hpricot(open(link_url))
+      points_string = (doc/"div.little span.inside").inner_html
+      /\d+/.match(points_string)[0]
+    end
+
+    def link_url
+      "http://reddit.com/info/#{@site_id}/comments/"
+    end
+
   end
-
 end
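To make the new Reader plumbing easier to follow: generate_feed_url above appends a ?count offset of page * 25 to either a known section URL or the subreddit RSS fallback. A standalone sketch of that logic; the Urls hash itself is not shown in this diff, so the entry below is a hypothetical placeholder:

  # Hypothetical stand-in for Reddit::Reader::Urls, whose contents this diff does not show.
  URLS = { :hot => "http://reddit.com/.rss" }

  def feed_url(section, page)
    link_start = page * 25                                   # same arithmetic as Reader#link_start
    params     = "?count=#{link_start}"
    base       = URLS[section] || "http://reddit.com/r/#{section}/.rss"   # subreddit fallback
    "#{base}#{params}"
  end

  puts feed_url(:ruby, 1)   # => http://reddit.com/r/ruby/.rss?count=25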
data/test/test_reddit.rb
CHANGED
@@ -2,13 +2,41 @@ require File.dirname(__FILE__) + '/test_helper.rb'
 
 class RedditTest < Test::Unit::TestCase
 
-
+  # TODO: write tests for these assertions
+  def assert_not_blank(attribute, message=nil)
+    message = build_message message, '<?> is blank.', attribute
+    assert_block message do
+      !attribute.nil? && attribute != ""
+    end
+  end
+
+  def assert_valid_site_id(site_id, message=nil)
+    assert(/^[a-zA-Z0-9]+$/.match(site_id), message)
+  end
+
+  def assert_valid_url(url, message=nil)
+    uri = URI.parse(URI.encode(url))
+    assert_block message do
+      if uri.scheme
+        true
+      else
+        !/.*\/info\/.+/.match(url).nil?
+      end
+    end
   end
 
-  def
-
+  def assert_valid_date(date, message=nil)
+    assert_nothing_raised ArgumentError, message do
+      DateTime.parse date
+    end
+  end
+
+  def generate_random_string(size=15)
+    (1..size).collect { (i = Kernel.rand(62); i += ((i < 10) ? 48 : ((i < 36) ? 55 : 61 ))).chr }.join
   end
 
+  ####
+
   def test_main_urls
     for section, url in Reddit::Reader::Urls
       page = open(url)
@@ -16,52 +44,71 @@ class RedditTest < Test::Unit::TestCase
     end
   end
 
-  def
-
-
-
-
-
-
-
-      page = open(Reddit::Data.subreddit_url(subreddit))
-      assert_equal "200", page.status[0]
-    end
+  def test_get_subreddit_links_for_valid_subreddit
+    links = Reddit.read :ruby, :page => 0
+    assert_equal 25, links.length
+  end
+
+  def test_dont_get_subreddit_links_for_invalid_subreddit
+    links = Reddit.read generate_random_string.to_sym, :page =>0
+    assert_equal 0, links.length
   end
 
   def test_get_hot_links
-    links = Reddit
+    links = Reddit.read :hot, :page => 0
 
     assert_equal 25, links.length
     links.each_with_index do |link, index|
       assert_equal index + 1, link.rank
+
       assert_not_blank link.site_id
+      assert_valid_site_id link.site_id
+
       assert_not_blank link.url
+      assert_valid_url link.url
+
       assert_not_blank link.title
+
       assert_not_blank link.date
+      assert_valid_date link.date
     end
   end
+
+  def test_get_link_author
+    link = Reddit.read(:ruby).first
+    assert_not_nil link.author
+
+    page = open("http://reddit.com/user/#{link.author}")
+    assert_equal "200", page.status[0]
+  end
+
+  def test_get_link_points
+    link = Reddit.read(:ruby).first
+    assert_not_nil link.points
+
+    assert(/^\d+$/.match(link.points))
+  end
 
   def test_parse_guid
     reader = Reddit::Reader.new :hot, 0
-    link_start = reader.link_start
-    page_data = reader.page_data
+    link_start = reader.send(:link_start)
+    page_data = reader.send(:page_data)
 
     item = (page_data/:item)[0]
-    site_id = reader.parse_guid((item/:guid).inner_html)
+    site_id = reader.send(:parse_guid, ((item/:guid).inner_html))
 
     assert_not_blank site_id
     assert site_id.length >= 4
-
+    assert_valid_site_id site_id
   end
 
   def test_parse_description
     reader = Reddit::Reader.new :hot, 0
-    link_start = reader.link_start
-    page_data = reader.page_data
+    link_start = reader.send(:link_start)
+    page_data = reader.send(:page_data)
 
     (page_data/:item).each do |item|
-
+      assert_valid_url reader.send(:parse_description, ((item/:description).inner_html))
     end
   end
 
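Because the Reader helpers became private in 0.2.0, the parsing tests above now reach them through Object#send instead of direct calls. A condensed illustration of that pattern, using the same calls as test_parse_guid (assumes the gem is loaded and reddit's RSS feed is reachable):

  require "rubygems"
  require "reddit"

  reader    = Reddit::Reader.new :hot, 0
  page_data = reader.send(:page_data)                           # private in 0.2.0, so reached via send
  item      = (page_data/:item).first
  site_id   = reader.send(:parse_guid, (item/:guid).inner_html)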
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: ruby-reddit
 version: !ruby/object:Gem::Version
-  version: 0.1.1
+  version: 0.2.0
 platform: ruby
 authors:
 - Julia West
@@ -9,7 +9,7 @@ autorequire:
 bindir: bin
 cert_chain: []
 
-date: 2008-
+date: 2008-02-18 00:00:00 -05:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency
@@ -19,9 +19,9 @@ dependencies:
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: 1.
+        version: 1.5.0
     version:
-description: "== FEATURES/PROBLEMS: * Scrapes links from reddit's hot page and new page. == SYNOPSIS: # Get all the \"hot\"
+description: "== FEATURES/PROBLEMS: * Scrapes links from reddit's hot page and new page. == SYNOPSIS: require \"reddit\" # Get all the links from the \"hot\" page links = Reddit.read :hot # Check out the links! for link in links puts link.rank puts link.site_id puts link.url puts link.title puts link.date end # Get all the links from the first page of the ruby subreddit ruby_links = Reddit.read :ruby # Get all the links from the second page of the ruby subreddit ruby_links_2 = Reddit.read :ruby, :page => 1 == REQUIREMENTS: * hpricot * open-uri * mechanize"
 email: juliamae@gmail.com
 executables:
 - ruby-reddit