hn2json 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2012 Joseph Adams
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
File without changes
data/Rakefile ADDED
@@ -0,0 +1,41 @@
1
+ require File.dirname(__FILE__) + '/lib/hn2json/version'
2
+
3
# Internal: Ask the shell whether it can resolve a command name.
#
# command - The String command name; it is interpolated into the shell line
#           unescaped, so only pass trusted names.
#
# Returns whatever Kernel#system reports for the `type` lookup (all output
# of the lookup is discarded).
def command?(command)
  lookup = "type #{command} > /dev/null 2>&1"
  system(lookup)
end
6
+
7
+ #
8
+ # Gems
9
+ #
10
+
11
desc "Build gem."
task(:gem) { sh "gem build hn2json.gemspec" }

# Pushes the first gem file in the working directory whose name ends in the
# current version. Deliberately left without a `desc`, as before.
task :push => [:gem] do
  built_gem = Dir.glob("*-#{HN2JSON::VERSION}.gem").first
  sh "gem push #{built_gem}"
end

desc "Install gem."
task(:install => [:gem]) { sh "gem install hn2json-#{HN2JSON::VERSION}.gem" }

desc "Build the gem, install it and open irb."
task(:irb => [:install]) { sh "irb -r hn2json" }

desc "tag version"
task :tag do
  # Tag the current version, publish the tag, then remove untracked files
  # and directories from the working tree.
  [
    "git tag v#{HN2JSON::VERSION}",
    "git push origin master --tags",
    "git clean -fd"
  ].each { |command_line| sh command_line }
end

desc "tag version and push gem to server"
task(:release => [:push, :tag]) { puts "And away it goes!" }
@@ -0,0 +1,97 @@
1
module HN2JSON
  # Internal: Represents a post, poll, discussion or comment on HackerNews.
  #
  # Creating an Entity requests the page for the given ID, hands the response
  # to a Parser, asks the parser which type of item it is and then lets the
  # parser fill in the attributes for that type. Attributes the parser does
  # not set stay nil.
  #
  # The parser populates an Entity through #add_attrs using plain attribute
  # assignment (`entity.title = ...`), so every parser-populated attribute
  # needs a public writer as well as a reader.
  class Entity

    # Public: Gets/Sets the IDs of all top-level comments.
    attr_accessor :comments

    # Public: Gets/Sets the String date when the Entity was posted to HackerNews.
    attr_accessor :date_posted

    # Public: Gets/Sets the String text of the Entity, if available.
    attr_accessor :fulltext

    # Public: Returns the ID the Entity was created with.
    attr_reader :id

    # Public: Gets/Sets the ID of the parent, if available.
    attr_accessor :parent

    # Public: Gets/Sets the String username of the user who posted the Entity.
    attr_accessor :posted_by

    # Public: Gets/Sets the String title of the Entity, if available.
    attr_accessor :title

    # Public: Returns the Symbol type of the Entity
    # (:post, :poll, :discussion, :comment or :error).
    attr_reader :type

    # Public: Gets/Sets the String url of an Entity, if available.
    attr_accessor :url

    # Public: Gets/Sets the Integer number of upvotes the Entity has received.
    attr_accessor :votes

    # Public: Gets/Sets a 2D Array of ["Thing you're voting on", number of upvotes].
    attr_accessor :voting_on

    # Public: Fetch and parse the HackerNews item with the given ID.
    #
    # id - The ID of the item to load.
    #
    # Raises whatever Request.new or Parser.new raise for this ID.
    def initialize(id)
      @id = id

      # Start every attribute at nil so that attributes which do not exist
      # for the detected type are defined but empty.
      @type        = nil
      @parent      = nil
      @url         = nil
      @title       = nil
      @fulltext    = nil
      @posted_by   = nil
      @date_posted = nil
      @voting_on   = nil
      @comments    = nil
      @votes       = nil

      get_page
      determine_type

      get_attrs
    end

    # Internal: Request the page for this Entity and wrap it in a Parser.
    #
    # Returns the Parser.
    def get_page
      # Despite its name, @html holds the Request object, not a raw String.
      @html = Request.new(id)
      @parser = Parser.new @html
    end

    # Internal: Ask the parser which type of item the page represents.
    #
    # Returns the Symbol type.
    def determine_type
      @type = @parser.determine_type
    end

    # Internal: Let the parser fill in the attributes for the detected type.
    # For :error (or any unrecognised type) nothing is filled in.
    #
    # Returns the result of the parser call, or nil when nothing was done.
    def get_attrs
      case @type
      when :post
        @parser.get_attrs_post self
      when :comment
        @parser.get_attrs_comment self
      when :poll
        @parser.get_attrs_poll self
      when :discussion
        @parser.get_attrs_discussion self
      when :error
        # Nothing to extract from an error page.
        nil
      end
    end

    # Internal: Yields self for adding attr
    #
    # Examples
    #
    #   entity.add_attrs do |entity|
    #     entity.title = "Hello World!"
    #   end
    #
    # Returns the value of the block.
    def add_attrs
      yield self
    end

  end

end
@@ -0,0 +1,8 @@
1
module HN2JSON
  # Public: Error class for IDs that fail validation before any request is made.
  InvalidIdError = Class.new(StandardError)

  # Public: Error class for pages that could not be fetched or do not exist.
  RequestError = Class.new(StandardError)

  # Public: Error class for pages whose HTML could not be parsed.
  ParseError = Class.new(StandardError)
end
@@ -0,0 +1,246 @@
1
module HN2JSON

  # Public: Parse HTML to produce HackerNews entities
  #
  # A Parser wraps the Nokogiri fragment built from one fetched page. It can
  # classify the page (#determine_type) and copy the attributes for each item
  # type onto an entity via the entity's #add_attrs block. All extraction is
  # positional/CSS based, so it is tied to the page markup the selectors were
  # written against.
  class Parser

    # Internal: Matches one <a href=... rel=...>text</a> element. Group 2 is
    # the href value. Matching is non-greedy and confined to a single tag so
    # that several links in one comment are each replaced on their own.
    LINK_TAG = /<a\shref=(['"])(.*?)\1[^>]*rel=[^>]*>.*?<\/a>/

    # Internal: Matches any opening or closing tag.
    ANY_TAG = /<\/?[^>]*>/

    # Public: Returns the parsed Nokogiri document fragment.
    attr_reader :doc

    # Public: Build a parser for a fetched page.
    #
    # response - An object whose #html returns the page body as a String
    #            (a Request in this library). The String is re-tagged as
    #            UTF-8 in place.
    #
    # Raises HN2JSON::ParseError if Nokogiri fails to parse the body.
    def initialize(response)
      html = response.html
      html.force_encoding "UTF-8"

      begin
        @doc = Nokogiri::HTML::DocumentFragment.parse html
      rescue StandardError
        raise ParseError, "there was an error parsing the page"
      end
    end

    # Public: Classify the page.
    #
    # Without a title link (or when the only title link is the "More" pager)
    # the page is a :comment if it has more than 7 table cells, else :error.
    # With a title link, the 13th table cell decides: a nested table means
    # :poll, exactly one form means :discussion, anything else :post. A page
    # that has a title link but no 13th cell is reported as :error.
    #
    # Returns the Symbol type.
    def determine_type
      title = @doc.css('.title a')

      if title.length < 1 || title[0].content == "More"
        return @doc.css('td').length > 7 ? :comment : :error
      end

      td = @doc.css('td')[12]
      # Too few cells to be one of the known item layouts.
      return :error if td.nil?

      if td.css('table').length > 0
        :poll
      elsif td.css('form').length == 1
        :discussion
      else
        :post
      end
    end

    # Public: Copy the attributes of a comment page onto the entity.
    #
    # entity - The object to populate; must implement #add_attrs and the
    #          writers parent=, fulltext=, date_posted=, comments=, posted_by=.
    #
    # Returns the value of the add_attrs block.
    def get_attrs_comment(entity)
      # The third link of the comment header is taken as the parent link;
      # everything after "id=" is the parent's ID (kept as a String).
      parent_url = @doc.css('.comhead a')[2]['href']
      parent = /id\=(.*)/.match(parent_url)[1]

      fulltext = strip_markup @doc.css('.comment')[0].to_s

      comhead = @doc.css('.comhead')[0]
      date_posted = get_date comhead
      posted_by = get_posted_by comhead
      comments = get_comments

      entity.add_attrs do |e|
        e.parent = parent
        e.fulltext = fulltext
        e.date_posted = date_posted
        e.comments = comments
        e.posted_by = posted_by
      end
    end

    # Public: Copy the attributes of a link post onto the entity.
    #
    # entity - The object to populate; must implement #add_attrs and the
    #          writers url=, title=, date_posted=, comments=, votes=, posted_by=.
    #
    # Returns the value of the add_attrs block.
    def get_attrs_post(entity)
      subtext = @doc.css('.subtext')[0]
      date_posted = get_date subtext
      posted_by = get_posted_by subtext
      votes = get_num_votes subtext
      comments = get_comments

      head = @doc.css('.title a')[0]
      title = head.content
      url = head['href']

      entity.add_attrs do |e|
        e.url = url
        e.title = title
        e.date_posted = date_posted
        e.comments = comments
        e.votes = votes
        e.posted_by = posted_by
      end
    end

    # Public: Copy the attributes of a poll onto the entity.
    #
    # entity - The object to populate; must implement #add_attrs and the
    #          writers title=, fulltext=, date_posted=, posted_by=, votes=,
    #          comments=, voting_on=.
    #
    # Returns the value of the add_attrs block.
    def get_attrs_poll(entity)
      title = @doc.css('.title a')[0].content

      subtext = @doc.css('.subtext')[0]
      date_posted = get_date subtext
      posted_by = get_posted_by subtext
      votes = get_num_votes subtext
      comments = get_comments

      # The poll text is only taken when the row after the 2px spacer row
      # has exactly two cells; otherwise the poll has no text.
      fulltext_elem = @doc.css('tr[style="height:2px"] + tr > td')
      fulltext = fulltext_elem.length == 2 ? fulltext_elem[1].content : ''

      voting_on = get_voting_on

      entity.add_attrs do |e|
        e.title = title
        e.fulltext = fulltext
        e.date_posted = date_posted
        e.posted_by = posted_by
        e.votes = votes
        e.comments = comments
        e.voting_on = voting_on
      end
    end

    # Public: Copy the attributes of a text-only discussion onto the entity.
    #
    # entity - The object to populate; must implement #add_attrs and the
    #          writers title=, fulltext=, date_posted=, posted_by=, comments=,
    #          votes=.
    #
    # Returns the value of the add_attrs block.
    def get_attrs_discussion(entity)
      title = @doc.css('.title a')[0].content
      fulltext = @doc.css('td')[10].content

      subtext = @doc.css('.subtext')[0]
      date_posted = get_date subtext
      posted_by = get_posted_by subtext
      votes = get_num_votes subtext
      comments = get_comments

      entity.add_attrs do |e|
        e.title = title
        e.fulltext = fulltext
        e.date_posted = date_posted
        e.posted_by = posted_by
        e.comments = comments
        e.votes = votes
      end
    end

    # Public: Collect the options of a poll.
    #
    # The option rows are consumed in groups of three: the first row of a
    # group is the option text, the second its score (with " points"
    # removed), the third is ignored.
    #
    # Returns an Array of [String option, String points] pairs; a trailing
    # group without a score row yields a one-element Array.
    def get_voting_on
      rows = @doc.css('tr > td + td > table tr')

      rows.each_slice(3).map do |option, score, _ignored|
        entry = [option.content]
        entry.push score.content.gsub(/\spoints/, '') if score
        entry
      end
    end

    # Public: Collect the IDs of the comments listed on this page, following
    # the "More" link (see #get_comments_more) when there is one.
    #
    # Returns an Array of String comment IDs.
    def get_comments
      comments = []

      # Comment rows whose indent image has width 0, i.e. two levels up from
      # the image, then down to the cell with class "default".
      full_comments = @doc.css('td > img[width="0"]').xpath("..").xpath("..").css('.default')

      full_comments.each do |comment|
        link = comment.css('span a')[1]
        # Without a second header link there is no item ID to extract; skip
        # the cell instead of failing the whole page.
        next if link.nil?

        comments.push link['href'].gsub("item?id=", '')
      end

      get_comments_more doc, comments
    end

    # Public: Append the comment IDs found behind the page's "More" link.
    #
    # doc      - The document to search for the link.
    # comments - The Array of IDs collected so far.
    #
    # Returns the Array of IDs, extended by those of all following pages
    # (each page is fetched with Request and parsed with a fresh Parser).
    def get_comments_more(doc, comments)
      links = doc.css('tr .title a')
      return comments if links.length == 0

      # Only a last title link of the form /x?fnid=... is a pager link.
      match = /\/x\?fnid=(.*)/.match(links.last['href'])
      return comments if match.nil?

      next_page = Parser.new(Request.new(match[1], true))
      comments + next_page.get_comments
    end

    # Public: Read the vote count from a subtext node.
    #
    # subtext - The node containing the score span.
    #
    # Returns the Integer parsed from the first span's text.
    def get_num_votes(subtext)
      subtext.css('span')[0].content.to_i
    end

    # Public: Read the author from a subtext/header node.
    #
    # subtext - The node containing the user link.
    #
    # Returns the String text of the node's first link.
    def get_posted_by(subtext)
      subtext.css('a')[0].content
    end

    # Public: Turn the relative "N units ago" text of a node into a date.
    #
    # subtext - The node whose text contains the relative time.
    #
    # Returns the String form of what Chronic.parse yields for the
    # "N units ago" part, or an empty String when the text has no such part
    # (presumably the same value an unparseable phrase produces, as
    # nil.to_s is '' - confirm against Chronic).
    def get_date(subtext)
      match = /.*\s(.*\s.*\sago)/.match(subtext.content)
      return '' if match.nil?

      Chronic.parse(match[1]).to_s
    end

    private

    # Internal: Reduce comment HTML to text: each rel-carrying link is
    # replaced by its href, then every remaining tag is dropped.
    #
    # html - The String HTML of the comment node.
    #
    # Returns the String text.
    def strip_markup(html)
      html.gsub(LINK_TAG, '\2').gsub(ANY_TAG, '')
    end

  end

end
@@ -0,0 +1,34 @@
1
module HN2JSON

  # Internal: Fetches one HackerNews page and keeps its body.
  class Request
    # Public: Gets/Sets the body returned by RestClient for the requested page.
    attr_accessor :html

    # Public: Build the URL for the given ID and fetch it immediately.
    #
    # id        - The item ID, or the fnid token when more_page is true. It
    #             is converted with #to_s in both cases.
    # more_page - When true, request the "More" page
    #             (x?fnid=<id>) instead of an item page (default: false).
    #
    # Raises HN2JSON::RequestError if the page cannot be fetched or the site
    #   answers "No such item." / "Unknown.".
    def initialize(id, more_page = false)
      @base_url = "http://news.ycombinator.com/item?id="

      @complete_url =
        if more_page
          "http://news.ycombinator.com/x?fnid=" + id.to_s
        else
          @base_url + id.to_s
        end

      request_page
    end

    private

    # Internal: Perform the GET request and store the body in @html.
    #
    # Returns nothing.
    # Raises HN2JSON::RequestError on any StandardError from RestClient, or
    #   when the body is one of the site's plain-text "not found" answers.
    def request_page
      begin
        @html = RestClient.get @complete_url
      rescue StandardError
        raise RequestError, "there was an error requesting the page, check your connection"
      end

      if @html == "No such item." || @html == "Unknown."
        # Recover the requested ID/token from the URL for the message.
        id = @complete_url.gsub(/^.*id\=/, '')
        raise RequestError, "no such item or id, #{id}"
      end
    end

  end

end
@@ -0,0 +1,4 @@
1
module HN2JSON
  # Public: String current version of HN2JSON. Frozen so the shared constant
  # cannot be modified in place by code that reads it.
  VERSION = '0.0.4'.freeze
end
data/lib/hn2json.rb ADDED
@@ -0,0 +1,52 @@
1
+ require 'rest-client'
2
+ require 'nokogiri'
3
+ require 'chronic'
4
+
5
+
6
# Public: Interface to HackerNews (news.ycombinator.com)
#
# Examples
#
#   HN2JSON.find 123456
#   # => HN2JSON::Entity:0xffffffffffffff
module HN2JSON
  # Make the instance methods below callable directly on the module.
  extend self

  autoload :Request, 'hn2json/request'

  autoload :Parser, 'hn2json/parser'

  autoload :Entity, 'hn2json/entity'

  autoload :InvalidIdError, 'hn2json/exceptions'
  autoload :RequestError, 'hn2json/exceptions'
  autoload :ParseError, 'hn2json/exceptions'

  autoload :VERSION, 'hn2json/version'

  # Public: Make a request to HackerNews and extract retrieved data.
  #
  # id - The ID of the page to request; must be an Integer greater than 0.
  #
  # Returns the fetched HackerNews Entity.
  # Raises HN2JSON::InvalidIdError if the ID is invalid.
  def find(id)
    check_for_falsy_id id
    Entity.new id
  end

  private

  # Internal: Check if a given ID is valid to be requested.
  #
  # id - The ID to check.
  #
  # Returns nothing.
  # Raises HN2JSON::InvalidIdError if the ID is invalid.
  def check_for_falsy_id(id)
    # Integer covers what used to be Fixnum and Bignum; the Fixnum constant
    # itself no longer exists on current Rubies.
    return if id.is_a?(Integer) && id >= 1

    raise InvalidIdError, "id must be > 0 and an Integer, you passed #{id}"
  end
end
metadata ADDED
@@ -0,0 +1,103 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: hn2json
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.4
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Joseph Adams
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-10-07 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rest-client
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: 1.6.7
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: 1.6.7
30
+ - !ruby/object:Gem::Dependency
31
+ name: nokogiri
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ~>
36
+ - !ruby/object:Gem::Version
37
+ version: 1.5.5
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: 1.5.5
46
+ - !ruby/object:Gem::Dependency
47
+ name: chronic
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ~>
52
+ - !ruby/object:Gem::Version
53
+ version: 0.7.0
54
+ type: :runtime
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: 0.7.0
62
+ description: ! " HN2JSON is a developer friendly interface to HackerNews.\n It
63
+ provides the functionality to retrieve any HN content\n page in stringified JSON
64
+ or a Ruby object.\n"
65
+ email: whitegolem@gmail.com
66
+ executables: []
67
+ extensions: []
68
+ extra_rdoc_files: []
69
+ files:
70
+ - README.md
71
+ - Rakefile
72
+ - LICENSE
73
+ - lib/hn2json/entity.rb
74
+ - lib/hn2json/exceptions.rb
75
+ - lib/hn2json/parser.rb
76
+ - lib/hn2json/request.rb
77
+ - lib/hn2json/version.rb
78
+ - lib/hn2json.rb
79
+ homepage: http://github.com/jcla1/HN2JSON
80
+ licenses: []
81
+ post_install_message:
82
+ rdoc_options: []
83
+ require_paths:
84
+ - lib
85
+ required_ruby_version: !ruby/object:Gem::Requirement
86
+ none: false
87
+ requirements:
88
+ - - ! '>='
89
+ - !ruby/object:Gem::Version
90
+ version: '0'
91
+ required_rubygems_version: !ruby/object:Gem::Requirement
92
+ none: false
93
+ requirements:
94
+ - - ! '>='
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ requirements: []
98
+ rubyforge_project:
99
+ rubygems_version: 1.8.24
100
+ signing_key:
101
+ specification_version: 3
102
+ summary: A Ruby interface to HackerNews
103
+ test_files: []