hn2json 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2012 Joseph Adams
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ Software), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
File without changes
data/Rakefile ADDED
@@ -0,0 +1,41 @@
# Rake tasks for building, installing, tagging and releasing the hn2json gem.
# The version constant is loaded straight from the library so that every
# task refers to the same version string.
require "#{File.dirname(__FILE__)}/lib/hn2json/version"

# Internal: Check whether a shell command is available.
#
# command - The String name of the command to look up with `type`.
#
# Returns whatever Kernel#system returns for the lookup.
def command?(command)
  probe = "type #{command} > /dev/null 2>&1"
  system(probe)
end

#
# Gems
#

desc "Build gem."
task :gem do
  sh "gem build hn2json.gemspec"
end

# Builds the gem, then pushes the first built file matching the current version.
task push: [:gem] do
  built_gem = Dir.glob("*-#{HN2JSON::VERSION}.gem").first
  sh "gem push #{built_gem}"
end

desc "Install gem."
task install: [:gem] do
  sh "gem install hn2json-#{HN2JSON::VERSION}.gem"
end

desc "Build the gem, install it and open irb."
task irb: [:install] do
  sh "irb -r hn2json"
end

desc "tag version"
task :tag do
  # Run in order; `sh` aborts the task as soon as one command fails.
  [
    "git tag v#{HN2JSON::VERSION}",
    "git push origin master --tags",
    "git clean -fd"
  ].each { |command| sh command }
end

desc "tag version and push gem to server"
task release: [:push, :tag] do
  puts "And away it goes!"
end
@@ -0,0 +1,97 @@
module HN2JSON
  # Internal: Represents a post, poll, discussion or comment on HackerNews.
  #
  # Creating an Entity requests the page for the given ID, asks the Parser
  # which kind of page it is and then lets the Parser fill in the attributes
  # that belong to that kind of page.
  class Entity

    # The attributes below need writers: the Parser populates them from the
    # outside through #add_attrs (e.g. `e.title = title`). With readers only,
    # every one of those assignments raised NoMethodError.

    # Public: Gets/Sets the IDs of all top-level comments.
    attr_accessor :comments

    # Public: Gets/Sets the String date when the Entity was posted to HackerNews.
    attr_accessor :date_posted

    # Public: Gets/Sets the String text of the Entity, if available.
    attr_accessor :fulltext

    # Public: Returns the ID the Entity was created with.
    attr_reader :id

    # Public: Gets/Sets the ID of the parent, if available.
    attr_accessor :parent

    # Public: Gets/Sets the String username of the user who posted the Entity.
    attr_accessor :posted_by

    # Public: Gets/Sets the String title of the Entity, if available.
    attr_accessor :title

    # Public: Returns the Symbol type of the Entity
    # (:post, :poll, :discussion, :comment or :error).
    attr_reader :type

    # Public: Gets/Sets the String url of an Entity, if available.
    attr_accessor :url

    # Public: Gets/Sets the Integer number of upvotes the Entity has received.
    attr_accessor :votes

    # Public: Gets/Sets a 2D Array of ["Thing you're voting on", number of upvotes].
    attr_accessor :voting_on


    # Public: Fetch and parse the HackerNews page with the given ID.
    #
    # id - The ID of the page to request.
    #
    # Any error raised while requesting or parsing the page propagates to the
    # caller.
    def initialize(id)
      @id = id

      # Everything starts out empty; only the attributes that exist for the
      # detected type are filled in by get_attrs.
      @type = nil
      @parent = nil
      @url = nil
      @title = nil
      @fulltext = nil
      @posted_by = nil
      @date_posted = nil
      @voting_on = nil
      @comments = nil
      @votes = nil

      get_page
      determine_type

      get_attrs
    end

    # Internal: Request the page and build a Parser for it.
    #
    # Note that @html holds the Request object itself, not a raw String; the
    # Parser reads the markup from it.
    #
    # Returns the Parser.
    def get_page
      @html = Request.new(id)
      @parser = Parser.new @html
    end

    # Internal: Ask the Parser what kind of page was fetched.
    #
    # Returns the Symbol type.
    def determine_type
      @type = @parser.determine_type
    end

    # Internal: Let the Parser fill in the attributes matching the type.
    #
    # Returns whatever the Parser method returns, or nil when nothing is done.
    def get_attrs
      case @type
      when :post
        @parser.get_attrs_post self
      when :comment
        @parser.get_attrs_comment self
      when :poll
        @parser.get_attrs_poll self
      when :discussion
        @parser.get_attrs_discussion self
      when :error
        # Unrecognised page: every attribute stays nil.
      end
    end

    # Internal: Yields self for adding attr
    #
    # Examples
    #
    #   entity.add_attrs do |entity|
    #     entity.title = "Hello World!"
    #   end
    #
    # Returns the result of the block.
    def add_attrs
      yield self
    end

  end

end
@@ -0,0 +1,8 @@
module HN2JSON
  # Raised by HN2JSON.find when the given ID is not acceptable.
  class InvalidIdError < StandardError; end

  # Raised by Request when the page cannot be fetched or the item is unknown.
  class RequestError < StandardError; end

  # Raised by Parser when the fetched page cannot be parsed.
  class ParseError < StandardError; end
end
@@ -0,0 +1,246 @@
module HN2JSON

  # Public: Parse HTML to produce HackerNews entities.
  #
  # The CSS selectors and cell positions used here are tied to one specific
  # HackerNews page layout.
  # NOTE(review): presumably the layout at the time of writing - confirm
  # against the current markup before relying on them.
  class Parser

    # Public: Returns the parsed document.
    attr_reader :doc

    # Internal: Parse the markup carried by a response.
    #
    # response - An object whose #html returns the page markup as a String
    #            (a Request in this library). The String is re-tagged as
    #            UTF-8 in place.
    #
    # Raises HN2JSON::ParseError if the markup cannot be parsed.
    def initialize(response)
      html = response.html
      html.force_encoding "UTF-8"
      begin
        @doc = Nokogiri::HTML::DocumentFragment.parse html
      rescue
        raise ParseError, "there was an error parsing the page"
      end
    end


    # Internal: Work out which kind of page was parsed.
    #
    # Returns :comment, :poll, :discussion, :post, or :error when the page
    # matches none of them.
    def determine_type
      title = @doc.css('.title a')

      # No title link (or only the "More" pagination link): either a comment
      # page or something unrecognisable, told apart by the number of cells.
      if title.length < 1 || title[0].content == "More"
        return @doc.css('td').length > 7 ? :comment : :error
      end

      td = @doc.css('td')[12]

      # A titled page without that cell is not a layout we know; report it
      # the same way as other unrecognised pages instead of calling #css on nil.
      return :error unless td

      if td.css('table').length > 0
        :poll
      elsif td.css('form').length == 1
        :discussion
      else
        :post
      end
    end

    # Internal: Fill in the attributes of a comment.
    #
    # entity - The Entity to populate (through Entity#add_attrs).
    #
    # Raises HN2JSON::ParseError if the parent link or the date is missing.
    def get_attrs_comment(entity)
      parent_link = @doc.css('.comhead a')[2]
      parent_match = parent_link && /id\=(.*)/.match(parent_link['href'])
      raise ParseError, "could not find the parent of the comment" unless parent_match
      parent = parent_match[1]

      fulltext = strip_markup @doc.css('.comment')[0].to_s

      comhead = @doc.css('.comhead')[0]
      date_posted = get_date comhead
      posted_by = get_posted_by comhead
      comments = get_comments

      entity.add_attrs do |e|
        e.parent = parent
        e.fulltext = fulltext
        e.date_posted = date_posted
        e.comments = comments
        e.posted_by = posted_by
      end
    end


    # Internal: Fill in the attributes of a link post.
    #
    # entity - The Entity to populate (through Entity#add_attrs).
    def get_attrs_post(entity)
      subtext = @doc.css('.subtext')[0]

      date_posted = get_date subtext
      posted_by = get_posted_by subtext
      votes = get_num_votes subtext
      comments = get_comments

      head = @doc.css('.title a')[0]
      title = head.content
      url = head['href']

      entity.add_attrs do |e|
        e.url = url
        e.title = title
        e.date_posted = date_posted
        e.comments = comments
        e.votes = votes
        e.posted_by = posted_by
      end
    end

    # Internal: Fill in the attributes of a poll.
    #
    # entity - The Entity to populate (through Entity#add_attrs).
    def get_attrs_poll(entity)
      title = @doc.css('.title a')[0].content

      subtext = @doc.css('.subtext')[0]

      date_posted = get_date subtext
      posted_by = get_posted_by subtext
      votes = get_num_votes subtext
      comments = get_comments

      # The poll text is only taken when the selector yields exactly two
      # cells; otherwise the poll is treated as having no text.
      fulltext_elem = @doc.css('tr[style="height:2px"] + tr > td')
      fulltext = fulltext_elem.length == 2 ? fulltext_elem[1].content : ''

      voting_on = get_voting_on

      entity.add_attrs do |e|
        e.title = title
        e.fulltext = fulltext
        e.date_posted = date_posted
        e.posted_by = posted_by
        e.votes = votes
        e.comments = comments
        e.voting_on = voting_on
      end
    end

    # Internal: Fill in the attributes of a discussion (text post).
    #
    # entity - The Entity to populate (through Entity#add_attrs).
    def get_attrs_discussion(entity)
      title = @doc.css('.title a')[0].content

      fulltext = @doc.css('td')[10].content

      subtext = @doc.css('.subtext')[0]

      date_posted = get_date subtext
      posted_by = get_posted_by subtext
      votes = get_num_votes subtext
      comments = get_comments

      entity.add_attrs do |e|
        e.title = title
        e.fulltext = fulltext
        e.date_posted = date_posted
        e.posted_by = posted_by
        e.comments = comments
        e.votes = votes
      end
    end


    # Internal: Collect the options of a poll.
    #
    # The option rows are read in groups of three: the first row is the
    # option text, the second its score and the third is ignored.
    #
    # Returns an Array of [text, points] String pairs. A trailing group that
    # has no score row yields just [text].
    def get_voting_on
      trs = @doc.css('tr > td + td > table tr')

      trs.each_slice(3).map do |option, score, _ignored|
        entry = [option.content]
        entry.push score.content.gsub(/\spoints/, '') if score
        entry
      end
    end

    # Internal: Collect the IDs of the comments on this page and on the
    # pages reachable through its "More" link.
    #
    # Only rows next to an image of width "0" are considered.
    # NOTE(review): presumably that image is the indentation spacer, making
    # these the top-level comments - confirm.
    #
    # Returns an Array of String comment IDs.
    def get_comments
      comments = []

      full_comments = @doc.css('td > img[width="0"]').xpath("..").xpath("..").css('.default')

      full_comments.each do |comment|
        permalink = comment.css('span a')[1]

        # Rows without a second header link carry no ID to extract; skip
        # them instead of failing the whole parse.
        next unless permalink

        comments.push permalink['href'].gsub("item?id=", '')
      end

      get_comments_more doc, comments
    end

    # Internal: Follow the "More" link of a page, if there is one.
    #
    # doc      - The parsed document to look for the link in.
    # comments - The Array of comment IDs collected so far.
    #
    # Returns the Array of comment IDs, extended by those of the following
    # pages.
    def get_comments_more(doc, comments)
      trs = doc.css('tr .title a')
      return comments if trs.length == 0

      match = /\/x\?fnid=(.*)/.match(trs.last['href'])
      return comments unless match

      # The next page is fetched and parsed like any other; its own "More"
      # link is followed by the nested get_comments call.
      req = Request.new match[1], true
      parser = Parser.new req

      comments + parser.get_comments
    end

    # Internal: Read the score out of a subtext element.
    #
    # Returns the Integer produced by String#to_i on the first span's text.
    def get_num_votes(subtext)
      subtext.css('span')[0].content.to_i
    end

    # Internal: Read the author out of a subtext/comhead element.
    #
    # Returns the String text of the first link.
    def get_posted_by(subtext)
      subtext.css('a')[0].content
    end

    # Internal: Turn the relative "... ago" phrase of an element into a date.
    #
    # subtext - The element whose text contains the phrase.
    #
    # Returns the String form of whatever Chronic makes of the phrase.
    # Raises HN2JSON::ParseError if the text contains no such phrase.
    def get_date(subtext)
      match = /.*\s(.*\s.*\sago)/.match(subtext.content)
      raise ParseError, "could not find the date of the entity" unless match

      Chronic.parse(match[1]).to_s
    end

    private

    # Internal: Reduce comment markup to plain text, keeping link targets.
    #
    # Anchors carrying a rel attribute are replaced by their href, then all
    # remaining tags are dropped. The anchor pattern is deliberately
    # non-greedy and confined to a single tag: a greedy match spans from the
    # first to the last link and garbles comments with several links.
    #
    # Returns the plain String.
    def strip_markup(html)
      with_urls = html.gsub(/<a\shref=['"](.*?)['"][^>]*rel=[^>]*>.*?<\/a>/, '\1')
      with_urls.gsub(/<\/?[^>]*>/, '')
    end

  end

end
@@ -0,0 +1,34 @@
module HN2JSON

  # Internal: Fetches a HackerNews page over HTTP.
  #
  # The page is requested as soon as the object is created; the response is
  # available through #html afterwards.
  class Request
    # Public: Gets/Sets the response returned by RestClient for the page.
    attr_accessor :html

    # Public: Request an item page or a "more" page.
    #
    # id        - The item ID, or the fnid token when more_page is true.
    #             Converted with #to_s in both cases, so Integers and
    #             Strings are accepted alike.
    # more_page - Whether id is the fnid of a follow-up page (default: false).
    #
    # Raises HN2JSON::RequestError if the request fails or the item is unknown.
    def initialize(id, more_page = false)
      @base_url = "http://news.ycombinator.com/item?id="

      # Interpolation (instead of String#+) keeps a non-String fnid from
      # raising TypeError, matching the to_s coercion of the item URL.
      @complete_url = if more_page
        "http://news.ycombinator.com/x?fnid=#{id}"
      else
        @base_url + id.to_s
      end

      request_page
    end

    private

    # Internal: Perform the request and store the response in @html.
    #
    # Returns nothing.
    # Raises HN2JSON::RequestError on any request failure, or when the body
    # is one of HackerNews' "not found" answers.
    def request_page
      begin
        @html = RestClient.get @complete_url
      rescue
        raise RequestError, "there was an error requesting the page, check your connection"
      end

      return unless @html == "No such item." || @html == "Unknown."

      # Report whatever followed "id=" in the requested URL.
      id = @complete_url.gsub(/^.*id\=/, '')
      raise RequestError, "no such item or id, #{id}"
    end

  end

end
@@ -0,0 +1,4 @@
module HN2JSON
  # Public: String current version of HN2JSON.
  # Frozen so the shared constant cannot be modified in place by accident.
  VERSION = '0.0.4'.freeze
end
data/lib/hn2json.rb ADDED
@@ -0,0 +1,52 @@
1
+ require 'rest-client'
2
+ require 'nokogiri'
3
+ require 'chronic'
4
+
5
+
# Public: Interface to HackerNews (news.ycombinator.com)
#
# Examples
#
#   HN2JSON.find 123456
#   # => HN2JSON::Entity:0xffffffffffffff
module HN2JSON
  # Make the instance methods below callable on the module itself.
  extend self

  # The parts of the library are loaded on first use.
  autoload :Request, 'hn2json/request'

  autoload :Parser, 'hn2json/parser'

  autoload :Entity, 'hn2json/entity'

  autoload :InvalidIdError, 'hn2json/exceptions'
  autoload :RequestError, 'hn2json/exceptions'
  autoload :ParseError, 'hn2json/exceptions'

  autoload :VERSION, 'hn2json/version'

  # Public: Make a request to HackerNews and extract retrieved data.
  #
  # id - The ID of the page to request
  #
  #
  # Returns the fetched HackerNews Entity.
  # Raises HN2JSON::InvalidIdError if the ID is invalid.
  def find(id)
    check_for_falsy_id id
    Entity.new id
  end

  private

  # Internal: Check if a given ID is valid to be requested.
  #
  # id - The ID to check.
  #
  # The check uses Integer rather than Fixnum: Fixnum no longer exists on
  # current Rubies (referencing it raises NameError), and on older Rubies it
  # needlessly excluded very large Integers.
  #
  # Returns nothing.
  # Raises HN2JSON::InvalidIdError if the ID is invalid.
  def check_for_falsy_id(id)
    return if id.is_a?(Integer) && id >= 1

    raise InvalidIdError, "id must be > 0 and an Integer, you passed #{id}"
  end
end
metadata ADDED
@@ -0,0 +1,103 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: hn2json
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.4
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Joseph Adams
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-10-07 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rest-client
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: 1.6.7
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: 1.6.7
30
+ - !ruby/object:Gem::Dependency
31
+ name: nokogiri
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ~>
36
+ - !ruby/object:Gem::Version
37
+ version: 1.5.5
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: 1.5.5
46
+ - !ruby/object:Gem::Dependency
47
+ name: chronic
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ~>
52
+ - !ruby/object:Gem::Version
53
+ version: 0.7.0
54
+ type: :runtime
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: 0.7.0
62
+ description: ! "  HN2JSON is a developer friendly interface to HackerNews.\n  It
63
+ provides the functionality to retrieve any HN content\n page in stringified JSON
64
+ or a Ruby object.\n"
65
+ email: whitegolem@gmail.com
66
+ executables: []
67
+ extensions: []
68
+ extra_rdoc_files: []
69
+ files:
70
+ - README.md
71
+ - Rakefile
72
+ - LICENSE
73
+ - lib/hn2json/entity.rb
74
+ - lib/hn2json/exceptions.rb
75
+ - lib/hn2json/parser.rb
76
+ - lib/hn2json/request.rb
77
+ - lib/hn2json/version.rb
78
+ - lib/hn2json.rb
79
+ homepage: http://github.com/jcla1/HN2JSON
80
+ licenses: []
81
+ post_install_message:
82
+ rdoc_options: []
83
+ require_paths:
84
+ - lib
85
+ required_ruby_version: !ruby/object:Gem::Requirement
86
+ none: false
87
+ requirements:
88
+ - - ! '>='
89
+ - !ruby/object:Gem::Version
90
+ version: '0'
91
+ required_rubygems_version: !ruby/object:Gem::Requirement
92
+ none: false
93
+ requirements:
94
+ - - ! '>='
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ requirements: []
98
+ rubyforge_project:
99
+ rubygems_version: 1.8.24
100
+ signing_key:
101
+ specification_version: 3
102
+ summary: A Ruby interface to HackerNews
103
+ test_files: []