hacker-curse 0.0.2
- checksums.yaml +7 -0
- data/.gitignore +37 -0
- data/Gemfile +4 -0
- data/LICENSE +21 -0
- data/README.md +89 -0
- data/Rakefile +2 -0
- data/bin/corvus +2320 -0
- data/bin/hacker-comments.rb +182 -0
- data/bin/hacker-tsv.rb +144 -0
- data/bin/hacker-yml.rb +100 -0
- data/bin/hacker.rb +68 -0
- data/bin/hacker.sh +90 -0
- data/bin/redford +946 -0
- data/hacker-curse.gemspec +24 -0
- data/lib/hacker/curse.rb +7 -0
- data/lib/hacker/curse/abstractsiteparser.rb +353 -0
- data/lib/hacker/curse/hackernewsparser.rb +226 -0
- data/lib/hacker/curse/redditnewsparser.rb +241 -0
- data/lib/hacker/curse/version.rb +5 -0
- data/redford.yml +68 -0
- metadata +112 -0
data/hacker-curse.gemspec
ADDED
@@ -0,0 +1,24 @@
# coding: utf-8
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'hacker/curse/version'

Gem::Specification.new do |spec|
  spec.name          = "hacker-curse"
  spec.version       = Hacker::Curse::VERSION
  spec.authors       = ["kepler"]
  spec.email         = ["githubkepler.50s@gishpuppy.com"]
  spec.summary       = %q{View hacker news and reddit articles on terminal using ncurses}
  spec.description   = %q{View Hacker News and reddit articles on terminal using ncurses}
  spec.homepage      = "https://github.com/mare-imbrium/hacker-curse"
  spec.license       = "MIT"

  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.6"
  spec.add_development_dependency "rake", ">= 0.9.6"
  spec.add_runtime_dependency "canis", ">= 0.0.3", ">= 0.0.3"
end
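The gemspec loads the version constant from lib/, so a quick sanity check after installing the gem looks roughly like this. A minimal sketch; it assumes the published 0.0.2 gem is installed and that lib/hacker/curse.rb pulls in the parser classes, which this diff does not show:

    # minimal sketch; assumes `gem install hacker-curse` has been run
    require 'hacker/curse/version'   # defines Hacker::Curse::VERSION, as referenced by the gemspec
    require 'hacker/curse'           # top-level entry point listed in spec.files

    puts Hacker::Curse::VERSION      # => "0.0.2" for this release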
data/lib/hacker/curse/abstractsiteparser.rb
ADDED
@@ -0,0 +1,353 @@
#!/usr/bin/env ruby -w
#
# Fetch hacker news front page entries into a hash.
# TODO : get next page. Nexts is /news2 but after that it changes
# TODO : 2014-07-27 - 12:42 put items in hash in an order, so printers can use first 4 cols for long listing
# title, age_text, comment_count, points, article_url, comments_url, age, submitter, submitter_url
#
require 'open-uri'
require 'nokogiri'

# this is from hacker news itself
#file = "news.html"

module HackerCurse
  class ForumPage
    include Enumerable
    # new newest hot rising etc
    attr_accessor :url
    attr_accessor :next_url
    attr_accessor :create_date
    attr_accessor :subforum
    # array of article objects
    attr_accessor :articles
    def each
      @articles.each do |e| yield(e) ; end
    end
    alias :each_article :each
    def merge_page page
      self.next_url = page.next_url
      self.articles.push(*page.articles)
      self
    end
  end
  class ForumArticle
    attr_accessor :title
    attr_accessor :article_url
    attr_accessor :points
    attr_accessor :comment_count
    attr_accessor :comments_url
    attr_accessor :age_text
    attr_accessor :age
    attr_accessor :submitter
    attr_accessor :submitter_url
    attr_accessor :domain
    attr_accessor :domain_url
    # byline is dump of text on top containing all the info on points, # of comments, nn hours aga
    attr_accessor :byline
    attr_accessor :parent
    attr_writer :comments
    attr_reader :hash
    def initialize h
      @comments = nil
      @hash = h
      [:title, :article_url, :points, :comment_count, :comments_url, :age_text, :age,
       :submitter, :submitter_url, :domain, :domain_url, :byline].each do |sym|
        instance_variable_set("@#{sym.to_s}", h[sym]) if h.key? sym
      end
      if h.key? :comments
        c = h[:comments]
        @comments = Array.new
        c.each do |h|
          fc = ForumComment.new h
          @comments << fc
        end
      end
    end

    def comments
      @comments || retrieve_comments(@comments_url)
    end
    def each
      comments.each do |e| yield(e) ; end
    end
    def retrieve_comments url
      raise "Parent must be set in order to retrieve comments " unless @parent
      @parent._retrieve_comments url
    end
    alias :each_comment :each
    def [](sym)
      @hash[sym]
    end
    def keys
      @hash.keys
    end
    def values
      @hash.values
    end
  end
  class ForumComment
    attr_accessor :submitter, :submitter_url
    attr_accessor :age, :age_text, :points, :head
    attr_accessor :comment_text
    attr_accessor :comment_url
    attr_reader :hash
    def initialize h

      @hash = h
      [:points, :comment_url, :age_text, :age,
       :submitter, :submitter_url, :comment_text, :head].each do |sym|
        instance_variable_set("@#{sym.to_s}", h[sym])
      end
    end
    def [](sym)
      @hash[sym]
    end
    def keys
      @hash.keys
    end
    def values
      @hash.values
    end
  end

  #
  # rn = RNParser.new [url]
  # rn.subreddit = "ruby"
  # resultset = rn.get_next_page :page => prevresultset, :number => 5
  # resultset.each do |art|
  #   art.title, art.points
  #   art.comments
  # end
  #
  # hn = HNewsParser @options
  # hn.subxxx = "news" / "newest"
  #
  # redditnews.rb -s ruby --pages 2
  # hackernews.rb -s newest --pages 2 -d '|'
  #

  class AbstractSiteParser
    attr_reader :more_url
    attr_accessor :host
    attr_accessor :num_pages
    attr_accessor :subforum
    # should the html be saved
    attr_accessor :save_html
    attr_accessor :htmloutfile
    #HOST = "https://news.ycombinator.com"
    def initialize options={}
      @options = options
      @url = @options[:url]
      @save_html = @options[:save_html]
      @htmloutfile = @options[:htmloutfile]
      @num_pages = @options[:num_pages] || 1
      @more_url = nil
      #puts "initialize: url is #{@url} "
    end
    def get_first_page
      #@arr = to_hash @url
      page = _retrieve_page @url
    end
    def get_next_page opts={}
      page = opts[:page]
      num_pages = opts[:num_pages] || @num_pages
      num_pages ||= 1
      u = @more_url || @url
      if page
        u = page.next_url
      end
      pages = nil
      num_pages.times do |i|
        page = _retrieve_page u
        if pages.nil?
          pages = page
        else
          pages.merge_page page
        end
        u = page.next_url
        break unless u # sometimes there is no next
        @more_url = u
      end
      return pages
    end
    alias :get_next :get_next_page
    def _retrieve_page url
      raise "must be implemented by concrete class"
    end
    # write as yml, this doesn't work if multiple pages since we call x times
    # so previous is overwritten
    # This should be called with final class
    def to_yml outfile, arr = @arr
      require 'yaml'
      # cannot just convert / to __ in filename since path gets converted too
      #if outfile.index("/")
      #outfile = outfile.gsub("/","__")
      #end
      File.open(outfile, 'w' ) do |f|
        f << YAML::dump(arr)
      end
    end
    # after called get_next_page, one may pass its return value
    # to this method to convert it into an array of hashes and store it as a yml file
    # It's a bit silly, first we break the hash down into this structure
    # and then deconstruct the whole thing.
    def save_page_as_yml outputfile, page
      h = {}
      h[:url] = page.url
      h[:next_url] = page.next_url
      h[:subforum] = page.subforum
      h[:create_date] = page.create_date
      articles = []
      page.each do |a| articles << a.hash; end

      h[:articles] = articles

      to_yml outputfile, h
    end
    # retrieves the comments for a url and stores in outputfile in YML format
    def save_comments_as_yml outputfile, url
      pages = _retrieve_comments url
      if pages
        to_yml outputfile, pages.hash
      end
    end
    # returns nokogiri html doc and writes html is required.
    def get_doc_for_url url
      #puts "get_doc #{url} "
      out = open(url)
      doc = Nokogiri::HTML(out)
      if @save_html
        subforum = @subforum || "unknown"
        outfile = @htmloutfile || "#{subforum}.html"
        #if !File.exists? url
        out.rewind
        File.open(outfile, 'w') {|f| f.write(out.read) }
        #end
      end
      return doc
    end
    # this is a test method so we don't keep hitting HN while testing out and getting IP blocked.
    def load_from_yml filename="hn.yml"
      @arr = YAML::load( File.open( filename ) )
      next_url = @arr.last[:article_url]
      unless next_url.index("http")
        next_url = @host + "/" + next_url
      end
      @more_url = next_url
    end
    def _retrieve_comments url
      raise "Must be implemented by concrete class "
    end
    public
    def get_comments_url index
      arr = @arr
      entry = arr[index]
      if entry
        if entry.key? :comments_url
          return entry[:comments_url]
        end
      end
      return nil
    end
    public
    def get_comments index
      url = get_comments_url index
      if url
        #puts url
        comments = convert_comment_url url
        return comments
        #else
        #puts "Sorry no url for #{index} "
      end
      return []
    end
    alias :get_comments_for_link :get_comments
    def human_age_to_unix age_text
      i = age_text.to_i
      ff=1
      if age_text.index("hour")
        i *= ff*60*60
      elsif age_text.index("second")
        i *= ff
      elsif age_text.index("minute")
        i *= ff*60
      elsif age_text.index("day")
        i *= ff*60*60*24
      elsif age_text.index("month")
        i *= ff*60*60*24*30
      elsif age_text.index("week")
        i *= ff*60*60*24*7
      elsif age_text.index("year")
        i *= ff*60*60*24*365
      else
        #raise "don't know how to convert #{age_text} "
        return 0
      end
      return (Time.now.to_i - i)
    end
  end
end
include HackerCurse


if __FILE__ == $0
  #rn = HackerNewsParser.new :url => "hackernews.html"
  rn = RedditNewsParser.new :url => "reddit-prog.html"

  page = rn.get_next_page # [page if supplied, take page.next_url, otherwise store??]
  puts "For each article :::"
  page.each do |art|
    puts art.title, art.points, art.age_text, art.age, Time.at(art.age)
  end # each_article
  art = page.articles.first
  puts "PRINTING comments "
  art.each_comment do |c|
    puts
    puts " ======"
    puts c.head
    s = nil
    if c.age
      s = Time.at(c.age)
    end
    puts " #{c.age_text} | #{c.submitter} | #{c.age} . #{s} "
    puts c.comment_text
  end

  exit
  articles = page.articles
  co = articles.first.comments
  puts "PRINTING comments "
  puts co[:title], co[:subtext]
  comments = co[:comments]
  comments.each_with_index do |c,i|
    puts "======= #{c[:head]} : "
    puts " - #{c[:head]} : "
    puts " #{c[:comment]} "
    puts " "
  end

  #comments.each_with_index do |c,i|
  #puts " #{i}: #{c} "
  #end
  exit
  art.each_comment do |cc|
  end
  #rn.next_url = page.next_url
  rn.set_next_url(page)
  #arr = rn.convert_comment_url "hn_comments.html"
  #rn.to_yml "hn_comments.yml", arr


  arr = rn.get_next_page
  rn.to_yml "hn.yml"
  puts "getting comments for link 1"
  comments = rn.get_comments_for_link 1
  if comments.empty?
    comments = rn.get_comments_for_link 9
  end
  rn.to_yml "hn-comments.yml", comments
  puts "getting next page"
  arr1 = rn.get_next_page
  rn.to_yml "hn-1.yml", arr1
end
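Putting the pieces together, the intended call sequence for a concrete parser mirrors the usage comment and the self-test block above. A hedged sketch: it needs network access, uses the HackerNewsParser subclass added in the next file, and Hacker News markup may have changed since this scraper was written:

    require 'hacker/curse/hackernewsparser'

    hn = HackerCurse::HackerNewsParser.new :subforum => "news", :num_pages => 1
    page = hn.get_next_page                  # ForumPage holding ForumArticle objects
    page.each_article do |art|
      puts art.title, art.points, art.age_text
    end
    first = page.articles.first
    first.each_comment do |c|                # comments are fetched lazily via the parent parser
      puts c.head
      puts c.comment_text
    end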
data/lib/hacker/curse/hackernewsparser.rb
ADDED
@@ -0,0 +1,226 @@
require 'hacker/curse/abstractsiteparser'

module HackerCurse

  class HackerNewsParser < AbstractSiteParser
    def initialize config={}
      @host = config[:host] || "https://news.ycombinator.com"
      subforum = config[:subforum] || "news"
      _url="#{@host}/#{subforum}"
      @subforum = subforum
      config[:url] ||= _url
      super config
    end
    def _retrieve_page url
      #puts "got url #{url} "
      raise "url should be string" unless url.is_a? String
      arr = to_hash url
      page = hash_to_class arr
      #to_yml "#{@subforum}.yml", arr
      return page
    end
    # currently returns a Hash. containing various entries relating to the main article
    # which can be avoiced.
    # Contains an array :comments which contains hashes, :head contains text of head, :comment contains
    # text of comment, and then there are entries for submitter.
    # hash[:comments].each do |e| e[:comment] ; end
    # @return Array of ForumComment objects.
    # pages.each do |co| puts co.comment_text, co.head; end
    def _retrieve_comments url
      arr = to_hash_comment url
      # TODO break head into points age etc
      pages = hash_to_comment_class arr
      return pages
    end
    def hash_to_comment_class arr
      page = ForumArticle.new arr
      return page
    end
    def oldhash_to_comment_class arr
      co = arr[:comments]
      pages = Array.new
      co.each do |h|
        page = ForumComment.new h
        pages << page
      end
      return pages
    end
    def to_hash_comment url
      # for testing i may send in a saved file, so i don't keep hitting HN
      if !File.exists? url
        unless url.index("http")
          url = @host + "/" + url
        end
      end
      page = Nokogiri::HTML(open(url))
      h = {}
      title = page.css("td.title")
      article_url = title.css("a").first["href"]
      h[:title] = title.text
      h[:article_url] = article_url

      subtext = page.css("td.subtext")
      h[:byline] = subtext.text
      # TODO extract age_text
      h[:age_text] = subtext.text.scan(/\d+ \w+ ago/).first
      score = subtext.css("span").text
      h[:points] = score
      subtext.css("a").each_with_index do |e, i|
        link = e["href"]
        text = e.text
        if link.index("user") == 0
          h[:submitter] = text
          h[:submitter_url] = link
        elsif link.index("item") == 0
          h[:comment_count] = text
          h[:comments_url] = link
        end
      end

      # need to get points
      comheads = page.css("span.comhead") # .collect do |e| e.text ; end
      comments = page.css("span.comment").collect do |e| e.text ; end
      comheads.delete(comheads.first)
      # array of comments
      carr = Array.new
      comheads.zip(comments) do |head,c|
        hh={}; hh[:head] = head.text;
        #$stderr.puts "head:: #{head.text}"
        m = head.text.scan(/\d+ \w+ ago/)
        if !m.empty?
          hh[:age_text] = m.first.scan(/\d+ \w/).first.rjust(4)
          hh[:age] = human_age_to_unix(m.first)
          head.css("a").each_with_index do |e, i|
            link = e["href"]
            text = e.text
            if link.index("user") == 0
              hh[:submitter] = text
              hh[:submitter_url] = link
            elsif link.index("item") == 0
              hh[:text] = text
              hh[:comment_url] = link
            end
          end
        end
        hh[:comment_text]=c;
        carr << hh
      end

      h[:comments] = carr
      return h
    end
    def hash_to_class h
      p = ForumPage.new
      p.url = h[:url]
      p.next_url = h[:next_url]
      p.create_date = h[:create_date]
      p.subforum = h[:subforum]
      art = h[:articles]
      arts = []
      art.each do |a|
        fa = ForumArticle.new a
        fa.parent = self
        arts << fa
      end
      p.articles = arts
      return p
    end
    # convert the front page to a hash
    def to_hash url
      doc = get_doc_for_url url
      count = 0
      page = {}
      page[:url] = url
      now = Time.now
      page[:create_date_seconds] = now.to_i
      page[:create_date] = now
      page[:subforum] = @subforum

      arr = Array.new
      h = {}
      links = doc.xpath("//table/tr/td/table/tr")
      links.each_with_index do |li, i|
        x = li.css("td.title")
        if !x.empty?
          #puts " ---- title ----- #{x.count} "
          count = x[0].text
          #puts count
          if x.count < 2
            # this block is for the next_url
            article_url = x[0].css("a")[0]["href"] # link url
            #puts article_url
            h = {}
            h[:title] = count
            h[:article_url] = article_url
            more = count
            more_url = "#{@host}/#{article_url}"
            #arr << h
            page[:next_url] = more_url
            #puts li
          end
          break if x.count < 2

          # actual article url
          title = x[1].css("a")[0].text # title
          article_url = x[1].css("a")[0]["href"] # link url
          #puts article_url
          #puts title
          h = {}
          #h[:number] = count
          h[:title] = title
          # ask option does not have hostname since it is relative to HN
          if article_url.index("http") != 0
            article_url = "#{@host}/#{article_url}"
          end

          h[:article_url] = article_url
          arr << h
        else
          x = li.css("td.subtext")
          if !x.empty?
            fulltext = x.text
            #puts " ---- subtext ----- (#{fulltext})"
            submitter = nil
            submitter_url = nil
            comment = nil
            comments_url = nil
            t = x.css("a")
            t.each_with_index do |tt, ii|
              case ii
              when 0
                submitter = tt.text
                submitter_url = tt["href"]
              when 1
                comment = tt.text
                comments_url = tt["href"]
                comments_url = "#{@host}/#{comments_url}"
              end
            end
            points = x.css("span").text rescue ""
            #puts submitter
            #puts submitter_url
            #puts comment
            #puts comments_url
            #puts points
            h[:submitter] = submitter
            h[:submitter_url] = submitter_url
            h[:comment_count] = comment.to_i.to_s.rjust(4)
            h[:comments_url] = comments_url
            h[:points] = points.to_i.to_s.rjust(4)
            m = fulltext.scan(/\d+ \w+ ago/)
            if m
              #h[:age_text] = m.first
              h[:age_text] = m.first.scan(/\d+ \w/).first.rjust(4)
              h[:age] = human_age_to_unix(m.first)
            end
            #puts "fulltext: #{fulltext} "
            h[:byline] = fulltext
          end
        end
      end
      #return arr
      page[:articles] = arr
      return page
    end
  end # class
end # module
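For completeness, the YAML helpers inherited from AbstractSiteParser can persist what this parser scrapes. A small hedged sketch; the file names are arbitrary placeholders and it assumes network access to news.ycombinator.com:

    require 'hacker/curse/hackernewsparser'

    hn = HackerCurse::HackerNewsParser.new :subforum => "newest"
    page = hn.get_next_page
    hn.save_page_as_yml "newest.yml", page               # page url, next_url and article hashes
    url = page.articles.first.comments_url
    hn.save_comments_as_yml "comments.yml", url if url   # comment tree of the first story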