hacker-curse 0.0.2
- checksums.yaml +7 -0
- data/.gitignore +37 -0
- data/Gemfile +4 -0
- data/LICENSE +21 -0
- data/README.md +89 -0
- data/Rakefile +2 -0
- data/bin/corvus +2320 -0
- data/bin/hacker-comments.rb +182 -0
- data/bin/hacker-tsv.rb +144 -0
- data/bin/hacker-yml.rb +100 -0
- data/bin/hacker.rb +68 -0
- data/bin/hacker.sh +90 -0
- data/bin/redford +946 -0
- data/hacker-curse.gemspec +24 -0
- data/lib/hacker/curse.rb +7 -0
- data/lib/hacker/curse/abstractsiteparser.rb +353 -0
- data/lib/hacker/curse/hackernewsparser.rb +226 -0
- data/lib/hacker/curse/redditnewsparser.rb +241 -0
- data/lib/hacker/curse/version.rb +5 -0
- data/redford.yml +68 -0
- metadata +112 -0
data/lib/hacker/curse/redditnewsparser.rb ADDED
@@ -0,0 +1,241 @@
+require 'hacker/curse/abstractsiteparser'
+
+module HackerCurse
+
+  class RedditNewsParser < AbstractSiteParser
+    def initialize config={}
+      @host = config[:host] || "http://www.reddit.com/"
+      subforum = config[:subforum] || "unknown"
+      _url = "#{@host}/r/#{subforum}/.mobile"
+      config[:url] ||= _url
+      @subforum = subforum
+      super config
+    end
+    def _retrieve_page url
+      $stderr.puts "_retrieve_page got url #{url} "
+      raise "url should be string" unless url.is_a? String
+      arr = to_hash url
+      page = hash_to_class arr
+      #to_yml "#{@subforum}OLD.yml", arr
+      return page
+    end
+    # reddit
+    # @return array of ForumComment objects
+    # For each, you may retrieve +hash+ or individual items such as comment_text, points, age, age_text, submitter, head
+    def _retrieve_comments url
+      arr = to_hash_comment url
+      pages = hash_to_comment_class arr
+      return pages
+    end
+    # reddit: parse to a hash containing :url, :next_url and :articles (an array of hashes for each article)
+    def to_hash url
+      page = {}
+      arr = Array.new
+      doc = get_doc_for_url url
+      page[:url] = url
+      now = Time.now
+      page[:create_date_seconds] = now.to_i
+      page[:create_date] = now
+      page[:subforum] = @subforum
+      #filename = "r.#{subr}.yml"
+      links = doc.css("li div.link")
+      links.each do |li|
+        h = {}
+        e = li.css("a.title")
+        if !e.empty?
+          e = e.first
+          h[:title] = e.text
+          h[:article_url] = e["href"]
+        end
+        e = li.css("a.domain")
+        if !e.empty?
+          e = e.first
+          h[:domain] = e.text
+          h[:domain_url] = e["href"]
+        end
+        e = li.css("a.author")
+        if !e.empty?
+          e = e.first
+          h[:submitter] = e.text
+          h[:submitter_url] = e["href"]
+        end
+        e = li.css("span.buttons > a")
+        if !e.empty?
+          e = e.first
+          #h[:comment_count] = e.text.to_i
+          h[:comment_count] = e.text.to_i.to_s.rjust(4)
+          h[:comments_url] = e["href"]
+        else
+          h[:comment_count] = "   0"
+          h[:comments_url] = ""
+        end
+        byline = li.css("p.byline").text
+        h[:byline] = byline
+        # 2014-08-14 - 13:34 in some cases the byline just says "17 minutes ago" with no BAR or "by"
+        # In one case in 'science' the name itself had BARs, so the parse failed
+        # In another case there were no comments, so parts[2] was nil !!
+        parts = byline.split("|")
+        age = points = nil
+        parts.each do |ppp|
+          if ppp.index("points")
+            points = ppp.strip
+          elsif ppp.index("comments")
+            # we've taken it already
+          elsif ppp.index(" ago ")
+            age = ppp.split("by").first.strip
+          end
+        end
+
+
+        #age = parts.last.split("by").first.strip
+
+        #age = parts[2].split("by").first.strip
+        if age
+          if age.scan(/\d+ \w/).first.nil?
+            raise "Nil in age: #{age} , parts = #{parts}"
+          end
+        end
+        h[:age_text] = age.scan(/\d+ \w/).first.rjust(4) if age
+        #h[:age_text] = age
+        h[:age] = human_age_to_unix(age) if age
+        #h[:points] = points.to_i
+        h[:points] = points.to_i.to_s.rjust(4)
+        #puts points
+        #puts age
+        arr << h
+      end
+      # some cases like rising do not have next/prev
+      #next_prev_url = doc.css("p.nextprev").first.css("a").first["href"]
+      next_prev_url = doc.css("p.nextprev").first
+      if next_prev_url #&& !next_prev_url.empty?
+        next_prev_url = next_prev_url.css("a").first["href"]
+        page[:next_url] = next_prev_url
+      end
+      page[:articles] = arr
+      #arr << { :next_prev_url => next_prev_url }
+      #@more_url = next_prev_url
+      return page
+    end
+    # reddit
+    def hash_to_class h
+      p = ForumPage.new
+      p.url = h[:url]
+      p.next_url = h[:next_url]
+      p.create_date = h[:create_date]
+      p.subforum = h[:subforum]
+      #p.create_date_seconds = h[:create_date_seconds]
+      art = h[:articles]
+      arts = []
+      art.each do |a|
+        fa = ForumArticle.new a
+        fa.parent = self
+        arts << fa
+      end
+      p.articles = arts
+      return p
+    end
+    # #child_t1_cixd8gn > ul:nth-child(1) > li:nth-child(2) > div:nth-child(1) > div:nth-child(2) > p:nth-child(1)
+    # If you want to get the hierarchy of comments within comments:
+    #   toplevelcomments = page.css("body > ul > li > div.comment")
+    # go for body > ul
+    # then get the li
+    # within the li look for levels using
+    #   > div > ul > li
+    # to get the next level of entries
+    # This will require recursively going down levels
+    # NOTE: currently this returns a flat list of comments. Actually they are nested
+    # and contain block-quotes, so ideally the user should check the actual page in the browser
+    #private
+    public
+    # returns a hash. hash[:comments] returns an array of hashes containing comment details
+    def to_hash_comment url
+      # for testing i may send in a saved file, so i don't keep hitting HN
+      if !File.exists? url
+        unless url.index("http")
+          url = @host + "/" + url
+        end
+      end
+      # comments are nested and there is a div for that.
+      # Also blockquotes for when a commenter quotes another.
+      doc = Nokogiri::HTML(open(url))
+      h = {}
+      main = doc.css("li div.link")
+      maintext = main.text
+      #puts maintext
+      #puts main.css("a").count
+      #puts main.css("a").first
+      # this dumps the whole line
+      h[:main_text] = maintext
+      main.css("a").each_with_index do |l, i|
+        # this breaks the main line into text and links
+        case i
+        when 0
+          h[:title] = l.text
+          h[:article_url] = l["href"]
+        when 1
+          h[:comment_count] = l.text
+          h[:comments_url] = l["href"]
+        when 2
+          h[:submitter] = l.text
+          h[:submitter_url] = l["href"]
+        when 3
+          h[:domain] = l.text
+          h[:domain_url] = l["href"]
+        end
+      end
+      byline = main.css("p.byline").text
+      h[:byline] = byline
+      points = byline.scan(/\d+ point/).first
+      age_text = byline.scan(/\d+ \w+ ago/).first
+      h[:points] = points
+      h[:age_text] = age_text
+
+      arr = []
+      comments = doc.css("li div.comment")
+      comments.each_with_index do |co, ix|
+        #puts ix
+        hh = {}
+        arr << hh
+        comment = co.css("div.md").text
+        hh[:comment_text] = comment
+        byline = co.css("p.byline")
+        #puts "byline:"
+        #puts byline
+        bytext = byline.text
+        hh[:head] = bytext
+        #puts "bytext:"
+        #puts bytext
+        m = bytext.scan(/\d+ \w+ ago/)
+        hh[:age_text] = m.first.sub(/ago/, "")
+        hh[:age] = human_age_to_unix(m.first)
+        link = byline.css("a").first
+        if link
+          commenter = link.text
+          hh[:submitter] = commenter
+          submitter_url = link["href"]
+          hh[:submitter_url] = submitter_url
+        end
+        points = byline.css("span.score").text rescue ""
+        hh[:points] = points.sub(/points?/, "")
+      end
+      h[:comments] = arr
+      return h
+    end
+    # reddit
+    def hash_to_comment_class arr
+      page = ForumArticle.new arr
+      return page
+    end
+    # this returns an array of ForumComments, but that means the article title
+    # etc. is not there, and if the output is saved, then that info may be required.
+    def old_hash_to_comment_class arr
+      co = arr[:comments]
+      pages = Array.new
+      co.each do |h|
+        page = ForumComment.new h
+        pages << page
+      end
+      return pages
+    end
+  end # class
+end # module
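For orientation, a minimal usage sketch (not part of the gem): it assumes the gem's dependencies are loadable through abstractsiteparser.rb, and that ForumPage and ForumArticle (defined there, outside this diff) expose articles and title accessors. `_retrieve_page` is called directly here only to show the to_hash → hash_to_class flow; the real entry points live in AbstractSiteParser.

    require 'hacker/curse/redditnewsparser'

    # Build a parser for /r/ruby; initialize composes the .mobile URL
    # (note the default @host ends in "/", so the composed URL carries "//r/").
    parser = HackerCurse::RedditNewsParser.new :subforum => "ruby"

    # _retrieve_page fetches, parses (to_hash) and wraps (hash_to_class),
    # returning a ForumPage whose articles are ForumArticle objects.
    page = parser._retrieve_page "http://www.reddit.com/r/ruby/.mobile"
    page.articles.each do |art|
      puts art.title   # assumes a title accessor on ForumArticle
    end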
data/redford.yml ADDED
@@ -0,0 +1,68 @@
+---
+:binding:
+  '`': main_menu
+  =: toggle_menu
+  '>': next_forum
+  <: prev_forum
+  z: goto_article
+  o: display_links
+  <C-k>: display_links
+  <CR>: display_links
+  <F2>: choose_forum
+  <F3>: view_properties_as_tree
+:forumlist:
+- news
+- newest
+- ruby
+- programming
+- scifi
+- science
+- commandline
+- vimplugins
+- vim
+- haskell
+- java
+- scala
+- cpp
+- c_programming
+- d_language
+- golang
+- emacs
+- unix
+- linux
+- bash
+- zsh
+- python
+:browser_gui: open
+:browser_text: elinks
+:cache_path: ~/tmp/hacker-curse
+:color_schemes:
+  deep blue: &1
+    :header_bg: 20
+    :menu_bg: 19
+    :body_bg: 17
+    :status_bg: 18
+    :body_fg: :white
+    :body_detail: :green
+  medium blue:
+    :header_bg: 17
+    :menu_bg: 19
+    :body_bg: 18
+    :status_bg: 20
+    :body_fg: :white
+    :body_detail: :green
+  black body:
+    :header_bg: 236
+    :menu_bg: 236
+    :body_bg: 0
+    :status_bg: 232
+    :body_fg: :white
+    :body_detail: :green
+  grey body:
+    :header_bg: 236
+    :menu_bg: 236
+    :body_bg: 244
+    :status_bg: 250
+    :body_fg: :black
+    :body_detail: :green
+:color_scheme: *1
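A quick load sketch (again not part of the gem) showing how this file behaves when read from Ruby: the `:key` scalars come back as Symbols, and the `&1`/`*1` anchor makes `:color_scheme` an alias of the "deep blue" hash. On 2014-era Psych this loads directly; newer Psych (Ruby 3.1+) restricts Symbols by default, as noted in the comments.

    require 'yaml'

    # Symbol-prefixed scalars (":binding") deserialize as Ruby Symbols.
    # On Psych 4+ use YAML.unsafe_load_file, or pass
    # permitted_classes: [Symbol] to YAML.load_file.
    config = YAML.load_file "redford.yml"

    config[:forumlist].first    # => "news"
    config[:binding]["z"]       # => "goto_article"
    # The &1 anchor / *1 alias make :color_scheme the very same Hash
    # object as the "deep blue" scheme:
    config[:color_scheme].equal?(config[:color_schemes]["deep blue"])  # => true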
metadata ADDED
@@ -0,0 +1,112 @@
+--- !ruby/object:Gem::Specification
+name: hacker-curse
+version: !ruby/object:Gem::Version
+  version: 0.0.2
+platform: ruby
+authors:
+- kepler
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2014-09-12 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: bundler
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: '1.6'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: '1.6'
+- !ruby/object:Gem::Dependency
+  name: rake
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: 0.9.6
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: 0.9.6
+- !ruby/object:Gem::Dependency
+  name: canis
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: 0.0.3
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: 0.0.3
+description: View Hacker News and reddit articles on terminal using ncurses
+email:
+- githubkepler.50s@gishpuppy.com
+executables:
+- corvus
+- hacker-comments.rb
+- hacker-tsv.rb
+- hacker-yml.rb
+- hacker.rb
+- hacker.sh
+- redford
+extensions: []
+extra_rdoc_files: []
+files:
+- .gitignore
+- Gemfile
+- LICENSE
+- README.md
+- Rakefile
+- bin/corvus
+- bin/hacker-comments.rb
+- bin/hacker-tsv.rb
+- bin/hacker-yml.rb
+- bin/hacker.rb
+- bin/hacker.sh
+- bin/redford
+- hacker-curse.gemspec
+- lib/hacker/curse.rb
+- lib/hacker/curse/abstractsiteparser.rb
+- lib/hacker/curse/hackernewsparser.rb
+- lib/hacker/curse/redditnewsparser.rb
+- lib/hacker/curse/version.rb
+- redford.yml
+homepage: https://github.com/mare-imbrium/hacker-curse
+licenses:
+- MIT
+metadata: {}
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project:
+rubygems_version: 2.2.2
+signing_key:
+specification_version: 4
+summary: View hacker news and reddit articles on terminal using ncurses
+test_files: []
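The `!ruby/object:Gem::Requirement` entries above deserialize into live requirement objects; a small sketch using only the stock RubyGems API shows how the canis `>= 0.0.3` runtime constraint is evaluated at resolution time.

    require 'rubygems'

    # The canis dependency carries a '>= 0.0.3' requirement;
    # Gem::Requirement#satisfied_by? is what the resolver asks of it.
    req = Gem::Requirement.new ">= 0.0.3"
    req.satisfied_by? Gem::Version.new("0.0.3")   # => true
    req.satisfied_by? Gem::Version.new("0.0.2")   # => false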