hacker-curse 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +37 -0
- data/Gemfile +4 -0
- data/LICENSE +21 -0
- data/README.md +89 -0
- data/Rakefile +2 -0
- data/bin/corvus +2320 -0
- data/bin/hacker-comments.rb +182 -0
- data/bin/hacker-tsv.rb +144 -0
- data/bin/hacker-yml.rb +100 -0
- data/bin/hacker.rb +68 -0
- data/bin/hacker.sh +90 -0
- data/bin/redford +946 -0
- data/hacker-curse.gemspec +24 -0
- data/lib/hacker/curse.rb +7 -0
- data/lib/hacker/curse/abstractsiteparser.rb +353 -0
- data/lib/hacker/curse/hackernewsparser.rb +226 -0
- data/lib/hacker/curse/redditnewsparser.rb +241 -0
- data/lib/hacker/curse/version.rb +5 -0
- data/redford.yml +68 -0
- metadata +112 -0
data/lib/hacker/curse/redditnewsparser.rb
ADDED
@@ -0,0 +1,241 @@

```ruby
require 'hacker/curse/abstractsiteparser'

module HackerCurse

  class RedditNewsParser < AbstractSiteParser
    def initialize config={}
      @host = config[:host] || "http://www.reddit.com/"
      subforum = config[:subforum] || "unknown"
      _url = "#{@host}/r/#{subforum}/.mobile"
      config[:url] ||= _url
      @subforum = subforum
      super config
    end
    def _retrieve_page url
      $stderr.puts "_retrieve_page got url #{url} "
      raise "url should be string" unless url.is_a? String
      arr = to_hash url
      page = hash_to_class arr
      #to_yml "#{@subforum}OLD.yml", arr
      return page
    end
    # reddit
    # @return a ForumArticle whose comments hold the comment details
    # For each, you may retrieve +hash+ or individual items such as comment_text, points, age, age_text, submitter, head
    def _retrieve_comments url
      arr = to_hash_comment url
      pages = hash_to_comment_class arr
      return pages
    end
    # reddit: parse to a hash containing :url, :next_url and :articles (an array of hashes for each article)
    def to_hash url
      page = {}
      arr = Array.new
      doc = get_doc_for_url url
      page[:url] = url
      now = Time.now
      page[:create_date_seconds] = now.to_i
      page[:create_date] = now
      page[:subforum] = @subforum
      #filename = "r.#{subr}.yml"
      links = doc.css("li div.link")
      links.each do |li|
        h = {}
        e = li.css("a.title")
        if !e.empty?
          e = e.first
          h[:title] = e.text
          h[:article_url] = e["href"]
        end
        e = li.css("a.domain")
        if !e.empty?
          e = e.first
          h[:domain] = e.text
          h[:domain_url] = e["href"]
        end
        e = li.css("a.author")
        if !e.empty?
          e = e.first
          h[:submitter] = e.text
          h[:submitter_url] = e["href"]
        end
        e = li.css("span.buttons > a")
        if !e.empty?
          e = e.first
          #h[:comment_count] = e.text.to_i
          h[:comment_count] = e.text.to_i.to_s.rjust(4)
          h[:comments_url] = e["href"]
        else
          h[:comment_count] = "   0"
          h[:comments_url] = ""
        end
        byline = li.css("p.byline").text
        h[:byline] = byline
        # 2014-08-14 - 13:34 in some cases the byline just says "17 minutes ago" with no BAR or "by"
        # In one case in 'science' the name itself had BARs, so the parse failed
        # In another case there were no comments, so parts[2] was nil !!
        parts = byline.split("|")
        age = points = nil
        parts.each do |ppp|
          if ppp.index("points")
            points = ppp.strip
          elsif ppp.index("comments")
            # we've taken it already
          elsif ppp.index(" ago ")
            age = ppp.split("by").first.strip
          end
        end


        #age = parts.last.split("by").first.strip

        #age = parts[2].split("by").first.strip
        if age
          if age.scan(/\d+ \w/).first.nil?
            raise "Nil in age: #{age} , parts = #{parts}"
          end
        end
        h[:age_text] = age.scan(/\d+ \w/).first.rjust(4) if age
        #h[:age_text] = age
        h[:age] = human_age_to_unix(age) if age
        #h[:points] = points.to_i
        h[:points] = points.to_i.to_s.rjust(4)
        #puts points
        #puts age
        arr << h
      end
      # some cases like rising do not have next prev
      #next_prev_url = doc.css("p.nextprev").first.css("a").first["href"]
      next_prev_url = doc.css("p.nextprev").first
      if next_prev_url #&& !next_prev_url.empty?
        next_prev_url = next_prev_url.css("a").first["href"]
        page[:next_url] = next_prev_url
      end
      page[:articles] = arr
      #arr << { :next_prev_url => next_prev_url }
      #@more_url = next_prev_url
      return page
    end
    # reddit
    def hash_to_class h
      p = ForumPage.new
      p.url = h[:url]
      p.next_url = h[:next_url]
      p.create_date = h[:create_date]
      p.subforum = h[:subforum]
      #p.create_date_seconds = h[:create_date_seconds]
      art = h[:articles]
      arts = []
      art.each do |a|
        fa = ForumArticle.new a
        fa.parent = self
        arts << fa
      end
      p.articles = arts
      return p
    end
    # #child_t1_cixd8gn > ul:nth-child(1) > li:nth-child(2) > div:nth-child(1) > div:nth-child(2) > p:nth-child(1)
    # If you want to get the hierarchy of comments within comments:
    # toplevelcomments = page.css("body > ul > li > div.comment")
    # go for body > ul
    # then get the li
    # within the li look for levels using
    #   > div > ul > li
    # to get the next level of entries.
    # This will require recursively going down levels.
    # NOTE: currently this returns a flat list of comments. Actually they are nested
    # and contain block-quotes, so ideally the user should check the actual page in the browser.
    #private
    public
    # returns a hash. hash[:comments] returns an array of hashes containing comment details
    def to_hash_comment url
      # for testing i may send in a saved file, so i don't keep hitting HN
      if !File.exists? url
        unless url.index("http")
          url = @host + "/" + url
        end
      end
      # comments are nested and there is a div for that;
      # also blockquotes for when a commenter quotes another.
      doc = Nokogiri::HTML(open(url))
      h = {}
      main = doc.css("li div.link")
      maintext = main.text
      #puts maintext
      #puts main.css("a").count
      #puts main.css("a").first
      # this dumps the whole line
      h[:main_text] = maintext
      main.css("a").each_with_index do |l, i|
        # this breaks the main line into text and links
        case i
        when 0
          h[:title] = l.text
          h[:article_url] = l["href"]
        when 1
          h[:comment_count] = l.text
          h[:comments_url] = l["href"]
        when 2
          h[:submitter] = l.text
          h[:submitter_url] = l["href"]
        when 3
          h[:domain] = l.text
          h[:domain_url] = l["href"]
        end
      end
      byline = main.css("p.byline").text
      h[:byline] = byline
      points = byline.scan(/\d+ point/).first
      age_text = byline.scan(/\d+ \w+ ago/).first
      h[:points] = points
      h[:age_text] = age_text

      arr = []
      comments = doc.css("li div.comment")
      comments.each_with_index do |co, ix|
        #puts ix
        hh = {}
        arr << hh
        comment = co.css("div.md").text
        hh[:comment_text] = comment
        byline = co.css("p.byline")
        #puts "byline:"
        #puts byline
        bytext = byline.text
        hh[:head] = bytext
        #puts "bytext:"
        #puts bytext
        m = bytext.scan(/\d+ \w+ ago/)
        hh[:age_text] = m.first.sub(/ago/,"")
        hh[:age] = human_age_to_unix(m.first)
        link = byline.css("a").first
        if link
          commenter = link.text
          hh[:submitter] = commenter
          submitter_url = link["href"]
          hh[:submitter_url] = submitter_url
        end
        points = byline.css("span.score").text rescue ""
        hh[:points] = points.sub(/points?/,"")
      end
      h[:comments] = arr
      return h
    end
    # reddit
    def hash_to_comment_class arr
      page = ForumArticle.new arr
      return page
    end
    # this returns an array of ForumComments, but that means the article title
    # etc. is not there, and if the output is saved, then that info may be required.
    def old_hash_to_comment_class arr
      co = arr[:comments]
      pages = Array.new
      co.each do |h|
        page = ForumComment.new h
        pages << page
      end
      return pages
    end
  end # class
end # module
```
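For orientation, here is a minimal driver sketch. It is not part of the package: it assumes `ForumPage` and `ForumArticle` expose readers mirroring the writers used in `hash_to_class` above, and that `AbstractSiteParser#initialize` accepts the config hash the way this file passes it.

```ruby
require 'hacker/curse/redditnewsparser'

# Hypothetical usage; accessor names are inferred from the assignments
# made in hash_to_class above, not confirmed from abstractsiteparser.rb.
parser = HackerCurse::RedditNewsParser.new :subforum => "ruby"

# _retrieve_page fetches the mobile listing page and wraps it in a ForumPage.
page = parser._retrieve_page "http://www.reddit.com//r/ruby/.mobile"

puts page.articles.size   # number of parsed articles on the page
puts page.next_url        # pagination link, when p.nextprev was present
```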
data/redford.yml
ADDED
@@ -0,0 +1,68 @@
```yaml
---
:binding:
  '`': main_menu
  =: toggle_menu
  '>': next_forum
  <: prev_forum
  z: goto_article
  o: display_links
  <C-k>: display_links
  <CR>: display_links
  <F2>: choose_forum
  <F3>: view_properties_as_tree
:forumlist:
- news
- newest
- ruby
- programming
- scifi
- science
- commandline
- vimplugins
- vim
- haskell
- java
- scala
- cpp
- c_programming
- d_language
- golang
- emacs
- unix
- linux
- bash
- zsh
- python
:browser_gui: open
:browser_text: elinks
:cache_path: ~/tmp/hacker-curse
:color_schemes:
  deep blue: &1
    :header_bg: 20
    :menu_bg: 19
    :body_bg: 17
    :status_bg: 18
    :body_fg: :white
    :body_detail: :green
  medium blue:
    :header_bg: 17
    :menu_bg: 19
    :body_bg: 18
    :status_bg: 20
    :body_fg: :white
    :body_detail: :green
  black body:
    :header_bg: 236
    :menu_bg: 236
    :body_bg: 0
    :status_bg: 232
    :body_fg: :white
    :body_detail: :green
  grey body:
    :header_bg: 236
    :menu_bg: 236
    :body_bg: 244
    :status_bg: 250
    :body_fg: :black
    :body_detail: :green
:color_scheme: *1
```
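Note the `&1` anchor on the `deep blue` scheme and the `*1` alias on the last line: `:color_scheme` resolves to the very same mapping, so `deep blue` is the default active scheme. A small sketch of how Ruby sees this file (the relative path is illustrative):

```ruby
require 'yaml'

# Psych loads scalars that begin with ':' (e.g. :binding, :white) as Symbols,
# and revives the *1 alias as the same object as the &1 anchor.
# On Ruby 3.1+ (Psych 4) aliases are rejected by default, hence
# unsafe_load_file; older Rubies can use plain YAML.load_file.
cfg = YAML.unsafe_load_file "redford.yml"

cfg[:color_scheme].equal?(cfg[:color_schemes]["deep blue"])  # => true
cfg[:binding]["z"]                                           # => "goto_article"
```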
metadata
ADDED
@@ -0,0 +1,112 @@
```yaml
--- !ruby/object:Gem::Specification
name: hacker-curse
version: !ruby/object:Gem::Version
  version: 0.0.2
platform: ruby
authors:
- kepler
autorequire:
bindir: bin
cert_chain: []
date: 2014-09-12 00:00:00.000000000 Z
dependencies:
- !ruby/object:Gem::Dependency
  name: bundler
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - ~>
      - !ruby/object:Gem::Version
        version: '1.6'
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - ~>
      - !ruby/object:Gem::Version
        version: '1.6'
- !ruby/object:Gem::Dependency
  name: rake
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - '>='
      - !ruby/object:Gem::Version
        version: 0.9.6
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - '>='
      - !ruby/object:Gem::Version
        version: 0.9.6
- !ruby/object:Gem::Dependency
  name: canis
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - '>='
      - !ruby/object:Gem::Version
        version: 0.0.3
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - '>='
      - !ruby/object:Gem::Version
        version: 0.0.3
description: View Hacker News and reddit articles on terminal using ncurses
email:
- githubkepler.50s@gishpuppy.com
executables:
- corvus
- hacker-comments.rb
- hacker-tsv.rb
- hacker-yml.rb
- hacker.rb
- hacker.sh
- redford
extensions: []
extra_rdoc_files: []
files:
- .gitignore
- Gemfile
- LICENSE
- README.md
- Rakefile
- bin/corvus
- bin/hacker-comments.rb
- bin/hacker-tsv.rb
- bin/hacker-yml.rb
- bin/hacker.rb
- bin/hacker.sh
- bin/redford
- hacker-curse.gemspec
- lib/hacker/curse.rb
- lib/hacker/curse/abstractsiteparser.rb
- lib/hacker/curse/hackernewsparser.rb
- lib/hacker/curse/redditnewsparser.rb
- lib/hacker/curse/version.rb
- redford.yml
homepage: https://github.com/mare-imbrium/hacker-curse
licenses:
- MIT
metadata: {}
post_install_message:
rdoc_options: []
require_paths:
- lib
required_ruby_version: !ruby/object:Gem::Requirement
  requirements:
  - - '>='
    - !ruby/object:Gem::Version
      version: '0'
required_rubygems_version: !ruby/object:Gem::Requirement
  requirements:
  - - '>='
    - !ruby/object:Gem::Version
      version: '0'
requirements: []
rubyforge_project:
rubygems_version: 2.2.2
signing_key:
specification_version: 4
summary: View hacker news and reddit articles on terminal using ncurses
test_files: []
```
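This `metadata` document is the serialized `Gem::Specification` for the release. Its dependency entries correspond to declarations along these lines in `hacker-curse.gemspec` (a sketch inferred from the metadata above; the actual gemspec file is in this diff but not reproduced here):

```ruby
# Sketch reconstructed from the serialized metadata, not copied from
# the real hacker-curse.gemspec.
Gem::Specification.new do |spec|
  spec.name     = "hacker-curse"
  spec.version  = "0.0.2"
  spec.summary  = "View hacker news and reddit articles on terminal using ncurses"
  spec.authors  = ["kepler"]
  spec.license  = "MIT"
  spec.homepage = "https://github.com/mare-imbrium/hacker-curse"

  # These produce the :development and :runtime dependency entries above.
  spec.add_development_dependency "bundler", "~> 1.6"
  spec.add_development_dependency "rake",    ">= 0.9.6"
  spec.add_runtime_dependency     "canis",   ">= 0.0.3"
end
```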