hacker-curse 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,241 @@
1
require 'hacker/curse/abstractsiteparser'

module HackerCurse

  # Scrapes reddit's mobile HTML pages ("#{host}/r/#{subforum}/.mobile") into
  # plain hashes, then wraps them in ForumPage / ForumArticle / ForumComment
  # objects. Helpers such as get_doc_for_url, human_age_to_unix, to_yml and the
  # Forum* classes come from AbstractSiteParser / elsewhere in the library.
  class RedditNewsParser < AbstractSiteParser
    # @param config [Hash] recognized keys:
    #   :host     base URL (default "http://www.reddit.com/")
    #   :subforum subreddit name (default "unknown")
    #   :url      full page URL; derived from host+subforum when absent
    def initialize config={}
      @host = config[:host] || "http://www.reddit.com/"
      subforum = config[:subforum] || "unknown"
      # NOTE(review): default @host ends in "/", so this yields "//r/..." —
      # presumably tolerated by the server; left unchanged to avoid altering URLs.
      _url="#{@host}/r/#{subforum}/.mobile"
      config[:url] ||= _url
      @subforum = subforum
      super config
    end
    # Fetch a listing page and convert it to a ForumPage.
    # @param url [String] page URL; raises unless a String
    # @return [ForumPage]
    def _retrieve_page url
      $stderr.puts "_retrieve_page got url #{url} "
      raise "url should be string" unless url.is_a? String
      arr = to_hash url
      page = hash_to_class arr
      #to_yml "#{@subforum}OLD.yml", arr
      return page
    end
    # reddit
    # @return array of ForumComment objects
    # For each, you may retrieve +hash+ or individual items such as comment_text, points, age, age_text, submitter, head
    def _retrieve_comments url
      arr = to_hash_comment url
      pages = hash_to_comment_class arr
      return pages
    end
    # reddit: parse a listing page into a hash containing :url, :next_url and
    # :articles (an array of one hash per article).
    # Counts/points are right-justified strings (width 4) for column display.
    def to_hash url
      page = {}
      arr = Array.new
      doc = get_doc_for_url url
      page[:url] = url
      now = Time.now
      page[:create_date_seconds] = now.to_i
      page[:create_date] = now
      page[:subforum] = @subforum
      #filename = "r.#{subr}.yml"
      links = doc.css("li div.link")
      links.each do |li|
        h = {}
        e = li.css("a.title")
        if !e.empty?
          e = e.first
          h[:title] = e.text
          h[:article_url] = e["href"]
        end
        e = li.css("a.domain")
        if !e.empty?
          e = e.first
          h[:domain] = e.text
          h[:domain_url] = e["href"]
        end
        e = li.css("a.author")
        if !e.empty?
          e = e.first
          h[:submitter] = e.text
          h[:submitter_url] = e["href"]
        end
        e = li.css("span.buttons > a")
        if !e.empty?
          e = e.first
          #h[:comment_count] = e.text.to_i
          h[:comment_count] = e.text.to_i.to_s.rjust(4)
          h[:comments_url] = e["href"]
        else
          h[:comment_count] = "   0"
          h[:comments_url] = ""
        end
        byline = li.css("p.byline").text
        h[:byline] = byline
        # 2014-08-14 - 13:34 in some cases the byline just says "17 minutes ago" with no BAR or "by"
        # In one case in 'science' the name itself had BARs so the parse failed
        # In another case there was no comments, so parts[2] was nil !!
        # Hence: scan the BAR-separated segments and classify each by keyword
        # rather than relying on fixed positions.
        parts = byline.split("|")
        age = points = nil
        parts.each do |ppp|
          if ppp.index("points")
            points = ppp.strip
          elsif ppp.index("comments")
            # we've taken it already
          elsif ppp.index(" ago ")
            age = ppp.split("by").first.strip
          end
        end
        # age looks like "17 minutes ago"; keep only "NN u" (number + first
        # letter of the unit) so the column stays narrow.
        if age
          if age.scan(/\d+ \w/).first.nil?
            raise "Nil in age: #{age} , parts = #{parts}"
          end
        end
        h[:age_text]= age.scan(/\d+ \w/).first.rjust(4) if age
        h[:age] = human_age_to_unix(age) if age
        h[:points]= points.to_i.to_s.rjust(4)
        arr << h
      end
      # some cases like rising do not have next prev
      next_prev_url= doc.css("p.nextprev").first
      if next_prev_url
        next_prev_url = next_prev_url.css("a").first["href"]
        page[:next_url] = next_prev_url
      end
      page[:articles] = arr
      return page
    end
    # reddit: convert the hash produced by +to_hash+ into a ForumPage whose
    # :articles entries become ForumArticle objects (parented to this parser).
    def hash_to_class h
      p = ForumPage.new
      p.url = h[:url]
      p.next_url = h[:next_url]
      p.create_date = h[:create_date]
      p.subforum = h[:subforum]
      art = h[:articles]
      arts = []
      art.each do |a|
        fa = ForumArticle.new a
        fa.parent = self
        arts << fa
      end
      p.articles = arts
      return p
    end
    # If you want to get the hierarchy of comments within comments:
    # toplevelcomments = page.css("body > ul > li > div.comment")
    # go for body > ul, then get the li, and within the li look for levels using
    # > div > ul > li  to get the next level of entries.
    # This would require recursively descending levels.
    # NOTE: currently this returns a flat list of comments. Actually they are nested
    # and contain block-quotes, so ideally user to check the actual page on the browser
    public
    # Parse a comments page.
    # @param url [String] either a saved local file (for testing) or a URL /
    #   path fragment (prefixed with @host when not absolute).
    # @return [Hash] article header fields plus :comments, an array of hashes
    #   with :comment_text, :head, :age_text, :age, :submitter, :points, etc.
    def to_hash_comment url
      # for testing i may send in a saved file, so i don't keep hitting HN
      if !File.exist? url          # File.exists? is gone in Ruby >= 3.2
        unless url.index("http")
          url = @host + "/" + url
        end
      end
      # comments are nested and there is a div for that,
      # Also blockquotes for when commenter quotes another.
      # NOTE(review): relies on Kernel#open accepting URLs, i.e. open-uri being
      # required elsewhere in the library — confirm.
      doc = Nokogiri::HTML(open(url))
      h = {}
      main = doc.css("li div.link")
      maintext = main.text
      # this dumps the whole line
      h[:main_text] = maintext
      main.css("a").each_with_index do |l, i|
        # this breaks the main line into text and links; anchor order on the
        # mobile page is: title, comments, submitter, domain
        case i
        when 0
          h[:title] = l.text
          h[:article_url] = l["href"]
        when 1
          h[:comment_count] = l.text
          h[:comments_url] = l["href"]
        when 2
          h[:submitter] = l.text
          h[:submitter_url] = l["href"]
        when 3
          h[:domain] = l.text
          h[:domain_url] = l["href"]
        end
      end
      byline = main.css("p.byline").text
      h[:byline] = byline
      points = byline.scan(/\d+ point/).first
      age_text = byline.scan(/\d+ \w+ ago/).first
      h[:points] = points
      h[:age_text] = age_text

      arr = []
      comments = doc.css("li div.comment")
      comments.each_with_index do |co, ix|
        hh = {}
        arr << hh
        comment = co.css("div.md").text
        hh[:comment_text] = comment
        byline = co.css("p.byline")
        bytext = byline.text
        hh[:head] = bytext
        m = bytext.scan(/\d+ \w+ ago/)
        # guard: some bylines carry no "<n> <unit> ago" text (see the listing
        # parser above) — previously this raised NoMethodError on nil
        if m.first
          hh[:age_text] = m.first.sub(/ago/,"")
          hh[:age] = human_age_to_unix(m.first)
        end
        link = byline.css("a").first
        if link
          commenter = link.text
          hh[:submitter] = commenter
          submitter_url = link["href"]
          hh[:submitter_url] = submitter_url
        end
        points = byline.css("span.score").text rescue ""
        hh[:points] = points.sub(/points?/,"")
      end
      h[:comments] = arr
      return h
    end
    # reddit: wrap the comments hash in a single ForumArticle (which exposes
    # the article header and its comments together).
    def hash_to_comment_class arr
      page = ForumArticle.new arr
      return page
    end
    # this returns an array of ForumComments but that means the article title
    # etc is not there, and if the output is saved, then that info may be required.
    def old_hash_to_comment_class arr
      co = arr[:comments]
      pages = Array.new
      co.each do |h|
        page = ForumComment.new h
        pages << page
      end
      return pages
    end
  end # class
end # module
@@ -0,0 +1,5 @@
1
module Hacker
  module Curse
    # Gem version (keep in sync with the gemspec / release tag).
    # Frozen so callers cannot accidentally mutate the shared constant.
    VERSION = "0.0.2".freeze
  end
end
data/redford.yml ADDED
@@ -0,0 +1,68 @@
1
+ ---
2
+ :binding:
3
+ '`': main_menu
4
+ =: toggle_menu
5
+ '>': next_forum
6
+ <: prev_forum
7
+ z: goto_article
8
+ o: display_links
9
+ <C-k>: display_links
10
+ <CR>: display_links
11
+ <F2>: choose_forum
12
+ <F3>: view_properties_as_tree
13
+ :forumlist:
14
+ - news
15
+ - newest
16
+ - ruby
17
+ - programming
18
+ - scifi
19
+ - science
20
+ - commandline
21
+ - vimplugins
22
+ - vim
23
+ - haskell
24
+ - java
25
+ - scala
26
+ - cpp
27
+ - c_programming
28
+ - d_language
29
+ - golang
30
+ - emacs
31
+ - unix
32
+ - linux
33
+ - bash
34
+ - zsh
35
+ - python
36
+ :browser_gui: open
37
+ :browser_text: elinks
38
+ :cache_path: ~/tmp/hacker-curse
39
+ :color_schemes:
40
+ deep blue: &1
41
+ :header_bg: 20
42
+ :menu_bg: 19
43
+ :body_bg: 17
44
+ :status_bg: 18
45
+ :body_fg: :white
46
+ :body_detail: :green
47
+ medium blue:
48
+ :header_bg: 17
49
+ :menu_bg: 19
50
+ :body_bg: 18
51
+ :status_bg: 20
52
+ :body_fg: :white
53
+ :body_detail: :green
54
+ black body:
55
+ :header_bg: 236
56
+ :menu_bg: 236
57
+ :body_bg: 0
58
+ :status_bg: 232
59
+ :body_fg: :white
60
+ :body_detail: :green
61
+ grey body:
62
+ :header_bg: 236
63
+ :menu_bg: 236
64
+ :body_bg: 244
65
+ :status_bg: 250
66
+ :body_fg: :black
67
+ :body_detail: :green
68
+ :color_scheme: *1
metadata ADDED
@@ -0,0 +1,112 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: hacker-curse
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - kepler
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-09-12 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.6'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.6'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: 0.9.6
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: 0.9.6
41
+ - !ruby/object:Gem::Dependency
42
+ name: canis
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: 0.0.3
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: 0.0.3
55
+ description: View Hacker News and reddit articles on terminal using ncurses
56
+ email:
57
+ - githubkepler.50s@gishpuppy.com
58
+ executables:
59
+ - corvus
60
+ - hacker-comments.rb
61
+ - hacker-tsv.rb
62
+ - hacker-yml.rb
63
+ - hacker.rb
64
+ - hacker.sh
65
+ - redford
66
+ extensions: []
67
+ extra_rdoc_files: []
68
+ files:
69
+ - .gitignore
70
+ - Gemfile
71
+ - LICENSE
72
+ - README.md
73
+ - Rakefile
74
+ - bin/corvus
75
+ - bin/hacker-comments.rb
76
+ - bin/hacker-tsv.rb
77
+ - bin/hacker-yml.rb
78
+ - bin/hacker.rb
79
+ - bin/hacker.sh
80
+ - bin/redford
81
+ - hacker-curse.gemspec
82
+ - lib/hacker/curse.rb
83
+ - lib/hacker/curse/abstractsiteparser.rb
84
+ - lib/hacker/curse/hackernewsparser.rb
85
+ - lib/hacker/curse/redditnewsparser.rb
86
+ - lib/hacker/curse/version.rb
87
+ - redford.yml
88
+ homepage: https://github.com/mare-imbrium/hacker-curse
89
+ licenses:
90
+ - MIT
91
+ metadata: {}
92
+ post_install_message:
93
+ rdoc_options: []
94
+ require_paths:
95
+ - lib
96
+ required_ruby_version: !ruby/object:Gem::Requirement
97
+ requirements:
98
+ - - '>='
99
+ - !ruby/object:Gem::Version
100
+ version: '0'
101
+ required_rubygems_version: !ruby/object:Gem::Requirement
102
+ requirements:
103
+ - - '>='
104
+ - !ruby/object:Gem::Version
105
+ version: '0'
106
+ requirements: []
107
+ rubyforge_project:
108
+ rubygems_version: 2.2.2
109
+ signing_key:
110
+ specification_version: 4
111
+ summary: View hacker news and reddit articles on terminal using ncurses
112
+ test_files: []