hacker-curse 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,241 @@
1
+ require 'hacker/curse/abstractsiteparser'
2
+
3
+ module HackerCurse
4
+
5
+ class RedditNewsParser < AbstractSiteParser
6
# Sets up the parser for one subreddit and delegates the rest of the
# configuration to AbstractSiteParser via +super+.
#
# config - Hash with optional keys:
#   :host     - base site URL (default "http://www.reddit.com/")
#   :subforum - subreddit name (default "unknown")
#   :url      - full page URL; derived from host and subforum when absent
def initialize config={}
  @host = config[:host] || "http://www.reddit.com/"
  subforum = config[:subforum] || "unknown"
  # chomp the trailing slash so the default host does not yield a
  # double slash ("http://www.reddit.com//r/...") in the built URL.
  _url = "#{@host.chomp('/')}/r/#{subforum}/.mobile"
  config[:url] ||= _url
  @subforum = subforum
  super config
end
14
# Fetches the listing page at +url+ and converts it to a ForumPage.
# url - String page URL; raises unless it is a String.
# Returns the ForumPage built from the parsed article hash.
def _retrieve_page url
  $stderr.puts "_retrieve_page got url #{url} "
  raise "url should be string" unless url.is_a? String
  parsed = to_hash(url)
  hash_to_class(parsed)
end
22
+ # reddit
23
+ # @return array of ForumComment objects
24
+ # For each, you may retrieve +hash+ or individual items such as comment_text, points, age, age_text, submitter, head
25
# Retrieves and parses the comments page at +url+.
# Returns a ForumArticle wrapping the comments; each comment exposes
# +hash+ and fields such as comment_text, points, age, age_text,
# submitter and head.
def _retrieve_comments url
  comment_hash = to_hash_comment(url)
  hash_to_comment_class(comment_hash)
end
30
+ # reddit parse to hash containing :url, :mext_url and :articles (an array of hashes for each article)
31
# Parses the subforum listing page at +url+ into a plain hash.
#
# Returns a Hash with :url, :create_date, :create_date_seconds,
# :subforum, :next_url (only when the page has a next/prev link) and
# :articles — an Array of per-article hashes with keys such as
# :title, :article_url, :domain, :domain_url, :submitter,
# :submitter_url, :comment_count, :comments_url, :byline, :age_text,
# :age and :points.
def to_hash url
  page = {}
  arr = []
  doc = get_doc_for_url url
  page[:url] = url
  now = Time.now
  page[:create_date_seconds] = now.to_i
  page[:create_date] = now
  page[:subforum] = @subforum
  links = doc.css("li div.link")
  links.each do |li|
    h = {}
    e = li.css("a.title")
    unless e.empty?
      e = e.first
      h[:title] = e.text
      h[:article_url] = e["href"]
    end
    e = li.css("a.domain")
    unless e.empty?
      e = e.first
      h[:domain] = e.text
      h[:domain_url] = e["href"]
    end
    e = li.css("a.author")
    unless e.empty?
      e = e.first
      h[:submitter] = e.text
      h[:submitter_url] = e["href"]
    end
    e = li.css("span.buttons > a")
    if !e.empty?
      e = e.first
      # right-justified to 4 chars so columns line up in the curses UI
      h[:comment_count] = e.text.to_i.to_s.rjust(4)
      h[:comments_url] = e["href"]
    else
      # keep the same 4-char width as the populated branch above
      h[:comment_count] = "0".rjust(4)
      h[:comments_url] = ""
    end
    byline = li.css("p.byline").text
    h[:byline] = byline
    # The byline is "|"-separated, but sometimes it just says
    # "17 minutes ago" with no BAR or "by"; in 'science' a name itself
    # contained BARs, and a post with no comments left a part nil — so
    # each part is inspected rather than indexed positionally.
    parts = byline.split("|")
    age = points = nil
    parts.each do |ppp|
      if ppp.index("points")
        points = ppp.strip
      elsif ppp.index("comments")
        # comment count was already taken from span.buttons above
      elsif ppp.index(" ago ")
        age = ppp.split("by").first.strip
      end
    end
    if age
      # scan once and reuse instead of re-scanning for each field
      age_fragment = age.scan(/\d+ \w/).first
      raise "Nil in age: #{age} , parts = #{parts}" if age_fragment.nil?
      h[:age_text] = age_fragment.rjust(4)
      h[:age] = human_age_to_unix(age)
    end
    # nil.to_i is 0, so a missing points part becomes "   0"
    h[:points] = points.to_i.to_s.rjust(4)
    arr << h
  end
  # some listings (e.g. "rising") do not have a next/prev block at all
  next_prev = doc.css("p.nextprev").first
  if next_prev
    page[:next_url] = next_prev.css("a").first["href"]
  end
  page[:articles] = arr
  return page
end
119
+ # reddit
120
# Converts the hash produced by +to_hash+ into a ForumPage whose
# :articles entries become ForumArticle objects (each with its parent
# set to this parser).
def hash_to_class h
  page = ForumPage.new
  page.url = h[:url]
  page.next_url = h[:next_url]
  page.create_date = h[:create_date]
  page.subforum = h[:subforum]
  page.articles = h[:articles].map do |article_hash|
    article = ForumArticle.new article_hash
    article.parent = self
    article
  end
  page
end
137
+ # #child_t1_cixd8gn > ul:nth-child(1) > li:nth-child(2) > div:nth-child(1) > div:nth-child(2) > p:nth-child(1)
138
+ # If you want to get the heirarchy, of comments within comments.
139
+ # toplevelcomments = page.css("body > ul > li > div.comment")
140
+ # go for body > ul
141
+ # then get the li
142
+ # within the li look for levels using
143
+ # > div > ul > li
144
+ # to get the next level of entries
145
+ # This will require recursive going down levels
146
+ # NOTE: currently this returns a flat list of comments. Actually they are nested
147
+ # and contain block-quotes, so ideally user to check the actual page on the browser
148
+ #private
149
+ public
150
+ # returns a hash. hash[:comments] returns an array of hashes containing comment details
151
# Parses a comments page into a plain hash.
#
# Returns a Hash with article details (:main_text, :title,
# :article_url, :comment_count, :comments_url, :submitter,
# :submitter_url, :domain, :domain_url, :byline, :points, :age_text)
# and :comments — an Array of per-comment hashes (:comment_text,
# :head, :age_text, :age, :submitter, :submitter_url, :points).
# NOTE: this returns a flat list of comments even though the page
# nests them; check the actual page in a browser for the hierarchy.
def to_hash_comment url
  # for testing a saved file may be passed in, so HN is not hit;
  # otherwise a relative path is made absolute against @host.
  # File.exists? is deprecated (removed in Ruby 3.2); use File.exist?.
  unless File.exist? url
    unless url.index("http")
      # chomp the trailing slash so the default host does not yield "//"
      url = @host.chomp("/") + "/" + url
    end
  end
  # comments are nested and there is a div for that,
  # also blockquotes for when a commenter quotes another.
  # NOTE(review): Kernel#open on a URL needs open-uri and will also open
  # local paths or pipes — confirm url is trusted upstream.
  doc = Nokogiri::HTML(open(url))
  h = {}
  main = doc.css("li div.link")
  # the whole header line; broken into its individual links below
  h[:main_text] = main.text
  main.css("a").each_with_index do |l, i|
    # links appear in a fixed order: title, comments, author, domain
    case i
    when 0
      h[:title] = l.text
      h[:article_url] = l["href"]
    when 1
      h[:comment_count] = l.text
      h[:comments_url] = l["href"]
    when 2
      h[:submitter] = l.text
      h[:submitter_url] = l["href"]
    when 3
      h[:domain] = l.text
      h[:domain_url] = l["href"]
    end
  end
  byline = main.css("p.byline").text
  h[:byline] = byline
  h[:points] = byline.scan(/\d+ point/).first
  h[:age_text] = byline.scan(/\d+ \w+ ago/).first

  arr = []
  comments = doc.css("li div.comment")
  comments.each_with_index do |co, ix|
    hh = {}
    arr << hh
    hh[:comment_text] = co.css("div.md").text
    byline = co.css("p.byline")
    bytext = byline.text
    hh[:head] = bytext
    m = bytext.scan(/\d+ \w+ ago/)
    # guard: a byline without "N units ago" used to raise NoMethodError
    # on nil.sub; skip the age fields instead of crashing the whole page
    if m.first
      hh[:age_text] = m.first.sub(/ago/,"")
      hh[:age] = human_age_to_unix(m.first)
    end
    link = byline.css("a").first
    if link
      hh[:submitter] = link.text
      hh[:submitter_url] = link["href"]
    end
    points = byline.css("span.score").text rescue ""
    hh[:points] = points.sub(/points?/,"")
  end
  h[:comments] = arr
  return h
end
224
+ # reddit
225
# Wraps the comment hash from +to_hash_comment+ in a ForumArticle so
# the article title etc. stay alongside the comments.
def hash_to_comment_class arr
  ForumArticle.new arr
end
229
+ # this returns an array of Forumcomments but that means the article title
230
+ # etc is not there, and if the output is saved, then that info may be required.
231
# Legacy variant: returns a flat Array of ForumComment objects.
# This drops the article title etc., which may be needed if the output
# is saved — prefer hash_to_comment_class.
def old_hash_to_comment_class arr
  arr[:comments].map { |comment_hash| ForumComment.new comment_hash }
end
240
+ end # class
241
+ end # module
@@ -0,0 +1,5 @@
1
# Version constant for the hacker-curse gem.
module Hacker
  module Curse
    VERSION = '0.0.2'
  end
end
data/redford.yml ADDED
@@ -0,0 +1,68 @@
1
+ ---
2
+ :binding:
3
+ '`': main_menu
4
+ =: toggle_menu
5
+ '>': next_forum
6
+ <: prev_forum
7
+ z: goto_article
8
+ o: display_links
9
+ <C-k>: display_links
10
+ <CR>: display_links
11
+ <F2>: choose_forum
12
+ <F3>: view_properties_as_tree
13
+ :forumlist:
14
+ - news
15
+ - newest
16
+ - ruby
17
+ - programming
18
+ - scifi
19
+ - science
20
+ - commandline
21
+ - vimplugins
22
+ - vim
23
+ - haskell
24
+ - java
25
+ - scala
26
+ - cpp
27
+ - c_programming
28
+ - d_language
29
+ - golang
30
+ - emacs
31
+ - unix
32
+ - linux
33
+ - bash
34
+ - zsh
35
+ - python
36
+ :browser_gui: open
37
+ :browser_text: elinks
38
+ :cache_path: ~/tmp/hacker-curse
39
+ :color_schemes:
40
+ deep blue: &1
41
+ :header_bg: 20
42
+ :menu_bg: 19
43
+ :body_bg: 17
44
+ :status_bg: 18
45
+ :body_fg: :white
46
+ :body_detail: :green
47
+ medium blue:
48
+ :header_bg: 17
49
+ :menu_bg: 19
50
+ :body_bg: 18
51
+ :status_bg: 20
52
+ :body_fg: :white
53
+ :body_detail: :green
54
+ black body:
55
+ :header_bg: 236
56
+ :menu_bg: 236
57
+ :body_bg: 0
58
+ :status_bg: 232
59
+ :body_fg: :white
60
+ :body_detail: :green
61
+ grey body:
62
+ :header_bg: 236
63
+ :menu_bg: 236
64
+ :body_bg: 244
65
+ :status_bg: 250
66
+ :body_fg: :black
67
+ :body_detail: :green
68
+ :color_scheme: *1
metadata ADDED
@@ -0,0 +1,112 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: hacker-curse
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - kepler
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-09-12 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.6'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.6'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: 0.9.6
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: 0.9.6
41
+ - !ruby/object:Gem::Dependency
42
+ name: canis
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: 0.0.3
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: 0.0.3
55
+ description: View Hacker News and reddit articles on terminal using ncurses
56
+ email:
57
+ - githubkepler.50s@gishpuppy.com
58
+ executables:
59
+ - corvus
60
+ - hacker-comments.rb
61
+ - hacker-tsv.rb
62
+ - hacker-yml.rb
63
+ - hacker.rb
64
+ - hacker.sh
65
+ - redford
66
+ extensions: []
67
+ extra_rdoc_files: []
68
+ files:
69
+ - .gitignore
70
+ - Gemfile
71
+ - LICENSE
72
+ - README.md
73
+ - Rakefile
74
+ - bin/corvus
75
+ - bin/hacker-comments.rb
76
+ - bin/hacker-tsv.rb
77
+ - bin/hacker-yml.rb
78
+ - bin/hacker.rb
79
+ - bin/hacker.sh
80
+ - bin/redford
81
+ - hacker-curse.gemspec
82
+ - lib/hacker/curse.rb
83
+ - lib/hacker/curse/abstractsiteparser.rb
84
+ - lib/hacker/curse/hackernewsparser.rb
85
+ - lib/hacker/curse/redditnewsparser.rb
86
+ - lib/hacker/curse/version.rb
87
+ - redford.yml
88
+ homepage: https://github.com/mare-imbrium/hacker-curse
89
+ licenses:
90
+ - MIT
91
+ metadata: {}
92
+ post_install_message:
93
+ rdoc_options: []
94
+ require_paths:
95
+ - lib
96
+ required_ruby_version: !ruby/object:Gem::Requirement
97
+ requirements:
98
+ - - '>='
99
+ - !ruby/object:Gem::Version
100
+ version: '0'
101
+ required_rubygems_version: !ruby/object:Gem::Requirement
102
+ requirements:
103
+ - - '>='
104
+ - !ruby/object:Gem::Version
105
+ version: '0'
106
+ requirements: []
107
+ rubyforge_project:
108
+ rubygems_version: 2.2.2
109
+ signing_key:
110
+ specification_version: 4
111
+ summary: View hacker news and reddit articles on terminal using ncurses
112
+ test_files: []