hacker-curse 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,182 @@
1
+ #!/usr/bin/env ruby
2
+ # ----------------------------------------------------------------------------- #
3
+ # File: hacker-comments.rb
4
+ # Description: view comments on terminal or save to file and view
5
+ # Author: j kepler http://github.com/mare-imbrium/canis/
6
+ # Date: 2014-07-16 - 13:10
7
+ # License: MIT
8
+ # Last update: 2014-07-30 01:19
9
+ # ----------------------------------------------------------------------------- #
10
+ # hacker-comments.rb Copyright (C) 2012-2014 j kepler
11
+
12
+ ## NOTE: This uses a comments page from ycombinator.com and from the reddit MOBILE page.
13
+ # If you give a comment url from the normal reddit.com page, it will NOT work at all.
14
+ #
15
+ # The comment URL is what is given out by the hacker-tsv.rb program, and can be taken
16
+ # from reddit.com/programming/.mobile
17
+ #
18
+ # This is a sample front-end to the hacker-curse and prints out
19
+ # comments given a comment url.
20
+ # If the comment url is given, it determines the host from the URL.
21
+ #
22
+ # Two formats are provided:
23
+ # - line : each item is in a separate line, which can be used for further processing
24
+ # - compact : some fields clubbed together on a line, to make viewing easier
25
+ # One may have the output saved to a YML file using '-y' and further use that by loading it into a hash.
26
+ #
27
+ # In case, the comments page is saved to disk, you may provide the file name, but then you must give
28
+ # the host name also, so we know which parser to use.
29
+ #
30
+ require 'hacker/curse/hackernewsparser.rb'
31
+ require 'hacker/curse/redditnewsparser.rb'
32
+
33
# Prints an article and its comments to stdout with one field per line, so the
# output can be fed to further processing.
#
# article - object responding to title, article_url, submitter, points,
#           age_text, comment_count and comments. Each comment must respond
#           to submitter, submitter_url, age_text, age, points and comment_text.
#
# When article.comments is nil, a message is written to stderr and the process
# exits with status 2.
def format_line(article)
  puts "# #{article.title}"
  puts " "
  puts article.article_url
  puts "By: #{article.submitter}"
  puts "Points: #{article.points}"
  puts "Age: #{article.age_text}"
  puts "Comments: #{article.comment_count}"
  comments = article.comments
  unless comments
    # The script-level `url` local is not visible inside a method body, so the
    # article's own URL is reported instead (referencing `url` raised NameError).
    $stderr.puts "No comments found for #{article.article_url} "
    exit(2)
  end
  puts " "
  comments.each_with_index do |e, i|
    puts "## : #{i + 1}"
    puts "By: #{e.submitter} (#{e.submitter_url}) "
    puts "Age: #{e.age_text}"
    puts "Seconds: #{e.age} "
    # points may be absent or an empty string; skip the line in both cases
    puts "Points: #{e.points} " if e.points && e.points != ""
    puts "Text:"
    puts e.comment_text
    puts " "
  end
end
61
# Prints an article and its comments to stdout in a compact layout, with several
# fields combined on one line and decorated using the BOLD/BOLDOFF,
# HEADER_START/HEADER_END, ULINE and CLEAR constants.
#
# article - object responding to title, article_url, submitter, points,
#           age_text, comment_count and comments. Each comment must respond
#           to submitter, age_text, points and comment_text.
#
# When article.comments is nil, a message is written to stderr and the process
# exits with status 2.
def format_compact(article)
  puts "# #{HEADER_START} #{article.title}#{HEADER_END}"
  puts " "
  puts "(#{ULINE}#{article.article_url}#{CLEAR}) "
  puts "#{article.points} | #{BOLD} by #{article.submitter} #{BOLDOFF} | #{article.age_text} | #{article.comment_count} "
  comments = article.comments
  unless comments
    # The script-level `url` local is not visible inside a method body, so the
    # article's own URL is reported instead (referencing `url` raised NameError).
    $stderr.puts "No comments found for #{article.article_url} "
    exit(2)
  end
  puts " "
  comments.each_with_index do |e, i|
    puts "## :#{HEADER_START} #{i + 1} #{HEADER_END}"
    print "#{BOLD} #{e.submitter} #{BOLDOFF} | #{e.age_text} ago"
    # points may be absent or an empty string; omit the segment in both cases
    print "| #{e.points} points " if e.points && e.points != ""
    print "\n"
    puts e.comment_text
    puts " "
  end
end
84
# ANSI escape sequences for terminal decoration. Frozen so that the shared
# constant strings cannot be mutated by accident.
CLEAR          = "\e[0m".freeze
COLOR_BOLD     = "\e[1m".freeze
COLOR_BOLD_OFF = "\e[22m".freeze
RED            = "\e[31m".freeze
ON_RED         = "\e[41m".freeze
GREEN          = "\e[32m".freeze
YELLOW         = "\e[33m".freeze
BLUE           = "\e[1;34m".freeze

ON_BLUE        = "\e[44m".freeze
REVERSE        = "\e[7m".freeze
UNDERLINE      = "\e[4m".freeze

# Markup used by the compact formatter: ANSI attributes when stdout is a
# terminal, plain-text markers when the output is piped or redirected.
if $stdout.tty?
  BOLD         = COLOR_BOLD
  BOLDOFF      = COLOR_BOLD_OFF
  HEADER_START = ON_BLUE
  HEADER_END   = CLEAR
  ULINE        = UNDERLINE
else
  BOLD         = "**".freeze
  BOLDOFF      = "**".freeze
  HEADER_START = "".freeze
  HEADER_END   = "".freeze
  ULINE        = "".freeze
end
109
+
110
# ---- command line handling and dispatch for the comments viewer ----
url = nil
host = nil
format = "line"
ymlfile = nil
# http://www.ruby-doc.org/stdlib/libdoc/optparse/rdoc/classes/OptionParser.html
require 'optparse'
# options is handed as-is to the parser constructor
options = {}
OptionParser.new do |opts|
  opts.banner = "Usage: #{$0} [options]"

  opts.on("-d SEP", String, "--delimiter", "Delimit columns with SEP") do |v|
    options[:delimiter] = v
  end
  opts.on("-H HOST", String, "--hostname", "hostname rn/hn") do |v|
    # only required when a saved file is passed in, so we know which parser to use
    host = v
  end
  opts.on("-f FORMAT", String, "--format", "write in format: compact, line") do |v|
    format = v
  end
  opts.on("-w PATH", String, "--save-html-path", "Save html to file PATH") do |v|
    options[:htmloutfile] = v
    options[:save_html] = true
  end
  opts.on("-y PATH", String, "--save-yml-path", "Save yml to file PATH") do |v|
    ymlfile = v
  end
end.parse!

# The first remaining argument is the comments URL (or a saved file name).
url = ARGV[0]
unless url
  $stderr.puts "URL of comment expected"
  exit(1)
end

# An explicit host wins. "reddit" is accepted alongside "rn" so that -H takes
# the same values as in hacker-tsv.rb and hacker-yml.rb. Any other value leaves
# hn nil and falls through to detection from the URL below.
hn = case host
     when "hn"           then HackerNewsParser.new(options)
     when "rn", "reddit" then RedditNewsParser.new(options)
     end

unless hn
  if url.include?("reddit.com")
    hn = RedditNewsParser.new(options)
  elsif url.include?("ycombinator.com")
    hn = HackerNewsParser.new(options)
  else
    $stderr.puts "Unknown host. Expecting reddit.com or ycombinator.com"
    exit(1)
  end
end

# With -y the comments are only saved as YML; nothing is printed.
if ymlfile
  hn.save_comments_as_yml ymlfile, url
  exit
end

article = hn._retrieve_comments url
# Any format other than "compact" falls back to the line format.
case format
when "compact"
  format_compact article
else
  format_line article
end
data/bin/hacker-tsv.rb ADDED
@@ -0,0 +1,144 @@
1
+ # ----------------------------------------------------------------------------- #
2
+ # File: hacker-tsv.rb
3
+ # Description: view hacker news on terminal
4
+ # Author: j kepler http://github.com/mare-imbrium/canis/
5
+ # Date: 2014-07-16 - 13:10
6
+ # License: MIT
7
+ # Last update: 2014-08-03 20:17
8
+ # ----------------------------------------------------------------------------- #
9
+ # hacker-curse.rb Copyright (C) 2012-2014 j kepler
10
+ #!/usr/bin/env ruby
11
+
12
+ require 'hacker/curse/hackernewsparser.rb'
13
+ require 'hacker/curse/redditnewsparser.rb'
14
+
15
# ---- hacker-tsv: fetch stories and print them as delimiter-separated rows ----
host = nil
# http://www.ruby-doc.org/stdlib/libdoc/optparse/rdoc/classes/OptionParser.html
require 'optparse'
# options is handed as-is to the parser constructor
options = {}
options[:num_pages] = 1
OptionParser.new do |opts|
  opts.banner = %Q{
Usage: #{$0} [options]
Outputs stories from Hacker News front page or Reddit.com as tab separated values

Examples:

Retrieves two pages of stories from Hacker News and save the retrieved HTML file
and redirect output to a file.

hacker-tsv.rb -H hn -p 2 -s news -w news.html > news.tsv

Retrieves one page of articles from reddit.com/r/ruby and save output in a file.

hacker-tsv.rb -H rn -s ruby > ruby.tsv
}

  opts.separator ""
  opts.separator "Common Options:"

  opts.on("-s subforum", String, "--subforum", "Get articles from subforum such as newest") do |v|
    options[:subforum] = v
  end
  opts.on("-H (rn|hn)", String, "--hostname", "Get articles from HOST") do |v|
    host = v
  end
  opts.on("-p N", Integer, "--pages", "Retrieve N number of pages") do |v|
    options[:num_pages] = v
  end
  opts.separator ""
  opts.separator "Specific Options:"
  opts.on("-n N", "--limit", Integer, "limit to N stories") do |v|
    options[:number] = v
  end
  opts.on("-t", "print only titles") do |v|
    options[:titles] = true
  end
  opts.on("-d SEP", String, "--delimiter", "Delimit columns with SEP") do |v|
    options[:delimiter] = v
  end
  opts.on("-u URL", String, "--url", "Get articles from URL/file") do |v|
    options[:url] = v
  end
  opts.on("-w PATH", String, "--save-html-path", "Save html to file PATH") do |v|
    options[:htmloutfile] = v
    options[:save_html] = true
  end
  # NOTE: :verbose is stored in options but this script itself does not act on it.
  opts.on("-v", "--[no-]verbose", "Print description also") do |v|
    options[:verbose] = v
  end
end.parse!

# Anything other than reddit/rn (including no -H at all) selects Hacker News.
hn = case host
     when "reddit", "rn" then RedditNewsParser.new(options)
     else HackerNewsParser.new(options)
     end

arr = hn.get_next_page
if arr.articles.nil? || arr.articles.empty?
  $stderr.puts "No articles"
  exit
end

# arr is a ForumPage, arr.first is a ForumArticle
titles_only = options[:titles]
sep = options[:delimiter] || "\t"
limit = options[:number] || arr.count

# Fixed leading columns, followed by whatever extra keys the first article has,
# so every run emits the standard columns in the same order.
headings = %w[ title age_text comment_count points article_url comments_url age submitter submitter_url ]
arr.first.keys.each do |k|
  headings << k.to_s unless headings.include?(k.to_s)
end
# byline always goes last
headings.delete("byline")
headings << "byline"

# each_with_index yields a ForumArticle, not a hash
arr.each_with_index do |e, i|
  break if i >= limit
  if titles_only
    puts "#{e[:title]}"
  else
    # header row is printed once, before the first data row
    puts headings.join(sep) if i == 0
    # A missing value becomes an empty cell, so columns stay aligned. Each cell,
    # including the last one, is followed by the separator.
    row = ""
    headings.each { |name| row << "#{e[name.to_sym]}#{sep}" }
    puts row
  end
end
138
# End of script. The statements that used to follow this exit could never run;
# they are kept below as a minimal usage sketch only:
#
#   hn = HackerNewsParser.new
#   page = hn.get_next_page
#   sep = "\t"
#   page.each do |art|
#     puts "#{art.title}#{sep}#{art.points}#{sep}#{art.age_text}"
#   end
exit
data/bin/hacker-yml.rb ADDED
@@ -0,0 +1,100 @@
1
+ # ----------------------------------------------------------------------------- #
2
+ # File: hacker-yml.rb
3
+ # Description: saves hacker or reddit output as a YML file
4
+ # Author: j kepler http://github.com/mare-imbrium/canis/
5
+ # Date: 2014-08-05 - 01:08
6
+ # License: MIT
7
+ # Last update: 2014-08-05 13:21
8
+ # ----------------------------------------------------------------------------- #
9
+ # hacker-yml.rb Copyright (C) 2012-2014 j kepler
10
+ #!/usr/bin/env ruby
11
+
12
+ require 'hacker/curse/hackernewsparser.rb'
13
+ require 'hacker/curse/redditnewsparser.rb'
14
+
15
# ---- hacker-yml: fetch stories and save them to a YML file ----
host = nil
outputfile = nil
# http://www.ruby-doc.org/stdlib/libdoc/optparse/rdoc/classes/OptionParser.html
require 'optparse'
# options is handed as-is to the parser constructor
options = {}
options[:num_pages] = 1
OptionParser.new do |opts|
  opts.banner = %Q{
Usage: #{$0} [options]
Outputs stories from Hacker News front page or Reddit.com to a YML file

Examples:


Retrieves one page of articles from reddit.com/r/ruby and save yml output in a file (default is
<subforum>.yml).

hacker-yml.rb -H rn -s ruby

hacker-yml.rb -H rn -s ruby -y ~/tmp/ruby.yml

Retrieves two pages of stories from Hacker News and save the retrieved HTML file to news.html,
and redirect YML output to news.yml (default).

hacker-yml.rb -H hn -p 2 -s news -w news.html
}

  opts.separator ""
  opts.separator "Common Options:"

  opts.on("-s subforum", String, "--subforum", "Get articles from subforum such as newest") do |v|
    options[:subforum] = v
  end
  opts.on("-H (rn|hn)", String, "--hostname", "Get articles from HOST") do |v|
    host = v
  end
  opts.on("-p N", Integer, "--pages", "Retrieve N number of pages") do |v|
    options[:num_pages] = v
  end
  opts.separator ""
  opts.separator "Specific Options:"
  opts.on("-n N", "--limit", Integer, "limit to N stories") do |v|
    options[:number] = v
  end
  opts.on("-t", "print only titles") do |v|
    options[:titles] = true
  end
  opts.on("-d SEP", String, "--delimiter", "Delimit columns with SEP") do |v|
    options[:delimiter] = v
  end
  opts.on("-u URL", String, "--url", "Get articles from URL/file") do |v|
    options[:url] = v
  end
  opts.on("-w PATH", String, "--save-html-path", "Save html to file PATH") do |v|
    options[:htmloutfile] = v
    options[:save_html] = true
  end
  opts.on("-y PATH", String, "--save-yml-path", "Save YML to file PATH") do |v|
    outputfile = v
  end
  opts.on("-v", "--[no-]verbose", "Print description also") do |v|
    options[:verbose] = v
  end
end.parse!

# Anything other than reddit/rn (including no -H at all) selects Hacker News.
hn = case host
     when "reddit", "rn" then RedditNewsParser.new(options)
     else HackerNewsParser.new(options)
     end

# Without -y the file name is derived from the subforum ("/" becomes "__").
# Without a subforum either there is nothing to derive it from, so stop with a
# clear message instead of failing with NoMethodError on nil.
unless outputfile
  subforum = options[:subforum]
  unless subforum
    $stderr.puts "Cannot derive output file name: give a subforum (-s) or an output path (-y)"
    exit(1)
  end
  outputfile = subforum.gsub("/", "__") + ".yml"
end

arr = hn.get_next_page
hn.save_page_as_yml outputfile, arr
$stderr.puts "Saved to #{outputfile} "
# The page is saved even when it has no articles; this is only a warning.
if arr.articles.nil? || arr.articles.empty?
  $stderr.puts "No articles"
end
data/bin/hacker.rb ADDED
@@ -0,0 +1,68 @@
1
+ #!/usr/bin/env ruby -w
2
+ #
3
+ # This is just a wrapper over hacker-tsv.rb
4
+ # If called with news or newest it calls hacker news, otherwise by default it will call
5
+ # reddit.com for other args. Use -H to specify host if it is hacker news.
6
+ # It also puts the output in a TSV file.
7
+ # Currently, corvus is calling this.
8
+
9
# ---- hacker: wrapper that runs hacker-tsv.rb and stores its output in a TSV file ----
pages = 1
outputfile = nil
hostname = nil
# http://www.ruby-doc.org/stdlib/libdoc/optparse/rdoc/classes/OptionParser.html
require 'optparse'
require 'shellwords'
prog = File.basename $0
OptionParser.new do |opts|
  opts.banner = %Q{
Usage: #{prog} [options] subforum
Examples:
#{prog} --pages 2 news
#{prog} programming
#{prog} programming/new

subforum can be news / newest (Hacker News)
or any subforum from reddit such as programming, ruby, vim, zsh, commandline, etc.

This program is a wrapper over hacker-tsv.rb and writes the output into a tab separated file
of the same name as the subforum, with a ".tsv" extension, such as news.tsv or ruby.tsv.
}

  opts.on("-H HN", String, "--hostname", "hostname [hn|rn]") do |v|
    hostname = v
  end
  opts.on("-p pages", Integer, "--pages", "pages to retrieve ") do |v|
    pages = v
  end
  opts.on("-o outputfile", String, "--outputfile", "name of TSV file to create ") do |v|
    outputfile = v
  end
end.parse!

subr = ARGV[0] || "news"
# "/" in a subforum such as programming/new is not usable in a file name
subr2 = subr.gsub("/", "__")
outputfile ||= "#{subr2}.tsv"
outputhtml = "#{subr2}.html"

puts "subreddit is: #{subr} "

# The Hacker News sections always go to hn; everything else goes to the host
# given with -H, defaulting to reddit.
host_arg = case subr
           when "news", "newest", "ask", "jobs", "show" then "hn"
           else hostname || "rn"
           end

# Run hacker-tsv.rb with an argument vector instead of a shell string, so the
# user-supplied subforum and file names are never interpreted by a shell.
# Standard output is redirected to the TSV file through the :out option.
cmd = ["hacker-tsv.rb", "-H", host_arg, "-p", pages.to_s, "-s", subr, "-w", outputhtml]
ret = system(*cmd, :out => outputfile)
status = $?
unless ret
  # print a shell-quoted equivalent of the command that failed
  $stderr.puts "#{Shellwords.join(cmd)} > #{Shellwords.escape(outputfile)}"
  $stderr.puts "hacker-tsv returned with error/s #{ret}, #{status}"
  # exit needs an Integer (or boolean), not a Process::Status; exitstatus is
  # nil when the child was killed by a signal, hence the fallback to 1
  exit((status && status.exitstatus) || 1)
end