RubyGems - hacker-curse - Versions diffs - 0.0.2 - Mend

hacker-curse 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

checksums.yaml +7 -0
data/.gitignore +37 -0
data/Gemfile +4 -0
data/LICENSE +21 -0
data/README.md +89 -0
data/Rakefile +2 -0
data/bin/corvus +2320 -0
data/bin/hacker-comments.rb +182 -0
data/bin/hacker-tsv.rb +144 -0
data/bin/hacker-yml.rb +100 -0
data/bin/hacker.rb +68 -0
data/bin/hacker.sh +90 -0
data/bin/redford +946 -0
data/hacker-curse.gemspec +24 -0
data/lib/hacker/curse.rb +7 -0
data/lib/hacker/curse/abstractsiteparser.rb +353 -0
data/lib/hacker/curse/hackernewsparser.rb +226 -0
data/lib/hacker/curse/redditnewsparser.rb +241 -0
data/lib/hacker/curse/version.rb +5 -0
data/redford.yml +68 -0
metadata +112 -0

data/bin/hacker-comments.rb ADDED Viewed

@@ -0,0 +1,182 @@
+#!/usr/bin/env ruby
+# ----------------------------------------------------------------------------- #
+#         File: hacker-comments.rb
+#  Description: view comments on terminal or save to file and view
+#       Author: j kepler  http://github.com/mare-imbrium/canis/
+#         Date: 2014-07-16 - 13:10
+#      License: MIT
+#  Last update: 2014-07-30 01:19
+# ----------------------------------------------------------------------------- #
+#  hacker-comments.rb  Copyright (C) 2012-2014 j kepler
+## NOTE: This uses a comments page from ycombinator.com and from the reddit MOBILE page.
+#  If you give a comment url from the normal reddit.com page, it will NOT work at all.
+#
+#  The comment URL is what is given out by the hacker-tsv.rb program, and can be taken
+#   from reddit.com/programming/.mobile
+#
+# This is a sample front-end to the hacker-curse and prints out
+#  comments given a comment url.
+#  If the comment url is given it determines the host from the URL.
+#
+# Two formats are provided:
+#   - line : each item is in a separate line, which can be used for further processing
+#   - compact : some fields clubbed together on a line, to make viewing easier
+# One may have the output saved to a YML file using '-y' and further use that by loading it into a hash.
+#
+# In case, the comments page is saved to disk, you may provide the file name, but then you must give
+#  the host name also, so we know which parser to use.
+#
+require 'hacker/curse/hackernewsparser.rb'
+require 'hacker/curse/redditnewsparser.rb'
+def format_line article
+  puts "# #{article.title}"
+  puts " "
+  puts article.article_url
+  puts "By:         #{article.submitter}"
+  puts "Points:     #{article.points}"
+  puts "Age:        #{article.age_text}"
+  puts "Comments:   #{article.comment_count}"
+  comments = article.comments
+  unless comments
+    $stderr.puts "No comments found for #{url} "
+    exit(2)
+  end
+  puts " "
+  comments.each_with_index do |e, i|
+    ctr = i+1
+    puts "## : #{ctr}"
+    # #{e.head} "
+    #puts " #{e.age_text} | #{e.age} | #{e.points} | #{e.submitter} | #{e.submitter_url} "
+    puts "By:        #{e.submitter} (#{e.submitter_url}) "
+    puts "Age:       #{e.age_text}"
+    puts "Seconds:   #{e.age} "
+    puts "Points:    #{e.points} " if e.points and e.points != ""
+    puts "Text:"
+    puts e.comment_text
+    puts " "
+  end
+end
+def format_compact article
+  puts "# #{HEADER_START} #{article.title}#{HEADER_END}"
+  puts " "
+  puts "(#{ULINE}#{article.article_url}#{CLEAR}) "
+  puts "#{article.points} | #{BOLD} by #{article.submitter} #{BOLDOFF} | #{article.age_text} | #{article.comment_count} "
+  comments = article.comments
+  unless comments
+    $stderr.puts "No comments found for #{url} "
+    exit(2)
+  end
+  puts " "
+  comments.each_with_index do |e, i|
+    ctr = i+1
+    puts "## :#{HEADER_START} #{ctr} #{HEADER_END}"
+    # #{e.head} "
+    #puts " #{e.age_text} | #{e.age} | #{e.points} | #{e.submitter} | #{e.submitter_url} "
+    print "#{BOLD} #{e.submitter} #{BOLDOFF} | #{e.age_text} ago"
+    print "| #{e.points} points " if e.points and e.points != ""
+    print "\n"
+    puts e.comment_text
+    puts " "
+  end
+end
+CLEAR      = "\e[0m"
+COLOR_BOLD       = "\e[1m"
+COLOR_BOLD_OFF       = "\e[22m"
+RED        = "\e[31m"
+ON_RED        = "\e[41m"
+GREEN      = "\e[32m"
+YELLOW     = "\e[33m"
+BLUE       = "\e[1;34m"
+ON_BLUE    = "\e[44m"
+REVERSE    = "\e[7m"
+UNDERLINE    = "\e[4m"
+if $stdout.tty?
+  BOLD=COLOR_BOLD
+  BOLDOFF=COLOR_BOLD_OFF
+  HEADER_START=ON_BLUE
+  HEADER_END=CLEAR
+  ULINE=UNDERLINE
+else
+  BOLD="**"
+  BOLDOFF="**"
+  HEADER_START=""
+  HEADER_END=""
+  ULINE=""
+end
+url = nil
+host = nil
+format = "line"
+ymlfile = nil
+# http://www.ruby-doc.org/stdlib/libdoc/optparse/rdoc/classes/OptionParser.html
+require 'optparse'
+options = {}
+OptionParser.new do |opts|
+  opts.banner = "Usage: #{$0} [options]"
+  opts.on("-d SEP", String,"--delimiter", "Delimit columns with SEP") do |v|
+    options[:delimiter] = v
+  end
+  opts.on("-H HOST", String,"--hostname", "hostname rn/hn") do |v|
+    # this is only required if you pass in a saved file, so we need to know which parser to use
+    host = v
+  end
+  #opts.on("-H (reddit|hn)", String,"--hostname", "Get articles from HOST") do |v|
+    #host = v
+  #end
+  opts.on("-f FORMAT", String,"--format", "write in format: compact, line") do |v|
+    format = v
+  end
+  opts.on("-w PATH", String,"--save-html-path", "Save html to file PATH") do |v|
+    options[:htmloutfile] = v
+    options[:save_html] = true
+  end
+  opts.on("-y PATH", String,"--save-yml-path", "Save yml to file PATH") do |v|
+    ymlfile = v
+  end
+end.parse!
+#p options
+#p ARGV
+url=ARGV[0];
+unless url
+  $stderr.puts "URL of comment expected"
+  exit(1)
+end
+# this is only required if you pass in a saved file, so we need to know which parser to use
+if host
+  case host
+  when "hn"
+    hn = HackerNewsParser.new options
+  when "rn"
+    hn = RedditNewsParser.new options
+  end
+end
+unless hn
+  if url.index("reddit.com")
+    hn = RedditNewsParser.new options
+  elsif url.index("ycombinator.com")
+    hn = HackerNewsParser.new options
+  else
+    $stderr.puts "Unknown host. Expecting reddit.com or ycombinator.com"
+    exit(1)
+  end
+end
+if ymlfile
+  hn.save_comments_as_yml ymlfile, url
+  exit
+end
+#comments = hn._retrieve_comments url
+article = hn._retrieve_comments url
+#hn.to_yml "comments.yml", article.hash
+case format
+when "compact"
+  format_compact article
+else
+  format_line article
+end

data/bin/hacker-tsv.rb ADDED Viewed

@@ -0,0 +1,144 @@
+# ----------------------------------------------------------------------------- #
+#         File: hacker-curse.rb
+#  Description: view hacker news on terminal
+#       Author: j kepler  http://github.com/mare-imbrium/canis/
+#         Date: 2014-07-16 - 13:10
+#      License: MIT
+#  Last update: 2014-08-03 20:17
+# ----------------------------------------------------------------------------- #
+#  hacker-curse.rb  Copyright (C) 2012-2014 j kepler
+#!/usr/bin/env ruby
+require 'hacker/curse/hackernewsparser.rb'
+require 'hacker/curse/redditnewsparser.rb'
+if true
+  begin
+    url = nil
+    host = nil
+    # http://www.ruby-doc.org/stdlib/libdoc/optparse/rdoc/classes/OptionParser.html
+    require 'optparse'
+    options = {}
+    options[:num_pages] = 1
+    OptionParser.new do |opts|
+      opts.banner = %Q{
+ Usage: #{$0} [options]
+ Outputs stories from Hacker News front page or Reddit.com as tab separated values
+ Examples:
+ Retrieves two pages of stories from Hacker News and save the retrieved HTML file
+ and redirect output to a file.
+     hacker-tsv.rb -H hn -p 2 -s news -w news.html > news.tsv
+ Retrieves one page of articles from reddit.com/r/ruby and save output in a file.
+     hacker-tsv.rb -H rn -s ruby > ruby.tsv
+    }
+      opts.separator ""
+      opts.separator "Common Options:"
+      opts.on("-s subforum", String,"--subforum", "Get articles from subforum such as newest") do |v|
+        options[:subforum] = v
+      end
+      opts.on("-H (rn|hn)", String,"--hostname", "Get articles from HOST") do |v|
+        host = v
+      end
+      opts.on("-p N", Integer,"--pages", "Retrieve N number of pages") do |v|
+        options[:num_pages] = v
+      end
+      opts.separator ""
+      opts.separator "Specific Options:"
+      opts.on("-n N", "--limit", Integer, "limit to N stories") do |v|
+        options[:number] = v
+      end
+      opts.on("-t", "print only titles") do |v|
+        options[:titles] = true
+      end
+      opts.on("-d SEP", String,"--delimiter", "Delimit columns with SEP") do |v|
+        options[:delimiter] = v
+      end
+      opts.on("-u URL", String,"--url", "Get articles from URL/file") do |v|
+        options[:url] = v
+      end
+      opts.on("-w PATH", String,"--save-html-path", "Save html to file PATH") do |v|
+        options[:htmloutfile] = v
+        options[:save_html] = true
+      end
+      opts.on("-v", "--[no-]verbose", "Print description also") do |v|
+        options[:verbose] = v
+      end
+    end.parse!
+    #p options
+    #p ARGV
+    #filename=ARGV[0];
+    #url ||= "https://news.ycombinator.com/news"
+    hn = nil
+    case host
+    when "reddit", "rn"
+      hn = RedditNewsParser.new options
+    else
+      hn = HackerNewsParser.new options
+    end
+    arr = hn.get_next_page
+    if arr.articles.nil? or arr.articles.empty?
+      $stderr.puts "No articles"
+      exit
+    end
+    # arr is ForumPage, arr.first is ForumAricle
+    titles_only = options[:titles]
+    sep = options[:delimiter] || "\t"
+    limit = options[:number] || arr.count
+    headings = %w[ title age_text comment_count points article_url comments_url age submitter submitter_url ]
+    arr.first.keys.each do |k|
+      unless headings.include? k.to_s
+        headings << k.to_s
+      end
+    end
+    headings.delete("byline")
+    headings << "byline"
+    # this yields a ForumArticle not a hash.
+    arr.each_with_index do |e, i|
+      break if i >= limit
+      h = e.hash
+      if titles_only
+        puts "#{e[:title]}"
+      else
+        unless options[:verbose]
+          #e.delete(:description)
+        end
+        if i == 0
+          #s = e.keys.join(sep)
+          s = headings.join(sep)
+          puts s
+        end
+        # if missing value then we get one column missing !!! FIXME
+        s = ""
+        # insert into s in the right order, so all outputs are standard in terms of columns
+        headings.each do |h|
+          s << "#{e[h.to_sym]}#{sep}"
+        end
+        #s = e.values.join(sep)
+        puts s
+        #puts "#{e[:title]}#{sep}#{e[:url]}#{sep}#{e[:comments_url]}"
+      end
+    end
+    #puts " testing block "
+    #klass.run do | t,u,c|
+      #puts t
+    #end
+  ensure
+  end
+end
+exit
+hn = HackerNewsParser.new
+page = hn.get_next_page
+sep = "\t"
+page.each do |art|
+  puts "#{art.title}#{sep}#{art.points}#{sep}#{art.age_text}"
+end

data/bin/hacker-yml.rb ADDED Viewed

@@ -0,0 +1,100 @@
+# ----------------------------------------------------------------------------- #
+#         File: hacker-yml.rb
+#  Description: saves hacker or reddit output as a YML file
+#       Author: j kepler  http://github.com/mare-imbrium/canis/
+#         Date: 2014-08-05 - 01:08
+#      License: MIT
+#  Last update: 2014-08-05 13:21
+# ----------------------------------------------------------------------------- #
+#  hacker-yml.rb  Copyright (C) 2012-2014 j kepler
+#!/usr/bin/env ruby
+require 'hacker/curse/hackernewsparser.rb'
+require 'hacker/curse/redditnewsparser.rb'
+if true
+  begin
+    url = nil
+    host = nil
+    outputfile = nil
+    # http://www.ruby-doc.org/stdlib/libdoc/optparse/rdoc/classes/OptionParser.html
+    require 'optparse'
+    options = {}
+    options[:num_pages] = 1
+    OptionParser.new do |opts|
+      opts.banner = %Q{
+ Usage: #{$0} [options]
+ Outputs stories from Hacker News front page or Reddit.com to a YML file
+ Examples:
+ Retrieves one page of articles from reddit.com/r/ruby and save yml output in a file (default is
+ <subforum>.yml).
+     hacker-yml.rb -H rn -s ruby
+     hacker-yml.rb -H rn -s ruby -y ~/tmp/ruby.yml
+ Retrieves two pages of stories from Hacker News and save the retrieved HTML file to news.html,
+ and redirect YML output to news.yml (default).
+     hacker-yml.rb -H hn -p 2 -s news -w news.html
+    }
+      opts.separator ""
+      opts.separator "Common Options:"
+      opts.on("-s subforum", String,"--subforum", "Get articles from subforum such as newest") do |v|
+        options[:subforum] = v
+      end
+      opts.on("-H (rn|hn)", String,"--hostname", "Get articles from HOST") do |v|
+        host = v
+      end
+      opts.on("-p N", Integer,"--pages", "Retrieve N number of pages") do |v|
+        options[:num_pages] = v
+      end
+      opts.separator ""
+      opts.separator "Specific Options:"
+      opts.on("-n N", "--limit", Integer, "limit to N stories") do |v|
+        options[:number] = v
+      end
+      opts.on("-t", "print only titles") do |v|
+        options[:titles] = true
+      end
+      opts.on("-d SEP", String,"--delimiter", "Delimit columns with SEP") do |v|
+        options[:delimiter] = v
+      end
+      opts.on("-u URL", String,"--url", "Get articles from URL/file") do |v|
+        options[:url] = v
+      end
+      opts.on("-w PATH", String,"--save-html-path", "Save html to file PATH") do |v|
+        options[:htmloutfile] = v
+        options[:save_html] = true
+      end
+      opts.on("-y PATH", String,"--save-yml-path", "Save YML to file PATH") do |v|
+        outputfile = v
+      end
+      opts.on("-v", "--[no-]verbose", "Print description also") do |v|
+        options[:verbose] = v
+      end
+    end.parse!
+    hn = nil
+    case host
+    when "reddit", "rn"
+      hn = RedditNewsParser.new options
+    else
+      hn = HackerNewsParser.new options
+    end
+    outputfile ||= options[:subforum].gsub("/","__") + ".yml"
+    arr = hn.get_next_page
+    hn.save_page_as_yml outputfile, arr
+    $stderr.puts "Saved to #{outputfile} "
+    if arr.articles.nil? or arr.articles.empty?
+      $stderr.puts "No articles"
+    end
+  ensure
+  end
+end

data/bin/hacker.rb ADDED Viewed

@@ -0,0 +1,68 @@
+#!/usr/bin/env ruby
+#
+# This is just a wrapper over hacker-tsv.rb
+# If called with news or newest it calls hacker news, otherwise by default it will call
+#  reddit.com for other args. Use -H to specify host if it is hacker news.
+# It also puts the output in a TSV file.
+# Currently, corvus is calling this.
+#
+# Warnings are switched on here rather than with "ruby -w" on the shebang
+# line, because env on Linux treats "ruby -w" as a single program name.
+$VERBOSE = true
+# http://www.ruby-doc.org/stdlib/libdoc/optparse/rdoc/classes/OptionParser.html
+require 'optparse'
+pages = 1
+outputfile = nil
+hostname = nil
+prog = File.basename $0
+OptionParser.new do |opts|
+  # The banner text (including its indentation) is kept exactly as published.
+  opts.banner  = %Q{
+      Usage: #{prog} [options] subforum
+      Examples:
+             #{prog} --pages 2 news
+             #{prog} programming
+             #{prog} programming/new
+        subforum can be news / newest (Hacker News)
+        or any subforum from reddit such as programming, ruby, vim, zsh, commandline, etc.
+        This program is a wrapper over hacker-tsv.rb and writes the output into a tab separated file
+        of the same name as the subforum, with a ".tsv" extension, such as news.tsv or ruby.tsv.
+        }
+  opts.on("-H HN", String, "--hostname", "hostname [hn|rn]") do |v|
+    hostname = v
+  end
+  opts.on("-p pages", Integer, "--pages", "pages to retrieve ") do |v|
+    pages = v
+  end
+  opts.on("-o outputfile", String, "--outputfile", "name of TSV file to create ") do |v|
+    outputfile = v
+  end
+end.parse!
+subr = ARGV[0] || "news"
+# "/" cannot appear in a file name, so programming/new becomes programming__new
+subr2 = subr.gsub("/", "__")
+outputfile ||= "#{subr2}.tsv"
+outputhtml = "#{subr2}.html"
+puts "subreddit is: #{subr} "
+# The Hacker News subforums always use host "hn"; everything else uses -H, or "rn" by default.
+hostname = case subr
+           when "news", "newest", "ask", "jobs", "show" then "hn"
+           else hostname || "rn"
+           end
+# The command is passed as an argument list so that no shell is involved:
+# spaces or shell metacharacters in the subforum or file names cannot break
+# or alter the command. :out replaces the former "> outputfile" redirection.
+cmd = ["hacker-tsv.rb", "-H", hostname, "-p", pages.to_s, "-s", subr, "-w", outputhtml]
+ret = system(*cmd, :out => outputfile)
+status = $?
+unless ret
+  $stderr.puts "#{cmd.join(' ')} > #{outputfile}"
+  $stderr.puts "hacker-tsv returned with error/s #{ret}, #{status}"
+  # exit needs true/false/Integer, not a Process::Status; exitstatus is nil
+  # when the child did not exit normally, in which case 1 is used.
+  exit((status && status.exitstatus) || 1)
+end