hacker-curse 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,24 @@
+ # coding: utf-8
+ lib = File.expand_path('../lib', __FILE__)
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+ require 'hacker/curse/version'
+
+ Gem::Specification.new do |spec|
+   spec.name          = "hacker-curse"
+   spec.version       = Hacker::Curse::VERSION
+   spec.authors       = ["kepler"]
+   spec.email         = ["githubkepler.50s@gishpuppy.com"]
+   spec.summary       = %q{View Hacker News and reddit articles on the terminal using ncurses}
+   spec.description   = %q{View Hacker News and reddit articles on the terminal using ncurses}
+   spec.homepage      = "https://github.com/mare-imbrium/hacker-curse"
+   spec.license       = "MIT"
+
+   spec.files         = `git ls-files -z`.split("\x0")
+   spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
+   spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
+   spec.require_paths = ["lib"]
+
+   spec.add_development_dependency "bundler", "~> 1.6"
+   spec.add_development_dependency "rake", ">= 0.9.6"
+   spec.add_runtime_dependency "canis", ">= 0.0.3"
+ end
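
A quick sanity check of the spec above from irb (a sketch; the filename "hacker-curse.gemspec" is an assumption, since the diff does not name its files):

    spec = Gem::Specification.load("hacker-curse.gemspec")  # assumed filename
    spec.name                              # => "hacker-curse"
    spec.runtime_dependencies.map(&:to_s)  # => ["canis (>= 0.0.3)"]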
@@ -0,0 +1,7 @@
+ require "hacker/curse/version"
+
+ module Hacker
+   module Curse
+     # Your code goes here...
+   end
+ end
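
The hacker/curse/version.rb file required above is not shown in this diff. Assuming the standard Bundler gem layout, a minimal sketch of it would be:

    module Hacker
      module Curse
        VERSION = "0.0.2"  # assumed to match the released 0.0.2 package above
      end
    end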
@@ -0,0 +1,353 @@
+ #!/usr/bin/env ruby -w
+ #
+ # Fetch hacker news front page entries into a hash.
+ # TODO : get next page. Next is /news2, but after that it changes.
+ # TODO : 2014-07-27 - 12:42 put items into the hash in a fixed order, so printers can use the first 4 cols for a long listing:
+ #        title, age_text, comment_count, points, article_url, comments_url, age, submitter, submitter_url
+ #
+ require 'open-uri'
+ require 'nokogiri'
+
+ # this is from hacker news itself
+ #file = "news.html"
+
+ module HackerCurse
+   class ForumPage
+     include Enumerable
+     # new, newest, hot, rising etc.
+     attr_accessor :url
+     attr_accessor :next_url
+     attr_accessor :create_date
+     attr_accessor :subforum
+     # array of article objects
+     attr_accessor :articles
+     def each
+       @articles.each do |e| yield(e) ; end
+     end
+     alias :each_article :each
+     def merge_page page
+       self.next_url = page.next_url
+       self.articles.push(*page.articles)
+       self
+     end
+   end
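+
+   # Illustrative ForumPage usage (a sketch; assumes pages produced by a concrete
+   # parser such as HackerNewsParser):
+   #   page1 = hn.get_first_page
+   #   page2 = hn.get_next_page :page => page1
+   #   page1.merge_page(page2)                  # page1 now holds both article sets
+   #   page1.each_article { |a| puts a.title }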
+   class ForumArticle
+     attr_accessor :title
+     attr_accessor :article_url
+     attr_accessor :points
+     attr_accessor :comment_count
+     attr_accessor :comments_url
+     attr_accessor :age_text
+     attr_accessor :age
+     attr_accessor :submitter
+     attr_accessor :submitter_url
+     attr_accessor :domain
+     attr_accessor :domain_url
+     # byline is a dump of the text on top containing all the info on points, number of comments, "nn hours ago"
+     attr_accessor :byline
+     attr_accessor :parent
+     attr_writer :comments
+     attr_reader :hash
+     def initialize h
+       @comments = nil
+       @hash = h
+       [:title, :article_url, :points, :comment_count, :comments_url, :age_text, :age,
+        :submitter, :submitter_url, :domain, :domain_url, :byline].each do |sym|
+         instance_variable_set("@#{sym.to_s}", h[sym]) if h.key? sym
+       end
+       if h.key? :comments
+         c = h[:comments]
+         @comments = Array.new
+         c.each do |ch|
+           fc = ForumComment.new ch
+           @comments << fc
+         end
+       end
+     end
+
+     def comments
+       @comments || retrieve_comments(@comments_url)
+     end
+     def each
+       comments.each do |e| yield(e) ; end
+     end
+     def retrieve_comments url
+       raise "Parent must be set in order to retrieve comments" unless @parent
+       @parent._retrieve_comments url
+     end
+     alias :each_comment :each
+     def [](sym)
+       @hash[sym]
+     end
+     def keys
+       @hash.keys
+     end
+     def values
+       @hash.values
+     end
+   end
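+
+   # ForumArticle offers both accessor and hash-style access (a sketch):
+   #   art.title         # via attr_accessor
+   #   art[:title]       # same value via the captured hash
+   #   art.keys          # every key the parser filled in
+   #   art.comments      # lazy: fetched through @parent on first access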
+   class ForumComment
+     attr_accessor :submitter, :submitter_url
+     attr_accessor :age, :age_text, :points, :head
+     attr_accessor :comment_text
+     attr_accessor :comment_url
+     attr_reader :hash
+     def initialize h
+
+       @hash = h
+       [:points, :comment_url, :age_text, :age,
+        :submitter, :submitter_url, :comment_text, :head].each do |sym|
+         instance_variable_set("@#{sym.to_s}", h[sym])
+       end
+     end
+     def [](sym)
+       @hash[sym]
+     end
+     def keys
+       @hash.keys
+     end
+     def values
+       @hash.values
+     end
+   end
+
+   #
+   # Intended usage (pseudo-code):
+   #   rn = RNParser.new [url]
+   #   rn.subreddit = "ruby"
+   #   resultset = rn.get_next_page :page => prevresultset, :number => 5
+   #   resultset.each do |art|
+   #     art.title, art.points
+   #     art.comments
+   #   end
+   #
+   #   hn = HNewsParser.new @options
+   #   hn.subxxx = "news" / "newest"
+   #
+   #   redditnews.rb -s ruby --pages 2
+   #   hackernews.rb -s newest --pages 2 -d '|'
+   #
+
+   class AbstractSiteParser
+     attr_reader :more_url
+     attr_accessor :host
+     attr_accessor :num_pages
+     attr_accessor :subforum
+     # should the html be saved
+     attr_accessor :save_html
+     attr_accessor :htmloutfile
+     #HOST = "https://news.ycombinator.com"
+     def initialize options={}
+       @options = options
+       @url = @options[:url]
+       @save_html = @options[:save_html]
+       @htmloutfile = @options[:htmloutfile]
+       @num_pages = @options[:num_pages] || 1
+       @more_url = nil
+       #puts "initialize: url is #{@url} "
+     end
+     def get_first_page
+       #@arr = to_hash @url
+       _retrieve_page @url
+     end
+     def get_next_page opts={}
+       page = opts[:page]
+       num_pages = opts[:num_pages] || @num_pages
+       num_pages ||= 1
+       u = @more_url || @url
+       if page
+         u = page.next_url
+       end
+       pages = nil
+       num_pages.times do |i|
+         page = _retrieve_page u
+         if pages.nil?
+           pages = page
+         else
+           pages.merge_page page
+         end
+         u = page.next_url
+         break unless u # sometimes there is no next
+         @more_url = u
+       end
+       return pages
+     end
+     alias :get_next :get_next_page
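+     # Pagination sketch for the methods above: fetch three pages merged into one
+     # ForumPage, then continue from where the previous call stopped:
+     #   page = parser.get_next_page :num_pages => 3
+     #   more = parser.get_next_page :page => page  # resumes from page.next_url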
+     def _retrieve_page url
+       raise "must be implemented by concrete class"
+     end
+     # Write as yml. This doesn't work well with multiple pages, since we call it
+     # once per page, so the previous file gets overwritten.
+     # This should be called on the final class.
+     def to_yml outfile, arr = @arr
+       require 'yaml'
+       # cannot just convert / to __ in filename since path gets converted too
+       #if outfile.index("/")
+       #outfile = outfile.gsub("/","__")
+       #end
+       File.open(outfile, 'w' ) do |f|
+         f << YAML::dump(arr)
+       end
+     end
+     # After calling get_next_page, one may pass its return value to this method
+     # to convert it into a hash and store it as a yml file.
+     # It's a bit silly: first we break the page down into this hash structure,
+     # and then dump the whole thing.
+     def save_page_as_yml outputfile, page
+       h = {}
+       h[:url] = page.url
+       h[:next_url] = page.next_url
+       h[:subforum] = page.subforum
+       h[:create_date] = page.create_date
+       articles = []
+       page.each do |a| articles << a.hash; end
+
+       h[:articles] = articles
+
+       to_yml outputfile, h
+     end
+     # retrieves the comments for a url and stores them in outputfile in YML format
+     def save_comments_as_yml outputfile, url
+       pages = _retrieve_comments url
+       if pages
+         to_yml outputfile, pages.hash
+       end
+     end
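+     # Round-trip sketch (file name illustrative): the saved YML loads back into
+     # the same hash structure that save_page_as_yml built:
+     #   parser.save_page_as_yml "news.yml", page
+     #   h = YAML.load(File.read("news.yml"))
+     #   h[:articles].first[:title]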
+     # Returns a Nokogiri HTML doc, and writes out the HTML if so required.
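+     # Note: open(url) below relies on open-uri patching Kernel#open; that behaviour
+     # was deprecated in Ruby 2.7 and removed in 3.0, where URI.open is needed instead.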
+     def get_doc_for_url url
+       #puts "get_doc #{url} "
+       out = open(url)
+       doc = Nokogiri::HTML(out)
+       if @save_html
+         subforum = @subforum || "unknown"
+         outfile = @htmloutfile || "#{subforum}.html"
+         #if !File.exists? url
+         out.rewind
+         File.open(outfile, 'w') {|f| f.write(out.read) }
+         #end
+       end
+       return doc
+     end
+     # this is a test method, so we don't keep hitting HN while testing and getting IP blocked.
+     def load_from_yml filename="hn.yml"
+       @arr = YAML::load( File.open( filename ) )
+       next_url = @arr.last[:article_url]
+       unless next_url.index("http")
+         next_url = @host + "/" + next_url
+       end
+       @more_url = next_url
+     end
+     def _retrieve_comments url
+       raise "Must be implemented by concrete class"
+     end
+     public
+     def get_comments_url index
+       arr = @arr
+       entry = arr[index]
+       if entry
+         if entry.key? :comments_url
+           return entry[:comments_url]
+         end
+       end
+       return nil
+     end
+     public
+     def get_comments index
+       url = get_comments_url index
+       if url
+         #puts url
+         comments = convert_comment_url url
+         return comments
+       #else
+         #puts "Sorry no url for #{index} "
+       end
+       return []
+     end
+     alias :get_comments_for_link :get_comments
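+     # Converts a relative age such as "3 hours ago" to approximate epoch seconds,
+     # e.g. Time.now.to_i - 3*60*60; months are approximated as 30 days.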
+     def human_age_to_unix age_text
+       i = age_text.to_i
+       ff = 1
+       if age_text.index("hour")
+         i *= ff*60*60
+       elsif age_text.index("second")
+         i *= ff
+       elsif age_text.index("minute")
+         i *= ff*60
+       elsif age_text.index("day")
+         i *= ff*60*60*24
+       elsif age_text.index("month")
+         i *= ff*60*60*24*30
+       elsif age_text.index("week")
+         i *= ff*60*60*24*7
+       elsif age_text.index("year")
+         i *= ff*60*60*24*365
+       else
+         #raise "don't know how to convert #{age_text} "
+         return 0
+       end
+       return (Time.now.to_i - i)
+     end
+   end
+ end
+ include HackerCurse
+
+
+ if __FILE__ == $0
+   #rn = HackerNewsParser.new :url => "hackernews.html"
+   rn = RedditNewsParser.new :url => "reddit-prog.html" # NOTE: RedditNewsParser is defined elsewhere, not in this file
+
+   page = rn.get_next_page # [page if supplied, take page.next_url, otherwise store??]
+   puts "For each article :::"
+   page.each do |art|
+     puts art.title, art.points, art.age_text, art.age, Time.at(art.age)
+   end # each_article
+   art = page.articles.first
+   puts "PRINTING comments "
+   art.each_comment do |c|
+     puts
+     puts " ======"
+     puts c.head
+     s = nil
+     if c.age
+       s = Time.at(c.age)
+     end
+     puts " #{c.age_text} | #{c.submitter} | #{c.age} . #{s} "
+     puts c.comment_text
+   end
+
+   exit
+   articles = page.articles
+   co = articles.first.comments
+   puts "PRINTING comments "
+   puts co[:title], co[:subtext]
+   comments = co[:comments]
+   comments.each_with_index do |c,i|
+     puts "======= #{c[:head]} : "
+     puts " - #{c[:head]} : "
+     puts " #{c[:comment]} "
+     puts " "
+   end
+
+   #comments.each_with_index do |c,i|
+   #puts " #{i}: #{c} "
+   #end
+   exit
+   art.each_comment do |cc|
+   end
+   #rn.next_url = page.next_url
+   rn.set_next_url(page)
+   #arr = rn.convert_comment_url "hn_comments.html"
+   #rn.to_yml "hn_comments.yml", arr
+
+
+   arr = rn.get_next_page
+   rn.to_yml "hn.yml"
+   puts "getting comments for link 1"
+   comments = rn.get_comments_for_link 1
+   if comments.empty?
+     comments = rn.get_comments_for_link 9
+   end
+   rn.to_yml "hn-comments.yml", comments
+   puts "getting next page"
+   arr1 = rn.get_next_page
+   rn.to_yml "hn-1.yml", arr1
+ end
@@ -0,0 +1,226 @@
+ require 'hacker/curse/abstractsiteparser'
+
+ module HackerCurse
+
+   class HackerNewsParser < AbstractSiteParser
+     def initialize config={}
+       @host = config[:host] || "https://news.ycombinator.com"
+       subforum = config[:subforum] || "news"
+       _url = "#{@host}/#{subforum}"
+       @subforum = subforum
+       config[:url] ||= _url
+       super config
+     end
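+     # Construction sketch: the subforum maps straight onto the HN path, so
+     #   hn = HackerNewsParser.new :subforum => "newest", :num_pages => 2
+     # fetches from https://news.ycombinator.com/newest.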
+     def _retrieve_page url
+       #puts "got url #{url} "
+       raise "url should be string" unless url.is_a? String
+       arr = to_hash url
+       page = hash_to_class arr
+       #to_yml "#{@subforum}.yml", arr
+       return page
+     end
+     # First builds a Hash containing various entries relating to the main article,
+     # which can be accessed directly.
+     # It contains an array :comments of hashes, where :head holds the text of the head,
+     # :comment holds the text of the comment, and there are further entries for the submitter:
+     #   hash[:comments].each do |e| e[:comment] ; end
+     # @return a ForumArticle; iterating it yields ForumComment objects:
+     #   pages.each do |co| puts co.comment_text, co.head; end
+     def _retrieve_comments url
+       arr = to_hash_comment url
+       # TODO break head into points, age etc.
+       pages = hash_to_comment_class arr
+       return pages
+     end
+     def hash_to_comment_class arr
+       page = ForumArticle.new arr
+       return page
+     end
+     def oldhash_to_comment_class arr
+       co = arr[:comments]
+       pages = Array.new
+       co.each do |h|
+         page = ForumComment.new h
+         pages << page
+       end
+       return pages
+     end
+     def to_hash_comment url
+       # for testing i may send in a saved file, so i don't keep hitting HN
+       if !File.exist? url
+         unless url.index("http")
+           url = @host + "/" + url
+         end
+       end
+       page = Nokogiri::HTML(open(url))
+       h = {}
+       title = page.css("td.title")
+       article_url = title.css("a").first["href"]
+       h[:title] = title.text
+       h[:article_url] = article_url
+
+       subtext = page.css("td.subtext")
+       h[:byline] = subtext.text
+       # TODO extract age_text
+       h[:age_text] = subtext.text.scan(/\d+ \w+ ago/).first
+       score = subtext.css("span").text
+       h[:points] = score
+       subtext.css("a").each_with_index do |e, i|
+         link = e["href"]
+         text = e.text
+         if link.index("user") == 0
+           h[:submitter] = text
+           h[:submitter_url] = link
+         elsif link.index("item") == 0
+           h[:comment_count] = text
+           h[:comments_url] = link
+         end
+       end
+
+       # need to get points
+       comheads = page.css("span.comhead") # .collect do |e| e.text ; end
+       comments = page.css("span.comment").collect do |e| e.text ; end
+       comheads.delete(comheads.first)
+       # array of comments
+       carr = Array.new
+       comheads.zip(comments) do |head,c|
+         hh = {}; hh[:head] = head.text;
+         #$stderr.puts "head:: #{head.text}"
+         m = head.text.scan(/\d+ \w+ ago/)
+         if !m.empty?
+           hh[:age_text] = m.first.scan(/\d+ \w/).first.rjust(4)
+           hh[:age] = human_age_to_unix(m.first)
+           head.css("a").each_with_index do |e, i|
+             link = e["href"]
+             text = e.text
+             if link.index("user") == 0
+               hh[:submitter] = text
+               hh[:submitter_url] = link
+             elsif link.index("item") == 0
+               hh[:text] = text
+               hh[:comment_url] = link
+             end
+           end
+         end
+         hh[:comment_text] = c;
+         carr << hh
+       end
+
+       h[:comments] = carr
+       return h
+     end
+     def hash_to_class h
+       p = ForumPage.new
+       p.url = h[:url]
+       p.next_url = h[:next_url]
+       p.create_date = h[:create_date]
+       p.subforum = h[:subforum]
+       art = h[:articles]
+       arts = []
+       art.each do |a|
+         fa = ForumArticle.new a
+         fa.parent = self
+         arts << fa
+       end
+       p.articles = arts
+       return p
+     end
+     # convert the front page to a hash
+     def to_hash url
+       doc = get_doc_for_url url
+       count = 0
+       page = {}
+       page[:url] = url
+       now = Time.now
+       page[:create_date_seconds] = now.to_i
+       page[:create_date] = now
+       page[:subforum] = @subforum
+
+       arr = Array.new
+       h = {}
+       links = doc.xpath("//table/tr/td/table/tr")
+       links.each_with_index do |li, i|
+         x = li.css("td.title")
+         if !x.empty?
+           #puts " ---- title ----- #{x.count} "
+           count = x[0].text
+           #puts count
+           if x.count < 2
+             # this block is for the next_url ("More" link)
+             article_url = x[0].css("a")[0]["href"] # link url
+             #puts article_url
+             h = {}
+             h[:title] = count
+             h[:article_url] = article_url
+             more = count
+             more_url = "#{@host}/#{article_url}"
+             #arr << h
+             page[:next_url] = more_url
+             #puts li
+           end
+           break if x.count < 2
+
+           # actual article url
+           title = x[1].css("a")[0].text # title
+           article_url = x[1].css("a")[0]["href"] # link url
+           #puts article_url
+           #puts title
+           h = {}
+           #h[:number] = count
+           h[:title] = title
+           # "Ask HN" entries have no hostname, since the link is relative to HN
+           if article_url.index("http") != 0
+             article_url = "#{@host}/#{article_url}"
+           end
+
+           h[:article_url] = article_url
+           arr << h
+         else
+           x = li.css("td.subtext")
+           if !x.empty?
+             fulltext = x.text
+             #puts " ---- subtext ----- (#{fulltext})"
+             submitter = nil
+             submitter_url = nil
+             comment = nil
+             comments_url = nil
+             t = x.css("a")
+             t.each_with_index do |tt, ii|
+               case ii
+               when 0
+                 submitter = tt.text
+                 submitter_url = tt["href"]
+               when 1
+                 comment = tt.text
+                 comments_url = tt["href"]
+                 comments_url = "#{@host}/#{comments_url}"
+               end
+             end
+             points = x.css("span").text rescue ""
+             #puts submitter
+             #puts submitter_url
+             #puts comment
+             #puts comments_url
+             #puts points
+             h[:submitter] = submitter
+             h[:submitter_url] = submitter_url
+             h[:comment_count] = comment.to_i.to_s.rjust(4)
+             h[:comments_url] = comments_url
+             h[:points] = points.to_i.to_s.rjust(4)
+             m = fulltext.scan(/\d+ \w+ ago/)
+             unless m.empty? # scan returns an array; an empty result must be skipped
+               #h[:age_text] = m.first
+               h[:age_text] = m.first.scan(/\d+ \w/).first.rjust(4)
+               h[:age] = human_age_to_unix(m.first)
+             end
+             #puts "fulltext: #{fulltext} "
+             h[:byline] = fulltext
+           end
+         end
+       end
+       #return arr
+       page[:articles] = arr
+       return page
+     end
+   end # class
+ end # module
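
Taken together, a minimal end-to-end use of the two library files above might look like this (a sketch; assumes the gem's lib directory is on the load path and the network is reachable):

    require 'hacker/curse/hackernewsparser'
    include HackerCurse

    hn = HackerNewsParser.new :subforum => "news"
    page = hn.get_first_page
    page.each_article do |art|
      puts "#{art.points} | #{art.comment_count} | #{art.title}"
    end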