hacker-curse 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
 - data/.gitignore +37 -0
 - data/Gemfile +4 -0
 - data/LICENSE +21 -0
 - data/README.md +89 -0
 - data/Rakefile +2 -0
 - data/bin/corvus +2320 -0
 - data/bin/hacker-comments.rb +182 -0
 - data/bin/hacker-tsv.rb +144 -0
 - data/bin/hacker-yml.rb +100 -0
 - data/bin/hacker.rb +68 -0
 - data/bin/hacker.sh +90 -0
 - data/bin/redford +946 -0
 - data/hacker-curse.gemspec +24 -0
 - data/lib/hacker/curse.rb +7 -0
 - data/lib/hacker/curse/abstractsiteparser.rb +353 -0
 - data/lib/hacker/curse/hackernewsparser.rb +226 -0
 - data/lib/hacker/curse/redditnewsparser.rb +241 -0
 - data/lib/hacker/curse/version.rb +5 -0
 - data/redford.yml +68 -0
 - metadata +112 -0
 
| 
         @@ -0,0 +1,24 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # coding: utf-8
         
     | 
| 
      
 2 
     | 
    
         
            +
            lib = File.expand_path('../lib', __FILE__)
         
     | 
| 
      
 3 
     | 
    
         
            +
            $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
         
     | 
| 
      
 4 
     | 
    
         
            +
            require 'hacker/curse/version'
         
     | 
| 
      
 5 
     | 
    
         
            +
             
     | 
| 
      
 6 
     | 
    
         
            +
            # Gem specification for hacker-curse: package metadata, the file list
            # (taken from git), executables found under bin/, and dependencies.
            Gem::Specification.new do |spec|
              spec.name          = "hacker-curse"
              spec.version       = Hacker::Curse::VERSION
              spec.authors       = ["kepler"]
              spec.email         = ["githubkepler.50s@gishpuppy.com"]
              spec.summary       = %q{View hacker news and reddit articles on terminal using ncurses}
              spec.description   = %q{View Hacker News and reddit articles on terminal using ncurses}
              spec.homepage      = "https://github.com/mare-imbrium/hacker-curse"
              spec.license       = "MIT"

              # The packaged file list is whatever git tracks (NUL-separated output).
              spec.files         = `git ls-files -z`.split("\x0")
              spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
              spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
              spec.require_paths = ["lib"]

              spec.add_development_dependency "bundler", "~> 1.6"
              spec.add_development_dependency "rake", ">= 0.9.6"
              # The constraint was previously listed twice; once is equivalent.
              spec.add_runtime_dependency "canis", ">= 0.0.3"
              # The library code does `require 'nokogiri'` at load time, so it must be
              # declared here or a fresh install fails with a LoadError.
              spec.add_runtime_dependency "nokogiri"
            end
         
     | 
    
        data/lib/hacker/curse.rb
    ADDED
    
    
| 
         @@ -0,0 +1,353 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            #!/usr/bin/env ruby -w
         
     | 
| 
      
 2 
     | 
    
         
            +
            #
         
     | 
| 
      
 3 
     | 
    
         
            +
            # Fetch hacker news front page entries into a hash.
         
     | 
| 
      
 4 
     | 
    
         
            +
            # TODO : get next page. Next is /news2 but after that it changes
         
     | 
| 
      
 5 
     | 
    
         
            +
            # TODO : 2014-07-27 - 12:42 put items in hash in an order, so printers can use first 4 cols for long listing
         
     | 
| 
      
 6 
     | 
    
         
            +
            #       title, age_text, comment_count, points, article_url, comments_url, age, submitter, submitter_url
         
     | 
| 
      
 7 
     | 
    
         
            +
            #
         
     | 
| 
      
 8 
     | 
    
         
            +
            require 'open-uri'
         
     | 
| 
      
 9 
     | 
    
         
            +
            require 'nokogiri'
         
     | 
| 
      
 10 
     | 
    
         
            +
             
     | 
| 
      
 11 
     | 
    
         
            +
            # this is from hacker news itself
         
     | 
| 
      
 12 
     | 
    
         
            +
            #file = "news.html"
         
     | 
| 
      
 13 
     | 
    
         
            +
             
     | 
| 
      
 14 
     | 
    
         
            +
            module HackerCurse
         
     | 
| 
      
 15 
     | 
    
         
            +
              # One listing page of a forum: its own url, the url of the following
              # page, and the articles found on it. Enumerable over the articles.
              class ForumPage
                include Enumerable
                # new newest hot rising etc
                attr_accessor :url
                attr_accessor :next_url
                attr_accessor :create_date
                attr_accessor :subforum
                # array of article objects
                attr_accessor :articles

                # Yields each article in order. Without a block an Enumerator is
                # returned (instead of raising LocalJumpError). A page whose
                # articles were never assigned is treated as empty.
                def each
                  return enum_for(:each) unless block_given?
                  (@articles || []).each { |article| yield(article) }
                end
                alias :each_article :each

                # Appends the articles of +page+ to this page and adopts its
                # next_url, so that this object represents the combined listing.
                # Returns self.
                def merge_page page
                  self.next_url = page.next_url
                  self.articles ||= []
                  # concat avoids splatting a potentially long array into arguments
                  articles.concat(page.articles || [])
                  self
                end
              end
         
     | 
| 
      
 34 
     | 
    
         
            +
              # A single article/post on a forum page. Built from a hash; selected
              # keys are exposed as accessors and the full hash stays reachable
              # through #[], #keys, #values and #hash.
              class ForumArticle
                # Keys copied from the source hash into same-named instance variables.
                FIELDS = [:title, :article_url, :points, :comment_count, :comments_url, :age_text, :age,
                          :submitter, :submitter_url, :domain, :domain_url, :byline].freeze

                attr_accessor :title
                attr_accessor :article_url
                attr_accessor :points
                attr_accessor :comment_count
                attr_accessor :comments_url
                attr_accessor :age_text
                attr_accessor :age
                attr_accessor :submitter
                attr_accessor :submitter_url
                attr_accessor :domain
                attr_accessor :domain_url
                # byline is dump of text on top containing all the info on points, # of comments, nn hours ago
                attr_accessor :byline
                # object used to fetch comments on demand; must respond to _retrieve_comments(url)
                attr_accessor :parent
                attr_writer :comments
                # The hash this article was built from.
                # NOTE(review): this reader replaces Object#hash, so instances should not
                # be used as Hash keys; it is kept because other code calls article.hash.
                attr_reader :hash

                # h:: hash of article attributes. Only keys present in h are set.
                #     An optional :comments entry (array of hashes) is converted
                #     into ForumComment objects.
                def initialize h
                  @comments = nil
                  @hash = h
                  FIELDS.each do |sym|
                    instance_variable_set("@#{sym}", h[sym]) if h.key? sym
                  end
                  if h.key? :comments
                    # distinct block variable name: the original shadowed +h+
                    @comments = (h[:comments] || []).map { |comment_hash| ForumComment.new comment_hash }
                  end
                end

                # Comments given at construction (or assigned later); otherwise
                # whatever the parent retrieves for comments_url. The retrieved
                # result is not cached here.
                def comments
                  @comments || retrieve_comments(@comments_url)
                end

                # Yields each element of #comments. Without a block an Enumerator
                # is returned.
                def each
                  return enum_for(:each) unless block_given?
                  comments.each { |comment| yield(comment) }
                end

                # Delegates to the parent's _retrieve_comments.
                # Raises RuntimeError if no parent has been set.
                def retrieve_comments url
                  raise "Parent must be set in order to retrieve comments " unless @parent
                  @parent._retrieve_comments url
                end
                alias :each_comment :each

                # Raw lookup in the source hash.
                def [](sym)
                  @hash[sym]
                end

                def keys
                  @hash.keys
                end

                def values
                  @hash.values
                end
              end
         
     | 
| 
      
 89 
     | 
    
         
            +
              # A single comment on an article. Built from a hash whose known keys
              # become accessors; the hash itself stays reachable through #[],
              # #keys, #values and #hash.
              class ForumComment
                attr_accessor :submitter, :submitter_url
                attr_accessor :age, :age_text, :points, :head
                attr_accessor :comment_text
                attr_accessor :comment_url
                # the hash this comment was built from
                attr_reader :hash

                # h:: hash of comment attributes. Every known field is assigned,
                #     so fields missing from h end up nil.
                def initialize h
                  @hash = h
                  %w[points comment_url age_text age submitter submitter_url comment_text head].each do |field|
                    instance_variable_set("@#{field}", h[field.to_sym])
                  end
                end

                # Raw lookup in the source hash.
                def [](sym)
                  hash[sym]
                end

                def keys
                  hash.keys
                end

                def values
                  hash.values
                end
              end
         
     | 
| 
      
 113 
     | 
    
         
            +
             
     | 
| 
      
 114 
     | 
    
         
            +
              # 
         
     | 
| 
      
 115 
     | 
    
         
            +
              # rn = RNParser.new [url]
         
     | 
| 
      
 116 
     | 
    
         
            +
              # rn.subreddit = "ruby"
         
     | 
| 
      
 117 
     | 
    
         
            +
              # resultset = rn.get_next_page :page => prevresultset, :number => 5
         
     | 
| 
      
 118 
     | 
    
         
            +
              # resultset.each do |art|
         
     | 
| 
      
 119 
     | 
    
         
            +
              #    art.title, art.points
         
     | 
| 
      
 120 
     | 
    
         
            +
              #    art.comments
         
     | 
| 
      
 121 
     | 
    
         
            +
              # end
         
     | 
| 
      
 122 
     | 
    
         
            +
              #
         
     | 
| 
      
 123 
     | 
    
         
            +
              # hn = HNewsParser @options
         
     | 
| 
      
 124 
     | 
    
         
            +
              # hn.subxxx = "news" / "newest"
         
     | 
| 
      
 125 
     | 
    
         
            +
              #
         
     | 
| 
      
 126 
     | 
    
         
            +
              # redditnews.rb -s ruby --pages 2
         
     | 
| 
      
 127 
     | 
    
         
            +
              # hackernews.rb -s newest --pages 2 -d '|'
         
     | 
| 
      
 128 
     | 
    
         
            +
              #
         
     | 
| 
      
 129 
     | 
    
         
            +
             
     | 
| 
      
 130 
     | 
    
         
            +
              class AbstractSiteParser
         
     | 
| 
      
 131 
     | 
    
         
            +
                attr_reader :more_url
         
     | 
| 
      
 132 
     | 
    
         
            +
                attr_accessor :host
         
     | 
| 
      
 133 
     | 
    
         
            +
                attr_accessor :num_pages
         
     | 
| 
      
 134 
     | 
    
         
            +
                attr_accessor :subforum
         
     | 
| 
      
 135 
     | 
    
         
            +
                # should the html be saved
         
     | 
| 
      
 136 
     | 
    
         
            +
                attr_accessor :save_html
         
     | 
| 
      
 137 
     | 
    
         
            +
                attr_accessor :htmloutfile
         
     | 
| 
      
 138 
     | 
    
         
            +
                #HOST = "https://news.ycombinator.com"
         
     | 
| 
      
 139 
     | 
    
         
            +
                # Stores the options hash and copies the settings it recognises:
                # :url, :save_html, :htmloutfile and :num_pages (1 when absent).
                # The "more" url starts out unset.
                def initialize options={}
                  @options = options
                  @url, @save_html, @htmloutfile =
                    @options[:url], @options[:save_html], @options[:htmloutfile]
                  requested_pages = @options[:num_pages]
                  @num_pages = requested_pages ? requested_pages : 1
                  @more_url = nil
                end
         
     | 
| 
      
 148 
     | 
    
         
            +
                # Retrieves the page at the configured url and returns whatever
                # _retrieve_page returns for it.
                def get_first_page
                  # no throwaway local: it only produced an "unused variable" warning
                  _retrieve_page @url
                end
         
     | 
| 
      
 152 
     | 
    
         
            +
                # Fetches up to num_pages consecutive pages and returns them merged
                # into the first page fetched (nil when nothing was fetched).
                #
                # opts[:page]::      a previously returned page; fetching starts at
                #                    its next_url. Otherwise fetching starts at the
                #                    remembered "more" url, or the configured url.
                # opts[:num_pages]:: pages to fetch; defaults to @num_pages, then 1.
                #
                # After each fetch the new next_url is remembered in @more_url; when
                # a page has no next_url the loop stops and @more_url is left as is.
                def get_next_page opts={}
                  start_page = opts[:page]
                  count = opts[:num_pages] || @num_pages || 1
                  target = start_page ? start_page.next_url : (@more_url || @url)
                  combined = nil
                  count.times do
                    fetched = _retrieve_page target
                    if combined.nil?
                      combined = fetched
                    else
                      combined.merge_page fetched
                    end
                    target = fetched.next_url
                    break unless target  # sometimes there is no next
                    @more_url = target
                  end
                  combined
                end
                alias :get_next :get_next_page
         
     | 
| 
      
 175 
     | 
    
         
            +
                # Hook for subclasses: fetch and parse the page at +url+.
                # This base version always raises RuntimeError.
                def _retrieve_page url
                  raise RuntimeError, "must be implemented by concrete class"
                end
         
     | 
| 
      
 178 
     | 
    
         
            +
                # write as yml, this doesn't work if multiple pages since we call x times
         
     | 
| 
      
 179 
     | 
    
         
            +
                #  so previous is overwritten
         
     | 
| 
      
 180 
     | 
    
         
            +
                #  This should be called with final class
         
     | 
| 
      
 181 
     | 
    
         
            +
                # Dumps +arr+ (by default @arr) as YAML into +outfile+, replacing
                # any existing content. The path is used as given; slashes are
                # not rewritten since that would mangle directories too.
                def to_yml outfile, arr = @arr
                  require 'yaml'
                  File.open(outfile, 'w') { |io| io << YAML.dump(arr) }
                end
         
     | 
| 
      
 191 
     | 
    
         
            +
                # After calling get_next_page, one may pass its return value
         
     | 
| 
      
 192 
     | 
    
         
            +
                # to this method to convert it into an array of hashes and store it as a yml file
         
     | 
| 
      
 193 
     | 
    
         
            +
                # It's a bit silly, first we break the hash down into this structure
         
     | 
| 
      
 194 
     | 
    
         
            +
                #  and then deconstruct the whole thing. 
         
     | 
| 
      
 195 
     | 
    
         
            +
                # Flattens +page+ into a plain hash (url, next_url, subforum,
                # create_date and the hash of every article) and writes it to
                # +outputfile+ through to_yml.
                def save_page_as_yml outputfile, page
                  summary = {
                    :url         => page.url,
                    :next_url    => page.next_url,
                    :subforum    => page.subforum,
                    :create_date => page.create_date
                  }
                  article_hashes = []
                  page.each { |article| article_hashes << article.hash }
                  summary[:articles] = article_hashes
                  to_yml outputfile, summary
                end
         
     | 
| 
      
 208 
     | 
    
         
            +
                # retrieves the comments for a url and stores in outputfile in YML format
         
     | 
| 
      
 209 
     | 
    
         
            +
                # Fetches the comments for +url+ via _retrieve_comments and, when
                # something came back, writes its hash to +outputfile+ as YAML.
                # Returns nil when nothing was retrieved.
                def save_comments_as_yml outputfile, url
                  retrieved = _retrieve_comments url
                  return nil unless retrieved
                  to_yml outputfile, retrieved.hash
                end
         
     | 
| 
      
 215 
     | 
    
         
            +
                # returns nokogiri html doc and writes the html to a file if required.
         
     | 
| 
      
 216 
     | 
    
         
            +
                # Opens +url+, parses it with Nokogiri::HTML and returns the document.
                # When @save_html is set, the raw content is also written to
                # @htmloutfile, or to "<subforum>.html" ("unknown.html" when no
                # subforum is set).
                def get_doc_for_url url
                  # Since Ruby 3.0 a bare open() no longer goes through open-uri, so
                  # remote urls need URI.open. Older rubies without it keep using open().
                  # NOTE(review): both forms still hand non-URL strings to Kernel#open,
                  # so +url+ should not come from untrusted input.
                  out = URI.respond_to?(:open) ? URI.open(url) : open(url)
                  begin
                    doc = Nokogiri::HTML(out)
                    if @save_html
                      subforum = @subforum || "unknown"
                      outfile = @htmloutfile || "#{subforum}.html"
                      # the parser consumed the stream; go back to the start to copy it
                      out.rewind
                      File.open(outfile, 'w') { |f| f.write(out.read) }
                    end
                    doc
                  ensure
                    # the handle was previously left open
                    out.close if out.respond_to?(:close) && !out.closed?
                  end
                end
         
     | 
| 
      
 230 
     | 
    
         
            +
                # this is a test method so we don't keep hitting HN while testing out and getting IP blocked.
         
     | 
| 
      
 231 
     | 
    
         
            +
                # Loads previously saved entries from +filename+ into @arr and sets
                # @more_url from the :article_url of the last entry, prefixing @host
                # when that url is not absolute. Returns the resulting url.
                # NOTE(review): expects the YAML document to be an array of hashes
                # keyed by symbols - confirm against the files actually written.
                def load_from_yml filename="hn.yml"
                  # required here as in to_yml; this method may be the first to need it
                  require 'yaml'
                  # block form closes the file once it has been parsed
                  @arr = File.open(filename) { |f| YAML::load(f) }
                  next_url = @arr.last[:article_url]
                  # only a leading "http" marks an absolute url; one appearing later
                  # (e.g. inside a query string) does not
                  unless next_url.start_with?("http")
                    next_url = @host + "/" + next_url
                  end
                  @more_url = next_url
                end
         
     | 
| 
      
 239 
     | 
    
         
            +
                # Hook for subclasses: fetch the comments found at +url+.
                # This base version always raises RuntimeError.
                def _retrieve_comments url
                  raise RuntimeError, "Must be implemented by concrete class "
                end
         
     | 
| 
      
 242 
     | 
    
         
            +
                public
         
     | 
| 
      
 243 
     | 
    
         
            +
                # Returns the :comments_url stored for the entry at +index+ in @arr,
                # or nil when there is no such entry or it has no :comments_url key.
                def get_comments_url index
                  entry = @arr[index]
                  return nil unless entry && entry.key?(:comments_url)
                  entry[:comments_url]
                end
         
     | 
| 
      
 253 
     | 
    
         
            +
                public
         
     | 
| 
      
 254 
     | 
    
         
            +
                # Fetch the comments belonging to the entry at +index+.
                # The entry's comments url is resolved through get_comments_url and handed
                # to convert_comment_url; when no url is available an empty Array is
                # returned instead.
                def get_comments(index)
                  comments_url = get_comments_url(index)
                  return [] unless comments_url
                  convert_comment_url(comments_url)
                end
                alias :get_comments_for_link :get_comments
         
     | 
| 
      
 266 
     | 
    
         
            +
                # Convert a relative age such as "3 hours ago" into an approximate unix
                # timestamp by subtracting that age from the current time.
                #
                # The leading integer of +age_text+ is the quantity. The unit is detected
                # by substring, so singular and plural forms both match. A month counts
                # as 30 days and a year as 365 days.
                #
                # @param age_text [String, nil] human readable age
                # @return [Integer] seconds since the epoch, or 0 when +age_text+ is nil
                #   or names no known unit
                def human_age_to_unix age_text
                  # nil previously crashed on nil.index; treat it like any other text
                  # that cannot be converted.
                  return 0 if age_text.nil?

                  # Units are tried in insertion order, which mirrors the order of the
                  # original if/elsif chain.
                  seconds_per_unit = {
                    "hour"   => 60 * 60,
                    "second" => 1,
                    "minute" => 60,
                    "day"    => 60 * 60 * 24,
                    "month"  => 60 * 60 * 24 * 30,
                    "week"   => 60 * 60 * 24 * 7,
                    "year"   => 60 * 60 * 24 * 365
                  }
                  _unit, factor = seconds_per_unit.find { |unit, _| age_text.index(unit) }
                  return 0 unless factor

                  Time.now.to_i - age_text.to_i * factor
                end
         
     | 
| 
      
 289 
     | 
    
         
            +
              end
         
     | 
| 
      
 290 
     | 
    
         
            +
            end
         
     | 
| 
      
 291 
     | 
    
         
            +
            include HackerCurse
         
     | 
| 
      
 292 
     | 
    
         
            +
             
     | 
| 
      
 293 
     | 
    
         
            +
             
     | 
| 
      
 294 
     | 
    
         
            +
            # Manual smoke test, executed only when this file is run directly.
            # Parses a saved reddit page, prints one block of fields per article and
            # then every comment of the first article.
            #
            # The earlier version carried further experiments after an unconditional
            # `exit` (dumping pages and comments with rn.to_yml, rn.set_next_url(page),
            # rn.get_comments_for_link). That code could never run and has been dropped.
            if __FILE__ == $0
              #rn = HackerNewsParser.new :url => "hackernews.html"
              rn = RedditNewsParser.new :url => "reddit-prog.html"

              page = rn.get_next_page  # [page if supplied, take page.next_url, otherwise store??]
              puts "For each article :::"
              page.each do |art|
                puts art.title, art.points, art.age_text, art.age, Time.at(art.age)
              end # each_article

              art = page.articles.first
              puts "PRINTING comments "
              art.each_comment do |c|
                puts
                puts " ======"
                puts c.head
                # age may be missing on a comment; only convert it when present
                s = c.age ? Time.at(c.age) : nil
                puts " #{c.age_text} | #{c.submitter} | #{c.age} . #{s} "
                puts c.comment_text
              end
            end
         
     | 
| 
         @@ -0,0 +1,226 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require 'hacker/curse/abstractsiteparser'
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            module HackerCurse
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
              class HackerNewsParser < AbstractSiteParser
         
     | 
| 
      
 6 
     | 
    
         
            +
                # Set up a parser for Hacker News.
                #
                # @param config [Hash] options. :host (default
                #   "https://news.ycombinator.com") and :subforum (default "news") are
                #   stored in @host and @subforum; :url defaults to "<host>/<subforum>".
                #   The resulting options are passed on to the superclass constructor.
                def initialize config={}
                  @host = config[:host] || "https://news.ycombinator.com"
                  @subforum = config[:subforum] || "news"
                  # Fill in the default :url on a copy so the caller's hash is left
                  # untouched; a hash reused for another parser would otherwise carry
                  # this parser's url along.
                  config = config.merge(:url => config[:url] || "#{@host}/#{@subforum}")
                  super config
                end
         
     | 
| 
      
 14 
     | 
    
         
            +
                # Fetch the listing page at +url+ and return it as a page object:
                # to_hash turns the page into a Hash and hash_to_class wraps that Hash.
                # Raises a RuntimeError ("url should be string") unless +url+ is a String.
                def _retrieve_page(url)
                  raise "url should be string" unless url.kind_of?(String)
                  hash_to_class(to_hash(url))
                end
         
     | 
| 
      
 22 
     | 
    
         
            +
                # currently returns a Hash. containing various entries relating to the main article
         
     | 
| 
      
 23 
     | 
    
         
            +
                #  which can be avoided.
         
     | 
| 
      
 24 
     | 
    
         
            +
                #  Contains an array :comments which contains hashes, :head contains text of head, :comment contains 
         
     | 
| 
      
 25 
     | 
    
         
            +
                #   text of comment, and then there are entries for submitter.
         
     | 
| 
      
 26 
     | 
    
         
            +
                #   hash[:comments].each do |e| e[:comment] ; end
         
     | 
| 
      
 27 
     | 
    
         
            +
                # @return ForumArticle built from the parsed Hash (see hash_to_comment_class).
         
     | 
| 
      
 28 
     | 
    
         
            +
                #    pages.each do |co| puts co.comment_text, co.head; end
         
     | 
| 
      
 29 
     | 
    
         
            +
                def _retrieve_comments(url)
                  # TODO break head into points age etc
                  # The page is first parsed into a Hash (to_hash_comment) and that Hash
                  # is then wrapped by hash_to_comment_class.
                  hash_to_comment_class(to_hash_comment(url))
                end
         
     | 
| 
      
 35 
     | 
    
         
            +
                # Wrap the given Hash in a ForumArticle and return it.
                def hash_to_comment_class(arr)
                  ForumArticle.new(arr)
                end
         
     | 
| 
      
 39 
     | 
    
         
            +
                # Older variant (going by its name): converts every element found under
                # arr[:comments] into a ForumComment and returns them as an Array.
                def oldhash_to_comment_class(arr)
                  arr[:comments].map { |comment_hash| ForumComment.new(comment_hash) }
                end
         
     | 
| 
      
 48 
     | 
    
         
            +
                # Parse a Hacker News comments page into a Hash.
                #
                # +url+ may be the path of a saved html file (useful for testing, so HN
                # is not hit repeatedly), a url containing "http", or a path that is
                # taken to be relative to @host.
                #
                # @param url [String] file path or url of the comments page
                # @return [Hash] with keys :title, :article_url, :byline, :age_text,
                #   :points and :comments, plus :submitter/:submitter_url and
                #   :comment_count/:comments_url when the matching links are present.
                #   :comments is an Array of Hashes holding :head and :comment_text and,
                #   when the head carries an "N unit ago" age, :age_text, :age and any of
                #   :submitter, :submitter_url, :text, :comment_url.
                def to_hash_comment url
                  # for testing i may send in a saved file, so i don't keep hitting HN
                  # File.exist? is used because File.exists? was removed in Ruby 3.2.
                  url = "#{@host}/#{url}" unless File.exist?(url) || url.index("http")

                  # Kernel#open stopped fetching urls in Ruby 3.0, so prefer URI.open
                  # when open-uri provides it; it still falls back to a plain file open
                  # for local paths. The block form closes the handle after parsing.
                  parse = lambda { |io| Nokogiri::HTML(io) }
                  page = if defined?(URI) && URI.respond_to?(:open)
                           URI.open(url, &parse)
                         else
                           open(url, &parse)
                         end

                  h = {}
                  title = page.css("td.title")
                  first_link = title.css("a").first
                  h[:title] = title.text
                  # nil (instead of a NoMethodError) when the title cell has no link
                  h[:article_url] = first_link && first_link["href"]

                  subtext = page.css("td.subtext")
                  byline = subtext.text
                  h[:byline] = byline
                  # TODO extract age_text
                  h[:age_text] = byline[/\d+ \w+ ago/]
                  h[:points] = subtext.css("span").text
                  subtext.css("a").each do |e|
                    # to_s: an anchor without href simply matches neither branch
                    link = e["href"].to_s
                    if link.start_with?("user")
                      h[:submitter] = e.text
                      h[:submitter_url] = link
                    elsif link.start_with?("item")
                      h[:comment_count] = e.text
                      h[:comments_url] = link
                    end
                  end

                  # need to get points
                  comheads = page.css("span.comhead")
                  comments = page.css("span.comment").collect { |e| e.text }
                  # The first comhead is dropped before pairing heads with comments
                  # (presumably it belongs to the article itself -- TODO confirm).
                  comheads.delete(comheads.first) unless comheads.empty?

                  # array of comments: one Hash per (head, comment text) pair
                  h[:comments] = comheads.zip(comments).map do |head, c|
                    head_text = head.text
                    hh = { :head => head_text }
                    age = head_text[/\d+ \w+ ago/]
                    if age
                      # short form: the number plus the first letter of the unit
                      hh[:age_text] = age[/\d+ \w/].rjust(4)
                      hh[:age] = human_age_to_unix(age)
                      head.css("a").each do |e|
                        link = e["href"].to_s
                        if link.start_with?("user")
                          hh[:submitter] = e.text
                          hh[:submitter_url] = link
                        elsif link.start_with?("item")
                          hh[:text] = e.text
                          hh[:comment_url] = link
                        end
                      end
                    end
                    hh[:comment_text] = c
                    hh
                  end
                  h
                end
         
     | 
| 
      
 112 
     | 
    
         
            +
                # Build a ForumPage from the Hash +h+: :url, :next_url, :create_date and
                # :subforum are copied across, and every element of h[:articles] becomes
                # a ForumArticle whose parent is set to this object.
                def hash_to_class(h)
                  forum_page = ForumPage.new
                  forum_page.url = h[:url]
                  forum_page.next_url = h[:next_url]
                  forum_page.create_date = h[:create_date]
                  forum_page.subforum = h[:subforum]
                  forum_page.articles = h[:articles].map do |article_hash|
                    ForumArticle.new(article_hash).tap { |article| article.parent = self }
                  end
                  forum_page
                end
         
     | 
| 
      
 128 
     | 
    
         
            +
                # convert the front page to a hash
         
     | 
| 
      
 129 
     | 
    
         
            +
                def to_hash url
         
     | 
| 
      
 130 
     | 
    
         
            +
                  doc  = get_doc_for_url url
         
     | 
| 
      
 131 
     | 
    
         
            +
                  count = 0
         
     | 
| 
      
 132 
     | 
    
         
            +
                  page = {}
         
     | 
| 
      
 133 
     | 
    
         
            +
                  page[:url] = url
         
     | 
| 
      
 134 
     | 
    
         
            +
                  now = Time.now
         
     | 
| 
      
 135 
     | 
    
         
            +
                  page[:create_date_seconds] = now.to_i
         
     | 
| 
      
 136 
     | 
    
         
            +
                  page[:create_date] = now
         
     | 
| 
      
 137 
     | 
    
         
            +
                  page[:subforum] = @subforum
         
     | 
| 
      
 138 
     | 
    
         
            +
             
     | 
| 
      
 139 
     | 
    
         
            +
                  arr = Array.new
         
     | 
| 
      
 140 
     | 
    
         
            +
                  h = {}
         
     | 
| 
      
 141 
     | 
    
         
            +
                  links = doc.xpath("//table/tr/td/table/tr")
         
     | 
| 
      
 142 
     | 
    
         
            +
                  links.each_with_index do |li, i|
         
     | 
| 
      
 143 
     | 
    
         
            +
                    x = li.css("td.title")
         
     | 
| 
      
 144 
     | 
    
         
            +
                    if !x.empty?
         
     | 
| 
      
 145 
     | 
    
         
            +
                      #puts "   ---- title ----- #{x.count} "
         
     | 
| 
      
 146 
     | 
    
         
            +
                      count = x[0].text
         
     | 
| 
      
 147 
     | 
    
         
            +
                      #puts count
         
     | 
| 
      
 148 
     | 
    
         
            +
                      if x.count < 2
         
     | 
| 
      
 149 
     | 
    
         
            +
                        # this block is for the next_url
         
     | 
| 
      
 150 
     | 
    
         
            +
                        article_url = x[0].css("a")[0]["href"]   # link url
         
     | 
| 
      
 151 
     | 
    
         
            +
                        #puts article_url
         
     | 
| 
      
 152 
     | 
    
         
            +
                        h = {}
         
     | 
| 
      
 153 
     | 
    
         
            +
                        h[:title] = count
         
     | 
| 
      
 154 
     | 
    
         
            +
                        h[:article_url] = article_url
         
     | 
| 
      
 155 
     | 
    
         
            +
                        more = count
         
     | 
| 
      
 156 
     | 
    
         
            +
                        more_url = "#{@host}/#{article_url}"
         
     | 
| 
      
 157 
     | 
    
         
            +
                        #arr << h
         
     | 
| 
      
 158 
     | 
    
         
            +
                        page[:next_url] = more_url
         
     | 
| 
      
 159 
     | 
    
         
            +
                        #puts li
         
     | 
| 
      
 160 
     | 
    
         
            +
                      end
         
     | 
| 
      
 161 
     | 
    
         
            +
                      break if x.count < 2
         
     | 
| 
      
 162 
     | 
    
         
            +
             
     | 
| 
      
 163 
     | 
    
         
            +
                      # actual article url
         
     | 
| 
      
 164 
     | 
    
         
            +
                      title = x[1].css("a")[0].text   # title
         
     | 
| 
      
 165 
     | 
    
         
            +
                      article_url = x[1].css("a")[0]["href"]   # link url
         
     | 
| 
      
 166 
     | 
    
         
            +
                      #puts article_url
         
     | 
| 
      
 167 
     | 
    
         
            +
                      #puts title
         
     | 
| 
      
 168 
     | 
    
         
            +
                      h = {}
         
     | 
| 
      
 169 
     | 
    
         
            +
                      #h[:number] = count
         
     | 
| 
      
 170 
     | 
    
         
            +
                      h[:title] = title
         
     | 
| 
      
 171 
     | 
    
         
            +
                      # ask option does not have hostname since it is relative to HN
         
     | 
| 
      
 172 
     | 
    
         
            +
                      if article_url.index("http") != 0
         
     | 
| 
      
 173 
     | 
    
         
            +
                        article_url = "#{@host}/#{article_url}"
         
     | 
| 
      
 174 
     | 
    
         
            +
                      end
         
     | 
| 
      
 175 
     | 
    
         
            +
             
     | 
| 
      
 176 
     | 
    
         
            +
                      h[:article_url] = article_url
         
     | 
| 
      
 177 
     | 
    
         
            +
                      arr << h
         
     | 
| 
      
 178 
     | 
    
         
            +
                    else 
         
     | 
| 
      
 179 
     | 
    
         
            +
                      x = li.css("td.subtext")
         
     | 
| 
      
 180 
     | 
    
         
            +
                      if !x.empty?
         
     | 
| 
      
 181 
     | 
    
         
            +
                        fulltext = x.text
         
     | 
| 
      
 182 
     | 
    
         
            +
                        #puts "   ---- subtext ----- (#{fulltext})"
         
     | 
| 
      
 183 
     | 
    
         
            +
                        submitter = nil
         
     | 
| 
      
 184 
     | 
    
         
            +
                        submitter_url = nil
         
     | 
| 
      
 185 
     | 
    
         
            +
                        comment = nil
         
     | 
| 
      
 186 
     | 
    
         
            +
                        comments_url = nil
         
     | 
| 
      
 187 
     | 
    
         
            +
                        t = x.css("a")
         
     | 
| 
      
 188 
     | 
    
         
            +
                        t.each_with_index do |tt, ii|
         
     | 
| 
      
 189 
     | 
    
         
            +
                          case ii
         
     | 
| 
      
 190 
     | 
    
         
            +
                          when 0
         
     | 
| 
      
 191 
     | 
    
         
            +
                            submitter = tt.text
         
     | 
| 
      
 192 
     | 
    
         
            +
                            submitter_url = tt["href"]
         
     | 
| 
      
 193 
     | 
    
         
            +
                          when 1
         
     | 
| 
      
 194 
     | 
    
         
            +
                            comment = tt.text
         
     | 
| 
      
 195 
     | 
    
         
            +
                            comments_url = tt["href"]
         
     | 
| 
      
 196 
     | 
    
         
            +
                            comments_url = "#{@host}/#{comments_url}"
         
     | 
| 
      
 197 
     | 
    
         
            +
                          end
         
     | 
| 
      
 198 
     | 
    
         
            +
                        end
         
     | 
| 
      
 199 
     | 
    
         
            +
                        points = x.css("span").text rescue ""
         
     | 
| 
      
 200 
     | 
    
         
            +
                        #puts submitter
         
     | 
| 
      
 201 
     | 
    
         
            +
                        #puts submitter_url
         
     | 
| 
      
 202 
     | 
    
         
            +
                        #puts comment
         
     | 
| 
      
 203 
     | 
    
         
            +
                        #puts comments_url
         
     | 
| 
      
 204 
     | 
    
         
            +
                        #puts points
         
     | 
| 
      
 205 
     | 
    
         
            +
                        h[:submitter] = submitter
         
     | 
| 
      
 206 
     | 
    
         
            +
                        h[:submitter_url] = submitter_url
         
     | 
| 
      
 207 
     | 
    
         
            +
                        h[:comment_count] = comment.to_i.to_s.rjust(4)
         
     | 
| 
      
 208 
     | 
    
         
            +
                        h[:comments_url] = comments_url
         
     | 
| 
      
 209 
     | 
    
         
            +
                        h[:points] = points.to_i.to_s.rjust(4)
         
     | 
| 
      
 210 
     | 
    
         
            +
                        m = fulltext.scan(/\d+ \w+ ago/)
         
     | 
| 
      
 211 
     | 
    
         
            +
                        if m
         
     | 
| 
      
 212 
     | 
    
         
            +
                          #h[:age_text] = m.first
         
     | 
| 
      
 213 
     | 
    
         
            +
                          h[:age_text] = m.first.scan(/\d+ \w/).first.rjust(4)
         
     | 
| 
      
 214 
     | 
    
         
            +
                          h[:age] = human_age_to_unix(m.first)
         
     | 
| 
      
 215 
     | 
    
         
            +
                        end
         
     | 
| 
      
 216 
     | 
    
         
            +
                        #puts "fulltext: #{fulltext} "
         
     | 
| 
      
 217 
     | 
    
         
            +
                        h[:byline] = fulltext
         
     | 
| 
      
 218 
     | 
    
         
            +
                      end
         
     | 
| 
      
 219 
     | 
    
         
            +
                    end
         
     | 
| 
      
 220 
     | 
    
         
            +
                  end
         
     | 
| 
      
 221 
     | 
    
         
            +
                  #return arr
         
     | 
| 
      
 222 
     | 
    
         
            +
                  page[:articles] = arr
         
     | 
| 
      
 223 
     | 
    
         
            +
                  return page
         
     | 
| 
      
 224 
     | 
    
         
            +
                end
         
     | 
| 
      
 225 
     | 
    
         
            +
              end # class
         
     | 
| 
      
 226 
     | 
    
         
            +
            end # module
         
     |