twitterscraper-ruby 0.15.2 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/twitterscraper/cache.rb +7 -1
- data/lib/twitterscraper/cli.rb +3 -3
- data/lib/twitterscraper/query.rb +23 -12
- data/lib/twitterscraper/template.rb +23 -41
- data/lib/twitterscraper/template/tweets.html.erb +82 -0
- data/lib/version.rb +1 -1
- metadata +3 -2
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 66dda5275a9067d328f6637f127895ded954534d304e5e4b349f286a271a08d8
         | 
| 4 | 
            +
              data.tar.gz: 6c3ffb3fba82376fc2de49514245ea96c7cb4fa16c32dcd2fff1ab1ae327bd14
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 24267284f4f29adc86d5bbe70a30bbe31d6d898546576065f1a9accafc3944a352117bbf6eb0de273743a00fb2d26c5cf37ed016cc0324187a25ca279230d812
         | 
| 7 | 
            +
              data.tar.gz: 0bc9f01659560c83b0289bf63119849135b7ec27520dd03c7abd645da99ef660ca4b5fd12301b359cd5cc45a82914d7ceae88ad93ad756fde166718b3d0fe6c2
         | 
    
        data/Gemfile.lock
    CHANGED
    
    
    
        data/lib/twitterscraper/cache.rb
    CHANGED
    
    | @@ -4,7 +4,7 @@ require 'digest/md5' | |
| 4 4 | 
             
            module Twitterscraper
         | 
| 5 5 | 
             
              class Cache
         | 
| 6 6 | 
             
                def initialize()
         | 
| 7 | 
            -
                  @ttl =  | 
| 7 | 
            +
                  @ttl = 86400 # 1 day
         | 
| 8 8 | 
             
                  @dir = 'cache'
         | 
| 9 9 | 
             
                  Dir.mkdir(@dir) unless File.exist?(@dir)
         | 
| 10 10 | 
             
                end
         | 
| @@ -25,6 +25,12 @@ module Twitterscraper | |
| 25 25 | 
             
                  File.write(file, entry.to_json)
         | 
| 26 26 | 
             
                end
         | 
| 27 27 |  | 
| 28 | 
            +
                def delete(key)
         | 
| 29 | 
            +
                  key = cache_key(key)
         | 
| 30 | 
            +
                  file = File.join(@dir, key)
         | 
| 31 | 
            +
                  File.delete(file) if File.exist?(file)
         | 
| 32 | 
            +
                end
         | 
| 33 | 
            +
             | 
| 28 34 | 
             
                def fetch(key, &block)
         | 
| 29 35 | 
             
                  if (value = read(key))
         | 
| 30 36 | 
             
                    value
         | 
    
        data/lib/twitterscraper/cli.rb
    CHANGED
    
    | @@ -27,16 +27,16 @@ module Twitterscraper | |
| 27 27 | 
             
                  }
         | 
| 28 28 | 
             
                  client = Twitterscraper::Client.new(cache: options['cache'], proxy: options['proxy'])
         | 
| 29 29 | 
             
                  tweets = client.query_tweets(options['query'], query_options)
         | 
| 30 | 
            -
                  export(tweets) unless tweets.empty?
         | 
| 30 | 
            +
                  export(options['query'], tweets) unless tweets.empty?
         | 
| 31 31 | 
             
                end
         | 
| 32 32 |  | 
| 33 | 
            -
                def export(tweets)
         | 
| 33 | 
            +
                def export(name, tweets)
         | 
| 34 34 | 
             
                  write_json = lambda { File.write(options['output'], generate_json(tweets)) }
         | 
| 35 35 |  | 
| 36 36 | 
             
                  if options['format'] == 'json'
         | 
| 37 37 | 
             
                    write_json.call
         | 
| 38 38 | 
             
                  elsif options['format'] == 'html'
         | 
| 39 | 
            -
                    File.write(' | 
| 39 | 
            +
                    File.write(options['output'], Template.new.tweets_embedded_html(name, tweets, options))
         | 
| 40 40 | 
             
                  else
         | 
| 41 41 | 
             
                    write_json.call
         | 
| 42 42 | 
             
                  end
         | 
    
        data/lib/twitterscraper/query.rb
    CHANGED
    
    | @@ -69,7 +69,6 @@ module Twitterscraper | |
| 69 69 | 
             
                  else
         | 
| 70 70 | 
             
                    json_resp = JSON.parse(text)
         | 
| 71 71 | 
             
                    items_html = json_resp['items_html'] || ''
         | 
| 72 | 
            -
                    logger.warn json_resp['message'] if json_resp['message'] # Sorry, you are rate limited.
         | 
| 73 72 | 
             
                  end
         | 
| 74 73 |  | 
| 75 74 | 
             
                  [items_html, json_resp]
         | 
| @@ -100,6 +99,12 @@ module Twitterscraper | |
| 100 99 |  | 
| 101 100 | 
             
                  html, json_resp = parse_single_page(response, pos.nil?)
         | 
| 102 101 |  | 
| 102 | 
            +
                  if json_resp && json_resp['message']
         | 
| 103 | 
            +
                    logger.warn json_resp['message'] # Sorry, you are rate limited.
         | 
| 104 | 
            +
                    @stop_requested = true
         | 
| 105 | 
            +
                    Cache.new.delete(url) if cache_enabled?
         | 
| 106 | 
            +
                  end
         | 
| 107 | 
            +
             | 
| 103 108 | 
             
                  tweets = Tweet.from_html(html)
         | 
| 104 109 |  | 
| 105 110 | 
             
                  if tweets.empty?
         | 
| @@ -140,19 +145,27 @@ module Twitterscraper | |
| 140 145 | 
             
                      raise Error.new(":start_date must be greater than or equal to #{OLDEST_DATE}")
         | 
| 141 146 | 
             
                    end
         | 
| 142 147 | 
             
                  end
         | 
| 143 | 
            -
             | 
| 144 | 
            -
                  if end_date
         | 
| 145 | 
            -
                    today = Date.today
         | 
| 146 | 
            -
                    if end_date > Date.today
         | 
| 147 | 
            -
                      raise Error.new(":end_date must be less than or equal to today(#{today})")
         | 
| 148 | 
            -
                    end
         | 
| 149 | 
            -
                  end
         | 
| 150 148 | 
             
                end
         | 
| 151 149 |  | 
| 152 150 | 
             
                def build_queries(query, start_date, end_date)
         | 
| 153 151 | 
             
                  if start_date && end_date
         | 
| 154 | 
            -
                    date_range = start_date.upto(end_date - 1)
         | 
| 155 | 
            -
                    date_range.map { |date| query + " since:#{date} until:#{date + 1}" }
         | 
| 152 | 
            +
                    # date_range = start_date.upto(end_date - 1)
         | 
| 153 | 
            +
                    # date_range.map { |date| query + " since:#{date} until:#{date + 1}" }
         | 
| 154 | 
            +
             | 
| 155 | 
            +
                    queries = []
         | 
| 156 | 
            +
                    time = Time.utc(start_date.year, start_date.month, start_date.day, 0, 0, 0)
         | 
| 157 | 
            +
                    end_time = Time.utc(end_date.year, end_date.month, end_date.day, 0, 0, 0)
         | 
| 158 | 
            +
             | 
| 159 | 
            +
                    while true
         | 
| 160 | 
            +
                      if time < Time.now.utc
         | 
| 161 | 
            +
                        queries << (query + " since:#{time.strftime('%Y-%m-%d_%H:00:00')}_UTC until:#{(time + 3600).strftime('%Y-%m-%d_%H:00:00')}_UTC")
         | 
| 162 | 
            +
                      end
         | 
| 163 | 
            +
                      time += 3600
         | 
| 164 | 
            +
                      break if time >= end_time
         | 
| 165 | 
            +
                    end
         | 
| 166 | 
            +
             | 
| 167 | 
            +
                    queries
         | 
| 168 | 
            +
             | 
| 156 169 | 
             
                  elsif start_date
         | 
| 157 170 | 
             
                    [query + " since:#{start_date}"]
         | 
| 158 171 | 
             
                  elsif end_date
         | 
| @@ -202,7 +215,6 @@ module Twitterscraper | |
| 202 215 | 
             
                  queries = build_queries(query, start_date, end_date)
         | 
| 203 216 | 
             
                  type = Type.new(type)
         | 
| 204 217 | 
             
                  if threads > queries.size
         | 
| 205 | 
            -
                    logger.warn 'The maximum number of :threads is the number of dates between :start_date and :end_date.'
         | 
| 206 218 | 
             
                    threads = queries.size
         | 
| 207 219 | 
             
                  end
         | 
| 208 220 | 
             
                  if proxy_enabled?
         | 
| @@ -214,7 +226,6 @@ module Twitterscraper | |
| 214 226 | 
             
                  end
         | 
| 215 227 | 
             
                  logger.debug "Cache #{cache_enabled? ? 'enabled' : 'disabled'}"
         | 
| 216 228 |  | 
| 217 | 
            -
             | 
| 218 229 | 
             
                  validate_options!(queries, type: type, start_date: start_date, end_date: end_date, lang: lang, limit: limit, threads: threads)
         | 
| 219 230 |  | 
| 220 231 | 
             
                  logger.info "The number of threads #{threads}"
         | 
| @@ -1,48 +1,30 @@ | |
| 1 1 | 
             
            module Twitterscraper
         | 
| 2 | 
            -
               | 
| 3 | 
            -
                 | 
| 2 | 
            +
              class Template
         | 
| 3 | 
            +
                def tweets_embedded_html(name, tweets, options)
         | 
| 4 | 
            +
                  path = File.join(File.dirname(__FILE__), 'template/tweets.html.erb')
         | 
| 5 | 
            +
                  template = ERB.new(File.read(path))
         | 
| 4 6 |  | 
| 5 | 
            -
             | 
| 6 | 
            -
             | 
| 7 | 
            -
             | 
| 7 | 
            +
                  template.result_with_hash(
         | 
| 8 | 
            +
                      chart_name: name,
         | 
| 9 | 
            +
                      chart_data: chart_data(tweets).to_json,
         | 
| 10 | 
            +
                      first_tweet: tweets.sort_by { |t| t.created_at.to_i }[0],
         | 
| 11 | 
            +
                      last_tweet: tweets.sort_by { |t| t.created_at.to_i }[-1],
         | 
| 12 | 
            +
                      tweets_size: tweets.size,
         | 
| 13 | 
            +
                      tweets: tweets.take(50)
         | 
| 14 | 
            +
                  )
         | 
| 8 15 | 
             
                end
         | 
| 9 16 |  | 
| 10 | 
            -
                 | 
| 11 | 
            -
                   | 
| 12 | 
            -
                     | 
| 13 | 
            -
             | 
| 14 | 
            -
             | 
| 17 | 
            +
                def chart_data(tweets)
         | 
| 18 | 
            +
                  data = tweets.each_with_object(Hash.new(0)) do |tweet, memo|
         | 
| 19 | 
            +
                    t = tweet.created_at
         | 
| 20 | 
            +
                    min = (t.min.to_f / 5).floor * 5
         | 
| 21 | 
            +
                    time = Time.new(t.year, t.month, t.day, t.hour, min, 0, '+00:00')
         | 
| 22 | 
            +
                    memo[time.to_i] += 1
         | 
| 23 | 
            +
                  end
         | 
| 15 24 |  | 
| 16 | 
            -
             | 
| 17 | 
            -
             | 
| 18 | 
            -
             | 
| 19 | 
            -
             | 
| 20 | 
            -
                        .twitter-tweet {
         | 
| 21 | 
            -
                          margin: 30px auto 0 auto !important;
         | 
| 22 | 
            -
                        }
         | 
| 23 | 
            -
                      </style>
         | 
| 24 | 
            -
                      <script>
         | 
| 25 | 
            -
                        window.twttr = (function(d, s, id) {
         | 
| 26 | 
            -
                          var js, fjs = d.getElementsByTagName(s)[0], t = window.twttr || {};
         | 
| 27 | 
            -
                          if (d.getElementById(id)) return t;
         | 
| 28 | 
            -
                          js = d.createElement(s);
         | 
| 29 | 
            -
                          js.id = id;
         | 
| 30 | 
            -
                          js.src = "https://platform.twitter.com/widgets.js";
         | 
| 31 | 
            -
                          fjs.parentNode.insertBefore(js, fjs);
         | 
| 32 | 
            -
             | 
| 33 | 
            -
                          t._e = [];
         | 
| 34 | 
            -
                          t.ready = function(f) {
         | 
| 35 | 
            -
                              t._e.push(f);
         | 
| 36 | 
            -
                          };
         | 
| 37 | 
            -
             | 
| 38 | 
            -
                          return t;
         | 
| 39 | 
            -
                        }(document, "script", "twitter-wjs"));
         | 
| 40 | 
            -
                      </script>
         | 
| 41 | 
            -
                    </head>
         | 
| 42 | 
            -
                    <body>
         | 
| 43 | 
            -
                      __TWEETS__
         | 
| 44 | 
            -
                    </body>
         | 
| 45 | 
            -
                  </html>
         | 
| 46 | 
            -
                HTML
         | 
| 25 | 
            +
                  data.sort_by { |k, v| k }.map do |timestamp, count|
         | 
| 26 | 
            +
                    [timestamp * 1000, count]
         | 
| 27 | 
            +
                  end
         | 
| 28 | 
            +
                end
         | 
| 47 29 | 
             
              end
         | 
| 48 30 | 
             
            end
         | 
| @@ -0,0 +1,82 @@ | |
| 1 | 
            +
            <html>
         | 
| 2 | 
            +
            <head>
         | 
| 3 | 
            +
              <script>
         | 
| 4 | 
            +
                  window.twttr = (function (d, s, id) {
         | 
| 5 | 
            +
                      var js, fjs = d.getElementsByTagName(s)[0], t = window.twttr || {};
         | 
| 6 | 
            +
                      if (d.getElementById(id)) return t;
         | 
| 7 | 
            +
                      js = d.createElement(s);
         | 
| 8 | 
            +
                      js.id = id;
         | 
| 9 | 
            +
                      js.src = "https://platform.twitter.com/widgets.js";
         | 
| 10 | 
            +
                      fjs.parentNode.insertBefore(js, fjs);
         | 
| 11 | 
            +
             | 
| 12 | 
            +
                      t._e = [];
         | 
| 13 | 
            +
                      t.ready = function (f) {
         | 
| 14 | 
            +
                          t._e.push(f);
         | 
| 15 | 
            +
                      };
         | 
| 16 | 
            +
             | 
| 17 | 
            +
                      return t;
         | 
| 18 | 
            +
                  }(document, "script", "twitter-wjs"));
         | 
| 19 | 
            +
              </script>
         | 
| 20 | 
            +
             | 
| 21 | 
            +
              <script src="https://cdnjs.cloudflare.com/ajax/libs/moment.js/2.27.0/moment.min.js" integrity="sha512-rmZcZsyhe0/MAjquhTgiUcb4d9knaFc7b5xAfju483gbEXTkeJRUMIPk6s3ySZMYUHEcjKbjLjyddGWMrNEvZg==" crossorigin="anonymous"></script>
         | 
| 22 | 
            +
              <script src="https://cdnjs.cloudflare.com/ajax/libs/moment-timezone/0.5.31/moment-timezone-with-data.min.js" integrity="sha512-HZcf3uHWA+Y2P5KNv+F/xa87/flKVP92kUTe/KXjU8URPshczF1Dx+cL5bw0VBGhmqWAK0UbhcqxBbyiNtAnWQ==" crossorigin="anonymous"></script>
         | 
| 23 | 
            +
              <script src="https://code.highcharts.com/stock/highstock.js"></script>
         | 
| 24 | 
            +
              <script>
         | 
| 25 | 
            +
                  function drawChart() {
         | 
| 26 | 
            +
                      Highcharts.setOptions({
         | 
| 27 | 
            +
                          time: {
         | 
| 28 | 
            +
                              timezone: moment.tz.guess()
         | 
| 29 | 
            +
                          }
         | 
| 30 | 
            +
                      });
         | 
| 31 | 
            +
             | 
| 32 | 
            +
                      Highcharts.stockChart('chart', {
         | 
| 33 | 
            +
                          title: {
         | 
| 34 | 
            +
                              text: '<%= tweets_size %> tweets of <%= chart_name %>'
         | 
| 35 | 
            +
                          },
         | 
| 36 | 
            +
                          subtitle: {
         | 
| 37 | 
            +
                              text: 'since:<%= first_tweet.created_at.localtime %> until:<%= last_tweet.created_at.localtime %>'
         | 
| 38 | 
            +
                          },
         | 
| 39 | 
            +
                          series: [{
         | 
| 40 | 
            +
                              data: <%= chart_data %>
         | 
| 41 | 
            +
                          }],
         | 
| 42 | 
            +
                          rangeSelector: {enabled: false},
         | 
| 43 | 
            +
                          scrollbar: {enabled: false},
         | 
| 44 | 
            +
                          navigator: {enabled: false},
         | 
| 45 | 
            +
                          exporting: {enabled: false},
         | 
| 46 | 
            +
                          credits: {enabled: false}
         | 
| 47 | 
            +
                      });
         | 
| 48 | 
            +
                  }
         | 
| 49 | 
            +
             | 
| 50 | 
            +
                  document.addEventListener("DOMContentLoaded", function () {
         | 
| 51 | 
            +
                      drawChart();
         | 
| 52 | 
            +
                  });
         | 
| 53 | 
            +
              </script>
         | 
| 54 | 
            +
             | 
| 55 | 
            +
              <style type=text/css>
         | 
| 56 | 
            +
                .tweets-container {
         | 
| 57 | 
            +
                  max-width: 550px;
         | 
| 58 | 
            +
                  margin: 0 auto 0 auto;
         | 
| 59 | 
            +
                }
         | 
| 60 | 
            +
             | 
| 61 | 
            +
                .twitter-tweet {
         | 
| 62 | 
            +
                  margin: 15px 0 15px 0 !important;
         | 
| 63 | 
            +
                }
         | 
| 64 | 
            +
              </style>
         | 
| 65 | 
            +
            </head>
         | 
| 66 | 
            +
            <body>
         | 
| 67 | 
            +
            <div id="chart"></div>
         | 
| 68 | 
            +
             | 
| 69 | 
            +
            <div class="tweets-container">
         | 
| 70 | 
            +
              <% tweets.each do |tweet| %>
         | 
| 71 | 
            +
                <blockquote class="twitter-tweet">
         | 
| 72 | 
            +
                  <a href="<%= tweet.tweet_url %>"></a>
         | 
| 73 | 
            +
                </blockquote>
         | 
| 74 | 
            +
              <% end %>
         | 
| 75 | 
            +
             | 
| 76 | 
            +
              <% if tweets_size > tweets.size %>
         | 
| 77 | 
            +
                <div>and more!</div>
         | 
| 78 | 
            +
              <% end %>
         | 
| 79 | 
            +
            </div>
         | 
| 80 | 
            +
             | 
| 81 | 
            +
            </body>
         | 
| 82 | 
            +
            </html>
         | 
    
        data/lib/version.rb
    CHANGED
    
    
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: twitterscraper-ruby
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.15.2 | 
| 4 | 
            +
              version: 0.16.0
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - ts-3156
         | 
| 8 8 | 
             
            autorequire: 
         | 
| 9 9 | 
             
            bindir: bin
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date: 2020-07- | 
| 11 | 
            +
            date: 2020-07-18 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 14 | 
             
              name: nokogiri
         | 
| @@ -72,6 +72,7 @@ files: | |
| 72 72 | 
             
            - lib/twitterscraper/proxy.rb
         | 
| 73 73 | 
             
            - lib/twitterscraper/query.rb
         | 
| 74 74 | 
             
            - lib/twitterscraper/template.rb
         | 
| 75 | 
            +
            - lib/twitterscraper/template/tweets.html.erb
         | 
| 75 76 | 
             
            - lib/twitterscraper/tweet.rb
         | 
| 76 77 | 
             
            - lib/twitterscraper/type.rb
         | 
| 77 78 | 
             
            - lib/version.rb
         |