twitterscraper-ruby 0.15.2 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7f7d320841125d9a582ece6083f421f0abf301addbc5c5c2a3d2b2c09bedbc33
4
- data.tar.gz: 6ea43165ffa4f37c4319566689a42f2f275d8a70402b0d6b4164df519fee90b5
3
+ metadata.gz: 66dda5275a9067d328f6637f127895ded954534d304e5e4b349f286a271a08d8
4
+ data.tar.gz: 6c3ffb3fba82376fc2de49514245ea96c7cb4fa16c32dcd2fff1ab1ae327bd14
5
5
  SHA512:
6
- metadata.gz: ee3756538ec28e9f0113e611e2731ec33107dabacf7cb730b257d6c94351407ef171a9bc91402a589fa73fdb6b705f73b11582766af1d04a3413b8bc79dc6619
7
- data.tar.gz: 78200dc658a9c1cf43ed7367e499b0d1b243728aecb2ffd7366b5612f8905bb33d27ab7e1412327d05b7fff159196fe9e24d18c8cc4c24898af10533fbdf43df
6
+ metadata.gz: 24267284f4f29adc86d5bbe70a30bbe31d6d898546576065f1a9accafc3944a352117bbf6eb0de273743a00fb2d26c5cf37ed016cc0324187a25ca279230d812
7
+ data.tar.gz: 0bc9f01659560c83b0289bf63119849135b7ec27520dd03c7abd645da99ef660ca4b5fd12301b359cd5cc45a82914d7ceae88ad93ad756fde166718b3d0fe6c2
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- twitterscraper-ruby (0.15.2)
4
+ twitterscraper-ruby (0.16.0)
5
5
  nokogiri
6
6
  parallel
7
7
 
@@ -4,7 +4,7 @@ require 'digest/md5'
4
4
  module Twitterscraper
5
5
  class Cache
6
6
  def initialize()
7
- @ttl = 3600 # 1 hour
7
+ @ttl = 86400 # 1 day
8
8
  @dir = 'cache'
9
9
  Dir.mkdir(@dir) unless File.exist?(@dir)
10
10
  end
@@ -25,6 +25,12 @@ module Twitterscraper
25
25
  File.write(file, entry.to_json)
26
26
  end
27
27
 
28
+ def delete(key)
29
+ key = cache_key(key)
30
+ file = File.join(@dir, key)
31
+ File.delete(file) if File.exist?(file)
32
+ end
33
+
28
34
  def fetch(key, &block)
29
35
  if (value = read(key))
30
36
  value
@@ -27,16 +27,16 @@ module Twitterscraper
27
27
  }
28
28
  client = Twitterscraper::Client.new(cache: options['cache'], proxy: options['proxy'])
29
29
  tweets = client.query_tweets(options['query'], query_options)
30
- export(tweets) unless tweets.empty?
30
+ export(options['query'], tweets) unless tweets.empty?
31
31
  end
32
32
 
33
- def export(tweets)
33
+ def export(name, tweets)
34
34
  write_json = lambda { File.write(options['output'], generate_json(tweets)) }
35
35
 
36
36
  if options['format'] == 'json'
37
37
  write_json.call
38
38
  elsif options['format'] == 'html'
39
- File.write('tweets.html', Template.tweets_embedded_html(tweets))
39
+ File.write(options['output'], Template.new.tweets_embedded_html(name, tweets, options))
40
40
  else
41
41
  write_json.call
42
42
  end
@@ -69,7 +69,6 @@ module Twitterscraper
69
69
  else
70
70
  json_resp = JSON.parse(text)
71
71
  items_html = json_resp['items_html'] || ''
72
- logger.warn json_resp['message'] if json_resp['message'] # Sorry, you are rate limited.
73
72
  end
74
73
 
75
74
  [items_html, json_resp]
@@ -100,6 +99,12 @@ module Twitterscraper
100
99
 
101
100
  html, json_resp = parse_single_page(response, pos.nil?)
102
101
 
102
+ if json_resp && json_resp['message']
103
+ logger.warn json_resp['message'] # Sorry, you are rate limited.
104
+ @stop_requested = true
105
+ Cache.new.delete(url) if cache_enabled?
106
+ end
107
+
103
108
  tweets = Tweet.from_html(html)
104
109
 
105
110
  if tweets.empty?
@@ -140,19 +145,27 @@ module Twitterscraper
140
145
  raise Error.new(":start_date must be greater than or equal to #{OLDEST_DATE}")
141
146
  end
142
147
  end
143
-
144
- if end_date
145
- today = Date.today
146
- if end_date > Date.today
147
- raise Error.new(":end_date must be less than or equal to today(#{today})")
148
- end
149
- end
150
148
  end
151
149
 
152
150
  def build_queries(query, start_date, end_date)
153
151
  if start_date && end_date
154
- date_range = start_date.upto(end_date - 1)
155
- date_range.map { |date| query + " since:#{date} until:#{date + 1}" }
152
+ # date_range = start_date.upto(end_date - 1)
153
+ # date_range.map { |date| query + " since:#{date} until:#{date + 1}" }
154
+
155
+ queries = []
156
+ time = Time.utc(start_date.year, start_date.month, start_date.day, 0, 0, 0)
157
+ end_time = Time.utc(end_date.year, end_date.month, end_date.day, 0, 0, 0)
158
+
159
+ while true
160
+ if time < Time.now.utc
161
+ queries << (query + " since:#{time.strftime('%Y-%m-%d_%H:00:00')}_UTC until:#{(time + 3600).strftime('%Y-%m-%d_%H:00:00')}_UTC")
162
+ end
163
+ time += 3600
164
+ break if time >= end_time
165
+ end
166
+
167
+ queries
168
+
156
169
  elsif start_date
157
170
  [query + " since:#{start_date}"]
158
171
  elsif end_date
@@ -202,7 +215,6 @@ module Twitterscraper
202
215
  queries = build_queries(query, start_date, end_date)
203
216
  type = Type.new(type)
204
217
  if threads > queries.size
205
- logger.warn 'The maximum number of :threads is the number of dates between :start_date and :end_date.'
206
218
  threads = queries.size
207
219
  end
208
220
  if proxy_enabled?
@@ -214,7 +226,6 @@ module Twitterscraper
214
226
  end
215
227
  logger.debug "Cache #{cache_enabled? ? 'enabled' : 'disabled'}"
216
228
 
217
-
218
229
  validate_options!(queries, type: type, start_date: start_date, end_date: end_date, lang: lang, limit: limit, threads: threads)
219
230
 
220
231
  logger.info "The number of threads #{threads}"
@@ -1,48 +1,30 @@
1
1
  module Twitterscraper
2
- module Template
3
- module_function
2
+ class Template
3
+ def tweets_embedded_html(name, tweets, options)
4
+ path = File.join(File.dirname(__FILE__), 'template/tweets.html.erb')
5
+ template = ERB.new(File.read(path))
4
6
 
5
- def tweets_embedded_html(tweets)
6
- tweets_html = tweets.map { |t| EMBED_TWEET_HTML.sub('__TWEET_URL__', t.tweet_url) }
7
- EMBED_TWEETS_HTML.sub('__TWEETS__', tweets_html.join)
7
+ template.result_with_hash(
8
+ chart_name: name,
9
+ chart_data: chart_data(tweets).to_json,
10
+ first_tweet: tweets.sort_by { |t| t.created_at.to_i }[0],
11
+ last_tweet: tweets.sort_by { |t| t.created_at.to_i }[-1],
12
+ tweets_size: tweets.size,
13
+ tweets: tweets.take(50)
14
+ )
8
15
  end
9
16
 
10
- EMBED_TWEET_HTML = <<~'HTML'
11
- <blockquote class="twitter-tweet">
12
- <a href="__TWEET_URL__"></a>
13
- </blockquote>
14
- HTML
17
+ def chart_data(tweets)
18
+ data = tweets.each_with_object(Hash.new(0)) do |tweet, memo|
19
+ t = tweet.created_at
20
+ min = (t.min.to_f / 5).floor * 5
21
+ time = Time.new(t.year, t.month, t.day, t.hour, min, 0, '+00:00')
22
+ memo[time.to_i] += 1
23
+ end
15
24
 
16
- EMBED_TWEETS_HTML = <<~'HTML'
17
- <html>
18
- <head>
19
- <style type=text/css>
20
- .twitter-tweet {
21
- margin: 30px auto 0 auto !important;
22
- }
23
- </style>
24
- <script>
25
- window.twttr = (function(d, s, id) {
26
- var js, fjs = d.getElementsByTagName(s)[0], t = window.twttr || {};
27
- if (d.getElementById(id)) return t;
28
- js = d.createElement(s);
29
- js.id = id;
30
- js.src = "https://platform.twitter.com/widgets.js";
31
- fjs.parentNode.insertBefore(js, fjs);
32
-
33
- t._e = [];
34
- t.ready = function(f) {
35
- t._e.push(f);
36
- };
37
-
38
- return t;
39
- }(document, "script", "twitter-wjs"));
40
- </script>
41
- </head>
42
- <body>
43
- __TWEETS__
44
- </body>
45
- </html>
46
- HTML
25
+ data.sort_by { |k, v| k }.map do |timestamp, count|
26
+ [timestamp * 1000, count]
27
+ end
28
+ end
47
29
  end
48
30
  end
@@ -0,0 +1,82 @@
1
+ <html>
2
+ <head>
3
+ <script>
4
+ window.twttr = (function (d, s, id) {
5
+ var js, fjs = d.getElementsByTagName(s)[0], t = window.twttr || {};
6
+ if (d.getElementById(id)) return t;
7
+ js = d.createElement(s);
8
+ js.id = id;
9
+ js.src = "https://platform.twitter.com/widgets.js";
10
+ fjs.parentNode.insertBefore(js, fjs);
11
+
12
+ t._e = [];
13
+ t.ready = function (f) {
14
+ t._e.push(f);
15
+ };
16
+
17
+ return t;
18
+ }(document, "script", "twitter-wjs"));
19
+ </script>
20
+
21
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/moment.js/2.27.0/moment.min.js" integrity="sha512-rmZcZsyhe0/MAjquhTgiUcb4d9knaFc7b5xAfju483gbEXTkeJRUMIPk6s3ySZMYUHEcjKbjLjyddGWMrNEvZg==" crossorigin="anonymous"></script>
22
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/moment-timezone/0.5.31/moment-timezone-with-data.min.js" integrity="sha512-HZcf3uHWA+Y2P5KNv+F/xa87/flKVP92kUTe/KXjU8URPshczF1Dx+cL5bw0VBGhmqWAK0UbhcqxBbyiNtAnWQ==" crossorigin="anonymous"></script>
23
+ <script src="https://code.highcharts.com/stock/highstock.js"></script>
24
+ <script>
25
+ function drawChart() {
26
+ Highcharts.setOptions({
27
+ time: {
28
+ timezone: moment.tz.guess()
29
+ }
30
+ });
31
+
32
+ Highcharts.stockChart('chart', {
33
+ title: {
34
+ text: '<%= tweets_size %> tweets of <%= chart_name %>'
35
+ },
36
+ subtitle: {
37
+ text: 'since:<%= first_tweet.created_at.localtime %> until:<%= last_tweet.created_at.localtime %>'
38
+ },
39
+ series: [{
40
+ data: <%= chart_data %>
41
+ }],
42
+ rangeSelector: {enabled: false},
43
+ scrollbar: {enabled: false},
44
+ navigator: {enabled: false},
45
+ exporting: {enabled: false},
46
+ credits: {enabled: false}
47
+ });
48
+ }
49
+
50
+ document.addEventListener("DOMContentLoaded", function () {
51
+ drawChart();
52
+ });
53
+ </script>
54
+
55
+ <style type=text/css>
56
+ .tweets-container {
57
+ max-width: 550px;
58
+ margin: 0 auto 0 auto;
59
+ }
60
+
61
+ .twitter-tweet {
62
+ margin: 15px 0 15px 0 !important;
63
+ }
64
+ </style>
65
+ </head>
66
+ <body>
67
+ <div id="chart"></div>
68
+
69
+ <div class="tweets-container">
70
+ <% tweets.each do |tweet| %>
71
+ <blockquote class="twitter-tweet">
72
+ <a href="<%= tweet.tweet_url %>"></a>
73
+ </blockquote>
74
+ <% end %>
75
+
76
+ <% if tweets_size > tweets.size %>
77
+ <div>and more!</div>
78
+ <% end %>
79
+ </div>
80
+
81
+ </body>
82
+ </html>
@@ -1,3 +1,3 @@
1
1
  module Twitterscraper
2
- VERSION = '0.15.2'
2
+ VERSION = '0.16.0'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: twitterscraper-ruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.15.2
4
+ version: 0.16.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - ts-3156
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-07-17 00:00:00.000000000 Z
11
+ date: 2020-07-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -72,6 +72,7 @@ files:
72
72
  - lib/twitterscraper/proxy.rb
73
73
  - lib/twitterscraper/query.rb
74
74
  - lib/twitterscraper/template.rb
75
+ - lib/twitterscraper/template/tweets.html.erb
75
76
  - lib/twitterscraper/tweet.rb
76
77
  - lib/twitterscraper/type.rb
77
78
  - lib/version.rb