twitterscraper-ruby 0.15.2 → 0.16.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7f7d320841125d9a582ece6083f421f0abf301addbc5c5c2a3d2b2c09bedbc33
4
- data.tar.gz: 6ea43165ffa4f37c4319566689a42f2f275d8a70402b0d6b4164df519fee90b5
3
+ metadata.gz: 66dda5275a9067d328f6637f127895ded954534d304e5e4b349f286a271a08d8
4
+ data.tar.gz: 6c3ffb3fba82376fc2de49514245ea96c7cb4fa16c32dcd2fff1ab1ae327bd14
5
5
  SHA512:
6
- metadata.gz: ee3756538ec28e9f0113e611e2731ec33107dabacf7cb730b257d6c94351407ef171a9bc91402a589fa73fdb6b705f73b11582766af1d04a3413b8bc79dc6619
7
- data.tar.gz: 78200dc658a9c1cf43ed7367e499b0d1b243728aecb2ffd7366b5612f8905bb33d27ab7e1412327d05b7fff159196fe9e24d18c8cc4c24898af10533fbdf43df
6
+ metadata.gz: 24267284f4f29adc86d5bbe70a30bbe31d6d898546576065f1a9accafc3944a352117bbf6eb0de273743a00fb2d26c5cf37ed016cc0324187a25ca279230d812
7
+ data.tar.gz: 0bc9f01659560c83b0289bf63119849135b7ec27520dd03c7abd645da99ef660ca4b5fd12301b359cd5cc45a82914d7ceae88ad93ad756fde166718b3d0fe6c2
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- twitterscraper-ruby (0.15.2)
4
+ twitterscraper-ruby (0.16.0)
5
5
  nokogiri
6
6
  parallel
7
7
 
@@ -4,7 +4,7 @@ require 'digest/md5'
4
4
  module Twitterscraper
5
5
  class Cache
6
6
  def initialize()
7
- @ttl = 3600 # 1 hour
7
+ @ttl = 86400 # 1 day
8
8
  @dir = 'cache'
9
9
  Dir.mkdir(@dir) unless File.exist?(@dir)
10
10
  end
@@ -25,6 +25,12 @@ module Twitterscraper
25
25
  File.write(file, entry.to_json)
26
26
  end
27
27
 
28
+ def delete(key)
29
+ key = cache_key(key)
30
+ file = File.join(@dir, key)
31
+ File.delete(file) if File.exist?(file)
32
+ end
33
+
28
34
  def fetch(key, &block)
29
35
  if (value = read(key))
30
36
  value
@@ -27,16 +27,16 @@ module Twitterscraper
27
27
  }
28
28
  client = Twitterscraper::Client.new(cache: options['cache'], proxy: options['proxy'])
29
29
  tweets = client.query_tweets(options['query'], query_options)
30
- export(tweets) unless tweets.empty?
30
+ export(options['query'], tweets) unless tweets.empty?
31
31
  end
32
32
 
33
- def export(tweets)
33
+ def export(name, tweets)
34
34
  write_json = lambda { File.write(options['output'], generate_json(tweets)) }
35
35
 
36
36
  if options['format'] == 'json'
37
37
  write_json.call
38
38
  elsif options['format'] == 'html'
39
- File.write('tweets.html', Template.tweets_embedded_html(tweets))
39
+ File.write(options['output'], Template.new.tweets_embedded_html(name, tweets, options))
40
40
  else
41
41
  write_json.call
42
42
  end
@@ -69,7 +69,6 @@ module Twitterscraper
69
69
  else
70
70
  json_resp = JSON.parse(text)
71
71
  items_html = json_resp['items_html'] || ''
72
- logger.warn json_resp['message'] if json_resp['message'] # Sorry, you are rate limited.
73
72
  end
74
73
 
75
74
  [items_html, json_resp]
@@ -100,6 +99,12 @@ module Twitterscraper
100
99
 
101
100
  html, json_resp = parse_single_page(response, pos.nil?)
102
101
 
102
+ if json_resp && json_resp['message']
103
+ logger.warn json_resp['message'] # Sorry, you are rate limited.
104
+ @stop_requested = true
105
+ Cache.new.delete(url) if cache_enabled?
106
+ end
107
+
103
108
  tweets = Tweet.from_html(html)
104
109
 
105
110
  if tweets.empty?
@@ -140,19 +145,27 @@ module Twitterscraper
140
145
  raise Error.new(":start_date must be greater than or equal to #{OLDEST_DATE}")
141
146
  end
142
147
  end
143
-
144
- if end_date
145
- today = Date.today
146
- if end_date > Date.today
147
- raise Error.new(":end_date must be less than or equal to today(#{today})")
148
- end
149
- end
150
148
  end
151
149
 
152
150
  def build_queries(query, start_date, end_date)
153
151
  if start_date && end_date
154
- date_range = start_date.upto(end_date - 1)
155
- date_range.map { |date| query + " since:#{date} until:#{date + 1}" }
152
+ # date_range = start_date.upto(end_date - 1)
153
+ # date_range.map { |date| query + " since:#{date} until:#{date + 1}" }
154
+
155
+ queries = []
156
+ time = Time.utc(start_date.year, start_date.month, start_date.day, 0, 0, 0)
157
+ end_time = Time.utc(end_date.year, end_date.month, end_date.day, 0, 0, 0)
158
+
159
+ while true
160
+ if time < Time.now.utc
161
+ queries << (query + " since:#{time.strftime('%Y-%m-%d_%H:00:00')}_UTC until:#{(time + 3600).strftime('%Y-%m-%d_%H:00:00')}_UTC")
162
+ end
163
+ time += 3600
164
+ break if time >= end_time
165
+ end
166
+
167
+ queries
168
+
156
169
  elsif start_date
157
170
  [query + " since:#{start_date}"]
158
171
  elsif end_date
@@ -202,7 +215,6 @@ module Twitterscraper
202
215
  queries = build_queries(query, start_date, end_date)
203
216
  type = Type.new(type)
204
217
  if threads > queries.size
205
- logger.warn 'The maximum number of :threads is the number of dates between :start_date and :end_date.'
206
218
  threads = queries.size
207
219
  end
208
220
  if proxy_enabled?
@@ -214,7 +226,6 @@ module Twitterscraper
214
226
  end
215
227
  logger.debug "Cache #{cache_enabled? ? 'enabled' : 'disabled'}"
216
228
 
217
-
218
229
  validate_options!(queries, type: type, start_date: start_date, end_date: end_date, lang: lang, limit: limit, threads: threads)
219
230
 
220
231
  logger.info "The number of threads #{threads}"
@@ -1,48 +1,30 @@
1
1
  module Twitterscraper
2
- module Template
3
- module_function
2
+ class Template
3
+ def tweets_embedded_html(name, tweets, options)
4
+ path = File.join(File.dirname(__FILE__), 'template/tweets.html.erb')
5
+ template = ERB.new(File.read(path))
4
6
 
5
- def tweets_embedded_html(tweets)
6
- tweets_html = tweets.map { |t| EMBED_TWEET_HTML.sub('__TWEET_URL__', t.tweet_url) }
7
- EMBED_TWEETS_HTML.sub('__TWEETS__', tweets_html.join)
7
+ template.result_with_hash(
8
+ chart_name: name,
9
+ chart_data: chart_data(tweets).to_json,
10
+ first_tweet: tweets.sort_by { |t| t.created_at.to_i }[0],
11
+ last_tweet: tweets.sort_by { |t| t.created_at.to_i }[-1],
12
+ tweets_size: tweets.size,
13
+ tweets: tweets.take(50)
14
+ )
8
15
  end
9
16
 
10
- EMBED_TWEET_HTML = <<~'HTML'
11
- <blockquote class="twitter-tweet">
12
- <a href="__TWEET_URL__"></a>
13
- </blockquote>
14
- HTML
17
+ def chart_data(tweets)
18
+ data = tweets.each_with_object(Hash.new(0)) do |tweet, memo|
19
+ t = tweet.created_at
20
+ min = (t.min.to_f / 5).floor * 5
21
+ time = Time.new(t.year, t.month, t.day, t.hour, min, 0, '+00:00')
22
+ memo[time.to_i] += 1
23
+ end
15
24
 
16
- EMBED_TWEETS_HTML = <<~'HTML'
17
- <html>
18
- <head>
19
- <style type=text/css>
20
- .twitter-tweet {
21
- margin: 30px auto 0 auto !important;
22
- }
23
- </style>
24
- <script>
25
- window.twttr = (function(d, s, id) {
26
- var js, fjs = d.getElementsByTagName(s)[0], t = window.twttr || {};
27
- if (d.getElementById(id)) return t;
28
- js = d.createElement(s);
29
- js.id = id;
30
- js.src = "https://platform.twitter.com/widgets.js";
31
- fjs.parentNode.insertBefore(js, fjs);
32
-
33
- t._e = [];
34
- t.ready = function(f) {
35
- t._e.push(f);
36
- };
37
-
38
- return t;
39
- }(document, "script", "twitter-wjs"));
40
- </script>
41
- </head>
42
- <body>
43
- __TWEETS__
44
- </body>
45
- </html>
46
- HTML
25
+ data.sort_by { |k, v| k }.map do |timestamp, count|
26
+ [timestamp * 1000, count]
27
+ end
28
+ end
47
29
  end
48
30
  end
@@ -0,0 +1,82 @@
1
+ <html>
2
+ <head>
3
+ <script>
4
+ window.twttr = (function (d, s, id) {
5
+ var js, fjs = d.getElementsByTagName(s)[0], t = window.twttr || {};
6
+ if (d.getElementById(id)) return t;
7
+ js = d.createElement(s);
8
+ js.id = id;
9
+ js.src = "https://platform.twitter.com/widgets.js";
10
+ fjs.parentNode.insertBefore(js, fjs);
11
+
12
+ t._e = [];
13
+ t.ready = function (f) {
14
+ t._e.push(f);
15
+ };
16
+
17
+ return t;
18
+ }(document, "script", "twitter-wjs"));
19
+ </script>
20
+
21
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/moment.js/2.27.0/moment.min.js" integrity="sha512-rmZcZsyhe0/MAjquhTgiUcb4d9knaFc7b5xAfju483gbEXTkeJRUMIPk6s3ySZMYUHEcjKbjLjyddGWMrNEvZg==" crossorigin="anonymous"></script>
22
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/moment-timezone/0.5.31/moment-timezone-with-data.min.js" integrity="sha512-HZcf3uHWA+Y2P5KNv+F/xa87/flKVP92kUTe/KXjU8URPshczF1Dx+cL5bw0VBGhmqWAK0UbhcqxBbyiNtAnWQ==" crossorigin="anonymous"></script>
23
+ <script src="https://code.highcharts.com/stock/highstock.js"></script>
24
+ <script>
25
+ function drawChart() {
26
+ Highcharts.setOptions({
27
+ time: {
28
+ timezone: moment.tz.guess()
29
+ }
30
+ });
31
+
32
+ Highcharts.stockChart('chart', {
33
+ title: {
34
+ text: '<%= tweets_size %> tweets of <%= chart_name %>'
35
+ },
36
+ subtitle: {
37
+ text: 'since:<%= first_tweet.created_at.localtime %> until:<%= last_tweet.created_at.localtime %>'
38
+ },
39
+ series: [{
40
+ data: <%= chart_data %>
41
+ }],
42
+ rangeSelector: {enabled: false},
43
+ scrollbar: {enabled: false},
44
+ navigator: {enabled: false},
45
+ exporting: {enabled: false},
46
+ credits: {enabled: false}
47
+ });
48
+ }
49
+
50
+ document.addEventListener("DOMContentLoaded", function () {
51
+ drawChart();
52
+ });
53
+ </script>
54
+
55
+ <style type=text/css>
56
+ .tweets-container {
57
+ max-width: 550px;
58
+ margin: 0 auto 0 auto;
59
+ }
60
+
61
+ .twitter-tweet {
62
+ margin: 15px 0 15px 0 !important;
63
+ }
64
+ </style>
65
+ </head>
66
+ <body>
67
+ <div id="chart"></div>
68
+
69
+ <div class="tweets-container">
70
+ <% tweets.each do |tweet| %>
71
+ <blockquote class="twitter-tweet">
72
+ <a href="<%= tweet.tweet_url %>"></a>
73
+ </blockquote>
74
+ <% end %>
75
+
76
+ <% if tweets_size > tweets.size %>
77
+ <div>and more!</div>
78
+ <% end %>
79
+ </div>
80
+
81
+ </body>
82
+ </html>
@@ -1,3 +1,3 @@
1
1
  module Twitterscraper
2
- VERSION = '0.15.2'
2
+ VERSION = '0.16.0'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: twitterscraper-ruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.15.2
4
+ version: 0.16.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - ts-3156
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-07-17 00:00:00.000000000 Z
11
+ date: 2020-07-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -72,6 +72,7 @@ files:
72
72
  - lib/twitterscraper/proxy.rb
73
73
  - lib/twitterscraper/query.rb
74
74
  - lib/twitterscraper/template.rb
75
+ - lib/twitterscraper/template/tweets.html.erb
75
76
  - lib/twitterscraper/tweet.rb
76
77
  - lib/twitterscraper/type.rb
77
78
  - lib/version.rb