twitterscraper-ruby 0.15.2 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/twitterscraper/cache.rb +7 -1
- data/lib/twitterscraper/cli.rb +3 -3
- data/lib/twitterscraper/query.rb +23 -12
- data/lib/twitterscraper/template.rb +23 -41
- data/lib/twitterscraper/template/tweets.html.erb +82 -0
- data/lib/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 66dda5275a9067d328f6637f127895ded954534d304e5e4b349f286a271a08d8
|
4
|
+
data.tar.gz: 6c3ffb3fba82376fc2de49514245ea96c7cb4fa16c32dcd2fff1ab1ae327bd14
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 24267284f4f29adc86d5bbe70a30bbe31d6d898546576065f1a9accafc3944a352117bbf6eb0de273743a00fb2d26c5cf37ed016cc0324187a25ca279230d812
|
7
|
+
data.tar.gz: 0bc9f01659560c83b0289bf63119849135b7ec27520dd03c7abd645da99ef660ca4b5fd12301b359cd5cc45a82914d7ceae88ad93ad756fde166718b3d0fe6c2
|
data/Gemfile.lock
CHANGED
data/lib/twitterscraper/cache.rb
CHANGED
@@ -4,7 +4,7 @@ require 'digest/md5'
|
|
4
4
|
module Twitterscraper
|
5
5
|
class Cache
|
6
6
|
def initialize()
|
7
|
-
@ttl =
|
7
|
+
@ttl = 86400 # 1 day
|
8
8
|
@dir = 'cache'
|
9
9
|
Dir.mkdir(@dir) unless File.exist?(@dir)
|
10
10
|
end
|
@@ -25,6 +25,12 @@ module Twitterscraper
|
|
25
25
|
File.write(file, entry.to_json)
|
26
26
|
end
|
27
27
|
|
28
|
+
def delete(key)
|
29
|
+
key = cache_key(key)
|
30
|
+
file = File.join(@dir, key)
|
31
|
+
File.delete(file) if File.exist?(file)
|
32
|
+
end
|
33
|
+
|
28
34
|
def fetch(key, &block)
|
29
35
|
if (value = read(key))
|
30
36
|
value
|
data/lib/twitterscraper/cli.rb
CHANGED
@@ -27,16 +27,16 @@ module Twitterscraper
|
|
27
27
|
}
|
28
28
|
client = Twitterscraper::Client.new(cache: options['cache'], proxy: options['proxy'])
|
29
29
|
tweets = client.query_tweets(options['query'], query_options)
|
30
|
-
export(tweets) unless tweets.empty?
|
30
|
+
export(options['query'], tweets) unless tweets.empty?
|
31
31
|
end
|
32
32
|
|
33
|
-
def export(tweets)
|
33
|
+
def export(name, tweets)
|
34
34
|
write_json = lambda { File.write(options['output'], generate_json(tweets)) }
|
35
35
|
|
36
36
|
if options['format'] == 'json'
|
37
37
|
write_json.call
|
38
38
|
elsif options['format'] == 'html'
|
39
|
-
File.write('
|
39
|
+
File.write(options['output'], Template.new.tweets_embedded_html(name, tweets, options))
|
40
40
|
else
|
41
41
|
write_json.call
|
42
42
|
end
|
data/lib/twitterscraper/query.rb
CHANGED
@@ -69,7 +69,6 @@ module Twitterscraper
|
|
69
69
|
else
|
70
70
|
json_resp = JSON.parse(text)
|
71
71
|
items_html = json_resp['items_html'] || ''
|
72
|
-
logger.warn json_resp['message'] if json_resp['message'] # Sorry, you are rate limited.
|
73
72
|
end
|
74
73
|
|
75
74
|
[items_html, json_resp]
|
@@ -100,6 +99,12 @@ module Twitterscraper
|
|
100
99
|
|
101
100
|
html, json_resp = parse_single_page(response, pos.nil?)
|
102
101
|
|
102
|
+
if json_resp && json_resp['message']
|
103
|
+
logger.warn json_resp['message'] # Sorry, you are rate limited.
|
104
|
+
@stop_requested = true
|
105
|
+
Cache.new.delete(url) if cache_enabled?
|
106
|
+
end
|
107
|
+
|
103
108
|
tweets = Tweet.from_html(html)
|
104
109
|
|
105
110
|
if tweets.empty?
|
@@ -140,19 +145,27 @@ module Twitterscraper
|
|
140
145
|
raise Error.new(":start_date must be greater than or equal to #{OLDEST_DATE}")
|
141
146
|
end
|
142
147
|
end
|
143
|
-
|
144
|
-
if end_date
|
145
|
-
today = Date.today
|
146
|
-
if end_date > Date.today
|
147
|
-
raise Error.new(":end_date must be less than or equal to today(#{today})")
|
148
|
-
end
|
149
|
-
end
|
150
148
|
end
|
151
149
|
|
152
150
|
def build_queries(query, start_date, end_date)
|
153
151
|
if start_date && end_date
|
154
|
-
date_range = start_date.upto(end_date - 1)
|
155
|
-
date_range.map { |date| query + " since:#{date} until:#{date + 1}" }
|
152
|
+
# date_range = start_date.upto(end_date - 1)
|
153
|
+
# date_range.map { |date| query + " since:#{date} until:#{date + 1}" }
|
154
|
+
|
155
|
+
queries = []
|
156
|
+
time = Time.utc(start_date.year, start_date.month, start_date.day, 0, 0, 0)
|
157
|
+
end_time = Time.utc(end_date.year, end_date.month, end_date.day, 0, 0, 0)
|
158
|
+
|
159
|
+
while true
|
160
|
+
if time < Time.now.utc
|
161
|
+
queries << (query + " since:#{time.strftime('%Y-%m-%d_%H:00:00')}_UTC until:#{(time + 3600).strftime('%Y-%m-%d_%H:00:00')}_UTC")
|
162
|
+
end
|
163
|
+
time += 3600
|
164
|
+
break if time >= end_time
|
165
|
+
end
|
166
|
+
|
167
|
+
queries
|
168
|
+
|
156
169
|
elsif start_date
|
157
170
|
[query + " since:#{start_date}"]
|
158
171
|
elsif end_date
|
@@ -202,7 +215,6 @@ module Twitterscraper
|
|
202
215
|
queries = build_queries(query, start_date, end_date)
|
203
216
|
type = Type.new(type)
|
204
217
|
if threads > queries.size
|
205
|
-
logger.warn 'The maximum number of :threads is the number of dates between :start_date and :end_date.'
|
206
218
|
threads = queries.size
|
207
219
|
end
|
208
220
|
if proxy_enabled?
|
@@ -214,7 +226,6 @@ module Twitterscraper
|
|
214
226
|
end
|
215
227
|
logger.debug "Cache #{cache_enabled? ? 'enabled' : 'disabled'}"
|
216
228
|
|
217
|
-
|
218
229
|
validate_options!(queries, type: type, start_date: start_date, end_date: end_date, lang: lang, limit: limit, threads: threads)
|
219
230
|
|
220
231
|
logger.info "The number of threads #{threads}"
|
data/lib/twitterscraper/template.rb
CHANGED
@@ -1,48 +1,30 @@
|
|
1
1
|
module Twitterscraper
|
2
|
-
|
3
|
-
|
2
|
+
class Template
|
3
|
+
def tweets_embedded_html(name, tweets, options)
|
4
|
+
path = File.join(File.dirname(__FILE__), 'template/tweets.html.erb')
|
5
|
+
template = ERB.new(File.read(path))
|
4
6
|
|
5
|
-
|
6
|
-
|
7
|
-
|
7
|
+
template.result_with_hash(
|
8
|
+
chart_name: name,
|
9
|
+
chart_data: chart_data(tweets).to_json,
|
10
|
+
first_tweet: tweets.sort_by { |t| t.created_at.to_i }[0],
|
11
|
+
last_tweet: tweets.sort_by { |t| t.created_at.to_i }[-1],
|
12
|
+
tweets_size: tweets.size,
|
13
|
+
tweets: tweets.take(50)
|
14
|
+
)
|
8
15
|
end
|
9
16
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
17
|
+
def chart_data(tweets)
|
18
|
+
data = tweets.each_with_object(Hash.new(0)) do |tweet, memo|
|
19
|
+
t = tweet.created_at
|
20
|
+
min = (t.min.to_f / 5).floor * 5
|
21
|
+
time = Time.new(t.year, t.month, t.day, t.hour, min, 0, '+00:00')
|
22
|
+
memo[time.to_i] += 1
|
23
|
+
end
|
15
24
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
.twitter-tweet {
|
21
|
-
margin: 30px auto 0 auto !important;
|
22
|
-
}
|
23
|
-
</style>
|
24
|
-
<script>
|
25
|
-
window.twttr = (function(d, s, id) {
|
26
|
-
var js, fjs = d.getElementsByTagName(s)[0], t = window.twttr || {};
|
27
|
-
if (d.getElementById(id)) return t;
|
28
|
-
js = d.createElement(s);
|
29
|
-
js.id = id;
|
30
|
-
js.src = "https://platform.twitter.com/widgets.js";
|
31
|
-
fjs.parentNode.insertBefore(js, fjs);
|
32
|
-
|
33
|
-
t._e = [];
|
34
|
-
t.ready = function(f) {
|
35
|
-
t._e.push(f);
|
36
|
-
};
|
37
|
-
|
38
|
-
return t;
|
39
|
-
}(document, "script", "twitter-wjs"));
|
40
|
-
</script>
|
41
|
-
</head>
|
42
|
-
<body>
|
43
|
-
__TWEETS__
|
44
|
-
</body>
|
45
|
-
</html>
|
46
|
-
HTML
|
25
|
+
data.sort_by { |k, v| k }.map do |timestamp, count|
|
26
|
+
[timestamp * 1000, count]
|
27
|
+
end
|
28
|
+
end
|
47
29
|
end
|
48
30
|
end
|
data/lib/twitterscraper/template/tweets.html.erb
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
<html>
|
2
|
+
<head>
|
3
|
+
<script>
|
4
|
+
window.twttr = (function (d, s, id) {
|
5
|
+
var js, fjs = d.getElementsByTagName(s)[0], t = window.twttr || {};
|
6
|
+
if (d.getElementById(id)) return t;
|
7
|
+
js = d.createElement(s);
|
8
|
+
js.id = id;
|
9
|
+
js.src = "https://platform.twitter.com/widgets.js";
|
10
|
+
fjs.parentNode.insertBefore(js, fjs);
|
11
|
+
|
12
|
+
t._e = [];
|
13
|
+
t.ready = function (f) {
|
14
|
+
t._e.push(f);
|
15
|
+
};
|
16
|
+
|
17
|
+
return t;
|
18
|
+
}(document, "script", "twitter-wjs"));
|
19
|
+
</script>
|
20
|
+
|
21
|
+
<script src="https://cdnjs.cloudflare.com/ajax/libs/moment.js/2.27.0/moment.min.js" integrity="sha512-rmZcZsyhe0/MAjquhTgiUcb4d9knaFc7b5xAfju483gbEXTkeJRUMIPk6s3ySZMYUHEcjKbjLjyddGWMrNEvZg==" crossorigin="anonymous"></script>
|
22
|
+
<script src="https://cdnjs.cloudflare.com/ajax/libs/moment-timezone/0.5.31/moment-timezone-with-data.min.js" integrity="sha512-HZcf3uHWA+Y2P5KNv+F/xa87/flKVP92kUTe/KXjU8URPshczF1Dx+cL5bw0VBGhmqWAK0UbhcqxBbyiNtAnWQ==" crossorigin="anonymous"></script>
|
23
|
+
<script src="https://code.highcharts.com/stock/highstock.js"></script>
|
24
|
+
<script>
|
25
|
+
function drawChart() {
|
26
|
+
Highcharts.setOptions({
|
27
|
+
time: {
|
28
|
+
timezone: moment.tz.guess()
|
29
|
+
}
|
30
|
+
});
|
31
|
+
|
32
|
+
Highcharts.stockChart('chart', {
|
33
|
+
title: {
|
34
|
+
text: '<%= tweets_size %> tweets of <%= chart_name %>'
|
35
|
+
},
|
36
|
+
subtitle: {
|
37
|
+
text: 'since:<%= first_tweet.created_at.localtime %> until:<%= last_tweet.created_at.localtime %>'
|
38
|
+
},
|
39
|
+
series: [{
|
40
|
+
data: <%= chart_data %>
|
41
|
+
}],
|
42
|
+
rangeSelector: {enabled: false},
|
43
|
+
scrollbar: {enabled: false},
|
44
|
+
navigator: {enabled: false},
|
45
|
+
exporting: {enabled: false},
|
46
|
+
credits: {enabled: false}
|
47
|
+
});
|
48
|
+
}
|
49
|
+
|
50
|
+
document.addEventListener("DOMContentLoaded", function () {
|
51
|
+
drawChart();
|
52
|
+
});
|
53
|
+
</script>
|
54
|
+
|
55
|
+
<style type=text/css>
|
56
|
+
.tweets-container {
|
57
|
+
max-width: 550px;
|
58
|
+
margin: 0 auto 0 auto;
|
59
|
+
}
|
60
|
+
|
61
|
+
.twitter-tweet {
|
62
|
+
margin: 15px 0 15px 0 !important;
|
63
|
+
}
|
64
|
+
</style>
|
65
|
+
</head>
|
66
|
+
<body>
|
67
|
+
<div id="chart"></div>
|
68
|
+
|
69
|
+
<div class="tweets-container">
|
70
|
+
<% tweets.each do |tweet| %>
|
71
|
+
<blockquote class="twitter-tweet">
|
72
|
+
<a href="<%= tweet.tweet_url %>"></a>
|
73
|
+
</blockquote>
|
74
|
+
<% end %>
|
75
|
+
|
76
|
+
<% if tweets_size > tweets.size %>
|
77
|
+
<div>and more!</div>
|
78
|
+
<% end %>
|
79
|
+
</div>
|
80
|
+
|
81
|
+
</body>
|
82
|
+
</html>
|
data/lib/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twitterscraper-ruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.15.2
|
4
|
+
version: 0.16.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ts-3156
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-07-
|
11
|
+
date: 2020-07-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -72,6 +72,7 @@ files:
|
|
72
72
|
- lib/twitterscraper/proxy.rb
|
73
73
|
- lib/twitterscraper/query.rb
|
74
74
|
- lib/twitterscraper/template.rb
|
75
|
+
- lib/twitterscraper/template/tweets.html.erb
|
75
76
|
- lib/twitterscraper/tweet.rb
|
76
77
|
- lib/twitterscraper/type.rb
|
77
78
|
- lib/version.rb
|