twitterscraper-ruby 0.15.2 → 0.16.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/twitterscraper/cache.rb +7 -1
- data/lib/twitterscraper/cli.rb +3 -3
- data/lib/twitterscraper/query.rb +23 -12
- data/lib/twitterscraper/template.rb +23 -41
- data/lib/twitterscraper/template/tweets.html.erb +82 -0
- data/lib/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 66dda5275a9067d328f6637f127895ded954534d304e5e4b349f286a271a08d8
|
4
|
+
data.tar.gz: 6c3ffb3fba82376fc2de49514245ea96c7cb4fa16c32dcd2fff1ab1ae327bd14
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 24267284f4f29adc86d5bbe70a30bbe31d6d898546576065f1a9accafc3944a352117bbf6eb0de273743a00fb2d26c5cf37ed016cc0324187a25ca279230d812
|
7
|
+
data.tar.gz: 0bc9f01659560c83b0289bf63119849135b7ec27520dd03c7abd645da99ef660ca4b5fd12301b359cd5cc45a82914d7ceae88ad93ad756fde166718b3d0fe6c2
|
data/Gemfile.lock
CHANGED
data/lib/twitterscraper/cache.rb
CHANGED
@@ -4,7 +4,7 @@ require 'digest/md5'
|
|
4
4
|
module Twitterscraper
|
5
5
|
class Cache
|
6
6
|
def initialize()
|
7
|
-
@ttl =
|
7
|
+
@ttl = 86400 # 1 day
|
8
8
|
@dir = 'cache'
|
9
9
|
Dir.mkdir(@dir) unless File.exist?(@dir)
|
10
10
|
end
|
@@ -25,6 +25,12 @@ module Twitterscraper
|
|
25
25
|
File.write(file, entry.to_json)
|
26
26
|
end
|
27
27
|
|
28
|
+
def delete(key)
|
29
|
+
key = cache_key(key)
|
30
|
+
file = File.join(@dir, key)
|
31
|
+
File.delete(file) if File.exist?(file)
|
32
|
+
end
|
33
|
+
|
28
34
|
def fetch(key, &block)
|
29
35
|
if (value = read(key))
|
30
36
|
value
|
data/lib/twitterscraper/cli.rb
CHANGED
@@ -27,16 +27,16 @@ module Twitterscraper
|
|
27
27
|
}
|
28
28
|
client = Twitterscraper::Client.new(cache: options['cache'], proxy: options['proxy'])
|
29
29
|
tweets = client.query_tweets(options['query'], query_options)
|
30
|
-
export(tweets) unless tweets.empty?
|
30
|
+
export(options['query'], tweets) unless tweets.empty?
|
31
31
|
end
|
32
32
|
|
33
|
-
def export(tweets)
|
33
|
+
def export(name, tweets)
|
34
34
|
write_json = lambda { File.write(options['output'], generate_json(tweets)) }
|
35
35
|
|
36
36
|
if options['format'] == 'json'
|
37
37
|
write_json.call
|
38
38
|
elsif options['format'] == 'html'
|
39
|
-
File.write('
|
39
|
+
File.write(options['output'], Template.new.tweets_embedded_html(name, tweets, options))
|
40
40
|
else
|
41
41
|
write_json.call
|
42
42
|
end
|
data/lib/twitterscraper/query.rb
CHANGED
@@ -69,7 +69,6 @@ module Twitterscraper
|
|
69
69
|
else
|
70
70
|
json_resp = JSON.parse(text)
|
71
71
|
items_html = json_resp['items_html'] || ''
|
72
|
-
logger.warn json_resp['message'] if json_resp['message'] # Sorry, you are rate limited.
|
73
72
|
end
|
74
73
|
|
75
74
|
[items_html, json_resp]
|
@@ -100,6 +99,12 @@ module Twitterscraper
|
|
100
99
|
|
101
100
|
html, json_resp = parse_single_page(response, pos.nil?)
|
102
101
|
|
102
|
+
if json_resp && json_resp['message']
|
103
|
+
logger.warn json_resp['message'] # Sorry, you are rate limited.
|
104
|
+
@stop_requested = true
|
105
|
+
Cache.new.delete(url) if cache_enabled?
|
106
|
+
end
|
107
|
+
|
103
108
|
tweets = Tweet.from_html(html)
|
104
109
|
|
105
110
|
if tweets.empty?
|
@@ -140,19 +145,27 @@ module Twitterscraper
|
|
140
145
|
raise Error.new(":start_date must be greater than or equal to #{OLDEST_DATE}")
|
141
146
|
end
|
142
147
|
end
|
143
|
-
|
144
|
-
if end_date
|
145
|
-
today = Date.today
|
146
|
-
if end_date > Date.today
|
147
|
-
raise Error.new(":end_date must be less than or equal to today(#{today})")
|
148
|
-
end
|
149
|
-
end
|
150
148
|
end
|
151
149
|
|
152
150
|
def build_queries(query, start_date, end_date)
|
153
151
|
if start_date && end_date
|
154
|
-
date_range = start_date.upto(end_date - 1)
|
155
|
-
date_range.map { |date| query + " since:#{date} until:#{date + 1}" }
|
152
|
+
# date_range = start_date.upto(end_date - 1)
|
153
|
+
# date_range.map { |date| query + " since:#{date} until:#{date + 1}" }
|
154
|
+
|
155
|
+
queries = []
|
156
|
+
time = Time.utc(start_date.year, start_date.month, start_date.day, 0, 0, 0)
|
157
|
+
end_time = Time.utc(end_date.year, end_date.month, end_date.day, 0, 0, 0)
|
158
|
+
|
159
|
+
while true
|
160
|
+
if time < Time.now.utc
|
161
|
+
queries << (query + " since:#{time.strftime('%Y-%m-%d_%H:00:00')}_UTC until:#{(time + 3600).strftime('%Y-%m-%d_%H:00:00')}_UTC")
|
162
|
+
end
|
163
|
+
time += 3600
|
164
|
+
break if time >= end_time
|
165
|
+
end
|
166
|
+
|
167
|
+
queries
|
168
|
+
|
156
169
|
elsif start_date
|
157
170
|
[query + " since:#{start_date}"]
|
158
171
|
elsif end_date
|
@@ -202,7 +215,6 @@ module Twitterscraper
|
|
202
215
|
queries = build_queries(query, start_date, end_date)
|
203
216
|
type = Type.new(type)
|
204
217
|
if threads > queries.size
|
205
|
-
logger.warn 'The maximum number of :threads is the number of dates between :start_date and :end_date.'
|
206
218
|
threads = queries.size
|
207
219
|
end
|
208
220
|
if proxy_enabled?
|
@@ -214,7 +226,6 @@ module Twitterscraper
|
|
214
226
|
end
|
215
227
|
logger.debug "Cache #{cache_enabled? ? 'enabled' : 'disabled'}"
|
216
228
|
|
217
|
-
|
218
229
|
validate_options!(queries, type: type, start_date: start_date, end_date: end_date, lang: lang, limit: limit, threads: threads)
|
219
230
|
|
220
231
|
logger.info "The number of threads #{threads}"
|
data/lib/twitterscraper/template.rb
CHANGED
@@ -1,48 +1,30 @@
|
|
1
1
|
module Twitterscraper
|
2
|
-
|
3
|
-
|
2
|
+
class Template
|
3
|
+
def tweets_embedded_html(name, tweets, options)
|
4
|
+
path = File.join(File.dirname(__FILE__), 'template/tweets.html.erb')
|
5
|
+
template = ERB.new(File.read(path))
|
4
6
|
|
5
|
-
|
6
|
-
|
7
|
-
|
7
|
+
template.result_with_hash(
|
8
|
+
chart_name: name,
|
9
|
+
chart_data: chart_data(tweets).to_json,
|
10
|
+
first_tweet: tweets.sort_by { |t| t.created_at.to_i }[0],
|
11
|
+
last_tweet: tweets.sort_by { |t| t.created_at.to_i }[-1],
|
12
|
+
tweets_size: tweets.size,
|
13
|
+
tweets: tweets.take(50)
|
14
|
+
)
|
8
15
|
end
|
9
16
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
17
|
+
def chart_data(tweets)
|
18
|
+
data = tweets.each_with_object(Hash.new(0)) do |tweet, memo|
|
19
|
+
t = tweet.created_at
|
20
|
+
min = (t.min.to_f / 5).floor * 5
|
21
|
+
time = Time.new(t.year, t.month, t.day, t.hour, min, 0, '+00:00')
|
22
|
+
memo[time.to_i] += 1
|
23
|
+
end
|
15
24
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
.twitter-tweet {
|
21
|
-
margin: 30px auto 0 auto !important;
|
22
|
-
}
|
23
|
-
</style>
|
24
|
-
<script>
|
25
|
-
window.twttr = (function(d, s, id) {
|
26
|
-
var js, fjs = d.getElementsByTagName(s)[0], t = window.twttr || {};
|
27
|
-
if (d.getElementById(id)) return t;
|
28
|
-
js = d.createElement(s);
|
29
|
-
js.id = id;
|
30
|
-
js.src = "https://platform.twitter.com/widgets.js";
|
31
|
-
fjs.parentNode.insertBefore(js, fjs);
|
32
|
-
|
33
|
-
t._e = [];
|
34
|
-
t.ready = function(f) {
|
35
|
-
t._e.push(f);
|
36
|
-
};
|
37
|
-
|
38
|
-
return t;
|
39
|
-
}(document, "script", "twitter-wjs"));
|
40
|
-
</script>
|
41
|
-
</head>
|
42
|
-
<body>
|
43
|
-
__TWEETS__
|
44
|
-
</body>
|
45
|
-
</html>
|
46
|
-
HTML
|
25
|
+
data.sort_by { |k, v| k }.map do |timestamp, count|
|
26
|
+
[timestamp * 1000, count]
|
27
|
+
end
|
28
|
+
end
|
47
29
|
end
|
48
30
|
end
|
data/lib/twitterscraper/template/tweets.html.erb
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
<html>
|
2
|
+
<head>
|
3
|
+
<script>
|
4
|
+
window.twttr = (function (d, s, id) {
|
5
|
+
var js, fjs = d.getElementsByTagName(s)[0], t = window.twttr || {};
|
6
|
+
if (d.getElementById(id)) return t;
|
7
|
+
js = d.createElement(s);
|
8
|
+
js.id = id;
|
9
|
+
js.src = "https://platform.twitter.com/widgets.js";
|
10
|
+
fjs.parentNode.insertBefore(js, fjs);
|
11
|
+
|
12
|
+
t._e = [];
|
13
|
+
t.ready = function (f) {
|
14
|
+
t._e.push(f);
|
15
|
+
};
|
16
|
+
|
17
|
+
return t;
|
18
|
+
}(document, "script", "twitter-wjs"));
|
19
|
+
</script>
|
20
|
+
|
21
|
+
<script src="https://cdnjs.cloudflare.com/ajax/libs/moment.js/2.27.0/moment.min.js" integrity="sha512-rmZcZsyhe0/MAjquhTgiUcb4d9knaFc7b5xAfju483gbEXTkeJRUMIPk6s3ySZMYUHEcjKbjLjyddGWMrNEvZg==" crossorigin="anonymous"></script>
|
22
|
+
<script src="https://cdnjs.cloudflare.com/ajax/libs/moment-timezone/0.5.31/moment-timezone-with-data.min.js" integrity="sha512-HZcf3uHWA+Y2P5KNv+F/xa87/flKVP92kUTe/KXjU8URPshczF1Dx+cL5bw0VBGhmqWAK0UbhcqxBbyiNtAnWQ==" crossorigin="anonymous"></script>
|
23
|
+
<script src="https://code.highcharts.com/stock/highstock.js"></script>
|
24
|
+
<script>
|
25
|
+
function drawChart() {
|
26
|
+
Highcharts.setOptions({
|
27
|
+
time: {
|
28
|
+
timezone: moment.tz.guess()
|
29
|
+
}
|
30
|
+
});
|
31
|
+
|
32
|
+
Highcharts.stockChart('chart', {
|
33
|
+
title: {
|
34
|
+
text: '<%= tweets_size %> tweets of <%= chart_name %>'
|
35
|
+
},
|
36
|
+
subtitle: {
|
37
|
+
text: 'since:<%= first_tweet.created_at.localtime %> until:<%= last_tweet.created_at.localtime %>'
|
38
|
+
},
|
39
|
+
series: [{
|
40
|
+
data: <%= chart_data %>
|
41
|
+
}],
|
42
|
+
rangeSelector: {enabled: false},
|
43
|
+
scrollbar: {enabled: false},
|
44
|
+
navigator: {enabled: false},
|
45
|
+
exporting: {enabled: false},
|
46
|
+
credits: {enabled: false}
|
47
|
+
});
|
48
|
+
}
|
49
|
+
|
50
|
+
document.addEventListener("DOMContentLoaded", function () {
|
51
|
+
drawChart();
|
52
|
+
});
|
53
|
+
</script>
|
54
|
+
|
55
|
+
<style type=text/css>
|
56
|
+
.tweets-container {
|
57
|
+
max-width: 550px;
|
58
|
+
margin: 0 auto 0 auto;
|
59
|
+
}
|
60
|
+
|
61
|
+
.twitter-tweet {
|
62
|
+
margin: 15px 0 15px 0 !important;
|
63
|
+
}
|
64
|
+
</style>
|
65
|
+
</head>
|
66
|
+
<body>
|
67
|
+
<div id="chart"></div>
|
68
|
+
|
69
|
+
<div class="tweets-container">
|
70
|
+
<% tweets.each do |tweet| %>
|
71
|
+
<blockquote class="twitter-tweet">
|
72
|
+
<a href="<%= tweet.tweet_url %>"></a>
|
73
|
+
</blockquote>
|
74
|
+
<% end %>
|
75
|
+
|
76
|
+
<% if tweets_size > tweets.size %>
|
77
|
+
<div>and more!</div>
|
78
|
+
<% end %>
|
79
|
+
</div>
|
80
|
+
|
81
|
+
</body>
|
82
|
+
</html>
|
data/lib/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twitterscraper-ruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.15.2
|
4
|
+
version: 0.16.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ts-3156
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-07-
|
11
|
+
date: 2020-07-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -72,6 +72,7 @@ files:
|
|
72
72
|
- lib/twitterscraper/proxy.rb
|
73
73
|
- lib/twitterscraper/query.rb
|
74
74
|
- lib/twitterscraper/template.rb
|
75
|
+
- lib/twitterscraper/template/tweets.html.erb
|
75
76
|
- lib/twitterscraper/tweet.rb
|
76
77
|
- lib/twitterscraper/type.rb
|
77
78
|
- lib/version.rb
|