twitterscraper-ruby 0.17.0 → 0.18.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/Gemfile.lock +1 -1
- data/lib/twitterscraper/cli.rb +6 -4
- data/lib/twitterscraper/query.rb +7 -3
- data/lib/twitterscraper/template.rb +34 -5
- data/lib/twitterscraper/template/tweets.html.erb +16 -5
- data/lib/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8e9bdefe1c4d10e6d9f1d12aeb279b2a3751c570e96e05daaf849dd423bb03bf
|
4
|
+
data.tar.gz: 7de97de19daeecce2837fe8e5999b6c9490ab49a18a2ab9e603bf4d039abc4b9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 55b7e0b52b2ce44418305798ed27a677405244a48f5ad0a797e3abf7958b0581a313ebd33f3f69b891ba7454f8f5c9c0db845c9ca8be321cd27212932821776e
|
7
|
+
data.tar.gz: 8fe97a0dc164fc0108b8e6a35843fba19ade5fbaf4f1ee2b4a400afbd3bdbb220a49dfbef4fceb1d8ecc43df3b4f4b7bad0ee5ea94c0aac464c0477e42efb866
|
data/.gitignore
CHANGED
data/Gemfile.lock
CHANGED
data/lib/twitterscraper/cli.rb
CHANGED
@@ -32,12 +32,14 @@ module Twitterscraper
|
|
32
32
|
end
|
33
33
|
|
34
34
|
def export(name, tweets)
|
35
|
-
|
35
|
+
filepath = options['output']
|
36
|
+
Dir.mkdir(File.dirname(filepath)) unless File.exist?(File.dirname(filepath))
|
37
|
+
write_json = lambda { File.write(filepath, generate_json(tweets)) }
|
36
38
|
|
37
39
|
if options['format'] == 'json'
|
38
40
|
write_json.call
|
39
41
|
elsif options['format'] == 'html'
|
40
|
-
File.write(
|
42
|
+
File.write(filepath, Template.new.tweets_embedded_html(name, tweets, options))
|
41
43
|
else
|
42
44
|
write_json.call
|
43
45
|
end
|
@@ -97,9 +99,9 @@ module Twitterscraper
|
|
97
99
|
end
|
98
100
|
|
99
101
|
def build_output_name(options)
|
100
|
-
query =
|
102
|
+
query = options['query'].gsub(/[ :?#&]/, '_')
|
101
103
|
date = [options['start_date'], options['end_date']].select { |val| val && !val.empty? }.join('_')
|
102
|
-
[options['type'], 'tweets', date, query].compact.join('_') + '.' + options['format']
|
104
|
+
File.join('out', [options['type'], 'tweets', date, query].compact.join('_') + '.' + options['format'])
|
103
105
|
end
|
104
106
|
|
105
107
|
def initialize_logger
|
data/lib/twitterscraper/query.rb
CHANGED
@@ -76,9 +76,9 @@ module Twitterscraper
|
|
76
76
|
|
77
77
|
def query_single_page(query, lang, type, pos, headers: [], proxies: [])
|
78
78
|
logger.info "Querying #{query}"
|
79
|
-
|
79
|
+
encoded_query = ERB::Util.url_encode(query)
|
80
80
|
|
81
|
-
url = build_query_url(
|
81
|
+
url = build_query_url(encoded_query, lang, type, pos)
|
82
82
|
http_request = lambda do
|
83
83
|
logger.debug "Scraping tweets from url=#{url}"
|
84
84
|
get_single_page(url, headers, proxies)
|
@@ -92,6 +92,10 @@ module Twitterscraper
|
|
92
92
|
response = http_request.call
|
93
93
|
client.write(url, response) unless stop_requested?
|
94
94
|
end
|
95
|
+
if @queries && query == @queries.last && pos.nil?
|
96
|
+
logger.debug "Delete a cache query=#{query}"
|
97
|
+
client.delete(url)
|
98
|
+
end
|
95
99
|
else
|
96
100
|
response = http_request.call
|
97
101
|
end
|
@@ -170,7 +174,7 @@ module Twitterscraper
|
|
170
174
|
end
|
171
175
|
end
|
172
176
|
|
173
|
-
queries
|
177
|
+
@queries = queries
|
174
178
|
|
175
179
|
elsif start_date
|
176
180
|
[query + " since:#{start_date}"]
|
data/lib/twitterscraper/template.rb
CHANGED
@@ -4,25 +4,54 @@ module Twitterscraper
|
|
4
4
|
path = File.join(File.dirname(__FILE__), 'template/tweets.html.erb')
|
5
5
|
template = ERB.new(File.read(path))
|
6
6
|
|
7
|
+
tweets = tweets.sort_by { |t| t.created_at.to_i }
|
8
|
+
|
7
9
|
template.result_with_hash(
|
8
10
|
chart_name: name,
|
9
11
|
chart_data: chart_data(tweets).to_json,
|
10
|
-
first_tweet: tweets
|
11
|
-
last_tweet: tweets
|
12
|
+
first_tweet: tweets[0],
|
13
|
+
last_tweet: tweets[-1],
|
12
14
|
tweets: tweets,
|
13
15
|
convert_limit: 30,
|
14
16
|
)
|
15
17
|
end
|
16
18
|
|
17
|
-
def chart_data(tweets)
|
19
|
+
def chart_data(tweets, trimming: true, smoothing: true)
|
20
|
+
min_interval = 5
|
21
|
+
|
18
22
|
data = tweets.each_with_object(Hash.new(0)) do |tweet, memo|
|
19
23
|
t = tweet.created_at
|
20
|
-
min = (t.min.to_f /
|
24
|
+
min = (t.min.to_f / min_interval).floor * min_interval
|
21
25
|
time = Time.new(t.year, t.month, t.day, t.hour, min, 0, '+00:00')
|
22
26
|
memo[time.to_i] += 1
|
23
27
|
end
|
24
28
|
|
25
|
-
|
29
|
+
if false && trimming
|
30
|
+
data.keys.sort.each.with_index do |timestamp, i|
|
31
|
+
break if data.size - 1 == i
|
32
|
+
if data[i] == 0 && data[i + 1] == 0
|
33
|
+
data.delete(timestamp)
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
if false && smoothing
|
39
|
+
time = data.keys.min
|
40
|
+
max_time = data.keys.max
|
41
|
+
sec_interval = 60 * min_interval
|
42
|
+
|
43
|
+
while true
|
44
|
+
next_time = time + sec_interval
|
45
|
+
break if next_time + sec_interval > max_time
|
46
|
+
|
47
|
+
unless data.has_key?(next_time)
|
48
|
+
data[next_time] = (data[time] + data[next_time + sec_interval]) / 2
|
49
|
+
end
|
50
|
+
time = next_time
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
data.sort_by { |k, _| k }.map do |timestamp, count|
|
26
55
|
[timestamp * 1000, count]
|
27
56
|
end
|
28
57
|
end
|
data/lib/twitterscraper/template/tweets.html.erb
CHANGED
@@ -23,14 +23,14 @@
|
|
23
23
|
}
|
24
24
|
|
25
25
|
function drawChart() {
|
26
|
-
var data = <%= chart_data %>;
|
27
26
|
Highcharts.setOptions({
|
28
27
|
time: {
|
29
28
|
timezone: moment.tz.guess()
|
30
29
|
}
|
31
30
|
});
|
32
31
|
|
33
|
-
|
32
|
+
var data = <%= chart_data %>;
|
33
|
+
var config = {
|
34
34
|
title: {
|
35
35
|
text: '<%= tweets.size %> tweets of <%= chart_name %>'
|
36
36
|
},
|
@@ -45,7 +45,9 @@
|
|
45
45
|
navigator: {enabled: false},
|
46
46
|
exporting: {enabled: false},
|
47
47
|
credits: {enabled: false}
|
48
|
-
}
|
48
|
+
};
|
49
|
+
|
50
|
+
Highcharts.stockChart('chart-container', config);
|
49
51
|
}
|
50
52
|
|
51
53
|
document.addEventListener("DOMContentLoaded", function () {
|
@@ -55,6 +57,15 @@
|
|
55
57
|
</script>
|
56
58
|
|
57
59
|
<style type=text/css>
|
60
|
+
#chart-container {
|
61
|
+
max-width: 1200px;
|
62
|
+
height: 675px;
|
63
|
+
margin: 0 auto;
|
64
|
+
border: 1px solid rgb(204, 214, 221);
|
65
|
+
display: flex;
|
66
|
+
justify-content: center;
|
67
|
+
align-items: center;
|
68
|
+
}
|
58
69
|
.tweets-container {
|
59
70
|
max-width: 550px;
|
60
71
|
margin: 0 auto 0 auto;
|
@@ -66,10 +77,10 @@
|
|
66
77
|
</style>
|
67
78
|
</head>
|
68
79
|
<body>
|
69
|
-
<div id="chart" style="
|
80
|
+
<div id="chart-container"><div style="color: gray;">Loading...</div></div>
|
70
81
|
|
71
82
|
<div class="tweets-container">
|
72
|
-
<% tweets.each.with_index do |tweet, i| %>
|
83
|
+
<% tweets.sort_by { |t| -t.created_at.to_i }.each.with_index do |tweet, i| %>
|
73
84
|
<% tweet_time = tweet.created_at.localtime.strftime('%Y-%m-%d %H:%M') %>
|
74
85
|
<% if i < convert_limit %>
|
75
86
|
<blockquote class="twitter-tweet">
|
data/lib/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twitterscraper-ruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.17.0
|
4
|
+
version: 0.18.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ts-3156
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-07-
|
11
|
+
date: 2020-07-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|