twitterscraper-ruby 0.17.0 → 0.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/Gemfile.lock +1 -1
- data/lib/twitterscraper/cli.rb +6 -4
- data/lib/twitterscraper/query.rb +7 -3
- data/lib/twitterscraper/template.rb +34 -5
- data/lib/twitterscraper/template/tweets.html.erb +16 -5
- data/lib/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8e9bdefe1c4d10e6d9f1d12aeb279b2a3751c570e96e05daaf849dd423bb03bf
|
4
|
+
data.tar.gz: 7de97de19daeecce2837fe8e5999b6c9490ab49a18a2ab9e603bf4d039abc4b9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 55b7e0b52b2ce44418305798ed27a677405244a48f5ad0a797e3abf7958b0581a313ebd33f3f69b891ba7454f8f5c9c0db845c9ca8be321cd27212932821776e
|
7
|
+
data.tar.gz: 8fe97a0dc164fc0108b8e6a35843fba19ade5fbaf4f1ee2b4a400afbd3bdbb220a49dfbef4fceb1d8ecc43df3b4f4b7bad0ee5ea94c0aac464c0477e42efb866
|
data/.gitignore
CHANGED
data/Gemfile.lock
CHANGED
data/lib/twitterscraper/cli.rb
CHANGED
@@ -32,12 +32,14 @@ module Twitterscraper
|
|
32
32
|
end
|
33
33
|
|
34
34
|
def export(name, tweets)
|
35
|
-
|
35
|
+
filepath = options['output']
|
36
|
+
Dir.mkdir(File.dirname(filepath)) unless File.exist?(File.dirname(filepath))
|
37
|
+
write_json = lambda { File.write(filepath, generate_json(tweets)) }
|
36
38
|
|
37
39
|
if options['format'] == 'json'
|
38
40
|
write_json.call
|
39
41
|
elsif options['format'] == 'html'
|
40
|
-
File.write(
|
42
|
+
File.write(filepath, Template.new.tweets_embedded_html(name, tweets, options))
|
41
43
|
else
|
42
44
|
write_json.call
|
43
45
|
end
|
@@ -97,9 +99,9 @@ module Twitterscraper
|
|
97
99
|
end
|
98
100
|
|
99
101
|
def build_output_name(options)
|
100
|
-
query =
|
102
|
+
query = options['query'].gsub(/[ :?#&]/, '_')
|
101
103
|
date = [options['start_date'], options['end_date']].select { |val| val && !val.empty? }.join('_')
|
102
|
-
[options['type'], 'tweets', date, query].compact.join('_') + '.' + options['format']
|
104
|
+
File.join('out', [options['type'], 'tweets', date, query].compact.join('_') + '.' + options['format'])
|
103
105
|
end
|
104
106
|
|
105
107
|
def initialize_logger
|
data/lib/twitterscraper/query.rb
CHANGED
@@ -76,9 +76,9 @@ module Twitterscraper
|
|
76
76
|
|
77
77
|
def query_single_page(query, lang, type, pos, headers: [], proxies: [])
|
78
78
|
logger.info "Querying #{query}"
|
79
|
-
|
79
|
+
encoded_query = ERB::Util.url_encode(query)
|
80
80
|
|
81
|
-
url = build_query_url(
|
81
|
+
url = build_query_url(encoded_query, lang, type, pos)
|
82
82
|
http_request = lambda do
|
83
83
|
logger.debug "Scraping tweets from url=#{url}"
|
84
84
|
get_single_page(url, headers, proxies)
|
@@ -92,6 +92,10 @@ module Twitterscraper
|
|
92
92
|
response = http_request.call
|
93
93
|
client.write(url, response) unless stop_requested?
|
94
94
|
end
|
95
|
+
if @queries && query == @queries.last && pos.nil?
|
96
|
+
logger.debug "Delete a cache query=#{query}"
|
97
|
+
client.delete(url)
|
98
|
+
end
|
95
99
|
else
|
96
100
|
response = http_request.call
|
97
101
|
end
|
@@ -170,7 +174,7 @@ module Twitterscraper
|
|
170
174
|
end
|
171
175
|
end
|
172
176
|
|
173
|
-
queries
|
177
|
+
@queries = queries
|
174
178
|
|
175
179
|
elsif start_date
|
176
180
|
[query + " since:#{start_date}"]
|
data/lib/twitterscraper/template.rb
CHANGED
@@ -4,25 +4,54 @@ module Twitterscraper
|
|
4
4
|
path = File.join(File.dirname(__FILE__), 'template/tweets.html.erb')
|
5
5
|
template = ERB.new(File.read(path))
|
6
6
|
|
7
|
+
tweets = tweets.sort_by { |t| t.created_at.to_i }
|
8
|
+
|
7
9
|
template.result_with_hash(
|
8
10
|
chart_name: name,
|
9
11
|
chart_data: chart_data(tweets).to_json,
|
10
|
-
first_tweet: tweets
|
11
|
-
last_tweet: tweets
|
12
|
+
first_tweet: tweets[0],
|
13
|
+
last_tweet: tweets[-1],
|
12
14
|
tweets: tweets,
|
13
15
|
convert_limit: 30,
|
14
16
|
)
|
15
17
|
end
|
16
18
|
|
17
|
-
def chart_data(tweets)
|
19
|
+
def chart_data(tweets, trimming: true, smoothing: true)
|
20
|
+
min_interval = 5
|
21
|
+
|
18
22
|
data = tweets.each_with_object(Hash.new(0)) do |tweet, memo|
|
19
23
|
t = tweet.created_at
|
20
|
-
min = (t.min.to_f /
|
24
|
+
min = (t.min.to_f / min_interval).floor * min_interval
|
21
25
|
time = Time.new(t.year, t.month, t.day, t.hour, min, 0, '+00:00')
|
22
26
|
memo[time.to_i] += 1
|
23
27
|
end
|
24
28
|
|
25
|
-
|
29
|
+
if false && trimming
|
30
|
+
data.keys.sort.each.with_index do |timestamp, i|
|
31
|
+
break if data.size - 1 == i
|
32
|
+
if data[i] == 0 && data[i + 1] == 0
|
33
|
+
data.delete(timestamp)
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
if false && smoothing
|
39
|
+
time = data.keys.min
|
40
|
+
max_time = data.keys.max
|
41
|
+
sec_interval = 60 * min_interval
|
42
|
+
|
43
|
+
while true
|
44
|
+
next_time = time + sec_interval
|
45
|
+
break if next_time + sec_interval > max_time
|
46
|
+
|
47
|
+
unless data.has_key?(next_time)
|
48
|
+
data[next_time] = (data[time] + data[next_time + sec_interval]) / 2
|
49
|
+
end
|
50
|
+
time = next_time
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
data.sort_by { |k, _| k }.map do |timestamp, count|
|
26
55
|
[timestamp * 1000, count]
|
27
56
|
end
|
28
57
|
end
|
data/lib/twitterscraper/template/tweets.html.erb
CHANGED
@@ -23,14 +23,14 @@
|
|
23
23
|
}
|
24
24
|
|
25
25
|
function drawChart() {
|
26
|
-
var data = <%= chart_data %>;
|
27
26
|
Highcharts.setOptions({
|
28
27
|
time: {
|
29
28
|
timezone: moment.tz.guess()
|
30
29
|
}
|
31
30
|
});
|
32
31
|
|
33
|
-
|
32
|
+
var data = <%= chart_data %>;
|
33
|
+
var config = {
|
34
34
|
title: {
|
35
35
|
text: '<%= tweets.size %> tweets of <%= chart_name %>'
|
36
36
|
},
|
@@ -45,7 +45,9 @@
|
|
45
45
|
navigator: {enabled: false},
|
46
46
|
exporting: {enabled: false},
|
47
47
|
credits: {enabled: false}
|
48
|
-
}
|
48
|
+
};
|
49
|
+
|
50
|
+
Highcharts.stockChart('chart-container', config);
|
49
51
|
}
|
50
52
|
|
51
53
|
document.addEventListener("DOMContentLoaded", function () {
|
@@ -55,6 +57,15 @@
|
|
55
57
|
</script>
|
56
58
|
|
57
59
|
<style type=text/css>
|
60
|
+
#chart-container {
|
61
|
+
max-width: 1200px;
|
62
|
+
height: 675px;
|
63
|
+
margin: 0 auto;
|
64
|
+
border: 1px solid rgb(204, 214, 221);
|
65
|
+
display: flex;
|
66
|
+
justify-content: center;
|
67
|
+
align-items: center;
|
68
|
+
}
|
58
69
|
.tweets-container {
|
59
70
|
max-width: 550px;
|
60
71
|
margin: 0 auto 0 auto;
|
@@ -66,10 +77,10 @@
|
|
66
77
|
</style>
|
67
78
|
</head>
|
68
79
|
<body>
|
69
|
-
<div id="chart" style="
|
80
|
+
<div id="chart-container"><div style="color: gray;">Loading...</div></div>
|
70
81
|
|
71
82
|
<div class="tweets-container">
|
72
|
-
<% tweets.each.with_index do |tweet, i| %>
|
83
|
+
<% tweets.sort_by { |t| -t.created_at.to_i }.each.with_index do |tweet, i| %>
|
73
84
|
<% tweet_time = tweet.created_at.localtime.strftime('%Y-%m-%d %H:%M') %>
|
74
85
|
<% if i < convert_limit %>
|
75
86
|
<blockquote class="twitter-tweet">
|
data/lib/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twitterscraper-ruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.17.0
|
4
|
+
version: 0.18.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ts-3156
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-07-
|
11
|
+
date: 2020-07-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|