twitterscraper-ruby 0.17.0 → 0.18.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ac0c10b18d836983cc6b73e25b9ed333af2f620106a07c6bc6a40058fb127895
4
- data.tar.gz: e6fc18219d9127fb30ba57e39dc4656c0f0a3c108428d959de5bac9e7d317088
3
+ metadata.gz: 8e9bdefe1c4d10e6d9f1d12aeb279b2a3751c570e96e05daaf849dd423bb03bf
4
+ data.tar.gz: 7de97de19daeecce2837fe8e5999b6c9490ab49a18a2ab9e603bf4d039abc4b9
5
5
  SHA512:
6
- metadata.gz: 90cbf06b606878dc36b4bba44669139c273bf03b08a777ad87036834841bcb4b052e0559813dc56e4be124442abfc5a7fc44c5c9524c74929ca02b1d287d346b
7
- data.tar.gz: ada0b74ee42ff62964b73ad9b49358227cdaf4fc87420cf12cf65af95168ad9775615a504345ebc83d3b791e9c0d892691c55bc477eddd647b3e8934f752fb9c
6
+ metadata.gz: 55b7e0b52b2ce44418305798ed27a677405244a48f5ad0a797e3abf7958b0581a313ebd33f3f69b891ba7454f8f5c9c0db845c9ca8be321cd27212932821776e
7
+ data.tar.gz: 8fe97a0dc164fc0108b8e6a35843fba19ade5fbaf4f1ee2b4a400afbd3bdbb220a49dfbef4fceb1d8ecc43df3b4f4b7bad0ee5ea94c0aac464c0477e42efb866
data/.gitignore CHANGED
@@ -8,3 +8,4 @@
8
8
  /tmp/
9
9
  /cache
10
10
  /.idea
11
+ .DS_Store
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- twitterscraper-ruby (0.17.0)
4
+ twitterscraper-ruby (0.18.0)
5
5
  nokogiri
6
6
  parallel
7
7
 
@@ -32,12 +32,14 @@ module Twitterscraper
32
32
  end
33
33
 
34
34
  def export(name, tweets)
35
- write_json = lambda { File.write(options['output'], generate_json(tweets)) }
35
+ filepath = options['output']
36
+ Dir.mkdir(File.dirname(filepath)) unless File.exist?(File.dirname(filepath))
37
+ write_json = lambda { File.write(filepath, generate_json(tweets)) }
36
38
 
37
39
  if options['format'] == 'json'
38
40
  write_json.call
39
41
  elsif options['format'] == 'html'
40
- File.write(options['output'], Template.new.tweets_embedded_html(name, tweets, options))
42
+ File.write(filepath, Template.new.tweets_embedded_html(name, tweets, options))
41
43
  else
42
44
  write_json.call
43
45
  end
@@ -97,9 +99,9 @@ module Twitterscraper
97
99
  end
98
100
 
99
101
  def build_output_name(options)
100
- query = ERB::Util.url_encode(options['query'])
102
+ query = options['query'].gsub(/[ :?#&]/, '_')
101
103
  date = [options['start_date'], options['end_date']].select { |val| val && !val.empty? }.join('_')
102
- [options['type'], 'tweets', date, query].compact.join('_') + '.' + options['format']
104
+ File.join('out', [options['type'], 'tweets', date, query].compact.join('_') + '.' + options['format'])
103
105
  end
104
106
 
105
107
  def initialize_logger
@@ -76,9 +76,9 @@ module Twitterscraper
76
76
 
77
77
  def query_single_page(query, lang, type, pos, headers: [], proxies: [])
78
78
  logger.info "Querying #{query}"
79
- query = ERB::Util.url_encode(query)
79
+ encoded_query = ERB::Util.url_encode(query)
80
80
 
81
- url = build_query_url(query, lang, type, pos)
81
+ url = build_query_url(encoded_query, lang, type, pos)
82
82
  http_request = lambda do
83
83
  logger.debug "Scraping tweets from url=#{url}"
84
84
  get_single_page(url, headers, proxies)
@@ -92,6 +92,10 @@ module Twitterscraper
92
92
  response = http_request.call
93
93
  client.write(url, response) unless stop_requested?
94
94
  end
95
+ if @queries && query == @queries.last && pos.nil?
96
+ logger.debug "Delete a cache query=#{query}"
97
+ client.delete(url)
98
+ end
95
99
  else
96
100
  response = http_request.call
97
101
  end
@@ -170,7 +174,7 @@ module Twitterscraper
170
174
  end
171
175
  end
172
176
 
173
- queries
177
+ @queries = queries
174
178
 
175
179
  elsif start_date
176
180
  [query + " since:#{start_date}"]
@@ -4,25 +4,54 @@ module Twitterscraper
4
4
  path = File.join(File.dirname(__FILE__), 'template/tweets.html.erb')
5
5
  template = ERB.new(File.read(path))
6
6
 
7
+ tweets = tweets.sort_by { |t| t.created_at.to_i }
8
+
7
9
  template.result_with_hash(
8
10
  chart_name: name,
9
11
  chart_data: chart_data(tweets).to_json,
10
- first_tweet: tweets.sort_by { |t| t.created_at.to_i }[0],
11
- last_tweet: tweets.sort_by { |t| t.created_at.to_i }[-1],
12
+ first_tweet: tweets[0],
13
+ last_tweet: tweets[-1],
12
14
  tweets: tweets,
13
15
  convert_limit: 30,
14
16
  )
15
17
  end
16
18
 
17
- def chart_data(tweets)
19
+ def chart_data(tweets, trimming: true, smoothing: true)
20
+ min_interval = 5
21
+
18
22
  data = tweets.each_with_object(Hash.new(0)) do |tweet, memo|
19
23
  t = tweet.created_at
20
- min = (t.min.to_f / 5).floor * 5
24
+ min = (t.min.to_f / min_interval).floor * min_interval
21
25
  time = Time.new(t.year, t.month, t.day, t.hour, min, 0, '+00:00')
22
26
  memo[time.to_i] += 1
23
27
  end
24
28
 
25
- data.sort_by { |k, v| k }.map do |timestamp, count|
29
+ if false && trimming
30
+ data.keys.sort.each.with_index do |timestamp, i|
31
+ break if data.size - 1 == i
32
+ if data[i] == 0 && data[i + 1] == 0
33
+ data.delete(timestamp)
34
+ end
35
+ end
36
+ end
37
+
38
+ if false && smoothing
39
+ time = data.keys.min
40
+ max_time = data.keys.max
41
+ sec_interval = 60 * min_interval
42
+
43
+ while true
44
+ next_time = time + sec_interval
45
+ break if next_time + sec_interval > max_time
46
+
47
+ unless data.has_key?(next_time)
48
+ data[next_time] = (data[time] + data[next_time + sec_interval]) / 2
49
+ end
50
+ time = next_time
51
+ end
52
+ end
53
+
54
+ data.sort_by { |k, _| k }.map do |timestamp, count|
26
55
  [timestamp * 1000, count]
27
56
  end
28
57
  end
@@ -23,14 +23,14 @@
23
23
  }
24
24
 
25
25
  function drawChart() {
26
- var data = <%= chart_data %>;
27
26
  Highcharts.setOptions({
28
27
  time: {
29
28
  timezone: moment.tz.guess()
30
29
  }
31
30
  });
32
31
 
33
- Highcharts.stockChart('chart', {
32
+ var data = <%= chart_data %>;
33
+ var config = {
34
34
  title: {
35
35
  text: '<%= tweets.size %> tweets of <%= chart_name %>'
36
36
  },
@@ -45,7 +45,9 @@
45
45
  navigator: {enabled: false},
46
46
  exporting: {enabled: false},
47
47
  credits: {enabled: false}
48
- });
48
+ };
49
+
50
+ Highcharts.stockChart('chart-container', config);
49
51
  }
50
52
 
51
53
  document.addEventListener("DOMContentLoaded", function () {
@@ -55,6 +57,15 @@
55
57
  </script>
56
58
 
57
59
  <style type=text/css>
60
+ #chart-container {
61
+ max-width: 1200px;
62
+ height: 675px;
63
+ margin: 0 auto;
64
+ border: 1px solid rgb(204, 214, 221);
65
+ display: flex;
66
+ justify-content: center;
67
+ align-items: center;
68
+ }
58
69
  .tweets-container {
59
70
  max-width: 550px;
60
71
  margin: 0 auto 0 auto;
@@ -66,10 +77,10 @@
66
77
  </style>
67
78
  </head>
68
79
  <body>
69
- <div id="chart" style="width: 100vw; height: 400px;"></div>
80
+ <div id="chart-container"><div style="color: gray;">Loading...</div></div>
70
81
 
71
82
  <div class="tweets-container">
72
- <% tweets.each.with_index do |tweet, i| %>
83
+ <% tweets.sort_by { |t| -t.created_at.to_i }.each.with_index do |tweet, i| %>
73
84
  <% tweet_time = tweet.created_at.localtime.strftime('%Y-%m-%d %H:%M') %>
74
85
  <% if i < convert_limit %>
75
86
  <blockquote class="twitter-tweet">
@@ -1,3 +1,3 @@
1
1
  module Twitterscraper
2
- VERSION = '0.17.0'
2
+ VERSION = '0.18.0'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: twitterscraper-ruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.17.0
4
+ version: 0.18.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - ts-3156
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-07-18 00:00:00.000000000 Z
11
+ date: 2020-07-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri