twitterscraper-ruby 0.17.0 → 0.18.0

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ac0c10b18d836983cc6b73e25b9ed333af2f620106a07c6bc6a40058fb127895
4
- data.tar.gz: e6fc18219d9127fb30ba57e39dc4656c0f0a3c108428d959de5bac9e7d317088
3
+ metadata.gz: 8e9bdefe1c4d10e6d9f1d12aeb279b2a3751c570e96e05daaf849dd423bb03bf
4
+ data.tar.gz: 7de97de19daeecce2837fe8e5999b6c9490ab49a18a2ab9e603bf4d039abc4b9
5
5
  SHA512:
6
- metadata.gz: 90cbf06b606878dc36b4bba44669139c273bf03b08a777ad87036834841bcb4b052e0559813dc56e4be124442abfc5a7fc44c5c9524c74929ca02b1d287d346b
7
- data.tar.gz: ada0b74ee42ff62964b73ad9b49358227cdaf4fc87420cf12cf65af95168ad9775615a504345ebc83d3b791e9c0d892691c55bc477eddd647b3e8934f752fb9c
6
+ metadata.gz: 55b7e0b52b2ce44418305798ed27a677405244a48f5ad0a797e3abf7958b0581a313ebd33f3f69b891ba7454f8f5c9c0db845c9ca8be321cd27212932821776e
7
+ data.tar.gz: 8fe97a0dc164fc0108b8e6a35843fba19ade5fbaf4f1ee2b4a400afbd3bdbb220a49dfbef4fceb1d8ecc43df3b4f4b7bad0ee5ea94c0aac464c0477e42efb866
data/.gitignore CHANGED
@@ -8,3 +8,4 @@
8
8
  /tmp/
9
9
  /cache
10
10
  /.idea
11
+ .DS_Store
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- twitterscraper-ruby (0.17.0)
4
+ twitterscraper-ruby (0.18.0)
5
5
  nokogiri
6
6
  parallel
7
7
 
@@ -32,12 +32,14 @@ module Twitterscraper
32
32
  end
33
33
 
34
34
  def export(name, tweets)
35
- write_json = lambda { File.write(options['output'], generate_json(tweets)) }
35
+ filepath = options['output']
36
+ Dir.mkdir(File.dirname(filepath)) unless File.exist?(File.dirname(filepath))
37
+ write_json = lambda { File.write(filepath, generate_json(tweets)) }
36
38
 
37
39
  if options['format'] == 'json'
38
40
  write_json.call
39
41
  elsif options['format'] == 'html'
40
- File.write(options['output'], Template.new.tweets_embedded_html(name, tweets, options))
42
+ File.write(filepath, Template.new.tweets_embedded_html(name, tweets, options))
41
43
  else
42
44
  write_json.call
43
45
  end
@@ -97,9 +99,9 @@ module Twitterscraper
97
99
  end
98
100
 
99
101
  def build_output_name(options)
100
- query = ERB::Util.url_encode(options['query'])
102
+ query = options['query'].gsub(/[ :?#&]/, '_')
101
103
  date = [options['start_date'], options['end_date']].select { |val| val && !val.empty? }.join('_')
102
- [options['type'], 'tweets', date, query].compact.join('_') + '.' + options['format']
104
+ File.join('out', [options['type'], 'tweets', date, query].compact.join('_') + '.' + options['format'])
103
105
  end
104
106
 
105
107
  def initialize_logger
@@ -76,9 +76,9 @@ module Twitterscraper
76
76
 
77
77
  def query_single_page(query, lang, type, pos, headers: [], proxies: [])
78
78
  logger.info "Querying #{query}"
79
- query = ERB::Util.url_encode(query)
79
+ encoded_query = ERB::Util.url_encode(query)
80
80
 
81
- url = build_query_url(query, lang, type, pos)
81
+ url = build_query_url(encoded_query, lang, type, pos)
82
82
  http_request = lambda do
83
83
  logger.debug "Scraping tweets from url=#{url}"
84
84
  get_single_page(url, headers, proxies)
@@ -92,6 +92,10 @@ module Twitterscraper
92
92
  response = http_request.call
93
93
  client.write(url, response) unless stop_requested?
94
94
  end
95
+ if @queries && query == @queries.last && pos.nil?
96
+ logger.debug "Delete a cache query=#{query}"
97
+ client.delete(url)
98
+ end
95
99
  else
96
100
  response = http_request.call
97
101
  end
@@ -170,7 +174,7 @@ module Twitterscraper
170
174
  end
171
175
  end
172
176
 
173
- queries
177
+ @queries = queries
174
178
 
175
179
  elsif start_date
176
180
  [query + " since:#{start_date}"]
@@ -4,25 +4,54 @@ module Twitterscraper
4
4
  path = File.join(File.dirname(__FILE__), 'template/tweets.html.erb')
5
5
  template = ERB.new(File.read(path))
6
6
 
7
+ tweets = tweets.sort_by { |t| t.created_at.to_i }
8
+
7
9
  template.result_with_hash(
8
10
  chart_name: name,
9
11
  chart_data: chart_data(tweets).to_json,
10
- first_tweet: tweets.sort_by { |t| t.created_at.to_i }[0],
11
- last_tweet: tweets.sort_by { |t| t.created_at.to_i }[-1],
12
+ first_tweet: tweets[0],
13
+ last_tweet: tweets[-1],
12
14
  tweets: tweets,
13
15
  convert_limit: 30,
14
16
  )
15
17
  end
16
18
 
17
- def chart_data(tweets)
19
+ def chart_data(tweets, trimming: true, smoothing: true)
20
+ min_interval = 5
21
+
18
22
  data = tweets.each_with_object(Hash.new(0)) do |tweet, memo|
19
23
  t = tweet.created_at
20
- min = (t.min.to_f / 5).floor * 5
24
+ min = (t.min.to_f / min_interval).floor * min_interval
21
25
  time = Time.new(t.year, t.month, t.day, t.hour, min, 0, '+00:00')
22
26
  memo[time.to_i] += 1
23
27
  end
24
28
 
25
- data.sort_by { |k, v| k }.map do |timestamp, count|
29
+ if false && trimming
30
+ data.keys.sort.each.with_index do |timestamp, i|
31
+ break if data.size - 1 == i
32
+ if data[i] == 0 && data[i + 1] == 0
33
+ data.delete(timestamp)
34
+ end
35
+ end
36
+ end
37
+
38
+ if false && smoothing
39
+ time = data.keys.min
40
+ max_time = data.keys.max
41
+ sec_interval = 60 * min_interval
42
+
43
+ while true
44
+ next_time = time + sec_interval
45
+ break if next_time + sec_interval > max_time
46
+
47
+ unless data.has_key?(next_time)
48
+ data[next_time] = (data[time] + data[next_time + sec_interval]) / 2
49
+ end
50
+ time = next_time
51
+ end
52
+ end
53
+
54
+ data.sort_by { |k, _| k }.map do |timestamp, count|
26
55
  [timestamp * 1000, count]
27
56
  end
28
57
  end
@@ -23,14 +23,14 @@
23
23
  }
24
24
 
25
25
  function drawChart() {
26
- var data = <%= chart_data %>;
27
26
  Highcharts.setOptions({
28
27
  time: {
29
28
  timezone: moment.tz.guess()
30
29
  }
31
30
  });
32
31
 
33
- Highcharts.stockChart('chart', {
32
+ var data = <%= chart_data %>;
33
+ var config = {
34
34
  title: {
35
35
  text: '<%= tweets.size %> tweets of <%= chart_name %>'
36
36
  },
@@ -45,7 +45,9 @@
45
45
  navigator: {enabled: false},
46
46
  exporting: {enabled: false},
47
47
  credits: {enabled: false}
48
- });
48
+ };
49
+
50
+ Highcharts.stockChart('chart-container', config);
49
51
  }
50
52
 
51
53
  document.addEventListener("DOMContentLoaded", function () {
@@ -55,6 +57,15 @@
55
57
  </script>
56
58
 
57
59
  <style type=text/css>
60
+ #chart-container {
61
+ max-width: 1200px;
62
+ height: 675px;
63
+ margin: 0 auto;
64
+ border: 1px solid rgb(204, 214, 221);
65
+ display: flex;
66
+ justify-content: center;
67
+ align-items: center;
68
+ }
58
69
  .tweets-container {
59
70
  max-width: 550px;
60
71
  margin: 0 auto 0 auto;
@@ -66,10 +77,10 @@
66
77
  </style>
67
78
  </head>
68
79
  <body>
69
- <div id="chart" style="width: 100vw; height: 400px;"></div>
80
+ <div id="chart-container"><div style="color: gray;">Loading...</div></div>
70
81
 
71
82
  <div class="tweets-container">
72
- <% tweets.each.with_index do |tweet, i| %>
83
+ <% tweets.sort_by { |t| -t.created_at.to_i }.each.with_index do |tweet, i| %>
73
84
  <% tweet_time = tweet.created_at.localtime.strftime('%Y-%m-%d %H:%M') %>
74
85
  <% if i < convert_limit %>
75
86
  <blockquote class="twitter-tweet">
@@ -1,3 +1,3 @@
1
1
  module Twitterscraper
2
- VERSION = '0.17.0'
2
+ VERSION = '0.18.0'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: twitterscraper-ruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.17.0
4
+ version: 0.18.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - ts-3156
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-07-18 00:00:00.000000000 Z
11
+ date: 2020-07-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri