twitterscraper-ruby 0.19.0 → 0.20.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 2056b4a3d9fe7af49429e35b3a1688256fb31b74cabab841a4dd2376a79889d5
-  data.tar.gz: aaaf949da2ba2ae07a0d66e981aebc635c18120de06be705f96c19c92c309911
+  metadata.gz: 73a9e9108284fc79cf5ec6b36b6f7ad3f83f2b4f03a2bc527dc18cb4b33e83c7
+  data.tar.gz: c7fcfdbdd1d808780c56610be9b8717352c812759b9344d9fa87cbd430a8d8e2
 SHA512:
-  metadata.gz: c60824e4c1c0021a3e27451b1708a77bd2e15dd6258fce63ac1b95111d0230c8ab7317bcd76c2faf14d02ebe75ab8d7453924e01eee7d3fcb46eef374f16c575
-  data.tar.gz: 984204bd430b41b76a2d9108df4e778e2bb242010ebd18569bcb662473496826644ba5693db1d475d565bff49a3de7f0eb95fd4c9a3da9e5ed4d6a6219ebb62e
+  metadata.gz: 1019547fe8c37a1bb5b4a9cd96a2737a14491087075ff448b48f72538758337c76ab513e153d4567454b192d30fafaa374913ae0c3548d7802e7bdd478fe4a2f
+  data.tar.gz: 48134e8b6858154850003da8684d3c8b7f124cab6d19e0ce76d05326dc8fef44694b32211e245509993e8b7b1afafa6d95914b05c66b9c95c54bb27d041983fe
Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    twitterscraper-ruby (0.19.0)
+    twitterscraper-ruby (0.20.0)
       nokogiri
       parallel
 
data/README.md CHANGED
@@ -175,7 +175,8 @@ Search operators documentation is in [Standard search operators](https://develop
 | `--limit` | integer | Stop scraping when *at least* the number of tweets indicated with --limit is scraped. | 100 |
 | `--order` | string | Sort a order of the results. | desc(default) or asc |
 | `--threads` | integer | Set the number of threads twitterscraper-ruby should initiate while scraping for your query. | 2 |
-| `--threads_granularity` | string | | auto |
+| `--threads_granularity` | string | day or hour | auto |
+| `--chart_grouping` | string | day, hour or minute | auto |
 | `--proxy` | boolean | Scrape https://twitter.com/search via proxies. | true(default) or false |
 | `--cache` | boolean | Enable caching. | true(default) or false |
 | `--format` | string | The format of the output. | json(default) or html |
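
The two new rows above correspond to the option-parsing and template changes further down in this diff. A minimal sketch of the Ruby API equivalent, assuming `Twitterscraper::Client` exposes `query_tweets` as in the gem's README and using the keyword arguments from the `query_tweets` signature shown later in this diff; note that `--chart_grouping` is only read when the HTML chart is rendered, so it has no `query_tweets` counterpart:

```ruby
require 'twitterscraper-ruby' # require name assumed from the gem name

# Sketch only: Client and the keyword arguments below are assumptions based on
# the gem's README and on the query_tweets signature later in this diff.
client = Twitterscraper::Client.new
tweets = client.query_tweets(
  'ruby lang',
  start_date: '2020-07-01',
  end_date: '2020-07-10',
  threads_granularity: 'day' # 'auto' (default), 'day' or 'hour'
)
puts tweets.size
```
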
@@ -74,6 +74,7 @@ module Twitterscraper
       'order:',
       'threads:',
       'threads_granularity:',
+      'chart_grouping:',
       'output:',
       'format:',
       'cache:',
@@ -131,19 +131,27 @@ module Twitterscraper
       if start_date && end_date
         if start_date == end_date
           raise Error.new('Please specify different values for :start_date and :end_date.')
-        elsif start_date > end_date
+        elsif Date.parse(start_date) > Date.parse(end_date)
           raise Error.new(':start_date must occur before :end_date.')
         end
       end
 
       if start_date
-        if start_date < OLDEST_DATE
+        if Date.parse(start_date) < OLDEST_DATE
           raise Error.new(":start_date must be greater than or equal to #{OLDEST_DATE}")
         end
       end
     end
 
-    def build_queries(query, start_date, end_date, threads_granularity)
+    def build_queries(query, start_date, end_date, threads_granularity, type)
+      if type.search?
+        start_date = Date.parse(start_date) if start_date.is_a?(String)
+        end_date = Date.parse(end_date) if end_date.is_a?(String)
+      elsif type.user?
+        start_date = nil
+        end_date = nil
+      end
+
       if start_date && end_date
         if threads_granularity == 'auto'
           threads_granularity = start_date.upto(end_date - 1).to_a.size >= 28 ? 'day' : 'hour'
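
The validation change in this hunk is more than cosmetic: `:start_date` and `:end_date` arrive from the CLI as Strings, while `OLDEST_DATE` is a `Date`, and Ruby's `Comparable` refuses to mix the two; parsing before comparing also normalizes dates that are not zero-padded. A minimal sketch of both failure modes (the `OLDEST_DATE` value is an illustrative stand-in):

```ruby
require 'date'

oldest_date = Date.parse('2006-03-21') # illustrative stand-in for OLDEST_DATE
start_date  = '2020-07-01'             # CLI options are plain strings

# start_date < oldest_date             # => ArgumentError: comparison of String with Date failed
Date.parse(start_date) < oldest_date   # => false, the comparison the new code performs

# String comparison also misorders dates that are not zero-padded:
'2020-7-1' > '2020-10-01'                          # => true (lexicographic)
Date.parse('2020-7-1') > Date.parse('2020-10-01')  # => false (the intended result)
```
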
@@ -151,7 +159,7 @@ module Twitterscraper
 
         if threads_granularity == 'day'
           date_range = start_date.upto(end_date - 1)
-          queries = date_range.map { |date| query + " since:#{date} until:#{date + 1}" }
+          queries = date_range.map { |date| query + " since:#{date}_00:00:00_UTC until:#{date + 1}_00:00:00_UTC" }
         elsif threads_granularity == 'hour'
           time = Time.utc(start_date.year, start_date.month, start_date.day, 0, 0, 0)
           end_time = Time.utc(end_date.year, end_date.month, end_date.day, 0, 0, 0)
@@ -159,19 +167,21 @@ module Twitterscraper
 
           while true
             if time < Time.now.utc
-              queries << (query + " since:#{time.strftime('%Y-%m-%d_%H:00:00')}_UTC until:#{(time + 3600).strftime('%Y-%m-%d_%H:00:00')}_UTC")
+              queries << (query + " since:#{time.strftime('%Y-%m-%d_%H')}:00:00_UTC until:#{(time + 3600).strftime('%Y-%m-%d_%H')}:00:00_UTC")
             end
             time += 3600
             break if time >= end_time
           end
+        else
+          raise Error.new("Invalid :threads_granularity value=#{threads_granularity}")
         end
 
         @queries = queries
 
       elsif start_date
-        [query + " since:#{start_date}"]
+        [query + " since:#{start_date}_00:00:00_UTC"]
       elsif end_date
-        [query + " until:#{end_date}"]
+        [query + " until:#{end_date}_00:00:00_UTC"]
       else
         [query]
       end
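
With this hunk the day branch and the single-sided `since:`/`until:` fallbacks pin each boundary to an explicit `_00:00:00_UTC` timestamp, matching the format the hourly branch already used, and an unrecognized `:threads_granularity` value now raises an explicit error. An illustration of what the `'day'` branch now produces, reusing the exact string format from the diff:

```ruby
require 'date'

query      = 'ruby'
start_date = Date.parse('2020-07-01')
end_date   = Date.parse('2020-07-03')

# One sub-query per day, each bounded by explicit UTC timestamps.
queries = start_date.upto(end_date - 1).map do |date|
  query + " since:#{date}_00:00:00_UTC until:#{date + 1}_00:00:00_UTC"
end
# => ["ruby since:2020-07-01_00:00:00_UTC until:2020-07-02_00:00:00_UTC",
#     "ruby since:2020-07-02_00:00:00_UTC until:2020-07-03_00:00:00_UTC"]
```
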
@@ -214,15 +224,7 @@ module Twitterscraper
 
     def query_tweets(query, type: 'search', start_date: nil, end_date: nil, lang: nil, limit: 100, daily_limit: nil, order: 'desc', threads: 10, threads_granularity: 'auto')
       type = Type.new(type)
-      if type.search?
-        start_date = Date.parse(start_date) if start_date && start_date.is_a?(String)
-        end_date = Date.parse(end_date) if end_date && end_date.is_a?(String)
-      elsif type.user?
-        start_date = nil
-        end_date = nil
-      end
-
-      queries = build_queries(query, start_date, end_date, threads_granularity)
+      queries = build_queries(query, start_date, end_date, threads_granularity, type)
       if threads > queries.size
         threads = queries.size
       end
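
The block removed here is not lost: it moved into `build_queries`, which now receives the `Type` value object and decides itself how to treat the dates (parse them for a search, discard them for a user timeline). The `Type` class is not part of this diff; a hypothetical sketch of the two predicates the code above relies on:

```ruby
# Hypothetical sketch only; the gem's real Type class is defined elsewhere.
class Type
  VALUES = %w[search user].freeze

  def initialize(value)
    raise ArgumentError, "unknown type: #{value}" unless VALUES.include?(value)
    @value = value
  end

  def search?
    @value == 'search'
  end

  def user?
    @value == 'user'
  end
end
```
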
@@ -5,10 +5,11 @@ module Twitterscraper
     template = ERB.new(File.read(path))
 
     tweets = tweets.sort_by { |t| t.created_at.to_i }
+    grouping = options['chart_grouping'] || 'auto'
 
     template.result_with_hash(
       chart_name: name,
-      chart_data: chart_data(tweets).to_json,
+      chart_data: chart_data(tweets, grouping: grouping).to_json,
       first_tweet: tweets[0],
       last_tweet: tweets[-1],
       tweets: tweets,
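
Here the new `--chart_grouping` option reaches the HTML output: the grouping string is read from the parsed CLI options and forwarded to `chart_data`. The `chart_data` implementation is outside this diff; a hedged sketch of how such a helper could bucket tweets by day, hour or minute (tweet objects are assumed to respond to `created_at`, as in the sort above):

```ruby
# Sketch only; the gem's real chart_data is not shown in this diff.
def chart_data(tweets, grouping: 'auto')
  format =
    case grouping
    when 'hour'   then '%Y-%m-%d %H:00'
    when 'minute' then '%Y-%m-%d %H:%M'
    else               '%Y-%m-%d' # 'day', and a naive fallback for 'auto'
    end

  # Count tweets per time bucket: [["2020-07-01", 42], ["2020-07-02", 17], ...]
  tweets.group_by { |tweet| Time.at(tweet.created_at.to_i).utc.strftime(format) }
        .map { |time, bucket| [time, bucket.size] }
end
```
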
@@ -1,3 +1,3 @@
 module Twitterscraper
-  VERSION = '0.19.0'
+  VERSION = '0.20.0'
 end
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: twitterscraper-ruby
 version: !ruby/object:Gem::Version
-  version: 0.19.0
+  version: 0.20.0
 platform: ruby
 authors:
 - ts-3156
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2020-07-23 00:00:00.000000000 Z
+date: 2020-07-24 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: nokogiri