twitterscraper-ruby 0.19.0 → 0.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2056b4a3d9fe7af49429e35b3a1688256fb31b74cabab841a4dd2376a79889d5
4
- data.tar.gz: aaaf949da2ba2ae07a0d66e981aebc635c18120de06be705f96c19c92c309911
3
+ metadata.gz: 73a9e9108284fc79cf5ec6b36b6f7ad3f83f2b4f03a2bc527dc18cb4b33e83c7
4
+ data.tar.gz: c7fcfdbdd1d808780c56610be9b8717352c812759b9344d9fa87cbd430a8d8e2
5
5
  SHA512:
6
- metadata.gz: c60824e4c1c0021a3e27451b1708a77bd2e15dd6258fce63ac1b95111d0230c8ab7317bcd76c2faf14d02ebe75ab8d7453924e01eee7d3fcb46eef374f16c575
7
- data.tar.gz: 984204bd430b41b76a2d9108df4e778e2bb242010ebd18569bcb662473496826644ba5693db1d475d565bff49a3de7f0eb95fd4c9a3da9e5ed4d6a6219ebb62e
6
+ metadata.gz: 1019547fe8c37a1bb5b4a9cd96a2737a14491087075ff448b48f72538758337c76ab513e153d4567454b192d30fafaa374913ae0c3548d7802e7bdd478fe4a2f
7
+ data.tar.gz: 48134e8b6858154850003da8684d3c8b7f124cab6d19e0ce76d05326dc8fef44694b32211e245509993e8b7b1afafa6d95914b05c66b9c95c54bb27d041983fe
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- twitterscraper-ruby (0.19.0)
4
+ twitterscraper-ruby (0.20.0)
5
5
  nokogiri
6
6
  parallel
7
7
 
data/README.md CHANGED
@@ -175,7 +175,8 @@ Search operators documentation is in [Standard search operators](https://develop
175
175
  | `--limit` | integer | Stop scraping when *at least* the number of tweets indicated with --limit is scraped. | 100 |
176
176
  | `--order` | string | Sort order of the results. | desc(default) or asc |
177
177
  | `--threads` | integer | Set the number of threads twitterscraper-ruby should initiate while scraping for your query. | 2 |
178
- | `--threads_granularity` | string | | auto |
178
+ | `--threads_granularity` | string | day or hour | auto |
179
+ | `--chart_grouping` | string | day, hour or minute | auto |
179
180
  | `--proxy` | boolean | Scrape https://twitter.com/search via proxies. | true(default) or false |
180
181
  | `--cache` | boolean | Enable caching. | true(default) or false |
181
182
  | `--format` | string | The format of the output. | json(default) or html |
@@ -74,6 +74,7 @@ module Twitterscraper
74
74
  'order:',
75
75
  'threads:',
76
76
  'threads_granularity:',
77
+ 'chart_grouping:',
77
78
  'output:',
78
79
  'format:',
79
80
  'cache:',
@@ -131,19 +131,27 @@ module Twitterscraper
131
131
  if start_date && end_date
132
132
  if start_date == end_date
133
133
  raise Error.new('Please specify different values for :start_date and :end_date.')
134
- elsif start_date > end_date
134
+ elsif Date.parse(start_date) > Date.parse(end_date)
135
135
  raise Error.new(':start_date must occur before :end_date.')
136
136
  end
137
137
  end
138
138
 
139
139
  if start_date
140
- if start_date < OLDEST_DATE
140
+ if Date.parse(start_date) < OLDEST_DATE
141
141
  raise Error.new(":start_date must be greater than or equal to #{OLDEST_DATE}")
142
142
  end
143
143
  end
144
144
  end
145
145
 
146
- def build_queries(query, start_date, end_date, threads_granularity)
146
+ def build_queries(query, start_date, end_date, threads_granularity, type)
147
+ if type.search?
148
+ start_date = Date.parse(start_date) if start_date.is_a?(String)
149
+ end_date = Date.parse(end_date) if end_date.is_a?(String)
150
+ elsif type.user?
151
+ start_date = nil
152
+ end_date = nil
153
+ end
154
+
147
155
  if start_date && end_date
148
156
  if threads_granularity == 'auto'
149
157
  threads_granularity = start_date.upto(end_date - 1).to_a.size >= 28 ? 'day' : 'hour'
@@ -151,7 +159,7 @@ module Twitterscraper
151
159
 
152
160
  if threads_granularity == 'day'
153
161
  date_range = start_date.upto(end_date - 1)
154
- queries = date_range.map { |date| query + " since:#{date} until:#{date + 1}" }
162
+ queries = date_range.map { |date| query + " since:#{date}_00:00:00_UTC until:#{date + 1}_00:00:00_UTC" }
155
163
  elsif threads_granularity == 'hour'
156
164
  time = Time.utc(start_date.year, start_date.month, start_date.day, 0, 0, 0)
157
165
  end_time = Time.utc(end_date.year, end_date.month, end_date.day, 0, 0, 0)
@@ -159,19 +167,21 @@ module Twitterscraper
159
167
 
160
168
  while true
161
169
  if time < Time.now.utc
162
- queries << (query + " since:#{time.strftime('%Y-%m-%d_%H:00:00')}_UTC until:#{(time + 3600).strftime('%Y-%m-%d_%H:00:00')}_UTC")
170
+ queries << (query + " since:#{time.strftime('%Y-%m-%d_%H')}:00:00_UTC until:#{(time + 3600).strftime('%Y-%m-%d_%H')}:00:00_UTC")
163
171
  end
164
172
  time += 3600
165
173
  break if time >= end_time
166
174
  end
175
+ else
176
+ raise Error.new("Invalid :threads_granularity value=#{threads_granularity}")
167
177
  end
168
178
 
169
179
  @queries = queries
170
180
 
171
181
  elsif start_date
172
- [query + " since:#{start_date}"]
182
+ [query + " since:#{start_date}_00:00:00_UTC"]
173
183
  elsif end_date
174
- [query + " until:#{end_date}"]
184
+ [query + " until:#{end_date}_00:00:00_UTC"]
175
185
  else
176
186
  [query]
177
187
  end
@@ -214,15 +224,7 @@ module Twitterscraper
214
224
 
215
225
  def query_tweets(query, type: 'search', start_date: nil, end_date: nil, lang: nil, limit: 100, daily_limit: nil, order: 'desc', threads: 10, threads_granularity: 'auto')
216
226
  type = Type.new(type)
217
- if type.search?
218
- start_date = Date.parse(start_date) if start_date && start_date.is_a?(String)
219
- end_date = Date.parse(end_date) if end_date && end_date.is_a?(String)
220
- elsif type.user?
221
- start_date = nil
222
- end_date = nil
223
- end
224
-
225
- queries = build_queries(query, start_date, end_date, threads_granularity)
227
+ queries = build_queries(query, start_date, end_date, threads_granularity, type)
226
228
  if threads > queries.size
227
229
  threads = queries.size
228
230
  end
@@ -5,10 +5,11 @@ module Twitterscraper
5
5
  template = ERB.new(File.read(path))
6
6
 
7
7
  tweets = tweets.sort_by { |t| t.created_at.to_i }
8
+ grouping = options['chart_grouping'] || 'auto'
8
9
 
9
10
  template.result_with_hash(
10
11
  chart_name: name,
11
- chart_data: chart_data(tweets).to_json,
12
+ chart_data: chart_data(tweets, grouping: grouping).to_json,
12
13
  first_tweet: tweets[0],
13
14
  last_tweet: tweets[-1],
14
15
  tweets: tweets,
@@ -1,3 +1,3 @@
1
1
  module Twitterscraper
2
- VERSION = '0.19.0'
2
+ VERSION = '0.20.0'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: twitterscraper-ruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.19.0
4
+ version: 0.20.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - ts-3156
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-07-23 00:00:00.000000000 Z
11
+ date: 2020-07-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri