twitterscraper-ruby 0.19.0 → 0.20.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +2 -1
- data/lib/twitterscraper/cli.rb +1 -0
- data/lib/twitterscraper/query.rb +18 -16
- data/lib/twitterscraper/template.rb +2 -1
- data/lib/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 73a9e9108284fc79cf5ec6b36b6f7ad3f83f2b4f03a2bc527dc18cb4b33e83c7
|
4
|
+
data.tar.gz: c7fcfdbdd1d808780c56610be9b8717352c812759b9344d9fa87cbd430a8d8e2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1019547fe8c37a1bb5b4a9cd96a2737a14491087075ff448b48f72538758337c76ab513e153d4567454b192d30fafaa374913ae0c3548d7802e7bdd478fe4a2f
|
7
|
+
data.tar.gz: 48134e8b6858154850003da8684d3c8b7f124cab6d19e0ce76d05326dc8fef44694b32211e245509993e8b7b1afafa6d95914b05c66b9c95c54bb27d041983fe
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -175,7 +175,8 @@ Search operators documentation is in [Standard search operators](https://develop
|
|
175
175
|
| `--limit` | integer | Stop scraping when *at least* the number of tweets indicated with --limit is scraped. | 100 |
|
176
176
|
| `--order` | string | Sort a order of the results. | desc(default) or asc |
|
177
177
|
| `--threads` | integer | Set the number of threads twitterscraper-ruby should initiate while scraping for your query. | 2 |
|
178
|
-
| `--threads_granularity` | string | | auto |
|
178
|
+
| `--threads_granularity` | string | day or hour | auto |
|
179
|
+
| `--chart_grouping` | string | day, hour or minute | auto |
|
179
180
|
| `--proxy` | boolean | Scrape https://twitter.com/search via proxies. | true(default) or false |
|
180
181
|
| `--cache` | boolean | Enable caching. | true(default) or false |
|
181
182
|
| `--format` | string | The format of the output. | json(default) or html |
|
data/lib/twitterscraper/cli.rb
CHANGED
data/lib/twitterscraper/query.rb
CHANGED
@@ -131,19 +131,27 @@ module Twitterscraper
|
|
131
131
|
if start_date && end_date
|
132
132
|
if start_date == end_date
|
133
133
|
raise Error.new('Please specify different values for :start_date and :end_date.')
|
134
|
-
elsif start_date > end_date
|
134
|
+
elsif Date.parse(start_date) > Date.parse(end_date)
|
135
135
|
raise Error.new(':start_date must occur before :end_date.')
|
136
136
|
end
|
137
137
|
end
|
138
138
|
|
139
139
|
if start_date
|
140
|
-
if start_date < OLDEST_DATE
|
140
|
+
if Date.parse(start_date) < OLDEST_DATE
|
141
141
|
raise Error.new(":start_date must be greater than or equal to #{OLDEST_DATE}")
|
142
142
|
end
|
143
143
|
end
|
144
144
|
end
|
145
145
|
|
146
|
-
def build_queries(query, start_date, end_date, threads_granularity)
|
146
|
+
def build_queries(query, start_date, end_date, threads_granularity, type)
|
147
|
+
if type.search?
|
148
|
+
start_date = Date.parse(start_date) if start_date.is_a?(String)
|
149
|
+
end_date = Date.parse(end_date) if end_date.is_a?(String)
|
150
|
+
elsif type.user?
|
151
|
+
start_date = nil
|
152
|
+
end_date = nil
|
153
|
+
end
|
154
|
+
|
147
155
|
if start_date && end_date
|
148
156
|
if threads_granularity == 'auto'
|
149
157
|
threads_granularity = start_date.upto(end_date - 1).to_a.size >= 28 ? 'day' : 'hour'
|
@@ -151,7 +159,7 @@ module Twitterscraper
|
|
151
159
|
|
152
160
|
if threads_granularity == 'day'
|
153
161
|
date_range = start_date.upto(end_date - 1)
|
154
|
-
queries = date_range.map { |date| query + " since:#{date} until:#{date + 1}" }
|
162
|
+
queries = date_range.map { |date| query + " since:#{date}_00:00:00_UTC until:#{date + 1}_00:00:00_UTC" }
|
155
163
|
elsif threads_granularity == 'hour'
|
156
164
|
time = Time.utc(start_date.year, start_date.month, start_date.day, 0, 0, 0)
|
157
165
|
end_time = Time.utc(end_date.year, end_date.month, end_date.day, 0, 0, 0)
|
@@ -159,19 +167,21 @@ module Twitterscraper
|
|
159
167
|
|
160
168
|
while true
|
161
169
|
if time < Time.now.utc
|
162
|
-
queries << (query + " since:#{time.strftime('%Y-%m-%d_%H:00:
|
170
|
+
queries << (query + " since:#{time.strftime('%Y-%m-%d_%H')}:00:00_UTC until:#{(time + 3600).strftime('%Y-%m-%d_%H')}:00:00_UTC")
|
163
171
|
end
|
164
172
|
time += 3600
|
165
173
|
break if time >= end_time
|
166
174
|
end
|
175
|
+
else
|
176
|
+
raise Error.new("Invalid :threads_granularity value=#{threads_granularity}")
|
167
177
|
end
|
168
178
|
|
169
179
|
@queries = queries
|
170
180
|
|
171
181
|
elsif start_date
|
172
|
-
[query + " since:#{start_date}"]
|
182
|
+
[query + " since:#{start_date}_00:00:00_UTC"]
|
173
183
|
elsif end_date
|
174
|
-
[query + " until:#{end_date}"]
|
184
|
+
[query + " until:#{end_date}_00:00:00_UTC"]
|
175
185
|
else
|
176
186
|
[query]
|
177
187
|
end
|
@@ -214,15 +224,7 @@ module Twitterscraper
|
|
214
224
|
|
215
225
|
def query_tweets(query, type: 'search', start_date: nil, end_date: nil, lang: nil, limit: 100, daily_limit: nil, order: 'desc', threads: 10, threads_granularity: 'auto')
|
216
226
|
type = Type.new(type)
|
217
|
-
|
218
|
-
start_date = Date.parse(start_date) if start_date && start_date.is_a?(String)
|
219
|
-
end_date = Date.parse(end_date) if end_date && end_date.is_a?(String)
|
220
|
-
elsif type.user?
|
221
|
-
start_date = nil
|
222
|
-
end_date = nil
|
223
|
-
end
|
224
|
-
|
225
|
-
queries = build_queries(query, start_date, end_date, threads_granularity)
|
227
|
+
queries = build_queries(query, start_date, end_date, threads_granularity, type)
|
226
228
|
if threads > queries.size
|
227
229
|
threads = queries.size
|
228
230
|
end
|
data/lib/twitterscraper/template.rb
CHANGED
@@ -5,10 +5,11 @@ module Twitterscraper
|
|
5
5
|
template = ERB.new(File.read(path))
|
6
6
|
|
7
7
|
tweets = tweets.sort_by { |t| t.created_at.to_i }
|
8
|
+
grouping = options['chart_grouping'] || 'auto'
|
8
9
|
|
9
10
|
template.result_with_hash(
|
10
11
|
chart_name: name,
|
11
|
-
chart_data: chart_data(tweets).to_json,
|
12
|
+
chart_data: chart_data(tweets, grouping: grouping).to_json,
|
12
13
|
first_tweet: tweets[0],
|
13
14
|
last_tweet: tweets[-1],
|
14
15
|
tweets: tweets,
|
data/lib/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twitterscraper-ruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.19.0
|
4
|
+
version: 0.20.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ts-3156
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-07-
|
11
|
+
date: 2020-07-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|