twitterscraper-ruby 0.19.0 → 0.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +2 -1
- data/lib/twitterscraper/cli.rb +1 -0
- data/lib/twitterscraper/query.rb +18 -16
- data/lib/twitterscraper/template.rb +2 -1
- data/lib/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 73a9e9108284fc79cf5ec6b36b6f7ad3f83f2b4f03a2bc527dc18cb4b33e83c7
|
4
|
+
data.tar.gz: c7fcfdbdd1d808780c56610be9b8717352c812759b9344d9fa87cbd430a8d8e2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1019547fe8c37a1bb5b4a9cd96a2737a14491087075ff448b48f72538758337c76ab513e153d4567454b192d30fafaa374913ae0c3548d7802e7bdd478fe4a2f
|
7
|
+
data.tar.gz: 48134e8b6858154850003da8684d3c8b7f124cab6d19e0ce76d05326dc8fef44694b32211e245509993e8b7b1afafa6d95914b05c66b9c95c54bb27d041983fe
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -175,7 +175,8 @@ Search operators documentation is in [Standard search operators](https://develop
|
|
175
175
|
| `--limit` | integer | Stop scraping when *at least* the number of tweets indicated with --limit is scraped. | 100 |
|
176
176
|
| `--order` | string | Sort a order of the results. | desc(default) or asc |
|
177
177
|
| `--threads` | integer | Set the number of threads twitterscraper-ruby should initiate while scraping for your query. | 2 |
|
178
|
-
| `--threads_granularity` | string | | auto |
|
178
|
+
| `--threads_granularity` | string | day or hour | auto |
|
179
|
+
| `--chart_grouping` | string | day, hour or minute | auto |
|
179
180
|
| `--proxy` | boolean | Scrape https://twitter.com/search via proxies. | true(default) or false |
|
180
181
|
| `--cache` | boolean | Enable caching. | true(default) or false |
|
181
182
|
| `--format` | string | The format of the output. | json(default) or html |
|
data/lib/twitterscraper/cli.rb
CHANGED
data/lib/twitterscraper/query.rb
CHANGED
@@ -131,19 +131,27 @@ module Twitterscraper
|
|
131
131
|
if start_date && end_date
|
132
132
|
if start_date == end_date
|
133
133
|
raise Error.new('Please specify different values for :start_date and :end_date.')
|
134
|
-
elsif start_date > end_date
|
134
|
+
elsif Date.parse(start_date) > Date.parse(end_date)
|
135
135
|
raise Error.new(':start_date must occur before :end_date.')
|
136
136
|
end
|
137
137
|
end
|
138
138
|
|
139
139
|
if start_date
|
140
|
-
if start_date < OLDEST_DATE
|
140
|
+
if Date.parse(start_date) < OLDEST_DATE
|
141
141
|
raise Error.new(":start_date must be greater than or equal to #{OLDEST_DATE}")
|
142
142
|
end
|
143
143
|
end
|
144
144
|
end
|
145
145
|
|
146
|
-
def build_queries(query, start_date, end_date, threads_granularity)
|
146
|
+
def build_queries(query, start_date, end_date, threads_granularity, type)
|
147
|
+
if type.search?
|
148
|
+
start_date = Date.parse(start_date) if start_date.is_a?(String)
|
149
|
+
end_date = Date.parse(end_date) if end_date.is_a?(String)
|
150
|
+
elsif type.user?
|
151
|
+
start_date = nil
|
152
|
+
end_date = nil
|
153
|
+
end
|
154
|
+
|
147
155
|
if start_date && end_date
|
148
156
|
if threads_granularity == 'auto'
|
149
157
|
threads_granularity = start_date.upto(end_date - 1).to_a.size >= 28 ? 'day' : 'hour'
|
@@ -151,7 +159,7 @@ module Twitterscraper
|
|
151
159
|
|
152
160
|
if threads_granularity == 'day'
|
153
161
|
date_range = start_date.upto(end_date - 1)
|
154
|
-
queries = date_range.map { |date| query + " since:#{date} until:#{date + 1}" }
|
162
|
+
queries = date_range.map { |date| query + " since:#{date}_00:00:00_UTC until:#{date + 1}_00:00:00_UTC" }
|
155
163
|
elsif threads_granularity == 'hour'
|
156
164
|
time = Time.utc(start_date.year, start_date.month, start_date.day, 0, 0, 0)
|
157
165
|
end_time = Time.utc(end_date.year, end_date.month, end_date.day, 0, 0, 0)
|
@@ -159,19 +167,21 @@ module Twitterscraper
|
|
159
167
|
|
160
168
|
while true
|
161
169
|
if time < Time.now.utc
|
162
|
-
queries << (query + " since:#{time.strftime('%Y-%m-%d_%H:00:
|
170
|
+
queries << (query + " since:#{time.strftime('%Y-%m-%d_%H')}:00:00_UTC until:#{(time + 3600).strftime('%Y-%m-%d_%H')}:00:00_UTC")
|
163
171
|
end
|
164
172
|
time += 3600
|
165
173
|
break if time >= end_time
|
166
174
|
end
|
175
|
+
else
|
176
|
+
raise Error.new("Invalid :threads_granularity value=#{threads_granularity}")
|
167
177
|
end
|
168
178
|
|
169
179
|
@queries = queries
|
170
180
|
|
171
181
|
elsif start_date
|
172
|
-
[query + " since:#{start_date}"]
|
182
|
+
[query + " since:#{start_date}_00:00:00_UTC"]
|
173
183
|
elsif end_date
|
174
|
-
[query + " until:#{end_date}"]
|
184
|
+
[query + " until:#{end_date}_00:00:00_UTC"]
|
175
185
|
else
|
176
186
|
[query]
|
177
187
|
end
|
@@ -214,15 +224,7 @@ module Twitterscraper
|
|
214
224
|
|
215
225
|
def query_tweets(query, type: 'search', start_date: nil, end_date: nil, lang: nil, limit: 100, daily_limit: nil, order: 'desc', threads: 10, threads_granularity: 'auto')
|
216
226
|
type = Type.new(type)
|
217
|
-
if type.search?
|
218
|
-
start_date = Date.parse(start_date) if start_date && start_date.is_a?(String)
|
219
|
-
end_date = Date.parse(end_date) if end_date && end_date.is_a?(String)
|
220
|
-
elsif type.user?
|
221
|
-
start_date = nil
|
222
|
-
end_date = nil
|
223
|
-
end
|
224
|
-
|
225
|
-
queries = build_queries(query, start_date, end_date, threads_granularity)
|
227
|
+
queries = build_queries(query, start_date, end_date, threads_granularity, type)
|
226
228
|
if threads > queries.size
|
227
229
|
threads = queries.size
|
228
230
|
end
|
data/lib/twitterscraper/template.rb
CHANGED
@@ -5,10 +5,11 @@ module Twitterscraper
|
|
5
5
|
template = ERB.new(File.read(path))
|
6
6
|
|
7
7
|
tweets = tweets.sort_by { |t| t.created_at.to_i }
|
8
|
+
grouping = options['chart_grouping'] || 'auto'
|
8
9
|
|
9
10
|
template.result_with_hash(
|
10
11
|
chart_name: name,
|
11
|
-
chart_data: chart_data(tweets).to_json,
|
12
|
+
chart_data: chart_data(tweets, grouping: grouping).to_json,
|
12
13
|
first_tweet: tweets[0],
|
13
14
|
last_tweet: tweets[-1],
|
14
15
|
tweets: tweets,
|
data/lib/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twitterscraper-ruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.19.0
|
4
|
+
version: 0.20.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ts-3156
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-07-
|
11
|
+
date: 2020-07-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|