twitterscraper-ruby 0.12.0 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rspec +2 -0
- data/Gemfile +1 -0
- data/Gemfile.lock +16 -1
- data/lib/twitterscraper/cli.rb +6 -4
- data/lib/twitterscraper/client.rb +6 -1
- data/lib/twitterscraper/query.rb +17 -6
- data/lib/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bafdfd47b386ef7f717dc5846102c8a5153f4660e61d3559f6834cdca340c19c
|
4
|
+
data.tar.gz: fb5564629d89ae83c916d868e9fd401fdca1b423fbeb2d6945b0831c0d8ecf11
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5e9c819b318a908c73f56de0a638c7c819cc1e31c867812ec9ffa3e23362318db3dfc1fe5ffde53c4769bffdf1f62efdb4701bf9b1efe874625cdb6ce21ef1bc
|
7
|
+
data.tar.gz: aa75f3a328f6c2c278738962e7d6e9ea747841343362e8a0f226fd76b316b6ab05e63c93e495ae39009fd3f0a1c4eb0657bc66e1b22416e6a695cc34b4059643
|
data/.rspec
ADDED
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,19 +1,33 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
twitterscraper-ruby (0.12.0)
|
4
|
+
twitterscraper-ruby (0.13.0)
|
5
5
|
nokogiri
|
6
6
|
parallel
|
7
7
|
|
8
8
|
GEM
|
9
9
|
remote: https://rubygems.org/
|
10
10
|
specs:
|
11
|
+
diff-lcs (1.4.4)
|
11
12
|
mini_portile2 (2.4.0)
|
12
13
|
minitest (5.14.1)
|
13
14
|
nokogiri (1.10.10)
|
14
15
|
mini_portile2 (~> 2.4.0)
|
15
16
|
parallel (1.19.2)
|
16
17
|
rake (12.3.3)
|
18
|
+
rspec (3.9.0)
|
19
|
+
rspec-core (~> 3.9.0)
|
20
|
+
rspec-expectations (~> 3.9.0)
|
21
|
+
rspec-mocks (~> 3.9.0)
|
22
|
+
rspec-core (3.9.2)
|
23
|
+
rspec-support (~> 3.9.3)
|
24
|
+
rspec-expectations (3.9.2)
|
25
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
26
|
+
rspec-support (~> 3.9.0)
|
27
|
+
rspec-mocks (3.9.1)
|
28
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
29
|
+
rspec-support (~> 3.9.0)
|
30
|
+
rspec-support (3.9.3)
|
17
31
|
|
18
32
|
PLATFORMS
|
19
33
|
ruby
|
@@ -21,6 +35,7 @@ PLATFORMS
|
|
21
35
|
DEPENDENCIES
|
22
36
|
minitest (~> 5.0)
|
23
37
|
rake (~> 12.0)
|
38
|
+
rspec
|
24
39
|
twitterscraper-ruby!
|
25
40
|
|
26
41
|
BUNDLED WITH
|
data/lib/twitterscraper/cli.rb
CHANGED
@@ -22,9 +22,8 @@ module Twitterscraper
|
|
22
22
|
limit: options['limit'],
|
23
23
|
daily_limit: options['daily_limit'],
|
24
24
|
threads: options['threads'],
|
25
|
-
proxy: options['proxy']
|
26
25
|
}
|
27
|
-
client = Twitterscraper::Client.new(cache: options['cache'])
|
26
|
+
client = Twitterscraper::Client.new(cache: options['cache'], proxy: options['proxy'])
|
28
27
|
tweets = client.query_tweets(options['query'], query_options)
|
29
28
|
export(tweets) unless tweets.empty?
|
30
29
|
end
|
@@ -68,8 +67,8 @@ module Twitterscraper
|
|
68
67
|
'threads:',
|
69
68
|
'output:',
|
70
69
|
'format:',
|
71
|
-
'cache',
|
72
|
-
'proxy',
|
70
|
+
'cache:',
|
71
|
+
'proxy:',
|
73
72
|
'pretty',
|
74
73
|
'verbose',
|
75
74
|
)
|
@@ -82,6 +81,9 @@ module Twitterscraper
|
|
82
81
|
options['format'] ||= 'json'
|
83
82
|
options['output'] ||= "tweets.#{options['format']}"
|
84
83
|
|
84
|
+
options['cache'] = options['cache'] != 'false'
|
85
|
+
options['proxy'] = options['proxy'] != 'false'
|
86
|
+
|
85
87
|
options
|
86
88
|
end
|
87
89
|
|
data/lib/twitterscraper/client.rb
CHANGED
@@ -2,12 +2,17 @@ module Twitterscraper
|
|
2
2
|
class Client
|
3
3
|
include Query
|
4
4
|
|
5
|
-
def initialize(cache:
|
5
|
+
def initialize(cache: true, proxy: true)
|
6
6
|
@cache = cache
|
7
|
+
@proxy = proxy
|
7
8
|
end
|
8
9
|
|
9
10
|
def cache_enabled?
|
10
11
|
@cache
|
11
12
|
end
|
13
|
+
|
14
|
+
def proxy_enabled?
|
15
|
+
@proxy
|
16
|
+
end
|
12
17
|
end
|
13
18
|
end
|
data/lib/twitterscraper/query.rb
CHANGED
@@ -116,7 +116,8 @@ module Twitterscraper
|
|
116
116
|
|
117
117
|
OLDEST_DATE = Date.parse('2006-03-21')
|
118
118
|
|
119
|
-
def validate_options!(
|
119
|
+
def validate_options!(queries, start_date:, end_date:, lang:, limit:, threads:)
|
120
|
+
query = queries[0]
|
120
121
|
if query.nil? || query == ''
|
121
122
|
raise Error.new('Please specify a search query.')
|
122
123
|
end
|
@@ -194,16 +195,26 @@ module Twitterscraper
|
|
194
195
|
@stop_requested
|
195
196
|
end
|
196
197
|
|
197
|
-
def query_tweets(query, start_date: nil, end_date: nil, lang: '', limit: 100, daily_limit: nil, threads: 2
|
198
|
+
def query_tweets(query, start_date: nil, end_date: nil, lang: '', limit: 100, daily_limit: nil, threads: 2)
|
198
199
|
start_date = Date.parse(start_date) if start_date && start_date.is_a?(String)
|
199
200
|
end_date = Date.parse(end_date) if end_date && end_date.is_a?(String)
|
200
201
|
queries = build_queries(query, start_date, end_date)
|
201
|
-
|
202
|
-
|
202
|
+
if threads > queries.size
|
203
|
+
logger.warn 'The maximum number of :threads is the number of dates between :start_date and :end_date.'
|
204
|
+
threads = queries.size
|
205
|
+
end
|
206
|
+
if proxy_enabled?
|
207
|
+
proxies = Proxy::Pool.new
|
208
|
+
logger.debug "Fetch #{proxies.size} proxies"
|
209
|
+
else
|
210
|
+
proxies = []
|
211
|
+
logger.debug 'Proxy disabled'
|
212
|
+
end
|
213
|
+
logger.debug "Cache #{cache_enabled? ? 'enabled' : 'disabled'}"
|
214
|
+
|
203
215
|
|
204
|
-
validate_options!(queries
|
216
|
+
validate_options!(queries, start_date: start_date, end_date: end_date, lang: lang, limit: limit, threads: threads)
|
205
217
|
|
206
|
-
logger.debug "Fetch #{proxies.size} proxies" if proxy
|
207
218
|
logger.info "The number of threads #{threads}"
|
208
219
|
|
209
220
|
headers = {'User-Agent': USER_AGENT_LIST.sample, 'X-Requested-With': 'XMLHttpRequest'}
|
data/lib/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twitterscraper-ruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.12.0
|
4
|
+
version: 0.13.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ts-3156
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-07-
|
11
|
+
date: 2020-07-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -48,6 +48,7 @@ extra_rdoc_files: []
|
|
48
48
|
files:
|
49
49
|
- ".gitignore"
|
50
50
|
- ".irbrc"
|
51
|
+
- ".rspec"
|
51
52
|
- ".ruby-version"
|
52
53
|
- ".travis.yml"
|
53
54
|
- CODE_OF_CONDUCT.md
|