twitterscraper-ruby 0.14.0 → 0.15.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cf902c947e866cc99e79fbb9f8a51c829accd44aed03ef7657562bf41932c73d
4
- data.tar.gz: 1bc5a0698a17b244ee9228d7728767dd00218179a5a49e0852a74cc722322ef0
3
+ metadata.gz: a950fb24329aaa1020441e258a8a2144100d732142b6c227bb9b026b8bb73996
4
+ data.tar.gz: 1f64f31e43189e2ee439f5ef6f6d54bc6ea58895adbed67cb8ddbe91af07681a
5
5
  SHA512:
6
- metadata.gz: 629de8698af1391c210b496e9aadb51ad5f9d7157b1be5d0aa669ae821671e2b5624ba51083fb14b61f93618ff3e90aea1ac0eccb6ea00360fac48a2dfc436c7
7
- data.tar.gz: 3f3706bee5f2a92a2addae034201e2e8cee3fef43efdc323be963cbaf1b94c31c53aa49a19e58a068498722dfe07e9796e097fb04364a9afda56d06132e6b935
6
+ metadata.gz: 8573affbc9a5faa05e5e489364bb2ba0da1aa4f12af35445e5de8b1f8c399eb0575cc9f408b2ba96c3d7fd8b2a74b7dd703229053a33c1f8a883856818033cb9
7
+ data.tar.gz: 2b2b3ad0b2dd9d089a7b6127ed1b0db21e7f4fa5f0c31e6b366d9b5ae444e2244d4200c813b7a3257f43702d2caa9f264515e701602c24f4482a746b89d41328
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- twitterscraper-ruby (0.14.0)
4
+ twitterscraper-ruby (0.15.0)
5
5
  nokogiri
6
6
  parallel
7
7
 
data/README.md CHANGED
@@ -33,26 +33,39 @@ $ gem install twitterscraper-ruby
33
33
  Command-line interface:
34
34
 
35
35
  ```shell script
36
- $ twitterscraper --query KEYWORD --start_date 2020-06-01 --end_date 2020-06-30 --lang ja \
37
- --limit 100 --threads 10 --output output.json
36
+ # Returns a collection of relevant tweets matching a specified query.
37
+ $ twitterscraper --type search --query KEYWORD --start_date 2020-06-01 --end_date 2020-06-30 --lang ja \
38
+ --limit 100 --threads 10 --output tweets.json
39
+ ```
40
+
41
+ ```shell script
42
+ # Returns a collection of the most recent tweets posted by the user with the given screen name.
43
+ $ twitterscraper --type user --query SCREEN_NAME --limit 100 --output tweets.json
38
44
  ```
39
45
 
40
46
  From Within Ruby:
41
47
 
42
48
  ```ruby
43
49
  require 'twitterscraper'
50
+ client = Twitterscraper::Client.new(cache: true, proxy: true)
51
+ ```
44
52
 
45
- options = {
46
- start_date: '2020-06-01',
47
- end_date: '2020-06-30',
48
- lang: 'ja',
49
- limit: 100,
50
- threads: 10,
51
- }
53
+ ```ruby
54
+ # Returns a collection of relevant tweets matching a specified query.
55
+ tweets = client.search(KEYWORD, start_date: '2020-06-01', end_date: '2020-06-30', lang: 'ja', limit: 100, threads: 10)
56
+ ```
57
+
58
+ ```ruby
59
+ # Returns a collection of the most recent tweets posted by the user with the given screen name.
60
+ tweets = client.user_timeline(SCREEN_NAME, limit: 100)
61
+ ```
52
62
 
53
- client = Twitterscraper::Client.new(cache: true, proxy: true)
54
- tweets = client.query_tweets(KEYWORD, options)
55
63
 
64
+ ## Attributes
65
+
66
+ ### Tweet
67
+
68
+ ```ruby
56
69
  tweets.each do |tweet|
57
70
  puts tweet.tweet_id
58
71
  puts tweet.text
@@ -64,11 +77,6 @@ tweets.each do |tweet|
64
77
  end
65
78
  ```
66
79
 
67
-
68
- ## Attributes
69
-
70
- ### Tweet
71
-
72
80
  - screen_name
73
81
  - name
74
82
  - user_id
@@ -136,6 +144,7 @@ $ cat tweets.json | jq . | less
136
144
  | Option | Description | Default |
137
145
  | ------------- | ------------- | ------------- |
138
146
  | `-h`, `--help` | This option displays a summary of twitterscraper. | |
147
+ | `--type` | Specify the query type: `search` (keyword search) or `user` (a user's timeline). | search |
139
148
  | `--query` | Specify a keyword used during the search. | |
140
149
  | `--start_date` | Used as "since:yyyy-mm-dd" in your query. This means "since the date". | |
141
150
  | `--end_date` | Used as "until:yyyy-mm-dd" in your query. This means "before the date". | |
@@ -16,6 +16,7 @@ module Twitterscraper
16
16
  print_version || return if print_version?
17
17
 
18
18
  query_options = {
19
+ type: options['type'],
19
20
  start_date: options['start_date'],
20
21
  end_date: options['end_date'],
21
22
  lang: options['lang'],
@@ -59,6 +60,7 @@ module Twitterscraper
59
60
  'help',
60
61
  'v',
61
62
  'version',
63
+ 'type:',
62
64
  'query:',
63
65
  'start_date:',
64
66
  'end_date:',
@@ -75,6 +77,7 @@ module Twitterscraper
75
77
  'verbose',
76
78
  )
77
79
 
80
+ options['type'] ||= 'search'
78
81
  options['start_date'] = Query::OLDEST_DATE if options['start_date'] == 'oldest'
79
82
  options['lang'] ||= ''
80
83
  options['limit'] = (options['limit'] || 100).to_i
@@ -22,23 +22,24 @@ module Twitterscraper
22
22
  RELOAD_URL = 'https://twitter.com/i/search/timeline?f=tweets&vertical=' +
23
23
  'default&include_available_features=1&include_entities=1&' +
24
24
  'reset_error_state=false&src=typd&max_position=__POS__&q=__QUERY__&l=__LANG__'
25
- INIT_URL_USER = 'https://twitter.com/{u}'
26
- RELOAD_URL_USER = 'https://twitter.com/i/profiles/show/{u}/timeline/tweets?' +
25
+ INIT_URL_USER = 'https://twitter.com/__USER__'
26
+ RELOAD_URL_USER = 'https://twitter.com/i/profiles/show/__USER__/timeline/tweets?' +
27
27
  'include_available_features=1&include_entities=1&' +
28
- 'max_position={pos}&reset_error_state=false'
29
-
30
- def build_query_url(query, lang, pos, from_user = false)
31
- # if from_user
32
- # if !pos
33
- # INIT_URL_USER.format(u = query)
34
- # else
35
- # RELOAD_URL_USER.format(u = query, pos = pos)
36
- # end
37
- # end
38
- if pos
39
- RELOAD_URL.sub('__QUERY__', query).sub('__LANG__', lang.to_s).sub('__POS__', pos)
28
+ 'max_position=__POS__&reset_error_state=false'
29
+
30
+ def build_query_url(query, lang, from_user, pos)
31
+ if from_user
32
+ if pos
33
+ RELOAD_URL_USER.sub('__USER__', query).sub('__POS__', pos.to_s)
34
+ else
35
+ INIT_URL_USER.sub('__USER__', query)
36
+ end
40
37
  else
41
- INIT_URL.sub('__QUERY__', query).sub('__LANG__', lang.to_s)
38
+ if pos
39
+ RELOAD_URL.sub('__QUERY__', query).sub('__LANG__', lang.to_s).sub('__POS__', pos)
40
+ else
41
+ INIT_URL.sub('__QUERY__', query).sub('__LANG__', lang.to_s)
42
+ end
42
43
  end
43
44
  end
44
45
 
@@ -74,11 +75,11 @@ module Twitterscraper
74
75
  [items_html, json_resp]
75
76
  end
76
77
 
77
- def query_single_page(query, lang, pos, from_user = false, headers: [], proxies: [])
78
+ def query_single_page(query, lang, type, pos, headers: [], proxies: [])
78
79
  logger.info "Querying #{query}"
79
80
  query = ERB::Util.url_encode(query)
80
81
 
81
- url = build_query_url(query, lang, pos, from_user)
82
+ url = build_query_url(query, lang, type == 'user', pos)
82
83
  http_request = lambda do
83
84
  logger.debug "Scraping tweets from #{url}"
84
85
  get_single_page(url, headers, proxies)
@@ -107,8 +108,8 @@ module Twitterscraper
107
108
 
108
109
  if json_resp
109
110
  [tweets, json_resp['min_position']]
110
- elsif from_user
111
- raise NotImplementedError
111
+ elsif type
112
+ [tweets, tweets[-1].tweet_id]
112
113
  else
113
114
  [tweets, "TWEET-#{tweets[-1].tweet_id}-#{tweets[0].tweet_id}"]
114
115
  end
@@ -116,7 +117,7 @@ module Twitterscraper
116
117
 
117
118
  OLDEST_DATE = Date.parse('2006-03-21')
118
119
 
119
- def validate_options!(queries, start_date:, end_date:, lang:, limit:, threads:)
120
+ def validate_options!(queries, type:, start_date:, end_date:, lang:, limit:, threads:)
120
121
  query = queries[0]
121
122
  if query.nil? || query == ''
122
123
  raise Error.new('Please specify a search query.')
@@ -161,12 +162,12 @@ module Twitterscraper
161
162
  end
162
163
  end
163
164
 
164
- def main_loop(query, lang, limit, daily_limit, headers, proxies)
165
+ def main_loop(query, lang, type, limit, daily_limit, headers, proxies)
165
166
  pos = nil
166
167
  daily_tweets = []
167
168
 
168
169
  while true
169
- new_tweets, new_pos = query_single_page(query, lang, pos, headers: headers, proxies: proxies)
170
+ new_tweets, new_pos = query_single_page(query, lang, type, pos, headers: headers, proxies: proxies)
170
171
  unless new_tweets.empty?
171
172
  daily_tweets.concat(new_tweets)
172
173
  daily_tweets.uniq! { |t| t.tweet_id }
@@ -195,7 +196,7 @@ module Twitterscraper
195
196
  @stop_requested
196
197
  end
197
198
 
198
- def query_tweets(query, start_date: nil, end_date: nil, lang: '', limit: 100, daily_limit: nil, order: 'desc', threads: 2)
199
+ def query_tweets(query, type: 'search', start_date: nil, end_date: nil, lang: nil, limit: 100, daily_limit: nil, order: 'desc', threads: 2)
199
200
  start_date = Date.parse(start_date) if start_date && start_date.is_a?(String)
200
201
  end_date = Date.parse(end_date) if end_date && end_date.is_a?(String)
201
202
  queries = build_queries(query, start_date, end_date)
@@ -213,7 +214,7 @@ module Twitterscraper
213
214
  logger.debug "Cache #{cache_enabled? ? 'enabled' : 'disabled'}"
214
215
 
215
216
 
216
- validate_options!(queries, start_date: start_date, end_date: end_date, lang: lang, limit: limit, threads: threads)
217
+ validate_options!(queries, type: type, start_date: start_date, end_date: end_date, lang: lang, limit: limit, threads: threads)
217
218
 
218
219
  logger.info "The number of threads #{threads}"
219
220
 
@@ -229,17 +230,25 @@ module Twitterscraper
229
230
  logger.debug "Set 'Thread.abort_on_exception' to true"
230
231
 
231
232
  Parallel.each(queries, in_threads: threads) do |query|
232
- main_loop(query, lang, limit, daily_limit, headers, proxies)
233
+ main_loop(query, lang, type, limit, daily_limit, headers, proxies)
233
234
  raise Parallel::Break if stop_requested?
234
235
  end
235
236
  else
236
237
  queries.each do |query|
237
- main_loop(query, lang, limit, daily_limit, headers, proxies)
238
+ main_loop(query, lang, type, limit, daily_limit, headers, proxies)
238
239
  break if stop_requested?
239
240
  end
240
241
  end
241
242
 
242
243
  @all_tweets.sort_by { |tweet| (order == 'desc' ? -1 : 1) * tweet.created_at.to_i }
243
244
  end
245
+
246
+ def search(query, start_date: nil, end_date: nil, lang: '', limit: 100, daily_limit: nil, order: 'desc', threads: 2)
247
+ query_tweets(query, type: 'search', start_date: start_date, end_date: end_date, lang: lang, limit: limit, daily_limit: daily_limit, order: order, threads: threads)
248
+ end
249
+
250
+ def user_timeline(screen_name, limit: 100, order: 'desc')
251
+ query_tweets(screen_name, type: 'user', start_date: nil, end_date: nil, lang: nil, limit: limit, daily_limit: nil, order: order, threads: 1)
252
+ end
244
253
  end
245
254
  end
@@ -1,3 +1,3 @@
1
1
  module Twitterscraper
2
- VERSION = '0.14.0'
2
+ VERSION = '0.15.0'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: twitterscraper-ruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.14.0
4
+ version: 0.15.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - ts-3156
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-07-16 00:00:00.000000000 Z
11
+ date: 2020-07-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri