twitterscraper-ruby 0.14.0 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cf902c947e866cc99e79fbb9f8a51c829accd44aed03ef7657562bf41932c73d
4
- data.tar.gz: 1bc5a0698a17b244ee9228d7728767dd00218179a5a49e0852a74cc722322ef0
3
+ metadata.gz: a950fb24329aaa1020441e258a8a2144100d732142b6c227bb9b026b8bb73996
4
+ data.tar.gz: 1f64f31e43189e2ee439f5ef6f6d54bc6ea58895adbed67cb8ddbe91af07681a
5
5
  SHA512:
6
- metadata.gz: 629de8698af1391c210b496e9aadb51ad5f9d7157b1be5d0aa669ae821671e2b5624ba51083fb14b61f93618ff3e90aea1ac0eccb6ea00360fac48a2dfc436c7
7
- data.tar.gz: 3f3706bee5f2a92a2addae034201e2e8cee3fef43efdc323be963cbaf1b94c31c53aa49a19e58a068498722dfe07e9796e097fb04364a9afda56d06132e6b935
6
+ metadata.gz: 8573affbc9a5faa05e5e489364bb2ba0da1aa4f12af35445e5de8b1f8c399eb0575cc9f408b2ba96c3d7fd8b2a74b7dd703229053a33c1f8a883856818033cb9
7
+ data.tar.gz: 2b2b3ad0b2dd9d089a7b6127ed1b0db21e7f4fa5f0c31e6b366d9b5ae444e2244d4200c813b7a3257f43702d2caa9f264515e701602c24f4482a746b89d41328
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- twitterscraper-ruby (0.14.0)
4
+ twitterscraper-ruby (0.15.0)
5
5
  nokogiri
6
6
  parallel
7
7
 
data/README.md CHANGED
@@ -33,26 +33,39 @@ $ gem install twitterscraper-ruby
33
33
  Command-line interface:
34
34
 
35
35
  ```shell script
36
- $ twitterscraper --query KEYWORD --start_date 2020-06-01 --end_date 2020-06-30 --lang ja \
37
- --limit 100 --threads 10 --output output.json
36
+ # Returns a collection of relevant tweets matching a specified query.
37
+ $ twitterscraper --type search --query KEYWORD --start_date 2020-06-01 --end_date 2020-06-30 --lang ja \
38
+ --limit 100 --threads 10 --output tweets.json
39
+ ```
40
+
41
+ ```shell script
42
+ # Returns a collection of the most recent tweets posted by the user indicated by the screen_name
43
+ $ twitterscraper --type user --query SCREEN_NAME --limit 100 --output tweets.json
38
44
  ```
39
45
 
40
46
  From Within Ruby:
41
47
 
42
48
  ```ruby
43
49
  require 'twitterscraper'
50
+ client = Twitterscraper::Client.new(cache: true, proxy: true)
51
+ ```
44
52
 
45
- options = {
46
- start_date: '2020-06-01',
47
- end_date: '2020-06-30',
48
- lang: 'ja',
49
- limit: 100,
50
- threads: 10,
51
- }
53
+ ```ruby
54
+ # Returns a collection of relevant tweets matching a specified query.
55
+ tweets = client.search(KEYWORD, start_date: '2020-06-01', end_date: '2020-06-30', lang: 'ja', limit: 100, threads: 10)
56
+ ```
57
+
58
+ ```ruby
59
+ # Returns a collection of the most recent tweets posted by the user indicated by the screen_name
60
+ tweets = client.user_timeline(SCREEN_NAME, limit: 100)
61
+ ```
52
62
 
53
- client = Twitterscraper::Client.new(cache: true, proxy: true)
54
- tweets = client.query_tweets(KEYWORD, options)
55
63
 
64
+ ## Attributes
65
+
66
+ ### Tweet
67
+
68
+ ```ruby
56
69
  tweets.each do |tweet|
57
70
  puts tweet.tweet_id
58
71
  puts tweet.text
@@ -64,11 +77,6 @@ tweets.each do |tweet|
64
77
  end
65
78
  ```
66
79
 
67
-
68
- ## Attributes
69
-
70
- ### Tweet
71
-
72
80
  - screen_name
73
81
  - name
74
82
  - user_id
@@ -136,6 +144,7 @@ $ cat tweets.json | jq . | less
136
144
  | Option | Description | Default |
137
145
  | ------------- | ------------- | ------------- |
138
146
  | `-h`, `--help` | This option displays a summary of twitterscraper. | |
147
+ | `--type` | Specify a search type. | search |
139
148
  | `--query` | Specify a keyword used during the search. | |
140
149
  | `--start_date` | Used as "since:yyyy-mm-dd" for your query. This means "since the date". | |
141
150
  | `--end_date` | Used as "until:yyyy-mm-dd" for your query. This means "before the date". | |
@@ -16,6 +16,7 @@ module Twitterscraper
16
16
  print_version || return if print_version?
17
17
 
18
18
  query_options = {
19
+ type: options['type'],
19
20
  start_date: options['start_date'],
20
21
  end_date: options['end_date'],
21
22
  lang: options['lang'],
@@ -59,6 +60,7 @@ module Twitterscraper
59
60
  'help',
60
61
  'v',
61
62
  'version',
63
+ 'type:',
62
64
  'query:',
63
65
  'start_date:',
64
66
  'end_date:',
@@ -75,6 +77,7 @@ module Twitterscraper
75
77
  'verbose',
76
78
  )
77
79
 
80
+ options['type'] ||= 'search'
78
81
  options['start_date'] = Query::OLDEST_DATE if options['start_date'] == 'oldest'
79
82
  options['lang'] ||= ''
80
83
  options['limit'] = (options['limit'] || 100).to_i
@@ -22,23 +22,24 @@ module Twitterscraper
22
22
  RELOAD_URL = 'https://twitter.com/i/search/timeline?f=tweets&vertical=' +
23
23
  'default&include_available_features=1&include_entities=1&' +
24
24
  'reset_error_state=false&src=typd&max_position=__POS__&q=__QUERY__&l=__LANG__'
25
- INIT_URL_USER = 'https://twitter.com/{u}'
26
- RELOAD_URL_USER = 'https://twitter.com/i/profiles/show/{u}/timeline/tweets?' +
25
+ INIT_URL_USER = 'https://twitter.com/__USER__'
26
+ RELOAD_URL_USER = 'https://twitter.com/i/profiles/show/__USER__/timeline/tweets?' +
27
27
  'include_available_features=1&include_entities=1&' +
28
- 'max_position={pos}&reset_error_state=false'
29
-
30
- def build_query_url(query, lang, pos, from_user = false)
31
- # if from_user
32
- # if !pos
33
- # INIT_URL_USER.format(u = query)
34
- # else
35
- # RELOAD_URL_USER.format(u = query, pos = pos)
36
- # end
37
- # end
38
- if pos
39
- RELOAD_URL.sub('__QUERY__', query).sub('__LANG__', lang.to_s).sub('__POS__', pos)
28
+ 'max_position=__POS__&reset_error_state=false'
29
+
30
+ def build_query_url(query, lang, from_user, pos)
31
+ if from_user
32
+ if pos
33
+ RELOAD_URL_USER.sub('__USER__', query).sub('__POS__', pos.to_s)
34
+ else
35
+ INIT_URL_USER.sub('__USER__', query)
36
+ end
40
37
  else
41
- INIT_URL.sub('__QUERY__', query).sub('__LANG__', lang.to_s)
38
+ if pos
39
+ RELOAD_URL.sub('__QUERY__', query).sub('__LANG__', lang.to_s).sub('__POS__', pos)
40
+ else
41
+ INIT_URL.sub('__QUERY__', query).sub('__LANG__', lang.to_s)
42
+ end
42
43
  end
43
44
  end
44
45
 
@@ -74,11 +75,11 @@ module Twitterscraper
74
75
  [items_html, json_resp]
75
76
  end
76
77
 
77
- def query_single_page(query, lang, pos, from_user = false, headers: [], proxies: [])
78
+ def query_single_page(query, lang, type, pos, headers: [], proxies: [])
78
79
  logger.info "Querying #{query}"
79
80
  query = ERB::Util.url_encode(query)
80
81
 
81
- url = build_query_url(query, lang, pos, from_user)
82
+ url = build_query_url(query, lang, type == 'user', pos)
82
83
  http_request = lambda do
83
84
  logger.debug "Scraping tweets from #{url}"
84
85
  get_single_page(url, headers, proxies)
@@ -107,8 +108,8 @@ module Twitterscraper
107
108
 
108
109
  if json_resp
109
110
  [tweets, json_resp['min_position']]
110
- elsif from_user
111
- raise NotImplementedError
111
+ elsif type
112
+ [tweets, tweets[-1].tweet_id]
112
113
  else
113
114
  [tweets, "TWEET-#{tweets[-1].tweet_id}-#{tweets[0].tweet_id}"]
114
115
  end
@@ -116,7 +117,7 @@ module Twitterscraper
116
117
 
117
118
  OLDEST_DATE = Date.parse('2006-03-21')
118
119
 
119
- def validate_options!(queries, start_date:, end_date:, lang:, limit:, threads:)
120
+ def validate_options!(queries, type:, start_date:, end_date:, lang:, limit:, threads:)
120
121
  query = queries[0]
121
122
  if query.nil? || query == ''
122
123
  raise Error.new('Please specify a search query.')
@@ -161,12 +162,12 @@ module Twitterscraper
161
162
  end
162
163
  end
163
164
 
164
- def main_loop(query, lang, limit, daily_limit, headers, proxies)
165
+ def main_loop(query, lang, type, limit, daily_limit, headers, proxies)
165
166
  pos = nil
166
167
  daily_tweets = []
167
168
 
168
169
  while true
169
- new_tweets, new_pos = query_single_page(query, lang, pos, headers: headers, proxies: proxies)
170
+ new_tweets, new_pos = query_single_page(query, lang, type, pos, headers: headers, proxies: proxies)
170
171
  unless new_tweets.empty?
171
172
  daily_tweets.concat(new_tweets)
172
173
  daily_tweets.uniq! { |t| t.tweet_id }
@@ -195,7 +196,7 @@ module Twitterscraper
195
196
  @stop_requested
196
197
  end
197
198
 
198
- def query_tweets(query, start_date: nil, end_date: nil, lang: '', limit: 100, daily_limit: nil, order: 'desc', threads: 2)
199
+ def query_tweets(query, type: 'search', start_date: nil, end_date: nil, lang: nil, limit: 100, daily_limit: nil, order: 'desc', threads: 2)
199
200
  start_date = Date.parse(start_date) if start_date && start_date.is_a?(String)
200
201
  end_date = Date.parse(end_date) if end_date && end_date.is_a?(String)
201
202
  queries = build_queries(query, start_date, end_date)
@@ -213,7 +214,7 @@ module Twitterscraper
213
214
  logger.debug "Cache #{cache_enabled? ? 'enabled' : 'disabled'}"
214
215
 
215
216
 
216
- validate_options!(queries, start_date: start_date, end_date: end_date, lang: lang, limit: limit, threads: threads)
217
+ validate_options!(queries, type: type, start_date: start_date, end_date: end_date, lang: lang, limit: limit, threads: threads)
217
218
 
218
219
  logger.info "The number of threads #{threads}"
219
220
 
@@ -229,17 +230,25 @@ module Twitterscraper
229
230
  logger.debug "Set 'Thread.abort_on_exception' to true"
230
231
 
231
232
  Parallel.each(queries, in_threads: threads) do |query|
232
- main_loop(query, lang, limit, daily_limit, headers, proxies)
233
+ main_loop(query, lang, type, limit, daily_limit, headers, proxies)
233
234
  raise Parallel::Break if stop_requested?
234
235
  end
235
236
  else
236
237
  queries.each do |query|
237
- main_loop(query, lang, limit, daily_limit, headers, proxies)
238
+ main_loop(query, lang, type, limit, daily_limit, headers, proxies)
238
239
  break if stop_requested?
239
240
  end
240
241
  end
241
242
 
242
243
  @all_tweets.sort_by { |tweet| (order == 'desc' ? -1 : 1) * tweet.created_at.to_i }
243
244
  end
245
+
246
+ def search(query, start_date: nil, end_date: nil, lang: '', limit: 100, daily_limit: nil, order: 'desc', threads: 2)
247
+ query_tweets(query, type: 'search', start_date: start_date, end_date: end_date, lang: lang, limit: limit, daily_limit: daily_limit, order: order, threads: threads)
248
+ end
249
+
250
+ def user_timeline(screen_name, limit: 100, order: 'desc')
251
+ query_tweets(screen_name, type: 'user', start_date: nil, end_date: nil, lang: nil, limit: limit, daily_limit: nil, order: order, threads: 1)
252
+ end
244
253
  end
245
254
  end
@@ -1,3 +1,3 @@
1
1
  module Twitterscraper
2
- VERSION = '0.14.0'
2
+ VERSION = '0.15.0'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: twitterscraper-ruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.14.0
4
+ version: 0.15.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - ts-3156
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-07-16 00:00:00.000000000 Z
11
+ date: 2020-07-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri