twitterscraper-ruby 0.12.0 → 0.15.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.circleci/config.yml +31 -0
- data/.rspec +2 -0
- data/Gemfile +1 -0
- data/Gemfile.lock +16 -1
- data/README.md +87 -56
- data/lib/twitterscraper.rb +1 -0
- data/lib/twitterscraper/cli.rb +12 -4
- data/lib/twitterscraper/client.rb +6 -1
- data/lib/twitterscraper/query.rb +52 -31
- data/lib/twitterscraper/type.rb +15 -0
- data/lib/version.rb +1 -1
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7f7d320841125d9a582ece6083f421f0abf301addbc5c5c2a3d2b2c09bedbc33
|
4
|
+
data.tar.gz: 6ea43165ffa4f37c4319566689a42f2f275d8a70402b0d6b4164df519fee90b5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ee3756538ec28e9f0113e611e2731ec33107dabacf7cb730b257d6c94351407ef171a9bc91402a589fa73fdb6b705f73b11582766af1d04a3413b8bc79dc6619
|
7
|
+
data.tar.gz: 78200dc658a9c1cf43ed7367e499b0d1b243728aecb2ffd7366b5612f8905bb33d27ab7e1412327d05b7fff159196fe9e24d18c8cc4c24898af10533fbdf43df
|
@@ -0,0 +1,31 @@
|
|
1
|
+
version: 2.1
|
2
|
+
orbs:
|
3
|
+
ruby: circleci/ruby@0.1.2
|
4
|
+
|
5
|
+
jobs:
|
6
|
+
build:
|
7
|
+
docker:
|
8
|
+
- image: circleci/ruby:2.6.4-stretch-node
|
9
|
+
environment:
|
10
|
+
BUNDLER_VERSION: 2.1.4
|
11
|
+
executor: ruby/default
|
12
|
+
steps:
|
13
|
+
- checkout
|
14
|
+
- run:
|
15
|
+
name: Update bundler
|
16
|
+
command: gem update bundler
|
17
|
+
- run:
|
18
|
+
name: Which bundler?
|
19
|
+
command: bundle -v
|
20
|
+
- restore_cache:
|
21
|
+
keys:
|
22
|
+
- gem-cache-v1-{{ arch }}-{{ .Branch }}-{{ checksum "Gemfile.lock" }}
|
23
|
+
- gem-cache-v1-{{ arch }}-{{ .Branch }}
|
24
|
+
- gem-cache-v1
|
25
|
+
- run: bundle install --path vendor/bundle
|
26
|
+
- run: bundle clean
|
27
|
+
- save_cache:
|
28
|
+
key: gem-cache-v1-{{ arch }}-{{ .Branch }}-{{ checksum "Gemfile.lock" }}
|
29
|
+
paths:
|
30
|
+
- vendor/bundle
|
31
|
+
- run: bundle exec rspec
|
data/.rspec
ADDED
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,19 +1,33 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
twitterscraper-ruby (0.
|
4
|
+
twitterscraper-ruby (0.15.2)
|
5
5
|
nokogiri
|
6
6
|
parallel
|
7
7
|
|
8
8
|
GEM
|
9
9
|
remote: https://rubygems.org/
|
10
10
|
specs:
|
11
|
+
diff-lcs (1.4.4)
|
11
12
|
mini_portile2 (2.4.0)
|
12
13
|
minitest (5.14.1)
|
13
14
|
nokogiri (1.10.10)
|
14
15
|
mini_portile2 (~> 2.4.0)
|
15
16
|
parallel (1.19.2)
|
16
17
|
rake (12.3.3)
|
18
|
+
rspec (3.9.0)
|
19
|
+
rspec-core (~> 3.9.0)
|
20
|
+
rspec-expectations (~> 3.9.0)
|
21
|
+
rspec-mocks (~> 3.9.0)
|
22
|
+
rspec-core (3.9.2)
|
23
|
+
rspec-support (~> 3.9.3)
|
24
|
+
rspec-expectations (3.9.2)
|
25
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
26
|
+
rspec-support (~> 3.9.0)
|
27
|
+
rspec-mocks (3.9.1)
|
28
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
29
|
+
rspec-support (~> 3.9.0)
|
30
|
+
rspec-support (3.9.3)
|
17
31
|
|
18
32
|
PLATFORMS
|
19
33
|
ruby
|
@@ -21,6 +35,7 @@ PLATFORMS
|
|
21
35
|
DEPENDENCIES
|
22
36
|
minitest (~> 5.0)
|
23
37
|
rake (~> 12.0)
|
38
|
+
rspec
|
24
39
|
twitterscraper-ruby!
|
25
40
|
|
26
41
|
BUNDLED WITH
|
data/README.md
CHANGED
@@ -1,18 +1,21 @@
|
|
1
1
|
# twitterscraper-ruby
|
2
2
|
|
3
|
+
[](https://circleci.com/gh/ts-3156/twitterscraper-ruby)
|
3
4
|
[](https://badge.fury.io/rb/twitterscraper-ruby)
|
4
5
|
|
5
6
|
A gem to scrape https://twitter.com/search. This gem is inspired by [taspinar/twitterscraper](https://github.com/taspinar/twitterscraper).
|
6
7
|
|
8
|
+
Please feel free to ask [@ts_3156](https://twitter.com/ts_3156) if you have any questions.
|
9
|
+
|
7
10
|
|
8
11
|
## Twitter Search API vs. twitterscraper-ruby
|
9
12
|
|
10
|
-
|
13
|
+
#### Twitter Search API
|
11
14
|
|
12
15
|
- The number of tweets: 180 - 450 requests/15 minutes (18,000 - 45,000 tweets/15 minutes)
|
13
16
|
- The time window: the past 7 days
|
14
17
|
|
15
|
-
|
18
|
+
#### twitterscraper-ruby
|
16
19
|
|
17
20
|
- The number of tweets: Unlimited
|
18
21
|
- The time window: from 2006-3-21 to today
|
@@ -29,45 +32,92 @@ $ gem install twitterscraper-ruby
|
|
29
32
|
|
30
33
|
## Usage
|
31
34
|
|
32
|
-
Command-line interface:
|
35
|
+
#### Command-line interface:
|
36
|
+
|
37
|
+
Returns a collection of relevant tweets matching a specified query.
|
33
38
|
|
34
39
|
```shell script
|
35
|
-
$ twitterscraper --query KEYWORD --start_date 2020-06-01 --end_date 2020-06-30 --lang ja \
|
36
|
-
--limit 100 --threads 10 --
|
40
|
+
$ twitterscraper --type search --query KEYWORD --start_date 2020-06-01 --end_date 2020-06-30 --lang ja \
|
41
|
+
--limit 100 --threads 10 --output tweets.json
|
37
42
|
```
|
38
43
|
|
39
|
-
|
44
|
+
Returns a collection of the most recent tweets posted by the user indicated by the screen_name.
|
45
|
+
|
46
|
+
```shell script
|
47
|
+
$ twitterscraper --type user --query SCREEN_NAME --limit 100 --output tweets.json
|
48
|
+
```
|
49
|
+
|
50
|
+
#### From Within Ruby:
|
40
51
|
|
41
52
|
```ruby
|
42
53
|
require 'twitterscraper'
|
54
|
+
client = Twitterscraper::Client.new(cache: true, proxy: true)
|
55
|
+
```
|
43
56
|
|
44
|
-
|
45
|
-
start_date: '2020-06-01',
|
46
|
-
end_date: '2020-06-30',
|
47
|
-
lang: 'ja',
|
48
|
-
limit: 100,
|
49
|
-
threads: 10,
|
50
|
-
proxy: true
|
51
|
-
}
|
57
|
+
Returns a collection of relevant tweets matching a specified query.
|
52
58
|
|
53
|
-
|
54
|
-
tweets = client.
|
59
|
+
```ruby
|
60
|
+
tweets = client.search(KEYWORD, start_date: '2020-06-01', end_date: '2020-06-30', lang: 'ja', limit: 100, threads: 10)
|
61
|
+
```
|
62
|
+
|
63
|
+
Returns a collection of the most recent tweets posted by the user indicated by the screen_name.
|
64
|
+
|
65
|
+
```ruby
|
66
|
+
tweets = client.user_timeline(SCREEN_NAME, limit: 100)
|
67
|
+
```
|
55
68
|
|
69
|
+
|
70
|
+
## Examples
|
71
|
+
|
72
|
+
```shell script
|
73
|
+
$ twitterscraper --query twitter --limit 1000
|
74
|
+
$ cat tweets.json | jq . | less
|
75
|
+
```
|
76
|
+
|
77
|
+
|
78
|
+
## Attributes
|
79
|
+
|
80
|
+
### Tweet
|
81
|
+
|
82
|
+
```ruby
|
56
83
|
tweets.each do |tweet|
|
57
84
|
puts tweet.tweet_id
|
58
85
|
puts tweet.text
|
59
86
|
puts tweet.tweet_url
|
60
87
|
puts tweet.created_at
|
61
88
|
|
89
|
+
attr_names = hash.keys
|
62
90
|
hash = tweet.attrs
|
63
|
-
|
91
|
+
json = tweet.to_json
|
64
92
|
end
|
65
93
|
```
|
66
94
|
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
95
|
+
```json
|
96
|
+
[
|
97
|
+
{
|
98
|
+
"screen_name": "@name",
|
99
|
+
"name": "Name",
|
100
|
+
"user_id": 12340000,
|
101
|
+
"tweet_id": 1234000000000000,
|
102
|
+
"text": "Thanks Twitter!",
|
103
|
+
"links": [],
|
104
|
+
"hashtags": [],
|
105
|
+
"image_urls": [],
|
106
|
+
"video_url": null,
|
107
|
+
"has_media": null,
|
108
|
+
"likes": 10,
|
109
|
+
"retweets": 20,
|
110
|
+
"replies": 0,
|
111
|
+
"is_replied": false,
|
112
|
+
"is_reply_to": false,
|
113
|
+
"parent_tweet_id": null,
|
114
|
+
"reply_to_users": [],
|
115
|
+
"tweet_url": "https://twitter.com/name/status/1234000000000000",
|
116
|
+
"timestamp": 1594793000,
|
117
|
+
"created_at": "2020-07-15 00:00:00 +0000"
|
118
|
+
}
|
119
|
+
]
|
120
|
+
```
|
71
121
|
|
72
122
|
- screen_name
|
73
123
|
- name
|
@@ -110,43 +160,24 @@ end
|
|
110
160
|
Search operators documentation is in [Standard search operators](https://developer.twitter.com/en/docs/tweets/rules-and-filtering/overview/standard-operators).
|
111
161
|
|
112
162
|
|
113
|
-
## Examples
|
114
|
-
|
115
|
-
```shell script
|
116
|
-
$ twitterscraper --query twitter --limit 1000
|
117
|
-
$ cat tweets.json | jq . | less
|
118
|
-
```
|
119
|
-
|
120
|
-
```json
|
121
|
-
[
|
122
|
-
{
|
123
|
-
"screen_name": "@screenname",
|
124
|
-
"name": "name",
|
125
|
-
"user_id": 1194529546483000000,
|
126
|
-
"tweet_id": 1282659891992000000,
|
127
|
-
"tweet_url": "https://twitter.com/screenname/status/1282659891992000000",
|
128
|
-
"created_at": "2020-07-13 12:00:00 +0000",
|
129
|
-
"text": "Thanks Twitter!"
|
130
|
-
}
|
131
|
-
]
|
132
|
-
```
|
133
|
-
|
134
163
|
## CLI Options
|
135
164
|
|
136
|
-
| Option | Description |
|
137
|
-
| ------------- | ------------- | ------------- |
|
138
|
-
|
|
139
|
-
| `--
|
140
|
-
| `--
|
141
|
-
| `--
|
142
|
-
| `--
|
143
|
-
| `--
|
144
|
-
| `--
|
145
|
-
| `--
|
146
|
-
| `--
|
147
|
-
| `--
|
148
|
-
| `--
|
149
|
-
| `--
|
165
|
+
| Option | Type | Description | Value |
|
166
|
+
| ------------- | ------------- | ------------- | ------------- |
|
167
|
+
| `--help` | string | This option displays a summary of twitterscraper. | |
|
168
|
+
| `--type` | string | Specify a search type. | search(default) or user |
|
169
|
+
| `--query` | string | Specify a keyword used during the search. | |
|
170
|
+
| `--start_date` | string | Used as "since:yyyy-mm-dd" for your query. This means "since the date". | |
|
171
|
+
| `--end_date` | string | Used as "until:yyyy-mm-dd" for your query. This means "before the date". | |
|
172
|
+
| `--lang` | string | Retrieve tweets written in a specific language. | |
|
173
|
+
| `--limit` | integer | Stop scraping when *at least* the number of tweets indicated with --limit is scraped. | 100 |
|
174
|
+
| `--order` | string | Specify the sort order of the results. | desc(default) or asc |
|
175
|
+
| `--threads` | integer | Set the number of threads twitterscraper-ruby should initiate while scraping for your query. | 2 |
|
176
|
+
| `--proxy` | boolean | Scrape https://twitter.com/search via proxies. | true(default) or false |
|
177
|
+
| `--cache` | boolean | Enable caching. | true(default) or false |
|
178
|
+
| `--format` | string | The format of the output. | json(default) or html |
|
179
|
+
| `--output` | string | The name of the output file. | tweets.json |
|
180
|
+
| `--verbose` | | Print debug messages. | |
|
150
181
|
|
151
182
|
|
152
183
|
## Contributing
|
data/lib/twitterscraper.rb
CHANGED
data/lib/twitterscraper/cli.rb
CHANGED
@@ -16,15 +16,16 @@ module Twitterscraper
|
|
16
16
|
print_version || return if print_version?
|
17
17
|
|
18
18
|
query_options = {
|
19
|
+
type: options['type'],
|
19
20
|
start_date: options['start_date'],
|
20
21
|
end_date: options['end_date'],
|
21
22
|
lang: options['lang'],
|
22
23
|
limit: options['limit'],
|
23
24
|
daily_limit: options['daily_limit'],
|
25
|
+
order: options['order'],
|
24
26
|
threads: options['threads'],
|
25
|
-
proxy: options['proxy']
|
26
27
|
}
|
27
|
-
client = Twitterscraper::Client.new(cache: options['cache'])
|
28
|
+
client = Twitterscraper::Client.new(cache: options['cache'], proxy: options['proxy'])
|
28
29
|
tweets = client.query_tweets(options['query'], query_options)
|
29
30
|
export(tweets) unless tweets.empty?
|
30
31
|
end
|
@@ -59,29 +60,36 @@ module Twitterscraper
|
|
59
60
|
'help',
|
60
61
|
'v',
|
61
62
|
'version',
|
63
|
+
'type:',
|
62
64
|
'query:',
|
63
65
|
'start_date:',
|
64
66
|
'end_date:',
|
65
67
|
'lang:',
|
66
68
|
'limit:',
|
67
69
|
'daily_limit:',
|
70
|
+
'order:',
|
68
71
|
'threads:',
|
69
72
|
'output:',
|
70
73
|
'format:',
|
71
|
-
'cache',
|
72
|
-
'proxy',
|
74
|
+
'cache:',
|
75
|
+
'proxy:',
|
73
76
|
'pretty',
|
74
77
|
'verbose',
|
75
78
|
)
|
76
79
|
|
80
|
+
options['type'] ||= 'search'
|
77
81
|
options['start_date'] = Query::OLDEST_DATE if options['start_date'] == 'oldest'
|
78
82
|
options['lang'] ||= ''
|
79
83
|
options['limit'] = (options['limit'] || 100).to_i
|
80
84
|
options['daily_limit'] = options['daily_limit'].to_i if options['daily_limit']
|
81
85
|
options['threads'] = (options['threads'] || 2).to_i
|
82
86
|
options['format'] ||= 'json'
|
87
|
+
options['order'] ||= 'desc'
|
83
88
|
options['output'] ||= "tweets.#{options['format']}"
|
84
89
|
|
90
|
+
options['cache'] = options['cache'] != 'false'
|
91
|
+
options['proxy'] = options['proxy'] != 'false'
|
92
|
+
|
85
93
|
options
|
86
94
|
end
|
87
95
|
|
@@ -2,12 +2,17 @@ module Twitterscraper
|
|
2
2
|
class Client
|
3
3
|
include Query
|
4
4
|
|
5
|
-
def initialize(cache:
|
5
|
+
def initialize(cache: true, proxy: true)
|
6
6
|
@cache = cache
|
7
|
+
@proxy = proxy
|
7
8
|
end
|
8
9
|
|
9
10
|
def cache_enabled?
|
10
11
|
@cache
|
11
12
|
end
|
13
|
+
|
14
|
+
def proxy_enabled?
|
15
|
+
@proxy
|
16
|
+
end
|
12
17
|
end
|
13
18
|
end
|
data/lib/twitterscraper/query.rb
CHANGED
@@ -22,23 +22,24 @@ module Twitterscraper
|
|
22
22
|
RELOAD_URL = 'https://twitter.com/i/search/timeline?f=tweets&vertical=' +
|
23
23
|
'default&include_available_features=1&include_entities=1&' +
|
24
24
|
'reset_error_state=false&src=typd&max_position=__POS__&q=__QUERY__&l=__LANG__'
|
25
|
-
INIT_URL_USER = 'https://twitter.com/
|
26
|
-
RELOAD_URL_USER = 'https://twitter.com/i/profiles/show/
|
25
|
+
INIT_URL_USER = 'https://twitter.com/__USER__'
|
26
|
+
RELOAD_URL_USER = 'https://twitter.com/i/profiles/show/__USER__/timeline/tweets?' +
|
27
27
|
'include_available_features=1&include_entities=1&' +
|
28
|
-
'max_position=
|
29
|
-
|
30
|
-
def build_query_url(query, lang,
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
# end
|
38
|
-
if pos
|
39
|
-
RELOAD_URL.sub('__QUERY__', query).sub('__LANG__', lang.to_s).sub('__POS__', pos)
|
28
|
+
'max_position=__POS__&reset_error_state=false'
|
29
|
+
|
30
|
+
def build_query_url(query, lang, type, pos)
|
31
|
+
if type.user?
|
32
|
+
if pos
|
33
|
+
RELOAD_URL_USER.sub('__USER__', query).sub('__POS__', pos.to_s)
|
34
|
+
else
|
35
|
+
INIT_URL_USER.sub('__USER__', query)
|
36
|
+
end
|
40
37
|
else
|
41
|
-
|
38
|
+
if pos
|
39
|
+
RELOAD_URL.sub('__QUERY__', query).sub('__LANG__', lang.to_s).sub('__POS__', pos)
|
40
|
+
else
|
41
|
+
INIT_URL.sub('__QUERY__', query).sub('__LANG__', lang.to_s)
|
42
|
+
end
|
42
43
|
end
|
43
44
|
end
|
44
45
|
|
@@ -50,7 +51,7 @@ module Twitterscraper
|
|
50
51
|
end
|
51
52
|
Http.get(url, headers, proxy, timeout)
|
52
53
|
rescue => e
|
53
|
-
logger.debug "
|
54
|
+
logger.debug "get_single_page: #{e.inspect}"
|
54
55
|
if (retries -= 1) > 0
|
55
56
|
logger.info "Retrying... (Attempts left: #{retries - 1})"
|
56
57
|
retry
|
@@ -74,11 +75,11 @@ module Twitterscraper
|
|
74
75
|
[items_html, json_resp]
|
75
76
|
end
|
76
77
|
|
77
|
-
def query_single_page(query, lang,
|
78
|
+
def query_single_page(query, lang, type, pos, headers: [], proxies: [])
|
78
79
|
logger.info "Querying #{query}"
|
79
80
|
query = ERB::Util.url_encode(query)
|
80
81
|
|
81
|
-
url = build_query_url(query, lang,
|
82
|
+
url = build_query_url(query, lang, type, pos)
|
82
83
|
http_request = lambda do
|
83
84
|
logger.debug "Scraping tweets from #{url}"
|
84
85
|
get_single_page(url, headers, proxies)
|
@@ -107,8 +108,8 @@ module Twitterscraper
|
|
107
108
|
|
108
109
|
if json_resp
|
109
110
|
[tweets, json_resp['min_position']]
|
110
|
-
elsif
|
111
|
-
|
111
|
+
elsif type.user?
|
112
|
+
[tweets, tweets[-1].tweet_id]
|
112
113
|
else
|
113
114
|
[tweets, "TWEET-#{tweets[-1].tweet_id}-#{tweets[0].tweet_id}"]
|
114
115
|
end
|
@@ -116,7 +117,8 @@ module Twitterscraper
|
|
116
117
|
|
117
118
|
OLDEST_DATE = Date.parse('2006-03-21')
|
118
119
|
|
119
|
-
def validate_options!(
|
120
|
+
def validate_options!(queries, type:, start_date:, end_date:, lang:, limit:, threads:)
|
121
|
+
query = queries[0]
|
120
122
|
if query.nil? || query == ''
|
121
123
|
raise Error.new('Please specify a search query.')
|
122
124
|
end
|
@@ -160,12 +162,12 @@ module Twitterscraper
|
|
160
162
|
end
|
161
163
|
end
|
162
164
|
|
163
|
-
def main_loop(query, lang, limit, daily_limit, headers, proxies)
|
165
|
+
def main_loop(query, lang, type, limit, daily_limit, headers, proxies)
|
164
166
|
pos = nil
|
165
167
|
daily_tweets = []
|
166
168
|
|
167
169
|
while true
|
168
|
-
new_tweets, new_pos = query_single_page(query, lang, pos, headers: headers, proxies: proxies)
|
170
|
+
new_tweets, new_pos = query_single_page(query, lang, type, pos, headers: headers, proxies: proxies)
|
169
171
|
unless new_tweets.empty?
|
170
172
|
daily_tweets.concat(new_tweets)
|
171
173
|
daily_tweets.uniq! { |t| t.tweet_id }
|
@@ -194,16 +196,27 @@ module Twitterscraper
|
|
194
196
|
@stop_requested
|
195
197
|
end
|
196
198
|
|
197
|
-
def query_tweets(query, start_date: nil, end_date: nil, lang:
|
199
|
+
def query_tweets(query, type: 'search', start_date: nil, end_date: nil, lang: nil, limit: 100, daily_limit: nil, order: 'desc', threads: 2)
|
198
200
|
start_date = Date.parse(start_date) if start_date && start_date.is_a?(String)
|
199
201
|
end_date = Date.parse(end_date) if end_date && end_date.is_a?(String)
|
200
202
|
queries = build_queries(query, start_date, end_date)
|
201
|
-
|
202
|
-
|
203
|
+
type = Type.new(type)
|
204
|
+
if threads > queries.size
|
205
|
+
logger.warn 'The maximum number of :threads is the number of dates between :start_date and :end_date.'
|
206
|
+
threads = queries.size
|
207
|
+
end
|
208
|
+
if proxy_enabled?
|
209
|
+
proxies = Proxy::Pool.new
|
210
|
+
logger.debug "Fetch #{proxies.size} proxies"
|
211
|
+
else
|
212
|
+
proxies = []
|
213
|
+
logger.debug 'Proxy disabled'
|
214
|
+
end
|
215
|
+
logger.debug "Cache #{cache_enabled? ? 'enabled' : 'disabled'}"
|
216
|
+
|
203
217
|
|
204
|
-
validate_options!(queries
|
218
|
+
validate_options!(queries, type: type, start_date: start_date, end_date: end_date, lang: lang, limit: limit, threads: threads)
|
205
219
|
|
206
|
-
logger.debug "Fetch #{proxies.size} proxies" if proxy
|
207
220
|
logger.info "The number of threads #{threads}"
|
208
221
|
|
209
222
|
headers = {'User-Agent': USER_AGENT_LIST.sample, 'X-Requested-With': 'XMLHttpRequest'}
|
@@ -218,17 +231,25 @@ module Twitterscraper
|
|
218
231
|
logger.debug "Set 'Thread.abort_on_exception' to true"
|
219
232
|
|
220
233
|
Parallel.each(queries, in_threads: threads) do |query|
|
221
|
-
main_loop(query, lang, limit, daily_limit, headers, proxies)
|
234
|
+
main_loop(query, lang, type, limit, daily_limit, headers, proxies)
|
222
235
|
raise Parallel::Break if stop_requested?
|
223
236
|
end
|
224
237
|
else
|
225
238
|
queries.each do |query|
|
226
|
-
main_loop(query, lang, limit, daily_limit, headers, proxies)
|
239
|
+
main_loop(query, lang, type, limit, daily_limit, headers, proxies)
|
227
240
|
break if stop_requested?
|
228
241
|
end
|
229
242
|
end
|
230
243
|
|
231
|
-
@all_tweets.sort_by { |tweet| -tweet.created_at.to_i }
|
244
|
+
@all_tweets.sort_by { |tweet| (order == 'desc' ? -1 : 1) * tweet.created_at.to_i }
|
245
|
+
end
|
246
|
+
|
247
|
+
def search(query, start_date: nil, end_date: nil, lang: '', limit: 100, daily_limit: nil, order: 'desc', threads: 2)
|
248
|
+
query_tweets(query, type: 'search', start_date: start_date, end_date: end_date, lang: lang, limit: limit, daily_limit: daily_limit, order: order, threads: threads)
|
249
|
+
end
|
250
|
+
|
251
|
+
def user_timeline(screen_name, limit: 100, order: 'desc')
|
252
|
+
query_tweets(screen_name, type: 'user', start_date: nil, end_date: nil, lang: nil, limit: limit, daily_limit: nil, order: order, threads: 1)
|
232
253
|
end
|
233
254
|
end
|
234
255
|
end
|
data/lib/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twitterscraper-ruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.15.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ts-3156
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-07-
|
11
|
+
date: 2020-07-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -46,8 +46,10 @@ executables:
|
|
46
46
|
extensions: []
|
47
47
|
extra_rdoc_files: []
|
48
48
|
files:
|
49
|
+
- ".circleci/config.yml"
|
49
50
|
- ".gitignore"
|
50
51
|
- ".irbrc"
|
52
|
+
- ".rspec"
|
51
53
|
- ".ruby-version"
|
52
54
|
- ".travis.yml"
|
53
55
|
- CODE_OF_CONDUCT.md
|
@@ -71,6 +73,7 @@ files:
|
|
71
73
|
- lib/twitterscraper/query.rb
|
72
74
|
- lib/twitterscraper/template.rb
|
73
75
|
- lib/twitterscraper/tweet.rb
|
76
|
+
- lib/twitterscraper/type.rb
|
74
77
|
- lib/version.rb
|
75
78
|
- twitterscraper-ruby.gemspec
|
76
79
|
homepage: https://github.com/ts-3156/twitterscraper-ruby
|