twitterscraper-ruby 0.13.0 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.circleci/config.yml +31 -0
- data/Gemfile.lock +1 -1
- data/README.md +6 -5
- data/lib/twitterscraper/cli.rb +3 -0
- data/lib/twitterscraper/query.rb +2 -2
- data/lib/version.rb +1 -1
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cf902c947e866cc99e79fbb9f8a51c829accd44aed03ef7657562bf41932c73d
|
4
|
+
data.tar.gz: 1bc5a0698a17b244ee9228d7728767dd00218179a5a49e0852a74cc722322ef0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 629de8698af1391c210b496e9aadb51ad5f9d7157b1be5d0aa669ae821671e2b5624ba51083fb14b61f93618ff3e90aea1ac0eccb6ea00360fac48a2dfc436c7
|
7
|
+
data.tar.gz: 3f3706bee5f2a92a2addae034201e2e8cee3fef43efdc323be963cbaf1b94c31c53aa49a19e58a068498722dfe07e9796e097fb04364a9afda56d06132e6b935
|
@@ -0,0 +1,31 @@
|
|
1
|
+
version: 2.1
|
2
|
+
orbs:
|
3
|
+
ruby: circleci/ruby@0.1.2
|
4
|
+
|
5
|
+
jobs:
|
6
|
+
build:
|
7
|
+
docker:
|
8
|
+
- image: circleci/ruby:2.6.4-stretch-node
|
9
|
+
environment:
|
10
|
+
BUNDLER_VERSION: 2.1.4
|
11
|
+
executor: ruby/default
|
12
|
+
steps:
|
13
|
+
- checkout
|
14
|
+
- run:
|
15
|
+
name: Update bundler
|
16
|
+
command: gem update bundler
|
17
|
+
- run:
|
18
|
+
name: Which bundler?
|
19
|
+
command: bundle -v
|
20
|
+
- restore_cache:
|
21
|
+
keys:
|
22
|
+
- gem-cache-v1-{{ arch }}-{{ .Branch }}-{{ checksum "Gemfile.lock" }}
|
23
|
+
- gem-cache-v1-{{ arch }}-{{ .Branch }}
|
24
|
+
- gem-cache-v1
|
25
|
+
- run: bundle install --path vendor/bundle
|
26
|
+
- run: bundle clean
|
27
|
+
- save_cache:
|
28
|
+
key: gem-cache-v1-{{ arch }}-{{ .Branch }}-{{ checksum "Gemfile.lock" }}
|
29
|
+
paths:
|
30
|
+
- vendor/bundle
|
31
|
+
- run: bundle exec rspec
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
# twitterscraper-ruby
|
2
2
|
|
3
|
+
[![CircleCI](https://circleci.com/gh/ts-3156/twitterscraper-ruby.svg?style=svg)](https://circleci.com/gh/ts-3156/twitterscraper-ruby)
|
3
4
|
[![Gem Version](https://badge.fury.io/rb/twitterscraper-ruby.svg)](https://badge.fury.io/rb/twitterscraper-ruby)
|
4
5
|
|
5
6
|
A gem to scrape https://twitter.com/search. This gem is inspired by [taspinar/twitterscraper](https://github.com/taspinar/twitterscraper).
|
@@ -33,7 +34,7 @@ Command-line interface:
|
|
33
34
|
|
34
35
|
```shell script
|
35
36
|
$ twitterscraper --query KEYWORD --start_date 2020-06-01 --end_date 2020-06-30 --lang ja \
|
36
|
-
--limit 100 --threads 10 --
|
37
|
+
--limit 100 --threads 10 --output output.json
|
37
38
|
```
|
38
39
|
|
39
40
|
From Within Ruby:
|
@@ -47,10 +48,9 @@ options = {
|
|
47
48
|
lang: 'ja',
|
48
49
|
limit: 100,
|
49
50
|
threads: 10,
|
50
|
-
proxy: true
|
51
51
|
}
|
52
52
|
|
53
|
-
client = Twitterscraper::Client.new
|
53
|
+
client = Twitterscraper::Client.new(cache: true, proxy: true)
|
54
54
|
tweets = client.query_tweets(KEYWORD, options)
|
55
55
|
|
56
56
|
tweets.each do |tweet|
|
@@ -141,9 +141,10 @@ $ cat tweets.json | jq . | less
|
|
141
141
|
| `--end_date` | Used as "until:yyyy-mm-dd" for your query. This means "before the date". | |
|
142
142
|
| `--lang` | Retrieve tweets written in a specific language. | |
|
143
143
|
| `--limit` | Stop scraping when *at least* the number of tweets indicated with --limit is scraped. | 100 |
|
144
|
+
| `--order` | Sort order of the results. | desc |
|
144
145
|
| `--threads` | Set the number of threads twitterscraper-ruby should initiate while scraping for your query. | 2 |
|
145
|
-
| `--proxy` | Scrape https://twitter.com/search via proxies. |
|
146
|
-
| `--cache` | Enable caching. |
|
146
|
+
| `--proxy` | Scrape https://twitter.com/search via proxies. | true |
|
147
|
+
| `--cache` | Enable caching. | true |
|
147
148
|
| `--format` | The format of the output. | json |
|
148
149
|
| `--output` | The name of the output file. | tweets.json |
|
149
150
|
| `--verbose` | Print debug messages. | tweets.json |
|
data/lib/twitterscraper/cli.rb
CHANGED
@@ -21,6 +21,7 @@ module Twitterscraper
|
|
21
21
|
lang: options['lang'],
|
22
22
|
limit: options['limit'],
|
23
23
|
daily_limit: options['daily_limit'],
|
24
|
+
order: options['order'],
|
24
25
|
threads: options['threads'],
|
25
26
|
}
|
26
27
|
client = Twitterscraper::Client.new(cache: options['cache'], proxy: options['proxy'])
|
@@ -64,6 +65,7 @@ module Twitterscraper
|
|
64
65
|
'lang:',
|
65
66
|
'limit:',
|
66
67
|
'daily_limit:',
|
68
|
+
'order:',
|
67
69
|
'threads:',
|
68
70
|
'output:',
|
69
71
|
'format:',
|
@@ -79,6 +81,7 @@ module Twitterscraper
|
|
79
81
|
options['daily_limit'] = options['daily_limit'].to_i if options['daily_limit']
|
80
82
|
options['threads'] = (options['threads'] || 2).to_i
|
81
83
|
options['format'] ||= 'json'
|
84
|
+
options['order'] ||= 'desc'
|
82
85
|
options['output'] ||= "tweets.#{options['format']}"
|
83
86
|
|
84
87
|
options['cache'] = options['cache'] != 'false'
|
data/lib/twitterscraper/query.rb
CHANGED
@@ -195,7 +195,7 @@ module Twitterscraper
|
|
195
195
|
@stop_requested
|
196
196
|
end
|
197
197
|
|
198
|
-
def query_tweets(query, start_date: nil, end_date: nil, lang: '', limit: 100, daily_limit: nil, threads: 2)
|
198
|
+
def query_tweets(query, start_date: nil, end_date: nil, lang: '', limit: 100, daily_limit: nil, order: 'desc', threads: 2)
|
199
199
|
start_date = Date.parse(start_date) if start_date && start_date.is_a?(String)
|
200
200
|
end_date = Date.parse(end_date) if end_date && end_date.is_a?(String)
|
201
201
|
queries = build_queries(query, start_date, end_date)
|
@@ -239,7 +239,7 @@ module Twitterscraper
|
|
239
239
|
end
|
240
240
|
end
|
241
241
|
|
242
|
-
@all_tweets.sort_by { |tweet| -tweet.created_at.to_i }
|
242
|
+
@all_tweets.sort_by { |tweet| (order == 'desc' ? -1 : 1) * tweet.created_at.to_i }
|
243
243
|
end
|
244
244
|
end
|
245
245
|
end
|
data/lib/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twitterscraper-ruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.13.0
|
4
|
+
version: 0.14.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ts-3156
|
@@ -46,6 +46,7 @@ executables:
|
|
46
46
|
extensions: []
|
47
47
|
extra_rdoc_files: []
|
48
48
|
files:
|
49
|
+
- ".circleci/config.yml"
|
49
50
|
- ".gitignore"
|
50
51
|
- ".irbrc"
|
51
52
|
- ".rspec"
|