twitterscraper-ruby 0.13.0 → 0.14.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.circleci/config.yml +31 -0
- data/Gemfile.lock +1 -1
- data/README.md +6 -5
- data/lib/twitterscraper/cli.rb +3 -0
- data/lib/twitterscraper/query.rb +2 -2
- data/lib/version.rb +1 -1
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cf902c947e866cc99e79fbb9f8a51c829accd44aed03ef7657562bf41932c73d
|
4
|
+
data.tar.gz: 1bc5a0698a17b244ee9228d7728767dd00218179a5a49e0852a74cc722322ef0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 629de8698af1391c210b496e9aadb51ad5f9d7157b1be5d0aa669ae821671e2b5624ba51083fb14b61f93618ff3e90aea1ac0eccb6ea00360fac48a2dfc436c7
|
7
|
+
data.tar.gz: 3f3706bee5f2a92a2addae034201e2e8cee3fef43efdc323be963cbaf1b94c31c53aa49a19e58a068498722dfe07e9796e097fb04364a9afda56d06132e6b935
|
@@ -0,0 +1,31 @@
|
|
1
|
+
version: 2.1
|
2
|
+
orbs:
|
3
|
+
ruby: circleci/ruby@0.1.2
|
4
|
+
|
5
|
+
jobs:
|
6
|
+
build:
|
7
|
+
docker:
|
8
|
+
- image: circleci/ruby:2.6.4-stretch-node
|
9
|
+
environment:
|
10
|
+
BUNDLER_VERSION: 2.1.4
|
11
|
+
executor: ruby/default
|
12
|
+
steps:
|
13
|
+
- checkout
|
14
|
+
- run:
|
15
|
+
name: Update bundler
|
16
|
+
command: gem update bundler
|
17
|
+
- run:
|
18
|
+
name: Which bundler?
|
19
|
+
command: bundle -v
|
20
|
+
- restore_cache:
|
21
|
+
keys:
|
22
|
+
- gem-cache-v1-{{ arch }}-{{ .Branch }}-{{ checksum "Gemfile.lock" }}
|
23
|
+
- gem-cache-v1-{{ arch }}-{{ .Branch }}
|
24
|
+
- gem-cache-v1
|
25
|
+
- run: bundle install --path vendor/bundle
|
26
|
+
- run: bundle clean
|
27
|
+
- save_cache:
|
28
|
+
key: gem-cache-v1-{{ arch }}-{{ .Branch }}-{{ checksum "Gemfile.lock" }}
|
29
|
+
paths:
|
30
|
+
- vendor/bundle
|
31
|
+
- run: bundle exec rspec
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
# twitterscraper-ruby
|
2
2
|
|
3
|
+
[![Build Status](https://circleci.com/gh/ts-3156/twitterscraper-ruby.svg?style=svg)](https://circleci.com/gh/ts-3156/twitterscraper-ruby)
|
3
4
|
[![Gem Version](https://badge.fury.io/rb/twitterscraper-ruby.svg)](https://badge.fury.io/rb/twitterscraper-ruby)
|
4
5
|
|
5
6
|
A gem to scrape https://twitter.com/search. This gem is inspired by [taspinar/twitterscraper](https://github.com/taspinar/twitterscraper).
|
@@ -33,7 +34,7 @@ Command-line interface:
|
|
33
34
|
|
34
35
|
```shell script
|
35
36
|
$ twitterscraper --query KEYWORD --start_date 2020-06-01 --end_date 2020-06-30 --lang ja \
|
36
|
-
--limit 100 --threads 10 --
|
37
|
+
--limit 100 --threads 10 --output output.json
|
37
38
|
```
|
38
39
|
|
39
40
|
From Within Ruby:
|
@@ -47,10 +48,9 @@ options = {
|
|
47
48
|
lang: 'ja',
|
48
49
|
limit: 100,
|
49
50
|
threads: 10,
|
50
|
-
proxy: true
|
51
51
|
}
|
52
52
|
|
53
|
-
client = Twitterscraper::Client.new
|
53
|
+
client = Twitterscraper::Client.new(cache: true, proxy: true)
|
54
54
|
tweets = client.query_tweets(KEYWORD, options)
|
55
55
|
|
56
56
|
tweets.each do |tweet|
|
@@ -141,9 +141,10 @@ $ cat tweets.json | jq . | less
|
|
141
141
|
| `--end_date` | Used as "until:yyyy-mm-dd" for your query. This means "before the date". | |
|
142
142
|
| `--lang` | Retrieve tweets written in a specific language. | |
|
143
143
|
| `--limit` | Stop scraping when *at least* the number of tweets indicated with --limit is scraped. | 100 |
|
144
|
+
| `--order` | Sort order of the results. | desc |
|
144
145
|
| `--threads` | Set the number of threads twitterscraper-ruby should initiate while scraping for your query. | 2 |
|
145
|
-
| `--proxy` | Scrape https://twitter.com/search via proxies. |
|
146
|
-
| `--cache` | Enable caching. |
|
146
|
+
| `--proxy` | Scrape https://twitter.com/search via proxies. | true |
|
147
|
+
| `--cache` | Enable caching. | true |
|
147
148
|
| `--format` | The format of the output. | json |
|
148
149
|
| `--output` | The name of the output file. | tweets.json |
|
149
150
|
| `--verbose` | Print debug messages. | tweets.json |
|
data/lib/twitterscraper/cli.rb
CHANGED
@@ -21,6 +21,7 @@ module Twitterscraper
|
|
21
21
|
lang: options['lang'],
|
22
22
|
limit: options['limit'],
|
23
23
|
daily_limit: options['daily_limit'],
|
24
|
+
order: options['order'],
|
24
25
|
threads: options['threads'],
|
25
26
|
}
|
26
27
|
client = Twitterscraper::Client.new(cache: options['cache'], proxy: options['proxy'])
|
@@ -64,6 +65,7 @@ module Twitterscraper
|
|
64
65
|
'lang:',
|
65
66
|
'limit:',
|
66
67
|
'daily_limit:',
|
68
|
+
'order:',
|
67
69
|
'threads:',
|
68
70
|
'output:',
|
69
71
|
'format:',
|
@@ -79,6 +81,7 @@ module Twitterscraper
|
|
79
81
|
options['daily_limit'] = options['daily_limit'].to_i if options['daily_limit']
|
80
82
|
options['threads'] = (options['threads'] || 2).to_i
|
81
83
|
options['format'] ||= 'json'
|
84
|
+
options['order'] ||= 'desc'
|
82
85
|
options['output'] ||= "tweets.#{options['format']}"
|
83
86
|
|
84
87
|
options['cache'] = options['cache'] != 'false'
|
data/lib/twitterscraper/query.rb
CHANGED
@@ -195,7 +195,7 @@ module Twitterscraper
|
|
195
195
|
@stop_requested
|
196
196
|
end
|
197
197
|
|
198
|
-
def query_tweets(query, start_date: nil, end_date: nil, lang: '', limit: 100, daily_limit: nil, threads: 2)
|
198
|
+
def query_tweets(query, start_date: nil, end_date: nil, lang: '', limit: 100, daily_limit: nil, order: 'desc', threads: 2)
|
199
199
|
start_date = Date.parse(start_date) if start_date && start_date.is_a?(String)
|
200
200
|
end_date = Date.parse(end_date) if end_date && end_date.is_a?(String)
|
201
201
|
queries = build_queries(query, start_date, end_date)
|
@@ -239,7 +239,7 @@ module Twitterscraper
|
|
239
239
|
end
|
240
240
|
end
|
241
241
|
|
242
|
-
@all_tweets.sort_by { |tweet| -tweet.created_at.to_i }
|
242
|
+
@all_tweets.sort_by { |tweet| (order == 'desc' ? -1 : 1) * tweet.created_at.to_i }
|
243
243
|
end
|
244
244
|
end
|
245
245
|
end
|
data/lib/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twitterscraper-ruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.13.0
|
4
|
+
version: 0.14.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ts-3156
|
@@ -46,6 +46,7 @@ executables:
|
|
46
46
|
extensions: []
|
47
47
|
extra_rdoc_files: []
|
48
48
|
files:
|
49
|
+
- ".circleci/config.yml"
|
49
50
|
- ".gitignore"
|
50
51
|
- ".irbrc"
|
51
52
|
- ".rspec"
|