twitterscraper-ruby 0.8.0 → 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +50 -19
- data/lib/twitterscraper/cli.rb +1 -1
- data/lib/twitterscraper/query.rb +1 -1
- data/lib/twitterscraper/tweet.rb +9 -3
- data/lib/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 59b71fc6129f6d8c5a441981dc1577fa9b761380ff119bed4985cfcd88ccb31b
|
4
|
+
data.tar.gz: 2de3fcadc334ee2689d3083ea9324127c3b22ec94cf1b08dec920f9c95771445
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b1e392bc021f6f758b79b7bdcd099af2ac391863f8712dadb5fd19248946867cfd89f140b836532fb40554c82697b26ef3af00b7cbb2cb13b0d5a8e2a38c87e7
|
7
|
+
data.tar.gz: 8c0e81589202e4a094c17604354f0f23a08b4536fe60b58ffe616cf1233c0531547ef02b8e88b6f70b1870ce2d134e4518ee093a5349144e2edfce3b1088e06c
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -56,12 +56,60 @@ tweets = client.query_tweets(KEYWORD, options)
|
|
56
56
|
tweets.each do |tweet|
|
57
57
|
puts tweet.tweet_id
|
58
58
|
puts tweet.text
|
59
|
-
puts tweet.created_at
|
60
59
|
puts tweet.tweet_url
|
60
|
+
puts tweet.created_at
|
61
|
+
|
62
|
+
hash = tweet.attrs
|
63
|
+
puts hash.keys
|
61
64
|
end
|
62
65
|
```
|
63
66
|
|
64
67
|
|
68
|
+
## Attributes
|
69
|
+
|
70
|
+
### Tweet
|
71
|
+
|
72
|
+
- screen_name
|
73
|
+
- name
|
74
|
+
- user_id
|
75
|
+
- tweet_id
|
76
|
+
- text
|
77
|
+
- links
|
78
|
+
- hashtags
|
79
|
+
- image_urls
|
80
|
+
- video_url
|
81
|
+
- has_media
|
82
|
+
- likes
|
83
|
+
- retweets
|
84
|
+
- replies
|
85
|
+
- is_replied
|
86
|
+
- is_reply_to
|
87
|
+
- parent_tweet_id
|
88
|
+
- reply_to_users
|
89
|
+
- tweet_url
|
90
|
+
- created_at
|
91
|
+
|
92
|
+
|
93
|
+
## Search operators
|
94
|
+
|
95
|
+
| Operator | Finds Tweets... |
|
96
|
+
| ------------- | ------------- |
|
97
|
+
| watching now | containing both "watching" and "now". This is the default operator. |
|
98
|
+
| "happy hour" | containing the exact phrase "happy hour". |
|
99
|
+
| love OR hate | containing either "love" or "hate" (or both). |
|
100
|
+
| beer -root | containing "beer" but not "root". |
|
101
|
+
| #haiku | containing the hashtag "haiku". |
|
102
|
+
| from:interior | sent from Twitter account "interior". |
|
103
|
+
| to:NASA | sent in reply to Twitter account "NASA". |
|
104
|
+
| @NASA | mentioning Twitter account "NASA". |
|
105
|
+
| puppy filter:media | containing "puppy" and an image or video. |
|
106
|
+
| puppy -filter:retweets | containing "puppy", filtering out retweets. |
|
107
|
+
| superhero since:2015-12-21 | containing "superhero" and sent since date "2015-12-21" (year-month-day). |
|
108
|
+
| puppy until:2015-12-21 | containing "puppy" and sent before the date "2015-12-21". |
|
109
|
+
|
110
|
+
The full list of search operators is documented in [Standard search operators](https://developer.twitter.com/en/docs/tweets/rules-and-filtering/overview/standard-operators).
|
111
|
+
|
112
|
+
|
65
113
|
## Examples
|
66
114
|
|
67
115
|
```shell script
|
@@ -79,27 +127,10 @@ $ cat tweets.json | jq . | less
|
|
79
127
|
"tweet_url": "https://twitter.com/screenname/status/1282659891992000000",
|
80
128
|
"created_at": "2020-07-13 12:00:00 +0000",
|
81
129
|
"text": "Thanks Twitter!"
|
82
|
-
}
|
83
|
-
...
|
130
|
+
}
|
84
131
|
]
|
85
132
|
```
|
86
133
|
|
87
|
-
## Attributes
|
88
|
-
|
89
|
-
### Tweet
|
90
|
-
|
91
|
-
- tweet_id
|
92
|
-
- text
|
93
|
-
- user_id
|
94
|
-
- screen_name
|
95
|
-
- name
|
96
|
-
- links
|
97
|
-
- hashtags
|
98
|
-
- image_urls
|
99
|
-
- tweet_url
|
100
|
-
- created_at
|
101
|
-
|
102
|
-
|
103
134
|
## CLI Options
|
104
135
|
|
105
136
|
| Option | Description | Default |
|
data/lib/twitterscraper/cli.rb
CHANGED
@@ -25,7 +25,7 @@ module Twitterscraper
|
|
25
25
|
}
|
26
26
|
client = Twitterscraper::Client.new
|
27
27
|
tweets = client.query_tweets(options['query'], query_options)
|
28
|
-
File.write(options['output'], generate_json(tweets))
|
28
|
+
File.write(options['output'], generate_json(tweets)) unless tweets.empty?
|
29
29
|
end
|
30
30
|
|
31
31
|
def generate_json(tweets)
|
data/lib/twitterscraper/query.rb
CHANGED
data/lib/twitterscraper/tweet.rb
CHANGED
@@ -21,6 +21,7 @@ module Twitterscraper
|
|
21
21
|
:parent_tweet_id,
|
22
22
|
:reply_to_users,
|
23
23
|
:tweet_url,
|
24
|
+
:timestamp,
|
24
25
|
:created_at,
|
25
26
|
]
|
26
27
|
attr_reader *KEYS
|
@@ -31,10 +32,14 @@ module Twitterscraper
|
|
31
32
|
end
|
32
33
|
end
|
33
34
|
|
34
|
-
def
|
35
|
+
def attrs
|
35
36
|
KEYS.map do |key|
|
36
37
|
[key, send(key)]
|
37
|
-
end.to_h
|
38
|
+
end.to_h
|
39
|
+
end
|
40
|
+
|
41
|
+
def to_json(options = {})
|
42
|
+
attrs.to_json
|
38
43
|
end
|
39
44
|
|
40
45
|
class << self
|
@@ -74,7 +79,7 @@ module Twitterscraper
|
|
74
79
|
reply_to_users = inner_html.xpath("//div[@class[contains(., 'ReplyingToContextBelowAuthor')]]/a").map { |user| {screen_name: user.text.delete_prefix('@'), user_id: user.attr('data-user-id')} }
|
75
80
|
end
|
76
81
|
|
77
|
-
timestamp = inner_html.xpath("//span[@class[contains(., '
|
82
|
+
timestamp = inner_html.xpath("//span[@class[contains(., 'js-short-timestamp')]]").first.attr('data-time').to_i
|
78
83
|
new(
|
79
84
|
screen_name: html.attr('data-screen-name'),
|
80
85
|
name: html.attr('data-name'),
|
@@ -94,6 +99,7 @@ module Twitterscraper
|
|
94
99
|
parent_tweet_id: parent_tweet_id,
|
95
100
|
reply_to_users: reply_to_users,
|
96
101
|
tweet_url: 'https://twitter.com' + html.attr('data-permalink-path'),
|
102
|
+
timestamp: timestamp,
|
97
103
|
created_at: Time.at(timestamp, in: '+00:00'),
|
98
104
|
)
|
99
105
|
end
|
data/lib/version.rb
CHANGED