twitterscraper-ruby 0.8.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +50 -19
- data/lib/twitterscraper/cli.rb +1 -1
- data/lib/twitterscraper/query.rb +1 -1
- data/lib/twitterscraper/tweet.rb +9 -3
- data/lib/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 59b71fc6129f6d8c5a441981dc1577fa9b761380ff119bed4985cfcd88ccb31b
|
4
|
+
data.tar.gz: 2de3fcadc334ee2689d3083ea9324127c3b22ec94cf1b08dec920f9c95771445
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b1e392bc021f6f758b79b7bdcd099af2ac391863f8712dadb5fd19248946867cfd89f140b836532fb40554c82697b26ef3af00b7cbb2cb13b0d5a8e2a38c87e7
|
7
|
+
data.tar.gz: 8c0e81589202e4a094c17604354f0f23a08b4536fe60b58ffe616cf1233c0531547ef02b8e88b6f70b1870ce2d134e4518ee093a5349144e2edfce3b1088e06c
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -56,12 +56,60 @@ tweets = client.query_tweets(KEYWORD, options)
|
|
56
56
|
tweets.each do |tweet|
|
57
57
|
puts tweet.tweet_id
|
58
58
|
puts tweet.text
|
59
|
-
puts tweet.created_at
|
60
59
|
puts tweet.tweet_url
|
60
|
+
puts tweet.created_at
|
61
|
+
|
62
|
+
hash = tweet.attrs
|
63
|
+
puts hash.keys
|
61
64
|
end
|
62
65
|
```
|
63
66
|
|
64
67
|
|
68
|
+
## Attributes
|
69
|
+
|
70
|
+
### Tweet
|
71
|
+
|
72
|
+
- screen_name
|
73
|
+
- name
|
74
|
+
- user_id
|
75
|
+
- tweet_id
|
76
|
+
- text
|
77
|
+
- links
|
78
|
+
- hashtags
|
79
|
+
- image_urls
|
80
|
+
- video_url
|
81
|
+
- has_media
|
82
|
+
- likes
|
83
|
+
- retweets
|
84
|
+
- replies
|
85
|
+
- is_replied
|
86
|
+
- is_reply_to
|
87
|
+
- parent_tweet_id
|
88
|
+
- reply_to_users
|
89
|
+
- tweet_url
|
90
|
+
- created_at
|
91
|
+
|
92
|
+
|
93
|
+
## Search operators
|
94
|
+
|
95
|
+
| Operator | Finds Tweets... |
|
96
|
+
| ------------- | ------------- |
|
97
|
+
| watching now | containing both "watching" and "now". This is the default operator. |
|
98
|
+
| "happy hour" | containing the exact phrase "happy hour". |
|
99
|
+
| love OR hate | containing either "love" or "hate" (or both). |
|
100
|
+
| beer -root | containing "beer" but not "root". |
|
101
|
+
| #haiku | containing the hashtag "haiku". |
|
102
|
+
| from:interior | sent from Twitter account "interior". |
|
103
|
+
| to:NASA | a Tweet authored in reply to Twitter account "NASA". |
|
104
|
+
| @NASA | mentioning Twitter account "NASA". |
|
105
|
+
| puppy filter:media | containing "puppy" and an image or video. |
|
106
|
+
| puppy -filter:retweets | containing "puppy", filtering out retweets. |
|
107
|
+
| superhero since:2015-12-21 | containing "superhero" and sent since date "2015-12-21" (year-month-day). |
|
108
|
+
| puppy until:2015-12-21 | containing "puppy" and sent before the date "2015-12-21". |
|
109
|
+
|
110
|
+
The full list of search operators is documented in [Standard search operators](https://developer.twitter.com/en/docs/tweets/rules-and-filtering/overview/standard-operators).
|
111
|
+
|
112
|
+
|
65
113
|
## Examples
|
66
114
|
|
67
115
|
```shell script
|
@@ -79,27 +127,10 @@ $ cat tweets.json | jq . | less
|
|
79
127
|
"tweet_url": "https://twitter.com/screenname/status/1282659891992000000",
|
80
128
|
"created_at": "2020-07-13 12:00:00 +0000",
|
81
129
|
"text": "Thanks Twitter!"
|
82
|
-
}
|
83
|
-
...
|
130
|
+
}
|
84
131
|
]
|
85
132
|
```
|
86
133
|
|
87
|
-
## Attributes
|
88
|
-
|
89
|
-
### Tweet
|
90
|
-
|
91
|
-
- tweet_id
|
92
|
-
- text
|
93
|
-
- user_id
|
94
|
-
- screen_name
|
95
|
-
- name
|
96
|
-
- links
|
97
|
-
- hashtags
|
98
|
-
- image_urls
|
99
|
-
- tweet_url
|
100
|
-
- created_at
|
101
|
-
|
102
|
-
|
103
134
|
## CLI Options
|
104
135
|
|
105
136
|
| Option | Description | Default |
|
data/lib/twitterscraper/cli.rb
CHANGED
@@ -25,7 +25,7 @@ module Twitterscraper
|
|
25
25
|
}
|
26
26
|
client = Twitterscraper::Client.new
|
27
27
|
tweets = client.query_tweets(options['query'], query_options)
|
28
|
-
File.write(options['output'], generate_json(tweets))
|
28
|
+
File.write(options['output'], generate_json(tweets)) unless tweets.empty?
|
29
29
|
end
|
30
30
|
|
31
31
|
def generate_json(tweets)
|
data/lib/twitterscraper/query.rb
CHANGED
data/lib/twitterscraper/tweet.rb
CHANGED
@@ -21,6 +21,7 @@ module Twitterscraper
|
|
21
21
|
:parent_tweet_id,
|
22
22
|
:reply_to_users,
|
23
23
|
:tweet_url,
|
24
|
+
:timestamp,
|
24
25
|
:created_at,
|
25
26
|
]
|
26
27
|
attr_reader *KEYS
|
@@ -31,10 +32,14 @@ module Twitterscraper
|
|
31
32
|
end
|
32
33
|
end
|
33
34
|
|
34
|
-
def
|
35
|
+
def attrs
|
35
36
|
KEYS.map do |key|
|
36
37
|
[key, send(key)]
|
37
|
-
end.to_h
|
38
|
+
end.to_h
|
39
|
+
end
|
40
|
+
|
41
|
+
def to_json(options = {})
|
42
|
+
attrs.to_json
|
38
43
|
end
|
39
44
|
|
40
45
|
class << self
|
@@ -74,7 +79,7 @@ module Twitterscraper
|
|
74
79
|
reply_to_users = inner_html.xpath("//div[@class[contains(., 'ReplyingToContextBelowAuthor')]]/a").map { |user| {screen_name: user.text.delete_prefix('@'), user_id: user.attr('data-user-id')} }
|
75
80
|
end
|
76
81
|
|
77
|
-
timestamp = inner_html.xpath("//span[@class[contains(., '
|
82
|
+
timestamp = inner_html.xpath("//span[@class[contains(., 'js-short-timestamp')]]").first.attr('data-time').to_i
|
78
83
|
new(
|
79
84
|
screen_name: html.attr('data-screen-name'),
|
80
85
|
name: html.attr('data-name'),
|
@@ -94,6 +99,7 @@ module Twitterscraper
|
|
94
99
|
parent_tweet_id: parent_tweet_id,
|
95
100
|
reply_to_users: reply_to_users,
|
96
101
|
tweet_url: 'https://twitter.com' + html.attr('data-permalink-path'),
|
102
|
+
timestamp: timestamp,
|
97
103
|
created_at: Time.at(timestamp, in: '+00:00'),
|
98
104
|
)
|
99
105
|
end
|
data/lib/version.rb
CHANGED