birdsong 0.1.2 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/lib/birdsong/tweet.rb +16 -132
- data/lib/birdsong/user.rb +17 -103
- data/lib/birdsong/version.rb +1 -1
- data/lib/birdsong.rb +5 -0
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9db624502354ba7be08fbe804ff22d1b1bc474fde29ff179126b0781ff42d95a
|
4
|
+
data.tar.gz: bfea616431da77fb69d1fa5587a5e2d7b39d6ca9ef2c09bc81adade2af7de451
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2bb4cc4b1ace6b587290aa3952fe97c024939f1ef03e3c903297fb262eb1c4719f99db288c8d7519a14b26a902eb52fc4190fc3cda0a96fac8f8622666da8eab
|
7
|
+
data.tar.gz: d9f5c15d5d2c0577146641785d21041264126b0827a2150a2a94ef9513b72ebbb942f1c62b037e7bb1269143e908dcd8b1bc8c7e40e363496b9d2912cc2dcc0f
|
data/CHANGELOG.md
CHANGED
data/lib/birdsong/tweet.rb
CHANGED
@@ -9,13 +9,10 @@ module Birdsong
|
|
9
9
|
# Check that the ids are at least real ids
|
10
10
|
ids.each { |id| raise Birdsong::InvalidIdError if !/\A\d+\z/.match(id) }
|
11
11
|
|
12
|
-
|
12
|
+
tweet_objects = ids.map { |id| Birdsong::TweetScraper.new.parse(id) }
|
13
13
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
json_response.map do |json_tweet|
|
18
|
-
Tweet.new(json_tweet)
|
14
|
+
tweet_objects.map do |tweet_object|
|
15
|
+
Tweet.new(tweet_object)
|
19
16
|
end
|
20
17
|
end
|
21
18
|
|
@@ -30,139 +27,26 @@ module Birdsong
|
|
30
27
|
attr_reader :image_file_names
|
31
28
|
attr_reader :video_file_names
|
32
29
|
attr_reader :video_file_type
|
30
|
+
attr_reader :video_preview_image
|
33
31
|
|
34
32
|
alias_method :user, :author # Every other gem uses `user` so we can just alias it
|
35
33
|
|
36
34
|
private
|
37
35
|
|
38
|
-
def initialize(
|
39
|
-
@
|
40
|
-
parse(
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
@
|
46
|
-
@
|
47
|
-
@
|
48
|
-
@
|
49
|
-
|
50
|
-
# A sanity check to make sure we have media in there correctly
|
51
|
-
if json_tweet["extended_entities"]&.has_key?("media")
|
52
|
-
media_items = json_tweet["extended_entities"]["media"]
|
53
|
-
else
|
54
|
-
media_items = []
|
55
|
-
end
|
56
|
-
|
57
|
-
@image_file_names = media_items.filter_map do |media_item|
|
58
|
-
next unless media_item["type"] == "photo"
|
59
|
-
Birdsong.retrieve_media(media_item["url"])
|
60
|
-
end
|
61
|
-
|
62
|
-
@video_file_names = media_items.filter_map do |media_item|
|
63
|
-
next unless (media_item["type"] == "video") || (media_item["type"] == "animated_gif")
|
64
|
-
|
65
|
-
# If the media is video we need to fall back to V1 of the API since V2 doesn't support
|
66
|
-
# videos yet. This is dumb, but not a big deal.
|
67
|
-
media_url = get_largest_variant_url(media_items)
|
68
|
-
media_preview_url = media_items.first["media_url_https"]
|
69
|
-
@video_file_type = media_item["type"]
|
70
|
-
|
71
|
-
# We're returning an array because, in the case that someday more videos are available our
|
72
|
-
# implementations won't break
|
73
|
-
[{ url: Birdsong.retrieve_media(media_url), preview_url: Birdsong.retrieve_media(media_preview_url) }]
|
74
|
-
end
|
75
|
-
|
36
|
+
def initialize(tweet_object)
|
37
|
+
@id = tweet_object[:id]
|
38
|
+
@created_at = DateTime.parse(tweet_object[:date])
|
39
|
+
@text = tweet_object[:text]
|
40
|
+
@language = tweet_object[:language]
|
41
|
+
@author_id = tweet_object[:user][:id]
|
42
|
+
|
43
|
+
@image_file_names = tweet_object[:images]
|
44
|
+
@video_file_names = tweet_object[:video]
|
45
|
+
@video_file_type = tweet_object[:video_file_type]
|
46
|
+
@video_preview_image = tweet_object[:video_preview_image]
|
76
47
|
# Look up the author given the new id.
|
77
48
|
# NOTE: This doesn't *seem* like the right place for this, but I'm not sure where else
|
78
|
-
@author = User.
|
79
|
-
end
|
80
|
-
|
81
|
-
def get_largest_variant_url(media_items)
|
82
|
-
# The API response is pretty deeply nested, but this handles that structure
|
83
|
-
largest_bitrate_variant = nil
|
84
|
-
media_items.each do |media_item|
|
85
|
-
# The API returns multiple different resolutions usually. Since we only want to archive
|
86
|
-
# the largest we'll run through and find it
|
87
|
-
media_item["video_info"]["variants"].each do |variant|
|
88
|
-
# Usually there's constant bitrate variants, and sometimes, a .m3u playlist which is for
|
89
|
-
# streaming. We want to ignore that one here.
|
90
|
-
next unless variant&.keys.include?("bitrate")
|
91
|
-
|
92
|
-
if largest_bitrate_variant.nil? || largest_bitrate_variant["bitrate"] < variant["bitrate"]
|
93
|
-
largest_bitrate_variant = variant
|
94
|
-
end
|
95
|
-
end
|
96
|
-
end
|
97
|
-
largest_bitrate_variant["url"]
|
98
|
-
end
|
99
|
-
|
100
|
-
# Note that unlike the V2 this only supports one url at a time
|
101
|
-
def self.retrieve_data_v1(id)
|
102
|
-
bearer_token = Birdsong.twitter_bearer_token
|
103
|
-
|
104
|
-
tweet_lookup_url = "https://api.twitter.com/1.1/statuses/show.json?tweet_mode=extended&id=#{id}"
|
105
|
-
|
106
|
-
response = tweet_lookup_v1(tweet_lookup_url, bearer_token)
|
107
|
-
raise Birdsong::RateLimitExceeded.new(
|
108
|
-
response.headers["x-rate-limit-limit"],
|
109
|
-
response.headers["x-rate-limit-remaining"],
|
110
|
-
response.headers["x-rate-limit-reset"]
|
111
|
-
) if response.code === 429
|
112
|
-
raise Birdsong::AuthorizationError, "Invalid response code #{response.code}" unless response.code === 200
|
113
|
-
|
114
|
-
response
|
115
|
-
end
|
116
|
-
|
117
|
-
# V2 of the Twitter API (which we use everywhere else) doesn't include videos or gifs yet,
|
118
|
-
# so we have to fall back to V1.
|
119
|
-
#
|
120
|
-
# There's a tracker for this at https://twittercommunity.com/t/where-would-i-find-the-direct-link-to-an-mp4-video-posted-in-v2/146933/2
|
121
|
-
def self.tweet_lookup_v1(url, bearer_token)
|
122
|
-
options = {
|
123
|
-
method: "get",
|
124
|
-
headers: {
|
125
|
-
"Authorization": "Bearer #{bearer_token}"
|
126
|
-
}
|
127
|
-
}
|
128
|
-
|
129
|
-
request = Typhoeus::Request.new(url, options)
|
130
|
-
response = request.run
|
131
|
-
|
132
|
-
raise Birdsong::RateLimitExceeded.new(
|
133
|
-
response.headers["x-rate-limit-limit"],
|
134
|
-
response.headers["x-rate-limit-remaining"],
|
135
|
-
response.headers["x-rate-limit-reset"]
|
136
|
-
) if response.code === 429
|
137
|
-
|
138
|
-
raise Birdsong::NoTweetFoundError, "Tweet with id #{url} not found" if response.code === 404
|
139
|
-
if response.code === 403
|
140
|
-
json = JSON.parse(response.body)
|
141
|
-
if json.has_key?("errors")
|
142
|
-
json["errors"].each do |error|
|
143
|
-
raise Birdsong::NoTweetFoundError, "User with id #{url} suspended" if error["code"] == 63
|
144
|
-
end
|
145
|
-
end
|
146
|
-
end
|
147
|
-
raise Birdsong::AuthorizationError, "Invalid response code #{response.code}" unless response.code === 200
|
148
|
-
|
149
|
-
response
|
150
|
-
end
|
151
|
-
|
152
|
-
|
153
|
-
def self.check_for_errors(parsed_json)
|
154
|
-
parsed_json.each do |json|
|
155
|
-
next unless json.key?("errors")
|
156
|
-
next if json["errors"].empty?
|
157
|
-
|
158
|
-
json["errors"].each do |error|
|
159
|
-
# If the tweet is removed, or if the user is suspended you get an Authorization Error
|
160
|
-
if error["title"] == "Not Found Error" || error["title"] == "Authorization Error"
|
161
|
-
raise Birdsong::NoTweetFoundError, "Tweet with id #{error["value"]} not found"
|
162
|
-
end
|
163
|
-
end
|
164
|
-
end
|
165
|
-
false
|
49
|
+
@author = User.new(tweet_object[:user])
|
166
50
|
end
|
167
51
|
end
|
168
52
|
end
|
data/lib/birdsong/user.rb
CHANGED
@@ -2,21 +2,6 @@
|
|
2
2
|
|
3
3
|
module Birdsong
|
4
4
|
class User
|
5
|
-
def self.lookup(ids = [])
|
6
|
-
# If a single id is passed in we make it the appropriate array
|
7
|
-
ids = [ids] unless ids.kind_of?(Array)
|
8
|
-
|
9
|
-
# Check that the ids are at least real ids
|
10
|
-
ids.each { |id| raise Birdsong::InvalidIdError if !/\A\d+\z/.match(id) }
|
11
|
-
self.lookup_primative(ids: ids)
|
12
|
-
end
|
13
|
-
|
14
|
-
def self.lookup_by_usernames(usernames = [])
|
15
|
-
# If a single id is passed in we make it the appropriate array
|
16
|
-
usernames = [usernames] unless usernames.kind_of?(Array)
|
17
|
-
self.lookup_primative(usernames: usernames)
|
18
|
-
end
|
19
|
-
|
20
5
|
# Attributes for after the response is parsed from Twitter
|
21
6
|
attr_reader :json
|
22
7
|
attr_reader :id
|
@@ -37,102 +22,31 @@ module Birdsong
|
|
37
22
|
|
38
23
|
private
|
39
24
|
|
40
|
-
def initialize(
|
41
|
-
@json =
|
42
|
-
parse(
|
25
|
+
def initialize(user_object)
|
26
|
+
@json = user_object.to_json
|
27
|
+
parse(user_object)
|
43
28
|
end
|
44
29
|
|
45
|
-
def parse(
|
46
|
-
@id =
|
47
|
-
@name =
|
48
|
-
@username =
|
49
|
-
@created_at = DateTime.parse(
|
50
|
-
@location =
|
30
|
+
def parse(user_object)
|
31
|
+
@id = user_object[:id]
|
32
|
+
@name = user_object[:name]
|
33
|
+
@username = user_object[:screen_name]
|
34
|
+
@created_at = DateTime.parse(user_object[:sign_up_date])
|
35
|
+
@location = user_object[:location]
|
51
36
|
|
52
37
|
# Removing the "normal" here gets us the full-sized image, instead of the 150x150 thumbnail
|
53
|
-
@profile_image_url =
|
38
|
+
@profile_image_url = user_object[:profile_image_url].sub!("_normal", "")
|
54
39
|
|
55
|
-
@description =
|
56
|
-
@url =
|
40
|
+
@description = user_object[:description]
|
41
|
+
@url = user_object[:url]
|
57
42
|
@url = "https://www.twitter.com/#{@username}" if @url.nil?
|
58
43
|
|
59
|
-
@followers_count =
|
60
|
-
@following_count =
|
61
|
-
@tweet_count =
|
62
|
-
@listed_count =
|
63
|
-
@verified =
|
44
|
+
@followers_count = user_object[:followers_count]
|
45
|
+
@following_count = user_object[:friends_count]
|
46
|
+
@tweet_count = user_object[:statuses_count]
|
47
|
+
@listed_count = user_object[:listed_count]
|
48
|
+
@verified = user_object[:verified] # this will always be `false` but we're keeping it here for compatibility
|
64
49
|
@profile_image_file_name = Birdsong.retrieve_media(@profile_image_url)
|
65
50
|
end
|
66
|
-
|
67
|
-
def self.lookup_primative(usernames: [], ids: [])
|
68
|
-
raise Birdsong::InvalidIdError if usernames.empty? && ids.empty? # can't pass in nothing
|
69
|
-
|
70
|
-
if usernames.empty? == false
|
71
|
-
response = usernames.map { |username| self.retrieve_data(username: username) }
|
72
|
-
elsif ids.empty? == false
|
73
|
-
response = ids.map { |id| self.retrieve_data(id: id) }
|
74
|
-
else
|
75
|
-
raise Birdsong::InvalidIdError
|
76
|
-
end
|
77
|
-
|
78
|
-
json_response = response.map { |r| JSON.parse(r.body) }
|
79
|
-
|
80
|
-
json_response.map do |json_user|
|
81
|
-
User.new(json_user)
|
82
|
-
end
|
83
|
-
end
|
84
|
-
|
85
|
-
def self.retrieve_data(username: nil, id: nil)
|
86
|
-
bearer_token = Birdsong.twitter_bearer_token
|
87
|
-
|
88
|
-
raise Birdsong::InvalidIdError if username.nil? && id.nil? # can't pass in nothing
|
89
|
-
raise Birdsong::InvalidIdError if username.nil? == false && id.nil? == false # don't pass in both
|
90
|
-
|
91
|
-
user_lookup_url = "https://api.twitter.com/1.1/users/show.json"
|
92
|
-
|
93
|
-
params = {}
|
94
|
-
if username.nil? == false
|
95
|
-
# Specify the Usernames that you want to lookup below (to 100 per request)
|
96
|
-
params["screen_name"] = username
|
97
|
-
elsif id.nil? == false
|
98
|
-
# Specify the User IDs that you want to lookup below (to 100 per request)
|
99
|
-
params["user_id"] = id
|
100
|
-
end
|
101
|
-
|
102
|
-
response = self.user_lookup(user_lookup_url, bearer_token, params)
|
103
|
-
|
104
|
-
raise Birdsong::RateLimitExceeded.new(
|
105
|
-
response.headers["x-rate-limit-limit"],
|
106
|
-
response.headers["x-rate-limit-remaining"],
|
107
|
-
response.headers["x-rate-limit-reset"]
|
108
|
-
) if response.code === 429
|
109
|
-
raise Birdsong::NoTweetFoundError, "User with id #{id} or username #{username} not found" if response.code === 404
|
110
|
-
raise Birdsong::AuthorizationError, "Invalid response code #{response.code}" unless response.code == 200
|
111
|
-
|
112
|
-
response
|
113
|
-
end
|
114
|
-
|
115
|
-
def self.user_lookup(url, bearer_token, params)
|
116
|
-
options = {
|
117
|
-
method: "get",
|
118
|
-
headers: {
|
119
|
-
"User-Agent": "v2UserLookupRuby",
|
120
|
-
"Authorization": "Bearer #{bearer_token}"
|
121
|
-
},
|
122
|
-
params: params
|
123
|
-
}
|
124
|
-
|
125
|
-
request = Typhoeus::Request.new(url, options)
|
126
|
-
response = request.run
|
127
|
-
|
128
|
-
raise Birdsong::RateLimitExceeded.new(
|
129
|
-
response.headers["x-rate-limit-limit"],
|
130
|
-
response.headers["x-rate-limit-remaining"],
|
131
|
-
response.headers["x-rate-limit-reset"]
|
132
|
-
) if response.code === 429
|
133
|
-
raise Birdsong::AuthorizationError, "Invalid response code #{response.code}" unless response.code == 200
|
134
|
-
|
135
|
-
response
|
136
|
-
end
|
137
51
|
end
|
138
52
|
end
|
data/lib/birdsong/version.rb
CHANGED
data/lib/birdsong.rb
CHANGED
@@ -10,6 +10,11 @@ require "fileutils"
|
|
10
10
|
require_relative "birdsong/version"
|
11
11
|
require_relative "birdsong/tweet"
|
12
12
|
require_relative "birdsong/user"
|
13
|
+
require_relative "birdsong/scrapers/scraper"
|
14
|
+
require_relative "birdsong/scrapers/tweet_scraper"
|
15
|
+
require_relative "birdsong/twitter_guest_key"
|
16
|
+
|
17
|
+
require_relative "birdsong/monkeypatch"
|
13
18
|
|
14
19
|
module Birdsong
|
15
20
|
extend Configuration
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: birdsong
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Christopher Guess
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-10-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: typhoeus
|
@@ -197,7 +197,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
197
197
|
- !ruby/object:Gem::Version
|
198
198
|
version: '0'
|
199
199
|
requirements: []
|
200
|
-
rubygems_version: 3.4.
|
200
|
+
rubygems_version: 3.4.19
|
201
201
|
signing_key:
|
202
202
|
specification_version: 4
|
203
203
|
summary: A gem to interface with Twitter's API V2
|