birdsong 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/lib/birdsong/tweet.rb +16 -132
- data/lib/birdsong/user.rb +17 -103
- data/lib/birdsong/version.rb +1 -1
- data/lib/birdsong.rb +5 -0
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9db624502354ba7be08fbe804ff22d1b1bc474fde29ff179126b0781ff42d95a
|
4
|
+
data.tar.gz: bfea616431da77fb69d1fa5587a5e2d7b39d6ca9ef2c09bc81adade2af7de451
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2bb4cc4b1ace6b587290aa3952fe97c024939f1ef03e3c903297fb262eb1c4719f99db288c8d7519a14b26a902eb52fc4190fc3cda0a96fac8f8622666da8eab
|
7
|
+
data.tar.gz: d9f5c15d5d2c0577146641785d21041264126b0827a2150a2a94ef9513b72ebbb942f1c62b037e7bb1269143e908dcd8b1bc8c7e40e363496b9d2912cc2dcc0f
|
data/CHANGELOG.md
CHANGED
data/lib/birdsong/tweet.rb
CHANGED
@@ -9,13 +9,10 @@ module Birdsong
|
|
9
9
|
# Check that the ids are at least real ids
|
10
10
|
ids.each { |id| raise Birdsong::InvalidIdError if !/\A\d+\z/.match(id) }
|
11
11
|
|
12
|
-
|
12
|
+
tweet_objects = ids.map { |id| Birdsong::TweetScraper.new.parse(id) }
|
13
13
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
json_response.map do |json_tweet|
|
18
|
-
Tweet.new(json_tweet)
|
14
|
+
tweet_objects.map do |tweet_object|
|
15
|
+
Tweet.new(tweet_object)
|
19
16
|
end
|
20
17
|
end
|
21
18
|
|
@@ -30,139 +27,26 @@ module Birdsong
|
|
30
27
|
attr_reader :image_file_names
|
31
28
|
attr_reader :video_file_names
|
32
29
|
attr_reader :video_file_type
|
30
|
+
attr_reader :video_preview_image
|
33
31
|
|
34
32
|
alias_method :user, :author # Every other gem uses `user` so we can just alias it
|
35
33
|
|
36
34
|
private
|
37
35
|
|
38
|
-
def initialize(
|
39
|
-
@
|
40
|
-
parse(
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
@
|
46
|
-
@
|
47
|
-
@
|
48
|
-
@
|
49
|
-
|
50
|
-
# A sanity check to make sure we have media in there correctly
|
51
|
-
if json_tweet["extended_entities"]&.has_key?("media")
|
52
|
-
media_items = json_tweet["extended_entities"]["media"]
|
53
|
-
else
|
54
|
-
media_items = []
|
55
|
-
end
|
56
|
-
|
57
|
-
@image_file_names = media_items.filter_map do |media_item|
|
58
|
-
next unless media_item["type"] == "photo"
|
59
|
-
Birdsong.retrieve_media(media_item["url"])
|
60
|
-
end
|
61
|
-
|
62
|
-
@video_file_names = media_items.filter_map do |media_item|
|
63
|
-
next unless (media_item["type"] == "video") || (media_item["type"] == "animated_gif")
|
64
|
-
|
65
|
-
# If the media is video we need to fall back to V1 of the API since V2 doesn't support
|
66
|
-
# videos yet. This is dumb, but not a big deal.
|
67
|
-
media_url = get_largest_variant_url(media_items)
|
68
|
-
media_preview_url = media_items.first["media_url_https"]
|
69
|
-
@video_file_type = media_item["type"]
|
70
|
-
|
71
|
-
# We're returning an array because, in the case that someday more videos are available our
|
72
|
-
# implementations won't breaks
|
73
|
-
[{ url: Birdsong.retrieve_media(media_url), preview_url: Birdsong.retrieve_media(media_preview_url) }]
|
74
|
-
end
|
75
|
-
|
36
|
+
def initialize(tweet_object)
|
37
|
+
@id = tweet_object[:id]
|
38
|
+
@created_at = DateTime.parse(tweet_object[:date])
|
39
|
+
@text = tweet_object[:text]
|
40
|
+
@language = tweet_object[:language]
|
41
|
+
@author_id = tweet_object[:user][:id]
|
42
|
+
|
43
|
+
@image_file_names = tweet_object[:images]
|
44
|
+
@video_file_names = tweet_object[:video]
|
45
|
+
@video_file_type = tweet_object[:video_file_type]
|
46
|
+
@video_preview_image = tweet_object[:video_preview_image]
|
76
47
|
# Look up the author given the new id.
|
77
48
|
# NOTE: This doesn't *seem* like the right place for this, but I"m not sure where else
|
78
|
-
@author = User.
|
79
|
-
end
|
80
|
-
|
81
|
-
def get_largest_variant_url(media_items)
|
82
|
-
# The API response is pretty deeply nested, but this handles that structure
|
83
|
-
largest_bitrate_variant = nil
|
84
|
-
media_items.each do |media_item|
|
85
|
-
# The API returns multiple different resolutions usually. Since we only want to archive
|
86
|
-
# the largest we'll run through and find it
|
87
|
-
media_item["video_info"]["variants"].each do |variant|
|
88
|
-
# Usually there's constant bitrate variants, and sometimes, a .m3u playlist which is for
|
89
|
-
# streaming. We want to ignore that one here.
|
90
|
-
next unless variant&.keys.include?("bitrate")
|
91
|
-
|
92
|
-
if largest_bitrate_variant.nil? || largest_bitrate_variant["bitrate"] < variant["bitrate"]
|
93
|
-
largest_bitrate_variant = variant
|
94
|
-
end
|
95
|
-
end
|
96
|
-
end
|
97
|
-
largest_bitrate_variant["url"]
|
98
|
-
end
|
99
|
-
|
100
|
-
# Note that unlike the V2 this only supports one url at a time
|
101
|
-
def self.retrieve_data_v1(id)
|
102
|
-
bearer_token = Birdsong.twitter_bearer_token
|
103
|
-
|
104
|
-
tweet_lookup_url = "https://api.twitter.com/1.1/statuses/show.json?tweet_mode=extended&id=#{id}"
|
105
|
-
|
106
|
-
response = tweet_lookup_v1(tweet_lookup_url, bearer_token)
|
107
|
-
raise Birdsong::RateLimitExceeded.new(
|
108
|
-
response.headers["x-rate-limit-limit"],
|
109
|
-
response.headers["x-rate-limit-remaining"],
|
110
|
-
response.headers["x-rate-limit-reset"]
|
111
|
-
) if response.code === 429
|
112
|
-
raise Birdsong::AuthorizationError, "Invalid response code #{response.code}" unless response.code === 200
|
113
|
-
|
114
|
-
response
|
115
|
-
end
|
116
|
-
|
117
|
-
# V2 of the Twitter API (which we use everywhere else) doesn't include videos or gifs yet,
|
118
|
-
# so we have to fall back to V1.
|
119
|
-
#
|
120
|
-
# There's a tracker for this at https://twittercommunity.com/t/where-would-i-find-the-direct-link-to-an-mp4-video-posted-in-v2/146933/2
|
121
|
-
def self.tweet_lookup_v1(url, bearer_token)
|
122
|
-
options = {
|
123
|
-
method: "get",
|
124
|
-
headers: {
|
125
|
-
"Authorization": "Bearer #{bearer_token}"
|
126
|
-
}
|
127
|
-
}
|
128
|
-
|
129
|
-
request = Typhoeus::Request.new(url, options)
|
130
|
-
response = request.run
|
131
|
-
|
132
|
-
raise Birdsong::RateLimitExceeded.new(
|
133
|
-
response.headers["x-rate-limit-limit"],
|
134
|
-
response.headers["x-rate-limit-remaining"],
|
135
|
-
response.headers["x-rate-limit-reset"]
|
136
|
-
) if response.code === 429
|
137
|
-
|
138
|
-
raise Birdsong::NoTweetFoundError, "Tweet with id #{url} not found" if response.code === 404
|
139
|
-
if response.code === 403
|
140
|
-
json = JSON.parse(response.body)
|
141
|
-
if json.has_key?("errors")
|
142
|
-
json["errors"].each do |error|
|
143
|
-
raise Birdsong::NoTweetFoundError, "User with id #{url} suspended" if error["code"] == 63
|
144
|
-
end
|
145
|
-
end
|
146
|
-
end
|
147
|
-
raise Birdsong::AuthorizationError, "Invalid response code #{response.code}" unless response.code === 200
|
148
|
-
|
149
|
-
response
|
150
|
-
end
|
151
|
-
|
152
|
-
|
153
|
-
def self.check_for_errors(parsed_json)
|
154
|
-
parsed_json.each do |json|
|
155
|
-
next unless json.key?("errors")
|
156
|
-
next if json["errors"].empty?
|
157
|
-
|
158
|
-
json["errors"].each do |error|
|
159
|
-
# If the tweet is removed, or if the user is suspended you get an Authorization Error
|
160
|
-
if error["title"] == "Not Found Error" || error["title"] == "Authorization Error"
|
161
|
-
raise Birdsong::NoTweetFoundError, "Tweet with id #{error["value"]} not found"
|
162
|
-
end
|
163
|
-
end
|
164
|
-
end
|
165
|
-
false
|
49
|
+
@author = User.new(tweet_object[:user])
|
166
50
|
end
|
167
51
|
end
|
168
52
|
end
|
data/lib/birdsong/user.rb
CHANGED
@@ -2,21 +2,6 @@
|
|
2
2
|
|
3
3
|
module Birdsong
|
4
4
|
class User
|
5
|
-
def self.lookup(ids = [])
|
6
|
-
# If a single id is passed in we make it the appropriate array
|
7
|
-
ids = [ids] unless ids.kind_of?(Array)
|
8
|
-
|
9
|
-
# Check that the ids are at least real ids
|
10
|
-
ids.each { |id| raise Birdsong::InvalidIdError if !/\A\d+\z/.match(id) }
|
11
|
-
self.lookup_primative(ids: ids)
|
12
|
-
end
|
13
|
-
|
14
|
-
def self.lookup_by_usernames(usernames = [])
|
15
|
-
# If a single id is passed in we make it the appropriate array
|
16
|
-
usernames = [usernames] unless usernames.kind_of?(Array)
|
17
|
-
self.lookup_primative(usernames: usernames)
|
18
|
-
end
|
19
|
-
|
20
5
|
# Attributes for after the response is parsed from Twitter
|
21
6
|
attr_reader :json
|
22
7
|
attr_reader :id
|
@@ -37,102 +22,31 @@ module Birdsong
|
|
37
22
|
|
38
23
|
private
|
39
24
|
|
40
|
-
def initialize(
|
41
|
-
@json =
|
42
|
-
parse(
|
25
|
+
def initialize(user_object)
|
26
|
+
@json = user_object.to_json
|
27
|
+
parse(user_object)
|
43
28
|
end
|
44
29
|
|
45
|
-
def parse(
|
46
|
-
@id =
|
47
|
-
@name =
|
48
|
-
@username =
|
49
|
-
@created_at = DateTime.parse(
|
50
|
-
@location =
|
30
|
+
def parse(user_object)
|
31
|
+
@id = user_object[:id]
|
32
|
+
@name = user_object[:name]
|
33
|
+
@username = user_object[:screen_name]
|
34
|
+
@created_at = DateTime.parse(user_object[:sign_up_date])
|
35
|
+
@location = user_object[:location]
|
51
36
|
|
52
37
|
# Removing the "normal" here gets us the full-sized image, instead of the 150x150 thumbnail
|
53
|
-
@profile_image_url =
|
38
|
+
@profile_image_url = user_object[:profile_image_url].sub!("_normal", "")
|
54
39
|
|
55
|
-
@description =
|
56
|
-
@url =
|
40
|
+
@description = user_object[:description]
|
41
|
+
@url = user_object[:url]
|
57
42
|
@url = "https://www.twitter.com/#{@username}" if @url.nil?
|
58
43
|
|
59
|
-
@followers_count =
|
60
|
-
@following_count =
|
61
|
-
@tweet_count =
|
62
|
-
@listed_count =
|
63
|
-
@verified =
|
44
|
+
@followers_count = user_object[:followers_count]
|
45
|
+
@following_count = user_object[:friends_count]
|
46
|
+
@tweet_count = user_object[:statuses_count]
|
47
|
+
@listed_count = user_object[:listed_count]
|
48
|
+
@verified = user_object[:verified] # this will always be `false` but we're keeping it here for compatibility
|
64
49
|
@profile_image_file_name = Birdsong.retrieve_media(@profile_image_url)
|
65
50
|
end
|
66
|
-
|
67
|
-
def self.lookup_primative(usernames: [], ids: [])
|
68
|
-
raise Birdsong::InvalidIdError if usernames.empty? && ids.empty? # can't pass in nothing
|
69
|
-
|
70
|
-
if usernames.empty? == false
|
71
|
-
response = usernames.map { |username| self.retrieve_data(username: username) }
|
72
|
-
elsif ids.empty? == false
|
73
|
-
response = ids.map { |id| self.retrieve_data(id: id) }
|
74
|
-
else
|
75
|
-
raise Birdsong::InvalidIdError
|
76
|
-
end
|
77
|
-
|
78
|
-
json_response = response.map { |r| JSON.parse(r.body) }
|
79
|
-
|
80
|
-
json_response.map do |json_user|
|
81
|
-
User.new(json_user)
|
82
|
-
end
|
83
|
-
end
|
84
|
-
|
85
|
-
def self.retrieve_data(username: nil, id: nil)
|
86
|
-
bearer_token = Birdsong.twitter_bearer_token
|
87
|
-
|
88
|
-
raise Birdsong::InvalidIdError if username.nil? && id.nil? # can't pass in nothing
|
89
|
-
raise Birdsong::InvalidIdError if username.nil? == false && id.nil? == false # don't pass in both
|
90
|
-
|
91
|
-
user_lookup_url = "https://api.twitter.com/1.1/users/show.json"
|
92
|
-
|
93
|
-
params = {}
|
94
|
-
if username.nil? == false
|
95
|
-
# Specify the Usernames that you want to lookup below (to 100 per request)
|
96
|
-
params["screen_name"] = username
|
97
|
-
elsif id.nil? == false
|
98
|
-
# Specify the User IDs that you want to lookup below (to 100 per request)
|
99
|
-
params["user_id"] = id
|
100
|
-
end
|
101
|
-
|
102
|
-
response = self.user_lookup(user_lookup_url, bearer_token, params)
|
103
|
-
|
104
|
-
raise Birdsong::RateLimitExceeded.new(
|
105
|
-
response.headers["x-rate-limit-limit"],
|
106
|
-
response.headers["x-rate-limit-remaining"],
|
107
|
-
response.headers["x-rate-limit-reset"]
|
108
|
-
) if response.code === 429
|
109
|
-
raise Birdsong::NoTweetFoundError, "User with id #{id} or username #{username} not found" if response.code === 404
|
110
|
-
raise Birdsong::AuthorizationError, "Invalid response code #{response.code}" unless response.code == 200
|
111
|
-
|
112
|
-
response
|
113
|
-
end
|
114
|
-
|
115
|
-
def self.user_lookup(url, bearer_token, params)
|
116
|
-
options = {
|
117
|
-
method: "get",
|
118
|
-
headers: {
|
119
|
-
"User-Agent": "v2UserLookupRuby",
|
120
|
-
"Authorization": "Bearer #{bearer_token}"
|
121
|
-
},
|
122
|
-
params: params
|
123
|
-
}
|
124
|
-
|
125
|
-
request = Typhoeus::Request.new(url, options)
|
126
|
-
response = request.run
|
127
|
-
|
128
|
-
raise Birdsong::RateLimitExceeded.new(
|
129
|
-
response.headers["x-rate-limit-limit"],
|
130
|
-
response.headers["x-rate-limit-remaining"],
|
131
|
-
response.headers["x-rate-limit-reset"]
|
132
|
-
) if response.code === 429
|
133
|
-
raise Birdsong::AuthorizationError, "Invalid response code #{response.code}" unless response.code == 200
|
134
|
-
|
135
|
-
response
|
136
|
-
end
|
137
51
|
end
|
138
52
|
end
|
data/lib/birdsong/version.rb
CHANGED
data/lib/birdsong.rb
CHANGED
@@ -10,6 +10,11 @@ require "fileutils"
|
|
10
10
|
require_relative "birdsong/version"
|
11
11
|
require_relative "birdsong/tweet"
|
12
12
|
require_relative "birdsong/user"
|
13
|
+
require_relative "birdsong/scrapers/scraper"
|
14
|
+
require_relative "birdsong/scrapers/tweet_scraper"
|
15
|
+
require_relative "birdsong/twitter_guest_key"
|
16
|
+
|
17
|
+
require_relative "birdsong/monkeypatch"
|
13
18
|
|
14
19
|
module Birdsong
|
15
20
|
extend Configuration
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: birdsong
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Christopher Guess
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-10-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: typhoeus
|
@@ -197,7 +197,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
197
197
|
- !ruby/object:Gem::Version
|
198
198
|
version: '0'
|
199
199
|
requirements: []
|
200
|
-
rubygems_version: 3.4.
|
200
|
+
rubygems_version: 3.4.19
|
201
201
|
signing_key:
|
202
202
|
specification_version: 4
|
203
203
|
summary: A gem to interface with Twitter's API V2
|