birdsong 0.1.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '03486db7a9efbfce2f7d423f25e33506332bad5a7ceb481da98e68c81b0a75fa'
4
- data.tar.gz: 7a46972f68f413a3d3e0cb81b06ece6b9a3a8f9593be5a68664ce0364ab366ec
3
+ metadata.gz: 9db624502354ba7be08fbe804ff22d1b1bc474fde29ff179126b0781ff42d95a
4
+ data.tar.gz: bfea616431da77fb69d1fa5587a5e2d7b39d6ca9ef2c09bc81adade2af7de451
5
5
  SHA512:
6
- metadata.gz: 68eb9d33bac12dc765b776ad32024992640b1363612f99b2cc0b500c6000fd3ef7351d8cc70218d2d21bb681ec40b233b24fdd7062694426891ce3fa22088e62
7
- data.tar.gz: 8ceddb1eb88a57dec49116653f0be80be47d29d395894ea35a28aafa84eee72d5aa13deca419e569861f715f5cceff93732473fe5cc9e32ed29ed0cd4ae0eb4b
6
+ metadata.gz: 2bb4cc4b1ace6b587290aa3952fe97c024939f1ef03e3c903297fb262eb1c4719f99db288c8d7519a14b26a902eb52fc4190fc3cda0a96fac8f8622666da8eab
7
+ data.tar.gz: d9f5c15d5d2c0577146641785d21041264126b0827a2150a2a94ef9513b72ebbb942f1c62b037e7bb1269143e908dcd8b1bc8c7e40e363496b9d2912cc2dcc0f
data/CHANGELOG.md CHANGED
@@ -3,3 +3,7 @@
3
3
  ## [0.1.0] - 2021-04-27
4
4
 
5
5
  - Initial release
6
+
7
+ ## [0.2.0] - 2023-10-04
8
+
9
+ - Fixed to use Selenium for scraping instead of the now-defunct API
@@ -9,16 +9,10 @@ module Birdsong
9
9
  # Check that the ids are at least real ids
10
10
  ids.each { |id| raise Birdsong::InvalidIdError if !/\A\d+\z/.match(id) }
11
11
 
12
- response = self.retrieve_data_v2(ids)
13
- raise Birdsong::AuthorizationError, "Invalid response code #{response.code}" unless response.code == 200
12
+ tweet_objects = ids.map { |id| Birdsong::TweetScraper.new.parse(id) }
14
13
 
15
- json_response = JSON.parse(response.body)
16
- check_for_errors(json_response)
17
-
18
- return [] if json_response["data"].nil?
19
-
20
- json_response["data"].map do |json_tweet|
21
- Tweet.new(json_tweet, json_response["includes"])
14
+ tweet_objects.map do |tweet_object|
15
+ Tweet.new(tweet_object)
22
16
  end
23
17
  end
24
18
 
@@ -33,194 +27,26 @@ module Birdsong
33
27
  attr_reader :image_file_names
34
28
  attr_reader :video_file_names
35
29
  attr_reader :video_file_type
30
+ attr_reader :video_preview_image
36
31
 
37
32
  alias_method :user, :author # Every other gem uses `user` so we can just alias it
38
33
 
39
34
  private
40
35
 
41
- def initialize(json_tweet, includes)
42
- @json = json_tweet
43
- parse(json_tweet, includes)
44
- end
45
-
46
- def parse(json_tweet, includes)
47
- @id = json_tweet["id"]
48
- @created_at = DateTime.parse(json_tweet["created_at"])
49
- @text = json_tweet["text"]
50
- @language = json_tweet["lang"]
51
- @author_id = json_tweet["author_id"]
52
-
53
- # A sanity check to make sure we have media in there correctly
54
- if includes.has_key? "media"
55
- media_items = includes["media"].filter do |media_item|
56
- json_tweet["attachments"]["media_keys"].include? media_item["media_key"]
57
- end
58
- else
59
- media_items = []
60
- end
61
-
62
- @image_file_names = media_items.filter_map do |media_item|
63
- next unless media_item["type"] == "photo"
64
- Birdsong.retrieve_media(media_item["url"])
65
- end
66
-
67
- @video_file_names = media_items.filter_map do |media_item|
68
- next unless (media_item["type"] == "video") || (media_item["type"] == "animated_gif")
69
-
70
- # If the media is video we need to fall back to V1 of the API since V2 doesn't support
71
- # videos yet. This is dumb, but not a big deal.
72
- media_url = get_media_url_from_extended_entities
73
- media_preview_url = get_media_preview_url_from_extended_entities
74
- @video_file_type = media_item["type"]
75
-
76
- # We're returning an array because, in the case that someday more videos are available our
77
- # implementations won't breaks
78
- [{ url: Birdsong.retrieve_media(media_url), preview_url: Birdsong.retrieve_media(media_preview_url) }]
79
- end
80
-
36
+ def initialize(tweet_object)
37
+ @id = tweet_object[:id]
38
+ @created_at = DateTime.parse(tweet_object[:date])
39
+ @text = tweet_object[:text]
40
+ @language = tweet_object[:language]
41
+ @author_id = tweet_object[:user][:id]
42
+
43
+ @image_file_names = tweet_object[:images]
44
+ @video_file_names = tweet_object[:video]
45
+ @video_file_type = tweet_object[:video_file_type]
46
+ @video_preview_image = tweet_object[:video_preview_image]
81
47
  # Look up the author given the new id.
82
48
  # NOTE: This doesn't *seem* like the right place for this, but I'm not sure where else
83
- @author = User.lookup(@author_id).first
84
- end
85
-
86
- # Used to extract a GIF or video URL from the extended entities object in the Twiter API response
87
- # Assumes (as is the case right now) that a Tweet cannot have more than one GIF/video
88
- def get_media_url_from_extended_entities
89
- response = Tweet.retrieve_data_v1(@id)
90
- response = JSON.parse(response.body)
91
- get_largest_variant_url(response["extended_entities"]["media"])
92
- end
93
-
94
- # Used to extract a GIF or video preview URL from the extended entities object in the Twiter API response
95
- # Assumes (as is the case right now) that a Tweet cannot have more than one GIF/video
96
- def get_media_preview_url_from_extended_entities
97
- response = Tweet.retrieve_data_v1(@id)
98
- response = JSON.parse(response.body)
99
- response["extended_entities"]["media"].first["media_url_https"]
100
- end
101
-
102
- def get_largest_variant_url(media_items)
103
- # The API response is pretty deeply nested, but this handles that structure
104
- largest_bitrate_variant = nil
105
- media_items.each do |media_item|
106
- # The API returns multiple different resolutions usually. Since we only want to archive
107
- # the largest we'll run through and find it
108
- media_item["video_info"]["variants"].each do |variant|
109
- # Usually there's constant bitrate variants, and sometimes, a .m3u playlist which is for
110
- # streaming. We want to ignore that one here.
111
- next unless variant&.keys.include?("bitrate")
112
-
113
- if largest_bitrate_variant.nil? || largest_bitrate_variant["bitrate"] < variant["bitrate"]
114
- largest_bitrate_variant = variant
115
- end
116
- end
117
- end
118
- largest_bitrate_variant["url"]
119
- end
120
-
121
- def self.retrieve_data_v2(ids)
122
- bearer_token = Birdsong.twitter_bearer_token
123
-
124
- tweet_lookup_url = "https://api.twitter.com/2/tweets"
125
-
126
- # Specify the Tweet IDs that you want to lookup below (to 100 per request)
127
- tweet_ids = ids.join(",")
128
-
129
- # Add or remove optional parameters values from the params object below. Full list of parameters and their values can be found in the docs:
130
- # https://developer.twitter.com/en/docs/twitter-api/tweets/lookup/api-reference
131
- params = {
132
- "ids": tweet_ids,
133
- "expansions": "attachments.media_keys,author_id,referenced_tweets.id",
134
- "tweet.fields": Birdsong.tweet_fields,
135
- "user.fields": Birdsong.user_fields,
136
- "media.fields": "duration_ms,height,media_key,preview_image_url,public_metrics,type,url,width",
137
- "place.fields": "country_code",
138
- "poll.fields": "options"
139
- }
140
-
141
- response = tweet_lookup_v2(tweet_lookup_url, bearer_token, params)
142
- raise Birdsong::AuthorizationError, "Invalid response code #{response.code}" unless response.code === 200
143
-
144
- response
145
- end
146
-
147
- def self.tweet_lookup_v2(url, bearer_token, params)
148
- options = {
149
- method: "get",
150
- headers: {
151
- "User-Agent": "v2TweetLookupRuby",
152
- "Authorization": "Bearer #{bearer_token}"
153
- },
154
- params: params
155
- }
156
-
157
- request = Typhoeus::Request.new(url, options)
158
- response = request.run
159
-
160
- raise Birdsong::RateLimitExceeded.new(
161
- response.headers["x-rate-limit-limit"],
162
- response.headers["x-rate-limit-remaining"],
163
- response.headers["x-rate-limit-reset"]
164
- ) if response.code === 429
165
- raise Birdsong::AuthorizationError, "Invalid response code #{response.code}" unless response.code === 200
166
-
167
- response
168
- end
169
-
170
- # Note that unlike the V2 this only supports one url at a time
171
- def self.retrieve_data_v1(id)
172
- bearer_token = Birdsong.twitter_bearer_token
173
-
174
- tweet_lookup_url = "https://api.twitter.com/1.1/statuses/show.json?tweet_mode=extended&id=#{id}"
175
-
176
- response = tweet_lookup_v1(tweet_lookup_url, bearer_token)
177
- raise Birdsong::RateLimitExceeded.new(
178
- response.headers["x-rate-limit-limit"],
179
- response.headers["x-rate-limit-remaining"],
180
- response.headers["x-rate-limit-reset"]
181
- ) if response.code === 429
182
- raise Birdsong::AuthorizationError, "Invalid response code #{response.code}" unless response.code === 200
183
-
184
- response
185
- end
186
-
187
- # V2 of the Twitter API (which we use everywhere else) doesn't include videos or gifs yet,
188
- # so we have to fall back to V1.
189
- #
190
- # There's a tracker for this at https://twittercommunity.com/t/where-would-i-find-the-direct-link-to-an-mp4-video-posted-in-v2/146933/2
191
- def self.tweet_lookup_v1(url, bearer_token)
192
- options = {
193
- method: "get",
194
- headers: {
195
- "Authorization": "Bearer #{bearer_token}"
196
- }
197
- }
198
-
199
- request = Typhoeus::Request.new(url, options)
200
- response = request.run
201
-
202
- raise Birdsong::RateLimitExceeded.new(
203
- response.headers["x-rate-limit-limit"],
204
- response.headers["x-rate-limit-remaining"],
205
- response.headers["x-rate-limit-reset"]
206
- ) if response.code === 429
207
- raise Birdsong::AuthorizationError, "Invalid response code #{response.code}" unless response.code === 200
208
-
209
- response
210
- end
211
-
212
-
213
- def self.check_for_errors(parsed_json)
214
- return false unless parsed_json.key?("errors")
215
- return false if parsed_json["errors"].empty?
216
-
217
- parsed_json["errors"].each do |error|
218
- # If the tweet is removed, or if the user is suspended you get an Authorization Error
219
- if error["title"] == "Not Found Error" || error["title"] == "Authorization Error"
220
- raise Birdsong::NoTweetFoundError, "Tweet with id #{error["value"]} not found"
221
- end
222
- end
223
- false
49
+ @author = User.new(tweet_object[:user])
224
50
  end
225
51
  end
226
52
  end
data/lib/birdsong/user.rb CHANGED
@@ -2,21 +2,6 @@
2
2
 
3
3
  module Birdsong
4
4
  class User
5
- def self.lookup(ids = [])
6
- # If a single id is passed in we make it the appropriate array
7
- ids = [ids] unless ids.kind_of?(Array)
8
-
9
- # Check that the ids are at least real ids
10
- ids.each { |id| raise Birdsong::InvalidIdError if !/\A\d+\z/.match(id) }
11
- self.lookup_primative(ids: ids)
12
- end
13
-
14
- def self.lookup_by_usernames(usernames = [])
15
- # If a single id is passed in we make it the appropriate array
16
- usernames = [usernames] unless usernames.kind_of?(Array)
17
- self.lookup_primative(usernames: usernames)
18
- end
19
-
20
5
  # Attributes for after the response is parsed from Twitter
21
6
  attr_reader :json
22
7
  attr_reader :id
@@ -37,105 +22,31 @@ module Birdsong
37
22
 
38
23
  private
39
24
 
40
- def initialize(json_user)
41
- @json = json_user
42
- parse(json_user)
25
+ def initialize(user_object)
26
+ @json = user_object.to_json
27
+ parse(user_object)
43
28
  end
44
29
 
45
- def parse(json_user)
46
- @id = json_user["id"]
47
- @name = json_user["name"]
48
- @username = json_user["username"]
49
- @created_at = DateTime.parse(json_user["created_at"])
50
- @location = json_user["location"]
30
+ def parse(user_object)
31
+ @id = user_object[:id]
32
+ @name = user_object[:name]
33
+ @username = user_object[:screen_name]
34
+ @created_at = DateTime.parse(user_object[:sign_up_date])
35
+ @location = user_object[:location]
51
36
 
52
37
  # Removing the "normal" here gets us the full-sized image, instead of the 150x150 thumbnail
53
- @profile_image_url = json_user["profile_image_url"].sub!("_normal", "")
38
+ @profile_image_url = user_object[:profile_image_url].sub!("_normal", "")
54
39
 
55
- @description = json_user["description"]
56
- @url = json_user["url"]
40
+ @description = user_object[:description]
41
+ @url = user_object[:url]
57
42
  @url = "https://www.twitter.com/#{@username}" if @url.nil?
58
- @followers_count = json_user["public_metrics"]["followers_count"]
59
- @following_count = json_user["public_metrics"]["following_count"]
60
- @tweet_count = json_user["public_metrics"]["tweet_count"]
61
- @listed_count = json_user["public_metrics"]["listed_count"]
62
- @verified = json_user["verified"]
63
- @profile_image_file_name = Birdsong.retrieve_media(@profile_image_url)
64
- end
65
-
66
- def self.lookup_primative(usernames: nil, ids: nil)
67
- raise Birdsong::InvalidIdError if usernames.nil? && ids.nil? # can't pass in nothing
68
- raise Birdsong::InvalidIdError if usernames.nil? == false && ids.nil? == false # don't pass in both
69
-
70
- response = self.retrieve_data(ids: ids, usernames: usernames)
71
-
72
- raise Birdsong::AuthorizationError, "Invalid response code #{response.code}" unless response.code == 200
73
-
74
- json_response = JSON.parse(response.body)
75
- return [] if json_response["data"].nil?
76
-
77
- json_response["data"].map do |json_user|
78
- User.new(json_user)
79
- end
80
- end
81
43
 
82
- def self.retrieve_data(usernames: nil, ids: nil)
83
- bearer_token = Birdsong.twitter_bearer_token
84
-
85
- raise Birdsong::InvalidIdError if usernames.nil? && ids.nil? # can't pass in nothing
86
- raise Birdsong::InvalidIdError if usernames.nil? == false && ids.nil? == false # don't pass in both
87
-
88
- # Add or remove optional parameters values from the params object below. Full list of parameters and their values can be found in the docs:
89
- # https://developer.twitter.com/en/docs/twitter-api/tweets/lookup/api-reference
90
- params = {
91
- "expansions": "pinned_tweet_id",
92
- "tweet.fields": Birdsong.tweet_fields,
93
- "user.fields": Birdsong.user_fields,
94
- }
95
-
96
- if usernames.nil? == false
97
- user_lookup_url = "https://api.twitter.com/2/users/by"
98
- # Specify the Usernames that you want to lookup below (to 100 per request)
99
- params["usernames"] = usernames.join(",")
100
- elsif ids.nil? == false
101
- user_lookup_url = "https://api.twitter.com/2/users"
102
- # Specify the User IDs that you want to lookup below (to 100 per request)
103
- params["ids"] = ids.join(",")
104
- end
105
-
106
- response = self.user_lookup(user_lookup_url, bearer_token, params)
107
-
108
- raise Birdsong::RateLimitExceeded.new(
109
- response.headers["x-rate-limit-limit"],
110
- response.headers["x-rate-limit-remaining"],
111
- response.headers["x-rate-limit-reset"]
112
- ) if response.code === 429
113
- raise Birdsong::AuthorizationError, "Invalid response code #{response.code}" unless response.code == 200
114
-
115
- response
116
- end
117
-
118
- def self.user_lookup(url, bearer_token, params)
119
- options = {
120
- method: "get",
121
- headers: {
122
- "User-Agent": "v2UserLookupRuby",
123
- "Authorization": "Bearer #{bearer_token}"
124
- },
125
- params: params
126
- }
127
-
128
- request = Typhoeus::Request.new(url, options)
129
- response = request.run
130
-
131
- raise Birdsong::RateLimitExceeded.new(
132
- response.headers["x-rate-limit-limit"],
133
- response.headers["x-rate-limit-remaining"],
134
- response.headers["x-rate-limit-reset"]
135
- ) if response.code === 429
136
- raise Birdsong::AuthorizationError, "Invalid response code #{response.code}" unless response.code == 200
137
-
138
- response
44
+ @followers_count = user_object[:followers_count]
45
+ @following_count = user_object[:friends_count]
46
+ @tweet_count = user_object[:statuses_count]
47
+ @listed_count = user_object[:listed_count]
48
+ @verified = user_object[:verified] # this will always be `false` but we're keeping it here for compatibility
49
+ @profile_image_file_name = Birdsong.retrieve_media(@profile_image_url)
139
50
  end
140
51
  end
141
52
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Birdsong
4
- VERSION = "0.1.1"
4
+ VERSION = "0.2.0"
5
5
  end
data/lib/birdsong.rb CHANGED
@@ -10,6 +10,11 @@ require "fileutils"
10
10
  require_relative "birdsong/version"
11
11
  require_relative "birdsong/tweet"
12
12
  require_relative "birdsong/user"
13
+ require_relative "birdsong/scrapers/scraper"
14
+ require_relative "birdsong/scrapers/tweet_scraper"
15
+ require_relative "birdsong/twitter_guest_key"
16
+
17
+ require_relative "birdsong/monkeypatch"
13
18
 
14
19
  module Birdsong
15
20
  extend Configuration
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: birdsong
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Christopher Guess
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-05-23 00:00:00.000000000 Z
11
+ date: 2023-10-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: typhoeus
@@ -197,7 +197,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
197
197
  - !ruby/object:Gem::Version
198
198
  version: '0'
199
199
  requirements: []
200
- rubygems_version: 3.3.26
200
+ rubygems_version: 3.4.19
201
201
  signing_key:
202
202
  specification_version: 4
203
203
  summary: A gem to interface with Twitter's API V2