birdsong 0.1.1 → 0.2.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '03486db7a9efbfce2f7d423f25e33506332bad5a7ceb481da98e68c81b0a75fa'
4
- data.tar.gz: 7a46972f68f413a3d3e0cb81b06ece6b9a3a8f9593be5a68664ce0364ab366ec
3
+ metadata.gz: 9db624502354ba7be08fbe804ff22d1b1bc474fde29ff179126b0781ff42d95a
4
+ data.tar.gz: bfea616431da77fb69d1fa5587a5e2d7b39d6ca9ef2c09bc81adade2af7de451
5
5
  SHA512:
6
- metadata.gz: 68eb9d33bac12dc765b776ad32024992640b1363612f99b2cc0b500c6000fd3ef7351d8cc70218d2d21bb681ec40b233b24fdd7062694426891ce3fa22088e62
7
- data.tar.gz: 8ceddb1eb88a57dec49116653f0be80be47d29d395894ea35a28aafa84eee72d5aa13deca419e569861f715f5cceff93732473fe5cc9e32ed29ed0cd4ae0eb4b
6
+ metadata.gz: 2bb4cc4b1ace6b587290aa3952fe97c024939f1ef03e3c903297fb262eb1c4719f99db288c8d7519a14b26a902eb52fc4190fc3cda0a96fac8f8622666da8eab
7
+ data.tar.gz: d9f5c15d5d2c0577146641785d21041264126b0827a2150a2a94ef9513b72ebbb942f1c62b037e7bb1269143e908dcd8b1bc8c7e40e363496b9d2912cc2dcc0f
data/CHANGELOG.md CHANGED
@@ -3,3 +3,7 @@
3
3
  ## [0.1.0] - 2021-04-27
4
4
 
5
5
  - Initial release
6
+
7
+ ## [0.2.0] - 2023-10-04
8
+
9
+ - Fixed to use Selenium for scraping instead of the now defunct API
@@ -9,16 +9,10 @@ module Birdsong
9
9
  # Check that the ids are at least real ids
10
10
  ids.each { |id| raise Birdsong::InvalidIdError if !/\A\d+\z/.match(id) }
11
11
 
12
- response = self.retrieve_data_v2(ids)
13
- raise Birdsong::AuthorizationError, "Invalid response code #{response.code}" unless response.code == 200
12
+ tweet_objects = ids.map { |id| Birdsong::TweetScraper.new.parse(id) }
14
13
 
15
- json_response = JSON.parse(response.body)
16
- check_for_errors(json_response)
17
-
18
- return [] if json_response["data"].nil?
19
-
20
- json_response["data"].map do |json_tweet|
21
- Tweet.new(json_tweet, json_response["includes"])
14
+ tweet_objects.map do |tweet_object|
15
+ Tweet.new(tweet_object)
22
16
  end
23
17
  end
24
18
 
@@ -33,194 +27,26 @@ module Birdsong
33
27
  attr_reader :image_file_names
34
28
  attr_reader :video_file_names
35
29
  attr_reader :video_file_type
30
+ attr_reader :video_preview_image
36
31
 
37
32
  alias_method :user, :author # Every other gem uses `user` so we can just alias it
38
33
 
39
34
  private
40
35
 
41
- def initialize(json_tweet, includes)
42
- @json = json_tweet
43
- parse(json_tweet, includes)
44
- end
45
-
46
- def parse(json_tweet, includes)
47
- @id = json_tweet["id"]
48
- @created_at = DateTime.parse(json_tweet["created_at"])
49
- @text = json_tweet["text"]
50
- @language = json_tweet["lang"]
51
- @author_id = json_tweet["author_id"]
52
-
53
- # A sanity check to make sure we have media in there correctly
54
- if includes.has_key? "media"
55
- media_items = includes["media"].filter do |media_item|
56
- json_tweet["attachments"]["media_keys"].include? media_item["media_key"]
57
- end
58
- else
59
- media_items = []
60
- end
61
-
62
- @image_file_names = media_items.filter_map do |media_item|
63
- next unless media_item["type"] == "photo"
64
- Birdsong.retrieve_media(media_item["url"])
65
- end
66
-
67
- @video_file_names = media_items.filter_map do |media_item|
68
- next unless (media_item["type"] == "video") || (media_item["type"] == "animated_gif")
69
-
70
- # If the media is video we need to fall back to V1 of the API since V2 doesn't support
71
- # videos yet. This is dumb, but not a big deal.
72
- media_url = get_media_url_from_extended_entities
73
- media_preview_url = get_media_preview_url_from_extended_entities
74
- @video_file_type = media_item["type"]
75
-
76
- # We're returning an array because, in the case that someday more videos are available our
77
- # implementations won't breaks
78
- [{ url: Birdsong.retrieve_media(media_url), preview_url: Birdsong.retrieve_media(media_preview_url) }]
79
- end
80
-
36
+ def initialize(tweet_object)
37
+ @id = tweet_object[:id]
38
+ @created_at = DateTime.parse(tweet_object[:date])
39
+ @text = tweet_object[:text]
40
+ @language = tweet_object[:language]
41
+ @author_id = tweet_object[:user][:id]
42
+
43
+ @image_file_names = tweet_object[:images]
44
+ @video_file_names = tweet_object[:video]
45
+ @video_file_type = tweet_object[:video_file_type]
46
+ @video_preview_image = tweet_object[:video_preview_image]
81
47
  # Look up the author given the new id.
82
48
  # NOTE: This doesn't *seem* like the right place for this, but I'm not sure where else
83
- @author = User.lookup(@author_id).first
84
- end
85
-
86
- # Used to extract a GIF or video URL from the extended entities object in the Twiter API response
87
- # Assumes (as is the case right now) that a Tweet cannot have more than one GIF/video
88
- def get_media_url_from_extended_entities
89
- response = Tweet.retrieve_data_v1(@id)
90
- response = JSON.parse(response.body)
91
- get_largest_variant_url(response["extended_entities"]["media"])
92
- end
93
-
94
- # Used to extract a GIF or video preview URL from the extended entities object in the Twiter API response
95
- # Assumes (as is the case right now) that a Tweet cannot have more than one GIF/video
96
- def get_media_preview_url_from_extended_entities
97
- response = Tweet.retrieve_data_v1(@id)
98
- response = JSON.parse(response.body)
99
- response["extended_entities"]["media"].first["media_url_https"]
100
- end
101
-
102
- def get_largest_variant_url(media_items)
103
- # The API response is pretty deeply nested, but this handles that structure
104
- largest_bitrate_variant = nil
105
- media_items.each do |media_item|
106
- # The API returns multiple different resolutions usually. Since we only want to archive
107
- # the largest we'll run through and find it
108
- media_item["video_info"]["variants"].each do |variant|
109
- # Usually there's constant bitrate variants, and sometimes, a .m3u playlist which is for
110
- # streaming. We want to ignore that one here.
111
- next unless variant&.keys.include?("bitrate")
112
-
113
- if largest_bitrate_variant.nil? || largest_bitrate_variant["bitrate"] < variant["bitrate"]
114
- largest_bitrate_variant = variant
115
- end
116
- end
117
- end
118
- largest_bitrate_variant["url"]
119
- end
120
-
121
- def self.retrieve_data_v2(ids)
122
- bearer_token = Birdsong.twitter_bearer_token
123
-
124
- tweet_lookup_url = "https://api.twitter.com/2/tweets"
125
-
126
- # Specify the Tweet IDs that you want to lookup below (to 100 per request)
127
- tweet_ids = ids.join(",")
128
-
129
- # Add or remove optional parameters values from the params object below. Full list of parameters and their values can be found in the docs:
130
- # https://developer.twitter.com/en/docs/twitter-api/tweets/lookup/api-reference
131
- params = {
132
- "ids": tweet_ids,
133
- "expansions": "attachments.media_keys,author_id,referenced_tweets.id",
134
- "tweet.fields": Birdsong.tweet_fields,
135
- "user.fields": Birdsong.user_fields,
136
- "media.fields": "duration_ms,height,media_key,preview_image_url,public_metrics,type,url,width",
137
- "place.fields": "country_code",
138
- "poll.fields": "options"
139
- }
140
-
141
- response = tweet_lookup_v2(tweet_lookup_url, bearer_token, params)
142
- raise Birdsong::AuthorizationError, "Invalid response code #{response.code}" unless response.code === 200
143
-
144
- response
145
- end
146
-
147
- def self.tweet_lookup_v2(url, bearer_token, params)
148
- options = {
149
- method: "get",
150
- headers: {
151
- "User-Agent": "v2TweetLookupRuby",
152
- "Authorization": "Bearer #{bearer_token}"
153
- },
154
- params: params
155
- }
156
-
157
- request = Typhoeus::Request.new(url, options)
158
- response = request.run
159
-
160
- raise Birdsong::RateLimitExceeded.new(
161
- response.headers["x-rate-limit-limit"],
162
- response.headers["x-rate-limit-remaining"],
163
- response.headers["x-rate-limit-reset"]
164
- ) if response.code === 429
165
- raise Birdsong::AuthorizationError, "Invalid response code #{response.code}" unless response.code === 200
166
-
167
- response
168
- end
169
-
170
- # Note that unlike the V2 this only supports one url at a time
171
- def self.retrieve_data_v1(id)
172
- bearer_token = Birdsong.twitter_bearer_token
173
-
174
- tweet_lookup_url = "https://api.twitter.com/1.1/statuses/show.json?tweet_mode=extended&id=#{id}"
175
-
176
- response = tweet_lookup_v1(tweet_lookup_url, bearer_token)
177
- raise Birdsong::RateLimitExceeded.new(
178
- response.headers["x-rate-limit-limit"],
179
- response.headers["x-rate-limit-remaining"],
180
- response.headers["x-rate-limit-reset"]
181
- ) if response.code === 429
182
- raise Birdsong::AuthorizationError, "Invalid response code #{response.code}" unless response.code === 200
183
-
184
- response
185
- end
186
-
187
- # V2 of the Twitter API (which we use everywhere else) doesn't include videos or gifs yet,
188
- # so we have to fall back to V1.
189
- #
190
- # There's a tracker for this at https://twittercommunity.com/t/where-would-i-find-the-direct-link-to-an-mp4-video-posted-in-v2/146933/2
191
- def self.tweet_lookup_v1(url, bearer_token)
192
- options = {
193
- method: "get",
194
- headers: {
195
- "Authorization": "Bearer #{bearer_token}"
196
- }
197
- }
198
-
199
- request = Typhoeus::Request.new(url, options)
200
- response = request.run
201
-
202
- raise Birdsong::RateLimitExceeded.new(
203
- response.headers["x-rate-limit-limit"],
204
- response.headers["x-rate-limit-remaining"],
205
- response.headers["x-rate-limit-reset"]
206
- ) if response.code === 429
207
- raise Birdsong::AuthorizationError, "Invalid response code #{response.code}" unless response.code === 200
208
-
209
- response
210
- end
211
-
212
-
213
- def self.check_for_errors(parsed_json)
214
- return false unless parsed_json.key?("errors")
215
- return false if parsed_json["errors"].empty?
216
-
217
- parsed_json["errors"].each do |error|
218
- # If the tweet is removed, or if the user is suspended you get an Authorization Error
219
- if error["title"] == "Not Found Error" || error["title"] == "Authorization Error"
220
- raise Birdsong::NoTweetFoundError, "Tweet with id #{error["value"]} not found"
221
- end
222
- end
223
- false
49
+ @author = User.new(tweet_object[:user])
224
50
  end
225
51
  end
226
52
  end
data/lib/birdsong/user.rb CHANGED
@@ -2,21 +2,6 @@
2
2
 
3
3
  module Birdsong
4
4
  class User
5
- def self.lookup(ids = [])
6
- # If a single id is passed in we make it the appropriate array
7
- ids = [ids] unless ids.kind_of?(Array)
8
-
9
- # Check that the ids are at least real ids
10
- ids.each { |id| raise Birdsong::InvalidIdError if !/\A\d+\z/.match(id) }
11
- self.lookup_primative(ids: ids)
12
- end
13
-
14
- def self.lookup_by_usernames(usernames = [])
15
- # If a single id is passed in we make it the appropriate array
16
- usernames = [usernames] unless usernames.kind_of?(Array)
17
- self.lookup_primative(usernames: usernames)
18
- end
19
-
20
5
  # Attributes for after the response is parsed from Twitter
21
6
  attr_reader :json
22
7
  attr_reader :id
@@ -37,105 +22,31 @@ module Birdsong
37
22
 
38
23
  private
39
24
 
40
- def initialize(json_user)
41
- @json = json_user
42
- parse(json_user)
25
+ def initialize(user_object)
26
+ @json = user_object.to_json
27
+ parse(user_object)
43
28
  end
44
29
 
45
- def parse(json_user)
46
- @id = json_user["id"]
47
- @name = json_user["name"]
48
- @username = json_user["username"]
49
- @created_at = DateTime.parse(json_user["created_at"])
50
- @location = json_user["location"]
30
+ def parse(user_object)
31
+ @id = user_object[:id]
32
+ @name = user_object[:name]
33
+ @username = user_object[:screen_name]
34
+ @created_at = DateTime.parse(user_object[:sign_up_date])
35
+ @location = user_object[:location]
51
36
 
52
37
  # Removing the "normal" here gets us the full-sized image, instead of the 150x150 thumbnail
53
- @profile_image_url = json_user["profile_image_url"].sub!("_normal", "")
38
+ @profile_image_url = user_object[:profile_image_url].sub!("_normal", "")
54
39
 
55
- @description = json_user["description"]
56
- @url = json_user["url"]
40
+ @description = user_object[:description]
41
+ @url = user_object[:url]
57
42
  @url = "https://www.twitter.com/#{@username}" if @url.nil?
58
- @followers_count = json_user["public_metrics"]["followers_count"]
59
- @following_count = json_user["public_metrics"]["following_count"]
60
- @tweet_count = json_user["public_metrics"]["tweet_count"]
61
- @listed_count = json_user["public_metrics"]["listed_count"]
62
- @verified = json_user["verified"]
63
- @profile_image_file_name = Birdsong.retrieve_media(@profile_image_url)
64
- end
65
-
66
- def self.lookup_primative(usernames: nil, ids: nil)
67
- raise Birdsong::InvalidIdError if usernames.nil? && ids.nil? # can't pass in nothing
68
- raise Birdsong::InvalidIdError if usernames.nil? == false && ids.nil? == false # don't pass in both
69
-
70
- response = self.retrieve_data(ids: ids, usernames: usernames)
71
-
72
- raise Birdsong::AuthorizationError, "Invalid response code #{response.code}" unless response.code == 200
73
-
74
- json_response = JSON.parse(response.body)
75
- return [] if json_response["data"].nil?
76
-
77
- json_response["data"].map do |json_user|
78
- User.new(json_user)
79
- end
80
- end
81
43
 
82
- def self.retrieve_data(usernames: nil, ids: nil)
83
- bearer_token = Birdsong.twitter_bearer_token
84
-
85
- raise Birdsong::InvalidIdError if usernames.nil? && ids.nil? # can't pass in nothing
86
- raise Birdsong::InvalidIdError if usernames.nil? == false && ids.nil? == false # don't pass in both
87
-
88
- # Add or remove optional parameters values from the params object below. Full list of parameters and their values can be found in the docs:
89
- # https://developer.twitter.com/en/docs/twitter-api/tweets/lookup/api-reference
90
- params = {
91
- "expansions": "pinned_tweet_id",
92
- "tweet.fields": Birdsong.tweet_fields,
93
- "user.fields": Birdsong.user_fields,
94
- }
95
-
96
- if usernames.nil? == false
97
- user_lookup_url = "https://api.twitter.com/2/users/by"
98
- # Specify the Usernames that you want to lookup below (to 100 per request)
99
- params["usernames"] = usernames.join(",")
100
- elsif ids.nil? == false
101
- user_lookup_url = "https://api.twitter.com/2/users"
102
- # Specify the User IDs that you want to lookup below (to 100 per request)
103
- params["ids"] = ids.join(",")
104
- end
105
-
106
- response = self.user_lookup(user_lookup_url, bearer_token, params)
107
-
108
- raise Birdsong::RateLimitExceeded.new(
109
- response.headers["x-rate-limit-limit"],
110
- response.headers["x-rate-limit-remaining"],
111
- response.headers["x-rate-limit-reset"]
112
- ) if response.code === 429
113
- raise Birdsong::AuthorizationError, "Invalid response code #{response.code}" unless response.code == 200
114
-
115
- response
116
- end
117
-
118
- def self.user_lookup(url, bearer_token, params)
119
- options = {
120
- method: "get",
121
- headers: {
122
- "User-Agent": "v2UserLookupRuby",
123
- "Authorization": "Bearer #{bearer_token}"
124
- },
125
- params: params
126
- }
127
-
128
- request = Typhoeus::Request.new(url, options)
129
- response = request.run
130
-
131
- raise Birdsong::RateLimitExceeded.new(
132
- response.headers["x-rate-limit-limit"],
133
- response.headers["x-rate-limit-remaining"],
134
- response.headers["x-rate-limit-reset"]
135
- ) if response.code === 429
136
- raise Birdsong::AuthorizationError, "Invalid response code #{response.code}" unless response.code == 200
137
-
138
- response
44
+ @followers_count = user_object[:followers_count]
45
+ @following_count = user_object[:friends_count]
46
+ @tweet_count = user_object[:statuses_count]
47
+ @listed_count = user_object[:listed_count]
48
+ @verified = user_object[:verified] # this will always be `false` but we're keeping it here for compatibility
49
+ @profile_image_file_name = Birdsong.retrieve_media(@profile_image_url)
139
50
  end
140
51
  end
141
52
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Birdsong
4
- VERSION = "0.1.1"
4
+ VERSION = "0.2.0"
5
5
  end
data/lib/birdsong.rb CHANGED
@@ -10,6 +10,11 @@ require "fileutils"
10
10
  require_relative "birdsong/version"
11
11
  require_relative "birdsong/tweet"
12
12
  require_relative "birdsong/user"
13
+ require_relative "birdsong/scrapers/scraper"
14
+ require_relative "birdsong/scrapers/tweet_scraper"
15
+ require_relative "birdsong/twitter_guest_key"
16
+
17
+ require_relative "birdsong/monkeypatch"
13
18
 
14
19
  module Birdsong
15
20
  extend Configuration
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: birdsong
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Christopher Guess
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-05-23 00:00:00.000000000 Z
11
+ date: 2023-10-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: typhoeus
@@ -197,7 +197,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
197
197
  - !ruby/object:Gem::Version
198
198
  version: '0'
199
199
  requirements: []
200
- rubygems_version: 3.3.26
200
+ rubygems_version: 3.4.19
201
201
  signing_key:
202
202
  specification_version: 4
203
203
  summary: A gem to interface with Twitter's API V2