birdsong 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a632c977970e4e7b014373f08dc819091aece69e59292295711ddfce261ded9c
4
- data.tar.gz: ee9d3a05780af53011a3f9018c512a2f6e6dfe733824c2a0401c873b9b529188
3
+ metadata.gz: 9db624502354ba7be08fbe804ff22d1b1bc474fde29ff179126b0781ff42d95a
4
+ data.tar.gz: bfea616431da77fb69d1fa5587a5e2d7b39d6ca9ef2c09bc81adade2af7de451
5
5
  SHA512:
6
- metadata.gz: 5ad9eff7a7a99201eed5821aa897dd232733510cb088e074c56a31102329a751766498800ca62a62c5327d05797e976114e173a685acc35743c82c0ca644d0cc
7
- data.tar.gz: f930c1ff617cdfb255771510a1eb3fd4588dc84c0f513f8d74900c62bb7c7680047238c7ba31345f6ddbb8154fef90467be179e32db3f899d76fb335141aaa95
6
+ metadata.gz: 2bb4cc4b1ace6b587290aa3952fe97c024939f1ef03e3c903297fb262eb1c4719f99db288c8d7519a14b26a902eb52fc4190fc3cda0a96fac8f8622666da8eab
7
+ data.tar.gz: d9f5c15d5d2c0577146641785d21041264126b0827a2150a2a94ef9513b72ebbb942f1c62b037e7bb1269143e908dcd8b1bc8c7e40e363496b9d2912cc2dcc0f
data/CHANGELOG.md CHANGED
@@ -3,3 +3,7 @@
3
3
  ## [0.1.0] - 2021-04-27
4
4
 
5
5
  - Initial release
6
+
7
+ ## [0.2.0] - 2023-10-04
8
+
9
+ - Fixed to use Selenium for scraping instead of the now-defunct API
@@ -9,13 +9,10 @@ module Birdsong
9
9
  # Check that the ids are at least real ids
10
10
  ids.each { |id| raise Birdsong::InvalidIdError if !/\A\d+\z/.match(id) }
11
11
 
12
- response = ids.map { |id| self.retrieve_data_v1(id) }
12
+ tweet_objects = ids.map { |id| Birdsong::TweetScraper.new.parse(id) }
13
13
 
14
- json_response = response.map { |r| JSON.parse(r.body) }
15
- check_for_errors(json_response)
16
-
17
- json_response.map do |json_tweet|
18
- Tweet.new(json_tweet)
14
+ tweet_objects.map do |tweet_object|
15
+ Tweet.new(tweet_object)
19
16
  end
20
17
  end
21
18
 
@@ -30,139 +27,26 @@ module Birdsong
30
27
  attr_reader :image_file_names
31
28
  attr_reader :video_file_names
32
29
  attr_reader :video_file_type
30
+ attr_reader :video_preview_image
33
31
 
34
32
  alias_method :user, :author # Every other gem uses `user` so we can just alias it
35
33
 
36
34
  private
37
35
 
38
- def initialize(json_tweet)
39
- @json = json_tweet
40
- parse(json_tweet)
41
- end
42
-
43
- def parse(json_tweet)
44
- @id = json_tweet["id"].to_s
45
- @created_at = DateTime.parse(json_tweet["created_at"])
46
- @text = json_tweet["full_text"]
47
- @language = json_tweet["lang"]
48
- @author_id = json_tweet["user"]["id"]
49
-
50
- # A sanity check to make sure we have media in there correctly
51
- if json_tweet["extended_entities"]&.has_key?("media")
52
- media_items = json_tweet["extended_entities"]["media"]
53
- else
54
- media_items = []
55
- end
56
-
57
- @image_file_names = media_items.filter_map do |media_item|
58
- next unless media_item["type"] == "photo"
59
- Birdsong.retrieve_media(media_item["url"])
60
- end
61
-
62
- @video_file_names = media_items.filter_map do |media_item|
63
- next unless (media_item["type"] == "video") || (media_item["type"] == "animated_gif")
64
-
65
- # If the media is video we need to fall back to V1 of the API since V2 doesn't support
66
- # videos yet. This is dumb, but not a big deal.
67
- media_url = get_largest_variant_url(media_items)
68
- media_preview_url = media_items.first["media_url_https"]
69
- @video_file_type = media_item["type"]
70
-
71
- # We're returning an array because, in the case that someday more videos are available our
72
- # implementations won't breaks
73
- [{ url: Birdsong.retrieve_media(media_url), preview_url: Birdsong.retrieve_media(media_preview_url) }]
74
- end
75
-
36
+ def initialize(tweet_object)
37
+ @id = tweet_object[:id]
38
+ @created_at = DateTime.parse(tweet_object[:date])
39
+ @text = tweet_object[:text]
40
+ @language = tweet_object[:language]
41
+ @author_id = tweet_object[:user][:id]
42
+
43
+ @image_file_names = tweet_object[:images]
44
+ @video_file_names = tweet_object[:video]
45
+ @video_file_type = tweet_object[:video_file_type]
46
+ @video_preview_image = tweet_object[:video_preview_image]
76
47
  # Look up the author given the new id.
77
48
  # NOTE: This doesn't *seem* like the right place for this, but I'm not sure where else
78
- @author = User.lookup(@author_id.to_s).first
79
- end
80
-
81
- def get_largest_variant_url(media_items)
82
- # The API response is pretty deeply nested, but this handles that structure
83
- largest_bitrate_variant = nil
84
- media_items.each do |media_item|
85
- # The API returns multiple different resolutions usually. Since we only want to archive
86
- # the largest we'll run through and find it
87
- media_item["video_info"]["variants"].each do |variant|
88
- # Usually there's constant bitrate variants, and sometimes, a .m3u playlist which is for
89
- # streaming. We want to ignore that one here.
90
- next unless variant&.keys.include?("bitrate")
91
-
92
- if largest_bitrate_variant.nil? || largest_bitrate_variant["bitrate"] < variant["bitrate"]
93
- largest_bitrate_variant = variant
94
- end
95
- end
96
- end
97
- largest_bitrate_variant["url"]
98
- end
99
-
100
- # Note that unlike the V2 this only supports one url at a time
101
- def self.retrieve_data_v1(id)
102
- bearer_token = Birdsong.twitter_bearer_token
103
-
104
- tweet_lookup_url = "https://api.twitter.com/1.1/statuses/show.json?tweet_mode=extended&id=#{id}"
105
-
106
- response = tweet_lookup_v1(tweet_lookup_url, bearer_token)
107
- raise Birdsong::RateLimitExceeded.new(
108
- response.headers["x-rate-limit-limit"],
109
- response.headers["x-rate-limit-remaining"],
110
- response.headers["x-rate-limit-reset"]
111
- ) if response.code === 429
112
- raise Birdsong::AuthorizationError, "Invalid response code #{response.code}" unless response.code === 200
113
-
114
- response
115
- end
116
-
117
- # V2 of the Twitter API (which we use everywhere else) doesn't include videos or gifs yet,
118
- # so we have to fall back to V1.
119
- #
120
- # There's a tracker for this at https://twittercommunity.com/t/where-would-i-find-the-direct-link-to-an-mp4-video-posted-in-v2/146933/2
121
- def self.tweet_lookup_v1(url, bearer_token)
122
- options = {
123
- method: "get",
124
- headers: {
125
- "Authorization": "Bearer #{bearer_token}"
126
- }
127
- }
128
-
129
- request = Typhoeus::Request.new(url, options)
130
- response = request.run
131
-
132
- raise Birdsong::RateLimitExceeded.new(
133
- response.headers["x-rate-limit-limit"],
134
- response.headers["x-rate-limit-remaining"],
135
- response.headers["x-rate-limit-reset"]
136
- ) if response.code === 429
137
-
138
- raise Birdsong::NoTweetFoundError, "Tweet with id #{url} not found" if response.code === 404
139
- if response.code === 403
140
- json = JSON.parse(response.body)
141
- if json.has_key?("errors")
142
- json["errors"].each do |error|
143
- raise Birdsong::NoTweetFoundError, "User with id #{url} suspended" if error["code"] == 63
144
- end
145
- end
146
- end
147
- raise Birdsong::AuthorizationError, "Invalid response code #{response.code}" unless response.code === 200
148
-
149
- response
150
- end
151
-
152
-
153
- def self.check_for_errors(parsed_json)
154
- parsed_json.each do |json|
155
- next unless json.key?("errors")
156
- next if json["errors"].empty?
157
-
158
- json["errors"].each do |error|
159
- # If the tweet is removed, or if the user is suspended you get an Authorization Error
160
- if error["title"] == "Not Found Error" || error["title"] == "Authorization Error"
161
- raise Birdsong::NoTweetFoundError, "Tweet with id #{error["value"]} not found"
162
- end
163
- end
164
- end
165
- false
49
+ @author = User.new(tweet_object[:user])
166
50
  end
167
51
  end
168
52
  end
data/lib/birdsong/user.rb CHANGED
@@ -2,21 +2,6 @@
2
2
 
3
3
  module Birdsong
4
4
  class User
5
- def self.lookup(ids = [])
6
- # If a single id is passed in we make it the appropriate array
7
- ids = [ids] unless ids.kind_of?(Array)
8
-
9
- # Check that the ids are at least real ids
10
- ids.each { |id| raise Birdsong::InvalidIdError if !/\A\d+\z/.match(id) }
11
- self.lookup_primative(ids: ids)
12
- end
13
-
14
- def self.lookup_by_usernames(usernames = [])
15
- # If a single id is passed in we make it the appropriate array
16
- usernames = [usernames] unless usernames.kind_of?(Array)
17
- self.lookup_primative(usernames: usernames)
18
- end
19
-
20
5
  # Attributes for after the response is parsed from Twitter
21
6
  attr_reader :json
22
7
  attr_reader :id
@@ -37,102 +22,31 @@ module Birdsong
37
22
 
38
23
  private
39
24
 
40
- def initialize(json_user)
41
- @json = json_user
42
- parse(json_user)
25
+ def initialize(user_object)
26
+ @json = user_object.to_json
27
+ parse(user_object)
43
28
  end
44
29
 
45
- def parse(json_user)
46
- @id = json_user["id"].to_s
47
- @name = json_user["name"]
48
- @username = json_user["screen_name"]
49
- @created_at = DateTime.parse(json_user["created_at"])
50
- @location = json_user["location"]
30
+ def parse(user_object)
31
+ @id = user_object[:id]
32
+ @name = user_object[:name]
33
+ @username = user_object[:screen_name]
34
+ @created_at = DateTime.parse(user_object[:sign_up_date])
35
+ @location = user_object[:location]
51
36
 
52
37
  # Removing the "normal" here gets us the full-sized image, instead of the 150x150 thumbnail
53
- @profile_image_url = json_user["profile_image_url"].sub!("_normal", "")
38
+ @profile_image_url = user_object[:profile_image_url].sub!("_normal", "")
54
39
 
55
- @description = json_user["description"]
56
- @url = json_user["url"]
40
+ @description = user_object[:description]
41
+ @url = user_object[:url]
57
42
  @url = "https://www.twitter.com/#{@username}" if @url.nil?
58
43
 
59
- @followers_count = json_user["followers_count"]
60
- @following_count = json_user["friends_count"]
61
- @tweet_count = json_user["statuses_count"]
62
- @listed_count = json_user["listed_count"]
63
- @verified = json_user["verified"] # this will always be `false` but we're keeping it here for compatibility
44
+ @followers_count = user_object[:followers_count]
45
+ @following_count = user_object[:friends_count]
46
+ @tweet_count = user_object[:statuses_count]
47
+ @listed_count = user_object[:listed_count]
48
+ @verified = user_object[:verified] # this will always be `false` but we're keeping it here for compatibility
64
49
  @profile_image_file_name = Birdsong.retrieve_media(@profile_image_url)
65
50
  end
66
-
67
- def self.lookup_primative(usernames: [], ids: [])
68
- raise Birdsong::InvalidIdError if usernames.empty? && ids.empty? # can't pass in nothing
69
-
70
- if usernames.empty? == false
71
- response = usernames.map { |username| self.retrieve_data(username: username) }
72
- elsif ids.empty? == false
73
- response = ids.map { |id| self.retrieve_data(id: id) }
74
- else
75
- raise Birdsong::InvalidIdError
76
- end
77
-
78
- json_response = response.map { |r| JSON.parse(r.body) }
79
-
80
- json_response.map do |json_user|
81
- User.new(json_user)
82
- end
83
- end
84
-
85
- def self.retrieve_data(username: nil, id: nil)
86
- bearer_token = Birdsong.twitter_bearer_token
87
-
88
- raise Birdsong::InvalidIdError if username.nil? && id.nil? # can't pass in nothing
89
- raise Birdsong::InvalidIdError if username.nil? == false && id.nil? == false # don't pass in both
90
-
91
- user_lookup_url = "https://api.twitter.com/1.1/users/show.json"
92
-
93
- params = {}
94
- if username.nil? == false
95
- # Specify the Usernames that you want to lookup below (to 100 per request)
96
- params["screen_name"] = username
97
- elsif id.nil? == false
98
- # Specify the User IDs that you want to lookup below (to 100 per request)
99
- params["user_id"] = id
100
- end
101
-
102
- response = self.user_lookup(user_lookup_url, bearer_token, params)
103
-
104
- raise Birdsong::RateLimitExceeded.new(
105
- response.headers["x-rate-limit-limit"],
106
- response.headers["x-rate-limit-remaining"],
107
- response.headers["x-rate-limit-reset"]
108
- ) if response.code === 429
109
- raise Birdsong::NoTweetFoundError, "User with id #{id} or username #{username} not found" if response.code === 404
110
- raise Birdsong::AuthorizationError, "Invalid response code #{response.code}" unless response.code == 200
111
-
112
- response
113
- end
114
-
115
- def self.user_lookup(url, bearer_token, params)
116
- options = {
117
- method: "get",
118
- headers: {
119
- "User-Agent": "v2UserLookupRuby",
120
- "Authorization": "Bearer #{bearer_token}"
121
- },
122
- params: params
123
- }
124
-
125
- request = Typhoeus::Request.new(url, options)
126
- response = request.run
127
-
128
- raise Birdsong::RateLimitExceeded.new(
129
- response.headers["x-rate-limit-limit"],
130
- response.headers["x-rate-limit-remaining"],
131
- response.headers["x-rate-limit-reset"]
132
- ) if response.code === 429
133
- raise Birdsong::AuthorizationError, "Invalid response code #{response.code}" unless response.code == 200
134
-
135
- response
136
- end
137
51
  end
138
52
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Birdsong
4
- VERSION = "0.1.2"
4
+ VERSION = "0.2.0"
5
5
  end
data/lib/birdsong.rb CHANGED
@@ -10,6 +10,11 @@ require "fileutils"
10
10
  require_relative "birdsong/version"
11
11
  require_relative "birdsong/tweet"
12
12
  require_relative "birdsong/user"
13
+ require_relative "birdsong/scrapers/scraper"
14
+ require_relative "birdsong/scrapers/tweet_scraper"
15
+ require_relative "birdsong/twitter_guest_key"
16
+
17
+ require_relative "birdsong/monkeypatch"
13
18
 
14
19
  module Birdsong
15
20
  extend Configuration
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: birdsong
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Christopher Guess
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-07-21 00:00:00.000000000 Z
11
+ date: 2023-10-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: typhoeus
@@ -197,7 +197,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
197
197
  - !ruby/object:Gem::Version
198
198
  version: '0'
199
199
  requirements: []
200
- rubygems_version: 3.4.14
200
+ rubygems_version: 3.4.19
201
201
  signing_key:
202
202
  specification_version: 4
203
203
  summary: A gem to interface with Twitter's API V2