birdsong 0.1.2 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a632c977970e4e7b014373f08dc819091aece69e59292295711ddfce261ded9c
4
- data.tar.gz: ee9d3a05780af53011a3f9018c512a2f6e6dfe733824c2a0401c873b9b529188
3
+ metadata.gz: 9db624502354ba7be08fbe804ff22d1b1bc474fde29ff179126b0781ff42d95a
4
+ data.tar.gz: bfea616431da77fb69d1fa5587a5e2d7b39d6ca9ef2c09bc81adade2af7de451
5
5
  SHA512:
6
- metadata.gz: 5ad9eff7a7a99201eed5821aa897dd232733510cb088e074c56a31102329a751766498800ca62a62c5327d05797e976114e173a685acc35743c82c0ca644d0cc
7
- data.tar.gz: f930c1ff617cdfb255771510a1eb3fd4588dc84c0f513f8d74900c62bb7c7680047238c7ba31345f6ddbb8154fef90467be179e32db3f899d76fb335141aaa95
6
+ metadata.gz: 2bb4cc4b1ace6b587290aa3952fe97c024939f1ef03e3c903297fb262eb1c4719f99db288c8d7519a14b26a902eb52fc4190fc3cda0a96fac8f8622666da8eab
7
+ data.tar.gz: d9f5c15d5d2c0577146641785d21041264126b0827a2150a2a94ef9513b72ebbb942f1c62b037e7bb1269143e908dcd8b1bc8c7e40e363496b9d2912cc2dcc0f
data/CHANGELOG.md CHANGED
@@ -3,3 +3,7 @@
3
3
  ## [0.1.0] - 2021-04-27
4
4
 
5
5
  - Initial release
6
+
7
+ ## [0.2.0] - 2023-10-04
8
+
9
+ - Switched to scraping with Selenium instead of the now-defunct API
@@ -9,13 +9,10 @@ module Birdsong
9
9
  # Check that the ids are at least real ids
10
10
  ids.each { |id| raise Birdsong::InvalidIdError if !/\A\d+\z/.match(id) }
11
11
 
12
- response = ids.map { |id| self.retrieve_data_v1(id) }
12
+ tweet_objects = ids.map { |id| Birdsong::TweetScraper.new.parse(id) }
13
13
 
14
- json_response = response.map { |r| JSON.parse(r.body) }
15
- check_for_errors(json_response)
16
-
17
- json_response.map do |json_tweet|
18
- Tweet.new(json_tweet)
14
+ tweet_objects.map do |tweet_object|
15
+ Tweet.new(tweet_object)
19
16
  end
20
17
  end
21
18
 
@@ -30,139 +27,26 @@ module Birdsong
30
27
  attr_reader :image_file_names
31
28
  attr_reader :video_file_names
32
29
  attr_reader :video_file_type
30
+ attr_reader :video_preview_image
33
31
 
34
32
  alias_method :user, :author # Every other gem uses `user` so we can just alias it
35
33
 
36
34
  private
37
35
 
38
- def initialize(json_tweet)
39
- @json = json_tweet
40
- parse(json_tweet)
41
- end
42
-
43
- def parse(json_tweet)
44
- @id = json_tweet["id"].to_s
45
- @created_at = DateTime.parse(json_tweet["created_at"])
46
- @text = json_tweet["full_text"]
47
- @language = json_tweet["lang"]
48
- @author_id = json_tweet["user"]["id"]
49
-
50
- # A sanity check to make sure we have media in there correctly
51
- if json_tweet["extended_entities"]&.has_key?("media")
52
- media_items = json_tweet["extended_entities"]["media"]
53
- else
54
- media_items = []
55
- end
56
-
57
- @image_file_names = media_items.filter_map do |media_item|
58
- next unless media_item["type"] == "photo"
59
- Birdsong.retrieve_media(media_item["url"])
60
- end
61
-
62
- @video_file_names = media_items.filter_map do |media_item|
63
- next unless (media_item["type"] == "video") || (media_item["type"] == "animated_gif")
64
-
65
- # If the media is video we need to fall back to V1 of the API since V2 doesn't support
66
- # videos yet. This is dumb, but not a big deal.
67
- media_url = get_largest_variant_url(media_items)
68
- media_preview_url = media_items.first["media_url_https"]
69
- @video_file_type = media_item["type"]
70
-
71
- # We're returning an array because, in the case that someday more videos are available our
72
- # implementations won't break
73
- [{ url: Birdsong.retrieve_media(media_url), preview_url: Birdsong.retrieve_media(media_preview_url) }]
74
- end
75
-
36
+ def initialize(tweet_object)
37
+ @id = tweet_object[:id]
38
+ @created_at = DateTime.parse(tweet_object[:date])
39
+ @text = tweet_object[:text]
40
+ @language = tweet_object[:language]
41
+ @author_id = tweet_object[:user][:id]
42
+
43
+ @image_file_names = tweet_object[:images]
44
+ @video_file_names = tweet_object[:video]
45
+ @video_file_type = tweet_object[:video_file_type]
46
+ @video_preview_image = tweet_object[:video_preview_image]
76
47
  # Look up the author given the new id.
77
48
  # NOTE: This doesn't *seem* like the right place for this, but I'm not sure where else
78
- @author = User.lookup(@author_id.to_s).first
79
- end
80
-
81
- def get_largest_variant_url(media_items)
82
- # The API response is pretty deeply nested, but this handles that structure
83
- largest_bitrate_variant = nil
84
- media_items.each do |media_item|
85
- # The API returns multiple different resolutions usually. Since we only want to archive
86
- # the largest we'll run through and find it
87
- media_item["video_info"]["variants"].each do |variant|
88
- # Usually there's constant bitrate variants, and sometimes, a .m3u playlist which is for
89
- # streaming. We want to ignore that one here.
90
- next unless variant&.keys.include?("bitrate")
91
-
92
- if largest_bitrate_variant.nil? || largest_bitrate_variant["bitrate"] < variant["bitrate"]
93
- largest_bitrate_variant = variant
94
- end
95
- end
96
- end
97
- largest_bitrate_variant["url"]
98
- end
99
-
100
- # Note that unlike the V2 this only supports one url at a time
101
- def self.retrieve_data_v1(id)
102
- bearer_token = Birdsong.twitter_bearer_token
103
-
104
- tweet_lookup_url = "https://api.twitter.com/1.1/statuses/show.json?tweet_mode=extended&id=#{id}"
105
-
106
- response = tweet_lookup_v1(tweet_lookup_url, bearer_token)
107
- raise Birdsong::RateLimitExceeded.new(
108
- response.headers["x-rate-limit-limit"],
109
- response.headers["x-rate-limit-remaining"],
110
- response.headers["x-rate-limit-reset"]
111
- ) if response.code === 429
112
- raise Birdsong::AuthorizationError, "Invalid response code #{response.code}" unless response.code === 200
113
-
114
- response
115
- end
116
-
117
- # V2 of the Twitter API (which we use everywhere else) doesn't include videos or gifs yet,
118
- # so we have to fall back to V1.
119
- #
120
- # There's a tracker for this at https://twittercommunity.com/t/where-would-i-find-the-direct-link-to-an-mp4-video-posted-in-v2/146933/2
121
- def self.tweet_lookup_v1(url, bearer_token)
122
- options = {
123
- method: "get",
124
- headers: {
125
- "Authorization": "Bearer #{bearer_token}"
126
- }
127
- }
128
-
129
- request = Typhoeus::Request.new(url, options)
130
- response = request.run
131
-
132
- raise Birdsong::RateLimitExceeded.new(
133
- response.headers["x-rate-limit-limit"],
134
- response.headers["x-rate-limit-remaining"],
135
- response.headers["x-rate-limit-reset"]
136
- ) if response.code === 429
137
-
138
- raise Birdsong::NoTweetFoundError, "Tweet with id #{url} not found" if response.code === 404
139
- if response.code === 403
140
- json = JSON.parse(response.body)
141
- if json.has_key?("errors")
142
- json["errors"].each do |error|
143
- raise Birdsong::NoTweetFoundError, "User with id #{url} suspended" if error["code"] == 63
144
- end
145
- end
146
- end
147
- raise Birdsong::AuthorizationError, "Invalid response code #{response.code}" unless response.code === 200
148
-
149
- response
150
- end
151
-
152
-
153
- def self.check_for_errors(parsed_json)
154
- parsed_json.each do |json|
155
- next unless json.key?("errors")
156
- next if json["errors"].empty?
157
-
158
- json["errors"].each do |error|
159
- # If the tweet is removed, or if the user is suspended you get an Authorization Error
160
- if error["title"] == "Not Found Error" || error["title"] == "Authorization Error"
161
- raise Birdsong::NoTweetFoundError, "Tweet with id #{error["value"]} not found"
162
- end
163
- end
164
- end
165
- false
49
+ @author = User.new(tweet_object[:user])
166
50
  end
167
51
  end
168
52
  end
data/lib/birdsong/user.rb CHANGED
@@ -2,21 +2,6 @@
2
2
 
3
3
  module Birdsong
4
4
  class User
5
- def self.lookup(ids = [])
6
- # If a single id is passed in we make it the appropriate array
7
- ids = [ids] unless ids.kind_of?(Array)
8
-
9
- # Check that the ids are at least real ids
10
- ids.each { |id| raise Birdsong::InvalidIdError if !/\A\d+\z/.match(id) }
11
- self.lookup_primative(ids: ids)
12
- end
13
-
14
- def self.lookup_by_usernames(usernames = [])
15
- # If a single username is passed in we wrap it in an array
16
- usernames = [usernames] unless usernames.kind_of?(Array)
17
- self.lookup_primative(usernames: usernames)
18
- end
19
-
20
5
  # Attributes for after the response is parsed from Twitter
21
6
  attr_reader :json
22
7
  attr_reader :id
@@ -37,102 +22,31 @@ module Birdsong
37
22
 
38
23
  private
39
24
 
40
- def initialize(json_user)
41
- @json = json_user
42
- parse(json_user)
25
+ def initialize(user_object)
26
+ @json = user_object.to_json
27
+ parse(user_object)
43
28
  end
44
29
 
45
- def parse(json_user)
46
- @id = json_user["id"].to_s
47
- @name = json_user["name"]
48
- @username = json_user["screen_name"]
49
- @created_at = DateTime.parse(json_user["created_at"])
50
- @location = json_user["location"]
30
+ def parse(user_object)
31
+ @id = user_object[:id]
32
+ @name = user_object[:name]
33
+ @username = user_object[:screen_name]
34
+ @created_at = DateTime.parse(user_object[:sign_up_date])
35
+ @location = user_object[:location]
51
36
 
52
37
  # Removing the "normal" here gets us the full-sized image, instead of the 150x150 thumbnail
53
- @profile_image_url = json_user["profile_image_url"].sub!("_normal", "")
38
+ @profile_image_url = user_object[:profile_image_url].sub!("_normal", "")
54
39
 
55
- @description = json_user["description"]
56
- @url = json_user["url"]
40
+ @description = user_object[:description]
41
+ @url = user_object[:url]
57
42
  @url = "https://www.twitter.com/#{@username}" if @url.nil?
58
43
 
59
- @followers_count = json_user["followers_count"]
60
- @following_count = json_user["friends_count"]
61
- @tweet_count = json_user["statuses_count"]
62
- @listed_count = json_user["listed_count"]
63
- @verified = json_user["verified"] # this will always be `false` but we're keeping it here for compatibility
44
+ @followers_count = user_object[:followers_count]
45
+ @following_count = user_object[:friends_count]
46
+ @tweet_count = user_object[:statuses_count]
47
+ @listed_count = user_object[:listed_count]
48
+ @verified = user_object[:verified] # this will always be `false` but we're keeping it here for compatibility
64
49
  @profile_image_file_name = Birdsong.retrieve_media(@profile_image_url)
65
50
  end
66
-
67
- def self.lookup_primative(usernames: [], ids: [])
68
- raise Birdsong::InvalidIdError if usernames.empty? && ids.empty? # can't pass in nothing
69
-
70
- if usernames.empty? == false
71
- response = usernames.map { |username| self.retrieve_data(username: username) }
72
- elsif ids.empty? == false
73
- response = ids.map { |id| self.retrieve_data(id: id) }
74
- else
75
- raise Birdsong::InvalidIdError
76
- end
77
-
78
- json_response = response.map { |r| JSON.parse(r.body) }
79
-
80
- json_response.map do |json_user|
81
- User.new(json_user)
82
- end
83
- end
84
-
85
- def self.retrieve_data(username: nil, id: nil)
86
- bearer_token = Birdsong.twitter_bearer_token
87
-
88
- raise Birdsong::InvalidIdError if username.nil? && id.nil? # can't pass in nothing
89
- raise Birdsong::InvalidIdError if username.nil? == false && id.nil? == false # don't pass in both
90
-
91
- user_lookup_url = "https://api.twitter.com/1.1/users/show.json"
92
-
93
- params = {}
94
- if username.nil? == false
95
- # Specify the Usernames that you want to lookup below (to 100 per request)
96
- params["screen_name"] = username
97
- elsif id.nil? == false
98
- # Specify the User IDs that you want to lookup below (to 100 per request)
99
- params["user_id"] = id
100
- end
101
-
102
- response = self.user_lookup(user_lookup_url, bearer_token, params)
103
-
104
- raise Birdsong::RateLimitExceeded.new(
105
- response.headers["x-rate-limit-limit"],
106
- response.headers["x-rate-limit-remaining"],
107
- response.headers["x-rate-limit-reset"]
108
- ) if response.code === 429
109
- raise Birdsong::NoTweetFoundError, "User with id #{id} or username #{username} not found" if response.code === 404
110
- raise Birdsong::AuthorizationError, "Invalid response code #{response.code}" unless response.code == 200
111
-
112
- response
113
- end
114
-
115
- def self.user_lookup(url, bearer_token, params)
116
- options = {
117
- method: "get",
118
- headers: {
119
- "User-Agent": "v2UserLookupRuby",
120
- "Authorization": "Bearer #{bearer_token}"
121
- },
122
- params: params
123
- }
124
-
125
- request = Typhoeus::Request.new(url, options)
126
- response = request.run
127
-
128
- raise Birdsong::RateLimitExceeded.new(
129
- response.headers["x-rate-limit-limit"],
130
- response.headers["x-rate-limit-remaining"],
131
- response.headers["x-rate-limit-reset"]
132
- ) if response.code === 429
133
- raise Birdsong::AuthorizationError, "Invalid response code #{response.code}" unless response.code == 200
134
-
135
- response
136
- end
137
51
  end
138
52
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Birdsong
4
- VERSION = "0.1.2"
4
+ VERSION = "0.2.0"
5
5
  end
data/lib/birdsong.rb CHANGED
@@ -10,6 +10,11 @@ require "fileutils"
10
10
  require_relative "birdsong/version"
11
11
  require_relative "birdsong/tweet"
12
12
  require_relative "birdsong/user"
13
+ require_relative "birdsong/scrapers/scraper"
14
+ require_relative "birdsong/scrapers/tweet_scraper"
15
+ require_relative "birdsong/twitter_guest_key"
16
+
17
+ require_relative "birdsong/monkeypatch"
13
18
 
14
19
  module Birdsong
15
20
  extend Configuration
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: birdsong
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Christopher Guess
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-07-21 00:00:00.000000000 Z
11
+ date: 2023-10-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: typhoeus
@@ -197,7 +197,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
197
197
  - !ruby/object:Gem::Version
198
198
  version: '0'
199
199
  requirements: []
200
- rubygems_version: 3.4.14
200
+ rubygems_version: 3.4.19
201
201
  signing_key:
202
202
  specification_version: 4
203
203
  summary: A gem to interface with Twitter's API V2