birdsong 0.2.8 → 0.2.9

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: dda08c757d1c68ec0444b27d413dd2f19691c08c4a2bfed33b99e4029e00ec81
4
- data.tar.gz: 6f67bc01f6bd34abac6db6fc2b182d1f063890e44e127a8e9ecbeffc45b5db4a
3
+ metadata.gz: d9a2c2815cf9cbc7d98e308971c1924f90bba4d2715d7ded7c4e275f523ec84f
4
+ data.tar.gz: cb888671a2171786b5b7c68c96b4d1e0a8957a1d44ce3a1897b4f0fde43fad8b
5
5
  SHA512:
6
- metadata.gz: 579e935b51eb486d037c5ea7a9bc75c23cd7ee3365483aa69dbe4fa2ba56f763ea0566e00eea7402635e483553028e03553a7d44d2117f7f28c4721e308bd54b
7
- data.tar.gz: 81466c8e53b847b3044b1c517ba72647ceedff0857bc291830dce42c914b8261cf143cef5b00629cf80008a5af12ceef7345c88576e4345a0826339677eb9541
6
+ metadata.gz: 42d22a774b5cc9d1e2518911aaf4e3e7f84e5731d8662103d7e5df48ae71c25c0f723efaf334b60cfb3b18274cebce8119d88765a06f7b6957626506e5abcd74
7
+ data.tar.gz: 402eafa8cfd7873e5284ded6fce36a050ea8fa5055a240c6fe460e17bd92ea7d55637b8052bff5afd1fc110f66a16f97360c75d5b47249b25d0d974df230ea5b
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 3.0.5
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- birdsong (0.2.8)
4
+ birdsong (0.2.7)
5
5
  capybara (~> 3.40)
6
6
  curb (~> 1.0, >= 1.0.5)
7
7
  oauth (~> 0.5.6)
@@ -13,24 +13,25 @@ PATH
13
13
  GEM
14
14
  remote: https://rubygems.org/
15
15
  specs:
16
- activesupport (7.1.4)
16
+ activesupport (7.1.5)
17
17
  base64
18
+ benchmark (>= 0.3)
18
19
  bigdecimal
19
20
  concurrent-ruby (~> 1.0, >= 1.0.2)
20
21
  connection_pool (>= 2.2.5)
21
22
  drb
22
23
  i18n (>= 1.6, < 2)
24
+ logger (>= 1.4.2)
23
25
  minitest (>= 5.1)
24
26
  mutex_m
27
+ securerandom (>= 0.3)
25
28
  tzinfo (~> 2.0)
26
29
  addressable (2.8.7)
27
30
  public_suffix (>= 2.0.2, < 7.0)
28
31
  ast (2.4.2)
29
- backport (1.2.0)
30
32
  base64 (0.2.0)
31
33
  benchmark (0.3.0)
32
34
  bigdecimal (3.1.8)
33
- byebug (11.1.3)
34
35
  capybara (3.40.0)
35
36
  addressable
36
37
  matrix
@@ -43,23 +44,20 @@ GEM
43
44
  concurrent-ruby (1.3.4)
44
45
  connection_pool (2.4.1)
45
46
  curb (1.0.6)
46
- diff-lcs (1.5.1)
47
- dotenv (3.1.2)
47
+ debug (1.7.0)
48
+ irb (>= 1.5.0)
49
+ reline (>= 0.3.1)
50
+ dotenv (3.1.4)
48
51
  drb (2.2.1)
49
- e2mmap (0.1.0)
50
52
  ethon (0.16.0)
51
53
  ffi (>= 1.15.0)
52
- ffi (1.17.0-arm64-darwin)
53
- ffi (1.17.0-x86_64-darwin)
54
- ffi (1.17.0-x86_64-linux-gnu)
54
+ ffi (1.17.0)
55
55
  i18n (1.14.6)
56
56
  concurrent-ruby (~> 1.0)
57
- jaro_winkler (1.6.0)
58
- json (2.7.2)
59
- kramdown (2.4.0)
60
- rexml
61
- kramdown-parser-gfm (1.1.0)
62
- kramdown (~> 2.0)
57
+ io-console (0.6.0)
58
+ irb (1.6.2)
59
+ reline (>= 0.3.0)
60
+ json (2.7.5)
63
61
  language_server-protocol (3.17.0.3)
64
62
  logger (1.6.1)
65
63
  matrix (0.4.2)
@@ -78,22 +76,21 @@ GEM
78
76
  ostruct (>= 0.2)
79
77
  ostruct (0.6.0)
80
78
  parallel (1.26.3)
81
- parser (3.3.5.0)
79
+ parser (3.3.5.1)
82
80
  ast (~> 2.4.1)
83
81
  racc
84
82
  public_suffix (6.0.1)
85
83
  racc (1.8.1)
86
- rack (3.1.7)
84
+ rack (3.1.8)
87
85
  rack-test (2.1.0)
88
86
  rack (>= 1.3)
89
87
  rainbow (3.1.1)
90
88
  rake (13.2.1)
91
- rbs (2.8.4)
92
89
  regexp_parser (2.9.2)
93
- reverse_markdown (2.1.1)
94
- nokogiri
95
- rexml (3.3.8)
96
- rubocop (1.66.1)
90
+ reline (0.3.2)
91
+ io-console (~> 0.5)
92
+ rexml (3.3.9)
93
+ rubocop (1.68.0)
97
94
  json (~> 2.3)
98
95
  language_server-protocol (>= 3.17.0)
99
96
  parallel (~> 1.10)
@@ -103,9 +100,9 @@ GEM
103
100
  rubocop-ast (>= 1.32.2, < 2.0)
104
101
  ruby-progressbar (~> 1.7)
105
102
  unicode-display_width (>= 2.4.0, < 3.0)
106
- rubocop-ast (1.32.3)
103
+ rubocop-ast (1.33.0)
107
104
  parser (>= 3.3.1.0)
108
- rubocop-md (1.2.3)
105
+ rubocop-md (1.2.4)
109
106
  rubocop (>= 1.45)
110
107
  rubocop-minitest (0.36.0)
111
108
  rubocop (>= 1.61, < 2.0)
@@ -115,7 +112,7 @@ GEM
115
112
  rubocop-performance (1.22.1)
116
113
  rubocop (>= 1.48.1, < 2.0)
117
114
  rubocop-ast (>= 1.31.1, < 2.0)
118
- rubocop-rails (2.26.1)
115
+ rubocop-rails (2.27.0)
119
116
  activesupport (>= 4.2.0)
120
117
  rack (>= 1.1)
121
118
  rubocop (>= 1.52.0, < 2.0)
@@ -130,32 +127,15 @@ GEM
130
127
  rubocop-rails (~> 2.0)
131
128
  ruby-progressbar (1.13.0)
132
129
  rubyzip (2.3.2)
133
- selenium-devtools (0.129.0)
130
+ securerandom (0.3.1)
131
+ selenium-devtools (0.130.0)
134
132
  selenium-webdriver (~> 4.2)
135
- selenium-webdriver (4.25.0)
133
+ selenium-webdriver (4.26.0)
136
134
  base64 (~> 0.2)
137
135
  logger (~> 1.4)
138
136
  rexml (~> 3.2, >= 3.2.5)
139
137
  rubyzip (>= 1.2.2, < 3.0)
140
138
  websocket (~> 1.0)
141
- solargraph (0.50.0)
142
- backport (~> 1.2)
143
- benchmark
144
- bundler (~> 2.0)
145
- diff-lcs (~> 1.4)
146
- e2mmap
147
- jaro_winkler (~> 1.5)
148
- kramdown (~> 2.3)
149
- kramdown-parser-gfm (~> 1.1)
150
- parser (~> 3.0)
151
- rbs (~> 2.0)
152
- reverse_markdown (~> 2.0)
153
- rubocop (~> 1.38)
154
- thor (~> 1.0)
155
- tilt (~> 2.0)
156
- yard (~> 0.9, >= 0.9.24)
157
- thor (1.3.2)
158
- tilt (2.4.0)
159
139
  typhoeus (1.4.1)
160
140
  ethon (>= 0.9.0)
161
141
  tzinfo (2.0.6)
@@ -164,7 +144,6 @@ GEM
164
144
  websocket (1.2.11)
165
145
  xpath (3.2.0)
166
146
  nokogiri (~> 1.8)
167
- yard (0.9.37)
168
147
 
169
148
  PLATFORMS
170
149
  arm64-darwin-21
@@ -175,7 +154,7 @@ PLATFORMS
175
154
 
176
155
  DEPENDENCIES
177
156
  birdsong!
178
- byebug
157
+ debug
179
158
  dotenv
180
159
  minitest
181
160
  rake
@@ -183,7 +162,6 @@ DEPENDENCIES
183
162
  rubocop-performance
184
163
  rubocop-rails
185
164
  rubocop-rails_config
186
- solargraph
187
165
 
188
166
  BUNDLED WITH
189
167
  2.3.26
data/birdsong.gemspec CHANGED
@@ -39,7 +39,7 @@ Gem::Specification.new do |spec|
39
39
  spec.add_dependency "selenium-devtools"
40
40
 
41
41
  # Dev dependencies
42
- spec.add_development_dependency "byebug"
42
+ spec.add_development_dependency "debug"
43
43
  spec.add_development_dependency "rake"
44
44
  spec.add_development_dependency "minitest"
45
45
  spec.add_development_dependency "rubocop"
@@ -47,7 +47,6 @@ Gem::Specification.new do |spec|
47
47
  spec.add_development_dependency "rubocop-rails_config"
48
48
  spec.add_development_dependency "rubocop-performance"
49
49
  spec.add_development_dependency "dotenv"
50
- spec.add_development_dependency "solargraph"
51
50
 
52
51
  # For more information and examples about making a new gem, checkout our
53
52
  # guide at: https://bundler.io/guides/creating_gem.html
@@ -67,11 +67,14 @@ module Birdsong
67
67
 
68
68
  page.driver.browser.intercept do |request, &continue|
69
69
  # This passes the request forward unmodified, since we only care about the response
70
+ # puts "checking request: #{request.url}"
71
+
70
72
  continue.call(request) && next unless request.url.include?(subpage_search)
71
73
 
74
+
72
75
  continue.call(request) do |response|
76
+
73
77
  # Check if not a CORS prefetch and finish up if not
74
- # puts "checking request: #{request.url}"
75
78
  if !response.body.empty? && response.body
76
79
  check_passed = true
77
80
  unless additional_search_parameters.nil?
@@ -107,7 +110,6 @@ module Birdsong
107
110
 
108
111
  page.driver.execute_script("window.stop();")
109
112
 
110
- # debugger if response_body.nil?
111
113
  raise Birdsong::NoTweetFoundError if response_body.nil?
112
114
  Oj.load(response_body)
113
115
  rescue Birdsong::WebDriverError
@@ -141,65 +143,49 @@ module Birdsong
141
143
  Capybara.current_driver = :selenium
142
144
  end
143
145
 
144
- def login
145
- # Reset the sessions so that there's nothing laying around
146
- page.quit
146
+ # def login
147
+ # # Reset the sessions so that there's nothing laying around
148
+ # page.quit
147
149
 
148
- # Check if we're on a Twitter page already, if not visit it.
149
- unless page.driver.browser.current_url.include?("twitter.com") || page.driver.browser.current_url.include?("x.com")
150
- # There seems to be a bug in the Linux ARM64 version of chromedriver where this will properly
151
- # navigate but then timeout, crashing it all up. So instead we check and raise the error when
152
- # that then fails again.
153
- page.driver.browser.navigate.to("https://x.com")
154
- end
150
+ # # Check if we're already on a Twitter/X page; if not, visit it.
151
+ # unless page.driver.browser.current_url.include? "twitter.com" || page.driver.browser.current_url.include? "x.com"
152
+ # # There seems to be a bug in the Linux ARM64 version of chromedriver where this will properly
153
+ # # navigate but then timeout, crashing it all up. So instead we check and raise the error when
154
+ # # that then fails again.
155
+ # page.driver.browser.navigate.to("https://x.com")
156
+ # end
155
157
 
156
- # We don't have to login if we already are
157
- begin
158
- return if find_field("Search", wait: 10).present?
159
- rescue Capybara::ElementNotFound; end
160
-
161
- page.driver.browser.find_element(link_text: "Sign in").click # Check if we're redirected to a login page, if we aren't we're already logged in
158
+ # # We don't have to login if we already are
159
+ # begin
160
+ # return if find_field("Search", wait: 10).present?
161
+ # rescue Capybara::ElementNotFound; end
162
162
 
163
+ # # Check if we're redirected to a login page, if we aren't we're already logged in
163
164
  # return unless page.has_xpath?('//*[@id="loginForm"]/div/div[3]/button')
164
165
 
165
- # Try to log in
166
- loop_count = 0
167
- while loop_count < 5 do
168
- 3.times do
169
- sleep(rand * 8.8)
170
- element = page.driver.browser.find_element(tag_name: "input", name: "text")
171
- next if element.nil?
172
- element.click
173
- break
174
- rescue StandardError => e
175
- puts e
176
- next
177
- end
178
-
179
- sleep(rand * 2.8)
180
- fill_in("text", with: ENV["TWITTER_USER_NAME"])
181
- sleep(rand * 2.8)
182
- find_button("Next").click
183
- sleep(rand * 2.1)
184
- fill_in("password", with: ENV["TWITTER_PASSWORD"])
185
-
186
- begin
187
- click_button("Log in", exact_text: true) # Note: "Log in" (lowercase `in`) instead redirects to Facebook's login page
188
- rescue Capybara::ElementNotFound; end # If we can't find it don't break horribly, just keep waiting
189
-
190
- break unless has_css?('p[data-testid="login-error-message"', wait: 10)
191
- loop_count += 1
192
- sleep(rand * 10.3)
193
- end
194
-
195
- # Sometimes Twitter just... doesn't let you log in
196
- raise "Twitter not accessible" if loop_count == 5
197
-
198
- # No we don't want to save our login credentials
199
- begin
200
- click_on("Save Info")
201
- rescue Capybara::ElementNotFound; end
202
- end
166
+ # # Try to log in
167
+ # loop_count = 0
168
+ # while loop_count < 5 do
169
+ # fill_in("username", with: ENV["TWITTER_USER_NAME"])
170
+ # fill_in("password", with: ENV["TWITTER_PASSWORD"])
171
+
172
+ # begin
173
+ # click_button("Log in", exact_text: true) # Note: "Log in" (lowercase `in`) instead redirects to Facebook's login page
174
+ # rescue Capybara::ElementNotFound; end # If we can't find it don't break horribly, just keep waiting
175
+
176
+ # break unless has_css?('p[data-testid="login-error-message"', wait: 10)
177
+ # loop_count += 1
178
+ # sleep(rand * 10.3)
179
+ # end
180
+
181
+ # # Sometimes Twitter just... doesn't let you log in
182
+ # raise "Twitter not accessible" if loop_count == 5
183
+
184
+ # # No we don't want to save our login credentials
185
+ # begin
186
+ # click_on("Save Info")
187
+ # rescue Capybara::ElementNotFound; end
188
+ # end
203
189
 
204
190
  def fetch_image(url)
205
191
  request = Typhoeus::Request.new(url, followlocation: true)
@@ -2,6 +2,7 @@
2
2
 
3
3
  require "typhoeus"
4
4
  require_relative "scraper"
5
+ require "debug"
5
6
 
6
7
  module Birdsong
7
8
  class TweetScraper < Scraper
@@ -20,31 +21,20 @@ module Birdsong
20
21
  # login
21
22
  graphql_object = get_content_of_subpage_from_url(
22
23
  "https://x.com/jack/status/#{id}",
23
- "/TweetResultByRestId",
24
+ "/graphql",
24
25
  "data,tweetResult,result"
25
26
  )
26
27
 
27
28
  graphql_object = graphql_object.first if graphql_object.kind_of?(Array)
28
29
  graphql_object = graphql_object["data"]["tweetResult"]["result"]
29
30
 
30
- raise Birdsong::NoTweetFoundError if graphql_object.nil?
31
-
32
31
  if graphql_object.key?("__typename") && graphql_object["__typename"] == "TweetUnavailable"
33
- raise Birdsong::NoTweetFoundError if graphql_object["reason"] != "NsfwLoggedOut"
34
- @@logger.info "Post is tagged NSFW, logging in to access..."
35
- # Let's login and start this over?
36
- login
37
- @@logger.info "Logged in, retrying post..."
38
-
39
- graphql_object = get_content_of_subpage_from_url(
40
- "https://x.com/jack/status/#{id}",
41
- "/TweetDetail"
42
- )
43
-
44
- # The format gets weird for this request
45
- graphql_object = graphql_object["data"]["threaded_conversation_with_injections_v2"]["instructions"][0]["entries"][0]["content"]["itemContent"]["tweet_results"]["result"]["tweet"]
32
+ raise Birdsong::NoTweetFoundError
46
33
  end
47
34
 
35
+ # Certain types of tweets are wrapped in a "tweet" object
36
+ graphql_object = graphql_object["tweet"] if graphql_object.key?("tweet")
37
+
48
38
  text = graphql_object["legacy"]["full_text"]
49
39
  date = graphql_object["legacy"]["created_at"]
50
40
  id = graphql_object["legacy"]["id_str"]
@@ -65,7 +55,7 @@ module Birdsong
65
55
  video_preview_image = Birdsong.retrieve_media(media["media_url_https"])
66
56
  video_variants = media["video_info"]["variants"]
67
57
  largest_bitrate_variant = video_variants.sort_by do |variant|
68
- variant.has_key?("bitrate") ? variant["bitrate"] : 0
58
+ variant["bitrate"].nil? ? 0 : variant["bitrate"]
69
59
  end.last
70
60
 
71
61
  videos << Birdsong.retrieve_media(largest_bitrate_variant["url"])
@@ -122,7 +112,7 @@ module Birdsong
122
112
  # since it's Instagram's fault for having such a fucked up obfuscated hierarchy # Take the screenshot and return it
123
113
  # rubocop:disable Lint/Debugger
124
114
  save_screenshot("#{Birdsong.temp_storage_location}/instagram_screenshot_#{SecureRandom.uuid}.png")
125
- # rubocop:enable Lint/Debugger
115
+ # rubocop:enable Lint/Debugger
126
116
  end
127
117
  end
128
118
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Birdsong
4
- VERSION = "0.2.8"
4
+ VERSION = "0.2.9"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: birdsong
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.8
4
+ version: 0.2.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Christopher Guess
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-10-21 00:00:00.000000000 Z
11
+ date: 2024-11-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: typhoeus
@@ -127,7 +127,7 @@ dependencies:
127
127
  - !ruby/object:Gem::Version
128
128
  version: '0'
129
129
  - !ruby/object:Gem::Dependency
130
- name: byebug
130
+ name: debug
131
131
  requirement: !ruby/object:Gem::Requirement
132
132
  requirements:
133
133
  - - ">="
@@ -238,20 +238,6 @@ dependencies:
238
238
  - - ">="
239
239
  - !ruby/object:Gem::Version
240
240
  version: '0'
241
- - !ruby/object:Gem::Dependency
242
- name: solargraph
243
- requirement: !ruby/object:Gem::Requirement
244
- requirements:
245
- - - ">="
246
- - !ruby/object:Gem::Version
247
- version: '0'
248
- type: :development
249
- prerelease: false
250
- version_requirements: !ruby/object:Gem::Requirement
251
- requirements:
252
- - - ">="
253
- - !ruby/object:Gem::Version
254
- version: '0'
255
241
  description:
256
242
  email:
257
243
  - cguess@gmail.com
@@ -262,6 +248,7 @@ files:
262
248
  - ".github/workflows/main.yml"
263
249
  - ".gitignore"
264
250
  - ".rubocop.yml"
251
+ - ".ruby-version"
265
252
  - CHANGELOG.md
266
253
  - CODE_OF_CONDUCT.md
267
254
  - Gemfile
@@ -301,7 +288,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
301
288
  - !ruby/object:Gem::Version
302
289
  version: '0'
303
290
  requirements: []
304
- rubygems_version: 3.5.22
291
+ rubygems_version: 3.2.33
305
292
  signing_key:
306
293
  specification_version: 4
307
294
  summary: A gem to interface with Twitter's API V2