forki 0.2.5 → 0.2.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cd276bc782515b3b7935eab759da6cf2acdafc146798a56e3e21ee667358ad3c
4
- data.tar.gz: 532d2c06542a0bdfe4e22f1a0b2ec3cf3ded6057b30de93386fad442bfa686ab
3
+ metadata.gz: 78986561738c2e71c7504b8d4810c790e1cefa212b97358eae17accf4b1c2131
4
+ data.tar.gz: 9f72cf4a6496e4c40f3d47d566c2ec9fd1096edf8e54257223a6b26d96f0c9b2
5
5
  SHA512:
6
- metadata.gz: 46aaf4eef616f99ca44eac48134b151af41e1755b808c2efef18fdca905956d8f5cb72d54e61070f64ae883b8e64e88f5adfb94f0d41a166eae136f0474133df
7
- data.tar.gz: 36c8bc7c506c952c036f38eb655522fad7a6b4cd6bf7c3de11e78e60aec4f5c3f124d5449e1aa40425a22cacee71e7c393239092a7e6dbf057a7e46e90457f1a
6
+ metadata.gz: a6fd3e3328b1c1c17d8d4bcab492588d88d7bcfc6feb20f1e85c5b75dfacc24b75c519d388353d733bf2ef0c6899f9c1804f82736d9bdb28971f21299159fed2
7
+ data.tar.gz: eb1f2608844ba87fc294d38091b6327de25e35f505ac1e873f5a56f797994de57fdf2a1e80d4781821330ff05cee15bc4f0d0fbe2f7a1b72ab7abab6806f7765
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- forki (0.2.1)
4
+ forki (0.2.5)
5
5
  apparition
6
6
  capybara
7
7
  oj
@@ -29,12 +29,13 @@ GEM
29
29
  i18n (>= 1.6, < 2)
30
30
  minitest (>= 5.1)
31
31
  tzinfo (~> 2.0)
32
- addressable (2.8.4)
32
+ addressable (2.8.6)
33
33
  public_suffix (>= 2.0.2, < 6.0)
34
34
  apparition (0.6.0)
35
35
  capybara (~> 3.13, < 4)
36
36
  websocket-driver (>= 0.6.5)
37
37
  ast (2.4.2)
38
+ bigdecimal (3.1.5)
38
39
  builder (3.2.4)
39
40
  byebug (11.1.3)
40
41
  capybara (3.39.2)
@@ -53,7 +54,7 @@ GEM
53
54
  erubi (1.12.0)
54
55
  ethon (0.16.0)
55
56
  ffi (>= 1.15.0)
56
- ffi (1.15.5)
57
+ ffi (1.16.3)
57
58
  i18n (1.13.0)
58
59
  concurrent-ruby (~> 1.0)
59
60
  json (2.6.3)
@@ -62,15 +63,16 @@ GEM
62
63
  nokogiri (>= 1.12.0)
63
64
  matrix (0.4.2)
64
65
  method_source (1.0.0)
65
- mini_mime (1.1.2)
66
+ mini_mime (1.1.5)
66
67
  minitest (5.18.0)
67
68
  nokogiri (1.15.1-arm64-darwin)
68
69
  racc (~> 1.4)
69
- oj (3.15.1)
70
+ oj (3.16.3)
71
+ bigdecimal (>= 3.0)
70
72
  parallel (1.23.0)
71
73
  parser (3.2.2.1)
72
74
  ast (~> 2.4.1)
73
- public_suffix (5.0.3)
75
+ public_suffix (5.0.4)
74
76
  racc (1.6.2)
75
77
  rack (2.2.4)
76
78
  rack-test (2.1.0)
@@ -90,7 +92,7 @@ GEM
90
92
  rainbow (3.1.1)
91
93
  rake (13.0.6)
92
94
  regexp_parser (2.8.0)
93
- rexml (3.2.5)
95
+ rexml (3.2.6)
94
96
  rubocop (1.51.0)
95
97
  json (~> 2.3)
96
98
  parallel (~> 1.10)
@@ -127,17 +129,17 @@ GEM
127
129
  rubocop-rails (~> 2.0)
128
130
  ruby-progressbar (1.13.0)
129
131
  rubyzip (2.3.2)
130
- selenium-webdriver (4.11.0)
132
+ selenium-webdriver (4.16.0)
131
133
  rexml (~> 3.2, >= 3.2.5)
132
134
  rubyzip (>= 1.2.2, < 3.0)
133
135
  websocket (~> 1.0)
134
136
  thor (1.2.2)
135
- typhoeus (1.4.0)
137
+ typhoeus (1.4.1)
136
138
  ethon (>= 0.9.0)
137
139
  tzinfo (2.0.6)
138
140
  concurrent-ruby (~> 1.0)
139
141
  unicode-display_width (2.4.2)
140
- websocket (1.2.9)
142
+ websocket (1.2.10)
141
143
  websocket-driver (0.7.6)
142
144
  websocket-extensions (>= 0.1.0)
143
145
  websocket-extensions (0.1.5)
@@ -65,14 +65,23 @@ module Forki
65
65
  graphql_objects.any? do |graphql_object| # if any GraphQL objects contain the top-level keys above, return true
66
66
  return true unless graphql_object.fetch("image", nil).nil? # so long as the associated values are not nil
67
67
  return true unless graphql_object.fetch("currMedia", nil).nil?
68
+ return true unless graphql_object.fetch("photo_image", nil).nil?
68
69
 
69
70
  # This is a complicated form for `web.facebook.com` posts
70
-
71
71
  if !graphql_object.dig("node", "comet_sections", "content", "story", "attachments").nil?
72
72
  if graphql_object["node"]["comet_sections"]["content"]["story"]["attachments"].count.positive?
73
73
  return true unless graphql_object["node"]["comet_sections"]["content"]["story"]["attachments"].first.dig("styles", "attachment", "all_subattachments", "nodes")&.first&.dig("media", "image", "uri").nil?
74
74
  end
75
75
  end
76
+
77
+ # Another weird format
78
+ begin
79
+ if !graphql_object["node"]["comet_sections"]["content"]["story"]["attachments"].empty?
80
+ return true unless graphql_object["node"]["comet_sections"]["content"]["story"]["attachments"].first.dig("styles", "attachment", "media", "photo_image", "uri").nil?
81
+ end
82
+ rescue StandardError
83
+
84
+ end
76
85
  end
77
86
  end
78
87
 
@@ -157,7 +166,7 @@ module Forki
157
166
  graphql_object_array = graphql_strings.map { |graphql_string| JSON.parse(graphql_string) }
158
167
 
159
168
  # Once in awhile it's really easy
160
- video_objects = graphql_object_array.filter {|go| go.has_key?("video") }
169
+ video_objects = graphql_object_array.filter { |go| go.has_key?("video") }
161
170
 
162
171
  if VideoSieve.can_process_with_sieve?(graphql_object_array)
163
172
  # Eventually all of this complexity will be replaced with this
@@ -170,9 +179,15 @@ module Forki
170
179
  return extract_video_post_data_alternative(graphql_object_array) if story_node_object.nil?
171
180
 
172
181
  if story_node_object["comet_sections"]["content"]["story"]["attachments"].first["styles"]["attachment"].key?("media")
173
- video_object = story_node_object["comet_sections"]["content"]["story"]["attachments"].first["styles"]["attachment"]["media"]["video"]
174
- creation_date = video_object["publish_time"] if video_object&.has_key("publish_time")
175
- creation_date = story_node_object["comet_sections"]["content"]["story"]["attachments"].first["styles"]["attachment"]["media"] if creation_date.nil?
182
+ media_object = story_node_object["comet_sections"]["content"]["story"]["attachments"].first["styles"]["attachment"]
183
+ if media_object.has_key?("video")
184
+ video_object = story_node_object["comet_sections"]["content"]["story"]["attachments"].first["styles"]["attachment"]["media"]["video"]
185
+ elsif media_object.has_key?("media") && media_object["media"].has_key?("browser_native_sd_url")
186
+ video_object = media_object["media"]
187
+ end
188
+
189
+ creation_date = video_object["publish_time"] if video_object&.has_key?("publish_time")
190
+ creation_date = story_node_object["comet_sections"]["content"]["story"]["attachments"].first["styles"]["attachment"]["media"]["publish_time"] if creation_date.nil?
176
191
  elsif story_node_object["comet_sections"]["content"]["story"]["attachments"].first["styles"]["attachment"].key?("style_infos")
177
192
  # For "Reels" we need a separate way to parse this
178
193
  video_object = story_node_object["comet_sections"]["content"]["story"]["attachments"].first["styles"]["attachment"]["style_infos"].first["fb_shorts_story"]["short_form_video_context"]["playback_video"]
@@ -181,13 +196,20 @@ module Forki
181
196
  raise "Unable to parse video object" if video_objects.empty?
182
197
  end
183
198
 
184
- feedback_object = story_node_object["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]
199
+ begin
200
+ feedback_object = story_node_object["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]
201
+ rescue NoMethodError
202
+ feedback_object = story_node_object["comet_sections"]["feedback"]["story"]["comet_feed_ufi_container"]["story"]["story_ufi_container"]["story"]["feedback_context"]["feedback_target_with_context"]
203
+ end
204
+
185
205
  if feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"].key?("cannot_see_top_custom_reactions")
186
206
  reaction_counts = extract_reaction_counts(feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["cannot_see_top_custom_reactions"]["top_reactions"])
187
207
  else
188
208
  reaction_counts = extract_reaction_counts(feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["top_reactions"])
189
209
  end
190
210
 
211
+ feedback_object = feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]
212
+
191
213
  share_count_object = feedback_object.fetch("share_count", {})
192
214
 
193
215
  if story_node_object["comet_sections"]["content"]["story"]["comet_sections"].key? "message"
@@ -202,20 +224,31 @@ module Forki
202
224
  else
203
225
  num_comments = feedback_object["comment_list_renderer"]["feedback"]["total_comment_count"]
204
226
  end
227
+
228
+ view_count = feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["video_view_count"]
229
+ reshare_warning = feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["should_show_reshare_warning"]
230
+ elsif feedback_object.has_key?("comments_count_summary_renderer")
231
+ num_comments = feedback_object["comments_count_summary_renderer"]["feedback"]["comment_rendering_instance"]["comments"]["total_count"]
232
+
233
+ view_count = feedback_object["video_view_count"]
234
+ reshare_warning = feedback_object["should_show_reshare_warning"]
205
235
  else
206
236
  if feedback_object["feedback"].key?("comment_count")
207
237
  num_comments = feedback_object["feedback"]["comment_count"]["total_count"]
208
238
  else
209
239
  num_comments = feedback_object["feedback"]["total_comment_count"]
210
240
  end
241
+
242
+ view_count = feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["video_view_count"]
243
+ reshare_warning = feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["should_show_reshare_warning"]
211
244
  end
212
245
 
213
246
  post_details = {
214
247
  id: video_object["id"],
215
248
  num_comments: num_comments,
216
249
  num_shares: share_count_object.fetch("count", nil),
217
- num_views: feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["video_view_count"],
218
- reshare_warning: feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["should_show_reshare_warning"],
250
+ num_views: view_count,
251
+ reshare_warning: reshare_warning,
219
252
  video_preview_image_url: video_object["preferred_thumbnail"]["image"]["uri"],
220
253
  video_url: video_object["browser_native_hd_url"] || video_object["browser_native_sd_url"],
221
254
  text: text,
@@ -242,9 +275,15 @@ module Forki
242
275
 
243
276
  share_count_object = feedback_object.fetch("share_count", {})
244
277
 
278
+ if feedback_object["comments_count_summary_renderer"]["feedback"].has_key?("comment_rendering_instance")
279
+ num_comments = feedback_object["comments_count_summary_renderer"]["feedback"]["comment_rendering_instance"]["comments"]["total_count"]
280
+ else
281
+ num_comments = feedback_object["comments_count_summary_renderer"]["feedback"]["total_comment_count"]
282
+ end
283
+
245
284
  post_details = {
246
285
  id: video_object["id"],
247
- num_comments: feedback_object["comments_count_summary_renderer"]["feedback"]["total_comment_count"],
286
+ num_comments: num_comments,
248
287
  num_shares: share_count_object.fetch("count", nil),
249
288
  num_views: feedback_object["video_view_count"],
250
289
  reshare_warning: feedback_object["should_show_reshare_warning"],
@@ -265,31 +304,55 @@ module Forki
265
304
  # Extracts data from an image post by parsing GraphQL strings as seen in the video post scraper above
266
305
  def extract_image_post_data(graphql_object_array)
267
306
  # This is a weird one-off style
307
+
268
308
  graphql_object = graphql_object_array.find { |graphql_object| !graphql_object.dig("node", "comet_sections", "content", "story", "attachments").nil? }
269
- unless graphql_object.nil? || graphql_object.count == 0
309
+ unless graphql_object.nil? || graphql_object.count.zero?
310
+ # TODO: These two branches are *super* similar, probably a lot of overlap
270
311
  attachments = graphql_object["node"]["comet_sections"]["content"]["story"]["attachments"]
271
312
 
272
- if graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"].has_key?("cannot_see_top_custom_reactions")
273
- reaction_counts = extract_reaction_counts(graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]["cannot_see_top_custom_reactions"]["top_reactions"])
313
+ if graphql_object["node"]["comet_sections"]["feedback"]["story"].key?("feedback_context")
314
+ feedback_object = graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]
315
+ elsif graphql_object["node"]["comet_sections"]["feedback"]["story"].has_key?("comet_feed_ufi_container")
316
+ feedback_object = graphql_object["node"]["comet_sections"]["feedback"]["story"]["comet_feed_ufi_container"]["story"]["story_ufi_container"]["story"]["feedback_context"]["feedback_target_with_context"]["comet_ufi_summary_and_actions_renderer"]["feedback"]
274
317
  else
275
- reaction_counts = extract_reaction_counts(graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]["top_reactions"])
318
+ feedback_object = graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]
319
+ end
320
+
321
+
322
+ if feedback_object.has_key?("cannot_see_top_custom_reactions")
323
+ reaction_counts = extract_reaction_counts(feedback_object["cannot_see_top_custom_reactions"]["top_reactions"])
324
+ else
325
+ reaction_counts = extract_reaction_counts(feedback_object["top_reactions"])
276
326
  end
277
327
 
278
328
  id = graphql_object["node"]["post_id"]
279
- num_comments = graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]["share_count"]["count"]
280
- reshare_warning = graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]["should_show_reshare_warning"]
281
- image_url = attachments.first["styles"]["attachment"]["all_subattachments"]["nodes"].first["media"]["image"]["uri"]
329
+ num_comments = feedback_object["share_count"]["count"]
330
+ reshare_warning = feedback_object["should_show_reshare_warning"]
331
+
332
+ if attachments.first["styles"]["attachment"].key?("all_subattachments")
333
+ image_url = attachments.first["styles"]["attachment"]["all_subattachments"]["nodes"].first["media"]["image"]["uri"]
334
+ else
335
+ image_url = attachments.first["styles"]["attachment"]["media"]["photo_image"]["uri"]
336
+ end
337
+
282
338
  text = graphql_object["node"]["comet_sections"]["content"]["story"]["message"]["text"]
283
339
  profile_link = graphql_object["node"]["comet_sections"]["content"]["story"]["actors"].first["url"]
284
340
  created_at = graphql_object["node"]["comet_sections"]["content"]["story"]["comet_sections"]["context_layout"]["story"]["comet_sections"]["metadata"].first["story"]["creation_time"]
285
341
  has_video = false
286
342
  else
287
-
288
343
  graphql_object_array.find { |graphql_object| graphql_object.key?("viewer_actor") && graphql_object.key?("display_comments") }
289
344
  curr_media_object = graphql_object_array.find { |graphql_object| graphql_object.key?("currMedia") }
290
345
  creation_story_object = graphql_object_array.find { |graphql_object| graphql_object.key?("creation_story") && graphql_object.key?("message") }
291
346
 
292
347
  feedback_object = graphql_object_array.find { |graphql_object| graphql_object.has_key?("comet_ufi_summary_and_actions_renderer") }["comet_ufi_summary_and_actions_renderer"]["feedback"]
348
+
349
+ if feedback_object.key?("top_reactions")
350
+ feedback_object = feedback_object
351
+ else
352
+ # POSSIBLY OUT OF DATE
353
+ feedback_object = feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]
354
+ end
355
+
293
356
  share_count_object = feedback_object.fetch("share_count", {})
294
357
 
295
358
  poster = creation_story_object["creation_story"]["comet_sections"]["actor_photo"]["story"]["actors"][0]
@@ -301,16 +364,21 @@ module Forki
301
364
  end
302
365
 
303
366
  id = curr_media_object["currMedia"]["id"],
367
+
304
368
  num_comments = feedback_object["comments_count_summary_renderer"]["feedback"]["total_comment_count"],
305
- num_shares = share_count_object.fetch("count", nil),
306
- reshare_warning = feedback_object["should_show_reshare_warning"],
307
- image_url = curr_media_object["currMedia"]["image"]["uri"],
308
- text = (creation_story_object["message"] || {}).fetch("text", nil),
309
- profile_link = poster["url"],
310
- created_at = curr_media_object["currMedia"]["created_time"],
311
- has_video = false
369
+ if num_comments.nil? && feedback_object.has_key?("comments_count_summary_renderer")
370
+ num_comments = feedback_object["comments_count_summary_renderer"]["feedback"]["comment_rendering_instance"]["comments"]["total_count"]
371
+ end
312
372
 
373
+ num_shares = share_count_object.fetch("count", nil)
374
+ reshare_warning = feedback_object["should_show_reshare_warning"]
375
+ image_url = curr_media_object["currMedia"]["image"]["uri"]
376
+ text = (creation_story_object["message"] || {}).fetch("text", nil)
377
+ profile_link = poster["url"]
378
+ created_at = curr_media_object["currMedia"]["created_time"]
379
+ has_video = false
313
380
  end
381
+
314
382
  post_details = {
315
383
  id: id,
316
384
  num_comments: num_comments,
@@ -457,4 +525,3 @@ module Forki
457
525
  end
458
526
 
459
527
  require_relative "sieves/video_sieves/video_sieve"
460
-
data/lib/forki/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Forki
4
- VERSION = "0.2.5"
4
+ VERSION = "0.2.6"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: forki
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.5
4
+ version: 0.2.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - ''
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-12-14 00:00:00.000000000 Z
11
+ date: 2024-04-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: capybara