forki 0.2.4 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f4868fd61b98809521249982e057f69d69018e8defba25792382f44e06b1921a
4
- data.tar.gz: 1c700275aa5fafc19e4f6b42845e5d52c02e2bc3e6cd3f1082e1a59c5c4f472d
3
+ metadata.gz: 78986561738c2e71c7504b8d4810c790e1cefa212b97358eae17accf4b1c2131
4
+ data.tar.gz: 9f72cf4a6496e4c40f3d47d566c2ec9fd1096edf8e54257223a6b26d96f0c9b2
5
5
  SHA512:
6
- metadata.gz: b5967db4c8e10b9d626767f041b2f59627170a49ceb4dc47e6da11f439a668bc656550e9b0c6a3a6a52180c8e75637ab2cbfb8a3c1b007bb91c4a5280e21aabd
7
- data.tar.gz: 8230ec7b913fca196e255820066efab1c520e42ac38818652cb252966a16b8ce915f5f1224606cd5eddb81968fcdbd9f37506e028c641bcad2a402536174d321
6
+ metadata.gz: a6fd3e3328b1c1c17d8d4bcab492588d88d7bcfc6feb20f1e85c5b75dfacc24b75c519d388353d733bf2ef0c6899f9c1804f82736d9bdb28971f21299159fed2
7
+ data.tar.gz: eb1f2608844ba87fc294d38091b6327de25e35f505ac1e873f5a56f797994de57fdf2a1e80d4781821330ff05cee15bc4f0d0fbe2f7a1b72ab7abab6806f7765
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- forki (0.2.1)
4
+ forki (0.2.5)
5
5
  apparition
6
6
  capybara
7
7
  oj
@@ -29,12 +29,13 @@ GEM
29
29
  i18n (>= 1.6, < 2)
30
30
  minitest (>= 5.1)
31
31
  tzinfo (~> 2.0)
32
- addressable (2.8.4)
32
+ addressable (2.8.6)
33
33
  public_suffix (>= 2.0.2, < 6.0)
34
34
  apparition (0.6.0)
35
35
  capybara (~> 3.13, < 4)
36
36
  websocket-driver (>= 0.6.5)
37
37
  ast (2.4.2)
38
+ bigdecimal (3.1.5)
38
39
  builder (3.2.4)
39
40
  byebug (11.1.3)
40
41
  capybara (3.39.2)
@@ -53,7 +54,7 @@ GEM
53
54
  erubi (1.12.0)
54
55
  ethon (0.16.0)
55
56
  ffi (>= 1.15.0)
56
- ffi (1.15.5)
57
+ ffi (1.16.3)
57
58
  i18n (1.13.0)
58
59
  concurrent-ruby (~> 1.0)
59
60
  json (2.6.3)
@@ -62,15 +63,16 @@ GEM
62
63
  nokogiri (>= 1.12.0)
63
64
  matrix (0.4.2)
64
65
  method_source (1.0.0)
65
- mini_mime (1.1.2)
66
+ mini_mime (1.1.5)
66
67
  minitest (5.18.0)
67
68
  nokogiri (1.15.1-arm64-darwin)
68
69
  racc (~> 1.4)
69
- oj (3.15.1)
70
+ oj (3.16.3)
71
+ bigdecimal (>= 3.0)
70
72
  parallel (1.23.0)
71
73
  parser (3.2.2.1)
72
74
  ast (~> 2.4.1)
73
- public_suffix (5.0.3)
75
+ public_suffix (5.0.4)
74
76
  racc (1.6.2)
75
77
  rack (2.2.4)
76
78
  rack-test (2.1.0)
@@ -90,7 +92,7 @@ GEM
90
92
  rainbow (3.1.1)
91
93
  rake (13.0.6)
92
94
  regexp_parser (2.8.0)
93
- rexml (3.2.5)
95
+ rexml (3.2.6)
94
96
  rubocop (1.51.0)
95
97
  json (~> 2.3)
96
98
  parallel (~> 1.10)
@@ -127,17 +129,17 @@ GEM
127
129
  rubocop-rails (~> 2.0)
128
130
  ruby-progressbar (1.13.0)
129
131
  rubyzip (2.3.2)
130
- selenium-webdriver (4.11.0)
132
+ selenium-webdriver (4.16.0)
131
133
  rexml (~> 3.2, >= 3.2.5)
132
134
  rubyzip (>= 1.2.2, < 3.0)
133
135
  websocket (~> 1.0)
134
136
  thor (1.2.2)
135
- typhoeus (1.4.0)
137
+ typhoeus (1.4.1)
136
138
  ethon (>= 0.9.0)
137
139
  tzinfo (2.0.6)
138
140
  concurrent-ruby (~> 1.0)
139
141
  unicode-display_width (2.4.2)
140
- websocket (1.2.9)
142
+ websocket (1.2.10)
141
143
  websocket-driver (0.7.6)
142
144
  websocket-extensions (>= 0.1.0)
143
145
  websocket-extensions (0.1.5)
@@ -65,14 +65,23 @@ module Forki
65
65
  graphql_objects.any? do |graphql_object| # if any GraphQL objects contain the top-level keys above, return true
66
66
  return true unless graphql_object.fetch("image", nil).nil? # so long as the associated values are not nil
67
67
  return true unless graphql_object.fetch("currMedia", nil).nil?
68
+ return true unless graphql_object.fetch("photo_image", nil).nil?
68
69
 
69
70
  # This is a complicated form for `web.facebook.com` posts
70
-
71
71
  if !graphql_object.dig("node", "comet_sections", "content", "story", "attachments").nil?
72
72
  if graphql_object["node"]["comet_sections"]["content"]["story"]["attachments"].count.positive?
73
73
  return true unless graphql_object["node"]["comet_sections"]["content"]["story"]["attachments"].first.dig("styles", "attachment", "all_subattachments", "nodes")&.first&.dig("media", "image", "uri").nil?
74
74
  end
75
75
  end
76
+
77
+ # Another weird format
78
+ begin
79
+ if !graphql_object["node"]["comet_sections"]["content"]["story"]["attachments"].empty?
80
+ return true unless graphql_object["node"]["comet_sections"]["content"]["story"]["attachments"].first.dig("styles", "attachment", "media", "photo_image", "uri").nil?
81
+ end
82
+ rescue StandardError
83
+
84
+ end
76
85
  end
77
86
  end
78
87
 
@@ -157,7 +166,7 @@ module Forki
157
166
  graphql_object_array = graphql_strings.map { |graphql_string| JSON.parse(graphql_string) }
158
167
 
159
168
  # Once in awhile it's really easy
160
- video_objects = graphql_object_array.filter {|go| go.has_key?("video") }
169
+ video_objects = graphql_object_array.filter { |go| go.has_key?("video") }
161
170
 
162
171
  if VideoSieve.can_process_with_sieve?(graphql_object_array)
163
172
  # Eventually all of this complexity will be replaced with this
@@ -170,9 +179,15 @@ module Forki
170
179
  return extract_video_post_data_alternative(graphql_object_array) if story_node_object.nil?
171
180
 
172
181
  if story_node_object["comet_sections"]["content"]["story"]["attachments"].first["styles"]["attachment"].key?("media")
173
- video_object = story_node_object["comet_sections"]["content"]["story"]["attachments"].first["styles"]["attachment"]["media"]["video"]
174
- creation_date = video_object["publish_time"] if video_object&.has_key("publish_time")
175
- creation_date = story_node_object["comet_sections"]["content"]["story"]["attachments"].first["styles"]["attachment"]["media"] if creation_date.nil?
182
+ media_object = story_node_object["comet_sections"]["content"]["story"]["attachments"].first["styles"]["attachment"]
183
+ if media_object.has_key?("video")
184
+ video_object = story_node_object["comet_sections"]["content"]["story"]["attachments"].first["styles"]["attachment"]["media"]["video"]
185
+ elsif media_object.has_key?("media") && media_object["media"].has_key?("browser_native_sd_url")
186
+ video_object = media_object["media"]
187
+ end
188
+
189
+ creation_date = video_object["publish_time"] if video_object&.has_key?("publish_time")
190
+ creation_date = story_node_object["comet_sections"]["content"]["story"]["attachments"].first["styles"]["attachment"]["media"]["publish_time"] if creation_date.nil?
176
191
  elsif story_node_object["comet_sections"]["content"]["story"]["attachments"].first["styles"]["attachment"].key?("style_infos")
177
192
  # For "Reels" we need a separate way to parse this
178
193
  video_object = story_node_object["comet_sections"]["content"]["story"]["attachments"].first["styles"]["attachment"]["style_infos"].first["fb_shorts_story"]["short_form_video_context"]["playback_video"]
@@ -181,8 +196,20 @@ module Forki
181
196
  raise "Unable to parse video object" if video_objects.empty?
182
197
  end
183
198
 
184
- feedback_object = story_node_object["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]
185
- reaction_counts = extract_reaction_counts(feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["cannot_see_top_custom_reactions"]["top_reactions"])
199
+ begin
200
+ feedback_object = story_node_object["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]
201
+ rescue NoMethodError
202
+ feedback_object = story_node_object["comet_sections"]["feedback"]["story"]["comet_feed_ufi_container"]["story"]["story_ufi_container"]["story"]["feedback_context"]["feedback_target_with_context"]
203
+ end
204
+
205
+ if feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"].key?("cannot_see_top_custom_reactions")
206
+ reaction_counts = extract_reaction_counts(feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["cannot_see_top_custom_reactions"]["top_reactions"])
207
+ else
208
+ reaction_counts = extract_reaction_counts(feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["top_reactions"])
209
+ end
210
+
211
+ feedback_object = feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]
212
+
186
213
  share_count_object = feedback_object.fetch("share_count", {})
187
214
 
188
215
  if story_node_object["comet_sections"]["content"]["story"]["comet_sections"].key? "message"
@@ -191,15 +218,37 @@ module Forki
191
218
  text = ""
192
219
  end
193
220
 
194
- feedback_object["comment_list_renderer"]["feedback"]["comment_count"]["total_count"]
195
- num_comments = feedback_object.has_key?("comment_list_renderer") ? feedback_object["comment_list_renderer"]["feedback"]["comment_count"]["total_count"] : feedback_object["comment_count"]["total_count"]
221
+ if feedback_object.has_key?("comment_list_renderer")
222
+ if feedback_object["comment_list_renderer"]["feedback"].key?("comment_count")
223
+ num_comments = feedback_object["comment_list_renderer"]["feedback"]["comment_count"]["total_count"]
224
+ else
225
+ num_comments = feedback_object["comment_list_renderer"]["feedback"]["total_comment_count"]
226
+ end
227
+
228
+ view_count = feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["video_view_count"]
229
+ reshare_warning = feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["should_show_reshare_warning"]
230
+ elsif feedback_object.has_key?("comments_count_summary_renderer")
231
+ num_comments = feedback_object["comments_count_summary_renderer"]["feedback"]["comment_rendering_instance"]["comments"]["total_count"]
232
+
233
+ view_count = feedback_object["video_view_count"]
234
+ reshare_warning = feedback_object["should_show_reshare_warning"]
235
+ else
236
+ if feedback_object["feedback"].key?("comment_count")
237
+ num_comments = feedback_object["feedback"]["comment_count"]["total_count"]
238
+ else
239
+ num_comments = feedback_object["feedback"]["total_comment_count"]
240
+ end
241
+
242
+ view_count = feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["video_view_count"]
243
+ reshare_warning = feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["should_show_reshare_warning"]
244
+ end
196
245
 
197
246
  post_details = {
198
247
  id: video_object["id"],
199
248
  num_comments: num_comments,
200
249
  num_shares: share_count_object.fetch("count", nil),
201
- num_views: feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["video_view_count"],
202
- reshare_warning: feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["should_show_reshare_warning"],
250
+ num_views: view_count,
251
+ reshare_warning: reshare_warning,
203
252
  video_preview_image_url: video_object["preferred_thumbnail"]["image"]["uri"],
204
253
  video_url: video_object["browser_native_hd_url"] || video_object["browser_native_sd_url"],
205
254
  text: text,
@@ -217,12 +266,24 @@ module Forki
217
266
  sidepane_object = graphql_object_array.find { |graphql_object| graphql_object.key?("tahoe_sidepane_renderer") }
218
267
  video_object = graphql_object_array.find { |graphql_object| graphql_object.has_key?("video") }
219
268
  feedback_object = sidepane_object["tahoe_sidepane_renderer"]["video"]["feedback"]
220
- reaction_counts = extract_reaction_counts(sidepane_object["tahoe_sidepane_renderer"]["video"]["feedback"]["cannot_see_top_custom_reactions"]["top_reactions"])
269
+
270
+ if sidepane_object["tahoe_sidepane_renderer"]["video"]["feedback"].key?("cannot_see_top_custom_reactions")
271
+ reaction_counts = extract_reaction_counts(sidepane_object["tahoe_sidepane_renderer"]["video"]["feedback"]["cannot_see_top_custom_reactions"]["top_reactions"])
272
+ else # if the video has no reactions, it will have a different structure
273
+ reaction_counts = extract_reaction_counts(sidepane_object["tahoe_sidepane_renderer"]["video"]["feedback"]["top_reactions"])
274
+ end
275
+
221
276
  share_count_object = feedback_object.fetch("share_count", {})
222
277
 
278
+ if feedback_object["comments_count_summary_renderer"]["feedback"].has_key?("comment_rendering_instance")
279
+ num_comments = feedback_object["comments_count_summary_renderer"]["feedback"]["comment_rendering_instance"]["comments"]["total_count"]
280
+ else
281
+ num_comments = feedback_object["comments_count_summary_renderer"]["feedback"]["total_comment_count"]
282
+ end
283
+
223
284
  post_details = {
224
285
  id: video_object["id"],
225
- num_comments: feedback_object["comments_count_summary_renderer"]["feedback"]["total_comment_count"],
286
+ num_comments: num_comments,
226
287
  num_shares: share_count_object.fetch("count", nil),
227
288
  num_views: feedback_object["video_view_count"],
228
289
  reshare_warning: feedback_object["should_show_reshare_warning"],
@@ -243,42 +304,81 @@ module Forki
243
304
  # Extracts data from an image post by parsing GraphQL strings as seen in the video post scraper above
244
305
  def extract_image_post_data(graphql_object_array)
245
306
  # This is a weird one-off style
307
+
246
308
  graphql_object = graphql_object_array.find { |graphql_object| !graphql_object.dig("node", "comet_sections", "content", "story", "attachments").nil? }
247
- unless graphql_object.nil? || graphql_object.count == 0
309
+ unless graphql_object.nil? || graphql_object.count.zero?
310
+ # TODO: These two branches are *super* similar, probably a lot of overlap
248
311
  attachments = graphql_object["node"]["comet_sections"]["content"]["story"]["attachments"]
249
312
 
250
- reaction_counts = extract_reaction_counts(graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]["cannot_see_top_custom_reactions"]["top_reactions"])
313
+ if graphql_object["node"]["comet_sections"]["feedback"]["story"].key?("feedback_context")
314
+ feedback_object = graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]
315
+ elsif graphql_object["node"]["comet_sections"]["feedback"]["story"].has_key?("comet_feed_ufi_container")
316
+ feedback_object = graphql_object["node"]["comet_sections"]["feedback"]["story"]["comet_feed_ufi_container"]["story"]["story_ufi_container"]["story"]["feedback_context"]["feedback_target_with_context"]["comet_ufi_summary_and_actions_renderer"]["feedback"]
317
+ else
318
+ feedback_object = graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]
319
+ end
320
+
321
+
322
+ if feedback_object.has_key?("cannot_see_top_custom_reactions")
323
+ reaction_counts = extract_reaction_counts(feedback_object["cannot_see_top_custom_reactions"]["top_reactions"])
324
+ else
325
+ reaction_counts = extract_reaction_counts(feedback_object["top_reactions"])
326
+ end
327
+
251
328
  id = graphql_object["node"]["post_id"]
252
- num_comments = graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]["share_count"]["count"]
253
- reshare_warning = graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]["should_show_reshare_warning"]
254
- image_url = attachments.first["styles"]["attachment"]["all_subattachments"]["nodes"].first["media"]["image"]["uri"]
329
+ num_comments = feedback_object["share_count"]["count"]
330
+ reshare_warning = feedback_object["should_show_reshare_warning"]
331
+
332
+ if attachments.first["styles"]["attachment"].key?("all_subattachments")
333
+ image_url = attachments.first["styles"]["attachment"]["all_subattachments"]["nodes"].first["media"]["image"]["uri"]
334
+ else
335
+ image_url = attachments.first["styles"]["attachment"]["media"]["photo_image"]["uri"]
336
+ end
337
+
255
338
  text = graphql_object["node"]["comet_sections"]["content"]["story"]["message"]["text"]
256
339
  profile_link = graphql_object["node"]["comet_sections"]["content"]["story"]["actors"].first["url"]
257
340
  created_at = graphql_object["node"]["comet_sections"]["content"]["story"]["comet_sections"]["context_layout"]["story"]["comet_sections"]["metadata"].first["story"]["creation_time"]
258
341
  has_video = false
259
342
  else
260
-
261
343
  graphql_object_array.find { |graphql_object| graphql_object.key?("viewer_actor") && graphql_object.key?("display_comments") }
262
344
  curr_media_object = graphql_object_array.find { |graphql_object| graphql_object.key?("currMedia") }
263
345
  creation_story_object = graphql_object_array.find { |graphql_object| graphql_object.key?("creation_story") && graphql_object.key?("message") }
264
346
 
265
347
  feedback_object = graphql_object_array.find { |graphql_object| graphql_object.has_key?("comet_ufi_summary_and_actions_renderer") }["comet_ufi_summary_and_actions_renderer"]["feedback"]
348
+
349
+ if feedback_object.key?("top_reactions")
350
+ feedback_object = feedback_object
351
+ else
352
+ # POSSIBLY OUT OF DATE
353
+ feedback_object = feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]
354
+ end
355
+
266
356
  share_count_object = feedback_object.fetch("share_count", {})
267
357
 
268
358
  poster = creation_story_object["creation_story"]["comet_sections"]["actor_photo"]["story"]["actors"][0]
269
359
 
270
- reaction_counts = extract_reaction_counts(feedback_object["cannot_see_top_custom_reactions"]["top_reactions"])
360
+ if feedback_object.has_key?("cannot_see_top_custom_reactions")
361
+ reaction_counts = extract_reaction_counts(feedback_object["cannot_see_top_custom_reactions"]["top_reactions"])
362
+ else
363
+ reaction_counts = extract_reaction_counts(feedback_object["top_reactions"])
364
+ end
365
+
271
366
  id = curr_media_object["currMedia"]["id"],
367
+
272
368
  num_comments = feedback_object["comments_count_summary_renderer"]["feedback"]["total_comment_count"],
273
- num_shares = share_count_object.fetch("count", nil),
274
- reshare_warning = feedback_object["should_show_reshare_warning"],
275
- image_url = curr_media_object["currMedia"]["image"]["uri"],
276
- text = (creation_story_object["message"] || {}).fetch("text", nil),
277
- profile_link = poster["url"],
278
- created_at = curr_media_object["currMedia"]["created_time"],
279
- has_video = false
369
+ if num_comments.nil? && feedback_object.has_key?("comments_count_summary_renderer")
370
+ num_comments = feedback_object["comments_count_summary_renderer"]["feedback"]["comment_rendering_instance"]["comments"]["total_count"]
371
+ end
280
372
 
373
+ num_shares = share_count_object.fetch("count", nil)
374
+ reshare_warning = feedback_object["should_show_reshare_warning"]
375
+ image_url = curr_media_object["currMedia"]["image"]["uri"]
376
+ text = (creation_story_object["message"] || {}).fetch("text", nil)
377
+ profile_link = poster["url"]
378
+ created_at = curr_media_object["currMedia"]["created_time"]
379
+ has_video = false
281
380
  end
381
+
282
382
  post_details = {
283
383
  id: id,
284
384
  num_comments: num_comments,
@@ -303,7 +403,12 @@ module Forki
303
403
  (graphql_string.include?("live_status")) })
304
404
  video_permalink = creation_story_object["creation_story"]["shareable"]["url"].delete("\\")
305
405
  media_object = video_object["video"]["story"]["attachments"][0]["media"]
306
- reaction_counts = extract_reaction_counts(creation_story_object["feedback"]["cannot_see_top_custom_reactions"]["top_reactions"])
406
+
407
+ if creation_story_object["feedback"].key?("cannot_see_top_custom_reactions")
408
+ reaction_counts = extract_reaction_counts(creation_story_object["feedback"]["cannot_see_top_custom_reactions"]["top_reactions"])
409
+ else
410
+ reaction_counts = extract_reaction_counts(creation_story_object["feedback"]["top_reactions"])
411
+ end
307
412
 
308
413
  post_details = {
309
414
  id: video_object["id"],
@@ -331,7 +436,11 @@ module Forki
331
436
  (graphql.include? "creation_story") })["video"]["creation_story"]
332
437
  media_object = JSON.parse(graphql_strings.find { |graphql| graphql.include? "playable_url" })["video"]["creation_story"]["attachments"][0]["media"]
333
438
  video_permalink = creation_story_object["shareable"]["url"].delete("\\")
334
- reaction_counts = extract_reaction_counts(creation_story_object["feedback_context"]["feedback_target_with_context"]["cannot_see_top_custom_reactions"]["top_reactions"])
439
+ if creation_story_object["feedback_context"]["feedback_target_with_context"].key?("cannot_see_top_custom_reactions")
440
+ reaction_counts = extract_reaction_counts(creation_story_object["feedback_context"]["feedback_target_with_context"]["cannot_see_top_custom_reactions"]["top_reactions"])
441
+ else
442
+ reaction_counts = extract_reaction_counts(creation_story_object["feedback_context"]["feedback_target_with_context"]["top_reactions"])
443
+ end
335
444
 
336
445
  post_details = {
337
446
  id: creation_story_object["shareable"]["id"],
@@ -416,4 +525,3 @@ module Forki
416
525
  end
417
526
 
418
527
  require_relative "sieves/video_sieves/video_sieve"
419
-
@@ -14,7 +14,7 @@ class VideoSieve
14
14
  private
15
15
 
16
16
  def self.sieve_class_for_graphql_objects(graphql_objects)
17
- sieves = [VideoSieveWatchTab, VideoSieveVideoPage, VideoSieveReel]
17
+ sieves = [VideoSieveWatchTab, VideoSieveVideoPage, VideoSieveVideoPage2, VideoSieveReel, VideoSieveReel2]
18
18
  sieves.detect { |sieve| sieve.check(graphql_objects) }
19
19
  end
20
20
  end
@@ -4,6 +4,15 @@ class VideoSieveReel < VideoSieve
4
4
  video_object = self.extractor(graphql_objects)
5
5
 
6
6
  return false unless video_object.has_key?("short_form_video_context")
7
+
8
+ # In relation to video_sieve_reel_2
9
+ comment_count = graphql_objects.filter do |go|
10
+ go = go.first if go.kind_of?(Array) && !go.empty?
11
+ !go.dig("feedback", "total_comment_count").nil?
12
+ end.first
13
+
14
+ return false unless comment_count.nil?
15
+
7
16
  true
8
17
  rescue StandardError
9
18
  return false
@@ -0,0 +1,88 @@
1
+ class VideoSieveReel2 < VideoSieve
2
+ # To check if it's valid for the inputted graphql objects
3
+ def self.check(graphql_objects)
4
+ video_object = self.extractor(graphql_objects)
5
+
6
+ return false unless video_object.has_key?("short_form_video_context")
7
+
8
+ comment_count = graphql_objects.filter do |go|
9
+ go = go.first if go.kind_of?(Array) && !go.empty?
10
+ !go.dig("feedback", "total_comment_count").nil?
11
+ end.first
12
+
13
+ return false if comment_count.nil?
14
+
15
+ true
16
+ rescue StandardError
17
+ return false
18
+ end
19
+
20
+ # output the expected format of:
21
+ #
22
+ # post_details = {
23
+ # id: video_object["id"],
24
+ # num_comments: num_comments,
25
+ # num_shares: share_count_object.fetch("count", nil),
26
+ # num_views: feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["video_view_count"],
27
+ # reshare_warning: feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["should_show_reshare_warning"],
28
+ # video_preview_image_url: video_object["preferred_thumbnail"]["image"]["uri"],
29
+ # video_url: video_object["browser_native_hd_url"] || video_object["browser_native_sd_url"],
30
+ # text: text,
31
+ # created_at: creation_date,
32
+ # profile_link: story_node_object["comet_sections"]["context_layout"]["story"]["comet_sections"]["actor_photo"]["story"]["actors"][0]["url"],
33
+ # has_video: true
34
+ # }
35
+ # post_details[:video_preview_image_file] = Forki.retrieve_media(post_details[:video_preview_image_url])
36
+ # post_details[:video_file] = Forki.retrieve_media(post_details[:video_url])
37
+ # post_details[:reactions] = reaction_counts
38
+
39
+ def self.sieve(graphql_objects)
40
+ video_object = self.extractor(graphql_objects)
41
+
42
+
43
+ feedback_object = graphql_objects.filter do |go|
44
+ go = go.first if go.kind_of?(Array) && !go.empty?
45
+ !go.dig("feedback", "total_comment_count").nil?
46
+ end.first
47
+
48
+ reels_feedback_renderer = graphql_objects.filter do |go|
49
+ go.dig("reels_feedback_renderer")
50
+ end.first
51
+
52
+ reels_feedback_renderer["reels_feedback_renderer"]["story"]
53
+ reshare_warning = video_object["short_form_video_context"]["playback_video"].dig("warning_screen_renderer", "cix_screen", "view_model", "__typename") == "OverlayWarningScreenViewModel"
54
+
55
+ video_preview_image_url = video_object["short_form_video_context"]["playback_video"]["preferred_thumbnail"]["image"]["uri"]
56
+ video_url = video_object["short_form_video_context"]["playback_video"]["browser_native_hd_url"] || video_object["short_form_video_context"]["playback_video"]["browser_native_sd_url"]
57
+
58
+ post_details = {
59
+ id: video_object["short_form_video_context"]["video"]["id"],
60
+ num_comments: feedback_object["feedback"]["total_comment_count"],
61
+ num_shared: Forki::Scraper.extract_int_from_num_element(feedback_object["feedback"]["share_count_reduced"]),
62
+ num_views: nil,
63
+ reshare_warning: reshare_warning,
64
+ video_preview_image_url: video_preview_image_url,
65
+ video_url: video_url,
66
+ text: nil, # Reels don't have text
67
+ created_at: video_object["creation_time"],
68
+ profile_link: video_object["short_form_video_context"]["video_owner"]["url"],
69
+ has_video: true,
70
+ video_preview_image_file: Forki.retrieve_media(video_preview_image_url),
71
+ video_file: Forki.retrieve_media(video_url),
72
+ reactions: nil # Only available on comments it seems? Look into this again sometime
73
+ }
74
+ rescue StandardError => e
75
+ debugger
76
+ end
77
+
78
+ private
79
+
80
+ def self.extractor(graphql_objects)
81
+ video_objects = graphql_objects.filter do |go|
82
+ go = go.first if go.kind_of?(Array) && !go.empty?
83
+ go.has_key?("video")
84
+ end
85
+
86
+ video_objects.first.dig("video", "creation_story")
87
+ end
88
+ end
@@ -4,9 +4,13 @@ class VideoSieveVideoPage < VideoSieve
4
4
  story_node_object = self.extractor(graphql_objects) # This will error out
5
5
  return false unless story_node_object["content"]["story"]["attachments"].first["styles"]["attachment"].has_key?("media")
6
6
 
7
+ feedback_object = story_node_object["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]
8
+ # This is what differs from video_sieve_video_page_2.rb, where this key is unnested
9
+ return false unless feedback_object.has_key?("cannot_see_top_custom_reactions")
10
+
7
11
  true
8
- rescue StandardError
9
- return false
12
+ rescue StandardError => e
13
+ false
10
14
  end
11
15
 
12
16
  # output the expected format of:
@@ -39,7 +43,7 @@ class VideoSieveVideoPage < VideoSieve
39
43
  video_url = video_object["browser_native_hd_url"]
40
44
  video_url = video_object["browser_native_sd_url"] if video_url.nil?
41
45
 
42
- post_details = {
46
+ {
43
47
  id: video_object["id"],
44
48
  num_comments: feedback_object["total_comment_count"],
45
49
  num_shared: feedback_object["share_count"]["count"],
@@ -0,0 +1,70 @@
1
+ class VideoSieveVideoPage2 < VideoSieve
2
+ # To check if it's valid for the inputted graphql objects
3
+ def self.check(graphql_objects)
4
+ story_node_object = self.extractor(graphql_objects) # This will error out
5
+ return false unless story_node_object["content"]["story"]["attachments"].first["styles"]["attachment"].has_key?("media")
6
+
7
+ feedback_object = story_node_object["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]
8
+ # This is what differs from video_sieve_video_page.rb, where this key is nested further
9
+ return false unless feedback_object.has_key?("top_reactions")
10
+
11
+ true
12
+ rescue StandardError
13
+ false
14
+ end
15
+
16
+ # output the expected format of:
17
+ #
18
+ # post_details = {
19
+ # id: video_object["id"],
20
+ # num_comments: num_comments,
21
+ # num_shares: share_count_object.fetch("count", nil),
22
+ # num_views: feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["video_view_count"],
23
+ # reshare_warning: feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["should_show_reshare_warning"],
24
+ # video_preview_image_url: video_object["preferred_thumbnail"]["image"]["uri"],
25
+ # video_url: video_object["browser_native_hd_url"] || video_object["browser_native_sd_url"],
26
+ # text: text,
27
+ # created_at: creation_date,
28
+ # profile_link: story_node_object["comet_sections"]["context_layout"]["story"]["comet_sections"]["actor_photo"]["story"]["actors"][0]["url"],
29
+ # has_video: true
30
+ # }
31
+ # post_details[:video_preview_image_file] = Forki.retrieve_media(post_details[:video_preview_image_url])
32
+ # post_details[:video_file] = Forki.retrieve_media(post_details[:video_url])
33
+ # post_details[:reactions] = reaction_counts
34
+
35
+ def self.sieve(graphql_objects)
36
+ extracted_text = self.extractor(graphql_objects)
37
+
38
+ story_object = extracted_text["content"]["story"]
39
+ video_object = extracted_text["content"]["story"]["attachments"].first["styles"]["attachment"]["media"]
40
+ feedback_object = extracted_text["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]
41
+
42
+ video_preview_image_url = video_object["preferred_thumbnail"]["image"]["uri"]
43
+ video_url = video_object["browser_native_hd_url"]
44
+ video_url = video_object["browser_native_sd_url"] if video_url.nil?
45
+
46
+ {
47
+ id: video_object["id"],
48
+ num_comments: feedback_object["total_comment_count"],
49
+ num_shared: feedback_object["share_count"]["count"],
50
+ num_views: nil,
51
+ reshare_warning: feedback_object["should_show_reshare_warning"],
52
+ video_preview_image_url: video_preview_image_url,
53
+ video_url: video_url,
54
+ text: story_object["message"]["text"],
55
+ created_at: video_object["publish_time"],
56
+ profile_link: story_object["actors"].first["url"],
57
+ has_video: true,
58
+ video_preview_image_file: Forki.retrieve_media(video_preview_image_url),
59
+ video_file: Forki.retrieve_media(video_url),
60
+ reactions: feedback_object["top_reactions"]["edges"]
61
+ }
62
+ end
63
+
64
+ private
65
+
66
+ def self.extractor(graphql_objects)
67
+ story_node_object = graphql_objects.find { |graphql_object| graphql_object.key? "node" }&.fetch("node", nil) # user posted video
68
+ story_node_object["comet_sections"]
69
+ end
70
+ end
@@ -57,6 +57,12 @@ class VideoSieveWatchTab < VideoSieve
57
57
  profile_link = filtered_json["attachments"].first["media"]["creation_story"]["comet_sections"]["title"]["story"]["actors"].first["url"]
58
58
  end
59
59
 
60
+ if feedback_object.key?("cannot_see_top_custom_reactions")
61
+ reactions = feedback_object["cannot_see_top_custom_reactions"]["top_reactions"]["edges"]
62
+ else
63
+ reactions = feedback_object["top_reactions"]["edges"]
64
+ end
65
+
60
66
  post_details = {
61
67
  id: video_object.dig("shareable", "id") || video_object["attachments"].first["media"]["id"],
62
68
  num_comments: feedback_object["total_comment_count"],
@@ -71,7 +77,7 @@ class VideoSieveWatchTab < VideoSieve
71
77
  has_video: true,
72
78
  video_preview_image_file: Forki.retrieve_media(video_preview_image_url),
73
79
  video_file: Forki.retrieve_media(video_url),
74
- reactions: feedback_object["cannot_see_top_custom_reactions"]["top_reactions"]["edges"]
80
+ reactions: reactions
75
81
  }
76
82
  end
77
83
 
data/lib/forki/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Forki
4
- VERSION = "0.2.4"
4
+ VERSION = "0.2.6"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: forki
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.4
4
+ version: 0.2.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - ''
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-10-12 00:00:00.000000000 Z
11
+ date: 2024-04-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: capybara
@@ -127,7 +127,9 @@ files:
127
127
  - lib/forki/scrapers/sieves/image_sieves/image_sieve.rb.rb
128
128
  - lib/forki/scrapers/sieves/video_sieves/video_sieve.rb
129
129
  - lib/forki/scrapers/sieves/video_sieves/video_sieve_reel.rb
130
+ - lib/forki/scrapers/sieves/video_sieves/video_sieve_reel_2.rb
130
131
  - lib/forki/scrapers/sieves/video_sieves/video_sieve_video_page.rb
132
+ - lib/forki/scrapers/sieves/video_sieves/video_sieve_video_page_2.rb
131
133
  - lib/forki/scrapers/sieves/video_sieves/video_sieve_watch_tab.rb
132
134
  - lib/forki/scrapers/user_scraper.rb
133
135
  - lib/forki/user.rb