forki 0.2.5 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cd276bc782515b3b7935eab759da6cf2acdafc146798a56e3e21ee667358ad3c
4
- data.tar.gz: 532d2c06542a0bdfe4e22f1a0b2ec3cf3ded6057b30de93386fad442bfa686ab
3
+ metadata.gz: 78986561738c2e71c7504b8d4810c790e1cefa212b97358eae17accf4b1c2131
4
+ data.tar.gz: 9f72cf4a6496e4c40f3d47d566c2ec9fd1096edf8e54257223a6b26d96f0c9b2
5
5
  SHA512:
6
- metadata.gz: 46aaf4eef616f99ca44eac48134b151af41e1755b808c2efef18fdca905956d8f5cb72d54e61070f64ae883b8e64e88f5adfb94f0d41a166eae136f0474133df
7
- data.tar.gz: 36c8bc7c506c952c036f38eb655522fad7a6b4cd6bf7c3de11e78e60aec4f5c3f124d5449e1aa40425a22cacee71e7c393239092a7e6dbf057a7e46e90457f1a
6
+ metadata.gz: a6fd3e3328b1c1c17d8d4bcab492588d88d7bcfc6feb20f1e85c5b75dfacc24b75c519d388353d733bf2ef0c6899f9c1804f82736d9bdb28971f21299159fed2
7
+ data.tar.gz: eb1f2608844ba87fc294d38091b6327de25e35f505ac1e873f5a56f797994de57fdf2a1e80d4781821330ff05cee15bc4f0d0fbe2f7a1b72ab7abab6806f7765
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- forki (0.2.1)
4
+ forki (0.2.5)
5
5
  apparition
6
6
  capybara
7
7
  oj
@@ -29,12 +29,13 @@ GEM
29
29
  i18n (>= 1.6, < 2)
30
30
  minitest (>= 5.1)
31
31
  tzinfo (~> 2.0)
32
- addressable (2.8.4)
32
+ addressable (2.8.6)
33
33
  public_suffix (>= 2.0.2, < 6.0)
34
34
  apparition (0.6.0)
35
35
  capybara (~> 3.13, < 4)
36
36
  websocket-driver (>= 0.6.5)
37
37
  ast (2.4.2)
38
+ bigdecimal (3.1.5)
38
39
  builder (3.2.4)
39
40
  byebug (11.1.3)
40
41
  capybara (3.39.2)
@@ -53,7 +54,7 @@ GEM
53
54
  erubi (1.12.0)
54
55
  ethon (0.16.0)
55
56
  ffi (>= 1.15.0)
56
- ffi (1.15.5)
57
+ ffi (1.16.3)
57
58
  i18n (1.13.0)
58
59
  concurrent-ruby (~> 1.0)
59
60
  json (2.6.3)
@@ -62,15 +63,16 @@ GEM
62
63
  nokogiri (>= 1.12.0)
63
64
  matrix (0.4.2)
64
65
  method_source (1.0.0)
65
- mini_mime (1.1.2)
66
+ mini_mime (1.1.5)
66
67
  minitest (5.18.0)
67
68
  nokogiri (1.15.1-arm64-darwin)
68
69
  racc (~> 1.4)
69
- oj (3.15.1)
70
+ oj (3.16.3)
71
+ bigdecimal (>= 3.0)
70
72
  parallel (1.23.0)
71
73
  parser (3.2.2.1)
72
74
  ast (~> 2.4.1)
73
- public_suffix (5.0.3)
75
+ public_suffix (5.0.4)
74
76
  racc (1.6.2)
75
77
  rack (2.2.4)
76
78
  rack-test (2.1.0)
@@ -90,7 +92,7 @@ GEM
90
92
  rainbow (3.1.1)
91
93
  rake (13.0.6)
92
94
  regexp_parser (2.8.0)
93
- rexml (3.2.5)
95
+ rexml (3.2.6)
94
96
  rubocop (1.51.0)
95
97
  json (~> 2.3)
96
98
  parallel (~> 1.10)
@@ -127,17 +129,17 @@ GEM
127
129
  rubocop-rails (~> 2.0)
128
130
  ruby-progressbar (1.13.0)
129
131
  rubyzip (2.3.2)
130
- selenium-webdriver (4.11.0)
132
+ selenium-webdriver (4.16.0)
131
133
  rexml (~> 3.2, >= 3.2.5)
132
134
  rubyzip (>= 1.2.2, < 3.0)
133
135
  websocket (~> 1.0)
134
136
  thor (1.2.2)
135
- typhoeus (1.4.0)
137
+ typhoeus (1.4.1)
136
138
  ethon (>= 0.9.0)
137
139
  tzinfo (2.0.6)
138
140
  concurrent-ruby (~> 1.0)
139
141
  unicode-display_width (2.4.2)
140
- websocket (1.2.9)
142
+ websocket (1.2.10)
141
143
  websocket-driver (0.7.6)
142
144
  websocket-extensions (>= 0.1.0)
143
145
  websocket-extensions (0.1.5)
@@ -65,14 +65,23 @@ module Forki
65
65
  graphql_objects.any? do |graphql_object| # if any GraphQL objects contain the top-level keys above, return true
66
66
  return true unless graphql_object.fetch("image", nil).nil? # so long as the associated values are not nil
67
67
  return true unless graphql_object.fetch("currMedia", nil).nil?
68
+ return true unless graphql_object.fetch("photo_image", nil).nil?
68
69
 
69
70
  # This is a complicated form for `web.facebook.com` posts
70
-
71
71
  if !graphql_object.dig("node", "comet_sections", "content", "story", "attachments").nil?
72
72
  if graphql_object["node"]["comet_sections"]["content"]["story"]["attachments"].count.positive?
73
73
  return true unless graphql_object["node"]["comet_sections"]["content"]["story"]["attachments"].first.dig("styles", "attachment", "all_subattachments", "nodes")&.first&.dig("media", "image", "uri").nil?
74
74
  end
75
75
  end
76
+
77
+ # Another weird format
78
+ begin
79
+ if !graphql_object["node"]["comet_sections"]["content"]["story"]["attachments"].empty?
80
+ return true unless graphql_object["node"]["comet_sections"]["content"]["story"]["attachments"].first.dig("styles", "attachment", "media", "photo_image", "uri").nil?
81
+ end
82
+ rescue StandardError
83
+
84
+ end
76
85
  end
77
86
  end
78
87
 
@@ -157,7 +166,7 @@ module Forki
157
166
  graphql_object_array = graphql_strings.map { |graphql_string| JSON.parse(graphql_string) }
158
167
 
159
168
  # Once in awhile it's really easy
160
- video_objects = graphql_object_array.filter {|go| go.has_key?("video") }
169
+ video_objects = graphql_object_array.filter { |go| go.has_key?("video") }
161
170
 
162
171
  if VideoSieve.can_process_with_sieve?(graphql_object_array)
163
172
  # Eventually all of this complexity will be replaced with this
@@ -170,9 +179,15 @@ module Forki
170
179
  return extract_video_post_data_alternative(graphql_object_array) if story_node_object.nil?
171
180
 
172
181
  if story_node_object["comet_sections"]["content"]["story"]["attachments"].first["styles"]["attachment"].key?("media")
173
- video_object = story_node_object["comet_sections"]["content"]["story"]["attachments"].first["styles"]["attachment"]["media"]["video"]
174
- creation_date = video_object["publish_time"] if video_object&.has_key("publish_time")
175
- creation_date = story_node_object["comet_sections"]["content"]["story"]["attachments"].first["styles"]["attachment"]["media"] if creation_date.nil?
182
+ media_object = story_node_object["comet_sections"]["content"]["story"]["attachments"].first["styles"]["attachment"]
183
+ if media_object.has_key?("video")
184
+ video_object = story_node_object["comet_sections"]["content"]["story"]["attachments"].first["styles"]["attachment"]["media"]["video"]
185
+ elsif media_object.has_key?("media") && media_object["media"].has_key?("browser_native_sd_url")
186
+ video_object = media_object["media"]
187
+ end
188
+
189
+ creation_date = video_object["publish_time"] if video_object&.has_key?("publish_time")
190
+ creation_date = story_node_object["comet_sections"]["content"]["story"]["attachments"].first["styles"]["attachment"]["media"]["publish_time"] if creation_date.nil?
176
191
  elsif story_node_object["comet_sections"]["content"]["story"]["attachments"].first["styles"]["attachment"].key?("style_infos")
177
192
  # For "Reels" we need a separate way to parse this
178
193
  video_object = story_node_object["comet_sections"]["content"]["story"]["attachments"].first["styles"]["attachment"]["style_infos"].first["fb_shorts_story"]["short_form_video_context"]["playback_video"]
@@ -181,13 +196,20 @@ module Forki
181
196
  raise "Unable to parse video object" if video_objects.empty?
182
197
  end
183
198
 
184
- feedback_object = story_node_object["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]
199
+ begin
200
+ feedback_object = story_node_object["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]
201
+ rescue NoMethodError
202
+ feedback_object = story_node_object["comet_sections"]["feedback"]["story"]["comet_feed_ufi_container"]["story"]["story_ufi_container"]["story"]["feedback_context"]["feedback_target_with_context"]
203
+ end
204
+
185
205
  if feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"].key?("cannot_see_top_custom_reactions")
186
206
  reaction_counts = extract_reaction_counts(feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["cannot_see_top_custom_reactions"]["top_reactions"])
187
207
  else
188
208
  reaction_counts = extract_reaction_counts(feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["top_reactions"])
189
209
  end
190
210
 
211
+ feedback_object = feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]
212
+
191
213
  share_count_object = feedback_object.fetch("share_count", {})
192
214
 
193
215
  if story_node_object["comet_sections"]["content"]["story"]["comet_sections"].key? "message"
@@ -202,20 +224,31 @@ module Forki
202
224
  else
203
225
  num_comments = feedback_object["comment_list_renderer"]["feedback"]["total_comment_count"]
204
226
  end
227
+
228
+ view_count = feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["video_view_count"]
229
+ reshare_warning = feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["should_show_reshare_warning"]
230
+ elsif feedback_object.has_key?("comments_count_summary_renderer")
231
+ num_comments = feedback_object["comments_count_summary_renderer"]["feedback"]["comment_rendering_instance"]["comments"]["total_count"]
232
+
233
+ view_count = feedback_object["video_view_count"]
234
+ reshare_warning = feedback_object["should_show_reshare_warning"]
205
235
  else
206
236
  if feedback_object["feedback"].key?("comment_count")
207
237
  num_comments = feedback_object["feedback"]["comment_count"]["total_count"]
208
238
  else
209
239
  num_comments = feedback_object["feedback"]["total_comment_count"]
210
240
  end
241
+
242
+ view_count = feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["video_view_count"]
243
+ reshare_warning = feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["should_show_reshare_warning"]
211
244
  end
212
245
 
213
246
  post_details = {
214
247
  id: video_object["id"],
215
248
  num_comments: num_comments,
216
249
  num_shares: share_count_object.fetch("count", nil),
217
- num_views: feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["video_view_count"],
218
- reshare_warning: feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["should_show_reshare_warning"],
250
+ num_views: view_count,
251
+ reshare_warning: reshare_warning,
219
252
  video_preview_image_url: video_object["preferred_thumbnail"]["image"]["uri"],
220
253
  video_url: video_object["browser_native_hd_url"] || video_object["browser_native_sd_url"],
221
254
  text: text,
@@ -242,9 +275,15 @@ module Forki
242
275
 
243
276
  share_count_object = feedback_object.fetch("share_count", {})
244
277
 
278
+ if feedback_object["comments_count_summary_renderer"]["feedback"].has_key?("comment_rendering_instance")
279
+ num_comments = feedback_object["comments_count_summary_renderer"]["feedback"]["comment_rendering_instance"]["comments"]["total_count"]
280
+ else
281
+ num_comments = feedback_object["comments_count_summary_renderer"]["feedback"]["total_comment_count"]
282
+ end
283
+
245
284
  post_details = {
246
285
  id: video_object["id"],
247
- num_comments: feedback_object["comments_count_summary_renderer"]["feedback"]["total_comment_count"],
286
+ num_comments: num_comments,
248
287
  num_shares: share_count_object.fetch("count", nil),
249
288
  num_views: feedback_object["video_view_count"],
250
289
  reshare_warning: feedback_object["should_show_reshare_warning"],
@@ -265,31 +304,55 @@ module Forki
265
304
  # Extracts data from an image post by parsing GraphQL strings as seen in the video post scraper above
266
305
  def extract_image_post_data(graphql_object_array)
267
306
  # This is a weird one-off style
307
+
268
308
  graphql_object = graphql_object_array.find { |graphql_object| !graphql_object.dig("node", "comet_sections", "content", "story", "attachments").nil? }
269
- unless graphql_object.nil? || graphql_object.count == 0
309
+ unless graphql_object.nil? || graphql_object.count.zero?
310
+ # TODO: These two branches are *super* similar, probably a lot of overlap
270
311
  attachments = graphql_object["node"]["comet_sections"]["content"]["story"]["attachments"]
271
312
 
272
- if graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"].has_key?("cannot_see_top_custom_reactions")
273
- reaction_counts = extract_reaction_counts(graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]["cannot_see_top_custom_reactions"]["top_reactions"])
313
+ if graphql_object["node"]["comet_sections"]["feedback"]["story"].key?("feedback_context")
314
+ feedback_object = graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]
315
+ elsif graphql_object["node"]["comet_sections"]["feedback"]["story"].has_key?("comet_feed_ufi_container")
316
+ feedback_object = graphql_object["node"]["comet_sections"]["feedback"]["story"]["comet_feed_ufi_container"]["story"]["story_ufi_container"]["story"]["feedback_context"]["feedback_target_with_context"]["comet_ufi_summary_and_actions_renderer"]["feedback"]
274
317
  else
275
- reaction_counts = extract_reaction_counts(graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]["top_reactions"])
318
+ feedback_object = graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]
319
+ end
320
+
321
+
322
+ if feedback_object.has_key?("cannot_see_top_custom_reactions")
323
+ reaction_counts = extract_reaction_counts(feedback_object["cannot_see_top_custom_reactions"]["top_reactions"])
324
+ else
325
+ reaction_counts = extract_reaction_counts(feedback_object["top_reactions"])
276
326
  end
277
327
 
278
328
  id = graphql_object["node"]["post_id"]
279
- num_comments = graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]["share_count"]["count"]
280
- reshare_warning = graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]["should_show_reshare_warning"]
281
- image_url = attachments.first["styles"]["attachment"]["all_subattachments"]["nodes"].first["media"]["image"]["uri"]
329
+ num_comments = feedback_object["share_count"]["count"]
330
+ reshare_warning = feedback_object["should_show_reshare_warning"]
331
+
332
+ if attachments.first["styles"]["attachment"].key?("all_subattachments")
333
+ image_url = attachments.first["styles"]["attachment"]["all_subattachments"]["nodes"].first["media"]["image"]["uri"]
334
+ else
335
+ image_url = attachments.first["styles"]["attachment"]["media"]["photo_image"]["uri"]
336
+ end
337
+
282
338
  text = graphql_object["node"]["comet_sections"]["content"]["story"]["message"]["text"]
283
339
  profile_link = graphql_object["node"]["comet_sections"]["content"]["story"]["actors"].first["url"]
284
340
  created_at = graphql_object["node"]["comet_sections"]["content"]["story"]["comet_sections"]["context_layout"]["story"]["comet_sections"]["metadata"].first["story"]["creation_time"]
285
341
  has_video = false
286
342
  else
287
-
288
343
  graphql_object_array.find { |graphql_object| graphql_object.key?("viewer_actor") && graphql_object.key?("display_comments") }
289
344
  curr_media_object = graphql_object_array.find { |graphql_object| graphql_object.key?("currMedia") }
290
345
  creation_story_object = graphql_object_array.find { |graphql_object| graphql_object.key?("creation_story") && graphql_object.key?("message") }
291
346
 
292
347
  feedback_object = graphql_object_array.find { |graphql_object| graphql_object.has_key?("comet_ufi_summary_and_actions_renderer") }["comet_ufi_summary_and_actions_renderer"]["feedback"]
348
+
349
+ if feedback_object.key?("top_reactions")
350
+ feedback_object = feedback_object
351
+ else
352
+ # POSSIBLY OUT OF DATE
353
+ feedback_object = feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]
354
+ end
355
+
293
356
  share_count_object = feedback_object.fetch("share_count", {})
294
357
 
295
358
  poster = creation_story_object["creation_story"]["comet_sections"]["actor_photo"]["story"]["actors"][0]
@@ -301,16 +364,21 @@ module Forki
301
364
  end
302
365
 
303
366
  id = curr_media_object["currMedia"]["id"],
367
+
304
368
  num_comments = feedback_object["comments_count_summary_renderer"]["feedback"]["total_comment_count"],
305
- num_shares = share_count_object.fetch("count", nil),
306
- reshare_warning = feedback_object["should_show_reshare_warning"],
307
- image_url = curr_media_object["currMedia"]["image"]["uri"],
308
- text = (creation_story_object["message"] || {}).fetch("text", nil),
309
- profile_link = poster["url"],
310
- created_at = curr_media_object["currMedia"]["created_time"],
311
- has_video = false
369
+ if num_comments.nil? && feedback_object.has_key?("comments_count_summary_renderer")
370
+ num_comments = feedback_object["comments_count_summary_renderer"]["feedback"]["comment_rendering_instance"]["comments"]["total_count"]
371
+ end
312
372
 
373
+ num_shares = share_count_object.fetch("count", nil)
374
+ reshare_warning = feedback_object["should_show_reshare_warning"]
375
+ image_url = curr_media_object["currMedia"]["image"]["uri"]
376
+ text = (creation_story_object["message"] || {}).fetch("text", nil)
377
+ profile_link = poster["url"]
378
+ created_at = curr_media_object["currMedia"]["created_time"]
379
+ has_video = false
313
380
  end
381
+
314
382
  post_details = {
315
383
  id: id,
316
384
  num_comments: num_comments,
@@ -457,4 +525,3 @@ module Forki
457
525
  end
458
526
 
459
527
  require_relative "sieves/video_sieves/video_sieve"
460
-
data/lib/forki/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Forki
4
- VERSION = "0.2.5"
4
+ VERSION = "0.2.6"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: forki
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.5
4
+ version: 0.2.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - ''
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-12-14 00:00:00.000000000 Z
11
+ date: 2024-04-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: capybara