forki 0.2.5 → 0.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cd276bc782515b3b7935eab759da6cf2acdafc146798a56e3e21ee667358ad3c
4
- data.tar.gz: 532d2c06542a0bdfe4e22f1a0b2ec3cf3ded6057b30de93386fad442bfa686ab
3
+ metadata.gz: cf93e965787eaf05b26f6ea1377775fb61ed131a52458a371336474d09e4a639
4
+ data.tar.gz: 729e9409bf76eb8551913f64e02d3905a878d057f045e16a64f712926d0d5cc8
5
5
  SHA512:
6
- metadata.gz: 46aaf4eef616f99ca44eac48134b151af41e1755b808c2efef18fdca905956d8f5cb72d54e61070f64ae883b8e64e88f5adfb94f0d41a166eae136f0474133df
7
- data.tar.gz: 36c8bc7c506c952c036f38eb655522fad7a6b4cd6bf7c3de11e78e60aec4f5c3f124d5449e1aa40425a22cacee71e7c393239092a7e6dbf057a7e46e90457f1a
6
+ metadata.gz: 6d710aee0bb1ae64c3796de31f85a9e39f26791bd12a653785544099d9e10629a3a239f69dff5702b80b009d1e3b6c9abfef50109e7078dcb70194f9f5c65384
7
+ data.tar.gz: 0b34b2dceaeff07c844e9fc8b42f27bfe3fbaae2c631bfe80f605f1c7caa821f0387780b37cc1bdb9b6735c20a967ba51e2d6a58a0a0478ee624520ab060402d
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- forki (0.2.1)
4
+ forki (0.2.5)
5
5
  apparition
6
6
  capybara
7
7
  oj
@@ -29,12 +29,13 @@ GEM
29
29
  i18n (>= 1.6, < 2)
30
30
  minitest (>= 5.1)
31
31
  tzinfo (~> 2.0)
32
- addressable (2.8.4)
32
+ addressable (2.8.6)
33
33
  public_suffix (>= 2.0.2, < 6.0)
34
34
  apparition (0.6.0)
35
35
  capybara (~> 3.13, < 4)
36
36
  websocket-driver (>= 0.6.5)
37
37
  ast (2.4.2)
38
+ bigdecimal (3.1.5)
38
39
  builder (3.2.4)
39
40
  byebug (11.1.3)
40
41
  capybara (3.39.2)
@@ -53,7 +54,7 @@ GEM
53
54
  erubi (1.12.0)
54
55
  ethon (0.16.0)
55
56
  ffi (>= 1.15.0)
56
- ffi (1.15.5)
57
+ ffi (1.16.3)
57
58
  i18n (1.13.0)
58
59
  concurrent-ruby (~> 1.0)
59
60
  json (2.6.3)
@@ -62,15 +63,16 @@ GEM
62
63
  nokogiri (>= 1.12.0)
63
64
  matrix (0.4.2)
64
65
  method_source (1.0.0)
65
- mini_mime (1.1.2)
66
+ mini_mime (1.1.5)
66
67
  minitest (5.18.0)
67
68
  nokogiri (1.15.1-arm64-darwin)
68
69
  racc (~> 1.4)
69
- oj (3.15.1)
70
+ oj (3.16.3)
71
+ bigdecimal (>= 3.0)
70
72
  parallel (1.23.0)
71
73
  parser (3.2.2.1)
72
74
  ast (~> 2.4.1)
73
- public_suffix (5.0.3)
75
+ public_suffix (5.0.4)
74
76
  racc (1.6.2)
75
77
  rack (2.2.4)
76
78
  rack-test (2.1.0)
@@ -90,7 +92,7 @@ GEM
90
92
  rainbow (3.1.1)
91
93
  rake (13.0.6)
92
94
  regexp_parser (2.8.0)
93
- rexml (3.2.5)
95
+ rexml (3.2.6)
94
96
  rubocop (1.51.0)
95
97
  json (~> 2.3)
96
98
  parallel (~> 1.10)
@@ -127,17 +129,17 @@ GEM
127
129
  rubocop-rails (~> 2.0)
128
130
  ruby-progressbar (1.13.0)
129
131
  rubyzip (2.3.2)
130
- selenium-webdriver (4.11.0)
132
+ selenium-webdriver (4.16.0)
131
133
  rexml (~> 3.2, >= 3.2.5)
132
134
  rubyzip (>= 1.2.2, < 3.0)
133
135
  websocket (~> 1.0)
134
136
  thor (1.2.2)
135
- typhoeus (1.4.0)
137
+ typhoeus (1.4.1)
136
138
  ethon (>= 0.9.0)
137
139
  tzinfo (2.0.6)
138
140
  concurrent-ruby (~> 1.0)
139
141
  unicode-display_width (2.4.2)
140
- websocket (1.2.9)
142
+ websocket (1.2.10)
141
143
  websocket-driver (0.7.6)
142
144
  websocket-extensions (>= 0.1.0)
143
145
  websocket-extensions (0.1.5)
@@ -65,14 +65,26 @@ module Forki
65
65
  graphql_objects.any? do |graphql_object| # if any GraphQL objects contain the top-level keys above, return true
66
66
  return true unless graphql_object.fetch("image", nil).nil? # so long as the associated values are not nil
67
67
  return true unless graphql_object.fetch("currMedia", nil).nil?
68
+ return true unless graphql_object.fetch("photo_image", nil).nil?
68
69
 
69
70
  # This is a complicated form for `web.facebook.com` posts
70
-
71
71
  if !graphql_object.dig("node", "comet_sections", "content", "story", "attachments").nil?
72
72
  if graphql_object["node"]["comet_sections"]["content"]["story"]["attachments"].count.positive?
73
73
  return true unless graphql_object["node"]["comet_sections"]["content"]["story"]["attachments"].first.dig("styles", "attachment", "all_subattachments", "nodes")&.first&.dig("media", "image", "uri").nil?
74
+
75
+ # Another version I guess
76
+ return true unless graphql_object["node"]["comet_sections"]["content"]["story"]["attachments"].first.dig("styles", "attachment", "media", "large_share_image")&.dig("uri").nil?
74
77
  end
75
78
  end
79
+
80
+ # Another weird format
81
+ begin
82
+ if !graphql_object["node"]["comet_sections"]["content"]["story"]["attachments"].empty?
83
+ return true unless graphql_object["node"]["comet_sections"]["content"]["story"]["attachments"].first.dig("styles", "attachment", "media", "photo_image", "uri").nil?
84
+ end
85
+ rescue StandardError
86
+
87
+ end
76
88
  end
77
89
  end
78
90
 
@@ -157,7 +169,7 @@ module Forki
157
169
  graphql_object_array = graphql_strings.map { |graphql_string| JSON.parse(graphql_string) }
158
170
 
159
171
  # Once in awhile it's really easy
160
- video_objects = graphql_object_array.filter {|go| go.has_key?("video") }
172
+ video_objects = graphql_object_array.filter { |go| go.has_key?("video") }
161
173
 
162
174
  if VideoSieve.can_process_with_sieve?(graphql_object_array)
163
175
  # Eventually all of this complexity will be replaced with this
@@ -170,9 +182,15 @@ module Forki
170
182
  return extract_video_post_data_alternative(graphql_object_array) if story_node_object.nil?
171
183
 
172
184
  if story_node_object["comet_sections"]["content"]["story"]["attachments"].first["styles"]["attachment"].key?("media")
173
- video_object = story_node_object["comet_sections"]["content"]["story"]["attachments"].first["styles"]["attachment"]["media"]["video"]
174
- creation_date = video_object["publish_time"] if video_object&.has_key("publish_time")
175
- creation_date = story_node_object["comet_sections"]["content"]["story"]["attachments"].first["styles"]["attachment"]["media"] if creation_date.nil?
185
+ media_object = story_node_object["comet_sections"]["content"]["story"]["attachments"].first["styles"]["attachment"]
186
+ if media_object.has_key?("video")
187
+ video_object = story_node_object["comet_sections"]["content"]["story"]["attachments"].first["styles"]["attachment"]["media"]["video"]
188
+ elsif media_object.has_key?("media") && media_object["media"].has_key?("browser_native_sd_url")
189
+ video_object = media_object["media"]
190
+ end
191
+
192
+ creation_date = video_object["publish_time"] if video_object&.has_key?("publish_time")
193
+ creation_date = story_node_object["comet_sections"]["content"]["story"]["attachments"].first["styles"]["attachment"]["media"]["publish_time"] if creation_date.nil?
176
194
  elsif story_node_object["comet_sections"]["content"]["story"]["attachments"].first["styles"]["attachment"].key?("style_infos")
177
195
  # For "Reels" we need a separate way to parse this
178
196
  video_object = story_node_object["comet_sections"]["content"]["story"]["attachments"].first["styles"]["attachment"]["style_infos"].first["fb_shorts_story"]["short_form_video_context"]["playback_video"]
@@ -181,13 +199,20 @@ module Forki
181
199
  raise "Unable to parse video object" if video_objects.empty?
182
200
  end
183
201
 
184
- feedback_object = story_node_object["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]
202
+ begin
203
+ feedback_object = story_node_object["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]
204
+ rescue NoMethodError
205
+ feedback_object = story_node_object["comet_sections"]["feedback"]["story"]["comet_feed_ufi_container"]["story"]["story_ufi_container"]["story"]["feedback_context"]["feedback_target_with_context"]
206
+ end
207
+
185
208
  if feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"].key?("cannot_see_top_custom_reactions")
186
209
  reaction_counts = extract_reaction_counts(feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["cannot_see_top_custom_reactions"]["top_reactions"])
187
210
  else
188
211
  reaction_counts = extract_reaction_counts(feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["top_reactions"])
189
212
  end
190
213
 
214
+ feedback_object = feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]
215
+
191
216
  share_count_object = feedback_object.fetch("share_count", {})
192
217
 
193
218
  if story_node_object["comet_sections"]["content"]["story"]["comet_sections"].key? "message"
@@ -202,20 +227,31 @@ module Forki
202
227
  else
203
228
  num_comments = feedback_object["comment_list_renderer"]["feedback"]["total_comment_count"]
204
229
  end
230
+
231
+ view_count = feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["video_view_count"]
232
+ reshare_warning = feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["should_show_reshare_warning"]
233
+ elsif feedback_object.has_key?("comments_count_summary_renderer")
234
+ num_comments = feedback_object["comments_count_summary_renderer"]["feedback"]["comment_rendering_instance"]["comments"]["total_count"]
235
+
236
+ view_count = feedback_object["video_view_count"]
237
+ reshare_warning = feedback_object["should_show_reshare_warning"]
205
238
  else
206
239
  if feedback_object["feedback"].key?("comment_count")
207
240
  num_comments = feedback_object["feedback"]["comment_count"]["total_count"]
208
241
  else
209
242
  num_comments = feedback_object["feedback"]["total_comment_count"]
210
243
  end
244
+
245
+ view_count = feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["video_view_count"]
246
+ reshare_warning = feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["should_show_reshare_warning"]
211
247
  end
212
248
 
213
249
  post_details = {
214
250
  id: video_object["id"],
215
251
  num_comments: num_comments,
216
252
  num_shares: share_count_object.fetch("count", nil),
217
- num_views: feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["video_view_count"],
218
- reshare_warning: feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["should_show_reshare_warning"],
253
+ num_views: view_count,
254
+ reshare_warning: reshare_warning,
219
255
  video_preview_image_url: video_object["preferred_thumbnail"]["image"]["uri"],
220
256
  video_url: video_object["browser_native_hd_url"] || video_object["browser_native_sd_url"],
221
257
  text: text,
@@ -242,9 +278,15 @@ module Forki
242
278
 
243
279
  share_count_object = feedback_object.fetch("share_count", {})
244
280
 
281
+ if feedback_object["comments_count_summary_renderer"]["feedback"].has_key?("comment_rendering_instance")
282
+ num_comments = feedback_object["comments_count_summary_renderer"]["feedback"]["comment_rendering_instance"]["comments"]["total_count"]
283
+ else
284
+ num_comments = feedback_object["comments_count_summary_renderer"]["feedback"]["total_comment_count"]
285
+ end
286
+
245
287
  post_details = {
246
288
  id: video_object["id"],
247
- num_comments: feedback_object["comments_count_summary_renderer"]["feedback"]["total_comment_count"],
289
+ num_comments: num_comments,
248
290
  num_shares: share_count_object.fetch("count", nil),
249
291
  num_views: feedback_object["video_view_count"],
250
292
  reshare_warning: feedback_object["should_show_reshare_warning"],
@@ -265,31 +307,64 @@ module Forki
265
307
  # Extracts data from an image post by parsing GraphQL strings as seen in the video post scraper above
266
308
  def extract_image_post_data(graphql_object_array)
267
309
  # This is a weird one-off style
310
+
268
311
  graphql_object = graphql_object_array.find { |graphql_object| !graphql_object.dig("node", "comet_sections", "content", "story", "attachments").nil? }
269
- unless graphql_object.nil? || graphql_object.count == 0
312
+ unless graphql_object.nil? || graphql_object.count.zero?
313
+ # TODO: These two branches are *super* similar, probably a lot of overlap
270
314
  attachments = graphql_object["node"]["comet_sections"]["content"]["story"]["attachments"]
271
315
 
272
- if graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"].has_key?("cannot_see_top_custom_reactions")
273
- reaction_counts = extract_reaction_counts(graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]["cannot_see_top_custom_reactions"]["top_reactions"])
316
+ if graphql_object["node"]["comet_sections"]["feedback"]["story"].key?("feedback_context")
317
+ feedback_object = graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]
318
+ elsif graphql_object["node"]["comet_sections"]["feedback"]["story"].has_key?("comet_feed_ufi_container")
319
+ feedback_object = graphql_object["node"]["comet_sections"]["feedback"]["story"]["comet_feed_ufi_container"]["story"]["story_ufi_container"]["story"]["feedback_context"]["feedback_target_with_context"]["comet_ufi_summary_and_actions_renderer"]["feedback"]
320
+ else
321
+ feedback_object = graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]
322
+ end
323
+
324
+ if feedback_object.has_key?("cannot_see_top_custom_reactions")
325
+ reaction_counts = extract_reaction_counts(feedback_object["cannot_see_top_custom_reactions"]["top_reactions"])
274
326
  else
275
- reaction_counts = extract_reaction_counts(graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]["top_reactions"])
327
+ reaction_counts = extract_reaction_counts(feedback_object["top_reactions"])
276
328
  end
277
329
 
278
330
  id = graphql_object["node"]["post_id"]
279
- num_comments = graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]["share_count"]["count"]
280
- reshare_warning = graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]["should_show_reshare_warning"]
281
- image_url = attachments.first["styles"]["attachment"]["all_subattachments"]["nodes"].first["media"]["image"]["uri"]
331
+ num_comments = feedback_object["comments_count_summary_renderer"]["feedback"]["comment_rendering_instance"]["comments"]["total_count"]
332
+ reshare_warning = feedback_object["should_show_reshare_warning"]
333
+
334
+ if attachments.first["styles"]["attachment"].key?("all_subattachments")
335
+ image_url = attachments.first["styles"]["attachment"]["all_subattachments"]["nodes"].first["media"]["image"]["uri"]
336
+ else
337
+ image_url = attachments.first.dig("styles", "attachment", "media", "photo_image", "uri")
338
+
339
+ if image_url.nil?
340
+ image_url = attachments.first["styles"]["attachment"]["media"]["large_share_image"]["uri"]
341
+ end
342
+ end
343
+
282
344
  text = graphql_object["node"]["comet_sections"]["content"]["story"]["message"]["text"]
283
345
  profile_link = graphql_object["node"]["comet_sections"]["content"]["story"]["actors"].first["url"]
284
- created_at = graphql_object["node"]["comet_sections"]["content"]["story"]["comet_sections"]["context_layout"]["story"]["comet_sections"]["metadata"].first["story"]["creation_time"]
346
+
347
+ unless graphql_object["node"]["comet_sections"].dig("content", "story", "comet_sections", "context_layout", "story", "comet_sections", "metadata").nil?
348
+ created_at = graphql_object["node"]["comet_sections"].dig("content", "story", "comet_sections", "context_layout", "story", "comet_sections", "metadata")&.first["story"]["creation_time"]
349
+ else
350
+ created_at = graphql_object["node"]["comet_sections"]["context_layout"]["story"]["comet_sections"]["metadata"].first["story"]["creation_time"]
351
+ end
352
+
285
353
  has_video = false
286
354
  else
287
-
288
355
  graphql_object_array.find { |graphql_object| graphql_object.key?("viewer_actor") && graphql_object.key?("display_comments") }
289
356
  curr_media_object = graphql_object_array.find { |graphql_object| graphql_object.key?("currMedia") }
290
357
  creation_story_object = graphql_object_array.find { |graphql_object| graphql_object.key?("creation_story") && graphql_object.key?("message") }
291
358
 
292
359
  feedback_object = graphql_object_array.find { |graphql_object| graphql_object.has_key?("comet_ufi_summary_and_actions_renderer") }["comet_ufi_summary_and_actions_renderer"]["feedback"]
360
+
361
+ if feedback_object.key?("top_reactions")
362
+ feedback_object = feedback_object
363
+ else
364
+ # POSSIBLY OUT OF DATE
365
+ feedback_object = feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]
366
+ end
367
+
293
368
  share_count_object = feedback_object.fetch("share_count", {})
294
369
 
295
370
  poster = creation_story_object["creation_story"]["comet_sections"]["actor_photo"]["story"]["actors"][0]
@@ -301,16 +376,21 @@ module Forki
301
376
  end
302
377
 
303
378
  id = curr_media_object["currMedia"]["id"],
379
+
304
380
  num_comments = feedback_object["comments_count_summary_renderer"]["feedback"]["total_comment_count"],
305
- num_shares = share_count_object.fetch("count", nil),
306
- reshare_warning = feedback_object["should_show_reshare_warning"],
307
- image_url = curr_media_object["currMedia"]["image"]["uri"],
308
- text = (creation_story_object["message"] || {}).fetch("text", nil),
309
- profile_link = poster["url"],
310
- created_at = curr_media_object["currMedia"]["created_time"],
311
- has_video = false
381
+ if num_comments.nil? && feedback_object.has_key?("comments_count_summary_renderer")
382
+ num_comments = feedback_object["comments_count_summary_renderer"]["feedback"]["comment_rendering_instance"]["comments"]["total_count"]
383
+ end
312
384
 
385
+ num_shares = share_count_object.fetch("count", nil)
386
+ reshare_warning = feedback_object["should_show_reshare_warning"]
387
+ image_url = curr_media_object["currMedia"]["image"]["uri"]
388
+ text = (creation_story_object["message"] || {}).fetch("text", nil)
389
+ profile_link = poster["url"]
390
+ created_at = curr_media_object["currMedia"]["created_time"]
391
+ has_video = false
313
392
  end
393
+
314
394
  post_details = {
315
395
  id: id,
316
396
  num_comments: num_comments,
@@ -457,4 +537,3 @@ module Forki
457
537
  end
458
538
 
459
539
  require_relative "sieves/video_sieves/video_sieve"
460
-
@@ -4,7 +4,6 @@ class ImageSieve
4
4
  end
5
5
 
6
6
  def self.sieve_for_graphql_objects(graphql_objects)
7
-
8
7
  sieve = sieve_class_for_graphql_objects(graphql_objects)
9
8
  return nil if sieve.nil?
10
9
 
@@ -19,7 +18,6 @@ private
19
18
  end
20
19
  end
21
20
 
22
-
23
- Dir['./lib/forki/scrapers/sieves/image_sieves/*.rb'].each do |file|
21
+ Dir["./lib/forki/scrapers/sieves/image_sieves/*.rb"].each do |file|
24
22
  require file unless file.end_with?("image_sieve.rb")
25
23
  end
data/lib/forki/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Forki
4
- VERSION = "0.2.5"
4
+ VERSION = "0.2.7"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: forki
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.5
4
+ version: 0.2.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - ''
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-12-14 00:00:00.000000000 Z
11
+ date: 2024-05-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: capybara
@@ -165,7 +165,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
165
165
  - !ruby/object:Gem::Version
166
166
  version: '0'
167
167
  requirements: []
168
- rubygems_version: 3.4.20
168
+ rubygems_version: 3.5.9
169
169
  signing_key:
170
170
  specification_version: 4
171
171
  summary: A gem to scrape Facebook pages for archive purposes.