forki 0.2.4 → 0.2.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f4868fd61b98809521249982e057f69d69018e8defba25792382f44e06b1921a
4
- data.tar.gz: 1c700275aa5fafc19e4f6b42845e5d52c02e2bc3e6cd3f1082e1a59c5c4f472d
3
+ metadata.gz: cd276bc782515b3b7935eab759da6cf2acdafc146798a56e3e21ee667358ad3c
4
+ data.tar.gz: 532d2c06542a0bdfe4e22f1a0b2ec3cf3ded6057b30de93386fad442bfa686ab
5
5
  SHA512:
6
- metadata.gz: b5967db4c8e10b9d626767f041b2f59627170a49ceb4dc47e6da11f439a668bc656550e9b0c6a3a6a52180c8e75637ab2cbfb8a3c1b007bb91c4a5280e21aabd
7
- data.tar.gz: 8230ec7b913fca196e255820066efab1c520e42ac38818652cb252966a16b8ce915f5f1224606cd5eddb81968fcdbd9f37506e028c641bcad2a402536174d321
6
+ metadata.gz: 46aaf4eef616f99ca44eac48134b151af41e1755b808c2efef18fdca905956d8f5cb72d54e61070f64ae883b8e64e88f5adfb94f0d41a166eae136f0474133df
7
+ data.tar.gz: 36c8bc7c506c952c036f38eb655522fad7a6b4cd6bf7c3de11e78e60aec4f5c3f124d5449e1aa40425a22cacee71e7c393239092a7e6dbf057a7e46e90457f1a
@@ -182,7 +182,12 @@ module Forki
182
182
  end
183
183
 
184
184
  feedback_object = story_node_object["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]
185
- reaction_counts = extract_reaction_counts(feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["cannot_see_top_custom_reactions"]["top_reactions"])
185
+ if feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"].key?("cannot_see_top_custom_reactions")
186
+ reaction_counts = extract_reaction_counts(feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["cannot_see_top_custom_reactions"]["top_reactions"])
187
+ else
188
+ reaction_counts = extract_reaction_counts(feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["top_reactions"])
189
+ end
190
+
186
191
  share_count_object = feedback_object.fetch("share_count", {})
187
192
 
188
193
  if story_node_object["comet_sections"]["content"]["story"]["comet_sections"].key? "message"
@@ -191,8 +196,19 @@ module Forki
191
196
  text = ""
192
197
  end
193
198
 
194
- feedback_object["comment_list_renderer"]["feedback"]["comment_count"]["total_count"]
195
- num_comments = feedback_object.has_key?("comment_list_renderer") ? feedback_object["comment_list_renderer"]["feedback"]["comment_count"]["total_count"] : feedback_object["comment_count"]["total_count"]
199
+ if feedback_object.has_key?("comment_list_renderer")
200
+ if feedback_object["comment_list_renderer"]["feedback"].key?("comment_count")
201
+ num_comments = feedback_object["comment_list_renderer"]["feedback"]["comment_count"]["total_count"]
202
+ else
203
+ num_comments = feedback_object["comment_list_renderer"]["feedback"]["total_comment_count"]
204
+ end
205
+ else
206
+ if feedback_object["feedback"].key?("comment_count")
207
+ num_comments = feedback_object["feedback"]["comment_count"]["total_count"]
208
+ else
209
+ num_comments = feedback_object["feedback"]["total_comment_count"]
210
+ end
211
+ end
196
212
 
197
213
  post_details = {
198
214
  id: video_object["id"],
@@ -217,7 +233,13 @@ module Forki
217
233
  sidepane_object = graphql_object_array.find { |graphql_object| graphql_object.key?("tahoe_sidepane_renderer") }
218
234
  video_object = graphql_object_array.find { |graphql_object| graphql_object.has_key?("video") }
219
235
  feedback_object = sidepane_object["tahoe_sidepane_renderer"]["video"]["feedback"]
220
- reaction_counts = extract_reaction_counts(sidepane_object["tahoe_sidepane_renderer"]["video"]["feedback"]["cannot_see_top_custom_reactions"]["top_reactions"])
236
+
237
+ if sidepane_object["tahoe_sidepane_renderer"]["video"]["feedback"].key?("cannot_see_top_custom_reactions")
238
+ reaction_counts = extract_reaction_counts(sidepane_object["tahoe_sidepane_renderer"]["video"]["feedback"]["cannot_see_top_custom_reactions"]["top_reactions"])
239
+ else # if the video has no reactions, it will have a different structure
240
+ reaction_counts = extract_reaction_counts(sidepane_object["tahoe_sidepane_renderer"]["video"]["feedback"]["top_reactions"])
241
+ end
242
+
221
243
  share_count_object = feedback_object.fetch("share_count", {})
222
244
 
223
245
  post_details = {
@@ -247,7 +269,12 @@ module Forki
247
269
  unless graphql_object.nil? || graphql_object.count == 0
248
270
  attachments = graphql_object["node"]["comet_sections"]["content"]["story"]["attachments"]
249
271
 
250
- reaction_counts = extract_reaction_counts(graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]["cannot_see_top_custom_reactions"]["top_reactions"])
272
+ if graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"].has_key?("cannot_see_top_custom_reactions")
273
+ reaction_counts = extract_reaction_counts(graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]["cannot_see_top_custom_reactions"]["top_reactions"])
274
+ else
275
+ reaction_counts = extract_reaction_counts(graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]["top_reactions"])
276
+ end
277
+
251
278
  id = graphql_object["node"]["post_id"]
252
279
  num_comments = graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]["share_count"]["count"]
253
280
  reshare_warning = graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]["should_show_reshare_warning"]
@@ -267,7 +294,12 @@ module Forki
267
294
 
268
295
  poster = creation_story_object["creation_story"]["comet_sections"]["actor_photo"]["story"]["actors"][0]
269
296
 
270
- reaction_counts = extract_reaction_counts(feedback_object["cannot_see_top_custom_reactions"]["top_reactions"])
297
+ if feedback_object.has_key?("cannot_see_top_custom_reactions")
298
+ reaction_counts = extract_reaction_counts(feedback_object["cannot_see_top_custom_reactions"]["top_reactions"])
299
+ else
300
+ reaction_counts = extract_reaction_counts(feedback_object["top_reactions"])
301
+ end
302
+
271
303
  id = curr_media_object["currMedia"]["id"],
272
304
  num_comments = feedback_object["comments_count_summary_renderer"]["feedback"]["total_comment_count"],
273
305
  num_shares = share_count_object.fetch("count", nil),
@@ -303,7 +335,12 @@ module Forki
303
335
  (graphql_string.include?("live_status")) })
304
336
  video_permalink = creation_story_object["creation_story"]["shareable"]["url"].delete("\\")
305
337
  media_object = video_object["video"]["story"]["attachments"][0]["media"]
306
- reaction_counts = extract_reaction_counts(creation_story_object["feedback"]["cannot_see_top_custom_reactions"]["top_reactions"])
338
+
339
+ if creation_story_object["feedback"].key?("cannot_see_top_custom_reactions")
340
+ reaction_counts = extract_reaction_counts(creation_story_object["feedback"]["cannot_see_top_custom_reactions"]["top_reactions"])
341
+ else
342
+ reaction_counts = extract_reaction_counts(creation_story_object["feedback"]["top_reactions"])
343
+ end
307
344
 
308
345
  post_details = {
309
346
  id: video_object["id"],
@@ -331,7 +368,11 @@ module Forki
331
368
  (graphql.include? "creation_story") })["video"]["creation_story"]
332
369
  media_object = JSON.parse(graphql_strings.find { |graphql| graphql.include? "playable_url" })["video"]["creation_story"]["attachments"][0]["media"]
333
370
  video_permalink = creation_story_object["shareable"]["url"].delete("\\")
334
- reaction_counts = extract_reaction_counts(creation_story_object["feedback_context"]["feedback_target_with_context"]["cannot_see_top_custom_reactions"]["top_reactions"])
371
+ if creation_story_object["feedback_context"]["feedback_target_with_context"].key?("cannot_see_top_custom_reactions")
372
+ reaction_counts = extract_reaction_counts(creation_story_object["feedback_context"]["feedback_target_with_context"]["cannot_see_top_custom_reactions"]["top_reactions"])
373
+ else
374
+ reaction_counts = extract_reaction_counts(creation_story_object["feedback_context"]["feedback_target_with_context"]["top_reactions"])
375
+ end
335
376
 
336
377
  post_details = {
337
378
  id: creation_story_object["shareable"]["id"],
@@ -14,7 +14,7 @@ class VideoSieve
14
14
  private
15
15
 
16
16
  def self.sieve_class_for_graphql_objects(graphql_objects)
17
- sieves = [VideoSieveWatchTab, VideoSieveVideoPage, VideoSieveReel]
17
+ sieves = [VideoSieveWatchTab, VideoSieveVideoPage, VideoSieveVideoPage2, VideoSieveReel, VideoSieveReel2]
18
18
  sieves.detect { |sieve| sieve.check(graphql_objects) }
19
19
  end
20
20
  end
@@ -4,6 +4,15 @@ class VideoSieveReel < VideoSieve
4
4
  video_object = self.extractor(graphql_objects)
5
5
 
6
6
  return false unless video_object.has_key?("short_form_video_context")
7
+
8
+ # In relation to video_sieve_reel_2
9
+ comment_count = graphql_objects.filter do |go|
10
+ go = go.first if go.kind_of?(Array) && !go.empty?
11
+ !go.dig("feedback", "total_comment_count").nil?
12
+ end.first
13
+
14
+ return false unless comment_count.nil?
15
+
7
16
  true
8
17
  rescue StandardError
9
18
  return false
@@ -0,0 +1,88 @@
1
+ class VideoSieveReel2 < VideoSieve
2
+ # To check if it's valid for the inputted graphql objects
3
+ def self.check(graphql_objects)
4
+ video_object = self.extractor(graphql_objects)
5
+
6
+ return false unless video_object.has_key?("short_form_video_context")
7
+
8
+ comment_count = graphql_objects.filter do |go|
9
+ go = go.first if go.kind_of?(Array) && !go.empty?
10
+ !go.dig("feedback", "total_comment_count").nil?
11
+ end.first
12
+
13
+ return false if comment_count.nil?
14
+
15
+ true
16
+ rescue StandardError
17
+ return false
18
+ end
19
+
20
+ # output the expected format of:
21
+ #
22
+ # post_details = {
23
+ # id: video_object["id"],
24
+ # num_comments: num_comments,
25
+ # num_shares: share_count_object.fetch("count", nil),
26
+ # num_views: feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["video_view_count"],
27
+ # reshare_warning: feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["should_show_reshare_warning"],
28
+ # video_preview_image_url: video_object["preferred_thumbnail"]["image"]["uri"],
29
+ # video_url: video_object["browser_native_hd_url"] || video_object["browser_native_sd_url"],
30
+ # text: text,
31
+ # created_at: creation_date,
32
+ # profile_link: story_node_object["comet_sections"]["context_layout"]["story"]["comet_sections"]["actor_photo"]["story"]["actors"][0]["url"],
33
+ # has_video: true
34
+ # }
35
+ # post_details[:video_preview_image_file] = Forki.retrieve_media(post_details[:video_preview_image_url])
36
+ # post_details[:video_file] = Forki.retrieve_media(post_details[:video_url])
37
+ # post_details[:reactions] = reaction_counts
38
+
39
+ def self.sieve(graphql_objects)
40
+ video_object = self.extractor(graphql_objects)
41
+
42
+
43
+ feedback_object = graphql_objects.filter do |go|
44
+ go = go.first if go.kind_of?(Array) && !go.empty?
45
+ !go.dig("feedback", "total_comment_count").nil?
46
+ end.first
47
+
48
+ reels_feedback_renderer = graphql_objects.filter do |go|
49
+ go.dig("reels_feedback_renderer")
50
+ end.first
51
+
52
+ reels_feedback_renderer["reels_feedback_renderer"]["story"]
53
+ reshare_warning = video_object["short_form_video_context"]["playback_video"].dig("warning_screen_renderer", "cix_screen", "view_model", "__typename") == "OverlayWarningScreenViewModel"
54
+
55
+ video_preview_image_url = video_object["short_form_video_context"]["playback_video"]["preferred_thumbnail"]["image"]["uri"]
56
+ video_url = video_object["short_form_video_context"]["playback_video"]["browser_native_hd_url"] || video_object["short_form_video_context"]["playback_video"]["browser_native_sd_url"]
57
+
58
+ post_details = {
59
+ id: video_object["short_form_video_context"]["video"]["id"],
60
+ num_comments: feedback_object["feedback"]["total_comment_count"],
61
+ num_shared: Forki::Scraper.extract_int_from_num_element(feedback_object["feedback"]["share_count_reduced"]),
62
+ num_views: nil,
63
+ reshare_warning: reshare_warning,
64
+ video_preview_image_url: video_preview_image_url,
65
+ video_url: video_url,
66
+ text: nil, # Reels don't have text
67
+ created_at: video_object["creation_time"],
68
+ profile_link: video_object["short_form_video_context"]["video_owner"]["url"],
69
+ has_video: true,
70
+ video_preview_image_file: Forki.retrieve_media(video_preview_image_url),
71
+ video_file: Forki.retrieve_media(video_url),
72
+ reactions: nil # Only available on comments it seems? Look into this again sometime
73
+ }
74
+ rescue StandardError => e
75
+ debugger
76
+ end
77
+
78
+ private
79
+
80
+ def self.extractor(graphql_objects)
81
+ video_objects = graphql_objects.filter do |go|
82
+ go = go.first if go.kind_of?(Array) && !go.empty?
83
+ go.has_key?("video")
84
+ end
85
+
86
+ video_objects.first.dig("video", "creation_story")
87
+ end
88
+ end
@@ -4,9 +4,13 @@ class VideoSieveVideoPage < VideoSieve
4
4
  story_node_object = self.extractor(graphql_objects) # This will error out
5
5
  return false unless story_node_object["content"]["story"]["attachments"].first["styles"]["attachment"].has_key?("media")
6
6
 
7
+ feedback_object = story_node_object["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]
8
+ # This is what differs from video_sieve_video_page_2.rb, where this key is unnested
9
+ return false unless feedback_object.has_key?("cannot_see_top_custom_reactions")
10
+
7
11
  true
8
- rescue StandardError
9
- return false
12
+ rescue StandardError => e
13
+ false
10
14
  end
11
15
 
12
16
  # output the expected format of:
@@ -39,7 +43,7 @@ class VideoSieveVideoPage < VideoSieve
39
43
  video_url = video_object["browser_native_hd_url"]
40
44
  video_url = video_object["browser_native_sd_url"] if video_url.nil?
41
45
 
42
- post_details = {
46
+ {
43
47
  id: video_object["id"],
44
48
  num_comments: feedback_object["total_comment_count"],
45
49
  num_shared: feedback_object["share_count"]["count"],
@@ -0,0 +1,70 @@
1
+ class VideoSieveVideoPage2 < VideoSieve
2
+ # To check if it's valid for the inputted graphql objects
3
+ def self.check(graphql_objects)
4
+ story_node_object = self.extractor(graphql_objects) # This will error out
5
+ return false unless story_node_object["content"]["story"]["attachments"].first["styles"]["attachment"].has_key?("media")
6
+
7
+ feedback_object = story_node_object["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]
8
+ # This is what differs from video_sieve_video_page.rb, where this key is nested further
9
+ return false unless feedback_object.has_key?("top_reactions")
10
+
11
+ true
12
+ rescue StandardError
13
+ false
14
+ end
15
+
16
+ # output the expected format of:
17
+ #
18
+ # post_details = {
19
+ # id: video_object["id"],
20
+ # num_comments: num_comments,
21
+ # num_shares: share_count_object.fetch("count", nil),
22
+ # num_views: feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["video_view_count"],
23
+ # reshare_warning: feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["should_show_reshare_warning"],
24
+ # video_preview_image_url: video_object["preferred_thumbnail"]["image"]["uri"],
25
+ # video_url: video_object["browser_native_hd_url"] || video_object["browser_native_sd_url"],
26
+ # text: text,
27
+ # created_at: creation_date,
28
+ # profile_link: story_node_object["comet_sections"]["context_layout"]["story"]["comet_sections"]["actor_photo"]["story"]["actors"][0]["url"],
29
+ # has_video: true
30
+ # }
31
+ # post_details[:video_preview_image_file] = Forki.retrieve_media(post_details[:video_preview_image_url])
32
+ # post_details[:video_file] = Forki.retrieve_media(post_details[:video_url])
33
+ # post_details[:reactions] = reaction_counts
34
+
35
+ def self.sieve(graphql_objects)
36
+ extracted_text = self.extractor(graphql_objects)
37
+
38
+ story_object = extracted_text["content"]["story"]
39
+ video_object = extracted_text["content"]["story"]["attachments"].first["styles"]["attachment"]["media"]
40
+ feedback_object = extracted_text["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]
41
+
42
+ video_preview_image_url = video_object["preferred_thumbnail"]["image"]["uri"]
43
+ video_url = video_object["browser_native_hd_url"]
44
+ video_url = video_object["browser_native_sd_url"] if video_url.nil?
45
+
46
+ {
47
+ id: video_object["id"],
48
+ num_comments: feedback_object["total_comment_count"],
49
+ num_shared: feedback_object["share_count"]["count"],
50
+ num_views: nil,
51
+ reshare_warning: feedback_object["should_show_reshare_warning"],
52
+ video_preview_image_url: video_preview_image_url,
53
+ video_url: video_url,
54
+ text: story_object["message"]["text"],
55
+ created_at: video_object["publish_time"],
56
+ profile_link: story_object["actors"].first["url"],
57
+ has_video: true,
58
+ video_preview_image_file: Forki.retrieve_media(video_preview_image_url),
59
+ video_file: Forki.retrieve_media(video_url),
60
+ reactions: feedback_object["top_reactions"]["edges"]
61
+ }
62
+ end
63
+
64
+ private
65
+
66
+ def self.extractor(graphql_objects)
67
+ story_node_object = graphql_objects.find { |graphql_object| graphql_object.key? "node" }&.fetch("node", nil) # user posted video
68
+ story_node_object["comet_sections"]
69
+ end
70
+ end
@@ -57,6 +57,12 @@ class VideoSieveWatchTab < VideoSieve
57
57
  profile_link = filtered_json["attachments"].first["media"]["creation_story"]["comet_sections"]["title"]["story"]["actors"].first["url"]
58
58
  end
59
59
 
60
+ if feedback_object.key?("cannot_see_top_custom_reactions")
61
+ reactions = feedback_object["cannot_see_top_custom_reactions"]["top_reactions"]["edges"]
62
+ else
63
+ reactions = feedback_object["top_reactions"]["edges"]
64
+ end
65
+
60
66
  post_details = {
61
67
  id: video_object.dig("shareable", "id") || video_object["attachments"].first["media"]["id"],
62
68
  num_comments: feedback_object["total_comment_count"],
@@ -71,7 +77,7 @@ class VideoSieveWatchTab < VideoSieve
71
77
  has_video: true,
72
78
  video_preview_image_file: Forki.retrieve_media(video_preview_image_url),
73
79
  video_file: Forki.retrieve_media(video_url),
74
- reactions: feedback_object["cannot_see_top_custom_reactions"]["top_reactions"]["edges"]
80
+ reactions: reactions
75
81
  }
76
82
  end
77
83
 
data/lib/forki/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Forki
4
- VERSION = "0.2.4"
4
+ VERSION = "0.2.5"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: forki
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.4
4
+ version: 0.2.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - ''
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-10-12 00:00:00.000000000 Z
11
+ date: 2023-12-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: capybara
@@ -127,7 +127,9 @@ files:
127
127
  - lib/forki/scrapers/sieves/image_sieves/image_sieve.rb.rb
128
128
  - lib/forki/scrapers/sieves/video_sieves/video_sieve.rb
129
129
  - lib/forki/scrapers/sieves/video_sieves/video_sieve_reel.rb
130
+ - lib/forki/scrapers/sieves/video_sieves/video_sieve_reel_2.rb
130
131
  - lib/forki/scrapers/sieves/video_sieves/video_sieve_video_page.rb
132
+ - lib/forki/scrapers/sieves/video_sieves/video_sieve_video_page_2.rb
131
133
  - lib/forki/scrapers/sieves/video_sieves/video_sieve_watch_tab.rb
132
134
  - lib/forki/scrapers/user_scraper.rb
133
135
  - lib/forki/user.rb