forki 0.2.4 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f4868fd61b98809521249982e057f69d69018e8defba25792382f44e06b1921a
4
- data.tar.gz: 1c700275aa5fafc19e4f6b42845e5d52c02e2bc3e6cd3f1082e1a59c5c4f472d
3
+ metadata.gz: cd276bc782515b3b7935eab759da6cf2acdafc146798a56e3e21ee667358ad3c
4
+ data.tar.gz: 532d2c06542a0bdfe4e22f1a0b2ec3cf3ded6057b30de93386fad442bfa686ab
5
5
  SHA512:
6
- metadata.gz: b5967db4c8e10b9d626767f041b2f59627170a49ceb4dc47e6da11f439a668bc656550e9b0c6a3a6a52180c8e75637ab2cbfb8a3c1b007bb91c4a5280e21aabd
7
- data.tar.gz: 8230ec7b913fca196e255820066efab1c520e42ac38818652cb252966a16b8ce915f5f1224606cd5eddb81968fcdbd9f37506e028c641bcad2a402536174d321
6
+ metadata.gz: 46aaf4eef616f99ca44eac48134b151af41e1755b808c2efef18fdca905956d8f5cb72d54e61070f64ae883b8e64e88f5adfb94f0d41a166eae136f0474133df
7
+ data.tar.gz: 36c8bc7c506c952c036f38eb655522fad7a6b4cd6bf7c3de11e78e60aec4f5c3f124d5449e1aa40425a22cacee71e7c393239092a7e6dbf057a7e46e90457f1a
@@ -182,7 +182,12 @@ module Forki
182
182
  end
183
183
 
184
184
  feedback_object = story_node_object["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]
185
- reaction_counts = extract_reaction_counts(feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["cannot_see_top_custom_reactions"]["top_reactions"])
185
+ if feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"].key?("cannot_see_top_custom_reactions")
186
+ reaction_counts = extract_reaction_counts(feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["cannot_see_top_custom_reactions"]["top_reactions"])
187
+ else
188
+ reaction_counts = extract_reaction_counts(feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["top_reactions"])
189
+ end
190
+
186
191
  share_count_object = feedback_object.fetch("share_count", {})
187
192
 
188
193
  if story_node_object["comet_sections"]["content"]["story"]["comet_sections"].key? "message"
@@ -191,8 +196,19 @@ module Forki
191
196
  text = ""
192
197
  end
193
198
 
194
- feedback_object["comment_list_renderer"]["feedback"]["comment_count"]["total_count"]
195
- num_comments = feedback_object.has_key?("comment_list_renderer") ? feedback_object["comment_list_renderer"]["feedback"]["comment_count"]["total_count"] : feedback_object["comment_count"]["total_count"]
199
+ if feedback_object.has_key?("comment_list_renderer")
200
+ if feedback_object["comment_list_renderer"]["feedback"].key?("comment_count")
201
+ num_comments = feedback_object["comment_list_renderer"]["feedback"]["comment_count"]["total_count"]
202
+ else
203
+ num_comments = feedback_object["comment_list_renderer"]["feedback"]["total_comment_count"]
204
+ end
205
+ else
206
+ if feedback_object["feedback"].key?("comment_count")
207
+ num_comments = feedback_object["feedback"]["comment_count"]["total_count"]
208
+ else
209
+ num_comments = feedback_object["feedback"]["total_comment_count"]
210
+ end
211
+ end
196
212
 
197
213
  post_details = {
198
214
  id: video_object["id"],
@@ -217,7 +233,13 @@ module Forki
217
233
  sidepane_object = graphql_object_array.find { |graphql_object| graphql_object.key?("tahoe_sidepane_renderer") }
218
234
  video_object = graphql_object_array.find { |graphql_object| graphql_object.has_key?("video") }
219
235
  feedback_object = sidepane_object["tahoe_sidepane_renderer"]["video"]["feedback"]
220
- reaction_counts = extract_reaction_counts(sidepane_object["tahoe_sidepane_renderer"]["video"]["feedback"]["cannot_see_top_custom_reactions"]["top_reactions"])
236
+
237
+ if sidepane_object["tahoe_sidepane_renderer"]["video"]["feedback"].key?("cannot_see_top_custom_reactions")
238
+ reaction_counts = extract_reaction_counts(sidepane_object["tahoe_sidepane_renderer"]["video"]["feedback"]["cannot_see_top_custom_reactions"]["top_reactions"])
239
+ else # if the video has no reactions, it will have a different structure
240
+ reaction_counts = extract_reaction_counts(sidepane_object["tahoe_sidepane_renderer"]["video"]["feedback"]["top_reactions"])
241
+ end
242
+
221
243
  share_count_object = feedback_object.fetch("share_count", {})
222
244
 
223
245
  post_details = {
@@ -247,7 +269,12 @@ module Forki
247
269
  unless graphql_object.nil? || graphql_object.count == 0
248
270
  attachments = graphql_object["node"]["comet_sections"]["content"]["story"]["attachments"]
249
271
 
250
- reaction_counts = extract_reaction_counts(graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]["cannot_see_top_custom_reactions"]["top_reactions"])
272
+ if graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"].has_key?("cannot_see_top_custom_reactions")
273
+ reaction_counts = extract_reaction_counts(graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]["cannot_see_top_custom_reactions"]["top_reactions"])
274
+ else
275
+ reaction_counts = extract_reaction_counts(graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]["top_reactions"])
276
+ end
277
+
251
278
  id = graphql_object["node"]["post_id"]
252
279
  num_comments = graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]["share_count"]["count"]
253
280
  reshare_warning = graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]["should_show_reshare_warning"]
@@ -267,7 +294,12 @@ module Forki
267
294
 
268
295
  poster = creation_story_object["creation_story"]["comet_sections"]["actor_photo"]["story"]["actors"][0]
269
296
 
270
- reaction_counts = extract_reaction_counts(feedback_object["cannot_see_top_custom_reactions"]["top_reactions"])
297
+ if feedback_object.has_key?("cannot_see_top_custom_reactions")
298
+ reaction_counts = extract_reaction_counts(feedback_object["cannot_see_top_custom_reactions"]["top_reactions"])
299
+ else
300
+ reaction_counts = extract_reaction_counts(feedback_object["top_reactions"])
301
+ end
302
+
271
303
  id = curr_media_object["currMedia"]["id"],
272
304
  num_comments = feedback_object["comments_count_summary_renderer"]["feedback"]["total_comment_count"],
273
305
  num_shares = share_count_object.fetch("count", nil),
@@ -303,7 +335,12 @@ module Forki
303
335
  (graphql_string.include?("live_status")) })
304
336
  video_permalink = creation_story_object["creation_story"]["shareable"]["url"].delete("\\")
305
337
  media_object = video_object["video"]["story"]["attachments"][0]["media"]
306
- reaction_counts = extract_reaction_counts(creation_story_object["feedback"]["cannot_see_top_custom_reactions"]["top_reactions"])
338
+
339
+ if creation_story_object["feedback"].key?("cannot_see_top_custom_reactions")
340
+ reaction_counts = extract_reaction_counts(creation_story_object["feedback"]["cannot_see_top_custom_reactions"]["top_reactions"])
341
+ else
342
+ reaction_counts = extract_reaction_counts(creation_story_object["feedback"]["top_reactions"])
343
+ end
307
344
 
308
345
  post_details = {
309
346
  id: video_object["id"],
@@ -331,7 +368,11 @@ module Forki
331
368
  (graphql.include? "creation_story") })["video"]["creation_story"]
332
369
  media_object = JSON.parse(graphql_strings.find { |graphql| graphql.include? "playable_url" })["video"]["creation_story"]["attachments"][0]["media"]
333
370
  video_permalink = creation_story_object["shareable"]["url"].delete("\\")
334
- reaction_counts = extract_reaction_counts(creation_story_object["feedback_context"]["feedback_target_with_context"]["cannot_see_top_custom_reactions"]["top_reactions"])
371
+ if creation_story_object["feedback_context"]["feedback_target_with_context"].key?("cannot_see_top_custom_reactions")
372
+ reaction_counts = extract_reaction_counts(creation_story_object["feedback_context"]["feedback_target_with_context"]["cannot_see_top_custom_reactions"]["top_reactions"])
373
+ else
374
+ reaction_counts = extract_reaction_counts(creation_story_object["feedback_context"]["feedback_target_with_context"]["top_reactions"])
375
+ end
335
376
 
336
377
  post_details = {
337
378
  id: creation_story_object["shareable"]["id"],
@@ -14,7 +14,7 @@ class VideoSieve
14
14
  private
15
15
 
16
16
  def self.sieve_class_for_graphql_objects(graphql_objects)
17
- sieves = [VideoSieveWatchTab, VideoSieveVideoPage, VideoSieveReel]
17
+ sieves = [VideoSieveWatchTab, VideoSieveVideoPage, VideoSieveVideoPage2, VideoSieveReel, VideoSieveReel2]
18
18
  sieves.detect { |sieve| sieve.check(graphql_objects) }
19
19
  end
20
20
  end
@@ -4,6 +4,15 @@ class VideoSieveReel < VideoSieve
4
4
  video_object = self.extractor(graphql_objects)
5
5
 
6
6
  return false unless video_object.has_key?("short_form_video_context")
7
+
8
+ # In relation to video_sieve_reel_2
9
+ comment_count = graphql_objects.filter do |go|
10
+ go = go.first if go.kind_of?(Array) && !go.empty?
11
+ !go.dig("feedback", "total_comment_count").nil?
12
+ end.first
13
+
14
+ return false unless comment_count.nil?
15
+
7
16
  true
8
17
  rescue StandardError
9
18
  return false
@@ -0,0 +1,88 @@
1
+ class VideoSieveReel2 < VideoSieve
2
+ # To check if it's valid for the inputted graphql objects
3
+ def self.check(graphql_objects)
4
+ video_object = self.extractor(graphql_objects)
5
+
6
+ return false unless video_object.has_key?("short_form_video_context")
7
+
8
+ comment_count = graphql_objects.filter do |go|
9
+ go = go.first if go.kind_of?(Array) && !go.empty?
10
+ !go.dig("feedback", "total_comment_count").nil?
11
+ end.first
12
+
13
+ return false if comment_count.nil?
14
+
15
+ true
16
+ rescue StandardError
17
+ return false
18
+ end
19
+
20
+ # output the expected format of:
21
+ #
22
+ # post_details = {
23
+ # id: video_object["id"],
24
+ # num_comments: num_comments,
25
+ # num_shares: share_count_object.fetch("count", nil),
26
+ # num_views: feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["video_view_count"],
27
+ # reshare_warning: feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["should_show_reshare_warning"],
28
+ # video_preview_image_url: video_object["preferred_thumbnail"]["image"]["uri"],
29
+ # video_url: video_object["browser_native_hd_url"] || video_object["browser_native_sd_url"],
30
+ # text: text,
31
+ # created_at: creation_date,
32
+ # profile_link: story_node_object["comet_sections"]["context_layout"]["story"]["comet_sections"]["actor_photo"]["story"]["actors"][0]["url"],
33
+ # has_video: true
34
+ # }
35
+ # post_details[:video_preview_image_file] = Forki.retrieve_media(post_details[:video_preview_image_url])
36
+ # post_details[:video_file] = Forki.retrieve_media(post_details[:video_url])
37
+ # post_details[:reactions] = reaction_counts
38
+
39
+ def self.sieve(graphql_objects)
40
+ video_object = self.extractor(graphql_objects)
41
+
42
+
43
+ feedback_object = graphql_objects.filter do |go|
44
+ go = go.first if go.kind_of?(Array) && !go.empty?
45
+ !go.dig("feedback", "total_comment_count").nil?
46
+ end.first
47
+
48
+ reels_feedback_renderer = graphql_objects.filter do |go|
49
+ go.dig("reels_feedback_renderer")
50
+ end.first
51
+
52
+ reels_feedback_renderer["reels_feedback_renderer"]["story"]
53
+ reshare_warning = video_object["short_form_video_context"]["playback_video"].dig("warning_screen_renderer", "cix_screen", "view_model", "__typename") == "OverlayWarningScreenViewModel"
54
+
55
+ video_preview_image_url = video_object["short_form_video_context"]["playback_video"]["preferred_thumbnail"]["image"]["uri"]
56
+ video_url = video_object["short_form_video_context"]["playback_video"]["browser_native_hd_url"] || video_object["short_form_video_context"]["playback_video"]["browser_native_sd_url"]
57
+
58
+ post_details = {
59
+ id: video_object["short_form_video_context"]["video"]["id"],
60
+ num_comments: feedback_object["feedback"]["total_comment_count"],
61
+ num_shared: Forki::Scraper.extract_int_from_num_element(feedback_object["feedback"]["share_count_reduced"]),
62
+ num_views: nil,
63
+ reshare_warning: reshare_warning,
64
+ video_preview_image_url: video_preview_image_url,
65
+ video_url: video_url,
66
+ text: nil, # Reels don't have text
67
+ created_at: video_object["creation_time"],
68
+ profile_link: video_object["short_form_video_context"]["video_owner"]["url"],
69
+ has_video: true,
70
+ video_preview_image_file: Forki.retrieve_media(video_preview_image_url),
71
+ video_file: Forki.retrieve_media(video_url),
72
+ reactions: nil # Only available on comments it seems? Look into this again sometime
73
+ }
74
+ rescue StandardError => e
75
+ debugger
76
+ end
77
+
78
+ private
79
+
80
+ def self.extractor(graphql_objects)
81
+ video_objects = graphql_objects.filter do |go|
82
+ go = go.first if go.kind_of?(Array) && !go.empty?
83
+ go.has_key?("video")
84
+ end
85
+
86
+ video_objects.first.dig("video", "creation_story")
87
+ end
88
+ end
@@ -4,9 +4,13 @@ class VideoSieveVideoPage < VideoSieve
4
4
  story_node_object = self.extractor(graphql_objects) # This will error out
5
5
  return false unless story_node_object["content"]["story"]["attachments"].first["styles"]["attachment"].has_key?("media")
6
6
 
7
+ feedback_object = story_node_object["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]
8
+ # This is what differs from video_sieve_video_page_2.rb, where this key is unnested
9
+ return false unless feedback_object.has_key?("cannot_see_top_custom_reactions")
10
+
7
11
  true
8
- rescue StandardError
9
- return false
12
+ rescue StandardError => e
13
+ false
10
14
  end
11
15
 
12
16
  # output the expected format of:
@@ -39,7 +43,7 @@ class VideoSieveVideoPage < VideoSieve
39
43
  video_url = video_object["browser_native_hd_url"]
40
44
  video_url = video_object["browser_native_sd_url"] if video_url.nil?
41
45
 
42
- post_details = {
46
+ {
43
47
  id: video_object["id"],
44
48
  num_comments: feedback_object["total_comment_count"],
45
49
  num_shared: feedback_object["share_count"]["count"],
@@ -0,0 +1,70 @@
1
+ class VideoSieveVideoPage2 < VideoSieve
2
+ # To check if it's valid for the inputted graphql objects
3
+ def self.check(graphql_objects)
4
+ story_node_object = self.extractor(graphql_objects) # This will error out
5
+ return false unless story_node_object["content"]["story"]["attachments"].first["styles"]["attachment"].has_key?("media")
6
+
7
+ feedback_object = story_node_object["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]
8
+ # This is what differs from video_sieve_video_page.rb, where this key is nested further
9
+ return false unless feedback_object.has_key?("top_reactions")
10
+
11
+ true
12
+ rescue StandardError
13
+ false
14
+ end
15
+
16
+ # output the expected format of:
17
+ #
18
+ # post_details = {
19
+ # id: video_object["id"],
20
+ # num_comments: num_comments,
21
+ # num_shares: share_count_object.fetch("count", nil),
22
+ # num_views: feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["video_view_count"],
23
+ # reshare_warning: feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["should_show_reshare_warning"],
24
+ # video_preview_image_url: video_object["preferred_thumbnail"]["image"]["uri"],
25
+ # video_url: video_object["browser_native_hd_url"] || video_object["browser_native_sd_url"],
26
+ # text: text,
27
+ # created_at: creation_date,
28
+ # profile_link: story_node_object["comet_sections"]["context_layout"]["story"]["comet_sections"]["actor_photo"]["story"]["actors"][0]["url"],
29
+ # has_video: true
30
+ # }
31
+ # post_details[:video_preview_image_file] = Forki.retrieve_media(post_details[:video_preview_image_url])
32
+ # post_details[:video_file] = Forki.retrieve_media(post_details[:video_url])
33
+ # post_details[:reactions] = reaction_counts
34
+
35
+ def self.sieve(graphql_objects)
36
+ extracted_text = self.extractor(graphql_objects)
37
+
38
+ story_object = extracted_text["content"]["story"]
39
+ video_object = extracted_text["content"]["story"]["attachments"].first["styles"]["attachment"]["media"]
40
+ feedback_object = extracted_text["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]
41
+
42
+ video_preview_image_url = video_object["preferred_thumbnail"]["image"]["uri"]
43
+ video_url = video_object["browser_native_hd_url"]
44
+ video_url = video_object["browser_native_sd_url"] if video_url.nil?
45
+
46
+ {
47
+ id: video_object["id"],
48
+ num_comments: feedback_object["total_comment_count"],
49
+ num_shared: feedback_object["share_count"]["count"],
50
+ num_views: nil,
51
+ reshare_warning: feedback_object["should_show_reshare_warning"],
52
+ video_preview_image_url: video_preview_image_url,
53
+ video_url: video_url,
54
+ text: story_object["message"]["text"],
55
+ created_at: video_object["publish_time"],
56
+ profile_link: story_object["actors"].first["url"],
57
+ has_video: true,
58
+ video_preview_image_file: Forki.retrieve_media(video_preview_image_url),
59
+ video_file: Forki.retrieve_media(video_url),
60
+ reactions: feedback_object["top_reactions"]["edges"]
61
+ }
62
+ end
63
+
64
+ private
65
+
66
+ def self.extractor(graphql_objects)
67
+ story_node_object = graphql_objects.find { |graphql_object| graphql_object.key? "node" }&.fetch("node", nil) # user posted video
68
+ story_node_object["comet_sections"]
69
+ end
70
+ end
@@ -57,6 +57,12 @@ class VideoSieveWatchTab < VideoSieve
57
57
  profile_link = filtered_json["attachments"].first["media"]["creation_story"]["comet_sections"]["title"]["story"]["actors"].first["url"]
58
58
  end
59
59
 
60
+ if feedback_object.key?("cannot_see_top_custom_reactions")
61
+ reactions = feedback_object["cannot_see_top_custom_reactions"]["top_reactions"]["edges"]
62
+ else
63
+ reactions = feedback_object["top_reactions"]["edges"]
64
+ end
65
+
60
66
  post_details = {
61
67
  id: video_object.dig("shareable", "id") || video_object["attachments"].first["media"]["id"],
62
68
  num_comments: feedback_object["total_comment_count"],
@@ -71,7 +77,7 @@ class VideoSieveWatchTab < VideoSieve
71
77
  has_video: true,
72
78
  video_preview_image_file: Forki.retrieve_media(video_preview_image_url),
73
79
  video_file: Forki.retrieve_media(video_url),
74
- reactions: feedback_object["cannot_see_top_custom_reactions"]["top_reactions"]["edges"]
80
+ reactions: reactions
75
81
  }
76
82
  end
77
83
 
data/lib/forki/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Forki
4
- VERSION = "0.2.4"
4
+ VERSION = "0.2.5"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: forki
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.4
4
+ version: 0.2.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - ''
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-10-12 00:00:00.000000000 Z
11
+ date: 2023-12-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: capybara
@@ -127,7 +127,9 @@ files:
127
127
  - lib/forki/scrapers/sieves/image_sieves/image_sieve.rb.rb
128
128
  - lib/forki/scrapers/sieves/video_sieves/video_sieve.rb
129
129
  - lib/forki/scrapers/sieves/video_sieves/video_sieve_reel.rb
130
+ - lib/forki/scrapers/sieves/video_sieves/video_sieve_reel_2.rb
130
131
  - lib/forki/scrapers/sieves/video_sieves/video_sieve_video_page.rb
132
+ - lib/forki/scrapers/sieves/video_sieves/video_sieve_video_page_2.rb
131
133
  - lib/forki/scrapers/sieves/video_sieves/video_sieve_watch_tab.rb
132
134
  - lib/forki/scrapers/user_scraper.rb
133
135
  - lib/forki/user.rb