forki 0.2.1 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 412d3450d71c6ec80cbf868cf005545ed437a3bb06532cb383af0fa65088baaf
4
- data.tar.gz: 78707c68f6c49f65f755c4df5edc08585aacb05dd1f22311fca734285e3f74a4
3
+ metadata.gz: f1c8304ed1312aa0e5a7f2f8f5ad2de9a407c761fb7ab9212d66465f599caf6f
4
+ data.tar.gz: e35a13415b85b9bd57cd9028841c61a324e4011e1a449cd102fe470015ed670d
5
5
  SHA512:
6
- metadata.gz: bde0f077222e99c19da6ac3a9b1dad259d21fa147fc18d6ba52a3e9706aa4ef4408379b59a5c95e2e6b9d5377b1b8272b78ba934f903caa45db94a6da031c803
7
- data.tar.gz: 325224a93a9183695c54024f0de22d2882aa04998fd6d31e2db4f6226c217ebd44de4fd140f3ebdd06bb5edb2ff01b2b0d74f70c2a804915461091fc81482d68
6
+ metadata.gz: 4179d91435cdc9c82c39c92a15f3df4e7e437f250eccf01e79cf9cb631c326b76badaed3e5851cc26c91713c0cdc27feba56127e8d8c647525960286efd50412
7
+ data.tar.gz: b5f8ca4461440a785d544017fc1dbc72e9edfdc1d6924717813cf7cd1bfb67fa8695ecc072d00fd476d55eee3ecedca710c107a11bbfe89e8d4b067f443040ae
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- forki (0.1.4)
4
+ forki (0.2.1)
5
5
  apparition
6
6
  capybara
7
7
  oj
@@ -127,7 +127,7 @@ GEM
127
127
  rubocop-rails (~> 2.0)
128
128
  ruby-progressbar (1.13.0)
129
129
  rubyzip (2.3.2)
130
- selenium-webdriver (4.10.0)
130
+ selenium-webdriver (4.11.0)
131
131
  rexml (~> 3.2, >= 3.2.5)
132
132
  rubyzip (>= 1.2.2, < 3.0)
133
133
  websocket (~> 1.0)
@@ -29,7 +29,7 @@ class ImageSieve<%= camel_name %>Test < Minitest::Test
29
29
  assert_equal false, result[:reshare_warning]
30
30
  assert_not_nil result[:video_preview_image_url]
31
31
  assert_not_nil result[:video_url]
32
- assert_equal nil, text
32
+ assert_equal nil, result[:text]
33
33
  assert_equal 1654989063, result[:created_at]
34
34
  assert_equal nil, result[:profile_link]
35
35
  assert_equal false, result[:has_video]
@@ -28,7 +28,7 @@ class VideoSieve<%= camel_name %>Test < Minitest::Test
28
28
  assert_equal false, result[:reshare_warning]
29
29
  assert_not_nil result[:video_preview_image_url]
30
30
  assert_not_nil result[:video_url]
31
- assert_equal nil, text
31
+ assert_equal nil, result[:text]
32
32
  assert_equal 1654989063, result[:created_at]
33
33
  assert_equal nil, result[:profile_link]
34
34
  assert_equal true, result[:has_video]
@@ -15,7 +15,7 @@ module Forki
15
15
  views_pattern = /[0-9MK, ]+Views/
16
16
  spans = all("span")
17
17
  views_span = spans.find { |s| s.text(:all) =~ views_pattern }
18
- extract_int_from_num_element(views_span)
18
+ Scraper.extract_int_from_num_element(views_span)
19
19
  end
20
20
 
21
21
  def extract_post_data(graphql_strings)
@@ -215,7 +215,7 @@ module Forki
215
215
 
216
216
  def extract_video_post_data_alternative(graphql_object_array)
217
217
  sidepane_object = graphql_object_array.find { |graphql_object| graphql_object.key?("tahoe_sidepane_renderer") }
218
- video_object = graphql_object_array.find { |graphql_object| graphql_object.keys == ["video"] }
218
+ video_object = graphql_object_array.find { |graphql_object| graphql_object.has_key?("video") }
219
219
  feedback_object = sidepane_object["tahoe_sidepane_renderer"]["video"]["feedback"]
220
220
  reaction_counts = extract_reaction_counts(sidepane_object["tahoe_sidepane_renderer"]["video"]["feedback"]["cannot_see_top_custom_reactions"]["top_reactions"])
221
221
  share_count_object = feedback_object.fetch("share_count", {})
@@ -209,7 +209,7 @@ module Forki
209
209
  # Extracts an integer out of a string describing a number
210
210
  # e.g. "4K Comments" returns 4000
211
211
  # e.g. "131 Shares" returns 131
212
- def extract_int_from_num_element(element)
212
+ def self.extract_int_from_num_element(element)
213
213
  return unless element
214
214
 
215
215
  if element.class != String # if an html element was passed in
@@ -14,7 +14,7 @@ class VideoSieve
14
14
  private
15
15
 
16
16
  def self.sieve_class_for_graphql_objects(graphql_objects)
17
- sieves = [VideoSieveWatchTab, VideoSieveVideoPage]
17
+ sieves = [VideoSieveWatchTab, VideoSieveVideoPage, VideoSieveReel]
18
18
  sieves.detect { |sieve| sieve.check(graphql_objects) }
19
19
  end
20
20
  end
@@ -0,0 +1,78 @@
1
# Sieve for Facebook Reels. It recognises GraphQL payloads whose video
# "creation_story" carries a "short_form_video_context" and flattens them into
# the post-details hash described on `.sieve`.
class VideoSieveReel < VideoSieve
  # Reports whether this sieve applies to the given GraphQL objects.
  #
  # @param graphql_objects [Array] parsed GraphQL objects; an entry is either a
  #   Hash or an Array whose first element is the Hash of interest
  # @return [Boolean] true when the extracted video object has a
  #   "short_form_video_context" key; false otherwise, including when the
  #   input cannot be traversed at all
  def self.check(graphql_objects)
    video_object = extractor(graphql_objects)
    !video_object.nil? && video_object.key?("short_form_video_context")
  rescue StandardError
    # Deliberately best-effort: a payload this sieve cannot walk simply means
    # "not a reel", so the caller can move on to the next sieve.
    false
  end

  # Builds the post-details hash for a reel.
  #
  # Keys returned: :id, :num_comments, :num_shares, :num_views,
  # :reshare_warning, :video_preview_image_url, :video_url, :text,
  # :created_at, :profile_link, :has_video, :video_preview_image_file,
  # :video_file and :reactions. The misspelled :num_shared key emitted by
  # earlier versions of this sieve is still included as an alias of
  # :num_shares so existing readers keep working.
  #
  # @param graphql_objects [Array] same shape as accepted by `.check`
  # @return [Hash] the post details
  # @raise [NoMethodError] when an expected object or nested key is missing
  #   from the payload (nothing is rescued here)
  def self.sieve(graphql_objects)
    video_object = extractor(graphql_objects)
    short_form_context = video_object["short_form_video_context"]
    playback_video = short_form_context["playback_video"]

    feedback_object = find_graphql_object(graphql_objects) do |graphql_object|
      !graphql_object.dig("feedback", "top_level_comments").nil?
    end
    feedback = feedback_object["feedback"]

    warning_typename = playback_video.dig("warning_screen_renderer", "cix_screen", "view_model", "__typename")
    reshare_warning = warning_typename == "OverlayWarningScreenViewModel"

    video_preview_image_url = playback_video["preferred_thumbnail"]["image"]["uri"]
    video_url = playback_video["browser_native_hd_url"] || playback_video["browser_native_sd_url"]
    num_shares = Forki::Scraper.extract_int_from_num_element(feedback["share_count_reduced"])

    # The media downloads stay last in the literal so that a malformed payload
    # fails before anything is fetched.
    {
      id: short_form_context["video"]["id"],
      num_comments: feedback["top_level_comments"]["totalCountIncludingReplies"],
      num_shares: num_shares,
      num_shared: num_shares, # legacy misspelling, kept for backward compatibility
      num_views: nil,
      reshare_warning: reshare_warning,
      video_preview_image_url: video_preview_image_url,
      video_url: video_url,
      text: nil, # Reels don't have text
      # "tracking" is a JSON string; the publish time sits under the value of
      # the first "page_insights" entry. Yea, this is weird.
      created_at: JSON.parse(feedback_object["tracking"])["page_insights"].first[1]["post_context"]["publish_time"],
      profile_link: short_form_context["video_owner"]["url"],
      has_video: true,
      video_preview_image_file: Forki.retrieve_media(video_preview_image_url),
      video_file: Forki.retrieve_media(video_url),
      reactions: nil # Only available on comments it seems? Look into this again sometime
    }
  end

  # Returns the "creation_story" of the first GraphQL object that has a
  # "video" key, or nil when no object has one.
  #
  # NOTE: a bare `private` keyword does not apply to `def self.` methods, so
  # this method has always been publicly callable and is left that way.
  #
  # @param graphql_objects [Array] same shape as accepted by `.check`
  # @return [Object, nil] whatever is stored under "video" -> "creation_story"
  def self.extractor(graphql_objects)
    video_holder = find_graphql_object(graphql_objects) { |graphql_object| graphql_object.key?("video") }
    video_holder&.dig("video", "creation_story")
  end

  # Returns the first Hash among `graphql_objects` for which the block is
  # truthy. An Array entry is represented by its first element, so the
  # unwrapped Hash (not the wrapping Array) is what gets returned; empty
  # Arrays and other non-Hash entries are skipped.
  def self.find_graphql_object(graphql_objects)
    graphql_objects.each do |entry|
      candidate = entry.is_a?(Array) ? entry.first : entry
      next unless candidate.is_a?(Hash)
      return candidate if yield(candidate)
    end
    nil
  end
  private_class_method :find_graphql_object
end
@@ -10,7 +10,7 @@ module Forki
10
10
 
11
11
  return nil if number_of_likes_match.nil?
12
12
 
13
- extract_int_from_num_element(number_of_likes_match.named_captures["num_likes"])
13
+ Scraper.extract_int_from_num_element(number_of_likes_match.named_captures["num_likes"])
14
14
  end
15
15
 
16
16
  # Finds and returns the number of people who follow the current page
@@ -21,12 +21,12 @@ module Forki
21
21
 
22
22
  return nil if number_of_followers_match.nil?
23
23
 
24
- number_of_followers = extract_int_from_num_element(number_of_followers_match.named_captures["num_followers"])
24
+ number_of_followers = Scraper.extract_int_from_num_element(number_of_followers_match.named_captures["num_followers"])
25
25
 
26
26
  # Note, this is sticking around if we want to use it later
27
27
  # if number_of_followers.nil?
28
28
  # number_of_followers_string = JSON.parse(profile_header_str)["user"]["profile_header_renderer"]["user"]["profile_social_context"]["content"].first["text"]["text"]
29
- # number_of_followers = extract_int_from_num_element(number_of_followers_string)
29
+ # number_of_followers = Scraper.extract_int_from_num_element(number_of_followers_string)
30
30
  # end
31
31
 
32
32
  number_of_followers
@@ -36,7 +36,7 @@ module Forki
36
36
  followers_string = profile_followers_node["node"]["timeline_context_item"]["renderer"]["context_item"]["title"]["text"]
37
37
  followers_pattern = /[0-9,]+/
38
38
  number_of_followers_match = followers_pattern.match(followers_string).to_s
39
- extract_int_from_num_element(number_of_followers_match)
39
+ Scraper.extract_int_from_num_element(number_of_followers_match)
40
40
  end
41
41
 
42
42
  # Returns a hash of details about a Facebook user profile
data/lib/forki/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Forki
4
- VERSION = "0.2.1"
4
+ VERSION = "0.2.3"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: forki
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - ''
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-08-01 00:00:00.000000000 Z
11
+ date: 2023-08-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: capybara
@@ -126,6 +126,7 @@ files:
126
126
  - lib/forki/scrapers/scraper.rb
127
127
  - lib/forki/scrapers/sieves/image_sieves/image_sieve.rb.rb
128
128
  - lib/forki/scrapers/sieves/video_sieves/video_sieve.rb
129
+ - lib/forki/scrapers/sieves/video_sieves/video_sieve_reel.rb
129
130
  - lib/forki/scrapers/sieves/video_sieves/video_sieve_video_page.rb
130
131
  - lib/forki/scrapers/sieves/video_sieves/video_sieve_watch_tab.rb
131
132
  - lib/forki/scrapers/user_scraper.rb