forki 0.2.7 → 0.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +10 -8
- data/lib/forki/scrapers/post_scraper.rb +20 -7
- data/lib/forki/scrapers/sieves/video_sieves/video_sieve_watch_tab.rb +20 -7
- data/lib/forki/version.rb +1 -1
- data/lib/forki.rb +2 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: be729f6e73f3a233ef2337076e907a5f62abd6d82541d38cf59ac4cf97ba99ae
|
4
|
+
data.tar.gz: 8cd062ac0cf5688f60fd1d2afd6718013f7bd1a5a037194ee01135f9bb890f15
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f0f6002e321608fc472775ddeef2b476336ad2f104d7a82063c9b8d05ec15f6e795d09dfe67a9990653fb7297c9f82dc200032285143ef43a178b458801669ad
|
7
|
+
data.tar.gz: a8e5a9311783f97d6acbb628025ee6d1513ebdffe1e957b80658a3bf03f8261109fc50d4c7d765b4557046fd03288ce39627cc84deab774b6ac68023aa782dc3
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
forki (0.2.
|
4
|
+
forki (0.2.8)
|
5
5
|
apparition
|
6
6
|
capybara
|
7
7
|
oj
|
@@ -35,14 +35,15 @@ GEM
|
|
35
35
|
capybara (~> 3.13, < 4)
|
36
36
|
websocket-driver (>= 0.6.5)
|
37
37
|
ast (2.4.2)
|
38
|
-
|
38
|
+
base64 (0.2.0)
|
39
|
+
bigdecimal (3.1.8)
|
39
40
|
builder (3.2.4)
|
40
41
|
byebug (11.1.3)
|
41
|
-
capybara (3.
|
42
|
+
capybara (3.40.0)
|
42
43
|
addressable
|
43
44
|
matrix
|
44
45
|
mini_mime (>= 0.1.3)
|
45
|
-
nokogiri (~> 1.
|
46
|
+
nokogiri (~> 1.11)
|
46
47
|
rack (>= 1.6.0)
|
47
48
|
rack-test (>= 0.6.3)
|
48
49
|
regexp_parser (>= 1.5, < 3.0)
|
@@ -54,7 +55,7 @@ GEM
|
|
54
55
|
erubi (1.12.0)
|
55
56
|
ethon (0.16.0)
|
56
57
|
ffi (>= 1.15.0)
|
57
|
-
ffi (1.
|
58
|
+
ffi (1.17.0-arm64-darwin)
|
58
59
|
i18n (1.13.0)
|
59
60
|
concurrent-ruby (~> 1.0)
|
60
61
|
json (2.6.3)
|
@@ -67,12 +68,12 @@ GEM
|
|
67
68
|
minitest (5.18.0)
|
68
69
|
nokogiri (1.15.1-arm64-darwin)
|
69
70
|
racc (~> 1.4)
|
70
|
-
oj (3.16.
|
71
|
+
oj (3.16.4)
|
71
72
|
bigdecimal (>= 3.0)
|
72
73
|
parallel (1.23.0)
|
73
74
|
parser (3.2.2.1)
|
74
75
|
ast (~> 2.4.1)
|
75
|
-
public_suffix (5.
|
76
|
+
public_suffix (5.1.1)
|
76
77
|
racc (1.6.2)
|
77
78
|
rack (2.2.4)
|
78
79
|
rack-test (2.1.0)
|
@@ -129,7 +130,8 @@ GEM
|
|
129
130
|
rubocop-rails (~> 2.0)
|
130
131
|
ruby-progressbar (1.13.0)
|
131
132
|
rubyzip (2.3.2)
|
132
|
-
selenium-webdriver (4.
|
133
|
+
selenium-webdriver (4.21.1)
|
134
|
+
base64 (~> 0.2)
|
133
135
|
rexml (~> 3.2, >= 3.2.5)
|
134
136
|
rubyzip (>= 1.2.2, < 3.0)
|
135
137
|
websocket (~> 1.0)
|
@@ -37,7 +37,9 @@ module Forki
|
|
37
37
|
elsif post_has_image
|
38
38
|
extract_image_post_data(graphql_objects)
|
39
39
|
else
|
40
|
-
|
40
|
+
extract_image_post_data(graphql_objects)
|
41
|
+
|
42
|
+
#raise UnhandledContentError
|
41
43
|
end
|
42
44
|
end
|
43
45
|
|
@@ -202,7 +204,11 @@ module Forki
|
|
202
204
|
begin
|
203
205
|
feedback_object = story_node_object["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]
|
204
206
|
rescue NoMethodError
|
205
|
-
|
207
|
+
begin
|
208
|
+
feedback_object = story_node_object["comet_sections"]["feedback"]["story"]["comet_feed_ufi_container"]["story"]["story_ufi_container"]["story"]["feedback_context"]["feedback_target_with_context"]
|
209
|
+
rescue NoMethodError
|
210
|
+
feedback_object = story_node_object["comet_sections"]["feedback"]["story"]["story_ufi_container"]["story"]["feedback_context"]["feedback_target_with_context"]
|
211
|
+
end
|
206
212
|
end
|
207
213
|
|
208
214
|
if feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"].key?("cannot_see_top_custom_reactions")
|
@@ -313,8 +319,14 @@ module Forki
|
|
313
319
|
# TODO: These two branches are *super* similar, probably a lot of overlap
|
314
320
|
attachments = graphql_object["node"]["comet_sections"]["content"]["story"]["attachments"]
|
315
321
|
|
316
|
-
if graphql_object["node"]["comet_sections"]["feedback"]["story"].
|
317
|
-
feedback_object = graphql_object["node"]["comet_sections"]["feedback"]["story"]["
|
322
|
+
if graphql_object["node"]["comet_sections"]["feedback"]["story"].has_key?("story_ufi_container")
|
323
|
+
feedback_object = graphql_object["node"]["comet_sections"]["feedback"]["story"]["story_ufi_container"]["story"]["feedback_context"]["feedback_target_with_context"]["comet_ufi_summary_and_actions_renderer"]["feedback"]
|
324
|
+
elsif graphql_object["node"]["comet_sections"]["feedback"]["story"].dig("feedback_context")
|
325
|
+
begin
|
326
|
+
feedback_object = graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]
|
327
|
+
rescue NoMethodError
|
328
|
+
debugger
|
329
|
+
end
|
318
330
|
elsif graphql_object["node"]["comet_sections"]["feedback"]["story"].has_key?("comet_feed_ufi_container")
|
319
331
|
feedback_object = graphql_object["node"]["comet_sections"]["feedback"]["story"]["comet_feed_ufi_container"]["story"]["story_ufi_container"]["story"]["feedback_context"]["feedback_target_with_context"]["comet_ufi_summary_and_actions_renderer"]["feedback"]
|
320
332
|
else
|
@@ -331,13 +343,13 @@ module Forki
|
|
331
343
|
num_comments = feedback_object["comments_count_summary_renderer"]["feedback"]["comment_rendering_instance"]["comments"]["total_count"]
|
332
344
|
reshare_warning = feedback_object["should_show_reshare_warning"]
|
333
345
|
|
334
|
-
if attachments.first["styles"]["attachment"]
|
346
|
+
if attachments.count.positive? && attachments.first["styles"]["attachment"]&.key?("all_subattachments")
|
335
347
|
image_url = attachments.first["styles"]["attachment"]["all_subattachments"]["nodes"].first["media"]["image"]["uri"]
|
336
348
|
else
|
337
|
-
image_url = attachments.first
|
349
|
+
image_url = attachments.first&.dig("styles", "attachment", "media", "photo_image", "uri")
|
338
350
|
|
339
351
|
if image_url.nil?
|
340
|
-
image_url = attachments.first
|
352
|
+
image_url = attachments.first&.dig("styles", "attachment", "media", "large_share_image", "uri")
|
341
353
|
end
|
342
354
|
end
|
343
355
|
|
@@ -502,6 +514,7 @@ module Forki
|
|
502
514
|
graphql_strings = find_graphql_data_strings(page.html)
|
503
515
|
|
504
516
|
post_data = extract_post_data(graphql_strings)
|
517
|
+
|
505
518
|
post_data[:url] = url
|
506
519
|
user_url = post_data[:profile_link]
|
507
520
|
|
@@ -41,8 +41,12 @@ class VideoSieveWatchTab < VideoSieve
|
|
41
41
|
def self.sieve(graphql_objects)
|
42
42
|
video_object = self.extractor(graphql_objects)
|
43
43
|
|
44
|
-
video_url = video_object["attachments"].first["media"]["browser_native_sd_url"]
|
45
|
-
|
44
|
+
# video_url = video_object["attachments"].first["media"]["browser_native_sd_url"]
|
45
|
+
video_url = video_object["short_form_video_context"]["playback_video"]["browser_native_hd_url"]
|
46
|
+
video_url = video_object["short_form_video_context"]["playback_video"]["browser_native_sd_url"] if video_url.nil?
|
47
|
+
|
48
|
+
# video_preview_image_url = video_object["attachments"].first["media"]["preferred_thumbnail"]["image"]["uri"]
|
49
|
+
video_preview_image_url = video_object["short_form_video_context"]["video"]["first_frame_thumbnail"]
|
46
50
|
|
47
51
|
if !video_object["feedback_context"].nil?
|
48
52
|
feedback_object = video_object["feedback_context"]["feedback_target_with_context"]
|
@@ -51,16 +55,25 @@ class VideoSieveWatchTab < VideoSieve
|
|
51
55
|
feedback_object = feedback_object["feedback"] if feedback_object.has_key?("feedback")
|
52
56
|
end
|
53
57
|
|
54
|
-
|
58
|
+
begin
|
59
|
+
profile_link = video_object["attachments"].first["media"]["owner"]["url"]
|
60
|
+
rescue StandardError => e
|
61
|
+
profile_link = video_object["short_form_video_context"]["video_owner"]["url"]
|
62
|
+
end
|
63
|
+
|
55
64
|
if profile_link.nil?
|
56
65
|
filtered_json = graphql_objects.find { |go| go.has_key? "attachments" }
|
57
66
|
profile_link = filtered_json["attachments"].first["media"]["creation_story"]["comet_sections"]["title"]["story"]["actors"].first["url"]
|
58
67
|
end
|
59
68
|
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
69
|
+
begin
|
70
|
+
if feedback_object.key?("cannot_see_top_custom_reactions")
|
71
|
+
reactions = feedback_object["cannot_see_top_custom_reactions"]["top_reactions"]["edges"]
|
72
|
+
else
|
73
|
+
reactions = feedback_object["top_reactions"]["edges"]
|
74
|
+
end
|
75
|
+
rescue StandardError => e
|
76
|
+
reactions = feedback_object["unified_reactors"]["count"]
|
64
77
|
end
|
65
78
|
|
66
79
|
post_details = {
|
data/lib/forki/version.rb
CHANGED
data/lib/forki.rb
CHANGED
@@ -53,6 +53,8 @@ module Forki
|
|
53
53
|
# Extract the file extension from a media URL
|
54
54
|
# E.g. ".png" from https://scontent-atl3-2.xx.fbcdn.net/v/t39.30808-1.png?stp=dst-png_p148x148
|
55
55
|
def self.extract_file_extension_from_url(url)
|
56
|
+
return nil if url.nil?
|
57
|
+
|
56
58
|
stripped_url = url.split("?").first # remove URL query params
|
57
59
|
extension = stripped_url.split(".").last
|
58
60
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: forki
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ''
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-09-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: capybara
|