forki 0.2.14 → 0.2.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +2 -1
- data/Gemfile.lock +7 -7
- data/lib/forki/scrapers/post_scraper.rb +101 -7
- data/lib/forki/scrapers/sieves/video_sieves/video_sieve.rb +1 -2
- data/lib/forki/scrapers/sieves/video_sieves/video_sieve_reel.rb +3 -3
- data/lib/forki/scrapers/sieves/video_sieves/video_sieve_reel_2.rb +8 -6
- data/lib/forki/scrapers/sieves/video_sieves/video_sieve_video_page.rb +2 -2
- data/lib/forki/scrapers/sieves/video_sieves/video_sieve_video_page_2.rb +1 -1
- data/lib/forki/scrapers/sieves/video_sieves/video_sieve_watch_tab.rb +12 -7
- data/lib/forki/scrapers/user_scraper.rb +1 -1
- data/lib/forki/version.rb +1 -1
- data/lib/forki.rb +7 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5a81f3f14449d4d0abdb3300bbdffbc4d34b8c5db400c8a4d7beb9bb2958fcc7
|
4
|
+
data.tar.gz: d4b9ade188b782c02da0b53ef64fb39a6d66aaed3eac2354dd968e87c58f1e51
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d9516edea544bd8eefaee678e60f957f9f659a32daa46bbb8157a06da38e4e513ea3eca85108c59714b4d9d70b536ef853a7f6741e1e956c2bdb7c89a908308d
|
7
|
+
data.tar.gz: eb6126c73693e2d13d335d517573736343869b94e3e17e8775d65f2c496e90b55c6baed06797e140ca76539ba5434d2ad2dc603de415d758db76337ee74224c6
|
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
forki (0.2.
|
4
|
+
forki (0.2.14)
|
5
5
|
apparition
|
6
6
|
capybara
|
7
7
|
oj
|
@@ -67,7 +67,7 @@ GEM
|
|
67
67
|
matrix (0.4.2)
|
68
68
|
method_source (1.0.0)
|
69
69
|
mini_mime (1.1.5)
|
70
|
-
minitest (5.
|
70
|
+
minitest (5.25.1)
|
71
71
|
nokogiri (1.15.1-arm64-darwin)
|
72
72
|
racc (~> 1.4)
|
73
73
|
oj (3.16.6)
|
@@ -97,7 +97,7 @@ GEM
|
|
97
97
|
rainbow (3.1.1)
|
98
98
|
rake (13.0.6)
|
99
99
|
regexp_parser (2.8.0)
|
100
|
-
rexml (3.
|
100
|
+
rexml (3.3.9)
|
101
101
|
rubocop (1.51.0)
|
102
102
|
json (~> 2.3)
|
103
103
|
parallel (~> 1.10)
|
@@ -136,7 +136,7 @@ GEM
|
|
136
136
|
rubyzip (2.3.2)
|
137
137
|
selenium-devtools (0.129.0)
|
138
138
|
selenium-webdriver (~> 4.2)
|
139
|
-
selenium-webdriver (4.
|
139
|
+
selenium-webdriver (4.25.0)
|
140
140
|
base64 (~> 0.2)
|
141
141
|
logger (~> 1.4)
|
142
142
|
rexml (~> 3.2, >= 3.2.5)
|
@@ -155,12 +155,12 @@ GEM
|
|
155
155
|
xpath (3.2.0)
|
156
156
|
nokogiri (~> 1.8)
|
157
157
|
zeitwerk (2.6.8)
|
158
|
-
zorki (0.2.
|
158
|
+
zorki (0.2.8)
|
159
159
|
apparition
|
160
160
|
capybara
|
161
161
|
oj
|
162
162
|
selenium-devtools
|
163
|
-
selenium-webdriver (~> 4.
|
163
|
+
selenium-webdriver (~> 4.25.0)
|
164
164
|
typhoeus
|
165
165
|
|
166
166
|
PLATFORMS
|
@@ -172,7 +172,7 @@ DEPENDENCIES
|
|
172
172
|
curb (~> 1.0, >= 1.0.5)
|
173
173
|
dotenv (~> 2.7.6)
|
174
174
|
forki!
|
175
|
-
minitest (~> 5.
|
175
|
+
minitest (~> 5.25)
|
176
176
|
rack (= 2.2.4)
|
177
177
|
rake (~> 13.0)
|
178
178
|
rubocop (~> 1.7)
|
@@ -23,6 +23,7 @@ module Forki
|
|
23
23
|
raise ContentUnavailableError unless is_post_available?
|
24
24
|
|
25
25
|
graphql_objects = get_graphql_objects(graphql_strings)
|
26
|
+
post_is_text_only = check_if_post_is_text_only(graphql_objects)
|
26
27
|
post_has_video = check_if_post_is_video(graphql_objects)
|
27
28
|
post_has_image = check_if_post_is_image(graphql_objects)
|
28
29
|
|
@@ -30,7 +31,9 @@ module Forki
|
|
30
31
|
# https://www.facebook.com/PlandemicMovie/posts/588866298398729/
|
31
32
|
post_has_video_in_comment_stream = check_if_post_is_in_comment_stream(graphql_objects) if post_has_video == false
|
32
33
|
|
33
|
-
if post_has_video
|
34
|
+
if post_is_text_only
|
35
|
+
extract_text_post_data(graphql_objects)
|
36
|
+
elsif post_has_video
|
34
37
|
extract_video_post_data(graphql_strings)
|
35
38
|
elsif post_has_video_in_comment_stream
|
36
39
|
extract_video_comment_post_data(graphql_objects)
|
@@ -45,6 +48,21 @@ module Forki
|
|
45
48
|
graphql_strings.map { |graphql_object| JSON.parse(graphql_object) }
|
46
49
|
end
|
47
50
|
|
51
|
+
def check_if_post_is_text_only(graphql_objects)
|
52
|
+
graphql_object = graphql_objects.find do |graphql_object|
|
53
|
+
# next unless graphql_object.key?("nodes")
|
54
|
+
next if graphql_object.dig("node", "comet_sections", "content", "story", "comet_sections", "message", "story", "is_text_only_story").nil?
|
55
|
+
# next unless graphql_object.to_s.include?("is_text_only_story")
|
56
|
+
# graphql_nodes = graphql_object["nodes"]
|
57
|
+
graphql_object.dig("node", "comet_sections", "content", "story", "comet_sections", "message", "story", "is_text_only_story")
|
58
|
+
end
|
59
|
+
|
60
|
+
return false if graphql_object.nil?
|
61
|
+
|
62
|
+
return true if graphql_object.dig("node", "comet_sections", "content", "story", "comet_sections", "message", "story", "is_text_only_story")
|
63
|
+
false
|
64
|
+
end
|
65
|
+
|
48
66
|
def check_if_post_is_video(graphql_objects)
|
49
67
|
graphql_objects.any? { |graphql_object| graphql_object.key?("is_live_streaming") || graphql_object.key?("video") || check_if_post_is_reel(graphql_object) }
|
50
68
|
end
|
@@ -124,6 +142,67 @@ module Forki
|
|
124
142
|
false
|
125
143
|
end
|
126
144
|
|
145
|
+
def extract_text_post_data(graphql_objects)
|
146
|
+
graphql_object = graphql_objects.find do |graphql_object|
|
147
|
+
next if graphql_object.dig("node", "comet_sections", "content", "story", "comet_sections", "message", "story", "is_text_only_story").nil?
|
148
|
+
graphql_object
|
149
|
+
end
|
150
|
+
|
151
|
+
unless graphql_object.nil? || graphql_object.count.zero?
|
152
|
+
if graphql_object["node"]["comet_sections"]["feedback"]["story"].has_key?("story_ufi_container")
|
153
|
+
feedback_object = graphql_object["node"]["comet_sections"]["feedback"]["story"]["story_ufi_container"]["story"]["feedback_context"]["feedback_target_with_context"]["comet_ufi_summary_and_actions_renderer"]["feedback"]
|
154
|
+
elsif graphql_object["node"]["comet_sections"]["feedback"]["story"].dig("feedback_context")
|
155
|
+
begin
|
156
|
+
feedback_object = graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]
|
157
|
+
rescue NoMethodError; end
|
158
|
+
elsif graphql_object["node"]["comet_sections"]["feedback"]["story"].has_key?("comet_feed_ufi_container")
|
159
|
+
feedback_object = graphql_object["node"]["comet_sections"]["feedback"]["story"]["comet_feed_ufi_container"]["story"]["story_ufi_container"]["story"]["feedback_context"]["feedback_target_with_context"]["comet_ufi_summary_and_actions_renderer"]["feedback"]
|
160
|
+
else
|
161
|
+
feedback_object = graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]
|
162
|
+
end
|
163
|
+
|
164
|
+
if feedback_object.has_key?("cannot_see_top_custom_reactions")
|
165
|
+
reaction_counts = extract_reaction_counts(feedback_object["cannot_see_top_custom_reactions"]["top_reactions"])
|
166
|
+
else
|
167
|
+
reaction_counts = extract_reaction_counts(feedback_object["top_reactions"])
|
168
|
+
end
|
169
|
+
|
170
|
+
id = graphql_object["node"]["post_id"]
|
171
|
+
num_comments = feedback_object["comments_count_summary_renderer"]["feedback"]["comment_rendering_instance"]["comments"]["total_count"]
|
172
|
+
reshare_warning = feedback_object["should_show_reshare_warning"]
|
173
|
+
share_count_object = feedback_object.fetch("share_count", {})
|
174
|
+
num_shares = share_count_object.fetch("count", nil)
|
175
|
+
|
176
|
+
text = graphql_object["node"]["comet_sections"]["content"]["story"].dig("message", "text")
|
177
|
+
text = "" if text.nil?
|
178
|
+
|
179
|
+
profile_link = graphql_object["node"]["comet_sections"]["content"]["story"]["actors"].first["url"]
|
180
|
+
|
181
|
+
unless graphql_object["node"]["comet_sections"].dig("content", "story", "comet_sections", "context_layout", "story", "comet_sections", "metadata").nil?
|
182
|
+
created_at = graphql_object["node"]["comet_sections"].dig("content", "story", "comet_sections", "context_layout", "story", "comet_sections", "metadata")&.first["story"]["creation_time"]
|
183
|
+
else
|
184
|
+
created_at = graphql_object["node"]["comet_sections"]["context_layout"]["story"]["comet_sections"]["metadata"].first["story"]["creation_time"]
|
185
|
+
end
|
186
|
+
|
187
|
+
has_video = false
|
188
|
+
end
|
189
|
+
|
190
|
+
post_details = {
|
191
|
+
id: id,
|
192
|
+
num_comments: num_comments,
|
193
|
+
num_shares: num_shares,
|
194
|
+
reshare_warning: reshare_warning,
|
195
|
+
image_url: nil,
|
196
|
+
text: text,
|
197
|
+
profile_link: profile_link,
|
198
|
+
created_at: created_at,
|
199
|
+
has_video: has_video
|
200
|
+
}
|
201
|
+
post_details[:image_file] = []
|
202
|
+
post_details[:reactions] = reaction_counts
|
203
|
+
post_details
|
204
|
+
end
|
205
|
+
|
127
206
|
def extract_video_comment_post_data(graphql_objects)
|
128
207
|
graphql_nodes = nil
|
129
208
|
graphql_objects.find do |graphql_object|
|
@@ -185,7 +264,7 @@ module Forki
|
|
185
264
|
media_object = story_node_object["comet_sections"]["content"]["story"]["attachments"].first["styles"]["attachment"]
|
186
265
|
if media_object.has_key?("video")
|
187
266
|
video_object = story_node_object["comet_sections"]["content"]["story"]["attachments"].first["styles"]["attachment"]["media"]["video"]
|
188
|
-
elsif media_object.has_key?("media") && media_object["media"].has_key?("browser_native_sd_url")
|
267
|
+
elsif media_object.has_key?("media") && (media_object["media"].has_key?("browser_native_sd_url") || media_object["media"].has_key?("videoDeliveryLegacyFields"))
|
189
268
|
video_object = media_object["media"]
|
190
269
|
end
|
191
270
|
|
@@ -250,6 +329,12 @@ module Forki
|
|
250
329
|
reshare_warning = feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["should_show_reshare_warning"]
|
251
330
|
end
|
252
331
|
|
332
|
+
if !video_object.key?("browser_native_hd_url") && !video_object.key?("browser_native_sd_url") && video_object.key?("videoDeliveryLegacyFields")
|
333
|
+
video_url = video_object["videoDeliveryLegacyFields"]["browser_native_hd_url"] || video_object["videoDeliveryLegacyFields"]["browser_native_sd_url"]
|
334
|
+
else
|
335
|
+
video_url = video_object["browser_native_hd_url"] || video_object["browser_native_sd_url"]
|
336
|
+
end
|
337
|
+
|
253
338
|
post_details = {
|
254
339
|
id: video_object["id"],
|
255
340
|
num_comments: num_comments,
|
@@ -257,7 +342,7 @@ module Forki
|
|
257
342
|
num_views: view_count,
|
258
343
|
reshare_warning: reshare_warning,
|
259
344
|
video_preview_image_url: video_object["preferred_thumbnail"]["image"]["uri"],
|
260
|
-
video_url:
|
345
|
+
video_url: video_url,
|
261
346
|
text: text,
|
262
347
|
created_at: creation_date,
|
263
348
|
profile_link: story_node_object["comet_sections"]["context_layout"]["story"]["comet_sections"]["actor_photo"]["story"]["actors"][0]["url"],
|
@@ -272,6 +357,9 @@ module Forki
|
|
272
357
|
def extract_video_post_data_alternative(graphql_object_array)
|
273
358
|
sidepane_object = graphql_object_array.find { |graphql_object| graphql_object.key?("tahoe_sidepane_renderer") }
|
274
359
|
video_object = graphql_object_array.find { |graphql_object| graphql_object.has_key?("video") }
|
360
|
+
|
361
|
+
raise Forki::ContentUnavailableError if sidepane_object.nil? && video_object.nil?
|
362
|
+
|
275
363
|
feedback_object = sidepane_object["tahoe_sidepane_renderer"]["video"]["feedback"]
|
276
364
|
|
277
365
|
if sidepane_object["tahoe_sidepane_renderer"]["video"]["feedback"].key?("cannot_see_top_custom_reactions")
|
@@ -288,9 +376,14 @@ module Forki
|
|
288
376
|
num_comments = feedback_object["comments_count_summary_renderer"]["feedback"]["total_comment_count"]
|
289
377
|
end
|
290
378
|
|
291
|
-
text = sidepane_object["tahoe_sidepane_renderer"]["video"]["creation_story"]["comet_sections"]
|
379
|
+
text = sidepane_object["tahoe_sidepane_renderer"]["video"]["creation_story"]["comet_sections"].dig("message", "story", "message", "text")
|
292
380
|
text = "" if text.nil?
|
293
381
|
|
382
|
+
video_url = video_object["video"]["playable_url_quality_hd"] || video_object["video"]["browser_native_hd_url"] || video_object["video"]["browser_native_sd_url"] || video_object["video"]["playable_url"]
|
383
|
+
if video_url.nil? && video_object["video"].key?("videoDeliveryLegacyFields")
|
384
|
+
video_url = video_object["video"]["videoDeliveryLegacyFields"]["browser_native_hd_url"] || video_object["video"]["videoDeliveryLegacyFields"]["browser_native_sd_url"]
|
385
|
+
end
|
386
|
+
|
294
387
|
post_details = {
|
295
388
|
id: video_object["id"],
|
296
389
|
num_comments: num_comments,
|
@@ -298,7 +391,7 @@ module Forki
|
|
298
391
|
num_views: feedback_object["video_view_count"],
|
299
392
|
reshare_warning: feedback_object["should_show_reshare_warning"],
|
300
393
|
video_preview_image_url: video_object["video"]["preferred_thumbnail"]["image"]["uri"],
|
301
|
-
video_url:
|
394
|
+
video_url: video_url,
|
302
395
|
text: text,
|
303
396
|
created_at: video_object["video"]["publish_time"],
|
304
397
|
profile_link: sidepane_object["tahoe_sidepane_renderer"]["video"]["creation_story"]["comet_sections"]["actor_photo"]["story"]["actors"][0]["url"],
|
@@ -314,7 +407,6 @@ module Forki
|
|
314
407
|
# Extracts data from an image post by parsing GraphQL strings as seen in the video post scraper above
|
315
408
|
def extract_image_post_data(graphql_object_array)
|
316
409
|
# This is a weird one-off style
|
317
|
-
|
318
410
|
graphql_object = graphql_object_array.find { |graphql_object| !graphql_object.dig("node", "comet_sections", "content", "story", "attachments").nil? }
|
319
411
|
unless graphql_object.nil? || graphql_object.count.zero?
|
320
412
|
# TODO: These two branches are *super* similar, probably a lot of overlap
|
@@ -352,7 +444,7 @@ module Forki
|
|
352
444
|
end
|
353
445
|
end
|
354
446
|
|
355
|
-
text = graphql_object["node"]["comet_sections"]["content"]["story"].dig(
|
447
|
+
text = graphql_object["node"]["comet_sections"]["content"]["story"].dig("message", "text")
|
356
448
|
text = "" if text.nil?
|
357
449
|
|
358
450
|
profile_link = graphql_object["node"]["comet_sections"]["content"]["story"]["actors"].first["url"]
|
@@ -367,6 +459,8 @@ module Forki
|
|
367
459
|
else
|
368
460
|
graphql_object_array.find { |graphql_object| graphql_object.key?("viewer_actor") && graphql_object.key?("display_comments") }
|
369
461
|
curr_media_object = graphql_object_array.find { |graphql_object| graphql_object.key?("currMedia") }
|
462
|
+
raise Forki::ContentUnavailableError if curr_media_object.nil?
|
463
|
+
|
370
464
|
creation_story_object = graphql_object_array.find { |graphql_object| graphql_object.key?("creation_story") && graphql_object.key?("message") }
|
371
465
|
|
372
466
|
feedback_object = graphql_object_array.find { |graphql_object| graphql_object.has_key?("comet_ufi_summary_and_actions_renderer") }["comet_ufi_summary_and_actions_renderer"]["feedback"]
|
@@ -4,7 +4,6 @@ class VideoSieve
|
|
4
4
|
end
|
5
5
|
|
6
6
|
def self.sieve_for_graphql_objects(graphql_objects)
|
7
|
-
|
8
7
|
sieve = sieve_class_for_graphql_objects(graphql_objects)
|
9
8
|
return nil if sieve.nil?
|
10
9
|
|
@@ -19,6 +18,6 @@ private
|
|
19
18
|
end
|
20
19
|
end
|
21
20
|
|
22
|
-
Dir[File.join(__dir__,
|
21
|
+
Dir[File.join(__dir__, "*.rb")].each do |file|
|
23
22
|
require file unless file.end_with?("video_sieve.rb")
|
24
23
|
end
|
@@ -15,7 +15,7 @@ class VideoSieveReel < VideoSieve
|
|
15
15
|
|
16
16
|
true
|
17
17
|
rescue StandardError
|
18
|
-
|
18
|
+
false
|
19
19
|
end
|
20
20
|
|
21
21
|
# output the expected format of:
|
@@ -56,7 +56,7 @@ class VideoSieveReel < VideoSieve
|
|
56
56
|
video_preview_image_url = video_object["short_form_video_context"]["playback_video"]["preferred_thumbnail"]["image"]["uri"]
|
57
57
|
video_url = video_object["short_form_video_context"]["playback_video"]["browser_native_hd_url"] || video_object["short_form_video_context"]["playback_video"]["browser_native_sd_url"]
|
58
58
|
|
59
|
-
|
59
|
+
{
|
60
60
|
id: video_object["short_form_video_context"]["video"]["id"],
|
61
61
|
num_comments: feedback_object["feedback"]["top_level_comments"]["totalCountIncludingReplies"],
|
62
62
|
num_shared: Forki::Scraper.extract_int_from_num_element(feedback_object["feedback"]["share_count_reduced"]),
|
@@ -74,7 +74,7 @@ class VideoSieveReel < VideoSieve
|
|
74
74
|
}
|
75
75
|
end
|
76
76
|
|
77
|
-
|
77
|
+
private
|
78
78
|
|
79
79
|
def self.extractor(graphql_objects)
|
80
80
|
video_objects = graphql_objects.filter do |go|
|
@@ -14,7 +14,7 @@ class VideoSieveReel2 < VideoSieve
|
|
14
14
|
|
15
15
|
true
|
16
16
|
rescue StandardError
|
17
|
-
|
17
|
+
false
|
18
18
|
end
|
19
19
|
|
20
20
|
# output the expected format of:
|
@@ -39,7 +39,6 @@ class VideoSieveReel2 < VideoSieve
|
|
39
39
|
def self.sieve(graphql_objects)
|
40
40
|
video_object = self.extractor(graphql_objects)
|
41
41
|
|
42
|
-
|
43
42
|
feedback_object = graphql_objects.filter do |go|
|
44
43
|
go = go.first if go.kind_of?(Array) && !go.empty?
|
45
44
|
!go.dig("feedback", "total_comment_count").nil?
|
@@ -55,7 +54,12 @@ class VideoSieveReel2 < VideoSieve
|
|
55
54
|
video_preview_image_url = video_object["short_form_video_context"]["playback_video"]["preferred_thumbnail"]["image"]["uri"]
|
56
55
|
video_url = video_object["short_form_video_context"]["playback_video"]["browser_native_hd_url"] || video_object["short_form_video_context"]["playback_video"]["browser_native_sd_url"]
|
57
56
|
|
58
|
-
|
57
|
+
if video_url.nil? && video_object["short_form_video_context"]["playback_video"].has_key?("videoDeliveryLegacyFields")
|
58
|
+
video_url = video_object["short_form_video_context"]["playback_video"]["videoDeliveryLegacyFields"]["browser_native_hd_url"]
|
59
|
+
video_url = video_object["short_form_video_context"]["playback_video"]["videoDeliveryLegacyFields"]["browser_native_sd_url"] if video_url.nil?
|
60
|
+
end
|
61
|
+
|
62
|
+
{
|
59
63
|
id: video_object["short_form_video_context"]["video"]["id"],
|
60
64
|
num_comments: feedback_object["feedback"]["total_comment_count"],
|
61
65
|
num_shared: Forki::Scraper.extract_int_from_num_element(feedback_object["feedback"]["share_count_reduced"]),
|
@@ -71,11 +75,9 @@ class VideoSieveReel2 < VideoSieve
|
|
71
75
|
video_file: Forki.retrieve_media(video_url),
|
72
76
|
reactions: nil # Only available on comments it seems? Look into this again sometime
|
73
77
|
}
|
74
|
-
rescue StandardError => e
|
75
|
-
debugger
|
76
78
|
end
|
77
79
|
|
78
|
-
|
80
|
+
private
|
79
81
|
|
80
82
|
def self.extractor(graphql_objects)
|
81
83
|
video_objects = graphql_objects.filter do |go|
|
@@ -9,7 +9,7 @@ class VideoSieveVideoPage < VideoSieve
|
|
9
9
|
return false unless feedback_object.has_key?("cannot_see_top_custom_reactions")
|
10
10
|
|
11
11
|
true
|
12
|
-
rescue StandardError
|
12
|
+
rescue StandardError
|
13
13
|
false
|
14
14
|
end
|
15
15
|
|
@@ -61,7 +61,7 @@ class VideoSieveVideoPage < VideoSieve
|
|
61
61
|
}
|
62
62
|
end
|
63
63
|
|
64
|
-
|
64
|
+
private
|
65
65
|
|
66
66
|
def self.extractor(graphql_objects)
|
67
67
|
story_node_object = graphql_objects.find { |graphql_object| graphql_object.key? "node" }&.fetch("node", nil) # user posted video
|
@@ -12,11 +12,11 @@ class VideoSieveWatchTab < VideoSieve
|
|
12
12
|
return false unless video_object.kind_of?(Array) && !video_object.empty?
|
13
13
|
|
14
14
|
video_object = video_object.first
|
15
|
-
return false unless video_object.kind_of?(Hash) && video_object.
|
15
|
+
return false unless video_object.kind_of?(Hash) && video_object.key?("media")
|
16
16
|
|
17
17
|
true
|
18
18
|
rescue StandardError
|
19
|
-
|
19
|
+
false
|
20
20
|
end
|
21
21
|
|
22
22
|
# output the expected format of:
|
@@ -46,11 +46,16 @@ class VideoSieveWatchTab < VideoSieve
|
|
46
46
|
video_url = video_object.dig("short_form_video_context", "playback_video", "browser_native_hd_url") if video_url.nil?
|
47
47
|
video_url = video_object.dig("short_form_video_context", "playback_video", "browser_native_sd_url") if video_url.nil?
|
48
48
|
|
49
|
-
|
49
|
+
video_url = video_object["attachments"]&.first.dig("media", "videoDeliveryLegacyFields", "browser_native_hd_url") if video_url.nil?
|
50
|
+
video_url = video_object["attachments"]&.first.dig("media", "videoDeliveryLegacyFields", "browser_native_sd_url") if video_url.nil?
|
51
|
+
|
52
|
+
raise VideoSieveFailedError.new(sieve_class: "VideoSieveWatchTab") if video_url.nil?
|
50
53
|
|
51
54
|
video_preview_image_url = video_object["attachments"]&.first.dig("media", "preferred_thumbnail", "image", "uri")
|
52
55
|
video_preview_image_url = video_object["short_form_video_context"]["video"]["first_frame_thumbnail"] if video_preview_image_url.nil?
|
53
56
|
|
57
|
+
raise VideoSieveFailedError.new(sieve_class: "VideoSieveWatchTab") if video_preview_image_url.nil?
|
58
|
+
|
54
59
|
if !video_object["feedback_context"].nil?
|
55
60
|
feedback_object = video_object["feedback_context"]["feedback_target_with_context"]
|
56
61
|
else
|
@@ -60,7 +65,7 @@ class VideoSieveWatchTab < VideoSieve
|
|
60
65
|
|
61
66
|
begin
|
62
67
|
profile_link = video_object["attachments"].first["media"]["owner"]["url"]
|
63
|
-
rescue StandardError
|
68
|
+
rescue StandardError
|
64
69
|
profile_link = video_object["short_form_video_context"]["video_owner"]["url"]
|
65
70
|
end
|
66
71
|
|
@@ -75,11 +80,11 @@ class VideoSieveWatchTab < VideoSieve
|
|
75
80
|
else
|
76
81
|
reactions = feedback_object["top_reactions"]["edges"]
|
77
82
|
end
|
78
|
-
rescue StandardError
|
83
|
+
rescue StandardError
|
79
84
|
reactions = feedback_object["unified_reactors"]["count"]
|
80
85
|
end
|
81
86
|
|
82
|
-
|
87
|
+
{
|
83
88
|
id: video_object.dig("shareable", "id") || video_object["attachments"].first["media"]["id"],
|
84
89
|
num_comments: feedback_object["total_comment_count"],
|
85
90
|
num_shared: nil, # This is not associated with these videos in this format
|
@@ -97,7 +102,7 @@ class VideoSieveWatchTab < VideoSieve
|
|
97
102
|
}
|
98
103
|
end
|
99
104
|
|
100
|
-
|
105
|
+
private
|
101
106
|
|
102
107
|
def self.extractor(graphql_objects)
|
103
108
|
video_objects = graphql_objects.filter do |go|
|
@@ -124,7 +124,7 @@ module Forki
|
|
124
124
|
uri = URI(url)
|
125
125
|
query = uri.query
|
126
126
|
components = URI.decode_uri_component(query)
|
127
|
-
extracted_url = URI.extract(components).first
|
127
|
+
extracted_url = URI::Parser.new.extract(components).first
|
128
128
|
extracted_uri = URI(extracted_url)
|
129
129
|
username = extracted_uri.to_s.match(/(https:\/\/www.instagram.com\/_u\/[\w]+)/).to_s.split("/").last
|
130
130
|
|
data/lib/forki/version.rb
CHANGED
data/lib/forki.rb
CHANGED
@@ -48,6 +48,13 @@ module Forki
|
|
48
48
|
end
|
49
49
|
end
|
50
50
|
|
51
|
+
class VideoSieveFailedError < StandardError
|
52
|
+
def initialize(msg = "Video sieve failed to find a video", sieve_class: VideoSieve)
|
53
|
+
self.msg = "#{sieve_class} failed to find a video" if msg.nil?
|
54
|
+
super
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
51
58
|
define_setting :temp_storage_location, "tmp/forki"
|
52
59
|
|
53
60
|
# Extract the file extension from a media URL
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: forki
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.14
|
4
|
+
version: 0.2.16
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ''
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-10-
|
11
|
+
date: 2024-10-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: capybara
|