forki 0.2.6 → 0.2.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/forki/scrapers/post_scraper.rb +21 -6
- data/lib/forki/scrapers/sieves/image_sieves/image_sieve.rb.rb +1 -3
- data/lib/forki/version.rb +1 -1
- data/lib/forki.rb +2 -0
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b8c9afa0643b490f011539868cf5c6fa72a816e047e259ecb11dab00016d1ee4
|
4
|
+
data.tar.gz: f24cbd85f1e68dc1bbb8222b6e1c8730a0c101d2d0ee2eb0fd1748fbd276850b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: '0579059d7a8756652705ceea98d34bac0b0217e0a0898ca891040bcb123306b9b5e9e0ce438566d6d51be9a1eb2bf21b1a12c9a369c8c5d67cb158d57067d8d0'
|
7
|
+
data.tar.gz: d32fc074ee987b6479c5b6076f9b1ae3d69b8fde25beaa895efefed550efb23ce33376be6945f428d0f1fd2524f9914b6d73bd44143b14474cdac77fbb89f10c
|
@@ -37,7 +37,9 @@ module Forki
|
|
37
37
|
elsif post_has_image
|
38
38
|
extract_image_post_data(graphql_objects)
|
39
39
|
else
|
40
|
-
|
40
|
+
extract_image_post_data(graphql_objects)
|
41
|
+
|
42
|
+
#raise UnhandledContentError
|
41
43
|
end
|
42
44
|
end
|
43
45
|
|
@@ -71,6 +73,9 @@ module Forki
|
|
71
73
|
if !graphql_object.dig("node", "comet_sections", "content", "story", "attachments").nil?
|
72
74
|
if graphql_object["node"]["comet_sections"]["content"]["story"]["attachments"].count.positive?
|
73
75
|
return true unless graphql_object["node"]["comet_sections"]["content"]["story"]["attachments"].first.dig("styles", "attachment", "all_subattachments", "nodes")&.first&.dig("media", "image", "uri").nil?
|
76
|
+
|
77
|
+
# Another version I guess
|
78
|
+
return true unless graphql_object["node"]["comet_sections"]["content"]["story"]["attachments"].first.dig("styles", "attachment", "media", "large_share_image")&.dig("uri").nil?
|
74
79
|
end
|
75
80
|
end
|
76
81
|
|
@@ -318,7 +323,6 @@ module Forki
|
|
318
323
|
feedback_object = graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]
|
319
324
|
end
|
320
325
|
|
321
|
-
|
322
326
|
if feedback_object.has_key?("cannot_see_top_custom_reactions")
|
323
327
|
reaction_counts = extract_reaction_counts(feedback_object["cannot_see_top_custom_reactions"]["top_reactions"])
|
324
328
|
else
|
@@ -326,18 +330,28 @@ module Forki
|
|
326
330
|
end
|
327
331
|
|
328
332
|
id = graphql_object["node"]["post_id"]
|
329
|
-
num_comments = feedback_object["
|
333
|
+
num_comments = feedback_object["comments_count_summary_renderer"]["feedback"]["comment_rendering_instance"]["comments"]["total_count"]
|
330
334
|
reshare_warning = feedback_object["should_show_reshare_warning"]
|
331
335
|
|
332
|
-
if attachments.first["styles"]["attachment"]
|
336
|
+
if attachments.count.positive? && attachments.first["styles"]["attachment"]&.key?("all_subattachments")
|
333
337
|
image_url = attachments.first["styles"]["attachment"]["all_subattachments"]["nodes"].first["media"]["image"]["uri"]
|
334
338
|
else
|
335
|
-
image_url = attachments.first
|
339
|
+
image_url = attachments.first&.dig("styles", "attachment", "media", "photo_image", "uri")
|
340
|
+
|
341
|
+
if image_url.nil?
|
342
|
+
image_url = attachments.first&.dig("styles", "attachment", "media", "large_share_image", "uri")
|
343
|
+
end
|
336
344
|
end
|
337
345
|
|
338
346
|
text = graphql_object["node"]["comet_sections"]["content"]["story"]["message"]["text"]
|
339
347
|
profile_link = graphql_object["node"]["comet_sections"]["content"]["story"]["actors"].first["url"]
|
340
|
-
|
348
|
+
|
349
|
+
unless graphql_object["node"]["comet_sections"].dig("content", "story", "comet_sections", "context_layout", "story", "comet_sections", "metadata").nil?
|
350
|
+
created_at = graphql_object["node"]["comet_sections"].dig("content", "story", "comet_sections", "context_layout", "story", "comet_sections", "metadata")&.first["story"]["creation_time"]
|
351
|
+
else
|
352
|
+
created_at = graphql_object["node"]["comet_sections"]["context_layout"]["story"]["comet_sections"]["metadata"].first["story"]["creation_time"]
|
353
|
+
end
|
354
|
+
|
341
355
|
has_video = false
|
342
356
|
else
|
343
357
|
graphql_object_array.find { |graphql_object| graphql_object.key?("viewer_actor") && graphql_object.key?("display_comments") }
|
@@ -490,6 +504,7 @@ module Forki
|
|
490
504
|
graphql_strings = find_graphql_data_strings(page.html)
|
491
505
|
|
492
506
|
post_data = extract_post_data(graphql_strings)
|
507
|
+
|
493
508
|
post_data[:url] = url
|
494
509
|
user_url = post_data[:profile_link]
|
495
510
|
|
@@ -4,7 +4,6 @@ class ImageSieve
|
|
4
4
|
end
|
5
5
|
|
6
6
|
def self.sieve_for_graphql_objects(graphql_objects)
|
7
|
-
|
8
7
|
sieve = sieve_class_for_graphql_objects(graphql_objects)
|
9
8
|
return nil if sieve.nil?
|
10
9
|
|
@@ -19,7 +18,6 @@ private
|
|
19
18
|
end
|
20
19
|
end
|
21
20
|
|
22
|
-
|
23
|
-
Dir['./lib/forki/scrapers/sieves/image_sieves/*.rb'].each do |file|
|
21
|
+
Dir["./lib/forki/scrapers/sieves/image_sieves/*.rb"].each do |file|
|
24
22
|
require file unless file.end_with?("image_sieve.rb")
|
25
23
|
end
|
data/lib/forki/version.rb
CHANGED
data/lib/forki.rb
CHANGED
@@ -53,6 +53,8 @@ module Forki
|
|
53
53
|
# Extract the file extension from a media URL
|
54
54
|
# E.g. ".png" from https://scontent-atl3-2.xx.fbcdn.net/v/t39.30808-1.png?stp=dst-png_p148x148
|
55
55
|
def self.extract_file_extension_from_url(url)
|
56
|
+
return nil if url.nil?
|
57
|
+
|
56
58
|
stripped_url = url.split("?").first # remove URL query params
|
57
59
|
extension = stripped_url.split(".").last
|
58
60
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: forki
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.6
|
4
|
+
version: 0.2.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ''
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-05-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: capybara
|
@@ -165,7 +165,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
165
165
|
- !ruby/object:Gem::Version
|
166
166
|
version: '0'
|
167
167
|
requirements: []
|
168
|
-
rubygems_version: 3.
|
168
|
+
rubygems_version: 3.5.9
|
169
169
|
signing_key:
|
170
170
|
specification_version: 4
|
171
171
|
summary: A gem to scrape Facebook pages for archive purposes.
|