forki 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/forki/scrapers/post_scraper.rb +53 -18
- data/lib/forki/scrapers/scraper.rb +1 -1
- data/lib/forki/scrapers/user_scraper.rb +8 -3
- data/lib/forki/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a0667614af238aeb8089c1af60794918c0986b7f5e12fa6fe96d33a2a5c1a06f
|
4
|
+
data.tar.gz: 5668a5a6056bdf9bdd9c9fd8f119fa5ed754c5ee0b152d2764b26e9f9d7a8804
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 509ffb30dada2666236ed0900e1def8a61413ee3ba2e8705e31bd01422f8d2baf2cd24479faf906b6374602fcb0c1dc7bb334b7e02a560c798eba6efab30a2c9
|
7
|
+
data.tar.gz: 3f79b804b5505222e06a49352807d92c95f13a8735bedcd1b32cbf079be72b0922052014ef82da8771db108f7b0b674cd132caf7d377084c57cf0d245eb049e1
|
@@ -62,8 +62,16 @@ module Forki
|
|
62
62
|
|
63
63
|
def check_if_post_is_image(graphql_objects)
|
64
64
|
graphql_objects.any? do |graphql_object| # if any GraphQL objects contain the top-level keys above, return true
|
65
|
-
true unless graphql_object.fetch("image", nil).nil? # so long as the associated values are not nil
|
66
|
-
true unless graphql_object.fetch("currMedia", nil).nil?
|
65
|
+
return true unless graphql_object.fetch("image", nil).nil? # so long as the associated values are not nil
|
66
|
+
return true unless graphql_object.fetch("currMedia", nil).nil?
|
67
|
+
|
68
|
+
# This is a complicated form for `web.facebook.com` posts
|
69
|
+
|
70
|
+
if !graphql_object.dig("node", "comet_sections", "content", "story", "attachments").nil?
|
71
|
+
if graphql_object["node"]["comet_sections"]["content"]["story"]["attachments"].count.positive?
|
72
|
+
return true unless graphql_object["node"]["comet_sections"]["content"]["story"]["attachments"].first.dig("styles", "attachment", "all_subattachments", "nodes")&.first&.dig("media", "image", "uri").nil?
|
73
|
+
end
|
74
|
+
end
|
67
75
|
end
|
68
76
|
end
|
69
77
|
|
@@ -220,26 +228,53 @@ module Forki
|
|
220
228
|
|
221
229
|
# Extracts data from an image post by parsing GraphQL strings as seen in the video post scraper above
|
222
230
|
def extract_image_post_data(graphql_object_array)
|
223
|
-
|
224
|
-
|
225
|
-
|
231
|
+
# This is a weird one-off style
|
232
|
+
graphql_object = graphql_object_array.find { |graphql_object| !graphql_object.dig("node", "comet_sections", "content", "story", "attachments").nil? }
|
233
|
+
unless graphql_object.nil? || graphql_object.count == 0
|
234
|
+
attachments = graphql_object["node"]["comet_sections"]["content"]["story"]["attachments"]
|
235
|
+
|
236
|
+
reaction_counts = extract_reaction_counts(graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]["cannot_see_top_custom_reactions"]["top_reactions"])
|
237
|
+
id = graphql_object["node"]["post_id"]
|
238
|
+
num_comments = graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]["share_count"]["count"]
|
239
|
+
reshare_warning = graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]["should_show_reshare_warning"]
|
240
|
+
image_url = attachments.first["styles"]["attachment"]["all_subattachments"]["nodes"].first["media"]["image"]["uri"]
|
241
|
+
text = graphql_object["node"]["comet_sections"]["content"]["story"]["message"]["text"]
|
242
|
+
profile_link = graphql_object["node"]["comet_sections"]["content"]["story"]["actors"].first["url"]
|
243
|
+
created_at = graphql_object["node"]["comet_sections"]["content"]["story"]["comet_sections"]["context_layout"]["story"]["comet_sections"]["metadata"].first["story"]["creation_time"]
|
244
|
+
has_video = false
|
245
|
+
else
|
226
246
|
|
227
|
-
|
228
|
-
|
247
|
+
graphql_object_array.find { |graphql_object| graphql_object.key?("viewer_actor") && graphql_object.key?("display_comments") }
|
248
|
+
curr_media_object = graphql_object_array.find { |graphql_object| graphql_object.key?("currMedia") }
|
249
|
+
creation_story_object = graphql_object_array.find { |graphql_object| graphql_object.key?("creation_story") && graphql_object.key?("message") }
|
250
|
+
|
251
|
+
feedback_object = graphql_object_array.find { |graphql_object| graphql_object.has_key?("comet_ufi_summary_and_actions_renderer") }["comet_ufi_summary_and_actions_renderer"]["feedback"]
|
252
|
+
share_count_object = feedback_object.fetch("share_count", {})
|
253
|
+
|
254
|
+
poster = creation_story_object["creation_story"]["comet_sections"]["actor_photo"]["story"]["actors"][0]
|
229
255
|
|
230
|
-
|
256
|
+
reaction_counts = extract_reaction_counts(feedback_object["cannot_see_top_custom_reactions"]["top_reactions"])
|
257
|
+
id = curr_media_object["currMedia"]["id"],
|
258
|
+
num_comments = feedback_object["comments_count_summary_renderer"]["feedback"]["total_comment_count"],
|
259
|
+
num_shares = share_count_object.fetch("count", nil),
|
260
|
+
reshare_warning = feedback_object["should_show_reshare_warning"],
|
261
|
+
image_url = curr_media_object["currMedia"]["image"]["uri"],
|
262
|
+
text = (creation_story_object["message"] || {}).fetch("text", nil),
|
263
|
+
profile_link = poster["url"],
|
264
|
+
created_at = curr_media_object["currMedia"]["created_time"],
|
265
|
+
has_video = false
|
231
266
|
|
232
|
-
|
267
|
+
end
|
233
268
|
post_details = {
|
234
|
-
id:
|
235
|
-
num_comments:
|
236
|
-
num_shares:
|
237
|
-
reshare_warning:
|
238
|
-
image_url:
|
239
|
-
text:
|
240
|
-
profile_link:
|
241
|
-
created_at:
|
242
|
-
has_video:
|
269
|
+
id: id,
|
270
|
+
num_comments: num_comments,
|
271
|
+
num_shares: num_shares,
|
272
|
+
reshare_warning: reshare_warning,
|
273
|
+
image_url: image_url,
|
274
|
+
text: text,
|
275
|
+
profile_link: profile_link,
|
276
|
+
created_at: created_at,
|
277
|
+
has_video: has_video
|
243
278
|
}
|
244
279
|
post_details[:image_file] = Forki.retrieve_media(post_details[:image_url])
|
245
280
|
post_details[:reactions] = reaction_counts
|
@@ -6,7 +6,7 @@ require "dotenv/load"
|
|
6
6
|
require "oj"
|
7
7
|
require "selenium-webdriver"
|
8
8
|
require "open-uri"
|
9
|
-
require
|
9
|
+
require "selenium/webdriver/remote/http/curb"
|
10
10
|
|
11
11
|
options = Selenium::WebDriver::Options.chrome(exclude_switches: ["enable-automation"])
|
12
12
|
options.add_argument("--start-maximized")
|
@@ -39,9 +39,14 @@ module Forki
|
|
39
39
|
profile_title_section = graphql_strings.find { |gql| gql.include? "profile_tile_section_type" }
|
40
40
|
|
41
41
|
json = JSON.parse(profile_title_section)
|
42
|
-
|
43
|
-
|
44
|
-
|
42
|
+
|
43
|
+
followers_node = []
|
44
|
+
begin
|
45
|
+
followers_node = json["user"]["profile_tile_sections"]["edges"].first["node"]["profile_tile_views"]["nodes"][1]["view_style_renderer"]["view"]["profile_tile_items"]["nodes"].select do |node|
|
46
|
+
node["node"]["timeline_context_item"]["timeline_context_list_item_type"] == "INTRO_CARD_FOLLOWERS"
|
47
|
+
end
|
48
|
+
rescue NoMethodError; end
|
49
|
+
|
45
50
|
if followers_node.empty?
|
46
51
|
number_of_followers = nil
|
47
52
|
else
|
data/lib/forki/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: forki
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ''
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-05
|
11
|
+
date: 2023-06-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: capybara
|