forki 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/forki/scrapers/post_scraper.rb +53 -18
- data/lib/forki/scrapers/scraper.rb +1 -1
- data/lib/forki/scrapers/user_scraper.rb +8 -3
- data/lib/forki/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a0667614af238aeb8089c1af60794918c0986b7f5e12fa6fe96d33a2a5c1a06f
|
4
|
+
data.tar.gz: 5668a5a6056bdf9bdd9c9fd8f119fa5ed754c5ee0b152d2764b26e9f9d7a8804
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 509ffb30dada2666236ed0900e1def8a61413ee3ba2e8705e31bd01422f8d2baf2cd24479faf906b6374602fcb0c1dc7bb334b7e02a560c798eba6efab30a2c9
|
7
|
+
data.tar.gz: 3f79b804b5505222e06a49352807d92c95f13a8735bedcd1b32cbf079be72b0922052014ef82da8771db108f7b0b674cd132caf7d377084c57cf0d245eb049e1
|
data/lib/forki/scrapers/post_scraper.rb
CHANGED
@@ -62,8 +62,16 @@ module Forki
|
|
62
62
|
|
63
63
|
def check_if_post_is_image(graphql_objects)
|
64
64
|
graphql_objects.any? do |graphql_object| # if any GraphQL objects contain the top-level keys above, return true
|
65
|
-
true unless graphql_object.fetch("image", nil).nil? # so long as the associated values are not nil
|
66
|
-
true unless graphql_object.fetch("currMedia", nil).nil?
|
65
|
+
return true unless graphql_object.fetch("image", nil).nil? # so long as the associated values are not nil
|
66
|
+
return true unless graphql_object.fetch("currMedia", nil).nil?
|
67
|
+
|
68
|
+
# This is a complicated form for `web.facebook.com` posts
|
69
|
+
|
70
|
+
if !graphql_object.dig("node", "comet_sections", "content", "story", "attachments").nil?
|
71
|
+
if graphql_object["node"]["comet_sections"]["content"]["story"]["attachments"].count.positive?
|
72
|
+
return true unless graphql_object["node"]["comet_sections"]["content"]["story"]["attachments"].first.dig("styles", "attachment", "all_subattachments", "nodes")&.first&.dig("media", "image", "uri").nil?
|
73
|
+
end
|
74
|
+
end
|
67
75
|
end
|
68
76
|
end
|
69
77
|
|
@@ -220,26 +228,53 @@ module Forki
|
|
220
228
|
|
221
229
|
# Extracts data from an image post by parsing GraphQL strings as seen in the video post scraper above
|
222
230
|
def extract_image_post_data(graphql_object_array)
|
223
|
-
|
224
|
-
|
225
|
-
|
231
|
+
# This is a weird one-off style
|
232
|
+
graphql_object = graphql_object_array.find { |graphql_object| !graphql_object.dig("node", "comet_sections", "content", "story", "attachments").nil? }
|
233
|
+
unless graphql_object.nil? || graphql_object.count == 0
|
234
|
+
attachments = graphql_object["node"]["comet_sections"]["content"]["story"]["attachments"]
|
235
|
+
|
236
|
+
reaction_counts = extract_reaction_counts(graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]["cannot_see_top_custom_reactions"]["top_reactions"])
|
237
|
+
id = graphql_object["node"]["post_id"]
|
238
|
+
num_comments = graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]["share_count"]["count"]
|
239
|
+
reshare_warning = graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]["should_show_reshare_warning"]
|
240
|
+
image_url = attachments.first["styles"]["attachment"]["all_subattachments"]["nodes"].first["media"]["image"]["uri"]
|
241
|
+
text = graphql_object["node"]["comet_sections"]["content"]["story"]["message"]["text"]
|
242
|
+
profile_link = graphql_object["node"]["comet_sections"]["content"]["story"]["actors"].first["url"]
|
243
|
+
created_at = graphql_object["node"]["comet_sections"]["content"]["story"]["comet_sections"]["context_layout"]["story"]["comet_sections"]["metadata"].first["story"]["creation_time"]
|
244
|
+
has_video = false
|
245
|
+
else
|
226
246
|
|
227
|
-
|
228
|
-
|
247
|
+
graphql_object_array.find { |graphql_object| graphql_object.key?("viewer_actor") && graphql_object.key?("display_comments") }
|
248
|
+
curr_media_object = graphql_object_array.find { |graphql_object| graphql_object.key?("currMedia") }
|
249
|
+
creation_story_object = graphql_object_array.find { |graphql_object| graphql_object.key?("creation_story") && graphql_object.key?("message") }
|
250
|
+
|
251
|
+
feedback_object = graphql_object_array.find { |graphql_object| graphql_object.has_key?("comet_ufi_summary_and_actions_renderer") }["comet_ufi_summary_and_actions_renderer"]["feedback"]
|
252
|
+
share_count_object = feedback_object.fetch("share_count", {})
|
253
|
+
|
254
|
+
poster = creation_story_object["creation_story"]["comet_sections"]["actor_photo"]["story"]["actors"][0]
|
229
255
|
|
230
|
-
|
256
|
+
reaction_counts = extract_reaction_counts(feedback_object["cannot_see_top_custom_reactions"]["top_reactions"])
|
257
|
+
id = curr_media_object["currMedia"]["id"],
|
258
|
+
num_comments = feedback_object["comments_count_summary_renderer"]["feedback"]["total_comment_count"],
|
259
|
+
num_shares = share_count_object.fetch("count", nil),
|
260
|
+
reshare_warning = feedback_object["should_show_reshare_warning"],
|
261
|
+
image_url = curr_media_object["currMedia"]["image"]["uri"],
|
262
|
+
text = (creation_story_object["message"] || {}).fetch("text", nil),
|
263
|
+
profile_link = poster["url"],
|
264
|
+
created_at = curr_media_object["currMedia"]["created_time"],
|
265
|
+
has_video = false
|
231
266
|
|
232
|
-
|
267
|
+
end
|
233
268
|
post_details = {
|
234
|
-
id:
|
235
|
-
num_comments:
|
236
|
-
num_shares:
|
237
|
-
reshare_warning:
|
238
|
-
image_url:
|
239
|
-
text:
|
240
|
-
profile_link:
|
241
|
-
created_at:
|
242
|
-
has_video:
|
269
|
+
id: id,
|
270
|
+
num_comments: num_comments,
|
271
|
+
num_shares: num_shares,
|
272
|
+
reshare_warning: reshare_warning,
|
273
|
+
image_url: image_url,
|
274
|
+
text: text,
|
275
|
+
profile_link: profile_link,
|
276
|
+
created_at: created_at,
|
277
|
+
has_video: has_video
|
243
278
|
}
|
244
279
|
post_details[:image_file] = Forki.retrieve_media(post_details[:image_url])
|
245
280
|
post_details[:reactions] = reaction_counts
|
data/lib/forki/scrapers/scraper.rb
CHANGED
@@ -6,7 +6,7 @@ require "dotenv/load"
|
|
6
6
|
require "oj"
|
7
7
|
require "selenium-webdriver"
|
8
8
|
require "open-uri"
|
9
|
-
require
|
9
|
+
require "selenium/webdriver/remote/http/curb"
|
10
10
|
|
11
11
|
options = Selenium::WebDriver::Options.chrome(exclude_switches: ["enable-automation"])
|
12
12
|
options.add_argument("--start-maximized")
|
data/lib/forki/scrapers/user_scraper.rb
CHANGED
@@ -39,9 +39,14 @@ module Forki
|
|
39
39
|
profile_title_section = graphql_strings.find { |gql| gql.include? "profile_tile_section_type" }
|
40
40
|
|
41
41
|
json = JSON.parse(profile_title_section)
|
42
|
-
|
43
|
-
|
44
|
-
|
42
|
+
|
43
|
+
followers_node = []
|
44
|
+
begin
|
45
|
+
followers_node = json["user"]["profile_tile_sections"]["edges"].first["node"]["profile_tile_views"]["nodes"][1]["view_style_renderer"]["view"]["profile_tile_items"]["nodes"].select do |node|
|
46
|
+
node["node"]["timeline_context_item"]["timeline_context_list_item_type"] == "INTRO_CARD_FOLLOWERS"
|
47
|
+
end
|
48
|
+
rescue NoMethodError; end
|
49
|
+
|
45
50
|
if followers_node.empty?
|
46
51
|
number_of_followers = nil
|
47
52
|
else
|
data/lib/forki/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: forki
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ''
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-05
|
11
|
+
date: 2023-06-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: capybara
|