facebook_scrapper 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/facebook_scrapper.rb +201 -0
- metadata +45 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 26d91e0a8dbf79232c734882341127301ac516a29d3524462be0b56f3a8727b7
|
4
|
+
data.tar.gz: 813d8e43fd8857d7882b6c6fc6873a4b873ed630192107c945bca0e0d2b74f2b
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 208d4f649abbe1025b98acc8250038ab04da99de42248e5b87ca7912c9fd3e1ea80a8085119e85c6a9093a76f671b00d75cb4d2446f07fa01b70101dee6d0f1d
|
7
|
+
data.tar.gz: 034df0dbf1b23e915cf65c3c1b639124ec880c3a3e806cc91f50f6de06eb5ffd54f8ce8cdf06acb9c8a62454d9847e1b3a5a17babec50be933aaa137c03826a9
|
@@ -0,0 +1,201 @@
|
|
1
|
+
require "json"
require "selenium-webdriver"
|
2
|
+
|
3
|
+
# Drives a Selenium-controlled browser against Facebook's "mbasic" pages in
# order to log in, publish posts and collect post data.
class FacebookScrapper
  # Starts the browser session used by every other method.
  #
  # @param browser [Symbol] browser name handed to Selenium::WebDriver.for;
  #   defaults to :chrome, which is what this class always used before
  def initialize(browser = :chrome)
    @driver = Selenium::WebDriver.for browser
  end

  # Rewrites an "m." or "www." Facebook URL to its "mbasic." equivalent.
  #
  # The argument is never modified (a new string is returned), so frozen
  # strings and string literals are safe to pass in.
  #
  # @param url [String] any URL
  # @return [String] the rewritten URL, or +url+ itself when it contains
  #   neither "m.facebook.com" nor "www.facebook.com"
  def toBasicUrl(url)
    if url.include? "m.facebook.com"
      url.gsub("m.facebook.com", "mbasic.facebook.com")
    elsif url.include? "www.facebook.com"
      url.gsub("www.facebook.com", "mbasic.facebook.com")
    else
      url
    end
  end

  # Navigates the browser to the mbasic version of +url+.
  #
  # @param url [String]
  def get(url)
    @driver.get(toBasicUrl(url))
  end

  # @return [Object] the underlying driver created in #initialize
  def get_driver
    @driver
  end

  # Submits the login form on https://mbasic.facebook.com.
  #
  # Success is detected by the presence of an element named "xc_message"
  # after submitting. On failure a message is printed and, unless the page
  # asks for a login code, a screenshot is saved to "login_failed.png".
  #
  # @param email [String]
  # @param password [String]
  # @return [Boolean] true when logged in, false otherwise
  def login(email, password)
    get("https://mbasic.facebook.com")
    @driver.find_element(name: "email").send_keys(email)
    password_box = @driver.find_element(name: "pass")
    password_box.send_keys(password)
    password_box.submit
    skip_one_click_login

    begin
      @driver.find_element(name: "xc_message")
      puts "Logged in"
      true
    rescue Selenium::WebDriver::Error::NoSuchElementError
      body = @driver.find_element(tag_name: "body").text
      if body.include?("Enter login code to continue")
        puts "You 2 factor is turned on. Authenticate it and try again"
      else
        puts "Failed to login"
        @driver.save_screenshot("login_failed.png")
      end
      false
    end
  end

  # Not implemented yet; does nothing.
  def logout
  end

  # Opens +url+ and submits +text+ through the "xc_message" field.
  #
  # Any StandardError is caught, reported on stdout and turned into a
  # +false+ return value.
  #
  # @param url [String]
  # @param text [String]
  # @return [Boolean] true when the form was submitted, false on any error
  def write_post_to_url(url, text)
    get(url)
    text_box = @driver.find_element(name: "xc_message")
    text_box.send_keys(text)
    text_box.submit
    true
  rescue => e
    puts "Failed to post in #{url} for error of #{e}"
    false
  end

  # Collects the posts shown on a group page.
  #
  # Each raw article element is printed as parsed (including blank lines for
  # elements that are not posts) before being collected.
  #
  # @param url [String] group URL
  # @return [Array<String>] one JSON document per recognised post
  def get_posts_group(url)
    get(url)
    container = @driver.find_element(id: "m_group_stories_container")
    all_posts = container.find_elements(css: "div[role='article']")
    puts "Found #{all_posts.length} posts"

    posts = []
    all_posts.each do |raw_post|
      new_post = get_post_object(raw_post)
      puts new_post
      posts.push(new_post.to_json) if new_post
    end
    posts
  end

  # Collects the posts shown on the home page.
  #
  # @return [Array<Hash>] one hash per recognised post (see #get_post_object)
  def get_posts_from_home
    get("https://mbasic.facebook.com")
    all_posts = @driver.find_elements(css: "div[role='article']")
    puts "Found #{all_posts.length} posts"
    all_posts.map { |raw_post| get_post_object(raw_post) }.compact
  end

  # Opens a group page and submits +text+ through its "xc_message" field.
  #
  # When the field is missing a screenshot is saved to "no_group_found.png".
  #
  # @param group_url [String]
  # @param text [String]
  # @return [Boolean] true when submitted, false when the field is missing
  def post_in_group(group_url, text)
    get(group_url)
    begin
      text_box = @driver.find_element(name: "xc_message")
    rescue Selenium::WebDriver::Error::NoSuchElementError
      @driver.save_screenshot("no_group_found.png")
      puts "Group url dosnt exist"
      return false
    end
    text_box.send_keys(text)
    text_box.submit
    true
  end

  # Extracts post data from one article element.
  #
  # Links are addressed by position: the 7th anchor from the end is treated
  # as the "Like" link and the 4th from the end as the comments link
  # (NOTE(review): this depends on the current page layout - confirm).
  # Extraction is best effort: an element that does not look like a post, or
  # any error while reading it, yields +nil+.
  #
  # @param raw_post [#find_element, #find_elements] article element
  # @return [Hash, nil] the Post#to_h hash, or nil when nothing was extracted
  def get_post_object(raw_post)
    # Prefer a nested article when there is one; otherwise read the element
    # itself.
    content = begin
      raw_post.find_element(css: "div[role='article']")
    rescue StandardError
      raw_post
    end

    all_links = raw_post.find_elements(tag_name: "a")
    like_anchor = all_links[-7]
    comment_anchor = all_links[-4]

    # Elements without a "Like" link in the expected slot are not posts.
    return nil unless like_anchor && like_anchor.text.include?("Like")

    post = Post.new
    post.owner = content.find_elements(tag_name: "a")[0].text
    post.text = content.find_element(tag_name: "p").text
    post.like_count = like_anchor.text.to_i
    post.comment_count = comment_anchor.text.to_i
    post.time = content.find_element(tag_name: "abbr").text
    post.post_owner_link = all_links[0].attribute("href")
    post.comment_link = comment_anchor.attribute("href")
    post.like_link = raw_post.find_element(link_text: "Like").attribute("href")
    post.more_link = all_links[-1].attribute("href")
    post.to_h
  rescue StandardError
    # Deliberate best effort: a post that cannot be read is skipped.
    nil
  end

  private

  # Bypass facebook OneClick Login: when an element with class "bi" is on the
  # page, click the element with class "bp" if there is one. Uses
  # find_elements so that a missing prompt is skipped instead of raising.
  def skip_one_click_login
    return if @driver.find_elements(class: "bi").empty?

    button = @driver.find_elements(class: "bp").first
    button.click if button
  end
end
|
137
|
+
|
138
|
+
# Plain value holder describing a Facebook user.
class Person
  # +pofile_link+ is the historical (misspelled) accessor name and is kept
  # for existing callers; +profile_link+ reads and writes the same value.
  attr_accessor :name, :pofile_link, :add_as_friend_link
  alias_method :profile_link, :pofile_link
  alias_method :profile_link=, :pofile_link=

  # Every attribute starts out as an empty string.
  def initialize
    @name = ""
    @pofile_link = ""
    @add_as_friend_link = ""
  end

  # Human-readable summary: the name, the profile link and, when one is set,
  # the add-as-friend link on a line of its own.
  #
  # @return [String]
  def to_str
    summary = "#{name}:\nProfile Link: #{pofile_link}"
    # Start a new line so the two links do not run into each other.
    summary += "\nAddlink ->: #{add_as_friend_link}" if add_as_friend_link != ""
    summary
  end

  # Same text as #to_str, for explicit conversions and interpolation.
  alias_method :to_s, :to_str
end
|
157
|
+
|
158
|
+
# Plain value holder describing one scraped Facebook post.
class Post
  # +privacy+ is referenced by #to_str; it defaults to an empty string and is
  # not filled in anywhere in this class.
  attr_accessor :owner, :text, :like_count, :comment_count, :like_link, :time,
                :post_owner_link, :comment_link, :more_link, :privacy

  # Text attributes start empty and the counters start at zero.
  def initialize
    @owner = ""
    @text = ""
    @like_count = 0
    @comment_count = 0
    @time = ""
    @post_owner_link = ""
    @comment_link = ""
    @like_link = ""
    @more_link = ""
    @privacy = ""
  end

  # @return [Hash{String => Object}] every instance variable keyed by its
  #   name without the leading "@"
  def to_h
    instance_variables.each_with_object({}) do |ivar, hash|
      hash[ivar.to_s.delete("@")] = instance_variable_get(ivar)
    end
  end

  # Serialises #to_h as JSON.
  #
  # Accepts (and forwards) the optional generator arguments so that a Post
  # nested in an Array or Hash can be serialised as well.
  #
  # @return [String]
  def to_json(*args)
    to_h.to_json(*args)
  end

  # Human-readable multi-line summary of the post.
  #
  # @return [String]
  def to_str
    [
      "\nPost by #{owner}: ",
      "#{text} \n",
      "Likes: #{like_count} - ",
      "Comments: #{comment_count} - ",
      "#{time} ",
      " - Privacy: #{privacy}\n-",
      "\n Comment -> #{comment_link}\n"
    ].join
  end

  # Same text as #to_str, for explicit conversions and interpolation.
  alias_method :to_s, :to_str
end
|
196
|
+
|
197
|
+
# def get_posts(url)
|
198
|
+
# driver.get(url)
|
199
|
+
# driver.find_element(id:'m_group_stories_container').find_elements(css: "div[role='article']")[0].find_element(tag_name: 'p').text
|
200
|
+
# driver.find_element(id:'m_group_stories_container').find_elements(css: "div[role='article']")[1].find_element(link_text: 'Full Story').attribute('href')
|
201
|
+
# end
|
metadata
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: facebook_scrapper
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Mujadded Al Rabbani Alif
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2019-09-14 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: This is a Facebook scraper bot that doesn't require the API to scrape Facebook
|
14
|
+
data
|
15
|
+
email: mujadded.alif@gmail.com
|
16
|
+
executables: []
|
17
|
+
extensions: []
|
18
|
+
extra_rdoc_files: []
|
19
|
+
files:
|
20
|
+
- lib/facebook_scrapper.rb
|
21
|
+
homepage: https://rubygems.org/gems/facebook_scrapper
|
22
|
+
licenses:
|
23
|
+
- MIT
|
24
|
+
metadata: {}
|
25
|
+
post_install_message:
|
26
|
+
rdoc_options: []
|
27
|
+
require_paths:
|
28
|
+
- lib
|
29
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
35
|
+
requirements:
|
36
|
+
- - ">="
|
37
|
+
- !ruby/object:Gem::Version
|
38
|
+
version: '0'
|
39
|
+
requirements: []
|
40
|
+
rubyforge_project:
|
41
|
+
rubygems_version: 2.7.6
|
42
|
+
signing_key:
|
43
|
+
specification_version: 4
|
44
|
+
summary: Scrape data from Facebook without the API
|
45
|
+
test_files: []
|