facebook_scrapper 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/facebook_scrapper.rb +5 -200
- data/lib/facebook_scrapper/person.rb +21 -0
- data/lib/facebook_scrapper/post.rb +40 -0
- data/lib/facebook_scrapper/scrapper.rb +144 -0
- data/lib/facebook_scrapper/version.rb +3 -0
- metadata +6 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 39780580d7e538cfced26d2f1205f77b1a9f1b6fad95bea835365670b726f736
|
4
|
+
data.tar.gz: 36f2b706b4bc463cc9e936bf40a16a8c013740c6965d339b4ae5eb1146437c00
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5ea2d8f679806dbfd1cc05c3d000927a18ce5e51d6c80b212b7db57db340b4cae54c61d79baf68c34850f990237dfdac514f52844aeac22229fdcdf0db1b8a72
|
7
|
+
data.tar.gz: 2a9a6da1138d4c89afbd58efdc5979f25adb81a11d97fa65da4a1e1886de62b296a49ec6bae696cb55fea9ee30af773a0b6214d297136b7e19bbe4746c58d77e
|
data/lib/facebook_scrapper.rb
CHANGED
@@ -1,201 +1,6 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
end
|
7
|
-
|
8
|
-
def toBasicUrl(url)
|
9
|
-
if url.include? "m.facebook.com"
|
10
|
-
return url.gsub!("m.facebook.com", "mbasic.facebook.com")
|
11
|
-
elsif url.include? "www.facebook.com"
|
12
|
-
return url.gsub!("www.facebook.com", "mbasic.facebook.com")
|
13
|
-
else
|
14
|
-
return url
|
15
|
-
end
|
16
|
-
end
|
17
|
-
|
18
|
-
def get(url)
|
19
|
-
@driver.get(toBasicUrl(url))
|
20
|
-
end
|
21
|
-
|
22
|
-
def get_driver
|
23
|
-
return @driver
|
24
|
-
end
|
25
|
-
|
26
|
-
def login(email, password)
|
27
|
-
url = "https://mbasic.facebook.com"
|
28
|
-
get(url)
|
29
|
-
email_box = @driver.find_element(name: "email")
|
30
|
-
email_box.send_keys(email)
|
31
|
-
password_box = @driver.find_element(name: "pass")
|
32
|
-
password_box.send_keys(password)
|
33
|
-
password_box.submit
|
34
|
-
# Bypass facebook OneClick Login
|
35
|
-
if @driver.find_element(class: "bi")
|
36
|
-
@driver.find_element(class: "bp").click()
|
37
|
-
end
|
38
|
-
begin
|
39
|
-
@driver.find_element(name: "xc_message")
|
40
|
-
puts "Logged in"
|
41
|
-
return true
|
42
|
-
rescue Selenium::WebDriver::Error::NoSuchElementError => e
|
43
|
-
body = @driver.find_element(tag_name: "body").text
|
44
|
-
if (body.include?("Enter login code to continue"))
|
45
|
-
puts "You 2 factor is turned on. Authenticate it and try again"
|
46
|
-
else
|
47
|
-
puts "Failed to login"
|
48
|
-
@driver.save_screenshot("login_failed.png")
|
49
|
-
end
|
50
|
-
return false
|
51
|
-
end
|
52
|
-
end
|
53
|
-
|
54
|
-
def logout
|
55
|
-
end
|
56
|
-
|
57
|
-
def write_post_to_url(url, text)
|
58
|
-
begin
|
59
|
-
get(url)
|
60
|
-
textBox = @driver.find_element(name: "xc_message")
|
61
|
-
textBox.send_keys(text)
|
62
|
-
textbox.submit
|
63
|
-
return true
|
64
|
-
rescue => e
|
65
|
-
puts "Failed to post in #{url} for error of #{e}"
|
66
|
-
return false
|
67
|
-
end
|
68
|
-
end
|
69
|
-
|
70
|
-
def get_posts_group(url)
|
71
|
-
get(url)
|
72
|
-
posts = []
|
73
|
-
all_posts = @driver.find_element(id: "m_group_stories_container").find_elements(css: "div[role='article']")
|
74
|
-
puts "Found #{all_posts.length} posts"
|
75
|
-
all_posts.each do |raw_post|
|
76
|
-
new_post = get_post_object(raw_post)
|
77
|
-
puts new_post
|
78
|
-
posts.push(new_post.to_json) if new_post
|
79
|
-
end
|
80
|
-
return posts
|
81
|
-
end
|
82
|
-
|
83
|
-
def get_posts_from_home
|
84
|
-
get("https://mbasic.facebook.com")
|
85
|
-
posts = []
|
86
|
-
all_posts = @driver.find_elements(css: "div[role='article']")
|
87
|
-
puts "Found #{all_posts.length} posts"
|
88
|
-
all_posts.each do |raw_post|
|
89
|
-
new_post = get_post_object(raw_post)
|
90
|
-
posts.push(new_post) if new_post
|
91
|
-
end
|
92
|
-
return posts
|
93
|
-
end
|
94
|
-
|
95
|
-
def post_in_group(group_url, text)
|
96
|
-
get(group_url)
|
97
|
-
begin
|
98
|
-
text_box = @driver.find_element(name: "xc_message")
|
99
|
-
rescue Selenium::WebDriver::Error::NoSuchElementError
|
100
|
-
@driver.save_screenshot("no_group_found.png")
|
101
|
-
puts "Group url dosnt exist"
|
102
|
-
return false
|
103
|
-
end
|
104
|
-
text_box.send_keys(text)
|
105
|
-
text_box.submit
|
106
|
-
return true
|
107
|
-
end
|
108
|
-
|
109
|
-
def get_post_object(raw_post)
|
110
|
-
begin
|
111
|
-
new_post = raw_post.find_element(css: "div[role='article']")
|
112
|
-
like_data = raw_post
|
113
|
-
rescue
|
114
|
-
new_post = raw_post
|
115
|
-
like_data = raw_post
|
116
|
-
end
|
117
|
-
post = Post.new
|
118
|
-
all_links = like_data.find_elements(tag_name: "a")
|
119
|
-
|
120
|
-
# # dont track not post things
|
121
|
-
return nil unless all_links[-7].text.include?("Like")
|
122
|
-
|
123
|
-
post.owner = new_post.find_elements(tag_name: "a")[0].text
|
124
|
-
post.text = new_post.find_element(tag_name: "p").text
|
125
|
-
post.like_count = all_links[-7].text.to_i
|
126
|
-
post.comment_count = all_links[-4].text.to_i
|
127
|
-
post.time = new_post.find_element(tag_name: "abbr").text
|
128
|
-
post.post_owner_link = all_links[0].attribute("href")
|
129
|
-
post.comment_link = all_links[-4].attribute("href")
|
130
|
-
post.like_link = like_data.find_element(link_text: "Like").attribute("href")
|
131
|
-
post.more_link = all_links[-1].attribute("href")
|
132
|
-
return post.to_h
|
133
|
-
rescue
|
134
|
-
return nil
|
135
|
-
end
|
1
|
+
module FacebookScrapper
|
2
|
+
require "facebook_scrapper/version"
|
3
|
+
require "facebook_scrapper/person"
|
4
|
+
require "facebook_scrapper/post"
|
5
|
+
require "facebook_scrapper/scrapper"
|
136
6
|
end
|
137
|
-
|
138
|
-
class Person
|
139
|
-
attr_accessor :name, :pofile_link, :add_as_friend_link
|
140
|
-
|
141
|
-
def initialize
|
142
|
-
self.name = ""
|
143
|
-
self.pofile_link = ""
|
144
|
-
self.add_as_friend_link = ""
|
145
|
-
end
|
146
|
-
|
147
|
-
def to_str
|
148
|
-
s = ""
|
149
|
-
s += "#{self.name}:\n"
|
150
|
-
s += "Profile Link: #{self.pofile_link}"
|
151
|
-
if self.add_as_friend_link != ""
|
152
|
-
s += "Addlink ->: #{self.add_as_friend_link}"
|
153
|
-
end
|
154
|
-
return s
|
155
|
-
end
|
156
|
-
end
|
157
|
-
|
158
|
-
class Post
|
159
|
-
attr_accessor :owner, :text, :like_count, :comment_count, :like_link, :time, :post_owner_link, :comment_link, :more_link
|
160
|
-
|
161
|
-
def initialize
|
162
|
-
self.owner = ""
|
163
|
-
self.text = ""
|
164
|
-
self.like_count = 0
|
165
|
-
self.comment_count = 0
|
166
|
-
self.time = ""
|
167
|
-
self.post_owner_link = ""
|
168
|
-
self.comment_link = ""
|
169
|
-
self.like_link = ""
|
170
|
-
self.more_link = ""
|
171
|
-
end
|
172
|
-
|
173
|
-
def to_h
|
174
|
-
hash = {}
|
175
|
-
instance_variables.each do |var|
|
176
|
-
hash[var.to_s.delete("@")] = instance_variable_get(var)
|
177
|
-
end
|
178
|
-
return hash
|
179
|
-
end
|
180
|
-
|
181
|
-
def to_json
|
182
|
-
to_h.to_json
|
183
|
-
end
|
184
|
-
|
185
|
-
def to_str
|
186
|
-
s = "\nPost by #{self.owner}: "
|
187
|
-
s += "#{self.text} \n"
|
188
|
-
s += "Likes: #{self.like_count.to_s} - "
|
189
|
-
s += "Comments: #{self.comment_count.to_s} - "
|
190
|
-
s += "#{self.time} "
|
191
|
-
s += " - Privacy: #{self.privacy}\n-"
|
192
|
-
s += "\n Comment -> #{self.comment_link}\n"
|
193
|
-
return s
|
194
|
-
end
|
195
|
-
end
|
196
|
-
|
197
|
-
# def get_posts(url)
|
198
|
-
# driver.get(url)
|
199
|
-
# driver.find_element(id:'m_group_stories_container').find_elements(css: "div[role='article']")[0].find_element(tag_name: 'p').text
|
200
|
-
# driver.find_element(id:'m_group_stories_container').find_elements(css: "div[role='article']")[1].find_element(link_text: 'Full Story').attribute('href')
|
201
|
-
# end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module FacebookScrapper
|
2
|
+
class Person
|
3
|
+
attr_accessor :name, :pofile_link, :add_as_friend_link
|
4
|
+
|
5
|
+
def initialize
|
6
|
+
self.name = ""
|
7
|
+
self.pofile_link = ""
|
8
|
+
self.add_as_friend_link = ""
|
9
|
+
end
|
10
|
+
|
11
|
+
def to_str
|
12
|
+
s = ""
|
13
|
+
s += "#{self.name}:\n"
|
14
|
+
s += "Profile Link: #{self.pofile_link}"
|
15
|
+
if self.add_as_friend_link != ""
|
16
|
+
s += "Addlink ->: #{self.add_as_friend_link}"
|
17
|
+
end
|
18
|
+
return s
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
module FacebookScrapper
|
2
|
+
class Post
|
3
|
+
attr_accessor :owner, :text, :like_count, :comment_count, :like_link, :time, :post_owner_link, :comment_link, :more_link
|
4
|
+
|
5
|
+
def initialize
|
6
|
+
self.owner = ""
|
7
|
+
self.text = ""
|
8
|
+
self.like_count = 0
|
9
|
+
self.comment_count = 0
|
10
|
+
self.time = ""
|
11
|
+
self.post_owner_link = ""
|
12
|
+
self.comment_link = ""
|
13
|
+
self.like_link = ""
|
14
|
+
self.more_link = ""
|
15
|
+
end
|
16
|
+
|
17
|
+
def to_h
|
18
|
+
hash = {}
|
19
|
+
instance_variables.each do |var|
|
20
|
+
hash[var.to_s.delete("@")] = instance_variable_get(var)
|
21
|
+
end
|
22
|
+
return hash
|
23
|
+
end
|
24
|
+
|
25
|
+
def to_json
|
26
|
+
to_h.to_json
|
27
|
+
end
|
28
|
+
|
29
|
+
def to_str
|
30
|
+
s = "\nPost by #{self.owner}: "
|
31
|
+
s += "#{self.text} \n"
|
32
|
+
s += "Likes: #{self.like_count.to_s} - "
|
33
|
+
s += "Comments: #{self.comment_count.to_s} - "
|
34
|
+
s += "#{self.time} "
|
35
|
+
s += " - Privacy: #{self.privacy}\n-"
|
36
|
+
s += "\n Comment -> #{self.comment_link}\n"
|
37
|
+
return s
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
@@ -0,0 +1,144 @@
|
|
1
|
+
require "selenium-webdriver"
|
2
|
+
|
3
|
+
module FacebookScrapper
|
4
|
+
class Scrapper
|
5
|
+
def initialize
|
6
|
+
@driver = Selenium::WebDriver.for :chrome
|
7
|
+
end
|
8
|
+
|
9
|
+
def toBasicUrl(url)
|
10
|
+
if url.include? "m.facebook.com"
|
11
|
+
return url.gsub!("m.facebook.com", "mbasic.facebook.com")
|
12
|
+
elsif url.include? "www.facebook.com"
|
13
|
+
return url.gsub!("www.facebook.com", "mbasic.facebook.com")
|
14
|
+
else
|
15
|
+
return url
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
def get(url)
|
20
|
+
@driver.get(toBasicUrl(url))
|
21
|
+
end
|
22
|
+
|
23
|
+
def login(email, password)
|
24
|
+
url = "https://mbasic.facebook.com"
|
25
|
+
get(url)
|
26
|
+
email_box = @driver.find_element(name: "email")
|
27
|
+
email_box.send_keys(email)
|
28
|
+
password_box = @driver.find_element(name: "pass")
|
29
|
+
password_box.send_keys(password)
|
30
|
+
password_box.submit
|
31
|
+
# Bypass facebook OneClick Login
|
32
|
+
if @driver.find_element(class: "bi")
|
33
|
+
@driver.find_element(class: "bp").click()
|
34
|
+
end
|
35
|
+
begin
|
36
|
+
@driver.find_element(name: "xc_message")
|
37
|
+
puts "Logged in"
|
38
|
+
return true
|
39
|
+
rescue Selenium::WebDriver::Error::NoSuchElementError => e
|
40
|
+
body = @driver.find_element(tag_name: "body").text
|
41
|
+
if (body.include?("Enter login code to continue"))
|
42
|
+
puts "You 2 factor is turned on. Authenticate it and try again"
|
43
|
+
else
|
44
|
+
puts "Failed to login"
|
45
|
+
@driver.save_screenshot("login_failed.png")
|
46
|
+
end
|
47
|
+
return false
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
def logout
|
52
|
+
end
|
53
|
+
|
54
|
+
def write_post_to_url(url, text)
|
55
|
+
begin
|
56
|
+
get(url)
|
57
|
+
textBox = @driver.find_element(name: "xc_message")
|
58
|
+
textBox.send_keys(text)
|
59
|
+
textbox.submit
|
60
|
+
return true
|
61
|
+
rescue => e
|
62
|
+
puts "Failed to post in #{url} for error of #{e}"
|
63
|
+
return false
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
def get_posts_from_group_url(url, keywords = [])
|
68
|
+
get(url)
|
69
|
+
posts = []
|
70
|
+
all_posts = @driver.find_element(id: "m_group_stories_container").find_elements(css: "div[role='article']")
|
71
|
+
puts "Found #{all_posts.length} posts"
|
72
|
+
all_posts.each do |raw_post|
|
73
|
+
new_post = get_post_object(raw_post, keywords)
|
74
|
+
posts.push(new_post) if new_post
|
75
|
+
end
|
76
|
+
return posts
|
77
|
+
end
|
78
|
+
|
79
|
+
def get_posts_from_home(keywords = [])
|
80
|
+
get("https://mbasic.facebook.com")
|
81
|
+
posts = []
|
82
|
+
all_posts = @driver.find_elements(css: "div[role='article']")
|
83
|
+
puts "Found #{all_posts.length} posts"
|
84
|
+
all_posts.each do |raw_post|
|
85
|
+
new_post = get_post_object(raw_post, keywords)
|
86
|
+
posts.push(new_post) if new_post
|
87
|
+
end
|
88
|
+
return posts
|
89
|
+
end
|
90
|
+
|
91
|
+
def post_in_group(group_url, text)
|
92
|
+
get(group_url)
|
93
|
+
begin
|
94
|
+
text_box = @driver.find_element(name: "xc_message")
|
95
|
+
rescue Selenium::WebDriver::Error::NoSuchElementError
|
96
|
+
@driver.save_screenshot("no_group_found.png")
|
97
|
+
puts "Group url dosnt exist"
|
98
|
+
return false
|
99
|
+
end
|
100
|
+
text_box.send_keys(text)
|
101
|
+
text_box.submit
|
102
|
+
return true
|
103
|
+
end
|
104
|
+
|
105
|
+
private
|
106
|
+
|
107
|
+
def get_post_object(raw_post, keywords = [])
|
108
|
+
begin
|
109
|
+
new_post = raw_post.find_element(css: "div[role='article']")
|
110
|
+
like_data = raw_post
|
111
|
+
rescue
|
112
|
+
new_post = raw_post
|
113
|
+
like_data = raw_post
|
114
|
+
end
|
115
|
+
post = Post.new
|
116
|
+
all_links = like_data.find_elements(tag_name: "a")
|
117
|
+
|
118
|
+
# dont track not post things
|
119
|
+
return nil unless all_links[-7].text.include?("Like")
|
120
|
+
|
121
|
+
with_share = like_data.find_elements(link_text: "Share").empty? ? 1 : 0
|
122
|
+
|
123
|
+
post.owner = new_post.find_elements(tag_name: "a")[0].text
|
124
|
+
post.text = new_post.find_element(tag_name: "p").text
|
125
|
+
post.like_count = all_links[-7 - with_share].text.to_i
|
126
|
+
post.comment_count = all_links[-4 - with_share].text.to_i
|
127
|
+
post.time = new_post.find_element(tag_name: "abbr").text
|
128
|
+
post.post_owner_link = all_links[0].attribute("href")
|
129
|
+
post.comment_link = all_links[-4 - with_share].attribute("href")
|
130
|
+
post.like_link = like_data.find_element(link_text: "Like").attribute("href")
|
131
|
+
post.more_link = all_links[-1].attribute("href")
|
132
|
+
|
133
|
+
keyword_included = keywords.any? { |keyword| post.text.include?(keyword) }
|
134
|
+
|
135
|
+
if (keyword_included || keywords.empty?)
|
136
|
+
return post
|
137
|
+
end
|
138
|
+
|
139
|
+
return nil
|
140
|
+
rescue
|
141
|
+
return nil
|
142
|
+
end
|
143
|
+
end
|
144
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: facebook_scrapper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mujadded Al Rabbani Alif
|
@@ -18,7 +18,11 @@ extensions: []
|
|
18
18
|
extra_rdoc_files: []
|
19
19
|
files:
|
20
20
|
- lib/facebook_scrapper.rb
|
21
|
-
|
21
|
+
- lib/facebook_scrapper/person.rb
|
22
|
+
- lib/facebook_scrapper/post.rb
|
23
|
+
- lib/facebook_scrapper/scrapper.rb
|
24
|
+
- lib/facebook_scrapper/version.rb
|
25
|
+
homepage: https://github.com/Mujadded/facebook_scrapper
|
22
26
|
licenses:
|
23
27
|
- MIT
|
24
28
|
metadata: {}
|