facebook_scrapper 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/facebook_scrapper.rb +5 -200
- data/lib/facebook_scrapper/person.rb +21 -0
- data/lib/facebook_scrapper/post.rb +40 -0
- data/lib/facebook_scrapper/scrapper.rb +144 -0
- data/lib/facebook_scrapper/version.rb +3 -0
- metadata +6 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 39780580d7e538cfced26d2f1205f77b1a9f1b6fad95bea835365670b726f736
|
4
|
+
data.tar.gz: 36f2b706b4bc463cc9e936bf40a16a8c013740c6965d339b4ae5eb1146437c00
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5ea2d8f679806dbfd1cc05c3d000927a18ce5e51d6c80b212b7db57db340b4cae54c61d79baf68c34850f990237dfdac514f52844aeac22229fdcdf0db1b8a72
|
7
|
+
data.tar.gz: 2a9a6da1138d4c89afbd58efdc5979f25adb81a11d97fa65da4a1e1886de62b296a49ec6bae696cb55fea9ee30af773a0b6214d297136b7e19bbe4746c58d77e
|
data/lib/facebook_scrapper.rb
CHANGED
@@ -1,201 +1,6 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
end
|
7
|
-
|
8
|
-
def toBasicUrl(url)
|
9
|
-
if url.include? "m.facebook.com"
|
10
|
-
return url.gsub!("m.facebook.com", "mbasic.facebook.com")
|
11
|
-
elsif url.include? "www.facebook.com"
|
12
|
-
return url.gsub!("www.facebook.com", "mbasic.facebook.com")
|
13
|
-
else
|
14
|
-
return url
|
15
|
-
end
|
16
|
-
end
|
17
|
-
|
18
|
-
def get(url)
|
19
|
-
@driver.get(toBasicUrl(url))
|
20
|
-
end
|
21
|
-
|
22
|
-
def get_driver
|
23
|
-
return @driver
|
24
|
-
end
|
25
|
-
|
26
|
-
def login(email, password)
|
27
|
-
url = "https://mbasic.facebook.com"
|
28
|
-
get(url)
|
29
|
-
email_box = @driver.find_element(name: "email")
|
30
|
-
email_box.send_keys(email)
|
31
|
-
password_box = @driver.find_element(name: "pass")
|
32
|
-
password_box.send_keys(password)
|
33
|
-
password_box.submit
|
34
|
-
# Bypass facebook OneClick Login
|
35
|
-
if @driver.find_element(class: "bi")
|
36
|
-
@driver.find_element(class: "bp").click()
|
37
|
-
end
|
38
|
-
begin
|
39
|
-
@driver.find_element(name: "xc_message")
|
40
|
-
puts "Logged in"
|
41
|
-
return true
|
42
|
-
rescue Selenium::WebDriver::Error::NoSuchElementError => e
|
43
|
-
body = @driver.find_element(tag_name: "body").text
|
44
|
-
if (body.include?("Enter login code to continue"))
|
45
|
-
puts "You 2 factor is turned on. Authenticate it and try again"
|
46
|
-
else
|
47
|
-
puts "Failed to login"
|
48
|
-
@driver.save_screenshot("login_failed.png")
|
49
|
-
end
|
50
|
-
return false
|
51
|
-
end
|
52
|
-
end
|
53
|
-
|
54
|
-
def logout
|
55
|
-
end
|
56
|
-
|
57
|
-
def write_post_to_url(url, text)
|
58
|
-
begin
|
59
|
-
get(url)
|
60
|
-
textBox = @driver.find_element(name: "xc_message")
|
61
|
-
textBox.send_keys(text)
|
62
|
-
textbox.submit
|
63
|
-
return true
|
64
|
-
rescue => e
|
65
|
-
puts "Failed to post in #{url} for error of #{e}"
|
66
|
-
return false
|
67
|
-
end
|
68
|
-
end
|
69
|
-
|
70
|
-
def get_posts_group(url)
|
71
|
-
get(url)
|
72
|
-
posts = []
|
73
|
-
all_posts = @driver.find_element(id: "m_group_stories_container").find_elements(css: "div[role='article']")
|
74
|
-
puts "Found #{all_posts.length} posts"
|
75
|
-
all_posts.each do |raw_post|
|
76
|
-
new_post = get_post_object(raw_post)
|
77
|
-
puts new_post
|
78
|
-
posts.push(new_post.to_json) if new_post
|
79
|
-
end
|
80
|
-
return posts
|
81
|
-
end
|
82
|
-
|
83
|
-
def get_posts_from_home
|
84
|
-
get("https://mbasic.facebook.com")
|
85
|
-
posts = []
|
86
|
-
all_posts = @driver.find_elements(css: "div[role='article']")
|
87
|
-
puts "Found #{all_posts.length} posts"
|
88
|
-
all_posts.each do |raw_post|
|
89
|
-
new_post = get_post_object(raw_post)
|
90
|
-
posts.push(new_post) if new_post
|
91
|
-
end
|
92
|
-
return posts
|
93
|
-
end
|
94
|
-
|
95
|
-
def post_in_group(group_url, text)
|
96
|
-
get(group_url)
|
97
|
-
begin
|
98
|
-
text_box = @driver.find_element(name: "xc_message")
|
99
|
-
rescue Selenium::WebDriver::Error::NoSuchElementError
|
100
|
-
@driver.save_screenshot("no_group_found.png")
|
101
|
-
puts "Group url dosnt exist"
|
102
|
-
return false
|
103
|
-
end
|
104
|
-
text_box.send_keys(text)
|
105
|
-
text_box.submit
|
106
|
-
return true
|
107
|
-
end
|
108
|
-
|
109
|
-
def get_post_object(raw_post)
|
110
|
-
begin
|
111
|
-
new_post = raw_post.find_element(css: "div[role='article']")
|
112
|
-
like_data = raw_post
|
113
|
-
rescue
|
114
|
-
new_post = raw_post
|
115
|
-
like_data = raw_post
|
116
|
-
end
|
117
|
-
post = Post.new
|
118
|
-
all_links = like_data.find_elements(tag_name: "a")
|
119
|
-
|
120
|
-
# # dont track not post things
|
121
|
-
return nil unless all_links[-7].text.include?("Like")
|
122
|
-
|
123
|
-
post.owner = new_post.find_elements(tag_name: "a")[0].text
|
124
|
-
post.text = new_post.find_element(tag_name: "p").text
|
125
|
-
post.like_count = all_links[-7].text.to_i
|
126
|
-
post.comment_count = all_links[-4].text.to_i
|
127
|
-
post.time = new_post.find_element(tag_name: "abbr").text
|
128
|
-
post.post_owner_link = all_links[0].attribute("href")
|
129
|
-
post.comment_link = all_links[-4].attribute("href")
|
130
|
-
post.like_link = like_data.find_element(link_text: "Like").attribute("href")
|
131
|
-
post.more_link = all_links[-1].attribute("href")
|
132
|
-
return post.to_h
|
133
|
-
rescue
|
134
|
-
return nil
|
135
|
-
end
|
1
|
+
# Top-level namespace of the gem. Requiring this file loads every component,
# in this order: version, person, post, scrapper.
module FacebookScrapper
  %w[version person post scrapper].each do |component|
    require "facebook_scrapper/#{component}"
  end
end
|
137
|
-
|
138
|
-
class Person
|
139
|
-
attr_accessor :name, :pofile_link, :add_as_friend_link
|
140
|
-
|
141
|
-
def initialize
|
142
|
-
self.name = ""
|
143
|
-
self.pofile_link = ""
|
144
|
-
self.add_as_friend_link = ""
|
145
|
-
end
|
146
|
-
|
147
|
-
def to_str
|
148
|
-
s = ""
|
149
|
-
s += "#{self.name}:\n"
|
150
|
-
s += "Profile Link: #{self.pofile_link}"
|
151
|
-
if self.add_as_friend_link != ""
|
152
|
-
s += "Addlink ->: #{self.add_as_friend_link}"
|
153
|
-
end
|
154
|
-
return s
|
155
|
-
end
|
156
|
-
end
|
157
|
-
|
158
|
-
class Post
|
159
|
-
attr_accessor :owner, :text, :like_count, :comment_count, :like_link, :time, :post_owner_link, :comment_link, :more_link
|
160
|
-
|
161
|
-
def initialize
|
162
|
-
self.owner = ""
|
163
|
-
self.text = ""
|
164
|
-
self.like_count = 0
|
165
|
-
self.comment_count = 0
|
166
|
-
self.time = ""
|
167
|
-
self.post_owner_link = ""
|
168
|
-
self.comment_link = ""
|
169
|
-
self.like_link = ""
|
170
|
-
self.more_link = ""
|
171
|
-
end
|
172
|
-
|
173
|
-
def to_h
|
174
|
-
hash = {}
|
175
|
-
instance_variables.each do |var|
|
176
|
-
hash[var.to_s.delete("@")] = instance_variable_get(var)
|
177
|
-
end
|
178
|
-
return hash
|
179
|
-
end
|
180
|
-
|
181
|
-
def to_json
|
182
|
-
to_h.to_json
|
183
|
-
end
|
184
|
-
|
185
|
-
def to_str
|
186
|
-
s = "\nPost by #{self.owner}: "
|
187
|
-
s += "#{self.text} \n"
|
188
|
-
s += "Likes: #{self.like_count.to_s} - "
|
189
|
-
s += "Comments: #{self.comment_count.to_s} - "
|
190
|
-
s += "#{self.time} "
|
191
|
-
s += " - Privacy: #{self.privacy}\n-"
|
192
|
-
s += "\n Comment -> #{self.comment_link}\n"
|
193
|
-
return s
|
194
|
-
end
|
195
|
-
end
|
196
|
-
|
197
|
-
# def get_posts(url)
|
198
|
-
# driver.get(url)
|
199
|
-
# driver.find_element(id:'m_group_stories_container').find_elements(css: "div[role='article']")[0].find_element(tag_name: 'p').text
|
200
|
-
# driver.find_element(id:'m_group_stories_container').find_elements(css: "div[role='article']")[1].find_element(link_text: 'Full Story').attribute('href')
|
201
|
-
# end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module FacebookScrapper
  # Plain data holder describing a Facebook profile: display name, profile
  # URL and an optional "add as friend" URL. Every field starts out as an
  # empty string.
  class Person
    # +pofile_link+ is the historical (misspelled) accessor name. It is kept
    # so existing callers continue to work; +profile_link+ is an alias for it.
    attr_accessor :name, :pofile_link, :add_as_friend_link

    alias_method :profile_link, :pofile_link
    alias_method :profile_link=, :pofile_link=

    def initialize
      self.name = ""
      self.pofile_link = ""
      self.add_as_friend_link = ""
    end

    # Human-readable summary of the person.
    #
    # The "Addlink" part is appended only when +add_as_friend_link+ is not the
    # empty string. The text format is unchanged from earlier releases.
    #
    # @return [String]
    def to_str
      summary = "#{name}:\nProfile Link: #{pofile_link}"
      summary += "Addlink ->: #{add_as_friend_link}" if add_as_friend_link != ""
      summary
    end

    # +puts+ and string interpolation call +to_s+, not +to_str+; without this
    # alias they would print the default object inspection instead.
    alias_method :to_s, :to_str
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
require "json" # Hash#to_json (used by Post#to_json) is only defined once json is loaded

module FacebookScrapper
  # Plain data holder for one scraped post: author, text, like/comment
  # counters, timestamp text and the related links.
  class Post
    attr_accessor :owner, :text, :like_count, :comment_count, :like_link, :time, :post_owner_link, :comment_link, :more_link

    # Optional privacy label printed by #to_str. It is deliberately NOT
    # assigned in #initialize, so #to_h / #to_json keep their existing set of
    # keys unless a caller sets it explicitly.
    attr_accessor :privacy

    def initialize
      self.owner = ""
      self.text = ""
      self.like_count = 0
      self.comment_count = 0
      self.time = ""
      self.post_owner_link = ""
      self.comment_link = ""
      self.like_link = ""
      self.more_link = ""
    end

    # Converts the post into a Hash keyed by instance-variable name without
    # the leading "@", in the order the variables were assigned.
    #
    # @return [Hash{String => Object}]
    def to_h
      instance_variables.each_with_object({}) do |ivar, hash|
        hash[ivar.to_s.delete("@")] = instance_variable_get(ivar)
      end
    end

    # JSON representation of #to_h.
    #
    # Accepts and forwards the optional generator arguments so a Post can also
    # be serialised when nested inside an Array or Hash (the JSON generator
    # passes its state to each element's +to_json+).
    #
    # @return [String]
    def to_json(*args)
      to_h.to_json(*args)
    end

    # Multi-line, human-readable summary of the post.
    #
    # @return [String]
    def to_str
      "\nPost by #{owner}: #{text} \n" \
        "Likes: #{like_count} - Comments: #{comment_count} - #{time} " \
        " - Privacy: #{privacy}\n-" \
        "\n Comment -> #{comment_link}\n"
    end

    # +puts+ and string interpolation call +to_s+, not +to_str+.
    alias_method :to_s, :to_str
  end
end
|
@@ -0,0 +1,144 @@
|
|
1
|
+
require "selenium-webdriver"
|
2
|
+
|
3
|
+
module FacebookScrapper
  # Drives a Selenium-controlled Chrome browser against the
  # mbasic.facebook.com pages to log in, read posts and publish posts.
  class Scrapper
    # Starts a new Chrome session through Selenium.
    def initialize
      @driver = Selenium::WebDriver.for :chrome
    end

    # Rewrites an m.facebook.com or www.facebook.com URL to its
    # mbasic.facebook.com equivalent; any other URL is returned as given.
    #
    # The argument is never modified (a rewritten copy is returned), so frozen
    # strings are accepted and the caller's value is left intact.
    #
    # @param url [String]
    # @return [String]
    def toBasicUrl(url)
      if url.include?("m.facebook.com")
        url.gsub("m.facebook.com", "mbasic.facebook.com")
      elsif url.include?("www.facebook.com")
        url.gsub("www.facebook.com", "mbasic.facebook.com")
      else
        url
      end
    end

    # Navigates the browser to +url+ after converting it with #toBasicUrl.
    def get(url)
      @driver.get(toBasicUrl(url))
    end

    # Submits the login form on https://mbasic.facebook.com.
    #
    # Prints a status line to stdout. When login fails for a reason other than
    # the login-code prompt, a screenshot is saved to "login_failed.png".
    #
    # @param email [String]
    # @param password [String]
    # @return [Boolean] true when an element named "xc_message" is present
    #   after submitting, false otherwise
    def login(email, password)
      get("https://mbasic.facebook.com")
      @driver.find_element(name: "email").send_keys(email)
      password_box = @driver.find_element(name: "pass")
      password_box.send_keys(password)
      password_box.submit

      # Bypass facebook OneClick Login.
      # find_elements returns an empty list when nothing matches, whereas
      # find_element raises; the prompt is optional, so its absence must not
      # abort the login.
      unless @driver.find_elements(class: "bi").empty?
        @driver.find_element(class: "bp").click
      end

      begin
        @driver.find_element(name: "xc_message")
        puts "Logged in"
        true
      rescue Selenium::WebDriver::Error::NoSuchElementError
        page_text = @driver.find_element(tag_name: "body").text
        if page_text.include?("Enter login code to continue")
          puts "You 2 factor is turned on. Authenticate it and try again"
        else
          puts "Failed to login"
          @driver.save_screenshot("login_failed.png")
        end
        false
      end
    end

    # Placeholder: currently does nothing.
    def logout
    end

    # Opens +url+, types +text+ into the element named "xc_message" and
    # submits it.
    #
    # @return [Boolean] true on success; false (after printing the error) when
    #   any step raises a StandardError
    def write_post_to_url(url, text)
      get(url)
      text_box = @driver.find_element(name: "xc_message")
      text_box.send_keys(text)
      text_box.submit
      true
    rescue => e
      puts "Failed to post in #{url} for error of #{e}"
      false
    end

    # Reads the posts listed inside the "m_group_stories_container" element of
    # the page at +url+.
    #
    # @param url [String]
    # @param keywords [Array<String>] when non-empty, only posts whose text
    #   contains at least one keyword are returned
    # @return [Array<Post>]
    def get_posts_from_group_url(url, keywords = [])
      get(url)
      container = @driver.find_element(id: "m_group_stories_container")
      collect_posts(container.find_elements(css: "div[role='article']"), keywords)
    end

    # Reads the posts found on https://mbasic.facebook.com.
    #
    # @param keywords [Array<String>] see #get_posts_from_group_url
    # @return [Array<Post>]
    def get_posts_from_home(keywords = [])
      get("https://mbasic.facebook.com")
      collect_posts(@driver.find_elements(css: "div[role='article']"), keywords)
    end

    # Opens +group_url+ and submits +text+ through the element named
    # "xc_message".
    #
    # @return [Boolean] false (after saving "no_group_found.png") when that
    #   element is missing, true otherwise
    def post_in_group(group_url, text)
      get(group_url)
      begin
        text_box = @driver.find_element(name: "xc_message")
      rescue Selenium::WebDriver::Error::NoSuchElementError
        @driver.save_screenshot("no_group_found.png")
        puts "Group url dosnt exist"
        return false
      end
      text_box.send_keys(text)
      text_box.submit
      true
    end

    private

    # Prints how many raw article elements were found and converts each one
    # with #get_post_object, dropping those that yield nil.
    def collect_posts(raw_posts, keywords)
      puts "Found #{raw_posts.length} posts"
      raw_posts.map { |raw_post| get_post_object(raw_post, keywords) }.compact
    end

    # Builds a Post from one article element.
    #
    # Returns nil when the element does not look like a post, when the keyword
    # filter rejects it, or when any lookup raises (best effort: errors are
    # swallowed on purpose so one odd element does not abort a whole scan).
    def get_post_object(raw_post, keywords = [])
      like_data = raw_post
      # Prefer a nested article element when there is one; otherwise use the
      # element itself.
      new_post = begin
        raw_post.find_element(css: "div[role='article']")
      rescue
        raw_post
      end

      post = Post.new
      all_links = like_data.find_elements(tag_name: "a")

      # dont track not post things
      return nil unless all_links[-7].text.include?("Like")

      # NOTE(review): the offset is 1 when NO "Share" link is present and 0
      # when one is, and the guard above checks index -7 without the offset.
      # Kept as released; confirm against the actual page layout.
      with_share = like_data.find_elements(link_text: "Share").empty? ? 1 : 0

      post.owner = new_post.find_elements(tag_name: "a")[0].text
      post.text = new_post.find_element(tag_name: "p").text
      post.like_count = all_links[-7 - with_share].text.to_i
      post.comment_count = all_links[-4 - with_share].text.to_i
      post.time = new_post.find_element(tag_name: "abbr").text
      post.post_owner_link = all_links[0].attribute("href")
      post.comment_link = all_links[-4 - with_share].attribute("href")
      post.like_link = like_data.find_element(link_text: "Like").attribute("href")
      post.more_link = all_links[-1].attribute("href")

      return post if keywords.empty? || keywords.any? { |keyword| post.text.include?(keyword) }

      nil
    rescue
      nil
    end
  end
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: facebook_scrapper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mujadded Al Rabbani Alif
|
@@ -18,7 +18,11 @@ extensions: []
|
|
18
18
|
extra_rdoc_files: []
|
19
19
|
files:
|
20
20
|
- lib/facebook_scrapper.rb
|
21
|
-
|
21
|
+
- lib/facebook_scrapper/person.rb
|
22
|
+
- lib/facebook_scrapper/post.rb
|
23
|
+
- lib/facebook_scrapper/scrapper.rb
|
24
|
+
- lib/facebook_scrapper/version.rb
|
25
|
+
homepage: https://github.com/Mujadded/facebook_scrapper
|
22
26
|
licenses:
|
23
27
|
- MIT
|
24
28
|
metadata: {}
|