facebook_scrapper 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/lib/facebook_scrapper.rb +201 -0
  3. metadata +45 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 26d91e0a8dbf79232c734882341127301ac516a29d3524462be0b56f3a8727b7
4
+ data.tar.gz: 813d8e43fd8857d7882b6c6fc6873a4b873ed630192107c945bca0e0d2b74f2b
5
+ SHA512:
6
+ metadata.gz: 208d4f649abbe1025b98acc8250038ab04da99de42248e5b87ca7912c9fd3e1ea80a8085119e85c6a9093a76f671b00d75cb4d2446f07fa01b70101dee6d0f1d
7
+ data.tar.gz: 034df0dbf1b23e915cf65c3c1b639124ec880c3a3e806cc91f50f6de06eb5ffd54f8ce8cdf06acb9c8a62454d9847e1b3a5a17babec50be933aaa137c03826a9
@@ -0,0 +1,201 @@
1
require "json"
require "selenium-webdriver"
2
+
3
# Drives a Selenium-controlled browser against Facebook's "mbasic" site to
# log in, read posts and publish posts without using the Graph API.
#
# All page lookups use hard-coded element names / CSS classes, so every method
# here depends on the markup served by mbasic.facebook.com.
class FacebookScrapper
  MBASIC_HOST = "mbasic.facebook.com".freeze
  HOME_URL = "https://mbasic.facebook.com".freeze
  ARTICLE_SELECTOR = "div[role='article']".freeze

  # @param driver [Object, nil] an already-built WebDriver to reuse; when
  #   omitted a new Chrome session is started (the original behaviour).
  def initialize(driver = nil)
    @driver = driver || Selenium::WebDriver.for(:chrome)
  end

  # Rewrites an "m." or "www." Facebook URL so it points at the mbasic host.
  #
  # Returns a new string; the argument is never modified, so frozen strings
  # and caller-owned URLs are safe to pass in.
  #
  # @param url [String]
  # @return [String] the rewritten URL, or +url+ itself when no rewrite applies
  def toBasicUrl(url)
    if url.include?("m.facebook.com")
      url.gsub("m.facebook.com", MBASIC_HOST)
    elsif url.include?("www.facebook.com")
      url.gsub("www.facebook.com", MBASIC_HOST)
    else
      url
    end
  end

  # Navigates the browser to +url+ after converting it with #toBasicUrl.
  def get(url)
    @driver.get(toBasicUrl(url))
  end

  # @return [Object] the underlying WebDriver instance
  def get_driver
    @driver
  end

  # Closes the browser session owned by this scrapper.
  def quit
    @driver.quit
  end

  # Fills in and submits the login form on the mbasic home page.
  #
  # Success is detected by the presence of an element named "xc_message"
  # (the same field the posting methods type into) after submitting.
  #
  # @param email [String]
  # @param password [String]
  # @return [Boolean] true when logged in, false otherwise. On failure a
  #   message is printed and, unless the page asks for a login code, a
  #   screenshot is saved to "login_failed.png".
  def login(email, password)
    get(HOME_URL)
    @driver.find_element(name: "email").send_keys(email)
    password_box = @driver.find_element(name: "pass")
    password_box.send_keys(password)
    password_box.submit

    # Bypass facebook OneClick Login.
    # find_elements returns an empty list when nothing matches, whereas
    # find_element raises, so the prompt being absent no longer aborts login.
    unless @driver.find_elements(class: "bi").empty?
      skip_button = @driver.find_elements(class: "bp").first
      skip_button.click if skip_button
    end

    begin
      @driver.find_element(name: "xc_message")
      puts "Logged in"
      true
    rescue Selenium::WebDriver::Error::NoSuchElementError
      body = @driver.find_element(tag_name: "body").text
      if body.include?("Enter login code to continue")
        puts "You 2 factor is turned on. Authenticate it and try again"
      else
        puts "Failed to login"
        @driver.save_screenshot("login_failed.png")
      end
      false
    end
  end

  # Not implemented: currently a no-op kept for interface compatibility.
  def logout
  end

  # Opens +url+ and submits +text+ through the "xc_message" field.
  #
  # @return [Boolean] true when the form was submitted; false (after printing
  #   the error) when any step raised.
  def write_post_to_url(url, text)
    get(url)
    text_box = @driver.find_element(name: "xc_message")
    text_box.send_keys(text)
    text_box.submit
    true
  rescue StandardError => e
    puts "Failed to post in #{url} for error of #{e}"
    false
  end

  # Collects the posts shown inside the "m_group_stories_container" element
  # of the page at +url+.
  #
  # @return [Array<String>] one JSON string per post that could be parsed.
  #   Note this differs from #get_posts_from_home, which returns hashes.
  def get_posts_group(url)
    get(url)
    container = @driver.find_element(id: "m_group_stories_container")
    all_posts = container.find_elements(css: ARTICLE_SELECTOR)
    puts "Found #{all_posts.length} posts"

    posts = []
    all_posts.each do |raw_post|
      new_post = get_post_object(raw_post)
      puts new_post
      posts.push(new_post.to_json) if new_post
    end
    posts
  end

  # Collects the posts shown on the mbasic home page.
  #
  # @return [Array<Hash>] one hash (see Post#to_h) per post that could be parsed
  def get_posts_from_home
    get(HOME_URL)
    all_posts = @driver.find_elements(css: ARTICLE_SELECTOR)
    puts "Found #{all_posts.length} posts"
    all_posts.map { |raw_post| get_post_object(raw_post) }.compact
  end

  # Opens +group_url+ and submits +text+ through the "xc_message" field.
  #
  # @return [Boolean] false (after saving "no_group_found.png") when the
  #   field is missing, true once the form has been submitted.
  def post_in_group(group_url, text)
    get(group_url)
    begin
      text_box = @driver.find_element(name: "xc_message")
    rescue Selenium::WebDriver::Error::NoSuchElementError
      @driver.save_screenshot("no_group_found.png")
      puts "Group url dosnt exist"
      return false
    end
    text_box.send_keys(text)
    text_box.submit
    true
  end

  # Builds a post hash from one article element.
  #
  # Fields are read positionally from the element's links: the 7th link from
  # the end is expected to be the "Like" link, the 4th from the end the
  # comment link and the last one the "more" link.
  #
  # @param raw_post [Object] a WebDriver element for one article
  # @return [Hash, nil] Post#to_h of the parsed post, or nil when the element
  #   does not look like a post or any lookup fails (deliberately best-effort
  #   so one odd article does not abort a whole listing).
  def get_post_object(raw_post)
    # Prefer a nested article when there is one; otherwise read from the
    # element itself.
    new_post = begin
      raw_post.find_element(css: ARTICLE_SELECTOR)
    rescue StandardError
      raw_post
    end

    all_links = raw_post.find_elements(tag_name: "a")

    # dont track not post things
    return nil if all_links.length < 7

    like_anchor = all_links[-7]
    comment_anchor = all_links[-4]
    return nil unless like_anchor.text.include?("Like")

    post = Post.new
    post.owner = new_post.find_elements(tag_name: "a")[0].text
    post.text = new_post.find_element(tag_name: "p").text
    post.like_count = like_anchor.text.to_i
    post.comment_count = comment_anchor.text.to_i
    post.time = new_post.find_element(tag_name: "abbr").text
    post.post_owner_link = all_links[0].attribute("href")
    post.comment_link = comment_anchor.attribute("href")
    post.like_link = raw_post.find_element(link_text: "Like").attribute("href")
    post.more_link = all_links[-1].attribute("href")
    post.to_h
  rescue StandardError
    nil
  end
end
137
+
138
# Plain value holder describing a person: a display name plus two links.
class Person
  # +pofile_link+ is the original (misspelt) accessor name and is kept so
  # existing callers keep working; +profile_link+ is an alias for it.
  attr_accessor :name, :pofile_link, :add_as_friend_link

  alias_method :profile_link, :pofile_link
  alias_method :profile_link=, :pofile_link=

  # Starts with every field set to an empty string.
  def initialize
    @name = ""
    @pofile_link = ""
    @add_as_friend_link = ""
  end

  # Human-readable summary, one field per line. The "Addlink" line is only
  # included when an add-as-friend link has been set.
  #
  # @return [String]
  def to_str
    lines = ["#{name}:", "Profile Link: #{pofile_link}"]
    lines << "Addlink ->: #{add_as_friend_link}" unless add_as_friend_link.to_s.empty?
    lines.join("\n")
  end

  # Interpolation and +puts+ call #to_s, not #to_str, so expose the same text.
  alias_method :to_s, :to_str
end
157
+
158
# Plain value holder for one scraped post.
class Post
  # +privacy+ is optional: it is not assigned by #initialize, so it reads as
  # nil and is absent from #to_h until a caller sets it.
  attr_accessor :owner, :text, :like_count, :comment_count, :like_link, :time,
                :post_owner_link, :comment_link, :more_link, :privacy

  # Starts with empty strings for text fields and zero for the counters.
  def initialize
    @owner = ""
    @text = ""
    @like_count = 0
    @comment_count = 0
    @time = ""
    @post_owner_link = ""
    @comment_link = ""
    @like_link = ""
    @more_link = ""
  end

  # @return [Hash{String => Object}] every assigned instance variable, keyed
  #   by its name without the leading "@", in assignment order
  def to_h
    instance_variables.each_with_object({}) do |var, hash|
      hash[var.to_s.delete("@")] = instance_variable_get(var)
    end
  end

  # Serialises #to_h as JSON. Arguments are forwarded so the object can also
  # be serialised from inside a container (generators pass a state object).
  #
  # @return [String]
  def to_json(*args)
    to_h.to_json(*args)
  end

  # Human-readable multi-line summary. The privacy segment is only included
  # when +privacy+ has been set to a non-empty value.
  #
  # @return [String]
  def to_str
    s = "\nPost by #{owner}: "
    s << "#{text} \n"
    s << "Likes: #{like_count} - "
    s << "Comments: #{comment_count} - "
    s << "#{time} "
    s << " - Privacy: #{privacy}" unless privacy.to_s.empty?
    s << "\n-"
    s << "\n Comment -> #{comment_link}\n"
    s
  end

  # Interpolation and +puts+ call #to_s, not #to_str, so expose the same text.
  alias_method :to_s, :to_str
end
196
+
197
+ # def get_posts(url)
198
+ # driver.get(url)
199
+ # driver.find_element(id:'m_group_stories_container').find_elements(css: "div[role='article']")[0].find_element(tag_name: 'p').text
200
+ # driver.find_element(id:'m_group_stories_container').find_elements(css: "div[role='article']")[1].find_element(link_text: 'Full Story').attribute('href')
201
+ # end
metadata ADDED
@@ -0,0 +1,45 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: facebook_scrapper
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Mujadded Al Rabbani Alif
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2019-09-14 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: This is a facebook scrapper bot that doesn't require an API to scrape facebook
14
+ data
15
+ email: mujadded.alif@gmail.com
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - lib/facebook_scrapper.rb
21
+ homepage: https://rubygems.org/gems/facebook_scrapper
22
+ licenses:
23
+ - MIT
24
+ metadata: {}
25
+ post_install_message:
26
+ rdoc_options: []
27
+ require_paths:
28
+ - lib
29
+ required_ruby_version: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ required_rubygems_version: !ruby/object:Gem::Requirement
35
+ requirements:
36
+ - - ">="
37
+ - !ruby/object:Gem::Version
38
+ version: '0'
39
+ requirements: []
40
+ rubyforge_project:
41
+ rubygems_version: 2.7.6
42
+ signing_key:
43
+ specification_version: 4
44
+ summary: scrap data from fb without api
45
+ test_files: []