facebook_scrapper 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 26d91e0a8dbf79232c734882341127301ac516a29d3524462be0b56f3a8727b7
4
- data.tar.gz: 813d8e43fd8857d7882b6c6fc6873a4b873ed630192107c945bca0e0d2b74f2b
3
+ metadata.gz: 39780580d7e538cfced26d2f1205f77b1a9f1b6fad95bea835365670b726f736
4
+ data.tar.gz: 36f2b706b4bc463cc9e936bf40a16a8c013740c6965d339b4ae5eb1146437c00
5
5
  SHA512:
6
- metadata.gz: 208d4f649abbe1025b98acc8250038ab04da99de42248e5b87ca7912c9fd3e1ea80a8085119e85c6a9093a76f671b00d75cb4d2446f07fa01b70101dee6d0f1d
7
- data.tar.gz: 034df0dbf1b23e915cf65c3c1b639124ec880c3a3e806cc91f50f6de06eb5ffd54f8ce8cdf06acb9c8a62454d9847e1b3a5a17babec50be933aaa137c03826a9
6
+ metadata.gz: 5ea2d8f679806dbfd1cc05c3d000927a18ce5e51d6c80b212b7db57db340b4cae54c61d79baf68c34850f990237dfdac514f52844aeac22229fdcdf0db1b8a72
7
+ data.tar.gz: 2a9a6da1138d4c89afbd58efdc5979f25adb81a11d97fa65da4a1e1886de62b296a49ec6bae696cb55fea9ee30af773a0b6214d297136b7e19bbe4746c58d77e
@@ -1,201 +1,6 @@
1
- require "selenium-webdriver"
2
-
3
- class FacebookScrapper
4
- def initialize
5
- @driver = Selenium::WebDriver.for :chrome
6
- end
7
-
8
- def toBasicUrl(url)
9
- if url.include? "m.facebook.com"
10
- return url.gsub!("m.facebook.com", "mbasic.facebook.com")
11
- elsif url.include? "www.facebook.com"
12
- return url.gsub!("www.facebook.com", "mbasic.facebook.com")
13
- else
14
- return url
15
- end
16
- end
17
-
18
- def get(url)
19
- @driver.get(toBasicUrl(url))
20
- end
21
-
22
- def get_driver
23
- return @driver
24
- end
25
-
26
- def login(email, password)
27
- url = "https://mbasic.facebook.com"
28
- get(url)
29
- email_box = @driver.find_element(name: "email")
30
- email_box.send_keys(email)
31
- password_box = @driver.find_element(name: "pass")
32
- password_box.send_keys(password)
33
- password_box.submit
34
- # Bypass facebook OneClick Login
35
- if @driver.find_element(class: "bi")
36
- @driver.find_element(class: "bp").click()
37
- end
38
- begin
39
- @driver.find_element(name: "xc_message")
40
- puts "Logged in"
41
- return true
42
- rescue Selenium::WebDriver::Error::NoSuchElementError => e
43
- body = @driver.find_element(tag_name: "body").text
44
- if (body.include?("Enter login code to continue"))
45
- puts "You 2 factor is turned on. Authenticate it and try again"
46
- else
47
- puts "Failed to login"
48
- @driver.save_screenshot("login_failed.png")
49
- end
50
- return false
51
- end
52
- end
53
-
54
- def logout
55
- end
56
-
57
- def write_post_to_url(url, text)
58
- begin
59
- get(url)
60
- textBox = @driver.find_element(name: "xc_message")
61
- textBox.send_keys(text)
62
- textbox.submit
63
- return true
64
- rescue => e
65
- puts "Failed to post in #{url} for error of #{e}"
66
- return false
67
- end
68
- end
69
-
70
- def get_posts_group(url)
71
- get(url)
72
- posts = []
73
- all_posts = @driver.find_element(id: "m_group_stories_container").find_elements(css: "div[role='article']")
74
- puts "Found #{all_posts.length} posts"
75
- all_posts.each do |raw_post|
76
- new_post = get_post_object(raw_post)
77
- puts new_post
78
- posts.push(new_post.to_json) if new_post
79
- end
80
- return posts
81
- end
82
-
83
- def get_posts_from_home
84
- get("https://mbasic.facebook.com")
85
- posts = []
86
- all_posts = @driver.find_elements(css: "div[role='article']")
87
- puts "Found #{all_posts.length} posts"
88
- all_posts.each do |raw_post|
89
- new_post = get_post_object(raw_post)
90
- posts.push(new_post) if new_post
91
- end
92
- return posts
93
- end
94
-
95
- def post_in_group(group_url, text)
96
- get(group_url)
97
- begin
98
- text_box = @driver.find_element(name: "xc_message")
99
- rescue Selenium::WebDriver::Error::NoSuchElementError
100
- @driver.save_screenshot("no_group_found.png")
101
- puts "Group url dosnt exist"
102
- return false
103
- end
104
- text_box.send_keys(text)
105
- text_box.submit
106
- return true
107
- end
108
-
109
- def get_post_object(raw_post)
110
- begin
111
- new_post = raw_post.find_element(css: "div[role='article']")
112
- like_data = raw_post
113
- rescue
114
- new_post = raw_post
115
- like_data = raw_post
116
- end
117
- post = Post.new
118
- all_links = like_data.find_elements(tag_name: "a")
119
-
120
- # # dont track not post things
121
- return nil unless all_links[-7].text.include?("Like")
122
-
123
- post.owner = new_post.find_elements(tag_name: "a")[0].text
124
- post.text = new_post.find_element(tag_name: "p").text
125
- post.like_count = all_links[-7].text.to_i
126
- post.comment_count = all_links[-4].text.to_i
127
- post.time = new_post.find_element(tag_name: "abbr").text
128
- post.post_owner_link = all_links[0].attribute("href")
129
- post.comment_link = all_links[-4].attribute("href")
130
- post.like_link = like_data.find_element(link_text: "Like").attribute("href")
131
- post.more_link = all_links[-1].attribute("href")
132
- return post.to_h
133
- rescue
134
- return nil
135
- end
1
+ module FacebookScrapper
2
+ require "facebook_scrapper/version"
3
+ require "facebook_scrapper/person"
4
+ require "facebook_scrapper/post"
5
+ require "facebook_scrapper/scrapper"
136
6
  end
137
-
138
- class Person
139
- attr_accessor :name, :pofile_link, :add_as_friend_link
140
-
141
- def initialize
142
- self.name = ""
143
- self.pofile_link = ""
144
- self.add_as_friend_link = ""
145
- end
146
-
147
- def to_str
148
- s = ""
149
- s += "#{self.name}:\n"
150
- s += "Profile Link: #{self.pofile_link}"
151
- if self.add_as_friend_link != ""
152
- s += "Addlink ->: #{self.add_as_friend_link}"
153
- end
154
- return s
155
- end
156
- end
157
-
158
- class Post
159
- attr_accessor :owner, :text, :like_count, :comment_count, :like_link, :time, :post_owner_link, :comment_link, :more_link
160
-
161
- def initialize
162
- self.owner = ""
163
- self.text = ""
164
- self.like_count = 0
165
- self.comment_count = 0
166
- self.time = ""
167
- self.post_owner_link = ""
168
- self.comment_link = ""
169
- self.like_link = ""
170
- self.more_link = ""
171
- end
172
-
173
- def to_h
174
- hash = {}
175
- instance_variables.each do |var|
176
- hash[var.to_s.delete("@")] = instance_variable_get(var)
177
- end
178
- return hash
179
- end
180
-
181
- def to_json
182
- to_h.to_json
183
- end
184
-
185
- def to_str
186
- s = "\nPost by #{self.owner}: "
187
- s += "#{self.text} \n"
188
- s += "Likes: #{self.like_count.to_s} - "
189
- s += "Comments: #{self.comment_count.to_s} - "
190
- s += "#{self.time} "
191
- s += " - Privacy: #{self.privacy}\n-"
192
- s += "\n Comment -> #{self.comment_link}\n"
193
- return s
194
- end
195
- end
196
-
197
- # def get_posts(url)
198
- # driver.get(url)
199
- # driver.find_element(id:'m_group_stories_container').find_elements(css: "div[role='article']")[0].find_element(tag_name: 'p').text
200
- # driver.find_element(id:'m_group_stories_container').find_elements(css: "div[role='article']")[1].find_element(link_text: 'Full Story').attribute('href')
201
- # end
@@ -0,0 +1,21 @@
1
module FacebookScrapper
  # Plain value holder with three string attributes: name, pofile_link and
  # add_as_friend_link. Every attribute starts out as an empty string.
  # (The attribute name `pofile_link` is kept exactly as published so existing
  # callers keep working.)
  class Person
    attr_accessor :name, :pofile_link, :add_as_friend_link

    # Builds a person whose attributes are all "".
    def initialize
      @name = ""
      @pofile_link = ""
      @add_as_friend_link = ""
    end

    # Renders the person as text: the name followed by ":" and a newline, then
    # the profile link. The add-friend link is appended only when
    # add_as_friend_link differs from "".
    #
    # Returns a new String.
    def to_str
      summary = "#{name}:\nProfile Link: #{pofile_link}"
      return summary if add_as_friend_link == ""

      "#{summary}Addlink ->: #{add_as_friend_link}"
    end

    # String interpolation and explicit #to_s calls use #to_s, not #to_str, so
    # expose the same rendering under both names.
    alias to_s to_str
  end
end
@@ -0,0 +1,40 @@
1
module FacebookScrapper
  # Hash#to_json, used by Post#to_json, comes from the json library.
  require "json"

  # Value holder for one scraped post.
  #
  # The constructor sets owner, text, time and the four *_link attributes to ""
  # and both counters to 0. `privacy` is readable and writable but is left
  # unset by the constructor, so #to_h keeps its original nine keys until a
  # caller assigns it.
  class Post
    attr_accessor :owner, :text, :like_count, :comment_count, :like_link, :time, :post_owner_link, :comment_link, :more_link
    # Read by #to_str; previously undefined, which made #to_str raise
    # NoMethodError on every call.
    attr_accessor :privacy

    # Builds an empty post (see the class comment for the default values).
    def initialize
      @owner = ""
      @text = ""
      @like_count = 0
      @comment_count = 0
      @time = ""
      @post_owner_link = ""
      @comment_link = ""
      @like_link = ""
      @more_link = ""
    end

    # Returns a Hash mapping each instance variable name (without the leading
    # "@", as a String) to its current value, in assignment order.
    def to_h
      instance_variables.each_with_object({}) do |ivar, fields|
        fields[ivar.to_s.delete("@")] = instance_variable_get(ivar)
      end
    end

    # Serializes #to_h as JSON.
    #
    # Accepts and forwards the optional generator arguments so the post can be
    # nested inside other structures (e.g. `[post].to_json`), which call
    # to_json with an argument.
    def to_json(*args)
      to_h.to_json(*args)
    end

    # Multi-line, human-readable rendering of the post: owner, text, like and
    # comment counts, time, privacy and the comment link.
    #
    # Returns a new String.
    def to_str
      "\nPost by #{owner}: " \
        "#{text} \n" \
        "Likes: #{like_count} - " \
        "Comments: #{comment_count} - " \
        "#{time} " \
        " - Privacy: #{privacy}\n-" \
        "\n Comment -> #{comment_link}\n"
    end

    # String interpolation and explicit #to_s calls use #to_s, not #to_str, so
    # expose the same rendering under both names.
    alias to_s to_str
  end
end
@@ -0,0 +1,144 @@
1
+ require "selenium-webdriver"
2
+
3
module FacebookScrapper
  # Drives a Chrome browser through Selenium against the mbasic.facebook.com
  # pages: logging in, collecting posts and submitting text into the
  # "xc_message" field.
  class Scrapper
    # Starts a Chrome session via Selenium::WebDriver.
    def initialize
      @driver = Selenium::WebDriver.for :chrome
    end

    # Rewrites an m.facebook.com or www.facebook.com URL to point at
    # mbasic.facebook.com. Any other URL is returned as given.
    #
    # url - String URL.
    #
    # Returns a String. The argument itself is never modified, so frozen
    # strings are accepted.
    def toBasicUrl(url)
      ["m.facebook.com", "www.facebook.com"].each do |host|
        return url.gsub(host, "mbasic.facebook.com") if url.include?(host)
      end
      url
    end

    # Navigates the browser to the mbasic form of +url+.
    def get(url)
      @driver.get(toBasicUrl(url))
    end

    # Submits the login form on https://mbasic.facebook.com.
    #
    # email    - value typed into the "email" field.
    # password - value typed into the "pass" field.
    #
    # Returns true when the "xc_message" element is found afterwards. Otherwise
    # prints a diagnostic and returns false; a screenshot is saved to
    # login_failed.png unless the page asks for a login code.
    # A missing "email" or "pass" field still raises
    # Selenium::WebDriver::Error::NoSuchElementError, as before.
    def login(email, password)
      get("https://mbasic.facebook.com")
      @driver.find_element(name: "email").send_keys(email)
      password_box = @driver.find_element(name: "pass")
      password_box.send_keys(password)
      password_box.submit

      # Bypass facebook OneClick Login
      dismiss_one_click_prompt

      begin
        @driver.find_element(name: "xc_message")
        puts "Logged in"
        true
      rescue Selenium::WebDriver::Error::NoSuchElementError
        page_text = @driver.find_element(tag_name: "body").text
        if page_text.include?("Enter login code to continue")
          puts "You 2 factor is turned on. Authenticate it and try again"
        else
          puts "Failed to login"
          @driver.save_screenshot("login_failed.png")
        end
        false
      end
    end

    # Placeholder: the method body is empty and does nothing.
    def logout
    end

    # Opens +url+, types +text+ into the "xc_message" field and submits it.
    #
    # Returns true on success. Any StandardError is reported with puts and
    # turned into a false return value.
    def write_post_to_url(url, text)
      get(url)
      text_box = @driver.find_element(name: "xc_message")
      text_box.send_keys(text)
      text_box.submit
      true
    rescue => e
      puts "Failed to post in #{url} for error of #{e}"
      false
    end

    # Collects the posts found inside the "m_group_stories_container" element
    # of the page at +url+.
    #
    # url      - String URL of the page to open.
    # keywords - Array of Strings; when non-empty, only posts whose text
    #            contains at least one of them are returned.
    #
    # Returns an Array of Post objects.
    def get_posts_from_group_url(url, keywords = [])
      get(url)
      container = @driver.find_element(id: "m_group_stories_container")
      collect_posts(container.find_elements(css: "div[role='article']"), keywords)
    end

    # Collects the posts found on https://mbasic.facebook.com.
    #
    # keywords - Array of Strings; when non-empty, only posts whose text
    #            contains at least one of them are returned.
    #
    # Returns an Array of Post objects.
    def get_posts_from_home(keywords = [])
      get("https://mbasic.facebook.com")
      collect_posts(@driver.find_elements(css: "div[role='article']"), keywords)
    end

    # Opens +group_url+ and submits +text+ through the "xc_message" field.
    #
    # Returns true after submitting. When the field is missing, saves a
    # screenshot to no_group_found.png, prints a message and returns false.
    def post_in_group(group_url, text)
      get(group_url)
      begin
        text_box = @driver.find_element(name: "xc_message")
      rescue Selenium::WebDriver::Error::NoSuchElementError
        @driver.save_screenshot("no_group_found.png")
        puts "Group url dosnt exist"
        return false
      end
      text_box.send_keys(text)
      text_box.submit
      true
    end

    private

    # Clicks the "bp" element when a "bi" element is present on the page.
    # find_elements is used for the presence checks because find_element
    # raises when nothing matches, which previously aborted login whenever the
    # prompt was not shown.
    def dismiss_one_click_prompt
      return if @driver.find_elements(class: "bi").empty?

      button = @driver.find_elements(class: "bp").first
      button.click if button
    end

    # Prints how many raw article elements were found, converts each one with
    # get_post_object and drops the ones that yielded nil.
    def collect_posts(raw_posts, keywords)
      puts "Found #{raw_posts.length} posts"
      raw_posts.map { |raw_post| get_post_object(raw_post, keywords) }.compact
    end

    # Builds a Post from one article element.
    #
    # raw_post - element to read; a nested "div[role='article']" is used for
    #            owner, text and time when one exists, otherwise raw_post.
    # keywords - Array of Strings used to filter on the post text.
    #
    # Returns the Post, or nil when the element does not look like a post,
    # when no keyword matches, or when any StandardError is raised while
    # reading it (best-effort parsing).
    def get_post_object(raw_post, keywords = [])
      new_post = begin
        raw_post.find_element(css: "div[role='article']")
      rescue
        raw_post
      end
      like_data = raw_post
      all_links = like_data.find_elements(tag_name: "a")

      # dont track not post things
      # NOTE(review): this check reads all_links[-7] without the share offset
      # applied below -- confirm that is intended.
      return nil unless all_links[-7].text.include?("Like")

      # 1 when no link with the text "Share" is found, 0 otherwise; moves the
      # like/comment link positions one step further from the end.
      share_offset = like_data.find_elements(link_text: "Share").empty? ? 1 : 0
      like_link = all_links[-7 - share_offset]
      comment_link = all_links[-4 - share_offset]

      post = Post.new
      post.owner = new_post.find_elements(tag_name: "a")[0].text
      post.text = new_post.find_element(tag_name: "p").text
      post.like_count = like_link.text.to_i
      post.comment_count = comment_link.text.to_i
      post.time = new_post.find_element(tag_name: "abbr").text
      post.post_owner_link = all_links[0].attribute("href")
      post.comment_link = comment_link.attribute("href")
      post.like_link = like_data.find_element(link_text: "Like").attribute("href")
      post.more_link = all_links[-1].attribute("href")

      return post if keywords.empty?
      return post if keywords.any? { |keyword| post.text.include?(keyword) }

      nil
    rescue
      nil
    end
  end
end
@@ -0,0 +1,3 @@
1
module FacebookScrapper
  # Release number of the gem. Frozen so the shared constant cannot be mutated
  # in place by callers.
  VERSION = "0.2.0".freeze
end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: facebook_scrapper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mujadded Al Rabbani Alif
@@ -18,7 +18,11 @@ extensions: []
18
18
  extra_rdoc_files: []
19
19
  files:
20
20
  - lib/facebook_scrapper.rb
21
- homepage: https://rubygems.org/gems/facebook_scrapper
21
+ - lib/facebook_scrapper/person.rb
22
+ - lib/facebook_scrapper/post.rb
23
+ - lib/facebook_scrapper/scrapper.rb
24
+ - lib/facebook_scrapper/version.rb
25
+ homepage: https://github.com/Mujadded/facebook_scrapper
22
26
  licenses:
23
27
  - MIT
24
28
  metadata: {}