facebook_scrapper 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 26d91e0a8dbf79232c734882341127301ac516a29d3524462be0b56f3a8727b7
4
- data.tar.gz: 813d8e43fd8857d7882b6c6fc6873a4b873ed630192107c945bca0e0d2b74f2b
3
+ metadata.gz: 39780580d7e538cfced26d2f1205f77b1a9f1b6fad95bea835365670b726f736
4
+ data.tar.gz: 36f2b706b4bc463cc9e936bf40a16a8c013740c6965d339b4ae5eb1146437c00
5
5
  SHA512:
6
- metadata.gz: 208d4f649abbe1025b98acc8250038ab04da99de42248e5b87ca7912c9fd3e1ea80a8085119e85c6a9093a76f671b00d75cb4d2446f07fa01b70101dee6d0f1d
7
- data.tar.gz: 034df0dbf1b23e915cf65c3c1b639124ec880c3a3e806cc91f50f6de06eb5ffd54f8ce8cdf06acb9c8a62454d9847e1b3a5a17babec50be933aaa137c03826a9
6
+ metadata.gz: 5ea2d8f679806dbfd1cc05c3d000927a18ce5e51d6c80b212b7db57db340b4cae54c61d79baf68c34850f990237dfdac514f52844aeac22229fdcdf0db1b8a72
7
+ data.tar.gz: 2a9a6da1138d4c89afbd58efdc5979f25adb81a11d97fa65da4a1e1886de62b296a49ec6bae696cb55fea9ee30af773a0b6214d297136b7e19bbe4746c58d77e
@@ -1,201 +1,6 @@
1
- require "selenium-webdriver"
2
-
3
- class FacebookScrapper
4
- def initialize
5
- @driver = Selenium::WebDriver.for :chrome
6
- end
7
-
8
- def toBasicUrl(url)
9
- if url.include? "m.facebook.com"
10
- return url.gsub!("m.facebook.com", "mbasic.facebook.com")
11
- elsif url.include? "www.facebook.com"
12
- return url.gsub!("www.facebook.com", "mbasic.facebook.com")
13
- else
14
- return url
15
- end
16
- end
17
-
18
- def get(url)
19
- @driver.get(toBasicUrl(url))
20
- end
21
-
22
- def get_driver
23
- return @driver
24
- end
25
-
26
- def login(email, password)
27
- url = "https://mbasic.facebook.com"
28
- get(url)
29
- email_box = @driver.find_element(name: "email")
30
- email_box.send_keys(email)
31
- password_box = @driver.find_element(name: "pass")
32
- password_box.send_keys(password)
33
- password_box.submit
34
- # Bypass facebook OneClick Login
35
- if @driver.find_element(class: "bi")
36
- @driver.find_element(class: "bp").click()
37
- end
38
- begin
39
- @driver.find_element(name: "xc_message")
40
- puts "Logged in"
41
- return true
42
- rescue Selenium::WebDriver::Error::NoSuchElementError => e
43
- body = @driver.find_element(tag_name: "body").text
44
- if (body.include?("Enter login code to continue"))
45
- puts "You 2 factor is turned on. Authenticate it and try again"
46
- else
47
- puts "Failed to login"
48
- @driver.save_screenshot("login_failed.png")
49
- end
50
- return false
51
- end
52
- end
53
-
54
- def logout
55
- end
56
-
57
- def write_post_to_url(url, text)
58
- begin
59
- get(url)
60
- textBox = @driver.find_element(name: "xc_message")
61
- textBox.send_keys(text)
62
- textbox.submit
63
- return true
64
- rescue => e
65
- puts "Failed to post in #{url} for error of #{e}"
66
- return false
67
- end
68
- end
69
-
70
- def get_posts_group(url)
71
- get(url)
72
- posts = []
73
- all_posts = @driver.find_element(id: "m_group_stories_container").find_elements(css: "div[role='article']")
74
- puts "Found #{all_posts.length} posts"
75
- all_posts.each do |raw_post|
76
- new_post = get_post_object(raw_post)
77
- puts new_post
78
- posts.push(new_post.to_json) if new_post
79
- end
80
- return posts
81
- end
82
-
83
- def get_posts_from_home
84
- get("https://mbasic.facebook.com")
85
- posts = []
86
- all_posts = @driver.find_elements(css: "div[role='article']")
87
- puts "Found #{all_posts.length} posts"
88
- all_posts.each do |raw_post|
89
- new_post = get_post_object(raw_post)
90
- posts.push(new_post) if new_post
91
- end
92
- return posts
93
- end
94
-
95
- def post_in_group(group_url, text)
96
- get(group_url)
97
- begin
98
- text_box = @driver.find_element(name: "xc_message")
99
- rescue Selenium::WebDriver::Error::NoSuchElementError
100
- @driver.save_screenshot("no_group_found.png")
101
- puts "Group url dosnt exist"
102
- return false
103
- end
104
- text_box.send_keys(text)
105
- text_box.submit
106
- return true
107
- end
108
-
109
- def get_post_object(raw_post)
110
- begin
111
- new_post = raw_post.find_element(css: "div[role='article']")
112
- like_data = raw_post
113
- rescue
114
- new_post = raw_post
115
- like_data = raw_post
116
- end
117
- post = Post.new
118
- all_links = like_data.find_elements(tag_name: "a")
119
-
120
- # # dont track not post things
121
- return nil unless all_links[-7].text.include?("Like")
122
-
123
- post.owner = new_post.find_elements(tag_name: "a")[0].text
124
- post.text = new_post.find_element(tag_name: "p").text
125
- post.like_count = all_links[-7].text.to_i
126
- post.comment_count = all_links[-4].text.to_i
127
- post.time = new_post.find_element(tag_name: "abbr").text
128
- post.post_owner_link = all_links[0].attribute("href")
129
- post.comment_link = all_links[-4].attribute("href")
130
- post.like_link = like_data.find_element(link_text: "Like").attribute("href")
131
- post.more_link = all_links[-1].attribute("href")
132
- return post.to_h
133
- rescue
134
- return nil
135
- end
1
+ module FacebookScrapper
2
+ require "facebook_scrapper/version"
3
+ require "facebook_scrapper/person"
4
+ require "facebook_scrapper/post"
5
+ require "facebook_scrapper/scrapper"
136
6
  end
137
-
138
- class Person
139
- attr_accessor :name, :pofile_link, :add_as_friend_link
140
-
141
- def initialize
142
- self.name = ""
143
- self.pofile_link = ""
144
- self.add_as_friend_link = ""
145
- end
146
-
147
- def to_str
148
- s = ""
149
- s += "#{self.name}:\n"
150
- s += "Profile Link: #{self.pofile_link}"
151
- if self.add_as_friend_link != ""
152
- s += "Addlink ->: #{self.add_as_friend_link}"
153
- end
154
- return s
155
- end
156
- end
157
-
158
- class Post
159
- attr_accessor :owner, :text, :like_count, :comment_count, :like_link, :time, :post_owner_link, :comment_link, :more_link
160
-
161
- def initialize
162
- self.owner = ""
163
- self.text = ""
164
- self.like_count = 0
165
- self.comment_count = 0
166
- self.time = ""
167
- self.post_owner_link = ""
168
- self.comment_link = ""
169
- self.like_link = ""
170
- self.more_link = ""
171
- end
172
-
173
- def to_h
174
- hash = {}
175
- instance_variables.each do |var|
176
- hash[var.to_s.delete("@")] = instance_variable_get(var)
177
- end
178
- return hash
179
- end
180
-
181
- def to_json
182
- to_h.to_json
183
- end
184
-
185
- def to_str
186
- s = "\nPost by #{self.owner}: "
187
- s += "#{self.text} \n"
188
- s += "Likes: #{self.like_count.to_s} - "
189
- s += "Comments: #{self.comment_count.to_s} - "
190
- s += "#{self.time} "
191
- s += " - Privacy: #{self.privacy}\n-"
192
- s += "\n Comment -> #{self.comment_link}\n"
193
- return s
194
- end
195
- end
196
-
197
- # def get_posts(url)
198
- # driver.get(url)
199
- # driver.find_element(id:'m_group_stories_container').find_elements(css: "div[role='article']")[0].find_element(tag_name: 'p').text
200
- # driver.find_element(id:'m_group_stories_container').find_elements(css: "div[role='article']")[1].find_element(link_text: 'Full Story').attribute('href')
201
- # end
@@ -0,0 +1,21 @@
1
module FacebookScrapper
  # Plain value object holding the name and links of a Facebook user.
  #
  # All attributes start out as empty strings and are filled in by whoever
  # builds the object.
  class Person
    # NOTE: +pofile_link+ is the historical (misspelled) attribute name. It is
    # kept so existing callers continue to work; prefer +profile_link+.
    attr_accessor :name, :pofile_link, :add_as_friend_link

    # Correctly spelled aliases for the legacy +pofile_link+ accessors.
    alias_method :profile_link, :pofile_link
    alias_method :profile_link=, :pofile_link=

    def initialize
      self.name = ""
      self.pofile_link = ""
      self.add_as_friend_link = ""
    end

    # Builds a human-readable, multi-line description of the person.
    #
    # The "Addlink" line is only emitted when an add-as-friend link is set,
    # and it is placed on its own line so it is not glued to the profile link.
    #
    # @return [String]
    def to_str
      description = "#{name}:\n"
      description += "Profile Link: #{pofile_link}"
      description += "\nAddlink ->: #{add_as_friend_link}" unless add_as_friend_link.to_s.empty?
      description
    end
  end
end
@@ -0,0 +1,40 @@
1
module FacebookScrapper
  # Needed for Hash#to_json, which Post#to_json delegates to.
  require "json"

  # Value object describing a single scraped post.
  #
  # Text attributes default to empty strings and the counters to 0.
  class Post
    attr_accessor :owner, :text, :like_count, :comment_count, :like_link, :time, :post_owner_link, :comment_link, :more_link

    # Optional privacy label. It is deliberately not assigned in #initialize,
    # so it only shows up in #to_h / #to_json once a caller sets it.
    attr_accessor :privacy

    def initialize
      self.owner = ""
      self.text = ""
      self.like_count = 0
      self.comment_count = 0
      self.time = ""
      self.post_owner_link = ""
      self.comment_link = ""
      self.like_link = ""
      self.more_link = ""
    end

    # Converts the post into a Hash keyed by attribute name (without "@").
    #
    # @return [Hash{String => Object}] one entry per assigned instance variable
    def to_h
      instance_variables.each_with_object({}) do |var, hash|
        hash[var.to_s.delete("@")] = instance_variable_get(var)
      end
    end

    # Serializes the post as a JSON object.
    #
    # Accepts (and forwards) the generator arguments that the json library
    # passes when a Post is nested inside another structure being serialized.
    #
    # @return [String] JSON document
    def to_json(*args)
      to_h.to_json(*args)
    end

    # Builds a human-readable, multi-line summary of the post.
    #
    # The "Privacy" segment is only included when #privacy has been set.
    #
    # @return [String]
    def to_str
      summary = "\nPost by #{owner}: "
      summary += "#{text} \n"
      summary += "Likes: #{like_count} - "
      summary += "Comments: #{comment_count} - "
      summary += "#{time} "
      summary += " - Privacy: #{privacy}" unless privacy.to_s.empty?
      summary += "\n-"
      summary += "\n Comment -> #{comment_link}\n"
      summary
    end
  end
end
@@ -0,0 +1,144 @@
1
+ require "selenium-webdriver"
2
+
3
module FacebookScrapper
  # Drives a Selenium-controlled Chrome browser against the "mbasic" variant
  # of Facebook to log in, read posts and publish posts.
  class Scrapper
    MBASIC_HOST = "mbasic.facebook.com".freeze
    HOME_URL = "https://mbasic.facebook.com".freeze

    # Starts a new Chrome session through Selenium.
    def initialize
      @driver = Selenium::WebDriver.for :chrome
    end

    # Rewrites an "m." or "www." Facebook URL so it points at the mbasic host.
    #
    # The argument is never modified (so frozen strings are accepted); a new
    # string is returned when a rewrite happens, otherwise +url+ itself.
    #
    # @param url [String]
    # @return [String]
    def toBasicUrl(url)
      if url.include?("m.facebook.com")
        url.gsub("m.facebook.com", MBASIC_HOST)
      elsif url.include?("www.facebook.com")
        url.gsub("www.facebook.com", MBASIC_HOST)
      else
        url
      end
    end

    # Navigates the browser to the mbasic version of +url+.
    #
    # @param url [String]
    def get(url)
      @driver.get(toBasicUrl(url))
    end

    # Submits the login form and reports whether the session is logged in.
    #
    # Success is detected by the presence of the "xc_message" field after the
    # form has been submitted.
    #
    # @param email [String]
    # @param password [String]
    # @return [Boolean] true when logged in, false otherwise
    def login(email, password)
      get(HOME_URL)
      @driver.find_element(name: "email").send_keys(email)
      password_box = @driver.find_element(name: "pass")
      password_box.send_keys(password)
      password_box.submit

      # Bypass facebook OneClick Login.
      # find_elements is used for the probe because find_element raises when
      # nothing matches, which would abort every login that skips this page.
      unless @driver.find_elements(class: "bi").empty?
        confirm_button = @driver.find_elements(class: "bp").first
        confirm_button.click if confirm_button
      end

      begin
        @driver.find_element(name: "xc_message")
        puts "Logged in"
        true
      rescue Selenium::WebDriver::Error::NoSuchElementError
        body = @driver.find_element(tag_name: "body").text
        if body.include?("Enter login code to continue")
          puts "You 2 factor is turned on. Authenticate it and try again"
        else
          puts "Failed to login"
          @driver.save_screenshot("login_failed.png")
        end
        false
      end
    end

    # Not implemented: currently a no-op.
    def logout
    end

    # Opens +url+, types +text+ into the "xc_message" field and submits it.
    #
    # Any error is reported on stdout and turned into a false return value.
    #
    # @param url [String]
    # @param text [String]
    # @return [Boolean] true when the form was submitted
    def write_post_to_url(url, text)
      get(url)
      text_box = @driver.find_element(name: "xc_message")
      text_box.send_keys(text)
      text_box.submit
      true
    rescue StandardError => e
      puts "Failed to post in #{url} for error of #{e}"
      false
    end

    # Collects the posts listed inside "m_group_stories_container" at +url+.
    #
    # @param url [String] group URL
    # @param keywords [Array<String>] when non-empty, only posts whose text
    #   contains at least one keyword are returned
    # @return [Array<Post>]
    def get_posts_from_group_url(url, keywords = [])
      get(url)
      container = @driver.find_element(id: "m_group_stories_container")
      collect_posts(container.find_elements(css: "div[role='article']"), keywords)
    end

    # Collects the posts shown on the mbasic home page.
    #
    # @param keywords [Array<String>] see #get_posts_from_group_url
    # @return [Array<Post>]
    def get_posts_from_home(keywords = [])
      get(HOME_URL)
      collect_posts(@driver.find_elements(css: "div[role='article']"), keywords)
    end

    # Publishes +text+ on the page at +group_url+.
    #
    # When the "xc_message" field is missing, a screenshot is saved to
    # "no_group_found.png" and false is returned.
    #
    # @param group_url [String]
    # @param text [String]
    # @return [Boolean]
    def post_in_group(group_url, text)
      get(group_url)
      begin
        text_box = @driver.find_element(name: "xc_message")
      rescue Selenium::WebDriver::Error::NoSuchElementError
        @driver.save_screenshot("no_group_found.png")
        puts "Group url dosnt exist"
        return false
      end
      text_box.send_keys(text)
      text_box.submit
      true
    end

    private

    # Turns raw article elements into Post objects, dropping the ones that
    # could not be parsed or did not match +keywords+.
    def collect_posts(raw_posts, keywords)
      puts "Found #{raw_posts.length} posts"
      posts = []
      raw_posts.each do |raw_post|
        new_post = get_post_object(raw_post, keywords)
        posts.push(new_post) if new_post
      end
      posts
    end

    # Builds a Post from one article element.
    #
    # Best effort: any error while reading the element yields nil instead of
    # propagating, so one unparsable article does not abort a whole listing.
    #
    # @param raw_post [#find_element, #find_elements] article element
    # @param keywords [Array<String>] optional text filter
    # @return [Post, nil] nil when the element is not a post, cannot be
    #   parsed, or does not contain any of the keywords
    def get_post_object(raw_post, keywords = [])
      # Prefer a nested article element when there is one; otherwise read the
      # post fields from the element itself.
      begin
        new_post = raw_post.find_element(css: "div[role='article']")
      rescue StandardError
        new_post = raw_post
      end
      like_data = raw_post

      post = Post.new
      all_links = like_data.find_elements(tag_name: "a")

      # dont track not post things
      return nil unless all_links[-7].text.include?("Like")

      # 1 when no "Share" link is present, 0 otherwise.
      # NOTE(review): the like/comment indices move one position further from
      # the end when "Share" is missing -- confirm against the page markup.
      with_share = like_data.find_elements(link_text: "Share").empty? ? 1 : 0

      post.owner = new_post.find_elements(tag_name: "a")[0].text
      post.text = new_post.find_element(tag_name: "p").text
      post.like_count = all_links[-7 - with_share].text.to_i
      post.comment_count = all_links[-4 - with_share].text.to_i
      post.time = new_post.find_element(tag_name: "abbr").text
      post.post_owner_link = all_links[0].attribute("href")
      post.comment_link = all_links[-4 - with_share].attribute("href")
      post.like_link = like_data.find_element(link_text: "Like").attribute("href")
      post.more_link = all_links[-1].attribute("href")

      return post if keywords.empty? || keywords.any? { |keyword| post.text.include?(keyword) }

      nil
    rescue StandardError
      nil
    end
  end
end
@@ -0,0 +1,3 @@
1
module FacebookScrapper
  # Version of the gem; frozen so the constant cannot be mutated in place.
  VERSION = "0.2.0".freeze
end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: facebook_scrapper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mujadded Al Rabbani Alif
@@ -18,7 +18,11 @@ extensions: []
18
18
  extra_rdoc_files: []
19
19
  files:
20
20
  - lib/facebook_scrapper.rb
21
- homepage: https://rubygems.org/gems/facebook_scrapper
21
+ - lib/facebook_scrapper/person.rb
22
+ - lib/facebook_scrapper/post.rb
23
+ - lib/facebook_scrapper/scrapper.rb
24
+ - lib/facebook_scrapper/version.rb
25
+ homepage: https://github.com/Mujadded/facebook_scrapper
22
26
  licenses:
23
27
  - MIT
24
28
  metadata: {}