fbcrawl-colly 0.2.3 → 0.2.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +2 -2
- data/fbcolly/fbcolly.go +35 -13
- data/fbcrawl.proto +1 -0
- data/go.sum +1 -0
- data/lib/fbcrawl_colly/colly.rb +7 -0
- data/lib/fbcrawl_colly/ffi.rb +1 -0
- data/lib/fbcrawl_colly/version.rb +1 -1
- data/main.go +7 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0fc0e07942d352bb9b49c93a106d138bc9946f8e1943bcd81f6181082e79c413
|
4
|
+
data.tar.gz: 1e31f7fe0bc3bf83c82b90d84080a257734cfe63eaa4d79fdb89aa52204e2eb4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 55d00db7f51b078c1ca7c46a59b06eb4d62b1859fac6e11a2da3c117f7c2dd3958d442559f9b84573f4d4f5e2d02a77539287d5628ff60f421405a3e2910e2b7
|
7
|
+
data.tar.gz: 550b26405d7bbd13356f1ca7ef30edfa682c04364f2f8fc409269c74fe25e8cb0535464d395083109b04e17de129bd851da3f035dd2dec6af2095ed357a1ddd7
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
fbcrawl-colly (0.2.3)
|
4
|
+
fbcrawl-colly (0.2.4)
|
5
5
|
ffi
|
6
6
|
google-protobuf
|
7
7
|
|
@@ -9,7 +9,7 @@ GEM
|
|
9
9
|
remote: https://rubygems.org/
|
10
10
|
specs:
|
11
11
|
ffi (1.13.1)
|
12
|
-
google-protobuf (3.
|
12
|
+
google-protobuf (3.13.0)
|
13
13
|
minitest (5.14.1)
|
14
14
|
rake (12.3.3)
|
15
15
|
rake-compiler (1.1.1)
|
data/fbcolly/fbcolly.go
CHANGED
@@ -112,20 +112,23 @@ func (f *Fbcolly) Login(email string, password string, otp string) (string, erro
|
|
112
112
|
|
113
113
|
logger.Info("Login using email", email)
|
114
114
|
loggedIn := false
|
115
|
-
|
115
|
+
firstLogin := true
|
116
116
|
collector.OnHTML("#login_form", func(element *colly.HTMLElement) {
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
117
|
+
if firstLogin {
|
118
|
+
firstLogin = false
|
119
|
+
logger.Info("OnHTML login_form")
|
120
|
+
loginURL, err, reqMap := getForm(element, err)
|
121
|
+
if err != nil {
|
122
|
+
logger.Error(err)
|
123
|
+
return
|
124
|
+
}
|
125
|
+
reqMap["email"] = email
|
126
|
+
reqMap["pass"] = password
|
127
|
+
logger.Info("req map:", reqMap)
|
128
|
+
err = collector.Post(loginURL, reqMap)
|
129
|
+
if err != nil {
|
130
|
+
logger.Error("post err:", err)
|
131
|
+
}
|
129
132
|
}
|
130
133
|
})
|
131
134
|
|
@@ -256,6 +259,25 @@ func (f *Fbcolly) FetchGroupFeed(groupId int64) (error, *fbcrawl.FacebookPostLis
|
|
256
259
|
return err, &fbcrawl.FacebookPostList{Posts: result}
|
257
260
|
}
|
258
261
|
|
262
|
+
func (f *Fbcolly) FetchGroupInfo(groupId int64) (error, *fbcrawl.FacebookGroup) {
|
263
|
+
collector := f.collector.Clone()
|
264
|
+
err := setupSharedCollector(collector)
|
265
|
+
result := &fbcrawl.FacebookGroup{Id: groupId}
|
266
|
+
|
267
|
+
collector.OnHTML("a[href=\"#groupMenuBottom\"] h1", func(element *colly.HTMLElement) {
|
268
|
+
result.Name = element.Text
|
269
|
+
})
|
270
|
+
collector.OnHTML("a[href*=\"view=member\"]", func(element *colly.HTMLElement) {
|
271
|
+
result.MemberCount, _ = strconv.ParseInt(element.DOM.Closest("tr").Find("td:last-child").Text(),10,64)
|
272
|
+
})
|
273
|
+
|
274
|
+
err = collector.Visit(fmt.Sprintf("https://mbasic.facebook.com/groups/%d?view=info", groupId))
|
275
|
+
if err != nil {
|
276
|
+
logger.Error("crawl by colly err:", err)
|
277
|
+
}
|
278
|
+
return err, result
|
279
|
+
}
|
280
|
+
|
259
281
|
func (f *Fbcolly) FetchContentImages(postId int64) (error, *fbcrawl.FacebookImageList) {
|
260
282
|
collector := f.collector.Clone()
|
261
283
|
err := setupSharedCollector(collector)
|
data/fbcrawl.proto
CHANGED
data/go.sum
CHANGED
@@ -20,6 +20,7 @@ github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA
|
|
20
20
|
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
|
21
21
|
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
|
22
22
|
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
23
|
+
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
23
24
|
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
24
25
|
github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
|
25
26
|
github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
|
data/lib/fbcrawl_colly/colly.rb
CHANGED
@@ -19,6 +19,13 @@ module FbcrawlColly
|
|
19
19
|
FbcrawlColly::FFI.LoginWithCookies(@colly, cookies)
|
20
20
|
end
|
21
21
|
|
22
|
+
def fetch_group_info(group_id)
|
23
|
+
s, ptr = FbcrawlColly::FFI.FetchGroupInfo(@colly, group_id)
|
24
|
+
list = FbcrawlColly::FacebookGroup.decode(s)
|
25
|
+
FbcrawlColly::FFI.free(ptr)
|
26
|
+
list
|
27
|
+
end
|
28
|
+
|
22
29
|
def fetch_group_feed(group_id)
|
23
30
|
s, ptr = FbcrawlColly::FFI.FetchGroupFeed(@colly, group_id)
|
24
31
|
list = FbcrawlColly::FacebookPostList.decode(s)
|
data/lib/fbcrawl_colly/ffi.rb
CHANGED
@@ -9,6 +9,7 @@ module FbcrawlColly::FFI
|
|
9
9
|
attach_function :FreeColly, [:pointer], :pointer
|
10
10
|
attach_function :Login, [:pointer, :string, :string], :strptr
|
11
11
|
attach_function :LoginWithCookies, [:pointer, :string], :void
|
12
|
+
attach_function :FetchGroupInfo, [:pointer, :int64], :strptr
|
12
13
|
attach_function :FetchGroupFeed, [:pointer, :int64], :strptr
|
13
14
|
attach_function :FetchPost, [:pointer, :int64, :int64], :strptr
|
14
15
|
attach_function :FetchContentImages, [:pointer, :int64], :strptr
|
data/main.go
CHANGED
@@ -58,6 +58,13 @@ func LoginWithCookies(pointer unsafe.Pointer, cookies *C.char) {
|
|
58
58
|
p.LoginWithCookies(C.GoString(cookies))
|
59
59
|
}
|
60
60
|
|
61
|
+
//export FetchGroupInfo
|
62
|
+
func FetchGroupInfo(pointer unsafe.Pointer, groupId int64) unsafe.Pointer {
|
63
|
+
p := (*fbcolly.Fbcolly)(pointer)
|
64
|
+
_, groupInfo := p.FetchGroupInfo(groupId)
|
65
|
+
marshaled, _ := proto.Marshal(groupInfo)
|
66
|
+
return C.CBytes(append(marshaled, 0))
|
67
|
+
}
|
61
68
|
//export FetchGroupFeed
|
62
69
|
func FetchGroupFeed(pointer unsafe.Pointer, groupId int64) unsafe.Pointer {
|
63
70
|
p := (*fbcolly.Fbcolly)(pointer)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fbcrawl-colly
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.3
|
4
|
+
version: 0.2.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Duy Le
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-08-
|
11
|
+
date: 2020-08-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ffi
|