fbcrawl-colly 0.2.3 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +2 -2
- data/fbcolly/fbcolly.go +35 -13
- data/fbcrawl.proto +1 -0
- data/go.sum +1 -0
- data/lib/fbcrawl_colly/colly.rb +7 -0
- data/lib/fbcrawl_colly/ffi.rb +1 -0
- data/lib/fbcrawl_colly/version.rb +1 -1
- data/main.go +7 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0fc0e07942d352bb9b49c93a106d138bc9946f8e1943bcd81f6181082e79c413
|
4
|
+
data.tar.gz: 1e31f7fe0bc3bf83c82b90d84080a257734cfe63eaa4d79fdb89aa52204e2eb4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 55d00db7f51b078c1ca7c46a59b06eb4d62b1859fac6e11a2da3c117f7c2dd3958d442559f9b84573f4d4f5e2d02a77539287d5628ff60f421405a3e2910e2b7
|
7
|
+
data.tar.gz: 550b26405d7bbd13356f1ca7ef30edfa682c04364f2f8fc409269c74fe25e8cb0535464d395083109b04e17de129bd851da3f035dd2dec6af2095ed357a1ddd7
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
fbcrawl-colly (0.2.3)
|
4
|
+
fbcrawl-colly (0.2.4)
|
5
5
|
ffi
|
6
6
|
google-protobuf
|
7
7
|
|
@@ -9,7 +9,7 @@ GEM
|
|
9
9
|
remote: https://rubygems.org/
|
10
10
|
specs:
|
11
11
|
ffi (1.13.1)
|
12
|
-
google-protobuf (3.
|
12
|
+
google-protobuf (3.13.0)
|
13
13
|
minitest (5.14.1)
|
14
14
|
rake (12.3.3)
|
15
15
|
rake-compiler (1.1.1)
|
data/fbcolly/fbcolly.go
CHANGED
@@ -112,20 +112,23 @@ func (f *Fbcolly) Login(email string, password string, otp string) (string, erro
|
|
112
112
|
|
113
113
|
logger.Info("Login using email", email)
|
114
114
|
loggedIn := false
|
115
|
-
|
115
|
+
firstLogin := true
|
116
116
|
collector.OnHTML("#login_form", func(element *colly.HTMLElement) {
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
117
|
+
if firstLogin {
|
118
|
+
firstLogin = false
|
119
|
+
logger.Info("OnHTML login_form")
|
120
|
+
loginURL, err, reqMap := getForm(element, err)
|
121
|
+
if err != nil {
|
122
|
+
logger.Error(err)
|
123
|
+
return
|
124
|
+
}
|
125
|
+
reqMap["email"] = email
|
126
|
+
reqMap["pass"] = password
|
127
|
+
logger.Info("req map:", reqMap)
|
128
|
+
err = collector.Post(loginURL, reqMap)
|
129
|
+
if err != nil {
|
130
|
+
logger.Error("post err:", err)
|
131
|
+
}
|
129
132
|
}
|
130
133
|
})
|
131
134
|
|
@@ -256,6 +259,25 @@ func (f *Fbcolly) FetchGroupFeed(groupId int64) (error, *fbcrawl.FacebookPostLis
|
|
256
259
|
return err, &fbcrawl.FacebookPostList{Posts: result}
|
257
260
|
}
|
258
261
|
|
262
|
+
func (f *Fbcolly) FetchGroupInfo(groupId int64) (error, *fbcrawl.FacebookGroup) {
|
263
|
+
collector := f.collector.Clone()
|
264
|
+
err := setupSharedCollector(collector)
|
265
|
+
result := &fbcrawl.FacebookGroup{Id: groupId}
|
266
|
+
|
267
|
+
collector.OnHTML("a[href=\"#groupMenuBottom\"] h1", func(element *colly.HTMLElement) {
|
268
|
+
result.Name = element.Text
|
269
|
+
})
|
270
|
+
collector.OnHTML("a[href*=\"view=member\"]", func(element *colly.HTMLElement) {
|
271
|
+
result.MemberCount, _ = strconv.ParseInt(element.DOM.Closest("tr").Find("td:last-child").Text(),10,64)
|
272
|
+
})
|
273
|
+
|
274
|
+
err = collector.Visit(fmt.Sprintf("https://mbasic.facebook.com/groups/%d?view=info", groupId))
|
275
|
+
if err != nil {
|
276
|
+
logger.Error("crawl by colly err:", err)
|
277
|
+
}
|
278
|
+
return err, result
|
279
|
+
}
|
280
|
+
|
259
281
|
func (f *Fbcolly) FetchContentImages(postId int64) (error, *fbcrawl.FacebookImageList) {
|
260
282
|
collector := f.collector.Clone()
|
261
283
|
err := setupSharedCollector(collector)
|
data/fbcrawl.proto
CHANGED
data/go.sum
CHANGED
@@ -20,6 +20,7 @@ github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA
|
|
20
20
|
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
|
21
21
|
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
|
22
22
|
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
23
|
+
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
23
24
|
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
24
25
|
github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
|
25
26
|
github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
|
data/lib/fbcrawl_colly/colly.rb
CHANGED
@@ -19,6 +19,13 @@ module FbcrawlColly
|
|
19
19
|
FbcrawlColly::FFI.LoginWithCookies(@colly, cookies)
|
20
20
|
end
|
21
21
|
|
22
|
+
def fetch_group_info(group_id)
|
23
|
+
s, ptr = FbcrawlColly::FFI.FetchGroupInfo(@colly, group_id)
|
24
|
+
list = FbcrawlColly::FacebookGroup.decode(s)
|
25
|
+
FbcrawlColly::FFI.free(ptr)
|
26
|
+
list
|
27
|
+
end
|
28
|
+
|
22
29
|
def fetch_group_feed(group_id)
|
23
30
|
s, ptr = FbcrawlColly::FFI.FetchGroupFeed(@colly, group_id)
|
24
31
|
list = FbcrawlColly::FacebookPostList.decode(s)
|
data/lib/fbcrawl_colly/ffi.rb
CHANGED
@@ -9,6 +9,7 @@ module FbcrawlColly::FFI
|
|
9
9
|
attach_function :FreeColly, [:pointer], :pointer
|
10
10
|
attach_function :Login, [:pointer, :string, :string], :strptr
|
11
11
|
attach_function :LoginWithCookies, [:pointer, :string], :void
|
12
|
+
attach_function :FetchGroupInfo, [:pointer, :int64], :strptr
|
12
13
|
attach_function :FetchGroupFeed, [:pointer, :int64], :strptr
|
13
14
|
attach_function :FetchPost, [:pointer, :int64, :int64], :strptr
|
14
15
|
attach_function :FetchContentImages, [:pointer, :int64], :strptr
|
data/main.go
CHANGED
@@ -58,6 +58,13 @@ func LoginWithCookies(pointer unsafe.Pointer, cookies *C.char) {
|
|
58
58
|
p.LoginWithCookies(C.GoString(cookies))
|
59
59
|
}
|
60
60
|
|
61
|
+
//export FetchGroupInfo
|
62
|
+
func FetchGroupInfo(pointer unsafe.Pointer, groupId int64) unsafe.Pointer {
|
63
|
+
p := (*fbcolly.Fbcolly)(pointer)
|
64
|
+
_, groupInfo := p.FetchGroupInfo(groupId)
|
65
|
+
marshaled, _ := proto.Marshal(groupInfo)
|
66
|
+
return C.CBytes(append(marshaled, 0))
|
67
|
+
}
|
61
68
|
//export FetchGroupFeed
|
62
69
|
func FetchGroupFeed(pointer unsafe.Pointer, groupId int64) unsafe.Pointer {
|
63
70
|
p := (*fbcolly.Fbcolly)(pointer)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fbcrawl-colly
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.3
|
4
|
+
version: 0.2.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Duy Le
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-08-
|
11
|
+
date: 2020-08-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ffi
|