fbcrawl-colly 0.2.3 → 0.2.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6e43a203c1c38131f0d9eb32b67a1859bd1530aac9249751ff0329a5901388f6
4
- data.tar.gz: f3520762f00d2b1a2d475a9b97d2141fe8cd98faf718ccf3d8a797e61489f976
3
+ metadata.gz: 0fc0e07942d352bb9b49c93a106d138bc9946f8e1943bcd81f6181082e79c413
4
+ data.tar.gz: 1e31f7fe0bc3bf83c82b90d84080a257734cfe63eaa4d79fdb89aa52204e2eb4
5
5
  SHA512:
6
- metadata.gz: 2560aa6a3239671a14f82a95034bbad03684dd44b0fc32cb2092b404678b2ec0e124a33db64a92f5c22f4cd3d9d94a42ccc40a6ab2e1e362088d0f02f05d22f3
7
- data.tar.gz: 305f3d463d75bc3edcfcd1b5a985d9a9882e990e850786132eb1b6d4e93f4e3e31a2c0e9aeb23141cc0633cd1d5ad8e1d88ec4c0e3e4d2d201a6298a7c8699c6
6
+ metadata.gz: 55d00db7f51b078c1ca7c46a59b06eb4d62b1859fac6e11a2da3c117f7c2dd3958d442559f9b84573f4d4f5e2d02a77539287d5628ff60f421405a3e2910e2b7
7
+ data.tar.gz: 550b26405d7bbd13356f1ca7ef30edfa682c04364f2f8fc409269c74fe25e8cb0535464d395083109b04e17de129bd851da3f035dd2dec6af2095ed357a1ddd7
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- fbcrawl-colly (0.2.3)
4
+ fbcrawl-colly (0.2.4)
5
5
  ffi
6
6
  google-protobuf
7
7
 
@@ -9,7 +9,7 @@ GEM
9
9
  remote: https://rubygems.org/
10
10
  specs:
11
11
  ffi (1.13.1)
12
- google-protobuf (3.12.4)
12
+ google-protobuf (3.13.0)
13
13
  minitest (5.14.1)
14
14
  rake (12.3.3)
15
15
  rake-compiler (1.1.1)
@@ -112,20 +112,23 @@ func (f *Fbcolly) Login(email string, password string, otp string) (string, erro
112
112
 
113
113
  logger.Info("Login using email", email)
114
114
  loggedIn := false
115
-
115
+ firstLogin := true
116
116
  collector.OnHTML("#login_form", func(element *colly.HTMLElement) {
117
- logger.Info("OnHTML login_form")
118
- loginURL, err, reqMap := getForm(element, err)
119
- if err != nil {
120
- logger.Error(err)
121
- return
122
- }
123
- reqMap["email"] = email
124
- reqMap["pass"] = password
125
- logger.Info("req map:", reqMap)
126
- err = collector.Post(loginURL, reqMap)
127
- if err != nil {
128
- logger.Error("post err:", err)
117
+ if firstLogin {
118
+ firstLogin = false
119
+ logger.Info("OnHTML login_form")
120
+ loginURL, err, reqMap := getForm(element, err)
121
+ if err != nil {
122
+ logger.Error(err)
123
+ return
124
+ }
125
+ reqMap["email"] = email
126
+ reqMap["pass"] = password
127
+ logger.Info("req map:", reqMap)
128
+ err = collector.Post(loginURL, reqMap)
129
+ if err != nil {
130
+ logger.Error("post err:", err)
131
+ }
129
132
  }
130
133
  })
131
134
 
@@ -256,6 +259,25 @@ func (f *Fbcolly) FetchGroupFeed(groupId int64) (error, *fbcrawl.FacebookPostLis
256
259
  return err, &fbcrawl.FacebookPostList{Posts: result}
257
260
  }
258
261
 
262
+ func (f *Fbcolly) FetchGroupInfo(groupId int64) (error, *fbcrawl.FacebookGroup) {
263
+ collector := f.collector.Clone()
264
+ err := setupSharedCollector(collector)
265
+ result := &fbcrawl.FacebookGroup{Id: groupId}
266
+
267
+ collector.OnHTML("a[href=\"#groupMenuBottom\"] h1", func(element *colly.HTMLElement) {
268
+ result.Name = element.Text
269
+ })
270
+ collector.OnHTML("a[href*=\"view=member\"]", func(element *colly.HTMLElement) {
271
+ result.MemberCount, _ = strconv.ParseInt(element.DOM.Closest("tr").Find("td:last-child").Text(),10,64)
272
+ })
273
+
274
+ err = collector.Visit(fmt.Sprintf("https://mbasic.facebook.com/groups/%d?view=info", groupId))
275
+ if err != nil {
276
+ logger.Error("crawl by colly err:", err)
277
+ }
278
+ return err, result
279
+ }
280
+
259
281
  func (f *Fbcolly) FetchContentImages(postId int64) (error, *fbcrawl.FacebookImageList) {
260
282
  collector := f.collector.Clone()
261
283
  err := setupSharedCollector(collector)
@@ -7,6 +7,7 @@ option go_package = "./fbcrawl;fbcrawl";
7
7
  message FacebookGroup {
8
8
  int64 id = 1;
9
9
  string name = 2;
10
+ int64 member_count = 3;
10
11
  }
11
12
 
12
13
  message FacebookUser {
data/go.sum CHANGED
@@ -20,6 +20,7 @@ github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA
20
20
  github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
21
21
  github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
22
22
  github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
23
+ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
23
24
  github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
24
25
  github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
25
26
  github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
@@ -19,6 +19,13 @@ module FbcrawlColly
19
19
  FbcrawlColly::FFI.LoginWithCookies(@colly, cookies)
20
20
  end
21
21
 
22
+ def fetch_group_info(group_id)
23
+ s, ptr = FbcrawlColly::FFI.FetchGroupInfo(@colly, group_id)
24
+ list = FbcrawlColly::FacebookGroup.decode(s)
25
+ FbcrawlColly::FFI.free(ptr)
26
+ list
27
+ end
28
+
22
29
  def fetch_group_feed(group_id)
23
30
  s, ptr = FbcrawlColly::FFI.FetchGroupFeed(@colly, group_id)
24
31
  list = FbcrawlColly::FacebookPostList.decode(s)
@@ -9,6 +9,7 @@ module FbcrawlColly::FFI
9
9
  attach_function :FreeColly, [:pointer], :pointer
10
10
  attach_function :Login, [:pointer, :string, :string], :strptr
11
11
  attach_function :LoginWithCookies, [:pointer, :string], :void
12
+ attach_function :FetchGroupInfo, [:pointer, :int64], :strptr
12
13
  attach_function :FetchGroupFeed, [:pointer, :int64], :strptr
13
14
  attach_function :FetchPost, [:pointer, :int64, :int64], :strptr
14
15
  attach_function :FetchContentImages, [:pointer, :int64], :strptr
@@ -1,3 +1,3 @@
1
1
  module FbcrawlColly
2
- VERSION = "0.2.3"
2
+ VERSION = "0.2.4"
3
3
  end
data/main.go CHANGED
@@ -58,6 +58,13 @@ func LoginWithCookies(pointer unsafe.Pointer, cookies *C.char) {
58
58
  p.LoginWithCookies(C.GoString(cookies))
59
59
  }
60
60
 
61
+ //export FetchGroupInfo
62
+ func FetchGroupInfo(pointer unsafe.Pointer, groupId int64) unsafe.Pointer {
63
+ p := (*fbcolly.Fbcolly)(pointer)
64
+ _, groupInfo := p.FetchGroupInfo(groupId)
65
+ marshaled, _ := proto.Marshal(groupInfo)
66
+ return C.CBytes(append(marshaled, 0))
67
+ }
61
68
  //export FetchGroupFeed
62
69
  func FetchGroupFeed(pointer unsafe.Pointer, groupId int64) unsafe.Pointer {
63
70
  p := (*fbcolly.Fbcolly)(pointer)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fbcrawl-colly
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: 0.2.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Duy Le
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-08-14 00:00:00.000000000 Z
11
+ date: 2020-08-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ffi