fbcrawl-colly 0.2.3 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6e43a203c1c38131f0d9eb32b67a1859bd1530aac9249751ff0329a5901388f6
4
- data.tar.gz: f3520762f00d2b1a2d475a9b97d2141fe8cd98faf718ccf3d8a797e61489f976
3
+ metadata.gz: 0fc0e07942d352bb9b49c93a106d138bc9946f8e1943bcd81f6181082e79c413
4
+ data.tar.gz: 1e31f7fe0bc3bf83c82b90d84080a257734cfe63eaa4d79fdb89aa52204e2eb4
5
5
  SHA512:
6
- metadata.gz: 2560aa6a3239671a14f82a95034bbad03684dd44b0fc32cb2092b404678b2ec0e124a33db64a92f5c22f4cd3d9d94a42ccc40a6ab2e1e362088d0f02f05d22f3
7
- data.tar.gz: 305f3d463d75bc3edcfcd1b5a985d9a9882e990e850786132eb1b6d4e93f4e3e31a2c0e9aeb23141cc0633cd1d5ad8e1d88ec4c0e3e4d2d201a6298a7c8699c6
6
+ metadata.gz: 55d00db7f51b078c1ca7c46a59b06eb4d62b1859fac6e11a2da3c117f7c2dd3958d442559f9b84573f4d4f5e2d02a77539287d5628ff60f421405a3e2910e2b7
7
+ data.tar.gz: 550b26405d7bbd13356f1ca7ef30edfa682c04364f2f8fc409269c74fe25e8cb0535464d395083109b04e17de129bd851da3f035dd2dec6af2095ed357a1ddd7
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- fbcrawl-colly (0.2.3)
4
+ fbcrawl-colly (0.2.4)
5
5
  ffi
6
6
  google-protobuf
7
7
 
@@ -9,7 +9,7 @@ GEM
9
9
  remote: https://rubygems.org/
10
10
  specs:
11
11
  ffi (1.13.1)
12
- google-protobuf (3.12.4)
12
+ google-protobuf (3.13.0)
13
13
  minitest (5.14.1)
14
14
  rake (12.3.3)
15
15
  rake-compiler (1.1.1)
@@ -112,20 +112,23 @@ func (f *Fbcolly) Login(email string, password string, otp string) (string, erro
112
112
 
113
113
  logger.Info("Login using email", email)
114
114
  loggedIn := false
115
-
115
+ firstLogin := true
116
116
  collector.OnHTML("#login_form", func(element *colly.HTMLElement) {
117
- logger.Info("OnHTML login_form")
118
- loginURL, err, reqMap := getForm(element, err)
119
- if err != nil {
120
- logger.Error(err)
121
- return
122
- }
123
- reqMap["email"] = email
124
- reqMap["pass"] = password
125
- logger.Info("req map:", reqMap)
126
- err = collector.Post(loginURL, reqMap)
127
- if err != nil {
128
- logger.Error("post err:", err)
117
+ if firstLogin {
118
+ firstLogin = false
119
+ logger.Info("OnHTML login_form")
120
+ loginURL, err, reqMap := getForm(element, err)
121
+ if err != nil {
122
+ logger.Error(err)
123
+ return
124
+ }
125
+ reqMap["email"] = email
126
+ reqMap["pass"] = password
127
+ logger.Info("req map:", reqMap)
128
+ err = collector.Post(loginURL, reqMap)
129
+ if err != nil {
130
+ logger.Error("post err:", err)
131
+ }
129
132
  }
130
133
  })
131
134
 
@@ -256,6 +259,25 @@ func (f *Fbcolly) FetchGroupFeed(groupId int64) (error, *fbcrawl.FacebookPostLis
256
259
  return err, &fbcrawl.FacebookPostList{Posts: result}
257
260
  }
258
261
 
262
+ func (f *Fbcolly) FetchGroupInfo(groupId int64) (error, *fbcrawl.FacebookGroup) {
263
+ collector := f.collector.Clone()
264
+ err := setupSharedCollector(collector)
265
+ result := &fbcrawl.FacebookGroup{Id: groupId}
266
+
267
+ collector.OnHTML("a[href=\"#groupMenuBottom\"] h1", func(element *colly.HTMLElement) {
268
+ result.Name = element.Text
269
+ })
270
+ collector.OnHTML("a[href*=\"view=member\"]", func(element *colly.HTMLElement) {
271
+ result.MemberCount, _ = strconv.ParseInt(element.DOM.Closest("tr").Find("td:last-child").Text(),10,64)
272
+ })
273
+
274
+ err = collector.Visit(fmt.Sprintf("https://mbasic.facebook.com/groups/%d?view=info", groupId))
275
+ if err != nil {
276
+ logger.Error("crawl by colly err:", err)
277
+ }
278
+ return err, result
279
+ }
280
+
259
281
  func (f *Fbcolly) FetchContentImages(postId int64) (error, *fbcrawl.FacebookImageList) {
260
282
  collector := f.collector.Clone()
261
283
  err := setupSharedCollector(collector)
@@ -7,6 +7,7 @@ option go_package = "./fbcrawl;fbcrawl";
7
7
  message FacebookGroup {
8
8
  int64 id = 1;
9
9
  string name = 2;
10
+ int64 member_count = 3;
10
11
  }
11
12
 
12
13
  message FacebookUser {
data/go.sum CHANGED
@@ -20,6 +20,7 @@ github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA
20
20
  github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
21
21
  github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
22
22
  github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
23
+ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
23
24
  github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
24
25
  github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
25
26
  github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
@@ -19,6 +19,13 @@ module FbcrawlColly
19
19
  FbcrawlColly::FFI.LoginWithCookies(@colly, cookies)
20
20
  end
21
21
 
22
+ def fetch_group_info(group_id)
23
+ s, ptr = FbcrawlColly::FFI.FetchGroupInfo(@colly, group_id)
24
+ list = FbcrawlColly::FacebookGroup.decode(s)
25
+ FbcrawlColly::FFI.free(ptr)
26
+ list
27
+ end
28
+
22
29
  def fetch_group_feed(group_id)
23
30
  s, ptr = FbcrawlColly::FFI.FetchGroupFeed(@colly, group_id)
24
31
  list = FbcrawlColly::FacebookPostList.decode(s)
@@ -9,6 +9,7 @@ module FbcrawlColly::FFI
9
9
  attach_function :FreeColly, [:pointer], :pointer
10
10
  attach_function :Login, [:pointer, :string, :string], :strptr
11
11
  attach_function :LoginWithCookies, [:pointer, :string], :void
12
+ attach_function :FetchGroupInfo, [:pointer, :int64], :strptr
12
13
  attach_function :FetchGroupFeed, [:pointer, :int64], :strptr
13
14
  attach_function :FetchPost, [:pointer, :int64, :int64], :strptr
14
15
  attach_function :FetchContentImages, [:pointer, :int64], :strptr
@@ -1,3 +1,3 @@
1
1
  module FbcrawlColly
2
- VERSION = "0.2.3"
2
+ VERSION = "0.2.4"
3
3
  end
data/main.go CHANGED
@@ -58,6 +58,13 @@ func LoginWithCookies(pointer unsafe.Pointer, cookies *C.char) {
58
58
  p.LoginWithCookies(C.GoString(cookies))
59
59
  }
60
60
 
61
+ //export FetchGroupInfo
62
+ func FetchGroupInfo(pointer unsafe.Pointer, groupId int64) unsafe.Pointer {
63
+ p := (*fbcolly.Fbcolly)(pointer)
64
+ _, groupInfo := p.FetchGroupInfo(groupId)
65
+ marshaled, _ := proto.Marshal(groupInfo)
66
+ return C.CBytes(append(marshaled, 0))
67
+ }
61
68
  //export FetchGroupFeed
62
69
  func FetchGroupFeed(pointer unsafe.Pointer, groupId int64) unsafe.Pointer {
63
70
  p := (*fbcolly.Fbcolly)(pointer)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fbcrawl-colly
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: 0.2.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Duy Le
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-08-14 00:00:00.000000000 Z
11
+ date: 2020-08-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ffi