fbcrawl-colly 0.2.0 → 0.2.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 22ec88aaf52a44344fc32ab22f6914f264b49ea10912c527c030ffd97ac12d36
4
- data.tar.gz: d007e4326b15c64e725009548c8e9dac00e263da888d736876b6093a4fba5108
3
+ metadata.gz: 64e7674aa28cfc9c5c6817afef3c3b4ed7106d1620e6da97c3fc7362ac76dc1e
4
+ data.tar.gz: 440cb83ecb7bcb9c461b4c5f9d9e5b3de0f6300384074adee9c08f3be1a68e8b
5
5
  SHA512:
6
- metadata.gz: 448bbb6b045d3b4baa3c0eae23bad85f0daf93159f514ff77e0f9383350b1c52aa3ab5360646099e56ccc19eea0bc8300ba42e67a26262e1e1f3141be61515c0
7
- data.tar.gz: 5d326d26fb0354ea78236849be7328ce49f919435c7c373130f19439c2447f9026ce31f88bc64d5cc72581fadc618a0891cd777f885e438f01217295b58309e5
6
+ metadata.gz: 1fa921c01c8b0381cf85ce4b1544d59912e1997f47fab12f8e29636de10305f410fff12a2137f7370a9749cfd2f28cd2a3ef5953de703056bf63a49770af3daf
7
+ data.tar.gz: 5696f7fe083b55ed24a5cd128dc71a8c7ebe0a7c7a0dda8811fe2e5796a023635c44d3712584b2c98937bfc1a170e7639b6af250c287bca7cbee36211f10669a
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- fbcrawl-colly (0.1.1)
4
+ fbcrawl-colly (0.2.5)
5
5
  ffi
6
6
  google-protobuf
7
7
 
@@ -9,7 +9,7 @@ GEM
9
9
  remote: https://rubygems.org/
10
10
  specs:
11
11
  ffi (1.13.1)
12
- google-protobuf (3.12.4-universal-darwin)
12
+ google-protobuf (3.13.0)
13
13
  minitest (5.14.1)
14
14
  rake (12.3.3)
15
15
  rake-compiler (1.1.1)
@@ -6,9 +6,10 @@ import (
6
6
  "errors"
7
7
  "fmt"
8
8
  "github.com/PuerkitoBio/goquery"
9
- "github.com/gocolly/colly"
10
- "github.com/gocolly/colly/extensions"
11
- "github.com/gocolly/colly/storage"
9
+ "github.com/gocolly/colly/v2"
10
+ "github.com/gocolly/colly/v2/debug"
11
+ "github.com/gocolly/colly/v2/extensions"
12
+ "github.com/gocolly/colly/v2/storage"
12
13
  "github.com/google/logger"
13
14
  "github.com/olebedev/when"
14
15
  "github.com/olebedev/when/rules/common"
@@ -42,7 +43,7 @@ type FbDataFt struct {
42
43
  }
43
44
 
44
45
  func sharedOnRequest(request *colly.Request) {
45
- logger.Info("OnRequest")
46
+ logger.Info("OnRequest ", request.URL)
46
47
  //request.Headers.Set("Host", "facebook.com")
47
48
  request.Headers.Set("Accept-Language", "en-US,en;q=0.9")
48
49
  request.Headers.Set("accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9")
@@ -62,9 +63,10 @@ func sharedOnRequest(request *colly.Request) {
62
63
  func setupSharedCollector(collector *colly.Collector) error {
63
64
  var err error
64
65
  extensions.Referer(collector)
65
-
66
+ collector.AllowURLRevisit = true
66
67
  collector.OnRequest(sharedOnRequest)
67
68
  collector.OnResponse(sharedOnResponse)
69
+ collector.SetDebugger(&debug.LogDebugger{})
68
70
  collector.OnError(func(resp *colly.Response, errHttp error) {
69
71
  err = errHttp
70
72
  logger.Error("OnError", err)
@@ -112,20 +114,23 @@ func (f *Fbcolly) Login(email string, password string, otp string) (string, erro
112
114
 
113
115
  logger.Info("Login using email", email)
114
116
  loggedIn := false
115
-
117
+ firstLogin := true
116
118
  collector.OnHTML("#login_form", func(element *colly.HTMLElement) {
117
- logger.Info("OnHTML login_form")
118
- loginURL, err, reqMap := getForm(element, err)
119
- if err != nil {
120
- logger.Error(err)
121
- return
122
- }
123
- reqMap["email"] = email
124
- reqMap["pass"] = password
125
- logger.Info("req map:", reqMap)
126
- err = collector.Post(loginURL, reqMap)
127
- if err != nil {
128
- logger.Error("post err:", err)
119
+ if firstLogin {
120
+ firstLogin = false
121
+ logger.Info("OnHTML login_form")
122
+ loginURL, err, reqMap := getForm(element, err)
123
+ if err != nil {
124
+ logger.Error(err)
125
+ return
126
+ }
127
+ reqMap["email"] = email
128
+ reqMap["pass"] = password
129
+ logger.Info("req map:", reqMap)
130
+ err = collector.Post(loginURL, reqMap)
131
+ if err != nil {
132
+ logger.Error("post err:", err)
133
+ }
129
134
  }
130
135
  })
131
136
 
@@ -201,16 +206,52 @@ func (f *Fbcolly) FetchGroupFeed(groupId int64) (error, *fbcrawl.FacebookPostLis
201
206
  err = collector.Visit("http://mbasic.facebook.com" + element.Attr("href"))
202
207
  }
203
208
  })
209
+ collector.OnHTML("div[role=\"article\"]", func(element *colly.HTMLElement) {
210
+ dataElement := element
211
+ post := &fbcrawl.FacebookPost{}
212
+ var fbDataFt FbDataFt
213
+ jsonData := dataElement.Attr("data-ft")
214
+
215
+ logger.Info(jsonData)
216
+ err = json.Unmarshal([]byte(jsonData), &fbDataFt)
217
+ if err != nil {
218
+ logger.Error(err)
219
+ return
220
+ }
221
+ logger.Info("Post ", fbDataFt)
222
+ post.Id = fbDataFt.TopLevelPostId
223
+ post.Group = &fbcrawl.FacebookGroup{Id: fbDataFt.PageId, Name: dataElement.DOM.Find("h3 strong:nth-child(2) a").Text()}
224
+ post.User = &fbcrawl.FacebookUser{
225
+ Id: fbDataFt.ContentOwnerIdNew,
226
+ Name: dataElement.DOM.Find("h3 strong:nth-child(1) a").Text(),
227
+ }
228
+ post.CreatedAt = fbDataFt.PageInsights[strconv.FormatInt(fbDataFt.PageId, 10)].PublishTime
229
+ //Content
204
230
 
205
- collector.OnXML("//a[text()=\"Full Story\"]", func(element *colly.XMLElement) {
206
- logger.Info("Post found at", element.Attr("href"))
207
- u, _ := url.Parse("http://mbasic.facebook.com" + element.Attr("href"))
208
- postId, _ := strconv.ParseInt(u.Query().Get("id"), 10, 64)
209
- result = append(result, &fbcrawl.FacebookPost{
210
- Id: postId,
211
- Group: &fbcrawl.FacebookGroup{Id: groupId},
212
- })
213
- //f.detailCollector.Visit(url)
231
+ //NO BACKGROUND TEXT ONLY
232
+ post.Content = strings.Join(dataElement.DOM.Find("p").Map(func(i int, selection *goquery.Selection) string {
233
+ return selection.Text()
234
+ }), "\n")
235
+
236
+ if len(post.Content) == 0 {
237
+ // TEXT WITH BACKGROUND
238
+ post.Content = dataElement.DOM.Find("div[style*=\"background-image:url\"]").Text()
239
+ }
240
+
241
+ post.ContentLink = getUrlFromRedirectHref(dataElement.DOM.Find("a[href*=\"https://lm.facebook.com/l.php\"]").AttrOr("href", ""))
242
+ post.ReactionCount = getNumberFromText(element.DOM.Find("span[id*=\"like_\"]").Text())
243
+ post.CommentCount = getNumberFromText(element.DOM.Find("span[id*=\"like_\"] ~ a").Text())
244
+ post.ContentImages = (funk.Map(fbDataFt.PhotoAttachmentsList, func(id string) *fbcrawl.FacebookImage {
245
+ i, _ := strconv.ParseInt(id, 10, 64)
246
+ return &fbcrawl.FacebookImage{
247
+ Id: i,
248
+ }
249
+ })).([]*fbcrawl.FacebookImage)
250
+
251
+ if fbDataFt.PhotoId > 0 {
252
+ post.ContentImage = &fbcrawl.FacebookImage{Id: fbDataFt.PhotoId}
253
+ }
254
+ result = append(result, post)
214
255
  })
215
256
 
216
257
  err = collector.Visit(fmt.Sprintf("https://mbasic.facebook.com/groups/%d", groupId))
@@ -220,6 +261,26 @@ func (f *Fbcolly) FetchGroupFeed(groupId int64) (error, *fbcrawl.FacebookPostLis
220
261
  return err, &fbcrawl.FacebookPostList{Posts: result}
221
262
  }
222
263
 
264
+ func (f *Fbcolly) FetchGroupInfo(groupIdOrUsername string) (error, *fbcrawl.FacebookGroup) {
265
+ collector := f.collector.Clone()
266
+ err := setupSharedCollector(collector)
267
+ result := &fbcrawl.FacebookGroup{}
268
+
269
+ collector.OnHTML("a[href=\"#groupMenuBottom\"] h1", func(element *colly.HTMLElement) {
270
+ result.Name = element.Text
271
+ })
272
+ collector.OnHTML("a[href*=\"view=member\"]", func(element *colly.HTMLElement) {
273
+ result.Id = getNumberFromText(element.Attr("href"))
274
+ result.MemberCount, _ = strconv.ParseInt(element.DOM.Closest("tr").Find("td:last-child").Text(), 10, 64)
275
+ })
276
+
277
+ err = collector.Visit(fmt.Sprintf("https://mbasic.facebook.com/groups/%s?view=info", groupIdOrUsername))
278
+ if err != nil {
279
+ logger.Error("crawl by colly err:", err)
280
+ }
281
+ return err, result
282
+ }
283
+
223
284
  func (f *Fbcolly) FetchContentImages(postId int64) (error, *fbcrawl.FacebookImageList) {
224
285
  collector := f.collector.Clone()
225
286
  err := setupSharedCollector(collector)
@@ -300,7 +361,7 @@ func (f *Fbcolly) FetchPost(groupId int64, postId int64) (error, *fbcrawl.Facebo
300
361
  }
301
362
 
302
363
  post.ContentLink = getUrlFromRedirectHref(dataElement.Find("a[href*=\"https://lm.facebook.com/l.php\"]").AttrOr("href", ""))
303
-
364
+ post.ReactionCount = getNumberFromText(element.DOM.Find("div[id*=\"sentence_\"]").Text())
304
365
  post.ContentImages = (funk.Map(result.PhotoAttachmentsList, func(id string) *fbcrawl.FacebookImage {
305
366
  i, _ := strconv.ParseInt(id, 10, 64)
306
367
  return &fbcrawl.FacebookImage{
@@ -311,10 +372,6 @@ func (f *Fbcolly) FetchPost(groupId int64, postId int64) (error, *fbcrawl.Facebo
311
372
  if result.PhotoId > 0 {
312
373
  post.ContentImage = &fbcrawl.FacebookImage{Id: result.PhotoId}
313
374
  }
314
-
315
- logger.Info("content", strings.Join(dataElement.Find("p").Map(func(i int, selection *goquery.Selection) string {
316
- return selection.Text()
317
- }), "\n"))
318
375
  }
319
376
 
320
377
  //Comment
@@ -374,3 +431,21 @@ func getImageIdFromHref(href string) int64 {
374
431
  i, _ := strconv.ParseInt(u.Query().Get("fbid"), 10, 64)
375
432
  return i
376
433
  }
434
+
435
+ func getNumberFromText(text string) int64 {
436
+ logger.Error("reaction", text)
437
+ if len(text) > 0 {
438
+ match := regexp.MustCompile("(\\d+)\\s?([km]?)").FindStringSubmatch(text)
439
+ if len(match) > 0 {
440
+ count, _ := strconv.ParseInt(match[1], 10, 64)
441
+ switch match[2] {
442
+ case "k":
443
+ count *= 1000
444
+ case "m":
445
+ count *= 1000000
446
+ }
447
+ return count
448
+ }
449
+ }
450
+ return 0
451
+ }
@@ -7,6 +7,7 @@ option go_package = "./fbcrawl;fbcrawl";
7
7
  message FacebookGroup {
8
8
  int64 id = 1;
9
9
  string name = 2;
10
+ int64 member_count = 3;
10
11
  }
11
12
 
12
13
  message FacebookUser {
@@ -24,6 +25,8 @@ message FacebookPost {
24
25
  repeated FacebookImage content_images = 7;
25
26
  repeated FacebookComment comments = 5;
26
27
  int64 created_at = 9;
28
+ int64 reaction_count = 10;
29
+ int64 comment_count = 11;
27
30
  }
28
31
 
29
32
  message FacebookImage {
data/go.mod CHANGED
@@ -5,20 +5,11 @@ go 1.14
5
5
  require (
6
6
  github.com/PuerkitoBio/goquery v1.5.1
7
7
  github.com/andybalholm/cascadia v1.2.0 // indirect
8
- github.com/antchfx/htmlquery v1.2.3 // indirect
9
- github.com/antchfx/xmlquery v1.2.4 // indirect
10
- github.com/antchfx/xpath v1.1.9 // indirect
11
- github.com/gobwas/glob v0.2.3 // indirect
12
- github.com/gocolly/colly v1.2.0
8
+ github.com/gocolly/colly/v2 v2.1.0
13
9
  github.com/golang/protobuf v1.4.2
14
10
  github.com/google/logger v1.1.0
15
- github.com/kennygrant/sanitize v1.2.4 // indirect
16
11
  github.com/olebedev/when v0.0.0-20190311101825-c3b538a97254
17
- github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca // indirect
18
- github.com/temoto/robotstxt v1.1.1 // indirect
19
12
  github.com/thoas/go-funk v0.7.0
20
13
  golang.org/x/net v0.0.0-20200707034311-ab3426394381 // indirect
21
- golang.org/x/text v0.3.3 // indirect
22
- google.golang.org/appengine v1.6.6 // indirect
23
14
  google.golang.org/protobuf v1.25.0
24
15
  )
data/go.sum CHANGED
@@ -4,7 +4,6 @@ github.com/AlekSi/pointer v1.0.0/go.mod h1:1kjywbfcPFCmncIxtk6fIEub6LKrfMz3gc5QK
4
4
  github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
5
5
  github.com/PuerkitoBio/goquery v1.5.1 h1:PSPBGne8NIUWw+/7vFBV+kG2J/5MOjbzc7154OaKCSE=
6
6
  github.com/PuerkitoBio/goquery v1.5.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc=
7
- github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo=
8
7
  github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
9
8
  github.com/andybalholm/cascadia v1.2.0 h1:vuRCkM5Ozh/BfmsaTm26kbjm0mIOM3yS5Ek/F5h18aE=
10
9
  github.com/andybalholm/cascadia v1.2.0/go.mod h1:YCyR8vOZT9aZ1CHEd8ap0gMVm2aFgxBp0T0eFw1RUQY=
@@ -12,14 +11,13 @@ github.com/antchfx/htmlquery v1.2.3 h1:sP3NFDneHx2stfNXCKbhHFo8XgNjCACnU/4AO5gWz
12
11
  github.com/antchfx/htmlquery v1.2.3/go.mod h1:B0ABL+F5irhhMWg54ymEZinzMSi0Kt3I2if0BLYa3V0=
13
12
  github.com/antchfx/xmlquery v1.2.4 h1:T/SH1bYdzdjTMoz2RgsfVKbM5uWh3gjDYYepFqQmFv4=
14
13
  github.com/antchfx/xmlquery v1.2.4/go.mod h1:KQQuESaxSlqugE2ZBcM/qn+ebIpt+d+4Xx7YcSGAIrM=
15
- github.com/antchfx/xpath v1.1.6 h1:6sVh6hB5T6phw1pFpHRQ+C4bd8sNI+O58flqtg7h0R0=
16
14
  github.com/antchfx/xpath v1.1.6/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk=
17
- github.com/antchfx/xpath v1.1.9 h1:s3QDSTRoZMK6EqLajCUt/BJMUATPHvzwlRqKqg12yjU=
18
- github.com/antchfx/xpath v1.1.9/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk=
15
+ github.com/antchfx/xpath v1.1.8 h1:PcL6bIX42Px5usSx6xRYw/wjB3wYGkj0MJ9MBzEKVgk=
16
+ github.com/antchfx/xpath v1.1.8/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk=
19
17
  github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
20
18
  github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
21
- github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
22
19
  github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
20
+ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
23
21
  github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
24
22
  github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
25
23
  github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
@@ -27,12 +25,13 @@ github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y=
27
25
  github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8=
28
26
  github.com/gocolly/colly v1.2.0 h1:qRz9YAn8FIH0qzgNUw+HT9UN7wm1oF9OBAilwEWpyrI=
29
27
  github.com/gocolly/colly v1.2.0/go.mod h1:Hof5T3ZswNVsOHYmba1u03W65HDWgpV5HifSuueE0EA=
28
+ github.com/gocolly/colly/v2 v2.1.0 h1:k0DuZkDoCsx51bKpRJNEmcxcp+W5N8ziuwGaSDuFoGs=
29
+ github.com/gocolly/colly/v2 v2.1.0/go.mod h1:I2MuhsLjQ+Ex+IzK3afNS8/1qP3AedHOusRPcRdC5o0=
30
30
  github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
31
31
  github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e h1:1r7pUrabqp18hOBcwBwiTsbnFeTZHV9eER/QT5JVZxY=
32
32
  github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
33
33
  github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
34
34
  github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
35
- github.com/golang/protobuf v1.3.1 h1:YF8+flBXS5eO826T4nzqPrxfhQThhXl0YzfuUPu4SBg=
36
35
  github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
37
36
  github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
38
37
  github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8=
@@ -51,6 +50,7 @@ github.com/google/go-cmp v0.5.0 h1:/QaMHBdZ26BB3SSst0Iwl10Epc+xhTquomWX0oZEB6w=
51
50
  github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
52
51
  github.com/google/logger v1.1.0 h1:saB74Etb4EAJNH3z74CVbCKk75hld/8T0CsXKetWCwM=
53
52
  github.com/google/logger v1.1.0/go.mod h1:w7O8nrRr0xufejBlQMI83MXqRusvREoJdaAxV+CoAB4=
53
+ github.com/jawher/mow.cli v1.1.0/go.mod h1:aNaQlc7ozF3vw6IJ2dHjp2ZFiA4ozMIYY6PyuRJwlUg=
54
54
  github.com/kennygrant/sanitize v1.2.4 h1:gN25/otpP5vAsO2djbMhF/LQX6R7+O1TB4yv8NzpJ3o=
55
55
  github.com/kennygrant/sanitize v1.2.4/go.mod h1:LGsjYYtgxbetdg5owWB2mpgUL6e2nfw2eObZ0u0qvak=
56
56
  github.com/olebedev/when v0.0.0-20190311101825-c3b538a97254 h1:JYoQR67E1vv1WGoeW8DkdFs7vrIEe/5wP+qJItd5tUE=
@@ -63,7 +63,7 @@ github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:
63
63
  github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca h1:NugYot0LIVPxTvN8n+Kvkn6TrbMyxQiuvKdEwFdR9vI=
64
64
  github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU=
65
65
  github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
66
- github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
66
+ github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE=
67
67
  github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
68
68
  github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
69
69
  github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
@@ -72,6 +72,7 @@ github.com/temoto/robotstxt v1.1.1/go.mod h1:+1AmkuG3IYkh1kv0d2qEB9Le88ehNO0zwOr
72
72
  github.com/thoas/go-funk v0.7.0 h1:GmirKrs6j6zJbhJIficOsz2aAI7700KsU/5YrdHRM1Y=
73
73
  github.com/thoas/go-funk v0.7.0/go.mod h1:+IWnUfUmFO1+WVYQWQtIJHeRRdaIyyYglZN7xzUPe4Q=
74
74
  golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
75
+ golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
75
76
  golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
76
77
  golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
77
78
  golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
@@ -86,6 +87,7 @@ golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn
86
87
  golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
87
88
  golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
88
89
  golang.org/x/net v0.0.0-20200421231249-e086a090c8fd/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
90
+ golang.org/x/net v0.0.0-20200602114024-627f9648deb9/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
89
91
  golang.org/x/net v0.0.0-20200707034311-ab3426394381 h1:VXak5I6aEWmAXeQjA+QSZzlgNrpq9mjcfDemuexIKsU=
90
92
  golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
91
93
  golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
@@ -100,13 +102,12 @@ golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7w
100
102
  golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
101
103
  golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=
102
104
  golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
103
- golang.org/x/text v0.3.3 h1:cokOdA+Jmi5PJGXLlLllQSgYigAEfHXJAERHVMaCc2k=
104
- golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
105
105
  golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
106
106
  golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
107
107
  golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
108
108
  golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
109
109
  golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
110
+ golang.org/x/tools v0.0.0-20190606124116-d0a3d012864b/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
110
111
  golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
111
112
  golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
112
113
  google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
@@ -125,9 +126,9 @@ google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQ
125
126
  google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE=
126
127
  google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo=
127
128
  google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
128
- google.golang.org/protobuf v1.23.0 h1:4MY060fB1DLGMB/7MBTLnwQUY6+F09GEiz6SsrNqyzM=
129
129
  google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
130
130
  google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
131
+ google.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGjtUeSXeh4=
131
132
  google.golang.org/protobuf v1.25.0 h1:Ejskq+SyPohKW+1uil0JJMtmHCgJPJ/qWTxr8qp+R4c=
132
133
  google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c=
133
134
  gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
@@ -19,6 +19,13 @@ module FbcrawlColly
19
19
  FbcrawlColly::FFI.LoginWithCookies(@colly, cookies)
20
20
  end
21
21
 
22
+ def fetch_group_info(group_id_or_username)
23
+ s, ptr = FbcrawlColly::FFI.FetchGroupInfo(@colly, group_id_or_username.to_s)
24
+ list = FbcrawlColly::FacebookGroup.decode(s)
25
+ FbcrawlColly::FFI.free(ptr)
26
+ list
27
+ end
28
+
22
29
  def fetch_group_feed(group_id)
23
30
  s, ptr = FbcrawlColly::FFI.FetchGroupFeed(@colly, group_id)
24
31
  list = FbcrawlColly::FacebookPostList.decode(s)
@@ -9,6 +9,7 @@ module FbcrawlColly::FFI
9
9
  attach_function :FreeColly, [:pointer], :pointer
10
10
  attach_function :Login, [:pointer, :string, :string], :strptr
11
11
  attach_function :LoginWithCookies, [:pointer, :string], :void
12
+ attach_function :FetchGroupInfo, [:pointer, :string], :strptr
12
13
  attach_function :FetchGroupFeed, [:pointer, :int64], :strptr
13
14
  attach_function :FetchPost, [:pointer, :int64, :int64], :strptr
14
15
  attach_function :FetchContentImages, [:pointer, :int64], :strptr
@@ -1,3 +1,3 @@
1
1
  module FbcrawlColly
2
- VERSION = "0.2.0"
2
+ VERSION = "0.2.6"
3
3
  end
data/main.go CHANGED
@@ -58,6 +58,14 @@ func LoginWithCookies(pointer unsafe.Pointer, cookies *C.char) {
58
58
  p.LoginWithCookies(C.GoString(cookies))
59
59
  }
60
60
 
61
+ //export FetchGroupInfo
62
+ func FetchGroupInfo(pointer unsafe.Pointer, groupIdOrUsername *C.char) unsafe.Pointer {
63
+ p := (*fbcolly.Fbcolly)(pointer)
64
+ _, groupInfo := p.FetchGroupInfo(C.GoString(groupIdOrUsername))
65
+ marshaled, _ := proto.Marshal(groupInfo)
66
+ return C.CBytes(append(marshaled, 0))
67
+ }
68
+
61
69
  //export FetchGroupFeed
62
70
  func FetchGroupFeed(pointer unsafe.Pointer, groupId int64) unsafe.Pointer {
63
71
  p := (*fbcolly.Fbcolly)(pointer)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fbcrawl-colly
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Duy Le
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-08-13 00:00:00.000000000 Z
11
+ date: 2020-08-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ffi