fbcrawl-colly 0.2.0 → 0.2.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +2 -2
- data/fbcolly/fbcolly.go +107 -32
- data/fbcrawl.proto +3 -0
- data/go.mod +1 -10
- data/go.sum +11 -10
- data/lib/fbcrawl_colly/colly.rb +7 -0
- data/lib/fbcrawl_colly/ffi.rb +1 -0
- data/lib/fbcrawl_colly/version.rb +1 -1
- data/main.go +8 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 64e7674aa28cfc9c5c6817afef3c3b4ed7106d1620e6da97c3fc7362ac76dc1e
|
4
|
+
data.tar.gz: 440cb83ecb7bcb9c461b4c5f9d9e5b3de0f6300384074adee9c08f3be1a68e8b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1fa921c01c8b0381cf85ce4b1544d59912e1997f47fab12f8e29636de10305f410fff12a2137f7370a9749cfd2f28cd2a3ef5953de703056bf63a49770af3daf
|
7
|
+
data.tar.gz: 5696f7fe083b55ed24a5cd128dc71a8c7ebe0a7c7a0dda8811fe2e5796a023635c44d3712584b2c98937bfc1a170e7639b6af250c287bca7cbee36211f10669a
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
fbcrawl-colly (0.
|
4
|
+
fbcrawl-colly (0.2.5)
|
5
5
|
ffi
|
6
6
|
google-protobuf
|
7
7
|
|
@@ -9,7 +9,7 @@ GEM
|
|
9
9
|
remote: https://rubygems.org/
|
10
10
|
specs:
|
11
11
|
ffi (1.13.1)
|
12
|
-
google-protobuf (3.
|
12
|
+
google-protobuf (3.13.0)
|
13
13
|
minitest (5.14.1)
|
14
14
|
rake (12.3.3)
|
15
15
|
rake-compiler (1.1.1)
|
data/fbcolly/fbcolly.go
CHANGED
@@ -6,9 +6,10 @@ import (
|
|
6
6
|
"errors"
|
7
7
|
"fmt"
|
8
8
|
"github.com/PuerkitoBio/goquery"
|
9
|
-
"github.com/gocolly/colly"
|
10
|
-
"github.com/gocolly/colly/
|
11
|
-
"github.com/gocolly/colly/
|
9
|
+
"github.com/gocolly/colly/v2"
|
10
|
+
"github.com/gocolly/colly/v2/debug"
|
11
|
+
"github.com/gocolly/colly/v2/extensions"
|
12
|
+
"github.com/gocolly/colly/v2/storage"
|
12
13
|
"github.com/google/logger"
|
13
14
|
"github.com/olebedev/when"
|
14
15
|
"github.com/olebedev/when/rules/common"
|
@@ -42,7 +43,7 @@ type FbDataFt struct {
|
|
42
43
|
}
|
43
44
|
|
44
45
|
func sharedOnRequest(request *colly.Request) {
|
45
|
-
logger.Info("OnRequest")
|
46
|
+
logger.Info("OnRequest ", request.URL)
|
46
47
|
//request.Headers.Set("Host", "facebook.com")
|
47
48
|
request.Headers.Set("Accept-Language", "en-US,en;q=0.9")
|
48
49
|
request.Headers.Set("accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9")
|
@@ -62,9 +63,10 @@ func sharedOnRequest(request *colly.Request) {
|
|
62
63
|
func setupSharedCollector(collector *colly.Collector) error {
|
63
64
|
var err error
|
64
65
|
extensions.Referer(collector)
|
65
|
-
|
66
|
+
collector.AllowURLRevisit = true
|
66
67
|
collector.OnRequest(sharedOnRequest)
|
67
68
|
collector.OnResponse(sharedOnResponse)
|
69
|
+
collector.SetDebugger(&debug.LogDebugger{})
|
68
70
|
collector.OnError(func(resp *colly.Response, errHttp error) {
|
69
71
|
err = errHttp
|
70
72
|
logger.Error("OnError", err)
|
@@ -112,20 +114,23 @@ func (f *Fbcolly) Login(email string, password string, otp string) (string, erro
|
|
112
114
|
|
113
115
|
logger.Info("Login using email", email)
|
114
116
|
loggedIn := false
|
115
|
-
|
117
|
+
firstLogin := true
|
116
118
|
collector.OnHTML("#login_form", func(element *colly.HTMLElement) {
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
119
|
+
if firstLogin {
|
120
|
+
firstLogin = false
|
121
|
+
logger.Info("OnHTML login_form")
|
122
|
+
loginURL, err, reqMap := getForm(element, err)
|
123
|
+
if err != nil {
|
124
|
+
logger.Error(err)
|
125
|
+
return
|
126
|
+
}
|
127
|
+
reqMap["email"] = email
|
128
|
+
reqMap["pass"] = password
|
129
|
+
logger.Info("req map:", reqMap)
|
130
|
+
err = collector.Post(loginURL, reqMap)
|
131
|
+
if err != nil {
|
132
|
+
logger.Error("post err:", err)
|
133
|
+
}
|
129
134
|
}
|
130
135
|
})
|
131
136
|
|
@@ -201,16 +206,52 @@ func (f *Fbcolly) FetchGroupFeed(groupId int64) (error, *fbcrawl.FacebookPostLis
|
|
201
206
|
err = collector.Visit("http://mbasic.facebook.com" + element.Attr("href"))
|
202
207
|
}
|
203
208
|
})
|
209
|
+
collector.OnHTML("div[role=\"article\"]", func(element *colly.HTMLElement) {
|
210
|
+
dataElement := element
|
211
|
+
post := &fbcrawl.FacebookPost{}
|
212
|
+
var fbDataFt FbDataFt
|
213
|
+
jsonData := dataElement.Attr("data-ft")
|
214
|
+
|
215
|
+
logger.Info(jsonData)
|
216
|
+
err = json.Unmarshal([]byte(jsonData), &fbDataFt)
|
217
|
+
if err != nil {
|
218
|
+
logger.Error(err)
|
219
|
+
return
|
220
|
+
}
|
221
|
+
logger.Info("Post ", fbDataFt)
|
222
|
+
post.Id = fbDataFt.TopLevelPostId
|
223
|
+
post.Group = &fbcrawl.FacebookGroup{Id: fbDataFt.PageId, Name: dataElement.DOM.Find("h3 strong:nth-child(2) a").Text()}
|
224
|
+
post.User = &fbcrawl.FacebookUser{
|
225
|
+
Id: fbDataFt.ContentOwnerIdNew,
|
226
|
+
Name: dataElement.DOM.Find("h3 strong:nth-child(1) a").Text(),
|
227
|
+
}
|
228
|
+
post.CreatedAt = fbDataFt.PageInsights[strconv.FormatInt(fbDataFt.PageId, 10)].PublishTime
|
229
|
+
//Content
|
204
230
|
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
231
|
+
//NO BACKGROUND TEXT ONLY
|
232
|
+
post.Content = strings.Join(dataElement.DOM.Find("p").Map(func(i int, selection *goquery.Selection) string {
|
233
|
+
return selection.Text()
|
234
|
+
}), "\n")
|
235
|
+
|
236
|
+
if len(post.Content) == 0 {
|
237
|
+
// TEXT WITH BACKGROUND
|
238
|
+
post.Content = dataElement.DOM.Find("div[style*=\"background-image:url\"]").Text()
|
239
|
+
}
|
240
|
+
|
241
|
+
post.ContentLink = getUrlFromRedirectHref(dataElement.DOM.Find("a[href*=\"https://lm.facebook.com/l.php\"]").AttrOr("href", ""))
|
242
|
+
post.ReactionCount = getNumberFromText(element.DOM.Find("span[id*=\"like_\"]").Text())
|
243
|
+
post.CommentCount = getNumberFromText(element.DOM.Find("span[id*=\"like_\"] ~ a").Text())
|
244
|
+
post.ContentImages = (funk.Map(fbDataFt.PhotoAttachmentsList, func(id string) *fbcrawl.FacebookImage {
|
245
|
+
i, _ := strconv.ParseInt(id, 10, 64)
|
246
|
+
return &fbcrawl.FacebookImage{
|
247
|
+
Id: i,
|
248
|
+
}
|
249
|
+
})).([]*fbcrawl.FacebookImage)
|
250
|
+
|
251
|
+
if fbDataFt.PhotoId > 0 {
|
252
|
+
post.ContentImage = &fbcrawl.FacebookImage{Id: fbDataFt.PhotoId}
|
253
|
+
}
|
254
|
+
result = append(result, post)
|
214
255
|
})
|
215
256
|
|
216
257
|
err = collector.Visit(fmt.Sprintf("https://mbasic.facebook.com/groups/%d", groupId))
|
@@ -220,6 +261,26 @@ func (f *Fbcolly) FetchGroupFeed(groupId int64) (error, *fbcrawl.FacebookPostLis
|
|
220
261
|
return err, &fbcrawl.FacebookPostList{Posts: result}
|
221
262
|
}
|
222
263
|
|
264
|
+
func (f *Fbcolly) FetchGroupInfo(groupIdOrUsername string) (error, *fbcrawl.FacebookGroup) {
|
265
|
+
collector := f.collector.Clone()
|
266
|
+
err := setupSharedCollector(collector)
|
267
|
+
result := &fbcrawl.FacebookGroup{}
|
268
|
+
|
269
|
+
collector.OnHTML("a[href=\"#groupMenuBottom\"] h1", func(element *colly.HTMLElement) {
|
270
|
+
result.Name = element.Text
|
271
|
+
})
|
272
|
+
collector.OnHTML("a[href*=\"view=member\"]", func(element *colly.HTMLElement) {
|
273
|
+
result.Id = getNumberFromText(element.Attr("href"))
|
274
|
+
result.MemberCount, _ = strconv.ParseInt(element.DOM.Closest("tr").Find("td:last-child").Text(), 10, 64)
|
275
|
+
})
|
276
|
+
|
277
|
+
err = collector.Visit(fmt.Sprintf("https://mbasic.facebook.com/groups/%s?view=info", groupIdOrUsername))
|
278
|
+
if err != nil {
|
279
|
+
logger.Error("crawl by colly err:", err)
|
280
|
+
}
|
281
|
+
return err, result
|
282
|
+
}
|
283
|
+
|
223
284
|
func (f *Fbcolly) FetchContentImages(postId int64) (error, *fbcrawl.FacebookImageList) {
|
224
285
|
collector := f.collector.Clone()
|
225
286
|
err := setupSharedCollector(collector)
|
@@ -300,7 +361,7 @@ func (f *Fbcolly) FetchPost(groupId int64, postId int64) (error, *fbcrawl.Facebo
|
|
300
361
|
}
|
301
362
|
|
302
363
|
post.ContentLink = getUrlFromRedirectHref(dataElement.Find("a[href*=\"https://lm.facebook.com/l.php\"]").AttrOr("href", ""))
|
303
|
-
|
364
|
+
post.ReactionCount = getNumberFromText(element.DOM.Find("div[id*=\"sentence_\"]").Text())
|
304
365
|
post.ContentImages = (funk.Map(result.PhotoAttachmentsList, func(id string) *fbcrawl.FacebookImage {
|
305
366
|
i, _ := strconv.ParseInt(id, 10, 64)
|
306
367
|
return &fbcrawl.FacebookImage{
|
@@ -311,10 +372,6 @@ func (f *Fbcolly) FetchPost(groupId int64, postId int64) (error, *fbcrawl.Facebo
|
|
311
372
|
if result.PhotoId > 0 {
|
312
373
|
post.ContentImage = &fbcrawl.FacebookImage{Id: result.PhotoId}
|
313
374
|
}
|
314
|
-
|
315
|
-
logger.Info("content", strings.Join(dataElement.Find("p").Map(func(i int, selection *goquery.Selection) string {
|
316
|
-
return selection.Text()
|
317
|
-
}), "\n"))
|
318
375
|
}
|
319
376
|
|
320
377
|
//Comment
|
@@ -374,3 +431,21 @@ func getImageIdFromHref(href string) int64 {
|
|
374
431
|
i, _ := strconv.ParseInt(u.Query().Get("fbid"), 10, 64)
|
375
432
|
return i
|
376
433
|
}
|
434
|
+
|
435
|
+
func getNumberFromText(text string) int64 {
|
436
|
+
logger.Error("reaction", text)
|
437
|
+
if len(text) > 0 {
|
438
|
+
match := regexp.MustCompile("(\\d+)\\s?([km]?)").FindStringSubmatch(text)
|
439
|
+
if len(match) > 0 {
|
440
|
+
count, _ := strconv.ParseInt(match[1], 10, 64)
|
441
|
+
switch match[2] {
|
442
|
+
case "k":
|
443
|
+
count *= 1000
|
444
|
+
case "m":
|
445
|
+
count *= 1000000
|
446
|
+
}
|
447
|
+
return count
|
448
|
+
}
|
449
|
+
}
|
450
|
+
return 0
|
451
|
+
}
|
data/fbcrawl.proto
CHANGED
@@ -7,6 +7,7 @@ option go_package = "./fbcrawl;fbcrawl";
|
|
7
7
|
message FacebookGroup {
|
8
8
|
int64 id = 1;
|
9
9
|
string name = 2;
|
10
|
+
int64 member_count = 3;
|
10
11
|
}
|
11
12
|
|
12
13
|
message FacebookUser {
|
@@ -24,6 +25,8 @@ message FacebookPost {
|
|
24
25
|
repeated FacebookImage content_images = 7;
|
25
26
|
repeated FacebookComment comments = 5;
|
26
27
|
int64 created_at = 9;
|
28
|
+
int64 reaction_count = 10;
|
29
|
+
int64 comment_count = 11;
|
27
30
|
}
|
28
31
|
|
29
32
|
message FacebookImage {
|
data/go.mod
CHANGED
@@ -5,20 +5,11 @@ go 1.14
|
|
5
5
|
require (
|
6
6
|
github.com/PuerkitoBio/goquery v1.5.1
|
7
7
|
github.com/andybalholm/cascadia v1.2.0 // indirect
|
8
|
-
github.com/
|
9
|
-
github.com/antchfx/xmlquery v1.2.4 // indirect
|
10
|
-
github.com/antchfx/xpath v1.1.9 // indirect
|
11
|
-
github.com/gobwas/glob v0.2.3 // indirect
|
12
|
-
github.com/gocolly/colly v1.2.0
|
8
|
+
github.com/gocolly/colly/v2 v2.1.0
|
13
9
|
github.com/golang/protobuf v1.4.2
|
14
10
|
github.com/google/logger v1.1.0
|
15
|
-
github.com/kennygrant/sanitize v1.2.4 // indirect
|
16
11
|
github.com/olebedev/when v0.0.0-20190311101825-c3b538a97254
|
17
|
-
github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca // indirect
|
18
|
-
github.com/temoto/robotstxt v1.1.1 // indirect
|
19
12
|
github.com/thoas/go-funk v0.7.0
|
20
13
|
golang.org/x/net v0.0.0-20200707034311-ab3426394381 // indirect
|
21
|
-
golang.org/x/text v0.3.3 // indirect
|
22
|
-
google.golang.org/appengine v1.6.6 // indirect
|
23
14
|
google.golang.org/protobuf v1.25.0
|
24
15
|
)
|
data/go.sum
CHANGED
@@ -4,7 +4,6 @@ github.com/AlekSi/pointer v1.0.0/go.mod h1:1kjywbfcPFCmncIxtk6fIEub6LKrfMz3gc5QK
|
|
4
4
|
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
|
5
5
|
github.com/PuerkitoBio/goquery v1.5.1 h1:PSPBGne8NIUWw+/7vFBV+kG2J/5MOjbzc7154OaKCSE=
|
6
6
|
github.com/PuerkitoBio/goquery v1.5.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc=
|
7
|
-
github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo=
|
8
7
|
github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
|
9
8
|
github.com/andybalholm/cascadia v1.2.0 h1:vuRCkM5Ozh/BfmsaTm26kbjm0mIOM3yS5Ek/F5h18aE=
|
10
9
|
github.com/andybalholm/cascadia v1.2.0/go.mod h1:YCyR8vOZT9aZ1CHEd8ap0gMVm2aFgxBp0T0eFw1RUQY=
|
@@ -12,14 +11,13 @@ github.com/antchfx/htmlquery v1.2.3 h1:sP3NFDneHx2stfNXCKbhHFo8XgNjCACnU/4AO5gWz
|
|
12
11
|
github.com/antchfx/htmlquery v1.2.3/go.mod h1:B0ABL+F5irhhMWg54ymEZinzMSi0Kt3I2if0BLYa3V0=
|
13
12
|
github.com/antchfx/xmlquery v1.2.4 h1:T/SH1bYdzdjTMoz2RgsfVKbM5uWh3gjDYYepFqQmFv4=
|
14
13
|
github.com/antchfx/xmlquery v1.2.4/go.mod h1:KQQuESaxSlqugE2ZBcM/qn+ebIpt+d+4Xx7YcSGAIrM=
|
15
|
-
github.com/antchfx/xpath v1.1.6 h1:6sVh6hB5T6phw1pFpHRQ+C4bd8sNI+O58flqtg7h0R0=
|
16
14
|
github.com/antchfx/xpath v1.1.6/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk=
|
17
|
-
github.com/antchfx/xpath v1.1.
|
18
|
-
github.com/antchfx/xpath v1.1.
|
15
|
+
github.com/antchfx/xpath v1.1.8 h1:PcL6bIX42Px5usSx6xRYw/wjB3wYGkj0MJ9MBzEKVgk=
|
16
|
+
github.com/antchfx/xpath v1.1.8/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk=
|
19
17
|
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
|
20
18
|
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
|
21
|
-
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
|
22
19
|
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
20
|
+
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
23
21
|
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
24
22
|
github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
|
25
23
|
github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
|
@@ -27,12 +25,13 @@ github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y=
|
|
27
25
|
github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8=
|
28
26
|
github.com/gocolly/colly v1.2.0 h1:qRz9YAn8FIH0qzgNUw+HT9UN7wm1oF9OBAilwEWpyrI=
|
29
27
|
github.com/gocolly/colly v1.2.0/go.mod h1:Hof5T3ZswNVsOHYmba1u03W65HDWgpV5HifSuueE0EA=
|
28
|
+
github.com/gocolly/colly/v2 v2.1.0 h1:k0DuZkDoCsx51bKpRJNEmcxcp+W5N8ziuwGaSDuFoGs=
|
29
|
+
github.com/gocolly/colly/v2 v2.1.0/go.mod h1:I2MuhsLjQ+Ex+IzK3afNS8/1qP3AedHOusRPcRdC5o0=
|
30
30
|
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
|
31
31
|
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e h1:1r7pUrabqp18hOBcwBwiTsbnFeTZHV9eER/QT5JVZxY=
|
32
32
|
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
|
33
33
|
github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
|
34
34
|
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
|
35
|
-
github.com/golang/protobuf v1.3.1 h1:YF8+flBXS5eO826T4nzqPrxfhQThhXl0YzfuUPu4SBg=
|
36
35
|
github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
|
37
36
|
github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
|
38
37
|
github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8=
|
@@ -51,6 +50,7 @@ github.com/google/go-cmp v0.5.0 h1:/QaMHBdZ26BB3SSst0Iwl10Epc+xhTquomWX0oZEB6w=
|
|
51
50
|
github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
52
51
|
github.com/google/logger v1.1.0 h1:saB74Etb4EAJNH3z74CVbCKk75hld/8T0CsXKetWCwM=
|
53
52
|
github.com/google/logger v1.1.0/go.mod h1:w7O8nrRr0xufejBlQMI83MXqRusvREoJdaAxV+CoAB4=
|
53
|
+
github.com/jawher/mow.cli v1.1.0/go.mod h1:aNaQlc7ozF3vw6IJ2dHjp2ZFiA4ozMIYY6PyuRJwlUg=
|
54
54
|
github.com/kennygrant/sanitize v1.2.4 h1:gN25/otpP5vAsO2djbMhF/LQX6R7+O1TB4yv8NzpJ3o=
|
55
55
|
github.com/kennygrant/sanitize v1.2.4/go.mod h1:LGsjYYtgxbetdg5owWB2mpgUL6e2nfw2eObZ0u0qvak=
|
56
56
|
github.com/olebedev/when v0.0.0-20190311101825-c3b538a97254 h1:JYoQR67E1vv1WGoeW8DkdFs7vrIEe/5wP+qJItd5tUE=
|
@@ -63,7 +63,7 @@ github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:
|
|
63
63
|
github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca h1:NugYot0LIVPxTvN8n+Kvkn6TrbMyxQiuvKdEwFdR9vI=
|
64
64
|
github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU=
|
65
65
|
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
66
|
-
github.com/stretchr/
|
66
|
+
github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE=
|
67
67
|
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
68
68
|
github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
|
69
69
|
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
|
@@ -72,6 +72,7 @@ github.com/temoto/robotstxt v1.1.1/go.mod h1:+1AmkuG3IYkh1kv0d2qEB9Le88ehNO0zwOr
|
|
72
72
|
github.com/thoas/go-funk v0.7.0 h1:GmirKrs6j6zJbhJIficOsz2aAI7700KsU/5YrdHRM1Y=
|
73
73
|
github.com/thoas/go-funk v0.7.0/go.mod h1:+IWnUfUmFO1+WVYQWQtIJHeRRdaIyyYglZN7xzUPe4Q=
|
74
74
|
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
75
|
+
golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
|
75
76
|
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
|
76
77
|
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
|
77
78
|
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
|
@@ -86,6 +87,7 @@ golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn
|
|
86
87
|
golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
|
87
88
|
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
88
89
|
golang.org/x/net v0.0.0-20200421231249-e086a090c8fd/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
|
90
|
+
golang.org/x/net v0.0.0-20200602114024-627f9648deb9/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
|
89
91
|
golang.org/x/net v0.0.0-20200707034311-ab3426394381 h1:VXak5I6aEWmAXeQjA+QSZzlgNrpq9mjcfDemuexIKsU=
|
90
92
|
golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
|
91
93
|
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
|
@@ -100,13 +102,12 @@ golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7w
|
|
100
102
|
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
101
103
|
golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=
|
102
104
|
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
|
103
|
-
golang.org/x/text v0.3.3 h1:cokOdA+Jmi5PJGXLlLllQSgYigAEfHXJAERHVMaCc2k=
|
104
|
-
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
105
105
|
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
106
106
|
golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
107
107
|
golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
|
108
108
|
golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
|
109
109
|
golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
|
110
|
+
golang.org/x/tools v0.0.0-20190606124116-d0a3d012864b/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
|
110
111
|
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
|
111
112
|
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
112
113
|
google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
|
@@ -125,9 +126,9 @@ google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQ
|
|
125
126
|
google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE=
|
126
127
|
google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo=
|
127
128
|
google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
|
128
|
-
google.golang.org/protobuf v1.23.0 h1:4MY060fB1DLGMB/7MBTLnwQUY6+F09GEiz6SsrNqyzM=
|
129
129
|
google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
|
130
130
|
google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
|
131
|
+
google.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGjtUeSXeh4=
|
131
132
|
google.golang.org/protobuf v1.25.0 h1:Ejskq+SyPohKW+1uil0JJMtmHCgJPJ/qWTxr8qp+R4c=
|
132
133
|
google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c=
|
133
134
|
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
data/lib/fbcrawl_colly/colly.rb
CHANGED
@@ -19,6 +19,13 @@ module FbcrawlColly
|
|
19
19
|
FbcrawlColly::FFI.LoginWithCookies(@colly, cookies)
|
20
20
|
end
|
21
21
|
|
22
|
+
def fetch_group_info(group_id_or_username)
|
23
|
+
s, ptr = FbcrawlColly::FFI.FetchGroupInfo(@colly, group_id_or_username.to_s)
|
24
|
+
list = FbcrawlColly::FacebookGroup.decode(s)
|
25
|
+
FbcrawlColly::FFI.free(ptr)
|
26
|
+
list
|
27
|
+
end
|
28
|
+
|
22
29
|
def fetch_group_feed(group_id)
|
23
30
|
s, ptr = FbcrawlColly::FFI.FetchGroupFeed(@colly, group_id)
|
24
31
|
list = FbcrawlColly::FacebookPostList.decode(s)
|
data/lib/fbcrawl_colly/ffi.rb
CHANGED
@@ -9,6 +9,7 @@ module FbcrawlColly::FFI
|
|
9
9
|
attach_function :FreeColly, [:pointer], :pointer
|
10
10
|
attach_function :Login, [:pointer, :string, :string], :strptr
|
11
11
|
attach_function :LoginWithCookies, [:pointer, :string], :void
|
12
|
+
attach_function :FetchGroupInfo, [:pointer, :string], :strptr
|
12
13
|
attach_function :FetchGroupFeed, [:pointer, :int64], :strptr
|
13
14
|
attach_function :FetchPost, [:pointer, :int64, :int64], :strptr
|
14
15
|
attach_function :FetchContentImages, [:pointer, :int64], :strptr
|
data/main.go
CHANGED
@@ -58,6 +58,14 @@ func LoginWithCookies(pointer unsafe.Pointer, cookies *C.char) {
|
|
58
58
|
p.LoginWithCookies(C.GoString(cookies))
|
59
59
|
}
|
60
60
|
|
61
|
+
//export FetchGroupInfo
|
62
|
+
func FetchGroupInfo(pointer unsafe.Pointer, groupIdOrUsername *C.char) unsafe.Pointer {
|
63
|
+
p := (*fbcolly.Fbcolly)(pointer)
|
64
|
+
_, groupInfo := p.FetchGroupInfo(C.GoString(groupIdOrUsername))
|
65
|
+
marshaled, _ := proto.Marshal(groupInfo)
|
66
|
+
return C.CBytes(append(marshaled, 0))
|
67
|
+
}
|
68
|
+
|
61
69
|
//export FetchGroupFeed
|
62
70
|
func FetchGroupFeed(pointer unsafe.Pointer, groupId int64) unsafe.Pointer {
|
63
71
|
p := (*fbcolly.Fbcolly)(pointer)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fbcrawl-colly
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Duy Le
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-08-
|
11
|
+
date: 2020-08-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ffi
|