fbcrawl-colly 0.2.6 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +0 -2
- data/Dockerfile +14 -0
- data/Gemfile.lock +8 -7
- data/README.md +1 -1
- data/fbcrawl-colly.gemspec +2 -6
- data/fbcrawl.proto +76 -3
- data/{fbcolly → fbcrawl}/fbcolly.go +63 -56
- data/fbcrawl/pb/fbcrawl.pb.go +1647 -0
- data/fbcrawl/pb/fbcrawl_grpc.pb.go +380 -0
- data/go.mod +4 -1
- data/go.sum +10 -0
- data/lib/fbcrawl-colly.rb +1 -4
- data/lib/fbcrawl_colly.rb +5 -0
- data/lib/fbcrawl_colly/client.rb +46 -0
- data/lib/fbcrawl_colly/version.rb +1 -1
- data/lib/pb/fbcrawl_pb.rb +117 -0
- data/lib/pb/fbcrawl_services_pb.rb +31 -0
- data/main.go +74 -77
- metadata +14 -26
- data/ext/fbcrawl_colly/.gitignore +0 -2
- data/ext/fbcrawl_colly/Makefile +0 -6
- data/ext/fbcrawl_colly/extconf.rb +0 -6
- data/lib/fbcrawl_colly/colly.rb +0 -57
- data/lib/fbcrawl_colly/ffi.rb +0 -18
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 00d52fcc50bd651adaa80cf6102d7cc3997a07605ebb91fd33006b6e535ed6fc
|
4
|
+
data.tar.gz: 24388fa7c98d6dfbe58ba195c330ea3c363f6ef8e586873dd9e485ad29907a6c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 15622279b09030f929218482add87878b58fdcb1e89e1ab8a1124531435707dbe26367fee75204a55fc88f8486c47cbb6ddfdb86035aa24694b1b7d6639aaecc
|
7
|
+
data.tar.gz: df50b30ad0234c824505cca54fdf33d0be54c4725c684f0317b1eba81f89d22d7b434bec4e9373b3d43dd830dc17693ac2fb785e75cefc71c6cbeefc44da2616
|
data/.gitignore
CHANGED
data/Dockerfile
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
FROM golang:1.14-alpine
|
2
|
+
RUN apk add --no-cache git build-base tzdata
|
3
|
+
|
4
|
+
RUN mkdir -p /app
|
5
|
+
WORKDIR /app
|
6
|
+
ADD ./go.mod /app
|
7
|
+
ADD ./go.sum /app
|
8
|
+
ADD ./ /app
|
9
|
+
RUN go get
|
10
|
+
|
11
|
+
|
12
|
+
ENV PORT 3000
|
13
|
+
RUN go build -o server qnetwork.net/fbcrawl
|
14
|
+
ENTRYPOINT ["./server"]
|
data/Gemfile.lock
CHANGED
@@ -1,19 +1,21 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
fbcrawl-colly (0.2.6)
|
5
|
-
ffi
|
4
|
+
fbcrawl-colly (1.0.0)
|
6
5
|
google-protobuf
|
6
|
+
grpc
|
7
7
|
|
8
8
|
GEM
|
9
9
|
remote: https://rubygems.org/
|
10
10
|
specs:
|
11
|
-
|
12
|
-
|
11
|
+
google-protobuf (3.13.0-universal-darwin)
|
12
|
+
googleapis-common-protos-types (1.0.5)
|
13
|
+
google-protobuf (~> 3.11)
|
14
|
+
grpc (1.30.2-universal-darwin)
|
15
|
+
google-protobuf (~> 3.12)
|
16
|
+
googleapis-common-protos-types (~> 1.0)
|
13
17
|
minitest (5.14.1)
|
14
18
|
rake (12.3.3)
|
15
|
-
rake-compiler (1.1.1)
|
16
|
-
rake
|
17
19
|
|
18
20
|
PLATFORMS
|
19
21
|
ruby
|
@@ -22,7 +24,6 @@ DEPENDENCIES
|
|
22
24
|
fbcrawl-colly!
|
23
25
|
minitest (~> 5.0)
|
24
26
|
rake (~> 12.0)
|
25
|
-
rake-compiler
|
26
27
|
|
27
28
|
BUNDLED WITH
|
28
29
|
2.1.4
|
data/README.md
CHANGED
data/fbcrawl-colly.gemspec
CHANGED
@@ -25,12 +25,8 @@ Gem::Specification.new do |spec|
|
|
25
25
|
end
|
26
26
|
# spec.bindir = "exe"
|
27
27
|
# spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
28
|
-
spec.
|
29
|
-
'ext/fbcrawl_colly/extconf.rb'
|
30
|
-
]
|
31
|
-
spec.require_paths = ["lib"]
|
28
|
+
spec.require_paths = %w[lib lib/pb]
|
32
29
|
|
33
|
-
spec.add_runtime_dependency 'ffi'
|
34
30
|
spec.add_runtime_dependency 'google-protobuf'
|
35
|
-
spec.
|
31
|
+
spec.add_runtime_dependency 'grpc'
|
36
32
|
end
|
data/fbcrawl.proto
CHANGED
@@ -1,7 +1,73 @@
|
|
1
1
|
syntax = "proto3";
|
2
2
|
|
3
3
|
package fbcrawl_colly;
|
4
|
-
option go_package = "./fbcrawl;
|
4
|
+
option go_package = "./fbcrawl/pb;pb";
|
5
|
+
|
6
|
+
service Grpc {
|
7
|
+
// Sends a greeting
|
8
|
+
rpc Init (Empty) returns (Pointer) {}
|
9
|
+
rpc FreeColly (Pointer) returns (Empty) {}
|
10
|
+
rpc Login (LoginRequest) returns (LoginResponse) {}
|
11
|
+
rpc LoginWithCookies (LoginWithCookiesRequest) returns (Empty) {}
|
12
|
+
rpc FetchGroupInfo (FetchGroupInfoRequest) returns (FacebookGroup) {}
|
13
|
+
rpc FetchGroupFeed (FetchGroupFeedRequest) returns (FacebookPostList) {}
|
14
|
+
rpc FetchPost (FetchPostRequest) returns (FacebookPost) {}
|
15
|
+
rpc FetchContentImages (FetchContentImagesRequest) returns (FacebookImageList) {}
|
16
|
+
rpc FetchImageUrl (FetchImageUrlRequest) returns (FacebookImage) {}
|
17
|
+
}
|
18
|
+
|
19
|
+
message Empty {
|
20
|
+
|
21
|
+
}
|
22
|
+
|
23
|
+
message Pointer {
|
24
|
+
int64 address = 1;
|
25
|
+
}
|
26
|
+
|
27
|
+
message LoginRequest {
|
28
|
+
Pointer pointer = 1;
|
29
|
+
string email = 2;
|
30
|
+
string password = 3;
|
31
|
+
string totp_secret = 4;
|
32
|
+
}
|
33
|
+
|
34
|
+
message LoginResponse {
|
35
|
+
string cookies = 1;
|
36
|
+
}
|
37
|
+
|
38
|
+
message LoginWithCookiesRequest {
|
39
|
+
Pointer pointer = 1;
|
40
|
+
string cookies = 2;
|
41
|
+
}
|
42
|
+
|
43
|
+
message FetchGroupInfoRequest {
|
44
|
+
Pointer pointer = 1;
|
45
|
+
string group_username = 2;
|
46
|
+
}
|
47
|
+
|
48
|
+
message FetchGroupFeedRequest {
|
49
|
+
Pointer pointer = 1;
|
50
|
+
int64 group_id = 2;
|
51
|
+
string next_cursor = 3;
|
52
|
+
}
|
53
|
+
|
54
|
+
message FetchPostRequest {
|
55
|
+
Pointer pointer = 1;
|
56
|
+
int64 group_id = 2;
|
57
|
+
int64 post_id = 3;
|
58
|
+
string comment_next_cursor = 4;
|
59
|
+
}
|
60
|
+
|
61
|
+
message FetchContentImagesRequest {
|
62
|
+
Pointer pointer = 1;
|
63
|
+
int64 post_id = 2;
|
64
|
+
string next_cursor = 3;
|
65
|
+
}
|
66
|
+
|
67
|
+
message FetchImageUrlRequest {
|
68
|
+
Pointer pointer = 1;
|
69
|
+
int64 image_id = 2;
|
70
|
+
}
|
5
71
|
|
6
72
|
// The request message containing the user's name.
|
7
73
|
message FacebookGroup {
|
@@ -20,15 +86,20 @@ message FacebookPost {
|
|
20
86
|
FacebookGroup group = 2;
|
21
87
|
FacebookUser user = 3;
|
22
88
|
string content = 4;
|
89
|
+
CommentList comments = 5;
|
23
90
|
string content_link = 6;
|
24
|
-
FacebookImage content_image = 8;
|
25
91
|
repeated FacebookImage content_images = 7;
|
26
|
-
|
92
|
+
FacebookImage content_image = 8;
|
27
93
|
int64 created_at = 9;
|
28
94
|
int64 reaction_count = 10;
|
29
95
|
int64 comment_count = 11;
|
30
96
|
}
|
31
97
|
|
98
|
+
message CommentList {
|
99
|
+
repeated FacebookComment comments = 5;
|
100
|
+
string next_cursor = 12;
|
101
|
+
}
|
102
|
+
|
32
103
|
message FacebookImage {
|
33
104
|
int64 id = 1;
|
34
105
|
string url = 2;
|
@@ -44,8 +115,10 @@ message FacebookComment {
|
|
44
115
|
|
45
116
|
message FacebookPostList {
|
46
117
|
repeated FacebookPost posts = 1;
|
118
|
+
string next_cursor = 2;
|
47
119
|
}
|
48
120
|
|
49
121
|
message FacebookImageList {
|
50
122
|
repeated FacebookImage images = 1;
|
123
|
+
string next_cursor = 2;
|
51
124
|
}
|
@@ -15,8 +15,9 @@ import (
|
|
15
15
|
"github.com/olebedev/when/rules/common"
|
16
16
|
"github.com/olebedev/when/rules/en"
|
17
17
|
"github.com/thoas/go-funk"
|
18
|
+
"github.com/xlzd/gotp"
|
18
19
|
"net/url"
|
19
|
-
"qnetwork.net/fbcrawl/fbcrawl"
|
20
|
+
"qnetwork.net/fbcrawl/fbcrawl/pb"
|
20
21
|
"regexp"
|
21
22
|
"strconv"
|
22
23
|
"strings"
|
@@ -108,7 +109,7 @@ func New() *Fbcolly {
|
|
108
109
|
return &f
|
109
110
|
}
|
110
111
|
|
111
|
-
func (f *Fbcolly) Login(email string, password string, otp string) (string, error) {
|
112
|
+
func (f *Fbcolly) Login(email string, password string, totpSecret string) (string, error) {
|
112
113
|
collector := f.collector.Clone()
|
113
114
|
err := setupSharedCollector(collector)
|
114
115
|
|
@@ -157,9 +158,12 @@ func (f *Fbcolly) Login(email string, password string, otp string) (string, erro
|
|
157
158
|
//logger.Info("Please input OTP")
|
158
159
|
//reader := bufio.NewReader(os.Stdin)
|
159
160
|
//code, _ := reader.ReadString('\n')
|
160
|
-
|
161
|
-
|
162
|
-
|
161
|
+
if len(totpSecret) > 0 {
|
162
|
+
code := gotp.NewDefaultTOTP(totpSecret).Now()
|
163
|
+
reqMap["approvals_code"] = code
|
164
|
+
shouldSubmit = true
|
165
|
+
}
|
166
|
+
|
163
167
|
} else {
|
164
168
|
logger.Info("OnHTML Only Continue checkpoint")
|
165
169
|
|
@@ -193,22 +197,17 @@ func (f *Fbcolly) Login(email string, password string, otp string) (string, erro
|
|
193
197
|
|
194
198
|
}
|
195
199
|
|
196
|
-
func (f *Fbcolly) FetchGroupFeed(groupId int64) (error, *fbcrawl.FacebookPostList) {
|
200
|
+
func (f *Fbcolly) FetchGroupFeed(groupId int64, nextCursor string) (error, *pb.FacebookPostList) {
|
197
201
|
collector := f.collector.Clone()
|
198
202
|
err := setupSharedCollector(collector)
|
199
|
-
|
200
|
-
var result []*fbcrawl.FacebookPost
|
203
|
+
result := pb.FacebookPostList{Posts: []*pb.FacebookPost{}}
|
201
204
|
|
202
205
|
collector.OnHTML("#m_group_stories_container > :last-child a", func(element *colly.HTMLElement) {
|
203
|
-
|
204
|
-
if currentPage < 3 {
|
205
|
-
logger.Info("Will fetch page", currentPage)
|
206
|
-
err = collector.Visit("http://mbasic.facebook.com" + element.Attr("href"))
|
207
|
-
}
|
206
|
+
result.NextCursor = "http://mbasic.facebook.com" + element.Attr("href")
|
208
207
|
})
|
209
208
|
collector.OnHTML("div[role=\"article\"]", func(element *colly.HTMLElement) {
|
210
209
|
dataElement := element
|
211
|
-
post := &
|
210
|
+
post := &pb.FacebookPost{}
|
212
211
|
var fbDataFt FbDataFt
|
213
212
|
jsonData := dataElement.Attr("data-ft")
|
214
213
|
|
@@ -220,8 +219,8 @@ func (f *Fbcolly) FetchGroupFeed(groupId int64) (error, *fbcrawl.FacebookPostLis
|
|
220
219
|
}
|
221
220
|
logger.Info("Post ", fbDataFt)
|
222
221
|
post.Id = fbDataFt.TopLevelPostId
|
223
|
-
post.Group = &
|
224
|
-
post.User = &
|
222
|
+
post.Group = &pb.FacebookGroup{Id: fbDataFt.PageId, Name: dataElement.DOM.Find("h3 strong:nth-child(2) a").Text()}
|
223
|
+
post.User = &pb.FacebookUser{
|
225
224
|
Id: fbDataFt.ContentOwnerIdNew,
|
226
225
|
Name: dataElement.DOM.Find("h3 strong:nth-child(1) a").Text(),
|
227
226
|
}
|
@@ -241,30 +240,34 @@ func (f *Fbcolly) FetchGroupFeed(groupId int64) (error, *fbcrawl.FacebookPostLis
|
|
241
240
|
post.ContentLink = getUrlFromRedirectHref(dataElement.DOM.Find("a[href*=\"https://lm.facebook.com/l.php\"]").AttrOr("href", ""))
|
242
241
|
post.ReactionCount = getNumberFromText(element.DOM.Find("span[id*=\"like_\"]").Text())
|
243
242
|
post.CommentCount = getNumberFromText(element.DOM.Find("span[id*=\"like_\"] ~ a").Text())
|
244
|
-
post.ContentImages = (funk.Map(fbDataFt.PhotoAttachmentsList, func(id string) *
|
243
|
+
post.ContentImages = (funk.Map(fbDataFt.PhotoAttachmentsList, func(id string) *pb.FacebookImage {
|
245
244
|
i, _ := strconv.ParseInt(id, 10, 64)
|
246
|
-
return &
|
245
|
+
return &pb.FacebookImage{
|
247
246
|
Id: i,
|
248
247
|
}
|
249
|
-
})).([]*
|
248
|
+
})).([]*pb.FacebookImage)
|
250
249
|
|
251
250
|
if fbDataFt.PhotoId > 0 {
|
252
|
-
post.ContentImage = &
|
251
|
+
post.ContentImage = &pb.FacebookImage{Id: fbDataFt.PhotoId}
|
253
252
|
}
|
254
|
-
result = append(result, post)
|
253
|
+
result.Posts = append(result.Posts, post)
|
255
254
|
})
|
255
|
+
if len(nextCursor) > 0 {
|
256
|
+
err = collector.Visit(nextCursor)
|
257
|
+
} else {
|
258
|
+
err = collector.Visit(fmt.Sprintf("https://mbasic.facebook.com/groups/%d", groupId))
|
259
|
+
}
|
256
260
|
|
257
|
-
err = collector.Visit(fmt.Sprintf("https://mbasic.facebook.com/groups/%d", groupId))
|
258
261
|
if err != nil {
|
259
262
|
logger.Error("crawl by colly err:", err)
|
260
263
|
}
|
261
|
-
return err, &
|
264
|
+
return err, &result
|
262
265
|
}
|
263
266
|
|
264
|
-
func (f *Fbcolly) FetchGroupInfo(groupIdOrUsername string) (error, *fbcrawl.FacebookGroup) {
|
267
|
+
func (f *Fbcolly) FetchGroupInfo(groupIdOrUsername string) (error, *pb.FacebookGroup) {
|
265
268
|
collector := f.collector.Clone()
|
266
269
|
err := setupSharedCollector(collector)
|
267
|
-
result := &
|
270
|
+
result := &pb.FacebookGroup{}
|
268
271
|
|
269
272
|
collector.OnHTML("a[href=\"#groupMenuBottom\"] h1", func(element *colly.HTMLElement) {
|
270
273
|
result.Name = element.Text
|
@@ -281,36 +284,37 @@ func (f *Fbcolly) FetchGroupInfo(groupIdOrUsername string) (error, *fbcrawl.Face
|
|
281
284
|
return err, result
|
282
285
|
}
|
283
286
|
|
284
|
-
func (f *Fbcolly) FetchContentImages(postId int64) (error, *fbcrawl.FacebookImageList) {
|
287
|
+
func (f *Fbcolly) FetchContentImages(postId int64, nextCursor string) (error, *pb.FacebookImageList) {
|
285
288
|
collector := f.collector.Clone()
|
286
289
|
err := setupSharedCollector(collector)
|
287
|
-
|
288
|
-
var result []*fbcrawl.FacebookImage
|
290
|
+
result := pb.FacebookImageList{Images: []*pb.FacebookImage{}}
|
289
291
|
|
290
292
|
collector.OnHTML("a[href*=\"/media/set/\"]", func(element *colly.HTMLElement) {
|
291
|
-
|
292
|
-
logger.Info("Will fetch page", currentPage)
|
293
|
-
err = collector.Visit("http://mbasic.facebook.com" + element.Attr("href"))
|
293
|
+
result.NextCursor = "http://mbasic.facebook.com" + element.Attr("href")
|
294
294
|
})
|
295
295
|
|
296
296
|
collector.OnHTML("a[href*=\"/photo.php\"]", func(element *colly.HTMLElement) {
|
297
|
-
result = append(result, &
|
297
|
+
result.Images = append(result.Images, &pb.FacebookImage{
|
298
298
|
Id: getImageIdFromHref(element.Attr("href")),
|
299
299
|
})
|
300
300
|
//f.detailCollector.Visit(url)
|
301
301
|
})
|
302
|
+
if len(nextCursor) > 0 {
|
303
|
+
err = collector.Visit(nextCursor)
|
304
|
+
} else {
|
305
|
+
err = collector.Visit(fmt.Sprintf("https://mbasic.facebook.com/media/set/?set=pcb.%d", postId))
|
306
|
+
}
|
302
307
|
|
303
|
-
err = collector.Visit(fmt.Sprintf("https://mbasic.facebook.com/media/set/?set=pcb.%d", postId))
|
304
308
|
if err != nil {
|
305
309
|
logger.Error("crawl by colly err:", err)
|
306
310
|
}
|
307
|
-
return err, &
|
311
|
+
return err, &result
|
308
312
|
}
|
309
313
|
|
310
|
-
func (f *Fbcolly) FetchImageUrl(imageId int64) (error, *fbcrawl.FacebookImage) {
|
314
|
+
func (f *Fbcolly) FetchImageUrl(imageId int64) (error, *pb.FacebookImage) {
|
311
315
|
collector := f.collector.Clone()
|
312
316
|
err := setupSharedCollector(collector)
|
313
|
-
result :=
|
317
|
+
result := pb.FacebookImage{Id: imageId}
|
314
318
|
|
315
319
|
collector.OnHTML("a", func(element *colly.HTMLElement) {
|
316
320
|
result.Url = element.Attr("href")
|
@@ -323,11 +327,10 @@ func (f *Fbcolly) FetchImageUrl(imageId int64) (error, *fbcrawl.FacebookImage) {
|
|
323
327
|
return err, &result
|
324
328
|
}
|
325
329
|
|
326
|
-
func (f *Fbcolly) FetchPost(groupId int64, postId int64) (error, *fbcrawl.FacebookPost) {
|
330
|
+
func (f *Fbcolly) FetchPost(groupId int64, postId int64, commentNextCursor string) (error, *pb.FacebookPost) {
|
327
331
|
collector := f.collector.Clone()
|
328
332
|
err := setupSharedCollector(collector)
|
329
|
-
post := &
|
330
|
-
commentPaging := 0
|
333
|
+
post := &pb.FacebookPost{Comments: &pb.CommentList{Comments: []*pb.FacebookComment{}}}
|
331
334
|
collector.OnHTML("#m_story_permalink_view", func(element *colly.HTMLElement) {
|
332
335
|
dataElement := element.DOM.Find("div[data-ft]")
|
333
336
|
if dataElement.Length() > 0 {
|
@@ -342,8 +345,8 @@ func (f *Fbcolly) FetchPost(groupId int64, postId int64) (error, *fbcrawl.Facebo
|
|
342
345
|
}
|
343
346
|
logger.Info("Post ", result)
|
344
347
|
post.Id = result.TopLevelPostId
|
345
|
-
post.Group = &
|
346
|
-
post.User = &
|
348
|
+
post.Group = &pb.FacebookGroup{Id: result.PageId, Name: dataElement.Find("h3 strong:last-child a").Text()}
|
349
|
+
post.User = &pb.FacebookUser{
|
347
350
|
Id: result.ContentOwnerIdNew,
|
348
351
|
Name: dataElement.Find("h3 strong:first-child a").Text(),
|
349
352
|
}
|
@@ -362,15 +365,15 @@ func (f *Fbcolly) FetchPost(groupId int64, postId int64) (error, *fbcrawl.Facebo
|
|
362
365
|
|
363
366
|
post.ContentLink = getUrlFromRedirectHref(dataElement.Find("a[href*=\"https://lm.facebook.com/l.php\"]").AttrOr("href", ""))
|
364
367
|
post.ReactionCount = getNumberFromText(element.DOM.Find("div[id*=\"sentence_\"]").Text())
|
365
|
-
post.ContentImages = (funk.Map(result.PhotoAttachmentsList, func(id string) *
|
368
|
+
post.ContentImages = (funk.Map(result.PhotoAttachmentsList, func(id string) *pb.FacebookImage {
|
366
369
|
i, _ := strconv.ParseInt(id, 10, 64)
|
367
|
-
return &
|
370
|
+
return &pb.FacebookImage{
|
368
371
|
Id: i,
|
369
372
|
}
|
370
|
-
})).([]*
|
373
|
+
})).([]*pb.FacebookImage)
|
371
374
|
|
372
375
|
if result.PhotoId > 0 {
|
373
|
-
post.ContentImage = &
|
376
|
+
post.ContentImage = &pb.FacebookImage{Id: result.PhotoId}
|
374
377
|
}
|
375
378
|
}
|
376
379
|
|
@@ -380,10 +383,10 @@ func (f *Fbcolly) FetchPost(groupId int64, postId int64) (error, *fbcrawl.Facebo
|
|
380
383
|
commentId, _ := strconv.ParseInt(selection.AttrOr("id", ""), 10, 64)
|
381
384
|
logger.Info("comment", commentId)
|
382
385
|
createdAtWhenResult, _ := f.w.Parse(selection.Find("abbr").Text(), time.Now())
|
383
|
-
post.Comments = append(post.Comments, &
|
386
|
+
post.Comments.Comments = append(post.Comments.Comments, &pb.FacebookComment{
|
384
387
|
Id: commentId,
|
385
|
-
Post: &
|
386
|
-
User: &
|
388
|
+
Post: &pb.FacebookPost{Id: post.Id},
|
389
|
+
User: &pb.FacebookUser{
|
387
390
|
Id: getUserIdFromCommentHref(selection.Find("a[href*=\"#comment_form_\"]").AttrOr("href", "")),
|
388
391
|
Name: selection.Find("h3 > a").Text(),
|
389
392
|
},
|
@@ -396,14 +399,14 @@ func (f *Fbcolly) FetchPost(groupId int64, postId int64) (error, *fbcrawl.Facebo
|
|
396
399
|
})
|
397
400
|
|
398
401
|
collector.OnHTML("div[id*=\"see_prev_\"] > a", func(element *colly.HTMLElement) {
|
399
|
-
|
400
|
-
logger.Info("Comment paging", commentPaging)
|
401
|
-
err = collector.Visit("http://mbasic.facebook.com" + element.Attr("href"))
|
402
|
-
commentPaging = commentPaging + 1
|
403
|
-
}
|
402
|
+
post.Comments.NextCursor = "http://mbasic.facebook.com" + element.Attr("href")
|
404
403
|
})
|
404
|
+
if len(commentNextCursor) > 0 {
|
405
|
+
err = collector.Visit(commentNextCursor)
|
406
|
+
} else {
|
407
|
+
err = collector.Visit(fmt.Sprintf("http://mbasic.facebook.com/groups/%d?view=permalink&id=%d&_rdr", groupId, postId))
|
408
|
+
}
|
405
409
|
|
406
|
-
err = collector.Visit(fmt.Sprintf("http://mbasic.facebook.com/groups/%d?view=permalink&id=%d&_rdr", groupId, postId))
|
407
410
|
return err, post
|
408
411
|
}
|
409
412
|
|
@@ -417,8 +420,12 @@ func (f *Fbcolly) LoginWithCookies(cookies string) error {
|
|
417
420
|
//}
|
418
421
|
|
419
422
|
func getUserIdFromCommentHref(href string) int64 {
|
420
|
-
|
421
|
-
|
423
|
+
match := regexp.MustCompile("#comment_form_(\\d+)").FindStringSubmatch(href)
|
424
|
+
if len(match) > 0 {
|
425
|
+
id, _ := strconv.ParseInt(match[1], 10, 64)
|
426
|
+
return id
|
427
|
+
}
|
428
|
+
return 0
|
422
429
|
}
|
423
430
|
|
424
431
|
func getUrlFromRedirectHref(href string) string {
|