fbcrawl-colly 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +16 -0
- data/.travis.yml +6 -0
- data/CODE_OF_CONDUCT.md +74 -0
- data/Gemfile +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +47 -0
- data/Rakefile +10 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/ext/fbcrawl_colly/.gitignore +2 -0
- data/ext/fbcrawl_colly/Makefile +6 -0
- data/ext/fbcrawl_colly/extconf.rb +5 -0
- data/fbcolly/fbcolly.go +234 -0
- data/fbcrawl-colly.gemspec +35 -0
- data/fbcrawl.proto +33 -0
- data/go.mod +23 -0
- data/go.sum +131 -0
- data/lib/fbcrawl-colly.rb +16 -0
- data/lib/fbcrawl-colly/version.rb +3 -0
- data/main.go +77 -0
- metadata +95 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 96d233862f21d1dff4a447d57f0f4a4ee6311506d7b5cf1990d75bf92043defa
|
4
|
+
data.tar.gz: a238ee4a34eac5f33fa90bba0d5bdb22169bd0b54034682f8fbb25c1ca4de039
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 36d58d4ef38bf94164f94fe0a841171cc5bdd12a4f2503e818858d77cae029e1fc79cddd3213b2bb9e130e70833703df97ad18700d92c2cc3e31401b8e1f1443
|
7
|
+
data.tar.gz: 5c56a623910f87369812c0d74908e618490b4aeead9d22b055f8076ca040ba9fe5fff0bb380c44e7e435e0d83a13823b259b0f6c57233d7fb3ec38ff83de0832
|
data/.gitignore
ADDED
data/.travis.yml
ADDED
data/CODE_OF_CONDUCT.md
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
# Contributor Covenant Code of Conduct
|
2
|
+
|
3
|
+
## Our Pledge
|
4
|
+
|
5
|
+
In the interest of fostering an open and welcoming environment, we as
|
6
|
+
contributors and maintainers pledge to making participation in our project and
|
7
|
+
our community a harassment-free experience for everyone, regardless of age, body
|
8
|
+
size, disability, ethnicity, gender identity and expression, level of experience,
|
9
|
+
nationality, personal appearance, race, religion, or sexual identity and
|
10
|
+
orientation.
|
11
|
+
|
12
|
+
## Our Standards
|
13
|
+
|
14
|
+
Examples of behavior that contributes to creating a positive environment
|
15
|
+
include:
|
16
|
+
|
17
|
+
* Using welcoming and inclusive language
|
18
|
+
* Being respectful of differing viewpoints and experiences
|
19
|
+
* Gracefully accepting constructive criticism
|
20
|
+
* Focusing on what is best for the community
|
21
|
+
* Showing empathy towards other community members
|
22
|
+
|
23
|
+
Examples of unacceptable behavior by participants include:
|
24
|
+
|
25
|
+
* The use of sexualized language or imagery and unwelcome sexual attention or
|
26
|
+
advances
|
27
|
+
* Trolling, insulting/derogatory comments, and personal or political attacks
|
28
|
+
* Public or private harassment
|
29
|
+
* Publishing others' private information, such as a physical or electronic
|
30
|
+
address, without explicit permission
|
31
|
+
* Other conduct which could reasonably be considered inappropriate in a
|
32
|
+
professional setting
|
33
|
+
|
34
|
+
## Our Responsibilities
|
35
|
+
|
36
|
+
Project maintainers are responsible for clarifying the standards of acceptable
|
37
|
+
behavior and are expected to take appropriate and fair corrective action in
|
38
|
+
response to any instances of unacceptable behavior.
|
39
|
+
|
40
|
+
Project maintainers have the right and responsibility to remove, edit, or
|
41
|
+
reject comments, commits, code, wiki edits, issues, and other contributions
|
42
|
+
that are not aligned to this Code of Conduct, or to ban temporarily or
|
43
|
+
permanently any contributor for other behaviors that they deem inappropriate,
|
44
|
+
threatening, offensive, or harmful.
|
45
|
+
|
46
|
+
## Scope
|
47
|
+
|
48
|
+
This Code of Conduct applies both within project spaces and in public spaces
|
49
|
+
when an individual is representing the project or its community. Examples of
|
50
|
+
representing a project or community include using an official project e-mail
|
51
|
+
address, posting via an official social media account, or acting as an appointed
|
52
|
+
representative at an online or offline event. Representation of a project may be
|
53
|
+
further defined and clarified by project maintainers.
|
54
|
+
|
55
|
+
## Enforcement
|
56
|
+
|
57
|
+
Instances of abusive, harassing, or otherwise unacceptable behavior may be
|
58
|
+
reported by contacting the project team at duyleekun@gmail.com. All
|
59
|
+
complaints will be reviewed and investigated and will result in a response that
|
60
|
+
is deemed necessary and appropriate to the circumstances. The project team is
|
61
|
+
obligated to maintain confidentiality with regard to the reporter of an incident.
|
62
|
+
Further details of specific enforcement policies may be posted separately.
|
63
|
+
|
64
|
+
Project maintainers who do not follow or enforce the Code of Conduct in good
|
65
|
+
faith may face temporary or permanent repercussions as determined by other
|
66
|
+
members of the project's leadership.
|
67
|
+
|
68
|
+
## Attribution
|
69
|
+
|
70
|
+
This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
|
71
|
+
available at [https://contributor-covenant.org/version/1/4][version]
|
72
|
+
|
73
|
+
[homepage]: https://contributor-covenant.org
|
74
|
+
[version]: https://contributor-covenant.org/version/1/4/
|
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2020 Duy Le
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
# FbcrawlColly
|
2
|
+
|
3
|
+
This project crawls mbasic.facebook.com using the Go Colly scraping framework.
|
4
|
+
|
5
|
+
A Ruby gem will be available to use.
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
Add this line to your application's Gemfile:
|
10
|
+
|
11
|
+
```ruby
|
12
|
+
gem 'fbcrawl-colly'
|
13
|
+
```
|
14
|
+
|
15
|
+
And then execute:
|
16
|
+
|
17
|
+
$ bundle install
|
18
|
+
|
19
|
+
Or install it yourself as:
|
20
|
+
|
21
|
+
$ gem install fbcrawl-colly
|
22
|
+
|
23
|
+
<!---
|
24
|
+
## Usage
|
25
|
+
|
26
|
+
TODO: Write usage instructions here
|
27
|
+
|
28
|
+
## Development
|
29
|
+
|
30
|
+
|
31
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
32
|
+
|
33
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
34
|
+
-->
|
35
|
+
|
36
|
+
## Contributing
|
37
|
+
|
38
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/duyleekun/fbcrawl-colly. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [code of conduct](https://github.com/duyleekun/fbcrawl-colly/blob/master/CODE_OF_CONDUCT.md).
|
39
|
+
|
40
|
+
|
41
|
+
## License
|
42
|
+
|
43
|
+
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
44
|
+
|
45
|
+
## Code of Conduct
|
46
|
+
|
47
|
+
Everyone interacting in the FbcrawlColly project's codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/duyleekun/fbcrawl-colly/blob/master/CODE_OF_CONDUCT.md).
|
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "fbcrawl-colly"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
require "irb"
|
14
|
+
IRB.start(__FILE__)
|
data/bin/setup
ADDED
data/fbcolly/fbcolly.go
ADDED
@@ -0,0 +1,234 @@
|
|
1
|
+
package fbcolly
|
2
|
+
|
3
|
+
import "C"
|
4
|
+
import (
|
5
|
+
"encoding/json"
|
6
|
+
"errors"
|
7
|
+
"fmt"
|
8
|
+
"github.com/PuerkitoBio/goquery"
|
9
|
+
"github.com/gocolly/colly"
|
10
|
+
"github.com/gocolly/colly/extensions"
|
11
|
+
"github.com/gocolly/colly/storage"
|
12
|
+
"github.com/google/logger"
|
13
|
+
"net/url"
|
14
|
+
"qnetwork.net/fbcrawl/fbcrawl"
|
15
|
+
"regexp"
|
16
|
+
"strings"
|
17
|
+
)
|
18
|
+
|
19
|
+
// Fbcolly holds the base colly collector. Login, FetchGroupFeed and FetchPost
// each call Clone on it and register their handlers on the clone.
type Fbcolly struct {
|
20
|
+
collector *colly.Collector
|
21
|
+
}
|
22
|
+
|
23
|
+
func sharedOnRequest(request *colly.Request) {
|
24
|
+
logger.Info("OnRequest")
|
25
|
+
//request.Headers.Set("Host", "facebook.com")
|
26
|
+
request.Headers.Set("Accept-Language", "en-US,en;q=0.9")
|
27
|
+
request.Headers.Set("accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9")
|
28
|
+
request.Headers.Set("origin", "https://mbasic.facebook.com")
|
29
|
+
|
30
|
+
//logger.Info("Saved referrer is", request.Ctx.Get("_referer"))
|
31
|
+
request.Headers.Set("referer", "https://mbasic.facebook.com/checkpoint/?_rdr")
|
32
|
+
request.Headers.Set("cache-control", "max-age=0")
|
33
|
+
request.Headers.Set("upgrade-insecure-requests", "1")
|
34
|
+
//accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9
|
35
|
+
//origin: https://mbasic.facebook.com
|
36
|
+
//referer: https://mbasic.facebook.com/checkpoint/?_rdr
|
37
|
+
request.Headers.Set("User-Agent", "Mozilla/5.0 (Linux; Android 6.0.1; Moto G (4)) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Mobile Safari/537.36")
|
38
|
+
request.ResponseCharacterEncoding = "utf-8"
|
39
|
+
}
|
40
|
+
|
41
|
+
func setupSharedCollector(collector *colly.Collector) error {
|
42
|
+
var err error
|
43
|
+
extensions.Referer(collector)
|
44
|
+
|
45
|
+
collector.OnRequest(sharedOnRequest)
|
46
|
+
collector.OnResponse(sharedOnResponse)
|
47
|
+
collector.OnError(func(resp *colly.Response, errHttp error) {
|
48
|
+
err = errHttp
|
49
|
+
logger.Error("OnError", err)
|
50
|
+
})
|
51
|
+
return err
|
52
|
+
}
|
53
|
+
|
54
|
+
func sharedOnResponse(response *colly.Response) {
|
55
|
+
logger.Info("OnResponse ./last.html")
|
56
|
+
_ = response.Save("./last.html")
|
57
|
+
//logger.Info(string(resp.Body))
|
58
|
+
}
|
59
|
+
|
60
|
+
func getForm(element *colly.HTMLElement, err error) (string, error, map[string]string) {
|
61
|
+
submitUrl, exists := element.DOM.Attr("action")
|
62
|
+
if !exists {
|
63
|
+
err = errors.New("doesn't have action label")
|
64
|
+
return "", nil, nil
|
65
|
+
}
|
66
|
+
submitUrl = fmt.Sprintf("https://mbasic.facebook.com%s", submitUrl)
|
67
|
+
logger.Info("form url is:", submitUrl)
|
68
|
+
reqMap := make(map[string]string)
|
69
|
+
element.DOM.Find("input").Each(func(i int, s *goquery.Selection) {
|
70
|
+
name, _ := s.Attr("name")
|
71
|
+
value, _ := s.Attr("value")
|
72
|
+
if name != "" && name != "sign_up" && name != "submit[logout-button-with-confirm]" {
|
73
|
+
reqMap[name] = value
|
74
|
+
}
|
75
|
+
})
|
76
|
+
return submitUrl, err, reqMap
|
77
|
+
}
|
78
|
+
|
79
|
+
func New() *Fbcolly {
|
80
|
+
f := Fbcolly{}
|
81
|
+
f.collector = colly.NewCollector()
|
82
|
+
return &f
|
83
|
+
}
|
84
|
+
|
85
|
+
func (f *Fbcolly) Login(email string, password string, otp string) error {
|
86
|
+
collector := f.collector.Clone()
|
87
|
+
setupSharedCollector(collector)
|
88
|
+
|
89
|
+
logger.Info("Login using email", email)
|
90
|
+
|
91
|
+
var err error
|
92
|
+
collector.OnHTML("#login_form", func(element *colly.HTMLElement) {
|
93
|
+
logger.Info("OnHTML login_form")
|
94
|
+
loginURL, err, reqMap := getForm(element, err)
|
95
|
+
if err != nil {
|
96
|
+
logger.Error(err)
|
97
|
+
return
|
98
|
+
}
|
99
|
+
reqMap["email"] = email
|
100
|
+
reqMap["pass"] = password
|
101
|
+
logger.Info("req map:", reqMap)
|
102
|
+
err = collector.Post(loginURL, reqMap)
|
103
|
+
if err != nil {
|
104
|
+
logger.Error("post err:", err)
|
105
|
+
}
|
106
|
+
})
|
107
|
+
|
108
|
+
collector.OnHTML("a[href=\"/login/save-device/cancel/?flow=interstitial_nux&nux_source=regular_login\"]", func(element *colly.HTMLElement) {
|
109
|
+
collector.Visit("http://mbasic.facebook.com" + element.Attr("href"))
|
110
|
+
})
|
111
|
+
|
112
|
+
collector.OnHTML("form[action=\"/login/checkpoint/\"]", func(element *colly.HTMLElement) {
|
113
|
+
|
114
|
+
checkpointUrl, err, reqMap := getForm(element, err)
|
115
|
+
if err != nil {
|
116
|
+
logger.Error(err)
|
117
|
+
return
|
118
|
+
}
|
119
|
+
|
120
|
+
if element.DOM.Find("input[name=\"name_action_selected\"]").Length() > 0 {
|
121
|
+
//Save Device
|
122
|
+
logger.Info("OnHTML Save Device checkpoint")
|
123
|
+
reqMap["name_action_selected"] = "dont_save"
|
124
|
+
} else if element.DOM.Find("input[name=\"approvals_code\"]").Length() > 0 {
|
125
|
+
logger.Info("OnHTML OTP checkpoint")
|
126
|
+
//logger.Info("Please input OTP")
|
127
|
+
//reader := bufio.NewReader(os.Stdin)
|
128
|
+
//code, _ := reader.ReadString('\n')
|
129
|
+
code := otp[0:6]
|
130
|
+
reqMap["approvals_code"] = code
|
131
|
+
} else {
|
132
|
+
logger.Info("OnHTML Only Continue checkpoint")
|
133
|
+
}
|
134
|
+
logger.Info("req map:", reqMap)
|
135
|
+
err = collector.Post(checkpointUrl, reqMap)
|
136
|
+
if err != nil {
|
137
|
+
logger.Error("post err:", err)
|
138
|
+
}
|
139
|
+
})
|
140
|
+
|
141
|
+
collector.OnHTML("form[action=\"/search/\"]", func(element *colly.HTMLElement) {
|
142
|
+
//We're in home
|
143
|
+
logger.Info("I'm IN HOME, navigate to page now")
|
144
|
+
})
|
145
|
+
|
146
|
+
err = collector.Visit("https://mbasic.facebook.com/")
|
147
|
+
if err != nil {
|
148
|
+
logger.Error("crawl by colly err:", err)
|
149
|
+
}
|
150
|
+
logger.Info(storage.StringifyCookies(collector.Cookies("https://mbasic.facebook.com/")))
|
151
|
+
//return err, storage.StringifyCookies(collector.Cookies("https://mbasic.facebook.com/"))
|
152
|
+
//return err, collector.getS.Cookies("https://mbasic.facebook.com/")
|
153
|
+
return err
|
154
|
+
}
|
155
|
+
|
156
|
+
func (f *Fbcolly) FetchGroupFeed(groupId string) (error, *fbcrawl.FacebookPostList) {
|
157
|
+
collector := f.collector.Clone()
|
158
|
+
err := setupSharedCollector(collector)
|
159
|
+
currentPage := 1
|
160
|
+
var result []*fbcrawl.FacebookPost
|
161
|
+
|
162
|
+
collector.OnHTML("#m_group_stories_container > :last-child a", func(element *colly.HTMLElement) {
|
163
|
+
currentPage++
|
164
|
+
if currentPage < 3 {
|
165
|
+
logger.Info("Will fetch page", currentPage)
|
166
|
+
collector.Visit("http://mbasic.facebook.com" + element.Attr("href"))
|
167
|
+
}
|
168
|
+
})
|
169
|
+
|
170
|
+
//TODO: May not need this
|
171
|
+
collector.OnXML("//a[text()=\"Full Story\"]", func(element *colly.XMLElement) {
|
172
|
+
u, _ := url.Parse("http://mbasic.facebook.com" + element.Attr("href"))
|
173
|
+
|
174
|
+
result = append(result, &fbcrawl.FacebookPost{
|
175
|
+
Id: u.Query().Get("id"),
|
176
|
+
Group: &fbcrawl.FacebookGroup{Id: groupId},
|
177
|
+
})
|
178
|
+
//f.detailCollector.Visit(url)
|
179
|
+
})
|
180
|
+
|
181
|
+
err = collector.Visit("https://mbasic.facebook.com/groups/" + groupId)
|
182
|
+
if err != nil {
|
183
|
+
logger.Error("crawl by colly err:", err)
|
184
|
+
}
|
185
|
+
return err, &fbcrawl.FacebookPostList{Posts: result}
|
186
|
+
}
|
187
|
+
|
188
|
+
func (f *Fbcolly) FetchPost(groupId string, postId string) (error, *fbcrawl.FacebookPost) {
|
189
|
+
collector := f.collector.Clone()
|
190
|
+
err := setupSharedCollector(collector)
|
191
|
+
post := &fbcrawl.FacebookPost{}
|
192
|
+
collector.OnHTML("#m_story_permalink_view", func(element *colly.HTMLElement) {
|
193
|
+
dataElement := element.DOM.Find("div[data-ft]")
|
194
|
+
if dataElement.Length() > 0 {
|
195
|
+
var result map[string]string
|
196
|
+
jsonData, isExist := dataElement.Attr("data-ft")
|
197
|
+
if isExist {
|
198
|
+
json.Unmarshal([]byte(jsonData), &result)
|
199
|
+
logger.Info("Post ", result)
|
200
|
+
post.Id = result["top_level_post_id"]
|
201
|
+
post.Group = &fbcrawl.FacebookGroup{Id: result["page_id"], Name: dataElement.Find("h3 strong:last-child a").Text()}
|
202
|
+
post.User = &fbcrawl.FacebookUser{Id: result["content_owner_id_new"], Name: dataElement.Find("h3 strong:first-child a").Text()}
|
203
|
+
//Content
|
204
|
+
post.Content = strings.Join(dataElement.Find("p").Map(func(i int, selection *goquery.Selection) string {
|
205
|
+
return selection.Text()
|
206
|
+
}), "\n")
|
207
|
+
|
208
|
+
logger.Info("content", strings.Join(dataElement.Find("p").Map(func(i int, selection *goquery.Selection) string {
|
209
|
+
return selection.Text()
|
210
|
+
}), "\n"))
|
211
|
+
}
|
212
|
+
post.Comments = []*fbcrawl.FacebookComment{}
|
213
|
+
//Comment
|
214
|
+
element.DOM.Find("h3 + div + div + div").Parent().Parent().Each(func(i int, selection *goquery.Selection) {
|
215
|
+
//author
|
216
|
+
commentId := selection.AttrOr("id", "")
|
217
|
+
logger.Info("comment", commentId)
|
218
|
+
//idRegex, _ := regexp.Compile("")
|
219
|
+
//idRegex.FindString()
|
220
|
+
post.Comments = append(post.Comments, &fbcrawl.FacebookComment{
|
221
|
+
Id: commentId,
|
222
|
+
Post: &fbcrawl.FacebookPost{Id: post.Id},
|
223
|
+
User: &fbcrawl.FacebookUser{
|
224
|
+
Id: regexp.MustCompile("/([\\d\\w.]+)").FindStringSubmatch(selection.Find("h3 > a").AttrOr("href", ""))[1],
|
225
|
+
Name: selection.Find("h3 > a").Text(),
|
226
|
+
},
|
227
|
+
Content: selection.Find("h3 + div").Text(),
|
228
|
+
})
|
229
|
+
})
|
230
|
+
}
|
231
|
+
})
|
232
|
+
collector.Visit(fmt.Sprintf("http://mbasic.facebook.com/groups/%s?view=permalink&id=%s&_rdr", groupId, postId))
|
233
|
+
return err, post
|
234
|
+
}
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require_relative 'lib/fbcrawl-colly/version'

# Gem specification for fbcrawl-colly. It packages every git-tracked file
# except tests, declares ext/fbcrawl_colly/extconf.rb as the extension to
# build at install time, and depends on ffi and google-protobuf at runtime.
Gem::Specification.new do |spec|
  spec.name          = "fbcrawl-colly"
  spec.version       = FbcrawlColly::VERSION
  spec.authors       = ["Duy Le"]
  spec.email         = ["duyleekun@gmail.com"]

  spec.summary       = %q{Crawl mbasic.facebook.com using GO Colly}
  spec.description   = %q{Crawl mbasic.facebook.com using GO Colly}
  # Project links use https so the URLs published with the gem are not plain http.
  spec.homepage      = "https://github.com/duyleekun/fbcrawl-colly"
  spec.license       = "MIT"
  spec.required_ruby_version = Gem::Requirement.new(">= 2.3.0")

  # spec.metadata["allowed_push_host"] = "TODO: Set to 'http://mygemserver.com'"

  spec.metadata["homepage_uri"] = spec.homepage
  spec.metadata["source_code_uri"] = "https://github.com/duyleekun/fbcrawl-colly"
  spec.metadata["changelog_uri"] = "https://github.com/duyleekun/fbcrawl-colly"

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  spec.files         = Dir.chdir(File.expand_path('..', __FILE__)) do
    `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  end
  # spec.bindir        = "exe"
  # spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.extensions    = [
    'ext/fbcrawl_colly/extconf.rb'
  ]
  spec.require_paths = ["lib"]

  spec.add_runtime_dependency 'ffi'
  spec.add_runtime_dependency 'google-protobuf'
end
|
data/fbcrawl.proto
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
syntax = "proto3";
|
2
|
+
|
3
|
+
option go_package = "./fbcrawl;fbcrawl";
|
4
|
+
|
5
|
+
// A Facebook group, carrying its id and its name.
|
6
|
+
message FacebookGroup {
|
7
|
+
string id = 1;
|
8
|
+
string name = 2;
|
9
|
+
}
|
10
|
+
|
11
|
+
// A Facebook user, carrying its id and its name.
message FacebookUser {
|
12
|
+
string id = 1;
|
13
|
+
string name = 2;
|
14
|
+
}
|
15
|
+
|
16
|
+
// A post: its id, the group and user attached to it, its text content and
// its comments. Each FacebookComment in turn carries a FacebookPost field,
// so the two messages reference each other.
message FacebookPost {
|
17
|
+
string id = 1;
|
18
|
+
FacebookGroup group = 2;
|
19
|
+
FacebookUser user = 3;
|
20
|
+
string content = 4;
|
21
|
+
repeated FacebookComment comments = 5;
|
22
|
+
}
|
23
|
+
|
24
|
+
// A comment: its id, the post and user attached to it, and its text content.
message FacebookComment {
|
25
|
+
string id = 1;
|
26
|
+
FacebookPost post = 2;
|
27
|
+
FacebookUser user = 3;
|
28
|
+
string content = 4;
|
29
|
+
}
|
30
|
+
|
31
|
+
// A list of posts, wrapped in a message of its own.
message FacebookPostList {
|
32
|
+
repeated FacebookPost posts = 1;
|
33
|
+
}
|
data/go.mod
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
module qnetwork.net/fbcrawl
|
2
|
+
|
3
|
+
go 1.14
|
4
|
+
|
5
|
+
require (
|
6
|
+
github.com/PuerkitoBio/goquery v1.5.1
|
7
|
+
github.com/andybalholm/cascadia v1.2.0 // indirect
|
8
|
+
github.com/antchfx/htmlquery v1.2.3 // indirect
|
9
|
+
github.com/antchfx/xmlquery v1.2.4 // indirect
|
10
|
+
github.com/antchfx/xpath v1.1.9 // indirect
|
11
|
+
github.com/gobwas/glob v0.2.3 // indirect
|
12
|
+
github.com/gocolly/colly v1.2.0
|
13
|
+
github.com/golang/protobuf v1.4.2
|
14
|
+
github.com/google/logger v1.1.0
|
15
|
+
github.com/kennygrant/sanitize v1.2.4 // indirect
|
16
|
+
github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca // indirect
|
17
|
+
github.com/temoto/robotstxt v1.1.1 // indirect
|
18
|
+
github.com/thoas/go-funk v0.7.0
|
19
|
+
golang.org/x/net v0.0.0-20200707034311-ab3426394381 // indirect
|
20
|
+
golang.org/x/text v0.3.3 // indirect
|
21
|
+
google.golang.org/appengine v1.6.6 // indirect
|
22
|
+
google.golang.org/protobuf v1.25.0
|
23
|
+
)
|
data/go.sum
ADDED
@@ -0,0 +1,131 @@
|
|
1
|
+
cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
|
2
|
+
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
|
3
|
+
github.com/PuerkitoBio/goquery v1.5.1 h1:PSPBGne8NIUWw+/7vFBV+kG2J/5MOjbzc7154OaKCSE=
|
4
|
+
github.com/PuerkitoBio/goquery v1.5.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc=
|
5
|
+
github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo=
|
6
|
+
github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
|
7
|
+
github.com/andybalholm/cascadia v1.2.0 h1:vuRCkM5Ozh/BfmsaTm26kbjm0mIOM3yS5Ek/F5h18aE=
|
8
|
+
github.com/andybalholm/cascadia v1.2.0/go.mod h1:YCyR8vOZT9aZ1CHEd8ap0gMVm2aFgxBp0T0eFw1RUQY=
|
9
|
+
github.com/antchfx/htmlquery v1.2.3 h1:sP3NFDneHx2stfNXCKbhHFo8XgNjCACnU/4AO5gWz6M=
|
10
|
+
github.com/antchfx/htmlquery v1.2.3/go.mod h1:B0ABL+F5irhhMWg54ymEZinzMSi0Kt3I2if0BLYa3V0=
|
11
|
+
github.com/antchfx/xmlquery v1.2.4 h1:T/SH1bYdzdjTMoz2RgsfVKbM5uWh3gjDYYepFqQmFv4=
|
12
|
+
github.com/antchfx/xmlquery v1.2.4/go.mod h1:KQQuESaxSlqugE2ZBcM/qn+ebIpt+d+4Xx7YcSGAIrM=
|
13
|
+
github.com/antchfx/xpath v1.1.6 h1:6sVh6hB5T6phw1pFpHRQ+C4bd8sNI+O58flqtg7h0R0=
|
14
|
+
github.com/antchfx/xpath v1.1.6/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk=
|
15
|
+
github.com/antchfx/xpath v1.1.9 h1:s3QDSTRoZMK6EqLajCUt/BJMUATPHvzwlRqKqg12yjU=
|
16
|
+
github.com/antchfx/xpath v1.1.9/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk=
|
17
|
+
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
|
18
|
+
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
|
19
|
+
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
|
20
|
+
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
21
|
+
github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
|
22
|
+
github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
|
23
|
+
github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y=
|
24
|
+
github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8=
|
25
|
+
github.com/gocolly/colly v1.2.0 h1:qRz9YAn8FIH0qzgNUw+HT9UN7wm1oF9OBAilwEWpyrI=
|
26
|
+
github.com/gocolly/colly v1.2.0/go.mod h1:Hof5T3ZswNVsOHYmba1u03W65HDWgpV5HifSuueE0EA=
|
27
|
+
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
|
28
|
+
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e h1:1r7pUrabqp18hOBcwBwiTsbnFeTZHV9eER/QT5JVZxY=
|
29
|
+
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
|
30
|
+
github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
|
31
|
+
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
|
32
|
+
github.com/golang/protobuf v1.3.1 h1:YF8+flBXS5eO826T4nzqPrxfhQThhXl0YzfuUPu4SBg=
|
33
|
+
github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
|
34
|
+
github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
|
35
|
+
github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8=
|
36
|
+
github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA=
|
37
|
+
github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs=
|
38
|
+
github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w=
|
39
|
+
github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0=
|
40
|
+
github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8=
|
41
|
+
github.com/golang/protobuf v1.4.2 h1:+Z5KGCizgyZCbGh1KZqA0fcLLkwbsjIzS4aV2v7wJX0=
|
42
|
+
github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
|
43
|
+
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
|
44
|
+
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
|
45
|
+
github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
|
46
|
+
github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
47
|
+
github.com/google/go-cmp v0.5.0 h1:/QaMHBdZ26BB3SSst0Iwl10Epc+xhTquomWX0oZEB6w=
|
48
|
+
github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
49
|
+
github.com/google/logger v1.1.0 h1:saB74Etb4EAJNH3z74CVbCKk75hld/8T0CsXKetWCwM=
|
50
|
+
github.com/google/logger v1.1.0/go.mod h1:w7O8nrRr0xufejBlQMI83MXqRusvREoJdaAxV+CoAB4=
|
51
|
+
github.com/kennygrant/sanitize v1.2.4 h1:gN25/otpP5vAsO2djbMhF/LQX6R7+O1TB4yv8NzpJ3o=
|
52
|
+
github.com/kennygrant/sanitize v1.2.4/go.mod h1:LGsjYYtgxbetdg5owWB2mpgUL6e2nfw2eObZ0u0qvak=
|
53
|
+
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
54
|
+
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
55
|
+
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
|
56
|
+
github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca h1:NugYot0LIVPxTvN8n+Kvkn6TrbMyxQiuvKdEwFdR9vI=
|
57
|
+
github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU=
|
58
|
+
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
59
|
+
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
|
60
|
+
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
61
|
+
github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
|
62
|
+
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
|
63
|
+
github.com/temoto/robotstxt v1.1.1 h1:Gh8RCs8ouX3hRSxxK7B1mO5RFByQ4CmJZDwgom++JaA=
|
64
|
+
github.com/temoto/robotstxt v1.1.1/go.mod h1:+1AmkuG3IYkh1kv0d2qEB9Le88ehNO0zwOr3ujewlOo=
|
65
|
+
github.com/thoas/go-funk v0.7.0 h1:GmirKrs6j6zJbhJIficOsz2aAI7700KsU/5YrdHRM1Y=
|
66
|
+
github.com/thoas/go-funk v0.7.0/go.mod h1:+IWnUfUmFO1+WVYQWQtIJHeRRdaIyyYglZN7xzUPe4Q=
|
67
|
+
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
68
|
+
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
|
69
|
+
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
|
70
|
+
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
|
71
|
+
golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
|
72
|
+
golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
|
73
|
+
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
74
|
+
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
75
|
+
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
76
|
+
golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
77
|
+
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
78
|
+
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
79
|
+
golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
|
80
|
+
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
81
|
+
golang.org/x/net v0.0.0-20200421231249-e086a090c8fd/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
|
82
|
+
golang.org/x/net v0.0.0-20200707034311-ab3426394381 h1:VXak5I6aEWmAXeQjA+QSZzlgNrpq9mjcfDemuexIKsU=
|
83
|
+
golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
|
84
|
+
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
|
85
|
+
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
86
|
+
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
87
|
+
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
88
|
+
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
89
|
+
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
90
|
+
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
91
|
+
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd h1:xhmwyvizuTgC2qz7ZlMluP20uW+C3Rm0FD/WLDX8884=
|
92
|
+
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
93
|
+
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
94
|
+
golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=
|
95
|
+
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
|
96
|
+
golang.org/x/text v0.3.3 h1:cokOdA+Jmi5PJGXLlLllQSgYigAEfHXJAERHVMaCc2k=
|
97
|
+
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
98
|
+
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
99
|
+
golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
100
|
+
golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
|
101
|
+
golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
|
102
|
+
golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
|
103
|
+
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
|
104
|
+
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
105
|
+
google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
|
106
|
+
google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
|
107
|
+
google.golang.org/appengine v1.6.6 h1:lMO5rYAqUxkmaj76jAkRUvt5JZgFymx/+Q5Mzfivuhc=
|
108
|
+
google.golang.org/appengine v1.6.6/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=
|
109
|
+
google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
|
110
|
+
google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
|
111
|
+
google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo=
|
112
|
+
google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
|
113
|
+
google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg=
|
114
|
+
google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
|
115
|
+
google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
|
116
|
+
google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
|
117
|
+
google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=
|
118
|
+
google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE=
|
119
|
+
google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo=
|
120
|
+
google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
|
121
|
+
google.golang.org/protobuf v1.23.0 h1:4MY060fB1DLGMB/7MBTLnwQUY6+F09GEiz6SsrNqyzM=
|
122
|
+
google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
|
123
|
+
google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
|
124
|
+
google.golang.org/protobuf v1.25.0 h1:Ejskq+SyPohKW+1uil0JJMtmHCgJPJ/qWTxr8qp+R4c=
|
125
|
+
google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c=
|
126
|
+
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
127
|
+
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
128
|
+
gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw=
|
129
|
+
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
130
|
+
honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
|
131
|
+
honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'ffi'
|
2
|
+
require 'fbcrawl_pb'
|
3
|
+
# FFI bindings for the Go shared library built under ext/fbcrawl_colly.
#
# The library hands out an opaque handle from Init; that handle is passed back
# as the first argument of every other call.
module FbcrawlColly
  extend FFI::Library

  ffi_lib File.expand_path("../ext/fbcrawl_colly/fbcolly.so", File.dirname(__FILE__))

  # C allocator's free(); used below to release buffers returned by the library.
  attach_function :free, [ :pointer ], :void

  attach_function :Init, [], :pointer
  attach_function :Login, [:pointer, :string, :string], :void

  # Raw bindings. The native functions return a freshly allocated,
  # NUL-terminated buffer. Binding them as :string would copy the text and drop
  # the pointer, leaking the buffer on every call. :strptr yields
  # [String, FFI::Pointer] so the wrapper methods can free it.
  attach_function :fetch_group_feed_raw, :FetchGroupFeed, [:pointer, :string], :strptr
  attach_function :fetch_post_raw, :FetchPost, [:pointer, :string, :string], :strptr

  # Runs the block (which must return a [String, FFI::Pointer] pair), returns
  # the String and always releases the native buffer.
  def self.take_native_string
    text, buffer = yield
    text
  ensure
    free(buffer) unless buffer.nil? || buffer.null?
  end

  # Fetches a group's feed.
  #
  # handle   - pointer obtained from Init.
  # group_id - group identifier as a String.
  #
  # Returns the payload as a String (nil if the library returned NULL).
  # NOTE(review): the payload is read up to the first NUL byte; if it is binary
  # data containing zero bytes it will be truncated. Fixing that needs an
  # explicit length from the native side.
  def self.FetchGroupFeed(handle, group_id)
    take_native_string { fetch_group_feed_raw(handle, group_id) }
  end

  # Fetches a single post.
  #
  # handle   - pointer obtained from Init.
  # group_id - group identifier as a String.
  # post_id  - post identifier as a String.
  #
  # Returns the payload as a String (nil if the library returned NULL).
  def self.FetchPost(handle, group_id, post_id)
    take_native_string { fetch_post_raw(handle, group_id, post_id) }
  end

  # Instance-level delegators, so code that includes this module keeps working
  # the same way it did when these names were attached directly.
  def FetchGroupFeed(handle, group_id)
    FbcrawlColly.FetchGroupFeed(handle, group_id)
  end

  def FetchPost(handle, group_id, post_id)
    FbcrawlColly.FetchPost(handle, group_id, post_id)
  end

  # attach_function :FetchGroup, [:pointer, :string], :pointer
  # attach_function :Login, [:pointer, :string, :string, :string], :void
  # attach_function :FreePointer, [:pointer], :void
end
|
data/main.go
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
package main
|
2
|
+
|
3
|
+
/*
|
4
|
+
#include <stdio.h>
|
5
|
+
#include <stdlib.h>
|
6
|
+
|
7
|
+
static void myprint(char* s) {
|
8
|
+
printf("%s\n", s);
|
9
|
+
}
|
10
|
+
|
11
|
+
|
12
|
+
*/
|
13
|
+
import "C"
|
14
|
+
|
15
|
+
import (
|
16
|
+
"flag"
|
17
|
+
"github.com/golang/protobuf/proto"
|
18
|
+
"qnetwork.net/fbcrawl/fbcolly"
|
19
|
+
"unsafe"
|
20
|
+
)
|
21
|
+
|
22
|
+
const logPath = "parse.log"
|
23
|
+
|
24
|
+
var verbose = flag.Bool("verbose", true, "print info level logs to stdout")
|
25
|
+
var email = flag.String("email", "change_me@gmail.com", "facebook email")
|
26
|
+
var password = flag.String("password", "change_me", "facebook password")
|
27
|
+
var otp = flag.String("otp", "123456", "facebook otp")
|
28
|
+
var groupId = flag.String("groupId", "334294967318328", "facebook group id, default is 334294967318328")
|
29
|
+
|
30
|
+
var tmp = fbcolly.New()
|
31
|
+
|
32
|
+
//export Init
|
33
|
+
func Init() uintptr {
|
34
|
+
return (uintptr)(unsafe.Pointer(tmp))
|
35
|
+
}
|
36
|
+
|
37
|
+
//export Login
|
38
|
+
func Login(pointer unsafe.Pointer, email *C.char, password *C.char) {
|
39
|
+
p := (*fbcolly.Fbcolly)(pointer)
|
40
|
+
//print(p.E)
|
41
|
+
p.Login(C.GoString(email), C.GoString(password), "")
|
42
|
+
}
|
43
|
+
|
44
|
+
//export FetchGroupFeed
|
45
|
+
func FetchGroupFeed(pointer unsafe.Pointer, groupId *C.char) unsafe.Pointer {
|
46
|
+
p := (*fbcolly.Fbcolly)(pointer)
|
47
|
+
_, postsList := p.FetchGroupFeed(C.GoString(groupId))
|
48
|
+
marshaledPostsList, _ := proto.Marshal(postsList)
|
49
|
+
return C.CBytes(append(marshaledPostsList, 0))
|
50
|
+
}
|
51
|
+
|
52
|
+
//export FetchPost
|
53
|
+
func FetchPost(pointer unsafe.Pointer, groupId *C.char, postId *C.char) unsafe.Pointer {
|
54
|
+
p := (*fbcolly.Fbcolly)(pointer)
|
55
|
+
_, post := p.FetchPost(C.GoString(groupId), C.GoString(postId))
|
56
|
+
marshaledPost, _ := proto.Marshal(post)
|
57
|
+
return C.CBytes(append(marshaledPost, 0))
|
58
|
+
}
|
59
|
+
|
60
|
+
// main does nothing at run time: every statement in it is commented out.
// The functional entry points of this file are the //export functions.
// NOTE(review): presumably main exists only because package main requires it
// when building the shared library — confirm against the build Makefile.
//
// The commented-out code below is retained scratch from a standalone run
// (regex experiment, file logging setup, login and group feed fetch).
func main() {
	// r := regexp.MustCompile("/([\\d\\w.]+)").FindStringSubmatch()[1]
	// print(r.FindStringSubmatch("/liem.phamthanh.161?refid=18&__tn__=R")[1])
	// flag.Parse()
	//
	// // post := fbcrawl.FacebookPost{}
	// // bPost, err := proto.Marshal(&post)
	//
	// lf, err := os.OpenFile(logPath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0660)
	// if err != nil {
	// 	logger.Fatalf("Failed to open log file: %v", err)
	// }
	// defer lf.Close()
	// defer logger.Init("fb-colly", *verbose, false, lf).Close()
	// f := fbcolly.New()
	// err = f.Login(*email, *password, *otp)
	// f.FetchGroupFeed(*groupId)
}
|
metadata
ADDED
@@ -0,0 +1,95 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: fbcrawl-colly
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Duy Le
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2020-08-05 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: ffi
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: google-protobuf
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
description: Crawl mbasic.facebook.com using GO Colly
|
42
|
+
email:
|
43
|
+
- duyleekun@gmail.com
|
44
|
+
executables: []
|
45
|
+
extensions:
|
46
|
+
- ext/fbcrawl_colly/extconf.rb
|
47
|
+
extra_rdoc_files: []
|
48
|
+
files:
|
49
|
+
- ".gitignore"
|
50
|
+
- ".travis.yml"
|
51
|
+
- CODE_OF_CONDUCT.md
|
52
|
+
- Gemfile
|
53
|
+
- LICENSE.txt
|
54
|
+
- README.md
|
55
|
+
- Rakefile
|
56
|
+
- bin/console
|
57
|
+
- bin/setup
|
58
|
+
- ext/fbcrawl_colly/.gitignore
|
59
|
+
- ext/fbcrawl_colly/Makefile
|
60
|
+
- ext/fbcrawl_colly/extconf.rb
|
61
|
+
- fbcolly/fbcolly.go
|
62
|
+
- fbcrawl-colly.gemspec
|
63
|
+
- fbcrawl.proto
|
64
|
+
- go.mod
|
65
|
+
- go.sum
|
66
|
+
- lib/fbcrawl-colly.rb
|
67
|
+
- lib/fbcrawl-colly/version.rb
|
68
|
+
- main.go
|
69
|
+
homepage: http://github.com/duyleekun/fbcrawl-colly
|
70
|
+
licenses:
|
71
|
+
- MIT
|
72
|
+
metadata:
|
73
|
+
homepage_uri: http://github.com/duyleekun/fbcrawl-colly
|
74
|
+
source_code_uri: http://github.com/duyleekun/fbcrawl-colly
|
75
|
+
changelog_uri: http://github.com/duyleekun/fbcrawl-colly
|
76
|
+
post_install_message:
|
77
|
+
rdoc_options: []
|
78
|
+
require_paths:
|
79
|
+
- lib
|
80
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
81
|
+
requirements:
|
82
|
+
- - ">="
|
83
|
+
- !ruby/object:Gem::Version
|
84
|
+
version: 2.3.0
|
85
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
requirements: []
|
91
|
+
rubygems_version: 3.1.2
|
92
|
+
signing_key:
|
93
|
+
specification_version: 4
|
94
|
+
summary: Crawl mbasic.facebook.com using GO Colly
|
95
|
+
test_files: []
|