fbcrawl-colly 0.1.0 → 0.2.0
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +3 -0
- data/Gemfile.lock +28 -0
- data/Rakefile +9 -0
- data/ext/fbcrawl_colly/extconf.rb +5 -4
- data/fbcolly/fbcolly.go +172 -30
- data/fbcrawl-colly.gemspec +2 -1
- data/fbcrawl.proto +19 -4
- data/go.mod +1 -0
- data/go.sum +7 -0
- data/lib/fbcrawl-colly.rb +1 -13
- data/lib/fbcrawl_colly/colly.rb +50 -0
- data/lib/fbcrawl_colly/ffi.rb +17 -0
- data/lib/{fbcrawl-colly → fbcrawl_colly}/version.rb +1 -1
- data/main.go +42 -9
- metadata +20 -4
- data/.travis.yml +0 -6
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 22ec88aaf52a44344fc32ab22f6914f264b49ea10912c527c030ffd97ac12d36
         | 
| 4 | 
            +
              data.tar.gz: d007e4326b15c64e725009548c8e9dac00e263da888d736876b6093a4fba5108
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 448bbb6b045d3b4baa3c0eae23bad85f0daf93159f514ff77e0f9383350b1c52aa3ab5360646099e56ccc19eea0bc8300ba42e67a26262e1e1f3141be61515c0
         | 
| 7 | 
            +
              data.tar.gz: 5d326d26fb0354ea78236849be7328ce49f919435c7c373130f19439c2447f9026ce31f88bc64d5cc72581fadc618a0891cd777f885e438f01217295b58309e5
         | 
    
        data/.gitignore
    CHANGED
    
    
    
        data/Gemfile.lock
    ADDED
    
    | @@ -0,0 +1,28 @@ | |
| 1 | 
            +
            PATH
         | 
| 2 | 
            +
              remote: .
         | 
| 3 | 
            +
              specs:
         | 
| 4 | 
            +
                fbcrawl-colly (0.1.1)
         | 
| 5 | 
            +
                  ffi
         | 
| 6 | 
            +
                  google-protobuf
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            GEM
         | 
| 9 | 
            +
              remote: https://rubygems.org/
         | 
| 10 | 
            +
              specs:
         | 
| 11 | 
            +
                ffi (1.13.1)
         | 
| 12 | 
            +
                google-protobuf (3.12.4-universal-darwin)
         | 
| 13 | 
            +
                minitest (5.14.1)
         | 
| 14 | 
            +
                rake (12.3.3)
         | 
| 15 | 
            +
                rake-compiler (1.1.1)
         | 
| 16 | 
            +
                  rake
         | 
| 17 | 
            +
             | 
| 18 | 
            +
            PLATFORMS
         | 
| 19 | 
            +
              ruby
         | 
| 20 | 
            +
             | 
| 21 | 
            +
            DEPENDENCIES
         | 
| 22 | 
            +
              fbcrawl-colly!
         | 
| 23 | 
            +
              minitest (~> 5.0)
         | 
| 24 | 
            +
              rake (~> 12.0)
         | 
| 25 | 
            +
              rake-compiler
         | 
| 26 | 
            +
             | 
| 27 | 
            +
            BUNDLED WITH
         | 
| 28 | 
            +
               2.1.4
         | 
    
        data/Rakefile
    CHANGED
    
    | @@ -7,4 +7,13 @@ Rake::TestTask.new(:test) do |t| | |
| 7 7 | 
             
              t.test_files = FileList["test/**/*_test.rb"]
         | 
| 8 8 | 
             
            end
         | 
| 9 9 |  | 
| 10 | 
            +
            task :fbcrawl_colly do
         | 
| 11 | 
            +
              Dir.chdir("./ext/fbcrawl_colly/") do
         | 
| 12 | 
            +
                require './extconf'
         | 
| 13 | 
            +
                `make`
         | 
| 14 | 
            +
              end
         | 
| 15 | 
            +
            end
         | 
| 16 | 
            +
             | 
| 17 | 
            +
            task :compile => [:fbcrawl_colly]
         | 
| 18 | 
            +
            task :test => :compile
         | 
| 10 19 | 
             
            task :default => :test
         | 
    
        data/ext/fbcrawl_colly/extconf.rb
    CHANGED
    
    | @@ -1,5 +1,6 @@ | |
| 1 1 | 
             
            require 'mkmf'
         | 
| 2 | 
            -
             | 
| 3 | 
            -
            MakeMakefile::find_executable ' | 
| 4 | 
            -
            MakeMakefile::find_executable 'protoc | 
| 5 | 
            -
             | 
| 2 | 
            +
            requirement_passed = true
         | 
| 3 | 
            +
            requirement_passed &&= MakeMakefile::find_executable 'go'
         | 
| 4 | 
            +
            requirement_passed &&= MakeMakefile::find_executable 'protoc'
         | 
| 5 | 
            +
            requirement_passed &&= MakeMakefile::find_executable 'protoc-gen-go'
         | 
| 6 | 
            +
            $makefile_created = requirement_passed
         | 
    
        data/fbcolly/fbcolly.go
    CHANGED
    
    | @@ -10,14 +10,35 @@ import ( | |
| 10 10 | 
             
            	"github.com/gocolly/colly/extensions"
         | 
| 11 11 | 
             
            	"github.com/gocolly/colly/storage"
         | 
| 12 12 | 
             
            	"github.com/google/logger"
         | 
| 13 | 
            +
            	"github.com/olebedev/when"
         | 
| 14 | 
            +
            	"github.com/olebedev/when/rules/common"
         | 
| 15 | 
            +
            	"github.com/olebedev/when/rules/en"
         | 
| 16 | 
            +
            	"github.com/thoas/go-funk"
         | 
| 13 17 | 
             
            	"net/url"
         | 
| 14 18 | 
             
            	"qnetwork.net/fbcrawl/fbcrawl"
         | 
| 15 19 | 
             
            	"regexp"
         | 
| 20 | 
            +
            	"strconv"
         | 
| 16 21 | 
             
            	"strings"
         | 
| 22 | 
            +
            	"time"
         | 
| 17 23 | 
             
            )
         | 
| 18 24 |  | 
| 19 25 | 
             
            type Fbcolly struct {
         | 
| 20 26 | 
             
            	collector *colly.Collector
         | 
| 27 | 
            +
            	w         *when.Parser
         | 
| 28 | 
            +
            }
         | 
| 29 | 
            +
            type FbDataPostContext struct {
         | 
| 30 | 
            +
            	PublishTime int64 `json:"publish_time"`
         | 
| 31 | 
            +
            }
         | 
| 32 | 
            +
            type FbDataInsight struct {
         | 
| 33 | 
            +
            	FbDataPostContext `json:"post_context"`
         | 
| 34 | 
            +
            }
         | 
| 35 | 
            +
            type FbDataFt struct {
         | 
| 36 | 
            +
            	ContentOwnerIdNew    int64                    `json:"content_owner_id_new"`
         | 
| 37 | 
            +
            	PhotoAttachmentsList []string                 `json:"photo_attachments_list"`
         | 
| 38 | 
            +
            	PhotoId              int64                    `json:"photo_id,string"`
         | 
| 39 | 
            +
            	PageId               int64                    `json:"page_id,string"`
         | 
| 40 | 
            +
            	TopLevelPostId       int64                    `json:"top_level_post_id,string"`
         | 
| 41 | 
            +
            	PageInsights         map[string]FbDataInsight `json:"page_insights"`
         | 
| 21 42 | 
             
            }
         | 
| 22 43 |  | 
| 23 44 | 
             
            func sharedOnRequest(request *colly.Request) {
         | 
| @@ -79,16 +100,19 @@ func getForm(element *colly.HTMLElement, err error) (string, error, map[string]s | |
| 79 100 | 
             
            func New() *Fbcolly {
         | 
| 80 101 | 
             
            	f := Fbcolly{}
         | 
| 81 102 | 
             
            	f.collector = colly.NewCollector()
         | 
| 103 | 
            +
            	f.w = when.New(nil)
         | 
| 104 | 
            +
            	f.w.Add(en.All...)
         | 
| 105 | 
            +
            	f.w.Add(common.All...)
         | 
| 82 106 | 
             
            	return &f
         | 
| 83 107 | 
             
            }
         | 
| 84 108 |  | 
| 85 | 
            -
            func (f *Fbcolly) Login(email string, password string, otp string) error {
         | 
| 109 | 
            +
            func (f *Fbcolly) Login(email string, password string, otp string) (string, error) {
         | 
| 86 110 | 
             
            	collector := f.collector.Clone()
         | 
| 87 | 
            -
            	setupSharedCollector(collector)
         | 
| 111 | 
            +
            	err := setupSharedCollector(collector)
         | 
| 88 112 |  | 
| 89 113 | 
             
            	logger.Info("Login using email", email)
         | 
| 114 | 
            +
            	loggedIn := false
         | 
| 90 115 |  | 
| 91 | 
            -
            	var err error
         | 
| 92 116 | 
             
            	collector.OnHTML("#login_form", func(element *colly.HTMLElement) {
         | 
| 93 117 | 
             
            		logger.Info("OnHTML login_form")
         | 
| 94 118 | 
             
            		loginURL, err, reqMap := getForm(element, err)
         | 
| @@ -106,12 +130,13 @@ func (f *Fbcolly) Login(email string, password string, otp string) error { | |
| 106 130 | 
             
            	})
         | 
| 107 131 |  | 
| 108 132 | 
             
            	collector.OnHTML("a[href=\"/login/save-device/cancel/?flow=interstitial_nux&nux_source=regular_login\"]", func(element *colly.HTMLElement) {
         | 
| 109 | 
            -
            		collector.Visit("http://mbasic.facebook.com" + element.Attr("href"))
         | 
| 133 | 
            +
            		err = collector.Visit("http://mbasic.facebook.com" + element.Attr("href"))
         | 
| 110 134 | 
             
            	})
         | 
| 111 135 |  | 
| 112 136 | 
             
            	collector.OnHTML("form[action=\"/login/checkpoint/\"]", func(element *colly.HTMLElement) {
         | 
| 113 137 |  | 
| 114 138 | 
             
            		checkpointUrl, err, reqMap := getForm(element, err)
         | 
| 139 | 
            +
            		shouldSubmit := false
         | 
| 115 140 | 
             
            		if err != nil {
         | 
| 116 141 | 
             
            			logger.Error(err)
         | 
| 117 142 | 
             
            			return
         | 
| @@ -121,6 +146,7 @@ func (f *Fbcolly) Login(email string, password string, otp string) error { | |
| 121 146 | 
             
            			//Save Device
         | 
| 122 147 | 
             
            			logger.Info("OnHTML Save Device checkpoint")
         | 
| 123 148 | 
             
            			reqMap["name_action_selected"] = "dont_save"
         | 
| 149 | 
            +
            			shouldSubmit = true
         | 
| 124 150 | 
             
            		} else if element.DOM.Find("input[name=\"approvals_code\"]").Length() > 0 {
         | 
| 125 151 | 
             
            			logger.Info("OnHTML OTP checkpoint")
         | 
| 126 152 | 
             
            			//logger.Info("Please input OTP")
         | 
| @@ -128,11 +154,15 @@ func (f *Fbcolly) Login(email string, password string, otp string) error { | |
| 128 154 | 
             
            			//code, _ := reader.ReadString('\n')
         | 
| 129 155 | 
             
            			code := otp[0:6]
         | 
| 130 156 | 
             
            			reqMap["approvals_code"] = code
         | 
| 157 | 
            +
            			shouldSubmit = true
         | 
| 131 158 | 
             
            		} else {
         | 
| 132 159 | 
             
            			logger.Info("OnHTML Only Continue checkpoint")
         | 
| 160 | 
            +
             | 
| 161 | 
            +
            		}
         | 
| 162 | 
            +
            		if shouldSubmit {
         | 
| 163 | 
            +
            			logger.Info("req map:", reqMap)
         | 
| 164 | 
            +
            			err = collector.Post(checkpointUrl, reqMap)
         | 
| 133 165 | 
             
            		}
         | 
| 134 | 
            -
            		logger.Info("req map:", reqMap)
         | 
| 135 | 
            -
            		err = collector.Post(checkpointUrl, reqMap)
         | 
| 136 166 | 
             
            		if err != nil {
         | 
| 137 167 | 
             
            			logger.Error("post err:", err)
         | 
| 138 168 | 
             
            		}
         | 
| @@ -141,19 +171,24 @@ func (f *Fbcolly) Login(email string, password string, otp string) error { | |
| 141 171 | 
             
            	collector.OnHTML("form[action=\"/search/\"]", func(element *colly.HTMLElement) {
         | 
| 142 172 | 
             
            		//We're in home
         | 
| 143 173 | 
             
            		logger.Info("I'm IN HOME, navigate to page now")
         | 
| 174 | 
            +
            		loggedIn = true
         | 
| 144 175 | 
             
            	})
         | 
| 145 176 |  | 
| 146 177 | 
             
            	err = collector.Visit("https://mbasic.facebook.com/")
         | 
| 147 178 | 
             
            	if err != nil {
         | 
| 148 179 | 
             
            		logger.Error("crawl by colly err:", err)
         | 
| 149 180 | 
             
            	}
         | 
| 150 | 
            -
             | 
| 151 | 
            -
            	 | 
| 152 | 
            -
             | 
| 153 | 
            -
             | 
| 181 | 
            +
             | 
| 182 | 
            +
            	if loggedIn {
         | 
| 183 | 
            +
            		logger.Info(storage.StringifyCookies(collector.Cookies("https://mbasic.facebook.com/")))
         | 
| 184 | 
            +
            		return storage.StringifyCookies(collector.Cookies("https://mbasic.facebook.com/")), err
         | 
| 185 | 
            +
            	} else {
         | 
| 186 | 
            +
            		return "", err
         | 
| 187 | 
            +
            	}
         | 
| 188 | 
            +
             | 
| 154 189 | 
             
            }
         | 
| 155 190 |  | 
| 156 | 
            -
            func (f *Fbcolly) FetchGroupFeed(groupId string) (error, *fbcrawl.FacebookPostList) {
         | 
| 191 | 
            +
            func (f *Fbcolly) FetchGroupFeed(groupId int64) (error, *fbcrawl.FacebookPostList) {
         | 
| 157 192 | 
             
            	collector := f.collector.Clone()
         | 
| 158 193 | 
             
            	err := setupSharedCollector(collector)
         | 
| 159 194 | 
             
            	currentPage := 1
         | 
| @@ -163,72 +198,179 @@ func (f *Fbcolly) FetchGroupFeed(groupId string) (error, *fbcrawl.FacebookPostLi | |
| 163 198 | 
             
            		currentPage++
         | 
| 164 199 | 
             
            		if currentPage < 3 {
         | 
| 165 200 | 
             
            			logger.Info("Will fetch page", currentPage)
         | 
| 166 | 
            -
            			collector.Visit("http://mbasic.facebook.com" + element.Attr("href"))
         | 
| 201 | 
            +
            			err = collector.Visit("http://mbasic.facebook.com" + element.Attr("href"))
         | 
| 167 202 | 
             
            		}
         | 
| 168 203 | 
             
            	})
         | 
| 169 204 |  | 
| 170 | 
            -
            	//TODO: May not need this
         | 
| 171 205 | 
             
            	collector.OnXML("//a[text()=\"Full Story\"]", func(element *colly.XMLElement) {
         | 
| 206 | 
            +
            		logger.Info("Post found at", element.Attr("href"))
         | 
| 172 207 | 
             
            		u, _ := url.Parse("http://mbasic.facebook.com" + element.Attr("href"))
         | 
| 173 | 
            -
             | 
| 208 | 
            +
            		postId, _ := strconv.ParseInt(u.Query().Get("id"), 10, 64)
         | 
| 174 209 | 
             
            		result = append(result, &fbcrawl.FacebookPost{
         | 
| 175 | 
            -
            			Id:     | 
| 210 | 
            +
            			Id:    postId,
         | 
| 176 211 | 
             
            			Group: &fbcrawl.FacebookGroup{Id: groupId},
         | 
| 177 212 | 
             
            		})
         | 
| 178 213 | 
             
            		//f.detailCollector.Visit(url)
         | 
| 179 214 | 
             
            	})
         | 
| 180 215 |  | 
| 181 | 
            -
            	err = collector.Visit("https://mbasic.facebook.com/groups | 
| 216 | 
            +
            	err = collector.Visit(fmt.Sprintf("https://mbasic.facebook.com/groups/%d", groupId))
         | 
| 182 217 | 
             
            	if err != nil {
         | 
| 183 218 | 
             
            		logger.Error("crawl by colly err:", err)
         | 
| 184 219 | 
             
            	}
         | 
| 185 220 | 
             
            	return err, &fbcrawl.FacebookPostList{Posts: result}
         | 
| 186 221 | 
             
            }
         | 
| 187 222 |  | 
| 188 | 
            -
            func (f *Fbcolly)  | 
| 223 | 
            +
            func (f *Fbcolly) FetchContentImages(postId int64) (error, *fbcrawl.FacebookImageList) {
         | 
| 224 | 
            +
            	collector := f.collector.Clone()
         | 
| 225 | 
            +
            	err := setupSharedCollector(collector)
         | 
| 226 | 
            +
            	currentPage := 1
         | 
| 227 | 
            +
            	var result []*fbcrawl.FacebookImage
         | 
| 228 | 
            +
             | 
| 229 | 
            +
            	collector.OnHTML("a[href*=\"/media/set/\"]", func(element *colly.HTMLElement) {
         | 
| 230 | 
            +
            		currentPage++
         | 
| 231 | 
            +
            		logger.Info("Will fetch page", currentPage)
         | 
| 232 | 
            +
            		err = collector.Visit("http://mbasic.facebook.com" + element.Attr("href"))
         | 
| 233 | 
            +
            	})
         | 
| 234 | 
            +
             | 
| 235 | 
            +
            	collector.OnHTML("a[href*=\"/photo.php\"]", func(element *colly.HTMLElement) {
         | 
| 236 | 
            +
            		result = append(result, &fbcrawl.FacebookImage{
         | 
| 237 | 
            +
            			Id: getImageIdFromHref(element.Attr("href")),
         | 
| 238 | 
            +
            		})
         | 
| 239 | 
            +
            		//f.detailCollector.Visit(url)
         | 
| 240 | 
            +
            	})
         | 
| 241 | 
            +
             | 
| 242 | 
            +
            	err = collector.Visit(fmt.Sprintf("https://mbasic.facebook.com/media/set/?set=pcb.%d", postId))
         | 
| 243 | 
            +
            	if err != nil {
         | 
| 244 | 
            +
            		logger.Error("crawl by colly err:", err)
         | 
| 245 | 
            +
            	}
         | 
| 246 | 
            +
            	return err, &fbcrawl.FacebookImageList{Images: result}
         | 
| 247 | 
            +
            }
         | 
| 248 | 
            +
             | 
| 249 | 
            +
            func (f *Fbcolly) FetchImageUrl(imageId int64) (error, *fbcrawl.FacebookImage) {
         | 
| 189 250 | 
             
            	collector := f.collector.Clone()
         | 
| 190 251 | 
             
            	err := setupSharedCollector(collector)
         | 
| 191 | 
            -
            	 | 
| 252 | 
            +
            	result := fbcrawl.FacebookImage{Id: imageId}
         | 
| 253 | 
            +
             | 
| 254 | 
            +
            	collector.OnHTML("a", func(element *colly.HTMLElement) {
         | 
| 255 | 
            +
            		result.Url = element.Attr("href")
         | 
| 256 | 
            +
            	})
         | 
| 257 | 
            +
             | 
| 258 | 
            +
            	err = collector.Visit(fmt.Sprintf("https://mbasic.facebook.com/photo/view_full_size/?fbid=%d", imageId))
         | 
| 259 | 
            +
            	if err != nil {
         | 
| 260 | 
            +
            		logger.Error("crawl by colly err:", err)
         | 
| 261 | 
            +
            	}
         | 
| 262 | 
            +
            	return err, &result
         | 
| 263 | 
            +
            }
         | 
| 264 | 
            +
             | 
| 265 | 
            +
            func (f *Fbcolly) FetchPost(groupId int64, postId int64) (error, *fbcrawl.FacebookPost) {
         | 
| 266 | 
            +
            	collector := f.collector.Clone()
         | 
| 267 | 
            +
            	err := setupSharedCollector(collector)
         | 
| 268 | 
            +
            	post := &fbcrawl.FacebookPost{Comments: []*fbcrawl.FacebookComment{}}
         | 
| 269 | 
            +
            	commentPaging := 0
         | 
| 192 270 | 
             
            	collector.OnHTML("#m_story_permalink_view", func(element *colly.HTMLElement) {
         | 
| 193 271 | 
             
            		dataElement := element.DOM.Find("div[data-ft]")
         | 
| 194 272 | 
             
            		if dataElement.Length() > 0 {
         | 
| 195 | 
            -
            			var result  | 
| 273 | 
            +
            			var result FbDataFt
         | 
| 196 274 | 
             
            			jsonData, isExist := dataElement.Attr("data-ft")
         | 
| 197 275 | 
             
            			if isExist {
         | 
| 198 | 
            -
            				 | 
| 276 | 
            +
            				logger.Info(jsonData)
         | 
| 277 | 
            +
            				err = json.Unmarshal([]byte(jsonData), &result)
         | 
| 278 | 
            +
            				if err != nil {
         | 
| 279 | 
            +
            					logger.Error(err)
         | 
| 280 | 
            +
            					return
         | 
| 281 | 
            +
            				}
         | 
| 199 282 | 
             
            				logger.Info("Post ", result)
         | 
| 200 | 
            -
            				post.Id = result | 
| 201 | 
            -
            				post.Group = &fbcrawl.FacebookGroup{Id: result | 
| 202 | 
            -
            				post.User = &fbcrawl.FacebookUser{ | 
| 283 | 
            +
            				post.Id = result.TopLevelPostId
         | 
| 284 | 
            +
            				post.Group = &fbcrawl.FacebookGroup{Id: result.PageId, Name: dataElement.Find("h3 strong:last-child a").Text()}
         | 
| 285 | 
            +
            				post.User = &fbcrawl.FacebookUser{
         | 
| 286 | 
            +
            					Id:   result.ContentOwnerIdNew,
         | 
| 287 | 
            +
            					Name: dataElement.Find("h3 strong:first-child a").Text(),
         | 
| 288 | 
            +
            				}
         | 
| 289 | 
            +
            				post.CreatedAt = result.PageInsights[strconv.FormatInt(result.PageId, 10)].PublishTime
         | 
| 203 290 | 
             
            				//Content
         | 
| 291 | 
            +
             | 
| 292 | 
            +
            				//NO BACKGROUND TEXT ONLY
         | 
| 204 293 | 
             
            				post.Content = strings.Join(dataElement.Find("p").Map(func(i int, selection *goquery.Selection) string {
         | 
| 205 294 | 
             
            					return selection.Text()
         | 
| 206 295 | 
             
            				}), "\n")
         | 
| 207 296 |  | 
| 297 | 
            +
            				if len(post.Content) == 0 {
         | 
| 298 | 
            +
            					// TEXT WITH BACKGROUND
         | 
| 299 | 
            +
            					post.Content = dataElement.Find("div[style*=\"background-image:url\"]").Text()
         | 
| 300 | 
            +
            				}
         | 
| 301 | 
            +
             | 
| 302 | 
            +
            				post.ContentLink = getUrlFromRedirectHref(dataElement.Find("a[href*=\"https://lm.facebook.com/l.php\"]").AttrOr("href", ""))
         | 
| 303 | 
            +
             | 
| 304 | 
            +
            				post.ContentImages = (funk.Map(result.PhotoAttachmentsList, func(id string) *fbcrawl.FacebookImage {
         | 
| 305 | 
            +
            					i, _ := strconv.ParseInt(id, 10, 64)
         | 
| 306 | 
            +
            					return &fbcrawl.FacebookImage{
         | 
| 307 | 
            +
            						Id: i,
         | 
| 308 | 
            +
            					}
         | 
| 309 | 
            +
            				})).([]*fbcrawl.FacebookImage)
         | 
| 310 | 
            +
             | 
| 311 | 
            +
            				if result.PhotoId > 0 {
         | 
| 312 | 
            +
            					post.ContentImage = &fbcrawl.FacebookImage{Id: result.PhotoId}
         | 
| 313 | 
            +
            				}
         | 
| 314 | 
            +
             | 
| 208 315 | 
             
            				logger.Info("content", strings.Join(dataElement.Find("p").Map(func(i int, selection *goquery.Selection) string {
         | 
| 209 316 | 
             
            					return selection.Text()
         | 
| 210 317 | 
             
            				}), "\n"))
         | 
| 211 318 | 
             
            			}
         | 
| 212 | 
            -
             | 
| 319 | 
            +
             | 
| 213 320 | 
             
            			//Comment
         | 
| 214 321 | 
             
            			element.DOM.Find("h3 + div + div + div").Parent().Parent().Each(func(i int, selection *goquery.Selection) {
         | 
| 215 322 | 
             
            				//author
         | 
| 216 | 
            -
            				commentId := selection.AttrOr("id", "")
         | 
| 323 | 
            +
            				commentId, _ := strconv.ParseInt(selection.AttrOr("id", ""), 10, 64)
         | 
| 217 324 | 
             
            				logger.Info("comment", commentId)
         | 
| 218 | 
            -
            				 | 
| 219 | 
            -
            				//idRegex.FindString()
         | 
| 325 | 
            +
            				createdAtWhenResult, _ := f.w.Parse(selection.Find("abbr").Text(), time.Now())
         | 
| 220 326 | 
             
            				post.Comments = append(post.Comments, &fbcrawl.FacebookComment{
         | 
| 221 327 | 
             
            					Id:   commentId,
         | 
| 222 328 | 
             
            					Post: &fbcrawl.FacebookPost{Id: post.Id},
         | 
| 223 329 | 
             
            					User: &fbcrawl.FacebookUser{
         | 
| 224 | 
            -
            						Id:    | 
| 330 | 
            +
            						Id:   getUserIdFromCommentHref(selection.Find("a[href*=\"#comment_form_\"]").AttrOr("href", "")),
         | 
| 225 331 | 
             
            						Name: selection.Find("h3 > a").Text(),
         | 
| 226 332 | 
             
            					},
         | 
| 227 | 
            -
            					Content: | 
| 333 | 
            +
            					Content:   selection.Find("h3 + div").Text(),
         | 
| 334 | 
            +
            					CreatedAt: createdAtWhenResult.Time.Unix(),
         | 
| 228 335 | 
             
            				})
         | 
| 229 336 | 
             
            			})
         | 
| 337 | 
            +
             | 
| 338 | 
            +
            		}
         | 
| 339 | 
            +
            	})
         | 
| 340 | 
            +
             | 
| 341 | 
            +
            	collector.OnHTML("div[id*=\"see_prev_\"] > a", func(element *colly.HTMLElement) {
         | 
| 342 | 
            +
            		if commentPaging < 3 {
         | 
| 343 | 
            +
            			logger.Info("Comment paging", commentPaging)
         | 
| 344 | 
            +
            			err = collector.Visit("http://mbasic.facebook.com" + element.Attr("href"))
         | 
| 345 | 
            +
            			commentPaging = commentPaging + 1
         | 
| 230 346 | 
             
            		}
         | 
| 231 347 | 
             
            	})
         | 
| 232 | 
            -
             | 
| 348 | 
            +
             | 
| 349 | 
            +
            	err = collector.Visit(fmt.Sprintf("http://mbasic.facebook.com/groups/%d?view=permalink&id=%d&_rdr", groupId, postId))
         | 
| 233 350 | 
             
            	return err, post
         | 
| 234 351 | 
             
            }
         | 
| 352 | 
            +
             | 
| 353 | 
            +
            func (f *Fbcolly) LoginWithCookies(cookies string) error {
         | 
| 354 | 
            +
            	collector := f.collector
         | 
| 355 | 
            +
            	return collector.SetCookies("https://mbasic.facebook.com/", storage.UnstringifyCookies(cookies))
         | 
| 356 | 
            +
            }
         | 
| 357 | 
            +
             | 
| 358 | 
            +
            //func getUsernameFromHref(href string) string {
         | 
| 359 | 
            +
            //	return regexp.MustCompile("/([\\d\\w.]+).*").FindStringSubmatch(href)[1]
         | 
| 360 | 
            +
            //}
         | 
| 361 | 
            +
             | 
| 362 | 
            +
            func getUserIdFromCommentHref(href string) int64 {
         | 
| 363 | 
            +
            	id, _ := strconv.ParseInt(regexp.MustCompile("#comment_form_(\\d+)").FindStringSubmatch(href)[1], 10, 64)
         | 
| 364 | 
            +
            	return id
         | 
| 365 | 
            +
            }
         | 
| 366 | 
            +
             | 
| 367 | 
            +
            func getUrlFromRedirectHref(href string) string {
         | 
| 368 | 
            +
            	u, _ := url.Parse(href)
         | 
| 369 | 
            +
            	return u.Query().Get("u")
         | 
| 370 | 
            +
            }
         | 
| 371 | 
            +
             | 
| 372 | 
            +
            func getImageIdFromHref(href string) int64 {
         | 
| 373 | 
            +
            	u, _ := url.Parse(href)
         | 
| 374 | 
            +
            	i, _ := strconv.ParseInt(u.Query().Get("fbid"), 10, 64)
         | 
| 375 | 
            +
            	return i
         | 
| 376 | 
            +
            }
         | 
    
        data/fbcrawl-colly.gemspec
    CHANGED
    
    | @@ -1,4 +1,4 @@ | |
| 1 | 
            -
            require_relative 'lib/ | 
| 1 | 
            +
            require_relative 'lib/fbcrawl_colly/version'
         | 
| 2 2 |  | 
| 3 3 | 
             
            Gem::Specification.new do |spec|
         | 
| 4 4 | 
             
              spec.name          = "fbcrawl-colly"
         | 
| @@ -32,4 +32,5 @@ Gem::Specification.new do |spec| | |
| 32 32 |  | 
| 33 33 | 
             
              spec.add_runtime_dependency 'ffi'
         | 
| 34 34 | 
             
              spec.add_runtime_dependency 'google-protobuf'
         | 
| 35 | 
            +
              spec.add_development_dependency 'rake-compiler'
         | 
| 35 36 | 
             
            end
         | 
    
        data/fbcrawl.proto
    CHANGED
    
    | @@ -1,33 +1,48 @@ | |
| 1 1 | 
             
            syntax = "proto3";
         | 
| 2 2 |  | 
| 3 | 
            +
            package fbcrawl_colly;
         | 
| 3 4 | 
             
            option go_package = "./fbcrawl;fbcrawl";
         | 
| 4 5 |  | 
| 5 6 | 
             
            // A Facebook group, identified by its numeric id and name.
         | 
| 6 7 | 
             
            message FacebookGroup {
         | 
| 7 | 
            -
               | 
| 8 | 
            +
              int64 id = 1;
         | 
| 8 9 | 
             
              string name = 2;
         | 
| 9 10 | 
             
            }
         | 
| 10 11 |  | 
| 11 12 | 
             
            message FacebookUser {
         | 
| 12 | 
            -
               | 
| 13 | 
            +
              int64 id = 1;
         | 
| 13 14 | 
             
              string name = 2;
         | 
| 14 15 | 
             
            }
         | 
| 15 16 |  | 
| 16 17 | 
             
            message FacebookPost {
         | 
| 17 | 
            -
               | 
| 18 | 
            +
              int64 id = 1;
         | 
| 18 19 | 
             
              FacebookGroup group = 2;
         | 
| 19 20 | 
             
              FacebookUser user = 3;
         | 
| 20 21 | 
             
              string content = 4;
         | 
| 22 | 
            +
              string content_link = 6;
         | 
| 23 | 
            +
              FacebookImage content_image = 8;
         | 
| 24 | 
            +
              repeated FacebookImage content_images = 7;
         | 
| 21 25 | 
             
              repeated FacebookComment comments = 5;
         | 
| 26 | 
            +
              int64 created_at = 9;
         | 
| 27 | 
            +
            }
         | 
| 28 | 
            +
             | 
| 29 | 
            +
            message FacebookImage {
         | 
| 30 | 
            +
              int64 id = 1;
         | 
| 31 | 
            +
              string url = 2;
         | 
| 22 32 | 
             
            }
         | 
| 23 33 |  | 
| 24 34 | 
             
            message FacebookComment {
         | 
| 25 | 
            -
               | 
| 35 | 
            +
              int64 id = 1;
         | 
| 26 36 | 
             
              FacebookPost post = 2;
         | 
| 27 37 | 
             
              FacebookUser user = 3;
         | 
| 28 38 | 
             
              string content = 4;
         | 
| 39 | 
            +
              int64 created_at = 5;
         | 
| 29 40 | 
             
            }
         | 
| 30 41 |  | 
| 31 42 | 
             
            message FacebookPostList {
         | 
| 32 43 | 
             
              repeated FacebookPost posts = 1;
         | 
| 33 44 | 
             
            }
         | 
| 45 | 
            +
             | 
| 46 | 
            +
            message FacebookImageList {
         | 
| 47 | 
            +
              repeated FacebookImage images = 1;
         | 
| 48 | 
            +
            }
         | 
    
        data/go.mod
    CHANGED
    
    | @@ -13,6 +13,7 @@ require ( | |
| 13 13 | 
             
            	github.com/golang/protobuf v1.4.2
         | 
| 14 14 | 
             
            	github.com/google/logger v1.1.0
         | 
| 15 15 | 
             
            	github.com/kennygrant/sanitize v1.2.4 // indirect
         | 
| 16 | 
            +
            	github.com/olebedev/when v0.0.0-20190311101825-c3b538a97254
         | 
| 16 17 | 
             
            	github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca // indirect
         | 
| 17 18 | 
             
            	github.com/temoto/robotstxt v1.1.1 // indirect
         | 
| 18 19 | 
             
            	github.com/thoas/go-funk v0.7.0
         | 
    
        data/go.sum
    CHANGED
    
    | @@ -1,4 +1,6 @@ | |
| 1 1 | 
             
            cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
         | 
| 2 | 
            +
            github.com/AlekSi/pointer v1.0.0 h1:KWCWzsvFxNLcmM5XmiqHsGTTsuwZMsLFwWF9Y+//bNE=
         | 
| 3 | 
            +
            github.com/AlekSi/pointer v1.0.0/go.mod h1:1kjywbfcPFCmncIxtk6fIEub6LKrfMz3gc5QKVOSOA8=
         | 
| 2 4 | 
             
            github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
         | 
| 3 5 | 
             
            github.com/PuerkitoBio/goquery v1.5.1 h1:PSPBGne8NIUWw+/7vFBV+kG2J/5MOjbzc7154OaKCSE=
         | 
| 4 6 | 
             
            github.com/PuerkitoBio/goquery v1.5.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc=
         | 
| @@ -18,6 +20,7 @@ github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA | |
| 18 20 | 
             
            github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
         | 
| 19 21 | 
             
            github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
         | 
| 20 22 | 
             
            github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
         | 
| 23 | 
            +
            github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
         | 
| 21 24 | 
             
            github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
         | 
| 22 25 | 
             
            github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
         | 
| 23 26 | 
             
            github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y=
         | 
| @@ -50,6 +53,10 @@ github.com/google/logger v1.1.0 h1:saB74Etb4EAJNH3z74CVbCKk75hld/8T0CsXKetWCwM= | |
| 50 53 | 
             
            github.com/google/logger v1.1.0/go.mod h1:w7O8nrRr0xufejBlQMI83MXqRusvREoJdaAxV+CoAB4=
         | 
| 51 54 | 
             
            github.com/kennygrant/sanitize v1.2.4 h1:gN25/otpP5vAsO2djbMhF/LQX6R7+O1TB4yv8NzpJ3o=
         | 
| 52 55 | 
             
            github.com/kennygrant/sanitize v1.2.4/go.mod h1:LGsjYYtgxbetdg5owWB2mpgUL6e2nfw2eObZ0u0qvak=
         | 
| 56 | 
            +
            github.com/olebedev/when v0.0.0-20190311101825-c3b538a97254 h1:JYoQR67E1vv1WGoeW8DkdFs7vrIEe/5wP+qJItd5tUE=
         | 
| 57 | 
            +
            github.com/olebedev/when v0.0.0-20190311101825-c3b538a97254/go.mod h1:DPucAeQGDPUzYUt+NaWw6qsF5SFapWWToxEiVDh2aV0=
         | 
| 58 | 
            +
            github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I=
         | 
| 59 | 
            +
            github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
         | 
| 53 60 | 
             
            github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
         | 
| 54 61 | 
             
            github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
         | 
| 55 62 | 
             
            github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
         | 
    
        data/lib/fbcrawl-colly.rb
    CHANGED
    
    | @@ -1,16 +1,4 @@ | |
| 1 | 
            -
            require 'ffi'
         | 
| 2 | 
            -
            require 'fbcrawl_pb'
         | 
| 3 | 
            -
            module FbcrawlColly
         | 
| 4 | 
            -
              extend FFI::Library
         | 
| 5 1 |  | 
| 6 | 
            -
             | 
| 7 | 
            -
              attach_function :free, [ :pointer ], :void
         | 
| 2 | 
            +
            module FbcrawlColly
         | 
| 8 3 |  | 
| 9 | 
            -
              attach_function :Init, [], :pointer
         | 
| 10 | 
            -
              attach_function :Login, [:pointer, :string, :string], :void
         | 
| 11 | 
            -
              attach_function :FetchGroupFeed, [:pointer, :string], :string
         | 
| 12 | 
            -
              attach_function :FetchPost, [:pointer, :string, :string], :string
         | 
| 13 | 
            -
              # attach_function :FetchGroup, [:pointer, :string], :pointer
         | 
| 14 | 
            -
              # attach_function :Login, [:pointer, :string, :string, :string], :void
         | 
| 15 | 
            -
              # attach_function :FreePointer, [:pointer], :void
         | 
| 16 4 | 
             
            end
         | 
| @@ -0,0 +1,50 @@ | |
| 1 | 
            +
            require 'ffi'
         | 
| 2 | 
            +
            require_relative '../fbcrawl_pb'
         | 
| 3 | 
            +
            require_relative './ffi'
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            module FbcrawlColly
              # High-level wrapper around the native bindings in FbcrawlColly::FFI.
              #
              # Each instance holds one native handle obtained from +Init+, wrapped in an
              # FFI::AutoPointer whose releaser is +FreeColly+.
              class Colly
                def initialize
                  super
                  @colly = ::FFI::AutoPointer.new(FbcrawlColly::FFI::Init(), FbcrawlColly::FFI.method(:FreeColly))
                end

                # Logs in with an email/password pair.
                #
                # @param email [String]
                # @param password [String]
                # @return [String, nil] the string returned by the native +Login+ call
                #   (presumably nil when the native side returns NULL - confirm)
                def login(email, password)
                  cookies, ptr = FbcrawlColly::FFI.Login(@colly, email, password)
                  cookies
                ensure
                  # ptr is nil only when the native call itself raised.
                  FbcrawlColly::FFI.free(ptr) if ptr
                end

                # Restores a session from a cookie string.
                #
                # @param cookies [String]
                # @return [void]
                def login_with_cookies(cookies)
                  FbcrawlColly::FFI.LoginWithCookies(@colly, cookies)
                end

                # @param group_id [Integer]
                # @return [FbcrawlColly::FacebookPostList]
                def fetch_group_feed(group_id)
                  decode_and_release(FbcrawlColly::FacebookPostList) do
                    FbcrawlColly::FFI.FetchGroupFeed(@colly, group_id)
                  end
                end

                # @param group_id [Integer]
                # @param post_id [Integer]
                # @return [FbcrawlColly::FacebookPost]
                def fetch_post(group_id, post_id)
                  decode_and_release(FbcrawlColly::FacebookPost) do
                    FbcrawlColly::FFI.FetchPost(@colly, group_id, post_id)
                  end
                end

                # @param post_id [Integer]
                # @return [FbcrawlColly::FacebookImageList]
                def fetch_content_images(post_id)
                  decode_and_release(FbcrawlColly::FacebookImageList) do
                    FbcrawlColly::FFI.FetchContentImages(@colly, post_id)
                  end
                end

                # @param image_id [Integer]
                # @return [FbcrawlColly::FacebookImage]
                def fetch_image_url(image_id)
                  decode_and_release(FbcrawlColly::FacebookImage) do
                    FbcrawlColly::FFI.FetchImageUrl(@colly, image_id)
                  end
                end

                private

                # Decodes the string half of a +:strptr+ result with +message_class+ and
                # always frees the native buffer, even when decoding raises (the buffer
                # used to leak in that case).
                #
                # NOTE(review): the payload is binary protobuf but is read as a C string,
                # so it may be cut at the first zero byte - confirm against the native side.
                #
                # @param message_class [Class] protobuf message class responding to +decode+
                # @yieldreturn [Array(String, FFI::Pointer)] the +:strptr+ pair
                # @return [Object] the decoded message
                def decode_and_release(message_class)
                  payload, ptr = yield
                  message_class.decode(payload)
                ensure
                  FbcrawlColly::FFI.free(ptr) if ptr
                end
              end
            end
         | 
| @@ -0,0 +1,17 @@ | |
| 1 | 
            +
            require 'ffi'
         | 
| 2 | 
            +
            module FbcrawlColly
              # Raw bindings to the functions exported by the bundled fbcolly shared library.
              #
              # Functions declared with +:strptr+ return a [String, FFI::Pointer] pair; the
              # pointer must be handed to +free+ once the string has been consumed.
              module FFI
                # Leading "::" is required: inside this module a bare FFI would resolve to
                # FbcrawlColly::FFI itself rather than the ffi gem.
                extend ::FFI::Library

                ffi_lib File.expand_path("../../ext/fbcrawl_colly/fbcolly.so", File.dirname(__FILE__))
                attach_function :free, [:pointer], :void

                attach_function :Init, [], :pointer
                # The exported FreeColly has no return value, so it is bound as :void.
                attach_function :FreeColly, [:pointer], :void
                attach_function :Login, [:pointer, :string, :string], :strptr
                attach_function :LoginWithCookies, [:pointer, :string], :void
                attach_function :FetchGroupFeed, [:pointer, :int64], :strptr
                attach_function :FetchPost, [:pointer, :int64, :int64], :strptr
                attach_function :FetchContentImages, [:pointer, :int64], :strptr
                attach_function :FetchImageUrl, [:pointer, :int64], :strptr
                # attach_function :FetchGroup, [:pointer, :string], :pointer
              end
            end
         | 
    
        data/main.go
    CHANGED
    
    | @@ -27,36 +27,69 @@ var password = flag.String("password", "change_me", "facebook password") | |
| 27 27 | 
             
            var otp = flag.String("otp", "123456", "facebook otp")
         | 
| 28 28 | 
             
            var groupId = flag.String("groupId", "334294967318328", "facebook group id, default is 334294967318328")
         | 
| 29 29 |  | 
| 30 | 
            -
            var  | 
| 30 | 
            +
            var allInstances = map[uintptr]*fbcolly.Fbcolly{}
         | 
| 31 31 |  | 
| 32 32 | 
             
            //export Init
         | 
| 33 33 | 
             
            func Init() uintptr {
         | 
| 34 | 
            -
            	 | 
| 34 | 
            +
            	instance := fbcolly.New()
         | 
| 35 | 
            +
            	ptr := (uintptr)(unsafe.Pointer(instance))
         | 
| 36 | 
            +
            	allInstances[ptr] = instance
         | 
| 37 | 
            +
            	return ptr
         | 
| 38 | 
            +
            }
         | 
| 39 | 
            +
             | 
| 40 | 
            +
            //export FreeColly
         | 
| 41 | 
            +
            func FreeColly(pointer unsafe.Pointer) {
         | 
| 42 | 
            +
            	delete(allInstances, uintptr(pointer))
         | 
| 35 43 | 
             
            }
         | 
| 36 44 |  | 
| 37 45 | 
             
            //export Login
         | 
| 38 | 
            -
            func Login(pointer unsafe.Pointer, email *C.char, password *C.char) {
         | 
| 46 | 
            +
            func Login(pointer unsafe.Pointer, email *C.char, password *C.char) *C.char {
         | 
| 47 | 
            +
            	p := (*fbcolly.Fbcolly)(pointer)
         | 
| 48 | 
            +
            	cookies, err := p.Login(C.GoString(email), C.GoString(password), "")
         | 
| 49 | 
            +
            	if err == nil {
         | 
| 50 | 
            +
            		return C.CString(cookies)
         | 
| 51 | 
            +
            	}
         | 
| 52 | 
            +
            	return nil
         | 
| 53 | 
            +
            }
         | 
| 54 | 
            +
             | 
| 55 | 
            +
            //export LoginWithCookies
         | 
| 56 | 
            +
            func LoginWithCookies(pointer unsafe.Pointer, cookies *C.char) {
         | 
| 39 57 | 
             
            	p := (*fbcolly.Fbcolly)(pointer)
         | 
| 40 | 
            -
            	 | 
| 41 | 
            -
            	p.Login(C.GoString(email), C.GoString(password), "")
         | 
| 58 | 
            +
            	p.LoginWithCookies(C.GoString(cookies))
         | 
| 42 59 | 
             
            }
         | 
| 43 60 |  | 
| 44 61 | 
             
            //export FetchGroupFeed
         | 
| 45 | 
            -
            func FetchGroupFeed(pointer unsafe.Pointer, groupId  | 
| 62 | 
            +
            func FetchGroupFeed(pointer unsafe.Pointer, groupId int64) unsafe.Pointer {
         | 
| 46 63 | 
             
            	p := (*fbcolly.Fbcolly)(pointer)
         | 
| 47 | 
            -
            	_, postsList := p.FetchGroupFeed( | 
| 64 | 
            +
            	_, postsList := p.FetchGroupFeed(groupId)
         | 
| 48 65 | 
             
            	marshaledPostsList, _ := proto.Marshal(postsList)
         | 
| 49 66 | 
             
            	return C.CBytes(append(marshaledPostsList, 0))
         | 
| 50 67 | 
             
            }
         | 
| 51 68 |  | 
| 52 69 | 
             
            //export FetchPost
         | 
| 53 | 
            -
            func FetchPost(pointer unsafe.Pointer, groupId  | 
| 70 | 
            +
            func FetchPost(pointer unsafe.Pointer, groupId int64, postId int64) unsafe.Pointer {
         | 
| 54 71 | 
             
            	p := (*fbcolly.Fbcolly)(pointer)
         | 
| 55 | 
            -
            	_, post := p.FetchPost( | 
| 72 | 
            +
            	_, post := p.FetchPost(groupId, postId)
         | 
| 56 73 | 
             
            	marshaledPost, _ := proto.Marshal(post)
         | 
| 57 74 | 
             
            	return C.CBytes(append(marshaledPost, 0))
         | 
| 58 75 | 
             
            }
         | 
| 59 76 |  | 
| 77 | 
            +
            //export FetchContentImages
         | 
| 78 | 
            +
            func FetchContentImages(pointer unsafe.Pointer, postId int64) unsafe.Pointer {
         | 
| 79 | 
            +
            	p := (*fbcolly.Fbcolly)(pointer)
         | 
| 80 | 
            +
            	_, imageList := p.FetchContentImages(postId)
         | 
| 81 | 
            +
            	marshaled, _ := proto.Marshal(imageList)
         | 
| 82 | 
            +
            	return C.CBytes(append(marshaled, 0))
         | 
| 83 | 
            +
            }
         | 
| 84 | 
            +
             | 
| 85 | 
            +
            //export FetchImageUrl
         | 
| 86 | 
            +
            func FetchImageUrl(pointer unsafe.Pointer, imageId int64) unsafe.Pointer {
         | 
| 87 | 
            +
            	p := (*fbcolly.Fbcolly)(pointer)
         | 
| 88 | 
            +
            	_, image := p.FetchImageUrl(imageId)
         | 
| 89 | 
            +
            	marshaled, _ := proto.Marshal(image)
         | 
| 90 | 
            +
            	return C.CBytes(append(marshaled, 0))
         | 
| 91 | 
            +
            }
         | 
| 92 | 
            +
             | 
| 60 93 | 
             
            func main() {
         | 
| 61 94 | 
             
            	//r := regexp.MustCompile("/([\\d\\w.]+)").FindStringSubmatch()[1]
         | 
| 62 95 | 
             
            	//print(r.FindStringSubmatch("/liem.phamthanh.161?refid=18&__tn__=R")[1])
         | 
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: fbcrawl-colly
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0. | 
| 4 | 
            +
              version: 0.2.0
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Duy Le
         | 
| 8 8 | 
             
            autorequire:
         | 
| 9 9 | 
             
            bindir: bin
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date: 2020-08- | 
| 11 | 
            +
            date: 2020-08-13 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 14 | 
             
              name: ffi
         | 
| @@ -38,6 +38,20 @@ dependencies: | |
| 38 38 | 
             
                - - ">="
         | 
| 39 39 | 
             
                  - !ruby/object:Gem::Version
         | 
| 40 40 | 
             
                    version: '0'
         | 
| 41 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 42 | 
            +
              name: rake-compiler
         | 
| 43 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 44 | 
            +
                requirements:
         | 
| 45 | 
            +
                - - ">="
         | 
| 46 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 47 | 
            +
                    version: '0'
         | 
| 48 | 
            +
              type: :development
         | 
| 49 | 
            +
              prerelease: false
         | 
| 50 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 51 | 
            +
                requirements:
         | 
| 52 | 
            +
                - - ">="
         | 
| 53 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 54 | 
            +
                    version: '0'
         | 
| 41 55 | 
             
            description: Crawl mbasic.facebook.com using GO Colly
         | 
| 42 56 | 
             
            email:
         | 
| 43 57 | 
             
            - duyleekun@gmail.com
         | 
| @@ -47,9 +61,9 @@ extensions: | |
| 47 61 | 
             
            extra_rdoc_files: []
         | 
| 48 62 | 
             
            files:
         | 
| 49 63 | 
             
            - ".gitignore"
         | 
| 50 | 
            -
            - ".travis.yml"
         | 
| 51 64 | 
             
            - CODE_OF_CONDUCT.md
         | 
| 52 65 | 
             
            - Gemfile
         | 
| 66 | 
            +
            - Gemfile.lock
         | 
| 53 67 | 
             
            - LICENSE.txt
         | 
| 54 68 | 
             
            - README.md
         | 
| 55 69 | 
             
            - Rakefile
         | 
| @@ -64,7 +78,9 @@ files: | |
| 64 78 | 
             
            - go.mod
         | 
| 65 79 | 
             
            - go.sum
         | 
| 66 80 | 
             
            - lib/fbcrawl-colly.rb
         | 
| 67 | 
            -
            - lib/ | 
| 81 | 
            +
            - lib/fbcrawl_colly/colly.rb
         | 
| 82 | 
            +
            - lib/fbcrawl_colly/ffi.rb
         | 
| 83 | 
            +
            - lib/fbcrawl_colly/version.rb
         | 
| 68 84 | 
             
            - main.go
         | 
| 69 85 | 
             
            homepage: http://github.com/duyleekun/fbcrawl-colly
         | 
| 70 86 | 
             
            licenses:
         |