facebook_scrapper 0.2.0 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/facebook_scrapper/scrapper.rb +12 -16
- data/lib/facebook_scrapper/version.rb +1 -1
- metadata +1 -1
 
    
        checksums.yaml
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            ---
         
     | 
| 
       2 
2 
     | 
    
         
             
            SHA256:
         
     | 
| 
       3 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       4 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 3 
     | 
    
         
            +
              metadata.gz: 47eff0e332eee397e4315689bbaa12761e963bfdefcf00259c990615a7eeebd7
         
     | 
| 
      
 4 
     | 
    
         
            +
              data.tar.gz: d7da1d1b8c17f07828c0b69884de95e1742b82b2a8859ef4bb280d16df1494c1
         
     | 
| 
       5 
5 
     | 
    
         
             
            SHA512:
         
     | 
| 
       6 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       7 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 6 
     | 
    
         
            +
              metadata.gz: 8ecbe62da7af65a0b4a8b53fae1c9d1220d6d781571500188d6a9d01a5e7d355d7c84b8f1e1c1b7c72dff08fdf2c72b1192b8052353c9bae8c3b22e2e6a40bbf
         
     | 
| 
      
 7 
     | 
    
         
            +
              data.tar.gz: 0c7f7eb171bb65b3a3bd2b3802a48453026aea4537ed4737c990a4a27ebae7507093a314bd5041bcbe1654ae4ac07d3d57cd126a0642bbef1facf6c2c3f458c9
         
     | 
| 
         @@ -6,20 +6,6 @@ module FacebookScrapper 
     | 
|
| 
       6 
6 
     | 
    
         
             
                  @driver = Selenium::WebDriver.for :chrome
         
     | 
| 
       7 
7 
     | 
    
         
             
                end
         
     | 
| 
       8 
8 
     | 
    
         | 
| 
       9 
     | 
    
         
            -
                def toBasicUrl(url)
         
     | 
| 
       10 
     | 
    
         
            -
                  if url.include? "m.facebook.com"
         
     | 
| 
       11 
     | 
    
         
            -
                    return url.gsub!("m.facebook.com", "mbasic.facebook.com")
         
     | 
| 
       12 
     | 
    
         
            -
                  elsif url.include? "www.facebook.com"
         
     | 
| 
       13 
     | 
    
         
            -
                    return url.gsub!("www.facebook.com", "mbasic.facebook.com")
         
     | 
| 
       14 
     | 
    
         
            -
                  else
         
     | 
| 
       15 
     | 
    
         
            -
                    return url
         
     | 
| 
       16 
     | 
    
         
            -
                  end
         
     | 
| 
       17 
     | 
    
         
            -
                end
         
     | 
| 
       18 
     | 
    
         
            -
             
     | 
| 
       19 
     | 
    
         
            -
                def get(url)
         
     | 
| 
       20 
     | 
    
         
            -
                  @driver.get(toBasicUrl(url))
         
     | 
| 
       21 
     | 
    
         
            -
                end
         
     | 
| 
       22 
     | 
    
         
            -
             
     | 
| 
       23 
9 
     | 
    
         
             
                def login(email, password)
         
     | 
| 
       24 
10 
     | 
    
         
             
                  url = "https://mbasic.facebook.com"
         
     | 
| 
       25 
11 
     | 
    
         
             
                  get(url)
         
     | 
| 
         @@ -68,11 +54,11 @@ module FacebookScrapper 
     | 
|
| 
       68 
54 
     | 
    
         
             
                  get(url)
         
     | 
| 
       69 
55 
     | 
    
         
             
                  posts = []
         
     | 
| 
       70 
56 
     | 
    
         
             
                  all_posts = @driver.find_element(id: "m_group_stories_container").find_elements(css: "div[role='article']")
         
     | 
| 
       71 
     | 
    
         
            -
                  puts "Found #{all_posts.length} posts"
         
     | 
| 
       72 
57 
     | 
    
         
             
                  all_posts.each do |raw_post|
         
     | 
| 
       73 
58 
     | 
    
         
             
                    new_post = get_post_object(raw_post, keywords)
         
     | 
| 
       74 
59 
     | 
    
         
             
                    posts.push(new_post) if new_post
         
     | 
| 
       75 
60 
     | 
    
         
             
                  end
         
     | 
| 
      
 61 
     | 
    
         
            +
                  puts "Found #{posts.length} posts"
         
     | 
| 
       76 
62 
     | 
    
         
             
                  return posts
         
     | 
| 
       77 
63 
     | 
    
         
             
                end
         
     | 
| 
       78 
64 
     | 
    
         | 
| 
         @@ -80,11 +66,11 @@ module FacebookScrapper 
     | 
|
| 
       80 
66 
     | 
    
         
             
                  get("https://mbasic.facebook.com")
         
     | 
| 
       81 
67 
     | 
    
         
             
                  posts = []
         
     | 
| 
       82 
68 
     | 
    
         
             
                  all_posts = @driver.find_elements(css: "div[role='article']")
         
     | 
| 
       83 
     | 
    
         
            -
                  puts "Found #{all_posts.length} posts"
         
     | 
| 
       84 
69 
     | 
    
         
             
                  all_posts.each do |raw_post|
         
     | 
| 
       85 
70 
     | 
    
         
             
                    new_post = get_post_object(raw_post, keywords)
         
     | 
| 
       86 
71 
     | 
    
         
             
                    posts.push(new_post) if new_post
         
     | 
| 
       87 
72 
     | 
    
         
             
                  end
         
     | 
| 
      
 73 
     | 
    
         
            +
                  puts "Found #{posts.length} posts"
         
     | 
| 
       88 
74 
     | 
    
         
             
                  return posts
         
     | 
| 
       89 
75 
     | 
    
         
             
                end
         
     | 
| 
       90 
76 
     | 
    
         | 
| 
         @@ -104,6 +90,16 @@ module FacebookScrapper 
     | 
|
| 
       104 
90 
     | 
    
         | 
| 
       105 
91 
     | 
    
         
             
                private
         
     | 
| 
       106 
92 
     | 
    
         | 
| 
      
 93 
     | 
    
         
            +
                def toBasicUrl(url)
         
     | 
| 
      
 94 
     | 
    
         
            +
                  if url.include? "m.facebook.com"
         
     | 
| 
      
 95 
     | 
    
         
            +
                    return url.gsub!("m.facebook.com", "mbasic.facebook.com")
         
     | 
| 
      
 96 
     | 
    
         
            +
                  elsif url.include? "www.facebook.com"
         
     | 
| 
      
 97 
     | 
    
         
            +
                    return url.gsub!("www.facebook.com", "mbasic.facebook.com")
         
     | 
| 
      
 98 
     | 
    
         
            +
                  else
         
     | 
| 
      
 99 
     | 
    
         
            +
                    return url
         
     | 
| 
      
 100 
     | 
    
         
            +
                  end
         
     | 
| 
      
 101 
     | 
    
         
            +
                end
         
     | 
| 
      
 102 
     | 
    
         
            +
             
     | 
| 
       107 
103 
     | 
    
         
             
                def get_post_object(raw_post, keywords = [])
         
     | 
| 
       108 
104 
     | 
    
         
             
                  begin
         
     | 
| 
       109 
105 
     | 
    
         
             
                    new_post = raw_post.find_element(css: "div[role='article']")
         
     |