facebook_word_counter 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ MWE4YmU0ZmVmZWIzOTJjMmJiYTAxMWEzOWZjYjk5YzdkZmE3ZjQ0OQ==
5
+ data.tar.gz: !binary |-
6
+ N2NiNGRlZjBhODIyMGQ1OTc3NmQ5NmRmYmI4MzZkMzg2NTMwNjk3Yg==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ Nzk3MGNhYzEwNjUyMjIyZmEwMjY2N2UyOTBjZjNhNTRlYTRmMzE1YjJiZTYw
10
+ MmU3MDkxOGZmOTdhYjRiNWUzMTA0YTk3NWY3YzQxZjc4ODQwN2I1YzNkZjQx
11
+ OTAxN2NjN2NmYTY2OGY5MTNhMzk5NWVhMDE2MmJjZGYxZTYxZGQ=
12
+ data.tar.gz: !binary |-
13
+ ZDJjYTNkYzUzYzQwM2IwMjYyN2FkNTRjZGRkYWY1OGY3NDdlYjhhMGQwNTVl
14
+ YWMyNGMwMGI5YmRiZDNhNzg4MTY1NjZiNGQ3ZDllZTJlZDE0M2JjMTY4NTM5
15
+ M2UwM2U1YTU3NTRmMDE2ODZkMjdkZDc4NjIxMGNhYmVhZDAyMjM=
data/.gitignore ADDED
@@ -0,0 +1,34 @@
1
+ *.gem
2
+ *.rbc
3
+ /.config
4
+ /coverage/
5
+ /InstalledFiles
6
+ /pkg/
7
+ /spec/reports/
8
+ /test/tmp/
9
+ /test/version_tmp/
10
+ /tmp/
11
+
12
+ ## Specific to RubyMotion:
13
+ .dat*
14
+ .repl_history
15
+ build/
16
+
17
+ ## Documentation cache and generated files:
18
+ /.yardoc/
19
+ /_yardoc/
20
+ /doc/
21
+ /rdoc/
22
+
23
+ ## Environment normalisation:
24
+ /.bundle/
25
+ /lib/bundler/man/
26
+
27
+ # for a library or gem, you might want to ignore these files since the code is
28
+ # intended to run in multiple environments; otherwise, check them in:
29
+ # Gemfile.lock
30
+ # .ruby-version
31
+ # .ruby-gemset
32
+
33
+ # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
34
+ .rvmrc
data/Gemfile.lock ADDED
@@ -0,0 +1,43 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ facebook_word_counter (0.0.1)
5
+ mechanize (~> 2.7.3)
6
+ nokogiri (~> 1.6)
7
+
8
+ GEM
9
+ remote: https://rubygems.org/
10
+ specs:
11
+ domain_name (0.5.20)
12
+ unf (>= 0.0.5, < 1.0.0)
13
+ http-cookie (1.0.2)
14
+ domain_name (~> 0.5)
15
+ mechanize (2.7.3)
16
+ domain_name (~> 0.5, >= 0.5.1)
17
+ http-cookie (~> 1.0)
18
+ mime-types (~> 2.0)
19
+ net-http-digest_auth (~> 1.1, >= 1.1.1)
20
+ net-http-persistent (~> 2.5, >= 2.5.2)
21
+ nokogiri (~> 1.4)
22
+ ntlm-http (~> 0.1, >= 0.1.1)
23
+ webrobots (>= 0.0.9, < 0.2)
24
+ mime-types (2.3)
25
+ mini_portile (0.6.0)
26
+ net-http-digest_auth (1.4)
27
+ net-http-persistent (2.9.4)
28
+ nokogiri (1.6.3.1)
29
+ mini_portile (= 0.6.0)
30
+ ntlm-http (0.1.1)
31
+ rake (10.3.2)
32
+ unf (0.1.4)
33
+ unf_ext
34
+ unf_ext (0.0.6)
35
+ webrobots (0.1.1)
36
+
37
+ PLATFORMS
38
+ ruby
39
+
40
+ DEPENDENCIES
41
+ bundler (~> 1.3)
42
+ facebook_word_counter!
43
+ rake
data/README.md ADDED
File without changes
@@ -0,0 +1,25 @@
1
# Gem specification for facebook_word_counter.

# Prepend the gem's lib/ directory to the load path (nothing in this file
# requires from it yet, but it keeps lib/ resolvable while the spec is built).
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)

Gem::Specification.new do |spec|
  spec.name        = 'facebook_word_counter'
  spec.version     = '0.0.1'
  spec.date        = '2014-08-21'
  spec.summary     = "Get word counts from Facebook pages"
  spec.description = "Given a list of facebook ids, scrapes the front page of posts and returns a hash of word counts"
  spec.authors     = ["James Robinson"]
  spec.email       = 'james.michael.robinson@gmail.com'
  spec.homepage    = 'http://rubygems.org/gems/facebook_word_counter'
  spec.license     = 'MIT'

  # Package every file tracked by git. An earlier hard-coded
  # `spec.files = ["lib/facebook_word_counter.rb"]` was always overwritten by
  # this assignment, so it has been removed to avoid suggesting that only one
  # file ships with the gem.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.3"
  spec.add_development_dependency "rake"
  spec.add_runtime_dependency "nokogiri", '~> 1.6'
  spec.add_runtime_dependency 'mechanize', '~> 2.7.3'
end
data/gemfile ADDED
@@ -0,0 +1,4 @@
1
# Fetch gems from the public RubyGems index.
source 'https://rubygems.org'

# Specify your gem's dependencies in facebook_word_counter.gemspec
gemspec
@@ -0,0 +1,79 @@
1
+ require 'nokogiri'
2
+ require 'mechanize'
3
+
4
+ class NotLoggedInError < StandardError; end
5
+
6
# Scrapes Facebook pages through a logged-in Mechanize session and reports,
# for each page, its title, profile picture URL and per-word counts of its
# posts.
#
# Every method is class-level and shares one Mechanize agent held in the
# class-level instance variable @agent. Call setup_browser_agent before
# get_word_counts.
class FacebookWordCounter
  FACEBOOK_URL_PREFIX = "https://www.facebook.com/"
  # Query string appended to every page URL that is fetched.
  # NOTE(review): presumably selects the no-JavaScript rendering - confirm.
  FACEBOOK_URL_SUFFIX = "?_fb_noscript=1"

  # Lower-case words dropped from post text before counting. Apostrophes are
  # stripped before this list is applied, hence "ive" and "im".
  FILLER_WORDS = %w{ and the of as if is to in my a be will i ive are not can out go am im for }.freeze

  # Creates the shared agent, loads the Facebook front page and submits the
  # login form found there.
  #
  # @param username [String] value for the form's email field
  # @param password [String] value for the form's pass field
  # @return the result of submitting the login form
  def self.setup_browser_agent(username, password)
    @agent = Mechanize.new
    @agent.get(FACEBOOK_URL_PREFIX)
    login(username, password)
  end

  # Fetches each page and collects its name, picture and word counts.
  #
  # @param facebook_pages [Array<String>] page ids/paths appended to
  #   FACEBOOK_URL_PREFIX; may be passed as separate arguments or as a single
  #   array
  # @return [Array<Hash>] one hash per page with :name, :image and :words
  # @raise [NotLoggedInError] if no agent has been set up or the login failed
  def self.get_word_counts(*facebook_pages)
    raise NotLoggedInError unless logged_in?

    # flatten lets callers pass either a list of arguments or one array.
    facebook_pages.flatten.map do |url|
      no_js_url = "#{FACEBOOK_URL_PREFIX}#{url}#{FACEBOOK_URL_SUFFIX}"
      @agent.get(no_js_url)
      {
        name: get_facebook_name,
        image: get_facebook_photo_url,
        # facebook_url: "#{FACEBOOK_URL_PREFIX}#{url}",
        words: get_word_counts_on_page
      }
    end
  end

  # NOTE: the original file placed a bare `private` here. It has no effect on
  # methods defined with `def self.`, so the helpers below have always been
  # publicly callable. They are left public to keep the interface unchanged;
  # treat them as internal.

  # Fills in and submits the first form on the agent's current page.
  def self.login(username, password)
    form = @agent.page.forms.first
    form.email = username
    form.pass = password
    form.submit
  end

  # @return [Boolean] false when no agent/page exists yet or the current URL
  #   contains "login_attempt"; true otherwise
  def self.logged_in?
    # Without this guard an un-initialised agent raised NoMethodError instead
    # of letting get_word_counts raise NotLoggedInError.
    return false if @agent.nil? || @agent.page.nil?

    @agent.page.uri.to_s !~ /login_attempt/
  end

  # @return [String] the title of the agent's current page
  def self.get_facebook_name
    @agent.page.title
  end

  # Cleans the text of every ".userContent" node on the current page and
  # counts the remaining words.
  #
  # @return [Hash{String => Integer}] word counts from WordCounter.count_words
  def self.get_word_counts_on_page
    all_words = @agent.page.search(".userContent").map { |post| clean_text(post.text) }.join(' ')
    WordCounter.count_words(all_words)
  end

  # @return [String, nil] the src of the first "img.profilePic" node, or nil
  #   when the page has none
  def self.get_facebook_photo_url
    picture = @agent.page.search("img.profilePic").first
    picture && picture['src']
  end

  # Lower-cases the text, then strips URLs, non-letter characters and filler
  # words, in that order.
  #
  # @param text [String]
  # @return [String]
  def self.clean_text(text)
    text = remove_urls(text.downcase)
    text = remove_punctuation(text)
    text = remove_filler_words(text)
    text.strip
  end

  # Removes http:// and https:// URLs (up to the next whitespace).
  def self.remove_urls(text)
    text.gsub(%r{https?://\S+}, '')
  end

  # Removes every character that is not an ASCII letter or whitespace.
  def self.remove_punctuation(text)
    text.gsub(/[^a-zA-Z\s]/, '')
  end

  # Drops every whole word listed in FILLER_WORDS (case-sensitive) and returns
  # the remaining words joined by single spaces.
  #
  # The previous implementation matched " word " with a space on both sides,
  # so it missed filler words at the start or end of the text and in
  # consecutive repeats ("the the"), and it modified its argument in place.
  #
  # @param text [String]
  # @return [String] a new string; the argument is left untouched
  def self.remove_filler_words(text)
    text.split(' ').reject { |word| FILLER_WORDS.include?(word) }.join(' ')
  end
end
@@ -0,0 +1,8 @@
1
# Counts how many times each word occurs in a piece of text.
class WordCounter
  # Splits the text on whitespace and tallies each resulting word.
  #
  # @param words [String, nil] whitespace-separated text; nil is treated as
  #   empty text
  # @return [Hash{String => Integer}] occurrences keyed by word; the hash has
  #   a default of 0, so looking up an absent word yields 0
  def self.count_words(words)
    words.to_s.split(' ').each_with_object(Hash.new(0)) do |word, word_counts|
      word_counts[word] += 1
    end
  end
end
@@ -0,0 +1,2 @@
1
+ require_relative './facebook_word_counter/facebook_word_counter'
2
+ require_relative './facebook_word_counter/word_counter'
metadata ADDED
@@ -0,0 +1,108 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: facebook_word_counter
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - James Robinson
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-08-21 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.3'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ! '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ! '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: nokogiri
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ~>
46
+ - !ruby/object:Gem::Version
47
+ version: '1.6'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: '1.6'
55
+ - !ruby/object:Gem::Dependency
56
+ name: mechanize
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: 2.7.3
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ~>
67
+ - !ruby/object:Gem::Version
68
+ version: 2.7.3
69
+ description: Given a list of facebook ids, scrapes the front page of posts and returns
70
+ a hash of word counts
71
+ email: james.michael.robinson@gmail.com
72
+ executables: []
73
+ extensions: []
74
+ extra_rdoc_files: []
75
+ files:
76
+ - .gitignore
77
+ - Gemfile.lock
78
+ - README.md
79
+ - facebook_word_counter.gemspec
80
+ - gemfile
81
+ - lib/facebook_word_counter.rb
82
+ - lib/facebook_word_counter/facebook_word_counter.rb
83
+ - lib/facebook_word_counter/word_counter.rb
84
+ homepage: http://rubygems.org/gems/facebook_word_counter
85
+ licenses:
86
+ - MIT
87
+ metadata: {}
88
+ post_install_message:
89
+ rdoc_options: []
90
+ require_paths:
91
+ - lib
92
+ required_ruby_version: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ! '>='
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ required_rubygems_version: !ruby/object:Gem::Requirement
98
+ requirements:
99
+ - - ! '>='
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ requirements: []
103
+ rubyforge_project:
104
+ rubygems_version: 2.1.5
105
+ signing_key:
106
+ specification_version: 4
107
+ summary: Get word counts from Facebook pages
108
+ test_files: []