facebook_word_counter 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ MWE4YmU0ZmVmZWIzOTJjMmJiYTAxMWEzOWZjYjk5YzdkZmE3ZjQ0OQ==
5
+ data.tar.gz: !binary |-
6
+ N2NiNGRlZjBhODIyMGQ1OTc3NmQ5NmRmYmI4MzZkMzg2NTMwNjk3Yg==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ Nzk3MGNhYzEwNjUyMjIyZmEwMjY2N2UyOTBjZjNhNTRlYTRmMzE1YjJiZTYw
10
+ MmU3MDkxOGZmOTdhYjRiNWUzMTA0YTk3NWY3YzQxZjc4ODQwN2I1YzNkZjQx
11
+ OTAxN2NjN2NmYTY2OGY5MTNhMzk5NWVhMDE2MmJjZGYxZTYxZGQ=
12
+ data.tar.gz: !binary |-
13
+ ZDJjYTNkYzUzYzQwM2IwMjYyN2FkNTRjZGRkYWY1OGY3NDdlYjhhMGQwNTVl
14
+ YWMyNGMwMGI5YmRiZDNhNzg4MTY1NjZiNGQ3ZDllZTJlZDE0M2JjMTY4NTM5
15
+ M2UwM2U1YTU3NTRmMDE2ODZkMjdkZDc4NjIxMGNhYmVhZDAyMjM=
data/.gitignore ADDED
@@ -0,0 +1,34 @@
1
+ *.gem
2
+ *.rbc
3
+ /.config
4
+ /coverage/
5
+ /InstalledFiles
6
+ /pkg/
7
+ /spec/reports/
8
+ /test/tmp/
9
+ /test/version_tmp/
10
+ /tmp/
11
+
12
+ ## Specific to RubyMotion:
13
+ .dat*
14
+ .repl_history
15
+ build/
16
+
17
+ ## Documentation cache and generated files:
18
+ /.yardoc/
19
+ /_yardoc/
20
+ /doc/
21
+ /rdoc/
22
+
23
+ ## Environment normalisation:
24
+ /.bundle/
25
+ /lib/bundler/man/
26
+
27
+ # for a library or gem, you might want to ignore these files since the code is
28
+ # intended to run in multiple environments; otherwise, check them in:
29
+ # Gemfile.lock
30
+ # .ruby-version
31
+ # .ruby-gemset
32
+
33
+ # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
34
+ .rvmrc
data/Gemfile.lock ADDED
@@ -0,0 +1,43 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ facebook_word_counter (0.0.1)
5
+ mechanize (~> 2.7.3)
6
+ nokogiri (~> 1.6)
7
+
8
+ GEM
9
+ remote: https://rubygems.org/
10
+ specs:
11
+ domain_name (0.5.20)
12
+ unf (>= 0.0.5, < 1.0.0)
13
+ http-cookie (1.0.2)
14
+ domain_name (~> 0.5)
15
+ mechanize (2.7.3)
16
+ domain_name (~> 0.5, >= 0.5.1)
17
+ http-cookie (~> 1.0)
18
+ mime-types (~> 2.0)
19
+ net-http-digest_auth (~> 1.1, >= 1.1.1)
20
+ net-http-persistent (~> 2.5, >= 2.5.2)
21
+ nokogiri (~> 1.4)
22
+ ntlm-http (~> 0.1, >= 0.1.1)
23
+ webrobots (>= 0.0.9, < 0.2)
24
+ mime-types (2.3)
25
+ mini_portile (0.6.0)
26
+ net-http-digest_auth (1.4)
27
+ net-http-persistent (2.9.4)
28
+ nokogiri (1.6.3.1)
29
+ mini_portile (= 0.6.0)
30
+ ntlm-http (0.1.1)
31
+ rake (10.3.2)
32
+ unf (0.1.4)
33
+ unf_ext
34
+ unf_ext (0.0.6)
35
+ webrobots (0.1.1)
36
+
37
+ PLATFORMS
38
+ ruby
39
+
40
+ DEPENDENCIES
41
+ bundler (~> 1.3)
42
+ facebook_word_counter!
43
+ rake
data/README.md ADDED
File without changes
@@ -0,0 +1,25 @@
1
# Gem specification for facebook_word_counter.
#
# Puts the gem's own lib/ directory on the load path, then declares the
# package metadata, the packaged file list and the dependencies.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)

Gem::Specification.new do |spec|
  spec.name        = 'facebook_word_counter'
  spec.version     = '0.0.1'
  spec.date        = '2014-08-21'
  spec.summary     = "Get word counts from Facebook pages"
  spec.description = "Given a list of facebook ids, scrapes the front page of posts and returns a hash of word counts"
  spec.authors     = ["James Robinson"]
  spec.email       = 'james.michael.robinson@gmail.com'
  spec.homepage    = 'http://rubygems.org/gems/facebook_word_counter'
  spec.license     = 'MIT'

  # The packaged file list is taken from git. The NUL-delimited form (-z)
  # is used so that paths git would otherwise quote, or that contain
  # whitespace/newlines, are returned verbatim. (A hard-coded one-file list
  # used to be assigned earlier and was always overwritten by this line.)
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.3"
  spec.add_development_dependency "rake"
  spec.add_runtime_dependency "nokogiri", '~> 1.6'
  spec.add_runtime_dependency 'mechanize', '~> 2.7.3'
end
data/gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in facebook_word_counter.gemspec
4
+ gemspec
@@ -0,0 +1,79 @@
1
+ require 'nokogiri'
2
+ require 'mechanize'
3
+
4
# Raised when word counts are requested without a usable logged-in session,
# or when the login form cannot be found.
class NotLoggedInError < StandardError; end

# Fetches Facebook pages through a Mechanize agent and reports, for each page,
# its title, profile picture URL and the word frequencies of its posts.
#
# The agent is stored on the class itself (@agent), so one session is shared
# by every caller; call +setup_browser_agent+ before +get_word_counts+.
class FacebookWordCounter
  FACEBOOK_URL_PREFIX = "https://www.facebook.com/"
  FACEBOOK_URL_SUFFIX = "?_fb_noscript=1"

  # Words dropped from post text before counting. Entries are lower-case and
  # punctuation-free ("ive", "im") because text is downcased and stripped of
  # punctuation before this list is applied.
  FILLER_WORDS = %w{ and the of as if is to in my a be will i ive are not can out go am im for }.freeze

  # Creates a new Mechanize agent, loads FACEBOOK_URL_PREFIX and submits the
  # first form on that page with the given credentials.
  #
  # @param username [String] value written to the form's +email+ field
  # @param password [String] value written to the form's +pass+ field
  # @return the result of submitting the login form
  # @raise [NotLoggedInError] if the loaded page contains no form
  def self.setup_browser_agent(username, password)
    @agent = Mechanize.new
    @agent.get(FACEBOOK_URL_PREFIX)
    login(username, password)
  end

  # Visits each page and collects its name, profile image and word counts.
  #
  # @param facebook_pages [Array<String>] page ids/paths appended to
  #   FACEBOOK_URL_PREFIX; may be given as separate arguments or as one array
  # @return [Array<Hash>] one hash per page with the keys +:name+, +:image+
  #   and +:words+
  # @raise [NotLoggedInError] if no agent has been set up or the agent's
  #   current URL indicates a login attempt
  def self.get_word_counts(*facebook_pages)
    raise NotLoggedInError unless logged_in?

    # flatten lets callers pass an existing list without splatting it.
    facebook_pages.flatten.map do |page_id|
      @agent.get("#{FACEBOOK_URL_PREFIX}#{page_id}#{FACEBOOK_URL_SUFFIX}")
      {
        name: get_facebook_name,
        image: get_facebook_photo_url,
        words: get_word_counts_on_page
      }
    end
  end

  # Fills in and submits the first form of the agent's current page.
  def self.login(username, password)
    form = @agent.page.forms.first
    raise NotLoggedInError, 'login form not found' unless form

    form.email = username
    form.pass = password
    form.submit
  end

  # True when an agent with a current page exists and that page's URL does not
  # contain "login_attempt" (presumably where a rejected login ends up).
  def self.logged_in?
    return false unless @agent && @agent.page

    @agent.page.uri.to_s !~ /login_attempt/
  end

  # Title of the agent's current page.
  def self.get_facebook_name
    @agent.page.title
  end

  # Word counts over the cleaned text of every ".userContent" node on the
  # current page.
  def self.get_word_counts_on_page
    all_words = @agent.page.search(".userContent").map { |post| clean_text(post.text) }.join(' ')
    WordCounter.count_words(all_words)
  end

  # +src+ of the first "img.profilePic" element on the current page, or nil
  # when the page has no such element.
  def self.get_facebook_photo_url
    profile_image = @agent.page.search("img.profilePic").first
    profile_image && profile_image['src']
  end

  # Downcases the text, then strips URLs, punctuation and filler words.
  def self.clean_text(text)
    text = remove_urls(text.downcase)
    text = remove_punctuation(text)
    text = remove_filler_words(text)
    text.strip
  end

  # Deletes every http:// or https:// URL up to the next whitespace.
  def self.remove_urls(text)
    text.gsub(%r{https?://\S+}, '')
  end

  # Deletes everything except letters, combining marks and whitespace.
  # Unicode properties are used so accented and non-Latin letters survive.
  def self.remove_punctuation(text)
    text.gsub(/[^\p{L}\p{M}\s]/, '')
  end

  # Drops every whole word listed in FILLER_WORDS, wherever it appears
  # (including the first and last word), and returns the remaining words
  # joined by single spaces. The argument itself is not modified.
  def self.remove_filler_words(text)
    text.split.reject { |word| FILLER_WORDS.include?(word) }.join(' ')
  end

  # A bare `private` does not apply to `def self.` methods, so the helpers
  # are hidden explicitly.
  private_class_method :login, :logged_in?, :get_facebook_name,
                       :get_word_counts_on_page, :get_facebook_photo_url,
                       :clean_text, :remove_urls, :remove_punctuation,
                       :remove_filler_words
end
@@ -0,0 +1,8 @@
1
# Counts how often each whitespace-separated word occurs in a string.
class WordCounter
  # @param words [String, nil] text to tally; nil is treated as empty text
  # @return [Hash{String => Integer}] occurrences per word; the hash has a
  #   default of 0, so looking up a word that never occurred returns 0
  def self.count_words(words)
    words.to_s.split(' ').each_with_object(Hash.new(0)) do |word, word_counts|
      word_counts[word] += 1
    end
  end
end
@@ -0,0 +1,2 @@
1
+ require_relative './facebook_word_counter/facebook_word_counter'
2
+ require_relative './facebook_word_counter/word_counter'
metadata ADDED
@@ -0,0 +1,108 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: facebook_word_counter
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - James Robinson
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-08-21 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.3'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ! '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ! '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: nokogiri
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ~>
46
+ - !ruby/object:Gem::Version
47
+ version: '1.6'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: '1.6'
55
+ - !ruby/object:Gem::Dependency
56
+ name: mechanize
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: 2.7.3
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ~>
67
+ - !ruby/object:Gem::Version
68
+ version: 2.7.3
69
+ description: Given a list of facebook ids, scrapes the front page of posts and returns
70
+ a hash of word counts
71
+ email: james.michael.robinson@gmail.com
72
+ executables: []
73
+ extensions: []
74
+ extra_rdoc_files: []
75
+ files:
76
+ - .gitignore
77
+ - Gemfile.lock
78
+ - README.md
79
+ - facebook_word_counter.gemspec
80
+ - gemfile
81
+ - lib/facebook_word_counter.rb
82
+ - lib/facebook_word_counter/facebook_word_counter.rb
83
+ - lib/facebook_word_counter/word_counter.rb
84
+ homepage: http://rubygems.org/gems/facebook_word_counter
85
+ licenses:
86
+ - MIT
87
+ metadata: {}
88
+ post_install_message:
89
+ rdoc_options: []
90
+ require_paths:
91
+ - lib
92
+ required_ruby_version: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ! '>='
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ required_rubygems_version: !ruby/object:Gem::Requirement
98
+ requirements:
99
+ - - ! '>='
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ requirements: []
103
+ rubyforge_project:
104
+ rubygems_version: 2.1.5
105
+ signing_key:
106
+ specification_version: 4
107
+ summary: Get word counts from Facebook pages
108
+ test_files: []