facebook_word_counter 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
NGNmZTk4MjQzOGYyNDYxMGVkNWU5ZWMxZDk4ZTUyYTRiMzk5MTNjOA==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
ZjE4ODk1NGRmNjI2YTkxZDNlNTlkYjU1OWE3NTA5MzE2MjU2ZDg2Zg==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
YzRhYWZkYzcwOGJkZDY4MTYwYWUyYzg0M2ExZDZlYWIyMmZkYWUyZmFjZWVl
|
10
|
+
MWJhOWE5MzMxY2IxODNlNjFhYjlkYjgzM2UzZjdiZGNlOTg4MTNkYTU0OWM0
|
11
|
+
MWU0ZDhkZDY2ZmFjNTA5MTg5ODM5NzkwNWNmZmIwMmEwYWViNzE=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
OTVlZjA2YjM0MDY5M2IyZWM2MDFlYjYwZjZlMzE5ODE4NjQzZWVhOGMwOGM2
|
14
|
+
ZmVmMzFjYjViMTNlNTBmOGFkOGYyYWZiMDgwODkyN2NjNWI0ZDA1NDI5N2I1
|
15
|
+
Yzc0Y2RkNjdhOGZmOWE0MjlkNjM2YTJhNWIyNDIxNzQ1Nzk1NzI=
|
@@ -3,8 +3,8 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
3
3
|
|
4
4
|
Gem::Specification.new do |spec|
|
5
5
|
spec.name = 'facebook_word_counter'
|
6
|
-
spec.version = '0.0.1'
|
7
|
-
spec.date = '2014-08-
|
6
|
+
spec.version = '0.0.2'
|
7
|
+
spec.date = '2014-08-22'
|
8
8
|
spec.summary = "Get word counts from Facebook pages"
|
9
9
|
spec.description = "Given a list of facebook ids, scrapes the front page of posts and returns a hash of word counts"
|
10
10
|
spec.authors = ["James Robinson"]
|
@@ -22,7 +22,6 @@ class FacebookWordCounter
|
|
22
22
|
{
|
23
23
|
name: get_facebook_name,
|
24
24
|
image: get_facebook_photo_url,
|
25
|
-
# facebook_url: "#{FACEBOOK_URL_PREFIX}#{url}",
|
26
25
|
words: get_word_counts_on_page
|
27
26
|
}
|
28
27
|
end
|
@@ -46,34 +45,11 @@ class FacebookWordCounter
|
|
46
45
|
end
|
47
46
|
|
48
47
|
def self.get_word_counts_on_page
|
49
|
-
all_words = @agent.page.search(".userContent").map { |post| clean_text(post.text) }.join(' ')
|
48
|
+
all_words = @agent.page.search(".userContent").map { |post| TextCleaner.clean_text(post.text) }.join(' ')
|
50
49
|
WordCounter.count_words(all_words)
|
51
50
|
end
|
52
51
|
|
53
52
|
def self.get_facebook_photo_url
|
54
53
|
@agent.page.search("img.profilePic").first['src']
|
55
54
|
end
|
56
|
-
|
57
|
-
def self.clean_text(text)
|
58
|
-
text = remove_urls(text.downcase)
|
59
|
-
text = remove_punctuation(text)
|
60
|
-
text = remove_filler_words(text)
|
61
|
-
text.strip
|
62
|
-
end
|
63
|
-
|
64
|
-
def self.remove_urls(text)
|
65
|
-
text.gsub(/https?:\/\/[\S]+/, '')
|
66
|
-
end
|
67
|
-
|
68
|
-
def self.remove_punctuation(text)
|
69
|
-
text.gsub(/[^a-zA-Z\s]/, '')
|
70
|
-
end
|
71
|
-
|
72
|
-
FILLER_WORDS = %w{ and the of as if is to in my a be will i ive are not my are not can out go am im for }
|
73
|
-
def self.remove_filler_words(text)
|
74
|
-
FILLER_WORDS.each do |word|
|
75
|
-
text.gsub!(/ #{word} / , ' ')
|
76
|
-
end
|
77
|
-
text
|
78
|
-
end
|
79
55
|
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# Normalises free-form post text before word counting: lower-cases it,
# strips URLs, drops every character that is not an ASCII letter or
# whitespace, and removes common filler words.
module TextCleaner
  # Words that carry no signal for word counts. Entries such as "ive" and
  # "im" are the forms "i've" / "i'm" take once remove_punctuation has
  # deleted the apostrophe.
  FILLER_WORDS = %w[
    and the of as if is to in my a be will i ive are not can out go am im for
  ].freeze

  # Matches one filler word that is a complete whitespace-delimited token,
  # plus at most one whitespace character following it. The lookarounds
  # (instead of literal spaces) let the pattern match at the start and end
  # of the text, next to tabs/newlines, and on back-to-back fillers.
  FILLER_PATTERN = /(?<!\S)(?:#{Regexp.union(FILLER_WORDS).source})(?!\S)\s?/

  # Runs the full cleaning pipeline on +text+.
  #
  # @param text [String] raw text
  # @return [String] a new, lower-cased, cleaned string with leading and
  #   trailing whitespace removed
  def self.clean_text(text)
    without_urls = remove_urls(text.downcase)
    letters_only = remove_punctuation(without_urls)
    remove_filler_words(letters_only).strip
  end

  # Deletes every http:// or https:// URL, up to the next whitespace.
  # The match is case-sensitive; clean_text lower-cases the text first.
  #
  # @param text [String]
  # @return [String] a new string without URLs
  def self.remove_urls(text)
    text.gsub(%r{https?://\S+}, '')
  end

  # Deletes every character that is neither an ASCII letter nor whitespace.
  # NOTE: this also removes digits and non-ASCII (e.g. accented) letters.
  #
  # @param text [String]
  # @return [String] a new string containing only a-z, A-Z and whitespace
  def self.remove_punctuation(text)
    text.gsub(/[^a-zA-Z\s]/, '')
  end

  # Removes every filler word that stands alone as a whitespace-delimited
  # token. Matching is case-sensitive (the list is lower-case). The
  # argument is not modified, so frozen strings are accepted.
  #
  # @param text [String]
  # @return [String] a new string with filler words removed
  def self.remove_filler_words(text)
    text.gsub(FILLER_PATTERN, '')
  end
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: facebook_word_counter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Robinson
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-08-
|
11
|
+
date: 2014-08-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -80,6 +80,7 @@ files:
|
|
80
80
|
- gemfile
|
81
81
|
- lib/facebook_word_counter.rb
|
82
82
|
- lib/facebook_word_counter/facebook_word_counter.rb
|
83
|
+
- lib/facebook_word_counter/text_cleaner.rb
|
83
84
|
- lib/facebook_word_counter/word_counter.rb
|
84
85
|
homepage: http://rubygems.org/gems/facebook_word_counter
|
85
86
|
licenses:
|