wordcloud 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/wordcloud.rb +49 -17
- metadata +2 -2
data/lib/wordcloud.rb
CHANGED
@@ -1,30 +1,48 @@
|
|
1
|
+
require 'json'
|
2
|
+
|
1
3
|
class WordCloud
|
2
4
|
def initialize(input)
|
3
|
-
@input = input
|
5
|
+
@input = JSON.parse(input)
|
4
6
|
@output = ""
|
5
7
|
@wordhash = Hash.new
|
6
8
|
end
|
7
9
|
|
8
10
|
# Splits corpus on words
|
9
|
-
def parse
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
11
|
+
def parse
|
12
|
+
docnum = 0
|
13
|
+
@input.each do |i|
|
14
|
+
i.each do |j|
|
15
|
+
splitinput = j[1].split(" ")
|
16
|
+
splitinput.each do |w|
|
17
|
+
if w.include? "\\n"
|
18
|
+
w.gsub!("\\n", "<br />")
|
19
|
+
end
|
20
|
+
wordCount(w)
|
21
|
+
end
|
15
22
|
end
|
16
|
-
|
23
|
+
docnum += 1
|
17
24
|
end
|
18
25
|
|
19
|
-
|
26
|
+
@input.each do |i|
|
27
|
+
i.each do |j|
|
28
|
+
@output = @output + "<b>" + j[0] + ": " + "</b>" + genOutput(j[1], docnum) + "<br />"
|
29
|
+
end
|
30
|
+
@output = @output + "<br />"
|
31
|
+
end
|
20
32
|
return @output
|
21
33
|
end
|
22
34
|
|
23
35
|
# Counts number of times a word shows up
|
24
36
|
def wordCount(word)
|
25
|
-
commonwords = ["the", "and", "of", "a", "to", "is", "in", "its", "The", "on", "as", "for", "has", "will", "As", "or", "have", "while", "While", "that", "out", "such", "also", "by", "said", "with", "than", "only", "into", "an", "one", "other", "but", "for", "from", "<br />", "I", "more", "about", "About", "again", "Again", "against", "all", "are", "at", "be", "being", "been", "can", "could", "did", "do", "don't", "down", "up", "each", "few", "get", "got", "great", "had", "have", "has", "he", "her", "she", "he", "it", "we", "they", "if", "thus", "it's", "hers", "his", "how", "why", "when", "where", "just", "like", "you", "me", "my", "most", "more", "no", "not", "yes", "off", "once", "only", "our", "out", "over", "under", "own", "then", "some", "these", "there", "then", "this", "those", "too", "through", "between", "until", "very", "who", "with", "wouldn't", "would"]
|
37
|
+
commonwords = ["the", "and", "of", "a", "to", "is", "in", "its", "The", "on", "as", "for", "has", "will", "As", "or", "have", "while", "While", "that", "out", "such", "also", "by", "said", "with", "than", "only", "into", "an", "one", "other", "but", "for", "from", "<br />", "I", "more", "about", "About", "again", "Again", "against", "all", "are", "at", "be", "being", "been", "can", "could", "did", "do", "don't", "down", "up", "each", "few", "get", "got", "great", "had", "have", "has", "he", "her", "she", "he", "it", "we", "they", "if", "thus", "it's", "hers", "his", "how", "why", "when", "where", "just", "like", "you", "me", "my", "most", "more", "no", "not", "yes", "off", "once", "only", "our", "out", "over", "under", "own", "then", "some", "these", "there", "then", "this", "those", "too", "through", "between", "until", "very", "who", "with", "wouldn't", "would", "was", "were", "itself", "himself", "herself", "which", "make", "during", "before", "after", "if", "any", "become", "around", "several", "them", "their", "however"]
|
26
38
|
|
27
|
-
|
39
|
+
# Make capitalized array of common words
|
40
|
+
commoncaps = Array.new
|
41
|
+
commonwords.each do |c|
|
42
|
+
commoncaps.push(c.capitalize)
|
43
|
+
end
|
44
|
+
|
45
|
+
if (@wordhash[word]) && (!commonwords.include? word) && (!commoncaps.include? word)
|
28
46
|
@wordhash[word] += 1
|
29
47
|
else
|
30
48
|
@wordhash[word] = 1
|
@@ -32,19 +50,33 @@ class WordCloud
|
|
32
50
|
end
|
33
51
|
|
34
52
|
# Generates HTML output based on word size
|
35
|
-
def genOutput
|
36
|
-
splitinput =
|
53
|
+
def genOutput(input, docnum)
|
54
|
+
splitinput = input.split(/ /)
|
55
|
+
output = ""
|
37
56
|
|
38
57
|
splitinput.each do |w|
|
39
|
-
if w
|
40
|
-
w.gsub!(
|
58
|
+
if w =~ /\n/
|
59
|
+
w.gsub!(/\n/, "<br />")
|
41
60
|
end
|
42
61
|
|
43
62
|
if @wordhash[w]
|
44
|
-
size =
|
45
|
-
|
63
|
+
size = 10 + @wordhash[w]
|
64
|
+
if @wordhash[w] > 2
|
65
|
+
size = size - (docnum*0.1)
|
66
|
+
end
|
67
|
+
|
68
|
+
if size > 18
|
69
|
+
size = 18
|
70
|
+
output = output + " <span style=\"font-size:" + size.to_s + "px\"><b>" + w + "</b></span>"
|
71
|
+
else
|
72
|
+
output = output + " <span style=\"font-size:" + size.to_s + "px\">" + w + "</span>"
|
73
|
+
end
|
74
|
+
else
|
75
|
+
output = output + " " + w
|
46
76
|
end
|
47
77
|
end
|
78
|
+
|
79
|
+
return output
|
48
80
|
end
|
49
81
|
end
|
50
82
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wordcloud
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2014-
|
12
|
+
date: 2014-03-29 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
14
|
description: Takes input and outputs the same text with word size changed based on
|
15
15
|
frequency.
|