wordcloud 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (2)
  1. data/lib/wordcloud.rb +49 -17
  2. metadata +2 -2
@@ -1,30 +1,48 @@
1
+ require 'json'
2
+
1
3
  class WordCloud
2
4
  def initialize(input)
3
- @input = input
5
+ @input = JSON.parse(input)
4
6
  @output = ""
5
7
  @wordhash = Hash.new
6
8
  end
7
9
 
8
10
  # Splits corpus on words
9
- def parse
10
- splitinput = @input.split(" ")
11
-
12
- splitinput.each do |w|
13
- if w.include? "\\n"
14
- w.gsub!("\\n", "<br />")
11
+ def parse
12
+ docnum = 0
13
+ @input.each do |i|
14
+ i.each do |j|
15
+ splitinput = j[1].split(" ")
16
+ splitinput.each do |w|
17
+ if w.include? "\\n"
18
+ w.gsub!("\\n", "<br />")
19
+ end
20
+ wordCount(w)
21
+ end
15
22
  end
16
- wordCount(w)
23
+ docnum += 1
17
24
  end
18
25
 
19
- genOutput
26
+ @input.each do |i|
27
+ i.each do |j|
28
+ @output = @output + "<b>" + j[0] + ": " + "</b>" + genOutput(j[1], docnum) + "<br />"
29
+ end
30
+ @output = @output + "<br />"
31
+ end
20
32
  return @output
21
33
  end
22
34
 
23
35
  # Counts number of times a word shows up
24
36
  def wordCount(word)
25
- commonwords = ["the", "and", "of", "a", "to", "is", "in", "its", "The", "on", "as", "for", "has", "will", "As", "or", "have", "while", "While", "that", "out", "such", "also", "by", "said", "with", "than", "only", "into", "an", "one", "other", "but", "for", "from", "<br />", "I", "more", "about", "About", "again", "Again", "against", "all", "are", "at", "be", "being", "been", "can", "could", "did", "do", "don't", "down", "up", "each", "few", "get", "got", "great", "had", "have", "has", "he", "her", "she", "he", "it", "we", "they", "if", "thus", "it's", "hers", "his", "how", "why", "when", "where", "just", "like", "you", "me", "my", "most", "more", "no", "not", "yes", "off", "once", "only", "our", "out", "over", "under", "own", "then", "some", "these", "there", "then", "this", "those", "too", "through", "between", "until", "very", "who", "with", "wouldn't", "would"]
37
+ commonwords = ["the", "and", "of", "a", "to", "is", "in", "its", "The", "on", "as", "for", "has", "will", "As", "or", "have", "while", "While", "that", "out", "such", "also", "by", "said", "with", "than", "only", "into", "an", "one", "other", "but", "for", "from", "<br />", "I", "more", "about", "About", "again", "Again", "against", "all", "are", "at", "be", "being", "been", "can", "could", "did", "do", "don't", "down", "up", "each", "few", "get", "got", "great", "had", "have", "has", "he", "her", "she", "he", "it", "we", "they", "if", "thus", "it's", "hers", "his", "how", "why", "when", "where", "just", "like", "you", "me", "my", "most", "more", "no", "not", "yes", "off", "once", "only", "our", "out", "over", "under", "own", "then", "some", "these", "there", "then", "this", "those", "too", "through", "between", "until", "very", "who", "with", "wouldn't", "would", "was", "were", "itself", "himself", "herself", "which", "make", "during", "before", "after", "if", "any", "become", "around", "several", "them", "their", "however"]
26
38
 
27
- if (@wordhash[word]) && (!commonwords.include? word)
39
+ # Make capitalized array of common words
40
+ commoncaps = Array.new
41
+ commonwords.each do |c|
42
+ commoncaps.push(c.capitalize)
43
+ end
44
+
45
+ if (@wordhash[word]) && (!commonwords.include? word) && (!commoncaps.include? word)
28
46
  @wordhash[word] += 1
29
47
  else
30
48
  @wordhash[word] = 1
@@ -32,19 +50,33 @@ class WordCloud
32
50
  end
33
51
 
34
52
  # Generates HTML output based on word size
35
- def genOutput
36
- splitinput = @input.split(" ")
53
+ def genOutput(input, docnum)
54
+ splitinput = input.split(/ /)
55
+ output = ""
37
56
 
38
57
  splitinput.each do |w|
39
- if w.include? "\\n"
40
- w.gsub!("\\n", "<br />")
58
+ if w =~ /\n/
59
+ w.gsub!(/\n/, "<br />")
41
60
  end
42
61
 
43
62
  if @wordhash[w]
44
- size = 13 + @wordhash[w]
45
- @output = @output + " <span style=\"font-size:" + size.to_s + "px\">" + w + "</span>"
63
+ size = 10 + @wordhash[w]
64
+ if @wordhash[w] > 2
65
+ size = size - (docnum*0.1)
66
+ end
67
+
68
+ if size > 18
69
+ size = 18
70
+ output = output + " <span style=\"font-size:" + size.to_s + "px\"><b>" + w + "</b></span>"
71
+ else
72
+ output = output + " <span style=\"font-size:" + size.to_s + "px\">" + w + "</span>"
73
+ end
74
+ else
75
+ output = output + " " + w
46
76
  end
47
77
  end
78
+
79
+ return output
48
80
  end
49
81
  end
50
82
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wordcloud
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-01-09 00:00:00.000000000 Z
12
+ date: 2014-03-29 00:00:00.000000000 Z
13
13
  dependencies: []
14
14
  description: Takes input and outputs the same text with word size changed based on
15
15
  frequency.