wordcloud 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2):
  1. data/lib/wordcloud.rb +49 -17
  2. metadata +2 -2
@@ -1,30 +1,48 @@
1
+ require 'json'
2
+
1
3
  class WordCloud
2
4
  def initialize(input)
3
- @input = input
5
+ @input = JSON.parse(input)
4
6
  @output = ""
5
7
  @wordhash = Hash.new
6
8
  end
7
9
 
8
10
  # Splits corpus on words
9
- def parse
10
- splitinput = @input.split(" ")
11
-
12
- splitinput.each do |w|
13
- if w.include? "\\n"
14
- w.gsub!("\\n", "<br />")
11
+ def parse
12
+ docnum = 0
13
+ @input.each do |i|
14
+ i.each do |j|
15
+ splitinput = j[1].split(" ")
16
+ splitinput.each do |w|
17
+ if w.include? "\\n"
18
+ w.gsub!("\\n", "<br />")
19
+ end
20
+ wordCount(w)
21
+ end
15
22
  end
16
- wordCount(w)
23
+ docnum += 1
17
24
  end
18
25
 
19
- genOutput
26
+ @input.each do |i|
27
+ i.each do |j|
28
+ @output = @output + "<b>" + j[0] + ": " + "</b>" + genOutput(j[1], docnum) + "<br />"
29
+ end
30
+ @output = @output + "<br />"
31
+ end
20
32
  return @output
21
33
  end
22
34
 
23
35
  # Counts number of times a word shows up
24
36
  def wordCount(word)
25
- commonwords = ["the", "and", "of", "a", "to", "is", "in", "its", "The", "on", "as", "for", "has", "will", "As", "or", "have", "while", "While", "that", "out", "such", "also", "by", "said", "with", "than", "only", "into", "an", "one", "other", "but", "for", "from", "<br />", "I", "more", "about", "About", "again", "Again", "against", "all", "are", "at", "be", "being", "been", "can", "could", "did", "do", "don't", "down", "up", "each", "few", "get", "got", "great", "had", "have", "has", "he", "her", "she", "he", "it", "we", "they", "if", "thus", "it's", "hers", "his", "how", "why", "when", "where", "just", "like", "you", "me", "my", "most", "more", "no", "not", "yes", "off", "once", "only", "our", "out", "over", "under", "own", "then", "some", "these", "there", "then", "this", "those", "too", "through", "between", "until", "very", "who", "with", "wouldn't", "would"]
37
+ commonwords = ["the", "and", "of", "a", "to", "is", "in", "its", "The", "on", "as", "for", "has", "will", "As", "or", "have", "while", "While", "that", "out", "such", "also", "by", "said", "with", "than", "only", "into", "an", "one", "other", "but", "for", "from", "<br />", "I", "more", "about", "About", "again", "Again", "against", "all", "are", "at", "be", "being", "been", "can", "could", "did", "do", "don't", "down", "up", "each", "few", "get", "got", "great", "had", "have", "has", "he", "her", "she", "he", "it", "we", "they", "if", "thus", "it's", "hers", "his", "how", "why", "when", "where", "just", "like", "you", "me", "my", "most", "more", "no", "not", "yes", "off", "once", "only", "our", "out", "over", "under", "own", "then", "some", "these", "there", "then", "this", "those", "too", "through", "between", "until", "very", "who", "with", "wouldn't", "would", "was", "were", "itself", "himself", "herself", "which", "make", "during", "before", "after", "if", "any", "become", "around", "several", "them", "their", "however"]
26
38
 
27
- if (@wordhash[word]) && (!commonwords.include? word)
39
+ # Make capitalized array of common words
40
+ commoncaps = Array.new
41
+ commonwords.each do |c|
42
+ commoncaps.push(c.capitalize)
43
+ end
44
+
45
+ if (@wordhash[word]) && (!commonwords.include? word) && (!commoncaps.include? word)
28
46
  @wordhash[word] += 1
29
47
  else
30
48
  @wordhash[word] = 1
@@ -32,19 +50,33 @@ class WordCloud
32
50
  end
33
51
 
34
52
  # Generates HTML output based on word size
35
- def genOutput
36
- splitinput = @input.split(" ")
53
+ def genOutput(input, docnum)
54
+ splitinput = input.split(/ /)
55
+ output = ""
37
56
 
38
57
  splitinput.each do |w|
39
- if w.include? "\\n"
40
- w.gsub!("\\n", "<br />")
58
+ if w =~ /\n/
59
+ w.gsub!(/\n/, "<br />")
41
60
  end
42
61
 
43
62
  if @wordhash[w]
44
- size = 13 + @wordhash[w]
45
- @output = @output + " <span style=\"font-size:" + size.to_s + "px\">" + w + "</span>"
63
+ size = 10 + @wordhash[w]
64
+ if @wordhash[w] > 2
65
+ size = size - (docnum*0.1)
66
+ end
67
+
68
+ if size > 18
69
+ size = 18
70
+ output = output + " <span style=\"font-size:" + size.to_s + "px\"><b>" + w + "</b></span>"
71
+ else
72
+ output = output + " <span style=\"font-size:" + size.to_s + "px\">" + w + "</span>"
73
+ end
74
+ else
75
+ output = output + " " + w
46
76
  end
47
77
  end
78
+
79
+ return output
48
80
  end
49
81
  end
50
82
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wordcloud
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-01-09 00:00:00.000000000 Z
12
+ date: 2014-03-29 00:00:00.000000000 Z
13
13
  dependencies: []
14
14
  description: Takes input and outputs the same text with word size changed based on
15
15
  frequency.