ridic 0.5.2 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -20,18 +20,21 @@ Or install it yourself as:
20
20
 
21
21
  $ gem install ridic
22
22
 
23
- ## Usage
23
+ ## Documentation
24
+
25
+ API Methods :
24
26
 
25
- Search Methods :
26
27
  * RiDic.word_match("word")
28
+ * Returns categories if word is an exact match
27
29
  * RiDic.stem_match("word")
30
+ * Returns the categories of the first dictionary stem that the word begins with, or nil if no stem matches
31
+ * RiDic.all_categories_in_document("document text")
32
+ * Returns a list with one entry per word in the given body of text: the categories matched by that word, or nil if the word has no match
33
+ * RiDic.category_in_document("document text", category_number)
34
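+ * Returns a list containing, for each matched word in the given body of text, its category at the requested level of the hierarchy (e.g. 1, 2 or 3); words with no category at that level are omitted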
35
+ * RiDic.category_distribution("document text", category_number)
36
+ * Returns the number of occurrences of each category (at the requested level of the hierarchy) in a body of text, ordered from most to least frequent
28
37
 
29
- word_match will match words that are *not* stemmed
30
- stem_match will match words against stems
31
-
32
- The logic is separated for performance reasons -
33
- stem_match will iterate over the dictionary and return once a match has been found
34
- Both methods return nil if no match is found
35
38
 
36
39
  ## Contributing
37
40
 
data/lib/ridic/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module RiDic
2
- VERSION = "0.5.2"
2
+ VERSION = "0.6.1"
3
3
  end
data/lib/ridic.rb CHANGED
@@ -3,12 +3,42 @@ require "ridic/dictionary"
3
3
 
4
4
  module RiDic
5
5
  def self.word_match(text_word)
6
- RiDic::Dictionary.words[text_word.upcase]
6
+ dictionary_1 = RiDic::Dictionary.words[text_word.upcase]
7
+ dictionary_1 == nil ? RiDic::Dictionary.word_stems[text_word.upcase] : dictionary_1
7
8
  end
8
9
 
9
10
  def self.stem_match(text_word)
10
11
  text_word.upcase!
11
- RiDic::Dictionary.word_stems.each {|key, value| (return [key,value]) if text_word.match("^#{key}")}
12
+ RiDic::Dictionary.word_stems.each {|key, value| (return value) if text_word.match("^#{key}")}
12
13
  nil
13
14
  end
15
+
16
+ def self.all_categories_in_document(document_text, result = [])
17
+ sanitize(document_text).split(' ').each do |elem|
18
+ word_match(elem) == nil ? result << stem_match(elem) : result << word_match(elem)
19
+ end
20
+ result
21
+ end
22
+
23
+ def self.category_in_document(document_text, category_number, result = [])
24
+ all_categories_in_document(sanitize(document_text)).delete_if {|i| i == nil}.each {|elem| result << [elem[category_number - 1]]}
25
+ result.delete_if {|i| i == [""]}
26
+ end
27
+
28
+ def self.category_distribution(document_text, category_number, result = Hash.new(0))
29
+ first_categories = category_in_document(sanitize(document_text), category_number).delete_if {|i| i == nil}
30
+ first_categories.each {|elem| result[elem.first] += 1}
31
+ sort_distribution(result)
32
+ end
33
+
34
+ private
35
+
36
+ def self.sort_distribution(distribution_set, result = Hash.new)
37
+ distribution_set.sort_by {|key, value| value}.reverse.each {|i| result[i[0]] = i[1]}
38
+ result
39
+ end
40
+
41
+ def self.sanitize(document_text)
42
+ document_text.split(' ').each {|word| word.gsub!(/\W/, '')}.join(' ')
43
+ end
14
44
  end
data/spec/ridic_spec.rb CHANGED
@@ -13,12 +13,129 @@ describe 'RiDic' do
13
13
 
14
14
  describe '.stem_match' do
15
15
  it 'retrives word if stem found in dictionary' do
16
- RiDic.stem_match('poetically').should eql(["POET", ["EXPRESSIVE_BEH", "", "EMOTIONS"]])
16
+ RiDic.stem_match('poetically').should eql(["EXPRESSIVE_BEH", "", "EMOTIONS"])
17
17
  end
18
18
 
19
19
  it 'returns il if stem does not exist in dictionary' do
20
20
  RiDic.stem_match('mxyzptlk').should eql(nil)
21
21
  end
22
22
  end
23
+
24
+ describe '.all_categories_in_document' do
25
+ let(:document_text) {"splendid and sublime ale critiqued and bit the harlot of an apple"}
26
+ let(:categories_result){RiDic.all_categories_in_document(document_text)}
27
+
28
+ it 'returns an element for every word in the document' do
29
+ categories_result.length.should eql(document_text.split.length)
30
+ end
31
+
32
+ it 'correctly evaluates the first dictionary item' do
33
+ categories_result.first.should eql(["GLORY", "", "EMOTIONS"])
34
+ end
35
+ end
36
+
37
+ describe '.category_in_document' do
38
+ context 'category 1' do
39
+ let(:document_text) {"splendid endearing and cordial sublime ale whatcha callit"}
40
+ let(:result) {RiDic.category_in_document(document_text, 1)}
41
+
42
+ it 'returns an element for each dictionary match' do
43
+ result.length.should eql(6)
44
+ end
45
+
46
+ it 'returns a collection of elements that each also contain a single element' do
47
+ result.first.length.should eql(1)
48
+ end
49
+
50
+ it 'correctly evaluates the first items category to be GLORY' do
51
+ result.first.should eql(['GLORY'])
52
+ end
53
+
54
+ it 'correctly evaluates the last items category to be SOCIAL_BEHAVIOR' do
55
+ result[-1].should eql(['SOCIAL_BEHAVIOR'])
56
+ end
57
+ end
58
+ context 'category 2' do
59
+ let(:document_text) {"splendid endearing and cordial sublime ale whatcha callit"}
60
+ let(:result) {RiDic.category_in_document(document_text, 2)}
61
+
62
+ it 'correctly evaluates the first items category to be GLORY' do
63
+ result.first.should eql(['NEED'])
64
+ end
65
+ end
66
+ context 'category 3' do
67
+ let(:document_text) {"splendid endearing and cordial sublime ale whatcha callit"}
68
+ let(:result) {RiDic.category_in_document(document_text, 3)}
69
+
70
+ it 'correctly evaluates the first items category to be GLORY' do
71
+ result.first.should eql(['EMOTIONS'])
72
+ end
73
+ end
74
+
75
+ end
76
+
77
+ describe '.category_distribution' do
78
+ context 'provided with a list including dictionary words - category 1' do
79
+ let(:document_text) {"splendid endearing and cordial sublime ale whatcha callit advisors"}
80
+ let(:result) {RiDic.category_distribution(document_text, 1)}
81
+
82
+ it 'returns an element for each category match' do
83
+ result.length.should eql(4)
84
+ end
85
+
86
+ it 'evaluates the correct number of words associated with AFFECTION' do
87
+ result["AFFECTION"].should eql(2)
88
+ end
89
+
90
+ it 'evaluates the correct number of words associated with AFFECTION' do
91
+ result["SOCIAL_BEHAVIOR"].should eql(2)
92
+ end
93
+ end
94
+
95
+ context 'provided a list including no dictionary words - category 1' do
96
+ let(:document_text) {"humpty dumpty's funky"}
97
+ let(:result) {RiDic.category_distribution(document_text, 1)}
98
+
99
+ it 'returns no elements' do
100
+ result.length.should eql(0)
101
+ end
102
+ end
103
+
104
+ context 'category 2' do
105
+ let(:document_text) {"splendid endearing and cordial sublime ale whatcha callit advisors"}
106
+ let(:result) {RiDic.category_distribution(document_text, 2)}
107
+
108
+ it 'evaluates the correct number of words associated with NEED' do
109
+ result["NEED"].should eql(1)
110
+ end
111
+ end
112
+
113
+ context 'category 3' do
114
+ let(:document_text) {"splendid endearing and cordial sublime ale whatcha callit advisors"}
115
+ let(:result) {RiDic.category_distribution(document_text, 3)}
116
+
117
+ it 'evaluates the correct number of words associated with EMOTIONS' do
118
+ result["EMOTIONS"].should eql(4)
119
+ end
120
+ end
121
+ end
122
+
123
+ describe '.sanitize' do
124
+ let(:document_text) {"test, this is a string."}
125
+ it 'removes non alphabetic chars from a string' do
126
+ RiDic.sanitize(document_text).should eql("test this is a string")
127
+ end
128
+ end
129
+
130
+ describe '.sort_distribution' do
131
+ let(:document_text) {"splendid amorous endearing and cordial sublime ale whatcha callit advisors"}
132
+ let(:distribution_set) {RiDic.category_distribution(document_text,1)}
133
+ let(:result) {RiDic.sort_distribution(distribution_set)}
134
+
135
+ it 'sorts a distribution set based on the value (number of instances)' do
136
+ result.first.should eql(["AFFECTION", 3])
137
+ end
138
+ end
139
+
23
140
  end
24
141
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ridic
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.2
4
+ version: 0.6.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-12-17 00:00:00.000000000 Z
12
+ date: 2012-12-19 00:00:00.000000000 Z
13
13
  dependencies: []
14
14
  description: Ruby gem wrapper for the Regressive Imagery Dictionary
15
15
  email: