ridic 0.5.2 → 0.6.1

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -20,18 +20,21 @@ Or install it yourself as:
20
20
 
21
21
  $ gem install ridic
22
22
 
23
- ## Usage
23
+ ## Documentation
24
+
25
+ API Methods :
24
26
 
25
- Search Methods :
26
27
  * RiDic.word_match("word")
28
+ * Returns categories if word is an exact match
27
29
  * RiDic.stem_match("word")
30
+ * Returns categories if the word begins with one of the dictionary's stems
31
+ * RiDic.all_categories_in_document("document text")
32
+ * Returns a list of all categories matching all words in a given body of text
33
+ * RiDic.category_in_document("document text", category_number)
34
+ * Returns a list of the categories, at the given level of the category hierarchy, for the words contained in a given body of text
35
+ * RiDic.category_distribution("document text", category_number)
36
+ * Returns the number of occurrences of each category (at the given level of the category hierarchy) contained in a body of text, sorted from most to least frequent
28
37
 
29
- word_match will match words that are *not* stemmed
30
- stem_match will match words against stems
31
-
32
- The logic is separated for performance reasons -
33
- stem_match will iterate over the dictionary and return once a match has been found
34
- Both methods return nil if no match is found
35
38
 
36
39
  ## Contributing
37
40
 
data/lib/ridic/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module RiDic
2
- VERSION = "0.5.2"
2
+ VERSION = "0.6.1"
3
3
  end
data/lib/ridic.rb CHANGED
@@ -3,12 +3,42 @@ require "ridic/dictionary"
3
3
 
4
4
  module RiDic
5
5
  def self.word_match(text_word)
6
- RiDic::Dictionary.words[text_word.upcase]
6
+ dictionary_1 = RiDic::Dictionary.words[text_word.upcase]
7
+ dictionary_1 == nil ? RiDic::Dictionary.word_stems[text_word.upcase] : dictionary_1
7
8
  end
8
9
 
9
10
  def self.stem_match(text_word)
10
11
  text_word.upcase!
11
- RiDic::Dictionary.word_stems.each {|key, value| (return [key,value]) if text_word.match("^#{key}")}
12
+ RiDic::Dictionary.word_stems.each {|key, value| (return value) if text_word.match("^#{key}")}
12
13
  nil
13
14
  end
15
+
16
+ def self.all_categories_in_document(document_text, result = [])
17
+ sanitize(document_text).split(' ').each do |elem|
18
+ word_match(elem) == nil ? result << stem_match(elem) : result << word_match(elem)
19
+ end
20
+ result
21
+ end
22
+
23
+ def self.category_in_document(document_text, category_number, result = [])
24
+ all_categories_in_document(sanitize(document_text)).delete_if {|i| i == nil}.each {|elem| result << [elem[category_number - 1]]}
25
+ result.delete_if {|i| i == [""]}
26
+ end
27
+
28
+ def self.category_distribution(document_text, category_number, result = Hash.new(0))
29
+ first_categories = category_in_document(sanitize(document_text), category_number).delete_if {|i| i == nil}
30
+ first_categories.each {|elem| result[elem.first] += 1}
31
+ sort_distribution(result)
32
+ end
33
+
34
+ private
35
+
36
+ def self.sort_distribution(distribution_set, result = Hash.new)
37
+ distribution_set.sort_by {|key, value| value}.reverse.each {|i| result[i[0]] = i[1]}
38
+ result
39
+ end
40
+
41
+ def self.sanitize(document_text)
42
+ document_text.split(' ').each {|word| word.gsub!(/\W/, '')}.join(' ')
43
+ end
14
44
  end
data/spec/ridic_spec.rb CHANGED
@@ -13,12 +13,129 @@ describe 'RiDic' do
13
13
 
14
14
  describe '.stem_match' do
15
15
  it 'retrives word if stem found in dictionary' do
16
- RiDic.stem_match('poetically').should eql(["POET", ["EXPRESSIVE_BEH", "", "EMOTIONS"]])
16
+ RiDic.stem_match('poetically').should eql(["EXPRESSIVE_BEH", "", "EMOTIONS"])
17
17
  end
18
18
 
19
19
  it 'returns il if stem does not exist in dictionary' do
20
20
  RiDic.stem_match('mxyzptlk').should eql(nil)
21
21
  end
22
22
  end
23
+
24
+ describe '.all_categories_in_document' do
25
+ let(:document_text) {"splendid and sublime ale critiqued and bit the harlot of an apple"}
26
+ let(:categories_result){RiDic.all_categories_in_document(document_text)}
27
+
28
+ it 'returns an element for every word in the document' do
29
+ categories_result.length.should eql(document_text.split.length)
30
+ end
31
+
32
+ it 'correctly evaluates the first dictionary item' do
33
+ categories_result.first.should eql(["GLORY", "", "EMOTIONS"])
34
+ end
35
+ end
36
+
37
+ describe '.category_in_document' do
38
+ context 'category 1' do
39
+ let(:document_text) {"splendid endearing and cordial sublime ale whatcha callit"}
40
+ let(:result) {RiDic.category_in_document(document_text, 1)}
41
+
42
+ it 'returns an element for each dictionary match' do
43
+ result.length.should eql(6)
44
+ end
45
+
46
+ it 'returns a collection of elements that each also contain a single element' do
47
+ result.first.length.should eql(1)
48
+ end
49
+
50
+ it 'correctly evaluates the first items category to be GLORY' do
51
+ result.first.should eql(['GLORY'])
52
+ end
53
+
54
+ it 'correctly evaluates the last items category to be SOCIAL_BEHAVIOR' do
55
+ result[-1].should eql(['SOCIAL_BEHAVIOR'])
56
+ end
57
+ end
58
+ context 'category 2' do
59
+ let(:document_text) {"splendid endearing and cordial sublime ale whatcha callit"}
60
+ let(:result) {RiDic.category_in_document(document_text, 2)}
61
+
62
+ it 'correctly evaluates the first items category to be GLORY' do
63
+ result.first.should eql(['NEED'])
64
+ end
65
+ end
66
+ context 'category 3' do
67
+ let(:document_text) {"splendid endearing and cordial sublime ale whatcha callit"}
68
+ let(:result) {RiDic.category_in_document(document_text, 3)}
69
+
70
+ it 'correctly evaluates the first items category to be GLORY' do
71
+ result.first.should eql(['EMOTIONS'])
72
+ end
73
+ end
74
+
75
+ end
76
+
77
+ describe '.category_distribution' do
78
+ context 'provided with a list including dictionary words - category 1' do
79
+ let(:document_text) {"splendid endearing and cordial sublime ale whatcha callit advisors"}
80
+ let(:result) {RiDic.category_distribution(document_text, 1)}
81
+
82
+ it 'returns an element for each category match' do
83
+ result.length.should eql(4)
84
+ end
85
+
86
+ it 'evaluates the correct number of words associated with AFFECTION' do
87
+ result["AFFECTION"].should eql(2)
88
+ end
89
+
90
+ it 'evaluates the correct number of words associated with AFFECTION' do
91
+ result["SOCIAL_BEHAVIOR"].should eql(2)
92
+ end
93
+ end
94
+
95
+ context 'provided a list including no dictionary words - category 1' do
96
+ let(:document_text) {"humpty dumpty's funky"}
97
+ let(:result) {RiDic.category_distribution(document_text, 1)}
98
+
99
+ it 'returns no elements' do
100
+ result.length.should eql(0)
101
+ end
102
+ end
103
+
104
+ context 'category 2' do
105
+ let(:document_text) {"splendid endearing and cordial sublime ale whatcha callit advisors"}
106
+ let(:result) {RiDic.category_distribution(document_text, 2)}
107
+
108
+ it 'evaluates the correct number of words associated with NEED' do
109
+ result["NEED"].should eql(1)
110
+ end
111
+ end
112
+
113
+ context 'category 3' do
114
+ let(:document_text) {"splendid endearing and cordial sublime ale whatcha callit advisors"}
115
+ let(:result) {RiDic.category_distribution(document_text, 3)}
116
+
117
+ it 'evaluates the correct number of words associated with EMOTIONS' do
118
+ result["EMOTIONS"].should eql(4)
119
+ end
120
+ end
121
+ end
122
+
123
+ describe '.sanitize' do
124
+ let(:document_text) {"test, this is a string."}
125
+ it 'removes non alphabetic chars from a string' do
126
+ RiDic.sanitize(document_text).should eql("test this is a string")
127
+ end
128
+ end
129
+
130
+ describe '.sort_distribution' do
131
+ let(:document_text) {"splendid amorous endearing and cordial sublime ale whatcha callit advisors"}
132
+ let(:distribution_set) {RiDic.category_distribution(document_text,1)}
133
+ let(:result) {RiDic.sort_distribution(distribution_set)}
134
+
135
+ it 'sorts a distribution set based on the value (number of instances)' do
136
+ result.first.should eql(["AFFECTION", 3])
137
+ end
138
+ end
139
+
23
140
  end
24
141
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ridic
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.2
4
+ version: 0.6.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-12-17 00:00:00.000000000 Z
12
+ date: 2012-12-19 00:00:00.000000000 Z
13
13
  dependencies: []
14
14
  description: Ruby gem wrapper for the Regressive Imagery Dictionary
15
15
  email: