ridic 0.5.2 → 0.6.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +11 -8
- data/lib/ridic/version.rb +1 -1
- data/lib/ridic.rb +32 -2
- data/spec/ridic_spec.rb +118 -1
- metadata +2 -2
data/README.md
CHANGED
@@ -20,18 +20,21 @@ Or install it yourself as:
|
|
20
20
|
|
21
21
|
$ gem install ridic
|
22
22
|
|
23
|
-
##
|
23
|
+
## Documentation
|
24
|
+
|
25
|
+
API Methods:
|
24
26
|
|
25
|
-
Search Methods :
|
26
27
|
* RiDic.word_match("word")
|
28
|
+
* Returns categories if word is an exact match
|
27
29
|
* RiDic.stem_match("word")
|
30
|
+
* Returns categories if word begins with a stem found in the dictionary
|
31
|
+
* RiDic.all_categories_in_document("document text")
|
32
|
+
* Returns a list of all categories matching all words in a given body of text
|
33
|
+
* RiDic.category_in_document("document text", category_number)
|
34
|
+
* Returns a list of a specific category hierarchy contained in a given body of text
|
35
|
+
* RiDic.category_distribution("document text", category_number)
|
36
|
+
* Returns the number of occurrences of each category (at the given level of the hierarchy) found in a body of text, ordered from most to least frequent
|
28
37
|
|
29
|
-
word_match will match words that are *not* stemmed
|
30
|
-
stem_match will match words against stems
|
31
|
-
|
32
|
-
The logic is separated for performance reasons -
|
33
|
-
stem_match will iterate over the dictionary and return once a match has been found
|
34
|
-
Both methods return nil if no match is found
|
35
38
|
|
36
39
|
## Contributing
|
37
40
|
|
data/lib/ridic/version.rb
CHANGED
data/lib/ridic.rb
CHANGED
@@ -3,12 +3,42 @@ require "ridic/dictionary"
|
|
3
3
|
|
4
4
|
module RiDic
|
5
5
|
def self.word_match(text_word)
|
6
|
-
RiDic::Dictionary.words[text_word.upcase]
|
6
|
+
dictionary_1 = RiDic::Dictionary.words[text_word.upcase]
|
7
|
+
dictionary_1 == nil ? RiDic::Dictionary.word_stems[text_word.upcase] : dictionary_1
|
7
8
|
end
|
8
9
|
|
9
10
|
def self.stem_match(text_word)
|
10
11
|
text_word.upcase!
|
11
|
-
RiDic::Dictionary.word_stems.each {|key, value| (return
|
12
|
+
RiDic::Dictionary.word_stems.each {|key, value| (return value) if text_word.match("^#{key}")}
|
12
13
|
nil
|
13
14
|
end
|
15
|
+
|
16
|
+
def self.all_categories_in_document(document_text, result = [])
|
17
|
+
sanitize(document_text).split(' ').each do |elem|
|
18
|
+
word_match(elem) == nil ? result << stem_match(elem) : result << word_match(elem)
|
19
|
+
end
|
20
|
+
result
|
21
|
+
end
|
22
|
+
|
23
|
+
def self.category_in_document(document_text, category_number, result = [])
|
24
|
+
all_categories_in_document(sanitize(document_text)).delete_if {|i| i == nil}.each {|elem| result << [elem[category_number - 1]]}
|
25
|
+
result.delete_if {|i| i == [""]}
|
26
|
+
end
|
27
|
+
|
28
|
+
def self.category_distribution(document_text, category_number, result = Hash.new(0))
|
29
|
+
first_categories = category_in_document(sanitize(document_text), category_number).delete_if {|i| i == nil}
|
30
|
+
first_categories.each {|elem| result[elem.first] += 1}
|
31
|
+
sort_distribution(result)
|
32
|
+
end
|
33
|
+
|
34
|
+
private
|
35
|
+
|
36
|
+
def self.sort_distribution(distribution_set, result = Hash.new)
|
37
|
+
distribution_set.sort_by {|key, value| value}.reverse.each {|i| result[i[0]] = i[1]}
|
38
|
+
result
|
39
|
+
end
|
40
|
+
|
41
|
+
def self.sanitize(document_text)
|
42
|
+
document_text.split(' ').each {|word| word.gsub!(/\W/, '')}.join(' ')
|
43
|
+
end
|
14
44
|
end
|
data/spec/ridic_spec.rb
CHANGED
@@ -13,12 +13,129 @@ describe 'RiDic' do
|
|
13
13
|
|
14
14
|
describe '.stem_match' do
|
15
15
|
it 'retrives word if stem found in dictionary' do
|
16
|
-
RiDic.stem_match('poetically').should eql(["
|
16
|
+
RiDic.stem_match('poetically').should eql(["EXPRESSIVE_BEH", "", "EMOTIONS"])
|
17
17
|
end
|
18
18
|
|
19
19
|
it 'returns il if stem does not exist in dictionary' do
|
20
20
|
RiDic.stem_match('mxyzptlk').should eql(nil)
|
21
21
|
end
|
22
22
|
end
|
23
|
+
|
24
|
+
describe '.all_categories_in_document' do
|
25
|
+
let(:document_text) {"splendid and sublime ale critiqued and bit the harlot of an apple"}
|
26
|
+
let(:categories_result){RiDic.all_categories_in_document(document_text)}
|
27
|
+
|
28
|
+
it 'returns an element for every word in the document' do
|
29
|
+
categories_result.length.should eql(document_text.split.length)
|
30
|
+
end
|
31
|
+
|
32
|
+
it 'correctly evaluates the first dictionary item' do
|
33
|
+
categories_result.first.should eql(["GLORY", "", "EMOTIONS"])
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
describe '.category_in_document' do
|
38
|
+
context 'category 1' do
|
39
|
+
let(:document_text) {"splendid endearing and cordial sublime ale whatcha callit"}
|
40
|
+
let(:result) {RiDic.category_in_document(document_text, 1)}
|
41
|
+
|
42
|
+
it 'returns an element for each dictionary match' do
|
43
|
+
result.length.should eql(6)
|
44
|
+
end
|
45
|
+
|
46
|
+
it 'returns a collection of elements that each also contain a single element' do
|
47
|
+
result.first.length.should eql(1)
|
48
|
+
end
|
49
|
+
|
50
|
+
it 'correctly evaluates the first items category to be GLORY' do
|
51
|
+
result.first.should eql(['GLORY'])
|
52
|
+
end
|
53
|
+
|
54
|
+
it 'correctly evaluates the last items category to be SOCIAL_BEHAVIOR' do
|
55
|
+
result[-1].should eql(['SOCIAL_BEHAVIOR'])
|
56
|
+
end
|
57
|
+
end
|
58
|
+
context 'category 2' do
|
59
|
+
let(:document_text) {"splendid endearing and cordial sublime ale whatcha callit"}
|
60
|
+
let(:result) {RiDic.category_in_document(document_text, 2)}
|
61
|
+
|
62
|
+
it 'correctly evaluates the first items category to be GLORY' do
|
63
|
+
result.first.should eql(['NEED'])
|
64
|
+
end
|
65
|
+
end
|
66
|
+
context 'category 3' do
|
67
|
+
let(:document_text) {"splendid endearing and cordial sublime ale whatcha callit"}
|
68
|
+
let(:result) {RiDic.category_in_document(document_text, 3)}
|
69
|
+
|
70
|
+
it 'correctly evaluates the first items category to be GLORY' do
|
71
|
+
result.first.should eql(['EMOTIONS'])
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
end
|
76
|
+
|
77
|
+
describe '.category_distribution' do
|
78
|
+
context 'provided with a list including dictionary words - category 1' do
|
79
|
+
let(:document_text) {"splendid endearing and cordial sublime ale whatcha callit advisors"}
|
80
|
+
let(:result) {RiDic.category_distribution(document_text, 1)}
|
81
|
+
|
82
|
+
it 'returns an element for each category match' do
|
83
|
+
result.length.should eql(4)
|
84
|
+
end
|
85
|
+
|
86
|
+
it 'evaluates the correct number of words associated with AFFECTION' do
|
87
|
+
result["AFFECTION"].should eql(2)
|
88
|
+
end
|
89
|
+
|
90
|
+
it 'evaluates the correct number of words associated with AFFECTION' do
|
91
|
+
result["SOCIAL_BEHAVIOR"].should eql(2)
|
92
|
+
end
|
93
|
+
end
|
94
|
+
|
95
|
+
context 'provided a list including no dictionary words - category 1' do
|
96
|
+
let(:document_text) {"humpty dumpty's funky"}
|
97
|
+
let(:result) {RiDic.category_distribution(document_text, 1)}
|
98
|
+
|
99
|
+
it 'returns no elements' do
|
100
|
+
result.length.should eql(0)
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
104
|
+
context 'category 2' do
|
105
|
+
let(:document_text) {"splendid endearing and cordial sublime ale whatcha callit advisors"}
|
106
|
+
let(:result) {RiDic.category_distribution(document_text, 2)}
|
107
|
+
|
108
|
+
it 'evaluates the correct number of words associated with NEED' do
|
109
|
+
result["NEED"].should eql(1)
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
context 'category 3' do
|
114
|
+
let(:document_text) {"splendid endearing and cordial sublime ale whatcha callit advisors"}
|
115
|
+
let(:result) {RiDic.category_distribution(document_text, 3)}
|
116
|
+
|
117
|
+
it 'evaluates the correct number of words associated with EMOTIONS' do
|
118
|
+
result["EMOTIONS"].should eql(4)
|
119
|
+
end
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
describe '.sanitize' do
|
124
|
+
let(:document_text) {"test, this is a string."}
|
125
|
+
it 'removes non alphabetic chars from a string' do
|
126
|
+
RiDic.sanitize(document_text).should eql("test this is a string")
|
127
|
+
end
|
128
|
+
end
|
129
|
+
|
130
|
+
describe '.sort_distribution' do
|
131
|
+
let(:document_text) {"splendid amorous endearing and cordial sublime ale whatcha callit advisors"}
|
132
|
+
let(:distribution_set) {RiDic.category_distribution(document_text,1)}
|
133
|
+
let(:result) {RiDic.sort_distribution(distribution_set)}
|
134
|
+
|
135
|
+
it 'sorts a distribution set based on the value (number of instances)' do
|
136
|
+
result.first.should eql(["AFFECTION", 3])
|
137
|
+
end
|
138
|
+
end
|
139
|
+
|
23
140
|
end
|
24
141
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ridic
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.2
|
4
|
+
version: 0.6.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-12-
|
12
|
+
date: 2012-12-19 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
14
|
description: Ruby gem wrapper for the Regressive Imagery Dictionary
|
15
15
|
email:
|