ridic 0.5.2 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +11 -8
- data/lib/ridic/version.rb +1 -1
- data/lib/ridic.rb +32 -2
- data/spec/ridic_spec.rb +118 -1
- metadata +2 -2
data/README.md
CHANGED
@@ -20,18 +20,21 @@ Or install it yourself as:
|
|
20
20
|
|
21
21
|
$ gem install ridic
|
22
22
|
|
23
|
-
##
|
23
|
+
## Documentation
|
24
|
+
|
25
|
+
API Methods :
|
24
26
|
|
25
|
-
Search Methods :
|
26
27
|
* RiDic.word_match("word")
|
28
|
+
* Returns categories if word is an exact match
|
27
29
|
* RiDic.stem_match("word")
|
30
|
+
* Returns categories if word is a match for a provided stem
|
31
|
+
* RiDic.all_categories_in_document("document text")
|
32
|
+
* Returns a list of all categories matching all words in a given body of text
|
33
|
+
* RiDic.category_in_document("document text", category_number)
|
34
|
+
* Returns a list of a specific category hierarchy contained in a given body of text
|
35
|
+
* RiDic.category_distribution("document text", category_number)
|
36
|
+
* Returns the occurrence of each specified category (within a hierarchy) contained in a body of text
|
28
37
|
|
29
|
-
word_match will match words that are *not* stemmed
|
30
|
-
stem_match will match words against stems
|
31
|
-
|
32
|
-
The logic is separated for performance reasons -
|
33
|
-
stem_match will iterate over the dictionary and return once a match has been found
|
34
|
-
Both methods return nil if no match is found
|
35
38
|
|
36
39
|
## Contributing
|
37
40
|
|
data/lib/ridic/version.rb
CHANGED
data/lib/ridic.rb
CHANGED
@@ -3,12 +3,42 @@ require "ridic/dictionary"
|
|
3
3
|
|
4
4
|
module RiDic
|
5
5
|
def self.word_match(text_word)
|
6
|
-
RiDic::Dictionary.words[text_word.upcase]
|
6
|
+
dictionary_1 = RiDic::Dictionary.words[text_word.upcase]
|
7
|
+
dictionary_1 == nil ? RiDic::Dictionary.word_stems[text_word.upcase] : dictionary_1
|
7
8
|
end
|
8
9
|
|
9
10
|
def self.stem_match(text_word)
|
10
11
|
text_word.upcase!
|
11
|
-
RiDic::Dictionary.word_stems.each {|key, value| (return
|
12
|
+
RiDic::Dictionary.word_stems.each {|key, value| (return value) if text_word.match("^#{key}")}
|
12
13
|
nil
|
13
14
|
end
|
15
|
+
|
16
|
+
def self.all_categories_in_document(document_text, result = [])
|
17
|
+
sanitize(document_text).split(' ').each do |elem|
|
18
|
+
word_match(elem) == nil ? result << stem_match(elem) : result << word_match(elem)
|
19
|
+
end
|
20
|
+
result
|
21
|
+
end
|
22
|
+
|
23
|
+
def self.category_in_document(document_text, category_number, result = [])
|
24
|
+
all_categories_in_document(sanitize(document_text)).delete_if {|i| i == nil}.each {|elem| result << [elem[category_number - 1]]}
|
25
|
+
result.delete_if {|i| i == [""]}
|
26
|
+
end
|
27
|
+
|
28
|
+
def self.category_distribution(document_text, category_number, result = Hash.new(0))
|
29
|
+
first_categories = category_in_document(sanitize(document_text), category_number).delete_if {|i| i == nil}
|
30
|
+
first_categories.each {|elem| result[elem.first] += 1}
|
31
|
+
sort_distribution(result)
|
32
|
+
end
|
33
|
+
|
34
|
+
private
|
35
|
+
|
36
|
+
def self.sort_distribution(distribution_set, result = Hash.new)
|
37
|
+
distribution_set.sort_by {|key, value| value}.reverse.each {|i| result[i[0]] = i[1]}
|
38
|
+
result
|
39
|
+
end
|
40
|
+
|
41
|
+
def self.sanitize(document_text)
|
42
|
+
document_text.split(' ').each {|word| word.gsub!(/\W/, '')}.join(' ')
|
43
|
+
end
|
14
44
|
end
|
data/spec/ridic_spec.rb
CHANGED
@@ -13,12 +13,129 @@ describe 'RiDic' do
|
|
13
13
|
|
14
14
|
describe '.stem_match' do
|
15
15
|
it 'retrives word if stem found in dictionary' do
|
16
|
-
RiDic.stem_match('poetically').should eql(["
|
16
|
+
RiDic.stem_match('poetically').should eql(["EXPRESSIVE_BEH", "", "EMOTIONS"])
|
17
17
|
end
|
18
18
|
|
19
19
|
it 'returns il if stem does not exist in dictionary' do
|
20
20
|
RiDic.stem_match('mxyzptlk').should eql(nil)
|
21
21
|
end
|
22
22
|
end
|
23
|
+
|
24
|
+
describe '.all_categories_in_document' do
|
25
|
+
let(:document_text) {"splendid and sublime ale critiqued and bit the harlot of an apple"}
|
26
|
+
let(:categories_result){RiDic.all_categories_in_document(document_text)}
|
27
|
+
|
28
|
+
it 'returns an element for every word in the document' do
|
29
|
+
categories_result.length.should eql(document_text.split.length)
|
30
|
+
end
|
31
|
+
|
32
|
+
it 'correctly evaluates the first dictionary item' do
|
33
|
+
categories_result.first.should eql(["GLORY", "", "EMOTIONS"])
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
describe '.category_in_document' do
|
38
|
+
context 'category 1' do
|
39
|
+
let(:document_text) {"splendid endearing and cordial sublime ale whatcha callit"}
|
40
|
+
let(:result) {RiDic.category_in_document(document_text, 1)}
|
41
|
+
|
42
|
+
it 'returns an element for each dictionary match' do
|
43
|
+
result.length.should eql(6)
|
44
|
+
end
|
45
|
+
|
46
|
+
it 'returns a collection of elements that each also contain a single element' do
|
47
|
+
result.first.length.should eql(1)
|
48
|
+
end
|
49
|
+
|
50
|
+
it 'correctly evaluates the first items category to be GLORY' do
|
51
|
+
result.first.should eql(['GLORY'])
|
52
|
+
end
|
53
|
+
|
54
|
+
it 'correctly evaluates the last items category to be SOCIAL_BEHAVIOR' do
|
55
|
+
result[-1].should eql(['SOCIAL_BEHAVIOR'])
|
56
|
+
end
|
57
|
+
end
|
58
|
+
context 'category 2' do
|
59
|
+
let(:document_text) {"splendid endearing and cordial sublime ale whatcha callit"}
|
60
|
+
let(:result) {RiDic.category_in_document(document_text, 2)}
|
61
|
+
|
62
|
+
it 'correctly evaluates the first items category to be GLORY' do
|
63
|
+
result.first.should eql(['NEED'])
|
64
|
+
end
|
65
|
+
end
|
66
|
+
context 'category 3' do
|
67
|
+
let(:document_text) {"splendid endearing and cordial sublime ale whatcha callit"}
|
68
|
+
let(:result) {RiDic.category_in_document(document_text, 3)}
|
69
|
+
|
70
|
+
it 'correctly evaluates the first items category to be GLORY' do
|
71
|
+
result.first.should eql(['EMOTIONS'])
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
end
|
76
|
+
|
77
|
+
describe '.category_distribution' do
|
78
|
+
context 'provided with a list including dictionary words - category 1' do
|
79
|
+
let(:document_text) {"splendid endearing and cordial sublime ale whatcha callit advisors"}
|
80
|
+
let(:result) {RiDic.category_distribution(document_text, 1)}
|
81
|
+
|
82
|
+
it 'returns an element for each category match' do
|
83
|
+
result.length.should eql(4)
|
84
|
+
end
|
85
|
+
|
86
|
+
it 'evaluates the correct number of words associated with AFFECTION' do
|
87
|
+
result["AFFECTION"].should eql(2)
|
88
|
+
end
|
89
|
+
|
90
|
+
it 'evaluates the correct number of words associated with AFFECTION' do
|
91
|
+
result["SOCIAL_BEHAVIOR"].should eql(2)
|
92
|
+
end
|
93
|
+
end
|
94
|
+
|
95
|
+
context 'provided a list including no dictionary words - category 1' do
|
96
|
+
let(:document_text) {"humpty dumpty's funky"}
|
97
|
+
let(:result) {RiDic.category_distribution(document_text, 1)}
|
98
|
+
|
99
|
+
it 'returns no elements' do
|
100
|
+
result.length.should eql(0)
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
104
|
+
context 'category 2' do
|
105
|
+
let(:document_text) {"splendid endearing and cordial sublime ale whatcha callit advisors"}
|
106
|
+
let(:result) {RiDic.category_distribution(document_text, 2)}
|
107
|
+
|
108
|
+
it 'evaluates the correct number of words associated with NEED' do
|
109
|
+
result["NEED"].should eql(1)
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
context 'category 3' do
|
114
|
+
let(:document_text) {"splendid endearing and cordial sublime ale whatcha callit advisors"}
|
115
|
+
let(:result) {RiDic.category_distribution(document_text, 3)}
|
116
|
+
|
117
|
+
it 'evaluates the correct number of words associated with EMOTIONS' do
|
118
|
+
result["EMOTIONS"].should eql(4)
|
119
|
+
end
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
describe '.sanitize' do
|
124
|
+
let(:document_text) {"test, this is a string."}
|
125
|
+
it 'removes non alphabetic chars from a string' do
|
126
|
+
RiDic.sanitize(document_text).should eql("test this is a string")
|
127
|
+
end
|
128
|
+
end
|
129
|
+
|
130
|
+
describe '.sort_distribution' do
|
131
|
+
let(:document_text) {"splendid amorous endearing and cordial sublime ale whatcha callit advisors"}
|
132
|
+
let(:distribution_set) {RiDic.category_distribution(document_text,1)}
|
133
|
+
let(:result) {RiDic.sort_distribution(distribution_set)}
|
134
|
+
|
135
|
+
it 'sorts a distribution set based on the value (number of instances)' do
|
136
|
+
result.first.should eql(["AFFECTION", 3])
|
137
|
+
end
|
138
|
+
end
|
139
|
+
|
23
140
|
end
|
24
141
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ridic
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.2
|
4
|
+
version: 0.6.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-12-
|
12
|
+
date: 2012-12-19 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
14
|
description: Ruby gem wrapper for the Regressive Imagery Dictionary
|
15
15
|
email:
|