tagmemics 0.0.0.beta → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/config/adjectives.txt +1137 -0
- data/config/adjectives.txt.bak +1136 -0
- data/config/articles.txt +3 -0
- data/config/conjunctions.txt +7 -0
- data/config/linking_verbs.txt +28 -0
- data/config/prepositions.txt +202 -0
- data/config/pronouns.txt +53 -0
- data/lib/tagmemics.rb +41 -24
- data/lib/tagmemics/{config.rb → load_data.rb} +2 -2
- data/lib/tagmemics/version.rb +3 -0
- data/lib/tagmemics/word.rb +46 -56
- data/lib/tagmemics/word/confidence.rb +65 -0
- data/lib/tagmemics/word/wordnet.rb +38 -9
- metadata +22 -7
@@ -0,0 +1,65 @@
|
|
1
|
+
module Tagmemics
|
2
|
+
class Word
|
3
|
+
class << self
|
4
|
+
# Estimates how likely the word at +index+ in +arr+ is a noun.
#
# The score is a weighted sum divided by 10.0: the WordNet noun
# probability is weighted by 6 and an article found two positions to the
# left is weighted by 2.
#
# FIXME: need to scan left, not hard coded index.
#
# @param arr [Array] sentence words (presumably Strings; confirm with caller)
# @param index [Integer] position in +arr+ of the word to score
# @return [Float] confidence score
def noun_confidence(arr, index)
  str = arr[index]
  # wordnet_probability falls back to 0.0 when the word has no noun entry;
  # indexing the raw possibilities hash returned nil there and raised.
  wordnet_prob = WordNetMethods.wordnet_probability(str, 'noun') * 6

  # A negative index wraps around to the end of the array, so only look
  # two places left when that position actually exists.
  left_word = index >= 2 ? arr[index - 2] : nil
  left_neighbor_article = left_word ? article_confidence(left_word) * 2 : 0.0

  (wordnet_prob + left_neighbor_article) / 10.0
end
|
14
|
+
|
15
|
+
# Scores +str+ as a verb using only its WordNet verb probability,
# weighted by 6 and divided by 10.0.
#
# @param str [String] word to score
# @return [Float] confidence score
def verb_confidence(str)
  probability = WordNetMethods.wordnet_probability(str, 'verb')
  (probability * 6) / 10.0
end
|
21
|
+
|
22
|
+
# Scores +str+ as an adjective using only its WordNet adjective
# probability, weighted by 6 and divided by 10.0.
#
# TODO: update parameters. Left-neighbor adjective and right-neighbor verb
# signals (weight 2 each) were sketched but are not part of the score yet.
#
# @param str [String] word to score
# @return [Float] confidence score
def adjective_confidence(str)
  probability = WordNetMethods.wordnet_probability(str, 'adjective')
  (probability * 6) / 10.0
end
|
31
|
+
|
32
|
+
# Reports whether +str+ appears in the word list +constant+, ignoring case.
#
# @param constant [Enumerable<String>] word list to search
# @param str [String, nil] word to look up; nil never matches a word
# @param positive [Boolean] when truthy the lookup is skipped and the
#   value is returned as-is
# @return [Boolean] true when a case-insensitive match exists
def part_of_speech?(constant, str, positive = false)
  return positive if positive

  # Downcase once up front instead of on every comparison; to_s keeps a
  # nil word from raising NoMethodError.
  target = str.to_s.downcase
  constant.any? { |word| word.downcase == target }
end
|
40
|
+
|
41
|
+
# Placeholder: adverb scoring is not implemented yet.
#
# @param str [String] word to score (currently unused)
# @return [nil] always nil until a scoring rule is added
def adverb_confidence(str)
  nil
end
|
43
|
+
|
44
|
+
# Scores +str+ as a linking verb: 1.0 when part_of_speech? finds it in
# Tagmemics::WordSet::LINKING_VERBS, otherwise 0.0.
#
# @param str [String] word to score
# @return [Float] 1.0 or 0.0
def linking_verb_confidence(str)
  return 1.0 if part_of_speech?(Tagmemics::WordSet::LINKING_VERBS, str)

  0.0
end
|
47
|
+
|
48
|
+
# Scores +str+ as an article: 1.0 when part_of_speech? finds it in
# Tagmemics::WordSet::ARTICLES, otherwise 0.0.
#
# @param str [String] word to score
# @return [Float] 1.0 or 0.0
def article_confidence(str)
  return 1.0 if part_of_speech?(Tagmemics::WordSet::ARTICLES, str)

  0.0
end
|
51
|
+
|
52
|
+
# Scores +str+ as a preposition: 1.0 when part_of_speech? finds it in
# Tagmemics::WordSet::PREPOSITIONS, otherwise 0.0.
#
# @param str [String] word to score
# @return [Float] 1.0 or 0.0
def preposition_confidence(str)
  return 1.0 if part_of_speech?(Tagmemics::WordSet::PREPOSITIONS, str)

  0.0
end
|
55
|
+
|
56
|
+
# Scores +str+ as a pronoun: 1.0 when part_of_speech? finds it in
# Tagmemics::WordSet::PRONOUNS, otherwise 0.0.
#
# @param str [String] word to score
# @return [Float] 1.0 or 0.0
def pronoun_confidence(str)
  return 1.0 if part_of_speech?(Tagmemics::WordSet::PRONOUNS, str)

  0.0
end
|
59
|
+
|
60
|
+
# Scores +str+ as a conjunction: 1.0 when part_of_speech? finds it in
# Tagmemics::WordSet::CONJUNCTIONS, otherwise 0.0.
#
# @param str [String] word to score
# @return [Float] 1.0 or 0.0
def conjunction_confidence(str)
  return 1.0 if part_of_speech?(Tagmemics::WordSet::CONJUNCTIONS, str)

  0.0
end
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
@@ -1,10 +1,8 @@
|
|
1
1
|
require 'wordnet'
|
2
2
|
require 'facets'
|
3
3
|
|
4
|
-
|
5
|
-
|
6
|
-
module Lexicon
|
7
|
-
module WordNet
|
4
|
+
module Tagmemics
|
5
|
+
module WordNetMethods
|
8
6
|
class << self
|
9
7
|
def lex
|
10
8
|
WordNet::Lexicon.new
|
@@ -26,17 +24,48 @@ module Lexicon
|
|
26
24
|
parts_of_speech_frequency(word).values.reduce(:+)
|
27
25
|
end
|
28
26
|
|
29
|
-
|
27
|
+
# Returns a hash of all part-of-speech possibilities for the given
# dictionary word, with each frequency converted to its share of the total.
#
# @param word [String] dictionary word to look up
# @return [Hash] part-of-speech label => Float share of the total; empty
#   when the frequency hash is empty
def possibilities(word)
  frequencies = parts_of_speech_frequency(word)
  # Convert once; nil (no entries) becomes 0.0.
  denom = total_possibilities(word).to_f

  # Build a fresh hash rather than rewriting the frequency hash while
  # iterating over it, and avoid NaN when the total is zero.
  frequencies.each_with_object({}) do |(pos, count), shares|
    shares[pos] = denom.zero? ? 0.0 : count / denom
  end
end
|
35
34
|
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
35
|
+
# Picks the most likely part(s) of speech from a probability hash.
#
# @param probability_hsh [Hash] part-of-speech label => probability
# @return [Hash, nil] every entry whose value equals the maximum (ties are
#   all kept); nil when the argument is not a Hash
def most_likely_pos(probability_hsh)
  if probability_hsh.is_a?(Hash)
    highest = probability_hsh.each_value.max
    probability_hsh.select { |_label, probability| probability == highest }
  end
end
|
41
|
+
|
42
|
+
####
|
43
|
+
#
|
44
|
+
# DELETE ME?
|
45
|
+
#
|
46
|
+
#
|
47
|
+
# Sums the probabilities of the most likely part(s) of speech in +hsh+.
#
# @param hsh [Hash] part-of-speech label => probability
# @return [Numeric, nil] combined probability of the top entries; nil when
#   +hsh+ is empty or most_likely_pos returns nil for it
def most_likely_probability(hsh)
  likeliest = most_likely_pos(hsh)
  # most_likely_pos returns nil for non-Hash input; pass that through
  # instead of raising NoMethodError on nil.values.
  return unless likeliest

  likeliest.values.reduce(:+)
end
|
50
|
+
|
51
|
+
# Adds together every value in +hsh+.
#
# @param hsh [Hash] hash whose values respond to +
# @return [Object, nil] the sum of the values; nil for an empty hash
def combine_values(hsh)
  hsh.values.inject { |sum, value| sum + value }
end
|
54
|
+
|
55
|
+
# Combined probability that +word+ is the given part of speech.
#
# Every entry whose label contains +part_of_speech+ as a whole word is
# added together, so similar labels such as "adjective" and
# "adjective satellite" count toward the same total.
#
# @param word [String] dictionary word to look up
# @param part_of_speech [String] label word to match, e.g. 'verb'
# @return [Float] summed probability, or 0.0 when no label matches
def wordnet_probability(word, part_of_speech)
  matching = possibilities(word).select do |label, _probability|
    label.split.include?(part_of_speech)
  end

  total = combine_values(matching)
  total || 0.0 # nil means nothing matched
end
|
63
|
+
|
64
|
+
# TODO: Not using this. Delete?
#
# Fraction of entries in +hsh+ that hold a truthy value (neither nil nor
# false).
#
# @param hsh [Hash] hash to inspect
# @return [Float] share of truthy values between 0.0 and 1.0; 0.0 for an
#   empty hash
def decimal_complete(hsh)
  # 0 / 0.0 would be NaN, so treat an empty hash as nothing complete.
  return 0.0 if hsh.empty?

  complete = hsh.count { |_k, v| v }
  complete / hsh.length.to_f
end
|
41
70
|
end
|
42
71
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tagmemics
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- John Mason
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-10-
|
11
|
+
date: 2015-10-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: facets
|
@@ -122,7 +122,13 @@ dependencies:
|
|
122
122
|
- - "~>"
|
123
123
|
- !ruby/object:Gem::Version
|
124
124
|
version: '2.7'
|
125
|
-
description:
|
125
|
+
description: "\n The English language is extremely complicated. We have words
|
126
|
+
that can have multiple\n parts of speech. Natural language processing is difficult
|
127
|
+
because it is hard to\n tell if a word is a noun when it could be a verb or an
|
128
|
+
adjective, etc.\n\n The purpose of this project is to develop an algorithm that,
|
129
|
+
given a sentence string,\n has a ranking system that detects the part of speech
|
130
|
+
of each word.\n\n Why is the useful? Because understanding the correct parts of
|
131
|
+
speech in a sentence\n is the first step to teaching a robot how to read.\n "
|
126
132
|
email: mace2345@gmail.com
|
127
133
|
executables: []
|
128
134
|
extensions: []
|
@@ -130,10 +136,19 @@ extra_rdoc_files: []
|
|
130
136
|
files:
|
131
137
|
- README.md
|
132
138
|
- Rakefile
|
139
|
+
- config/adjectives.txt
|
140
|
+
- config/adjectives.txt.bak
|
141
|
+
- config/articles.txt
|
142
|
+
- config/conjunctions.txt
|
143
|
+
- config/linking_verbs.txt
|
144
|
+
- config/prepositions.txt
|
145
|
+
- config/pronouns.txt
|
133
146
|
- lib/tagmemics.rb
|
134
|
-
- lib/tagmemics/
|
147
|
+
- lib/tagmemics/load_data.rb
|
135
148
|
- lib/tagmemics/sentence.rb
|
149
|
+
- lib/tagmemics/version.rb
|
136
150
|
- lib/tagmemics/word.rb
|
151
|
+
- lib/tagmemics/word/confidence.rb
|
137
152
|
- lib/tagmemics/word/wordnet.rb
|
138
153
|
homepage: http://github.com/m8ss/tagmemics
|
139
154
|
licenses:
|
@@ -150,13 +165,13 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
150
165
|
version: '0'
|
151
166
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
152
167
|
requirements:
|
153
|
-
- - "
|
168
|
+
- - ">="
|
154
169
|
- !ruby/object:Gem::Version
|
155
|
-
version:
|
170
|
+
version: '0'
|
156
171
|
requirements: []
|
157
172
|
rubyforge_project:
|
158
173
|
rubygems_version: 2.4.5
|
159
174
|
signing_key:
|
160
175
|
specification_version: 4
|
161
|
-
summary:
|
176
|
+
summary: Detect parts of speech from a sentence.
|
162
177
|
test_files: []
|