tagmemics 0.0.0.beta → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,65 @@
1
module Tagmemics
  # Heuristic confidence scores for the part of speech of a single word.
  #
  # The WordNet-backed scorers (noun, verb, adjective) weight the WordNet
  # probability by 6 and divide by 10, so on their own they top out at 0.6;
  # noun_confidence can add a further 0.2 from a preceding article.
  # The closed-class scorers (articles, pronouns, ...) answer 1.0 when the
  # word is in the matching Tagmemics::WordSet list and 0.0 otherwise.
  class Word
    # How far to the left of a candidate noun we look for an article.
    # FIXME: need to scan left, not use a hard coded offset.
    ARTICLE_OFFSET = 2

    class << self
      # Confidence that arr[index] is a noun.
      #
      # arr   - the sentence as an array of word strings.
      # index - position of the word being scored.
      #
      # Combines the WordNet noun probability (weight 6) with whether the
      # word ARTICLE_OFFSET places to the left is an article (weight 2).
      # Returns a Float.
      def noun_confidence(arr, index)
        str = arr[index]
        # wordnet_probability falls back to 0.0 when WordNet lists no noun
        # sense, where indexing possibilities(str)['noun'] would yield nil.
        wordnet_prob = WordNetMethods.wordnet_probability(str, 'noun') * 6
        left_neighbor_article = left_article_confidence(arr, index) * 2

        (wordnet_prob + left_neighbor_article) / 10.0
      end

      # Confidence that str is a verb, from its WordNet probability only.
      # Returns a Float.
      def verb_confidence(str)
        WordNetMethods.wordnet_probability(str, 'verb') * 6 / 10.0
      end

      # Confidence that str is an adjective, from its WordNet probability
      # only. Returns a Float.
      # TODO: update parameters; neighbour-based signals (adjective to the
      # left, verb to the right) are not implemented yet.
      def adjective_confidence(str)
        WordNetMethods.wordnet_probability(str, 'adjective') * 6 / 10.0
      end

      # True when str matches any entry of constant, ignoring case.
      #
      # constant - an enumerable of word strings (e.g. WordSet::ARTICLES).
      # str      - the word to look up; nil never matches.
      # positive - when truthy, short-circuits and is returned as-is.
      def part_of_speech?(constant, str, positive = false)
        return positive if positive
        return false if str.nil?

        target = str.downcase
        constant.any? { |word| word.downcase == target }
      end

      # Not implemented yet: currently returns nil for every input.
      def adverb_confidence(str)
      end

      # 1.0 if str is listed in WordSet::LINKING_VERBS, else 0.0.
      def linking_verb_confidence(str)
        part_of_speech?(Tagmemics::WordSet::LINKING_VERBS, str) ? 1.0 : 0.0
      end

      # 1.0 if str is listed in WordSet::ARTICLES, else 0.0.
      def article_confidence(str)
        part_of_speech?(Tagmemics::WordSet::ARTICLES, str) ? 1.0 : 0.0
      end

      # 1.0 if str is listed in WordSet::PREPOSITIONS, else 0.0.
      def preposition_confidence(str)
        part_of_speech?(Tagmemics::WordSet::PREPOSITIONS, str) ? 1.0 : 0.0
      end

      # 1.0 if str is listed in WordSet::PRONOUNS, else 0.0.
      def pronoun_confidence(str)
        part_of_speech?(Tagmemics::WordSet::PRONOUNS, str) ? 1.0 : 0.0
      end

      # 1.0 if str is listed in WordSet::CONJUNCTIONS, else 0.0.
      def conjunction_confidence(str)
        part_of_speech?(Tagmemics::WordSet::CONJUNCTIONS, str) ? 1.0 : 0.0
      end

      private

      # Article confidence of the word ARTICLE_OFFSET places left of index.
      # Answers 0.0 when that position falls before the start of the
      # sentence; a negative Array index would otherwise wrap around and
      # read a word from the end of the sentence.
      def left_article_confidence(arr, index)
        position = index - ARTICLE_OFFSET
        return 0.0 if position < 0

        article_confidence(arr[position])
      end
    end
  end
end
@@ -1,10 +1,8 @@
1
1
  require 'wordnet'
2
2
  require 'facets'
3
3
 
4
- puts "you're at the right place"
5
-
6
- module Lexicon
7
- module WordNet
4
+ module Tagmemics
5
+ module WordNetMethods
8
6
  class << self
9
7
  def lex
10
8
  WordNet::Lexicon.new
@@ -26,17 +24,48 @@ module Lexicon
26
24
  parts_of_speech_frequency(word).values.reduce(:+)
27
25
  end
28
26
 
29
- def orig_probability(word)
27
# Returns the part-of-speech frequency hash for +word+ with every count
# converted to its share of the word's total possibilities (a Float).
# The hash obtained from parts_of_speech_frequency is updated in place
# and is itself the return value.
def possibilities(word)
  shares = parts_of_speech_frequency(word)
  divisor = total_possibilities(word).to_f

  shares.each_key { |pos| shares[pos] = shares[pos] / divisor }
  shares
end
35
34
 
36
- def most_likely(word)
37
- hsh = probability(word)
38
- max = hsh.values.max
39
- hsh.select { |_k, v| v == max }
35
# Picks the most likely part(s) of speech.
#
# Given a hash of part of speech => probability, returns a hash holding
# only the entries whose probability equals the maximum (ties are all
# kept). Returns nil when the argument is not a Hash.
def most_likely_pos(probability_hsh)
  if probability_hsh.is_a?(Hash)
    peak = probability_hsh.values.max
    probability_hsh.select { |_pos, probability| probability == peak }
  end
end
41
+
42
# DELETE ME? (candidate for removal)
#
# Adds together the probabilities of the entries that most_likely_pos
# selects from +hsh+, i.e. the maximum probability times the number of
# parts of speech tied for it.
def most_likely_probability(hsh)
  winners = most_likely_pos(hsh)
  winners.values.inject { |sum, probability| sum + probability }
end
50
+
51
# Adds up every value stored in +hsh+.
# Returns nil when the hash is empty.
def combine_values(hsh)
  hsh.values.inject { |total, value| total + value }
end
54
+
55
# Probability that +word+ belongs to +part_of_speech+.
#
# Every category of possibilities(word) whose name contains
# +part_of_speech+ as a whole word is counted, so similar names such as
# "adjective" and "adjective satellite" are added together.
# Returns 0.0 when no category matches.
def wordnet_probability(word, part_of_speech)
  matching = possibilities(word).select do |category, _probability|
    category.split.include?(part_of_speech)
  end

  # combine_values answers nil for an empty selection.
  combine_values(matching) || 0.0
end
63
+
64
# TODO: Not using this. Delete?
#
# Fraction of the entries in +hsh+ whose value is truthy (neither nil
# nor false), as a Float. An empty hash yields NaN (0 / 0.0).
def decimal_complete(hsh)
  filled = hsh.each_value.count { |value| value }
  filled / hsh.size.to_f
end
41
70
  end
42
71
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tagmemics
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.0.beta
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - John Mason
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-10-26 00:00:00.000000000 Z
11
+ date: 2015-10-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: facets
@@ -122,7 +122,13 @@ dependencies:
122
122
  - - "~>"
123
123
  - !ruby/object:Gem::Version
124
124
  version: '2.7'
125
- description: One day this will be great. Until then, it will be a project.
125
+ description: "\n The English language is extremely complicated. We have words
126
+ that can have multiple\n parts of speech. Natural language processing is difficult
127
+ because it is hard to\n tell if a word is a noun when it could be a verb or an
128
+ adjective, etc.\n\n The purpose of this project is to develop an algorithm that,
129
+ given a sentence string,\n has a ranking system that detects the part of speech
130
+ of each word.\n\n Why is this useful? Because understanding the correct parts of
131
+ speech in a sentence\n is the first step to teaching a robot how to read.\n "
126
132
  email: mace2345@gmail.com
127
133
  executables: []
128
134
  extensions: []
@@ -130,10 +136,19 @@ extra_rdoc_files: []
130
136
  files:
131
137
  - README.md
132
138
  - Rakefile
139
+ - config/adjectives.txt
140
+ - config/adjectives.txt.bak
141
+ - config/articles.txt
142
+ - config/conjunctions.txt
143
+ - config/linking_verbs.txt
144
+ - config/prepositions.txt
145
+ - config/pronouns.txt
133
146
  - lib/tagmemics.rb
134
- - lib/tagmemics/config.rb
147
+ - lib/tagmemics/load_data.rb
135
148
  - lib/tagmemics/sentence.rb
149
+ - lib/tagmemics/version.rb
136
150
  - lib/tagmemics/word.rb
151
+ - lib/tagmemics/word/confidence.rb
137
152
  - lib/tagmemics/word/wordnet.rb
138
153
  homepage: http://github.com/m8ss/tagmemics
139
154
  licenses:
@@ -150,13 +165,13 @@ required_ruby_version: !ruby/object:Gem::Requirement
150
165
  version: '0'
151
166
  required_rubygems_version: !ruby/object:Gem::Requirement
152
167
  requirements:
153
- - - ">"
168
+ - - ">="
154
169
  - !ruby/object:Gem::Version
155
- version: 1.3.1
170
+ version: '0'
156
171
  requirements: []
157
172
  rubyforge_project:
158
173
  rubygems_version: 2.4.5
159
174
  signing_key:
160
175
  specification_version: 4
161
- summary: A more organized way of accessing a language.
176
+ summary: Detect parts of speech from a sentence.
162
177
  test_files: []