tagmemics 0.0.0.beta → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,65 @@
1
+ module Tagmemics
2
+ class Word
3
+ class << self
4
+ # FIXME: scan left for the nearest article instead of using a hard-coded index offset.
5
+ # Add up each category and derive percentage.
6
+ def noun_confidence(arr, index)
7
+ str = arr[index]
8
+ wordnet_prob = (WordNetMethods.possibilities(str)['noun'] / 1) * 6
9
+ left_neighbor_article = article_confidence(arr[index - 2]) * 2
10
+ subtotal = wordnet_prob + left_neighbor_article
11
+
12
+ subtotal / 10.0
13
+ end
14
+
15
+ def verb_confidence(str)
16
+ wordnet_prob = WordNetMethods.wordnet_probability(str, 'verb') * 6
17
+ subtotal = wordnet_prob
18
+
19
+ subtotal / 10.0
20
+ end
21
+
22
+ # UPDATE PARAMETERS
23
+ def adjective_confidence(str)
24
+ wordnet_prob = WordNetMethods.wordnet_probability(str, 'adjective') * 6
25
+ # lneighbor_adjective = 0 * 2
26
+ # rneighbor_verb = 0 * 2
27
+ subtotal = wordnet_prob
28
+
29
+ subtotal / 10.0
30
+ end
31
+
32
+ def part_of_speech?(constant, str, positive = false)
33
+ arr = []
34
+ constant.each do |word|
35
+ positive = true if word.downcase == str.downcase
36
+ break if positive
37
+ end
38
+ positive
39
+ end
40
+
41
+ def adverb_confidence(str)
42
+ end
43
+
44
+ def linking_verb_confidence(str)
45
+ part_of_speech?(Tagmemics::WordSet::LINKING_VERBS, str) ? 1.0 : 0.0
46
+ end
47
+
48
+ def article_confidence(str)
49
+ part_of_speech?(Tagmemics::WordSet::ARTICLES, str) ? 1.0 : 0.0
50
+ end
51
+
52
+ def preposition_confidence(str)
53
+ part_of_speech?(Tagmemics::WordSet::PREPOSITIONS, str) ? 1.0 : 0.0
54
+ end
55
+
56
+ def pronoun_confidence(str)
57
+ part_of_speech?(Tagmemics::WordSet::PRONOUNS, str) ? 1.0 : 0.0
58
+ end
59
+
60
+ def conjunction_confidence(str)
61
+ part_of_speech?(Tagmemics::WordSet::CONJUNCTIONS, str) ? 1.0 : 0.0
62
+ end
63
+ end
64
+ end
65
+ end
@@ -1,10 +1,8 @@
1
1
  require 'wordnet'
2
2
  require 'facets'
3
3
 
4
- puts "you're at the right place"
5
-
6
- module Lexicon
7
- module WordNet
4
+ module Tagmemics
5
+ module WordNetMethods
8
6
  class << self
9
7
  def lex
10
8
  WordNet::Lexicon.new
@@ -26,17 +24,48 @@ module Lexicon
26
24
  parts_of_speech_frequency(word).values.reduce(:+)
27
25
  end
28
26
 
29
- def orig_probability(word)
27
+ # Returns a hash mapping each part of speech to its share of the given dictionary word's total frequency.
28
+ def possibilities(word)
30
29
  hsh = parts_of_speech_frequency(word)
31
30
  denom = total_possibilities(word)
32
31
 
33
32
  hsh.each { |k, v| hsh[k] = v / denom.to_f }
34
33
  end
35
34
 
36
- def most_likely(word)
37
- hsh = probability(word)
38
- max = hsh.values.max
39
- hsh.select { |_k, v| v == max }
35
+ # Most likely part of speech
36
+ def most_likely_pos(probability_hsh)
37
+ return unless probability_hsh.is_a? Hash
38
+ max = probability_hsh.values.max
39
+ probability_hsh.select { |_k, v| v == max }
40
+ end
41
+
42
+ ####
43
+ #
44
+ # DELETE ME?
45
+ #
46
+ #
47
+ def most_likely_probability(hsh)
48
+ most_likely_pos(hsh).values.reduce(:+)
49
+ end
50
+
51
+ def combine_values(hsh)
52
+ hsh.values.reduce(:+)
53
+ end
54
+
55
+ # Sum the probabilities of every part of speech whose name includes
56
+ # the given one, e.g. Adjective and Adjective Satellite are added
57
+ # together. Returns 0.0 when nothing matches.
58
+ def wordnet_probability(word, part_of_speech)
59
+ hsh = possibilities(word)
60
+ eligibles = hsh.select { |k, _v| k.split.include? part_of_speech }
61
+ combine_values(eligibles) || 0.0 # return if probability is nil
62
+ end
63
+
64
+ # TODO: Not using this. Delete?
65
+ def decimal_complete(hsh)
66
+ total = hsh.length
67
+ complete = hsh.count { |_k, v| v } # not nil
68
+ complete / total.to_f
40
69
  end
41
70
  end
42
71
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tagmemics
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.0.beta
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - John Mason
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-10-26 00:00:00.000000000 Z
11
+ date: 2015-10-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: facets
@@ -122,7 +122,13 @@ dependencies:
122
122
  - - "~>"
123
123
  - !ruby/object:Gem::Version
124
124
  version: '2.7'
125
- description: One day this will be great. Until then, it will be a project.
125
+ description: "\n The English language is extremely complicated. We have words
126
+ that can have multiple\n parts of speech. Natural language processing is difficult
127
+ because it is hard to\n tell if a word is a noun when it could be a verb or an
128
+ adjective, etc.\n\n The purpose of this project is to develop an algorithm that,
129
+ given a sentence string,\n has a ranking system that detects the part of speech
130
+ of each word.\n\n Why is this useful? Because understanding the correct parts of
131
+ speech in a sentence\n is the first step to teaching a robot how to read.\n "
126
132
  email: mace2345@gmail.com
127
133
  executables: []
128
134
  extensions: []
@@ -130,10 +136,19 @@ extra_rdoc_files: []
130
136
  files:
131
137
  - README.md
132
138
  - Rakefile
139
+ - config/adjectives.txt
140
+ - config/adjectives.txt.bak
141
+ - config/articles.txt
142
+ - config/conjunctions.txt
143
+ - config/linking_verbs.txt
144
+ - config/prepositions.txt
145
+ - config/pronouns.txt
133
146
  - lib/tagmemics.rb
134
- - lib/tagmemics/config.rb
147
+ - lib/tagmemics/load_data.rb
135
148
  - lib/tagmemics/sentence.rb
149
+ - lib/tagmemics/version.rb
136
150
  - lib/tagmemics/word.rb
151
+ - lib/tagmemics/word/confidence.rb
137
152
  - lib/tagmemics/word/wordnet.rb
138
153
  homepage: http://github.com/m8ss/tagmemics
139
154
  licenses:
@@ -150,13 +165,13 @@ required_ruby_version: !ruby/object:Gem::Requirement
150
165
  version: '0'
151
166
  required_rubygems_version: !ruby/object:Gem::Requirement
152
167
  requirements:
153
- - - ">"
168
+ - - ">="
154
169
  - !ruby/object:Gem::Version
155
- version: 1.3.1
170
+ version: '0'
156
171
  requirements: []
157
172
  rubyforge_project:
158
173
  rubygems_version: 2.4.5
159
174
  signing_key:
160
175
  specification_version: 4
161
- summary: A more organized way of accessing a language.
176
+ summary: Detect parts of speech from a sentence.
162
177
  test_files: []