tagmemics 0.0.0.beta → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,3 @@
1
+ a
2
+ an
3
+ the
@@ -0,0 +1,7 @@
1
+ for
2
+ and
3
+ nor
4
+ but
5
+ or
6
+ yet
7
+ so
@@ -0,0 +1,28 @@
1
+ Be
2
+ Am
3
+ Is
4
+ Are
5
+ Was
6
+ Were
7
+ Has become
8
+ Could have come
9
+ Shall be
10
+ Shall have been
11
+ Have appeared
12
+ Should have appeared
13
+ Will be
14
+ Will have been
15
+ Had seemed
16
+ Should have been
17
+ Has been
18
+ Have been
19
+ Had been
20
+ Can be
21
+ May be
22
+ Might be
23
+ Should be
24
+ Could be
25
+ Become
26
+ Would be
27
+ Appear
28
+ Seem
@@ -0,0 +1,202 @@
1
+ 'gainst
2
+ 'mongst
3
+ 'neath
4
+ 'twixt
5
+ abaft
6
+ abeam
7
+ aboard
8
+ about
9
+ above
10
+ absent
11
+ according to
12
+ across
13
+ afore
14
+ after
15
+ against
16
+ ago
17
+ ahead of
18
+ along
19
+ alongside
20
+ amid
21
+ amidst
22
+ among
23
+ amongst
24
+ anenst
25
+ anent
26
+ anti
27
+ apart
28
+ apart from
29
+ apropos
30
+ apud
31
+ around
32
+ as
33
+ as far as
34
+ as for
35
+ as of
36
+ as opposed to
37
+ as per
38
+ as regards
39
+ as soon as
40
+ as well as
41
+ aside
42
+ aside from
43
+ astern of
44
+ astride
45
+ at
46
+ at the behest of
47
+ athwart
48
+ atop
49
+ away
50
+ ayond
51
+ ayont
52
+ back to
53
+ barring
54
+ because of
55
+ before
56
+ behind
57
+ behither
58
+ below
59
+ beneath
60
+ beside
61
+ besides
62
+ between
63
+ betwixen
64
+ betwixt
65
+ beyond
66
+ biforn
67
+ but
68
+ by
69
+ by means of
70
+ by virtue of
71
+ chez
72
+ circa
73
+ close to
74
+ concerning
75
+ considering
76
+ contra
77
+ cum
78
+ despite
79
+ down
80
+ due to
81
+ during
82
+ ere
83
+ except
84
+ except for
85
+ excluding
86
+ failing
87
+ far from
88
+ following
89
+ for
90
+ for the sake of
91
+ forby
92
+ forenenst
93
+ fornenst
94
+ fornent
95
+ from
96
+ fromward
97
+ froward
98
+ frowards
99
+ gainst
100
+ given
101
+ hence
102
+ in
103
+ in accordance with
104
+ in addition to
105
+ in case of
106
+ in front of
107
+ in lieu of
108
+ in order to
109
+ in place of
110
+ in point of
111
+ in re
112
+ in spite of
113
+ in to
114
+ including
115
+ inside
116
+ inside of
117
+ inside out
118
+ instead of
119
+ into
120
+ left of
121
+ like
122
+ mid
123
+ midst
124
+ minus
125
+ modulo
126
+ near
127
+ near to
128
+ neath
129
+ next
130
+ next to
131
+ notwithstanding
132
+ o'
133
+ of
134
+ off
135
+ on
136
+ on account of
137
+ on behalf of
138
+ on to
139
+ on top of
140
+ onto
141
+ opposite
142
+ opposite of
143
+ opposite to
144
+ out
145
+ out from
146
+ out of
147
+ outside
148
+ outside of
149
+ outwith
150
+ over
151
+ overthwart
152
+ owing to
153
+ pace
154
+ past
155
+ per
156
+ plus
157
+ prior to
158
+ pro
159
+ pursuant to
160
+ qua
161
+ rather than
162
+ re
163
+ regarding
164
+ regardless of
165
+ right of
166
+ round
167
+ sans
168
+ save
169
+ since
170
+ subsequent to
171
+ such as
172
+ than
173
+ thanks to
174
+ that of
175
+ through
176
+ throughout
177
+ till
178
+ times
179
+ to
180
+ tofore
181
+ toforn
182
+ toward
183
+ towards
184
+ under
185
+ underneath
186
+ unlike
187
+ until
188
+ unto
189
+ up
190
+ up to
191
+ upon
192
+ versus
193
+ via
194
+ vice
195
+ with
196
+ with a view to
197
+ with regard to
198
+ with respect to
199
+ withal
200
+ within
201
+ without
202
+ worth
@@ -0,0 +1,53 @@
1
+ I
2
+ he
3
+ her
4
+ hers
5
+ herself
6
+ him
7
+ himself
8
+ his
9
+ hisself
10
+ it
11
+ its
12
+ itself
13
+ me
14
+ mine
15
+ my
16
+ myself
17
+ one
18
+ one's
19
+ oneself
20
+ our
21
+ ours
22
+ ourself
23
+ ourselves
24
+ she
25
+ thee
26
+ their
27
+ theirs
28
+ theirself
29
+ theirselves
30
+ them
31
+ themself
32
+ themselves
33
+ they
34
+ thine
35
+ thou
36
+ thy
37
+ thyself
38
+ us
39
+ we
40
+ who
41
+ whom
42
+ whomself
43
+ whose
44
+ whoself
45
+ y'all
46
+ ye
47
+ you
48
+ you all
49
+ your
50
+ yours
51
+ yourself
52
+ yourselves
53
+ youse
@@ -1,35 +1,52 @@
1
1
  require_relative './tagmemics/word'
2
2
  require_relative './tagmemics/sentence'
3
+ require_relative './tagmemics/load_data'
3
4
 
4
-
5
- module Lexicon
5
+ module Tagmemics
6
6
  def self.parse(str)
7
- ParsedSentence.new(str)
7
+ WordSet.new(str)
8
8
  end
9
- end
10
9
 
11
- # The output of Lexicon.parse
12
- class ParsedSentence
13
- attr_accessor :nouns, :verbs, :articles, :adjectives, :adverbs,
14
- :prepositions, :conjunctions, :pronouns
10
+ # The output of Tagmemics.parse
11
+ class WordSet
12
+ ARTICLES = %w(the an a)
13
+ CONJUNCTIONS = %w(for and nor but or yet so )
14
+ LINKING_VERBS = LoadData.contents_to_a('linking_verbs')
15
+ PRONOUNS = LoadData.contents_to_a('pronouns')
16
+ PREPOSITIONS = LoadData.contents_to_a('prepositions')
15
17
 
16
- def initialize(str)
17
- @str = str
18
- end
18
+ attr_accessor :nouns, :verbs, :articles, :adjectives, :adverbs,
19
+ :prepositions, :conjunctions, :pronouns, :collection
19
20
 
20
- def sentence_to_array(sentence)
21
- sentence.split(/\W/)
22
- end
21
+ def initialize(str)
22
+ @collection = []
23
+ arr = WordSet.sentence_to_array(str)
24
+ arr.each { |word| @collection << Word.new(word) }
25
+ # @set = WordSet.start_hash(WordSet.sentence_to_array(str))
26
+ end
27
+
28
+ class << self
29
+ include LoadData
30
+
31
+ # Will probably want to use punctuation in the future.
32
+ # For now, this only strips it from the end of the sentence.
33
+ def sentence_to_array(sentence)
34
+ sentence.split(/\s+|\W+\z/)
35
+ end
23
36
 
24
- def start_hash(arr)
25
- arr.map do |word|
26
- result =
27
- case
28
- when part_of_speech(ARTICLES, word).any? then :article
29
- when part_of_speech(CONJUNCTIONS, word).any? then :conjunction
30
- when part_of_speech(PRONOUNS, word).any? then :pronoun
31
- end
32
- [word, result]
33
- end.to_h
37
+ # The part_of_speech helper was moved. This will not work right now.
38
+ # Probably need to delete this.
39
+ def start_hash(arr)
40
+ arr.map do |word|
41
+ result =
42
+ case
43
+ when part_of_speech(ARTICLES, word).any? then :article
44
+ when part_of_speech(CONJUNCTIONS, word).any? then :conjunction
45
+ when part_of_speech(PRONOUNS, word).any? then :pronoun
46
+ end
47
+ [word, result]
48
+ end.to_h
49
+ end
50
+ end
34
51
  end
35
52
  end
@@ -1,5 +1,5 @@
1
1
  # Retrieves data from config folder to save to constants.
2
- module Config
2
+ module LoadData
3
3
  def self.config_path
4
4
  File.join(File.dirname(__FILE__), '../../config')
5
5
  end
@@ -24,7 +24,7 @@ module Config
24
24
  page = agent.get(uri)
25
25
  destination = "./config/#{part_of_speech}.txt"
26
26
  target = page.search(css_selector)
27
- regx = /[^'a-zA-Z\s]/ # anything beside letters, apostrophe or space
27
+ regx = /[^\047a-zA-Z\s]/ # \047 is an apostrophe
28
28
 
29
29
  arr = []
30
30
  target.each do |x|
@@ -0,0 +1,3 @@
1
+ module Tagmemics
2
+ VERSION = '0.0.2'
3
+ end
@@ -1,69 +1,59 @@
1
1
  require 'wordnet'
2
2
  require 'facets'
3
- require_relative './config'
3
+ require_relative './load_data'
4
4
  require_relative './word/wordnet'
5
+ require_relative './word/confidence'
5
6
 
6
- module Lexicon
7
+ module Tagmemics
7
8
  class Word
8
- include Config
9
-
10
- ARTICLES = %w(the an a)
11
- CONJUNCTIONS = %w(for and nor but or yet so )
12
- PRONOUNS = Config.contents_to_a('pronouns')
13
-
14
-
15
- def part_of_speech(constant, str)
16
- arr = []
17
- constant.each do |word|
18
- regx = /\b#{word}\b/i
19
- arr << word if regx =~ str # word phrase matches
20
- end
21
- arr
22
- end
23
-
24
- def decimal_complete(hsh)
25
- total = hsh.length
26
- complete = hsh.count { |_k, v| v } # not nil
27
- complete / total.to_f
28
- end
29
-
30
9
  def initialize(word)
31
- @word = word
32
- @confidence_levels = confidence_levels(word)
33
- end
34
-
35
- def confidence_levels(word)
36
- {
37
- :noun => noun_confidence(word),
38
- :verb => verb_confidence(word),
39
- :adjective => adjective_confidence(word),
40
- :adverb => adverb_confidence(word),
41
- :article => article_confidence(word),
42
- :preposition => preposition_confidence(word),
43
- :conjunction => conjunction_confidence(word)
44
- }
45
- end
46
-
47
- def noun_confidence(str)
48
- (WordNet.orig_probability(str) / 1) * 3
49
- end
50
-
51
- def verb_confidence(str)
52
- end
53
-
54
- def adjective_confidence(str)
55
- end
10
+ @str = word
11
+ puts "examining: #{word}"
12
+ @tagmemic_confidence = Word.confidence_levels(word)
13
+ end
14
+
15
+ class << self
16
+ ##
17
+ # Because WordNet only tracks verbs, nouns, adverbs and adjectives,
18
+ # confidence levels can only be updated for those values. The other words
19
+ # such as pronouns, prepositions, and conjunctions are based on the lists
20
+ # in the config folder. Their score is pass or fail and is
21
+ # calculated as 0 or 1.0.
22
+ def confidence_levels(word)
23
+ word = word.downcase
24
+ known_hsh = determine_known_words(word)
25
+ wordnet_hsh = determine_wordnet_words(word)
26
+
27
+ hsh = everything_nil(known_hsh) ? wordnet_hsh : known_hsh
28
+ delete_nogos(hsh)
29
+ end
56
30
 
57
- def adverb_confidence(str)
58
- end
31
+ def determine_known_words(word)
32
+ {
33
+ :article => article_confidence(word),
34
+ :preposition => preposition_confidence(word),
35
+ :pronoun => pronoun_confidence(word),
36
+ :conjunction => conjunction_confidence(word),
37
+ :linking_verb => linking_verb_confidence(word)
38
+ }
39
+ end
59
40
 
60
- def article_confidence(str)
61
- end
41
+ def everything_nil(hsh)
42
+ (hsh.select { |_k, v| v != 0.0 && !v.nil? }.empty?)
43
+ end
62
44
 
63
- def preposition_confidence(str)
64
- end
45
+ def delete_nogos(hsh)
46
+ hsh.delete_if { |_k, v| v == 0.0 || v.nil? }
47
+ end
65
48
 
66
- def conjunction_confidence(str)
49
+ def determine_wordnet_words(word)
50
+ {
51
+ :noun => WordNetMethods.wordnet_probability(word, 'noun'),
52
+ :verb => WordNetMethods.wordnet_probability(word, 'verb'),
53
+ :adjective => WordNetMethods.wordnet_probability(word, 'adjective'),
54
+ :adverb => WordNetMethods.wordnet_probability(word, 'adverb')
55
+ }
56
+ end
67
57
  end
68
58
  end
69
59
  end