tagmemics 0.0.0.beta → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3 @@
1
+ a
2
+ an
3
+ the
@@ -0,0 +1,7 @@
1
+ for
2
+ and
3
+ nor
4
+ but
5
+ or
6
+ yet
7
+ so
@@ -0,0 +1,28 @@
1
+ Be
2
+ Am
3
+ Is
4
+ Are
5
+ Was
6
+ Were
7
+ Has become
8
+ Could have come
9
+ Shall be
10
+ Shall have been
11
+ Have appeared
12
+ Should have appeared
13
+ Will be
14
+ Will have been
15
+ Had seemed
16
+ Should have been
17
+ Has been
18
+ Have been
19
+ Had been
20
+ Can be
21
+ May be
22
+ Might be
23
+ Should be
24
+ Could be
25
+ Become
26
+ Would be
27
+ Appear
28
+ Seem
@@ -0,0 +1,202 @@
1
+ 'gainst
2
+ 'mongst
3
+ 'neath
4
+ 'twixt
5
+ abaft
6
+ abeam
7
+ aboard
8
+ about
9
+ above
10
+ absent
11
+ according to
12
+ across
13
+ afore
14
+ after
15
+ against
16
+ ago
17
+ ahead of
18
+ along
19
+ alongside
20
+ amid
21
+ amidst
22
+ among
23
+ amongst
24
+ anenst
25
+ anent
26
+ anti
27
+ apart
28
+ apart from
29
+ apropos
30
+ apud
31
+ around
32
+ as
33
+ as far as
34
+ as for
35
+ as of
36
+ as opposed to
37
+ as per
38
+ as regards
39
+ as soon as
40
+ as well as
41
+ aside
42
+ aside from
43
+ astern of
44
+ astride
45
+ at
46
+ at the behest of
47
+ athwart
48
+ atop
49
+ away
50
+ ayond
51
+ ayont
52
+ back to
53
+ barring
54
+ because of
55
+ before
56
+ behind
57
+ behither
58
+ below
59
+ beneath
60
+ beside
61
+ besides
62
+ between
63
+ betwixen
64
+ betwixt
65
+ beyond
66
+ biforn
67
+ but
68
+ by
69
+ by means of
70
+ by virtue of
71
+ chez
72
+ circa
73
+ close to
74
+ concerning
75
+ considering
76
+ contra
77
+ cum
78
+ despite
79
+ down
80
+ due to
81
+ during
82
+ ere
83
+ except
84
+ except for
85
+ excluding
86
+ failing
87
+ far from
88
+ following
89
+ for
90
+ for the sake of
91
+ forby
92
+ forenenst
93
+ fornenst
94
+ fornent
95
+ from
96
+ fromward
97
+ froward
98
+ frowards
99
+ gainst
100
+ given
101
+ hence
102
+ in
103
+ in accordance with
104
+ in addition to
105
+ in case of
106
+ in front of
107
+ in lieu of
108
+ in order to
109
+ in place of
110
+ in point of
111
+ in re
112
+ in spite of
113
+ in to
114
+ including
115
+ inside
116
+ inside of
117
+ inside out
118
+ instead of
119
+ into
120
+ left of
121
+ like
122
+ mid
123
+ midst
124
+ minus
125
+ modulo
126
+ near
127
+ near to
128
+ neath
129
+ next
130
+ next to
131
+ notwithstanding
132
+ o'
133
+ of
134
+ off
135
+ on
136
+ on account of
137
+ on behalf of
138
+ on to
139
+ on top of
140
+ onto
141
+ opposite
142
+ opposite of
143
+ opposite to
144
+ out
145
+ out from
146
+ out of
147
+ outside
148
+ outside of
149
+ outwith
150
+ over
151
+ overthwart
152
+ owing to
153
+ pace
154
+ past
155
+ per
156
+ plus
157
+ prior to
158
+ pro
159
+ pursuant to
160
+ qua
161
+ rather than
162
+ re
163
+ regarding
164
+ regardless of
165
+ right of
166
+ round
167
+ sans
168
+ save
169
+ since
170
+ subsequent to
171
+ such as
172
+ than
173
+ thanks to
174
+ that of
175
+ through
176
+ throughout
177
+ till
178
+ times
179
+ to
180
+ tofore
181
+ toforn
182
+ toward
183
+ towards
184
+ under
185
+ underneath
186
+ unlike
187
+ until
188
+ unto
189
+ up
190
+ up to
191
+ upon
192
+ versus
193
+ via
194
+ vice
195
+ with
196
+ with a view to
197
+ with regard to
198
+ with respect to
199
+ withal
200
+ within
201
+ without
202
+ worth
@@ -0,0 +1,53 @@
1
+ I
2
+ he
3
+ her
4
+ hers
5
+ herself
6
+ him
7
+ himself
8
+ his
9
+ hisself
10
+ it
11
+ its
12
+ itself
13
+ me
14
+ mine
15
+ my
16
+ myself
17
+ one
18
+ one's
19
+ oneself
20
+ our
21
+ ours
22
+ ourself
23
+ ourselves
24
+ she
25
+ thee
26
+ their
27
+ theirs
28
+ theirself
29
+ theirselves
30
+ them
31
+ themself
32
+ themselves
33
+ they
34
+ thine
35
+ thou
36
+ thy
37
+ thyself
38
+ us
39
+ we
40
+ who
41
+ whom
42
+ whomself
43
+ whose
44
+ whoself
45
+ y'all
46
+ ye
47
+ you
48
+ you all
49
+ your
50
+ yours
51
+ yourself
52
+ yourselves
53
+ youse
@@ -1,35 +1,52 @@
1
1
  require_relative './tagmemics/word'
2
2
  require_relative './tagmemics/sentence'
3
+ require_relative './tagmemics/load_data'
3
4
 
4
-
5
- module Lexicon
5
+ module Tagmemics
6
6
  def self.parse(str)
7
- ParsedSentence.new(str)
7
+ WordSet.new(str)
8
8
  end
9
- end
10
9
 
11
- # The output of Lexicon.parse
12
- class ParsedSentence
13
- attr_accessor :nouns, :verbs, :articles, :adjectives, :adverbs,
14
- :prepositions, :conjunctions, :pronouns
10
+ # The output of Tagmemics.parse
11
+ class WordSet
12
+ ARTICLES = %w(the an a)
13
+ CONJUNCTIONS = %w(for and nor but or yet so )
14
+ LINKING_VERBS = LoadData.contents_to_a('linking_verbs')
15
+ PRONOUNS = LoadData.contents_to_a('pronouns')
16
+ PREPOSITIONS = LoadData.contents_to_a('prepositions')
15
17
 
16
- def initialize(str)
17
- @str = str
18
- end
18
+ attr_accessor :nouns, :verbs, :articles, :adjectives, :adverbs,
19
+ :prepositions, :conjunctions, :pronouns, :collection
19
20
 
20
- def sentence_to_array(sentence)
21
- sentence.split(/\W/)
22
- end
21
+ def initialize(str)
22
+ @collection = []
23
+ arr = WordSet.sentence_to_array(str)
24
+ arr.each { |word| @collection << Word.new(word) }
25
+ # @set = WordSet.start_hash(WordSet.sentence_to_array(str))
26
+ end
27
+
28
+ class << self
29
+ include LoadData
30
+
31
+ # Will probably want to use punctuation in the future.
32
+ # For now, this only strips punctuation at the very end of the sentence; punctuation elsewhere stays attached to its word.
33
+ def sentence_to_array(sentence)
34
+ sentence.split(/\s+|\W+\z/)
35
+ end
23
36
 
24
- def start_hash(arr)
25
- arr.map do |word|
26
- result =
27
- case
28
- when part_of_speech(ARTICLES, word).any? then :article
29
- when part_of_speech(CONJUNCTIONS, word).any? then :conjunction
30
- when part_of_speech(PRONOUNS, word).any? then :pronoun
31
- end
32
- [word, result]
33
- end.to_h
37
+ # The part_of_speech method has been moved, so this method will not work right now.
38
+ # Probably need to delete this.
39
+ def start_hash(arr)
40
+ arr.map do |word|
41
+ result =
42
+ case
43
+ when part_of_speech(ARTICLES, word).any? then :article
44
+ when part_of_speech(CONJUNCTIONS, word).any? then :conjunction
45
+ when part_of_speech(PRONOUNS, word).any? then :pronoun
46
+ end
47
+ [word, result]
48
+ end.to_h
49
+ end
50
+ end
34
51
  end
35
52
  end
@@ -1,5 +1,5 @@
1
1
  # Retrieves data from config folder to save to constants.
2
- module Config
2
+ module LoadData
3
3
  def self.config_path
4
4
  File.join(File.dirname(__FILE__), '../../config')
5
5
  end
@@ -24,7 +24,7 @@ module Config
24
24
  page = agent.get(uri)
25
25
  destination = "./config/#{part_of_speech}.txt"
26
26
  target = page.search(css_selector)
27
- regx = /[^'a-zA-Z\s]/ # anything beside letters, apostrophe or space
27
+ regx = /[^\047a-zA-Z\s]/ # \047 is an apostrophe
28
28
 
29
29
  arr = []
30
30
  target.each do |x|
@@ -0,0 +1,3 @@
1
+ module Tagmemics
2
+ VERSION = '0.0.2'
3
+ end
@@ -1,69 +1,59 @@
1
1
  require 'wordnet'
2
2
  require 'facets'
3
- require_relative './config'
3
+ require_relative './load_data'
4
4
  require_relative './word/wordnet'
5
+ require_relative './word/confidence'
5
6
 
6
- module Lexicon
7
+ module Tagmemics
7
8
  class Word
8
- include Config
9
-
10
- ARTICLES = %w(the an a)
11
- CONJUNCTIONS = %w(for and nor but or yet so )
12
- PRONOUNS = Config.contents_to_a('pronouns')
13
-
14
-
15
- def part_of_speech(constant, str)
16
- arr = []
17
- constant.each do |word|
18
- regx = /\b#{word}\b/i
19
- arr << word if regx =~ str # word phrase matches
20
- end
21
- arr
22
- end
23
-
24
- def decimal_complete(hsh)
25
- total = hsh.length
26
- complete = hsh.count { |_k, v| v } # not nil
27
- complete / total.to_f
28
- end
29
-
30
9
  def initialize(word)
31
- @word = word
32
- @confidence_levels = confidence_levels(word)
33
- end
34
-
35
- def confidence_levels(word)
36
- {
37
- :noun => noun_confidence(word),
38
- :verb => verb_confidence(word),
39
- :adjective => adjective_confidence(word),
40
- :adverb => adverb_confidence(word),
41
- :article => article_confidence(word),
42
- :preposition => preposition_confidence(word),
43
- :conjunction => conjunction_confidence(word)
44
- }
45
- end
46
-
47
- def noun_confidence(str)
48
- (WordNet.orig_probability(str) / 1) * 3
49
- end
50
-
51
- def verb_confidence(str)
52
- end
53
-
54
- def adjective_confidence(str)
55
- end
10
+ @str = word
11
+ puts "examining: #{word}"
12
+ @tagmemic_confidence = Word.confidence_levels(word)
13
+ end
14
+
15
+ class << self
16
+ ##
17
+ # Because WordNet only tracks verbs, nouns, adverbs and adjectives,
18
+ # confidence levels can only be computed for those parts of speech. Other
19
+ # words, such as pronouns, prepositions, and conjunctions, are based on the
20
+ # lists in the config folder. Their score is pass or fail and is
21
+ # recorded as either 0 or 1.0.
22
+ def confidence_levels(word)
23
+ word = word.downcase
24
+ known_hsh = determine_known_words(word)
25
+ wordnet_hsh = determine_wordnet_words(word)
26
+
27
+ hsh = everything_nil(known_hsh) ? wordnet_hsh : known_hsh
28
+ delete_nogos(hsh)
29
+ end
56
30
 
57
- def adverb_confidence(str)
58
- end
31
+ def determine_known_words(word)
32
+ {
33
+ :article => article_confidence(word),
34
+ :preposition => preposition_confidence(word),
35
+ :pronoun => pronoun_confidence(word),
36
+ :conjunction => conjunction_confidence(word),
37
+ :linking_verb => linking_verb_confidence(word)
38
+ }
39
+ end
59
40
 
60
- def article_confidence(str)
61
- end
41
+ def everything_nil(hsh)
42
+ (hsh.select { |_k, v| v != 0.0 && !v.nil? }.empty?)
43
+ end
62
44
 
63
- def preposition_confidence(str)
64
- end
45
+ def delete_nogos(hsh)
46
+ hsh.delete_if { |_k, v| v == 0.0 || v.nil? }
47
+ end
65
48
 
66
- def conjunction_confidence(str)
49
+ def determine_wordnet_words(word)
50
+ {
51
+ :noun => WordNetMethods.wordnet_probability(word, 'noun'),
52
+ :verb => WordNetMethods.wordnet_probability(word, 'verb'),
53
+ :adjective => WordNetMethods.wordnet_probability(word, 'adjective'),
54
+ :adverb => WordNetMethods.wordnet_probability(word, 'adverb')
55
+ }
56
+ end
67
57
  end
68
58
  end
69
59
  end