commonsense 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.travis.yml +4 -0
- data/Gemfile +4 -0
- data/README.md +25 -0
- data/Rakefile +10 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/commonsense.gemspec +23 -0
- data/exe/commonsense +48 -0
- data/lib/commonsense.rb +79 -0
- data/lib/commonsense/basic_english.rb +880 -0
- data/lib/commonsense/version.rb +3 -0
- metadata +98 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 42ba1b7316fd76ddcb89b7308a2a860401907765
|
4
|
+
data.tar.gz: f1461e7070eed57ae7f71ca9777440690a39dd6a
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: f9b92276ef9abcaded3b683c4a363a483fb7e8ef7ada62b97d1b2c3da818c030c6036db7165a35c98b0ff821028189f742cf220a22239541aeb487048e624466
|
7
|
+
data.tar.gz: 95caae62c0cbd962d21cf45705ff9baf863d06637ad750393db6999e4c14dfe7f8b2baa65cd960e6b0fd89d9019e63d4f12cd5abab24bfcd0516ad85aac6d477
|
data/.gitignore
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# Commonsense
|
2
|
+
|
3
|
+
Validate text against the [commonsense specification](https://github.com/beneills/commonsense-spec) to resist authorship analysis. See the spec for more information.
|
4
|
+
|
5
|
+
This gem contains a Ruby library and a single executable.
|
6
|
+
|
7
|
+
## Usage
|
8
|
+
|
9
|
+
$ gem install commonsense
|
10
|
+
$ commonsense thomas_paine.txt
|
11
|
+
|
12
|
+
## Development
|
13
|
+
|
14
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment. Run `bundle exec commonsense` to use the gem in this directory, ignoring other installed copies of this gem.
|
15
|
+
|
16
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
17
|
+
|
18
|
+
## Todo
|
19
|
+
|
20
|
+
+ allow verb conjugates
|
21
|
+
+ add feature to publish to anonymous pastebins
|
22
|
+
|
23
|
+
## Contributing
|
24
|
+
|
25
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/beneills/commonsense-gem
|
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "commonsense"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
require "irb"
|
14
|
+
# Start an interactive IRB session; "commonsense" has already been required
# above, so the gem's constants are available at the prompt.
IRB.start
|
data/bin/setup
ADDED
data/commonsense.gemspec
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# Put this gem's own lib directory on the load path so that the version
# constant can be required below without the gem being installed.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'commonsense/version'

Gem::Specification.new do |gem|
  # Identity
  gem.name     = "commonsense"
  gem.version  = Commonsense::VERSION
  gem.authors  = ["Ben Eills"]
  gem.email    = ["ben@beneills.com"]
  gem.summary  = %q{Validate text against the commonsense spec to resist authorship analysis.}
  gem.homepage = "https://github.com/beneills/commonsense-gem"

  # Package every git-tracked file except those under test/, spec/ or features/.
  tracked_files = `git ls-files -z`.split("\x0")
  gem.files     = tracked_files.reject { |path| path.match(%r{^(test|spec|features)/}) }

  # Executables are the packaged files under exe/, listed by base name.
  gem.bindir        = "exe"
  gem.executables   = gem.files.grep(%r{^exe/}) { |path| File.basename(path) }
  gem.require_paths = ["lib"]

  # Development-only dependencies, declared in the same order as before.
  {
    "bundler"  => "~> 1.11",
    "rake"     => "~> 10.0",
    "minitest" => "~> 5.0"
  }.each do |dependency, requirement|
    gem.add_development_dependency dependency, requirement
  end
end
|
data/exe/commonsense
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "commonsense"
|
4
|
+
|
5
|
+
##
# Validates +text+ with Commonsense.valid? and prints a one-line verdict
# to standard output, labelled with +name+.
#
# Returns the result of Commonsense.valid? so callers can use it as an
# exit status.

def test_valididty(name, text)
  verdict = Commonsense.valid?(text)

  report = verdict ? "#{name} is valid commonsense text." : "#{name} is not valid commonsense text!"
  puts report

  verdict
end
|
16
|
+
|
17
|
+
##
# Command-line entry point.
#
# * With no arguments, validates standard input.
# * With a single +--help+ or +-h+ argument, prints usage and exits 0.
# * Otherwise treats every argument as a filename (or +-+ for standard
#   input), validates each one, and exits successfully only if all of
#   them are valid.
#
# Always terminates the process via +exit+.

def main
  # Commonsense.valid? raises ArgumentError unless the string is tagged as
  # UTF-8. Strings read from STDIN or a file take the locale's default
  # external encoding, so tag/read them as UTF-8 explicitly; otherwise the
  # tool fails under e.g. LANG=C even for perfectly valid input.
  read_stdin = -> { STDIN.read.force_encoding(Encoding::UTF_8) }

  if ARGV.empty?
    exit test_valididty("STDIN", read_stdin.call)
  elsif ARGV.length == 1 && ['--help', '-h'].include?(ARGV.first)
    puts 'usage: commonsense [FILE...]'
    puts
    puts ' where each FILE is a filename or - for standard input.'
    puts
    puts 'This utility checks each file against the commonsense text specification'
    puts 'returning good exit status only if all inputs are valid.'
    exit 0
  else
    results = ARGV.map do |filename|
      if filename == "-"
        test_valididty "STDIN", read_stdin.call
      elsif File.file? filename
        test_valididty filename, File.read(filename, encoding: 'UTF-8')
      else
        puts "#{filename} does not exist as a regular file!"

        # a missing or non-regular file counts as a failed input
        false
      end
    end

    # good exit status only when every input validated
    exit results.all?
  end
end
|
47
|
+
|
48
|
+
# Run the command-line entry point defined above when this script is executed.
main
|
data/lib/commonsense.rb
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
require "commonsense/basic_english"
|
2
|
+
require "commonsense/version"
|
3
|
+
|
4
|
+
##
# Validation of text against the commonsense specification.
#
# The constants below are fragments of regular-expression character classes
# (or, for the period, an escaped regular-expression atom); they are
# interpolated into the patterns used by valid_line? and valid?.

module Commonsense

  HEADING_START_CODEPOINTS = "a-zA-Z0-9".freeze
  HEADING_BODY_CODEPOINTS  = "a-zA-Z0-9 ".freeze
  HEADING_END_CODEPOINTS   = "a-zA-Z0-9".freeze

  SENTENCE_START_CODEPOINTS = "A-Z0-9".freeze
  SENTENCE_BODY_CODEPOINTS  = "a-zA-Z0-9 ".freeze
  SENTENCE_END_CODEPOINTS   = "a-zA-Z0-9".freeze

  SPACE_CODEPOINT   = " ".freeze
  PERIOD_CODEPOINT  = "\\.".freeze
  NEWLINE_CODEPOINT = "\n".freeze

  LINE_WHITELIST = "a-zA-Z0-9 \\.".freeze
  TEXT_WHITELIST = "a-zA-Z0-9 \\.\n".freeze

  # Patterns are compiled once and anchored with \A and \z so that they must
  # match the WHOLE string. (^ and $ match at every line boundary, which
  # would let a string with an embedded newline pass when only one of its
  # lines is acceptable.)
  LINE_WHITELIST_PATTERN = /\A[#{LINE_WHITELIST}]*\z/
  TEXT_WHITELIST_PATTERN = /\A[#{TEXT_WHITELIST}]*\z/

  # a bare heading: no period, no leading or trailing space
  HEADING_PATTERN = /\A[#{HEADING_START_CODEPOINTS}]([#{HEADING_BODY_CODEPOINTS}]*[#{HEADING_END_CODEPOINTS}])?\z/

  # a single-space-separated list of period-terminated sentences
  sentence_source = "[#{SENTENCE_START_CODEPOINTS}]([#{SENTENCE_BODY_CODEPOINTS}]*[#{SENTENCE_END_CODEPOINTS}])?#{PERIOD_CODEPOINT}"
  SENTENCES_PATTERN = /\A(#{sentence_source}#{SPACE_CODEPOINT})*#{sentence_source}\z/

  # two or more spaces in a row
  REPEATED_SPACE_PATTERN = /#{SPACE_CODEPOINT}{2,}/

  private_constant :LINE_WHITELIST_PATTERN, :TEXT_WHITELIST_PATTERN,
                   :HEADING_PATTERN, :SENTENCES_PATTERN, :REPEATED_SPACE_PATTERN

  ##
  # Tests whether line conforms to the commonsense specification of a line.
  #
  # +line+ should be a UTF-8 encoded Ruby string with no newline characters;
  # a line that does contain a newline is rejected (returns false).
  #
  # Returns true or false. Raises ArgumentError if +line+ is not tagged as
  # UTF-8 or contains invalid byte sequences.

  def self.valid_line?(line)
    # line should be UTF-8 encoded
    raise ArgumentError, 'line not UTF-8 encoded' unless line.encoding == Encoding::UTF_8
    raise ArgumentError, 'line has invalid encoding' unless line.valid_encoding?

    # line should contain only whitelisted codepoints
    return false unless LINE_WHITELIST_PATTERN.match line

    # a line should be:
    #   i)  a bare heading, or
    #   ii) a single-space-separated list of period-terminated sentences
    return false unless HEADING_PATTERN.match(line) || SENTENCES_PATTERN.match(line)

    # we must never have two spaces in a row
    return false if REPEATED_SPACE_PATTERN.match line

    # split heading or sentence line into individual sentences, each a list of words
    word_lists = line.split(".").map { |part| part.split SPACE_CODEPOINT }

    word_lists.all? do |words|
      first_word, *other_words = words

      # first word of each sentence/heading should be in wordlist, possibly
      # with de-capitalization; other words should be in wordlist as-is
      Commonsense::BasicEnglish.fuzzy_valid?(first_word) &&
        other_words.all? { |word| Commonsense::BasicEnglish.valid? word }
    end
  end

  ##
  # Tests whether text conforms to the commonsense specification of a multi-line text.
  #
  # +text+ should be a UTF-8 encoded Ruby string.
  #
  # Returns true or false. Raises ArgumentError if +text+ is not tagged as
  # UTF-8 or contains invalid byte sequences.

  def self.valid?(text)
    # text should be UTF-8 encoded
    raise ArgumentError, 'text not UTF-8 encoded' unless text.encoding == Encoding::UTF_8
    raise ArgumentError, 'text has invalid encoding' unless text.valid_encoding?

    # text should contain only whitelisted codepoints
    return false unless TEXT_WHITELIST_PATTERN.match text

    # text should be a sequence of zero or more lines; empty lines are allowed
    text.split(NEWLINE_CODEPOINT).all? { |line| line.empty? || valid_line?(line) }
  end

end
|
@@ -0,0 +1,880 @@
|
|
1
|
+
module Commonsense
  ##
  # Word list and membership tests for our version of Ogden's Basic English.

  module BasicEnglish
    ##
    # The permitted words, case-sensitive ("I" is the only capitalized entry).
    #
    # List taken from http://ogden.basic-english.org/words.html in March 2016.
    # Entries are grouped in the order operations, general things, picturable
    # things, qualities, opposites. Alternative spellings (plough/plow,
    # grey/gray) are separate entries so that each spelling can match.

    WORDS = %w[
      come get give go keep let make put seem take be do have say see send may will
      about across after against among at before between by down from in off on over
      through to under up with as for of till than
      a the all any every little much no other some such that this I he you who
      and because but or if though while how when where why
      again ever far forward here near now out still then there together well
      almost enough even not only quite so very tomorrow yesterday
      north south east west please yes

      account act addition adjustment advertisement agreement air amount amusement
      animal answer apparatus approval argument art attack attempt attention
      attraction authority
      back balance base behavior belief birth bit bite blood blow body brass bread
      breath brother building burn burst business butter
      canvas care cause chalk chance change cloth coal color comfort committee
      company comparison competition condition connection control cook copper copy
      cork cotton cough country cover crack credit crime crush cry current curve
      damage danger daughter day death debt decision degree design desire
      destruction detail development digestion direction discovery discussion
      disease disgust distance distribution division doubt drink driving dust
      earth edge education effect end error event example exchange existence
      expansion experience expert
      fact fall family father fear feeling fiction field fight fire flame flight
      flower fold food force form friend front fruit
      glass gold government grain grass grip group growth guide
      harbor harmony hate hearing heat help history hole hope hour humor
      ice idea impulse increase industry ink insect instrument insurance interest
      invention iron
      jelly join journey judge jump
      kick kiss knowledge
      land language laugh law lead learning leather letter level lift light limit
      linen liquid list look loss love
      machine man manager mark market mass meal measure meat meeting memory metal
      middle milk mind mine minute mist money month morning mother motion mountain
      move music
      name nation need news night noise note number
      observation offer oil operation opinion order organization ornament owner
      page pain paint paper part paste payment peace person place plant play
      pleasure point poison polish porter position powder power price print process
      produce profit property prose protest pull punishment purpose push
      quality question
      rain range rate ray reaction reading reason record regret relation religion
      representative request respect rest reward rhythm rice river road roll room
      rub rule run
      salt sand scale science sea seat secretary selection self sense servant sex
      shade shake shame shock side sign silk silver sister size sky sleep slip slope
      smash smell smile smoke sneeze snow soap society son song sort sound soup
      space stage start statement steam steel step stitch stone stop story stretch
      structure substance sugar suggestion summer support surprise swim system
      talk taste tax teaching tendency test theory thing thought thunder time tin
      top touch trade transport trick trouble turn twist
      unit use
      value verse vessel view voice
      walk war wash waste water wave wax way weather week weight wind wine winter
      woman wood wool word work wound writing year

      angle ant apple arch arm army
      baby bag ball band basin basket bath bed bee bell berry bird blade board boat
      bone book boot bottle box boy brain brake branch brick bridge brush bucket
      bulb button
      cake camera card cart carriage cat chain cheese chest chin church circle
      clock cloud coat collar comb cord cow cup curtain cushion
      dog door drain drawer dress drop
      ear egg engine eye
      face farm feather finger fish flag floor fly foot fork fowl frame
      garden girl glove goat gun
      hair hammer hand hat head heart hook horn horse hospital house
      island jewel
      kettle key knee knife knot
      leaf leg library line lip lock
      map match monkey moon mouth muscle
      nail neck needle nerve net nose nut
      office orange oven
      parcel pen pencil picture pig pin pipe plane plate plough plow pocket pot
      potato prison pump
      rail rat receipt ring rod roof root
      sail school scissors screw seed sheep shelf ship shirt shoe skin skirt snake
      sock spade sponge spoon spring square stamp star station stem stick stocking
      stomach store street sun
      table tail thread throat thumb ticket toe tongue tooth town train tray tree
      trousers
      umbrella
      wall watch wheel whip whistle window wing wire worm

      able acid angry automatic beautiful black boiling bright broken brown cheap
      chemical chief clean clear common complex conscious cut deep dependent early
      elastic electric equal fat fertile first fixed flat free frequent full general
      good great grey gray hanging happy hard healthy high hollow important kind
      like living long male married material medical military natural necessary new
      normal open parallel past physical political poor possible present private
      probable quick quiet ready red regular responsible right round same second
      separate serious sharp smooth sticky stiff straight strong sudden sweet tall
      thick tight tired true violent waiting warm wet wide wise yellow young

      awake bad bent bitter blue certain cold complete cruel dark dead dear delicate
      different dirty dry false feeble female foolish future green ill last late
      left loose loud low mixed narrow old opposite public rough sad safe secret
      short shut simple slow small soft solid special strange thin white wrong
    ].freeze

    ##
    # Tests whether a word is included in our version of Ogden's Basic English word list,
    # and therefore suitable as, e.g. a mid-sentence word.
    #
    # +word+ should be a case-sensitive string.
    #
    # Returns true or false.

    def self.valid?(word)
      WORDS.include? word
    end

    ##
    # Tests whether a word is included, possibly with de-capitalization.
    #
    # A word is accepted if it is in the list as-is, or if it is written with
    # exactly one leading capital (e.g. "The") and its all-lowercase form is
    # in the list. Other capitalizations (e.g. "THE") are not de-capitalized.
    #
    # +word+ should be a case-sensitive string.
    #
    # Returns true or false.

    def self.fuzzy_valid?(word)
      return true if valid? word

      lowered = word.downcase
      lowered.capitalize == word && valid?(lowered)
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,98 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: commonsense
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Ben Eills
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2016-03-14 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.11'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.11'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: minitest
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '5.0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '5.0'
|
55
|
+
description:
|
56
|
+
email:
|
57
|
+
- ben@beneills.com
|
58
|
+
executables:
|
59
|
+
- commonsense
|
60
|
+
extensions: []
|
61
|
+
extra_rdoc_files: []
|
62
|
+
files:
|
63
|
+
- ".gitignore"
|
64
|
+
- ".travis.yml"
|
65
|
+
- Gemfile
|
66
|
+
- README.md
|
67
|
+
- Rakefile
|
68
|
+
- bin/console
|
69
|
+
- bin/setup
|
70
|
+
- commonsense.gemspec
|
71
|
+
- exe/commonsense
|
72
|
+
- lib/commonsense.rb
|
73
|
+
- lib/commonsense/basic_english.rb
|
74
|
+
- lib/commonsense/version.rb
|
75
|
+
homepage: https://github.com/beneills/commonsense-gem
|
76
|
+
licenses: []
|
77
|
+
metadata: {}
|
78
|
+
post_install_message:
|
79
|
+
rdoc_options: []
|
80
|
+
require_paths:
|
81
|
+
- lib
|
82
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
83
|
+
requirements:
|
84
|
+
- - ">="
|
85
|
+
- !ruby/object:Gem::Version
|
86
|
+
version: '0'
|
87
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
88
|
+
requirements:
|
89
|
+
- - ">="
|
90
|
+
- !ruby/object:Gem::Version
|
91
|
+
version: '0'
|
92
|
+
requirements: []
|
93
|
+
rubyforge_project:
|
94
|
+
rubygems_version: 2.4.6
|
95
|
+
signing_key:
|
96
|
+
specification_version: 4
|
97
|
+
summary: Validate text against the commonsense spec to resist authorship analysis.
|
98
|
+
test_files: []
|