commonsense 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.travis.yml +4 -0
- data/Gemfile +4 -0
- data/README.md +25 -0
- data/Rakefile +10 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/commonsense.gemspec +23 -0
- data/exe/commonsense +48 -0
- data/lib/commonsense.rb +79 -0
- data/lib/commonsense/basic_english.rb +880 -0
- data/lib/commonsense/version.rb +3 -0
- metadata +98 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 42ba1b7316fd76ddcb89b7308a2a860401907765
|
4
|
+
data.tar.gz: f1461e7070eed57ae7f71ca9777440690a39dd6a
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: f9b92276ef9abcaded3b683c4a363a483fb7e8ef7ada62b97d1b2c3da818c030c6036db7165a35c98b0ff821028189f742cf220a22239541aeb487048e624466
|
7
|
+
data.tar.gz: 95caae62c0cbd962d21cf45705ff9baf863d06637ad750393db6999e4c14dfe7f8b2baa65cd960e6b0fd89d9019e63d4f12cd5abab24bfcd0516ad85aac6d477
|
data/.gitignore
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# Commonsense
|
2
|
+
|
3
|
+
Validate text against the [commonsense specification](https://github.com/beneills/commonsense-spec) to resist authorship analysis. See the spec for more information.
|
4
|
+
|
5
|
+
This gem contains a Ruby library and single executable.
|
6
|
+
|
7
|
+
## Usage
|
8
|
+
|
9
|
+
$ gem install commonsense
|
10
|
+
$ commonsense thomas_paine.txt
|
11
|
+
|
12
|
+
## Development
|
13
|
+
|
14
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment. Run `bundle exec commonsense` to use the gem in this directory, ignoring other installed copies of this gem.
|
15
|
+
|
16
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
17
|
+
|
18
|
+
## Todo
|
19
|
+
|
20
|
+
+ allow verb conjugates
|
21
|
+
+ add feature to publish to anonymous pastebins
|
22
|
+
|
23
|
+
## Contributing
|
24
|
+
|
25
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/beneills/commonsense-gem
|
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "commonsense"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
require "irb"
|
14
|
+
# Open an interactive IRB session; the requires above have already loaded the gem.
IRB.start
|
data/bin/setup
ADDED
data/commonsense.gemspec
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'commonsense/version'
|
5
|
+
|
6
|
+
# Gem specification for commonsense: identity, packaged files, executables
# and development-time dependencies.
Gem::Specification.new do |gem|
  gem.name    = "commonsense"
  gem.version = Commonsense::VERSION
  gem.authors = ["Ben Eills"]
  gem.email   = ["ben@beneills.com"]

  gem.summary  = %q{Validate text against the commonsense spec to resist authorship analysis.}
  gem.homepage = "https://github.com/beneills/commonsense-gem"

  # Package every git-tracked file except those under test/, spec/ or features/.
  tracked_paths = `git ls-files -z`.split("\x0")
  gem.files = tracked_paths.reject { |path| path.match(%r{^(test|spec|features)/}) }

  # Every packaged file under exe/ is installed as an executable, by base name.
  gem.bindir        = "exe"
  gem.executables   = gem.files.grep(%r{^exe/}) { |path| File.basename(path) }
  gem.require_paths = ["lib"]

  gem.add_development_dependency "bundler", "~> 1.11"
  gem.add_development_dependency "rake", "~> 10.0"
  gem.add_development_dependency "minitest", "~> 5.0"
end
|
data/exe/commonsense
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "commonsense"
|
4
|
+
|
5
|
+
##
# Checks +text+ against the commonsense specification and prints a one-line
# verdict for it on standard output.
#
# +name+ is the label used in the printed message (a filename or "STDIN").
# +text+ is the raw content to check, in whatever encoding it was read with.
#
# Returns true when the text is valid commonsense text, false otherwise.

def test_valididty(name, text)
  # Text read from STDIN or a file is tagged with the locale's default encoding
  # (for example US-ASCII under LANG=C), and Commonsense.valid? raises
  # ArgumentError for anything not tagged UTF-8. Reinterpret the bytes as UTF-8
  # on a copy, and treat malformed UTF-8 as "not valid" rather than crashing.
  utf8_text = text.dup.force_encoding(Encoding::UTF_8)
  is_valid = utf8_text.valid_encoding? && Commonsense.valid?(utf8_text)

  if is_valid
    puts "#{name} is valid commonsense text."
  else
    puts "#{name} is not valid commonsense text!"
  end

  is_valid
end
|
16
|
+
|
17
|
+
##
# Command-line entry point.
#
# With no arguments, checks standard input. With a single --help or -h
# argument, prints usage and exits successfully. Otherwise treats every
# argument as a filename ("-" meaning standard input) and checks each one.
#
# Always terminates the process: exit status is success only when every input
# was valid commonsense text.

def main
  if ARGV.empty?
    exit test_valididty("STDIN", STDIN.read)
  elsif ARGV.length == 1 && ['--help', '-h'].include?(ARGV.first)
    puts 'usage: commonsense [FILE...]'
    puts
    puts '  where each FILE is a filename or - for standard input.'
    puts
    puts 'This utility checks each file against the commonsense text specification'
    puts 'returning good exit status only if all inputs are valid.'
    exit 0
  else
    results = ARGV.map do |filename|
      if filename == "-"
        test_valididty "STDIN", STDIN.read
      elsif File.file? filename
        test_valididty filename, File.read(filename)
      else
        # A missing or non-regular file counts as a failed input.
        puts "#{filename} does not exist as a regular file!"

        false
      end
    end

    exit results.all?
  end
end
|
47
|
+
|
48
|
+
main
|
data/lib/commonsense.rb
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
require "commonsense/basic_english"
|
2
|
+
require "commonsense/version"
|
3
|
+
|
4
|
+
module Commonsense

  # Character-class fragments; each is interpolated inside [...] in the
  # patterns below, so they are written in regular-expression syntax.
  HEADING_START_CODEPOINTS = "a-zA-Z0-9".freeze
  HEADING_BODY_CODEPOINTS  = "a-zA-Z0-9 ".freeze
  HEADING_END_CODEPOINTS   = "a-zA-Z0-9".freeze

  SENTENCE_START_CODEPOINTS = "A-Z0-9".freeze
  SENTENCE_BODY_CODEPOINTS  = "a-zA-Z0-9 ".freeze
  SENTENCE_END_CODEPOINTS   = "a-zA-Z0-9".freeze

  SPACE_CODEPOINT   = " ".freeze
  PERIOD_CODEPOINT  = "\\.".freeze
  NEWLINE_CODEPOINT = "\n".freeze

  LINE_WHITELIST = "a-zA-Z0-9 \\.".freeze
  TEXT_WHITELIST = "a-zA-Z0-9 \\.\n".freeze

  # Pattern sources for a bare heading and for one period-terminated sentence.
  HEADING_SOURCE  = "[#{HEADING_START_CODEPOINTS}]([#{HEADING_BODY_CODEPOINTS}]*[#{HEADING_END_CODEPOINTS}])?".freeze
  SENTENCE_SOURCE = "[#{SENTENCE_START_CODEPOINTS}]([#{SENTENCE_BODY_CODEPOINTS}]*[#{SENTENCE_END_CODEPOINTS}])?#{PERIOD_CODEPOINT}".freeze

  # Compiled once at load time. All patterns are anchored with \A and \z so the
  # WHOLE string has to match; ^ and $ would accept a string as soon as any one
  # of its lines matched.
  LINE_WHITELIST_PATTERN = /\A[#{LINE_WHITELIST}]*\z/
  TEXT_WHITELIST_PATTERN = /\A[#{TEXT_WHITELIST}]*\z/
  HEADING_PATTERN        = /\A#{HEADING_SOURCE}\z/
  SENTENCES_PATTERN      = /\A(#{SENTENCE_SOURCE}#{SPACE_CODEPOINT})*#{SENTENCE_SOURCE}\z/
  DOUBLE_SPACE           = (SPACE_CODEPOINT * 2).freeze

  private_constant :HEADING_SOURCE, :SENTENCE_SOURCE, :LINE_WHITELIST_PATTERN,
                   :TEXT_WHITELIST_PATTERN, :HEADING_PATTERN, :SENTENCES_PATTERN,
                   :DOUBLE_SPACE

  ##
  # Tests whether line conforms to the commonsense specification of a line.
  #
  # +line+ should be a UTF-8 encoded Ruby string with no newline characters.
  # A line containing a newline is rejected (returns false).
  #
  # Returns true or false. Raises ArgumentError if +line+ is not tagged as
  # UTF-8 or contains malformed UTF-8.

  def self.valid_line?(line)
    # line should be UTF-8 encoded
    raise ArgumentError, 'line not UTF-8 encoded' unless line.encoding == Encoding::UTF_8
    raise ArgumentError, 'line has invalid encoding' unless line.valid_encoding?

    # line should contain only whitelisted codepoints
    return false unless LINE_WHITELIST_PATTERN.match(line)

    # a line should be:
    #   i)  a bare heading, or
    #   ii) a single-space-separated list of period-terminated sentences
    return false unless HEADING_PATTERN.match(line) || SENTENCES_PATTERN.match(line)

    # we must never have two spaces in a row
    return false if line.include?(DOUBLE_SPACE)

    # split the heading or sentence line into individual sentences, each of
    # which is then split into its words
    line.split(".").all? do |sentence|
      first_word, *other_words = sentence.split(SPACE_CODEPOINT)

      # first word of each sentence/heading should be in the wordlist, possibly
      # with de-capitalization; every other word must be in the wordlist as-is
      Commonsense::BasicEnglish.fuzzy_valid?(first_word) &&
        other_words.all? { |word| Commonsense::BasicEnglish.valid?(word) }
    end
  end

  ##
  # Tests whether text conforms to the commonsense specification of a multi-line text.
  #
  # +text+ should be a UTF-8 encoded Ruby string.
  #
  # Returns true or false. Raises ArgumentError if +text+ is not tagged as
  # UTF-8 or contains malformed UTF-8.

  def self.valid?(text)
    # text should be UTF-8 encoded
    raise ArgumentError, 'text not UTF-8 encoded' unless text.encoding == Encoding::UTF_8
    raise ArgumentError, 'text has invalid encoding' unless text.valid_encoding?

    # text should contain only whitelisted codepoints
    return false unless TEXT_WHITELIST_PATTERN.match(text)

    # text should be a sequence of zero or more lines; empty lines are allowed
    text.split(NEWLINE_CODEPOINT).all? { |line| line.empty? || valid_line?(line) }
  end

end
|
@@ -0,0 +1,880 @@
|
|
1
|
+
module Commonsense
  module BasicEnglish
    # Our version of Ogden's Basic English word list.
    # List taken from http://ogden.basic-english.org/words.html in March 2016.
    #
    # Entries are case-sensitive ("I" is the only capitalized entry). Words with
    # two spellings (plough/plow, grey/gray) are listed as two separate entries,
    # because "/" can never appear in a validated word.
    WORDS = %w[
      come get give go keep let make put seem take
      be do have say see send may will about across
      after against among at before between by down from in
      off on over through to under up with as for
      of till than a the all any every little much
      no other some such that this I he you who
      and because but or if though while how when where
      why again ever far forward here near now out still
      then there together well almost enough even not only quite
      so very tomorrow yesterday north south east west please yes

      account act addition adjustment advertisement agreement air amount amusement animal
      answer apparatus approval argument art attack attempt attention attraction authority
      back balance base behavior belief birth bit bite blood blow
      body brass bread breath brother building burn burst business butter
      canvas care cause chalk chance change cloth coal color comfort
      committee company comparison competition condition connection control cook copper copy
      cork cotton cough country cover crack credit crime crush cry
      current curve damage danger daughter day death debt decision degree
      design desire destruction detail development digestion direction discovery discussion disease
      disgust distance distribution division doubt drink driving dust earth edge
      education effect end error event example exchange existence expansion experience
      expert fact fall family father fear feeling fiction field fight
      fire flame flight flower fold food force form friend front
      fruit glass gold government grain grass grip group growth guide
      harbor harmony hate hearing heat help history hole hope hour
      humor ice idea impulse increase industry ink insect instrument insurance
      interest invention iron jelly join journey judge jump kick kiss
      knowledge land language laugh law lead learning leather letter level
      lift light limit linen liquid list look loss love machine
      man manager mark market mass meal measure meat meeting memory
      metal middle milk mind mine minute mist money month morning
      mother motion mountain move music name nation need news night
      noise note number observation offer oil operation opinion order organization
      ornament owner page pain paint paper part paste payment peace
      person place plant play pleasure point poison polish porter position
      powder power price print process produce profit property prose protest
      pull punishment purpose push quality question rain range rate ray
      reaction reading reason record regret relation religion representative request respect
      rest reward rhythm rice river road roll room rub rule
      run salt sand scale science sea seat secretary selection self
      sense servant sex shade shake shame shock side sign silk
      silver sister size sky sleep slip slope smash smell smile
      smoke sneeze snow soap society son song sort sound soup
      space stage start statement steam steel step stitch stone stop
      story stretch structure substance sugar suggestion summer support surprise swim
      system talk taste tax teaching tendency test theory thing thought
      thunder time tin top touch trade transport trick trouble turn
      twist unit use value verse vessel view voice walk war
      wash waste water wave wax way weather week weight wind
      wine winter woman wood wool word work wound writing year

      angle ant apple arch arm army baby bag ball band
      basin basket bath bed bee bell berry bird blade board
      boat bone book boot bottle box boy brain brake branch
      brick bridge brush bucket bulb button cake camera card cart
      carriage cat chain cheese chest chin church circle clock cloud
      coat collar comb cord cow cup curtain cushion dog door
      drain drawer dress drop ear egg engine eye face farm
      feather finger fish flag floor fly foot fork fowl frame
      garden girl glove goat gun hair hammer hand hat head
      heart hook horn horse hospital house island jewel kettle key
      knee knife knot leaf leg library line lip lock map
      match monkey moon mouth muscle nail neck needle nerve net
      nose nut office orange oven parcel pen pencil picture pig
      pin pipe plane plate plough plow pocket pot potato prison
      pump rail rat receipt ring rod roof root sail school
      scissors screw seed sheep shelf ship shirt shoe skin skirt
      snake sock spade sponge spoon spring square stamp star station
      stem stick stocking stomach store street sun table tail thread
      throat thumb ticket toe tongue tooth town train tray tree
      trousers umbrella wall watch wheel whip whistle window wing wire
      worm

      able acid angry automatic beautiful black boiling bright broken brown
      cheap chemical chief clean clear common complex conscious cut deep
      dependent early elastic electric equal fat fertile first fixed flat
      free frequent full general good great grey gray hanging happy
      hard healthy high hollow important kind like living long male
      married material medical military natural necessary new normal open parallel
      past physical political poor possible present private probable quick quiet
      ready red regular responsible right round same second separate serious
      sharp smooth sticky stiff straight strong sudden sweet tall thick
      tight tired true violent waiting warm wet wide wise yellow
      young

      awake bad bent bitter blue certain cold complete cruel dark
      dead dear delicate different dirty dry false feeble female foolish
      future green ill last late left loose loud low mixed
      narrow old opposite public rough sad safe secret short shut
      simple slow small soft solid special strange thin white wrong
    ].freeze

    ##
    # Tests whether a word is included in our version of Ogden's Basic English word list,
    # and therefore suitable as, e.g. a mid-sentence word.
    #
    # +word+ should be a case-sensitive string.
    #
    # Returns true or false.

    def self.valid?(word)
      WORDS.include?(word)
    end

    ##
    # Tests whether a word is included, possibly with de-capitalization.
    #
    # A word passes if it is in the list exactly as given, or if it is written
    # with a leading capital followed by lower case (e.g. "Come") and its
    # all-lower-case form is in the list.
    #
    # +word+ should be a case-sensitive string.
    #
    # Returns true or false.

    def self.fuzzy_valid?(word)
      return true if valid?(word)

      decapitalized = word.downcase
      # Only a Capitalized word may be de-capitalized; "COME" or "cOme" may not.
      decapitalized.capitalize == word && valid?(decapitalized)
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,98 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: commonsense
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Ben Eills
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2016-03-14 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.11'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.11'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: minitest
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '5.0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '5.0'
|
55
|
+
description:
|
56
|
+
email:
|
57
|
+
- ben@beneills.com
|
58
|
+
executables:
|
59
|
+
- commonsense
|
60
|
+
extensions: []
|
61
|
+
extra_rdoc_files: []
|
62
|
+
files:
|
63
|
+
- ".gitignore"
|
64
|
+
- ".travis.yml"
|
65
|
+
- Gemfile
|
66
|
+
- README.md
|
67
|
+
- Rakefile
|
68
|
+
- bin/console
|
69
|
+
- bin/setup
|
70
|
+
- commonsense.gemspec
|
71
|
+
- exe/commonsense
|
72
|
+
- lib/commonsense.rb
|
73
|
+
- lib/commonsense/basic_english.rb
|
74
|
+
- lib/commonsense/version.rb
|
75
|
+
homepage: https://github.com/beneills/commonsense-gem
|
76
|
+
licenses: []
|
77
|
+
metadata: {}
|
78
|
+
post_install_message:
|
79
|
+
rdoc_options: []
|
80
|
+
require_paths:
|
81
|
+
- lib
|
82
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
83
|
+
requirements:
|
84
|
+
- - ">="
|
85
|
+
- !ruby/object:Gem::Version
|
86
|
+
version: '0'
|
87
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
88
|
+
requirements:
|
89
|
+
- - ">="
|
90
|
+
- !ruby/object:Gem::Version
|
91
|
+
version: '0'
|
92
|
+
requirements: []
|
93
|
+
rubyforge_project:
|
94
|
+
rubygems_version: 2.4.6
|
95
|
+
signing_key:
|
96
|
+
specification_version: 4
|
97
|
+
summary: Validate text against the commonsense spec to resist authorship analysis.
|
98
|
+
test_files: []
|