correct-horse-battery-staple 0.6.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (61) hide show
  1. data.tar.gz.sig +1 -1
  2. data/.gemtest +0 -0
  3. data/Gemfile +53 -0
  4. data/Gemfile.lock +109 -0
  5. data/History.txt +6 -0
  6. data/Manifest.txt +57 -0
  7. data/README.txt +115 -0
  8. data/Rakefile +47 -0
  9. data/bin/chbs +234 -0
  10. data/bin/chbs-mkpass +16 -0
  11. data/correct-horse-battery-staple.gemspec +59 -0
  12. data/lib/correct_horse_battery_staple.rb +117 -0
  13. data/lib/correct_horse_battery_staple/assembler.rb +45 -0
  14. data/lib/correct_horse_battery_staple/backend.rb +6 -0
  15. data/lib/correct_horse_battery_staple/backend/isam_kd.rb +410 -0
  16. data/lib/correct_horse_battery_staple/backend/redis.rb +95 -0
  17. data/lib/correct_horse_battery_staple/backend/redis/d_range.rb +105 -0
  18. data/lib/correct_horse_battery_staple/corpus.rb +33 -0
  19. data/lib/correct_horse_battery_staple/corpus/base.rb +278 -0
  20. data/lib/correct_horse_battery_staple/corpus/isam.rb +258 -0
  21. data/lib/correct_horse_battery_staple/corpus/isam_kd.rb +60 -0
  22. data/lib/correct_horse_battery_staple/corpus/redis.rb +188 -0
  23. data/lib/correct_horse_battery_staple/corpus/redis2.rb +88 -0
  24. data/lib/correct_horse_battery_staple/corpus/serialized.rb +121 -0
  25. data/lib/correct_horse_battery_staple/corpus/sqlite.rb +266 -0
  26. data/lib/correct_horse_battery_staple/generator.rb +40 -0
  27. data/lib/correct_horse_battery_staple/memoize.rb +25 -0
  28. data/lib/correct_horse_battery_staple/parser.rb +5 -0
  29. data/lib/correct_horse_battery_staple/parser/base.rb +5 -0
  30. data/lib/correct_horse_battery_staple/parser/regex.rb +58 -0
  31. data/lib/correct_horse_battery_staple/range_parser.rb +29 -0
  32. data/lib/correct_horse_battery_staple/statistical_array.rb +74 -0
  33. data/lib/correct_horse_battery_staple/stats.rb +22 -0
  34. data/lib/correct_horse_battery_staple/word.rb +90 -0
  35. data/lib/correct_horse_battery_staple/writer.rb +29 -0
  36. data/lib/correct_horse_battery_staple/writer/base.rb +22 -0
  37. data/lib/correct_horse_battery_staple/writer/csv.rb +15 -0
  38. data/lib/correct_horse_battery_staple/writer/file.rb +54 -0
  39. data/lib/correct_horse_battery_staple/writer/isam.rb +50 -0
  40. data/lib/correct_horse_battery_staple/writer/isam_kd.rb +12 -0
  41. data/lib/correct_horse_battery_staple/writer/json.rb +19 -0
  42. data/lib/correct_horse_battery_staple/writer/marshal.rb +10 -0
  43. data/lib/correct_horse_battery_staple/writer/redis.rb +41 -0
  44. data/lib/correct_horse_battery_staple/writer/sqlite.rb +115 -0
  45. data/script/generate_all +34 -0
  46. data/script/load_redis +17 -0
  47. data/script/perftest +74 -0
  48. data/spec/corpus/serialized_spec.rb +62 -0
  49. data/spec/corpus_spec.rb +50 -0
  50. data/spec/correct_horse_battery_staple_spec.rb +73 -0
  51. data/spec/fixtures/100.json +101 -0
  52. data/spec/fixtures/corpus1.csv +101 -0
  53. data/spec/fixtures/corpus100.json +101 -0
  54. data/spec/fixtures/wiktionary1000.htm +648 -0
  55. data/spec/range_parser_spec.rb +54 -0
  56. data/spec/spec_helper.rb +20 -0
  57. data/spec/statistical_array_spec.rb +52 -0
  58. data/spec/support/spec_pry.rb +1 -0
  59. data/spec/word_spec.rb +95 -0
  60. metadata +264 -0
  61. metadata.gz.sig +1 -0
@@ -0,0 +1 @@
1
+ ��>��J�y���{�=�W2:ޣq��x����}(F�^f��j���b���~������e��#?��{��C~S��ա(��v�+�כ�g���qE��u�E�v��~Y� �pR��[
- �<� nF�"|�c�8���bd@�B@�o��h'{�N�� �@U����ł�u�<�@��C4���kڏ-�w)���
File without changes
data/Gemfile ADDED
@@ -0,0 +1,53 @@
1
+ source :rubygems
2
+
3
+ # data formats
4
+ gem "fastercsv", :platforms => [:mri_18, :jruby]
5
+ gem "json"
6
+
7
+ # performance
8
+ # gem "memoizable"
9
+
10
+ # external DBs
11
+ gem "sqlite3", :platforms => [:mri]
12
+
13
+ platform :mri do
14
+ gem "hiredis"
15
+ gem "redis", ">= 2.2.0" # , :require => ["redis", "redis/connection/hiredis"]
16
+ end
17
+
18
+ platform :jruby do
19
+ gem "redis", ">= 2.2.0"
20
+ end
21
+
22
+ gem "tupalo-kdtree"
23
+
24
+ # cmdline
25
+ gem "commander"
26
+ platform :jruby do
27
+ gem "ffi-ncurses"
28
+ end
29
+
30
+ gem "rdoc"
31
+
32
+ group :test do
33
+ gem "rspec"
34
+ end
35
+
36
+ group :development do
37
+ # debugging
38
+ gem "pry"
39
+
40
+ # gem creation
41
+ gem "rubyforge"
42
+ gem "hoe"
43
+ gem "hoe-git"
44
+ gem "hoe-bundler"
45
+ gem "hoe-yard"
46
+ gem "hoe-gemspec"
47
+ gem "hoe-debugging"
48
+
49
+ gem "ruby-prof", :platforms => [:mri]
50
+
51
+ # CI
52
+ gem "tddium"
53
+ end
@@ -0,0 +1,109 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ coderay (0.9.8)
5
+ commander (4.0.6)
6
+ highline (~> 1.5.0)
7
+ diff-lcs (1.1.3)
8
+ fastercsv (1.5.4)
9
+ ffi (1.0.11-java)
10
+ ffi-locale (1.0.1)
11
+ ffi (>= 1.0.9)
12
+ ffi-ncurses (0.4.0)
13
+ ffi (>= 1.0.9)
14
+ ffi-locale (>= 1.0.0)
15
+ highline (1.5.2)
16
+ hiredis (0.4.4)
17
+ hoe (2.12.3)
18
+ rake (~> 0.8)
19
+ hoe-bundler (1.1.0)
20
+ hoe (>= 2.2.0)
21
+ hoe-debugging (1.0.2)
22
+ hoe (>= 2.2.0)
23
+ hoe-gemspec (1.0.0)
24
+ hoe (>= 2.2.0)
25
+ hoe-git (1.4.1)
26
+ hoe (>= 2.2.0)
27
+ hoe-yard (0.1.2)
28
+ yard (>= 0.2.3.1)
29
+ httparty (0.8.1)
30
+ multi_json
31
+ multi_xml
32
+ json (1.6.1)
33
+ json (1.6.1-java)
34
+ json_pure (1.6.3)
35
+ method_source (0.6.6)
36
+ ruby_parser (~> 2.0.5)
37
+ multi_json (1.0.4)
38
+ multi_xml (0.4.1)
39
+ pry (0.9.6.2)
40
+ coderay (~> 0.9.8)
41
+ method_source (~> 0.6.5)
42
+ ruby_parser (~> 2.0.5)
43
+ slop (~> 2.1.0)
44
+ spoon (~> 0.0.1)
45
+ pry (0.9.6.2-java)
46
+ coderay (~> 0.9.8)
47
+ method_source (~> 0.6.5)
48
+ ruby_parser (~> 2.0.5)
49
+ slop (~> 2.1.0)
50
+ spoon (~> 0.0.1)
51
+ rake (0.9.2)
52
+ rdoc (3.12)
53
+ json (~> 1.4)
54
+ redis (2.2.2)
55
+ rspec (2.5.0)
56
+ rspec-core (~> 2.5.0)
57
+ rspec-expectations (~> 2.5.0)
58
+ rspec-mocks (~> 2.5.0)
59
+ rspec-core (2.5.2)
60
+ rspec-expectations (2.5.0)
61
+ diff-lcs (~> 1.1.2)
62
+ rspec-mocks (2.5.0)
63
+ ruby-prof (0.10.8)
64
+ ruby_parser (2.0.6)
65
+ sexp_processor (~> 3.0)
66
+ rubyforge (2.0.4)
67
+ json_pure (>= 1.1.7)
68
+ sexp_processor (3.0.7)
69
+ slop (2.1.0)
70
+ spoon (0.0.1)
71
+ sqlite3 (1.3.5)
72
+ tddium (1.0.6)
73
+ bundler
74
+ highline
75
+ json
76
+ tddium_client (~> 0.1.1)
77
+ thor
78
+ tddium_client (0.1.3)
79
+ httparty (>= 0.8.1)
80
+ json
81
+ thor (0.14.6)
82
+ tupalo-kdtree (0.2.3)
83
+ yard (0.7.4)
84
+
85
+ PLATFORMS
86
+ java
87
+ ruby
88
+
89
+ DEPENDENCIES
90
+ commander
91
+ fastercsv
92
+ ffi-ncurses
93
+ hiredis
94
+ hoe
95
+ hoe-bundler
96
+ hoe-debugging
97
+ hoe-gemspec
98
+ hoe-git
99
+ hoe-yard
100
+ json
101
+ pry
102
+ rdoc
103
+ redis (>= 2.2.0)
104
+ rspec
105
+ ruby-prof
106
+ rubyforge
107
+ sqlite3
108
+ tddium
109
+ tupalo-kdtree
@@ -0,0 +1,6 @@
1
+ === 1.0.0 / 2011-12-23
2
+
3
+ * 1 major enhancement
4
+
5
+ * Birthday!
6
+
@@ -0,0 +1,57 @@
1
+ Gemfile
2
+ Gemfile.lock
3
+ History.txt
4
+ Manifest.txt
5
+ README.txt
6
+ Rakefile
7
+ bin/chbs
8
+ bin/chbs-mkpass
9
+ lib/correct_horse_battery_staple.rb
10
+ lib/correct_horse_battery_staple/assembler.rb
11
+ lib/correct_horse_battery_staple/backend.rb
12
+ lib/correct_horse_battery_staple/backend/isam_kd.rb
13
+ lib/correct_horse_battery_staple/backend/redis.rb
14
+ lib/correct_horse_battery_staple/backend/redis/d_range.rb
15
+ lib/correct_horse_battery_staple/corpus.rb
16
+ lib/correct_horse_battery_staple/corpus/base.rb
17
+ lib/correct_horse_battery_staple/corpus/isam.rb
18
+ lib/correct_horse_battery_staple/corpus/isam_kd.rb
19
+ lib/correct_horse_battery_staple/corpus/redis.rb
20
+ lib/correct_horse_battery_staple/corpus/redis2.rb
21
+ lib/correct_horse_battery_staple/corpus/serialized.rb
22
+ lib/correct_horse_battery_staple/corpus/sqlite.rb
23
+ lib/correct_horse_battery_staple/generator.rb
24
+ lib/correct_horse_battery_staple/memoize.rb
25
+ lib/correct_horse_battery_staple/parser.rb
26
+ lib/correct_horse_battery_staple/parser/base.rb
27
+ lib/correct_horse_battery_staple/parser/regex.rb
28
+ lib/correct_horse_battery_staple/range_parser.rb
29
+ lib/correct_horse_battery_staple/statistical_array.rb
30
+ lib/correct_horse_battery_staple/stats.rb
31
+ lib/correct_horse_battery_staple/word.rb
32
+ lib/correct_horse_battery_staple/writer.rb
33
+ lib/correct_horse_battery_staple/writer/base.rb
34
+ lib/correct_horse_battery_staple/writer/csv.rb
35
+ lib/correct_horse_battery_staple/writer/file.rb
36
+ lib/correct_horse_battery_staple/writer/isam.rb
37
+ lib/correct_horse_battery_staple/writer/isam_kd.rb
38
+ lib/correct_horse_battery_staple/writer/json.rb
39
+ lib/correct_horse_battery_staple/writer/marshal.rb
40
+ lib/correct_horse_battery_staple/writer/redis.rb
41
+ lib/correct_horse_battery_staple/writer/sqlite.rb
42
+ script/generate_all
43
+ script/load_redis
44
+ script/perftest
45
+ spec/corpus/serialized_spec.rb
46
+ spec/corpus_spec.rb
47
+ spec/correct_horse_battery_staple_spec.rb
48
+ spec/fixtures/100.json
49
+ spec/fixtures/corpus1.csv
50
+ spec/fixtures/corpus100.json
51
+ spec/fixtures/wiktionary1000.htm
52
+ spec/range_parser_spec.rb
53
+ spec/spec_helper.rb
54
+ spec/statistical_array_spec.rb
55
+ spec/support/spec_pry.rb
56
+ spec/word_spec.rb
57
+ correct-horse-battery-staple.gemspec
@@ -0,0 +1,115 @@
1
+ = chbs
2
+
3
+ * http://github.com/rsanders/correct-horse-battery-staple
4
+
5
+ == DESCRIPTION:
6
+
7
+ Generate a 4 word password from words of size 3-8 characters, with
8
+ frequencies in the 30th-60th percentile. This range gives a nice set
9
+ of uncommon but not completely alien words.
10
+
11
+ $ chbs generate --verbose -W 3..8 -P 30..60
12
+ Corpus size: 6396 candidate words of 33075 total
13
+ Entropy: 48 bits (2^48 = 281474976710656)
14
+ Years to guess at 1000 guesses/sec: 8926
15
+ magnate-thermal-sandbank-augur
16
+
17
+ With the --verbose flag, the utility will calculate a time-to-guess
18
+ based on a completely arbitrary 1000 guesses/sec. If you'd like a
19
+ more secure password, either relax the various filtering rules (-W and
20
+ -P), add more words to the password, or use a larger corpus.
21
+
22
+ By default we use the American TV Shows & Scripts corpus taken from
23
+ Wiktionary.
24
+
25
+ Others provided:
26
+
27
+ * Project Gutenberg 2005 corpus taken from Wiktionary.
28
+ * 1 of every 7 of the top 60000 lemmas from wordfrequency.info (6900
29
+ actual lemmas after processing)
30
+
31
+ See http://xkcd.com/936/ for the genesis of the idea.
32
+
33
+ Data sources:
34
+
35
+ http://en.wiktionary.org/wiki/Wiktionary:Frequency_lists
36
+ http://wordfrequency.info/
37
+
38
+ == FEATURES/PROBLEMS:
39
+
40
+ * Generates pretty decent XKCD-style passwords using pretty simple logic
41
+ * Meant to be a proof-of-concept, and succeeds at that.
42
+
43
+ Not so good:
44
+
45
+ * Corpus loading is slow and memory-hungry
46
+ * Needs a good corpus abstraction beyond serialized arrays/objects
47
+ loaded from CSV/JSON/Marshal
48
+ * Should probably store default filter params per-corpus (e.g., 30-70
49
+ percentile works great for one corpus, badly for another)
50
+ * Probably needs a ~/.correct-horse-battery-staple file to set defaults
51
+
52
+
53
+ == SYNOPSIS:
54
+
55
+ Command line usage, for a password of 4 words (default), each word of
56
+ length between 3-8 letters, taken from the 30th through the 60th
57
+ percentile range of the corpus sorted by word frequency from least to
58
+ most frequent:
59
+
60
+ $ chbs generate --verbose -W 3..8 -P 30..60
61
+
62
+ Corpus size: 6396 candidate words of 33075 total
63
+ Entropy: 48 bits (2^48 = 281474976710656)
64
+ Years to guess at 1000 guesses/sec: 8926
65
+ magnate-thermal-sandbank-augur
66
+
67
+ The 'chbs' command line program is the best reference for usage of the
68
+ underlying library. This would be a minimal version:
69
+
70
+ require 'correct_horse_battery_staple'
71
+ corpus = CorrectHorseBatteryStaple.default_corpus
72
+ generator = CorrectHorseBatteryStaple::Generator.new(corpus)
73
+ puts generator.make(4)
74
+
75
+ == REQUIREMENTS:
76
+
77
+ * Ruby 1.8.7, 1.9.x, or (approximately) JRuby 1.5.x or later.
78
+
79
+ == INSTALL:
80
+
81
+ Just "gem install" and use the "chbs" wrapper program.
82
+
83
+ == DEVELOPERS:
84
+
85
+ After checking out the source, run:
86
+
87
+ $ rake newb
88
+
89
+ This task will install any missing dependencies, run the tests/specs,
90
+ and generate the RDoc.
91
+
92
+ == LICENSE:
93
+
94
+ (The MIT License)
95
+
96
+ Copyright (c) 2011 Robert Sanders, opensource@esquimaux.otherinbox.com
97
+
98
+ Permission is hereby granted, free of charge, to any person obtaining
99
+ a copy of this software and associated documentation files (the
100
+ 'Software'), to deal in the Software without restriction, including
101
+ without limitation the rights to use, copy, modify, merge, publish,
102
+ distribute, sublicense, and/or sell copies of the Software, and to
103
+ permit persons to whom the Software is furnished to do so, subject to
104
+ the following conditions:
105
+
106
+ The above copyright notice and this permission notice shall be
107
+ included in all copies or substantial portions of the Software.
108
+
109
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
110
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
111
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
112
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
113
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
114
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
115
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,47 @@
1
+ # -*- ruby -*-
2
+
3
+ require 'rubygems'
4
+ require 'hoe'
5
+
6
+ # Hoe.plugin :compiler
7
+ # Hoe.plugin :gem_prelude_sucks
8
+ # Hoe.plugin :inline
9
+ # Hoe.plugin :racc
10
+
11
+ Hoe.plugin :bundler
12
+ Hoe.plugin :git
13
+ Hoe.plugin :rubyforge
14
+ Hoe.plugin :gemspec
15
+
16
+ Hoe.spec 'correct-horse-battery-staple' do
17
+ developer('Robert Sanders', 'robert@curioussquid.com')
18
+ dependency 'commander', '>= 4.0'
19
+ dependency 'fastercsv', '>= 1.5.3'
20
+ dependency 'json', '>= 1.6.0'
21
+ dependency 'redis', '>= 2.2.2'
22
+ dependency 'hiredis', '>= 0.4.0'
23
+ dependency 'tupalo-kdtree', '>= 0.2.3'
24
+ dependency 'sqlite3', '>= 1.3.0'
25
+ end
26
+
27
+ namespace :chbs do
28
+ task :generate_corpus => "corpus/tvscripts.json"
29
+
30
+ file "corpus/tvscripts.json" do |task|
31
+ sh "./script/generate_all"
32
+ end
33
+ task :corpus => "corpus/tvscripts.json"
34
+
35
+ task :clean do
36
+ sh "rm -f corpus/*"
37
+ end
38
+ end
39
+
40
+ task :corpus => "chbs:corpus"
41
+
42
+ ["spec"].each do |task|
43
+ Rake::Task[task].prerequisites.unshift "chbs:corpus"
44
+ end
45
+ task :clean => "chbs:clean"
46
+
47
+ # -*- mode: Ruby -*-
@@ -0,0 +1,234 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'correct_horse_battery_staple'
4
+ require 'correct_horse_battery_staple/assembler'
5
+ require 'commander/import'
6
+ require 'logger'
7
+
8
# Program metadata reported by commander.
program :version, "0.1.0"
program :description, "Correct Horse Battery Staple - XKCD-style Passphrases"

# Global flags, readable from every command's `options`.
# --debug gates diagnostic messages; --verbose gates the corpus-size and
# entropy report printed by `generate`.
global_option '-D', '--debug', 'Display debugging output'
global_option '-V', '--verbose', 'Show corpus size and entropy statistics'

logger = Logger.new(STDERR)

# Running `chbs` without a subcommand behaves like `chbs generate`.
default_command :generate
17
+
18
# `repl`: open a Pry session; when --corpus is given, the loaded corpus is
# available inside the session as the local variable `corpus`.
command :repl do |c|
  c.syntax  = 'repl'
  c.summary = 'Open Pry-based REPL, optionally with a loaded corpus'
  c.option '-c CORPUSFILE', '--corpus CORPUSFILE', "File to use as the source corpus"

  c.action do |args, options|
    # Loaded lazily so the other commands do not require pry.
    require 'pry'

    # Local names are kept as-is: they are what the Pry session exposes.
    if options.corpus
      format = CorrectHorseBatteryStaple::Corpus.format_for(options.corpus)
      corpus = CorrectHorseBatteryStaple.load_corpus(options.corpus, format)
    end

    binding.pry
  end
end
33
+
34
# `convert`: load a corpus in one format and write it out in another,
# optionally recalculating statistics, shuffling and truncating the word
# table on the way.
command :convert do |c|
  c.syntax = 'convert <infile> [<outfile>]'
  c.summary = 'Convert a corpus from one format to another'
  c.option '-i IFORMAT', '--iformat IFORMAT', 'Set the input corpus format'
  c.option '-o OFORMAT', '--oformat OFORMAT', 'Set the output corpus format'
  c.option '-r', '--recalculate', 'Recalculate all statistics before outputting'
  # NOTE(review): --sortby is accepted but never read by the action below.
  c.option '-S FIELD', '--sortby FIELD', 'Sort words by field'
  c.option '-R', '--randomize', 'Randomize word order'
  c.option '-l COUNT', '--limit COUNT', Integer, 'Set the maximum words processed'
  c.option '-P', '--precache', 'Precache the source word list in memory'

  c.action do |args, options|
    options.default :limit => -1

    infile = args[0]

    # Destination: STDOUT when omitted or "-"; the raw string when it
    # contains ':' (a non-file spec); otherwise a File opened for writing.
    outfile = args[1] && args[1] != "-" ?
      (!args[1].include?(':') ? open(args[1], "w") : args[1]) :
      STDOUT

    begin
      options.iformat ||= CorrectHorseBatteryStaple::Corpus.format_for(infile)
      unless options.oformat
        if args[1]
          options.oformat = CorrectHorseBatteryStaple::Corpus.format_for(args[1])
        else
          raise ArgumentError, "You must supply an output format via the file extension or -o option"
        end
      end

      corpus = CorrectHorseBatteryStaple.load_corpus(infile, options.iformat)
      if options.precache
        corpus.precache(16_000_000)
      end

      # CSV doesn't currently persist stats
      corpus.recalculate if options.recalculate || options.iformat.to_s == "csv"

      corpus.table.shuffle! if options.randomize
      if options.limit > -1
        # Drop everything from index `limit` onwards.
        corpus.table.slice!(options.limit..-1) if corpus.size > options.limit
      end

      with_tempfile(outfile) do |tempfile|
        writer = CorrectHorseBatteryStaple::Writer.make_writer(tempfile, options.oformat)
        writer.write_corpus(corpus)
        writer.close
      end
    ensure
      # Release the handle opened above; STDOUT and spec strings are not
      # File instances and are left alone.
      outfile.close if outfile.is_a?(File) && !outfile.closed?
    end
  end
end
83
+
84
# `stats`: print each statistic of a corpus as "name: value", one per line.
command :stats do |cmd|
  cmd.syntax  = 'stats <corpusfile>'
  cmd.summary = 'Show statistics from a corpus'

  cmd.action do |args, _options|
    corpus = CorrectHorseBatteryStaple::Corpus.read(args[0])
    corpus.stats.to_hash.each { |name, value| puts "#{name}: #{value}" }
  end
end
97
+
98
# `inspect`: print the #inspect representation of a corpus read from a file.
command :inspect do |cmd|
  cmd.syntax  = 'inspect <corpusfile>'
  cmd.summary = 'Show implementation information for a corpus'

  cmd.action do |args, _options|
    puts CorrectHorseBatteryStaple::Corpus.read(args[0]).inspect
  end
end
107
+
108
# `generate` (the default command): print one or more passphrases built
# from words of the selected corpus, filtered by word length and by
# frequency percentile.
command :generate do |c|
  c.syntax = 'generate [<number of words>]'
  c.summary = 'Generate a passphrase with a given number of words'
  c.option '-f FORMAT', '--format FORMAT', 'Set the corpus format'
  c.option '-c CORPUSFILE', '--corpus CORPUSFILE', "File to use as the source corpus"
  c.option '-n COUNT', '--repeat COUNT', Integer, 'How many passwords to generate'
  # Long form only: the short switch -P belongs to --percentile below, and
  # declaring it for both options made the help output ambiguous.
  c.option '--precache', 'Precache the word list in memory'

  # filters
  c.option '-W MIN..MAX', '--wordsize MIN..MAX',
           'Set the allowed word size in number of characters'
  c.option '-P MIN..MAX', '--percentile MIN..MAX',
           'Set the percentile range of the word frequency'

  c.action do |args, options|
    options.default :wordsize => "3..7", :repeat => 1,
                    :corpus => CorrectHorseBatteryStaple::DEFAULT_CORPUS_NAME,
                    :percentile => "30..80"

    number_of_words = (args[0] || 4).to_i
    count = options.repeat.to_i

    corpus = CorrectHorseBatteryStaple.load_corpus(options.corpus, options.format)
    if options.precache
      corpus.precache(16_000_000)
    end
    puts "loaded corpus!" if options.debug
    unfiltered_size = corpus.count

    make_options = {}

    # A word-size range spanning 1..30 or wider is treated as "no filter".
    f_wordsize = CorrectHorseBatteryStaple::RangeParser.new.parse options.wordsize
    unless f_wordsize.begin <= 1 && f_wordsize.end >= 30
      make_options[:word_length] = f_wordsize
    end

    # NOTE(review): include? is handed a Range rather than a number here; if
    # f_percentile is a plain Range this is presumably never true, so the
    # percentile filter is always applied. Possibly meant
    # `begin <= 0 && end >= 100`, mirroring the word-size check - confirm.
    f_percentile = CorrectHorseBatteryStaple::RangeParser.new.parse options.percentile
    unless f_percentile.include? 0..100
      make_options[:percentile] = f_percentile
    end

    if options.verbose
      # Whole bits per word times the number of words; the time estimate
      # uses an arbitrary 1000 guesses per second.
      entropy = number_of_words * corpus.entropy_per_word.floor
      guesses_per_sec = 1000.0
      search_space = 2**entropy
      years = search_space / guesses_per_sec / (365*24*3600)
      puts "Corpus size: #{corpus.length} candidate words of #{unfiltered_size} total"
      puts "Entropy: #{entropy} bits (2^#{entropy} = #{search_space})"
      puts "Years to guess at #{guesses_per_sec.to_i} guesses/sec: #{years.round}"
    end

    generator = CorrectHorseBatteryStaple::Generator.new(corpus)

    count.times do
      puts generator.make(number_of_words, make_options)
    end
  end
end
168
+
169
# `list`: print the available corpus names (or paths with --paths), sorted,
# with entries matching the default corpus name wrapped in square brackets.
command :list do |cmd|
  cmd.syntax  = 'list'
  cmd.summary = 'Show a list of available corpus names'
  cmd.option '-p', '--paths', 'Show paths'

  cmd.action do |args, options|
    default_name = CorrectHorseBatteryStaple::DEFAULT_CORPUS_NAME
    entries = CorrectHorseBatteryStaple.corpus_list(:with_paths => options.paths)

    # Bracket the default corpus so it stands out in the listing.
    entries.each_with_index do |entry, index|
      entries[index] = "[#{entry}]" if entry.include? default_name
    end

    # One path per line, or names on a single comma-separated line.
    separator = options.paths ? "\n" : ', '
    puts entries.sort.join(separator)
  end
end
183
+
184
# `mkcorpus`: parse one or more raw input files into a corpus and write it
# to the destination (default "-") in the requested output format.
command :mkcorpus do |cmd|
  cmd.syntax  = 'mkcorpus file [file, file, ...]'
  cmd.summary = 'Generate a JSON or CSV corpus from an input file'
  cmd.option '-o FILENAME', '--output FILENAME', 'Set the output filename or spec'
  cmd.option '-I FORMAT', '--iformat FORMAT', 'Set the input format - "wiktionary" or "wordfrequency"'
  cmd.option '-O FORMAT', '--oformat FORMAT', 'Set the output format - "marshal", "json", or "csv"'
  cmd.option '-R', '--randomize', 'Randomize word order'
  cmd.option '-l COUNT', '--limit COUNT', Integer, 'Set the maximum words processed'

  cmd.action do |args, options|
    options.default :iformat => "wiktionary", :limit => -1

    # The input format selects the regex parser used to read the files.
    parser    = CorrectHorseBatteryStaple::Parser::Regex.new(options.iformat.to_sym)
    assembler = CorrectHorseBatteryStaple::Assembler.new(parser)

    assembler.read(args)
    assembler.randomize if options.randomize
    assembler.limit(options.limit) if options.limit > -1
    corpus = assembler.corpus

    destination = options.output || "-"

    # Without -O, the output format is derived from the destination.
    options.oformat ||= CorrectHorseBatteryStaple::Corpus.format_for(destination)

    with_tempfile(destination) do |target|
      CorrectHorseBatteryStaple::Writer.write(corpus, target, options.oformat)
    end
  end
end
215
+
216
# Yields a destination for the block to write to, replacing the real file
# only after the block has succeeded.
#
# When +realname+ is a path String, or a File whose path exists on disk,
# and that path is neither "-" nor contains ':' (a non-file spec), the
# block receives a sibling temporary path (".temp-<basename>" in the same
# directory). If the block returns normally the temporary file is renamed
# over the real path; if it raises, the temporary file is deleted and the
# error is re-raised, leaving any existing target untouched.
#
# In every other case (STDOUT, "-", "scheme:spec" strings, ...) the block
# simply receives +realname+ itself.
#
# Returns the result of File.rename in the temp-file case, otherwise the
# block's value.
def with_tempfile(realname)
  pathpart = realname.respond_to?(:path) ? realname.path : realname

  file_backed = realname.is_a?(String) ||
                (realname.is_a?(File) && File.exist?(realname.path))

  # "-" and specs containing ':' are not real files and must be passed
  # through unchanged. (The previous test `! pathpart != '-'` negated
  # pathpart first and was therefore always true.)
  unless file_backed && pathpart != '-' && !pathpart.include?(':')
    return yield(realname)
  end

  tempname = File.join(File.dirname(pathpart), ".temp-#{File.basename(pathpart)}")
  begin
    yield tempname
    File.rename tempname, pathpart
  rescue
    File.delete(tempname) if File.exist?(tempname)
    raise
  end
end