crypto-toolbox 0.1.9 → 0.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 24485a636d5cc38818c7a94973eb2315c6f99a6c
4
- data.tar.gz: 7d6a52bb6e920fbf76cafc6e69c2945c62a67433
3
+ metadata.gz: 2eab51c42b84900135dc165a677a18be4d470a1b
4
+ data.tar.gz: a2ec0819114133ae5f613ef2ce4ed153c10a3dff
5
5
  SHA512:
6
- metadata.gz: 251989795e3c562c979523a2c2dac33f51f74a7cba13817bf1a8ed5e695838b3220f02b1a237405276c01a248852cf0bdadc137ebfc940ded46c3d20a9269dd9
7
- data.tar.gz: 6e9a96b977eb116e2d2e0f4026074ac08565d250e0ceb9b36516a8ee00257e3cdf84fac99fabfca872f6c9b9c05c8105bba874220ba834fdc4301ea1b531fbcd
6
+ metadata.gz: 340f89d19a2bb483beffa8b93c16c4c260c6ddb289ce2e187fb53b731413078eb7aa79b8c93b633ad1d5e8e29bde23eded39b7605d873530c0b30c3aff50aaa8
7
+ data.tar.gz: 0be69d404f8007cd889051c403e92ab4cf09b0600bb8aa04432115c3042fadcb1505429f873cfbd7625333206826cd12a7ac5bcc036eeedb7c3e2626e1fe5c60
@@ -9,13 +9,3 @@ else
9
9
 
10
10
  Analyzers::VigenereXor.new.analyze(ciphertext)
11
11
  end
12
-
13
-
14
-
15
-
16
-
17
-
18
-
19
-
20
-
21
-
@@ -1,10 +1,12 @@
1
+ require 'crypto-toolbox/crypt_buffer_input_converter.rb'
1
2
  require 'crypto-toolbox/crypt_buffer.rb'
2
3
 
3
- require 'crypto-toolbox/key_filter.rb'
4
- require 'crypto-toolbox/spell_checker.rb'
5
4
 
5
+ require 'crypto-toolbox/analyzers/utils/key_filter.rb'
6
+ require 'crypto-toolbox/analyzers/utils/spell_checker.rb'
6
7
  require 'crypto-toolbox/analyzers/padding_oracle.rb'
7
8
  require 'crypto-toolbox/analyzers/vigenere_xor.rb'
8
9
 
10
+
9
11
  require 'crypto-toolbox/ciphers/caesar.rb'
10
12
  require 'crypto-toolbox/ciphers/rot13.rb'
@@ -0,0 +1,44 @@
1
+ require 'crypto-toolbox/analyzers/utils/spell_checker.rb'
2
+
3
+ module Analyzers
4
+ module Utils
5
+ module KeyFilter
6
+ class AsciiPlain
7
+
8
+
9
+ def initialize(keys,ciphertext,dict_lang="en_GB")
10
+ @keys = keys
11
+ @c = @ciphertext = ciphertext
12
+ @keylen = keys.first.length
13
+ @dict = FFI::Hunspell.dict(dict_lang)
14
+ end
15
+
16
+ def filter
17
+ # how often is the key repeated
18
+ reps = @c.bytes.length / @keylen
19
+ result =[]
20
+ spell_checker = Analyzers::Utils::SpellChecker.new("en_GB")
21
+
22
+ # should we fork here ?
23
+ @keys.each_with_index do |key,i| # i is used as a simple counter only !
24
+ test = CryptBuffer.new(@c.bytes[0,@keylen]).xor(key).str
25
+ repkey = CryptBuffer.new((key*reps) + key[0,(@c.bytes.length % reps).to_i])
26
+ str = @c.xor(repkey).to_s
27
+
28
+ if spell_checker.human_language?(str)
29
+ result << repkey
30
+ break
31
+ else
32
+ if (i % 50000).zero?
33
+ puts "[Progress] #{i}/#{@keys.length} (#{(i.to_f/@keys.length*100).round(4)}%)"
34
+ end
35
+ end
36
+ end
37
+ return result
38
+ end
39
+
40
+
41
+ end
42
+ end
43
+ end
44
+ end
@@ -0,0 +1,57 @@
1
+ require 'ffi/hunspell'
2
+
3
+
4
+ module Analyzers
5
+ module Utils
6
+ class SpellChecker
7
+
8
+
9
+ def initialize(dict_lang="en_GB")
10
+ @dict = FFI::Hunspell.dict(dict_lang)
11
+ end
12
+ =begin
13
+ NOTE: About spelling error rates and language detection:
14
+
15
+ Missing punctuation support may lead to > 2% errors on valid texts, thus we use a high threshold.
16
+ Invalid decryptions tend to have spelling error rates > 70%.
17
+ Some statistics about it:
18
+ > summary(invalids)
19
+ Min. 1st Qu. Median Mean 3rd Qu. Max.
20
+ 0.6000 1.0000 1.0000 0.9878 1.0000 1.0000
21
+ > summary(cut(invalids,10))
22
+ (0.6,0.64] (0.64,0.68] (0.68,0.72] (0.72,0.76] (0.76,0.8] (0.8,0.84]
23
+ 8 13 9 534 1319 2809
24
+ (0.84,0.88] (0.88,0.92] (0.92,0.96] (0.96,1]
25
+ 10581 46598 198477 1440651
26
+ =end
27
+ def known_words(str)
28
+ words = str.split(" ").select{|w| @dict.check?(w) }
29
+ end
30
+
31
+ def suggest(str)
32
+ @dict.suggest(str)
33
+ end
34
+
35
+ # Check whether a given string seems to be part of a human language using the given dictionary
36
+ #
37
+ # NOTE:
38
+ # Using shell instead of hunspell ffi causes lots of escaping errors, even with shellwords.escape
39
+ # errors = Float(`echo '#{Shellwords.escape(str)}' |hunspell -l |wc -l `.split.first)
40
+ def human_language?(str)
41
+ words = str.split(" ").length
42
+ errors = str.split(" ").map{|e| @dict.check?(e) }.count{|e| e == false}
43
+
44
+ error_rate = errors.to_f/words
45
+
46
+ $stderr.puts error_rate.round(4) if ENV["CRYPTO_TOOBOX_PRINT_ERROR_RATES"]
47
+
48
+ error_rate_sufficient?(error_rate)
49
+ end
50
+
51
+ private
52
+ def error_rate_sufficient?(rate)
53
+ rate < 0.5
54
+ end
55
+ end
56
+ end
57
+ end
@@ -8,6 +8,25 @@
8
8
  =end
9
9
  module Analyzers
10
10
  class VigenereXor
11
+ # This crypto analyzer takes a hex-encoded ciphertext as an input string
12
+ # and tries to find the plaintext by doing the following crypto analysis:
13
+ #
14
+ # 1) Search for a recurring pattern in the 8th bit of the ciphertext bytes.
15
+ # Since ASCII plaintext chars never have this bit set, the pattern comes
16
+ # from the key alone and its period implies the key length
17
+ #
18
+ # 2) Create a map of all possible bytes for every position of the key
19
+ # The amount of candidates can be reduced by only allowing bytes that
20
+ # lead to an ASCII English char
21
+ #
22
+ # 3) create the product of all possible combinations
23
+ # This only works for short key lengths due to the exponential growth
24
+ #
25
+ # 4) Do an English language analysis of each candidate plaintext, using
26
+ # the spelling error rate reported by hunspell
27
+ #
28
+
29
+
11
30
  def jot(message, debug: false)
12
31
  if debug == false || ENV["DEBUG_ANALYSIS"]
13
32
  puts message
@@ -25,7 +44,7 @@ module Analyzers
25
44
  def find_pattern(buf)
26
45
  bitstring = buf.nth_bits(7).join("")
27
46
 
28
- 1.upto([buf.bytes.length,62].min).map do |ksize|
47
+ 1.upto(buf.bytes.length).map do |ksize|
29
48
  parts = bitstring.scan(/.{#{ksize}}/)
30
49
  if parts.uniq.length == 1
31
50
  parts.first
@@ -34,68 +53,77 @@ module Analyzers
34
53
  end
35
54
  end.compact.first
36
55
  end
37
-
38
- def analyze(input)
39
- buf = CryptBuffer.from_hex(input)
40
- result = find_pattern(buf)
41
-
42
- if result.nil?
43
- $stderr.puts "failed to find keylength by ASCII-8-Bit anlysis"
44
- exit(1)
45
- end
46
-
47
- keylen = result.length
48
- jot "Found recurring key pattern: #{result}"
49
- jot "Detected key length: #{keylen}"
50
56
 
57
+ def create_candidate_map(buf,keylen)
51
58
  candidate_map ={}
52
- (0..(keylen-1)).each do |key_byte|
59
+ (0..(keylen-1)).each do |key_byte_pos|
53
60
 
54
- nth_stream = (key_byte).step(buf.bytes.length() -1, keylen).map{|i| buf.bytes[i]}
61
+ nth_stream = (key_byte_pos).step(buf.bytes.length() -1, keylen).map{|i| buf.bytes[i]}
55
62
  smart_buf = CryptBuffer.new(nth_stream)
56
63
 
57
- candidate_map[key_byte]=[]
58
- 1.upto(255).each do |possible_key_value|
59
- if smart_buf.xor_all_with(possible_key_value).bytes.all?{|byte| acceptable_char?(byte) }
60
- jot("YES: " + smart_buf.xor_all_with(possible_key_value).to_s,debug: true)
61
- candidate_map[key_byte] << possible_key_value
64
+ candidate_map[key_byte_pos]=[]
65
+ 1.upto(255).each do |guess|
66
+ if smart_buf.xor_all_with(guess).bytes.all?{|byte| acceptable_char?(byte) }
67
+ jot("YES: " + smart_buf.xor_all_with(guess).to_s,debug: true)
68
+ candidate_map[key_byte_pos] << guess
62
69
  else
63
70
  # the current byte does not create a plain ascii result ( thus skip it )
64
- #jot "NO: " + smart_buf.xor_all_with(possible_key_value).to_s
71
+ #jot "NO: " + smart_buf.xor_all_with(guess).to_s
65
72
  end
66
73
  end
67
74
  end
75
+
76
+ candidate_map
77
+ end
78
+
79
+ def analyze(input)
80
+ buf = CryptBuffer.from_hex(input)
68
81
 
69
- head,*tail = candidate_map.map{|k,v|v}
70
-
71
- jot "Amount of candidate keys: #{candidate_map.map{|k,v| v.length}.reduce(&:*)}. Starting Permutation (RAM intensive)"
82
+ # Example: "100100" || nil
83
+ key_pattern = find_pattern(buf)
84
+ if key_pattern.nil?
85
+ $stderr.puts "failed to find keylength by ASCII-8-Bit anlysis"
86
+ exit(1)
87
+ end
88
+ keylen = key_pattern.length
89
+ jot "Found recurring key pattern: #{key_pattern}"
90
+ jot "Detected key length: #{keylen}"
72
91
 
92
+
93
+ candidate_map = create_candidate_map(buf,keylen)
94
+ jot "Amount of candidate keys: #{candidate_map.map{|k,v| v.length}.reduce(&:*)}. Starting Permutation (RAM intensive)"
95
+
96
+ # split the candidate map into head and *tail to create the product of all combinations
97
+ head,*tail = candidate_map.map{|k,v|v}
73
98
  combinations = head.product(*tail)
74
- # make sure all permutations are still according to the bytes per position map
75
- #x = combinations.select do |arr|
76
- # #binding.pry
77
- # arr.map.with_index{|e,i| candidate_map[i].include?(e) }.all?{|e| e ==true}
78
- #end
79
- if ENV["SEMI_AUTO_ANALYSIS"] && ENV["DEBUG_ANALYSIS"]
80
- print_candidate_encryptions(candidate_map,keylen,buf)
99
+
100
+ if ENV["DEBUG_ANALYSIS"]
101
+ ensure_consistent_result!(combinations,candidate_map)
102
+ print_candidate_decryptions(candidate_map,keylen,buf)
81
103
  end
82
104
 
83
- results = KeySearch::Filter::AsciiPlain.new(combinations,buf).filter
105
+ results = Analyzers::Utils::KeyFilter::AsciiPlain.new(combinations,buf).filter
84
106
  report_result(results,buf)
85
107
  end
86
108
 
109
+ def ensure_consistent_result!(combinations,condidate_map)
110
+ # NOTE Consistency check ( enable if you dont trust the generation anymore )
111
+ # make sure all permutations are still according to the bytes per position map
112
+ combinations.select do |arr|
113
+ raise "Inconsistent key candidate combinations" unless arr.map.with_index{|e,i| candidate_map[i].include?(e) }.all?{|e| e ==true}
114
+ end
115
+ end
116
+
87
117
  def report_result(results,buf)
88
118
  unless results.empty?
89
- jot "[Success] Found valid result(s)"
119
+ jot "[Success] Found valid result(s):"
90
120
  results.each do |r|
91
- print_delimiter_line
92
121
  jot r.xor(buf).str
93
- print_delimiter_line
94
122
  end
95
123
  end
96
124
  end
97
125
 
98
- def print_candidate_encryptions(candidate_map,keylen,buf)
126
+ def print_candidate_decryptions(candidate_map,keylen,buf)
99
127
  # printout for debugging. (Manual analysis of the characters)
100
128
  print "======= Decryption result of first #{keylen} bytes with all candidate keys =======\n"
101
129
  (0..keylen-1).each do|i|
@@ -110,12 +138,6 @@ module Analyzers
110
138
  end
111
139
  end
112
140
 
113
- =begin
114
- NOTE: we may at digram and trigram support?
115
- #trigram="the "
116
- #x = CryptBuffer.new(trigram)
117
- =end
118
-
119
141
 
120
142
 
121
143
 
@@ -32,7 +32,7 @@ Letter Array include?(A): 76997.0 i/s - 42.73x slower
32
32
  mod = (char =~ /[a-z]/) ? 123 : 91
33
33
  offset = (char =~ /[a-z]/) ? 97 : 65
34
34
 
35
- (char =~ /[^a-zA-Z]/) ? char : CryptBuffer.new(char).add(real_shift, mod: mod, offset: offset).str
35
+ (char =~ /[^a-zA-Z]/) ? char : CryptBuffer(char).add(real_shift, mod: mod, offset: offset).str
36
36
  end.join
37
37
  end
38
38
 
@@ -32,22 +32,16 @@ class CryptBuffer
32
32
  attr_accessor :bytes
33
33
  alias_method :b, :bytes
34
34
 
35
-
36
-
37
35
 
38
- def initialize(input)
39
- @bytes = bytes_from_any(input)
36
+ def initialize(byte_array)
37
+ @bytes = byte_array
40
38
  end
41
39
 
42
40
  # Make sure input strings are always interpreted as hex strings
43
41
  # This is especially useful for unknown or uncertain inputs like
44
42
  # strings with or without leading 0x
45
43
  def self.from_hex(input)
46
- hexstr =""
47
- unless input.nil?
48
- hexstr = (input =~ /^0x/ ? input : "0x#{pad_hex_char(input)}" )
49
- end
50
- CryptBuffer.new(hexstr)
44
+ CryptBufferInputConverter.new.from_hex(input)
51
45
  end
52
46
 
53
47
  # Returns an array of the nth least significant bit of each byte
@@ -61,68 +55,11 @@ class CryptBuffer
61
55
  def chunks_of(n)
62
56
  self.bytes.each_slice(n).map{|chunk| CryptBuffer(chunk) }
63
57
  end
64
-
65
-
66
-
67
58
 
68
59
  private
69
60
  def xor_multiple(byte,bytes)
70
61
  ([byte] + bytes).reduce(:^)
71
62
  end
72
-
73
- def bytes_from_any(input)
74
- case input
75
- when Array
76
- input
77
- when String
78
- str2bytes(input)
79
- when CryptBuffer
80
- input.b
81
- when Fixnum
82
- int2bytes(input)
83
- else
84
- raise "Unsupported input: #{input.inspect} of class #{input.class}"
85
- end
86
- end
87
-
88
- def normalize_hex(str)
89
- tmp = self.class.pad_hex_char(str)
90
- tmp.gsub(/(^0x|\s)/,"").upcase
91
- end
92
-
93
- def self.pad_hex_char(str)
94
- (str.length == 1) ? "0#{str}" : "#{str}"
95
- end
96
-
97
- def strip_hex_prefix(hex)
98
- raise "remove 0x from hexinput"
99
- end
100
-
101
- def int2bytes(input)
102
- # integers as strings dont have a 0x prefix
103
- if input.to_s(16).match(/^[0-9a-fA-F]+$/)
104
- # assume 0x prefixed integer
105
- hex2bytes(normalize_hex(input.to_s(16)))
106
- else
107
- # regular number
108
- [input].pack('C*').bytes
109
- end
110
- end
111
-
112
- def hex2bytes(hexstr)
113
- hexstr.scan(/../).map{|h| h.to_i(16) }
114
- end
115
-
116
- def str2bytes(str)
117
- if str.match(/^0x[0-9a-fA-F]+$/).nil?
118
- str.bytes.to_a
119
- else
120
- hex2bytes(normalize_hex(str))
121
- end
122
- end
123
63
  end
124
64
 
125
65
 
126
- def CryptBuffer(input)
127
- CryptBuffer.new(input)
128
- end
@@ -1,7 +1,7 @@
1
1
  module CryptBufferConcern
2
2
  module Comparable
3
3
  def ==(other)
4
- bytes == bytes_from_any(other)
4
+ bytes == CryptBuffer(other).bytes
5
5
  end
6
6
  end
7
7
  end
@@ -20,12 +20,12 @@ module CryptBufferConcern
20
20
  if expand_input
21
21
  xor_all_with(input)
22
22
  else
23
- xor_bytes(bytes_from_any(input))
23
+ xor_bytes(CryptBuffer(input).bytes)
24
24
  end
25
25
  end
26
26
 
27
27
  def xor_all_with(input)
28
- expanded = expand_bytes(bytes_from_any(input),self.bytes.length)
28
+ expanded = expand_bytes(CryptBuffer(input).bytes,self.bytes.length)
29
29
  xor_bytes(expanded)
30
30
  end
31
31
 
@@ -0,0 +1,72 @@
1
+
2
+
3
+ class CryptBufferInputConverter
4
+ def convert(input)
5
+ bytes_from_any(input)
6
+ end
7
+
8
+ # Make sure input strings are always interpreted as hex strings
9
+ # This is especially useful for unknown or uncertain inputs like
10
+ # strings with or without leading 0x
11
+ def from_hex(input)
12
+ hexstr =""
13
+ unless input.nil?
14
+ hexstr = normalize_hex(input)
15
+ end
16
+ CryptBuffer.new(hex2bytes(hexstr))
17
+ end
18
+
19
+ private
20
+ def bytes_from_any(input)
21
+ case input
22
+ when Array
23
+ input
24
+ when String
25
+ str2bytes(input)
26
+ when CryptBuffer
27
+ input.b
28
+ when Fixnum
29
+ int2bytes(input)
30
+ else
31
+ raise "Unsupported input: #{input.inspect} of class #{input.class}"
32
+ end
33
+ end
34
+
35
+ def int2bytes(input)
36
+ # integers as strings dont have a 0x prefix
37
+ if input.to_s(16).match(/^[0-9a-fA-F]+$/)
38
+ # assume 0x prefixed integer
39
+ hex2bytes(normalize_hex(input.to_s(16)))
40
+ else
41
+ # regular number
42
+ [input].pack('C*').bytes
43
+ end
44
+ end
45
+
46
+ def hex2bytes(hexstr)
47
+ hexstr.scan(/../).map{|h| h.to_i(16) }
48
+ end
49
+
50
+ def str2bytes(str)
51
+ if str.match(/^0x[0-9a-fA-F]+$/).nil?
52
+ str.bytes.to_a
53
+ else
54
+ hex2bytes(normalize_hex(str))
55
+ end
56
+ end
57
+
58
+ def pad_hex_char(str)
59
+ (str.length == 1) ? "0#{str}" : "#{str}"
60
+ end
61
+
62
+ def normalize_hex(str)
63
+ tmp = pad_hex_char(str)
64
+ tmp.gsub(/(^0x|\s)/,"").upcase
65
+ end
66
+
67
+ end
68
+
69
+ def CryptBuffer(input)
70
+ bytes = CryptBufferInputConverter.new.convert(input)
71
+ CryptBuffer.new(bytes)
72
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: crypto-toolbox
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.9
4
+ version: 0.1.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dennis Sivia
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-04-21 00:00:00.000000000 Z
11
+ date: 2015-04-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: aes
@@ -52,6 +52,8 @@ files:
52
52
  - lib/crypto-toolbox/analyzers/padding_oracle/analyzer.rb
53
53
  - lib/crypto-toolbox/analyzers/padding_oracle/oracles/http_oracle.rb
54
54
  - lib/crypto-toolbox/analyzers/padding_oracle/oracles/tcp_oracle.rb
55
+ - lib/crypto-toolbox/analyzers/utils/key_filter.rb
56
+ - lib/crypto-toolbox/analyzers/utils/spell_checker.rb
55
57
  - lib/crypto-toolbox/analyzers/vigenere_xor.rb
56
58
  - lib/crypto-toolbox/ciphers/caesar.rb
57
59
  - lib/crypto-toolbox/ciphers/rot13.rb
@@ -64,8 +66,7 @@ files:
64
66
  - lib/crypto-toolbox/crypt_buffer/concerns/pretty_print.rb
65
67
  - lib/crypto-toolbox/crypt_buffer/concerns/random.rb
66
68
  - lib/crypto-toolbox/crypt_buffer/concerns/xor.rb
67
- - lib/crypto-toolbox/key_filter.rb
68
- - lib/crypto-toolbox/spell_checker.rb
69
+ - lib/crypto-toolbox/crypt_buffer_input_converter.rb
69
70
  homepage: https://github.com/scepticulous/crypto-toolbox
70
71
  licenses:
71
72
  - GPLv3
@@ -1,40 +0,0 @@
1
- require_relative './crypt_buffer.rb'
2
- require_relative './spell_checker.rb'
3
-
4
- module KeySearch
5
- module Filter
6
- class AsciiPlain
7
- def initialize(keys,ciphertext,dict_lang="en_GB")
8
- @keys = keys
9
- @c = @ciphertext = ciphertext
10
- @keylen = keys.first.length
11
- @dict = FFI::Hunspell.dict(dict_lang)
12
- end
13
-
14
-
15
- def filter
16
- # how often is the key repeated
17
- reps = @c.bytes.length / @keylen
18
- result =[]
19
- spell_checker = SpellChecker.new("en_GB")
20
-
21
- @keys.each_with_index do |key,i| # i is used as a simple counter only !
22
- test = CryptBuffer.new(@c.bytes[0,@keylen]).xor(key).str
23
- repkey = CryptBuffer.new((key*reps) + key[0,(@c.bytes.length % reps).to_i])
24
- str = @c.xor(repkey).to_s
25
-
26
- if spell_checker.human_language?(str)
27
- result << repkey
28
- break
29
- else
30
- if (i % 50000).zero?
31
- puts "[Progress] #{i}/#{@keys.length} (#{(i.to_f/@keys.length*100).round(4)}%)"
32
- end
33
- end
34
- end
35
- return result
36
- end
37
-
38
- end
39
- end
40
- end
@@ -1,48 +0,0 @@
1
- require 'ffi/hunspell'
2
- class SpellChecker
3
- def initialize(dict_lang="en_GB")
4
- @dict = FFI::Hunspell.dict(dict_lang)
5
- end
6
- =begin
7
- NOTE: About spelling error rates and language detection:
8
-
9
- missing punctuation support may lead to > 2% errors on valid texts, thus we use a high value .
10
- invalid decryptions tend to have spell error rates > 70
11
- Some statistics about it:
12
- > summary(invalids)
13
- Min. 1st Qu. Median Mean 3rd Qu. Max.
14
- 0.6000 1.0000 1.0000 0.9878 1.0000 1.0000
15
- > summary(cut(invalids,10))
16
- (0.6,0.64] (0.64,0.68] (0.68,0.72] (0.72,0.76] (0.76,0.8] (0.8,0.84]
17
- 8 13 9 534 1319 2809
18
- (0.84,0.88] (0.88,0.92] (0.92,0.96] (0.96,1]
19
- 10581 46598 198477 1440651
20
- =end
21
- def known_words(str)
22
- words = str.split(" ").select{|w| @dict.check?(w) }
23
- end
24
-
25
- def suggest(str)
26
- @dict.suggest(str)
27
- end
28
-
29
- def human_language?(str)
30
- words = str.split(" ").length
31
- errors = str.split(" ").map{|e| @dict.check?(e) }.count{|e| e == false}
32
- # using shell instead of hunspell ffi causes lots of escaping errors, even with shellwords.escape
33
- #errors = Float(`echo '#{Shellwords.escape(str)}' |hunspell -l |wc -l `.split.first)
34
-
35
- error_rate = errors.to_f/words
36
-
37
- $stderr.puts error_rate.round(4) if ENV["CRYPTO_TOOBOX_PRINT_ERROR_RATES"]
38
-
39
- if error_rate < 0.5
40
- puts "[Success] Found valid result (spell error_rate: #{error_rate*100}% is below threshold: 20%)"
41
- return true
42
- else
43
- return false
44
- end
45
- end
46
-
47
-
48
- end