wordlist 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +3 -0
- data/.gitignore +11 -0
- data/.rspec +1 -0
- data/.yardopts +1 -0
- data/{History.txt → ChangeLog.md} +5 -1
- data/LICENSE.txt +22 -0
- data/README.md +96 -0
- data/Rakefile +30 -17
- data/bin/wordlist +10 -0
- data/gemspec.yml +22 -0
- data/lib/wordlist/builder.rb +144 -25
- data/lib/wordlist/builders/website.rb +184 -12
- data/lib/wordlist/flat_file.rb +15 -4
- data/lib/wordlist/list.rb +63 -32
- data/lib/wordlist/mutator.rb +38 -9
- data/lib/wordlist/parsers.rb +24 -19
- data/lib/wordlist/runners.rb +2 -0
- data/lib/wordlist/runners/list.rb +116 -0
- data/lib/wordlist/runners/runner.rb +67 -0
- data/lib/wordlist/unique_filter.rb +47 -8
- data/lib/wordlist/version.rb +1 -1
- data/scripts/benchmark +43 -2
- data/spec/builder_examples.rb +46 -0
- data/spec/builder_spec.rb +97 -6
- data/spec/classes/parser_class.rb +2 -0
- data/spec/helpers/text.rb +6 -0
- data/spec/helpers/wordlist.rb +23 -0
- data/spec/spec_helper.rb +2 -4
- data/wordlist.gemspec +60 -0
- metadata +106 -62
- data/Manifest.txt +0 -30
- data/README.txt +0 -103
- data/tasks/spec.rb +0 -9
data/lib/wordlist/parsers.rb
CHANGED
@@ -1,34 +1,39 @@
|
|
1
1
|
module Wordlist
|
2
2
|
module Parsers
|
3
|
-
|
4
|
-
|
5
|
-
# Ignore case of parsed text
|
6
|
-
attr_accessor :ignore_case
|
3
|
+
# Ignore case of parsed text
|
4
|
+
attr_accessor :ignore_case
|
7
5
|
|
8
|
-
|
9
|
-
|
6
|
+
# Ignore the punctuation of parsed text
|
7
|
+
attr_accessor :ignore_punctuation
|
10
8
|
|
11
|
-
|
12
|
-
|
9
|
+
# Ignore URLs
|
10
|
+
attr_accessor :ignore_urls
|
13
11
|
|
14
|
-
|
15
|
-
|
12
|
+
# Ignore Phone numbers
|
13
|
+
attr_accessor :ignore_phone_numbers
|
16
14
|
|
17
|
-
|
18
|
-
|
19
|
-
end
|
20
|
-
end
|
15
|
+
# Ignore References
|
16
|
+
attr_accessor :ignore_references
|
21
17
|
|
18
|
+
#
|
19
|
+
# Initializes the parser settings.
|
20
|
+
#
|
22
21
|
def initialize
|
23
|
-
@ignore_case
|
24
|
-
@ignore_punctuation
|
25
|
-
@ignore_urls
|
22
|
+
@ignore_case = false
|
23
|
+
@ignore_punctuation = true
|
24
|
+
@ignore_urls = true
|
26
25
|
@ignore_phone_numbers = false
|
27
|
-
@ignore_references
|
26
|
+
@ignore_references = false
|
28
27
|
end
|
29
28
|
|
30
29
|
#
|
31
|
-
# Parses the
|
30
|
+
# Parses the given text.
|
31
|
+
#
|
32
|
+
# @param [String] text
|
33
|
+
# The text to parse.
|
34
|
+
#
|
35
|
+
# @return [Array<String>]
|
36
|
+
# The Array of parsed tokens.
|
32
37
|
#
|
33
38
|
def parse(text)
|
34
39
|
text = text.to_s
|
@@ -0,0 +1,116 @@
|
|
1
|
+
require 'wordlist/runners/runner'
|
2
|
+
require 'wordlist/flat_file'
|
3
|
+
|
4
|
+
module Wordlist
|
5
|
+
module Runners
|
6
|
+
class List < Runner
|
7
|
+
|
8
|
+
#
|
9
|
+
# Creates a new List Runner.
|
10
|
+
#
|
11
|
+
def initialize
|
12
|
+
@file = nil
|
13
|
+
@min_length = nil
|
14
|
+
@max_length = nil
|
15
|
+
@mutations = []
|
16
|
+
|
17
|
+
@words = false
|
18
|
+
@unique_words = false
|
19
|
+
|
20
|
+
@output = nil
|
21
|
+
end
|
22
|
+
|
23
|
+
#
|
24
|
+
# Runs the list runner.
|
25
|
+
#
|
26
|
+
# @param [Array<String>] args
|
27
|
+
# Arguments to run the runner with.
|
28
|
+
#
|
29
|
+
def run(*args)
|
30
|
+
super(*args)
|
31
|
+
|
32
|
+
list = if @file
|
33
|
+
FlatFile.new(
|
34
|
+
@file,
|
35
|
+
:min_length => @min_length,
|
36
|
+
:max_length => @max_length
|
37
|
+
)
|
38
|
+
else
|
39
|
+
print_error('the --file option must be specified')
|
40
|
+
exit -1
|
41
|
+
end
|
42
|
+
|
43
|
+
@mutations.each do |pattern,substitute|
|
44
|
+
list.mutate(pattern,substitute)
|
45
|
+
end
|
46
|
+
|
47
|
+
words = lambda { |output|
|
48
|
+
puts = output.method(:puts)
|
49
|
+
|
50
|
+
if @unique_words
|
51
|
+
list.each_unique(&puts)
|
52
|
+
elsif @words
|
53
|
+
list.each_word(&puts)
|
54
|
+
else
|
55
|
+
list.each(&puts)
|
56
|
+
end
|
57
|
+
}
|
58
|
+
|
59
|
+
if @output
|
60
|
+
File.open(@output,'w+',&words)
|
61
|
+
else
|
62
|
+
words.call(Kernel)
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
protected
|
67
|
+
|
68
|
+
#
|
69
|
+
# Parses the given arguments.
|
70
|
+
#
|
71
|
+
# @param [Array<String>] args
|
72
|
+
# Arguments to parse.
|
73
|
+
#
|
74
|
+
def optparse(*args)
|
75
|
+
super(*args) do |opts|
|
76
|
+
opts.banner = 'usage: wordlist [options]'
|
77
|
+
|
78
|
+
opts.on('-f','--file FILE','The wordlist file to list') do |file|
|
79
|
+
@file = file
|
80
|
+
end
|
81
|
+
|
82
|
+
opts.on('--min-length NUM','Minimum length of words in characters') do |min|
|
83
|
+
@min_length = min
|
84
|
+
end
|
85
|
+
|
86
|
+
opts.on('--max-length NUM','Maximum length of words in characters') do |max|
|
87
|
+
@max_length = max
|
88
|
+
end
|
89
|
+
|
90
|
+
opts.on('-m','--mutate SUBSTRING::REPLACE','Adds a mutation rule') do |substring_and_replace|
|
91
|
+
@mutations << substring_and_replace.split('::',2)
|
92
|
+
end
|
93
|
+
|
94
|
+
opts.on('-M','--mutate-pattern PATTERN::REPLACE','Adds a mutation rule') do |pattern_and_replace|
|
95
|
+
pattern, replace = pattern_and_replace.split('::',2) # split on the first '::' only; use the block's own parameter
|
96
|
+
|
97
|
+
@mutations << [Regexp.new(pattern), replace]
|
98
|
+
end
|
99
|
+
|
100
|
+
opts.on('-w','--words','Only print the words in the wordlist') do
|
101
|
+
@words = true
|
102
|
+
end
|
103
|
+
|
104
|
+
opts.on('-u','--unique','Only print the unique words in the wordlist') do
|
105
|
+
@unique_words = true
|
106
|
+
end
|
107
|
+
|
108
|
+
opts.on('-o','--output FILE','Optional file to output the wordlist to') do |file|
|
109
|
+
@output = File.expand_path(file)
|
110
|
+
end
|
111
|
+
end
|
112
|
+
end
|
113
|
+
|
114
|
+
end
|
115
|
+
end
|
116
|
+
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
require 'optparse'
|
2
|
+
|
3
|
+
module Wordlist
|
4
|
+
module Runners
|
5
|
+
class Runner
|
6
|
+
#
|
7
|
+
# Creates and runs the runner with the given arguments.
|
8
|
+
#
|
9
|
+
# @param [Array<String>] args
|
10
|
+
# Arguments to parse.
|
11
|
+
#
|
12
|
+
def self.run(*args)
|
13
|
+
runner = new()
|
14
|
+
runner.run(*args)
|
15
|
+
end
|
16
|
+
|
17
|
+
#
|
18
|
+
# Runs the runner with the given arguments.
|
19
|
+
#
|
20
|
+
# @param [Array<String>] args
|
21
|
+
# Arguments to run the runner with.
|
22
|
+
#
|
23
|
+
def run(*args)
|
24
|
+
optparse(*args)
|
25
|
+
end
|
26
|
+
|
27
|
+
protected
|
28
|
+
|
29
|
+
#
|
30
|
+
# Prints the given error message.
|
31
|
+
#
|
32
|
+
# @param [String] message
|
33
|
+
# The error message to print.
|
34
|
+
#
|
35
|
+
def print_error(message)
|
36
|
+
$stderr.puts "#{$0}: #{message}"
|
37
|
+
end
|
38
|
+
|
39
|
+
#
|
40
|
+
# Parses the given arguments.
|
41
|
+
#
|
42
|
+
# @param [Array<String>] args
|
43
|
+
# Arguments to parse.
|
44
|
+
#
|
45
|
+
# @yield [opts]
|
46
|
+
# If a block is given, it will be passed the option parser to be
|
47
|
+
# configured.
|
48
|
+
#
|
49
|
+
# @yieldparam [OptionParser] opts
|
50
|
+
# The option parser to be configured.
|
51
|
+
#
|
52
|
+
def optparse(*args)
|
53
|
+
opts = OptionParser.new()
|
54
|
+
|
55
|
+
yield opts if block_given?
|
56
|
+
|
57
|
+
begin
|
58
|
+
opts.parse!(args)
|
59
|
+
rescue OptionParser::InvalidOption => e
|
60
|
+
$stderr.puts e.message
|
61
|
+
$stderr.puts opts
|
62
|
+
exit -1
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
@@ -14,8 +14,13 @@ module Wordlist
|
|
14
14
|
end
|
15
15
|
|
16
16
|
#
|
17
|
-
#
|
18
|
-
#
|
17
|
+
# Determines if the given word has been previously seen.
|
18
|
+
#
|
19
|
+
# @param [String] word
|
20
|
+
# The word to check for.
|
21
|
+
#
|
22
|
+
# @return [Boolean]
|
23
|
+
# Specifies whether the word has been previously seen.
|
19
24
|
#
|
20
25
|
def seen?(word)
|
21
26
|
length = word.length
|
@@ -24,12 +29,18 @@ module Wordlist
|
|
24
29
|
end
|
25
30
|
|
26
31
|
#
|
27
|
-
# Marks the
|
28
|
-
#
|
32
|
+
# Marks the given word as previously seen.
|
33
|
+
#
|
34
|
+
# @param [String] word
|
35
|
+
# The word to mark as previously seen.
|
36
|
+
#
|
37
|
+
# @return [Boolean]
|
38
|
+
# Specifies whether or not the word has not been previously seen
|
39
|
+
# until now.
|
29
40
|
#
|
30
41
|
def saw!(word)
|
31
42
|
length = word.length
|
32
|
-
crc
|
43
|
+
crc = crc32(word)
|
33
44
|
|
34
45
|
if @seen.has_key?(length)
|
35
46
|
return false if @seen[length].include?(crc)
|
@@ -42,8 +53,19 @@ module Wordlist
|
|
42
53
|
end
|
43
54
|
|
44
55
|
#
|
45
|
-
# Passes the
|
46
|
-
#
|
56
|
+
# Passes the given word through the unique filter.
|
57
|
+
#
|
58
|
+
# @param [String] word
|
59
|
+
# The word to pass through the unique filter.
|
60
|
+
#
|
61
|
+
# @yield [word]
|
62
|
+
# The given block will be passed the word, if the word has not been
|
63
|
+
# previously seen by the filter.
|
64
|
+
#
|
65
|
+
# @yieldparam [String] word
|
66
|
+
# A unique word that has not been previously seen by the filter.
|
67
|
+
#
|
68
|
+
# @return [nil]
|
47
69
|
#
|
48
70
|
def pass(word)
|
49
71
|
if saw!(word)
|
@@ -53,10 +75,27 @@ module Wordlist
|
|
53
75
|
return nil
|
54
76
|
end
|
55
77
|
|
78
|
+
#
|
79
|
+
# Clears the unique filter.
|
80
|
+
#
|
81
|
+
# @return [UniqueFilter]
|
82
|
+
# The cleared filter.
|
83
|
+
#
|
84
|
+
def clear
|
85
|
+
@seen.clear
|
86
|
+
return self
|
87
|
+
end
|
88
|
+
|
56
89
|
protected
|
57
90
|
|
58
91
|
#
|
59
|
-
# Returns the CRC32 checksum of the
|
92
|
+
# Returns the CRC32 checksum of the given word.
|
93
|
+
#
|
94
|
+
# @param [String] word
|
95
|
+
# The word to calculate a CRC32 checksum for.
|
96
|
+
#
|
97
|
+
# @return [Integer]
|
98
|
+
# The CRC32 checksum for the given word.
|
60
99
|
#
|
61
100
|
def crc32(word)
|
62
101
|
r = 0xffffffff
|
data/lib/wordlist/version.rb
CHANGED
data/scripts/benchmark
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
$LOAD_PATH.unshift(File.expand_path(File.join(File.dirname(__FILE__),'..','lib')))
|
3
3
|
|
4
|
-
require 'wordlist
|
4
|
+
require 'wordlist'
|
5
5
|
require 'benchmark'
|
6
6
|
require 'fileutils'
|
7
7
|
|
@@ -10,9 +10,50 @@ path = File.expand_path(File.join(File.dirname(__FILE__),'shakespeare_wordlist.t
|
|
10
10
|
FileUtils.rm_f(path)
|
11
11
|
|
12
12
|
Benchmark.bm do |bm|
|
13
|
-
bm.report('build
|
13
|
+
bm.report('build') do
|
14
14
|
Wordlist::Builder.build(path) do |wordlist|
|
15
15
|
wordlist.parse_file('/home/hal/shaks12.txt')
|
16
16
|
end
|
17
17
|
end
|
18
|
+
|
19
|
+
bm.report('each_unique') do
|
20
|
+
Wordlist::FlatFile.new(path) do |wordlist|
|
21
|
+
wordlist.each_unique { |word| word }
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
bm.report('each_mutation (1)') do
|
26
|
+
Wordlist::FlatFile.new(path) do |wordlist|
|
27
|
+
wordlist.mutate /o/i, '0'
|
28
|
+
|
29
|
+
wordlist.each_mutation { |word| word }
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
bm.report('each_mutation (2)') do
|
34
|
+
Wordlist::FlatFile.new(path) do |wordlist|
|
35
|
+
wordlist.mutate /o/i, '0'
|
36
|
+
wordlist.mutate /a/i, '@'
|
37
|
+
|
38
|
+
wordlist.each_mutation { |word| word }
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
bm.report('each_mutation (3)') do
|
43
|
+
Wordlist::FlatFile.new(path) do |wordlist|
|
44
|
+
wordlist.mutate /o/i, '0'
|
45
|
+
wordlist.mutate /a/i, '@'
|
46
|
+
wordlist.mutate /e/i, '3'
|
47
|
+
|
48
|
+
wordlist.each_mutation { |word| word }
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
Benchmark.bm do |bm|
|
54
|
+
mutator = Wordlist::Mutator.new(/o/i, '0')
|
55
|
+
|
56
|
+
bm.report('Mutator#each') do
|
57
|
+
mutator.each('lololololoLOLOLOLOLO') { |word| }
|
58
|
+
end
|
18
59
|
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'helpers/text'
|
3
|
+
require 'helpers/wordlist'
|
4
|
+
|
5
|
+
shared_examples_for "a wordlist Builder" do
|
6
|
+
include Helpers
|
7
|
+
|
8
|
+
before(:all) do
|
9
|
+
@words = ['dog', 'cat', 'catx', 'dat', 'dog', 'cat']
|
10
|
+
@sentence = 'dog cat catx, dog dat.'
|
11
|
+
@text = 'dog cat: catx. dog cat dat dog.'
|
12
|
+
@file = Helpers::SAMPLE_TEXT
|
13
|
+
end
|
14
|
+
|
15
|
+
it "should build a unique wordlist from words" do
|
16
|
+
Builder.build(@path) do |wordlist|
|
17
|
+
wordlist += @words
|
18
|
+
end
|
19
|
+
|
20
|
+
should_contain_words(@path,@expected)
|
21
|
+
end
|
22
|
+
|
23
|
+
it "should build a unique wordlist from a sentence" do
|
24
|
+
Builder.build(@path) do |wordlist|
|
25
|
+
wordlist.parse(@sentence)
|
26
|
+
end
|
27
|
+
|
28
|
+
should_contain_words(@path,@expected)
|
29
|
+
end
|
30
|
+
|
31
|
+
it "should build a unique wordlist from text" do
|
32
|
+
Builder.build(@path) do |wordlist|
|
33
|
+
wordlist.parse(@text)
|
34
|
+
end
|
35
|
+
|
36
|
+
should_contain_words(@path,@expected)
|
37
|
+
end
|
38
|
+
|
39
|
+
it "should build a unique wordlist from a file" do
|
40
|
+
Builder.build(@path) do |wordlist|
|
41
|
+
wordlist.parse_file(@file)
|
42
|
+
end
|
43
|
+
|
44
|
+
should_contain_words(@path,@expected)
|
45
|
+
end
|
46
|
+
end
|