wordlist 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +9 -0
- data/Manifest.txt +30 -0
- data/README.txt +103 -0
- data/Rakefile +22 -0
- data/lib/wordlist.rb +4 -0
- data/lib/wordlist/builder.rb +128 -0
- data/lib/wordlist/builders.rb +1 -0
- data/lib/wordlist/builders/website.rb +44 -0
- data/lib/wordlist/flat_file.rb +36 -0
- data/lib/wordlist/list.rb +131 -0
- data/lib/wordlist/mutator.rb +84 -0
- data/lib/wordlist/parsers.rb +69 -0
- data/lib/wordlist/unique_filter.rb +73 -0
- data/lib/wordlist/version.rb +4 -0
- data/scripts/benchmark +18 -0
- data/scripts/text/comedy_of_errors.txt +4011 -0
- data/spec/builder_spec.rb +36 -0
- data/spec/classes/parser_class.rb +5 -0
- data/spec/classes/test_list.rb +9 -0
- data/spec/flat_file_spec.rb +25 -0
- data/spec/list_spec.rb +58 -0
- data/spec/mutator_spec.rb +43 -0
- data/spec/parsers_spec.rb +118 -0
- data/spec/spec_helper.rb +7 -0
- data/spec/text/flat_file.txt +3 -0
- data/spec/text/previous_wordlist.txt +3 -0
- data/spec/text/sample.txt +3 -0
- data/spec/unique_filter_spec.rb +34 -0
- data/spec/wordlist_spec.rb +9 -0
- data/tasks/spec.rb +9 -0
- metadata +123 -0
data/History.txt
ADDED
@@ -0,0 +1,9 @@
|
|
1
|
+
=== 0.1.0 / 2009-08-31
|
2
|
+
|
3
|
+
* Initial release:
|
4
|
+
* Supports building word-lists from arbitrary text.
|
5
|
+
* Supports building word-lists from files.
|
6
|
+
* Supports building word-lists from websites.
|
7
|
+
* Supports enumerating through flat-file word-lists.
|
8
|
+
* Supports applying multiple mutation rules to each word in a word-list.
|
9
|
+
|
data/Manifest.txt
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
History.txt
|
2
|
+
Manifest.txt
|
3
|
+
README.txt
|
4
|
+
Rakefile
|
5
|
+
lib/wordlist.rb
|
6
|
+
lib/wordlist/unique_filter.rb
|
7
|
+
lib/wordlist/parsers.rb
|
8
|
+
lib/wordlist/builder.rb
|
9
|
+
lib/wordlist/builders.rb
|
10
|
+
lib/wordlist/builders/website.rb
|
11
|
+
lib/wordlist/mutator.rb
|
12
|
+
lib/wordlist/list.rb
|
13
|
+
lib/wordlist/flat_file.rb
|
14
|
+
lib/wordlist/version.rb
|
15
|
+
tasks/spec.rb
|
16
|
+
scripts/benchmark
|
17
|
+
scripts/text/comedy_of_errors.txt
|
18
|
+
spec/classes/parser_class.rb
|
19
|
+
spec/classes/test_list.rb
|
20
|
+
spec/text/previous_wordlist.txt
|
21
|
+
spec/text/sample.txt
|
22
|
+
spec/text/flat_file.txt
|
23
|
+
spec/spec_helper.rb
|
24
|
+
spec/unique_filter_spec.rb
|
25
|
+
spec/parsers_spec.rb
|
26
|
+
spec/mutator_spec.rb
|
27
|
+
spec/builder_spec.rb
|
28
|
+
spec/list_spec.rb
|
29
|
+
spec/flat_file_spec.rb
|
30
|
+
spec/wordlist_spec.rb
|
data/README.txt
ADDED
@@ -0,0 +1,103 @@
|
|
1
|
+
= Wordlist
|
2
|
+
|
3
|
+
* http://wordlist.rubyforge.org/
|
4
|
+
* http://github.com/sophsec/wordlist/
|
5
|
+
* Postmodern (postmodern.mod3 at gmail.com)
|
6
|
+
|
7
|
+
== DESCRIPTION:
|
8
|
+
|
9
|
+
A Ruby library for generating and working with word-lists. Wordlist allows
|
10
|
+
one to efficiently generate unique word-lists from arbitrary text or
|
11
|
+
other sources, such as website content. Wordlist can also quickly enumerate
|
12
|
+
through words within an existing word-list, applying multiple mutation
|
13
|
+
rules to each word in the list.
|
14
|
+
|
15
|
+
== FEATURES:
|
16
|
+
|
17
|
+
* Uses a bucket system of CRC32 hashes for efficient filtering of duplicate
|
18
|
+
words.
|
19
|
+
* Supports adding mutation rules to a word-list, which are applied to
|
20
|
+
words as the list is enumerated.
|
21
|
+
* Supports building word-lists from arbitrary text.
|
22
|
+
* Supports custom word-list builders:
|
23
|
+
* Wordlist::Builders::Website: Build word-lists from website content.
|
24
|
+
* Supports custom word-list formats:
|
25
|
+
* Wordlist::FlatFile: Enumerates through the words in a flat-file
|
26
|
+
word-list.
|
27
|
+
|
28
|
+
== EXAMPLES:
|
29
|
+
|
30
|
+
* Build a word-list from arbitrary text:
|
31
|
+
|
32
|
+
Wordlist::Builder.build('list.txt') do |builder|
|
33
|
+
builder.parse(some_text)
|
34
|
+
end
|
35
|
+
|
36
|
+
* Build a word-list from another file:
|
37
|
+
|
38
|
+
Wordlist::Builder.build('list.txt') do |builder|
|
39
|
+
builder.parse_file('some/file.txt')
|
40
|
+
end
|
41
|
+
|
42
|
+
* Build a word-list from the content of a website:
|
43
|
+
|
44
|
+
require 'wordlist/builders/website'
|
45
|
+
|
46
|
+
Wordlist::Builders::Website.build('list.txt','www.example.com')
|
47
|
+
|
48
|
+
* Enumerate through each word in a flat-file word-list:
|
49
|
+
|
50
|
+
list = Wordlist::FlatFile.new('list.txt')
|
51
|
+
list.each_word do |word|
|
52
|
+
puts word
|
53
|
+
end
|
54
|
+
|
55
|
+
* Enumerate through each unique word in a flat-file word-list:
|
56
|
+
|
57
|
+
list.each_unique do |word|
|
58
|
+
puts word
|
59
|
+
end
|
60
|
+
|
61
|
+
* Define mutation rules, and enumerate through each unique mutation of each
|
62
|
+
unique word in the word-list:
|
63
|
+
|
64
|
+
list.mutate 'o', '0'
|
65
|
+
list.mutate 'a', 0x41
|
66
|
+
list.mutate(/[hax]/i) { |match| match.swapcase }
|
67
|
+
|
68
|
+
list.each_mutation do |word|
|
69
|
+
puts word
|
70
|
+
end
|
71
|
+
|
72
|
+
== REQUIREMENTS:
|
73
|
+
|
74
|
+
* {spidr}[http://spidr.rubyforge.org] >= 0.1.9
|
75
|
+
|
76
|
+
== INSTALL:
|
77
|
+
|
78
|
+
$ sudo gem install wordlist
|
79
|
+
|
80
|
+
== LICENSE:
|
81
|
+
|
82
|
+
Wordlist - A Ruby library for generating and working with word-lists.
|
83
|
+
|
84
|
+
Copyright (c) 2009 Hal Brodigan
|
85
|
+
|
86
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
87
|
+
a copy of this software and associated documentation files (the
|
88
|
+
'Software'), to deal in the Software without restriction, including
|
89
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
90
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
91
|
+
permit persons to whom the Software is furnished to do so, subject to
|
92
|
+
the following conditions:
|
93
|
+
|
94
|
+
The above copyright notice and this permission notice shall be
|
95
|
+
included in all copies or substantial portions of the Software.
|
96
|
+
|
97
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
98
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
99
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
100
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
101
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
102
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
103
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Rakefile
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
# -*- ruby -*-
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'hoe'
|
5
|
+
require 'hoe/signing'
|
6
|
+
require './tasks/spec.rb'
|
7
|
+
require './lib/wordlist/version.rb'
|
8
|
+
|
9
|
+
# Gem specification for wordlist, declared through Hoe. The block is
# evaluated against the spec object, hence the explicit `self.` receivers
# on the attribute writers.
Hoe.spec('wordlist') do
  self.rubyforge_name = 'wordlist'
  self.developer('Postmodern','postmodern.mod3@gmail.com')
  self.remote_rdoc_dir = '/'

  # runtime dependency: spidr is required by lib/wordlist/builders/website.rb
  self.extra_deps = [['spidr', '>=0.1.9']]

  # development-only dependency
  self.extra_dev_deps = [['rspec', '>=1.1.12']]
end
|
21
|
+
|
22
|
+
# vim: syntax=Ruby
|
data/lib/wordlist.rb
ADDED
@@ -0,0 +1,128 @@
|
|
1
|
+
require 'wordlist/unique_filter'
|
2
|
+
require 'wordlist/parsers'
|
3
|
+
|
4
|
+
module Wordlist
  #
  # Builds a word-list file, writing each word on its own line and
  # skipping any word that the UniqueFilter reports as already seen.
  #
  class Builder

    include Parsers

    # Path of the word-list
    attr_reader :path

    # File for the word-list
    attr_reader :file

    #
    # Creates a new word-list Builder object with the specified _path_.
    # The _path_ is expanded to an absolute path; the file itself is not
    # opened until open! is called. If a _block_ is given, it will be
    # passed the newly created Builder object.
    #
    def initialize(path,&block)
      # explicit empty argument list, so _path_ is not forwarded up the
      # ancestor chain (presumably lets Parsers set up its own state;
      # confirm against parsers.rb)
      super()

      @path = File.expand_path(path)
      @file = nil
      @filter = nil

      block.call(self) if block
    end

    #
    # Creates a new Builder object with the given _arguments_, opens the
    # word-list file, passes the builder object to the given _block_
    # then finally closes the word-list file. The file is closed even if
    # building raises an exception; the exception is then re-raised to
    # the caller. Returns the new Builder object.
    #
    #   Builder.build('some/path') do |builder|
    #     builder.parse(readline)
    #   end
    #
    def self.build(*arguments,&block)
      self.new(*arguments) do |builder|
        builder.open!

        begin
          builder.build!(&block)
        ensure
          # always release the file handle, otherwise a failing build
          # would leave the word-list file open
          builder.close!
        end
      end
    end

    #
    # Opens the word-list file for writing. If the file already exists, the
    # previous words will be used to filter future duplicate words.
    #
    def open!
      @filter = UniqueFilter.new

      if File.file?(@path)
        # prime the filter with every word already in the file
        File.open(@path) do |file|
          file.each_line do |line|
            @filter.saw!(line.chomp)
          end
        end
      end

      # append mode: existing words are kept, new ones go at the end
      @file = File.new(@path,File::RDWR | File::CREAT | File::APPEND)
    end

    #
    # Default to be called when the word-list is to be built, simply
    # calls the given _block_ with the builder, if a _block_ was given.
    #
    def build!(&block)
      block.call(self) if block
    end

    #
    # Appends the specified _word_ to the word-list file, only if it has not
    # been previously seen. Does nothing when the word-list file is not
    # open. Returns the builder, so calls may be chained.
    #
    def <<(word)
      if @file
        # the filter decides whether the word is written
        # (presumably it yields only unseen words; see unique_filter.rb)
        @filter.pass(word) do |unique|
          @file.puts unique
        end
      end

      return self
    end

    #
    # Add the specified _words_ to the word-list. The _words_ must
    # respond to +each+. Returns the builder.
    #
    def +(words)
      words.each { |word| self << word }
      return self
    end

    #
    # Parses the specified _text_ adding each unique word to the word-list
    # file. The splitting of _text_ into words is performed by the
    # +parse+ method inherited from Parsers.
    #
    def parse(text)
      super(text).each { |word| self << word }
    end

    #
    # Parses the contents of the file at the specified _path_, adding
    # each unique word to the word-list file. The file is read and
    # parsed one line at a time.
    #
    def parse_file(path)
      File.open(path) do |file|
        file.each_line do |line|
          parse(line)
        end
      end
    end

    #
    # Closes the word-list file and discards the duplicate filter.
    # Does nothing when the word-list file is not open.
    #
    def close!
      if @file
        @file.close

        @file = nil
        @filter = nil
      end
    end

  end
end
|
@@ -0,0 +1 @@
|
|
1
|
+
require 'wordlist/builders/website'
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'wordlist/builder'
|
2
|
+
|
3
|
+
require 'spidr'
|
4
|
+
|
5
|
+
module Wordlist
  module Builders
    #
    # Word-list builder which gathers its words by spidering a website.
    #
    class Website < Builder

      # Host to spider
      attr_accessor :host

      #
      # Creates a new Website builder object for the word-list at the
      # specified _path_, which will spider the specified _host_. If a
      # _block_ is given, it will be passed the newly created Website
      # builder object.
      #
      def initialize(path,host,&block)
        # the host must be assigned before calling super, since
        # Builder#initialize invokes the block, which may already use it
        @host = host

        super(path,&block)
      end

      #
      # Builds the word-list file by spidering the +host+ and parsing the
      # inner-text of the heading, paragraph and span elements of every
      # HTML page. If a _block_ is given, it will be called (through
      # Builder#build!) before the spidering begins.
      #
      def build!(&block)
        super(&block)

        Spidr.host(@host) do |spider|
          spider.every_page do |page|
            # only HTML pages are parsed
            next unless page.html?

            text_nodes = page.doc.search('//h1|//h2|//h3|//h4|//h5|//p|//span')
            text_nodes.each { |node| parse(node.inner_text) }
          end
        end
      end

    end
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'wordlist/list'
|
2
|
+
|
3
|
+
module Wordlist
  #
  # A word-list stored in a flat-file, one word per line.
  #
  class FlatFile < List

    # The path to the flat-file
    attr_accessor :path

    #
    # Creates a new FlatFile list with the specified _path_ and given
    # _options_. The _options_ and the _block_ are handed to
    # List#initialize.
    #
    def initialize(path,options={},&block)
      # assigned before super, since List#initialize calls the block
      @path = path

      super(options,&block)
    end

    #
    # Enumerates through every word in the flat-file, passing each
    # word (with its line ending removed) to the given _block_. If no
    # _block_ is given, an Enumerator over the words is returned
    # instead.
    #
    #   flat_file.each_word do |word|
    #     puts word
    #   end
    #
    def each_word(&block)
      # without a block, yield would raise LocalJumpError after the file
      # had already been opened
      return enum_for(:each_word) unless block_given?

      File.open(@path) do |file|
        file.each_line do |line|
          yield line.chomp
        end
      end
    end

  end
end
|
@@ -0,0 +1,131 @@
|
|
1
|
+
require 'wordlist/unique_filter'
|
2
|
+
require 'wordlist/mutator'
|
3
|
+
|
4
|
+
module Wordlist
  #
  # Base class for word-lists. Sub-classes supply the words by
  # overriding each_word; List adds duplicate filtering (each_unique)
  # and mutation rules (mutate, each_mutation) on top of that.
  # Enumerable is driven by each_mutation.
  #
  class List

    include Enumerable

    # Maximum length of words
    attr_accessor :max_length

    # Minimum length of words
    attr_accessor :min_length

    #
    # Creates a new List object with the given _options_. If a _block_
    # is given, it will be passed the newly created List object.
    #
    # _options_ may include the following keys:
    # <tt>:max_length</tt>:: The maximum length of words produced by the
    #                        list. Defaults to +nil+ (no limit).
    # <tt>:min_length</tt>:: The minimum length of words produced by the
    #                        list. Defaults to 0.
    #
    def initialize(options={},&block)
      @mutators = []

      @max_length = nil
      @min_length = 0

      @max_length = options[:max_length] if options[:max_length]
      @min_length = options[:min_length] if options[:min_length]

      block.call(self) if block
    end

    #
    # Adds a mutation rule for the specified _pattern_, to be replaced
    # using the specified _substitute_. If a _block_ is given, and the
    # _substitute_ data omitted, then the _block_ will be used to
    # replace data matched by the _pattern_.
    #
    #   list.mutate 'o', '0'
    #
    #   list.mutate '0', 0x41
    #
    #   list.mutate(/[oO]/) do |match|
    #     match.swapcase
    #   end
    #
    def mutate(pattern,substitute=nil,&block)
      @mutators << Mutator.new(pattern,substitute,&block)
    end

    #
    # Enumerate through every word in the list, passing each word to
    # the given block. By default this method passes nothing to the given
    # _block_; sub-classes are expected to override it.
    #
    #   list.each_word do |word|
    #     puts word
    #   end
    #
    def each_word(&block)
    end

    #
    # Enumerates through every unique word in the list, passing each
    # unique word to the given block. If no block is given, an
    # Enumerator over the unique words is returned instead; otherwise
    # +nil+ is returned.
    #
    #   list.each_unique do |word|
    #     puts word
    #   end
    #
    def each_unique
      return enum_for(:each_unique) unless block_given?

      unique_filter = UniqueFilter.new()

      each_word do |word|
        # saw! is treated as truthy for a word not encountered before
        # (see unique_filter.rb)
        yield word if unique_filter.saw!(word)
      end

      nil
    end

    #
    # Enumerates through every unique mutation, of every unique word, using
    # the mutator rules defined for the list. Every possible unique mutation
    # will be passed to the given _block_. Mutations shorter than
    # min_length are skipped, and mutations longer than max_length are
    # truncated before being checked for uniqueness. If no block is
    # given, an Enumerator over the mutations is returned instead.
    #
    #   list.each_mutation do |word|
    #     puts word
    #   end
    #
    def each_mutation(&block)
      return enum_for(:each_mutation) unless block_given?

      mutation_filter = UniqueFilter.new()

      # last stage of the chain: length limits, uniqueness, then yield
      emit = lambda { |mutated_word|
        # skip words shorter than the minimum length
        next if mutated_word.length < @min_length

        # truncate words longer than the maximum length
        mutated_word = mutated_word[0,@max_length] if @max_length

        yield mutated_word if mutation_filter.saw!(mutated_word)
      }

      # wrap the stages back to front, so that the first mutator runs
      # first and feeds each of its results into the next mutator, with
      # the last mutator feeding the emit stage
      chain = @mutators.reverse.inject(emit) do |next_stage,mutator|
        lambda { |word| mutator.each(word,&next_stage) }
      end

      each_unique(&chain)
    end

    alias each each_mutation

  end
end
|