wordlist 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +9 -0
- data/Manifest.txt +30 -0
- data/README.txt +103 -0
- data/Rakefile +22 -0
- data/lib/wordlist.rb +4 -0
- data/lib/wordlist/builder.rb +128 -0
- data/lib/wordlist/builders.rb +1 -0
- data/lib/wordlist/builders/website.rb +44 -0
- data/lib/wordlist/flat_file.rb +36 -0
- data/lib/wordlist/list.rb +131 -0
- data/lib/wordlist/mutator.rb +84 -0
- data/lib/wordlist/parsers.rb +69 -0
- data/lib/wordlist/unique_filter.rb +73 -0
- data/lib/wordlist/version.rb +4 -0
- data/scripts/benchmark +18 -0
- data/scripts/text/comedy_of_errors.txt +4011 -0
- data/spec/builder_spec.rb +36 -0
- data/spec/classes/parser_class.rb +5 -0
- data/spec/classes/test_list.rb +9 -0
- data/spec/flat_file_spec.rb +25 -0
- data/spec/list_spec.rb +58 -0
- data/spec/mutator_spec.rb +43 -0
- data/spec/parsers_spec.rb +118 -0
- data/spec/spec_helper.rb +7 -0
- data/spec/text/flat_file.txt +3 -0
- data/spec/text/previous_wordlist.txt +3 -0
- data/spec/text/sample.txt +3 -0
- data/spec/unique_filter_spec.rb +34 -0
- data/spec/wordlist_spec.rb +9 -0
- data/tasks/spec.rb +9 -0
- metadata +123 -0
data/History.txt
ADDED
@@ -0,0 +1,9 @@
|
|
1
|
+
=== 0.1.0 / 2009-08-31
|
2
|
+
|
3
|
+
* Initial release:
|
4
|
+
* Supports building word-lists from arbitrary text.
|
5
|
+
* Supports building word-lists from files.
|
6
|
+
* Supports building word-lists from websites.
|
7
|
+
* Supports enumerating through flat-file word-lists.
|
8
|
+
* Supports applying multiple mutation rules to each word in a word-list.
|
9
|
+
|
data/Manifest.txt
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
History.txt
|
2
|
+
Manifest.txt
|
3
|
+
README.txt
|
4
|
+
Rakefile
|
5
|
+
lib/wordlist.rb
|
6
|
+
lib/wordlist/unique_filter.rb
|
7
|
+
lib/wordlist/parsers.rb
|
8
|
+
lib/wordlist/builder.rb
|
9
|
+
lib/wordlist/builders.rb
|
10
|
+
lib/wordlist/builders/website.rb
|
11
|
+
lib/wordlist/mutator.rb
|
12
|
+
lib/wordlist/list.rb
|
13
|
+
lib/wordlist/flat_file.rb
|
14
|
+
lib/wordlist/version.rb
|
15
|
+
tasks/spec.rb
|
16
|
+
scripts/benchmark
|
17
|
+
scripts/text/comedy_of_errors.txt
|
18
|
+
spec/classes/parser_class.rb
|
19
|
+
spec/classes/test_list.rb
|
20
|
+
spec/text/previous_wordlist.txt
|
21
|
+
spec/text/sample.txt
|
22
|
+
spec/text/flat_file.txt
|
23
|
+
spec/spec_helper.rb
|
24
|
+
spec/unique_filter_spec.rb
|
25
|
+
spec/parsers_spec.rb
|
26
|
+
spec/mutator_spec.rb
|
27
|
+
spec/builder_spec.rb
|
28
|
+
spec/list_spec.rb
|
29
|
+
spec/flat_file_spec.rb
|
30
|
+
spec/wordlist_spec.rb
|
data/README.txt
ADDED
@@ -0,0 +1,103 @@
|
|
1
|
+
= Wordlist
|
2
|
+
|
3
|
+
* http://wordlist.rubyforge.org/
|
4
|
+
* http://github.com/sophsec/wordlist/
|
5
|
+
* Postmodern (postmodern.mod3 at gmail.com)
|
6
|
+
|
7
|
+
== DESCRIPTION:
|
8
|
+
|
9
|
+
A Ruby library for generating and working with word-lists. Wordlist allows
|
10
|
+
one to efficiently generate unique word-lists from arbitrary text or
|
11
|
+
other sources, such as website content. Wordlist can also quickly enumerate
|
12
|
+
through words within an existing word-list, applying multiple mutation
|
13
|
+
rules to each word in the list.
|
14
|
+
|
15
|
+
== FEATURES:
|
16
|
+
|
17
|
+
* Uses a bucket system of CRC32 hashes for efficient filtering of duplicate
|
18
|
+
words.
|
19
|
+
* Supports adding mutation rules to a word-list, which are applied to
|
20
|
+
words as the list is enumerated.
|
21
|
+
* Supports building word-lists from arbitrary text.
|
22
|
+
* Supports custom word-list builders:
|
23
|
+
* Wordlist::Builders::Website: Build word-lists from website content.
|
24
|
+
* Supports custom word-list formats:
|
25
|
+
* Wordlist::FlatFile: Enumerates through the words in a flat-file
|
26
|
+
word-list.
|
27
|
+
|
28
|
+
== EXAMPLES:
|
29
|
+
|
30
|
+
* Build a word-list from arbitrary text:
|
31
|
+
|
32
|
+
Wordlist::Builder.build('list.txt') do |builder|
|
33
|
+
builder.parse(some_text)
|
34
|
+
end
|
35
|
+
|
36
|
+
* Build a word-list from another file:
|
37
|
+
|
38
|
+
Wordlist::Builder.build('list.txt') do |builder|
|
39
|
+
builder.parse_file('some/file.txt')
|
40
|
+
end
|
41
|
+
|
42
|
+
* Build a word-list from content off a website:
|
43
|
+
|
44
|
+
require 'wordlist/builders/website'
|
45
|
+
|
46
|
+
Wordlist::Builders::Website.build('list.txt','www.example.com')
|
47
|
+
|
48
|
+
* Enumerate through each word in a flat-file word-list:
|
49
|
+
|
50
|
+
list = Wordlist::FlatFile.new('list.txt')
|
51
|
+
list.each_word do |word|
|
52
|
+
puts word
|
53
|
+
end
|
54
|
+
|
55
|
+
* Enumerate through each unique word in a flat-file word-list:
|
56
|
+
|
57
|
+
list.each_unique do |word|
|
58
|
+
puts word
|
59
|
+
end
|
60
|
+
|
61
|
+
* Define mutation rules, and enumerate through each unique mutation of each
|
62
|
+
unique word in the word-list:
|
63
|
+
|
64
|
+
list.mutate 'o', '0'
|
65
|
+
list.mutate 'a', 0x41
|
66
|
+
list.mutate(/[hax]/i) { |match| match.swapcase }
|
67
|
+
|
68
|
+
list.each_mutation do |word|
|
69
|
+
puts word
|
70
|
+
end
|
71
|
+
|
72
|
+
== REQUIREMENTS:
|
73
|
+
|
74
|
+
* {spidr}[http://spidr.rubyforge.org] >= 0.1.9
|
75
|
+
|
76
|
+
== INSTALL:
|
77
|
+
|
78
|
+
$ sudo gem install wordlist
|
79
|
+
|
80
|
+
== LICENSE:
|
81
|
+
|
82
|
+
Wordlist - A Ruby library for generating and working with word-lists.
|
83
|
+
|
84
|
+
Copyright (c) 2009 Hal Brodigan
|
85
|
+
|
86
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
87
|
+
a copy of this software and associated documentation files (the
|
88
|
+
'Software'), to deal in the Software without restriction, including
|
89
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
90
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
91
|
+
permit persons to whom the Software is furnished to do so, subject to
|
92
|
+
the following conditions:
|
93
|
+
|
94
|
+
The above copyright notice and this permission notice shall be
|
95
|
+
included in all copies or substantial portions of the Software.
|
96
|
+
|
97
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
98
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
99
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
100
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
101
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
102
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
103
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Rakefile
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
# -*- ruby -*-
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'hoe'
|
5
|
+
require 'hoe/signing'
|
6
|
+
require './tasks/spec.rb'
|
7
|
+
require './lib/wordlist/version.rb'
|
8
|
+
|
9
|
+
Hoe.spec('wordlist') do
|
10
|
+
self.rubyforge_name = 'wordlist'
|
11
|
+
self.developer('Postmodern','postmodern.mod3@gmail.com')
|
12
|
+
self.remote_rdoc_dir = '/'
|
13
|
+
self.extra_deps = [
|
14
|
+
['spidr', '>=0.1.9']
|
15
|
+
]
|
16
|
+
|
17
|
+
self.extra_dev_deps = [
|
18
|
+
['rspec', '>=1.1.12']
|
19
|
+
]
|
20
|
+
end
|
21
|
+
|
22
|
+
# vim: syntax=Ruby
|
data/lib/wordlist.rb
ADDED
@@ -0,0 +1,128 @@
|
|
1
|
+
require 'wordlist/unique_filter'
|
2
|
+
require 'wordlist/parsers'
|
3
|
+
|
4
|
+
module Wordlist
|
5
|
+
class Builder
|
6
|
+
|
7
|
+
include Parsers
|
8
|
+
|
9
|
+
# Path of the word-list
|
10
|
+
attr_reader :path
|
11
|
+
|
12
|
+
# File for the word-list
|
13
|
+
attr_reader :file
|
14
|
+
|
15
|
+
#
|
16
|
+
# Creates a new word-list Builder object with the specified _path_.
|
17
|
+
# If a _block_ is given, it will be passed the newly created
|
18
|
+
# Builder object.
|
19
|
+
#
|
20
|
+
def initialize(path,&block)
|
21
|
+
super()
|
22
|
+
|
23
|
+
@path = File.expand_path(path)
|
24
|
+
@file = nil
|
25
|
+
@filter = nil
|
26
|
+
|
27
|
+
block.call(self) if block
|
28
|
+
end
|
29
|
+
|
30
|
+
#
|
31
|
+
# Creates a new Builder object with the given _arguments_, opens the
|
32
|
+
# word-list file, passes the builder object to the given _block_
|
33
|
+
# then finally closes the word-list file.
|
34
|
+
#
|
35
|
+
# Builder.build('some/path') do |builder|
|
36
|
+
# builder.parse(readline)
|
37
|
+
# end
|
38
|
+
#
|
39
|
+
def self.build(*arguments,&block)
  self.new(*arguments) do |builder|
    builder.open!

    begin
      builder.build!(&block)
    ensure
      # Always release the word-list file, even when build! or the
      # caller's block raises; close! is a no-op if no file is open.
      builder.close!
    end
  end
end
|
46
|
+
|
47
|
+
#
|
48
|
+
# Opens the word-list file for writing. If the file already exists, the
|
49
|
+
# previous words will be used to filter future duplicate words.
|
50
|
+
#
|
51
|
+
def open!
|
52
|
+
@filter = UniqueFilter.new
|
53
|
+
|
54
|
+
if File.file?(@path)
|
55
|
+
File.open(@path) do |file|
|
56
|
+
file.each_line do |line|
|
57
|
+
@filter.saw!(line.chomp)
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
@file = File.new(@path,File::RDWR | File::CREAT | File::APPEND)
|
63
|
+
end
|
64
|
+
|
65
|
+
#
|
66
|
+
# Default build hook, called when the word-list is to be built; simply
|
67
|
+
# calls the given _block_.
|
68
|
+
#
|
69
|
+
def build!(&block)
|
70
|
+
block.call(self) if block
|
71
|
+
end
|
72
|
+
|
73
|
+
#
|
74
|
+
# Appends the specified _word_ to the word-list file, only if it has not
|
75
|
+
# been previously seen.
|
76
|
+
#
|
77
|
+
def <<(word)
|
78
|
+
if @file
|
79
|
+
@filter.pass(word) do |unique|
|
80
|
+
@file.puts unique
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
return self
|
85
|
+
end
|
86
|
+
|
87
|
+
#
|
88
|
+
# Add the specified _words_ to the word-list.
|
89
|
+
#
|
90
|
+
def +(words)
|
91
|
+
words.each { |word| self << word }
|
92
|
+
return self
|
93
|
+
end
|
94
|
+
|
95
|
+
#
|
96
|
+
# Parses the specified _text_ adding each unique word to the word-list
|
97
|
+
# file.
|
98
|
+
#
|
99
|
+
def parse(text)
|
100
|
+
super(text).each { |word| self << word }
|
101
|
+
end
|
102
|
+
|
103
|
+
#
|
104
|
+
# Parses the contents of the file at the specified _path_, adding
|
105
|
+
# each unique word to the word-list file.
|
106
|
+
#
|
107
|
+
def parse_file(path)
|
108
|
+
File.open(path) do |file|
|
109
|
+
file.each_line do |line|
|
110
|
+
parse(line)
|
111
|
+
end
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
115
|
+
#
|
116
|
+
# Closes the word-list file.
|
117
|
+
#
|
118
|
+
def close!
|
119
|
+
if @file
|
120
|
+
@file.close
|
121
|
+
|
122
|
+
@file = nil
|
123
|
+
@filter = nil
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
127
|
+
end
|
128
|
+
end
|
@@ -0,0 +1 @@
|
|
1
|
+
require 'wordlist/builders/website'
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'wordlist/builder'
|
2
|
+
|
3
|
+
require 'spidr'
|
4
|
+
|
5
|
+
module Wordlist
|
6
|
+
module Builders
|
7
|
+
class Website < Builder
|
8
|
+
|
9
|
+
# Host to spider
|
10
|
+
attr_accessor :host
|
11
|
+
|
12
|
+
#
|
13
|
+
# Creates a new Website builder object with the specified _path_
|
14
|
+
# and _host_. If a _block_ is given, it will be passed the newly created
|
15
|
+
# Website builder object.
|
16
|
+
#
|
17
|
+
def initialize(path,host,&block)
|
18
|
+
@host = host
|
19
|
+
|
20
|
+
super(path,&block)
|
21
|
+
end
|
22
|
+
|
23
|
+
#
|
24
|
+
# Builds the word-list file by spidering the +host+ and parsing the
|
25
|
+
# inner-text from all HTML pages. If a _block_ is given, it will be
|
26
|
+
# called before any HTML pages on the +host+ are spidered or parsed.
|
27
|
+
#
|
28
|
+
def build!(&block)
|
29
|
+
super(&block)
|
30
|
+
|
31
|
+
Spidr.host(@host) do |spidr|
|
32
|
+
spidr.every_page do |page|
|
33
|
+
if page.html?
|
34
|
+
page.doc.search('//h1|//h2|//h3|//h4|//h5|//p|//span').each do |element|
|
35
|
+
parse(element.inner_text)
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'wordlist/list'
|
2
|
+
|
3
|
+
module Wordlist
|
4
|
+
class FlatFile < List
|
5
|
+
|
6
|
+
# The path to the flat-file
|
7
|
+
attr_accessor :path
|
8
|
+
|
9
|
+
#
|
10
|
+
# Creates a new FlatFile list with the specified _path_ and given
|
11
|
+
# _options_.
|
12
|
+
#
|
13
|
+
def initialize(path,options={},&block)
|
14
|
+
@path = path
|
15
|
+
|
16
|
+
super(options,&block)
|
17
|
+
end
|
18
|
+
|
19
|
+
#
|
20
|
+
# Enumerates through every word in the flat-file, passing each
|
21
|
+
# word to the given _block_.
|
22
|
+
#
|
23
|
+
# flat_file.each_word do |word|
|
24
|
+
# puts word
|
25
|
+
# end
|
26
|
+
#
|
27
|
+
def each_word(&block)
|
28
|
+
File.open(@path) do |file|
|
29
|
+
file.each_line do |line|
|
30
|
+
yield line.chomp
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
end
|
36
|
+
end
|
@@ -0,0 +1,131 @@
|
|
1
|
+
require 'wordlist/unique_filter'
|
2
|
+
require 'wordlist/mutator'
|
3
|
+
|
4
|
+
module Wordlist
|
5
|
+
class List
|
6
|
+
|
7
|
+
include Enumerable
|
8
|
+
|
9
|
+
# Maximum length of words
|
10
|
+
attr_accessor :max_length
|
11
|
+
|
12
|
+
# Minimum length of words
|
13
|
+
attr_accessor :min_length
|
14
|
+
|
15
|
+
#
|
16
|
+
# Creates a new List object with the given _options_. If a _block_
|
17
|
+
# is given, it will be passed the newly created List object.
|
18
|
+
#
|
19
|
+
# _options_ may include the following keys:
|
20
|
+
# <tt>:max_length</tt>:: The maximum length of words produced by the
|
21
|
+
# list.
|
22
|
+
# <tt>:min_length</tt>:: The minimum length of words produced by the
|
23
|
+
# list.
|
24
|
+
#
|
25
|
+
def initialize(options={},&block)
|
26
|
+
@mutators = []
|
27
|
+
|
28
|
+
@max_length = nil
|
29
|
+
@min_length = 0
|
30
|
+
|
31
|
+
if options[:max_length]
|
32
|
+
@max_length = options[:max_length]
|
33
|
+
end
|
34
|
+
|
35
|
+
if options[:min_length]
|
36
|
+
@min_length = options[:min_length]
|
37
|
+
end
|
38
|
+
|
39
|
+
block.call(self) if block
|
40
|
+
end
|
41
|
+
|
42
|
+
#
|
43
|
+
# Adds a mutation rule for the specified _pattern_, to be replaced
|
44
|
+
# using the specified _substitute_. If a _block_ is given, and the
|
45
|
+
# _substitute_ data omitted, then the _block_ will be used to
|
46
|
+
# replace data matched by the _pattern_.
|
47
|
+
#
|
48
|
+
# list.mutate 'o', '0'
|
49
|
+
#
|
50
|
+
# list.mutate '0', 0x41
|
51
|
+
#
|
52
|
+
# list.mutate(/[oO]/) do |match|
|
53
|
+
# match.swapcase
|
54
|
+
# end
|
55
|
+
#
|
56
|
+
def mutate(pattern,substitute=nil,&block)
|
57
|
+
@mutators << Mutator.new(pattern,substitute,&block)
|
58
|
+
end
|
59
|
+
|
60
|
+
#
|
61
|
+
# Enumerate through every word in the list, passing each word to
|
62
|
+
# the given block. By default this method passes nothing to the given
|
63
|
+
# _block_.
|
64
|
+
#
|
65
|
+
# list.each_word do |word|
|
66
|
+
# puts word
|
67
|
+
# end
|
68
|
+
#
|
69
|
+
def each_word(&block)
|
70
|
+
end
|
71
|
+
|
72
|
+
#
|
73
|
+
# Enumerates through every unique word in the list, passing each
|
74
|
+
# unique word to the given block.
|
75
|
+
#
|
76
|
+
# list.each_unique do |word|
|
77
|
+
# puts word
|
78
|
+
# end
|
79
|
+
#
|
80
|
+
def each_unique
|
81
|
+
unique_filter = UniqueFilter.new()
|
82
|
+
|
83
|
+
each_word do |word|
|
84
|
+
if unique_filter.saw!(word)
|
85
|
+
yield word
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
89
|
+
unique_filter = nil
|
90
|
+
end
|
91
|
+
|
92
|
+
#
|
93
|
+
# Enumerates through every unique mutation, of every unique word, using
|
94
|
+
# the mutator rules defined for the list. Every possible unique mutation
|
95
|
+
# will be passed to the given _block_.
|
96
|
+
#
|
97
|
+
# list.each_mutation do |word|
|
98
|
+
# puts word
|
99
|
+
# end
|
100
|
+
#
|
101
|
+
def each_mutation(&block)
|
102
|
+
mutation_filter = UniqueFilter.new()
|
103
|
+
|
104
|
+
mutator_stack = [lambda { |mutated_word|
|
105
|
+
# skip words shorter than the minimum length
|
106
|
+
next if mutated_word.length < @min_length
|
107
|
+
|
108
|
+
# truncate words longer than the maximum length
|
109
|
+
mutated_word = mutated_word[0,@max_length] if @max_length
|
110
|
+
|
111
|
+
if mutation_filter.saw!(mutated_word)
|
112
|
+
yield mutated_word
|
113
|
+
end
|
114
|
+
}]
|
115
|
+
|
116
|
+
(@mutators.length-1).downto(0) do |index|
|
117
|
+
mutator_stack.unshift(lambda { |word|
|
118
|
+
prev_mutator = @mutators[index]
|
119
|
+
next_mutator = mutator_stack[index+1]
|
120
|
+
|
121
|
+
prev_mutator.each(word,&next_mutator)
|
122
|
+
})
|
123
|
+
end
|
124
|
+
|
125
|
+
each_unique(&(mutator_stack.first))
|
126
|
+
end
|
127
|
+
|
128
|
+
alias each each_mutation
|
129
|
+
|
130
|
+
end
|
131
|
+
end
|