wordlist 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +3 -0
- data/.gitignore +11 -0
- data/.rspec +1 -0
- data/.yardopts +1 -0
- data/{History.txt → ChangeLog.md} +5 -1
- data/LICENSE.txt +22 -0
- data/README.md +96 -0
- data/Rakefile +30 -17
- data/bin/wordlist +10 -0
- data/gemspec.yml +22 -0
- data/lib/wordlist/builder.rb +144 -25
- data/lib/wordlist/builders/website.rb +184 -12
- data/lib/wordlist/flat_file.rb +15 -4
- data/lib/wordlist/list.rb +63 -32
- data/lib/wordlist/mutator.rb +38 -9
- data/lib/wordlist/parsers.rb +24 -19
- data/lib/wordlist/runners.rb +2 -0
- data/lib/wordlist/runners/list.rb +116 -0
- data/lib/wordlist/runners/runner.rb +67 -0
- data/lib/wordlist/unique_filter.rb +47 -8
- data/lib/wordlist/version.rb +1 -1
- data/scripts/benchmark +43 -2
- data/spec/builder_examples.rb +46 -0
- data/spec/builder_spec.rb +97 -6
- data/spec/classes/parser_class.rb +2 -0
- data/spec/helpers/text.rb +6 -0
- data/spec/helpers/wordlist.rb +23 -0
- data/spec/spec_helper.rb +2 -4
- data/wordlist.gemspec +60 -0
- metadata +106 -62
- data/Manifest.txt +0 -30
- data/README.txt +0 -103
- data/tasks/spec.rb +0 -9
data/.document
ADDED
data/.gitignore
ADDED
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--colour --format documentation
|
data/.yardopts
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--markup markdown --title 'Wordlist Documentation' --protected
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Wordlist - A Ruby library for generating and working with word-lists.
|
2
|
+
|
3
|
+
Copyright (c) 2009-2012 Hal Brodigan
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
'Software'), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
19
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
20
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
21
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
22
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
# Wordlist
|
2
|
+
|
3
|
+
* [Source](https://github.com/sophsec/wordlist#readme)
|
4
|
+
* [Issues](https://github.com/sophsec/wordlist/issues)
|
5
|
+
* [Email](mailto:postmodern.mod3 at gmail.com)
|
6
|
+
|
7
|
+
## Description
|
8
|
+
|
9
|
+
A Ruby library for generating and working with word-lists. Wordlist allows
|
10
|
+
one to efficiently generate unique word-lists from arbitrary text or
|
11
|
+
other sources, such as website content. Wordlist can also quickly enumerate
|
12
|
+
through words within an existing word-list, applying multiple mutation
|
13
|
+
rules to each word in the list.
|
14
|
+
|
15
|
+
## Features
|
16
|
+
|
17
|
+
* Uses a bucket system of CRC32 hashes for efficient filtering of duplicate
|
18
|
+
words.
|
19
|
+
* Can build wordlists containing multi-word phrases.
|
20
|
+
* Can build wordlists containing phrases containing a minimum and maximum
|
21
|
+
number of words.
|
22
|
+
* Supports adding mutation rules to a word-list, which are applied to
|
23
|
+
words as the list is enumerated.
|
24
|
+
* Supports building word-lists from arbitrary text.
|
25
|
+
* Supports custom word-list builders:
|
26
|
+
* Wordlist::Builders::Website: Build word-lists from website content.
|
27
|
+
* Supports custom word-list formats:
|
28
|
+
* Wordlist::FlatFile: Enumerates through the words in a flat-file
|
29
|
+
word-list.
|
30
|
+
|
31
|
+
## Examples
|
32
|
+
|
33
|
+
Build a word-list from arbitrary text:
|
34
|
+
|
35
|
+
Wordlist::Builder.build('list.txt') do |builder|
|
36
|
+
builder.parse(some_text)
|
37
|
+
end
|
38
|
+
|
39
|
+
Build a word-list from another file:
|
40
|
+
|
41
|
+
Wordlist::Builder.build('list.txt') do |builder|
|
42
|
+
builder.parse_file('some/file.txt')
|
43
|
+
end
|
44
|
+
|
45
|
+
Build a word-list of phrases containing at most three words, from the
|
46
|
+
arbitrary text:
|
47
|
+
|
48
|
+
Wordlist::Builder.build('list.txt', :max_words => 3) do |builder|
|
49
|
+
builder.parse(some_text)
|
50
|
+
end
|
51
|
+
|
52
|
+
Build a word-list from content off a website:
|
53
|
+
|
54
|
+
require 'wordlist/builders/website'
|
55
|
+
|
56
|
+
Wordlist::Builders::Website.build(
|
57
|
+
'list.txt',
|
58
|
+
:host => 'www.example.com'
|
59
|
+
)
|
60
|
+
|
61
|
+
Enumerate through each word in a flat-file word-list:
|
62
|
+
|
63
|
+
list = Wordlist::FlatFile.new('list.txt')
|
64
|
+
list.each_word do |word|
|
65
|
+
puts word
|
66
|
+
end
|
67
|
+
|
68
|
+
Enumerate through each unique word in a flat-file word-list:
|
69
|
+
|
70
|
+
list.each_unique do |word|
|
71
|
+
puts word
|
72
|
+
end
|
73
|
+
|
74
|
+
Define mutation rules, and enumerate through each unique mutation of each
|
75
|
+
unique word in the word-list:
|
76
|
+
|
77
|
+
list.mutate 'o', '0'
|
78
|
+
list.mutate '@', 0x41
|
79
|
+
list.mutate(/[hax]/i) { |match| match.swapcase }
|
80
|
+
|
81
|
+
list.each_mutation do |word|
|
82
|
+
puts word
|
83
|
+
end
|
84
|
+
|
85
|
+
## Requirements
|
86
|
+
|
87
|
+
* [spidr](http://spidr.rubyforge.org) >= 0.1.9
|
88
|
+
|
89
|
+
## Install
|
90
|
+
|
91
|
+
$ gem install wordlist
|
92
|
+
|
93
|
+
## License
|
94
|
+
|
95
|
+
See {file:LICENSE.txt} for license information.
|
96
|
+
|
data/Rakefile
CHANGED
@@ -1,22 +1,35 @@
|
|
1
|
-
# -*- ruby -*-
|
2
|
-
|
3
1
|
require 'rubygems'
|
4
|
-
require '
|
5
|
-
|
6
|
-
|
7
|
-
|
2
|
+
require 'rake'
|
3
|
+
|
4
|
+
begin
|
5
|
+
gem 'rubygems-tasks', '~> 0.1'
|
6
|
+
require 'rubygems/tasks'
|
7
|
+
|
8
|
+
Gem::Tasks.new
|
9
|
+
rescue LoadError => e
|
10
|
+
warn e.message
|
11
|
+
warn "Run `gem install rubygems-tasks` to install 'rubygems/tasks'."
|
12
|
+
end
|
8
13
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
self.remote_rdoc_dir = '/'
|
13
|
-
self.extra_deps = [
|
14
|
-
['spidr', '>=0.1.9']
|
15
|
-
]
|
14
|
+
begin
|
15
|
+
gem 'rspec', '~> 2.4'
|
16
|
+
require 'rspec/core/rake_task'
|
16
17
|
|
17
|
-
|
18
|
-
|
19
|
-
|
18
|
+
RSpec::Core::RakeTask.new
|
19
|
+
rescue LoadError => e
|
20
|
+
task :spec do
|
21
|
+
abort "Please run `gem install rspec` to install RSpec."
|
22
|
+
end
|
20
23
|
end
|
24
|
+
task :default => :spec
|
25
|
+
|
26
|
+
begin
|
27
|
+
gem 'yard', '~> 0.8'
|
28
|
+
require 'yard'
|
21
29
|
|
22
|
-
|
30
|
+
YARD::Rake::YardocTask.new
|
31
|
+
rescue LoadError => e
|
32
|
+
task :yard do
|
33
|
+
abort "Please run `gem install yard` to install YARD."
|
34
|
+
end
|
35
|
+
end
|
data/bin/wordlist
ADDED
data/gemspec.yml
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
name: wordlist
|
2
|
+
summary: A Ruby library for generating and working with word-lists.
|
3
|
+
description:
|
4
|
+
A Ruby library for generating and working with word-lists. Wordlist
|
5
|
+
allows one to efficiently generate unique word-lists from arbitrary text
|
6
|
+
or other sources, such as website content. Wordlist can also quickly
|
7
|
+
enumerate through words within an existing word-list, applying multiple
|
8
|
+
mutation rules to each word in the list.
|
9
|
+
|
10
|
+
license: MIT
|
11
|
+
authors: Postmodern
|
12
|
+
email: postmodern.mod3@gmail.com
|
13
|
+
homepage: https://github.com/sophsec/wordlist
|
14
|
+
has_yard: true
|
15
|
+
|
16
|
+
dependencies:
|
17
|
+
spidr: ~> 0.2
|
18
|
+
|
19
|
+
development_dependencies:
|
20
|
+
rubygems-tasks: ~> 0.1
|
21
|
+
rspec: ~> 2.4
|
22
|
+
yard: ~> 0.8
|
data/lib/wordlist/builder.rb
CHANGED
@@ -9,29 +9,70 @@ module Wordlist
|
|
9
9
|
# Path of the word-list
|
10
10
|
attr_reader :path
|
11
11
|
|
12
|
+
# Minimum number of words
|
13
|
+
attr_reader :min_words
|
14
|
+
|
15
|
+
# Maximum number of words
|
16
|
+
attr_reader :max_words
|
17
|
+
|
12
18
|
# File for the word-list
|
13
19
|
attr_reader :file
|
14
20
|
|
21
|
+
# The unique word filter
|
22
|
+
attr_reader :filter
|
23
|
+
|
24
|
+
# The queue of words awaiting processing
|
25
|
+
attr_reader :word_queue
|
26
|
+
|
27
|
+
#
|
28
|
+
# Creates a new word-list Builder object.
|
29
|
+
#
|
30
|
+
# @param [String] path
|
31
|
+
# The path of the word-list file.
|
15
32
|
#
|
16
|
-
#
|
17
|
-
#
|
18
|
-
# Builder object.
|
33
|
+
# @param [Hash] options
|
34
|
+
# Additional options.
|
19
35
|
#
|
20
|
-
|
36
|
+
# @option options [Integer] :min_words (1)
|
37
|
+
# The minimum number of words each line of the word-list must contain.
|
38
|
+
#
|
39
|
+
# @option options [Integer] :max_words
|
40
|
+
# The maximum number of words each line of the word-list may contain.
|
41
|
+
# Defaults to the value of `:min_words`, if not given.
|
42
|
+
#
|
43
|
+
def initialize(path,options={})
|
21
44
|
super()
|
22
45
|
|
23
46
|
@path = File.expand_path(path)
|
24
|
-
@file = nil
|
25
|
-
@filter = nil
|
26
47
|
|
27
|
-
|
48
|
+
@min_words = options.fetch(:min_words,1)
|
49
|
+
@max_words = options.fetch(:max_words,@min_words)
|
50
|
+
|
51
|
+
@file = nil
|
52
|
+
@filter = UniqueFilter.new
|
53
|
+
@word_queue = []
|
54
|
+
|
55
|
+
yield self if block_given?
|
28
56
|
end
|
29
57
|
|
30
58
|
#
|
31
|
-
# Creates a new Builder object with the given
|
32
|
-
# word-list file, passes the builder object to the given
|
59
|
+
# Creates a new Builder object with the given arguments, opens the
|
60
|
+
# word-list file, passes the builder object to the given block
|
33
61
|
# then finally closes the word-list file.
|
34
62
|
#
|
63
|
+
# @param [Array] arguments
|
64
|
+
# Additional arguments to pass to {#initialize}.
|
65
|
+
#
|
66
|
+
# @yield [builder]
|
67
|
+
# If a block is given, it will be passed the new builder.
|
68
|
+
#
|
69
|
+
# @yieldparam [Builder] builder
|
70
|
+
# The newly created builder object.
|
71
|
+
#
|
72
|
+
# @return [Builder]
|
73
|
+
# The newly created builder object.
|
74
|
+
#
|
75
|
+
# @example
|
35
76
|
# Builder.build('some/path') do |builder|
|
36
77
|
# builder.parse(readline)
|
37
78
|
# end
|
@@ -48,9 +89,10 @@ module Wordlist
|
|
48
89
|
# Opens the word-list file for writing. If the file already exists, the
|
49
90
|
# previous words will be used to filter future duplicate words.
|
50
91
|
#
|
92
|
+
# @return [File]
|
93
|
+
# The open word-list file.
|
94
|
+
#
|
51
95
|
def open!
|
52
|
-
@filter = UniqueFilter.new
|
53
|
-
|
54
96
|
if File.file?(@path)
|
55
97
|
File.open(@path) do |file|
|
56
98
|
file.each_line do |line|
|
@@ -63,21 +105,86 @@ module Wordlist
|
|
63
105
|
end
|
64
106
|
|
65
107
|
#
|
66
|
-
# Default to be called when the word-list is to be built
|
67
|
-
#
|
108
|
+
# Default to be called when the word-list is to be built.
|
109
|
+
#
|
110
|
+
# @yield [builder]
|
111
|
+
# If a block is given, it will be passed the new builder object.
|
112
|
+
#
|
113
|
+
def build!
|
114
|
+
yield self if block_given?
|
115
|
+
end
|
116
|
+
|
117
|
+
#
|
118
|
+
# Enqueues a given word for processing.
|
119
|
+
#
|
120
|
+
# @param [String] word
|
121
|
+
# The word to enqueue.
|
122
|
+
#
|
123
|
+
# @return [String]
|
124
|
+
# The enqueued word.
|
125
|
+
#
|
126
|
+
def enqueue(word)
|
127
|
+
# enqueue the word
|
128
|
+
if @max_words == 1
|
129
|
+
@word_queue[0] = word.to_s
|
130
|
+
else
|
131
|
+
@word_queue << word.to_s
|
132
|
+
|
133
|
+
# make sure the queue does not overflow
|
134
|
+
if @word_queue.length > @max_words
|
135
|
+
@word_queue.shift
|
136
|
+
end
|
137
|
+
end
|
138
|
+
|
139
|
+
return word
|
140
|
+
end
|
141
|
+
|
142
|
+
#
|
143
|
+
# Enumerates over the combinations of previously seen words.
|
144
|
+
#
|
145
|
+
# @yield [combination]
|
146
|
+
# The given block will be passed the combinations of previously
|
147
|
+
# seen words.
|
148
|
+
#
|
149
|
+
# @yieldparam [String] combination
|
150
|
+
# A combination of one or more space-separated words.
|
68
151
|
#
|
69
|
-
def
|
70
|
-
|
152
|
+
def word_combinations
|
153
|
+
if @max_words == 1
|
154
|
+
yield @word_queue[0]
|
155
|
+
else
|
156
|
+
current_words = @word_queue.length
|
157
|
+
|
158
|
+
# we must have at least the minimum number of words
|
159
|
+
if current_words >= @min_words
|
160
|
+
upper_bound = (current_words - @min_words)
|
161
|
+
|
162
|
+
# combine the words
|
163
|
+
upper_bound.downto(0) do |i|
|
164
|
+
yield @word_queue[i..-1].join(' ')
|
165
|
+
end
|
166
|
+
end
|
167
|
+
end
|
71
168
|
end
|
72
169
|
|
73
170
|
#
|
74
|
-
# Appends the
|
171
|
+
# Appends the given word to the word-list file, only if it has not
|
75
172
|
# been previously seen.
|
76
173
|
#
|
174
|
+
# @param [String] word
|
175
|
+
# The word to append.
|
176
|
+
#
|
177
|
+
# @return [Builder]
|
178
|
+
# The builder object.
|
179
|
+
#
|
77
180
|
def <<(word)
|
181
|
+
enqueue(word)
|
182
|
+
|
78
183
|
if @file
|
79
|
-
|
80
|
-
@
|
184
|
+
word_combinations do |words|
|
185
|
+
@filter.pass(words) do |unique|
|
186
|
+
@file.puts unique
|
187
|
+
end
|
81
188
|
end
|
82
189
|
end
|
83
190
|
|
@@ -85,7 +192,13 @@ module Wordlist
|
|
85
192
|
end
|
86
193
|
|
87
194
|
#
|
88
|
-
# Add the
|
195
|
+
# Add the given words to the word-list.
|
196
|
+
#
|
197
|
+
# @param [Array<String>] words
|
198
|
+
# The words to add to the list.
|
199
|
+
#
|
200
|
+
# @return [Builder]
|
201
|
+
# The builder object.
|
89
202
|
#
|
90
203
|
def +(words)
|
91
204
|
words.each { |word| self << word }
|
@@ -93,16 +206,21 @@ module Wordlist
|
|
93
206
|
end
|
94
207
|
|
95
208
|
#
|
96
|
-
# Parses the
|
97
|
-
#
|
209
|
+
# Parses the given text, adding each unique word to the word-list file.
|
210
|
+
#
|
211
|
+
# @param [String] text
|
212
|
+
# The text to parse.
|
98
213
|
#
|
99
214
|
def parse(text)
|
100
215
|
super(text).each { |word| self << word }
|
101
216
|
end
|
102
217
|
|
103
218
|
#
|
104
|
-
# Parses the contents of the file at the
|
105
|
-
#
|
219
|
+
# Parses the contents of the file at the given path, adding each unique
|
220
|
+
# word to the word-list file.
|
221
|
+
#
|
222
|
+
# @param [String] path
|
223
|
+
# The path of the file to parse.
|
106
224
|
#
|
107
225
|
def parse_file(path)
|
108
226
|
File.open(path) do |file|
|
@@ -118,9 +236,10 @@ module Wordlist
|
|
118
236
|
def close!
|
119
237
|
if @file
|
120
238
|
@file.close
|
121
|
-
|
122
239
|
@file = nil
|
123
|
-
|
240
|
+
|
241
|
+
@filter.clear
|
242
|
+
@word_queue.clear
|
124
243
|
end
|
125
244
|
end
|
126
245
|
|