wordlist 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +3 -0
- data/.gitignore +11 -0
- data/.rspec +1 -0
- data/.yardopts +1 -0
- data/{History.txt → ChangeLog.md} +5 -1
- data/LICENSE.txt +22 -0
- data/README.md +96 -0
- data/Rakefile +30 -17
- data/bin/wordlist +10 -0
- data/gemspec.yml +22 -0
- data/lib/wordlist/builder.rb +144 -25
- data/lib/wordlist/builders/website.rb +184 -12
- data/lib/wordlist/flat_file.rb +15 -4
- data/lib/wordlist/list.rb +63 -32
- data/lib/wordlist/mutator.rb +38 -9
- data/lib/wordlist/parsers.rb +24 -19
- data/lib/wordlist/runners.rb +2 -0
- data/lib/wordlist/runners/list.rb +116 -0
- data/lib/wordlist/runners/runner.rb +67 -0
- data/lib/wordlist/unique_filter.rb +47 -8
- data/lib/wordlist/version.rb +1 -1
- data/scripts/benchmark +43 -2
- data/spec/builder_examples.rb +46 -0
- data/spec/builder_spec.rb +97 -6
- data/spec/classes/parser_class.rb +2 -0
- data/spec/helpers/text.rb +6 -0
- data/spec/helpers/wordlist.rb +23 -0
- data/spec/spec_helper.rb +2 -4
- data/wordlist.gemspec +60 -0
- metadata +106 -62
- data/Manifest.txt +0 -30
- data/README.txt +0 -103
- data/tasks/spec.rb +0 -9
data/.document
ADDED
data/.gitignore
ADDED
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--colour --format documentation
|
data/.yardopts
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--markup markdown --title 'Wordlist Documentation' --protected
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Wordlist - A Ruby library for generating and working with word-lists.
|
2
|
+
|
3
|
+
Copyright (c) 2009-2012 Hal Brodigan
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
'Software'), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
19
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
20
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
21
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
22
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
# Wordlist
|
2
|
+
|
3
|
+
* [Source](https://github.com/sophsec/wordlist#readme)
|
4
|
+
* [Issues](https://github.com/sophsec/wordlist/issues)
|
5
|
+
* [Email](mailto:postmodern.mod3 at gmail.com)
|
6
|
+
|
7
|
+
## Description
|
8
|
+
|
9
|
+
A Ruby library for generating and working with word-lists. Wordlist allows
|
10
|
+
one to efficiently generate unique word-lists from arbitrary text or
|
11
|
+
other sources, such as website content. Wordlist can also quickly enumerate
|
12
|
+
through words within an existing word-list, applying multiple mutation
|
13
|
+
rules to each word in the list.
|
14
|
+
|
15
|
+
## Features
|
16
|
+
|
17
|
+
* Uses a bucket system of CRC32 hashes for efficient filtering of duplicate
|
18
|
+
words.
|
19
|
+
* Can build wordlists containing multi-word phrases.
|
20
|
+
* Can build wordlists containing phrases containing a minimum and maximum
|
21
|
+
number of words.
|
22
|
+
* Supports adding mutation rules to a word-list, which are applied to
|
23
|
+
words as the list is enumerated.
|
24
|
+
* Supports building word-lists from arbitrary text.
|
25
|
+
* Supports custom word-list builders:
|
26
|
+
* Wordlist::Builders::Website: Build word-lists from website content.
|
27
|
+
* Supports custom word-list formats:
|
28
|
+
* Wordlist::FlatFile: Enumerates through the words in a flat-file
|
29
|
+
word-list.
|
30
|
+
|
31
|
+
## Examples
|
32
|
+
|
33
|
+
Build a word-list from arbitrary text:
|
34
|
+
|
35
|
+
Wordlist::Builder.build('list.txt') do |builder|
|
36
|
+
builder.parse(some_text)
|
37
|
+
end
|
38
|
+
|
39
|
+
Build a word-list from another file:
|
40
|
+
|
41
|
+
Wordlist::Builder.build('list.txt') do |builder|
|
42
|
+
builder.parse_file('some/file.txt')
|
43
|
+
end
|
44
|
+
|
45
|
+
Build a word-list of phrases containing at most three words, from
|
46
|
+
arbitrary text:
|
47
|
+
|
48
|
+
Wordlist::Builder.build('list.txt', :max_words => 3) do |builder|
|
49
|
+
builder.parse(some_text)
|
50
|
+
end
|
51
|
+
|
52
|
+
Build a word-list from content off a website:
|
53
|
+
|
54
|
+
require 'wordlist/builders/website'
|
55
|
+
|
56
|
+
Wordlist::Builders::Website.build(
|
57
|
+
'list.txt',
|
58
|
+
:host => 'www.example.com'
|
59
|
+
)
|
60
|
+
|
61
|
+
Enumerate through each word in a flat-file word-list:
|
62
|
+
|
63
|
+
list = Wordlist::FlatFile.new('list.txt')
|
64
|
+
list.each_word do |word|
|
65
|
+
puts word
|
66
|
+
end
|
67
|
+
|
68
|
+
Enumerate through each unique word in a flat-file word-list:
|
69
|
+
|
70
|
+
list.each_unique do |word|
|
71
|
+
puts word
|
72
|
+
end
|
73
|
+
|
74
|
+
Define mutation rules, and enumerate through each unique mutation of each
|
75
|
+
unique word in the word-list:
|
76
|
+
|
77
|
+
list.mutate 'o', '0'
|
78
|
+
list.mutate '@', 0x41
|
79
|
+
list.mutate(/[hax]/i) { |match| match.swapcase }
|
80
|
+
|
81
|
+
list.each_mutation do |word|
|
82
|
+
puts word
|
83
|
+
end
|
84
|
+
|
85
|
+
## Requirements
|
86
|
+
|
87
|
+
* [spidr](http://spidr.rubyforge.org) ~> 0.2
|
88
|
+
|
89
|
+
## Install
|
90
|
+
|
91
|
+
$ gem install wordlist
|
92
|
+
|
93
|
+
## License
|
94
|
+
|
95
|
+
See {file:LICENSE.txt} for license information.
|
96
|
+
|
data/Rakefile
CHANGED
@@ -1,22 +1,35 @@
|
|
1
|
-
# -*- ruby -*-
|
2
|
-
|
3
1
|
require 'rubygems'
|
4
|
-
require '
|
5
|
-
|
6
|
-
|
7
|
-
|
2
|
+
require 'rake'
|
3
|
+
|
4
|
+
begin
|
5
|
+
gem 'rubygems-tasks', '~> 0.1'
|
6
|
+
require 'rubygems/tasks'
|
7
|
+
|
8
|
+
Gem::Tasks.new
|
9
|
+
rescue LoadError => e
|
10
|
+
warn e.message
|
11
|
+
warn "Run `gem install rubygems-tasks` to install 'rubygems/tasks'."
|
12
|
+
end
|
8
13
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
self.remote_rdoc_dir = '/'
|
13
|
-
self.extra_deps = [
|
14
|
-
['spidr', '>=0.1.9']
|
15
|
-
]
|
14
|
+
begin
|
15
|
+
gem 'rspec', '~> 2.4'
|
16
|
+
require 'rspec/core/rake_task'
|
16
17
|
|
17
|
-
|
18
|
-
|
19
|
-
|
18
|
+
RSpec::Core::RakeTask.new
|
19
|
+
rescue LoadError => e
|
20
|
+
task :spec do
|
21
|
+
abort "Please run `gem install rspec` to install RSpec."
|
22
|
+
end
|
20
23
|
end
|
24
|
+
task :default => :spec
|
25
|
+
|
26
|
+
begin
|
27
|
+
gem 'yard', '~> 0.8'
|
28
|
+
require 'yard'
|
21
29
|
|
22
|
-
|
30
|
+
YARD::Rake::YardocTask.new
|
31
|
+
rescue LoadError => e
|
32
|
+
task :yard do
|
33
|
+
abort "Please run `gem install yard` to install YARD."
|
34
|
+
end
|
35
|
+
end
|
data/bin/wordlist
ADDED
data/gemspec.yml
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
name: wordlist
|
2
|
+
summary: A Ruby library for generating and working with word-lists.
|
3
|
+
description:
|
4
|
+
A Ruby library for generating and working with word-lists. Wordlist
|
5
|
+
allows one to efficiently generate unique word-lists from arbitrary text
|
6
|
+
or other sources, such as website content. Wordlist can also quickly
|
7
|
+
enumerate through words within an existing word-list, applying multiple
|
8
|
+
mutation rules to each word in the list.
|
9
|
+
|
10
|
+
license: MIT
|
11
|
+
authors: Postmodern
|
12
|
+
email: postmodern.mod3@gmail.com
|
13
|
+
homepage: https://github.com/sophsec/wordlist
|
14
|
+
has_yard: true
|
15
|
+
|
16
|
+
dependencies:
|
17
|
+
spidr: ~> 0.2
|
18
|
+
|
19
|
+
development_dependencies:
|
20
|
+
rubygems-tasks: ~> 0.1
|
21
|
+
rspec: ~> 2.4
|
22
|
+
yard: ~> 0.8
|
data/lib/wordlist/builder.rb
CHANGED
@@ -9,29 +9,70 @@ module Wordlist
|
|
9
9
|
# Path of the word-list
|
10
10
|
attr_reader :path
|
11
11
|
|
12
|
+
# Minimum number of words
|
13
|
+
attr_reader :min_words
|
14
|
+
|
15
|
+
# Maximum number of words
|
16
|
+
attr_reader :max_words
|
17
|
+
|
12
18
|
# File for the word-list
|
13
19
|
attr_reader :file
|
14
20
|
|
21
|
+
# The unique word filter
|
22
|
+
attr_reader :filter
|
23
|
+
|
24
|
+
# The queue of words awaiting processing
|
25
|
+
attr_reader :word_queue
|
26
|
+
|
27
|
+
#
|
28
|
+
# Creates a new word-list Builder object.
|
29
|
+
#
|
30
|
+
# @param [String] path
|
31
|
+
# The path of the word-list file.
|
15
32
|
#
|
16
|
-
#
|
17
|
-
#
|
18
|
-
# Builder object.
|
33
|
+
# @param [Hash] options
|
34
|
+
# Additional options.
|
19
35
|
#
|
20
|
-
|
36
|
+
# @option options [Integer] :min_words (1)
|
37
|
+
# The minimum number of words each line of the word-list must contain.
|
38
|
+
#
|
39
|
+
# @option options [Integer] :max_words
|
40
|
+
# The maximum number of words each line of the word-list may contain.
|
41
|
+
# Defaults to the value of `:min_words`, if not given.
|
42
|
+
#
|
43
|
+
def initialize(path,options={})
|
21
44
|
super()
|
22
45
|
|
23
46
|
@path = File.expand_path(path)
|
24
|
-
@file = nil
|
25
|
-
@filter = nil
|
26
47
|
|
27
|
-
|
48
|
+
@min_words = options.fetch(:min_words,1)
|
49
|
+
@max_words = options.fetch(:max_words,@min_words)
|
50
|
+
|
51
|
+
@file = nil
|
52
|
+
@filter = UniqueFilter.new
|
53
|
+
@word_queue = []
|
54
|
+
|
55
|
+
yield self if block_given?
|
28
56
|
end
|
29
57
|
|
30
58
|
#
|
31
|
-
# Creates a new Builder object with the given
|
32
|
-
# word-list file, passes the builder object to the given
|
59
|
+
# Creates a new Builder object with the given arguments, opens the
|
60
|
+
# word-list file, passes the builder object to the given block
|
33
61
|
# then finally closes the word-list file.
|
34
62
|
#
|
63
|
+
# @param [Array] arguments
|
64
|
+
# Additional arguments to pass to {#initialize}.
|
65
|
+
#
|
66
|
+
# @yield [builder]
|
67
|
+
# If a block is given, it will be passed the new builder.
|
68
|
+
#
|
69
|
+
# @yieldparam [Builder] builder
|
70
|
+
# The newly created builder object.
|
71
|
+
#
|
72
|
+
# @return [Builder]
|
73
|
+
# The newly created builder object.
|
74
|
+
#
|
75
|
+
# @example
|
35
76
|
# Builder.build('some/path') do |builder|
|
36
77
|
# builder.parse(readline)
|
37
78
|
# end
|
@@ -48,9 +89,10 @@ module Wordlist
|
|
48
89
|
# Opens the word-list file for writing. If the file already exists, the
|
49
90
|
# previous words will be used to filter future duplicate words.
|
50
91
|
#
|
92
|
+
# @return [File]
|
93
|
+
# The open word-list file.
|
94
|
+
#
|
51
95
|
def open!
|
52
|
-
@filter = UniqueFilter.new
|
53
|
-
|
54
96
|
if File.file?(@path)
|
55
97
|
File.open(@path) do |file|
|
56
98
|
file.each_line do |line|
|
@@ -63,21 +105,86 @@ module Wordlist
|
|
63
105
|
end
|
64
106
|
|
65
107
|
#
|
66
|
-
# Default to be called when the word-list is to be built
|
67
|
-
#
|
108
|
+
# Default to be called when the word-list is to be built.
|
109
|
+
#
|
110
|
+
# @yield [builder]
|
111
|
+
# If a block is given, it will be passed the new builder object.
|
112
|
+
#
|
113
|
+
def build!
|
114
|
+
yield self if block_given?
|
115
|
+
end
|
116
|
+
|
117
|
+
#
|
118
|
+
# Enqueues a given word for processing.
|
119
|
+
#
|
120
|
+
# @param [String] word
|
121
|
+
# The word to enqueue.
|
122
|
+
#
|
123
|
+
# @return [String]
|
124
|
+
# The enqueued word.
|
125
|
+
#
|
126
|
+
def enqueue(word)
|
127
|
+
# enqueue the word
|
128
|
+
if @max_words == 1
|
129
|
+
@word_queue[0] = word.to_s
|
130
|
+
else
|
131
|
+
@word_queue << word.to_s
|
132
|
+
|
133
|
+
# make sure the queue does not overflow
|
134
|
+
if @word_queue.length > @max_words
|
135
|
+
@word_queue.shift
|
136
|
+
end
|
137
|
+
end
|
138
|
+
|
139
|
+
return word
|
140
|
+
end
|
141
|
+
|
142
|
+
#
|
143
|
+
# Enumerates over the combinations of previously seen words.
|
144
|
+
#
|
145
|
+
# @yield [combination]
|
146
|
+
# The given block will be passed the combinations of previously
|
147
|
+
# seen words.
|
148
|
+
#
|
149
|
+
# @yieldparam [String] combination
|
150
|
+
# A combination of one or more space-separated words.
|
68
151
|
#
|
69
|
-
def
|
70
|
-
|
152
|
+
def word_combinations
|
153
|
+
if @max_words == 1
|
154
|
+
yield @word_queue[0]
|
155
|
+
else
|
156
|
+
current_words = @word_queue.length
|
157
|
+
|
158
|
+
# we must have at least the minimum number of words
|
159
|
+
if current_words >= @min_words
|
160
|
+
upper_bound = (current_words - @min_words)
|
161
|
+
|
162
|
+
# combine the words
|
163
|
+
upper_bound.downto(0) do |i|
|
164
|
+
yield @word_queue[i..-1].join(' ')
|
165
|
+
end
|
166
|
+
end
|
167
|
+
end
|
71
168
|
end
|
72
169
|
|
73
170
|
#
|
74
|
-
# Appends the
|
171
|
+
# Appends the given word to the word-list file, only if it has not
|
75
172
|
# been previously seen.
|
76
173
|
#
|
174
|
+
# @param [String] word
|
175
|
+
# The word to append.
|
176
|
+
#
|
177
|
+
# @return [Builder]
|
178
|
+
# The builder object.
|
179
|
+
#
|
77
180
|
def <<(word)
|
181
|
+
enqueue(word)
|
182
|
+
|
78
183
|
if @file
|
79
|
-
|
80
|
-
@
|
184
|
+
word_combinations do |words|
|
185
|
+
@filter.pass(words) do |unique|
|
186
|
+
@file.puts unique
|
187
|
+
end
|
81
188
|
end
|
82
189
|
end
|
83
190
|
|
@@ -85,7 +192,13 @@ module Wordlist
|
|
85
192
|
end
|
86
193
|
|
87
194
|
#
|
88
|
-
# Add the
|
195
|
+
# Add the given words to the word-list.
|
196
|
+
#
|
197
|
+
# @param [Array<String>] words
|
198
|
+
# The words to add to the list.
|
199
|
+
#
|
200
|
+
# @return [Builder]
|
201
|
+
# The builder object.
|
89
202
|
#
|
90
203
|
def +(words)
|
91
204
|
words.each { |word| self << word }
|
@@ -93,16 +206,21 @@ module Wordlist
|
|
93
206
|
end
|
94
207
|
|
95
208
|
#
|
96
|
-
# Parses the
|
97
|
-
#
|
209
|
+
# Parses the given text, adding each unique word to the word-list file.
|
210
|
+
#
|
211
|
+
# @param [String] text
|
212
|
+
# The text to parse.
|
98
213
|
#
|
99
214
|
def parse(text)
|
100
215
|
super(text).each { |word| self << word }
|
101
216
|
end
|
102
217
|
|
103
218
|
#
|
104
|
-
# Parses the contents of the file at the
|
105
|
-
#
|
219
|
+
# Parses the contents of the file at the given path, adding each unique
|
220
|
+
# word to the word-list file.
|
221
|
+
#
|
222
|
+
# @param [String] path
|
223
|
+
# The path of the file to parse.
|
106
224
|
#
|
107
225
|
def parse_file(path)
|
108
226
|
File.open(path) do |file|
|
@@ -118,9 +236,10 @@ module Wordlist
|
|
118
236
|
def close!
|
119
237
|
if @file
|
120
238
|
@file.close
|
121
|
-
|
122
239
|
@file = nil
|
123
|
-
|
240
|
+
|
241
|
+
@filter.clear
|
242
|
+
@word_queue.clear
|
124
243
|
end
|
125
244
|
end
|
126
245
|
|