wordlist 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,9 @@
1
+ === 0.1.0 / 2009-08-31
2
+
3
+ * Initial release:
4
+ * Supports building word-lists from arbitrary text.
5
+ * Supports building word-lists from files.
6
+ * Supports building word-lists from websites.
7
+ * Supports enumerating through flat-file word-lists.
8
+ * Supports applying multiple mutation rules to each word in a word-list.
9
+
@@ -0,0 +1,30 @@
1
+ History.txt
2
+ Manifest.txt
3
+ README.txt
4
+ Rakefile
5
+ lib/wordlist.rb
6
+ lib/wordlist/unique_filter.rb
7
+ lib/wordlist/parsers.rb
8
+ lib/wordlist/builder.rb
9
+ lib/wordlist/builders.rb
10
+ lib/wordlist/builders/website.rb
11
+ lib/wordlist/mutator.rb
12
+ lib/wordlist/list.rb
13
+ lib/wordlist/flat_file.rb
14
+ lib/wordlist/version.rb
15
+ tasks/spec.rb
16
+ scripts/benchmark
17
+ scripts/text/comedy_of_errors.txt
18
+ spec/classes/parser_class.rb
19
+ spec/classes/test_list.rb
20
+ spec/text/previous_wordlist.txt
21
+ spec/text/sample.txt
22
+ spec/text/flat_file.txt
23
+ spec/spec_helper.rb
24
+ spec/unique_filter_spec.rb
25
+ spec/parsers_spec.rb
26
+ spec/mutator_spec.rb
27
+ spec/builder_spec.rb
28
+ spec/list_spec.rb
29
+ spec/flat_file_spec.rb
30
+ spec/wordlist_spec.rb
@@ -0,0 +1,103 @@
1
+ = Wordlist
2
+
3
+ * http://wordlist.rubyforge.org/
4
+ * http://github.com/sophsec/wordlist/
5
+ * Postmodern (postmodern.mod3 at gmail.com)
6
+
7
+ == DESCRIPTION:
8
+
9
+ A Ruby library for generating and working with word-lists. Wordlist allows
10
+ one to efficiently generate unique word-lists from arbitrary text or
11
+ other sources, such as website content. Wordlist can also quickly enumerate
12
+ through words within an existing word-list, applying multiple mutation
13
+ rules to each word in the list.
14
+
15
+ == FEATURES:
16
+
17
+ * Uses a bucket system of CRC32 hashes for efficient filtering of duplicate
18
+ words.
19
+ * Supports adding mutation rules to a word-list, which are applied to
20
+ words as the list is enumerated.
21
+ * Supports building word-lists from arbitrary text.
22
+ * Supports custom word-list builders:
23
+ * Wordlist::Builders::Website: Build word-lists from website content.
24
+ * Supports custom word-list formats:
25
+ * Wordlist::FlatFile: Enumerates through the words in a flat-file
26
+ word-list.
27
+
28
+ == EXAMPLES:
29
+
30
+ * Build a word-list from arbitrary text:
31
+
32
+ Wordlist::Builder.build('list.txt') do |builder|
33
+ builder.parse(some_text)
34
+ end
35
+
36
+ * Build a word-list from another file:
37
+
38
+ Wordlist::Builder.build('list.txt') do |builder|
39
+ builder.parse_file('some/file.txt')
40
+ end
41
+
42
+ * Build a word-list from content off a website:
43
+
44
+ require 'wordlist/builders/website'
45
+
46
+ Wordlist::Builders::Website.build('list.txt','www.example.com')
47
+
48
+ * Enumerate through each word in a flat-file word-list:
49
+
50
+ list = Wordlist::FlatFile.new('list.txt')
51
+ list.each_word do |word|
52
+ puts word
53
+ end
54
+
55
+ * Enumerate through each unique word in a flat-file word-list:
56
+
57
+ list.each_unique do |word|
58
+ puts word
59
+ end
60
+
61
+ * Define mutation rules, and enumerate through each unique mutation of each
62
+ unique word in the word-list:
63
+
64
+ list.mutate 'o', '0'
65
+ list.mutate 'a', 0x41
66
+ list.mutate(/[hax]/i) { |match| match.swapcase }
67
+
68
+ list.each_mutation do |word|
69
+ puts word
70
+ end
71
+
72
+ == REQUIREMENTS:
73
+
74
+ * {spidr}[http://spidr.rubyforge.org] >= 0.1.9
75
+
76
+ == INSTALL:
77
+
78
+ $ sudo gem install wordlist
79
+
80
+ == LICENSE:
81
+
82
+ Wordlist - A Ruby library for generating and working with word-lists.
83
+
84
+ Copyright (c) 2009 Hal Brodigan
85
+
86
+ Permission is hereby granted, free of charge, to any person obtaining
87
+ a copy of this software and associated documentation files (the
88
+ 'Software'), to deal in the Software without restriction, including
89
+ without limitation the rights to use, copy, modify, merge, publish,
90
+ distribute, sublicense, and/or sell copies of the Software, and to
91
+ permit persons to whom the Software is furnished to do so, subject to
92
+ the following conditions:
93
+
94
+ The above copyright notice and this permission notice shall be
95
+ included in all copies or substantial portions of the Software.
96
+
97
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
98
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
99
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
100
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
101
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
102
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
103
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,22 @@
1
+ # -*- ruby -*-
2
+
3
+ require 'rubygems'
4
+ require 'hoe'
5
+ require 'hoe/signing'
6
+ require './tasks/spec.rb'
7
+ require './lib/wordlist/version.rb'
8
+
9
# Gem specification for the wordlist project, declared through Hoe.
Hoe.spec('wordlist') do
  # Project identity and maintainer contact
  self.rubyforge_name = 'wordlist'
  developer('Postmodern','postmodern.mod3@gmail.com')

  self.remote_rdoc_dir = '/'

  # Dependencies needed to use the library
  self.extra_deps = [['spidr', '>=0.1.9']]

  # Dependencies needed only for development
  self.extra_dev_deps = [['rspec', '>=1.1.12']]
end
21
+
22
+ # vim: syntax=Ruby
@@ -0,0 +1,4 @@
1
+ require 'wordlist/builder'
2
+ require 'wordlist/list'
3
+ require 'wordlist/flat_file'
4
+ require 'wordlist/version'
@@ -0,0 +1,128 @@
1
+ require 'wordlist/unique_filter'
2
+ require 'wordlist/parsers'
3
+
4
module Wordlist
  #
  # Writes unique words to a word-list file. Words are obtained through
  # the methods mixed in from Parsers, and duplicates are rejected using
  # a UniqueFilter that is primed with any words already in the file.
  #
  class Builder

    include Parsers

    # Path of the word-list
    attr_reader :path

    # File for the word-list
    attr_reader :file

    #
    # Creates a new word-list Builder object with the specified _path_.
    # The _path_ is expanded to an absolute path, but the file is not
    # opened until open! is called. If a _block_ is given, it will be
    # passed the newly created Builder object.
    #
    def initialize(path,&block)
      super()

      @path = File.expand_path(path)
      @file = nil
      @filter = nil

      block.call(self) if block
    end

    #
    # Creates a new Builder object with the given _arguments_, opens the
    # word-list file, passes the builder object to the given _block_
    # then finally closes the word-list file. The file is closed even if
    # building raises an exception. Returns the new Builder object.
    #
    #   Builder.build('some/path') do |builder|
    #     builder.parse(readline)
    #   end
    #
    def self.build(*arguments,&block)
      new(*arguments) do |builder|
        builder.open!

        begin
          builder.build!(&block)
        ensure
          # always release the file handle, even when the build fails
          builder.close!
        end
      end
    end

    #
    # Opens the word-list file for writing. If the file already exists, the
    # previous words will be used to filter future duplicate words.
    # Returns the opened File.
    #
    def open!
      @filter = UniqueFilter.new

      if File.file?(@path)
        # prime the filter with the words already in the list
        File.open(@path) do |file|
          file.each_line { |line| @filter.saw!(line.chomp) }
        end
      end

      # append mode, so existing words are kept
      @file = File.new(@path,File::RDWR | File::CREAT | File::APPEND)
    end

    #
    # Default build step, called when the word-list is to be built.
    # Simply calls the given _block_ with the builder, if a _block_ was
    # given.
    #
    def build!(&block)
      block.call(self) if block
    end

    #
    # Appends the specified _word_ to the word-list file, only if it has not
    # been previously seen. Does nothing if the word-list file is not
    # open. Returns the builder, so that calls can be chained.
    #
    def <<(word)
      if @file
        @filter.pass(word) { |unique| @file.puts unique }
      end

      self
    end

    #
    # Add the specified _words_ to the word-list. Returns the builder.
    #
    def +(words)
      words.each { |word| self << word }
      self
    end

    #
    # Parses the specified _text_ adding each unique word to the word-list
    # file. The words themselves are extracted by the +parse+ method
    # inherited from Parsers.
    #
    def parse(text)
      super(text).each { |word| self << word }
    end

    #
    # Parses the contents of the file at the specified _path_, adding
    # each unique word to the word-list file. The file is read one line
    # at a time.
    #
    def parse_file(path)
      File.open(path) do |file|
        file.each_line { |line| parse(line) }
      end
    end

    #
    # Closes the word-list file and discards the duplicate filter.
    # Does nothing if the file is not open.
    #
    def close!
      return unless @file

      @file.close

      @file = nil
      @filter = nil
    end

  end
end
@@ -0,0 +1 @@
1
+ require 'wordlist/builders/website'
@@ -0,0 +1,44 @@
1
+ require 'wordlist/builder'
2
+
3
+ require 'spidr'
4
+
5
module Wordlist
  module Builders
    #
    # Builder that fills a word-list with text gathered by spidering
    # a website.
    #
    class Website < Builder

      # Host to spider
      attr_accessor :host

      #
      # Creates a new Website builder object with the specified _path_
      # and _host_. If a _block_ is given, it will be passed the newly
      # created Website builder object.
      #
      def initialize(path,host,&block)
        # the host must be set before calling super, since
        # Builder#initialize invokes the block, which may start the build
        @host = host

        super(path,&block)
      end

      #
      # Builds the word-list file by spidering the +host+ and parsing the
      # inner-text of heading (h1-h5), paragraph and span elements from
      # every HTML page. If a _block_ is given, it will be called before
      # the +host+ is spidered.
      #
      def build!(&block)
        super(&block)

        # elements whose inner-text is fed to the parser
        text_elements = '//h1|//h2|//h3|//h4|//h5|//p|//span'

        Spidr.host(@host) do |spidr|
          spidr.every_page do |page|
            # only HTML pages are parsed
            next unless page.html?

            page.doc.search(text_elements).each do |element|
              parse(element.inner_text)
            end
          end
        end
      end

    end
  end
end
@@ -0,0 +1,36 @@
1
+ require 'wordlist/list'
2
+
3
module Wordlist
  #
  # A word-list stored in a flat-file, with one word per line.
  #
  class FlatFile < List

    # The path to the flat-file
    attr_accessor :path

    #
    # Creates a new FlatFile list with the specified _path_ and given
    # _options_. The _options_ and the _block_ are passed on to
    # List#initialize.
    #
    def initialize(path,options={},&block)
      @path = path

      super(options,&block)
    end

    #
    # Enumerates through every word in the flat-file, passing each
    # word to the given _block_. Each line of the file is treated as one
    # word, with the trailing line separator removed. If no _block_ is
    # given, an Enumerator is returned instead.
    #
    #   flat_file.each_word do |word|
    #     puts word
    #   end
    #
    def each_word(&block)
      # without a block there is nothing to yield to; hand back an
      # Enumerator rather than opening the file and raising LocalJumpError
      return enum_for(:each_word) unless block

      File.open(@path) do |file|
        file.each_line { |line| yield line.chomp }
      end
    end

  end
end
@@ -0,0 +1,131 @@
1
+ require 'wordlist/unique_filter'
2
+ require 'wordlist/mutator'
3
+
4
module Wordlist
  #
  # Base class for word-lists. Sub-classes supply the words by
  # overriding each_word; List adds duplicate filtering, mutation rules
  # and length limits on top of that.
  #
  class List

    include Enumerable

    # Maximum length of words
    attr_accessor :max_length

    # Minimum length of words
    attr_accessor :min_length

    #
    # Creates a new List object with the given _options_. If a _block_
    # is given, it will be passed the newly created List object.
    #
    # _options_ may include the following keys:
    # <tt>:max_length</tt>:: The maximum length of words produced by the
    #                        list. Defaults to +nil+, meaning no limit.
    # <tt>:min_length</tt>:: The minimum length of words produced by the
    #                        list. Defaults to 0.
    #
    def initialize(options={},&block)
      @mutators = []

      @max_length = (options[:max_length] || nil)
      @min_length = (options[:min_length] || 0)

      block.call(self) if block
    end

    #
    # Adds a mutation rule for the specified _pattern_, to be replaced
    # using the specified _substitute_. If a _block_ is given, and the
    # _substitute_ data omitted, then the _block_ will be used to
    # replace data matched by the _pattern_.
    #
    #   list.mutate 'o', '0'
    #
    #   list.mutate '0', 0x41
    #
    #   list.mutate(/[oO]/) do |match|
    #     match.swapcase
    #   end
    #
    def mutate(pattern,substitute=nil,&block)
      @mutators << Mutator.new(pattern,substitute,&block)
    end

    #
    # Enumerate through every word in the list, passing each word to
    # the given block. By default this method passes nothing to the given
    # _block_; sub-classes are expected to override it.
    #
    #   list.each_word do |word|
    #     puts word
    #   end
    #
    def each_word(&block)
    end

    #
    # Enumerates through every unique word in the list, passing each
    # unique word to the given block. A word is passed on only when the
    # UniqueFilter's +saw!+ returns a true value for it. If no block is
    # given, an Enumerator is returned instead.
    #
    #   list.each_unique do |word|
    #     puts word
    #   end
    #
    def each_unique
      return enum_for(:each_unique) unless block_given?

      unique_filter = UniqueFilter.new

      each_word do |word|
        yield word if unique_filter.saw!(word)
      end

      nil
    end

    #
    # Enumerates through every unique mutation, of every unique word, using
    # the mutator rules defined for the list. Every possible unique mutation
    # will be passed to the given _block_. Mutations shorter than
    # +min_length+ are skipped, and mutations longer than +max_length+
    # are truncated before being checked for uniqueness. If no block is
    # given, an Enumerator is returned instead.
    #
    #   list.each_mutation do |word|
    #     puts word
    #   end
    #
    def each_mutation(&block)
      return enum_for(:each_mutation) unless block_given?

      mutation_filter = UniqueFilter.new

      # final stage: applies the length limits and the uniqueness check
      emit = lambda { |mutated_word|
        # skip words shorter than the minimum length
        next if mutated_word.length < @min_length

        # truncate words longer than the maximum length
        mutated_word = mutated_word[0,@max_length] if @max_length

        yield mutated_word if mutation_filter.saw!(mutated_word)
      }

      # chain the mutators so that the first rule added runs first and
      # every word a rule produces is fed into the rule after it
      pipeline = @mutators.reverse.inject(emit) do |next_stage,mutator|
        lambda { |word| mutator.each(word,&next_stage) }
      end

      each_unique(&pipeline)
    end

    alias each each_mutation

  end
end