wordlist 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,9 @@
1
+ === 0.1.0 / 2009-08-31
2
+
3
+ * Initial release:
4
+ * Supports building word-lists from arbitrary text.
5
+ * Supports building word-lists from files.
6
+ * Supports building word-lists from websites.
7
+ * Supports enumerating through flat-file word-lists.
8
+ * Supports applying multiple mutation rules to each word in a word-list.
9
+
@@ -0,0 +1,30 @@
1
+ History.txt
2
+ Manifest.txt
3
+ README.txt
4
+ Rakefile
5
+ lib/wordlist.rb
6
+ lib/wordlist/unique_filter.rb
7
+ lib/wordlist/parsers.rb
8
+ lib/wordlist/builder.rb
9
+ lib/wordlist/builders.rb
10
+ lib/wordlist/builders/website.rb
11
+ lib/wordlist/mutator.rb
12
+ lib/wordlist/list.rb
13
+ lib/wordlist/flat_file.rb
14
+ lib/wordlist/version.rb
15
+ tasks/spec.rb
16
+ scripts/benchmark
17
+ scripts/text/comedy_of_errors.txt
18
+ spec/classes/parser_class.rb
19
+ spec/classes/test_list.rb
20
+ spec/text/previous_wordlist.txt
21
+ spec/text/sample.txt
22
+ spec/text/flat_file.txt
23
+ spec/spec_helper.rb
24
+ spec/unique_filter_spec.rb
25
+ spec/parsers_spec.rb
26
+ spec/mutator_spec.rb
27
+ spec/builder_spec.rb
28
+ spec/list_spec.rb
29
+ spec/flat_file_spec.rb
30
+ spec/wordlist_spec.rb
@@ -0,0 +1,103 @@
1
+ = Wordlist
2
+
3
+ * http://wordlist.rubyforge.org/
4
+ * http://github.com/sophsec/wordlist/
5
+ * Postmodern (postmodern.mod3 at gmail.com)
6
+
7
+ == DESCRIPTION:
8
+
9
+ A Ruby library for generating and working with word-lists. Wordlist allows
10
+ one to efficiently generate unique word-lists from arbitrary text or
11
+ other sources, such as website content. Wordlist can also quickly enumerate
12
+ through words within an existing word-list, applying multiple mutation
13
+ rules to each word in the list.
14
+
15
+ == FEATURES:
16
+
17
+ * Uses a bucket system of CRC32 hashes for efficient filtering of duplicate
18
+ words.
19
+ * Supports adding mutation rules to a word-list, which are applied to
20
+ words as the list is enumerated.
21
+ * Supports building word-lists from arbitrary text.
22
+ * Supports custom word-list builders:
23
+ * Wordlist::Builders::Website: Build word-lists from website content.
24
+ * Supports custom word-list formats:
25
+ * Wordlist::FlatFile: Enumerates through the words in a flat-file
26
+ word-list.
27
+
28
+ == EXAMPLES:
29
+
30
+ * Build a word-list from arbitrary text:
31
+
32
+ Wordlist::Builder.build('list.txt') do |builder|
33
+ builder.parse(some_text)
34
+ end
35
+
36
+ * Build a word-list from another file:
37
+
38
+ Wordlist::Builder.build('list.txt') do |builder|
39
+ builder.parse_file('some/file.txt')
40
+ end
41
+
42
+ * Build a word-list from the content of a website:
43
+
44
+ require 'wordlist/builders/website'
45
+
46
+ Wordlist::Builders::Website.build('list.txt','www.example.com')
47
+
48
+ * Enumerate through each word in a flat-file word-list:
49
+
50
+ list = Wordlist::FlatFile.new('list.txt')
51
+ list.each_word do |word|
52
+ puts word
53
+ end
54
+
55
+ * Enumerate through each unique word in a flat-file word-list:
56
+
57
+ list.each_unique do |word|
58
+ puts word
59
+ end
60
+
61
+ * Define mutation rules, and enumerate through each unique mutation of each
62
+ unique word in the word-list:
63
+
64
+ list.mutate 'o', '0'
65
+ list.mutate 'a', 0x41
66
+ list.mutate(/[hax]/i) { |match| match.swapcase }
67
+
68
+ list.each_mutation do |word|
69
+ puts word
70
+ end
71
+
72
+ == REQUIREMENTS:
73
+
74
+ * {spidr}[http://spidr.rubyforge.org] >= 0.1.9
75
+
76
+ == INSTALL:
77
+
78
+ $ sudo gem install wordlist
79
+
80
+ == LICENSE:
81
+
82
+ Wordlist - A Ruby library for generating and working with word-lists.
83
+
84
+ Copyright (c) 2009 Hal Brodigan
85
+
86
+ Permission is hereby granted, free of charge, to any person obtaining
87
+ a copy of this software and associated documentation files (the
88
+ 'Software'), to deal in the Software without restriction, including
89
+ without limitation the rights to use, copy, modify, merge, publish,
90
+ distribute, sublicense, and/or sell copies of the Software, and to
91
+ permit persons to whom the Software is furnished to do so, subject to
92
+ the following conditions:
93
+
94
+ The above copyright notice and this permission notice shall be
95
+ included in all copies or substantial portions of the Software.
96
+
97
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
98
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
99
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
100
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
101
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
102
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
103
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,22 @@
1
+ # -*- ruby -*-
2
+
3
+ require 'rubygems'
4
+ require 'hoe'
5
+ require 'hoe/signing'
6
+ require './tasks/spec.rb'
7
+ require './lib/wordlist/version.rb'
8
+
9
+ Hoe.spec('wordlist') do # declare the gem's specification through Hoe
10
+ self.rubyforge_name = 'wordlist'
11
+ self.developer('Postmodern','postmodern.mod3@gmail.com')
12
+ self.remote_rdoc_dir = '/'
13
+ self.extra_deps = [ # gems required at runtime
14
+ ['spidr', '>=0.1.9']
15
+ ]
16
+
17
+ self.extra_dev_deps = [ # gems required only for development (specs)
18
+ ['rspec', '>=1.1.12']
19
+ ]
20
+ end
21
+
22
+ # vim: syntax=Ruby
@@ -0,0 +1,4 @@
1
+ require 'wordlist/builder'
2
+ require 'wordlist/list'
3
+ require 'wordlist/flat_file'
4
+ require 'wordlist/version'
@@ -0,0 +1,128 @@
1
+ require 'wordlist/unique_filter'
2
+ require 'wordlist/parsers'
3
+
4
+ module Wordlist
5
+ class Builder
6
+
7
+ include Parsers
8
+
9
+ # Absolute path of the word-list file (expanded in #initialize)
10
+ attr_reader :path
11
+
12
+ # Open File handle for the word-list, or nil while the builder is closed
13
+ attr_reader :file
14
+
15
+ #
16
+ # Creates a new word-list Builder object with the specified _path_.
17
+ # If a _block_ is given, it will be passed the newly created
18
+ # Builder object. The word-list file is not opened until #open! is called.
19
+ #
20
+ def initialize(path,&block)
21
+ super() # explicit parens: a bare `super` would forward _path_ as an argument
22
+
23
+ @path = File.expand_path(path)
24
+ @file = nil # set by #open!, cleared by #close!
25
+ @filter = nil # UniqueFilter, created by #open!
26
+
27
+ block.call(self) if block
28
+ end
29
+
30
+ #
31
+ # Creates a new Builder object with the given _arguments_, opens the
32
+ # word-list file, passes the builder object to the given _block_ (via
33
+ # #build!), then finally closes the word-list file.
34
+ #
35
+ # Builder.build('some/path') do |builder|
36
+ # builder.parse(readline)
37
+ # end
38
+ #
39
+ def self.build(*arguments,&block)
40
+ self.new(*arguments) do |builder|
41
+ builder.open!
42
+ builder.build!(&block)
43
+ builder.close! # NOTE(review): not reached if build! raises (no ensure), leaving the file open
44
+ end
45
+ end
46
+
47
+ #
48
+ # Opens the word-list file for appending. If the file already exists, its
49
+ # existing lines are first fed to the filter so that future duplicates are skipped.
50
+ #
51
+ def open!
52
+ @filter = UniqueFilter.new
53
+
54
+ if File.file?(@path)
55
+ File.open(@path) do |file|
56
+ file.each_line do |line|
57
+ @filter.saw!(line.chomp)
58
+ end
59
+ end
60
+ end
61
+
62
+ @file = File.new(@path,File::RDWR | File::CREAT | File::APPEND) # create if missing; writes are appended
63
+ end
64
+
65
+ #
66
+ # Default build step, called when the word-list is to be built; simply
67
+ # calls the given _block_ (if any) with the builder.
68
+ #
69
+ def build!(&block)
70
+ block.call(self) if block
71
+ end
72
+
73
+ #
74
+ # Appends the specified _word_ to the word-list file, only if it has not
75
+ # been previously seen. Does nothing while the file is not open.
76
+ # Returns the builder, so calls can be chained.
77
+ def <<(word)
78
+ if @file
79
+ @filter.pass(word) do |unique| # presumably yields only words not seen before; confirm in UniqueFilter
80
+ @file.puts unique
81
+ end
82
+ end
83
+
84
+ return self
85
+ end
86
+
87
+ #
88
+ # Adds each of the specified _words_ to the word-list via #<<, and returns the builder.
89
+ #
90
+ def +(words)
91
+ words.each { |word| self << word }
92
+ return self
93
+ end
94
+
95
+ #
96
+ # Parses the specified _text_, adding each unique word to the word-list
97
+ # file.
98
+ #
99
+ def parse(text)
100
+ super(text).each { |word| self << word } # super is presumably Parsers#parse returning the words found; confirm
101
+ end
102
+
103
+ #
104
+ # Parses the contents of the file at the specified _path_ line by line, adding
105
+ # each unique word to the word-list file.
106
+ #
107
+ def parse_file(path)
108
+ File.open(path) do |file|
109
+ file.each_line do |line|
110
+ parse(line)
111
+ end
112
+ end
113
+ end
114
+
115
+ #
116
+ # Closes the word-list file and discards the filter. Does nothing if the file is not open.
117
+ #
118
+ def close!
119
+ if @file
120
+ @file.close
121
+
122
+ @file = nil
123
+ @filter = nil
124
+ end
125
+ end
126
+
127
+ end
128
+ end
@@ -0,0 +1 @@
1
+ require 'wordlist/builders/website'
@@ -0,0 +1,44 @@
1
+ require 'wordlist/builder'
2
+
3
+ require 'spidr'
4
+
5
+ module Wordlist
6
+ module Builders
7
+ class Website < Builder
8
+
9
+ # Host to spider
10
+ attr_accessor :host
11
+
12
+ #
13
+ # Creates a new Website builder object with the specified _path_
14
+ # and _host_. If a _block_ is given, it will be passed the newly created
15
+ # Website builder object.
16
+ #
17
+ def initialize(path,host,&block)
18
+ @host = host # assigned before super, which calls the block, so the block can already read it
19
+
20
+ super(path,&block)
21
+ end
22
+
23
+ #
24
+ # Builds the word-list file by spidering the +host+ and parsing the
25
+ # inner-text of the h1-h5, p and span elements of every HTML page. If a
26
+ # _block_ is given, it will be called before the +host+ is spidered.
27
+ #
28
+ def build!(&block)
29
+ super(&block)
30
+
31
+ Spidr.host(@host) do |spidr|
32
+ spidr.every_page do |page|
33
+ if page.html?
34
+ page.doc.search('//h1|//h2|//h3|//h4|//h5|//p|//span').each do |element|
35
+ parse(element.inner_text)
36
+ end
37
+ end
38
+ end
39
+ end
40
+ end
41
+
42
+ end
43
+ end
44
+ end
@@ -0,0 +1,36 @@
1
+ require 'wordlist/list'
2
+
3
+ module Wordlist
4
+ class FlatFile < List
5
+
6
+ # The path to the flat-file
7
+ attr_accessor :path
8
+
9
+ #
10
+ # Creates a new FlatFile list with the specified _path_ and given
11
+ # _options_. The _options_ and any _block_ are forwarded to List#initialize.
12
+ #
13
+ def initialize(path,options={},&block)
14
+ @path = path # stored as given; the file is only opened in #each_word
15
+
16
+ super(options,&block)
17
+ end
18
+
19
+ #
20
+ # Enumerates through every word in the flat-file (one word per line, with the
21
+ # line ending removed), passing each word to the given _block_.
22
+ #
23
+ # flat_file.each_word do |word|
24
+ # puts word
25
+ # end
26
+ #
27
+ def each_word(&block)
28
+ File.open(@path) do |file|
29
+ file.each_line do |line|
30
+ yield line.chomp
31
+ end
32
+ end
33
+ end
34
+
35
+ end
36
+ end
@@ -0,0 +1,131 @@
1
+ require 'wordlist/unique_filter'
2
+ require 'wordlist/mutator'
3
+
4
+ module Wordlist
5
+ class List
6
+
7
+ include Enumerable
8
+
9
+ # Maximum length of words (nil means no limit)
10
+ attr_accessor :max_length
11
+
12
+ # Minimum length of words (defaults to 0)
13
+ attr_accessor :min_length
14
+
15
+ #
16
+ # Creates a new List object with the given _options_. If a _block_
17
+ # is given, it will be passed the newly created List object.
18
+ #
19
+ # _options_ may include the following keys:
20
+ # <tt>:max_length</tt>:: The maximum length of words produced by the
21
+ # list.
22
+ # <tt>:min_length</tt>:: The minimum length of words produced by the
23
+ # list.
24
+ #
25
+ def initialize(options={},&block)
26
+ @mutators = [] # Mutator rules, in the order they were added by #mutate
27
+
28
+ @max_length = nil
29
+ @min_length = 0
30
+
31
+ if options[:max_length]
32
+ @max_length = options[:max_length]
33
+ end
34
+
35
+ if options[:min_length]
36
+ @min_length = options[:min_length]
37
+ end
38
+
39
+ block.call(self) if block
40
+ end
41
+
42
+ #
43
+ # Adds a mutation rule for the specified _pattern_, to be replaced
44
+ # using the specified _substitute_. If a _block_ is given, and the
45
+ # _substitute_ data omitted, then the _block_ will be used to
46
+ # replace data matched by the _pattern_.
47
+ #
48
+ # list.mutate 'o', '0'
49
+ #
50
+ # list.mutate '0', 0x41
51
+ #
52
+ # list.mutate(/[oO]/) do |match|
53
+ # match.swapcase
54
+ # end
55
+ #
56
+ def mutate(pattern,substitute=nil,&block)
57
+ @mutators << Mutator.new(pattern,substitute,&block)
58
+ end
59
+
60
+ #
61
+ # Enumerate through every word in the list, passing each word to
62
+ # the given block. By default this method passes nothing to the given
63
+ # _block_; subclasses are expected to override it to supply the words.
64
+ #
65
+ # list.each_word do |word|
66
+ # puts word
67
+ # end
68
+ #
69
+ def each_word(&block)
70
+ end
71
+
72
+ #
73
+ # Enumerates through every unique word in the list, passing each
74
+ # unique word to the given block.
75
+ #
76
+ # list.each_unique do |word|
77
+ # puts word
78
+ # end
79
+ #
80
+ def each_unique
81
+ unique_filter = UniqueFilter.new()
82
+
83
+ each_word do |word|
84
+ if unique_filter.saw!(word) # presumably true only the first time a word is seen; confirm in UniqueFilter
85
+ yield word
86
+ end
87
+ end
88
+
89
+ unique_filter = nil # drops the filter; as the last expression it also makes the method return nil
90
+ end
91
+
92
+ #
93
+ # Enumerates through every unique mutation, of every unique word, using
94
+ # the mutator rules defined for the list. Every possible unique mutation
95
+ # will be passed to the given _block_.
96
+ #
97
+ # list.each_mutation do |word|
98
+ # puts word
99
+ # end
100
+ #
101
+ def each_mutation(&block)
102
+ mutation_filter = UniqueFilter.new()
103
+
104
+ mutator_stack = [lambda { |mutated_word| # final stage: length checks, de-duplication, then yield
105
+ # skip words shorter than the minimum length
106
+ next if mutated_word.length < @min_length
107
+
108
+ # truncate words longer than the maximum length
109
+ mutated_word = mutated_word[0,@max_length] if @max_length
110
+
111
+ if mutation_filter.saw!(mutated_word)
112
+ yield mutated_word # yields to the block given to each_mutation
113
+ end
114
+ }]
115
+
116
+ (@mutators.length-1).downto(0) do |index| # one stage per mutator; unshifting back-to-front keeps them in rule order
117
+ mutator_stack.unshift(lambda { |word|
118
+ prev_mutator = @mutators[index]
119
+ next_mutator = mutator_stack[index+1] # looked up at call time, when every stage is in place, so this is the following stage
120
+
121
+ prev_mutator.each(word,&next_mutator) # presumably yields each mutation of word; confirm in Mutator
122
+ })
123
+ end
124
+
125
+ each_unique(&(mutator_stack.first))
126
+ end
127
+
128
+ alias each each_mutation # supplies the #each that Enumerable builds on, so Enumerable methods iterate over mutations
129
+
130
+ end
131
+ end